{
"tool_response": {
"type": "update",
"filePath": "\/var\/www\/scripts\/pipeline\/analyze.py",
"content": "\"\"\"\nSemantic analysis for KI-System Pipeline.\nEntity extraction, taxonomy classification, ontology mapping.\n\nBACKWARD COMPATIBILITY WRAPPER - All logic moved to analyzers\/ module.\n\"\"\"\n\n# Re-export all functions from analyzers module for backward compatibility\nfrom analyzers import (\n get_anthropic_client,\n extract_entities_ollama,\n extract_entities_anthropic,\n store_entities,\n find_entity_by_name,\n extract_relations,\n classify_taxonomy,\n analyze_chunk_semantics,\n analyze_chunks_semantics,\n extract_statements,\n store_statements,\n analyze_chunk_statements,\n analyze_document_statements,\n analyze_document,\n link_chunk_entities,\n)\n\n__all__ = [\n \"get_anthropic_client\",\n \"extract_entities_ollama\",\n \"extract_entities_anthropic\",\n \"store_entities\",\n \"find_entity_by_name\",\n \"extract_relations\",\n \"classify_taxonomy\",\n \"analyze_chunk_semantics\",\n \"analyze_chunks_semantics\",\n \"extract_statements\",\n \"store_statements\",\n \"analyze_chunk_statements\",\n \"analyze_document_statements\",\n \"analyze_document\",\n \"link_chunk_entities\",\n]\n\nif __name__ == \"__main__\":\n test_text = \"\"\"\n Carl Rogers entwickelte die klientenzentrierte Gesprächstherapie.\n Diese Methode basiert auf Empathie, Kongruenz und bedingungsloser Wertschätzung.\n Sie wird heute weltweit in der Psychotherapie eingesetzt.\n \"\"\"\n\n print(\"Testing entity extraction with Ollama...\")\n entities = extract_entities_ollama(test_text)\n print(f\"Found {len(entities)} entities:\")\n for e in entities:\n print(f\" - {e['name']} ({e['type']})\")\n",
"structuredPatch": [
{
"oldStart": 1,
"oldLines": 974,
"newStart": 1,
"newLines": 47,
"lines": [
" \"\"\"",
"-Semantic analysis for KI-System Pipeline",
"+Semantic analysis for KI-System Pipeline.",
" Entity extraction, taxonomy classification, ontology mapping.",
"+",
"+BACKWARD COMPATIBILITY WRAPPER - All logic moved to analyzers\/ module.",
" \"\"\"",
" ",
"-import json",
"-import time",
"+# Re-export all functions from analyzers module for backward compatibility",
"+from analyzers import (",
"+ get_anthropic_client,",
"+ extract_entities_ollama,",
"+ extract_entities_anthropic,",
"+ store_entities,",
"+ find_entity_by_name,",
"+ extract_relations,",
"+ classify_taxonomy,",
"+ analyze_chunk_semantics,",
"+ analyze_chunks_semantics,",
"+ extract_statements,",
"+ store_statements,",
"+ analyze_chunk_statements,",
"+ analyze_document_statements,",
"+ analyze_document,",
"+ link_chunk_entities,",
"+)",
" ",
"-import requests",
"+__all__ = [",
"+ \"get_anthropic_client\",",
"+ \"extract_entities_ollama\",",
"+ \"extract_entities_anthropic\",",
"+ \"store_entities\",",
"+ \"find_entity_by_name\",",
"+ \"extract_relations\",",
"+ \"classify_taxonomy\",",
"+ \"analyze_chunk_semantics\",",
"+ \"analyze_chunks_semantics\",",
"+ \"extract_statements\",",
"+ \"store_statements\",",
"+ \"analyze_chunk_statements\",",
"+ \"analyze_document_statements\",",
"+ \"analyze_document\",",
"+ \"link_chunk_entities\",",
"+]",
" ",
"-from config import ANTHROPIC_API_KEY, ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST",
"-from db import db",
"-from protokoll import protokoll",
"-",
"-",
"-def get_anthropic_client():",
"- \"\"\"Get Anthropic API client.\"\"\"",
"- try:",
"- import anthropic",
"-",
"- if ANTHROPIC_API_KEY:",
"- db.log(\"INFO\", \"Using Anthropic API (Claude)\")",
"- return anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)",
"- else:",
"- db.log(\"WARNING\", \"No Anthropic API key found, falling back to Ollama\")",
"- except ImportError:",
"- db.log(\"WARNING\", \"anthropic package not installed, falling back to Ollama\")",
"- return None",
"-",
"-",
"-def extract_entities_ollama(text, model=\"gemma3:27b-it-qat\"):",
"- \"\"\"Extract entities using Ollama.\"\"\"",
"- # Load prompt with versioning metadata",
"- prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")",
"- prompt_template = prompt_data[\"content\"] if prompt_data else None",
"-",
"- if not prompt_template:",
"- db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")",
"- prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.",
"-Kategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION",
"-Antworte NUR im JSON-Format:",
"-{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}",
"-",
"-Text:",
"-{{TEXT}}\"\"\"",
"-",
"- prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])",
"-",
"- try:",
"- start_time = time.time()",
"- response = requests.post(",
"- f\"{OLLAMA_HOST}\/api\/generate\",",
"- json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
"- timeout=120,",
"- )",
"- response.raise_for_status()",
"- data = response.json()",
"- duration_ms = int((time.time() - start_time) * 1000)",
"-",
"- # Parse JSON from response",
"- response_text = data.get(\"response\", \"{}\")",
"-",
"- # Log to ki-protokoll",
"- protokoll.log_llm_call(",
"- request=f\"[entity_extraction] {prompt[:500]}...\",",
"- response=response_text[:2000],",
"- model_name=f\"ollama:{model}\",",
"- tokens_input=data.get(\"prompt_eval_count\", 0),",
"- tokens_output=data.get(\"eval_count\", 0),",
"- duration_ms=duration_ms,",
"- status=\"completed\",",
"- )",
"-",
"- try:",
"- entities = json.loads(response_text)",
"- return entities.get(\"entities\", [])",
"- except json.JSONDecodeError:",
"- db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")",
"- return []",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")",
"- protokoll.log_llm_call(",
"- request=f\"[entity_extraction] {prompt[:500]}...\",",
"- model_name=f\"ollama:{model}\",",
"- status=\"error\",",
"- error_message=str(e),",
"- )",
"- return []",
"-",
"-",
"-def extract_entities_anthropic(text, client):",
"- \"\"\"Extract entities using Anthropic Claude.\"\"\"",
"- # Get prompt with versioning metadata",
"- prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")",
"- prompt_template = prompt_data[\"content\"] if prompt_data else None",
"-",
"- if not prompt_template:",
"- prompt_template = \"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.",
"-",
"-Kategorisiere jede Entität als:",
"-- PERSON (Namen von Personen)",
"-- ORGANIZATION (Firmen, Institutionen, Gruppen)",
"-- CONCEPT (Fachbegriffe, Methoden, Theorien)",
"-- LOCATION (Orte, Länder)",
"-- DATE (Zeitangaben)",
"-- OTHER (Sonstiges)",
"-",
"-Antworte NUR im JSON-Format:",
"-{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurzer Kontext der Erwähnung\"}]}",
"-",
"-Text:",
"-{{TEXT}}\"\"\"",
"-",
"- prompt = prompt_template.replace(\"{{TEXT}}\", text[:4000])",
"-",
"- try:",
"- start_time = time.time()",
"- message = client.messages.create(",
"- model=ANTHROPIC_MODEL, max_tokens=2000, messages=[{\"role\": \"user\", \"content\": prompt}]",
"- )",
"- duration_ms = int((time.time() - start_time) * 1000)",
"-",
"- response_text = message.content[0].text",
"-",
"- # Log to ki-protokoll",
"- protokoll.log_llm_call(",
"- request=f\"[entity_extraction] {prompt[:500]}...\",",
"- response=response_text[:2000],",
"- model_name=ANTHROPIC_MODEL,",
"- tokens_input=message.usage.input_tokens,",
"- tokens_output=message.usage.output_tokens,",
"- duration_ms=duration_ms,",
"- status=\"completed\",",
"- )",
"-",
"- # Extract JSON from response",
"- import re",
"-",
"- json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
"- if json_match:",
"- entities = json.loads(json_match.group())",
"- return entities.get(\"entities\", [])",
"- return []",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Anthropic entity extraction failed: {e}\")",
"- protokoll.log_llm_call(",
"- request=f\"[entity_extraction] {prompt[:500]}...\",",
"- model_name=ANTHROPIC_MODEL,",
"- status=\"error\",",
"- error_message=str(e),",
"- )",
"- return []",
"-",
"-",
"-def extract_relations(text, entities, client=None):",
"- \"\"\"Extract relations between entities.\"\"\"",
"- if not entities or len(entities) < 2:",
"- return []",
"-",
"- entity_names = [e[\"name\"] for e in entities[:20]]",
"-",
"- # Load prompt from database",
"- prompt_template = db.get_prompt(\"relation_extraction\")",
"-",
"- if not prompt_template:",
"- db.log(\"WARNING\", \"relation_extraction prompt not found in DB, using fallback\")",
"- prompt_template = \"\"\"Identifiziere Beziehungen zwischen Entitäten.",
"-Entitäten: {{ENTITIES}}",
"-Beziehungstypen: DEVELOPED_BY, RELATED_TO, PART_OF, USED_IN, BASED_ON",
"-Antworte NUR im JSON-Format:",
"-{\"relations\": [{\"source\": \"...\", \"relation\": \"...\", \"target\": \"...\"}]}",
"-",
"-Text:",
"-{{TEXT}}\"\"\"",
"-",
"- prompt = prompt_template.replace(\"{{ENTITIES}}\", \", \".join(entity_names))",
"- prompt = prompt.replace(\"{{TEXT}}\", text[:3000])",
"-",
"- try:",
"- start_time = time.time()",
"- tokens_in, tokens_out = 0, 0",
"- model_name = \"\"",
"-",
"- if client:",
"- message = client.messages.create(",
"- model=ANTHROPIC_MODEL, max_tokens=1000, messages=[{\"role\": \"user\", \"content\": prompt}]",
"- )",
"- response_text = message.content[0].text",
"- tokens_in = message.usage.input_tokens",
"- tokens_out = message.usage.output_tokens",
"- model_name = ANTHROPIC_MODEL",
"- else:",
"- response = requests.post(",
"- f\"{OLLAMA_HOST}\/api\/generate\",",
"- json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
"- timeout=120,",
"- )",
"- response.raise_for_status()",
"- data = response.json()",
"- response_text = data.get(\"response\", \"{}\")",
"- tokens_in = data.get(\"prompt_eval_count\", 0)",
"- tokens_out = data.get(\"eval_count\", 0)",
"- model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"",
"-",
"- duration_ms = int((time.time() - start_time) * 1000)",
"-",
"- # Log to ki-protokoll",
"- protokoll.log_llm_call(",
"- request=f\"[relation_extraction] {prompt[:500]}...\",",
"- response=response_text[:2000],",
"- model_name=model_name,",
"- tokens_input=tokens_in,",
"- tokens_output=tokens_out,",
"- duration_ms=duration_ms,",
"- status=\"completed\",",
"- )",
"-",
"- import re",
"-",
"- json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
"- if json_match:",
"- data = json.loads(json_match.group())",
"- return data.get(\"relations\", [])",
"- return []",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Relation extraction failed: {e}\")",
"- protokoll.log_llm_call(",
"- request=f\"[relation_extraction] {prompt[:500]}...\",",
"- model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",",
"- status=\"error\",",
"- error_message=str(e),",
"- )",
"- return []",
"-",
"-",
"-def classify_taxonomy(text, client=None):",
"- \"\"\"Classify text into taxonomy categories.\"\"\"",
"- prompt_template = db.get_prompt(\"taxonomy_classification\")",
"-",
"- if not prompt_template:",
"- prompt_template = \"\"\"Klassifiziere den folgenden Text in passende Kategorien.",
"-",
"-Wähle aus diesen Hauptkategorien:",
"-- Methoden (Therapiemethoden, Techniken)",
"-- Theorie (Konzepte, Modelle, Grundlagen)",
"-- Praxis (Anwendung, Fallbeispiele)",
"-- Organisation (Strukturen, Prozesse)",
"-- Kommunikation (Gesprächsführung, Interaktion)",
"-- Entwicklung (Persönliche Entwicklung, Veränderung)",
"-",
"-Antworte NUR im JSON-Format:",
"-{\"categories\": [\"...\", \"...\"], \"confidence\": 0.0-1.0}",
"-",
"-Text:",
"-{{TEXT}}\"\"\"",
"-",
"- prompt = prompt_template.replace(\"{{TEXT}}\", text[:2000])",
"-",
"- try:",
"- start_time = time.time()",
"- tokens_in, tokens_out = 0, 0",
"- model_name = \"\"",
"-",
"- if client:",
"- message = client.messages.create(",
"- model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", \"content\": prompt}]",
"- )",
"- response_text = message.content[0].text",
"- tokens_in = message.usage.input_tokens",
"- tokens_out = message.usage.output_tokens",
"- model_name = ANTHROPIC_MODEL",
"- else:",
"- response = requests.post(",
"- f\"{OLLAMA_HOST}\/api\/generate\",",
"- json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
"- timeout=60,",
"- )",
"- response.raise_for_status()",
"- data = response.json()",
"- response_text = data.get(\"response\", \"{}\")",
"- tokens_in = data.get(\"prompt_eval_count\", 0)",
"- tokens_out = data.get(\"eval_count\", 0)",
"- model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"",
"-",
"- duration_ms = int((time.time() - start_time) * 1000)",
"-",
"- # Log to ki-protokoll",
"- protokoll.log_llm_call(",
"- request=f\"[taxonomy_classification] {prompt[:500]}...\",",
"- response=response_text[:2000],",
"- model_name=model_name,",
"- tokens_input=tokens_in,",
"- tokens_output=tokens_out,",
"- duration_ms=duration_ms,",
"- status=\"completed\",",
"- )",
"-",
"- import re",
"-",
"- json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
"- if json_match:",
"- return json.loads(json_match.group())",
"- return {\"categories\": [], \"confidence\": 0}",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Taxonomy classification failed: {e}\")",
"- protokoll.log_llm_call(",
"- request=f\"[taxonomy_classification] {prompt[:500]}...\",",
"- model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",",
"- status=\"error\",",
"- error_message=str(e),",
"- )",
"- return {\"categories\": [], \"confidence\": 0}",
"-",
"-",
"-def store_entities(document_id, entities):",
"- \"\"\"Store extracted entities in database.\"\"\"",
"- stored = 0",
"-",
"- for entity in entities:",
"- try:",
"- # Get description from entity",
"- description = entity.get(\"description\") or entity.get(\"context\") or None",
"-",
"- # Check if entity already exists",
"- cursor = db.execute(",
"- \"SELECT id, description FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])",
"- )",
"- existing = cursor.fetchone()",
"- cursor.close()",
"-",
"- if existing:",
"- entity_id = existing[\"id\"]",
"- # Update description if it was empty and we have one now",
"- if description and not existing[\"description\"]:",
"- cursor = db.execute(\"UPDATE entities SET description = %s WHERE id = %s\", (description, entity_id))",
"- db.commit()",
"- cursor.close()",
"- else:",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO entities (name, type, description, created_at)",
"- VALUES (%s, %s, %s, NOW())\"\"\",",
"- (entity[\"name\"], entity[\"type\"], description),",
"- )",
"- db.commit()",
"- entity_id = cursor.lastrowid",
"- cursor.close()",
"-",
"- # Log provenance for new entity",
"- db.log_provenance(",
"- artifact_type=\"entity\",",
"- artifact_id=entity_id,",
"- source_type=\"extraction\",",
"- source_id=document_id,",
"- pipeline_step=\"entity_extract\",",
"- )",
"-",
"- stored += 1",
"-",
"- except Exception as e:",
"- db.log(\"WARNING\", f\"Failed to store entity: {e}\")",
"-",
"- return stored",
"-",
"-",
"-def analyze_document(document_id, text, use_anthropic=True, progress=None):",
"- \"\"\"",
"- Full semantic analysis of a document.",
"- Extracts entities, relations, and taxonomy classification.",
"- \"\"\"",
"- db.log(\"INFO\", f\"Starting semantic analysis for document {document_id}\")",
"-",
"- if progress:",
"- progress.add_log(\"Analyse: Starte Entity-Extraktion...\")",
"-",
"- client = get_anthropic_client() if use_anthropic else None",
"-",
"- # Extract entities",
"- if client:",
"- entities = extract_entities_anthropic(text, client)",
"- else:",
"- entities = extract_entities_ollama(text)",
"-",
"- db.log(\"INFO\", f\"Extracted {len(entities)} entities\")",
"- if progress:",
"- progress.add_log(f\"Analyse: {len(entities)} Entitäten extrahiert\")",
"-",
"- # Store entities",
"- if entities:",
"- stored = store_entities(document_id, entities)",
"- db.log(\"INFO\", f\"Stored {stored} entities\")",
"- if progress:",
"- progress.add_log(f\"Analyse: {stored} Entitäten gespeichert\")",
"-",
"- # Extract relations",
"- if progress:",
"- progress.add_log(\"Analyse: Extrahiere Relationen...\")",
"- relations = extract_relations(text, entities, client)",
"- db.log(\"INFO\", f\"Extracted {len(relations)} relations\")",
"- if progress:",
"- progress.add_log(f\"Analyse: {len(relations)} Relationen extrahiert\")",
"-",
"- # Store relations",
"- for rel in relations:",
"- try:",
"- # Find entity IDs",
"- cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"source\"],))",
"- source = cursor.fetchone()",
"- cursor.close()",
"-",
"- cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"target\"],))",
"- target = cursor.fetchone()",
"- cursor.close()",
"-",
"- if source and target:",
"- # Store in entity_relations (raw pipeline data)",
"- cursor = db.execute(",
"- \"\"\"INSERT IGNORE INTO entity_relations",
"- (source_entity_id, target_entity_id, relation_type, created_at)",
"- VALUES (%s, %s, %s, NOW())\"\"\",",
"- (source[\"id\"], target[\"id\"], rel[\"relation\"]),",
"- )",
"- db.commit()",
"- cursor.close()",
"-",
"- # Store in entity_ontology (for semantic explorer)",
"- cursor = db.execute(",
"- \"\"\"INSERT IGNORE INTO entity_ontology",
"- (source_entity_id, target_entity_id, relation_type, direction,",
"- strength, source_type, source_id, created_at)",
"- VALUES (%s, %s, %s, 'unidirectional', 1.0, 'document', %s, NOW())\"\"\",",
"- (source[\"id\"], target[\"id\"], rel[\"relation\"], document_id),",
"- )",
"- db.commit()",
"- cursor.close()",
"- except Exception as e:",
"- db.log(\"WARNING\", f\"Failed to store relation: {e}\")",
"-",
"- # Taxonomy classification",
"- if progress:",
"- progress.add_log(\"Analyse: Klassifiziere Taxonomie...\")",
"- taxonomy = classify_taxonomy(text, client)",
"- db.log(\"INFO\", f\"Classified into {len(taxonomy.get('categories', []))} categories\")",
"- if progress:",
"- progress.add_log(f\"Analyse: {len(taxonomy.get('categories', []))} Kategorien zugewiesen\")",
"-",
"- # Store taxonomy assignments",
"- for category in taxonomy.get(\"categories\", []):",
"- try:",
"- # Strip number prefix like \"1. \" or \"2. \" from category name",
"- import re",
"-",
"- clean_category = re.sub(r\"^\\d+\\.\\s*\", \"\", category).strip()",
"-",
"- cursor = db.execute(\"SELECT id FROM taxonomy_terms WHERE name = %s LIMIT 1\", (clean_category,))",
"- term = cursor.fetchone()",
"- cursor.close()",
"-",
"- if term:",
"- cursor = db.execute(",
"- \"\"\"INSERT IGNORE INTO document_taxonomy",
"- (document_id, taxonomy_term_id, confidence, created_at)",
"- VALUES (%s, %s, %s, NOW())\"\"\",",
"- (document_id, term[\"id\"], taxonomy.get(\"confidence\", 0.5)),",
"- )",
"- db.commit()",
"- cursor.close()",
"- except Exception as e:",
"- db.log(\"WARNING\", f\"Failed to store taxonomy: {e}\")",
"-",
"- # Link entities to chunks",
"- chunk_entity_links = 0",
"- if entities:",
"- chunk_entity_links = link_chunk_entities(document_id)",
"- db.log(\"INFO\", f\"Created {chunk_entity_links} chunk-entity links\")",
"-",
"- # Analyze chunk semantics",
"- chunks_analyzed = analyze_chunks_semantics(document_id, client, progress)",
"- db.log(\"INFO\", f\"Chunk semantics: {chunks_analyzed} chunks analyzed\")",
"-",
"- return {",
"- \"entities\": len(entities),",
"- \"relations\": len(relations) if entities else 0,",
"- \"categories\": taxonomy.get(\"categories\", []),",
"- \"chunk_entity_links\": chunk_entity_links,",
"- \"chunks_semantics\": chunks_analyzed,",
"- }",
"-",
"-",
"-def link_chunk_entities(document_id):",
"- \"\"\"",
"- Link entities to their source chunks.",
"- Scans each chunk for entity mentions and populates chunk_entities.",
"- \"\"\"",
"- # Get all entities (we check which appear in chunks)",
"- cursor = db.execute(\"SELECT id, name, canonical_name FROM entities\")",
"- entities = cursor.fetchall()",
"- cursor.close()",
"-",
"- if not entities:",
"- db.log(\"INFO\", f\"No entities to link for document {document_id}\")",
"- return 0",
"-",
"- # Get all chunks for this document",
"- cursor = db.execute(\"SELECT id, content FROM chunks WHERE document_id = %s\", (document_id,))",
"- chunks = cursor.fetchall()",
"- cursor.close()",
"-",
"- linked = 0",
"- for chunk in chunks:",
"- chunk_text = chunk[\"content\"].lower()",
"-",
"- for entity in entities:",
"- # Check if entity name appears in chunk",
"- name_lower = entity[\"name\"].lower()",
"- canonical = (entity[\"canonical_name\"] or \"\").lower()",
"-",
"- mention_count = chunk_text.count(name_lower)",
"- if canonical and canonical != name_lower:",
"- mention_count += chunk_text.count(canonical)",
"-",
"- if mention_count > 0:",
"- # Calculate relevance score (0-1 based on mentions)",
"- relevance = min(1.0, mention_count * 0.2)",
"-",
"- try:",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO chunk_entities",
"- (chunk_id, entity_id, relevance_score, mention_count)",
"- VALUES (%s, %s, %s, %s)",
"- ON DUPLICATE KEY UPDATE",
"- relevance_score = VALUES(relevance_score),",
"- mention_count = VALUES(mention_count)\"\"\",",
"- (chunk[\"id\"], entity[\"id\"], relevance, mention_count),",
"- )",
"- db.commit()",
"- cursor.close()",
"- linked += 1",
"- except Exception as e:",
"- db.log(\"WARNING\", f\"Failed to link chunk {chunk['id']} to entity {entity['id']}: {e}\")",
"-",
"- db.log(\"INFO\", f\"Linked {linked} chunk-entity pairs for document {document_id}\")",
"- return linked",
"-",
"-",
"-def analyze_chunk_semantics(chunk_id, content, client=None):",
"- \"\"\"",
"- Analyze a single chunk for semantics (summary, keywords, sentiment, topics).",
"- Stores result in chunk_semantics table.",
"- \"\"\"",
"- prompt_template = db.get_prompt(\"chunk_semantics\")",
"-",
"- if not prompt_template:",
"- prompt_template = \"\"\"Analysiere diesen Textabschnitt und extrahiere:",
"-",
"-1. **summary**: Eine kurze Zusammenfassung (1-2 Sätze)",
"-2. **keywords**: 3-5 wichtige Schlüsselwörter",
"-3. **sentiment**: Stimmung (positive, negative, neutral, mixed)",
"-4. **topics**: 2-3 Hauptthemen",
"-",
"-Antworte NUR im JSON-Format:",
"-{\"summary\": \"...\", \"keywords\": [\"...\", \"...\"], \"sentiment\": \"neutral\", \"topics\": [\"...\", \"...\"]}",
"-",
"-Text:",
"-{{TEXT}}\"\"\"",
"-",
"- prompt = prompt_template.replace(\"{{TEXT}}\", content[:2000])",
"-",
"- try:",
"- start_time = time.time()",
"- tokens_in, tokens_out = 0, 0",
"- model_name = \"\"",
"-",
"- if client:",
"- message = client.messages.create(",
"- model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", \"content\": prompt}]",
"- )",
"- response_text = message.content[0].text",
"- tokens_in = message.usage.input_tokens",
"- tokens_out = message.usage.output_tokens",
"- model_name = ANTHROPIC_MODEL",
"- else:",
"- response = requests.post(",
"- f\"{OLLAMA_HOST}\/api\/generate\",",
"- json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
"- timeout=60,",
"- )",
"- response.raise_for_status()",
"- data = response.json()",
"- response_text = data.get(\"response\", \"{}\")",
"- tokens_in = data.get(\"prompt_eval_count\", 0)",
"- tokens_out = data.get(\"eval_count\", 0)",
"- model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"",
"-",
"- duration_ms = int((time.time() - start_time) * 1000)",
"-",
"- # Log to ki-protokoll",
"- protokoll.log_llm_call(",
"- request=f\"[chunk_semantics] chunk_id={chunk_id}\",",
"- response=response_text[:1000],",
"- model_name=model_name,",
"- tokens_input=tokens_in,",
"- tokens_output=tokens_out,",
"- duration_ms=duration_ms,",
"- status=\"completed\",",
"- )",
"-",
"- # Parse JSON",
"- import re",
"-",
"- json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
"- if json_match:",
"- result = json.loads(json_match.group())",
"-",
"- # Store in chunk_semantics",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO chunk_semantics",
"- (chunk_id, summary, keywords, sentiment, topics, language, analyzed_at, analysis_model)",
"- VALUES (%s, %s, %s, %s, %s, 'de', NOW(), %s)",
"- ON DUPLICATE KEY UPDATE",
"- summary = VALUES(summary), keywords = VALUES(keywords),",
"- sentiment = VALUES(sentiment), topics = VALUES(topics),",
"- analyzed_at = NOW(), analysis_model = VALUES(analysis_model)\"\"\",",
"- (",
"- chunk_id,",
"- result.get(\"summary\", \"\"),",
"- json.dumps(result.get(\"keywords\", []), ensure_ascii=False),",
"- result.get(\"sentiment\", \"neutral\"),",
"- json.dumps(result.get(\"topics\", []), ensure_ascii=False),",
"- model_name,",
"- ),",
"- )",
"- db.commit()",
"- cursor.close()",
"- return result",
"-",
"- return None",
"-",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Chunk semantics analysis failed: {e}\")",
"- return None",
"-",
"-",
"-def analyze_chunks_semantics(document_id, client=None, progress=None):",
"- \"\"\"Analyze all chunks of a document for semantics.\"\"\"",
"- cursor = db.execute(\"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\", (document_id,))",
"- chunks = cursor.fetchall()",
"- cursor.close()",
"-",
"- total = len(chunks)",
"- db.log(\"INFO\", f\"Analyzing semantics for {total} chunks\")",
"- if progress:",
"- progress.add_log(f\"Semantik: Analysiere {total} Chunks...\")",
"-",
"- analyzed = 0",
"- for i, chunk in enumerate(chunks, 1):",
"- if progress:",
"- progress.add_log(f\"Semantik: Chunk {i}\/{total}...\")",
"-",
"- result = analyze_chunk_semantics(chunk[\"id\"], chunk[\"content\"], client)",
"- if result:",
"- analyzed += 1",
"-",
"- db.log(\"INFO\", f\"Analyzed {analyzed}\/{total} chunks for semantics\")",
"- if progress:",
"- progress.add_log(f\"Semantik: {analyzed}\/{total} Chunks analysiert\")",
"- return analyzed",
"-",
"-",
"-def extract_statements(chunk_id: int, text: str, client=None) -> list[dict]:",
"- \"\"\"",
"- Extract SPO-triplets (Subject-Predicate-Object statements) from text.",
"-",
"- Args:",
"- chunk_id: ID of the chunk being analyzed",
"- text: Text content to extract statements from",
"- client: Optional Anthropic client (falls back to Ollama if None)",
"-",
"- Returns:",
"- List of extracted statements with entity linking",
"- \"\"\"",
"- # Get prompt with versioning metadata",
"- prompt_data = db.get_prompt_by_use_case(\"statement_extraction\")",
"- prompt_template = prompt_data[\"content\"] if prompt_data else None",
"- prompt_id = prompt_data[\"id\"] if prompt_data else None",
"- prompt_version = prompt_data[\"version\"] if prompt_data else None",
"-",
"- if not prompt_template:",
"- db.log(\"WARNING\", \"statement_extraction prompt not found in DB, using fallback\")",
"- prompt_template = \"\"\"Extrahiere alle faktischen Aussagen aus dem Text als SPO-Tripel.",
"-",
"-Regeln:",
"-- Subject: Eine benannte Entität (Person, Organisation, Konzept, Methode)",
"-- Predicate: Die Beziehung oder Eigenschaft (z.B. \"entwickelte\", \"basiert auf\", \"ist Teil von\")",
"-- Object: Eine Entität oder ein Literal-Wert",
"-",
"-Antworte NUR im JSON-Format:",
"-{\"statements\": [",
"- {\"subject\": \"Name der Subject-Entität\", \"predicate\": \"Beziehung\", \"object\": \"Name oder Wert\", \"confidence\": 0.0-1.0}",
"-]}",
"-",
"-Text:",
"-{{TEXT}}\"\"\"",
"-",
"- prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])",
"-",
"- try:",
"- start_time = time.time()",
"- tokens_in, tokens_out = 0, 0",
"- model_name = \"\"",
"-",
"- if client:",
"- message = client.messages.create(",
"- model=ANTHROPIC_MODEL,",
"- max_tokens=1500,",
"- messages=[{\"role\": \"user\", \"content\": prompt}],",
"- )",
"- response_text = message.content[0].text",
"- tokens_in = message.usage.input_tokens",
"- tokens_out = message.usage.output_tokens",
"- model_name = ANTHROPIC_MODEL",
"- else:",
"- response = requests.post(",
"- f\"{OLLAMA_HOST}\/api\/generate\",",
"- json={",
"- \"model\": OLLAMA_CHAT_MODEL,",
"- \"prompt\": prompt,",
"- \"stream\": False,",
"- \"format\": \"json\",",
"- },",
"- timeout=120,",
"- )",
"- response.raise_for_status()",
"- data = response.json()",
"- response_text = data.get(\"response\", \"{}\")",
"- tokens_in = data.get(\"prompt_eval_count\", 0)",
"- tokens_out = data.get(\"eval_count\", 0)",
"- model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"",
"-",
"- duration_ms = int((time.time() - start_time) * 1000)",
"-",
"- # Log to ki-protokoll",
"- protokoll.log_llm_call(",
"- request=f\"[statement_extraction] chunk_id={chunk_id}\",",
"- response=response_text[:2000],",
"- model_name=model_name,",
"- tokens_input=tokens_in,",
"- tokens_output=tokens_out,",
"- duration_ms=duration_ms,",
"- status=\"completed\",",
"- )",
"-",
"- # Parse JSON",
"- import re",
"-",
"- json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
"- if json_match:",
"- result = json.loads(json_match.group())",
"- return {",
"- \"statements\": result.get(\"statements\", []),",
"- \"prompt_id\": prompt_id,",
"- \"prompt_version\": prompt_version,",
"- \"model_used\": model_name,",
"- }",
"- return {\"statements\": [], \"prompt_id\": prompt_id, \"prompt_version\": prompt_version}",
"-",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Statement extraction failed for chunk {chunk_id}: {e}\")",
"- protokoll.log_llm_call(",
"- request=f\"[statement_extraction] chunk_id={chunk_id}\",",
"- model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",",
"- status=\"error\",",
"- error_message=str(e),",
"- )",
"- return {\"statements\": [], \"prompt_id\": prompt_id, \"prompt_version\": prompt_version}",
"-",
"-",
"-def find_entity_by_name(name: str) -> dict | None:",
"- \"\"\"",
"- Find entity by name or canonical_name.",
"-",
"- Args:",
"- name: Entity name to search for",
"-",
"- Returns:",
"- Entity dict with id, name, canonical_name or None",
"- \"\"\"",
"- name_lower = name.lower().strip()",
"- canonical = name_lower.replace(\" \", \"\").replace(\"-\", \"\")",
"-",
"- # Try exact canonical match first",
"- cursor = db.execute(",
"- \"SELECT id, name, canonical_name FROM entities WHERE canonical_name = %s LIMIT 1\",",
"- (canonical,),",
"- )",
"- result = cursor.fetchone()",
"- cursor.close()",
"- if result:",
"- return result",
"-",
"- # Try name match (case-insensitive)",
"- cursor = db.execute(",
"- \"SELECT id, name, canonical_name FROM entities WHERE LOWER(name) = %s LIMIT 1\",",
"- (name_lower,),",
"- )",
"- result = cursor.fetchone()",
"- cursor.close()",
"- if result:",
"- return result",
"-",
"- # Try partial canonical match",
"- cursor = db.execute(",
"- \"SELECT id, name, canonical_name FROM entities WHERE canonical_name LIKE %s LIMIT 1\",",
"- (f\"%{canonical}%\",),",
"- )",
"- result = cursor.fetchone()",
"- cursor.close()",
"- return result",
"-",
"-",
"-def store_statements(",
"- chunk_id: int,",
"- statements: list[dict],",
"- prompt_version: str = None,",
"- model_used: str = None,",
"-) -> int:",
"- \"\"\"",
"- Store extracted statements in the database with entity linking.",
"-",
"- Args:",
"- chunk_id: ID of the source chunk",
"- statements: List of statement dicts with subject, predicate, object, confidence",
"- prompt_version: Version of the prompt used for extraction",
"- model_used: Model used for extraction",
"-",
"- Returns:",
"- Number of successfully stored statements",
"- \"\"\"",
"- stored = 0",
"-",
"- for stmt in statements:",
"- try:",
"- subject_name = stmt.get(\"subject\", \"\").strip()",
"- predicate = stmt.get(\"predicate\", \"\").strip()",
"- object_value = stmt.get(\"object\", \"\").strip()",
"- confidence = float(stmt.get(\"confidence\", 0.8))",
"-",
"- if not subject_name or not predicate:",
"- continue",
"-",
"- # Find subject entity",
"- subject_entity = find_entity_by_name(subject_name)",
"- if not subject_entity:",
"- db.log(\"DEBUG\", f\"Subject entity not found: {subject_name}\")",
"- continue # Subject must be a known entity",
"-",
"- subject_entity_id = subject_entity[\"id\"]",
"-",
"- # Try to find object entity, otherwise use as literal",
"- object_entity_id = None",
"- object_literal = None",
"-",
"- if object_value:",
"- object_entity = find_entity_by_name(object_value)",
"- if object_entity:",
"- object_entity_id = object_entity[\"id\"]",
"- else:",
"- object_literal = object_value",
"-",
"- # Insert statement",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO statements",
"- (subject_entity_id, predicate, object_entity_id, object_literal,",
"- chunk_id, confidence, status, created_at)",
"- VALUES (%s, %s, %s, %s, %s, %s, 'extracted', NOW())\"\"\",",
"- (",
"- subject_entity_id,",
"- predicate[:255], # Truncate to VARCHAR(255)",
"- object_entity_id,",
"- object_literal,",
"- chunk_id,",
"- confidence,",
"- ),",
"- )",
"- db.commit()",
"- statement_id = cursor.lastrowid",
"- cursor.close()",
"-",
"- # Log provenance for statement",
"- db.log_provenance(",
"- artifact_type=\"statement\",",
"- artifact_id=statement_id,",
"- source_type=\"extraction\",",
"- source_id=chunk_id,",
"- pipeline_step=\"statement_extract\",",
"- model_used=model_used,",
"- prompt_version=prompt_version,",
"- )",
"-",
"- stored += 1",
"-",
"- except Exception as e:",
"- db.log(\"WARNING\", f\"Failed to store statement: {e}\")",
"-",
"- return stored",
"-",
"-",
"-def analyze_chunk_statements(chunk_id: int, content: str, client=None) -> int:",
"- \"\"\"",
"- Extract and store statements for a single chunk.",
"-",
"- Args:",
"- chunk_id: ID of the chunk",
"- content: Text content of the chunk",
"- client: Optional Anthropic client",
"-",
"- Returns:",
"- Number of statements stored",
"- \"\"\"",
"- result = extract_statements(chunk_id, content, client)",
"- statements = result.get(\"statements\", [])",
"- if statements:",
"- stored = store_statements(",
"- chunk_id,",
"- statements,",
"- prompt_version=result.get(\"prompt_version\"),",
"- model_used=result.get(\"model_used\"),",
"- )",
"- db.log(\"INFO\", f\"Chunk {chunk_id}: {stored}\/{len(statements)} statements stored\")",
"- return stored",
"- return 0",
"-",
"-",
"-def analyze_document_statements(document_id: int, client=None, progress=None) -> int:",
"- \"\"\"",
"- Extract statements from all chunks of a document.",
"-",
"- Args:",
"- document_id: ID of the document",
"- client: Optional Anthropic client",
"- progress: Optional PipelineProgress for logging",
"-",
"- Returns:",
"- Total number of statements stored",
"- \"\"\"",
"- cursor = db.execute(",
"- \"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\",",
"- (document_id,),",
"- )",
"- chunks = cursor.fetchall()",
"- cursor.close()",
"-",
"- total = len(chunks)",
"- db.log(\"INFO\", f\"Extracting statements from {total} chunks\")",
"- if progress:",
"- progress.add_log(f\"Statements: Extrahiere aus {total} Chunks...\")",
"-",
"- total_stored = 0",
"- for i, chunk in enumerate(chunks, 1):",
"- if progress:",
"- progress.add_log(f\"Statements: Chunk {i}\/{total}...\")",
"-",
"- stored = analyze_chunk_statements(chunk[\"id\"], chunk[\"content\"], client)",
"- total_stored += stored",
"-",
"- db.log(\"INFO\", f\"Total statements stored: {total_stored}\")",
"- if progress:",
"- progress.add_log(f\"Statements: {total_stored} Aussagen extrahiert\")",
"-",
"- return total_stored",
"-",
"-",
" if __name__ == \"__main__\":",
" test_text = \"\"\"",
" Carl Rogers entwickelte die klientenzentrierte Gesprächstherapie."
]
}
],
"originalFile": "\"\"\"\nSemantic analysis for KI-System Pipeline\nEntity extraction, taxonomy classification, ontology mapping.\n\"\"\"\n\nimport json\nimport time\n\nimport requests\n\nfrom config import ANTHROPIC_API_KEY, ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST\nfrom db import db\nfrom protokoll import protokoll\n\n\ndef get_anthropic_client():\n \"\"\"Get Anthropic API client.\"\"\"\n try:\n import anthropic\n\n if ANTHROPIC_API_KEY:\n db.log(\"INFO\", \"Using Anthropic API (Claude)\")\n return anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)\n else:\n db.log(\"WARNING\", \"No Anthropic API key found, falling back to Ollama\")\n except ImportError:\n db.log(\"WARNING\", \"anthropic package not installed, falling back to Ollama\")\n return None\n\n\ndef extract_entities_ollama(text, model=\"gemma3:27b-it-qat\"):\n \"\"\"Extract entities using Ollama.\"\"\"\n # Load prompt with versioning metadata\n prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n if not prompt_template:\n db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])\n\n try:\n start_time = time.time()\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n data = response.json()\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Parse JSON from response\n response_text = data.get(\"response\", \"{}\")\n\n # Log to ki-protokoll\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n response=response_text[:2000],\n model_name=f\"ollama:{model}\",\n tokens_input=data.get(\"prompt_eval_count\", 0),\n tokens_output=data.get(\"eval_count\", 0),\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n try:\n entities = json.loads(response_text)\n return entities.get(\"entities\", [])\n except json.JSONDecodeError:\n db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n model_name=f\"ollama:{model}\",\n status=\"error\",\n error_message=str(e),\n )\n return []\n\n\ndef extract_entities_anthropic(text, client):\n \"\"\"Extract entities using Anthropic Claude.\"\"\"\n # Get prompt with versioning metadata\n prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n if not prompt_template:\n prompt_template = \"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorisiere jede Entität als:\n- PERSON (Namen von Personen)\n- ORGANIZATION (Firmen, Institutionen, Gruppen)\n- CONCEPT (Fachbegriffe, Methoden, Theorien)\n- LOCATION (Orte, Länder)\n- DATE (Zeitangaben)\n- OTHER (Sonstiges)\n\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurzer Kontext der Erwähnung\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:4000])\n\n try:\n start_time = time.time()\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=2000, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n duration_ms = int((time.time() - start_time) * 1000)\n\n response_text = message.content[0].text\n\n # Log to ki-protokoll\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n response=response_text[:2000],\n model_name=ANTHROPIC_MODEL,\n tokens_input=message.usage.input_tokens,\n tokens_output=message.usage.output_tokens,\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n # Extract JSON from response\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n entities = json.loads(json_match.group())\n return entities.get(\"entities\", [])\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Anthropic entity extraction failed: {e}\")\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n model_name=ANTHROPIC_MODEL,\n status=\"error\",\n error_message=str(e),\n )\n return []\n\n\ndef extract_relations(text, entities, client=None):\n \"\"\"Extract relations between entities.\"\"\"\n if not entities or len(entities) < 2:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:20]]\n\n # Load prompt from database\n prompt_template = db.get_prompt(\"relation_extraction\")\n\n if not prompt_template:\n db.log(\"WARNING\", \"relation_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Identifiziere Beziehungen zwischen Entitäten.\nEntitäten: {{ENTITIES}}\nBeziehungstypen: DEVELOPED_BY, RELATED_TO, PART_OF, USED_IN, BASED_ON\nAntworte NUR im JSON-Format:\n{\"relations\": [{\"source\": \"...\", \"relation\": \"...\", \"target\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{ENTITIES}}\", \", \".join(entity_names))\n prompt = prompt.replace(\"{{TEXT}}\", text[:3000])\n\n try:\n start_time = time.time()\n tokens_in, tokens_out = 0, 0\n model_name = \"\"\n\n if client:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=1000, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n response_text = message.content[0].text\n tokens_in = message.usage.input_tokens\n tokens_out = message.usage.output_tokens\n model_name = ANTHROPIC_MODEL\n else:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n data = response.json()\n response_text = data.get(\"response\", \"{}\")\n tokens_in = data.get(\"prompt_eval_count\", 0)\n tokens_out = data.get(\"eval_count\", 0)\n model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"\n\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Log to ki-protokoll\n protokoll.log_llm_call(\n request=f\"[relation_extraction] {prompt[:500]}...\",\n response=response_text[:2000],\n model_name=model_name,\n tokens_input=tokens_in,\n tokens_output=tokens_out,\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n data = json.loads(json_match.group())\n return data.get(\"relations\", [])\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Relation extraction failed: {e}\")\n protokoll.log_llm_call(\n request=f\"[relation_extraction] {prompt[:500]}...\",\n model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",\n status=\"error\",\n error_message=str(e),\n )\n return []\n\n\ndef classify_taxonomy(text, client=None):\n \"\"\"Classify text into taxonomy categories.\"\"\"\n prompt_template = db.get_prompt(\"taxonomy_classification\")\n\n if not prompt_template:\n prompt_template = \"\"\"Klassifiziere den folgenden Text in passende Kategorien.\n\nWähle aus diesen Hauptkategorien:\n- Methoden (Therapiemethoden, Techniken)\n- Theorie (Konzepte, Modelle, Grundlagen)\n- Praxis (Anwendung, Fallbeispiele)\n- Organisation (Strukturen, Prozesse)\n- Kommunikation (Gesprächsführung, Interaktion)\n- Entwicklung (Persönliche Entwicklung, Veränderung)\n\nAntworte NUR im JSON-Format:\n{\"categories\": [\"...\", \"...\"], \"confidence\": 0.0-1.0}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:2000])\n\n try:\n start_time = time.time()\n tokens_in, tokens_out = 0, 0\n model_name = \"\"\n\n if client:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n response_text = message.content[0].text\n tokens_in = message.usage.input_tokens\n tokens_out = message.usage.output_tokens\n model_name = ANTHROPIC_MODEL\n else:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=60,\n )\n response.raise_for_status()\n data = response.json()\n response_text = data.get(\"response\", \"{}\")\n tokens_in = data.get(\"prompt_eval_count\", 0)\n tokens_out = data.get(\"eval_count\", 0)\n model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"\n\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Log to ki-protokoll\n protokoll.log_llm_call(\n request=f\"[taxonomy_classification] {prompt[:500]}...\",\n response=response_text[:2000],\n model_name=model_name,\n tokens_input=tokens_in,\n tokens_output=tokens_out,\n ... [TRUNCATED-47f636327abf188d]"
}
}