{
"tool_response": {
"filePath": "\/opt\/scripts\/pipeline\/analyze.py",
"oldString": "def store_entities(document_id, entities):\n \"\"\"Store extracted entities in database.\"\"\"\n stored = 0\n\n for entity in entities:\n try:\n # Check if entity already exists\n cursor = db.execute(\n \"SELECT id FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n else:\n cursor = db.execute(\n \"\"\"INSERT INTO entities (name, type, created_at)\n VALUES (%s, %s, NOW())\"\"\",\n (entity[\"name\"], entity[\"type\"]),\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()",
"newString": "def store_entities(document_id, entities):\n \"\"\"Store extracted entities in database.\"\"\"\n stored = 0\n\n for entity in entities:\n try:\n # Get description from entity\n description = entity.get(\"description\") or entity.get(\"context\") or None\n\n # Check if entity already exists\n cursor = db.execute(\n \"SELECT id, description FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n # Update description if it was empty and we have one now\n if description and not existing[\"description\"]:\n cursor = db.execute(\n \"UPDATE entities SET description = %s WHERE id = %s\",\n (description, entity_id)\n )\n db.commit()\n cursor.close()\n else:\n cursor = db.execute(\n \"\"\"INSERT INTO entities (name, type, description, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (entity[\"name\"], entity[\"type\"], description),\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()",
"originalFile": "\"\"\"\nSemantic analysis for KI-System Pipeline\nEntity extraction, taxonomy classification, ontology mapping.\n\"\"\"\n\nimport json\nimport os\n\nimport requests\n\nfrom config import ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST\nfrom db import db\n\n\ndef get_anthropic_client():\n \"\"\"Get Anthropic API client.\"\"\"\n try:\n import anthropic\n\n api_key = os.environ.get(\"ANTHROPIC_API_KEY\", \"\")\n if not api_key:\n # Try reading from credentials\n cred_file = \"\/var\/www\/docs\/credentials\/credentials.md\"\n if os.path.exists(cred_file):\n with open(cred_file) as f:\n content = f.read()\n # Parse API key from markdown\n for line in content.split(\"\\n\"):\n if \"ANTHROPIC_API_KEY\" in line:\n parts = line.split(\"`\")\n if len(parts) >= 2:\n api_key = parts[1]\n break\n if api_key:\n return anthropic.Anthropic(api_key=api_key)\n except ImportError:\n pass\n return None\n\n\ndef extract_entities_ollama(text, model=\"gemma3:27b-it-qat\"):\n \"\"\"Extract entities using Ollama.\"\"\"\n # Load prompt from database\n prompt_template = db.get_prompt(\"entity_extraction\")\n\n if not prompt_template:\n db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])\n\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n data = response.json()\n\n # Parse JSON from response\n response_text = data.get(\"response\", \"{}\")\n try:\n entities = json.loads(response_text)\n return entities.get(\"entities\", [])\n except json.JSONDecodeError:\n db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n return []\n\n\ndef extract_entities_anthropic(text, client):\n \"\"\"Extract entities using Anthropic Claude.\"\"\"\n # Get prompt from database\n prompt_template = db.get_prompt(\"entity_extraction\")\n\n if not prompt_template:\n prompt_template = \"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorisiere jede Entität als:\n- PERSON (Namen von Personen)\n- ORGANIZATION (Firmen, Institutionen, Gruppen)\n- CONCEPT (Fachbegriffe, Methoden, Theorien)\n- LOCATION (Orte, Länder)\n- DATE (Zeitangaben)\n- OTHER (Sonstiges)\n\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurzer Kontext der Erwähnung\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:4000])\n\n try:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=2000, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n\n response_text = message.content[0].text\n\n # Extract JSON from response\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n entities = json.loads(json_match.group())\n return entities.get(\"entities\", [])\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Anthropic entity extraction failed: {e}\")\n return []\n\n\ndef extract_relations(text, entities, client=None):\n \"\"\"Extract 
relations between entities.\"\"\"\n if not entities or len(entities) < 2:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:20]]\n\n # Load prompt from database\n prompt_template = db.get_prompt(\"relation_extraction\")\n\n if not prompt_template:\n db.log(\"WARNING\", \"relation_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Identifiziere Beziehungen zwischen Entitäten.\nEntitäten: {{ENTITIES}}\nBeziehungstypen: DEVELOPED_BY, RELATED_TO, PART_OF, USED_IN, BASED_ON\nAntworte NUR im JSON-Format:\n{\"relations\": [{\"source\": \"...\", \"relation\": \"...\", \"target\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{ENTITIES}}\", \", \".join(entity_names))\n prompt = prompt.replace(\"{{TEXT}}\", text[:3000])\n\n try:\n if client:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=1000, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n response_text = message.content[0].text\n else:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n response_text = response.json().get(\"response\", \"{}\")\n\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n data = json.loads(json_match.group())\n return data.get(\"relations\", [])\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Relation extraction failed: {e}\")\n return []\n\n\ndef classify_taxonomy(text, client=None):\n \"\"\"Classify text into taxonomy categories.\"\"\"\n prompt_template = db.get_prompt(\"taxonomy_classification\")\n\n if not prompt_template:\n prompt_template = \"\"\"Klassifiziere den folgenden Text in passende Kategorien.\n\nWähle aus diesen Hauptkategorien:\n- Methoden (Therapiemethoden, Techniken)\n- Theorie (Konzepte, Modelle, Grundlagen)\n- Praxis (Anwendung, Fallbeispiele)\n- Organisation (Strukturen, Prozesse)\n- Kommunikation (Gesprächsführung, Interaktion)\n- Entwicklung (Persönliche Entwicklung, Veränderung)\n\nAntworte NUR im JSON-Format:\n{\"categories\": [\"...\", \"...\"], \"confidence\": 0.0-1.0}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:2000])\n\n try:\n if client:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n response_text = message.content[0].text\n else:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=60,\n )\n response.raise_for_status()\n response_text = response.json().get(\"response\", \"{}\")\n\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n return json.loads(json_match.group())\n return {\"categories\": [], \"confidence\": 0}\n except Exception as e:\n db.log(\"ERROR\", f\"Taxonomy classification failed: {e}\")\n return {\"categories\": [], \"confidence\": 0}\n\n\ndef store_entities(document_id, entities):\n \"\"\"Store extracted entities in database.\"\"\"\n stored = 0\n\n for entity in entities:\n try:\n # Check if entity already exists\n cursor = db.execute(\n \"SELECT id FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n else:\n cursor = db.execute(\n 
\"\"\"INSERT INTO entities (name, type, created_at)\n VALUES (%s, %s, NOW())\"\"\",\n (entity[\"name\"], entity[\"type\"]),\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()\n\n # Link to document\n cursor = db.execute(\n \"\"\"INSERT IGNORE INTO document_entities\n (document_id, entity_id, context, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (document_id, entity_id, entity.get(\"context\", \"\")),\n )\n db.commit()\n cursor.close()\n stored += 1\n\n except Exception as e:\n db.log(\"WARNING\", f\"Failed to store entity: {e}\")\n\n return stored\n\n\ndef analyze_document(document_id, text, use_anthropic=True):\n \"\"\"\n Full semantic analysis of a document.\n Extracts entities, relations, and taxonomy classification.\n \"\"\"\n db.log(\"INFO\", f\"Starting semantic analysis for document {document_id}\")\n\n client = get_anthropic_client() if use_anthropic else None\n\n # Extract entities\n if client:\n entities = extract_entities_anthropic(text, client)\n else:\n entities = extract_entities_ollama(text)\n\n db.log(\"INFO\", f\"Extracted {len(entities)} entities\")\n\n # Store entities\n if entities:\n stored = store_entities(document_id, entities)\n db.log(\"INFO\", f\"Stored {stored} entities\")\n\n # Extract relations\n relations = extract_relations(text, entities, client)\n db.log(\"INFO\", f\"Extracted {len(relations)} relations\")\n\n # Store relations\n for rel in relations:\n try:\n # Find entity IDs\n cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"source\"],))\n source = cursor.fetchone()\n cursor.close()\n\n cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"target\"],))\n target = cursor.fetchone()\n cursor.close()\n\n if source and target:\n cursor = db.execute(\n ... [TRUNCATED-200590d2e9231851]",
"structuredPatch": [
{
"oldStart": 227,
"oldLines": 20,
"newStart": 227,
"newLines": 31,
"lines": [
" ",
" for entity in entities:",
" try:",
"+ # Get description from entity",
"+ description = entity.get(\"description\") or entity.get(\"context\") or None",
"+",
" # Check if entity already exists",
" cursor = db.execute(",
"- \"SELECT id FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])",
"+ \"SELECT id, description FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])",
" )",
" existing = cursor.fetchone()",
" cursor.close()",
" ",
" if existing:",
" entity_id = existing[\"id\"]",
"+ # Update description if it was empty and we have one now",
"+ if description and not existing[\"description\"]:",
"+ cursor = db.execute(",
"+ \"UPDATE entities SET description = %s WHERE id = %s\",",
"+ (description, entity_id)",
"+ )",
"+ db.commit()",
"+ cursor.close()",
" else:",
" cursor = db.execute(",
"- \"\"\"INSERT INTO entities (name, type, created_at)",
"- VALUES (%s, %s, NOW())\"\"\",",
"- (entity[\"name\"], entity[\"type\"]),",
"+ \"\"\"INSERT INTO entities (name, type, description, created_at)",
"+ VALUES (%s, %s, %s, NOW())\"\"\",",
"+ (entity[\"name\"], entity[\"type\"], description),",
" )",
" db.commit()",
" entity_id = cursor.lastrowid"
]
      }
    ],
    "userModified": false,
    "replaceAll": false
  }
}