{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/knowledge\/semantic_extractor.py",
"content": "\"\"\"Semantik-Extraktion für Wissensextraktion.\"\"\"\nimport json\n\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import KnowledgeLevel, KnowledgeType\nfrom .llm_service import LLMService\n\n\nclass SemanticExtractor:\n \"\"\"Extrahiert Semantik (Bedeutung\/Definition) für Entitäten.\"\"\"\n\n def __init__(self, llm_service: LLMService, store_knowledge_fn):\n \"\"\"Initialisiere mit LLM-Service und Storage-Funktion.\"\"\"\n self.llm = llm_service\n self.store_knowledge = store_knowledge_fn\n\n def extract_semantics(\n self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n ) -> list[dict]:\n \"\"\"\n Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.\n\n Args:\n entities: Liste der extrahierten Entitäten\n text: Ursprungstext für Kontext\n level: Ebene\n source_id: Quell-ID\n\n Returns:\n Liste von Semantik-Einträgen\n \"\"\"\n if not entities:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:15]]\n\n prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(entity_names)}\n\nFür jede Entität gib an:\n- definition: Kurze Definition basierend auf dem Text\n- context: In welchem Kontext wird sie verwendet\n- references: Bezüge zu anderen Konzepten (falls erkennbar)\n\nAntworte NUR als JSON:\n{{\"semantics\": [\n {{\"entity\": \"...\", \"definition\": \"...\", \"context\": \"...\", \"references\": [\"...\"]}}\n]}}\n\nText:\n{text[:3000]}\"\"\"\n\n result = self.llm.call_llm(prompt)\n data = self.llm.parse_json(result)\n semantics = data.get(\"semantics\", [])\n\n # Speichere Semantik\n stored = []\n for sem in semantics:\n entity_name = sem.get(\"entity\", \"\")\n entity_match = next((e for e in entities if e[\"name\"].lower() == entity_name.lower()), None)\n if entity_match:\n stored_sem = self._store_semantic(\n entity_id=entity_match[\"id\"],\n definition=sem.get(\"definition\", \"\"),\n context=sem.get(\"context\", \"\"),\n references=sem.get(\"references\", []),\n level=level,\n source_id=source_id,\n )\n if stored_sem:\n stored.append(stored_sem)\n\n # Speichere in Knowledge-Tabelle\n self.store_knowledge(\n level,\n source_id,\n KnowledgeType.SEMANTIC,\n {\"definitions\": len(stored), \"entities\": [s[\"entity_name\"] for s in stored]},\n )\n\n return stored\n\n def _store_semantic(\n self, entity_id: int, definition: str, context: str, references: list,\n level: KnowledgeLevel, source_id: int\n ) -> dict | None:\n \"\"\"Speichere Semantik-Eintrag mit Abgleich.\"\"\"\n try:\n # Prüfe ob bereits Semantik existiert\n cursor = db.execute(\n \"\"\"SELECT id, definition FROM entity_semantics\n WHERE entity_id = %s AND source_type = %s AND source_id = %s\"\"\",\n (entity_id, level.value, source_id),\n )\n existing = cursor.fetchone()\n cursor.close()\n\n model_name = f\"{self.llm.model.provider}:{self.llm.model.model_name}\"\n\n if existing:\n if existing[\"definition\"] != definition:\n db.log(\"INFO\", f\"Abweichende Definition für Entität {entity_id}, füge hinzu\")\n cursor = db.execute(\n \"\"\"INSERT INTO entity_semantics\n (entity_id, definition, context, references_json,\n source_type, source_id, model_used, created_at)\n VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",\n (entity_id, definition, context, json.dumps(references),\n level.value, source_id, model_name),\n )\n db.commit()\n sem_id = cursor.lastrowid\n cursor.close()\n else:\n sem_id = existing[\"id\"]\n else:\n cursor = db.execute(\n \"\"\"INSERT INTO entity_semantics\n (entity_id, definition, context, references_json,\n source_type, source_id, model_used, created_at)\n VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",\n (entity_id, definition, context, json.dumps(references),\n level.value, source_id, model_name),\n )\n db.commit()\n sem_id = cursor.lastrowid\n cursor.close()\n\n # Hole Entity-Name für Rückgabe\n cursor = db.execute(\"SELECT name FROM entities WHERE id = %s\", (entity_id,))\n entity = cursor.fetchone()\n cursor.close()\n\n return {\n \"id\": sem_id,\n \"entity_id\": entity_id,\n \"entity_name\": entity[\"name\"] if entity else \"\",\n \"definition\": definition,\n }\n\n except Exception as e:\n db.log(\"ERROR\", f\"Fehler beim Speichern der Semantik: {e}\")\n return None\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/scripts\/pipeline\/knowledge\/semantic_extractor.py",
"content": "\"\"\"Semantik-Extraktion für Wissensextraktion.\"\"\"\nimport json\n\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import KnowledgeLevel, KnowledgeType\nfrom .llm_service import LLMService\n\n\nclass SemanticExtractor:\n \"\"\"Extrahiert Semantik (Bedeutung\/Definition) für Entitäten.\"\"\"\n\n def __init__(self, llm_service: LLMService, store_knowledge_fn):\n \"\"\"Initialisiere mit LLM-Service und Storage-Funktion.\"\"\"\n self.llm = llm_service\n self.store_knowledge = store_knowledge_fn\n\n def extract_semantics(\n self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n ) -> list[dict]:\n \"\"\"\n Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.\n\n Args:\n entities: Liste der extrahierten Entitäten\n text: Ursprungstext für Kontext\n level: Ebene\n source_id: Quell-ID\n\n Returns:\n Liste von Semantik-Einträgen\n \"\"\"\n if not entities:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:15]]\n\n prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(entity_names)}\n\nFür jede Entität gib an:\n- definition: Kurze Definition basierend auf dem Text\n- context: In welchem Kontext wird sie verwendet\n- references: Bezüge zu anderen Konzepten (falls erkennbar)\n\nAntworte NUR als JSON:\n{{\"semantics\": [\n {{\"entity\": \"...\", \"definition\": \"...\", \"context\": \"...\", \"references\": [\"...\"]}}\n]}}\n\nText:\n{text[:3000]}\"\"\"\n\n result = self.llm.call_llm(prompt)\n data = self.llm.parse_json(result)\n semantics = data.get(\"semantics\", [])\n\n # Speichere Semantik\n stored = []\n for sem in semantics:\n entity_name = sem.get(\"entity\", \"\")\n entity_match = next((e for e in entities if e[\"name\"].lower() == entity_name.lower()), None)\n if entity_match:\n stored_sem = self._store_semantic(\n entity_id=entity_match[\"id\"],\n definition=sem.get(\"definition\", \"\"),\n context=sem.get(\"context\", \"\"),\n references=sem.get(\"references\", []),\n level=level,\n source_id=source_id,\n )\n if stored_sem:\n stored.append(stored_sem)\n\n # Speichere in Knowledge-Tabelle\n self.store_knowledge(\n level,\n source_id,\n KnowledgeType.SEMANTIC,\n {\"definitions\": len(stored), \"entities\": [s[\"entity_name\"] for s in stored]},\n )\n\n return stored\n\n def _store_semantic(\n self, entity_id: int, definition: str, context: str, references: list,\n level: KnowledgeLevel, source_id: int\n ) -> dict | None:\n \"\"\"Speichere Semantik-Eintrag mit Abgleich.\"\"\"\n try:\n # Prüfe ob bereits Semantik existiert\n cursor = db.execute(\n \"\"\"SELECT id, definition FROM entity_semantics\n WHERE entity_id = %s AND source_type = %s AND source_id = %s\"\"\",\n (entity_id, level.value, source_id),\n )\n existing = cursor.fetchone()\n cursor.close()\n\n model_name = f\"{self.llm.model.provider}:{self.llm.model.model_name}\"\n\n if existing:\n if existing[\"definition\"] != definition:\n db.log(\"INFO\", f\"Abweichende Definition für Entität {entity_id}, füge hinzu\")\n cursor = db.execute(\n \"\"\"INSERT INTO entity_semantics\n (entity_id, definition, context, references_json,\n source_type, source_id, model_used, created_at)\n VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",\n (entity_id, definition, context, json.dumps(references),\n level.value, source_id, model_name),\n )\n db.commit()\n sem_id = cursor.lastrowid\n cursor.close()\n else:\n sem_id = existing[\"id\"]\n else:\n cursor = db.execute(\n \"\"\"INSERT INTO entity_semantics\n (entity_id, definition, context, references_json,\n source_type, source_id, model_used, created_at)\n VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",\n (entity_id, definition, context, json.dumps(references),\n level.value, source_id, model_name),\n )\n db.commit()\n sem_id = cursor.lastrowid\n cursor.close()\n\n # Hole Entity-Name für Rückgabe\n cursor = db.execute(\"SELECT name FROM entities WHERE id = %s\", (entity_id,))\n entity = cursor.fetchone()\n cursor.close()\n\n return {\n \"id\": sem_id,\n \"entity_id\": entity_id,\n \"entity_name\": entity[\"name\"] if entity else \"\",\n \"definition\": definition,\n }\n\n except Exception as e:\n db.log(\"ERROR\", f\"Fehler beim Speichern der Semantik: {e}\")\n return None\n",
"structuredPatch": [],
"originalFile": null
}
}