Log entry #15231

ID: 15231
Timestamp: 2025-12-25 15:47:49.835565
Client: root
IP: 145.224.72.140
Model: claude-sonnet-4-20250514
Status: completed
Tokens: 2,891 (Input: 1,443, Output: 1,448)
Duration: 97 ms
Request time: 2025-12-25 15:47:49.835565
Response time: 2025-12-25 15:47:49.932164

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/knowledge\/semantic_extractor.py",
        "content": "\"\"\"Semantik-Extraktion für Wissensextraktion.\"\"\"\nimport json\n\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import KnowledgeLevel, KnowledgeType\nfrom .llm_service import LLMService\n\n\nclass SemanticExtractor:\n    \"\"\"Extrahiert Semantik (Bedeutung\/Definition) für Entitäten.\"\"\"\n\n    def __init__(self, llm_service: LLMService, store_knowledge_fn):\n        \"\"\"Initialisiere mit LLM-Service und Storage-Funktion.\"\"\"\n        self.llm = llm_service\n        self.store_knowledge = store_knowledge_fn\n\n    def extract_semantics(\n        self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n    ) -> list[dict]:\n        \"\"\"\n        Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.\n\n        Args:\n            entities: Liste der extrahierten Entitäten\n            text: Ursprungstext für Kontext\n            level: Ebene\n            source_id: Quell-ID\n\n        Returns:\n            Liste von Semantik-Einträgen\n        \"\"\"\n        if not entities:\n            return []\n\n        entity_names = [e[\"name\"] for e in entities[:15]]\n\n        prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(entity_names)}\n\nFür jede Entität gib an:\n- definition: Kurze Definition basierend auf dem Text\n- context: In welchem Kontext wird sie verwendet\n- references: Bezüge zu anderen Konzepten (falls erkennbar)\n\nAntworte NUR als JSON:\n{{\"semantics\": [\n    {{\"entity\": \"...\", \"definition\": \"...\", \"context\": \"...\", \"references\": [\"...\"]}}\n]}}\n\nText:\n{text[:3000]}\"\"\"\n\n        result = self.llm.call_llm(prompt)\n        data = self.llm.parse_json(result)\n        semantics = data.get(\"semantics\", [])\n\n        # Speichere Semantik\n        stored = []\n        for sem in semantics:\n            entity_name = sem.get(\"entity\", \"\")\n            entity_match = next((e for e in entities if e[\"name\"].lower() == entity_name.lower()), None)\n            if entity_match:\n                stored_sem = self._store_semantic(\n                    entity_id=entity_match[\"id\"],\n                    definition=sem.get(\"definition\", \"\"),\n                    context=sem.get(\"context\", \"\"),\n                    references=sem.get(\"references\", []),\n                    level=level,\n                    source_id=source_id,\n                )\n                if stored_sem:\n                    stored.append(stored_sem)\n\n        # Speichere in Knowledge-Tabelle\n        self.store_knowledge(\n            level,\n            source_id,\n            KnowledgeType.SEMANTIC,\n            {\"definitions\": len(stored), \"entities\": [s[\"entity_name\"] for s in stored]},\n        )\n\n        return stored\n\n    def _store_semantic(\n        self, entity_id: int, definition: str, context: str, references: list,\n        level: KnowledgeLevel, source_id: int\n    ) -> dict | None:\n        \"\"\"Speichere Semantik-Eintrag mit Abgleich.\"\"\"\n        try:\n            # Prüfe ob bereits Semantik existiert\n            cursor = db.execute(\n                \"\"\"SELECT id, definition FROM entity_semantics\n                   WHERE entity_id = %s AND source_type = %s AND source_id = %s\"\"\",\n                (entity_id, level.value, source_id),\n            )\n            existing = cursor.fetchone()\n            cursor.close()\n\n            model_name = 
f\"{self.llm.model.provider}:{self.llm.model.model_name}\"\n\n            if existing:\n                if existing[\"definition\"] != definition:\n                    db.log(\"INFO\", f\"Abweichende Definition für Entität {entity_id}, füge hinzu\")\n                    cursor = db.execute(\n                        \"\"\"INSERT INTO entity_semantics\n                           (entity_id, definition, context, references_json,\n                            source_type, source_id, model_used, created_at)\n                           VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",\n                        (entity_id, definition, context, json.dumps(references),\n                         level.value, source_id, model_name),\n                    )\n                    db.commit()\n                    sem_id = cursor.lastrowid\n                    cursor.close()\n                else:\n                    sem_id = existing[\"id\"]\n            else:\n                cursor = db.execute(\n                    \"\"\"INSERT INTO entity_semantics\n                       (entity_id, definition, context, references_json,\n                        source_type, source_id, model_used, created_at)\n                       VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",\n                    (entity_id, definition, context, json.dumps(references),\n                     level.value, source_id, model_name),\n                )\n                db.commit()\n                sem_id = cursor.lastrowid\n                cursor.close()\n\n            # Hole Entity-Name für Rückgabe\n            cursor = db.execute(\"SELECT name FROM entities WHERE id = %s\", (entity_id,))\n            entity = cursor.fetchone()\n            cursor.close()\n\n            return {\n                \"id\": sem_id,\n                \"entity_id\": entity_id,\n                \"entity_name\": entity[\"name\"] if entity else \"\",\n                \"definition\": definition,\n            }\n\n        except Exception as e:\n            db.log(\"ERROR\", f\"Fehler beim Speichern der Semantik: {e}\")\n            return None\n"
    }
}
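The `_store_semantic` helper in the file above selects from and inserts into an `entity_semantics` table. For reference, a minimal sketch of a schema that would satisfy those statements; the column names are taken from the queries in the logged file, while the types, key, and defaults are assumptions and not part of the logged request.

```python
# Hypothetical DDL matching the columns used by _store_semantic.
# Column names come from the logged INSERT/SELECT statements; the
# types, primary key, and JSON column type are assumptions.
DDL_ENTITY_SEMANTICS = """
CREATE TABLE IF NOT EXISTS entity_semantics (
    id              INT AUTO_INCREMENT PRIMARY KEY,
    entity_id       INT NOT NULL,
    definition      TEXT,
    context         TEXT,
    references_json JSON,
    source_type     VARCHAR(64),
    source_id       INT,
    model_used      VARCHAR(255),
    created_at      DATETIME
)
"""
```

The code relies on `cursor.lastrowid` and `NOW()`, which is consistent with a MySQL-style backend behind the `db` wrapper, but the log does not confirm the actual engine.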

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/scripts\/pipeline\/knowledge\/semantic_extractor.py",
        "content": "\"\"\"Semantik-Extraktion für Wissensextraktion.\"\"\"\nimport json\n\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import KnowledgeLevel, KnowledgeType\nfrom .llm_service import LLMService\n\n\nclass SemanticExtractor:\n    \"\"\"Extrahiert Semantik (Bedeutung\/Definition) für Entitäten.\"\"\"\n\n    def __init__(self, llm_service: LLMService, store_knowledge_fn):\n        \"\"\"Initialisiere mit LLM-Service und Storage-Funktion.\"\"\"\n        self.llm = llm_service\n        self.store_knowledge = store_knowledge_fn\n\n    def extract_semantics(\n        self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n    ) -> list[dict]:\n        \"\"\"\n        Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.\n\n        Args:\n            entities: Liste der extrahierten Entitäten\n            text: Ursprungstext für Kontext\n            level: Ebene\n            source_id: Quell-ID\n\n        Returns:\n            Liste von Semantik-Einträgen\n        \"\"\"\n        if not entities:\n            return []\n\n        entity_names = [e[\"name\"] for e in entities[:15]]\n\n        prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(entity_names)}\n\nFür jede Entität gib an:\n- definition: Kurze Definition basierend auf dem Text\n- context: In welchem Kontext wird sie verwendet\n- references: Bezüge zu anderen Konzepten (falls erkennbar)\n\nAntworte NUR als JSON:\n{{\"semantics\": [\n    {{\"entity\": \"...\", \"definition\": \"...\", \"context\": \"...\", \"references\": [\"...\"]}}\n]}}\n\nText:\n{text[:3000]}\"\"\"\n\n        result = self.llm.call_llm(prompt)\n        data = self.llm.parse_json(result)\n        semantics = data.get(\"semantics\", [])\n\n        # Speichere Semantik\n        stored = []\n        for sem in semantics:\n            entity_name = sem.get(\"entity\", \"\")\n            entity_match = next((e for e in entities if e[\"name\"].lower() == entity_name.lower()), None)\n            if entity_match:\n                stored_sem = self._store_semantic(\n                    entity_id=entity_match[\"id\"],\n                    definition=sem.get(\"definition\", \"\"),\n                    context=sem.get(\"context\", \"\"),\n                    references=sem.get(\"references\", []),\n                    level=level,\n                    source_id=source_id,\n                )\n                if stored_sem:\n                    stored.append(stored_sem)\n\n        # Speichere in Knowledge-Tabelle\n        self.store_knowledge(\n            level,\n            source_id,\n            KnowledgeType.SEMANTIC,\n            {\"definitions\": len(stored), \"entities\": [s[\"entity_name\"] for s in stored]},\n        )\n\n        return stored\n\n    def _store_semantic(\n        self, entity_id: int, definition: str, context: str, references: list,\n        level: KnowledgeLevel, source_id: int\n    ) -> dict | None:\n        \"\"\"Speichere Semantik-Eintrag mit Abgleich.\"\"\"\n        try:\n            # Prüfe ob bereits Semantik existiert\n            cursor = db.execute(\n                \"\"\"SELECT id, definition FROM entity_semantics\n                   WHERE entity_id = %s AND source_type = %s AND source_id = %s\"\"\",\n                (entity_id, level.value, source_id),\n            )\n            existing = cursor.fetchone()\n            cursor.close()\n\n            model_name = 
f\"{self.llm.model.provider}:{self.llm.model.model_name}\"\n\n            if existing:\n                if existing[\"definition\"] != definition:\n                    db.log(\"INFO\", f\"Abweichende Definition für Entität {entity_id}, füge hinzu\")\n                    cursor = db.execute(\n                        \"\"\"INSERT INTO entity_semantics\n                           (entity_id, definition, context, references_json,\n                            source_type, source_id, model_used, created_at)\n                           VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",\n                        (entity_id, definition, context, json.dumps(references),\n                         level.value, source_id, model_name),\n                    )\n                    db.commit()\n                    sem_id = cursor.lastrowid\n                    cursor.close()\n                else:\n                    sem_id = existing[\"id\"]\n            else:\n                cursor = db.execute(\n                    \"\"\"INSERT INTO entity_semantics\n                       (entity_id, definition, context, references_json,\n                        source_type, source_id, model_used, created_at)\n                       VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",\n                    (entity_id, definition, context, json.dumps(references),\n                     level.value, source_id, model_name),\n                )\n                db.commit()\n                sem_id = cursor.lastrowid\n                cursor.close()\n\n            # Hole Entity-Name für Rückgabe\n            cursor = db.execute(\"SELECT name FROM entities WHERE id = %s\", (entity_id,))\n            entity = cursor.fetchone()\n            cursor.close()\n\n            return {\n                \"id\": sem_id,\n                \"entity_id\": entity_id,\n                \"entity_name\": entity[\"name\"] if entity else \"\",\n                \"definition\": definition,\n            }\n\n        except Exception as e:\n            db.log(\"ERROR\", f\"Fehler beim Speichern der Semantik: {e}\")\n            return None\n",
        "structuredPatch": [],
        "originalFile": null
    }
}
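For orientation, a minimal driver sketch showing how the class created by this request might be exercised. It assumes /var/www/scripts/pipeline is importable as shown in the file itself, that `LLMService` exposes the `call_llm`/`parse_json` methods the file relies on, and that `KnowledgeLevel` has a `DOCUMENT` member; the constructor call, the callback, and the sample entities are hypothetical stand-ins, not part of the logged data.

```python
# Hypothetical usage of the SemanticExtractor written by this request.
# Assumes the pipeline package (db, models, llm_service) resolves from
# /var/www/scripts/pipeline; nothing below is confirmed by the log.
import sys
sys.path.insert(0, "/var/www/scripts/pipeline")

from knowledge.llm_service import LLMService            # assumed module layout
from knowledge.models import KnowledgeLevel
from knowledge.semantic_extractor import SemanticExtractor


def store_knowledge(level, source_id, knowledge_type, payload):
    """Stand-in for the store_knowledge_fn callback."""
    print("knowledge:", level, source_id, knowledge_type, payload)


llm = LLMService()                                       # constructor signature assumed
extractor = SemanticExtractor(llm, store_knowledge)

# Entity dicts as an upstream extraction step might supply them;
# extract_semantics only requires the "id" and "name" keys.
entities = [{"id": 1, "name": "knowledge graph"}]

semantics = extractor.extract_semantics(
    entities=entities,
    text="A knowledge graph links entities through typed relations.",
    level=KnowledgeLevel.DOCUMENT,                       # enum member assumed
    source_id=42,
)
for sem in semantics:
    print(sem["entity_name"], "->", sem["definition"])
```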