Protokoll #15233

ID: 15233
Zeitstempel: 2025-12-25 15:48:38.724156
Client: root
IP: 145.224.72.140
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 3,163 (Input: 1,579, Output: 1,584)
Dauer: 106 ms
Request-Zeit: 2025-12-25 15:48:38.724156
Response-Zeit: 2025-12-25 15:48:38.830466

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/knowledge\/taxonomy_extractor.py",
        "content": "\"\"\"Taxonomie-Extraktion für Wissensextraktion.\"\"\"\nimport re\n\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import KnowledgeLevel, KnowledgeType\nfrom .llm_service import LLMService\n\n\nclass TaxonomyExtractor:\n    \"\"\"Extrahiert Taxonomie (hierarchische Einordnung) für Entitäten.\"\"\"\n\n    def __init__(self, llm_service: LLMService, store_knowledge_fn):\n        \"\"\"Initialisiere mit LLM-Service und Storage-Funktion.\"\"\"\n        self.llm = llm_service\n        self.store_knowledge = store_knowledge_fn\n\n    def extract_taxonomy(\n        self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n    ) -> list[dict]:\n        \"\"\"\n        Extrahiere Taxonomie (hierarchische Einordnung) für Entitäten.\n\n        Args:\n            entities: Liste der Entitäten\n            text: Ursprungstext\n            level: Ebene\n            source_id: Quell-ID\n\n        Returns:\n            Liste von Taxonomie-Zuordnungen\n        \"\"\"\n        if not entities:\n            return []\n\n        # Hole existierende Taxonomie-Terme\n        cursor = db.execute(\"SELECT id, name, path, depth FROM taxonomy_terms ORDER BY depth, name\")\n        existing_terms = cursor.fetchall()\n        cursor.close()\n\n        term_names = [t[\"name\"] for t in existing_terms]\n        entity_names = [e[\"name\"] for e in entities[:15]]\n\n        prompt = f\"\"\"Ordne die folgenden Entitäten in eine hierarchische Taxonomie ein.\n\nEntitäten: {\", \".join(entity_names)}\n\nExistierende Taxonomie-Kategorien: {\", \".join(term_names) if term_names else \"Keine vorhanden\"}\n\nAufgabe:\n1. Ordne jede Entität einer passenden Kategorie zu\n2. Wenn keine passende Kategorie existiert, schlage eine neue vor\n3. 
Gib die hierarchische Einordnung an\n\nAntworte NUR als JSON:\n{{\"mappings\": [\n    {{\"entity\": \"...\", \"category\": \"...\", \"parent_category\": null, \"confidence\": 0.0-1.0, \"is_new_category\": false}}\n]}}\n\nText-Kontext:\n{text[:2000]}\"\"\"\n\n        result = self.llm.call_llm(prompt)\n        data = self.llm.parse_json(result)\n        mappings = data.get(\"mappings\", [])\n\n        # Speichere Taxonomie-Zuordnungen\n        stored = []\n        for mapping in mappings:\n            entity_match = next(\n                (e for e in entities if e[\"name\"].lower() == mapping.get(\"entity\", \"\").lower()), None\n            )\n            if entity_match:\n                stored_mapping = self._store_taxonomy_mapping(\n                    entity_id=entity_match[\"id\"],\n                    category_name=mapping.get(\"category\", \"\"),\n                    parent_category=mapping.get(\"parent_category\"),\n                    confidence=mapping.get(\"confidence\", 0.8),\n                    is_new=mapping.get(\"is_new_category\", False),\n                    existing_terms=existing_terms,\n                    level=level,\n                    source_id=source_id,\n                )\n                if stored_mapping:\n                    stored.append(stored_mapping)\n\n        # Speichere in Knowledge-Tabelle\n        self.store_knowledge(\n            level,\n            source_id,\n            KnowledgeType.TAXONOMY,\n            {\"mappings\": len(stored), \"categories\": list({m[\"category\"] for m in stored})},\n        )\n\n        return stored\n\n    def _store_taxonomy_mapping(\n        self,\n        entity_id: int,\n        category_name: str,\n        parent_category: str | None,\n        confidence: float,\n        is_new: bool,\n        existing_terms: list,\n        level: KnowledgeLevel,\n        source_id: int,\n    ) -> dict | None:\n        \"\"\"Speichere Taxonomie-Zuordnung.\"\"\"\n        try:\n            # Finde oder 
erstelle Taxonomie-Term\n            term = next((t for t in existing_terms if t[\"name\"].lower() == category_name.lower()), None)\n\n            if term:\n                term_id = term[\"id\"]\n            elif is_new:\n                # Neuen Term anlegen\n                parent_id = None\n                depth = 0\n                path = f\"\/{category_name}\"\n\n                if parent_category:\n                    parent_term = next(\n                        (t for t in existing_terms if t[\"name\"].lower() == parent_category.lower()), None\n                    )\n                    if parent_term:\n                        parent_id = parent_term[\"id\"]\n                        depth = parent_term[\"depth\"] + 1\n                        path = f\"{parent_term['path']}\/{category_name}\"\n\n                # Erstelle Slug\n                slug = re.sub(r\"[^a-z0-9]+\", \"-\", category_name.lower()).strip(\"-\")\n\n                cursor = db.execute(\n                    \"\"\"INSERT INTO taxonomy_terms (name, slug, parent_id, depth, path, created_at)\n                       VALUES (%s, %s, %s, %s, %s, NOW())\"\"\",\n                    (category_name, slug, parent_id, depth, path),\n                )\n                db.commit()\n                term_id = cursor.lastrowid\n                cursor.close()\n                db.log(\"INFO\", f\"Neuer Taxonomie-Term: '{category_name}' (ID: {term_id})\")\n            else:\n                return None\n\n            model_name = f\"{self.llm.model.provider}:{self.llm.model.model_name}\"\n\n            # Speichere Zuordnung\n            cursor = db.execute(\n                \"\"\"INSERT INTO entity_taxonomy_mapping\n                   (entity_id, taxonomy_term_id, confidence, source_type, source_id, model_used, created_at)\n                   VALUES (%s, %s, %s, %s, %s, %s, NOW())\n                   ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)\"\"\",\n                (entity_id, term_id, confidence, 
level.value, source_id, model_name),\n            )\n            db.commit()\n            cursor.close()\n\n            return {\"entity_id\": entity_id, \"term_id\": term_id, \"category\": category_name, \"confidence\": confidence}\n\n        except Exception as e:\n            db.log(\"ERROR\", f\"Fehler beim Speichern der Taxonomie: {e}\")\n            return None\n"
    }
}

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/scripts\/pipeline\/knowledge\/taxonomy_extractor.py",
        "content": "\"\"\"Taxonomie-Extraktion für Wissensextraktion.\"\"\"\nimport re\n\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import KnowledgeLevel, KnowledgeType\nfrom .llm_service import LLMService\n\n\nclass TaxonomyExtractor:\n    \"\"\"Extrahiert Taxonomie (hierarchische Einordnung) für Entitäten.\"\"\"\n\n    def __init__(self, llm_service: LLMService, store_knowledge_fn):\n        \"\"\"Initialisiere mit LLM-Service und Storage-Funktion.\"\"\"\n        self.llm = llm_service\n        self.store_knowledge = store_knowledge_fn\n\n    def extract_taxonomy(\n        self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n    ) -> list[dict]:\n        \"\"\"\n        Extrahiere Taxonomie (hierarchische Einordnung) für Entitäten.\n\n        Args:\n            entities: Liste der Entitäten\n            text: Ursprungstext\n            level: Ebene\n            source_id: Quell-ID\n\n        Returns:\n            Liste von Taxonomie-Zuordnungen\n        \"\"\"\n        if not entities:\n            return []\n\n        # Hole existierende Taxonomie-Terme\n        cursor = db.execute(\"SELECT id, name, path, depth FROM taxonomy_terms ORDER BY depth, name\")\n        existing_terms = cursor.fetchall()\n        cursor.close()\n\n        term_names = [t[\"name\"] for t in existing_terms]\n        entity_names = [e[\"name\"] for e in entities[:15]]\n\n        prompt = f\"\"\"Ordne die folgenden Entitäten in eine hierarchische Taxonomie ein.\n\nEntitäten: {\", \".join(entity_names)}\n\nExistierende Taxonomie-Kategorien: {\", \".join(term_names) if term_names else \"Keine vorhanden\"}\n\nAufgabe:\n1. Ordne jede Entität einer passenden Kategorie zu\n2. Wenn keine passende Kategorie existiert, schlage eine neue vor\n3. 
Gib die hierarchische Einordnung an\n\nAntworte NUR als JSON:\n{{\"mappings\": [\n    {{\"entity\": \"...\", \"category\": \"...\", \"parent_category\": null, \"confidence\": 0.0-1.0, \"is_new_category\": false}}\n]}}\n\nText-Kontext:\n{text[:2000]}\"\"\"\n\n        result = self.llm.call_llm(prompt)\n        data = self.llm.parse_json(result)\n        mappings = data.get(\"mappings\", [])\n\n        # Speichere Taxonomie-Zuordnungen\n        stored = []\n        for mapping in mappings:\n            entity_match = next(\n                (e for e in entities if e[\"name\"].lower() == mapping.get(\"entity\", \"\").lower()), None\n            )\n            if entity_match:\n                stored_mapping = self._store_taxonomy_mapping(\n                    entity_id=entity_match[\"id\"],\n                    category_name=mapping.get(\"category\", \"\"),\n                    parent_category=mapping.get(\"parent_category\"),\n                    confidence=mapping.get(\"confidence\", 0.8),\n                    is_new=mapping.get(\"is_new_category\", False),\n                    existing_terms=existing_terms,\n                    level=level,\n                    source_id=source_id,\n                )\n                if stored_mapping:\n                    stored.append(stored_mapping)\n\n        # Speichere in Knowledge-Tabelle\n        self.store_knowledge(\n            level,\n            source_id,\n            KnowledgeType.TAXONOMY,\n            {\"mappings\": len(stored), \"categories\": list({m[\"category\"] for m in stored})},\n        )\n\n        return stored\n\n    def _store_taxonomy_mapping(\n        self,\n        entity_id: int,\n        category_name: str,\n        parent_category: str | None,\n        confidence: float,\n        is_new: bool,\n        existing_terms: list,\n        level: KnowledgeLevel,\n        source_id: int,\n    ) -> dict | None:\n        \"\"\"Speichere Taxonomie-Zuordnung.\"\"\"\n        try:\n            # Finde oder 
erstelle Taxonomie-Term\n            term = next((t for t in existing_terms if t[\"name\"].lower() == category_name.lower()), None)\n\n            if term:\n                term_id = term[\"id\"]\n            elif is_new:\n                # Neuen Term anlegen\n                parent_id = None\n                depth = 0\n                path = f\"\/{category_name}\"\n\n                if parent_category:\n                    parent_term = next(\n                        (t for t in existing_terms if t[\"name\"].lower() == parent_category.lower()), None\n                    )\n                    if parent_term:\n                        parent_id = parent_term[\"id\"]\n                        depth = parent_term[\"depth\"] + 1\n                        path = f\"{parent_term['path']}\/{category_name}\"\n\n                # Erstelle Slug\n                slug = re.sub(r\"[^a-z0-9]+\", \"-\", category_name.lower()).strip(\"-\")\n\n                cursor = db.execute(\n                    \"\"\"INSERT INTO taxonomy_terms (name, slug, parent_id, depth, path, created_at)\n                       VALUES (%s, %s, %s, %s, %s, NOW())\"\"\",\n                    (category_name, slug, parent_id, depth, path),\n                )\n                db.commit()\n                term_id = cursor.lastrowid\n                cursor.close()\n                db.log(\"INFO\", f\"Neuer Taxonomie-Term: '{category_name}' (ID: {term_id})\")\n            else:\n                return None\n\n            model_name = f\"{self.llm.model.provider}:{self.llm.model.model_name}\"\n\n            # Speichere Zuordnung\n            cursor = db.execute(\n                \"\"\"INSERT INTO entity_taxonomy_mapping\n                   (entity_id, taxonomy_term_id, confidence, source_type, source_id, model_used, created_at)\n                   VALUES (%s, %s, %s, %s, %s, %s, NOW())\n                   ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)\"\"\",\n                (entity_id, term_id, confidence, 
level.value, source_id, model_name),\n            )\n            db.commit()\n            cursor.close()\n\n            return {\"entity_id\": entity_id, \"term_id\": term_id, \"category\": category_name, \"confidence\": confidence}\n\n        except Exception as e:\n            db.log(\"ERROR\", f\"Fehler beim Speichern der Taxonomie: {e}\")\n            return None\n",
        "structuredPatch": [],
        "originalFile": null
    }
}
← Vorheriger | Zur Liste | Nächster →