Log Entry #15238

ID: 15238
Timestamp: 2025-12-25 15:50:02.522438
Client: root
IP: 145.224.72.140
Model: claude-sonnet-4-20250514
Status: completed
Tokens: 13,225 (Input: 701, Output: 12,524)
Duration: 109 ms
Request time: 2025-12-25 15:50:02.522438
Response time: 2025-12-25 15:50:02.631906
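
The logged duration is simply the gap between the two timestamps: 15:50:02.631906 minus 15:50:02.522438 is 0.109468 s, reported as 109 ms. A minimal Python check, using the timestamp strings copied from this entry:

    # Sanity check: Duration = Response time - Request time (values from above)
    from datetime import datetime

    FMT = "%Y-%m-%d %H:%M:%S.%f"
    t_req = datetime.strptime("2025-12-25 15:50:02.522438", FMT)
    t_res = datetime.strptime("2025-12-25 15:50:02.631906", FMT)
    print((t_res - t_req).total_seconds() * 1000)  # 109.468 -> logged as 109 ms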

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/knowledge.py",
        "content": "\"\"\"\nModulares Wissensextraktions-System für 3-Ebenen-Analyse.\n\nHINWEIS: Dies ist ein Backward-Kompatibilitäts-Wrapper.\nDie eigentliche Implementierung ist in knowledge\/ Package.\n\nEbenen:\n1. Seite (page): Detailanalyse pro Einzelseite\n2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel\n3. Dokument (document): Konsolidierte Gesamtsicht\n\nWissenstypen:\n- Entitäten: Personen, Konzepte, Methoden, Organisationen\n- Semantik: Bedeutung, Definition, Referenzen\n- Ontologie: Wechselwirkungen zwischen Entitäten\n- Taxonomie: Hierarchische Einordnung\n\nAutor: KI-System Pipeline\n\"\"\"\n\n# Re-export alles aus dem knowledge Package\nfrom knowledge import (\n    # Models\n    KnowledgeLevel,\n    KnowledgeType,\n    ModelConfig,\n    DEFAULT_MODELS,\n    # Main Classes\n    KnowledgeExtractor,\n    LLMService,\n    KnowledgeStorage,\n    # Extractors\n    EntityExtractor,\n    SemanticExtractor,\n    OntologyExtractor,\n    TaxonomyExtractor,\n    # Functions\n    store_knowledge,\n    get_model_config,\n    process_document_knowledge,\n)\n\nfrom db import db\n\n__all__ = [\n    \"KnowledgeLevel\",\n    \"KnowledgeType\",\n    \"ModelConfig\",\n    \"DEFAULT_MODELS\",\n    \"KnowledgeExtractor\",\n    \"LLMService\",\n    \"KnowledgeStorage\",\n    \"EntityExtractor\",\n    \"SemanticExtractor\",\n    \"OntologyExtractor\",\n    \"TaxonomyExtractor\",\n    \"store_knowledge\",\n    \"get_model_config\",\n    \"process_document_knowledge\",\n]\n\n\n# CLI Interface\nif __name__ == \"__main__\":\n    import sys\n\n    if len(sys.argv) < 2:\n        print(\"Verwendung: python knowledge.py <document_id> [provider] [model]\")\n        print(\"  provider: ollama (default) oder anthropic\")\n        print(\"  model: gemma3:27b-it-qat (ollama) oder claude-3-haiku-20240307 (anthropic)\")\n        sys.exit(1)\n\n    doc_id = int(sys.argv[1])\n    provider = sys.argv[2] if len(sys.argv) > 2 else \"ollama\"\n    model = sys.argv[3] if len(sys.argv) > 3 else None\n\n    if not db.connect():\n        print(\"Datenbankverbindung fehlgeschlagen\")\n        sys.exit(1)\n\n    print(f\"Starte Wissensextraktion für Dokument {doc_id}\")\n    print(f\"Provider: {provider}, Model: {model or 'default'}\")\n\n    results = process_document_knowledge(doc_id, provider, model)\n\n    print(\"\\nErgebnisse:\")\n    print(f\"  Seiten analysiert: {len(results['pages'])}\")\n    print(f\"  Abschnitte analysiert: {len(results['sections'])}\")\n    if results[\"document\"]:\n        print(f\"  Dokument-Entitäten: {results['document']['entities']}\")\n        print(f\"  Dokument-Relationen: {results['document']['ontology']}\")\n\n    db.disconnect()\n"
    }
}
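
The file written above is a thin backward-compatibility shim: the implementation lives in the knowledge/ package, and knowledge.py only re-exports the package's public names and keeps the CLI entry point. A minimal sketch of how an existing caller keeps working through the re-exports, assuming the knowledge package exposes the names listed in __all__ (the document ID 42 is an illustrative placeholder):

    # Hypothetical caller; the old import path still resolves via the shim.
    from db import db
    from knowledge import process_document_knowledge

    if db.connect():
        # Provider "ollama" with the package-default model, matching the CLI defaults
        results = process_document_knowledge(42, "ollama", None)  # 42: placeholder ID
        print(len(results["pages"]), len(results["sections"]))
        db.disconnect()

The equivalent CLI invocation, per the script's usage text, would be: python knowledge.py 42 ollama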

Response

{
    "tool_response": {
        "type": "update",
        "filePath": "\/var\/www\/scripts\/pipeline\/knowledge.py",
        "content": "\"\"\"\nModulares Wissensextraktions-System für 3-Ebenen-Analyse.\n\nHINWEIS: Dies ist ein Backward-Kompatibilitäts-Wrapper.\nDie eigentliche Implementierung ist in knowledge\/ Package.\n\nEbenen:\n1. Seite (page): Detailanalyse pro Einzelseite\n2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel\n3. Dokument (document): Konsolidierte Gesamtsicht\n\nWissenstypen:\n- Entitäten: Personen, Konzepte, Methoden, Organisationen\n- Semantik: Bedeutung, Definition, Referenzen\n- Ontologie: Wechselwirkungen zwischen Entitäten\n- Taxonomie: Hierarchische Einordnung\n\nAutor: KI-System Pipeline\n\"\"\"\n\n# Re-export alles aus dem knowledge Package\nfrom knowledge import (\n    # Models\n    KnowledgeLevel,\n    KnowledgeType,\n    ModelConfig,\n    DEFAULT_MODELS,\n    # Main Classes\n    KnowledgeExtractor,\n    LLMService,\n    KnowledgeStorage,\n    # Extractors\n    EntityExtractor,\n    SemanticExtractor,\n    OntologyExtractor,\n    TaxonomyExtractor,\n    # Functions\n    store_knowledge,\n    get_model_config,\n    process_document_knowledge,\n)\n\nfrom db import db\n\n__all__ = [\n    \"KnowledgeLevel\",\n    \"KnowledgeType\",\n    \"ModelConfig\",\n    \"DEFAULT_MODELS\",\n    \"KnowledgeExtractor\",\n    \"LLMService\",\n    \"KnowledgeStorage\",\n    \"EntityExtractor\",\n    \"SemanticExtractor\",\n    \"OntologyExtractor\",\n    \"TaxonomyExtractor\",\n    \"store_knowledge\",\n    \"get_model_config\",\n    \"process_document_knowledge\",\n]\n\n\n# CLI Interface\nif __name__ == \"__main__\":\n    import sys\n\n    if len(sys.argv) < 2:\n        print(\"Verwendung: python knowledge.py <document_id> [provider] [model]\")\n        print(\"  provider: ollama (default) oder anthropic\")\n        print(\"  model: gemma3:27b-it-qat (ollama) oder claude-3-haiku-20240307 (anthropic)\")\n        sys.exit(1)\n\n    doc_id = int(sys.argv[1])\n    provider = sys.argv[2] if len(sys.argv) > 2 else \"ollama\"\n    model = sys.argv[3] if len(sys.argv) > 3 else None\n\n    if not db.connect():\n        print(\"Datenbankverbindung fehlgeschlagen\")\n        sys.exit(1)\n\n    print(f\"Starte Wissensextraktion für Dokument {doc_id}\")\n    print(f\"Provider: {provider}, Model: {model or 'default'}\")\n\n    results = process_document_knowledge(doc_id, provider, model)\n\n    print(\"\\nErgebnisse:\")\n    print(f\"  Seiten analysiert: {len(results['pages'])}\")\n    print(f\"  Abschnitte analysiert: {len(results['sections'])}\")\n    if results[\"document\"]:\n        print(f\"  Dokument-Entitäten: {results['document']['entities']}\")\n        print(f\"  Dokument-Relationen: {results['document']['ontology']}\")\n\n    db.disconnect()\n",
        "structuredPatch": [
            {
                "oldStart": 1,
                "oldLines": 6,
                "newStart": 1,
                "newLines": 9,
                "lines": [
                    " \"\"\"",
                    " Modulares Wissensextraktions-System für 3-Ebenen-Analyse.",
                    " ",
                    "+HINWEIS: Dies ist ein Backward-Kompatibilitäts-Wrapper.",
                    "+Die eigentliche Implementierung ist in knowledge\/ Package.",
                    "+",
                    " Ebenen:",
                    " 1. Seite (page): Detailanalyse pro Einzelseite",
                    " 2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel"
                ]
            },
            {
                "oldStart": 15,
                "oldLines": 863,
                "newStart": 18,
                "newLines": 49,
                "lines": [
                    " Autor: KI-System Pipeline",
                    " \"\"\"",
                    " ",
                    "-import json",
                    "-import re",
                    "-import time",
                    "-from dataclasses import dataclass",
                    "-from enum import Enum",
                    "+# Re-export alles aus dem knowledge Package",
                    "+from knowledge import (",
                    "+    # Models",
                    "+    KnowledgeLevel,",
                    "+    KnowledgeType,",
                    "+    ModelConfig,",
                    "+    DEFAULT_MODELS,",
                    "+    # Main Classes",
                    "+    KnowledgeExtractor,",
                    "+    LLMService,",
                    "+    KnowledgeStorage,",
                    "+    # Extractors",
                    "+    EntityExtractor,",
                    "+    SemanticExtractor,",
                    "+    OntologyExtractor,",
                    "+    TaxonomyExtractor,",
                    "+    # Functions",
                    "+    store_knowledge,",
                    "+    get_model_config,",
                    "+    process_document_knowledge,",
                    "+)",
                    " ",
                    "-import requests",
                    "-",
                    "-from config import ANTHROPIC_API_KEY, OLLAMA_HOST",
                    " from db import db",
                    " ",
                    "+__all__ = [",
                    "+    \"KnowledgeLevel\",",
                    "+    \"KnowledgeType\",",
                    "+    \"ModelConfig\",",
                    "+    \"DEFAULT_MODELS\",",
                    "+    \"KnowledgeExtractor\",",
                    "+    \"LLMService\",",
                    "+    \"KnowledgeStorage\",",
                    "+    \"EntityExtractor\",",
                    "+    \"SemanticExtractor\",",
                    "+    \"OntologyExtractor\",",
                    "+    \"TaxonomyExtractor\",",
                    "+    \"store_knowledge\",",
                    "+    \"get_model_config\",",
                    "+    \"process_document_knowledge\",",
                    "+]",
                    " ",
                    "-class KnowledgeLevel(Enum):",
                    "-    \"\"\"Ebene der Wissensextraktion.\"\"\"",
                    " ",
                    "-    PAGE = \"page\"",
                    "-    SECTION = \"section\"",
                    "-    DOCUMENT = \"document\"",
                    "-",
                    "-",
                    "-class KnowledgeType(Enum):",
                    "-    \"\"\"Typ des extrahierten Wissens.\"\"\"",
                    "-",
                    "-    ENTITY = \"entity\"",
                    "-    SEMANTIC = \"semantic\"",
                    "-    ONTOLOGY = \"ontology\"",
                    "-    TAXONOMY = \"taxonomy\"",
                    "-",
                    "-",
                    "-@dataclass",
                    "-class ModelConfig:",
                    "-    \"\"\"Konfiguration für LLM-Modell.\"\"\"",
                    "-",
                    "-    provider: str  # 'ollama' oder 'anthropic'",
                    "-    model_name: str",
                    "-    temperature: float = 0.3",
                    "-    max_tokens: int = 2000",
                    "-",
                    "-",
                    "-# Standard-Modellkonfigurationen",
                    "-DEFAULT_MODELS = {",
                    "-    \"ollama\": ModelConfig(\"ollama\", \"gemma3:27b-it-qat\"),",
                    "-    \"anthropic\": ModelConfig(\"anthropic\", \"claude-3-haiku-20240307\"),",
                    "-    \"anthropic_opus\": ModelConfig(\"anthropic\", \"claude-opus-4-5-20251101\"),",
                    "-}",
                    "-",
                    "-",
                    "-class KnowledgeExtractor:",
                    "-    \"\"\"",
                    "-    Modulare Wissensextraktion mit Datenbankabgleich.",
                    "-",
                    "-    Verwendung:",
                    "-        extractor = KnowledgeExtractor(model_config)",
                    "-",
                    "-        # Pro Seite",
                    "-        entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)",
                    "-        semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)",
                    "-        ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)",
                    "-        taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)",
                    "-    \"\"\"",
                    "-",
                    "-    def __init__(self, model_config: ModelConfig | None = None):",
                    "-        \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"",
                    "-        self.model = model_config or DEFAULT_MODELS[\"ollama\"]",
                    "-        self.anthropic_client = None",
                    "-",
                    "-        if self.model.provider == \"anthropic\":",
                    "-            self._init_anthropic()",
                    "-",
                    "-    def _init_anthropic(self):",
                    "-        \"\"\"Initialisiere Anthropic Client.\"\"\"",
                    "-        try:",
                    "-            import anthropic",
                    "-",
                    "-            if ANTHROPIC_API_KEY:",
                    "-                self.anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)",
                    "-        except ImportError:",
                    "-            db.log(\"WARNING\", \"Anthropic SDK nicht installiert, fallback zu Ollama\")",
                    "-            self.model = DEFAULT_MODELS[\"ollama\"]",
                    "-",
                    "-    def _call_llm(self, prompt: str, json_output: bool = True) -> str:",
                    "-        \"\"\"Rufe LLM auf und gib Antwort zurück.\"\"\"",
                    "-        start_time = time.time()",
                    "-",
                    "-        try:",
                    "-            if self.model.provider == \"anthropic\" and self.anthropic_client:",
                    "-                response = self.anthropic_client.messages.create(",
                    "-                    model=self.model.model_name,",
                    "-                    max_tokens=self.model.max_tokens,",
                    "-                    temperature=self.model.temperature,",
                    "-                    messages=[{\"role\": \"user\", \"content\": prompt}],",
                    "-                )",
                    "-                result = response.content[0].text",
                    "-                tokens_in = response.usage.input_tokens",
                    "-                tokens_out = response.usage.output_tokens",
                    "-            else:",
                    "-                # Ollama",
                    "-                payload = {",
                    "-                    \"model\": self.model.model_name,",
                    "-                    \"prompt\": prompt,",
                    "-                    \"stream\": False,",
                    "-                    \"options\": {\"temperature\": self.model.temperature},",
                    "-                }",
                    "-                if json_output:",
                    "-                    payload[\"format\"] = \"json\"",
                    "-",
                    "-                resp = requests.post(f\"{OLLAMA_HOST}\/api\/generate\", json=payload, timeout=600)",
                    "-                resp.raise_for_status()",
                    "-                data = resp.json()",
                    "-                result = data.get(\"response\", \"\")",
                    "-                tokens_in = data.get(\"prompt_eval_count\", 0)",
                    "-                tokens_out = data.get(\"eval_count\", 0)",
                    "-",
                    "-            duration_ms = int((time.time() - start_time) * 1000)",
                    "-",
                    "-            # Protokolliere LLM-Aufruf",
                    "-            db.log_to_protokoll(",
                    "-                client_name=\"pipeline-knowledge\",",
                    "-                request=prompt[:500],",
                    "-                response=result[:500],",
                    "-                model_name=f\"{self.model.provider}:{self.model.model_name}\",",
                    "-                tokens_input=tokens_in,",
                    "-                tokens_output=tokens_out,",
                    "-                duration_ms=duration_ms,",
                    "-                status=\"completed\",",
                    "-            )",
                    "-",
                    "-            return result",
                    "-",
                    "-        except Exception as e:",
                    "-            db.log(\"ERROR\", f\"LLM-Aufruf fehlgeschlagen: {e}\")",
                    "-            return \"{}\"",
                    "-",
                    "-    def _parse_json(self, text: str) -> dict:",
                    "-        \"\"\"Extrahiere JSON aus LLM-Antwort.\"\"\"",
                    "-        try:",
                    "-            # Versuche direkt zu parsen",
                    "-            return json.loads(text)",
                    "-        except json.JSONDecodeError:",
                    "-            # Suche nach JSON-Block",
                    "-            match = re.search(r\"\\{[\\s\\S]*\\}\", text)",
                    "-            if match:",
                    "-                try:",
                    "-                    return json.loads(match.group())",
                    "-                except json.JSONDecodeError:",
                    "-                    pass",
                    "-        return {}",
                    "-",
                    "-    # =========================================================================",
                    "-    # ENTITÄTEN",
                    "-    # =========================================================================",
                    "-",
                    "-    def extract_entities(self, text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:",
                    "-        \"\"\"",
                    "-        Extrahiere Entitäten aus Text.",
                    "-",
                    "-        Args:",
                    "-            text: Eingabetext",
                    "-            level: Ebene (PAGE, SECTION, DOCUMENT)",
                    "-            source_id: ID der Quelle (page_id, section_id, document_id)",
                    "-",
                    "-        Returns:",
                    "-            Liste von Entitäten mit DB-IDs",
                    "-        \"\"\"",
                    "-        prompt = f\"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.",
                    "-",
                    "-Kategorien:",
                    "-- PERSON: Namen von Personen, Autoren, Therapeuten",
                    "-- ORGANIZATION: Firmen, Institute, Verbände",
                    "-- CONCEPT: Fachbegriffe, Theorien, Modelle",
                    "-- METHOD: Methoden, Techniken, Verfahren",
                    "-- TOOL: Werkzeuge, Instrumente, Materialien",
                    "-- LOCATION: Orte, Länder, Regionen",
                    "-- EVENT: Ereignisse, Konferenzen",
                    "-",
                    "-Antworte NUR als JSON:",
                    "-{{\"entities\": [",
                    "-    {{\"name\": \"...\", \"type\": \"CONCEPT\", \"context\": \"kurzer Kontext\", \"importance\": 0.0-1.0}}",
                    "-]}}",
                    "-",
                    "-Text ({level.value}-Ebene):",
                    "-{text[:4000]}\"\"\"",
                    "-",
                    "-        result = self._call_llm(prompt)",
                    "-        data = self._parse_json(result)",
                    "-        entities = data.get(\"entities\", [])",
                    "-",
                    "-        # Speichere und gleiche mit DB ab",
                    "-        stored_entities = []",
                    "-        for entity in entities:",
                    "-            stored = self._store_entity(entity, level, source_id)",
                    "-            if stored:",
                    "-                stored_entities.append(stored)",
                    "-",
                    "-        # Speichere in page_knowledge\/section_knowledge\/document_knowledge",
                    "-        self._store_knowledge(",
                    "-            level,",
                    "-            source_id,",
                    "-            KnowledgeType.ENTITY,",
                    "-            {\"entities\": [e[\"name\"] for e in stored_entities], \"count\": len(stored_entities)},",
                    "-        )",
                    "-",
                    "-        return stored_entities",
                    "-",
                    "-    def _store_entity(self, entity: dict, level: KnowledgeLevel, source_id: int) -> dict | None:",
                    "-        \"\"\"Speichere Entität in DB mit Duplikatprüfung.\"\"\"",
                    "-        try:",
                    "-            name = entity.get(\"name\", \"\").strip()",
                    "-            entity_type = entity.get(\"type\", \"OTHER\").upper()",
                    "-            context = entity.get(\"context\", \"\")",
                    "-",
                    "-            if not name:",
                    "-                return None",
                    "-",
                    "-            # Prüfe ob Entität existiert (case-insensitive)",
                    "-            cursor = db.execute(",
                    "-                \"\"\"SELECT id, name, type, description",
                    "-                   FROM entities",
                    "-                   WHERE LOWER(name) = LOWER(%s) OR LOWER(canonical_name) = LOWER(%s)",
                    "-                   LIMIT 1\"\"\",",
                    "-                (name, name),",
                    "-            )",
                    "-            existing = cursor.fetchone()",
                    "-            cursor.close()",
                    "-",
                    "-            if existing:",
                    "-                entity_id = existing[\"id\"]",
                    "-                # Entität existiert - verwende bestehende",
                    "-                db.log(\"DEBUG\", f\"Entität '{name}' existiert bereits (ID: {entity_id})\")",
                    "-            else:",
                    "-                # Neue Entität anlegen",
                    "-                cursor = db.execute(",
                    "-                    \"\"\"INSERT INTO entities (name, type, canonical_name, created_at)",
                    "-                       VALUES (%s, %s, %s, NOW())\"\"\",",
                    "-                    (name, entity_type, name.lower()),",
                    "-                )",
                    "-                db.commit()",
                    "-                entity_id = cursor.lastrowid",
                    "-                cursor.close()",
                    "-                db.log(\"INFO\", f\"Neue Entität angelegt: '{name}' (ID: {entity_id})\")",
                    "-",
                    "-            return {\"id\": entity_id, \"name\": name, \"type\": entity_type, \"context\": context, \"is_new\": existing is None}",
                    "-",
                    "-        except Exception as e:",
                    "-            db.log(\"ERROR\", f\"Fehler beim Speichern der Entität: {e}\")",
                    "-            return None",
                    "-",
                    "-    # =========================================================================",
                    "-    # SEMANTIK (Bedeutung\/Definition\/Referenz)",
                    "-    # =========================================================================",
                    "-",
                    "-    def extract_semantics(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:",
                    "-        \"\"\"",
                    "-        Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.",
                    "-",
                    "-        Args:",
                    "-            entities: Liste der extrahierten Entitäten",
                    "-            text: Ursprungstext für Kontext",
                    "-            level: Ebene",
                    "-            source_id: Quell-ID",
                    "-",
                    "-        Returns:",
                    "-            Liste von Semantik-Einträgen",
                    "-        \"\"\"",
                    "-        if not entities:",
                    "-            return []",
                    "-",
                    "-        entity_names = [e[\"name\"] for e in entities[:15]]",
                    "-",
                    "-        prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.",
                    "-",
                    "-Entitäten: {\", \".join(entity_names)}",
                    "-",
                    "-Für jede Entität gib an:",
                    "-- definition: Kurze Definition basierend auf dem Text",
                    "-- context: In welchem Kontext wird sie verwendet",
                    "-- references: Bezüge zu anderen Konzepten (falls erkennbar)",
                    "-",
                    "-Antworte NUR als JSON:",
                    "-{{\"semantics\": [",
                    "-    {{\"entity\": \"...\", \"definition\": \"...\", \"context\": \"...\", \"references\": [\"...\"]}}",
                    "-]}}",
                    "-",
                    "-Text:",
                    "-{text[:3000]}\"\"\"",
                    "-",
                    "-        result = self._call_llm(prompt)",
                    "-        data = self._parse_json(result)",
                    "-        semantics = data.get(\"semantics\", [])",
                    "-",
                    "-        # Speichere Semantik",
                    "-        stored = []",
                    "-        for sem in semantics:",
                    "-            entity_name = sem.get(\"entity\", \"\")",
                    "-            # Finde Entity-ID",
                    "-            entity_match = next((e for e in entities if e[\"name\"].lower() == entity_name.lower()), None)",
                    "-            if entity_match:",
                    "-                stored_sem = self._store_semantic(",
                    "-                    entity_id=entity_match[\"id\"],",
                    "-                    definition=sem.get(\"definition\", \"\"),",
                    "-                    context=sem.get(\"context\", \"\"),",
                    "-                    references=sem.get(\"references\", []),",
                    "-                    level=level,",
                    "-                    source_id=source_id,",
                    "-                )",
                    "-                if stored_sem:",
                    "-                    stored.append(stored_sem)",
                    "-",
                    "-        # Speichere in Knowledge-Tabelle",
                    "-        self._store_knowledge(",
                    "-            level,",
                    "-            source_id,",
                    "-            KnowledgeType.SEMANTIC,",
                    "-            {\"definitions\": len(stored), \"entities\": [s[\"entity_name\"] for s in stored]},",
                    "-        )",
                    "-",
                    "-        return stored",
                    "-",
                    "-    def _store_semantic(",
                    "-        self, entity_id: int, definition: str, context: str, references: list, level: KnowledgeLevel, source_id: int",
                    "-    ) -> dict | None:",
                    "-        \"\"\"Speichere Semantik-Eintrag mit Abgleich.\"\"\"",
                    "-        try:",
                    "-            # Prüfe ob bereits Semantik existiert",
                    "-            cursor = db.execute(",
                    "-                \"\"\"SELECT id, definition FROM entity_semantics",
                    "-                   WHERE entity_id = %s AND source_type = %s AND source_id = %s\"\"\",",
                    "-                (entity_id, level.value, source_id),",
                    "-            )",
                    "-            existing = cursor.fetchone()",
                    "-            cursor.close()",
                    "-",
                    "-            if existing:",
                    "-                # Prüfe ob Definition abweicht",
                    "-                if existing[\"definition\"] != definition:",
                    "-                    # Abweichende Definition - als zusätzliche Perspektive speichern",
                    "-                    db.log(\"INFO\", f\"Abweichende Definition für Entität {entity_id}, füge hinzu\")",
                    "-                    cursor = db.execute(",
                    "-                        \"\"\"INSERT INTO entity_semantics",
                    "-                           (entity_id, definition, context, references_json,",
                    "-                            source_type, source_id, model_used, created_at)",
                    "-                           VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",",
                    "-                        (",
                    "-                            entity_id,",
                    "-                            definition,",
                    "-                            context,",
                    "-                            json.dumps(references),",
                    "-                            level.value,",
                    "-                            source_id,",
                    "-                            f\"{self.model.provider}:{self.model.model_name}\",",
                    "-                        ),",
                    "-                    )",
                    "-                    db.commit()",
                    "-                    sem_id = cursor.lastrowid",
                    "-                    cursor.close()",
                    "-                else:",
                    "-                    sem_id = existing[\"id\"]",
                    "-            else:",
                    "-                # Neue Semantik",
                    "-                cursor = db.execute(",
                    "-                    \"\"\"INSERT INTO entity_semantics",
                    "-                       (entity_id, definition, context, references_json,",
                    "-                        source_type, source_id, model_used, created_at)",
                    "-                       VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",",
                    "-                    (",
                    "-                        entity_id,",
                    "-                        definition,",
                    "-                        context,",
                    "-                        json.dumps(references),",
                    "-                        level.value,",
                    "-                        source_id,",
                    "-                        f\"{self.model.provider}:{self.model.model_name}\",",
                    "-                    ),",
                    "-                )",
                    "-                db.commit()",
                    "-                sem_id = cursor.lastrowid",
                    "-                cursor.close()",
                    "-",
                    "-            # Hole Entity-Name für Rückgabe",
                    "-            cursor = db.execute(\"SELECT name FROM entities WHERE id = %s\", (entity_id,))",
                    "-            entity = cursor.fetchone()",
                    "-            cursor.close()",
                    "-",
                    "-            return {",
                    "-                \"id\": sem_id,",
                    "-                \"entity_id\": entity_id,",
                    "-                \"entity_name\": entity[\"name\"] if entity else \"\",",
                    "-                \"definition\": definition,",
                    "-            }",
                    "-",
                    "-        except Exception as e:",
                    "-            db.log(\"ERROR\", f\"Fehler beim Speichern der Semantik: {e}\")",
                    "-            return None",
                    "-",
                    "-    # =========================================================================",
                    "-    # ONTOLOGIE (Wechselwirkungen)",
                    "-    # =========================================================================",
                    "-",
                    "-    def extract_ontology(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:",
                    "-        \"\"\"",
                    "-        Extrahiere Ontologie (Wechselwirkungen) zwischen Entitäten.",
                    "-",
                    "-        Args:",
                    "-            entities: Liste der Entitäten",
                    "-            text: Ursprungstext",
                    "-            level: Ebene",
                    "-            source_id: Quell-ID",
                    "-",
                    "-        Returns:",
                    "-            Liste von Ontologie-Beziehungen",
                    "-        \"\"\"",
                    "-        if len(entities) < 2:",
                    "-            return []",
                    "-",
                    "-        entity_names = [e[\"name\"] for e in entities[:20]]",
                    "-",
                    "-        prompt = f\"\"\"Analysiere die Wechselwirkungen zwischen den folgenden Entitäten im Text.",
                    "-",
                    "-Entitäten: {\", \".join(entity_names)}",
                    "-",
                    "-Beziehungstypen:",
                    "-- CAUSES: A verursacht\/bewirkt B",
                    "-- REQUIRES: A benötigt\/erfordert B",
                    "-- INFLUENCES: A beeinflusst B",
                    "-- ENABLES: A ermöglicht B",
                    "-- CONTRADICTS: A widerspricht B",
                    "-- PART_OF: A ist Teil von B",
                    "-- INSTANCE_OF: A ist Instanz von B",
                    "-- USES: A verwendet B",
                    "-",
                    "-Antworte NUR als JSON:",
                    "-{{\"relations\": [",
                    "-    {{\"source\": \"...\", \"target\": \"...\", \"type\": \"CAUSES\", \"description\": \"...\", \"strength\": 0.0-1.0, \"bidirectional\": false}}",
                    "-]}}",
                    "-",
                    "-Text:",
                    "-{text[:3000]}\"\"\"",
                    "-",
                    "-        result = self._call_llm(prompt)",
                    "-        data = self._parse_json(result)",
                    "-        relations = data.get(\"relations\", [])",
                    "-",
                    "-        # Speichere Ontologie-Beziehungen",
                    "-        stored = []",
                    "-        for rel in relations:",
                    "-            source_entity = next((e for e in entities if e[\"name\"].lower() == rel.get(\"source\", \"\").lower()), None)",
                    "-            target_entity = next((e for e in entities if e[\"name\"].lower() == rel.get(\"target\", \"\").lower()), None)",
                    "-",
                    "-            if source_entity and target_entity:",
                    "-                stored_rel = self._store_ontology(",
                    "-                    source_id=source_entity[\"id\"],",
                    "-                    target_id=target_entity[\"id\"],",
                    "-                    relation_type=rel.get(\"type\", \"RELATED_TO\"),",
                    "-                    description=rel.get(\"description\", \"\"),",
                    "-                    strength=rel.get(\"strength\", 1.0),",
                    "-                    bidirectional=rel.get(\"bidirectional\", False),",
                    "-                    level=level,",
                    "-                    knowledge_source_id=source_id,",
                    "-                )",
                    "-                if stored_rel:",
                    "-                    stored.append(stored_rel)",
                    "-",
                    "-        # Speichere in Knowledge-Tabelle",
                    "-        self._store_knowledge(",
                    "-            level,",
                    "-            source_id,",
                    "-            KnowledgeType.ONTOLOGY,",
                    "-            {\"relations\": len(stored), \"types\": list({r[\"type\"] for r in stored})},",
                    "-        )",
                    "-",
                    "-        return stored",
                    "-",
                    "-    def _store_ontology(",
                    "-        self,",
                    "-        source_id: int,",
                    "-        target_id: int,",
                    "-        relation_type: str,",
                    "-        description: str,",
                    "-        strength: float,",
                    "-        bidirectional: bool,",
                    "-        level: KnowledgeLevel,",
                    "-        knowledge_source_id: int,",
                    "-    ) -> dict | None:",
                    "-        \"\"\"Speichere Ontologie-Beziehung.\"\"\"",
                    "-        try:",
                    "-            # Prüfe ob Beziehung existiert",
                    "-            cursor = db.execute(",
                    "-                \"\"\"SELECT id FROM entity_ontology",
                    "-                   WHERE source_entity_id = %s AND target_entity_id = %s AND relation_type = %s\"\"\",",
                    "-                (source_id, target_id, relation_type),",
                    "-            )",
                    "-            existing = cursor.fetchone()",
                    "-            cursor.close()",
                    "-",
                    "-            if existing:",
                    "-                rel_id = existing[\"id\"]",
                    "-            else:",
                    "-                cursor = db.execute(",
                    "-                    \"\"\"INSERT INTO entity_ontology",
                    "-                       (source_entity_id, target_entity_id, relation_type, direction,",
                    "-                        strength, description, source_type, source_id, model_used, created_at)",
                    "-                       VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",",
                    "-                    (",
                    "-                        source_id,",
                    "-                        target_id,",
                    "-                        relation_type,",
                    "-                        \"bidirectional\" if bidirectional else \"unidirectional\",",
                    "-                        strength,",
                    "-                        description,",
                    "-                        level.value,",
                    "-                        knowledge_source_id,",
                    "-                        f\"{self.model.provider}:{self.model.model_name}\",",
                    "-                    ),",
                    "-                )",
                    "-                db.commit()",
                    "-                rel_id = cursor.lastrowid",
                    "-                cursor.close()",
                    "-",
                    "-            return {",
                    "-                \"id\": rel_id,",
                    "-                \"source_id\": source_id,",
                    "-                \"target_id\": target_id,",
                    "-                \"type\": relation_type,",
                    "-                \"strength\": strength,",
                    "-            }",
                    "-",
                    "-        except Exception as e:",
                    "-            db.log(\"ERROR\", f\"Fehler beim Speichern der Ontologie: {e}\")",
                    "-            return None",
                    "-",
                    "-    # =========================================================================",
                    "-    # TAXONOMIE (Hierarchische Einordnung)",
                    "-    # =========================================================================",
                    "-",
                    "-    def extract_taxonomy(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:",
                    "-        \"\"\"",
                    "-        Extrahiere Taxonomie (hierarchische Einordnung) für Entitäten.",
                    "-",
                    "-        Args:",
                    "-            entities: Liste der Entitäten",
                    "-            text: Ursprungstext",
                    "-            level: Ebene",
                    "-            source_id: Quell-ID",
                    "-",
                    "-        Returns:",
                    "-            Liste von Taxonomie-Zuordnungen",
                    "-        \"\"\"",
                    "-        if not entities:",
                    "-            return []",
                    "-",
                    "-        # Hole existierende Taxonomie-Terme",
                    "-        cursor = db.execute(\"SELECT id, name, path, depth FROM taxonomy_terms ORDER BY depth, name\")",
                    "-        existing_terms = cursor.fetchall()",
                    "-        cursor.close()",
                    "-",
                    "-        term_names = [t[\"name\"] for t in existing_terms]",
                    "-        entity_names = [e[\"name\"] for e in entities[:15]]",
                    "-",
                    "-        prompt = f\"\"\"Ordne die folgenden Entitäten in eine hierarchische Taxonomie ein.",
                    "-",
                    "-Entitäten: {\", \".join(entity_names)}",
                    "-",
                    "-Existierende Taxonomie-Kategorien: {\", \".join(term_names) if term_names else \"Keine vorhanden\"}",
                    "-",
                    "-Aufgabe:",
                    "-1. Ordne jede Entität einer passenden Kategorie zu",
                    "-2. Wenn keine passende Kategorie existiert, schlage eine neue vor",
                    "-3. Gib die hierarchische Einordnung an",
                    "-",
                    "-Antworte NUR als JSON:",
                    "-{{\"mappings\": [",
                    "-    {{\"entity\": \"...\", \"category\": \"...\", \"parent_category\": null, \"confidence\": 0.0-1.0, \"is_new_category\": false}}",
                    "-]}}",
                    "-",
                    "-Text-Kontext:",
                    "-{text[:2000]}\"\"\"",
                    "-",
                    "-        result = self._call_llm(prompt)",
                    "-        data = self._parse_json(result)",
                    "-        mappings = data.get(\"mappings\", [])",
                    "-",
                    "-        # Speichere Taxonomie-Zuordnungen",
                    "-        stored = []",
                    "-        for mapping in mappings:",
                    "-            entity_match = next((e for e in entities if e[\"name\"].lower() == mapping.get(\"entity\", \"\").lower()), None)",
                    "-            if entity_match:",
                    "-                stored_mapping = self._store_taxonomy_mapping(",
                    "-                    entity_id=entity_match[\"id\"],",
                    "-                    category_name=mapping.get(\"category\", \"\"),",
                    "-                    parent_category=mapping.get(\"parent_category\"),",
                    "-                    confidence=mapping.get(\"confidence\", 0.8),",
                    "-                    is_new=mapping.get(\"is_new_category\", False),",
                    "-                    existing_terms=existing_terms,",
                    "-                    level=level,",
                    "-                    source_id=source_id,",
                    "-                )",
                    "-                if stored_mapping:",
                    "-                    stored.append(stored_mapping)",
                    "-",
                    "-        # Speichere in Knowledge-Tabelle",
                    "-        self._store_knowledge(",
                    "-            level,",
                    "-            source_id,",
                    "-            KnowledgeType.TAXONOMY,",
                    "-            {\"mappings\": len(stored), \"categories\": list({m[\"category\"] for m in stored})},",
                    "-        )",
                    "-",
                    "-        return stored",
                    "-",
                    "-    def _store_taxonomy_mapping(",
                    "-        self,",
                    "-        entity_id: int,",
                    "-        category_name: str,",
                    "-        parent_category: str | None,",
                    "-        confidence: float,",
                    "-        is_new: bool,",
                    "-        existing_terms: list,",
                    "-        level: KnowledgeLevel,",
                    "-        source_id: int,",
                    "-    ) -> dict | None:",
                    "-        \"\"\"Speichere Taxonomie-Zuordnung.\"\"\"",
                    "-        try:",
                    "-            # Finde oder erstelle Taxonomie-Term",
                    "-            term = next((t for t in existing_terms if t[\"name\"].lower() == category_name.lower()), None)",
                    "-",
                    "-            if term:",
                    "-                term_id = term[\"id\"]",
                    "-            elif is_new:",
                    "-                # Neuen Term anlegen",
                    "-                parent_id = None",
                    "-                depth = 0",
                    "-                path = f\"\/{category_name}\"",
                    "-",
                    "-                if parent_category:",
                    "-                    parent_term = next(",
                    "-                        (t for t in existing_terms if t[\"name\"].lower() == parent_category.lower()), None",
                    "-                    )",
                    "-                    if parent_term:",
                    "-                        parent_id = parent_term[\"id\"]",
                    "-                        depth = parent_term[\"depth\"] + 1",
                    "-                        path = f\"{parent_term['path']}\/{category_name}\"",
                    "-",
                    "-                # Erstelle Slug",
                    "-                slug = re.sub(r\"[^a-z0-9]+\", \"-\", category_name.lower()).strip(\"-\")",
                    "-",
                    "-                cursor = db.execute(",
                    "-                    \"\"\"INSERT INTO taxonomy_terms (name, slug, parent_id, depth, path, created_at)",
                    "-                       VALUES (%s, %s, %s, %s, %s, NOW())\"\"\",",
                    "-                    (category_name, slug, parent_id, depth, path),",
                    "-                )",
                    "-                db.commit()",
                    "-                term_id = cursor.lastrowid",
                    "-                cursor.close()",
                    "-                db.log(\"INFO\", f\"Neuer Taxonomie-Term: '{category_name}' (ID: {term_id})\")",
                    "-            else:",
                    "-                # Kategorie existiert nicht und soll nicht neu erstellt werden",
                    "-                return None",
                    "-",
                    "-            # Speichere Zuordnung",
                    "-            cursor = db.execute(",
                    "-                \"\"\"INSERT INTO entity_taxonomy_mapping",
                    "-                   (entity_id, taxonomy_term_id, confidence, source_type, source_id, model_used, created_at)",
                    "-                   VALUES (%s, %s, %s, %s, %s, %s, NOW())",
                    "-                   ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)\"\"\",",
                    "-                (",
                    "-                    entity_id,",
                    "-                    term_id,",
                    "-                    confidence,",
                    "-                    level.value,",
                    "-                    source_id,",
                    "-                    f\"{self.model.provider}:{self.model.model_name}\",",
                    "-                ),",
                    "-            )",
                    "-            db.commit()",
                    "-            cursor.close()",
                    "-",
                    "-            return {\"entity_id\": entity_id, \"term_id\": term_id, \"category\": category_name, \"confidence\": confidence}",
                    "-",
                    "-        except Exception as e:",
                    "-            db.log(\"ERROR\", f\"Fehler beim Speichern der Taxonomie: {e}\")",
                    "-            return None",
                    "-",
                    "-    # =========================================================================",
                    "-    # KNOWLEDGE STORAGE",
                    "-    # =========================================================================",
                    "-",
                    "-    def _store_knowledge(self, level: KnowledgeLevel, source_id: int, knowledge_type: KnowledgeType, data: dict):",
                    "-        \"\"\"Speichere Wissen in der entsprechenden Tabelle.\"\"\"",
                    "-        table_map = {",
                    "-            KnowledgeLevel.PAGE: \"page_knowledge\",",
                    "-            KnowledgeLevel.SECTION: \"section_knowledge\",",
                    "-            KnowledgeLevel.DOCUMENT: \"document_knowledge\",",
                    "-        }",
                    "-",
                    "-        id_field_map = {",
                    "-            KnowledgeLevel.PAGE: \"page_id\",",
                    "-            KnowledgeLevel.SECTION: \"section_id\",",
                    "-            KnowledgeLevel.DOCUMENT: \"document_id\",",
                    "-        }",
                    "-",
                    "-        table = table_map[level]",
                    "-        id_field = id_field_map[level]",
                    "-",
                    "-        try:",
                    "-            cursor = db.execute(",
                    "-                f\"\"\"INSERT INTO {table} ({id_field}, knowledge_type, data, model_used, created_at)",
                    "-                    VALUES (%s, %s, %s, %s, NOW())\"\"\",  # noqa: S608 - table name is controlled",
                    "-                (source_id, knowledge_type.value, json.dumps(data), f\"{self.model.provider}:{self.model.model_name}\"),",
                    "-            )",
                    "-            db.commit()",
                    "-            cursor.close()",
                    "-        except Exception as e:",
                    "-            db.log(\"ERROR\", f\"Fehler beim Speichern in {table}: {e}\")",
                    "-",
                    "-    # =========================================================================",
                    "-    # KOMPLETTE ANALYSE",
                    "-    # =========================================================================",
                    "-",
                    "-    def analyze_page(self, page_id: int, text: str) -> dict:",
                    "-        \"\"\"",
                    "-        Vollständige Wissensanalyse für eine Seite.",
                    "-",
                    "-        Reihenfolge: Entitäten → Semantik → Ontologie → Taxonomie",
                    "-        \"\"\"",
                    "-        db.log(\"INFO\", f\"Starte Seitenanalyse für page_id={page_id}\")",
                    "-",
                    "-        # 1. Entitäten",
                    "-        entities = self.extract_entities(text, KnowledgeLevel.PAGE, page_id)",
                    "-",
                    "-        # 2. Semantik",
                    "-        semantics = self.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)",
                    "-",
                    "-        # 3. Ontologie",
                    "-        ontology = self.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)",
                    "-",
                    "-        # 4. Taxonomie",
                    "-        taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)",
                    "-",
                    "-        return {",
                    "-            \"page_id\": page_id,",
                    "-            \"entities\": len(entities),",
                    "-            \"semantics\": len(semantics),",
                    "-            \"ontology\": len(ontology),",
                    "-            \"taxonomy\": len(taxonomy),",
                    "-        }",
                    "-",
                    "-    def analyze_section(self, section_id: int, text: str) -> dict:",
                    "-        \"\"\"Vollständige Wissensanalyse für einen Abschnitt.\"\"\"",
                    "-        db.log(\"INFO\", f\"Starte Abschnittsanalyse für section_id={section_id}\")",
                    "-",
                    "-        entities = self.extract_entities(text, KnowledgeLevel.SECTION, section_id)",
                    "-        semantics = self.extract_semantics(entities, text, KnowledgeLevel.SECTION, section_id)",
                    "-        ontology = self.extract_ontology(entities, text, KnowledgeLevel.SECTION, section_id)",
                    "-        taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.SECTION, section_id)",
                    "-",
                    "-        return {",
                    "-            \"section_id\": section_id,",
                    "-            \"entities\": len(entities),",
                    "-            \"semantics\": len(semantics),",
                    "-            \"ontology\": len(ontology),",
                    "-            \"taxonomy\": len(taxonomy),",
                    "-        }",
                    "-",
                    "-    def analyze_document(self, document_id: int, text: str) -> dict:",
                    "-        \"\"\"Vollständige Wissensanalyse für ein Dokument (konsolidiert).\"\"\"",
                    "-        db.log(\"INFO\", f\"Starte Dokumentanalyse für document_id={document_id}\")",
                    "-",
                    "-        entities = self.extract_entities(text, KnowledgeLevel.DOCUMENT, document_id)",
                    "-        semantics = self.extract_semantics(entities, text, KnowledgeLevel.DOCUMENT, document_id)",
                    "-        ontology = self.extract_ontology(entities, text, KnowledgeLevel.DOCUMENT, document_id)",
                    "-        taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.DOCUMENT, document_id)",
                    "-",
                    "-        return {",
                    "-            \"document_id\": document_id,",
                    "-            \"entities\": len(entities),",
                    "-            \"semantics\": len(semantics),",
                    "-            \"ontology\": len(ontology),",
                    "-            \"taxonomy\": len(taxonomy),",
                    "-        }",
                    "-",
                    "-",
                    "-# =========================================================================",
                    "-# UTILITY FUNCTIONS",
                    "-# =========================================================================",
                    "-",
                    "-",
                    "-def get_model_config(provider: str = \"ollama\", model_name: str | None = None) -> ModelConfig:",
                    "-    \"\"\"",
                    "-    Erstelle Modellkonfiguration basierend auf Provider.",
                    "-",
                    "-    Args:",
                    "-        provider: 'ollama' oder 'anthropic'",
                    "-        model_name: Optional spezifisches Modell",
                    "-",
                    "-    Returns:",
                    "-        ModelConfig für den Extractor",
                    "-    \"\"\"",
                    "-    if provider == \"anthropic\":",
                    "-        return ModelConfig(provider=\"anthropic\", model_name=model_name or \"claude-3-haiku-20240307\")",
                    "-    else:",
                    "-        return ModelConfig(provider=\"ollama\", model_name=model_name or \"gemma3:27b-it-qat\")",
                    "-",
                    "-",
                    "-def process_document_knowledge(document_id: int, provider: str = \"ollama\", model_name: str | None = None) -> dict:",
                    "-    \"\"\"",
                    "-    Verarbeite komplettes Dokument mit 3-Ebenen-Analyse.",
                    "-",
                    "-    1. Seiten-Ebene",
                    "-    2. Abschnitt-Ebene",
                    "-    3. Dokument-Ebene (konsolidiert)",
                    "-    \"\"\"",
                    "-    config = get_model_config(provider, model_name)",
                    "-    extractor = KnowledgeExtractor(config)",
                    "-",
                    "-    results = {\"document_id\": document_id, \"pages\": [], \"sections\": [], \"document\": None}",
                    "-",
                    "-    # 1. Seiten analysieren",
                    "-    cursor = db.execute(",
                    "-        \"SELECT id, text_content FROM document_pages WHERE document_id = %s ORDER BY page_number\", (document_id,)",
                    "-    )",
                    "-    pages = cursor.fetchall()",
                    "-    cursor.close()",
                    "-",
                    "-    for page in pages:",
                    "-        if page[\"text_content\"]:",
                    "-            page_result = extractor.analyze_page(page[\"id\"], page[\"text_content\"])",
                    "-            results[\"pages\"].append(page_result)",
                    "-",
                    "-    # 2. Abschnitte analysieren",
                    "-    cursor = db.execute(",
                    "-        \"SELECT id, content FROM document_sections WHERE document_id = %s ORDER BY sort_order\", (document_id,)",
                    "-    )",
                    "-    sections = cursor.fetchall()",
                    "-    cursor.close()",
                    "-",
                    "-    for section in sections:",
                    "-        if section[\"content\"]:",
                    "-            section_result = extractor.analyze_section(section[\"id\"], section[\"content\"])",
                    "-            results[\"sections\"].append(section_result)",
                    "-",
                    "-    # 3. Dokument-Ebene (Gesamttext)",
                    "-    cursor = db.execute(",
                    "-        \"SELECT GROUP_CONCAT(content SEPARATOR ' ') as full_text FROM chunks WHERE document_id = %s\", (document_id,)",
                    "-    )",
                    "-    doc = cursor.fetchone()",
                    "-    cursor.close()",
                    "-",
                    "-    if doc and doc[\"full_text\"]:",
                    "-        results[\"document\"] = extractor.analyze_document(document_id, doc[\"full_text\"][:10000])",
                    "-",
                    "-    return results",
                    "-",
                    "-",
                    "-# =========================================================================",
                    "-# CLI INTERFACE",
                    "-# =========================================================================",
                    "-",
                    "+# CLI Interface",
                    " if __name__ == \"__main__\":",
                    "     import sys",
                    " "
                ]
            }
        ],
        "originalFile": "\"\"\"\nModulares Wissensextraktions-System für 3-Ebenen-Analyse.\n\nEbenen:\n1. Seite (page): Detailanalyse pro Einzelseite\n2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel\n3. Dokument (document): Konsolidierte Gesamtsicht\n\nWissenstypen:\n- Entitäten: Personen, Konzepte, Methoden, Organisationen\n- Semantik: Bedeutung, Definition, Referenzen\n- Ontologie: Wechselwirkungen zwischen Entitäten\n- Taxonomie: Hierarchische Einordnung\n\nAutor: KI-System Pipeline\n\"\"\"\n\nimport json\nimport re\nimport time\nfrom dataclasses import dataclass\nfrom enum import Enum\n\nimport requests\n\nfrom config import ANTHROPIC_API_KEY, OLLAMA_HOST\nfrom db import db\n\n\nclass KnowledgeLevel(Enum):\n    \"\"\"Ebene der Wissensextraktion.\"\"\"\n\n    PAGE = \"page\"\n    SECTION = \"section\"\n    DOCUMENT = \"document\"\n\n\nclass KnowledgeType(Enum):\n    \"\"\"Typ des extrahierten Wissens.\"\"\"\n\n    ENTITY = \"entity\"\n    SEMANTIC = \"semantic\"\n    ONTOLOGY = \"ontology\"\n    TAXONOMY = \"taxonomy\"\n\n\n@dataclass\nclass ModelConfig:\n    \"\"\"Konfiguration für LLM-Modell.\"\"\"\n\n    provider: str  # 'ollama' oder 'anthropic'\n    model_name: str\n    temperature: float = 0.3\n    max_tokens: int = 2000\n\n\n# Standard-Modellkonfigurationen\nDEFAULT_MODELS = {\n    \"ollama\": ModelConfig(\"ollama\", \"gemma3:27b-it-qat\"),\n    \"anthropic\": ModelConfig(\"anthropic\", \"claude-3-haiku-20240307\"),\n    \"anthropic_opus\": ModelConfig(\"anthropic\", \"claude-opus-4-5-20251101\"),\n}\n\n\nclass KnowledgeExtractor:\n    \"\"\"\n    Modulare Wissensextraktion mit Datenbankabgleich.\n\n    Verwendung:\n        extractor = KnowledgeExtractor(model_config)\n\n        # Pro Seite\n        entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n        semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n        ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n        taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n    \"\"\"\n\n    def __init__(self, model_config: ModelConfig | None = None):\n        \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"\n        self.model = model_config or DEFAULT_MODELS[\"ollama\"]\n        self.anthropic_client = None\n\n        if self.model.provider == \"anthropic\":\n            self._init_anthropic()\n\n    def _init_anthropic(self):\n        \"\"\"Initialisiere Anthropic Client.\"\"\"\n        try:\n            import anthropic\n\n            if ANTHROPIC_API_KEY:\n                self.anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)\n        except ImportError:\n            db.log(\"WARNING\", \"Anthropic SDK nicht installiert, fallback zu Ollama\")\n            self.model = DEFAULT_MODELS[\"ollama\"]\n\n    def _call_llm(self, prompt: str, json_output: bool = True) -> str:\n        \"\"\"Rufe LLM auf und gib Antwort zurück.\"\"\"\n        start_time = time.time()\n\n        try:\n            if self.model.provider == \"anthropic\" and self.anthropic_client:\n                response = self.anthropic_client.messages.create(\n                    model=self.model.model_name,\n                    max_tokens=self.model.max_tokens,\n                    temperature=self.model.temperature,\n                    messages=[{\"role\": \"user\", \"content\": prompt}],\n                )\n                result = response.content[0].text\n                tokens_in = 
response.usage.input_tokens\n                tokens_out = response.usage.output_tokens\n            else:\n                # Ollama\n                payload = {\n                    \"model\": self.model.model_name,\n                    \"prompt\": prompt,\n                    \"stream\": False,\n                    \"options\": {\"temperature\": self.model.temperature},\n                }\n                if json_output:\n                    payload[\"format\"] = \"json\"\n\n                resp = requests.post(f\"{OLLAMA_HOST}\/api\/generate\", json=payload, timeout=600)\n                resp.raise_for_status()\n                data = resp.json()\n                result = data.get(\"response\", \"\")\n                tokens_in = data.get(\"prompt_eval_count\", 0)\n                tokens_out = data.get(\"eval_count\", 0)\n\n            duration_ms = int((time.time() - start_time) * 1000)\n\n            # Protokolliere LLM-Aufruf\n            db.log_to_protokoll(\n                client_name=\"pipeline-knowledge\",\n                request=prompt[:500],\n                response=result[:500],\n                model_name=f\"{self.model.provider}:{self.model.model_name}\",\n                tokens_input=tokens_in,\n                tokens_output=tokens_out,\n                duration_ms=duration_ms,\n                status=\"completed\",\n            )\n\n            return result\n\n        except Exception as e:\n            db.log(\"ERROR\", f\"LLM-Aufruf fehlgeschlagen: {e}\")\n            return \"{}\"\n\n    def _parse_json(self, text: str) -> dict:\n        \"\"\"Extrahiere JSON aus LLM-Antwort.\"\"\"\n        try:\n            # Versuche direkt zu parsen\n            return json.loads(text)\n        except json.JSONDecodeError:\n            # Suche nach JSON-Block\n            match = re.search(r\"\\{[\\s\\S]*\\}\", text)\n            if match:\n                try:\n                    return json.loads(match.group())\n                except json.JSONDecodeError:\n                    pass\n        return {}\n\n    # =========================================================================\n    # ENTITÄTEN\n    # =========================================================================\n\n    def extract_entities(self, text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:\n        \"\"\"\n        Extrahiere Entitäten aus Text.\n\n        Args:\n            text: Eingabetext\n            level: Ebene (PAGE, SECTION, DOCUMENT)\n            source_id: ID der Quelle (page_id, section_id, document_id)\n\n        Returns:\n            Liste von Entitäten mit DB-IDs\n        \"\"\"\n        prompt = f\"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorien:\n- PERSON: Namen von Personen, Autoren, Therapeuten\n- ORGANIZATION: Firmen, Institute, Verbände\n- CONCEPT: Fachbegriffe, Theorien, Modelle\n- METHOD: Methoden, Techniken, Verfahren\n- TOOL: Werkzeuge, Instrumente, Materialien\n- LOCATION: Orte, Länder, Regionen\n- EVENT: Ereignisse, Konferenzen\n\nAntworte NUR als JSON:\n{{\"entities\": [\n    {{\"name\": \"...\", \"type\": \"CONCEPT\", \"context\": \"kurzer Kontext\", \"importance\": 0.0-1.0}}\n]}}\n\nText ({level.value}-Ebene):\n{text[:4000]}\"\"\"\n\n        result = self._call_llm(prompt)\n        data = self._parse_json(result)\n        entities = data.get(\"entities\", [])\n\n        # Speichere und gleiche mit DB ab\n        stored_entities = []\n        for entity in entities:\n            stored = self._store_entity(entity, 
level, source_id)\n            if stored:\n                stored_entities.append(stored)\n\n        # Speichere in page_knowledge\/section_knowledge\/document_knowledge\n        self._store_knowledge(\n            level,\n            source_id,\n            KnowledgeType.ENTITY,\n            {\"entities\": [e[\"name\"] for e in stored_entities], \"count\": len(stored_entities)},\n        )\n\n        return stored_entities\n\n    def _store_entity(self, entity: dict, level: KnowledgeLevel, source_id: int) -> dict | None:\n        \"\"\"Speichere Entität in DB mit Duplikatprüfung.\"\"\"\n        try:\n            name = entity.get(\"name\", \"\").strip()\n            entity_type = entity.get(\"type\", \"OTHER\").upper()\n            context = entity.get(\"context\", \"\")\n\n            if not name:\n                return None\n\n            # Prüfe ob Entität existiert (case-insensitive)\n            cursor = db.execute(\n                \"\"\"SELECT id, name, type, description\n                   FROM entities\n                   WHERE LOWER(name) = LOWER(%s) OR LOWER(canonical_name) = LOWER(%s)\n                   LIMIT 1\"\"\",\n                (name, name),\n            )\n            existing = cursor.fetchone()\n            cursor.close()\n\n            if existing:\n                entity_id = existing[\"id\"]\n                # Entität existiert - verwende bestehende\n                db.log(\"DEBUG\", f\"Entität '{name}' existiert bereits (ID: {entity_id})\")\n            else:\n                # Neue Entität anlegen\n                cursor = db.execute(\n                    \"\"\"INSERT INTO entities (name, type, canonical_name, created_at)\n                       VALUES (%s, %s, %s, NOW())\"\"\",\n                    (name, entity_type, name.lower()),\n                )\n                db.commit()\n                entity_id = cursor.lastrowid\n                cursor.close()\n                db.log(\"INFO\", f\"Neue Entität angelegt: '{name}' (ID: {entity_id})\")\n\n            return {\"id\": entity_id, \"name\": name, \"type\": entity_type, \"context\": context, \"is_new\": existing is None}\n\n        except Exception as e:\n            db.log(\"ERROR\", f\"Fehler beim Speichern der Entität: {e}\")\n            return None\n\n    # =========================================================================\n    # SEMANTIK (Bedeutung\/Definition\/Referenz)\n    # =========================================================================\n\n    def extract_semantics(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:\n        \"\"\"\n        Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.\n\n        Args:\n            entities: Liste der extrahierten Entitäten\n            text: Ursprungstext für Kontext\n            level: Ebene\n            source_id: Quell-ID\n\n        Returns:\n            Liste von Semantik-Einträgen\n        \"\"\"\n        if not entities:\n            return []\n\n        entity_names = [e[\"name\"] for e in entities[:15]]\n\n        prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(entity_names)}\n\nFür jede Entität gib an:\n- definition: Kurze Definition basierend auf dem Text\n- context: In welchem Kontext wird sie verwendet\n- references: Bezüge zu anderen Konzepten (falls erkennbar)\n\nAntworte NUR als JSON:\n{{\"semantics\": [\n    {{\"entity\": \"...\", \"definiti... [TRUNCATED-cb269c0254cd838e]"
    }
}
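
Note on the recorded change: the diff removes the monolithic implementation (preserved above under originalFile) and leaves knowledge.py as a thin wrapper that re-exports the knowledge package. A minimal driver sketch of the re-exported API follows; this is illustrative only, assuming the logged knowledge package is importable and that the db singleton exposes connect()/disconnect() methods. run_extraction and the document_id value 123 are hypothetical names introduced here, not part of the logged file.

from db import db
from knowledge import process_document_knowledge


def run_extraction(document_id: int, provider: str = "ollama", model_name: str | None = None) -> dict:
    """Connect, run the 3-level analysis, and always disconnect afterwards."""
    if not db.connect():
        raise RuntimeError("Datenbankverbindung fehlgeschlagen")
    try:
        # Per the logged implementation, process_document_knowledge walks
        # pages -> sections -> document and builds its ModelConfig internally
        # via get_model_config (defaults: gemma3:27b-it-qat for ollama,
        # claude-3-haiku-20240307 for anthropic).
        return process_document_knowledge(document_id, provider, model_name)
    finally:
        db.disconnect()


if __name__ == "__main__":
    results = run_extraction(123)  # 123 is a placeholder document_id
    print(f"pages={len(results['pages'])} sections={len(results['sections'])}")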