{
"tool_response": {
"type": "update",
"filePath": "\/var\/www\/scripts\/pipeline\/knowledge.py",
"content": "\"\"\"\nModulares Wissensextraktions-System für 3-Ebenen-Analyse.\n\nHINWEIS: Dies ist ein Backward-Kompatibilitäts-Wrapper.\nDie eigentliche Implementierung ist in knowledge\/ Package.\n\nEbenen:\n1. Seite (page): Detailanalyse pro Einzelseite\n2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel\n3. Dokument (document): Konsolidierte Gesamtsicht\n\nWissenstypen:\n- Entitäten: Personen, Konzepte, Methoden, Organisationen\n- Semantik: Bedeutung, Definition, Referenzen\n- Ontologie: Wechselwirkungen zwischen Entitäten\n- Taxonomie: Hierarchische Einordnung\n\nAutor: KI-System Pipeline\n\"\"\"\n\n# Re-export alles aus dem knowledge Package\nfrom knowledge import (\n # Models\n KnowledgeLevel,\n KnowledgeType,\n ModelConfig,\n DEFAULT_MODELS,\n # Main Classes\n KnowledgeExtractor,\n LLMService,\n KnowledgeStorage,\n # Extractors\n EntityExtractor,\n SemanticExtractor,\n OntologyExtractor,\n TaxonomyExtractor,\n # Functions\n store_knowledge,\n get_model_config,\n process_document_knowledge,\n)\n\nfrom db import db\n\n__all__ = [\n \"KnowledgeLevel\",\n \"KnowledgeType\",\n \"ModelConfig\",\n \"DEFAULT_MODELS\",\n \"KnowledgeExtractor\",\n \"LLMService\",\n \"KnowledgeStorage\",\n \"EntityExtractor\",\n \"SemanticExtractor\",\n \"OntologyExtractor\",\n \"TaxonomyExtractor\",\n \"store_knowledge\",\n \"get_model_config\",\n \"process_document_knowledge\",\n]\n\n\n# CLI Interface\nif __name__ == \"__main__\":\n import sys\n\n if len(sys.argv) < 2:\n print(\"Verwendung: python knowledge.py <document_id> [provider] [model]\")\n print(\" provider: ollama (default) oder anthropic\")\n print(\" model: gemma3:27b-it-qat (ollama) oder claude-3-haiku-20240307 (anthropic)\")\n sys.exit(1)\n\n doc_id = int(sys.argv[1])\n provider = sys.argv[2] if len(sys.argv) > 2 else \"ollama\"\n model = sys.argv[3] if len(sys.argv) > 3 else None\n\n if not db.connect():\n print(\"Datenbankverbindung fehlgeschlagen\")\n sys.exit(1)\n\n print(f\"Starte Wissensextraktion für Dokument {doc_id}\")\n print(f\"Provider: {provider}, Model: {model or 'default'}\")\n\n results = process_document_knowledge(doc_id, provider, model)\n\n print(\"\\nErgebnisse:\")\n print(f\" Seiten analysiert: {len(results['pages'])}\")\n print(f\" Abschnitte analysiert: {len(results['sections'])}\")\n if results[\"document\"]:\n print(f\" Dokument-Entitäten: {results['document']['entities']}\")\n print(f\" Dokument-Relationen: {results['document']['ontology']}\")\n\n db.disconnect()\n",
"structuredPatch": [
{
"oldStart": 1,
"oldLines": 6,
"newStart": 1,
"newLines": 9,
"lines": [
" \"\"\"",
" Modulares Wissensextraktions-System für 3-Ebenen-Analyse.",
" ",
"+HINWEIS: Dies ist ein Backward-Kompatibilitäts-Wrapper.",
"+Die eigentliche Implementierung ist in knowledge\/ Package.",
"+",
" Ebenen:",
" 1. Seite (page): Detailanalyse pro Einzelseite",
" 2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel"
]
},
{
"oldStart": 15,
"oldLines": 863,
"newStart": 18,
"newLines": 49,
"lines": [
" Autor: KI-System Pipeline",
" \"\"\"",
" ",
"-import json",
"-import re",
"-import time",
"-from dataclasses import dataclass",
"-from enum import Enum",
"+# Re-export alles aus dem knowledge Package",
"+from knowledge import (",
"+ # Models",
"+ KnowledgeLevel,",
"+ KnowledgeType,",
"+ ModelConfig,",
"+ DEFAULT_MODELS,",
"+ # Main Classes",
"+ KnowledgeExtractor,",
"+ LLMService,",
"+ KnowledgeStorage,",
"+ # Extractors",
"+ EntityExtractor,",
"+ SemanticExtractor,",
"+ OntologyExtractor,",
"+ TaxonomyExtractor,",
"+ # Functions",
"+ store_knowledge,",
"+ get_model_config,",
"+ process_document_knowledge,",
"+)",
" ",
"-import requests",
"-",
"-from config import ANTHROPIC_API_KEY, OLLAMA_HOST",
" from db import db",
" ",
"+__all__ = [",
"+ \"KnowledgeLevel\",",
"+ \"KnowledgeType\",",
"+ \"ModelConfig\",",
"+ \"DEFAULT_MODELS\",",
"+ \"KnowledgeExtractor\",",
"+ \"LLMService\",",
"+ \"KnowledgeStorage\",",
"+ \"EntityExtractor\",",
"+ \"SemanticExtractor\",",
"+ \"OntologyExtractor\",",
"+ \"TaxonomyExtractor\",",
"+ \"store_knowledge\",",
"+ \"get_model_config\",",
"+ \"process_document_knowledge\",",
"+]",
" ",
"-class KnowledgeLevel(Enum):",
"- \"\"\"Ebene der Wissensextraktion.\"\"\"",
" ",
"- PAGE = \"page\"",
"- SECTION = \"section\"",
"- DOCUMENT = \"document\"",
"-",
"-",
"-class KnowledgeType(Enum):",
"- \"\"\"Typ des extrahierten Wissens.\"\"\"",
"-",
"- ENTITY = \"entity\"",
"- SEMANTIC = \"semantic\"",
"- ONTOLOGY = \"ontology\"",
"- TAXONOMY = \"taxonomy\"",
"-",
"-",
"-@dataclass",
"-class ModelConfig:",
"- \"\"\"Konfiguration für LLM-Modell.\"\"\"",
"-",
"- provider: str # 'ollama' oder 'anthropic'",
"- model_name: str",
"- temperature: float = 0.3",
"- max_tokens: int = 2000",
"-",
"-",
"-# Standard-Modellkonfigurationen",
"-DEFAULT_MODELS = {",
"- \"ollama\": ModelConfig(\"ollama\", \"gemma3:27b-it-qat\"),",
"- \"anthropic\": ModelConfig(\"anthropic\", \"claude-3-haiku-20240307\"),",
"- \"anthropic_opus\": ModelConfig(\"anthropic\", \"claude-opus-4-5-20251101\"),",
"-}",
"-",
"-",
"-class KnowledgeExtractor:",
"- \"\"\"",
"- Modulare Wissensextraktion mit Datenbankabgleich.",
"-",
"- Verwendung:",
"- extractor = KnowledgeExtractor(model_config)",
"-",
"- # Pro Seite",
"- entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)",
"- semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)",
"- ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)",
"- taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)",
"- \"\"\"",
"-",
"- def __init__(self, model_config: ModelConfig | None = None):",
"- \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"",
"- self.model = model_config or DEFAULT_MODELS[\"ollama\"]",
"- self.anthropic_client = None",
"-",
"- if self.model.provider == \"anthropic\":",
"- self._init_anthropic()",
"-",
"- def _init_anthropic(self):",
"- \"\"\"Initialisiere Anthropic Client.\"\"\"",
"- try:",
"- import anthropic",
"-",
"- if ANTHROPIC_API_KEY:",
"- self.anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)",
"- except ImportError:",
"- db.log(\"WARNING\", \"Anthropic SDK nicht installiert, fallback zu Ollama\")",
"- self.model = DEFAULT_MODELS[\"ollama\"]",
"-",
"- def _call_llm(self, prompt: str, json_output: bool = True) -> str:",
"- \"\"\"Rufe LLM auf und gib Antwort zurück.\"\"\"",
"- start_time = time.time()",
"-",
"- try:",
"- if self.model.provider == \"anthropic\" and self.anthropic_client:",
"- response = self.anthropic_client.messages.create(",
"- model=self.model.model_name,",
"- max_tokens=self.model.max_tokens,",
"- temperature=self.model.temperature,",
"- messages=[{\"role\": \"user\", \"content\": prompt}],",
"- )",
"- result = response.content[0].text",
"- tokens_in = response.usage.input_tokens",
"- tokens_out = response.usage.output_tokens",
"- else:",
"- # Ollama",
"- payload = {",
"- \"model\": self.model.model_name,",
"- \"prompt\": prompt,",
"- \"stream\": False,",
"- \"options\": {\"temperature\": self.model.temperature},",
"- }",
"- if json_output:",
"- payload[\"format\"] = \"json\"",
"-",
"- resp = requests.post(f\"{OLLAMA_HOST}\/api\/generate\", json=payload, timeout=600)",
"- resp.raise_for_status()",
"- data = resp.json()",
"- result = data.get(\"response\", \"\")",
"- tokens_in = data.get(\"prompt_eval_count\", 0)",
"- tokens_out = data.get(\"eval_count\", 0)",
"-",
"- duration_ms = int((time.time() - start_time) * 1000)",
"-",
"- # Protokolliere LLM-Aufruf",
"- db.log_to_protokoll(",
"- client_name=\"pipeline-knowledge\",",
"- request=prompt[:500],",
"- response=result[:500],",
"- model_name=f\"{self.model.provider}:{self.model.model_name}\",",
"- tokens_input=tokens_in,",
"- tokens_output=tokens_out,",
"- duration_ms=duration_ms,",
"- status=\"completed\",",
"- )",
"-",
"- return result",
"-",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"LLM-Aufruf fehlgeschlagen: {e}\")",
"- return \"{}\"",
"-",
"- def _parse_json(self, text: str) -> dict:",
"- \"\"\"Extrahiere JSON aus LLM-Antwort.\"\"\"",
"- try:",
"- # Versuche direkt zu parsen",
"- return json.loads(text)",
"- except json.JSONDecodeError:",
"- # Suche nach JSON-Block",
"- match = re.search(r\"\\{[\\s\\S]*\\}\", text)",
"- if match:",
"- try:",
"- return json.loads(match.group())",
"- except json.JSONDecodeError:",
"- pass",
"- return {}",
"-",
"- # =========================================================================",
"- # ENTITÄTEN",
"- # =========================================================================",
"-",
"- def extract_entities(self, text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:",
"- \"\"\"",
"- Extrahiere Entitäten aus Text.",
"-",
"- Args:",
"- text: Eingabetext",
"- level: Ebene (PAGE, SECTION, DOCUMENT)",
"- source_id: ID der Quelle (page_id, section_id, document_id)",
"-",
"- Returns:",
"- Liste von Entitäten mit DB-IDs",
"- \"\"\"",
"- prompt = f\"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.",
"-",
"-Kategorien:",
"-- PERSON: Namen von Personen, Autoren, Therapeuten",
"-- ORGANIZATION: Firmen, Institute, Verbände",
"-- CONCEPT: Fachbegriffe, Theorien, Modelle",
"-- METHOD: Methoden, Techniken, Verfahren",
"-- TOOL: Werkzeuge, Instrumente, Materialien",
"-- LOCATION: Orte, Länder, Regionen",
"-- EVENT: Ereignisse, Konferenzen",
"-",
"-Antworte NUR als JSON:",
"-{{\"entities\": [",
"- {{\"name\": \"...\", \"type\": \"CONCEPT\", \"context\": \"kurzer Kontext\", \"importance\": 0.0-1.0}}",
"-]}}",
"-",
"-Text ({level.value}-Ebene):",
"-{text[:4000]}\"\"\"",
"-",
"- result = self._call_llm(prompt)",
"- data = self._parse_json(result)",
"- entities = data.get(\"entities\", [])",
"-",
"- # Speichere und gleiche mit DB ab",
"- stored_entities = []",
"- for entity in entities:",
"- stored = self._store_entity(entity, level, source_id)",
"- if stored:",
"- stored_entities.append(stored)",
"-",
"- # Speichere in page_knowledge\/section_knowledge\/document_knowledge",
"- self._store_knowledge(",
"- level,",
"- source_id,",
"- KnowledgeType.ENTITY,",
"- {\"entities\": [e[\"name\"] for e in stored_entities], \"count\": len(stored_entities)},",
"- )",
"-",
"- return stored_entities",
"-",
"- def _store_entity(self, entity: dict, level: KnowledgeLevel, source_id: int) -> dict | None:",
"- \"\"\"Speichere Entität in DB mit Duplikatprüfung.\"\"\"",
"- try:",
"- name = entity.get(\"name\", \"\").strip()",
"- entity_type = entity.get(\"type\", \"OTHER\").upper()",
"- context = entity.get(\"context\", \"\")",
"-",
"- if not name:",
"- return None",
"-",
"- # Prüfe ob Entität existiert (case-insensitive)",
"- cursor = db.execute(",
"- \"\"\"SELECT id, name, type, description",
"- FROM entities",
"- WHERE LOWER(name) = LOWER(%s) OR LOWER(canonical_name) = LOWER(%s)",
"- LIMIT 1\"\"\",",
"- (name, name),",
"- )",
"- existing = cursor.fetchone()",
"- cursor.close()",
"-",
"- if existing:",
"- entity_id = existing[\"id\"]",
"- # Entität existiert - verwende bestehende",
"- db.log(\"DEBUG\", f\"Entität '{name}' existiert bereits (ID: {entity_id})\")",
"- else:",
"- # Neue Entität anlegen",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO entities (name, type, canonical_name, created_at)",
"- VALUES (%s, %s, %s, NOW())\"\"\",",
"- (name, entity_type, name.lower()),",
"- )",
"- db.commit()",
"- entity_id = cursor.lastrowid",
"- cursor.close()",
"- db.log(\"INFO\", f\"Neue Entität angelegt: '{name}' (ID: {entity_id})\")",
"-",
"- return {\"id\": entity_id, \"name\": name, \"type\": entity_type, \"context\": context, \"is_new\": existing is None}",
"-",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Fehler beim Speichern der Entität: {e}\")",
"- return None",
"-",
"- # =========================================================================",
"- # SEMANTIK (Bedeutung\/Definition\/Referenz)",
"- # =========================================================================",
"-",
"- def extract_semantics(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:",
"- \"\"\"",
"- Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.",
"-",
"- Args:",
"- entities: Liste der extrahierten Entitäten",
"- text: Ursprungstext für Kontext",
"- level: Ebene",
"- source_id: Quell-ID",
"-",
"- Returns:",
"- Liste von Semantik-Einträgen",
"- \"\"\"",
"- if not entities:",
"- return []",
"-",
"- entity_names = [e[\"name\"] for e in entities[:15]]",
"-",
"- prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.",
"-",
"-Entitäten: {\", \".join(entity_names)}",
"-",
"-Für jede Entität gib an:",
"-- definition: Kurze Definition basierend auf dem Text",
"-- context: In welchem Kontext wird sie verwendet",
"-- references: Bezüge zu anderen Konzepten (falls erkennbar)",
"-",
"-Antworte NUR als JSON:",
"-{{\"semantics\": [",
"- {{\"entity\": \"...\", \"definition\": \"...\", \"context\": \"...\", \"references\": [\"...\"]}}",
"-]}}",
"-",
"-Text:",
"-{text[:3000]}\"\"\"",
"-",
"- result = self._call_llm(prompt)",
"- data = self._parse_json(result)",
"- semantics = data.get(\"semantics\", [])",
"-",
"- # Speichere Semantik",
"- stored = []",
"- for sem in semantics:",
"- entity_name = sem.get(\"entity\", \"\")",
"- # Finde Entity-ID",
"- entity_match = next((e for e in entities if e[\"name\"].lower() == entity_name.lower()), None)",
"- if entity_match:",
"- stored_sem = self._store_semantic(",
"- entity_id=entity_match[\"id\"],",
"- definition=sem.get(\"definition\", \"\"),",
"- context=sem.get(\"context\", \"\"),",
"- references=sem.get(\"references\", []),",
"- level=level,",
"- source_id=source_id,",
"- )",
"- if stored_sem:",
"- stored.append(stored_sem)",
"-",
"- # Speichere in Knowledge-Tabelle",
"- self._store_knowledge(",
"- level,",
"- source_id,",
"- KnowledgeType.SEMANTIC,",
"- {\"definitions\": len(stored), \"entities\": [s[\"entity_name\"] for s in stored]},",
"- )",
"-",
"- return stored",
"-",
"- def _store_semantic(",
"- self, entity_id: int, definition: str, context: str, references: list, level: KnowledgeLevel, source_id: int",
"- ) -> dict | None:",
"- \"\"\"Speichere Semantik-Eintrag mit Abgleich.\"\"\"",
"- try:",
"- # Prüfe ob bereits Semantik existiert",
"- cursor = db.execute(",
"- \"\"\"SELECT id, definition FROM entity_semantics",
"- WHERE entity_id = %s AND source_type = %s AND source_id = %s\"\"\",",
"- (entity_id, level.value, source_id),",
"- )",
"- existing = cursor.fetchone()",
"- cursor.close()",
"-",
"- if existing:",
"- # Prüfe ob Definition abweicht",
"- if existing[\"definition\"] != definition:",
"- # Abweichende Definition - als zusätzliche Perspektive speichern",
"- db.log(\"INFO\", f\"Abweichende Definition für Entität {entity_id}, füge hinzu\")",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO entity_semantics",
"- (entity_id, definition, context, references_json,",
"- source_type, source_id, model_used, created_at)",
"- VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",",
"- (",
"- entity_id,",
"- definition,",
"- context,",
"- json.dumps(references),",
"- level.value,",
"- source_id,",
"- f\"{self.model.provider}:{self.model.model_name}\",",
"- ),",
"- )",
"- db.commit()",
"- sem_id = cursor.lastrowid",
"- cursor.close()",
"- else:",
"- sem_id = existing[\"id\"]",
"- else:",
"- # Neue Semantik",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO entity_semantics",
"- (entity_id, definition, context, references_json,",
"- source_type, source_id, model_used, created_at)",
"- VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",",
"- (",
"- entity_id,",
"- definition,",
"- context,",
"- json.dumps(references),",
"- level.value,",
"- source_id,",
"- f\"{self.model.provider}:{self.model.model_name}\",",
"- ),",
"- )",
"- db.commit()",
"- sem_id = cursor.lastrowid",
"- cursor.close()",
"-",
"- # Hole Entity-Name für Rückgabe",
"- cursor = db.execute(\"SELECT name FROM entities WHERE id = %s\", (entity_id,))",
"- entity = cursor.fetchone()",
"- cursor.close()",
"-",
"- return {",
"- \"id\": sem_id,",
"- \"entity_id\": entity_id,",
"- \"entity_name\": entity[\"name\"] if entity else \"\",",
"- \"definition\": definition,",
"- }",
"-",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Fehler beim Speichern der Semantik: {e}\")",
"- return None",
"-",
"- # =========================================================================",
"- # ONTOLOGIE (Wechselwirkungen)",
"- # =========================================================================",
"-",
"- def extract_ontology(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:",
"- \"\"\"",
"- Extrahiere Ontologie (Wechselwirkungen) zwischen Entitäten.",
"-",
"- Args:",
"- entities: Liste der Entitäten",
"- text: Ursprungstext",
"- level: Ebene",
"- source_id: Quell-ID",
"-",
"- Returns:",
"- Liste von Ontologie-Beziehungen",
"- \"\"\"",
"- if len(entities) < 2:",
"- return []",
"-",
"- entity_names = [e[\"name\"] for e in entities[:20]]",
"-",
"- prompt = f\"\"\"Analysiere die Wechselwirkungen zwischen den folgenden Entitäten im Text.",
"-",
"-Entitäten: {\", \".join(entity_names)}",
"-",
"-Beziehungstypen:",
"-- CAUSES: A verursacht\/bewirkt B",
"-- REQUIRES: A benötigt\/erfordert B",
"-- INFLUENCES: A beeinflusst B",
"-- ENABLES: A ermöglicht B",
"-- CONTRADICTS: A widerspricht B",
"-- PART_OF: A ist Teil von B",
"-- INSTANCE_OF: A ist Instanz von B",
"-- USES: A verwendet B",
"-",
"-Antworte NUR als JSON:",
"-{{\"relations\": [",
"- {{\"source\": \"...\", \"target\": \"...\", \"type\": \"CAUSES\", \"description\": \"...\", \"strength\": 0.0-1.0, \"bidirectional\": false}}",
"-]}}",
"-",
"-Text:",
"-{text[:3000]}\"\"\"",
"-",
"- result = self._call_llm(prompt)",
"- data = self._parse_json(result)",
"- relations = data.get(\"relations\", [])",
"-",
"- # Speichere Ontologie-Beziehungen",
"- stored = []",
"- for rel in relations:",
"- source_entity = next((e for e in entities if e[\"name\"].lower() == rel.get(\"source\", \"\").lower()), None)",
"- target_entity = next((e for e in entities if e[\"name\"].lower() == rel.get(\"target\", \"\").lower()), None)",
"-",
"- if source_entity and target_entity:",
"- stored_rel = self._store_ontology(",
"- source_id=source_entity[\"id\"],",
"- target_id=target_entity[\"id\"],",
"- relation_type=rel.get(\"type\", \"RELATED_TO\"),",
"- description=rel.get(\"description\", \"\"),",
"- strength=rel.get(\"strength\", 1.0),",
"- bidirectional=rel.get(\"bidirectional\", False),",
"- level=level,",
"- knowledge_source_id=source_id,",
"- )",
"- if stored_rel:",
"- stored.append(stored_rel)",
"-",
"- # Speichere in Knowledge-Tabelle",
"- self._store_knowledge(",
"- level,",
"- source_id,",
"- KnowledgeType.ONTOLOGY,",
"- {\"relations\": len(stored), \"types\": list({r[\"type\"] for r in stored})},",
"- )",
"-",
"- return stored",
"-",
"- def _store_ontology(",
"- self,",
"- source_id: int,",
"- target_id: int,",
"- relation_type: str,",
"- description: str,",
"- strength: float,",
"- bidirectional: bool,",
"- level: KnowledgeLevel,",
"- knowledge_source_id: int,",
"- ) -> dict | None:",
"- \"\"\"Speichere Ontologie-Beziehung.\"\"\"",
"- try:",
"- # Prüfe ob Beziehung existiert",
"- cursor = db.execute(",
"- \"\"\"SELECT id FROM entity_ontology",
"- WHERE source_entity_id = %s AND target_entity_id = %s AND relation_type = %s\"\"\",",
"- (source_id, target_id, relation_type),",
"- )",
"- existing = cursor.fetchone()",
"- cursor.close()",
"-",
"- if existing:",
"- rel_id = existing[\"id\"]",
"- else:",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO entity_ontology",
"- (source_entity_id, target_entity_id, relation_type, direction,",
"- strength, description, source_type, source_id, model_used, created_at)",
"- VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())\"\"\",",
"- (",
"- source_id,",
"- target_id,",
"- relation_type,",
"- \"bidirectional\" if bidirectional else \"unidirectional\",",
"- strength,",
"- description,",
"- level.value,",
"- knowledge_source_id,",
"- f\"{self.model.provider}:{self.model.model_name}\",",
"- ),",
"- )",
"- db.commit()",
"- rel_id = cursor.lastrowid",
"- cursor.close()",
"-",
"- return {",
"- \"id\": rel_id,",
"- \"source_id\": source_id,",
"- \"target_id\": target_id,",
"- \"type\": relation_type,",
"- \"strength\": strength,",
"- }",
"-",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Fehler beim Speichern der Ontologie: {e}\")",
"- return None",
"-",
"- # =========================================================================",
"- # TAXONOMIE (Hierarchische Einordnung)",
"- # =========================================================================",
"-",
"- def extract_taxonomy(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:",
"- \"\"\"",
"- Extrahiere Taxonomie (hierarchische Einordnung) für Entitäten.",
"-",
"- Args:",
"- entities: Liste der Entitäten",
"- text: Ursprungstext",
"- level: Ebene",
"- source_id: Quell-ID",
"-",
"- Returns:",
"- Liste von Taxonomie-Zuordnungen",
"- \"\"\"",
"- if not entities:",
"- return []",
"-",
"- # Hole existierende Taxonomie-Terme",
"- cursor = db.execute(\"SELECT id, name, path, depth FROM taxonomy_terms ORDER BY depth, name\")",
"- existing_terms = cursor.fetchall()",
"- cursor.close()",
"-",
"- term_names = [t[\"name\"] for t in existing_terms]",
"- entity_names = [e[\"name\"] for e in entities[:15]]",
"-",
"- prompt = f\"\"\"Ordne die folgenden Entitäten in eine hierarchische Taxonomie ein.",
"-",
"-Entitäten: {\", \".join(entity_names)}",
"-",
"-Existierende Taxonomie-Kategorien: {\", \".join(term_names) if term_names else \"Keine vorhanden\"}",
"-",
"-Aufgabe:",
"-1. Ordne jede Entität einer passenden Kategorie zu",
"-2. Wenn keine passende Kategorie existiert, schlage eine neue vor",
"-3. Gib die hierarchische Einordnung an",
"-",
"-Antworte NUR als JSON:",
"-{{\"mappings\": [",
"- {{\"entity\": \"...\", \"category\": \"...\", \"parent_category\": null, \"confidence\": 0.0-1.0, \"is_new_category\": false}}",
"-]}}",
"-",
"-Text-Kontext:",
"-{text[:2000]}\"\"\"",
"-",
"- result = self._call_llm(prompt)",
"- data = self._parse_json(result)",
"- mappings = data.get(\"mappings\", [])",
"-",
"- # Speichere Taxonomie-Zuordnungen",
"- stored = []",
"- for mapping in mappings:",
"- entity_match = next((e for e in entities if e[\"name\"].lower() == mapping.get(\"entity\", \"\").lower()), None)",
"- if entity_match:",
"- stored_mapping = self._store_taxonomy_mapping(",
"- entity_id=entity_match[\"id\"],",
"- category_name=mapping.get(\"category\", \"\"),",
"- parent_category=mapping.get(\"parent_category\"),",
"- confidence=mapping.get(\"confidence\", 0.8),",
"- is_new=mapping.get(\"is_new_category\", False),",
"- existing_terms=existing_terms,",
"- level=level,",
"- source_id=source_id,",
"- )",
"- if stored_mapping:",
"- stored.append(stored_mapping)",
"-",
"- # Speichere in Knowledge-Tabelle",
"- self._store_knowledge(",
"- level,",
"- source_id,",
"- KnowledgeType.TAXONOMY,",
"- {\"mappings\": len(stored), \"categories\": list({m[\"category\"] for m in stored})},",
"- )",
"-",
"- return stored",
"-",
"- def _store_taxonomy_mapping(",
"- self,",
"- entity_id: int,",
"- category_name: str,",
"- parent_category: str | None,",
"- confidence: float,",
"- is_new: bool,",
"- existing_terms: list,",
"- level: KnowledgeLevel,",
"- source_id: int,",
"- ) -> dict | None:",
"- \"\"\"Speichere Taxonomie-Zuordnung.\"\"\"",
"- try:",
"- # Finde oder erstelle Taxonomie-Term",
"- term = next((t for t in existing_terms if t[\"name\"].lower() == category_name.lower()), None)",
"-",
"- if term:",
"- term_id = term[\"id\"]",
"- elif is_new:",
"- # Neuen Term anlegen",
"- parent_id = None",
"- depth = 0",
"- path = f\"\/{category_name}\"",
"-",
"- if parent_category:",
"- parent_term = next(",
"- (t for t in existing_terms if t[\"name\"].lower() == parent_category.lower()), None",
"- )",
"- if parent_term:",
"- parent_id = parent_term[\"id\"]",
"- depth = parent_term[\"depth\"] + 1",
"- path = f\"{parent_term['path']}\/{category_name}\"",
"-",
"- # Erstelle Slug",
"- slug = re.sub(r\"[^a-z0-9]+\", \"-\", category_name.lower()).strip(\"-\")",
"-",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO taxonomy_terms (name, slug, parent_id, depth, path, created_at)",
"- VALUES (%s, %s, %s, %s, %s, NOW())\"\"\",",
"- (category_name, slug, parent_id, depth, path),",
"- )",
"- db.commit()",
"- term_id = cursor.lastrowid",
"- cursor.close()",
"- db.log(\"INFO\", f\"Neuer Taxonomie-Term: '{category_name}' (ID: {term_id})\")",
"- else:",
"- # Kategorie existiert nicht und soll nicht neu erstellt werden",
"- return None",
"-",
"- # Speichere Zuordnung",
"- cursor = db.execute(",
"- \"\"\"INSERT INTO entity_taxonomy_mapping",
"- (entity_id, taxonomy_term_id, confidence, source_type, source_id, model_used, created_at)",
"- VALUES (%s, %s, %s, %s, %s, %s, NOW())",
"- ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)\"\"\",",
"- (",
"- entity_id,",
"- term_id,",
"- confidence,",
"- level.value,",
"- source_id,",
"- f\"{self.model.provider}:{self.model.model_name}\",",
"- ),",
"- )",
"- db.commit()",
"- cursor.close()",
"-",
"- return {\"entity_id\": entity_id, \"term_id\": term_id, \"category\": category_name, \"confidence\": confidence}",
"-",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Fehler beim Speichern der Taxonomie: {e}\")",
"- return None",
"-",
"- # =========================================================================",
"- # KNOWLEDGE STORAGE",
"- # =========================================================================",
"-",
"- def _store_knowledge(self, level: KnowledgeLevel, source_id: int, knowledge_type: KnowledgeType, data: dict):",
"- \"\"\"Speichere Wissen in der entsprechenden Tabelle.\"\"\"",
"- table_map = {",
"- KnowledgeLevel.PAGE: \"page_knowledge\",",
"- KnowledgeLevel.SECTION: \"section_knowledge\",",
"- KnowledgeLevel.DOCUMENT: \"document_knowledge\",",
"- }",
"-",
"- id_field_map = {",
"- KnowledgeLevel.PAGE: \"page_id\",",
"- KnowledgeLevel.SECTION: \"section_id\",",
"- KnowledgeLevel.DOCUMENT: \"document_id\",",
"- }",
"-",
"- table = table_map[level]",
"- id_field = id_field_map[level]",
"-",
"- try:",
"- cursor = db.execute(",
"- f\"\"\"INSERT INTO {table} ({id_field}, knowledge_type, data, model_used, created_at)",
"- VALUES (%s, %s, %s, %s, NOW())\"\"\", # noqa: S608 - table name is controlled",
"- (source_id, knowledge_type.value, json.dumps(data), f\"{self.model.provider}:{self.model.model_name}\"),",
"- )",
"- db.commit()",
"- cursor.close()",
"- except Exception as e:",
"- db.log(\"ERROR\", f\"Fehler beim Speichern in {table}: {e}\")",
"-",
"- # =========================================================================",
"- # KOMPLETTE ANALYSE",
"- # =========================================================================",
"-",
"- def analyze_page(self, page_id: int, text: str) -> dict:",
"- \"\"\"",
"- Vollständige Wissensanalyse für eine Seite.",
"-",
"- Reihenfolge: Entitäten → Semantik → Ontologie → Taxonomie",
"- \"\"\"",
"- db.log(\"INFO\", f\"Starte Seitenanalyse für page_id={page_id}\")",
"-",
"- # 1. Entitäten",
"- entities = self.extract_entities(text, KnowledgeLevel.PAGE, page_id)",
"-",
"- # 2. Semantik",
"- semantics = self.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)",
"-",
"- # 3. Ontologie",
"- ontology = self.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)",
"-",
"- # 4. Taxonomie",
"- taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)",
"-",
"- return {",
"- \"page_id\": page_id,",
"- \"entities\": len(entities),",
"- \"semantics\": len(semantics),",
"- \"ontology\": len(ontology),",
"- \"taxonomy\": len(taxonomy),",
"- }",
"-",
"- def analyze_section(self, section_id: int, text: str) -> dict:",
"- \"\"\"Vollständige Wissensanalyse für einen Abschnitt.\"\"\"",
"- db.log(\"INFO\", f\"Starte Abschnittsanalyse für section_id={section_id}\")",
"-",
"- entities = self.extract_entities(text, KnowledgeLevel.SECTION, section_id)",
"- semantics = self.extract_semantics(entities, text, KnowledgeLevel.SECTION, section_id)",
"- ontology = self.extract_ontology(entities, text, KnowledgeLevel.SECTION, section_id)",
"- taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.SECTION, section_id)",
"-",
"- return {",
"- \"section_id\": section_id,",
"- \"entities\": len(entities),",
"- \"semantics\": len(semantics),",
"- \"ontology\": len(ontology),",
"- \"taxonomy\": len(taxonomy),",
"- }",
"-",
"- def analyze_document(self, document_id: int, text: str) -> dict:",
"- \"\"\"Vollständige Wissensanalyse für ein Dokument (konsolidiert).\"\"\"",
"- db.log(\"INFO\", f\"Starte Dokumentanalyse für document_id={document_id}\")",
"-",
"- entities = self.extract_entities(text, KnowledgeLevel.DOCUMENT, document_id)",
"- semantics = self.extract_semantics(entities, text, KnowledgeLevel.DOCUMENT, document_id)",
"- ontology = self.extract_ontology(entities, text, KnowledgeLevel.DOCUMENT, document_id)",
"- taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.DOCUMENT, document_id)",
"-",
"- return {",
"- \"document_id\": document_id,",
"- \"entities\": len(entities),",
"- \"semantics\": len(semantics),",
"- \"ontology\": len(ontology),",
"- \"taxonomy\": len(taxonomy),",
"- }",
"-",
"-",
"-# =========================================================================",
"-# UTILITY FUNCTIONS",
"-# =========================================================================",
"-",
"-",
"-def get_model_config(provider: str = \"ollama\", model_name: str | None = None) -> ModelConfig:",
"- \"\"\"",
"- Erstelle Modellkonfiguration basierend auf Provider.",
"-",
"- Args:",
"- provider: 'ollama' oder 'anthropic'",
"- model_name: Optional spezifisches Modell",
"-",
"- Returns:",
"- ModelConfig für den Extractor",
"- \"\"\"",
"- if provider == \"anthropic\":",
"- return ModelConfig(provider=\"anthropic\", model_name=model_name or \"claude-3-haiku-20240307\")",
"- else:",
"- return ModelConfig(provider=\"ollama\", model_name=model_name or \"gemma3:27b-it-qat\")",
"-",
"-",
"-def process_document_knowledge(document_id: int, provider: str = \"ollama\", model_name: str | None = None) -> dict:",
"- \"\"\"",
"- Verarbeite komplettes Dokument mit 3-Ebenen-Analyse.",
"-",
"- 1. Seiten-Ebene",
"- 2. Abschnitt-Ebene",
"- 3. Dokument-Ebene (konsolidiert)",
"- \"\"\"",
"- config = get_model_config(provider, model_name)",
"- extractor = KnowledgeExtractor(config)",
"-",
"- results = {\"document_id\": document_id, \"pages\": [], \"sections\": [], \"document\": None}",
"-",
"- # 1. Seiten analysieren",
"- cursor = db.execute(",
"- \"SELECT id, text_content FROM document_pages WHERE document_id = %s ORDER BY page_number\", (document_id,)",
"- )",
"- pages = cursor.fetchall()",
"- cursor.close()",
"-",
"- for page in pages:",
"- if page[\"text_content\"]:",
"- page_result = extractor.analyze_page(page[\"id\"], page[\"text_content\"])",
"- results[\"pages\"].append(page_result)",
"-",
"- # 2. Abschnitte analysieren",
"- cursor = db.execute(",
"- \"SELECT id, content FROM document_sections WHERE document_id = %s ORDER BY sort_order\", (document_id,)",
"- )",
"- sections = cursor.fetchall()",
"- cursor.close()",
"-",
"- for section in sections:",
"- if section[\"content\"]:",
"- section_result = extractor.analyze_section(section[\"id\"], section[\"content\"])",
"- results[\"sections\"].append(section_result)",
"-",
"- # 3. Dokument-Ebene (Gesamttext)",
"- cursor = db.execute(",
"- \"SELECT GROUP_CONCAT(content SEPARATOR ' ') as full_text FROM chunks WHERE document_id = %s\", (document_id,)",
"- )",
"- doc = cursor.fetchone()",
"- cursor.close()",
"-",
"- if doc and doc[\"full_text\"]:",
"- results[\"document\"] = extractor.analyze_document(document_id, doc[\"full_text\"][:10000])",
"-",
"- return results",
"-",
"-",
"-# =========================================================================",
"-# CLI INTERFACE",
"-# =========================================================================",
"-",
"+# CLI Interface",
" if __name__ == \"__main__\":",
" import sys",
" "
]
}
],
"originalFile": "\"\"\"\nModulares Wissensextraktions-System für 3-Ebenen-Analyse.\n\nEbenen:\n1. Seite (page): Detailanalyse pro Einzelseite\n2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel\n3. Dokument (document): Konsolidierte Gesamtsicht\n\nWissenstypen:\n- Entitäten: Personen, Konzepte, Methoden, Organisationen\n- Semantik: Bedeutung, Definition, Referenzen\n- Ontologie: Wechselwirkungen zwischen Entitäten\n- Taxonomie: Hierarchische Einordnung\n\nAutor: KI-System Pipeline\n\"\"\"\n\nimport json\nimport re\nimport time\nfrom dataclasses import dataclass\nfrom enum import Enum\n\nimport requests\n\nfrom config import ANTHROPIC_API_KEY, OLLAMA_HOST\nfrom db import db\n\n\nclass KnowledgeLevel(Enum):\n \"\"\"Ebene der Wissensextraktion.\"\"\"\n\n PAGE = \"page\"\n SECTION = \"section\"\n DOCUMENT = \"document\"\n\n\nclass KnowledgeType(Enum):\n \"\"\"Typ des extrahierten Wissens.\"\"\"\n\n ENTITY = \"entity\"\n SEMANTIC = \"semantic\"\n ONTOLOGY = \"ontology\"\n TAXONOMY = \"taxonomy\"\n\n\n@dataclass\nclass ModelConfig:\n \"\"\"Konfiguration für LLM-Modell.\"\"\"\n\n provider: str # 'ollama' oder 'anthropic'\n model_name: str\n temperature: float = 0.3\n max_tokens: int = 2000\n\n\n# Standard-Modellkonfigurationen\nDEFAULT_MODELS = {\n \"ollama\": ModelConfig(\"ollama\", \"gemma3:27b-it-qat\"),\n \"anthropic\": ModelConfig(\"anthropic\", \"claude-3-haiku-20240307\"),\n \"anthropic_opus\": ModelConfig(\"anthropic\", \"claude-opus-4-5-20251101\"),\n}\n\n\nclass KnowledgeExtractor:\n \"\"\"\n Modulare Wissensextraktion mit Datenbankabgleich.\n\n Verwendung:\n extractor = KnowledgeExtractor(model_config)\n\n # Pro Seite\n entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n 
\"\"\"\n\n def __init__(self, model_config: ModelConfig | None = None):\n \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"\n self.model = model_config or DEFAULT_MODELS[\"ollama\"]\n self.anthropic_client = None\n\n if self.model.provider == \"anthropic\":\n self._init_anthropic()\n\n def _init_anthropic(self):\n \"\"\"Initialisiere Anthropic Client.\"\"\"\n try:\n import anthropic\n\n if ANTHROPIC_API_KEY:\n self.anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)\n except ImportError:\n db.log(\"WARNING\", \"Anthropic SDK nicht installiert, fallback zu Ollama\")\n self.model = DEFAULT_MODELS[\"ollama\"]\n\n def _call_llm(self, prompt: str, json_output: bool = True) -> str:\n \"\"\"Rufe LLM auf und gib Antwort zurück.\"\"\"\n start_time = time.time()\n\n try:\n if self.model.provider == \"anthropic\" and self.anthropic_client:\n response = self.anthropic_client.messages.create(\n model=self.model.model_name,\n max_tokens=self.model.max_tokens,\n temperature=self.model.temperature,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n )\n result = response.content[0].text\n tokens_in = response.usage.input_tokens\n tokens_out = response.usage.output_tokens\n else:\n # Ollama\n payload = {\n \"model\": self.model.model_name,\n \"prompt\": prompt,\n \"stream\": False,\n \"options\": {\"temperature\": self.model.temperature},\n }\n if json_output:\n payload[\"format\"] = \"json\"\n\n resp = requests.post(f\"{OLLAMA_HOST}\/api\/generate\", json=payload, timeout=600)\n resp.raise_for_status()\n data = resp.json()\n result = data.get(\"response\", \"\")\n tokens_in = data.get(\"prompt_eval_count\", 0)\n tokens_out = data.get(\"eval_count\", 0)\n\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Protokolliere LLM-Aufruf\n db.log_to_protokoll(\n client_name=\"pipeline-knowledge\",\n request=prompt[:500],\n response=result[:500],\n model_name=f\"{self.model.provider}:{self.model.model_name}\",\n tokens_input=tokens_in,\n 
tokens_output=tokens_out,\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n return result\n\n except Exception as e:\n db.log(\"ERROR\", f\"LLM-Aufruf fehlgeschlagen: {e}\")\n return \"{}\"\n\n def _parse_json(self, text: str) -> dict:\n \"\"\"Extrahiere JSON aus LLM-Antwort.\"\"\"\n try:\n # Versuche direkt zu parsen\n return json.loads(text)\n except json.JSONDecodeError:\n # Suche nach JSON-Block\n match = re.search(r\"\\{[\\s\\S]*\\}\", text)\n if match:\n try:\n return json.loads(match.group())\n except json.JSONDecodeError:\n pass\n return {}\n\n # =========================================================================\n # ENTITÄTEN\n # =========================================================================\n\n def extract_entities(self, text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:\n \"\"\"\n Extrahiere Entitäten aus Text.\n\n Args:\n text: Eingabetext\n level: Ebene (PAGE, SECTION, DOCUMENT)\n source_id: ID der Quelle (page_id, section_id, document_id)\n\n Returns:\n Liste von Entitäten mit DB-IDs\n \"\"\"\n prompt = f\"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorien:\n- PERSON: Namen von Personen, Autoren, Therapeuten\n- ORGANIZATION: Firmen, Institute, Verbände\n- CONCEPT: Fachbegriffe, Theorien, Modelle\n- METHOD: Methoden, Techniken, Verfahren\n- TOOL: Werkzeuge, Instrumente, Materialien\n- LOCATION: Orte, Länder, Regionen\n- EVENT: Ereignisse, Konferenzen\n\nAntworte NUR als JSON:\n{{\"entities\": [\n {{\"name\": \"...\", \"type\": \"CONCEPT\", \"context\": \"kurzer Kontext\", \"importance\": 0.0-1.0}}\n]}}\n\nText ({level.value}-Ebene):\n{text[:4000]}\"\"\"\n\n result = self._call_llm(prompt)\n data = self._parse_json(result)\n entities = data.get(\"entities\", [])\n\n # Speichere und gleiche mit DB ab\n stored_entities = []\n for entity in entities:\n stored = self._store_entity(entity, level, source_id)\n if stored:\n stored_entities.append(stored)\n\n # Speichere in 
page_knowledge\/section_knowledge\/document_knowledge\n self._store_knowledge(\n level,\n source_id,\n KnowledgeType.ENTITY,\n {\"entities\": [e[\"name\"] for e in stored_entities], \"count\": len(stored_entities)},\n )\n\n return stored_entities\n\n def _store_entity(self, entity: dict, level: KnowledgeLevel, source_id: int) -> dict | None:\n \"\"\"Speichere Entität in DB mit Duplikatprüfung.\"\"\"\n try:\n name = entity.get(\"name\", \"\").strip()\n entity_type = entity.get(\"type\", \"OTHER\").upper()\n context = entity.get(\"context\", \"\")\n\n if not name:\n return None\n\n # Prüfe ob Entität existiert (case-insensitive)\n cursor = db.execute(\n \"\"\"SELECT id, name, type, description\n FROM entities\n WHERE LOWER(name) = LOWER(%s) OR LOWER(canonical_name) = LOWER(%s)\n LIMIT 1\"\"\",\n (name, name),\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n # Entität existiert - verwende bestehende\n db.log(\"DEBUG\", f\"Entität '{name}' existiert bereits (ID: {entity_id})\")\n else:\n # Neue Entität anlegen\n cursor = db.execute(\n \"\"\"INSERT INTO entities (name, type, canonical_name, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (name, entity_type, name.lower()),\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()\n db.log(\"INFO\", f\"Neue Entität angelegt: '{name}' (ID: {entity_id})\")\n\n return {\"id\": entity_id, \"name\": name, \"type\": entity_type, \"context\": context, \"is_new\": existing is None}\n\n except Exception as e:\n db.log(\"ERROR\", f\"Fehler beim Speichern der Entität: {e}\")\n return None\n\n # =========================================================================\n # SEMANTIK (Bedeutung\/Definition\/Referenz)\n # =========================================================================\n\n def extract_semantics(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:\n \"\"\"\n Extrahiere Semantik (Bedeutung\/Definition) für 
Entitäten.\n\n Args:\n entities: Liste der extrahierten Entitäten\n text: Ursprungstext für Kontext\n level: Ebene\n source_id: Quell-ID\n\n Returns:\n Liste von Semantik-Einträgen\n \"\"\"\n if not entities:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:15]]\n\n prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(entity_names)}\n\nFür jede Entität gib an:\n- definition: Kurze Definition basierend auf dem Text\n- context: In welchem Kontext wird sie verwendet\n- references: Bezüge zu anderen Konzepten (falls erkennbar)\n\nAntworte NUR als JSON:\n{{\"semantics\": [\n {{\"entity\": \"...\", \"definiti... [TRUNCATED-cb269c0254cd838e]"
}
}