Protocol #4364
| ID | 4364 |
|---|---|
| Timestamp | 2025-12-22 09:54:03.079621 |
| Client | root |
| IP | - |
| Model | claude-sonnet-4-20250514 |
| Status | completed |
| Tokens | 5,361 (Input: 0, Output: 0) |
| Duration | 87 ms |
| Request time | - |
| Response time | - |
Warning: Undefined array key "error_message" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 22
Request
{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/opt\/scripts\/pipeline\/knowledge.py",
"content": "\"\"\"\nModulares Wissensextraktions-System für 3-Ebenen-Analyse.\n\nEbenen:\n1. Seite (page): Detailanalyse pro Einzelseite\n2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel\n3. Dokument (document): Konsolidierte Gesamtsicht\n\nWissenstypen:\n- Entitäten: Personen, Konzepte, Methoden, Organisationen\n- Semantik: Bedeutung, Definition, Referenzen\n- Ontologie: Wechselwirkungen zwischen Entitäten\n- Taxonomie: Hierarchische Einordnung\n\nAutor: KI-System Pipeline\n\"\"\"\n\nimport json\nimport re\nimport time\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom typing import Any\n\nimport requests\n\nfrom config import ANTHROPIC_API_KEY, OLLAMA_HOST\nfrom db import db\n\n\nclass KnowledgeLevel(Enum):\n \"\"\"Ebene der Wissensextraktion.\"\"\"\n PAGE = \"page\"\n SECTION = \"section\"\n DOCUMENT = \"document\"\n\n\nclass KnowledgeType(Enum):\n \"\"\"Typ des extrahierten Wissens.\"\"\"\n ENTITY = \"entity\"\n SEMANTIC = \"semantic\"\n ONTOLOGY = \"ontology\"\n TAXONOMY = \"taxonomy\"\n\n\n@dataclass\nclass ModelConfig:\n \"\"\"Konfiguration für LLM-Modell.\"\"\"\n provider: str # 'ollama' oder 'anthropic'\n model_name: str\n temperature: float = 0.3\n max_tokens: int = 2000\n\n\n# Standard-Modellkonfigurationen\nDEFAULT_MODELS = {\n \"ollama\": ModelConfig(\"ollama\", \"mistral\"),\n \"anthropic\": ModelConfig(\"anthropic\", \"claude-3-haiku-20240307\"),\n \"anthropic_opus\": ModelConfig(\"anthropic\", \"claude-opus-4-5-20251101\"),\n}\n\n\nclass KnowledgeExtractor:\n \"\"\"\n Modulare Wissensextraktion mit Datenbankabgleich.\n\n Verwendung:\n extractor = KnowledgeExtractor(model_config)\n\n # Pro Seite\n entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n \"\"\"\n\n def __init__(self, model_config: ModelConfig | None = None):\n \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"\n self.model = model_config or DEFAULT_MODELS[\"ollama\"]\n self.anthropic_client = None\n\n if self.model.provider == \"anthropic\":\n self._init_anthropic()\n\n def _init_anthropic(self):\n \"\"\"Initialisiere Anthropic Client.\"\"\"\n try:\n import anthropic\n if ANTHROPIC_API_KEY:\n self.anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)\n except ImportError:\n db.log(\"WARNING\", \"Anthropic SDK nicht installiert, fallback zu Ollama\")\n self.model = DEFAULT_MODELS[\"ollama\"]\n\n def _call_llm(self, prompt: str, json_output: bool = True) -> str:\n \"\"\"Rufe LLM auf und gib Antwort zurück.\"\"\"\n start_time = time.time()\n\n try:\n if self.model.provider == \"anthropic\" and self.anthropic_client:\n response = self.anthropic_client.messages.create(\n model=self.model.model_name,\n max_tokens=self.model.max_tokens,\n temperature=self.model.temperature,\n messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n result = response.content[0].text\n tokens_in = response.usage.input_tokens\n tokens_out = response.usage.output_tokens\n else:\n # Ollama\n payload = {\n \"model\": self.model.model_name,\n \"prompt\": prompt,\n \"stream\": False,\n \"options\": {\"temperature\": self.model.temperature}\n }\n if json_output:\n payload[\"format\"] = \"json\"\n\n resp = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json=payload,\n timeout=120\n )\n resp.raise_for_status()\n 
data = resp.json()\n result = data.get(\"response\", \"\")\n tokens_in = data.get(\"prompt_eval_count\", 0)\n tokens_out = data.get(\"eval_count\", 0)\n\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Protokolliere LLM-Aufruf\n db.log_to_protokoll(\n client_name=\"pipeline-knowledge\",\n request=prompt[:500],\n response=result[:500],\n model_name=f\"{self.model.provider}:{self.model.model_name}\",\n tokens_input=tokens_in,\n tokens_output=tokens_out,\n duration_ms=duration_ms,\n status=\"completed\"\n )\n\n return result\n\n except Exception as e:\n db.log(\"ERROR\", f\"LLM-Aufruf fehlgeschlagen: {e}\")\n return \"{}\"\n\n def _parse_json(self, text: str) -> dict:\n \"\"\"Extrahiere JSON aus LLM-Antwort.\"\"\"\n try:\n # Versuche direkt zu parsen\n return json.loads(text)\n except json.JSONDecodeError:\n # Suche nach JSON-Block\n match = re.search(r'\\{[\\s\\S]*\\}', text)\n if match:\n try:\n return json.loads(match.group())\n except json.JSONDecodeError:\n pass\n return {}\n\n # =========================================================================\n # ENTITÄTEN\n # =========================================================================\n\n def extract_entities(\n self,\n text: str,\n level: KnowledgeLevel,\n source_id: int\n ) -> list[dict]:\n \"\"\"\n Extrahiere Entitäten aus Text.\n\n Args:\n text: Eingabetext\n level: Ebene (PAGE, SECTION, DOCUMENT)\n source_id: ID der Quelle (page_id, section_id, document_id)\n\n Returns:\n Liste von Entitäten mit DB-IDs\n \"\"\"\n prompt = f\"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorien:\n- PERSON: Namen von Personen, Autoren, Therapeuten\n- ORGANIZATION: Firmen, Institute, Verbände\n- CONCEPT: Fachbegriffe, Theorien, Modelle\n- METHOD: Methoden, Techniken, Verfahren\n- TOOL: Werkzeuge, Instrumente, Materialien\n- LOCATION: Orte, Länder, Regionen\n- EVENT: Ereignisse, Konferenzen\n\nAntworte NUR als JSON:\n{{\"entities\": [\n {{\"name\": \"...\", \"type\": \"CONCEPT\", \"context\": \"kurzer Kontext\", \"importance\": 0.0-1.0}}\n]}}\n\nText ({level.value}-Ebene):\n{text[:4000]}\"\"\"\n\n result = self._call_llm(prompt)\n data = self._parse_json(result)\n entities = data.get(\"entities\", [])\n\n # Speichere und gleiche mit DB ab\n stored_entities = []\n for entity in entities:\n stored = self._store_entity(entity, level, source_id)\n if stored:\n stored_entities.append(stored)\n\n # Speichere in page_knowledge\/section_knowledge\/document_knowledge\n self._store_knowledge(level, source_id, KnowledgeType.ENTITY, {\n \"entities\": [e[\"name\"] for e in stored_entities],\n \"count\": len(stored_entities)\n })\n\n return stored_entities\n\n def _store_entity(\n self,\n entity: dict,\n level: KnowledgeLevel,\n source_id: int\n ) -> dict | None:\n \"\"\"Speichere Entität in DB mit Duplikatprüfung.\"\"\"\n try:\n name = entity.get(\"name\", \"\").strip()\n entity_type = entity.get(\"type\", \"OTHER\").upper()\n context = entity.get(\"context\", \"\")\n\n if not name:\n return None\n\n # Prüfe ob Entität existiert (case-insensitive)\n cursor = db.execute(\n \"\"\"SELECT id, name, type, description\n FROM entities\n WHERE LOWER(name) = LOWER(%s) OR LOWER(canonical_name) = LOWER(%s)\n LIMIT 1\"\"\",\n (name, name)\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n # Entität existiert - verwende bestehende\n db.log(\"DEBUG\", f\"Entität '{name}' existiert bereits (ID: {entity_id})\")\n else:\n # Neue Entität anlegen\n cursor = 
db.execute(\n \"\"\"INSERT INTO entities (name, type, canonical_name, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (name, entity_type, name.lower())\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()\n db.log(\"INFO\", f\"Neue Entität angelegt: '{name}' (ID: {entity_id})\")\n\n return {\n \"id\": entity_id,\n \"name\": name,\n \"type\": entity_type,\n \"context\": context,\n \"is_new\": existing is None\n }\n\n except Exception as e:\n db.log(\"ERROR\", f\"Fehler beim Speichern der Entität: {e}\")\n return None\n\n # =========================================================================\n # SEMANTIK (Bedeutung\/Definition\/Referenz)\n # =========================================================================\n\n def extract_semantics(\n self,\n entities: list[dict],\n text: str,\n level: KnowledgeLevel,\n source_id: int\n ) -> list[dict]:\n \"\"\"\n Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.\n\n Args:\n entities: Liste der extrahierten Entitäten\n text: Ursprungstext für Kontext\n level: Ebene\n source_id: Quell-ID\n\n Returns:\n Liste von Semantik-Einträgen\n \"\"\"\n if not entities:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:15]]\n\n prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(enti... [TRUNCATED-1e99c3c7edef32ed]"
}
}
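For reference, a minimal sketch of how a hook script might consume a PreToolUse payload of the shape logged above. The field names (`event`, `tool_name`, `tool_input.file_path`, `tool_input.content`) are taken from the JSON; the `handle_pre_tool_use` helper and reading the payload from stdin are illustrative assumptions, not part of the logged pipeline.

```python
import json
import sys


def handle_pre_tool_use(payload: dict) -> None:
    """Illustrative handler for a PreToolUse payload (assumed shape, based on the JSON above)."""
    if payload.get("event") != "PreToolUse":
        return
    tool_name = payload.get("tool_name")        # e.g. "Write"
    tool_input = payload.get("tool_input", {})  # tool-specific arguments
    if tool_name == "Write":
        file_path = tool_input.get("file_path")
        content = tool_input.get("content", "")
        print(f"Write requested: {file_path} ({len(content)} chars)")


if __name__ == "__main__":
    # Hypothetical usage: payload delivered as JSON on stdin.
    handle_pre_tool_use(json.load(sys.stdin))
```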
Response
{
"tool_response": {
"type": "create",
"filePath": "\/opt\/scripts\/pipeline\/knowledge.py",
"content": "\"\"\"\nModulares Wissensextraktions-System für 3-Ebenen-Analyse.\n\nEbenen:\n1. Seite (page): Detailanalyse pro Einzelseite\n2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel\n3. Dokument (document): Konsolidierte Gesamtsicht\n\nWissenstypen:\n- Entitäten: Personen, Konzepte, Methoden, Organisationen\n- Semantik: Bedeutung, Definition, Referenzen\n- Ontologie: Wechselwirkungen zwischen Entitäten\n- Taxonomie: Hierarchische Einordnung\n\nAutor: KI-System Pipeline\n\"\"\"\n\nimport json\nimport re\nimport time\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom typing import Any\n\nimport requests\n\nfrom config import ANTHROPIC_API_KEY, OLLAMA_HOST\nfrom db import db\n\n\nclass KnowledgeLevel(Enum):\n \"\"\"Ebene der Wissensextraktion.\"\"\"\n PAGE = \"page\"\n SECTION = \"section\"\n DOCUMENT = \"document\"\n\n\nclass KnowledgeType(Enum):\n \"\"\"Typ des extrahierten Wissens.\"\"\"\n ENTITY = \"entity\"\n SEMANTIC = \"semantic\"\n ONTOLOGY = \"ontology\"\n TAXONOMY = \"taxonomy\"\n\n\n@dataclass\nclass ModelConfig:\n \"\"\"Konfiguration für LLM-Modell.\"\"\"\n provider: str # 'ollama' oder 'anthropic'\n model_name: str\n temperature: float = 0.3\n max_tokens: int = 2000\n\n\n# Standard-Modellkonfigurationen\nDEFAULT_MODELS = {\n \"ollama\": ModelConfig(\"ollama\", \"mistral\"),\n \"anthropic\": ModelConfig(\"anthropic\", \"claude-3-haiku-20240307\"),\n \"anthropic_opus\": ModelConfig(\"anthropic\", \"claude-opus-4-5-20251101\"),\n}\n\n\nclass KnowledgeExtractor:\n \"\"\"\n Modulare Wissensextraktion mit Datenbankabgleich.\n\n Verwendung:\n extractor = KnowledgeExtractor(model_config)\n\n # Pro Seite\n entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n \"\"\"\n\n def __init__(self, model_config: ModelConfig | None = None):\n \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"\n self.model = model_config or DEFAULT_MODELS[\"ollama\"]\n self.anthropic_client = None\n\n if self.model.provider == \"anthropic\":\n self._init_anthropic()\n\n def _init_anthropic(self):\n \"\"\"Initialisiere Anthropic Client.\"\"\"\n try:\n import anthropic\n if ANTHROPIC_API_KEY:\n self.anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)\n except ImportError:\n db.log(\"WARNING\", \"Anthropic SDK nicht installiert, fallback zu Ollama\")\n self.model = DEFAULT_MODELS[\"ollama\"]\n\n def _call_llm(self, prompt: str, json_output: bool = True) -> str:\n \"\"\"Rufe LLM auf und gib Antwort zurück.\"\"\"\n start_time = time.time()\n\n try:\n if self.model.provider == \"anthropic\" and self.anthropic_client:\n response = self.anthropic_client.messages.create(\n model=self.model.model_name,\n max_tokens=self.model.max_tokens,\n temperature=self.model.temperature,\n messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n result = response.content[0].text\n tokens_in = response.usage.input_tokens\n tokens_out = response.usage.output_tokens\n else:\n # Ollama\n payload = {\n \"model\": self.model.model_name,\n \"prompt\": prompt,\n \"stream\": False,\n \"options\": {\"temperature\": self.model.temperature}\n }\n if json_output:\n payload[\"format\"] = \"json\"\n\n resp = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json=payload,\n timeout=120\n )\n resp.raise_for_status()\n 
data = resp.json()\n result = data.get(\"response\", \"\")\n tokens_in = data.get(\"prompt_eval_count\", 0)\n tokens_out = data.get(\"eval_count\", 0)\n\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Protokolliere LLM-Aufruf\n db.log_to_protokoll(\n client_name=\"pipeline-knowledge\",\n request=prompt[:500],\n response=result[:500],\n model_name=f\"{self.model.provider}:{self.model.model_name}\",\n tokens_input=tokens_in,\n tokens_output=tokens_out,\n duration_ms=duration_ms,\n status=\"completed\"\n )\n\n return result\n\n except Exception as e:\n db.log(\"ERROR\", f\"LLM-Aufruf fehlgeschlagen: {e}\")\n return \"{}\"\n\n def _parse_json(self, text: str) -> dict:\n \"\"\"Extrahiere JSON aus LLM-Antwort.\"\"\"\n try:\n # Versuche direkt zu parsen\n return json.loads(text)\n except json.JSONDecodeError:\n # Suche nach JSON-Block\n match = re.search(r'\\{[\\s\\S]*\\}', text)\n if match:\n try:\n return json.loads(match.group())\n except json.JSONDecodeError:\n pass\n return {}\n\n # =========================================================================\n # ENTITÄTEN\n # =========================================================================\n\n def extract_entities(\n self,\n text: str,\n level: KnowledgeLevel,\n source_id: int\n ) -> list[dict]:\n \"\"\"\n Extrahiere Entitäten aus Text.\n\n Args:\n text: Eingabetext\n level: Ebene (PAGE, SECTION, DOCUMENT)\n source_id: ID der Quelle (page_id, section_id, document_id)\n\n Returns:\n Liste von Entitäten mit DB-IDs\n \"\"\"\n prompt = f\"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorien:\n- PERSON: Namen von Personen, Autoren, Therapeuten\n- ORGANIZATION: Firmen, Institute, Verbände\n- CONCEPT: Fachbegriffe, Theorien, Modelle\n- METHOD: Methoden, Techniken, Verfahren\n- TOOL: Werkzeuge, Instrumente, Materialien\n- LOCATION: Orte, Länder, Regionen\n- EVENT: Ereignisse, Konferenzen\n\nAntworte NUR als JSON:\n{{\"entities\": [\n {{\"name\": \"...\", \"type\": \"CONCEPT\", \"context\": \"kurzer Kontext\", \"importance\": 0.0-1.0}}\n]}}\n\nText ({level.value}-Ebene):\n{text[:4000]}\"\"\"\n\n result = self._call_llm(prompt)\n data = self._parse_json(result)\n entities = data.get(\"entities\", [])\n\n # Speichere und gleiche mit DB ab\n stored_entities = []\n for entity in entities:\n stored = self._store_entity(entity, level, source_id)\n if stored:\n stored_entities.append(stored)\n\n # Speichere in page_knowledge\/section_knowledge\/document_knowledge\n self._store_knowledge(level, source_id, KnowledgeType.ENTITY, {\n \"entities\": [e[\"name\"] for e in stored_entities],\n \"count\": len(stored_entities)\n })\n\n return stored_entities\n\n def _store_entity(\n self,\n entity: dict,\n level: KnowledgeLevel,\n source_id: int\n ) -> dict | None:\n \"\"\"Speichere Entität in DB mit Duplikatprüfung.\"\"\"\n try:\n name = entity.get(\"name\", \"\").strip()\n entity_type = entity.get(\"type\", \"OTHER\").upper()\n context = entity.get(\"context\", \"\")\n\n if not name:\n return None\n\n # Prüfe ob Entität existiert (case-insensitive)\n cursor = db.execute(\n \"\"\"SELECT id, name, type, description\n FROM entities\n WHERE LOWER(name) = LOWER(%s) OR LOWER(canonical_name) = LOWER(%s)\n LIMIT 1\"\"\",\n (name, name)\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n # Entität existiert - verwende bestehende\n db.log(\"DEBUG\", f\"Entität '{name}' existiert bereits (ID: {entity_id})\")\n else:\n # Neue Entität anlegen\n cursor = 
db.execute(\n \"\"\"INSERT INTO entities (name, type, canonical_name, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (name, entity_type, name.lower())\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()\n db.log(\"INFO\", f\"Neue Entität angelegt: '{name}' (ID: {entity_id})\")\n\n return {\n \"id\": entity_id,\n \"name\": name,\n \"type\": entity_type,\n \"context\": context,\n \"is_new\": existing is None\n }\n\n except Exception as e:\n db.log(\"ERROR\", f\"Fehler beim Speichern der Entität: {e}\")\n return None\n\n # =========================================================================\n # SEMANTIK (Bedeutung\/Definition\/Referenz)\n # =========================================================================\n\n def extract_semantics(\n self,\n entities: list[dict],\n text: str,\n level: KnowledgeLevel,\n source_id: int\n ) -> list[dict]:\n \"\"\"\n Extrahiere Semantik (Bedeutung\/Definition) für Entitäten.\n\n Args:\n entities: Liste der extrahierten Entitäten\n text: Ursprungstext für Kontext\n level: Ebene\n source_id: Quell-ID\n\n Returns:\n Liste von Semantik-Einträgen\n \"\"\"\n if not entities:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:15]]\n\n prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(enti... [TRUNCATED-1e99c3c7edef32ed]",
"structuredPatch": [],
"originalFile": null
}
}