Protokoll #5355
| ID | 5355 |
|---|---|
| Zeitstempel | 2025-12-22 22:11:38.720786 |
| Client | root |
| IP | - |
| Modell | claude-sonnet-4-20250514 |
| Status | completed |
| Tokens | 3,145 (Input: 0, Output: 0) |
| Dauer | 88 ms |
| Request-Zeit | - |
| Response-Zeit | - |
Request
{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/opt\/scripts\/pipeline\/knowledge.py",
"old_string": " if len(sys.argv) < 2:\n print(\"Verwendung: python knowledge.py <document_id> [provider] [model]\")\n print(\" provider: ollama (default) oder anthropic\")\n print(\" model: mistral (ollama) oder claude-3-haiku-20240307 (anthropic)\")\n sys.exit(1)",
"new_string": " if len(sys.argv) < 2:\n print(\"Verwendung: python knowledge.py <document_id> [provider] [model]\")\n print(\" provider: ollama (default) oder anthropic\")\n print(\" model: gemma3:27b-it-qat (ollama) oder claude-3-haiku-20240307 (anthropic)\")\n sys.exit(1)"
}
}
Response
{
"tool_response": {
"filePath": "\/opt\/scripts\/pipeline\/knowledge.py",
"oldString": " if len(sys.argv) < 2:\n print(\"Verwendung: python knowledge.py <document_id> [provider] [model]\")\n print(\" provider: ollama (default) oder anthropic\")\n print(\" model: mistral (ollama) oder claude-3-haiku-20240307 (anthropic)\")\n sys.exit(1)",
"newString": " if len(sys.argv) < 2:\n print(\"Verwendung: python knowledge.py <document_id> [provider] [model]\")\n print(\" provider: ollama (default) oder anthropic\")\n print(\" model: gemma3:27b-it-qat (ollama) oder claude-3-haiku-20240307 (anthropic)\")\n sys.exit(1)",
"originalFile": "\"\"\"\nModulares Wissensextraktions-System für 3-Ebenen-Analyse.\n\nEbenen:\n1. Seite (page): Detailanalyse pro Einzelseite\n2. Abschnitt (section): Aggregation pro Kapitel\/Unterkapitel\n3. Dokument (document): Konsolidierte Gesamtsicht\n\nWissenstypen:\n- Entitäten: Personen, Konzepte, Methoden, Organisationen\n- Semantik: Bedeutung, Definition, Referenzen\n- Ontologie: Wechselwirkungen zwischen Entitäten\n- Taxonomie: Hierarchische Einordnung\n\nAutor: KI-System Pipeline\n\"\"\"\n\nimport json\nimport re\nimport time\nfrom dataclasses import dataclass\nfrom enum import Enum\n\nimport requests\n\nfrom config import ANTHROPIC_API_KEY, OLLAMA_HOST\nfrom db import db\n\n\nclass KnowledgeLevel(Enum):\n \"\"\"Ebene der Wissensextraktion.\"\"\"\n\n PAGE = \"page\"\n SECTION = \"section\"\n DOCUMENT = \"document\"\n\n\nclass KnowledgeType(Enum):\n \"\"\"Typ des extrahierten Wissens.\"\"\"\n\n ENTITY = \"entity\"\n SEMANTIC = \"semantic\"\n ONTOLOGY = \"ontology\"\n TAXONOMY = \"taxonomy\"\n\n\n@dataclass\nclass ModelConfig:\n \"\"\"Konfiguration für LLM-Modell.\"\"\"\n\n provider: str # 'ollama' oder 'anthropic'\n model_name: str\n temperature: float = 0.3\n max_tokens: int = 2000\n\n\n# Standard-Modellkonfigurationen\nDEFAULT_MODELS = {\n \"ollama\": ModelConfig(\"ollama\", \"gemma3:27b-it-qat\"),\n \"anthropic\": ModelConfig(\"anthropic\", \"claude-3-haiku-20240307\"),\n \"anthropic_opus\": ModelConfig(\"anthropic\", \"claude-opus-4-5-20251101\"),\n}\n\n\nclass KnowledgeExtractor:\n \"\"\"\n Modulare Wissensextraktion mit Datenbankabgleich.\n\n Verwendung:\n extractor = KnowledgeExtractor(model_config)\n\n # Pro Seite\n entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n 
\"\"\"\n\n def __init__(self, model_config: ModelConfig | None = None):\n \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"\n self.model = model_config or DEFAULT_MODELS[\"ollama\"]\n self.anthropic_client = None\n\n if self.model.provider == \"anthropic\":\n self._init_anthropic()\n\n def _init_anthropic(self):\n \"\"\"Initialisiere Anthropic Client.\"\"\"\n try:\n import anthropic\n\n if ANTHROPIC_API_KEY:\n self.anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)\n except ImportError:\n db.log(\"WARNING\", \"Anthropic SDK nicht installiert, fallback zu Ollama\")\n self.model = DEFAULT_MODELS[\"ollama\"]\n\n def _call_llm(self, prompt: str, json_output: bool = True) -> str:\n \"\"\"Rufe LLM auf und gib Antwort zurück.\"\"\"\n start_time = time.time()\n\n try:\n if self.model.provider == \"anthropic\" and self.anthropic_client:\n response = self.anthropic_client.messages.create(\n model=self.model.model_name,\n max_tokens=self.model.max_tokens,\n temperature=self.model.temperature,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n )\n result = response.content[0].text\n tokens_in = response.usage.input_tokens\n tokens_out = response.usage.output_tokens\n else:\n # Ollama\n payload = {\n \"model\": self.model.model_name,\n \"prompt\": prompt,\n \"stream\": False,\n \"options\": {\"temperature\": self.model.temperature},\n }\n if json_output:\n payload[\"format\"] = \"json\"\n\n resp = requests.post(f\"{OLLAMA_HOST}\/api\/generate\", json=payload, timeout=120)\n resp.raise_for_status()\n data = resp.json()\n result = data.get(\"response\", \"\")\n tokens_in = data.get(\"prompt_eval_count\", 0)\n tokens_out = data.get(\"eval_count\", 0)\n\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Protokolliere LLM-Aufruf\n db.log_to_protokoll(\n client_name=\"pipeline-knowledge\",\n request=prompt[:500],\n response=result[:500],\n model_name=f\"{self.model.provider}:{self.model.model_name}\",\n tokens_input=tokens_in,\n 
tokens_output=tokens_out,\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n return result\n\n except Exception as e:\n db.log(\"ERROR\", f\"LLM-Aufruf fehlgeschlagen: {e}\")\n return \"{}\"\n\n def _parse_json(self, text: str) -> dict:\n \"\"\"Extrahiere JSON aus LLM-Antwort.\"\"\"\n try:\n # Versuche direkt zu parsen\n return json.loads(text)\n except json.JSONDecodeError:\n # Suche nach JSON-Block\n match = re.search(r\"\\{[\\s\\S]*\\}\", text)\n if match:\n try:\n return json.loads(match.group())\n except json.JSONDecodeError:\n pass\n return {}\n\n # =========================================================================\n # ENTITÄTEN\n # =========================================================================\n\n def extract_entities(self, text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:\n \"\"\"\n Extrahiere Entitäten aus Text.\n\n Args:\n text: Eingabetext\n level: Ebene (PAGE, SECTION, DOCUMENT)\n source_id: ID der Quelle (page_id, section_id, document_id)\n\n Returns:\n Liste von Entitäten mit DB-IDs\n \"\"\"\n prompt = f\"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorien:\n- PERSON: Namen von Personen, Autoren, Therapeuten\n- ORGANIZATION: Firmen, Institute, Verbände\n- CONCEPT: Fachbegriffe, Theorien, Modelle\n- METHOD: Methoden, Techniken, Verfahren\n- TOOL: Werkzeuge, Instrumente, Materialien\n- LOCATION: Orte, Länder, Regionen\n- EVENT: Ereignisse, Konferenzen\n\nAntworte NUR als JSON:\n{{\"entities\": [\n {{\"name\": \"...\", \"type\": \"CONCEPT\", \"context\": \"kurzer Kontext\", \"importance\": 0.0-1.0}}\n]}}\n\nText ({level.value}-Ebene):\n{text[:4000]}\"\"\"\n\n result = self._call_llm(prompt)\n data = self._parse_json(result)\n entities = data.get(\"entities\", [])\n\n # Speichere und gleiche mit DB ab\n stored_entities = []\n for entity in entities:\n stored = self._store_entity(entity, level, source_id)\n if stored:\n stored_entities.append(stored)\n\n # Speichere in 
page_knowledge\/section_knowledge\/document_knowledge\n self._store_knowledge(\n level,\n source_id,\n KnowledgeType.ENTITY,\n {\"entities\": [e[\"name\"] for e in stored_entities], \"count\": len(stored_entities)},\n )\n\n return stored_entities\n\n def _store_entity(self, entity: dict, level: KnowledgeLevel, source_id: int) -> dict | None:\n \"\"\"Speichere Entität in DB mit Duplikatprüfung.\"\"\"\n try:\n name = entity.get(\"name\", \"\").strip()\n entity_type = entity.get(\"type\", \"OTHER\").upper()\n context = entity.get(\"context\", \"\")\n\n if not name:\n return None\n\n # Prüfe ob Entität existiert (case-insensitive)\n cursor = db.execute(\n \"\"\"SELECT id, name, type, description\n FROM entities\n WHERE LOWER(name) = LOWER(%s) OR LOWER(canonical_name) = LOWER(%s)\n LIMIT 1\"\"\",\n (name, name),\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n # Entität existiert - verwende bestehende\n db.log(\"DEBUG\", f\"Entität '{name}' existiert bereits (ID: {entity_id})\")\n else:\n # Neue Entität anlegen\n cursor = db.execute(\n \"\"\"INSERT INTO entities (name, type, canonical_name, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (name, entity_type, name.lower()),\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()\n db.log(\"INFO\", f\"Neue Entität angelegt: '{name}' (ID: {entity_id})\")\n\n return {\"id\": entity_id, \"name\": name, \"type\": entity_type, \"context\": context, \"is_new\": existing is None}\n\n except Exception as e:\n db.log(\"ERROR\", f\"Fehler beim Speichern der Entität: {e}\")\n return None\n\n # =========================================================================\n # SEMANTIK (Bedeutung\/Definition\/Referenz)\n # =========================================================================\n\n def extract_semantics(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:\n \"\"\"\n Extrahiere Semantik (Bedeutung\/Definition) für 
Entitäten.\n\n Args:\n entities: Liste der extrahierten Entitäten\n text: Ursprungstext für Kontext\n level: Ebene\n source_id: Quell-ID\n\n Returns:\n Liste von Semantik-Einträgen\n \"\"\"\n if not entities:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:15]]\n\n prompt = f\"\"\"Für die folgenden Entitäten aus dem Text, extrahiere die Bedeutung\/Definition.\n\nEntitäten: {\", \".join(entity_names)}\n\nFür jede Entität gib an:\n- definition: Kurze Definition basierend auf dem Text\n- context: In welchem Kontext wird sie verwendet\n- references: Bezüge zu anderen Konzepten (falls erkennbar)\n\nAntworte NUR als JSON:\n{{\"semantics\": [\n {{\"entity\": \"...\", \"definiti... [TRUNCATED-22f8f5972456e30e]",
"structuredPatch": [
{
"oldStart": 878,
"oldLines": 7,
"newStart": 878,
"newLines": 7,
"lines": [
" if len(sys.argv) < 2:",
" print(\"Verwendung: python knowledge.py <document_id> [provider] [model]\")",
" print(\" provider: ollama (default) oder anthropic\")",
"- print(\" model: mistral (ollama) oder claude-3-haiku-20240307 (anthropic)\")",
"+ print(\" model: gemma3:27b-it-qat (ollama) oder claude-3-haiku-20240307 (anthropic)\")",
" sys.exit(1)",
" ",
" doc_id = int(sys.argv[1])"
]
}
],
"userModified": false,
"replaceAll": false
}
}