{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/knowledge\/utils.py"
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/knowledge\/utils.py",
"content": "\"\"\"Utility-Funktionen für Wissensextraktion.\"\"\"\n\nimport sys\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .analyzer import KnowledgeExtractor\nfrom .models import ModelConfig\n\n\ndef get_model_config(provider: str = \"ollama\", model_name: str | None = None) -> ModelConfig:\n \"\"\"\n Erstelle Modellkonfiguration basierend auf Provider.\n\n Args:\n provider: 'ollama' oder 'anthropic'\n model_name: Optional spezifisches Modell\n\n Returns:\n ModelConfig für den Extractor\n \"\"\"\n if provider == \"anthropic\":\n return ModelConfig(provider=\"anthropic\", model_name=model_name or \"claude-3-haiku-20240307\")\n else:\n return ModelConfig(provider=\"ollama\", model_name=model_name or \"gemma3:27b-it-qat\")\n\n\ndef process_document_knowledge(document_id: int, provider: str = \"ollama\", model_name: str | None = None) -> dict:\n \"\"\"\n Verarbeite komplettes Dokument mit 3-Ebenen-Analyse.\n\n 1. Seiten-Ebene\n 2. Abschnitt-Ebene\n 3. Dokument-Ebene (konsolidiert)\n \"\"\"\n config = get_model_config(provider, model_name)\n extractor = KnowledgeExtractor(config)\n\n results = {\"document_id\": document_id, \"pages\": [], \"sections\": [], \"document\": None}\n\n # 1. Seiten analysieren\n cursor = db.execute(\n \"SELECT id, text_content FROM document_pages WHERE document_id = %s ORDER BY page_number\",\n (document_id,),\n )\n pages = cursor.fetchall()\n cursor.close()\n\n for page in pages:\n if page[\"text_content\"]:\n page_result = extractor.analyze_page(page[\"id\"], page[\"text_content\"])\n results[\"pages\"].append(page_result)\n\n # 2. Abschnitte analysieren\n cursor = db.execute(\n \"SELECT id, content FROM document_sections WHERE document_id = %s ORDER BY sort_order\",\n (document_id,),\n )\n sections = cursor.fetchall()\n cursor.close()\n\n for section in sections:\n if section[\"content\"]:\n section_result = extractor.analyze_section(section[\"id\"], section[\"content\"])\n results[\"sections\"].append(section_result)\n\n # 3. Dokument-Ebene (Gesamttext)\n cursor = db.execute(\n \"SELECT GROUP_CONCAT(content SEPARATOR ' ') as full_text FROM chunks WHERE document_id = %s\",\n (document_id,),\n )\n doc = cursor.fetchone()\n cursor.close()\n\n if doc and doc[\"full_text\"]:\n results[\"document\"] = extractor.analyze_document(document_id, doc[\"full_text\"][:10000])\n\n return results\n",
"numLines": 81,
"startLine": 1,
"totalLines": 81
}
}
}