Protokoll #18548

ID: 18548
Zeitstempel: 2025-12-26 21:28:53.455914
Client: root
IP: 145.224.72.140
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 728 (Input: 30, Output: 698)
Dauer: 85 ms
Request-Zeit: 2025-12-26 21:28:53.455914
Response-Zeit: 2025-12-26 21:28:53.541090

Request

{
    "event": "PreToolUse",
    "tool_name": "Read",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/knowledge\/utils.py"
    }
}

Response

{
    "tool_response": {
        "type": "text",
        "file": {
            "filePath": "\/var\/www\/scripts\/pipeline\/knowledge\/utils.py",
            "content": "\"\"\"Utility-Funktionen für Wissensextraktion.\"\"\"\n\nimport sys\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .analyzer import KnowledgeExtractor\nfrom .models import ModelConfig\n\n\ndef get_model_config(provider: str = \"ollama\", model_name: str | None = None) -> ModelConfig:\n    \"\"\"\n    Erstelle Modellkonfiguration basierend auf Provider.\n\n    Args:\n        provider: 'ollama' oder 'anthropic'\n        model_name: Optional spezifisches Modell\n\n    Returns:\n        ModelConfig für den Extractor\n    \"\"\"\n    if provider == \"anthropic\":\n        return ModelConfig(provider=\"anthropic\", model_name=model_name or \"claude-3-haiku-20240307\")\n    else:\n        return ModelConfig(provider=\"ollama\", model_name=model_name or \"gemma3:27b-it-qat\")\n\n\ndef process_document_knowledge(document_id: int, provider: str = \"ollama\", model_name: str | None = None) -> dict:\n    \"\"\"\n    Verarbeite komplettes Dokument mit 3-Ebenen-Analyse.\n\n    1. Seiten-Ebene\n    2. Abschnitt-Ebene\n    3. Dokument-Ebene (konsolidiert)\n    \"\"\"\n    config = get_model_config(provider, model_name)\n    extractor = KnowledgeExtractor(config)\n\n    results = {\"document_id\": document_id, \"pages\": [], \"sections\": [], \"document\": None}\n\n    # 1. Seiten analysieren\n    cursor = db.execute(\n        \"SELECT id, text_content FROM document_pages WHERE document_id = %s ORDER BY page_number\",\n        (document_id,),\n    )\n    pages = cursor.fetchall()\n    cursor.close()\n\n    for page in pages:\n        if page[\"text_content\"]:\n            page_result = extractor.analyze_page(page[\"id\"], page[\"text_content\"])\n            results[\"pages\"].append(page_result)\n\n    # 2. Abschnitte analysieren\n    cursor = db.execute(\n        \"SELECT id, content FROM document_sections WHERE document_id = %s ORDER BY sort_order\",\n        (document_id,),\n    )\n    sections = cursor.fetchall()\n    cursor.close()\n\n    for section in sections:\n        if section[\"content\"]:\n            section_result = extractor.analyze_section(section[\"id\"], section[\"content\"])\n            results[\"sections\"].append(section_result)\n\n    # 3. Dokument-Ebene (Gesamttext)\n    cursor = db.execute(\n        \"SELECT GROUP_CONCAT(content SEPARATOR ' ') as full_text FROM chunks WHERE document_id = %s\",\n        (document_id,),\n    )\n    doc = cursor.fetchone()\n    cursor.close()\n\n    if doc and doc[\"full_text\"]:\n        results[\"document\"] = extractor.analyze_document(document_id, doc[\"full_text\"][:10000])\n\n    return results\n",
            "numLines": 81,
            "startLine": 1,
            "totalLines": 81
        }
    }
}
← Vorheriger | Zur Liste | Nächster →