Protokoll #15236

ID: 15236
Zeitstempel: 2025-12-25 15:49:33.281986
Client: root
IP: 145.224.72.140
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 1,387 (Input: 691, Output: 696)
Dauer: 103 ms
Request-Zeit: 2025-12-25 15:49:33.281986
Response-Zeit: 2025-12-25 15:49:33.385394

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/knowledge\/utils.py",
        "content": "\"\"\"Utility-Funktionen für Wissensextraktion.\"\"\"\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import ModelConfig\nfrom .analyzer import KnowledgeExtractor\n\n\ndef get_model_config(provider: str = \"ollama\", model_name: str | None = None) -> ModelConfig:\n    \"\"\"\n    Erstelle Modellkonfiguration basierend auf Provider.\n\n    Args:\n        provider: 'ollama' oder 'anthropic'\n        model_name: Optional spezifisches Modell\n\n    Returns:\n        ModelConfig für den Extractor\n    \"\"\"\n    if provider == \"anthropic\":\n        return ModelConfig(provider=\"anthropic\", model_name=model_name or \"claude-3-haiku-20240307\")\n    else:\n        return ModelConfig(provider=\"ollama\", model_name=model_name or \"gemma3:27b-it-qat\")\n\n\ndef process_document_knowledge(\n    document_id: int, provider: str = \"ollama\", model_name: str | None = None\n) -> dict:\n    \"\"\"\n    Verarbeite komplettes Dokument mit 3-Ebenen-Analyse.\n\n    1. Seiten-Ebene\n    2. Abschnitt-Ebene\n    3. Dokument-Ebene (konsolidiert)\n    \"\"\"\n    config = get_model_config(provider, model_name)\n    extractor = KnowledgeExtractor(config)\n\n    results = {\"document_id\": document_id, \"pages\": [], \"sections\": [], \"document\": None}\n\n    # 1. Seiten analysieren\n    cursor = db.execute(\n        \"SELECT id, text_content FROM document_pages WHERE document_id = %s ORDER BY page_number\",\n        (document_id,),\n    )\n    pages = cursor.fetchall()\n    cursor.close()\n\n    for page in pages:\n        if page[\"text_content\"]:\n            page_result = extractor.analyze_page(page[\"id\"], page[\"text_content\"])\n            results[\"pages\"].append(page_result)\n\n    # 2. 
Abschnitte analysieren\n    cursor = db.execute(\n        \"SELECT id, content FROM document_sections WHERE document_id = %s ORDER BY sort_order\",\n        (document_id,),\n    )\n    sections = cursor.fetchall()\n    cursor.close()\n\n    for section in sections:\n        if section[\"content\"]:\n            section_result = extractor.analyze_section(section[\"id\"], section[\"content\"])\n            results[\"sections\"].append(section_result)\n\n    # 3. Dokument-Ebene (Gesamttext)\n    cursor = db.execute(\n        \"SELECT GROUP_CONCAT(content SEPARATOR ' ') as full_text FROM chunks WHERE document_id = %s\",\n        (document_id,),\n    )\n    doc = cursor.fetchone()\n    cursor.close()\n\n    if doc and doc[\"full_text\"]:\n        results[\"document\"] = extractor.analyze_document(document_id, doc[\"full_text\"][:10000])\n\n    return results\n"
    }
}

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/scripts\/pipeline\/knowledge\/utils.py",
        "content": "\"\"\"Utility-Funktionen für Wissensextraktion.\"\"\"\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import ModelConfig\nfrom .analyzer import KnowledgeExtractor\n\n\ndef get_model_config(provider: str = \"ollama\", model_name: str | None = None) -> ModelConfig:\n    \"\"\"\n    Erstelle Modellkonfiguration basierend auf Provider.\n\n    Args:\n        provider: 'ollama' oder 'anthropic'\n        model_name: Optional spezifisches Modell\n\n    Returns:\n        ModelConfig für den Extractor\n    \"\"\"\n    if provider == \"anthropic\":\n        return ModelConfig(provider=\"anthropic\", model_name=model_name or \"claude-3-haiku-20240307\")\n    else:\n        return ModelConfig(provider=\"ollama\", model_name=model_name or \"gemma3:27b-it-qat\")\n\n\ndef process_document_knowledge(\n    document_id: int, provider: str = \"ollama\", model_name: str | None = None\n) -> dict:\n    \"\"\"\n    Verarbeite komplettes Dokument mit 3-Ebenen-Analyse.\n\n    1. Seiten-Ebene\n    2. Abschnitt-Ebene\n    3. Dokument-Ebene (konsolidiert)\n    \"\"\"\n    config = get_model_config(provider, model_name)\n    extractor = KnowledgeExtractor(config)\n\n    results = {\"document_id\": document_id, \"pages\": [], \"sections\": [], \"document\": None}\n\n    # 1. Seiten analysieren\n    cursor = db.execute(\n        \"SELECT id, text_content FROM document_pages WHERE document_id = %s ORDER BY page_number\",\n        (document_id,),\n    )\n    pages = cursor.fetchall()\n    cursor.close()\n\n    for page in pages:\n        if page[\"text_content\"]:\n            page_result = extractor.analyze_page(page[\"id\"], page[\"text_content\"])\n            results[\"pages\"].append(page_result)\n\n    # 2. 
Abschnitte analysieren\n    cursor = db.execute(\n        \"SELECT id, content FROM document_sections WHERE document_id = %s ORDER BY sort_order\",\n        (document_id,),\n    )\n    sections = cursor.fetchall()\n    cursor.close()\n\n    for section in sections:\n        if section[\"content\"]:\n            section_result = extractor.analyze_section(section[\"id\"], section[\"content\"])\n            results[\"sections\"].append(section_result)\n\n    # 3. Dokument-Ebene (Gesamttext)\n    cursor = db.execute(\n        \"SELECT GROUP_CONCAT(content SEPARATOR ' ') as full_text FROM chunks WHERE document_id = %s\",\n        (document_id,),\n    )\n    doc = cursor.fetchone()\n    cursor.close()\n\n    if doc and doc[\"full_text\"]:\n        results[\"document\"] = extractor.analyze_document(document_id, doc[\"full_text\"][:10000])\n\n    return results\n",
        "structuredPatch": [],
        "originalFile": null
    }
}
← Vorheriger | Zur Liste | Nächster →