Protokoll #6248

ID: 6248
Zeitstempel: 2025-12-23 05:14:54.831808
Client: root
IP: 145.224.96.190
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 748 (Input: 33, Output: 715)
Dauer: 94 ms
Request-Zeit: 2025-12-23 05:14:54.831808
Response-Zeit: 2025-12-23 05:14:54.925993

Request

{
    "event": "PreToolUse",
    "tool_name": "Read",
    "tool_input": {
        "file_path": "\/opt\/scripts\/pipeline\/vision.py",
        "offset": 50,
        "limit": 80
    }
}

Response

{
    "tool_response": {
        "type": "text",
        "file": {
            "filePath": "\/opt\/scripts\/pipeline\/vision.py",
            "content": "        mat = fitz.Matrix(dpi \/ 72, dpi \/ 72)  # 72 is default PDF DPI\n        pix = page.get_pixmap(matrix=mat)\n\n        # Convert to PNG bytes\n        img_bytes = pix.tobytes(IMAGE_FORMAT)\n\n        pages.append(\n            {\n                \"page_number\": page_num + 1,\n                \"image_bytes\": img_bytes,\n                \"width\": pix.width,\n                \"height\": pix.height,\n                \"size_kb\": len(img_bytes) \/ 1024,\n            }\n        )\n\n    doc.close()\n    return pages\n\n\ndef analyze_image_ollama(image_bytes, model=DEFAULT_VISION_MODEL, prompt=None):\n    \"\"\"\n    Analyze an image using Ollama vision model.\n\n    Args:\n        image_bytes: PNG\/JPEG image as bytes\n        model: Vision model name (e.g., minicpm-v:latest)\n        prompt: Custom prompt (default: document analysis prompt)\n\n    Returns:\n        dict with analysis results\n    \"\"\"\n    if prompt is None:\n        prompt = \"\"\"Analysiere diese Seite aus einem Schulungsdokument.\n\nBeschreibe strukturiert:\n1. **Überschriften\/Titel**: Welche Überschriften gibt es?\n2. **Hauptinhalt**: Worum geht es auf dieser Seite?\n3. **Visuelle Elemente**:\n   - Gibt es Bilder\/Fotos? Was zeigen sie?\n   - Gibt es Diagramme\/Charts? Was stellen sie dar?\n   - Gibt es Tabellen? Was enthalten sie?\n4. **Layout**: Wie ist die Seite aufgebaut (Spalten, Boxen, etc.)?\n5. **Besonderheiten**: Gibt es Hervorhebungen, Zitate oder Callouts?\n\nAntworte auf Deutsch und sei präzise.\"\"\"\n\n    # Encode image as base64\n    image_base64 = base64.b64encode(image_bytes).decode(\"utf-8\")\n\n    try:\n        response = requests.post(\n            f\"{OLLAMA_HOST}\/api\/generate\",\n            json={\n                \"model\": model,\n                \"prompt\": prompt,\n                \"images\": [image_base64],\n                \"stream\": False,\n                \"options\": {\"temperature\": 0.3, \"num_predict\": 2048, \"num_ctx\": 4096},\n            },\n            timeout=120,\n        )\n        response.raise_for_status()\n\n        result = response.json()\n        return {\n            \"success\": True,\n            \"analysis\": result.get(\"response\", \"\"),\n            \"model\": model,\n            \"eval_count\": result.get(\"eval_count\", 0),\n            \"eval_duration_ms\": result.get(\"eval_duration\", 0) \/ 1_000_000,\n        }\n\n    except requests.exceptions.Timeout:\n        return {\"success\": False, \"error\": \"Vision model timeout\"}\n    except requests.exceptions.RequestException as e:\n        return {\"success\": False, \"error\": str(e)}\n    except Exception as e:\n        return {\"success\": False, \"error\": str(e)}\n",
            "numLines": 80,
            "startLine": 50,
            "totalLines": 388
        }
    }
}
← Vorheriger | Zur Liste | Nächster →