{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/opt\/scripts\/pipeline\/vision.py",
"offset": 50,
"limit": 80
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/opt\/scripts\/pipeline\/vision.py",
"content": " mat = fitz.Matrix(dpi \/ 72, dpi \/ 72) # 72 is default PDF DPI\n pix = page.get_pixmap(matrix=mat)\n\n # Convert to PNG bytes\n img_bytes = pix.tobytes(IMAGE_FORMAT)\n\n pages.append(\n {\n \"page_number\": page_num + 1,\n \"image_bytes\": img_bytes,\n \"width\": pix.width,\n \"height\": pix.height,\n \"size_kb\": len(img_bytes) \/ 1024,\n }\n )\n\n doc.close()\n return pages\n\n\ndef analyze_image_ollama(image_bytes, model=DEFAULT_VISION_MODEL, prompt=None):\n \"\"\"\n Analyze an image using Ollama vision model.\n\n Args:\n image_bytes: PNG\/JPEG image as bytes\n model: Vision model name (e.g., minicpm-v:latest)\n prompt: Custom prompt (default: document analysis prompt)\n\n Returns:\n dict with analysis results\n \"\"\"\n if prompt is None:\n prompt = \"\"\"Analysiere diese Seite aus einem Schulungsdokument.\n\nBeschreibe strukturiert:\n1. **Überschriften\/Titel**: Welche Überschriften gibt es?\n2. **Hauptinhalt**: Worum geht es auf dieser Seite?\n3. **Visuelle Elemente**:\n - Gibt es Bilder\/Fotos? Was zeigen sie?\n - Gibt es Diagramme\/Charts? Was stellen sie dar?\n - Gibt es Tabellen? Was enthalten sie?\n4. **Layout**: Wie ist die Seite aufgebaut (Spalten, Boxen, etc.)?\n5. **Besonderheiten**: Gibt es Hervorhebungen, Zitate oder Callouts?\n\nAntworte auf Deutsch und sei präzise.\"\"\"\n\n # Encode image as base64\n image_base64 = base64.b64encode(image_bytes).decode(\"utf-8\")\n\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\n \"model\": model,\n \"prompt\": prompt,\n \"images\": [image_base64],\n \"stream\": False,\n \"options\": {\"temperature\": 0.3, \"num_predict\": 2048, \"num_ctx\": 4096},\n },\n timeout=120,\n )\n response.raise_for_status()\n\n result = response.json()\n return {\n \"success\": True,\n \"analysis\": result.get(\"response\", \"\"),\n \"model\": model,\n \"eval_count\": result.get(\"eval_count\", 0),\n \"eval_duration_ms\": result.get(\"eval_duration\", 0) \/ 1_000_000,\n }\n\n except requests.exceptions.Timeout:\n return {\"success\": False, \"error\": \"Vision model timeout\"}\n except requests.exceptions.RequestException as e:\n return {\"success\": False, \"error\": str(e)}\n except Exception as e:\n return {\"success\": False, \"error\": str(e)}\n",
"numLines": 80,
"startLine": 50,
"totalLines": 388
}
}
}