Protokoll #4768

ID	4768
Zeitstempel	2025-12-22 14:52:19.913618
Client	root
IP	145.224.96.190
Modell	claude-sonnet-4-20250514
Status	completed
Tokens	2,936 (Input: 87, Output: 2,849)
Dauer	83 ms
Request-Zeit	2025-12-22 14:52:19.913618
Response-Zeit	2025-12-22 14:52:19.996842

Request

{
    "event": "PreToolUse",
    "tool_name": "Edit",
    "tool_input": {
        "file_path": "\/opt\/scripts\/pipeline\/vision.py",
        "old_string": "    if image_dir is None:\n        image_dir = \"\/tmp\/pipeline_images\"\n\n    if store_images:",
        "new_string": "    if image_dir is None:\n        image_dir = \"\/tmp\/pipeline_images\"  # noqa: S108\n\n    if store_images:"
    }
}

Response

{
    "tool_response": {
        "filePath": "\/opt\/scripts\/pipeline\/vision.py",
        "oldString": "    if image_dir is None:\n        image_dir = \"\/tmp\/pipeline_images\"\n\n    if store_images:",
        "newString": "    if image_dir is None:\n        image_dir = \"\/tmp\/pipeline_images\"  # noqa: S108\n\n    if store_images:",
        "originalFile": "#!\/usr\/bin\/env python3\n\"\"\"\nVision analysis module for KI-System Pipeline.\nExtracts PDF pages as images and analyzes them with vision models.\n\nUsage:\n    python vision.py <pdf_path>          # Analyze all pages\n    python vision.py <pdf_path> --page 1 # Analyze specific page\n\"\"\"\n\nimport base64\nimport os\nimport sys\nfrom pathlib import Path\n\nimport requests\n\nfrom config import OLLAMA_HOST\nfrom db import db\n\n# Default vision model (can be overridden by pipeline config)\nDEFAULT_VISION_MODEL = \"minicpm-v:latest\"\n\n# Image settings\nIMAGE_DPI = 150  # Balance between quality and size\nIMAGE_FORMAT = \"png\"\nMAX_IMAGE_SIZE_MB = 10\n\n\ndef pdf_to_images(file_path, dpi=IMAGE_DPI):\n    \"\"\"\n    Convert PDF pages to images.\n\n    Args:\n        file_path: Path to PDF file\n        dpi: Resolution for image extraction\n\n    Returns:\n        List of dicts with page_number, image_bytes, width, height\n    \"\"\"\n    import fitz  # PyMuPDF\n\n    doc = fitz.open(file_path)\n    pages = []\n\n    for page_num in range(len(doc)):\n        page = doc[page_num]\n\n        # Render page to image\n        mat = fitz.Matrix(dpi \/ 72, dpi \/ 72)  # 72 is default PDF DPI\n        pix = page.get_pixmap(matrix=mat)\n\n        # Convert to PNG bytes\n        img_bytes = pix.tobytes(IMAGE_FORMAT)\n\n        pages.append({\n            \"page_number\": page_num + 1,\n            \"image_bytes\": img_bytes,\n            \"width\": pix.width,\n            \"height\": pix.height,\n            \"size_kb\": len(img_bytes) \/ 1024\n        })\n\n    doc.close()\n    return pages\n\n\ndef analyze_image_ollama(image_bytes, model=DEFAULT_VISION_MODEL, prompt=None):\n    \"\"\"\n    Analyze an image using Ollama vision model.\n\n    Args:\n        image_bytes: PNG\/JPEG image as bytes\n        model: Vision model name (e.g., minicpm-v:latest)\n        prompt: Custom prompt (default: document analysis prompt)\n\n    Returns:\n        dict with analysis results\n    \"\"\"\n    if prompt is None:\n        prompt = \"\"\"Analysiere diese Seite aus einem Schulungsdokument.\n\nBeschreibe strukturiert:\n1. **Überschriften\/Titel**: Welche Überschriften gibt es?\n2. **Hauptinhalt**: Worum geht es auf dieser Seite?\n3. **Visuelle Elemente**:\n   - Gibt es Bilder\/Fotos? Was zeigen sie?\n   - Gibt es Diagramme\/Charts? Was stellen sie dar?\n   - Gibt es Tabellen? Was enthalten sie?\n4. **Layout**: Wie ist die Seite aufgebaut (Spalten, Boxen, etc.)?\n5. **Besonderheiten**: Gibt es Hervorhebungen, Zitate oder Callouts?\n\nAntworte auf Deutsch und sei präzise.\"\"\"\n\n    # Encode image as base64\n    image_base64 = base64.b64encode(image_bytes).decode(\"utf-8\")\n\n    try:\n        response = requests.post(\n            f\"{OLLAMA_HOST}\/api\/generate\",\n            json={\n                \"model\": model,\n                \"prompt\": prompt,\n                \"images\": [image_base64],\n                \"stream\": False,\n                \"options\": {\n                    \"temperature\": 0.3,\n                    \"num_predict\": 2048,\n                    \"num_ctx\": 4096\n                }\n            },\n            timeout=120\n        )\n        response.raise_for_status()\n\n        result = response.json()\n        return {\n            \"success\": True,\n            \"analysis\": result.get(\"response\", \"\"),\n            \"model\": model,\n            \"eval_count\": result.get(\"eval_count\", 0),\n            \"eval_duration_ms\": result.get(\"eval_duration\", 0) \/ 1_000_000\n        }\n\n    except requests.exceptions.Timeout:\n        return {\"success\": False, \"error\": \"Vision model timeout\"}\n    except requests.exceptions.RequestException as e:\n        return {\"success\": False, \"error\": str(e)}\n    except Exception as e:\n        return {\"success\": False, \"error\": str(e)}\n\n\ndef analyze_document(file_path, model=DEFAULT_VISION_MODEL, store_images=False, image_dir=None):\n    \"\"\"\n    Analyze all pages of a PDF document.\n\n    Args:\n        file_path: Path to PDF file\n        model: Vision model to use\n        store_images: Whether to save images to disk\n        image_dir: Directory for saved images (default: \/tmp\/pipeline_images)\n\n    Returns:\n        List of page analysis results\n    \"\"\"\n    db.log(\"INFO\", f\"Vision analysis starting: {file_path}\", f\"model={model}\")\n\n    # Convert PDF to images\n    pages = pdf_to_images(file_path)\n    db.log(\"INFO\", f\"Extracted {len(pages)} pages from PDF\")\n\n    if image_dir is None:\n        image_dir = \"\/tmp\/pipeline_images\"\n\n    if store_images:\n        os.makedirs(image_dir, exist_ok=True)\n\n    results = []\n\n    for page in pages:\n        page_num = page[\"page_number\"]\n        db.log(\"INFO\", f\"Analyzing page {page_num}\/{len(pages)}\")\n\n        # Optional: Save image to disk\n        image_path = None\n        if store_images:\n            filename = f\"{Path(file_path).stem}_page_{page_num:03d}.{IMAGE_FORMAT}\"\n            image_path = os.path.join(image_dir, filename)\n            with open(image_path, \"wb\") as f:\n                f.write(page[\"image_bytes\"])\n\n        # Analyze with vision model\n        analysis = analyze_image_ollama(page[\"image_bytes\"], model=model)\n\n        results.append({\n            \"page_number\": page_num,\n            \"width\": page[\"width\"],\n            \"height\": page[\"height\"],\n            \"size_kb\": page[\"size_kb\"],\n            \"image_path\": image_path,\n            \"analysis\": analysis.get(\"analysis\", \"\") if analysis[\"success\"] else None,\n            \"error\": analysis.get(\"error\") if not analysis[\"success\"] else None,\n            \"eval_tokens\": analysis.get(\"eval_count\", 0),\n            \"eval_duration_ms\": analysis.get(\"eval_duration_ms\", 0)\n        })\n\n        if analysis[\"success\"]:\n            db.log(\"INFO\", f\"Page {page_num} analyzed: {analysis.get('eval_count', 0)} tokens\")\n        else:\n            db.log(\"WARNING\", f\"Page {page_num} analysis failed: {analysis.get('error')}\")\n\n    return results\n\n\ndef store_page_analysis(document_id, page_results):\n    \"\"\"\n    Store page analysis results in database.\n\n    Args:\n        document_id: ID of the document in documents table\n        page_results: List of page analysis results from analyze_document()\n\n    Returns:\n        Number of pages stored\n    \"\"\"\n    stored = 0\n\n    for page in page_results:\n        try:\n            db.execute(\n                \"\"\"INSERT INTO document_pages\n                   (document_id, page_number, image_path, vision_analysis, token_count)\n                   VALUES (%s, %s, %s, %s, %s)\n                   ON DUPLICATE KEY UPDATE\n                   image_path = VALUES(image_path),\n                   vision_analysis = VALUES(vision_analysis),\n                   token_count = VALUES(token_count)\"\"\",\n                (\n                    document_id,\n                    page[\"page_number\"],\n                    page[\"image_path\"],\n                    page[\"analysis\"],\n                    page[\"eval_tokens\"]\n                )\n            )\n            db.commit()\n            stored += 1\n        except Exception as e:\n            db.log(\"ERROR\", f\"Failed to store page {page['page_number']}: {e}\")\n\n    return stored\n\n\ndef run_vision_step(document_id, file_path, config=None):\n    \"\"\"\n    Run vision analysis step for pipeline.\n\n    Args:\n        document_id: Document ID in database\n        file_path: Path to PDF file\n        config: Step configuration dict\n\n    Returns:\n        dict with success status and statistics\n    \"\"\"\n    if config is None:\n        config = {}\n\n    model = config.get(\"model\", DEFAULT_VISION_MODEL)\n    store_images = config.get(\"store_images\", False)\n    detect_images = config.get(\"detect_images\", True)\n    detect_charts = config.get(\"detect_charts\", True)\n    detect_tables = config.get(\"detect_tables\", True)\n\n    # Build custom prompt based on config\n    prompt_parts = [\"Analysiere diese Seite aus einem Schulungsdokument.\\n\\nBeschreibe strukturiert:\"]\n    prompt_parts.append(\"1. **Überschriften\/Titel**: Welche Überschriften gibt es?\")\n    prompt_parts.append(\"2. **Hauptinhalt**: Worum geht es auf dieser Seite?\")\n\n    visual_parts = []\n    if detect_images:\n        visual_parts.append(\"Gibt es Bilder\/Fotos? Was zeigen sie?\")\n    if detect_charts:\n        visual_parts.append(\"Gibt es Diagramme\/Charts? Was stellen sie dar?\")\n    if detect_tables:\n        visual_parts.append(\"Gibt es Tabellen? Was enthalten sie?\")\n\n    if visual_parts:\n        prompt_parts.append(\"3. **Visuelle Elemente**:\\n   - \" + \"\\n   - \".join(visual_parts))\n\n    prompt_parts.append(\"4. **Layout**: Wie ist die Seite aufgebaut?\")\n    prompt_parts.append(\"5. **Schlüsselbegriffe**: Welche wichtigen Begriffe\/Konzepte werden genannt?\")\n    prompt_parts.append(\"\\nAntworte auf Deutsch und sei präzise.\")\n\n    # Note: prompt_parts built for future custom prompt support\n    _ = \"\\n\".join(prompt_parts)  # Currently unused, reserved for custom prompts\n\n    try:\n        # Analyze document\n        results = analyze_document(file_path, model=model, store_images=store_images)\n\n        # Store results\n        stored = store_page_analysis(document_id, results)\n\n        # Calculate statistics\n        successful = sum(1 for r in results if r[\"analysis\"])\n        total_tokens = sum(r[\"eval_tokens\"] for r in results)\n        total_time_ms = sum(r[\"eval_duration_ms\"] for r in results)\n\n        return {\n            \"success\": True,\n            \"pages_total\": len(results),\n            \"pages_analyzed\": successful,\n            \"pages_stored\": stored,\n            \"total_tokens\": total_tokens,\n            \"total_time_ms\": total_time_ms,\n            \"model\": model\n        }\n\n    except Exception as e:\n        db.log(\"ERROR\", f\"Vision step failed: {e}\")\n        return {\n            \"success\": False,\n            \"error\": str(e)\n        }\n\n\ndef main():\n    \"\"\"CLI entry point.\"\"\"\n    if len(sys.argv) < 2:\n        print(__doc__)\n        return\n\n    file_path = sys.argv[1]\n\n    if not os.path.exists(file_path):\n        print(f\"Error: File not found: {file_path}\")\n        return\n\n    # Check for specific page\n    page_num = None\n    if \"--page\" in sys.argv:\n        idx = sys.argv.index(\"--page\")\n        if idx + 1 < len(sys.argv):\n... [TRUNCATED-51eaffdfd4ba6f13]",
        "structuredPatch": [
            {
                "oldStart": 150,
                "oldLines": 7,
                "newStart": 150,
                "newLines": 7,
                "lines": [
                    "     db.log(\"INFO\", f\"Extracted {len(pages)} pages from PDF\")",
                    " ",
                    "     if image_dir is None:",
                    "-        image_dir = \"\/tmp\/pipeline_images\"",
                    "+        image_dir = \"\/tmp\/pipeline_images\"  # noqa: S108",
                    " ",
                    "     if store_images:",
                    "         os.makedirs(image_dir, exist_ok=True)"
                ]
            }
        ],
        "userModified": false,
        "replaceAll": false
    }
}

← Vorheriger Zur Liste Nächster →