Protokoll #21967

ID: 21967
Zeitstempel: 2025-12-27 11:40:41.091002
Client: root
IP: 145.224.72.140
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 2,397 (Input: 1,196, Output: 1,201)
Dauer: 361 ms
Request-Zeit: 2025-12-27 11:40:41.091002
Response-Zeit: 2025-12-27 11:40:41.451513

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/orientation.py",
        "content": "\"\"\"\nPage Orientation Detection Module for KI-System Pipeline.\nDetects and corrects rotated PDF pages using multi-stage detection.\n\nStage 1: PDF metadata (\/Rotate flag) - instant, free\nStage 2: Tesseract OSD - ~50-100ms per page\nStage 3: Vision LLM fallback - for images without text (optional)\n\"\"\"\n\nimport io\n\nimport pytesseract\nfrom PIL import Image\n\nfrom config import (\n    ROTATION_DETECTION_ENABLED,\n    ROTATION_OSD_CONFIDENCE_THRESHOLD,\n)\nfrom db import db\n\n\ndef detect_orientation(image_bytes: bytes) -> dict:\n    \"\"\"\n    Detect page orientation using Tesseract OSD.\n\n    Args:\n        image_bytes: PNG\/JPEG image as bytes\n\n    Returns:\n        dict: {\n            'rotation': int (0, 90, 180, 270),\n            'confidence': float,\n            'script': str,\n            'method': str ('osd' or 'fallback')\n        }\n    \"\"\"\n    if not ROTATION_DETECTION_ENABLED:\n        return {\n            \"rotation\": 0,\n            \"confidence\": 0.0,\n            \"script\": \"Unknown\",\n            \"method\": \"disabled\",\n        }\n\n    try:\n        img = Image.open(io.BytesIO(image_bytes))\n        osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)\n\n        return {\n            \"rotation\": osd.get(\"rotate\", 0),\n            \"confidence\": osd.get(\"orientation_conf\", 0.0),\n            \"script\": osd.get(\"script\", \"Unknown\"),\n            \"method\": \"osd\",\n        }\n    except pytesseract.TesseractError as e:\n        # OSD fails on images with too little text\n        db.log(\"DEBUG\", f\"OSD detection skipped (insufficient text): {e}\")\n        return {\n            \"rotation\": 0,\n            \"confidence\": 0.0,\n            \"script\": \"Unknown\",\n            \"method\": \"fallback\",\n        }\n    except Exception as e:\n        db.log(\"WARNING\", f\"OSD detection failed: {e}\")\n        return {\n            \"rotation\": 0,\n            
\"confidence\": 0.0,\n            \"script\": \"Unknown\",\n            \"method\": \"error\",\n        }\n\n\ndef get_page_rotation(page) -> int:\n    \"\"\"\n    Get effective rotation for a PDF page.\n\n    Combines PDF metadata rotation with detected orientation.\n    Uses multi-stage detection:\n    1. PDF \/Rotate flag (instant)\n    2. Tesseract OSD if confidence threshold met\n\n    Args:\n        page: PyMuPDF page object\n\n    Returns:\n        int: Total rotation needed (0, 90, 180, 270)\n    \"\"\"\n    if not ROTATION_DETECTION_ENABLED:\n        return 0\n\n    # Stage 1: PDF metadata\n    pdf_rotation = page.rotation  # 0, 90, 180, 270\n\n    if pdf_rotation != 0:\n        db.log(\"DEBUG\", f\"PDF rotation from metadata: {pdf_rotation}°\")\n        return pdf_rotation\n\n    # Stage 2: Tesseract OSD\n    # Use lower DPI for speed (150 vs 300)\n    pix = page.get_pixmap(dpi=150)\n    img_bytes = pix.tobytes(\"png\")\n\n    detection = detect_orientation(img_bytes)\n\n    if detection[\"confidence\"] >= ROTATION_OSD_CONFIDENCE_THRESHOLD:\n        if detection[\"rotation\"] != 0:\n            db.log(\n                \"INFO\",\n                f\"OSD detected rotation: {detection['rotation']}° \"\n                f\"(confidence: {detection['confidence']:.1f})\",\n            )\n        return detection[\"rotation\"]\n\n    return 0\n\n\ndef rotate_image(image_bytes: bytes, rotation: int) -> bytes:\n    \"\"\"\n    Rotate image by specified degrees.\n\n    Args:\n        image_bytes: Original image as bytes\n        rotation: Degrees to rotate (0, 90, 180, 270)\n\n    Returns:\n        bytes: Rotated image as PNG\n    \"\"\"\n    if rotation == 0:\n        return image_bytes\n\n    img = Image.open(io.BytesIO(image_bytes))\n\n    # PIL rotation is counter-clockwise, PDF rotation is clockwise\n    # So we need to rotate by -rotation degrees\n    rotated = img.rotate(-rotation, expand=True)\n\n    output = io.BytesIO()\n    rotated.save(output, 
format=\"PNG\")\n    return output.getvalue()\n\n\ndef correct_page_orientation(page, target_dpi: int = 300) -> tuple[bytes, int]:\n    \"\"\"\n    Get correctly oriented image from PDF page.\n\n    Convenience function that combines detection and rotation.\n\n    Args:\n        page: PyMuPDF page object\n        target_dpi: DPI for final image\n\n    Returns:\n        tuple: (image_bytes, rotation_applied)\n    \"\"\"\n    rotation = get_page_rotation(page)\n\n    # Get pixmap at target DPI\n    pix = page.get_pixmap(dpi=target_dpi)\n    img_bytes = pix.tobytes(\"png\")\n\n    if rotation != 0:\n        img_bytes = rotate_image(img_bytes, rotation)\n\n    return img_bytes, rotation\n"
    }
}

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/scripts\/pipeline\/orientation.py",
        "content": "\"\"\"\nPage Orientation Detection Module for KI-System Pipeline.\nDetects and corrects rotated PDF pages using multi-stage detection.\n\nStage 1: PDF metadata (\/Rotate flag) - instant, free\nStage 2: Tesseract OSD - ~50-100ms per page\nStage 3: Vision LLM fallback - for images without text (optional)\n\"\"\"\n\nimport io\n\nimport pytesseract\nfrom PIL import Image\n\nfrom config import (\n    ROTATION_DETECTION_ENABLED,\n    ROTATION_OSD_CONFIDENCE_THRESHOLD,\n)\nfrom db import db\n\n\ndef detect_orientation(image_bytes: bytes) -> dict:\n    \"\"\"\n    Detect page orientation using Tesseract OSD.\n\n    Args:\n        image_bytes: PNG\/JPEG image as bytes\n\n    Returns:\n        dict: {\n            'rotation': int (0, 90, 180, 270),\n            'confidence': float,\n            'script': str,\n            'method': str ('osd' or 'fallback')\n        }\n    \"\"\"\n    if not ROTATION_DETECTION_ENABLED:\n        return {\n            \"rotation\": 0,\n            \"confidence\": 0.0,\n            \"script\": \"Unknown\",\n            \"method\": \"disabled\",\n        }\n\n    try:\n        img = Image.open(io.BytesIO(image_bytes))\n        osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)\n\n        return {\n            \"rotation\": osd.get(\"rotate\", 0),\n            \"confidence\": osd.get(\"orientation_conf\", 0.0),\n            \"script\": osd.get(\"script\", \"Unknown\"),\n            \"method\": \"osd\",\n        }\n    except pytesseract.TesseractError as e:\n        # OSD fails on images with too little text\n        db.log(\"DEBUG\", f\"OSD detection skipped (insufficient text): {e}\")\n        return {\n            \"rotation\": 0,\n            \"confidence\": 0.0,\n            \"script\": \"Unknown\",\n            \"method\": \"fallback\",\n        }\n    except Exception as e:\n        db.log(\"WARNING\", f\"OSD detection failed: {e}\")\n        return {\n            \"rotation\": 0,\n            
\"confidence\": 0.0,\n            \"script\": \"Unknown\",\n            \"method\": \"error\",\n        }\n\n\ndef get_page_rotation(page) -> int:\n    \"\"\"\n    Get effective rotation for a PDF page.\n\n    Combines PDF metadata rotation with detected orientation.\n    Uses multi-stage detection:\n    1. PDF \/Rotate flag (instant)\n    2. Tesseract OSD if confidence threshold met\n\n    Args:\n        page: PyMuPDF page object\n\n    Returns:\n        int: Total rotation needed (0, 90, 180, 270)\n    \"\"\"\n    if not ROTATION_DETECTION_ENABLED:\n        return 0\n\n    # Stage 1: PDF metadata\n    pdf_rotation = page.rotation  # 0, 90, 180, 270\n\n    if pdf_rotation != 0:\n        db.log(\"DEBUG\", f\"PDF rotation from metadata: {pdf_rotation}°\")\n        return pdf_rotation\n\n    # Stage 2: Tesseract OSD\n    # Use lower DPI for speed (150 vs 300)\n    pix = page.get_pixmap(dpi=150)\n    img_bytes = pix.tobytes(\"png\")\n\n    detection = detect_orientation(img_bytes)\n\n    if detection[\"confidence\"] >= ROTATION_OSD_CONFIDENCE_THRESHOLD:\n        if detection[\"rotation\"] != 0:\n            db.log(\n                \"INFO\",\n                f\"OSD detected rotation: {detection['rotation']}° \"\n                f\"(confidence: {detection['confidence']:.1f})\",\n            )\n        return detection[\"rotation\"]\n\n    return 0\n\n\ndef rotate_image(image_bytes: bytes, rotation: int) -> bytes:\n    \"\"\"\n    Rotate image by specified degrees.\n\n    Args:\n        image_bytes: Original image as bytes\n        rotation: Degrees to rotate (0, 90, 180, 270)\n\n    Returns:\n        bytes: Rotated image as PNG\n    \"\"\"\n    if rotation == 0:\n        return image_bytes\n\n    img = Image.open(io.BytesIO(image_bytes))\n\n    # PIL rotation is counter-clockwise, PDF rotation is clockwise\n    # So we need to rotate by -rotation degrees\n    rotated = img.rotate(-rotation, expand=True)\n\n    output = io.BytesIO()\n    rotated.save(output, 
format=\"PNG\")\n    return output.getvalue()\n\n\ndef correct_page_orientation(page, target_dpi: int = 300) -> tuple[bytes, int]:\n    \"\"\"\n    Get correctly oriented image from PDF page.\n\n    Convenience function that combines detection and rotation.\n\n    Args:\n        page: PyMuPDF page object\n        target_dpi: DPI for final image\n\n    Returns:\n        tuple: (image_bytes, rotation_applied)\n    \"\"\"\n    rotation = get_page_rotation(page)\n\n    # Get pixmap at target DPI\n    pix = page.get_pixmap(dpi=target_dpi)\n    img_bytes = pix.tobytes(\"png\")\n\n    if rotation != 0:\n        img_bytes = rotate_image(img_bytes, rotation)\n\n    return img_bytes, rotation\n",
        "structuredPatch": [],
        "originalFile": null
    }
}
← Vorheriger | Zur Liste | Nächster →