{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/orientation.py",
"content": "\"\"\"\nPage Orientation Detection Module for KI-System Pipeline.\nDetects and corrects rotated PDF pages using multi-stage detection.\n\nStage 1: PDF metadata (\/Rotate flag) - instant, free\nStage 2: Tesseract OSD - ~50-100ms per page\nStage 3: Vision LLM fallback - for images without text (optional)\n\"\"\"\n\nimport io\n\nimport pytesseract\nfrom PIL import Image\n\nfrom config import (\n ROTATION_DETECTION_ENABLED,\n ROTATION_OSD_CONFIDENCE_THRESHOLD,\n)\nfrom db import db\n\n\ndef detect_orientation(image_bytes: bytes) -> dict:\n \"\"\"\n Detect page orientation using Tesseract OSD.\n\n Args:\n image_bytes: PNG\/JPEG image as bytes\n\n Returns:\n dict: {\n 'rotation': int (0, 90, 180, 270),\n 'confidence': float,\n 'script': str,\n 'method': str ('osd' or 'fallback')\n }\n \"\"\"\n if not ROTATION_DETECTION_ENABLED:\n return {\n \"rotation\": 0,\n \"confidence\": 0.0,\n \"script\": \"Unknown\",\n \"method\": \"disabled\",\n }\n\n try:\n img = Image.open(io.BytesIO(image_bytes))\n osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)\n\n return {\n \"rotation\": osd.get(\"rotate\", 0),\n \"confidence\": osd.get(\"orientation_conf\", 0.0),\n \"script\": osd.get(\"script\", \"Unknown\"),\n \"method\": \"osd\",\n }\n except pytesseract.TesseractError as e:\n # OSD fails on images with too little text\n db.log(\"DEBUG\", f\"OSD detection skipped (insufficient text): {e}\")\n return {\n \"rotation\": 0,\n \"confidence\": 0.0,\n \"script\": \"Unknown\",\n \"method\": \"fallback\",\n }\n except Exception as e:\n db.log(\"WARNING\", f\"OSD detection failed: {e}\")\n return {\n \"rotation\": 0,\n \"confidence\": 0.0,\n \"script\": \"Unknown\",\n \"method\": \"error\",\n }\n\n\ndef get_page_rotation(page) -> int:\n \"\"\"\n Get effective rotation for a PDF page.\n\n Combines PDF metadata rotation with detected orientation.\n Uses multi-stage detection:\n 1. PDF \/Rotate flag (instant)\n 2. Tesseract OSD if confidence threshold met\n\n Args:\n page: PyMuPDF page object\n\n Returns:\n int: Total rotation needed (0, 90, 180, 270)\n \"\"\"\n if not ROTATION_DETECTION_ENABLED:\n return 0\n\n # Stage 1: PDF metadata\n pdf_rotation = page.rotation # 0, 90, 180, 270\n\n if pdf_rotation != 0:\n db.log(\"DEBUG\", f\"PDF rotation from metadata: {pdf_rotation}°\")\n return pdf_rotation\n\n # Stage 2: Tesseract OSD\n # Use lower DPI for speed (150 vs 300)\n pix = page.get_pixmap(dpi=150)\n img_bytes = pix.tobytes(\"png\")\n\n detection = detect_orientation(img_bytes)\n\n if detection[\"confidence\"] >= ROTATION_OSD_CONFIDENCE_THRESHOLD:\n if detection[\"rotation\"] != 0:\n db.log(\n \"INFO\",\n f\"OSD detected rotation: {detection['rotation']}° \"\n f\"(confidence: {detection['confidence']:.1f})\",\n )\n return detection[\"rotation\"]\n\n return 0\n\n\ndef rotate_image(image_bytes: bytes, rotation: int) -> bytes:\n \"\"\"\n Rotate image by specified degrees.\n\n Args:\n image_bytes: Original image as bytes\n rotation: Degrees to rotate (0, 90, 180, 270)\n\n Returns:\n bytes: Rotated image as PNG\n \"\"\"\n if rotation == 0:\n return image_bytes\n\n img = Image.open(io.BytesIO(image_bytes))\n\n # PIL rotation is counter-clockwise, PDF rotation is clockwise\n # So we need to rotate by -rotation degrees\n rotated = img.rotate(-rotation, expand=True)\n\n output = io.BytesIO()\n rotated.save(output, format=\"PNG\")\n return output.getvalue()\n\n\ndef correct_page_orientation(page, target_dpi: int = 300) -> tuple[bytes, int]:\n \"\"\"\n Get correctly oriented image from PDF page.\n\n Convenience function that combines detection and rotation.\n\n Args:\n page: PyMuPDF page object\n target_dpi: DPI for final image\n\n Returns:\n tuple: (image_bytes, rotation_applied)\n \"\"\"\n rotation = get_page_rotation(page)\n\n # Get pixmap at target DPI\n pix = page.get_pixmap(dpi=target_dpi)\n img_bytes = pix.tobytes(\"png\")\n\n if rotation != 0:\n img_bytes = rotate_image(img_bytes, rotation)\n\n return img_bytes, rotation\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/scripts\/pipeline\/orientation.py",
"content": "\"\"\"\nPage Orientation Detection Module for KI-System Pipeline.\nDetects and corrects rotated PDF pages using multi-stage detection.\n\nStage 1: PDF metadata (\/Rotate flag) - instant, free\nStage 2: Tesseract OSD - ~50-100ms per page\nStage 3: Vision LLM fallback - for images without text (optional)\n\"\"\"\n\nimport io\n\nimport pytesseract\nfrom PIL import Image\n\nfrom config import (\n ROTATION_DETECTION_ENABLED,\n ROTATION_OSD_CONFIDENCE_THRESHOLD,\n)\nfrom db import db\n\n\ndef detect_orientation(image_bytes: bytes) -> dict:\n \"\"\"\n Detect page orientation using Tesseract OSD.\n\n Args:\n image_bytes: PNG\/JPEG image as bytes\n\n Returns:\n dict: {\n 'rotation': int (0, 90, 180, 270),\n 'confidence': float,\n 'script': str,\n 'method': str ('osd' or 'fallback')\n }\n \"\"\"\n if not ROTATION_DETECTION_ENABLED:\n return {\n \"rotation\": 0,\n \"confidence\": 0.0,\n \"script\": \"Unknown\",\n \"method\": \"disabled\",\n }\n\n try:\n img = Image.open(io.BytesIO(image_bytes))\n osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)\n\n return {\n \"rotation\": osd.get(\"rotate\", 0),\n \"confidence\": osd.get(\"orientation_conf\", 0.0),\n \"script\": osd.get(\"script\", \"Unknown\"),\n \"method\": \"osd\",\n }\n except pytesseract.TesseractError as e:\n # OSD fails on images with too little text\n db.log(\"DEBUG\", f\"OSD detection skipped (insufficient text): {e}\")\n return {\n \"rotation\": 0,\n \"confidence\": 0.0,\n \"script\": \"Unknown\",\n \"method\": \"fallback\",\n }\n except Exception as e:\n db.log(\"WARNING\", f\"OSD detection failed: {e}\")\n return {\n \"rotation\": 0,\n \"confidence\": 0.0,\n \"script\": \"Unknown\",\n \"method\": \"error\",\n }\n\n\ndef get_page_rotation(page) -> int:\n \"\"\"\n Get effective rotation for a PDF page.\n\n Combines PDF metadata rotation with detected orientation.\n Uses multi-stage detection:\n 1. PDF \/Rotate flag (instant)\n 2. Tesseract OSD if confidence threshold met\n\n Args:\n page: PyMuPDF page object\n\n Returns:\n int: Total rotation needed (0, 90, 180, 270)\n \"\"\"\n if not ROTATION_DETECTION_ENABLED:\n return 0\n\n # Stage 1: PDF metadata\n pdf_rotation = page.rotation # 0, 90, 180, 270\n\n if pdf_rotation != 0:\n db.log(\"DEBUG\", f\"PDF rotation from metadata: {pdf_rotation}°\")\n return pdf_rotation\n\n # Stage 2: Tesseract OSD\n # Use lower DPI for speed (150 vs 300)\n pix = page.get_pixmap(dpi=150)\n img_bytes = pix.tobytes(\"png\")\n\n detection = detect_orientation(img_bytes)\n\n if detection[\"confidence\"] >= ROTATION_OSD_CONFIDENCE_THRESHOLD:\n if detection[\"rotation\"] != 0:\n db.log(\n \"INFO\",\n f\"OSD detected rotation: {detection['rotation']}° \"\n f\"(confidence: {detection['confidence']:.1f})\",\n )\n return detection[\"rotation\"]\n\n return 0\n\n\ndef rotate_image(image_bytes: bytes, rotation: int) -> bytes:\n \"\"\"\n Rotate image by specified degrees.\n\n Args:\n image_bytes: Original image as bytes\n rotation: Degrees to rotate (0, 90, 180, 270)\n\n Returns:\n bytes: Rotated image as PNG\n \"\"\"\n if rotation == 0:\n return image_bytes\n\n img = Image.open(io.BytesIO(image_bytes))\n\n # PIL rotation is counter-clockwise, PDF rotation is clockwise\n # So we need to rotate by -rotation degrees\n rotated = img.rotate(-rotation, expand=True)\n\n output = io.BytesIO()\n rotated.save(output, format=\"PNG\")\n return output.getvalue()\n\n\ndef correct_page_orientation(page, target_dpi: int = 300) -> tuple[bytes, int]:\n \"\"\"\n Get correctly oriented image from PDF page.\n\n Convenience function that combines detection and rotation.\n\n Args:\n page: PyMuPDF page object\n target_dpi: DPI for final image\n\n Returns:\n tuple: (image_bytes, rotation_applied)\n \"\"\"\n rotation = get_page_rotation(page)\n\n # Get pixmap at target DPI\n pix = page.get_pixmap(dpi=target_dpi)\n img_bytes = pix.tobytes(\"png\")\n\n if rotation != 0:\n img_bytes = rotate_image(img_bytes, rotation)\n\n return img_bytes, rotation\n",
"structuredPatch": [],
"originalFile": null
}
}