orientation.py

Code Hygiene Score: 98

Keine Issues gefunden.

Dependencies 6

Funktionen 4

Code

"""
Page Orientation Detection Module for KI-System Pipeline.
Detects and corrects rotated PDF pages using multi-stage detection.

Stage 1: PDF metadata (/Rotate flag) - instant, free
Stage 2: Tesseract OSD - ~50-100ms per page
Stage 3: Vision LLM fallback - for images without text (optional; not implemented in this module)
"""

import io

import pytesseract
from PIL import Image

from config import (
    ROTATION_DETECTION_ENABLED,
    ROTATION_OSD_CONFIDENCE_THRESHOLD,
)
from db import db


def detect_orientation(image_bytes: bytes) -> dict:
    """
    Run Tesseract OSD on an encoded image and report its orientation.

    Never raises: every failure is logged through ``db.log`` and mapped to
    a neutral result (rotation 0, confidence 0.0).

    Args:
        image_bytes: Encoded image (e.g. PNG/JPEG) as bytes

    Returns:
        dict: {
            'rotation': int, the OSD 'rotate' value (0 when unknown),
            'confidence': float, the OSD 'orientation_conf' value,
            'script': str, detected script or 'Unknown',
            'method': str, one of
                'osd'      - Tesseract produced a result,
                'fallback' - Tesseract raised TesseractError,
                'error'    - any other exception occurred,
                'disabled' - rotation detection is switched off
        }
    """

    def _neutral(method: str) -> dict:
        # Shared "orientation unknown" result; only the method tag varies.
        return {
            "rotation": 0,
            "confidence": 0.0,
            "script": "Unknown",
            "method": method,
        }

    if not ROTATION_DETECTION_ENABLED:
        return _neutral("disabled")

    try:
        page_image = Image.open(io.BytesIO(image_bytes))
        osd_result = pytesseract.image_to_osd(
            page_image,
            output_type=pytesseract.Output.DICT,
        )
        # Built inside the try so that any unexpected result shape is
        # still reported through the generic error path below.
        return {
            "rotation": osd_result.get("rotate", 0),
            "confidence": osd_result.get("orientation_conf", 0.0),
            "script": osd_result.get("script", "Unknown"),
            "method": "osd",
        }
    except pytesseract.TesseractError as exc:
        # OSD fails on images with too little text
        db.log("DEBUG", f"OSD detection skipped (insufficient text): {exc}")
        return _neutral("fallback")
    except Exception as exc:
        db.log("WARNING", f"OSD detection failed: {exc}")
        return _neutral("error")


def get_page_rotation(page) -> int:
    """
    Determine the rotation reported for a PDF page.

    The stages are tried in order and the first decisive one wins:
    1. PDF /Rotate flag: a non-zero value is returned immediately
    2. Tesseract OSD on a 150 DPI render: used only when its confidence
       reaches ROTATION_OSD_CONFIDENCE_THRESHOLD

    Args:
        page: PyMuPDF page object

    Returns:
        int: Rotation in degrees (0, 90, 180, 270); 0 when detection is
        disabled or OSD is not confident enough
    """
    if not ROTATION_DETECTION_ENABLED:
        return 0

    # Stage 1: PDF metadata
    metadata_angle = page.rotation  # 0, 90, 180, 270
    if metadata_angle != 0:
        db.log("DEBUG", f"PDF rotation from metadata: {metadata_angle}°")
        return metadata_angle

    # Stage 2: Tesseract OSD
    # Render at 150 DPI rather than 300 to keep detection fast.
    preview_png = page.get_pixmap(dpi=150).tobytes("png")
    osd_result = detect_orientation(preview_png)

    osd_confidence = osd_result["confidence"]
    is_trusted = osd_confidence >= ROTATION_OSD_CONFIDENCE_THRESHOLD
    if not is_trusted:
        return 0

    osd_angle = osd_result["rotation"]
    if osd_angle != 0:
        db.log(
            "INFO",
            f"OSD detected rotation: {osd_angle}° (confidence: {osd_confidence:.1f})",
        )
    return osd_angle


def rotate_image(image_bytes: bytes, rotation: int) -> bytes:
    """
    Rotate an encoded image clockwise by the given number of degrees.

    Args:
        image_bytes: Original image as bytes (any format PIL can open)
        rotation: Clockwise degrees to rotate (0, 90, 180, 270); other
            values are reduced modulo 360 first

    Returns:
        bytes: The input bytes unchanged when the effective rotation is 0,
        otherwise the rotated image re-encoded as PNG
    """
    # Treat full turns (360, -360, ...) as a no-op instead of paying for a
    # pointless decode/re-encode round trip.
    rotation %= 360
    if rotation == 0:
        return image_bytes

    # The context manager releases the source image as soon as the rotated
    # copy has been produced.
    with Image.open(io.BytesIO(image_bytes)) as img:
        # PIL rotation is counter-clockwise, PDF rotation is clockwise,
        # so we rotate by -rotation degrees. expand=True grows the canvas
        # so nothing is cropped.
        rotated = img.rotate(-rotation, expand=True)

    output = io.BytesIO()
    rotated.save(output, format="PNG")
    return output.getvalue()


def correct_page_orientation(page, target_dpi: int = 300) -> tuple[bytes, int]:
    """
    Render a PDF page as a correctly oriented PNG image.

    Convenience function that combines detection and rotation.

    Args:
        page: PyMuPDF page object
        target_dpi: DPI for final image

    Returns:
        tuple: (image_bytes, rotation) where rotation is the value from
        get_page_rotation(): either the page's /Rotate flag (applied by
        the renderer) or the OSD-detected correction (applied here)
    """
    rotation = get_page_rotation(page)

    # Get pixmap at target DPI
    pix = page.get_pixmap(dpi=target_dpi)
    img_bytes = pix.tobytes("png")

    # PyMuPDF renders pixmaps with the page's /Rotate flag already applied.
    # When that flag is set, get_page_rotation() returns it, so rotating
    # again here would turn the image twice. Only rotate for OSD results,
    # which are only produced when the flag is 0.
    rendered_with_metadata_rotation = page.rotation != 0
    if rotation != 0 and not rendered_with_metadata_rotation:
        img_bytes = rotate_image(img_bytes, rotation)

    return img_bytes, rotation
← Übersicht