orientation.py
- Pfad: /var/www/scripts/pipeline/orientation.py
- Namespace: pipeline
- Zeilen: 165 | Größe: 4,356 Bytes
- Geändert: 2025-12-31 03:01:09 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 98
- Dependencies: 90 (25%)
- LOC: 100 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
Keine Issues gefunden.
Dependencies 6
- use io
- use pytesseract
- use PIL.Image
- use config.ROTATION_DETECTION_ENABLED
- use config.ROTATION_OSD_CONFIDENCE_THRESHOLD
- use db.db
Funktionen 4
- detect_orientation() Zeile 22
- get_page_rotation() Zeile 74
- rotate_image() Zeile 117
- correct_page_orientation() Zeile 142
Code
"""
Page Orientation Detection Module for KI-System Pipeline.
Detects and corrects rotated PDF pages using multi-stage detection.
Stage 1: PDF metadata (/Rotate flag) - instant, free
Stage 2: Tesseract OSD - ~50-100ms per page
Stage 3: Vision LLM fallback - for images without text (optional)
"""
import io
import pytesseract
from PIL import Image
from config import (
ROTATION_DETECTION_ENABLED,
ROTATION_OSD_CONFIDENCE_THRESHOLD,
)
from db import db
def _neutral_orientation(method: str) -> dict:
    """Build the "no rotation" result dict, tagged with how it was reached."""
    return {
        "rotation": 0,
        "confidence": 0.0,
        "script": "Unknown",
        "method": method,
    }


def detect_orientation(image_bytes: bytes) -> dict:
    """
    Detect page orientation using Tesseract OSD.

    This function does not raise: every failure is logged through ``db.log``
    and reported as a neutral result (rotation 0, confidence 0.0).

    Args:
        image_bytes: Encoded image (e.g. PNG/JPEG) as bytes

    Returns:
        dict: {
            'rotation': int, the OSD "rotate" value (0 if not detected),
            'confidence': float, the OSD "orientation_conf" value,
            'script': str, the OSD "script" value ('Unknown' if absent),
            'method': str, one of
                'osd'      - Tesseract OSD produced the values,
                'fallback' - Tesseract raised TesseractError,
                'error'    - any other exception occurred,
                'disabled' - ROTATION_DETECTION_ENABLED is false
        }
    """
    if not ROTATION_DETECTION_ENABLED:
        return _neutral_orientation("disabled")

    try:
        # The context manager closes the decoded image as soon as OSD is done
        # instead of leaving the cleanup to the garbage collector.
        with Image.open(io.BytesIO(image_bytes)) as img:
            osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)
        return {
            "rotation": osd.get("rotate", 0),
            "confidence": osd.get("orientation_conf", 0.0),
            "script": osd.get("script", "Unknown"),
            "method": "osd",
        }
    except pytesseract.TesseractError as e:
        # OSD fails on images with too little text
        db.log("DEBUG", f"OSD detection skipped (insufficient text): {e}")
        return _neutral_orientation("fallback")
    except Exception as e:
        # Deliberate catch-all: orientation detection is best-effort and must
        # not abort the pipeline (e.g. undecodable image bytes).
        db.log("WARNING", f"OSD detection failed: {e}")
        return _neutral_orientation("error")
def get_page_rotation(page) -> int:
    """
    Determine the rotation to apply to a PDF page.

    Detection runs in stages and stops at the first one that yields a
    non-zero answer:
    1. The page's ``rotation`` attribute (PDF /Rotate flag); a non-zero
       value is returned as-is.
    2. Tesseract OSD on a 150-DPI rendering of the page; its result is used
       only when the reported confidence reaches
       ROTATION_OSD_CONFIDENCE_THRESHOLD.

    Always returns 0 when ROTATION_DETECTION_ENABLED is false.

    NOTE(review): PyMuPDF may already apply the page's /Rotate value when
    rendering a pixmap. If so, callers that rotate a rendered image by the
    metadata angle returned here would over-rotate it - confirm against the
    PyMuPDF documentation.

    Args:
        page: PyMuPDF page object

    Returns:
        int: Rotation in degrees (0 when nothing was detected)
    """
    if not ROTATION_DETECTION_ENABLED:
        return 0

    # Stage 1: rotation recorded in the PDF itself - no rendering needed.
    meta_angle = page.rotation
    if meta_angle != 0:
        db.log("DEBUG", f"PDF rotation from metadata: {meta_angle}°")
        return meta_angle

    # Stage 2: OSD on a reduced-resolution render (150 DPI) to keep it fast.
    preview_png = page.get_pixmap(dpi=150).tobytes("png")
    result = detect_orientation(preview_png)

    # Ignore the OSD verdict unless it is confident enough.
    if not (result["confidence"] >= ROTATION_OSD_CONFIDENCE_THRESHOLD):
        return 0

    osd_angle = result["rotation"]
    if osd_angle == 0:
        return 0

    db.log(
        "INFO",
        f"OSD detected rotation: {osd_angle}° (confidence: {result['confidence']:.1f})",
    )
    return osd_angle
def rotate_image(image_bytes: bytes, rotation: int) -> bytes:
    """
    Rotate an encoded image clockwise by the given number of degrees.

    Args:
        image_bytes: Original image as bytes
        rotation: Degrees to rotate clockwise (0, 90, 180, 270)

    Returns:
        bytes: The input bytes unchanged when ``rotation`` is 0; otherwise
        the rotated image re-encoded as PNG.
    """
    if rotation == 0:
        # Nothing to do - skip the decode/encode round trip entirely.
        return image_bytes

    output = io.BytesIO()
    # The context manager closes the source image once the rotated copy has
    # been written, instead of leaving the cleanup to the garbage collector.
    with Image.open(io.BytesIO(image_bytes)) as img:
        # PIL rotation is counter-clockwise, PDF rotation is clockwise,
        # so rotate by -rotation degrees. expand=True grows the canvas so
        # that 90/270 degree turns are not cropped.
        rotated = img.rotate(-rotation, expand=True)
        rotated.save(output, format="PNG")
    return output.getvalue()
def correct_page_orientation(page, target_dpi: int = 300) -> tuple[bytes, int]:
    """
    Render a PDF page as a PNG and apply the detected rotation to it.

    Convenience wrapper: asks get_page_rotation() for the angle, renders the
    page at ``target_dpi`` and, if the angle is non-zero, passes the rendered
    image through rotate_image().

    NOTE(review): if PyMuPDF already honours the page's /Rotate flag while
    rendering, an angle that came from PDF metadata would be applied twice
    here - confirm against the PyMuPDF documentation.

    Args:
        page: PyMuPDF page object
        target_dpi: DPI for final image

    Returns:
        tuple: (image_bytes, rotation_applied)
    """
    angle = get_page_rotation(page)

    # Render at the resolution the caller asked for.
    rendered_png = page.get_pixmap(dpi=target_dpi).tobytes("png")

    if angle == 0:
        return rendered_png, angle
    return rotate_image(rendered_png, angle), angle