Protokoll #24738
| ID | 24738 |
|---|---|
| Zeitstempel | 2025-12-28 01:10:13.209746 |
| Client | root |
| IP | Warning: Undefined array key "request_ip" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13 Deprecated: htmlspecialchars(): Passing null to parameter #1 ($string) of type string is deprecated in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13 |
| Modell | claude-sonnet-4-20250514 |
| Status | pending |
| Tokens | 1,255 (Input: 0, Output: 0) |
| Dauer | - |
| Request-Zeit | Warning: Undefined array key "request_timestamp" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 18 |
| Response-Zeit | - |
Warning: Undefined array key "error_message" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 22
Request
{
"event": "PostToolUse",
"tool_name": "Read",
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/enrich.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nVision-Enrichment module for KI-System Pipeline.\nEnriches chunks with visual context from page-level vision analysis.\n\nUsage:\n python enrich.py <document_id> # Enrich all chunks for a document\n python enrich.py --all # Enrich all documents with vision data\n\"\"\"\n\nimport json\nimport re\nimport sys\n\nfrom db import db\n\n\ndef get_vision_context(document_id, page_number):\n \"\"\"\n Get vision analysis for a specific page.\n\n Returns dict with structured vision info or None.\n \"\"\"\n cursor = db.execute(\n \"\"\"SELECT vision_analysis\n FROM document_pages\n WHERE document_id = %s AND page_number = %s\"\"\",\n (document_id, page_number),\n )\n result = cursor.fetchone()\n cursor.close()\n\n if not result or not result.get(\"vision_analysis\"):\n return None\n\n try:\n vision_data = json.loads(result[\"vision_analysis\"])\n return vision_data\n except (json.JSONDecodeError, TypeError):\n return None\n\n\ndef extract_vision_summary(vision_data):\n \"\"\"\n Extract key information from vision analysis for chunk enrichment.\n\n Returns compact dict with:\n - detected_elements: list of visual elements found\n - page_title: extracted title if any\n - has_images: bool\n - has_charts: bool\n - has_tables: bool\n - layout_type: detected layout style\n - key_topics: extracted key topics\/concepts\n \"\"\"\n if not vision_data:\n return None\n\n analysis_text = vision_data.get(\"analysis\", \"\")\n if not analysis_text:\n return None\n\n summary = {\n \"detected_elements\": [],\n \"page_title\": None,\n \"has_images\": False,\n \"has_charts\": False,\n \"has_tables\": False,\n \"layout_type\": \"standard\",\n \"key_topics\": [],\n \"vision_tokens\": vision_data.get(\"tokens\", 0),\n }\n\n # Detect visual elements\n analysis_lower = analysis_text.lower()\n\n # Check for images\n if any(word in analysis_lower for word in [\"bild\", \"foto\", \"image\", \"abbildung\", \"grafik\"]): # noqa: SIM102\n if \"keine\" not in analysis_lower.split(\"bild\")[0][-20:] if \"bild\" in analysis_lower else True:\n summary[\"has_images\"] = True\n summary[\"detected_elements\"].append(\"images\")\n\n # Check for charts\/diagrams\n if any(word in analysis_lower for word in [\"diagramm\", \"chart\", \"graph\", \"schaubild\"]): # noqa: SIM102\n if \"keine\" not in analysis_lower.split(\"diagramm\")[0][-20:] if \"diagramm\" in analysis_lower else True:\n summary[\"has_charts\"] = True\n summary[\"detected_elements\"].append(\"charts\")\n\n # Check for tables\n if any(word in analysis_lower for word in [\"tabelle\", \"table\", \"übersicht\"]): # noqa: SIM102\n if \"keine\" not in analysis_lower.split(\"tabelle\")[0][-20:] if \"tabelle\" in analysis_lower else True:\n summary[\"has_tables\"] = True\n summary[\"detected_elements\"].append(\"tables\")\n\n # Check for callouts\/highlights\n if any(word in analysis_lower for word in [\"callout\", \"hervorhebung\", \"box\", \"kasten\", \"zitat\"]):\n summary[\"detected_elements\"].append(\"callouts\")\n\n # Extract title (look for patterns like \"Titel: X\" or \"Überschrift: X\")\n title_patterns = [\n r'[\"\\']([^\"\\']{5,60})[\"\\']', # Quoted strings\n r'Titel[:\\s]+[\"\\']?([^\"\\'\\n]{5,60})',\n r'Überschrift[:\\s]+[\"\\']?([^\"\\'\\n]{5,60})',\n ]\n\n for pattern in title_patterns:\n match = re.search(pattern, analysis_text)\n if match:\n potential_title = match.group(1).strip()\n # Filter out common non-titles\n if not any(\n skip in potential_title.lower() for skip in [\"keine\", \"nicht\", \"gibt es\", \"vorhanden\", \"enthält\"]\n ):\n summary[\"page_title\"] = potential_title[:100]\n break\n\n # Detect layout type\n if any(word in analysis_lower for word in [\"zwei spalten\", \"zweispaltig\", \"columns\"]):\n summary[\"layout_type\"] = \"two-column\"\n elif any(word in analysis_lower for word in [\"liste\", \"aufzählung\", \"bullet\"]):\n summary[\"layout_type\"] = \"list\"\n elif any(word in analysis_lower for word in [\"vollbild\", \"full page\", \"ganzseitig\"]):\n summary[\"layout_type\"] = \"full-page\"\n\n # Extract key topics (look for bold\/emphasized terms)\n bold_pattern = r\"\\*\\*([^*]+)\\*\\*\"\n bold_matches = re.findall(bold_pattern, analysis_text)\n if bold_matches:\n # Filter and dedupe\n topics = []",
"numLines": 130,
"startLine": 1,
"totalLines": 298
}
}
}
Response
-