{
"event": "PostToolUse",
"tool_name": "Read",
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/document_analyzer.py",
"content": "\"\"\"\nDocument Analyzer - Full semantic analysis of documents.\n\"\"\"\n\nimport re\nimport sys\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .client import get_anthropic_client\nfrom .entity_extractor import extract_entities_anthropic, extract_entities_ollama, store_entities\nfrom .relation_extractor import extract_relations\nfrom .semantic_analyzer import analyze_chunks_semantics\nfrom .taxonomy_classifier import classify_taxonomy\n\n\ndef analyze_document(document_id: int, text: str, use_anthropic: bool = True, progress=None) -> dict:\n \"\"\"\n Full semantic analysis of a document.\n Extracts entities, relations, and taxonomy classification.\n \"\"\"\n db.log(\"INFO\", f\"Starting semantic analysis for document {document_id}\")\n\n if progress:\n progress.add_log(\"Analyse: Starte Entity-Extraktion...\")\n\n client = get_anthropic_client() if use_anthropic else None\n\n # Extract entities\n if client:\n entities = extract_entities_anthropic(text, client)\n else:\n entities = extract_entities_ollama(text)\n\n db.log(\"INFO\", f\"Extracted {len(entities)} entities\")\n if progress:\n progress.add_log(f\"Analyse: {len(entities)} Entitäten extrahiert\")\n\n relations = []\n\n # Store entities\n if entities:\n stored = store_entities(document_id, entities)\n db.log(\"INFO\", f\"Stored {stored} entities\")\n if progress:\n progress.add_log(f\"Analyse: {stored} Entitäten gespeichert\")\n\n # Extract relations\n if progress:\n progress.add_log(\"Analyse: Extrahiere Relationen...\")\n relations = extract_relations(text, entities, client)\n db.log(\"INFO\", f\"Extracted {len(relations)} relations\")\n if progress:\n progress.add_log(f\"Analyse: {len(relations)} Relationen extrahiert\")\n\n # Store relations\n for rel in relations:\n try:\n cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"source\"],))\n source = cursor.fetchone()\n cursor.close()\n\n cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"target\"],))\n target = cursor.fetchone()\n cursor.close()\n\n if source and target:\n cursor = db.execute(\n \"\"\"INSERT IGNORE INTO entity_relations\n (source_entity_id, target_entity_id, relation_type, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (source[\"id\"], target[\"id\"], rel[\"relation\"]),\n )\n db.commit()\n cursor.close()\n\n cursor = db.execute(\n \"\"\"INSERT IGNORE INTO entity_ontology\n (source_entity_id, target_entity_id, relation_type, direction,\n strength, source_type, source_id, created_at)\n VALUES (%s, %s, %s, 'unidirectional', 1.0, 'document', %s, NOW())\"\"\",\n (source[\"id\"], target[\"id\"], rel[\"relation\"], document_id),\n )\n db.commit()\n cursor.close()\n except Exception as e:\n db.log(\"WARNING\", f\"Failed to store relation: {e}\")\n\n # Taxonomy classification\n if progress:\n progress.add_log(\"Analyse: Klassifiziere Taxonomie...\")\n taxonomy = classify_taxonomy(text, client)\n db.log(\"INFO\", f\"Classified into {len(taxonomy.get('categories', []))} categories\")\n if progress:\n progress.add_log(f\"Analyse: {len(taxonomy.get('categories', []))} Kategorien zugewiesen\")\n\n # Store taxonomy assignments\n for category in taxonomy.get(\"categories\", []):",
"numLines": 100,
"startLine": 1,
"totalLines": 283
}
}
}