{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/step_semantic.py",
"limit": 80
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/step_semantic.py",
"content": "\"\"\"\nSemantic Analysis Step Module\nHandles Layer 4: Entity extraction, relations, taxonomy, ontology.\n\nThis step runs AFTER embedding (Layer 3) and is optional\/async.\nDocuments are searchable without semantic analysis.\n\nPart of Progressive Pipeline Architecture.\n\"\"\"\n\nfrom analyzers.document_analyzer import analyze_document\n\n\nclass SemanticStep:\n \"\"\"Step: Semantic analysis of documents (Layer 4).\"\"\"\n\n def __init__(self, db, progress=None):\n \"\"\"\n Initialize semantic step.\n\n Args:\n db: Database instance\n progress: Optional PipelineProgress instance\n \"\"\"\n self.db = db\n self.progress = progress\n\n def execute(self, doc_id, text, use_anthropic=True):\n \"\"\"\n Execute full semantic analysis on a document.\n\n Args:\n doc_id: Document database ID\n text: Full document text\n use_anthropic: Use Anthropic API (True) or Ollama (False)\n\n Returns:\n dict: Analysis results with entity\/relation counts\n \"\"\"\n if self.progress:\n self.progress.update_step(\"semantic\")\n self.progress.add_log(\"Starte semantische Analyse...\")\n\n # Update document semantic status\n self._update_semantic_status(doc_id, \"processing\")\n\n try:\n # Run full analysis (entities, relations, taxonomy, ontology)\n result = analyze_document(\n document_id=doc_id,\n text=text,\n use_anthropic=use_anthropic,\n progress=self.progress,\n )\n\n # Update status based on result\n if result.get(\"entities\", 0) > 0 or result.get(\"categories\", []):\n self._update_semantic_status(doc_id, \"complete\")\n else:\n self._update_semantic_status(doc_id, \"partial\")\n\n self.db.log(\"INFO\", f\"Semantic analysis complete for doc {doc_id}: {result}\")\n\n if self.progress:\n self.progress.add_log(\n f\"Semantik: {result.get('entities', 0)} Entitäten, {result.get('relations', 0)} Relationen\"\n )\n\n return result\n\n except Exception as e:\n self._update_semantic_status(doc_id, \"error\")\n self.db.log(\"ERROR\", f\"Semantic analysis failed for doc {doc_id}: {e}\")\n if self.progress:\n self.progress.add_log(f\"Semantik-Fehler: {str(e)[:50]}\")\n raise\n\n def queue(self, doc_id, priority=5):\n \"\"\"\n Queue document for async semantic analysis.",
"numLines": 80,
"startLine": 1,
"totalLines": 228
}
}
}