{
"event": "PostToolUse",
"tool_name": "Read",
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/semantic_chunk_analyzer.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nSemantic Chunk Analyzer - Chunk-Level Semantic Analysis Pipeline\n\nAnalysiert Chunks aus der ki_system.chunks Tabelle und befüllt:\n- chunk_semantics (summary, keywords, sentiment, topics, language)\n- entities (extrahierte Entitäten)\n- chunk_entities (Verknüpfung Chunk <-> Entity)\n- entity_relations (Beziehungen zwischen Entitäten)\n- taxonomy_terms + chunk_taxonomy (Kategorisierung)\n\nBACKWARD COMPATIBILITY WRAPPER - Logic moved to semantic_chunk\/ package.\n\nUsage:\n python semantic_chunk_analyzer.py analyze [--limit N]\n python semantic_chunk_analyzer.py status\n python semantic_chunk_analyzer.py reset\n\"\"\"\n\nimport sys\n\nfrom db import db\n\n# Re-export for backward compatibility\nfrom semantic_chunk import (\n ANALYSIS_MODEL,\n BATCH_SIZE,\n ChunkRepository,\n ChunkSemantics,\n Entity,\n EntityExtractor,\n EntityRepository,\n OllamaService,\n Relation,\n RelationExtractor,\n SemanticChunkPipeline,\n SemanticsAnalyzer,\n SemanticsRepository,\n TaxonomyClassifier,\n TaxonomyRepository,\n)\n\n__all__ = [\n \"ChunkSemantics\",\n \"Entity\",\n \"Relation\",\n \"OllamaService\",\n \"ANALYSIS_MODEL\",\n \"SemanticsAnalyzer\",\n \"EntityExtractor\",\n \"RelationExtractor\",\n \"TaxonomyClassifier\",\n \"ChunkRepository\",\n \"SemanticsRepository\",\n \"EntityRepository\",\n \"TaxonomyRepository\",\n \"SemanticChunkPipeline\",\n \"BATCH_SIZE\",\n]\n\n\ndef main():\n \"\"\"CLI Entry Point.\"\"\"\n if len(sys.argv) < 2:\n print(__doc__)\n return\n\n command = sys.argv[1].lower()\n\n db.connect()\n\n try:\n pipeline = SemanticChunkPipeline()\n\n if command == \"analyze\":\n limit = BATCH_SIZE\n if len(sys.argv) > 2 and sys.argv[2] == \"--limit\":\n limit = int(sys.argv[3]) if len(sys.argv) > 3 else BATCH_SIZE\n\n result = pipeline.run(limit)\n print(f\"\\nErgebnis: {result['success']}\/{result['processed']} erfolgreich\")\n\n elif command == \"status\":\n stats = pipeline.status()\n print(f\"Chunks gesamt: {stats['total']}\")\n print(f\"Analysiert: {stats['analyzed']}\")\n print(f\"Entitäten: {stats['entities']}\")\n print(f\"Ausstehend: {stats['total'] - stats['analyzed']}\")\n\n elif command == \"reset\":\n confirm = input(\"Alle Semantik-Daten löschen? (ja\/nein): \")\n if confirm.lower() == \"ja\":\n for table in [\n \"chunk_semantics\",\n \"chunk_entities\",\n \"chunk_taxonomy\",\n \"entity_relations\",\n \"entities\",\n \"taxonomy_terms\",\n ]:\n db.execute(f\"TRUNCATE TABLE {table}\")\n db.commit()\n print(\"Alle Semantik-Tabellen geleert.\")\n\n else:\n print(f\"Unbekannter Befehl: {command}\")\n print(__doc__)\n\n except Exception as e:\n db.log(\"ERROR\", f\"Pipeline error: {e}\")\n print(f\"Fehler: {e}\")\n raise\n finally:\n db.disconnect()\n\n\nif __name__ == \"__main__\":\n main()\n",
"numLines": 119,
"startLine": 1,
"totalLines": 119
}
}
}