{
"event": "PreToolUse",
"tool_name": "Bash",
"tool_input": {
"command": "source \/etc\/environment 2>\/dev\/null; .\/venv\/bin\/python -c \"\nfrom db import Database\n\ndb = Database()\ndb.connect()\n\n# Entities ohne Semantik\ndb.execute('''\n SELECT e.id, e.name, e.type \n FROM entities e \n LEFT JOIN entity_semantics es ON e.id = es.entity_id \n WHERE es.id IS NULL\n ORDER BY e.type, e.name\n''')\nmissing_entities = db.cursor.fetchall()\n\n# Chunks ohne Semantik \ndb.execute('''\n SELECT c.id, LEFT(c.content, 80) as content_preview, c.document_id\n FROM chunks c\n LEFT JOIN chunk_semantics cs ON c.id = cs.chunk_id\n WHERE cs.id IS NULL\n ORDER BY c.id\n''')\nmissing_chunks = db.cursor.fetchall()\n\n# Statistiken\ndb.execute('SELECT COUNT(DISTINCT entity_id) FROM entity_semantics')\nunique_sem = db.cursor.fetchone()[0]\n\ndb.execute('SELECT COUNT(DISTINCT chunk_id) FROM chunk_semantics')\nunique_chunk_sem = db.cursor.fetchone()[0]\n\nprint('=== ENTITY SEMANTIK LÜCKE ===')\nprint(f'Entities ohne Semantik: {len(missing_entities)}')\nprint(f'Unique Entities mit Semantik: {unique_sem}')\nprint()\n\nif missing_entities:\n print('Fehlende Entities nach Typ:')\n types = {}\n for e in missing_entities:\n t = e[2] # type\n types[t] = types.get(t, 0) + 1\n for t, cnt in sorted(types.items(), key=lambda x: -x[1]):\n print(f' {t}: {cnt}')\n print()\n print('Beispiele (erste 15):')\n for e in missing_entities[:15]:\n print(f' [{e[0]}] {e[2]}: {e[1]}')\n\nprint()\nprint('=== CHUNK SEMANTIK LÜCKE ===')\nprint(f'Chunks ohne Semantik: {len(missing_chunks)}')\nprint(f'Unique Chunks mit Semantik: {unique_chunk_sem}')\nif missing_chunks:\n print()\n print('Beispiele (erste 10):')\n for c in missing_chunks[:10]:\n preview = str(c[1]).replace('\\\\n', ' ')[:50] if c[1] else '(leer)'\n print(f' [{c[0]}] Doc {c[2]}: {preview}...')\n\ndb.disconnect()\n\"",
"description": "Analyze semantic gaps via pipeline"
}
}