{
"event": "PreToolUse",
"tool_name": "Bash",
"tool_input": {
"command": "cd \/var\/www\/scripts\/pipeline && DB_PASSWORD=\"v8mFLmkF2fth_r\" .\/venv\/bin\/python -c \"\nimport mysql.connector\n\nconn = mysql.connector.connect(\n host='localhost',\n user='ki_user', \n password='v8mFLmkF2fth_r',\n database='ki_content'\n)\ncur = conn.cursor(dictionary=True)\n\n# Entities ohne Semantik\ncur.execute('''\n SELECT e.id, e.name, e.type \n FROM entities e \n LEFT JOIN entity_semantics es ON e.id = es.entity_id \n WHERE es.id IS NULL\n ORDER BY e.type, e.name\n LIMIT 100\n''')\nmissing_entities = cur.fetchall()\n\n# Chunks ohne Semantik \ncur.execute('''\n SELECT c.id, LEFT(c.content, 80) as content_preview, c.document_id\n FROM chunks c\n LEFT JOIN chunk_semantics cs ON c.id = cs.chunk_id\n WHERE cs.id IS NULL\n ORDER BY c.id\n LIMIT 50\n''')\nmissing_chunks = cur.fetchall()\n\n# Statistiken\ncur.execute('SELECT COUNT(DISTINCT entity_id) FROM entity_semantics')\nunique_entities_with_sem = cur.fetchone()['COUNT(DISTINCT entity_id)']\n\nprint('=== ENTITY SEMANTIK LÜCKE ===')\nprint(f'Entities ohne Semantik: {len(missing_entities)}')\nprint(f'Unique Entities mit Semantik: {unique_entities_with_sem}')\nprint()\n\nif missing_entities:\n print('Fehlende Entities nach Typ:')\n types = {}\n for e in missing_entities:\n t = e['type']\n types[t] = types.get(t, 0) + 1\n for t, cnt in sorted(types.items(), key=lambda x: -x[1]):\n print(f' {t}: {cnt}')\n print()\n print('Beispiele:')\n for e in missing_entities[:10]:\n print(f' [{e[\\\"id\\\"]}] {e[\\\"type\\\"]}: {e[\\\"name\\\"]}')\n\nprint()\nprint('=== CHUNK SEMANTIK LÜCKE ===')\nprint(f'Chunks ohne Semantik: {len(missing_chunks)}')\nif missing_chunks:\n print('Beispiele:')\n for c in missing_chunks[:5]:\n preview = c['content_preview'].replace('\\\\n', ' ')[:60]\n print(f' [{c[\\\"id\\\"]}] Doc {c[\\\"document_id\\\"]}: {preview}...')\n\nconn.close()\n\"",
"description": "Analyze semantic gaps"
}
}