{
"event": "PreToolUse",
"tool_name": "Bash",
"tool_input": {
"command": "cat << 'PYTHON' | DB_PASSWORD=\"v8mFLmkF2fth_r\" timeout 300 .\/venv\/bin\/python\nimport sys\nimport json\nimport requests\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\nfrom db import db\ndb.connect()\n\nOLLAMA_HOST = \"http:\/\/localhost:11434\"\nMODEL = \"gemma3:27b-it-qat\"\n\n# Get prompt directly\ncursor = db.execute(\"SELECT content FROM prompts WHERE name = 'entity_extraction_pass2'\")\nrow = cursor.fetchone()\ncursor.close()\nprompt_template = row['content'] if row else None\n\nif not prompt_template:\n print(\"ERROR: Prompt not found!\")\n sys.exit(1)\n\n# Get all PERSON entities\ncursor = db.execute(\"SELECT id, name, type FROM entities WHERE type = 'PERSON'\")\npersons = cursor.fetchall()\ncursor.close()\n\nprint(f\"Re-kategorisiere {len(persons)} PERSON-Einträge...\")\nprint(\"=\" * 70)\n\nentity_names = [p['name'] for p in persons]\nentities_json = json.dumps(entity_names, ensure_ascii=False)\n\nprompt = prompt_template.replace(\"{entities}\", entities_json)\n\nprint(\"Calling LLM...\")\nresp = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120\n)\ndata = resp.json()\n\nprint(f\"Response: {data.get('response', '')[:500]}\")\nprint()\n\nresult = json.loads(data.get(\"response\", \"{}\"))\ncategorized = result.get(\"kategorisiert\", [])\n\nprint(f\"Got {len(categorized)} categorizations\")\n\n# Update entities\nupdated = 0\nfor cat in categorized:\n name = cat.get(\"name\")\n new_type = cat.get(\"type\", \"\").upper()\n \n if new_type and new_type != \"PERSON\":\n cursor = db.execute(\n \"UPDATE entities SET type = %s WHERE name = %s AND type = 'PERSON'\",\n (new_type, name)\n )\n if cursor.rowcount > 0:\n print(f\" {name}: PERSON -> {new_type}\")\n updated += 1\n db.commit()\n cursor.close()\n\nprint(f\"\\nUpdated {updated} entities\")\n\n# Show new stats\ncursor = db.execute(\"SELECT type, COUNT(*) as cnt FROM entities GROUP BY type ORDER BY cnt DESC\")\nprint(\"\\nNeue Verteilung:\")\nfor t in cursor.fetchall():\n print(f\" {t['type']:12s}: {t['cnt']}\")\ncursor.close()\nPYTHON",
"timeout": 300000,
"description": "Re-categorize with direct DB query"
}
}