{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/generate_entity_descriptions.py",
"old_string": "def get_entities_to_enrich(db: Database, limit: int = 100) -> list:",
"new_string": "def get_entities_to_enrich(db: Database, limit: int = DEFAULT_LIMIT) -> list:"
}
}
{
"tool_response": {
"filePath": "\/var\/www\/scripts\/pipeline\/generate_entity_descriptions.py",
"oldString": "def get_entities_to_enrich(db: Database, limit: int = 100) -> list:",
"newString": "def get_entities_to_enrich(db: Database, limit: int = DEFAULT_LIMIT) -> list:",
"originalFile": "#!\/usr\/bin\/env python3\n\"\"\"\nEntity Description Enrichment Script\n=====================================\nGeneriert ausführliche Beschreibungen für Entitäten via Ollama.\n\nVoraussetzungen:\n- Ollama läuft lokal (http:\/\/localhost:11434)\n- Prompt in ki_content.prompts (use_case='entity_description')\n\nUsage:\n python generate_entity_descriptions.py [--model MODEL] [--limit N] [--dry-run]\n\"\"\"\n\nimport argparse\nimport os\nimport sys\nimport time\nfrom typing import Optional\n\nimport requests\n\n# Add parent directory for imports\nsys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))\n\nfrom constants import DEFAULT_LIMIT, OLLAMA_TIMEOUT\nfrom db import Database\n\n# Configuration\nOLLAMA_URL = \"http:\/\/localhost:11434\/api\/generate\"\nDEFAULT_MODEL = \"mistral:latest\"\nMIN_DESCRIPTION_LENGTH = 50 # Entities with description < this get enriched\n\n\ndef get_prompt_template(db: Database) -> Optional[str]:\n \"\"\"Load prompt template from database.\"\"\"\n cursor = db.execute(\"\"\"\n SELECT content FROM prompts\n WHERE use_case = 'entity_description' AND is_active = 1\n ORDER BY id DESC LIMIT 1\n \"\"\")\n row = cursor.fetchone()\n cursor.close()\n return row[\"content\"] if row else None\n\n\ndef get_entities_to_enrich(db: Database, limit: int = 100) -> list:\n \"\"\"Get entities with short or missing descriptions.\"\"\"\n cursor = db.execute(\"\"\"\n SELECT id, name, type, description\n FROM entities\n WHERE description IS NULL\n OR CHAR_LENGTH(description) < %s\n ORDER BY id\n LIMIT %s\n \"\"\", (MIN_DESCRIPTION_LENGTH, limit))\n entities = cursor.fetchall()\n cursor.close()\n return entities\n\n\ndef get_entity_context(db: Database, entity_id: int, max_chunks: int = 3) -> str:\n \"\"\"Get context from chunks where this entity appears.\"\"\"\n cursor = db.execute(\"\"\"\n SELECT c.content\n FROM chunk_entities ce\n JOIN chunks c ON ce.chunk_id = c.id\n WHERE ce.entity_id = %s\n LIMIT %s\n \"\"\", (entity_id, max_chunks))\n chunks = cursor.fetchall()\n cursor.close()\n\n if not chunks:\n return \"(Kein Kontext verfügbar)\"\n\n return \"\\n\\n---\\n\\n\".join(chunk[\"content\"][:500] for chunk in chunks)\n\n\ndef call_ollama(prompt: str, model: str) -> Optional[str]:\n \"\"\"Call Ollama API and return generated text.\"\"\"\n try:\n response = requests.post(\n OLLAMA_URL,\n json={\n \"model\": model,\n \"prompt\": prompt,\n \"stream\": False,\n \"options\": {\n \"temperature\": 0.7,\n \"num_predict\": 300,\n }\n },\n timeout=60\n )\n response.raise_for_status()\n result = response.json()\n return result.get(\"response\", \"\").strip()\n except requests.exceptions.RequestException as e:\n print(f\" ✗ Ollama error: {e}\")\n return None\n\n\ndef update_entity_description(db: Database, entity_id: int, description: str) -> bool:\n \"\"\"Update entity description in database.\"\"\"\n try:\n db.execute(\"\"\"\n UPDATE entities SET description = %s WHERE id = %s\n \"\"\", (description, entity_id))\n db.commit()\n return True\n except Exception as e:\n print(f\" ✗ DB error: {e}\")\n return False\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"Generate entity descriptions via Ollama\")\n parser.add_argument(\"--model\", default=DEFAULT_MODEL, help=\"Ollama model to use\")\n parser.add_argument(\"--limit\", type=int, default=50, help=\"Max entities to process\")\n parser.add_argument(\"--dry-run\", action=\"store_true\", help=\"Show what would be done\")\n parser.add_argument(\"--verbose\", \"-v\", action=\"store_true\", help=\"Show generated descriptions\")\n args = parser.parse_args()\n\n print(f\"Entity Description Enrichment\")\n print(f\"Model: {args.model}\")\n print(f\"Limit: {args.limit}\")\n print(\"-\" * 50)\n\n # Connect to database\n db = Database()\n if not db.connect():\n print(\"✗ Could not connect to database\")\n return 1\n\n # Get prompt template\n prompt_template = get_prompt_template(db)\n if not prompt_template:\n print(\"✗ No active prompt found for use_case='entity_description'\")\n db.disconnect()\n return 1\n\n print(f\"✓ Prompt loaded\")\n\n # Get entities to enrich\n entities = get_entities_to_enrich(db, args.limit)\n print(f\"✓ Found {len(entities)} entities with short descriptions\")\n\n if not entities:\n print(\"Nothing to do.\")\n db.disconnect()\n return 0\n\n # Process entities\n success_count = 0\n error_count = 0\n\n for i, entity in enumerate(entities, 1):\n print(f\"\\n[{i}\/{len(entities)}] {entity['name']} ({entity['type']})\")\n\n # Get context\n context = get_entity_context(db, entity[\"id\"])\n\n # Build prompt\n prompt = prompt_template.format(\n entity_name=entity[\"name\"],\n entity_type=entity[\"type\"],\n context=context[:1500] # Limit context size\n )\n\n if args.dry_run:\n print(f\" Would generate description...\")\n print(f\" Context: {len(context)} chars\")\n continue\n\n # Call Ollama\n description = call_ollama(prompt, args.model)\n\n if not description:\n error_count += 1\n continue\n\n if args.verbose:\n print(f\" Generated: {description[:100]}...\")\n\n # Update database\n if update_entity_description(db, entity[\"id\"], description):\n success_count += 1\n print(f\" ✓ Updated ({len(description)} chars)\")\n else:\n error_count += 1\n\n # Small delay to not overload Ollama\n time.sleep(0.5)\n\n # Summary\n print(\"\\n\" + \"=\" * 50)\n print(f\"Summary:\")\n print(f\" Processed: {len(entities)}\")\n print(f\" Success: {success_count}\")\n print(f\" Errors: {error_count}\")\n\n db.disconnect()\n return 0 if error_count == 0 else 1\n\n\nif __name__ == \"__main__\":\n sys.exit(main())\n",
"structuredPatch": [
{
"oldStart": 44,
"oldLines": 7,
"newStart": 44,
"newLines": 7,
"lines": [
" return row[\"content\"] if row else None",
" ",
" ",
"-def get_entities_to_enrich(db: Database, limit: int = 100) -> list:",
"+def get_entities_to_enrich(db: Database, limit: int = DEFAULT_LIMIT) -> list:",
" \"\"\"Get entities with short or missing descriptions.\"\"\"",
" cursor = db.execute(\"\"\"",
" SELECT id, name, type, description"
]
}
],
"userModified": false,
"replaceAll": false
}
}