{
"tool_response": {
"filePath": "\/var\/www\/scripts\/pipeline\/step_entity_enrich.py",
"oldString": " def execute(self, limit=100, model=DEFAULT_MODEL):",
"newString": " def execute(self, limit=DEFAULT_LIMIT, model=DEFAULT_MODEL):",
"originalFile": "\"\"\"\nEntity Enrichment Step Module\nGenerates detailed descriptions for entities via Ollama.\n\nThis step runs AFTER semantic analysis and enriches entities\nwith comprehensive descriptions (3-5 sentences).\n\nPart of Progressive Pipeline Architecture.\n\"\"\"\n\nimport time\n\nimport requests\n\nfrom constants import DEFAULT_LIMIT, OLLAMA_TIMEOUT\nfrom db import db\n\n# Configuration\nOLLAMA_URL = \"http:\/\/localhost:11434\/api\/generate\"\nDEFAULT_MODEL = \"mistral:latest\"\nMIN_DESCRIPTION_LENGTH = 50\n\n\nclass EntityEnrichStep:\n \"\"\"Step: Enrich entity descriptions via Ollama.\"\"\"\n\n def __init__(self, progress=None):\n \"\"\"\n Initialize entity enrichment step.\n\n Args:\n progress: Optional PipelineProgress instance\n \"\"\"\n self.progress = progress\n self.prompt_template = None\n\n def _load_prompt(self):\n \"\"\"Load prompt template from database.\"\"\"\n if self.prompt_template:\n return self.prompt_template\n\n cursor = db.execute(\"\"\"\n SELECT content FROM prompts\n WHERE use_case = 'entity_description' AND is_active = 1\n ORDER BY id DESC LIMIT 1\n \"\"\")\n row = cursor.fetchone()\n cursor.close()\n\n if row:\n self.prompt_template = row[\"content\"]\n else:\n # Fallback prompt\n self.prompt_template = \"\"\"Du bist ein Experte für systemisches Coaching und Organisationsentwicklung.\n\nAufgabe: Erstelle eine ausführliche Beschreibung für die folgende Entität.\n\nEntität: {entity_name}\nTyp: {entity_type}\nAktueller Kontext aus dem Dokument:\n{context}\n\nAnforderungen an die Beschreibung:\n1. Erster Satz: Grundsätzliche Definition des Begriffs\n2. Weitere 2-4 Sätze: Erläuterung der Bedeutung im Kontext von systemischem Coaching, Teamarbeit oder Organisationsentwicklung\n3. Falls relevant: Praktische Anwendung oder Beispiele\n\nSchreibe NUR die Beschreibung (3-5 Sätze), keine Überschriften oder Formatierung.\nSprache: Deutsch\"\"\"\n\n return self.prompt_template\n\n def _get_entities_to_enrich(self, limit=DEFAULT_LIMIT):\n \"\"\"Get entities with short or missing descriptions.\"\"\"\n cursor = db.execute(\"\"\"\n SELECT id, name, type, description\n FROM entities\n WHERE description IS NULL\n OR CHAR_LENGTH(description) < %s\n ORDER BY id\n LIMIT %s\n \"\"\", (MIN_DESCRIPTION_LENGTH, limit))\n entities = cursor.fetchall()\n cursor.close()\n return entities\n\n def _get_entity_context(self, entity_id, max_chunks=3):\n \"\"\"Get context from chunks where this entity appears.\"\"\"\n cursor = db.execute(\"\"\"\n SELECT c.content\n FROM chunk_entities ce\n JOIN chunks c ON ce.chunk_id = c.id\n WHERE ce.entity_id = %s\n LIMIT %s\n \"\"\", (entity_id, max_chunks))\n chunks = cursor.fetchall()\n cursor.close()\n\n if not chunks:\n return \"(Kein Kontext verfügbar)\"\n\n return \"\\n\\n---\\n\\n\".join(chunk[\"content\"][:500] for chunk in chunks)\n\n def _call_ollama(self, prompt, model):\n \"\"\"Call Ollama API and return generated text.\"\"\"\n try:\n response = requests.post(\n OLLAMA_URL,\n json={\n \"model\": model,\n \"prompt\": prompt,\n \"stream\": False,\n \"options\": {\n \"temperature\": 0.7,\n \"num_predict\": 300,\n }\n },\n timeout=OLLAMA_TIMEOUT\n )\n response.raise_for_status()\n result = response.json()\n return result.get(\"response\", \"\").strip()\n except requests.exceptions.RequestException as e:\n db.log(\"WARNING\", f\"Ollama error: {e}\")\n return None\n\n def _update_description(self, entity_id, description):\n \"\"\"Update entity description in database.\"\"\"\n try:\n db.execute(\"\"\"\n UPDATE entities SET description = %s WHERE id = %s\n \"\"\", (description, entity_id))\n db.commit()\n return True\n except Exception as e:\n db.log(\"ERROR\", f\"Failed to update entity {entity_id}: {e}\")\n return False\n\n def execute(self, limit=100, model=DEFAULT_MODEL):\n \"\"\"\n Enrich entity descriptions.\n\n Args:\n limit: Maximum entities to process\n model: Ollama model to use\n\n Returns:\n dict: Results with success\/error counts\n \"\"\"\n if self.progress:\n self.progress.update_step(\"entity_enrich\")\n self.progress.add_log(\"Starte Entity-Beschreibungs-Enrichment...\")\n\n db.log(\"INFO\", f\"Entity enrichment starting (limit={limit}, model={model})\")\n\n # Load prompt\n prompt_template = self._load_prompt()\n\n # Get entities\n entities = self._get_entities_to_enrich(limit)\n total = len(entities)\n\n if total == 0:\n db.log(\"INFO\", \"No entities need enrichment\")\n if self.progress:\n self.progress.add_log(\"Keine Entitäten benötigen Enrichment\")\n return {\"processed\": 0, \"success\": 0, \"errors\": 0}\n\n db.log(\"INFO\", f\"Found {total} entities to enrich\")\n if self.progress:\n self.progress.add_log(f\"Enriche {total} Entitäten...\")\n\n success_count = 0\n error_count = 0\n\n for i, entity in enumerate(entities, 1):\n # Check for cancellation\n if self.progress and self.progress.is_cancelled():\n db.log(\"INFO\", \"Entity enrichment cancelled\")\n break\n\n # Get context\n context = self._get_entity_context(entity[\"id\"])\n\n # Build prompt\n prompt = prompt_template.format(\n entity_name=entity[\"name\"],\n entity_type=entity[\"type\"],\n context=context[:1500]\n )\n\n # Call Ollama\n description = self._call_ollama(prompt, model)\n\n if not description:\n error_count += 1\n continue\n\n # Update database\n if self._update_description(entity[\"id\"], description):\n success_count += 1\n else:\n error_count += 1\n\n # Progress update every 10 entities\n if i % 10 == 0 and self.progress:\n self.progress.add_log(f\"Enrichment: {i}\/{total} ({success_count} OK)\")\n\n # Small delay\n time.sleep(0.3)\n\n db.log(\"INFO\", f\"Entity enrichment complete: {success_count} success, {error_count} errors\")\n\n if self.progress:\n self.progress.add_log(f\"Enrichment fertig: {success_count}\/{total} erfolgreich\")\n\n return {\n \"processed\": total,\n \"success\": success_count,\n \"errors\": error_count,\n }\n\n def get_stats(self):\n \"\"\"Get current entity description statistics.\"\"\"\n cursor = db.execute(\"\"\"\n SELECT\n COUNT(*) as total,\n SUM(CASE WHEN description IS NULL OR CHAR_LENGTH(description) < %s THEN 1 ELSE 0 END) as needs_enrichment,\n SUM(CASE WHEN CHAR_LENGTH(description) >= %s THEN 1 ELSE 0 END) as enriched,\n ROUND(AVG(CASE WHEN CHAR_LENGTH(description) >= %s THEN CHAR_LENGTH(description) END)) as avg_length\n FROM entities\n \"\"\", (MIN_DESCRIPTION_LENGTH, MIN_DESCRIPTION_LENGTH, MIN_DESCRIPTION_LENGTH))\n stats = cursor.fetchone()\n cursor.close()\n return stats\n",
"structuredPatch": [
{
"oldStart": 136,
"oldLines": 7,
"newStart": 136,
"newLines": 7,
"lines": [
" db.log(\"ERROR\", f\"Failed to update entity {entity_id}: {e}\")",
" return False",
" ",
"- def execute(self, limit=100, model=DEFAULT_MODEL):",
"+ def execute(self, limit=DEFAULT_LIMIT, model=DEFAULT_MODEL):",
" \"\"\"",
" Enrich entity descriptions.",
" "
]
}
],
"userModified": false,
"replaceAll": false
}
}