Protokoll #24784

ID: 24784
Zeitstempel: 2025-12-28 01:13:45.897036
Client: root
IP: 145.224.72.140
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 2,265 (Input: 69, Output: 2,196)
Dauer: 141 ms
Request-Zeit: 2025-12-28 01:13:45.897036
Response-Zeit: 2025-12-28 01:13:46.038250

Request

{
    "event": "PreToolUse",
    "tool_name": "Edit",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/step_entity_enrich.py",
        "old_string": "    def execute(self, limit=100, model=DEFAULT_MODEL):",
        "new_string": "    def execute(self, limit=DEFAULT_LIMIT, model=DEFAULT_MODEL):"
    }
}

Response

{
    "tool_response": {
        "filePath": "\/var\/www\/scripts\/pipeline\/step_entity_enrich.py",
        "oldString": "    def execute(self, limit=100, model=DEFAULT_MODEL):",
        "newString": "    def execute(self, limit=DEFAULT_LIMIT, model=DEFAULT_MODEL):",
        "originalFile": "\"\"\"\nEntity Enrichment Step Module\nGenerates detailed descriptions for entities via Ollama.\n\nThis step runs AFTER semantic analysis and enriches entities\nwith comprehensive descriptions (3-5 sentences).\n\nPart of Progressive Pipeline Architecture.\n\"\"\"\n\nimport time\n\nimport requests\n\nfrom constants import DEFAULT_LIMIT, OLLAMA_TIMEOUT\nfrom db import db\n\n# Configuration\nOLLAMA_URL = \"http:\/\/localhost:11434\/api\/generate\"\nDEFAULT_MODEL = \"mistral:latest\"\nMIN_DESCRIPTION_LENGTH = 50\n\n\nclass EntityEnrichStep:\n    \"\"\"Step: Enrich entity descriptions via Ollama.\"\"\"\n\n    def __init__(self, progress=None):\n        \"\"\"\n        Initialize entity enrichment step.\n\n        Args:\n            progress: Optional PipelineProgress instance\n        \"\"\"\n        self.progress = progress\n        self.prompt_template = None\n\n    def _load_prompt(self):\n        \"\"\"Load prompt template from database.\"\"\"\n        if self.prompt_template:\n            return self.prompt_template\n\n        cursor = db.execute(\"\"\"\n            SELECT content FROM prompts\n            WHERE use_case = 'entity_description' AND is_active = 1\n            ORDER BY id DESC LIMIT 1\n        \"\"\")\n        row = cursor.fetchone()\n        cursor.close()\n\n        if row:\n            self.prompt_template = row[\"content\"]\n        else:\n            # Fallback prompt\n            self.prompt_template = \"\"\"Du bist ein Experte für systemisches Coaching und Organisationsentwicklung.\n\nAufgabe: Erstelle eine ausführliche Beschreibung für die folgende Entität.\n\nEntität: {entity_name}\nTyp: {entity_type}\nAktueller Kontext aus dem Dokument:\n{context}\n\nAnforderungen an die Beschreibung:\n1. Erster Satz: Grundsätzliche Definition des Begriffs\n2. Weitere 2-4 Sätze: Erläuterung der Bedeutung im Kontext von systemischem Coaching, Teamarbeit oder Organisationsentwicklung\n3. 
Falls relevant: Praktische Anwendung oder Beispiele\n\nSchreibe NUR die Beschreibung (3-5 Sätze), keine Überschriften oder Formatierung.\nSprache: Deutsch\"\"\"\n\n        return self.prompt_template\n\n    def _get_entities_to_enrich(self, limit=DEFAULT_LIMIT):\n        \"\"\"Get entities with short or missing descriptions.\"\"\"\n        cursor = db.execute(\"\"\"\n            SELECT id, name, type, description\n            FROM entities\n            WHERE description IS NULL\n               OR CHAR_LENGTH(description) < %s\n            ORDER BY id\n            LIMIT %s\n        \"\"\", (MIN_DESCRIPTION_LENGTH, limit))\n        entities = cursor.fetchall()\n        cursor.close()\n        return entities\n\n    def _get_entity_context(self, entity_id, max_chunks=3):\n        \"\"\"Get context from chunks where this entity appears.\"\"\"\n        cursor = db.execute(\"\"\"\n            SELECT c.content\n            FROM chunk_entities ce\n            JOIN chunks c ON ce.chunk_id = c.id\n            WHERE ce.entity_id = %s\n            LIMIT %s\n        \"\"\", (entity_id, max_chunks))\n        chunks = cursor.fetchall()\n        cursor.close()\n\n        if not chunks:\n            return \"(Kein Kontext verfügbar)\"\n\n        return \"\\n\\n---\\n\\n\".join(chunk[\"content\"][:500] for chunk in chunks)\n\n    def _call_ollama(self, prompt, model):\n        \"\"\"Call Ollama API and return generated text.\"\"\"\n        try:\n            response = requests.post(\n                OLLAMA_URL,\n                json={\n                    \"model\": model,\n                    \"prompt\": prompt,\n                    \"stream\": False,\n                    \"options\": {\n                        \"temperature\": 0.7,\n                        \"num_predict\": 300,\n                    }\n                },\n                timeout=OLLAMA_TIMEOUT\n            )\n            response.raise_for_status()\n            result = response.json()\n            return 
result.get(\"response\", \"\").strip()\n        except requests.exceptions.RequestException as e:\n            db.log(\"WARNING\", f\"Ollama error: {e}\")\n            return None\n\n    def _update_description(self, entity_id, description):\n        \"\"\"Update entity description in database.\"\"\"\n        try:\n            db.execute(\"\"\"\n                UPDATE entities SET description = %s WHERE id = %s\n            \"\"\", (description, entity_id))\n            db.commit()\n            return True\n        except Exception as e:\n            db.log(\"ERROR\", f\"Failed to update entity {entity_id}: {e}\")\n            return False\n\n    def execute(self, limit=100, model=DEFAULT_MODEL):\n        \"\"\"\n        Enrich entity descriptions.\n\n        Args:\n            limit: Maximum entities to process\n            model: Ollama model to use\n\n        Returns:\n            dict: Results with success\/error counts\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"entity_enrich\")\n            self.progress.add_log(\"Starte Entity-Beschreibungs-Enrichment...\")\n\n        db.log(\"INFO\", f\"Entity enrichment starting (limit={limit}, model={model})\")\n\n        # Load prompt\n        prompt_template = self._load_prompt()\n\n        # Get entities\n        entities = self._get_entities_to_enrich(limit)\n        total = len(entities)\n\n        if total == 0:\n            db.log(\"INFO\", \"No entities need enrichment\")\n            if self.progress:\n                self.progress.add_log(\"Keine Entitäten benötigen Enrichment\")\n            return {\"processed\": 0, \"success\": 0, \"errors\": 0}\n\n        db.log(\"INFO\", f\"Found {total} entities to enrich\")\n        if self.progress:\n            self.progress.add_log(f\"Enriche {total} Entitäten...\")\n\n        success_count = 0\n        error_count = 0\n\n        for i, entity in enumerate(entities, 1):\n            # Check for cancellation\n            if 
self.progress and self.progress.is_cancelled():\n                db.log(\"INFO\", \"Entity enrichment cancelled\")\n                break\n\n            # Get context\n            context = self._get_entity_context(entity[\"id\"])\n\n            # Build prompt\n            prompt = prompt_template.format(\n                entity_name=entity[\"name\"],\n                entity_type=entity[\"type\"],\n                context=context[:1500]\n            )\n\n            # Call Ollama\n            description = self._call_ollama(prompt, model)\n\n            if not description:\n                error_count += 1\n                continue\n\n            # Update database\n            if self._update_description(entity[\"id\"], description):\n                success_count += 1\n            else:\n                error_count += 1\n\n            # Progress update every 10 entities\n            if i % 10 == 0 and self.progress:\n                self.progress.add_log(f\"Enrichment: {i}\/{total} ({success_count} OK)\")\n\n            # Small delay\n            time.sleep(0.3)\n\n        db.log(\"INFO\", f\"Entity enrichment complete: {success_count} success, {error_count} errors\")\n\n        if self.progress:\n            self.progress.add_log(f\"Enrichment fertig: {success_count}\/{total} erfolgreich\")\n\n        return {\n            \"processed\": total,\n            \"success\": success_count,\n            \"errors\": error_count,\n        }\n\n    def get_stats(self):\n        \"\"\"Get current entity description statistics.\"\"\"\n        cursor = db.execute(\"\"\"\n            SELECT\n                COUNT(*) as total,\n                SUM(CASE WHEN description IS NULL OR CHAR_LENGTH(description) < %s THEN 1 ELSE 0 END) as needs_enrichment,\n                SUM(CASE WHEN CHAR_LENGTH(description) >= %s THEN 1 ELSE 0 END) as enriched,\n                ROUND(AVG(CASE WHEN CHAR_LENGTH(description) >= %s THEN CHAR_LENGTH(description) END)) as avg_length\n            FROM 
entities\n        \"\"\", (MIN_DESCRIPTION_LENGTH, MIN_DESCRIPTION_LENGTH, MIN_DESCRIPTION_LENGTH))\n        stats = cursor.fetchone()\n        cursor.close()\n        return stats\n",
        "structuredPatch": [
            {
                "oldStart": 136,
                "oldLines": 7,
                "newStart": 136,
                "newLines": 7,
                "lines": [
                    "             db.log(\"ERROR\", f\"Failed to update entity {entity_id}: {e}\")",
                    "             return False",
                    " ",
                    "-    def execute(self, limit=100, model=DEFAULT_MODEL):",
                    "+    def execute(self, limit=DEFAULT_LIMIT, model=DEFAULT_MODEL):",
                    "         \"\"\"",
                    "         Enrich entity descriptions.",
                    " "
                ]
            }
        ],
        "userModified": false,
        "replaceAll": false
    }
}
← Vorheriger | Zur Liste | Nächster →