{
"tool_response": {
"filePath": "\/opt\/scripts\/pipeline\/analyze.py",
"oldString": "def store_entities(document_id, entities):\n \"\"\"Store extracted entities in database.\"\"\"\n stored = 0\n\n for entity in entities:\n try:\n # Check if entity already exists\n cursor = db.execute(\n \"SELECT id FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n else:\n cursor = db.execute(\n \"\"\"INSERT INTO entities (name, type, created_at)\n VALUES (%s, %s, NOW())\"\"\",\n (entity[\"name\"], entity[\"type\"]),\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()",
"newString": "def store_entities(document_id, entities):\n \"\"\"Store extracted entities in database.\"\"\"\n stored = 0\n\n for entity in entities:\n try:\n # Get description from entity\n description = entity.get(\"description\") or entity.get(\"context\") or None\n\n # Check if entity already exists\n cursor = db.execute(\n \"SELECT id, description FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n # Update description if it was empty and we have one now\n if description and not existing[\"description\"]:\n cursor = db.execute(\n \"UPDATE entities SET description = %s WHERE id = %s\",\n (description, entity_id)\n )\n db.commit()\n cursor.close()\n else:\n cursor = db.execute(\n \"\"\"INSERT INTO entities (name, type, description, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (entity[\"name\"], entity[\"type\"], description),\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()",
"originalFile": "\"\"\"\nSemantic analysis for KI-System Pipeline\nEntity extraction, taxonomy classification, ontology mapping.\n\"\"\"\n\nimport json\nimport os\n\nimport requests\n\nfrom config import ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST\nfrom db import db\n\n\ndef get_anthropic_client():\n \"\"\"Get Anthropic API client.\"\"\"\n try:\n import anthropic\n\n api_key = os.environ.get(\"ANTHROPIC_API_KEY\", \"\")\n if not api_key:\n # Try reading from credentials\n cred_file = \"\/var\/www\/docs\/credentials\/credentials.md\"\n if os.path.exists(cred_file):\n with open(cred_file) as f:\n content = f.read()\n # Parse API key from markdown\n for line in content.split(\"\\n\"):\n if \"ANTHROPIC_API_KEY\" in line:\n parts = line.split(\"`\")\n if len(parts) >= 2:\n api_key = parts[1]\n break\n if api_key:\n return anthropic.Anthropic(api_key=api_key)\n except ImportError:\n pass\n return None\n\n\ndef extract_entities_ollama(text, model=\"gemma3:27b-it-qat\"):\n \"\"\"Extract entities using Ollama.\"\"\"\n # Load prompt from database\n prompt_template = db.get_prompt(\"entity_extraction\")\n\n if not prompt_template:\n db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])\n\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n data = response.json()\n\n # Parse JSON from response\n response_text = data.get(\"response\", \"{}\")\n try:\n entities = json.loads(response_text)\n return entities.get(\"entities\", [])\n except json.JSONDecodeError:\n db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n return []\n\n\ndef extract_entities_anthropic(text, client):\n \"\"\"Extract entities using Anthropic Claude.\"\"\"\n # Get prompt from database\n prompt_template = db.get_prompt(\"entity_extraction\")\n\n if not prompt_template:\n prompt_template = \"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorisiere jede Entität als:\n- PERSON (Namen von Personen)\n- ORGANIZATION (Firmen, Institutionen, Gruppen)\n- CONCEPT (Fachbegriffe, Methoden, Theorien)\n- LOCATION (Orte, Länder)\n- DATE (Zeitangaben)\n- OTHER (Sonstiges)\n\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurzer Kontext der Erwähnung\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:4000])\n\n try:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=2000, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n\n response_text = message.content[0].text\n\n # Extract JSON from response\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n entities = json.loads(json_match.group())\n return entities.get(\"entities\", [])\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Anthropic entity extraction failed: {e}\")\n return []\n\n\ndef extract_relations(text, entities, client=None):\n \"\"\"Extract 
relations between entities.\"\"\"\n if not entities or len(entities) < 2:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:20]]\n\n # Load prompt from database\n prompt_template = db.get_prompt(\"relation_extraction\")\n\n if not prompt_template:\n db.log(\"WARNING\", \"relation_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Identifiziere Beziehungen zwischen Entitäten.\nEntitäten: {{ENTITIES}}\nBeziehungstypen: DEVELOPED_BY, RELATED_TO, PART_OF, USED_IN, BASED_ON\nAntworte NUR im JSON-Format:\n{\"relations\": [{\"source\": \"...\", \"relation\": \"...\", \"target\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{ENTITIES}}\", \", \".join(entity_names))\n prompt = prompt.replace(\"{{TEXT}}\", text[:3000])\n\n try:\n if client:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=1000, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n response_text = message.content[0].text\n else:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n response_text = response.json().get(\"response\", \"{}\")\n\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n data = json.loads(json_match.group())\n return data.get(\"relations\", [])\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Relation extraction failed: {e}\")\n return []\n\n\ndef classify_taxonomy(text, client=None):\n \"\"\"Classify text into taxonomy categories.\"\"\"\n prompt_template = db.get_prompt(\"taxonomy_classification\")\n\n if not prompt_template:\n prompt_template = \"\"\"Klassifiziere den folgenden Text in passende Kategorien.\n\nWähle aus diesen Hauptkategorien:\n- Methoden (Therapiemethoden, Techniken)\n- Theorie (Konzepte, Modelle, Grundlagen)\n- Praxis (Anwendung, Fallbeispiele)\n- Organisation (Strukturen, Prozesse)\n- Kommunikation (Gesprächsführung, Interaktion)\n- Entwicklung (Persönliche Entwicklung, Veränderung)\n\nAntworte NUR im JSON-Format:\n{\"categories\": [\"...\", \"...\"], \"confidence\": 0.0-1.0}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:2000])\n\n try:\n if client:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n response_text = message.content[0].text\n else:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=60,\n )\n response.raise_for_status()\n response_text = response.json().get(\"response\", \"{}\")\n\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n return json.loads(json_match.group())\n return {\"categories\": [], \"confidence\": 0}\n except Exception as e:\n db.log(\"ERROR\", f\"Taxonomy classification failed: {e}\")\n return {\"categories\": [], \"confidence\": 0}\n\n\ndef store_entities(document_id, entities):\n \"\"\"Store extracted entities in database.\"\"\"\n stored = 0\n\n for entity in entities:\n try:\n # Check if entity already exists\n cursor = db.execute(\n \"SELECT id FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n entity_id = existing[\"id\"]\n else:\n cursor = db.execute(\n 
\"\"\"INSERT INTO entities (name, type, created_at)\n VALUES (%s, %s, NOW())\"\"\",\n (entity[\"name\"], entity[\"type\"]),\n )\n db.commit()\n entity_id = cursor.lastrowid\n cursor.close()\n\n # Link to document\n cursor = db.execute(\n \"\"\"INSERT IGNORE INTO document_entities\n (document_id, entity_id, context, created_at)\n VALUES (%s, %s, %s, NOW())\"\"\",\n (document_id, entity_id, entity.get(\"context\", \"\")),\n )\n db.commit()\n cursor.close()\n stored += 1\n\n except Exception as e:\n db.log(\"WARNING\", f\"Failed to store entity: {e}\")\n\n return stored\n\n\ndef analyze_document(document_id, text, use_anthropic=True):\n \"\"\"\n Full semantic analysis of a document.\n Extracts entities, relations, and taxonomy classification.\n \"\"\"\n db.log(\"INFO\", f\"Starting semantic analysis for document {document_id}\")\n\n client = get_anthropic_client() if use_anthropic else None\n\n # Extract entities\n if client:\n entities = extract_entities_anthropic(text, client)\n else:\n entities = extract_entities_ollama(text)\n\n db.log(\"INFO\", f\"Extracted {len(entities)} entities\")\n\n # Store entities\n if entities:\n stored = store_entities(document_id, entities)\n db.log(\"INFO\", f\"Stored {stored} entities\")\n\n # Extract relations\n relations = extract_relations(text, entities, client)\n db.log(\"INFO\", f\"Extracted {len(relations)} relations\")\n\n # Store relations\n for rel in relations:\n try:\n # Find entity IDs\n cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"source\"],))\n source = cursor.fetchone()\n cursor.close()\n\n cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"target\"],))\n target = cursor.fetchone()\n cursor.close()\n\n if source and target:\n cursor = db.execute(\n ... [TRUNCATED-200590d2e9231851]",
"structuredPatch": [
{
"oldStart": 227,
"oldLines": 20,
"newStart": 227,
"newLines": 31,
"lines": [
" ",
" for entity in entities:",
" try:",
"+ # Get description from entity",
"+ description = entity.get(\"description\") or entity.get(\"context\") or None",
"+",
" # Check if entity already exists",
" cursor = db.execute(",
"- \"SELECT id FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])",
"+ \"SELECT id, description FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])",
" )",
" existing = cursor.fetchone()",
" cursor.close()",
" ",
" if existing:",
" entity_id = existing[\"id\"]",
"+ # Update description if it was empty and we have one now",
"+ if description and not existing[\"description\"]:",
"+ cursor = db.execute(",
"+ \"UPDATE entities SET description = %s WHERE id = %s\",",
"+ (description, entity_id)",
"+ )",
"+ db.commit()",
"+ cursor.close()",
" else:",
" cursor = db.execute(",
"- \"\"\"INSERT INTO entities (name, type, created_at)",
"- VALUES (%s, %s, NOW())\"\"\",",
"- (entity[\"name\"], entity[\"type\"]),",
"+ \"\"\"INSERT INTO entities (name, type, description, created_at)",
"+ VALUES (%s, %s, %s, NOW())\"\"\",",
"+ (entity[\"name\"], entity[\"type\"], description),",
" )",
" db.commit()",
" entity_id = cursor.lastrowid"
]
      }
    ],
    "userModified": false,
    "replaceAll": false
  }
}