{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/opt\/scripts\/pipeline\/generate_semantics.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nGenerate semantic definitions for entities using Ollama.\nUses document context to create meaningful definitions.\n\"\"\"\n\nimport json\nimport time\n\nimport requests\n\n# Configuration\nOLLAMA_HOST = \"http:\/\/localhost:11434\"\nMODEL = \"gpt-oss:20b\"\nDB_CONFIG = {\n \"host\": \"localhost\",\n \"database\": \"ki_system\",\n \"user\": \"root\",\n}\n\n\ndef get_db_password():\n \"\"\"Read database password from credentials file.\"\"\"\n cred_file = \"\/var\/www\/docs\/credentials\/credentials.md\"\n with open(cred_file) as f:\n for line in f:\n if \"MariaDB\" in line and \"root\" in line:\n parts = line.split(\"|\")\n if len(parts) >= 4:\n return parts[3].strip()\n return \"\"\n\n\ndef get_db_connection():\n \"\"\"Create database connection.\"\"\"\n import mysql.connector\n\n return mysql.connector.connect(\n host=DB_CONFIG[\"host\"],\n database=DB_CONFIG[\"database\"],\n user=DB_CONFIG[\"user\"],\n password=get_db_password(),\n )\n\n\ndef get_entities(conn):\n \"\"\"Get all entities from database.\"\"\"\n cursor = conn.cursor(dictionary=True)\n cursor.execute(\"\"\"\n SELECT e.id, e.name, e.type, e.description\n FROM entities e\n LEFT JOIN entity_semantics es ON e.id = es.entity_id\n WHERE es.id IS NULL\n ORDER BY e.id\n \"\"\")\n return cursor.fetchall()\n\n\ndef get_document_context(conn, limit=5):\n \"\"\"Get document chunks for context.\"\"\"\n cursor = conn.cursor(dictionary=True)\n cursor.execute(\n \"\"\"\n SELECT content FROM chunks\n ORDER BY chunk_index\n LIMIT %s\n \"\"\",\n (limit,),\n )\n chunks = cursor.fetchall()\n return \"\\n\\n\".join([c[\"content\"] for c in chunks])\n\n\ndef extract_json_from_text(text):\n \"\"\"Extract JSON object from text response.\"\"\"\n import re\n\n # Try to find JSON object in the text\n # Look for { ... 
} pattern\n match = re.search(r\"\\{[^{}]*(?:\\{[^{}]*\\}[^{}]*)*\\}\", text, re.DOTALL)\n if match:\n try:\n return json.loads(match.group())\n except json.JSONDecodeError:\n pass\n\n # Try parsing the whole text as JSON\n try:\n return json.loads(text)\n except json.JSONDecodeError:\n pass\n\n return None\n\n\ndef generate_semantic(entity, context, model=MODEL):\n \"\"\"Generate semantic definition using Ollama.\"\"\"\n prompt = f\"\"\"Analysiere diese Entity und erstelle eine semantische Definition.\n\nEntity: {entity[\"name\"]} (Typ: {entity[\"type\"]})\n\nKontext aus Dokument:\n{context[:3000]}\n\nAntworte mit einem JSON-Objekt:\n{{\"definition\": \"Bedeutung in 1-2 Sätzen\", \"domain\": \"Wissensdomäne\", \"context\": \"Verwendungskontext\", \"attributes\": {{}}, \"usage_notes\": \"\", \"confidence\": 0.8}}\n\nNur das JSON-Objekt ausgeben, nichts anderes.\"\"\"\n\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\n \"model\": model,\n \"prompt\": prompt,\n \"stream\": False,\n \"options\": {\"temperature\": 0.3, \"num_predict\": 800},\n },\n timeout=180,\n )\n response.raise_for_status()\n data = response.json()\n\n response_text = data.get(\"response\", \"\")\n\n # Debug output\n if not response_text:\n print(\" Empty response from Ollama\")\n return None\n\n # Try to extract JSON from the response\n result = extract_json_from_text(response_text)\n\n if not result:\n # If JSON extraction failed, try to create a basic semantic from the text\n print(f\" Could not parse JSON, raw: {response_text[:100]}...\")\n return None\n\n return result\n except requests.exceptions.Timeout:\n print(\" Timeout after 180s\")\n return None\n except Exception as e:\n print(f\" Error: {e}\")\n return None\n\n\ndef save_semantic(conn, entity_id, semantic):\n \"\"\"Save semantic to database.\"\"\"\n cursor = conn.cursor()\n cursor.execute(\n \"\"\"\n INSERT INTO entity_semantics\n (entity_id, definition, domain, context, attributes, usage_notes, confidence, source)\n VALUES\n (%s, %s, %s, %s, %s, %s, %s, %s)\n ON DUPLICATE KEY UPDATE\n definition = VALUES(definition),\n domain = VALUES(domain),\n context = VALUES(context),\n attributes = VALUES(attributes),\n usage_notes = VALUES(usage_notes),\n confidence = VALUES(confidence),\n source = VALUES(source),\n updated_at = NOW()\n \"\"\",\n (\n entity_id,\n semantic.get(\"definition\"),\n semantic.get(\"domain\"),\n semantic.get(\"context\"),\n json.dumps(semantic.get(\"attributes\", {}), ensure_ascii=False),\n semantic.get(\"usage_notes\"),\n float(semantic.get(\"confidence\", 0.5)),\n \"ollama\",\n ),\n )\n conn.commit()\n\n\ndef main():\n \"\"\"Main function.\"\"\"\n print(f\"Semantic Generation with {MODEL}\")\n print(\"=\" * 50)\n\n conn = get_db_connection()\n\n # Get entities without semantics\n entities = get_entities(conn)\n print(f\"Entities without semantics: {len(entities)}\")\n\n if not entities:\n print(\"All entities already have semantics.\")\n return\n\n # Get document context\n context = get_document_context(conn, limit=5)\n print(f\"Context loaded: {len(context)} chars\")\n print()\n\n # Process each entity\n success = 0\n failed = 0\n\n for i, entity in enumerate(entities, 1):\n print(f\"[{i}\/{len(entities)}] {entity['name']} ({entity['type']})\")\n\n semantic = generate_semantic(entity, context)\n\n if semantic and \"definition\" in semantic:\n save_semantic(conn, entity[\"id\"], semantic)\n print(f\" -> {semantic.get('domain', 'N\/A')}: {semantic.get('definition', '')[:60]}...\")\n success += 1\n else:\n 
print(\" -> FAILED\")\n failed += 1\n\n # Small delay to avoid overwhelming Ollama\n time.sleep(0.5)\n\n print()\n print(\"=\" * 50)\n print(f\"Done! Success: {success}, Failed: {failed}\")\n\n conn.close()\n\n\nif __name__ == \"__main__\":\n main()\n",
"numLines": 230,
"startLine": 1,
"totalLines": 230
}
}
}
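
Note on the schema the script assumes: the anti-join in get_entities expects entity_semantics to expose an id column, and the INSERT ... ON DUPLICATE KEY UPDATE in save_semantic only acts as an upsert if entity_semantics carries a unique key on entity_id; the update clause also presumes an updated_at column. Below is a minimal sketch of a compatible table; every column type is a guess, since the script only implies the column names, the unique key, and updated_at.

# Sketch only: a MariaDB table consistent with what generate_semantics.py
# writes. All column types/sizes are assumptions, not taken from the script.
from generate_semantics import get_db_connection  # reuse the script's helper

DDL = """
CREATE TABLE IF NOT EXISTS entity_semantics (
    id          INT AUTO_INCREMENT PRIMARY KEY,
    entity_id   INT NOT NULL,
    definition  TEXT,
    domain      VARCHAR(255),
    context     TEXT,
    attributes  LONGTEXT,       -- JSON string produced by json.dumps
    usage_notes TEXT,
    confidence  FLOAT,
    source      VARCHAR(64),
    updated_at  DATETIME DEFAULT CURRENT_TIMESTAMP,
    UNIQUE KEY uq_entity_semantics_entity (entity_id)  -- makes the upsert fire
)
"""

conn = get_db_connection()
conn.cursor().execute(DDL)
conn.commit()
conn.close()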