Protocol #4039
| ID | 4039 |
|---|---|
| Timestamp | 2025-12-22 09:04:29.579438 |
| Client | root |
| IP | - |
| Model | claude-sonnet-4-20250514 |
| Status | pending |
| Tokens | 1,692 (Input: 0, Output: 0) |
| Duration | - |
| Request time | - |
| Response time | - |
Warning: Undefined array key "error_message" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 22
Request
{
"event": "PostToolUse",
"raw_data": {
"session_id": "bf8a0fc3-34fa-408b-8067-b725d1fe9b14",
"transcript_path": "\/root\/.claude\/projects\/-var-www\/bf8a0fc3-34fa-408b-8067-b725d1fe9b14.jsonl",
"cwd": "\/var\/www\/dev.campus.systemische-tools.de",
"permission_mode": "acceptEdits",
"hook_event_name": "PostToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/opt\/scripts\/pipeline\/embed.py"
},
"tool_response": {
"type": "text",
"file": {
"filePath": "\/opt\/scripts\/pipeline\/embed.py",
"content": "\"\"\"\nEmbedding generation for KI-System Pipeline\nUses Ollama (mxbai-embed-large) for vector embeddings.\n\"\"\"\n\nimport json\nimport uuid\n\nimport requests\n\nfrom config import EMBEDDING_DIMENSION, EMBEDDING_MODEL, OLLAMA_HOST, QDRANT_HOST, QDRANT_PORT\nfrom db import db\n\n\ndef get_embedding(text):\n \"\"\"Get embedding vector from Ollama.\"\"\"\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/embeddings\", json={\"model\": EMBEDDING_MODEL, \"prompt\": text}, timeout=60\n )\n response.raise_for_status()\n data = response.json()\n return data.get(\"embedding\")\n except Exception as e:\n db.log(\"ERROR\", f\"Embedding generation failed: {e}\")\n return None\n\n\ndef store_in_qdrant(collection, point_id, vector, payload):\n \"\"\"Store embedding in Qdrant.\"\"\"\n try:\n response = requests.put(\n f\"http:\/\/{QDRANT_HOST}:{QDRANT_PORT}\/collections\/{collection}\/points\",\n json={\"points\": [{\"id\": point_id, \"vector\": vector, \"payload\": payload}]},\n headers={\"Content-Type\": \"application\/json\"},\n timeout=30,\n )\n response.raise_for_status()\n return True\n except Exception as e:\n db.log(\"ERROR\", f\"Qdrant storage failed: {e}\")\n return False\n\n\ndef embed_chunks(chunks, document_id, document_title, source_path):\n \"\"\"\n Generate embeddings for chunks and store in Qdrant.\n Returns number of successfully embedded chunks.\n \"\"\"\n embedded_count = 0\n\n for i, chunk in enumerate(chunks):\n # Generate embedding\n embedding = get_embedding(chunk[\"content\"])\n\n if not embedding:\n db.log(\"WARNING\", f\"Failed to embed chunk {i} of document {document_id}\")\n continue\n\n # Verify dimension\n if len(embedding) != EMBEDDING_DIMENSION:\n db.log(\"ERROR\", f\"Wrong embedding dimension: {len(embedding)} vs {EMBEDDING_DIMENSION}\")\n continue\n\n # Generate UUID for Qdrant\n point_id = str(uuid.uuid4())\n\n # Prepare payload\n payload = {\n \"document_id\": document_id,\n \"document_title\": document_title,\n \"chunk_index\": i,\n \"content\": chunk[\"content\"][:1000], # Truncate for payload\n \"heading_path\": json.dumps(chunk.get(\"heading_path\", [])),\n \"source_path\": source_path,\n }\n\n # Add any chunk metadata\n if chunk.get(\"metadata\"):\n for key, value in chunk[\"metadata\"].items():\n if isinstance(value, (str, int, float, bool)):\n payload[key] = value\n\n # Store in Qdrant\n if store_in_qdrant(\"documents\", point_id, embedding, payload):\n # Update chunk in database with Qdrant ID\n db.update_chunk_qdrant_id(chunk.get(\"db_id\"), point_id)\n embedded_count += 1\n db.log(\"INFO\", f\"Embedded chunk {i}\/{len(chunks)}\", f\"doc={document_id}\")\n else:\n db.log(\"ERROR\", f\"Failed to store chunk {i} in Qdrant\")\n\n return embedded_count\n\n\ndef embed_pending_chunks(limit=100):\n \"\"\"Process chunks that haven't been embedded yet.\"\"\"\n db.connect()\n\n try:\n chunks = db.get_chunks_for_embedding(limit)\n db.log(\"INFO\", f\"Found {len(chunks)} chunks to embed\")\n\n if not chunks:\n return 0\n\n embedded = 0\n for chunk in chunks:\n embedding = get_embedding(chunk[\"content\"])\n\n if not embedding:\n continue\n\n point_id = str(uuid.uuid4())\n\n # Get document info\n cursor = db.execute(\"SELECT filename, source_path FROM documents WHERE id = %s\", (chunk[\"document_id\"],))\n doc = cursor.fetchone()\n cursor.close()\n\n payload = {\n \"document_id\": chunk[\"document_id\"],\n \"document_title\": doc[\"filename\"] if doc else \"\",\n \"chunk_id\": chunk[\"id\"],\n \"content\": chunk[\"content\"][:1000],\n 
\"source_path\": doc[\"source_path\"] if doc else \"\",\n }\n\n if store_in_qdrant(\"documents\", point_id, embedding, payload):\n db.update_chunk_qdrant_id(chunk[\"id\"], point_id)\n embedded += 1\n\n db.log(\"INFO\", f\"Embedded {embedded}\/{len(chunks)} chunks\")\n return embedded\n\n except Exception as e:\n db.log(\"ERROR\", f\"Embedding error: {e}\")\n raise\n finally:\n db.disconnect()\n\n\ndef search_similar(query, collection=\"documents\", limit=5):\n \"\"\"Search for similar documents in Qdrant.\"\"\"\n # Get query embedding\n embedding = get_embedding(query)\n if not embedding:\n return []\n\n try:\n response = requests.post(\n f\"http:\/\/{QDRANT_HOST}:{QDRANT_PORT}\/collections\/{collection}\/points\/search\",\n json={\"vector\": embedding, \"limit\": limit, \"with_payload\": True},\n headers={\"Content-Type\": \"application\/json\"},\n timeout=30,\n )\n response.raise_for_status()\n data = response.json()\n return data.get(\"result\", [])\n except Exception as e:\n db.log(\"ERROR\", f\"Qdrant search failed: {e}\")\n return []\n\n\nif __name__ == \"__main__\":\n import sys\n\n if len(sys.argv) > 1:\n query = \" \".join(sys.argv[1:])\n print(f\"Searching for: {query}\")\n print(\"-\" * 50)\n\n results = search_similar(query)\n for i, result in enumerate(results):\n print(f\"\\n[{i + 1}] Score: {result['score']:.4f}\")\n print(f\" Document: {result['payload'].get('document_title', 'Unknown')}\")\n print(f\" Content: {result['payload'].get('content', '')[:200]}...\")\n else:\n # Run pending embeddings\n count = embed_pending_chunks()\n print(f\"Embedded {count} chunks\")\n",
"numLines": 182,
"startLine": 1,
"totalLines": 182
}
},
"tool_use_id": "toolu_01Nb75HVjM1KCtDU9XSYuqwR"
}
}
Response
-
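
The file returned by the Read call above is the pipeline's embedding module: it obtains vectors from Ollama's `/api/embeddings` endpoint (model `mxbai-embed-large`) and stores/searches them through Qdrant's points API. Below is a minimal sketch of those two HTTP calls, assuming local default ports; the actual hosts come from the pipeline's `config` module, which is not recorded in this log.

```python
# Minimal sketch of the two HTTP calls made by the embed.py captured above.
# Hosts/ports are placeholders (assumed defaults), not values from this log.
import requests

OLLAMA_HOST = "http://localhost:11434"  # placeholder, assumed default Ollama port
QDRANT_URL = "http://localhost:6333"    # placeholder, assumed default Qdrant port


def get_embedding(text):
    """Fetch an embedding vector from Ollama (model: mxbai-embed-large)."""
    resp = requests.post(
        f"{OLLAMA_HOST}/api/embeddings",
        json={"model": "mxbai-embed-large", "prompt": text},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json().get("embedding")


def search_similar(query, limit=5):
    """Vector search against the Qdrant 'documents' collection."""
    vector = get_embedding(query)
    if not vector:
        return []
    resp = requests.post(
        f"{QDRANT_URL}/collections/documents/points/search",
        json={"vector": vector, "limit": limit, "with_payload": True},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json().get("result", [])
```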