Protocol #4039
| ID | 4039 |
|---|---|
| Timestamp | 2025-12-22 09:04:29.579438 |
| Client | root |
| IP | - |
| Model | claude-sonnet-4-20250514 |
| Status | pending |
| Tokens | 1,692 (Input: 0, Output: 0) |
| Duration | - |
| Request time | - |
| Response time | - |
Warning: Undefined array key "error_message" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 22
Request
{
"event": "PostToolUse",
"raw_data": {
"session_id": "bf8a0fc3-34fa-408b-8067-b725d1fe9b14",
"transcript_path": "\/root\/.claude\/projects\/-var-www\/bf8a0fc3-34fa-408b-8067-b725d1fe9b14.jsonl",
"cwd": "\/var\/www\/dev.campus.systemische-tools.de",
"permission_mode": "acceptEdits",
"hook_event_name": "PostToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/opt\/scripts\/pipeline\/embed.py"
},
"tool_response": {
"type": "text",
"file": {
"filePath": "\/opt\/scripts\/pipeline\/embed.py",
"content": "\"\"\"\nEmbedding generation for KI-System Pipeline\nUses Ollama (mxbai-embed-large) for vector embeddings.\n\"\"\"\n\nimport json\nimport uuid\n\nimport requests\n\nfrom config import EMBEDDING_DIMENSION, EMBEDDING_MODEL, OLLAMA_HOST, QDRANT_HOST, QDRANT_PORT\nfrom db import db\n\n\ndef get_embedding(text):\n \"\"\"Get embedding vector from Ollama.\"\"\"\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/embeddings\", json={\"model\": EMBEDDING_MODEL, \"prompt\": text}, timeout=60\n )\n response.raise_for_status()\n data = response.json()\n return data.get(\"embedding\")\n except Exception as e:\n db.log(\"ERROR\", f\"Embedding generation failed: {e}\")\n return None\n\n\ndef store_in_qdrant(collection, point_id, vector, payload):\n \"\"\"Store embedding in Qdrant.\"\"\"\n try:\n response = requests.put(\n f\"http:\/\/{QDRANT_HOST}:{QDRANT_PORT}\/collections\/{collection}\/points\",\n json={\"points\": [{\"id\": point_id, \"vector\": vector, \"payload\": payload}]},\n headers={\"Content-Type\": \"application\/json\"},\n timeout=30,\n )\n response.raise_for_status()\n return True\n except Exception as e:\n db.log(\"ERROR\", f\"Qdrant storage failed: {e}\")\n return False\n\n\ndef embed_chunks(chunks, document_id, document_title, source_path):\n \"\"\"\n Generate embeddings for chunks and store in Qdrant.\n Returns number of successfully embedded chunks.\n \"\"\"\n embedded_count = 0\n\n for i, chunk in enumerate(chunks):\n # Generate embedding\n embedding = get_embedding(chunk[\"content\"])\n\n if not embedding:\n db.log(\"WARNING\", f\"Failed to embed chunk {i} of document {document_id}\")\n continue\n\n # Verify dimension\n if len(embedding) != EMBEDDING_DIMENSION:\n db.log(\"ERROR\", f\"Wrong embedding dimension: {len(embedding)} vs {EMBEDDING_DIMENSION}\")\n continue\n\n # Generate UUID for Qdrant\n point_id = str(uuid.uuid4())\n\n # Prepare payload\n payload = {\n \"document_id\": document_id,\n \"document_title\": document_title,\n \"chunk_index\": i,\n \"content\": chunk[\"content\"][:1000], # Truncate for payload\n \"heading_path\": json.dumps(chunk.get(\"heading_path\", [])),\n \"source_path\": source_path,\n }\n\n # Add any chunk metadata\n if chunk.get(\"metadata\"):\n for key, value in chunk[\"metadata\"].items():\n if isinstance(value, (str, int, float, bool)):\n payload[key] = value\n\n # Store in Qdrant\n if store_in_qdrant(\"documents\", point_id, embedding, payload):\n # Update chunk in database with Qdrant ID\n db.update_chunk_qdrant_id(chunk.get(\"db_id\"), point_id)\n embedded_count += 1\n db.log(\"INFO\", f\"Embedded chunk {i}\/{len(chunks)}\", f\"doc={document_id}\")\n else:\n db.log(\"ERROR\", f\"Failed to store chunk {i} in Qdrant\")\n\n return embedded_count\n\n\ndef embed_pending_chunks(limit=100):\n \"\"\"Process chunks that haven't been embedded yet.\"\"\"\n db.connect()\n\n try:\n chunks = db.get_chunks_for_embedding(limit)\n db.log(\"INFO\", f\"Found {len(chunks)} chunks to embed\")\n\n if not chunks:\n return 0\n\n embedded = 0\n for chunk in chunks:\n embedding = get_embedding(chunk[\"content\"])\n\n if not embedding:\n continue\n\n point_id = str(uuid.uuid4())\n\n # Get document info\n cursor = db.execute(\"SELECT filename, source_path FROM documents WHERE id = %s\", (chunk[\"document_id\"],))\n doc = cursor.fetchone()\n cursor.close()\n\n payload = {\n \"document_id\": chunk[\"document_id\"],\n \"document_title\": doc[\"filename\"] if doc else \"\",\n \"chunk_id\": chunk[\"id\"],\n \"content\": chunk[\"content\"][:1000],\n 
\"source_path\": doc[\"source_path\"] if doc else \"\",\n }\n\n if store_in_qdrant(\"documents\", point_id, embedding, payload):\n db.update_chunk_qdrant_id(chunk[\"id\"], point_id)\n embedded += 1\n\n db.log(\"INFO\", f\"Embedded {embedded}\/{len(chunks)} chunks\")\n return embedded\n\n except Exception as e:\n db.log(\"ERROR\", f\"Embedding error: {e}\")\n raise\n finally:\n db.disconnect()\n\n\ndef search_similar(query, collection=\"documents\", limit=5):\n \"\"\"Search for similar documents in Qdrant.\"\"\"\n # Get query embedding\n embedding = get_embedding(query)\n if not embedding:\n return []\n\n try:\n response = requests.post(\n f\"http:\/\/{QDRANT_HOST}:{QDRANT_PORT}\/collections\/{collection}\/points\/search\",\n json={\"vector\": embedding, \"limit\": limit, \"with_payload\": True},\n headers={\"Content-Type\": \"application\/json\"},\n timeout=30,\n )\n response.raise_for_status()\n data = response.json()\n return data.get(\"result\", [])\n except Exception as e:\n db.log(\"ERROR\", f\"Qdrant search failed: {e}\")\n return []\n\n\nif __name__ == \"__main__\":\n import sys\n\n if len(sys.argv) > 1:\n query = \" \".join(sys.argv[1:])\n print(f\"Searching for: {query}\")\n print(\"-\" * 50)\n\n results = search_similar(query)\n for i, result in enumerate(results):\n print(f\"\\n[{i + 1}] Score: {result['score']:.4f}\")\n print(f\" Document: {result['payload'].get('document_title', 'Unknown')}\")\n print(f\" Content: {result['payload'].get('content', '')[:200]}...\")\n else:\n # Run pending embeddings\n count = embed_pending_chunks()\n print(f\"Embedded {count} chunks\")\n",
"numLines": 182,
"startLine": 1,
"totalLines": 182
}
},
"tool_use_id": "toolu_01Nb75HVjM1KCtDU9XSYuqwR"
}
}
Response
-
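
The file returned by the Read call above is the pipeline's embedding module: it obtains vectors from Ollama's `/api/embeddings` endpoint (model `mxbai-embed-large`) and stores/searches them through Qdrant's points API. Below is a minimal sketch of those two HTTP calls, assuming local default ports; the actual hosts come from the pipeline's `config` module, which is not recorded in this log.

```python
# Minimal sketch of the two HTTP calls made by the embed.py captured above.
# Hosts/ports are placeholders (assumed defaults), not values from this log.
import requests

OLLAMA_HOST = "http://localhost:11434"  # placeholder, assumed default Ollama port
QDRANT_URL = "http://localhost:6333"    # placeholder, assumed default Qdrant port


def get_embedding(text):
    """Fetch an embedding vector from Ollama (model: mxbai-embed-large)."""
    resp = requests.post(
        f"{OLLAMA_HOST}/api/embeddings",
        json={"model": "mxbai-embed-large", "prompt": text},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json().get("embedding")


def search_similar(query, limit=5):
    """Vector search against the Qdrant 'documents' collection."""
    vector = get_embedding(query)
    if not vector:
        return []
    resp = requests.post(
        f"{QDRANT_URL}/collections/documents/points/search",
        json={"vector": vector, "limit": limit, "with_payload": True},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json().get("result", [])
```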