{
"event": "PostToolUse",
"raw_data": {
"session_id": "18ef5bf3-e872-4052-897e-53d6dc45b0b9",
"transcript_path": "\/root\/.claude\/projects\/-var-www\/18ef5bf3-e872-4052-897e-53d6dc45b0b9.jsonl",
"cwd": "\/var\/www\/scripts\/pipeline",
"permission_mode": "acceptEdits",
"hook_event_name": "PostToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/embed.py",
"limit": 80
},
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/embed.py",
"content": "\"\"\"\nEmbedding generation for KI-System Pipeline\nUses Ollama (mxbai-embed-large) for vector embeddings.\n\"\"\"\n\nimport json\nimport re\nimport uuid\n\nimport requests\n\nfrom config import EMBEDDING_DIMENSION, EMBEDDING_MODEL, OLLAMA_HOST, QDRANT_HOST, QDRANT_PORT\nfrom db import db\n\n# Max chars for mxbai-embed model (512 token context, varies by content)\n# Conservative limit to handle German compound words and special chars\nMAX_EMBED_CHARS = 800\n\n\ndef get_embedding(text):\n \"\"\"Get embedding vector from Ollama.\"\"\"\n # Skip empty content\n if not text or not text.strip():\n return None\n\n # Collapse consecutive dots\/periods (table of contents, etc.)\n text = re.sub(r\"\\.{3,}\", \"...\", text)\n\n # Truncate if too long for model context\n if len(text) > MAX_EMBED_CHARS:\n text = text[:MAX_EMBED_CHARS]\n\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/embeddings\",\n json={\"model\": EMBEDDING_MODEL, \"prompt\": text},\n timeout=60,\n )\n response.raise_for_status()\n data = response.json()\n return data.get(\"embedding\")\n except Exception as e:\n db.log(\"ERROR\", f\"Embedding generation failed: {e}\")\n return None\n\n\ndef store_in_qdrant(collection, point_id, vector, payload):\n \"\"\"Store embedding in Qdrant.\"\"\"\n try:\n response = requests.put(\n f\"http:\/\/{QDRANT_HOST}:{QDRANT_PORT}\/collections\/{collection}\/points\",\n json={\"points\": [{\"id\": point_id, \"vector\": vector, \"payload\": payload}]},\n headers={\"Content-Type\": \"application\/json\"},\n timeout=30,\n )\n response.raise_for_status()\n return True\n except Exception as e:\n db.log(\"ERROR\", f\"Qdrant storage failed: {e}\")\n return False\n\n\ndef embed_chunks(chunks, document_id, document_title, source_path, progress=None):\n \"\"\"\n Generate embeddings for chunks and store in Qdrant.\n Returns number of successfully embedded chunks.\n \"\"\"\n embedded_count = 0\n total_chunks = len(chunks)\n\n for i, chunk in enumerate(chunks):\n # Log every chunk for full visibility\n if progress:\n progress.add_log(f\"Embed: Chunk {i + 1}\/{total_chunks}...\")\n\n # Generate embedding\n embedding = get_embedding(chunk[\"content\"])\n\n if not embedding:\n db.log(\"WARNING\", f\"Failed to embed chunk {i} of document {document_id}\")",
"numLines": 80,
"startLine": 1,
"totalLines": 205
}
},
"tool_use_id": "toolu_012BGvjcSHjzGJtAp3suinMN"
}
}
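
For context, a minimal sketch of a PostToolUse hook script that could produce a record shaped like the object above: it reads the hook payload (session_id, cwd, tool_name, tool_input, tool_response, ...) as JSON from stdin, wraps it under "event"/"raw_data", and appends it to a JSONL log. The log path and the wrapper shape are illustrative assumptions, not confirmed details of the original logging setup.

#!/usr/bin/env python3
"""Sketch of a PostToolUse hook that records events in the shape shown above.

Assumes the Claude Code convention of delivering the hook payload as a single
JSON object on stdin; LOG_PATH and the {"event", "raw_data"} wrapper are
placeholder assumptions for illustration.
"""

import json
import sys

LOG_PATH = "/tmp/hook-events.jsonl"  # placeholder path, not from the event above


def main():
    # The hook payload arrives as one JSON object on stdin.
    payload = json.load(sys.stdin)

    # Wrap it in the same outer structure as the captured record.
    record = {
        "event": payload.get("hook_event_name", "PostToolUse"),
        "raw_data": payload,
    }

    # Append one JSON record per event.
    with open(LOG_PATH, "a") as fh:
        fh.write(json.dumps(record) + "\n")

    # Exit code 0 lets the tool result pass through unmodified.
    sys.exit(0)


if __name__ == "__main__":
    main()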