{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/embed.py",
"limit": 50
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/embed.py",
"content": "\"\"\"\nEmbedding generation for KI-System Pipeline\nUses Ollama (mxbai-embed-large) for vector embeddings.\n\"\"\"\n\nimport json\nimport re\nimport uuid\n\nimport requests\n\nfrom config import EMBEDDING_DIMENSION, EMBEDDING_MODEL, OLLAMA_HOST, QDRANT_HOST, QDRANT_PORT\nfrom db import db\n\n# Max chars for mxbai-embed model (512 token context, varies by content)\n# Conservative limit to handle German compound words and special chars\nMAX_EMBED_CHARS = 800\n\n\ndef get_embedding(text):\n \"\"\"Get embedding vector from Ollama.\"\"\"\n # Skip empty content\n if not text or not text.strip():\n return None\n\n # Collapse consecutive dots\/periods (table of contents, etc.)\n text = re.sub(r\"\\.{3,}\", \"...\", text)\n\n # Truncate if too long for model context\n if len(text) > MAX_EMBED_CHARS:\n text = text[:MAX_EMBED_CHARS]\n\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/embeddings\",\n json={\"model\": EMBEDDING_MODEL, \"prompt\": text},\n timeout=60,\n )\n response.raise_for_status()\n data = response.json()\n return data.get(\"embedding\")\n except Exception as e:\n db.log(\"ERROR\", f\"Embedding generation failed: {e}\")\n return None\n\n\ndef store_in_qdrant(collection, point_id, vector, payload):\n \"\"\"Store embedding in Qdrant.\"\"\"\n try:\n response = requests.put(",
"numLines": 50,
"startLine": 1,
"totalLines": 205
}
}
}