{
"event": "PreToolUse",
"tool_name": "Grep",
"tool_input": {
"pattern": "def embed_chunks",
"path": "\/opt\/scripts\/pipeline\/embed.py",
"output_mode": "content",
"-A": 40
}
}
{
"tool_response": {
"mode": "content",
"numFiles": 0,
"filenames": [],
"content": "63:def embed_chunks(chunks, document_id, document_title, source_path):\n64- \"\"\"\n65- Generate embeddings for chunks and store in Qdrant.\n66- Returns number of successfully embedded chunks.\n67- \"\"\"\n68- embedded_count = 0\n69-\n70- for i, chunk in enumerate(chunks):\n71- # Generate embedding\n72- embedding = get_embedding(chunk[\"content\"])\n73-\n74- if not embedding:\n75- db.log(\"WARNING\", f\"Failed to embed chunk {i} of document {document_id}\")\n76- continue\n77-\n78- # Verify dimension\n79- if len(embedding) != EMBEDDING_DIMENSION:\n80- db.log(\"ERROR\", f\"Wrong embedding dimension: {len(embedding)} vs {EMBEDDING_DIMENSION}\")\n81- continue\n82-\n83- # Generate UUID for Qdrant\n84- point_id = str(uuid.uuid4())\n85-\n86- # Prepare payload\n87- payload = {\n88- \"document_id\": document_id,\n89- \"document_title\": document_title,\n90- \"chunk_index\": i,\n91- \"content\": chunk[\"content\"][:1000], # Truncate for payload\n92- \"heading_path\": json.dumps(chunk.get(\"heading_path\", [])),\n93- \"source_path\": source_path,\n94- }\n95-\n96- # Add any chunk metadata\n97- if chunk.get(\"metadata\"):\n98- for key, value in chunk[\"metadata\"].items():\n99- if isinstance(value, (str, int, float, bool)):\n100- payload[key] = value\n101-\n102- # Store in Qdrant\n103- if store_in_qdrant(\"documents\", point_id, embedding, payload):",
"numLines": 41
}
}