{
"event": "PostToolUse",
"tool_name": "Read",
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/migration\/content-pipeline\/src\/step_embed.py",
"content": "\"\"\"\nEmbedding Step Module\nHandles vector embedding generation and storage.\n\nPart of modularized pipeline architecture.\n\"\"\"\n\nfrom embed import embed_chunks, embed_pending_chunks\n\n\nclass EmbeddingStep:\n \"\"\"Step: Generate and store vector embeddings.\"\"\"\n\n def __init__(self, db, progress=None):\n \"\"\"\n Initialize embedding step.\n\n Args:\n db: Database instance\n progress: Optional PipelineProgress instance\n \"\"\"\n self.db = db\n self.progress = progress\n\n def execute(self, chunks, doc_id, file_name, file_path):\n \"\"\"\n Generate embeddings for chunks.\n\n Args:\n chunks: List of chunk dictionaries with db_id\n doc_id: Document ID\n file_name: Document filename (for logging)\n file_path: Document file path (for metadata)\n\n Returns:\n int: Number of chunks successfully embedded\n \"\"\"\n if self.progress:\n self.progress.update_step(\"embed\")\n self.progress.add_log(f\"Erstelle Embeddings für {len(chunks)} Chunks...\")\n\n embedded = embed_chunks(chunks, doc_id, file_name, file_path, progress=self.progress)\n\n self.db.log(\"INFO\", f\"Embedded {embedded}\/{len(chunks)} chunks\")\n\n if self.progress:\n self.progress.add_log(f\"{embedded} Embeddings erstellt\")\n\n return embedded\n\n def embed_pending(self):\n \"\"\"\n Process any pending chunks that need embeddings.\n\n Returns:\n int: Number of chunks embedded\n \"\"\"\n if self.progress:\n self.progress.update_step(\"embed\")\n self.progress.add_log(\"Verarbeite ausstehende Embeddings...\")\n\n embedded = embed_pending_chunks()\n\n self.db.log(\"INFO\", f\"Embedded {embedded} pending chunks\")\n\n if self.progress and embedded > 0:\n self.progress.add_log(f\"{embedded} weitere Embeddings erstellt\")\n\n return embedded\n",
"numLines": 70,
"startLine": 1,
"totalLines": 70
}
}
}