{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/migration\/content-pipeline\/src\/step_embed.py",
"old_string": "\"\"\"\nEmbedding Step Module\nHandles vector embedding generation and storage.\n\nPart of modularized pipeline architecture.\n\"\"\"\n\nfrom embed import embed_chunks, embed_pending_chunks\n\n\nclass EmbeddingStep:\n \"\"\"Step: Generate and store vector embeddings.\"\"\"\n\n def __init__(self, db, progress=None):\n \"\"\"\n Initialize embedding step.\n\n Args:\n db: Database instance\n progress: Optional PipelineProgress instance\n \"\"\"\n self.db = db\n self.progress = progress",
"new_string": "\"\"\"\nEmbedding Step Module\nHandles vector embedding generation and storage.\n\nPart of modularized pipeline architecture.\nSOLID: Dependency Injection via constructor.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING\n\nfrom embed import embed_chunks, embed_pending_chunks\n\nif TYPE_CHECKING:\n from db import PipelineProgress\n from db_interface import DatabaseInterface\n\n\nclass EmbeddingStep:\n \"\"\"Step: Generate and store vector embeddings.\"\"\"\n\n def __init__(self, db: DatabaseInterface, progress: PipelineProgress | None = None):\n \"\"\"\n Initialize embedding step.\n\n Args:\n db: Database instance (injected dependency)\n progress: Optional PipelineProgress instance\n \"\"\"\n self.db = db\n self.progress = progress"
}
}
{
"tool_response": {
"filePath": "\/var\/www\/migration\/content-pipeline\/src\/step_embed.py",
"oldString": "\"\"\"\nEmbedding Step Module\nHandles vector embedding generation and storage.\n\nPart of modularized pipeline architecture.\n\"\"\"\n\nfrom embed import embed_chunks, embed_pending_chunks\n\n\nclass EmbeddingStep:\n \"\"\"Step: Generate and store vector embeddings.\"\"\"\n\n def __init__(self, db, progress=None):\n \"\"\"\n Initialize embedding step.\n\n Args:\n db: Database instance\n progress: Optional PipelineProgress instance\n \"\"\"\n self.db = db\n self.progress = progress",
"newString": "\"\"\"\nEmbedding Step Module\nHandles vector embedding generation and storage.\n\nPart of modularized pipeline architecture.\nSOLID: Dependency Injection via constructor.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING\n\nfrom embed import embed_chunks, embed_pending_chunks\n\nif TYPE_CHECKING:\n from db import PipelineProgress\n from db_interface import DatabaseInterface\n\n\nclass EmbeddingStep:\n \"\"\"Step: Generate and store vector embeddings.\"\"\"\n\n def __init__(self, db: DatabaseInterface, progress: PipelineProgress | None = None):\n \"\"\"\n Initialize embedding step.\n\n Args:\n db: Database instance (injected dependency)\n progress: Optional PipelineProgress instance\n \"\"\"\n self.db = db\n self.progress = progress",
"originalFile": "\"\"\"\nEmbedding Step Module\nHandles vector embedding generation and storage.\n\nPart of modularized pipeline architecture.\n\"\"\"\n\nfrom embed import embed_chunks, embed_pending_chunks\n\n\nclass EmbeddingStep:\n \"\"\"Step: Generate and store vector embeddings.\"\"\"\n\n def __init__(self, db, progress=None):\n \"\"\"\n Initialize embedding step.\n\n Args:\n db: Database instance\n progress: Optional PipelineProgress instance\n \"\"\"\n self.db = db\n self.progress = progress\n\n def execute(self, chunks, doc_id, file_name, file_path):\n \"\"\"\n Generate embeddings for chunks.\n\n Args:\n chunks: List of chunk dictionaries with db_id\n doc_id: Document ID\n file_name: Document filename (for logging)\n file_path: Document file path (for metadata)\n\n Returns:\n int: Number of chunks successfully embedded\n \"\"\"\n if self.progress:\n self.progress.update_step(\"embed\")\n self.progress.add_log(f\"Erstelle Embeddings für {len(chunks)} Chunks...\")\n\n embedded = embed_chunks(chunks, doc_id, file_name, file_path, progress=self.progress)\n\n self.db.log(\"INFO\", f\"Embedded {embedded}\/{len(chunks)} chunks\")\n\n if self.progress:\n self.progress.add_log(f\"{embedded} Embeddings erstellt\")\n\n return embedded\n\n def embed_pending(self):\n \"\"\"\n Process any pending chunks that need embeddings.\n\n Returns:\n int: Number of chunks embedded\n \"\"\"\n if self.progress:\n self.progress.update_step(\"embed\")\n self.progress.add_log(\"Verarbeite ausstehende Embeddings...\")\n\n embedded = embed_pending_chunks()\n\n self.db.log(\"INFO\", f\"Embedded {embedded} pending chunks\")\n\n if self.progress and embedded > 0:\n self.progress.add_log(f\"{embedded} weitere Embeddings erstellt\")\n\n return embedded\n",
"structuredPatch": [
{
"oldStart": 3,
"oldLines": 20,
"newStart": 3,
"newLines": 29,
"lines": [
" Handles vector embedding generation and storage.",
" ",
" Part of modularized pipeline architecture.",
"+SOLID: Dependency Injection via constructor.",
" \"\"\"",
" ",
"+from __future__ import annotations",
"+",
"+from typing import TYPE_CHECKING",
"+",
" from embed import embed_chunks, embed_pending_chunks",
" ",
"+if TYPE_CHECKING:",
"+ from db import PipelineProgress",
"+ from db_interface import DatabaseInterface",
" ",
"+",
" class EmbeddingStep:",
" \"\"\"Step: Generate and store vector embeddings.\"\"\"",
" ",
"- def __init__(self, db, progress=None):",
"+ def __init__(self, db: DatabaseInterface, progress: PipelineProgress | None = None):",
" \"\"\"",
" Initialize embedding step.",
" ",
" Args:",
"- db: Database instance",
"+ db: Database instance (injected dependency)",
" progress: Optional PipelineProgress instance",
" \"\"\"",
" self.db = db"
]
}
],
"userModified": false,
"replaceAll": false
}
}