{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/Doc2VectorPipeline.php",
"oldString": "namespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Orchestriert Doc2Vector-Pipeline (Chunking → Analyse → Sync)\n\nfinal class Doc2VectorPipeline",
"newString": "namespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Orchestriert Doc2Vector-Pipeline (Chunking → Analyse → Sync)\n\nuse Domain\\Constants;\n\nfinal class Doc2VectorPipeline",
"originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Orchestriert Doc2Vector-Pipeline (Chunking → Analyse → Sync)\n\nfinal class Doc2VectorPipeline\n{\n public function __construct(\n private ChunkingService $chunking,\n private ChunkAnalysisService $analysis,\n private ChunkSyncService $sync,\n private HybridSearchService $search\n ) {\n }\n\n \/**\n * Runs the full pipeline.\n *\n * @return array{\n * chunking: array{documents: int, chunks: int, tokens: int, errors: array<string>},\n * analysis: array{processed: int, failed: int, errors: array<string>},\n * sync: array{synced: int, failed: int, errors: array<string>},\n * duration_seconds: float\n * }\n *\/\n public function runFull(): array\n {\n $start = microtime(true);\n\n echo '=== Doc2Vector Pipeline ===' . PHP_EOL . PHP_EOL;\n\n \/\/ Stage 1: Chunking\n echo 'Stage 1: Chunking documents...' . PHP_EOL;\n $chunkResult = $this->chunking->chunkAll();\n echo sprintf(\n ' Completed: %d documents, %d chunks, %d tokens' . PHP_EOL,\n $chunkResult['documents'],\n $chunkResult['chunks'],\n $chunkResult['tokens']\n );\n\n \/\/ Stage 2: Analysis\n echo PHP_EOL . 'Stage 2: LLM Analysis (this may take a while)...' . PHP_EOL;\n $analysisResult = $this->analysis->analyzeAllPending(1000);\n echo sprintf(\n ' Completed: %d analyzed, %d failed' . PHP_EOL,\n $analysisResult['processed'],\n $analysisResult['failed']\n );\n\n \/\/ Stage 3: Sync to Qdrant\n echo PHP_EOL . 'Stage 3: Syncing to Qdrant...' . PHP_EOL;\n $syncResult = $this->sync->syncAllPending(1000);\n echo sprintf(\n ' Completed: %d synced, %d failed' . PHP_EOL,\n $syncResult['synced'],\n $syncResult['failed']\n );\n\n $duration = microtime(true) - $start;\n\n echo PHP_EOL . sprintf('Pipeline completed in %.1f seconds' . PHP_EOL, $duration);\n\n return [\n 'chunking' => $chunkResult,\n 'analysis' => $analysisResult,\n 'sync' => $syncResult,\n 'duration_seconds' => $duration,\n ];\n }\n\n \/**\n * Processes only new\/changed documents.\n *\n * @return array<string, mixed>\n *\/\n public function runIncremental(): array\n {\n $start = microtime(true);\n\n echo '=== Incremental Pipeline ===' . PHP_EOL . PHP_EOL;\n\n \/\/ Only analyze pending chunks\n echo 'Analyzing pending chunks...' . PHP_EOL;\n $analysisResult = $this->analysis->analyzeAllPending(100);\n echo sprintf(' %d analyzed, %d failed' . PHP_EOL, $analysisResult['processed'], $analysisResult['failed']);\n\n \/\/ Sync unsynced chunks\n echo 'Syncing to Qdrant...' . PHP_EOL;\n $syncResult = $this->sync->syncAllPending(100);\n echo sprintf(' %d synced, %d failed' . PHP_EOL, $syncResult['synced'], $syncResult['failed']);\n\n return [\n 'analysis' => $analysisResult,\n 'sync' => $syncResult,\n 'duration_seconds' => microtime(true) - $start,\n ];\n }\n\n \/**\n * Gets overall pipeline statistics.\n *\n * @return array<string, mixed>\n *\/\n public function getStats(): array\n {\n $chunkStats = $this->chunking->getStats();\n $analysisStats = $this->analysis->getStats();\n $qdrantStats = $this->sync->getStats();\n\n return [\n 'chunks' => $chunkStats,\n 'analysis' => $analysisStats,\n 'qdrant' => $qdrantStats,\n 'taxonomy_categories' => $this->search->getTaxonomyCategories(),\n ];\n }\n\n \/**\n * Performs a search.\n *\n * @param array<string, mixed> $filters\n * @return array<array<string, mixed>>\n *\/\n public function search(string $query, array $filters = [], int $limit = 10): array\n {\n return $this->search->search($query, $filters, $limit);\n }\n\n \/**\n * Re-chunks a specific document.\n *\n * @return array{chunks_created: int, tokens_total: int}\n *\/\n public function rechunkDocument(int $docId): array\n {\n return $this->chunking->chunkDocument($docId);\n }\n\n \/**\n * Re-analyzes a specific chunk.\n *\n * @return array{taxonomy: array<string>, entities: array<mixed>, keywords: array<string>}\n *\/\n public function reanalyzeChunk(int $chunkId): array\n {\n return $this->analysis->analyzeChunk($chunkId);\n }\n}\n",
"structuredPatch": [
{
"oldStart": 6,
"oldLines": 6,
"newStart": 6,
"newLines": 8,
"lines": [
" ",
" \/\/ @responsibility: Orchestriert Doc2Vector-Pipeline (Chunking → Analyse → Sync)",
" ",
"+use Domain\\Constants;",
"+",
" final class Doc2VectorPipeline",
" {",
" public function __construct("
]
}
],
"userModified": false,
"replaceAll": false
}
}