{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkSyncService.php",
"oldString": " 'heading_path' => json_decode($chunk['heading_path'] ?? '[]', true) ?: [],\n 'taxonomy_category' => $chunk['taxonomy_category'] ?? null,\n 'taxonomy' => json_decode($chunk['taxonomy_path'] ?? '[]', true) ?: [],\n 'entities' => json_decode($chunk['entities'] ?? '[]', true) ?: [],\n 'keywords' => json_decode($chunk['keywords'] ?? '[]', true) ?: [],",
"newString": " 'heading_path' => $this->decodeJsonArray($chunk['heading_path'] ?? null),\n 'taxonomy_category' => $chunk['taxonomy_category'] ?? null,\n 'taxonomy' => $this->decodeJsonArray($chunk['taxonomy_path'] ?? null),\n 'entities' => $this->decodeJsonArray($chunk['entities'] ?? null),\n 'keywords' => $this->decodeJsonArray($chunk['keywords'] ?? null),",
"originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\nuse Infrastructure\\AI\\OllamaService;\nuse Infrastructure\\Traits\\JsonDecodeTrait;\nuse PDO;\nuse RuntimeException;\n\n\/**\n * Service for synchronizing documentation chunks to Qdrant vector database.\n *\n * Syncs analyzed chunks with their embeddings and metadata to enable\n * semantic search with structured filtering.\n *\/\nfinal class ChunkSyncService\n{\n use JsonDecodeTrait;\n\n private const string COLLECTION = 'dokumentation_chunks';\n private const string QDRANT_HOST = 'http:\/\/localhost:6333';\n private const int VECTOR_SIZE = 1024;\n private const int TIMEOUT = 60;\n private const int BATCH_SIZE = 10;\n\n private PDO $pdo;\n private OllamaService $ollama;\n\n public function __construct()\n {\n $this->ollama = new OllamaService();\n $this->pdo = $this->createConnection();\n }\n\n \/**\n * Ensures the Qdrant collection exists with proper configuration.\n *\/\n public function ensureCollection(): bool\n {\n \/\/ Check if collection exists\n $url = sprintf('%s\/collections\/%s', self::QDRANT_HOST, self::COLLECTION);\n\n try {\n $response = $this->makeRequest($url, [], 'GET');\n if (isset($response['result'])) {\n return true; \/\/ Collection exists\n }\n } catch (RuntimeException) {\n \/\/ Collection doesn't exist, create it\n }\n\n \/\/ Create collection\n $payload = [\n 'vectors' => [\n 'size' => self::VECTOR_SIZE,\n 'distance' => 'Cosine',\n ],\n ];\n\n try {\n $this->makeRequest($url, $payload, 'PUT');\n\n return true;\n } catch (RuntimeException $e) {\n throw new RuntimeException('Failed to create collection: ' . $e->getMessage());\n }\n }\n\n \/**\n * Syncs a single chunk to Qdrant.\n *\/\n public function syncChunk(int $chunkId): bool\n {\n $chunk = $this->getChunk($chunkId);\n\n if ($chunk === null) {\n return false;\n }\n\n \/\/ Only sync completed analyses\n if ($chunk['analysis_status'] !== 'completed') {\n return false;\n }\n\n \/\/ Get document context\n $doc = $this->getDocument((int) $chunk['dokumentation_id']);\n\n \/\/ Prepare text for embedding\n $text = $this->prepareTextForEmbedding($chunk, $doc);\n\n \/\/ Generate embedding\n $embedding = $this->ollama->getEmbedding($text);\n\n \/\/ Build payload with metadata\n $payload = $this->buildPayload($chunk, $doc);\n\n \/\/ Generate UUID for Qdrant if not exists\n $qdrantId = $chunk['qdrant_id'] ?? $this->generateUuid();\n\n \/\/ Upsert to Qdrant\n $success = $this->upsertPoint($qdrantId, $embedding, $payload);\n\n if ($success && $chunk['qdrant_id'] === null) {\n $this->updateQdrantId($chunkId, $qdrantId);\n }\n\n return $success;\n }\n\n \/**\n * Syncs all analyzed chunks that haven't been synced yet.\n *\n * @return array{synced: int, failed: int, errors: array<string>}\n *\/\n public function syncAllPending(int $limit = 100): array\n {\n $this->ensureCollection();\n\n $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n\n $chunks = $this->getUnsyncedChunks($limit);\n\n foreach ($chunks as $chunk) {\n try {\n if ($this->syncChunk((int) $chunk['id'])) {\n $results['synced']++;\n\n if ($results['synced'] % self::BATCH_SIZE === 0) {\n echo \"Synced {$results['synced']} chunks...\\n\";\n }\n } else {\n $results['failed']++;\n $results['errors'][] = \"Chunk #{$chunk['id']}: Sync failed\";\n }\n } catch (RuntimeException $e) {\n $results['failed']++;\n $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n }\n }\n\n return $results;\n }\n\n \/**\n * Syncs all chunks (re-sync).\n *\n * @return array{synced: int, failed: int, errors: array<string>}\n *\/\n public function syncAll(): array\n {\n $this->ensureCollection();\n\n $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n\n $chunks = $this->getAllAnalyzedChunks();\n\n foreach ($chunks as $chunk) {\n try {\n if ($this->syncChunk((int) $chunk['id'])) {\n $results['synced']++;\n\n if ($results['synced'] % self::BATCH_SIZE === 0) {\n echo \"Synced {$results['synced']} chunks...\\n\";\n }\n } else {\n $results['failed']++;\n }\n } catch (RuntimeException $e) {\n $results['failed']++;\n $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n }\n }\n\n return $results;\n }\n\n \/**\n * Searches for similar chunks using semantic search.\n *\n * @param array<string, mixed>|null $filter Optional filter for taxonomy\/entities\n * @return array<array{id: int, doc_id: int, path: string, title: string, content: string, score: float, taxonomy: array<string>, entities: array<mixed>}>\n *\/\n public function search(string $query, int $limit = 5, ?array $filter = null): array\n {\n $embedding = $this->ollama->getEmbedding($query);\n\n $url = sprintf('%s\/collections\/%s\/points\/search', self::QDRANT_HOST, self::COLLECTION);\n\n $payload = [\n 'vector' => array_values($embedding),\n 'limit' => $limit,\n 'with_payload' => true,\n ];\n\n if ($filter !== null) {\n $payload['filter'] = $filter;\n }\n\n $response = $this->makeRequest($url, $payload, 'POST');\n\n if (!isset($response['result']) || !is_array($response['result'])) {\n return [];\n }\n\n return array_map(static function (array $item): array {\n $payload = $item['payload'] ?? [];\n\n return [\n 'id' => (int) ($payload['chunk_id'] ?? 0),\n 'doc_id' => (int) ($payload['doc_id'] ?? 0),\n 'path' => (string) ($payload['path'] ?? ''),\n 'title' => (string) ($payload['title'] ?? ''),\n 'content' => (string) ($payload['content_preview'] ?? ''),\n 'score' => (float) ($item['score'] ?? 0),\n 'taxonomy' => is_array($payload['taxonomy'] ?? null) ? $payload['taxonomy'] : [],\n 'entities' => is_array($payload['entities'] ?? null) ? $payload['entities'] : [],\n ];\n }, $response['result']);\n }\n\n \/**\n * Searches with taxonomy filter.\n *\n * @return array<array<string, mixed>>\n *\/\n public function searchByTaxonomy(string $query, string $category, int $limit = 5): array\n {\n $filter = [\n 'must' => [\n [\n 'key' => 'taxonomy_category',\n 'match' => ['value' => $category],\n ],\n ],\n ];\n\n return $this->search($query, $limit, $filter);\n }\n\n \/**\n * Gets collection statistics.\n *\n * @return array{points_count: int, status: string}|null\n *\/\n public function getStats(): ?array\n {\n $url = sprintf('%s\/collections\/%s', self::QDRANT_HOST, self::COLLECTION);\n\n try {\n $response = $this->makeRequest($url, [], 'GET');\n\n if (!isset($response['result'])) {\n return null;\n }\n\n return [\n 'points_count' => (int) ($response['result']['points_count'] ?? 0),\n 'status' => (string) ($response['result']['status'] ?? 'unknown'),\n ];\n } catch (RuntimeException) {\n return null;\n }\n }\n\n \/**\n * Prepares text for embedding.\n *\n * @param array<string, mixed> $chunk\n * @param array<string, mixed> $doc\n *\/\n private function prepareTextForEmbedding(array $chunk, array $doc): string\n {\n $parts = [];\n\n \/\/ Document context\n $parts[] = 'Dokument: ' . ($doc['title'] ?? '');\n\n \/\/ Heading path\n $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n if (!empty($headingPath)) {\n $parts[] = 'Abschnitt: ' . implode(' > ', $headingPath);\n }\n\n \/\/ Taxonomy\n $taxonomy = $this->decodeJsonArray($chunk['taxonomy_path'] ?? null);\n if (!empty($taxonomy)) {\n $parts[] = 'Kategorie: ' . implode(' > ', $taxonomy);\n }\n\n \/\/ Keywords\n $keywords = $this->decodeJsonArray($chunk['keywords'] ?? null);\n if (!empty($keywords)) {\n $parts[] = 'Keywords: ' . implode(', ', $keywords);\n }\n\n \/\/ Main content - sanitize and limit\n $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';\n $content = $this->sanitizeForEmbedding($content);\n if (mb_strlen($content) > 1000) {\n $content = mb_substr($content, 0, 1000) . '...';\n }\n $parts[] = 'Inhalt: ' . $content;\n\n $text = implode(\"\\n\\n\", $parts);\n\n \/\/ Final safety limit for embedding model context\n if (mb_strlen($text) > 1800) {\n $text = mb_substr($text, 0, 1800) . '...';\n }\n\n return $text;\n }\n\n \/**\n * Sanitizes text for embedding by removing problematic characters.\n *\/\n private function sanitizeForEmbedding(string $text): string\n {\n \/\/ Remove box-drawing and other problematic Unicode characters\n $text = preg_replace('\/[\\x{2500}-\\x{257F}]\/u', ' ', $text) ?? $text; \/\/ Box Drawing\n $text = preg_replace('\/[\\x{2580}-\\x{259F}]\/u', ' ', $text) ?? $text; \/\/ Block Elements\n $text = preg_replace('\/[\\x{25A0}-\\x{25FF}]\/u', ' ', $text) ?? $text; \/\/ Geometric Shapes\n\n \/\/ Remove... [TRUNCATED-df2c606944507b63]",
"structuredPatch": [
{
"oldStart": 356,
"oldLines": 11,
"newStart": 356,
"newLines": 11,
"lines": [
" 'path' => $doc['path'] ?? '',",
" 'title' => $doc['title'] ?? '',",
" 'content_preview' => $preview,",
"- 'heading_path' => json_decode($chunk['heading_path'] ?? '[]', true) ?: [],",
"+ 'heading_path' => $this->decodeJsonArray($chunk['heading_path'] ?? null),",
" 'taxonomy_category' => $chunk['taxonomy_category'] ?? null,",
"- 'taxonomy' => json_decode($chunk['taxonomy_path'] ?? '[]', true) ?: [],",
"- 'entities' => json_decode($chunk['entities'] ?? '[]', true) ?: [],",
"- 'keywords' => json_decode($chunk['keywords'] ?? '[]', true) ?: [],",
"+ 'taxonomy' => $this->decodeJsonArray($chunk['taxonomy_path'] ?? null),",
"+ 'entities' => $this->decodeJsonArray($chunk['entities'] ?? null),",
"+ 'keywords' => $this->decodeJsonArray($chunk['keywords'] ?? null),",
" 'token_count' => (int) ($chunk['token_count'] ?? 0),",
" ];",
" }"
]
}
],
"userModified": false,
"replaceAll": false
}
}