Protokoll #14966

ID14966
Zeitstempel2025-12-25 13:01:04.197738
Clientroot
IP145.224.72.140
Modellclaude-sonnet-4-20250514
Statuscompleted
Tokens11,321 (Input: 2,474, Output: 8,847)
Dauer103 ms
Request-Zeit2025-12-25 13:01:04.197738
Response-Zeit2025-12-25 13:01:04.301107

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkSyncService.php",
        "content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Synchronisiert Chunks mit Embeddings nach Qdrant\n\nuse Infrastructure\\AI\\OllamaService;\nuse Infrastructure\\AI\\QdrantClient;\nuse Infrastructure\\Traits\\JsonDecodeTrait;\nuse PDO;\n\nfinal class ChunkSyncService\n{\n    use JsonDecodeTrait;\n\n    private const string COLLECTION = 'dokumentation_chunks';\n    private const int VECTOR_SIZE = 1024;\n    private const int BATCH_SIZE = 10;\n\n    public function __construct(\n        private PDO $pdo,\n        private OllamaService $ollama,\n        private QdrantClient $qdrant\n    ) {\n    }\n\n    \/**\n     * Ensures the Qdrant collection exists with proper configuration.\n     *\/\n    public function ensureCollection(): bool\n    {\n        return $this->qdrant->ensureCollection(self::COLLECTION, self::VECTOR_SIZE);\n    }\n\n    \/**\n     * Syncs a single chunk to Qdrant.\n     *\/\n    public function syncChunk(int $chunkId): bool\n    {\n        $chunk = $this->getChunk($chunkId);\n\n        if ($chunk === null) {\n            return false;\n        }\n\n        if ($chunk['analysis_status'] !== 'completed') {\n            return false;\n        }\n\n        $doc = $this->getDocument((int) $chunk['dokumentation_id']);\n        $text = $this->prepareTextForEmbedding($chunk, $doc);\n        $embedding = $this->ollama->getEmbedding($text);\n        $payload = $this->buildPayload($chunk, $doc);\n        $qdrantId = $chunk['qdrant_id'] ?? $this->qdrant->generateUuid();\n\n        $success = $this->qdrant->upsertPoint(self::COLLECTION, $qdrantId, $embedding, $payload);\n\n        if ($success && $chunk['qdrant_id'] === null) {\n            $this->updateQdrantId($chunkId, $qdrantId);\n        }\n\n        return $success;\n    }\n\n    \/**\n     * Syncs all analyzed chunks that haven't been synced yet.\n     *\n     * @return array{synced: int, failed: int, errors: array<string>}\n     *\/\n    public function syncAllPending(int $limit = 100): array\n    {\n        $this->ensureCollection();\n\n        $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n        $chunks = $this->getUnsyncedChunks($limit);\n\n        foreach ($chunks as $chunk) {\n            try {\n                if ($this->syncChunk((int) $chunk['id'])) {\n                    $results['synced']++;\n\n                    if ($results['synced'] % self::BATCH_SIZE === 0) {\n                        echo \"Synced {$results['synced']} chunks...\\n\";\n                    }\n                } else {\n                    $results['failed']++;\n                    $results['errors'][] = \"Chunk #{$chunk['id']}: Sync failed\";\n                }\n            } catch (\\RuntimeException $e) {\n                $results['failed']++;\n                $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n            }\n        }\n\n        return $results;\n    }\n\n    \/**\n     * Syncs all chunks (re-sync).\n     *\n     * @return array{synced: int, failed: int, errors: array<string>}\n     *\/\n    public function syncAll(): array\n    {\n        $this->ensureCollection();\n\n        $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n        $chunks = $this->getAllAnalyzedChunks();\n\n        foreach ($chunks as $chunk) {\n            try {\n                if ($this->syncChunk((int) $chunk['id'])) {\n                    $results['synced']++;\n\n                    if ($results['synced'] % self::BATCH_SIZE === 0) {\n                        echo \"Synced {$results['synced']} chunks...\\n\";\n                    }\n                } else {\n                    $results['failed']++;\n                }\n            } catch (\\RuntimeException $e) {\n                $results['failed']++;\n                $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n            }\n        }\n\n        return $results;\n    }\n\n    \/**\n     * Gets collection statistics.\n     *\n     * @return array{points_count: int, status: string}|null\n     *\/\n    public function getStats(): ?array\n    {\n        return $this->qdrant->getCollectionStats(self::COLLECTION);\n    }\n\n    \/**\n     * Prepares text for embedding.\n     *\n     * @param array<string, mixed> $chunk\n     * @param array<string, mixed> $doc\n     *\/\n    private function prepareTextForEmbedding(array $chunk, array $doc): string\n    {\n        $parts = [];\n\n        $parts[] = 'Dokument: ' . ($doc['title'] ?? '');\n\n        $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n        if (!empty($headingPath)) {\n            $parts[] = 'Abschnitt: ' . implode(' > ', $headingPath);\n        }\n\n        $taxonomy = $this->decodeJsonArray($chunk['taxonomy_path'] ?? null);\n        if (!empty($taxonomy)) {\n            $parts[] = 'Kategorie: ' . implode(' > ', $taxonomy);\n        }\n\n        $keywords = $this->decodeJsonArray($chunk['keywords'] ?? null);\n        if (!empty($keywords)) {\n            $parts[] = 'Keywords: ' . implode(', ', $keywords);\n        }\n\n        $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';\n        $content = $this->sanitizeForEmbedding($content);\n        if (mb_strlen($content) > 1000) {\n            $content = mb_substr($content, 0, 1000) . '...';\n        }\n        $parts[] = 'Inhalt: ' . $content;\n\n        $text = implode(\"\\n\\n\", $parts);\n\n        if (mb_strlen($text) > 1800) {\n            $text = mb_substr($text, 0, 1800) . '...';\n        }\n\n        return $text;\n    }\n\n    \/**\n     * Sanitizes text for embedding by removing problematic characters.\n     *\/\n    private function sanitizeForEmbedding(string $text): string\n    {\n        $text = preg_replace('\/[\\x{2500}-\\x{257F}]\/u', ' ', $text) ?? $text;\n        $text = preg_replace('\/[\\x{2580}-\\x{259F}]\/u', ' ', $text) ?? $text;\n        $text = preg_replace('\/[\\x{25A0}-\\x{25FF}]\/u', ' ', $text) ?? $text;\n        $text = preg_replace('\/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]\/u', '', $text) ?? $text;\n        $text = preg_replace('\/[ \\t]+\/', ' ', $text) ?? $text;\n        $text = preg_replace('\/\\n{3,}\/', \"\\n\\n\", $text) ?? $text;\n        $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');\n\n        return trim($text);\n    }\n\n    \/**\n     * Builds the Qdrant payload.\n     *\n     * @param array<string, mixed> $chunk\n     * @param array<string, mixed> $doc\n     * @return array<string, mixed>\n     *\/\n    private function buildPayload(array $chunk, array $doc): array\n    {\n        $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';\n        $content = $this->sanitizeForEmbedding($content);\n        $preview = mb_strlen($content) > 300 ? mb_substr($content, 0, 300) . '...' : $content;\n\n        return [\n            'chunk_id' => (int) $chunk['id'],\n            'doc_id' => (int) $chunk['dokumentation_id'],\n            'chunk_index' => (int) $chunk['chunk_index'],\n            'path' => $doc['path'] ?? '',\n            'title' => $doc['title'] ?? '',\n            'content_preview' => $preview,\n            'heading_path' => $this->decodeJsonArray($chunk['heading_path'] ?? null),\n            'taxonomy_category' => $chunk['taxonomy_category'] ?? null,\n            'taxonomy' => $this->decodeJsonArray($chunk['taxonomy_path'] ?? null),\n            'entities' => $this->decodeJsonArray($chunk['entities'] ?? null),\n            'keywords' => $this->decodeJsonArray($chunk['keywords'] ?? null),\n            'token_count' => (int) ($chunk['token_count'] ?? 0),\n        ];\n    }\n\n    \/**\n     * Updates the qdrant_id in the database.\n     *\/\n    private function updateQdrantId(int $chunkId, string $qdrantId): void\n    {\n        $stmt = $this->pdo->prepare('UPDATE dokumentation_chunks SET qdrant_id = :qid WHERE id = :id');\n        $stmt->execute(['id' => $chunkId, 'qid' => $qdrantId]);\n    }\n\n    \/**\n     * Gets a chunk by ID.\n     *\n     * @return array<string, mixed>|null\n     *\/\n    private function getChunk(int $id): ?array\n    {\n        $stmt = $this->pdo->prepare('SELECT * FROM dokumentation_chunks WHERE id = :id');\n        $stmt->execute(['id' => $id]);\n        $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n        return $result !== false ? $result : null;\n    }\n\n    \/**\n     * Gets a document by ID.\n     *\n     * @return array<string, mixed>\n     *\/\n    private function getDocument(int $id): array\n    {\n        $stmt = $this->pdo->prepare('SELECT * FROM dokumentation WHERE id = :id');\n        $stmt->execute(['id' => $id]);\n        $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n        return $result !== false ? $result : [];\n    }\n\n    \/**\n     * Gets unsynced chunks (analyzed but not in Qdrant).\n     *\n     * @return array<array<string, mixed>>\n     *\/\n    private function getUnsyncedChunks(int $limit): array\n    {\n        $stmt = $this->pdo->prepare(\"\n            SELECT * FROM dokumentation_chunks\n            WHERE analysis_status = 'completed' AND qdrant_id IS NULL\n            ORDER BY dokumentation_id, chunk_index\n            LIMIT :limit\n        \");\n        $stmt->bindValue('limit', $limit, PDO::PARAM_INT);\n        $stmt->execute();\n\n        return $stmt->fetchAll(PDO::FETCH_ASSOC);\n    }\n\n    \/**\n     * Gets all analyzed chunks.\n     *\n     * @return array<array<string, mixed>>\n     *\/\n    private function getAllAnalyzedChunks(): array\n    {\n        $stmt = $this->pdo->query(\"\n            SELECT * FROM dokumentation_chunks\n            WHERE analysis_status = 'completed'\n            ORDER BY dokumentation_id, chunk_index\n        \");\n\n        return $stmt->fetchAll(PDO::FETCH_ASSOC);\n    }\n}\n"
    }
}

Response

{
    "tool_response": {
        "type": "update",
        "filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkSyncService.php",
        "content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Synchronisiert Chunks mit Embeddings nach Qdrant\n\nuse Infrastructure\\AI\\OllamaService;\nuse Infrastructure\\AI\\QdrantClient;\nuse Infrastructure\\Traits\\JsonDecodeTrait;\nuse PDO;\n\nfinal class ChunkSyncService\n{\n    use JsonDecodeTrait;\n\n    private const string COLLECTION = 'dokumentation_chunks';\n    private const int VECTOR_SIZE = 1024;\n    private const int BATCH_SIZE = 10;\n\n    public function __construct(\n        private PDO $pdo,\n        private OllamaService $ollama,\n        private QdrantClient $qdrant\n    ) {\n    }\n\n    \/**\n     * Ensures the Qdrant collection exists with proper configuration.\n     *\/\n    public function ensureCollection(): bool\n    {\n        return $this->qdrant->ensureCollection(self::COLLECTION, self::VECTOR_SIZE);\n    }\n\n    \/**\n     * Syncs a single chunk to Qdrant.\n     *\/\n    public function syncChunk(int $chunkId): bool\n    {\n        $chunk = $this->getChunk($chunkId);\n\n        if ($chunk === null) {\n            return false;\n        }\n\n        if ($chunk['analysis_status'] !== 'completed') {\n            return false;\n        }\n\n        $doc = $this->getDocument((int) $chunk['dokumentation_id']);\n        $text = $this->prepareTextForEmbedding($chunk, $doc);\n        $embedding = $this->ollama->getEmbedding($text);\n        $payload = $this->buildPayload($chunk, $doc);\n        $qdrantId = $chunk['qdrant_id'] ?? $this->qdrant->generateUuid();\n\n        $success = $this->qdrant->upsertPoint(self::COLLECTION, $qdrantId, $embedding, $payload);\n\n        if ($success && $chunk['qdrant_id'] === null) {\n            $this->updateQdrantId($chunkId, $qdrantId);\n        }\n\n        return $success;\n    }\n\n    \/**\n     * Syncs all analyzed chunks that haven't been synced yet.\n     *\n     * @return array{synced: int, failed: int, errors: array<string>}\n     *\/\n    public function syncAllPending(int $limit = 100): array\n    {\n        $this->ensureCollection();\n\n        $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n        $chunks = $this->getUnsyncedChunks($limit);\n\n        foreach ($chunks as $chunk) {\n            try {\n                if ($this->syncChunk((int) $chunk['id'])) {\n                    $results['synced']++;\n\n                    if ($results['synced'] % self::BATCH_SIZE === 0) {\n                        echo \"Synced {$results['synced']} chunks...\\n\";\n                    }\n                } else {\n                    $results['failed']++;\n                    $results['errors'][] = \"Chunk #{$chunk['id']}: Sync failed\";\n                }\n            } catch (\\RuntimeException $e) {\n                $results['failed']++;\n                $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n            }\n        }\n\n        return $results;\n    }\n\n    \/**\n     * Syncs all chunks (re-sync).\n     *\n     * @return array{synced: int, failed: int, errors: array<string>}\n     *\/\n    public function syncAll(): array\n    {\n        $this->ensureCollection();\n\n        $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n        $chunks = $this->getAllAnalyzedChunks();\n\n        foreach ($chunks as $chunk) {\n            try {\n                if ($this->syncChunk((int) $chunk['id'])) {\n                    $results['synced']++;\n\n                    if ($results['synced'] % self::BATCH_SIZE === 0) {\n                        echo \"Synced {$results['synced']} chunks...\\n\";\n                    }\n                } else {\n                    $results['failed']++;\n                }\n            } catch (\\RuntimeException $e) {\n                $results['failed']++;\n                $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n            }\n        }\n\n        return $results;\n    }\n\n    \/**\n     * Gets collection statistics.\n     *\n     * @return array{points_count: int, status: string}|null\n     *\/\n    public function getStats(): ?array\n    {\n        return $this->qdrant->getCollectionStats(self::COLLECTION);\n    }\n\n    \/**\n     * Prepares text for embedding.\n     *\n     * @param array<string, mixed> $chunk\n     * @param array<string, mixed> $doc\n     *\/\n    private function prepareTextForEmbedding(array $chunk, array $doc): string\n    {\n        $parts = [];\n\n        $parts[] = 'Dokument: ' . ($doc['title'] ?? '');\n\n        $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n        if (!empty($headingPath)) {\n            $parts[] = 'Abschnitt: ' . implode(' > ', $headingPath);\n        }\n\n        $taxonomy = $this->decodeJsonArray($chunk['taxonomy_path'] ?? null);\n        if (!empty($taxonomy)) {\n            $parts[] = 'Kategorie: ' . implode(' > ', $taxonomy);\n        }\n\n        $keywords = $this->decodeJsonArray($chunk['keywords'] ?? null);\n        if (!empty($keywords)) {\n            $parts[] = 'Keywords: ' . implode(', ', $keywords);\n        }\n\n        $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';\n        $content = $this->sanitizeForEmbedding($content);\n        if (mb_strlen($content) > 1000) {\n            $content = mb_substr($content, 0, 1000) . '...';\n        }\n        $parts[] = 'Inhalt: ' . $content;\n\n        $text = implode(\"\\n\\n\", $parts);\n\n        if (mb_strlen($text) > 1800) {\n            $text = mb_substr($text, 0, 1800) . '...';\n        }\n\n        return $text;\n    }\n\n    \/**\n     * Sanitizes text for embedding by removing problematic characters.\n     *\/\n    private function sanitizeForEmbedding(string $text): string\n    {\n        $text = preg_replace('\/[\\x{2500}-\\x{257F}]\/u', ' ', $text) ?? $text;\n        $text = preg_replace('\/[\\x{2580}-\\x{259F}]\/u', ' ', $text) ?? $text;\n        $text = preg_replace('\/[\\x{25A0}-\\x{25FF}]\/u', ' ', $text) ?? $text;\n        $text = preg_replace('\/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]\/u', '', $text) ?? $text;\n        $text = preg_replace('\/[ \\t]+\/', ' ', $text) ?? $text;\n        $text = preg_replace('\/\\n{3,}\/', \"\\n\\n\", $text) ?? $text;\n        $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');\n\n        return trim($text);\n    }\n\n    \/**\n     * Builds the Qdrant payload.\n     *\n     * @param array<string, mixed> $chunk\n     * @param array<string, mixed> $doc\n     * @return array<string, mixed>\n     *\/\n    private function buildPayload(array $chunk, array $doc): array\n    {\n        $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';\n        $content = $this->sanitizeForEmbedding($content);\n        $preview = mb_strlen($content) > 300 ? mb_substr($content, 0, 300) . '...' : $content;\n\n        return [\n            'chunk_id' => (int) $chunk['id'],\n            'doc_id' => (int) $chunk['dokumentation_id'],\n            'chunk_index' => (int) $chunk['chunk_index'],\n            'path' => $doc['path'] ?? '',\n            'title' => $doc['title'] ?? '',\n            'content_preview' => $preview,\n            'heading_path' => $this->decodeJsonArray($chunk['heading_path'] ?? null),\n            'taxonomy_category' => $chunk['taxonomy_category'] ?? null,\n            'taxonomy' => $this->decodeJsonArray($chunk['taxonomy_path'] ?? null),\n            'entities' => $this->decodeJsonArray($chunk['entities'] ?? null),\n            'keywords' => $this->decodeJsonArray($chunk['keywords'] ?? null),\n            'token_count' => (int) ($chunk['token_count'] ?? 0),\n        ];\n    }\n\n    \/**\n     * Updates the qdrant_id in the database.\n     *\/\n    private function updateQdrantId(int $chunkId, string $qdrantId): void\n    {\n        $stmt = $this->pdo->prepare('UPDATE dokumentation_chunks SET qdrant_id = :qid WHERE id = :id');\n        $stmt->execute(['id' => $chunkId, 'qid' => $qdrantId]);\n    }\n\n    \/**\n     * Gets a chunk by ID.\n     *\n     * @return array<string, mixed>|null\n     *\/\n    private function getChunk(int $id): ?array\n    {\n        $stmt = $this->pdo->prepare('SELECT * FROM dokumentation_chunks WHERE id = :id');\n        $stmt->execute(['id' => $id]);\n        $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n        return $result !== false ? $result : null;\n    }\n\n    \/**\n     * Gets a document by ID.\n     *\n     * @return array<string, mixed>\n     *\/\n    private function getDocument(int $id): array\n    {\n        $stmt = $this->pdo->prepare('SELECT * FROM dokumentation WHERE id = :id');\n        $stmt->execute(['id' => $id]);\n        $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n        return $result !== false ? $result : [];\n    }\n\n    \/**\n     * Gets unsynced chunks (analyzed but not in Qdrant).\n     *\n     * @return array<array<string, mixed>>\n     *\/\n    private function getUnsyncedChunks(int $limit): array\n    {\n        $stmt = $this->pdo->prepare(\"\n            SELECT * FROM dokumentation_chunks\n            WHERE analysis_status = 'completed' AND qdrant_id IS NULL\n            ORDER BY dokumentation_id, chunk_index\n            LIMIT :limit\n        \");\n        $stmt->bindValue('limit', $limit, PDO::PARAM_INT);\n        $stmt->execute();\n\n        return $stmt->fetchAll(PDO::FETCH_ASSOC);\n    }\n\n    \/**\n     * Gets all analyzed chunks.\n     *\n     * @return array<array<string, mixed>>\n     *\/\n    private function getAllAnalyzedChunks(): array\n    {\n        $stmt = $this->pdo->query(\"\n            SELECT * FROM dokumentation_chunks\n            WHERE analysis_status = 'completed'\n            ORDER BY dokumentation_id, chunk_index\n        \");\n\n        return $stmt->fetchAll(PDO::FETCH_ASSOC);\n    }\n}\n",
        "structuredPatch": [
            {
                "oldStart": 7,
                "oldLines": 10,
                "newStart": 7,
                "newLines": 9,
                "lines": [
                    " \/\/ @responsibility: Synchronisiert Chunks mit Embeddings nach Qdrant",
                    " ",
                    " use Infrastructure\\AI\\OllamaService;",
                    "-use Infrastructure\\Config\\CredentialService;",
                    "+use Infrastructure\\AI\\QdrantClient;",
                    " use Infrastructure\\Traits\\JsonDecodeTrait;",
                    " use PDO;",
                    "-use RuntimeException;",
                    " ",
                    " final class ChunkSyncService",
                    " {"
                ]
            },
            {
                "oldStart": 18,
                "oldLines": 16,
                "newStart": 17,
                "newLines": 13,
                "lines": [
                    " ",
                    "     private const string COLLECTION = 'dokumentation_chunks';",
                    "     private const int VECTOR_SIZE = 1024;",
                    "-    private const int TIMEOUT = 60;",
                    "     private const int BATCH_SIZE = 10;",
                    " ",
                    "-    private string $qdrantHost;",
                    "-",
                    "     public function __construct(",
                    "         private PDO $pdo,",
                    "-        private OllamaService $ollama",
                    "+        private OllamaService $ollama,",
                    "+        private QdrantClient $qdrant",
                    "     ) {",
                    "-        $this->qdrantHost = CredentialService::getQdrantHost();",
                    "     }",
                    " ",
                    "     \/**"
                ]
            },
            {
                "oldStart": 35,
                "oldLines": 33,
                "newStart": 31,
                "newLines": 7,
                "lines": [
                    "      *\/",
                    "     public function ensureCollection(): bool",
                    "     {",
                    "-        \/\/ Check if collection exists",
                    "-        $url = sprintf('%s\/collections\/%s', $this->qdrantHost, self::COLLECTION);",
                    "-",
                    "-        try {",
                    "-            $response = $this->makeRequest($url, [], 'GET');",
                    "-            if (isset($response['result'])) {",
                    "-                return true; \/\/ Collection exists",
                    "-            }",
                    "-        } catch (RuntimeException) {",
                    "-            \/\/ Collection doesn't exist, create it",
                    "-        }",
                    "-",
                    "-        \/\/ Create collection",
                    "-        $payload = [",
                    "-            'vectors' => [",
                    "-                'size' => self::VECTOR_SIZE,",
                    "-                'distance' => 'Cosine',",
                    "-            ],",
                    "-        ];",
                    "-",
                    "-        try {",
                    "-            $this->makeRequest($url, $payload, 'PUT');",
                    "-",
                    "-            return true;",
                    "-        } catch (RuntimeException $e) {",
                    "-            throw new RuntimeException('Failed to create collection: ' . $e->getMessage());",
                    "-        }",
                    "+        return $this->qdrant->ensureCollection(self::COLLECTION, self::VECTOR_SIZE);",
                    "     }",
                    " ",
                    "     \/**"
                ]
            },
            {
                "oldStart": 75,
                "oldLines": 29,
                "newStart": 45,
                "newLines": 18,
                "lines": [
                    "             return false;",
                    "         }",
                    " ",
                    "-        \/\/ Only sync completed analyses",
                    "         if ($chunk['analysis_status'] !== 'completed') {",
                    "             return false;",
                    "         }",
                    " ",
                    "-        \/\/ Get document context",
                    "         $doc = $this->getDocument((int) $chunk['dokumentation_id']);",
                    "-",
                    "-        \/\/ Prepare text for embedding",
                    "         $text = $this->prepareTextForEmbedding($chunk, $doc);",
                    "-",
                    "-        \/\/ Generate embedding",
                    "         $embedding = $this->ollama->getEmbedding($text);",
                    "-",
                    "-        \/\/ Build payload with metadata",
                    "         $payload = $this->buildPayload($chunk, $doc);",
                    "+        $qdrantId = $chunk['qdrant_id'] ?? $this->qdrant->generateUuid();",
                    " ",
                    "-        \/\/ Generate UUID for Qdrant if not exists",
                    "-        $qdrantId = $chunk['qdrant_id'] ?? $this->generateUuid();",
                    "+        $success = $this->qdrant->upsertPoint(self::COLLECTION, $qdrantId, $embedding, $payload);",
                    " ",
                    "-        \/\/ Upsert to Qdrant",
                    "-        $success = $this->upsertPoint($qdrantId, $embedding, $payload);",
                    "-",
                    "         if ($success && $chunk['qdrant_id'] === null) {",
                    "             $this->updateQdrantId($chunkId, $qdrantId);",
                    "         }"
                ]
            },
            {
                "oldStart": 115,
                "oldLines": 7,
                "newStart": 74,
                "newLines": 6,
                "lines": [
                    "         $this->ensureCollection();",
                    " ",
                    "         $results = ['synced' => 0, 'failed' => 0, 'errors' => []];",
                    "-",
                    "         $chunks = $this->getUnsyncedChunks($limit);",
                    " ",
                    "         foreach ($chunks as $chunk) {"
                ]
            },
            {
                "oldStart": 130,
                "oldLines": 7,
                "newStart": 88,
                "newLines": 7,
                "lines": [
                    "                     $results['failed']++;",
                    "                     $results['errors'][] = \"Chunk #{$chunk['id']}: Sync failed\";",
                    "                 }",
                    "-            } catch (RuntimeException $e) {",
                    "+            } catch (\\RuntimeException $e) {",
                    "                 $results['failed']++;",
                    "                 $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();",
                    "             }"
                ]
            },
            {
                "oldStart": 149,
                "oldLines": 7,
                "newStart": 107,
                "newLines": 6,
                "lines": [
                    "         $this->ensureCollection();",
                    " ",
                    "         $results = ['synced' => 0, 'failed' => 0, 'errors' => []];",
                    "-",
                    "         $chunks = $this->getAllAnalyzedChunks();",
                    " ",
                    "         foreach ($chunks as $chunk) {"
                ]
            },
            {
                "oldStart": 163,
                "oldLines": 7,
                "newStart": 120,
                "newLines": 7,
                "lines": [
                    "                 } else {",
                    "                     $results['failed']++;",
                    "                 }",
                    "-            } catch (RuntimeException $e) {",
                    "+            } catch (\\RuntimeException $e) {",
                    "                 $results['failed']++;",
                    "                 $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();",
                    "             }"
                ]
            },
            {
                "oldStart": 173,
                "oldLines": 91,
                "newStart": 130,
                "newLines": 13,
                "lines": [
                    "     }",
                    " ",
                    "     \/**",
                    "-     * Searches for similar chunks using semantic search.",
                    "-     *",
                    "-     * @param array<string, mixed>|null $filter Optional filter for taxonomy\/entities",
                    "-     * @return array<array{id: int, doc_id: int, path: string, title: string, content: string, score: float, taxonomy: array<string>, entities: array<mixed>}>",
                    "-     *\/",
                    "-    public function search(string $query, int $limit = 5, ?array $filter = null): array",
                    "-    {",
                    "-        $embedding = $this->ollama->getEmbedding($query);",
                    "-",
                    "-        $url = sprintf('%s\/collections\/%s\/points\/search', $this->qdrantHost, self::COLLECTION);",
                    "-",
                    "-        $payload = [",
                    "-            'vector' => array_values($embedding),",
                    "-            'limit' => $limit,",
                    "-            'with_payload' => true,",
                    "-        ];",
                    "-",
                    "-        if ($filter !== null) {",
                    "-            $payload['filter'] = $filter;",
                    "-        }",
                    "-",
                    "-        $response = $this->makeRequest($url, $payload, 'POST');",
                    "-",
                    "-        if (!isset($response['result']) || !is_array($response['result'])) {",
                    "-            return [];",
                    "-        }",
                    "-",
                    "-        return array_map(static function (array $item): array {",
                    "-            $payload = $item['payload'] ?? [];",
                    "-",
                    "-            return [",
                    "-                'id' => (int) ($payload['chunk_id'] ?? 0),",
                    "-                'doc_id' => (int) ($payload['doc_id'] ?? 0),",
                    "-                'path' => (string) ($payload['path'] ?? ''),",
                    "-                'title' => (string) ($payload['title'] ?? ''),",
                    "-                'content' => (string) ($payload['content_preview'] ?? ''),",
                    "-                'score' => (float) ($item['score'] ?? 0),",
                    "-                'taxonomy' => is_array($payload['taxonomy'] ?? null) ? $payload['taxonomy'] : [],",
                    "-                'entities' => is_array($payload['entities'] ?? null) ? $payload['entities'] : [],",
                    "-            ];",
                    "-        }, $response['result']);",
                    "-    }",
                    "-",
                    "-    \/**",
                    "-     * Searches with taxonomy filter.",
                    "-     *",
                    "-     * @return array<array<string, mixed>>",
                    "-     *\/",
                    "-    public function searchByTaxonomy(string $query, string $category, int $limit = 5): array",
                    "-    {",
                    "-        $filter = [",
                    "-            'must' => [",
                    "-                [",
                    "-                    'key' => 'taxonomy_category',",
                    "-                    'match' => ['value' => $category],",
                    "-                ],",
                    "-            ],",
                    "-        ];",
                    "-",
                    "-        return $this->search($query, $limit, $filter);",
                    "-    }",
                    "-",
                    "-    \/**",
                    "      * Gets collection statistics.",
                    "      *",
                    "      * @return array{points_count: int, status: string}|null",
                    "      *\/",
                    "     public function getStats(): ?array",
                    "     {",
                    "-        $url = sprintf('%s\/collections\/%s', $this->qdrantHost, self::COLLECTION);",
                    "-",
                    "-        try {",
                    "-            $response = $this->makeRequest($url, [], 'GET');",
                    "-",
                    "-            if (!isset($response['result'])) {",
                    "-                return null;",
                    "-            }",
                    "-",
                    "-            return [",
                    "-                'points_count' => (int) ($response['result']['points_count'] ?? 0),",
                    "-                'status' => (string) ($response['result']['status'] ?? 'unknown'),",
                    "-            ];",
                    "-        } catch (RuntimeException) {",
                    "-            return null;",
                    "-        }",
                    "+        return $this->qdrant->getCollectionStats(self::COLLECTION);",
                    "     }",
                    " ",
                    "     \/**"
                ]
            },
            {
                "oldStart": 270,
                "oldLines": 28,
                "newStart": 149,
                "newLines": 23,
                "lines": [
                    "     {",
                    "         $parts = [];",
                    " ",
                    "-        \/\/ Document context",
                    "         $parts[] = 'Dokument: ' . ($doc['title'] ?? '');",
                    " ",
                    "-        \/\/ Heading path",
                    "         $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);",
                    "         if (!empty($headingPath)) {",
                    "             $parts[] = 'Abschnitt: ' . implode(' > ', $headingPath);",
                    "         }",
                    " ",
                    "-        \/\/ Taxonomy",
                    "         $taxonomy = $this->decodeJsonArray($chunk['taxonomy_path'] ?? null);",
                    "         if (!empty($taxonomy)) {",
                    "             $parts[] = 'Kategorie: ' . implode(' > ', $taxonomy);",
                    "         }",
                    " ",
                    "-        \/\/ Keywords",
                    "         $keywords = $this->decodeJsonArray($chunk['keywords'] ?? null);",
                    "         if (!empty($keywords)) {",
                    "             $parts[] = 'Keywords: ' . implode(', ', $keywords);",
                    "         }",
                    " ",
                    "-        \/\/ Main content - sanitize and limit",
                    "         $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';",
                    "         $content = $this->sanitizeForEmbedding($content);",
                    "         if (mb_strlen($content) > 1000) {"
                ]
            },
            {
                "oldStart": 301,
                "oldLines": 7,
                "newStart": 175,
                "newLines": 6,
                "lines": [
                    " ",
                    "         $text = implode(\"\\n\\n\", $parts);",
                    " ",
                    "-        \/\/ Final safety limit for embedding model context",
                    "         if (mb_strlen($text) > 1800) {",
                    "             $text = mb_substr($text, 0, 1800) . '...';",
                    "         }"
                ]
            },
            {
                "oldStart": 314,
                "oldLines": 19,
                "newStart": 187,
                "newLines": 12,
                "lines": [
                    "      *\/",
                    "     private function sanitizeForEmbedding(string $text): string",
                    "     {",
                    "-        \/\/ Remove box-drawing and other problematic Unicode characters",
                    "-        $text = preg_replace('\/[\\x{2500}-\\x{257F}]\/u', ' ', $text) ?? $text; \/\/ Box Drawing",
                    "-        $text = preg_replace('\/[\\x{2580}-\\x{259F}]\/u', ' ', $text) ?? $text; \/\/ Block Elements",
                    "-        $text = preg_replace('\/[\\x{25A0}-\\x{25FF}]\/u', ' ', $text) ?? $text; \/\/ Geometric Shapes",
                    "-",
                    "-        \/\/ Remove control characters except newlines and tabs",
                    "+        $text = preg_replace('\/[\\x{2500}-\\x{257F}]\/u', ' ', $text) ?? $text;",
                    "+        $text = preg_replace('\/[\\x{2580}-\\x{259F}]\/u', ' ', $text) ?? $text;",
                    "+        $text = preg_replace('\/[\\x{25A0}-\\x{25FF}]\/u', ' ', $text) ?? $text;",
                    "         $text = preg_replace('\/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]\/u', '', $text) ?? $text;",
                    "-",
                    "-        \/\/ Normalize whitespace",
                    "         $text = preg_replace('\/[ \\t]+\/', ' ', $text) ?? $text;",
                    "         $text = preg_replace('\/\\n{3,}\/', \"\\n\\n\", $text) ?? $text;",
                    "-",
                    "-        \/\/ Ensure valid UTF-8",
                    "         $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');",
                    " ",
                    "         return trim($text);"
                ]
            },
            {
                "oldStart": 362,
                "oldLines": 35,
                "newStart": 228,
                "newLines": 6,
                "lines": [
                    "     }",
                    " ",
                    "     \/**",
                    "-     * Upserts a point to Qdrant.",
                    "-     *",
                    "-     * @param array<int, float> $vector",
                    "-     * @param array<string, mixed> $payload",
                    "-     *\/",
                    "-    private function upsertPoint(string $id, array $vector, array $payload): bool",
                    "-    {",
                    "-        $url = sprintf('%s\/collections\/%s\/points', $this->qdrantHost, self::COLLECTION);",
                    "-",
                    "-        $data = [",
                    "-            'points' => [",
                    "-                [",
                    "-                    'id' => $id,",
                    "-                    'vector' => array_values($vector),",
                    "-                    'payload' => $payload,",
                    "-                ],",
                    "-            ],",
                    "-        ];",
                    "-",
                    "-        try {",
                    "-            $this->makeRequest($url, $data, 'PUT');",
                    "-",
                    "-            return true;",
                    "-        } catch (RuntimeException) {",
                    "-            return false;",
                    "-        }",
                    "-    }",
                    "-",
                    "-    \/**",
                    "      * Updates the qdrant_id in the database.",
                    "      *\/",
                    "     private function updateQdrantId(int $chunkId, string $qdrantId): void"
                ]
            },
            {
                "oldStart": 461,
                "oldLines": 80,
                "newStart": 298,
                "newLines": 4,
                "lines": [
                    " ",
                    "         return $stmt->fetchAll(PDO::FETCH_ASSOC);",
                    "     }",
                    "-",
                    "-    \/**",
                    "-     * Generates a UUID v4.",
                    "-     *\/",
                    "-    private function generateUuid(): string",
                    "-    {",
                    "-        $data = random_bytes(16);",
                    "-        $data[6] = chr((ord($data[6]) & 0x0f) | 0x40);",
                    "-        $data[8] = chr((ord($data[8]) & 0x3f) | 0x80);",
                    "-",
                    "-        return vsprintf('%s%s-%s-%s-%s-%s%s%s', str_split(bin2hex($data), 4));",
                    "-    }",
                    "-",
                    "-    \/**",
                    "-     * Makes an HTTP request to Qdrant.",
                    "-     *",
                    "-     * @param array<string, mixed> $payload",
                    "-     * @return array<string, mixed>",
                    "-     *\/",
                    "-    private function makeRequest(string $url, array $payload, string $method): array",
                    "-    {",
                    "-        $ch = curl_init($url);",
                    "-",
                    "-        if ($ch === false) {",
                    "-            throw new RuntimeException('Failed to initialize cURL');",
                    "-        }",
                    "-",
                    "-        $headers = ['Content-Type: application\/json'];",
                    "-",
                    "-        if ($method === 'GET') {",
                    "-            curl_setopt_array($ch, [",
                    "-                CURLOPT_RETURNTRANSFER => true,",
                    "-                CURLOPT_TIMEOUT => self::TIMEOUT,",
                    "-                CURLOPT_CONNECTTIMEOUT => 10,",
                    "-                CURLOPT_HTTPHEADER => $headers,",
                    "-                CURLOPT_CUSTOMREQUEST => 'GET',",
                    "-            ]);",
                    "-        } else {",
                    "-            $jsonPayload = json_encode($payload);",
                    "-",
                    "-            if ($jsonPayload === false) {",
                    "-                curl_close($ch);",
                    "-",
                    "-                throw new RuntimeException('Failed to encode JSON payload');",
                    "-            }",
                    "-",
                    "-            $headers[] = 'Content-Length: ' . strlen($jsonPayload);",
                    "-",
                    "-            curl_setopt_array($ch, [",
                    "-                CURLOPT_RETURNTRANSFER => true,",
                    "-                CURLOPT_TIMEOUT => self::TIMEOUT,",
                    "-                CURLOPT_CONNECTTIMEOUT => 10,",
                    "-                CURLOPT_HTTPHEADER => $headers,",
                    "-                CURLOPT_CUSTOMREQUEST => $method,",
                    "-                CURLOPT_POSTFIELDS => $jsonPayload,",
                    "-            ]);",
                    "-        }",
                    "-",
                    "-        $result = curl_exec($ch);",
                    "-        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);",
                    "-        $curlError = curl_error($ch);",
                    "-",
                    "-        curl_close($ch);",
                    "-",
                    "-        if ($result === false) {",
                    "-            throw new RuntimeException(sprintf('cURL request failed: %s', $curlError ?: 'Unknown error'));",
                    "-        }",
                    "-",
                    "-        if ($httpCode >= 400) {",
                    "-            throw new RuntimeException(sprintf('Qdrant API returned HTTP %d: %s', $httpCode, $result));",
                    "-        }",
                    "-",
                    "-        $decoded = json_decode((string) $result, true);",
                    "-",
                    "-        return is_array($decoded) ? $decoded : [];",
                    "-    }",
                    " }"
                ]
            }
        ],
        "originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Synchronisiert Chunks mit Embeddings nach Qdrant\n\nuse Infrastructure\\AI\\OllamaService;\nuse Infrastructure\\Config\\CredentialService;\nuse Infrastructure\\Traits\\JsonDecodeTrait;\nuse PDO;\nuse RuntimeException;\n\nfinal class ChunkSyncService\n{\n    use JsonDecodeTrait;\n\n    private const string COLLECTION = 'dokumentation_chunks';\n    private const int VECTOR_SIZE = 1024;\n    private const int TIMEOUT = 60;\n    private const int BATCH_SIZE = 10;\n\n    private string $qdrantHost;\n\n    public function __construct(\n        private PDO $pdo,\n        private OllamaService $ollama\n    ) {\n        $this->qdrantHost = CredentialService::getQdrantHost();\n    }\n\n    \/**\n     * Ensures the Qdrant collection exists with proper configuration.\n     *\/\n    public function ensureCollection(): bool\n    {\n        \/\/ Check if collection exists\n        $url = sprintf('%s\/collections\/%s', $this->qdrantHost, self::COLLECTION);\n\n        try {\n            $response = $this->makeRequest($url, [], 'GET');\n            if (isset($response['result'])) {\n                return true; \/\/ Collection exists\n            }\n        } catch (RuntimeException) {\n            \/\/ Collection doesn't exist, create it\n        }\n\n        \/\/ Create collection\n        $payload = [\n            'vectors' => [\n                'size' => self::VECTOR_SIZE,\n                'distance' => 'Cosine',\n            ],\n        ];\n\n        try {\n            $this->makeRequest($url, $payload, 'PUT');\n\n            return true;\n        } catch (RuntimeException $e) {\n            throw new RuntimeException('Failed to create collection: ' . $e->getMessage());\n        }\n    }\n\n    \/**\n     * Syncs a single chunk to Qdrant.\n     *\/\n    public function syncChunk(int $chunkId): bool\n    {\n        $chunk = $this->getChunk($chunkId);\n\n        if ($chunk === null) {\n            return false;\n        }\n\n        \/\/ Only sync completed analyses\n        if ($chunk['analysis_status'] !== 'completed') {\n            return false;\n        }\n\n        \/\/ Get document context\n        $doc = $this->getDocument((int) $chunk['dokumentation_id']);\n\n        \/\/ Prepare text for embedding\n        $text = $this->prepareTextForEmbedding($chunk, $doc);\n\n        \/\/ Generate embedding\n        $embedding = $this->ollama->getEmbedding($text);\n\n        \/\/ Build payload with metadata\n        $payload = $this->buildPayload($chunk, $doc);\n\n        \/\/ Generate UUID for Qdrant if not exists\n        $qdrantId = $chunk['qdrant_id'] ?? $this->generateUuid();\n\n        \/\/ Upsert to Qdrant\n        $success = $this->upsertPoint($qdrantId, $embedding, $payload);\n\n        if ($success && $chunk['qdrant_id'] === null) {\n            $this->updateQdrantId($chunkId, $qdrantId);\n        }\n\n        return $success;\n    }\n\n    \/**\n     * Syncs all analyzed chunks that haven't been synced yet.\n     *\n     * @return array{synced: int, failed: int, errors: array<string>}\n     *\/\n    public function syncAllPending(int $limit = 100): array\n    {\n        $this->ensureCollection();\n\n        $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n\n        $chunks = $this->getUnsyncedChunks($limit);\n\n        foreach ($chunks as $chunk) {\n            try {\n                if ($this->syncChunk((int) $chunk['id'])) {\n                    $results['synced']++;\n\n                    if ($results['synced'] % self::BATCH_SIZE === 0) {\n                        echo \"Synced {$results['synced']} chunks...\\n\";\n                    }\n                } else {\n                    $results['failed']++;\n                    $results['errors'][] = \"Chunk #{$chunk['id']}: Sync failed\";\n                }\n            } catch (RuntimeException $e) {\n                $results['failed']++;\n                $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n            }\n        }\n\n        return $results;\n    }\n\n    \/**\n     * Syncs all chunks (re-sync).\n     *\n     * @return array{synced: int, failed: int, errors: array<string>}\n     *\/\n    public function syncAll(): array\n    {\n        $this->ensureCollection();\n\n        $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n\n        $chunks = $this->getAllAnalyzedChunks();\n\n        foreach ($chunks as $chunk) {\n            try {\n                if ($this->syncChunk((int) $chunk['id'])) {\n                    $results['synced']++;\n\n                    if ($results['synced'] % self::BATCH_SIZE === 0) {\n                        echo \"Synced {$results['synced']} chunks...\\n\";\n                    }\n                } else {\n                    $results['failed']++;\n                }\n            } catch (RuntimeException $e) {\n                $results['failed']++;\n                $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n            }\n        }\n\n        return $results;\n    }\n\n    \/**\n     * Searches for similar chunks using semantic search.\n     *\n     * @param array<string, mixed>|null $filter Optional filter for taxonomy\/entities\n     * @return array<array{id: int, doc_id: int, path: string, title: string, content: string, score: float, taxonomy: array<string>, entities: array<mixed>}>\n     *\/\n    public function search(string $query, int $limit = 5, ?array $filter = null): array\n    {\n        $embedding = $this->ollama->getEmbedding($query);\n\n        $url = sprintf('%s\/collections\/%s\/points\/search', $this->qdrantHost, self::COLLECTION);\n\n        $payload = [\n            'vector' => array_values($embedding),\n            'limit' => $limit,\n            'with_payload' => true,\n        ];\n\n        if ($filter !== null) {\n            $payload['filter'] = $filter;\n        }\n\n        $response = $this->makeRequest($url, $payload, 'POST');\n\n        if (!isset($response['result']) || !is_array($response['result'])) {\n            return [];\n        }\n\n        return array_map(static function (array $item): array {\n            $payload = $item['payload'] ?? [];\n\n            return [\n                'id' => (int) ($payload['chunk_id'] ?? 0),\n                'doc_id' => (int) ($payload['doc_id'] ?? 0),\n                'path' => (string) ($payload['path'] ?? ''),\n                'title' => (string) ($payload['title'] ?? ''),\n                'content' => (string) ($payload['content_preview'] ?? ''),\n                'score' => (float) ($item['score'] ?? 0),\n                'taxonomy' => is_array($payload['taxonomy'] ?? null) ? $payload['taxonomy'] : [],\n                'entities' => is_array($payload['entities'] ?? null) ? $payload['entities'] : [],\n            ];\n        }, $response['result']);\n    }\n\n    \/**\n     * Searches with taxonomy filter.\n     *\n     * @return array<array<string, mixed>>\n     *\/\n    public function searchByTaxonomy(string $query, string $category, int $limit = 5): array\n    {\n        $filter = [\n            'must' => [\n                [\n                    'key' => 'taxonomy_category',\n                    'match' => ['value' => $category],\n                ],\n            ],\n        ];\n\n        return $this->search($query, $limit, $filter);\n    }\n\n    \/**\n     * Gets collection statistics.\n     *\n     * @return array{points_count: int, status: string}|null\n     *\/\n    public function getStats(): ?array\n    {\n        $url = sprintf('%s\/collections\/%s', $this->qdrantHost, self::COLLECTION);\n\n        try {\n            $response = $this->makeRequest($url, [], 'GET');\n\n            if (!isset($response['result'])) {\n                return null;\n            }\n\n            return [\n                'points_count' => (int) ($response['result']['points_count'] ?? 0),\n                'status' => (string) ($response['result']['status'] ?? 'unknown'),\n            ];\n        } catch (RuntimeException) {\n            return null;\n        }\n    }\n\n    \/**\n     * Prepares text for embedding.\n     *\n     * @param array<string, mixed> $chunk\n     * @param array<string, mixed> $doc\n     *\/\n    private function prepareTextForEmbedding(array $chunk, array $doc): string\n    {\n        $parts = [];\n\n        \/\/ Document context\n        $parts[] = 'Dokument: ' . ($doc['title'] ?? '');\n\n        \/\/ Heading path\n        $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n        if (!empty($headingPath)) {\n            $parts[] = 'Abschnitt: ' . implode(' > ', $headingPath);\n        }\n\n        \/\/ Taxonomy\n        $taxonomy = $this->decodeJsonArray($chunk['taxonomy_path'] ?? null);\n        if (!empty($taxonomy)) {\n            $parts[] = 'Kategorie: ' . implode(' > ', $taxonomy);\n        }\n\n        \/\/ Keywords\n        $keywords = $this->decodeJsonArray($chunk['keywords'] ?? null);\n        if (!empty($keywords)) {\n            $parts[] = 'Keywords: ' . implode(', ', $keywords);\n        }\n\n        \/\/ Main content - sanitize and limit\n        $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';\n        $content = $this->sanitizeForEmbedding($content);\n        if (mb_strlen($content) > 1000) {\n            $content = mb_substr($content, 0, 1000) . '...';\n        }\n        $parts[] = 'Inhalt: ' . $content;\n\n        $text = implode(\"\\n\\n\", $parts);\n\n        \/\/ Final safety limit for embedding model context\n        if (mb_strlen($text) > 1800) {\n            $text = mb_substr($text, 0, 1800) . '...';\n        }\n\n        return $text;\n    }\n\n    \/**\n     * Sanitizes text for embedding by removing problematic characters.\n     *\/\n    private function sanitizeForEmbedding(string $text): string\n    {\n        \/\/ Remove box-drawing and other problematic Unicode characters\n        $text = preg_replace('\/[\\x{2500}-\\x{257F}]\/u', ' ', $text) ?? $text; \/\/ Box Drawing\n        $text = preg_replace('\/[\\x{2580}-\\x{259F}]\/u', ' ', $text) ?? $text; \/\/ Block Elements\n        $text = preg_replace('\/[\\x{25A0}-\\x{25FF}]\/u', ' ', $text) ?? $text; \/\/ Geometric Shapes\n\n        \/\/ Remove control characters except newlines and tabs\n        $text = preg_replace('\/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]\/u', '', $text) ?? $text;\n\n       ... [TRUNCATED-13e08f81e4baea49]"
    }
}
← Vorheriger Zur Liste Nächster →