Protokoll #24543
| ID | 24543 |
|---|---|
| Zeitstempel | 2025-12-27 23:46:59.482197 |
| Client | root |
| IP | Warning: Undefined array key "request_ip" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13 Deprecated: htmlspecialchars(): Passing null to parameter #1 ($string) of type string is deprecated in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13 |
| Modell | claude-sonnet-4-20250514 |
| Status | completed |
| Tokens | 2,820 (Input: 0, Output: 0) |
| Dauer | 94 ms |
| Request-Zeit | Warning: Undefined array key "request_timestamp" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 18 |
| Response-Zeit | - |
Warning: Undefined array key "error_message" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 22
Request
{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkSyncService.php",
"old_string": " if (mb_strlen($content) > 1000) {\n $content = mb_substr($content, 0, 1000) . '...';",
"new_string": " if (mb_strlen($content) > Constants::EMBEDDING_TEXT_LIMIT) {\n $content = mb_substr($content, 0, Constants::EMBEDDING_TEXT_LIMIT) . '...';"
}
}
Response
{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkSyncService.php",
"oldString": " if (mb_strlen($content) > 1000) {\n $content = mb_substr($content, 0, 1000) . '...';",
"newString": " if (mb_strlen($content) > Constants::EMBEDDING_TEXT_LIMIT) {\n $content = mb_substr($content, 0, Constants::EMBEDDING_TEXT_LIMIT) . '...';",
"originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Synchronisiert Chunks mit Embeddings nach Qdrant\n\nuse Domain\\Constants;\nuse Infrastructure\\AI\\OllamaService;\nuse Infrastructure\\AI\\QdrantClient;\nuse Infrastructure\\Traits\\JsonDecodeTrait;\nuse PDO;\n\nfinal class ChunkSyncService\n{\n use JsonDecodeTrait;\n\n private const string COLLECTION = 'dokumentation_chunks';\n private const int VECTOR_SIZE = 1024;\n private const int BATCH_SIZE = 10;\n\n public function __construct(\n private PDO $pdo,\n private OllamaService $ollama,\n private QdrantClient $qdrant\n ) {\n }\n\n \/**\n * Ensures the Qdrant collection exists with proper configuration.\n *\/\n public function ensureCollection(): bool\n {\n return $this->qdrant->ensureCollection(self::COLLECTION, self::VECTOR_SIZE);\n }\n\n \/**\n * Syncs a single chunk to Qdrant.\n *\/\n public function syncChunk(int $chunkId): bool\n {\n $chunk = $this->getChunk($chunkId);\n\n if ($chunk === null) {\n return false;\n }\n\n if ($chunk['analysis_status'] !== 'completed') {\n return false;\n }\n\n $doc = $this->getDocument((int) $chunk['dokumentation_id']);\n $text = $this->prepareTextForEmbedding($chunk, $doc);\n $embedding = $this->ollama->getEmbedding($text);\n $payload = $this->buildPayload($chunk, $doc);\n $qdrantId = $chunk['qdrant_id'] ?? $this->qdrant->generateUuid();\n\n $success = $this->qdrant->upsertPoint(self::COLLECTION, $qdrantId, $embedding, $payload);\n\n if ($success && $chunk['qdrant_id'] === null) {\n $this->updateQdrantId($chunkId, $qdrantId);\n }\n\n return $success;\n }\n\n \/**\n * Syncs all analyzed chunks that haven't been synced yet.\n *\n * @return array{synced: int, failed: int, errors: array<string>}\n *\/\n public function syncAllPending(int $limit = Constants::DEFAULT_LIMIT): array\n {\n $this->ensureCollection();\n\n $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n $chunks = $this->getUnsyncedChunks($limit);\n\n foreach ($chunks as $chunk) {\n try {\n if ($this->syncChunk((int) $chunk['id'])) {\n $results['synced']++;\n\n if ($results['synced'] % self::BATCH_SIZE === 0) {\n echo \"Synced {$results['synced']} chunks...\\n\";\n }\n } else {\n $results['failed']++;\n $results['errors'][] = \"Chunk #{$chunk['id']}: Sync failed\";\n }\n } catch (\\RuntimeException $e) {\n $results['failed']++;\n $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n }\n }\n\n return $results;\n }\n\n \/**\n * Syncs all chunks (re-sync).\n *\n * @return array{synced: int, failed: int, errors: array<string>}\n *\/\n public function syncAll(): array\n {\n $this->ensureCollection();\n\n $results = ['synced' => 0, 'failed' => 0, 'errors' => []];\n $chunks = $this->getAllAnalyzedChunks();\n\n foreach ($chunks as $chunk) {\n try {\n if ($this->syncChunk((int) $chunk['id'])) {\n $results['synced']++;\n\n if ($results['synced'] % self::BATCH_SIZE === 0) {\n echo \"Synced {$results['synced']} chunks...\\n\";\n }\n } else {\n $results['failed']++;\n }\n } catch (\\RuntimeException $e) {\n $results['failed']++;\n $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n }\n }\n\n return $results;\n }\n\n \/**\n * Gets collection statistics.\n *\n * @return array{points_count: int, status: string}|null\n *\/\n public function getStats(): ?array\n {\n return $this->qdrant->getCollectionStats(self::COLLECTION);\n }\n\n \/**\n * Prepares text for embedding.\n *\n * @param array<string, mixed> $chunk\n * @param array<string, mixed> $doc\n *\/\n private function prepareTextForEmbedding(array $chunk, array $doc): string\n {\n $parts = [];\n\n $parts[] = 'Dokument: ' . ($doc['title'] ?? '');\n\n $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n if (!empty($headingPath)) {\n $parts[] = 'Abschnitt: ' . implode(' > ', $headingPath);\n }\n\n $taxonomy = $this->decodeJsonArray($chunk['taxonomy_path'] ?? null);\n if (!empty($taxonomy)) {\n $parts[] = 'Kategorie: ' . implode(' > ', $taxonomy);\n }\n\n $keywords = $this->decodeJsonArray($chunk['keywords'] ?? null);\n if (!empty($keywords)) {\n $parts[] = 'Keywords: ' . implode(', ', $keywords);\n }\n\n $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';\n $content = $this->sanitizeForEmbedding($content);\n if (mb_strlen($content) > 1000) {\n $content = mb_substr($content, 0, 1000) . '...';\n }\n $parts[] = 'Inhalt: ' . $content;\n\n $text = implode(\"\\n\\n\", $parts);\n\n if (mb_strlen($text) > 1800) {\n $text = mb_substr($text, 0, 1800) . '...';\n }\n\n return $text;\n }\n\n \/**\n * Sanitizes text for embedding by removing problematic characters.\n *\/\n private function sanitizeForEmbedding(string $text): string\n {\n $text = preg_replace('\/[\\x{2500}-\\x{257F}]\/u', ' ', $text) ?? $text;\n $text = preg_replace('\/[\\x{2580}-\\x{259F}]\/u', ' ', $text) ?? $text;\n $text = preg_replace('\/[\\x{25A0}-\\x{25FF}]\/u', ' ', $text) ?? $text;\n $text = preg_replace('\/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]\/u', '', $text) ?? $text;\n $text = preg_replace('\/[ \\t]+\/', ' ', $text) ?? $text;\n $text = preg_replace('\/\\n{3,}\/', \"\\n\\n\", $text) ?? $text;\n $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');\n\n return trim($text);\n }\n\n \/**\n * Builds the Qdrant payload.\n *\n * @param array<string, mixed> $chunk\n * @param array<string, mixed> $doc\n * @return array<string, mixed>\n *\/\n private function buildPayload(array $chunk, array $doc): array\n {\n $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';\n $content = $this->sanitizeForEmbedding($content);\n $preview = mb_strlen($content) > 300 ? mb_substr($content, 0, 300) . '...' : $content;\n\n return [\n 'chunk_id' => (int) $chunk['id'],\n 'doc_id' => (int) $chunk['dokumentation_id'],\n 'chunk_index' => (int) $chunk['chunk_index'],\n 'path' => $doc['path'] ?? '',\n 'title' => $doc['title'] ?? '',\n 'content_preview' => $preview,\n 'heading_path' => $this->decodeJsonArray($chunk['heading_path'] ?? null),\n 'taxonomy_category' => $chunk['taxonomy_category'] ?? null,\n 'taxonomy' => $this->decodeJsonArray($chunk['taxonomy_path'] ?? null),\n 'entities' => $this->decodeJsonArray($chunk['entities'] ?? null),\n 'keywords' => $this->decodeJsonArray($chunk['keywords'] ?? null),\n 'token_count' => (int) ($chunk['token_count'] ?? 0),\n ];\n }\n\n \/**\n * Updates the qdrant_id in the database.\n *\/\n private function updateQdrantId(int $chunkId, string $qdrantId): void\n {\n $stmt = $this->pdo->prepare('UPDATE dokumentation_chunks SET qdrant_id = :qid WHERE id = :id');\n $stmt->execute(['id' => $chunkId, 'qid' => $qdrantId]);\n }\n\n \/**\n * Gets a chunk by ID.\n *\n * @return array<string, mixed>|null\n *\/\n private function getChunk(int $id): ?array\n {\n $stmt = $this->pdo->prepare('SELECT * FROM dokumentation_chunks WHERE id = :id');\n $stmt->execute(['id' => $id]);\n $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n return $result !== false ? $result : null;\n }\n\n \/**\n * Gets a document by ID.\n *\n * @return array<string, mixed>\n *\/\n private function getDocument(int $id): array\n {\n $stmt = $this->pdo->prepare('SELECT * FROM dokumentation WHERE id = :id');\n $stmt->execute(['id' => $id]);\n $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n return $result !== false ? $result : [];\n }\n\n \/**\n * Gets unsynced chunks (analyzed but not in Qdrant).\n *\n * @return array<array<string, mixed>>\n *\/\n private function getUnsyncedChunks(int $limit): array\n {\n $stmt = $this->pdo->prepare(\"\n SELECT * FROM dokumentation_chunks\n WHERE analysis_status = 'completed' AND qdrant_id IS NULL\n ORDER BY dokumentation_id, chunk_index\n LIMIT :limit\n \");\n $stmt->bindValue('limit', $limit, PDO::PARAM_INT);\n $stmt->execute();\n\n return $stmt->fetchAll(PDO::FETCH_ASSOC);\n }\n\n \/**\n * Gets all analyzed chunks.\n *\n * @return array<array<string, mixed>>\n *\/\n private function getAllAnalyzedChunks(): array\n {\n $stmt = $this->pdo->query(\"\n SELECT * FROM dokumentation_chunks\n WHERE analysis_status = 'completed'\n ORDER BY dokumentation_id, chunk_index\n \");\n\n return $stmt->fetchAll(PDO::FETCH_ASSOC);\n }\n}\n",
"structuredPatch": [
{
"oldStart": 169,
"oldLines": 8,
"newStart": 169,
"newLines": 8,
"lines": [
" ",
" $content = $chunk['content_clean'] ?? $chunk['content'] ?? '';",
" $content = $this->sanitizeForEmbedding($content);",
"- if (mb_strlen($content) > 1000) {",
"- $content = mb_substr($content, 0, 1000) . '...';",
"+ if (mb_strlen($content) > Constants::EMBEDDING_TEXT_LIMIT) {",
"+ $content = mb_substr($content, 0, Constants::EMBEDDING_TEXT_LIMIT) . '...';",
" }",
" $parts[] = 'Inhalt: ' . $content;",
" "
]
}
],
"userModified": false,
"replaceAll": false
}
}