Protokoll #7864
| ID | 7864 |
|---|---|
| Zeitstempel | 2025-12-23 08:46:57.657118 |
| Client | root |
| IP | Warning: Undefined array key "request_ip" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13 Deprecated: htmlspecialchars(): Passing null to parameter #1 ($string) of type string is deprecated in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13 |
| Modell | claude-sonnet-4-20250514 |
| Status | completed |
| Tokens | 3,031 (Input: 0, Output: 0) |
| Dauer | 90 ms |
| Request-Zeit | Warning: Undefined array key "request_timestamp" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 18 |
| Response-Zeit | - |
Warning: Undefined array key "error_message" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 22
Request
{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkingService.php",
"old_string": " 'pending_analysis' => (int) ($result['pending_analysis'] ?? 0),\n 'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),\n ];\n }\n\n private function createConnection(): PDO\n {\n return \\Infrastructure\\Config\\DatabaseFactory::dev();\n }\n}",
"new_string": " 'pending_analysis' => (int) ($result['pending_analysis'] ?? 0),\n 'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),\n ];\n }\n}"
}
}
Response
{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkingService.php",
"oldString": " 'pending_analysis' => (int) ($result['pending_analysis'] ?? 0),\n 'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),\n ];\n }\n\n private function createConnection(): PDO\n {\n return \\Infrastructure\\Config\\DatabaseFactory::dev();\n }\n}",
"newString": " 'pending_analysis' => (int) ($result['pending_analysis'] ?? 0),\n 'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),\n ];\n }\n}",
"originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Zerlegt Dokumentation in embedding-fähige Chunks\n\nuse Infrastructure\\Persistence\\DokumentationRepository;\nuse PDO;\nuse RuntimeException;\n\nfinal class ChunkingService\n{\n private const int MAX_CHUNK_TOKENS = 400;\n private const int MIN_CHUNK_TOKENS = 50;\n private const float CHARS_PER_TOKEN = 4.0;\n\n public function __construct(\n private PDO $pdo,\n private DokumentationRepository $repo\n ) {}\n\n \/**\n * Chunks a single document and stores in database.\n *\n * @return array{chunks_created: int, tokens_total: int}\n *\/\n public function chunkDocument(int $docId): array\n {\n $doc = $this->repo->findById($docId);\n\n if ($doc === null) {\n throw new RuntimeException(\"Document #{$docId} not found\");\n }\n\n \/\/ Delete existing chunks for this document\n $this->deleteChunksForDocument($docId);\n\n \/\/ Parse and chunk the content\n $chunks = $this->parseHtmlToChunks($doc['content'], $doc['title']);\n\n \/\/ Store chunks\n $tokensTotal = 0;\n foreach ($chunks as $index => $chunk) {\n $this->storeChunk($docId, $index, $chunk);\n $tokensTotal += $chunk['token_count'];\n }\n\n return [\n 'chunks_created' => count($chunks),\n 'tokens_total' => $tokensTotal,\n ];\n }\n\n \/**\n * Chunks all documents in the hierarchy.\n *\n * @return array{documents: int, chunks: int, tokens: int, errors: array<string>}\n *\/\n public function chunkAll(): array\n {\n $hierarchy = $this->repo->getHierarchy();\n $results = ['documents' => 0, 'chunks' => 0, 'tokens' => 0, 'errors' => []];\n\n $this->processHierarchy($hierarchy, $results);\n\n return $results;\n }\n\n \/**\n * @param array<array<string, mixed>> $items\n * @param array{documents: int, chunks: int, tokens: int, errors: array<string>} $results\n *\/\n private function processHierarchy(array $items, array &$results): void\n {\n foreach ($items as $item) {\n try {\n $result = $this->chunkDocument((int) $item['id']);\n $results['documents']++;\n $results['chunks'] += $result['chunks_created'];\n $results['tokens'] += $result['tokens_total'];\n } catch (RuntimeException $e) {\n $results['errors'][] = \"Doc #{$item['id']}: \" . $e->getMessage();\n }\n\n if (!empty($item['children'])) {\n $this->processHierarchy($item['children'], $results);\n }\n }\n }\n\n \/**\n * Parses HTML content into structured chunks.\n *\n * @return array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}>\n *\/\n private function parseHtmlToChunks(string $html, string $docTitle): array\n {\n $chunks = [];\n $currentHeadingPath = [$docTitle];\n\n \/\/ Strip PHP code if present\n $html = preg_replace('\/<\\?php.*?\\?>\/s', '', $html) ?? $html;\n\n \/\/ Split by headings (h1-h4)\n $pattern = '\/(<h[1-4][^>]*>.*?<\\\/h[1-4]>)\/is';\n $parts = preg_split($pattern, $html, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);\n\n if ($parts === false) {\n $parts = [$html];\n }\n\n $currentContent = '';\n\n foreach ($parts as $part) {\n \/\/ Check if this is a heading\n if (preg_match('\/<h([1-4])[^>]*>(.*?)<\\\/h[1-4]>\/is', $part, $matches)) {\n \/\/ Save previous content as chunk if substantial\n if (!empty(trim($currentContent))) {\n $chunk = $this->createChunk($currentContent, $currentHeadingPath);\n if ($chunk !== null) {\n $chunks[] = $chunk;\n }\n }\n\n \/\/ Update heading path based on level\n $level = (int) $matches[1];\n $headingText = strip_tags($matches[2]);\n $headingText = html_entity_decode($headingText, ENT_QUOTES | ENT_HTML5, 'UTF-8');\n $headingText = trim($headingText);\n\n \/\/ Adjust heading path based on level\n $currentHeadingPath = array_slice($currentHeadingPath, 0, $level);\n $currentHeadingPath[$level] = $headingText;\n\n $currentContent = '';\n } else {\n $currentContent .= $part;\n }\n }\n\n \/\/ Don't forget the last chunk\n if (!empty(trim($currentContent))) {\n $chunk = $this->createChunk($currentContent, $currentHeadingPath);\n if ($chunk !== null) {\n $chunks[] = $chunk;\n }\n }\n\n \/\/ If no chunks created, create one from the whole content\n if (empty($chunks)) {\n $chunk = $this->createChunk($html, [$docTitle]);\n if ($chunk !== null) {\n $chunks[] = $chunk;\n }\n }\n\n \/\/ Split large chunks\n $chunks = $this->splitLargeChunks($chunks);\n\n return $chunks;\n }\n\n \/**\n * Creates a chunk array from content.\n *\n * @param array<string> $headingPath\n * @return array{content: string, content_clean: string, heading_path: array<string>, token_count: int}|null\n *\/\n private function createChunk(string $content, array $headingPath): ?array\n {\n $cleanContent = $this->cleanHtml($content);\n\n if (empty(trim($cleanContent))) {\n return null;\n }\n\n $tokenCount = $this->estimateTokens($cleanContent);\n\n if ($tokenCount < self::MIN_CHUNK_TOKENS) {\n return null;\n }\n\n return [\n 'content' => trim($content),\n 'content_clean' => $cleanContent,\n 'heading_path' => array_values(array_filter($headingPath)),\n 'token_count' => $tokenCount,\n ];\n }\n\n \/**\n * Splits chunks that exceed the maximum token limit.\n *\n * @param array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}> $chunks\n * @return array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}>\n *\/\n private function splitLargeChunks(array $chunks): array\n {\n $result = [];\n\n foreach ($chunks as $chunk) {\n if ($chunk['token_count'] <= self::MAX_CHUNK_TOKENS) {\n $result[] = $chunk;\n continue;\n }\n\n \/\/ Split by paragraphs or sentences\n $paragraphs = preg_split('\/\\n\\n+\/', $chunk['content_clean']);\n if ($paragraphs === false) {\n $paragraphs = [$chunk['content_clean']];\n }\n\n $currentText = '';\n $currentTokens = 0;\n\n foreach ($paragraphs as $para) {\n $paraTokens = $this->estimateTokens($para);\n\n if ($currentTokens + $paraTokens > self::MAX_CHUNK_TOKENS && $currentTokens > 0) {\n \/\/ Save current chunk\n $result[] = [\n 'content' => $currentText,\n 'content_clean' => $currentText,\n 'heading_path' => $chunk['heading_path'],\n 'token_count' => $currentTokens,\n ];\n $currentText = $para;\n $currentTokens = $paraTokens;\n } else {\n $currentText .= ($currentText !== '' ? \"\\n\\n\" : '') . $para;\n $currentTokens += $paraTokens;\n }\n }\n\n \/\/ Don't forget the last part\n if ($currentTokens >= self::MIN_CHUNK_TOKENS) {\n $result[] = [\n 'content' => $currentText,\n 'content_clean' => $currentText,\n 'heading_path' => $chunk['heading_path'],\n 'token_count' => $currentTokens,\n ];\n }\n }\n\n return $result;\n }\n\n \/**\n * Cleans HTML to plain text.\n *\/\n private function cleanHtml(string $html): string\n {\n \/\/ Remove scripts and styles\n $html = preg_replace('\/<script[^>]*>.*?<\\\/script>\/is', '', $html) ?? $html;\n $html = preg_replace('\/<style[^>]*>.*?<\\\/style>\/is', '', $html) ?? $html;\n\n \/\/ Convert common elements to text equivalents\n $html = preg_replace('\/<br\\s*\\\/?>\/i', \"\\n\", $html) ?? $html;\n $html = preg_replace('\/<\\\/p>\/i', \"\\n\\n\", $html) ?? $html;\n $html = preg_replace('\/<\\\/div>\/i', \"\\n\", $html) ?? $html;\n $html = preg_replace('\/<\\\/li>\/i', \"\\n\", $html) ?? $html;\n $html = preg_replace('\/<\\\/tr>\/i', \"\\n\", $html) ?? $html;\n\n \/\/ Preserve code blocks\n $html = preg_replace('\/<pre[^>]*>(.*?)<\\\/pre>\/is', \"\\n```\\n$1\\n```\\n\", $html) ?? $html;\n $html = preg_replace('\/<code[^>]*>(.*?)<\\\/code>\/is', '`$1`', $html) ?? $html;\n\n \/\/ Strip remaining tags\n $text = strip_tags($html);\n\n \/\/ Decode entities\n $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');\n\n \/\/ Normalize whitespace\n $text = preg_replace('\/[ \\t]+\/', ' ', $text) ?? $text;\n $text = preg_replace('\/\\n{3,}\/', \"\\n\\n\", $text) ?? $text;\n\n return trim($text);\n }\n\n \/**\n * Estimates token count for text.\n *\/\n private function estimateTokens(string $text): int\n {\n return (int) ceil(mb_strlen($text) \/ self::CHARS_PER_TOKEN);\n }\n\n \/**\n * Stores a chunk in the database.\n *\n * @param array{content: string, content_clean: string, heading_path: array<string>, token_count: int} $chunk\n *\/\n private function storeChunk(int $docId, int $index, array $chunk): void\n {\n $sql = \"INSERT INTO dokumentation_chunks\n (dokumentation_id, chunk_index, content, content_clean, token_count, heading_path, analysis_status... [TRUNCATED-bc30a42310740911]",
"structuredPatch": [
{
"oldStart": 345,
"oldLines": 9,
"newStart": 345,
"newLines": 4,
"lines": [
" 'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),",
" ];",
" }",
"-",
"- private function createConnection(): PDO",
"- {",
"- return \\Infrastructure\\Config\\DatabaseFactory::dev();",
"- }",
" }"
]
}
],
"userModified": false,
"replaceAll": false
}
}