Protokoll #7864

ID7864
Zeitstempel2025-12-23 08:46:57.657118
Clientroot
IP
Warning: Undefined array key "request_ip" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13

Deprecated: htmlspecialchars(): Passing null to parameter #1 ($string) of type string is deprecated in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13
Modellclaude-sonnet-4-20250514
Statuscompleted
Tokens3,031 (Input: 0, Output: 0)
Dauer90 ms
Request-Zeit
Warning: Undefined array key "request_timestamp" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 18
Response-Zeit-

Warning: Undefined array key "error_message" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 22

Request

{
    "event": "PreToolUse",
    "tool_name": "Edit",
    "tool_input": {
        "file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkingService.php",
        "old_string": "            'pending_analysis' => (int) ($result['pending_analysis'] ?? 0),\n            'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),\n        ];\n    }\n\n    private function createConnection(): PDO\n    {\n        return \\Infrastructure\\Config\\DatabaseFactory::dev();\n    }\n}",
        "new_string": "            'pending_analysis' => (int) ($result['pending_analysis'] ?? 0),\n            'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),\n        ];\n    }\n}"
    }
}

Response

{
    "tool_response": {
        "filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkingService.php",
        "oldString": "            'pending_analysis' => (int) ($result['pending_analysis'] ?? 0),\n            'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),\n        ];\n    }\n\n    private function createConnection(): PDO\n    {\n        return \\Infrastructure\\Config\\DatabaseFactory::dev();\n    }\n}",
        "newString": "            'pending_analysis' => (int) ($result['pending_analysis'] ?? 0),\n            'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),\n        ];\n    }\n}",
        "originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Zerlegt Dokumentation in embedding-fähige Chunks\n\nuse Infrastructure\\Persistence\\DokumentationRepository;\nuse PDO;\nuse RuntimeException;\n\nfinal class ChunkingService\n{\n    private const int MAX_CHUNK_TOKENS = 400;\n    private const int MIN_CHUNK_TOKENS = 50;\n    private const float CHARS_PER_TOKEN = 4.0;\n\n    public function __construct(\n        private PDO $pdo,\n        private DokumentationRepository $repo\n    ) {}\n\n    \/**\n     * Chunks a single document and stores in database.\n     *\n     * @return array{chunks_created: int, tokens_total: int}\n     *\/\n    public function chunkDocument(int $docId): array\n    {\n        $doc = $this->repo->findById($docId);\n\n        if ($doc === null) {\n            throw new RuntimeException(\"Document #{$docId} not found\");\n        }\n\n        \/\/ Delete existing chunks for this document\n        $this->deleteChunksForDocument($docId);\n\n        \/\/ Parse and chunk the content\n        $chunks = $this->parseHtmlToChunks($doc['content'], $doc['title']);\n\n        \/\/ Store chunks\n        $tokensTotal = 0;\n        foreach ($chunks as $index => $chunk) {\n            $this->storeChunk($docId, $index, $chunk);\n            $tokensTotal += $chunk['token_count'];\n        }\n\n        return [\n            'chunks_created' => count($chunks),\n            'tokens_total' => $tokensTotal,\n        ];\n    }\n\n    \/**\n     * Chunks all documents in the hierarchy.\n     *\n     * @return array{documents: int, chunks: int, tokens: int, errors: array<string>}\n     *\/\n    public function chunkAll(): array\n    {\n        $hierarchy = $this->repo->getHierarchy();\n        $results = ['documents' => 0, 'chunks' => 0, 'tokens' => 0, 'errors' => []];\n\n        $this->processHierarchy($hierarchy, $results);\n\n        return $results;\n    }\n\n    \/**\n     * @param array<array<string, mixed>> $items\n     * @param array{documents: int, chunks: int, tokens: int, errors: array<string>} $results\n     *\/\n    private function processHierarchy(array $items, array &$results): void\n    {\n        foreach ($items as $item) {\n            try {\n                $result = $this->chunkDocument((int) $item['id']);\n                $results['documents']++;\n                $results['chunks'] += $result['chunks_created'];\n                $results['tokens'] += $result['tokens_total'];\n            } catch (RuntimeException $e) {\n                $results['errors'][] = \"Doc #{$item['id']}: \" . $e->getMessage();\n            }\n\n            if (!empty($item['children'])) {\n                $this->processHierarchy($item['children'], $results);\n            }\n        }\n    }\n\n    \/**\n     * Parses HTML content into structured chunks.\n     *\n     * @return array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}>\n     *\/\n    private function parseHtmlToChunks(string $html, string $docTitle): array\n    {\n        $chunks = [];\n        $currentHeadingPath = [$docTitle];\n\n        \/\/ Strip PHP code if present\n        $html = preg_replace('\/<\\?php.*?\\?>\/s', '', $html) ?? $html;\n\n        \/\/ Split by headings (h1-h4)\n        $pattern = '\/(<h[1-4][^>]*>.*?<\\\/h[1-4]>)\/is';\n        $parts = preg_split($pattern, $html, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);\n\n        if ($parts === false) {\n            $parts = [$html];\n        }\n\n        $currentContent = '';\n\n        foreach ($parts as $part) {\n            \/\/ Check if this is a heading\n            if (preg_match('\/<h([1-4])[^>]*>(.*?)<\\\/h[1-4]>\/is', $part, $matches)) {\n                \/\/ Save previous content as chunk if substantial\n                if (!empty(trim($currentContent))) {\n                    $chunk = $this->createChunk($currentContent, $currentHeadingPath);\n                    if ($chunk !== null) {\n                        $chunks[] = $chunk;\n                    }\n                }\n\n                \/\/ Update heading path based on level\n                $level = (int) $matches[1];\n                $headingText = strip_tags($matches[2]);\n                $headingText = html_entity_decode($headingText, ENT_QUOTES | ENT_HTML5, 'UTF-8');\n                $headingText = trim($headingText);\n\n                \/\/ Adjust heading path based on level\n                $currentHeadingPath = array_slice($currentHeadingPath, 0, $level);\n                $currentHeadingPath[$level] = $headingText;\n\n                $currentContent = '';\n            } else {\n                $currentContent .= $part;\n            }\n        }\n\n        \/\/ Don't forget the last chunk\n        if (!empty(trim($currentContent))) {\n            $chunk = $this->createChunk($currentContent, $currentHeadingPath);\n            if ($chunk !== null) {\n                $chunks[] = $chunk;\n            }\n        }\n\n        \/\/ If no chunks created, create one from the whole content\n        if (empty($chunks)) {\n            $chunk = $this->createChunk($html, [$docTitle]);\n            if ($chunk !== null) {\n                $chunks[] = $chunk;\n            }\n        }\n\n        \/\/ Split large chunks\n        $chunks = $this->splitLargeChunks($chunks);\n\n        return $chunks;\n    }\n\n    \/**\n     * Creates a chunk array from content.\n     *\n     * @param array<string> $headingPath\n     * @return array{content: string, content_clean: string, heading_path: array<string>, token_count: int}|null\n     *\/\n    private function createChunk(string $content, array $headingPath): ?array\n    {\n        $cleanContent = $this->cleanHtml($content);\n\n        if (empty(trim($cleanContent))) {\n            return null;\n        }\n\n        $tokenCount = $this->estimateTokens($cleanContent);\n\n        if ($tokenCount < self::MIN_CHUNK_TOKENS) {\n            return null;\n        }\n\n        return [\n            'content' => trim($content),\n            'content_clean' => $cleanContent,\n            'heading_path' => array_values(array_filter($headingPath)),\n            'token_count' => $tokenCount,\n        ];\n    }\n\n    \/**\n     * Splits chunks that exceed the maximum token limit.\n     *\n     * @param array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}> $chunks\n     * @return array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}>\n     *\/\n    private function splitLargeChunks(array $chunks): array\n    {\n        $result = [];\n\n        foreach ($chunks as $chunk) {\n            if ($chunk['token_count'] <= self::MAX_CHUNK_TOKENS) {\n                $result[] = $chunk;\n                continue;\n            }\n\n            \/\/ Split by paragraphs or sentences\n            $paragraphs = preg_split('\/\\n\\n+\/', $chunk['content_clean']);\n            if ($paragraphs === false) {\n                $paragraphs = [$chunk['content_clean']];\n            }\n\n            $currentText = '';\n            $currentTokens = 0;\n\n            foreach ($paragraphs as $para) {\n                $paraTokens = $this->estimateTokens($para);\n\n                if ($currentTokens + $paraTokens > self::MAX_CHUNK_TOKENS && $currentTokens > 0) {\n                    \/\/ Save current chunk\n                    $result[] = [\n                        'content' => $currentText,\n                        'content_clean' => $currentText,\n                        'heading_path' => $chunk['heading_path'],\n                        'token_count' => $currentTokens,\n                    ];\n                    $currentText = $para;\n                    $currentTokens = $paraTokens;\n                } else {\n                    $currentText .= ($currentText !== '' ? \"\\n\\n\" : '') . $para;\n                    $currentTokens += $paraTokens;\n                }\n            }\n\n            \/\/ Don't forget the last part\n            if ($currentTokens >= self::MIN_CHUNK_TOKENS) {\n                $result[] = [\n                    'content' => $currentText,\n                    'content_clean' => $currentText,\n                    'heading_path' => $chunk['heading_path'],\n                    'token_count' => $currentTokens,\n                ];\n            }\n        }\n\n        return $result;\n    }\n\n    \/**\n     * Cleans HTML to plain text.\n     *\/\n    private function cleanHtml(string $html): string\n    {\n        \/\/ Remove scripts and styles\n        $html = preg_replace('\/<script[^>]*>.*?<\\\/script>\/is', '', $html) ?? $html;\n        $html = preg_replace('\/<style[^>]*>.*?<\\\/style>\/is', '', $html) ?? $html;\n\n        \/\/ Convert common elements to text equivalents\n        $html = preg_replace('\/<br\\s*\\\/?>\/i', \"\\n\", $html) ?? $html;\n        $html = preg_replace('\/<\\\/p>\/i', \"\\n\\n\", $html) ?? $html;\n        $html = preg_replace('\/<\\\/div>\/i', \"\\n\", $html) ?? $html;\n        $html = preg_replace('\/<\\\/li>\/i', \"\\n\", $html) ?? $html;\n        $html = preg_replace('\/<\\\/tr>\/i', \"\\n\", $html) ?? $html;\n\n        \/\/ Preserve code blocks\n        $html = preg_replace('\/<pre[^>]*>(.*?)<\\\/pre>\/is', \"\\n```\\n$1\\n```\\n\", $html) ?? $html;\n        $html = preg_replace('\/<code[^>]*>(.*?)<\\\/code>\/is', '`$1`', $html) ?? $html;\n\n        \/\/ Strip remaining tags\n        $text = strip_tags($html);\n\n        \/\/ Decode entities\n        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');\n\n        \/\/ Normalize whitespace\n        $text = preg_replace('\/[ \\t]+\/', ' ', $text) ?? $text;\n        $text = preg_replace('\/\\n{3,}\/', \"\\n\\n\", $text) ?? $text;\n\n        return trim($text);\n    }\n\n    \/**\n     * Estimates token count for text.\n     *\/\n    private function estimateTokens(string $text): int\n    {\n        return (int) ceil(mb_strlen($text) \/ self::CHARS_PER_TOKEN);\n    }\n\n    \/**\n     * Stores a chunk in the database.\n     *\n     * @param array{content: string, content_clean: string, heading_path: array<string>, token_count: int} $chunk\n     *\/\n    private function storeChunk(int $docId, int $index, array $chunk): void\n    {\n        $sql = \"INSERT INTO dokumentation_chunks\n                (dokumentation_id, chunk_index, content, content_clean, token_count, heading_path, analysis_status... [TRUNCATED-bc30a42310740911]",
        "structuredPatch": [
            {
                "oldStart": 345,
                "oldLines": 9,
                "newStart": 345,
                "newLines": 4,
                "lines": [
                    "             'completed_analysis' => (int) ($result['completed_analysis'] ?? 0),",
                    "         ];",
                    "     }",
                    "-",
                    "-    private function createConnection(): PDO",
                    "-    {",
                    "-        return \\Infrastructure\\Config\\DatabaseFactory::dev();",
                    "-    }",
                    " }"
                ]
            }
        ],
        "userModified": false,
        "replaceAll": false
    }
}
← Vorheriger Zur Liste Nächster →