Protokoll #7852

ID: 7852
Zeitstempel: 2025-12-23 08:45:35.622639
Client: root
IP: 145.224.96.190
Modell: claude-sonnet-4-20250514
Status: pending
Tokens: 2,774 (Input: 2,774, Output: 0)
Dauer: -
Request-Zeit: 2025-12-23 08:45:35.622639
Response-Zeit: -

Request

{
    "event": "PostToolUse",
    "raw_data": {
        "session_id": "18ef5bf3-e872-4052-897e-53d6dc45b0b9",
        "transcript_path": "\/root\/.claude\/projects\/-var-www\/18ef5bf3-e872-4052-897e-53d6dc45b0b9.jsonl",
        "cwd": "\/var\/www\/dev.campus.systemische-tools.de",
        "permission_mode": "acceptEdits",
        "hook_event_name": "PostToolUse",
        "tool_name": "Read",
        "tool_input": {
            "file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkingService.php"
        },
        "tool_response": {
            "type": "text",
            "file": {
                "filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkingService.php",
                "content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Zerlegt Dokumentation in embedding-fähige Chunks\n\nuse Infrastructure\\Persistence\\DokumentationRepository;\nuse PDO;\nuse RuntimeException;\n\nfinal class ChunkingService\n{\n    private const int MAX_CHUNK_TOKENS = 400;\n    private const int MIN_CHUNK_TOKENS = 50;\n    private const float CHARS_PER_TOKEN = 4.0;\n\n    private PDO $pdo;\n    private DokumentationRepository $repo;\n\n    public function __construct()\n    {\n        $this->repo = new DokumentationRepository();\n        $this->pdo = $this->createConnection();\n    }\n\n    \/**\n     * Chunks a single document and stores in database.\n     *\n     * @return array{chunks_created: int, tokens_total: int}\n     *\/\n    public function chunkDocument(int $docId): array\n    {\n        $doc = $this->repo->findById($docId);\n\n        if ($doc === null) {\n            throw new RuntimeException(\"Document #{$docId} not found\");\n        }\n\n        \/\/ Delete existing chunks for this document\n        $this->deleteChunksForDocument($docId);\n\n        \/\/ Parse and chunk the content\n        $chunks = $this->parseHtmlToChunks($doc['content'], $doc['title']);\n\n        \/\/ Store chunks\n        $tokensTotal = 0;\n        foreach ($chunks as $index => $chunk) {\n            $this->storeChunk($docId, $index, $chunk);\n            $tokensTotal += $chunk['token_count'];\n        }\n\n        return [\n            'chunks_created' => count($chunks),\n            'tokens_total' => $tokensTotal,\n        ];\n    }\n\n    \/**\n     * Chunks all documents in the hierarchy.\n     *\n     * @return array{documents: int, chunks: int, tokens: int, errors: array<string>}\n     *\/\n    public function chunkAll(): array\n    {\n        $hierarchy = $this->repo->getHierarchy();\n        $results = ['documents' => 0, 'chunks' => 0, 'tokens' => 0, 'errors' => []];\n\n        
$this->processHierarchy($hierarchy, $results);\n\n        return $results;\n    }\n\n    \/**\n     * @param array<array<string, mixed>> $items\n     * @param array{documents: int, chunks: int, tokens: int, errors: array<string>} $results\n     *\/\n    private function processHierarchy(array $items, array &$results): void\n    {\n        foreach ($items as $item) {\n            try {\n                $result = $this->chunkDocument((int) $item['id']);\n                $results['documents']++;\n                $results['chunks'] += $result['chunks_created'];\n                $results['tokens'] += $result['tokens_total'];\n            } catch (RuntimeException $e) {\n                $results['errors'][] = \"Doc #{$item['id']}: \" . $e->getMessage();\n            }\n\n            if (!empty($item['children'])) {\n                $this->processHierarchy($item['children'], $results);\n            }\n        }\n    }\n\n    \/**\n     * Parses HTML content into structured chunks.\n     *\n     * @return array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}>\n     *\/\n    private function parseHtmlToChunks(string $html, string $docTitle): array\n    {\n        $chunks = [];\n        $currentHeadingPath = [$docTitle];\n\n        \/\/ Strip PHP code if present\n        $html = preg_replace('\/<\\?php.*?\\?>\/s', '', $html) ?? 
$html;\n\n        \/\/ Split by headings (h1-h4)\n        $pattern = '\/(<h[1-4][^>]*>.*?<\\\/h[1-4]>)\/is';\n        $parts = preg_split($pattern, $html, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);\n\n        if ($parts === false) {\n            $parts = [$html];\n        }\n\n        $currentContent = '';\n\n        foreach ($parts as $part) {\n            \/\/ Check if this is a heading\n            if (preg_match('\/<h([1-4])[^>]*>(.*?)<\\\/h[1-4]>\/is', $part, $matches)) {\n                \/\/ Save previous content as chunk if substantial\n                if (!empty(trim($currentContent))) {\n                    $chunk = $this->createChunk($currentContent, $currentHeadingPath);\n                    if ($chunk !== null) {\n                        $chunks[] = $chunk;\n                    }\n                }\n\n                \/\/ Update heading path based on level\n                $level = (int) $matches[1];\n                $headingText = strip_tags($matches[2]);\n                $headingText = html_entity_decode($headingText, ENT_QUOTES | ENT_HTML5, 'UTF-8');\n                $headingText = trim($headingText);\n\n                \/\/ Adjust heading path based on level\n                $currentHeadingPath = array_slice($currentHeadingPath, 0, $level);\n                $currentHeadingPath[$level] = $headingText;\n\n                $currentContent = '';\n            } else {\n                $currentContent .= $part;\n            }\n        }\n\n        \/\/ Don't forget the last chunk\n        if (!empty(trim($currentContent))) {\n            $chunk = $this->createChunk($currentContent, $currentHeadingPath);\n            if ($chunk !== null) {\n                $chunks[] = $chunk;\n            }\n        }\n\n        \/\/ If no chunks created, create one from the whole content\n        if (empty($chunks)) {\n            $chunk = $this->createChunk($html, [$docTitle]);\n            if ($chunk !== null) {\n                $chunks[] = $chunk;\n           
 }\n        }\n\n        \/\/ Split large chunks\n        $chunks = $this->splitLargeChunks($chunks);\n\n        return $chunks;\n    }\n\n    \/**\n     * Creates a chunk array from content.\n     *\n     * @param array<string> $headingPath\n     * @return array{content: string, content_clean: string, heading_path: array<string>, token_count: int}|null\n     *\/\n    private function createChunk(string $content, array $headingPath): ?array\n    {\n        $cleanContent = $this->cleanHtml($content);\n\n        if (empty(trim($cleanContent))) {\n            return null;\n        }\n\n        $tokenCount = $this->estimateTokens($cleanContent);\n\n        if ($tokenCount < self::MIN_CHUNK_TOKENS) {\n            return null;\n        }\n\n        return [\n            'content' => trim($content),\n            'content_clean' => $cleanContent,\n            'heading_path' => array_values(array_filter($headingPath)),\n            'token_count' => $tokenCount,\n        ];\n    }\n\n    \/**\n     * Splits chunks that exceed the maximum token limit.\n     *\n     * @param array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}> $chunks\n     * @return array<array{content: string, content_clean: string, heading_path: array<string>, token_count: int}>\n     *\/\n    private function splitLargeChunks(array $chunks): array\n    {\n        $result = [];\n\n        foreach ($chunks as $chunk) {\n            if ($chunk['token_count'] <= self::MAX_CHUNK_TOKENS) {\n                $result[] = $chunk;\n                continue;\n            }\n\n            \/\/ Split by paragraphs or sentences\n            $paragraphs = preg_split('\/\\n\\n+\/', $chunk['content_clean']);\n            if ($paragraphs === false) {\n                $paragraphs = [$chunk['content_clean']];\n            }\n\n            $currentText = '';\n            $currentTokens = 0;\n\n            foreach ($paragraphs as $para) {\n                $paraTokens = 
$this->estimateTokens($para);\n\n                if ($currentTokens + $paraTokens > self::MAX_CHUNK_TOKENS && $currentTokens > 0) {\n                    \/\/ Save current chunk\n                    $result[] = [\n                        'content' => $currentText,\n                        'content_clean' => $currentText,\n                        'heading_path' => $chunk['heading_path'],\n                        'token_count' => $currentTokens,\n                    ];\n                    $currentText = $para;\n                    $currentTokens = $paraTokens;\n                } else {\n                    $currentText .= ($currentText !== '' ? \"\\n\\n\" : '') . $para;\n                    $currentTokens += $paraTokens;\n                }\n            }\n\n            \/\/ Don't forget the last part\n            if ($currentTokens >= self::MIN_CHUNK_TOKENS) {\n                $result[] = [\n                    'content' => $currentText,\n                    'content_clean' => $currentText,\n                    'heading_path' => $chunk['heading_path'],\n                    'token_count' => $currentTokens,\n                ];\n            }\n        }\n\n        return $result;\n    }\n\n    \/**\n     * Cleans HTML to plain text.\n     *\/\n    private function cleanHtml(string $html): string\n    {\n        \/\/ Remove scripts and styles\n        $html = preg_replace('\/<script[^>]*>.*?<\\\/script>\/is', '', $html) ?? $html;\n        $html = preg_replace('\/<style[^>]*>.*?<\\\/style>\/is', '', $html) ?? $html;\n\n        \/\/ Convert common elements to text equivalents\n        $html = preg_replace('\/<br\\s*\\\/?>\/i', \"\\n\", $html) ?? $html;\n        $html = preg_replace('\/<\\\/p>\/i', \"\\n\\n\", $html) ?? $html;\n        $html = preg_replace('\/<\\\/div>\/i', \"\\n\", $html) ?? $html;\n        $html = preg_replace('\/<\\\/li>\/i', \"\\n\", $html) ?? $html;\n        $html = preg_replace('\/<\\\/tr>\/i', \"\\n\", $html) ?? 
$html;\n\n        \/\/ Preserve code blocks\n        $html = preg_replace('\/<pre[^>]*>(.*?)<\\\/pre>\/is', \"\\n```\\n$1\\n```\\n\", $html) ?? $html;\n        $html = preg_replace('\/<code[^>]*>(.*?)<\\\/code>\/is', '`$1`', $html) ?? $html;\n\n        \/\/ Strip remaining tags\n        $text = strip_tags($html);\n\n        \/\/ Decode entities\n        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');\n\n        \/\/ Normalize whitespace\n        $text = preg_replace('\/[ \\t]+\/', ' ', $text) ?? $text;\n        $text = preg_replace('\/\\n{3,}\/', \"\\n\\n\", $text) ?? $text;\n\n        return trim($text);\n    }\n\n    \/**\n     * Estimates token count for text.\n     *\/\n    private function estimateTokens(string $text): int\n    {\n        return (int) ceil(mb_strlen($text) \/ self::CHARS_PER_TOKEN);\n    }\n\n    \/**\n     * Stores a chunk in the database.\n     *\n     * @param array{content: string, content_clean: string, heading_path: array<string>, token_count: int} $chunk\n     *\/\n    private function storeChunk(int $docId, int $index, array $chunk): void\n    {\n        $sql = \"INSERT INTO dokumentation_chunks\n               ... [TRUNCATED-a7d31fc38cd1698c]",
                "numLines": 358,
                "startLine": 1,
                "totalLines": 358
            }
        },
        "tool_use_id": "toolu_01DCnhMGPMJnneXJ8ZHe4Wn8"
    }
}

Response

-
← Vorheriger | Zur Liste | Nächster →