{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkAnalyzer.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Analysiert Chunks via LLM (Taxonomie, Entities, Keywords)\n\nuse Infrastructure\\AI\\OllamaService;\nuse RuntimeException;\n\nfinal class ChunkAnalyzer\n{\n private const string TAXONOMY_MODEL = 'gemma3:4b-it-qat';\n private const int MAX_RETRIES = 3;\n\n public function __construct(\n private OllamaService $ollama\n ) {\n }\n\n \/**\n * Performs LLM analysis on chunk content.\n *\n * @param array<string, mixed> $chunk\n * @param array{title: string, path: string} $docContext\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n public function analyze(array $chunk, array $docContext): array\n {\n $content = $chunk['content_clean'] ?? $chunk['content'];\n $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n\n $context = sprintf(\n \"Dokument: %s\\nPfad: %s\\nAbschnitt: %s\\n\\nInhalt:\\n%s\",\n $docContext['title'],\n $docContext['path'],\n implode(' > ', $headingPath),\n $content\n );\n\n $prompt = $this->buildAnalysisPrompt($context);\n $response = $this->callLlmWithRetry($prompt);\n $analysis = $this->parseAnalysisResponse($response);\n\n \/\/ Fallback: If no taxonomy, derive from document path\n if (empty($analysis['taxonomy'])) {\n $analysis['taxonomy'] = $this->deriveTaxonomyFromPath($docContext['path']);\n }\n\n return $analysis;\n }\n\n \/**\n * Builds the analysis prompt.\n *\/\n private function buildAnalysisPrompt(string $context): string\n {\n return <<<PROMPT\n Analysiere den folgenden technischen Dokumentationsabschnitt und extrahiere strukturierte Informationen.\n\n {$context}\n\n Antworte NUR mit einem JSON-Objekt in diesem exakten Format (keine Erklärungen):\n {\n \"taxonomy\": [\"Hauptkategorie\", \"Unterkategorie\", \"Thema\"],\n \"entities\": [\n {\"name\": \"Entitätsname\", \"type\": \"TECHNOLOGY|CONCEPT|CONFIG|COMMAND|SERVICE\"}\n ],\n \"keywords\": [\"keyword1\", \"keyword2\", \"keyword3\"]\n }\n\n Regeln:\n - taxonomy: Hierarchische Klassifikation (3 Ebenen: Bereich > Modul > Thema)\n - entities: Wichtige Technologien, Konzepte, Konfigurationen, Befehle, Dienste\n - keywords: 3-5 relevante Suchbegriffe\n - Antworte NUR mit dem JSON, keine anderen Texte\n PROMPT;\n }\n\n \/**\n * Calls the LLM with retry logic.\n *\/\n private function callLlmWithRetry(string $prompt): string\n {\n $lastError = new RuntimeException('No attempts made');\n\n for ($attempt = 1; $attempt <= self::MAX_RETRIES; $attempt++) {\n try {\n return $this->ollama->generate($prompt, self::TAXONOMY_MODEL);\n } catch (RuntimeException $e) {\n $lastError = $e;\n if ($attempt < self::MAX_RETRIES) {\n usleep(500000 * $attempt); \/\/ Progressive backoff\n }\n }\n }\n\n throw new RuntimeException(\n 'LLM call failed after ' . self::MAX_RETRIES . ' attempts: ' . $lastError->getMessage()\n );\n }\n\n \/**\n * Parses the LLM response into structured data.\n *\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n private function parseAnalysisResponse(string $response): array\n {\n $default = [\n 'taxonomy' => [],\n 'entities' => [],\n 'keywords' => [],\n ];\n\n \/\/ Extract JSON from response (handle markdown code blocks)\n $json = $response;\n if (preg_match('\/```(?:json)?\\s*([\\s\\S]*?)\\s*```\/', $response, $matches)) {\n $json = $matches[1];\n } elseif (preg_match('\/\\{[\\s\\S]*\\}\/', $response, $matches)) {\n $json = $matches[0];\n }\n\n $decoded = json_decode($json, true);\n\n if (!is_array($decoded)) {\n return $default;\n }\n\n return [\n 'taxonomy' => $this->validateArray($decoded['taxonomy'] ?? []),\n 'entities' => $this->validateEntities($decoded['entities'] ?? []),\n 'keywords' => $this->validateArray($decoded['keywords'] ?? []),\n ];\n }\n\n \/**\n * Validates an array of strings.\n *\n * @param mixed $arr\n * @return array<string>\n *\/\n private function validateArray(mixed $arr): array\n {\n if (!is_array($arr)) {\n return [];\n }\n\n return array_values(array_filter(\n $arr,\n static fn ($item): bool => is_string($item) && trim($item) !== ''\n ));\n }\n\n \/**\n * Validates entities array.\n *\n * @param mixed $entities\n * @return array<array{name: string, type: string}>\n *\/\n private function validateEntities(mixed $entities): array\n {\n if (!is_array($entities)) {\n return [];\n }\n\n $result = [];\n foreach ($entities as $entity) {\n if (is_array($entity) && isset($entity['name']) && is_string($entity['name'])) {\n $result[] = [\n 'name' => trim($entity['name']),\n 'type' => isset($entity['type']) && is_string($entity['type'])\n ? strtoupper($entity['type'])\n : 'OTHER',\n ];\n }\n }\n\n return $result;\n }\n\n \/**\n * Derives taxonomy from document path.\n *\n * @return array<string>\n *\/\n private function deriveTaxonomyFromPath(string $path): array\n {\n $parts = array_filter(explode('\/', trim($path, '\/')));\n\n $mapping = [\n 'server' => 'Server',\n 'modul' => 'Module',\n 'anwendungen' => 'Anwendungen',\n 'mcp' => 'MCP-Server',\n 'ki-tasks' => 'KI-Tasks',\n ];\n\n $taxonomy = [];\n foreach ($parts as $part) {\n $taxonomy[] = $mapping[$part] ?? ucfirst($part);\n }\n\n return array_slice($taxonomy, 0, 3);\n }\n\n \/**\n * Decodes JSON array safely.\n *\n * @return array<string>\n *\/\n private function decodeJsonArray(?string $json): array\n {\n if ($json === null || $json === '') {\n return [];\n }\n\n $decoded = json_decode($json, true);\n\n return is_array($decoded) ? $decoded : [];\n }\n}\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkAnalyzer.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Analysiert Chunks via LLM (Taxonomie, Entities, Keywords)\n\nuse Infrastructure\\AI\\OllamaService;\nuse RuntimeException;\n\nfinal class ChunkAnalyzer\n{\n private const string TAXONOMY_MODEL = 'gemma3:4b-it-qat';\n private const int MAX_RETRIES = 3;\n\n public function __construct(\n private OllamaService $ollama\n ) {\n }\n\n \/**\n * Performs LLM analysis on chunk content.\n *\n * @param array<string, mixed> $chunk\n * @param array{title: string, path: string} $docContext\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n public function analyze(array $chunk, array $docContext): array\n {\n $content = $chunk['content_clean'] ?? $chunk['content'];\n $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n\n $context = sprintf(\n \"Dokument: %s\\nPfad: %s\\nAbschnitt: %s\\n\\nInhalt:\\n%s\",\n $docContext['title'],\n $docContext['path'],\n implode(' > ', $headingPath),\n $content\n );\n\n $prompt = $this->buildAnalysisPrompt($context);\n $response = $this->callLlmWithRetry($prompt);\n $analysis = $this->parseAnalysisResponse($response);\n\n \/\/ Fallback: If no taxonomy, derive from document path\n if (empty($analysis['taxonomy'])) {\n $analysis['taxonomy'] = $this->deriveTaxonomyFromPath($docContext['path']);\n }\n\n return $analysis;\n }\n\n \/**\n * Builds the analysis prompt.\n *\/\n private function buildAnalysisPrompt(string $context): string\n {\n return <<<PROMPT\n Analysiere den folgenden technischen Dokumentationsabschnitt und extrahiere strukturierte Informationen.\n\n {$context}\n\n Antworte NUR mit einem JSON-Objekt in diesem exakten Format (keine Erklärungen):\n {\n \"taxonomy\": [\"Hauptkategorie\", \"Unterkategorie\", \"Thema\"],\n \"entities\": [\n {\"name\": \"Entitätsname\", \"type\": \"TECHNOLOGY|CONCEPT|CONFIG|COMMAND|SERVICE\"}\n ],\n \"keywords\": [\"keyword1\", \"keyword2\", \"keyword3\"]\n }\n\n Regeln:\n - taxonomy: Hierarchische Klassifikation (3 Ebenen: Bereich > Modul > Thema)\n - entities: Wichtige Technologien, Konzepte, Konfigurationen, Befehle, Dienste\n - keywords: 3-5 relevante Suchbegriffe\n - Antworte NUR mit dem JSON, keine anderen Texte\n PROMPT;\n }\n\n \/**\n * Calls the LLM with retry logic.\n *\/\n private function callLlmWithRetry(string $prompt): string\n {\n $lastError = new RuntimeException('No attempts made');\n\n for ($attempt = 1; $attempt <= self::MAX_RETRIES; $attempt++) {\n try {\n return $this->ollama->generate($prompt, self::TAXONOMY_MODEL);\n } catch (RuntimeException $e) {\n $lastError = $e;\n if ($attempt < self::MAX_RETRIES) {\n usleep(500000 * $attempt); \/\/ Progressive backoff\n }\n }\n }\n\n throw new RuntimeException(\n 'LLM call failed after ' . self::MAX_RETRIES . ' attempts: ' . $lastError->getMessage()\n );\n }\n\n \/**\n * Parses the LLM response into structured data.\n *\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n private function parseAnalysisResponse(string $response): array\n {\n $default = [\n 'taxonomy' => [],\n 'entities' => [],\n 'keywords' => [],\n ];\n\n \/\/ Extract JSON from response (handle markdown code blocks)\n $json = $response;\n if (preg_match('\/```(?:json)?\\s*([\\s\\S]*?)\\s*```\/', $response, $matches)) {\n $json = $matches[1];\n } elseif (preg_match('\/\\{[\\s\\S]*\\}\/', $response, $matches)) {\n $json = $matches[0];\n }\n\n $decoded = json_decode($json, true);\n\n if (!is_array($decoded)) {\n return $default;\n }\n\n return [\n 'taxonomy' => $this->validateArray($decoded['taxonomy'] ?? []),\n 'entities' => $this->validateEntities($decoded['entities'] ?? []),\n 'keywords' => $this->validateArray($decoded['keywords'] ?? []),\n ];\n }\n\n \/**\n * Validates an array of strings.\n *\n * @param mixed $arr\n * @return array<string>\n *\/\n private function validateArray(mixed $arr): array\n {\n if (!is_array($arr)) {\n return [];\n }\n\n return array_values(array_filter(\n $arr,\n static fn ($item): bool => is_string($item) && trim($item) !== ''\n ));\n }\n\n \/**\n * Validates entities array.\n *\n * @param mixed $entities\n * @return array<array{name: string, type: string}>\n *\/\n private function validateEntities(mixed $entities): array\n {\n if (!is_array($entities)) {\n return [];\n }\n\n $result = [];\n foreach ($entities as $entity) {\n if (is_array($entity) && isset($entity['name']) && is_string($entity['name'])) {\n $result[] = [\n 'name' => trim($entity['name']),\n 'type' => isset($entity['type']) && is_string($entity['type'])\n ? strtoupper($entity['type'])\n : 'OTHER',\n ];\n }\n }\n\n return $result;\n }\n\n \/**\n * Derives taxonomy from document path.\n *\n * @return array<string>\n *\/\n private function deriveTaxonomyFromPath(string $path): array\n {\n $parts = array_filter(explode('\/', trim($path, '\/')));\n\n $mapping = [\n 'server' => 'Server',\n 'modul' => 'Module',\n 'anwendungen' => 'Anwendungen',\n 'mcp' => 'MCP-Server',\n 'ki-tasks' => 'KI-Tasks',\n ];\n\n $taxonomy = [];\n foreach ($parts as $part) {\n $taxonomy[] = $mapping[$part] ?? ucfirst($part);\n }\n\n return array_slice($taxonomy, 0, 3);\n }\n\n \/**\n * Decodes JSON array safely.\n *\n * @return array<string>\n *\/\n private function decodeJsonArray(?string $json): array\n {\n if ($json === null || $json === '') {\n return [];\n }\n\n $decoded = json_decode($json, true);\n\n return is_array($decoded) ? $decoded : [];\n }\n}\n",
"structuredPatch": [],
"originalFile": null
}
}