{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/ChunkAnalysisService.php",
"oldString": " \/**\n * Performs the actual LLM analysis.\n *\n * @param array<string, mixed> $chunk\n * @param array<string, mixed> $docContext\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n private function performAnalysis(array $chunk, array $docContext): array\n {\n $content = $chunk['content_clean'] ?? $chunk['content'];\n $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n\n \/\/ Build context\n $context = sprintf(\n \"Dokument: %s\\nPfad: %s\\nAbschnitt: %s\\n\\nInhalt:\\n%s\",\n $docContext['title'],\n $docContext['path'],\n implode(' > ', $headingPath),\n $content\n );\n\n \/\/ Combined analysis prompt for efficiency\n $prompt = $this->buildAnalysisPrompt($context);\n\n $response = $this->callLlmWithRetry($prompt, self::TAXONOMY_MODEL);\n $analysis = $this->parseAnalysisResponse($response);\n\n \/\/ Fallback: If no taxonomy, derive from document path\n if (empty($analysis['taxonomy'])) {\n $analysis['taxonomy'] = $this->deriveTaxonomyFromPath($docContext['path']);\n }\n\n return $analysis;\n }\n\n \/**\n * Builds the analysis prompt.\n *\/\n private function buildAnalysisPrompt(string $context): string\n {\n return <<<PROMPT\n Analysiere den folgenden technischen Dokumentationsabschnitt und extrahiere strukturierte Informationen.\n\n {$context}\n\n Antworte NUR mit einem JSON-Objekt in diesem exakten Format (keine Erklärungen):\n {\n \"taxonomy\": [\"Hauptkategorie\", \"Unterkategorie\", \"Thema\"],\n \"entities\": [\n {\"name\": \"Entitätsname\", \"type\": \"TECHNOLOGY|CONCEPT|CONFIG|COMMAND|SERVICE\"}\n ],\n \"keywords\": [\"keyword1\", \"keyword2\", \"keyword3\"]\n }\n\n Regeln:\n - taxonomy: Hierarchische Klassifikation (3 Ebenen: Bereich > Modul > Thema)\n - entities: Wichtige Technologien, Konzepte, Konfigurationen, Befehle, Dienste\n - keywords: 3-5 relevante Suchbegriffe\n - Antworte NUR mit dem JSON, keine anderen Texte\n PROMPT;\n }\n\n \/**\n * Calls the LLM with retry logic.\n *\/\n private function callLlmWithRetry(string $prompt, string $model): string\n {\n $lastError = new RuntimeException('No attempts made');\n\n for ($attempt = 1; $attempt <= self::MAX_RETRIES; $attempt++) {\n try {\n return $this->ollama->generate($prompt, $model);\n } catch (RuntimeException $e) {\n $lastError = $e;\n if ($attempt < self::MAX_RETRIES) {\n usleep(500000 * $attempt); \/\/ Progressive backoff\n }\n }\n }\n\n throw new RuntimeException('LLM call failed after ' . self::MAX_RETRIES . ' attempts: ' . $lastError->getMessage());\n }\n\n \/**\n * Parses the LLM response into structured data.\n *\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n private function parseAnalysisResponse(string $response): array\n {\n $default = [\n 'taxonomy' => [],\n 'entities' => [],\n 'keywords' => [],\n ];\n\n \/\/ Extract JSON from response (handle markdown code blocks)\n $json = $response;\n if (preg_match('\/```(?:json)?\\s*([\\s\\S]*?)\\s*```\/', $response, $matches)) {\n $json = $matches[1];\n } elseif (preg_match('\/\\{[\\s\\S]*\\}\/', $response, $matches)) {\n $json = $matches[0];\n }\n\n $decoded = json_decode($json, true);\n\n if (!is_array($decoded)) {\n return $default;\n }\n\n return [\n 'taxonomy' => $this->validateArray($decoded['taxonomy'] ?? [], 'string'),\n 'entities' => $this->validateEntities($decoded['entities'] ?? []),\n 'keywords' => $this->validateArray($decoded['keywords'] ?? [], 'string'),\n ];\n }\n\n \/**\n * Validates an array of strings.\n *\n * @param mixed $arr\n * @return array<string>\n *\/\n private function validateArray(mixed $arr, string $type): array\n {\n if (!is_array($arr)) {\n return [];\n }\n\n return array_values(array_filter($arr, static fn ($item): bool => is_string($item) && trim($item) !== ''));\n }\n\n \/**\n * Validates entities array.\n *\n * @param mixed $entities\n * @return array<array{name: string, type: string}>\n *\/\n private function validateEntities(mixed $entities): array\n {\n if (!is_array($entities)) {\n return [];\n }\n\n $result = [];\n foreach ($entities as $entity) {\n if (is_array($entity) && isset($entity['name']) && is_string($entity['name'])) {\n $result[] = [\n 'name' => trim($entity['name']),\n 'type' => isset($entity['type']) && is_string($entity['type']) ? strtoupper($entity['type']) : 'OTHER',\n ];\n }\n }\n\n return $result;\n }\n\n \/**\n * Derives taxonomy from document path.\n *\n * @return array<string>\n *\/\n private function deriveTaxonomyFromPath(string $path): array\n {\n $parts = array_filter(explode('\/', trim($path, '\/')));\n\n \/\/ Map common paths to categories\n $mapping = [\n 'server' => 'Server',\n 'modul' => 'Module',\n 'anwendungen' => 'Anwendungen',\n 'mcp' => 'MCP-Server',\n 'ki-tasks' => 'KI-Tasks',\n ];\n\n $taxonomy = [];\n foreach ($parts as $part) {\n $taxonomy[] = $mapping[$part] ?? ucfirst($part);\n }\n\n return array_slice($taxonomy, 0, 3);\n }\n\n \/**\n * Stores analysis results in the database.\n *\n * @param array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>} $analysis\n *\/\n private function storeAnalysisResults(int $chunkId, array $analysis): void\n {\n $taxonomyPath = $analysis['taxonomy'];\n $taxonomyCategory = !empty($taxonomyPath) ? $taxonomyPath[0] : null;\n\n $sql = \"UPDATE dokumentation_chunks SET\n taxonomy_category = :category,\n taxonomy_path = :taxonomy,\n entities = :entities,\n keywords = :keywords,\n analysis_model = :model,\n analysis_status = 'completed',\n analysis_error = NULL,\n analyzed_at = NOW()\n WHERE id = :id\";\n\n $stmt = $this->pdo->prepare($sql);\n $stmt->execute([\n 'id' => $chunkId,\n 'category' => $taxonomyCategory,\n 'taxonomy' => json_encode($taxonomyPath),\n 'entities' => json_encode($analysis['entities']),\n 'keywords' => json_encode($analysis['keywords']),\n 'model' => self::TAXONOMY_MODEL,\n ]);\n }\n\n \/**\n * Updates chunk status.\n *\/\n private function updateStatus(int $chunkId, string $status, ?string $error = null): void\n {\n $sql = 'UPDATE dokumentation_chunks SET analysis_status = :status, analysis_error = :error WHERE id = :id';\n $stmt = $this->pdo->prepare($sql);\n $stmt->execute(['id' => $chunkId, 'status' => $status, 'error' => $error]);\n }\n\n \/**\n * Gets a chunk by ID.\n *\n * @return array<string, mixed>|null\n *\/\n private function getChunk(int $id): ?array\n {\n $stmt = $this->pdo->prepare('SELECT * FROM dokumentation_chunks WHERE id = :id');\n $stmt->execute(['id' => $id]);\n $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n return $result !== false ? $result : null;\n }\n\n \/**\n * Gets pending chunks.\n *\n * @return array<array<string, mixed>>\n *\/\n private function getPendingChunks(int $limit): array\n {\n $stmt = $this->pdo->prepare(\"\n SELECT * FROM dokumentation_chunks\n WHERE analysis_status = 'pending'\n ORDER BY dokumentation_id, chunk_index\n LIMIT :limit\n \");\n $stmt->bindValue('limit', $limit, PDO::PARAM_INT);\n $stmt->execute();\n\n return $stmt->fetchAll(PDO::FETCH_ASSOC);\n }\n\n \/**\n * Gets document context.\n *\n * @return array{title: string, path: string}\n *\/\n private function getDocumentContext(int $docId): array\n {\n $stmt = $this->pdo->prepare('SELECT title, path FROM dokumentation WHERE id = :id');\n $stmt->execute(['id' => $docId]);\n $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n return [\n 'title' => $result['title'] ?? 'Unbekannt',\n 'path' => $result['path'] ?? '\/',\n ];\n }",
"newString": "",
"originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Orchestriert Chunk-Analyse (koordiniert ChunkAnalyzer + ChunkRepository)\n\nuse RuntimeException;\n\nfinal class ChunkAnalysisService implements ChunkProcessorInterface\n{\n private const string TAXONOMY_MODEL = 'gemma3:4b-it-qat';\n private const int BATCH_SIZE = 10;\n\n public function __construct(\n private ChunkRepository $repository,\n private ChunkAnalyzer $analyzer\n ) {\n }\n\n \/**\n * Analyzes a single chunk.\n *\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n public function analyzeChunk(int $chunkId): array\n {\n return $this->process($chunkId);\n }\n\n \/**\n * Processes a single chunk (implements ChunkProcessorInterface).\n *\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n public function process(int $chunkId): array\n {\n $chunk = $this->repository->findById($chunkId);\n\n if ($chunk === null) {\n throw new RuntimeException(\"Chunk #{$chunkId} not found\");\n }\n\n \/\/ Mark as processing\n $this->repository->updateStatus($chunkId, 'processing');\n\n try {\n \/\/ Get document context\n $docContext = $this->repository->getDocumentContext((int) $chunk['dokumentation_id']);\n\n \/\/ Perform analysis via ChunkAnalyzer\n $analysis = $this->analyzer->analyze($chunk, $docContext);\n\n \/\/ Store results\n $this->repository->storeAnalysisResults($chunkId, $analysis, self::TAXONOMY_MODEL);\n\n return $analysis;\n } catch (RuntimeException $e) {\n $this->repository->updateStatus($chunkId, 'failed', $e->getMessage());\n\n throw $e;\n }\n }\n\n \/**\n * Analyzes all pending chunks in batches.\n *\n * @return array{analyzed: int, failed: int, errors: array<string>}\n *\/\n public function analyzeAllPending(int $limit = 100): array\n {\n return $this->processBatch($limit);\n }\n\n \/**\n * Processes multiple chunks in batch (implements ChunkProcessorInterface).\n *\n * @return array{processed: int, failed: int, errors: array<string>}\n *\/\n public function processBatch(int $limit): array\n {\n $results = ['processed' => 0, 'failed' => 0, 'errors' => []];\n\n $chunks = $this->repository->findPending($limit);\n\n foreach ($chunks as $chunk) {\n try {\n $this->process((int) $chunk['id']);\n $results['processed']++;\n\n \/\/ Progress output\n if ($results['processed'] % self::BATCH_SIZE === 0) {\n echo \"Analyzed {$results['processed']} chunks...\\n\";\n }\n } catch (RuntimeException $e) {\n $results['failed']++;\n $results['errors'][] = \"Chunk #{$chunk['id']}: \" . $e->getMessage();\n }\n }\n\n \/\/ Return with legacy key 'analyzed' for backward compatibility\n $results['analyzed'] = $results['processed'];\n\n return $results;\n }\n\n \/**\n * Performs the actual LLM analysis.\n *\n * @param array<string, mixed> $chunk\n * @param array<string, mixed> $docContext\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n private function performAnalysis(array $chunk, array $docContext): array\n {\n $content = $chunk['content_clean'] ?? $chunk['content'];\n $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);\n\n \/\/ Build context\n $context = sprintf(\n \"Dokument: %s\\nPfad: %s\\nAbschnitt: %s\\n\\nInhalt:\\n%s\",\n $docContext['title'],\n $docContext['path'],\n implode(' > ', $headingPath),\n $content\n );\n\n \/\/ Combined analysis prompt for efficiency\n $prompt = $this->buildAnalysisPrompt($context);\n\n $response = $this->callLlmWithRetry($prompt, self::TAXONOMY_MODEL);\n $analysis = $this->parseAnalysisResponse($response);\n\n \/\/ Fallback: If no taxonomy, derive from document path\n if (empty($analysis['taxonomy'])) {\n $analysis['taxonomy'] = $this->deriveTaxonomyFromPath($docContext['path']);\n }\n\n return $analysis;\n }\n\n \/**\n * Builds the analysis prompt.\n *\/\n private function buildAnalysisPrompt(string $context): string\n {\n return <<<PROMPT\n Analysiere den folgenden technischen Dokumentationsabschnitt und extrahiere strukturierte Informationen.\n\n {$context}\n\n Antworte NUR mit einem JSON-Objekt in diesem exakten Format (keine Erklärungen):\n {\n \"taxonomy\": [\"Hauptkategorie\", \"Unterkategorie\", \"Thema\"],\n \"entities\": [\n {\"name\": \"Entitätsname\", \"type\": \"TECHNOLOGY|CONCEPT|CONFIG|COMMAND|SERVICE\"}\n ],\n \"keywords\": [\"keyword1\", \"keyword2\", \"keyword3\"]\n }\n\n Regeln:\n - taxonomy: Hierarchische Klassifikation (3 Ebenen: Bereich > Modul > Thema)\n - entities: Wichtige Technologien, Konzepte, Konfigurationen, Befehle, Dienste\n - keywords: 3-5 relevante Suchbegriffe\n - Antworte NUR mit dem JSON, keine anderen Texte\n PROMPT;\n }\n\n \/**\n * Calls the LLM with retry logic.\n *\/\n private function callLlmWithRetry(string $prompt, string $model): string\n {\n $lastError = new RuntimeException('No attempts made');\n\n for ($attempt = 1; $attempt <= self::MAX_RETRIES; $attempt++) {\n try {\n return $this->ollama->generate($prompt, $model);\n } catch (RuntimeException $e) {\n $lastError = $e;\n if ($attempt < self::MAX_RETRIES) {\n usleep(500000 * $attempt); \/\/ Progressive backoff\n }\n }\n }\n\n throw new RuntimeException('LLM call failed after ' . self::MAX_RETRIES . ' attempts: ' . $lastError->getMessage());\n }\n\n \/**\n * Parses the LLM response into structured data.\n *\n * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}\n *\/\n private function parseAnalysisResponse(string $response): array\n {\n $default = [\n 'taxonomy' => [],\n 'entities' => [],\n 'keywords' => [],\n ];\n\n \/\/ Extract JSON from response (handle markdown code blocks)\n $json = $response;\n if (preg_match('\/```(?:json)?\\s*([\\s\\S]*?)\\s*```\/', $response, $matches)) {\n $json = $matches[1];\n } elseif (preg_match('\/\\{[\\s\\S]*\\}\/', $response, $matches)) {\n $json = $matches[0];\n }\n\n $decoded = json_decode($json, true);\n\n if (!is_array($decoded)) {\n return $default;\n }\n\n return [\n 'taxonomy' => $this->validateArray($decoded['taxonomy'] ?? [], 'string'),\n 'entities' => $this->validateEntities($decoded['entities'] ?? []),\n 'keywords' => $this->validateArray($decoded['keywords'] ?? [], 'string'),\n ];\n }\n\n \/**\n * Validates an array of strings.\n *\n * @param mixed $arr\n * @return array<string>\n *\/\n private function validateArray(mixed $arr, string $type): array\n {\n if (!is_array($arr)) {\n return [];\n }\n\n return array_values(array_filter($arr, static fn ($item): bool => is_string($item) && trim($item) !== ''));\n }\n\n \/**\n * Validates entities array.\n *\n * @param mixed $entities\n * @return array<array{name: string, type: string}>\n *\/\n private function validateEntities(mixed $entities): array\n {\n if (!is_array($entities)) {\n return [];\n }\n\n $result = [];\n foreach ($entities as $entity) {\n if (is_array($entity) && isset($entity['name']) && is_string($entity['name'])) {\n $result[] = [\n 'name' => trim($entity['name']),\n 'type' => isset($entity['type']) && is_string($entity['type']) ? strtoupper($entity['type']) : 'OTHER',\n ];\n }\n }\n\n return $result;\n }\n\n \/**\n * Derives taxonomy from document path.\n *\n * @return array<string>\n *\/\n private function deriveTaxonomyFromPath(string $path): array\n {\n $parts = array_filter(explode('\/', trim($path, '\/')));\n\n \/\/ Map common paths to categories\n $mapping = [\n 'server' => 'Server',\n 'modul' => 'Module',\n 'anwendungen' => 'Anwendungen',\n 'mcp' => 'MCP-Server',\n 'ki-tasks' => 'KI-Tasks',\n ];\n\n $taxonomy = [];\n foreach ($parts as $part) {\n $taxonomy[] = $mapping[$part] ?? ucfirst($part);\n }\n\n return array_slice($taxonomy, 0, 3);\n }\n\n \/**\n * Stores analysis results in the database.\n *\n * @param array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>} $analysis\n *\/\n private function storeAnalysisResults(int $chunkId, array $analysis): void\n {\n $taxonomyPath = $analysis['taxonomy'];\n $taxonomyCategory = !empty($taxonomyPath) ? $taxonomyPath[0] : null;\n\n $sql = \"UPDATE dokumentation_chunks SET\n taxonomy_category = :category,\n taxonomy_path = :taxonomy,\n entities = :entities,\n keywords = :keywords,\n analysis_model = :model,\n analysis_status = 'completed',\n analysis_error = NULL,\n analyzed_at = NOW()\n WHERE id = :id\";\n\n $stmt = $this->pdo->prepare($sql);\n ... [TRUNCATED-00432e4d86ea920b]",
"structuredPatch": [
{
"oldStart": 105,
"oldLines": 282,
"newStart": 105,
"newLines": 8,
"lines": [
" return $results;",
" }",
" ",
"- \/**",
"- * Performs the actual LLM analysis.",
"- *",
"- * @param array<string, mixed> $chunk",
"- * @param array<string, mixed> $docContext",
"- * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}",
"- *\/",
"- private function performAnalysis(array $chunk, array $docContext): array",
"- {",
"- $content = $chunk['content_clean'] ?? $chunk['content'];",
"- $headingPath = $this->decodeJsonArray($chunk['heading_path'] ?? null);",
" ",
"- \/\/ Build context",
"- $context = sprintf(",
"- \"Dokument: %s\\nPfad: %s\\nAbschnitt: %s\\n\\nInhalt:\\n%s\",",
"- $docContext['title'],",
"- $docContext['path'],",
"- implode(' > ', $headingPath),",
"- $content",
"- );",
"-",
"- \/\/ Combined analysis prompt for efficiency",
"- $prompt = $this->buildAnalysisPrompt($context);",
"-",
"- $response = $this->callLlmWithRetry($prompt, self::TAXONOMY_MODEL);",
"- $analysis = $this->parseAnalysisResponse($response);",
"-",
"- \/\/ Fallback: If no taxonomy, derive from document path",
"- if (empty($analysis['taxonomy'])) {",
"- $analysis['taxonomy'] = $this->deriveTaxonomyFromPath($docContext['path']);",
"- }",
"-",
"- return $analysis;",
"- }",
"-",
" \/**",
"- * Builds the analysis prompt.",
"- *\/",
"- private function buildAnalysisPrompt(string $context): string",
"- {",
"- return <<<PROMPT",
"- Analysiere den folgenden technischen Dokumentationsabschnitt und extrahiere strukturierte Informationen.",
"-",
"- {$context}",
"-",
"- Antworte NUR mit einem JSON-Objekt in diesem exakten Format (keine Erklärungen):",
"- {",
"- \"taxonomy\": [\"Hauptkategorie\", \"Unterkategorie\", \"Thema\"],",
"- \"entities\": [",
"- {\"name\": \"Entitätsname\", \"type\": \"TECHNOLOGY|CONCEPT|CONFIG|COMMAND|SERVICE\"}",
"- ],",
"- \"keywords\": [\"keyword1\", \"keyword2\", \"keyword3\"]",
"- }",
"-",
"- Regeln:",
"- - taxonomy: Hierarchische Klassifikation (3 Ebenen: Bereich > Modul > Thema)",
"- - entities: Wichtige Technologien, Konzepte, Konfigurationen, Befehle, Dienste",
"- - keywords: 3-5 relevante Suchbegriffe",
"- - Antworte NUR mit dem JSON, keine anderen Texte",
"- PROMPT;",
"- }",
"-",
"- \/**",
"- * Calls the LLM with retry logic.",
"- *\/",
"- private function callLlmWithRetry(string $prompt, string $model): string",
"- {",
"- $lastError = new RuntimeException('No attempts made');",
"-",
"- for ($attempt = 1; $attempt <= self::MAX_RETRIES; $attempt++) {",
"- try {",
"- return $this->ollama->generate($prompt, $model);",
"- } catch (RuntimeException $e) {",
"- $lastError = $e;",
"- if ($attempt < self::MAX_RETRIES) {",
"- usleep(500000 * $attempt); \/\/ Progressive backoff",
"- }",
"- }",
"- }",
"-",
"- throw new RuntimeException('LLM call failed after ' . self::MAX_RETRIES . ' attempts: ' . $lastError->getMessage());",
"- }",
"-",
"- \/**",
"- * Parses the LLM response into structured data.",
"- *",
"- * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}",
"- *\/",
"- private function parseAnalysisResponse(string $response): array",
"- {",
"- $default = [",
"- 'taxonomy' => [],",
"- 'entities' => [],",
"- 'keywords' => [],",
"- ];",
"-",
"- \/\/ Extract JSON from response (handle markdown code blocks)",
"- $json = $response;",
"- if (preg_match('\/```(?:json)?\\s*([\\s\\S]*?)\\s*```\/', $response, $matches)) {",
"- $json = $matches[1];",
"- } elseif (preg_match('\/\\{[\\s\\S]*\\}\/', $response, $matches)) {",
"- $json = $matches[0];",
"- }",
"-",
"- $decoded = json_decode($json, true);",
"-",
"- if (!is_array($decoded)) {",
"- return $default;",
"- }",
"-",
"- return [",
"- 'taxonomy' => $this->validateArray($decoded['taxonomy'] ?? [], 'string'),",
"- 'entities' => $this->validateEntities($decoded['entities'] ?? []),",
"- 'keywords' => $this->validateArray($decoded['keywords'] ?? [], 'string'),",
"- ];",
"- }",
"-",
"- \/**",
"- * Validates an array of strings.",
"- *",
"- * @param mixed $arr",
"- * @return array<string>",
"- *\/",
"- private function validateArray(mixed $arr, string $type): array",
"- {",
"- if (!is_array($arr)) {",
"- return [];",
"- }",
"-",
"- return array_values(array_filter($arr, static fn ($item): bool => is_string($item) && trim($item) !== ''));",
"- }",
"-",
"- \/**",
"- * Validates entities array.",
"- *",
"- * @param mixed $entities",
"- * @return array<array{name: string, type: string}>",
"- *\/",
"- private function validateEntities(mixed $entities): array",
"- {",
"- if (!is_array($entities)) {",
"- return [];",
"- }",
"-",
"- $result = [];",
"- foreach ($entities as $entity) {",
"- if (is_array($entity) && isset($entity['name']) && is_string($entity['name'])) {",
"- $result[] = [",
"- 'name' => trim($entity['name']),",
"- 'type' => isset($entity['type']) && is_string($entity['type']) ? strtoupper($entity['type']) : 'OTHER',",
"- ];",
"- }",
"- }",
"-",
"- return $result;",
"- }",
"-",
"- \/**",
"- * Derives taxonomy from document path.",
"- *",
"- * @return array<string>",
"- *\/",
"- private function deriveTaxonomyFromPath(string $path): array",
"- {",
"- $parts = array_filter(explode('\/', trim($path, '\/')));",
"-",
"- \/\/ Map common paths to categories",
"- $mapping = [",
"- 'server' => 'Server',",
"- 'modul' => 'Module',",
"- 'anwendungen' => 'Anwendungen',",
"- 'mcp' => 'MCP-Server',",
"- 'ki-tasks' => 'KI-Tasks',",
"- ];",
"-",
"- $taxonomy = [];",
"- foreach ($parts as $part) {",
"- $taxonomy[] = $mapping[$part] ?? ucfirst($part);",
"- }",
"-",
"- return array_slice($taxonomy, 0, 3);",
"- }",
"-",
"- \/**",
"- * Stores analysis results in the database.",
"- *",
"- * @param array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>} $analysis",
"- *\/",
"- private function storeAnalysisResults(int $chunkId, array $analysis): void",
"- {",
"- $taxonomyPath = $analysis['taxonomy'];",
"- $taxonomyCategory = !empty($taxonomyPath) ? $taxonomyPath[0] : null;",
"-",
"- $sql = \"UPDATE dokumentation_chunks SET",
"- taxonomy_category = :category,",
"- taxonomy_path = :taxonomy,",
"- entities = :entities,",
"- keywords = :keywords,",
"- analysis_model = :model,",
"- analysis_status = 'completed',",
"- analysis_error = NULL,",
"- analyzed_at = NOW()",
"- WHERE id = :id\";",
"-",
"- $stmt = $this->pdo->prepare($sql);",
"- $stmt->execute([",
"- 'id' => $chunkId,",
"- 'category' => $taxonomyCategory,",
"- 'taxonomy' => json_encode($taxonomyPath),",
"- 'entities' => json_encode($analysis['entities']),",
"- 'keywords' => json_encode($analysis['keywords']),",
"- 'model' => self::TAXONOMY_MODEL,",
"- ]);",
"- }",
"-",
"- \/**",
"- * Updates chunk status.",
"- *\/",
"- private function updateStatus(int $chunkId, string $status, ?string $error = null): void",
"- {",
"- $sql = 'UPDATE dokumentation_chunks SET analysis_status = :status, analysis_error = :error WHERE id = :id';",
"- $stmt = $this->pdo->prepare($sql);",
"- $stmt->execute(['id' => $chunkId, 'status' => $status, 'error' => $error]);",
"- }",
"-",
"- \/**",
"- * Gets a chunk by ID.",
"- *",
"- * @return array<string, mixed>|null",
"- *\/",
"- private function getChunk(int $id): ?array",
"- {",
"- $stmt = $this->pdo->prepare('SELECT * FROM dokumentation_chunks WHERE id = :id');",
"- $stmt->execute(['id' => $id]);",
"- $result = $stmt->fetch(PDO::FETCH_ASSOC);",
"-",
"- return $result !== false ? $result : null;",
"- }",
"-",
"- \/**",
"- * Gets pending chunks.",
"- *",
"- * @return array<array<string, mixed>>",
"- *\/",
"- private function getPendingChunks(int $limit): array",
"- {",
"- $stmt = $this->pdo->prepare(\"",
"- SELECT * FROM dokumentation_chunks",
"- WHERE analysis_status = 'pending'",
"- ORDER BY dokumentation_id, chunk_index",
"- LIMIT :limit",
"- \");",
"- $stmt->bindValue('limit', $limit, PDO::PARAM_INT);",
"- $stmt->execute();",
"-",
"- return $stmt->fetchAll(PDO::FETCH_ASSOC);",
"- }",
"-",
"- \/**",
"- * Gets document context.",
"- *",
"- * @return array{title: string, path: string}",
"- *\/",
"- private function getDocumentContext(int $docId): array",
"- {",
"- $stmt = $this->pdo->prepare('SELECT title, path FROM dokumentation WHERE id = :id');",
"- $stmt->execute(['id' => $docId]);",
"- $result = $stmt->fetch(PDO::FETCH_ASSOC);",
"-",
"- return [",
"- 'title' => $result['title'] ?? 'Unbekannt',",
"- 'path' => $result['path'] ?? '\/',",
"- ];",
"- }",
"-",
"- \/**",
" * Gets analysis statistics.",
" *",
" * @return array{pending: int, processing: int, completed: int, failed: int, by_category: array<array{category: string, count: int}>}"
]
}
],
"userModified": false,
"replaceAll": false
}
}