{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/AI\/SemanticEnrichmentService.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\AI;\n\n\/\/ @responsibility: Semantic Enrichment für RAG-Ergebnisse (Graceful Degradation)\n\nuse Domain\\Repository\\ChunkRepositoryInterface;\n\n\/**\n * Service for enriching search results with semantic data.\n *\n * Implements Graceful Degradation: If semantic data (entities, taxonomy)\n * is not available for a chunk, it gracefully returns empty arrays.\n * The chat continues to work with whatever data is available.\n *\/\nfinal readonly class SemanticEnrichmentService\n{\n public function __construct(\n private ChunkRepositoryInterface $chunkRepository\n ) {\n }\n\n \/**\n * Enriches search results with semantic data (entities, taxonomy).\n *\n * For each result that has a chunk_id in its payload, fetches:\n * - Entities linked to the chunk\n * - Taxonomy terms assigned to the chunk\n *\n * If no semantic data exists, empty arrays are added (graceful degradation).\n *\n * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults\n *\n * @return array<int, array{id: int|string, score: float, payload: array<string, mixed>}> Enriched results\n *\/\n public function enrichSearchResults(array $searchResults): array\n {\n foreach ($searchResults as &$result) {\n $payload = &$result['payload'];\n\n \/\/ Get chunk_id from payload (may be stored as 'chunk_id' or 'id')\n $chunkId = $payload['chunk_id'] ?? $payload['id'] ?? null;\n\n if ($chunkId !== null && is_numeric($chunkId)) {\n $chunkId = (int) $chunkId;\n\n \/\/ Fetch entities (graceful: empty array if none)\n $payload['entities'] = $this->chunkRepository->getChunkEntities($chunkId);\n\n \/\/ Fetch taxonomy (graceful: empty array if none)\n $payload['taxonomy'] = $this->chunkRepository->getChunkTaxonomy($chunkId);\n } else {\n \/\/ No chunk_id available - set empty arrays\n $payload['entities'] = [];\n $payload['taxonomy'] = [];\n }\n }\n unset($result, $payload);\n\n return $searchResults;\n }\n\n \/**\n * Extracts all unique entities from enriched search results.\n *\n * Useful for building entity-based context summaries.\n *\n * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults\n *\n * @return array<int, array{id: int, name: string, type: string, count: int}> Unique entities with occurrence count\n *\/\n public function extractUniqueEntities(array $searchResults): array\n {\n $entities = [];\n\n foreach ($searchResults as $result) {\n $chunkEntities = $result['payload']['entities'] ?? [];\n\n foreach ($chunkEntities as $entity) {\n $entityId = (int) $entity['id'];\n\n if (!isset($entities[$entityId])) {\n $entities[$entityId] = [\n 'id' => $entityId,\n 'name' => $entity['name'],\n 'type' => $entity['type'],\n 'count' => 0,\n ];\n }\n\n $entities[$entityId]['count']++;\n }\n }\n\n \/\/ Sort by occurrence count descending\n usort($entities, static fn ($a, $b) => $b['count'] <=> $a['count']);\n\n return $entities;\n }\n\n \/**\n * Extracts all unique taxonomy terms from enriched search results.\n *\n * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults\n *\n * @return array<int, array{term_id: int, term_name: string, term_path: string, count: int}>\n *\/\n public function extractUniqueTaxonomy(array $searchResults): array\n {\n $terms = [];\n\n foreach ($searchResults as $result) {\n $chunkTaxonomy = $result['payload']['taxonomy'] ?? [];\n\n foreach ($chunkTaxonomy as $term) {\n $termId = (int) $term['term_id'];\n\n if (!isset($terms[$termId])) {\n $terms[$termId] = [\n 'term_id' => $termId,\n 'term_name' => $term['term_name'],\n 'term_path' => $term['term_path'],\n 'count' => 0,\n ];\n }\n\n $terms[$termId]['count']++;\n }\n }\n\n \/\/ Sort by occurrence count descending\n usort($terms, static fn ($a, $b) => $b['count'] <=> $a['count']);\n\n return $terms;\n }\n\n \/**\n * Builds a semantic context summary from enriched results.\n *\n * Creates a text block summarizing the semantic context:\n * - Top entities found\n * - Taxonomy categories covered\n *\n * This can be prepended to the RAG context for richer LLM understanding.\n *\n * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults\n * @param int $maxEntities Maximum entities to include\n * @param int $maxTerms Maximum taxonomy terms to include\n *\/\n public function buildSemanticSummary(array $searchResults, int $maxEntities = 5, int $maxTerms = 3): string\n {\n $entities = $this->extractUniqueEntities($searchResults);\n $terms = $this->extractUniqueTaxonomy($searchResults);\n\n if ($entities === [] && $terms === []) {\n return '';\n }\n\n $parts = [];\n\n if ($entities !== []) {\n $topEntities = array_slice($entities, 0, $maxEntities);\n $entityNames = array_map(\n static fn ($e) => sprintf('%s (%s)', $e['name'], $e['type']),\n $topEntities\n );\n $parts[] = 'Relevante Konzepte: ' . implode(', ', $entityNames);\n }\n\n if ($terms !== []) {\n $topTerms = array_slice($terms, 0, $maxTerms);\n $termNames = array_map(static fn ($t) => $t['term_name'], $topTerms);\n $parts[] = 'Themengebiete: ' . implode(', ', $termNames);\n }\n\n return implode(\"\\n\", $parts);\n }\n}\n",
"numLines": 181,
"startLine": 1,
"totalLines": 181
}
}
}