}> $searchResults
     *
     * @return array<array-key, array<string, mixed>> Enriched results; every payload carries 'entities' and 'taxonomy'
     */
    public function enrichSearchResults(array $searchResults): array
    {
        foreach ($searchResults as &$result) {
            // Bind by reference so the keys added below end up in $searchResults itself.
            $payload = &$result['payload'];

            // The chunk identifier may be stored under 'chunk_id' or, failing that, 'id'.
            $chunkId = $payload['chunk_id'] ?? $payload['id'] ?? null;

            // is_numeric() is already false for null, so no separate null check is needed.
            if (is_numeric($chunkId)) {
                $chunkId = (int) $chunkId;

                // NOTE(review): presumably both lookups return an empty array when nothing
                // is stored for the chunk - verify against the repository implementation.
                $payload['entities'] = $this->chunkRepository->getChunkEntities($chunkId);
                $payload['taxonomy'] = $this->chunkRepository->getChunkTaxonomy($chunkId);
            } else {
                // No usable chunk id: keep the payload shape uniform with empty lists.
                $payload['entities'] = [];
                $payload['taxonomy'] = [];
            }
        }

        // Break both references so later writes to these names cannot touch the results.
        unset($result, $payload);

        return $searchResults;
    }

    /**
     * Collects the distinct entities found in enriched search results.
     *
     * Entities are read from each result's payload 'entities' list and grouped by
     * their integer-cast 'id'. The name and type of the first occurrence are kept,
     * and every further occurrence only increases the counter.
     *
     * Useful for building entity-based context summaries.
     *
     * @param array<array-key, array<string, mixed>> $searchResults Results as returned by enrichSearchResults()
     *
     * @return list<array{id: int, name: mixed, type: mixed, count: int}> Unique entities, most frequent first
     */
    public function extractUniqueEntities(array $searchResults): array
    {
        $entities = [];

        foreach ($searchResults as $result) {
            foreach ($result['payload']['entities'] ?? [] as $entity) {
                $entityId = (int) $entity['id'];

                // First sighting: register the entity with a zero counter.
                $entities[$entityId] ??= [
                    'id' => $entityId,
                    'name' => $entity['name'],
                    'type' => $entity['type'],
                    'count' => 0,
                ];

                $entities[$entityId]['count']++;
            }
        }

        // Sort by occurrence count, descending. usort() also discards the id keys,
        // so the result is a plain list.
        usort($entities, static fn (array $a, array $b): int => $b['count'] <=> $a['count']);

        return $entities;
    }

    /**
     * Collects the distinct taxonomy terms found in enriched search results.
     *
     * Terms are read from each result's payload 'taxonomy' list and grouped by
     * their integer-cast 'term_id'. The name and path of the first occurrence are
     * kept, and every further occurrence only increases the counter.
     *
     * @param array<array-key, array<string, mixed>> $searchResults Results as returned by enrichSearchResults()
     *
     * @return list<array{term_id: int, term_name: mixed, term_path: mixed, count: int}> Unique terms, most frequent first
     */
    public function extractUniqueTaxonomy(array $searchResults): array
    {
        $terms = [];

        foreach ($searchResults as $result) {
            foreach ($result['payload']['taxonomy'] ?? [] as $term) {
                $termId = (int) $term['term_id'];

                // First sighting: register the term with a zero counter.
                $terms[$termId] ??= [
                    'term_id' => $termId,
                    'term_name' => $term['term_name'],
                    'term_path' => $term['term_path'],
                    'count' => 0,
                ];

                $terms[$termId]['count']++;
            }
        }

        // Sort by occurrence count, descending. usort() already reindexes the array,
        // so no additional array_values() call is required.
        usort($terms, static fn (array $a, array $b): int => $b['count'] <=> $a['count']);

        return $terms;
    }

    /**
     * Builds a semantic context summary from enriched results.
     *
     * Creates a text block of up to two lines, joined by a newline:
     * - the most frequent entities, each rendered as "name (type)"
     * - the names of the most frequent taxonomy terms
     *
     * A line is omitted when it would have no entries, either because nothing was
     * found or because its limit is zero or negative. If both lines are omitted
     * the result is an empty string.
     *
     * This can be prepended to the RAG context for richer LLM understanding.
     *
     * @param array<array-key, array<string, mixed>> $searchResults Results as returned by enrichSearchResults()
     * @param int $maxEntities Maximum entities to include; values below 1 disable the entity line
     * @param int $maxTerms Maximum taxonomy terms to include; values below 1 disable the taxonomy line
     *
     * @return string The summary text, or '' when there is nothing to report
     */
    public function buildSemanticSummary(array $searchResults, int $maxEntities = 5, int $maxTerms = 3): string
    {
        $parts = [];

        // array_slice() reads a negative length as "stop that many elements before
        // the end", so clamp the limits to zero to get an empty selection instead.
        $topEntities = array_slice($this->extractUniqueEntities($searchResults), 0, max(0, $maxEntities));
        if ($topEntities !== []) {
            $entityNames = array_map(
                static fn (array $entity): string => sprintf('%s (%s)', $entity['name'], $entity['type']),
                $topEntities,
            );
            $parts[] = 'Relevante Konzepte: ' . implode(', ', $entityNames);
        }

        $topTerms = array_slice($this->extractUniqueTaxonomy($searchResults), 0, max(0, $maxTerms));
        if ($topTerms !== []) {
            $parts[] = 'Themengebiete: ' . implode(', ', array_column($topTerms, 'term_name'));
        }

        // implode() on an empty list yields '', which covers the "nothing found" case.
        return implode("\n", $parts);
    }
}