SemanticEnrichmentService.php
- Pfad: src/Infrastructure/AI/SemanticEnrichmentService.php
- Namespace: Infrastructure\AI
- Zeilen: 181 | Größe: 6,163 Bytes
- Geändert: 2025-12-27 00:15:53 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 97
- Dependencies: 100 (25%)
- LOC: 84 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
Keine Issues gefunden.
Dependencies 2
- constructor Domain\Repository\ChunkRepositoryInterface
- use Domain\Repository\ChunkRepositoryInterface
Klassen 1
- SemanticEnrichmentService (class), Zeile 18
Funktionen 5
- __construct() (public), Zeile 20
- enrichSearchResults() (public), Zeile 38
- extractUniqueEntities() (public), Zeile 74
- extractUniqueTaxonomy() (public), Zeile 110
- buildSemanticSummary() (public), Zeile 152
Verwendet von 2
- ChatService.php constructor
- ChatServiceProvider.php use
Versionen 2
- v2
  2025-12-27 00:15 | claude-code-hook | modified
  Claude Code Pre-Hook Backup vor Edit-Operation
- v1
  2025-12-27 00:15 | claude-code-hook | modified
  Claude Code Pre-Hook Backup vor Edit-Operation
Code
<?php
declare(strict_types=1);
namespace Infrastructure\AI;
// @responsibility: Semantic Enrichment für RAG-Ergebnisse (Graceful Degradation)
use Domain\Repository\ChunkRepositoryInterface;
/**
 * Enriches search results with semantic data and condenses that data into
 * aggregate views (unique entities, unique taxonomy terms, a text summary).
 *
 * Degradation behaviour visible in this class: a result without a numeric
 * chunk id gets empty `entities` / `taxonomy` arrays instead of a repository
 * lookup, and the aggregation methods treat missing payload sections as empty.
 * NOTE(review): the repository is presumably expected to return empty arrays
 * for chunks without semantic data; exceptions it throws are not caught here.
 */
final readonly class SemanticEnrichmentService
{
    public function __construct(
        private ChunkRepositoryInterface $chunkRepository,
    ) {
    }

    /**
     * Adds `entities` and `taxonomy` keys to the payload of every search result.
     *
     * The chunk id is read from `payload['chunk_id']`, falling back to
     * `payload['id']`. When that value is numeric it is cast to int and both
     * lists are fetched from the chunk repository; otherwise both keys are set
     * to empty arrays. Result order and keys are preserved.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{id: int|string, score: float, payload: array<string, mixed>}> Enriched results
     */
    public function enrichSearchResults(array $searchResults): array
    {
        foreach ($searchResults as $index => $result) {
            $payload = $result['payload'] ?? [];
            $chunkId = $payload['chunk_id'] ?? $payload['id'] ?? null;

            // is_numeric() is false for null, so no separate null check is needed.
            if (is_numeric($chunkId)) {
                $chunkId = (int) $chunkId;
                $payload['entities'] = $this->chunkRepository->getChunkEntities($chunkId);
                $payload['taxonomy'] = $this->chunkRepository->getChunkTaxonomy($chunkId);
            } else {
                // No usable chunk id: keep the payload shape uniform for consumers.
                $payload['entities'] = [];
                $payload['taxonomy'] = [];
            }

            // Write back by key instead of iterating by reference.
            $searchResults[$index]['payload'] = $payload;
        }

        return $searchResults;
    }

    /**
     * Collects the distinct entities found in `payload['entities']` of all results.
     *
     * Entities are identified by their integer-cast `id`; name and type are
     * taken from the first occurrence. The returned list is re-indexed and
     * ordered by occurrence count, highest first.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{id: int, name: string, type: string, count: int}> Unique entities with occurrence count
     */
    public function extractUniqueEntities(array $searchResults): array
    {
        $entities = [];

        foreach ($searchResults as $result) {
            foreach ($result['payload']['entities'] ?? [] as $entity) {
                $entityId = (int) $entity['id'];

                if (!isset($entities[$entityId])) {
                    $entities[$entityId] = [
                        'id' => $entityId,
                        'name' => $entity['name'],
                        'type' => $entity['type'],
                        'count' => 0,
                    ];
                }

                $entities[$entityId]['count']++;
            }
        }

        // Most frequent first; usort() also discards the id-based keys.
        usort($entities, static fn (array $a, array $b): int => $b['count'] <=> $a['count']);

        return $entities;
    }

    /**
     * Collects the distinct taxonomy terms found in `payload['taxonomy']` of all results.
     *
     * Terms are identified by their integer-cast `term_id`; name and path are
     * taken from the first occurrence. The returned list is re-indexed and
     * ordered by occurrence count, highest first.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{term_id: int, term_name: string, term_path: string, count: int}>
     */
    public function extractUniqueTaxonomy(array $searchResults): array
    {
        $terms = [];

        foreach ($searchResults as $result) {
            foreach ($result['payload']['taxonomy'] ?? [] as $term) {
                $termId = (int) $term['term_id'];

                if (!isset($terms[$termId])) {
                    $terms[$termId] = [
                        'term_id' => $termId,
                        'term_name' => $term['term_name'],
                        'term_path' => $term['term_path'],
                        'count' => 0,
                    ];
                }

                $terms[$termId]['count']++;
            }
        }

        // Most frequent first; usort() also discards the id-based keys.
        usort($terms, static fn (array $a, array $b): int => $b['count'] <=> $a['count']);

        return $terms;
    }

    /**
     * Builds a short text block describing the semantic context of the results.
     *
     * The block contains up to two lines joined by "\n": the most frequent
     * entities as "name (type)" and the most frequent taxonomy term names.
     * A line is omitted when it would list nothing, so the result is an empty
     * string when there is nothing to report.
     *
     * Limits of zero or below mean "include none". They are clamped because
     * array_slice() would interpret a negative length as "stop that many
     * elements before the end" rather than as a cap.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     * @param int $maxEntities Maximum entities to include
     * @param int $maxTerms Maximum taxonomy terms to include
     */
    public function buildSemanticSummary(array $searchResults, int $maxEntities = 5, int $maxTerms = 3): string
    {
        $parts = [];

        $topEntities = array_slice(
            $this->extractUniqueEntities($searchResults),
            0,
            max(0, $maxEntities),
        );
        if ($topEntities !== []) {
            $entityNames = array_map(
                static fn (array $entity): string => sprintf('%s (%s)', $entity['name'], $entity['type']),
                $topEntities,
            );
            $parts[] = 'Relevante Konzepte: ' . implode(', ', $entityNames);
        }

        $topTerms = array_slice(
            $this->extractUniqueTaxonomy($searchResults),
            0,
            max(0, $maxTerms),
        );
        if ($topTerms !== []) {
            $termNames = array_map(
                static fn (array $term): string => (string) $term['term_name'],
                $topTerms,
            );
            $parts[] = 'Themengebiete: ' . implode(', ', $termNames);
        }

        return implode("\n", $parts);
    }
}