Backup #1594
| ID | 1594 |
| Dateipfad | /var/www/dev.campus.systemische-tools.de/src/Infrastructure/AI/SemanticEnrichmentService.php |
| Version | 1 |
| Typ | modified |
| Größe | 6.0 KB |
| Hash | c56475a2f080bac0c968c89c5b5f2e02e058252b2eecc4e81a6efb09f301996d |
| Datum | 2025-12-27 00:15:47 |
| Geändert von | claude-code-hook |
| Grund | Claude Code Pre-Hook Backup vor Edit-Operation |
| Datei existiert | Ja |
Dateiinhalt
<?php
declare(strict_types=1);
namespace Infrastructure\AI;
// @responsibility: Semantic Enrichment für RAG-Ergebnisse (Graceful Degradation)
use Domain\Repository\ChunkRepositoryInterface;
/**
 * Service for enriching search results with semantic data.
 *
 * Implements graceful degradation: if semantic data (entities, taxonomy)
 * is not available for a chunk, empty arrays are used instead, so callers
 * can keep working with whatever data is available.
 */
final readonly class SemanticEnrichmentService
{
    public function __construct(
        private ChunkRepositoryInterface $chunkRepository
    ) {
    }

    /**
     * Enriches search results with semantic data (entities, taxonomy).
     *
     * For each result whose payload carries a numeric chunk id (looked up
     * under 'chunk_id', then 'id'), the payload receives:
     * - 'entities': whatever the repository returns for that chunk
     * - 'taxonomy': whatever the repository returns for that chunk
     *
     * Results without a usable chunk id get empty arrays for both keys.
     * Array keys of $searchResults are preserved.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{id: int|string, score: float, payload: array<string, mixed>}> Enriched results
     */
    public function enrichSearchResults(array $searchResults): array
    {
        foreach ($searchResults as $key => $result) {
            // A missing/null payload is treated as an empty one so the
            // semantic keys can always be attached.
            $payload = $result['payload'] ?? [];

            // The chunk id may be stored as 'chunk_id' or as 'id'.
            $chunkId = $payload['chunk_id'] ?? $payload['id'] ?? null;

            if ($chunkId !== null && is_numeric($chunkId)) {
                $chunkId = (int) $chunkId;
                $payload['entities'] = $this->chunkRepository->getChunkEntities($chunkId);
                $payload['taxonomy'] = $this->chunkRepository->getChunkTaxonomy($chunkId);
            } else {
                // No usable chunk id: degrade to empty semantic data.
                $payload['entities'] = [];
                $payload['taxonomy'] = [];
            }

            $result['payload'] = $payload;
            $searchResults[$key] = $result;
        }

        return $searchResults;
    }

    /**
     * Extracts all unique entities from enriched search results.
     *
     * Entities are de-duplicated by their integer-cast 'id'; the name and
     * type of the first occurrence are kept. The list is sorted by
     * occurrence count, highest first; ties keep first-seen order.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{id: int, name: string, type: string, count: int}> Unique entities with occurrence count
     */
    public function extractUniqueEntities(array $searchResults): array
    {
        return self::tallyPayloadItems($searchResults, 'entities', 'id', ['name', 'type']);
    }

    /**
     * Extracts all unique taxonomy terms from enriched search results.
     *
     * Terms are de-duplicated by their integer-cast 'term_id'; the name and
     * path of the first occurrence are kept. The list is sorted by
     * occurrence count, highest first; ties keep first-seen order.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{term_id: int, term_name: string, term_path: string, count: int}>
     */
    public function extractUniqueTaxonomy(array $searchResults): array
    {
        return self::tallyPayloadItems($searchResults, 'taxonomy', 'term_id', ['term_name', 'term_path']);
    }

    /**
     * Builds a semantic context summary from enriched results.
     *
     * Produces up to two lines, joined by "\n":
     * - the most frequent entities, formatted as "name (type)"
     * - the most frequent taxonomy term names
     *
     * A section is omitted entirely when it would contain no items, which
     * includes a limit of zero. Negative limits are treated as zero.
     * Returns an empty string when no section remains.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     * @param int $maxEntities Maximum entities to include
     * @param int $maxTerms Maximum taxonomy terms to include
     */
    public function buildSemanticSummary(array $searchResults, int $maxEntities = 5, int $maxTerms = 3): string
    {
        // Clamp to zero: array_slice() interprets a negative length as
        // "stop that many elements from the end", not as "nothing".
        $topEntities = array_slice($this->extractUniqueEntities($searchResults), 0, max(0, $maxEntities));
        $topTerms = array_slice($this->extractUniqueTaxonomy($searchResults), 0, max(0, $maxTerms));

        $parts = [];

        if ($topEntities !== []) {
            $entityNames = array_map(
                static fn (array $entity): string => sprintf('%s (%s)', $entity['name'], $entity['type']),
                $topEntities
            );
            $parts[] = 'Relevante Konzepte: ' . implode(', ', $entityNames);
        }

        if ($topTerms !== []) {
            $termNames = array_map(static fn (array $term) => $term['term_name'], $topTerms);
            $parts[] = 'Themengebiete: ' . implode(', ', $termNames);
        }

        return implode("\n", $parts);
    }

    /**
     * Counts unique items stored under one payload key across all results.
     *
     * Shared implementation behind extractUniqueEntities() and
     * extractUniqueTaxonomy(). Entries that are not arrays or that lack the
     * id field are skipped instead of being merged into a bogus id-0 bucket.
     * Missing label fields default to an empty string.
     *
     * @param array<int, array<string, mixed>> $searchResults
     * @param string $payloadKey Payload key holding the item list
     * @param string $idField Field identifying an item; cast to int
     * @param array<int, string> $labelFields Fields copied from the first occurrence
     *
     * @return array<int, array<string, mixed>> Rows of id field, label fields and 'count', most frequent first
     */
    private static function tallyPayloadItems(
        array $searchResults,
        string $payloadKey,
        string $idField,
        array $labelFields
    ): array {
        $tally = [];

        foreach ($searchResults as $result) {
            $items = $result['payload'][$payloadKey] ?? [];
            if (!is_array($items)) {
                continue;
            }

            foreach ($items as $item) {
                if (!is_array($item) || !isset($item[$idField])) {
                    continue;
                }

                $itemId = (int) $item[$idField];

                if (!isset($tally[$itemId])) {
                    // Row key order is part of the returned shape:
                    // id field, label fields, then count.
                    $row = [$idField => $itemId];
                    foreach ($labelFields as $field) {
                        $row[$field] = $item[$field] ?? '';
                    }
                    $row['count'] = 0;
                    $tally[$itemId] = $row;
                }

                $tally[$itemId]['count']++;
            }
        }

        // usort() reindexes from 0, so no array_values() is needed afterwards.
        usort($tally, static fn (array $a, array $b): int => $b['count'] <=> $a['count']);

        return $tally;
    }
}
Vollständig herunterladen
Aktionen
Andere Versionen dieser Datei
| ID | Version | Typ | Größe | Datum |
| 1595 | 2 | modified | 6.0 KB | 2025-12-27 00:15 |
| 1594 | 1 | modified | 6.0 KB | 2025-12-27 00:15 |
← Zurück zur Übersicht