Backup #1595

ID: 1595
Dateipfad: /var/www/dev.campus.systemische-tools.de/src/Infrastructure/AI/SemanticEnrichmentService.php
Version: 2
Typ: modified
Größe: 6.0 KB
Hash: 92d09b51460836d6b404915070816f79a576508981c593b8fb9658a42ece2f68
Datum: 2025-12-27 00:15:53
Geändert von: claude-code-hook
Grund: Claude Code Pre-Hook Backup vor Edit-Operation
Datei existiert: Ja

Dateiinhalt

<?php

declare(strict_types=1);

namespace Infrastructure\AI;

// @responsibility: Semantic Enrichment für RAG-Ergebnisse (Graceful Degradation)

use Domain\Repository\ChunkRepositoryInterface;

/**
 * Enriches search results with semantic data and summarizes that data.
 *
 * Graceful degradation is the guiding rule: a result without a usable chunk id,
 * or without entities/taxonomy in its payload, is treated as having none, so
 * callers can keep working with whatever data is available.
 */
final readonly class SemanticEnrichmentService
{
    public function __construct(
        private ChunkRepositoryInterface $chunkRepository
    ) {
    }

    /**
     * Adds 'entities' and 'taxonomy' keys to the payload of every search result.
     *
     * The chunk id is read from payload['chunk_id'], falling back to payload['id'].
     * When it is a usable integer identifier, both lists are fetched from the chunk
     * repository (which is expected to return empty arrays when nothing is linked).
     * Otherwise both keys are set to empty arrays.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{id: int|string, score: float, payload: array<string, mixed>}> Enriched results
     */
    public function enrichSearchResults(array $searchResults): array
    {
        foreach ($searchResults as &$result) {
            $chunkId = $this->normalizeChunkId(
                $result['payload']['chunk_id'] ?? $result['payload']['id'] ?? null
            );

            if ($chunkId === null) {
                // No usable chunk id: degrade to empty semantic data.
                $result['payload']['entities'] = [];
                $result['payload']['taxonomy'] = [];

                continue;
            }

            $result['payload']['entities'] = $this->chunkRepository->getChunkEntities($chunkId);
            $result['payload']['taxonomy'] = $this->chunkRepository->getChunkTaxonomy($chunkId);
        }
        // Break the by-reference binding so later writes to $result cannot touch the array.
        unset($result);

        return $searchResults;
    }

    /**
     * Collects the distinct entities found in enriched search results.
     *
     * Entries are sorted by occurrence count, highest first; entities with the
     * same count keep the order in which they were first seen (usort is stable
     * since PHP 8.0).
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{id: int, name: string, type: string, count: int}> Unique entities with occurrence count
     */
    public function extractUniqueEntities(array $searchResults): array
    {
        return $this->tallyPayloadItems($searchResults, 'entities', 'id', ['name', 'type']);
    }

    /**
     * Collects the distinct taxonomy terms found in enriched search results.
     *
     * Entries are sorted by occurrence count, highest first; terms with the same
     * count keep the order in which they were first seen.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     *
     * @return array<int, array{term_id: int, term_name: string, term_path: string, count: int}>
     */
    public function extractUniqueTaxonomy(array $searchResults): array
    {
        return $this->tallyPayloadItems($searchResults, 'taxonomy', 'term_id', ['term_name', 'term_path']);
    }

    /**
     * Builds a short text block describing the semantic context of the results.
     *
     * The block has up to two lines: the most frequent entities and the most
     * frequent taxonomy terms. A line is only emitted when it has at least one
     * item, so a limit of zero (or a negative limit, treated as zero) drops the
     * line entirely instead of leaving a dangling label. Returns an empty string
     * when there is nothing to report.
     *
     * @param array<int, array{id: int|string, score: float, payload: array<string, mixed>}> $searchResults
     * @param int $maxEntities Maximum entities to include
     * @param int $maxTerms Maximum taxonomy terms to include
     */
    public function buildSemanticSummary(array $searchResults, int $maxEntities = 5, int $maxTerms = 3): string
    {
        $lines = [];

        // max(0, ...) because a negative length makes array_slice drop items from the end.
        $topEntities = array_slice($this->extractUniqueEntities($searchResults), 0, max(0, $maxEntities));
        if ($topEntities !== []) {
            $labels = array_map(
                static fn (array $entity): string => sprintf('%s (%s)', $entity['name'], $entity['type']),
                $topEntities
            );
            $lines[] = 'Relevante Konzepte: ' . implode(', ', $labels);
        }

        $topTerms = array_slice($this->extractUniqueTaxonomy($searchResults), 0, max(0, $maxTerms));
        if ($topTerms !== []) {
            $lines[] = 'Themengebiete: ' . implode(', ', array_column($topTerms, 'term_name'));
        }

        return implode("\n", $lines);
    }

    /**
     * Converts a raw payload value into an integer chunk id, or null if it is not one.
     *
     * Accepted: ints, floats without a fractional part (a decoder may deliver 42 as
     * 42.0) and strings consisting of an optionally signed run of digits. Values such
     * as '1e3' or '12.9' are rejected rather than truncated, because truncation would
     * look up a different chunk.
     */
    private function normalizeChunkId(mixed $rawId): ?int
    {
        if (is_int($rawId)) {
            return $rawId;
        }

        if (is_float($rawId)) {
            // fmod() yields NAN for INF/NAN input, so those are rejected here as well.
            $isIntegral = fmod($rawId, 1.0) === 0.0 && abs($rawId) < (float) PHP_INT_MAX;

            return $isIntegral ? (int) $rawId : null;
        }

        if (is_string($rawId)) {
            $trimmed = trim($rawId);

            return preg_match('/\A[+-]?\d+\z/', $trimmed) === 1 ? (int) $trimmed : null;
        }

        return null;
    }

    /**
     * Counts how often each distinct item occurs under a payload key.
     *
     * Items are identified by the integer value of $idField. The first occurrence
     * supplies the copied fields; later occurrences only increase the count.
     * Entries that are not arrays or carry no id are skipped.
     *
     * @param array<int, array<string, mixed>> $searchResults
     * @param string $payloadKey Payload key holding the item list ('entities' or 'taxonomy')
     * @param string $idField Field that identifies an item
     * @param list<string> $copiedFields Fields copied verbatim from the first occurrence
     *
     * @return list<array<string, mixed>> Distinct items, most frequent first
     */
    private function tallyPayloadItems(
        array $searchResults,
        string $payloadKey,
        string $idField,
        array $copiedFields
    ): array {
        $tally = [];

        foreach ($searchResults as $result) {
            $items = $result['payload'][$payloadKey] ?? [];
            if (!is_array($items)) {
                continue;
            }

            foreach ($items as $item) {
                if (!is_array($item) || !isset($item[$idField])) {
                    continue;
                }

                $itemId = (int) $item[$idField];

                if (!isset($tally[$itemId])) {
                    $row = [$idField => $itemId];
                    foreach ($copiedFields as $field) {
                        $row[$field] = $item[$field] ?? null;
                    }
                    $row['count'] = 0;
                    $tally[$itemId] = $row;
                }

                $tally[$itemId]['count']++;
            }
        }

        // usort() re-indexes from 0, so the result is a plain list.
        usort($tally, static fn (array $a, array $b): int => $b['count'] <=> $a['count']);

        return $tally;
    }
}

Vollständig herunterladen

Aktionen

Herunterladen

Andere Versionen dieser Datei

ID Version Typ Größe Datum
1595 2 modified 6.0 KB 2025-12-27 00:15
1594 1 modified 6.0 KB 2025-12-27 00:15

← Zurück zur Übersicht