RagContextBuilder.php
- Pfad: src/UseCases/Chat/RagContextBuilder.php
- Namespace: UseCases\Chat
- Zeilen: 175 | Größe: 5,258 Bytes
- Geändert: 2025-12-29 00:01:16 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 97
- Dependencies: 100 (25%)
- LOC: 87 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
Keine Issues gefunden.
Dependencies 2
- constructor Domain\Service\SearchServiceInterface
- use Domain\Service\SearchServiceInterface
Klassen 1
- RagContextBuilder (class), Zeile 11
Funktionen 5
- __construct() public, Zeile 16
- search() public, Zeile 29
- buildContext() public, Zeile 62
- extractSources() public, Zeile 94
- buildSemanticHeader() private, Zeile 141
Verwendet von 2
- ChatServiceProvider.php use
- StreamingChatMessageUseCase.php constructor
Code
<?php
declare(strict_types=1);
namespace UseCases\Chat;
// @responsibility: Baut RAG-Kontext aus semantischer Suche
use Domain\Service\SearchServiceInterface;
/**
 * Builds RAG (retrieval-augmented generation) context from semantic search hits.
 *
 * The class normalises raw search results, concatenates their content into a
 * size-limited context string and derives a deduplicated list of sources.
 */
final class RagContextBuilder
{
    /** Upper bound of the assembled context, expressed in tokens. */
    private const MAX_CONTEXT_TOKENS = 3000;

    /** Number of characters assumed per token when converting the token budget to characters. */
    private const CHARS_PER_TOKEN = 4;

    public function __construct(
        private SearchServiceInterface $searchService
    ) {
    }

    /**
     * Search with semantic enrichment.
     *
     * Returns an empty list unless 'documents' is among the requested
     * collections; every returned hit is tagged with '_collection' => 'documents'.
     *
     * @param array<string> $collections
     * @return array<array<string, mixed>>
     *
     * @throws \RuntimeException When search fails (presumably raised by the search service; nothing is thrown here directly)
     */
    public function search(string $query, array $collections, int $limit): array
    {
        // in_array() on an empty array is already false, so no separate empty check is needed.
        if (!in_array('documents', $collections, true)) {
            return [];
        }

        // NOTE(review): the second argument is always an empty array; its meaning
        // is defined by SearchServiceInterface, which is not visible here.
        $results = $this->searchService->search($query, [], $limit);

        $formatted = [];
        foreach ($results as $result) {
            $formatted[] = [
                'chunk_id' => $result['chunk_id'],
                'content' => $result['content'],
                // Prefer the source path, then the heading path, as display title.
                'title' => $result['source_path'] ?? $result['heading_path'] ?? 'Unbekannt',
                'score' => $result['relevance_score'],
                'summary' => $result['summary'] ?? null,
                'keywords' => $result['keywords'] ?? [],
                'intent' => $result['intent'] ?? null,
                'discourse_role' => $result['discourse_role'] ?? null,
                'sentiment' => $result['sentiment'] ?? null,
                'frame' => $result['frame'] ?? null,
                '_collection' => 'documents',
            ];
        }

        return $formatted;
    }

    /**
     * Build context string from search results.
     *
     * Chunks are appended in the given order, each preceded by a semantic
     * header, and joined with a "---" separator. Assembly stops at the first
     * chunk whose content would push the total past the character budget
     * (MAX_CONTEXT_TOKENS * CHARS_PER_TOKEN); later chunks are not considered.
     * Only chunk content counts toward the budget, not headers or separators.
     *
     * @param array<array<string, mixed>> $searchResults
     */
    public function buildContext(array $searchResults): string
    {
        if ($searchResults === []) {
            return '';
        }

        $contextParts = [];
        $totalChars = 0;
        $maxChars = self::MAX_CONTEXT_TOKENS * self::CHARS_PER_TOKEN;

        foreach ($searchResults as $result) {
            $content = (string) ($result['content'] ?? '');
            $title = (string) ($result['title'] ?? 'Unbekannt');

            // The budget is defined in characters, so count characters rather
            // than bytes; otherwise multibyte text would use up the budget early.
            $contentLength = mb_strlen($content, 'UTF-8');
            if ($totalChars + $contentLength > $maxChars) {
                break;
            }

            // Number sources by emitted position so the result does not depend
            // on the input array's keys (string or non-contiguous keys are fine).
            $header = $this->buildSemanticHeader(count($contextParts) + 1, $title, $result);
            $contextParts[] = $header . "\n" . $content;
            $totalChars += $contentLength;
        }

        return implode("\n\n---\n\n", $contextParts);
    }

    /**
     * Extract deduplicated sources from results.
     *
     * Results are deduplicated by title (first occurrence wins); results with
     * an empty or missing title are skipped. Each source carries the title, the
     * score rounded to three decimals and, when present, a content excerpt of
     * at most 300 characters plus collection, discourse role, intent and summary.
     *
     * @param array<array<string, mixed>> $searchResults
     * @return array<array<string, mixed>>
     */
    public function extractSources(array $searchResults): array
    {
        // Result key => source key, in the order the fields appear in a source entry.
        $optionalFields = [
            '_collection' => 'collection',
            'discourse_role' => 'discourse_role',
            'intent' => 'intent',
            'summary' => 'summary',
        ];

        $sources = [];
        $seen = [];

        foreach ($searchResults as $result) {
            $title = (string) ($result['title'] ?? '');
            if ($title === '' || isset($seen[$title])) {
                continue;
            }
            $seen[$title] = true;

            $source = [
                'title' => $title,
                'score' => round((float) ($result['score'] ?? 0), 3),
            ];

            $content = $result['content'] ?? null;
            if (is_string($content) && $content !== '') {
                $source['content'] = mb_substr($content, 0, 300);
            }

            foreach ($optionalFields as $resultKey => $sourceKey) {
                // isset() also skips keys that are present but null.
                if (isset($result[$resultKey])) {
                    $source[$sourceKey] = $result[$resultKey];
                }
            }

            $sources[] = $source;
        }

        return $sources;
    }

    /**
     * Build semantic header for context chunk.
     *
     * Produces "[Quelle N: title]" and, when discourse role and/or intent are
     * set, appends them in parentheses. Known values are mapped to German
     * labels; unknown values are passed through unchanged.
     *
     * @param array<string, mixed> $result
     */
    private function buildSemanticHeader(int $index, string $title, array $result): string
    {
        $semanticInfo = [];

        if (isset($result['discourse_role'])) {
            $semanticInfo[] = match ($result['discourse_role']) {
                'definition' => 'Definition',
                'thesis' => 'These',
                'evidence' => 'Beleg',
                'example' => 'Beispiel',
                'summary' => 'Zusammenfassung',
                default => $result['discourse_role'],
            };
        }

        if (isset($result['intent'])) {
            $semanticInfo[] = match ($result['intent']) {
                'explain' => 'erklärt',
                'define' => 'definiert',
                'argue' => 'argumentiert',
                'compare' => 'vergleicht',
                'exemplify' => 'veranschaulicht',
                default => $result['intent'],
            };
        }

        $header = sprintf('[Quelle %d: %s]', $index, $title);
        if ($semanticInfo !== []) {
            $header .= ' (' . implode(', ', $semanticInfo) . ')';
        }

        return $header;
    }
}