{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/UseCases\/Chat\/RagContextBuilder.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace UseCases\\Chat;\n\n\/\/ @responsibility: Baut RAG-Kontext aus semantischer Suche\n\nuse Domain\\Service\\SearchServiceInterface;\n\nfinal class RagContextBuilder\n{\n private const MAX_CONTEXT_TOKENS = 3000;\n private const CHARS_PER_TOKEN = 4;\n\n public function __construct(\n private SearchServiceInterface $searchService\n ) {\n }\n\n \/**\n * Search with semantic enrichment.\n *\n * @param array<string> $collections\n * @return array<array<string, mixed>>\n *\n * @throws \\RuntimeException When search fails\n *\/\n public function search(string $query, array $collections, int $limit): array\n {\n if ($collections === [] || !in_array('documents', $collections, true)) {\n return [];\n }\n\n $results = $this->searchService->search($query, [], $limit);\n $formatted = [];\n\n foreach ($results as $result) {\n $formatted[] = [\n 'chunk_id' => $result['chunk_id'],\n 'content' => $result['content'],\n 'title' => $result['source_path'] ?? $result['heading_path'] ?? 'Unbekannt',\n 'score' => $result['relevance_score'],\n 'summary' => $result['summary'] ?? null,\n 'keywords' => $result['keywords'] ?? [],\n 'intent' => $result['intent'] ?? null,\n 'discourse_role' => $result['discourse_role'] ?? null,\n 'sentiment' => $result['sentiment'] ?? null,\n 'frame' => $result['frame'] ?? null,\n '_collection' => 'documents',\n ];\n }\n\n return $formatted;\n }\n\n \/**\n * Build context string from search results.\n *\n * @param array<array<string, mixed>> $searchResults\n *\/\n public function buildContext(array $searchResults): string\n {\n if ($searchResults === []) {\n return '';\n }\n\n $contextParts = [];\n $totalChars = 0;\n $maxChars = self::MAX_CONTEXT_TOKENS * self::CHARS_PER_TOKEN;\n\n foreach ($searchResults as $index => $result) {\n $content = (string) ($result['content'] ?? '');\n $title = (string) ($result['title'] ?? 'Unbekannt');\n\n if ($totalChars + strlen($content) > $maxChars) {\n break;\n }\n\n $header = $this->buildSemanticHeader($index + 1, $title, $result);\n $contextParts[] = $header . \"\\n\" . $content;\n $totalChars += strlen($content);\n }\n\n return implode(\"\\n\\n---\\n\\n\", $contextParts);\n }\n\n \/**\n * Extract deduplicated sources from results.\n *\n * @param array<array<string, mixed>> $searchResults\n * @return array<array<string, mixed>>\n *\/\n public function extractSources(array $searchResults): array\n {\n $sources = [];\n $seen = [];\n\n foreach ($searchResults as $result) {\n $title = (string) ($result['title'] ?? '');\n if ($title === '' || isset($seen[$title])) {\n continue;\n }\n\n $source = [\n 'title' => $title,\n 'score' => round((float) ($result['score'] ?? 0), 3),\n ];\n\n $content = $result['content'] ?? null;\n if (is_string($content) && $content !== '') {\n $source['content'] = mb_substr($content, 0, 300);\n }\n\n if (isset($result['_collection'])) {\n $source['collection'] = $result['_collection'];\n }\n\n if (isset($result['discourse_role'])) {\n $source['discourse_role'] = $result['discourse_role'];\n }\n if (isset($result['intent'])) {\n $source['intent'] = $result['intent'];\n }\n if (isset($result['summary'])) {\n $source['summary'] = $result['summary'];\n }\n\n $sources[] = $source;\n $seen[$title] = true;\n }\n\n return $sources;\n }\n\n \/**\n * Build semantic header for context chunk.\n *\n * @param array<string, mixed> $result\n *\/\n private function buildSemanticHeader(int $index, string $title, array $result): string\n {\n $semanticInfo = [];\n\n if (isset($result['discourse_role'])) {\n $semanticInfo[] = match ($result['discourse_role']) {\n 'definition' => 'Definition',\n 'thesis' => 'These',\n 'evidence' => 'Beleg',\n 'example' => 'Beispiel',\n 'summary' => 'Zusammenfassung',\n default => $result['discourse_role'],\n };\n }\n\n if (isset($result['intent'])) {\n $semanticInfo[] = match ($result['intent']) {\n 'explain' => 'erklärt',\n 'define' => 'definiert',\n 'argue' => 'argumentiert',\n 'compare' => 'vergleicht',\n 'exemplify' => 'veranschaulicht',\n default => $result['intent'],\n };\n }\n\n $header = sprintf('[Quelle %d: %s]', $index, $title);\n if ($semanticInfo !== []) {\n $header .= ' (' . implode(', ', $semanticInfo) . ')';\n }\n\n return $header;\n }\n}\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/UseCases\/Chat\/RagContextBuilder.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace UseCases\\Chat;\n\n\/\/ @responsibility: Baut RAG-Kontext aus semantischer Suche\n\nuse Domain\\Service\\SearchServiceInterface;\n\nfinal class RagContextBuilder\n{\n private const MAX_CONTEXT_TOKENS = 3000;\n private const CHARS_PER_TOKEN = 4;\n\n public function __construct(\n private SearchServiceInterface $searchService\n ) {\n }\n\n \/**\n * Search with semantic enrichment.\n *\n * @param array<string> $collections\n * @return array<array<string, mixed>>\n *\n * @throws \\RuntimeException When search fails\n *\/\n public function search(string $query, array $collections, int $limit): array\n {\n if ($collections === [] || !in_array('documents', $collections, true)) {\n return [];\n }\n\n $results = $this->searchService->search($query, [], $limit);\n $formatted = [];\n\n foreach ($results as $result) {\n $formatted[] = [\n 'chunk_id' => $result['chunk_id'],\n 'content' => $result['content'],\n 'title' => $result['source_path'] ?? $result['heading_path'] ?? 'Unbekannt',\n 'score' => $result['relevance_score'],\n 'summary' => $result['summary'] ?? null,\n 'keywords' => $result['keywords'] ?? [],\n 'intent' => $result['intent'] ?? null,\n 'discourse_role' => $result['discourse_role'] ?? null,\n 'sentiment' => $result['sentiment'] ?? null,\n 'frame' => $result['frame'] ?? null,\n '_collection' => 'documents',\n ];\n }\n\n return $formatted;\n }\n\n \/**\n * Build context string from search results.\n *\n * @param array<array<string, mixed>> $searchResults\n *\/\n public function buildContext(array $searchResults): string\n {\n if ($searchResults === []) {\n return '';\n }\n\n $contextParts = [];\n $totalChars = 0;\n $maxChars = self::MAX_CONTEXT_TOKENS * self::CHARS_PER_TOKEN;\n\n foreach ($searchResults as $index => $result) {\n $content = (string) ($result['content'] ?? '');\n $title = (string) ($result['title'] ?? 'Unbekannt');\n\n if ($totalChars + strlen($content) > $maxChars) {\n break;\n }\n\n $header = $this->buildSemanticHeader($index + 1, $title, $result);\n $contextParts[] = $header . \"\\n\" . $content;\n $totalChars += strlen($content);\n }\n\n return implode(\"\\n\\n---\\n\\n\", $contextParts);\n }\n\n \/**\n * Extract deduplicated sources from results.\n *\n * @param array<array<string, mixed>> $searchResults\n * @return array<array<string, mixed>>\n *\/\n public function extractSources(array $searchResults): array\n {\n $sources = [];\n $seen = [];\n\n foreach ($searchResults as $result) {\n $title = (string) ($result['title'] ?? '');\n if ($title === '' || isset($seen[$title])) {\n continue;\n }\n\n $source = [\n 'title' => $title,\n 'score' => round((float) ($result['score'] ?? 0), 3),\n ];\n\n $content = $result['content'] ?? null;\n if (is_string($content) && $content !== '') {\n $source['content'] = mb_substr($content, 0, 300);\n }\n\n if (isset($result['_collection'])) {\n $source['collection'] = $result['_collection'];\n }\n\n if (isset($result['discourse_role'])) {\n $source['discourse_role'] = $result['discourse_role'];\n }\n if (isset($result['intent'])) {\n $source['intent'] = $result['intent'];\n }\n if (isset($result['summary'])) {\n $source['summary'] = $result['summary'];\n }\n\n $sources[] = $source;\n $seen[$title] = true;\n }\n\n return $sources;\n }\n\n \/**\n * Build semantic header for context chunk.\n *\n * @param array<string, mixed> $result\n *\/\n private function buildSemanticHeader(int $index, string $title, array $result): string\n {\n $semanticInfo = [];\n\n if (isset($result['discourse_role'])) {\n $semanticInfo[] = match ($result['discourse_role']) {\n 'definition' => 'Definition',\n 'thesis' => 'These',\n 'evidence' => 'Beleg',\n 'example' => 'Beispiel',\n 'summary' => 'Zusammenfassung',\n default => $result['discourse_role'],\n };\n }\n\n if (isset($result['intent'])) {\n $semanticInfo[] = match ($result['intent']) {\n 'explain' => 'erklärt',\n 'define' => 'definiert',\n 'argue' => 'argumentiert',\n 'compare' => 'vergleicht',\n 'exemplify' => 'veranschaulicht',\n default => $result['intent'],\n };\n }\n\n $header = sprintf('[Quelle %d: %s]', $index, $title);\n if ($semanticInfo !== []) {\n $header .= ' (' . implode(', ', $semanticInfo) . ')';\n }\n\n return $header;\n }\n}\n",
"structuredPatch": [],
"originalFile": null
}
}