{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Docs\/HybridSearchService.php",
"oldString": " \/** Re-ranks results based on combined semantic and structural relevance. *\/\n private function rerank(array $results, string $query): array\n {\n $queryWords = array_filter(preg_split('\/\\s+\/', strtolower($query)) ?: []);\n foreach ($results as &$result) {\n $boost = 0.0;\n foreach ($result['keywords'] as $kw) {\n foreach ($queryWords as $w) {\n if (stripos($kw, $w) !== false) { $boost += 0.05; }\n }\n }\n foreach ($result['entities'] as $e) {\n if (isset($e['name'])) {\n foreach ($queryWords as $w) {\n if (stripos($e['name'], $w) !== false) { $boost += 0.03; }\n }\n }\n }\n foreach ($queryWords as $w) {\n if (stripos($result['title'], $w) !== false) { $boost += 0.1; }\n }\n $result['relevance_score'] = min(1.0, $result['score'] + $boost);\n }\n usort($results, static fn (array $a, array $b): int => $b['relevance_score'] <=> $a['relevance_score']);\n return $results;\n }",
"newString": " \/** Re-ranks results based on combined semantic and structural relevance. *\/\n private function rerank(array $results, string $query): array\n {\n $queryWords = array_filter(preg_split('\/\\s+\/', strtolower($query)) ?: []);\n $isDefinitionQuery = $this->isDefinitionQuery($query);\n\n foreach ($results as &$result) {\n $boost = 0.0;\n\n \/\/ Keyword matching boost\n foreach ($result['keywords'] as $kw) {\n foreach ($queryWords as $w) {\n if (stripos($kw, $w) !== false) {\n $boost += 0.05;\n }\n }\n }\n\n \/\/ Entity matching boost\n foreach ($result['entities'] as $e) {\n if (isset($e['name'])) {\n foreach ($queryWords as $w) {\n if (stripos($e['name'], $w) !== false) {\n $boost += 0.03;\n }\n }\n }\n }\n\n \/\/ Title matching boost\n foreach ($queryWords as $w) {\n if (stripos($result['title'], $w) !== false) {\n $boost += 0.1;\n }\n }\n\n \/\/ Semantic boost based on discourse role\n $discourseRole = $result['discourse_role'] ?? null;\n if ($isDefinitionQuery && $discourseRole === 'definition') {\n $boost += 0.15; \/\/ Strong boost for definitions when asking \"was ist\"\n } elseif ($discourseRole === 'thesis') {\n $boost += 0.08; \/\/ Thesis statements are valuable\n } elseif ($discourseRole === 'evidence') {\n $boost += 0.05; \/\/ Evidence supports claims\n }\n\n \/\/ Intent boost - explanations are generally more useful\n $intent = $result['intent'] ?? null;\n if ($intent === 'explain' || $intent === 'define') {\n $boost += 0.05;\n }\n\n $result['relevance_score'] = min(1.0, $result['score'] + $boost);\n }\n\n usort($results, static fn (array $a, array $b): int => $b['relevance_score'] <=> $a['relevance_score']);\n\n return $results;\n }\n\n \/** Detects if query is asking for a definition. *\/\n private function isDefinitionQuery(string $query): bool\n {\n $patterns = [\n '\/^was\\s+ist\\b\/i',\n '\/^was\\s+sind\\b\/i',\n '\/^was\\s+bedeutet\\b\/i',\n '\/^definition\\b\/i',\n '\/^erkl[äa]r\/i',\n '\/^beschreib\/i',\n ];\n\n foreach ($patterns as $pattern) {\n if (preg_match($pattern, $query)) {\n return true;\n }\n }\n\n return false;\n }",
"originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Docs;\n\n\/\/ @responsibility: Hybrid-Suche kombiniert Qdrant-Vektoren mit SQL-Filtern\n\nuse Domain\\Service\\SearchServiceInterface;\nuse Infrastructure\\AI\\OllamaService;\nuse Infrastructure\\Config\\CredentialService;\nuse Infrastructure\\Traits\\JsonDecodeTrait;\nuse PDO;\nuse RuntimeException;\n\nfinal class HybridSearchService implements SearchServiceInterface\n{\n use JsonDecodeTrait;\n\n private const string COLLECTION = 'dokumentation_chunks';\n private const int TIMEOUT = 30;\n\n private string $qdrantHost;\n\n public function __construct(\n private PDO $pdo,\n private OllamaService $ollama\n ) {\n $this->qdrantHost = CredentialService::getQdrantHost();\n }\n\n \/** Hybrid search combining semantic vectors with SQL filters. *\/\n public function search(string $query, array $filters = [], int $limit = 10): array\n {\n \/\/ Stage 1: Semantic search in Qdrant\n $vectorResults = $this->semanticSearch($query, $filters, $limit * 3);\n\n if (empty($vectorResults)) {\n return [];\n }\n\n \/\/ Stage 2: Enrich with SQL data and apply filters\n $enrichedResults = $this->enrichAndFilter($vectorResults, $filters);\n\n \/\/ Stage 3: Re-rank based on combined score\n $rankedResults = $this->rerank($enrichedResults, $query);\n\n return array_slice($rankedResults, 0, $limit);\n }\n\n \/** Searches within a specific taxonomy category. *\/\n public function searchByCategory(string $query, string $category, int $limit = 10): array\n {\n return $this->search($query, ['taxonomy_category' => $category], $limit);\n }\n\n \/** Searches for chunks containing a specific entity. *\/\n public function searchByEntity(string $query, string $entityName, int $limit = 10): array\n {\n return $this->search($query, ['entity_name' => $entityName], $limit);\n }\n\n \/** Searches for chunks with specific intent (explain, argue, define, etc.). *\/\n public function searchByIntent(string $query, string $intent, int $limit = 10): array\n {\n return $this->search($query, ['intent' => $intent], $limit);\n }\n\n \/** Searches for definition chunks only. *\/\n public function searchDefinitions(string $query, int $limit = 10): array\n {\n return $this->search($query, ['discourse_role' => 'definition'], $limit);\n }\n\n \/** Searches for evidence\/example chunks for a topic. *\/\n public function searchEvidence(string $query, int $limit = 10): array\n {\n return $this->search($query, ['discourse_role' => 'evidence'], $limit);\n }\n\n \/** Gets all available taxonomy categories with counts. *\/\n public function getTaxonomyCategories(): array\n {\n $stmt = $this->pdo->query('\n SELECT taxonomy_category as category, COUNT(*) as count\n FROM dokumentation_chunks\n WHERE taxonomy_category IS NOT NULL\n GROUP BY taxonomy_category\n ORDER BY count DESC\n ');\n\n return $stmt->fetchAll(PDO::FETCH_ASSOC);\n }\n\n \/** Gets all entities grouped by type. *\/\n public function getEntitiesByType(): array\n {\n $stmt = $this->pdo->query(\"\n SELECT entities FROM dokumentation_chunks\n WHERE entities IS NOT NULL AND entities != '[]'\n \");\n\n $byType = [];\n\n foreach ($stmt->fetchAll(PDO::FETCH_ASSOC) as $row) {\n $entities = $this->decodeJsonArray($row['entities'] ?? null);\n foreach ($entities as $entity) {\n if (isset($entity['name'], $entity['type'])) {\n $type = $entity['type'];\n if (!isset($byType[$type])) {\n $byType[$type] = [];\n }\n if (!in_array($entity['name'], $byType[$type], true)) {\n $byType[$type][] = $entity['name'];\n }\n }\n }\n }\n\n return $byType;\n }\n\n \/** Suggests related searches based on current results. *\/\n public function suggestRelatedSearches(array $results): array\n {\n $suggestions = [];\n\n foreach ($results as $result) {\n \/\/ Add keywords from results\n foreach ($result['keywords'] ?? [] as $keyword) {\n if (!in_array($keyword, $suggestions, true)) {\n $suggestions[] = $keyword;\n }\n }\n\n \/\/ Add entity names\n foreach ($result['entities'] ?? [] as $entity) {\n if (isset($entity['name']) && !in_array($entity['name'], $suggestions, true)) {\n $suggestions[] = $entity['name'];\n }\n }\n }\n\n return array_slice($suggestions, 0, 5);\n }\n\n \/** Performs semantic search in Qdrant. *\/\n private function semanticSearch(string $query, array $filters, int $limit): array\n {\n $embedding = $this->ollama->getEmbedding($query);\n\n $url = sprintf('%s\/collections\/%s\/points\/search', $this->qdrantHost, self::COLLECTION);\n\n $payload = [\n 'vector' => array_values($embedding),\n 'limit' => $limit,\n 'with_payload' => true,\n ];\n\n \/\/ Add Qdrant filter if taxonomy category specified\n if (isset($filters['taxonomy_category'])) {\n $payload['filter'] = [\n 'must' => [\n [\n 'key' => 'taxonomy_category',\n 'match' => ['value' => $filters['taxonomy_category']],\n ],\n ],\n ];\n }\n\n try {\n $response = $this->makeRequest($url, $payload, 'POST');\n\n if (!isset($response['result']) || !is_array($response['result'])) {\n return [];\n }\n\n return array_map(static function (array $item): array {\n return [\n 'id' => (string) $item['id'],\n 'score' => (float) ($item['score'] ?? 0),\n 'payload' => is_array($item['payload'] ?? null) ? $item['payload'] : [],\n ];\n }, $response['result']);\n } catch (RuntimeException) {\n return [];\n }\n }\n\n \/** Enriches vector results with SQL data and applies filters. *\/\n private function enrichAndFilter(array $vectorResults, array $filters): array\n {\n $results = [];\n $minScore = $filters['min_score'] ?? 0.3;\n foreach ($vectorResults as $vr) {\n if ($vr['score'] < $minScore) {\n continue;\n }\n $chunkId = (int) ($vr['payload']['chunk_id'] ?? 0);\n if ($chunkId === 0) {\n continue;\n }\n $chunk = $this->getChunkWithDocument($chunkId);\n if ($chunk === null) {\n continue;\n }\n \/\/ Apply entity\/keyword filters\n if (!$this->matchesFilters($chunk, $filters)) {\n continue;\n }\n $results[] = [\n 'chunk_id' => $chunkId, 'doc_id' => (int) $chunk['dokumentation_id'],\n 'path' => $chunk['doc_path'] ?? '', 'title' => $chunk['doc_title'] ?? '',\n 'content' => $chunk['content_clean'] ?? $chunk['content'] ?? '',\n 'heading_path' => $this->decodeJsonArray($chunk['heading_path'] ?? null),\n 'taxonomy' => $this->decodeJsonArray($chunk['taxonomy_path'] ?? null),\n 'entities' => $this->decodeJsonArray($chunk['entities'] ?? null),\n 'keywords' => $this->decodeJsonArray($chunk['keywords'] ?? null),\n \/\/ Semantic metadata\n 'summary' => $chunk['summary'] ?? null,\n 'sentiment' => $chunk['sentiment'] ?? 'neutral',\n 'intent' => $chunk['intent'] ?? null,\n 'discourse_role' => $chunk['discourse_role'] ?? null,\n 'score' => $vr['score'], 'relevance_score' => $vr['score'],\n ];\n }\n return $results;\n }\n\n \/** Checks if chunk matches entity\/keyword filters. *\/\n private function matchesFilters(array $chunk, array $filters): bool\n {\n if (isset($filters['entity_name'])) {\n $entities = $this->decodeJsonArray($chunk['entities'] ?? null);\n $found = false;\n foreach ($entities as $e) {\n if (isset($e['name']) && stripos($e['name'], $filters['entity_name']) !== false) {\n $found = true;\n break;\n }\n }\n if (!$found) {\n return false;\n }\n }\n if (isset($filters['entity_type'])) {\n $entities = $this->decodeJsonArray($chunk['entities'] ?? null);\n $found = false;\n foreach ($entities as $e) {\n if (isset($e['type']) && strtoupper($e['type']) === strtoupper($filters['entity_type'])) {\n $found = true;\n break;\n }\n }\n if (!$found) {\n return false;\n }\n }\n if (isset($filters['keyword'])) {\n $keywords = $this->decodeJsonArray($chunk['keywords'] ?? null);\n $found = false;\n foreach ($keywords as $kw) {\n if (stripos($kw, $filters['keyword']) !== false) {\n $found = true;\n break;\n }\n }\n if (!$found) {\n return false;\n }\n }\n \/\/ Semantic filters\n if (isset($filters['intent']) && ($chunk['intent'] ?? null) !== $filters['intent']) {\n return false;\n }\n if (isset($filters['discourse_role']) && ($chunk['discourse_role'] ?? null) !== $filters['discourse_role']) {\n return false;\n }\n if (isset($filters['sentiment']) && ($chunk['sentiment'] ?? null) !== $filters['sentiment']) {\n ... [TRUNCATED-6d21dc9cfe204cef]",
"structuredPatch": [
{
"oldStart": 287,
"oldLines": 29,
"newStart": 287,
"newLines": 83,
"lines": [
" private function rerank(array $results, string $query): array",
" {",
" $queryWords = array_filter(preg_split('\/\\s+\/', strtolower($query)) ?: []);",
"+ $isDefinitionQuery = $this->isDefinitionQuery($query);",
"+",
" foreach ($results as &$result) {",
" $boost = 0.0;",
"+",
"+ \/\/ Keyword matching boost",
" foreach ($result['keywords'] as $kw) {",
" foreach ($queryWords as $w) {",
"- if (stripos($kw, $w) !== false) { $boost += 0.05; }",
"+ if (stripos($kw, $w) !== false) {",
"+ $boost += 0.05;",
"+ }",
" }",
" }",
"+",
"+ \/\/ Entity matching boost",
" foreach ($result['entities'] as $e) {",
" if (isset($e['name'])) {",
" foreach ($queryWords as $w) {",
"- if (stripos($e['name'], $w) !== false) { $boost += 0.03; }",
"+ if (stripos($e['name'], $w) !== false) {",
"+ $boost += 0.03;",
"+ }",
" }",
" }",
" }",
"+",
"+ \/\/ Title matching boost",
" foreach ($queryWords as $w) {",
"- if (stripos($result['title'], $w) !== false) { $boost += 0.1; }",
"+ if (stripos($result['title'], $w) !== false) {",
"+ $boost += 0.1;",
"+ }",
" }",
"+",
"+ \/\/ Semantic boost based on discourse role",
"+ $discourseRole = $result['discourse_role'] ?? null;",
"+ if ($isDefinitionQuery && $discourseRole === 'definition') {",
"+ $boost += 0.15; \/\/ Strong boost for definitions when asking \"was ist\"",
"+ } elseif ($discourseRole === 'thesis') {",
"+ $boost += 0.08; \/\/ Thesis statements are valuable",
"+ } elseif ($discourseRole === 'evidence') {",
"+ $boost += 0.05; \/\/ Evidence supports claims",
"+ }",
"+",
"+ \/\/ Intent boost - explanations are generally more useful",
"+ $intent = $result['intent'] ?? null;",
"+ if ($intent === 'explain' || $intent === 'define') {",
"+ $boost += 0.05;",
"+ }",
"+",
" $result['relevance_score'] = min(1.0, $result['score'] + $boost);",
" }",
"+",
" usort($results, static fn (array $a, array $b): int => $b['relevance_score'] <=> $a['relevance_score']);",
"+",
" return $results;",
" }",
" ",
"+ \/** Detects if query is asking for a definition. *\/",
"+ private function isDefinitionQuery(string $query): bool",
"+ {",
"+ $patterns = [",
"+ '\/^was\\s+ist\\b\/i',",
"+ '\/^was\\s+sind\\b\/i',",
"+ '\/^was\\s+bedeutet\\b\/i',",
"+ '\/^definition\\b\/i',",
"+ '\/^erkl[äa]r\/i',",
"+ '\/^beschreib\/i',",
"+ ];",
"+",
"+ foreach ($patterns as $pattern) {",
"+ if (preg_match($pattern, $query)) {",
"+ return true;",
"+ }",
"+ }",
"+",
"+ return false;",
"+ }",
"+",
" \/** Gets chunk with document data. *\/",
" private function getChunkWithDocument(int $chunkId): ?array",
" {"
]
}
],
"userModified": false,
"replaceAll": false
}
}