HybridSearchService.php
- Pfad:
src/Infrastructure/Docs/HybridSearchService.php
- Namespace: Infrastructure\Docs
- Zeilen: 353 | Größe: 12,550 Bytes
- Geändert: 2025-12-30 03:15:35 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 68
- Dependencies: 100 (25%)
- LOC: 0 (20%)
- Methods: 40 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
Issues 1
| Zeile | Typ | Beschreibung |
| --- | --- | --- |
| - | complexity | Datei hat 353 Zeilen (max: 350) |
Dependencies 10
- implements Domain\Service\SearchServiceInterface
- trait Infrastructure\Traits\JsonDecodeTrait
- constructor PDO
- constructor Infrastructure\AI\OllamaService
- constructor Infrastructure\AI\QdrantClient
- use Domain\Service\SearchServiceInterface
- use Infrastructure\AI\OllamaService
- use Infrastructure\AI\QdrantClient
- use Infrastructure\Traits\JsonDecodeTrait
- use PDO
Klassen 1
-
HybridSearchService
class
Zeile 15
Funktionen 16
-
__construct()
public
Zeile 21
-
search()
public
Zeile 29
-
searchByCategory()
public
Zeile 48
-
searchByEntity()
public
Zeile 54
-
searchByIntent()
public
Zeile 60
-
searchDefinitions()
public
Zeile 66
-
searchEvidence()
public
Zeile 72
-
getTaxonomyCategories()
public
Zeile 78
-
getEntitiesByType()
public
Zeile 92
-
suggestRelatedSearches()
public
Zeile 120
-
semanticSearch()
private
Zeile 144
-
enrichAndFilter()
private
Zeile 168
-
matchesFilters()
private
Zeile 209
-
rerank()
private
Zeile 265
-
isDefinitionQuery()
private
Zeile 325
-
getChunkWithDocument()
private
Zeile 339
Verwendet von 4
Versionen 33
-
v33
2025-12-28 14:24 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v32
2025-12-28 14:23 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v31
2025-12-28 14:23 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v30
2025-12-28 14:22 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v29
2025-12-28 14:18 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v28
2025-12-28 14:18 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v27
2025-12-28 14:18 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v26
2025-12-28 14:17 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v25
2025-12-28 02:33 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v24
2025-12-28 02:32 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v23
2025-12-28 02:32 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v22
2025-12-28 02:31 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v21
2025-12-28 02:31 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v20
2025-12-28 02:31 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v19
2025-12-28 02:31 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v18
2025-12-28 02:30 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v17
2025-12-28 02:30 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v16
2025-12-28 02:30 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v15
2025-12-28 02:30 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v14
2025-12-25 18:21 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v13
2025-12-25 18:21 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v12
2025-12-25 17:48 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v11
2025-12-23 08:46 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v10
2025-12-23 08:46 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v9
2025-12-23 08:05 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v8
2025-12-22 08:24 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v7
2025-12-22 08:24 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v6
2025-12-22 08:09 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v5
2025-12-22 08:09 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v4
2025-12-22 08:09 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v3
2025-12-22 08:09 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v2
2025-12-20 17:24 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
-
v1
2025-12-20 17:17 | claude-code-hook | modified
Claude Code Pre-Hook Backup vor Edit-Operation
Code
<?php
declare(strict_types=1);
namespace Infrastructure\Docs;
// @responsibility: Hybrid-Suche kombiniert Qdrant-Vektoren mit SQL-Filtern
use Domain\Service\SearchServiceInterface;
use Infrastructure\AI\OllamaService;
use Infrastructure\AI\QdrantClient;
use Infrastructure\Traits\JsonDecodeTrait;
use PDO;
final class HybridSearchService implements SearchServiceInterface
{
use JsonDecodeTrait;
private const string COLLECTION = 'dokumentation_chunks';
/**
 * Stores the three collaborators this service works with.
 *
 * The properties are readonly because nothing in this class reassigns them
 * after construction.
 *
 * @param PDO           $pdo    Connection used for the SQL queries of this class.
 * @param OllamaService $ollama Provides the embedding for a search query.
 * @param QdrantClient  $qdrant Runs the vector search.
 */
public function __construct(
    private readonly PDO $pdo,
    private readonly OllamaService $ollama,
    private readonly QdrantClient $qdrant,
) {
}
/**
 * Hybrid search combining semantic vectors with SQL filters.
 *
 * Runs three stages: a vector search, SQL enrichment with filtering, and a
 * re-ranking pass. The vector stage asks for three times $limit candidates,
 * presumably so that enough rows survive the later filters.
 *
 * @param string $query   Free-text search query.
 * @param array  $filters Optional filters. Keys read by this class:
 *                        taxonomy_category, min_score, entity_name,
 *                        entity_type, keyword, intent, discourse_role,
 *                        sentiment.
 * @param int    $limit   Maximum number of rows to return; values below 1
 *                        yield an empty list.
 *
 * @return array Result rows sorted by descending relevance_score.
 */
public function search(string $query, array $filters = [], int $limit = 10): array
{
    // A non-positive limit cannot produce results. Returning early also keeps
    // a negative value away from array_slice(), where a negative length means
    // "stop that many elements before the end" rather than "return nothing".
    if ($limit < 1) {
        return [];
    }

    // Stage 1: semantic search in Qdrant (over-fetch by a factor of three).
    $vectorResults = $this->semanticSearch($query, $filters, $limit * 3);
    if ($vectorResults === []) {
        return [];
    }

    // Stage 2: enrich with SQL data and apply the filters.
    $enrichedResults = $this->enrichAndFilter($vectorResults, $filters);

    // Stage 3: re-rank by the combined score, then cut to the requested size.
    $rankedResults = $this->rerank($enrichedResults, $query);

    return array_slice($rankedResults, 0, $limit);
}
/**
 * Runs search() restricted to a single taxonomy category.
 *
 * @param string $query    Free-text search query.
 * @param string $category Value used for the taxonomy_category filter.
 * @param int    $limit    Maximum number of rows to return.
 *
 * @return array Result rows as produced by search().
 */
public function searchByCategory(string $query, string $category, int $limit = 10): array
{
    $filters = ['taxonomy_category' => $category];

    return $this->search($query, $filters, $limit);
}
/**
 * Runs search() restricted to chunks that mention a given entity.
 *
 * @param string $query      Free-text search query.
 * @param string $entityName Value used for the entity_name filter.
 * @param int    $limit      Maximum number of rows to return.
 *
 * @return array Result rows as produced by search().
 */
public function searchByEntity(string $query, string $entityName, int $limit = 10): array
{
    $filters = ['entity_name' => $entityName];

    return $this->search($query, $filters, $limit);
}
/**
 * Runs search() restricted to chunks with a given intent (e.g. explain or define).
 *
 * @param string $query  Free-text search query.
 * @param string $intent Value used for the intent filter.
 * @param int    $limit  Maximum number of rows to return.
 *
 * @return array Result rows as produced by search().
 */
public function searchByIntent(string $query, string $intent, int $limit = 10): array
{
    $filters = ['intent' => $intent];

    return $this->search($query, $filters, $limit);
}
/**
 * Runs search() restricted to chunks whose discourse role is "definition".
 *
 * @param string $query Free-text search query.
 * @param int    $limit Maximum number of rows to return.
 *
 * @return array Result rows as produced by search().
 */
public function searchDefinitions(string $query, int $limit = 10): array
{
    $filters = ['discourse_role' => 'definition'];

    return $this->search($query, $filters, $limit);
}
/**
 * Runs search() restricted to chunks whose discourse role is "evidence".
 *
 * @param string $query Free-text search query.
 * @param int    $limit Maximum number of rows to return.
 *
 * @return array Result rows as produced by search().
 */
public function searchEvidence(string $query, int $limit = 10): array
{
    $filters = ['discourse_role' => 'evidence'];

    return $this->search($query, $filters, $limit);
}
/**
 * Gets all available taxonomy categories with their chunk counts.
 *
 * Chunks without a taxonomy category are ignored; the most frequent category
 * comes first.
 *
 * @return array Rows with the keys "category" and "count".
 *
 * @throws \RuntimeException When the query could not be executed.
 */
public function getTaxonomyCategories(): array
{
    $stmt = $this->pdo->query('
SELECT taxonomy_category as category, COUNT(*) as count
FROM dokumentation_chunks
WHERE taxonomy_category IS NOT NULL
GROUP BY taxonomy_category
ORDER BY count DESC
');

    // PDO::query() returns false instead of throwing when the connection does
    // not use PDO::ERRMODE_EXCEPTION; fail with a clear message in that case.
    if ($stmt === false) {
        throw new \RuntimeException('Failed to query taxonomy categories.');
    }

    return $stmt->fetchAll(PDO::FETCH_ASSOC);
}
/**
 * Gets all distinct entity names grouped by entity type.
 *
 * Reads the JSON "entities" column of every chunk that has one. Entries that
 * are not arrays, or whose name or type is not a string, are skipped.
 *
 * @return array Map of entity type to a list of unique entity names, in
 *               order of first appearance.
 *
 * @throws \RuntimeException When the query could not be executed.
 */
public function getEntitiesByType(): array
{
    $stmt = $this->pdo->query("
SELECT entities FROM dokumentation_chunks
WHERE entities IS NOT NULL AND entities != '[]'
");

    // PDO::query() returns false instead of throwing when the connection does
    // not use PDO::ERRMODE_EXCEPTION; fail with a clear message in that case.
    if ($stmt === false) {
        throw new \RuntimeException('Failed to query chunk entities.');
    }

    $byType = [];
    // Lookup table [type][name] => true: constant-time duplicate check
    // instead of scanning the collected names for every entity.
    $seen = [];

    // Read row by row so no intermediate array of all rows is built.
    while (($row = $stmt->fetch(PDO::FETCH_ASSOC)) !== false) {
        foreach ($this->decodeJsonArray($row['entities'] ?? null) as $entity) {
            if (!is_array($entity)) {
                continue;
            }

            $name = $entity['name'] ?? null;
            $type = $entity['type'] ?? null;

            // A non-string type is not a valid grouping key (an array would
            // even throw on the offset), so malformed entries are dropped.
            if (!is_string($name) || !is_string($type)) {
                continue;
            }

            if (isset($seen[$type][$name])) {
                continue;
            }

            $seen[$type][$name] = true;
            $byType[$type][] = $name;
        }
    }

    return $byType;
}
/**
 * Suggests related searches based on the given results.
 *
 * Walks the results in order and collects, per result, first its keywords and
 * then its entity names, skipping values already collected. Collection stops
 * as soon as $limit suggestions have been gathered.
 *
 * @param array $results Result rows; the optional keys "keywords" and
 *                       "entities" are read.
 * @param int   $limit   Maximum number of suggestions (default 5); values
 *                       below 1 yield an empty list.
 *
 * @return array List of unique suggestions in order of first appearance.
 */
public function suggestRelatedSearches(array $results, int $limit = 5): array
{
    if ($limit < 1) {
        return [];
    }

    $suggestions = [];

    foreach ($results as $result) {
        // Keywords of the result come first.
        foreach ($result['keywords'] ?? [] as $keyword) {
            if (in_array($keyword, $suggestions, true)) {
                continue;
            }

            $suggestions[] = $keyword;
            if (count($suggestions) === $limit) {
                return $suggestions;
            }
        }

        // Then the names of its entities.
        foreach ($result['entities'] ?? [] as $entity) {
            if (!isset($entity['name']) || in_array($entity['name'], $suggestions, true)) {
                continue;
            }

            $suggestions[] = $entity['name'];
            if (count($suggestions) === $limit) {
                return $suggestions;
            }
        }
    }

    return $suggestions;
}
/**
 * Performs the semantic (vector) search in Qdrant.
 *
 * Embeds the query via Ollama, searches the chunk collection and normalises
 * each hit to the keys "id" (string), "score" (float) and "payload" (array).
 *
 * @param string $query   Free-text search query.
 * @param array  $filters Only the taxonomy_category key is used here; it is
 *                        turned into a Qdrant "must" match condition.
 * @param int    $limit   Number of hits to request from Qdrant.
 *
 * @return array Normalised hits, keyed as returned by the Qdrant client.
 */
private function semanticSearch(string $query, array $filters, int $limit): array
{
    $vector = $this->ollama->getEmbedding($query);

    // No category given means an unfiltered vector search.
    $category = $filters['taxonomy_category'] ?? null;
    $qdrantFilter = $category === null
        ? null
        : [
            'must' => [
                ['key' => 'taxonomy_category', 'match' => ['value' => $category]],
            ],
        ];

    $normalize = static function (array $hit): array {
        $payload = $hit['payload'] ?? null;

        return [
            'id' => (string) $hit['id'],
            'score' => (float) ($hit['score'] ?? 0),
            // Anything that is not an array is replaced by an empty payload.
            'payload' => is_array($payload) ? $payload : [],
        ];
    };

    $hits = $this->qdrant->search(self::COLLECTION, $vector, $limit, $qdrantFilter);

    return array_map($normalize, $hits);
}
/**
 * Enriches vector hits with SQL data and drops those that fail the filters.
 *
 * A hit is kept only if its score reaches the minimum score (filter key
 * min_score, default 0.3), its payload carries a non-zero chunk_id, the chunk
 * exists in the database, and the chunk passes matchesFilters().
 *
 * @param array $vectorResults Hits as returned by semanticSearch().
 * @param array $filters       Filters passed to search().
 *
 * @return array List of result rows; "score" and "relevance_score" both start
 *               out as the vector score.
 */
private function enrichAndFilter(array $vectorResults, array $filters): array
{
    $threshold = $filters['min_score'] ?? 0.3;
    $rows = [];

    foreach ($vectorResults as $hit) {
        $score = $hit['score'];
        if ($score < $threshold) {
            continue;
        }

        // A missing chunk_id becomes 0 and is treated as "no chunk".
        $id = (int) ($hit['payload']['chunk_id'] ?? 0);
        $chunk = $id === 0 ? null : $this->getChunkWithDocument($id);

        // Entity, keyword and semantic filters are checked against the SQL row.
        if ($chunk === null || !$this->matchesFilters($chunk, $filters)) {
            continue;
        }

        $rows[] = [
            'chunk_id' => $id,
            'doc_id' => (int) $chunk['dokumentation_id'],
            'path' => $chunk['doc_path'] ?? '',
            'title' => $chunk['doc_title'] ?? '',
            // The cleaned content is preferred over the raw content.
            'content' => $chunk['content_clean'] ?? $chunk['content'] ?? '',
            'heading_path' => $this->decodeJsonArray($chunk['heading_path'] ?? null),
            'taxonomy' => $this->decodeJsonArray($chunk['taxonomy_path'] ?? null),
            'entities' => $this->decodeJsonArray($chunk['entities'] ?? null),
            'keywords' => $this->decodeJsonArray($chunk['keywords'] ?? null),
            // Semantic metadata
            'summary' => $chunk['summary'] ?? null,
            'sentiment' => $chunk['sentiment'] ?? 'neutral',
            'intent' => $chunk['intent'] ?? null,
            'discourse_role' => $chunk['discourse_role'] ?? null,
            'score' => $score,
            'relevance_score' => $score,
        ];
    }

    return $rows;
}
/**
 * Checks whether a chunk row satisfies every filter that is set.
 *
 * Supported filter keys:
 * - intent, discourse_role, sentiment: strict equality with the chunk column.
 * - entity_name: case-insensitive substring match on any entity name.
 * - entity_type: case-insensitive equality with any entity type.
 * - keyword: case-insensitive substring match on any keyword.
 *
 * entity_name and entity_type may be satisfied by different entities.
 * Entity and keyword entries that are not strings are ignored rather than
 * passed to the string functions, which would throw under strict_types.
 *
 * @param array $chunk   Chunk row as returned by getChunkWithDocument().
 * @param array $filters Filters passed to search().
 *
 * @return bool True when no active filter rejects the chunk.
 */
private function matchesFilters(array $chunk, array $filters): bool
{
    // Exact-match filters first: they need no JSON decoding.
    foreach (['intent', 'discourse_role', 'sentiment'] as $field) {
        if (isset($filters[$field]) && ($chunk[$field] ?? null) !== $filters[$field]) {
            return false;
        }
    }

    $wantedName = $filters['entity_name'] ?? null;
    $wantedType = $filters['entity_type'] ?? null;

    if ($wantedName !== null || $wantedType !== null) {
        // Decode the entities once for both entity filters.
        $entities = $this->decodeJsonArray($chunk['entities'] ?? null);
        $wantedTypeUpper = $wantedType === null ? null : strtoupper($wantedType);

        // A filter that is not set counts as already satisfied.
        $nameMatched = $wantedName === null;
        $typeMatched = $wantedType === null;

        foreach ($entities as $entity) {
            if (!is_array($entity)) {
                continue;
            }

            $name = $entity['name'] ?? null;
            $type = $entity['type'] ?? null;

            // NOTE: stripos()/strtoupper() fold case for ASCII letters only.
            if (!$nameMatched && is_string($name) && stripos($name, $wantedName) !== false) {
                $nameMatched = true;
            }
            if (!$typeMatched && is_string($type) && strtoupper($type) === $wantedTypeUpper) {
                $typeMatched = true;
            }

            if ($nameMatched && $typeMatched) {
                break;
            }
        }

        if (!$nameMatched || !$typeMatched) {
            return false;
        }
    }

    if (isset($filters['keyword'])) {
        $keywordMatched = false;

        foreach ($this->decodeJsonArray($chunk['keywords'] ?? null) as $keyword) {
            if (is_string($keyword) && stripos($keyword, $filters['keyword']) !== false) {
                $keywordMatched = true;
                break;
            }
        }

        if (!$keywordMatched) {
            return false;
        }
    }

    return true;
}
/**
 * Re-ranks results based on combined semantic and structural relevance.
 *
 * For every result a boost is added to the vector score:
 * - +0.05 per (keyword, query word) pair where the keyword contains the word,
 * - +0.03 per (entity name, query word) pair where the name contains the word,
 * - +0.10 per query word contained in the title,
 * - +0.15 for a "definition" chunk when the query asks for a definition,
 *   otherwise +0.08 for "thesis" or +0.05 for "evidence",
 * - +0.05 when the intent is "explain" or "define".
 * The sum is capped at 1.0 and stored as relevance_score.
 *
 * Missing keywords, entities or title, and entries that are not strings,
 * contribute no boost instead of raising a warning or TypeError.
 *
 * @param array  $results Rows as produced by enrichAndFilter().
 * @param string $query   Original search query.
 *
 * @return array The rows sorted by descending relevance_score.
 */
private function rerank(array $results, string $query): array
{
    // Lower-cased query words; array_filter() drops empty fragments.
    $queryWords = array_filter(preg_split('/\s+/', strtolower($query)) ?: []);
    $isDefinitionQuery = $this->isDefinitionQuery($query);

    // Write back by index rather than by reference so that no dangling
    // reference to the last element is left behind after the loop.
    foreach ($results as $index => $result) {
        $boost = 0.0;

        // Keyword matching boost
        foreach ($result['keywords'] ?? [] as $keyword) {
            if (!is_string($keyword)) {
                continue;
            }
            foreach ($queryWords as $word) {
                if (stripos($keyword, $word) !== false) {
                    $boost += 0.05;
                }
            }
        }

        // Entity matching boost
        foreach ($result['entities'] ?? [] as $entity) {
            $name = is_array($entity) ? ($entity['name'] ?? null) : null;
            if (!is_string($name)) {
                continue;
            }
            foreach ($queryWords as $word) {
                if (stripos($name, $word) !== false) {
                    $boost += 0.03;
                }
            }
        }

        // Title matching boost
        $title = $result['title'] ?? '';
        if (is_string($title)) {
            foreach ($queryWords as $word) {
                if (stripos($title, $word) !== false) {
                    $boost += 0.1;
                }
            }
        }

        // Semantic boost based on discourse role
        $discourseRole = $result['discourse_role'] ?? null;
        if ($isDefinitionQuery && $discourseRole === 'definition') {
            $boost += 0.15; // Strong boost for definitions when asking "was ist"
        } elseif ($discourseRole === 'thesis') {
            $boost += 0.08; // Thesis statements are valuable
        } elseif ($discourseRole === 'evidence') {
            $boost += 0.05; // Evidence supports claims
        }

        // Intent boost - explanations are generally more useful
        $intent = $result['intent'] ?? null;
        if ($intent === 'explain' || $intent === 'define') {
            $boost += 0.05;
        }

        $results[$index]['relevance_score'] = min(1.0, $result['score'] + $boost);
    }

    usort($results, static fn (array $a, array $b): int => $b['relevance_score'] <=> $a['relevance_score']);

    return $results;
}
/**
 * Detects whether the query asks for a definition.
 *
 * A query counts as a definition query when, after optional leading
 * whitespace, it starts with one of the German phrases "was ist", "was sind",
 * "was bedeutet", "definition", "erklär"/"erklar" or "beschreib"
 * (case-insensitive).
 *
 * @param string $query Search query.
 *
 * @return bool True for a definition query; false otherwise, including when
 *              the query is not valid UTF-8.
 */
private function isDefinitionQuery(string $query): bool
{
    // The "u" modifier is required: without it the UTF-8 "ä" inside the
    // character class is read as two separate bytes and "erklär" never matches.
    $pattern = '/^\s*(?:was\s+(?:ist|sind|bedeutet)\b|definition\b|erkl[äa]r|beschreib)/iu';

    // preg_match() returns false on invalid UTF-8; treat that as "no match".
    return preg_match($pattern, $query) === 1;
}
/**
 * Loads one chunk row together with the title and path of its document.
 *
 * @param int $chunkId Primary key of the chunk.
 *
 * @return array|null All chunk columns plus doc_title and doc_path, or null
 *                    when no row matches the id.
 */
private function getChunkWithDocument(int $chunkId): ?array
{
    $sql = '
SELECT c.*, d.title as doc_title, d.path as doc_path
FROM dokumentation_chunks c
JOIN dokumentation d ON c.dokumentation_id = d.id
WHERE c.id = :id
';

    $statement = $this->pdo->prepare($sql);
    $statement->execute(['id' => $chunkId]);

    $row = $statement->fetch(PDO::FETCH_ASSOC);
    if ($row === false) {
        // fetch() signals "no row" with false; callers expect null instead.
        return null;
    }

    return $row;
}
}