Backup #2002
| ID | 2002 |
| Dateipfad | /var/www/dev.campus.systemische-tools.de/src/Infrastructure/Docs/HybridSearchService.php |
| Version | 27 |
| Typ | modified |
| Größe | 12.2 KB |
| Hash | 7bae5b2f29085cc3111ef5406d9a1f28ecac6ac8b5105617624aaf283ff8f749 |
| Datum | 2025-12-28 14:18:00 |
| Geändert von | claude-code-hook |
| Grund | Claude Code Pre-Hook Backup vor Edit-Operation |
| Datei existiert | Ja |
Dateiinhalt
<?php
declare(strict_types=1);
namespace Infrastructure\Docs;
// @responsibility: Hybrid-Suche kombiniert Qdrant-Vektoren mit SQL-Filtern
use Domain\Service\SearchServiceInterface;
use Infrastructure\AI\OllamaService;
use Infrastructure\Config\CredentialService;
use Infrastructure\Traits\JsonDecodeTrait;
use PDO;
use RuntimeException;
/**
 * Hybrid documentation search.
 *
 * A query is embedded through OllamaService and matched against the Qdrant
 * collection named by self::COLLECTION. Every hit is then looked up in the SQL
 * table `dokumentation_chunks` (joined with `dokumentation`), filtered on the
 * row data, and re-ranked with keyword/entity/title boosts.
 *
 * Recognised keys of the `$filters` array:
 *  - `taxonomy_category` (string): sent to Qdrant as a `must` match filter.
 *  - `min_score` (numeric): minimum vector score, default self::DEFAULT_MIN_SCORE.
 *  - `entity_name` (string): case-insensitive substring of any entity name.
 *  - `entity_type` (string): case-insensitive exact match of any entity type.
 *  - `keyword` (string): case-insensitive substring of any keyword.
 *  - `intent`, `discourse_role` (string): case-insensitive exact match on the
 *    chunk row column of the same name (see matchesFilters()).
 */
final class HybridSearchService implements SearchServiceInterface
{
    use JsonDecodeTrait;

    private const string COLLECTION = 'dokumentation_chunks';
    private const int TIMEOUT = 30;
    private const int CONNECT_TIMEOUT = 10;

    /** Vector hits fetched per requested result, so SQL-side filtering still leaves enough rows. */
    private const int OVERSAMPLING = 3;
    private const float DEFAULT_MIN_SCORE = 0.3;
    private const int MAX_SUGGESTIONS = 5;

    /** Additive boosts applied per (text, query word) hit during re-ranking. */
    private const float KEYWORD_BOOST = 0.05;
    private const float ENTITY_BOOST = 0.03;
    private const float TITLE_BOOST = 0.1;

    /** Filter keys that are compared directly against a chunk row column of the same name. */
    private const array ROW_FILTER_COLUMNS = ['intent', 'discourse_role'];

    private string $qdrantHost;

    public function __construct(
        private PDO $pdo,
        private OllamaService $ollama
    ) {
        $this->qdrantHost = CredentialService::getQdrantHost();
    }

    /**
     * Hybrid search combining semantic vectors with SQL filters.
     *
     * @param array<string, mixed> $filters See the class docblock for recognised keys.
     * @param int                  $limit   Maximum number of results; values below 1 yield [].
     *
     * @return list<array<string, mixed>> Results ordered by `relevance_score` (descending), each with
     *                                    chunk_id, doc_id, path, title, content, heading_path, taxonomy,
     *                                    entities, keywords, score and relevance_score.
     */
    public function search(string $query, array $filters = [], int $limit = 10): array
    {
        // A non-positive limit can never produce results; skip the embedding and HTTP round trip.
        if ($limit < 1) {
            return [];
        }

        // Stage 1: semantic search in Qdrant (oversampled because stage 2 discards rows).
        $vectorResults = $this->semanticSearch($query, $filters, $limit * self::OVERSAMPLING);
        if ($vectorResults === []) {
            return [];
        }

        // Stage 2: enrich with SQL data and apply row-level filters.
        $enriched = $this->enrichAndFilter($vectorResults, $filters);

        // Stage 3: re-rank on the combined score.
        $ranked = $this->rerank($enriched, $query);

        return array_slice($ranked, 0, $limit);
    }

    /** Searches within a specific taxonomy category. */
    public function searchByCategory(string $query, string $category, int $limit = 10): array
    {
        return $this->search($query, ['taxonomy_category' => $category], $limit);
    }

    /** Searches for chunks containing a specific entity (substring match on the entity name). */
    public function searchByEntity(string $query, string $entityName, int $limit = 10): array
    {
        return $this->search($query, ['entity_name' => $entityName], $limit);
    }

    /** Searches for chunks with a specific intent (explain, argue, define, etc.). */
    public function searchByIntent(string $query, string $intent, int $limit = 10): array
    {
        return $this->search($query, ['intent' => $intent], $limit);
    }

    /** Searches for definition chunks only. */
    public function searchDefinitions(string $query, int $limit = 10): array
    {
        return $this->search($query, ['discourse_role' => 'definition'], $limit);
    }

    /** Searches for evidence/example chunks for a topic. */
    public function searchEvidence(string $query, int $limit = 10): array
    {
        return $this->search($query, ['discourse_role' => 'evidence'], $limit);
    }

    /**
     * Gets all available taxonomy categories with counts.
     *
     * @return list<array{category: mixed, count: mixed}> Rows ordered by count, descending.
     */
    public function getTaxonomyCategories(): array
    {
        $stmt = $this->pdo->query('
            SELECT taxonomy_category as category, COUNT(*) as count
            FROM dokumentation_chunks
            WHERE taxonomy_category IS NOT NULL
            GROUP BY taxonomy_category
            ORDER BY count DESC
        ');

        // PDO::query() returns false instead of throwing when the error mode is not ERRMODE_EXCEPTION.
        if ($stmt === false) {
            return [];
        }

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Gets all entities grouped by type.
     *
     * @return array<string, list<string>> Unique entity names per entity type, in first-seen order.
     */
    public function getEntitiesByType(): array
    {
        $stmt = $this->pdo->query("
            SELECT entities FROM dokumentation_chunks
            WHERE entities IS NOT NULL AND entities != '[]'
        ");
        if ($stmt === false) {
            return [];
        }

        $byType = [];
        $seen = []; // [type][name] => true; O(1) duplicate check instead of in_array() per entity

        foreach ($stmt->fetchAll(PDO::FETCH_ASSOC) as $row) {
            foreach ($this->decodeJsonArray($row['entities'] ?? null) as $entity) {
                if (!is_array($entity)) {
                    continue;
                }

                $name = $entity['name'] ?? null;
                $type = $entity['type'] ?? null;
                // Non-string values cannot serve as a group key / name; skip malformed entries.
                if (!is_string($name) || !is_string($type)) {
                    continue;
                }

                if (!isset($seen[$type][$name])) {
                    $seen[$type][$name] = true;
                    $byType[$type][] = $name;
                }
            }
        }

        return $byType;
    }

    /**
     * Suggests related searches based on current results.
     *
     * Collects the keywords and entity names of the given results in order of
     * appearance and returns the first self::MAX_SUGGESTIONS distinct values.
     *
     * @param array<int, mixed> $results Result rows as returned by search().
     *
     * @return list<string>
     */
    public function suggestRelatedSearches(array $results): array
    {
        $seen = [];
        $suggestions = [];

        foreach ($results as $result) {
            if (!is_array($result)) {
                continue;
            }

            $keywords = is_array($result['keywords'] ?? null) ? $result['keywords'] : [];
            $entities = is_array($result['entities'] ?? null) ? $result['entities'] : [];

            // Keywords first, then entity names: the same order the suggestions were always built in.
            $candidates = [...array_values($keywords), ...$this->entityValues($entities, 'name')];

            foreach ($candidates as $candidate) {
                if (!is_string($candidate) || isset($seen[$candidate])) {
                    continue;
                }

                $seen[$candidate] = true;
                $suggestions[] = $candidate;

                // Only the first few are ever returned, so stop as soon as they are collected.
                if (count($suggestions) >= self::MAX_SUGGESTIONS) {
                    return $suggestions;
                }
            }
        }

        return $suggestions;
    }

    /**
     * Performs semantic search in Qdrant.
     *
     * Only the `taxonomy_category` filter is pushed down to Qdrant; all other
     * filters are applied afterwards on the SQL row. Request failures reported
     * as RuntimeException are deliberately degraded to an empty result.
     *
     * @return list<array{id: string, score: float, payload: array<mixed>}>
     */
    private function semanticSearch(string $query, array $filters, int $limit): array
    {
        $vector = array_values($this->ollama->getEmbedding($query));
        if ($vector === []) {
            // Without a vector Qdrant cannot answer; avoid a request that is bound to fail.
            return [];
        }

        $url = sprintf('%s/collections/%s/points/search', $this->qdrantHost, self::COLLECTION);
        $payload = [
            'vector' => $vector,
            'limit' => $limit,
            'with_payload' => true,
        ];

        if (isset($filters['taxonomy_category'])) {
            $payload['filter'] = [
                'must' => [
                    [
                        'key' => 'taxonomy_category',
                        'match' => ['value' => $filters['taxonomy_category']],
                    ],
                ],
            ];
        }

        try {
            $response = $this->makeRequest($url, $payload, 'POST');
        } catch (RuntimeException) {
            // Best effort: a failing vector backend yields "no results" rather than an error.
            return [];
        }

        if (!isset($response['result']) || !is_array($response['result'])) {
            return [];
        }

        $hits = [];
        foreach ($response['result'] as $item) {
            // Skip malformed entries instead of letting a TypeError escape the search.
            if (!is_array($item)) {
                continue;
            }

            $hits[] = [
                'id' => is_scalar($item['id'] ?? null) ? (string) $item['id'] : '',
                'score' => (float) ($item['score'] ?? 0),
                'payload' => is_array($item['payload'] ?? null) ? $item['payload'] : [],
            ];
        }

        return $hits;
    }

    /**
     * Enriches vector results with SQL data and applies filters.
     *
     * Hits are dropped when their score is below the minimum, when the payload
     * carries no `chunk_id`, when the chunk row no longer exists, or when the
     * row does not satisfy matchesFilters().
     *
     * @param list<array{id: string, score: float, payload: array<mixed>}> $vectorResults
     *
     * @return list<array<string, mixed>>
     */
    private function enrichAndFilter(array $vectorResults, array $filters): array
    {
        $minScore = is_numeric($filters['min_score'] ?? null)
            ? (float) $filters['min_score']
            : self::DEFAULT_MIN_SCORE;

        $results = [];
        foreach ($vectorResults as $hit) {
            if ($hit['score'] < $minScore) {
                continue;
            }

            $chunkId = (int) ($hit['payload']['chunk_id'] ?? 0);
            if ($chunkId === 0) {
                continue;
            }

            $chunk = $this->getChunkWithDocument($chunkId);
            if ($chunk === null || !$this->matchesFilters($chunk, $filters)) {
                continue;
            }

            $results[] = [
                'chunk_id' => $chunkId,
                'doc_id' => (int) $chunk['dokumentation_id'],
                'path' => $chunk['doc_path'] ?? '',
                'title' => $chunk['doc_title'] ?? '',
                'content' => $chunk['content_clean'] ?? $chunk['content'] ?? '',
                'heading_path' => $this->decodeJsonArray($chunk['heading_path'] ?? null),
                'taxonomy' => $this->decodeJsonArray($chunk['taxonomy_path'] ?? null),
                'entities' => $this->decodeJsonArray($chunk['entities'] ?? null),
                'keywords' => $this->decodeJsonArray($chunk['keywords'] ?? null),
                'score' => $hit['score'],
                // Starts out equal to the vector score; rerank() adds the boosts.
                'relevance_score' => $hit['score'],
            ];
        }

        return $results;
    }

    /**
     * Checks whether a chunk row satisfies the row-level filters.
     *
     * Handles `entity_name`, `entity_type`, `keyword`, `intent` and
     * `discourse_role`; any other filter key is ignored here.
     *
     * @param array<string, mixed> $chunk   Row from getChunkWithDocument().
     * @param array<string, mixed> $filters
     */
    private function matchesFilters(array $chunk, array $filters): bool
    {
        if (isset($filters['entity_name']) || isset($filters['entity_type'])) {
            $entities = $this->decodeJsonArray($chunk['entities'] ?? null);

            if (
                isset($filters['entity_name'])
                && !$this->anyContains($this->entityValues($entities, 'name'), (string) $filters['entity_name'])
            ) {
                return false;
            }

            if (isset($filters['entity_type'])) {
                $wantedType = strtoupper((string) $filters['entity_type']);
                $types = array_map(strtoupper(...), $this->entityValues($entities, 'type'));
                if (!in_array($wantedType, $types, true)) {
                    return false;
                }
            }
        }

        if (isset($filters['keyword'])) {
            $keywords = $this->decodeJsonArray($chunk['keywords'] ?? null);
            if (!$this->anyContains($keywords, (string) $filters['keyword'])) {
                return false;
            }
        }

        // searchByIntent(), searchDefinitions() and searchEvidence() pass these keys; without this
        // check they were silently ignored and the searches returned unfiltered results.
        // NOTE(review): assumes the chunk row exposes `intent` / `discourse_role` columns of the same
        // name — confirm against the schema. Rows lacking the column are left unfiltered, which is
        // the previous behaviour.
        foreach (self::ROW_FILTER_COLUMNS as $column) {
            if (!isset($filters[$column]) || !array_key_exists($column, $chunk)) {
                continue;
            }
            if (strcasecmp((string) $chunk[$column], (string) $filters[$column]) !== 0) {
                return false;
            }
        }

        return true;
    }

    /**
     * Re-ranks results based on combined semantic and structural relevance.
     *
     * For every whitespace-separated query word, a boost is added per keyword,
     * entity name and title that contains the word (ASCII case-insensitive).
     * The boosted score is capped at 1.0 and results are sorted by it, descending.
     *
     * @param list<array<string, mixed>> $results Rows produced by enrichAndFilter().
     *
     * @return list<array<string, mixed>>
     */
    private function rerank(array $results, string $query): array
    {
        // PREG_SPLIT_NO_EMPTY drops empty fragments without also discarding the word "0".
        $queryWords = preg_split('/\s+/', strtolower($query), -1, PREG_SPLIT_NO_EMPTY) ?: [];

        // Index-based write-back instead of a by-reference loop: no dangling reference is left
        // pointing into $results when usort() runs.
        foreach ($results as $index => $result) {
            $keywords = is_array($result['keywords'] ?? null) ? $result['keywords'] : [];
            $entities = is_array($result['entities'] ?? null) ? $result['entities'] : [];
            $titles = is_string($result['title'] ?? null) ? [$result['title']] : [];

            $boost = self::KEYWORD_BOOST * $this->countWordHits($keywords, $queryWords)
                + self::ENTITY_BOOST * $this->countWordHits($this->entityValues($entities, 'name'), $queryWords)
                + self::TITLE_BOOST * $this->countWordHits($titles, $queryWords);

            $results[$index]['relevance_score'] = min(1.0, $result['score'] + $boost);
        }

        usort(
            $results,
            static fn (array $a, array $b): int => $b['relevance_score'] <=> $a['relevance_score']
        );

        return $results;
    }

    /**
     * Gets a chunk row together with the title and path of its document.
     *
     * @return array<string, mixed>|null Null when the chunk (or its document) does not exist.
     */
    private function getChunkWithDocument(int $chunkId): ?array
    {
        $stmt = $this->pdo->prepare('
            SELECT c.*, d.title as doc_title, d.path as doc_path
            FROM dokumentation_chunks c
            JOIN dokumentation d ON c.dokumentation_id = d.id
            WHERE c.id = :id
        ');

        // PDO::prepare() returns false instead of throwing when the error mode is not ERRMODE_EXCEPTION.
        if ($stmt === false) {
            return null;
        }

        $stmt->execute(['id' => $chunkId]);
        $row = $stmt->fetch(PDO::FETCH_ASSOC);

        return is_array($row) ? $row : null;
    }

    /**
     * Makes an HTTP request with a JSON body to Qdrant.
     *
     * @param array<string, mixed> $payload Request body, JSON-encoded before sending.
     *
     * @return array<mixed> Decoded JSON response; [] when the body is not a JSON array/object.
     *
     * @throws RuntimeException When cURL cannot be initialised, the payload cannot be encoded,
     *                          the transfer fails, or Qdrant answers with HTTP status >= 400.
     */
    private function makeRequest(string $url, array $payload, string $method): array
    {
        $ch = curl_init($url);
        if ($ch === false) {
            throw new RuntimeException('Failed to initialize cURL');
        }

        $jsonPayload = json_encode($payload);
        if ($jsonPayload === false) {
            curl_close($ch);
            throw new RuntimeException('Failed to encode JSON payload');
        }

        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => self::TIMEOUT,
            CURLOPT_CONNECTTIMEOUT => self::CONNECT_TIMEOUT,
            CURLOPT_CUSTOMREQUEST => $method,
            CURLOPT_POSTFIELDS => $jsonPayload,
            CURLOPT_HTTPHEADER => [
                'Content-Type: application/json',
                'Content-Length: ' . strlen($jsonPayload),
            ],
        ]);

        // Read status and error text before the handle is released.
        $result = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curlError = curl_error($ch);
        curl_close($ch);

        if ($result === false) {
            throw new RuntimeException(sprintf('cURL request failed: %s', $curlError ?: 'Unknown error'));
        }
        if ($httpCode >= 400) {
            throw new RuntimeException(sprintf('Qdrant API returned HTTP %d', $httpCode));
        }

        $decoded = json_decode((string) $result, true);

        return is_array($decoded) ? $decoded : [];
    }

    /**
     * Extracts the string values stored under `$field` from a list of decoded entities.
     *
     * Entries that are not arrays, or whose field is missing or not a string, are skipped.
     *
     * @param array<mixed> $entities
     *
     * @return list<string>
     */
    private function entityValues(array $entities, string $field): array
    {
        $values = [];
        foreach ($entities as $entity) {
            if (is_array($entity) && is_string($entity[$field] ?? null)) {
                $values[] = $entity[$field];
            }
        }

        return $values;
    }

    /**
     * Tells whether any string in `$texts` contains `$needle` (ASCII case-insensitive).
     *
     * Non-string entries are ignored.
     *
     * @param array<mixed> $texts
     */
    private function anyContains(array $texts, string $needle): bool
    {
        foreach ($texts as $text) {
            if (is_string($text) && stripos($text, $needle) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Counts the (text, word) pairs in which the word occurs in the text (ASCII case-insensitive).
     *
     * Non-string texts are ignored.
     *
     * @param array<mixed> $texts
     * @param list<string> $words
     */
    private function countWordHits(array $texts, array $words): int
    {
        $hits = 0;
        foreach ($texts as $text) {
            if (!is_string($text)) {
                continue;
            }
            foreach ($words as $word) {
                if (stripos($text, $word) !== false) {
                    $hits++;
                }
            }
        }

        return $hits;
    }
}
Vollständig herunterladen
Aktionen
Andere Versionen dieser Datei
| ID | Version | Typ | Größe | Datum |
| 2016 | 33 | modified | 12.3 KB | 2025-12-28 14:24 |
| 2015 | 32 | modified | 13.8 KB | 2025-12-28 14:23 |
| 2014 | 31 | modified | 14.3 KB | 2025-12-28 14:23 |
| 2013 | 30 | modified | 14.4 KB | 2025-12-28 14:22 |
| 2006 | 29 | modified | 12.9 KB | 2025-12-28 14:18 |
| 2003 | 28 | modified | 12.4 KB | 2025-12-28 14:18 |
| 2002 | 27 | modified | 12.2 KB | 2025-12-28 14:18 |
| 2001 | 26 | modified | 11.5 KB | 2025-12-28 14:17 |
| 1974 | 25 | modified | 11.8 KB | 2025-12-28 02:33 |
| 1973 | 24 | modified | 11.9 KB | 2025-12-28 02:32 |
| 1972 | 23 | modified | 12.0 KB | 2025-12-28 02:32 |
| 1971 | 22 | modified | 12.1 KB | 2025-12-28 02:31 |
| 1970 | 21 | modified | 12.2 KB | 2025-12-28 02:31 |
| 1969 | 20 | modified | 12.4 KB | 2025-12-28 02:31 |
| 1968 | 19 | modified | 12.6 KB | 2025-12-28 02:31 |
| 1967 | 18 | modified | 12.7 KB | 2025-12-28 02:30 |
| 1966 | 17 | modified | 12.7 KB | 2025-12-28 02:30 |
| 1965 | 16 | modified | 12.9 KB | 2025-12-28 02:30 |
| 1964 | 15 | modified | 13.5 KB | 2025-12-28 02:30 |
| 1510 | 14 | modified | 13.5 KB | 2025-12-25 18:21 |
| 1509 | 13 | modified | 13.7 KB | 2025-12-25 18:21 |
| 1502 | 12 | modified | 13.6 KB | 2025-12-25 17:48 |
| 854 | 11 | modified | 13.7 KB | 2025-12-23 08:46 |
| 853 | 10 | modified | 13.8 KB | 2025-12-23 08:46 |
| 787 | 9 | modified | 14.0 KB | 2025-12-23 08:05 |
| 366 | 8 | modified | 14.0 KB | 2025-12-22 08:24 |
| 365 | 7 | modified | 13.9 KB | 2025-12-22 08:24 |
| 332 | 6 | modified | 13.9 KB | 2025-12-22 08:09 |
| 331 | 5 | modified | 13.9 KB | 2025-12-22 08:09 |
| 330 | 4 | modified | 13.9 KB | 2025-12-22 08:09 |
| 329 | 3 | modified | 13.9 KB | 2025-12-22 08:09 |
| 38 | 2 | modified | 14.7 KB | 2025-12-20 17:24 |
| 26 | 1 | modified | 14.7 KB | 2025-12-20 17:17 |
← Zurück zur Übersicht