{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Search\/ContentSearchService.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Search;\n\n\/\/ @responsibility: Semantische Suche über ki_content mit Chunk-Semantik\n\nuse Domain\\Service\\SearchServiceInterface;\nuse Infrastructure\\AI\\OllamaService;\nuse Infrastructure\\Config\\CredentialService;\nuse Infrastructure\\Traits\\JsonDecodeTrait;\nuse PDO;\nuse RuntimeException;\n\nfinal class ContentSearchService implements SearchServiceInterface\n{\n use JsonDecodeTrait;\n\n private const string COLLECTION = 'documents';\n private const int TIMEOUT = 30;\n private const float MIN_SCORE = 0.3;\n\n private string $qdrantHost;\n\n public function __construct(\n private PDO $pdo,\n private OllamaService $ollama\n ) {\n $this->qdrantHost = CredentialService::getQdrantHost();\n }\n\n \/** Hybrid search combining semantic vectors with SQL filters. *\/\n public function search(string $query, array $filters = [], int $limit = 10): array\n {\n \/\/ Stage 1: Semantic search in Qdrant\n $vectorResults = $this->semanticSearch($query, $limit * 3);\n\n if (empty($vectorResults)) {\n return [];\n }\n\n \/\/ Stage 2: Enrich with SQL data (chunks + semantics)\n $enrichedResults = $this->enrichWithSemantics($vectorResults, $filters);\n\n \/\/ Stage 3: Re-rank based on semantic relevance\n $rankedResults = $this->rerank($enrichedResults, $query);\n\n return array_slice($rankedResults, 0, $limit);\n }\n\n \/** Searches for definition chunks only. *\/\n public function searchDefinitions(string $query, int $limit = 10): array\n {\n return $this->search($query, ['discourse_role' => 'definition'], $limit);\n }\n\n \/** Searches for chunks with specific intent. *\/\n public function searchByIntent(string $query, string $intent, int $limit = 10): array\n {\n return $this->search($query, ['intent' => $intent], $limit);\n }\n\n \/** Searches within a taxonomy path. *\/\n public function searchByTaxonomy(string $query, string $taxonomyPath, int $limit = 10): array\n {\n return $this->search($query, ['taxonomy_path' => $taxonomyPath], $limit);\n }\n\n \/** Suggests related searches based on current results. *\/\n public function suggestRelatedSearches(array $results): array\n {\n $suggestions = [];\n\n foreach ($results as $result) {\n foreach ($result['keywords'] ?? [] as $keyword) {\n if (!in_array($keyword, $suggestions, true) && count($suggestions) < 5) {\n $suggestions[] = $keyword;\n }\n }\n }\n\n return $suggestions;\n }\n\n \/** Gets semantic statistics for the content. *\/\n public function getSemanticStats(): array\n {\n $stmt = $this->pdo->query('\n SELECT\n cs.intent,\n cs.discourse_role,\n COUNT(*) as count\n FROM ki_content.chunk_semantics cs\n WHERE cs.intent IS NOT NULL\n GROUP BY cs.intent, cs.discourse_role\n ORDER BY count DESC\n ');\n\n return $stmt->fetchAll(PDO::FETCH_ASSOC);\n }\n\n \/** Performs semantic search in Qdrant. *\/\n private function semanticSearch(string $query, int $limit): array\n {\n $embedding = $this->ollama->getEmbedding($query);\n\n $url = sprintf('%s\/collections\/%s\/points\/search', $this->qdrantHost, self::COLLECTION);\n\n $payload = [\n 'vector' => array_values($embedding),\n 'limit' => $limit,\n 'with_payload' => true,\n ];\n\n try {\n $response = $this->makeRequest($url, $payload);\n\n if (!isset($response['result']) || !is_array($response['result'])) {\n return [];\n }\n\n return array_map(static function (array $item): array {\n return [\n 'qdrant_id' => (string) $item['id'],\n 'score' => (float) ($item['score'] ?? 0),\n 'payload' => is_array($item['payload'] ?? null) ? $item['payload'] : [],\n ];\n }, $response['result']);\n } catch (RuntimeException) {\n return [];\n }\n }\n\n \/** Enriches vector results with chunk semantics from database. *\/\n private function enrichWithSemantics(array $vectorResults, array $filters): array\n {\n $results = [];\n $minScore = $filters['min_score'] ?? self::MIN_SCORE;\n\n foreach ($vectorResults as $vr) {\n if ($vr['score'] < $minScore) {\n continue;\n }\n\n $qdrantId = $vr['qdrant_id'];\n $chunkData = $this->getChunkWithSemantics($qdrantId);\n\n if ($chunkData === null) {\n continue;\n }\n\n \/\/ Apply filters\n if (!$this->matchesFilters($chunkData, $filters)) {\n continue;\n }\n\n $results[] = [\n 'chunk_id' => (int) $chunkData['chunk_id'],\n 'document_id' => (int) $chunkData['document_id'],\n 'source_path' => $chunkData['source_path'] ?? '',\n 'content' => $chunkData['content'] ?? '',\n 'heading_path' => $chunkData['heading_path'] ?? '',\n \/\/ Semantic data\n 'summary' => $chunkData['summary'] ?? null,\n 'keywords' => $this->decodeJsonArray($chunkData['keywords'] ?? null),\n 'sentiment' => $chunkData['sentiment'] ?? 'neutral',\n 'intent' => $chunkData['intent'] ?? null,\n 'discourse_role' => $chunkData['discourse_role'] ?? null,\n 'statement_form' => $chunkData['statement_form'] ?? null,\n 'frame' => $chunkData['frame'] ?? null,\n \/\/ Scores\n 'score' => $vr['score'],\n 'relevance_score' => $vr['score'],\n ];\n }\n\n return $results;\n }\n\n \/** Gets chunk with semantic data from ki_content. *\/\n private function getChunkWithSemantics(string $qdrantId): ?array\n {\n $stmt = $this->pdo->prepare('\n SELECT\n c.id as chunk_id,\n c.document_id,\n c.content,\n c.heading_path,\n d.source_path,\n cs.summary,\n cs.keywords,\n cs.sentiment,\n cs.intent,\n cs.discourse_role,\n cs.statement_form,\n cs.frame\n FROM ki_content.chunks c\n JOIN ki_content.documents d ON c.document_id = d.id\n LEFT JOIN ki_content.chunk_semantics cs ON c.id = cs.chunk_id\n WHERE c.qdrant_id = :qdrant_id\n ');\n $stmt->execute(['qdrant_id' => $qdrantId]);\n $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n return $result !== false ? $result : null;\n }\n\n \/** Checks if chunk matches filters. *\/\n private function matchesFilters(array $chunk, array $filters): bool\n {\n if (isset($filters['intent']) && ($chunk['intent'] ?? null) !== $filters['intent']) {\n return false;\n }\n if (isset($filters['discourse_role']) && ($chunk['discourse_role'] ?? null) !== $filters['discourse_role']) {\n return false;\n }\n if (isset($filters['sentiment']) && ($chunk['sentiment'] ?? null) !== $filters['sentiment']) {\n return false;\n }\n if (isset($filters['frame']) && ($chunk['frame'] ?? null) !== $filters['frame']) {\n return false;\n }\n if (isset($filters['taxonomy_path'])) {\n \/\/ Check if chunk is in taxonomy path via chunk_taxonomy\n $hasMatch = $this->chunkMatchesTaxonomy((int) $chunk['chunk_id'], $filters['taxonomy_path']);\n if (!$hasMatch) {\n return false;\n }\n }\n\n return true;\n }\n\n \/** Checks if chunk belongs to taxonomy path. *\/\n private function chunkMatchesTaxonomy(int $chunkId, string $taxonomyPath): bool\n {\n $stmt = $this->pdo->prepare('\n SELECT 1 FROM ki_content.chunk_taxonomy ct\n JOIN ki_content.taxonomy_terms tt ON ct.taxonomy_term_id = tt.id\n WHERE ct.chunk_id = :chunk_id AND tt.path LIKE :path\n LIMIT 1\n ');\n $stmt->execute([\n 'chunk_id' => $chunkId,\n 'path' => $taxonomyPath . '%',\n ]);\n\n return $stmt->fetch() !== false;\n }\n\n \/** Re-ranks results based on semantic relevance. *\/\n private function rerank(array $results, string $query): array\n {\n $queryWords = array_filter(preg_split('\/\\s+\/', strtolower($query)) ?: []);\n $isDefinitionQuery = $this->isDefinitionQuery($query);\n\n foreach ($results as &$result) {\n $boost = 0.0;\n\n \/\/ Keyword matching\n foreach ($result['keywords'] as $kw) {\n foreach ($queryWords as $w) {\n if (stripos($kw, $w) !== false) {\n $boost += 0.05;\n }\n }\n }\n\n \/\/ Semantic boost: discourse role\n $discourseRole = $result['discourse_role'] ?? null;\n if ($isDefinitionQuery && $discourseRole === 'definition') {\n $boost += 0.15;\n } elseif ($discourseRole === 'thesis') {\n $boost += 0.08;\n } elseif ($discourseRole === 'evidence') {\n $boost += 0.05;\n }\n\n \/\/ Semantic boost: intent\n $intent = $result['intent'] ?? null;\n if ($intent === 'explain' || $intent === 'define') {\n $boost += 0.05;\n }\n\n \/\/ Summary matching\n if ($result['summary'] !== null) {\n foreach ($queryWords as $w) {\n if (stripos($result['summary'], $w) !== false) {\n $boost += 0.03;\n }\n }\n }\n\n $result['relevance_score'] = min(1.0, $resu... [TRUNCATED-b33dd050d04e3d91]"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Search\/ContentSearchService.php",
"content": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Search;\n\n\/\/ @responsibility: Semantische Suche über ki_content mit Chunk-Semantik\n\nuse Domain\\Service\\SearchServiceInterface;\nuse Infrastructure\\AI\\OllamaService;\nuse Infrastructure\\Config\\CredentialService;\nuse Infrastructure\\Traits\\JsonDecodeTrait;\nuse PDO;\nuse RuntimeException;\n\nfinal class ContentSearchService implements SearchServiceInterface\n{\n use JsonDecodeTrait;\n\n private const string COLLECTION = 'documents';\n private const int TIMEOUT = 30;\n private const float MIN_SCORE = 0.3;\n\n private string $qdrantHost;\n\n public function __construct(\n private PDO $pdo,\n private OllamaService $ollama\n ) {\n $this->qdrantHost = CredentialService::getQdrantHost();\n }\n\n \/** Hybrid search combining semantic vectors with SQL filters. *\/\n public function search(string $query, array $filters = [], int $limit = 10): array\n {\n \/\/ Stage 1: Semantic search in Qdrant\n $vectorResults = $this->semanticSearch($query, $limit * 3);\n\n if (empty($vectorResults)) {\n return [];\n }\n\n \/\/ Stage 2: Enrich with SQL data (chunks + semantics)\n $enrichedResults = $this->enrichWithSemantics($vectorResults, $filters);\n\n \/\/ Stage 3: Re-rank based on semantic relevance\n $rankedResults = $this->rerank($enrichedResults, $query);\n\n return array_slice($rankedResults, 0, $limit);\n }\n\n \/** Searches for definition chunks only. *\/\n public function searchDefinitions(string $query, int $limit = 10): array\n {\n return $this->search($query, ['discourse_role' => 'definition'], $limit);\n }\n\n \/** Searches for chunks with specific intent. *\/\n public function searchByIntent(string $query, string $intent, int $limit = 10): array\n {\n return $this->search($query, ['intent' => $intent], $limit);\n }\n\n \/** Searches within a taxonomy path. *\/\n public function searchByTaxonomy(string $query, string $taxonomyPath, int $limit = 10): array\n {\n return $this->search($query, ['taxonomy_path' => $taxonomyPath], $limit);\n }\n\n \/** Suggests related searches based on current results. *\/\n public function suggestRelatedSearches(array $results): array\n {\n $suggestions = [];\n\n foreach ($results as $result) {\n foreach ($result['keywords'] ?? [] as $keyword) {\n if (!in_array($keyword, $suggestions, true) && count($suggestions) < 5) {\n $suggestions[] = $keyword;\n }\n }\n }\n\n return $suggestions;\n }\n\n \/** Gets semantic statistics for the content. *\/\n public function getSemanticStats(): array\n {\n $stmt = $this->pdo->query('\n SELECT\n cs.intent,\n cs.discourse_role,\n COUNT(*) as count\n FROM ki_content.chunk_semantics cs\n WHERE cs.intent IS NOT NULL\n GROUP BY cs.intent, cs.discourse_role\n ORDER BY count DESC\n ');\n\n return $stmt->fetchAll(PDO::FETCH_ASSOC);\n }\n\n \/** Performs semantic search in Qdrant. *\/\n private function semanticSearch(string $query, int $limit): array\n {\n $embedding = $this->ollama->getEmbedding($query);\n\n $url = sprintf('%s\/collections\/%s\/points\/search', $this->qdrantHost, self::COLLECTION);\n\n $payload = [\n 'vector' => array_values($embedding),\n 'limit' => $limit,\n 'with_payload' => true,\n ];\n\n try {\n $response = $this->makeRequest($url, $payload);\n\n if (!isset($response['result']) || !is_array($response['result'])) {\n return [];\n }\n\n return array_map(static function (array $item): array {\n return [\n 'qdrant_id' => (string) $item['id'],\n 'score' => (float) ($item['score'] ?? 0),\n 'payload' => is_array($item['payload'] ?? null) ? $item['payload'] : [],\n ];\n }, $response['result']);\n } catch (RuntimeException) {\n return [];\n }\n }\n\n \/** Enriches vector results with chunk semantics from database. *\/\n private function enrichWithSemantics(array $vectorResults, array $filters): array\n {\n $results = [];\n $minScore = $filters['min_score'] ?? self::MIN_SCORE;\n\n foreach ($vectorResults as $vr) {\n if ($vr['score'] < $minScore) {\n continue;\n }\n\n $qdrantId = $vr['qdrant_id'];\n $chunkData = $this->getChunkWithSemantics($qdrantId);\n\n if ($chunkData === null) {\n continue;\n }\n\n \/\/ Apply filters\n if (!$this->matchesFilters($chunkData, $filters)) {\n continue;\n }\n\n $results[] = [\n 'chunk_id' => (int) $chunkData['chunk_id'],\n 'document_id' => (int) $chunkData['document_id'],\n 'source_path' => $chunkData['source_path'] ?? '',\n 'content' => $chunkData['content'] ?? '',\n 'heading_path' => $chunkData['heading_path'] ?? '',\n \/\/ Semantic data\n 'summary' => $chunkData['summary'] ?? null,\n 'keywords' => $this->decodeJsonArray($chunkData['keywords'] ?? null),\n 'sentiment' => $chunkData['sentiment'] ?? 'neutral',\n 'intent' => $chunkData['intent'] ?? null,\n 'discourse_role' => $chunkData['discourse_role'] ?? null,\n 'statement_form' => $chunkData['statement_form'] ?? null,\n 'frame' => $chunkData['frame'] ?? null,\n \/\/ Scores\n 'score' => $vr['score'],\n 'relevance_score' => $vr['score'],\n ];\n }\n\n return $results;\n }\n\n \/** Gets chunk with semantic data from ki_content. *\/\n private function getChunkWithSemantics(string $qdrantId): ?array\n {\n $stmt = $this->pdo->prepare('\n SELECT\n c.id as chunk_id,\n c.document_id,\n c.content,\n c.heading_path,\n d.source_path,\n cs.summary,\n cs.keywords,\n cs.sentiment,\n cs.intent,\n cs.discourse_role,\n cs.statement_form,\n cs.frame\n FROM ki_content.chunks c\n JOIN ki_content.documents d ON c.document_id = d.id\n LEFT JOIN ki_content.chunk_semantics cs ON c.id = cs.chunk_id\n WHERE c.qdrant_id = :qdrant_id\n ');\n $stmt->execute(['qdrant_id' => $qdrantId]);\n $result = $stmt->fetch(PDO::FETCH_ASSOC);\n\n return $result !== false ? $result : null;\n }\n\n \/** Checks if chunk matches filters. *\/\n private function matchesFilters(array $chunk, array $filters): bool\n {\n if (isset($filters['intent']) && ($chunk['intent'] ?? null) !== $filters['intent']) {\n return false;\n }\n if (isset($filters['discourse_role']) && ($chunk['discourse_role'] ?? null) !== $filters['discourse_role']) {\n return false;\n }\n if (isset($filters['sentiment']) && ($chunk['sentiment'] ?? null) !== $filters['sentiment']) {\n return false;\n }\n if (isset($filters['frame']) && ($chunk['frame'] ?? null) !== $filters['frame']) {\n return false;\n }\n if (isset($filters['taxonomy_path'])) {\n \/\/ Check if chunk is in taxonomy path via chunk_taxonomy\n $hasMatch = $this->chunkMatchesTaxonomy((int) $chunk['chunk_id'], $filters['taxonomy_path']);\n if (!$hasMatch) {\n return false;\n }\n }\n\n return true;\n }\n\n \/** Checks if chunk belongs to taxonomy path. *\/\n private function chunkMatchesTaxonomy(int $chunkId, string $taxonomyPath): bool\n {\n $stmt = $this->pdo->prepare('\n SELECT 1 FROM ki_content.chunk_taxonomy ct\n JOIN ki_content.taxonomy_terms tt ON ct.taxonomy_term_id = tt.id\n WHERE ct.chunk_id = :chunk_id AND tt.path LIKE :path\n LIMIT 1\n ');\n $stmt->execute([\n 'chunk_id' => $chunkId,\n 'path' => $taxonomyPath . '%',\n ]);\n\n return $stmt->fetch() !== false;\n }\n\n \/** Re-ranks results based on semantic relevance. *\/\n private function rerank(array $results, string $query): array\n {\n $queryWords = array_filter(preg_split('\/\\s+\/', strtolower($query)) ?: []);\n $isDefinitionQuery = $this->isDefinitionQuery($query);\n\n foreach ($results as &$result) {\n $boost = 0.0;\n\n \/\/ Keyword matching\n foreach ($result['keywords'] as $kw) {\n foreach ($queryWords as $w) {\n if (stripos($kw, $w) !== false) {\n $boost += 0.05;\n }\n }\n }\n\n \/\/ Semantic boost: discourse role\n $discourseRole = $result['discourse_role'] ?? null;\n if ($isDefinitionQuery && $discourseRole === 'definition') {\n $boost += 0.15;\n } elseif ($discourseRole === 'thesis') {\n $boost += 0.08;\n } elseif ($discourseRole === 'evidence') {\n $boost += 0.05;\n }\n\n \/\/ Semantic boost: intent\n $intent = $result['intent'] ?? null;\n if ($intent === 'explain' || $intent === 'define') {\n $boost += 0.05;\n }\n\n \/\/ Summary matching\n if ($result['summary'] !== null) {\n foreach ($queryWords as $w) {\n if (stripos($result['summary'], $w) !== false) {\n $boost += 0.03;\n }\n }\n }\n\n $result['relevance_score'] = min(1.0, $resu... [TRUNCATED-b33dd050d04e3d91]",
"structuredPatch": [],
"originalFile": null
}
}