<?php
declare(strict_types=1);
namespace Infrastructure\Persistence;
// @responsibility: Semantik-Suche und Graph-Daten für Semantic Explorer
use Domain\Repository\SemanticSearchRepositoryInterface;
use PDO;
/**
 * PDO-backed read repository for the Semantic Explorer.
 *
 * Provides aggregate statistics, a filtered and paginated entity listing and
 * the entity/relation graph. Every method only issues SELECT statements
 * against the injected connection.
 */
final class SemanticSearchRepository implements SemanticSearchRepositoryInterface
{
    /** Connection used for every query issued by this repository. */
    private PDO $db;

    /**
     * @param PDO $pdo Database connection; stored as-is and used for all queries.
     */
    public function __construct(PDO $pdo)
    {
        $this->db = $pdo;
    }

    /**
     * {@inheritDoc}
     *
     * Runs a single SELECT made of scalar sub-queries. The row carries the keys
     * entities, relations, taxonomy, ontology, semantics, stopwords, total,
     * types, linked_chunks and linked_docs ("total" repeats the entity count).
     *
     * @return array<int|string, mixed> The statistics row, or an empty array if
     *                                  the driver returned no row.
     */
    public function getSemanticStats(): array
    {
        return $this->fetchRow(
            'SELECT
                (SELECT COUNT(*) FROM entities) as entities,
                (SELECT COUNT(*) FROM entity_relations) as relations,
                (SELECT COUNT(*) FROM taxonomy_terms) as taxonomy,
                (SELECT COUNT(*) FROM ontology_classes) as ontology,
                (SELECT COUNT(*) FROM chunk_semantics) as semantics,
                (SELECT COUNT(*) FROM stopwords WHERE is_active = 1) as stopwords,
                (SELECT COUNT(*) FROM entities) as total,
                (SELECT COUNT(DISTINCT type) FROM entities) as types,
                (SELECT COUNT(DISTINCT chunk_id) FROM chunk_entities) as linked_chunks,
                (SELECT COUNT(DISTINCT c.document_id) FROM chunk_entities ce JOIN chunks c ON ce.chunk_id = c.id) as linked_docs'
        );
    }

    /**
     * {@inheritDoc}
     *
     * Lists entities ordered by name, one row per entity, together with the
     * number of distinct chunks and documents the entity is linked to.
     *
     * Note: first_chunk_id, first_filename and first_document_id are three
     * independent MIN() aggregates, so they do not necessarily belong to the
     * same chunk/document row.
     *
     * @param string $search Substring matched against name, description and
     *                       canonical_name; an empty string disables the filter.
     * @param string $type   Exact entity type; an empty string disables the filter.
     * @param int    $limit  Maximum number of rows, bound as an integer.
     * @param int    $offset Number of rows to skip; negative values are treated as 0.
     *
     * @return array<int, array<int|string, mixed>>
     */
    public function findEntitySemantics(string $search = '', string $type = '', int $limit = 50, int $offset = 0): array
    {
        [$filterSql, $filterParams] = $this->buildEntityFilter($search, $type);

        $sql = 'SELECT e.id, e.name, e.canonical_name, e.type, e.description, e.status,
                    COUNT(DISTINCT ce.chunk_id) as chunk_count,
                    COUNT(DISTINCT c.document_id) as document_count,
                    MIN(ce.chunk_id) as first_chunk_id,
                    MIN(d.filename) as first_filename,
                    MIN(d.id) as first_document_id
                FROM entities e
                LEFT JOIN chunk_entities ce ON e.id = ce.entity_id
                LEFT JOIN chunks c ON ce.chunk_id = c.id
                LEFT JOIN documents d ON c.document_id = d.id
                WHERE 1=1'
            . $filterSql
            . ' GROUP BY e.id, e.name, e.canonical_name, e.type, e.description, e.status'
            . ' ORDER BY e.name LIMIT :limit OFFSET :offset';

        $stmt = $this->db->prepare($sql);

        // Every value is bound explicitly: passing an array to execute() would
        // bind all of them as strings, which is wrong for LIMIT/OFFSET.
        foreach ($filterParams as $name => $value) {
            $stmt->bindValue(':' . $name, $value);
        }
        $stmt->bindValue(':limit', $limit, PDO::PARAM_INT);
        // A negative offset (e.g. derived from page 0) is treated as "start at the first row".
        $stmt->bindValue(':offset', max(0, $offset), PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll();
    }

    /**
     * {@inheritDoc}
     *
     * Counts the entities matching the same search/type filter that
     * findEntitySemantics() applies, ignoring pagination.
     *
     * @param string $search Substring filter; an empty string disables it.
     * @param string $type   Exact entity type; an empty string disables it.
     */
    public function countEntitySemantics(string $search = '', string $type = ''): int
    {
        [$filterSql, $filterParams] = $this->buildEntityFilter($search, $type);

        $stmt = $this->db->prepare('SELECT COUNT(DISTINCT e.id) FROM entities e WHERE 1=1' . $filterSql);
        $stmt->execute($filterParams);

        return (int) $stmt->fetchColumn();
    }

    /**
     * {@inheritDoc}
     *
     * Loads every entity and relation and returns them as a node/link graph.
     * Link "source" and "target" are zero-based positions in the returned
     * "nodes" list, not entity ids. Relations that reference an entity missing
     * from the entities table are dropped.
     *
     * @return array<string, mixed> Keys: nodes, links, stats.
     */
    public function getGraphData(): array
    {
        $entities = $this->fetchRows('SELECT id, name, type FROM entities ORDER BY name');
        $relations = $this->fetchRows(
            'SELECT source_entity_id, target_entity_id, relation_type, strength
            FROM entity_relations'
        );

        // Map entity id => position in $nodes so links can refer to nodes by index.
        $positionById = [];
        $nodes = [];
        foreach ($entities as $entity) {
            $positionById[$entity['id']] = count($nodes);
            $nodes[] = [
                'id' => 'entity_' . $entity['id'],
                'label' => $entity['name'],
                'type' => strtoupper($entity['type'] ?? 'OTHER'),
                'entityId' => (int) $entity['id'],
            ];
        }

        $links = [];
        foreach ($relations as $relation) {
            $sourceId = $relation['source_entity_id'];
            $targetId = $relation['target_entity_id'];

            // Skip relations whose endpoints are not part of the node list.
            if (!isset($positionById[$sourceId], $positionById[$targetId])) {
                continue;
            }

            $links[] = [
                'source' => $positionById[$sourceId],
                'target' => $positionById[$targetId],
                'type' => $relation['relation_type'],
                // A NULL strength is reported as 1.0.
                'strength' => (float) ($relation['strength'] ?? 1.0),
            ];
        }

        return [
            'nodes' => $nodes,
            'links' => $links,
            'stats' => [
                'nodes' => count($nodes),
                'links' => count($links),
                'entityTypes' => count(array_unique(array_column($nodes, 'type'))),
                'relationTypes' => count(array_unique(array_column($links, 'type'))),
            ],
        ];
    }

    /**
     * {@inheritDoc}
     *
     * Reports how many chunk_semantics rows carry a statement_form relative to
     * the number of chunks, plus row counts per statement_form and per intent.
     *
     * NOTE(review): "analyzed" counts rows, not distinct chunks; this presumably
     * relies on at most one chunk_semantics row per chunk - confirm, otherwise
     * coverage can exceed 100.
     *
     * @return array<string, mixed> Keys: analyzed, total, coverage,
     *                              by_statement_form, by_intent.
     */
    public function getTextSemanticStats(): array
    {
        $coverage = $this->fetchRow(
            'SELECT
                (SELECT COUNT(*) FROM chunk_semantics WHERE statement_form IS NOT NULL) as analyzed,
                (SELECT COUNT(*) FROM chunks) as total'
        );
        $analyzed = (int) ($coverage['analyzed'] ?? 0);
        // A missing row means "nothing counted", so default to 0 rather than inventing one chunk.
        $total = (int) ($coverage['total'] ?? 0);

        $byStatementForm = $this->fetchRows(
            'SELECT statement_form, COUNT(*) as count
            FROM chunk_semantics
            WHERE statement_form IS NOT NULL
            GROUP BY statement_form
            ORDER BY count DESC'
        );

        $byIntent = $this->fetchRows(
            'SELECT intent, COUNT(*) as count
            FROM chunk_semantics
            WHERE intent IS NOT NULL
            GROUP BY intent
            ORDER BY count DESC'
        );

        return [
            'analyzed' => $analyzed,
            'total' => $total,
            'coverage' => $this->coveragePercent($analyzed, $total),
            'by_statement_form' => $byStatementForm,
            'by_intent' => $byIntent,
        ];
    }

    /**
     * {@inheritDoc}
     *
     * Reports how many distinct entities have a semantic_role relative to the
     * number of entities, plus row counts per semantic_role and per
     * functional_category.
     *
     * All three queries read entity_knowledge_semantics so that the coverage
     * figure and the breakdowns describe the same data.
     * NOTE(review): the coverage query previously read entity_semantics;
     * confirm that entity_knowledge_semantics exposes entity_id.
     *
     * @return array<string, mixed> Keys: analyzed, total, coverage,
     *                              by_semantic_role, by_functional_category.
     */
    public function getKnowledgeSemanticStats(): array
    {
        $coverage = $this->fetchRow(
            'SELECT
                (SELECT COUNT(DISTINCT entity_id) FROM entity_knowledge_semantics WHERE semantic_role IS NOT NULL) as analyzed,
                (SELECT COUNT(*) FROM entities) as total'
        );
        $analyzed = (int) ($coverage['analyzed'] ?? 0);
        // A missing row means "nothing counted", so default to 0 rather than inventing one entity.
        $total = (int) ($coverage['total'] ?? 0);

        $bySemanticRole = $this->fetchRows(
            'SELECT semantic_role, COUNT(*) as count
            FROM entity_knowledge_semantics
            WHERE semantic_role IS NOT NULL
            GROUP BY semantic_role
            ORDER BY count DESC'
        );

        $byFunctionalCategory = $this->fetchRows(
            'SELECT functional_category, COUNT(*) as count
            FROM entity_knowledge_semantics
            WHERE functional_category IS NOT NULL
            GROUP BY functional_category
            ORDER BY count DESC'
        );

        return [
            'analyzed' => $analyzed,
            'total' => $total,
            'coverage' => $this->coveragePercent($analyzed, $total),
            'by_semantic_role' => $bySemanticRole,
            'by_functional_category' => $byFunctionalCategory,
        ];
    }

    /**
     * Builds the WHERE fragment and parameters shared by the entity list and
     * the entity count, so both always apply the same filter.
     *
     * @return array{0: string, 1: array<string, string>} SQL fragment (empty or
     *         starting with " AND ") and the values for its named placeholders.
     */
    private function buildEntityFilter(string $search, string $type): array
    {
        $sql = '';
        $params = [];

        if ($search !== '') {
            // Three distinct placeholders: a named placeholder cannot be reused
            // within one statement on every driver configuration.
            $sql .= ' AND (e.name LIKE :search OR e.description LIKE :search2 OR e.canonical_name LIKE :search3)';
            $pattern = '%' . $search . '%';
            $params['search'] = $pattern;
            $params['search2'] = $pattern;
            $params['search3'] = $pattern;
        }

        if ($type !== '') {
            $sql .= ' AND e.type = :type';
            $params['type'] = $type;
        }

        return [$sql, $params];
    }

    /**
     * Runs a parameterless query and returns its first row.
     *
     * @return array<int|string, mixed> The row, or an empty array when fetch()
     *                                  did not return one.
     */
    private function fetchRow(string $sql): array
    {
        $row = $this->db->query($sql)->fetch();

        // fetch() returns false when there is no row; normalise to an array so
        // the declared return types of the public methods always hold.
        return is_array($row) ? $row : [];
    }

    /**
     * Runs a parameterless query and returns all of its rows.
     *
     * @return array<int, array<int|string, mixed>>
     */
    private function fetchRows(string $sql): array
    {
        return $this->db->query($sql)->fetchAll();
    }

    /**
     * Share of $analyzed in $total as a percentage rounded to one decimal;
     * 0.0 when $total is not positive.
     */
    private function coveragePercent(int $analyzed, int $total): float
    {
        return $total > 0 ? round($analyzed / $total * 100, 1) : 0.0;
    }
}