db = new \PDO( 'mysql:host=localhost;dbname=ki_content;charset=utf8mb4', 'root', $this->getPassword(), [ \PDO::ATTR_ERRMODE => \PDO::ERRMODE_EXCEPTION, \PDO::ATTR_DEFAULT_FETCH_MODE => \PDO::FETCH_ASSOC, ] ); } private function getPassword(): string { $file = '/var/www/docs/credentials/credentials.md'; $content = file_get_contents($file); foreach (explode("\n", $content) as $line) { if (str_contains($line, 'MariaDB') && str_contains($line, 'root')) { $parts = explode('|', $line); if (count($parts) >= 4) { return trim($parts[3]); } } } return ''; } /** * GET /semantic-explorer * Dashboard mit Statistiken */ public function index(): void { // Dokument-Statistiken $docStats = $this->db->query( 'SELECT COUNT(*) as total, SUM(CASE WHEN status = "done" THEN 1 ELSE 0 END) as processed, SUM(CASE WHEN status = "error" THEN 1 ELSE 0 END) as errors FROM documents' )->fetch(); // Chunk-Statistiken $chunkStats = $this->db->query( 'SELECT COUNT(*) as total, COALESCE(SUM(token_count), 0) as tokens, SUM(CASE WHEN qdrant_id IS NOT NULL THEN 1 ELSE 0 END) as embedded FROM chunks' )->fetch(); // Dokumente $documents = $this->db->query( 'SELECT d.id, d.filename, d.folder_path, d.mime_type, d.file_size, d.status, d.imported_at, d.processed_at, (SELECT COUNT(*) FROM chunks WHERE document_id = d.id) as chunk_count FROM documents d ORDER BY d.imported_at DESC' )->fetchAll(); // Neueste Chunks $recentChunks = $this->db->query( 'SELECT c.id, c.content, c.token_count, c.created_at, c.qdrant_id, d.filename FROM chunks c JOIN documents d ON c.document_id = d.id ORDER BY c.created_at DESC LIMIT 5' )->fetchAll(); $this->view('semantic-explorer.index', [ 'title' => 'Semantic Explorer', 'docStats' => $docStats, 'chunkStats' => $chunkStats, 'documents' => $documents, 'recentChunks' => $recentChunks, ]); } /** * GET /semantic-explorer/dokumente * Liste aller Dokumente */ public function dokumente(): void { $status = $_GET['status'] ?? ''; $search = $_GET['search'] ?? ''; $sql = 'SELECT d.id, d.filename, d.folder_path, d.source_path, d.mime_type, d.file_size, d.status, d.imported_at, d.processed_at, d.error_message, (SELECT COUNT(*) FROM chunks WHERE document_id = d.id) as chunk_count, (SELECT COALESCE(SUM(token_count), 0) FROM chunks WHERE document_id = d.id) as token_count FROM documents d WHERE 1=1'; $params = []; if ($status !== '') { $sql .= ' AND d.status = :status'; $params['status'] = $status; } if ($search !== '') { $sql .= ' AND (d.filename LIKE :search OR d.source_path LIKE :search2)'; $params['search'] = '%' . $search . '%'; $params['search2'] = '%' . $search . '%'; } $sql .= ' ORDER BY d.imported_at DESC'; $stmt = $this->db->prepare($sql); $stmt->execute($params); $documents = $stmt->fetchAll(); $this->view('semantic-explorer.dokumente.index', [ 'title' => 'Dokumente', 'documents' => $documents, 'currentStatus' => $status, 'currentSearch' => $search, ]); } /** * GET /semantic-explorer/dokumente/{id} * Dokument-Details mit Chunks */ public function dokumentShow(int $id): void { $stmt = $this->db->prepare('SELECT * FROM documents WHERE id = :id'); $stmt->execute(['id' => $id]); $document = $stmt->fetch(); if ($document === false) { http_response_code(404); echo '404 - Dokument nicht gefunden'; return; } // Chunks dieses Dokuments $stmt = $this->db->prepare( 'SELECT c.id, c.chunk_index, c.content, c.token_count, c.heading_path, c.metadata, c.qdrant_id, c.created_at FROM chunks c WHERE c.document_id = :id ORDER BY c.chunk_index' ); $stmt->execute(['id' => $id]); $chunks = $stmt->fetchAll(); // Heading-Paths dekodieren foreach ($chunks as &$chunk) { $chunk['heading_path_decoded'] = json_decode($chunk['heading_path'] ?? '[]', true) ?: []; $chunk['metadata_decoded'] = json_decode($chunk['metadata'] ?? '{}', true) ?: []; } $this->view('semantic-explorer.dokumente.show', [ 'title' => $document['filename'], 'document' => $document, 'chunks' => $chunks, ]); } /** * GET /semantic-explorer/chunks * Liste aller Chunks */ public function chunks(): void { $search = $_GET['search'] ?? ''; $embedded = $_GET['embedded'] ?? ''; $page = max(1, (int) ($_GET['page'] ?? 1)); $limit = 50; $offset = ($page - 1) * $limit; $sql = 'SELECT c.id, c.chunk_index, c.content, c.token_count, c.qdrant_id, c.created_at, d.filename, d.id as document_id FROM chunks c JOIN documents d ON c.document_id = d.id WHERE 1=1'; $countSql = 'SELECT COUNT(*) FROM chunks c JOIN documents d ON c.document_id = d.id WHERE 1=1'; $params = []; if ($search !== '') { $sql .= ' AND c.content LIKE :search'; $countSql .= ' AND c.content LIKE :search'; $params['search'] = '%' . $search . '%'; } if ($embedded === 'yes') { $sql .= ' AND c.qdrant_id IS NOT NULL'; $countSql .= ' AND c.qdrant_id IS NOT NULL'; } elseif ($embedded === 'no') { $sql .= ' AND c.qdrant_id IS NULL'; $countSql .= ' AND c.qdrant_id IS NULL'; } // Count total $countStmt = $this->db->prepare($countSql); $countStmt->execute($params); $totalCount = $countStmt->fetchColumn(); $sql .= ' ORDER BY c.created_at DESC LIMIT ' . $limit . ' OFFSET ' . $offset; $stmt = $this->db->prepare($sql); $stmt->execute($params); $chunks = $stmt->fetchAll(); $this->view('semantic-explorer.chunks.index', [ 'title' => 'Chunks', 'chunks' => $chunks, 'currentSearch' => $search, 'currentEmbedded' => $embedded, 'currentPage' => $page, 'totalCount' => $totalCount, 'totalPages' => ceil($totalCount / $limit), ]); } /** * GET /semantic-explorer/chunks/{id} * Chunk-Details */ public function chunkShow(int $id): void { $stmt = $this->db->prepare( 'SELECT c.*, d.filename, d.source_path, d.id as document_id FROM chunks c JOIN documents d ON c.document_id = d.id WHERE c.id = :id' ); $stmt->execute(['id' => $id]); $chunk = $stmt->fetch(); if ($chunk === false) { http_response_code(404); echo '404 - Chunk nicht gefunden'; return; } // JSON-Felder dekodieren $chunk['heading_path_decoded'] = json_decode($chunk['heading_path'] ?? '[]', true) ?: []; $chunk['metadata_decoded'] = json_decode($chunk['metadata'] ?? '{}', true) ?: []; // Nachbar-Chunks $stmt = $this->db->prepare( 'SELECT id, chunk_index FROM chunks WHERE document_id = :doc_id AND chunk_index = :prev' ); $stmt->execute(['doc_id' => $chunk['document_id'], 'prev' => $chunk['chunk_index'] - 1]); $prevChunk = $stmt->fetch(); $stmt = $this->db->prepare( 'SELECT id, chunk_index FROM chunks WHERE document_id = :doc_id AND chunk_index = :next' ); $stmt->execute(['doc_id' => $chunk['document_id'], 'next' => $chunk['chunk_index'] + 1]); $nextChunk = $stmt->fetch(); $this->view('semantic-explorer.chunks.show', [ 'title' => 'Chunk #' . $chunk['id'], 'chunk' => $chunk, 'prevChunk' => $prevChunk, 'nextChunk' => $nextChunk, ]); } /** * GET /semantic-explorer/suche * Semantische Suche in Nutzdaten */ public function suche(): void { $query = $_GET['q'] ?? ''; $limit = min(20, max(1, (int) ($_GET['limit'] ?? 10))); $results = []; if ($query !== '') { // Vektor-Suche via Qdrant $results = $this->vectorSearch($query, $limit); } $this->view('semantic-explorer.suche', [ 'title' => 'Semantische Suche', 'query' => $query, 'results' => $results, 'limit' => $limit, ]); } /** * Vektor-Suche in documents Collection */ private function vectorSearch(string $query, int $limit): array { // Embedding generieren $embedding = $this->getEmbedding($query); if (empty($embedding)) { return []; } // Qdrant suchen $response = $this->qdrantSearch($embedding, $limit); if (empty($response)) { return []; } // Chunk-Details aus DB laden $results = []; foreach ($response as $point) { $chunkId = $point['payload']['chunk_id'] ?? null; if ($chunkId === null) { continue; } $stmt = $this->db->prepare( 'SELECT c.*, d.filename, d.source_path FROM chunks c JOIN documents d ON c.document_id = d.id WHERE c.id = :id' ); $stmt->execute(['id' => $chunkId]); $chunk = $stmt->fetch(); if ($chunk !== false) { $chunk['score'] = $point['score']; $chunk['heading_path_decoded'] = json_decode($chunk['heading_path'] ?? '[]', true) ?: []; $results[] = $chunk; } } return $results; } /** * Embedding via Ollama */ private function getEmbedding(string $text): array { $ch = curl_init('http://localhost:11434/api/embeddings'); curl_setopt_array($ch, [ CURLOPT_RETURNTRANSFER => true, CURLOPT_POST => true, CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_POSTFIELDS => json_encode([ 'model' => 'mxbai-embed-large', 'prompt' => $text, ]), ]); $response = curl_exec($ch); curl_close($ch); $data = json_decode($response, true); return $data['embedding'] ?? []; } /** * Qdrant-Suche */ private function qdrantSearch(array $embedding, int $limit): array { $ch = curl_init('http://localhost:6333/collections/documents/points/search'); curl_setopt_array($ch, [ CURLOPT_RETURNTRANSFER => true, CURLOPT_POST => true, CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_POSTFIELDS => json_encode([ 'vector' => $embedding, 'limit' => $limit, 'with_payload' => true, ]), ]); $response = curl_exec($ch); curl_close($ch); $data = json_decode($response, true); return $data['result'] ?? []; } /** * GET /semantic-explorer/entitaeten * Liste aller Entitaeten */ public function entitaeten(): void { $type = $_GET['type'] ?? ''; $search = $_GET['search'] ?? ''; $sql = 'SELECT e.*, COUNT(DISTINCT ce.chunk_id) as chunk_count, COUNT(DISTINCT er.id) as relation_count FROM entities e LEFT JOIN chunk_entities ce ON e.id = ce.entity_id LEFT JOIN entity_relations er ON e.id = er.source_entity_id OR e.id = er.target_entity_id WHERE 1=1'; $params = []; if ($type !== '') { $sql .= ' AND e.type = :type'; $params['type'] = $type; } if ($search !== '') { $sql .= ' AND (e.name LIKE :search OR e.description LIKE :search2)'; $params['search'] = '%' . $search . '%'; $params['search2'] = '%' . $search . '%'; } $sql .= ' GROUP BY e.id ORDER BY chunk_count DESC, e.name LIMIT 100'; $stmt = $this->db->prepare($sql); $stmt->execute($params); $entities = $stmt->fetchAll(); // Statistiken $stats = $this->db->query( 'SELECT type, COUNT(*) as count FROM entities GROUP BY type' )->fetchAll(); $this->view('semantic-explorer.entitaeten.index', [ 'title' => 'Entitaeten', 'entities' => $entities, 'stats' => $stats, 'currentType' => $type, 'currentSearch' => $search, ]); } /** * GET /semantic-explorer/entitaeten/{id} * Entitaet-Details */ public function entitaetShow(int $id): void { $stmt = $this->db->prepare('SELECT * FROM entities WHERE id = :id'); $stmt->execute(['id' => $id]); $entity = $stmt->fetch(); if ($entity === false) { http_response_code(404); echo '404 - Entitaet nicht gefunden'; return; } // Synonyme $stmt = $this->db->prepare('SELECT * FROM entity_synonyms WHERE entity_id = :id'); $stmt->execute(['id' => $id]); $synonyms = $stmt->fetchAll(); // Relationen (ausgehend) $stmt = $this->db->prepare( 'SELECT er.*, e.name as target_name, e.type as target_type FROM entity_relations er JOIN entities e ON er.target_entity_id = e.id WHERE er.source_entity_id = :id ORDER BY er.strength DESC' ); $stmt->execute(['id' => $id]); $outgoingRelations = $stmt->fetchAll(); // Relationen (eingehend) $stmt = $this->db->prepare( 'SELECT er.*, e.name as source_name, e.type as source_type FROM entity_relations er JOIN entities e ON er.source_entity_id = e.id WHERE er.target_entity_id = :id ORDER BY er.strength DESC' ); $stmt->execute(['id' => $id]); $incomingRelations = $stmt->fetchAll(); // Chunks mit dieser Entitaet $stmt = $this->db->prepare( 'SELECT c.id, c.content, c.token_count, d.filename, ce.relevance_score FROM chunk_entities ce JOIN chunks c ON ce.chunk_id = c.id JOIN documents d ON c.document_id = d.id WHERE ce.entity_id = :id ORDER BY ce.relevance_score DESC LIMIT 20' ); $stmt->execute(['id' => $id]); $chunks = $stmt->fetchAll(); // Ontologie-Klassen $stmt = $this->db->prepare( 'SELECT oc.*, ec.confidence FROM entity_classifications ec JOIN ontology_classes oc ON ec.ontology_class_id = oc.id WHERE ec.entity_id = :id' ); $stmt->execute(['id' => $id]); $classifications = $stmt->fetchAll(); $this->view('semantic-explorer.entitaeten.show', [ 'title' => $entity['name'], 'entity' => $entity, 'synonyms' => $synonyms, 'outgoingRelations' => $outgoingRelations, 'incomingRelations' => $incomingRelations, 'chunks' => $chunks, 'classifications' => $classifications, ]); } /** * GET /semantic-explorer/relationen * Beziehungen zwischen Entitaeten */ public function relationen(): void { $type = $_GET['type'] ?? ''; $sql = 'SELECT er.*, es.name as source_name, es.type as source_type, et.name as target_name, et.type as target_type FROM entity_relations er JOIN entities es ON er.source_entity_id = es.id JOIN entities et ON er.target_entity_id = et.id WHERE 1=1'; $params = []; if ($type !== '') { $sql .= ' AND er.relation_type = :type'; $params['type'] = $type; } $sql .= ' ORDER BY er.strength DESC LIMIT 100'; $stmt = $this->db->prepare($sql); $stmt->execute($params); $relations = $stmt->fetchAll(); // Relation-Typen $relationTypes = $this->db->query( 'SELECT relation_type, COUNT(*) as count FROM entity_relations GROUP BY relation_type ORDER BY count DESC' )->fetchAll(); // Statistiken $stats = $this->db->query( 'SELECT (SELECT COUNT(*) FROM entity_relations) as total, (SELECT COUNT(DISTINCT source_entity_id) FROM entity_relations) as sources, (SELECT COUNT(DISTINCT target_entity_id) FROM entity_relations) as targets' )->fetch(); $this->view('semantic-explorer.relationen', [ 'title' => 'Relationen', 'relations' => $relations, 'relationTypes' => $relationTypes, 'stats' => $stats, 'currentType' => $type, ]); } /** * GET /semantic-explorer/taxonomie * Hierarchische Kategorisierung */ public function taxonomie(): void { // Alle Terms mit Chunk-Counts $terms = $this->db->query( 'SELECT t.*, COUNT(DISTINCT ct.chunk_id) as chunk_count, (SELECT COUNT(*) FROM taxonomy_terms WHERE parent_id = t.id) as children_count FROM taxonomy_terms t LEFT JOIN chunk_taxonomy ct ON t.id = ct.taxonomy_term_id GROUP BY t.id ORDER BY t.path, t.name' )->fetchAll(); // Hierarchie aufbauen $hierarchy = $this->buildTaxonomyTree($terms); // Statistiken $stats = $this->db->query( 'SELECT (SELECT COUNT(*) FROM taxonomy_terms) as total_terms, (SELECT COUNT(*) FROM taxonomy_terms WHERE parent_id IS NULL) as root_terms, (SELECT MAX(depth) FROM taxonomy_terms) as max_depth, (SELECT COUNT(DISTINCT chunk_id) FROM chunk_taxonomy) as tagged_chunks' )->fetch(); $this->view('semantic-explorer.taxonomie', [ 'title' => 'Taxonomie', 'terms' => $terms, 'hierarchy' => $hierarchy, 'stats' => $stats, ]); } /** * Baut Baum aus flacher Liste */ private function buildTaxonomyTree(array $items, ?int $parentId = null): array { $tree = []; foreach ($items as $item) { if ($item['parent_id'] == $parentId) { $item['children'] = $this->buildTaxonomyTree($items, $item['id']); $tree[] = $item; } } return $tree; } /** * GET /semantic-explorer/ontologie * Konzept-Klassen */ public function ontologie(): void { // Alle Klassen mit Entity-Counts $classes = $this->db->query( 'SELECT oc.*, COUNT(DISTINCT ec.entity_id) as entity_count, (SELECT COUNT(*) FROM ontology_classes WHERE parent_class_id = oc.id) as subclass_count FROM ontology_classes oc LEFT JOIN entity_classifications ec ON oc.id = ec.ontology_class_id GROUP BY oc.id ORDER BY oc.name' )->fetchAll(); // Properties dekodieren foreach ($classes as &$class) { $class['properties_decoded'] = json_decode($class['properties'] ?? '{}', true) ?: []; } // Statistiken $stats = $this->db->query( 'SELECT (SELECT COUNT(*) FROM ontology_classes) as total_classes, (SELECT COUNT(*) FROM ontology_classes WHERE parent_class_id IS NULL) as root_classes, (SELECT COUNT(DISTINCT entity_id) FROM entity_classifications) as classified_entities' )->fetch(); $this->view('semantic-explorer.ontologie', [ 'title' => 'Ontologie', 'classes' => $classes, 'stats' => $stats, ]); } /** * GET /semantic-explorer/semantik * Semantische Analyse pro Chunk */ public function semantik(): void { $sentiment = $_GET['sentiment'] ?? ''; $page = max(1, (int) ($_GET['page'] ?? 1)); $limit = 50; $offset = ($page - 1) * $limit; $sql = 'SELECT cs.*, c.content, c.token_count, d.filename, d.id as document_id FROM chunk_semantics cs JOIN chunks c ON cs.chunk_id = c.id JOIN documents d ON c.document_id = d.id WHERE 1=1'; $countSql = 'SELECT COUNT(*) FROM chunk_semantics cs WHERE 1=1'; $params = []; if ($sentiment !== '') { $sql .= ' AND cs.sentiment = :sentiment'; $countSql .= ' AND cs.sentiment = :sentiment'; $params['sentiment'] = $sentiment; } // Count $countStmt = $this->db->prepare($countSql); $countStmt->execute($params); $totalCount = $countStmt->fetchColumn(); $sql .= ' ORDER BY cs.analyzed_at DESC LIMIT ' . $limit . ' OFFSET ' . $offset; $stmt = $this->db->prepare($sql); $stmt->execute($params); $semantics = $stmt->fetchAll(); // JSON dekodieren foreach ($semantics as &$s) { $s['keywords_decoded'] = json_decode($s['keywords'] ?? '[]', true) ?: []; $s['topics_decoded'] = json_decode($s['topics'] ?? '[]', true) ?: []; } // Statistiken $stats = $this->db->query( 'SELECT COUNT(*) as total, SUM(CASE WHEN sentiment = "positive" THEN 1 ELSE 0 END) as positive, SUM(CASE WHEN sentiment = "negative" THEN 1 ELSE 0 END) as negative, SUM(CASE WHEN sentiment = "neutral" THEN 1 ELSE 0 END) as neutral, SUM(CASE WHEN sentiment = "mixed" THEN 1 ELSE 0 END) as mixed FROM chunk_semantics' )->fetch(); $this->view('semantic-explorer.semantik', [ 'title' => 'Semantik', 'semantics' => $semantics, 'stats' => $stats, 'currentSentiment' => $sentiment, 'currentPage' => $page, 'totalCount' => $totalCount, 'totalPages' => ceil($totalCount / $limit), ]); } }