Backup #10

ID10
Dateipfad/var/www/dev.campus.systemische-tools.de/src/Controller/SemanticExplorerController.php
Version1
Typ modified
Größe23.5 KB
Hashc09ecc2cb5a8fff7e0b9fc600173897737dbafcff7b5f0128b60ea21605fd717
Datum2025-12-20 16:35:28
Geändert vonclaude-code-hook
GrundClaude Code Pre-Hook Backup vor Write-Operation
Datei existiert Ja

Dateiinhalt

<?php

namespace Controller;

use Framework\Controller;

/**
 * SemanticExplorerController - Nutzdaten Explorer
 *
 * Zeigt Dokumente und Chunks aus Nextcloud (documents, chunks Tabellen).
 * Für Endnutzer - Coaching-Materialien, PDFs, später Mails.
 */
class SemanticExplorerController extends Controller
{
    private \PDO $db;

    public function __construct()
    {
        $this->db = new \PDO(
            'mysql:host=localhost;dbname=ki_content;charset=utf8mb4',
            'root',
            $this->getPassword(),
            [
                \PDO::ATTR_ERRMODE => \PDO::ERRMODE_EXCEPTION,
                \PDO::ATTR_DEFAULT_FETCH_MODE => \PDO::FETCH_ASSOC,
            ]
        );
    }

    private function getPassword(): string
    {
        $file = '/var/www/docs/credentials/credentials.md';
        $content = file_get_contents($file);
        foreach (explode("\n", $content) as $line) {
            if (str_contains($line, 'MariaDB') && str_contains($line, 'root')) {
                $parts = explode('|', $line);
                if (count($parts) >= 4) {
                    return trim($parts[3]);
                }
            }
        }

        return '';
    }

    /**
     * GET /semantic-explorer
     * Dashboard mit Statistiken
     */
    public function index(): void
    {
        // Dokument-Statistiken
        $docStats = $this->db->query(
            'SELECT
                COUNT(*) as total,
                SUM(CASE WHEN status = "done" THEN 1 ELSE 0 END) as processed,
                SUM(CASE WHEN status = "error" THEN 1 ELSE 0 END) as errors
             FROM documents'
        )->fetch();

        // Chunk-Statistiken
        $chunkStats = $this->db->query(
            'SELECT
                COUNT(*) as total,
                COALESCE(SUM(token_count), 0) as tokens,
                SUM(CASE WHEN qdrant_id IS NOT NULL THEN 1 ELSE 0 END) as embedded
             FROM chunks'
        )->fetch();

        // Dokumente
        $documents = $this->db->query(
            'SELECT d.id, d.filename, d.folder_path, d.mime_type, d.file_size,
                    d.status, d.imported_at, d.processed_at,
                    (SELECT COUNT(*) FROM chunks WHERE document_id = d.id) as chunk_count
             FROM documents d
             ORDER BY d.imported_at DESC'
        )->fetchAll();

        // Neueste Chunks
        $recentChunks = $this->db->query(
            'SELECT c.id, c.content, c.token_count, c.created_at, c.qdrant_id,
                    d.filename
             FROM chunks c
             JOIN documents d ON c.document_id = d.id
             ORDER BY c.created_at DESC
             LIMIT 5'
        )->fetchAll();

        $this->view('semantic-explorer.index', [
            'title' => 'Semantic Explorer',
            'docStats' => $docStats,
            'chunkStats' => $chunkStats,
            'documents' => $documents,
            'recentChunks' => $recentChunks,
        ]);
    }

    /**
     * GET /semantic-explorer/dokumente
     * Liste aller Dokumente
     */
    public function dokumente(): void
    {
        $status = $_GET['status'] ?? '';
        $search = $_GET['search'] ?? '';

        $sql = 'SELECT d.id, d.filename, d.folder_path, d.source_path, d.mime_type,
                       d.file_size, d.status, d.imported_at, d.processed_at, d.error_message,
                       (SELECT COUNT(*) FROM chunks WHERE document_id = d.id) as chunk_count,
                       (SELECT COALESCE(SUM(token_count), 0) FROM chunks WHERE document_id = d.id) as token_count
                FROM documents d
                WHERE 1=1';

        $params = [];

        if ($status !== '') {
            $sql .= ' AND d.status = :status';
            $params['status'] = $status;
        }

        if ($search !== '') {
            $sql .= ' AND (d.filename LIKE :search OR d.source_path LIKE :search2)';
            $params['search'] = '%' . $search . '%';
            $params['search2'] = '%' . $search . '%';
        }

        $sql .= ' ORDER BY d.imported_at DESC';

        $stmt = $this->db->prepare($sql);
        $stmt->execute($params);
        $documents = $stmt->fetchAll();

        $this->view('semantic-explorer.dokumente.index', [
            'title' => 'Dokumente',
            'documents' => $documents,
            'currentStatus' => $status,
            'currentSearch' => $search,
        ]);
    }

    /**
     * GET /semantic-explorer/dokumente/{id}
     * Dokument-Details mit Chunks
     */
    public function dokumentShow(int $id): void
    {
        $stmt = $this->db->prepare('SELECT * FROM documents WHERE id = :id');
        $stmt->execute(['id' => $id]);
        $document = $stmt->fetch();

        if ($document === false) {
            http_response_code(404);
            echo '404 - Dokument nicht gefunden';

            return;
        }

        // Chunks dieses Dokuments
        $stmt = $this->db->prepare(
            'SELECT c.id, c.chunk_index, c.content, c.token_count, c.heading_path,
                    c.metadata, c.qdrant_id, c.created_at
             FROM chunks c
             WHERE c.document_id = :id
             ORDER BY c.chunk_index'
        );
        $stmt->execute(['id' => $id]);
        $chunks = $stmt->fetchAll();

        // Heading-Paths dekodieren
        foreach ($chunks as &$chunk) {
            $chunk['heading_path_decoded'] = json_decode($chunk['heading_path'] ?? '[]', true) ?: [];
            $chunk['metadata_decoded'] = json_decode($chunk['metadata'] ?? '{}', true) ?: [];
        }

        $this->view('semantic-explorer.dokumente.show', [
            'title' => $document['filename'],
            'document' => $document,
            'chunks' => $chunks,
        ]);
    }

    /**
     * GET /semantic-explorer/chunks
     * Liste aller Chunks
     */
    public function chunks(): void
    {
        $search = $_GET['search'] ?? '';
        $embedded = $_GET['embedded'] ?? '';
        $page = max(1, (int) ($_GET['page'] ?? 1));
        $limit = 50;
        $offset = ($page - 1) * $limit;

        $sql = 'SELECT c.id, c.chunk_index, c.content, c.token_count, c.qdrant_id, c.created_at,
                       d.filename, d.id as document_id
                FROM chunks c
                JOIN documents d ON c.document_id = d.id
                WHERE 1=1';

        $countSql = 'SELECT COUNT(*) FROM chunks c
                     JOIN documents d ON c.document_id = d.id
                     WHERE 1=1';

        $params = [];

        if ($search !== '') {
            $sql .= ' AND c.content LIKE :search';
            $countSql .= ' AND c.content LIKE :search';
            $params['search'] = '%' . $search . '%';
        }

        if ($embedded === 'yes') {
            $sql .= ' AND c.qdrant_id IS NOT NULL';
            $countSql .= ' AND c.qdrant_id IS NOT NULL';
        } elseif ($embedded === 'no') {
            $sql .= ' AND c.qdrant_id IS NULL';
            $countSql .= ' AND c.qdrant_id IS NULL';
        }

        // Count total
        $countStmt = $this->db->prepare($countSql);
        $countStmt->execute($params);
        $totalCount = $countStmt->fetchColumn();

        $sql .= ' ORDER BY c.created_at DESC LIMIT ' . $limit . ' OFFSET ' . $offset;

        $stmt = $this->db->prepare($sql);
        $stmt->execute($params);
        $chunks = $stmt->fetchAll();

        $this->view('semantic-explorer.chunks.index', [
            'title' => 'Chunks',
            'chunks' => $chunks,
            'currentSearch' => $search,
            'currentEmbedded' => $embedded,
            'currentPage' => $page,
            'totalCount' => $totalCount,
            'totalPages' => ceil($totalCount / $limit),
        ]);
    }

    /**
     * GET /semantic-explorer/chunks/{id}
     * Chunk-Details
     */
    public function chunkShow(int $id): void
    {
        $stmt = $this->db->prepare(
            'SELECT c.*, d.filename, d.source_path, d.id as document_id
             FROM chunks c
             JOIN documents d ON c.document_id = d.id
             WHERE c.id = :id'
        );
        $stmt->execute(['id' => $id]);
        $chunk = $stmt->fetch();

        if ($chunk === false) {
            http_response_code(404);
            echo '404 - Chunk nicht gefunden';

            return;
        }

        // JSON-Felder dekodieren
        $chunk['heading_path_decoded'] = json_decode($chunk['heading_path'] ?? '[]', true) ?: [];
        $chunk['metadata_decoded'] = json_decode($chunk['metadata'] ?? '{}', true) ?: [];

        // Nachbar-Chunks
        $stmt = $this->db->prepare(
            'SELECT id, chunk_index FROM chunks
             WHERE document_id = :doc_id AND chunk_index = :prev'
        );
        $stmt->execute(['doc_id' => $chunk['document_id'], 'prev' => $chunk['chunk_index'] - 1]);
        $prevChunk = $stmt->fetch();

        $stmt = $this->db->prepare(
            'SELECT id, chunk_index FROM chunks
             WHERE document_id = :doc_id AND chunk_index = :next'
        );
        $stmt->execute(['doc_id' => $chunk['document_id'], 'next' => $chunk['chunk_index'] + 1]);
        $nextChunk = $stmt->fetch();

        $this->view('semantic-explorer.chunks.show', [
            'title' => 'Chunk #' . $chunk['id'],
            'chunk' => $chunk,
            'prevChunk' => $prevChunk,
            'nextChunk' => $nextChunk,
        ]);
    }

    /**
     * GET /semantic-explorer/suche
     * Semantische Suche in Nutzdaten
     */
    public function suche(): void
    {
        $query = $_GET['q'] ?? '';
        $limit = min(20, max(1, (int) ($_GET['limit'] ?? 10)));

        $results = [];

        if ($query !== '') {
            // Vektor-Suche via Qdrant
            $results = $this->vectorSearch($query, $limit);
        }

        $this->view('semantic-explorer.suche', [
            'title' => 'Semantische Suche',
            'query' => $query,
            'results' => $results,
            'limit' => $limit,
        ]);
    }

    /**
     * Vektor-Suche in documents Collection
     */
    private function vectorSearch(string $query, int $limit): array
    {
        // Embedding generieren
        $embedding = $this->getEmbedding($query);
        if (empty($embedding)) {
            return [];
        }

        // Qdrant suchen
        $response = $this->qdrantSearch($embedding, $limit);
        if (empty($response)) {
            return [];
        }

        // Chunk-Details aus DB laden
        $results = [];
        foreach ($response as $point) {
            $chunkId = $point['payload']['chunk_id'] ?? null;
            if ($chunkId === null) {
                continue;
            }

            $stmt = $this->db->prepare(
                'SELECT c.*, d.filename, d.source_path
                 FROM chunks c
                 JOIN documents d ON c.document_id = d.id
                 WHERE c.id = :id'
            );
            $stmt->execute(['id' => $chunkId]);
            $chunk = $stmt->fetch();

            if ($chunk !== false) {
                $chunk['score'] = $point['score'];
                $chunk['heading_path_decoded'] = json_decode($chunk['heading_path'] ?? '[]', true) ?: [];
                $results[] = $chunk;
            }
        }

        return $results;
    }

    /**
     * Embedding via Ollama
     */
    private function getEmbedding(string $text): array
    {
        $ch = curl_init('http://localhost:11434/api/embeddings');
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST => true,
            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
            CURLOPT_POSTFIELDS => json_encode([
                'model' => 'mxbai-embed-large',
                'prompt' => $text,
            ]),
        ]);

        $response = curl_exec($ch);
        curl_close($ch);

        $data = json_decode($response, true);

        return $data['embedding'] ?? [];
    }

    /**
     * Qdrant-Suche
     */
    private function qdrantSearch(array $embedding, int $limit): array
    {
        $ch = curl_init('http://localhost:6333/collections/documents/points/search');
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST => true,
            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
            CURLOPT_POSTFIELDS => json_encode([
                'vector' => $embedding,
                'limit' => $limit,
                'with_payload' => true,
            ]),
        ]);

        $response = curl_exec($ch);
        curl_close($ch);

        $data = json_decode($response, true);

        return $data['result'] ?? [];
    }

    /**
     * GET /semantic-explorer/entitaeten
     * Liste aller Entitaeten
     */
    public function entitaeten(): void
    {
        $type = $_GET['type'] ?? '';
        $search = $_GET['search'] ?? '';

        $sql = 'SELECT e.*,
                       COUNT(DISTINCT ce.chunk_id) as chunk_count,
                       COUNT(DISTINCT er.id) as relation_count
                FROM entities e
                LEFT JOIN chunk_entities ce ON e.id = ce.entity_id
                LEFT JOIN entity_relations er ON e.id = er.source_entity_id OR e.id = er.target_entity_id
                WHERE 1=1';

        $params = [];

        if ($type !== '') {
            $sql .= ' AND e.type = :type';
            $params['type'] = $type;
        }

        if ($search !== '') {
            $sql .= ' AND (e.name LIKE :search OR e.description LIKE :search2)';
            $params['search'] = '%' . $search . '%';
            $params['search2'] = '%' . $search . '%';
        }

        $sql .= ' GROUP BY e.id ORDER BY chunk_count DESC, e.name LIMIT 100';

        $stmt = $this->db->prepare($sql);
        $stmt->execute($params);
        $entities = $stmt->fetchAll();

        // Statistiken
        $stats = $this->db->query(
            'SELECT type, COUNT(*) as count FROM entities GROUP BY type'
        )->fetchAll();

        $this->view('semantic-explorer.entitaeten.index', [
            'title' => 'Entitaeten',
            'entities' => $entities,
            'stats' => $stats,
            'currentType' => $type,
            'currentSearch' => $search,
        ]);
    }

    /**
     * GET /semantic-explorer/entitaeten/{id}
     * Entitaet-Details
     */
    public function entitaetShow(int $id): void
    {
        $stmt = $this->db->prepare('SELECT * FROM entities WHERE id = :id');
        $stmt->execute(['id' => $id]);
        $entity = $stmt->fetch();

        if ($entity === false) {
            http_response_code(404);
            echo '404 - Entitaet nicht gefunden';

            return;
        }

        // Synonyme
        $stmt = $this->db->prepare('SELECT * FROM entity_synonyms WHERE entity_id = :id');
        $stmt->execute(['id' => $id]);
        $synonyms = $stmt->fetchAll();

        // Relationen (ausgehend)
        $stmt = $this->db->prepare(
            'SELECT er.*, e.name as target_name, e.type as target_type
             FROM entity_relations er
             JOIN entities e ON er.target_entity_id = e.id
             WHERE er.source_entity_id = :id
             ORDER BY er.strength DESC'
        );
        $stmt->execute(['id' => $id]);
        $outgoingRelations = $stmt->fetchAll();

        // Relationen (eingehend)
        $stmt = $this->db->prepare(
            'SELECT er.*, e.name as source_name, e.type as source_type
             FROM entity_relations er
             JOIN entities e ON er.source_entity_id = e.id
             WHERE er.target_entity_id = :id
             ORDER BY er.strength DESC'
        );
        $stmt->execute(['id' => $id]);
        $incomingRelations = $stmt->fetchAll();

... (245 weitere Zeilen)

Vollständig herunterladen

Aktionen

Herunterladen

Andere Versionen dieser Datei

ID Version Typ Größe Datum
1991 56 modified 9.3 KB 2025-12-28 03:05
1982 55 modified 9.1 KB 2025-12-28 02:54
1699 54 modified 8.8 KB 2025-12-27 12:18
1107 53 modified 8.8 KB 2025-12-25 09:22
1106 52 modified 8.8 KB 2025-12-25 09:22
1105 51 modified 8.8 KB 2025-12-25 09:22
1104 50 modified 8.8 KB 2025-12-25 09:22
1099 49 modified 8.8 KB 2025-12-25 09:20
1098 48 modified 8.8 KB 2025-12-25 09:20
1097 47 modified 8.6 KB 2025-12-25 09:20
1082 46 modified 8.6 KB 2025-12-25 02:29
1081 45 modified 8.4 KB 2025-12-25 02:29
1071 44 modified 8.3 KB 2025-12-25 02:26
1070 43 modified 8.3 KB 2025-12-25 02:26
1069 42 modified 8.3 KB 2025-12-25 02:26
1068 41 modified 8.3 KB 2025-12-25 02:26
1067 40 modified 8.3 KB 2025-12-25 02:26
1066 39 modified 8.3 KB 2025-12-25 02:26
1065 38 modified 8.3 KB 2025-12-25 02:25
1064 37 modified 8.3 KB 2025-12-25 02:25
1063 36 modified 7.8 KB 2025-12-25 02:25
912 35 modified 7.7 KB 2025-12-23 16:41
910 34 modified 7.1 KB 2025-12-23 16:40
698 33 modified 7.2 KB 2025-12-23 07:53
676 32 modified 7.2 KB 2025-12-23 07:00
641 31 modified 7.2 KB 2025-12-23 04:47
612 30 modified 7.3 KB 2025-12-23 04:42
586 29 modified 7.2 KB 2025-12-23 04:24
339 28 modified 7.1 KB 2025-12-22 08:11
338 27 modified 7.0 KB 2025-12-22 08:11
337 26 modified 7.0 KB 2025-12-22 08:11
313 25 modified 7.0 KB 2025-12-22 08:05
312 24 modified 7.0 KB 2025-12-22 08:05
311 23 modified 7.0 KB 2025-12-22 08:05
310 22 modified 7.0 KB 2025-12-22 08:05
289 21 modified 7.1 KB 2025-12-22 08:00
288 20 modified 7.1 KB 2025-12-22 08:00
287 19 modified 7.1 KB 2025-12-22 08:00
286 18 modified 7.1 KB 2025-12-22 08:00
265 17 modified 8.5 KB 2025-12-22 02:07
264 16 modified 8.4 KB 2025-12-22 02:07
263 15 modified 14.7 KB 2025-12-22 02:06
262 14 modified 16.2 KB 2025-12-22 02:05
261 13 modified 19.2 KB 2025-12-22 02:04
260 12 modified 19.8 KB 2025-12-22 02:04
259 11 modified 22.8 KB 2025-12-22 02:02
258 10 modified 24.4 KB 2025-12-22 02:02
224 9 modified 24.4 KB 2025-12-22 01:44
223 8 modified 24.5 KB 2025-12-22 01:44
222 7 modified 24.5 KB 2025-12-22 01:44
221 6 modified 24.6 KB 2025-12-22 01:44
220 5 modified 24.6 KB 2025-12-22 01:44
219 4 modified 24.7 KB 2025-12-22 01:44
218 3 modified 24.7 KB 2025-12-22 01:44
46 2 modified 12.2 KB 2025-12-20 17:44
10 1 modified 23.5 KB 2025-12-20 16:35

← Zurück zur Übersicht