Backup #1873

ID: 1873
Dateipfad: /var/www/dev.campus.systemische-tools.de/src/Infrastructure/Persistence/ChunkExplorerRepository.php
Version: 1
Typ: modified
Größe: 11.6 KB
Hash: d636797101f86b40d7cce96c379dea0c22e909cf9d8549396799a4c24bcf0d3f
Datum: 2025-12-27 23:51:04
Geändert von: claude-code-hook
Grund: Claude Code Pre-Hook Backup vor Edit-Operation
Datei existiert: Ja

Dateiinhalt

<?php

declare(strict_types=1);

namespace Infrastructure\Persistence;

use Domain\Repository\ChunkExplorerRepositoryInterface;
use Infrastructure\Traits\JsonDecodeTrait;
use PDO;

// @responsibility: Persistenz für Chunk-Explorer (Chunks mit Taxonomie/Entities)

/**
 * PDO-backed read repository for the chunk explorer.
 *
 * Reads from the `dokumentation_chunks` table (joined with `dokumentation`
 * where document title/path are needed) and aggregates the JSON-encoded
 * `keywords` and `entities` columns in PHP.
 */
class ChunkExplorerRepository implements ChunkExplorerRepositoryInterface
{
    use JsonDecodeTrait;

    /**
     * Escape character used for LIKE patterns. A non-backslash character is
     * used so the same SQL works regardless of the server's backslash
     * handling in string literals.
     */
    private const LIKE_ESCAPE_CHAR = '!';

    private PDO $pdo;

    public function __construct(PDO $pdo)
    {
        $this->pdo = $pdo;
    }

    /**
     * Overall chunk statistics.
     *
     * `analyzed` counts rows whose analysis_status is 'completed', `synced`
     * counts rows with a non-null qdrant_id.
     *
     * @return array{total: int, tokens: int, analyzed: int, synced: int}
     */
    public function getChunkStats(): array
    {
        // SUM() over zero rows yields NULL, hence COALESCE on every sum.
        $row = $this->pdo->query(
            "SELECT
                COUNT(*) AS total,
                COALESCE(SUM(token_count), 0) AS tokens,
                COALESCE(SUM(CASE WHEN analysis_status = 'completed' THEN 1 ELSE 0 END), 0) AS analyzed,
                COALESCE(SUM(CASE WHEN qdrant_id IS NOT NULL THEN 1 ELSE 0 END), 0) AS synced
             FROM dokumentation_chunks"
        )->fetch(PDO::FETCH_ASSOC);

        if ($row === false) {
            $row = [];
        }

        // Aggregates may be delivered as numeric strings by the driver;
        // cast so the documented int shape always holds.
        return [
            'total' => (int) ($row['total'] ?? 0),
            'tokens' => (int) ($row['tokens'] ?? 0),
            'analyzed' => (int) ($row['analyzed'] ?? 0),
            'synced' => (int) ($row['synced'] ?? 0),
        ];
    }

    /**
     * Count chunks matching the optional filters.
     *
     * An empty string disables the respective filter. The search term is
     * matched literally as a substring of `content` or `keywords`.
     */
    public function countChunksFiltered(string $category = '', string $status = '', string $search = ''): int
    {
        [$filterSql, $params] = $this->buildFilterConditions($category, $status, $search);

        $stmt = $this->pdo->prepare(
            'SELECT COUNT(*) FROM dokumentation_chunks c
                JOIN dokumentation d ON c.dokumentation_id = d.id
                WHERE 1=1' . $filterSql
        );
        $stmt->execute($params);

        return (int) $stmt->fetchColumn();
    }

    /**
     * Fetch one page of chunks matching the optional filters, newest first.
     *
     * Uses the same filter semantics as countChunksFiltered(). Negative
     * $limit / $offset values are treated as 0.
     *
     * @return array<array<string, mixed>>
     */
    public function getChunksFilteredPaginated(
        string $category = '',
        string $status = '',
        string $search = '',
        int $limit = 50,
        int $offset = 0
    ): array {
        [$filterSql, $params] = $this->buildFilterConditions($category, $status, $search);

        // c.id is a tie-breaker: rows sharing the same created_at would
        // otherwise be ordered arbitrarily, which lets pages overlap or skip rows.
        $sql = 'SELECT c.id, c.chunk_index, c.content, c.token_count, c.taxonomy_category,
                       c.analysis_status, c.qdrant_id, c.created_at,
                       d.title as dokument_title, d.path as dokument_path
                FROM dokumentation_chunks c
                JOIN dokumentation d ON c.dokumentation_id = d.id
                WHERE 1=1' . $filterSql . '
                ORDER BY c.created_at DESC, c.id DESC LIMIT :limit OFFSET :offset';

        $stmt = $this->pdo->prepare($sql);
        foreach ($params as $name => $value) {
            $stmt->bindValue(':' . $name, $value);
        }
        // LIMIT/OFFSET must be bound as integers, not as quoted strings.
        $stmt->bindValue(':limit', max(0, $limit), PDO::PARAM_INT);
        $stmt->bindValue(':offset', max(0, $offset), PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Fetch the most recently created chunks with their document title/path.
     *
     * @return array<array<string, mixed>>
     */
    public function getRecentChunks(int $limit = 5): array
    {
        $stmt = $this->pdo->prepare(
            'SELECT c.id, c.content, c.taxonomy_category, c.token_count, c.created_at,
                    d.title as dokument_title, d.path as dokument_path
             FROM dokumentation_chunks c
             JOIN dokumentation d ON c.dokumentation_id = d.id
             ORDER BY c.created_at DESC, c.id DESC
             LIMIT :limit'
        );
        $stmt->bindValue(':limit', max(0, $limit), PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Fetch a single chunk with document info, or null if the id is unknown.
     *
     * The JSON columns entities, keywords, taxonomy_path and heading_path are
     * additionally exposed in decoded form under "<column>_decoded".
     *
     * @return array<string, mixed>|null
     */
    public function getChunk(int $id): ?array
    {
        $stmt = $this->pdo->prepare(
            'SELECT c.*, d.title as dokument_title, d.path as dokument_path, d.id as dokument_id
             FROM dokumentation_chunks c
             JOIN dokumentation d ON c.dokumentation_id = d.id
             WHERE c.id = :id'
        );
        $stmt->execute(['id' => $id]);
        $result = $stmt->fetch(PDO::FETCH_ASSOC);

        if ($result === false) {
            return null;
        }

        // Decode JSON fields next to their raw values.
        foreach (['entities', 'keywords', 'taxonomy_path', 'heading_path'] as $field) {
            $result[$field . '_decoded'] = $this->decodeJsonArray($result[$field] ?? null);
        }

        return $result;
    }

    /**
     * List the chunks of one document in chunk_index order (summary columns).
     *
     * @return array<array<string, mixed>>
     */
    public function getChunksForDokument(int $dokumentId): array
    {
        $stmt = $this->pdo->prepare(
            'SELECT id, chunk_index, content, token_count, taxonomy_category,
                    analysis_status, qdrant_id, created_at
             FROM dokumentation_chunks
             WHERE dokumentation_id = :id
             ORDER BY chunk_index'
        );
        $stmt->execute(['id' => $dokumentId]);

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * List the chunks of one document in chunk_index order, including the
     * raw (still JSON-encoded) taxonomy_path, entities, keywords and
     * heading_path columns.
     *
     * @return array<array<string, mixed>>
     */
    public function getChunksDetailedForDokument(int $dokumentId): array
    {
        $stmt = $this->pdo->prepare(
            'SELECT c.id, c.chunk_index, c.content, c.token_count, c.taxonomy_category,
                    c.taxonomy_path, c.entities, c.keywords, c.analysis_status, c.qdrant_id,
                    c.heading_path, c.analyzed_at
             FROM dokumentation_chunks c
             WHERE c.dokumentation_id = :id
             ORDER BY c.chunk_index'
        );
        $stmt->execute(['id' => $dokumentId]);

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    }

    /**
     * Look up a chunk by its document id and position within that document.
     *
     * @return array<string, mixed>|null Row with id, chunk_index and content, or null if absent.
     */
    public function getChunkByDokumentAndIndex(int $dokumentId, int $index): ?array
    {
        $stmt = $this->pdo->prepare(
            'SELECT id, chunk_index, content FROM dokumentation_chunks
             WHERE dokumentation_id = :doc_id AND chunk_index = :idx'
        );
        $stmt->execute(['doc_id' => $dokumentId, 'idx' => $index]);
        $result = $stmt->fetch(PDO::FETCH_ASSOC);

        return $result !== false ? $result : null;
    }

    /**
     * All non-null taxonomy categories, sorted alphabetically.
     *
     * @return array<string>
     */
    public function getDistinctCategories(): array
    {
        return $this->pdo->query(
            'SELECT DISTINCT taxonomy_category FROM dokumentation_chunks
             WHERE taxonomy_category IS NOT NULL ORDER BY taxonomy_category'
        )->fetchAll(PDO::FETCH_COLUMN);
    }

    /**
     * The $limit taxonomy categories with the most chunks.
     *
     * @return array<array{taxonomy_category: string, count: int}>
     */
    public function getTopTaxonomyCategories(int $limit = 10): array
    {
        // Secondary sort key keeps the top-N selection stable for equal counts.
        $stmt = $this->pdo->prepare(
            'SELECT taxonomy_category, COUNT(*) as count
             FROM dokumentation_chunks
             WHERE taxonomy_category IS NOT NULL
             GROUP BY taxonomy_category
             ORDER BY count DESC, taxonomy_category ASC
             LIMIT :limit'
        );
        $stmt->bindValue(':limit', max(0, $limit), PDO::PARAM_INT);
        $stmt->execute();

        return array_map(
            static fn (array $row): array => [
                'taxonomy_category' => $row['taxonomy_category'],
                'count' => (int) $row['count'],
            ],
            $stmt->fetchAll(PDO::FETCH_ASSOC)
        );
    }

    /**
     * Chunk count and token sum per taxonomy category, largest category first.
     *
     * @return array<array{taxonomy_category: string, chunk_count: int, token_count: int}>
     */
    public function getCategoriesWithStats(): array
    {
        $rows = $this->pdo->query(
            'SELECT taxonomy_category, COUNT(*) as chunk_count,
                    COALESCE(SUM(token_count), 0) as token_count
             FROM dokumentation_chunks
             WHERE taxonomy_category IS NOT NULL
             GROUP BY taxonomy_category
             ORDER BY chunk_count DESC, taxonomy_category ASC'
        )->fetchAll(PDO::FETCH_ASSOC);

        // SUM() may come back as a numeric string; normalise to the documented ints.
        return array_map(
            static fn (array $row): array => [
                'taxonomy_category' => $row['taxonomy_category'],
                'chunk_count' => (int) $row['chunk_count'],
                'token_count' => (int) $row['token_count'],
            ],
            $rows
        );
    }

    /**
     * Most frequent keywords across all chunks (case-insensitive, trimmed).
     *
     * Keys are the lower-cased keywords, values the number of occurrences,
     * ordered by descending count. Non-string JSON elements are ignored.
     * Note: purely numeric keywords end up as integer array keys (PHP array
     * key normalisation).
     *
     * @return array<string, int>
     */
    public function getTopKeywords(int $limit = 30): array
    {
        $keywordsRaw = $this->pdo->query(
            'SELECT keywords FROM dokumentation_chunks WHERE keywords IS NOT NULL'
        )->fetchAll(PDO::FETCH_COLUMN);

        $keywordCounts = [];
        foreach ($keywordsRaw as $json) {
            foreach ($this->decodeJsonArray($json) as $keyword) {
                // Under strict_types, trim() would throw on numbers/arrays.
                if (!is_string($keyword)) {
                    continue;
                }

                // Multibyte-aware folding so e.g. "Übung" and "übung" are merged.
                $keyword = mb_strtolower(trim($keyword), 'UTF-8');
                if ($keyword === '') {
                    continue;
                }

                $keywordCounts[$keyword] = ($keywordCounts[$keyword] ?? 0) + 1;
            }
        }
        arsort($keywordCounts);

        return array_slice($keywordCounts, 0, max(0, $limit), true);
    }

    /**
     * Most frequent entities across all chunks.
     *
     * Keys have the form "name|type", values are occurrence counts, ordered
     * by descending count.
     *
     * @return array<string, int>
     */
    public function getTopEntitiesRaw(int $limit = 30): array
    {
        $entityCounts = $this->aggregateEntities();
        arsort($entityCounts);

        return array_slice($entityCounts, 0, max(0, $limit), true);
    }

    /**
     * Entities across all chunks, sorted by frequency and grouped by type.
     *
     * `entities` holds the top $limit entries, `by_type` holds ALL entities
     * grouped by their type (not limited), `total` is the number of distinct
     * name/type combinations.
     *
     * @return array{entities: array, by_type: array<string, array>, total: int}
     */
    public function getEntitiesGrouped(int $limit = 100): array
    {
        // array_values(): work on a plain list; the "name|type" keys are not part of the result.
        $entities = array_values($this->collectEntities());

        // Sort by count, most frequent first.
        usort($entities, static fn (array $a, array $b): int => $b['count'] <=> $a['count']);

        // Group by type, keeping the frequency order inside each group.
        $byType = [];
        foreach ($entities as $entity) {
            $byType[$entity['type']][] = $entity;
        }

        return [
            'entities' => array_slice($entities, 0, max(0, $limit)),
            'by_type' => $byType,
            'total' => count($entities),
        ];
    }

    /**
     * Aggregate entities from all chunks.
     *
     * @return array<string, int> Occurrence count keyed by "name|type", in first-seen order.
     */
    private function aggregateEntities(): array
    {
        // array_map() with a single array preserves the string keys.
        return array_map(
            static fn (array $entity): int => $entity['count'],
            $this->collectEntities()
        );
    }

    /**
     * Load and decode the `entities` column of every chunk and count each
     * distinct name/type combination.
     *
     * Entries that are not arrays, or whose name is missing, empty or not a
     * string, are skipped. A missing or non-string type falls back to 'OTHER'.
     *
     * @return array<string, array{name: string, type: string, count: int}> keyed by "name|type"
     */
    private function collectEntities(): array
    {
        $entitiesRaw = $this->pdo->query(
            'SELECT entities FROM dokumentation_chunks WHERE entities IS NOT NULL'
        )->fetchAll(PDO::FETCH_COLUMN);

        $collected = [];
        foreach ($entitiesRaw as $json) {
            foreach ($this->decodeJsonArray($json) as $entity) {
                if (!is_array($entity)) {
                    continue;
                }

                $name = $entity['name'] ?? '';
                if (!is_string($name) || $name === '') {
                    continue;
                }

                $type = $entity['type'] ?? 'OTHER';
                if (!is_string($type)) {
                    $type = 'OTHER';
                }

                $key = $name . '|' . $type;
                if (!isset($collected[$key])) {
                    $collected[$key] = [
                        'name' => $name,
                        'type' => $type,
                        'count' => 0,
                    ];
                }
                $collected[$key]['count']++;
            }
        }

        return $collected;
    }

    /**
     * Build the shared WHERE fragment for the chunk list and count queries.
     *
     * Expects the chunk table to be aliased as `c`. Empty strings disable
     * the respective filter. Two distinct placeholders are used for the
     * search pattern because a named placeholder cannot be reused when
     * prepares are not emulated.
     *
     * @return array{0: string, 1: array<string, string>} SQL fragment (starting with " AND" or empty) and its parameters
     */
    private function buildFilterConditions(string $category, string $status, string $search): array
    {
        $sql = '';
        $params = [];

        if ($category !== '') {
            $sql .= ' AND c.taxonomy_category = :category';
            $params['category'] = $category;
        }

        if ($status !== '') {
            $sql .= ' AND c.analysis_status = :status';
            $params['status'] = $status;
        }

        if ($search !== '') {
            $escapeClause = " ESCAPE '" . self::LIKE_ESCAPE_CHAR . "'";
            $sql .= ' AND (c.content LIKE :search' . $escapeClause
                . ' OR c.keywords LIKE :search2' . $escapeClause . ')';

            $pattern = '%' . $this->escapeLike($search) . '%';
            $params['search'] = $pattern;
            $params['search2'] = $pattern;
        }

        return [$sql, $params];
    }

    /**
     * Escape LIKE wildcards so the value is matched literally.
     *
     * Without this, "%" and "_" in a search term act as wildcards (e.g.
     * "token_count" would also match "tokenXcount").
     */
    private function escapeLike(string $value): string
    {
        $escape = self::LIKE_ESCAPE_CHAR;

        // strtr() with an array replaces in a single pass, so already
        // escaped characters are not escaped a second time.
        return strtr($value, [
            $escape => $escape . $escape,
            '%' => $escape . '%',
            '_' => $escape . '_',
        ]);
    }
}

Vollständig herunterladen

Aktionen

Herunterladen

Andere Versionen dieser Datei

ID Version Typ Größe Datum
1997 3 modified 11.7 KB 2025-12-28 14:00
1878 2 modified 11.6 KB 2025-12-27 23:51
1873 1 modified 11.6 KB 2025-12-27 23:51

← Zurück zur Übersicht