Backup #1495

ID: 1495
Dateipfad: /var/www/dev.campus.systemische-tools.de/src/Infrastructure/Docs/Doc2VectorPipeline.php
Version: 3
Typ: modified
Größe: 4.5 KB
Hash: 96a9a936834784cbd02c1e51889b7031f9096384452b156ac4e3c6e19be05417
Datum: 2025-12-25 17:31:48
Geändert von: claude-code-hook
Grund: Claude Code Pre-Hook Backup vor Edit-Operation
Datei existiert: Ja

Dateiinhalt

<?php

declare(strict_types=1);

namespace Infrastructure\Docs;

// @responsibility: Orchestriert Doc2Vector-Pipeline (Chunking → Analyse → Sync)

/**
 * Facade over the Doc2Vector services: runs the chunking, analysis and sync
 * stages in order and forwards statistics, search and single-item requests
 * to the injected collaborators.
 *
 * The run* methods write their progress directly to standard output via echo/printf.
 */
final class Doc2VectorPipeline
{
    /**
     * Stores the four collaborators every method of this class delegates to.
     */
    public function __construct(
        private ChunkingService $chunking,
        private ChunkAnalysisService $analysis,
        private ChunkSyncService $sync,
        private HybridSearchService $search
    ) {
    }

    /**
     * Executes all three stages (chunking, analysis, sync) back to back,
     * printing a progress line before and a summary line after each stage.
     *
     * The analysis and sync stages are each invoked with the argument 1000.
     * NOTE(review): presumably an upper bound on processed chunks — confirm
     * against ChunkAnalysisService / ChunkSyncService.
     *
     * @return array{
     *     chunking: array{documents: int, chunks: int, tokens: int, errors: array<string>},
     *     analysis: array{analyzed: int, failed: int, errors: array<string>},
     *     sync: array{synced: int, failed: int, errors: array<string>},
     *     duration_seconds: float
     * }
     */
    public function runFull(): array
    {
        $startedAt = microtime(true);

        echo '=== Doc2Vector Pipeline ===', PHP_EOL, PHP_EOL;

        // Stage 1: split documents into chunks.
        echo 'Stage 1: Chunking documents...', PHP_EOL;
        $chunkingReport = $this->chunking->chunkAll();
        printf(
            '  Completed: %d documents, %d chunks, %d tokens' . PHP_EOL,
            $chunkingReport['documents'],
            $chunkingReport['chunks'],
            $chunkingReport['tokens']
        );

        // Stage 2: analyze the pending chunks.
        echo PHP_EOL, 'Stage 2: LLM Analysis (this may take a while)...', PHP_EOL;
        $analysisReport = $this->analysis->analyzeAllPending(1000);
        printf(
            '  Completed: %d analyzed, %d failed' . PHP_EOL,
            $analysisReport['analyzed'],
            $analysisReport['failed']
        );

        // Stage 3: push the pending chunks to Qdrant.
        echo PHP_EOL, 'Stage 3: Syncing to Qdrant...', PHP_EOL;
        $syncReport = $this->sync->syncAllPending(1000);
        printf(
            '  Completed: %d synced, %d failed' . PHP_EOL,
            $syncReport['synced'],
            $syncReport['failed']
        );

        // Wall-clock time is measured before the closing line is printed.
        $elapsedSeconds = microtime(true) - $startedAt;

        echo PHP_EOL;
        printf('Pipeline completed in %.1f seconds' . PHP_EOL, $elapsedSeconds);

        return [
            'chunking' => $chunkingReport,
            'analysis' => $analysisReport,
            'sync' => $syncReport,
            'duration_seconds' => $elapsedSeconds,
        ];
    }

    /**
     * Runs the analysis and sync stages only; the chunking stage is not invoked here.
     *
     * Both stages are called with the argument 100 and each prints a
     * one-line summary. The result holds the two stage reports under
     * 'analysis' and 'sync' plus the elapsed time under 'duration_seconds'.
     *
     * @return array<string, mixed>
     */
    public function runIncremental(): array
    {
        $startedAt = microtime(true);

        echo '=== Incremental Pipeline ===', PHP_EOL, PHP_EOL;

        // Analysis of chunks that are still pending.
        echo 'Analyzing pending chunks...', PHP_EOL;
        $analysisReport = $this->analysis->analyzeAllPending(100);
        printf(
            '  %d analyzed, %d failed' . PHP_EOL,
            $analysisReport['analyzed'],
            $analysisReport['failed']
        );

        // Sync of chunks that are still pending.
        echo 'Syncing to Qdrant...', PHP_EOL;
        $syncReport = $this->sync->syncAllPending(100);
        printf(
            '  %d synced, %d failed' . PHP_EOL,
            $syncReport['synced'],
            $syncReport['failed']
        );

        $elapsedSeconds = microtime(true) - $startedAt;

        return [
            'analysis' => $analysisReport,
            'sync' => $syncReport,
            'duration_seconds' => $elapsedSeconds,
        ];
    }

    /**
     * Collects the getStats() output of the chunking, analysis and sync
     * services together with the taxonomy categories of the search service.
     *
     * @return array<string, mixed>
     */
    public function getStats(): array
    {
        // Array elements are evaluated top to bottom, so the services are
        // queried in the order chunking, analysis, sync, search.
        return [
            'chunks' => $this->chunking->getStats(),
            'analysis' => $this->analysis->getStats(),
            'qdrant' => $this->sync->getStats(),
            'taxonomy_categories' => $this->search->getTaxonomyCategories(),
        ];
    }

    /**
     * Forwards a query unchanged to the hybrid search service.
     *
     * @param array<string, mixed> $filters
     * @return array<array<string, mixed>>
     */
    public function search(string $query, array $filters = [], int $limit = 10): array
    {
        $hits = $this->search->search($query, $filters, $limit);

        return $hits;
    }

    /**
     * Delegates to the chunking service to chunk the document with the given id.
     *
     * @return array{chunks_created: int, tokens_total: int}
     */
    public function rechunkDocument(int $docId): array
    {
        $chunkingOutcome = $this->chunking->chunkDocument($docId);

        return $chunkingOutcome;
    }

    /**
     * Delegates to the analysis service to analyze the chunk with the given id.
     *
     * @return array{taxonomy: array<string>, entities: array<mixed>, keywords: array<string>}
     */
    public function reanalyzeChunk(int $chunkId): array
    {
        $analysisOutcome = $this->analysis->analyzeChunk($chunkId);

        return $analysisOutcome;
    }
}

Vollständig herunterladen

Aktionen

Herunterladen

Andere Versionen dieser Datei

ID Version Typ Größe Datum
1853 10 modified 4.6 KB 2025-12-27 23:46
1852 9 modified 4.5 KB 2025-12-27 23:45
1851 8 modified 4.5 KB 2025-12-27 23:45
1850 7 modified 4.5 KB 2025-12-27 23:45
1848 6 modified 4.5 KB 2025-12-27 23:45
1497 5 modified 4.5 KB 2025-12-25 17:32
1496 4 modified 4.5 KB 2025-12-25 17:31
1495 3 modified 4.5 KB 2025-12-25 17:31
862 2 modified 4.7 KB 2025-12-23 08:50
788 1 modified 4.9 KB 2025-12-23 08:05

← Zurück zur Übersicht