ChunkAnalysisService.php

Code Hygiene Score: 100

Keine Issues gefunden.

Dependencies 5

Klassen 1

Funktionen 6

Verwendet von 2

Versionen 19

Code

<?php

declare(strict_types=1);

namespace Infrastructure\Docs;

// @responsibility: Orchestrates chunk analysis (coordinates ChunkAnalyzer + ChunkDataRepository)

use Domain\Constants;
use RuntimeException;

/**
 * Coordinates the analysis of documentation chunks.
 *
 * Loads a chunk through the repository, hands it to the analyzer together with
 * its document context, and persists the result. Status transitions
 * ('processing' / 'failed') are written through the repository.
 */
final class ChunkAnalysisService implements ChunkProcessorInterface
{
    /** Model identifier passed to the repository when analysis results are stored. */
    private const string TAXONOMY_MODEL = 'gemma3:4b-it-qat';

    /** A progress line is printed every time this many chunks have been processed in a batch. */
    private const int BATCH_SIZE = 10;

    public function __construct(
        private readonly ChunkDataRepository $repository,
        private readonly ChunkAnalyzer $analyzer,
    ) {
    }

    /**
     * Analyzes a single chunk. Convenience alias for {@see process()}.
     *
     * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}
     *
     * @throws RuntimeException If the chunk does not exist.
     */
    public function analyzeChunk(int $chunkId): array
    {
        return $this->process($chunkId);
    }

    /**
     * Processes a single chunk (implements ChunkProcessorInterface).
     *
     * The chunk is marked 'processing' before the analysis starts. If anything
     * fails afterwards, the chunk is marked 'failed' with the error message and
     * the original exception is rethrown unchanged.
     *
     * @return array{taxonomy: array<string>, entities: array<array{name: string, type: string}>, keywords: array<string>}
     *
     * @throws RuntimeException If the chunk does not exist.
     * @throws \Throwable       Any error raised while analyzing or storing is rethrown as-is.
     */
    public function process(int $chunkId): array
    {
        $chunk = $this->repository->findById($chunkId);

        if ($chunk === null) {
            throw new RuntimeException("Chunk #{$chunkId} not found");
        }

        $this->repository->updateStatus($chunkId, 'processing');

        try {
            $docContext = $this->repository->getDocumentContext((int) $chunk['dokumentation_id']);
            $analysis = $this->analyzer->analyze($chunk, $docContext);

            $this->repository->storeAnalysisResults($chunkId, $analysis, self::TAXONOMY_MODEL);

            return $analysis;
        } catch (\Throwable $e) {
            // Catch every throwable, not only RuntimeException: otherwise a TypeError,
            // JsonException, LogicException, etc. would leave the chunk stuck in
            // 'processing' forever. The exception itself is rethrown untouched.
            $this->repository->updateStatus($chunkId, 'failed', $e->getMessage());

            throw $e;
        }
    }

    /**
     * Analyzes all pending chunks in batches. Convenience alias for {@see processBatch()}.
     *
     * @return array{processed: int, failed: int, errors: array<string>, analyzed: int}
     */
    public function analyzeAllPending(int $limit = Constants::DEFAULT_LIMIT): array
    {
        return $this->processBatch($limit);
    }

    /**
     * Processes multiple chunks in batch (implements ChunkProcessorInterface).
     *
     * Fetches up to $limit pending chunks from the repository and processes them
     * one by one. A RuntimeException from a single chunk is recorded in 'errors'
     * and the batch continues; any other throwable aborts the batch and propagates.
     *
     * Side effect: prints a progress line to standard output after every
     * BATCH_SIZE successfully processed chunks.
     *
     * @return array{processed: int, failed: int, errors: array<string>, analyzed: int}
     *         'analyzed' is a legacy alias that always equals 'processed'.
     */
    public function processBatch(int $limit): array
    {
        $results = ['processed' => 0, 'failed' => 0, 'errors' => []];

        $chunks = $this->repository->findPending($limit);

        foreach ($chunks as $chunk) {
            try {
                $this->process((int) $chunk['id']);
                $results['processed']++;

                // Progress output
                if ($results['processed'] % self::BATCH_SIZE === 0) {
                    echo "Analyzed {$results['processed']} chunks...\n";
                }
            } catch (RuntimeException $e) {
                $results['failed']++;
                $results['errors'][] = "Chunk #{$chunk['id']}: " . $e->getMessage();
            }
        }

        // Return with legacy key 'analyzed' for backward compatibility
        $results['analyzed'] = $results['processed'];

        return $results;
    }

    /**
     * Gets analysis statistics (implements ChunkProcessorInterface).
     *
     * Returns the repository's statistics unchanged.
     *
     * @return array{pending: int, processing: int, completed: int, failed: int, by_category: array<array{category: string, count: int}>}
     */
    public function getStats(): array
    {
        return $this->repository->getStats();
    }
}
← Übersicht Graph