, entities: array, keywords: array} */
    public function analyzeChunk(int $chunkId): array
    {
        // Delegates directly to process(); both entry points behave identically.
        return $this->process($chunkId);
    }

    /**
     * Processes a single chunk (implements ChunkProcessorInterface).
     *
     * Loads the chunk, marks it as 'processing', fetches the context of the
     * document it belongs to, runs the analyzer and stores the results.
     * If anything fails after the status was set to 'processing', the chunk is
     * marked as 'failed' (with the error message) and the error is rethrown.
     *
     * @param int $chunkId ID of the chunk to analyze.
     *
     * @return array{taxonomy: array, entities: array, keywords: array}
     *
     * @throws RuntimeException If the chunk cannot be found.
     * @throws \Throwable       Any error raised while analyzing or storing; rethrown unchanged.
     */
    public function process(int $chunkId): array
    {
        $chunk = $this->repository->findById($chunkId);
        if ($chunk === null) {
            throw new RuntimeException("Chunk #{$chunkId} not found");
        }

        // Mark as processing
        $this->repository->updateStatus($chunkId, 'processing');

        try {
            // Get document context
            $docContext = $this->repository->getDocumentContext((int) $chunk['dokumentation_id']);

            // Perform analysis via ChunkAnalyzer
            $analysis = $this->analyzer->analyze($chunk, $docContext);

            // Store results
            $this->repository->storeAnalysisResults($chunkId, $analysis, self::TAXONOMY_MODEL);

            return $analysis;
        } catch (\Throwable $e) {
            // Catch every Throwable, not only RuntimeException: otherwise a
            // TypeError, JsonException, etc. would leave the chunk stuck in
            // the 'processing' state forever. The same exception is rethrown,
            // so what callers observe does not change.
            $this->repository->updateStatus($chunkId, 'failed', $e->getMessage());

            throw $e;
        }
    }

    /**
     * Analyzes pending chunks by delegating to processBatch().
     *
     * @param int $limit Maximum number of pending chunks to request from the repository.
     *
     * @return array{processed: int, analyzed: int, failed: int, errors: list<string>}
     */
    public function analyzeAllPending(int $limit = 100): array
    {
        return $this->processBatch($limit);
    }

    /**
     * Processes multiple chunks in batch (implements ChunkProcessorInterface).
     *
     * Each pending chunk is passed to process(). A RuntimeException from a
     * single chunk is counted and recorded in 'errors' and the loop continues;
     * any other error type propagates to the caller.
     *
     * The result also carries the legacy key 'analyzed', which always equals
     * 'processed'.
     *
     * @param int $limit Maximum number of pending chunks to request from the repository.
     *
     * @return array{processed: int, analyzed: int, failed: int, errors: list<string>}
     */
    public function processBatch(int $limit): array
    {
        $results = ['processed' => 0, 'failed' => 0, 'errors' => []];

        $chunks = $this->repository->findPending($limit);

        foreach ($chunks as $chunk) {
            try {
                $this->process((int) $chunk['id']);
                $results['processed']++;

                // Progress output every BATCH_SIZE successfully processed chunks
                if ($results['processed'] % self::BATCH_SIZE === 0) {
                    echo "Analyzed {$results['processed']} chunks...\n";
                }
            } catch (RuntimeException $e) {
                $results['failed']++;
                $results['errors'][] = "Chunk #{$chunk['id']}: " . $e->getMessage();
            }
        }

        // Return with legacy key 'analyzed' for backward compatibility
        $results['analyzed'] = $results['processed'];

        return $results;
    }

    /**
     * Gets analysis statistics.
     *
     * Runs two queries against dokumentation_chunks: one counting rows per
     * analysis_status, and one counting rows per non-null taxonomy_category
     * (ordered by count, descending).
     *
     * 'by_category' holds the rows exactly as PDO returns them, each with the
     * keys 'category' and 'count'.
     *
     * @return array{pending: int, processing: int, completed: int, failed: int, by_category: array}
     *
     * @throws RuntimeException If one of the queries could not be executed.
     */
    public function getStats(): array
    {
        $countsStmt = $this->pdo->query(
            " SELECT"
            . " SUM(CASE WHEN analysis_status = 'pending' THEN 1 ELSE 0 END) as pending,"
            . " SUM(CASE WHEN analysis_status = 'processing' THEN 1 ELSE 0 END) as processing,"
            . " SUM(CASE WHEN analysis_status = 'completed' THEN 1 ELSE 0 END) as completed,"
            . " SUM(CASE WHEN analysis_status = 'failed' THEN 1 ELSE 0 END) as failed"
            . " FROM dokumentation_chunks "
        );
        // PDO::query() returns false instead of throwing when PDO is not in
        // exception mode; fail with a clear error rather than calling
        // fetch() on false.
        if ($countsStmt === false) {
            throw new RuntimeException('Failed to query chunk status counts');
        }
        $counts = $countsStmt->fetch(PDO::FETCH_ASSOC);

        $categoryStmt = $this->pdo->query(
            ' SELECT taxonomy_category as category, COUNT(*) as count'
            . ' FROM dokumentation_chunks'
            . ' WHERE taxonomy_category IS NOT NULL'
            . ' GROUP BY taxonomy_category'
            . ' ORDER BY count DESC '
        );
        if ($categoryStmt === false) {
            throw new RuntimeException('Failed to query chunk category counts');
        }
        $byCategory = $categoryStmt->fetchAll(PDO::FETCH_ASSOC);

        // SUM() yields NULL when the table has no rows, hence the `?? 0` fallbacks.
        return [
            'pending' => (int) ($counts['pending'] ?? 0),
            'processing' => (int) ($counts['processing'] ?? 0),
            'completed' => (int) ($counts['completed'] ?? 0),
            'failed' => (int) ($counts['failed'] ?? 0),
            'by_category' => $byCategory,
        ];
    }
}