Backup #1851
| ID | 1851 |
| Dateipfad | /var/www/dev.campus.systemische-tools.de/src/Infrastructure/Docs/Doc2VectorPipeline.php |
| Version | 8 |
| Typ | modified |
| Größe | 4.5 KB |
| Hash | 95e672ccd5523702f2f0b5efd96ea4d13f9a499afcd7741b76b15ec5704ce73a |
| Datum | 2025-12-27 23:45:49 |
| Geändert von | claude-code-hook |
| Grund | Claude Code Pre-Hook Backup vor Edit-Operation |
| Datei existiert | Ja |
Dateiinhalt
<?php
declare(strict_types=1);
namespace Infrastructure\Docs;
// @responsibility: Orchestriert Doc2Vector-Pipeline (Chunking → Analyse → Sync)
use Domain\Constants;
/**
 * Orchestrates the Doc2Vector pipeline: chunking, LLM analysis and Qdrant sync.
 *
 * The run* methods echo progress lines while the stages execute and return the
 * stage results exactly as the underlying services produced them. The remaining
 * methods are thin delegations to the injected services.
 */
final class Doc2VectorPipeline
{
    /** Limit passed to the sync stage by runFull() when the caller supplies none. */
    private const FULL_SYNC_LIMIT = 1000;

    /** Limit passed to both stages by runIncremental() when the caller supplies none. */
    private const INCREMENTAL_LIMIT = 100;

    public function __construct(
        private ChunkingService $chunking,
        private ChunkAnalysisService $analysis,
        private ChunkSyncService $sync,
        private HybridSearchService $search,
    ) {
    }

    /**
     * Runs the full pipeline: chunk all documents, analyze pending chunks, sync to Qdrant.
     *
     * Both limits are forwarded unchanged to the respective service; passing null
     * keeps the previous fixed behaviour.
     *
     * @param int|null $analysisLimit Limit forwarded to analyzeAllPending(); null uses Constants::BATCH_LIMIT.
     * @param int|null $syncLimit     Limit forwarded to syncAllPending(); null uses 1000.
     *
     * @return array{
     *     chunking: array{documents: int, chunks: int, tokens: int, errors: array<string>},
     *     analysis: array{processed: int, failed: int, errors: array<string>},
     *     sync: array{synced: int, failed: int, errors: array<string>},
     *     duration_seconds: float
     * }
     */
    public function runFull(?int $analysisLimit = null, ?int $syncLimit = null): array
    {
        $start = microtime(true);
        echo '=== Doc2Vector Pipeline ===' . PHP_EOL . PHP_EOL;

        // Stage 1: Chunking
        echo 'Stage 1: Chunking documents...' . PHP_EOL;
        $chunkResult = $this->chunking->chunkAll();
        printf(
            ' Completed: %d documents, %d chunks, %d tokens' . PHP_EOL,
            $chunkResult['documents'],
            $chunkResult['chunks'],
            $chunkResult['tokens']
        );

        // Stage 2: Analysis
        echo PHP_EOL . 'Stage 2: LLM Analysis (this may take a while)...' . PHP_EOL;
        $analysisResult = $this->analysis->analyzeAllPending($analysisLimit ?? Constants::BATCH_LIMIT);
        printf(
            ' Completed: %d analyzed, %d failed' . PHP_EOL,
            $analysisResult['processed'],
            $analysisResult['failed']
        );

        // Stage 3: Sync to Qdrant
        echo PHP_EOL . 'Stage 3: Syncing to Qdrant...' . PHP_EOL;
        $syncResult = $this->sync->syncAllPending($syncLimit ?? self::FULL_SYNC_LIMIT);
        printf(
            ' Completed: %d synced, %d failed' . PHP_EOL,
            $syncResult['synced'],
            $syncResult['failed']
        );

        $duration = microtime(true) - $start;
        echo PHP_EOL . sprintf('Pipeline completed in %.1f seconds' . PHP_EOL, $duration);

        return [
            'chunking' => $chunkResult,
            'analysis' => $analysisResult,
            'sync' => $syncResult,
            'duration_seconds' => $duration,
        ];
    }

    /**
     * Analyzes pending chunks and syncs pending chunks to Qdrant.
     *
     * The chunking stage is NOT run here; documents must already have been
     * chunked (e.g. via runFull() or rechunkDocument()) for anything to be pending.
     *
     * @param int|null $analysisLimit Limit forwarded to analyzeAllPending(); null uses 100.
     * @param int|null $syncLimit     Limit forwarded to syncAllPending(); null uses 100.
     *
     * @return array{
     *     analysis: array{processed: int, failed: int, errors: array<string>},
     *     sync: array{synced: int, failed: int, errors: array<string>},
     *     duration_seconds: float
     * }
     */
    public function runIncremental(?int $analysisLimit = null, ?int $syncLimit = null): array
    {
        $start = microtime(true);
        echo '=== Incremental Pipeline ===' . PHP_EOL . PHP_EOL;

        // Only analyze pending chunks
        echo 'Analyzing pending chunks...' . PHP_EOL;
        $analysisResult = $this->analysis->analyzeAllPending($analysisLimit ?? self::INCREMENTAL_LIMIT);
        printf(' %d analyzed, %d failed' . PHP_EOL, $analysisResult['processed'], $analysisResult['failed']);

        // Sync unsynced chunks
        echo 'Syncing to Qdrant...' . PHP_EOL;
        $syncResult = $this->sync->syncAllPending($syncLimit ?? self::INCREMENTAL_LIMIT);
        printf(' %d synced, %d failed' . PHP_EOL, $syncResult['synced'], $syncResult['failed']);

        return [
            'analysis' => $analysisResult,
            'sync' => $syncResult,
            'duration_seconds' => microtime(true) - $start,
        ];
    }

    /**
     * Collects the statistics of every stage plus the taxonomy categories
     * reported by the search service.
     *
     * @return array<string, mixed> Keys: chunks, analysis, qdrant, taxonomy_categories.
     */
    public function getStats(): array
    {
        return [
            'chunks' => $this->chunking->getStats(),
            'analysis' => $this->analysis->getStats(),
            'qdrant' => $this->sync->getStats(),
            'taxonomy_categories' => $this->search->getTaxonomyCategories(),
        ];
    }

    /**
     * Performs a search by delegating to the hybrid search service.
     *
     * @param array<string, mixed> $filters
     * @return array<array<string, mixed>>
     */
    public function search(string $query, array $filters = [], int $limit = 10): array
    {
        return $this->search->search($query, $filters, $limit);
    }

    /**
     * Re-chunks a specific document via the chunking service.
     *
     * @return array{chunks_created: int, tokens_total: int}
     */
    public function rechunkDocument(int $docId): array
    {
        return $this->chunking->chunkDocument($docId);
    }

    /**
     * Re-analyzes a specific chunk via the analysis service.
     *
     * @return array{taxonomy: array<string>, entities: array<mixed>, keywords: array<string>}
     */
    public function reanalyzeChunk(int $chunkId): array
    {
        return $this->analysis->analyzeChunk($chunkId);
    }
}
Vollständig herunterladen
Aktionen
Andere Versionen dieser Datei
| ID | Version | Typ | Größe | Datum |
| 1853 | 10 | modified | 4.6 KB | 2025-12-27 23:46 |
| 1852 | 9 | modified | 4.5 KB | 2025-12-27 23:45 |
| 1851 | 8 | modified | 4.5 KB | 2025-12-27 23:45 |
| 1850 | 7 | modified | 4.5 KB | 2025-12-27 23:45 |
| 1848 | 6 | modified | 4.5 KB | 2025-12-27 23:45 |
| 1497 | 5 | modified | 4.5 KB | 2025-12-25 17:32 |
| 1496 | 4 | modified | 4.5 KB | 2025-12-25 17:31 |
| 1495 | 3 | modified | 4.5 KB | 2025-12-25 17:31 |
| 862 | 2 | modified | 4.7 KB | 2025-12-23 08:50 |
| 788 | 1 | modified | 4.9 KB | 2025-12-23 08:05 |
← Zurück zur Übersicht