Protokoll #4259
| ID | 4259 |
|---|---|
| Zeitstempel | 2025-12-22 09:42:01.213548 |
| Client | root |
| IP | Warning: Undefined array key "request_ip" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13 Deprecated: htmlspecialchars(): Passing null to parameter #1 ($string) of type string is deprecated in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 13 |
| Modell | claude-sonnet-4-20250514 |
| Status | completed |
| Tokens | 9,906 (Input: 0, Output: 0) |
| Dauer | 112 ms |
| Request-Zeit | Warning: Undefined array key "request_timestamp" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 18 |
| Response-Zeit | - |
Warning: Undefined array key "error_message" in /var/www/dev.campus.systemische-tools.de/src/View/protokoll/show.php on line 22
Request
{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Controller\/ContentPipelineController.php",
"old_string": " \/**\n * @return array<string, array<string, mixed>>\n *\/\n private function getStepTypes(): array\n {\n return [\n \/\/ Phase 1: Preprocessing\n 'detect' => [\n 'label' => '1. Detect',\n 'icon' => 'search',\n 'description' => 'Dateien scannen und Format pruefen',\n 'phase' => 'Preprocessing',\n ],\n 'validate' => [\n 'label' => '2. Validate',\n 'icon' => 'check',\n 'description' => 'Datei-Validierung und Korruptionspruefung',\n 'phase' => 'Preprocessing',\n ],\n 'extract' => [\n 'label' => '3. Extract',\n 'icon' => 'file-text',\n 'description' => 'Text extrahieren und bereinigen',\n 'phase' => 'Preprocessing',\n ],\n 'structure' => [\n 'label' => '4. Structure',\n 'icon' => 'list',\n 'description' => 'Ueberschriften und Struktur erkennen',\n 'phase' => 'Preprocessing',\n ],\n 'segment' => [\n 'label' => '5. Segment',\n 'icon' => 'layout',\n 'description' => 'Logische Dokumentgliederung',\n 'phase' => 'Preprocessing',\n ],\n 'chunk' => [\n 'label' => '6. Chunk',\n 'icon' => 'scissors',\n 'description' => 'Textbausteine erstellen (max 800 Token)',\n 'phase' => 'Preprocessing',\n ],\n \/\/ Phase 2: Embedding & Storage\n 'embed' => [\n 'label' => '7. Embed',\n 'icon' => 'cpu',\n 'description' => 'Vektorisierung mit mxbai-embed-large',\n 'phase' => 'Embedding',\n ],\n 'collection_setup' => [\n 'label' => '8. Collection',\n 'icon' => 'database',\n 'description' => 'Qdrant-Collection einrichten',\n 'phase' => 'Embedding',\n ],\n 'vector_store' => [\n 'label' => '9. Store',\n 'icon' => 'upload',\n 'description' => 'Vektoren in Qdrant speichern',\n 'phase' => 'Embedding',\n ],\n 'metadata_store' => [\n 'label' => '10. Metadata',\n 'icon' => 'tag',\n 'description' => 'Metadaten in MariaDB speichern',\n 'phase' => 'Embedding',\n ],\n 'index_optimize' => [\n 'label' => '11. Index',\n 'icon' => 'zap',\n 'description' => 'HNSW-Index optimieren',\n 'phase' => 'Embedding',\n ],\n \/\/ Phase 3: Analysis\n 'entity_extract' => [\n 'label' => '12. Entities',\n 'icon' => 'users',\n 'description' => 'Entitaeten erkennen (Personen, Konzepte)',\n 'phase' => 'Analysis',\n ],\n 'relation_extract' => [\n 'label' => '13. Relations',\n 'icon' => 'git-branch',\n 'description' => 'Beziehungen zwischen Entitaeten',\n 'phase' => 'Analysis',\n ],\n 'taxonomy_build' => [\n 'label' => '14. Taxonomy',\n 'icon' => 'folder-tree',\n 'description' => 'Hierarchische Kategorisierung',\n 'phase' => 'Analysis',\n ],\n 'semantic_analyze' => [\n 'label' => '15. Semantic',\n 'icon' => 'brain',\n 'description' => 'Bedeutungs-Analyse und Konzepte',\n 'phase' => 'Analysis',\n ],\n 'summarize' => [\n 'label' => '16. Summary',\n 'icon' => 'file-minus',\n 'description' => 'Dokument-Zusammenfassung',\n 'phase' => 'Analysis',\n ],\n 'finalize' => [\n 'label' => '17. Finalize',\n 'icon' => 'check-circle',\n 'description' => 'Job abschliessen und Status setzen',\n 'phase' => 'Analysis',\n ],\n 'question_generate' => [\n 'label' => '18. Questions',\n 'icon' => 'help-circle',\n 'description' => 'Beispielfragen fuer RAG-Chat generieren',\n 'phase' => 'Analysis',\n ],\n \/\/ Legacy\n 'analyze' => [\n 'label' => 'Analyze',\n 'icon' => 'brain',\n 'description' => 'Kombinierte Analyse (Legacy)',\n 'phase' => 'Analysis',\n ],\n ];\n }",
"new_string": " \/**\n * @return array<string, array<string, mixed>>\n *\/\n private function getStepTypes(): array\n {\n return [\n \/\/ Phase 1: Vorverarbeitung\n 'detect' => [\n 'label' => 'Erkennung',\n 'description' => 'Dateien scannen und Format prüfen',\n 'phase' => 'Vorverarbeitung',\n ],\n 'validate' => [\n 'label' => 'Validierung',\n 'description' => 'Datei-Prüfung auf Lesbarkeit und Korruption',\n 'phase' => 'Vorverarbeitung',\n ],\n 'page_split' => [\n 'label' => 'Seitenzerlegung',\n 'description' => 'PDF in Einzelseiten zerlegen für Referenz und Vision-Analyse',\n 'phase' => 'Vorverarbeitung',\n ],\n 'vision_analyze' => [\n 'label' => 'Bildanalyse',\n 'description' => 'Seiten via Vision-Modell analysieren, Bilder und Grafiken erkennen',\n 'phase' => 'Vorverarbeitung',\n ],\n 'extract' => [\n 'label' => 'Textextraktion',\n 'description' => 'Text extrahieren, OCR für Bilder mit Text',\n 'phase' => 'Vorverarbeitung',\n ],\n 'structure' => [\n 'label' => 'Strukturerkennung',\n 'description' => 'Überschriften, Listen und Hierarchie erkennen',\n 'phase' => 'Vorverarbeitung',\n ],\n 'segment' => [\n 'label' => 'Abschnitte',\n 'description' => 'Logische Dokumentgliederung nach Struktur',\n 'phase' => 'Vorverarbeitung',\n ],\n 'chunk' => [\n 'label' => 'Textbausteine',\n 'description' => 'Chunks erstellen (max 800 Token) mit Seitenreferenz',\n 'phase' => 'Vorverarbeitung',\n ],\n \/\/ Phase 2: Speicherung & Vektorisierung\n 'metadata_store' => [\n 'label' => 'DB-Speicherung',\n 'description' => 'Dokument, Seiten und Chunks in MariaDB speichern',\n 'phase' => 'Speicherung',\n ],\n 'embed' => [\n 'label' => 'Vektorisierung',\n 'description' => 'Embeddings mit mxbai-embed-large (1024-dim)',\n 'phase' => 'Speicherung',\n ],\n 'collection_setup' => [\n 'label' => 'Collection',\n 'description' => 'Qdrant-Collection einrichten falls nötig',\n 'phase' => 'Speicherung',\n ],\n 'vector_store' => [\n 'label' => 'Vektorspeicherung',\n 'description' => 'Vektoren in Qdrant mit MariaDB-ID als Referenz',\n 'phase' => 'Speicherung',\n ],\n 'index_optimize' => [\n 'label' => 'Index-Optimierung',\n 'description' => 'HNSW-Index für schnelle Suche optimieren',\n 'phase' => 'Speicherung',\n ],\n \/\/ Phase 3: Analyse\n 'entity_extract' => [\n 'label' => 'Entitäten',\n 'description' => 'Personen, Organisationen, Konzepte, Methoden erkennen',\n 'phase' => 'Analyse',\n ],\n 'relation_extract' => [\n 'label' => 'Beziehungen',\n 'description' => 'Relationen zwischen Entitäten extrahieren',\n 'phase' => 'Analyse',\n ],\n 'taxonomy_build' => [\n 'label' => 'Taxonomie',\n 'description' => 'Hierarchische Kategorisierung aufbauen',\n 'phase' => 'Analyse',\n ],\n 'semantic_analyze' => [\n 'label' => 'Semantik',\n 'description' => 'Bedeutungs-Analyse, Konzepte und Definitionen',\n 'phase' => 'Analyse',\n ],\n 'summarize' => [\n 'label' => 'Zusammenfassung',\n 'description' => 'Dokument- und Seiten-Zusammenfassungen erstellen',\n 'phase' => 'Analyse',\n ],\n 'question_generate' => [\n 'label' => 'Fragengenerierung',\n 'description' => 'Beispielfragen für RAG-Chat erstellen',\n 'phase' => 'Analyse',\n ],\n 'finalize' => [\n 'label' => 'Abschluss',\n 'description' => 'Status finalisieren und Job beenden',\n 'phase' => 'Analyse',\n ],\n \/\/ Legacy\n 'analyze' => [\n 'label' => 'Analyse (Legacy)',\n 'description' => 'Kombinierte Analyse (veraltet)',\n 'phase' => 'Analyse',\n ],\n ];\n }"
}
}
Response
{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Controller\/ContentPipelineController.php",
"oldString": " \/**\n * @return array<string, array<string, mixed>>\n *\/\n private function getStepTypes(): array\n {\n return [\n \/\/ Phase 1: Preprocessing\n 'detect' => [\n 'label' => '1. Detect',\n 'icon' => 'search',\n 'description' => 'Dateien scannen und Format pruefen',\n 'phase' => 'Preprocessing',\n ],\n 'validate' => [\n 'label' => '2. Validate',\n 'icon' => 'check',\n 'description' => 'Datei-Validierung und Korruptionspruefung',\n 'phase' => 'Preprocessing',\n ],\n 'extract' => [\n 'label' => '3. Extract',\n 'icon' => 'file-text',\n 'description' => 'Text extrahieren und bereinigen',\n 'phase' => 'Preprocessing',\n ],\n 'structure' => [\n 'label' => '4. Structure',\n 'icon' => 'list',\n 'description' => 'Ueberschriften und Struktur erkennen',\n 'phase' => 'Preprocessing',\n ],\n 'segment' => [\n 'label' => '5. Segment',\n 'icon' => 'layout',\n 'description' => 'Logische Dokumentgliederung',\n 'phase' => 'Preprocessing',\n ],\n 'chunk' => [\n 'label' => '6. Chunk',\n 'icon' => 'scissors',\n 'description' => 'Textbausteine erstellen (max 800 Token)',\n 'phase' => 'Preprocessing',\n ],\n \/\/ Phase 2: Embedding & Storage\n 'embed' => [\n 'label' => '7. Embed',\n 'icon' => 'cpu',\n 'description' => 'Vektorisierung mit mxbai-embed-large',\n 'phase' => 'Embedding',\n ],\n 'collection_setup' => [\n 'label' => '8. Collection',\n 'icon' => 'database',\n 'description' => 'Qdrant-Collection einrichten',\n 'phase' => 'Embedding',\n ],\n 'vector_store' => [\n 'label' => '9. Store',\n 'icon' => 'upload',\n 'description' => 'Vektoren in Qdrant speichern',\n 'phase' => 'Embedding',\n ],\n 'metadata_store' => [\n 'label' => '10. Metadata',\n 'icon' => 'tag',\n 'description' => 'Metadaten in MariaDB speichern',\n 'phase' => 'Embedding',\n ],\n 'index_optimize' => [\n 'label' => '11. Index',\n 'icon' => 'zap',\n 'description' => 'HNSW-Index optimieren',\n 'phase' => 'Embedding',\n ],\n \/\/ Phase 3: Analysis\n 'entity_extract' => [\n 'label' => '12. Entities',\n 'icon' => 'users',\n 'description' => 'Entitaeten erkennen (Personen, Konzepte)',\n 'phase' => 'Analysis',\n ],\n 'relation_extract' => [\n 'label' => '13. Relations',\n 'icon' => 'git-branch',\n 'description' => 'Beziehungen zwischen Entitaeten',\n 'phase' => 'Analysis',\n ],\n 'taxonomy_build' => [\n 'label' => '14. Taxonomy',\n 'icon' => 'folder-tree',\n 'description' => 'Hierarchische Kategorisierung',\n 'phase' => 'Analysis',\n ],\n 'semantic_analyze' => [\n 'label' => '15. Semantic',\n 'icon' => 'brain',\n 'description' => 'Bedeutungs-Analyse und Konzepte',\n 'phase' => 'Analysis',\n ],\n 'summarize' => [\n 'label' => '16. Summary',\n 'icon' => 'file-minus',\n 'description' => 'Dokument-Zusammenfassung',\n 'phase' => 'Analysis',\n ],\n 'finalize' => [\n 'label' => '17. Finalize',\n 'icon' => 'check-circle',\n 'description' => 'Job abschliessen und Status setzen',\n 'phase' => 'Analysis',\n ],\n 'question_generate' => [\n 'label' => '18. Questions',\n 'icon' => 'help-circle',\n 'description' => 'Beispielfragen fuer RAG-Chat generieren',\n 'phase' => 'Analysis',\n ],\n \/\/ Legacy\n 'analyze' => [\n 'label' => 'Analyze',\n 'icon' => 'brain',\n 'description' => 'Kombinierte Analyse (Legacy)',\n 'phase' => 'Analysis',\n ],\n ];\n }",
"newString": " \/**\n * @return array<string, array<string, mixed>>\n *\/\n private function getStepTypes(): array\n {\n return [\n \/\/ Phase 1: Vorverarbeitung\n 'detect' => [\n 'label' => 'Erkennung',\n 'description' => 'Dateien scannen und Format prüfen',\n 'phase' => 'Vorverarbeitung',\n ],\n 'validate' => [\n 'label' => 'Validierung',\n 'description' => 'Datei-Prüfung auf Lesbarkeit und Korruption',\n 'phase' => 'Vorverarbeitung',\n ],\n 'page_split' => [\n 'label' => 'Seitenzerlegung',\n 'description' => 'PDF in Einzelseiten zerlegen für Referenz und Vision-Analyse',\n 'phase' => 'Vorverarbeitung',\n ],\n 'vision_analyze' => [\n 'label' => 'Bildanalyse',\n 'description' => 'Seiten via Vision-Modell analysieren, Bilder und Grafiken erkennen',\n 'phase' => 'Vorverarbeitung',\n ],\n 'extract' => [\n 'label' => 'Textextraktion',\n 'description' => 'Text extrahieren, OCR für Bilder mit Text',\n 'phase' => 'Vorverarbeitung',\n ],\n 'structure' => [\n 'label' => 'Strukturerkennung',\n 'description' => 'Überschriften, Listen und Hierarchie erkennen',\n 'phase' => 'Vorverarbeitung',\n ],\n 'segment' => [\n 'label' => 'Abschnitte',\n 'description' => 'Logische Dokumentgliederung nach Struktur',\n 'phase' => 'Vorverarbeitung',\n ],\n 'chunk' => [\n 'label' => 'Textbausteine',\n 'description' => 'Chunks erstellen (max 800 Token) mit Seitenreferenz',\n 'phase' => 'Vorverarbeitung',\n ],\n \/\/ Phase 2: Speicherung & Vektorisierung\n 'metadata_store' => [\n 'label' => 'DB-Speicherung',\n 'description' => 'Dokument, Seiten und Chunks in MariaDB speichern',\n 'phase' => 'Speicherung',\n ],\n 'embed' => [\n 'label' => 'Vektorisierung',\n 'description' => 'Embeddings mit mxbai-embed-large (1024-dim)',\n 'phase' => 'Speicherung',\n ],\n 'collection_setup' => [\n 'label' => 'Collection',\n 'description' => 'Qdrant-Collection einrichten falls nötig',\n 'phase' => 'Speicherung',\n ],\n 'vector_store' => [\n 'label' => 'Vektorspeicherung',\n 'description' => 'Vektoren in Qdrant mit MariaDB-ID als Referenz',\n 'phase' => 'Speicherung',\n ],\n 'index_optimize' => [\n 'label' => 'Index-Optimierung',\n 'description' => 'HNSW-Index für schnelle Suche optimieren',\n 'phase' => 'Speicherung',\n ],\n \/\/ Phase 3: Analyse\n 'entity_extract' => [\n 'label' => 'Entitäten',\n 'description' => 'Personen, Organisationen, Konzepte, Methoden erkennen',\n 'phase' => 'Analyse',\n ],\n 'relation_extract' => [\n 'label' => 'Beziehungen',\n 'description' => 'Relationen zwischen Entitäten extrahieren',\n 'phase' => 'Analyse',\n ],\n 'taxonomy_build' => [\n 'label' => 'Taxonomie',\n 'description' => 'Hierarchische Kategorisierung aufbauen',\n 'phase' => 'Analyse',\n ],\n 'semantic_analyze' => [\n 'label' => 'Semantik',\n 'description' => 'Bedeutungs-Analyse, Konzepte und Definitionen',\n 'phase' => 'Analyse',\n ],\n 'summarize' => [\n 'label' => 'Zusammenfassung',\n 'description' => 'Dokument- und Seiten-Zusammenfassungen erstellen',\n 'phase' => 'Analyse',\n ],\n 'question_generate' => [\n 'label' => 'Fragengenerierung',\n 'description' => 'Beispielfragen für RAG-Chat erstellen',\n 'phase' => 'Analyse',\n ],\n 'finalize' => [\n 'label' => 'Abschluss',\n 'description' => 'Status finalisieren und Job beenden',\n 'phase' => 'Analyse',\n ],\n \/\/ Legacy\n 'analyze' => [\n 'label' => 'Analyse (Legacy)',\n 'description' => 'Kombinierte Analyse (veraltet)',\n 'phase' => 'Analyse',\n ],\n ];\n }",
"originalFile": "<?php\n\nnamespace Controller;\n\nuse Framework\\Controller;\nuse Infrastructure\\Persistence\\PipelineRepository;\n\nclass ContentPipelineController extends Controller\n{\n private PipelineRepository $repository;\n\n public function __construct()\n {\n $this->repository = new PipelineRepository();\n }\n\n \/**\n * GET \/content-pipeline\n *\/\n public function index(): void\n {\n $pipelines = $this->repository->findAll();\n $stats = $this->repository->getStatistics();\n\n $this->view('content-pipeline.index', [\n 'title' => 'Content Pipeline',\n 'pipelines' => $pipelines,\n 'stats' => $stats,\n ]);\n }\n\n \/**\n * GET \/content-pipeline\/import\n *\/\n public function import(): void\n {\n $pipeline = $this->repository->findDefault();\n\n if ($pipeline === null) {\n $pipelines = $this->repository->findAll(1);\n $pipeline = $pipelines[0] ?? null;\n }\n\n $latestRun = $pipeline !== null\n ? $this->repository->findLatestRun((int) $pipeline['id'])\n : null;\n\n $this->view('content-pipeline.import', [\n 'title' => 'Import Pipeline',\n 'pipeline' => $pipeline,\n 'latestRun' => $latestRun,\n ]);\n }\n\n \/**\n * GET \/content-pipeline\/new\n *\/\n public function pipelineNew(): void\n {\n $this->view('content-pipeline.form', [\n 'title' => 'Neue Pipeline',\n 'pipeline' => null,\n 'stepTypes' => $this->getStepTypes(),\n ]);\n }\n\n \/**\n * GET \/content-pipeline\/{id}\n *\/\n public function show(string $id): void\n {\n $pipeline = $this->repository->findById((int) $id);\n\n if ($pipeline === null) {\n $this->notFound('Pipeline nicht gefunden');\n }\n\n $runs = $this->repository->findRuns((int) $id, 10);\n\n $this->view('content-pipeline.show', [\n 'title' => 'Pipeline: ' . $pipeline['name'],\n 'pipeline' => $pipeline,\n 'runs' => $runs,\n 'stepTypes' => $this->getStepTypes(),\n ]);\n }\n\n \/**\n * GET \/content-pipeline\/{id}\/edit\n *\/\n public function edit(string $id): void\n {\n $pipeline = $this->repository->findById((int) $id);\n\n if ($pipeline === null) {\n $this->notFound('Pipeline nicht gefunden');\n }\n\n $this->view('content-pipeline.form', [\n 'title' => 'Pipeline bearbeiten: ' . $pipeline['name'],\n 'pipeline' => $pipeline,\n 'stepTypes' => $this->getStepTypes(),\n ]);\n }\n\n \/**\n * POST \/content-pipeline\n *\/\n public function store(): void\n {\n $this->requireCsrf();\n\n $name = trim($_POST['name'] ?? '');\n $description = trim($_POST['description'] ?? '');\n $sourcePath = trim($_POST['source_path'] ?? '\/var\/www\/nextcloud\/data\/root\/files\/Documents');\n $extensions = $this->parseExtensions($_POST['extensions'] ?? '');\n $isDefault = isset($_POST['is_default']) ? 1 : 0;\n\n if ($name === '') {\n $_SESSION['error'] = 'Name ist erforderlich.';\n header('Location: \/content-pipeline\/new');\n exit;\n }\n\n $pipelineId = $this->repository->create([\n 'name' => $name,\n 'description' => $description,\n 'source_path' => $sourcePath,\n 'extensions' => $extensions,\n 'is_default' => $isDefault,\n ]);\n\n \/\/ Standard-Steps hinzufuegen\n $this->createDefaultSteps($pipelineId);\n\n $_SESSION['success'] = 'Pipeline erfolgreich erstellt.';\n header('Location: \/content-pipeline\/' . $pipelineId);\n exit;\n }\n\n \/**\n * POST \/content-pipeline\/{id}\n *\/\n public function update(string $id): void\n {\n $this->requireCsrf();\n\n $pipeline = $this->repository->findById((int) $id);\n\n if ($pipeline === null) {\n $this->notFound('Pipeline nicht gefunden');\n }\n\n $name = trim($_POST['name'] ?? '');\n $description = trim($_POST['description'] ?? '');\n $sourcePath = trim($_POST['source_path'] ?? '');\n $extensions = $this->parseExtensions($_POST['extensions'] ?? '');\n $isDefault = isset($_POST['is_default']) ? 1 : 0;\n\n if ($name === '') {\n $_SESSION['error'] = 'Name ist erforderlich.';\n header('Location: \/content-pipeline\/' . $id . '\/edit');\n exit;\n }\n\n $this->repository->update((int) $id, [\n 'name' => $name,\n 'description' => $description,\n 'source_path' => $sourcePath,\n 'extensions' => $extensions,\n 'is_default' => $isDefault,\n ]);\n\n $_SESSION['success'] = 'Pipeline aktualisiert.';\n header('Location: \/content-pipeline\/' . $id);\n exit;\n }\n\n \/**\n * POST \/content-pipeline\/{id}\/run\n *\/\n public function run(string $id): void\n {\n $this->requireCsrf();\n\n $pipeline = $this->repository->findById((int) $id);\n\n if ($pipeline === null) {\n $this->notFound('Pipeline nicht gefunden');\n }\n\n \/\/ Neuen Run erstellen\n $runId = $this->repository->createRun((int) $id);\n\n \/\/ Pipeline im Hintergrund starten\n $pipelineScript = '\/opt\/scripts\/pipeline\/pipeline.py';\n $venvPython = '\/opt\/scripts\/pipeline\/venv\/bin\/python';\n $logFile = '\/tmp\/pipeline_run_' . $runId . '.log';\n\n $cmd = sprintf(\n 'nohup %s %s all --pipeline-id=%d --run-id=%d > %s 2>&1 &',\n escapeshellarg($venvPython),\n escapeshellarg($pipelineScript),\n (int) $id,\n $runId,\n escapeshellarg($logFile)\n );\n\n exec($cmd);\n\n $_SESSION['success'] = 'Pipeline gestartet (Run #' . $runId . ')';\n header('Location: \/content-pipeline\/' . $id);\n exit;\n }\n\n \/**\n * GET \/content-pipeline\/{id}\/status\n * AJAX endpoint for run status\n *\/\n public function status(string $id): void\n {\n $pipeline = $this->repository->findById((int) $id);\n\n if ($pipeline === null) {\n $this->json(['error' => 'Pipeline nicht gefunden'], 404);\n return;\n }\n\n $latestRun = $this->repository->findLatestRun((int) $id);\n\n $this->json([\n 'pipeline_id' => (int) $id,\n 'run' => $latestRun,\n ]);\n }\n\n \/**\n * POST \/content-pipeline\/{id}\/steps\/{stepId}\/toggle\n *\/\n public function toggleStep(string $id, string $stepId): void\n {\n $this->requireCsrf();\n\n $pipeline = $this->repository->findById((int) $id);\n\n if ($pipeline === null) {\n $this->notFound('Pipeline nicht gefunden');\n }\n\n \/\/ Find step and toggle\n foreach ($pipeline['steps'] as $step) {\n if ((int) $step['id'] === (int) $stepId) {\n $this->repository->updateStep((int) $stepId, [\n 'enabled' => $step['enabled'] ? 0 : 1,\n ]);\n break;\n }\n }\n\n header('Location: \/content-pipeline\/' . $id);\n exit;\n }\n\n \/**\n * POST \/content-pipeline\/{id}\/delete\n *\/\n public function delete(string $id): void\n {\n $this->requireCsrf();\n\n $pipeline = $this->repository->findById((int) $id);\n\n if ($pipeline === null) {\n $this->notFound('Pipeline nicht gefunden');\n }\n\n $this->repository->delete((int) $id);\n\n $_SESSION['success'] = 'Pipeline geloescht.';\n header('Location: \/content-pipeline');\n exit;\n }\n\n \/**\n * @return array<string, array<string, mixed>>\n *\/\n private function getStepTypes(): array\n {\n return [\n \/\/ Phase 1: Preprocessing\n 'detect' => [\n 'label' => '1. Detect',\n 'icon' => 'search',\n 'description' => 'Dateien scannen und Format pruefen',\n 'phase' => 'Preprocessing',\n ],\n 'validate' => [\n 'label' => '2. Validate',\n 'icon' => 'check',\n 'description' => 'Datei-Validierung und Korruptionspruefung',\n 'phase' => 'Preprocessing',\n ],\n 'extract' => [\n 'label' => '3. Extract',\n 'icon' => 'file-text',\n 'description' => 'Text extrahieren und bereinigen',\n 'phase' => 'Preprocessing',\n ],\n 'structure' => [\n 'label' => '4. Structure',\n 'icon' => 'list',\n 'description' => 'Ueberschriften und Struktur erkennen',\n 'phase' => 'Preprocessing',\n ],\n 'segment' => [\n 'label' => '5. Segment',\n 'icon' => 'layout',\n 'description' => 'Logische Dokumentgliederung',\n 'phase' => 'Preprocessing',\n ],\n 'chunk' => [\n 'label' => '6. Chunk',\n 'icon' => 'scissors',\n 'description' => 'Textbausteine erstellen (max 800 Token)',\n 'phase' => 'Preprocessing',\n ],\n \/\/ Phase 2: Embedding & Storage\n 'embed' => [\n 'label' => '7. Embed',\n 'icon' => 'cpu',\n 'description' => 'Vektorisierung mit mxbai-embed-large',\n 'phase' => 'Embedding',\n ],\n 'collection_setup' => [\n 'label' => '8. Collection',\n 'icon' => 'database',\n 'description' => 'Qdrant-Collection einrichten',\n 'phase' => 'Embedding',\n ],\n 'vector_store' => [\n 'label' => '9. Store',\n 'icon' => 'upload',\n 'description' => 'Vektoren in Qdrant speichern',\n 'phase' => 'Embedding',\n ],\n... [TRUNCATED-9e084279b30c0e8c]",
"structuredPatch": [
{
"oldStart": 287,
"oldLines": 123,
"newStart": 287,
"newLines": 114,
"lines": [
" private function getStepTypes(): array",
" {",
" return [",
"- \/\/ Phase 1: Preprocessing",
"+ \/\/ Phase 1: Vorverarbeitung",
" 'detect' => [",
"- 'label' => '1. Detect',",
"- 'icon' => 'search',",
"- 'description' => 'Dateien scannen und Format pruefen',",
"- 'phase' => 'Preprocessing',",
"+ 'label' => 'Erkennung',",
"+ 'description' => 'Dateien scannen und Format prüfen',",
"+ 'phase' => 'Vorverarbeitung',",
" ],",
" 'validate' => [",
"- 'label' => '2. Validate',",
"- 'icon' => 'check',",
"- 'description' => 'Datei-Validierung und Korruptionspruefung',",
"- 'phase' => 'Preprocessing',",
"+ 'label' => 'Validierung',",
"+ 'description' => 'Datei-Prüfung auf Lesbarkeit und Korruption',",
"+ 'phase' => 'Vorverarbeitung',",
" ],",
"+ 'page_split' => [",
"+ 'label' => 'Seitenzerlegung',",
"+ 'description' => 'PDF in Einzelseiten zerlegen für Referenz und Vision-Analyse',",
"+ 'phase' => 'Vorverarbeitung',",
"+ ],",
"+ 'vision_analyze' => [",
"+ 'label' => 'Bildanalyse',",
"+ 'description' => 'Seiten via Vision-Modell analysieren, Bilder und Grafiken erkennen',",
"+ 'phase' => 'Vorverarbeitung',",
"+ ],",
" 'extract' => [",
"- 'label' => '3. Extract',",
"- 'icon' => 'file-text',",
"- 'description' => 'Text extrahieren und bereinigen',",
"- 'phase' => 'Preprocessing',",
"+ 'label' => 'Textextraktion',",
"+ 'description' => 'Text extrahieren, OCR für Bilder mit Text',",
"+ 'phase' => 'Vorverarbeitung',",
" ],",
" 'structure' => [",
"- 'label' => '4. Structure',",
"- 'icon' => 'list',",
"- 'description' => 'Ueberschriften und Struktur erkennen',",
"- 'phase' => 'Preprocessing',",
"+ 'label' => 'Strukturerkennung',",
"+ 'description' => 'Überschriften, Listen und Hierarchie erkennen',",
"+ 'phase' => 'Vorverarbeitung',",
" ],",
" 'segment' => [",
"- 'label' => '5. Segment',",
"- 'icon' => 'layout',",
"- 'description' => 'Logische Dokumentgliederung',",
"- 'phase' => 'Preprocessing',",
"+ 'label' => 'Abschnitte',",
"+ 'description' => 'Logische Dokumentgliederung nach Struktur',",
"+ 'phase' => 'Vorverarbeitung',",
" ],",
" 'chunk' => [",
"- 'label' => '6. Chunk',",
"- 'icon' => 'scissors',",
"- 'description' => 'Textbausteine erstellen (max 800 Token)',",
"- 'phase' => 'Preprocessing',",
"+ 'label' => 'Textbausteine',",
"+ 'description' => 'Chunks erstellen (max 800 Token) mit Seitenreferenz',",
"+ 'phase' => 'Vorverarbeitung',",
" ],",
"- \/\/ Phase 2: Embedding & Storage",
"+ \/\/ Phase 2: Speicherung & Vektorisierung",
"+ 'metadata_store' => [",
"+ 'label' => 'DB-Speicherung',",
"+ 'description' => 'Dokument, Seiten und Chunks in MariaDB speichern',",
"+ 'phase' => 'Speicherung',",
"+ ],",
" 'embed' => [",
"- 'label' => '7. Embed',",
"- 'icon' => 'cpu',",
"- 'description' => 'Vektorisierung mit mxbai-embed-large',",
"- 'phase' => 'Embedding',",
"+ 'label' => 'Vektorisierung',",
"+ 'description' => 'Embeddings mit mxbai-embed-large (1024-dim)',",
"+ 'phase' => 'Speicherung',",
" ],",
" 'collection_setup' => [",
"- 'label' => '8. Collection',",
"- 'icon' => 'database',",
"- 'description' => 'Qdrant-Collection einrichten',",
"- 'phase' => 'Embedding',",
"+ 'label' => 'Collection',",
"+ 'description' => 'Qdrant-Collection einrichten falls nötig',",
"+ 'phase' => 'Speicherung',",
" ],",
" 'vector_store' => [",
"- 'label' => '9. Store',",
"- 'icon' => 'upload',",
"- 'description' => 'Vektoren in Qdrant speichern',",
"- 'phase' => 'Embedding',",
"+ 'label' => 'Vektorspeicherung',",
"+ 'description' => 'Vektoren in Qdrant mit MariaDB-ID als Referenz',",
"+ 'phase' => 'Speicherung',",
" ],",
"- 'metadata_store' => [",
"- 'label' => '10. Metadata',",
"- 'icon' => 'tag',",
"- 'description' => 'Metadaten in MariaDB speichern',",
"- 'phase' => 'Embedding',",
"- ],",
" 'index_optimize' => [",
"- 'label' => '11. Index',",
"- 'icon' => 'zap',",
"- 'description' => 'HNSW-Index optimieren',",
"- 'phase' => 'Embedding',",
"+ 'label' => 'Index-Optimierung',",
"+ 'description' => 'HNSW-Index für schnelle Suche optimieren',",
"+ 'phase' => 'Speicherung',",
" ],",
"- \/\/ Phase 3: Analysis",
"+ \/\/ Phase 3: Analyse",
" 'entity_extract' => [",
"- 'label' => '12. Entities',",
"- 'icon' => 'users',",
"- 'description' => 'Entitaeten erkennen (Personen, Konzepte)',",
"- 'phase' => 'Analysis',",
"+ 'label' => 'Entitäten',",
"+ 'description' => 'Personen, Organisationen, Konzepte, Methoden erkennen',",
"+ 'phase' => 'Analyse',",
" ],",
" 'relation_extract' => [",
"- 'label' => '13. Relations',",
"- 'icon' => 'git-branch',",
"- 'description' => 'Beziehungen zwischen Entitaeten',",
"- 'phase' => 'Analysis',",
"+ 'label' => 'Beziehungen',",
"+ 'description' => 'Relationen zwischen Entitäten extrahieren',",
"+ 'phase' => 'Analyse',",
" ],",
" 'taxonomy_build' => [",
"- 'label' => '14. Taxonomy',",
"- 'icon' => 'folder-tree',",
"- 'description' => 'Hierarchische Kategorisierung',",
"- 'phase' => 'Analysis',",
"+ 'label' => 'Taxonomie',",
"+ 'description' => 'Hierarchische Kategorisierung aufbauen',",
"+ 'phase' => 'Analyse',",
" ],",
" 'semantic_analyze' => [",
"- 'label' => '15. Semantic',",
"- 'icon' => 'brain',",
"- 'description' => 'Bedeutungs-Analyse und Konzepte',",
"- 'phase' => 'Analysis',",
"+ 'label' => 'Semantik',",
"+ 'description' => 'Bedeutungs-Analyse, Konzepte und Definitionen',",
"+ 'phase' => 'Analyse',",
" ],",
" 'summarize' => [",
"- 'label' => '16. Summary',",
"- 'icon' => 'file-minus',",
"- 'description' => 'Dokument-Zusammenfassung',",
"- 'phase' => 'Analysis',",
"+ 'label' => 'Zusammenfassung',",
"+ 'description' => 'Dokument- und Seiten-Zusammenfassungen erstellen',",
"+ 'phase' => 'Analyse',",
" ],",
"- 'finalize' => [",
"- 'label' => '17. Finalize',",
"- 'icon' => 'check-circle',",
"- 'description' => 'Job abschliessen und Status setzen',",
"- 'phase' => 'Analysis',",
"- ],",
" 'question_generate' => [",
"- 'label' => '18. Questions',",
"- 'icon' => 'help-circle',",
"- 'description' => 'Beispielfragen fuer RAG-Chat generieren',",
"- 'phase' => 'Analysis',",
"+ 'label' => 'Fragengenerierung',",
"+ 'description' => 'Beispielfragen für RAG-Chat erstellen',",
"+ 'phase' => 'Analyse',",
" ],",
"+ 'finalize' => [",
"+ 'label' => 'Abschluss',",
"+ 'description' => 'Status finalisieren und Job beenden',",
"+ 'phase' => 'Analyse',",
"+ ],",
" \/\/ Legacy",
" 'analyze' => [",
"- 'label' => 'Analyze',",
"- 'icon' => 'brain',",
"- 'description' => 'Kombinierte Analyse (Legacy)',",
"- 'phase' => 'Analysis',",
"+ 'label' => 'Analyse (Legacy)',",
"+ 'description' => 'Kombinierte Analyse (veraltet)',",
"+ 'phase' => 'Analyse',",
" ],",
" ];",
" }"
]
}
],
"userModified": false,
"replaceAll": false
}
}