Backup #475

ID: 475
Dateipfad: /var/www/dev.campus.systemische-tools.de/src/Controller/ContentPipelineController.php
Version: 9
Typ: modified
Größe: 18.4 KB
Hash: b1d00613093e38e420edd442c47571e8da4fcfc4c43d9ef0410235fcac6b7af4
Datum: 2025-12-22 15:20:54
Geändert von: claude-code-hook
Grund: Claude Code Pre-Hook Backup vor Edit-Operation
Datei existiert: Ja

Dateiinhalt

<?php

namespace Controller;

use Framework\Controller;
use Infrastructure\AI\ModelConfig;
use Infrastructure\Persistence\PipelineRepository;

class ContentPipelineController extends Controller
{
    private PipelineRepository $repository;

    /**
     * Set up the repository this controller reads pipelines, steps and runs from.
     *
     * @param PipelineRepository|null $repository Repository to use. When omitted (as all
     *                                            argument-less instantiations do), a new
     *                                            PipelineRepository is created, exactly as before.
     *                                            Passing one allows a substitute in tests.
     */
    public function __construct(?PipelineRepository $repository = null)
    {
        $this->repository = $repository ?? new PipelineRepository();
    }

    /**
     * GET /content-pipeline
     *
     * Renders the overview template with the repository's pipeline list and statistics.
     */
    public function index(): void
    {
        // Array entries are evaluated top to bottom, so findAll() still runs before getStatistics().
        $viewData = [
            'title' => 'Content Pipeline',
            'pipelines' => $this->repository->findAll(),
            'stats' => $this->repository->getStatistics(),
        ];

        $this->view('content-pipeline.index', $viewData);
    }

    /**
     * GET /content-pipeline/import
     *
     * Renders the import page for the default pipeline. When the repository reports
     * no default, the first entry returned by findAll(1) is used instead. The latest
     * run is only looked up when a pipeline was found; otherwise both view values are null.
     */
    public function import(): void
    {
        $selected = $this->repository->findDefault();

        if ($selected === null) {
            // No default pipeline: fall back to the first entry of findAll(1), if any.
            $candidates = $this->repository->findAll(1);
            $selected = $candidates[0] ?? null;
        }

        $lastRun = null;
        if ($selected !== null) {
            $lastRun = $this->repository->findLatestRun((int) $selected['id']);
        }

        $this->view('content-pipeline.import', [
            'title' => 'Import Pipeline',
            'pipeline' => $selected,
            'latestRun' => $lastRun,
        ]);
    }

    /**
     * GET /content-pipeline/new
     *
     * Renders the pipeline form in "create" mode: no pipeline record is passed,
     * only the catalogue of step types.
     */
    public function pipelineNew(): void
    {
        $availableStepTypes = $this->getStepTypes();

        $this->view('content-pipeline.form', [
            'title' => 'Neue Pipeline',
            'pipeline' => null,
            'stepTypes' => $availableStepTypes,
        ]);
    }

    /**
     * GET /content-pipeline/{id}
     *
     * Renders the detail page of one pipeline together with up to 10 of its runs,
     * the step type catalogue and the selectable models.
     *
     * @param string $id Pipeline id from the route; cast to int for every lookup.
     */
    public function show(string $id): void
    {
        $pipelineId = (int) $id;
        $pipeline = $this->repository->findById($pipelineId);

        if ($pipeline === null) {
            // NOTE(review): the code below dereferences $pipeline, so notFound()
            // presumably ends the request — confirm in Framework\Controller.
            $this->notFound('Pipeline nicht gefunden');
        }

        $recentRuns = $this->repository->findRuns($pipelineId, 10);
        $pageTitle = 'Pipeline: ' . $pipeline['name'];

        $this->view('content-pipeline.show', [
            'title' => $pageTitle,
            'pipeline' => $pipeline,
            'runs' => $recentRuns,
            'stepTypes' => $this->getStepTypes(),
            'models' => ModelConfig::getAll(),
            'defaultModel' => ModelConfig::DEFAULT_MODEL,
        ]);
    }

    /**
     * GET /content-pipeline/{id}/edit
     *
     * Renders the pipeline form pre-filled with an existing pipeline.
     *
     * @param string $id Pipeline id from the route; cast to int for the lookup.
     */
    public function edit(string $id): void
    {
        $existing = $this->repository->findById((int) $id);

        if ($existing === null) {
            // NOTE(review): the code below dereferences $existing, so notFound()
            // presumably ends the request — confirm in Framework\Controller.
            $this->notFound('Pipeline nicht gefunden');
        }

        $formData = [
            'title' => 'Pipeline bearbeiten: ' . $existing['name'],
            'pipeline' => $existing,
            'stepTypes' => $this->getStepTypes(),
        ];

        $this->view('content-pipeline.form', $formData);
    }

    /**
     * POST /content-pipeline
     *
     * Creates a pipeline from the submitted form, attaches the default steps and
     * redirects to the new pipeline's detail page. An empty name is rejected with a
     * session error and a redirect back to the creation form.
     *
     * Text fields are read defensively: a field submitted as an array (e.g.
     * "name[]=x") is treated like a missing field instead of being handed to
     * trim(), which would raise a TypeError on PHP 8.
     */
    public function store(): void
    {
        $this->requireCsrf();

        // Read one POST field as a trimmed string, falling back to $default when the
        // field is absent or is not a string.
        $field = static function (string $key, string $default = ''): string {
            $raw = $_POST[$key] ?? $default;

            return trim(is_string($raw) ? $raw : $default);
        };

        $name = $field('name');
        $description = $field('description');
        $sourcePath = $field('source_path', '/var/www/nextcloud/data/root/files/Documents');
        // Passed through unchanged: the accepted input type of parseExtensions() is
        // defined elsewhere in this class.
        $extensions = $this->parseExtensions($_POST['extensions'] ?? '');
        // Mere presence of the field counts as "set as default".
        $isDefault = isset($_POST['is_default']) ? 1 : 0;

        if ($name === '') {
            $_SESSION['error'] = 'Name ist erforderlich.';
            header('Location: /content-pipeline/new');
            exit;
        }

        $pipelineId = $this->repository->create([
            'name' => $name,
            'description' => $description,
            'source_path' => $sourcePath,
            'extensions' => $extensions,
            'is_default' => $isDefault,
        ]);

        // Attach the standard steps to the freshly created pipeline.
        $this->createDefaultSteps($pipelineId);

        $_SESSION['success'] = 'Pipeline erfolgreich erstellt.';
        header('Location: /content-pipeline/' . $pipelineId);
        exit;
    }

    /**
     * POST /content-pipeline/{id}
     *
     * Updates an existing pipeline from the submitted form and redirects to its
     * detail page. An empty name is rejected with a session error and a redirect
     * back to the edit form.
     *
     * Text fields are read defensively: a field submitted as an array (e.g.
     * "name[]=x") is treated like a missing field instead of being handed to
     * trim(), which would raise a TypeError on PHP 8. Redirect targets are built
     * from the integer id that was actually looked up, not from the raw route string.
     *
     * @param string $id Pipeline id from the route; cast to int once and reused.
     */
    public function update(string $id): void
    {
        $this->requireCsrf();

        $pipelineId = (int) $id;
        $pipeline = $this->repository->findById($pipelineId);

        if ($pipeline === null) {
            $this->notFound('Pipeline nicht gefunden');
        }

        // Read one POST field as a trimmed string; absent or non-string input yields ''.
        $field = static function (string $key): string {
            $raw = $_POST[$key] ?? '';

            return is_string($raw) ? trim($raw) : '';
        };

        $name = $field('name');
        $description = $field('description');
        $sourcePath = $field('source_path');
        // Passed through unchanged: the accepted input type of parseExtensions() is
        // defined elsewhere in this class.
        $extensions = $this->parseExtensions($_POST['extensions'] ?? '');
        // Mere presence of the field counts as "set as default".
        $isDefault = isset($_POST['is_default']) ? 1 : 0;

        if ($name === '') {
            $_SESSION['error'] = 'Name ist erforderlich.';
            header('Location: /content-pipeline/' . $pipelineId . '/edit');
            exit;
        }

        $this->repository->update($pipelineId, [
            'name' => $name,
            'description' => $description,
            'source_path' => $sourcePath,
            'extensions' => $extensions,
            'is_default' => $isDefault,
        ]);

        $_SESSION['success'] = 'Pipeline aktualisiert.';
        header('Location: /content-pipeline/' . $pipelineId);
        exit;
    }

    /**
     * POST /content-pipeline/{id}/run
     *
     * Registers a new run for the pipeline, launches the Python pipeline script as a
     * detached background process (nohup ... &) with its output redirected to a
     * per-run log file under /tmp, then redirects to the pipeline's detail page.
     *
     * @param string $id Pipeline id from the route; cast to int for lookups and the command line.
     */
    public function run(string $id): void
    {
        $this->requireCsrf();

        $pipelineId = (int) $id;

        if ($this->repository->findById($pipelineId) === null) {
            $this->notFound('Pipeline nicht gefunden');
        }

        // Create the run record first so its id can be handed to the script.
        $runId = $this->repository->createRun($pipelineId);

        // Every path is shell-escaped; both ids go through %d and so reach the
        // command line as integers only.
        $python = escapeshellarg('/opt/scripts/pipeline/venv/bin/python');
        $script = escapeshellarg('/opt/scripts/pipeline/pipeline.py');
        $logTarget = escapeshellarg('/tmp/pipeline_run_' . $runId . '.log');

        $command = sprintf(
            'nohup %s %s all --pipeline-id=%d --run-id=%d > %s 2>&1 &',
            $python,
            $script,
            $pipelineId,
            $runId,
            $logTarget
        );

        // The return value of exec() is not inspected.
        exec($command);

        $_SESSION['success'] = 'Pipeline gestartet (Run #' . $runId . ')';
        header('Location: /content-pipeline/' . $id);
        exit;
    }

    /**
     * GET /content-pipeline/{id}/status
     *
     * AJAX endpoint: answers with the pipeline id and its latest run as JSON, or
     * with a 404 JSON error when the pipeline does not exist.
     *
     * @param string $id Pipeline id from the route; cast to int for lookups and the response.
     */
    public function status(string $id): void
    {
        $pipelineId = (int) $id;

        if ($this->repository->findById($pipelineId) === null) {
            $this->json(['error' => 'Pipeline nicht gefunden'], 404);

            return;
        }

        $payload = [
            'pipeline_id' => $pipelineId,
            'run' => $this->repository->findLatestRun($pipelineId),
        ];

        $this->json($payload);
    }

    /**
     * POST /content-pipeline/{id}/steps/{stepId}/toggle
     *
     * Flips the "enabled" flag of one step of the pipeline and redirects back to the
     * pipeline's detail page. A step id that is not among the pipeline's steps
     * changes nothing; the redirect happens either way.
     *
     * @param string $id     Pipeline id from the route.
     * @param string $stepId Step id from the route; compared as int.
     */
    public function toggleStep(string $id, string $stepId): void
    {
        $this->requireCsrf();

        $pipeline = $this->repository->findById((int) $id);

        if ($pipeline === null) {
            $this->notFound('Pipeline nicht gefunden');
        }

        $targetStepId = (int) $stepId;

        foreach ($pipeline['steps'] as $candidate) {
            if ((int) $candidate['id'] !== $targetStepId) {
                continue;
            }

            // Only the first matching step is toggled.
            $flipped = $candidate['enabled'] ? 0 : 1;
            $this->repository->updateStep($targetStepId, ['enabled' => $flipped]);
            break;
        }

        header('Location: /content-pipeline/' . $id);
        exit;
    }

    /**
     * POST /content-pipeline/{id}/steps/{stepId}/model (AJAX)
     *
     * Stores the model (and the provider derived from it) in the config of one
     * pipeline step and answers with JSON.
     *
     * Responses:
     *  - 404 when the pipeline or the step does not exist,
     *  - 400 when the submitted model is empty, not a string, or rejected by ModelConfig::isValid(),
     *  - otherwise a success payload with the model and its label.
     *
     * @param string $id     Pipeline id from the route.
     * @param string $stepId Step id from the route; compared as int.
     */
    public function updateStepModel(string $id, string $stepId): void
    {
        // NOTE(review): every other POST handler visible in this controller calls
        // requireCsrf() first; this one does not. Confirm whether the AJAX client
        // sends a token and, if so, enforce it here as well.
        $pipeline = $this->repository->findById((int) $id);

        if ($pipeline === null) {
            $this->json(['error' => 'Pipeline nicht gefunden'], 404);

            return;
        }

        // A non-string payload (e.g. "model[]=x") is treated as empty so it gets the
        // regular 400 response instead of a TypeError from trim().
        $rawModel = $_POST['model'] ?? '';
        $model = is_string($rawModel) ? trim($rawModel) : '';

        if ($model === '' || !ModelConfig::isValid($model)) {
            $this->json(['error' => 'Ungültiges Modell'], 400);

            return;
        }

        $isLocal = ModelConfig::isLocal($model);
        $localPrefix = 'ollama:';

        // Local models are stored without the 'ollama:' prefix. Strip it only when it
        // is really there, so a local model id without the prefix is kept intact.
        $storedModel = $model;
        if ($isLocal && strncmp($model, $localPrefix, strlen($localPrefix)) === 0) {
            $storedModel = substr($model, strlen($localPrefix));
        }

        $targetStepId = (int) $stepId;
        $stepFound = false;

        // A pipeline record without a 'steps' entry simply yields "step not found".
        foreach ($pipeline['steps'] ?? [] as $step) {
            if ((int) $step['id'] !== $targetStepId) {
                continue;
            }

            $stepFound = true;

            // Keep the step's other config entries; only model and provider change.
            $config = $step['config'] ?? [];
            $config['model'] = $storedModel;
            $config['provider'] = $isLocal ? 'ollama' : 'anthropic';

            $this->repository->updateStep($targetStepId, [
                'config' => $config,
            ]);
            break;
        }

        if (!$stepFound) {
            $this->json(['error' => 'Schritt nicht gefunden'], 404);

            return;
        }

        $this->json([
            'success' => true,
            'model' => $model,
            'label' => ModelConfig::getLabel($model),
        ]);
    }

    /**
     * POST /content-pipeline/{id}/delete
     *
     * Deletes the pipeline via the repository and redirects to the overview with a
     * success message in the session.
     *
     * @param string $id Pipeline id from the route; cast to int for lookup and deletion.
     */
    public function delete(string $id): void
    {
        $this->requireCsrf();

        $pipelineId = (int) $id;

        if ($this->repository->findById($pipelineId) === null) {
            $this->notFound('Pipeline nicht gefunden');
        }

        $this->repository->delete($pipelineId);

        $_SESSION['success'] = 'Pipeline geloescht.';
        header('Location: /content-pipeline');
        exit;
    }

    /**
     * @return array<string, array<string, mixed>>
     */
    private function getStepTypes(): array
    {
        return [
            // Phase 1: Vorverarbeitung
            'detect' => [
                'label' => 'Erkennung',
                'description' => 'Dateien scannen und Format prüfen',
                'phase' => 'Vorverarbeitung',
                'storage' => null,
            ],
            'validate' => [
                'label' => 'Validierung',
                'description' => 'Datei-Prüfung auf Lesbarkeit und Korruption',
                'phase' => 'Vorverarbeitung',
                'storage' => null,
            ],
            'page_split' => [
                'label' => 'Seitenzerlegung',
                'description' => 'PDF in Einzelseiten zerlegen für Referenz und Vision-Analyse',
                'phase' => 'Vorverarbeitung',
                'storage' => 'ki_content.document_pages',
            ],
            'vision_analyze' => [
                'label' => 'Bildanalyse',
                'description' => 'Seiten via Vision-Modell analysieren, Bilder und Grafiken erkennen',
                'phase' => 'Vorverarbeitung',
                'storage' => 'ki_content.document_pages (vision_analysis)',
                'uses_vision' => true,
            ],
            'extract' => [
                'label' => 'Textextraktion',
                'description' => 'Text extrahieren, OCR für Bilder mit Text',
                'phase' => 'Vorverarbeitung',
                'storage' => null,
            ],
            'structure' => [
                'label' => 'Strukturerkennung',
                'description' => 'Überschriften, Listen und Hierarchie erkennen',
                'phase' => 'Vorverarbeitung',
                'storage' => 'ki_content.document_sections',
            ],
            'segment' => [
                'label' => 'Abschnitte',
                'description' => 'Logische Dokumentgliederung nach Struktur',
                'phase' => 'Vorverarbeitung',
                'storage' => 'ki_content.document_sections',
            ],
            'chunk' => [
                'label' => 'Textbausteine',
                'description' => 'Chunks erstellen (max 800 Token) mit Seitenreferenz',
                'phase' => 'Vorverarbeitung',
                'storage' => 'ki_content.chunks',
            ],
            // Phase 2: Speicherung & Vektorisierung
            'metadata_store' => [
                'label' => 'DB-Speicherung',
                'description' => 'Dokument, Seiten und Chunks in MariaDB speichern',
                'phase' => 'Speicherung',
                'storage' => 'ki_content.documents, .document_pages, .chunks',
            ],
            'embed' => [
                'label' => 'Vektorisierung',
                'description' => 'Embeddings erstellen für Vektor-Suche',
                'phase' => 'Speicherung',
                'storage' => null,
                'fixed_model' => 'mxbai-embed-large (1024-dim)',
            ],
            'collection_setup' => [
                'label' => 'Collection',
                'description' => 'Qdrant-Collection einrichten falls nötig',
                'phase' => 'Speicherung',
                'storage' => 'Qdrant: {collection}',
            ],
            'vector_store' => [
                'label' => 'Vektorspeicherung',
                'description' => 'Vektoren in Qdrant mit MariaDB-ID als Referenz',
                'phase' => 'Speicherung',
                'storage' => 'Qdrant: {collection}',
            ],
            'index_optimize' => [
                'label' => 'Index-Optimierung',
                'description' => 'HNSW-Index für schnelle Suche optimieren',
                'phase' => 'Speicherung',
                'storage' => 'Qdrant: {collection}',
            ],
            // Phase 3: Wissensextraktion (3 Ebenen)
            'knowledge_page' => [
                'label' => 'Seiten-Wissen',
                'description' => 'Pro Seite: Entitäten → Semantik → Ontologie → Taxonomie',
                'phase' => 'Wissen',
                'storage' => 'ki_content.page_knowledge, .entities, .entity_semantics',
                'uses_llm' => true,
            ],
            'knowledge_section' => [
                'label' => 'Abschnitt-Wissen',
                'description' => 'Pro Kapitel: Aggregierte Wissensrepräsentation',
                'phase' => 'Wissen',
                'storage' => 'ki_content.section_knowledge',
                'uses_llm' => true,
            ],
            'knowledge_document' => [
                'label' => 'Dokument-Wissen',
                'description' => 'Konsolidierte Gesamtsicht des Dokuments',
                'phase' => 'Wissen',
                'storage' => 'ki_content.document_knowledge',
                'uses_llm' => true,
            ],
            'knowledge_validate' => [
                'label' => 'Wissens-Validierung',
                'description' => 'Abgleich mit DB, Duplikate zusammenführen, neue validieren',
                'phase' => 'Wissen',
                'storage' => 'ki_content.entities (merged)',
            ],
            // Legacy Analyse-Schritte
            'entity_extract' => [
                'label' => 'Entitäten (Legacy)',
                'description' => 'Personen, Organisationen, Konzepte, Methoden erkennen',
                'phase' => 'Analyse',
                'storage' => 'ki_content.chunk_entities',
                'uses_llm' => true,
            ],
            'relation_extract' => [
                'label' => 'Beziehungen (Legacy)',
                'description' => 'Relationen zwischen Entitäten extrahieren',
                'phase' => 'Analyse',
                'storage' => 'ki_content.entity_relations',
                'uses_llm' => true,
            ],
            'taxonomy_build' => [
                'label' => 'Taxonomie (Legacy)',
                'description' => 'Hierarchische Kategorisierung aufbauen',
                'phase' => 'Analyse',
                'storage' => 'ki_content.chunk_taxonomy, .taxonomy_terms',
                'uses_llm' => true,
            ],
            'semantic_analyze' => [
                'label' => 'Semantik (Legacy)',
                'description' => 'Bedeutungs-Analyse, Konzepte und Definitionen',
                'phase' => 'Analyse',
                'storage' => 'ki_content.chunk_semantics',
                'uses_llm' => true,
            ],
            'summarize' => [
                'label' => 'Zusammenfassung',
                'description' => 'Dokument- und Seiten-Zusammenfassungen erstellen',
                'phase' => 'Analyse',
                'storage' => 'ki_content.documents (summary), .document_pages',
                'uses_llm' => true,
            ],
            'question_generate' => [
                'label' => 'Fragengenerierung',
                'description' => 'Beispielfragen für RAG-Chat erstellen',

... (64 weitere Zeilen)

Vollständig herunterladen

Aktionen

Herunterladen

Andere Versionen dieser Datei

ID Version Typ Größe Datum
1701 33 modified 9.9 KB 2025-12-27 12:20
1683 32 modified 9.9 KB 2025-12-27 12:02
1682 31 modified 9.9 KB 2025-12-27 12:02
1681 30 modified 9.9 KB 2025-12-27 12:02
1299 29 modified 9.8 KB 2025-12-25 13:28
1298 28 modified 10.2 KB 2025-12-25 13:28
1275 27 modified 10.2 KB 2025-12-25 12:52
680 26 modified 10.2 KB 2025-12-23 07:44
678 25 modified 10.1 KB 2025-12-23 07:39
605 24 modified 10.4 KB 2025-12-23 04:39
603 23 modified 11.2 KB 2025-12-23 04:31
602 22 modified 10.9 KB 2025-12-23 04:31
601 21 modified 11.3 KB 2025-12-23 04:30
600 20 modified 13.3 KB 2025-12-23 04:30
599 19 modified 13.4 KB 2025-12-23 04:30
598 18 modified 13.9 KB 2025-12-23 04:30
597 17 modified 13.4 KB 2025-12-23 04:29
585 16 modified 13.3 KB 2025-12-23 04:24
577 15 modified 12.1 KB 2025-12-23 04:19
573 14 modified 8.0 KB 2025-12-23 04:11
572 13 modified 8.0 KB 2025-12-23 04:10
528 12 modified 20.4 KB 2025-12-22 19:03
477 11 modified 18.6 KB 2025-12-22 15:21
476 10 modified 18.5 KB 2025-12-22 15:21
475 9 modified 18.4 KB 2025-12-22 15:20
472 8 modified 18.4 KB 2025-12-22 15:19
439 7 modified 16.5 KB 2025-12-22 10:14
428 6 modified 14.9 KB 2025-12-22 10:02
427 5 modified 14.8 KB 2025-12-22 10:01
426 4 modified 14.8 KB 2025-12-22 10:01
421 3 modified 13.8 KB 2025-12-22 09:55
419 2 modified 13.8 KB 2025-12-22 09:42
418 1 modified 10.6 KB 2025-12-22 09:35

← Zurück zur Übersicht