{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/src\/Infrastructure\/Config\/PipelineStepConfig.php",
"oldString": " 'question_generate' => [\n 'label' => 'Fragengenerierung',\n 'description' => 'Beispielfragen für RAG-Chat erstellen',\n 'phase' => 'Analyse',\n 'storage' => 'ki_content.generated_questions',\n 'uses_llm' => true,\n ],\n 'finalize' => [",
"newString": " 'question_generate' => [\n 'label' => 'Fragengenerierung',\n 'description' => 'Beispielfragen für RAG-Chat erstellen',\n 'phase' => 'Analyse',\n 'storage' => 'ki_content.generated_questions',\n 'uses_llm' => true,\n ],\n 'entity_store' => [\n 'label' => 'Entitäten-Speicherung',\n 'description' => 'Extrahierte Entitäten in Datenbank speichern',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.entities, .document_entities',\n ],\n 'entity_normalize' => [\n 'label' => 'Entitäten-Normalisierung',\n 'description' => 'Duplikate zusammenführen, Synonyme verknüpfen',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.entity_synonyms',\n ],\n 'relation_store' => [\n 'label' => 'Beziehungs-Speicherung',\n 'description' => 'Extrahierte Relationen in Datenbank speichern',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.entity_relations',\n ],\n 'ontology_classify' => [\n 'label' => 'Ontologie-Klassifikation',\n 'description' => 'Entitäten in Ontologie-Klassen einordnen',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.ontology_classes',\n ],\n 'chunk_entity_link' => [\n 'label' => 'Chunk-Entitäten-Verknüpfung',\n 'description' => 'Chunks mit erkannten Entitäten verknüpfen',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.chunk_entities',\n ],\n 'chunk_taxonomy' => [\n 'label' => 'Chunk-Taxonomie',\n 'description' => 'Chunks mit Taxonomie-Kategorien verknüpfen',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.chunk_taxonomy',\n ],\n 'entity_taxonomy' => [\n 'label' => 'Entitäten-Taxonomie',\n 'description' => 'Entitäten mit Taxonomie-Pfaden verknüpfen',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.entity_taxonomy_mapping',\n ],\n 'chunk_semantics' => [\n 'label' => 'Chunk-Semantik',\n 'description' => 'Semantische Analyse-Ergebnisse pro Chunk speichern',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.chunk_semantics',\n ],\n 'finalize' => [",
"originalFile": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Infrastructure\\Config;\n\n\/\/ @responsibility: Konfiguration für Pipeline-Schritte und Collections\n\nfinal class PipelineStepConfig\n{\n \/**\n * Get all available step types with their configuration.\n *\n * @return array<string, array<string, mixed>>\n *\/\n public static function getStepTypes(): array\n {\n return [\n \/\/ Phase 1: Vorverarbeitung\n 'detect' => [\n 'label' => 'Erkennung',\n 'description' => 'Dateien scannen und Format prüfen',\n 'phase' => 'Vorverarbeitung',\n 'storage' => null,\n ],\n 'validate' => [\n 'label' => 'Validierung',\n 'description' => 'Datei-Prüfung auf Lesbarkeit und Korruption',\n 'phase' => 'Vorverarbeitung',\n 'storage' => null,\n ],\n 'page_split' => [\n 'label' => 'Seitenzerlegung',\n 'description' => 'PDF in Einzelseiten zerlegen für Referenz und Vision-Analyse',\n 'phase' => 'Vorverarbeitung',\n 'storage' => 'ki_content.document_pages',\n ],\n 'vision_analyze' => [\n 'label' => 'Bildanalyse',\n 'description' => 'Seiten via Vision-Modell analysieren, Bilder und Grafiken erkennen',\n 'phase' => 'Vorverarbeitung',\n 'storage' => 'ki_content.document_pages (vision_analysis)',\n 'uses_vision' => true,\n ],\n 'extract' => [\n 'label' => 'Textextraktion',\n 'description' => 'Text extrahieren, OCR für Bilder mit Text',\n 'phase' => 'Vorverarbeitung',\n 'storage' => null,\n ],\n 'structure' => [\n 'label' => 'Strukturerkennung',\n 'description' => 'Überschriften, Listen und Hierarchie erkennen',\n 'phase' => 'Vorverarbeitung',\n 'storage' => 'ki_content.document_sections',\n ],\n 'segment' => [\n 'label' => 'Abschnitte',\n 'description' => 'Logische Dokumentgliederung nach Struktur',\n 'phase' => 'Vorverarbeitung',\n 'storage' => 'ki_content.document_sections',\n ],\n 'chunk' => [\n 'label' => 'Textbausteine',\n 'description' => 'Chunks erstellen (max 800 Token) mit Seitenreferenz',\n 'phase' => 'Vorverarbeitung',\n 'storage' => 'ki_content.chunks',\n ],\n 'queue' => [\n 'label' => 'Warteschlange',\n 'description' => 'Dokument zur Verarbeitung einreihen',\n 'phase' => 'Vorverarbeitung',\n 'storage' => null,\n ],\n 'hash' => [\n 'label' => 'Hash-Berechnung',\n 'description' => 'SHA256-Hash für Duplikat-Erkennung berechnen',\n 'phase' => 'Vorverarbeitung',\n 'storage' => null,\n ],\n 'rotation' => [\n 'label' => 'Seitenausrichtung',\n 'description' => 'Seiten-Rotation per OSD korrigieren',\n 'phase' => 'Vorverarbeitung',\n 'storage' => null,\n ],\n 'enrich' => [\n 'label' => 'Anreicherung',\n 'description' => 'Überschriften und Keywords extrahieren',\n 'phase' => 'Vorverarbeitung',\n 'storage' => 'ki_content.chunks (headings, keywords)',\n ],\n \/\/ Phase 2: Speicherung & Vektorisierung\n 'metadata_store' => [\n 'label' => 'DB-Speicherung',\n 'description' => 'Dokument, Seiten und Chunks in MariaDB speichern',\n 'phase' => 'Speicherung',\n 'storage' => 'ki_content.documents, .document_pages, .chunks',\n ],\n 'embed' => [\n 'label' => 'Vektorisierung',\n 'description' => 'Embeddings erstellen für Vektor-Suche',\n 'phase' => 'Speicherung',\n 'storage' => 'Qdrant: {collection}',\n 'fixed_model' => 'mxbai-embed-large (1024-dim)',\n 'has_collection' => true,\n ],\n 'collection_setup' => [\n 'label' => 'Collection',\n 'description' => 'Qdrant-Collection einrichten falls nötig',\n 'phase' => 'Speicherung',\n 'storage' => 'Qdrant: {collection}',\n ],\n 'vector_store' => [\n 'label' => 'Vektorspeicherung',\n 'description' => 'Vektoren in Qdrant mit MariaDB-ID als Referenz',\n 'phase' => 'Speicherung',\n 'storage' => 'Qdrant: {collection}',\n ],\n 'index_optimize' => [\n 'label' => 'Index-Optimierung',\n 'description' => 'HNSW-Index für schnelle Suche optimieren',\n 'phase' => 'Speicherung',\n 'storage' => 'Qdrant: {collection}',\n ],\n 'doc_create' => [\n 'label' => 'Dokument-Eintrag',\n 'description' => 'Dokument-Datensatz in Datenbank erstellen',\n 'phase' => 'Speicherung',\n 'storage' => 'ki_content.documents',\n ],\n 'page_store' => [\n 'label' => 'Seiten-Speicherung',\n 'description' => 'Einzelseiten in Datenbank speichern',\n 'phase' => 'Speicherung',\n 'storage' => 'ki_content.document_pages',\n ],\n 'vision' => [\n 'label' => 'Bild-Analyse',\n 'description' => 'Seiten via Vision-LLM analysieren',\n 'phase' => 'Analyse',\n 'storage' => null,\n 'uses_llm' => true,\n ],\n 'vision_store' => [\n 'label' => 'Vision-Speicherung',\n 'description' => 'Vision-Analyse-Ergebnisse speichern',\n 'phase' => 'Speicherung',\n 'storage' => 'ki_content.document_pages (vision_analysis)',\n ],\n 'chunk_store' => [\n 'label' => 'Chunk-Speicherung',\n 'description' => 'Textbausteine in Datenbank speichern',\n 'phase' => 'Speicherung',\n 'storage' => 'ki_content.chunks',\n ],\n 'qdrant_store' => [\n 'label' => 'Vektor-Speicherung',\n 'description' => 'Embedding-Vektoren in Qdrant speichern',\n 'phase' => 'Speicherung',\n 'storage' => 'Qdrant: {collection}',\n ],\n 'status_update' => [\n 'label' => 'Status-Update',\n 'description' => 'Dokument-Status aktualisieren',\n 'phase' => 'Speicherung',\n 'storage' => 'ki_content.documents (status)',\n ],\n \/\/ Phase 3: Wissensextraktion\n 'knowledge_page' => [\n 'label' => 'Seiten-Wissen',\n 'description' => 'Pro Seite: Entitäten → Semantik → Ontologie → Taxonomie',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.page_knowledge, .entities, .entity_semantics',\n 'uses_llm' => true,\n ],\n 'knowledge_section' => [\n 'label' => 'Abschnitt-Wissen',\n 'description' => 'Pro Kapitel: Aggregierte Wissensrepräsentation',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.section_knowledge',\n 'uses_llm' => true,\n ],\n 'knowledge_document' => [\n 'label' => 'Dokument-Wissen',\n 'description' => 'Konsolidierte Gesamtsicht des Dokuments',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.document_knowledge',\n 'uses_llm' => true,\n ],\n 'knowledge_validate' => [\n 'label' => 'Wissens-Validierung',\n 'description' => 'Abgleich mit DB, Duplikate zusammenführen, neue validieren',\n 'phase' => 'Wissen',\n 'storage' => 'ki_content.entities (merged)',\n ],\n \/\/ Legacy Analyse-Schritte\n 'entity_extract' => [\n 'label' => 'Entitäten (Legacy)',\n 'description' => 'Personen, Organisationen, Konzepte, Methoden erkennen',\n 'phase' => 'Analyse',\n 'storage' => 'ki_content.chunk_entities',\n 'uses_llm' => true,\n ],\n 'relation_extract' => [\n 'label' => 'Beziehungen (Legacy)',\n 'description' => 'Relationen zwischen Entitäten extrahieren',\n 'phase' => 'Analyse',\n 'storage' => 'ki_content.entity_relations',\n 'uses_llm' => true,\n ],\n 'taxonomy_build' => [\n 'label' => 'Taxonomie (Legacy)',\n 'description' => 'Hierarchische Kategorisierung aufbauen',\n 'phase' => 'Analyse',\n 'storage' => 'ki_content.chunk_taxonomy, .taxonomy_terms',\n 'uses_llm' => true,\n ],\n 'semantic_analyze' => [\n 'label' => 'Semantik (Legacy)',\n 'description' => 'Bedeutungs-Analyse, Konzepte und Definitionen',\n 'phase' => 'Analyse',\n 'storage' => 'ki_content.chunk_semantics',\n 'uses_llm' => true,\n ],\n 'summarize' => [\n 'label' => 'Zusammenfassung',\n 'description' => 'Dokument- und Seiten-Zusammenfassungen erstellen',\n 'phase' => 'Analyse',\n 'storage' => 'ki_content.documents (summary), .document_pages',\n 'uses_llm' => true,\n ],\n 'question_generate' => [\n ... [TRUNCATED-0da9dfb6c1e75c07]",
"structuredPatch": [
{
"oldStart": 237,
"oldLines": 6,
"newStart": 237,
"newLines": 54,
"lines": [
" 'storage' => 'ki_content.generated_questions',",
" 'uses_llm' => true,",
" ],",
"+ 'entity_store' => [",
"+ 'label' => 'Entitäten-Speicherung',",
"+ 'description' => 'Extrahierte Entitäten in Datenbank speichern',",
"+ 'phase' => 'Wissen',",
"+ 'storage' => 'ki_content.entities, .document_entities',",
"+ ],",
"+ 'entity_normalize' => [",
"+ 'label' => 'Entitäten-Normalisierung',",
"+ 'description' => 'Duplikate zusammenführen, Synonyme verknüpfen',",
"+ 'phase' => 'Wissen',",
"+ 'storage' => 'ki_content.entity_synonyms',",
"+ ],",
"+ 'relation_store' => [",
"+ 'label' => 'Beziehungs-Speicherung',",
"+ 'description' => 'Extrahierte Relationen in Datenbank speichern',",
"+ 'phase' => 'Wissen',",
"+ 'storage' => 'ki_content.entity_relations',",
"+ ],",
"+ 'ontology_classify' => [",
"+ 'label' => 'Ontologie-Klassifikation',",
"+ 'description' => 'Entitäten in Ontologie-Klassen einordnen',",
"+ 'phase' => 'Wissen',",
"+ 'storage' => 'ki_content.ontology_classes',",
"+ ],",
"+ 'chunk_entity_link' => [",
"+ 'label' => 'Chunk-Entitäten-Verknüpfung',",
"+ 'description' => 'Chunks mit erkannten Entitäten verknüpfen',",
"+ 'phase' => 'Wissen',",
"+ 'storage' => 'ki_content.chunk_entities',",
"+ ],",
"+ 'chunk_taxonomy' => [",
"+ 'label' => 'Chunk-Taxonomie',",
"+ 'description' => 'Chunks mit Taxonomie-Kategorien verknüpfen',",
"+ 'phase' => 'Wissen',",
"+ 'storage' => 'ki_content.chunk_taxonomy',",
"+ ],",
"+ 'entity_taxonomy' => [",
"+ 'label' => 'Entitäten-Taxonomie',",
"+ 'description' => 'Entitäten mit Taxonomie-Pfaden verknüpfen',",
"+ 'phase' => 'Wissen',",
"+ 'storage' => 'ki_content.entity_taxonomy_mapping',",
"+ ],",
"+ 'chunk_semantics' => [",
"+ 'label' => 'Chunk-Semantik',",
"+ 'description' => 'Semantische Analyse-Ergebnisse pro Chunk speichern',",
"+ 'phase' => 'Wissen',",
"+ 'storage' => 'ki_content.chunk_semantics',",
"+ ],",
" 'finalize' => [",
" 'label' => 'Abschluss',",
" 'description' => 'Status finalisieren und Job beenden',"
]
}
],
"userModified": false,
"replaceAll": false
}
}