semantic_chunk_analyzer.py

Pfad: /var/www/scripts/pipeline/semantic_chunk_analyzer.py
Namespace: pipeline
Zeilen: 119 | Größe: 3,140 Bytes
Geändert: 2025-12-25 18:29:01 | Gescannt: 2025-12-31 10:22:15

Code Hygiene Score: 75

Dependencies: 0 (25%)
LOC: 100 (20%)
Methods: 100 (20%)
Secrets: 100 (15%)
Classes: 100 (10%)
Magic Numbers: 100 (10%)

Issues 1

Zeile	Typ	Beschreibung
-	coupling	Klasse hat 17 Dependencies (max: 15)

Dependencies 17

use sys
use db.db
use semantic_chunk.ANALYSIS_MODEL
use semantic_chunk.BATCH_SIZE
use semantic_chunk.ChunkRepository
use semantic_chunk.ChunkSemantics
use semantic_chunk.Entity
use semantic_chunk.EntityExtractor
use semantic_chunk.EntityRepository
use semantic_chunk.OllamaService
use semantic_chunk.Relation
use semantic_chunk.RelationExtractor
use semantic_chunk.SemanticChunkPipeline
use semantic_chunk.SemanticsAnalyzer
use semantic_chunk.SemanticsRepository
use semantic_chunk.TaxonomyClassifier
use semantic_chunk.TaxonomyRepository

Funktionen 1

main() Zeile 62

Code

#!/usr/bin/env python3
"""
Semantic Chunk Analyzer - Chunk-Level Semantic Analysis Pipeline

Analysiert Chunks aus der ki_system.chunks Tabelle und befüllt:
- chunk_semantics (summary, keywords, sentiment, topics, language)
- entities (extrahierte Entitäten)
- chunk_entities (Verknüpfung Chunk <-> Entity)
- entity_relations (Beziehungen zwischen Entitäten)
- taxonomy_terms + chunk_taxonomy (Kategorisierung)

BACKWARD COMPATIBILITY WRAPPER - Logic moved to semantic_chunk/ package.

Usage:
    python semantic_chunk_analyzer.py analyze [--limit N]
    python semantic_chunk_analyzer.py status
    python semantic_chunk_analyzer.py reset
"""

import sys

from db import db

# Re-export for backward compatibility
from semantic_chunk import (
    ANALYSIS_MODEL,
    BATCH_SIZE,
    ChunkRepository,
    ChunkSemantics,
    Entity,
    EntityExtractor,
    EntityRepository,
    OllamaService,
    Relation,
    RelationExtractor,
    SemanticChunkPipeline,
    SemanticsAnalyzer,
    SemanticsRepository,
    TaxonomyClassifier,
    TaxonomyRepository,
)

# Public names of this wrapper module. Every entry is imported from the
# ``semantic_chunk`` package above and listed here so that it is re-exported
# to code that still imports it from this module.
__all__ = [
    "ChunkSemantics",
    "Entity",
    "Relation",
    "OllamaService",
    "ANALYSIS_MODEL",
    "SemanticsAnalyzer",
    "EntityExtractor",
    "RelationExtractor",
    "TaxonomyClassifier",
    "ChunkRepository",
    "SemanticsRepository",
    "EntityRepository",
    "TaxonomyRepository",
    "SemanticChunkPipeline",
    "BATCH_SIZE",
]


def main():
    """Run the command-line interface of the semantic chunk pipeline.

    The command is taken from ``sys.argv[1]`` and compared case-insensitively:

    * ``analyze [--limit N]`` -- call ``pipeline.run(limit)`` and print the
      success/processed counters. ``limit`` defaults to ``BATCH_SIZE``; it is
      only overridden when ``--limit`` is the argument directly after the
      command and a value follows it.
    * ``status`` -- print the counters returned by ``pipeline.status()``.
    * ``reset`` -- after an interactive confirmation (answer ``ja``),
      truncate all semantic tables.

    When no command or an unknown command is given, the module usage text is
    printed and the function returns without opening a database connection.

    Raises:
        Exception: Any error raised while a known command runs is logged via
            ``db.log``, echoed to stdout and re-raised. This includes the
            ``ValueError`` produced by a non-integer ``--limit`` value.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        return

    command = sys.argv[1].lower()

    # Reject unknown commands before any side effect: a typo must not open a
    # database connection or build the pipeline, and a failing connection
    # must not hide the usage text.
    if command not in ("analyze", "status", "reset"):
        print(f"Unbekannter Befehl: {command}")
        print(__doc__)
        return

    db.connect()

    try:
        pipeline = SemanticChunkPipeline()

        if command == "analyze":
            limit = BATCH_SIZE
            if len(sys.argv) > 2 and sys.argv[2] == "--limit":
                # "--limit" without a value silently keeps the default.
                limit = int(sys.argv[3]) if len(sys.argv) > 3 else BATCH_SIZE

            result = pipeline.run(limit)
            print(f"\nErgebnis: {result['success']}/{result['processed']} erfolgreich")

        elif command == "status":
            stats = pipeline.status()
            print(f"Chunks gesamt: {stats['total']}")
            print(f"Analysiert:    {stats['analyzed']}")
            print(f"Entitäten:     {stats['entities']}")
            print(f"Ausstehend:    {stats['total'] - stats['analyzed']}")

        elif command == "reset":
            confirm = input("Alle Semantik-Daten löschen? (ja/nein): ")
            if confirm.lower() == "ja":
                # Table names come from this fixed list, never from user
                # input, so interpolating them into the statement is safe.
                # NOTE(review): link tables are listed before the tables they
                # presumably reference -- confirm before reordering.
                for table in [
                    "chunk_semantics",
                    "chunk_entities",
                    "chunk_taxonomy",
                    "entity_relations",
                    "entities",
                    "taxonomy_terms",
                ]:
                    db.execute(f"TRUNCATE TABLE {table}")
                    db.commit()
                print("Alle Semantik-Tabellen geleert.")

    except Exception as e:
        # Top-level boundary: record the failure, tell the user, then let
        # the exception propagate so the process exits with a traceback.
        db.log("ERROR", f"Pipeline error: {e}")
        print(f"Fehler: {e}")
        raise
    finally:
        # Always release the connection opened above.
        db.disconnect()

# Run the CLI only when this file is executed as a script; importing the
# module (e.g. for its re-exported names) must not trigger main().
if __name__ == "__main__":
    main()

← Übersicht