test_no_hardcoded_models.py

Pfad: /var/www/scripts/pipeline/tests/test_no_hardcoded_models.py
Namespace: pipeline
Zeilen: 103 | Größe: 2,899 Bytes
Geändert: 2025-12-31 03:01:09 | Gescannt: 2025-12-31 10:22:15

Code Hygiene Score: 99

Dependencies: 100 (25%)
LOC: 100 (20%)
Methods: 100 (20%)
Secrets: 100 (15%)
Classes: 100 (10%)
Magic Numbers: 90 (10%)

Issues 1

Zeile	Typ	Beschreibung
64	magic_number	Magic Number gefunden: 60

Dependencies 2

use re
use pathlib.Path

Funktionen 2

test_pipeline_no_literal_models() Zeile 40
test_no_model_in_execute_calls() Zeile 71

Code

#!/usr/bin/env python3
"""
Test: Keine hardcodierten Model-Namen in Pipeline-Dateien.

Supervision-Anforderung: Scannt auf Literal-Modelle.
Task #516
"""

import re
from pathlib import Path

# File names to scan; the tests resolve them against the parent of this
# test file's directory.
SCAN_FILES = ["pipeline.py", "step_semantic_extended.py", "step_entity_enrich.py"]

# Known model names that must not show up as hard-coded string literals.
FORBIDDEN_MODELS = ["mistral", "gemma", "llama", "phi", "qwen", "claude", "gpt-4", "gpt-3.5"]

# Regexes for lines that are exempt from the scan.
ALLOWED_PATTERNS = [
    # Comment lines.
    r"^\s*#",
    # Assignments to UPPER_CASE constants.
    r"^\s*[A-Z][A-Z0-9_]*\s*=",
    # Allowlist definition.
    r"HARDCODED_MODELS",
    # Lines that mention this test module's own list.
    r"FORBIDDEN_MODELS",
]


def test_pipeline_no_literal_models():
    """Fail if a scanned file contains a forbidden model name as a string literal.

    Every file in ``SCAN_FILES`` is looked up in the parent of this test's
    directory and read line by line. A line is skipped when any regex in
    ``ALLOWED_PATTERNS`` matches it; otherwise each entry of
    ``FORBIDDEN_MODELS`` is searched for, case-insensitively, as a complete
    single- or double-quoted string.

    Files that do not exist are skipped, so the test passes vacuously when
    none of them is present.

    Raises:
        AssertionError: Lists every offending ``file:line`` together with a
            shortened copy of the line.
    """
    base_path = Path(__file__).parent.parent
    snippet_width = 60  # max. characters of the offending line shown per violation

    # Compile once instead of once per scanned line. re.escape keeps regex
    # metacharacters in model names literal: the "." in "gpt-3.5" must not
    # match an arbitrary character (as it would in "gpt-3x5").
    allowed = [re.compile(pattern) for pattern in ALLOWED_PATTERNS]
    forbidden = [
        (model, re.compile(rf'["\']{re.escape(model)}["\']', re.IGNORECASE))
        for model in FORBIDDEN_MODELS
    ]

    violations = []

    for filename in SCAN_FILES:
        filepath = base_path / filename
        if not filepath.exists():
            continue

        # Explicit encoding so the scan does not depend on the locale's
        # preferred encoding (Python source files default to UTF-8).
        with open(filepath, encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                # Exempt lines: comments, constants, allowlists.
                if any(regex.search(line) for regex in allowed):
                    continue

                snippet = line.strip()[:snippet_width]
                violations.extend(
                    f"{filename}:{line_num}: Found '{model}' - {snippet}"
                    for model, regex in forbidden
                    if regex.search(line)
                )

    if violations:
        msg = "Hardcodierte Models gefunden:\n" + "\n".join(violations)
        raise AssertionError(msg)


def test_no_model_in_execute_calls():
    """Fail if pipeline.py passes a literal ``"model"`` value to ``.execute(``.

    The file is looked up in the parent of this test's directory. When it
    does not exist the check is skipped and the test passes.

    Raises:
        AssertionError: Carries the list of matched source snippets.
    """
    filepath = Path(__file__).parent.parent / "pipeline.py"

    if not filepath.exists():
        return

    # Explicit encoding so the check does not depend on the locale's
    # preferred encoding (Python source files default to UTF-8).
    content = filepath.read_text(encoding="utf-8")

    # Matches: .execute(..., {"model": "..."})
    # [^)]* also crosses line breaks, so calls spread over several lines are
    # covered; the search stops at the first ")" after ".execute(".
    pattern = r'\.execute\([^)]*["\']model["\']\s*:\s*["\'][^"\']+["\']'
    matches = re.findall(pattern, content)

    if matches:
        raise AssertionError(f"Hardcodierte model in execute() gefunden: {matches}")


if __name__ == "__main__":
    # Manual runner for use without pytest: runs both checks, reports each
    # result, and exits with status 1 when any check failed so that shells
    # and CI jobs can detect the failure.
    print("Running hardcoded model tests...")
    failed = False
    for check in (test_pipeline_no_literal_models, test_no_model_in_execute_calls):
        try:
            check()
        except AssertionError as e:
            failed = True
            print(f"✗ {check.__name__} FAILED:\n{e}")
        else:
            print(f"✓ {check.__name__} passed")

    if failed:
        raise SystemExit(1)

← Übersicht