python-pipeline-contract_v1.0

ID 6
UUID 650a0a9e-e57c-4b01-a45b-f94fbc6d3f6c
Version 1.0
Status active
Scope
Erstellt 2025-12-20 10:59:58 von migration
Aktualisiert 2025-12-20 10:59:58

YAML-Inhalt

# Document metadata: classifies this file as a normative, mandatory architecture contract.
meta:
  document_type: architecture_contract
  document_id: python-pipeline-contract
  normative: true
  binding: mandatory
  # Quoted so every parser yields the same string; YAML 1.1 loaders convert a
  # bare ISO date into a native date object.
  effective_from: "2025-12-20"

# Identity and release history of this contract.
identity:
  name: python-pipeline-contract
  version: 1.0.0
  status: active
  stability: strict
  # One entry per released version, listing what that version introduced.
  changelog:
    - version: 1.0.0
      # Quoted so every parser yields the same string; YAML 1.1 loaders convert
      # a bare ISO date into a native date object.
      date: "2025-12-20"
      changes:
        - Initial version
        - Code style rules (Ruff)
        - Type hint requirements (mypy)
        - Docstring requirements
        - Error handling rules
        - Import rules
        - Security rules
        - Module structure

# Purpose statement (folded into a single line of text) and the quality goals
# the rest of the contract is meant to serve.
intent:
  purpose: >
    Defines mandatory code quality, type safety, and security constraints
    for Python code in the KI-Pipeline produced by humans or AI systems.
  goals:
    - code_quality
    - type_safety
    - maintainability
    - security
    - consistency

# Where the contract applies: human- and AI-authored code, on the listed runtime,
# for files matching the scope globs and outside the excluded contexts.
applicability:
  applies_to:
    - human_authored_code
    - ai_generated_code
  runtime:
    - python_3.13
  # NOTE(review): this glob presumably also matches files under
  # /opt/scripts/pipeline/tests/ (see testing.rules.location) — confirm that
  # test code is meant to be held to the same rules.
  scope:
    - /opt/scripts/pipeline/**/*.py
  excluded_contexts:
    - virtual_environment
    - __pycache__
    - third_party_libraries

# Precedence: this contract overrides the listed guideline documents and wins
# whenever it conflicts with them.
authority:
  precedence: highest
  overrides:
    - coding_guidelines
    - styleguides
  conflict_resolution: this_document_wins

# Enforcement: compliance is required for execution, violations reject the
# output, and partial compliance is not accepted.
compliance:
  required_for_execution: true
  violation_policy:
    on_violation: reject_output
    remediation: fix_and_recheck
  partial_compliance: forbidden
  # Tools that check compliance.
  tools:
    linting: ruff
    type_checking: mypy
    # Only a ruff.toml path is named here; no mypy configuration path is given.
    config_file: /opt/scripts/pipeline/ruff.toml

# How this contract may change: breaking changes need an explicit version bump,
# silent behavior changes are forbidden, and changes require review.
evolution:
  change_policy:
    breaking_changes: explicit_version_bump_required
    silent_behavior_changes: forbidden
  review_required: true

# Intended readers: automated systems first, human engineers second.
audience:
  primary:
    - ai_code_generators
    - automated_review_systems
  secondary:
    - backend_engineers
    - system_administrators

# Language baseline for source files: Python version, file encoding, line endings.
language:
  # Quoted so the version stays a string rather than being read as a float.
  version: "3.13"
  encoding: utf-8
  line_ending: lf

# Formatting conventions; ruff is the named formatter.
code_style:
  formatter: ruff
  line_length: 120
  indent:
    style: space
    size: 4
  quotes:
    style: double
    docstrings: double
  trailing_comma: required_multiline
  # Number of blank lines required at each of these positions.
  blank_lines:
    after_imports: 2
    between_functions: 2
    between_classes: 2

# Import ordering and import hygiene rules.
imports:
  sorting:
    # NOTE(review): the validation commands in this file invoke only ruff, mypy
    # and pytest, and ruff_rules selects the "I" (isort) rule family — presumably
    # "isort" here means Ruff's isort-compatible rules, not the standalone tool;
    # confirm.
    tool: isort
    # Section order of import groups, top to bottom.
    order:
      - standard_library
      - third_party
      - first_party
      - local
    # Modules sorted into the first_party group.
    first_party_modules:
      - config
      - db
  # Each rule gives a status plus, where present, a bad/good example pair.
  rules:
    wildcards:
      status: forbidden
      example_bad: "from config import *"
      example_good: "from config import DB_CONFIG, OLLAMA_HOST"
    relative_imports:
      status: forbidden
      example_bad: "from .db import Database"
      example_good: "from db import Database"
    unused_imports:
      status: forbidden
      # The only stated exception to the unused-import ban.
      exception: "__init__.py"
    duplicate_imports:
      status: forbidden

# Type annotation requirements; mypy is the named checker.
type_hints:
  enforcement: required
  tool: mypy
  rules:
    # Signatures must annotate both the parameters and the return type.
    function_signatures:
      status: required
      includes:
        - parameters
        - return_type
      example: "def process(path: str, verbose: bool = False) -> bool:"
    # A function returning nothing must still declare "-> None".
    return_types:
      status: required
      none_explicit: required
      example_good: "def setup() -> None:"
      example_bad: "def setup():"
    parameters:
      status: required
      example: "def fetch(url: str, timeout: int = 30) -> dict[str, Any]:"
    # Variable annotations are optional, recommended when the type is ambiguous.
    variables:
      status: optional
      recommended_when: ambiguous
      example: "results: list[Document] = []"
    # Collection types must carry their element types.
    collections:
      generic_required: true
      example_good: "list[str], dict[str, int], tuple[int, ...]"
      example_bad: "list, dict, tuple"
    # Optional values use the "X | None" union form instead of Optional[X].
    optional:
      use_pipe_syntax: true
      example_good: "str | None"
      example_bad: "Optional[str]"
    any:
      status: discouraged
      allowed_when: truly_dynamic

# Docstring requirements per construct, in Google style. Modules, classes and
# public functions require a docstring; private functions (prefix "_") and
# magic methods do not.
docstrings:
  style: google
  rules:
    modules:
      status: required
      format: single_line
      # NOTE(review): unlike the class and function examples below, this example
      # has no trailing period — confirm whether that is intentional.
      example: '"""Database operations for KI-System Pipeline"""'
    classes:
      status: required
      content:
        - description
        - attributes_if_public
      example: |
        """MariaDB connection wrapper with document operations.

        Attributes:
            connection: Active database connection or None.
        """
    public_functions:
      status: required
      content:
        - description
      args_section: optional
      returns_section: optional
      example: '"""Insert a new document and return its ID."""'
    private_functions:
      status: optional
      prefix: "_"
    magic_methods:
      status: optional

# Exception-handling rules. Bare "except:" and silent failures are forbidden,
# specific exception types are preferred, "except Exception" is allowed for
# top-level handlers, and exception chaining is recommended. The multi-line
# examples are literal block scalars, so their line breaks are part of the value.
error_handling:
  rules:
    bare_except:
      status: forbidden
      example_bad: |
        try:
            process()
        except:
            pass
    except_exception:
      status: allowed
      use_case: top_level_handlers
      example: |
        try:
            process()
        except Exception as e:
            logger.error(f"Unexpected error: {e}")
            raise
    specific_exceptions:
      status: preferred
      example: |
        try:
            with open(path) as f:
                data = f.read()
        except FileNotFoundError:
            logger.warning(f"File not found: {path}")
            return None
        except PermissionError:
            logger.error(f"Permission denied: {path}")
            raise
    silent_failures:
      status: forbidden
      rule: always_log_or_raise
    exception_chaining:
      status: recommended
      example: "raise ProcessingError(f'Failed: {path}') from e"

# Security rules: forbidden or restricted constructs, with the Ruff rule that
# detects each one where a detector is named.
security:
  rules:
    hardcoded_credentials:
      status: forbidden
      # Ruff exposes the flake8-bandit checks under the "S" prefix, so Bandit's
      # B105 (hardcoded password string) is S105 in Ruff; Ruff's "B" prefix is
      # flake8-bugbear.
      detection: ruff_S105
      example_bad: 'password = "secret123"'
      example_good: 'password = os.environ.get("DB_PASSWORD")'
    eval_exec:
      status: forbidden
      # NOTE(review): S307 flags eval() only; exec() is reported by S102, and no
      # dedicated Ruff rule for compile() is known — confirm how the other two
      # entries below are meant to be detected.
      detection: ruff_S307
      includes:
        - eval
        - exec
        - compile
    sql_injection:
      status: forbidden
      rule: use_parameterized_queries
      example_bad: 'cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")'
      example_good: 'cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))'
    path_traversal:
      status: forbidden
      rule: validate_paths
      recommendation: use_pathlib
    # subprocess is restricted rather than forbidden; only shell=True is banned.
    subprocess:
      status: restricted
      shell_equals_true: forbidden
      example_bad: 'subprocess.run(cmd, shell=True)'
      example_good: 'subprocess.run(["ls", "-la"], check=True)'
    # Where credentials may and may not come from.
    credentials_source:
      allowed:
        - environment_variables
        - credentials_file
      forbidden:
        - source_code
        - config_constants

# Expected module layout under the pipeline root. Each entry names its file(s),
# its purpose, and the architectural rules that apply to it.
module_structure:
  root: /opt/scripts/pipeline
  layout:
    config:
      file: config.py
      purpose: All configuration values
      rules:
        - single_source_of_truth
        - no_hardcoded_secrets
    # db.py is the only module allowed to access the database.
    database:
      file: db.py
      purpose: Database operations
      rules:
        - only_module_with_db_access
        - parameterized_queries_only
    pipeline:
      file: pipeline.py
      purpose: Orchestration
      rules:
        - coordinates_other_modules
        - no_direct_db_access
    processors:
      files:
        - detect.py
        - extract.py
        - chunk.py
        - embed.py
        - analyze.py
      purpose: Data processing stages
      rules:
        - single_responsibility
        - pure_functions_preferred
    ai:
      files:
        - generate.py
        - chat.py
      purpose: AI/LLM integration
      rules:
        - retry_logic_required
        - timeout_handling_required
    # The web modules are thin wrappers and must hold no business logic.
    web:
      files:
        - web_chat.py
        - web_generate.py
      purpose: Flask integration
      rules:
        - thin_wrappers_only
        - no_business_logic

# Allow-list and deny-list of modules. Entries are written as import names
# (e.g. mysql.connector, qdrant_client), so they can be matched against import
# statements.
dependencies:
  allowed:
    # NOTE(review): security.rules.subprocess marks subprocess as restricted,
    # not forbidden, yet it is not listed here — confirm whether this list is
    # meant to be exhaustive.
    standard_library:
      - os
      - sys
      - json
      - hashlib
      - pathlib
      - logging
      - datetime
      - typing
      - dataclasses
    third_party:
      - mysql.connector
      - qdrant_client
      - requests
      - anthropic
      - flask
      - pypdf
      - docx  # import name of the python-docx distribution
      - pptx  # import name of the python-pptx distribution
      - pytesseract
  forbidden:
    - pickle  # security risk
    - marshal  # security risk

# Logging conventions; the named library is "logging".
logging:
  library: logging
  rules:
    # print() is discouraged; CLI scripts are the stated allowed context.
    print_statements:
      status: discouraged
      allowed_in: cli_scripts
    # Intended use of each log level.
    log_levels:
      debug: development_info
      info: normal_operations
      warning: recoverable_issues
      error: failures
      critical: system_failures
    # Required record fields, and a format string that contains all three.
    format:
      include:
        - timestamp
        - level
        - message
      example: "%(asctime)s - %(levelname)s - %(message)s"

# Test conventions; pytest is the named framework.
testing:
  framework: pytest
  rules:
    # File and function name patterns for tests.
    naming:
      files: "test_*.py"
      functions: "test_*"
    location: /opt/scripts/pipeline/tests/
    coverage:
      # NOTE(review): no unit is given; presumably a percentage — confirm.
      target: 70
      critical_modules:
        - db.py
        - config.py

# Ruff rule selection: rule families and individual codes to enable, then the
# codes excluded from that selection.
ruff_rules:
  # NOTE(review): selecting the whole "S" family includes S101 (use of assert),
  # and testing.framework is pytest, whose tests rely on assert — confirm how
  # test files are exempted.
  select:
    - E      # pycodestyle errors
    - W      # pycodestyle warnings
    - F      # Pyflakes
    - I      # isort
    - B      # flake8-bugbear
    - C4     # flake8-comprehensions
    - UP     # pyupgrade
    - SIM    # flake8-simplify
    - S      # flake8-bandit (security)
    - D100   # missing docstring in module
    - D101   # missing docstring in class
    - D102   # missing docstring in public method
    - D103   # missing docstring in public function
  # D105 and D107 are ignored, matching docstrings.rules.magic_methods being optional.
  ignore:
    - E501   # line too long (handled by formatter)
    - E402   # module level import not at top
    - B008   # function call in default argument
    - B905   # zip without strict
    - SIM108 # ternary operator
    - D105   # missing docstring in magic method
    - D107   # missing docstring in __init__

# mypy settings. The disallow_* options reject functions without, or with only
# partial, annotations, matching the "required" status in type_hints.
mypy_rules:
  strict_optional: true
  warn_return_any: true
  warn_unused_ignores: true
  disallow_untyped_defs: true
  disallow_incomplete_defs: true
  check_untyped_defs: true

# Commands run at each gate; pre_sync repeats the pre_commit commands and adds pytest.
validation:
  # NOTE(review): "mypy" is listed without a target; mypy needs targets on the
  # command line or a "files" setting in its configuration — confirm that a
  # mypy config supplies them.
  pre_commit:
    - ruff check
    - ruff format --check
    - mypy
  pre_sync:
    - ruff check
    - ruff format --check
    - mypy
    - pytest
  # A failed check blocks the operation and reports this message.
  on_violation:
    block: true
    message: "Python quality check failed"

Aktionen

Bearbeiten

← Zurück zur Übersicht