python-pipeline-contract_v1.0
| ID | 6 |
| UUID | 650a0a9e-e57c-4b01-a45b-f94fbc6d3f6c |
| Version | 1.0 |
| Status | active |
| Scope | |
| Erstellt | 2025-12-20 10:59:58 von migration |
| Aktualisiert | 2025-12-20 10:59:58 |
YAML-Inhalt
# Document metadata: the kind of document and how binding it is.
meta:
  document_type: architecture_contract
  document_id: python-pipeline-contract
  normative: true
  binding: mandatory
  # NOTE(review): an unquoted ISO date is loaded as a date object by YAML 1.1
  # loaders such as PyYAML; quote it if consumers expect a string.
  effective_from: 2025-12-20
# Name, version and lifecycle state of this contract.
identity:
  name: python-pipeline-contract
  version: 1.0.0
  status: active
  stability: strict
# Release history: one entry per contract version, each listing its changes.
# NOTE(review): kept as a top-level key; the original nesting is not
# recoverable here - confirm whether it belongs under `identity`.
changelog:
  - version: 1.0.0
    date: 2025-12-20
    changes:
      - Initial version
      - Code style rules (Ruff)
      - Type hint requirements (mypy)
      - Docstring requirements
      - Error handling rules
      - Import rules
      - Security rules
      - Module structure
# Why the contract exists and which qualities it is meant to secure.
intent:
  # Folded scalar: the two lines below are joined into one sentence.
  purpose: >
    Defines mandatory code quality, type safety, and security constraints
    for Python code in the KI-Pipeline produced by humans or AI systems.
  goals:
    - code_quality
    - type_safety
    - maintainability
    - security
    - consistency
# Which authors, runtimes and paths the contract covers, and what is excluded.
applicability:
  applies_to:
    - human_authored_code
    - ai_generated_code
  runtime:
    - python_3.13
  scope:
    - /opt/scripts/pipeline/**/*.py
  excluded_contexts:
    - virtual_environment
    - __pycache__
    - third_party_libraries
# Precedence of this contract relative to other guidance documents.
authority:
  precedence: highest
  overrides:
    - coding_guidelines
    - styleguides
  conflict_resolution: this_document_wins
# What happens when code does not satisfy the contract.
compliance:
  required_for_execution: true
  violation_policy:
    on_violation: reject_output
    remediation: fix_and_recheck
  # NOTE(review): placed directly under `compliance`; it may instead belong
  # inside `violation_policy` - confirm against the authoritative copy.
  partial_compliance: forbidden
# Tooling used to check the contract: linter, type checker, linter config path.
# NOTE(review): kept as a top-level key; the original nesting is not
# recoverable here - confirm whether it belongs under `compliance`.
tools:
  linting: ruff
  type_checking: mypy
  config_file: /opt/scripts/pipeline/ruff.toml
# How the contract itself may change over time.
evolution:
  change_policy:
    breaking_changes: explicit_version_bump_required
    silent_behavior_changes: forbidden
  # NOTE(review): placed directly under `evolution`; it may instead belong
  # inside `change_policy` - confirm against the authoritative copy.
  review_required: true
# Intended readers, split into primary and secondary consumers.
audience:
  primary:
    - ai_code_generators
    - automated_review_systems
  secondary:
    - backend_engineers
    - system_administrators
# Language baseline for source files.
language:
  # Quoted so the version stays a string rather than the float 3.13.
  version: "3.13"
  encoding: utf-8
  line_ending: lf
# Formatting rules: formatter, line length, indentation, quotes, blank lines.
code_style:
  formatter: ruff
  line_length: 120
  indent:
    style: space
    size: 4
  quotes:
    style: double
    docstrings: double
  trailing_comma: required_multiline
  blank_lines:
    after_imports: 2
    between_functions: 2
    between_classes: 2
# Import ordering and the import forms that are forbidden.
imports:
  sorting:
    tool: isort
    order:
      - standard_library
      - third_party
      - first_party
      - local
    # NOTE(review): placed under `sorting`; it may instead be a direct child
    # of `imports` - confirm against the authoritative copy.
    first_party_modules:
      - config
      - db
  rules:
    wildcards:
      status: forbidden
      example_bad: "from config import *"
      example_good: "from config import DB_CONFIG, OLLAMA_HOST"
    relative_imports:
      status: forbidden
      example_bad: "from .db import Database"
      example_good: "from db import Database"
    unused_imports:
      status: forbidden
      exception: "__init__.py"
    duplicate_imports:
      status: forbidden
# Type annotation requirements, with good/bad examples per rule.
type_hints:
  enforcement: required
  tool: mypy
  rules:
    function_signatures:
      status: required
      includes:
        - parameters
        - return_type
      example: "def process(path: str, verbose: bool = False) -> bool:"
    return_types:
      status: required
      none_explicit: required
      example_good: "def setup() -> None:"
      example_bad: "def setup():"
    parameters:
      status: required
      example: "def fetch(url: str, timeout: int = 30) -> dict[str, Any]:"
    variables:
      status: optional
      recommended_when: ambiguous
      example: "results: list[Document] = []"
    collections:
      generic_required: true
      example_good: "list[str], dict[str, int], tuple[int, ...]"
      example_bad: "list, dict, tuple"
    optional:
      use_pipe_syntax: true
      example_good: "str | None"
      example_bad: "Optional[str]"
    any:
      status: discouraged
      allowed_when: truly_dynamic
# Docstring requirements per kind of definition (Google style).
docstrings:
  style: google
  rules:
    modules:
      status: required
      format: single_line
      example: '"""Database operations for KI-System Pipeline"""'
    classes:
      status: required
      content:
        - description
        - attributes_if_public
      # The blank line after the summary and the indented attribute entry
      # follow the Google docstring layout named in `style` above.
      example: |
        """MariaDB connection wrapper with document operations.

        Attributes:
            connection: Active database connection or None.
        """
    public_functions:
      status: required
      content:
        - description
      args_section: optional
      returns_section: optional
      example: '"""Insert a new document and return its ID."""'
    private_functions:
      status: optional
      prefix: "_"
    magic_methods:
      status: optional
# Exception-handling rules; examples are literal block scalars of Python code.
error_handling:
  rules:
    bare_except:
      status: forbidden
      example_bad: |
        try:
            process()
        except:
            pass
    except_exception:
      status: allowed
      use_case: top_level_handlers
      example: |
        try:
            process()
        except Exception as e:
            logger.error(f"Unexpected error: {e}")
            raise
    specific_exceptions:
      status: preferred
      example: |
        try:
            with open(path) as f:
                data = f.read()
        except FileNotFoundError:
            logger.warning(f"File not found: {path}")
            return None
        except PermissionError:
            logger.error(f"Permission denied: {path}")
            raise
    silent_failures:
      status: forbidden
      rule: always_log_or_raise
    exception_chaining:
      status: recommended
      example: "raise ProcessingError(f'Failed: {path}') from e"
# Security constraints: forbidden constructs and permitted credential sources.
security:
  rules:
    hardcoded_credentials:
      status: forbidden
      # Ruff reports hard-coded password strings as S105 (flake8-bandit).
      # Ruff's B prefix is flake8-bugbear, which has no B105 rule.
      detection: ruff_S105
      example_bad: 'password = "secret123"'
      example_good: 'password = os.environ.get("DB_PASSWORD")'
    eval_exec:
      status: forbidden
      # NOTE(review): S307 flags eval() only; exec() is reported by S102.
      # Extend this entry if the consumer accepts more than one rule ID.
      detection: ruff_S307
      includes:
        - eval
        - exec
        - compile
    sql_injection:
      status: forbidden
      rule: use_parameterized_queries
      example_bad: 'cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")'
      example_good: 'cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))'
    path_traversal:
      status: forbidden
      rule: validate_paths
      recommendation: use_pathlib
    subprocess:
      status: restricted
      shell_equals_true: forbidden
      example_bad: 'subprocess.run(cmd, shell=True)'
      example_good: 'subprocess.run(["ls", "-la"], check=True)'
    # NOTE(review): placed under `rules`; it may instead be a direct child of
    # `security` - confirm against the authoritative copy.
    credentials_source:
      allowed:
        - environment_variables
        - credentials_file
      forbidden:
        - source_code
        - config_constants
# Expected module layout under the pipeline root, with per-module rules.
module_structure:
  root: /opt/scripts/pipeline
  layout:
    config:
      file: config.py
      purpose: All configuration values
      rules:
        - single_source_of_truth
        - no_hardcoded_secrets
    database:
      file: db.py
      purpose: Database operations
      rules:
        - only_module_with_db_access
        - parameterized_queries_only
    pipeline:
      file: pipeline.py
      purpose: Orchestration
      rules:
        - coordinates_other_modules
        - no_direct_db_access
    processors:
      files:
        - detect.py
        - extract.py
        - chunk.py
        - embed.py
        - analyze.py
      purpose: Data processing stages
      rules:
        - single_responsibility
        - pure_functions_preferred
    ai:
      files:
        - generate.py
        - chat.py
      purpose: AI/LLM integration
      rules:
        - retry_logic_required
        - timeout_handling_required
    web:
      files:
        - web_chat.py
        - web_generate.py
      purpose: Flask integration
      rules:
        - thin_wrappers_only
        - no_business_logic
# Allow-list and deny-list of modules the pipeline code may import.
dependencies:
  allowed:
    standard_library:
      - os
      - sys
      - json
      - hashlib
      - pathlib
      - logging
      - datetime
      - typing
      - dataclasses
    third_party:
      - mysql.connector
      - qdrant_client
      - requests
      - anthropic
      - flask
      - pypdf
      # NOTE(review): the next two are distribution names, while
      # `mysql.connector` and `qdrant_client` above are import names (these
      # packages are imported as `docx` and `pptx`); confirm which form the
      # consumer matches against.
      - python-docx
      - python-pptx
      - pytesseract
  forbidden:
    - pickle  # security risk
    - marshal  # security risk
# Logging conventions: library, print policy, level meanings, record format.
logging:
  library: logging
  # NOTE(review): `log_levels` and `format` are placed under `rules`; either
  # may instead be a direct child of `logging` - confirm against the
  # authoritative copy.
  rules:
    print_statements:
      status: discouraged
      allowed_in: cli_scripts
    log_levels:
      debug: development_info
      info: normal_operations
      warning: recoverable_issues
      error: failures
      critical: system_failures
    format:
      include:
        - timestamp
        - level
        - message
      example: "%(asctime)s - %(levelname)s - %(message)s"
# Test conventions: framework, naming patterns, location, coverage target.
testing:
  framework: pytest
  # NOTE(review): `location` and `coverage` are placed under `rules`; either
  # may instead be a direct child of `testing` - confirm against the
  # authoritative copy.
  rules:
    naming:
      # Quoted because the patterns contain `*`.
      files: "test_*.py"
      functions: "test_*"
    location: /opt/scripts/pipeline/tests/
    coverage:
      target: 70
      critical_modules:
        - db.py
        - config.py
# Ruff rule selection: enabled rule families/codes and explicit ignores.
ruff_rules:
  select:
    - E  # pycodestyle errors
    - W  # pycodestyle warnings
    - F  # Pyflakes
    - I  # isort
    - B  # flake8-bugbear
    - C4  # flake8-comprehensions
    - UP  # pyupgrade
    - SIM  # flake8-simplify
    - S  # flake8-bandit (security)
    - D100  # missing docstring in module
    - D101  # missing docstring in class
    - D102  # missing docstring in public method
    - D103  # missing docstring in public function
  ignore:
    - E501  # line too long (handled by formatter)
    - E402  # module level import not at top
    - B008  # function call in default argument
    - B905  # zip without strict
    - SIM108  # ternary operator
    - D105  # missing docstring in magic method
    - D107  # missing docstring in __init__
# mypy strictness flags required by the contract.
mypy_rules:
  strict_optional: true
  warn_return_any: true
  warn_unused_ignores: true
  disallow_untyped_defs: true
  disallow_incomplete_defs: true
  check_untyped_defs: true
# Commands run at each gate, and the reaction when one of them fails.
validation:
  pre_commit:
    - ruff check
    - ruff format --check
    - mypy
  # Same checks as pre_commit, plus the test suite.
  pre_sync:
    - ruff check
    - ruff format --check
    - mypy
    - pytest
  on_violation:
    block: true
    message: "Python quality check failed"
Aktionen
← Zurück zur Übersicht