{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/tests\/test_no_hardcoded_models.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nTest: Keine hardcodierten Model-Namen in Pipeline-Dateien.\n\nSupervision-Anforderung: Scannt auf Literal-Modelle.\nTask #516\n\"\"\"\n\nimport re\nfrom pathlib import Path\n\n\n# Dateien die gescannt werden sollen\nSCAN_FILES = [\n \"pipeline.py\",\n \"step_semantic_extended.py\",\n \"step_entity_enrich.py\",\n]\n\n# Bekannte Model-Namen die nicht hardcoded sein dürfen\nFORBIDDEN_MODELS = [\n \"mistral\",\n \"gemma\",\n \"llama\",\n \"phi\",\n \"qwen\",\n \"claude\",\n \"gpt-4\",\n \"gpt-3.5\",\n]\n\n# Patterns die erlaubt sind (z.B. in Kommentaren, Konstanten, Allowlists)\nALLOWED_PATTERNS = [\n r\"^\\s*#\", # Kommentare\n r\"^\\s*[A-Z][A-Z0-9_]*\\s*=\", # UPPER_CASE Konstanten\n r\"HARDCODED_MODELS\", # Allowlist-Definition\n r\"FORBIDDEN_MODELS\", # Diese Testdatei\n]\n\n\ndef test_pipeline_no_literal_models():\n \"\"\"Scannt pipeline.py auf hardcodierte Model-Strings.\"\"\"\n base_path = Path(__file__).parent.parent\n\n violations = []\n\n for filename in SCAN_FILES:\n filepath = base_path \/ filename\n if not filepath.exists():\n continue\n\n with open(filepath) as f:\n lines = f.readlines()\n\n for line_num, line in enumerate(lines, 1):\n # Skip erlaubte Patterns\n if any(re.search(pattern, line) for pattern in ALLOWED_PATTERNS):\n continue\n\n # Suche nach hardcodierten Model-Namen\n for model in FORBIDDEN_MODELS:\n # Pattern: \"model\" oder 'model' als Wert\n pattern = rf'[\"\\']({model})[\"\\']'\n if re.search(pattern, line, re.IGNORECASE):\n violations.append(\n f\"{filename}:{line_num}: Found '{model}' - {line.strip()[:60]}\"\n )\n\n if violations:\n msg = \"Hardcodierte Models gefunden:\\n\" + \"\\n\".join(violations)\n raise AssertionError(msg)\n\n\ndef test_no_model_in_execute_calls():\n \"\"\"Prüft ob execute() Aufrufe kein hardcodiertes model enthalten.\"\"\"\n base_path = Path(__file__).parent.parent\n filepath = base_path \/ \"pipeline.py\"\n\n if not filepath.exists():\n return\n\n with open(filepath) as f:\n content = f.read()\n\n # Pattern: .execute(..., {\"model\": \"...\"})\n pattern = r'\\.execute\\([^)]*[\"\\']model[\"\\']\\s*:\\s*[\"\\'][^\"\\']+[\"\\']'\n matches = re.findall(pattern, content)\n\n if matches:\n raise AssertionError(\n f\"Hardcodierte model in execute() gefunden: {matches}\"\n )\n\n\nif __name__ == \"__main__\":\n print(\"Running hardcoded model tests...\")\n try:\n test_pipeline_no_literal_models()\n print(\"✓ test_pipeline_no_literal_models passed\")\n except AssertionError as e:\n print(f\"✗ test_pipeline_no_literal_models FAILED:\\n{e}\")\n\n try:\n test_no_model_in_execute_calls()\n print(\"✓ test_no_model_in_execute_calls passed\")\n except AssertionError as e:\n print(f\"✗ test_no_model_in_execute_calls FAILED:\\n{e}\")\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/scripts\/pipeline\/tests\/test_no_hardcoded_models.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nTest: Keine hardcodierten Model-Namen in Pipeline-Dateien.\n\nSupervision-Anforderung: Scannt auf Literal-Modelle.\nTask #516\n\"\"\"\n\nimport re\nfrom pathlib import Path\n\n\n# Dateien die gescannt werden sollen\nSCAN_FILES = [\n \"pipeline.py\",\n \"step_semantic_extended.py\",\n \"step_entity_enrich.py\",\n]\n\n# Bekannte Model-Namen die nicht hardcoded sein dürfen\nFORBIDDEN_MODELS = [\n \"mistral\",\n \"gemma\",\n \"llama\",\n \"phi\",\n \"qwen\",\n \"claude\",\n \"gpt-4\",\n \"gpt-3.5\",\n]\n\n# Patterns die erlaubt sind (z.B. in Kommentaren, Konstanten, Allowlists)\nALLOWED_PATTERNS = [\n r\"^\\s*#\", # Kommentare\n r\"^\\s*[A-Z][A-Z0-9_]*\\s*=\", # UPPER_CASE Konstanten\n r\"HARDCODED_MODELS\", # Allowlist-Definition\n r\"FORBIDDEN_MODELS\", # Diese Testdatei\n]\n\n\ndef test_pipeline_no_literal_models():\n \"\"\"Scannt pipeline.py auf hardcodierte Model-Strings.\"\"\"\n base_path = Path(__file__).parent.parent\n\n violations = []\n\n for filename in SCAN_FILES:\n filepath = base_path \/ filename\n if not filepath.exists():\n continue\n\n with open(filepath) as f:\n lines = f.readlines()\n\n for line_num, line in enumerate(lines, 1):\n # Skip erlaubte Patterns\n if any(re.search(pattern, line) for pattern in ALLOWED_PATTERNS):\n continue\n\n # Suche nach hardcodierten Model-Namen\n for model in FORBIDDEN_MODELS:\n # Pattern: \"model\" oder 'model' als Wert\n pattern = rf'[\"\\']({model})[\"\\']'\n if re.search(pattern, line, re.IGNORECASE):\n violations.append(\n f\"{filename}:{line_num}: Found '{model}' - {line.strip()[:60]}\"\n )\n\n if violations:\n msg = \"Hardcodierte Models gefunden:\\n\" + \"\\n\".join(violations)\n raise AssertionError(msg)\n\n\ndef test_no_model_in_execute_calls():\n \"\"\"Prüft ob execute() Aufrufe kein hardcodiertes model enthalten.\"\"\"\n base_path = Path(__file__).parent.parent\n filepath = base_path \/ \"pipeline.py\"\n\n if not filepath.exists():\n return\n\n with open(filepath) as f:\n content = f.read()\n\n # Pattern: .execute(..., {\"model\": \"...\"})\n pattern = r'\\.execute\\([^)]*[\"\\']model[\"\\']\\s*:\\s*[\"\\'][^\"\\']+[\"\\']'\n matches = re.findall(pattern, content)\n\n if matches:\n raise AssertionError(\n f\"Hardcodierte model in execute() gefunden: {matches}\"\n )\n\n\nif __name__ == \"__main__\":\n print(\"Running hardcoded model tests...\")\n try:\n test_pipeline_no_literal_models()\n print(\"✓ test_pipeline_no_literal_models passed\")\n except AssertionError as e:\n print(f\"✗ test_pipeline_no_literal_models FAILED:\\n{e}\")\n\n try:\n test_no_model_in_execute_calls()\n print(\"✓ test_no_model_in_execute_calls passed\")\n except AssertionError as e:\n print(f\"✗ test_no_model_in_execute_calls FAILED:\\n{e}\")\n",
"structuredPatch": [],
"originalFile": null
}
}