{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/tools\/ki-protokoll\/claude-hook\/quality\/pre_rules_python.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nPre-Hook Python Regeln (BLOCK) - Pipeline Code Quality.\n\nPP1.x: Hardcoded Values\nPP2.x: Config Patterns\n\nDiese Regeln prüfen Python-Dateien in \/var\/www\/scripts\/pipeline\/\nauf häufige Fehler wie hardcoded Model-Namen oder Pipeline-IDs.\n\"\"\"\n\nimport re\nfrom typing import Optional\nfrom .rule_base import block, is_in_allowlist\n\n\n# =============================================================================\n# ALLOWLIST\n# =============================================================================\n\nPYTHON_ALLOWLIST = [\n \"\/venv\/\",\n \"\/__pycache__\/\",\n \"\/tests\/\",\n \"\/test_\",\n \"_test.py\",\n]\n\n\n# =============================================================================\n# HARDCODED VALUES DETECTION\n# =============================================================================\n\n# Model-Namen die als hardcoded Default blockiert werden\nHARDCODED_MODELS = {\n \"mistral\",\n \"llama\",\n \"llama2\",\n \"llama3\",\n \"gemma\",\n \"gemma2\",\n \"gemma3\",\n \"phi\",\n \"phi3\",\n \"qwen\",\n \"qwen2\",\n \"claude\",\n \"gpt-4\",\n \"gpt-3.5\",\n \"minicpm\",\n \"minicpm-v\",\n \"nomic\",\n \"nomic-embed\",\n}\n\n\n# =============================================================================\n# PRÜFUNG PP1: HARDCODED VALUES\n# =============================================================================\n\ndef pp1_1_hardcoded_model_default(file_path: str, content: str) -> Optional[dict]:\n \"\"\"\n PP1.1: Hardcoded LLM Model-Namen als Default blockieren.\n\n BLOCKIERT:\n parser.add_argument(\"--model\", default=\"mistral\")\n def foo(model: str = \"gemma\"):\n\n ERLAUBT:\n parser.add_argument(\"--model\", default=None)\n model = get_pipeline_model(\"step_type\")\n DEFAULT_MODEL = \"mistral\" # Als Konstante OK\n # Kommentar: mistral ist gut\n \"\"\"\n if not file_path.endswith(\".py\"):\n return None\n if is_in_allowlist(file_path, PYTHON_ALLOWLIST):\n return None\n\n lines = content.split('\\n')\n for line_num, line in enumerate(lines, 1):\n stripped = line.strip()\n\n # Skip Kommentare\n if stripped.startswith('#'):\n continue\n\n # Skip Docstrings (vereinfacht)\n if stripped.startswith('\"\"\"') or stripped.startswith(\"'''\"):\n continue\n\n # Skip Konstanten-Definitionen (UPPER_CASE = \"value\")\n if re.match(r'^[A-Z][A-Z0-9_]*\\s*=', stripped):\n continue\n\n # Skip Listen\/Sets von erlaubten Werten\n if 'VALID_' in line or 'ALLOWED_' in line or 'HARDCODED_' in line:\n continue\n\n # Suche nach hardcoded model defaults\n for model in HARDCODED_MODELS:\n # Pattern 1: default=\"model\" oder default='model'\n pattern1 = rf'default\\s*=\\s*[\"\\']({model})[\"\\']'\n # Pattern 2: model: str = \"model\" (type hint mit default)\n pattern2 = rf'model\\s*:\\s*str\\s*=\\s*[\"\\']({model})[\"\\']'\n # Pattern 3: model=\"model\" als Keyword-Argument\n pattern3 = rf'\\bmodel\\s*=\\s*[\"\\']({model})[\"\\']'\n\n for pattern in [pattern1, pattern2, pattern3]:\n match = re.search(pattern, line, re.IGNORECASE)\n if match:\n return block(\n \"PP1.1\",\n f\"Hardcoded model name '{model}' at line {line_num}. \"\n f\"Use get_pipeline_model() to read from pipeline_steps config, \"\n f\"or define as UPPER_CASE constant.\"\n )\n\n return None\n\n\ndef pp1_2_hardcoded_pipeline_id(file_path: str, content: str) -> Optional[dict]:\n \"\"\"\n PP1.2: Hardcoded Pipeline-IDs blockieren (außer als Konstante).\n\n BLOCKIERT:\n pipeline_id = 5\n run_pipeline(pipeline_id=3)\n\n ERLAUBT:\n DEFAULT_PIPELINE_ID = 5\n pipeline_id = args.pipeline_id\n pipeline_id = config.get(\"pipeline_id\")\n \"\"\"\n if not file_path.endswith(\".py\"):\n return None\n if is_in_allowlist(file_path, PYTHON_ALLOWLIST):\n return None\n\n lines = content.split('\\n')\n for line_num, line in enumerate(lines, 1):\n stripped = line.strip()\n\n # Skip Kommentare\n if stripped.startswith('#'):\n continue\n\n # Skip Konstanten-Definitionen\n if re.match(r'^[A-Z][A-Z0-9_]*\\s*=', stripped):\n continue\n\n # Skip wenn es eine Zuweisung von args\/config ist\n if 'args.' in line or 'config.' in line or 'get(' in line:\n continue\n\n # Suche nach hardcoded pipeline_id\n # Pattern: pipeline_id = 5 oder pipeline_id=5 (nicht als Teil eines größeren Ausdrucks)\n pattern = r'\\bpipeline_id\\s*=\\s*(\\d+)\\b'\n match = re.search(pattern, line)\n if match:\n pid = match.group(1)\n return block(\n \"PP1.2\",\n f\"Hardcoded pipeline_id={pid} at line {line_num}. \"\n f\"Use DEFAULT_PIPELINE_ID constant or accept via parameter.\"\n )\n\n return None\n\n\n# =============================================================================\n# RULE COLLECTION\n# =============================================================================\n\nRULES = [\n pp1_1_hardcoded_model_default,\n pp1_2_hardcoded_pipeline_id,\n]\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/tools\/ki-protokoll\/claude-hook\/quality\/pre_rules_python.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nPre-Hook Python Regeln (BLOCK) - Pipeline Code Quality.\n\nPP1.x: Hardcoded Values\nPP2.x: Config Patterns\n\nDiese Regeln prüfen Python-Dateien in \/var\/www\/scripts\/pipeline\/\nauf häufige Fehler wie hardcoded Model-Namen oder Pipeline-IDs.\n\"\"\"\n\nimport re\nfrom typing import Optional\nfrom .rule_base import block, is_in_allowlist\n\n\n# =============================================================================\n# ALLOWLIST\n# =============================================================================\n\nPYTHON_ALLOWLIST = [\n \"\/venv\/\",\n \"\/__pycache__\/\",\n \"\/tests\/\",\n \"\/test_\",\n \"_test.py\",\n]\n\n\n# =============================================================================\n# HARDCODED VALUES DETECTION\n# =============================================================================\n\n# Model-Namen die als hardcoded Default blockiert werden\nHARDCODED_MODELS = {\n \"mistral\",\n \"llama\",\n \"llama2\",\n \"llama3\",\n \"gemma\",\n \"gemma2\",\n \"gemma3\",\n \"phi\",\n \"phi3\",\n \"qwen\",\n \"qwen2\",\n \"claude\",\n \"gpt-4\",\n \"gpt-3.5\",\n \"minicpm\",\n \"minicpm-v\",\n \"nomic\",\n \"nomic-embed\",\n}\n\n\n# =============================================================================\n# PRÜFUNG PP1: HARDCODED VALUES\n# =============================================================================\n\ndef pp1_1_hardcoded_model_default(file_path: str, content: str) -> Optional[dict]:\n \"\"\"\n PP1.1: Hardcoded LLM Model-Namen als Default blockieren.\n\n BLOCKIERT:\n parser.add_argument(\"--model\", default=\"mistral\")\n def foo(model: str = \"gemma\"):\n\n ERLAUBT:\n parser.add_argument(\"--model\", default=None)\n model = get_pipeline_model(\"step_type\")\n DEFAULT_MODEL = \"mistral\" # Als Konstante OK\n # Kommentar: mistral ist gut\n \"\"\"\n if not file_path.endswith(\".py\"):\n return None\n if is_in_allowlist(file_path, PYTHON_ALLOWLIST):\n return None\n\n lines = content.split('\\n')\n for line_num, line in enumerate(lines, 1):\n stripped = line.strip()\n\n # Skip Kommentare\n if stripped.startswith('#'):\n continue\n\n # Skip Docstrings (vereinfacht)\n if stripped.startswith('\"\"\"') or stripped.startswith(\"'''\"):\n continue\n\n # Skip Konstanten-Definitionen (UPPER_CASE = \"value\")\n if re.match(r'^[A-Z][A-Z0-9_]*\\s*=', stripped):\n continue\n\n # Skip Listen\/Sets von erlaubten Werten\n if 'VALID_' in line or 'ALLOWED_' in line or 'HARDCODED_' in line:\n continue\n\n # Suche nach hardcoded model defaults\n for model in HARDCODED_MODELS:\n # Pattern 1: default=\"model\" oder default='model'\n pattern1 = rf'default\\s*=\\s*[\"\\']({model})[\"\\']'\n # Pattern 2: model: str = \"model\" (type hint mit default)\n pattern2 = rf'model\\s*:\\s*str\\s*=\\s*[\"\\']({model})[\"\\']'\n # Pattern 3: model=\"model\" als Keyword-Argument\n pattern3 = rf'\\bmodel\\s*=\\s*[\"\\']({model})[\"\\']'\n\n for pattern in [pattern1, pattern2, pattern3]:\n match = re.search(pattern, line, re.IGNORECASE)\n if match:\n return block(\n \"PP1.1\",\n f\"Hardcoded model name '{model}' at line {line_num}. \"\n f\"Use get_pipeline_model() to read from pipeline_steps config, \"\n f\"or define as UPPER_CASE constant.\"\n )\n\n return None\n\n\ndef pp1_2_hardcoded_pipeline_id(file_path: str, content: str) -> Optional[dict]:\n \"\"\"\n PP1.2: Hardcoded Pipeline-IDs blockieren (außer als Konstante).\n\n BLOCKIERT:\n pipeline_id = 5\n run_pipeline(pipeline_id=3)\n\n ERLAUBT:\n DEFAULT_PIPELINE_ID = 5\n pipeline_id = args.pipeline_id\n pipeline_id = config.get(\"pipeline_id\")\n \"\"\"\n if not file_path.endswith(\".py\"):\n return None\n if is_in_allowlist(file_path, PYTHON_ALLOWLIST):\n return None\n\n lines = content.split('\\n')\n for line_num, line in enumerate(lines, 1):\n stripped = line.strip()\n\n # Skip Kommentare\n if stripped.startswith('#'):\n continue\n\n # Skip Konstanten-Definitionen\n if re.match(r'^[A-Z][A-Z0-9_]*\\s*=', stripped):\n continue\n\n # Skip wenn es eine Zuweisung von args\/config ist\n if 'args.' in line or 'config.' in line or 'get(' in line:\n continue\n\n # Suche nach hardcoded pipeline_id\n # Pattern: pipeline_id = 5 oder pipeline_id=5 (nicht als Teil eines größeren Ausdrucks)\n pattern = r'\\bpipeline_id\\s*=\\s*(\\d+)\\b'\n match = re.search(pattern, line)\n if match:\n pid = match.group(1)\n return block(\n \"PP1.2\",\n f\"Hardcoded pipeline_id={pid} at line {line_num}. \"\n f\"Use DEFAULT_PIPELINE_ID constant or accept via parameter.\"\n )\n\n return None\n\n\n# =============================================================================\n# RULE COLLECTION\n# =============================================================================\n\nRULES = [\n pp1_1_hardcoded_model_default,\n pp1_2_hardcoded_pipeline_id,\n]\n",
"structuredPatch": [],
"originalFile": null
}
}