{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/backfill_text_semantics.py",
"limit": 50
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/backfill_text_semantics.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nBackfill Text Semantics for existing chunks.\n\nAnalyzes all chunks that don't have text_semantics yet.\nResume-capable: skips already analyzed chunks.\n\nUsage:\n python backfill_text_semantics.py # Process all pending\n python backfill_text_semantics.py --limit 100 # Process max 100\n python backfill_text_semantics.py --batch 50 # Batch size 50\n python backfill_text_semantics.py --dry-run # Just count, don't process\n\"\"\"\n\nimport argparse\nimport json\nimport time\n\nimport ollama\n\nfrom db import db\nfrom json_utils import extract_json\n\n# Pipeline-ID für Wissenschaftliche Pipeline\nDEFAULT_PIPELINE_ID = 5\n\n\ndef get_pipeline_model(step_type: str, pipeline_id: int = DEFAULT_PIPELINE_ID) -> str:\n \"\"\"Get model from pipeline_steps config - NO HARDCODED DEFAULTS.\"\"\"\n cursor = db.execute(\n \"\"\"SELECT config FROM pipeline_steps\n WHERE pipeline_id = %s AND step_type = %s AND enabled = 1\n LIMIT 1\"\"\",\n (pipeline_id, step_type),\n )\n row = cursor.fetchone()\n cursor.close()\n\n if row and row.get(\"config\"):\n try:\n config = json.loads(row[\"config\"])\n model = config.get(\"model\")\n if model:\n return model\n except json.JSONDecodeError:\n pass\n\n raise ValueError(f\"No model configured for step_type={step_type} in pipeline {pipeline_id}\")\n\n",
"numLines": 50,
"startLine": 1,
"totalLines": 317
}
}
}