{
"event": "PostToolUse",
"tool_name": "Read",
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/backfill_text_semantics.py",
"content": "def get_pending_chunks(limit: int = 0) -> list:\n \"\"\"Get chunks without text semantics.\"\"\"\n sql = \"\"\"\n SELECT c.id, c.content, c.document_id\n FROM chunks c\n LEFT JOIN chunk_text_semantics cts ON c.id = cts.chunk_id\n WHERE cts.id IS NULL\n ORDER BY c.id\n \"\"\"\n if limit > 0:\n sql += f\" LIMIT {limit}\"\n\n cursor = db.execute(sql)\n chunks = cursor.fetchall()\n cursor.close()\n return list(chunks)\n\n\ndef analyze_chunk(chunk: dict, model: str) -> dict | None:\n \"\"\"Analyze a single chunk with Ollama.\"\"\"\n try:\n prompt = PROMPT_TEMPLATE.format(content=chunk[\"content\"][:2000])\n\n response = ollama.generate(\n model=model,\n prompt=prompt,\n options={\"num_predict\": 200},\n )\n\n response_text = response[\"response\"].strip()\n\n # Robuste JSON-Extraktion\n data = extract_json(response_text)\n if data:\n data = validate_and_fix(data)\n data[\"model_used\"] = model\n return data\n\n except Exception as e:\n db.log(\"WARNING\", f\"Backfill: Text semantic analysis failed for chunk {chunk['id']}: {e}\")\n\n return None\n\n\ndef store_semantics(chunk_id: int, semantics: dict) -> bool:\n \"\"\"Store text semantics to database.\"\"\"\n try:\n cursor = db.execute(\n \"\"\"INSERT INTO chunk_text_semantics\n (chunk_id, statement_form, intent, frame, is_negated,\n discourse_role, model_used)\n VALUES (%s, %s, %s, %s, %s, %s, %s)\n ON DUPLICATE KEY UPDATE\n statement_form = VALUES(statement_form),\n intent = VALUES(intent),\n frame = VALUES(frame),\n is_negated = VALUES(is_negated),\n discourse_role = VALUES(discourse_role),\n model_used = VALUES(model_used),\n updated_at = NOW()\"\"\",",
"numLines": 60,
"startLine": 160,
"totalLines": 317
}
}
}