{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/backfill_text_semantics.py",
"offset": 220,
"limit": 80
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/backfill_text_semantics.py",
"content": " discourse_role = VALUES(discourse_role),\n model_used = VALUES(model_used),\n updated_at = NOW()\"\"\",\n (\n chunk_id,\n semantics.get(\"statement_form\"),\n semantics.get(\"intent\"),\n semantics.get(\"frame\"),\n semantics.get(\"is_negated\", False),\n semantics.get(\"discourse_role\"),\n semantics.get(\"model_used\"),\n ),\n )\n db.commit()\n cursor.close()\n return True\n except Exception as e:\n db.log(\"ERROR\", f\"Backfill: Failed to store text semantics for chunk {chunk_id}: {e}\")\n return False\n\n\ndef main():\n \"\"\"Main entry point.\"\"\"\n parser = argparse.ArgumentParser(description=\"Backfill Text Semantics\")\n parser.add_argument(\"--limit\", type=int, default=0, help=\"Max chunks to process (0=all)\")\n parser.add_argument(\"--batch\", type=int, default=50, help=\"Batch size for progress output\")\n parser.add_argument(\"--model\", default=\"mistral\", help=\"Ollama model to use\")\n parser.add_argument(\"--dry-run\", action=\"store_true\", help=\"Just count, don't process\")\n\n args = parser.parse_args()\n\n db.connect()\n\n try:\n # Get pending chunks\n chunks = get_pending_chunks(args.limit)\n total = len(chunks)\n\n print(\"Text Semantics Backfill\")\n print(\"=\" * 50)\n print(f\"Pending chunks: {total}\")\n print(f\"Model: {args.model}\")\n print(f\"Batch size: {args.batch}\")\n\n if args.dry_run:\n print(\"\\nDry run - no processing\")\n return\n\n if total == 0:\n print(\"\\nNo pending chunks - all done!\")\n return\n\n print(\"\\nStarting analysis...\")\n print(\"-\" * 50)\n\n success = 0\n errors = 0\n start_time = time.time()\n\n for i, chunk in enumerate(chunks, 1):\n # Analyze\n semantics = analyze_chunk(chunk, args.model)\n\n if semantics:\n if store_semantics(chunk[\"id\"], semantics):\n success += 1\n else:\n errors += 1\n else:\n errors += 1\n\n # Progress output\n if i % args.batch == 0 or i == total:\n elapsed = time.time() - start_time\n rate = i \/ elapsed if elapsed > 0 else 0\n eta = (total - i) \/ rate if rate > 0 else 0\n print(f\" [{i}\/{total}] Success: {success}, Errors: {errors}, \"\n f\"Rate: {rate:.1f}\/s, ETA: {eta\/60:.1f}min\")\n\n # Final summary",
"numLines": 80,
"startLine": 220,
"totalLines": 314
}
}
}