{
"tool_response": {
"filePath": "\/var\/www\/migration\/content-pipeline\/src\/generators\/persistence.py",
"oldString": "\"\"\"\nimport os\nPersistence Functions - Save content versions and sources to database.\n\"\"\"\n\nimport json\nimport re\nimport sys\n\nsys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n\nfrom db import db",
"newString": "\"\"\"\nPersistence Functions - Save content versions and sources to database.\n\"\"\"\n\nimport json\nimport os\nimport re\nimport sys\n\nsys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n\nfrom db import db\nfrom enums import ContentOrderStatus",
"originalFile": "\"\"\"\nimport os\nPersistence Functions - Save content versions and sources to database.\n\"\"\"\n\nimport json\nimport re\nimport sys\n\nsys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n\nfrom db import db\n\n\ndef strip_markdown(text: str) -> str:\n \"\"\"Remove markdown formatting from text for plain text output.\"\"\"\n # Remove bold **text** and __text__\n text = re.sub(r\"\\*\\*(.+?)\\*\\*\", r\"\\1\", text)\n text = re.sub(r\"__(.+?)__\", r\"\\1\", text)\n # Remove italic *text* and _text_ (single)\n text = re.sub(r\"(?<!\\*)\\*([^*]+?)\\*(?!\\*)\", r\"\\1\", text)\n text = re.sub(r\"(?<!_)_([^_]+?)_(?!_)\", r\"\\1\", text)\n # Remove headers # ## ###\n text = re.sub(r\"^#{1,6}\\s+\", \"\", text, flags=re.MULTILINE)\n # Remove inline code `text`\n text = re.sub(r\"`([^`]+?)`\", r\"\\1\", text)\n # Replace Gedankenstriche (en-dash, em-dash) - typically used as parenthetical\n # \" – \" becomes \". \" (new sentence) for cleaner flow\n text = text.replace(\" – \", \". \") # U+2013 en-dash with spaces\n text = text.replace(\" — \", \". \") # U+2014 em-dash with spaces\n # Standalone without spaces (rare)\n text = text.replace(\"–\", \"-\")\n text = text.replace(\"—\", \"-\")\n # Replace typographic quotation marks with straight quotes\n # German: „\" (U+201E, U+201C) and ‚' (U+201A, U+2018)\n # English: \"\" (U+201C, U+201D) and '' (U+2018, U+2019)\n text = text.replace(\"\\u201e\", '\"').replace(\"\\u201c\", '\"').replace(\"\\u201d\", '\"')\n text = text.replace(\"\\u201a\", \"'\").replace(\"\\u2018\", \"'\").replace(\"\\u2019\", \"'\")\n return text\n\n\ndef save_version(order_id: int, content: str, version_number: int = 1, output_format: str = \"markdown\") -> int:\n \"\"\"Save content version to database.\"\"\"\n # Strip markdown for plain text format\n if output_format == \"reiner Text\":\n content = strip_markdown(content)\n\n content_json = json.dumps({\"text\": content, \"format\": output_format})\n\n cursor = db.execute(\n \"\"\"INSERT INTO content_versions (order_id, version_number, content)\n VALUES (%s, %s, %s)\"\"\",\n (order_id, version_number, content_json),\n )\n db.commit()\n version_id = cursor.lastrowid\n cursor.close()\n return version_id\n\n\ndef save_sources(order_id: int, context: list[dict]) -> None:\n \"\"\"Save RAG sources to content_sources.\"\"\"\n for ctx in context:\n # Try to find chunk_id by content match\n cursor = db.execute(\"SELECT id FROM chunks WHERE content LIKE %s LIMIT 1\", (ctx[\"content\"][:100] + \"%\",))\n chunk = cursor.fetchone()\n cursor.close()\n\n if chunk:\n cursor = db.execute(\n \"\"\"INSERT IGNORE INTO content_sources (order_id, chunk_id, relevance_score)\n VALUES (%s, %s, %s)\"\"\",\n (order_id, chunk[\"id\"], ctx[\"score\"]),\n )\n db.commit()\n cursor.close()\n\n\ndef update_order_status(order_id: int, status: str) -> None:\n \"\"\"Update order status.\"\"\"\n cursor = db.execute(\"UPDATE content_orders SET status = %s, updated_at = NOW() WHERE id = %s\", (status, order_id))\n db.commit()\n cursor.close()\n",
"structuredPatch": [
{
"oldStart": 1,
"oldLines": 15,
"newStart": 1,
"newLines": 16,
"lines": [
" \"\"\"",
"-import os",
" Persistence Functions - Save content versions and sources to database.",
" \"\"\"",
" ",
" import json",
"+import os",
" import re",
" import sys",
" ",
" sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))",
" ",
" from db import db",
"+from enums import ContentOrderStatus",
" ",
" ",
" def strip_markdown(text: str) -> str:"
]
}
],
"userModified": false,
"replaceAll": false
}
}