{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/generators\/persistence.py",
"content": "\"\"\"\nPersistence Functions - Save content versions and sources to database.\n\"\"\"\n\nimport json\nimport re\nimport sys\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\n\ndef strip_markdown(text: str) -> str:\n \"\"\"Remove markdown formatting from text for plain text output.\"\"\"\n # Remove bold **text** and __text__\n text = re.sub(r\"\\*\\*(.+?)\\*\\*\", r\"\\1\", text)\n text = re.sub(r\"__(.+?)__\", r\"\\1\", text)\n # Remove italic *text* and _text_ (single)\n text = re.sub(r\"(?<!\\*)\\*([^*]+?)\\*(?!\\*)\", r\"\\1\", text)\n text = re.sub(r\"(?<!_)_([^_]+?)_(?!_)\", r\"\\1\", text)\n # Remove headers # ## ###\n text = re.sub(r\"^#{1,6}\\s+\", \"\", text, flags=re.MULTILINE)\n # Remove inline code `text`\n text = re.sub(r\"`([^`]+?)`\", r\"\\1\", text)\n # Replace Gedankenstriche (en-dash, em-dash) - typically used as parenthetical\n # \" – \" becomes \". \" (new sentence) for cleaner flow\n text = text.replace(\" – \", \". \") # U+2013 en-dash with spaces\n text = text.replace(\" — \", \". \") # U+2014 em-dash with spaces\n # Standalone without spaces (rare)\n text = text.replace(\"–\", \"-\")\n text = text.replace(\"—\", \"-\")\n # Replace typographic quotation marks with straight quotes\n # German: „\" (U+201E, U+201C) and ‚' (U+201A, U+2018)\n # English: \"\" (U+201C, U+201D) and '' (U+2018, U+2019)\n text = text.replace(\"„\", '\"').replace(\"\"\", '\"').replace(\"\"\", '\"')\n text = text.replace(\"‚\", \"'\").replace(\"'\", \"'\").replace(\"'\", \"'\")\n return text\n\n\ndef save_version(order_id: int, content: str, version_number: int = 1, output_format: str = \"markdown\") -> int:\n \"\"\"Save content version to database.\"\"\"\n # Strip markdown for plain text format\n if output_format == \"reiner Text\":\n content = strip_markdown(content)\n\n content_json = json.dumps({\"text\": content, \"format\": output_format})\n\n cursor = db.execute(\n \"\"\"INSERT INTO content_versions (order_id, version_number, content)\n VALUES (%s, %s, %s)\"\"\",\n (order_id, version_number, content_json),\n )\n db.commit()\n version_id = cursor.lastrowid\n cursor.close()\n return version_id\n\n\ndef save_sources(order_id: int, context: list[dict]) -> None:\n \"\"\"Save RAG sources to content_sources.\"\"\"\n for ctx in context:\n # Try to find chunk_id by content match\n cursor = db.execute(\"SELECT id FROM chunks WHERE content LIKE %s LIMIT 1\", (ctx[\"content\"][:100] + \"%\",))\n chunk = cursor.fetchone()\n cursor.close()\n\n if chunk:\n cursor = db.execute(\n \"\"\"INSERT IGNORE INTO content_sources (order_id, chunk_id, relevance_score)\n VALUES (%s, %s, %s)\"\"\",\n (order_id, chunk[\"id\"], ctx[\"score\"]),\n )\n db.commit()\n cursor.close()\n\n\ndef update_order_status(order_id: int, status: str) -> None:\n \"\"\"Update order status.\"\"\"\n cursor = db.execute(\"UPDATE content_orders SET status = %s, updated_at = NOW() WHERE id = %s\", (status, order_id))\n db.commit()\n cursor.close()\n",
"numLines": 83,
"startLine": 1,
"totalLines": 83
}
}
}