{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/reset_pipeline.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"\nPipeline Reset Script\nClears all pipeline data from MariaDB and Qdrant.\n\nUsage:\n python reset_pipeline.py # Interactive confirmation\n python reset_pipeline.py --force # No confirmation\n\"\"\"\n\nimport argparse\nimport sys\n\nimport mysql.connector\nimport requests\n\nfrom config import DB_CONFIG, QDRANT_HOST, QDRANT_PORT\n\n# Tables to truncate in order (respects FK dependencies)\nTABLES_TO_CLEAR = [\n # Child tables first (depend on chunks\/documents\/entities)\n \"chunk_entities\",\n \"chunk_semantics\",\n \"chunk_taxonomy\",\n \"entity_relations\",\n \"entity_ontology\",\n \"entity_synonyms\",\n \"entity_semantics\",\n \"entity_taxonomy_mapping\",\n \"entity_classifications\",\n \"generated_questions\",\n \"document_knowledge\",\n \"section_knowledge\",\n \"page_knowledge\",\n \"document_pages\",\n \"document_sections\",\n \"document_taxonomy\",\n \"content_sources\",\n # Parent tables last\n \"chunks\",\n \"documents\",\n \"entities\",\n \"pipeline_queue\",\n \"pipeline_runs\",\n]\n\n# Foreign keys to temporarily drop\nFK_TO_DROP = [\n (\"chunks\", \"chunks_ibfk_1\"),\n (\"chunk_entities\", \"chunk_entities_ibfk_1\"),\n (\"chunk_semantics\", \"chunk_semantics_ibfk_1\"),\n (\"chunk_taxonomy\", \"chunk_taxonomy_ibfk_1\"),\n (\"entity_relations\", \"entity_relations_ibfk_1\"),\n (\"entity_relations\", \"entity_relations_ibfk_2\"),\n (\"entity_relations\", \"entity_relations_ibfk_3\"),\n (\"document_entities\", \"document_entities_ibfk_1\"),\n (\"document_entities\", \"document_entities_ibfk_2\"),\n (\"generated_questions\", \"generated_questions_ibfk_1\"),\n (\"generated_questions\", \"generated_questions_ibfk_2\"),\n (\"generated_questions\", \"generated_questions_ibfk_3\"),\n (\"document_knowledge\", \"document_knowledge_ibfk_1\"),\n (\"document_pages\", \"document_pages_ibfk_1\"),\n (\"document_sections\", \"document_sections_ibfk_1\"),\n (\"document_taxonomy\", \"document_taxonomy_ibfk_1\"),\n (\"section_knowledge\", \"section_knowledge_ibfk_1\"),\n (\"page_knowledge\", \"page_knowledge_ibfk_1\"),\n (\"content_sources\", \"content_sources_ibfk_2\"),\n]\n\n# Foreign keys to recreate\nFK_TO_CREATE = [\n (\"chunks\", \"chunks_ibfk_1\", \"document_id\", \"documents\", \"id\"),\n (\"chunk_entities\", \"chunk_entities_ibfk_1\", \"chunk_id\", \"chunks\", \"id\"),\n (\"chunk_semantics\", \"chunk_semantics_ibfk_1\", \"chunk_id\", \"chunks\", \"id\"),\n (\"chunk_taxonomy\", \"chunk_taxonomy_ibfk_1\", \"chunk_id\", \"chunks\", \"id\"),\n (\"entity_relations\", \"entity_relations_ibfk_1\", \"source_entity_id\", \"entities\", \"id\"),\n (\"entity_relations\", \"entity_relations_ibfk_2\", \"target_entity_id\", \"entities\", \"id\"),\n (\"entity_relations\", \"entity_relations_ibfk_3\", \"chunk_id\", \"chunks\", \"id\"),\n (\"document_entities\", \"document_entities_ibfk_1\", \"document_id\", \"documents\", \"id\"),\n (\"document_entities\", \"document_entities_ibfk_2\", \"entity_id\", \"entities\", \"id\"),\n (\"generated_questions\", \"generated_questions_ibfk_1\", \"document_id\", \"documents\", \"id\"),\n (\"generated_questions\", \"generated_questions_ibfk_2\", \"page_id\", \"document_pages\", \"id\"),\n (\"generated_questions\", \"generated_questions_ibfk_3\", \"chunk_id\", \"chunks\", \"id\"),\n (\"document_knowledge\", \"document_knowledge_ibfk_1\", \"document_id\", \"documents\", \"id\"),\n (\"document_pages\", \"document_pages_ibfk_1\", \"document_id\", \"documents\", \"id\"),\n (\"document_sections\", \"document_sections_ibfk_1\", \"document_id\", \"documents\", \"id\"),\n (\"document_taxonomy\", \"document_taxonomy_ibfk_1\", \"document_id\", \"documents\", \"id\"),\n (\"section_knowledge\", \"section_knowledge_ibfk_1\", \"section_id\", \"document_sections\", \"id\"),\n (\"page_knowledge\", \"page_knowledge_ibfk_1\", \"page_id\", \"document_pages\", \"id\"),\n (\"content_sources\", \"content_sources_ibfk_2\", \"chunk_id\", \"chunks\", \"id\"),\n]\n\n\ndef reset_mariadb():\n \"\"\"Reset all pipeline tables in MariaDB.\"\"\"\n print(\"\\n[1\/2] Resetting MariaDB...\")\n\n conn = mysql.connector.connect(**DB_CONFIG)\n cursor = conn.cursor()\n\n # Disable foreign key checks for clean truncation\n cursor.execute(\"SET FOREIGN_KEY_CHECKS = 0\")\n conn.commit()\n\n # Truncate tables\n print(\" Truncating tables...\")\n for table in TABLES_TO_CLEAR:\n try:\n cursor.execute(f\"TRUNCATE TABLE {table}\")\n print(f\" - {table} cleared\")\n except mysql.connector.Error as e:\n print(f\" - {table} skipped: {e.msg}\")\n conn.commit()\n\n # Re-enable foreign key checks\n cursor.execute(\"SET FOREIGN_KEY_CHECKS = 1\")\n conn.commit()\n\n # Verify\n cursor.execute(\"SELECT COUNT(*) FROM documents\")\n doc_count = cursor.fetchone()[0]\n cursor.execute(\"SELECT COUNT(*) FROM chunks\")\n chunk_count = cursor.fetchone()[0]\n cursor.execute(\"SELECT COUNT(*) FROM entities\")\n entity_count = cursor.fetchone()[0]\n\n cursor.close()\n conn.close()\n\n print(f\" MariaDB reset complete: {doc_count} documents, {chunk_count} chunks, {entity_count} entities\")\n return doc_count == 0 and chunk_count == 0 and entity_count == 0\n\n\ndef reset_qdrant():\n \"\"\"Reset Qdrant documents collection.\"\"\"\n print(\"\\n[2\/2] Resetting Qdrant...\")\n\n base_url = f\"http:\/\/{QDRANT_HOST}:{QDRANT_PORT}\"\n\n # Delete collection\n try:\n resp = requests.delete(f\"{base_url}\/collections\/documents\", timeout=10)\n if resp.status_code in (200, 404):\n print(\" Collection deleted\")\n except requests.RequestException as e:\n print(f\" Delete failed: {e}\")\n return False\n\n # Recreate collection\n try:\n resp = requests.put(\n f\"{base_url}\/collections\/documents\", json={\"vectors\": {\"size\": 1024, \"distance\": \"Cosine\"}}, timeout=10\n )\n if resp.status_code == 200:\n print(\" Collection recreated\")\n except requests.RequestException as e:\n print(f\" Recreate failed: {e}\")\n return False\n\n # Verify\n try:\n resp = requests.get(f\"{base_url}\/collections\/documents\", timeout=10)\n data = resp.json()\n points = data.get(\"result\", {}).get(\"points_count\", 0)\n print(f\" Qdrant reset complete: {points} points\")\n return points == 0\n except requests.RequestException:\n return False\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"Reset pipeline data\")\n parser.add_argument(\"--force\", action=\"store_true\", help=\"Skip confirmation\")\n args = parser.parse_args()\n\n print(\"=\" * 50)\n print(\"Pipeline Reset Script\")\n print(\"=\" * 50)\n print(\"\\nThis will DELETE all:\")\n print(\" - Documents and chunks from MariaDB\")\n print(\" - Embeddings from Qdrant\")\n print(\" - Pipeline runs and queue\")\n\n if not args.force:\n confirm = input(\"\\nAre you sure? Type 'yes' to confirm: \")\n if confirm.lower() != \"yes\":\n print(\"Aborted.\")\n sys.exit(0)\n\n db_ok = reset_mariadb()\n qdrant_ok = reset_qdrant()\n\n print(\"\\n\" + \"=\" * 50)\n if db_ok and qdrant_ok:\n print(\"Reset complete!\")\n else:\n print(\"Reset completed with warnings.\")\n print(\"=\" * 50)\n\n\nif __name__ == \"__main__\":\n main()\n",
"numLines": 203,
"startLine": 1,
"totalLines": 203
}
}
}