{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/db.py",
"content": " def insert_document(self, file_path, title, file_type, file_size, file_hash):\n \"\"\"Insert a new document or update existing one.\"\"\"\n import os\n\n folder_path = os.path.dirname(file_path)\n cursor = self.execute(\n \"\"\"INSERT INTO documents\n (source_path, folder_path, filename, mime_type, file_size, file_hash, status)\n VALUES (%s, %s, %s, %s, %s, %s, 'processing')\n ON DUPLICATE KEY UPDATE\n file_hash = VALUES(file_hash),\n file_size = VALUES(file_size),\n status = 'processing',\n processed_at = NULL,\n error_message = NULL\"\"\",\n (file_path, folder_path, title, file_type, file_size, file_hash),\n )\n self.commit()\n doc_id = cursor.lastrowid\n # If ON DUPLICATE KEY UPDATE was triggered, lastrowid is 0\n if doc_id == 0:\n cursor_select = self.execute(\"SELECT id FROM documents WHERE source_path = %s\", (file_path,))\n result = cursor_select.fetchone()\n cursor_select.close()\n doc_id = result[\"id\"] if result else None\n cursor.close()\n return doc_id\n\n def update_document_status(self, doc_id, status, error_message=None):\n \"\"\"Update document processing status.\"\"\"\n if error_message:\n cursor = self.execute(\n \"\"\"UPDATE documents\n SET status = %s, error_message = %s, processed_at = NOW()\n WHERE id = %s\"\"\",\n (status, error_message, doc_id),\n )\n else:\n cursor = self.execute(\n \"UPDATE documents SET status = %s, processed_at = NOW() WHERE id = %s\", (status, doc_id)\n )\n self.commit()\n cursor.close()\n\n # Chunk Operations\n def insert_chunk(\n self, doc_id, chunk_index, content, heading_path, position_start=None, position_end=None, metadata=None\n ):\n \"\"\"Insert a text chunk.\"\"\"\n # Calculate token count (rough estimate: 4 chars per token)\n token_count = len(content) \/\/ 4\n cursor = self.execute(\n \"\"\"INSERT INTO chunks\n (document_id, chunk_index, content, token_count, heading_path, metadata)\n VALUES (%s, %s, %s, %s, %s, %s)\"\"\",\n (doc_id, chunk_index, content, token_count, heading_path, metadata),\n )\n self.commit()\n chunk_id = cursor.lastrowid\n cursor.close()\n return chunk_id\n\n def get_chunks_for_embedding(self, limit=100):\n \"\"\"Get chunks that need embeddings.\"\"\"\n cursor = self.execute(\n \"\"\"SELECT c.id, c.content, c.document_id\n FROM chunks c\n WHERE c.qdrant_id IS NULL\n ORDER BY c.created_at\n LIMIT %s\"\"\",\n (limit,),\n )\n results = cursor.fetchall()\n cursor.close()\n return results\n\n def update_chunk_qdrant_id(self, chunk_id, qdrant_id):\n \"\"\"Update chunk with Qdrant point ID.\"\"\"\n cursor = self.execute(\"UPDATE chunks SET qdrant_id = %s WHERE id = %s\", (qdrant_id, chunk_id))\n self.commit()",
"numLines": 80,
"startLine": 58,
"totalLines": 753
}
}
}