fix_chunk_status.py

Code Hygiene Score: 100

Keine Issues gefunden.

Dependencies 4

Funktionen 1

Code

#!/usr/bin/env python3
"""Fix chunk status based on qdrant_id presence."""

import os
import sys

import mysql.connector

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from config import DB_CONFIG


def _count_by_status(cursor):
    """Return a dict mapping each ``chunks.status`` value to its row count."""
    cursor.execute("SELECT status, COUNT(*) as cnt FROM chunks GROUP BY status")
    return {row["status"]: row["cnt"] for row in cursor.fetchall()}


def main():
    """Repair ``chunks.status`` based on whether ``qdrant_id`` is set.

    Connects using ``DB_CONFIG`` with the database overridden to
    ``ki_content``, prints the per-status row counts, then:

    * sets status to ``'embedded'`` for rows that have a ``qdrant_id`` and
      whose status is NULL or ``'created'``;
    * sets status to ``'created'`` for rows without a ``qdrant_id`` whose
      status is NULL or the empty string.

    Both updates are committed with a single ``commit()``. If anything fails,
    the pending changes are rolled back and the exception is re-raised. The
    cursor and the connection are closed in every case.
    """
    print("Connecting to database...")
    config = DB_CONFIG.copy()  # copy so the shared config dict is not mutated
    config["database"] = "ki_content"
    conn = mysql.connector.connect(**config)
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            # Count chunks by status
            before = _count_by_status(cursor)
            print(f"Before: {before}")

            # Update chunks with qdrant_id to 'embedded'
            # NOTE(review): rows that have a qdrant_id but status = '' are not
            # matched here, although the second statement treats '' as unset.
            # Confirm whether that asymmetry is intended.
            cursor.execute("""
                UPDATE chunks
                SET status = 'embedded'
                WHERE qdrant_id IS NOT NULL AND (status IS NULL OR status = 'created')
            """)
            embedded_count = cursor.rowcount
            print(f"Set {embedded_count} chunks to 'embedded' (have qdrant_id)")

            # Update chunks without qdrant_id to 'created'
            cursor.execute("""
                UPDATE chunks
                SET status = 'created'
                WHERE qdrant_id IS NULL AND (status IS NULL OR status = '')
            """)
            created_count = cursor.rowcount
            print(f"Set {created_count} chunks to 'created' (no qdrant_id)")

            conn.commit()

            # Count after
            after = _count_by_status(cursor)
            print(f"After: {after}")
        except Exception:
            # Do not leave a half-applied fix pending on the connection.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()
    print("Done!")


# Run the fix only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
← Übersicht