fix_chunk_status.py
- Pfad: /var/www/scripts/pipeline/fix_chunk_status.py
- Namespace: pipeline
- Zeilen: 57 | Größe: 1,582 Bytes
- Geändert: 2025-12-25 09:31:24 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 100
- Dependencies: 100 (25%)
- LOC: 100 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
Keine Issues gefunden.
Dependencies 4
- use os
- use sys
- use mysql.connector
- use config.DB_CONFIG
Funktionen 1
- main() – Zeile 13
Code
#!/usr/bin/env python3
"""Fix chunk status based on qdrant_id presence."""
import os
import sys
import mysql.connector
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from config import DB_CONFIG
def _count_by_status(cursor):
    """Return a dict mapping each chunk status value to its row count."""
    cursor.execute("SELECT status, COUNT(*) as cnt FROM chunks GROUP BY status")
    return {row["status"]: row["cnt"] for row in cursor.fetchall()}


def main():
    """Repair the ``status`` column of ``chunks`` based on ``qdrant_id``.

    Connects to the ``ki_content`` database using ``DB_CONFIG``, then:

    * sets ``status = 'embedded'`` for rows that have a ``qdrant_id`` and
      whose status is NULL or ``'created'``;
    * sets ``status = 'created'`` for rows without a ``qdrant_id`` whose
      status is NULL or empty.

    Per-status row counts are printed before and after the update. Both
    updates are committed together; if anything fails the transaction is
    rolled back and the exception is re-raised. The cursor and connection
    are always closed.
    """
    print("Connecting to database...")
    config = DB_CONFIG.copy()
    config["database"] = "ki_content"
    conn = mysql.connector.connect(**config)
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            before = _count_by_status(cursor)
            print(f"Before: {before}")

            # Chunks that already carry a qdrant_id are considered embedded.
            # NOTE(review): rows with a qdrant_id and status = '' are matched
            # by neither UPDATE below - confirm this is intended.
            cursor.execute("""
                UPDATE chunks
                SET status = 'embedded'
                WHERE qdrant_id IS NOT NULL AND (status IS NULL OR status = 'created')
            """)
            embedded_count = cursor.rowcount
            print(f"Set {embedded_count} chunks to 'embedded' (have qdrant_id)")

            # Chunks without a qdrant_id and without a status become 'created'.
            cursor.execute("""
                UPDATE chunks
                SET status = 'created'
                WHERE qdrant_id IS NULL AND (status IS NULL OR status = '')
            """)
            created_count = cursor.rowcount
            print(f"Set {created_count} chunks to 'created' (no qdrant_id)")

            conn.commit()

            after = _count_by_status(cursor)
            print(f"After: {after}")
        except Exception:
            # Do not leave a half-applied fix behind; undo any uncommitted
            # changes and let the caller see the original error.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()
    print("Done!")
# Run the status fix only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()