migrate_semantics.py

Code Hygiene Score: 96

Issues 4

Zeile Typ Beschreibung
138 magic_number Magic Number gefunden: 60
140 magic_number Magic Number gefunden: 60
156 magic_number Magic Number gefunden: 60
158 magic_number Magic Number gefunden: 60

Dependencies 3

Funktionen 4

Code

#!/usr/bin/env python3
"""
Migration Script: Unify semantic tables.
Phase 1 of semantic infrastructure cleanup.
"""

import os
import sys

# Add parent directory for imports
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from db import db


def migrate_chunk_text_semantics() -> int:
    """Copy the semantic fields of chunk_text_semantics onto chunk_semantics.

    Reads every row of ``chunk_text_semantics`` and issues one ``UPDATE``
    against ``chunk_semantics`` per row, matched on ``chunk_id``. A row whose
    UPDATE raises is reported on stdout and skipped; the remaining rows are
    still processed. All changes are committed once, after the loop.

    Returns:
        Number of source rows whose UPDATE statement ran without raising.
    """
    print("Migrating chunk_text_semantics -> chunk_semantics...")

    # Load the complete source table; release the cursor even if the fetch fails.
    cursor = db.execute("""
        SELECT chunk_id, statement_form, intent, frame, is_negated, discourse_role
        FROM chunk_text_semantics
    """)
    try:
        records = cursor.fetchall()
    finally:
        cursor.close()

    print(f"  Found {len(records)} records to migrate")

    migrated = 0
    for rec in records:
        try:
            write_cursor = db.execute(
                """
                UPDATE chunk_semantics
                SET statement_form = %s,
                    intent = %s,
                    frame = %s,
                    is_negated = %s,
                    discourse_role = %s
                WHERE chunk_id = %s
            """,
                (
                    rec["statement_form"],
                    rec["intent"],
                    rec["frame"],
                    rec["is_negated"],
                    rec["discourse_role"],
                    rec["chunk_id"],
                ),
            )
        except Exception as e:
            # Best effort: report the failing row and keep going.
            print(f"  Error migrating chunk_id {rec['chunk_id']}: {e}")
        else:
            # NOTE(review): an UPDATE that matches no chunk_semantics row does
            # not raise, so such rows are still counted here - confirm that
            # every chunk_id is expected to exist in the target table.
            migrated += 1
            # db.execute() hands back a cursor (see the SELECT above); close it
            # so the loop does not leave one open cursor per record.
            if write_cursor is not None:
                write_cursor.close()

    db.commit()
    print(f"  Migrated: {migrated}")
    return migrated


def migrate_entity_knowledge_semantics() -> int:
    """Copy entity_knowledge_semantics rows into entity_semantics.

    Reads every row of ``entity_knowledge_semantics`` and upserts it into
    ``entity_semantics`` with ``INSERT ... ON DUPLICATE KEY UPDATE``, so a row
    that collides with an existing key has its columns overwritten. The
    source column ``context_meaning`` is written to the target column
    ``context``. A row whose statement raises is reported on stdout and
    skipped; the remaining rows are still processed. All changes are
    committed once, after the loop.

    Returns:
        Number of source rows whose upsert statement ran without raising.
    """
    print("Migrating entity_knowledge_semantics -> entity_semantics...")

    # Load the complete source table; release the cursor even if the fetch fails.
    cursor = db.execute("""
        SELECT entity_id, chunk_id, semantic_role, properties,
               functional_category, context_meaning, confidence, model_used
        FROM entity_knowledge_semantics
    """)
    try:
        records = cursor.fetchall()
    finally:
        cursor.close()

    print(f"  Found {len(records)} records to migrate")

    migrated = 0
    for rec in records:
        try:
            # Insert or update entity_semantics with all fields
            write_cursor = db.execute(
                """
                INSERT INTO entity_semantics
                    (entity_id, chunk_id, context, semantic_role, properties,
                     functional_category, confidence, model_used)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                ON DUPLICATE KEY UPDATE
                    chunk_id = VALUES(chunk_id),
                    context = VALUES(context),
                    semantic_role = VALUES(semantic_role),
                    properties = VALUES(properties),
                    functional_category = VALUES(functional_category),
                    confidence = VALUES(confidence),
                    model_used = VALUES(model_used)
            """,
                (
                    rec["entity_id"],
                    rec["chunk_id"],
                    rec["context_meaning"],  # stored in the `context` column
                    rec["semantic_role"],
                    rec["properties"],
                    rec["functional_category"],
                    rec["confidence"],
                    rec["model_used"],
                ),
            )
        except Exception as e:
            # Best effort: report the failing row and keep going.
            print(f"  Error migrating entity_id {rec['entity_id']}: {e}")
        else:
            migrated += 1
            # db.execute() hands back a cursor (see the SELECT above); close it
            # so the loop does not leave one open cursor per record.
            if write_cursor is not None:
                write_cursor.close()

    db.commit()
    print(f"  Migrated: {migrated}")
    return migrated


def verify_migration() -> None:
    """Print a summary of the target tables after migration.

    Reports how many ``chunk_semantics`` rows have a non-NULL
    ``statement_form`` out of the table total, and the row count of
    ``entity_semantics``. Output goes to stdout only; nothing is returned
    and no data is modified.
    """
    print("\nVerification:")

    # Check chunk_semantics
    cursor = db.execute("""
        SELECT COUNT(*) as total,
               SUM(CASE WHEN statement_form IS NOT NULL THEN 1 ELSE 0 END) as with_form
        FROM chunk_semantics
    """)
    try:
        result = cursor.fetchone()
    finally:
        cursor.close()
    # SUM() over an empty table yields NULL; show 0 instead of "None/0".
    with_form = result["with_form"] or 0
    print(f"  chunk_semantics: {with_form}/{result['total']} have statement_form")

    # Check entity_semantics
    cursor = db.execute("SELECT COUNT(*) as cnt FROM entity_semantics")
    try:
        result = cursor.fetchone()
    finally:
        cursor.close()
    print(f"  entity_semantics: {result['cnt']} records")


def main():
    """Connect to the database, run both migrations, then verify.

    Returns 1 when the database connection cannot be established and 0 once
    all steps have run. The connection is released in every case after a
    successful connect, including when a step raises.
    """
    banner = "=" * 60
    print(banner)
    print("Semantic Infrastructure Migration")
    print(banner)

    connected = db.connect()
    if not connected:
        print("ERROR: Could not connect to database")
        return 1

    # Order matters for the report: chunk semantics, entity semantics, then
    # the verification summary.
    steps = (
        migrate_chunk_text_semantics,
        migrate_entity_knowledge_semantics,
        verify_migration,
    )
    try:
        for step in steps:
            step()

        print("\n" + banner)
        print("Migration complete!")
        print(banner)
    finally:
        db.disconnect()

    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
← Übersicht