migrate_semantics.py
- Pfad: /var/www/scripts/pipeline/migrate_semantics.py
- Namespace: pipeline
- Zeilen: 168 | Größe: 4,903 Bytes
- Geändert: 2025-12-31 03:01:09 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 96
- Dependencies: 100 (25%)
- LOC: 100 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 60 (10%)
Issues 4
| Zeile | Typ | Beschreibung |
|-------|-----|--------------|
| 138 | magic_number | Magic Number gefunden: 60 |
| 140 | magic_number | Magic Number gefunden: 60 |
| 156 | magic_number | Magic Number gefunden: 60 |
| 158 | magic_number | Magic Number gefunden: 60 |
Dependencies 3
Funktionen 4
- migrate_chunk_text_semantics() — Zeile 16
- migrate_entity_knowledge_semantics() — Zeile 61
- verify_migration() — Zeile 115
- main() — Zeile 136
Code
#!/usr/bin/env python3
"""
Migration Script: Unify semantic tables.
Phase 1 of semantic infrastructure cleanup.
"""
import os
import sys
# Put this script's own directory on sys.path so `from db import db` below resolves
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from db import db
def migrate_chunk_text_semantics():
    """Copy text-semantic fields from chunk_text_semantics into chunk_semantics.

    Reads every row of ``chunk_text_semantics`` and issues one ``UPDATE`` per
    row against ``chunk_semantics`` (matched on ``chunk_id``), then commits
    once after all rows have been processed. A row whose statement raises is
    reported and skipped so the remaining rows are still migrated.

    Returns:
        The number of rows whose ``UPDATE`` executed without raising. An
        ``UPDATE`` that matches no ``chunk_semantics`` row does not raise and
        is therefore still counted.
    """
    print("Migrating chunk_text_semantics -> chunk_semantics...")

    # Load all source rows up front; release the cursor even if the fetch fails.
    cursor = db.execute("""
        SELECT chunk_id, statement_form, intent, frame, is_negated, discourse_role
        FROM chunk_text_semantics
    """)
    try:
        records = cursor.fetchall()
    finally:
        cursor.close()
    print(f" Found {len(records)} records to migrate")

    # Loop-invariant statement text, built once.
    update_sql = """
        UPDATE chunk_semantics
        SET statement_form = %s,
            intent = %s,
            frame = %s,
            is_negated = %s,
            discourse_role = %s
        WHERE chunk_id = %s
    """

    migrated = 0
    failed = 0
    for rec in records:
        try:
            write_cursor = db.execute(
                update_sql,
                (
                    rec["statement_form"],
                    rec["intent"],
                    rec["frame"],
                    rec["is_negated"],
                    rec["discourse_role"],
                    rec["chunk_id"],
                ),
            )
            # db.execute hands back a cursor for the SELECT above; assuming it
            # does the same for writes, close it so one cursor per row is not
            # left open. NOTE(review): confirm against the db module.
            if write_cursor is not None:
                write_cursor.close()
            migrated += 1
        except Exception as e:
            # Best-effort migration: report the row and keep going.
            failed += 1
            print(f" Error migrating chunk_id {rec['chunk_id']}: {e}")

    db.commit()
    print(f" Migrated: {migrated}")
    if failed:
        print(f" Failed: {failed}")
    return migrated
def migrate_entity_knowledge_semantics():
    """Copy rows from entity_knowledge_semantics into entity_semantics.

    Reads every row of ``entity_knowledge_semantics`` and upserts it into
    ``entity_semantics`` with ``INSERT ... ON DUPLICATE KEY UPDATE``. The
    source column ``context_meaning`` is written to the target column
    ``context``. A single commit is issued after all rows have been
    processed. A row whose statement raises is reported and skipped so the
    remaining rows are still migrated.

    Returns:
        The number of rows whose upsert executed without raising.
    """
    print("Migrating entity_knowledge_semantics -> entity_semantics...")

    # Load all source rows up front; release the cursor even if the fetch fails.
    cursor = db.execute("""
        SELECT entity_id, chunk_id, semantic_role, properties,
               functional_category, context_meaning, confidence, model_used
        FROM entity_knowledge_semantics
    """)
    try:
        records = cursor.fetchall()
    finally:
        cursor.close()
    print(f" Found {len(records)} records to migrate")

    # Loop-invariant statement text, built once.
    # NOTE(review): VALUES() inside ON DUPLICATE KEY UPDATE is deprecated in
    # recent MySQL releases in favour of row aliases; left unchanged because
    # the server version is not visible from this script.
    upsert_sql = """
        INSERT INTO entity_semantics
            (entity_id, chunk_id, context, semantic_role, properties,
             functional_category, confidence, model_used)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            chunk_id = VALUES(chunk_id),
            context = VALUES(context),
            semantic_role = VALUES(semantic_role),
            properties = VALUES(properties),
            functional_category = VALUES(functional_category),
            confidence = VALUES(confidence),
            model_used = VALUES(model_used)
    """

    migrated = 0
    failed = 0
    for rec in records:
        try:
            write_cursor = db.execute(
                upsert_sql,
                (
                    rec["entity_id"],
                    rec["chunk_id"],
                    rec["context_meaning"],  # stored in the `context` column
                    rec["semantic_role"],
                    rec["properties"],
                    rec["functional_category"],
                    rec["confidence"],
                    rec["model_used"],
                ),
            )
            # db.execute hands back a cursor for the SELECT above; assuming it
            # does the same for writes, close it so one cursor per row is not
            # left open. NOTE(review): confirm against the db module.
            if write_cursor is not None:
                write_cursor.close()
            migrated += 1
        except Exception as e:
            # Best-effort migration: report the row and keep going.
            failed += 1
            print(f" Error migrating entity_id {rec['entity_id']}: {e}")

    db.commit()
    print(f" Migrated: {migrated}")
    if failed:
        print(f" Failed: {failed}")
    return migrated
def verify_migration():
    """Print summary counts for the two target tables.

    Reports how many ``chunk_semantics`` rows have a non-NULL
    ``statement_form`` out of the table total, and the row count of
    ``entity_semantics``. This only prints; it returns nothing and does not
    fail when the numbers look wrong.
    """
    print("\nVerification:")

    # Check chunk_semantics
    cursor = db.execute("""
        SELECT COUNT(*) as total,
               SUM(CASE WHEN statement_form IS NOT NULL THEN 1 ELSE 0 END) as with_form
        FROM chunk_semantics
    """)
    try:
        result = cursor.fetchone()
    finally:
        cursor.close()
    # SUM() over zero rows yields NULL, which would otherwise print as "None/0".
    with_form = result["with_form"] or 0
    print(f" chunk_semantics: {with_form}/{result['total']} have statement_form")

    # Check entity_semantics
    cursor = db.execute("SELECT COUNT(*) as cnt FROM entity_semantics")
    try:
        result = cursor.fetchone()
    finally:
        cursor.close()
    print(f" entity_semantics: {result['cnt']} records")
def main():
    """Connect to the database, run both migrations, then print verification.

    Returns:
        1 if the database connection could not be established, otherwise 0.
        An exception raised by a step propagates after the connection has
        been closed.
    """
    banner = "=" * 60
    print(banner)
    print("Semantic Infrastructure Migration")
    print(banner)

    connected = db.connect()
    if not connected:
        print("ERROR: Could not connect to database")
        return 1

    # Order matters: chunk semantics, then entity semantics, then the report.
    steps = (
        migrate_chunk_text_semantics,
        migrate_entity_knowledge_semantics,
        verify_migration,
    )
    try:
        for step in steps:
            step()
        print("\n" + banner)
        print("Migration complete!")
        print(banner)
    finally:
        # Always release the connection, whether or not a step raised.
        db.disconnect()
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)