{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/migration\/content-pipeline\/src\/fix_canonical_names.py",
"limit": 80
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/migration\/content-pipeline\/src\/fix_canonical_names.py",
"content": "#!\/usr\/bin\/env python3\n\"\"\"Fix missing canonical_name for all entities.\"\"\"\n\nimport os\nimport re\nimport sys\nimport unicodedata\n\nimport mysql.connector\n\n# Add pipeline directory to path\nsys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))\nfrom config import DB_CONFIG\n\n\ndef normalize_name(name: str) -> str:\n \"\"\"Generate canonical_name from entity name.\n\n Rules:\n - Lowercase\n - German umlauts: ä→ae, ö→oe, ü→ue, ß→ss\n - Replace spaces with underscores\n - Remove special characters except underscores\n - Collapse multiple underscores\n \"\"\"\n if not name:\n return \"\"\n\n # Lowercase\n result = name.lower()\n\n # German umlauts\n replacements = {\n \"ä\": \"ae\",\n \"ö\": \"oe\",\n \"ü\": \"ue\",\n \"ß\": \"ss\",\n \"Ä\": \"ae\",\n \"Ö\": \"oe\",\n \"Ü\": \"ue\",\n }\n for old, new in replacements.items():\n result = result.replace(old, new)\n\n # Normalize unicode (handle accents etc.)\n result = unicodedata.normalize(\"NFKD\", result)\n result = result.encode(\"ascii\", \"ignore\").decode(\"ascii\")\n\n # Replace spaces and hyphens with underscores\n result = re.sub(r\"[\\s\\-]+\", \"_\", result)\n\n # Remove all non-alphanumeric except underscores\n result = re.sub(r\"[^a-z0-9_]\", \"\", result)\n\n # Collapse multiple underscores\n result = re.sub(r\"_+\", \"_\", result)\n\n # Strip leading\/trailing underscores\n result = result.strip(\"_\")\n\n return result\n\n\ndef main():\n print(\"Connecting to database...\")\n # Override database to ki_content\n config = DB_CONFIG.copy()\n config[\"database\"] = \"ki_content\"\n conn = mysql.connector.connect(**config)\n cursor = conn.cursor(dictionary=True)\n\n # Get all entities without canonical_name\n cursor.execute(\"\"\"\n SELECT id, name, canonical_name\n FROM entities\n WHERE canonical_name IS NULL OR canonical_name = ''\n \"\"\")\n entities = cursor.fetchall()\n\n print(f\"Found {len(entities)} entities without canonical_name\")",
"numLines": 80,
"startLine": 1,
"totalLines": 118
}
}
}