{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/migration\/content-pipeline\/src\/fix_canonical_names.py",
"old_string": "def main():\n print(\"Connecting to database...\")\n # Override database to ki_content\n config = DB_CONFIG.copy()\n config[\"database\"] = \"ki_content\"\n conn = mysql.connector.connect(**config)",
"new_string": "def main():\n print(\"Connecting to database...\")\n # Uses DB_CONFIG from config.py (configured via environment)\n conn = mysql.connector.connect(**DB_CONFIG)"
}
}
{
"tool_response": {
"filePath": "\/var\/www\/migration\/content-pipeline\/src\/fix_canonical_names.py",
"oldString": "def main():\n print(\"Connecting to database...\")\n # Override database to ki_content\n config = DB_CONFIG.copy()\n config[\"database\"] = \"ki_content\"\n conn = mysql.connector.connect(**config)",
"newString": "def main():\n print(\"Connecting to database...\")\n # Uses DB_CONFIG from config.py (configured via environment)\n conn = mysql.connector.connect(**DB_CONFIG)",
"originalFile": "#!\/usr\/bin\/env python3\n\"\"\"Fix missing canonical_name for all entities.\"\"\"\n\nimport os\nimport re\nimport sys\nimport unicodedata\n\nimport mysql.connector\n\n# Add pipeline directory to path\nsys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))\nfrom config import DB_CONFIG\n\n\ndef normalize_name(name: str) -> str:\n \"\"\"Generate canonical_name from entity name.\n\n Rules:\n - Lowercase\n - German umlauts: ä→ae, ö→oe, ü→ue, ß→ss\n - Replace spaces with underscores\n - Remove special characters except underscores\n - Collapse multiple underscores\n \"\"\"\n if not name:\n return \"\"\n\n # Lowercase\n result = name.lower()\n\n # German umlauts\n replacements = {\n \"ä\": \"ae\",\n \"ö\": \"oe\",\n \"ü\": \"ue\",\n \"ß\": \"ss\",\n \"Ä\": \"ae\",\n \"Ö\": \"oe\",\n \"Ü\": \"ue\",\n }\n for old, new in replacements.items():\n result = result.replace(old, new)\n\n # Normalize unicode (handle accents etc.)\n result = unicodedata.normalize(\"NFKD\", result)\n result = result.encode(\"ascii\", \"ignore\").decode(\"ascii\")\n\n # Replace spaces and hyphens with underscores\n result = re.sub(r\"[\\s\\-]+\", \"_\", result)\n\n # Remove all non-alphanumeric except underscores\n result = re.sub(r\"[^a-z0-9_]\", \"\", result)\n\n # Collapse multiple underscores\n result = re.sub(r\"_+\", \"_\", result)\n\n # Strip leading\/trailing underscores\n result = result.strip(\"_\")\n\n return result\n\n\ndef main():\n print(\"Connecting to database...\")\n # Override database to ki_content\n config = DB_CONFIG.copy()\n config[\"database\"] = \"ki_content\"\n conn = mysql.connector.connect(**config)\n cursor = conn.cursor(dictionary=True)\n\n # Get all entities without canonical_name\n cursor.execute(\"\"\"\n SELECT id, name, canonical_name\n FROM entities\n WHERE canonical_name IS NULL OR canonical_name = ''\n \"\"\")\n entities = cursor.fetchall()\n\n print(f\"Found {len(entities)} entities without canonical_name\")\n\n if not entities:\n print(\"Nothing to do!\")\n return\n\n # Update each entity\n updated = 0\n for entity in entities:\n canonical = normalize_name(entity[\"name\"])\n if canonical:\n cursor.execute(\"UPDATE entities SET canonical_name = %s WHERE id = %s\", (canonical, entity[\"id\"]))\n updated += 1\n if updated <= 10:\n print(f\" [{entity['id']}] {entity['name']} -> {canonical}\")\n\n if updated > 10:\n print(f\" ... and {updated - 10} more\")\n\n conn.commit()\n print(f\"\\nUpdated {updated} entities with canonical_name\")\n\n # Also update status to 'normalized' for entities with canonical_name\n cursor.execute(\"\"\"\n UPDATE entities\n SET status = 'normalized'\n WHERE canonical_name IS NOT NULL AND canonical_name != ''\n \"\"\")\n conn.commit()\n print(\"Set status='normalized' for all entities with canonical_name\")\n\n cursor.close()\n conn.close()\n print(\"Done!\")\n\n\nif __name__ == \"__main__\":\n main()\n",
"structuredPatch": [
{
"oldStart": 63,
"oldLines": 10,
"newStart": 63,
"newLines": 8,
"lines": [
" ",
" def main():",
" print(\"Connecting to database...\")",
"- # Override database to ki_content",
"- config = DB_CONFIG.copy()",
"- config[\"database\"] = \"ki_content\"",
"- conn = mysql.connector.connect(**config)",
"+ # Uses DB_CONFIG from config.py (configured via environment)",
"+ conn = mysql.connector.connect(**DB_CONFIG)",
" cursor = conn.cursor(dictionary=True)",
" ",
" # Get all entities without canonical_name"
]
}
],
"userModified": false,
"replaceAll": false
}
}