{
"event": "PostToolUse",
"tool_name": "Read",
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_normalizer.py",
"content": "\"\"\"\nEntity Type Normalizer\nDeterministic rules for entity type assignment.\nReads rules from config\/entity_type_rules.yaml.\n\"\"\"\n\nimport re\nfrom pathlib import Path\n\nimport yaml\n\n\nclass EntityNormalizer:\n \"\"\"Normalizes entity types based on deterministic rules.\"\"\"\n\n def __init__(self, rules_path: str | None = None):\n if rules_path is None:\n rules_path = Path(__file__).parent.parent \/ \"config\" \/ \"entity_type_rules.yaml\"\n\n self.rules_path = Path(rules_path)\n self.rules = self._load_rules()\n\n # Build lookup structures\n self._explicit_map: dict[str, str] = {}\n self._pattern_rules: list[tuple[re.Pattern, str]] = []\n self._stopwords: set[str] = set()\n self._default_type = \"CONCEPT\"\n\n self._build_lookups()\n\n def _load_rules(self) -> dict:\n \"\"\"Load rules from YAML file.\"\"\"\n if not self.rules_path.exists():\n return {}\n\n with open(self.rules_path, encoding=\"utf-8\") as f:\n return yaml.safe_load(f) or {}\n\n def _build_lookups(self) -> None:\n \"\"\"Build efficient lookup structures from rules.\"\"\"\n # Explicit mappings (case-insensitive lookup)\n for entity_type, names in self.rules.get(\"explicit_mappings\", {}).items():\n for name in names:\n self._explicit_map[name.lower()] = entity_type\n\n # Pattern rules (compile regexes)\n for entity_type, patterns in self.rules.get(\"pattern_rules\", {}).items():\n for pattern in patterns:\n try:\n compiled = re.compile(pattern, re.IGNORECASE)\n self._pattern_rules.append((compiled, entity_type))\n except re.error:\n pass\n\n # Stopwords\n self._stopwords = set(w.lower() for w in self.rules.get(\"stopwords\", []))\n\n # Default type\n self._default_type = self.rules.get(\"default_type\", \"CONCEPT\")\n\n def is_stopword(self, name: str) -> bool:\n \"\"\"Check if entity name is a stopword.\"\"\"\n return name.lower() in self._stopwords\n\n def normalize_type(self, name: str, llm_type: str | None = None) -> str:\n \"\"\"\n Determine the correct type for an entity.\n\n Priority:\n 1. Explicit mapping (highest)\n 2. Pattern rules\n 3. LLM suggestion (if valid)\n 4. Default type\n \"\"\"\n name_lower = name.lower()\n\n # 1. Check explicit mapping\n if name_lower in self._explicit_map:\n return self._explicit_map[name_lower]\n\n # 2. Check pattern rules\n for pattern, entity_type in self._pattern_rules:\n if pattern.search(name):\n return entity_type\n\n # 3. Use LLM type if valid\n valid_types = {\n \"PERSON\", \"ROLE\", \"ORGANIZATION\", \"LOCATION\",\n \"THEORY\", \"METHOD\", \"MODEL\", \"CONCEPT\",\n \"ARTIFACT\", \"METAPHOR\", \"PRINCIPLE\", \"TOOL\", \"EVENT\", \"OTHER\"\n }\n if llm_type and llm_type.upper() in valid_types:\n return llm_type.upper()\n\n # 4. Default\n return self._default_type\n\n def normalize_entity(self, entity: dict) -> dict | None:\n \"\"\"\n Normalize a single entity.",
"numLines": 100,
"startLine": 1,
"totalLines": 175
}
}
}