Protokoll #19017

ID	19017
Zeitstempel	2025-12-26 23:25:36.454640
Client	root
IP	145.224.72.140
Modell	claude-sonnet-4-20250514
Status	pending
Tokens	907 (Input: 907, Output: 0)
Dauer	-
Request-Zeit	2025-12-26 23:25:36.454640
Response-Zeit	-
Request

{
    "event": "PostToolUse",
    "tool_name": "Read",
    "tool_response": {
        "type": "text",
        "file": {
            "filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_normalizer.py",
            "content": "\"\"\"\nEntity Type Normalizer\nDeterministic rules for entity type assignment.\nReads rules from config\/entity_type_rules.yaml.\n\"\"\"\n\nimport re\nfrom pathlib import Path\n\nimport yaml\n\n\nclass EntityNormalizer:\n    \"\"\"Normalizes entity types based on deterministic rules.\"\"\"\n\n    def __init__(self, rules_path: str | None = None):\n        if rules_path is None:\n            rules_path = Path(__file__).parent.parent \/ \"config\" \/ \"entity_type_rules.yaml\"\n\n        self.rules_path = Path(rules_path)\n        self.rules = self._load_rules()\n\n        # Build lookup structures\n        self._explicit_map: dict[str, str] = {}\n        self._pattern_rules: list[tuple[re.Pattern, str]] = []\n        self._stopwords: set[str] = set()\n        self._default_type = \"CONCEPT\"\n\n        self._build_lookups()\n\n    def _load_rules(self) -> dict:\n        \"\"\"Load rules from YAML file.\"\"\"\n        if not self.rules_path.exists():\n            return {}\n\n        with open(self.rules_path, encoding=\"utf-8\") as f:\n            return yaml.safe_load(f) or {}\n\n    def _build_lookups(self) -> None:\n        \"\"\"Build efficient lookup structures from rules.\"\"\"\n        # Explicit mappings (case-insensitive lookup)\n        for entity_type, names in self.rules.get(\"explicit_mappings\", {}).items():\n            for name in names:\n                self._explicit_map[name.lower()] = entity_type\n\n        # Pattern rules (compile regexes)\n        for entity_type, patterns in self.rules.get(\"pattern_rules\", {}).items():\n            for pattern in patterns:\n                try:\n                    compiled = re.compile(pattern, re.IGNORECASE)\n                    self._pattern_rules.append((compiled, entity_type))\n                except re.error:\n                    pass\n\n        # Stopwords\n        self._stopwords = set(w.lower() for w in self.rules.get(\"stopwords\", []))\n\n        # Default type\n        self._default_type = self.rules.get(\"default_type\", \"CONCEPT\")\n\n    def is_stopword(self, name: str) -> bool:\n        \"\"\"Check if entity name is a stopword.\"\"\"\n        return name.lower() in self._stopwords\n\n    def normalize_type(self, name: str, llm_type: str | None = None) -> str:\n        \"\"\"\n        Determine the correct type for an entity.\n\n        Priority:\n        1. Explicit mapping (highest)\n        2. Pattern rules\n        3. LLM suggestion (if valid)\n        4. Default type\n        \"\"\"\n        name_lower = name.lower()\n\n        # 1. Check explicit mapping\n        if name_lower in self._explicit_map:\n            return self._explicit_map[name_lower]\n\n        # 2. Check pattern rules\n        for pattern, entity_type in self._pattern_rules:\n            if pattern.search(name):\n                return entity_type\n\n        # 3. Use LLM type if valid\n        valid_types = {\n            \"PERSON\", \"ROLE\", \"ORGANIZATION\", \"LOCATION\",\n            \"THEORY\", \"METHOD\", \"MODEL\", \"CONCEPT\",\n            \"ARTIFACT\", \"METAPHOR\", \"PRINCIPLE\", \"TOOL\", \"EVENT\", \"OTHER\"\n        }\n        if llm_type and llm_type.upper() in valid_types:\n            return llm_type.upper()\n\n        # 4. Default\n        return self._default_type\n\n    def normalize_entity(self, entity: dict) -> dict | None:\n        \"\"\"\n        Normalize a single entity.",
            "numLines": 100,
            "startLine": 1,
            "totalLines": 175
        }
    }
}
Response

← Vorheriger Zur Liste Nächster →