{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
"offset": 80,
"limit": 50
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
"content": " return False\n # Exact match required\n return entity_name in source_text\n\n\ndef _normalize_entity_response(result: dict, source_text: str) -> list[dict]:\n \"\"\"Normalize entity response to standard format with validation.\n\n Handles two formats:\n 1. New: {\"persons\":[], \"roles\":[], ...}\n 2. Legacy: {\"entities\": [...]}\n\n Also validates entities against source text to filter hallucinations.\n \"\"\"\n entities = []\n\n # Check for legacy format\n if \"entities\" in result:\n legacy_entities = result.get(\"entities\", [])\n # Validate legacy entities too\n for e in legacy_entities:\n if isinstance(e, dict) and \"name\" in e:\n if _validate_entity_in_text(e[\"name\"], source_text):\n entities.append(e)\n return entities\n\n # New categorized format\n for category, items in result.items():\n if not isinstance(items, list):\n continue\n\n entity_type = CATEGORY_TYPE_MAP.get(category, category.upper())\n\n for item in items:\n if not item or not isinstance(item, str):\n continue\n\n # Strict validation: entity must appear EXACTLY in source text\n if not _validate_entity_in_text(item, source_text):\n continue # Skip hallucinations\n\n entities.append({\n \"name\": item,\n \"type\": entity_type,\n \"description\": None,\n })\n\n return entities\n\n",
"numLines": 50,
"startLine": 80,
"totalLines": 460
}
}
}