{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
"limit": 80
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
"content": "\"\"\"\nEntity Extraction - Extract and store entities from text.\n\"\"\"\n\nimport json\nimport re\nimport sys\nimport time\nimport unicodedata\n\nimport requests\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom config import ANTHROPIC_MODEL, OLLAMA_HOST\nfrom db import db\nfrom protokoll import protokoll\n\n\n\ndef _build_prompt_from_yaml(yaml_content: str, text: str) -> str:\n \"\"\"Pass YAML prompt directly to LLM with text placeholder replaced.\"\"\"\n return yaml_content.replace(\"{{TEXT}}\", text[:3000])\n\n\ndef normalize_name(name: str) -> str:\n \"\"\"Generate canonical_name from entity name.\n\n Rules:\n - Lowercase\n - German umlauts: ä→ae, ö→oe, ü→ue, ß→ss\n - Replace spaces with underscores\n - Remove special characters except underscores\n - Collapse multiple underscores\n \"\"\"\n if not name:\n return \"\"\n\n result = name.lower()\n\n replacements = {\n \"ä\": \"ae\", \"ö\": \"oe\", \"ü\": \"ue\", \"ß\": \"ss\",\n \"Ä\": \"ae\", \"Ö\": \"oe\", \"Ü\": \"ue\",\n }\n for old, new in replacements.items():\n result = result.replace(old, new)\n\n result = unicodedata.normalize(\"NFKD\", result)\n result = result.encode(\"ascii\", \"ignore\").decode(\"ascii\")\n result = re.sub(r\"[\\s\\-]+\", \"_\", result)\n result = re.sub(r\"[^a-z0-9_]\", \"\", result)\n result = re.sub(r\"_+\", \"_\", result)\n result = result.strip(\"_\")\n\n return result\n\n\n# Category to type mapping for new format\nCATEGORY_TYPE_MAP = {\n \"persons\": \"PERSON\",\n \"roles\": \"ROLE\",\n \"organizations\": \"ORGANIZATION\",\n \"theories\": \"THEORY\",\n \"models\": \"MODEL\",\n \"concepts\": \"CONCEPT\",\n \"artifacts\": \"ARTIFACT\",\n \"metaphors\": \"METAPHOR\",\n \"locations\": \"LOCATION\",\n}\n\n\ndef _validate_entity_in_text(entity_name: str, source_text: str) -> bool:\n \"\"\"Strictly validate that entity appears EXACTLY in source text.\"\"\"\n if not entity_name or len(entity_name) < 3:\n return False\n # Exact match required\n return entity_name in source_text\n\n\ndef _normalize_entity_response(result: dict, source_text: str) -> list[dict]:",
"numLines": 80,
"startLine": 1,
"totalLines": 455
}
}
}