{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
"offset": 195,
"limit": 80
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
"content": " entities = _extract_entities_single_pass(text, model)\n\n return entities\n\n\ndef _extract_entities_2pass(\n text: str, pass1_template: str, pass2_template: str, model: str\n) -> list[dict]:\n \"\"\"2-pass entity extraction: extract then categorize.\"\"\"\n try:\n # PASS 1: Extract entity names\n prompt1 = pass1_template.replace(\"{text}\", text[:3000])\n resp1, tok_in1, tok_out1, dur1 = _call_ollama(prompt1, model)\n\n try:\n result1 = json.loads(resp1)\n raw_entities = result1.get(\"entities\", [])\n except json.JSONDecodeError:\n db.log(\"WARNING\", \"Failed to parse Pass 1 JSON\")\n return []\n\n # Validate: only keep entities that appear in text\n valid_entities = [e for e in raw_entities if _validate_entity_in_text(e, text)]\n\n if not valid_entities:\n return []\n\n protokoll.log_llm_call(\n request=f\"[entity_extraction_pass1] {len(valid_entities)} entities\",\n response=json.dumps(valid_entities[:10], ensure_ascii=False),\n model_name=f\"ollama:{model}\",\n tokens_input=tok_in1,\n tokens_output=tok_out1,\n duration_ms=dur1,\n status=\"completed\",\n )\n\n # PASS 2: Categorize entities (with dynamic categories from DB)\n entities_json = json.dumps(valid_entities, ensure_ascii=False)\n categories_text = _build_pass2_categories()\n prompt2 = pass2_template.replace(\"{categories}\", categories_text)\n prompt2 = prompt2.replace(\"{entities}\", entities_json)\n resp2, tok_in2, tok_out2, dur2 = _call_ollama(prompt2, model)\n\n try:\n result2 = json.loads(resp2)\n categorized = result2.get(\"kategorisiert\", [])\n except json.JSONDecodeError:\n db.log(\"WARNING\", \"Failed to parse Pass 2 JSON\")\n # Fallback: return uncategorized entities\n return [{\"name\": e, \"type\": \"CONCEPT\", \"description\": None} for e in valid_entities]\n\n protokoll.log_llm_call(\n request=f\"[entity_extraction_pass2] categorize {len(valid_entities)} entities\",\n response=resp2[:1000],\n model_name=f\"ollama:{model}\",\n tokens_input=tok_in2,\n tokens_output=tok_out2,\n duration_ms=dur2,\n status=\"completed\",\n )\n\n # Normalize output (validate types against DB)\n valid_types = _get_valid_type_codes()\n entities = []\n for e in categorized:\n if isinstance(e, dict) and \"name\" in e and \"type\" in e:\n # Final validation\n if _validate_entity_in_text(e[\"name\"], text):\n entity_type = e[\"type\"].upper()\n # Fallback to CONCEPT if type not in DB\n if entity_type not in valid_types:\n entity_type = \"CONCEPT\"\n entities.append({\n \"name\": e[\"name\"],\n \"type\": entity_type,\n \"description\": e.get(\"description\"),\n })\n\n return entities",
"numLines": 80,
"startLine": 195,
"totalLines": 496
}
}
}