{
"event": "PreToolUse",
"tool_name": "Bash",
"tool_input": {
"command": "grep -n -A50 \"def _extract_entities_2pass\" \/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py | head -80",
"description": "Check 2pass extraction code"
}
}
{
"tool_response": {
"stdout": "157:def _extract_entities_2pass(\n158- text: str, pass1_template: str, pass2_template: str, model: str\n159-) -> list[dict]:\n160- \"\"\"2-pass entity extraction: extract then categorize.\"\"\"\n161- try:\n162- # PASS 1: Extract entity names\n163- prompt1 = pass1_template.replace(\"{text}\", text[:3000])\n164- resp1, tok_in1, tok_out1, dur1 = _call_ollama(prompt1, model)\n165-\n166- try:\n167- result1 = json.loads(resp1)\n168- raw_entities = result1.get(\"entities\", [])\n169- except json.JSONDecodeError:\n170- db.log(\"WARNING\", \"Failed to parse Pass 1 JSON\")\n171- return []\n172-\n173- # Validate: only keep entities that appear in text\n174- valid_entities = [e for e in raw_entities if _validate_entity_in_text(e, text)]\n175-\n176- if not valid_entities:\n177- return []\n178-\n179- protokoll.log_llm_call(\n180- request=f\"[entity_extraction_pass1] {len(valid_entities)} entities\",\n181- response=json.dumps(valid_entities[:10], ensure_ascii=False),\n182- model_name=f\"ollama:{model}\",\n183- tokens_input=tok_in1,\n184- tokens_output=tok_out1,\n185- duration_ms=dur1,\n186- status=\"completed\",\n187- )\n188-\n189- # PASS 2: Categorize entities\n190- entities_json = json.dumps(valid_entities, ensure_ascii=False)\n191- prompt2 = pass2_template.replace(\"{entities}\", entities_json)\n192- resp2, tok_in2, tok_out2, dur2 = _call_ollama(prompt2, model)\n193-\n194- try:\n195- result2 = json.loads(resp2)\n196- categorized = result2.get(\"kategorisiert\", [])\n197- except json.JSONDecodeError:\n198- db.log(\"WARNING\", \"Failed to parse Pass 2 JSON\")\n199- # Fallback: return uncategorized entities\n200- return [{\"name\": e, \"type\": \"CONCEPT\", \"description\": None} for e in valid_entities]\n201-\n202- protokoll.log_llm_call(\n203- request=f\"[entity_extraction_pass2] categorize {len(valid_entities)} entities\",\n204- response=resp2[:1000],\n205- model_name=f\"ollama:{model}\",\n206- tokens_input=tok_in2,\n207- tokens_output=tok_out2,",
"stderr": "",
"interrupted": false,
"isImage": false
}
}