{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
"offset": 105,
"limit": 80
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
"content": "\n # Strict validation: entity must appear EXACTLY in source text\n if not _validate_entity_in_text(item, source_text):\n continue # Skip hallucinations\n\n entities.append({\n \"name\": item,\n \"type\": entity_type,\n \"description\": None,\n })\n\n return entities\n\n\ndef extract_entities_ollama(text: str, model: str = \"gemma3:27b-it-qat\") -> list[dict]:\n \"\"\"Extract entities using Ollama.\n\n Supports two response formats:\n 1. New format: {\"persons\":[], \"roles\":[], \"theories\":[], ...}\n 2. Legacy format: {\"entities\": [{\"name\": \"...\", \"type\": \"...\"}]}\n \"\"\"\n prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n if not prompt_template:\n db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{text}\"\"\"\n\n # Support both {text} and {{TEXT}} placeholders\n prompt = prompt_template.replace(\"{text}\", text[:3000]).replace(\"{{TEXT}}\", text[:3000])\n\n try:\n start_time = time.time()\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n data = response.json()\n duration_ms = int((time.time() - start_time) * 1000)\n\n response_text = data.get(\"response\", \"{}\")\n\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n response=response_text[:2000],\n model_name=f\"ollama:{model}\",\n tokens_input=data.get(\"prompt_eval_count\", 0),\n tokens_output=data.get(\"eval_count\", 0),\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n try:\n result = json.loads(response_text)\n return _normalize_entity_response(result, text)\n except json.JSONDecodeError:\n db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n model_name=f\"ollama:{model}\",\n status=\"error\",\n error_message=str(e),\n )\n return []\n\n\ndef extract_entities_anthropic(text: str, client) -> list[dict]:\n \"\"\"Extract entities using Anthropic Claude.\"\"\"\n prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")",
"numLines": 80,
"startLine": 105,
"totalLines": 354
}
}
}