Protokoll #19074

ID: 19074
Zeitstempel: 2025-12-26 23:34:02.272810
Client: root
IP: 145.224.72.140
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 1,043 (Input: 40, Output: 1,003)
Dauer: 81 ms
Request-Zeit: 2025-12-26 23:34:02.272810
Response-Zeit: 2025-12-26 23:34:02.353556

Request

{
    "event": "PreToolUse",
    "tool_name": "Read",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
        "offset": 130,
        "limit": 100
    }
}

Response

{
    "tool_response": {
        "type": "text",
        "file": {
            "filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
            "content": "\ndef _call_ollama(prompt: str, model: str, timeout: int = 120) -> tuple[str, int, int, int]:\n    \"\"\"Call Ollama API and return (response_text, tokens_in, tokens_out, duration_ms).\"\"\"\n    start_time = time.time()\n    response = requests.post(\n        f\"{OLLAMA_HOST}\/api\/generate\",\n        json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n        timeout=timeout,\n    )\n    response.raise_for_status()\n    data = response.json()\n    duration_ms = int((time.time() - start_time) * 1000)\n    return (\n        data.get(\"response\", \"{}\"),\n        data.get(\"prompt_eval_count\", 0),\n        data.get(\"eval_count\", 0),\n        duration_ms,\n    )\n\n\ndef extract_entities_ollama(text: str, model: str = \"gemma3:27b-it-qat\") -> list[dict]:\n    \"\"\"Extract entities using 2-pass approach for better categorization.\n\n    Pass 1: Extract entity names from text\n    Pass 2: Categorize extracted entities\n    Post: Normalize types using deterministic rules\n\n    Falls back to single-pass if 2-pass prompts not available.\n    \"\"\"\n    # Try 2-pass approach first\n    pass1_template = db.get_prompt(\"entity_extraction_pass1\")\n    pass2_template = db.get_prompt(\"entity_extraction_pass2\")\n\n    if pass1_template and pass2_template:\n        entities = _extract_entities_2pass(text, pass1_template, pass2_template, model)\n    else:\n        # Fallback to single-pass\n        entities = _extract_entities_single_pass(text, model)\n\n    return entities\n\n\ndef _extract_entities_2pass(\n    text: str, pass1_template: str, pass2_template: str, model: str\n) -> list[dict]:\n    \"\"\"2-pass entity extraction: extract then categorize.\"\"\"\n    try:\n        # PASS 1: Extract entity names\n        prompt1 = pass1_template.replace(\"{text}\", text[:3000])\n        resp1, tok_in1, tok_out1, dur1 = _call_ollama(prompt1, model)\n\n        try:\n            result1 = json.loads(resp1)\n          
  raw_entities = result1.get(\"entities\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse Pass 1 JSON\")\n            return []\n\n        # Validate: only keep entities that appear in text\n        valid_entities = [e for e in raw_entities if _validate_entity_in_text(e, text)]\n\n        if not valid_entities:\n            return []\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction_pass1] {len(valid_entities)} entities\",\n            response=json.dumps(valid_entities[:10], ensure_ascii=False),\n            model_name=f\"ollama:{model}\",\n            tokens_input=tok_in1,\n            tokens_output=tok_out1,\n            duration_ms=dur1,\n            status=\"completed\",\n        )\n\n        # PASS 2: Categorize entities\n        entities_json = json.dumps(valid_entities, ensure_ascii=False)\n        prompt2 = pass2_template.replace(\"{entities}\", entities_json)\n        resp2, tok_in2, tok_out2, dur2 = _call_ollama(prompt2, model)\n\n        try:\n            result2 = json.loads(resp2)\n            categorized = result2.get(\"kategorisiert\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse Pass 2 JSON\")\n            # Fallback: return uncategorized entities\n            return [{\"name\": e, \"type\": \"CONCEPT\", \"description\": None} for e in valid_entities]\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction_pass2] categorize {len(valid_entities)} entities\",\n            response=resp2[:1000],\n            model_name=f\"ollama:{model}\",\n            tokens_input=tok_in2,\n            tokens_output=tok_out2,\n            duration_ms=dur2,\n            status=\"completed\",\n        )\n\n        # Normalize output\n        entities = []\n        for e in categorized:",
            "numLines": 100,
            "startLine": 130,
            "totalLines": 461
        }
    }
}
← Vorheriger Zur Liste Nächster →