Protokoll #19414

ID: 19414
Zeitstempel: 2025-12-27 00:18:06.521825
Client: root
IP: 145.224.72.140
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 902 (Input: 40, Output: 862)
Dauer: 86 ms
Request-Zeit: 2025-12-27 00:18:06.521825
Response-Zeit: 2025-12-27 00:18:06.608320

Request

{
    "event": "PreToolUse",
    "tool_name": "Read",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
        "offset": 195,
        "limit": 80
    }
}

Response

{
    "tool_response": {
        "type": "text",
        "file": {
            "filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
            "content": "        entities = _extract_entities_single_pass(text, model)\n\n    return entities\n\n\ndef _extract_entities_2pass(\n    text: str, pass1_template: str, pass2_template: str, model: str\n) -> list[dict]:\n    \"\"\"2-pass entity extraction: extract then categorize.\"\"\"\n    try:\n        # PASS 1: Extract entity names\n        prompt1 = pass1_template.replace(\"{text}\", text[:3000])\n        resp1, tok_in1, tok_out1, dur1 = _call_ollama(prompt1, model)\n\n        try:\n            result1 = json.loads(resp1)\n            raw_entities = result1.get(\"entities\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse Pass 1 JSON\")\n            return []\n\n        # Validate: only keep entities that appear in text\n        valid_entities = [e for e in raw_entities if _validate_entity_in_text(e, text)]\n\n        if not valid_entities:\n            return []\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction_pass1] {len(valid_entities)} entities\",\n            response=json.dumps(valid_entities[:10], ensure_ascii=False),\n            model_name=f\"ollama:{model}\",\n            tokens_input=tok_in1,\n            tokens_output=tok_out1,\n            duration_ms=dur1,\n            status=\"completed\",\n        )\n\n        # PASS 2: Categorize entities (with dynamic categories from DB)\n        entities_json = json.dumps(valid_entities, ensure_ascii=False)\n        categories_text = _build_pass2_categories()\n        prompt2 = pass2_template.replace(\"{categories}\", categories_text)\n        prompt2 = prompt2.replace(\"{entities}\", entities_json)\n        resp2, tok_in2, tok_out2, dur2 = _call_ollama(prompt2, model)\n\n        try:\n            result2 = json.loads(resp2)\n            categorized = result2.get(\"kategorisiert\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse Pass 2 JSON\")\n            # Fallback: return 
uncategorized entities\n            return [{\"name\": e, \"type\": \"CONCEPT\", \"description\": None} for e in valid_entities]\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction_pass2] categorize {len(valid_entities)} entities\",\n            response=resp2[:1000],\n            model_name=f\"ollama:{model}\",\n            tokens_input=tok_in2,\n            tokens_output=tok_out2,\n            duration_ms=dur2,\n            status=\"completed\",\n        )\n\n        # Normalize output (validate types against DB)\n        valid_types = _get_valid_type_codes()\n        entities = []\n        for e in categorized:\n            if isinstance(e, dict) and \"name\" in e and \"type\" in e:\n                # Final validation\n                if _validate_entity_in_text(e[\"name\"], text):\n                    entity_type = e[\"type\"].upper()\n                    # Fallback to CONCEPT if type not in DB\n                    if entity_type not in valid_types:\n                        entity_type = \"CONCEPT\"\n                    entities.append({\n                        \"name\": e[\"name\"],\n                        \"type\": entity_type,\n                        \"description\": e.get(\"description\"),\n                    })\n\n        return entities",
            "numLines": 80,
            "startLine": 195,
            "totalLines": 496
        }
    }
}
← Vorheriger Zur Liste Nächster →