Protokoll #17791

ID: 17791
Timestamp: 2025-12-26 18:42:04.961492
Client: root
IP: -
Model: claude-sonnet-4-20250514
Status: completed
Tokens: 8,777 (Input: 0, Output: 0)
Duration: 102 ms
Request time: -
Response time: -

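The fields above (Model, Status, Tokens, Duration) correspond to the keyword arguments of protokoll.log_llm_call as used throughout the pipeline code below. A minimal sketch of such a call, assuming the pipeline modules are importable the way entity_extractor.py sets them up; the request/response strings are illustrative placeholders, not taken from this record:

import sys

sys.path.insert(0, "/var/www/scripts/pipeline")  # as done in entity_extractor.py

from protokoll import protokoll

# Values mirror this record's fields; strings are placeholders.
protokoll.log_llm_call(
    request="[entity_extraction] ...",       # Request column
    response="{}",                           # Response column
    model_name="claude-sonnet-4-20250514",   # Model
    tokens_input=0,                          # Tokens (Input)
    tokens_output=0,                         # Tokens (Output)
    duration_ms=102,                         # Duration
    status="completed",                      # Status
)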

Request

{
    "event": "PreToolUse",
    "tool_name": "Edit",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
        "old_string": "def extract_entities_ollama(text: str, model: str = \"gemma3:27b-it-qat\") -> list[dict]:\n    \"\"\"Extract entities using Ollama.\n\n    Supports two response formats:\n    1. New format: {\"persons\":[], \"roles\":[], \"theories\":[], ...}\n    2. Legacy format: {\"entities\": [{\"name\": \"...\", \"type\": \"...\"}]}\n    \"\"\"\n    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n    prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n    if not prompt_template:\n        db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n        prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{text}\"\"\"\n\n    # Support both {text} and {{TEXT}} placeholders\n    prompt = prompt_template.replace(\"{text}\", text[:3000]).replace(\"{{TEXT}}\", text[:3000])\n\n    try:\n        start_time = time.time()\n        response = requests.post(\n            f\"{OLLAMA_HOST}\/api\/generate\",\n            json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n            timeout=120,\n        )\n        response.raise_for_status()\n        data = response.json()\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        response_text = data.get(\"response\", \"{}\")\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            response=response_text[:2000],\n            model_name=f\"ollama:{model}\",\n            tokens_input=data.get(\"prompt_eval_count\", 0),\n            tokens_output=data.get(\"eval_count\", 0),\n            duration_ms=duration_ms,\n            status=\"completed\",\n        )\n\n        try:\n            result = json.loads(response_text)\n            return _normalize_entity_response(result, text)\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n            return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            model_name=f\"ollama:{model}\",\n            status=\"error\",\n            error_message=str(e),\n        )\n        return []",
        "new_string": "def _call_ollama(prompt: str, model: str, timeout: int = 120) -> tuple[str, int, int, int]:\n    \"\"\"Call Ollama API and return (response_text, tokens_in, tokens_out, duration_ms).\"\"\"\n    start_time = time.time()\n    response = requests.post(\n        f\"{OLLAMA_HOST}\/api\/generate\",\n        json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n        timeout=timeout,\n    )\n    response.raise_for_status()\n    data = response.json()\n    duration_ms = int((time.time() - start_time) * 1000)\n    return (\n        data.get(\"response\", \"{}\"),\n        data.get(\"prompt_eval_count\", 0),\n        data.get(\"eval_count\", 0),\n        duration_ms,\n    )\n\n\ndef extract_entities_ollama(text: str, model: str = \"gemma3:27b-it-qat\") -> list[dict]:\n    \"\"\"Extract entities using 2-pass approach for better categorization.\n\n    Pass 1: Extract entity names from text\n    Pass 2: Categorize extracted entities\n\n    Falls back to single-pass if 2-pass prompts not available.\n    \"\"\"\n    # Try 2-pass approach first\n    pass1_data = db.get_prompt_by_name(\"entity_extraction_pass1\")\n    pass2_data = db.get_prompt_by_name(\"entity_extraction_pass2\")\n\n    if pass1_data and pass2_data:\n        return _extract_entities_2pass(text, pass1_data[\"content\"], pass2_data[\"content\"], model)\n\n    # Fallback to single-pass\n    return _extract_entities_single_pass(text, model)\n\n\ndef _extract_entities_2pass(\n    text: str, pass1_template: str, pass2_template: str, model: str\n) -> list[dict]:\n    \"\"\"2-pass entity extraction: extract then categorize.\"\"\"\n    try:\n        # PASS 1: Extract entity names\n        prompt1 = pass1_template.replace(\"{text}\", text[:3000])\n        resp1, tok_in1, tok_out1, dur1 = _call_ollama(prompt1, model)\n\n        try:\n            result1 = json.loads(resp1)\n            raw_entities = result1.get(\"entities\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse Pass 1 JSON\")\n            return []\n\n        # Validate: only keep entities that appear in text\n        valid_entities = [e for e in raw_entities if _validate_entity_in_text(e, text)]\n\n        if not valid_entities:\n            return []\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction_pass1] {len(valid_entities)} entities\",\n            response=json.dumps(valid_entities[:10], ensure_ascii=False),\n            model_name=f\"ollama:{model}\",\n            tokens_input=tok_in1,\n            tokens_output=tok_out1,\n            duration_ms=dur1,\n            status=\"completed\",\n        )\n\n        # PASS 2: Categorize entities\n        entities_json = json.dumps(valid_entities, ensure_ascii=False)\n        prompt2 = pass2_template.replace(\"{entities}\", entities_json)\n        resp2, tok_in2, tok_out2, dur2 = _call_ollama(prompt2, model)\n\n        try:\n            result2 = json.loads(resp2)\n            categorized = result2.get(\"kategorisiert\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse Pass 2 JSON\")\n            # Fallback: return uncategorized entities\n            return [{\"name\": e, \"type\": \"CONCEPT\", \"description\": None} for e in valid_entities]\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction_pass2] categorize {len(valid_entities)} entities\",\n            response=resp2[:1000],\n            model_name=f\"ollama:{model}\",\n            
tokens_input=tok_in2,\n            tokens_output=tok_out2,\n            duration_ms=dur2,\n            status=\"completed\",\n        )\n\n        # Normalize output\n        entities = []\n        for e in categorized:\n            if isinstance(e, dict) and \"name\" in e and \"type\" in e:\n                # Final validation\n                if _validate_entity_in_text(e[\"name\"], text):\n                    entities.append({\n                        \"name\": e[\"name\"],\n                        \"type\": e[\"type\"].upper(),\n                        \"description\": e.get(\"description\"),\n                    })\n\n        return entities\n\n    except Exception as e:\n        db.log(\"ERROR\", f\"2-pass entity extraction failed: {e}\")\n        return []\n\n\ndef _extract_entities_single_pass(text: str, model: str) -> list[dict]:\n    \"\"\"Single-pass entity extraction (legacy fallback).\"\"\"\n    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n    prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n    if not prompt_template:\n        db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n        prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{text}\"\"\"\n\n    prompt = prompt_template.replace(\"{text}\", text[:3000]).replace(\"{{TEXT}}\", text[:3000])\n\n    try:\n        resp, tok_in, tok_out, dur = _call_ollama(prompt, model)\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            response=resp[:2000],\n            model_name=f\"ollama:{model}\",\n            tokens_input=tok_in,\n            tokens_output=tok_out,\n            duration_ms=dur,\n            status=\"completed\",\n        )\n\n        try:\n            result = json.loads(resp)\n            return _normalize_entity_response(result, text)\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n            return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n        return []"
    }
}
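The edit above replaces the single-pass extractor with a 2-pass flow (extract, then categorize) whose key safeguard is _validate_entity_in_text: Pass-1 candidates survive only if they occur verbatim in the source text. A self-contained sketch of that filter, with the validation function copied from the originalFile in the Response below and the sample data invented for illustration:

def _validate_entity_in_text(entity_name: str, source_text: str) -> bool:
    """Strictly validate that the entity appears EXACTLY in the source text."""
    if not entity_name or len(entity_name) < 3:
        return False
    return entity_name in source_text

text = "Niklas Luhmann entwickelte die Systemtheorie weiter."
# Hypothetical Pass-1 model output; "Talcott Parsons" is a hallucination here.
pass1_candidates = ["Niklas Luhmann", "Systemtheorie", "Talcott Parsons"]

valid = [e for e in pass1_candidates if _validate_entity_in_text(e, text)]
print(valid)  # ['Niklas Luhmann', 'Systemtheorie']

The same check runs again after Pass 2, so a category label can never smuggle in a name that Pass 1 did not ground in the text.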

Response

{
    "tool_response": {
        "filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/entity_extractor.py",
        "oldString": "def extract_entities_ollama(text: str, model: str = \"gemma3:27b-it-qat\") -> list[dict]:\n    \"\"\"Extract entities using Ollama.\n\n    Supports two response formats:\n    1. New format: {\"persons\":[], \"roles\":[], \"theories\":[], ...}\n    2. Legacy format: {\"entities\": [{\"name\": \"...\", \"type\": \"...\"}]}\n    \"\"\"\n    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n    prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n    if not prompt_template:\n        db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n        prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{text}\"\"\"\n\n    # Support both {text} and {{TEXT}} placeholders\n    prompt = prompt_template.replace(\"{text}\", text[:3000]).replace(\"{{TEXT}}\", text[:3000])\n\n    try:\n        start_time = time.time()\n        response = requests.post(\n            f\"{OLLAMA_HOST}\/api\/generate\",\n            json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n            timeout=120,\n        )\n        response.raise_for_status()\n        data = response.json()\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        response_text = data.get(\"response\", \"{}\")\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            response=response_text[:2000],\n            model_name=f\"ollama:{model}\",\n            tokens_input=data.get(\"prompt_eval_count\", 0),\n            tokens_output=data.get(\"eval_count\", 0),\n            duration_ms=duration_ms,\n            status=\"completed\",\n        )\n\n        try:\n            result = json.loads(response_text)\n            return _normalize_entity_response(result, text)\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n            return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            model_name=f\"ollama:{model}\",\n            status=\"error\",\n            error_message=str(e),\n        )\n        return []",
        "newString": "def _call_ollama(prompt: str, model: str, timeout: int = 120) -> tuple[str, int, int, int]:\n    \"\"\"Call Ollama API and return (response_text, tokens_in, tokens_out, duration_ms).\"\"\"\n    start_time = time.time()\n    response = requests.post(\n        f\"{OLLAMA_HOST}\/api\/generate\",\n        json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n        timeout=timeout,\n    )\n    response.raise_for_status()\n    data = response.json()\n    duration_ms = int((time.time() - start_time) * 1000)\n    return (\n        data.get(\"response\", \"{}\"),\n        data.get(\"prompt_eval_count\", 0),\n        data.get(\"eval_count\", 0),\n        duration_ms,\n    )\n\n\ndef extract_entities_ollama(text: str, model: str = \"gemma3:27b-it-qat\") -> list[dict]:\n    \"\"\"Extract entities using 2-pass approach for better categorization.\n\n    Pass 1: Extract entity names from text\n    Pass 2: Categorize extracted entities\n\n    Falls back to single-pass if 2-pass prompts not available.\n    \"\"\"\n    # Try 2-pass approach first\n    pass1_data = db.get_prompt_by_name(\"entity_extraction_pass1\")\n    pass2_data = db.get_prompt_by_name(\"entity_extraction_pass2\")\n\n    if pass1_data and pass2_data:\n        return _extract_entities_2pass(text, pass1_data[\"content\"], pass2_data[\"content\"], model)\n\n    # Fallback to single-pass\n    return _extract_entities_single_pass(text, model)\n\n\ndef _extract_entities_2pass(\n    text: str, pass1_template: str, pass2_template: str, model: str\n) -> list[dict]:\n    \"\"\"2-pass entity extraction: extract then categorize.\"\"\"\n    try:\n        # PASS 1: Extract entity names\n        prompt1 = pass1_template.replace(\"{text}\", text[:3000])\n        resp1, tok_in1, tok_out1, dur1 = _call_ollama(prompt1, model)\n\n        try:\n            result1 = json.loads(resp1)\n            raw_entities = result1.get(\"entities\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse Pass 1 JSON\")\n            return []\n\n        # Validate: only keep entities that appear in text\n        valid_entities = [e for e in raw_entities if _validate_entity_in_text(e, text)]\n\n        if not valid_entities:\n            return []\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction_pass1] {len(valid_entities)} entities\",\n            response=json.dumps(valid_entities[:10], ensure_ascii=False),\n            model_name=f\"ollama:{model}\",\n            tokens_input=tok_in1,\n            tokens_output=tok_out1,\n            duration_ms=dur1,\n            status=\"completed\",\n        )\n\n        # PASS 2: Categorize entities\n        entities_json = json.dumps(valid_entities, ensure_ascii=False)\n        prompt2 = pass2_template.replace(\"{entities}\", entities_json)\n        resp2, tok_in2, tok_out2, dur2 = _call_ollama(prompt2, model)\n\n        try:\n            result2 = json.loads(resp2)\n            categorized = result2.get(\"kategorisiert\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse Pass 2 JSON\")\n            # Fallback: return uncategorized entities\n            return [{\"name\": e, \"type\": \"CONCEPT\", \"description\": None} for e in valid_entities]\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction_pass2] categorize {len(valid_entities)} entities\",\n            response=resp2[:1000],\n            model_name=f\"ollama:{model}\",\n            
tokens_input=tok_in2,\n            tokens_output=tok_out2,\n            duration_ms=dur2,\n            status=\"completed\",\n        )\n\n        # Normalize output\n        entities = []\n        for e in categorized:\n            if isinstance(e, dict) and \"name\" in e and \"type\" in e:\n                # Final validation\n                if _validate_entity_in_text(e[\"name\"], text):\n                    entities.append({\n                        \"name\": e[\"name\"],\n                        \"type\": e[\"type\"].upper(),\n                        \"description\": e.get(\"description\"),\n                    })\n\n        return entities\n\n    except Exception as e:\n        db.log(\"ERROR\", f\"2-pass entity extraction failed: {e}\")\n        return []\n\n\ndef _extract_entities_single_pass(text: str, model: str) -> list[dict]:\n    \"\"\"Single-pass entity extraction (legacy fallback).\"\"\"\n    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n    prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n    if not prompt_template:\n        db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n        prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{text}\"\"\"\n\n    prompt = prompt_template.replace(\"{text}\", text[:3000]).replace(\"{{TEXT}}\", text[:3000])\n\n    try:\n        resp, tok_in, tok_out, dur = _call_ollama(prompt, model)\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            response=resp[:2000],\n            model_name=f\"ollama:{model}\",\n            tokens_input=tok_in,\n            tokens_output=tok_out,\n            duration_ms=dur,\n            status=\"completed\",\n        )\n\n        try:\n            result = json.loads(resp)\n            return _normalize_entity_response(result, text)\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n            return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n        return []",
        "originalFile": "\"\"\"\nEntity Extraction - Extract and store entities from text.\n\"\"\"\n\nimport json\nimport re\nimport sys\nimport time\nimport unicodedata\n\nimport requests\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom config import ANTHROPIC_MODEL, OLLAMA_HOST\nfrom db import db\nfrom protokoll import protokoll\n\n\ndef normalize_name(name: str) -> str:\n    \"\"\"Generate canonical_name from entity name.\n\n    Rules:\n    - Lowercase\n    - German umlauts: ä→ae, ö→oe, ü→ue, ß→ss\n    - Replace spaces with underscores\n    - Remove special characters except underscores\n    - Collapse multiple underscores\n    \"\"\"\n    if not name:\n        return \"\"\n\n    result = name.lower()\n\n    replacements = {\n        \"ä\": \"ae\", \"ö\": \"oe\", \"ü\": \"ue\", \"ß\": \"ss\",\n        \"Ä\": \"ae\", \"Ö\": \"oe\", \"Ü\": \"ue\",\n    }\n    for old, new in replacements.items():\n        result = result.replace(old, new)\n\n    result = unicodedata.normalize(\"NFKD\", result)\n    result = result.encode(\"ascii\", \"ignore\").decode(\"ascii\")\n    result = re.sub(r\"[\\s\\-]+\", \"_\", result)\n    result = re.sub(r\"[^a-z0-9_]\", \"\", result)\n    result = re.sub(r\"_+\", \"_\", result)\n    result = result.strip(\"_\")\n\n    return result\n\n\n# Category to type mapping for new format\nCATEGORY_TYPE_MAP = {\n    \"persons\": \"PERSON\",\n    \"roles\": \"ROLE\",\n    \"organizations\": \"ORGANIZATION\",\n    \"theories\": \"THEORY\",\n    \"models\": \"MODEL\",\n    \"concepts\": \"CONCEPT\",\n    \"artifacts\": \"ARTIFACT\",\n    \"metaphors\": \"METAPHOR\",\n    \"locations\": \"LOCATION\",\n}\n\n\ndef _validate_entity_in_text(entity_name: str, source_text: str) -> bool:\n    \"\"\"Strictly validate that entity appears EXACTLY in source text.\"\"\"\n    if not entity_name or len(entity_name) < 3:\n        return False\n    # Exact match required\n    return entity_name in source_text\n\n\ndef _normalize_entity_response(result: dict, source_text: str) -> list[dict]:\n    \"\"\"Normalize entity response to standard format with validation.\n\n    Handles two formats:\n    1. New: {\"persons\":[], \"roles\":[], ...}\n    2. 
Legacy: {\"entities\": [...]}\n\n    Also validates entities against source text to filter hallucinations.\n    \"\"\"\n    entities = []\n\n    # Check for legacy format\n    if \"entities\" in result:\n        legacy_entities = result.get(\"entities\", [])\n        # Validate legacy entities too\n        for e in legacy_entities:\n            if isinstance(e, dict) and \"name\" in e:\n                if _validate_entity_in_text(e[\"name\"], source_text):\n                    entities.append(e)\n        return entities\n\n    # New categorized format\n    for category, items in result.items():\n        if not isinstance(items, list):\n            continue\n\n        entity_type = CATEGORY_TYPE_MAP.get(category, category.upper())\n\n        for item in items:\n            if not item or not isinstance(item, str):\n                continue\n\n            # Strict validation: entity must appear EXACTLY in source text\n            if not _validate_entity_in_text(item, source_text):\n                continue  # Skip hallucinations\n\n            entities.append({\n                \"name\": item,\n                \"type\": entity_type,\n                \"description\": None,\n            })\n\n    return entities\n\n\ndef extract_entities_ollama(text: str, model: str = \"gemma3:27b-it-qat\") -> list[dict]:\n    \"\"\"Extract entities using Ollama.\n\n    Supports two response formats:\n    1. New format: {\"persons\":[], \"roles\":[], \"theories\":[], ...}\n    2. Legacy format: {\"entities\": [{\"name\": \"...\", \"type\": \"...\"}]}\n    \"\"\"\n    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n    prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n    if not prompt_template:\n        db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n        prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{text}\"\"\"\n\n    # Support both {text} and {{TEXT}} placeholders\n    prompt = prompt_template.replace(\"{text}\", text[:3000]).replace(\"{{TEXT}}\", text[:3000])\n\n    try:\n        start_time = time.time()\n        response = requests.post(\n            f\"{OLLAMA_HOST}\/api\/generate\",\n            json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n            timeout=120,\n        )\n        response.raise_for_status()\n        data = response.json()\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        response_text = data.get(\"response\", \"{}\")\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            response=response_text[:2000],\n            model_name=f\"ollama:{model}\",\n            tokens_input=data.get(\"prompt_eval_count\", 0),\n            tokens_output=data.get(\"eval_count\", 0),\n            duration_ms=duration_ms,\n            status=\"completed\",\n        )\n\n        try:\n            result = json.loads(response_text)\n            return _normalize_entity_response(result, text)\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n            return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] 
{prompt[:500]}...\",\n            model_name=f\"ollama:{model}\",\n            status=\"error\",\n            error_message=str(e),\n        )\n        return []\n\n\ndef extract_entities_anthropic(text: str, client) -> list[dict]:\n    \"\"\"Extract entities using Anthropic Claude.\"\"\"\n    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n    prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n    if not prompt_template:\n        prompt_template = \"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorisiere jede Entität als:\n- PERSON (Namen von Personen)\n- ORGANIZATION (Firmen, Institutionen, Gruppen)\n- CONCEPT (Fachbegriffe, Methoden, Theorien)\n- LOCATION (Orte, Länder)\n- DATE (Zeitangaben)\n- OTHER (Sonstiges)\n\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurzer Kontext der Erwähnung\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n    prompt = prompt_template.replace(\"{{TEXT}}\", text[:4000])\n\n    try:\n        start_time = time.time()\n        message = client.messages.create(\n            model=ANTHROPIC_MODEL, max_tokens=2000, messages=[{\"role\": \"user\", \"content\": prompt}]\n        )\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        response_text = message.content[0].text\n\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            response=response_text[:2000],\n            model_name=ANTHROPIC_MODEL,\n            tokens_input=message.usage.input_tokens,\n            tokens_output=message.usage.output_tokens,\n            duration_ms=duration_ms,\n            status=\"completed\",\n        )\n\n        json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n        if json_match:\n            entities = json.loads(json_match.group())\n            return entities.get(\"entities\", [])\n        return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Anthropic entity extraction failed: {e}\")\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            model_name=ANTHROPIC_MODEL,\n            status=\"error\",\n            error_message=str(e),\n        )\n        return []\n\n\ndef store_entities(document_id: int, entities: list[dict]) -> int:\n    \"\"\"Store extracted entities in database with deduplication via canonical_name.\"\"\"\n    stored = 0\n\n    for entity in entities:\n        try:\n            description = entity.get(\"description\") or entity.get(\"context\") or None\n            canonical = normalize_name(entity[\"name\"])\n\n            # Check for existing entity by canonical_name first (deduplication)\n            cursor = db.execute(\n                \"SELECT id, description FROM entities WHERE canonical_name = %s AND type = %s\",\n                (canonical, entity[\"type\"]),\n            )\n            existing = cursor.fetchone()\n            cursor.close()\n\n            # Fallback: check by exact name\n            if not existing:\n                cursor = db.execute(\n                    \"SELECT id, description FROM entities WHERE name = %s AND type = %s\",\n                    (entity[\"name\"], entity[\"type\"]),\n                )\n                existing = cursor.fetchone()\n                cursor.close()\n\n            if existing:\n                entity_id = existing[\"id\"]\n                # Update description and canonical_name if missing\n                if description and 
not existing[\"description\"]:\n                    cursor = db.execute(\n                        \"UPDATE entities SET description = %s, canonical_name = %s WHERE id = %s\",\n                        (description, canonical, entity_id),\n                    )\n                    db.commit()\n                    cursor.close()\n                else:\n                    # Ensure canonical_name is set\n                    cursor = db.execute(\n                        \"UPDATE entities SET canonical_name = %s WHERE id = %s AND canonical_name IS NULL\",\n                        (canonical, entity_id),\n                    )\n                    db.commit()\n                    cursor.close()\n            else:\n                # Insert new entity with canonical_name and status='normalized'\n                cursor = db.execute(\n                    \"\"\"INSERT INTO entities (name, type, description, canonical_name, status, created_at)\n                       VALUES (%s, %s, %s, %s, 'normalized', NOW())\"\"\",\n                    (entity[\"name\"], entity[\"type\"], description, canonical),\n                )\n                db.commit()\n                entity_id = cursor.lastrowid\n                cursor.close()\n\n                db.log_provenance(\n                    artifact_type=\"entity\",\n                    artifact_id=ent... [TRUNCATED-b92205cd77cf5a01]",
        "structuredPatch": [
            {
                "oldStart": 116,
                "oldLines": 13,
                "newStart": 116,
                "newLines": 120,
                "lines": [
                    "     return entities",
                    " ",
                    " ",
                    "+def _call_ollama(prompt: str, model: str, timeout: int = 120) -> tuple[str, int, int, int]:",
                    "+    \"\"\"Call Ollama API and return (response_text, tokens_in, tokens_out, duration_ms).\"\"\"",
                    "+    start_time = time.time()",
                    "+    response = requests.post(",
                    "+        f\"{OLLAMA_HOST}\/api\/generate\",",
                    "+        json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
                    "+        timeout=timeout,",
                    "+    )",
                    "+    response.raise_for_status()",
                    "+    data = response.json()",
                    "+    duration_ms = int((time.time() - start_time) * 1000)",
                    "+    return (",
                    "+        data.get(\"response\", \"{}\"),",
                    "+        data.get(\"prompt_eval_count\", 0),",
                    "+        data.get(\"eval_count\", 0),",
                    "+        duration_ms,",
                    "+    )",
                    "+",
                    "+",
                    " def extract_entities_ollama(text: str, model: str = \"gemma3:27b-it-qat\") -> list[dict]:",
                    "-    \"\"\"Extract entities using Ollama.",
                    "+    \"\"\"Extract entities using 2-pass approach for better categorization.",
                    " ",
                    "-    Supports two response formats:",
                    "-    1. New format: {\"persons\":[], \"roles\":[], \"theories\":[], ...}",
                    "-    2. Legacy format: {\"entities\": [{\"name\": \"...\", \"type\": \"...\"}]}",
                    "+    Pass 1: Extract entity names from text",
                    "+    Pass 2: Categorize extracted entities",
                    "+",
                    "+    Falls back to single-pass if 2-pass prompts not available.",
                    "     \"\"\"",
                    "+    # Try 2-pass approach first",
                    "+    pass1_data = db.get_prompt_by_name(\"entity_extraction_pass1\")",
                    "+    pass2_data = db.get_prompt_by_name(\"entity_extraction_pass2\")",
                    "+",
                    "+    if pass1_data and pass2_data:",
                    "+        return _extract_entities_2pass(text, pass1_data[\"content\"], pass2_data[\"content\"], model)",
                    "+",
                    "+    # Fallback to single-pass",
                    "+    return _extract_entities_single_pass(text, model)",
                    "+",
                    "+",
                    "+def _extract_entities_2pass(",
                    "+    text: str, pass1_template: str, pass2_template: str, model: str",
                    "+) -> list[dict]:",
                    "+    \"\"\"2-pass entity extraction: extract then categorize.\"\"\"",
                    "+    try:",
                    "+        # PASS 1: Extract entity names",
                    "+        prompt1 = pass1_template.replace(\"{text}\", text[:3000])",
                    "+        resp1, tok_in1, tok_out1, dur1 = _call_ollama(prompt1, model)",
                    "+",
                    "+        try:",
                    "+            result1 = json.loads(resp1)",
                    "+            raw_entities = result1.get(\"entities\", [])",
                    "+        except json.JSONDecodeError:",
                    "+            db.log(\"WARNING\", \"Failed to parse Pass 1 JSON\")",
                    "+            return []",
                    "+",
                    "+        # Validate: only keep entities that appear in text",
                    "+        valid_entities = [e for e in raw_entities if _validate_entity_in_text(e, text)]",
                    "+",
                    "+        if not valid_entities:",
                    "+            return []",
                    "+",
                    "+        protokoll.log_llm_call(",
                    "+            request=f\"[entity_extraction_pass1] {len(valid_entities)} entities\",",
                    "+            response=json.dumps(valid_entities[:10], ensure_ascii=False),",
                    "+            model_name=f\"ollama:{model}\",",
                    "+            tokens_input=tok_in1,",
                    "+            tokens_output=tok_out1,",
                    "+            duration_ms=dur1,",
                    "+            status=\"completed\",",
                    "+        )",
                    "+",
                    "+        # PASS 2: Categorize entities",
                    "+        entities_json = json.dumps(valid_entities, ensure_ascii=False)",
                    "+        prompt2 = pass2_template.replace(\"{entities}\", entities_json)",
                    "+        resp2, tok_in2, tok_out2, dur2 = _call_ollama(prompt2, model)",
                    "+",
                    "+        try:",
                    "+            result2 = json.loads(resp2)",
                    "+            categorized = result2.get(\"kategorisiert\", [])",
                    "+        except json.JSONDecodeError:",
                    "+            db.log(\"WARNING\", \"Failed to parse Pass 2 JSON\")",
                    "+            # Fallback: return uncategorized entities",
                    "+            return [{\"name\": e, \"type\": \"CONCEPT\", \"description\": None} for e in valid_entities]",
                    "+",
                    "+        protokoll.log_llm_call(",
                    "+            request=f\"[entity_extraction_pass2] categorize {len(valid_entities)} entities\",",
                    "+            response=resp2[:1000],",
                    "+            model_name=f\"ollama:{model}\",",
                    "+            tokens_input=tok_in2,",
                    "+            tokens_output=tok_out2,",
                    "+            duration_ms=dur2,",
                    "+            status=\"completed\",",
                    "+        )",
                    "+",
                    "+        # Normalize output",
                    "+        entities = []",
                    "+        for e in categorized:",
                    "+            if isinstance(e, dict) and \"name\" in e and \"type\" in e:",
                    "+                # Final validation",
                    "+                if _validate_entity_in_text(e[\"name\"], text):",
                    "+                    entities.append({",
                    "+                        \"name\": e[\"name\"],",
                    "+                        \"type\": e[\"type\"].upper(),",
                    "+                        \"description\": e.get(\"description\"),",
                    "+                    })",
                    "+",
                    "+        return entities",
                    "+",
                    "+    except Exception as e:",
                    "+        db.log(\"ERROR\", f\"2-pass entity extraction failed: {e}\")",
                    "+        return []",
                    "+",
                    "+",
                    "+def _extract_entities_single_pass(text: str, model: str) -> list[dict]:",
                    "+    \"\"\"Single-pass entity extraction (legacy fallback).\"\"\"",
                    "     prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")",
                    "     prompt_template = prompt_data[\"content\"] if prompt_data else None",
                    " "
                ]
            },
            {
                "oldStart": 136,
                "oldLines": 46,
                "newStart": 243,
                "newLines": 29,
                "lines": [
                    " Text:",
                    " {text}\"\"\"",
                    " ",
                    "-    # Support both {text} and {{TEXT}} placeholders",
                    "     prompt = prompt_template.replace(\"{text}\", text[:3000]).replace(\"{{TEXT}}\", text[:3000])",
                    " ",
                    "     try:",
                    "-        start_time = time.time()",
                    "-        response = requests.post(",
                    "-            f\"{OLLAMA_HOST}\/api\/generate\",",
                    "-            json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
                    "-            timeout=120,",
                    "-        )",
                    "-        response.raise_for_status()",
                    "-        data = response.json()",
                    "-        duration_ms = int((time.time() - start_time) * 1000)",
                    "+        resp, tok_in, tok_out, dur = _call_ollama(prompt, model)",
                    " ",
                    "-        response_text = data.get(\"response\", \"{}\")",
                    "-",
                    "         protokoll.log_llm_call(",
                    "             request=f\"[entity_extraction] {prompt[:500]}...\",",
                    "-            response=response_text[:2000],",
                    "+            response=resp[:2000],",
                    "             model_name=f\"ollama:{model}\",",
                    "-            tokens_input=data.get(\"prompt_eval_count\", 0),",
                    "-            tokens_output=data.get(\"eval_count\", 0),",
                    "-            duration_ms=duration_ms,",
                    "+            tokens_input=tok_in,",
                    "+            tokens_output=tok_out,",
                    "+            duration_ms=dur,",
                    "             status=\"completed\",",
                    "         )",
                    " ",
                    "         try:",
                    "-            result = json.loads(response_text)",
                    "+            result = json.loads(resp)",
                    "             return _normalize_entity_response(result, text)",
                    "         except json.JSONDecodeError:",
                    "             db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")",
                    "             return []",
                    "     except Exception as e:",
                    "         db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[entity_extraction] {prompt[:500]}...\",",
                    "-            model_name=f\"ollama:{model}\",",
                    "-            status=\"error\",",
                    "-            error_message=str(e),",
                    "-        )",
                    "         return []",
                    " ",
                    " "
                ]
            }
        ],
        "userModified": false,
        "replaceAll": false
    }
}
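The originalFile in the Response above also documents the canonical_name normalization used for entity deduplication (lowercase, umlaut transliteration, underscore-separated ASCII). A condensed, self-contained restatement of normalize_name, behavior-equivalent to the version shown there, with invented example inputs:

import re
import unicodedata

def normalize_name(name: str) -> str:
    """Condensed restatement of normalize_name from entity_extractor.py."""
    if not name:
        return ""
    result = name.lower()
    # German umlauts: ae/oe/ue/ss, per the docstring rules above
    for old, new in {"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"}.items():
        result = result.replace(old, new)
    result = unicodedata.normalize("NFKD", result)
    result = result.encode("ascii", "ignore").decode("ascii")
    result = re.sub(r"[\s\-]+", "_", result)    # spaces/hyphens -> underscore
    result = re.sub(r"[^a-z0-9_]", "", result)  # drop other special characters
    result = re.sub(r"_+", "_", result)         # collapse repeated underscores
    return result.strip("_")

print(normalize_name("Systemtheorie nach Luhmann"))  # systemtheorie_nach_luhmann
print(normalize_name("Selbstfürsorge"))              # selbstfuersorge

store_entities then looks entities up by (canonical_name, type) before falling back to the exact name, which is what makes "Selbstfürsorge" and "Selbstfuersorge" deduplicate to one row.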