Log Entry #15154

ID: 15154
Timestamp: 2025-12-25 14:02:47.454584
Client: root
IP: 145.224.72.140
Model: claude-sonnet-4-20250514
Status: completed
Tokens: 13,638 (Input: 461, Output: 13,177)
Duration: 109 ms
Request Time: 2025-12-25 14:02:47.454584
Response Time: 2025-12-25 14:02:47.563971
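
The Duration field is just the difference between Request Time and Response Time (47.563971 - 47.454584 s, about 109 ms). A minimal, purely illustrative sketch to recompute it from the timestamps logged above:

    from datetime import datetime

    # Timestamps exactly as they appear in this log entry
    request_time = datetime.fromisoformat("2025-12-25 14:02:47.454584")
    response_time = datetime.fromisoformat("2025-12-25 14:02:47.563971")

    # Whole milliseconds, matching the viewer's Duration field
    duration_ms = int((response_time - request_time).total_seconds() * 1000)
    print(duration_ms)  # 109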

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/analyze.py",
        "content": "\"\"\"\nSemantic analysis for KI-System Pipeline.\nEntity extraction, taxonomy classification, ontology mapping.\n\nBACKWARD COMPATIBILITY WRAPPER - All logic moved to analyzers\/ module.\n\"\"\"\n\n# Re-export all functions from analyzers module for backward compatibility\nfrom analyzers import (\n    get_anthropic_client,\n    extract_entities_ollama,\n    extract_entities_anthropic,\n    store_entities,\n    find_entity_by_name,\n    extract_relations,\n    classify_taxonomy,\n    analyze_chunk_semantics,\n    analyze_chunks_semantics,\n    extract_statements,\n    store_statements,\n    analyze_chunk_statements,\n    analyze_document_statements,\n    analyze_document,\n    link_chunk_entities,\n)\n\n__all__ = [\n    \"get_anthropic_client\",\n    \"extract_entities_ollama\",\n    \"extract_entities_anthropic\",\n    \"store_entities\",\n    \"find_entity_by_name\",\n    \"extract_relations\",\n    \"classify_taxonomy\",\n    \"analyze_chunk_semantics\",\n    \"analyze_chunks_semantics\",\n    \"extract_statements\",\n    \"store_statements\",\n    \"analyze_chunk_statements\",\n    \"analyze_document_statements\",\n    \"analyze_document\",\n    \"link_chunk_entities\",\n]\n\nif __name__ == \"__main__\":\n    test_text = \"\"\"\n    Carl Rogers entwickelte die klientenzentrierte Gesprächstherapie.\n    Diese Methode basiert auf Empathie, Kongruenz und bedingungsloser Wertschätzung.\n    Sie wird heute weltweit in der Psychotherapie eingesetzt.\n    \"\"\"\n\n    print(\"Testing entity extraction with Ollama...\")\n    entities = extract_entities_ollama(test_text)\n    print(f\"Found {len(entities)} entities:\")\n    for e in entities:\n        print(f\"  - {e['name']} ({e['type']})\")\n"
    }
}
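
The payload above is a PreToolUse hook event: it names the tool (Write) and carries the complete tool input before the call executes. A minimal sketch of a hook consumer, assuming the event arrives as JSON on stdin and that a non-zero exit code rejects the call (both are assumptions, not confirmed by this log); the path-prefix check is a hypothetical policy, not taken from the source:

    import json
    import sys

    # Read a PreToolUse event shaped like the Request above (assumption: JSON on stdin)
    event = json.load(sys.stdin)

    if event.get("event") == "PreToolUse" and event.get("tool_name") == "Write":
        path = event["tool_input"]["file_path"]
        # Hypothetical policy: only allow writes inside the pipeline directory
        if not path.startswith("/var/www/scripts/pipeline/"):
            print(f"blocked write outside pipeline dir: {path}", file=sys.stderr)
            sys.exit(2)  # assumption: non-zero exit signals rejection to the caller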

Response

{
    "tool_response": {
        "type": "update",
        "filePath": "\/var\/www\/scripts\/pipeline\/analyze.py",
        "content": "\"\"\"\nSemantic analysis for KI-System Pipeline.\nEntity extraction, taxonomy classification, ontology mapping.\n\nBACKWARD COMPATIBILITY WRAPPER - All logic moved to analyzers\/ module.\n\"\"\"\n\n# Re-export all functions from analyzers module for backward compatibility\nfrom analyzers import (\n    get_anthropic_client,\n    extract_entities_ollama,\n    extract_entities_anthropic,\n    store_entities,\n    find_entity_by_name,\n    extract_relations,\n    classify_taxonomy,\n    analyze_chunk_semantics,\n    analyze_chunks_semantics,\n    extract_statements,\n    store_statements,\n    analyze_chunk_statements,\n    analyze_document_statements,\n    analyze_document,\n    link_chunk_entities,\n)\n\n__all__ = [\n    \"get_anthropic_client\",\n    \"extract_entities_ollama\",\n    \"extract_entities_anthropic\",\n    \"store_entities\",\n    \"find_entity_by_name\",\n    \"extract_relations\",\n    \"classify_taxonomy\",\n    \"analyze_chunk_semantics\",\n    \"analyze_chunks_semantics\",\n    \"extract_statements\",\n    \"store_statements\",\n    \"analyze_chunk_statements\",\n    \"analyze_document_statements\",\n    \"analyze_document\",\n    \"link_chunk_entities\",\n]\n\nif __name__ == \"__main__\":\n    test_text = \"\"\"\n    Carl Rogers entwickelte die klientenzentrierte Gesprächstherapie.\n    Diese Methode basiert auf Empathie, Kongruenz und bedingungsloser Wertschätzung.\n    Sie wird heute weltweit in der Psychotherapie eingesetzt.\n    \"\"\"\n\n    print(\"Testing entity extraction with Ollama...\")\n    entities = extract_entities_ollama(test_text)\n    print(f\"Found {len(entities)} entities:\")\n    for e in entities:\n        print(f\"  - {e['name']} ({e['type']})\")\n",
        "structuredPatch": [
            {
                "oldStart": 1,
                "oldLines": 974,
                "newStart": 1,
                "newLines": 47,
                "lines": [
                    " \"\"\"",
                    "-Semantic analysis for KI-System Pipeline",
                    "+Semantic analysis for KI-System Pipeline.",
                    " Entity extraction, taxonomy classification, ontology mapping.",
                    "+",
                    "+BACKWARD COMPATIBILITY WRAPPER - All logic moved to analyzers\/ module.",
                    " \"\"\"",
                    " ",
                    "-import json",
                    "-import time",
                    "+# Re-export all functions from analyzers module for backward compatibility",
                    "+from analyzers import (",
                    "+    get_anthropic_client,",
                    "+    extract_entities_ollama,",
                    "+    extract_entities_anthropic,",
                    "+    store_entities,",
                    "+    find_entity_by_name,",
                    "+    extract_relations,",
                    "+    classify_taxonomy,",
                    "+    analyze_chunk_semantics,",
                    "+    analyze_chunks_semantics,",
                    "+    extract_statements,",
                    "+    store_statements,",
                    "+    analyze_chunk_statements,",
                    "+    analyze_document_statements,",
                    "+    analyze_document,",
                    "+    link_chunk_entities,",
                    "+)",
                    " ",
                    "-import requests",
                    "+__all__ = [",
                    "+    \"get_anthropic_client\",",
                    "+    \"extract_entities_ollama\",",
                    "+    \"extract_entities_anthropic\",",
                    "+    \"store_entities\",",
                    "+    \"find_entity_by_name\",",
                    "+    \"extract_relations\",",
                    "+    \"classify_taxonomy\",",
                    "+    \"analyze_chunk_semantics\",",
                    "+    \"analyze_chunks_semantics\",",
                    "+    \"extract_statements\",",
                    "+    \"store_statements\",",
                    "+    \"analyze_chunk_statements\",",
                    "+    \"analyze_document_statements\",",
                    "+    \"analyze_document\",",
                    "+    \"link_chunk_entities\",",
                    "+]",
                    " ",
                    "-from config import ANTHROPIC_API_KEY, ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST",
                    "-from db import db",
                    "-from protokoll import protokoll",
                    "-",
                    "-",
                    "-def get_anthropic_client():",
                    "-    \"\"\"Get Anthropic API client.\"\"\"",
                    "-    try:",
                    "-        import anthropic",
                    "-",
                    "-        if ANTHROPIC_API_KEY:",
                    "-            db.log(\"INFO\", \"Using Anthropic API (Claude)\")",
                    "-            return anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)",
                    "-        else:",
                    "-            db.log(\"WARNING\", \"No Anthropic API key found, falling back to Ollama\")",
                    "-    except ImportError:",
                    "-        db.log(\"WARNING\", \"anthropic package not installed, falling back to Ollama\")",
                    "-    return None",
                    "-",
                    "-",
                    "-def extract_entities_ollama(text, model=\"gemma3:27b-it-qat\"):",
                    "-    \"\"\"Extract entities using Ollama.\"\"\"",
                    "-    # Load prompt with versioning metadata",
                    "-    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")",
                    "-    prompt_template = prompt_data[\"content\"] if prompt_data else None",
                    "-",
                    "-    if not prompt_template:",
                    "-        db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")",
                    "-        prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.",
                    "-Kategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION",
                    "-Antworte NUR im JSON-Format:",
                    "-{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}",
                    "-",
                    "-Text:",
                    "-{{TEXT}}\"\"\"",
                    "-",
                    "-    prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])",
                    "-",
                    "-    try:",
                    "-        start_time = time.time()",
                    "-        response = requests.post(",
                    "-            f\"{OLLAMA_HOST}\/api\/generate\",",
                    "-            json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
                    "-            timeout=120,",
                    "-        )",
                    "-        response.raise_for_status()",
                    "-        data = response.json()",
                    "-        duration_ms = int((time.time() - start_time) * 1000)",
                    "-",
                    "-        # Parse JSON from response",
                    "-        response_text = data.get(\"response\", \"{}\")",
                    "-",
                    "-        # Log to ki-protokoll",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[entity_extraction] {prompt[:500]}...\",",
                    "-            response=response_text[:2000],",
                    "-            model_name=f\"ollama:{model}\",",
                    "-            tokens_input=data.get(\"prompt_eval_count\", 0),",
                    "-            tokens_output=data.get(\"eval_count\", 0),",
                    "-            duration_ms=duration_ms,",
                    "-            status=\"completed\",",
                    "-        )",
                    "-",
                    "-        try:",
                    "-            entities = json.loads(response_text)",
                    "-            return entities.get(\"entities\", [])",
                    "-        except json.JSONDecodeError:",
                    "-            db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")",
                    "-            return []",
                    "-    except Exception as e:",
                    "-        db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[entity_extraction] {prompt[:500]}...\",",
                    "-            model_name=f\"ollama:{model}\",",
                    "-            status=\"error\",",
                    "-            error_message=str(e),",
                    "-        )",
                    "-        return []",
                    "-",
                    "-",
                    "-def extract_entities_anthropic(text, client):",
                    "-    \"\"\"Extract entities using Anthropic Claude.\"\"\"",
                    "-    # Get prompt with versioning metadata",
                    "-    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")",
                    "-    prompt_template = prompt_data[\"content\"] if prompt_data else None",
                    "-",
                    "-    if not prompt_template:",
                    "-        prompt_template = \"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.",
                    "-",
                    "-Kategorisiere jede Entität als:",
                    "-- PERSON (Namen von Personen)",
                    "-- ORGANIZATION (Firmen, Institutionen, Gruppen)",
                    "-- CONCEPT (Fachbegriffe, Methoden, Theorien)",
                    "-- LOCATION (Orte, Länder)",
                    "-- DATE (Zeitangaben)",
                    "-- OTHER (Sonstiges)",
                    "-",
                    "-Antworte NUR im JSON-Format:",
                    "-{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurzer Kontext der Erwähnung\"}]}",
                    "-",
                    "-Text:",
                    "-{{TEXT}}\"\"\"",
                    "-",
                    "-    prompt = prompt_template.replace(\"{{TEXT}}\", text[:4000])",
                    "-",
                    "-    try:",
                    "-        start_time = time.time()",
                    "-        message = client.messages.create(",
                    "-            model=ANTHROPIC_MODEL, max_tokens=2000, messages=[{\"role\": \"user\", \"content\": prompt}]",
                    "-        )",
                    "-        duration_ms = int((time.time() - start_time) * 1000)",
                    "-",
                    "-        response_text = message.content[0].text",
                    "-",
                    "-        # Log to ki-protokoll",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[entity_extraction] {prompt[:500]}...\",",
                    "-            response=response_text[:2000],",
                    "-            model_name=ANTHROPIC_MODEL,",
                    "-            tokens_input=message.usage.input_tokens,",
                    "-            tokens_output=message.usage.output_tokens,",
                    "-            duration_ms=duration_ms,",
                    "-            status=\"completed\",",
                    "-        )",
                    "-",
                    "-        # Extract JSON from response",
                    "-        import re",
                    "-",
                    "-        json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
                    "-        if json_match:",
                    "-            entities = json.loads(json_match.group())",
                    "-            return entities.get(\"entities\", [])",
                    "-        return []",
                    "-    except Exception as e:",
                    "-        db.log(\"ERROR\", f\"Anthropic entity extraction failed: {e}\")",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[entity_extraction] {prompt[:500]}...\",",
                    "-            model_name=ANTHROPIC_MODEL,",
                    "-            status=\"error\",",
                    "-            error_message=str(e),",
                    "-        )",
                    "-        return []",
                    "-",
                    "-",
                    "-def extract_relations(text, entities, client=None):",
                    "-    \"\"\"Extract relations between entities.\"\"\"",
                    "-    if not entities or len(entities) < 2:",
                    "-        return []",
                    "-",
                    "-    entity_names = [e[\"name\"] for e in entities[:20]]",
                    "-",
                    "-    # Load prompt from database",
                    "-    prompt_template = db.get_prompt(\"relation_extraction\")",
                    "-",
                    "-    if not prompt_template:",
                    "-        db.log(\"WARNING\", \"relation_extraction prompt not found in DB, using fallback\")",
                    "-        prompt_template = \"\"\"Identifiziere Beziehungen zwischen Entitäten.",
                    "-Entitäten: {{ENTITIES}}",
                    "-Beziehungstypen: DEVELOPED_BY, RELATED_TO, PART_OF, USED_IN, BASED_ON",
                    "-Antworte NUR im JSON-Format:",
                    "-{\"relations\": [{\"source\": \"...\", \"relation\": \"...\", \"target\": \"...\"}]}",
                    "-",
                    "-Text:",
                    "-{{TEXT}}\"\"\"",
                    "-",
                    "-    prompt = prompt_template.replace(\"{{ENTITIES}}\", \", \".join(entity_names))",
                    "-    prompt = prompt.replace(\"{{TEXT}}\", text[:3000])",
                    "-",
                    "-    try:",
                    "-        start_time = time.time()",
                    "-        tokens_in, tokens_out = 0, 0",
                    "-        model_name = \"\"",
                    "-",
                    "-        if client:",
                    "-            message = client.messages.create(",
                    "-                model=ANTHROPIC_MODEL, max_tokens=1000, messages=[{\"role\": \"user\", \"content\": prompt}]",
                    "-            )",
                    "-            response_text = message.content[0].text",
                    "-            tokens_in = message.usage.input_tokens",
                    "-            tokens_out = message.usage.output_tokens",
                    "-            model_name = ANTHROPIC_MODEL",
                    "-        else:",
                    "-            response = requests.post(",
                    "-                f\"{OLLAMA_HOST}\/api\/generate\",",
                    "-                json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
                    "-                timeout=120,",
                    "-            )",
                    "-            response.raise_for_status()",
                    "-            data = response.json()",
                    "-            response_text = data.get(\"response\", \"{}\")",
                    "-            tokens_in = data.get(\"prompt_eval_count\", 0)",
                    "-            tokens_out = data.get(\"eval_count\", 0)",
                    "-            model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"",
                    "-",
                    "-        duration_ms = int((time.time() - start_time) * 1000)",
                    "-",
                    "-        # Log to ki-protokoll",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[relation_extraction] {prompt[:500]}...\",",
                    "-            response=response_text[:2000],",
                    "-            model_name=model_name,",
                    "-            tokens_input=tokens_in,",
                    "-            tokens_output=tokens_out,",
                    "-            duration_ms=duration_ms,",
                    "-            status=\"completed\",",
                    "-        )",
                    "-",
                    "-        import re",
                    "-",
                    "-        json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
                    "-        if json_match:",
                    "-            data = json.loads(json_match.group())",
                    "-            return data.get(\"relations\", [])",
                    "-        return []",
                    "-    except Exception as e:",
                    "-        db.log(\"ERROR\", f\"Relation extraction failed: {e}\")",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[relation_extraction] {prompt[:500]}...\",",
                    "-            model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",",
                    "-            status=\"error\",",
                    "-            error_message=str(e),",
                    "-        )",
                    "-        return []",
                    "-",
                    "-",
                    "-def classify_taxonomy(text, client=None):",
                    "-    \"\"\"Classify text into taxonomy categories.\"\"\"",
                    "-    prompt_template = db.get_prompt(\"taxonomy_classification\")",
                    "-",
                    "-    if not prompt_template:",
                    "-        prompt_template = \"\"\"Klassifiziere den folgenden Text in passende Kategorien.",
                    "-",
                    "-Wähle aus diesen Hauptkategorien:",
                    "-- Methoden (Therapiemethoden, Techniken)",
                    "-- Theorie (Konzepte, Modelle, Grundlagen)",
                    "-- Praxis (Anwendung, Fallbeispiele)",
                    "-- Organisation (Strukturen, Prozesse)",
                    "-- Kommunikation (Gesprächsführung, Interaktion)",
                    "-- Entwicklung (Persönliche Entwicklung, Veränderung)",
                    "-",
                    "-Antworte NUR im JSON-Format:",
                    "-{\"categories\": [\"...\", \"...\"], \"confidence\": 0.0-1.0}",
                    "-",
                    "-Text:",
                    "-{{TEXT}}\"\"\"",
                    "-",
                    "-    prompt = prompt_template.replace(\"{{TEXT}}\", text[:2000])",
                    "-",
                    "-    try:",
                    "-        start_time = time.time()",
                    "-        tokens_in, tokens_out = 0, 0",
                    "-        model_name = \"\"",
                    "-",
                    "-        if client:",
                    "-            message = client.messages.create(",
                    "-                model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", \"content\": prompt}]",
                    "-            )",
                    "-            response_text = message.content[0].text",
                    "-            tokens_in = message.usage.input_tokens",
                    "-            tokens_out = message.usage.output_tokens",
                    "-            model_name = ANTHROPIC_MODEL",
                    "-        else:",
                    "-            response = requests.post(",
                    "-                f\"{OLLAMA_HOST}\/api\/generate\",",
                    "-                json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
                    "-                timeout=60,",
                    "-            )",
                    "-            response.raise_for_status()",
                    "-            data = response.json()",
                    "-            response_text = data.get(\"response\", \"{}\")",
                    "-            tokens_in = data.get(\"prompt_eval_count\", 0)",
                    "-            tokens_out = data.get(\"eval_count\", 0)",
                    "-            model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"",
                    "-",
                    "-        duration_ms = int((time.time() - start_time) * 1000)",
                    "-",
                    "-        # Log to ki-protokoll",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[taxonomy_classification] {prompt[:500]}...\",",
                    "-            response=response_text[:2000],",
                    "-            model_name=model_name,",
                    "-            tokens_input=tokens_in,",
                    "-            tokens_output=tokens_out,",
                    "-            duration_ms=duration_ms,",
                    "-            status=\"completed\",",
                    "-        )",
                    "-",
                    "-        import re",
                    "-",
                    "-        json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
                    "-        if json_match:",
                    "-            return json.loads(json_match.group())",
                    "-        return {\"categories\": [], \"confidence\": 0}",
                    "-    except Exception as e:",
                    "-        db.log(\"ERROR\", f\"Taxonomy classification failed: {e}\")",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[taxonomy_classification] {prompt[:500]}...\",",
                    "-            model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",",
                    "-            status=\"error\",",
                    "-            error_message=str(e),",
                    "-        )",
                    "-        return {\"categories\": [], \"confidence\": 0}",
                    "-",
                    "-",
                    "-def store_entities(document_id, entities):",
                    "-    \"\"\"Store extracted entities in database.\"\"\"",
                    "-    stored = 0",
                    "-",
                    "-    for entity in entities:",
                    "-        try:",
                    "-            # Get description from entity",
                    "-            description = entity.get(\"description\") or entity.get(\"context\") or None",
                    "-",
                    "-            # Check if entity already exists",
                    "-            cursor = db.execute(",
                    "-                \"SELECT id, description FROM entities WHERE name = %s AND type = %s\", (entity[\"name\"], entity[\"type\"])",
                    "-            )",
                    "-            existing = cursor.fetchone()",
                    "-            cursor.close()",
                    "-",
                    "-            if existing:",
                    "-                entity_id = existing[\"id\"]",
                    "-                # Update description if it was empty and we have one now",
                    "-                if description and not existing[\"description\"]:",
                    "-                    cursor = db.execute(\"UPDATE entities SET description = %s WHERE id = %s\", (description, entity_id))",
                    "-                    db.commit()",
                    "-                    cursor.close()",
                    "-            else:",
                    "-                cursor = db.execute(",
                    "-                    \"\"\"INSERT INTO entities (name, type, description, created_at)",
                    "-                       VALUES (%s, %s, %s, NOW())\"\"\",",
                    "-                    (entity[\"name\"], entity[\"type\"], description),",
                    "-                )",
                    "-                db.commit()",
                    "-                entity_id = cursor.lastrowid",
                    "-                cursor.close()",
                    "-",
                    "-                # Log provenance for new entity",
                    "-                db.log_provenance(",
                    "-                    artifact_type=\"entity\",",
                    "-                    artifact_id=entity_id,",
                    "-                    source_type=\"extraction\",",
                    "-                    source_id=document_id,",
                    "-                    pipeline_step=\"entity_extract\",",
                    "-                )",
                    "-",
                    "-            stored += 1",
                    "-",
                    "-        except Exception as e:",
                    "-            db.log(\"WARNING\", f\"Failed to store entity: {e}\")",
                    "-",
                    "-    return stored",
                    "-",
                    "-",
                    "-def analyze_document(document_id, text, use_anthropic=True, progress=None):",
                    "-    \"\"\"",
                    "-    Full semantic analysis of a document.",
                    "-    Extracts entities, relations, and taxonomy classification.",
                    "-    \"\"\"",
                    "-    db.log(\"INFO\", f\"Starting semantic analysis for document {document_id}\")",
                    "-",
                    "-    if progress:",
                    "-        progress.add_log(\"Analyse: Starte Entity-Extraktion...\")",
                    "-",
                    "-    client = get_anthropic_client() if use_anthropic else None",
                    "-",
                    "-    # Extract entities",
                    "-    if client:",
                    "-        entities = extract_entities_anthropic(text, client)",
                    "-    else:",
                    "-        entities = extract_entities_ollama(text)",
                    "-",
                    "-    db.log(\"INFO\", f\"Extracted {len(entities)} entities\")",
                    "-    if progress:",
                    "-        progress.add_log(f\"Analyse: {len(entities)} Entitäten extrahiert\")",
                    "-",
                    "-    # Store entities",
                    "-    if entities:",
                    "-        stored = store_entities(document_id, entities)",
                    "-        db.log(\"INFO\", f\"Stored {stored} entities\")",
                    "-        if progress:",
                    "-            progress.add_log(f\"Analyse: {stored} Entitäten gespeichert\")",
                    "-",
                    "-        # Extract relations",
                    "-        if progress:",
                    "-            progress.add_log(\"Analyse: Extrahiere Relationen...\")",
                    "-        relations = extract_relations(text, entities, client)",
                    "-        db.log(\"INFO\", f\"Extracted {len(relations)} relations\")",
                    "-        if progress:",
                    "-            progress.add_log(f\"Analyse: {len(relations)} Relationen extrahiert\")",
                    "-",
                    "-        # Store relations",
                    "-        for rel in relations:",
                    "-            try:",
                    "-                # Find entity IDs",
                    "-                cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"source\"],))",
                    "-                source = cursor.fetchone()",
                    "-                cursor.close()",
                    "-",
                    "-                cursor = db.execute(\"SELECT id FROM entities WHERE name = %s LIMIT 1\", (rel[\"target\"],))",
                    "-                target = cursor.fetchone()",
                    "-                cursor.close()",
                    "-",
                    "-                if source and target:",
                    "-                    # Store in entity_relations (raw pipeline data)",
                    "-                    cursor = db.execute(",
                    "-                        \"\"\"INSERT IGNORE INTO entity_relations",
                    "-                           (source_entity_id, target_entity_id, relation_type, created_at)",
                    "-                           VALUES (%s, %s, %s, NOW())\"\"\",",
                    "-                        (source[\"id\"], target[\"id\"], rel[\"relation\"]),",
                    "-                    )",
                    "-                    db.commit()",
                    "-                    cursor.close()",
                    "-",
                    "-                    # Store in entity_ontology (for semantic explorer)",
                    "-                    cursor = db.execute(",
                    "-                        \"\"\"INSERT IGNORE INTO entity_ontology",
                    "-                           (source_entity_id, target_entity_id, relation_type, direction,",
                    "-                            strength, source_type, source_id, created_at)",
                    "-                           VALUES (%s, %s, %s, 'unidirectional', 1.0, 'document', %s, NOW())\"\"\",",
                    "-                        (source[\"id\"], target[\"id\"], rel[\"relation\"], document_id),",
                    "-                    )",
                    "-                    db.commit()",
                    "-                    cursor.close()",
                    "-            except Exception as e:",
                    "-                db.log(\"WARNING\", f\"Failed to store relation: {e}\")",
                    "-",
                    "-    # Taxonomy classification",
                    "-    if progress:",
                    "-        progress.add_log(\"Analyse: Klassifiziere Taxonomie...\")",
                    "-    taxonomy = classify_taxonomy(text, client)",
                    "-    db.log(\"INFO\", f\"Classified into {len(taxonomy.get('categories', []))} categories\")",
                    "-    if progress:",
                    "-        progress.add_log(f\"Analyse: {len(taxonomy.get('categories', []))} Kategorien zugewiesen\")",
                    "-",
                    "-    # Store taxonomy assignments",
                    "-    for category in taxonomy.get(\"categories\", []):",
                    "-        try:",
                    "-            # Strip number prefix like \"1. \" or \"2. \" from category name",
                    "-            import re",
                    "-",
                    "-            clean_category = re.sub(r\"^\\d+\\.\\s*\", \"\", category).strip()",
                    "-",
                    "-            cursor = db.execute(\"SELECT id FROM taxonomy_terms WHERE name = %s LIMIT 1\", (clean_category,))",
                    "-            term = cursor.fetchone()",
                    "-            cursor.close()",
                    "-",
                    "-            if term:",
                    "-                cursor = db.execute(",
                    "-                    \"\"\"INSERT IGNORE INTO document_taxonomy",
                    "-                       (document_id, taxonomy_term_id, confidence, created_at)",
                    "-                       VALUES (%s, %s, %s, NOW())\"\"\",",
                    "-                    (document_id, term[\"id\"], taxonomy.get(\"confidence\", 0.5)),",
                    "-                )",
                    "-                db.commit()",
                    "-                cursor.close()",
                    "-        except Exception as e:",
                    "-            db.log(\"WARNING\", f\"Failed to store taxonomy: {e}\")",
                    "-",
                    "-    # Link entities to chunks",
                    "-    chunk_entity_links = 0",
                    "-    if entities:",
                    "-        chunk_entity_links = link_chunk_entities(document_id)",
                    "-        db.log(\"INFO\", f\"Created {chunk_entity_links} chunk-entity links\")",
                    "-",
                    "-    # Analyze chunk semantics",
                    "-    chunks_analyzed = analyze_chunks_semantics(document_id, client, progress)",
                    "-    db.log(\"INFO\", f\"Chunk semantics: {chunks_analyzed} chunks analyzed\")",
                    "-",
                    "-    return {",
                    "-        \"entities\": len(entities),",
                    "-        \"relations\": len(relations) if entities else 0,",
                    "-        \"categories\": taxonomy.get(\"categories\", []),",
                    "-        \"chunk_entity_links\": chunk_entity_links,",
                    "-        \"chunks_semantics\": chunks_analyzed,",
                    "-    }",
                    "-",
                    "-",
                    "-def link_chunk_entities(document_id):",
                    "-    \"\"\"",
                    "-    Link entities to their source chunks.",
                    "-    Scans each chunk for entity mentions and populates chunk_entities.",
                    "-    \"\"\"",
                    "-    # Get all entities (we check which appear in chunks)",
                    "-    cursor = db.execute(\"SELECT id, name, canonical_name FROM entities\")",
                    "-    entities = cursor.fetchall()",
                    "-    cursor.close()",
                    "-",
                    "-    if not entities:",
                    "-        db.log(\"INFO\", f\"No entities to link for document {document_id}\")",
                    "-        return 0",
                    "-",
                    "-    # Get all chunks for this document",
                    "-    cursor = db.execute(\"SELECT id, content FROM chunks WHERE document_id = %s\", (document_id,))",
                    "-    chunks = cursor.fetchall()",
                    "-    cursor.close()",
                    "-",
                    "-    linked = 0",
                    "-    for chunk in chunks:",
                    "-        chunk_text = chunk[\"content\"].lower()",
                    "-",
                    "-        for entity in entities:",
                    "-            # Check if entity name appears in chunk",
                    "-            name_lower = entity[\"name\"].lower()",
                    "-            canonical = (entity[\"canonical_name\"] or \"\").lower()",
                    "-",
                    "-            mention_count = chunk_text.count(name_lower)",
                    "-            if canonical and canonical != name_lower:",
                    "-                mention_count += chunk_text.count(canonical)",
                    "-",
                    "-            if mention_count > 0:",
                    "-                # Calculate relevance score (0-1 based on mentions)",
                    "-                relevance = min(1.0, mention_count * 0.2)",
                    "-",
                    "-                try:",
                    "-                    cursor = db.execute(",
                    "-                        \"\"\"INSERT INTO chunk_entities",
                    "-                           (chunk_id, entity_id, relevance_score, mention_count)",
                    "-                           VALUES (%s, %s, %s, %s)",
                    "-                           ON DUPLICATE KEY UPDATE",
                    "-                           relevance_score = VALUES(relevance_score),",
                    "-                           mention_count = VALUES(mention_count)\"\"\",",
                    "-                        (chunk[\"id\"], entity[\"id\"], relevance, mention_count),",
                    "-                    )",
                    "-                    db.commit()",
                    "-                    cursor.close()",
                    "-                    linked += 1",
                    "-                except Exception as e:",
                    "-                    db.log(\"WARNING\", f\"Failed to link chunk {chunk['id']} to entity {entity['id']}: {e}\")",
                    "-",
                    "-    db.log(\"INFO\", f\"Linked {linked} chunk-entity pairs for document {document_id}\")",
                    "-    return linked",
                    "-",
                    "-",
                    "-def analyze_chunk_semantics(chunk_id, content, client=None):",
                    "-    \"\"\"",
                    "-    Analyze a single chunk for semantics (summary, keywords, sentiment, topics).",
                    "-    Stores result in chunk_semantics table.",
                    "-    \"\"\"",
                    "-    prompt_template = db.get_prompt(\"chunk_semantics\")",
                    "-",
                    "-    if not prompt_template:",
                    "-        prompt_template = \"\"\"Analysiere diesen Textabschnitt und extrahiere:",
                    "-",
                    "-1. **summary**: Eine kurze Zusammenfassung (1-2 Sätze)",
                    "-2. **keywords**: 3-5 wichtige Schlüsselwörter",
                    "-3. **sentiment**: Stimmung (positive, negative, neutral, mixed)",
                    "-4. **topics**: 2-3 Hauptthemen",
                    "-",
                    "-Antworte NUR im JSON-Format:",
                    "-{\"summary\": \"...\", \"keywords\": [\"...\", \"...\"], \"sentiment\": \"neutral\", \"topics\": [\"...\", \"...\"]}",
                    "-",
                    "-Text:",
                    "-{{TEXT}}\"\"\"",
                    "-",
                    "-    prompt = prompt_template.replace(\"{{TEXT}}\", content[:2000])",
                    "-",
                    "-    try:",
                    "-        start_time = time.time()",
                    "-        tokens_in, tokens_out = 0, 0",
                    "-        model_name = \"\"",
                    "-",
                    "-        if client:",
                    "-            message = client.messages.create(",
                    "-                model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", \"content\": prompt}]",
                    "-            )",
                    "-            response_text = message.content[0].text",
                    "-            tokens_in = message.usage.input_tokens",
                    "-            tokens_out = message.usage.output_tokens",
                    "-            model_name = ANTHROPIC_MODEL",
                    "-        else:",
                    "-            response = requests.post(",
                    "-                f\"{OLLAMA_HOST}\/api\/generate\",",
                    "-                json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},",
                    "-                timeout=60,",
                    "-            )",
                    "-            response.raise_for_status()",
                    "-            data = response.json()",
                    "-            response_text = data.get(\"response\", \"{}\")",
                    "-            tokens_in = data.get(\"prompt_eval_count\", 0)",
                    "-            tokens_out = data.get(\"eval_count\", 0)",
                    "-            model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"",
                    "-",
                    "-        duration_ms = int((time.time() - start_time) * 1000)",
                    "-",
                    "-        # Log to ki-protokoll",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[chunk_semantics] chunk_id={chunk_id}\",",
                    "-            response=response_text[:1000],",
                    "-            model_name=model_name,",
                    "-            tokens_input=tokens_in,",
                    "-            tokens_output=tokens_out,",
                    "-            duration_ms=duration_ms,",
                    "-            status=\"completed\",",
                    "-        )",
                    "-",
                    "-        # Parse JSON",
                    "-        import re",
                    "-",
                    "-        json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
                    "-        if json_match:",
                    "-            result = json.loads(json_match.group())",
                    "-",
                    "-            # Store in chunk_semantics",
                    "-            cursor = db.execute(",
                    "-                \"\"\"INSERT INTO chunk_semantics",
                    "-                   (chunk_id, summary, keywords, sentiment, topics, language, analyzed_at, analysis_model)",
                    "-                   VALUES (%s, %s, %s, %s, %s, 'de', NOW(), %s)",
                    "-                   ON DUPLICATE KEY UPDATE",
                    "-                   summary = VALUES(summary), keywords = VALUES(keywords),",
                    "-                   sentiment = VALUES(sentiment), topics = VALUES(topics),",
                    "-                   analyzed_at = NOW(), analysis_model = VALUES(analysis_model)\"\"\",",
                    "-                (",
                    "-                    chunk_id,",
                    "-                    result.get(\"summary\", \"\"),",
                    "-                    json.dumps(result.get(\"keywords\", []), ensure_ascii=False),",
                    "-                    result.get(\"sentiment\", \"neutral\"),",
                    "-                    json.dumps(result.get(\"topics\", []), ensure_ascii=False),",
                    "-                    model_name,",
                    "-                ),",
                    "-            )",
                    "-            db.commit()",
                    "-            cursor.close()",
                    "-            return result",
                    "-",
                    "-        return None",
                    "-",
                    "-    except Exception as e:",
                    "-        db.log(\"ERROR\", f\"Chunk semantics analysis failed: {e}\")",
                    "-        return None",
                    "-",
                    "-",
                    "-def analyze_chunks_semantics(document_id, client=None, progress=None):",
                    "-    \"\"\"Analyze all chunks of a document for semantics.\"\"\"",
                    "-    cursor = db.execute(\"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\", (document_id,))",
                    "-    chunks = cursor.fetchall()",
                    "-    cursor.close()",
                    "-",
                    "-    total = len(chunks)",
                    "-    db.log(\"INFO\", f\"Analyzing semantics for {total} chunks\")",
                    "-    if progress:",
                    "-        progress.add_log(f\"Semantik: Analysiere {total} Chunks...\")",
                    "-",
                    "-    analyzed = 0",
                    "-    for i, chunk in enumerate(chunks, 1):",
                    "-        if progress:",
                    "-            progress.add_log(f\"Semantik: Chunk {i}\/{total}...\")",
                    "-",
                    "-        result = analyze_chunk_semantics(chunk[\"id\"], chunk[\"content\"], client)",
                    "-        if result:",
                    "-            analyzed += 1",
                    "-",
                    "-    db.log(\"INFO\", f\"Analyzed {analyzed}\/{total} chunks for semantics\")",
                    "-    if progress:",
                    "-        progress.add_log(f\"Semantik: {analyzed}\/{total} Chunks analysiert\")",
                    "-    return analyzed",
                    "-",
                    "-",
                    "-def extract_statements(chunk_id: int, text: str, client=None) -> list[dict]:",
                    "-    \"\"\"",
                    "-    Extract SPO-triplets (Subject-Predicate-Object statements) from text.",
                    "-",
                    "-    Args:",
                    "-        chunk_id: ID of the chunk being analyzed",
                    "-        text: Text content to extract statements from",
                    "-        client: Optional Anthropic client (falls back to Ollama if None)",
                    "-",
                    "-    Returns:",
                    "-        List of extracted statements with entity linking",
                    "-    \"\"\"",
                    "-    # Get prompt with versioning metadata",
                    "-    prompt_data = db.get_prompt_by_use_case(\"statement_extraction\")",
                    "-    prompt_template = prompt_data[\"content\"] if prompt_data else None",
                    "-    prompt_id = prompt_data[\"id\"] if prompt_data else None",
                    "-    prompt_version = prompt_data[\"version\"] if prompt_data else None",
                    "-",
                    "-    if not prompt_template:",
                    "-        db.log(\"WARNING\", \"statement_extraction prompt not found in DB, using fallback\")",
                    "-        prompt_template = \"\"\"Extrahiere alle faktischen Aussagen aus dem Text als SPO-Tripel.",
                    "-",
                    "-Regeln:",
                    "-- Subject: Eine benannte Entität (Person, Organisation, Konzept, Methode)",
                    "-- Predicate: Die Beziehung oder Eigenschaft (z.B. \"entwickelte\", \"basiert auf\", \"ist Teil von\")",
                    "-- Object: Eine Entität oder ein Literal-Wert",
                    "-",
                    "-Antworte NUR im JSON-Format:",
                    "-{\"statements\": [",
                    "-  {\"subject\": \"Name der Subject-Entität\", \"predicate\": \"Beziehung\", \"object\": \"Name oder Wert\", \"confidence\": 0.0-1.0}",
                    "-]}",
                    "-",
                    "-Text:",
                    "-{{TEXT}}\"\"\"",
                    "-",
                    "-    prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])",
                    "-",
                    "-    try:",
                    "-        start_time = time.time()",
                    "-        tokens_in, tokens_out = 0, 0",
                    "-        model_name = \"\"",
                    "-",
                    "-        if client:",
                    "-            message = client.messages.create(",
                    "-                model=ANTHROPIC_MODEL,",
                    "-                max_tokens=1500,",
                    "-                messages=[{\"role\": \"user\", \"content\": prompt}],",
                    "-            )",
                    "-            response_text = message.content[0].text",
                    "-            tokens_in = message.usage.input_tokens",
                    "-            tokens_out = message.usage.output_tokens",
                    "-            model_name = ANTHROPIC_MODEL",
                    "-        else:",
                    "-            response = requests.post(",
                    "-                f\"{OLLAMA_HOST}\/api\/generate\",",
                    "-                json={",
                    "-                    \"model\": OLLAMA_CHAT_MODEL,",
                    "-                    \"prompt\": prompt,",
                    "-                    \"stream\": False,",
                    "-                    \"format\": \"json\",",
                    "-                },",
                    "-                timeout=120,",
                    "-            )",
                    "-            response.raise_for_status()",
                    "-            data = response.json()",
                    "-            response_text = data.get(\"response\", \"{}\")",
                    "-            tokens_in = data.get(\"prompt_eval_count\", 0)",
                    "-            tokens_out = data.get(\"eval_count\", 0)",
                    "-            model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"",
                    "-",
                    "-        duration_ms = int((time.time() - start_time) * 1000)",
                    "-",
                    "-        # Log to ki-protokoll",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[statement_extraction] chunk_id={chunk_id}\",",
                    "-            response=response_text[:2000],",
                    "-            model_name=model_name,",
                    "-            tokens_input=tokens_in,",
                    "-            tokens_output=tokens_out,",
                    "-            duration_ms=duration_ms,",
                    "-            status=\"completed\",",
                    "-        )",
                    "-",
                    "-        # Parse JSON",
                    "-        import re",
                    "-",
                    "-        json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)",
                    "-        if json_match:",
                    "-            result = json.loads(json_match.group())",
                    "-            return {",
                    "-                \"statements\": result.get(\"statements\", []),",
                    "-                \"prompt_id\": prompt_id,",
                    "-                \"prompt_version\": prompt_version,",
                    "-                \"model_used\": model_name,",
                    "-            }",
                    "-        return {\"statements\": [], \"prompt_id\": prompt_id, \"prompt_version\": prompt_version}",
                    "-",
                    "-    except Exception as e:",
                    "-        db.log(\"ERROR\", f\"Statement extraction failed for chunk {chunk_id}: {e}\")",
                    "-        protokoll.log_llm_call(",
                    "-            request=f\"[statement_extraction] chunk_id={chunk_id}\",",
                    "-            model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",",
                    "-            status=\"error\",",
                    "-            error_message=str(e),",
                    "-        )",
                    "-        return {\"statements\": [], \"prompt_id\": prompt_id, \"prompt_version\": prompt_version}",
                    "-",
                    "-",
                    "-def find_entity_by_name(name: str) -> dict | None:",
                    "-    \"\"\"",
                    "-    Find entity by name or canonical_name.",
                    "-",
                    "-    Args:",
                    "-        name: Entity name to search for",
                    "-",
                    "-    Returns:",
                    "-        Entity dict with id, name, canonical_name or None",
                    "-    \"\"\"",
                    "-    name_lower = name.lower().strip()",
                    "-    canonical = name_lower.replace(\" \", \"\").replace(\"-\", \"\")",
                    "-",
                    "-    # Try exact canonical match first",
                    "-    cursor = db.execute(",
                    "-        \"SELECT id, name, canonical_name FROM entities WHERE canonical_name = %s LIMIT 1\",",
                    "-        (canonical,),",
                    "-    )",
                    "-    result = cursor.fetchone()",
                    "-    cursor.close()",
                    "-    if result:",
                    "-        return result",
                    "-",
                    "-    # Try name match (case-insensitive)",
                    "-    cursor = db.execute(",
                    "-        \"SELECT id, name, canonical_name FROM entities WHERE LOWER(name) = %s LIMIT 1\",",
                    "-        (name_lower,),",
                    "-    )",
                    "-    result = cursor.fetchone()",
                    "-    cursor.close()",
                    "-    if result:",
                    "-        return result",
                    "-",
                    "-    # Try partial canonical match",
                    "-    cursor = db.execute(",
                    "-        \"SELECT id, name, canonical_name FROM entities WHERE canonical_name LIKE %s LIMIT 1\",",
                    "-        (f\"%{canonical}%\",),",
                    "-    )",
                    "-    result = cursor.fetchone()",
                    "-    cursor.close()",
                    "-    return result",
                    "-",
                    "-",
                    "-def store_statements(",
                    "-    chunk_id: int,",
                    "-    statements: list[dict],",
                    "-    prompt_version: str = None,",
                    "-    model_used: str = None,",
                    "-) -> int:",
                    "-    \"\"\"",
                    "-    Store extracted statements in the database with entity linking.",
                    "-",
                    "-    Args:",
                    "-        chunk_id: ID of the source chunk",
                    "-        statements: List of statement dicts with subject, predicate, object, confidence",
                    "-        prompt_version: Version of the prompt used for extraction",
                    "-        model_used: Model used for extraction",
                    "-",
                    "-    Returns:",
                    "-        Number of successfully stored statements",
                    "-    \"\"\"",
                    "-    stored = 0",
                    "-",
                    "-    for stmt in statements:",
                    "-        try:",
                    "-            subject_name = stmt.get(\"subject\", \"\").strip()",
                    "-            predicate = stmt.get(\"predicate\", \"\").strip()",
                    "-            object_value = stmt.get(\"object\", \"\").strip()",
                    "-            confidence = float(stmt.get(\"confidence\", 0.8))",
                    "-",
                    "-            if not subject_name or not predicate:",
                    "-                continue",
                    "-",
                    "-            # Find subject entity",
                    "-            subject_entity = find_entity_by_name(subject_name)",
                    "-            if not subject_entity:",
                    "-                db.log(\"DEBUG\", f\"Subject entity not found: {subject_name}\")",
                    "-                continue  # Subject must be a known entity",
                    "-",
                    "-            subject_entity_id = subject_entity[\"id\"]",
                    "-",
                    "-            # Try to find object entity, otherwise use as literal",
                    "-            object_entity_id = None",
                    "-            object_literal = None",
                    "-",
                    "-            if object_value:",
                    "-                object_entity = find_entity_by_name(object_value)",
                    "-                if object_entity:",
                    "-                    object_entity_id = object_entity[\"id\"]",
                    "-                else:",
                    "-                    object_literal = object_value",
                    "-",
                    "-            # Insert statement",
                    "-            cursor = db.execute(",
                    "-                \"\"\"INSERT INTO statements",
                    "-                   (subject_entity_id, predicate, object_entity_id, object_literal,",
                    "-                    chunk_id, confidence, status, created_at)",
                    "-                   VALUES (%s, %s, %s, %s, %s, %s, 'extracted', NOW())\"\"\",",
                    "-                (",
                    "-                    subject_entity_id,",
                    "-                    predicate[:255],  # Truncate to VARCHAR(255)",
                    "-                    object_entity_id,",
                    "-                    object_literal,",
                    "-                    chunk_id,",
                    "-                    confidence,",
                    "-                ),",
                    "-            )",
                    "-            db.commit()",
                    "-            statement_id = cursor.lastrowid",
                    "-            cursor.close()",
                    "-",
                    "-            # Log provenance for statement",
                    "-            db.log_provenance(",
                    "-                artifact_type=\"statement\",",
                    "-                artifact_id=statement_id,",
                    "-                source_type=\"extraction\",",
                    "-                source_id=chunk_id,",
                    "-                pipeline_step=\"statement_extract\",",
                    "-                model_used=model_used,",
                    "-                prompt_version=prompt_version,",
                    "-            )",
                    "-",
                    "-            stored += 1",
                    "-",
                    "-        except Exception as e:",
                    "-            db.log(\"WARNING\", f\"Failed to store statement: {e}\")",
                    "-",
                    "-    return stored",
                    "-",
                    "-",
                    "-def analyze_chunk_statements(chunk_id: int, content: str, client=None) -> int:",
                    "-    \"\"\"",
                    "-    Extract and store statements for a single chunk.",
                    "-",
                    "-    Args:",
                    "-        chunk_id: ID of the chunk",
                    "-        content: Text content of the chunk",
                    "-        client: Optional Anthropic client",
                    "-",
                    "-    Returns:",
                    "-        Number of statements stored",
                    "-    \"\"\"",
                    "-    result = extract_statements(chunk_id, content, client)",
                    "-    statements = result.get(\"statements\", [])",
                    "-    if statements:",
                    "-        stored = store_statements(",
                    "-            chunk_id,",
                    "-            statements,",
                    "-            prompt_version=result.get(\"prompt_version\"),",
                    "-            model_used=result.get(\"model_used\"),",
                    "-        )",
                    "-        db.log(\"INFO\", f\"Chunk {chunk_id}: {stored}\/{len(statements)} statements stored\")",
                    "-        return stored",
                    "-    return 0",
                    "-",
                    "-",
                    "-def analyze_document_statements(document_id: int, client=None, progress=None) -> int:",
                    "-    \"\"\"",
                    "-    Extract statements from all chunks of a document.",
                    "-",
                    "-    Args:",
                    "-        document_id: ID of the document",
                    "-        client: Optional Anthropic client",
                    "-        progress: Optional PipelineProgress for logging",
                    "-",
                    "-    Returns:",
                    "-        Total number of statements stored",
                    "-    \"\"\"",
                    "-    cursor = db.execute(",
                    "-        \"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\",",
                    "-        (document_id,),",
                    "-    )",
                    "-    chunks = cursor.fetchall()",
                    "-    cursor.close()",
                    "-",
                    "-    total = len(chunks)",
                    "-    db.log(\"INFO\", f\"Extracting statements from {total} chunks\")",
                    "-    if progress:",
                    "-        progress.add_log(f\"Statements: Extrahiere aus {total} Chunks...\")",
                    "-",
                    "-    total_stored = 0",
                    "-    for i, chunk in enumerate(chunks, 1):",
                    "-        if progress:",
                    "-            progress.add_log(f\"Statements: Chunk {i}\/{total}...\")",
                    "-",
                    "-        stored = analyze_chunk_statements(chunk[\"id\"], chunk[\"content\"], client)",
                    "-        total_stored += stored",
                    "-",
                    "-    db.log(\"INFO\", f\"Total statements stored: {total_stored}\")",
                    "-    if progress:",
                    "-        progress.add_log(f\"Statements: {total_stored} Aussagen extrahiert\")",
                    "-",
                    "-    return total_stored",
                    "-",
                    "-",
                    " if __name__ == \"__main__\":",
                    "     test_text = \"\"\"",
                    "     Carl Rogers entwickelte die klientenzentrierte Gesprächstherapie."
                ]
            }
        ],
        "originalFile": "\"\"\"\nSemantic analysis for KI-System Pipeline\nEntity extraction, taxonomy classification, ontology mapping.\n\"\"\"\n\nimport json\nimport time\n\nimport requests\n\nfrom config import ANTHROPIC_API_KEY, ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST\nfrom db import db\nfrom protokoll import protokoll\n\n\ndef get_anthropic_client():\n    \"\"\"Get Anthropic API client.\"\"\"\n    try:\n        import anthropic\n\n        if ANTHROPIC_API_KEY:\n            db.log(\"INFO\", \"Using Anthropic API (Claude)\")\n            return anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)\n        else:\n            db.log(\"WARNING\", \"No Anthropic API key found, falling back to Ollama\")\n    except ImportError:\n        db.log(\"WARNING\", \"anthropic package not installed, falling back to Ollama\")\n    return None\n\n\ndef extract_entities_ollama(text, model=\"gemma3:27b-it-qat\"):\n    \"\"\"Extract entities using Ollama.\"\"\"\n    # Load prompt with versioning metadata\n    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n    prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n    if not prompt_template:\n        db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n        prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n    prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])\n\n    try:\n        start_time = time.time()\n        response = requests.post(\n            f\"{OLLAMA_HOST}\/api\/generate\",\n            json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n            timeout=120,\n        )\n        response.raise_for_status()\n        data = response.json()\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        # Parse JSON from response\n        response_text = data.get(\"response\", \"{}\")\n\n        # Log to ki-protokoll\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            response=response_text[:2000],\n            model_name=f\"ollama:{model}\",\n            tokens_input=data.get(\"prompt_eval_count\", 0),\n            tokens_output=data.get(\"eval_count\", 0),\n            duration_ms=duration_ms,\n            status=\"completed\",\n        )\n\n        try:\n            entities = json.loads(response_text)\n            return entities.get(\"entities\", [])\n        except json.JSONDecodeError:\n            db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n            return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            model_name=f\"ollama:{model}\",\n            status=\"error\",\n            error_message=str(e),\n        )\n        return []\n\n\ndef extract_entities_anthropic(text, client):\n    \"\"\"Extract entities using Anthropic Claude.\"\"\"\n    # Get prompt with versioning metadata\n    prompt_data = db.get_prompt_by_use_case(\"entity_extraction\")\n    prompt_template = prompt_data[\"content\"] if prompt_data else None\n\n    if not prompt_template:\n        prompt_template = \"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen 
Entitäten.\n\nKategorisiere jede Entität als:\n- PERSON (Namen von Personen)\n- ORGANIZATION (Firmen, Institutionen, Gruppen)\n- CONCEPT (Fachbegriffe, Methoden, Theorien)\n- LOCATION (Orte, Länder)\n- DATE (Zeitangaben)\n- OTHER (Sonstiges)\n\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurzer Kontext der Erwähnung\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n    prompt = prompt_template.replace(\"{{TEXT}}\", text[:4000])\n\n    try:\n        start_time = time.time()\n        message = client.messages.create(\n            model=ANTHROPIC_MODEL, max_tokens=2000, messages=[{\"role\": \"user\", \"content\": prompt}]\n        )\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        response_text = message.content[0].text\n\n        # Log to ki-protokoll\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            response=response_text[:2000],\n            model_name=ANTHROPIC_MODEL,\n            tokens_input=message.usage.input_tokens,\n            tokens_output=message.usage.output_tokens,\n            duration_ms=duration_ms,\n            status=\"completed\",\n        )\n\n        # Extract JSON from response\n        import re\n\n        json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n        if json_match:\n            entities = json.loads(json_match.group())\n            return entities.get(\"entities\", [])\n        return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Anthropic entity extraction failed: {e}\")\n        protokoll.log_llm_call(\n            request=f\"[entity_extraction] {prompt[:500]}...\",\n            model_name=ANTHROPIC_MODEL,\n            status=\"error\",\n            error_message=str(e),\n        )\n        return []\n\n\ndef extract_relations(text, entities, client=None):\n    \"\"\"Extract relations between entities.\"\"\"\n    if not entities or len(entities) < 2:\n        return []\n\n    entity_names = [e[\"name\"] for e in entities[:20]]\n\n    # Load prompt from database\n    prompt_template = db.get_prompt(\"relation_extraction\")\n\n    if not prompt_template:\n        db.log(\"WARNING\", \"relation_extraction prompt not found in DB, using fallback\")\n        prompt_template = \"\"\"Identifiziere Beziehungen zwischen Entitäten.\nEntitäten: {{ENTITIES}}\nBeziehungstypen: DEVELOPED_BY, RELATED_TO, PART_OF, USED_IN, BASED_ON\nAntworte NUR im JSON-Format:\n{\"relations\": [{\"source\": \"...\", \"relation\": \"...\", \"target\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n    prompt = prompt_template.replace(\"{{ENTITIES}}\", \", \".join(entity_names))\n    prompt = prompt.replace(\"{{TEXT}}\", text[:3000])\n\n    try:\n        start_time = time.time()\n        tokens_in, tokens_out = 0, 0\n        model_name = \"\"\n\n        if client:\n            message = client.messages.create(\n                model=ANTHROPIC_MODEL, max_tokens=1000, messages=[{\"role\": \"user\", \"content\": prompt}]\n            )\n            response_text = message.content[0].text\n            tokens_in = message.usage.input_tokens\n            tokens_out = message.usage.output_tokens\n            model_name = ANTHROPIC_MODEL\n        else:\n            response = requests.post(\n                f\"{OLLAMA_HOST}\/api\/generate\",\n                json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n                timeout=120,\n            )\n            response.raise_for_status()\n            data 
= response.json()\n            response_text = data.get(\"response\", \"{}\")\n            tokens_in = data.get(\"prompt_eval_count\", 0)\n            tokens_out = data.get(\"eval_count\", 0)\n            model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"\n\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        # Log to ki-protokoll\n        protokoll.log_llm_call(\n            request=f\"[relation_extraction] {prompt[:500]}...\",\n            response=response_text[:2000],\n            model_name=model_name,\n            tokens_input=tokens_in,\n            tokens_output=tokens_out,\n            duration_ms=duration_ms,\n            status=\"completed\",\n        )\n\n        import re\n\n        json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n        if json_match:\n            data = json.loads(json_match.group())\n            return data.get(\"relations\", [])\n        return []\n    except Exception as e:\n        db.log(\"ERROR\", f\"Relation extraction failed: {e}\")\n        protokoll.log_llm_call(\n            request=f\"[relation_extraction] {prompt[:500]}...\",\n            model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",\n            status=\"error\",\n            error_message=str(e),\n        )\n        return []\n\n\ndef classify_taxonomy(text, client=None):\n    \"\"\"Classify text into taxonomy categories.\"\"\"\n    prompt_template = db.get_prompt(\"taxonomy_classification\")\n\n    if not prompt_template:\n        prompt_template = \"\"\"Klassifiziere den folgenden Text in passende Kategorien.\n\nWähle aus diesen Hauptkategorien:\n- Methoden (Therapiemethoden, Techniken)\n- Theorie (Konzepte, Modelle, Grundlagen)\n- Praxis (Anwendung, Fallbeispiele)\n- Organisation (Strukturen, Prozesse)\n- Kommunikation (Gesprächsführung, Interaktion)\n- Entwicklung (Persönliche Entwicklung, Veränderung)\n\nAntworte NUR im JSON-Format:\n{\"categories\": [\"...\", \"...\"], \"confidence\": 0.0-1.0}\n\nText:\n{{TEXT}}\"\"\"\n\n    prompt = prompt_template.replace(\"{{TEXT}}\", text[:2000])\n\n    try:\n        start_time = time.time()\n        tokens_in, tokens_out = 0, 0\n        model_name = \"\"\n\n        if client:\n            message = client.messages.create(\n                model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", \"content\": prompt}]\n            )\n            response_text = message.content[0].text\n            tokens_in = message.usage.input_tokens\n            tokens_out = message.usage.output_tokens\n            model_name = ANTHROPIC_MODEL\n        else:\n            response = requests.post(\n                f\"{OLLAMA_HOST}\/api\/generate\",\n                json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n                timeout=60,\n            )\n            response.raise_for_status()\n            data = response.json()\n            response_text = data.get(\"response\", \"{}\")\n            tokens_in = data.get(\"prompt_eval_count\", 0)\n            tokens_out = data.get(\"eval_count\", 0)\n            model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"\n\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        # Log to ki-protokoll\n        protokoll.log_llm_call(\n            request=f\"[taxonomy_classification] {prompt[:500]}...\",\n            response=response_text[:2000],\n            model_name=model_name,\n            tokens_input=tokens_in,\n            tokens_output=tokens_out,\n          ... 
[TRUNCATED-47f636327abf188d]"
    }
}
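
The bulk of the diff above removes the statement-extraction helpers that were relocated into the analyzers/ module. The core of find_entity_by_name is a three-stage lookup: an exact match on the canonical form, then a case-insensitive match on the display name, then a partial LIKE match on the canonical form. A minimal, self-contained sketch of that cascade follows; the in-memory ENTITIES list and the find_entity helper are illustrative stand-ins for the entities table and the db.execute() queries in the removed code, not part of the pipeline.

def canonical_form(name: str) -> str:
    # Lower-case, strip, and drop spaces and hyphens, exactly as the removed code does.
    return name.lower().strip().replace(" ", "").replace("-", "")

# Hypothetical rows standing in for the entities table (id, name, canonical_name).
ENTITIES = [
    {"id": 1, "name": "Carl Rogers", "canonical_name": "carlrogers"},
    {"id": 2, "name": "Gesprächstherapie", "canonical_name": "gesprächstherapie"},
]

def find_entity(name: str) -> dict | None:
    canonical = canonical_form(name)
    name_lower = name.lower().strip()
    # Stage 1: exact match on the canonical form.
    for row in ENTITIES:
        if row["canonical_name"] == canonical:
            return row
    # Stage 2: case-insensitive match on the display name.
    for row in ENTITIES:
        if row["name"].lower() == name_lower:
            return row
    # Stage 3: partial canonical match (the LIKE '%...%' fallback).
    for row in ENTITIES:
        if canonical in row["canonical_name"]:
            return row
    return None

assert find_entity("Carl-Rogers")["id"] == 1  # stage 1: hyphens are canonicalized away
assert find_entity("Rogers")["id"] == 1       # stage 3: substring fallback
assert find_entity("Empathie") is None        # no match at any stage

Note that the ordering matters: the broad substring fallback only fires after both exact stages fail, which keeps common names from being swallowed by partial matches.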
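store_statements applies the same lookup when linking each extracted triple: the subject must resolve to a known entity or the statement is skipped, while the object is linked by entity id when possible and otherwise stored as a free-text literal. A minimal sketch of that branching, under the assumption of a tiny in-memory KNOWN map in place of the real entity table:

# KNOWN is a hypothetical stand-in for find_entity_by_name() hitting the
# entities table; only the branching logic mirrors the removed code.
KNOWN = {"carl rogers": 1, "gesprächstherapie": 2}  # name -> entity id (illustrative)

def find_entity_id(name: str) -> int | None:
    return KNOWN.get(name.lower().strip())

def resolve_statement(subject: str, predicate: str, obj: str):
    # Mirror the removed logic: skip without a subject entity or predicate;
    # link the object by entity id when known, else keep it as a literal.
    if not subject.strip() or not predicate.strip():
        return None                       # incomplete statement: skipped
    subject_id = find_entity_id(subject)
    if subject_id is None:
        return None                       # subject must be a known entity
    object_id = find_entity_id(obj)
    object_literal = None if object_id or not obj else obj
    return (subject_id, predicate[:255], object_id, object_literal)

assert resolve_statement("Carl Rogers", "entwickelte", "Gesprächstherapie") == (1, "entwickelte", 2, None)
assert resolve_statement("Carl Rogers", "prägte", "Empathie") == (1, "prägte", None, "Empathie")
assert resolve_statement("Unbekannt", "ist", "etwas") is None

The predicate[:255] slice mirrors the truncation to VARCHAR(255) noted in the removed INSERT, and the (object_entity_id, object_literal) pair maps directly onto the two object columns of the statements table.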