Protokoll #13371

ID	13371
Zeitstempel	2025-12-25 00:24:20.220014
Client	root
IP	145.224.72.140
Modell	claude-sonnet-4-20250514
Status	completed
Tokens	4,074 (Input: 576, Output: 3,498)
Dauer	100 ms
Request-Zeit	2025-12-25 00:24:20.220014
Response-Zeit	2025-12-25 00:24:20.319635

Request

{
    "event": "PreToolUse",
    "tool_name": "Edit",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/generate.py",
        "old_string": "    if prompt_template:\n        prompt = prompt_template.format(\n            profile_text=profile_text,\n            contract_text=contract_text,\n            structure_text=structure_text,\n            context=context_text,\n            briefing=briefing,\n            format_instruction=format_instruction,\n        )\n    else:\n        # Fallback if prompt not in DB\n        prompt = f\"\"\"Du bist ein professioneller Content-Autor. Erstelle basierend auf dem Briefing und dem bereitgestellten Kontext einen hochwertigen Text.\n\n{profile_text}\n{contract_text}\n{structure_text}\n\n## Kontext aus der Wissensbasis:\n{context_text}\n\n## Briefing:\n{briefing}\n\n## Anweisungen:\n1. Nutze die Informationen aus dem Kontext als Grundlage\n2. Halte dich an das Autorenprofil und den Schreibstil\n3. Beachte die Vorgaben aus dem Contract\n4. Strukturiere den Text gemäß dem Template (falls angegeben)\n5. Schreibe auf Deutsch\n6. Kennzeichne verwendete Quellen\n{format_instruction}",
        "new_string": "    if prompt_template:\n        prompt = prompt_template.format(\n            profile_text=profile_text,\n            contract_text=contract_text,\n            structure_text=structure_text,\n            context=context_text,\n            briefing=briefing,\n            format_instruction=format_instruction,\n            semantic_text=semantic_text,\n            taxonomy_text=taxonomy_text,\n        )\n    else:\n        # Fallback if prompt not in DB\n        prompt = f\"\"\"Du bist ein professioneller Content-Autor. Erstelle basierend auf dem Briefing und dem bereitgestellten Kontext einen hochwertigen Text.\n\n{profile_text}\n{contract_text}\n{structure_text}\n{semantic_text}\n{taxonomy_text}\n\n## Kontext aus der Wissensbasis:\n{context_text}\n\n## Briefing:\n{briefing}\n\n## Anweisungen:\n1. Nutze die Informationen aus dem Kontext als Grundlage\n2. Halte dich an das Autorenprofil und den Schreibstil\n3. Beachte die Vorgaben aus dem Contract\n4. Strukturiere den Text gemäß dem Template (falls angegeben)\n5. Schreibe auf Deutsch\n6. Kennzeichne verwendete Quellen\n7. Berücksichtige die relevanten Konzepte und deren Beziehungen\n{format_instruction}"
    }
}

Response

{
    "tool_response": {
        "filePath": "\/var\/www\/scripts\/pipeline\/generate.py",
        "oldString": "    if prompt_template:\n        prompt = prompt_template.format(\n            profile_text=profile_text,\n            contract_text=contract_text,\n            structure_text=structure_text,\n            context=context_text,\n            briefing=briefing,\n            format_instruction=format_instruction,\n        )\n    else:\n        # Fallback if prompt not in DB\n        prompt = f\"\"\"Du bist ein professioneller Content-Autor. Erstelle basierend auf dem Briefing und dem bereitgestellten Kontext einen hochwertigen Text.\n\n{profile_text}\n{contract_text}\n{structure_text}\n\n## Kontext aus der Wissensbasis:\n{context_text}\n\n## Briefing:\n{briefing}\n\n## Anweisungen:\n1. Nutze die Informationen aus dem Kontext als Grundlage\n2. Halte dich an das Autorenprofil und den Schreibstil\n3. Beachte die Vorgaben aus dem Contract\n4. Strukturiere den Text gemäß dem Template (falls angegeben)\n5. Schreibe auf Deutsch\n6. Kennzeichne verwendete Quellen\n{format_instruction}",
        "newString": "    if prompt_template:\n        prompt = prompt_template.format(\n            profile_text=profile_text,\n            contract_text=contract_text,\n            structure_text=structure_text,\n            context=context_text,\n            briefing=briefing,\n            format_instruction=format_instruction,\n            semantic_text=semantic_text,\n            taxonomy_text=taxonomy_text,\n        )\n    else:\n        # Fallback if prompt not in DB\n        prompt = f\"\"\"Du bist ein professioneller Content-Autor. Erstelle basierend auf dem Briefing und dem bereitgestellten Kontext einen hochwertigen Text.\n\n{profile_text}\n{contract_text}\n{structure_text}\n{semantic_text}\n{taxonomy_text}\n\n## Kontext aus der Wissensbasis:\n{context_text}\n\n## Briefing:\n{briefing}\n\n## Anweisungen:\n1. Nutze die Informationen aus dem Kontext als Grundlage\n2. Halte dich an das Autorenprofil und den Schreibstil\n3. Beachte die Vorgaben aus dem Contract\n4. Strukturiere den Text gemäß dem Template (falls angegeben)\n5. Schreibe auf Deutsch\n6. Kennzeichne verwendete Quellen\n7. Berücksichtige die relevanten Konzepte und deren Beziehungen\n{format_instruction}",
        "originalFile": "#!\/usr\/bin\/env python3\n\"\"\"\nContent Generation for KI-System\nGenerates content using RAG context, author profiles, and contracts.\n\"\"\"\n\nimport json\nimport sys\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom config import ANTHROPIC_API_KEY, ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST  # noqa: I001, E402\nfrom db import db  # noqa: E402\nfrom embed import search_similar  # noqa: E402\n\n\ndef _repair_json(json_str):\n    \"\"\"\n    Attempt to repair common JSON issues from LLM output.\n\n    Fixes:\n    - Unescaped quotes in strings\n    - Missing commas between array elements\n    - Trailing commas\n    - Control characters in strings\n    \"\"\"\n    import re\n\n    # Remove control characters except newlines and tabs\n    json_str = re.sub(r\"[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]\", \"\", json_str)\n\n    # Fix common issue: missing comma before closing bracket in arrays\n    json_str = re.sub(r'\"\\s*\\n\\s*]', '\"\\n]', json_str)\n\n    # Fix trailing commas before closing brackets\/braces\n    json_str = re.sub(r\",\\s*}\", \"}\", json_str)\n    json_str = re.sub(r\",\\s*]\", \"]\", json_str)\n\n    # Fix missing commas between array elements (string followed by string)\n    json_str = re.sub(r'\"\\s*\\n\\s*\"', '\",\\n\"', json_str)\n\n    # Fix unescaped quotes within strings (heuristic: quotes not at boundaries)\n    # This is tricky, so we do a simple fix for common patterns\n    lines = json_str.split(\"\\n\")\n    fixed_lines = []\n    for line in lines:\n        # Count quotes - if odd number and line has content, try to fix\n        quote_count = line.count('\"') - line.count('\\\\\"')\n        if quote_count % 2 != 0 and \":\" in line:\n            # Try to escape internal quotes (very basic heuristic)\n            parts = line.split(\":\", 1)\n            if len(parts) == 2:\n                key_part = parts[0]\n                value_part = parts[1]\n                # If value has odd quotes, try to balance\n                if value_part.count('\"') % 2 != 0:\n                    # Add escaped quote or remove problematic char\n                    value_part = value_part.rstrip().rstrip(\",\")\n                    if not value_part.endswith('\"'):\n                        value_part += '\"'\n                    line = key_part + \":\" + value_part\n        fixed_lines.append(line)\n\n    return \"\\n\".join(fixed_lines)\n\n\ndef get_prompt(name):\n    \"\"\"Load prompt from database by name.\"\"\"\n    cursor = db.execute(\n        \"\"\"SELECT content FROM prompts WHERE name = %s AND is_active = 1 ORDER BY version DESC LIMIT 1\"\"\",\n        (name,),\n    )\n    result = cursor.fetchone()\n    cursor.close()\n    return result[\"content\"] if result else None\n\n\ndef get_rag_context(briefing, collection=\"documents\", limit=5):\n    \"\"\"\n    Get relevant context from Qdrant based on briefing.\n    Returns list of chunks with content and metadata.\n    \"\"\"\n    results = search_similar(briefing, collection=collection, limit=limit)\n\n    context_items = []\n    for result in results:\n        context_items.append(\n            {\n                \"content\": result[\"payload\"].get(\"content\", \"\"),\n                \"source\": result[\"payload\"].get(\"document_title\", \"Unknown\"),\n                \"score\": round(result[\"score\"], 4),\n                \"chunk_id\": result[\"payload\"].get(\"chunk_id\"),\n                \"document_id\": result[\"payload\"].get(\"document_id\"),\n            }\n        )\n\n    return context_items\n\n\ndef get_config_item(item_id, item_type):\n    \"\"\"Load configuration item from content_config table.\"\"\"\n    if not item_id:\n        return None\n\n    cursor = db.execute(\n        \"SELECT name, content FROM content_config WHERE id = %s AND type = %s AND status = 'active'\",\n        (item_id, item_type),\n    )\n    result = cursor.fetchone()\n    cursor.close()\n\n    if result:\n        config = json.loads(result[\"content\"]) if isinstance(result[\"content\"], str) else result[\"content\"]\n        return {\"name\": result[\"name\"], \"config\": config}\n    return None\n\n\ndef get_semantic_context(chunk_ids):\n    \"\"\"\n    Load entities and relations based on chunk_ids.\n\n    Uses the chunk_entities junction table to find relevant entities,\n    then loads relations between those entities.\n\n    Args:\n        chunk_ids: List of chunk IDs from RAG context\n\n    Returns:\n        dict with 'entities' and 'relations' lists\n    \"\"\"\n    if not chunk_ids:\n        return {\"entities\": [], \"relations\": []}\n\n    # Filter out None values\n    chunk_ids = [cid for cid in chunk_ids if cid is not None]\n    if not chunk_ids:\n        return {\"entities\": [], \"relations\": []}\n\n    placeholders = \", \".join([\"%s\"] * len(chunk_ids))\n\n    # Load entities via chunk_entities\n    cursor = db.execute(\n        f\"\"\"SELECT DISTINCT e.id, e.name, e.type, e.description,\n                  AVG(ce.relevance_score) as relevance\n           FROM chunk_entities ce\n           JOIN entities e ON ce.entity_id = e.id\n           WHERE ce.chunk_id IN ({placeholders})\n           GROUP BY e.id, e.name, e.type, e.description\n           ORDER BY relevance DESC\n           LIMIT 10\"\"\",\n        tuple(chunk_ids),\n    )\n    entities = cursor.fetchall()\n    cursor.close()\n\n    if not entities:\n        return {\"entities\": [], \"relations\": []}\n\n    # Get entity IDs for relation lookup\n    entity_ids = [e[\"id\"] for e in entities]\n    entity_placeholders = \", \".join([\"%s\"] * len(entity_ids))\n\n    # Load relations between found entities\n    cursor = db.execute(\n        f\"\"\"SELECT e1.name as source, er.relation_type, e2.name as target\n           FROM entity_relations er\n           JOIN entities e1 ON er.source_entity_id = e1.id\n           JOIN entities e2 ON er.target_entity_id = e2.id\n           WHERE e1.id IN ({entity_placeholders}) AND e2.id IN ({entity_placeholders})\n           LIMIT 15\"\"\",\n        tuple(entity_ids) + tuple(entity_ids),\n    )\n    relations = cursor.fetchall()\n    cursor.close()\n\n    return {\"entities\": entities, \"relations\": relations}\n\n\ndef get_taxonomy_context(document_ids):\n    \"\"\"\n    Load taxonomy terms for documents.\n\n    Args:\n        document_ids: List of document IDs from RAG context\n\n    Returns:\n        List of taxonomy term dicts with name, slug, confidence\n    \"\"\"\n    if not document_ids:\n        return []\n\n    # Filter out None values\n    document_ids = [did for did in document_ids if did is not None]\n    if not document_ids:\n        return []\n\n    placeholders = \", \".join([\"%s\"] * len(document_ids))\n\n    cursor = db.execute(\n        f\"\"\"SELECT DISTINCT tt.name, tt.slug, MAX(dt.confidence) as confidence\n           FROM document_taxonomy dt\n           JOIN taxonomy_terms tt ON dt.taxonomy_term_id = tt.id\n           WHERE dt.document_id IN ({placeholders})\n           GROUP BY tt.id, tt.name, tt.slug\n           ORDER BY confidence DESC\"\"\",\n        tuple(document_ids),\n    )\n    taxonomy = cursor.fetchall()\n    cursor.close()\n\n    return taxonomy\n\n\ndef get_author_profile(profile_id):\n    \"\"\"Load author profile from database.\"\"\"\n    return get_config_item(profile_id, \"author_profile\")\n\n\ndef get_contract(contract_id):\n    \"\"\"Load content contract from database.\"\"\"\n    return get_config_item(contract_id, \"contract\")\n\n\ndef get_structure(structure_id):\n    \"\"\"Load content structure from database.\"\"\"\n    result = get_config_item(structure_id, \"structure\")\n    if result:\n        # Structure has additional 'type' field in config\n        result[\"type\"] = result[\"config\"].get(\"type\", \"article\")\n    return result\n\n\ndef get_order(order_id):\n    \"\"\"Load content order with all related data.\"\"\"\n    cursor = db.execute(\n        \"\"\"SELECT co.*,\n                  ap.name as profile_name, ap.content as profile_config,\n                  cc.name as contract_name, cc.content as contract_config,\n                  cs.name as structure_name, cs.content as structure_config\n           FROM content_orders co\n           LEFT JOIN content_config ap ON co.author_profile_id = ap.id AND ap.type = 'author_profile'\n           LEFT JOIN content_config cc ON co.contract_id = cc.id AND cc.type = 'contract'\n           LEFT JOIN content_config cs ON co.structure_id = cs.id AND cs.type = 'structure'\n           WHERE co.id = %s\"\"\",\n        (order_id,),\n    )\n    result = cursor.fetchone()\n    cursor.close()\n    return result\n\n\ndef _parse_new_author_profile(config):\n    \"\"\"Parse new-style author profile (Cary format) into prompt text.\"\"\"\n    sections = []\n\n    # Haltung\n    haltung = config.get(\"haltung\", {})\n    if haltung:\n        sections.append(f\"\"\"### Haltung:\n- Grundhaltung: {haltung.get(\"grundhaltung\", \"\")}\n- Ausrichtung: {haltung.get(\"ausrichtung\", \"\")}\n- Spannungstoleranz: {haltung.get(\"spannungstoleranz\", \"\")}\n- Vereinfachung: {haltung.get(\"vereinfachung\", \"\")}\"\"\")\n\n    # Tonalität\n    tonalitaet = config.get(\"tonalitaet\", {})\n    if tonalitaet:\n        sections.append(f\"\"\"### Tonalität:\n- Charakter: {tonalitaet.get(\"charakter\", \"\")}\n- Stil: {tonalitaet.get(\"stil\", \"\")}\n- Wirkung: {tonalitaet.get(\"wirkung\", \"\")}\n- Abgrenzung: {tonalitaet.get(\"abgrenzung\", \"\")}\"\"\")\n\n    # Sprachmodus\n    sprachmodus = config.get(\"sprachmodus\", {})\n    if sprachmodus:\n        sections.append(f\"\"\"### Sprachmodus:\n- Denkstil: {sprachmodus.get(\"denkstil\", \"\")}\n- Aussagenform: {sprachmodus.get(\"aussagenform\", \"\")}\n- Fragenfunktion: {sprachmodus.get(\"fragenfunktion\", \"\")}\"\"\")\n\n    # Grammatik und Satzbau - WICHTIG für Verbote\n    grammatik = config.get(\"grammatik_und_satzbau\", {})\n    if grammatik:\n        verbote = []\n        if grammatik.get(\"stakkato\") == \"ausgeschlossen\":\n            verbote.append(\"Stakkato-Sätze\")\n        if grammatik.get(\"einschuebe\") == \"keine\":\n            verbote.append(\"Einschübe\")\n        if grammatik.get(\"gedankenstriche\") == \"verboten\":\n            verbote.append(\"Gedankenstriche (–)\")\n\n        sections.append(f\"\"\"### Grammatik und Satzbau:\n- Sätze: {grammatik.get(\"saetze\", \"\")}\n- Rhythmus: {grammatik.get(\"rhythmus\", \"\")}\n- **VERBOTEN:** {\", \".join(verbote) if verbote else \"keine\"}\"\"\")\n\n    # Wortwahl\n    wortwahl = config.get(\"wortwahl\", {})\n    if wortwahl:\n        verboten = []\n        if wortwahl.get(\"buzzwords\") == \"ausge... [TRUNCATED-20c6ec54c2ec9ff0]",
        "structuredPatch": [
            {
                "oldStart": 528,
                "oldLines": 6,
                "newStart": 528,
                "newLines": 8,
                "lines": [
                    "             context=context_text,",
                    "             briefing=briefing,",
                    "             format_instruction=format_instruction,",
                    "+            semantic_text=semantic_text,",
                    "+            taxonomy_text=taxonomy_text,",
                    "         )",
                    "     else:",
                    "         # Fallback if prompt not in DB"
                ]
            },
            {
                "oldStart": 536,
                "oldLines": 6,
                "newStart": 538,
                "newLines": 8,
                "lines": [
                    " {profile_text}",
                    " {contract_text}",
                    " {structure_text}",
                    "+{semantic_text}",
                    "+{taxonomy_text}",
                    " ",
                    " ## Kontext aus der Wissensbasis:",
                    " {context_text}"
                ]
            },
            {
                "oldStart": 550,
                "oldLines": 6,
                "newStart": 554,
                "newLines": 7,
                "lines": [
                    " 4. Strukturiere den Text gemäß dem Template (falls angegeben)",
                    " 5. Schreibe auf Deutsch",
                    " 6. Kennzeichne verwendete Quellen",
                    "+7. Berücksichtige die relevanten Konzepte und deren Beziehungen",
                    " {format_instruction}",
                    " ",
                    " Erstelle nun den Content:\"\"\""
                ]
            }
        ],
        "userModified": false,
        "replaceAll": false
    }
}

← Vorheriger Zur Liste Nächster →