{
"tool_response": {
"filePath": "\/opt\/scripts\/pipeline\/generate.py",
"oldString": " # Build revision prompt\n feedback_text = \"\"\n for critique in critiques:\n fb = json.loads(critique[\"feedback\"]) if isinstance(critique[\"feedback\"], str) else critique[\"feedback\"]\n feedback_text += f\"\\n### {critique['name']}:\\n\"\n feedback_text += f\"- Bewertung: {fb.get('rating', 'N\/A')}\/10\\n\"\n feedback_text += f\"- Probleme: {', '.join(fb.get('issues', []))}\\n\"\n feedback_text += f\"- Vorschläge: {', '.join(fb.get('suggestions', []))}\\n\"\n\n prompt = f\"\"\"Du bist ein professioneller Content-Editor. Überarbeite den folgenden Text basierend auf dem Feedback der Kritiker.\n\n## Originaler Text:\n{content_text}\n\n## Feedback der Kritiker:\n{feedback_text}\n\n## Anweisungen:\n1. Behebe alle genannten Probleme\n2. Setze die Verbesserungsvorschläge um\n3. Behalte den Grundton und Stil bei\n4. Achte auf eine kohärente Überarbeitung\n\nErstelle nun die überarbeitete Version:\"\"\"",
"newString": " # Build revision prompt\n feedback_text = \"\"\n for critique in critiques:\n fb = json.loads(critique[\"feedback\"]) if isinstance(critique[\"feedback\"], str) else critique[\"feedback\"]\n feedback_text += f\"\\n### {critique['name']}:\\n\"\n feedback_text += f\"- Bewertung: {fb.get('rating', 'N\/A')}\/10\\n\"\n feedback_text += f\"- Probleme: {', '.join(fb.get('issues', []))}\\n\"\n feedback_text += f\"- Vorschläge: {', '.join(fb.get('suggestions', []))}\\n\"\n\n # Check if profile requires HTML output\n html_instruction = \"\"\n if version.get(\"profile_config\"):\n profile_config = (\n json.loads(version[\"profile_config\"])\n if isinstance(version[\"profile_config\"], str)\n else version[\"profile_config\"]\n )\n autorenprofil = profile_config.get(\"autorenprofil\", profile_config)\n output_format = autorenprofil.get(\"output_format\", {})\n if output_format.get(\"typ\") == \"html\":\n html_instruction = \"\"\"\n5. **WICHTIG: Behalte das HTML-Format bei!**\n - Verwende nur: h1, h2, h3, h4, p, ul, ol, li, strong, a, section, table, tr, td\n - Jeder h2-Abschnitt muss in einer eigenen <section> stehen\n - Kein Markdown, kein div\/span\/br\n - Fließtext immer in <p>-Tags\"\"\"\n\n prompt = f\"\"\"Du bist ein professioneller Content-Editor. Überarbeite den folgenden Text basierend auf dem Feedback der Kritiker.\n\n## Originaler Text:\n{content_text}\n\n## Feedback der Kritiker:\n{feedback_text}\n\n## Anweisungen:\n1. Behebe alle genannten Probleme\n2. Setze die Verbesserungsvorschläge um\n3. Behalte den Grundton und Stil bei\n4. Achte auf eine kohärente Überarbeitung\n{html_instruction}\n\nErstelle nun die überarbeitete Version:\"\"\"",
"originalFile": "#!\/usr\/bin\/env python3\n\"\"\"\nContent Generation for KI-System\nGenerates content using RAG context, author profiles, and contracts.\n\"\"\"\n\nimport json\nimport sys\n\nsys.path.insert(0, \"\/opt\/scripts\/pipeline\")\n\nfrom config import ANTHROPIC_API_KEY, ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST # noqa: I001, E402\nfrom db import db # noqa: E402\nfrom embed import search_similar # noqa: E402\n\n\ndef _repair_json(json_str):\n \"\"\"\n Attempt to repair common JSON issues from LLM output.\n\n Fixes:\n - Unescaped quotes in strings\n - Missing commas between array elements\n - Trailing commas\n - Control characters in strings\n \"\"\"\n import re\n\n # Remove control characters except newlines and tabs\n json_str = re.sub(r'[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]', '', json_str)\n\n # Fix common issue: missing comma before closing bracket in arrays\n json_str = re.sub(r'\"\\s*\\n\\s*]', '\"\\n]', json_str)\n\n # Fix trailing commas before closing brackets\/braces\n json_str = re.sub(r',\\s*}', '}', json_str)\n json_str = re.sub(r',\\s*]', ']', json_str)\n\n # Fix missing commas between array elements (string followed by string)\n json_str = re.sub(r'\"\\s*\\n\\s*\"', '\",\\n\"', json_str)\n\n # Fix unescaped quotes within strings (heuristic: quotes not at boundaries)\n # This is tricky, so we do a simple fix for common patterns\n lines = json_str.split('\\n')\n fixed_lines = []\n for line in lines:\n # Count quotes - if odd number and line has content, try to fix\n quote_count = line.count('\"') - line.count('\\\\\"')\n if quote_count % 2 != 0 and ':' in line:\n # Try to escape internal quotes (very basic heuristic)\n parts = line.split(':', 1)\n if len(parts) == 2:\n key_part = parts[0]\n value_part = parts[1]\n # If value has odd quotes, try to balance\n if value_part.count('\"') % 2 != 0:\n # Add escaped quote or remove problematic char\n value_part = value_part.rstrip().rstrip(',')\n if not value_part.endswith('\"'):\n value_part += '\"'\n line = key_part + ':' + value_part\n fixed_lines.append(line)\n\n return '\\n'.join(fixed_lines)\n\n\ndef get_rag_context(briefing, collection=\"documents\", limit=5):\n \"\"\"\n Get relevant context from Qdrant based on briefing.\n Returns list of chunks with content and metadata.\n \"\"\"\n results = search_similar(briefing, collection=collection, limit=limit)\n\n context_items = []\n for result in results:\n context_items.append(\n {\n \"content\": result[\"payload\"].get(\"content\", \"\"),\n \"source\": result[\"payload\"].get(\"document_title\", \"Unknown\"),\n \"score\": round(result[\"score\"], 4),\n }\n )\n\n return context_items\n\n\ndef get_config_item(item_id, item_type):\n \"\"\"Load configuration item from content_config table.\"\"\"\n if not item_id:\n return None\n\n cursor = db.execute(\n \"SELECT name, content FROM content_config WHERE id = %s AND type = %s AND status = 'active'\",\n (item_id, item_type),\n )\n result = cursor.fetchone()\n cursor.close()\n\n if result:\n config = json.loads(result[\"content\"]) if isinstance(result[\"content\"], str) else result[\"content\"]\n return {\"name\": result[\"name\"], \"config\": config}\n return None\n\n\ndef get_author_profile(profile_id):\n \"\"\"Load author profile from database.\"\"\"\n return get_config_item(profile_id, \"author_profile\")\n\n\ndef get_contract(contract_id):\n \"\"\"Load content contract from database.\"\"\"\n return get_config_item(contract_id, \"contract\")\n\n\ndef get_structure(structure_id):\n \"\"\"Load content structure from database.\"\"\"\n result = get_config_item(structure_id, \"structure\")\n if result:\n # Structure has additional 'type' field in config\n result[\"type\"] = result[\"config\"].get(\"type\", \"article\")\n return result\n\n\ndef get_order(order_id):\n \"\"\"Load content order with all related data.\"\"\"\n cursor = db.execute(\n \"\"\"SELECT co.*,\n ap.name as profile_name, ap.content as profile_config,\n cc.name as contract_name, cc.content as contract_config,\n cs.name as structure_name, cs.content as structure_config\n FROM content_orders co\n LEFT JOIN content_config ap ON co.author_profile_id = ap.id AND ap.type = 'author_profile'\n LEFT JOIN content_config cc ON co.contract_id = cc.id AND cc.type = 'contract'\n LEFT JOIN content_config cs ON co.structure_id = cs.id AND cs.type = 'structure'\n WHERE co.id = %s\"\"\",\n (order_id,),\n )\n result = cursor.fetchone()\n cursor.close()\n return result\n\n\ndef build_generation_prompt(briefing, context, profile, contract, structure=None):\n \"\"\"Build the content generation prompt.\"\"\"\n\n # Format context\n context_text = \"\"\n for i, ctx in enumerate(context, 1):\n context_text += f\"\\n[Quelle {i}: {ctx['source']}]\\n{ctx['content']}\\n\"\n\n # Build rich profile instructions\n profile_text = \"\"\n if profile:\n config = profile.get(\"config\", {})\n autorenprofil = config.get(\"autorenprofil\", config) # Support both structures\n\n # Extract voice\/stimme\n stimme = autorenprofil.get(\"stimme\", {})\n stimme_text = \"\"\n if stimme:\n stimme_text = f\"\"\"\n### Stimme\/Tonalität:\n- Ton: {stimme.get('ton', 'neutral')}\n- Perspektive: {stimme.get('perspektive', 'neutral')}\n- Komplexität: {stimme.get('komplexitaet', 'mittel')}\n- Autorität: {stimme.get('autoritaet', 'Experte')}\"\"\"\n\n # Extract style\/stil\n stil = autorenprofil.get(\"stil\", {})\n stil_text = \"\"\n if stil:\n fachsprache_beispiele = stil.get(\"fachsprache_beispiele\", [])\n fachsprache_str = \", \".join(fachsprache_beispiele[:5]) if fachsprache_beispiele else \"\"\n stil_text = f\"\"\"\n### Stil:\n- Fachsprache: {'Ja' if stil.get('fachsprache', False) else 'Nein'}\n- Fachbegriffe: {fachsprache_str}\n- Satzlänge: {stil.get('satzlaenge', 'mittel')}\n- Absatzstruktur: {stil.get('absatzstruktur', 'klar gegliedert')}\"\"\"\n\n # Extract language patterns\/sprachliche_muster\n muster = autorenprofil.get(\"sprachliche_muster\", {})\n muster_text = \"\"\n if muster:\n phrasen = muster.get(\"phrasen\", [])\n phrasen_str = \", \".join([f'\"{p}\"' for p in phrasen[:4]]) if phrasen else \"\"\n uebergaenge = muster.get(\"uebergaenge\", [])\n uebergaenge_str = \", \".join([f'\"{u}\"' for u in uebergaenge[:3]]) if uebergaenge else \"\"\n muster_text = f\"\"\"\n### Sprachliche Muster:\n- Typische Phrasen: {phrasen_str}\n- Übergangsformulierungen: {uebergaenge_str}\"\"\"\n\n # Extract taboos\/tabus\n tabus = autorenprofil.get(\"tabus\", [])\n tabus_text = \"\"\n if tabus:\n tabus_str = \", \".join(tabus[:5])\n tabus_text = f\"\"\"\n### Zu vermeiden (Tabus):\n{tabus_str}\"\"\"\n\n # Extract core messages\/kernbotschaften\n kernbotschaften = autorenprofil.get(\"kernbotschaften\", [])\n kern_text = \"\"\n if kernbotschaften:\n kern_str = \"\\n\".join([f\"- {k}\" for k in kernbotschaften[:4]])\n kern_text = f\"\"\"\n### Kernbotschaften (implizit vermitteln):\n{kern_str}\"\"\"\n\n # Extract output format (HTML for web profiles)\n output_format = autorenprofil.get(\"output_format\", {})\n format_text = \"\"\n if output_format and output_format.get(\"typ\") == \"html\":\n erlaubte = output_format.get(\"erlaubte_elemente\", {})\n verboten = output_format.get(\"verbotene_elemente\", [])\n regeln = output_format.get(\"struktur_regeln\", [])\n\n erlaubte_str = []\n for kategorie, elemente in erlaubte.items():\n erlaubte_str.extend(elemente[:3])\n\n format_text = f\"\"\"\n### Output-Format: HTML\n**Erlaubte Elemente:**\n{', '.join(erlaubte_str[:10])}\n\n**Struktur-Regeln:**\n- {chr(10).join('- ' + r for r in regeln[:4]) if regeln else 'Semantisches HTML verwenden'}\n\n**Verboten:** {', '.join(verboten[:4]) if verboten else 'div, span, br, style'}\"\"\"\n\n # Combine all profile sections\n profile_text = f\"\"\"\n## Autorenprofil: {profile.get(\"name\", \"Standard\")}\n{stimme_text}\n{stil_text}\n{muster_text}\n{tabus_text}\n{kern_text}\n{format_text}\n\"\"\"\n\n # Build contract requirements\n contract_text = \"\"\n if contract:\n config = contract.get(\"config\", {})\n req = config.get(\"requirements\", {})\n contract_text = f\"\"\"\nContract: {contract.get(\"name\", \"Standard\")}\n- Wortanzahl: {req.get(\"min_words\", 500)} - {req.get(\"max_words\", 5000)} Wörter\n- Struktur-Validierung: {req.get(\"structure_validation\", True)}\n\"\"\"\n\n # Build structure instructions\n structure_text = \"\"\n if structure:\n config = structure.get(\"config\", {})\n structure_text = f\"\"\"\nStruktur-Template: {structure.get(\"name\", \"\")}\n- Typ: {structure.get(\"type\", \"\")}\n- Abschnitte: {json.dumps(config.get(\"sections\", []), ensure_ascii=False)}\n\"\"\"\n\n # Determine if HTML output is required\n html_output = False\n if profile:\n config = profile.get(\"config\", {})\n autorenprofil = config.get(\"autorenprofil\", config)\n output_format = autorenprofil.get(\"output_format\", {})\n html_output = output_format.get(\"typ\") == \"html\"\n\n format_instruction = \"\"\n if html_output:\n format_instruction = \"\"\"7. **WICHTIG: Ausgabe als wohlformatiertes HTML**\n - Verwende nur: h1, h2, h3, h4, p, ul, ol, li, strong, a, section, table, tr, td\n - Jeder h2-Abschnitt muss in einer eigenen <section> stehen\n - Kein Markdown, kein div\/span\/br\n - Fließtext immer in <p>-Tags\"\"\"\n\n prompt = f\"\"\"Du bist ein professioneller Content-Autor. Erstelle basierend auf dem Briefing und dem bereitgestellten Kontext einen hochwertigen Text.\n\n{profile_text}\n{contract_text}\n{structure_text}\n\n## Kontex... [TRUNCATED-bca985bf0e015c45]",
"structuredPatch": [
{
"oldStart": 733,
"oldLines": 6,
"newStart": 733,
"newLines": 24,
"lines": [
" feedback_text += f\"- Probleme: {', '.join(fb.get('issues', []))}\\n\"",
" feedback_text += f\"- Vorschläge: {', '.join(fb.get('suggestions', []))}\\n\"",
" ",
"+ # Check if profile requires HTML output",
"+ html_instruction = \"\"",
"+ if version.get(\"profile_config\"):",
"+ profile_config = (",
"+ json.loads(version[\"profile_config\"])",
"+ if isinstance(version[\"profile_config\"], str)",
"+ else version[\"profile_config\"]",
"+ )",
"+ autorenprofil = profile_config.get(\"autorenprofil\", profile_config)",
"+ output_format = autorenprofil.get(\"output_format\", {})",
"+ if output_format.get(\"typ\") == \"html\":",
"+ html_instruction = \"\"\"",
"+5. **WICHTIG: Behalte das HTML-Format bei!**",
"+ - Verwende nur: h1, h2, h3, h4, p, ul, ol, li, strong, a, section, table, tr, td",
"+ - Jeder h2-Abschnitt muss in einer eigenen <section> stehen",
"+ - Kein Markdown, kein div\/span\/br",
"+ - Fließtext immer in <p>-Tags\"\"\"",
"+",
" prompt = f\"\"\"Du bist ein professioneller Content-Editor. Überarbeite den folgenden Text basierend auf dem Feedback der Kritiker.",
" ",
" ## Originaler Text:"
]
},
{
"oldStart": 746,
"oldLines": 6,
"newStart": 764,
"newLines": 7,
"lines": [
" 2. Setze die Verbesserungsvorschläge um",
" 3. Behalte den Grundton und Stil bei",
" 4. Achte auf eine kohärente Überarbeitung",
"+{html_instruction}",
" ",
" Erstelle nun die überarbeitete Version:\"\"\"",
" "
]
}
],
"userModified": false,
"replaceAll": false
}
}