critic.py
- Pfad:
/var/www/scripts/pipeline/generators/critic.py - Namespace: pipeline
- Zeilen: 378 | Größe: 13,407 Bytes
- Geändert: 2025-12-31 03:01:09 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 76
- Dependencies: 50 (25%)
- LOC: 40 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
Keine Issues gefunden.
Dependencies 10
- use json
- use re
- use sys
- use db.db
- use config_loader.get_prompt
- use content_generator.call_llm
- use format_checker.check_formatting
- use persistence.save_version
- use persistence.update_order_status
- use utils.repair_json
Funktionen 4
- get_critic() – Zeile 20
- run_critic() – Zeile 41
- run_critique_round() – Zeile 150
- revise_content() – Zeile 255
Code
"""
Critic Functions - Content critique and revision.
"""
import json
import re
import sys
sys.path.insert(0, "/var/www/scripts/pipeline")
from db import db
from .config_loader import get_prompt
from .content_generator import call_llm
from .format_checker import check_formatting
from .persistence import save_version, update_order_status
from .utils import repair_json
def get_critic(critic_id: int) -> dict | None:
    """Load an active critic from the content_config table.

    Args:
        critic_id: Primary key of the critic row (type = 'critic', status = 'active').

    Returns:
        The critic row as a dict, augmented with a "fokus" key (list of focus
        aspects extracted from the row's content JSON; empty list if the JSON
        is missing, malformed, or has no "fokus" entry), or None if no active
        critic with that id exists.
    """
    cursor = db.execute(
        """SELECT cc.id, cc.name, cc.content, cc.prompt_id, cc.sort_order,
               p.content as prompt_content
        FROM content_config cc
        LEFT JOIN prompts p ON cc.prompt_id = p.id
        WHERE cc.id = %s AND cc.type = 'critic' AND cc.status = 'active'""",
        (critic_id,),
    )
    try:
        result = cursor.fetchone()
    finally:
        # Close the cursor even if fetchone() raises, so we don't leak it.
        cursor.close()
    if not result:
        return None
    # "content" may arrive as a JSON string or an already-decoded dict,
    # depending on the DB driver; normalize defensively before extracting
    # "fokus" so a NULL or malformed content column cannot crash the caller.
    content = result["content"]
    if isinstance(content, str):
        try:
            content = json.loads(content)
        except json.JSONDecodeError:
            content = {}
    result["fokus"] = content.get("fokus", []) if isinstance(content, dict) else []
    return result
def run_critic(
    content: str,
    critic_id: int,
    model: str = "anthropic",
    structure_config: dict | None = None,
    profile_config: dict | None = None,
) -> dict:
    """
    Run a single critic on content.

    Args:
        content: The text content to critique
        critic_id: ID of the critic in content_config
        model: LLM model to use (ignored for Formatierungsprüfer)
        structure_config: Optional structure config for format rules
        profile_config: Optional author profile for format rules

    Returns:
        dict with feedback and rating, or {"error": ...} on failure
    """
    db.connect()
    try:
        critic = get_critic(critic_id)
        if not critic:
            return {"error": f"Critic {critic_id} not found"}
        # Formatierungsprüfer (hard-wired as critic id 33): uses the
        # deterministic rule-based checker instead of an LLM call.
        if critic["name"] == "Formatierungsprüfer" or critic_id == 33:
            result = check_formatting(content, structure_config, profile_config)
            return {
                "critic_name": "Formatierungsprüfer",
                "rating": result["score"],
                "score": result["score"],
                "passed": result["passed"],
                "issues": result["issues"],
                "suggestions": ["Formatierung korrigieren"] if not result["passed"] else [],
                "summary": result["summary"],
                "deterministic": True,  # Flag to indicate non-LLM check
            }
        # "fokus" is normally already a list (see get_critic), but tolerate a
        # raw JSON string for robustness.
        fokus = json.loads(critic["fokus"]) if isinstance(critic["fokus"], str) else critic["fokus"]
        fokus_str = ", ".join(fokus)
        # Prompt resolution order: critic-specific prompt from DB, then the
        # generic DB prompt, then a hard-coded last-resort template.
        prompt_template = critic.get("prompt_content")
        if not prompt_template:
            prompt_template = get_prompt("critic-generic")
        if not prompt_template:
            # Ultimate fallback - should never happen if DB is properly set up
            prompt_template = """Du bist ein kritischer Lektor mit dem Fokus auf: {fokus}
Analysiere den folgenden Text und gib strukturiertes Feedback:
## Text:
{content}
## Deine Aufgabe:
1. Prüfe den Text auf die Aspekte: {fokus}
2. Identifiziere konkrete Verbesserungspunkte
3. Bewerte die Qualität (1-10)
Antworte im JSON-Format:
{{
"rating": 8,
"passed": true,
"issues": ["Issue 1", "Issue 2"],
"suggestions": ["Suggestion 1"],
"summary": "Kurze Zusammenfassung"
}}"""
        # Format prompt with variables
        prompt = prompt_template.format(fokus=fokus_str, content=content)
        response = call_llm(prompt, model, client_name="content-studio-critique")
        return _parse_critic_feedback(response, critic["name"])
    except Exception as e:
        return {"error": str(e)}
    finally:
        db.disconnect()


def _parse_critic_feedback(response: str, critic_name: str) -> dict:
    """Extract the JSON feedback object from a raw LLM response.

    Tries a direct parse of the outermost {...} span, then a second parse
    after repair_json(); if both fail (or no JSON is found), returns a
    neutral "could not parse" feedback dict carrying the truncated response.
    """
    json_match = re.search(r"\{[\s\S]*\}", response)
    if json_match:
        json_str = json_match.group()
        try:
            feedback = json.loads(json_str)
        except json.JSONDecodeError:
            # Try to repair common JSON issues (only attempted after a
            # failed direct parse, so repair_json cost/errors stay off the
            # happy path).
            try:
                feedback = json.loads(repair_json(json_str))
            except json.JSONDecodeError:
                feedback = None
        if feedback is not None:
            feedback["critic_name"] = critic_name
            return feedback
    # Parsing failed entirely: neutral mid rating, marked as not passed.
    return {
        "critic_name": critic_name,
        "rating": 5,
        "passed": False,
        "issues": ["Konnte Feedback nicht parsen"],
        "suggestions": [],
        "summary": response[:500],
    }
def run_critique_round(version_id: int, model: str = "anthropic") -> dict:
    """
    Run all active critics on a content version.

    Loads the version plus its order settings, increments the order's
    critique round, runs each selected (or all active) critics via
    run_critic(), persists each successful critique, and moves the order to
    'validate' (all passed) or 'revision' (any failed).

    Args:
        version_id: ID of the content_versions row to critique.
        model: LLM backend passed through to run_critic().

    Returns:
        dict with round number, critique results and all_passed flag on
        success; {"success": True, "skipped": True, ...} when the order has
        quality_check disabled; {"error": ...} on failure.
    """
    db.connect()
    try:
        # Get version content and order settings (including selected_critics)
        cursor = db.execute(
            """SELECT cv.*, co.id as order_id, co.current_critique_round,
               co.selected_critics, co.quality_check
               FROM content_versions cv
               JOIN content_orders co ON cv.order_id = co.id
               WHERE cv.id = %s""",
            (version_id,),
        )
        version = cursor.fetchone()
        cursor.close()
        if not version:
            return {"error": "Version not found"}
        # Skip entirely when the order opted out of quality checking.
        if not version.get("quality_check", False):
            return {"success": True, "skipped": True, "message": "Qualitätsprüfung deaktiviert"}
        # "content" may be a JSON string or an already-decoded dict depending
        # on the DB driver; the critique operates on its "text" field only.
        content_data = json.loads(version["content"]) if isinstance(version["content"], str) else version["content"]
        content_text = content_data.get("text", "")
        # Parse selected_critics from order (JSON array of IDs)
        selected_critics_raw = version.get("selected_critics")
        if selected_critics_raw:
            if isinstance(selected_critics_raw, str):
                selected_critic_ids = json.loads(selected_critics_raw)
            else:
                selected_critic_ids = selected_critics_raw
        else:
            selected_critic_ids = []
        # Get critics - filter by selected_critics if specified
        if selected_critic_ids:
            # Only use selected critics. IDs go in as bound parameters via
            # %s placeholders; only the placeholder list is interpolated.
            placeholders = ", ".join(["%s"] * len(selected_critic_ids))
            sql = (
                "SELECT id, name FROM content_config "
                f"WHERE type = 'critic' AND status = 'active' AND id IN ({placeholders}) "
                "ORDER BY sort_order"
            )
            cursor = db.execute(sql, tuple(selected_critic_ids))
        else:
            # Fallback: use all active critics if none selected
            sql = "SELECT id, name FROM content_config WHERE type = 'critic' AND status = 'active' ORDER BY sort_order"
            cursor = db.execute(sql)
        critics = cursor.fetchall()
        cursor.close()
        # Increment critique round; None (never critiqued) counts as round 0.
        new_round = (version["current_critique_round"] or 0) + 1
        cursor = db.execute(
            "UPDATE content_orders SET current_critique_round = %s WHERE id = %s", (new_round, version["order_id"])
        )
        db.commit()
        cursor.close()
        # Run each critic
        results = []
        all_passed = True
        for critic in critics:
            # run_critic() manages its own connection (connect + disconnect),
            # so release ours first and reacquire afterwards.
            db.disconnect()  # Disconnect before calling run_critic
            feedback = run_critic(content_text, critic["id"], model)
            db.connect()  # Reconnect
            # NOTE(review): when run_critic returns {"error": ...} the
            # feedback is silently dropped — it is neither saved nor appended
            # to results, and all_passed is unaffected. Presumably deliberate
            # best-effort behavior; confirm before changing.
            if "error" not in feedback:
                # Save critique
                cursor = db.execute(
                    """INSERT INTO content_critiques (version_id, critic_id, round, feedback)
                    VALUES (%s, %s, %s, %s)""",
                    (version_id, critic["id"], new_round, json.dumps(feedback)),
                )
                db.commit()
                cursor.close()
                # Missing "passed" key defaults to passing.
                if not feedback.get("passed", True):
                    all_passed = False
                results.append(feedback)
        # Update order status based on results
        if all_passed:
            update_order_status(version["order_id"], "validate")
        else:
            update_order_status(version["order_id"], "revision")
        return {"success": True, "round": new_round, "critiques": results, "all_passed": all_passed}
    except Exception as e:
        return {"error": str(e)}
    finally:
        db.disconnect()
def revise_content(version_id: int, model: str = "anthropic") -> dict:
    """
    Create a revision based on critique feedback.

    Loads the version together with its order, author profile and structure
    config, aggregates the current round's critiques into a feedback section,
    asks the LLM for a rewrite, and stores the result as a new version with
    version_number + 1.

    Args:
        version_id: ID of the content_versions row to revise.
        model: LLM backend passed through to call_llm().

    Returns:
        dict with order_id, new version_id/version_number and the revised
        content on success, or {"error": ...} on failure.
    """
    db.connect()
    try:
        # Get version and critiques
        cursor = db.execute(
            """SELECT cv.*, co.id as order_id, co.briefing, co.current_critique_round,
               ap.content as profile_config,
               cs.content as structure_config
               FROM content_versions cv
               JOIN content_orders co ON cv.order_id = co.id
               LEFT JOIN content_config ap ON co.author_profile_id = ap.id AND ap.type = 'author_profile'
               LEFT JOIN content_config cs ON co.structure_id = cs.id AND cs.type = 'structure'
               WHERE cv.id = %s""",
            (version_id,),
        )
        version = cursor.fetchone()
        cursor.close()
        if not version:
            return {"error": "Version not found"}
        # "content" may be a JSON string or an already-decoded dict depending
        # on the DB driver; the revision operates on its "text" field only.
        content_data = json.loads(version["content"]) if isinstance(version["content"], str) else version["content"]
        content_text = content_data.get("text", "")
        # Get latest critiques (critics now in content_config)
        cursor = db.execute(
            """SELECT cfg.name, cc.feedback
               FROM content_critiques cc
               JOIN content_config cfg ON cc.critic_id = cfg.id AND cfg.type = 'critic'
               WHERE cc.version_id = %s AND cc.round = %s""",
            (version_id, version["current_critique_round"]),
        )
        critiques = cursor.fetchall()
        cursor.close()
        # Build the feedback section of the revision prompt: one block per
        # critic with rating, issues and suggestions.
        feedback_text = ""
        for critique in critiques:
            fb = json.loads(critique["feedback"]) if isinstance(critique["feedback"], str) else critique["feedback"]
            feedback_text += f"\n### {critique['name']}:\n"
            feedback_text += f"- Bewertung: {fb.get('rating', 'N/A')}/10\n"
            feedback_text += f"- Probleme: {', '.join(fb.get('issues', []))}\n"
            feedback_text += f"- Vorschläge: {', '.join(fb.get('suggestions', []))}\n"
        # Determine output format from structure
        output_format = "markdown"  # Default
        html_instruction = ""
        if version.get("structure_config"):
            structure_config = (
                json.loads(version["structure_config"])
                if isinstance(version["structure_config"], str)
                else version["structure_config"]
            )
            ausgabe = structure_config.get("ausgabe", {})
            output_format = ausgabe.get("format", "markdown")
            erlaubte_tags = ausgabe.get(
                "erlaubte_tags", ["h1", "h2", "h3", "h4", "p", "ul", "ol", "li", "strong", "a", "table", "hr"]
            )
            # For HTML output, append a strict format-preservation rule to
            # the prompt (numbered 5 to follow the template's rules 1-4).
            if output_format == "body-html":
                tags_str = ", ".join(erlaubte_tags)
                html_instruction = f"""
5. **KRITISCH - Behalte das HTML-Format bei!**
   - Nur diese Tags: {tags_str}
   - KEIN Markdown, KEINE ## oder ** oder -
   - KEIN div, span, br, img, script, style
   - Fließtext immer in <p>-Tags"""
        # Load revise prompt from database
        prompt_template = get_prompt("content-revise")
        if prompt_template:
            prompt = prompt_template.format(
                content=content_text, feedback=feedback_text, html_instruction=html_instruction
            )
        else:
            # Fallback if prompt not in DB
            prompt = f"""Du bist ein professioneller Content-Editor. Überarbeite den folgenden Text basierend auf dem Feedback der Kritiker.
## Originaler Text:
{content_text}
## Feedback der Kritiker:
{feedback_text}
## Anweisungen:
1. Behebe alle genannten Probleme
2. Setze die Verbesserungsvorschläge um
3. Behalte den Grundton und Stil bei
4. Achte auf eine kohärente Überarbeitung
{html_instruction}
Erstelle nun die überarbeitete Version:"""
        # Generate revision; order status reflects the in-progress LLM call.
        update_order_status(version["order_id"], "generating")
        revised_content = call_llm(prompt, model, client_name="content-studio-revise")
        # Save new version with correct format
        new_version_number = version["version_number"] + 1
        new_version_id = save_version(version["order_id"], revised_content, new_version_number, output_format)
        # Back to 'critique' so the new version gets reviewed again.
        update_order_status(version["order_id"], "critique")
        return {
            "success": True,
            "order_id": version["order_id"],
            "version_id": new_version_id,
            "version_number": new_version_number,
            "content": revised_content,
        }
    except Exception as e:
        return {"error": str(e)}
    finally:
        db.disconnect()