utils.py

Code Hygiene Score: 100

Keine Issues gefunden.

Dependencies 1

Funktionen 1

Code

"""
Utility functions for content generation.
"""

import re


def _count_unescaped_quotes(text: str) -> int:
    """Return how many double quotes in *text* are not backslash-escaped."""
    count = 0
    escaped = False
    for char in text:
        if escaped:
            # The preceding backslash consumes this character, whatever it is,
            # so ``\\"`` (escaped backslash + real quote) is counted correctly.
            escaped = False
        elif char == "\\":
            escaped = True
        elif char == '"':
            count += 1
    return count


def _ends_with_unescaped_quote(text: str) -> bool:
    """Return True if *text* ends with a double quote that is not escaped."""
    if not text.endswith('"'):
        return False
    before_quote = text[:-1]
    trailing_backslashes = len(before_quote) - len(before_quote.rstrip("\\"))
    # An even run of backslashes only escapes itself; the quote stays live.
    return trailing_backslashes % 2 == 0


def _close_unterminated_value(line: str) -> str:
    """Append the missing closing quote to a ``key: "value`` style line.

    Lines without a colon, or whose unescaped quotes are already balanced,
    are returned unchanged. A separator comma at the end of the line is kept.
    """
    if ":" not in line or _count_unescaped_quotes(line) % 2 == 0:
        return line

    key_part, value_part = line.split(":", 1)
    if _count_unescaped_quotes(value_part) % 2 == 0:
        # The imbalance is in the key part; this heuristic does not handle it.
        return line

    value = value_part.rstrip()
    # Remember the separator so it can be restored after closing the string;
    # dropping it would leave the next member without a preceding comma.
    has_separator = value.endswith(",")
    value = value.rstrip(",")
    if not _ends_with_unescaped_quote(value):
        value += '"'
    if has_separator:
        value += ","
    return key_part + ":" + value


def repair_json(json_str: str) -> str:
    """
    Attempt to repair common JSON issues from LLM output.

    This is a best-effort, line-oriented heuristic rather than a parser. The
    regular expressions below are not string-aware, so text such as ``, }``
    inside a string value is rewritten as well.

    Fixes:
    - Control characters (tab, newline and carriage return are kept)
    - Trailing commas before ``}`` or ``]``
    - Unterminated string values on ``key: "value`` lines (the closing quote
      is appended; an existing separator comma is preserved)
    - Missing commas between a string ending one line and a string starting
      the next line

    Args:
        json_str: Raw, possibly malformed JSON text.

    Returns:
        The repaired text. It is not guaranteed to be valid JSON.
    """
    # Remove control characters except tab, newline and carriage return.
    json_str = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", json_str)

    # Normalise whitespace between a closing quote and a "]" on a later line.
    json_str = re.sub(r'"\s*\n\s*]', '"\n]', json_str)

    # Drop trailing commas before closing brackets/braces.
    json_str = re.sub(r",\s*}", "}", json_str)
    json_str = re.sub(r",\s*]", "]", json_str)

    # Close string values that are missing their terminating quote.
    repaired = "\n".join(
        _close_unterminated_value(line) for line in json_str.split("\n")
    )

    # Insert missing commas between adjacent strings on consecutive lines.
    # This runs after the quote repair so that freshly closed strings also
    # receive their separator.
    return re.sub(r'"\s*\n\s*"', '",\n"', repaired)
← Übersicht