utils.py
- Pfad:
/var/www/scripts/pipeline/generators/utils.py - Namespace: pipeline
- Zeilen: 53 | Größe: 1,870 Bytes
- Geändert: 2025-12-25 13:55:49 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 100
- Dependencies: 100 (25%)
- LOC: 100 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
Keine Issues gefunden.
Dependencies 1
- use re
Funktionen 1
- repair_json() (Zeile 8)
Code
"""
Utility functions for content generation.
"""
import re
def _count_unescaped_quotes(text: str) -> int:
    """Return how many double quotes in *text* are not backslash-escaped."""
    count = 0
    escaped = False
    for char in text:
        if escaped:
            # This character is consumed by the preceding backslash
            # (covers both \" and \\).
            escaped = False
        elif char == "\\":
            escaped = True
        elif char == '"':
            count += 1
    return count


def repair_json(json_str: str) -> str:
    """
    Attempt to repair common JSON issues from LLM output.

    This is a best-effort, regex/line based heuristic, not a parser: the
    substitutions do not know whether they are inside a string literal, so
    the result is not guaranteed to be valid JSON.

    Fixes:
    - Control characters (everything below 0x20 except tab, newline and
      carriage return) are removed
    - Trailing commas before ``}`` or ``]`` are removed
    - Missing commas between two strings on consecutive lines are inserted
    - Values on a ``key: value`` line that lack their closing quote get one
      appended; a trailing separator comma on that line is preserved

    Args:
        json_str: Raw, possibly malformed JSON text.

    Returns:
        The repaired text.
    """
    # Remove control characters except tab (\x09), newline (\x0a) and
    # carriage return (\x0d).
    json_str = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", json_str)

    # Normalize the whitespace between a closing quote and a "]" on a
    # following line to a single newline (no comma is inserted here).
    json_str = re.sub(r'"\s*\n\s*]', '"\n]', json_str)

    # Drop trailing commas before closing braces/brackets.
    json_str = re.sub(r",\s*}", "}", json_str)
    json_str = re.sub(r",\s*]", "]", json_str)

    # Insert a comma between a string that ends one line and a string that
    # starts the next one.
    json_str = re.sub(r'"\s*\n\s*"', '",\n"', json_str)

    # Close values whose closing quote is missing (line-based heuristic).
    fixed_lines = []
    for line in json_str.split("\n"):
        key_part, colon, value_part = line.partition(":")
        # Both the whole line and the value must have an odd number of
        # unescaped quotes; the line-level check avoids touching lines where
        # the first ":" sits inside the key string.
        if (
            colon
            and _count_unescaped_quotes(line) % 2 != 0
            and _count_unescaped_quotes(value_part) % 2 != 0
        ):
            value_part = value_part.rstrip()
            had_comma = value_part.endswith(",")
            value_part = value_part.rstrip(",")
            if not value_part.endswith('"'):
                value_part += '"'
            if had_comma:
                # Keep the element separator; dropping it would leave the
                # following member without a preceding comma.
                value_part += ","
            line = key_part + colon + value_part
        fixed_lines.append(line)

    return "\n".join(fixed_lines)