taxonomy_classifier.py

Code Hygiene Score: 83

Keine Issues gefunden.

Dependencies 12

Funktionen 1

Code

"""
Taxonomy Classification - Classify text into taxonomy categories.
"""

import json
import re
import sys
import time

import requests

sys.path.insert(0, "/var/www/scripts/pipeline")

from config import ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST
from constants import MS_PER_SECOND, OLLAMA_TIMEOUT
from db import db
from protokoll import protokoll


def _parse_classification(response_text: str) -> dict:
    """Extract the JSON classification object from a raw LLM response.

    Returns the neutral "no classification" result when the response
    contains no brace-delimited span, or when that span is not valid JSON
    — a malformed model response must never surface as an exception.
    """
    json_match = re.search(r"\{[\s\S]*\}", response_text)
    if json_match:
        try:
            return json.loads(json_match.group())
        except json.JSONDecodeError:
            # Model emitted braces but not valid JSON; treat the same as
            # "no classification found" rather than as an LLM-call error.
            pass
    return {"categories": [], "confidence": 0}


def classify_taxonomy(text: str, client=None) -> dict:
    """Classify text into taxonomy categories.

    Args:
        text: Source text to classify. Only the first 2000 characters are
            sent to the model, to bound prompt size.
        client: Optional Anthropic client. When provided, the Anthropic API
            is used; otherwise the local Ollama endpoint is queried.

    Returns:
        dict with keys "categories" (list of category names) and
        "confidence" (0.0–1.0), or the empty result
        {"categories": [], "confidence": 0} on failure or when the model
        returned no parseable JSON.
    """
    prompt_template = db.get_prompt("taxonomy_classification")

    # Fall back to the built-in prompt when none is configured in the DB.
    if not prompt_template:
        prompt_template = """Klassifiziere den folgenden Text in passende Kategorien.

Wähle aus diesen Hauptkategorien:
- Methoden (Therapiemethoden, Techniken)
- Theorie (Konzepte, Modelle, Grundlagen)
- Praxis (Anwendung, Fallbeispiele)
- Organisation (Strukturen, Prozesse)
- Kommunikation (Gesprächsführung, Interaktion)
- Entwicklung (Persönliche Entwicklung, Veränderung)

Antworte NUR im JSON-Format:
{"categories": ["...", "..."], "confidence": 0.0-1.0}

Text:
{{TEXT}}"""

    # str.replace (not str.format) — the template itself contains literal braces.
    prompt = prompt_template.replace("{{TEXT}}", text[:2000])

    try:
        # monotonic() is the correct clock for durations: unlike time.time()
        # it cannot jump backwards/forwards on wall-clock adjustments.
        start_time = time.monotonic()
        tokens_in, tokens_out = 0, 0
        model_name = ""

        if client:
            # Anthropic API path.
            message = client.messages.create(
                model=ANTHROPIC_MODEL, max_tokens=500, messages=[{"role": "user", "content": prompt}]
            )
            response_text = message.content[0].text
            tokens_in = message.usage.input_tokens
            tokens_out = message.usage.output_tokens
            model_name = ANTHROPIC_MODEL
        else:
            # Local Ollama path; format="json" nudges the model toward JSON output.
            response = requests.post(
                f"{OLLAMA_HOST}/api/generate",
                json={"model": OLLAMA_CHAT_MODEL, "prompt": prompt, "stream": False, "format": "json"},
                timeout=OLLAMA_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
            response_text = data.get("response", "{}")
            tokens_in = data.get("prompt_eval_count", 0)
            tokens_out = data.get("eval_count", 0)
            model_name = f"ollama:{OLLAMA_CHAT_MODEL}"

        duration_ms = int((time.monotonic() - start_time) * MS_PER_SECOND)

        protokoll.log_llm_call(
            request=f"[taxonomy_classification] {prompt[:500]}...",
            response=response_text[:2000],
            model_name=model_name,
            tokens_input=tokens_in,
            tokens_output=tokens_out,
            duration_ms=duration_ms,
            status="completed",
        )

        # Parse failures are handled inside the helper: the call itself
        # succeeded and was already logged as "completed" above, so a broken
        # response must not also be logged as an LLM-call error below.
        return _parse_classification(response_text)
    except Exception as e:
        # Boundary handler: log the failed call and return the neutral result
        # so the pipeline keeps running.
        db.log("ERROR", f"Taxonomy classification failed: {e}")
        protokoll.log_llm_call(
            request=f"[taxonomy_classification] {prompt[:500]}...",
            model_name=ANTHROPIC_MODEL if client else f"ollama:{OLLAMA_CHAT_MODEL}",
            status="error",
            error_message=str(e),
        )
        return {"categories": [], "confidence": 0}
← Übersicht