taxonomy_classifier.py
- Path: /var/www/scripts/pipeline/analyzers/taxonomy_classifier.py | Namespace: pipeline
- Lines: 94 | Size: 3,127 bytes
- Modified: 2025-12-27 15:37:10 | Scanned: 2025-12-31 10:22:15
Code Hygiene Score: 83 (see the calculation sketch after the breakdown below)
- Dependencies: 30 (25%)
- LOC: 100 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
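The percentages in the breakdown read as per-category weights. Assuming the overall score is the weighted sum of the sub-scores (an assumption about the scanner, not documented here), the reported 83 can be reproduced as 82.5 rounded up:
# Hypothetical reconstruction of the hygiene score; sub-scores and weights are taken from the report above.
categories = [
    ("Dependencies", 30, 25),
    ("LOC", 100, 20),
    ("Methods", 100, 20),
    ("Secrets", 100, 15),
    ("Classes", 100, 10),
    ("Magic Numbers", 100, 10),
]
weighted = sum(score * weight for _, score, weight in categories) / 100
print(weighted)  # 82.5 -> reported as 83 once rounded up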
No issues found.
Dependencies (12)
- use json
- use re
- use sys
- use time
- use requests
- use config.ANTHROPIC_MODEL
- use config.OLLAMA_CHAT_MODEL
- use config.OLLAMA_HOST
- use constants.MS_PER_SECOND
- use constants.OLLAMA_TIMEOUT
- use db.db
- use protokoll.protokoll
Functions (1)
- classify_taxonomy() (line 20)
Code
"""
Taxonomy Classification - Classify text into taxonomy categories.
"""
import json
import re
import sys
import time
import requests
sys.path.insert(0, "/var/www/scripts/pipeline")
from config import ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST
from constants import MS_PER_SECOND, OLLAMA_TIMEOUT
from db import db
from protokoll import protokoll
def classify_taxonomy(text: str, client=None) -> dict:
    """Classify text into taxonomy categories."""
    # Prefer the prompt stored in the database; fall back to the built-in German template.
    prompt_template = db.get_prompt("taxonomy_classification")
    if not prompt_template:
        prompt_template = """Klassifiziere den folgenden Text in passende Kategorien.
Wähle aus diesen Hauptkategorien:
- Methoden (Therapiemethoden, Techniken)
- Theorie (Konzepte, Modelle, Grundlagen)
- Praxis (Anwendung, Fallbeispiele)
- Organisation (Strukturen, Prozesse)
- Kommunikation (Gesprächsführung, Interaktion)
- Entwicklung (Persönliche Entwicklung, Veränderung)
Antworte NUR im JSON-Format:
{"categories": ["...", "..."], "confidence": 0.0-1.0}
Text:
{{TEXT}}"""
    prompt = prompt_template.replace("{{TEXT}}", text[:2000])
    try:
        start_time = time.time()
        tokens_in, tokens_out = 0, 0
        model_name = ""
        if client:
            # Anthropic path: an API client was passed in.
            message = client.messages.create(
                model=ANTHROPIC_MODEL, max_tokens=500, messages=[{"role": "user", "content": prompt}]
            )
            response_text = message.content[0].text
            tokens_in = message.usage.input_tokens
            tokens_out = message.usage.output_tokens
            model_name = ANTHROPIC_MODEL
        else:
            # Fallback path: call the local Ollama generate endpoint.
            response = requests.post(
                f"{OLLAMA_HOST}/api/generate",
                json={"model": OLLAMA_CHAT_MODEL, "prompt": prompt, "stream": False, "format": "json"},
                timeout=OLLAMA_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
            response_text = data.get("response", "{}")
            tokens_in = data.get("prompt_eval_count", 0)
            tokens_out = data.get("eval_count", 0)
            model_name = f"ollama:{OLLAMA_CHAT_MODEL}"
        duration_ms = int((time.time() - start_time) * MS_PER_SECOND)
        protokoll.log_llm_call(
            request=f"[taxonomy_classification] {prompt[:500]}...",
            response=response_text[:2000],
            model_name=model_name,
            tokens_input=tokens_in,
            tokens_output=tokens_out,
            duration_ms=duration_ms,
            status="completed",
        )
        # Extract the first JSON object from the model response.
        json_match = re.search(r"\{[\s\S]*\}", response_text)
        if json_match:
            return json.loads(json_match.group())
        return {"categories": [], "confidence": 0}
    except Exception as e:
        db.log("ERROR", f"Taxonomy classification failed: {e}")
        protokoll.log_llm_call(
            request=f"[taxonomy_classification] {prompt[:500]}...",
            model_name=ANTHROPIC_MODEL if client else f"ollama:{OLLAMA_CHAT_MODEL}",
            status="error",
            error_message=str(e),
        )
        return {"categories": [], "confidence": 0}