Protokoll #5415
| ID | 5415 |
|---|---|
| Zeitstempel | 2025-12-22 22:20:24.896171 |
| Client | root |
| IP | — (not recorded; renderer emitted: Warning "Undefined array key \"request_ip\"" and Deprecated "htmlspecialchars(): Passing null to parameter #1 ($string)" in src/View/protokoll/show.php on line 13) |
| Modell | claude-sonnet-4-20250514 |
| Status | completed |
| Tokens | 2,999 (Input: 0, Output: 0) |
| Dauer | 103 ms |
| Request-Zeit | — (not recorded; renderer emitted: Warning "Undefined array key \"request_timestamp\"" in src/View/protokoll/show.php on line 18) |
| Response-Zeit | - |
(No error message recorded; renderer emitted: Warning "Undefined array key \"error_message\"" in src/View/protokoll/show.php on line 22)
Request
{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/opt\/scripts\/pipeline\/quality_test.py",
"old_string": " except:\n pass\n\n result[\"entities\"] = entities",
"new_string": " except (json.JSONDecodeError, AttributeError):\n pass # JSON parsing failed, keep empty entities\n\n result[\"entities\"] = entities"
}
}
Response
{
"tool_response": {
"filePath": "\/opt\/scripts\/pipeline\/quality_test.py",
"oldString": " except:\n pass\n\n result[\"entities\"] = entities",
"newString": " except (json.JSONDecodeError, AttributeError):\n pass # JSON parsing failed, keep empty entities\n\n result[\"entities\"] = entities",
"originalFile": "#!\/usr\/bin\/env python3\n\"\"\"\nQuality comparison test for different LLM models in the pipeline.\nTests entity extraction, relation extraction, and taxonomy classification.\n\"\"\"\n\nimport json\nimport os\nimport sys\nimport time\n\nimport requests\n\n# Add pipeline directory to path\nsys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))\n\nfrom db import db\n\nOLLAMA_HOST = \"http:\/\/localhost:11434\"\n\n# Models to test\nMODELS = {\n \"gemma3\": \"gemma3:27b-it-qat\",\n \"anthropic\": \"claude-opus-4-5-20251101\",\n}\n\n\ndef get_anthropic_client():\n \"\"\"Get Anthropic API client.\"\"\"\n import anthropic\n api_key = os.environ.get(\"ANTHROPIC_API_KEY\", \"\")\n if not api_key:\n env_file = \"\/var\/www\/dev.campus.systemische-tools.de\/.env\"\n if os.path.exists(env_file):\n with open(env_file) as f:\n for line in f:\n if line.startswith(\"ANTHROPIC_API_KEY=\"):\n api_key = line.split(\"=\", 1)[1].strip()\n break\n return anthropic.Anthropic(api_key=api_key) if api_key else None\n\n\ndef run_ollama(model, prompt, timeout=180):\n \"\"\"Run prompt through Ollama model.\"\"\"\n start = time.time()\n try:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\n \"model\": model,\n \"prompt\": prompt,\n \"stream\": False,\n \"format\": \"json\",\n \"options\": {\"temperature\": 0.3, \"num_predict\": 2000}\n },\n timeout=timeout,\n )\n response.raise_for_status()\n data = response.json()\n elapsed = time.time() - start\n return {\n \"response\": data.get(\"response\", \"\"),\n \"tokens\": data.get(\"eval_count\", 0),\n \"duration_ms\": elapsed * 1000,\n \"success\": True\n }\n except Exception as e:\n return {\"response\": \"\", \"error\": str(e), \"success\": False, \"duration_ms\": (time.time() - start) * 1000}\n\n\ndef run_anthropic(client, prompt, model=\"claude-opus-4-5-20251101\"):\n \"\"\"Run prompt through Anthropic model.\"\"\"\n start = time.time()\n try:\n message = client.messages.create(\n 
model=model,\n max_tokens=2000,\n messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n elapsed = time.time() - start\n return {\n \"response\": message.content[0].text,\n \"tokens\": message.usage.input_tokens + message.usage.output_tokens,\n \"input_tokens\": message.usage.input_tokens,\n \"output_tokens\": message.usage.output_tokens,\n \"duration_ms\": elapsed * 1000,\n \"success\": True\n }\n except Exception as e:\n return {\"response\": \"\", \"error\": str(e), \"success\": False, \"duration_ms\": (time.time() - start) * 1000}\n\n\ndef extract_entities(text, model_name, model_id, client=None):\n \"\"\"Extract entities using specified model.\"\"\"\n prompt = f\"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorisiere jede Entität als:\n- PERSON (Namen von Personen)\n- ORGANIZATION (Firmen, Institutionen, Gruppen)\n- CONCEPT (Fachbegriffe, Methoden, Theorien)\n- LOCATION (Orte, Länder)\n- DATE (Zeitangaben)\n- OTHER (Sonstiges)\n\nAntworte NUR im JSON-Format:\n{{\"entities\": [{{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurze Beschreibung\"}}]}}\n\nText:\n{text[:3500]}\n\"\"\"\n\n if model_name == \"anthropic\":\n result = run_anthropic(client, prompt, model_id)\n else:\n result = run_ollama(model_id, prompt)\n\n # Parse JSON\n entities = []\n if result[\"success\"]:\n try:\n import re\n json_match = re.search(r'\\{[\\s\\S]*\\}', result[\"response\"])\n if json_match:\n data = json.loads(json_match.group())\n entities = data.get(\"entities\", [])\n except:\n pass\n\n result[\"entities\"] = entities\n result[\"entity_count\"] = len(entities)\n return result\n\n\ndef classify_taxonomy(text, model_name, model_id, client=None):\n \"\"\"Classify text into taxonomy categories.\"\"\"\n prompt = f\"\"\"Klassifiziere den folgenden Text in passende Kategorien.\n\nWähle aus diesen Hauptkategorien:\n- Methoden (Therapiemethoden, Coaching-Techniken)\n- Theorie (Konzepte, Modelle, Grundlagen)\n- Praxis (Anwendung, 
Fallbeispiele, Übungen)\n- Organisation (Strukturen, Prozesse, Rollen)\n- Kommunikation (Gesprächsführung, Interaktion)\n- Entwicklung (Persönliche Entwicklung, Veränderung)\n- Teamarbeit (Teamdynamik, Zusammenarbeit)\n\nAntworte NUR im JSON-Format:\n{{\"categories\": [\"...\", \"...\"], \"confidence\": 0.0-1.0, \"reasoning\": \"kurze Begründung\"}}\n\nText:\n{text[:2500]}\n\"\"\"\n\n if model_name == \"anthropic\":\n result = run_anthropic(client, prompt, model_id)\n else:\n result = run_ollama(model_id, prompt)\n\n # Parse JSON\n categories = []\n confidence = 0\n reasoning = \"\"\n if result[\"success\"]:\n try:\n import re\n json_match = re.search(r'\\{[\\s\\S]*\\}', result[\"response\"])\n if json_match:\n data = json.loads(json_match.group())\n categories = data.get(\"categories\", [])\n confidence = data.get(\"confidence\", 0)\n reasoning = data.get(\"reasoning\", \"\")\n except:\n pass\n\n result[\"categories\"] = categories\n result[\"confidence\"] = confidence\n result[\"reasoning\"] = reasoning\n return result\n\n\ndef generate_questions(text, model_name, model_id, client=None):\n \"\"\"Generate quiz questions from text.\"\"\"\n prompt = f\"\"\"Erstelle 3 Verständnisfragen zu folgendem Lerntext.\nDie Fragen sollen das Verständnis der Kernkonzepte prüfen.\n\nAntworte NUR im JSON-Format:\n{{\"questions\": [\n {{\"question\": \"...\", \"answer\": \"...\", \"difficulty\": \"leicht|mittel|schwer\"}}\n]}}\n\nText:\n{text[:2500]}\n\"\"\"\n\n if model_name == \"anthropic\":\n result = run_anthropic(client, prompt, model_id)\n else:\n result = run_ollama(model_id, prompt)\n\n # Parse JSON\n questions = []\n if result[\"success\"]:\n try:\n import re\n json_match = re.search(r'\\{[\\s\\S]*\\}', result[\"response\"])\n if json_match:\n data = json.loads(json_match.group())\n questions = data.get(\"questions\", [])\n except:\n pass\n\n result[\"questions\"] = questions\n result[\"question_count\"] = len(questions)\n return result\n\n\ndef 
run_quality_test(document_id):\n \"\"\"Run full quality comparison test.\"\"\"\n db.connect()\n\n # Get document content\n cursor = db.execute(\n \"\"\"SELECT c.content FROM chunks c\n WHERE c.document_id = %s\n ORDER BY c.chunk_index\"\"\",\n (document_id,)\n )\n chunks = cursor.fetchall()\n cursor.close()\n\n full_text = \"\\n\\n\".join([c[\"content\"] for c in chunks])\n print(f\"Dokument geladen: {len(full_text)} Zeichen, {len(chunks)} Chunks\\n\")\n\n # Get Anthropic client\n anthropic_client = get_anthropic_client()\n\n results = {}\n\n for model_name, model_id in MODELS.items():\n print(f\"\\n{'='*60}\")\n print(f\"TESTE: {model_name} ({model_id})\")\n print('='*60)\n\n results[model_name] = {\n \"model_id\": model_id,\n \"tests\": {}\n }\n\n # Skip Anthropic if no client\n client = anthropic_client if model_name == \"anthropic\" else None\n if model_name == \"anthropic\" and not client:\n print(\" ÜBERSPRUNGEN: Kein Anthropic API Key\")\n continue\n\n # Test 1: Entity Extraction\n print(\"\\n[1\/3] Entity Extraction...\")\n entity_result = extract_entities(full_text, model_name, model_id, client)\n results[model_name][\"tests\"][\"entities\"] = entity_result\n print(f\" → {entity_result['entity_count']} Entitäten gefunden ({entity_result['duration_ms']:.0f}ms)\")\n if entity_result.get(\"entities\"):\n for e in entity_result[\"entities\"][:5]:\n print(f\" • {e.get('name', '?')} ({e.get('type', '?')})\")\n\n # Test 2: Taxonomy Classification\n print(\"\\n[2\/3] Taxonomy Classification...\")\n taxonomy_result = classify_taxonomy(full_text, model_name, model_id, client)\n results[model_name][\"tests\"][\"taxonomy\"] = taxonomy_result\n print(f\" → Kategorien: {', '.join(taxonomy_result['categories'])} (Konfidenz: {taxonomy_result['confidence']})\")\n if taxonomy_result.get(\"reasoning\"):\n print(f\" Begründung: {taxonomy_result['reasoning'][:100]}...\")\n\n # Test 3: Question Generation\n print(\"\\n[3\/3] Question Generation...\")\n question_result = 
generate_questions(full_text, model_name, model_id, client)\n results[model_name][\"tests\"][\"questions\"] = question_result\n print(f\" → {question_result['question_count']} Fragen generiert ({question_result['duration_ms']:.0f}ms)\")\n if question_result.get(\"questions\"):\n for q in question_result[\"questions\"][:3]:\n print(f\" Q: {q.get('question', '?')[:60]}...\")\n\n db.disconnect()\n return results\n\n\ndef print_report(results):\n \"\"\"Print detailed comparison report.\"\"\"\n print(\"\\n\")\n print(\"=\" * 80)\n print(\"QUALITÄTSREPORT: Pipeline Output-Vergleich\")\n print(\"=\" * 80)\n\n # Entity comparison\n print(\"\\n### 1. ENTITY EXTRACTION ###\\n\")\n print(f\"{'Modell':<20} {'Entitäten':>10} {'Zeit (ms)':>12} {'Tokens':>10}\")\n print(\"-\" * 55)\n for model, data in results.items():\n if \"entities\" in data.get(\"tests\", {}):\n e = data[\"tests\"][\"entities\"]\n tokens = e.get(\"tokens\", e.get(\"output_tokens\", \"-\"))\n print(f\"{model:<20} {e['entity_count']:>10} {e['duration_ms']:>12.0f} {tokens:>10}\")\n\n # Taxonomy comparison\n print(\"\\n### 2. TAXONOMY CLASSIFICATION ###\\n\")\n for model, data in results.items():\n if \"taxonomy\" in data.get(\"tests\", {}):\n t = data[\"tests\"][\"taxonomy\"]\n print(f\"{model}: {', '.join(t['categories'])} (Konfidenz: {t['confidence']})\"... [TRUNCATED-66bcd843c8f6ac21]",
"structuredPatch": [
{
"oldStart": 123,
"oldLines": 8,
"newStart": 123,
"newLines": 8,
"lines": [
" if json_match:",
" data = json.loads(json_match.group())",
" entities = data.get(\"entities\", [])",
"- except:",
"- pass",
"+ except (json.JSONDecodeError, AttributeError):",
"+ pass # JSON parsing failed, keep empty entities",
" ",
" result[\"entities\"] = entities",
" result[\"entity_count\"] = len(entities)"
]
}
],
"userModified": false,
"replaceAll": false
}
}