Protokoll #10558
| ID | 10558 |
|---|---|
| Zeitstempel | 2025-12-23 15:28:27.802729 |
| Client | root |
| IP | - (Feld "request_ip" fehlt im Datensatz) |
| Modell | claude-sonnet-4-20250514 |
| Status | completed |
| Tokens | 3,955 (Input: 0, Output: 0) |
| Dauer | 128 ms |
| Request-Zeit | - (Feld "request_timestamp" fehlt im Datensatz) |
| Response-Zeit | - |
Hinweis: Feld "error_message" fehlt im Datensatz (keine Fehlermeldung vorhanden).
Request
{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/opt\/scripts\/pipeline\/analyze.py",
"old_string": "def analyze_chunks_semantics(document_id, client=None):\n \"\"\"Analyze all chunks of a document for semantics.\"\"\"\n cursor = db.execute(\n \"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\",\n (document_id,)\n )\n chunks = cursor.fetchall()\n cursor.close()\n\n db.log(\"INFO\", f\"Analyzing semantics for {len(chunks)} chunks\")\n\n analyzed = 0\n for chunk in chunks:\n result = analyze_chunk_semantics(chunk[\"id\"], chunk[\"content\"], client)\n if result:\n analyzed += 1\n\n db.log(\"INFO\", f\"Analyzed {analyzed}\/{len(chunks)} chunks for semantics\")\n return analyzed",
"new_string": "def analyze_chunks_semantics(document_id, client=None, progress=None):\n \"\"\"Analyze all chunks of a document for semantics.\"\"\"\n cursor = db.execute(\n \"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\",\n (document_id,)\n )\n chunks = cursor.fetchall()\n cursor.close()\n\n total = len(chunks)\n db.log(\"INFO\", f\"Analyzing semantics for {total} chunks\")\n if progress:\n progress.add_log(f\"Semantik: Analysiere {total} Chunks...\")\n\n analyzed = 0\n for i, chunk in enumerate(chunks, 1):\n if progress:\n progress.add_log(f\"Semantik: Chunk {i}\/{total}...\")\n\n result = analyze_chunk_semantics(chunk[\"id\"], chunk[\"content\"], client)\n if result:\n analyzed += 1\n\n db.log(\"INFO\", f\"Analyzed {analyzed}\/{total} chunks for semantics\")\n if progress:\n progress.add_log(f\"Semantik: {analyzed}\/{total} Chunks analysiert\")\n return analyzed"
}
}
Response
{
"tool_response": {
"filePath": "\/opt\/scripts\/pipeline\/analyze.py",
"oldString": "def analyze_chunks_semantics(document_id, client=None):\n \"\"\"Analyze all chunks of a document for semantics.\"\"\"\n cursor = db.execute(\n \"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\",\n (document_id,)\n )\n chunks = cursor.fetchall()\n cursor.close()\n\n db.log(\"INFO\", f\"Analyzing semantics for {len(chunks)} chunks\")\n\n analyzed = 0\n for chunk in chunks:\n result = analyze_chunk_semantics(chunk[\"id\"], chunk[\"content\"], client)\n if result:\n analyzed += 1\n\n db.log(\"INFO\", f\"Analyzed {analyzed}\/{len(chunks)} chunks for semantics\")\n return analyzed",
"newString": "def analyze_chunks_semantics(document_id, client=None, progress=None):\n \"\"\"Analyze all chunks of a document for semantics.\"\"\"\n cursor = db.execute(\n \"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\",\n (document_id,)\n )\n chunks = cursor.fetchall()\n cursor.close()\n\n total = len(chunks)\n db.log(\"INFO\", f\"Analyzing semantics for {total} chunks\")\n if progress:\n progress.add_log(f\"Semantik: Analysiere {total} Chunks...\")\n\n analyzed = 0\n for i, chunk in enumerate(chunks, 1):\n if progress:\n progress.add_log(f\"Semantik: Chunk {i}\/{total}...\")\n\n result = analyze_chunk_semantics(chunk[\"id\"], chunk[\"content\"], client)\n if result:\n analyzed += 1\n\n db.log(\"INFO\", f\"Analyzed {analyzed}\/{total} chunks for semantics\")\n if progress:\n progress.add_log(f\"Semantik: {analyzed}\/{total} Chunks analysiert\")\n return analyzed",
"originalFile": "\"\"\"\nSemantic analysis for KI-System Pipeline\nEntity extraction, taxonomy classification, ontology mapping.\n\"\"\"\n\nimport json\nimport os\nimport time\n\nimport requests\n\nfrom config import ANTHROPIC_API_KEY, ANTHROPIC_MODEL, OLLAMA_CHAT_MODEL, OLLAMA_HOST\nfrom db import db\nfrom protokoll import protokoll\n\n\ndef get_anthropic_client():\n \"\"\"Get Anthropic API client.\"\"\"\n try:\n import anthropic\n\n if ANTHROPIC_API_KEY:\n db.log(\"INFO\", \"Using Anthropic API (Claude)\")\n return anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)\n else:\n db.log(\"WARNING\", \"No Anthropic API key found, falling back to Ollama\")\n except ImportError:\n db.log(\"WARNING\", \"anthropic package not installed, falling back to Ollama\")\n return None\n\n\ndef extract_entities_ollama(text, model=\"gemma3:27b-it-qat\"):\n \"\"\"Extract entities using Ollama.\"\"\"\n # Load prompt from database\n prompt_template = db.get_prompt(\"entity_extraction\")\n\n if not prompt_template:\n db.log(\"WARNING\", \"entity_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Analysiere den Text und extrahiere wichtige Entitäten.\nKategorisiere als: PERSON, ORGANIZATION, CONCEPT, LOCATION\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"description\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])\n\n try:\n start_time = time.time()\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": model, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n data = response.json()\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Parse JSON from response\n response_text = data.get(\"response\", \"{}\")\n\n # Log to ki-protokoll\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n response=response_text[:2000],\n 
model_name=f\"ollama:{model}\",\n tokens_input=data.get(\"prompt_eval_count\", 0),\n tokens_output=data.get(\"eval_count\", 0),\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n try:\n entities = json.loads(response_text)\n return entities.get(\"entities\", [])\n except json.JSONDecodeError:\n db.log(\"WARNING\", \"Failed to parse entity JSON from Ollama\")\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Ollama entity extraction failed: {e}\")\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n model_name=f\"ollama:{model}\",\n status=\"error\",\n error_message=str(e),\n )\n return []\n\n\ndef extract_entities_anthropic(text, client):\n \"\"\"Extract entities using Anthropic Claude.\"\"\"\n # Get prompt from database\n prompt_template = db.get_prompt(\"entity_extraction\")\n\n if not prompt_template:\n prompt_template = \"\"\"Analysiere den folgenden deutschen Text und extrahiere alle wichtigen Entitäten.\n\nKategorisiere jede Entität als:\n- PERSON (Namen von Personen)\n- ORGANIZATION (Firmen, Institutionen, Gruppen)\n- CONCEPT (Fachbegriffe, Methoden, Theorien)\n- LOCATION (Orte, Länder)\n- DATE (Zeitangaben)\n- OTHER (Sonstiges)\n\nAntworte NUR im JSON-Format:\n{\"entities\": [{\"name\": \"...\", \"type\": \"...\", \"context\": \"kurzer Kontext der Erwähnung\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:4000])\n\n try:\n start_time = time.time()\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=2000, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n duration_ms = int((time.time() - start_time) * 1000)\n\n response_text = message.content[0].text\n\n # Log to ki-protokoll\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n response=response_text[:2000],\n model_name=ANTHROPIC_MODEL,\n tokens_input=message.usage.input_tokens,\n tokens_output=message.usage.output_tokens,\n duration_ms=duration_ms,\n 
status=\"completed\",\n )\n\n # Extract JSON from response\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n entities = json.loads(json_match.group())\n return entities.get(\"entities\", [])\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Anthropic entity extraction failed: {e}\")\n protokoll.log_llm_call(\n request=f\"[entity_extraction] {prompt[:500]}...\",\n model_name=ANTHROPIC_MODEL,\n status=\"error\",\n error_message=str(e),\n )\n return []\n\n\ndef extract_relations(text, entities, client=None):\n \"\"\"Extract relations between entities.\"\"\"\n if not entities or len(entities) < 2:\n return []\n\n entity_names = [e[\"name\"] for e in entities[:20]]\n\n # Load prompt from database\n prompt_template = db.get_prompt(\"relation_extraction\")\n\n if not prompt_template:\n db.log(\"WARNING\", \"relation_extraction prompt not found in DB, using fallback\")\n prompt_template = \"\"\"Identifiziere Beziehungen zwischen Entitäten.\nEntitäten: {{ENTITIES}}\nBeziehungstypen: DEVELOPED_BY, RELATED_TO, PART_OF, USED_IN, BASED_ON\nAntworte NUR im JSON-Format:\n{\"relations\": [{\"source\": \"...\", \"relation\": \"...\", \"target\": \"...\"}]}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{ENTITIES}}\", \", \".join(entity_names))\n prompt = prompt.replace(\"{{TEXT}}\", text[:3000])\n\n try:\n start_time = time.time()\n tokens_in, tokens_out = 0, 0\n model_name = \"\"\n\n if client:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=1000, messages=[{\"role\": \"user\", \"content\": prompt}]\n )\n response_text = message.content[0].text\n tokens_in = message.usage.input_tokens\n tokens_out = message.usage.output_tokens\n model_name = ANTHROPIC_MODEL\n else:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=120,\n )\n response.raise_for_status()\n data = 
response.json()\n response_text = data.get(\"response\", \"{}\")\n tokens_in = data.get(\"prompt_eval_count\", 0)\n tokens_out = data.get(\"eval_count\", 0)\n model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"\n\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Log to ki-protokoll\n protokoll.log_llm_call(\n request=f\"[relation_extraction] {prompt[:500]}...\",\n response=response_text[:2000],\n model_name=model_name,\n tokens_input=tokens_in,\n tokens_output=tokens_out,\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n data = json.loads(json_match.group())\n return data.get(\"relations\", [])\n return []\n except Exception as e:\n db.log(\"ERROR\", f\"Relation extraction failed: {e}\")\n protokoll.log_llm_call(\n request=f\"[relation_extraction] {prompt[:500]}...\",\n model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",\n status=\"error\",\n error_message=str(e),\n )\n return []\n\n\ndef classify_taxonomy(text, client=None):\n \"\"\"Classify text into taxonomy categories.\"\"\"\n prompt_template = db.get_prompt(\"taxonomy_classification\")\n\n if not prompt_template:\n prompt_template = \"\"\"Klassifiziere den folgenden Text in passende Kategorien.\n\nWähle aus diesen Hauptkategorien:\n- Methoden (Therapiemethoden, Techniken)\n- Theorie (Konzepte, Modelle, Grundlagen)\n- Praxis (Anwendung, Fallbeispiele)\n- Organisation (Strukturen, Prozesse)\n- Kommunikation (Gesprächsführung, Interaktion)\n- Entwicklung (Persönliche Entwicklung, Veränderung)\n\nAntworte NUR im JSON-Format:\n{\"categories\": [\"...\", \"...\"], \"confidence\": 0.0-1.0}\n\nText:\n{{TEXT}}\"\"\"\n\n prompt = prompt_template.replace(\"{{TEXT}}\", text[:2000])\n\n try:\n start_time = time.time()\n tokens_in, tokens_out = 0, 0\n model_name = \"\"\n\n if client:\n message = client.messages.create(\n model=ANTHROPIC_MODEL, max_tokens=500, messages=[{\"role\": \"user\", 
\"content\": prompt}]\n )\n response_text = message.content[0].text\n tokens_in = message.usage.input_tokens\n tokens_out = message.usage.output_tokens\n model_name = ANTHROPIC_MODEL\n else:\n response = requests.post(\n f\"{OLLAMA_HOST}\/api\/generate\",\n json={\"model\": OLLAMA_CHAT_MODEL, \"prompt\": prompt, \"stream\": False, \"format\": \"json\"},\n timeout=60,\n )\n response.raise_for_status()\n data = response.json()\n response_text = data.get(\"response\", \"{}\")\n tokens_in = data.get(\"prompt_eval_count\", 0)\n tokens_out = data.get(\"eval_count\", 0)\n model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"\n\n duration_ms = int((time.time() - start_time) * 1000)\n\n # Log to ki-protokoll\n protokoll.log_llm_call(\n request=f\"[taxonomy_classification] {prompt[:500]}...\",\n response=response_text[:2000],\n model_name=model_name,\n tokens_input=tokens_in,\n tokens_output=tokens_out,\n duration_ms=duration_ms,\n status=\"completed\",\n )\n\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_m... [TRUNCATED-f45abb00ef6eedee]",
"structuredPatch": [
{
"oldStart": 636,
"oldLines": 7,
"newStart": 636,
"newLines": 7,
"lines": [
" return None",
" ",
" ",
"-def analyze_chunks_semantics(document_id, client=None):",
"+def analyze_chunks_semantics(document_id, client=None, progress=None):",
" \"\"\"Analyze all chunks of a document for semantics.\"\"\"",
" cursor = db.execute(",
" \"SELECT id, content FROM chunks WHERE document_id = %s ORDER BY chunk_index\","
]
},
{
"oldStart": 645,
"oldLines": 15,
"newStart": 645,
"newLines": 23,
"lines": [
" chunks = cursor.fetchall()",
" cursor.close()",
" ",
"- db.log(\"INFO\", f\"Analyzing semantics for {len(chunks)} chunks\")",
"+ total = len(chunks)",
"+ db.log(\"INFO\", f\"Analyzing semantics for {total} chunks\")",
"+ if progress:",
"+ progress.add_log(f\"Semantik: Analysiere {total} Chunks...\")",
" ",
" analyzed = 0",
"- for chunk in chunks:",
"+ for i, chunk in enumerate(chunks, 1):",
"+ if progress:",
"+ progress.add_log(f\"Semantik: Chunk {i}\/{total}...\")",
"+",
" result = analyze_chunk_semantics(chunk[\"id\"], chunk[\"content\"], client)",
" if result:",
" analyzed += 1",
" ",
"- db.log(\"INFO\", f\"Analyzed {analyzed}\/{len(chunks)} chunks for semantics\")",
"+ db.log(\"INFO\", f\"Analyzed {analyzed}\/{total} chunks for semantics\")",
"+ if progress:",
"+ progress.add_log(f\"Semantik: {analyzed}\/{total} Chunks analysiert\")",
" return analyzed",
" ",
" "
]
}
],
"userModified": false,
"replaceAll": false
}
}