{
  "event": "PreToolUse",
  "tool_name": "Read",
  "tool_input": {
    "file_path": "/var/www/scripts/pipeline/analyze.py",
    "offset": 677,
    "limit": 80
  }
}
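A hook consuming this event would read the JSON payload above from stdin. A minimal Python sketch of such a consumer, assuming stdin delivery; the audit-log path and the script itself are illustrative assumptions, not part of the pipeline shown:

#!/usr/bin/env python3
# Hypothetical hook consumer: reads the event JSON from stdin and appends
# one audit line per Read call. /tmp/read-audit.log is an assumed path.
import json
import sys

event = json.load(sys.stdin)
if event.get("event") == "PreToolUse" and event.get("tool_name") == "Read":
    file_path = event.get("tool_input", {}).get("file_path", "?")
    with open("/tmp/read-audit.log", "a") as log:
        log.write(f"Read {file_path}\n")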
{
  "tool_response": {
    "type": "text",
    "file": {
      "filePath": "/var/www/scripts/pipeline/analyze.py",
      "content": "def extract_statements(chunk_id: int, text: str, client=None) -> list[dict]:\n    \"\"\"\n    Extract SPO-triplets (Subject-Predicate-Object statements) from text.\n\n    Args:\n        chunk_id: ID of the chunk being analyzed\n        text: Text content to extract statements from\n        client: Optional Anthropic client (falls back to Ollama if None)\n\n    Returns:\n        List of extracted statements with entity linking\n    \"\"\"\n    prompt_template = db.get_prompt(\"statement_extraction\")\n\n    if not prompt_template:\n        db.log(\"WARNING\", \"statement_extraction prompt not found in DB, using fallback\")\n        prompt_template = \"\"\"Extract all factual statements from the text as SPO triples.\n\nRules:\n- Subject: a named entity (person, organization, concept, method)\n- Predicate: the relationship or property (e.g. \"developed\", \"based on\", \"is part of\")\n- Object: an entity or a literal value\n\nAnswer ONLY in JSON format:\n{\"statements\": [\n  {\"subject\": \"name of the subject entity\", \"predicate\": \"relationship\", \"object\": \"name or value\", \"confidence\": 0.0-1.0}\n]}\n\nText:\n{{TEXT}}\"\"\"\n\n    prompt = prompt_template.replace(\"{{TEXT}}\", text[:3000])\n\n    try:\n        start_time = time.time()\n        tokens_in, tokens_out = 0, 0\n        model_name = \"\"\n\n        if client:\n            message = client.messages.create(\n                model=ANTHROPIC_MODEL,\n                max_tokens=1500,\n                messages=[{\"role\": \"user\", \"content\": prompt}],\n            )\n            response_text = message.content[0].text\n            tokens_in = message.usage.input_tokens\n            tokens_out = message.usage.output_tokens\n            model_name = ANTHROPIC_MODEL\n        else:\n            response = requests.post(\n                f\"{OLLAMA_HOST}/api/generate\",\n                json={\n                    \"model\": OLLAMA_CHAT_MODEL,\n                    \"prompt\": prompt,\n                    \"stream\": False,\n                    \"format\": \"json\",\n                },\n                timeout=120,\n            )\n            response.raise_for_status()\n            data = response.json()\n            response_text = data.get(\"response\", \"{}\")\n            tokens_in = data.get(\"prompt_eval_count\", 0)\n            tokens_out = data.get(\"eval_count\", 0)\n            model_name = f\"ollama:{OLLAMA_CHAT_MODEL}\"\n\n        duration_ms = int((time.time() - start_time) * 1000)\n\n        # Log to ki-protokoll\n        protokoll.log_llm_call(\n            request=f\"[statement_extraction] chunk_id={chunk_id}\",\n            response=response_text[:2000],\n            model_name=model_name,\n            tokens_input=tokens_in,\n            tokens_output=tokens_out,\n            duration_ms=duration_ms,\n            status=\"completed\",\n        )\n\n        # Parse JSON",
      "numLines": 80,
      "startLine": 677,
      "totalLines": 968
    }
  }
}
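The Read output stops at the 80-line limit just before the JSON parsing; the actual continuation of analyze.py (lines 757 onward) is not shown. A minimal sketch of how such a parse step might look, under that assumption; the function name and field handling below are illustrative, not the file's code:

import json

def parse_statements(response_text: str, chunk_id: int) -> list[dict]:
    # Illustrative sketch only: pull {"statements": [...]} out of an LLM reply.
    try:
        data = json.loads(response_text)
    except json.JSONDecodeError:
        # Models occasionally wrap the JSON in prose; try the outermost braces.
        start, end = response_text.find("{"), response_text.rfind("}")
        if start == -1 or end <= start:
            return []
        try:
            data = json.loads(response_text[start : end + 1])
        except json.JSONDecodeError:
            return []
    results = []
    for s in data.get("statements", []):
        if not isinstance(s, dict) or not {"subject", "predicate", "object"} <= s.keys():
            continue  # skip malformed triples rather than failing the whole chunk
        results.append({
            "chunk_id": chunk_id,
            "subject": s["subject"],
            "predicate": s["predicate"],
            "object": s["object"],
            "confidence": float(s.get("confidence", 0.5)),
        })
    return results

The brace-scanning fallback is a common pattern here because both backends are asked for JSON (the Ollama call even sets "format": "json"), but smaller models still sometimes surround it with explanatory text.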