{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/analyze.py",
"content": " duration_ms=duration_ms,\n status=\"completed\",\n )\n\n # Parse JSON\n import re\n\n json_match = re.search(r\"\\{[\\s\\S]*\\}\", response_text)\n if json_match:\n result = json.loads(json_match.group())\n return result.get(\"statements\", [])\n return []\n\n except Exception as e:\n db.log(\"ERROR\", f\"Statement extraction failed for chunk {chunk_id}: {e}\")\n protokoll.log_llm_call(\n request=f\"[statement_extraction] chunk_id={chunk_id}\",\n model_name=ANTHROPIC_MODEL if client else f\"ollama:{OLLAMA_CHAT_MODEL}\",\n status=\"error\",\n error_message=str(e),\n )\n return []\n\n\ndef find_entity_by_name(name: str) -> dict | None:\n \"\"\"\n Find entity by name or canonical_name.\n\n Args:\n name: Entity name to search for\n\n Returns:\n Entity dict with id, name, canonical_name or None\n \"\"\"\n name_lower = name.lower().strip()\n canonical = name_lower.replace(\" \", \"\").replace(\"-\", \"\")\n\n # Try exact canonical match first\n cursor = db.execute(\n \"SELECT id, name, canonical_name FROM entities WHERE canonical_name = %s LIMIT 1\",\n (canonical,),\n )\n result = cursor.fetchone()\n cursor.close()\n if result:\n return result\n\n # Try name match (case-insensitive)\n cursor = db.execute(\n \"SELECT id, name, canonical_name FROM entities WHERE LOWER(name) = %s LIMIT 1\",\n (name_lower,),\n )\n result = cursor.fetchone()\n cursor.close()\n if result:\n return result\n\n # Try partial canonical match\n cursor = db.execute(\n \"SELECT id, name, canonical_name FROM entities WHERE canonical_name LIKE %s LIMIT 1\",\n (f\"%{canonical}%\",),\n )\n result = cursor.fetchone()\n cursor.close()\n return result\n\n\ndef store_statements(chunk_id: int, statements: list[dict]) -> int:\n \"\"\"\n Store extracted statements in the database with entity linking.\n\n Args:\n chunk_id: ID of the source chunk\n statements: List of statement dicts with subject, predicate, object, confidence\n\n Returns:\n Number of successfully stored statements\n \"\"\"\n stored = 0\n\n for stmt in statements:\n try:\n subject_name = stmt.get(\"subject\", \"\").strip()\n predicate = stmt.get(\"predicate\", \"\").strip()\n object_value = stmt.get(\"object\", \"\").strip()\n confidence = float(stmt.get(\"confidence\", 0.8))\n\n if not subject_name or not predicate:\n continue\n\n # Find subject entity\n subject_entity = find_entity_by_name(subject_name)\n if not subject_entity:\n db.log(\"DEBUG\", f\"Subject entity not found: {subject_name}\")\n continue # Subject must be a known entity\n\n subject_entity_id = subject_entity[\"id\"]\n\n # Try to find object entity, otherwise use as literal\n object_entity_id = None\n object_literal = None\n\n if object_value:\n object_entity = find_entity_by_name(object_value)\n if object_entity:\n object_entity_id = object_entity[\"id\"]\n else:\n object_literal = object_value\n\n # Insert statement\n cursor = db.execute(\n \"\"\"INSERT INTO statements\n (subject_entity_id, predicate, object_entity_id, object_literal,\n chunk_id, confidence, status, created_at)\n VALUES (%s, %s, %s, %s, %s, %s, 'extracted', NOW())\"\"\",\n (\n subject_entity_id,\n predicate[:255], # Truncate to VARCHAR(255)\n object_entity_id,\n object_literal,",
"numLines": 120,
"startLine": 756,
"totalLines": 972
}
}
}