{
"event": "PreToolUse",
"tool_name": "Bash",
"tool_input": {
"command": "DB_PASSWORD=\"***REDACTED***\" timeout 300 .\/venv\/bin\/python -c \"\nimport json\nimport re\nimport ollama\nfrom db import db\n\ndb.connect()\n\n# Get unanalyzed chunks\ncursor = db.execute('''\n SELECT c.id, c.content \n FROM chunks c \n LEFT JOIN chunk_text_semantics cts ON c.id = cts.chunk_id \n WHERE c.document_id = 12 AND cts.id IS NULL\n''')\nchunks = cursor.fetchall()\ncursor.close()\n\nprint(f'Analysiere {len(chunks)} Chunks...')\nprint()\n\nPROMPT = '''Analysiere den Text. Antworte NUR mit JSON (keine Erklärung):\n{\\\"statement_form\\\": \\\"assertion\\\", \\\"intent\\\": \\\"explain\\\", \\\"frame\\\": \\\"practical\\\", \\\"is_negated\\\": false, \\\"discourse_role\\\": \\\"thesis\\\"}\n\nErlaubte Werte:\n- statement_form: assertion, question, command, conditional\n- intent: explain, argue, define, compare, exemplify, warn, instruct\n- frame: theoretical, practical, historical, methodological, critical\n- discourse_role: thesis, evidence, example, counter, summary, definition\n\nText: '''\n\nsuccess = 0\nerrors = 0\n\nfor chunk in chunks:\n try:\n response = ollama.generate(model='mistral', prompt=PROMPT + chunk['content'][:1200], options={'num_predict': 150})\n resp_text = response['response'].strip()\n \n # Extract first JSON object\n match = re.search(r'\\{[^{}]+\\}', resp_text)\n if match:\n sem = json.loads(match.group())\n \n # Validate ENUM values\n valid_forms = ['assertion', 'question', 'command', 'conditional']\n valid_intents = ['explain', 'argue', 'define', 'compare', 'exemplify', 'warn', 'instruct']\n valid_frames = ['theoretical', 'practical', 'historical', 'methodological', 'critical']\n valid_roles = ['thesis', 'evidence', 'example', 'counter', 'summary', 'definition']\n \n form = sem.get('statement_form', 'assertion')\n if form not in valid_forms:\n form = 'assertion'\n \n intent = sem.get('intent')\n if intent and intent not in valid_intents:\n intent = 'explain'\n \n frame = sem.get('frame')\n if frame and frame not in 
valid_frames:\n frame = 'practical'\n \n role = sem.get('discourse_role')\n if role and role not in valid_roles:\n role = 'thesis'\n \n cursor = db.execute('''INSERT INTO chunk_text_semantics \n (chunk_id, statement_form, intent, frame, is_negated, discourse_role, model_used)\n VALUES (%s, %s, %s, %s, %s, %s, %s)''',\n (chunk['id'], form, intent, frame, sem.get('is_negated', False), role, 'mistral'))\n db.commit()\n cursor.close()\n \n print(f'Chunk {chunk[\\\"id\\\"]}: {form}, {intent}, {role}')\n success += 1\n else:\n print(f'Chunk {chunk[\\\"id\\\"]}: No JSON found')\n errors += 1\n except Exception as e:\n print(f'Chunk {chunk[\\\"id\\\"]}: Error - {str(e)[:50]}')\n errors += 1\n\nprint()\nprint(f'Fertig: {success} erfolgreich, {errors} Fehler')\n\ndb.disconnect()\n\"",
"timeout": 360000,
"description": "Analyze all remaining chunks with text semantics"
}
}
{
"tool_response": {
"stdout": "Analysiere 10 Chunks...\n\nChunk 41: assertion, explain, definition\nChunk 44: assertion, explain, thesis\nChunk 45: assertion, explain, definition\nChunk 46: assertion, explain, definition\nChunk 47: assertion, explain, thesis\nChunk 48: assertion, explain, thesis\nChunk 1918: assertion, explain, definition\nChunk 1919: assertion, explain, thesis\nChunk 1920: assertion, explain, thesis\nChunk 1921: assertion, explain, thesis\n\nFertig: 10 erfolgreich, 0 Fehler",
"stderr": "",
"interrupted": false,
"isImage": false
}
}