{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/step_semantic_extended.py",
"old_string": "class KnowledgeSemanticStoreStep:\n \"\"\"Step: Store knowledge semantics to database.\"\"\"\n\n def __init__(self, db, progress=None):\n self.db = db\n self.progress = progress\n\n def execute(self, entities: list, config: dict) -> dict:\n \"\"\"\n Store knowledge semantics from entities to entity_knowledge_semantics table.\n\n Args:\n entities: List of entities with 'knowledge_semantics' field\n config: Step config\n\n Returns:\n dict: {stored: int, skipped: int}\n \"\"\"\n if self.progress:\n self.progress.update_step(\"knowledge_semantic_store\")\n self.progress.add_log(\"Speichere Wissenssemantik...\")\n\n stored = 0\n skipped = 0\n\n for entity in entities:\n if not entity.get(\"knowledge_semantics\"):\n skipped += 1\n continue\n\n sem = entity[\"knowledge_semantics\"]\n\n try:\n cursor = self.db.execute(\n \"\"\"INSERT INTO entity_knowledge_semantics\n (entity_id, chunk_id, semantic_role, properties,\n functional_category, context_meaning, model_used)\n VALUES (%s, %s, %s, %s, %s, %s, %s)\n ON DUPLICATE KEY UPDATE\n semantic_role = VALUES(semantic_role),\n properties = VALUES(properties),\n functional_category = VALUES(functional_category),\n context_meaning = VALUES(context_meaning),\n model_used = VALUES(model_used),\n updated_at = NOW()\"\"\",\n (\n entity[\"id\"],\n entity.get(\"chunk_id\"),\n sem.get(\"semantic_role\"),\n json.dumps(sem.get(\"properties\", {})),\n sem.get(\"functional_category\"),\n sem.get(\"context_meaning\"),\n sem.get(\"model_used\"),\n ),\n )\n self.db.commit()\n cursor.close()\n stored += 1\n\n except Exception as e:\n self.db.log(\"ERROR\", f\"Failed to store knowledge semantics for entity {entity.get('id')}: {e}\")\n skipped += 1\n\n if self.progress:\n self.progress.add_log(f\"Wissenssemantik gespeichert: {stored}\")\n\n self.db.log(\"INFO\", f\"Knowledge semantics stored: {stored}, skipped: {skipped}\")\n return {\"stored\": stored, \"skipped\": skipped}",
"new_string": "class KnowledgeSemanticStoreStep:\n \"\"\"Step: Store knowledge semantics to unified entity_semantics table.\"\"\"\n\n def __init__(self, db, progress=None):\n self.db = db\n self.progress = progress\n\n def execute(self, entities: list, config: dict) -> dict:\n \"\"\"\n Store knowledge semantics to entity_semantics table (unified schema).\n\n Args:\n entities: List of entities with 'knowledge_semantics' field\n config: Step config\n\n Returns:\n dict: {stored: int, skipped: int}\n \"\"\"\n if self.progress:\n self.progress.update_step(\"knowledge_semantic_store\")\n self.progress.add_log(\"Speichere Wissenssemantik...\")\n\n stored = 0\n skipped = 0\n\n for entity in entities:\n if not entity.get(\"knowledge_semantics\"):\n skipped += 1\n continue\n\n sem = entity[\"knowledge_semantics\"]\n\n try:\n cursor = self.db.execute(\n \"\"\"INSERT INTO entity_semantics\n (entity_id, chunk_id, context, semantic_role, properties,\n functional_category, confidence, model_used)\n VALUES (%s, %s, %s, %s, %s, %s, %s, %s)\n ON DUPLICATE KEY UPDATE\n chunk_id = VALUES(chunk_id),\n context = VALUES(context),\n semantic_role = VALUES(semantic_role),\n properties = VALUES(properties),\n functional_category = VALUES(functional_category),\n model_used = VALUES(model_used),\n updated_at = NOW()\"\"\",\n (\n entity[\"id\"],\n entity.get(\"chunk_id\"),\n sem.get(\"context_meaning\"),\n sem.get(\"semantic_role\"),\n json.dumps(sem.get(\"properties\", {})),\n sem.get(\"functional_category\"),\n 0.8, # Default confidence\n sem.get(\"model_used\"),\n ),\n )\n self.db.commit()\n cursor.close()\n stored += 1\n\n except Exception as e:\n self.db.log(\"ERROR\", f\"Failed to store knowledge semantics for entity {entity.get('id')}: {e}\")\n skipped += 1\n\n if self.progress:\n self.progress.add_log(f\"Wissenssemantik gespeichert: {stored}\")\n\n self.db.log(\"INFO\", f\"Knowledge semantics stored: {stored}, skipped: {skipped}\")\n return {\"stored\": stored, \"skipped\": skipped}"
}
}
{
"tool_response": {
"filePath": "\/var\/www\/scripts\/pipeline\/step_semantic_extended.py",
"oldString": "class KnowledgeSemanticStoreStep:\n \"\"\"Step: Store knowledge semantics to database.\"\"\"\n\n def __init__(self, db, progress=None):\n self.db = db\n self.progress = progress\n\n def execute(self, entities: list, config: dict) -> dict:\n \"\"\"\n Store knowledge semantics from entities to entity_knowledge_semantics table.\n\n Args:\n entities: List of entities with 'knowledge_semantics' field\n config: Step config\n\n Returns:\n dict: {stored: int, skipped: int}\n \"\"\"\n if self.progress:\n self.progress.update_step(\"knowledge_semantic_store\")\n self.progress.add_log(\"Speichere Wissenssemantik...\")\n\n stored = 0\n skipped = 0\n\n for entity in entities:\n if not entity.get(\"knowledge_semantics\"):\n skipped += 1\n continue\n\n sem = entity[\"knowledge_semantics\"]\n\n try:\n cursor = self.db.execute(\n \"\"\"INSERT INTO entity_knowledge_semantics\n (entity_id, chunk_id, semantic_role, properties,\n functional_category, context_meaning, model_used)\n VALUES (%s, %s, %s, %s, %s, %s, %s)\n ON DUPLICATE KEY UPDATE\n semantic_role = VALUES(semantic_role),\n properties = VALUES(properties),\n functional_category = VALUES(functional_category),\n context_meaning = VALUES(context_meaning),\n model_used = VALUES(model_used),\n updated_at = NOW()\"\"\",\n (\n entity[\"id\"],\n entity.get(\"chunk_id\"),\n sem.get(\"semantic_role\"),\n json.dumps(sem.get(\"properties\", {})),\n sem.get(\"functional_category\"),\n sem.get(\"context_meaning\"),\n sem.get(\"model_used\"),\n ),\n )\n self.db.commit()\n cursor.close()\n stored += 1\n\n except Exception as e:\n self.db.log(\"ERROR\", f\"Failed to store knowledge semantics for entity {entity.get('id')}: {e}\")\n skipped += 1\n\n if self.progress:\n self.progress.add_log(f\"Wissenssemantik gespeichert: {stored}\")\n\n self.db.log(\"INFO\", f\"Knowledge semantics stored: {stored}, skipped: {skipped}\")\n return {\"stored\": stored, \"skipped\": skipped}",
"newString": "class KnowledgeSemanticStoreStep:\n \"\"\"Step: Store knowledge semantics to unified entity_semantics table.\"\"\"\n\n def __init__(self, db, progress=None):\n self.db = db\n self.progress = progress\n\n def execute(self, entities: list, config: dict) -> dict:\n \"\"\"\n Store knowledge semantics to entity_semantics table (unified schema).\n\n Args:\n entities: List of entities with 'knowledge_semantics' field\n config: Step config\n\n Returns:\n dict: {stored: int, skipped: int}\n \"\"\"\n if self.progress:\n self.progress.update_step(\"knowledge_semantic_store\")\n self.progress.add_log(\"Speichere Wissenssemantik...\")\n\n stored = 0\n skipped = 0\n\n for entity in entities:\n if not entity.get(\"knowledge_semantics\"):\n skipped += 1\n continue\n\n sem = entity[\"knowledge_semantics\"]\n\n try:\n cursor = self.db.execute(\n \"\"\"INSERT INTO entity_semantics\n (entity_id, chunk_id, context, semantic_role, properties,\n functional_category, confidence, model_used)\n VALUES (%s, %s, %s, %s, %s, %s, %s, %s)\n ON DUPLICATE KEY UPDATE\n chunk_id = VALUES(chunk_id),\n context = VALUES(context),\n semantic_role = VALUES(semantic_role),\n properties = VALUES(properties),\n functional_category = VALUES(functional_category),\n model_used = VALUES(model_used),\n updated_at = NOW()\"\"\",\n (\n entity[\"id\"],\n entity.get(\"chunk_id\"),\n sem.get(\"context_meaning\"),\n sem.get(\"semantic_role\"),\n json.dumps(sem.get(\"properties\", {})),\n sem.get(\"functional_category\"),\n 0.8, # Default confidence\n sem.get(\"model_used\"),\n ),\n )\n self.db.commit()\n cursor.close()\n stored += 1\n\n except Exception as e:\n self.db.log(\"ERROR\", f\"Failed to store knowledge semantics for entity {entity.get('id')}: {e}\")\n skipped += 1\n\n if self.progress:\n self.progress.add_log(f\"Wissenssemantik gespeichert: {stored}\")\n\n self.db.log(\"INFO\", f\"Knowledge semantics stored: {stored}, skipped: {skipped}\")\n return {\"stored\": stored, \"skipped\": skipped}",
"originalFile": "\"\"\"\nExtended Semantic Steps for Scientific Pipeline v1.\n\nImplements 6 new step_types for Pipeline #5:\n1. DuplicateCheckStep - Hash-based duplicate detection\n2. TextSemanticAnalyzeStep - Analyzes HOW text is structured (Textsemantik)\n3. TextSemanticStoreStep - Stores text semantics to chunk_text_semantics\n4. KnowledgeSemanticAnalyzeStep - Analyzes WHAT entities MEAN (Wissenssemantik)\n5. KnowledgeSemanticStoreStep - Stores knowledge semantics to entity_knowledge_semantics\n6. OntologyStoreStep - Stores ontology classifications\n\nPart of Pipeline-Refactoring based on scientific specification.\n\"\"\"\n\nimport json\n\nimport ollama\n\n\nclass DuplicateCheckStep:\n \"\"\"Step: Check for duplicate documents via content hash.\"\"\"\n\n def __init__(self, db, progress=None):\n self.db = db\n self.progress = progress\n\n def execute(self, doc_id: int, content_hash: str) -> dict:\n \"\"\"\n Check if document with same hash already exists.\n\n Args:\n doc_id: Current document ID\n content_hash: SHA-256 hash of document content\n\n Returns:\n dict: {status: 'ok'|'abort', reason: str, duplicate_id: int|None}\n \"\"\"\n if self.progress:\n self.progress.update_step(\"duplicate_check\")\n self.progress.add_log(\"Prüfe auf Duplikate...\")\n\n if not content_hash:\n return {\"status\": \"skip\", \"reason\": \"no_hash\"}\n\n cursor = self.db.execute(\n \"\"\"SELECT id, source_path FROM documents\n WHERE file_hash = %s AND id != %s AND status = 'done'\n LIMIT 1\"\"\",\n (content_hash, doc_id),\n )\n existing = cursor.fetchone()\n cursor.close()\n\n if existing:\n self.db.log(\n \"INFO\",\n f\"Duplicate found: doc {doc_id} matches {existing['id']} ({existing['source_path']})\",\n )\n if self.progress:\n self.progress.add_log(f\"Duplikat gefunden: ID {existing['id']}\")\n\n return {\n \"status\": \"abort\",\n \"reason\": \"duplicate\",\n \"duplicate_id\": existing[\"id\"],\n \"duplicate_path\": existing[\"source_path\"],\n }\n\n if self.progress:\n self.progress.add_log(\"Kein Duplikat gefunden\")\n\n return {\"status\": \"ok\"}\n\n\nclass TextSemanticAnalyzeStep:\n \"\"\"Step: Analyze HOW text is structured (Textsemantik).\n\n Analyzes each chunk for:\n - statement_form: assertion, question, command, conditional\n - intent: explain, argue, define, compare, exemplify, warn, instruct\n - frame: theoretical, practical, historical, methodological, critical\n - is_negated: whether the statement is negated\n - discourse_role: thesis, evidence, example, counter, summary, definition\n \"\"\"\n\n PROMPT_TEMPLATE = \"\"\"Analysiere den folgenden Text semantisch.\n\nBestimme:\n1. statement_form: Ist es eine Aussage (assertion), Frage (question), Aufforderung (command) oder Bedingung (conditional)?\n2. intent: Was ist die Absicht? explain, argue, define, compare, exemplify, warn, instruct\n3. frame: Welcher Rahmen? theoretical, practical, historical, methodological, critical\n4. is_negated: Wird etwas verneint? true\/false\n5. discourse_role: Welche Rolle im Diskurs? thesis, evidence, example, counter, summary, definition\n\nAntworte NUR mit gültigem JSON:\n{{\n \"statement_form\": \"assertion|question|command|conditional\",\n \"intent\": \"explain|argue|define|compare|exemplify|warn|instruct\",\n \"frame\": \"theoretical|practical|historical|methodological|critical\",\n \"is_negated\": false,\n \"discourse_role\": \"thesis|evidence|example|counter|summary|definition\"\n}}\n\nText:\n{content}\"\"\"\n\n def __init__(self, db, progress=None):\n self.db = db\n self.progress = progress\n\n def execute(self, chunks: list, config: dict) -> list:\n \"\"\"\n Analyze text semantics for each chunk.\n\n Args:\n chunks: List of chunk dicts with 'id' and 'content'\n config: Step config with 'model' (default: mistral)\n\n Returns:\n list: Chunks with added 'text_semantics' field\n \"\"\"\n if self.progress:\n self.progress.update_step(\"text_semantic_analyze\")\n self.progress.add_log(f\"Textsemantik-Analyse für {len(chunks)} Chunks...\")\n\n model = config.get(\"model\")\n if not model:\n raise ValueError(\"Model muss in config übergeben werden! Step: TextSemanticAnalyzeStep\")\n analyzed = 0\n errors = 0\n\n for chunk in chunks:\n try:\n prompt = self.PROMPT_TEMPLATE.format(content=chunk[\"content\"][:2000])\n\n response = ollama.generate(\n model=model,\n prompt=prompt,\n options={\"num_predict\": 200},\n )\n\n # Parse JSON response\n response_text = response[\"response\"].strip()\n # Find JSON in response\n start = response_text.find(\"{\")\n end = response_text.rfind(\"}\") + 1\n if start >= 0 and end > start:\n json_str = response_text[start:end]\n chunk[\"text_semantics\"] = json.loads(json_str)\n chunk[\"text_semantics\"][\"model_used\"] = model\n analyzed += 1\n else:\n chunk[\"text_semantics\"] = None\n errors += 1\n\n except Exception as e:\n self.db.log(\"WARNING\", f\"Text semantic analysis failed for chunk {chunk['id']}: {e}\")\n chunk[\"text_semantics\"] = None\n errors += 1\n\n if self.progress:\n self.progress.add_log(f\"Textsemantik: {analyzed} analysiert, {errors} Fehler\")\n\n self.db.log(\"INFO\", f\"Text semantic analysis: {analyzed} chunks, {errors} errors\")\n return chunks\n\n\nclass TextSemanticStoreStep:\n \"\"\"Step: Store text semantics to unified chunk_semantics table.\"\"\"\n\n def __init__(self, db, progress=None):\n self.db = db\n self.progress = progress\n\n def execute(self, chunks: list, config: dict) -> dict:\n \"\"\"\n Store text semantics to chunk_semantics table (unified schema).\n\n Args:\n chunks: List of chunks with 'text_semantics' field\n config: Step config\n\n Returns:\n dict: {stored: int, skipped: int}\n \"\"\"\n if self.progress:\n self.progress.update_step(\"text_semantic_store\")\n self.progress.add_log(\"Speichere Textsemantik...\")\n\n stored = 0\n skipped = 0\n\n for chunk in chunks:\n if not chunk.get(\"text_semantics\"):\n skipped += 1\n continue\n\n sem = chunk[\"text_semantics\"]\n\n try:\n # Update existing chunk_semantics record with text semantic fields\n cursor = self.db.execute(\n \"\"\"UPDATE chunk_semantics\n SET statement_form = %s,\n intent = %s,\n frame = %s,\n is_negated = %s,\n discourse_role = %s\n WHERE chunk_id = %s\"\"\",\n (\n sem.get(\"statement_form\"),\n sem.get(\"intent\"),\n sem.get(\"frame\"),\n sem.get(\"is_negated\", False),\n sem.get(\"discourse_role\"),\n chunk[\"id\"],\n ),\n )\n self.db.commit()\n cursor.close()\n stored += 1\n\n except Exception as e:\n self.db.log(\"ERROR\", f\"Failed to store text semantics for chunk {chunk['id']}: {e}\")\n skipped += 1\n\n if self.progress:\n self.progress.add_log(f\"Textsemantik gespeichert: {stored}\")\n\n self.db.log(\"INFO\", f\"Text semantics stored: {stored}, skipped: {skipped}\")\n return {\"stored\": stored, \"skipped\": skipped}\n\n\nclass KnowledgeSemanticAnalyzeStep:\n \"\"\"Step: Analyze WHAT entities MEAN in context (Wissenssemantik).\n\n Analyzes each entity for:\n - semantic_role: agent, patient, instrument, location, cause, effect\n - properties: JSON object with entity properties\n - functional_category: method, tool, concept, actor, outcome, process\n - context_meaning: Brief explanation of entity's meaning in context\n \"\"\"\n\n PROMPT_TEMPLATE = \"\"\"Analysiere die Bedeutung dieser Entität im Kontext.\n\nEntität: {name}\nTyp: {entity_type}\nKontext: {context}\n\nBestimme:\n1. semantic_role: Welche Rolle spielt die Entität? agent, patient, instrument, location, cause, effect\n2. properties: Welche Eigenschaften hat sie? (als JSON-Objekt)\n3. functional_category: Welche Funktion? method, tool, concept, actor, outcome, process\n4. context_meaning: Was bedeutet die Entität in diesem Kontext? (1 Satz)\n\nAntworte NUR mit gültigem JSON:\n{{\n \"semantic_role\": \"agent|patient|instrument|location|cause|effect\",\n \"properties\": {{\"key\": \"value\"}},\n \"functional_category\": \"method|tool|concept|actor|outcome|process\",\n \"context_meaning\": \"Kurze Erklärung\"\n}}\"\"\"\n\n def __init__(self, db, progress=None):\n self.db = db\n self.progress = progress\n\n def execute(self, entities: list, config: dict) -> list:\n \"\"\"\n Analyze knowledge semantics for each entity.\n\n Args:\n entities: List of entity dicts with 'id', 'name', 'type', 'context'\n config: Step config with 'model' (default: mistral)\n\n Returns:\n list: Entities with added 'knowledge_semantics' field\n \"\"\"\n if self.progress:\n self.progress.update_step(\"knowledge_semantic_analyze\")\n self.progress.add_log(f\"Wissenssemantik-Analyse für {len(entities)} Entitäten...\")\n\n model = config.get(\"model\")\n if not model:\n raise ValueError(\"Model muss in config überge... [TRUNCATED-8d3d6f9d3a377f05]",
"structuredPatch": [
{
"oldStart": 326,
"oldLines": 7,
"newStart": 326,
"newLines": 7,
"lines": [
" ",
" ",
" class KnowledgeSemanticStoreStep:",
"- \"\"\"Step: Store knowledge semantics to database.\"\"\"",
"+ \"\"\"Step: Store knowledge semantics to unified entity_semantics table.\"\"\"",
" ",
" def __init__(self, db, progress=None):",
" self.db = db"
]
},
{
"oldStart": 334,
"oldLines": 7,
"newStart": 334,
"newLines": 7,
"lines": [
" ",
" def execute(self, entities: list, config: dict) -> dict:",
" \"\"\"",
"- Store knowledge semantics from entities to entity_knowledge_semantics table.",
"+ Store knowledge semantics to entity_semantics table (unified schema).",
" ",
" Args:",
" entities: List of entities with 'knowledge_semantics' field"
]
},
{
"oldStart": 359,
"oldLines": 24,
"newStart": 359,
"newLines": 26,
"lines": [
" ",
" try:",
" cursor = self.db.execute(",
"- \"\"\"INSERT INTO entity_knowledge_semantics",
"- (entity_id, chunk_id, semantic_role, properties,",
"- functional_category, context_meaning, model_used)",
"- VALUES (%s, %s, %s, %s, %s, %s, %s)",
"+ \"\"\"INSERT INTO entity_semantics",
"+ (entity_id, chunk_id, context, semantic_role, properties,",
"+ functional_category, confidence, model_used)",
"+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
" ON DUPLICATE KEY UPDATE",
"+ chunk_id = VALUES(chunk_id),",
"+ context = VALUES(context),",
" semantic_role = VALUES(semantic_role),",
" properties = VALUES(properties),",
" functional_category = VALUES(functional_category),",
"- context_meaning = VALUES(context_meaning),",
" model_used = VALUES(model_used),",
" updated_at = NOW()\"\"\",",
" (",
" entity[\"id\"],",
" entity.get(\"chunk_id\"),",
"+ sem.get(\"context_meaning\"),",
" sem.get(\"semantic_role\"),",
" json.dumps(sem.get(\"properties\", {})),",
" sem.get(\"functional_category\"),",
"- sem.get(\"context_meaning\"),",
"+ 0.8, # Default confidence",
" sem.get(\"model_used\"),",
" ),",
" )"
]
}
],
"userModified": false,
"replaceAll": false
}
}