{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/analyzers\/ontology_classifier.py",
"content": "\"\"\"\nOntology Classifier - Classify entities into ontology classes.\n\"\"\"\n\nimport sys\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\n\n# Keyword-based classification rules\nCLASSIFICATION_RULES = {\n 1: { # Coaching-Methode\n \"keywords\": [\n \"methode\", \"technik\", \"tool\", \"werkzeug\", \"intervention\",\n \"übung\", \"format\", \"frage\", \"skalierung\", \"aufstellung\",\n \"visualisierung\", \"reflexion\", \"fragetechnik\",\n ],\n \"entity_types\": [\"METHOD\", \"TOOL\"],\n },\n 2: { # Coaching-Konzept\n \"keywords\": [\n \"konzept\", \"theorie\", \"modell\", \"ansatz\", \"prinzip\",\n \"grundlage\", \"haltung\", \"systemisch\", \"lösungsorientiert\",\n \"konstruktivismus\", \"philosophie\", \"paradigma\",\n ],\n \"entity_types\": [\"CONCEPT\", \"THEORY\"],\n },\n 3: { # Coaching-Prozess\n \"keywords\": [\n \"prozess\", \"phase\", \"ablauf\", \"schritt\", \"struktur\",\n \"sitzung\", \"gespräch\", \"dialog\", \"begleitung\", \"verlauf\",\n ],\n \"entity_types\": [\"PROCESS\"],\n },\n 4: { # Team-Intervention\n \"keywords\": [\n \"team\", \"gruppe\", \"organisation\", \"zusammenarbeit\",\n \"konflikt\", \"dynamik\", \"rolle\", \"moderation\",\n ],\n \"entity_types\": [\"TEAM\", \"GROUP\"],\n },\n}\n\n\ndef classify_entity(entity: dict) -> list[dict]:\n \"\"\"\n Classify an entity into ontology classes.\n Returns list of {ontology_class_id, confidence} dicts.\n \"\"\"\n name = (entity.get(\"name\") or \"\").lower()\n description = (entity.get(\"description\") or \"\").lower()\n entity_type = entity.get(\"type\", \"\")\n combined = f\"{name} {description}\"\n\n classifications = []\n\n for class_id, rules in CLASSIFICATION_RULES.items():\n confidence = 0.0\n\n # Check entity type match\n if entity_type in rules.get(\"entity_types\", []):\n confidence += 0.3\n\n # Check keyword matches\n keyword_matches = sum(1 for kw in rules[\"keywords\"] if kw in combined)\n if keyword_matches > 0:\n confidence += min(0.6, keyword_matches * 0.15)\n\n if confidence >= 0.3:\n classifications.append({\n \"ontology_class_id\": class_id,\n \"confidence\": min(confidence, 1.0),\n })\n\n # Sort by confidence descending\n classifications.sort(key=lambda x: x[\"confidence\"], reverse=True)\n\n return classifications\n\n\ndef classify_entities(document_id: int) -> int:\n \"\"\"\n Classify all entities from a document into ontology classes.\n Stores results in entity_classifications table.\n \"\"\"\n # Get entities linked to this document via chunk_entities\n cursor = db.execute(\n \"\"\"\n SELECT DISTINCT e.id, e.name, e.type, e.description\n FROM entities e\n JOIN chunk_entities ce ON e.id = ce.entity_id\n JOIN chunks c ON ce.chunk_id = c.id\n WHERE c.document_id = %s\n \"\"\",\n (document_id,),\n )\n entities = cursor.fetchall()\n cursor.close()\n\n classified = 0\n\n for entity in entities:\n classifications = classify_entity(entity)\n\n for cls in classifications:\n try:\n # Insert or update classification\n cursor = db.execute(\n \"\"\"\n INSERT INTO entity_classifications (entity_id, ontology_class_id, confidence)\n VALUES (%s, %s, %s)\n ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)\n \"\"\",\n (entity[\"id\"], cls[\"ontology_class_id\"], cls[\"confidence\"]),\n )\n db.commit()\n cursor.close()\n classified += 1\n except Exception as e:\n db.log(\"WARNING\", f\"Failed to classify entity {entity['id']}: {e}\")\n\n db.log(\"INFO\", f\"Classified {classified} entity-class mappings for document {document_id}\")\n return classified\n\n\ndef classify_all_entities() -> int:\n \"\"\"Classify all entities in the database.\"\"\"\n cursor = db.execute(\"SELECT id, name, type, description FROM entities\")\n entities = cursor.fetchall()\n cursor.close()\n\n classified = 0\n\n for entity in entities:\n classifications = classify_entity(entity)\n\n for cls in classifications:\n try:\n cursor = db.execute(\n \"\"\"\n INSERT INTO entity_classifications (entity_id, ontology_class_id, confidence)\n VALUES (%s, %s, %s)\n ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)\n \"\"\",\n (entity[\"id\"], cls[\"ontology_class_id\"], cls[\"confidence\"]),\n )\n db.commit()\n cursor.close()\n classified += 1\n except Exception as e:\n db.log(\"WARNING\", f\"Failed to classify entity {entity['id']}: {e}\")\n\n return classified\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/scripts\/pipeline\/analyzers\/ontology_classifier.py",
"content": "\"\"\"\nOntology Classifier - Classify entities into ontology classes.\n\"\"\"\n\nimport sys\n\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\n\n# Keyword-based classification rules\nCLASSIFICATION_RULES = {\n 1: { # Coaching-Methode\n \"keywords\": [\n \"methode\", \"technik\", \"tool\", \"werkzeug\", \"intervention\",\n \"übung\", \"format\", \"frage\", \"skalierung\", \"aufstellung\",\n \"visualisierung\", \"reflexion\", \"fragetechnik\",\n ],\n \"entity_types\": [\"METHOD\", \"TOOL\"],\n },\n 2: { # Coaching-Konzept\n \"keywords\": [\n \"konzept\", \"theorie\", \"modell\", \"ansatz\", \"prinzip\",\n \"grundlage\", \"haltung\", \"systemisch\", \"lösungsorientiert\",\n \"konstruktivismus\", \"philosophie\", \"paradigma\",\n ],\n \"entity_types\": [\"CONCEPT\", \"THEORY\"],\n },\n 3: { # Coaching-Prozess\n \"keywords\": [\n \"prozess\", \"phase\", \"ablauf\", \"schritt\", \"struktur\",\n \"sitzung\", \"gespräch\", \"dialog\", \"begleitung\", \"verlauf\",\n ],\n \"entity_types\": [\"PROCESS\"],\n },\n 4: { # Team-Intervention\n \"keywords\": [\n \"team\", \"gruppe\", \"organisation\", \"zusammenarbeit\",\n \"konflikt\", \"dynamik\", \"rolle\", \"moderation\",\n ],\n \"entity_types\": [\"TEAM\", \"GROUP\"],\n },\n}\n\n\ndef classify_entity(entity: dict) -> list[dict]:\n \"\"\"\n Classify an entity into ontology classes.\n Returns list of {ontology_class_id, confidence} dicts.\n \"\"\"\n name = (entity.get(\"name\") or \"\").lower()\n description = (entity.get(\"description\") or \"\").lower()\n entity_type = entity.get(\"type\", \"\")\n combined = f\"{name} {description}\"\n\n classifications = []\n\n for class_id, rules in CLASSIFICATION_RULES.items():\n confidence = 0.0\n\n # Check entity type match\n if entity_type in rules.get(\"entity_types\", []):\n confidence += 0.3\n\n # Check keyword matches\n keyword_matches = sum(1 for kw in rules[\"keywords\"] if kw in combined)\n if keyword_matches > 0:\n confidence += min(0.6, keyword_matches * 0.15)\n\n if confidence >= 0.3:\n classifications.append({\n \"ontology_class_id\": class_id,\n \"confidence\": min(confidence, 1.0),\n })\n\n # Sort by confidence descending\n classifications.sort(key=lambda x: x[\"confidence\"], reverse=True)\n\n return classifications\n\n\ndef classify_entities(document_id: int) -> int:\n \"\"\"\n Classify all entities from a document into ontology classes.\n Stores results in entity_classifications table.\n \"\"\"\n # Get entities linked to this document via chunk_entities\n cursor = db.execute(\n \"\"\"\n SELECT DISTINCT e.id, e.name, e.type, e.description\n FROM entities e\n JOIN chunk_entities ce ON e.id = ce.entity_id\n JOIN chunks c ON ce.chunk_id = c.id\n WHERE c.document_id = %s\n \"\"\",\n (document_id,),\n )\n entities = cursor.fetchall()\n cursor.close()\n\n classified = 0\n\n for entity in entities:\n classifications = classify_entity(entity)\n\n for cls in classifications:\n try:\n # Insert or update classification\n cursor = db.execute(\n \"\"\"\n INSERT INTO entity_classifications (entity_id, ontology_class_id, confidence)\n VALUES (%s, %s, %s)\n ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)\n \"\"\",\n (entity[\"id\"], cls[\"ontology_class_id\"], cls[\"confidence\"]),\n )\n db.commit()\n cursor.close()\n classified += 1\n except Exception as e:\n db.log(\"WARNING\", f\"Failed to classify entity {entity['id']}: {e}\")\n\n db.log(\"INFO\", f\"Classified {classified} entity-class mappings for document {document_id}\")\n return classified\n\n\ndef classify_all_entities() -> int:\n \"\"\"Classify all entities in the database.\"\"\"\n cursor = db.execute(\"SELECT id, name, type, description FROM entities\")\n entities = cursor.fetchall()\n cursor.close()\n\n classified = 0\n\n for entity in entities:\n classifications = classify_entity(entity)\n\n for cls in classifications:\n try:\n cursor = db.execute(\n \"\"\"\n INSERT INTO entity_classifications (entity_id, ontology_class_id, confidence)\n VALUES (%s, %s, %s)\n ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)\n \"\"\",\n (entity[\"id\"], cls[\"ontology_class_id\"], cls[\"confidence\"]),\n )\n db.commit()\n cursor.close()\n classified += 1\n except Exception as e:\n db.log(\"WARNING\", f\"Failed to classify entity {entity['id']}: {e}\")\n\n return classified\n",
"structuredPatch": [],
"originalFile": null
}
}