ontology_classifier.py
- Pfad: /var/www/scripts/pipeline/analyzers/ontology_classifier.py
- Namespace: pipeline
- Zeilen: 190 | Größe: 5,388 Bytes
- Geändert: 2025-12-31 03:01:09 | Gescannt: 2025-12-31 10:22:15
Code Hygiene Score: 100
- Dependencies: 100 (25%)
- LOC: 100 (20%)
- Methods: 100 (20%)
- Secrets: 100 (15%)
- Classes: 100 (10%)
- Magic Numbers: 100 (10%)
Keine Issues gefunden.
Dependencies 2
- use sys
- use db.db
Funktionen 3
- classify_entity() – Zeile 79
- classify_entities() – Zeile 117
- classify_all_entities() – Zeile 162
Code
"""
Ontology Classifier - Classify entities into ontology classes.
"""
import sys
sys.path.insert(0, "/var/www/scripts/pipeline")
from db import db
# Keyword-based classification rules.
#
# Maps an ontology class id to the rule set used by classify_entity():
#   "keywords"     - lower-case terms searched as substrings of the entity's
#                    lower-cased name and description
#   "entity_types" - entity "type" values that count as a direct type match
CLASSIFICATION_RULES = {
    # Class 1: coaching method (Coaching-Methode)
    1: {
        "keywords": [
            "methode", "technik", "tool", "werkzeug", "intervention",
            "übung", "format", "frage", "skalierung", "aufstellung",
            "visualisierung", "reflexion", "fragetechnik",
        ],
        "entity_types": ["METHOD", "TOOL"],
    },
    # Class 2: coaching concept (Coaching-Konzept)
    2: {
        "keywords": [
            "konzept", "theorie", "modell", "ansatz", "prinzip",
            "grundlage", "haltung", "systemisch", "lösungsorientiert",
            "konstruktivismus", "philosophie", "paradigma",
        ],
        "entity_types": ["CONCEPT", "THEORY"],
    },
    # Class 3: coaching process (Coaching-Prozess)
    3: {
        "keywords": [
            "prozess", "phase", "ablauf", "schritt", "struktur",
            "sitzung", "gespräch", "dialog", "begleitung", "verlauf",
        ],
        "entity_types": ["PROCESS"],
    },
    # Class 4: team intervention (Team-Intervention)
    4: {
        "keywords": [
            "team", "gruppe", "organisation", "zusammenarbeit",
            "konflikt", "dynamik", "rolle", "moderation",
        ],
        "entity_types": ["TEAM", "GROUP"],
    },
}
def classify_entity(entity: dict, rules: "dict | None" = None) -> list[dict]:
    """
    Classify an entity into ontology classes.

    For every rule set the score is built from two parts: 0.3 if the entity's
    ``type`` is listed in ``entity_types``, plus 0.15 per keyword found as a
    substring of the lower-cased name and description (keyword part capped at
    0.6). Classes scoring at least 0.3 are reported.

    Args:
        entity: Mapping read via ``.get`` for ``name``, ``description`` and
            ``type``. Missing or ``None`` text fields count as empty strings.
        rules: Optional ``{class_id: {"keywords": [...], "entity_types": [...]}}``
            mapping. Keywords must be lower-case because they are compared
            against lower-cased text. Defaults to ``CLASSIFICATION_RULES``.

    Returns:
        List of ``{"ontology_class_id", "confidence"}`` dicts, sorted by
        confidence descending. Confidence is rounded to two decimals.
    """
    if rules is None:
        rules = CLASSIFICATION_RULES

    type_match_score = 0.3
    score_per_keyword = 0.15
    max_keyword_score = 0.6
    min_confidence = 0.3

    name = (entity.get("name") or "").lower()
    description = (entity.get("description") or "").lower()
    entity_type = entity.get("type", "")
    combined = f"{name} {description}"

    classifications = []
    for class_id, rule in rules.items():
        confidence = 0.0

        # Check entity type match
        if entity_type in rule.get("entity_types", ()):
            confidence += type_match_score

        # Check keyword matches (substring search, so compounds also hit)
        keyword_matches = sum(1 for kw in rule.get("keywords", ()) if kw in combined)
        confidence += min(max_keyword_score, keyword_matches * score_per_keyword)

        # Binary float sums such as 0.3 + 0.6 yield 0.8999999999999999;
        # round so callers receive clean values.
        confidence = round(confidence, 2)

        if confidence >= min_confidence:
            classifications.append(
                {
                    "ontology_class_id": class_id,
                    "confidence": confidence,
                }
            )

    # Sort by confidence descending (stable: ties keep rule order)
    classifications.sort(key=lambda item: item["confidence"], reverse=True)
    return classifications
def classify_entities(document_id: int) -> int:
    """
    Classify all entities from a document into ontology classes.

    Loads the distinct entities linked to the document through
    ``chunk_entities`` and ``chunks``, runs ``classify_entity`` on each one and
    upserts every resulting mapping into ``entity_classifications``. A failure
    on a single mapping is logged as a warning and skipped (best effort).

    Args:
        document_id: Value matched against ``chunks.document_id``.

    Returns:
        Number of entity-class mappings that were written and committed.
    """
    # Get entities linked to this document via chunk_entities
    cursor = db.execute(
        """
        SELECT DISTINCT e.id, e.name, e.type, e.description
        FROM entities e
        JOIN chunk_entities ce ON e.id = ce.entity_id
        JOIN chunks c ON ce.chunk_id = c.id
        WHERE c.document_id = %s
        """,
        (document_id,),
    )
    try:
        entities = cursor.fetchall()
    finally:
        # Release the cursor even when fetching fails.
        cursor.close()

    classified = 0
    # NOTE(review): rows are accessed by key, so db.execute presumably yields
    # dict-style rows - confirm against the db wrapper.
    for entity in entities:
        for cls in classify_entity(entity):
            try:
                # Insert or update classification
                cursor = db.execute(
                    """
                    INSERT INTO entity_classifications (entity_id, ontology_class_id, confidence)
                    VALUES (%s, %s, %s)
                    ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)
                    """,
                    (entity["id"], cls["ontology_class_id"], cls["confidence"]),
                )
                try:
                    db.commit()
                finally:
                    # Close the cursor even if the commit raises, so failed
                    # rows do not leak cursors.
                    cursor.close()
                classified += 1
            except Exception as e:
                db.log("WARNING", f"Failed to classify entity {entity['id']}: {e}")

    db.log("INFO", f"Classified {classified} entity-class mappings for document {document_id}")
    return classified
def classify_all_entities() -> int:
    """
    Classify all entities in the database.

    Reads every row of ``entities``, runs ``classify_entity`` on each one and
    upserts every resulting mapping into ``entity_classifications``. A failure
    on a single mapping is logged as a warning and skipped (best effort).

    Returns:
        Number of entity-class mappings that were written and committed.
    """
    cursor = db.execute("SELECT id, name, type, description FROM entities")
    try:
        entities = cursor.fetchall()
    finally:
        # Release the cursor even when fetching fails.
        cursor.close()

    classified = 0
    # NOTE(review): rows are accessed by key, so db.execute presumably yields
    # dict-style rows - confirm against the db wrapper.
    for entity in entities:
        for cls in classify_entity(entity):
            try:
                # Insert or update classification
                cursor = db.execute(
                    """
                    INSERT INTO entity_classifications (entity_id, ontology_class_id, confidence)
                    VALUES (%s, %s, %s)
                    ON DUPLICATE KEY UPDATE confidence = VALUES(confidence)
                    """,
                    (entity["id"], cls["ontology_class_id"], cls["confidence"]),
                )
                try:
                    db.commit()
                finally:
                    # Close the cursor even if the commit raises, so failed
                    # rows do not leak cursors.
                    cursor.close()
                classified += 1
            except Exception as e:
                db.log("WARNING", f"Failed to classify entity {entity['id']}: {e}")

    # Summary log, mirroring the per-document classify_entities().
    db.log("INFO", f"Classified {classified} entity-class mappings for all entities")
    return classified