ontology_extractor.py

Code Hygiene Score: 100

Keine Issues gefunden.

Dependencies 5

Klassen 1

Code

"""Ontologie-Extraktion für Wissensextraktion."""

import sys

# Put the pipeline script directory first on the module search path before the
# absolute import below runs; presumably the ``db`` module lives in that
# directory — TODO confirm against the deployment layout.
sys.path.insert(0, "/var/www/scripts/pipeline")

from db import db

from .llm_service import LLMService
from .models import KnowledgeLevel, KnowledgeType


class OntologyExtractor:
    """Extract ontology relations (interactions) between entities via an LLM.

    The extractor asks the injected LLM service for typed relations between
    the given entities, persists every relation whose endpoints can be matched
    to a known entity in the ``entity_ontology`` table, and reports a summary
    through the injected storage callback.
    """

    def __init__(self, llm_service: LLMService, store_knowledge_fn):
        """Store the LLM service and the knowledge-storage callback.

        Args:
            llm_service: Service accessed through ``call_llm``, ``parse_json``
                and ``model``.
            store_knowledge_fn: Callable invoked as
                ``store_knowledge_fn(level, source_id, knowledge_type, payload)``.
        """
        self.llm = llm_service
        self.store_knowledge = store_knowledge_fn

    @staticmethod
    def _index_entities_by_name(entities: list[dict]) -> dict[str, dict]:
        """Map lower-cased entity names to entities; the first entity wins."""
        index: dict[str, dict] = {}
        for entity in entities:
            try:
                name = entity["name"]
            except (KeyError, TypeError, IndexError):
                continue
            if isinstance(name, str) and name:
                # setdefault keeps the first entity for duplicate names, which
                # matches a front-to-back linear search.
                index.setdefault(name.lower(), entity)
        return index

    @staticmethod
    def _lookup_entity(index: dict[str, dict], name) -> dict | None:
        """Return the entity registered under ``name`` (case-insensitive)."""
        if not isinstance(name, str):
            # The LLM may emit null or a non-string endpoint.
            return None
        return index.get(name.lower())

    @staticmethod
    def _coerce_strength(value) -> float:
        """Convert an LLM-supplied strength to a float within [0.0, 1.0].

        Values that cannot be interpreted as a number (including NaN) fall
        back to 1.0, the default used when the field is missing.
        """
        try:
            strength = float(value)
        except (TypeError, ValueError, OverflowError):
            return 1.0
        if strength != strength:  # NaN is the only value unequal to itself
            return 1.0
        return min(1.0, max(0.0, strength))

    def extract_ontology(self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:
        """Extract and persist ontology relations between entities.

        Args:
            entities: Entities to relate; each is read through its ``"name"``
                and ``"id"`` keys. Only the first 20 names are listed in the
                prompt, but relations are matched against all entities.
            text: Source text; only the first 3000 characters are sent.
            level: Knowledge level recorded with each relation and summary.
            source_id: Identifier of the knowledge source.

        Returns:
            The relations that were stored (or already existed), as returned
            by ``_store_ontology``. Empty when fewer than two entities are
            given.
        """
        if len(entities) < 2:
            return []

        entity_names = [e["name"] for e in entities[:20]]

        prompt = f"""Analysiere die Wechselwirkungen zwischen den folgenden Entitäten im Text.

Entitäten: {", ".join(entity_names)}

Beziehungstypen:
- CAUSES: A verursacht/bewirkt B
- REQUIRES: A benötigt/erfordert B
- INFLUENCES: A beeinflusst B
- ENABLES: A ermöglicht B
- CONTRADICTS: A widerspricht B
- PART_OF: A ist Teil von B
- INSTANCE_OF: A ist Instanz von B
- USES: A verwendet B

Antworte NUR als JSON:
{{"relations": [
    {{"source": "...", "target": "...", "type": "CAUSES", "description": "...", "strength": 0.0-1.0, "bidirectional": false}}
]}}

Text:
{text[:3000]}"""

        result = self.llm.call_llm(prompt)
        data = self.llm.parse_json(result)

        # The LLM response is untrusted: tolerate a non-dict payload or a
        # "relations" value that is not a list instead of raising.
        relations = data.get("relations") if isinstance(data, dict) else None
        if not isinstance(relations, list):
            relations = []

        # Build the name lookup once rather than scanning all entities twice
        # per relation.
        entities_by_name = self._index_entities_by_name(entities)

        # Persist every relation whose endpoints both map to known entities.
        stored = []
        for rel in relations:
            if not isinstance(rel, dict):
                continue

            source_entity = self._lookup_entity(entities_by_name, rel.get("source"))
            target_entity = self._lookup_entity(entities_by_name, rel.get("target"))
            if source_entity is None or target_entity is None:
                continue

            stored_rel = self._store_ontology(
                source_id=source_entity["id"],
                target_id=target_entity["id"],
                # "or" also covers keys that are present but null/empty.
                relation_type=str(rel.get("type") or "RELATED_TO"),
                description=str(rel.get("description") or ""),
                strength=self._coerce_strength(rel.get("strength", 1.0)),
                bidirectional=bool(rel.get("bidirectional", False)),
                level=level,
                knowledge_source_id=source_id,
            )
            if stored_rel:
                stored.append(stored_rel)

        # Record a summary in the knowledge store; the types are sorted so the
        # payload is deterministic across runs.
        self.store_knowledge(
            level,
            source_id,
            KnowledgeType.ONTOLOGY,
            {"relations": len(stored), "types": sorted({r["type"] for r in stored})},
        )

        return stored

    def _store_ontology(
        self,
        source_id: int,
        target_id: int,
        relation_type: str,
        description: str,
        strength: float,
        bidirectional: bool,
        level: KnowledgeLevel,
        knowledge_source_id: int,
    ) -> dict | None:
        """Store one ontology relation unless an equal one already exists.

        A relation counts as existing when a row with the same source entity,
        target entity and relation type is found; in that case nothing is
        written and the existing row id is returned.

        Args:
            source_id: Id of the source entity.
            target_id: Id of the target entity.
            relation_type: Relation type label.
            description: Free-text description of the relation.
            strength: Relation strength.
            bidirectional: Whether the relation is stored as "bidirectional"
                instead of "unidirectional".
            level: Knowledge level; its ``value`` is written as ``source_type``.
            knowledge_source_id: Id of the knowledge source.

        Returns:
            A dict with ``id``, ``source_id``, ``target_id``, ``type`` and
            ``strength``, or ``None`` if any error occurred (the error is
            logged via ``db.log``).
        """
        try:
            # Check whether the relation already exists.
            cursor = db.execute(
                """SELECT id FROM entity_ontology
                   WHERE source_entity_id = %s AND target_entity_id = %s AND relation_type = %s""",
                (source_id, target_id, relation_type),
            )
            try:
                existing = cursor.fetchone()
            finally:
                # Close the cursor even if fetching fails.
                cursor.close()

            if existing:
                rel_id = existing["id"]
            else:
                # Only needed for new rows, so it is resolved on this path only.
                model_name = f"{self.llm.model.provider}:{self.llm.model.model_name}"
                cursor = db.execute(
                    """INSERT INTO entity_ontology
                       (source_entity_id, target_entity_id, relation_type, direction,
                        strength, description, source_type, source_id, model_used, created_at)
                       VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())""",
                    (
                        source_id,
                        target_id,
                        relation_type,
                        "bidirectional" if bidirectional else "unidirectional",
                        strength,
                        description,
                        level.value,
                        knowledge_source_id,
                        model_name,
                    ),
                )
                try:
                    db.commit()
                    rel_id = cursor.lastrowid
                finally:
                    # Close the cursor even if the commit fails.
                    cursor.close()

            return {
                "id": rel_id,
                "source_id": source_id,
                "target_id": target_id,
                "type": relation_type,
                "strength": strength,
            }

        except Exception as e:
            # Best-effort persistence: log and signal failure to the caller
            # instead of aborting the whole extraction.
            db.log("ERROR", f"Fehler beim Speichern der Ontologie: {e}")
            return None
← Übersicht Graph