{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/knowledge\/analyzer.py",
"content": "\"\"\"Hauptanalyzer für Wissensextraktion.\"\"\"\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import ModelConfig, KnowledgeLevel, DEFAULT_MODELS\nfrom .llm_service import LLMService\nfrom .entity_extractor import EntityExtractor\nfrom .semantic_extractor import SemanticExtractor\nfrom .ontology_extractor import OntologyExtractor\nfrom .taxonomy_extractor import TaxonomyExtractor\nfrom .storage import KnowledgeStorage\n\n\nclass KnowledgeExtractor:\n \"\"\"\n Modulare Wissensextraktion mit Datenbankabgleich.\n\n Verwendung:\n extractor = KnowledgeExtractor(model_config)\n\n # Pro Seite\n entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n \"\"\"\n\n def __init__(self, model_config: ModelConfig | None = None):\n \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"\n self.model = model_config or DEFAULT_MODELS[\"ollama\"]\n self.llm = LLMService(self.model)\n\n # Storage mit Modell-Kontext\n model_name = f\"{self.model.provider}:{self.model.model_name}\"\n self.storage = KnowledgeStorage(model_name)\n\n # Initialisiere Extraktoren\n self.entity_extractor = EntityExtractor(self.llm, self.storage.store)\n self.semantic_extractor = SemanticExtractor(self.llm, self.storage.store)\n self.ontology_extractor = OntologyExtractor(self.llm, self.storage.store)\n self.taxonomy_extractor = TaxonomyExtractor(self.llm, self.storage.store)\n\n def extract_entities(self, text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:\n \"\"\"Extrahiere Entitäten aus Text.\"\"\"\n return self.entity_extractor.extract_entities(text, level, source_id)\n\n def extract_semantics(\n self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n ) -> list[dict]:\n \"\"\"Extrahiere Semantik für Entitäten.\"\"\"\n return self.semantic_extractor.extract_semantics(entities, text, level, source_id)\n\n def extract_ontology(\n self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n ) -> list[dict]:\n \"\"\"Extrahiere Ontologie zwischen Entitäten.\"\"\"\n return self.ontology_extractor.extract_ontology(entities, text, level, source_id)\n\n def extract_taxonomy(\n self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n ) -> list[dict]:\n \"\"\"Extrahiere Taxonomie für Entitäten.\"\"\"\n return self.taxonomy_extractor.extract_taxonomy(entities, text, level, source_id)\n\n def analyze_page(self, page_id: int, text: str) -> dict:\n \"\"\"\n Vollständige Wissensanalyse für eine Seite.\n\n Reihenfolge: Entitäten → Semantik → Ontologie → Taxonomie\n \"\"\"\n db.log(\"INFO\", f\"Starte Seitenanalyse für page_id={page_id}\")\n\n entities = self.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n semantics = self.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n ontology = self.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n\n return {\n \"page_id\": page_id,\n \"entities\": len(entities),\n \"semantics\": len(semantics),\n \"ontology\": len(ontology),\n \"taxonomy\": len(taxonomy),\n }\n\n def analyze_section(self, section_id: int, text: str) -> dict:\n \"\"\"Vollständige Wissensanalyse für einen Abschnitt.\"\"\"\n db.log(\"INFO\", f\"Starte Abschnittsanalyse für section_id={section_id}\")\n\n entities = self.extract_entities(text, KnowledgeLevel.SECTION, section_id)\n semantics = self.extract_semantics(entities, text, KnowledgeLevel.SECTION, section_id)\n ontology = self.extract_ontology(entities, text, KnowledgeLevel.SECTION, section_id)\n taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.SECTION, section_id)\n\n return {\n \"section_id\": section_id,\n \"entities\": len(entities),\n \"semantics\": len(semantics),\n \"ontology\": len(ontology),\n \"taxonomy\": len(taxonomy),\n }\n\n def analyze_document(self, document_id: int, text: str) -> dict:\n \"\"\"Vollständige Wissensanalyse für ein Dokument (konsolidiert).\"\"\"\n db.log(\"INFO\", f\"Starte Dokumentanalyse für document_id={document_id}\")\n\n entities = self.extract_entities(text, KnowledgeLevel.DOCUMENT, document_id)\n semantics = self.extract_semantics(entities, text, KnowledgeLevel.DOCUMENT, document_id)\n ontology = self.extract_ontology(entities, text, KnowledgeLevel.DOCUMENT, document_id)\n taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.DOCUMENT, document_id)\n\n return {\n \"document_id\": document_id,\n \"entities\": len(entities),\n \"semantics\": len(semantics),\n \"ontology\": len(ontology),\n \"taxonomy\": len(taxonomy),\n }\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/scripts\/pipeline\/knowledge\/analyzer.py",
"content": "\"\"\"Hauptanalyzer für Wissensextraktion.\"\"\"\nimport sys\nsys.path.insert(0, \"\/var\/www\/scripts\/pipeline\")\n\nfrom db import db\n\nfrom .models import ModelConfig, KnowledgeLevel, DEFAULT_MODELS\nfrom .llm_service import LLMService\nfrom .entity_extractor import EntityExtractor\nfrom .semantic_extractor import SemanticExtractor\nfrom .ontology_extractor import OntologyExtractor\nfrom .taxonomy_extractor import TaxonomyExtractor\nfrom .storage import KnowledgeStorage\n\n\nclass KnowledgeExtractor:\n \"\"\"\n Modulare Wissensextraktion mit Datenbankabgleich.\n\n Verwendung:\n extractor = KnowledgeExtractor(model_config)\n\n # Pro Seite\n entities = extractor.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n semantics = extractor.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n ontology = extractor.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n taxonomy = extractor.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n \"\"\"\n\n def __init__(self, model_config: ModelConfig | None = None):\n \"\"\"Initialisiere Extractor mit Modellkonfiguration.\"\"\"\n self.model = model_config or DEFAULT_MODELS[\"ollama\"]\n self.llm = LLMService(self.model)\n\n # Storage mit Modell-Kontext\n model_name = f\"{self.model.provider}:{self.model.model_name}\"\n self.storage = KnowledgeStorage(model_name)\n\n # Initialisiere Extraktoren\n self.entity_extractor = EntityExtractor(self.llm, self.storage.store)\n self.semantic_extractor = SemanticExtractor(self.llm, self.storage.store)\n self.ontology_extractor = OntologyExtractor(self.llm, self.storage.store)\n self.taxonomy_extractor = TaxonomyExtractor(self.llm, self.storage.store)\n\n def extract_entities(self, text: str, level: KnowledgeLevel, source_id: int) -> list[dict]:\n \"\"\"Extrahiere Entitäten aus Text.\"\"\"\n return self.entity_extractor.extract_entities(text, level, source_id)\n\n def extract_semantics(\n self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n ) -> list[dict]:\n \"\"\"Extrahiere Semantik für Entitäten.\"\"\"\n return self.semantic_extractor.extract_semantics(entities, text, level, source_id)\n\n def extract_ontology(\n self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n ) -> list[dict]:\n \"\"\"Extrahiere Ontologie zwischen Entitäten.\"\"\"\n return self.ontology_extractor.extract_ontology(entities, text, level, source_id)\n\n def extract_taxonomy(\n self, entities: list[dict], text: str, level: KnowledgeLevel, source_id: int\n ) -> list[dict]:\n \"\"\"Extrahiere Taxonomie für Entitäten.\"\"\"\n return self.taxonomy_extractor.extract_taxonomy(entities, text, level, source_id)\n\n def analyze_page(self, page_id: int, text: str) -> dict:\n \"\"\"\n Vollständige Wissensanalyse für eine Seite.\n\n Reihenfolge: Entitäten → Semantik → Ontologie → Taxonomie\n \"\"\"\n db.log(\"INFO\", f\"Starte Seitenanalyse für page_id={page_id}\")\n\n entities = self.extract_entities(text, KnowledgeLevel.PAGE, page_id)\n semantics = self.extract_semantics(entities, text, KnowledgeLevel.PAGE, page_id)\n ontology = self.extract_ontology(entities, text, KnowledgeLevel.PAGE, page_id)\n taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.PAGE, page_id)\n\n return {\n \"page_id\": page_id,\n \"entities\": len(entities),\n \"semantics\": len(semantics),\n \"ontology\": len(ontology),\n \"taxonomy\": len(taxonomy),\n }\n\n def analyze_section(self, section_id: int, text: str) -> dict:\n \"\"\"Vollständige Wissensanalyse für einen Abschnitt.\"\"\"\n db.log(\"INFO\", f\"Starte Abschnittsanalyse für section_id={section_id}\")\n\n entities = self.extract_entities(text, KnowledgeLevel.SECTION, section_id)\n semantics = self.extract_semantics(entities, text, KnowledgeLevel.SECTION, section_id)\n ontology = self.extract_ontology(entities, text, KnowledgeLevel.SECTION, section_id)\n taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.SECTION, section_id)\n\n return {\n \"section_id\": section_id,\n \"entities\": len(entities),\n \"semantics\": len(semantics),\n \"ontology\": len(ontology),\n \"taxonomy\": len(taxonomy),\n }\n\n def analyze_document(self, document_id: int, text: str) -> dict:\n \"\"\"Vollständige Wissensanalyse für ein Dokument (konsolidiert).\"\"\"\n db.log(\"INFO\", f\"Starte Dokumentanalyse für document_id={document_id}\")\n\n entities = self.extract_entities(text, KnowledgeLevel.DOCUMENT, document_id)\n semantics = self.extract_semantics(entities, text, KnowledgeLevel.DOCUMENT, document_id)\n ontology = self.extract_ontology(entities, text, KnowledgeLevel.DOCUMENT, document_id)\n taxonomy = self.extract_taxonomy(entities, text, KnowledgeLevel.DOCUMENT, document_id)\n\n return {\n \"document_id\": document_id,\n \"entities\": len(entities),\n \"semantics\": len(semantics),\n \"ontology\": len(ontology),\n \"taxonomy\": len(taxonomy),\n }\n",
"structuredPatch": [],
"originalFile": null
}
}