Protokoll #25043

ID	25043
Zeitstempel	2025-12-28 02:02:26.264735
Client	root
IP	145.224.72.140
Modell	claude-sonnet-4-20250514
Status	completed
Tokens	5,349 (Input: 2,672, Output: 2,677)
Dauer	116 ms
Request-Zeit	2025-12-28 02:02:26.264735
Response-Zeit	2025-12-28 02:02:26.380363

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/step_semantic_extended.py",
        "content": "\"\"\"\nExtended Semantic Steps for Scientific Pipeline v1.\n\nImplements 6 new step_types for Pipeline #5:\n1. DuplicateCheckStep - Hash-based duplicate detection\n2. TextSemanticAnalyzeStep - Analyzes HOW text is structured (Textsemantik)\n3. TextSemanticStoreStep - Stores text semantics to chunk_text_semantics\n4. KnowledgeSemanticAnalyzeStep - Analyzes WHAT entities MEAN (Wissenssemantik)\n5. KnowledgeSemanticStoreStep - Stores knowledge semantics to entity_knowledge_semantics\n6. OntologyStoreStep - Stores ontology classifications\n\nPart of Pipeline-Refactoring based on scientific specification.\n\"\"\"\n\nimport json\n\nimport ollama\n\nfrom constants import LLM_TIMEOUT, MAX_RETRIES\n\n\nclass DuplicateCheckStep:\n    \"\"\"Step: Check for duplicate documents via content hash.\"\"\"\n\n    def __init__(self, db, progress=None):\n        self.db = db\n        self.progress = progress\n\n    def execute(self, doc_id: int, content_hash: str) -> dict:\n        \"\"\"\n        Check if document with same hash already exists.\n\n        Args:\n            doc_id: Current document ID\n            content_hash: SHA-256 hash of document content\n\n        Returns:\n            dict: {status: 'ok'|'abort', reason: str, duplicate_id: int|None}\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"duplicate_check\")\n            self.progress.add_log(\"Prüfe auf Duplikate...\")\n\n        if not content_hash:\n            return {\"status\": \"skip\", \"reason\": \"no_hash\"}\n\n        cursor = self.db.execute(\n            \"\"\"SELECT id, source_path FROM documents\n               WHERE file_hash = %s AND id != %s AND status = 'done'\n               LIMIT 1\"\"\",\n            (content_hash, doc_id),\n        )\n        existing = cursor.fetchone()\n        cursor.close()\n\n        if existing:\n            self.db.log(\n                \"INFO\",\n                f\"Duplicate found: doc {doc_id} matches {existing['id']} ({existing['source_path']})\",\n            )\n            if self.progress:\n                self.progress.add_log(f\"Duplikat gefunden: ID {existing['id']}\")\n\n            return {\n                \"status\": \"abort\",\n                \"reason\": \"duplicate\",\n                \"duplicate_id\": existing[\"id\"],\n                \"duplicate_path\": existing[\"source_path\"],\n            }\n\n        if self.progress:\n            self.progress.add_log(\"Kein Duplikat gefunden\")\n\n        return {\"status\": \"ok\"}\n\n\nclass TextSemanticAnalyzeStep:\n    \"\"\"Step: Analyze HOW text is structured (Textsemantik).\n\n    Analyzes each chunk for:\n    - statement_form: assertion, question, command, conditional\n    - intent: explain, argue, define, compare, exemplify, warn, instruct\n    - frame: theoretical, practical, historical, methodological, critical\n    - is_negated: whether the statement is negated\n    - discourse_role: thesis, evidence, example, counter, summary, definition\n    \"\"\"\n\n    PROMPT_TEMPLATE = \"\"\"Analysiere den folgenden Text semantisch.\n\nBestimme:\n1. statement_form: Ist es eine Aussage (assertion), Frage (question), Aufforderung (command) oder Bedingung (conditional)?\n2. intent: Was ist die Absicht? explain, argue, define, compare, exemplify, warn, instruct\n3. frame: Welcher Rahmen? theoretical, practical, historical, methodological, critical\n4. is_negated: Wird etwas verneint? true\/false\n5. discourse_role: Welche Rolle im Diskurs? thesis, evidence, example, counter, summary, definition\n\nAntworte NUR mit gültigem JSON:\n{{\n  \"statement_form\": \"assertion|question|command|conditional\",\n  \"intent\": \"explain|argue|define|compare|exemplify|warn|instruct\",\n  \"frame\": \"theoretical|practical|historical|methodological|critical\",\n  \"is_negated\": false,\n  \"discourse_role\": \"thesis|evidence|example|counter|summary|definition\"\n}}\n\nText:\n{content}\"\"\"\n\n    def __init__(self, db, progress=None):\n        self.db = db\n        self.progress = progress\n\n    def execute(self, chunks: list, config: dict) -> list:\n        \"\"\"\n        Analyze text semantics for each chunk.\n\n        Args:\n            chunks: List of chunk dicts with 'id' and 'content'\n            config: Step config with 'model' (default: mistral)\n\n        Returns:\n            list: Chunks with added 'text_semantics' field\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"text_semantic_analyze\")\n            self.progress.add_log(f\"Textsemantik-Analyse für {len(chunks)} Chunks...\")\n\n        model = config.get(\"model\", \"mistral\")\n        analyzed = 0\n        errors = 0\n\n        for chunk in chunks:\n            try:\n                prompt = self.PROMPT_TEMPLATE.format(content=chunk[\"content\"][:2000])\n\n                response = ollama.generate(\n                    model=model,\n                    prompt=prompt,\n                    options={\"num_predict\": 200},\n                )\n\n                # Parse JSON response\n                response_text = response[\"response\"].strip()\n                # Find JSON in response\n                start = response_text.find(\"{\")\n                end = response_text.rfind(\"}\") + 1\n                if start >= 0 and end > start:\n                    json_str = response_text[start:end]\n                    chunk[\"text_semantics\"] = json.loads(json_str)\n                    chunk[\"text_semantics\"][\"model_used\"] = model\n                    analyzed += 1\n                else:\n                    chunk[\"text_semantics\"] = None\n                    errors += 1\n\n            except Exception as e:\n                self.db.log(\"WARNING\", f\"Text semantic analysis failed for chunk {chunk['id']}: {e}\")\n                chunk[\"text_semantics\"] = None\n                errors += 1\n\n        if self.progress:\n            self.progress.add_log(f\"Textsemantik: {analyzed} analysiert, {errors} Fehler\")\n\n        self.db.log(\"INFO\", f\"Text semantic analysis: {analyzed} chunks, {errors} errors\")\n        return chunks\n\n\nclass TextSemanticStoreStep:\n    \"\"\"Step: Store text semantics to database.\"\"\"\n\n    def __init__(self, db, progress=None):\n        self.db = db\n        self.progress = progress\n\n    def execute(self, chunks: list, config: dict) -> dict:\n        \"\"\"\n        Store text semantics from chunks to chunk_text_semantics table.\n\n        Args:\n            chunks: List of chunks with 'text_semantics' field\n            config: Step config\n\n        Returns:\n            dict: {stored: int, skipped: int}\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"text_semantic_store\")\n            self.progress.add_log(\"Speichere Textsemantik...\")\n\n        stored = 0\n        skipped = 0\n\n        for chunk in chunks:\n            if not chunk.get(\"text_semantics\"):\n                skipped += 1\n                continue\n\n            sem = chunk[\"text_semantics\"]\n\n            try:\n                cursor = self.db.execute(\n                    \"\"\"INSERT INTO chunk_text_semantics\n                       (chunk_id, statement_form, intent, frame, is_negated,\n                        discourse_role, model_used)\n                       VALUES (%s, %s, %s, %s, %s, %s, %s)\n                       ON DUPLICATE KEY UPDATE\n                       statement_form = VALUES(statement_form),\n                       intent = VALUES(intent),\n                       frame = VALUES(frame),\n                       is_negated = VALUES(is_negated),\n                       discourse_role = VALUES(discourse_role),\n                       model_used = VALUES(model_used),\n                       updated_at = NOW()\"\"\",\n                    (\n                        chunk[\"id\"],\n                        sem.get(\"statement_form\"),\n                        sem.get(\"intent\"),\n                        sem.get(\"frame\"),\n                        sem.get(\"is_negated\", False),\n                        sem.get(\"discourse_role\"),\n                        sem.get(\"model_used\"),\n                    ),\n                )\n                self.db.commit()\n                cursor.close()\n                stored += 1\n\n            except Exception as e:\n                self.db.log(\"ERROR\", f\"Failed to store text semantics for chunk {chunk['id']}: {e}\")\n                skipped += 1\n\n        if self.progress:\n            self.progress.add_log(f\"Textsemantik gespeichert: {stored}\")\n\n        self.db.log(\"INFO\", f\"Text semantics stored: {stored}, skipped: {skipped}\")\n        return {\"stored\": stored, \"skipped\": skipped}\n\n\nclass KnowledgeSemanticAnalyzeStep:\n    \"\"\"Step: Analyze WHAT entities MEAN in context (Wissenssemantik).\n\n    Analyzes each entity for:\n    - semantic_role: agent, patient, instrument, location, cause, effect\n    - properties: JSON object with entity properties\n    - functional_category: method, tool, concept, actor, outcome, process\n    - context_meaning: Brief explanation of entity's meaning in context\n    \"\"\"\n\n    PROMPT_TEMPLATE = \"\"\"Analysiere die Bedeutung dieser Entität im Kontext.\n\nEntität: {name}\nTyp: {entity_type}\nKontext: {context}\n\nBestimme:\n1. semantic_role: Welche Rolle spielt die Entität? agent, patient, instrument, location, cause, effect\n2. properties: Welche Eigenschaften hat sie? (als JSON-Objekt)\n3. functional_category: Welche Funktion? method, tool, concept, actor, outcome, process\n4. context_meaning: Was bedeutet die Entität in diesem Kontext? (1 Satz)\n\nAntworte NUR mit gültigem JSON:\n{{\n  \"semantic_role\": \"agent|patient|instrument|location|cause|effect\",\n  \"properties\": {{\"key\": \"value\"}},\n  \"functional_category\": \"method|tool|concept|actor|outcome|process\",\n  \"context_meaning\": \"Kurze Erklärung\"\n}}\"\"\"\n\n    def __init__(self, db, progress=None):\n        self.db = db\n        self.progress = progress\n\n    def execute(self, entities: list, config: dict) -> list:\n        \"\"\"\n        Analyze knowledge semantics for each entity.\n\n        Args:\n            entities: List of entity dicts with 'id', 'name', 'type', 'context'\n            config: Step config with 'model' (default: mistral)\n\n        Returns:\n            list: Entities with added 'knowledge_semantics' field\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"k... [TRUNCATED-dea7bd333c7a02b9]"
    }
}

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/scripts\/pipeline\/step_semantic_extended.py",
        "content": "\"\"\"\nExtended Semantic Steps for Scientific Pipeline v1.\n\nImplements 6 new step_types for Pipeline #5:\n1. DuplicateCheckStep - Hash-based duplicate detection\n2. TextSemanticAnalyzeStep - Analyzes HOW text is structured (Textsemantik)\n3. TextSemanticStoreStep - Stores text semantics to chunk_text_semantics\n4. KnowledgeSemanticAnalyzeStep - Analyzes WHAT entities MEAN (Wissenssemantik)\n5. KnowledgeSemanticStoreStep - Stores knowledge semantics to entity_knowledge_semantics\n6. OntologyStoreStep - Stores ontology classifications\n\nPart of Pipeline-Refactoring based on scientific specification.\n\"\"\"\n\nimport json\n\nimport ollama\n\nfrom constants import LLM_TIMEOUT, MAX_RETRIES\n\n\nclass DuplicateCheckStep:\n    \"\"\"Step: Check for duplicate documents via content hash.\"\"\"\n\n    def __init__(self, db, progress=None):\n        self.db = db\n        self.progress = progress\n\n    def execute(self, doc_id: int, content_hash: str) -> dict:\n        \"\"\"\n        Check if document with same hash already exists.\n\n        Args:\n            doc_id: Current document ID\n            content_hash: SHA-256 hash of document content\n\n        Returns:\n            dict: {status: 'ok'|'abort', reason: str, duplicate_id: int|None}\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"duplicate_check\")\n            self.progress.add_log(\"Prüfe auf Duplikate...\")\n\n        if not content_hash:\n            return {\"status\": \"skip\", \"reason\": \"no_hash\"}\n\n        cursor = self.db.execute(\n            \"\"\"SELECT id, source_path FROM documents\n               WHERE file_hash = %s AND id != %s AND status = 'done'\n               LIMIT 1\"\"\",\n            (content_hash, doc_id),\n        )\n        existing = cursor.fetchone()\n        cursor.close()\n\n        if existing:\n            self.db.log(\n                \"INFO\",\n                f\"Duplicate found: doc {doc_id} matches {existing['id']} ({existing['source_path']})\",\n            )\n            if self.progress:\n                self.progress.add_log(f\"Duplikat gefunden: ID {existing['id']}\")\n\n            return {\n                \"status\": \"abort\",\n                \"reason\": \"duplicate\",\n                \"duplicate_id\": existing[\"id\"],\n                \"duplicate_path\": existing[\"source_path\"],\n            }\n\n        if self.progress:\n            self.progress.add_log(\"Kein Duplikat gefunden\")\n\n        return {\"status\": \"ok\"}\n\n\nclass TextSemanticAnalyzeStep:\n    \"\"\"Step: Analyze HOW text is structured (Textsemantik).\n\n    Analyzes each chunk for:\n    - statement_form: assertion, question, command, conditional\n    - intent: explain, argue, define, compare, exemplify, warn, instruct\n    - frame: theoretical, practical, historical, methodological, critical\n    - is_negated: whether the statement is negated\n    - discourse_role: thesis, evidence, example, counter, summary, definition\n    \"\"\"\n\n    PROMPT_TEMPLATE = \"\"\"Analysiere den folgenden Text semantisch.\n\nBestimme:\n1. statement_form: Ist es eine Aussage (assertion), Frage (question), Aufforderung (command) oder Bedingung (conditional)?\n2. intent: Was ist die Absicht? explain, argue, define, compare, exemplify, warn, instruct\n3. frame: Welcher Rahmen? theoretical, practical, historical, methodological, critical\n4. is_negated: Wird etwas verneint? true\/false\n5. discourse_role: Welche Rolle im Diskurs? thesis, evidence, example, counter, summary, definition\n\nAntworte NUR mit gültigem JSON:\n{{\n  \"statement_form\": \"assertion|question|command|conditional\",\n  \"intent\": \"explain|argue|define|compare|exemplify|warn|instruct\",\n  \"frame\": \"theoretical|practical|historical|methodological|critical\",\n  \"is_negated\": false,\n  \"discourse_role\": \"thesis|evidence|example|counter|summary|definition\"\n}}\n\nText:\n{content}\"\"\"\n\n    def __init__(self, db, progress=None):\n        self.db = db\n        self.progress = progress\n\n    def execute(self, chunks: list, config: dict) -> list:\n        \"\"\"\n        Analyze text semantics for each chunk.\n\n        Args:\n            chunks: List of chunk dicts with 'id' and 'content'\n            config: Step config with 'model' (default: mistral)\n\n        Returns:\n            list: Chunks with added 'text_semantics' field\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"text_semantic_analyze\")\n            self.progress.add_log(f\"Textsemantik-Analyse für {len(chunks)} Chunks...\")\n\n        model = config.get(\"model\", \"mistral\")\n        analyzed = 0\n        errors = 0\n\n        for chunk in chunks:\n            try:\n                prompt = self.PROMPT_TEMPLATE.format(content=chunk[\"content\"][:2000])\n\n                response = ollama.generate(\n                    model=model,\n                    prompt=prompt,\n                    options={\"num_predict\": 200},\n                )\n\n                # Parse JSON response\n                response_text = response[\"response\"].strip()\n                # Find JSON in response\n                start = response_text.find(\"{\")\n                end = response_text.rfind(\"}\") + 1\n                if start >= 0 and end > start:\n                    json_str = response_text[start:end]\n                    chunk[\"text_semantics\"] = json.loads(json_str)\n                    chunk[\"text_semantics\"][\"model_used\"] = model\n                    analyzed += 1\n                else:\n                    chunk[\"text_semantics\"] = None\n                    errors += 1\n\n            except Exception as e:\n                self.db.log(\"WARNING\", f\"Text semantic analysis failed for chunk {chunk['id']}: {e}\")\n                chunk[\"text_semantics\"] = None\n                errors += 1\n\n        if self.progress:\n            self.progress.add_log(f\"Textsemantik: {analyzed} analysiert, {errors} Fehler\")\n\n        self.db.log(\"INFO\", f\"Text semantic analysis: {analyzed} chunks, {errors} errors\")\n        return chunks\n\n\nclass TextSemanticStoreStep:\n    \"\"\"Step: Store text semantics to database.\"\"\"\n\n    def __init__(self, db, progress=None):\n        self.db = db\n        self.progress = progress\n\n    def execute(self, chunks: list, config: dict) -> dict:\n        \"\"\"\n        Store text semantics from chunks to chunk_text_semantics table.\n\n        Args:\n            chunks: List of chunks with 'text_semantics' field\n            config: Step config\n\n        Returns:\n            dict: {stored: int, skipped: int}\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"text_semantic_store\")\n            self.progress.add_log(\"Speichere Textsemantik...\")\n\n        stored = 0\n        skipped = 0\n\n        for chunk in chunks:\n            if not chunk.get(\"text_semantics\"):\n                skipped += 1\n                continue\n\n            sem = chunk[\"text_semantics\"]\n\n            try:\n                cursor = self.db.execute(\n                    \"\"\"INSERT INTO chunk_text_semantics\n                       (chunk_id, statement_form, intent, frame, is_negated,\n                        discourse_role, model_used)\n                       VALUES (%s, %s, %s, %s, %s, %s, %s)\n                       ON DUPLICATE KEY UPDATE\n                       statement_form = VALUES(statement_form),\n                       intent = VALUES(intent),\n                       frame = VALUES(frame),\n                       is_negated = VALUES(is_negated),\n                       discourse_role = VALUES(discourse_role),\n                       model_used = VALUES(model_used),\n                       updated_at = NOW()\"\"\",\n                    (\n                        chunk[\"id\"],\n                        sem.get(\"statement_form\"),\n                        sem.get(\"intent\"),\n                        sem.get(\"frame\"),\n                        sem.get(\"is_negated\", False),\n                        sem.get(\"discourse_role\"),\n                        sem.get(\"model_used\"),\n                    ),\n                )\n                self.db.commit()\n                cursor.close()\n                stored += 1\n\n            except Exception as e:\n                self.db.log(\"ERROR\", f\"Failed to store text semantics for chunk {chunk['id']}: {e}\")\n                skipped += 1\n\n        if self.progress:\n            self.progress.add_log(f\"Textsemantik gespeichert: {stored}\")\n\n        self.db.log(\"INFO\", f\"Text semantics stored: {stored}, skipped: {skipped}\")\n        return {\"stored\": stored, \"skipped\": skipped}\n\n\nclass KnowledgeSemanticAnalyzeStep:\n    \"\"\"Step: Analyze WHAT entities MEAN in context (Wissenssemantik).\n\n    Analyzes each entity for:\n    - semantic_role: agent, patient, instrument, location, cause, effect\n    - properties: JSON object with entity properties\n    - functional_category: method, tool, concept, actor, outcome, process\n    - context_meaning: Brief explanation of entity's meaning in context\n    \"\"\"\n\n    PROMPT_TEMPLATE = \"\"\"Analysiere die Bedeutung dieser Entität im Kontext.\n\nEntität: {name}\nTyp: {entity_type}\nKontext: {context}\n\nBestimme:\n1. semantic_role: Welche Rolle spielt die Entität? agent, patient, instrument, location, cause, effect\n2. properties: Welche Eigenschaften hat sie? (als JSON-Objekt)\n3. functional_category: Welche Funktion? method, tool, concept, actor, outcome, process\n4. context_meaning: Was bedeutet die Entität in diesem Kontext? (1 Satz)\n\nAntworte NUR mit gültigem JSON:\n{{\n  \"semantic_role\": \"agent|patient|instrument|location|cause|effect\",\n  \"properties\": {{\"key\": \"value\"}},\n  \"functional_category\": \"method|tool|concept|actor|outcome|process\",\n  \"context_meaning\": \"Kurze Erklärung\"\n}}\"\"\"\n\n    def __init__(self, db, progress=None):\n        self.db = db\n        self.progress = progress\n\n    def execute(self, entities: list, config: dict) -> list:\n        \"\"\"\n        Analyze knowledge semantics for each entity.\n\n        Args:\n            entities: List of entity dicts with 'id', 'name', 'type', 'context'\n            config: Step config with 'model' (default: mistral)\n\n        Returns:\n            list: Entities with added 'knowledge_semantics' field\n        \"\"\"\n        if self.progress:\n            self.progress.update_step(\"k... [TRUNCATED-dea7bd333c7a02b9]",
        "structuredPatch": [],
        "originalFile": null
    }
}

← Vorheriger Zur Liste Nächster →