Protokoll #17004

ID	17004
Zeitstempel	2025-12-26 02:46:33.242378
Client	root
IP	145.224.72.140
Modell	claude-sonnet-4-20250514
Status	completed
Tokens	2,577 (Input: 1,286, Output: 1,291)
Dauer	82 ms
Request-Zeit	2025-12-26 02:46:33.242378
Response-Zeit	2025-12-26 02:46:33.324431

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/scripts\/pipeline\/test_taxonomy.py",
        "content": "#!\/usr\/bin\/env python3\n\"\"\"\nTest script for Taxonomy Pipeline functionality.\nTests chunk and entity taxonomy assignments.\n\"\"\"\n\nimport sys\nimport os\n\n# Add pipeline directory to path\nsys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))\n\nfrom db import Database\n\n\ndef test_taxonomy_pipeline():\n    \"\"\"Test the taxonomy pipeline with existing chunks.\"\"\"\n    db = Database()\n\n    if not db.connect():\n        print(\"ERROR: Database connection failed\")\n        return False\n\n    print(\"=== Taxonomy Pipeline Test ===\\n\")\n\n    # 1. Get taxonomy terms\n    print(\"[1] Loading taxonomy terms...\")\n    terms = db.get_taxonomy_terms()\n    print(f\"    Found {len(terms)} taxonomy terms\")\n    for term in terms[:5]:\n        print(f\"    - {term['id']}: {term['name']}\")\n    print()\n\n    # 2. Get some chunks to test\n    print(\"[2] Loading test chunks...\")\n    cursor = db.execute(\n        \"\"\"SELECT c.id, c.document_id, LEFT(c.content, 200) as content_preview\n           FROM chunks c\n           LIMIT 5\"\"\"\n    )\n    chunks = cursor.fetchall()\n    cursor.close()\n    print(f\"    Found {len(chunks)} chunks for testing\")\n    print()\n\n    # 3. Assign taxonomy based on content keywords\n    print(\"[3] Assigning taxonomy terms to chunks...\")\n    keyword_map = {\n        \"Coaching\": [\"coaching\", \"coach\", \"begleitung\"],\n        \"Kommunikation\": [\"kommunikation\", \"fragen\", \"gespräch\", \"dialog\"],\n        \"Methoden\": [\"methode\", \"werkzeug\", \"tool\", \"intervention\"],\n        \"Theorie\": [\"theorie\", \"konzept\", \"modell\", \"ansatz\"],\n        \"Prozess\": [\"prozess\", \"ablauf\", \"schritt\", \"phase\"],\n        \"Organisation\": [\"team\", \"organisation\", \"gruppe\", \"zusammenarbeit\"],\n        \"Entwicklung\": [\"entwicklung\", \"veränderung\", \"wachstum\"],\n    }\n\n    # Build term lookup\n    term_lookup = {t[\"name\"]: t[\"id\"] for t in terms}\n\n    assignments = 0\n    for chunk in chunks:\n        content_lower = chunk[\"content_preview\"].lower()\n        chunk_id = chunk[\"id\"]\n\n        for term_name, keywords in keyword_map.items():\n            if term_name not in term_lookup:\n                continue\n\n            term_id = term_lookup[term_name]\n\n            # Check if any keyword matches\n            matches = sum(1 for kw in keywords if kw in content_lower)\n            if matches > 0:\n                # Calculate confidence based on matches\n                confidence = min(0.5 + (matches * 0.15), 0.95)\n\n                result = db.add_chunk_taxonomy(\n                    chunk_id=chunk_id,\n                    term_id=term_id,\n                    confidence=confidence,\n                    source=\"auto\",\n                )\n\n                if result:\n                    print(f\"    Chunk {chunk_id} -> {term_name} (conf: {confidence:.2f})\")\n                    assignments += 1\n\n    print(f\"\\n    Created {assignments} new taxonomy assignments\")\n    print()\n\n    # 4. Verify mappings\n    print(\"[4] Verifying taxonomy mappings...\")\n    cursor = db.execute(\"SELECT COUNT(*) as count FROM chunk_taxonomy\")\n    result = cursor.fetchone()\n    cursor.close()\n    print(f\"    Total chunk_taxonomy entries: {result['count']}\")\n\n    # 5. Show sample mappings\n    print(\"\\n[5] Sample mappings with details:\")\n    cursor = db.execute(\n        \"\"\"SELECT ct.chunk_id, ct.confidence, ct.source, tt.name as term_name\n           FROM chunk_taxonomy ct\n           JOIN taxonomy_terms tt ON ct.taxonomy_term_id = tt.id\n           ORDER BY ct.created_at DESC\n           LIMIT 10\"\"\"\n    )\n    mappings = cursor.fetchall()\n    cursor.close()\n\n    for m in mappings:\n        print(f\"    Chunk {m['chunk_id']}: {m['term_name']} ({m['confidence']:.2f}, {m['source']})\")\n\n    # 6. Test entity taxonomy (if entities exist)\n    print(\"\\n[6] Checking entities for taxonomy assignment...\")\n    cursor = db.execute(\"SELECT id, name, type FROM entities LIMIT 3\")\n    entities = cursor.fetchall()\n    cursor.close()\n\n    if entities:\n        print(f\"    Found {len(entities)} entities\")\n        for entity in entities:\n            # Assign first matching taxonomy term\n            if entity[\"type\"] and entity[\"type\"] in term_lookup:\n                term_id = term_lookup[entity[\"type\"]]\n                result = db.add_entity_taxonomy(\n                    entity_id=entity[\"id\"],\n                    term_id=term_id,\n                    relevance=0.8,\n                    validated=False,\n                )\n                if result:\n                    print(f\"    Entity {entity['id']} ({entity['name']}) -> {entity['type']}\")\n    else:\n        print(\"    No entities found to test\")\n\n    db.disconnect()\n    print(\"\\n=== Test Complete ===\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = test_taxonomy_pipeline()\n    sys.exit(0 if success else 1)\n"
    }
}

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/scripts\/pipeline\/test_taxonomy.py",
        "content": "#!\/usr\/bin\/env python3\n\"\"\"\nTest script for Taxonomy Pipeline functionality.\nTests chunk and entity taxonomy assignments.\n\"\"\"\n\nimport sys\nimport os\n\n# Add pipeline directory to path\nsys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))\n\nfrom db import Database\n\n\ndef test_taxonomy_pipeline():\n    \"\"\"Test the taxonomy pipeline with existing chunks.\"\"\"\n    db = Database()\n\n    if not db.connect():\n        print(\"ERROR: Database connection failed\")\n        return False\n\n    print(\"=== Taxonomy Pipeline Test ===\\n\")\n\n    # 1. Get taxonomy terms\n    print(\"[1] Loading taxonomy terms...\")\n    terms = db.get_taxonomy_terms()\n    print(f\"    Found {len(terms)} taxonomy terms\")\n    for term in terms[:5]:\n        print(f\"    - {term['id']}: {term['name']}\")\n    print()\n\n    # 2. Get some chunks to test\n    print(\"[2] Loading test chunks...\")\n    cursor = db.execute(\n        \"\"\"SELECT c.id, c.document_id, LEFT(c.content, 200) as content_preview\n           FROM chunks c\n           LIMIT 5\"\"\"\n    )\n    chunks = cursor.fetchall()\n    cursor.close()\n    print(f\"    Found {len(chunks)} chunks for testing\")\n    print()\n\n    # 3. Assign taxonomy based on content keywords\n    print(\"[3] Assigning taxonomy terms to chunks...\")\n    keyword_map = {\n        \"Coaching\": [\"coaching\", \"coach\", \"begleitung\"],\n        \"Kommunikation\": [\"kommunikation\", \"fragen\", \"gespräch\", \"dialog\"],\n        \"Methoden\": [\"methode\", \"werkzeug\", \"tool\", \"intervention\"],\n        \"Theorie\": [\"theorie\", \"konzept\", \"modell\", \"ansatz\"],\n        \"Prozess\": [\"prozess\", \"ablauf\", \"schritt\", \"phase\"],\n        \"Organisation\": [\"team\", \"organisation\", \"gruppe\", \"zusammenarbeit\"],\n        \"Entwicklung\": [\"entwicklung\", \"veränderung\", \"wachstum\"],\n    }\n\n    # Build term lookup\n    term_lookup = {t[\"name\"]: t[\"id\"] for t in terms}\n\n    assignments = 0\n    for chunk in chunks:\n        content_lower = chunk[\"content_preview\"].lower()\n        chunk_id = chunk[\"id\"]\n\n        for term_name, keywords in keyword_map.items():\n            if term_name not in term_lookup:\n                continue\n\n            term_id = term_lookup[term_name]\n\n            # Check if any keyword matches\n            matches = sum(1 for kw in keywords if kw in content_lower)\n            if matches > 0:\n                # Calculate confidence based on matches\n                confidence = min(0.5 + (matches * 0.15), 0.95)\n\n                result = db.add_chunk_taxonomy(\n                    chunk_id=chunk_id,\n                    term_id=term_id,\n                    confidence=confidence,\n                    source=\"auto\",\n                )\n\n                if result:\n                    print(f\"    Chunk {chunk_id} -> {term_name} (conf: {confidence:.2f})\")\n                    assignments += 1\n\n    print(f\"\\n    Created {assignments} new taxonomy assignments\")\n    print()\n\n    # 4. Verify mappings\n    print(\"[4] Verifying taxonomy mappings...\")\n    cursor = db.execute(\"SELECT COUNT(*) as count FROM chunk_taxonomy\")\n    result = cursor.fetchone()\n    cursor.close()\n    print(f\"    Total chunk_taxonomy entries: {result['count']}\")\n\n    # 5. Show sample mappings\n    print(\"\\n[5] Sample mappings with details:\")\n    cursor = db.execute(\n        \"\"\"SELECT ct.chunk_id, ct.confidence, ct.source, tt.name as term_name\n           FROM chunk_taxonomy ct\n           JOIN taxonomy_terms tt ON ct.taxonomy_term_id = tt.id\n           ORDER BY ct.created_at DESC\n           LIMIT 10\"\"\"\n    )\n    mappings = cursor.fetchall()\n    cursor.close()\n\n    for m in mappings:\n        print(f\"    Chunk {m['chunk_id']}: {m['term_name']} ({m['confidence']:.2f}, {m['source']})\")\n\n    # 6. Test entity taxonomy (if entities exist)\n    print(\"\\n[6] Checking entities for taxonomy assignment...\")\n    cursor = db.execute(\"SELECT id, name, type FROM entities LIMIT 3\")\n    entities = cursor.fetchall()\n    cursor.close()\n\n    if entities:\n        print(f\"    Found {len(entities)} entities\")\n        for entity in entities:\n            # Assign first matching taxonomy term\n            if entity[\"type\"] and entity[\"type\"] in term_lookup:\n                term_id = term_lookup[entity[\"type\"]]\n                result = db.add_entity_taxonomy(\n                    entity_id=entity[\"id\"],\n                    term_id=term_id,\n                    relevance=0.8,\n                    validated=False,\n                )\n                if result:\n                    print(f\"    Entity {entity['id']} ({entity['name']}) -> {entity['type']}\")\n    else:\n        print(\"    No entities found to test\")\n\n    db.disconnect()\n    print(\"\\n=== Test Complete ===\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = test_taxonomy_pipeline()\n    sys.exit(0 if success else 1)\n",
        "structuredPatch": [],
        "originalFile": null
    }
}

← Vorheriger Zur Liste Nächster →