{
"event": "PostToolUse",
"raw_data": {
"session_id": "f0f145c4-ac1d-417a-b15b-271983ae648b",
"transcript_path": "\/root\/.claude\/projects\/-var-www\/f0f145c4-ac1d-417a-b15b-271983ae648b.jsonl",
"cwd": "\/var\/www\/dev.campus.systemische-tools.de",
"permission_mode": "acceptEdits",
"hook_event_name": "PostToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/chunk.py",
"limit": 50
},
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/chunk.py",
"content": "\"\"\"\nSemantic chunking for KI-System Pipeline\nSplits documents into meaningful chunks preserving hierarchy.\n\"\"\"\n\nimport re\n\nfrom config import CHUNK_OVERLAP_PERCENT, MAX_CHUNK_SIZE, MIN_CHUNK_SIZE\n\n\ndef split_into_sentences(text):\n \"\"\"Split text into sentences.\"\"\"\n # German-aware sentence splitting\n pattern = r\"(?<=[.!?])\\s+(?=[A-ZÄÖÜ])\"\n sentences = re.split(pattern, text)\n return [s.strip() for s in sentences if s.strip()]\n\n\ndef calculate_overlap(chunk_size):\n \"\"\"Calculate overlap size based on chunk size.\"\"\"\n return int(chunk_size * CHUNK_OVERLAP_PERCENT \/ 100)\n\n\ndef chunk_by_structure(extraction_result):\n \"\"\"\n Chunk document based on its structure.\n Preserves heading hierarchy in metadata.\n \"\"\"\n chunks = []\n file_type = extraction_result[\"file_type\"]\n content = extraction_result[\"content\"]\n\n if file_type == \".pdf\":\n chunks = chunk_pdf(content)\n elif file_type == \".pptx\":\n chunks = chunk_pptx(content)\n elif file_type == \".docx\":\n chunks = chunk_docx(content)\n elif file_type == \".md\":\n chunks = chunk_markdown(content)\n elif file_type == \".txt\":\n chunks = chunk_text(content[\"text\"])\n\n return chunks\n\n\ndef chunk_pdf(pages):\n \"\"\"Chunk PDF by pages and paragraphs.\"\"\"\n chunks = []\n position = 0",
"numLines": 50,
"startLine": 1,
"totalLines": 408
}
},
"tool_use_id": "toolu_01WoYWsYzaFKKXGRiiERYjo1"
}
}