{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/scripts\/pipeline\/extract.py",
"offset": 90,
"limit": 45
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/scripts\/pipeline\/extract.py",
"content": "def extract_markdown(file_path):\n \"\"\"Extract text from Markdown, preserving structure.\"\"\"\n import frontmatter\n\n with open(file_path, encoding=\"utf-8\") as f:\n post = frontmatter.load(f)\n\n content = post.content\n metadata = dict(post.metadata)\n\n # Parse headings for structure\n sections = []\n current_section = {\"heading\": None, \"level\": 0, \"content\": []}\n\n for line in content.split(\"\\n\"):\n heading_match = re.match(r\"^(#{1,6})\\s+(.+)$\", line)\n if heading_match:\n if current_section[\"content\"] or current_section[\"heading\"]:\n sections.append(current_section)\n current_section = {\"heading\": heading_match.group(2), \"level\": len(heading_match.group(1)), \"content\": []}\n else:\n current_section[\"content\"].append(line)\n\n if current_section[\"content\"] or current_section[\"heading\"]:\n sections.append(current_section)\n\n return {\"metadata\": metadata, \"sections\": sections}\n\n\ndef extract_txt(file_path):\n \"\"\"Extract text from plain text file.\"\"\"\n with open(file_path, encoding=\"utf-8\") as f:\n content = f.read()\n return {\"text\": content}\n\n\ndef extract(file_path):\n \"\"\"\n Main extraction function.\n Returns extracted content with structure info.\n \"\"\"\n ext = Path(file_path).suffix.lower()\n\n extractors = {\n \".pdf\": extract_pdf,",
"numLines": 45,
"startLine": 90,
"totalLines": 200
}
}
}