{
"event": "PreToolUse",
"tool_name": "Bash",
"tool_input": {
"command": "grep -A 5 \"def.*pdf\\|def extract\" \/var\/www\/scripts\/pipeline\/extract.py | head -30",
"description": "Check PDF extraction functions"
}
}
{
"tool_response": {
"stdout": "def extract_pdf(file_path):\n \"\"\"Extract text from PDF using PyMuPDF with optional OCR.\"\"\"\n import fitz # PyMuPDF\n\n doc = fitz.open(file_path)\n pages = []\n--\ndef extract_pptx(file_path):\n \"\"\"Extract text from PowerPoint including speaker notes.\"\"\"\n from pptx import Presentation\n\n prs = Presentation(file_path)\n slides = []\n--\ndef extract_docx(file_path):\n \"\"\"Extract text from Word document.\"\"\"\n from docx import Document\n\n doc = Document(file_path)\n paragraphs = []\n--\ndef extract_markdown(file_path):\n \"\"\"Extract text from Markdown, preserving structure.\"\"\"\n import frontmatter\n\n with open(file_path, encoding=\"utf-8\") as f:\n post = frontmatter.load(f)\n--\ndef extract_txt(file_path):\n \"\"\"Extract text from plain text file.\"\"\"",
"stderr": "",
"interrupted": false,
"isImage": false
}
}