{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/generate_crawl_report.py",
"old_string": " top_errors = tuple(Counter(e.error_type for e in errors).most_common(10))\n assert crawl.id is not None\n return CrawlReport(",
"new_string": " top_errors = tuple(Counter(e.error_type for e in errors).most_common(10))\n if crawl.id is None:\n raise RuntimeError(\"loaded crawl has no id\")\n return CrawlReport(",
"replace_all": false
}
}
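The edit above swaps a bare assert for an explicit guard. The motivation, in brief: assert statements are stripped when CPython runs with -O, while an explicit raise always executes, and either form narrows crawl.id from int | None to int for a type checker. A minimal sketch of the pattern, using a hypothetical Crawl stand-in rather than the project's real entity:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Crawl:
    # Hypothetical stand-in; the real entity lives under src/domain.
    id: Optional[int]

def require_id(crawl: Crawl) -> int:
    # Unlike `assert crawl.id is not None`, this check survives `python -O`.
    if crawl.id is None:
        raise RuntimeError("loaded crawl has no id")
    return crawl.id  # narrowed to int from here on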
{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/generate_crawl_report.py",
"oldString": " top_errors = tuple(Counter(e.error_type for e in errors).most_common(10))\n assert crawl.id is not None\n return CrawlReport(",
"newString": " top_errors = tuple(Counter(e.error_type for e in errors).most_common(10))\n if crawl.id is None:\n raise RuntimeError(\"loaded crawl has no id\")\n return CrawlReport(",
"originalFile": "from __future__ import annotations\n\nfrom collections import Counter\n\nfrom src.application.read_models.crawl_report import CrawlReport\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.crawl_error_repository_port import CrawlErrorRepositoryPort\nfrom src.domain.ports.crawl_repository_port import CrawlRepositoryPort\nfrom src.domain.ports.page_repository_port import PageRepositoryPort\n\nPAGE_FETCH_CHUNK = 500\n\n\nclass GenerateCrawlReportUseCase:\n def __init__(\n self,\n *,\n crawls: CrawlRepositoryPort,\n pages: PageRepositoryPort,\n errors: CrawlErrorRepositoryPort,\n ) -> None:\n self._crawls = crawls\n self._pages = pages\n self._errors = errors\n\n def execute(self, *, crawl_id: int) -> CrawlReport:\n crawl = self._crawls.get(crawl_id)\n if crawl is None:\n raise LookupError(f\"crawl {crawl_id} not found\")\n pages = list(self._all_pages(crawl_id))\n errors = self._errors.list_by_crawl(crawl_id)\n status_hist = Counter(p.http.status.code for p in pages if p.http.status is not None)\n flag_hist = Counter(f.key.value for p in pages for f in p.quality_flags)\n top_errors = tuple(Counter(e.error_type for e in errors).most_common(10))\n assert crawl.id is not None\n return CrawlReport(\n crawl_id=crawl.id, base_url=crawl.base_url, mode=crawl.mode, status=crawl.status,\n started_at=crawl.started_at, finished_at=crawl.finished_at,\n total_urls=crawl.total_urls, total_errors=crawl.total_errors,\n pages_by_status=dict(status_hist), flag_counts=dict(flag_hist),\n top_errors=top_errors,\n )\n\n def _all_pages(self, crawl_id: int) -> list[Page]:\n collected: list[Page] = []\n offset = 0\n while True:\n batch = self._pages.list_by_crawl(crawl_id, limit=PAGE_FETCH_CHUNK, offset=offset)\n if not batch:\n return collected\n collected.extend(batch)\n offset += len(batch)\n",
"structuredPatch": [
{
"oldStart": 32,
"oldLines": 7,
"newStart": 32,
"newLines": 8,
"lines": [
" status_hist = Counter(p.http.status.code for p in pages if p.http.status is not None)",
" flag_hist = Counter(f.key.value for p in pages for f in p.quality_flags)",
" top_errors = tuple(Counter(e.error_type for e in errors).most_common(10))",
"- assert crawl.id is not None",
"+ if crawl.id is None:",
"+ raise RuntimeError(\"loaded crawl has no id\")",
" return CrawlReport(",
" crawl_id=crawl.id, base_url=crawl.base_url, mode=crawl.mode, status=crawl.status,",
" started_at=crawl.started_at, finished_at=crawl.finished_at,"
]
}
],
"userModified": false,
"replaceAll": false
}
}
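If the change warrants a regression test, a set of fake ports can drive execute() down the new failure path. A sketch under assumptions: pytest as the runner and duck-typed fakes in place of the project's real repository fixtures, none of which appear in the log. Note that execute() builds its histograms before reaching the guard, so the page and error fakes must return empty results:

import pytest

from src.application.use_cases.generate_crawl_report import GenerateCrawlReportUseCase

class _CrawlStub:
    # The condition the new guard protects against: a loaded crawl without an id.
    id = None

class _FakeCrawls:
    def get(self, crawl_id):
        return _CrawlStub()

class _FakePages:
    def list_by_crawl(self, crawl_id, *, limit, offset):
        return []  # empty batch ends _all_pages immediately

class _FakeErrors:
    def list_by_crawl(self, crawl_id):
        return []

def test_execute_raises_when_loaded_crawl_has_no_id():
    use_case = GenerateCrawlReportUseCase(
        crawls=_FakeCrawls(), pages=_FakePages(), errors=_FakeErrors()
    )
    with pytest.raises(RuntimeError, match="loaded crawl has no id"):
        use_case.execute(crawl_id=1)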