Log entry #33525

ID: 33525
Timestamp: 2026-04-21 02:37:37.332893
Client: root
IP: 62.246.34.195
Model: claude-sonnet-4-20250514
Status: completed
Tokens: 1,154 (input: 572, output: 582)
Duration: 68 ms
Request time: 2026-04-21 02:37:37.332893
Response time: 2026-04-21 02:37:37.400580

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/generate_crawl_report.py",
        "content": "from __future__ import annotations\n\nfrom collections import Counter\n\nfrom src.application.read_models.crawl_report import CrawlReport\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.crawl_error_repository_port import CrawlErrorRepositoryPort\nfrom src.domain.ports.crawl_repository_port import CrawlRepositoryPort\nfrom src.domain.ports.page_repository_port import PageRepositoryPort\n\nPAGE_FETCH_CHUNK = 500\n\n\nclass GenerateCrawlReportUseCase:\n    def __init__(\n        self,\n        *,\n        crawls: CrawlRepositoryPort,\n        pages: PageRepositoryPort,\n        errors: CrawlErrorRepositoryPort,\n    ) -> None:\n        self._crawls = crawls\n        self._pages = pages\n        self._errors = errors\n\n    def execute(self, *, crawl_id: int) -> CrawlReport:\n        crawl = self._crawls.get(crawl_id)\n        if crawl is None:\n            raise LookupError(f\"crawl {crawl_id} not found\")\n        pages = list(self._all_pages(crawl_id))\n        errors = self._errors.list_by_crawl(crawl_id)\n        status_hist = Counter(p.http.status.code for p in pages if p.http.status is not None)\n        flag_hist = Counter(f.key.value for p in pages for f in p.quality_flags)\n        top_errors = tuple(Counter(e.error_type for e in errors).most_common(10))\n        assert crawl.id is not None\n        return CrawlReport(\n            crawl_id=crawl.id, base_url=crawl.base_url, mode=crawl.mode, status=crawl.status,\n            started_at=crawl.started_at, finished_at=crawl.finished_at,\n            total_urls=crawl.total_urls, total_errors=crawl.total_errors,\n            pages_by_status=dict(status_hist), flag_counts=dict(flag_hist),\n            top_errors=top_errors,\n        )\n\n    def _all_pages(self, crawl_id: int) -> list[Page]:\n        collected: list[Page] = []\n        offset = 0\n        while True:\n            batch = self._pages.list_by_crawl(crawl_id, limit=PAGE_FETCH_CHUNK, offset=offset)\n            if not batch:\n                return collected\n            collected.extend(batch)\n            offset += len(batch)\n"
    }
}
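
For context, this PreToolUse payload is what a hook consumer sees before the Write tool runs. A minimal sketch of such a consumer is below; the field names are taken from the logged payload, but the stdin delivery, the allowed-root policy, and the exit-code-2 blocking convention are assumptions, not something this log confirms.

# Hypothetical consumer for the PreToolUse event shape shown above.
# Assumptions: the event JSON arrives on stdin; exit code 2 blocks the tool.
import json
import sys

ALLOWED_ROOT = "/var/www/dev.campus.systemische-tools.de/"  # assumed policy root


def main() -> int:
    event = json.load(sys.stdin)
    # Only inspect Write calls; let everything else proceed untouched.
    if event.get("event") != "PreToolUse" or event.get("tool_name") != "Write":
        return 0
    path = event.get("tool_input", {}).get("file_path", "")
    if not path.startswith(ALLOWED_ROOT):
        print(f"blocked write outside {ALLOWED_ROOT}: {path}", file=sys.stderr)
        return 2  # assumed blocking convention
    return 0


if __name__ == "__main__":
    sys.exit(main())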

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/generate_crawl_report.py",
        "content": "from __future__ import annotations\n\nfrom collections import Counter\n\nfrom src.application.read_models.crawl_report import CrawlReport\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.crawl_error_repository_port import CrawlErrorRepositoryPort\nfrom src.domain.ports.crawl_repository_port import CrawlRepositoryPort\nfrom src.domain.ports.page_repository_port import PageRepositoryPort\n\nPAGE_FETCH_CHUNK = 500\n\n\nclass GenerateCrawlReportUseCase:\n    def __init__(\n        self,\n        *,\n        crawls: CrawlRepositoryPort,\n        pages: PageRepositoryPort,\n        errors: CrawlErrorRepositoryPort,\n    ) -> None:\n        self._crawls = crawls\n        self._pages = pages\n        self._errors = errors\n\n    def execute(self, *, crawl_id: int) -> CrawlReport:\n        crawl = self._crawls.get(crawl_id)\n        if crawl is None:\n            raise LookupError(f\"crawl {crawl_id} not found\")\n        pages = list(self._all_pages(crawl_id))\n        errors = self._errors.list_by_crawl(crawl_id)\n        status_hist = Counter(p.http.status.code for p in pages if p.http.status is not None)\n        flag_hist = Counter(f.key.value for p in pages for f in p.quality_flags)\n        top_errors = tuple(Counter(e.error_type for e in errors).most_common(10))\n        assert crawl.id is not None\n        return CrawlReport(\n            crawl_id=crawl.id, base_url=crawl.base_url, mode=crawl.mode, status=crawl.status,\n            started_at=crawl.started_at, finished_at=crawl.finished_at,\n            total_urls=crawl.total_urls, total_errors=crawl.total_errors,\n            pages_by_status=dict(status_hist), flag_counts=dict(flag_hist),\n            top_errors=top_errors,\n        )\n\n    def _all_pages(self, crawl_id: int) -> list[Page]:\n        collected: list[Page] = []\n        offset = 0\n        while True:\n            batch = self._pages.list_by_crawl(crawl_id, limit=PAGE_FETCH_CHUNK, offset=offset)\n            if not batch:\n                return collected\n            collected.extend(batch)\n            offset += len(batch)\n",
        "structuredPatch": [],
        "originalFile": null,
        "userModified": false
    }
}
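
The module written here is simple enough to smoke-test directly. A minimal sketch of driving GenerateCrawlReportUseCase with in-memory fakes follows; the fakes are hypothetical and only mimic the attributes the use case actually touches, and it is assumed that the crawler package is importable and that CrawlReport exposes its constructor arguments as attributes.

# Hypothetical smoke test for the use case logged above.
from types import SimpleNamespace

from src.application.use_cases.generate_crawl_report import GenerateCrawlReportUseCase

crawl = SimpleNamespace(
    id=1, base_url="https://example.org", mode="full", status="finished",
    started_at=None, finished_at=None, total_urls=2, total_errors=1,
)
page = SimpleNamespace(
    http=SimpleNamespace(status=SimpleNamespace(code=200)),
    quality_flags=[SimpleNamespace(key=SimpleNamespace(value="thin_content"))],
)
error = SimpleNamespace(error_type="timeout")


class FakeCrawls:
    def get(self, crawl_id):  # stands in for CrawlRepositoryPort
        return crawl if crawl_id == 1 else None


class FakePages:
    def list_by_crawl(self, crawl_id, *, limit, offset):  # one page, then empty batch
        return [page] if offset == 0 else []


class FakeErrors:
    def list_by_crawl(self, crawl_id):  # stands in for CrawlErrorRepositoryPort
        return [error]


report = GenerateCrawlReportUseCase(
    crawls=FakeCrawls(), pages=FakePages(), errors=FakeErrors()
).execute(crawl_id=1)
print(report.pages_by_status)  # expected: {200: 1}
print(report.top_errors)       # expected: (('timeout', 1),)

The second fake illustrates why _all_pages terminates: it keeps requesting PAGE_FETCH_CHUNK-sized batches at a growing offset until the repository returns an empty list.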