Protokoll #33526

ID: 33526
Zeitstempel: 2026-04-21 02:37:46.911564
Client: root
IP: 62.246.34.195
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 1,541 (Input: 765, Output: 776)
Dauer: 71 ms
Request-Zeit: 2026-04-21 02:37:46.911564
Response-Zeit: 2026-04-21 02:37:46.983046

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/compare_crawls.py",
        "content": "from __future__ import annotations\n\nfrom src.application.read_models.crawl_diff import CrawlDiff\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.page_repository_port import PageRepositoryPort\nfrom src.domain.ports.url_repository_port import UrlRepositoryPort\n\nPAGE_FETCH_CHUNK = 500\n\n\nclass CompareCrawlsUseCase:\n    def __init__(\n        self,\n        *,\n        pages: PageRepositoryPort,\n        urls: UrlRepositoryPort,\n    ) -> None:\n        self._pages = pages\n        self._urls = urls\n\n    def execute(self, *, from_crawl_id: int, to_crawl_id: int) -> CrawlDiff:\n        from_pages = self._index_by_url(from_crawl_id)\n        to_pages = self._index_by_url(to_crawl_id)\n        added = tuple(sorted(set(to_pages) - set(from_pages)))\n        removed = tuple(sorted(set(from_pages) - set(to_pages)))\n        status_changed = self._status_changes(from_pages, to_pages)\n        title_changed = self._title_changes(from_pages, to_pages)\n        return CrawlDiff(\n            from_crawl_id=from_crawl_id,\n            to_crawl_id=to_crawl_id,\n            added_urls=added,\n            removed_urls=removed,\n            status_changed=status_changed,\n            title_changed=title_changed,\n        )\n\n    def _index_by_url(self, crawl_id: int) -> dict[str, Page]:\n        index: dict[str, Page] = {}\n        offset = 0\n        while True:\n            batch = self._pages.list_by_crawl(crawl_id, limit=PAGE_FETCH_CHUNK, offset=offset)\n            if not batch:\n                return index\n            for page in batch:\n                index[_url_key(page, self._urls)] = page\n            offset += len(batch)\n\n    def _status_changes(\n        self, frm: dict[str, Page], to: dict[str, Page],\n    ) -> tuple[tuple[str, int, int], ...]:\n        changes: list[tuple[str, int, int]] = []\n        for url, to_page in to.items():\n            old = frm.get(url)\n            if old is None:\n                
continue\n            s_old = old.http.status.code if old.http.status else 0\n            s_new = to_page.http.status.code if to_page.http.status else 0\n            if s_old != s_new:\n                changes.append((url, s_old, s_new))\n        return tuple(sorted(changes))\n\n    def _title_changes(\n        self, frm: dict[str, Page], to: dict[str, Page],\n    ) -> tuple[tuple[str, str | None, str | None], ...]:\n        changes: list[tuple[str, str | None, str | None]] = []\n        for url, to_page in to.items():\n            old = frm.get(url)\n            if old is None:\n                continue\n            if old.head.title != to_page.head.title:\n                changes.append((url, old.head.title, to_page.head.title))\n        return tuple(sorted(changes, key=lambda t: t[0]))\n\n\ndef _url_key(page: Page, urls: UrlRepositoryPort) -> str:\n    _ = urls\n    return str(page.url_id)\n"
    }
}

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/compare_crawls.py",
        "content": "from __future__ import annotations\n\nfrom src.application.read_models.crawl_diff import CrawlDiff\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.page_repository_port import PageRepositoryPort\nfrom src.domain.ports.url_repository_port import UrlRepositoryPort\n\nPAGE_FETCH_CHUNK = 500\n\n\nclass CompareCrawlsUseCase:\n    def __init__(\n        self,\n        *,\n        pages: PageRepositoryPort,\n        urls: UrlRepositoryPort,\n    ) -> None:\n        self._pages = pages\n        self._urls = urls\n\n    def execute(self, *, from_crawl_id: int, to_crawl_id: int) -> CrawlDiff:\n        from_pages = self._index_by_url(from_crawl_id)\n        to_pages = self._index_by_url(to_crawl_id)\n        added = tuple(sorted(set(to_pages) - set(from_pages)))\n        removed = tuple(sorted(set(from_pages) - set(to_pages)))\n        status_changed = self._status_changes(from_pages, to_pages)\n        title_changed = self._title_changes(from_pages, to_pages)\n        return CrawlDiff(\n            from_crawl_id=from_crawl_id,\n            to_crawl_id=to_crawl_id,\n            added_urls=added,\n            removed_urls=removed,\n            status_changed=status_changed,\n            title_changed=title_changed,\n        )\n\n    def _index_by_url(self, crawl_id: int) -> dict[str, Page]:\n        index: dict[str, Page] = {}\n        offset = 0\n        while True:\n            batch = self._pages.list_by_crawl(crawl_id, limit=PAGE_FETCH_CHUNK, offset=offset)\n            if not batch:\n                return index\n            for page in batch:\n                index[_url_key(page, self._urls)] = page\n            offset += len(batch)\n\n    def _status_changes(\n        self, frm: dict[str, Page], to: dict[str, Page],\n    ) -> tuple[tuple[str, int, int], ...]:\n        changes: list[tuple[str, int, int]] = []\n        for url, to_page in to.items():\n            old = frm.get(url)\n            if old is None:\n                
continue\n            s_old = old.http.status.code if old.http.status else 0\n            s_new = to_page.http.status.code if to_page.http.status else 0\n            if s_old != s_new:\n                changes.append((url, s_old, s_new))\n        return tuple(sorted(changes))\n\n    def _title_changes(\n        self, frm: dict[str, Page], to: dict[str, Page],\n    ) -> tuple[tuple[str, str | None, str | None], ...]:\n        changes: list[tuple[str, str | None, str | None]] = []\n        for url, to_page in to.items():\n            old = frm.get(url)\n            if old is None:\n                continue\n            if old.head.title != to_page.head.title:\n                changes.append((url, old.head.title, to_page.head.title))\n        return tuple(sorted(changes, key=lambda t: t[0]))\n\n\ndef _url_key(page: Page, urls: UrlRepositoryPort) -> str:\n    _ = urls\n    return str(page.url_id)\n",
        "structuredPatch": [],
        "originalFile": null,
        "userModified": false
    }
}
← Vorheriger | Zur Liste | Nächster →