{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/unit\/application\/fakes.py",
"content": "from __future__ import annotations\n\nfrom collections.abc import Iterable, Mapping\nfrom dataclasses import replace\nfrom datetime import datetime\n\nfrom src.domain.entities.crawl import Crawl\nfrom src.domain.entities.crawl_error import CrawlError\nfrom src.domain.entities.external_check import ExternalCheck\nfrom src.domain.entities.image import Image\nfrom src.domain.entities.js_error import JsError\nfrom src.domain.entities.link import Link\nfrom src.domain.entities.page import Page\nfrom src.domain.entities.resource import Resource\nfrom src.domain.entities.structured_data_item import StructuredDataItem\nfrom src.domain.entities.url import Url\nfrom src.domain.ports.clock_port import ClockPort\nfrom src.domain.ports.crawl_error_repository_port import CrawlErrorRepositoryPort\nfrom src.domain.ports.crawl_repository_port import CrawlRepositoryPort\nfrom src.domain.ports.external_check_repository_port import ExternalCheckRepositoryPort\nfrom src.domain.ports.hreflang_repository_port import HreflangRepositoryPort\nfrom src.domain.ports.image_repository_port import ImageRepositoryPort\nfrom src.domain.ports.js_error_repository_port import JsErrorRepositoryPort\nfrom src.domain.ports.link_repository_port import LinkRepositoryPort\nfrom src.domain.ports.page_repository_port import PageRepositoryPort\nfrom src.domain.ports.resource_repository_port import ResourceRepositoryPort\nfrom src.domain.ports.structured_data_repository_port import StructuredDataRepositoryPort\nfrom src.domain.ports.url_queue_port import UrlQueuePort\nfrom src.domain.ports.url_repository_port import UrlRepositoryPort\nfrom src.domain.value_objects.hreflang import HreflangEntry\nfrom src.domain.value_objects.normalized_url import NormalizedUrl\n\n\nclass FakeClock(ClockPort):\n def __init__(self, now: datetime) -> None:\n self._now = now\n\n def now(self) -> datetime:\n return self._now\n\n\nclass FakeCrawlRepo(CrawlRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[int, Crawl] = {}\n self._seq = 0\n\n def save(self, crawl: Crawl) -> Crawl:\n if crawl.id is None:\n self._seq += 1\n saved = replace(crawl, id=self._seq)\n else:\n saved = crawl\n self.store[saved.id] = saved # type: ignore[index]\n return saved\n\n def get(self, crawl_id: int) -> Crawl | None:\n return self.store.get(crawl_id)\n\n def list_recent(self, limit: int) -> list[Crawl]:\n return sorted(self.store.values(), key=lambda c: c.started_at, reverse=True)[:limit]\n\n def update_counters(self, crawl_id: int, total_urls: int, total_errors: int) -> None:\n crawl = self.store[crawl_id]\n self.store[crawl_id] = replace(crawl, total_urls=total_urls, total_errors=total_errors)\n\n\nclass FakeUrlRepo(UrlRepositoryPort):\n def __init__(self) -> None:\n self.by_hash: dict[str, Url] = {}\n self._seq = 0\n\n def upsert(self, url: Url) -> Url:\n existing = self.by_hash.get(url.normalized.url_hash)\n if existing is not None:\n merged = replace(existing, last_seen_at=url.last_seen_at)\n self.by_hash[url.normalized.url_hash] = merged\n return merged\n self._seq += 1\n saved = replace(url, id=self._seq)\n self.by_hash[url.normalized.url_hash] = saved\n return saved\n\n def find_by_hash(self, url_hash: str) -> Url | None:\n return self.by_hash.get(url_hash)\n\n def find_by_normalized(self, normalized: NormalizedUrl) -> Url | None:\n return self.find_by_hash(normalized.url_hash)\n\n\nclass FakeUrlQueue(UrlQueuePort):\n def __init__(self) -> None:\n self.enqueued: list[tuple[int, NormalizedUrl]] = []\n self.done: list[tuple[int, NormalizedUrl]] = 
[]\n\n def enqueue(self, crawl_id: int, url: NormalizedUrl) -> bool:\n self.enqueued.append((crawl_id, url))\n return True\n\n def dequeue(self, crawl_id: int) -> NormalizedUrl | None:\n for i, (cid, url) in enumerate(self.enqueued):\n if cid == crawl_id:\n self.enqueued.pop(i)\n return url\n return None\n\n def pending_count(self, crawl_id: int) -> int:\n return sum(1 for cid, _ in self.enqueued if cid == crawl_id)\n\n def mark_done(self, crawl_id: int, url: NormalizedUrl) -> None:\n self.done.append((crawl_id, url))\n\n\nclass FakePageRepo(PageRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[int, Page] = {}\n self._seq = 0\n\n def save(self, page: Page) -> Page:\n self._seq += 1\n saved = replace(page, id=self._seq)\n self.store[self._seq] = saved\n return saved\n\n def get(self, page_id: int) -> Page | None:\n return self.store.get(page_id)\n\n def list_by_crawl(self, crawl_id: int, limit: int, offset: int) -> list[Page]:\n pages = [p for p in self.store.values() if p.crawl_id == crawl_id]\n return pages[offset : offset + limit]\n\n\nclass FakeLinkRepo(LinkRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[int, Link] = {}\n self._seq = 0\n\n def save_many(self, links: Iterable[Link]) -> None:\n for link in links:\n self._seq += 1\n self.store[self._seq] = replace(link, id=self._seq)\n\n def list_by_page(self, page_id: int) -> list[Link]:\n return [link for link in self.store.values() if link.page_id == page_id]\n\n def update_external_check(self, link_id: int, link: Link) -> None:\n self.store[link_id] = link\n\n\nclass FakeImageRepo(ImageRepositoryPort):\n def __init__(self) -> None:\n self.store: list[Image] = []\n\n def save_many(self, images: Iterable[Image]) -> None:\n self.store.extend(images)\n\n def list_by_page(self, page_id: int) -> list[Image]:\n return [i for i in self.store if i.page_id == page_id]\n\n\nclass FakeResourceRepo(ResourceRepositoryPort):\n def __init__(self) -> None:\n self.store: list[Resource] = []\n\n def save_many(self, resources: Iterable[Resource]) -> None:\n self.store.extend(resources)\n\n def list_by_page(self, page_id: int) -> list[Resource]:\n return [r for r in self.store if r.page_id == page_id]\n\n\nclass FakeStructuredDataRepo(StructuredDataRepositoryPort):\n def __init__(self) -> None:\n self.store: list[StructuredDataItem] = []\n\n def save_many(self, items: Iterable[StructuredDataItem]) -> None:\n self.store.extend(items)\n\n def list_by_page(self, page_id: int) -> list[StructuredDataItem]:\n return [i for i in self.store if i.page_id == page_id]\n\n\nclass FakeHreflangRepo(HreflangRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[int, list[HreflangEntry]] = {}\n\n def save_many(self, page_id: int, entries: Iterable[HreflangEntry]) -> None:\n self.store.setdefault(page_id, []).extend(entries)\n\n def list_by_page(self, page_id: int) -> list[HreflangEntry]:\n return list(self.store.get(page_id, []))\n\n\nclass FakeJsErrorRepo(JsErrorRepositoryPort):\n def __init__(self) -> None:\n self.store: list[JsError] = []\n\n def save_many(self, errors: Iterable[JsError]) -> None:\n self.store.extend(errors)\n\n def list_by_page(self, page_id: int) -> list[JsError]:\n return [e for e in self.store if e.page_id == page_id]\n\n\nclass FakeCrawlErrorRepo(CrawlErrorRepositoryPort):\n def __init__(self) -> None:\n self.store: list[CrawlError] = []\n\n def save(self, error: CrawlError) -> CrawlError:\n self.store.append(error)\n return error\n\n def list_by_crawl(self, crawl_id: int) -> list[CrawlError]:\n return 
[e for e in self.store if e.crawl_id == crawl_id]\n\n def count_by_crawl(self, crawl_id: int) -> int:\n return len(self.list_by_crawl(crawl_id))\n\n\nclass FakeExternalCheckRepo(ExternalCheckRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[str, ExternalCheck] = {}\n\n def upsert(self, check: ExternalCheck) -> ExternalCheck:\n self.store[check.url_hash] = check\n return check\n\n def get_by_hash(self, url_hash: str) -> ExternalCheck | None:\n return self.store.get(url_hash)\n\n\nclass FakePageHeaderRepo:\n def __init__(self) -> None:\n self.store: dict[int, dict[str, str]] = {}\n\n def save_many(self, page_id: int, headers: Iterable[tuple[str, str]]) -> None:\n self.store.setdefault(page_id, {}).update(dict(headers))\n\n def list_by_page(self, page_id: int) -> Mapping[str, str]:\n return dict(self.store.get(page_id, {}))\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/unit\/application\/fakes.py",
"content": "from __future__ import annotations\n\nfrom collections.abc import Iterable, Mapping\nfrom dataclasses import replace\nfrom datetime import datetime\n\nfrom src.domain.entities.crawl import Crawl\nfrom src.domain.entities.crawl_error import CrawlError\nfrom src.domain.entities.external_check import ExternalCheck\nfrom src.domain.entities.image import Image\nfrom src.domain.entities.js_error import JsError\nfrom src.domain.entities.link import Link\nfrom src.domain.entities.page import Page\nfrom src.domain.entities.resource import Resource\nfrom src.domain.entities.structured_data_item import StructuredDataItem\nfrom src.domain.entities.url import Url\nfrom src.domain.ports.clock_port import ClockPort\nfrom src.domain.ports.crawl_error_repository_port import CrawlErrorRepositoryPort\nfrom src.domain.ports.crawl_repository_port import CrawlRepositoryPort\nfrom src.domain.ports.external_check_repository_port import ExternalCheckRepositoryPort\nfrom src.domain.ports.hreflang_repository_port import HreflangRepositoryPort\nfrom src.domain.ports.image_repository_port import ImageRepositoryPort\nfrom src.domain.ports.js_error_repository_port import JsErrorRepositoryPort\nfrom src.domain.ports.link_repository_port import LinkRepositoryPort\nfrom src.domain.ports.page_repository_port import PageRepositoryPort\nfrom src.domain.ports.resource_repository_port import ResourceRepositoryPort\nfrom src.domain.ports.structured_data_repository_port import StructuredDataRepositoryPort\nfrom src.domain.ports.url_queue_port import UrlQueuePort\nfrom src.domain.ports.url_repository_port import UrlRepositoryPort\nfrom src.domain.value_objects.hreflang import HreflangEntry\nfrom src.domain.value_objects.normalized_url import NormalizedUrl\n\n\nclass FakeClock(ClockPort):\n def __init__(self, now: datetime) -> None:\n self._now = now\n\n def now(self) -> datetime:\n return self._now\n\n\nclass FakeCrawlRepo(CrawlRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[int, Crawl] = {}\n self._seq = 0\n\n def save(self, crawl: Crawl) -> Crawl:\n if crawl.id is None:\n self._seq += 1\n saved = replace(crawl, id=self._seq)\n else:\n saved = crawl\n self.store[saved.id] = saved # type: ignore[index]\n return saved\n\n def get(self, crawl_id: int) -> Crawl | None:\n return self.store.get(crawl_id)\n\n def list_recent(self, limit: int) -> list[Crawl]:\n return sorted(self.store.values(), key=lambda c: c.started_at, reverse=True)[:limit]\n\n def update_counters(self, crawl_id: int, total_urls: int, total_errors: int) -> None:\n crawl = self.store[crawl_id]\n self.store[crawl_id] = replace(crawl, total_urls=total_urls, total_errors=total_errors)\n\n\nclass FakeUrlRepo(UrlRepositoryPort):\n def __init__(self) -> None:\n self.by_hash: dict[str, Url] = {}\n self._seq = 0\n\n def upsert(self, url: Url) -> Url:\n existing = self.by_hash.get(url.normalized.url_hash)\n if existing is not None:\n merged = replace(existing, last_seen_at=url.last_seen_at)\n self.by_hash[url.normalized.url_hash] = merged\n return merged\n self._seq += 1\n saved = replace(url, id=self._seq)\n self.by_hash[url.normalized.url_hash] = saved\n return saved\n\n def find_by_hash(self, url_hash: str) -> Url | None:\n return self.by_hash.get(url_hash)\n\n def find_by_normalized(self, normalized: NormalizedUrl) -> Url | None:\n return self.find_by_hash(normalized.url_hash)\n\n\nclass FakeUrlQueue(UrlQueuePort):\n def __init__(self) -> None:\n self.enqueued: list[tuple[int, NormalizedUrl]] = []\n self.done: list[tuple[int, NormalizedUrl]] = 
[]\n\n def enqueue(self, crawl_id: int, url: NormalizedUrl) -> bool:\n self.enqueued.append((crawl_id, url))\n return True\n\n def dequeue(self, crawl_id: int) -> NormalizedUrl | None:\n for i, (cid, url) in enumerate(self.enqueued):\n if cid == crawl_id:\n self.enqueued.pop(i)\n return url\n return None\n\n def pending_count(self, crawl_id: int) -> int:\n return sum(1 for cid, _ in self.enqueued if cid == crawl_id)\n\n def mark_done(self, crawl_id: int, url: NormalizedUrl) -> None:\n self.done.append((crawl_id, url))\n\n\nclass FakePageRepo(PageRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[int, Page] = {}\n self._seq = 0\n\n def save(self, page: Page) -> Page:\n self._seq += 1\n saved = replace(page, id=self._seq)\n self.store[self._seq] = saved\n return saved\n\n def get(self, page_id: int) -> Page | None:\n return self.store.get(page_id)\n\n def list_by_crawl(self, crawl_id: int, limit: int, offset: int) -> list[Page]:\n pages = [p for p in self.store.values() if p.crawl_id == crawl_id]\n return pages[offset : offset + limit]\n\n\nclass FakeLinkRepo(LinkRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[int, Link] = {}\n self._seq = 0\n\n def save_many(self, links: Iterable[Link]) -> None:\n for link in links:\n self._seq += 1\n self.store[self._seq] = replace(link, id=self._seq)\n\n def list_by_page(self, page_id: int) -> list[Link]:\n return [link for link in self.store.values() if link.page_id == page_id]\n\n def update_external_check(self, link_id: int, link: Link) -> None:\n self.store[link_id] = link\n\n\nclass FakeImageRepo(ImageRepositoryPort):\n def __init__(self) -> None:\n self.store: list[Image] = []\n\n def save_many(self, images: Iterable[Image]) -> None:\n self.store.extend(images)\n\n def list_by_page(self, page_id: int) -> list[Image]:\n return [i for i in self.store if i.page_id == page_id]\n\n\nclass FakeResourceRepo(ResourceRepositoryPort):\n def __init__(self) -> None:\n self.store: list[Resource] = []\n\n def save_many(self, resources: Iterable[Resource]) -> None:\n self.store.extend(resources)\n\n def list_by_page(self, page_id: int) -> list[Resource]:\n return [r for r in self.store if r.page_id == page_id]\n\n\nclass FakeStructuredDataRepo(StructuredDataRepositoryPort):\n def __init__(self) -> None:\n self.store: list[StructuredDataItem] = []\n\n def save_many(self, items: Iterable[StructuredDataItem]) -> None:\n self.store.extend(items)\n\n def list_by_page(self, page_id: int) -> list[StructuredDataItem]:\n return [i for i in self.store if i.page_id == page_id]\n\n\nclass FakeHreflangRepo(HreflangRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[int, list[HreflangEntry]] = {}\n\n def save_many(self, page_id: int, entries: Iterable[HreflangEntry]) -> None:\n self.store.setdefault(page_id, []).extend(entries)\n\n def list_by_page(self, page_id: int) -> list[HreflangEntry]:\n return list(self.store.get(page_id, []))\n\n\nclass FakeJsErrorRepo(JsErrorRepositoryPort):\n def __init__(self) -> None:\n self.store: list[JsError] = []\n\n def save_many(self, errors: Iterable[JsError]) -> None:\n self.store.extend(errors)\n\n def list_by_page(self, page_id: int) -> list[JsError]:\n return [e for e in self.store if e.page_id == page_id]\n\n\nclass FakeCrawlErrorRepo(CrawlErrorRepositoryPort):\n def __init__(self) -> None:\n self.store: list[CrawlError] = []\n\n def save(self, error: CrawlError) -> CrawlError:\n self.store.append(error)\n return error\n\n def list_by_crawl(self, crawl_id: int) -> list[CrawlError]:\n return 
[e for e in self.store if e.crawl_id == crawl_id]\n\n def count_by_crawl(self, crawl_id: int) -> int:\n return len(self.list_by_crawl(crawl_id))\n\n\nclass FakeExternalCheckRepo(ExternalCheckRepositoryPort):\n def __init__(self) -> None:\n self.store: dict[str, ExternalCheck] = {}\n\n def upsert(self, check: ExternalCheck) -> ExternalCheck:\n self.store[check.url_hash] = check\n return check\n\n def get_by_hash(self, url_hash: str) -> ExternalCheck | None:\n return self.store.get(url_hash)\n\n\nclass FakePageHeaderRepo:\n def __init__(self) -> None:\n self.store: dict[int, dict[str, str]] = {}\n\n def save_many(self, page_id: int, headers: Iterable[tuple[str, str]]) -> None:\n self.store.setdefault(page_id, {}).update(dict(headers))\n\n def list_by_page(self, page_id: int) -> Mapping[str, str]:\n return dict(self.store.get(page_id, {}))\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}
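
A minimal sketch of how two of these fakes might be exercised in a unit test. FakeClock and FakePageHeaderRepo are chosen because they depend only on stdlib types; the import path tests.unit.application.fakes is an assumption inferred from the file_path above and may differ in the actual project layout.

# Usage sketch (not part of the logged Write call).
from datetime import datetime, timezone

from tests.unit.application.fakes import FakeClock, FakePageHeaderRepo


def test_fake_clock_is_deterministic() -> None:
    frozen = datetime(2024, 1, 1, tzinfo=timezone.utc)
    clock = FakeClock(now=frozen)
    # Unlike a real clock, repeated calls return the same fixed instant.
    assert clock.now() == frozen


def test_page_header_repo_accumulates_per_page() -> None:
    repo = FakePageHeaderRepo()
    repo.save_many(1, [("content-type", "text/html")])
    repo.save_many(1, [("cache-control", "no-store")])
    # save_many merges successive batches into one mapping per page_id.
    assert repo.list_by_page(1) == {
        "content-type": "text/html",
        "cache-control": "no-store",
    }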