{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/unit\/application\/test_recheck_external_links.py",
"content": "from __future__ import annotations\n\nfrom collections.abc import Mapping\nfrom datetime import datetime, timedelta\n\nimport pytest\n\nfrom src.application.use_cases.check_external_link import CheckExternalLinkUseCase\nfrom src.application.use_cases.recheck_external_links import RecheckExternalLinksUseCase\nfrom src.domain.entities.link import Link\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.http_fetcher_port import FetchResult, HttpFetcherPort\nfrom src.domain.services.url_normalizer import UrlNormalizer\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.normalized_url import NormalizedUrl\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\nfrom tests.unit.application.fakes import (\n FakeClock,\n FakeExternalCheckRepo,\n FakeLinkRepo,\n FakePageRepo,\n)\n\n\nclass _StubFetcher(HttpFetcherPort):\n async def fetch(self, url: NormalizedUrl) -> FetchResult:\n headers: Mapping[str, str] = {}\n return FetchResult(url.url, 200, (), headers, b\"\", 5, None, None)\n\n\ndef _dummy_page(crawl_id: int) -> Page:\n return Page(\n id=None, crawl_id=crawl_id, url_id=1,\n fetched_at=datetime(2026, 4, 21),\n render_mode=RenderMode.HTTP,\n http=HttpResponse(HttpStatus(200), \"u\", (), 1, None, None, None, None),\n head=HeadMeta(\"T\", \"D\", None, None, None, \"de\", \"utf-8\", None),\n og=OpenGraphMeta(None, None, None, None, None, None, None),\n twitter=TwitterMeta(None, None, None, None),\n body=BodyStats((), (), 0, 0, 0, 0, 0, None),\n performance=PerformanceMetrics(None, None, None, None, None),\n storage=StoragePaths(None, None),\n quality_flags=(),\n )\n\n\n@pytest.mark.asyncio\nasync def test_recheck_updates_all_external_links() -> None:\n pages = FakePageRepo()\n links = FakeLinkRepo()\n checks = FakeExternalCheckRepo()\n page = pages.save(_dummy_page(crawl_id=1))\n assert page.id is not None\n links.save_many([\n Link(id=None, page_id=page.id, source_url_id=1, target_url=\"https:\/\/ext\/\",\n target_url_id=None, anchor_text=None, rel=frozenset(),\n is_internal=False, is_external=True, position_hint=None),\n Link(id=None, page_id=page.id, source_url_id=1, target_url=\"https:\/\/int\/\",\n target_url_id=2, anchor_text=None, rel=frozenset(),\n is_internal=True, is_external=False, position_hint=None),\n ])\n checker = CheckExternalLinkUseCase(\n fetcher=_StubFetcher(), checks=checks, normalizer=UrlNormalizer(),\n clock=FakeClock(datetime(2026, 4, 21)), cache_ttl=timedelta(hours=1),\n )\n\n count = await RecheckExternalLinksUseCase(\n pages=pages, links=links, checker=checker,\n ).execute(crawl_id=1)\n\n assert count == 1\n updated = next(link for link in links.store.values() if link.is_external)\n assert updated.external_http_status == 200\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/unit\/application\/test_recheck_external_links.py",
"content": "from __future__ import annotations\n\nfrom collections.abc import Mapping\nfrom datetime import datetime, timedelta\n\nimport pytest\n\nfrom src.application.use_cases.check_external_link import CheckExternalLinkUseCase\nfrom src.application.use_cases.recheck_external_links import RecheckExternalLinksUseCase\nfrom src.domain.entities.link import Link\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.http_fetcher_port import FetchResult, HttpFetcherPort\nfrom src.domain.services.url_normalizer import UrlNormalizer\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.normalized_url import NormalizedUrl\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\nfrom tests.unit.application.fakes import (\n FakeClock,\n FakeExternalCheckRepo,\n FakeLinkRepo,\n FakePageRepo,\n)\n\n\nclass _StubFetcher(HttpFetcherPort):\n async def fetch(self, url: NormalizedUrl) -> FetchResult:\n headers: Mapping[str, str] = {}\n return FetchResult(url.url, 200, (), headers, b\"\", 5, None, None)\n\n\ndef _dummy_page(crawl_id: int) -> Page:\n return Page(\n id=None, crawl_id=crawl_id, url_id=1,\n fetched_at=datetime(2026, 4, 21),\n render_mode=RenderMode.HTTP,\n http=HttpResponse(HttpStatus(200), \"u\", (), 1, None, None, None, None),\n head=HeadMeta(\"T\", \"D\", None, None, None, \"de\", \"utf-8\", None),\n og=OpenGraphMeta(None, None, None, None, None, None, None),\n twitter=TwitterMeta(None, None, None, None),\n body=BodyStats((), (), 0, 0, 0, 0, 0, None),\n performance=PerformanceMetrics(None, None, None, None, None),\n storage=StoragePaths(None, None),\n quality_flags=(),\n )\n\n\n@pytest.mark.asyncio\nasync def test_recheck_updates_all_external_links() -> None:\n pages = FakePageRepo()\n links = FakeLinkRepo()\n checks = FakeExternalCheckRepo()\n page = pages.save(_dummy_page(crawl_id=1))\n assert page.id is not None\n links.save_many([\n Link(id=None, page_id=page.id, source_url_id=1, target_url=\"https:\/\/ext\/\",\n target_url_id=None, anchor_text=None, rel=frozenset(),\n is_internal=False, is_external=True, position_hint=None),\n Link(id=None, page_id=page.id, source_url_id=1, target_url=\"https:\/\/int\/\",\n target_url_id=2, anchor_text=None, rel=frozenset(),\n is_internal=True, is_external=False, position_hint=None),\n ])\n checker = CheckExternalLinkUseCase(\n fetcher=_StubFetcher(), checks=checks, normalizer=UrlNormalizer(),\n clock=FakeClock(datetime(2026, 4, 21)), cache_ttl=timedelta(hours=1),\n )\n\n count = await RecheckExternalLinksUseCase(\n pages=pages, links=links, checker=checker,\n ).execute(crawl_id=1)\n\n assert count == 1\n updated = next(link for link in links.store.values() if link.is_external)\n assert updated.external_http_status == 200\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}