{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/unit\/application\/test_generate_crawl_report.py",
"content": "from __future__ import annotations\n\nfrom datetime import datetime\n\nfrom src.application.use_cases.generate_crawl_report import GenerateCrawlReportUseCase\nfrom src.domain.entities.crawl import Crawl\nfrom src.domain.entities.crawl_error import CrawlError\nfrom src.domain.entities.page import Page\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import CrawlMode, CrawlStatus, RenderMode, TriggerSource\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\nfrom tests.unit.application.fakes import FakeCrawlErrorRepo, FakeCrawlRepo, FakePageRepo\n\n\ndef _page(crawl_id: int, status_code: int) -> Page:\n return Page(\n id=None, crawl_id=crawl_id, url_id=1,\n fetched_at=datetime(2026, 4, 21),\n render_mode=RenderMode.HTTP,\n http=HttpResponse(HttpStatus(status_code), \"https:\/\/x\/\", (), 10, None, None, None, None),\n head=HeadMeta(\"T\", \"D\", None, None, None, \"de\", \"utf-8\", None),\n og=OpenGraphMeta(None, None, None, None, None, None, None),\n twitter=TwitterMeta(None, None, None, None),\n body=BodyStats((), (), 0, 0, 0, 0, 0, None),\n performance=PerformanceMetrics(None, None, None, None, None),\n storage=StoragePaths(None, None),\n quality_flags=(),\n )\n\n\ndef test_report_aggregates_status_and_errors() -> None:\n crawls = FakeCrawlRepo()\n pages = FakePageRepo()\n errors = FakeCrawlErrorRepo()\n crawl = crawls.save(\n Crawl(id=None, base_url=\"https:\/\/x\/\", mode=CrawlMode.FAST,\n started_at=datetime(2026, 4, 21), finished_at=datetime(2026, 4, 22),\n status=CrawlStatus.COMPLETED, trigger_source=TriggerSource.CLI,\n config={}, total_urls=3, total_errors=2),\n )\n assert crawl.id is not None\n pages.save(_page(crawl.id, 200))\n pages.save(_page(crawl.id, 200))\n pages.save(_page(crawl.id, 404))\n errors.save(CrawlError(id=None, crawl_id=crawl.id, url=\"u\", error_type=\"dns\",\n error_msg=\"m\", occurred_at=datetime(2026, 4, 21)))\n errors.save(CrawlError(id=None, crawl_id=crawl.id, url=\"u2\", error_type=\"dns\",\n error_msg=\"m\", occurred_at=datetime(2026, 4, 21)))\n\n report = GenerateCrawlReportUseCase(crawls=crawls, pages=pages, errors=errors).execute(\n crawl_id=crawl.id,\n )\n\n assert report.pages_by_status == {200: 2, 404: 1}\n assert report.top_errors == ((\"dns\", 2),)\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/unit\/application\/test_generate_crawl_report.py",
"content": "from __future__ import annotations\n\nfrom datetime import datetime\n\nfrom src.application.use_cases.generate_crawl_report import GenerateCrawlReportUseCase\nfrom src.domain.entities.crawl import Crawl\nfrom src.domain.entities.crawl_error import CrawlError\nfrom src.domain.entities.page import Page\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import CrawlMode, CrawlStatus, RenderMode, TriggerSource\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\nfrom tests.unit.application.fakes import FakeCrawlErrorRepo, FakeCrawlRepo, FakePageRepo\n\n\ndef _page(crawl_id: int, status_code: int) -> Page:\n return Page(\n id=None, crawl_id=crawl_id, url_id=1,\n fetched_at=datetime(2026, 4, 21),\n render_mode=RenderMode.HTTP,\n http=HttpResponse(HttpStatus(status_code), \"https:\/\/x\/\", (), 10, None, None, None, None),\n head=HeadMeta(\"T\", \"D\", None, None, None, \"de\", \"utf-8\", None),\n og=OpenGraphMeta(None, None, None, None, None, None, None),\n twitter=TwitterMeta(None, None, None, None),\n body=BodyStats((), (), 0, 0, 0, 0, 0, None),\n performance=PerformanceMetrics(None, None, None, None, None),\n storage=StoragePaths(None, None),\n quality_flags=(),\n )\n\n\ndef test_report_aggregates_status_and_errors() -> None:\n crawls = FakeCrawlRepo()\n pages = FakePageRepo()\n errors = FakeCrawlErrorRepo()\n crawl = crawls.save(\n Crawl(id=None, base_url=\"https:\/\/x\/\", mode=CrawlMode.FAST,\n started_at=datetime(2026, 4, 21), finished_at=datetime(2026, 4, 22),\n status=CrawlStatus.COMPLETED, trigger_source=TriggerSource.CLI,\n config={}, total_urls=3, total_errors=2),\n )\n assert crawl.id is not None\n pages.save(_page(crawl.id, 200))\n pages.save(_page(crawl.id, 200))\n pages.save(_page(crawl.id, 404))\n errors.save(CrawlError(id=None, crawl_id=crawl.id, url=\"u\", error_type=\"dns\",\n error_msg=\"m\", occurred_at=datetime(2026, 4, 21)))\n errors.save(CrawlError(id=None, crawl_id=crawl.id, url=\"u2\", error_type=\"dns\",\n error_msg=\"m\", occurred_at=datetime(2026, 4, 21)))\n\n report = GenerateCrawlReportUseCase(crawls=crawls, pages=pages, errors=errors).execute(\n crawl_id=crawl.id,\n )\n\n assert report.pages_by_status == {200: 2, 404: 1}\n assert report.top_errors == ((\"dns\", 2),)\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}