{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/integration\/infrastructure\/test_page_repository.py",
"content": "from __future__ import annotations\n\nfrom datetime import datetime\n\nfrom src.domain.entities.crawl import Crawl\nfrom src.domain.entities.page import Page\nfrom src.domain.entities.url import Url\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import CrawlMode, CrawlStatus, RenderMode, TriggerSource\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.normalized_url import normalize\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\nfrom src.infrastructure.persistence.connection import MariaDbConnectionFactory\nfrom src.infrastructure.persistence.crawl_repository import CrawlRepository\nfrom src.infrastructure.persistence.page_repository import PageRepository\nfrom src.infrastructure.persistence.url_repository import UrlRepository\n\n\ndef _seed_ids(factory: MariaDbConnectionFactory) -> tuple[int, int]:\n now = datetime(2026, 4, 21, 12, 0, 0)\n crawl = CrawlRepository(factory).save(\n Crawl(id=None, base_url=\"https:\/\/x\/\", mode=CrawlMode.FAST,\n started_at=now, finished_at=None, status=CrawlStatus.RUNNING,\n trigger_source=TriggerSource.CLI, config={}),\n )\n url = UrlRepository(factory).upsert(\n Url(id=None, normalized=normalize(\"https:\/\/x\/\"), is_internal=True,\n first_seen_at=now, last_seen_at=now),\n )\n assert crawl.id is not None\n assert url.id is not None\n return crawl.id, url.id\n\n\ndef _page(crawl_id: int, url_id: int) -> Page:\n return Page(\n id=None, crawl_id=crawl_id, url_id=url_id,\n fetched_at=datetime(2026, 4, 21, 12, 0, 0),\n render_mode=RenderMode.HTTP,\n http=HttpResponse(HttpStatus(200),\n \"https:\/\/x\/\", (\"https:\/\/x\/r\",),\n 42, 10, \"text\/html\", 1234, None),\n head=HeadMeta(\"Title\", \"Desc\", None, None, \"https:\/\/x\/\", \"de\", \"utf-8\", None),\n og=OpenGraphMeta(\"OG\", None, None, None, None, None, None),\n twitter=TwitterMeta(None, None, None, None),\n body=BodyStats((\"Welcome\",), (\"Sub\",), 0, 0, 0, 0, 100, \"abc\"),\n performance=PerformanceMetrics(None, None, None, None, None),\n storage=StoragePaths(None, None),\n quality_flags=(),\n )\n\n\ndef test_page_roundtrip_preserves_core_fields(\n connection_factory: MariaDbConnectionFactory,\n) -> None:\n crawl_id, url_id = _seed_ids(connection_factory)\n repo = PageRepository(connection_factory)\n saved = repo.save(_page(crawl_id, url_id))\n assert saved.id is not None\n\n fetched = repo.get(saved.id)\n assert fetched is not None\n assert fetched.head.title == \"Title\"\n assert fetched.http.status is not None\n assert fetched.http.status.code == 200\n assert fetched.http.redirect_chain == (\"https:\/\/x\/r\",)\n assert fetched.body.h1_texts == (\"Welcome\",)\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/integration\/infrastructure\/test_page_repository.py",
"content": "from __future__ import annotations\n\nfrom datetime import datetime\n\nfrom src.domain.entities.crawl import Crawl\nfrom src.domain.entities.page import Page\nfrom src.domain.entities.url import Url\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import CrawlMode, CrawlStatus, RenderMode, TriggerSource\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.normalized_url import normalize\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\nfrom src.infrastructure.persistence.connection import MariaDbConnectionFactory\nfrom src.infrastructure.persistence.crawl_repository import CrawlRepository\nfrom src.infrastructure.persistence.page_repository import PageRepository\nfrom src.infrastructure.persistence.url_repository import UrlRepository\n\n\ndef _seed_ids(factory: MariaDbConnectionFactory) -> tuple[int, int]:\n now = datetime(2026, 4, 21, 12, 0, 0)\n crawl = CrawlRepository(factory).save(\n Crawl(id=None, base_url=\"https:\/\/x\/\", mode=CrawlMode.FAST,\n started_at=now, finished_at=None, status=CrawlStatus.RUNNING,\n trigger_source=TriggerSource.CLI, config={}),\n )\n url = UrlRepository(factory).upsert(\n Url(id=None, normalized=normalize(\"https:\/\/x\/\"), is_internal=True,\n first_seen_at=now, last_seen_at=now),\n )\n assert crawl.id is not None\n assert url.id is not None\n return crawl.id, url.id\n\n\ndef _page(crawl_id: int, url_id: int) -> Page:\n return Page(\n id=None, crawl_id=crawl_id, url_id=url_id,\n fetched_at=datetime(2026, 4, 21, 12, 0, 0),\n render_mode=RenderMode.HTTP,\n http=HttpResponse(HttpStatus(200),\n \"https:\/\/x\/\", (\"https:\/\/x\/r\",),\n 42, 10, \"text\/html\", 1234, None),\n head=HeadMeta(\"Title\", \"Desc\", None, None, \"https:\/\/x\/\", \"de\", \"utf-8\", None),\n og=OpenGraphMeta(\"OG\", None, None, None, None, None, None),\n twitter=TwitterMeta(None, None, None, None),\n body=BodyStats((\"Welcome\",), (\"Sub\",), 0, 0, 0, 0, 100, \"abc\"),\n performance=PerformanceMetrics(None, None, None, None, None),\n storage=StoragePaths(None, None),\n quality_flags=(),\n )\n\n\ndef test_page_roundtrip_preserves_core_fields(\n connection_factory: MariaDbConnectionFactory,\n) -> None:\n crawl_id, url_id = _seed_ids(connection_factory)\n repo = PageRepository(connection_factory)\n saved = repo.save(_page(crawl_id, url_id))\n assert saved.id is not None\n\n fetched = repo.get(saved.id)\n assert fetched is not None\n assert fetched.head.title == \"Title\"\n assert fetched.http.status is not None\n assert fetched.http.status.code == 200\n assert fetched.http.redirect_chain == (\"https:\/\/x\/r\",)\n assert fetched.body.h1_texts == (\"Welcome\",)\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}