{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/unit\/application\/test_extract_seo_data.py",
"content": "from __future__ import annotations\n\nfrom datetime import datetime\n\nfrom src.application.use_cases.extract_seo_data import ExtractInput, ExtractSeoDataUseCase\nfrom src.domain.ports.html_parser_port import ParsedDocument\nfrom src.domain.ports.http_fetcher_port import FetchResult\nfrom src.domain.services.quality_flag_calculator import QualityFlagCalculator\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.quality_flag import QualityFlagKey\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\n\n\ndef _parsed() -> ParsedDocument:\n return ParsedDocument(\n head=HeadMeta(\n title=None, description=None, keywords=None, robots=None,\n canonical=None, lang=\"de\", charset=\"utf-8\", viewport=None,\n ),\n og=OpenGraphMeta(None, None, None, None, None, None, None),\n twitter=TwitterMeta(None, None, None, None),\n body=BodyStats(h1_texts=(), h2_texts=(), h3_count=0, h4_count=0, h5_count=0,\n h6_count=0, word_count=0, text_hash=None),\n hreflang=(),\n internal_links=(),\n external_links=(),\n image_specs=(),\n )\n\n\ndef test_extract_derives_flags_and_status_from_fetch() -> None:\n fetch = FetchResult(\n final_url=\"https:\/\/x\/\",\n status_code=500,\n redirect_chain=(),\n headers={\"content-type\": \"text\/html; charset=utf-8\", \"content-length\": \"123\"},\n body=b\"\",\n response_time_ms=10,\n ttfb_ms=None,\n error=None,\n )\n uc = ExtractSeoDataUseCase(calculator=QualityFlagCalculator())\n\n extracted = uc.execute(\n ExtractInput(\n crawl_id=1, url_id=1, fetched_at=datetime(2026, 4, 21),\n render_mode=RenderMode.HTTP, fetch=fetch, parsed=_parsed(),\n page_url=\"https:\/\/x\/\",\n ),\n )\n\n keys = {f.key for f in extracted.page.quality_flags}\n assert QualityFlagKey.HTTP_ERROR in keys\n assert QualityFlagKey.TITLE_MISSING in keys\n assert extracted.page.http.content_length == 123\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/unit\/application\/test_extract_seo_data.py",
"content": "from __future__ import annotations\n\nfrom datetime import datetime\n\nfrom src.application.use_cases.extract_seo_data import ExtractInput, ExtractSeoDataUseCase\nfrom src.domain.ports.html_parser_port import ParsedDocument\nfrom src.domain.ports.http_fetcher_port import FetchResult\nfrom src.domain.services.quality_flag_calculator import QualityFlagCalculator\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.quality_flag import QualityFlagKey\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\n\n\ndef _parsed() -> ParsedDocument:\n return ParsedDocument(\n head=HeadMeta(\n title=None, description=None, keywords=None, robots=None,\n canonical=None, lang=\"de\", charset=\"utf-8\", viewport=None,\n ),\n og=OpenGraphMeta(None, None, None, None, None, None, None),\n twitter=TwitterMeta(None, None, None, None),\n body=BodyStats(h1_texts=(), h2_texts=(), h3_count=0, h4_count=0, h5_count=0,\n h6_count=0, word_count=0, text_hash=None),\n hreflang=(),\n internal_links=(),\n external_links=(),\n image_specs=(),\n )\n\n\ndef test_extract_derives_flags_and_status_from_fetch() -> None:\n fetch = FetchResult(\n final_url=\"https:\/\/x\/\",\n status_code=500,\n redirect_chain=(),\n headers={\"content-type\": \"text\/html; charset=utf-8\", \"content-length\": \"123\"},\n body=b\"\",\n response_time_ms=10,\n ttfb_ms=None,\n error=None,\n )\n uc = ExtractSeoDataUseCase(calculator=QualityFlagCalculator())\n\n extracted = uc.execute(\n ExtractInput(\n crawl_id=1, url_id=1, fetched_at=datetime(2026, 4, 21),\n render_mode=RenderMode.HTTP, fetch=fetch, parsed=_parsed(),\n page_url=\"https:\/\/x\/\",\n ),\n )\n\n keys = {f.key for f in extracted.page.quality_flags}\n assert QualityFlagKey.HTTP_ERROR in keys\n assert QualityFlagKey.TITLE_MISSING in keys\n assert extracted.page.http.content_length == 123\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}