{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/persistence\/mappers\/page_mapper.py",
"content": "from __future__ import annotations\n\nimport json\nfrom typing import Any\n\nfrom src.domain.entities.page import Page\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.meta_robots import parse_meta_robots\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\n\n\ndef page_to_row(page: Page) -> dict[str, Any]:\n row = {\n \"crawl_id\": page.crawl_id,\n \"url_id\": page.url_id,\n \"fetched_at\": page.fetched_at,\n \"render_mode\": page.render_mode.value,\n }\n row.update(_http_fields(page.http))\n row.update(_head_fields(page.head))\n row.update(_og_fields(page.og))\n row.update(_twitter_fields(page.twitter))\n row.update(_body_fields(page.body))\n row.update(_performance_fields(page.performance))\n row.update(_storage_fields(page.storage))\n row[\"quality_flags\"] = json.dumps(\n [\n {\"key\": f.key.value, \"severity\": f.severity.value,\n \"message\": f.message, \"count\": f.count}\n for f in page.quality_flags\n ],\n )\n return row\n\n\ndef row_to_page(row: dict[str, Any]) -> Page:\n robots = parse_meta_robots(row.get(\"meta_robots\"))\n status = HttpStatus(row[\"http_status\"]) if row.get(\"http_status\") else None\n return Page(\n id=row[\"id\"], crawl_id=row[\"crawl_id\"], url_id=row[\"url_id\"],\n fetched_at=row[\"fetched_at\"], render_mode=RenderMode(row[\"render_mode\"]),\n http=HttpResponse(\n status=status,\n final_url=row.get(\"final_url\"),\n redirect_chain=tuple(json.loads(row[\"redirect_chain\"]) if row.get(\"redirect_chain\") else []),\n response_time_ms=row.get(\"response_time_ms\"),\n ttfb_ms=row.get(\"ttfb_ms\"),\n content_type=row.get(\"content_type\"),\n content_length=row.get(\"content_length\"),\n content_encoding=row.get(\"content_encoding\"),\n ),\n head=HeadMeta(\n title=row.get(\"title\"), description=row.get(\"meta_description\"),\n keywords=row.get(\"meta_keywords\"), robots=robots,\n canonical=row.get(\"canonical\"), lang=row.get(\"lang\"),\n charset=row.get(\"charset\"), viewport=row.get(\"viewport\"),\n ),\n og=OpenGraphMeta(\n title=row.get(\"og_title\"), description=row.get(\"og_description\"),\n image=row.get(\"og_image\"), og_type=row.get(\"og_type\"),\n url=row.get(\"og_url\"), site_name=row.get(\"og_site_name\"),\n locale=row.get(\"og_locale\"),\n ),\n twitter=TwitterMeta(\n card=row.get(\"twitter_card\"), title=row.get(\"twitter_title\"),\n description=row.get(\"twitter_description\"), image=row.get(\"twitter_image\"),\n ),\n body=BodyStats(\n h1_texts=tuple(json.loads(row[\"h1_texts\"]) if row.get(\"h1_texts\") else []),\n h2_texts=tuple(json.loads(row[\"h2_texts\"]) if row.get(\"h2_texts\") else []),\n h3_count=row.get(\"h3_count\") or 0, h4_count=row.get(\"h4_count\") or 0,\n h5_count=row.get(\"h5_count\") or 0, h6_count=row.get(\"h6_count\") or 0,\n word_count=row.get(\"word_count\") or 0, text_hash=row.get(\"text_hash\"),\n ),\n performance=PerformanceMetrics(\n dom_node_count=row.get(\"dom_node_count\"), render_time_ms=row.get(\"render_time_ms\"),\n lcp_ms=row.get(\"lcp_ms\"), cls=row.get(\"cls\"), tbt_ms=row.get(\"tbt_ms\"),\n ),\n storage=StoragePaths(\n screenshot_path=row.get(\"screenshot_path\"),\n html_raw_path=row.get(\"html_raw_path\"),\n ),\n quality_flags=(),\n )\n\n\ndef _http_fields(http: HttpResponse) -> dict[str, Any]:\n return {\n \"http_status\": http.status.code if http.status else None,\n \"final_url\": http.final_url,\n \"redirect_chain\": json.dumps(list(http.redirect_chain)) if http.redirect_chain else None,\n \"response_time_ms\": http.response_time_ms,\n \"ttfb_ms\": http.ttfb_ms,\n \"content_type\": http.content_type,\n \"content_length\": http.content_length,\n \"content_encoding\": http.content_encoding,\n }\n\n\ndef _head_fields(head: HeadMeta) -> dict[str, Any]:\n return {\n \"title\": head.title, \"title_length\": head.title_length,\n \"meta_description\": head.description,\n \"meta_description_length\": head.description_length,\n \"meta_keywords\": head.keywords,\n \"meta_robots\": head.robots.raw if head.robots else None,\n \"canonical\": head.canonical, \"lang\": head.lang,\n \"charset\": head.charset, \"viewport\": head.viewport,\n }\n\n\ndef _og_fields(og: OpenGraphMeta) -> dict[str, Any]:\n return {\n \"og_title\": og.title, \"og_description\": og.description, \"og_image\": og.image,\n \"og_type\": og.og_type, \"og_url\": og.url, \"og_site_name\": og.site_name,\n \"og_locale\": og.locale,\n }\n\n\ndef _twitter_fields(tw: TwitterMeta) -> dict[str, Any]:\n return {\n \"twitter_card\": tw.card, \"twitter_title\": tw.title,\n \"twitter_description\": tw.description, \"twitter_image\": tw.image,\n }\n\n\ndef _body_fields(body: BodyStats) -> dict[str, Any]:\n return {\n \"h1_count\": body.h1_count, \"h1_texts\": json.dumps(list(body.h1_texts)),\n \"h2_count\": body.h2_count, \"h2_texts\": json.dumps(list(body.h2_texts)),\n \"h3_count\": body.h3_count, \"h4_count\": body.h4_count,\n \"h5_count\": body.h5_count, \"h6_count\": body.h6_count,\n \"word_count\": body.word_count, \"text_hash\": body.text_hash,\n }\n\n\ndef _performance_fields(perf: PerformanceMetrics) -> dict[str, Any]:\n return {\n \"dom_node_count\": perf.dom_node_count, \"render_time_ms\": perf.render_time_ms,\n \"lcp_ms\": perf.lcp_ms, \"cls\": perf.cls, \"tbt_ms\": perf.tbt_ms,\n }\n\n\ndef _storage_fields(storage: StoragePaths) -> dict[str, Any]:\n return {\n \"screenshot_path\": storage.screenshot_path,\n \"html_raw_path\": storage.html_raw_path,\n }\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/persistence\/mappers\/page_mapper.py",
"content": "from __future__ import annotations\n\nimport json\nfrom typing import Any\n\nfrom src.domain.entities.page import Page\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.meta_robots import parse_meta_robots\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\n\n\ndef page_to_row(page: Page) -> dict[str, Any]:\n row = {\n \"crawl_id\": page.crawl_id,\n \"url_id\": page.url_id,\n \"fetched_at\": page.fetched_at,\n \"render_mode\": page.render_mode.value,\n }\n row.update(_http_fields(page.http))\n row.update(_head_fields(page.head))\n row.update(_og_fields(page.og))\n row.update(_twitter_fields(page.twitter))\n row.update(_body_fields(page.body))\n row.update(_performance_fields(page.performance))\n row.update(_storage_fields(page.storage))\n row[\"quality_flags\"] = json.dumps(\n [\n {\"key\": f.key.value, \"severity\": f.severity.value,\n \"message\": f.message, \"count\": f.count}\n for f in page.quality_flags\n ],\n )\n return row\n\n\ndef row_to_page(row: dict[str, Any]) -> Page:\n robots = parse_meta_robots(row.get(\"meta_robots\"))\n status = HttpStatus(row[\"http_status\"]) if row.get(\"http_status\") else None\n return Page(\n id=row[\"id\"], crawl_id=row[\"crawl_id\"], url_id=row[\"url_id\"],\n fetched_at=row[\"fetched_at\"], render_mode=RenderMode(row[\"render_mode\"]),\n http=HttpResponse(\n status=status,\n final_url=row.get(\"final_url\"),\n redirect_chain=tuple(json.loads(row[\"redirect_chain\"]) if row.get(\"redirect_chain\") else []),\n response_time_ms=row.get(\"response_time_ms\"),\n ttfb_ms=row.get(\"ttfb_ms\"),\n content_type=row.get(\"content_type\"),\n content_length=row.get(\"content_length\"),\n content_encoding=row.get(\"content_encoding\"),\n ),\n head=HeadMeta(\n title=row.get(\"title\"), description=row.get(\"meta_description\"),\n keywords=row.get(\"meta_keywords\"), robots=robots,\n canonical=row.get(\"canonical\"), lang=row.get(\"lang\"),\n charset=row.get(\"charset\"), viewport=row.get(\"viewport\"),\n ),\n og=OpenGraphMeta(\n title=row.get(\"og_title\"), description=row.get(\"og_description\"),\n image=row.get(\"og_image\"), og_type=row.get(\"og_type\"),\n url=row.get(\"og_url\"), site_name=row.get(\"og_site_name\"),\n locale=row.get(\"og_locale\"),\n ),\n twitter=TwitterMeta(\n card=row.get(\"twitter_card\"), title=row.get(\"twitter_title\"),\n description=row.get(\"twitter_description\"), image=row.get(\"twitter_image\"),\n ),\n body=BodyStats(\n h1_texts=tuple(json.loads(row[\"h1_texts\"]) if row.get(\"h1_texts\") else []),\n h2_texts=tuple(json.loads(row[\"h2_texts\"]) if row.get(\"h2_texts\") else []),\n h3_count=row.get(\"h3_count\") or 0, h4_count=row.get(\"h4_count\") or 0,\n h5_count=row.get(\"h5_count\") or 0, h6_count=row.get(\"h6_count\") or 0,\n word_count=row.get(\"word_count\") or 0, text_hash=row.get(\"text_hash\"),\n ),\n performance=PerformanceMetrics(\n dom_node_count=row.get(\"dom_node_count\"), render_time_ms=row.get(\"render_time_ms\"),\n lcp_ms=row.get(\"lcp_ms\"), cls=row.get(\"cls\"), tbt_ms=row.get(\"tbt_ms\"),\n ),\n storage=StoragePaths(\n screenshot_path=row.get(\"screenshot_path\"),\n html_raw_path=row.get(\"html_raw_path\"),\n ),\n quality_flags=(),\n )\n\n\ndef _http_fields(http: HttpResponse) -> dict[str, Any]:\n return {\n \"http_status\": http.status.code if http.status else None,\n \"final_url\": http.final_url,\n \"redirect_chain\": json.dumps(list(http.redirect_chain)) if http.redirect_chain else None,\n \"response_time_ms\": http.response_time_ms,\n \"ttfb_ms\": http.ttfb_ms,\n \"content_type\": http.content_type,\n \"content_length\": http.content_length,\n \"content_encoding\": http.content_encoding,\n }\n\n\ndef _head_fields(head: HeadMeta) -> dict[str, Any]:\n return {\n \"title\": head.title, \"title_length\": head.title_length,\n \"meta_description\": head.description,\n \"meta_description_length\": head.description_length,\n \"meta_keywords\": head.keywords,\n \"meta_robots\": head.robots.raw if head.robots else None,\n \"canonical\": head.canonical, \"lang\": head.lang,\n \"charset\": head.charset, \"viewport\": head.viewport,\n }\n\n\ndef _og_fields(og: OpenGraphMeta) -> dict[str, Any]:\n return {\n \"og_title\": og.title, \"og_description\": og.description, \"og_image\": og.image,\n \"og_type\": og.og_type, \"og_url\": og.url, \"og_site_name\": og.site_name,\n \"og_locale\": og.locale,\n }\n\n\ndef _twitter_fields(tw: TwitterMeta) -> dict[str, Any]:\n return {\n \"twitter_card\": tw.card, \"twitter_title\": tw.title,\n \"twitter_description\": tw.description, \"twitter_image\": tw.image,\n }\n\n\ndef _body_fields(body: BodyStats) -> dict[str, Any]:\n return {\n \"h1_count\": body.h1_count, \"h1_texts\": json.dumps(list(body.h1_texts)),\n \"h2_count\": body.h2_count, \"h2_texts\": json.dumps(list(body.h2_texts)),\n \"h3_count\": body.h3_count, \"h4_count\": body.h4_count,\n \"h5_count\": body.h5_count, \"h6_count\": body.h6_count,\n \"word_count\": body.word_count, \"text_hash\": body.text_hash,\n }\n\n\ndef _performance_fields(perf: PerformanceMetrics) -> dict[str, Any]:\n return {\n \"dom_node_count\": perf.dom_node_count, \"render_time_ms\": perf.render_time_ms,\n \"lcp_ms\": perf.lcp_ms, \"cls\": perf.cls, \"tbt_ms\": perf.tbt_ms,\n }\n\n\ndef _storage_fields(storage: StoragePaths) -> dict[str, Any]:\n return {\n \"screenshot_path\": storage.screenshot_path,\n \"html_raw_path\": storage.html_raw_path,\n }\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}