{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/persistence\/mappers\/page_mapper.py",
"oldString": " quality_flags=(),\n )",
"newString": " quality_flags=_load_quality_flags(row.get(\"quality_flags\")),\n )\n\n\ndef _load_quality_flags(raw: object) -> tuple[QualityFlag, ...]:\n if not raw:\n return ()\n decoded = json.loads(str(raw))\n return tuple(\n QualityFlag(\n key=QualityFlagKey(entry[\"key\"]),\n severity=QualitySeverity(entry[\"severity\"]),\n message=str(entry.get(\"message\", \"\")),\n count=int(entry.get(\"count\", 1)),\n )\n for entry in decoded\n )",
"originalFile": "from __future__ import annotations\n\nimport json\nfrom typing import Any\n\nfrom src.domain.entities.page import Page\nfrom src.domain.value_objects.body_stats import BodyStats\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.meta_robots import parse_meta_robots\nfrom src.domain.value_objects.enums import QualitySeverity\nfrom src.domain.value_objects.open_graph_meta import OpenGraphMeta\nfrom src.domain.value_objects.performance_metrics import PerformanceMetrics\nfrom src.domain.value_objects.quality_flag import QualityFlag, QualityFlagKey\nfrom src.domain.value_objects.storage_paths import StoragePaths\nfrom src.domain.value_objects.twitter_meta import TwitterMeta\n\n\ndef page_to_row(page: Page) -> dict[str, Any]:\n row = {\n \"crawl_id\": page.crawl_id,\n \"url_id\": page.url_id,\n \"fetched_at\": page.fetched_at,\n \"render_mode\": page.render_mode.value,\n }\n row.update(_http_fields(page.http))\n row.update(_head_fields(page.head))\n row.update(_og_fields(page.og))\n row.update(_twitter_fields(page.twitter))\n row.update(_body_fields(page.body))\n row.update(_performance_fields(page.performance))\n row.update(_storage_fields(page.storage))\n row[\"quality_flags\"] = json.dumps(\n [\n {\n \"key\": f.key.value,\n \"severity\": f.severity.value,\n \"message\": f.message,\n \"count\": f.count,\n }\n for f in page.quality_flags\n ],\n )\n return row\n\n\ndef row_to_page(row: dict[str, Any]) -> Page:\n robots = parse_meta_robots(row.get(\"meta_robots\"))\n status = HttpStatus(row[\"http_status\"]) if row.get(\"http_status\") else None\n return Page(\n id=row[\"id\"],\n crawl_id=row[\"crawl_id\"],\n url_id=row[\"url_id\"],\n fetched_at=row[\"fetched_at\"],\n render_mode=RenderMode(row[\"render_mode\"]),\n http=HttpResponse(\n status=status,\n final_url=row.get(\"final_url\"),\n redirect_chain=tuple(\n json.loads(row[\"redirect_chain\"]) if row.get(\"redirect_chain\") else []\n ),\n response_time_ms=row.get(\"response_time_ms\"),\n ttfb_ms=row.get(\"ttfb_ms\"),\n content_type=row.get(\"content_type\"),\n content_length=row.get(\"content_length\"),\n content_encoding=row.get(\"content_encoding\"),\n ),\n head=HeadMeta(\n title=row.get(\"title\"),\n description=row.get(\"meta_description\"),\n keywords=row.get(\"meta_keywords\"),\n robots=robots,\n canonical=row.get(\"canonical\"),\n lang=row.get(\"lang\"),\n charset=row.get(\"charset\"),\n viewport=row.get(\"viewport\"),\n ),\n og=OpenGraphMeta(\n title=row.get(\"og_title\"),\n description=row.get(\"og_description\"),\n image=row.get(\"og_image\"),\n og_type=row.get(\"og_type\"),\n url=row.get(\"og_url\"),\n site_name=row.get(\"og_site_name\"),\n locale=row.get(\"og_locale\"),\n ),\n twitter=TwitterMeta(\n card=row.get(\"twitter_card\"),\n title=row.get(\"twitter_title\"),\n description=row.get(\"twitter_description\"),\n image=row.get(\"twitter_image\"),\n ),\n body=BodyStats(\n h1_texts=tuple(json.loads(row[\"h1_texts\"]) if row.get(\"h1_texts\") else []),\n h2_texts=tuple(json.loads(row[\"h2_texts\"]) if row.get(\"h2_texts\") else []),\n h3_count=row.get(\"h3_count\") or 0,\n h4_count=row.get(\"h4_count\") or 0,\n h5_count=row.get(\"h5_count\") or 0,\n h6_count=row.get(\"h6_count\") or 0,\n word_count=row.get(\"word_count\") or 0,\n text_hash=row.get(\"text_hash\"),\n ),\n performance=PerformanceMetrics(\n dom_node_count=row.get(\"dom_node_count\"),\n render_time_ms=row.get(\"render_time_ms\"),\n lcp_ms=row.get(\"lcp_ms\"),\n cls=row.get(\"cls\"),\n tbt_ms=row.get(\"tbt_ms\"),\n ),\n storage=StoragePaths(\n screenshot_path=row.get(\"screenshot_path\"),\n html_raw_path=row.get(\"html_raw_path\"),\n ),\n quality_flags=(),\n )\n\n\ndef _http_fields(http: HttpResponse) -> dict[str, Any]:\n return {\n \"http_status\": http.status.code if http.status else None,\n \"final_url\": http.final_url,\n \"redirect_chain\": json.dumps(list(http.redirect_chain)) if http.redirect_chain else None,\n \"response_time_ms\": http.response_time_ms,\n \"ttfb_ms\": http.ttfb_ms,\n \"content_type\": http.content_type,\n \"content_length\": http.content_length,\n \"content_encoding\": http.content_encoding,\n }\n\n\ndef _head_fields(head: HeadMeta) -> dict[str, Any]:\n return {\n \"title\": head.title,\n \"title_length\": head.title_length,\n \"meta_description\": head.description,\n \"meta_description_length\": head.description_length,\n \"meta_keywords\": head.keywords,\n \"meta_robots\": head.robots.raw if head.robots else None,\n \"canonical\": head.canonical,\n \"lang\": head.lang,\n \"charset\": head.charset,\n \"viewport\": head.viewport,\n }\n\n\ndef _og_fields(og: OpenGraphMeta) -> dict[str, Any]:\n return {\n \"og_title\": og.title,\n \"og_description\": og.description,\n \"og_image\": og.image,\n \"og_type\": og.og_type,\n \"og_url\": og.url,\n \"og_site_name\": og.site_name,\n \"og_locale\": og.locale,\n }\n\n\ndef _twitter_fields(tw: TwitterMeta) -> dict[str, Any]:\n return {\n \"twitter_card\": tw.card,\n \"twitter_title\": tw.title,\n \"twitter_description\": tw.description,\n \"twitter_image\": tw.image,\n }\n\n\ndef _body_fields(body: BodyStats) -> dict[str, Any]:\n return {\n \"h1_count\": body.h1_count,\n \"h1_texts\": json.dumps(list(body.h1_texts)),\n \"h2_count\": body.h2_count,\n \"h2_texts\": json.dumps(list(body.h2_texts)),\n \"h3_count\": body.h3_count,\n \"h4_count\": body.h4_count,\n \"h5_count\": body.h5_count,\n \"h6_count\": body.h6_count,\n \"word_count\": body.word_count,\n \"text_hash\": body.text_hash,\n }\n\n\ndef _performance_fields(perf: PerformanceMetrics) -> dict[str, Any]:\n return {\n \"dom_node_count\": perf.dom_node_count,\n \"render_time_ms\": perf.render_time_ms,\n \"lcp_ms\": perf.lcp_ms,\n \"cls\": perf.cls,\n \"tbt_ms\": perf.tbt_ms,\n }\n\n\ndef _storage_fields(storage: StoragePaths) -> dict[str, Any]:\n return {\n \"screenshot_path\": storage.screenshot_path,\n \"html_raw_path\": storage.html_raw_path,\n }\n",
"structuredPatch": [
{
"oldStart": 113,
"oldLines": 10,
"newStart": 113,
"newLines": 25,
"lines": [
" screenshot_path=row.get(\"screenshot_path\"),",
" html_raw_path=row.get(\"html_raw_path\"),",
" ),",
"- quality_flags=(),",
"+ quality_flags=_load_quality_flags(row.get(\"quality_flags\")),",
" )",
" ",
" ",
"+def _load_quality_flags(raw: object) -> tuple[QualityFlag, ...]:",
"+ if not raw:",
"+ return ()",
"+ decoded = json.loads(str(raw))",
"+ return tuple(",
"+ QualityFlag(",
"+ key=QualityFlagKey(entry[\"key\"]),",
"+ severity=QualitySeverity(entry[\"severity\"]),",
"+ message=str(entry.get(\"message\", \"\")),",
"+ count=int(entry.get(\"count\", 1)),",
"+ )",
"+ for entry in decoded",
"+ )",
"+",
"+",
" def _http_fields(http: HttpResponse) -> dict[str, Any]:",
" return {",
" \"http_status\": http.status.code if http.status else None,"
]
}
],
"userModified": false,
"replaceAll": false
}
}