{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/extract_seo_data.py",
"content": "from __future__ import annotations\n\nfrom dataclasses import dataclass\nfrom datetime import datetime\n\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.html_parser_port import ParsedDocument\nfrom src.domain.ports.http_fetcher_port import FetchResult\nfrom src.domain.services.quality_flag_calculator import QualityFlagCalculator\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.storage_paths import StoragePaths\n\n\n@dataclass(frozen=True, slots=True)\nclass ExtractedPage:\n page: Page\n parsed: ParsedDocument\n\n\nclass ExtractSeoDataUseCase:\n def __init__(self, *, calculator: QualityFlagCalculator) -> None:\n self._calculator = calculator\n\n def execute(\n self,\n *,\n crawl_id: int,\n url_id: int,\n fetched_at: datetime,\n render_mode: RenderMode,\n fetch: FetchResult,\n parsed: ParsedDocument,\n page_url: str,\n html_raw_path: str | None = None,\n screenshot_path: str | None = None,\n ) -> ExtractedPage:\n http = _build_http_response(fetch)\n flags = self._calculator.calculate(\n head=parsed.head,\n h1_count=parsed.body.h1_count,\n http_status=http.status,\n page_url=page_url,\n )\n page = Page(\n id=None,\n crawl_id=crawl_id,\n url_id=url_id,\n fetched_at=fetched_at,\n render_mode=render_mode,\n http=http,\n head=parsed.head,\n og=parsed.og,\n twitter=parsed.twitter,\n body=parsed.body,\n performance=_empty_performance(),\n storage=StoragePaths(screenshot_path=screenshot_path, html_raw_path=html_raw_path),\n quality_flags=flags,\n )\n return ExtractedPage(page=page, parsed=parsed)\n\n\ndef _build_http_response(fetch: FetchResult) -> HttpResponse:\n status = HttpStatus(fetch.status_code) if fetch.status_code else None\n return HttpResponse(\n status=status,\n final_url=fetch.final_url,\n redirect_chain=fetch.redirect_chain,\n response_time_ms=fetch.response_time_ms,\n ttfb_ms=fetch.ttfb_ms,\n content_type=fetch.headers.get(\"content-type\"),\n content_length=_safe_int(fetch.headers.get(\"content-length\")),\n content_encoding=fetch.headers.get(\"content-encoding\"),\n )\n\n\ndef _safe_int(value: str | None) -> int | None:\n if value is None:\n return None\n try:\n return int(value)\n except ValueError:\n return None\n\n\ndef _empty_performance() -> object:\n from src.domain.value_objects.performance_metrics import PerformanceMetrics\n return PerformanceMetrics(None, None, None, None, None)\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/extract_seo_data.py",
"content": "from __future__ import annotations\n\nfrom dataclasses import dataclass\nfrom datetime import datetime\n\nfrom src.domain.entities.page import Page\nfrom src.domain.ports.html_parser_port import ParsedDocument\nfrom src.domain.ports.http_fetcher_port import FetchResult\nfrom src.domain.services.quality_flag_calculator import QualityFlagCalculator\nfrom src.domain.value_objects.enums import RenderMode\nfrom src.domain.value_objects.http_response import HttpResponse\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.storage_paths import StoragePaths\n\n\n@dataclass(frozen=True, slots=True)\nclass ExtractedPage:\n page: Page\n parsed: ParsedDocument\n\n\nclass ExtractSeoDataUseCase:\n def __init__(self, *, calculator: QualityFlagCalculator) -> None:\n self._calculator = calculator\n\n def execute(\n self,\n *,\n crawl_id: int,\n url_id: int,\n fetched_at: datetime,\n render_mode: RenderMode,\n fetch: FetchResult,\n parsed: ParsedDocument,\n page_url: str,\n html_raw_path: str | None = None,\n screenshot_path: str | None = None,\n ) -> ExtractedPage:\n http = _build_http_response(fetch)\n flags = self._calculator.calculate(\n head=parsed.head,\n h1_count=parsed.body.h1_count,\n http_status=http.status,\n page_url=page_url,\n )\n page = Page(\n id=None,\n crawl_id=crawl_id,\n url_id=url_id,\n fetched_at=fetched_at,\n render_mode=render_mode,\n http=http,\n head=parsed.head,\n og=parsed.og,\n twitter=parsed.twitter,\n body=parsed.body,\n performance=_empty_performance(),\n storage=StoragePaths(screenshot_path=screenshot_path, html_raw_path=html_raw_path),\n quality_flags=flags,\n )\n return ExtractedPage(page=page, parsed=parsed)\n\n\ndef _build_http_response(fetch: FetchResult) -> HttpResponse:\n status = HttpStatus(fetch.status_code) if fetch.status_code else None\n return HttpResponse(\n status=status,\n final_url=fetch.final_url,\n redirect_chain=fetch.redirect_chain,\n response_time_ms=fetch.response_time_ms,\n ttfb_ms=fetch.ttfb_ms,\n content_type=fetch.headers.get(\"content-type\"),\n content_length=_safe_int(fetch.headers.get(\"content-length\")),\n content_encoding=fetch.headers.get(\"content-encoding\"),\n )\n\n\ndef _safe_int(value: str | None) -> int | None:\n if value is None:\n return None\n try:\n return int(value)\n except ValueError:\n return None\n\n\ndef _empty_performance() -> object:\n from src.domain.value_objects.performance_metrics import PerformanceMetrics\n return PerformanceMetrics(None, None, None, None, None)\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}