{
  "event": "PreToolUse",
  "tool_name": "Read",
  "tool_input": {
    "file_path": "/var/www/dev.campus.systemische-tools.de/crawler/src/application/use_cases/process_url.py",
    "offset": 28,
    "limit": 50
  }
}
{
  "tool_response": {
    "type": "text",
    "file": {
      "filePath": "/var/www/dev.campus.systemische-tools.de/crawler/src/application/use_cases/process_url.py",
      "content": "\n@dataclass(frozen=True, slots=True)\nclass ProcessUrlDeps:\n    fetcher: HttpFetcherPort\n    html_parser: HtmlParserPort\n    sd_parser: StructuredDataParserPort\n    pages: PageRepositoryPort\n    urls: UrlRepositoryPort\n    links: LinkRepositoryPort\n    images: ImageRepositoryPort\n    structured: StructuredDataRepositoryPort\n    hreflang: HreflangRepositoryPort\n    queue: UrlQueuePort\n    classifier: LinkClassifier\n    normalizer: UrlNormalizer\n    extractor: ExtractSeoDataUseCase\n    clock: ClockPort\n\n\n@dataclass(frozen=True, slots=True)\nclass _PageContext:\n    crawl_id: int\n    page_id: int\n    source_url_id: int\n    now: datetime\n\n\nclass ProcessUrlUseCase:\n    def __init__(self, deps: ProcessUrlDeps) -> None:\n        self._d = deps\n\n    async def execute(self, *, crawl_id: int, url: NormalizedUrl) -> Page:\n        now = self._d.clock.now()\n        fetch = await self._d.fetcher.fetch(url)\n        html = fetch.body.decode(\"utf-8\", errors=\"replace\")\n        parsed = self._d.html_parser.parse(html, base_url=url.url)\n        url_entity = _upsert_url(self._d.urls, url, now=now, is_internal=True)\n        extracted = self._d.extractor.execute(\n            ExtractInput(\n                crawl_id=crawl_id,\n                url_id=_id(url_entity),\n                fetched_at=now,\n                render_mode=RenderMode.HTTP,\n                fetch=fetch,\n                parsed=parsed,\n                page_url=url.url,\n            ),\n        )\n        page = self._d.pages.save(extracted.page)\n        ctx = _PageContext(crawl_id, _id(page), _id(url_entity), now)",
      "numLines": 50,
      "startLine": 28,
      "totalLines": 177
    }
  }
}