{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/interfaces\/cli\/commands\/crawl.py",
"old_string": "from src.application.use_cases.start_crawl import StartCrawlInput\nfrom src.domain.ports.http_fetcher_port import HttpFetcherPort",
"new_string": "from collections.abc import Awaitable, Callable\n\nfrom src.application.use_cases.start_crawl import StartCrawlInput\nfrom src.domain.ports.http_fetcher_port import HttpFetcherPort",
"replace_all": false
}
}
{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/interfaces\/cli\/commands\/crawl.py",
"oldString": "from src.application.use_cases.start_crawl import StartCrawlInput\nfrom src.domain.ports.http_fetcher_port import HttpFetcherPort",
"newString": "from collections.abc import Awaitable, Callable\n\nfrom src.application.use_cases.start_crawl import StartCrawlInput\nfrom src.domain.ports.http_fetcher_port import HttpFetcherPort",
"originalFile": "from __future__ import annotations\n\nimport asyncio\nfrom dataclasses import dataclass\nfrom urllib.parse import urlsplit\n\nfrom src.application.use_cases.start_crawl import StartCrawlInput\nfrom src.domain.ports.http_fetcher_port import HttpFetcherPort\nfrom src.domain.value_objects.enums import CrawlMode, CrawlStatus, RenderMode, TriggerSource\nfrom src.infrastructure.config.constants import HTTP_CONCURRENCY, PLAYWRIGHT_CONCURRENCY\nfrom src.infrastructure.fetchers.httpx_fetcher import HttpxFetcher\nfrom src.infrastructure.fetchers.playwright_fetcher import PlaywrightFetcher\nfrom src.infrastructure.fetchers.playwright_http_adapter import PlaywrightHttpAdapter\nfrom src.interfaces.cli.wiring import Container\nfrom src.interfaces.cli.worker import QueueWorker, WorkerConfig\n\n\n@dataclass(frozen=True, slots=True)\nclass CrawlCliInput:\n base_url: str\n mode: CrawlMode\n trigger: TriggerSource\n concurrency: int = HTTP_CONCURRENCY\n max_urls: int | None = None\n\n\n@dataclass(frozen=True, slots=True)\nclass _FetcherChoice:\n fetcher: HttpFetcherPort\n render_mode: RenderMode\n concurrency: int\n close: object # async callable\n\n\nasync def _build_fetcher(command: CrawlCliInput) -> _FetcherChoice:\n if command.mode is CrawlMode.FAST:\n http = HttpxFetcher.create()\n return _FetcherChoice(http, RenderMode.HTTP, command.concurrency, http.close)\n browser = await PlaywrightFetcher.create()\n adapter = PlaywrightHttpAdapter(browser)\n concurrency = min(command.concurrency, PLAYWRIGHT_CONCURRENCY)\n return _FetcherChoice(adapter, RenderMode.PLAYWRIGHT, concurrency, browser.close)\n\n\nasync def run_crawl(container: Container, command: CrawlCliInput) -> int:\n crawl = container.start_crawl().execute(\n StartCrawlInput(\n base_url=command.base_url, mode=command.mode,\n trigger_source=command.trigger, config={},\n ),\n )\n if crawl.id is None:\n raise RuntimeError(\"start_crawl did not assign id\")\n choice = await _build_fetcher(command)\n registered_domain = urlsplit(command.base_url).hostname or \"\"\n try:\n process_uc = container.process_url(\n choice.fetcher,\n registered_domain=registered_domain,\n render_mode=choice.render_mode,\n )\n worker = QueueWorker(\n queue=container.queue, process_url=process_uc,\n errors=container.repos.errors, clock=container.clock,\n )\n processed = await worker.run(WorkerConfig(\n crawl_id=crawl.id, concurrency=choice.concurrency, max_urls=command.max_urls,\n ))\n finally:\n await choice.close() # type: ignore[misc]\n container.finalize().execute(\n crawl_id=crawl.id, status=CrawlStatus.COMPLETED, total_urls=processed,\n )\n return crawl.id\n\n\ndef sync_run_crawl(container: Container, command: CrawlCliInput) -> int:\n return asyncio.run(run_crawl(container, command))\n",
"structuredPatch": [
{
"oldStart": 4,
"oldLines": 6,
"newStart": 4,
"newLines": 8,
"lines": [
" from dataclasses import dataclass",
" from urllib.parse import urlsplit",
" ",
"+from collections.abc import Awaitable, Callable",
"+",
" from src.application.use_cases.start_crawl import StartCrawlInput",
" from src.domain.ports.http_fetcher_port import HttpFetcherPort",
" from src.domain.value_objects.enums import CrawlMode, CrawlStatus, RenderMode, TriggerSource"
]
}
],
"userModified": false,
"replaceAll": false
}
}