{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/start_crawl.py",
"content": "from __future__ import annotations\n\nfrom dataclasses import dataclass\nfrom typing import Mapping\n\nfrom src.domain.entities.crawl import Crawl\nfrom src.domain.entities.url import Url\nfrom src.domain.ports.clock_port import ClockPort\nfrom src.domain.ports.crawl_repository_port import CrawlRepositoryPort\nfrom src.domain.ports.url_queue_port import UrlQueuePort\nfrom src.domain.ports.url_repository_port import UrlRepositoryPort\nfrom src.domain.services.url_normalizer import UrlNormalizer\nfrom src.domain.value_objects.enums import CrawlMode, CrawlStatus, TriggerSource\n\n\n@dataclass(frozen=True, slots=True)\nclass StartCrawlInput:\n base_url: str\n mode: CrawlMode\n trigger_source: TriggerSource\n config: Mapping[str, object]\n\n\nclass StartCrawlUseCase:\n def __init__(\n self,\n *,\n crawls: CrawlRepositoryPort,\n urls: UrlRepositoryPort,\n queue: UrlQueuePort,\n normalizer: UrlNormalizer,\n clock: ClockPort,\n ) -> None:\n self._crawls = crawls\n self._urls = urls\n self._queue = queue\n self._normalizer = normalizer\n self._clock = clock\n\n def execute(self, command: StartCrawlInput) -> Crawl:\n now = self._clock.now()\n crawl = self._crawls.save(\n Crawl(\n id=None,\n base_url=command.base_url,\n mode=command.mode,\n started_at=now,\n finished_at=None,\n status=CrawlStatus.RUNNING,\n trigger_source=command.trigger_source,\n config=command.config,\n ),\n )\n self._enqueue_seed(crawl_id=crawl.id, raw_url=command.base_url, now=now)\n return crawl\n\n def _enqueue_seed(self, *, crawl_id: int | None, raw_url: str, now: object) -> None:\n assert crawl_id is not None\n normalized = self._normalizer.normalize(raw_url)\n url = self._urls.upsert(\n Url(\n id=None,\n normalized=normalized,\n is_internal=True,\n first_seen_at=now, # type: ignore[arg-type]\n last_seen_at=now, # type: ignore[arg-type]\n ),\n )\n _ = url\n self._queue.enqueue(crawl_id, normalized)\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/application\/use_cases\/start_crawl.py",
"content": "from __future__ import annotations\n\nfrom dataclasses import dataclass\nfrom typing import Mapping\n\nfrom src.domain.entities.crawl import Crawl\nfrom src.domain.entities.url import Url\nfrom src.domain.ports.clock_port import ClockPort\nfrom src.domain.ports.crawl_repository_port import CrawlRepositoryPort\nfrom src.domain.ports.url_queue_port import UrlQueuePort\nfrom src.domain.ports.url_repository_port import UrlRepositoryPort\nfrom src.domain.services.url_normalizer import UrlNormalizer\nfrom src.domain.value_objects.enums import CrawlMode, CrawlStatus, TriggerSource\n\n\n@dataclass(frozen=True, slots=True)\nclass StartCrawlInput:\n base_url: str\n mode: CrawlMode\n trigger_source: TriggerSource\n config: Mapping[str, object]\n\n\nclass StartCrawlUseCase:\n def __init__(\n self,\n *,\n crawls: CrawlRepositoryPort,\n urls: UrlRepositoryPort,\n queue: UrlQueuePort,\n normalizer: UrlNormalizer,\n clock: ClockPort,\n ) -> None:\n self._crawls = crawls\n self._urls = urls\n self._queue = queue\n self._normalizer = normalizer\n self._clock = clock\n\n def execute(self, command: StartCrawlInput) -> Crawl:\n now = self._clock.now()\n crawl = self._crawls.save(\n Crawl(\n id=None,\n base_url=command.base_url,\n mode=command.mode,\n started_at=now,\n finished_at=None,\n status=CrawlStatus.RUNNING,\n trigger_source=command.trigger_source,\n config=command.config,\n ),\n )\n self._enqueue_seed(crawl_id=crawl.id, raw_url=command.base_url, now=now)\n return crawl\n\n def _enqueue_seed(self, *, crawl_id: int | None, raw_url: str, now: object) -> None:\n assert crawl_id is not None\n normalized = self._normalizer.normalize(raw_url)\n url = self._urls.upsert(\n Url(\n id=None,\n normalized=normalized,\n is_internal=True,\n first_seen_at=now, # type: ignore[arg-type]\n last_seen_at=now, # type: ignore[arg-type]\n ),\n )\n _ = url\n self._queue.enqueue(crawl_id, normalized)\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}