Protokoll #33888

ID: 33888
Zeitstempel: 2026-04-21 03:42:20.765835
Client: root
IP: 62.246.34.195
Modell: claude-sonnet-4-20250514
Status: completed
Tokens: 1,079 (Input: 534, Output: 545)
Dauer: 67 ms
Request-Zeit: 2026-04-21 03:42:20.765835
Response-Zeit: 2026-04-21 03:42:20.832907

Request

{
    "event": "PreToolUse",
    "tool_name": "Write",
    "tool_input": {
        "file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/fetchers\/sitemap_loader.py",
        "content": "\"\"\"Discovers URLs from sitemap.xml \/ sitemap_index.xml via httpx.\"\"\"\nfrom __future__ import annotations\n\nimport re\nfrom typing import Self\nfrom urllib.parse import urljoin\n\nimport httpx\n\nfrom src.infrastructure.config.constants import HTTP_TIMEOUT_SEC, USER_AGENT\n\n_LOC_RE = re.compile(r\"<loc>\\s*([^<\\s]+)\\s*<\/loc>\", re.IGNORECASE)\n_CANDIDATE_PATHS = (\"sitemap_index.xml\", \"wp-sitemap.xml\", \"sitemap.xml\")\n\n\nclass SitemapLoader:\n    def __init__(self, client: httpx.AsyncClient) -> None:\n        self._client = client\n\n    async def discover(self, base_url: str) -> tuple[str, ...]:\n        visited: set[str] = set()\n        page_urls: set[str] = set()\n        queue: list[str] = [urljoin(base_url, p) for p in _CANDIDATE_PATHS]\n        while queue:\n            sm_url = queue.pop()\n            if sm_url in visited:\n                continue\n            visited.add(sm_url)\n            locs = await self._fetch_locs(sm_url)\n            for loc in locs:\n                if _looks_like_sitemap(loc):\n                    queue.append(loc)\n                else:\n                    page_urls.add(loc)\n        return tuple(sorted(page_urls))\n\n    async def _fetch_locs(self, sitemap_url: str) -> list[str]:\n        try:\n            response = await self._client.get(sitemap_url)\n        except httpx.HTTPError:\n            return []\n        if response.status_code != httpx.codes.OK:\n            return []\n        return _LOC_RE.findall(response.text)\n\n    @classmethod\n    def create(cls) -> Self:\n        return cls(httpx.AsyncClient(\n            timeout=HTTP_TIMEOUT_SEC,\n            follow_redirects=True,\n            headers={\"User-Agent\": USER_AGENT},\n        ))\n\n    async def close(self) -> None:\n        await self._client.aclose()\n\n\ndef _looks_like_sitemap(url: str) -> bool:\n    lowered = url.lower()\n    return lowered.endswith(\".xml\") or \"\/sitemap\" in lowered\n"
    }
}

Response

{
    "tool_response": {
        "type": "create",
        "filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/fetchers\/sitemap_loader.py",
        "content": "\"\"\"Discovers URLs from sitemap.xml \/ sitemap_index.xml via httpx.\"\"\"\nfrom __future__ import annotations\n\nimport re\nfrom typing import Self\nfrom urllib.parse import urljoin\n\nimport httpx\n\nfrom src.infrastructure.config.constants import HTTP_TIMEOUT_SEC, USER_AGENT\n\n_LOC_RE = re.compile(r\"<loc>\\s*([^<\\s]+)\\s*<\/loc>\", re.IGNORECASE)\n_CANDIDATE_PATHS = (\"sitemap_index.xml\", \"wp-sitemap.xml\", \"sitemap.xml\")\n\n\nclass SitemapLoader:\n    def __init__(self, client: httpx.AsyncClient) -> None:\n        self._client = client\n\n    async def discover(self, base_url: str) -> tuple[str, ...]:\n        visited: set[str] = set()\n        page_urls: set[str] = set()\n        queue: list[str] = [urljoin(base_url, p) for p in _CANDIDATE_PATHS]\n        while queue:\n            sm_url = queue.pop()\n            if sm_url in visited:\n                continue\n            visited.add(sm_url)\n            locs = await self._fetch_locs(sm_url)\n            for loc in locs:\n                if _looks_like_sitemap(loc):\n                    queue.append(loc)\n                else:\n                    page_urls.add(loc)\n        return tuple(sorted(page_urls))\n\n    async def _fetch_locs(self, sitemap_url: str) -> list[str]:\n        try:\n            response = await self._client.get(sitemap_url)\n        except httpx.HTTPError:\n            return []\n        if response.status_code != httpx.codes.OK:\n            return []\n        return _LOC_RE.findall(response.text)\n\n    @classmethod\n    def create(cls) -> Self:\n        return cls(httpx.AsyncClient(\n            timeout=HTTP_TIMEOUT_SEC,\n            follow_redirects=True,\n            headers={\"User-Agent\": USER_AGENT},\n        ))\n\n    async def close(self) -> None:\n        await self._client.aclose()\n\n\ndef _looks_like_sitemap(url: str) -> bool:\n    lowered = url.lower()\n    return lowered.endswith(\".xml\") or \"\/sitemap\" in lowered\n",
        "structuredPatch": [],
        "originalFile": null,
        "userModified": false
    }
}
← Vorheriger | Zur Liste | Nächster →