{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/e2e\/test_crawl_fixture.py",
"content": "from __future__ import annotations\n\nimport asyncio\nimport socket\nfrom collections.abc import AsyncGenerator\n\nimport pytest\nimport pytest_asyncio\nfrom aiohttp import web\n\nfrom src.domain.value_objects.enums import CrawlMode, TriggerSource\nfrom src.infrastructure.config.db_config import load_db_settings\nfrom src.infrastructure.persistence.connection import MariaDbConnectionFactory\nfrom src.interfaces.cli.commands.crawl import CrawlCliInput, run_crawl\nfrom src.interfaces.cli.wiring import Container\nfrom tests.fixtures.fixture_server import build_app\n\n_TRUNCATE_ORDER = (\n \"url_queue\", \"external_checks\", \"errors\", \"js_errors\", \"hreflang\",\n \"structured_data\", \"resources\", \"images\", \"links\", \"page_headers\",\n \"pages\", \"urls\", \"crawls\",\n)\n\n\ndef _free_port() -> int:\n with socket.socket() as s:\n s.bind((\"127.0.0.1\", 0))\n return int(s.getsockname()[1])\n\n\n@pytest_asyncio.fixture\nasync def fixture_server() -> AsyncGenerator[str, None]:\n port = _free_port()\n runner = web.AppRunner(build_app())\n await runner.setup()\n site = web.TCPSite(runner, \"127.0.0.1\", port)\n await site.start()\n try:\n yield f\"http:\/\/127.0.0.1:{port}\/\"\n finally:\n await runner.cleanup()\n\n\n@pytest.fixture(autouse=True)\ndef _clean_db() -> None:\n factory = MariaDbConnectionFactory(load_db_settings(\"crawler\"))\n with factory.session() as conn:\n cur = conn.cursor()\n cur.execute(\"SET FOREIGN_KEY_CHECKS=0\")\n for table in _TRUNCATE_ORDER:\n cur.execute(f\"TRUNCATE TABLE {table}\")\n cur.execute(\"SET FOREIGN_KEY_CHECKS=1\")\n\n\n@pytest.mark.asyncio\nasync def test_end_to_end_crawl_against_fixture(fixture_server: str) -> None:\n factory = MariaDbConnectionFactory(load_db_settings(\"crawler\"))\n container = Container(factory)\n command = CrawlCliInput(\n base_url=fixture_server, mode=CrawlMode.FAST, trigger=TriggerSource.CLI,\n concurrency=4, max_urls=20,\n )\n\n crawl_id = await asyncio.wait_for(run_crawl(container, command), timeout=30)\n\n with factory.session() as conn:\n cur = conn.cursor(dictionary=True)\n cur.execute(\"SELECT COUNT(*) AS c FROM pages WHERE crawl_id=%s\", (crawl_id,))\n page_count = int(cur.fetchone()[\"c\"])\n cur.execute(\n \"SELECT quality_flags FROM pages WHERE crawl_id=%s\", (crawl_id,),\n )\n flags_blobs = [r[\"quality_flags\"] for r in cur.fetchall()]\n assert page_count >= 5\n assert any(\"h1_multiple\" in b for b in flags_blobs)\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/e2e\/test_crawl_fixture.py",
"content": "from __future__ import annotations\n\nimport asyncio\nimport socket\nfrom collections.abc import AsyncGenerator\n\nimport pytest\nimport pytest_asyncio\nfrom aiohttp import web\n\nfrom src.domain.value_objects.enums import CrawlMode, TriggerSource\nfrom src.infrastructure.config.db_config import load_db_settings\nfrom src.infrastructure.persistence.connection import MariaDbConnectionFactory\nfrom src.interfaces.cli.commands.crawl import CrawlCliInput, run_crawl\nfrom src.interfaces.cli.wiring import Container\nfrom tests.fixtures.fixture_server import build_app\n\n_TRUNCATE_ORDER = (\n \"url_queue\", \"external_checks\", \"errors\", \"js_errors\", \"hreflang\",\n \"structured_data\", \"resources\", \"images\", \"links\", \"page_headers\",\n \"pages\", \"urls\", \"crawls\",\n)\n\n\ndef _free_port() -> int:\n with socket.socket() as s:\n s.bind((\"127.0.0.1\", 0))\n return int(s.getsockname()[1])\n\n\n@pytest_asyncio.fixture\nasync def fixture_server() -> AsyncGenerator[str, None]:\n port = _free_port()\n runner = web.AppRunner(build_app())\n await runner.setup()\n site = web.TCPSite(runner, \"127.0.0.1\", port)\n await site.start()\n try:\n yield f\"http:\/\/127.0.0.1:{port}\/\"\n finally:\n await runner.cleanup()\n\n\n@pytest.fixture(autouse=True)\ndef _clean_db() -> None:\n factory = MariaDbConnectionFactory(load_db_settings(\"crawler\"))\n with factory.session() as conn:\n cur = conn.cursor()\n cur.execute(\"SET FOREIGN_KEY_CHECKS=0\")\n for table in _TRUNCATE_ORDER:\n cur.execute(f\"TRUNCATE TABLE {table}\")\n cur.execute(\"SET FOREIGN_KEY_CHECKS=1\")\n\n\n@pytest.mark.asyncio\nasync def test_end_to_end_crawl_against_fixture(fixture_server: str) -> None:\n factory = MariaDbConnectionFactory(load_db_settings(\"crawler\"))\n container = Container(factory)\n command = CrawlCliInput(\n base_url=fixture_server, mode=CrawlMode.FAST, trigger=TriggerSource.CLI,\n concurrency=4, max_urls=20,\n )\n\n crawl_id = await asyncio.wait_for(run_crawl(container, command), timeout=30)\n\n with factory.session() as conn:\n cur = conn.cursor(dictionary=True)\n cur.execute(\"SELECT COUNT(*) AS c FROM pages WHERE crawl_id=%s\", (crawl_id,))\n page_count = int(cur.fetchone()[\"c\"])\n cur.execute(\n \"SELECT quality_flags FROM pages WHERE crawl_id=%s\", (crawl_id,),\n )\n flags_blobs = [r[\"quality_flags\"] for r in cur.fetchall()]\n assert page_count >= 5\n assert any(\"h1_multiple\" in b for b in flags_blobs)\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}