{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/e2e\/test_crawl_fixture.py",
"offset": 55
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/e2e\/test_crawl_fixture.py",
"content": "def _clean_db() -> None:\n factory = MariaDbConnectionFactory(load_db_settings(\"crawler\"))\n with factory.session() as conn:\n cur = conn.cursor()\n cur.execute(\"SET FOREIGN_KEY_CHECKS=0\")\n for table in _TRUNCATE_ORDER:\n cur.execute(f\"TRUNCATE TABLE {table}\")\n cur.execute(\"SET FOREIGN_KEY_CHECKS=1\")\n\n\n@pytest.mark.asyncio\nasync def test_end_to_end_crawl_against_fixture(fixture_server: str) -> None:\n factory = MariaDbConnectionFactory(load_db_settings(\"crawler\"))\n container = Container(factory)\n command = CrawlCliInput(\n base_url=fixture_server,\n mode=CrawlMode.FAST,\n trigger=TriggerSource.CLI,\n concurrency=4,\n max_urls=20,\n )\n\n crawl_id = await asyncio.wait_for(run_crawl(container, command), timeout=30)\n\n with factory.session() as conn:\n cur = conn.cursor(dictionary=True)\n cur.execute(\"SELECT COUNT(*) AS c FROM pages WHERE crawl_id=%s\", (crawl_id,))\n page_count = int(cur.fetchone()[\"c\"])\n cur.execute(\n \"SELECT quality_flags FROM pages WHERE crawl_id=%s\",\n (crawl_id,),\n )\n flags_blobs = [r[\"quality_flags\"] for r in cur.fetchall()]\n assert page_count >= 5\n assert any(\"h1_multiple\" in b for b in flags_blobs)\n",
"numLines": 36,
"startLine": 55,
"totalLines": 90
}
}
}