{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/integration\/infrastructure\/test_html_parser_bs4.py",
"content": "from __future__ import annotations\n\nfrom src.infrastructure.parsers.html_parser_bs4 import Bs4HtmlParser\n\nSAMPLE_HTML = \"\"\"<!doctype html>\n<html lang=\"de\">\n<head>\n <meta charset=\"utf-8\">\n <title>Campus am See<\/title>\n <meta name=\"description\" content=\"SEO-Testseite\">\n <meta name=\"robots\" content=\"index, follow\">\n <link rel=\"canonical\" href=\"https:\/\/campus-am-see.de\/\">\n <link rel=\"alternate\" hreflang=\"en\" href=\"https:\/\/campus-am-see.de\/en\">\n <meta property=\"og:title\" content=\"OG Title\">\n <meta name=\"twitter:card\" content=\"summary\">\n<\/head>\n<body>\n <h1>Willkommen<\/h1>\n <h2>Zweiter Abschnitt<\/h2>\n <a href=\"\/team\">Team<\/a>\n <a href=\"https:\/\/www.example.com\/\">Extern<\/a>\n <img src=\"\/logo.png\" alt=\"Logo\" width=\"100\" height=\"50\">\n<\/body>\n<\/html>\"\"\"\n\n\ndef test_parser_extracts_all_core_fields() -> None:\n parsed = Bs4HtmlParser().parse(SAMPLE_HTML, base_url=\"https:\/\/campus-am-see.de\/\")\n assert parsed.head.title == \"Campus am See\"\n assert parsed.head.description == \"SEO-Testseite\"\n assert parsed.head.canonical == \"https:\/\/campus-am-see.de\/\"\n assert parsed.head.lang == \"de\"\n assert parsed.og.title == \"OG Title\"\n assert parsed.twitter.card == \"summary\"\n assert parsed.body.h1_texts == (\"Willkommen\",)\n assert parsed.body.h2_texts == (\"Zweiter Abschnitt\",)\n assert \"https:\/\/campus-am-see.de\/team\" in parsed.internal_links\n assert any(\"example.com\" in link for link in parsed.external_links)\n assert parsed.image_specs[0][0] == \"https:\/\/campus-am-see.de\/logo.png\"\n assert parsed.image_specs[0][1] == \"Logo\"\n assert parsed.hreflang == tuple(\n type(parsed.hreflang[0])(lang=\"en\", href=\"https:\/\/campus-am-see.de\/en\")\n for _ in [0]\n )\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/tests\/integration\/infrastructure\/test_html_parser_bs4.py",
"content": "from __future__ import annotations\n\nfrom src.infrastructure.parsers.html_parser_bs4 import Bs4HtmlParser\n\nSAMPLE_HTML = \"\"\"<!doctype html>\n<html lang=\"de\">\n<head>\n <meta charset=\"utf-8\">\n <title>Campus am See<\/title>\n <meta name=\"description\" content=\"SEO-Testseite\">\n <meta name=\"robots\" content=\"index, follow\">\n <link rel=\"canonical\" href=\"https:\/\/campus-am-see.de\/\">\n <link rel=\"alternate\" hreflang=\"en\" href=\"https:\/\/campus-am-see.de\/en\">\n <meta property=\"og:title\" content=\"OG Title\">\n <meta name=\"twitter:card\" content=\"summary\">\n<\/head>\n<body>\n <h1>Willkommen<\/h1>\n <h2>Zweiter Abschnitt<\/h2>\n <a href=\"\/team\">Team<\/a>\n <a href=\"https:\/\/www.example.com\/\">Extern<\/a>\n <img src=\"\/logo.png\" alt=\"Logo\" width=\"100\" height=\"50\">\n<\/body>\n<\/html>\"\"\"\n\n\ndef test_parser_extracts_all_core_fields() -> None:\n parsed = Bs4HtmlParser().parse(SAMPLE_HTML, base_url=\"https:\/\/campus-am-see.de\/\")\n assert parsed.head.title == \"Campus am See\"\n assert parsed.head.description == \"SEO-Testseite\"\n assert parsed.head.canonical == \"https:\/\/campus-am-see.de\/\"\n assert parsed.head.lang == \"de\"\n assert parsed.og.title == \"OG Title\"\n assert parsed.twitter.card == \"summary\"\n assert parsed.body.h1_texts == (\"Willkommen\",)\n assert parsed.body.h2_texts == (\"Zweiter Abschnitt\",)\n assert \"https:\/\/campus-am-see.de\/team\" in parsed.internal_links\n assert any(\"example.com\" in link for link in parsed.external_links)\n assert parsed.image_specs[0][0] == \"https:\/\/campus-am-see.de\/logo.png\"\n assert parsed.image_specs[0][1] == \"Logo\"\n assert parsed.hreflang == tuple(\n type(parsed.hreflang[0])(lang=\"en\", href=\"https:\/\/campus-am-see.de\/en\")\n for _ in [0]\n )\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}