{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/persistence\/link_repository.py",
"content": "from __future__ import annotations\n\nimport json\nfrom collections.abc import Iterable\nfrom typing import Any\n\nfrom src.domain.entities.link import Link\nfrom src.domain.ports.link_repository_port import LinkRepositoryPort\nfrom src.infrastructure.persistence.connection import MariaDbConnectionFactory\n\n_COLUMNS = (\n \"page_id, source_url_id, target_url, target_url_id, anchor_text, rel,\"\n \" is_internal, is_external, position_hint,\"\n \" external_http_status, external_final_url, external_checked_at, external_error\"\n)\n_PLACEHOLDERS = \", \".join(f\"%({name.strip()})s\" for name in _COLUMNS.split(\",\"))\n_INSERT = f\"INSERT INTO links ({_COLUMNS}) VALUES ({_PLACEHOLDERS})\"\n_SELECT_BY_PAGE = \"SELECT * FROM links WHERE page_id=%s ORDER BY id\"\n_UPDATE_EXTERNAL = (\n \"UPDATE links SET external_http_status=%s, external_final_url=%s,\"\n \" external_checked_at=%s, external_error=%s WHERE id=%s\"\n)\n\n\nclass LinkRepository(LinkRepositoryPort):\n def __init__(self, connections: MariaDbConnectionFactory) -> None:\n self._connections = connections\n\n def save_many(self, links: Iterable[Link]) -> None:\n rows = [_link_to_row(link) for link in links]\n if not rows:\n return\n with self._connections.session() as conn:\n conn.cursor().executemany(_INSERT, rows)\n\n def list_by_page(self, page_id: int) -> list[Link]:\n with self._connections.session() as conn:\n cur = conn.cursor(dictionary=True)\n cur.execute(_SELECT_BY_PAGE, (page_id,))\n return [_row_to_link(r) for r in cur.fetchall()]\n\n def update_external_check(self, link_id: int, link: Link) -> None:\n with self._connections.session() as conn:\n conn.cursor().execute(\n _UPDATE_EXTERNAL,\n (\n link.external_http_status,\n link.external_final_url,\n link.external_checked_at,\n link.external_error,\n link_id,\n ),\n )\n\n\ndef _link_to_row(link: Link) -> dict[str, Any]:\n return {\n \"page_id\": link.page_id, \"source_url_id\": link.source_url_id,\n \"target_url\": link.target_url, \"target_url_id\": link.target_url_id,\n \"anchor_text\": link.anchor_text, \"rel\": json.dumps(sorted(link.rel)),\n \"is_internal\": 1 if link.is_internal else 0,\n \"is_external\": 1 if link.is_external else 0,\n \"position_hint\": link.position_hint,\n \"external_http_status\": link.external_http_status,\n \"external_final_url\": link.external_final_url,\n \"external_checked_at\": link.external_checked_at,\n \"external_error\": link.external_error,\n }\n\n\ndef _row_to_link(row: dict[str, Any]) -> Link:\n return Link(\n id=row[\"id\"], page_id=row[\"page_id\"], source_url_id=row[\"source_url_id\"],\n target_url=row[\"target_url\"], target_url_id=row.get(\"target_url_id\"),\n anchor_text=row.get(\"anchor_text\"),\n rel=frozenset(json.loads(row[\"rel\"])) if row.get(\"rel\") else frozenset(),\n is_internal=bool(row[\"is_internal\"]), is_external=bool(row[\"is_external\"]),\n position_hint=row.get(\"position_hint\"),\n external_http_status=row.get(\"external_http_status\"),\n external_final_url=row.get(\"external_final_url\"),\n external_checked_at=row.get(\"external_checked_at\"),\n external_error=row.get(\"external_error\"),\n )\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/persistence\/link_repository.py",
"content": "from __future__ import annotations\n\nimport json\nfrom collections.abc import Iterable\nfrom typing import Any\n\nfrom src.domain.entities.link import Link\nfrom src.domain.ports.link_repository_port import LinkRepositoryPort\nfrom src.infrastructure.persistence.connection import MariaDbConnectionFactory\n\n_COLUMNS = (\n \"page_id, source_url_id, target_url, target_url_id, anchor_text, rel,\"\n \" is_internal, is_external, position_hint,\"\n \" external_http_status, external_final_url, external_checked_at, external_error\"\n)\n_PLACEHOLDERS = \", \".join(f\"%({name.strip()})s\" for name in _COLUMNS.split(\",\"))\n_INSERT = f\"INSERT INTO links ({_COLUMNS}) VALUES ({_PLACEHOLDERS})\"\n_SELECT_BY_PAGE = \"SELECT * FROM links WHERE page_id=%s ORDER BY id\"\n_UPDATE_EXTERNAL = (\n \"UPDATE links SET external_http_status=%s, external_final_url=%s,\"\n \" external_checked_at=%s, external_error=%s WHERE id=%s\"\n)\n\n\nclass LinkRepository(LinkRepositoryPort):\n def __init__(self, connections: MariaDbConnectionFactory) -> None:\n self._connections = connections\n\n def save_many(self, links: Iterable[Link]) -> None:\n rows = [_link_to_row(link) for link in links]\n if not rows:\n return\n with self._connections.session() as conn:\n conn.cursor().executemany(_INSERT, rows)\n\n def list_by_page(self, page_id: int) -> list[Link]:\n with self._connections.session() as conn:\n cur = conn.cursor(dictionary=True)\n cur.execute(_SELECT_BY_PAGE, (page_id,))\n return [_row_to_link(r) for r in cur.fetchall()]\n\n def update_external_check(self, link_id: int, link: Link) -> None:\n with self._connections.session() as conn:\n conn.cursor().execute(\n _UPDATE_EXTERNAL,\n (\n link.external_http_status,\n link.external_final_url,\n link.external_checked_at,\n link.external_error,\n link_id,\n ),\n )\n\n\ndef _link_to_row(link: Link) -> dict[str, Any]:\n return {\n \"page_id\": link.page_id, \"source_url_id\": link.source_url_id,\n \"target_url\": link.target_url, \"target_url_id\": link.target_url_id,\n \"anchor_text\": link.anchor_text, \"rel\": json.dumps(sorted(link.rel)),\n \"is_internal\": 1 if link.is_internal else 0,\n \"is_external\": 1 if link.is_external else 0,\n \"position_hint\": link.position_hint,\n \"external_http_status\": link.external_http_status,\n \"external_final_url\": link.external_final_url,\n \"external_checked_at\": link.external_checked_at,\n \"external_error\": link.external_error,\n }\n\n\ndef _row_to_link(row: dict[str, Any]) -> Link:\n return Link(\n id=row[\"id\"], page_id=row[\"page_id\"], source_url_id=row[\"source_url_id\"],\n target_url=row[\"target_url\"], target_url_id=row.get(\"target_url_id\"),\n anchor_text=row.get(\"anchor_text\"),\n rel=frozenset(json.loads(row[\"rel\"])) if row.get(\"rel\") else frozenset(),\n is_internal=bool(row[\"is_internal\"]), is_external=bool(row[\"is_external\"]),\n position_hint=row.get(\"position_hint\"),\n external_http_status=row.get(\"external_http_status\"),\n external_final_url=row.get(\"external_final_url\"),\n external_checked_at=row.get(\"external_checked_at\"),\n external_error=row.get(\"external_error\"),\n )\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}