{
"event": "PreToolUse",
"tool_name": "Write",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/domain\/services\/quality_flag_calculator.py",
"content": "from __future__ import annotations\n\nfrom dataclasses import dataclass\n\nfrom src.domain.value_objects.enums import QualitySeverity\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.quality_flag import QualityFlag, QualityFlagKey\n\nTITLE_MIN, TITLE_MAX = 30, 60\nDESCRIPTION_MIN, DESCRIPTION_MAX = 70, 160\n\n\ndef _title_flags(head: HeadMeta) -> list[QualityFlag]:\n if head.title is None or head.title.strip() == \"\":\n return [_flag(QualityFlagKey.TITLE_MISSING, QualitySeverity.ERROR, \"title missing\")]\n length = len(head.title)\n if length < TITLE_MIN:\n return [_flag(QualityFlagKey.TITLE_TOO_SHORT, QualitySeverity.WARNING,\n f\"title is {length} chars (<{TITLE_MIN})\")]\n if length > TITLE_MAX:\n return [_flag(QualityFlagKey.TITLE_TOO_LONG, QualitySeverity.WARNING,\n f\"title is {length} chars (>{TITLE_MAX})\")]\n return []\n\n\ndef _description_flags(head: HeadMeta) -> list[QualityFlag]:\n if head.description is None or head.description.strip() == \"\":\n return [_flag(QualityFlagKey.DESCRIPTION_MISSING, QualitySeverity.WARNING,\n \"meta description missing\")]\n length = len(head.description)\n if length < DESCRIPTION_MIN:\n return [_flag(QualityFlagKey.DESCRIPTION_TOO_SHORT, QualitySeverity.INFO,\n f\"description is {length} chars (<{DESCRIPTION_MIN})\")]\n if length > DESCRIPTION_MAX:\n return [_flag(QualityFlagKey.DESCRIPTION_TOO_LONG, QualitySeverity.INFO,\n f\"description is {length} chars (>{DESCRIPTION_MAX})\")]\n return []\n\n\ndef _heading_flags(h1_count: int) -> list[QualityFlag]:\n if h1_count == 0:\n return [_flag(QualityFlagKey.H1_MISSING, QualitySeverity.ERROR, \"no h1\")]\n if h1_count > 1:\n return [_flag(QualityFlagKey.H1_MULTIPLE, QualitySeverity.WARNING,\n f\"{h1_count} h1 tags\", count=h1_count)]\n return []\n\n\ndef _robots_flags(head: HeadMeta) -> list[QualityFlag]:\n result: list[QualityFlag] = []\n if head.robots and head.robots.noindex:\n result.append(_flag(QualityFlagKey.NOINDEX, QualitySeverity.WARNING, \"noindex set\"))\n if head.robots and head.robots.nofollow:\n result.append(_flag(QualityFlagKey.NOFOLLOW, QualitySeverity.INFO, \"nofollow set\"))\n return result\n\n\ndef _canonical_flags(head: HeadMeta, page_url: str) -> list[QualityFlag]:\n if head.canonical is None:\n return [_flag(QualityFlagKey.CANONICAL_MISSING, QualitySeverity.INFO, \"canonical missing\")]\n if head.canonical != page_url:\n return [_flag(QualityFlagKey.CANONICAL_NOT_SELF, QualitySeverity.INFO,\n f\"canonical -> {head.canonical}\")]\n return []\n\n\ndef _http_flags(status: HttpStatus | None) -> list[QualityFlag]:\n if status is None or not status.is_error:\n return []\n return [_flag(QualityFlagKey.HTTP_ERROR, QualitySeverity.ERROR,\n f\"HTTP {status.code}\", count=1)]\n\n\ndef _flag(key: QualityFlagKey, severity: QualitySeverity, msg: str, count: int = 1) -> QualityFlag:\n return QualityFlag(key=key, severity=severity, message=msg, count=count)\n\n\n@dataclass(frozen=True, slots=True)\nclass QualityFlagCalculator:\n def calculate(\n self,\n *,\n head: HeadMeta,\n h1_count: int,\n http_status: HttpStatus | None,\n page_url: str,\n ) -> tuple[QualityFlag, ...]:\n flags: list[QualityFlag] = []\n flags.extend(_title_flags(head))\n flags.extend(_description_flags(head))\n flags.extend(_heading_flags(h1_count))\n flags.extend(_robots_flags(head))\n flags.extend(_canonical_flags(head, page_url))\n flags.extend(_http_flags(http_status))\n return tuple(flags)\n"
}
}
{
"tool_response": {
"type": "create",
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/domain\/services\/quality_flag_calculator.py",
"content": "from __future__ import annotations\n\nfrom dataclasses import dataclass\n\nfrom src.domain.value_objects.enums import QualitySeverity\nfrom src.domain.value_objects.head_meta import HeadMeta\nfrom src.domain.value_objects.http_status import HttpStatus\nfrom src.domain.value_objects.quality_flag import QualityFlag, QualityFlagKey\n\nTITLE_MIN, TITLE_MAX = 30, 60\nDESCRIPTION_MIN, DESCRIPTION_MAX = 70, 160\n\n\ndef _title_flags(head: HeadMeta) -> list[QualityFlag]:\n if head.title is None or head.title.strip() == \"\":\n return [_flag(QualityFlagKey.TITLE_MISSING, QualitySeverity.ERROR, \"title missing\")]\n length = len(head.title)\n if length < TITLE_MIN:\n return [_flag(QualityFlagKey.TITLE_TOO_SHORT, QualitySeverity.WARNING,\n f\"title is {length} chars (<{TITLE_MIN})\")]\n if length > TITLE_MAX:\n return [_flag(QualityFlagKey.TITLE_TOO_LONG, QualitySeverity.WARNING,\n f\"title is {length} chars (>{TITLE_MAX})\")]\n return []\n\n\ndef _description_flags(head: HeadMeta) -> list[QualityFlag]:\n if head.description is None or head.description.strip() == \"\":\n return [_flag(QualityFlagKey.DESCRIPTION_MISSING, QualitySeverity.WARNING,\n \"meta description missing\")]\n length = len(head.description)\n if length < DESCRIPTION_MIN:\n return [_flag(QualityFlagKey.DESCRIPTION_TOO_SHORT, QualitySeverity.INFO,\n f\"description is {length} chars (<{DESCRIPTION_MIN})\")]\n if length > DESCRIPTION_MAX:\n return [_flag(QualityFlagKey.DESCRIPTION_TOO_LONG, QualitySeverity.INFO,\n f\"description is {length} chars (>{DESCRIPTION_MAX})\")]\n return []\n\n\ndef _heading_flags(h1_count: int) -> list[QualityFlag]:\n if h1_count == 0:\n return [_flag(QualityFlagKey.H1_MISSING, QualitySeverity.ERROR, \"no h1\")]\n if h1_count > 1:\n return [_flag(QualityFlagKey.H1_MULTIPLE, QualitySeverity.WARNING,\n f\"{h1_count} h1 tags\", count=h1_count)]\n return []\n\n\ndef _robots_flags(head: HeadMeta) -> list[QualityFlag]:\n result: list[QualityFlag] = []\n if head.robots and head.robots.noindex:\n result.append(_flag(QualityFlagKey.NOINDEX, QualitySeverity.WARNING, \"noindex set\"))\n if head.robots and head.robots.nofollow:\n result.append(_flag(QualityFlagKey.NOFOLLOW, QualitySeverity.INFO, \"nofollow set\"))\n return result\n\n\ndef _canonical_flags(head: HeadMeta, page_url: str) -> list[QualityFlag]:\n if head.canonical is None:\n return [_flag(QualityFlagKey.CANONICAL_MISSING, QualitySeverity.INFO, \"canonical missing\")]\n if head.canonical != page_url:\n return [_flag(QualityFlagKey.CANONICAL_NOT_SELF, QualitySeverity.INFO,\n f\"canonical -> {head.canonical}\")]\n return []\n\n\ndef _http_flags(status: HttpStatus | None) -> list[QualityFlag]:\n if status is None or not status.is_error:\n return []\n return [_flag(QualityFlagKey.HTTP_ERROR, QualitySeverity.ERROR,\n f\"HTTP {status.code}\", count=1)]\n\n\ndef _flag(key: QualityFlagKey, severity: QualitySeverity, msg: str, count: int = 1) -> QualityFlag:\n return QualityFlag(key=key, severity=severity, message=msg, count=count)\n\n\n@dataclass(frozen=True, slots=True)\nclass QualityFlagCalculator:\n def calculate(\n self,\n *,\n head: HeadMeta,\n h1_count: int,\n http_status: HttpStatus | None,\n page_url: str,\n ) -> tuple[QualityFlag, ...]:\n flags: list[QualityFlag] = []\n flags.extend(_title_flags(head))\n flags.extend(_description_flags(head))\n flags.extend(_heading_flags(h1_count))\n flags.extend(_robots_flags(head))\n flags.extend(_canonical_flags(head, page_url))\n flags.extend(_http_flags(http_status))\n return tuple(flags)\n",
"structuredPatch": [],
"originalFile": null,
"userModified": false
}
}