{
"event": "PreToolUse",
"tool_name": "Read",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/parsers\/structured_data_parser_extruct.py"
}
}
{
"tool_response": {
"type": "text",
"file": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/parsers\/structured_data_parser_extruct.py",
"content": "from __future__ import annotations\n\nfrom collections.abc import Mapping\nfrom typing import Any\n\nimport extruct\n\nfrom src.domain.ports.structured_data_parser_port import (\n StructuredDataParserPort,\n StructuredDataTriple,\n)\nfrom src.domain.value_objects.enums import StructuredDataFormat\n\n_FORMAT_MAP = {\n \"json-ld\": StructuredDataFormat.JSON_LD,\n \"microdata\": StructuredDataFormat.MICRODATA,\n \"rdfa\": StructuredDataFormat.RDFA,\n \"opengraph\": StructuredDataFormat.OPEN_GRAPH,\n}\n\n\nclass ExtructStructuredDataParser(StructuredDataParserPort):\n def parse(self, html: str, *, base_url: str) -> list[StructuredDataTriple]:\n extracted = extruct.extract(\n html,\n base_url=base_url,\n syntaxes=list(_FORMAT_MAP.keys()),\n uniform=True,\n )\n triples: list[StructuredDataTriple] = []\n for key, fmt in _FORMAT_MAP.items():\n for item in extracted.get(key, []) or []:\n if isinstance(item, Mapping):\n triples.append((fmt, _schema_type(item), dict(item)))\n return triples\n\n\ndef _schema_type(payload: Mapping[str, Any]) -> str | None:\n value = payload.get(\"@type\") or payload.get(\"type\")\n if isinstance(value, str):\n return value\n if isinstance(value, list) and value and isinstance(value[0], str):\n return value[0]\n return None\n",
"numLines": 45,
"startLine": 1,
"totalLines": 45
}
}
}