{
"event": "PreToolUse",
"tool_name": "Edit",
"tool_input": {
"file_path": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/parsers\/structured_data_parser_extruct.py",
"old_string": " triples: list[StructuredDataTriple] = []\n for key, fmt in _FORMAT_MAP.items():\n for item in extracted.get(key, []) or []:\n if isinstance(item, Mapping):\n triples.append((fmt, _schema_type(item), dict(item)))\n return triples",
"new_string": " triples: list[StructuredDataTriple] = []\n for key, fmt in _FORMAT_MAP.items():\n items = extracted.get(key, []) or []\n triples.extend(\n (fmt, _schema_type(item), dict(item))\n for item in items\n if isinstance(item, Mapping)\n )\n return triples",
"replace_all": false
}
}
{
"tool_response": {
"filePath": "\/var\/www\/dev.campus.systemische-tools.de\/crawler\/src\/infrastructure\/parsers\/structured_data_parser_extruct.py",
"oldString": " triples: list[StructuredDataTriple] = []\n for key, fmt in _FORMAT_MAP.items():\n for item in extracted.get(key, []) or []:\n if isinstance(item, Mapping):\n triples.append((fmt, _schema_type(item), dict(item)))\n return triples",
"newString": " triples: list[StructuredDataTriple] = []\n for key, fmt in _FORMAT_MAP.items():\n items = extracted.get(key, []) or []\n triples.extend(\n (fmt, _schema_type(item), dict(item))\n for item in items\n if isinstance(item, Mapping)\n )\n return triples",
"originalFile": "from __future__ import annotations\n\nfrom collections.abc import Mapping\nfrom typing import Any\n\nimport extruct\n\nfrom src.domain.ports.structured_data_parser_port import (\n StructuredDataParserPort,\n StructuredDataTriple,\n)\nfrom src.domain.value_objects.enums import StructuredDataFormat\n\n_FORMAT_MAP = {\n \"json-ld\": StructuredDataFormat.JSON_LD,\n \"microdata\": StructuredDataFormat.MICRODATA,\n \"rdfa\": StructuredDataFormat.RDFA,\n \"opengraph\": StructuredDataFormat.OPEN_GRAPH,\n}\n\n\nclass ExtructStructuredDataParser(StructuredDataParserPort):\n def parse(self, html: str, *, base_url: str) -> list[StructuredDataTriple]:\n extracted = extruct.extract(\n html,\n base_url=base_url,\n syntaxes=list(_FORMAT_MAP.keys()),\n uniform=True,\n )\n triples: list[StructuredDataTriple] = []\n for key, fmt in _FORMAT_MAP.items():\n for item in extracted.get(key, []) or []:\n if isinstance(item, Mapping):\n triples.append((fmt, _schema_type(item), dict(item)))\n return triples\n\n\ndef _schema_type(payload: Mapping[str, Any]) -> str | None:\n value = payload.get(\"@type\") or payload.get(\"type\")\n if isinstance(value, str):\n return value\n if isinstance(value, list) and value and isinstance(value[0], str):\n return value[0]\n return None\n",
"structuredPatch": [
{
"oldStart": 29,
"oldLines": 9,
"newStart": 29,
"newLines": 12,
"lines": [
" )",
" triples: list[StructuredDataTriple] = []",
" for key, fmt in _FORMAT_MAP.items():",
"- for item in extracted.get(key, []) or []:",
"- if isinstance(item, Mapping):",
"- triples.append((fmt, _schema_type(item), dict(item)))",
"+ items = extracted.get(key, []) or []",
"+ triples.extend(",
"+ (fmt, _schema_type(item), dict(item))",
"+ for item in items",
"+ if isinstance(item, Mapping)",
"+ )",
" return triples",
" ",
" "
]
}
],
"userModified": false,
"replaceAll": false
}
}