import sys
from typing import Dict, TYPE_CHECKING

from bs4.element import Tag

if TYPE_CHECKING:
    from .form import FormField, Form


INPUT, SELECT, OPTION = 'input', 'select', 'option'
NAME, VALUE, SELECTED = 'name', 'value', 'selected'

# Translation table for `str.translate`: maps every code point for which `str.isprintable()` is false to `None`,
# which makes `translate` delete it. Built once at import time by scanning the whole Unicode range.
NON_PRINTABLE_TO_NONE = {code: None for code in range(sys.maxunicode + 1) if not chr(code).isprintable()}


class WrongInterface(Exception):
    """Base class of all interface-mismatch errors defined in this module."""


class FieldInterfaceWrong(WrongInterface):
    """Raised when a single field definition does not agree with its HTML tag."""


class SelectOptionsWrong(FieldInterfaceWrong):
    """Raised when the options or default of a select field do not agree with its HTML tag."""


class IncompleteForm(WrongInterface):
    """Raised when the HTML form contains named fields that the form interface does not define."""


class UnknownField(WrongInterface):
    """Raised when the form interface defines a field that the HTML form does not contain."""


def printable_only(string: str) -> str:
    """Returns a copy of `string` with every non-printable character (per `str.isprintable`) removed."""
    return string.translate(NON_PRINTABLE_TO_NONE)


def check_select_field_options(field_tag: Tag, field_interface: 'FormField', check_defaults: bool = True) -> None:
    """
    Compares the `options` and `default` attributes of a `'FormField'` object with the options of its HTML counterpart.

    NOTE(review): In the reviewed text only the signature and the summary sentence above survived for this function;
    its original body was lost. The implementation below is a reconstruction based on that summary and on the
    module's otherwise unused `OPTION`, `SELECTED`, `SelectOptionsWrong` and `printable_only`. Confirm it against
    version control before relying on it.

    Args:
        field_tag: The `bs4.Tag` object representing the <select> HTML tag
        field_interface: The `'FormField'` to check against the HTML tag
        check_defaults (optional): If set to `False`, the pre-selected option is not checked

    Raises:
        `SelectOptionsWrong`
            if `options` differs from the options found in the HTML tag,
            or if `default` does not match the value of the pre-selected option
    """
    # NOTE(review): assumes `FormField.options` maps option value -> displayed text -- TODO confirm.
    html_options: Dict[str, str] = {}
    preselected = None
    for option_tag in field_tag.find_all(OPTION):
        text = printable_only(option_tag.get_text(strip=True))
        # An <option> without a `value` attribute submits its text content instead.
        value = option_tag.attrs.get(VALUE, text)
        html_options[value] = text
        if preselected is None and option_tag.has_attr(SELECTED):
            preselected = value
    # NOTE(review): strict equality is assumed here; the original may have accepted a subset -- TODO confirm.
    if field_interface.options != html_options:
        raise SelectOptionsWrong(f"Wrong options for select field '{field_interface.name}'")
    if check_defaults and field_interface.default != preselected:
        raise SelectOptionsWrong(f"Select field default not correct on '{field_interface}'")


def check_field_interface(field_tag: Tag, field_interface: 'FormField', check_defaults: bool = True) -> None:
    """
    Compares a `'FormField'` object with its HTML counterpart.

    An <input> tag is checked directly; a <select> tag is handed to `check_select_field_options`.
    Tags of any other kind pass without checks.

    Args:
        field_tag: The `bs4.Tag` object representing the <input> or <select> HTML tag
        field_interface: The `'FormField'` to check against the HTML tag
        check_defaults (optional): If set to `False`, pre-set field values or pre-selected options are not checked

    Raises:
        `AssertionError`
            if the tag's `name` attribute differs from the field's `name`
        `FieldInterfaceWrong`
            if the tag is an <input> tag, but `options` were defined on the interface,
            or when the `default` does not match the tag's `value` attribute
    """
    # Internal invariant: callers are expected to pair a tag with the field of the same name.
    assert field_tag[NAME] == field_interface.name
    if field_tag.name == INPUT:
        if field_interface.options is not None:
            raise FieldInterfaceWrong(f"Options provided for input field '{field_interface.name}'")
        if check_defaults:
            # A missing `value` attribute yields `None`, which then has to equal the interface default.
            default = field_tag.attrs.get(VALUE)
            if field_interface.default != default:
                raise FieldInterfaceWrong(f"Input field default not correct on '{field_interface}'")
    elif field_tag.name == SELECT:
        check_select_field_options(field_tag, field_interface, check_defaults=check_defaults)


def check_form_interface(form_tag: Tag, form_interface: 'Form', check_defaults: bool = True) -> None:
    """
    Compares all fields of a `Form` object with the fields found in its HTML counterpart.
    Calls `check_field_interface` on each field.

    Args:
        form_tag: The `bs4.Tag` object representing the <form> HTML tag
        form_interface: The `Form` to check against the HTML tag
        check_defaults (optional): If set to `False`, pre-set field values or pre-selected options are not checked

    Raises:
        `UnknownField`
            if any of the fields' name does not correspond to the `name` attribute of a field in the HTML form
        `IncompleteForm`
            if a field in the HTML form has not been defined in the form interface
    """
    # Index all <input> and <select> tags by their `name`. Tags without a `name` attribute (e.g. a bare
    # submit button) cannot correspond to any interface field, so they are skipped instead of raising `KeyError`.
    field_tags: Dict[str, Tag] = {}
    for tag in form_tag.find_all(INPUT) + form_tag.find_all(SELECT):
        if tag.has_attr(NAME):
            field_tags[tag[NAME]] = tag
    for field in form_interface.fields.values():
        # Popping leaves only those tags in the dictionary that no interface field accounts for.
        tag = field_tags.pop(field.name, None)
        if tag is None:
            raise UnknownField(f"The defined field does not exist in the form: {field}")
        check_field_interface(tag, field, check_defaults=check_defaults)
    if field_tags:
        raise IncompleteForm(f"Form interface missing fields: {list(field_tags)}")


def get_field_value(form_tag: Tag, field_name: str) -> str:
    """
    Returns the string value of the `value` attribute of a form field with a specified name.

    Only <input> and <select> tags are considered; the first matching tag found is used.

    Args:
        form_tag: The `bs4.Tag` object representing the <form> HTML tag
        field_name: The value of the `name` attribute of the form field of interest

    Raises:
        `ValueError`
            if no field with the specified name exists in the HTML form
        `AttributeError`
            if the field with the specified name does not have a `value` attribute
    """
    def form_field_has_the_name(tag: Tag) -> bool:
        return tag.name in {INPUT, SELECT} and tag.has_attr(NAME) and tag[NAME] == field_name

    field = form_tag.find(form_field_has_the_name)
    if field is None:
        raise ValueError(f"No form field with the name '{field_name}' found")
    if not field.has_attr(VALUE):
        raise AttributeError(f"Field with the name '{field_name}' has no `value` attribute")
    return field[VALUE]


def get_field_values(form_tag: Tag, form_interface: 'Form', required_fields_only: bool = True) -> Dict[str, str]:
    """
    Goes through the fields of a `Form` object and tries to retrieve a value for each from an HTML <form>.

    Fields whose tag has no `value` attribute are left out of the result.

    Args:
        form_tag: The `bs4.Tag` object representing the <form> HTML tag
        form_interface: The `Form` for which to get the field values
        required_fields_only (optional): If `True` (default), values will be retrieved only for the required fields

    Returns:
        Dictionary with each key-value-pair representing a (required) field for which a `value` attribute existed
        in the HTML form, with the key being identical to that of the field in the `Form.fields` dictionary
        and the value being the actual `value` attribute's string value

    Raises:
        `ValueError`
            if one of the considered fields does not exist in the HTML form (propagated from `get_field_value`)
    """
    output_dict = {}
    for key, field in form_interface.fields.items():
        if required_fields_only and not field.required:
            continue
        try:
            output_dict[key] = get_field_value(form_tag, field.name)
        except AttributeError:
            # Deliberate best effort: a field without a `value` attribute is simply omitted.
            pass
    return output_dict