From 6f6d2192daee50f40ad0959e88bb2f3f385355aa Mon Sep 17 00:00:00 2001 From: Daniil Fajnberg Date: Sat, 1 Jan 2022 13:28:36 +0100 Subject: [PATCH] added functions to check interfaces against their HTML counterparts; optional dependency on bs4 --- requirements/dev.txt | 3 +- requirements/full.txt | 3 + requirements/html.txt | 2 + setup.cfg | 4 +- src/yamlhttpforms/form.py | 20 +++++ src/yamlhttpforms/html.py | 156 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 185 insertions(+), 3 deletions(-) create mode 100644 requirements/full.txt create mode 100644 requirements/html.txt create mode 100644 src/yamlhttpforms/html.py diff --git a/requirements/dev.txt b/requirements/dev.txt index 3165c48..093c73d 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,3 +1,2 @@ --r aio.txt --r req.txt +-r full.txt coverage \ No newline at end of file diff --git a/requirements/full.txt b/requirements/full.txt new file mode 100644 index 0000000..aad2a6e --- /dev/null +++ b/requirements/full.txt @@ -0,0 +1,3 @@ +-r aio.txt +-r req.txt +-r html.txt diff --git a/requirements/html.txt b/requirements/html.txt new file mode 100644 index 0000000..653f3ba --- /dev/null +++ b/requirements/html.txt @@ -0,0 +1,2 @@ +-r common.txt +beautifulsoup4 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 7fdcfea..ad24362 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = yamlhttpforms -version = 0.0.2 +version = 0.0.3 author = Daniil F. 
author_email = mail@placeholder123.to description = HTTP forms defined in YAML @@ -26,6 +26,8 @@ req = requests aio = aiohttp +html = + beautifulsoup4 [options.packages.find] where = src diff --git a/src/yamlhttpforms/form.py b/src/yamlhttpforms/form.py index b44d18a..edeb755 100644 --- a/src/yamlhttpforms/form.py +++ b/src/yamlhttpforms/form.py @@ -4,6 +4,7 @@ from typing import Dict, Callable, Union, Optional, Any, TYPE_CHECKING if TYPE_CHECKING: from aiohttp import ClientSession as AioSession, ClientResponse as AioResponse from requests import Session as ReqSession, Response as ReqResponse + from bs4.element import Tag as BS4Tag from .utils import PathT, yaml_overload @@ -67,6 +68,17 @@ class FormField: return key raise ValueError(f'"{value}" is not a valid option for {self}') + def check_with_html(self, field_tag: 'BS4Tag', check_defaults: bool = True) -> None: + from .html import check_field_interface + check_field_interface(field_tag, self, check_defaults=check_defaults) + + def get_value_from_html_form(self, form_tag: 'BS4Tag') -> Optional[str]: + from .html import get_field_value + try: + return get_field_value(form_tag, self.name) + except AttributeError: + return None + class Form: @@ -199,6 +211,14 @@ class Form: from requests import post return post(self.url, data=self.get_payload(**kwargs)) + def check_with_html(self, form_tag: 'BS4Tag', check_defaults: bool = True) -> None: + from .html import check_form_interface + check_form_interface(form_tag, self, check_defaults=check_defaults) + + def get_values_from_html_form(self, form_tag: 'BS4Tag', required_fields_only: bool = True) -> Dict[str, str]: + from .html import get_field_values + return get_field_values(form_tag, self, required_fields_only=required_fields_only) + def load_form(*def_paths: PathT, dir_sort_key: Callable[[PathT], Any] = None, dir_sort_reverse: bool = False, full_payload: bool = True, url: str = None) -> Form: diff --git a/src/yamlhttpforms/html.py b/src/yamlhttpforms/html.py new file 
mode 100644 index 0000000..bcdc3f3 --- /dev/null +++ b/src/yamlhttpforms/html.py @@ -0,0 +1,156 @@ +from typing import Dict, TYPE_CHECKING +from bs4.element import Tag + +if TYPE_CHECKING: + from .form import FormField, Form + + +INPUT, SELECT, OPTION = 'input', 'select', 'option' +NAME, VALUE, SELECTED = 'name', 'value', 'selected' + + +class WrongInterface(Exception): + pass + + +class FieldInterfaceWrong(WrongInterface): + pass + + +class SelectOptionsWrong(FieldInterfaceWrong): + pass + + +class IncompleteForm(WrongInterface): + pass + + +class UnknownField(WrongInterface): + pass + + +def check_select_field_options(field_tag: Tag, field_interface: 'FormField', check_defaults: bool = True) -> None: + """ + Compares the `options` and `default` attributes of a `'FormField'` object with the options of its HTML counterpart. + + Args: + field_tag: The `bs4.Tag` object representing the or tag, but `options` were defined on the interface, or + when the `default` does not match the tag's `value` attribute + """ + assert field_tag[NAME] == field_interface.name + if field_tag.name == INPUT: + if field_interface.options is not None: + raise FieldInterfaceWrong(f"Options provided for input field '{field_interface.name}'") + if check_defaults: + default = field_tag.attrs.get(VALUE) + if field_interface.default != default: + raise FieldInterfaceWrong(f"Input field default not correct on '{field_interface}'") + if field_tag.name == SELECT: + check_select_field_options(field_tag, field_interface, check_defaults=check_defaults) + + +def check_form_interface(form_tag: Tag, form_interface: 'Form', check_defaults: bool = True) -> None: + """ + Compares all fields of a `Form` object with the fields found in its HTML counterpart. + Calls `check_field_interface` on each field. + + Args: + form_tag: The `bs4.Tag` object representing the
def check_form_interface(form_tag: Tag, form_interface: 'Form', check_defaults: bool = True) -> None:
    """
    Compares all fields of a `Form` object with the fields found in its HTML counterpart.
    Calls `check_field_interface` on each field.

    Args:
        form_tag: The `bs4.Tag` object representing the `<form>` HTML tag
        form_interface: The `Form` to check against the HTML tag
        check_defaults (optional): If set to `False`, pre-set field values or pre-selected options are not checked

    Raises:
        `UnknownField`
            if any of the fields' name does not correspond to the `name` attribute of a field in the HTML form
        `IncompleteForm`
            if a field in the HTML form has not been defined in the form interface
    """
    # Tags without a `name` attribute are left out of the index: they can never match an interface field by name,
    # and indexing them with `tag[NAME]` would abort the whole check with a `KeyError`.
    # `get_field_value` filters on `has_attr(NAME)` in the same way.
    field_tags: Dict[str, Tag] = {
        tag[NAME]: tag
        for tag in form_tag.find_all(INPUT) + form_tag.find_all(SELECT)
        if tag.has_attr(NAME)
    }
    for field in form_interface.fields.values():
        # The `None` default is essential: without it a missing name raises `KeyError` from `pop` itself,
        # the check below is never reached and `UnknownField` is never raised.
        tag = field_tags.pop(field.name, None)
        if tag is None:
            raise UnknownField(f"The defined field does not exist in the form: {field}")
        check_field_interface(tag, field, check_defaults=check_defaults)
    # Whatever is still in the index was present in the HTML but not claimed by any interface field.
    if field_tags:
        raise IncompleteForm(f"Form interface missing fields: {list(field_tags)}")


def get_field_value(form_tag: Tag, field_name: str) -> str:
    """
    Returns the string value of the `value` attribute of a form field with a specified name.

    Args:
        form_tag: The `bs4.Tag` object representing the `<form>` HTML tag
        field_name: The value of the `name` attribute of the form field of interest

    Returns:
        The `value` attribute of the `<input>` or `<select>` tag found for that name

    Raises:
        `ValueError`
            if no field with the specified name exists in the HTML form
        `AttributeError`
            if the field with the specified name does not have a `value` attribute
    """
    def form_field_has_the_name(tag: Tag) -> bool:
        return tag.name in {INPUT, SELECT} and tag.has_attr(NAME) and tag[NAME] == field_name

    field = form_tag.find(form_field_has_the_name)
    if field is None:
        raise ValueError(f"No form field with the name '{field_name}' found")
    if not field.has_attr(VALUE):
        raise AttributeError(f"Field with the name '{field_name}' has no `value` attribute")
    return field[VALUE]


def get_field_values(form_tag: Tag, form_interface: 'Form', required_fields_only: bool = True) -> Dict[str, str]:
    """
    Goes through the fields of a `Form` object and tries to retrieve a value for each from an HTML `<form>`.

    Args:
        form_tag: The `bs4.Tag` object representing the `<form>` HTML tag
        form_interface: The `Form` for which to get the field values
        required_fields_only (optional): If `True` (default), values will be retrieved only for the required fields

    Returns:
        Dictionary with each key-value-pair representing a (required) field for which a `value` attribute existed in the
        HTML form, with the key being identical to that of the field in the `Form.fields` dictionary and the
        value being the actual `value` attribute's string value

    Raises:
        `ValueError`
            propagated from `get_field_value` if a considered field does not exist in the HTML form at all;
            only a missing `value` attribute is tolerated
    """
    output_dict: Dict[str, str] = {}
    for key, field in form_interface.fields.items():
        if required_fields_only and not field.required:
            continue
        try:
            output_dict[key] = get_field_value(form_tag, field.name)
        except AttributeError:
            # The field exists in the HTML but carries no `value` attribute; it is left out of the result.
            pass
    return output_dict