diff --git a/src/soupjobs/default.config.yaml b/src/soupjobs/default.config.yaml index c8d8064..c3ea5b3 100644 --- a/src/soupjobs/default.config.yaml +++ b/src/soupjobs/default.config.yaml @@ -53,7 +53,9 @@ target: # Example: #attrs: #id: firstHeading - #class: foo + #class: + # - foo + # - example #role: bar # Filter using a custom python function with the path specified in dot-notation. diff --git a/src/soupjobs/scrape.py b/src/soupjobs/scrape.py index a0c11b5..9f58271 100644 --- a/src/soupjobs/scrape.py +++ b/src/soupjobs/scrape.py @@ -148,15 +148,18 @@ def tag_filter(tag: Tag, name: str = None, text: str = None, attrs: TagAttrs = N return False if not string_matches(tag.text, text, regex): return False - for attr_name, attr_value in attrs.items(): + for attr_name, attr_values in attrs.items(): try: values = tag[attr_name] except KeyError: return False if not isinstance(values, list): values = [values] - if not any(string_matches(value, attr_value, regex) for value in values): - return False + if not isinstance(attr_values, list): + attr_values = [attr_values] + for attr_value in attr_values: + if not any(string_matches(value, attr_value, regex) for value in values): + return False if func: return func(tag) return True diff --git a/src/soupjobs/settings.py b/src/soupjobs/settings.py index ae943aa..d3d673f 100644 --- a/src/soupjobs/settings.py +++ b/src/soupjobs/settings.py @@ -9,8 +9,8 @@ import yaml from soupjobs import SOUPJOBS, CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER -TagAttrs = Dict[str, str] StrListOrStr = Union[str, List[str]] +TagAttrs = Dict[str, StrListOrStr] OptInt = Optional[int] OptStr = Optional[str] OptPyObj = Optional[PyObject]