soupjobs/src/soupjobs/settings.py

80 lines
2.2 KiB
Python
Raw Normal View History

2021-03-13 16:53:15 +01:00
from typing import Dict, List, Union, Optional, Any
from pathlib import Path
from enum import Enum
import os
from pydantic import BaseSettings, PyObject
import yaml
2021-07-25 14:45:07 +02:00
from soupjobs import SOUPJOBS, CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER
2021-03-13 16:53:15 +01:00
2021-07-25 16:30:16 +02:00
# Either a single string or a list of strings.
StrListOrStr = Union[str, List[str]]
# Maps an attribute name to one value or a list of values; used as the type of
# the `target_attrs` and `next_link_attrs` settings.
TagAttrs = Dict[str, StrListOrStr]
2021-07-25 16:30:16 +02:00
# Shorthand aliases for settings that may be left unset (None).
OptInt = Optional[int]
OptStr = Optional[str]
# `PyObject` is the pydantic type imported at the top of this module.
OptPyObj = Optional[PyObject]
2021-03-13 16:53:15 +01:00
class OutputFormat(str, Enum):
    """Closed set of values accepted for the `output_format` setting.

    The `str` mixin makes every member a real string, so a member compares
    equal to its plain-text value (e.g. ``OutputFormat.yaml == 'yaml'``) and
    can be looked up from that value (``OutputFormat('yaml')``).
    """

    simple = 'simple'
    # These member names live in the class namespace only; they do not
    # shadow modules of the same name at module level.
    yaml = 'yaml'
    json = 'json'
class Settings(BaseSettings):
    """All configurable options of a scraping job.

    Values are collected from the sources returned by
    `Config.customise_sources`: init arguments, environment variables
    (prefixed with ``SOUPJOBS + '_'``), secret files and finally the YAML
    config file loaded by `yaml_settings`.
    """

    # Location of the YAML config file, read by `yaml_settings` via getattr.
    # Taken from the environment variable named by CONFIG_FILE_ENV_VAR, with
    # CONFIG_FILE_PLACEHOLDER as the fallback.
    # NOTE(review): `os.getenv` returns a `str` when the variable is set, so
    # the `Optional[Path]` annotation does not describe the runtime value --
    # confirm whether a `Path` is intended here.
    _CONFIG_FILE: Optional[Path] = os.getenv(CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER)

    entry_urls: StrListOrStr = []
    regex_mode: bool = False
    max_depth: int = 0
    max_pages: OptInt
    output_with_urls: bool = False
    output_format: OutputFormat = OutputFormat.simple

    # Target to extract:
    target_tag: OptStr
    target_text: OptStr
    target_attrs: TagAttrs = {}
    target_match_func: OptPyObj
    target_required: bool = False
    target_limit: OptInt
    target_extract_text: bool = True
    target_transform: OptPyObj

    # Link to next page:
    next_link_text: OptStr
    next_link_href: OptStr
    next_link_attrs: TagAttrs = {}
    next_link_match_func: OptPyObj
    next_link_limit: OptInt
    next_link_random: bool = True

    def has_target_filters(self) -> bool:
        """Return True if at least one target filter setting is truthy.

        The filters considered are `target_tag`, `target_text`,
        `target_attrs` and `target_match_func`.
        """
        return any([self.target_tag, self.target_text, self.target_attrs, self.target_match_func])

    class Config:
        validate_assignment = True
        env_prefix = SOUPJOBS + '_'
        env_file_encoding = 'utf-8'

        @classmethod
        def customise_sources(cls, init_settings, env_settings, file_secret_settings):
            """Return the settings sources, with `yaml_settings` appended last.

            Per the pydantic settings documentation, sources listed earlier
            take priority, so the YAML file only supplies values that no
            other source provides.
            """
            return init_settings, env_settings, file_secret_settings, yaml_settings
def yaml_settings(settings_obj: Settings) -> Dict[str, Any]:
    """Load settings from the YAML config file of `settings_obj`.

    The nested sections ``target`` and ``next_link`` are flattened into
    top-level keys: ``target: {tag: a}`` becomes ``target_tag: a``. A flat
    key that is already present in the file wins over the nested one.

    Args:
        settings_obj: Object whose `_CONFIG_FILE` attribute names the file.

    Returns:
        The flattened mapping, or an empty dict if the file does not exist
        or contains no YAML document.
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the locale default.
        with open(getattr(settings_obj, '_CONFIG_FILE'), 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except FileNotFoundError:
        return {}
    # An empty (or comment-only) file loads as None rather than a mapping.
    if config is None:
        return {}
    for section_name in ('target', 'next_link'):
        # A section declared without content (e.g. `target:`) loads as None.
        section = config.pop(section_name, None) or {}
        for key, value in section.items():
            config.setdefault(f'{section_name}_{key}', value)
    return config
2021-03-13 16:53:15 +01:00
# Module-level settings instance, created once when this module is imported.
settings = Settings()