# soupjobs/src/soupjobs/settings.py
#
# 76 lines
# 2.1 KiB
# Python

from typing import Dict, List, Union, Optional, Any
from pathlib import Path
from enum import Enum
import os
from pydantic import BaseSettings, PyObject
import yaml
from soupjobs import SOUPJOBS, CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER
# Type alias used by the `target_attrs` / `next_link_attrs` settings: a string-to-string
# mapping (presumably tag attribute name -> expected value; confirm against the scraper code).
TagAttrs = Dict[str, str]
class OutputFormat(str, Enum):
    """
    Allowed values for the `output_format` setting.

    Members are also `str` instances, so each one compares equal to its plain string value.
    """

    simple = 'simple'
    yaml = 'yaml'
    json = 'json'
class Settings(BaseSettings):
    """
    Settings model of a soupjobs run.

    Values are gathered from the sources listed in `Config.customise_sources`:
    init arguments, environment variables (names prefixed with `SOUPJOBS + '_'`),
    secret files and the YAML config file read by `yaml_settings`.
    """

    # Path of the YAML config file: read from the environment variable named by
    # `CONFIG_FILE_ENV_VAR`, falling back to `CONFIG_FILE_PLACEHOLDER`.
    CONFIG_FILE: Optional[Path] = os.getenv(CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER)

    # General job options:
    entry_urls: Union[str, List[str]] = []
    regex_mode: bool = False
    max_depth: int = 0
    max_pages: Optional[int]
    output_with_urls: bool = False
    output_format: OutputFormat = OutputFormat.simple

    # Target to extract:
    target_tag: Optional[str]
    target_text: Optional[str]
    target_attrs: TagAttrs = {}
    target_match_func: Optional[PyObject]
    target_required: bool = False
    target_limit: Optional[int]
    target_extract_text: bool = True
    target_transform: Optional[PyObject]

    # Link to next page:
    next_link_text: Optional[str]
    next_link_href: Optional[str]
    next_link_attrs: TagAttrs = {}
    next_link_match_func: Optional[PyObject]
    next_link_limit: Optional[int]
    next_link_random: bool = True

    def has_target_filters(self) -> bool:
        """Return `True` if any of the target tag, text, attrs or match function settings is truthy."""
        target_filters = (
            self.target_tag,
            self.target_text,
            self.target_attrs,
            self.target_match_func,
        )
        return any(target_filters)

    class Config:
        validate_assignment = True
        env_prefix = SOUPJOBS + '_'
        env_file_encoding = 'utf-8'

        @classmethod
        def customise_sources(cls, init_settings, env_settings, file_secret_settings):
            """Return the settings sources in order, with the YAML config file source appended last."""
            sources = (init_settings, env_settings, file_secret_settings, yaml_settings)
            return sources
def yaml_settings(settings_obj: 'Settings') -> Dict[str, Any]:
    """
    Load settings from the YAML config file referenced by `settings_obj.CONFIG_FILE`.

    Options may be grouped under the `target` and `next_link` sections of the file.
    Every key of such a section is flattened into a top-level `<section>_<key>` entry;
    a top-level entry of that name that is already present takes precedence.

    Args:
        settings_obj: Object whose `CONFIG_FILE` attribute holds the path of the YAML file, or `None`.

    Returns:
        The flattened settings dictionary. It is empty when no config file is set,
        when the file does not exist, or when the file contains no data.
    """
    config_path = settings_obj.CONFIG_FILE
    if config_path is None:
        # `CONFIG_FILE` is optional; without a path there is nothing to load.
        return {}
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            # An empty or comment-only YAML document is parsed as `None`.
            config = yaml.safe_load(f) or {}
    except FileNotFoundError:
        return {}
    for section_name in ('target', 'next_link'):
        # A section key without a body (e.g. a bare `target:`) is parsed as `None`.
        section = config.pop(section_name, None) or {}
        for key, value in section.items():
            config.setdefault(f'{section_name}_{key}', value)
    return config
# Module-level settings instance, created once when this module is imported.
settings = Settings()