minor changes

Daniil Fajnberg 2021-07-25 16:30:16 +02:00
parent ab1468788e
commit 6a490715bf
2 changed files with 16 additions and 12 deletions


@@ -63,7 +63,7 @@ if __name__ == '__main__':
         exit()
     if not settings.has_target_filters():
         warning = "Warning: No filters were set for target tags to scrape. This may return a LOT of data. " \
-                  "Are you sure you want to proceed? (y/n)"
+                  "Are you sure you want to proceed? (y/n) "
         proceed = strtobool(input(warning).lower())
         if not proceed:
             print("Cancelled")


@@ -10,6 +10,10 @@ from soupjobs import SOUPJOBS, CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER
 TagAttrs = Dict[str, str]
+StrListOrStr = Union[str, List[str]]
+OptInt = Optional[int]
+OptStr = Optional[str]
+OptPyObj = Optional[PyObject]


 class OutputFormat(str, Enum):
@@ -21,29 +25,29 @@ class OutputFormat(str, Enum):

 class Settings(BaseSettings):
     _CONFIG_FILE: Optional[Path] = os.getenv(CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER)
-    entry_urls: Union[str, List[str]] = []
+    entry_urls: StrListOrStr = []  # test
     regex_mode: bool = False
     max_depth: int = 0
-    max_pages: Optional[int]
+    max_pages: OptInt
     output_with_urls: bool = False
     output_format: OutputFormat = OutputFormat.simple

     # Target to extract:
-    target_tag: Optional[str]
-    target_text: Optional[str]
+    target_tag: OptStr
+    target_text: OptStr
     target_attrs: TagAttrs = {}
-    target_match_func: Optional[PyObject]
+    target_match_func: OptPyObj
     target_required: bool = False
-    target_limit: Optional[int]
+    target_limit: OptInt
     target_extract_text: bool = True
-    target_transform: Optional[PyObject]
+    target_transform: OptPyObj

     # Link to next page:
-    next_link_text: Optional[str]
-    next_link_href: Optional[str]
+    next_link_text: OptStr
+    next_link_href: OptStr
     next_link_attrs: TagAttrs = {}
-    next_link_match_func: Optional[PyObject]
-    next_link_limit: Optional[int]
+    next_link_match_func: OptPyObj
+    next_link_limit: OptInt
     next_link_random: bool = True

     def has_target_filters(self) -> bool:
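
The substance of this file's diff is introducing StrListOrStr, OptInt, OptStr and OptPyObj as shorthands for the repeated Union/Optional annotations; behaviour is unchanged, since in pydantic v1 a field annotated Optional[...] without a default already defaults to None. A hedged, self-contained sketch of the same pattern (ExampleSettings and its field values are illustrative, not the project's actual configuration):

    from typing import Dict, List, Optional, Union

    from pydantic import BaseSettings, PyObject  # pydantic v1 API, current when this commit was made

    TagAttrs = Dict[str, str]
    StrListOrStr = Union[str, List[str]]
    OptInt = Optional[int]
    OptStr = Optional[str]
    OptPyObj = Optional[PyObject]


    class ExampleSettings(BaseSettings):
        # Optional fields without an explicit default fall back to None in pydantic v1.
        entry_urls: StrListOrStr = []
        max_pages: OptInt
        target_tag: OptStr
        target_match_func: OptPyObj  # a dotted-path string is imported and resolved to the object


    settings = ExampleSettings(max_pages=3, target_match_func='builtins.len')
    assert settings.max_pages == 3
    assert settings.target_tag is None
    assert settings.target_match_func is len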