minor changes
This commit is contained in:
parent
ab1468788e
commit
6a490715bf
@ -63,7 +63,7 @@ if __name__ == '__main__':
|
||||
exit()
|
||||
if not settings.has_target_filters():
|
||||
warning = "Warning: No filters were set for target tags to scrape. This may return a LOT of data. " \
|
||||
"Are you sure you want to proceed? (y/n)"
|
||||
"Are you sure you want to proceed? (y/n) "
|
||||
proceed = strtobool(input(warning).lower())
|
||||
if not proceed:
|
||||
print("Cancelled")
|
||||
|
@ -10,6 +10,10 @@ from soupjobs import SOUPJOBS, CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER
|
||||
|
||||
|
||||
TagAttrs = Dict[str, str]
|
||||
StrListOrStr = Union[str, List[str]]
|
||||
OptInt = Optional[int]
|
||||
OptStr = Optional[str]
|
||||
OptPyObj = Optional[PyObject]
|
||||
|
||||
|
||||
class OutputFormat(str, Enum):
|
||||
@ -21,29 +25,29 @@ class OutputFormat(str, Enum):
|
||||
class Settings(BaseSettings):
|
||||
_CONFIG_FILE: Optional[Path] = os.getenv(CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER)
|
||||
|
||||
entry_urls: Union[str, List[str]] = []
|
||||
entry_urls: StrListOrStr = [] # test
|
||||
regex_mode: bool = False
|
||||
max_depth: int = 0
|
||||
max_pages: Optional[int]
|
||||
max_pages: OptInt
|
||||
output_with_urls: bool = False
|
||||
output_format: OutputFormat = OutputFormat.simple
|
||||
|
||||
# Target to extract:
|
||||
target_tag: Optional[str]
|
||||
target_text: Optional[str]
|
||||
target_tag: OptStr
|
||||
target_text: OptStr
|
||||
target_attrs: TagAttrs = {}
|
||||
target_match_func: Optional[PyObject]
|
||||
target_match_func: OptPyObj
|
||||
target_required: bool = False
|
||||
target_limit: Optional[int]
|
||||
target_limit: OptInt
|
||||
target_extract_text: bool = True
|
||||
target_transform: Optional[PyObject]
|
||||
target_transform: OptPyObj
|
||||
|
||||
# Link to next page:
|
||||
next_link_text: Optional[str]
|
||||
next_link_href: Optional[str]
|
||||
next_link_text: OptStr
|
||||
next_link_href: OptStr
|
||||
next_link_attrs: TagAttrs = {}
|
||||
next_link_match_func: Optional[PyObject]
|
||||
next_link_limit: Optional[int]
|
||||
next_link_match_func: OptPyObj
|
||||
next_link_limit: OptInt
|
||||
next_link_random: bool = True
|
||||
|
||||
def has_target_filters(self) -> bool:
|
||||
|
Loading…
Reference in New Issue
Block a user