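Minor changes: add type aliases (StrListOrStr, OptInt, OptStr, OptPyObj) for Settings annotations; add a trailing space to the confirmation prompt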
This commit is contained in:
parent
ab1468788e
commit
6a490715bf
@ -63,7 +63,7 @@ if __name__ == '__main__':
|
|||||||
exit()
|
exit()
|
||||||
if not settings.has_target_filters():
|
if not settings.has_target_filters():
|
||||||
warning = "Warning: No filters were set for target tags to scrape. This may return a LOT of data. " \
|
warning = "Warning: No filters were set for target tags to scrape. This may return a LOT of data. " \
|
||||||
"Are you sure you want to proceed? (y/n)"
|
"Are you sure you want to proceed? (y/n) "
|
||||||
proceed = strtobool(input(warning).lower())
|
proceed = strtobool(input(warning).lower())
|
||||||
if not proceed:
|
if not proceed:
|
||||||
print("Cancelled")
|
print("Cancelled")
|
||||||
|
@ -10,6 +10,10 @@ from soupjobs import SOUPJOBS, CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER
|
|||||||
|
|
||||||
|
|
||||||
TagAttrs = Dict[str, str]
|
TagAttrs = Dict[str, str]
|
||||||
|
StrListOrStr = Union[str, List[str]]
|
||||||
|
OptInt = Optional[int]
|
||||||
|
OptStr = Optional[str]
|
||||||
|
OptPyObj = Optional[PyObject]
|
||||||
|
|
||||||
|
|
||||||
class OutputFormat(str, Enum):
|
class OutputFormat(str, Enum):
|
||||||
@ -21,29 +25,29 @@ class OutputFormat(str, Enum):
|
|||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
_CONFIG_FILE: Optional[Path] = os.getenv(CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER)
|
_CONFIG_FILE: Optional[Path] = os.getenv(CONFIG_FILE_ENV_VAR, CONFIG_FILE_PLACEHOLDER)
|
||||||
|
|
||||||
entry_urls: Union[str, List[str]] = []
|
entry_urls: StrListOrStr = [] # test
|
||||||
regex_mode: bool = False
|
regex_mode: bool = False
|
||||||
max_depth: int = 0
|
max_depth: int = 0
|
||||||
max_pages: Optional[int]
|
max_pages: OptInt
|
||||||
output_with_urls: bool = False
|
output_with_urls: bool = False
|
||||||
output_format: OutputFormat = OutputFormat.simple
|
output_format: OutputFormat = OutputFormat.simple
|
||||||
|
|
||||||
# Target to extract:
|
# Target to extract:
|
||||||
target_tag: Optional[str]
|
target_tag: OptStr
|
||||||
target_text: Optional[str]
|
target_text: OptStr
|
||||||
target_attrs: TagAttrs = {}
|
target_attrs: TagAttrs = {}
|
||||||
target_match_func: Optional[PyObject]
|
target_match_func: OptPyObj
|
||||||
target_required: bool = False
|
target_required: bool = False
|
||||||
target_limit: Optional[int]
|
target_limit: OptInt
|
||||||
target_extract_text: bool = True
|
target_extract_text: bool = True
|
||||||
target_transform: Optional[PyObject]
|
target_transform: OptPyObj
|
||||||
|
|
||||||
# Link to next page:
|
# Link to next page:
|
||||||
next_link_text: Optional[str]
|
next_link_text: OptStr
|
||||||
next_link_href: Optional[str]
|
next_link_href: OptStr
|
||||||
next_link_attrs: TagAttrs = {}
|
next_link_attrs: TagAttrs = {}
|
||||||
next_link_match_func: Optional[PyObject]
|
next_link_match_func: OptPyObj
|
||||||
next_link_limit: Optional[int]
|
next_link_limit: OptInt
|
||||||
next_link_random: bool = True
|
next_link_random: bool = True
|
||||||
|
|
||||||
def has_target_filters(self) -> bool:
|
def has_target_filters(self) -> bool:
|
||||||
|
Loading…
Reference in New Issue
Block a user