from argparse import ArgumentParser, SUPPRESS
from pathlib import Path
import asyncio
import os
import sys

from soupjobs import CONFIG_FILE_ENV_VAR, CONFIG_FILE_DEFAULT_NAME


if __name__ == '__main__':
    parser = ArgumentParser(
        argument_default=SUPPRESS,
        description="CLI tool for starting scraping jobs. "
                    "The provided settings always override environment variables and those in the config file. "
                    "For more detailed descriptions of the options refer to the example config file."
    )
    parser.add_argument(
        '-c', '--config-file',
        type=Path,
        default=Path(Path.cwd(), CONFIG_FILE_DEFAULT_NAME),
        help="Specify a different config file path. Defaults to 'config.yaml' in the current working directory."
    )
    parser.add_argument(
        'entry_urls',
        type=str,
        nargs='*',
        help="Can be a single URL, a list of URLs, or the path to a text file containing URLs"
    )
    parser.add_argument(
        '-r', '--regex-mode',
        action='store_true',
        help="Set this flag to treat all filter strings as regular expressions"
    )
    parser.add_argument(
        '-d', '--max-depth',
        type=int,
        help="Maximum recursion depth for following matching links to other pages"
    )
    parser.add_argument(
        '-p', '--max-pages',
        type=int,
        help="Maximum number of pages to visit"
    )
    parser.add_argument(
        '--output-with-urls',
        action='store_true',
        help="Set this flag to map scraped results to the URL of the page they were found on"
    )
    parser.add_argument(
        '-o', '--output-format',
        type=str,
        help="Output format: either 'yaml', 'json' or 'simple'"
    )
    kwargs = vars(parser.parse_args())
    # The settings module reads the config file path from this environment
    # variable at import time, so it must be set before the imports below.
    os.environ[CONFIG_FILE_ENV_VAR] = str(kwargs.pop('config_file'))

    from soupjobs.settings import settings
    from soupjobs.scrape import Job

    # Any option passed on the command line overrides the corresponding
    # setting loaded from the environment/config file. Options that were not
    # passed are SUPPRESSed by argparse and therefore absent from `kwargs`.
    for key, value in kwargs.items():
        setattr(settings, key, value)

    if not settings.entry_urls:
        print("No URLs specified")
        sys.exit()

    if not settings.has_target_filters():
        warning = "Warning: No filters were set for target tags to scrape. This may return a LOT of data. " \
                  "Are you sure you want to proceed? (y/n) "
        # `distutils.util.strtobool` is deprecated and distutils was removed
        # in Python 3.12, so the confirmation is checked directly instead;
        # anything other than an explicit "yes" aborts.
        if input(warning).strip().lower() not in ('y', 'yes'):
            print("Cancelled")
            sys.exit()

    asyncio.run(Job().start())
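
# Usage sketch. How this script is invoked depends on how the package is
# installed; `python -m soupjobs` is an assumption here, not confirmed by the
# source. Entry URLs may also come from the config file, in which case the
# positional argument can be omitted entirely.
#
#   python -m soupjobs https://example.com -d 2 -p 100 -o json
#   python -m soupjobs urls.txt --regex-mode --output-with-urls
#   python -m soupjobs -c /path/to/other-config.yaml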