added async-session decorator from own project; changed logging setup; added CLI parameters

Daniil Fajnberg 2021-11-30 12:46:48 +01:00
parent 36226fc1be
commit c23c963cd8
5 changed files with 48 additions and 14 deletions
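The in_async_session decorator comes from the author's separate webutils-df package, whose source is not part of this diff. As a rough illustration only (not the actual webutils implementation), such a decorator typically wraps an async function that accepts an optional session keyword argument and supplies a temporary aiohttp.ClientSession when none is passed:

import functools
from aiohttp import ClientSession

def in_async_session(func):
    # Hypothetical sketch: if the caller passes no session, open a temporary
    # ClientSession for the duration of the call and close it afterwards.
    @functools.wraps(func)
    async def wrapper(*args, session: ClientSession = None, **kwargs):
        if session is not None:
            return await func(*args, session=session, **kwargs)
        async with ClientSession() as temp_session:
            return await func(*args, session=temp_session, **kwargs)
    return wrapper

A decorator along these lines is what makes the manual ClientSession handling in main() (removed below) unnecessary.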

View File

@@ -1,2 +1,3 @@
 beautifulsoup4
 aiohttp
+webutils-df

View File

@@ -23,6 +23,7 @@ python_requires = >=3.8
 install_requires =
     beautifulsoup4
     aiohttp
+    webutils-df
 
 [options.extras_require]
 tests =

View File

@@ -1,3 +1,4 @@
+import logging
 import asyncio
 import csv
 import json
@@ -5,17 +6,19 @@ from argparse import ArgumentParser
 from pathlib import Path
 from typing import Dict
-from aiohttp import ClientSession
-from .functions import get_company_financials, ResultDict, log
-from .constants import END_DATE
+from .functions import get_company_financials, ResultDict
+from .constants import END_DATE, MAIN_LOGGER_NAME
+
+log = logging.getLogger(MAIN_LOGGER_NAME)
 
 JSON_EXT, CSV_EXT = '.json', '.csv'
 TICKER_SYMBOL = 'ticker_symbol'
 QUARTERLY = 'quarterly'
 TO_FILE = 'to_file'
+JSON_INDENT = 'json_indent'
+VERBOSE = 'verbose'
 
 
 def parse_cli() -> dict:
@@ -33,11 +36,36 @@ def parse_cli() -> dict:
     parser.add_argument(
         '-f', f'--{TO_FILE.replace("_", "-")}',
         type=Path,
-        help="Writes results to the specified destination file. If omitted, results are printed to stdout."
+        help="Writes results to the specified destination file. If omitted, results are printed to stdout. "
+             "Depending on the file name suffix, the output format can be either CSV or JSON."
+    )
+    parser.add_argument(
+        f'--{JSON_INDENT.replace("_", "-")}',
+        type=int,
+        help="If set to a positive integer and the output format is JSON (default), the resulting JSON document is "
+             "indented accordingly for more readability; if omitted, output is written on a single line."
+    )
+    parser.add_argument(
+        '-v', f'--{VERBOSE}',
+        action='count',
+        default=0,
+        help="Verbose mode. Reduces the log level threshold and thus prints out more status messages while running. "
+             "Using this flag multiple times increases verbosity further."
     )
     return vars(parser.parse_args())
 
 
+def configure_logging(verbosity: int) -> None:
+    root_logger = logging.getLogger()
+    root_logger.addHandler(logging.StreamHandler())
+    if verbosity > 2:
+        root_logger.setLevel(logging.DEBUG)
+    elif verbosity == 2:
+        root_logger.setLevel(logging.INFO)
+    elif verbosity == 1:
+        root_logger.setLevel(logging.WARNING)
+
+
 def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None:
     writer = csv.writer(file_obj)
     for statement_key, statement_dict in data.items():
@@ -49,14 +77,11 @@ def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None:
 async def main() -> None:
     args = parse_cli()
-    session = ClientSession()
-    try:
-        data = await get_company_financials(args[TICKER_SYMBOL], quarterly=args[QUARTERLY], session=session)
-    finally:
-        await session.close()
+    configure_logging(args[VERBOSE])
+    data = await get_company_financials(args[TICKER_SYMBOL], quarterly=args[QUARTERLY])
     path: Path = args[TO_FILE]
     if path is None:
-        print(json.dumps(data, indent=2))
+        print(json.dumps(data, indent=args[JSON_INDENT]))
         return
     with open(path, 'w') as f:
         if path.suffix.lower() == CSV_EXT:
@@ -64,7 +89,7 @@ async def main() -> None:
             return
         if not path.suffix.lower() == JSON_EXT:
            log.warning(f"Extension '{path.suffix}' unknown; using JSON format")
-        json.dump(data, f, indent=2)
+        json.dump(data, f, indent=args[JSON_INDENT])
 
 
 if __name__ == '__main__':
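With these additions the CLI gains a --json-indent option and a repeatable -v/--verbose flag next to the existing -f/--to-file option, e.g. `python -m mwfin AAPL -f out.json --json-indent 2 -vv` (the mwfin module name and the positional ticker argument are assumptions based on the constants in this diff; the entry point and the full parse_cli body are not shown). Per configure_logging, -vv maps to INFO and three or more -v to DEBUG; a single -v sets WARNING explicitly, which matches the root logger's default level, so output only becomes noticeably more verbose from -vv upwards.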

View File

@@ -1,4 +1,4 @@
-LOGGER_NAME = 'mwfin'
+MAIN_LOGGER_NAME = 'mwfin'
 HTML_PARSER = 'html.parser'
 DOMAIN = 'www.marketwatch.com'

View File

@@ -4,11 +4,12 @@ from typing import Union, List, Dict
 from aiohttp.client import ClientSession
 from bs4 import BeautifulSoup
 from bs4.element import Tag
+from webutils import in_async_session
 
 from . import constants
 
-log = logging.getLogger(constants.LOGGER_NAME)
+log = logging.getLogger(__name__)
 
 
 # The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
 # while its values will always be tuples with a length corresponding to the number of periods (columns)
@@ -17,6 +18,7 @@ log = logging.getLogger(constants.LOGGER_NAME)
 ResultDict = dict[str, Union[tuple[int], tuple[str]]]
 
 
+@in_async_session
 async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
     """
     Requests a web page and turns the response text into BeautifulSoup.
@@ -78,6 +80,7 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict:
     return output
 
 
+@in_async_session
 async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False,
                                    session: ClientSession = None) -> ResultDict:
     """
@@ -90,6 +93,7 @@ async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly
     return extract_all_data(soup)
 
 
+@in_async_session
 async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
                             session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
     """
@@ -103,6 +107,7 @@ async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
     }
 
 
+@in_async_session
 async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
                                session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
     """
@@ -116,6 +121,7 @@ async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
     }
 
 
+@in_async_session
 async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
                                   session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
     """
@@ -129,6 +135,7 @@ async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
     }
 
 
+@in_async_session
 async def get_company_financials(*ticker_symbols: str, quarterly: bool = False,
                                  session: ClientSession = None) -> Union[Dict[str, ResultDict],
                                                                          Dict[str, Dict[str, ResultDict]]]:
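
For context, a short sketch of how the decorated coroutines can be called after this change: with no session argument the decorator is expected to supply a temporary ClientSession, while an explicitly passed session is reused for all requests of the call. The import path mwfin.functions and the ticker symbols below are assumptions for illustration only.

import asyncio
from aiohttp import ClientSession
from mwfin.functions import get_company_financials  # import path assumed

async def demo() -> None:
    # No session passed: the in_async_session decorator provides a temporary one.
    single = await get_company_financials('AAPL', quarterly=True)
    print(list(single))
    # Explicit session: shared across all statements fetched by the call.
    async with ClientSession() as session:
        combined = await get_company_financials('AAPL', 'MSFT', session=session)
        print(list(combined))

asyncio.run(demo())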