Added the `in_async_session` decorator from own project (`webutils-df`); changed the logging setup; added the `--json-indent` and `-v`/`--verbose` CLI parameters
This commit is contained in:
parent
36226fc1be
commit
c23c963cd8
@ -1,2 +1,3 @@
|
|||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
aiohttp
|
aiohttp
|
||||||
|
webutils-df
|
@ -23,6 +23,7 @@ python_requires = >=3.8
|
|||||||
install_requires =
|
install_requires =
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
aiohttp
|
aiohttp
|
||||||
|
webutils-df
|
||||||
|
|
||||||
[options.extras_require]
|
[options.extras_require]
|
||||||
tests =
|
tests =
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
import csv
|
import csv
|
||||||
import json
|
import json
|
||||||
@ -5,17 +6,19 @@ from argparse import ArgumentParser
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
from aiohttp import ClientSession
|
from .functions import get_company_financials, ResultDict
|
||||||
|
from .constants import END_DATE, MAIN_LOGGER_NAME
|
||||||
|
|
||||||
from .functions import get_company_financials, ResultDict, log
|
|
||||||
from .constants import END_DATE
|
|
||||||
|
|
||||||
|
log = logging.getLogger(MAIN_LOGGER_NAME)
|
||||||
|
|
||||||
JSON_EXT, CSV_EXT = '.json', '.csv'
|
JSON_EXT, CSV_EXT = '.json', '.csv'
|
||||||
|
|
||||||
TICKER_SYMBOL = 'ticker_symbol'
|
TICKER_SYMBOL = 'ticker_symbol'
|
||||||
QUARTERLY = 'quarterly'
|
QUARTERLY = 'quarterly'
|
||||||
TO_FILE = 'to_file'
|
TO_FILE = 'to_file'
|
||||||
|
JSON_INDENT = 'json_indent'
|
||||||
|
VERBOSE = 'verbose'
|
||||||
|
|
||||||
|
|
||||||
def parse_cli() -> dict:
|
def parse_cli() -> dict:
|
||||||
@ -34,10 +37,35 @@ def parse_cli() -> dict:
|
|||||||
'-f', f'--{TO_FILE.replace("_", "-")}',
|
'-f', f'--{TO_FILE.replace("_", "-")}',
|
||||||
type=Path,
|
type=Path,
|
||||||
help="Writes results to the specified destination file. If omitted results are printed to stdout. "
|
help="Writes results to the specified destination file. If omitted results are printed to stdout. "
|
||||||
|
"Depending on the file name suffix, the output format can be either as CSV or in JSON."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
f'--{JSON_INDENT.replace("_", "-")}',
|
||||||
|
type=int,
|
||||||
|
help="If set to a positive integer and the output format is JSON (default), the resulting JSON document is "
|
||||||
|
"indented accordingly for more readability; if omitted, output is returned in one line."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-v', f'--{VERBOSE}',
|
||||||
|
action='count',
|
||||||
|
default=0,
|
||||||
|
help="Verbose mode. Reduces the log level and thus prints out more status messages while running. "
|
||||||
|
"Using this flag multiple times increases verbosity further."
|
||||||
)
|
)
|
||||||
return vars(parser.parse_args())
|
return vars(parser.parse_args())
|
||||||
|
|
||||||
|
|
||||||
|
def configure_logging(verbosity: int) -> None:
|
||||||
|
root_logger = logging.getLogger()
|
||||||
|
root_logger.addHandler(logging.StreamHandler())
|
||||||
|
if verbosity > 2:
|
||||||
|
root_logger.setLevel(logging.DEBUG)
|
||||||
|
elif verbosity == 2:
|
||||||
|
root_logger.setLevel(logging.INFO)
|
||||||
|
elif verbosity == 1:
|
||||||
|
root_logger.setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
|
||||||
def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None:
|
def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None:
|
||||||
writer = csv.writer(file_obj)
|
writer = csv.writer(file_obj)
|
||||||
for statement_key, statement_dict in data.items():
|
for statement_key, statement_dict in data.items():
|
||||||
@ -49,14 +77,11 @@ def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None:
|
|||||||
|
|
||||||
async def main() -> None:
|
async def main() -> None:
|
||||||
args = parse_cli()
|
args = parse_cli()
|
||||||
session = ClientSession()
|
configure_logging(args[VERBOSE])
|
||||||
try:
|
data = await get_company_financials(args[TICKER_SYMBOL], quarterly=args[QUARTERLY])
|
||||||
data = await get_company_financials(args[TICKER_SYMBOL], quarterly=args[QUARTERLY], session=session)
|
|
||||||
finally:
|
|
||||||
await session.close()
|
|
||||||
path: Path = args[TO_FILE]
|
path: Path = args[TO_FILE]
|
||||||
if path is None:
|
if path is None:
|
||||||
print(json.dumps(data, indent=2))
|
print(json.dumps(data, indent=args[JSON_INDENT]))
|
||||||
return
|
return
|
||||||
with open(path, 'w') as f:
|
with open(path, 'w') as f:
|
||||||
if path.suffix.lower() == CSV_EXT:
|
if path.suffix.lower() == CSV_EXT:
|
||||||
@ -64,7 +89,7 @@ async def main() -> None:
|
|||||||
return
|
return
|
||||||
if not path.suffix.lower() == JSON_EXT:
|
if not path.suffix.lower() == JSON_EXT:
|
||||||
log.warning(f"Extension '{path.suffix}' unknown; using JSON format")
|
log.warning(f"Extension '{path.suffix}' unknown; using JSON format")
|
||||||
json.dump(data, f, indent=2)
|
json.dump(data, f, indent=args[JSON_INDENT])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
LOGGER_NAME = 'mwfin'
|
MAIN_LOGGER_NAME = 'mwfin'
|
||||||
|
|
||||||
HTML_PARSER = 'html.parser'
|
HTML_PARSER = 'html.parser'
|
||||||
DOMAIN = 'www.marketwatch.com'
|
DOMAIN = 'www.marketwatch.com'
|
||||||
|
@ -4,11 +4,12 @@ from typing import Union, List, Dict
|
|||||||
from aiohttp.client import ClientSession
|
from aiohttp.client import ClientSession
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bs4.element import Tag
|
from bs4.element import Tag
|
||||||
|
from webutils import in_async_session
|
||||||
|
|
||||||
from . import constants
|
from . import constants
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(constants.LOGGER_NAME)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
|
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
|
||||||
# while its values will always be tuples with a length corresponding to the number of periods (columns)
|
# while its values will always be tuples with a length corresponding to the number of periods (columns)
|
||||||
@ -17,6 +18,7 @@ log = logging.getLogger(constants.LOGGER_NAME)
|
|||||||
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
|
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
|
||||||
|
|
||||||
|
|
||||||
|
@in_async_session
|
||||||
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
|
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
|
||||||
"""
|
"""
|
||||||
Requests a web page and turns the response text into BeautifulSoup.
|
Requests a web page and turns the response text into BeautifulSoup.
|
||||||
@ -78,6 +80,7 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict:
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
@in_async_session
|
||||||
async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False,
|
async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False,
|
||||||
session: ClientSession = None) -> ResultDict:
|
session: ClientSession = None) -> ResultDict:
|
||||||
"""
|
"""
|
||||||
@ -90,6 +93,7 @@ async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly
|
|||||||
return extract_all_data(soup)
|
return extract_all_data(soup)
|
||||||
|
|
||||||
|
|
||||||
|
@in_async_session
|
||||||
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
|
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
|
||||||
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
||||||
"""
|
"""
|
||||||
@ -103,6 +107,7 @@ async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@in_async_session
|
||||||
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
|
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
|
||||||
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
||||||
"""
|
"""
|
||||||
@ -116,6 +121,7 @@ async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@in_async_session
|
||||||
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
|
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
|
||||||
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
||||||
"""
|
"""
|
||||||
@ -129,6 +135,7 @@ async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@in_async_session
|
||||||
async def get_company_financials(*ticker_symbols: str, quarterly: bool = False,
|
async def get_company_financials(*ticker_symbols: str, quarterly: bool = False,
|
||||||
session: ClientSession = None) -> Union[Dict[str, ResultDict],
|
session: ClientSession = None) -> Union[Dict[str, ResultDict],
|
||||||
Dict[str, Dict[str, ResultDict]]]:
|
Dict[str, Dict[str, ResultDict]]]:
|
||||||
|
Loading…
Reference in New Issue
Block a user