mwfin/src/mwfin/functions.py

158 lines
6.1 KiB
Python

import logging
from typing import Union, List, Dict
from aiohttp.client import ClientSession
from bs4 import BeautifulSoup
from bs4.element import Tag
from webutils import in_async_session
from . import constants
log = logging.getLogger(__name__)

# Shape of the data extracted from a single financial statement page.
# Each key is the name of an item (row) in the financial statement. Each value is a tuple with one element per
# reporting period (column), the elements being the actual numbers. The one exception is the first key-value-pair,
# whose value holds the end dates of the reporting periods as strings (either years or quarters).
# The tuples are spelled `tuple[int, ...]` / `tuple[str, ...]` because their length depends on the number of
# periods; a bare `tuple[int]` would describe a tuple of exactly one element.
ResultDict = dict[str, Union[tuple[int, ...], tuple[str, ...]]]
@in_async_session
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
    """
    Fetches the page located at `url` and parses its text into a `BeautifulSoup` object.

    The page is requested with a GET on `session` and the response text is handed to the parser named by
    `constants.HTML_PARSER`.
    NOTE(review): presumably the `in_async_session` decorator supplies a session when none is passed; confirm
    against `webutils`.
    """
    async with session.get(url) as page:
        markup = await page.text()
    return BeautifulSoup(markup, constants.HTML_PARSER)
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
    """
    Reads the end dates of the reporting periods (either years or quarters) from the page of a financial
    statement and returns them as whitespace-stripped strings.

    The dates are taken from the header cells of the table inside the `div` with class `financials`.
    """
    financials_div = soup.find('div', attrs={'class': 'financials'})
    header_cells = financials_div.thead.find_all('th')
    # The first and the last header cell are left out; only the cells in between are treated as period labels.
    period_cells = header_cells[1:-1]
    return tuple(str(cell.string).strip() for cell in period_cells)
def is_relevant_table_row(tr: Tag) -> bool:
    """
    Tells whether the item shown in the table row is marked as relevant.

    The item name is looked up in `constants.FINANCIAL_STATEMENT_ITEMS` and the value stored there is returned.
    A name missing from that mapping is logged as a warning and the row is reported as not relevant.
    """
    item_name = str(tr.td.div.string).strip()
    try:
        relevant = constants.FINANCIAL_STATEMENT_ITEMS[item_name]
    except KeyError:
        log.warning(f"Unknown item name '{item_name}' found in financial statement.")
        relevant = False
    return relevant
def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]:
    """
    Collects the rows of the financials table body that `is_relevant_table_row` accepts, in page order.
    """
    financials_div = soup.find('div', attrs={'class': 'financials'})
    relevant_rows = []
    for row in financials_div.tbody.find_all('tr'):
        if is_relevant_table_row(row):
            relevant_rows.append(row)
    return relevant_rows
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
    """
    Splits a table row (of a financial statement) into the name of its item and one number per reporting period.

    The name comes from the first cell. The numbers come from the comma-separated `data-chart-data` attribute
    found in the last cell of the row.
    """
    label = str(tr.td.div.string).strip()
    last_cell = tr.find_all('td')[-1]
    raw_values: str = last_cell.div.div.attrs['data-chart-data']
    numbers = []
    for piece in raw_values.split(','):
        # An empty piece counts as zero; anything else is parsed as a float and truncated to an integer.
        numbers.append(int(float(piece)) if piece != '' else 0)
    return label, tuple(numbers)
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
    """
    Builds the result dictionary for one financial statement page.

    The first entry maps `constants.END_DATE` to the end dates of the reporting periods; it is followed by one
    entry per relevant table row, mapping the item name to its numbers.
    """
    financials = {constants.END_DATE: extract_end_dates(soup)}
    # Each relevant row yields a (name, values) pair, which is exactly what `dict.update` consumes.
    financials.update(extract_row_data(tr) for tr in find_relevant_table_rows(soup))
    return financials
@in_async_session
async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False,
                                   session: ClientSession = None) -> ResultDict:
    """
    Downloads one financial statement page of one company and extracts its data.

    `statement` selects the URL suffix through `constants.FIN_STMT_URL_SUFFIX`; with `quarterly` set, the
    `/quarter` variant of the page is requested.
    """
    base_url = constants.BASE_URL
    statement_suffix = constants.FIN_STMT_URL_SUFFIX[statement]
    period_suffix = '/quarter' if quarterly else ''
    url = f'{base_url}/{ticker_symbol}/financials{statement_suffix}{period_suffix}'
    return extract_all_data(await soup_from_url(url, session))
@in_async_session
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
                            session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Returns data from the balance sheet of the specified companies.

    With exactly one ticker symbol the balance sheet data is returned directly. Otherwise a dictionary mapping
    each ticker symbol to its balance sheet data is returned (empty when no symbol is given); the statements are
    requested one after another.
    """
    if len(ticker_symbols) == 1:
        (only_symbol,) = ticker_symbols
        return await _get_financial_statement(constants.BS, only_symbol, quarterly, session)
    balance_sheets = {}
    for symbol in ticker_symbols:
        balance_sheets[symbol] = await _get_financial_statement(constants.BS, symbol, quarterly, session)
    return balance_sheets
@in_async_session
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
                               session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Returns data from the income statement of the specified companies.

    With exactly one ticker symbol the income statement data is returned directly. Otherwise a dictionary mapping
    each ticker symbol to its income statement data is returned (empty when no symbol is given); the statements
    are requested one after another.
    """
    if len(ticker_symbols) == 1:
        (only_symbol,) = ticker_symbols
        return await _get_financial_statement(constants.IS, only_symbol, quarterly, session)
    income_statements = {}
    for symbol in ticker_symbols:
        income_statements[symbol] = await _get_financial_statement(constants.IS, symbol, quarterly, session)
    return income_statements
@in_async_session
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
                                  session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Returns data from the cash flow statement of the specified companies.

    With exactly one ticker symbol the cash flow statement data is returned directly. Otherwise a dictionary
    mapping each ticker symbol to its cash flow statement data is returned (empty when no symbol is given); the
    statements are requested one after another.
    """
    if len(ticker_symbols) == 1:
        (only_symbol,) = ticker_symbols
        return await _get_financial_statement(constants.CF, only_symbol, quarterly, session)
    cash_flow_statements = {}
    for symbol in ticker_symbols:
        cash_flow_statements[symbol] = await _get_financial_statement(constants.CF, symbol, quarterly, session)
    return cash_flow_statements
@in_async_session
async def get_company_financials(*ticker_symbols: str, quarterly: bool = False,
                                 session: ClientSession = None) -> Union[Dict[str, ResultDict],
                                                                         Dict[str, Dict[str, ResultDict]]]:
    """
    Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified companies.

    With exactly one ticker symbol a dictionary keyed by `constants.BS`, `constants.IS` and `constants.CF` is
    returned. Otherwise the result maps each ticker symbol to such a dictionary (and is empty when no symbol
    is given).
    """
    async def all_statements(symbol: str) -> Dict[str, ResultDict]:
        # The three statements of one company are requested in this order, one after another.
        return {
            constants.BS: await get_balance_sheet(symbol, quarterly=quarterly, session=session),
            constants.IS: await get_income_statement(symbol, quarterly=quarterly, session=session),
            constants.CF: await get_cash_flow_statement(symbol, quarterly=quarterly, session=session)
        }

    if len(ticker_symbols) == 1:
        return await all_statements(ticker_symbols[0])
    financials = {}
    for symbol in ticker_symbols:
        financials[symbol] = await all_statements(symbol)
    return financials