166 lines
7.3 KiB
Python
166 lines
7.3 KiB
Python
import logging
|
|
import asyncio
|
|
from typing import Union, List, Dict
|
|
|
|
from aiohttp.client import ClientSession
|
|
from bs4 import BeautifulSoup
|
|
from bs4.element import Tag
|
|
from webutils import in_async_session, gather_in_batches
|
|
|
|
from .constants import (HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, FIN_STMT_ITEMS,
|
|
DEFAULT_CONCURRENT_BATCH_SIZE)
|
|
|
|
|
|
log = logging.getLogger(__name__)

# Shape of the data extracted from a single financial statement page:
# The dictionary's keys correspond to the name of the item (row) in the financial statement,
# while its values are tuples with one element per reporting period (column) holding the actual numbers.
# The exception is the first key-value-pair, which represents the end dates of the reporting periods
# as strings (either years or quarters).
# The tuples are declared variadic (`tuple[int, ...]`) because the number of periods is not fixed;
# a plain `tuple[int]` would describe a tuple of exactly one element.
ResultDict = dict[str, Union[tuple[int, ...], tuple[str, ...]]]
|
|
|
|
|
|
@in_async_session
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
    """
    Fetches the page behind `url` with a GET request on `session` and parses the response text.

    Returns the parsed document as a `BeautifulSoup` object created with the `HTML_PARSER` constant.
    NOTE(review): `session` defaults to `None`; presumably the `in_async_session` decorator supplies
    a session in that case -- confirm against `webutils`.
    """
    async with session.get(url) as resp:
        page_source = await resp.text()
        markup_tree = BeautifulSoup(page_source, HTML_PARSER)
        return markup_tree
|
|
|
|
|
|
def extract_end_dates(soup: BeautifulSoup) -> tuple[str, ...]:
    """
    Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a
    financial statement.

    The dates are read from the header cells (`th`) of the table head inside the `div` with the class `financials`.
    The first and the last header cell are left out; all remaining ones are converted to stripped strings.
    The returned tuple therefore has one element per remaining header cell (not a fixed length).
    """
    ths = soup.find('div', attrs={'class': 'financials'}).thead.find_all('th')
    # Skip the first and last column header; presumably these are the item name and chart columns -- TODO confirm
    return tuple(str(th.string).strip() for th in ths[1:-1])
|
|
|
|
|
|
def is_relevant_table_row(tr: Tag) -> bool:
    """
    Tells whether the financial statement item displayed in a table row is marked as relevant.

    The item name is taken from the `div` in the row's first `td` and looked up in `FIN_STMT_ITEMS`;
    the value stored there is returned. If the name is not a key of `FIN_STMT_ITEMS`, a warning is logged
    and the row is treated as not relevant.
    """
    item_name = str(tr.td.div.string).strip()
    try:
        relevant = FIN_STMT_ITEMS[item_name]
    except KeyError:
        log.warning(f"Unknown item name '{item_name}' found in financial statement.")
        relevant = False
    return relevant
|
|
|
|
|
|
def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]:
    """
    Collects the table rows of a financial statement page that hold data of interest.

    All `tr` elements of the table body inside the `div` with the class `financials` are checked with
    `is_relevant_table_row`; only those for which the check passes are returned, in document order.
    """
    financials_div = soup.find('div', attrs={'class': 'financials'})
    all_rows = financials_div.tbody.find_all('tr')
    return list(filter(is_relevant_table_row, all_rows))
|
|
|
|
|
|
def extract_row_data(tr: Tag) -> tuple[str, tuple[int, ...]]:
    """
    Returns the name of the item displayed in the table row (of a financial statement)
    as well as a number for each reporting period.

    The name is the stripped text of the `div` in the row's first `td`.
    The numbers are parsed from the comma-separated `data-chart-data` attribute found in the row's last `td`:
    empty entries count as 0, all other entries are parsed as floats and truncated to integers.
    The returned tuple of numbers has one element per entry (not a fixed length).

    Raises `ValueError` if a non-empty entry cannot be parsed as a number.
    """
    item_name = str(tr.td.div.string).strip()
    # The values are not read from the visible cells but from the element nested two `div`s deep in the last cell
    data_div = tr.find_all('td')[-1].div.div
    values_str: str = data_div.attrs['data-chart-data']
    values = tuple(int(float(s)) if s != '' else 0 for s in values_str.split(','))
    return item_name, values
|
|
|
|
|
|
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
    """
    Builds the complete result dictionary for one financial statement page.

    The first entry maps `END_DATE` to the end dates of the reporting periods; after that follows
    one entry per relevant table row, mapping the item name to its numbers.
    """
    financials = {END_DATE: extract_end_dates(soup)}
    # Each relevant row yields a (name, values) pair, which `update` stores as a key-value-pair
    financials.update(extract_row_data(tr) for tr in find_relevant_table_rows(soup))
    return financials
|
|
|
|
|
|
@in_async_session
async def _get_single_company_fin_stmt(statement: str, ticker_symbol: str, quarterly: bool = False,
                                       session: ClientSession = None) -> ResultDict:
    """
    Fetches one financial statement page of one company and returns the data extracted from it.

    The URL is assembled from `BASE_URL`, the ticker symbol and the suffix stored for `statement` in
    `FIN_STMT_URL_SUFFIX`; with `quarterly` set, '/quarter' is appended to request the quarterly page.
    """
    annual_url = f'{BASE_URL}/{ticker_symbol}/financials{FIN_STMT_URL_SUFFIX[statement]}'
    url = annual_url + ('/quarter' if quarterly else '')
    return extract_all_data(await soup_from_url(url, session))
|
|
|
|
|
|
@in_async_session
async def _get_multi_companies_fin_stmt(statement: str, *ticker_symbols: str, quarterly: bool = False,
                                        concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                                        session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Returns data from the specified financial statement of one or more companies.

    If exactly one ticker symbol is passed, the data of that company is returned directly.
    Otherwise one request per symbol is handed to `gather_in_batches` together with `concurrent_batch_size`
    and the results are returned in a dictionary with the ticker symbols as keys.
    """
    if len(ticker_symbols) == 1:
        (only_symbol,) = ticker_symbols
        return await _get_single_company_fin_stmt(statement, only_symbol, quarterly, session)
    pending = [_get_single_company_fin_stmt(statement, symbol, quarterly, session) for symbol in ticker_symbols]
    fetched = await gather_in_batches(concurrent_batch_size, *pending)
    return dict(zip(ticker_symbols, fetched))
|
|
|
|
|
|
@in_async_session
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
                            concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                            session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Returns data from the balance sheet of the specified company or companies.

    All arguments are forwarded to `_get_multi_companies_fin_stmt` with `BS` as the statement:
    a single ticker symbol gives that company's data directly, otherwise a dictionary with the
    ticker symbols as keys is returned.
    """
    balance_sheet_data = await _get_multi_companies_fin_stmt(
        BS, *ticker_symbols,
        quarterly=quarterly, concurrent_batch_size=concurrent_batch_size, session=session
    )
    return balance_sheet_data
|
|
|
|
|
|
@in_async_session
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
                               concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                               session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Returns data from the income statement of the specified company or companies.

    All arguments are forwarded to `_get_multi_companies_fin_stmt` with `IS` as the statement:
    a single ticker symbol gives that company's data directly, otherwise a dictionary with the
    ticker symbols as keys is returned.
    """
    income_statement_data = await _get_multi_companies_fin_stmt(
        IS, *ticker_symbols,
        quarterly=quarterly, concurrent_batch_size=concurrent_batch_size, session=session
    )
    return income_statement_data
|
|
|
|
|
|
@in_async_session
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
                                  concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                                  session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Returns data from the cash flow statement of the specified company or companies.

    All arguments are forwarded to `_get_multi_companies_fin_stmt` with `CF` as the statement:
    a single ticker symbol gives that company's data directly, otherwise a dictionary with the
    ticker symbols as keys is returned.
    """
    cash_flow_data = await _get_multi_companies_fin_stmt(
        CF, *ticker_symbols,
        quarterly=quarterly, concurrent_batch_size=concurrent_batch_size, session=session
    )
    return cash_flow_data
|
|
|
|
|
|
@in_async_session
async def _get_single_company_all_financials(ticker_symbol: str, quarterly: bool = False,
                                             session: ClientSession = None) -> Dict[str, ResultDict]:
    """
    Fetches all three financial statements (`BS`, `IS`, `CF`) of one company via `asyncio.gather`.

    Returns a dictionary that maps each of the three statement identifiers to the data extracted
    from the corresponding page.
    """
    statements = (BS, IS, CF)
    requests = [_get_single_company_fin_stmt(stmt, ticker_symbol, quarterly, session) for stmt in statements]
    per_statement = await asyncio.gather(*requests)
    return dict(zip(statements, per_statement))
|
|
|
|
|
|
@in_async_session
async def get_all_financials(*ticker_symbols: str, quarterly: bool = False,
                             concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                             session: ClientSession = None) -> Union[Dict[str, ResultDict],
                                                                     Dict[str, Dict[str, ResultDict]]]:
    """
    Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified
    company or companies.

    If exactly one ticker symbol is passed, a dictionary with one entry per statement is returned.
    Otherwise the companies are handed to `gather_in_batches` together with `concurrent_batch_size`
    and such a dictionary is returned for every company, keyed by ticker symbol.
    """
    if len(ticker_symbols) == 1:
        (only_symbol,) = ticker_symbols
        return await _get_single_company_all_financials(only_symbol, quarterly, session)
    pending = [_get_single_company_all_financials(symbol, quarterly, session) for symbol in ticker_symbols]
    fetched = await gather_in_batches(concurrent_batch_size, *pending)
    return dict(zip(ticker_symbols, fetched))
|