Compare commits

...

3 Commits

3 changed files with 82 additions and 63 deletions

View File

@ -3,6 +3,7 @@ MAIN_LOGGER_NAME = 'mwfin'
HTML_PARSER = 'html.parser' HTML_PARSER = 'html.parser'
DOMAIN = 'www.marketwatch.com' DOMAIN = 'www.marketwatch.com'
BASE_URL = f'https://{DOMAIN}/investing/stock' BASE_URL = f'https://{DOMAIN}/investing/stock'
DEFAULT_CONCURRENT_BATCH_SIZE = 1
BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement' BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement'
FIN_STMT_URL_SUFFIX = { FIN_STMT_URL_SUFFIX = {

View File

@ -4,7 +4,7 @@ from typing import Union, List, Dict
from aiohttp.client import ClientSession from aiohttp.client import ClientSession
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag
from webutils import in_async_session from webutils import in_async_session, gather_in_batches
from . import constants from . import constants
@ -81,8 +81,8 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict:
@in_async_session @in_async_session
async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False, async def _get_single_company_fin_stmt(statement: str, ticker_symbol: str, quarterly: bool = False,
session: ClientSession = None) -> ResultDict: session: ClientSession = None) -> ResultDict:
""" """
Returns data from the specified financial statement of the specified company. Returns data from the specified financial statement of the specified company.
""" """
@ -93,55 +93,62 @@ async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly
return extract_all_data(soup) return extract_all_data(soup)
@in_async_session
async def _get_multi_companies_fin_stmt(statement: str, *ticker_symbols: str, quarterly: bool = False,
                                        concurrent_batch_size: int = constants.DEFAULT_CONCURRENT_BATCH_SIZE,
                                        session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Returns data from the specified financial statement of one or more companies.

    With exactly one ticker symbol, the result for that company is returned directly.
    Otherwise one `_get_single_company_fin_stmt` coroutine per symbol is handed to `gather_in_batches`
    together with `concurrent_batch_size`, and the results are returned in a dictionary
    keyed by ticker symbol (paired with the symbols in the order they were passed in).
    """
    if len(ticker_symbols) != 1:
        # Build every coroutine up front; the positional argument order matches the single-company helper.
        pending = [
            _get_single_company_fin_stmt(statement, ticker, quarterly, session)
            for ticker in ticker_symbols
        ]
        fetched = await gather_in_batches(concurrent_batch_size, *pending)
        return dict(zip(ticker_symbols, fetched))
    # A lone symbol yields the bare result rather than a one-entry dictionary.
    return await _get_single_company_fin_stmt(statement, ticker_symbols[0], quarterly, session)
@in_async_session @in_async_session
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False, async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
concurrent_batch_size: int = constants.DEFAULT_CONCURRENT_BATCH_SIZE,
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]: session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
""" """
Returns data from the balance sheet of the specified company. Returns data from the balance sheet of the specified company.
""" """
if len(ticker_symbols) == 1: return await _get_multi_companies_fin_stmt(constants.BS, *ticker_symbols,
return await _get_financial_statement(constants.BS, ticker_symbols[0], quarterly, session) quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
return { session=session)
sym: await _get_financial_statement(constants.BS, sym, quarterly, session)
for sym in ticker_symbols
}
@in_async_session @in_async_session
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False, async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
concurrent_batch_size: int = constants.DEFAULT_CONCURRENT_BATCH_SIZE,
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]: session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
""" """
Returns data from the income statement of the specified company. Returns data from the income statement of the specified company.
""" """
if len(ticker_symbols) == 1: return await _get_multi_companies_fin_stmt(constants.IS, *ticker_symbols,
return await _get_financial_statement(constants.IS, ticker_symbols[0], quarterly, session) quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
return { session=session)
sym: await _get_financial_statement(constants.IS, sym, quarterly, session)
for sym in ticker_symbols
}
@in_async_session @in_async_session
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False, async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
concurrent_batch_size: int = constants.DEFAULT_CONCURRENT_BATCH_SIZE,
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]: session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
""" """
Returns data from the cash flow statement of the specified company. Returns data from the cash flow statement of the specified company.
""" """
if len(ticker_symbols) == 1: return await _get_multi_companies_fin_stmt(constants.CF, *ticker_symbols,
return await _get_financial_statement(constants.CF, ticker_symbols[0], quarterly, session) quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
return { session=session)
sym: await _get_financial_statement(constants.CF, sym, quarterly, session)
for sym in ticker_symbols
}
@in_async_session @in_async_session
async def _get_single_company_financials(ticker_symbol: str, quarterly: bool = False, async def _get_single_company_financials(ticker_symbol: str, quarterly: bool = False,
session: ClientSession = None) -> Dict[str, ResultDict]: session: ClientSession = None) -> Dict[str, ResultDict]:
return { return {
constants.BS: await _get_financial_statement(constants.BS, ticker_symbol, quarterly, session), constants.BS: await _get_single_company_fin_stmt(constants.BS, ticker_symbol, quarterly, session),
constants.IS: await _get_financial_statement(constants.IS, ticker_symbol, quarterly, session), constants.IS: await _get_single_company_fin_stmt(constants.IS, ticker_symbol, quarterly, session),
constants.CF: await _get_financial_statement(constants.CF, ticker_symbol, quarterly, session) constants.CF: await _get_single_company_fin_stmt(constants.CF, ticker_symbol, quarterly, session)
} }

View File

@ -101,7 +101,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
@patch.object(functions, 'extract_all_data') @patch.object(functions, 'extract_all_data')
@patch.object(functions, 'soup_from_url') @patch.object(functions, 'soup_from_url')
async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data): async def test__get_single_company_fin_stmt(self, mock_soup_from_url, mock_extract_all_data):
mock_session = MagicMock() mock_session = MagicMock()
test_ticker, statement = 'bar', BS test_ticker, statement = 'bar', BS
test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}' test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
@ -109,7 +109,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
mock_extract_all_data.return_value = expected_output = {'foo': 'bar'} mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
quarterly = False quarterly = False
output = await functions._get_financial_statement(statement, test_ticker, quarterly, mock_session) output = await functions._get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_called_once_with(test_url, mock_session) mock_soup_from_url.assert_called_once_with(test_url, mock_session)
mock_extract_all_data.assert_called_once_with(mock_soup) mock_extract_all_data.assert_called_once_with(mock_soup)
@ -117,56 +117,67 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
mock_extract_all_data.reset_mock() mock_extract_all_data.reset_mock()
quarterly = True quarterly = True
output = await functions._get_financial_statement(statement, test_ticker, quarterly, mock_session) output = await functions._get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session) mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session)
mock_extract_all_data.assert_called_once_with(mock_soup) mock_extract_all_data.assert_called_once_with(mock_soup)
async def _helper_test_get_any_statement(self, statement: str, mock__get_financial_statement): @patch.object(functions, '_get_single_company_fin_stmt')
symbol, quarterly, mock_session = 'foo', False, MagicMock() async def test__get_multi_companies_fin_stmt(self, mock__get_single_company_fin_stmt):
mock__get_financial_statement.return_value = expected_output = 'bar' statement, sym1, sym2, quarterly, mock_session = 'xyz', 'foo', 'bar', False, MagicMock()
if statement == BS: mock__get_single_company_fin_stmt.return_value = expected_output = 'baz'
output = await functions._get_multi_companies_fin_stmt(statement, sym1,
quarterly=quarterly, session=mock_session)
self.assertEqual(expected_output, output)
mock__get_single_company_fin_stmt.assert_called_once_with(statement, sym1, quarterly, mock_session)
mock__get_single_company_fin_stmt.reset_mock()
expected_output = {sym1: expected_output, sym2: expected_output}
output = await functions._get_multi_companies_fin_stmt(statement, sym1, sym2,
quarterly=quarterly, session=mock_session)
self.assertDictEqual(expected_output, output)
mock__get_single_company_fin_stmt.assert_has_calls([
call(statement, sym1, quarterly, mock_session),
call(statement, sym2, quarterly, mock_session)
])
async def _helper_test_get_any_statement(self, stmt: str, mock__get_multi_companies_fin_stmt):
sym1, sym2, quarterly, batch_size, mock_session = 'foo', 'bar', False, 2, MagicMock()
mock__get_multi_companies_fin_stmt.return_value = expected_output = 'baz'
if stmt == BS:
function = functions.get_balance_sheet function = functions.get_balance_sheet
elif statement == IS: elif stmt == IS:
function = functions.get_income_statement function = functions.get_income_statement
elif statement == CF: elif stmt == CF:
function = functions.get_cash_flow_statement function = functions.get_cash_flow_statement
else: else:
raise ValueError raise ValueError
output = await function(symbol, quarterly=quarterly, session=mock_session) output = await function(sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session)
self.assertEqual(expected_output, output) self.assertEqual(expected_output, output)
mock__get_financial_statement.assert_called_once_with(statement, symbol, quarterly, mock_session) mock__get_multi_companies_fin_stmt.assert_called_once_with(
mock__get_financial_statement.reset_mock() stmt, sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session
)
symbol1, symbol2 = 'x', 'y'
expected_output = {symbol1: expected_output, symbol2: expected_output}
output = await function(symbol1, symbol2, quarterly=quarterly, session=mock_session)
self.assertDictEqual(expected_output, output)
mock__get_financial_statement.assert_has_calls([
call(statement, symbol1, quarterly, mock_session),
call(statement, symbol2, quarterly, mock_session),
])
@patch.object(functions, '_get_financial_statement') @patch.object(functions, '_get_multi_companies_fin_stmt')
async def test_get_balance_sheet(self, mock__get_financial_statement): async def test_get_balance_sheet(self, mock__get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(BS, mock__get_financial_statement) await self._helper_test_get_any_statement(BS, mock__get_multi_companies_fin_stmt)
@patch.object(functions, '_get_financial_statement') @patch.object(functions, '_get_multi_companies_fin_stmt')
async def test_get_income_statement(self, mock__get_financial_statement): async def test_get_income_statement(self, mock__get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(IS, mock__get_financial_statement) await self._helper_test_get_any_statement(IS, mock__get_multi_companies_fin_stmt)
@patch.object(functions, '_get_financial_statement') @patch.object(functions, '_get_multi_companies_fin_stmt')
async def test_get_cash_flow_statement(self, mock__get_financial_statement): async def test_get_cash_flow_statement(self, mock__get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(CF, mock__get_financial_statement) await self._helper_test_get_any_statement(CF, mock__get_multi_companies_fin_stmt)
@patch.object(functions, '_get_financial_statement') @patch.object(functions, '_get_single_company_fin_stmt')
async def test__get_single_company_financials(self, mock__get_financial_statement): async def test__get_single_company_financials(self, mock__get_single_company_fin_stmt):
symbol, quarterly, mock_session = 'foo', False, MagicMock() symbol, quarterly, mock_session = 'foo', False, MagicMock()
mock__get_financial_statement.return_value = bar = 'bar' mock__get_single_company_fin_stmt.return_value = bar = 'bar'
expected_output = {BS: bar, IS: bar, CF: bar} expected_output = {BS: bar, IS: bar, CF: bar}
output = await functions._get_single_company_financials(symbol, quarterly, mock_session) output = await functions._get_single_company_financials(symbol, quarterly, mock_session)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock__get_financial_statement.assert_has_calls([ mock__get_single_company_fin_stmt.assert_has_calls([
call(BS, symbol, quarterly, mock_session), call(BS, symbol, quarterly, mock_session),
call(IS, symbol, quarterly, mock_session), call(IS, symbol, quarterly, mock_session),
call(CF, symbol, quarterly, mock_session) call(CF, symbol, quarterly, mock_session)
@ -181,14 +192,14 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
mock__get_single_company_financials.assert_called_once_with(symbol, quarterly, mock_session) mock__get_single_company_financials.assert_called_once_with(symbol, quarterly, mock_session)
mock__get_single_company_financials.reset_mock() mock__get_single_company_financials.reset_mock()
test_symbol1, test_symbol2 = 'x', 'y' test_sym1, test_sym2 = 'x', 'y'
expected_output = {test_symbol1: expected_output, test_symbol2: expected_output} expected_output = {test_sym1: expected_output, test_sym2: expected_output}
output = await functions.get_company_financials(test_symbol1, test_symbol2, output = await functions.get_company_financials(test_sym1, test_sym2,
quarterly=quarterly, session=mock_session) quarterly=quarterly, session=mock_session)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock__get_single_company_financials.assert_has_calls([ mock__get_single_company_financials.assert_has_calls([
call(test_symbol1, quarterly, mock_session), call(test_sym1, quarterly, mock_session),
call(test_symbol2, quarterly, mock_session) call(test_sym2, quarterly, mock_session)
]) ])
@patch.object(functions, 'ClientSession') @patch.object(functions, 'ClientSession')