more tests, idea for change

This commit is contained in:
Daniil Fajnberg 2021-11-26 12:47:58 +01:00
parent 1e0410a254
commit 776c909956
3 changed files with 122 additions and 28 deletions

View File

@ -1,3 +1,14 @@
# Name of the parser backend handed to BeautifulSoup when building soup objects.
HTML_PARSER = 'html.parser'

# Host serving the financial data and the root URL for per-stock pages.
DOMAIN = 'www.marketwatch.com'
BASE_URL = 'https://' + DOMAIN + '/investing/stock'

# Short keys identifying the three financial statements:
# balance sheet, income statement and cash flow statement.
BS = 'bs'
IS = 'is'
CF = 'cf'

# URL suffix per statement key; the tests append it to
# `{BASE_URL}/{ticker}/financials`. The income statement has no suffix.
FIN_STMT_URL_SUFFIX = {
    BS: '/balance-sheet',
    IS: '',
    CF: '/cash-flow',
}
# All items marked `False` do not need to be scraped # All items marked `False` do not need to be scraped
# because they are calculated from other items (e.g. growth or ratios). # because they are calculated from other items (e.g. growth or ratios).
FINANCIAL_STATEMENT_ITEMS = { FINANCIAL_STATEMENT_ITEMS = {

View File

@ -11,8 +11,6 @@ from bs4.element import ResultSet, Tag
# the end dates of the reporting periods as strings (either years or quarters). # the end dates of the reporting periods as strings (either years or quarters).
ResultDict = dict[str, Union[tuple[int], tuple[str]]] ResultDict = dict[str, Union[tuple[int], tuple[str]]]
HTML_PARSER = 'html.parser'
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup: async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
""" """
@ -58,6 +56,14 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict:
pass pass
async def _get_financial_statement(statement: str, ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
                                   session: ClientSession = None) -> ResultDict:
    """
    Returns data from the specified financial statement of the specified company.

    Not implemented yet: the body is a placeholder and currently returns `None`.

    NOTE(review): presumably `statement` is one of the statement keys used to look up
    the URL suffix, and `yearly`/`quarterly` select which reporting periods to fetch;
    confirm against the tests before implementing.
    """
    pass  # TODO: Don't allow both yearly and quarterly, instead only have `quarterly` Flag
async def get_balance_sheet(ticker_symbol: str, yearly: bool = True, quarterly: bool = True, async def get_balance_sheet(ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
session: ClientSession = None) -> ResultDict: session: ClientSession = None) -> ResultDict:
""" """

View File

@ -5,6 +5,7 @@ from unittest.mock import patch, MagicMock, AsyncMock, call
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from mwfin import functions from mwfin import functions
from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF
THIS_DIR = Path(__file__).parent THIS_DIR = Path(__file__).parent
@ -16,23 +17,26 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
# view page source @ line 2055 # view page source @ line 2055
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html') TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
@classmethod @staticmethod
def setUpClass(cls) -> None: def get_mock_session(response_text: str = None) -> MagicMock:
with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
test_html = f.read()
cls.test_soup = BeautifulSoup(test_html, functions.HTML_PARSER)
@patch.object(functions, 'ClientSession')
async def test_soup_from_url(self, mock_session_cls):
test_html = '<b>foo</b>'
mock_response = MagicMock() mock_response = MagicMock()
mock_response.text = AsyncMock(return_value=test_html) mock_response.text = AsyncMock(return_value=response_text)
mock_get_return = MagicMock() mock_get_return = MagicMock()
mock_get_return.__aenter__ = AsyncMock(return_value=mock_response) mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
mock_session_obj = MagicMock() mock_session_obj = MagicMock()
mock_session_obj.get = MagicMock(return_value=mock_get_return) mock_session_obj.get = MagicMock(return_value=mock_get_return)
mock_session_cls.return_value = mock_session_obj return mock_session_obj
@classmethod
def setUpClass(cls) -> None:
    """Parse the HTML fixture file once and share the resulting soup across all tests."""
    fixture_markup = cls.TEST_HTML_FILE_PATH.read_text()
    cls.test_soup = BeautifulSoup(fixture_markup, HTML_PARSER)
@patch.object(functions, 'ClientSession')
async def test_soup_from_url(self, mock_session_cls):
test_html = '<b>foo</b>'
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html)
expected_output = BeautifulSoup(test_html, 'html.parser') expected_output = BeautifulSoup(test_html, 'html.parser')
output = await functions.soup_from_url('baz') output = await functions.soup_from_url('baz')
self.assertEqual(expected_output, output) self.assertEqual(expected_output, output)
@ -46,10 +50,10 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
def test_is_relevant_table_row(self): def test_is_relevant_table_row(self):
test_html = '<tr><td><div> Cash & Short Term Investments </div></td></tr>' test_html = '<tr><td><div> Cash & Short Term Investments </div></td></tr>'
test_soup = BeautifulSoup(test_html, functions.HTML_PARSER) test_soup = BeautifulSoup(test_html, HTML_PARSER)
self.assertTrue(functions.is_relevant_table_row(test_soup.tr)) self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
test_html = '<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>' test_html = '<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>'
test_soup = BeautifulSoup(test_html, functions.HTML_PARSER) test_soup = BeautifulSoup(test_html, HTML_PARSER)
self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
@patch.object(functions, 'is_relevant_table_row') @patch.object(functions, 'is_relevant_table_row')
@ -62,24 +66,97 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
def test_extract_row_data(self): def test_extract_row_data(self):
test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table
expected_output = ('Item_1', (11000000.0, -22000000.0)) expected_output = ('Item_1', (11000000, -22000000))
output = functions.extract_row_data(test_table.tbody.tr) output = functions.extract_row_data(test_table.tbody.tr)
# print(test_table.tbody.tr) # debug
self.assertTupleEqual(expected_output, output) self.assertTupleEqual(expected_output, output)
def test_extract_all_data(self): @patch.object(functions, 'extract_row_data')
expected_output = {'Item_1': (11000000.0, -22000000.0)} @patch.object(functions, 'find_relevant_table_rows')
output = functions.extract_row_data(self.test_soup) @patch.object(functions, 'extract_end_dates')
def test_extract_all_data(self, mock_extract_end_dates, mock_find_relevant_table_rows, mock_extract_row_data):
test_end_dates = ('foo', 'bar')
mock_extract_end_dates.return_value = test_end_dates
test_relevant_rows = ['tr1', 'tr2']
mock_find_relevant_table_rows.return_value = test_relevant_rows
test_row_data = ('item_name', (123, 456))
mock_extract_row_data.return_value = test_row_data
expected_output = {
None: test_end_dates,
test_row_data[0]: test_row_data[1],
test_row_data[0]: test_row_data[1],
}
output = functions.extract_all_data(self.test_soup)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_extract_end_dates.assert_called_once_with(self.test_soup)
mock_find_relevant_table_rows.assert_called_once_with(self.test_soup)
mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])
async def test_get_balance_sheet(self): @patch.object(functions, 'extract_all_data')
pass @patch.object(functions, 'soup_from_url')
async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data):
# TODO: separate dictionaries for different periods?
mock_session = MagicMock()
test_ticker = 'bar'
test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[BS]}'
mock_soup_from_url.return_value = mock_soup = MagicMock()
mock_extract_all_data.return_value = mock_data = {'foo': 'bar'}
async def test_get_income_statement(self): yearly, quarterly = False, False
pass expected_output = {}
output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_not_called()
mock_extract_all_data.assert_not_called()
async def test_get_cash_flow_statement(self): yearly = True
pass expected_output = mock_data
output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_called_once_with(test_url, mock_session)
mock_extract_all_data.assert_called_once_with(mock_soup)
mock_soup_from_url.reset_mock()
mock_extract_all_data.reset_mock()
async def test_get_company_financials(self): quarterly = True
pass output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_has_calls([
call(test_url, mock_session),
call(test_url + '/quarter', mock_session),
])
mock_extract_all_data.assert_has_calls([call(mock_soup), call(mock_soup)])
@patch.object(functions, '_get_financial_statement')
async def test_get_balance_sheet(self, mock__get_financial_statement):
    """
    `get_balance_sheet` must delegate to `_get_financial_statement` with the `BS` key
    and pass all other arguments through unchanged, returning its result as-is.
    """
    symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
    mock__get_financial_statement.return_value = expected_output = 'bar'
    # `get_balance_sheet` is a coroutine function; without `await` the call would only
    # create a coroutine object and the patched helper would never be invoked.
    output = await functions.get_balance_sheet(symbol, yearly, quarterly, mock_session)
    self.assertEqual(expected_output, output)
    mock__get_financial_statement.assert_called_once_with(BS, symbol, yearly, quarterly, mock_session)
@patch.object(functions, '_get_financial_statement')
async def test_get_income_statement(self, mock__get_financial_statement):
    """
    `get_income_statement` must delegate to `_get_financial_statement` with the `IS` key
    and pass all other arguments through unchanged, returning its result as-is.
    """
    symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
    mock__get_financial_statement.return_value = expected_output = 'bar'
    # The function under test is called with `await` like the other coroutine functions
    # in this test case; a bare call would leave `output` as an un-run coroutine object.
    output = await functions.get_income_statement(symbol, yearly, quarterly, mock_session)
    self.assertEqual(expected_output, output)
    mock__get_financial_statement.assert_called_once_with(IS, symbol, yearly, quarterly, mock_session)
@patch.object(functions, '_get_financial_statement')
async def test_get_cash_flow_statement(self, mock__get_financial_statement):
    """
    `get_cash_flow_statement` must delegate to `_get_financial_statement` with the `CF` key
    and pass all other arguments through unchanged, returning its result as-is.
    """
    symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
    mock__get_financial_statement.return_value = expected_output = 'bar'
    # The function under test is called with `await` like the other coroutine functions
    # in this test case; a bare call would leave `output` as an un-run coroutine object.
    output = await functions.get_cash_flow_statement(symbol, yearly, quarterly, mock_session)
    self.assertEqual(expected_output, output)
    mock__get_financial_statement.assert_called_once_with(CF, symbol, yearly, quarterly, mock_session)
@patch.object(functions, 'get_cash_flow_statement')
@patch.object(functions, 'get_income_statement')
@patch.object(functions, 'get_balance_sheet')
async def test_get_company_financials(self, mock_get_bs, mock_get_is, mock_get_cf):
symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
mock_get_bs.return_value = {'a': 1}
mock_get_is.return_value = {'b': 2}
mock_get_cf.return_value = {'c': 3}
expected_output = {'a': 1, 'b': 2, 'c': 3}
# TODO: unfinished