diff --git a/src/mwfin/constants.py b/src/mwfin/constants.py index b39a482..9e1f0aa 100644 --- a/src/mwfin/constants.py +++ b/src/mwfin/constants.py @@ -1,3 +1,14 @@ +HTML_PARSER = 'html.parser' +DOMAIN = 'www.marketwatch.com' +BASE_URL = f'https://{DOMAIN}/investing/stock' + +BS, IS, CF = 'bs', 'is', 'cf' +FIN_STMT_URL_SUFFIX = { + BS: '/balance-sheet', + IS: '', + CF: '/cash-flow' +} + # All items marked `False` do not need to be scraped # because they are calculated from other items (e.g. growth or ratios). FINANCIAL_STATEMENT_ITEMS = { diff --git a/src/mwfin/functions.py b/src/mwfin/functions.py index e22133f..59f2ed7 100644 --- a/src/mwfin/functions.py +++ b/src/mwfin/functions.py @@ -11,8 +11,6 @@ from bs4.element import ResultSet, Tag # the end dates of the reporting periods as strings (either years or quarters). ResultDict = dict[str, Union[tuple[int], tuple[str]]] -HTML_PARSER = 'html.parser' - async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup: """ @@ -58,6 +56,14 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict: pass +async def _get_financial_statement(statement: str, ticker_symbol: str, yearly: bool = True, quarterly: bool = True, + session: ClientSession = None) -> ResultDict: + """ + Returns data from the specified financial statement of the specified company. + """ + pass # TODO: Don't allow both yearly and quarterly, instead only have `quarterly` Flag + + async def get_balance_sheet(ticker_symbol: str, yearly: bool = True, quarterly: bool = True, session: ClientSession = None) -> ResultDict: """ diff --git a/tests/test_functions.py b/tests/test_functions.py index ad4b223..a466d12 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -5,6 +5,7 @@ from unittest.mock import patch, MagicMock, AsyncMock, call from bs4 import BeautifulSoup from mwfin import functions +from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF THIS_DIR = Path(__file__).parent @@ -16,23 +17,26 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): # view page source @ line 2055 TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html') - @classmethod - def setUpClass(cls) -> None: - with open(cls.TEST_HTML_FILE_PATH, 'r') as f: - test_html = f.read() - cls.test_soup = BeautifulSoup(test_html, functions.HTML_PARSER) - - @patch.object(functions, 'ClientSession') - async def test_soup_from_url(self, mock_session_cls): - test_html = 'foo' + @staticmethod + def get_mock_session(response_text: str = None) -> MagicMock: mock_response = MagicMock() - mock_response.text = AsyncMock(return_value=test_html) + mock_response.text = AsyncMock(return_value=response_text) mock_get_return = MagicMock() mock_get_return.__aenter__ = AsyncMock(return_value=mock_response) mock_session_obj = MagicMock() mock_session_obj.get = MagicMock(return_value=mock_get_return) - mock_session_cls.return_value = mock_session_obj + return mock_session_obj + @classmethod + def setUpClass(cls) -> None: + with open(cls.TEST_HTML_FILE_PATH, 'r') as f: + test_html = f.read() + cls.test_soup = BeautifulSoup(test_html, HTML_PARSER) + + @patch.object(functions, 'ClientSession') + async def test_soup_from_url(self, mock_session_cls): + test_html = 'foo' + mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html) expected_output = BeautifulSoup(test_html, 'html.parser') output = await functions.soup_from_url('baz') self.assertEqual(expected_output, output) @@ -46,10 +50,10 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): def test_is_relevant_table_row(self): test_html = '
Cash & Short Term Investments
' - test_soup = BeautifulSoup(test_html, functions.HTML_PARSER) + test_soup = BeautifulSoup(test_html, HTML_PARSER) self.assertTrue(functions.is_relevant_table_row(test_soup.tr)) test_html = '
Cash & Short Term Investments Growth
' - test_soup = BeautifulSoup(test_html, functions.HTML_PARSER) + test_soup = BeautifulSoup(test_html, HTML_PARSER) self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) @patch.object(functions, 'is_relevant_table_row') @@ -62,24 +66,97 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): def test_extract_row_data(self): test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table - expected_output = ('Item_1', (11000000.0, -22000000.0)) + expected_output = ('Item_1', (11000000, -22000000)) output = functions.extract_row_data(test_table.tbody.tr) - # print(test_table.tbody.tr) # debug self.assertTupleEqual(expected_output, output) - def test_extract_all_data(self): - expected_output = {'Item_1': (11000000.0, -22000000.0)} - output = functions.extract_row_data(self.test_soup) + @patch.object(functions, 'extract_row_data') + @patch.object(functions, 'find_relevant_table_rows') + @patch.object(functions, 'extract_end_dates') + def test_extract_all_data(self, mock_extract_end_dates, mock_find_relevant_table_rows, mock_extract_row_data): + test_end_dates = ('foo', 'bar') + mock_extract_end_dates.return_value = test_end_dates + test_relevant_rows = ['tr1', 'tr2'] + mock_find_relevant_table_rows.return_value = test_relevant_rows + test_row_data = ('item_name', (123, 456)) + mock_extract_row_data.return_value = test_row_data + expected_output = { + None: test_end_dates, + test_row_data[0]: test_row_data[1], + test_row_data[0]: test_row_data[1], + } + output = functions.extract_all_data(self.test_soup) self.assertDictEqual(expected_output, output) + mock_extract_end_dates.assert_called_once_with(self.test_soup) + mock_find_relevant_table_rows.assert_called_once_with(self.test_soup) + mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])]) - async def test_get_balance_sheet(self): - pass + @patch.object(functions, 'extract_all_data') + @patch.object(functions, 'soup_from_url') + async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data): + # TODO: separate dictionaries for different periods? + mock_session = MagicMock() + test_ticker = 'bar' + test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[BS]}' + mock_soup_from_url.return_value = mock_soup = MagicMock() + mock_extract_all_data.return_value = mock_data = {'foo': 'bar'} - async def test_get_income_statement(self): - pass + yearly, quarterly = False, False + expected_output = {} + output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session) + self.assertDictEqual(expected_output, output) + mock_soup_from_url.assert_not_called() + mock_extract_all_data.assert_not_called() - async def test_get_cash_flow_statement(self): - pass + yearly = True + expected_output = mock_data + output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session) + self.assertDictEqual(expected_output, output) + mock_soup_from_url.assert_called_once_with(test_url, mock_session) + mock_extract_all_data.assert_called_once_with(mock_soup) + mock_soup_from_url.reset_mock() + mock_extract_all_data.reset_mock() - async def test_get_company_financials(self): - pass + quarterly = True + output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session) + self.assertDictEqual(expected_output, output) + mock_soup_from_url.assert_has_calls([ + call(test_url, mock_session), + call(test_url + '/quarter', mock_session), + ]) + mock_extract_all_data.assert_has_calls([call(mock_soup), call(mock_soup)]) + + @patch.object(functions, '_get_financial_statement') + async def test_get_balance_sheet(self, mock__get_financial_statement): + symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock() + mock__get_financial_statement.return_value = expected_output = 'bar' + output = functions.get_balance_sheet(symbol, yearly, quarterly, mock_session) + self.assertEqual(expected_output, output) + mock__get_financial_statement.assert_called_once_with(BS, symbol, yearly, quarterly, mock_session) + + @patch.object(functions, '_get_financial_statement') + async def test_get_income_statement(self, mock__get_financial_statement): + symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock() + mock__get_financial_statement.return_value = expected_output = 'bar' + output = functions.get_income_statement(symbol, yearly, quarterly, mock_session) + self.assertEqual(expected_output, output) + mock__get_financial_statement.assert_called_once_with(IS, symbol, yearly, quarterly, mock_session) + + @patch.object(functions, '_get_financial_statement') + async def test_get_cash_flow_statement(self, mock__get_financial_statement): + symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock() + mock__get_financial_statement.return_value = expected_output = 'bar' + output = functions.get_cash_flow_statement(symbol, yearly, quarterly, mock_session) + self.assertEqual(expected_output, output) + mock__get_financial_statement.assert_called_once_with(CF, symbol, yearly, quarterly, mock_session) + + @patch.object(functions, 'get_cash_flow_statement') + @patch.object(functions, 'get_income_statement') + @patch.object(functions, 'get_balance_sheet') + async def test_get_company_financials(self, mock_get_bs, mock_get_is, mock_get_cf): + symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock() + mock_get_bs.return_value = {'a': 1} + mock_get_is.return_value = {'b': 2} + mock_get_cf.return_value = {'c': 3} + expected_output = {'a': 1, 'b': 2, 'c': 3} + # TODO: unfinished