diff --git a/src/mwfin/constants.py b/src/mwfin/constants.py
index b39a482..9e1f0aa 100644
--- a/src/mwfin/constants.py
+++ b/src/mwfin/constants.py
@@ -1,3 +1,14 @@
+HTML_PARSER = 'html.parser'  # parser name handed to `BeautifulSoup(...)`
+DOMAIN = 'www.marketwatch.com'
+BASE_URL = f'https://{DOMAIN}/investing/stock'  # the ticker symbol and `/financials` are appended to this
+
+BS, IS, CF = 'bs', 'is', 'cf'  # keys for balance sheet, income statement and cash flow statement
+FIN_STMT_URL_SUFFIX = {
+ BS: '/balance-sheet',
+ IS: '',  # no suffix: the income statement is served from the bare `/financials` path
+ CF: '/cash-flow'
+}
+
# All items marked `False` do not need to be scraped
# because they are calculated from other items (e.g. growth or ratios).
FINANCIAL_STATEMENT_ITEMS = {
diff --git a/src/mwfin/functions.py b/src/mwfin/functions.py
index e22133f..59f2ed7 100644
--- a/src/mwfin/functions.py
+++ b/src/mwfin/functions.py
@@ -11,8 +11,6 @@ from bs4.element import ResultSet, Tag
# the end dates of the reporting periods as strings (either years or quarters).
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
-HTML_PARSER = 'html.parser'
-
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
"""
@@ -58,6 +56,14 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict:
pass
+async def _get_financial_statement(statement: str, ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
+ session: ClientSession = None) -> ResultDict:
+ """
+ Returns data from the financial statement identified by `statement` for the company with the given ticker symbol (stub: not implemented yet).
+ """
+ pass # TODO: Don't allow both yearly and quarterly, instead only have `quarterly` Flag
+
+
async def get_balance_sheet(ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
session: ClientSession = None) -> ResultDict:
"""
diff --git a/tests/test_functions.py b/tests/test_functions.py
index ad4b223..a466d12 100644
--- a/tests/test_functions.py
+++ b/tests/test_functions.py
@@ -5,6 +5,7 @@ from unittest.mock import patch, MagicMock, AsyncMock, call
from bs4 import BeautifulSoup
from mwfin import functions
+from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF
THIS_DIR = Path(__file__).parent
@@ -16,23 +17,26 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
# view page source @ line 2055
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
- @classmethod
- def setUpClass(cls) -> None:
- with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
- test_html = f.read()
- cls.test_soup = BeautifulSoup(test_html, functions.HTML_PARSER)
-
- @patch.object(functions, 'ClientSession')
- async def test_soup_from_url(self, mock_session_cls):
- test_html = 'foo'
+ @staticmethod
+ def get_mock_session(response_text: str = None) -> MagicMock:  # builds a session mock whose `get()` result yields a mock response from `__aenter__`
mock_response = MagicMock()
- mock_response.text = AsyncMock(return_value=test_html)
+ mock_response.text = AsyncMock(return_value=response_text)  # awaiting `text()` returns `response_text`
mock_get_return = MagicMock()
mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
mock_session_obj = MagicMock()
mock_session_obj.get = MagicMock(return_value=mock_get_return)
- mock_session_cls.return_value = mock_session_obj
+ return mock_session_obj
+ @classmethod
+ def setUpClass(cls) -> None:  # reads the HTML fixture file and stores the parsed soup as `cls.test_soup`
+ with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
+ test_html = f.read()
+ cls.test_soup = BeautifulSoup(test_html, HTML_PARSER)
+
+ @patch.object(functions, 'ClientSession')
+ async def test_soup_from_url(self, mock_session_cls):
+ test_html = 'foo'
+ mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html)
expected_output = BeautifulSoup(test_html, 'html.parser')
output = await functions.soup_from_url('baz')
self.assertEqual(expected_output, output)
@@ -46,10 +50,10 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
def test_is_relevant_table_row(self):
test_html = '
Cash & Short Term Investments |
'
- test_soup = BeautifulSoup(test_html, functions.HTML_PARSER)
+ test_soup = BeautifulSoup(test_html, HTML_PARSER)
self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
test_html = ' Cash & Short Term Investments Growth |
'
- test_soup = BeautifulSoup(test_html, functions.HTML_PARSER)
+ test_soup = BeautifulSoup(test_html, HTML_PARSER)
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
@patch.object(functions, 'is_relevant_table_row')
@@ -62,24 +66,97 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
def test_extract_row_data(self):
test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table
- expected_output = ('Item_1', (11000000.0, -22000000.0))
+ expected_output = ('Item_1', (11000000, -22000000))
output = functions.extract_row_data(test_table.tbody.tr)
- # print(test_table.tbody.tr) # debug
self.assertTupleEqual(expected_output, output)
- def test_extract_all_data(self):
- expected_output = {'Item_1': (11000000.0, -22000000.0)}
- output = functions.extract_row_data(self.test_soup)
+    @patch.object(functions, 'extract_row_data')
+    @patch.object(functions, 'find_relevant_table_rows')
+    @patch.object(functions, 'extract_end_dates')
+    def test_extract_all_data(self, mock_extract_end_dates, mock_find_relevant_table_rows, mock_extract_row_data):
+        test_end_dates = ('foo', 'bar')
+        mock_extract_end_dates.return_value = test_end_dates
+        test_relevant_rows = ['tr1', 'tr2']
+        mock_find_relevant_table_rows.return_value = test_relevant_rows
+        test_row_data = ('item_name', (123, 456))  # every mocked row yields this same (name, values) pair
+        mock_extract_row_data.return_value = test_row_data
+        expected_output = {
+            None: test_end_dates,  # the end dates are expected under the `None` key
+            # Both mocked rows yield the same item name, so they collapse into one entry.
+            test_row_data[0]: test_row_data[1],
+        }
+        output = functions.extract_all_data(self.test_soup)
         self.assertDictEqual(expected_output, output)
+        mock_extract_end_dates.assert_called_once_with(self.test_soup)
+        mock_find_relevant_table_rows.assert_called_once_with(self.test_soup)
+        mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])
- async def test_get_balance_sheet(self):
- pass
+ @patch.object(functions, 'extract_all_data')
+ @patch.object(functions, 'soup_from_url')
+ async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data):
+ # TODO: separate dictionaries for different periods?
+ mock_session = MagicMock()
+ test_ticker = 'bar'
+ test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[BS]}'  # URL expected for the yearly balance sheet request
+ mock_soup_from_url.return_value = mock_soup = MagicMock()
+ mock_extract_all_data.return_value = mock_data = {'foo': 'bar'}
- async def test_get_income_statement(self):
- pass
+ yearly, quarterly = False, False  # case 1: no period requested -> empty dict, nothing fetched or parsed
+ expected_output = {}
+ output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
+ self.assertDictEqual(expected_output, output)
+ mock_soup_from_url.assert_not_called()
+ mock_extract_all_data.assert_not_called()
- async def test_get_cash_flow_statement(self):
- pass
+ yearly = True  # case 2: yearly only -> exactly one fetch of `test_url`, parsed once
+ expected_output = mock_data
+ output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
+ self.assertDictEqual(expected_output, output)
+ mock_soup_from_url.assert_called_once_with(test_url, mock_session)
+ mock_extract_all_data.assert_called_once_with(mock_soup)
+ mock_soup_from_url.reset_mock()  # clear recorded calls before the next case
+ mock_extract_all_data.reset_mock()
- async def test_get_company_financials(self):
- pass
+ quarterly = True  # case 3: both periods -> a second fetch of `test_url` with '/quarter' appended
+ output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
+ self.assertDictEqual(expected_output, output)
+ mock_soup_from_url.assert_has_calls([
+ call(test_url, mock_session),
+ call(test_url + '/quarter', mock_session),
+ ])
+ mock_extract_all_data.assert_has_calls([call(mock_soup), call(mock_soup)])
+
+    @patch.object(functions, '_get_financial_statement')
+    async def test_get_balance_sheet(self, mock__get_financial_statement):  # wrapper must delegate with the `BS` key
+        symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
+        mock__get_financial_statement.return_value = expected_output = 'bar'
+        output = await functions.get_balance_sheet(symbol, yearly, quarterly, mock_session)  # `async def`: must be awaited
+        self.assertEqual(expected_output, output)
+        mock__get_financial_statement.assert_called_once_with(BS, symbol, yearly, quarterly, mock_session)
+
+    @patch.object(functions, '_get_financial_statement')
+    async def test_get_income_statement(self, mock__get_financial_statement):  # wrapper must delegate with the `IS` key
+        symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
+        mock__get_financial_statement.return_value = expected_output = 'bar'
+        output = await functions.get_income_statement(symbol, yearly, quarterly, mock_session)  # must be awaited
+        self.assertEqual(expected_output, output)
+        mock__get_financial_statement.assert_called_once_with(IS, symbol, yearly, quarterly, mock_session)
+
+    @patch.object(functions, '_get_financial_statement')
+    async def test_get_cash_flow_statement(self, mock__get_financial_statement):  # wrapper must delegate with the `CF` key
+        symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
+        mock__get_financial_statement.return_value = expected_output = 'bar'
+        output = await functions.get_cash_flow_statement(symbol, yearly, quarterly, mock_session)  # must be awaited
+        self.assertEqual(expected_output, output)
+        mock__get_financial_statement.assert_called_once_with(CF, symbol, yearly, quarterly, mock_session)
+
+ @patch.object(functions, 'get_cash_flow_statement')
+ @patch.object(functions, 'get_income_statement')
+ @patch.object(functions, 'get_balance_sheet')
+ async def test_get_company_financials(self, mock_get_bs, mock_get_is, mock_get_cf):
+ symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
+ mock_get_bs.return_value = {'a': 1}
+ mock_get_is.return_value = {'b': 2}
+ mock_get_cf.return_value = {'c': 3}
+ expected_output = {'a': 1, 'b': 2, 'c': 3}
+ # TODO: unfinished