more tests, idea for change
This commit is contained in:
parent
1e0410a254
commit
776c909956
@ -1,3 +1,14 @@
|
|||||||
|
# Name of the parser backend passed to `BeautifulSoup`.
HTML_PARSER = 'html.parser'

# Host and base path from which the per-company stock page URLs are built.
DOMAIN = 'www.marketwatch.com'
BASE_URL = f'https://{DOMAIN}/investing/stock'

# Short identifiers for the three financial statements:
# balance sheet, income statement and cash flow statement.
BS, IS, CF = 'bs', 'is', 'cf'

# URL path suffix for each financial statement, keyed by statement identifier.
# The income statement maps to the empty string, i.e. it adds no extra path segment.
FIN_STMT_URL_SUFFIX = {
    BS: '/balance-sheet',
    IS: '',
    CF: '/cash-flow',
}
|
||||||
|
|
||||||
# All items marked `False` do not need to be scraped
|
# All items marked `False` do not need to be scraped
|
||||||
# because they are calculated from other items (e.g. growth or ratios).
|
# because they are calculated from other items (e.g. growth or ratios).
|
||||||
FINANCIAL_STATEMENT_ITEMS = {
|
FINANCIAL_STATEMENT_ITEMS = {
|
||||||
|
@ -11,8 +11,6 @@ from bs4.element import ResultSet, Tag
|
|||||||
# the end dates of the reporting periods as strings (either years or quarters).
|
# the end dates of the reporting periods as strings (either years or quarters).
|
||||||
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
|
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
|
||||||
|
|
||||||
HTML_PARSER = 'html.parser'
|
|
||||||
|
|
||||||
|
|
||||||
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
|
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
|
||||||
"""
|
"""
|
||||||
@ -58,6 +56,14 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_financial_statement(statement: str, ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
                                   session: ClientSession = None) -> ResultDict:
    """
    Returns data from the specified financial statement of the specified company.

    NOTE: This is still a stub; it has no implementation yet and therefore returns `None`
    rather than the annotated `ResultDict`.

    Args:
        statement: Identifier of the financial statement to retrieve
            (presumably one of the `bs`/`is`/`cf` identifiers -- TODO confirm once implemented).
        ticker_symbol: Ticker symbol of the company.
        yearly: Whether yearly data is requested.
        quarterly: Whether quarterly data is requested.
        session: Optional `ClientSession` to use for the requests.
    """
    pass  # TODO: Don't allow both yearly and quarterly, instead only have `quarterly` Flag
|
||||||
|
|
||||||
|
|
||||||
async def get_balance_sheet(ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
|
async def get_balance_sheet(ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
|
||||||
session: ClientSession = None) -> ResultDict:
|
session: ClientSession = None) -> ResultDict:
|
||||||
"""
|
"""
|
||||||
|
@ -5,6 +5,7 @@ from unittest.mock import patch, MagicMock, AsyncMock, call
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from mwfin import functions
|
from mwfin import functions
|
||||||
|
from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF
|
||||||
|
|
||||||
|
|
||||||
THIS_DIR = Path(__file__).parent
|
THIS_DIR = Path(__file__).parent
|
||||||
@ -16,23 +17,26 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
# view page source @ line 2055
|
# view page source @ line 2055
|
||||||
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
|
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
|
||||||
|
|
||||||
@classmethod
|
@staticmethod
|
||||||
def setUpClass(cls) -> None:
|
def get_mock_session(response_text: str = None) -> MagicMock:
|
||||||
with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
|
|
||||||
test_html = f.read()
|
|
||||||
cls.test_soup = BeautifulSoup(test_html, functions.HTML_PARSER)
|
|
||||||
|
|
||||||
@patch.object(functions, 'ClientSession')
|
|
||||||
async def test_soup_from_url(self, mock_session_cls):
|
|
||||||
test_html = '<b>foo</b>'
|
|
||||||
mock_response = MagicMock()
|
mock_response = MagicMock()
|
||||||
mock_response.text = AsyncMock(return_value=test_html)
|
mock_response.text = AsyncMock(return_value=response_text)
|
||||||
mock_get_return = MagicMock()
|
mock_get_return = MagicMock()
|
||||||
mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
|
mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
|
||||||
mock_session_obj = MagicMock()
|
mock_session_obj = MagicMock()
|
||||||
mock_session_obj.get = MagicMock(return_value=mock_get_return)
|
mock_session_obj.get = MagicMock(return_value=mock_get_return)
|
||||||
mock_session_cls.return_value = mock_session_obj
|
return mock_session_obj
|
||||||
|
|
||||||
|
@classmethod
def setUpClass(cls) -> None:
    """Parses the test HTML file once and stores the resulting soup on the class for all tests."""
    # `TEST_HTML_FILE_PATH` is a `Path`, so it can be read directly without a manual `open`.
    test_html = cls.TEST_HTML_FILE_PATH.read_text()
    cls.test_soup = BeautifulSoup(test_html, HTML_PARSER)
|
||||||
|
|
||||||
|
@patch.object(functions, 'ClientSession')
|
||||||
|
async def test_soup_from_url(self, mock_session_cls):
|
||||||
|
test_html = '<b>foo</b>'
|
||||||
|
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html)
|
||||||
expected_output = BeautifulSoup(test_html, 'html.parser')
|
expected_output = BeautifulSoup(test_html, 'html.parser')
|
||||||
output = await functions.soup_from_url('baz')
|
output = await functions.soup_from_url('baz')
|
||||||
self.assertEqual(expected_output, output)
|
self.assertEqual(expected_output, output)
|
||||||
@ -46,10 +50,10 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
|
|
||||||
def test_is_relevant_table_row(self):
|
def test_is_relevant_table_row(self):
|
||||||
test_html = '<tr><td><div> Cash & Short Term Investments </div></td></tr>'
|
test_html = '<tr><td><div> Cash & Short Term Investments </div></td></tr>'
|
||||||
test_soup = BeautifulSoup(test_html, functions.HTML_PARSER)
|
test_soup = BeautifulSoup(test_html, HTML_PARSER)
|
||||||
self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
|
self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
|
||||||
test_html = '<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>'
|
test_html = '<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>'
|
||||||
test_soup = BeautifulSoup(test_html, functions.HTML_PARSER)
|
test_soup = BeautifulSoup(test_html, HTML_PARSER)
|
||||||
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
||||||
|
|
||||||
@patch.object(functions, 'is_relevant_table_row')
|
@patch.object(functions, 'is_relevant_table_row')
|
||||||
@ -62,24 +66,97 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
|
|
||||||
def test_extract_row_data(self):
|
def test_extract_row_data(self):
|
||||||
test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table
|
test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table
|
||||||
expected_output = ('Item_1', (11000000.0, -22000000.0))
|
expected_output = ('Item_1', (11000000, -22000000))
|
||||||
output = functions.extract_row_data(test_table.tbody.tr)
|
output = functions.extract_row_data(test_table.tbody.tr)
|
||||||
# print(test_table.tbody.tr) # debug
|
|
||||||
self.assertTupleEqual(expected_output, output)
|
self.assertTupleEqual(expected_output, output)
|
||||||
|
|
||||||
def test_extract_all_data(self):
|
@patch.object(functions, 'extract_row_data')
|
||||||
expected_output = {'Item_1': (11000000.0, -22000000.0)}
|
@patch.object(functions, 'find_relevant_table_rows')
|
||||||
output = functions.extract_row_data(self.test_soup)
|
@patch.object(functions, 'extract_end_dates')
|
||||||
|
def test_extract_all_data(self, mock_extract_end_dates, mock_find_relevant_table_rows, mock_extract_row_data):
|
||||||
|
test_end_dates = ('foo', 'bar')
|
||||||
|
mock_extract_end_dates.return_value = test_end_dates
|
||||||
|
test_relevant_rows = ['tr1', 'tr2']
|
||||||
|
mock_find_relevant_table_rows.return_value = test_relevant_rows
|
||||||
|
test_row_data = ('item_name', (123, 456))
|
||||||
|
mock_extract_row_data.return_value = test_row_data
|
||||||
|
expected_output = {
|
||||||
|
None: test_end_dates,
|
||||||
|
test_row_data[0]: test_row_data[1],
|
||||||
|
test_row_data[0]: test_row_data[1],
|
||||||
|
}
|
||||||
|
output = functions.extract_all_data(self.test_soup)
|
||||||
self.assertDictEqual(expected_output, output)
|
self.assertDictEqual(expected_output, output)
|
||||||
|
mock_extract_end_dates.assert_called_once_with(self.test_soup)
|
||||||
|
mock_find_relevant_table_rows.assert_called_once_with(self.test_soup)
|
||||||
|
mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])
|
||||||
|
|
||||||
async def test_get_balance_sheet(self):
|
@patch.object(functions, 'extract_all_data')
|
||||||
pass
|
@patch.object(functions, 'soup_from_url')
|
||||||
|
async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data):
|
||||||
|
# TODO: separate dictionaries for different periods?
|
||||||
|
mock_session = MagicMock()
|
||||||
|
test_ticker = 'bar'
|
||||||
|
test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[BS]}'
|
||||||
|
mock_soup_from_url.return_value = mock_soup = MagicMock()
|
||||||
|
mock_extract_all_data.return_value = mock_data = {'foo': 'bar'}
|
||||||
|
|
||||||
async def test_get_income_statement(self):
|
yearly, quarterly = False, False
|
||||||
pass
|
expected_output = {}
|
||||||
|
output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
|
||||||
|
self.assertDictEqual(expected_output, output)
|
||||||
|
mock_soup_from_url.assert_not_called()
|
||||||
|
mock_extract_all_data.assert_not_called()
|
||||||
|
|
||||||
async def test_get_cash_flow_statement(self):
|
yearly = True
|
||||||
pass
|
expected_output = mock_data
|
||||||
|
output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
|
||||||
|
self.assertDictEqual(expected_output, output)
|
||||||
|
mock_soup_from_url.assert_called_once_with(test_url, mock_session)
|
||||||
|
mock_extract_all_data.assert_called_once_with(mock_soup)
|
||||||
|
mock_soup_from_url.reset_mock()
|
||||||
|
mock_extract_all_data.reset_mock()
|
||||||
|
|
||||||
async def test_get_company_financials(self):
|
quarterly = True
|
||||||
pass
|
output = await functions._get_financial_statement(BS, test_ticker, yearly, quarterly, mock_session)
|
||||||
|
self.assertDictEqual(expected_output, output)
|
||||||
|
mock_soup_from_url.assert_has_calls([
|
||||||
|
call(test_url, mock_session),
|
||||||
|
call(test_url + '/quarter', mock_session),
|
||||||
|
])
|
||||||
|
mock_extract_all_data.assert_has_calls([call(mock_soup), call(mock_soup)])
|
||||||
|
|
||||||
|
@patch.object(functions, '_get_financial_statement')
async def test_get_balance_sheet(self, mock__get_financial_statement):
    """Checks that `get_balance_sheet` delegates to `_get_financial_statement` using the `BS` identifier."""
    symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
    mock__get_financial_statement.return_value = expected_output = 'bar'
    # `get_balance_sheet` is a coroutine function; it must be awaited,
    # otherwise `output` would be a coroutine object instead of its result.
    output = await functions.get_balance_sheet(symbol, yearly, quarterly, mock_session)
    self.assertEqual(expected_output, output)
    mock__get_financial_statement.assert_called_once_with(BS, symbol, yearly, quarterly, mock_session)
|
||||||
|
|
||||||
|
@patch.object(functions, '_get_financial_statement')
async def test_get_income_statement(self, mock__get_financial_statement):
    """Checks that `get_income_statement` delegates to `_get_financial_statement` using the `IS` identifier."""
    symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
    mock__get_financial_statement.return_value = expected_output = 'bar'
    # The patched `_get_financial_statement` is an async function, so the value handed back
    # by the function under test has to be awaited before it can be compared.
    output = await functions.get_income_statement(symbol, yearly, quarterly, mock_session)
    self.assertEqual(expected_output, output)
    mock__get_financial_statement.assert_called_once_with(IS, symbol, yearly, quarterly, mock_session)
|
||||||
|
|
||||||
|
@patch.object(functions, '_get_financial_statement')
async def test_get_cash_flow_statement(self, mock__get_financial_statement):
    """Checks that `get_cash_flow_statement` delegates to `_get_financial_statement` using the `CF` identifier."""
    symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
    mock__get_financial_statement.return_value = expected_output = 'bar'
    # The patched `_get_financial_statement` is an async function, so the value handed back
    # by the function under test has to be awaited before it can be compared.
    output = await functions.get_cash_flow_statement(symbol, yearly, quarterly, mock_session)
    self.assertEqual(expected_output, output)
    mock__get_financial_statement.assert_called_once_with(CF, symbol, yearly, quarterly, mock_session)
|
||||||
|
|
||||||
|
@patch.object(functions, 'get_cash_flow_statement')
@patch.object(functions, 'get_income_statement')
@patch.object(functions, 'get_balance_sheet')
async def test_get_company_financials(self, mock_get_bs, mock_get_is, mock_get_cf):
    """Prepares the mocks for testing `get_company_financials`; the actual call and assertions are still missing."""
    # Patch decorators are applied bottom-up, hence the mock arguments arrive in the order bs, is, cf.
    symbol, yearly, quarterly, mock_session = 'foo', True, False, MagicMock()
    mock_get_bs.return_value = {'a': 1}
    mock_get_is.return_value = {'b': 2}
    mock_get_cf.return_value = {'c': 3}
    # The expected result is the union of the three mocked statement dictionaries.
    expected_output = {'a': 1, 'b': 2, 'c': 3}
    # TODO: unfinished
|
||||||
|
Loading…
Reference in New Issue
Block a user