Compare commits

...

2 Commits

Author SHA1 Message Date
Daniil Fajnberg 326b956be4 added small integration test 2021-11-27 18:35:12 +01:00
Daniil Fajnberg 63008be829 bugfix, minor change, full coverage 2021-11-27 18:11:16 +01:00
4 changed files with 34 additions and 15 deletions

View File

@ -1,6 +1,6 @@
HTML_PARSER = 'html.parser' HTML_PARSER = 'html.parser'
DOMAIN = 'www.marketwatch.com' DOMAIN = 'www.marketwatch.com'
BASE_URL = f'https://{DOMAIN}/investing/stock/' BASE_URL = f'https://{DOMAIN}/investing/stock'
BS, IS, CF = 'bs', 'is', 'cf' BS, IS, CF = 'bs', 'is', 'cf'
FIN_STMT_URL_SUFFIX = { FIN_STMT_URL_SUFFIX = {

View File

@ -1,9 +1,9 @@
import logging import logging
from typing import Union from typing import Union, List
from aiohttp.client import ClientSession from aiohttp.client import ClientSession
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import ResultSet, Tag from bs4.element import Tag
from . import constants from . import constants
@ -39,8 +39,6 @@ def is_relevant_table_row(tr: Tag) -> bool:
""" """
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown. Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
""" """
if tr.name != 'tr':
return False
item_name = str(tr.td.div.string).strip() item_name = str(tr.td.div.string).strip()
try: try:
return constants.FINANCIAL_STATEMENT_ITEMS[item_name] return constants.FINANCIAL_STATEMENT_ITEMS[item_name]
@ -49,11 +47,12 @@ def is_relevant_table_row(tr: Tag) -> bool:
return False return False
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet: def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]:
""" """
Returns the table rows containing the data of interest. Returns the table rows containing the data of interest.
""" """
return soup.find('div', attrs={'class': 'financials'}).tbody.find_all(is_relevant_table_row) trs = soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
return [tr for tr in trs if is_relevant_table_row(tr)]
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
@ -84,8 +83,9 @@ async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly
""" """
Returns data from the specified financial statement of the specified company. Returns data from the specified financial statement of the specified company.
""" """
q = '/quarter' if quarterly else '' url = f'{constants.BASE_URL}/{ticker_symbol}/financials{constants.FIN_STMT_URL_SUFFIX[statement]}'
url = constants.BASE_URL + ticker_symbol + '/financials' + constants.FIN_STMT_URL_SUFFIX[statement] + q if quarterly:
url += '/quarter'
soup = await soup_from_url(url, session) soup = await soup_from_url(url, session)
return extract_all_data(soup) return extract_all_data(soup)

View File

@ -19,6 +19,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html') TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
log_lvl: int log_lvl: int
test_html: str
@staticmethod @staticmethod
def get_mock_session(response_text: str = None) -> MagicMock: def get_mock_session(response_text: str = None) -> MagicMock:
@ -33,8 +34,8 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
@classmethod @classmethod
def setUpClass(cls) -> None: def setUpClass(cls) -> None:
with open(cls.TEST_HTML_FILE_PATH, 'r') as f: with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
test_html = f.read() cls.test_html = f.read()
cls.test_soup = BeautifulSoup(test_html, HTML_PARSER) cls.test_soup = BeautifulSoup(cls.test_html, HTML_PARSER)
cls.log_lvl = functions.log.level cls.log_lvl = functions.log.level
functions.log.setLevel(logging.CRITICAL) functions.log.setLevel(logging.CRITICAL)
@ -62,7 +63,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER) test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER)
self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
self.assertFalse(functions.is_relevant_table_row(test_soup.div))
@patch.object(functions, 'is_relevant_table_row') @patch.object(functions, 'is_relevant_table_row')
def test_find_relevant_table_rows(self, mock_is_relevant_table_row): def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
@ -104,7 +104,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data): async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data):
mock_session = MagicMock() mock_session = MagicMock()
test_ticker, statement = 'bar', BS test_ticker, statement = 'bar', BS
test_url = f'{BASE_URL}{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}' test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
mock_soup_from_url.return_value = mock_soup = MagicMock() mock_soup_from_url.return_value = mock_soup = MagicMock()
mock_extract_all_data.return_value = expected_output = {'foo': 'bar'} mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
@ -166,3 +166,22 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
mock_get_bs.assert_called_once_with(symbol, quarterly, mock_session) mock_get_bs.assert_called_once_with(symbol, quarterly, mock_session)
mock_get_is.assert_called_once_with(symbol, quarterly, mock_session) mock_get_is.assert_called_once_with(symbol, quarterly, mock_session)
mock_get_cf.assert_called_once_with(symbol, quarterly, mock_session) mock_get_cf.assert_called_once_with(symbol, quarterly, mock_session)
# Integration-style test: calls `functions.get_company_financials` with a mock session instead of
# performing real web requests, then checks both the returned data and the URLs that were requested.
# `ClientSession` is patched as well, although the mock session is also passed in explicitly below.
@patch.object(functions, 'ClientSession')
async def test_integration_get_company_financials(self, mock_session_cls):
# The mock session is built by `get_mock_session` from the test HTML that `setUpClass` read from file.
# NOTE(review): presumably every mocked response returns that same HTML text -- confirm in `get_mock_session`.
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(self.test_html)
symbol = 'foo'
# The web request is mocked, so every request receives the same HTML markup.
# The function makes three separate requests (see the asserted calls below), each of which updates
# the output dictionary with the same data. The result is therefore expected to be the same as for
# a single request: the end dates plus exactly one financial statement item.
expected_output = {
END_DATE: ('End_Date_1', 'End_Date_2'),
'Cash & Short Term Investments': (11000000, -22000000),
}
output = await functions.get_company_financials(symbol, session=mock_session_obj)
self.assertDictEqual(expected_output, output)
# One GET per statement URL suffix, expected in the order BS, IS, CF.
mock_session_obj.get.assert_has_calls([
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[BS]}'),
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[IS]}'),
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[CF]}'),
])

View File

@ -23,13 +23,13 @@
</thead> </thead>
<tbody> <tbody>
<tr> <tr>
<td><div> Item_1 </div><div> Item_1 </div></td> <td><div> Cash & Short Term Investments </div><div> Cash & Short Term Investments </div></td>
<td></td> <td></td>
<td></td> <td></td>
<td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td> <td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td>
</tr> </tr>
<tr> <tr>
<td><div> Item_2 </div><div> Item_2 </div></td> <td><div> Cash & Short Term Investments Growth </div><div> Cash & Short Term Investments Growth </div></td>
<td></td> <td></td>
<td></td> <td></td>
<td></td> <td></td>