Compare commits
No commits in common. "326b956be4a29e22984dc62f80bb5f8b768e26a9" and "238054065957e1581bc4c80a19210d0ff40090dd" have entirely different histories.
326b956be4
...
2380540659
@ -1,6 +1,6 @@
|
|||||||
HTML_PARSER = 'html.parser'
|
HTML_PARSER = 'html.parser'
|
||||||
DOMAIN = 'www.marketwatch.com'
|
DOMAIN = 'www.marketwatch.com'
|
||||||
BASE_URL = f'https://{DOMAIN}/investing/stock'
|
BASE_URL = f'https://{DOMAIN}/investing/stock/'
|
||||||
|
|
||||||
BS, IS, CF = 'bs', 'is', 'cf'
|
BS, IS, CF = 'bs', 'is', 'cf'
|
||||||
FIN_STMT_URL_SUFFIX = {
|
FIN_STMT_URL_SUFFIX = {
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Union, List
|
from typing import Union
|
||||||
|
|
||||||
from aiohttp.client import ClientSession
|
from aiohttp.client import ClientSession
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bs4.element import Tag
|
from bs4.element import ResultSet, Tag
|
||||||
|
|
||||||
from . import constants
|
from . import constants
|
||||||
|
|
||||||
@ -39,6 +39,8 @@ def is_relevant_table_row(tr: Tag) -> bool:
|
|||||||
"""
|
"""
|
||||||
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
|
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
|
||||||
"""
|
"""
|
||||||
|
if tr.name != 'tr':
|
||||||
|
return False
|
||||||
item_name = str(tr.td.div.string).strip()
|
item_name = str(tr.td.div.string).strip()
|
||||||
try:
|
try:
|
||||||
return constants.FINANCIAL_STATEMENT_ITEMS[item_name]
|
return constants.FINANCIAL_STATEMENT_ITEMS[item_name]
|
||||||
@ -47,12 +49,11 @@ def is_relevant_table_row(tr: Tag) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]:
|
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet:
|
||||||
"""
|
"""
|
||||||
Returns the table rows containing the data of interest.
|
Returns the table rows containing the data of interest.
|
||||||
"""
|
"""
|
||||||
trs = soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
|
return soup.find('div', attrs={'class': 'financials'}).tbody.find_all(is_relevant_table_row)
|
||||||
return [tr for tr in trs if is_relevant_table_row(tr)]
|
|
||||||
|
|
||||||
|
|
||||||
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
||||||
@ -83,9 +84,8 @@ async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly
|
|||||||
"""
|
"""
|
||||||
Returns data from the specified financial statement of the specified company.
|
Returns data from the specified financial statement of the specified company.
|
||||||
"""
|
"""
|
||||||
url = f'{constants.BASE_URL}/{ticker_symbol}/financials{constants.FIN_STMT_URL_SUFFIX[statement]}'
|
q = '/quarter' if quarterly else ''
|
||||||
if quarterly:
|
url = constants.BASE_URL + ticker_symbol + '/financials' + constants.FIN_STMT_URL_SUFFIX[statement] + q
|
||||||
url += '/quarter'
|
|
||||||
soup = await soup_from_url(url, session)
|
soup = await soup_from_url(url, session)
|
||||||
return extract_all_data(soup)
|
return extract_all_data(soup)
|
||||||
|
|
||||||
|
@ -19,7 +19,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
|
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
|
||||||
|
|
||||||
log_lvl: int
|
log_lvl: int
|
||||||
test_html: str
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_mock_session(response_text: str = None) -> MagicMock:
|
def get_mock_session(response_text: str = None) -> MagicMock:
|
||||||
@ -34,8 +33,8 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls) -> None:
|
def setUpClass(cls) -> None:
|
||||||
with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
|
with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
|
||||||
cls.test_html = f.read()
|
test_html = f.read()
|
||||||
cls.test_soup = BeautifulSoup(cls.test_html, HTML_PARSER)
|
cls.test_soup = BeautifulSoup(test_html, HTML_PARSER)
|
||||||
cls.log_lvl = functions.log.level
|
cls.log_lvl = functions.log.level
|
||||||
functions.log.setLevel(logging.CRITICAL)
|
functions.log.setLevel(logging.CRITICAL)
|
||||||
|
|
||||||
@ -63,6 +62,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
||||||
test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER)
|
test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER)
|
||||||
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
||||||
|
self.assertFalse(functions.is_relevant_table_row(test_soup.div))
|
||||||
|
|
||||||
@patch.object(functions, 'is_relevant_table_row')
|
@patch.object(functions, 'is_relevant_table_row')
|
||||||
def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
|
def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
|
||||||
@ -104,7 +104,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data):
|
async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data):
|
||||||
mock_session = MagicMock()
|
mock_session = MagicMock()
|
||||||
test_ticker, statement = 'bar', BS
|
test_ticker, statement = 'bar', BS
|
||||||
test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
|
test_url = f'{BASE_URL}{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
|
||||||
mock_soup_from_url.return_value = mock_soup = MagicMock()
|
mock_soup_from_url.return_value = mock_soup = MagicMock()
|
||||||
mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
|
mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
|
||||||
|
|
||||||
@ -166,22 +166,3 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
mock_get_bs.assert_called_once_with(symbol, quarterly, mock_session)
|
mock_get_bs.assert_called_once_with(symbol, quarterly, mock_session)
|
||||||
mock_get_is.assert_called_once_with(symbol, quarterly, mock_session)
|
mock_get_is.assert_called_once_with(symbol, quarterly, mock_session)
|
||||||
mock_get_cf.assert_called_once_with(symbol, quarterly, mock_session)
|
mock_get_cf.assert_called_once_with(symbol, quarterly, mock_session)
|
||||||
|
|
||||||
@patch.object(functions, 'ClientSession')
|
|
||||||
async def test_integration_get_company_financials(self, mock_session_cls):
|
|
||||||
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(self.test_html)
|
|
||||||
symbol = 'foo'
|
|
||||||
# Since we mock the web request and always receive the same HTML markup,
|
|
||||||
# and the function essentially does 3 separate requests always updating the output dictionary with the same
|
|
||||||
# data, we expect it to remain unchanged and only having one item.
|
|
||||||
expected_output = {
|
|
||||||
END_DATE: ('End_Date_1', 'End_Date_2'),
|
|
||||||
'Cash & Short Term Investments': (11000000, -22000000),
|
|
||||||
}
|
|
||||||
output = await functions.get_company_financials(symbol, session=mock_session_obj)
|
|
||||||
self.assertDictEqual(expected_output, output)
|
|
||||||
mock_session_obj.get.assert_has_calls([
|
|
||||||
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[BS]}'),
|
|
||||||
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[IS]}'),
|
|
||||||
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[CF]}'),
|
|
||||||
])
|
|
||||||
|
@ -23,13 +23,13 @@
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr>
|
<tr>
|
||||||
<td><div> Cash & Short Term Investments </div><div> Cash & Short Term Investments </div></td>
|
<td><div> Item_1 </div><div> Item_1 </div></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
<td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td>
|
<td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><div> Cash & Short Term Investments Growth </div><div> Cash & Short Term Investments Growth </div></td>
|
<td><div> Item_2 </div><div> Item_2 </div></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user