diff --git a/src/mwfin/constants.py b/src/mwfin/constants.py index 80c72ce..4f6468e 100644 --- a/src/mwfin/constants.py +++ b/src/mwfin/constants.py @@ -1,6 +1,6 @@ HTML_PARSER = 'html.parser' DOMAIN = 'www.marketwatch.com' -BASE_URL = f'https://{DOMAIN}/investing/stock/' +BASE_URL = f'https://{DOMAIN}/investing/stock' BS, IS, CF = 'bs', 'is', 'cf' FIN_STMT_URL_SUFFIX = { diff --git a/src/mwfin/functions.py b/src/mwfin/functions.py index 5ab711e..840fd85 100644 --- a/src/mwfin/functions.py +++ b/src/mwfin/functions.py @@ -1,9 +1,9 @@ import logging -from typing import Union +from typing import Union, List from aiohttp.client import ClientSession from bs4 import BeautifulSoup -from bs4.element import ResultSet, Tag +from bs4.element import Tag from . import constants @@ -39,8 +39,6 @@ def is_relevant_table_row(tr: Tag) -> bool: """ Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown. """ - if tr.name != 'tr': - return False item_name = str(tr.td.div.string).strip() try: return constants.FINANCIAL_STATEMENT_ITEMS[item_name] @@ -49,11 +47,12 @@ def is_relevant_table_row(tr: Tag) -> bool: return False -def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet: +def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]: """ Returns the table rows containing the data of interest. """ - return soup.find('div', attrs={'class': 'financials'}).tbody.find_all(is_relevant_table_row) + trs = soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr') + return [tr for tr in trs if is_relevant_table_row(tr)] def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: @@ -84,8 +83,9 @@ async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly """ Returns data from the specified financial statement of the specified company. """ - q = '/quarter' if quarterly else '' - url = constants.BASE_URL + ticker_symbol + '/financials' + constants.FIN_STMT_URL_SUFFIX[statement] + q + url = f'{constants.BASE_URL}/{ticker_symbol}/financials{constants.FIN_STMT_URL_SUFFIX[statement]}' + if quarterly: + url += '/quarter' soup = await soup_from_url(url, session) return extract_all_data(soup) diff --git a/tests/test_functions.py b/tests/test_functions.py index aabbc56..fec2326 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -62,7 +62,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) test_soup = BeautifulSoup('
baz
', HTML_PARSER) self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) - self.assertFalse(functions.is_relevant_table_row(test_soup.div)) @patch.object(functions, 'is_relevant_table_row') def test_find_relevant_table_rows(self, mock_is_relevant_table_row): @@ -104,7 +103,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data): mock_session = MagicMock() test_ticker, statement = 'bar', BS - test_url = f'{BASE_URL}{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}' + test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}' mock_soup_from_url.return_value = mock_soup = MagicMock() mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}