diff --git a/src/mwfin/functions.py b/src/mwfin/functions.py index ea64023..a1a8827 100644 --- a/src/mwfin/functions.py +++ b/src/mwfin/functions.py @@ -1,5 +1,5 @@ +import logging from typing import Union -import re from aiohttp.client import ClientSession from bs4 import BeautifulSoup @@ -8,6 +8,8 @@ from bs4.element import ResultSet, Tag from . import constants +log = logging.getLogger(__name__) + # The resulting dictionary's keys correspond to the name of the item (row) in the financial statement, # while its values will always be tuples with a length corresponding to the number of periods (columns) # and elements being the actual numbers, with the exception of the first key-value-pair, which will represent @@ -37,21 +39,21 @@ def is_relevant_table_row(tr: Tag) -> bool: """ Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown. """ - item_name = str(tr.find_next('td').find_next('div').string).strip() - if constants.FINANCIAL_STATEMENT_ITEMS[item_name]: - return True + if tr.name != 'tr': + return False + item_name = str(tr.td.div.string).strip() + try: + return constants.FINANCIAL_STATEMENT_ITEMS[item_name] + except KeyError: + log.warning(f"Unknown item name '{item_name}' found in financial statement.") + return False def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet: """ Returns the table rows containing the data of interest. """ - table_div = soup.find('div', attrs={'class': 'financials'}) - rows = table_div.find_all('tr')[1:-1] - for r in rows: - if not is_relevant_table_row(r): - rows.remove(r) - return rows + return soup.find('div', attrs={'class': 'financials'}).tbody.find_all(is_relevant_table_row) def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: diff --git a/tests/test_functions.py b/tests/test_functions.py index ba86bf5..3a1221b 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path from unittest import IsolatedAsyncioTestCase from unittest.mock import patch, MagicMock, AsyncMock, call @@ -17,6 +18,8 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): # view page source @ line 2055 TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html') + log_lvl: int + @staticmethod def get_mock_session(response_text: str = None) -> MagicMock: mock_response = MagicMock() @@ -32,6 +35,12 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): with open(cls.TEST_HTML_FILE_PATH, 'r') as f: test_html = f.read() cls.test_soup = BeautifulSoup(test_html, HTML_PARSER) + cls.log_lvl = functions.log.level + functions.log.setLevel(logging.CRITICAL) + + @classmethod + def tearDownClass(cls) -> None: + functions.log.setLevel(cls.log_lvl) @patch.object(functions, 'ClientSession') async def test_soup_from_url(self, mock_session_cls): @@ -47,21 +56,21 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): self.assertTupleEqual(expected_output, output) def test_is_relevant_table_row(self): - test_html = '
Cash & Short Term Investments
' - test_soup = BeautifulSoup(test_html, HTML_PARSER) + test_soup = BeautifulSoup('
Cash & Short Term Investments
', HTML_PARSER) self.assertTrue(functions.is_relevant_table_row(test_soup.tr)) - test_html = '
Cash & Short Term Investments Growth
' - test_soup = BeautifulSoup(test_html, HTML_PARSER) + test_soup = BeautifulSoup('
Cash & Short Term Investments Growth
', HTML_PARSER) self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) + test_soup = BeautifulSoup('
baz
', HTML_PARSER) + self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) + self.assertFalse(functions.is_relevant_table_row(test_soup.div)) @patch.object(functions, 'is_relevant_table_row') def test_find_relevant_table_rows(self, mock_is_relevant_table_row): mock_is_relevant_table_row.return_value = True - test_table = self.test_soup.find('div', attrs={'class': 'financials'}) - expected_output = test_table.find_all('tr')[1:-1] + expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr') output = functions.find_relevant_table_rows(self.test_soup) self.assertListEqual(expected_output, output) - mock_is_relevant_table_row.assert_called_once_with(expected_output[0]) + mock_is_relevant_table_row.assert_has_calls([call(expected_output[0]), call(expected_output[1])]) def test_extract_row_data(self): test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table