From 25be101cf9e3d558b065b07c0dfdd5edb6743501 Mon Sep 17 00:00:00 2001 From: Maximilian Fajnberg Date: Fri, 26 Nov 2021 23:37:46 +0100 Subject: [PATCH] function drafts --- src/mwfin/functions.py | 19 +++++++++++++++---- tests/test_functions.py | 7 +++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/mwfin/functions.py b/src/mwfin/functions.py index 12ad9ff..ea64023 100644 --- a/src/mwfin/functions.py +++ b/src/mwfin/functions.py @@ -1,4 +1,5 @@ from typing import Union +import re from aiohttp.client import ClientSession from bs4 import BeautifulSoup @@ -18,7 +19,9 @@ async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSou """ Requests a web page and turns the response text into BeautifulSoup. """ - pass + async with session.get(url) as response: + html = await response.text() + return BeautifulSoup(html, constants.HTML_PARSER) def extract_end_dates(soup: BeautifulSoup) -> tuple[str]: @@ -26,21 +29,29 @@ def extract_end_dates(soup: BeautifulSoup) -> tuple[str]: Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a financial statement. """ - pass + ths = soup.find('div', attrs={'class': 'financials'}).thead.find_all('th') + return tuple(str(th.string).strip() for th in ths[1:-1]) def is_relevant_table_row(tr: Tag) -> bool: """ Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown. """ - pass + item_name = str(tr.find_next('td').find_next('div').string).strip() + if constants.FINANCIAL_STATEMENT_ITEMS[item_name]: + return True def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet: """ Returns the table rows containing the data of interest. """ - pass + table_div = soup.find('div', attrs={'class': 'financials'}) + rows = table_div.find_all('tr')[1:-1] + for r in rows: + if not is_relevant_table_row(r): + rows.remove(r) + return rows def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: diff --git a/tests/test_functions.py b/tests/test_functions.py index c8e6c23..ba86bf5 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -38,8 +38,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): test_html = 'foo' mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html) expected_output = BeautifulSoup(test_html, 'html.parser') - output = await functions.soup_from_url('baz') - self.assertEqual(expected_output, output) output = await functions.soup_from_url('baz', mock_session_obj) self.assertEqual(expected_output, output) @@ -59,10 +57,11 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): @patch.object(functions, 'is_relevant_table_row') def test_find_relevant_table_rows(self, mock_is_relevant_table_row): mock_is_relevant_table_row.return_value = True - expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table.tbody.find_all('tr') + test_table = self.test_soup.find('div', attrs={'class': 'financials'}) + expected_output = test_table.find_all('tr')[1:-1] output = functions.find_relevant_table_rows(self.test_soup) self.assertListEqual(expected_output, output) - mock_is_relevant_table_row.assert_has_calls([call(expected_output[0]), call(expected_output[1])]) + mock_is_relevant_table_row.assert_called_once_with(expected_output[0]) def test_extract_row_data(self): test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table