function drafts

This commit is contained in:
Maximilian Fajnberg 2021-11-26 23:37:46 +01:00
parent 44dee0b762
commit 25be101cf9
2 changed files with 18 additions and 8 deletions

View File

@ -1,4 +1,5 @@
from typing import Union from typing import Union
import re
from aiohttp.client import ClientSession from aiohttp.client import ClientSession
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -18,7 +19,9 @@ async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSou
""" """
Requests a web page and turns the response text into BeautifulSoup. Requests a web page and turns the response text into BeautifulSoup.
""" """
pass async with session.get(url) as response:
html = await response.text()
return BeautifulSoup(html, constants.HTML_PARSER)
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
    """
    Collects the end dates of the reporting periods (years or quarters) shown on a financial statement page.

    The dates are read from the `th` cells of the table head inside the `div` with class `financials`.
    The first and the last header cell are skipped; every remaining cell's text is returned stripped of
    surrounding whitespace, in page order.
    """
    financials_div = soup.find('div', attrs={'class': 'financials'})
    header_cells = financials_div.thead.find_all('th')
    end_dates = []
    # Only the inner header cells are read; the outermost two are dropped by the slice.
    for cell in header_cells[1:-1]:
        end_dates.append(str(cell.string).strip())
    return tuple(end_dates)
def is_relevant_table_row(tr: Tag) -> bool:
    """
    Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.

    The item name is the stripped text of the first `div` found after the first `td` that follows `tr`.
    It is looked up in `constants.FINANCIAL_STATEMENT_ITEMS` (presumably a mapping from item name to a
    relevance flag -- verify against the constants module).

    Always returns a real `bool`: False for items flagged as not relevant, and False (after logging a
    warning) for item names that are missing from the mapping, instead of raising `KeyError`.
    """
    # Function-scope import so this block does not rely on a module-level `logging` import.
    import logging

    item_name = str(tr.find_next('td').find_next('div').string).strip()
    try:
        relevant = constants.FINANCIAL_STATEMENT_ITEMS[item_name]
    except KeyError:
        # Unknown items are reported, as the docstring promises, and treated as not relevant.
        logging.getLogger(__name__).warning("Unknown item name '%s' found in financial statement", item_name)
        return False
    return bool(relevant)
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet:
    """
    Returns the table rows containing the data of interest.

    All `tr` elements inside the `div` with class `financials` are collected, the first and the last one
    are dropped, and the remaining rows are kept only if `is_relevant_table_row` accepts them.
    The result is a new list in page order (slicing the `find_all` result already yields a plain list).
    """
    table_div = soup.find('div', attrs={'class': 'financials'})
    candidate_rows = table_div.find_all('tr')[1:-1]
    # Build a new list instead of calling `remove` while iterating: removing during iteration skips the
    # element that follows each removed row, and `remove` matches by equality rather than identity.
    return [row for row in candidate_rows if is_relevant_table_row(row)]
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:

View File

@ -38,8 +38,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
test_html = '<b>foo</b>' test_html = '<b>foo</b>'
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html) mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html)
expected_output = BeautifulSoup(test_html, 'html.parser') expected_output = BeautifulSoup(test_html, 'html.parser')
output = await functions.soup_from_url('baz')
self.assertEqual(expected_output, output)
output = await functions.soup_from_url('baz', mock_session_obj) output = await functions.soup_from_url('baz', mock_session_obj)
self.assertEqual(expected_output, output) self.assertEqual(expected_output, output)
@ -59,10 +57,11 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
@patch.object(functions, 'is_relevant_table_row')
def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
    # Every row is reported as relevant, so the function under test must return all inner rows unchanged.
    mock_is_relevant_table_row.return_value = True
    financials_div = self.test_soup.find('div', attrs={'class': 'financials'})
    all_rows = financials_div.find_all('tr')
    # The first and the last row are not expected in the output.
    expected_output = all_rows[1:-1]
    output = functions.find_relevant_table_rows(self.test_soup)
    self.assertListEqual(expected_output, output)
    # The relevance check must have been invoked exactly once, with the first expected row.
    mock_is_relevant_table_row.assert_called_once_with(expected_output[0])
def test_extract_row_data(self): def test_extract_row_data(self):
test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table