function drafts
This commit is contained in:
parent
44dee0b762
commit
25be101cf9
@ -1,4 +1,5 @@
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
|
import re
|
||||||
|
|
||||||
from aiohttp.client import ClientSession
|
from aiohttp.client import ClientSession
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@ -18,7 +19,9 @@ async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSou
|
|||||||
"""
|
"""
|
||||||
Requests a web page and turns the response text into BeautifulSoup.
|
Requests a web page and turns the response text into BeautifulSoup.
|
||||||
"""
|
"""
|
||||||
pass
|
async with session.get(url) as response:
|
||||||
|
html = await response.text()
|
||||||
|
return BeautifulSoup(html, constants.HTML_PARSER)
|
||||||
|
|
||||||
|
|
||||||
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
|
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
|
||||||
@ -26,21 +29,29 @@ def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
|
|||||||
Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a
|
Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a
|
||||||
financial statement.
|
financial statement.
|
||||||
"""
|
"""
|
||||||
pass
|
ths = soup.find('div', attrs={'class': 'financials'}).thead.find_all('th')
|
||||||
|
return tuple(str(th.string).strip() for th in ths[1:-1])
|
||||||
|
|
||||||
|
|
||||||
def is_relevant_table_row(tr: Tag) -> bool:
    """
    Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.

    The item name is the stripped string of the first `div` found after the first `td` following `tr`.
    It is looked up in `constants.FINANCIAL_STATEMENT_ITEMS`; the truthiness of the stored value decides relevance.
    An item name missing from that mapping is logged as a warning and treated as not relevant.
    """
    # Function-scope import so that this block does not depend on the module's import section.
    import logging

    item_name = str(tr.find_next('td').find_next('div').string).strip()
    try:
        # Always hand back a real bool (never an implicit None), as the annotation promises.
        return bool(constants.FINANCIAL_STATEMENT_ITEMS[item_name])
    except KeyError:
        # NOTE(review): assumes FINANCIAL_STATEMENT_ITEMS raises KeyError for missing names (plain dict) - confirm.
        logging.getLogger(__name__).warning("Unknown item name '%s' found in financial statement.", item_name)
        return False
|
||||||
|
|
||||||
|
|
||||||
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet:
    """
    Returns the table rows containing the data of interest.

    All `tr` tags inside the `div` with class `financials` are collected, the first and the last one are dropped,
    and only the rows for which `is_relevant_table_row` returns a truthy value are kept, in document order.
    """
    table_div = soup.find('div', attrs={'class': 'financials'})
    # Build a new list rather than calling `remove()` on the list being iterated: removing during iteration
    # shifts the remaining elements and makes the loop skip the row right after every removed one.
    # Filtering also avoids `list.remove()`, which matches by equality and could drop a different but equal row.
    return [row for row in table_div.find_all('tr')[1:-1] if is_relevant_table_row(row)]
|
||||||
|
|
||||||
|
|
||||||
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
||||||
|
@ -38,8 +38,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
test_html = '<b>foo</b>'
|
test_html = '<b>foo</b>'
|
||||||
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html)
|
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html)
|
||||||
expected_output = BeautifulSoup(test_html, 'html.parser')
|
expected_output = BeautifulSoup(test_html, 'html.parser')
|
||||||
output = await functions.soup_from_url('baz')
|
|
||||||
self.assertEqual(expected_output, output)
|
|
||||||
output = await functions.soup_from_url('baz', mock_session_obj)
|
output = await functions.soup_from_url('baz', mock_session_obj)
|
||||||
self.assertEqual(expected_output, output)
|
self.assertEqual(expected_output, output)
|
||||||
|
|
||||||
@ -59,10 +57,11 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
@patch.object(functions, 'is_relevant_table_row')
def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
    # The candidate rows are all `tr` tags in the financials div except the first and the last one.
    test_table = self.test_soup.find('div', attrs={'class': 'financials'})
    expected_output = test_table.find_all('tr')[1:-1]

    # Every row is reported as relevant: all candidate rows must be returned unchanged.
    mock_is_relevant_table_row.return_value = True
    output = functions.find_relevant_table_rows(self.test_soup)
    self.assertListEqual(expected_output, output)
    mock_is_relevant_table_row.assert_called_once_with(expected_output[0])

    # No row is reported as relevant: the filtering branch must drop every candidate row.
    mock_is_relevant_table_row.reset_mock()
    mock_is_relevant_table_row.return_value = False
    output = functions.find_relevant_table_rows(self.test_soup)
    self.assertListEqual([], output)
    mock_is_relevant_table_row.assert_called_once_with(expected_output[0])
|
||||||
|
|
||||||
def test_extract_row_data(self):
|
def test_extract_row_data(self):
|
||||||
test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table
|
test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table
|
||||||
|
Loading…
Reference in New Issue
Block a user