function drafts

This commit is contained in:
2021-11-26 23:37:46 +01:00
parent 44dee0b762
commit 25be101cf9
2 changed files with 18 additions and 8 deletions

View File

@ -1,4 +1,5 @@
from typing import Union
import re
from aiohttp.client import ClientSession
from bs4 import BeautifulSoup
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
    """
    Requests a web page and turns the response text into BeautifulSoup.

    If no `session` is passed, a temporary `ClientSession` is opened for this single request and closed again
    afterwards; a session supplied by the caller is used as is and left open.
    """
    if session is None:
        # The parameter defaults to None, so calling `.get` on it directly would fail;
        # fall back to a short-lived session that is cleaned up by the context manager.
        async with ClientSession() as temporary_session:
            return await soup_from_url(url, temporary_session)
    async with session.get(url) as response:
        html = await response.text()
    return BeautifulSoup(html, constants.HTML_PARSER)
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
    """
    Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a
    financial statement.

    The dates are read from the header cells of the table inside the `div` with class `financials`;
    the first and the last header cell are left out.
    """
    header_cells = soup.find('div', attrs={'class': 'financials'}).thead.find_all('th')
    return tuple(str(cell.string).strip() for cell in header_cells[1:-1])
def is_relevant_table_row(tr: Tag) -> bool:
    """
    Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.

    The item name is taken from the first `div` following the first `td` of the row and looked up in
    `constants.FINANCIAL_STATEMENT_ITEMS`. Unknown item names are logged as a warning and treated as not relevant.
    """
    import logging

    item_name = str(tr.find_next('td').find_next('div').string).strip()
    try:
        # Coerce to bool so that irrelevant items yield False rather than an implicit None.
        return bool(constants.FINANCIAL_STATEMENT_ITEMS[item_name])
    except KeyError:
        logging.getLogger(__name__).warning("Unknown item name '%s' found in financial statement.", item_name)
        return False
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet:
    """
    Returns the table rows containing the data of interest.

    Looks at the `tr` elements inside the `div` with class `financials`, leaves out the first and the last one,
    and keeps only those rows for which `is_relevant_table_row` returns a truthy value.
    """
    table_div = soup.find('div', attrs={'class': 'financials'})
    candidate_rows = table_div.find_all('tr')[1:-1]
    # Build a new list instead of removing items from the list being iterated:
    # removing during iteration skips the element that follows each removed one.
    return [row for row in candidate_rows if is_relevant_table_row(row)]
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: