implemented
This commit is contained in:
parent
8afab2fde8
commit
2b3fb9f7ba
@ -11,4 +11,9 @@ FIN_STMT_URL_SUFFIX = {
|
||||
IS: '',
|
||||
CF: '/cash-flow'
|
||||
}
|
||||
INDENT_MAP = {
|
||||
'indent--small': 1,
|
||||
'indent--medium': 2,
|
||||
'indent--large': 3,
|
||||
}
|
||||
END_DATE = 'End Date'
|
||||
|
@ -1,13 +1,14 @@
|
||||
import logging
|
||||
import asyncio
|
||||
from typing import Union, List, Dict
|
||||
from typing import Union, Tuple, List, Dict
|
||||
|
||||
from aiohttp.client import ClientSession
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import Tag
|
||||
from webutils import in_async_session, gather_in_batches
|
||||
|
||||
from .constants import HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, DEFAULT_CONCURRENT_BATCH_SIZE
|
||||
from .constants import (HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, DEFAULT_CONCURRENT_BATCH_SIZE,
|
||||
INDENT_MAP)
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -16,7 +17,9 @@ log = logging.getLogger(__name__)
|
||||
# while its values will always be tuples with a length corresponding to the number of periods (columns)
|
||||
# and elements being the actual numbers, with the exception of the first key-value-pair, which will represent
|
||||
# the end dates of the reporting periods as strings (either years or quarters).
|
||||
ResultDict = dict[str, Union[tuple[float], tuple[str]]]
|
||||
HeaderData = Tuple[int, str, str, str, str, str]
|
||||
RowData = Tuple[int, float, float, float, float, float]
|
||||
ResultDict = dict[str, Union[HeaderData, RowData]]
|
||||
|
||||
|
||||
@in_async_session
|
||||
@ -29,13 +32,25 @@ async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSou
|
||||
return BeautifulSoup(html, HTML_PARSER)
|
||||
|
||||
|
||||
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
|
||||
def get_row_indent(tr: Tag) -> int:
    """
    Determines the indentation level of a table row from the CSS classes of the first `div` tag found inside it.

    Returns the value that `INDENT_MAP` assigns to the first of its keys present among those classes.
    Returns 0, if the row contains no `div` tag, if that tag has no `class` attribute, or if none of the
    `INDENT_MAP` keys match.
    """
    div = tr.div
    if div is None:
        # A row without any `div` (e.g. a header row made up of plain `th` cells) has no indentation;
        # accessing `.attrs` on `None` would raise an AttributeError instead of the KeyError handled before.
        return 0
    classes = div.attrs.get('class', ())
    for class_name, indent in INDENT_MAP.items():
        if class_name in classes:
            return indent
    return 0
|
||||
|
||||
|
||||
def extract_end_dates(soup: BeautifulSoup) -> HeaderData:
    """
    Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a
    financial statement.

    The first element of the returned tuple is the indentation level of the header row as determined by
    `get_row_indent`; the remaining elements are the stripped texts of the header cells, leaving out the first
    and the last cell of the row.
    """
    # The header row is looked up once, so that both its indentation and its cells come from the same tag.
    tr = soup.find('div', attrs={'class': 'financials'}).thead.tr
    ths = tr.find_all('th')
    return (get_row_indent(tr), ) + tuple(str(th.string).strip() for th in ths[1:-1])
|
||||
|
||||
|
||||
def get_all_table_rows(soup: BeautifulSoup) -> List[Tag]:
|
||||
@ -45,7 +60,7 @@ def get_all_table_rows(soup: BeautifulSoup) -> List[Tag]:
|
||||
return soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
|
||||
|
||||
|
||||
def extract_row_data(tr: Tag) -> tuple[str, tuple[float]]:
|
||||
def extract_row_data(tr: Tag) -> Tuple[str, RowData]:
|
||||
"""
|
||||
Returns the name of the item displayed in the table row (of a financial statement)
|
||||
as well as a number for each reporting period.
|
||||
@ -54,7 +69,7 @@ def extract_row_data(tr: Tag) -> tuple[str, tuple[float]]:
|
||||
data_div = tr.find_all('td')[-1].div.div
|
||||
values_str: str = data_div.attrs['data-chart-data']
|
||||
values = tuple(float(s if s != '' else 0) for s in values_str.split(','))
|
||||
return item_name, values
|
||||
return item_name, (get_row_indent(tr), ) + values
|
||||
|
||||
|
||||
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
|
||||
|
@ -213,19 +213,19 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
||||
# Since the web request is mocked we always receive the same HTML markup.
|
||||
expected_output = {
|
||||
BS: {
|
||||
END_DATE: ('End_Date_1', 'End_Date_2'),
|
||||
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
|
||||
'foo': (1, 1., -2.),
|
||||
'bar': (2, 2., -3.),
|
||||
'baz': (3, 3., -4.)
|
||||
},
|
||||
IS: {
|
||||
END_DATE: ('End_Date_1', 'End_Date_2'),
|
||||
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
|
||||
'foo': (1, 1., -2.),
|
||||
'bar': (2, 2., -3.),
|
||||
'baz': (3, 3., -4.)
|
||||
},
|
||||
CF: {
|
||||
END_DATE: ('End_Date_1', 'End_Date_2'),
|
||||
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
|
||||
'foo': (1, 1., -2.),
|
||||
'bar': (2, 2., -3.),
|
||||
'baz': (3, 3., -4.)
|
||||
|
Loading…
Reference in New Issue
Block a user