Compare commits
No commits in common. "f9745ff46d66c6d10c0d9cc3af08901253443b10" and "25be101cf9e3d558b065b07c0dfdd5edb6743501" have entirely different histories.
f9745ff46d
...
25be101cf9
@ -1,5 +1,5 @@
|
|||||||
import logging
|
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
import re
|
||||||
|
|
||||||
from aiohttp.client import ClientSession
|
from aiohttp.client import ClientSession
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@ -8,8 +8,6 @@ from bs4.element import ResultSet, Tag
|
|||||||
from . import constants
|
from . import constants
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
|
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
|
||||||
# while its values will always be tuples with a length corresponding to the number of periods (columns)
|
# while its values will always be tuples with a length corresponding to the number of periods (columns)
|
||||||
# and elements being the actual numbers, with the exception of the first key-value-pair, which will represent
|
# and elements being the actual numbers, with the exception of the first key-value-pair, which will represent
|
||||||
@ -39,21 +37,21 @@ def is_relevant_table_row(tr: Tag) -> bool:
|
|||||||
"""
|
"""
|
||||||
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
|
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
|
||||||
"""
|
"""
|
||||||
if tr.name != 'tr':
|
item_name = str(tr.find_next('td').find_next('div').string).strip()
|
||||||
return False
|
if constants.FINANCIAL_STATEMENT_ITEMS[item_name]:
|
||||||
item_name = str(tr.td.div.string).strip()
|
return True
|
||||||
try:
|
|
||||||
return constants.FINANCIAL_STATEMENT_ITEMS[item_name]
|
|
||||||
except KeyError:
|
|
||||||
log.warning(f"Unknown item name '{item_name}' found in financial statement.")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet:
|
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet:
|
||||||
"""
|
"""
|
||||||
Returns the table rows containing the data of interest.
|
Returns the table rows containing the data of interest.
|
||||||
"""
|
"""
|
||||||
return soup.find('div', attrs={'class': 'financials'}).tbody.find_all(is_relevant_table_row)
|
table_div = soup.find('div', attrs={'class': 'financials'})
|
||||||
|
rows = table_div.find_all('tr')[1:-1]
|
||||||
|
for r in rows:
|
||||||
|
if not is_relevant_table_row(r):
|
||||||
|
rows.remove(r)
|
||||||
|
return rows
|
||||||
|
|
||||||
|
|
||||||
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
||||||
@ -61,22 +59,14 @@ def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
|||||||
Returns the name of the item displayed in the table row (of a financial statement)
|
Returns the name of the item displayed in the table row (of a financial statement)
|
||||||
as well as a number for each reporting period.
|
as well as a number for each reporting period.
|
||||||
"""
|
"""
|
||||||
item_name = str(tr.td.div.string).strip()
|
pass
|
||||||
data_div = tr.find_all('td')[-1].div.div
|
|
||||||
values_str: str = data_div.attrs['data-chart-data']
|
|
||||||
values = tuple(int(float(s)) for s in values_str.split(','))
|
|
||||||
return item_name, values
|
|
||||||
|
|
||||||
|
|
||||||
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
|
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
|
||||||
"""
|
"""
|
||||||
Extracts financials from the page.
|
Extracts financials from the page.
|
||||||
"""
|
"""
|
||||||
output = {constants.END_DATE: extract_end_dates(soup)}
|
pass
|
||||||
for row in find_relevant_table_rows(soup):
|
|
||||||
row_data = extract_row_data(row)
|
|
||||||
output[row_data[0]] = row_data[1]
|
|
||||||
return output
|
|
||||||
|
|
||||||
|
|
||||||
async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False,
|
async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False,
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import logging
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest import IsolatedAsyncioTestCase
|
from unittest import IsolatedAsyncioTestCase
|
||||||
from unittest.mock import patch, MagicMock, AsyncMock, call
|
from unittest.mock import patch, MagicMock, AsyncMock, call
|
||||||
@ -18,8 +17,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
# view page source @ line 2055
|
# view page source @ line 2055
|
||||||
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
|
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
|
||||||
|
|
||||||
log_lvl: int
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_mock_session(response_text: str = None) -> MagicMock:
|
def get_mock_session(response_text: str = None) -> MagicMock:
|
||||||
mock_response = MagicMock()
|
mock_response = MagicMock()
|
||||||
@ -35,12 +32,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
|
with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
|
||||||
test_html = f.read()
|
test_html = f.read()
|
||||||
cls.test_soup = BeautifulSoup(test_html, HTML_PARSER)
|
cls.test_soup = BeautifulSoup(test_html, HTML_PARSER)
|
||||||
cls.log_lvl = functions.log.level
|
|
||||||
functions.log.setLevel(logging.CRITICAL)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def tearDownClass(cls) -> None:
|
|
||||||
functions.log.setLevel(cls.log_lvl)
|
|
||||||
|
|
||||||
@patch.object(functions, 'ClientSession')
|
@patch.object(functions, 'ClientSession')
|
||||||
async def test_soup_from_url(self, mock_session_cls):
|
async def test_soup_from_url(self, mock_session_cls):
|
||||||
@ -56,26 +47,26 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
self.assertTupleEqual(expected_output, output)
|
self.assertTupleEqual(expected_output, output)
|
||||||
|
|
||||||
def test_is_relevant_table_row(self):
|
def test_is_relevant_table_row(self):
|
||||||
test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments </div></td></tr>', HTML_PARSER)
|
test_html = '<tr><td><div> Cash & Short Term Investments </div></td></tr>'
|
||||||
|
test_soup = BeautifulSoup(test_html, HTML_PARSER)
|
||||||
self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
|
self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
|
||||||
test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>', HTML_PARSER)
|
test_html = '<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>'
|
||||||
|
test_soup = BeautifulSoup(test_html, HTML_PARSER)
|
||||||
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
||||||
test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER)
|
|
||||||
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
|
||||||
self.assertFalse(functions.is_relevant_table_row(test_soup.div))
|
|
||||||
|
|
||||||
@patch.object(functions, 'is_relevant_table_row')
|
@patch.object(functions, 'is_relevant_table_row')
|
||||||
def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
|
def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
|
||||||
mock_is_relevant_table_row.return_value = True
|
mock_is_relevant_table_row.return_value = True
|
||||||
expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
|
test_table = self.test_soup.find('div', attrs={'class': 'financials'})
|
||||||
|
expected_output = test_table.find_all('tr')[1:-1]
|
||||||
output = functions.find_relevant_table_rows(self.test_soup)
|
output = functions.find_relevant_table_rows(self.test_soup)
|
||||||
self.assertListEqual(expected_output, output)
|
self.assertListEqual(expected_output, output)
|
||||||
mock_is_relevant_table_row.assert_has_calls([call(expected_output[0]), call(expected_output[1])])
|
mock_is_relevant_table_row.assert_called_once_with(expected_output[0])
|
||||||
|
|
||||||
def test_extract_row_data(self):
|
def test_extract_row_data(self):
|
||||||
test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr
|
test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table
|
||||||
expected_output = ('Item_1', (11000000, -22000000))
|
expected_output = ('Item_1', (11000000, -22000000))
|
||||||
output = functions.extract_row_data(test_row)
|
output = functions.extract_row_data(test_table.tbody.tr)
|
||||||
self.assertTupleEqual(expected_output, output)
|
self.assertTupleEqual(expected_output, output)
|
||||||
|
|
||||||
@patch.object(functions, 'extract_row_data')
|
@patch.object(functions, 'extract_row_data')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user