Compare commits

..

No commits in common. "f9745ff46d66c6d10c0d9cc3af08901253443b10" and "25be101cf9e3d558b065b07c0dfdd5edb6743501" have entirely different histories.

2 changed files with 21 additions and 40 deletions

View File

@ -1,5 +1,5 @@
import logging
from typing import Union from typing import Union
import re
from aiohttp.client import ClientSession from aiohttp.client import ClientSession
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -8,8 +8,6 @@ from bs4.element import ResultSet, Tag
from . import constants from . import constants
log = logging.getLogger(__name__)
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement, # The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
# while its values will always be tuples with a length corresponding to the number of periods (columns) # while its values will always be tuples with a length corresponding to the number of periods (columns)
# and elements being the actual numbers, with the exception of the first key-value-pair, which will represent # and elements being the actual numbers, with the exception of the first key-value-pair, which will represent
@ -39,21 +37,21 @@ def is_relevant_table_row(tr: Tag) -> bool:
""" """
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown. Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
""" """
if tr.name != 'tr': item_name = str(tr.find_next('td').find_next('div').string).strip()
return False if constants.FINANCIAL_STATEMENT_ITEMS[item_name]:
item_name = str(tr.td.div.string).strip() return True
try:
return constants.FINANCIAL_STATEMENT_ITEMS[item_name]
except KeyError:
log.warning(f"Unknown item name '{item_name}' found in financial statement.")
return False
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet: def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet:
""" """
Returns the table rows containing the data of interest. Returns the table rows containing the data of interest.
""" """
return soup.find('div', attrs={'class': 'financials'}).tbody.find_all(is_relevant_table_row) table_div = soup.find('div', attrs={'class': 'financials'})
rows = table_div.find_all('tr')[1:-1]
for r in rows:
if not is_relevant_table_row(r):
rows.remove(r)
return rows
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
@ -61,22 +59,14 @@ def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
Returns the name of the item displayed in the table row (of a financial statement) Returns the name of the item displayed in the table row (of a financial statement)
as well as a number for each reporting period. as well as a number for each reporting period.
""" """
item_name = str(tr.td.div.string).strip() pass
data_div = tr.find_all('td')[-1].div.div
values_str: str = data_div.attrs['data-chart-data']
values = tuple(int(float(s)) for s in values_str.split(','))
return item_name, values
def extract_all_data(soup: BeautifulSoup) -> ResultDict: def extract_all_data(soup: BeautifulSoup) -> ResultDict:
""" """
Extracts financials from the page. Extracts financials from the page.
""" """
output = {constants.END_DATE: extract_end_dates(soup)} pass
for row in find_relevant_table_rows(soup):
row_data = extract_row_data(row)
output[row_data[0]] = row_data[1]
return output
async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False, async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False,

View File

@ -1,4 +1,3 @@
import logging
from pathlib import Path from pathlib import Path
from unittest import IsolatedAsyncioTestCase from unittest import IsolatedAsyncioTestCase
from unittest.mock import patch, MagicMock, AsyncMock, call from unittest.mock import patch, MagicMock, AsyncMock, call
@ -18,8 +17,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
# view page source @ line 2055 # view page source @ line 2055
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html') TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
log_lvl: int
@staticmethod @staticmethod
def get_mock_session(response_text: str = None) -> MagicMock: def get_mock_session(response_text: str = None) -> MagicMock:
mock_response = MagicMock() mock_response = MagicMock()
@ -35,12 +32,6 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
with open(cls.TEST_HTML_FILE_PATH, 'r') as f: with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
test_html = f.read() test_html = f.read()
cls.test_soup = BeautifulSoup(test_html, HTML_PARSER) cls.test_soup = BeautifulSoup(test_html, HTML_PARSER)
cls.log_lvl = functions.log.level
functions.log.setLevel(logging.CRITICAL)
@classmethod
def tearDownClass(cls) -> None:
functions.log.setLevel(cls.log_lvl)
@patch.object(functions, 'ClientSession') @patch.object(functions, 'ClientSession')
async def test_soup_from_url(self, mock_session_cls): async def test_soup_from_url(self, mock_session_cls):
@ -56,26 +47,26 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
self.assertTupleEqual(expected_output, output) self.assertTupleEqual(expected_output, output)
def test_is_relevant_table_row(self): def test_is_relevant_table_row(self):
test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments </div></td></tr>', HTML_PARSER) test_html = '<tr><td><div> Cash & Short Term Investments </div></td></tr>'
test_soup = BeautifulSoup(test_html, HTML_PARSER)
self.assertTrue(functions.is_relevant_table_row(test_soup.tr)) self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>', HTML_PARSER) test_html = '<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>'
test_soup = BeautifulSoup(test_html, HTML_PARSER)
self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER)
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
self.assertFalse(functions.is_relevant_table_row(test_soup.div))
@patch.object(functions, 'is_relevant_table_row') @patch.object(functions, 'is_relevant_table_row')
def test_find_relevant_table_rows(self, mock_is_relevant_table_row): def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
mock_is_relevant_table_row.return_value = True mock_is_relevant_table_row.return_value = True
expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr') test_table = self.test_soup.find('div', attrs={'class': 'financials'})
expected_output = test_table.find_all('tr')[1:-1]
output = functions.find_relevant_table_rows(self.test_soup) output = functions.find_relevant_table_rows(self.test_soup)
self.assertListEqual(expected_output, output) self.assertListEqual(expected_output, output)
mock_is_relevant_table_row.assert_has_calls([call(expected_output[0]), call(expected_output[1])]) mock_is_relevant_table_row.assert_called_once_with(expected_output[0])
def test_extract_row_data(self): def test_extract_row_data(self):
test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr test_table = self.test_soup.find('div', attrs={'class': 'financials'}).div.div.table
expected_output = ('Item_1', (11000000, -22000000)) expected_output = ('Item_1', (11000000, -22000000))
output = functions.extract_row_data(test_row) output = functions.extract_row_data(test_table.tbody.tr)
self.assertTupleEqual(expected_output, output) self.assertTupleEqual(expected_output, output)
@patch.object(functions, 'extract_row_data') @patch.object(functions, 'extract_row_data')