Compare commits
	
		
			5 Commits
		
	
	
		
			462e34013e
			...
			2b3fb9f7ba
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 2b3fb9f7ba | ||
|  | 8afab2fde8 | ||
|  | f7cb5de928 | ||
|  | c93fb9692e | ||
|  | 38dd29f35b | 
| @@ -1,4 +1,3 @@ | |||||||
| DEV_MODE = False |  | ||||||
| MAIN_LOGGER_NAME = 'mwfin' | MAIN_LOGGER_NAME = 'mwfin' | ||||||
|  |  | ||||||
| HTML_PARSER = 'html.parser' | HTML_PARSER = 'html.parser' | ||||||
| @@ -12,214 +11,9 @@ FIN_STMT_URL_SUFFIX = { | |||||||
|     IS: '', |     IS: '', | ||||||
|     CF: '/cash-flow' |     CF: '/cash-flow' | ||||||
| } | } | ||||||
| END_DATE = 'End Date' | INDENT_MAP = { | ||||||
|  |     'indent--small': 1, | ||||||
| # All items marked `False` do not need to be scraped |     'indent--medium': 2, | ||||||
| # because they are calculated from other items (e.g. growth or ratios). |     'indent--large': 3, | ||||||
| FIN_STMT_ITEMS = { |  | ||||||
|     ################# |  | ||||||
|     # Balance Sheet # |  | ||||||
|     ################# |  | ||||||
|     "Cash & Short Term Investments": True, |  | ||||||
|     "Cash & Short Term Investments Growth": False, |  | ||||||
|     "Cash Only": True, |  | ||||||
|     "Short-Term Investments": True, |  | ||||||
|     "Cash & ST Investments / Total Assets": False, |  | ||||||
|     "Total Accounts Receivable": True, |  | ||||||
|     "Total Accounts Receivable Growth": False, |  | ||||||
|     "Accounts Receivables, Net": True, |  | ||||||
|     "Accounts Receivables, Gross": True, |  | ||||||
|     "Bad Debt/Doubtful Accounts": True, |  | ||||||
|     "Other Receivable": True, |  | ||||||
|     "Accounts Receivable Turnover": False, |  | ||||||
|     "Inventories": True, |  | ||||||
|     "Finished Goods": True, |  | ||||||
|     "Work in Progress": True, |  | ||||||
|     "Raw Materials": True, |  | ||||||
|     "Progress Payments & Other": True, |  | ||||||
|     "Other Current Assets": True, |  | ||||||
|     "Miscellaneous Current Assets": True, |  | ||||||
|     "Total Current Assets": True, |  | ||||||
|     "Net Property, Plant & Equipment": True, |  | ||||||
|     "Property, Plant & Equipment - Gross": True, |  | ||||||
|     "Buildings": True, |  | ||||||
|     "Land & Improvements": True, |  | ||||||
|     "Computer Software and Equipment": True, |  | ||||||
|     "Other Property, Plant & Equipment": True, |  | ||||||
|     "Accumulated Depreciation": True, |  | ||||||
|     "Total Investments and Advances": True, |  | ||||||
|     "Other Long-Term Investments": True, |  | ||||||
|     "Long-Term Note Receivables": True, |  | ||||||
|     "Intangible Assets": True, |  | ||||||
|     "Net Goodwill": True, |  | ||||||
|     "Net Other Intangibles": True, |  | ||||||
|     "Other Assets": True, |  | ||||||
|     "Total Assets": True, |  | ||||||
|     "Total Assets Growth": False, |  | ||||||
|     "ST Debt & Current Portion LT Debt": True, |  | ||||||
|     "Short Term Debt": True, |  | ||||||
|     "Current Portion of Long Term Debt": True, |  | ||||||
|     "Accounts Payable": True, |  | ||||||
|     "Accounts Payable Growth": False, |  | ||||||
|     "Income Tax Payable": True, |  | ||||||
|     "Other Current Liabilities": True, |  | ||||||
|     "Dividends Payable": True, |  | ||||||
|     "Accrued Payroll": True, |  | ||||||
|     "Miscellaneous Current Liabilities": True, |  | ||||||
|     "Total Current Liabilities": True, |  | ||||||
|     "Long-Term Debt": True, |  | ||||||
|     "Long-Term Debt excl. Capitalized Leases": True, |  | ||||||
|     "Non-Convertible Debt": True, |  | ||||||
|     "Convertible Debt": True, |  | ||||||
|     "Capitalized Lease Obligations": True, |  | ||||||
|     "Provision for Risks & Charges": True, |  | ||||||
|     "Deferred Taxes": True, |  | ||||||
|     "Deferred Taxes - Credits": True, |  | ||||||
|     "Deferred Taxes - Debit": True, |  | ||||||
|     "Other Liabilities": True, |  | ||||||
|     "Other Liabilities (excl. Deferred Income)": True, |  | ||||||
|     "Deferred Income": True, |  | ||||||
|     "Total Liabilities": True, |  | ||||||
|     "Non-Equity Reserves": True, |  | ||||||
|     "Total Liabilities / Total Assets": False, |  | ||||||
|     "Preferred Stock (Carrying Value)": True, |  | ||||||
|     "Redeemable Preferred Stock": True, |  | ||||||
|     "Non-Redeemable Preferred Stock": True, |  | ||||||
|     "Common Equity (Total)": True, |  | ||||||
|     "Common Equity / Total Assets": False, |  | ||||||
|     "Common Stock Par/Carry Value": True, |  | ||||||
|     "Retained Earnings": True, |  | ||||||
|     "ESOP Debt Guarantee": True, |  | ||||||
|     "Cumulative Translation Adjustment/Unrealized For. Exch. Gain": True, |  | ||||||
|     "Unrealized Gain/Loss Marketable Securities": True, |  | ||||||
|     "Revaluation Reserves": True, |  | ||||||
|     "Treasury Stock": True, |  | ||||||
|     "Total Shareholders' Equity": True, |  | ||||||
|     "Total Shareholders' Equity / Total Assets": False, |  | ||||||
|     "Accumulated Minority Interest": True, |  | ||||||
|     "Total Equity": True, |  | ||||||
|     "Liabilities & Shareholders' Equity": True, |  | ||||||
|  |  | ||||||
|     #################### |  | ||||||
|     # Income Statement # |  | ||||||
|     #################### |  | ||||||
|     "Sales/Revenue": True, |  | ||||||
|     "Sales Growth": False, |  | ||||||
|     "Cost of Goods Sold (COGS) incl. D&A": True, |  | ||||||
|     "COGS Growth": False, |  | ||||||
|     "COGS excluding D&A": True, |  | ||||||
|     "Depreciation & Amortization Expense": True, |  | ||||||
|     "Depreciation": True, |  | ||||||
|     "Amortization of Intangibles": True, |  | ||||||
|     "Gross Income": True, |  | ||||||
|     "Gross Income Growth": False, |  | ||||||
|     "Gross Profit Margin": False, |  | ||||||
|     "SG&A Expense": True, |  | ||||||
|     "SGA Growth": False, |  | ||||||
|     "Research & Development": True, |  | ||||||
|     "Other SG&A": True, |  | ||||||
|     "Other Operating Expense": True, |  | ||||||
|     "Unusual Expense": True, |  | ||||||
|     "EBIT after Unusual Expense": True, |  | ||||||
|     "Non Operating Income/Expense": True, |  | ||||||
|     "Non-Operating Interest Income": True, |  | ||||||
|     "Equity in Affiliates (Pretax)": True, |  | ||||||
|     "Interest Expense": True, |  | ||||||
|     "Interest Expense Growth": False, |  | ||||||
|     "Gross Interest Expense": True, |  | ||||||
|     "Interest Capitalized": True, |  | ||||||
|     "Pretax Income": True, |  | ||||||
|     "Pretax Income Growth": False, |  | ||||||
|     "Pretax Margin": False, |  | ||||||
|     "Income Tax": True, |  | ||||||
|     "Income Tax - Current Domestic": True, |  | ||||||
|     "Income Tax - Current Foreign": True, |  | ||||||
|     "Income Tax - Deferred Domestic": True, |  | ||||||
|     "Income Tax - Deferred Foreign": True, |  | ||||||
|     "Income Tax Credits": True, |  | ||||||
|     "Equity in Affiliates": True, |  | ||||||
|     "Other After Tax Income (Expense)": True, |  | ||||||
|     "Consolidated Net Income": True, |  | ||||||
|     "Minority Interest Expense": True, |  | ||||||
|     "Net Income": True, |  | ||||||
|     "Net Income Growth": False, |  | ||||||
|     "Net Margin Growth": False, |  | ||||||
|     "Extraordinaries & Discontinued Operations": True, |  | ||||||
|     "Extra Items & Gain/Loss Sale Of Assets": True, |  | ||||||
|     "Cumulative Effect - Accounting Chg": True, |  | ||||||
|     "Discontinued Operations": True, |  | ||||||
|     "Net Income After Extraordinaries": True, |  | ||||||
|     "Preferred Dividends": True, |  | ||||||
|     "Net Income Available to Common": True, |  | ||||||
|     "EPS (Basic)": True, |  | ||||||
|     "EPS (Basic) Growth": False, |  | ||||||
|     "Basic Shares Outstanding": True, |  | ||||||
|     "EPS (Diluted)": True, |  | ||||||
|     "EPS (Diluted) Growth": False, |  | ||||||
|     "Diluted Shares Outstanding": True, |  | ||||||
|     "EBITDA": True, |  | ||||||
|     "EBITDA Growth": False, |  | ||||||
|     "EBITDA Margin": False, |  | ||||||
|  |  | ||||||
|     ####################### |  | ||||||
|     # Cash Flow Statement # |  | ||||||
|     ####################### |  | ||||||
|     "Net Income before Extraordinaries": True, |  | ||||||
|     # "Net Income Growth": False, |  | ||||||
|     "Depreciation, Depletion & Amortization": True, |  | ||||||
|     "Depreciation and Depletion": True, |  | ||||||
|     "Amortization of Intangible Assets": True, |  | ||||||
|     "Deferred Taxes & Investment Tax Credit": True, |  | ||||||
|     # "Deferred Taxes": True, |  | ||||||
|     "Investment Tax Credit": True, |  | ||||||
|     "Other Funds": True, |  | ||||||
|     "Funds from Operations": True, |  | ||||||
|     "Extraordinaries": True, |  | ||||||
|     "Changes in Working Capital": True, |  | ||||||
|     "Receivables": True, |  | ||||||
|     # "Accounts Payable": True, |  | ||||||
|     "Other Assets/Liabilities": True, |  | ||||||
|     "Net Operating Cash Flow": True, |  | ||||||
|     "Net Operating Cash Flow Growth": False, |  | ||||||
|     "Net Operating Cash Flow / Sales": False, |  | ||||||
|     "Capital Expenditures": True, |  | ||||||
|     "Capital Expenditures Growth": False, |  | ||||||
|     "Capital Expenditures / Sales": False, |  | ||||||
|     "Capital Expenditures (Fixed Assets)": True, |  | ||||||
|     "Capital Expenditures (Other Assets)": True, |  | ||||||
|     "Net Assets from Acquisitions": True, |  | ||||||
|     "Sale of Fixed Assets & Businesses": True, |  | ||||||
|     "Purchase/Sale of Investments": True, |  | ||||||
|     "Purchase of Investments": True, |  | ||||||
|     "Sale/Maturity of Investments": True, |  | ||||||
|     "Other Uses": True, |  | ||||||
|     "Other Sources": True, |  | ||||||
|     "Net Investing Cash Flow": True, |  | ||||||
|     "Net Investing Cash Flow Growth": False, |  | ||||||
|     "Net Investing Cash Flow / Sales": False, |  | ||||||
|     "Cash Dividends Paid - Total": True, |  | ||||||
|     "Common Dividends": True, |  | ||||||
|     # "Preferred Dividends": True, |  | ||||||
|     "Change in Capital Stock": True, |  | ||||||
|     "Repurchase of Common & Preferred Stk.": True, |  | ||||||
|     "Sale of Common & Preferred Stock": True, |  | ||||||
|     "Proceeds from Stock Options": True, |  | ||||||
|     "Other Proceeds from Sale of Stock": True, |  | ||||||
|     "Issuance/Reduction of Debt, Net": True, |  | ||||||
|     "Change in Current Debt": True, |  | ||||||
|     "Change in Long-Term Debt": True, |  | ||||||
|     "Issuance of Long-Term Debt": True, |  | ||||||
|     "Reduction in Long-Term Debt": True, |  | ||||||
|     # "Other Funds": True, |  | ||||||
|     # "Other Uses": True, |  | ||||||
|     # "Other Sources": True, |  | ||||||
|     "Net Financing Cash Flow": True, |  | ||||||
|     "Net Financing Cash Flow Growth": False, |  | ||||||
|     "Net Financing Cash Flow / Sales": False, |  | ||||||
|     "Exchange Rate Effect": True, |  | ||||||
|     "Miscellaneous Funds": True, |  | ||||||
|     "Net Change in Cash": True, |  | ||||||
|     "Free Cash Flow": True, |  | ||||||
|     "Free Cash Flow Growth": False, |  | ||||||
|     "Free Cash Flow Yield": False, |  | ||||||
| } | } | ||||||
|  | END_DATE = 'End Date' | ||||||
|   | |||||||
| @@ -4,7 +4,3 @@ class WrongAssumptions(Exception): | |||||||
|  |  | ||||||
| class UnexpectedMarkup(WrongAssumptions): | class UnexpectedMarkup(WrongAssumptions): | ||||||
|     pass |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
| class UnknownFinancialStatementItem(WrongAssumptions): |  | ||||||
|     pass |  | ||||||
|   | |||||||
| @@ -1,16 +1,14 @@ | |||||||
| import logging | import logging | ||||||
| import asyncio | import asyncio | ||||||
| from typing import Union, List, Dict | from typing import Union, Tuple, List, Dict | ||||||
| from datetime import datetime |  | ||||||
|  |  | ||||||
| from aiohttp.client import ClientSession | from aiohttp.client import ClientSession | ||||||
| from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||||
| from bs4.element import Tag | from bs4.element import Tag | ||||||
| from webutils import in_async_session, gather_in_batches | from webutils import in_async_session, gather_in_batches | ||||||
|  |  | ||||||
| from .constants import (DEV_MODE, HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, FIN_STMT_ITEMS, | from .constants import (HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, DEFAULT_CONCURRENT_BATCH_SIZE, | ||||||
|                         DEFAULT_CONCURRENT_BATCH_SIZE) |                         INDENT_MAP) | ||||||
| from .exceptions import UnknownFinancialStatementItem |  | ||||||
|  |  | ||||||
|  |  | ||||||
| log = logging.getLogger(__name__) | log = logging.getLogger(__name__) | ||||||
| @@ -19,7 +17,9 @@ log = logging.getLogger(__name__) | |||||||
| # while its values will always be tuples with a length corresponding to the number of periods (columns) | # while its values will always be tuples with a length corresponding to the number of periods (columns) | ||||||
| # and elements being the actual numbers, with the exception of the first key-value-pair, which will represent | # and elements being the actual numbers, with the exception of the first key-value-pair, which will represent | ||||||
| # the end dates of the reporting periods as strings (either years or quarters). | # the end dates of the reporting periods as strings (either years or quarters). | ||||||
| ResultDict = dict[str, Union[tuple[int], tuple[str]]] | HeaderData = Tuple[int, str, str, str, str, str] | ||||||
|  | RowData = Tuple[int, float, float, float, float, float] | ||||||
|  | ResultDict = dict[str, Union[HeaderData, RowData]] | ||||||
|  |  | ||||||
|  |  | ||||||
| @in_async_session | @in_async_session | ||||||
| @@ -32,45 +32,35 @@ async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSou | |||||||
|     return BeautifulSoup(html, HTML_PARSER) |     return BeautifulSoup(html, HTML_PARSER) | ||||||
|  |  | ||||||
|  |  | ||||||
| def extract_end_dates(soup: BeautifulSoup) -> tuple[str]: | def get_row_indent(tr: Tag) -> int: | ||||||
|  |     try: | ||||||
|  |         classes = tr.div.attrs['class'] | ||||||
|  |     except KeyError: | ||||||
|  |         return 0 | ||||||
|  |     for class_name, indent in INDENT_MAP.items(): | ||||||
|  |         if class_name in classes: | ||||||
|  |             return indent | ||||||
|  |     return 0 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def extract_end_dates(soup: BeautifulSoup) -> HeaderData: | ||||||
|     """ |     """ | ||||||
|     Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a |     Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a | ||||||
|     financial statement. |     financial statement. | ||||||
|     """ |     """ | ||||||
|     ths = soup.find('div', attrs={'class': 'financials'}).thead.find_all('th') |     tr = soup.find('div', attrs={'class': 'financials'}).thead.tr | ||||||
|     return tuple(str(th.string).strip() for th in ths[1:-1]) |     ths = tr.find_all('th') | ||||||
|  |     return (get_row_indent(tr), ) + tuple(str(th.string).strip() for th in ths[1:-1]) | ||||||
|  |  | ||||||
|  |  | ||||||
| def is_relevant_table_row(tr: Tag) -> bool: | def get_all_table_rows(soup: BeautifulSoup) -> List[Tag]: | ||||||
|     """ |  | ||||||
|     Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown. |  | ||||||
|     """ |  | ||||||
|     item_name = str(tr.td.div.string).strip() |  | ||||||
|     try: |  | ||||||
|         return FIN_STMT_ITEMS[item_name] |  | ||||||
|     except KeyError: |  | ||||||
|         log.warning(f"Unknown item name '{item_name}' found in financial statement.") |  | ||||||
|         raise UnknownFinancialStatementItem |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]: |  | ||||||
|     """ |     """ | ||||||
|     Returns the table rows containing the data of interest. |     Returns the table rows containing the data of interest. | ||||||
|     """ |     """ | ||||||
|     now = datetime.utcnow() |     return soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr') | ||||||
|     trs = [] |  | ||||||
|     for tr in soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr'): |  | ||||||
|         try: |  | ||||||
|             if is_relevant_table_row(tr): |  | ||||||
|                 trs.append(tr) |  | ||||||
|         except UnknownFinancialStatementItem: |  | ||||||
|             if DEV_MODE: |  | ||||||
|                 with open(f'mwfin_unknown_items_{now.strftime("%Y-%m-%d_%H-%M-%S")}.html', 'w') as f: |  | ||||||
|                     f.write(str(soup)) |  | ||||||
|     return trs |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: | def extract_row_data(tr: Tag) -> Tuple[str, RowData]: | ||||||
|     """ |     """ | ||||||
|     Returns the name of the item displayed in the table row (of a financial statement) |     Returns the name of the item displayed in the table row (of a financial statement) | ||||||
|     as well as a number for each reporting period. |     as well as a number for each reporting period. | ||||||
| @@ -78,8 +68,8 @@ def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: | |||||||
|     item_name = str(tr.td.div.string).strip() |     item_name = str(tr.td.div.string).strip() | ||||||
|     data_div = tr.find_all('td')[-1].div.div |     data_div = tr.find_all('td')[-1].div.div | ||||||
|     values_str: str = data_div.attrs['data-chart-data'] |     values_str: str = data_div.attrs['data-chart-data'] | ||||||
|     values = tuple(int(float(s if s != '' else 0)) for s in values_str.split(',')) |     values = tuple(float(s if s != '' else 0) for s in values_str.split(',')) | ||||||
|     return item_name, values |     return item_name, (get_row_indent(tr), ) + values | ||||||
|  |  | ||||||
|  |  | ||||||
| def extract_all_data(soup: BeautifulSoup) -> ResultDict: | def extract_all_data(soup: BeautifulSoup) -> ResultDict: | ||||||
| @@ -87,7 +77,7 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict: | |||||||
|     Extracts financials from the page. |     Extracts financials from the page. | ||||||
|     """ |     """ | ||||||
|     output = {END_DATE: extract_end_dates(soup)} |     output = {END_DATE: extract_end_dates(soup)} | ||||||
|     for row in find_relevant_table_rows(soup): |     for row in get_all_table_rows(soup): | ||||||
|         row_data = extract_row_data(row) |         row_data = extract_row_data(row) | ||||||
|         output[row_data[0]] = row_data[1] |         output[row_data[0]] = row_data[1] | ||||||
|     return output |     return output | ||||||
|   | |||||||
| @@ -7,7 +7,6 @@ from bs4 import BeautifulSoup | |||||||
|  |  | ||||||
| from mwfin import functions | from mwfin import functions | ||||||
| from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF, END_DATE | from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF, END_DATE | ||||||
| from mwfin.exceptions import UnknownFinancialStatementItem |  | ||||||
|  |  | ||||||
|  |  | ||||||
| THIS_DIR = Path(__file__).parent | THIS_DIR = Path(__file__).parent | ||||||
| @@ -52,59 +51,45 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): | |||||||
|         output = await functions.soup_from_url('baz', mock_session_obj) |         output = await functions.soup_from_url('baz', mock_session_obj) | ||||||
|         self.assertEqual(expected_output, output) |         self.assertEqual(expected_output, output) | ||||||
|  |  | ||||||
|     def test_extract_end_dates(self): |     def test_get_row_indent(self): | ||||||
|         expected_output = ('End_Date_1', 'End_Date_2') |         trs = self.test_soup.find_all('tr') | ||||||
|  |         expected_output = 0 | ||||||
|  |         output = functions.get_row_indent(trs[0]) | ||||||
|  |         self.assertEqual(expected_output, output) | ||||||
|  |         for i, tr in enumerate(trs[1:], start=1): | ||||||
|  |             output = functions.get_row_indent(tr) | ||||||
|  |             self.assertEqual(i, output) | ||||||
|  |  | ||||||
|  |     @patch.object(functions, 'get_row_indent') | ||||||
|  |     def test_extract_end_dates(self, mock_get_row_indent): | ||||||
|  |         mock_get_row_indent.return_value = 0 | ||||||
|  |         expected_output = (0, 'End_Date_1', 'End_Date_2') | ||||||
|         output = functions.extract_end_dates(self.test_soup) |         output = functions.extract_end_dates(self.test_soup) | ||||||
|         self.assertTupleEqual(expected_output, output) |         self.assertTupleEqual(expected_output, output) | ||||||
|  |         mock_get_row_indent.assert_called_once_with(self.test_soup.tr) | ||||||
|  |  | ||||||
|     def test_is_relevant_table_row(self): |     def test_get_all_table_rows(self): | ||||||
|         test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments </div></td></tr>', HTML_PARSER) |  | ||||||
|         self.assertTrue(functions.is_relevant_table_row(test_soup.tr)) |  | ||||||
|         test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>', HTML_PARSER) |  | ||||||
|         self.assertFalse(functions.is_relevant_table_row(test_soup.tr)) |  | ||||||
|         test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER) |  | ||||||
|         with self.assertRaises(UnknownFinancialStatementItem): |  | ||||||
|             functions.is_relevant_table_row(test_soup.tr) |  | ||||||
|  |  | ||||||
|     @patch.object(functions, 'open') |  | ||||||
|     @patch.object(functions, 'is_relevant_table_row') |  | ||||||
|     def test_find_relevant_table_rows(self, mock_is_relevant_table_row, mock_open): |  | ||||||
|         mock_is_relevant_table_row.return_value = True |  | ||||||
|         expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr') |         expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr') | ||||||
|         tr0, tr1 = expected_output |         output = functions.get_all_table_rows(self.test_soup) | ||||||
|         output = functions.find_relevant_table_rows(self.test_soup) |         self.assertSequenceEqual(expected_output, output) | ||||||
|         self.assertListEqual(expected_output, output) |  | ||||||
|         mock_is_relevant_table_row.assert_has_calls([call(tr0), call(tr1)]) |  | ||||||
|         mock_is_relevant_table_row.reset_mock() |  | ||||||
|  |  | ||||||
|         mock_is_relevant_table_row.side_effect = UnknownFinancialStatementItem() |     @patch.object(functions, 'get_row_indent') | ||||||
|         expected_output = self.test_soup.find_all('thistagdoesntexist') |     def test_extract_row_data(self, mock_get_row_indent): | ||||||
|         output = functions.find_relevant_table_rows(self.test_soup) |         mock_get_row_indent.return_value = 1 | ||||||
|         self.assertListEqual(expected_output, output) |  | ||||||
|         mock_is_relevant_table_row.assert_has_calls([call(tr0), call(tr1)]) |  | ||||||
|         mock_is_relevant_table_row.reset_mock() |  | ||||||
|  |  | ||||||
|         mock_write = mock_open.return_value.__enter__.return_value.write |  | ||||||
|         with patch.object(functions, 'DEV_MODE', new=True): |  | ||||||
|             output = functions.find_relevant_table_rows(self.test_soup) |  | ||||||
|         self.assertListEqual(expected_output, output) |  | ||||||
|         mock_is_relevant_table_row.assert_has_calls([call(tr0), call(tr1)]) |  | ||||||
|         mock_write.assert_has_calls([call(str(self.test_soup)), call(str(self.test_soup))]) |  | ||||||
|  |  | ||||||
|     def test_extract_row_data(self): |  | ||||||
|         test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr |         test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr | ||||||
|         expected_output = ('Cash & Short Term Investments', (11000000, -22000000)) |         expected_output = ('foo', (1, 1., -2.)) | ||||||
|         output = functions.extract_row_data(test_row) |         output = functions.extract_row_data(test_row) | ||||||
|         self.assertTupleEqual(expected_output, output) |         self.assertTupleEqual(expected_output, output) | ||||||
|  |         mock_get_row_indent.assert_called_once_with(test_row) | ||||||
|  |  | ||||||
|     @patch.object(functions, 'extract_row_data') |     @patch.object(functions, 'extract_row_data') | ||||||
|     @patch.object(functions, 'find_relevant_table_rows') |     @patch.object(functions, 'get_all_table_rows') | ||||||
|     @patch.object(functions, 'extract_end_dates') |     @patch.object(functions, 'extract_end_dates') | ||||||
|     def test_extract_all_data(self, mock_extract_end_dates, mock_find_relevant_table_rows, mock_extract_row_data): |     def test_extract_all_data(self, mock_extract_end_dates, mock_get_all_table_rows, mock_extract_row_data): | ||||||
|         test_end_dates = ('foo', 'bar') |         test_end_dates = ('foo', 'bar') | ||||||
|         mock_extract_end_dates.return_value = test_end_dates |         mock_extract_end_dates.return_value = test_end_dates | ||||||
|         test_relevant_rows = ['tr1', 'tr2'] |         test_relevant_rows = ['tr1', 'tr2'] | ||||||
|         mock_find_relevant_table_rows.return_value = test_relevant_rows |         mock_get_all_table_rows.return_value = test_relevant_rows | ||||||
|         test_row_data = ('item_name', (123, 456)) |         test_row_data = ('item_name', (123, 456)) | ||||||
|         mock_extract_row_data.return_value = test_row_data |         mock_extract_row_data.return_value = test_row_data | ||||||
|         expected_output = { |         expected_output = { | ||||||
| @@ -115,7 +100,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): | |||||||
|         output = functions.extract_all_data(self.test_soup) |         output = functions.extract_all_data(self.test_soup) | ||||||
|         self.assertDictEqual(expected_output, output) |         self.assertDictEqual(expected_output, output) | ||||||
|         mock_extract_end_dates.assert_called_once_with(self.test_soup) |         mock_extract_end_dates.assert_called_once_with(self.test_soup) | ||||||
|         mock_find_relevant_table_rows.assert_called_once_with(self.test_soup) |         mock_get_all_table_rows.assert_called_once_with(self.test_soup) | ||||||
|         mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])]) |         mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])]) | ||||||
|  |  | ||||||
|     @patch.object(functions, 'extract_all_data') |     @patch.object(functions, 'extract_all_data') | ||||||
| @@ -227,9 +212,24 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): | |||||||
|         symbol = 'foo' |         symbol = 'foo' | ||||||
|         # Since the web request is mocked we always receive the same HTML markup. |         # Since the web request is mocked we always receive the same HTML markup. | ||||||
|         expected_output = { |         expected_output = { | ||||||
|             BS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)}, |             BS: { | ||||||
|             IS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)}, |                 END_DATE: (0, 'End_Date_1', 'End_Date_2'), | ||||||
|             CF: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)} |                 'foo': (1, 1., -2.), | ||||||
|  |                 'bar': (2, 2., -3.), | ||||||
|  |                 'baz': (3, 3., -4.) | ||||||
|  |             }, | ||||||
|  |             IS: { | ||||||
|  |                 END_DATE: (0, 'End_Date_1', 'End_Date_2'), | ||||||
|  |                 'foo': (1, 1., -2.), | ||||||
|  |                 'bar': (2, 2., -3.), | ||||||
|  |                 'baz': (3, 3., -4.) | ||||||
|  |             }, | ||||||
|  |             CF: { | ||||||
|  |                 END_DATE: (0, 'End_Date_1', 'End_Date_2'), | ||||||
|  |                 'foo': (1, 1., -2.), | ||||||
|  |                 'bar': (2, 2., -3.), | ||||||
|  |                 'baz': (3, 3., -4.) | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|         output = await functions.get_all_financials(symbol, session=mock_session_obj) |         output = await functions.get_all_financials(symbol, session=mock_session_obj) | ||||||
|         self.assertDictEqual(expected_output, output) |         self.assertDictEqual(expected_output, output) | ||||||
|   | |||||||
| @@ -15,7 +15,7 @@ | |||||||
|                 <table> |                 <table> | ||||||
|                     <thead> |                     <thead> | ||||||
|                         <tr> |                         <tr> | ||||||
|                             <th><div> !!Item </div><div> !!Item </div></th> |                             <th><div class="xyz abc"> !!Item </div><div> !!Item </div></th> | ||||||
|                             <th><div> End_Date_1 </div></th> |                             <th><div> End_Date_1 </div></th> | ||||||
|                             <th><div> End_Date_2 </div></th> |                             <th><div> End_Date_2 </div></th> | ||||||
|                             <th></th> |                             <th></th> | ||||||
| @@ -23,16 +23,22 @@ | |||||||
|                     </thead> |                     </thead> | ||||||
|                     <tbody> |                     <tbody> | ||||||
|                         <tr> |                         <tr> | ||||||
|                             <td><div> Cash & Short Term Investments </div><div> Cash & Short Term Investments </div></td> |                             <td><div class="xyz indent--small"> foo </div><div> foo </div></td> | ||||||
|                             <td></td> |                             <td></td> | ||||||
|                             <td></td> |                             <td></td> | ||||||
|                             <td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td> |                             <td><div> <div data-chart-data="1.0,-2.0"><div></div></td> | ||||||
|                         </tr> |                         </tr> | ||||||
|                         <tr> |                         <tr> | ||||||
|                             <td><div> Cash & Short Term Investments Growth </div><div> Cash & Short Term Investments Growth </div></td> |                             <td><div class="xyz indent--medium"> bar </div><div> bar </div></td> | ||||||
|                             <td></td> |                             <td></td> | ||||||
|                             <td></td> |                             <td></td> | ||||||
|  |                             <td><div> <div data-chart-data="2.0,-3.0"><div></div></td> | ||||||
|  |                         </tr> | ||||||
|  |                         <tr> | ||||||
|  |                             <td><div class="xyz indent--large"> baz </div><div> baz </div></td> | ||||||
|                             <td></td> |                             <td></td> | ||||||
|  |                             <td></td> | ||||||
|  |                             <td><div> <div data-chart-data="3.0,-4.0"><div></div></td> | ||||||
|                         </tr> |                         </tr> | ||||||
|                     </tbody> |                     </tbody> | ||||||
|                 </table> |                 </table> | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user