minor refactoring & adjustments
This commit is contained in:
parent
38dd29f35b
commit
c93fb9692e
@ -1,4 +1,3 @@
|
||||
DEV_MODE = False
|
||||
MAIN_LOGGER_NAME = 'mwfin'
|
||||
|
||||
HTML_PARSER = 'html.parser'
|
||||
@ -13,213 +12,3 @@ FIN_STMT_URL_SUFFIX = {
|
||||
CF: '/cash-flow'
|
||||
}
|
||||
END_DATE = 'End Date'
|
||||
|
||||
# All items marked `False` do not need to be scraped
|
||||
# because they are calculated from other items (e.g. growth or ratios).
|
||||
FIN_STMT_ITEMS = {
|
||||
#################
|
||||
# Balance Sheet #
|
||||
#################
|
||||
"Cash & Short Term Investments": True,
|
||||
"Cash & Short Term Investments Growth": False,
|
||||
"Cash Only": True,
|
||||
"Short-Term Investments": True,
|
||||
"Cash & ST Investments / Total Assets": False,
|
||||
"Total Accounts Receivable": True,
|
||||
"Total Accounts Receivable Growth": False,
|
||||
"Accounts Receivables, Net": True,
|
||||
"Accounts Receivables, Gross": True,
|
||||
"Bad Debt/Doubtful Accounts": True,
|
||||
"Other Receivable": True,
|
||||
"Accounts Receivable Turnover": False,
|
||||
"Inventories": True,
|
||||
"Finished Goods": True,
|
||||
"Work in Progress": True,
|
||||
"Raw Materials": True,
|
||||
"Progress Payments & Other": True,
|
||||
"Other Current Assets": True,
|
||||
"Miscellaneous Current Assets": True,
|
||||
"Total Current Assets": True,
|
||||
"Net Property, Plant & Equipment": True,
|
||||
"Property, Plant & Equipment - Gross": True,
|
||||
"Buildings": True,
|
||||
"Land & Improvements": True,
|
||||
"Computer Software and Equipment": True,
|
||||
"Other Property, Plant & Equipment": True,
|
||||
"Accumulated Depreciation": True,
|
||||
"Total Investments and Advances": True,
|
||||
"Other Long-Term Investments": True,
|
||||
"Long-Term Note Receivables": True,
|
||||
"Intangible Assets": True,
|
||||
"Net Goodwill": True,
|
||||
"Net Other Intangibles": True,
|
||||
"Other Assets": True,
|
||||
"Total Assets": True,
|
||||
"Total Assets Growth": False,
|
||||
"ST Debt & Current Portion LT Debt": True,
|
||||
"Short Term Debt": True,
|
||||
"Current Portion of Long Term Debt": True,
|
||||
"Accounts Payable": True,
|
||||
"Accounts Payable Growth": False,
|
||||
"Income Tax Payable": True,
|
||||
"Other Current Liabilities": True,
|
||||
"Dividends Payable": True,
|
||||
"Accrued Payroll": True,
|
||||
"Miscellaneous Current Liabilities": True,
|
||||
"Total Current Liabilities": True,
|
||||
"Long-Term Debt": True,
|
||||
"Long-Term Debt excl. Capitalized Leases": True,
|
||||
"Non-Convertible Debt": True,
|
||||
"Convertible Debt": True,
|
||||
"Capitalized Lease Obligations": True,
|
||||
"Provision for Risks & Charges": True,
|
||||
"Deferred Taxes": True,
|
||||
"Deferred Taxes - Credits": True,
|
||||
"Deferred Taxes - Debit": True,
|
||||
"Other Liabilities": True,
|
||||
"Other Liabilities (excl. Deferred Income)": True,
|
||||
"Deferred Income": True,
|
||||
"Total Liabilities": True,
|
||||
"Non-Equity Reserves": True,
|
||||
"Total Liabilities / Total Assets": False,
|
||||
"Preferred Stock (Carrying Value)": True,
|
||||
"Redeemable Preferred Stock": True,
|
||||
"Non-Redeemable Preferred Stock": True,
|
||||
"Common Equity (Total)": True,
|
||||
"Common Equity / Total Assets": False,
|
||||
"Common Stock Par/Carry Value": True,
|
||||
"Retained Earnings": True,
|
||||
"ESOP Debt Guarantee": True,
|
||||
"Cumulative Translation Adjustment/Unrealized For. Exch. Gain": True,
|
||||
"Unrealized Gain/Loss Marketable Securities": True,
|
||||
"Revaluation Reserves": True,
|
||||
"Treasury Stock": True,
|
||||
"Total Shareholders' Equity": True,
|
||||
"Total Shareholders' Equity / Total Assets": False,
|
||||
"Accumulated Minority Interest": True,
|
||||
"Total Equity": True,
|
||||
"Liabilities & Shareholders' Equity": True,
|
||||
|
||||
####################
|
||||
# Income Statement #
|
||||
####################
|
||||
"Sales/Revenue": True,
|
||||
"Sales Growth": False,
|
||||
"Cost of Goods Sold (COGS) incl. D&A": True,
|
||||
"COGS Growth": False,
|
||||
"COGS excluding D&A": True,
|
||||
"Depreciation & Amortization Expense": True,
|
||||
"Depreciation": True,
|
||||
"Amortization of Intangibles": True,
|
||||
"Gross Income": True,
|
||||
"Gross Income Growth": False,
|
||||
"Gross Profit Margin": False,
|
||||
"SG&A Expense": True,
|
||||
"SGA Growth": False,
|
||||
"Research & Development": True,
|
||||
"Other SG&A": True,
|
||||
"Other Operating Expense": True,
|
||||
"Unusual Expense": True,
|
||||
"EBIT after Unusual Expense": True,
|
||||
"Non Operating Income/Expense": True,
|
||||
"Non-Operating Interest Income": True,
|
||||
"Equity in Affiliates (Pretax)": True,
|
||||
"Interest Expense": True,
|
||||
"Interest Expense Growth": False,
|
||||
"Gross Interest Expense": True,
|
||||
"Interest Capitalized": True,
|
||||
"Pretax Income": True,
|
||||
"Pretax Income Growth": False,
|
||||
"Pretax Margin": False,
|
||||
"Income Tax": True,
|
||||
"Income Tax - Current Domestic": True,
|
||||
"Income Tax - Current Foreign": True,
|
||||
"Income Tax - Deferred Domestic": True,
|
||||
"Income Tax - Deferred Foreign": True,
|
||||
"Income Tax Credits": True,
|
||||
"Equity in Affiliates": True,
|
||||
"Other After Tax Income (Expense)": True,
|
||||
"Consolidated Net Income": True,
|
||||
"Minority Interest Expense": True,
|
||||
"Net Income": True,
|
||||
"Net Income Growth": False,
|
||||
"Net Margin Growth": False,
|
||||
"Extraordinaries & Discontinued Operations": True,
|
||||
"Extra Items & Gain/Loss Sale Of Assets": True,
|
||||
"Cumulative Effect - Accounting Chg": True,
|
||||
"Discontinued Operations": True,
|
||||
"Net Income After Extraordinaries": True,
|
||||
"Preferred Dividends": True,
|
||||
"Net Income Available to Common": True,
|
||||
"EPS (Basic)": True,
|
||||
"EPS (Basic) Growth": False,
|
||||
"Basic Shares Outstanding": True,
|
||||
"EPS (Diluted)": True,
|
||||
"EPS (Diluted) Growth": False,
|
||||
"Diluted Shares Outstanding": True,
|
||||
"EBITDA": True,
|
||||
"EBITDA Growth": False,
|
||||
"EBITDA Margin": False,
|
||||
|
||||
#######################
|
||||
# Cash Flow Statement #
|
||||
#######################
|
||||
"Net Income before Extraordinaries": True,
|
||||
# "Net Income Growth": False,
|
||||
"Depreciation, Depletion & Amortization": True,
|
||||
"Depreciation and Depletion": True,
|
||||
"Amortization of Intangible Assets": True,
|
||||
"Deferred Taxes & Investment Tax Credit": True,
|
||||
# "Deferred Taxes": True,
|
||||
"Investment Tax Credit": True,
|
||||
"Other Funds": True,
|
||||
"Funds from Operations": True,
|
||||
"Extraordinaries": True,
|
||||
"Changes in Working Capital": True,
|
||||
"Receivables": True,
|
||||
# "Accounts Payable": True,
|
||||
"Other Assets/Liabilities": True,
|
||||
"Net Operating Cash Flow": True,
|
||||
"Net Operating Cash Flow Growth": False,
|
||||
"Net Operating Cash Flow / Sales": False,
|
||||
"Capital Expenditures": True,
|
||||
"Capital Expenditures Growth": False,
|
||||
"Capital Expenditures / Sales": False,
|
||||
"Capital Expenditures (Fixed Assets)": True,
|
||||
"Capital Expenditures (Other Assets)": True,
|
||||
"Net Assets from Acquisitions": True,
|
||||
"Sale of Fixed Assets & Businesses": True,
|
||||
"Purchase/Sale of Investments": True,
|
||||
"Purchase of Investments": True,
|
||||
"Sale/Maturity of Investments": True,
|
||||
"Other Uses": True,
|
||||
"Other Sources": True,
|
||||
"Net Investing Cash Flow": True,
|
||||
"Net Investing Cash Flow Growth": False,
|
||||
"Net Investing Cash Flow / Sales": False,
|
||||
"Cash Dividends Paid - Total": True,
|
||||
"Common Dividends": True,
|
||||
# "Preferred Dividends": True,
|
||||
"Change in Capital Stock": True,
|
||||
"Repurchase of Common & Preferred Stk.": True,
|
||||
"Sale of Common & Preferred Stock": True,
|
||||
"Proceeds from Stock Options": True,
|
||||
"Other Proceeds from Sale of Stock": True,
|
||||
"Issuance/Reduction of Debt, Net": True,
|
||||
"Change in Current Debt": True,
|
||||
"Change in Long-Term Debt": True,
|
||||
"Issuance of Long-Term Debt": True,
|
||||
"Reduction in Long-Term Debt": True,
|
||||
# "Other Funds": True,
|
||||
# "Other Uses": True,
|
||||
# "Other Sources": True,
|
||||
"Net Financing Cash Flow": True,
|
||||
"Net Financing Cash Flow Growth": False,
|
||||
"Net Financing Cash Flow / Sales": False,
|
||||
"Exchange Rate Effect": True,
|
||||
"Miscellaneous Funds": True,
|
||||
"Net Change in Cash": True,
|
||||
"Free Cash Flow": True,
|
||||
"Free Cash Flow Growth": False,
|
||||
"Free Cash Flow Yield": False,
|
||||
}
|
||||
|
@ -4,7 +4,3 @@ class WrongAssumptions(Exception):
|
||||
|
||||
class UnexpectedMarkup(WrongAssumptions):
|
||||
pass
|
||||
|
||||
|
||||
class UnknownFinancialStatementItem(WrongAssumptions):
|
||||
pass
|
||||
|
@ -1,16 +1,13 @@
|
||||
import logging
|
||||
import asyncio
|
||||
from typing import Union, List, Dict
|
||||
from datetime import datetime
|
||||
|
||||
from aiohttp.client import ClientSession
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import Tag
|
||||
from webutils import in_async_session, gather_in_batches
|
||||
|
||||
from .constants import (DEV_MODE, HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, FIN_STMT_ITEMS,
|
||||
DEFAULT_CONCURRENT_BATCH_SIZE)
|
||||
from .exceptions import UnknownFinancialStatementItem
|
||||
from .constants import HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, DEFAULT_CONCURRENT_BATCH_SIZE
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -19,7 +16,7 @@ log = logging.getLogger(__name__)
|
||||
# while its values will always be tuples with a length corresponding to the number of periods (columns)
|
||||
# and elements being the actual numbers, with the exception of the first key-value-pair, which will represent
|
||||
# the end dates of the reporting periods as strings (either years or quarters).
|
||||
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
|
||||
ResultDict = dict[str, Union[tuple[float], tuple[str]]]
|
||||
|
||||
|
||||
@in_async_session
|
||||
@ -41,36 +38,14 @@ def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
|
||||
return tuple(str(th.string).strip() for th in ths[1:-1])
|
||||
|
||||
|
||||
def is_relevant_table_row(tr: Tag) -> bool:
|
||||
"""
|
||||
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
|
||||
"""
|
||||
item_name = str(tr.td.div.string).strip()
|
||||
try:
|
||||
return FIN_STMT_ITEMS[item_name]
|
||||
except KeyError:
|
||||
log.warning(f"Unknown item name '{item_name}' found in financial statement.")
|
||||
raise UnknownFinancialStatementItem
|
||||
|
||||
|
||||
def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]:
|
||||
def get_all_table_rows(soup: BeautifulSoup) -> List[Tag]:
|
||||
"""
|
||||
Returns the table rows containing the data of interest.
|
||||
"""
|
||||
now = datetime.utcnow()
|
||||
trs = []
|
||||
for tr in soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr'):
|
||||
try:
|
||||
if is_relevant_table_row(tr):
|
||||
trs.append(tr)
|
||||
except UnknownFinancialStatementItem:
|
||||
if DEV_MODE:
|
||||
with open(f'mwfin_unknown_items_{now.strftime("%Y-%m-%d_%H-%M-%S")}.html', 'w') as f:
|
||||
f.write(str(soup))
|
||||
return trs
|
||||
return soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
|
||||
|
||||
|
||||
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
||||
def extract_row_data(tr: Tag) -> tuple[str, tuple[float]]:
|
||||
"""
|
||||
Returns the name of the item displayed in the table row (of a financial statement)
|
||||
as well as a number for each reporting period.
|
||||
@ -78,7 +53,7 @@ def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
||||
item_name = str(tr.td.div.string).strip()
|
||||
data_div = tr.find_all('td')[-1].div.div
|
||||
values_str: str = data_div.attrs['data-chart-data']
|
||||
values = tuple(int(float(s if s != '' else 0)) for s in values_str.split(','))
|
||||
values = tuple(float(s if s != '' else 0) for s in values_str.split(','))
|
||||
return item_name, values
|
||||
|
||||
|
||||
@ -87,7 +62,7 @@ def extract_all_data(soup: BeautifulSoup) -> ResultDict:
|
||||
Extracts financials from the page.
|
||||
"""
|
||||
output = {END_DATE: extract_end_dates(soup)}
|
||||
for row in find_relevant_table_rows(soup):
|
||||
for row in get_all_table_rows(soup):
|
||||
row_data = extract_row_data(row)
|
||||
output[row_data[0]] = row_data[1]
|
||||
return output
|
||||
|
@ -7,7 +7,6 @@ from bs4 import BeautifulSoup
|
||||
|
||||
from mwfin import functions
|
||||
from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF, END_DATE
|
||||
from mwfin.exceptions import UnknownFinancialStatementItem
|
||||
|
||||
|
||||
THIS_DIR = Path(__file__).parent
|
||||
@ -64,18 +63,18 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
||||
|
||||
def test_extract_row_data(self):
|
||||
test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr
|
||||
expected_output = ('Cash & Short Term Investments', (11000000, -22000000))
|
||||
expected_output = ('foo', (1., -2.))
|
||||
output = functions.extract_row_data(test_row)
|
||||
self.assertTupleEqual(expected_output, output)
|
||||
|
||||
@patch.object(functions, 'extract_row_data')
|
||||
@patch.object(functions, 'find_relevant_table_rows')
|
||||
@patch.object(functions, 'get_all_table_rows')
|
||||
@patch.object(functions, 'extract_end_dates')
|
||||
def test_extract_all_data(self, mock_extract_end_dates, mock_find_relevant_table_rows, mock_extract_row_data):
|
||||
def test_extract_all_data(self, mock_extract_end_dates, mock_get_all_table_rows, mock_extract_row_data):
|
||||
test_end_dates = ('foo', 'bar')
|
||||
mock_extract_end_dates.return_value = test_end_dates
|
||||
test_relevant_rows = ['tr1', 'tr2']
|
||||
mock_find_relevant_table_rows.return_value = test_relevant_rows
|
||||
mock_get_all_table_rows.return_value = test_relevant_rows
|
||||
test_row_data = ('item_name', (123, 456))
|
||||
mock_extract_row_data.return_value = test_row_data
|
||||
expected_output = {
|
||||
@ -86,7 +85,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
||||
output = functions.extract_all_data(self.test_soup)
|
||||
self.assertDictEqual(expected_output, output)
|
||||
mock_extract_end_dates.assert_called_once_with(self.test_soup)
|
||||
mock_find_relevant_table_rows.assert_called_once_with(self.test_soup)
|
||||
mock_get_all_table_rows.assert_called_once_with(self.test_soup)
|
||||
mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])
|
||||
|
||||
@patch.object(functions, 'extract_all_data')
|
||||
@ -198,9 +197,9 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
||||
symbol = 'foo'
|
||||
# Since the web request is mocked we always receive the same HTML markup.
|
||||
expected_output = {
|
||||
BS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)},
|
||||
IS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)},
|
||||
CF: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)}
|
||||
BS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'foo': (1., -2.), 'bar': (2., -3.)},
|
||||
IS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'foo': (1., -2.), 'bar': (2., -3.)},
|
||||
CF: {END_DATE: ('End_Date_1', 'End_Date_2'), 'foo': (1., -2.), 'bar': (2., -3.)}
|
||||
}
|
||||
output = await functions.get_all_financials(symbol, session=mock_session_obj)
|
||||
self.assertDictEqual(expected_output, output)
|
||||
|
@ -23,16 +23,16 @@
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><div> Cash & Short Term Investments </div><div> Cash & Short Term Investments </div></td>
|
||||
<td><div> foo </div><div> foo </div></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td>
|
||||
<td><div> <div data-chart-data="1.0,-2.0"><div></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div> Cash & Short Term Investments Growth </div><div> Cash & Short Term Investments Growth </div></td>
|
||||
<td></td>
|
||||
<td><div> bar </div><div> bar </div></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td><div> <div data-chart-data="2.0,-3.0"><div></div></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
Loading…
Reference in New Issue
Block a user