Compare commits
9 Commits
462e34013e
...
master
Author | SHA1 | Date | |
---|---|---|---|
911fa15f46 | |||
f29f22a52e | |||
a2f3f590f6 | |||
3276dbfae3 | |||
2b3fb9f7ba | |||
8afab2fde8 | |||
f7cb5de928 | |||
c93fb9692e | |||
38dd29f35b |
@ -8,3 +8,5 @@ omit =
|
|||||||
fail_under = 100
|
fail_under = 100
|
||||||
show_missing = True
|
show_missing = True
|
||||||
skip_covered = True
|
skip_covered = True
|
||||||
|
exclude_lines =
|
||||||
|
if __name__ == .__main__.:
|
||||||
|
@ -1,19 +1,15 @@
|
|||||||
import logging
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
import csv
|
|
||||||
import json
|
import json
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
from .functions import get_all_financials, ResultDict
|
from .functions import get_all_financials
|
||||||
from .constants import END_DATE, MAIN_LOGGER_NAME, DEFAULT_CONCURRENT_BATCH_SIZE
|
from .constants import MAIN_LOGGER_NAME, DEFAULT_CONCURRENT_BATCH_SIZE
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(MAIN_LOGGER_NAME)
|
log = logging.getLogger(MAIN_LOGGER_NAME)
|
||||||
|
|
||||||
JSON_EXT, CSV_EXT = '.json', '.csv'
|
|
||||||
|
|
||||||
TICKER_SYMBOL = 'ticker_symbol'
|
TICKER_SYMBOL = 'ticker_symbol'
|
||||||
QUARTERLY = 'quarterly'
|
QUARTERLY = 'quarterly'
|
||||||
BATCH_SIZE = 'concurrent_batch_size'
|
BATCH_SIZE = 'concurrent_batch_size'
|
||||||
@ -23,6 +19,7 @@ VERBOSE = 'verbose'
|
|||||||
|
|
||||||
|
|
||||||
def parse_cli() -> dict:
|
def parse_cli() -> dict:
|
||||||
|
"""Returns the parsed command line arguments for the program as a dictionary."""
|
||||||
parser = ArgumentParser(description="Scrape company financials")
|
parser = ArgumentParser(description="Scrape company financials")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
TICKER_SYMBOL,
|
TICKER_SYMBOL,
|
||||||
@ -40,14 +37,13 @@ def parse_cli() -> dict:
|
|||||||
type=int,
|
type=int,
|
||||||
default=DEFAULT_CONCURRENT_BATCH_SIZE,
|
default=DEFAULT_CONCURRENT_BATCH_SIZE,
|
||||||
help="If multiple ticker symbols are passed, the company financials can be scraped concurrently. "
|
help="If multiple ticker symbols are passed, the company financials can be scraped concurrently. "
|
||||||
"This argument determines how many datasets are scraped concurrently at any moment in time. "
|
"This argument determines how many companies are scraped concurrently. "
|
||||||
"By default, they are scraped sequentially."
|
"By default, they are scraped sequentially (i.e. a batch size of 1)."
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-f', f'--{TO_FILE.replace("_", "-")}',
|
'-f', f'--{TO_FILE.replace("_", "-")}',
|
||||||
type=Path,
|
type=Path,
|
||||||
help="Writes results to the specified destination file. If omitted results are printed to stdout. "
|
help="Writes results to the specified destination file. If omitted results are printed to stdout."
|
||||||
"Depending on the file name suffix, the output format can be either as CSV or in JSON."
|
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
f'--{JSON_INDENT.replace("_", "-")}',
|
f'--{JSON_INDENT.replace("_", "-")}',
|
||||||
@ -66,8 +62,13 @@ def parse_cli() -> dict:
|
|||||||
|
|
||||||
|
|
||||||
def configure_logging(verbosity: int) -> None:
|
def configure_logging(verbosity: int) -> None:
|
||||||
|
"""
|
||||||
|
Sets up logging by adding a stderr stream handler and setting the root logger's level according to the specified verbosity.
|
||||||
|
A verbosity of 0 (or less) sets the log level to CRITICAL.
|
||||||
|
"""
|
||||||
root_logger = logging.getLogger()
|
root_logger = logging.getLogger()
|
||||||
root_logger.addHandler(logging.StreamHandler())
|
root_logger.addHandler(logging.StreamHandler())
|
||||||
|
root_logger.setLevel(logging.CRITICAL)
|
||||||
if verbosity > 2:
|
if verbosity > 2:
|
||||||
root_logger.setLevel(logging.DEBUG)
|
root_logger.setLevel(logging.DEBUG)
|
||||||
elif verbosity == 2:
|
elif verbosity == 2:
|
||||||
@ -76,16 +77,8 @@ def configure_logging(verbosity: int) -> None:
|
|||||||
root_logger.setLevel(logging.WARNING)
|
root_logger.setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
|
||||||
def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None:
    """
    Writes the financial statement data as CSV rows to an open file object.

    For every statement a header row is written (statement key followed by the reporting period end dates),
    followed by one row per remaining item (item name followed by its values converted to strings).

    Args:
        data:
            Dictionary mapping a statement key to its result dictionary; each result dictionary must contain
            the `END_DATE` key. The dictionaries are only read, never modified.
        file_obj:
            Writable text file object handed to `csv.writer`

    Raises:
        KeyError: If a result dictionary has no `END_DATE` entry
    """
    writer = csv.writer(file_obj)
    for statement_key, statement_dict in data.items():
        # Look the end dates up instead of popping them, so the caller's data stays intact.
        writer.writerow([statement_key, *statement_dict[END_DATE]])
        for key, values in statement_dict.items():
            if key == END_DATE:
                # Already written as the header row of this statement
                continue
            writer.writerow([key, *(str(val) for val in values)])
|
|
||||||
|
|
||||||
|
|
||||||
async def main() -> None:
|
async def main() -> None:
|
||||||
|
"""Parses CLI arguments, configures logging to stderr, performs the scraping, and prints/saves the data."""
|
||||||
args = parse_cli()
|
args = parse_cli()
|
||||||
configure_logging(args[VERBOSE])
|
configure_logging(args[VERBOSE])
|
||||||
data = await get_all_financials(*args[TICKER_SYMBOL], quarterly=args[QUARTERLY],
|
data = await get_all_financials(*args[TICKER_SYMBOL], quarterly=args[QUARTERLY],
|
||||||
@ -95,11 +88,6 @@ async def main() -> None:
|
|||||||
print(json.dumps(data, indent=args[JSON_INDENT]))
|
print(json.dumps(data, indent=args[JSON_INDENT]))
|
||||||
return
|
return
|
||||||
with open(path, 'w') as f:
|
with open(path, 'w') as f:
|
||||||
if path.suffix.lower() == CSV_EXT:
|
|
||||||
write_to_csv(data, f)
|
|
||||||
return
|
|
||||||
if not path.suffix.lower() == JSON_EXT:
|
|
||||||
log.warning(f"Extension '{path.suffix}' unknown; using JSON format")
|
|
||||||
json.dump(data, f, indent=args[JSON_INDENT])
|
json.dump(data, f, indent=args[JSON_INDENT])
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
DEV_MODE = False
|
|
||||||
MAIN_LOGGER_NAME = 'mwfin'
|
MAIN_LOGGER_NAME = 'mwfin'
|
||||||
|
|
||||||
HTML_PARSER = 'html.parser'
|
HTML_PARSER = 'html.parser'
|
||||||
@ -12,214 +11,9 @@ FIN_STMT_URL_SUFFIX = {
|
|||||||
IS: '',
|
IS: '',
|
||||||
CF: '/cash-flow'
|
CF: '/cash-flow'
|
||||||
}
|
}
|
||||||
END_DATE = 'End Date'
|
INDENT_MAP = {
|
||||||
|
'indent--small': 1,
|
||||||
# All items marked `False` do not need to be scraped
|
'indent--medium': 2,
|
||||||
# because they are calculated from other items (e.g. growth or ratios).
|
'indent--large': 3,
|
||||||
FIN_STMT_ITEMS = {
|
|
||||||
#################
|
|
||||||
# Balance Sheet #
|
|
||||||
#################
|
|
||||||
"Cash & Short Term Investments": True,
|
|
||||||
"Cash & Short Term Investments Growth": False,
|
|
||||||
"Cash Only": True,
|
|
||||||
"Short-Term Investments": True,
|
|
||||||
"Cash & ST Investments / Total Assets": False,
|
|
||||||
"Total Accounts Receivable": True,
|
|
||||||
"Total Accounts Receivable Growth": False,
|
|
||||||
"Accounts Receivables, Net": True,
|
|
||||||
"Accounts Receivables, Gross": True,
|
|
||||||
"Bad Debt/Doubtful Accounts": True,
|
|
||||||
"Other Receivable": True,
|
|
||||||
"Accounts Receivable Turnover": False,
|
|
||||||
"Inventories": True,
|
|
||||||
"Finished Goods": True,
|
|
||||||
"Work in Progress": True,
|
|
||||||
"Raw Materials": True,
|
|
||||||
"Progress Payments & Other": True,
|
|
||||||
"Other Current Assets": True,
|
|
||||||
"Miscellaneous Current Assets": True,
|
|
||||||
"Total Current Assets": True,
|
|
||||||
"Net Property, Plant & Equipment": True,
|
|
||||||
"Property, Plant & Equipment - Gross": True,
|
|
||||||
"Buildings": True,
|
|
||||||
"Land & Improvements": True,
|
|
||||||
"Computer Software and Equipment": True,
|
|
||||||
"Other Property, Plant & Equipment": True,
|
|
||||||
"Accumulated Depreciation": True,
|
|
||||||
"Total Investments and Advances": True,
|
|
||||||
"Other Long-Term Investments": True,
|
|
||||||
"Long-Term Note Receivables": True,
|
|
||||||
"Intangible Assets": True,
|
|
||||||
"Net Goodwill": True,
|
|
||||||
"Net Other Intangibles": True,
|
|
||||||
"Other Assets": True,
|
|
||||||
"Total Assets": True,
|
|
||||||
"Total Assets Growth": False,
|
|
||||||
"ST Debt & Current Portion LT Debt": True,
|
|
||||||
"Short Term Debt": True,
|
|
||||||
"Current Portion of Long Term Debt": True,
|
|
||||||
"Accounts Payable": True,
|
|
||||||
"Accounts Payable Growth": False,
|
|
||||||
"Income Tax Payable": True,
|
|
||||||
"Other Current Liabilities": True,
|
|
||||||
"Dividends Payable": True,
|
|
||||||
"Accrued Payroll": True,
|
|
||||||
"Miscellaneous Current Liabilities": True,
|
|
||||||
"Total Current Liabilities": True,
|
|
||||||
"Long-Term Debt": True,
|
|
||||||
"Long-Term Debt excl. Capitalized Leases": True,
|
|
||||||
"Non-Convertible Debt": True,
|
|
||||||
"Convertible Debt": True,
|
|
||||||
"Capitalized Lease Obligations": True,
|
|
||||||
"Provision for Risks & Charges": True,
|
|
||||||
"Deferred Taxes": True,
|
|
||||||
"Deferred Taxes - Credits": True,
|
|
||||||
"Deferred Taxes - Debit": True,
|
|
||||||
"Other Liabilities": True,
|
|
||||||
"Other Liabilities (excl. Deferred Income)": True,
|
|
||||||
"Deferred Income": True,
|
|
||||||
"Total Liabilities": True,
|
|
||||||
"Non-Equity Reserves": True,
|
|
||||||
"Total Liabilities / Total Assets": False,
|
|
||||||
"Preferred Stock (Carrying Value)": True,
|
|
||||||
"Redeemable Preferred Stock": True,
|
|
||||||
"Non-Redeemable Preferred Stock": True,
|
|
||||||
"Common Equity (Total)": True,
|
|
||||||
"Common Equity / Total Assets": False,
|
|
||||||
"Common Stock Par/Carry Value": True,
|
|
||||||
"Retained Earnings": True,
|
|
||||||
"ESOP Debt Guarantee": True,
|
|
||||||
"Cumulative Translation Adjustment/Unrealized For. Exch. Gain": True,
|
|
||||||
"Unrealized Gain/Loss Marketable Securities": True,
|
|
||||||
"Revaluation Reserves": True,
|
|
||||||
"Treasury Stock": True,
|
|
||||||
"Total Shareholders' Equity": True,
|
|
||||||
"Total Shareholders' Equity / Total Assets": False,
|
|
||||||
"Accumulated Minority Interest": True,
|
|
||||||
"Total Equity": True,
|
|
||||||
"Liabilities & Shareholders' Equity": True,
|
|
||||||
|
|
||||||
####################
|
|
||||||
# Income Statement #
|
|
||||||
####################
|
|
||||||
"Sales/Revenue": True,
|
|
||||||
"Sales Growth": False,
|
|
||||||
"Cost of Goods Sold (COGS) incl. D&A": True,
|
|
||||||
"COGS Growth": False,
|
|
||||||
"COGS excluding D&A": True,
|
|
||||||
"Depreciation & Amortization Expense": True,
|
|
||||||
"Depreciation": True,
|
|
||||||
"Amortization of Intangibles": True,
|
|
||||||
"Gross Income": True,
|
|
||||||
"Gross Income Growth": False,
|
|
||||||
"Gross Profit Margin": False,
|
|
||||||
"SG&A Expense": True,
|
|
||||||
"SGA Growth": False,
|
|
||||||
"Research & Development": True,
|
|
||||||
"Other SG&A": True,
|
|
||||||
"Other Operating Expense": True,
|
|
||||||
"Unusual Expense": True,
|
|
||||||
"EBIT after Unusual Expense": True,
|
|
||||||
"Non Operating Income/Expense": True,
|
|
||||||
"Non-Operating Interest Income": True,
|
|
||||||
"Equity in Affiliates (Pretax)": True,
|
|
||||||
"Interest Expense": True,
|
|
||||||
"Interest Expense Growth": False,
|
|
||||||
"Gross Interest Expense": True,
|
|
||||||
"Interest Capitalized": True,
|
|
||||||
"Pretax Income": True,
|
|
||||||
"Pretax Income Growth": False,
|
|
||||||
"Pretax Margin": False,
|
|
||||||
"Income Tax": True,
|
|
||||||
"Income Tax - Current Domestic": True,
|
|
||||||
"Income Tax - Current Foreign": True,
|
|
||||||
"Income Tax - Deferred Domestic": True,
|
|
||||||
"Income Tax - Deferred Foreign": True,
|
|
||||||
"Income Tax Credits": True,
|
|
||||||
"Equity in Affiliates": True,
|
|
||||||
"Other After Tax Income (Expense)": True,
|
|
||||||
"Consolidated Net Income": True,
|
|
||||||
"Minority Interest Expense": True,
|
|
||||||
"Net Income": True,
|
|
||||||
"Net Income Growth": False,
|
|
||||||
"Net Margin Growth": False,
|
|
||||||
"Extraordinaries & Discontinued Operations": True,
|
|
||||||
"Extra Items & Gain/Loss Sale Of Assets": True,
|
|
||||||
"Cumulative Effect - Accounting Chg": True,
|
|
||||||
"Discontinued Operations": True,
|
|
||||||
"Net Income After Extraordinaries": True,
|
|
||||||
"Preferred Dividends": True,
|
|
||||||
"Net Income Available to Common": True,
|
|
||||||
"EPS (Basic)": True,
|
|
||||||
"EPS (Basic) Growth": False,
|
|
||||||
"Basic Shares Outstanding": True,
|
|
||||||
"EPS (Diluted)": True,
|
|
||||||
"EPS (Diluted) Growth": False,
|
|
||||||
"Diluted Shares Outstanding": True,
|
|
||||||
"EBITDA": True,
|
|
||||||
"EBITDA Growth": False,
|
|
||||||
"EBITDA Margin": False,
|
|
||||||
|
|
||||||
#######################
|
|
||||||
# Cash Flow Statement #
|
|
||||||
#######################
|
|
||||||
"Net Income before Extraordinaries": True,
|
|
||||||
# "Net Income Growth": False,
|
|
||||||
"Depreciation, Depletion & Amortization": True,
|
|
||||||
"Depreciation and Depletion": True,
|
|
||||||
"Amortization of Intangible Assets": True,
|
|
||||||
"Deferred Taxes & Investment Tax Credit": True,
|
|
||||||
# "Deferred Taxes": True,
|
|
||||||
"Investment Tax Credit": True,
|
|
||||||
"Other Funds": True,
|
|
||||||
"Funds from Operations": True,
|
|
||||||
"Extraordinaries": True,
|
|
||||||
"Changes in Working Capital": True,
|
|
||||||
"Receivables": True,
|
|
||||||
# "Accounts Payable": True,
|
|
||||||
"Other Assets/Liabilities": True,
|
|
||||||
"Net Operating Cash Flow": True,
|
|
||||||
"Net Operating Cash Flow Growth": False,
|
|
||||||
"Net Operating Cash Flow / Sales": False,
|
|
||||||
"Capital Expenditures": True,
|
|
||||||
"Capital Expenditures Growth": False,
|
|
||||||
"Capital Expenditures / Sales": False,
|
|
||||||
"Capital Expenditures (Fixed Assets)": True,
|
|
||||||
"Capital Expenditures (Other Assets)": True,
|
|
||||||
"Net Assets from Acquisitions": True,
|
|
||||||
"Sale of Fixed Assets & Businesses": True,
|
|
||||||
"Purchase/Sale of Investments": True,
|
|
||||||
"Purchase of Investments": True,
|
|
||||||
"Sale/Maturity of Investments": True,
|
|
||||||
"Other Uses": True,
|
|
||||||
"Other Sources": True,
|
|
||||||
"Net Investing Cash Flow": True,
|
|
||||||
"Net Investing Cash Flow Growth": False,
|
|
||||||
"Net Investing Cash Flow / Sales": False,
|
|
||||||
"Cash Dividends Paid - Total": True,
|
|
||||||
"Common Dividends": True,
|
|
||||||
# "Preferred Dividends": True,
|
|
||||||
"Change in Capital Stock": True,
|
|
||||||
"Repurchase of Common & Preferred Stk.": True,
|
|
||||||
"Sale of Common & Preferred Stock": True,
|
|
||||||
"Proceeds from Stock Options": True,
|
|
||||||
"Other Proceeds from Sale of Stock": True,
|
|
||||||
"Issuance/Reduction of Debt, Net": True,
|
|
||||||
"Change in Current Debt": True,
|
|
||||||
"Change in Long-Term Debt": True,
|
|
||||||
"Issuance of Long-Term Debt": True,
|
|
||||||
"Reduction in Long-Term Debt": True,
|
|
||||||
# "Other Funds": True,
|
|
||||||
# "Other Uses": True,
|
|
||||||
# "Other Sources": True,
|
|
||||||
"Net Financing Cash Flow": True,
|
|
||||||
"Net Financing Cash Flow Growth": False,
|
|
||||||
"Net Financing Cash Flow / Sales": False,
|
|
||||||
"Exchange Rate Effect": True,
|
|
||||||
"Miscellaneous Funds": True,
|
|
||||||
"Net Change in Cash": True,
|
|
||||||
"Free Cash Flow": True,
|
|
||||||
"Free Cash Flow Growth": False,
|
|
||||||
"Free Cash Flow Yield": False,
|
|
||||||
}
|
}
|
||||||
|
END_DATE = 'End Date'
|
||||||
|
@ -1,10 +0,0 @@
|
|||||||
class WrongAssumptions(Exception):
    """Common base class of the package-specific exceptions defined in this module."""


class UnexpectedMarkup(WrongAssumptions):
    """Presumably signals page markup that differs from what the scraper expects (no usage visible here)."""


class UnknownFinancialStatementItem(WrongAssumptions):
    """Raised when an item name found in a financial statement is not among the known items."""
|
|
@ -1,103 +1,153 @@
|
|||||||
import logging
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Union, List, Dict
|
from typing import Union, Tuple, Dict
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from aiohttp.client import ClientSession
|
from aiohttp.client import ClientSession
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bs4.element import Tag
|
from bs4.element import Tag, ResultSet
|
||||||
from webutils import in_async_session, gather_in_batches
|
from webutils import in_async_session, gather_in_batches
|
||||||
|
|
||||||
from .constants import (DEV_MODE, HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, FIN_STMT_ITEMS,
|
from .constants import (HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, DEFAULT_CONCURRENT_BATCH_SIZE,
|
||||||
DEFAULT_CONCURRENT_BATCH_SIZE)
|
INDENT_MAP)
|
||||||
from .exceptions import UnknownFinancialStatementItem
|
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
|
# First element in each Tuple is an integer indicating the row indent
|
||||||
# while its values will always be tuples with a length corresponding to the number of periods (columns)
|
HeaderData = Tuple[int, str, str, str, str, str]
|
||||||
# and elements being the actual numbers, with the exception of the first key-value-pair, which will represent
|
RowData = Tuple[int, float, float, float, float, float]
|
||||||
# the end dates of the reporting periods as strings (either years or quarters).
|
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement.
|
||||||
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
|
# The first value is a tuple of the end dates of the reporting periods as strings (see above).
|
||||||
|
# The other values are the actual data tuples containing the financial figures.
|
||||||
|
ResultDict = dict[str, Union[HeaderData, RowData]]
|
||||||
|
|
||||||
|
|
||||||
@in_async_session
|
@in_async_session
|
||||||
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
|
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
|
||||||
"""
|
"""
|
||||||
Requests a web page and turns the response text into BeautifulSoup.
|
Requests a web page and turns the response text into BeautifulSoup.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url:
|
||||||
|
The GET request is sent to this URL
|
||||||
|
session (optional):
|
||||||
|
If passed an `aiohttp.ClientSession` object, it will be used to perform the request.
|
||||||
|
Otherwise a new session is created and automatically closed after the request (see `@in_async_session`).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The parsed html response text as BeautifulSoup
|
||||||
"""
|
"""
|
||||||
async with session.get(url) as response:
|
async with session.get(url) as response:
|
||||||
html = await response.text()
|
html = await response.text()
|
||||||
return BeautifulSoup(html, HTML_PARSER)
|
return BeautifulSoup(html, HTML_PARSER)
|
||||||
|
|
||||||
|
|
||||||
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
|
def get_row_indent(tr: Tag) -> int:
    """
    Determines the visual indent of a table row.

    Some positions in a financial statement have sub-positions below them indicated by indentation of the text in the
    position name's cell. The indent is derived from the CSS classes of the first `div` found in the row, using the
    class-name-to-level mapping `INDENT_MAP`.

    Args:
        tr: The table row element

    Returns:
        Each indentation level corresponds to an integer. 0 = no indentation, 1 = small, 2 = medium, 3 = large.
        0 is also returned if the row contains no `div` at all or that `div` has no `class` attribute.
    """
    div = tr.div
    if div is None:
        # A row without any `div` cannot carry an indent class; treat it as not indented
        # instead of failing with an AttributeError on `None.attrs`.
        return 0
    classes = div.attrs.get('class', ())
    for class_name, indent in INDENT_MAP.items():
        if class_name in classes:
            return indent
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def extract_end_dates(soup: BeautifulSoup) -> HeaderData:
|
||||||
"""
|
"""
|
||||||
Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a
|
Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a
|
||||||
financial statement.
|
financial statement.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
soup: The parsed page containing the financial statement
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A 6-tuple, the first element being the indent (in this case 0) and the rest being the actual end dates.
|
||||||
"""
|
"""
|
||||||
ths = soup.find('div', attrs={'class': 'financials'}).thead.find_all('th')
|
tr = soup.find('div', attrs={'class': 'financials'}).thead.tr
|
||||||
return tuple(str(th.string).strip() for th in ths[1:-1])
|
ths = tr.find_all('th')
|
||||||
|
return (0, ) + tuple(str(th.string).strip() for th in ths[1:-1])
|
||||||
|
|
||||||
|
|
||||||
def is_relevant_table_row(tr: Tag) -> bool:
|
def get_all_table_rows(soup: BeautifulSoup) -> ResultSet:
|
||||||
"""
|
|
||||||
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
|
|
||||||
"""
|
|
||||||
item_name = str(tr.td.div.string).strip()
|
|
||||||
try:
|
|
||||||
return FIN_STMT_ITEMS[item_name]
|
|
||||||
except KeyError:
|
|
||||||
log.warning(f"Unknown item name '{item_name}' found in financial statement.")
|
|
||||||
raise UnknownFinancialStatementItem
|
|
||||||
|
|
||||||
|
|
||||||
def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]:
|
|
||||||
"""
|
"""
|
||||||
Returns the table rows containing the data of interest.
|
Returns the table rows containing the data of interest.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
soup: The parsed page containing the financial statement
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
All table rows containing data from the financial statement
|
||||||
"""
|
"""
|
||||||
now = datetime.utcnow()
|
return soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
|
||||||
trs = []
|
|
||||||
for tr in soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr'):
|
|
||||||
try:
|
|
||||||
if is_relevant_table_row(tr):
|
|
||||||
trs.append(tr)
|
|
||||||
except UnknownFinancialStatementItem:
|
|
||||||
if DEV_MODE:
|
|
||||||
with open(f'mwfin_unknown_items_{now.strftime("%Y-%m-%d_%H-%M-%S")}.html', 'w') as f:
|
|
||||||
f.write(str(soup))
|
|
||||||
return trs
|
|
||||||
|
|
||||||
|
|
||||||
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
|
def extract_row_data(tr: Tag) -> Tuple[str, RowData]:
|
||||||
"""
|
"""
|
||||||
Returns the name of the item displayed in the table row (of a financial statement)
|
Returns the name of the item displayed in the table row (of a financial statement)
|
||||||
as well as a number for each reporting period.
|
as well as the position's indent and a figure for each reporting period.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tr: A table row containing data from a financial statement
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
2-tuple where the 1st element is the position's name and the second is a 6-tuple, of which the first element is
|
||||||
|
the indent and the rest are the actual figures.
|
||||||
"""
|
"""
|
||||||
item_name = str(tr.td.div.string).strip()
|
item_name = str(tr.td.div.string).strip()
|
||||||
data_div = tr.find_all('td')[-1].div.div
|
data_div = tr.find_all('td')[-1].div.div
|
||||||
values_str: str = data_div.attrs['data-chart-data']
|
values_str: str = data_div.attrs['data-chart-data']
|
||||||
values = tuple(int(float(s if s != '' else 0)) for s in values_str.split(','))
|
values = tuple(float(s if s != '' else 0) for s in values_str.split(','))
|
||||||
return item_name, values
|
return item_name, (get_row_indent(tr), ) + values
|
||||||
|
|
||||||
|
|
||||||
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
|
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
|
||||||
"""
|
"""
|
||||||
Extracts financials from the page.
|
Extracts financials from the page, which can contain either a balance sheet, income statement or cash flow
|
||||||
|
statement.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
soup: The parsed page containing a financial statement
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Custom result dictionary (see `ResultDict`)
|
||||||
"""
|
"""
|
||||||
output = {END_DATE: extract_end_dates(soup)}
|
output = {END_DATE: extract_end_dates(soup)}
|
||||||
for row in find_relevant_table_rows(soup):
|
for row in get_all_table_rows(soup):
|
||||||
row_data = extract_row_data(row)
|
row_data = extract_row_data(row)
|
||||||
output[row_data[0]] = row_data[1]
|
output[row_data[0]] = row_data[1]
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
@in_async_session
|
@in_async_session
|
||||||
async def _get_single_company_fin_stmt(statement: str, ticker_symbol: str, quarterly: bool = False,
|
async def get_single_company_fin_stmt(statement: str, ticker_symbol: str, quarterly: bool = False,
|
||||||
session: ClientSession = None) -> ResultDict:
|
session: ClientSession = None) -> ResultDict:
|
||||||
"""
|
"""
|
||||||
Returns data from the specified financial statement of the specified company.
|
Returns data from the specified financial statement of the specified company.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
statement:
|
||||||
|
Must be one of the strings defined in the constants `BS`, `IS`, `CF`
|
||||||
|
ticker_symbol:
|
||||||
|
The company's stock ticker symbol
|
||||||
|
quarterly (optional):
|
||||||
|
If true the financial data of the last five quarters is scraped; otherwise (default) the last five years.
|
||||||
|
session (optional):
|
||||||
|
See `soup_from_url`
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Custom result dictionary (see `ResultDict`)
|
||||||
"""
|
"""
|
||||||
log.info(f"Scraping {statement} for {ticker_symbol}")
|
log.info(f"Scraping {statement} for {ticker_symbol}")
|
||||||
url = f'{BASE_URL}/{ticker_symbol}/financials{FIN_STMT_URL_SUFFIX[statement]}'
|
url = f'{BASE_URL}/{ticker_symbol}/financials{FIN_STMT_URL_SUFFIX[statement]}'
|
||||||
@ -108,12 +158,34 @@ async def _get_single_company_fin_stmt(statement: str, ticker_symbol: str, quart
|
|||||||
|
|
||||||
|
|
||||||
@in_async_session
|
@in_async_session
|
||||||
async def _get_multi_companies_fin_stmt(statement: str, *ticker_symbols: str, quarterly: bool = False,
|
async def get_multi_companies_fin_stmt(statement: str, *ticker_symbols: str, quarterly: bool = False,
|
||||||
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
|
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
|
||||||
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
||||||
|
"""
|
||||||
|
Returns data from the specified financial statement of the specified companies.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
statement:
|
||||||
|
See `get_single_company_fin_stmt`
|
||||||
|
ticker_symbols:
|
||||||
|
Arbitrary number of companies' stock ticker symbols
|
||||||
|
quarterly (optional):
|
||||||
|
See `get_single_company_fin_stmt`
|
||||||
|
concurrent_batch_size (optional):
|
||||||
|
If multiple ticker symbols are passed, the company financials can be scraped concurrently.
|
||||||
|
This argument determines how many companies are scraped concurrently.
|
||||||
|
By default, they are scraped sequentially (i.e. a batch size of 1).
|
||||||
|
session (optional):
|
||||||
|
See `get_single_company_fin_stmt`
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
If only one ticker symbol is passed, the `ResultDict` for that financial statement is returned. If multiple
|
||||||
|
symbols are passed, a dictionary is returned, where the keys are the symbols and the values are the
|
||||||
|
corresponding `ResultDict`s.
|
||||||
|
"""
|
||||||
if len(ticker_symbols) == 1:
|
if len(ticker_symbols) == 1:
|
||||||
return await _get_single_company_fin_stmt(statement, ticker_symbols[0], quarterly, session)
|
return await get_single_company_fin_stmt(statement, ticker_symbols[0], quarterly, session)
|
||||||
coroutines = (_get_single_company_fin_stmt(statement, symbol, quarterly, session) for symbol in ticker_symbols)
|
coroutines = (get_single_company_fin_stmt(statement, symbol, quarterly, session) for symbol in ticker_symbols)
|
||||||
result_list = await gather_in_batches(concurrent_batch_size, *coroutines)
|
result_list = await gather_in_batches(concurrent_batch_size, *coroutines)
|
||||||
return {symbol: data for symbol, data in zip(ticker_symbols, result_list)}
|
return {symbol: data for symbol, data in zip(ticker_symbols, result_list)}
|
||||||
|
|
||||||
@ -123,11 +195,12 @@ async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
|
|||||||
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
|
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
|
||||||
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
||||||
"""
|
"""
|
||||||
Returns data from the balance sheet of the specified company.
|
Returns data from the balance sheet of the specified companies.
|
||||||
|
Convenience function around `get_multi_companies_fin_stmt`
|
||||||
"""
|
"""
|
||||||
return await _get_multi_companies_fin_stmt(BS, *ticker_symbols,
|
return await get_multi_companies_fin_stmt(BS, *ticker_symbols,
|
||||||
quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
|
quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
|
||||||
session=session)
|
session=session)
|
||||||
|
|
||||||
|
|
||||||
@in_async_session
|
@in_async_session
|
||||||
@ -135,11 +208,12 @@ async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
|
|||||||
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
|
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
|
||||||
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
||||||
"""
|
"""
|
||||||
Returns data from the income statement of the specified company.
|
Returns data from the income statement of the specified companies.
|
||||||
|
Convenience function around `get_multi_companies_fin_stmt`
|
||||||
"""
|
"""
|
||||||
return await _get_multi_companies_fin_stmt(IS, *ticker_symbols,
|
return await get_multi_companies_fin_stmt(IS, *ticker_symbols,
|
||||||
quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
|
quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
|
||||||
session=session)
|
session=session)
|
||||||
|
|
||||||
|
|
||||||
@in_async_session
|
@in_async_session
|
||||||
@ -147,17 +221,34 @@ async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
|
|||||||
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
|
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
|
||||||
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
|
||||||
"""
|
"""
|
||||||
Returns data from the cash flow statement of the specified company.
|
Returns data from the cash flow statement of the specified companies.
|
||||||
|
Convenience function around `get_multi_companies_fin_stmt`
|
||||||
"""
|
"""
|
||||||
return await _get_multi_companies_fin_stmt(CF, *ticker_symbols,
|
return await get_multi_companies_fin_stmt(CF, *ticker_symbols,
|
||||||
quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
|
quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
|
||||||
session=session)
|
session=session)
|
||||||
|
|
||||||
|
|
||||||
@in_async_session
|
@in_async_session
|
||||||
async def _get_single_company_all_financials(ticker_symbol: str, quarterly: bool = False,
|
async def get_single_company_all_financials(ticker_symbol: str, quarterly: bool = False,
|
||||||
session: ClientSession = None) -> Dict[str, ResultDict]:
|
session: ClientSession = None) -> Dict[str, ResultDict]:
|
||||||
coroutines = (_get_single_company_fin_stmt(stmt, ticker_symbol, quarterly, session) for stmt in (BS, IS, CF))
|
"""
|
||||||
|
Returns data from all financial statements of the specified company.
|
||||||
|
Concurrently calls `get_single_company_fin_stmt` three times.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ticker_symbol:
|
||||||
|
The company's stock ticker symbol
|
||||||
|
quarterly (optional):
|
||||||
|
See `get_single_company_fin_stmt`
|
||||||
|
session (optional):
|
||||||
|
See `get_single_company_fin_stmt`
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A dictionary where the keys are the three different statement names and the values are the
|
||||||
|
corresponding `ResultDict`s
|
||||||
|
"""
|
||||||
|
coroutines = (get_single_company_fin_stmt(stmt, ticker_symbol, quarterly, session) for stmt in (BS, IS, CF))
|
||||||
results = await asyncio.gather(*coroutines)
|
results = await asyncio.gather(*coroutines)
|
||||||
return {stmt: data for stmt, data in zip((BS, IS, CF), results)}
|
return {stmt: data for stmt, data in zip((BS, IS, CF), results)}
|
||||||
|
|
||||||
@ -168,10 +259,27 @@ async def get_all_financials(*ticker_symbols: str, quarterly: bool = False,
|
|||||||
session: ClientSession = None) -> Union[Dict[str, ResultDict],
|
session: ClientSession = None) -> Union[Dict[str, ResultDict],
|
||||||
Dict[str, Dict[str, ResultDict]]]:
|
Dict[str, Dict[str, ResultDict]]]:
|
||||||
"""
|
"""
|
||||||
Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified company.
|
Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified companies.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ticker_symbols:
|
||||||
|
Arbitrary number of companies' stock ticker symbols
|
||||||
|
quarterly (optional):
|
||||||
|
See `get_single_company_all_financials`
|
||||||
|
concurrent_batch_size (optional):
|
||||||
|
If multiple ticker symbols are passed, the company financials can be scraped concurrently.
|
||||||
|
This argument determines how many companies are scraped concurrently.
|
||||||
|
By default, they are scraped sequentially (i.e. a batch size of 1).
|
||||||
|
session (optional):
|
||||||
|
See `get_single_company_all_financials`
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
If only one ticker symbol is passed, the output of `get_single_company_all_financials` is returned. If multiple
|
||||||
|
symbols are passed, a dictionary is returned, where the keys are the symbols and the values are the
|
||||||
|
corresponding outputs of `get_single_company_all_financials`.
|
||||||
"""
|
"""
|
||||||
if len(ticker_symbols) == 1:
|
if len(ticker_symbols) == 1:
|
||||||
return await _get_single_company_all_financials(ticker_symbols[0], quarterly, session)
|
return await get_single_company_all_financials(ticker_symbols[0], quarterly, session)
|
||||||
coroutines = (_get_single_company_all_financials(symbol, quarterly, session) for symbol in ticker_symbols)
|
coroutines = (get_single_company_all_financials(symbol, quarterly, session) for symbol in ticker_symbols)
|
||||||
result_list = await gather_in_batches(concurrent_batch_size, *coroutines)
|
result_list = await gather_in_batches(concurrent_batch_size, *coroutines)
|
||||||
return {symbol: data for symbol, data in zip(ticker_symbols, result_list)}
|
return {symbol: data for symbol, data in zip(ticker_symbols, result_list)}
|
||||||
|
@ -7,7 +7,6 @@ from bs4 import BeautifulSoup
|
|||||||
|
|
||||||
from mwfin import functions
|
from mwfin import functions
|
||||||
from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF, END_DATE
|
from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF, END_DATE
|
||||||
from mwfin.exceptions import UnknownFinancialStatementItem
|
|
||||||
|
|
||||||
|
|
||||||
THIS_DIR = Path(__file__).parent
|
THIS_DIR = Path(__file__).parent
|
||||||
@ -52,59 +51,48 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
output = await functions.soup_from_url('baz', mock_session_obj)
|
output = await functions.soup_from_url('baz', mock_session_obj)
|
||||||
self.assertEqual(expected_output, output)
|
self.assertEqual(expected_output, output)
|
||||||
|
|
||||||
def test_extract_end_dates(self):
|
def test_get_row_indent(self):
|
||||||
expected_output = ('End_Date_1', 'End_Date_2')
|
mock_row = BeautifulSoup('<tr><div>foo</div></tr>', HTML_PARSER).tr
|
||||||
|
expected_output = 0
|
||||||
|
output = functions.get_row_indent(mock_row)
|
||||||
|
self.assertEqual(expected_output, output)
|
||||||
|
trs = self.test_soup.find_all('tr')
|
||||||
|
output = functions.get_row_indent(trs[0])
|
||||||
|
self.assertEqual(expected_output, output)
|
||||||
|
for i, tr in enumerate(trs[1:], start=1):
|
||||||
|
output = functions.get_row_indent(tr)
|
||||||
|
self.assertEqual(i, output)
|
||||||
|
|
||||||
|
@patch.object(functions, 'get_row_indent')
|
||||||
|
def test_extract_end_dates(self, mock_get_row_indent):
|
||||||
|
mock_get_row_indent.return_value = 0
|
||||||
|
expected_output = (0, 'End_Date_1', 'End_Date_2')
|
||||||
output = functions.extract_end_dates(self.test_soup)
|
output = functions.extract_end_dates(self.test_soup)
|
||||||
self.assertTupleEqual(expected_output, output)
|
self.assertTupleEqual(expected_output, output)
|
||||||
|
mock_get_row_indent.assert_called_once_with(self.test_soup.tr)
|
||||||
|
|
||||||
def test_is_relevant_table_row(self):
|
def test_get_all_table_rows(self):
|
||||||
test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments </div></td></tr>', HTML_PARSER)
|
|
||||||
self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
|
|
||||||
test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>', HTML_PARSER)
|
|
||||||
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
|
|
||||||
test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER)
|
|
||||||
with self.assertRaises(UnknownFinancialStatementItem):
|
|
||||||
functions.is_relevant_table_row(test_soup.tr)
|
|
||||||
|
|
||||||
@patch.object(functions, 'open')
|
|
||||||
@patch.object(functions, 'is_relevant_table_row')
|
|
||||||
def test_find_relevant_table_rows(self, mock_is_relevant_table_row, mock_open):
|
|
||||||
mock_is_relevant_table_row.return_value = True
|
|
||||||
expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
|
expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
|
||||||
tr0, tr1 = expected_output
|
output = functions.get_all_table_rows(self.test_soup)
|
||||||
output = functions.find_relevant_table_rows(self.test_soup)
|
self.assertSequenceEqual(expected_output, output)
|
||||||
self.assertListEqual(expected_output, output)
|
|
||||||
mock_is_relevant_table_row.assert_has_calls([call(tr0), call(tr1)])
|
|
||||||
mock_is_relevant_table_row.reset_mock()
|
|
||||||
|
|
||||||
mock_is_relevant_table_row.side_effect = UnknownFinancialStatementItem()
|
@patch.object(functions, 'get_row_indent')
|
||||||
expected_output = self.test_soup.find_all('thistagdoesntexist')
|
def test_extract_row_data(self, mock_get_row_indent):
|
||||||
output = functions.find_relevant_table_rows(self.test_soup)
|
mock_get_row_indent.return_value = 1
|
||||||
self.assertListEqual(expected_output, output)
|
|
||||||
mock_is_relevant_table_row.assert_has_calls([call(tr0), call(tr1)])
|
|
||||||
mock_is_relevant_table_row.reset_mock()
|
|
||||||
|
|
||||||
mock_write = mock_open.return_value.__enter__.return_value.write
|
|
||||||
with patch.object(functions, 'DEV_MODE', new=True):
|
|
||||||
output = functions.find_relevant_table_rows(self.test_soup)
|
|
||||||
self.assertListEqual(expected_output, output)
|
|
||||||
mock_is_relevant_table_row.assert_has_calls([call(tr0), call(tr1)])
|
|
||||||
mock_write.assert_has_calls([call(str(self.test_soup)), call(str(self.test_soup))])
|
|
||||||
|
|
||||||
def test_extract_row_data(self):
|
|
||||||
test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr
|
test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr
|
||||||
expected_output = ('Cash & Short Term Investments', (11000000, -22000000))
|
expected_output = ('foo', (1, 1., -2.))
|
||||||
output = functions.extract_row_data(test_row)
|
output = functions.extract_row_data(test_row)
|
||||||
self.assertTupleEqual(expected_output, output)
|
self.assertTupleEqual(expected_output, output)
|
||||||
|
mock_get_row_indent.assert_called_once_with(test_row)
|
||||||
|
|
||||||
@patch.object(functions, 'extract_row_data')
|
@patch.object(functions, 'extract_row_data')
|
||||||
@patch.object(functions, 'find_relevant_table_rows')
|
@patch.object(functions, 'get_all_table_rows')
|
||||||
@patch.object(functions, 'extract_end_dates')
|
@patch.object(functions, 'extract_end_dates')
|
||||||
def test_extract_all_data(self, mock_extract_end_dates, mock_find_relevant_table_rows, mock_extract_row_data):
|
def test_extract_all_data(self, mock_extract_end_dates, mock_get_all_table_rows, mock_extract_row_data):
|
||||||
test_end_dates = ('foo', 'bar')
|
test_end_dates = ('foo', 'bar')
|
||||||
mock_extract_end_dates.return_value = test_end_dates
|
mock_extract_end_dates.return_value = test_end_dates
|
||||||
test_relevant_rows = ['tr1', 'tr2']
|
test_relevant_rows = ['tr1', 'tr2']
|
||||||
mock_find_relevant_table_rows.return_value = test_relevant_rows
|
mock_get_all_table_rows.return_value = test_relevant_rows
|
||||||
test_row_data = ('item_name', (123, 456))
|
test_row_data = ('item_name', (123, 456))
|
||||||
mock_extract_row_data.return_value = test_row_data
|
mock_extract_row_data.return_value = test_row_data
|
||||||
expected_output = {
|
expected_output = {
|
||||||
@ -115,12 +103,12 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
output = functions.extract_all_data(self.test_soup)
|
output = functions.extract_all_data(self.test_soup)
|
||||||
self.assertDictEqual(expected_output, output)
|
self.assertDictEqual(expected_output, output)
|
||||||
mock_extract_end_dates.assert_called_once_with(self.test_soup)
|
mock_extract_end_dates.assert_called_once_with(self.test_soup)
|
||||||
mock_find_relevant_table_rows.assert_called_once_with(self.test_soup)
|
mock_get_all_table_rows.assert_called_once_with(self.test_soup)
|
||||||
mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])
|
mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])
|
||||||
|
|
||||||
@patch.object(functions, 'extract_all_data')
|
@patch.object(functions, 'extract_all_data')
|
||||||
@patch.object(functions, 'soup_from_url')
|
@patch.object(functions, 'soup_from_url')
|
||||||
async def test__get_single_company_fin_stmt(self, mock_soup_from_url, mock_extract_all_data):
|
async def test_get_single_company_fin_stmt(self, mock_soup_from_url, mock_extract_all_data):
|
||||||
mock_session = MagicMock()
|
mock_session = MagicMock()
|
||||||
test_ticker, statement = 'bar', BS
|
test_ticker, statement = 'bar', BS
|
||||||
test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
|
test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
|
||||||
@ -128,7 +116,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
|
mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
|
||||||
|
|
||||||
quarterly = False
|
quarterly = False
|
||||||
output = await functions._get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
|
output = await functions.get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
|
||||||
self.assertDictEqual(expected_output, output)
|
self.assertDictEqual(expected_output, output)
|
||||||
mock_soup_from_url.assert_called_once_with(test_url, mock_session)
|
mock_soup_from_url.assert_called_once_with(test_url, mock_session)
|
||||||
mock_extract_all_data.assert_called_once_with(mock_soup)
|
mock_extract_all_data.assert_called_once_with(mock_soup)
|
||||||
@ -136,33 +124,33 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
mock_extract_all_data.reset_mock()
|
mock_extract_all_data.reset_mock()
|
||||||
|
|
||||||
quarterly = True
|
quarterly = True
|
||||||
output = await functions._get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
|
output = await functions.get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
|
||||||
self.assertDictEqual(expected_output, output)
|
self.assertDictEqual(expected_output, output)
|
||||||
mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session)
|
mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session)
|
||||||
mock_extract_all_data.assert_called_once_with(mock_soup)
|
mock_extract_all_data.assert_called_once_with(mock_soup)
|
||||||
|
|
||||||
@patch.object(functions, '_get_single_company_fin_stmt')
|
@patch.object(functions, 'get_single_company_fin_stmt')
|
||||||
async def test__get_multi_companies_fin_stmt(self, mock__get_single_company_fin_stmt):
|
async def test_get_multi_companies_fin_stmt(self, mock_get_single_company_fin_stmt):
|
||||||
statement, sym1, sym2, quarterly, mock_session = 'xyz', 'foo', 'bar', False, MagicMock()
|
statement, sym1, sym2, quarterly, mock_session = 'xyz', 'foo', 'bar', False, MagicMock()
|
||||||
mock__get_single_company_fin_stmt.return_value = expected_output = 'baz'
|
mock_get_single_company_fin_stmt.return_value = expected_output = 'baz'
|
||||||
output = await functions._get_multi_companies_fin_stmt(statement, sym1,
|
output = await functions.get_multi_companies_fin_stmt(statement, sym1,
|
||||||
quarterly=quarterly, session=mock_session)
|
quarterly=quarterly, session=mock_session)
|
||||||
self.assertEqual(expected_output, output)
|
self.assertEqual(expected_output, output)
|
||||||
mock__get_single_company_fin_stmt.assert_called_once_with(statement, sym1, quarterly, mock_session)
|
mock_get_single_company_fin_stmt.assert_called_once_with(statement, sym1, quarterly, mock_session)
|
||||||
mock__get_single_company_fin_stmt.reset_mock()
|
mock_get_single_company_fin_stmt.reset_mock()
|
||||||
|
|
||||||
expected_output = {sym1: expected_output, sym2: expected_output}
|
expected_output = {sym1: expected_output, sym2: expected_output}
|
||||||
output = await functions._get_multi_companies_fin_stmt(statement, sym1, sym2,
|
output = await functions.get_multi_companies_fin_stmt(statement, sym1, sym2,
|
||||||
quarterly=quarterly, session=mock_session)
|
quarterly=quarterly, session=mock_session)
|
||||||
self.assertDictEqual(expected_output, output)
|
self.assertDictEqual(expected_output, output)
|
||||||
mock__get_single_company_fin_stmt.assert_has_calls([
|
mock_get_single_company_fin_stmt.assert_has_calls([
|
||||||
call(statement, sym1, quarterly, mock_session),
|
call(statement, sym1, quarterly, mock_session),
|
||||||
call(statement, sym2, quarterly, mock_session)
|
call(statement, sym2, quarterly, mock_session)
|
||||||
])
|
])
|
||||||
|
|
||||||
async def _helper_test_get_any_statement(self, stmt: str, mock__get_multi_companies_fin_stmt):
|
async def _helper_test_get_any_statement(self, stmt: str, mock_get_multi_companies_fin_stmt):
|
||||||
sym1, sym2, quarterly, batch_size, mock_session = 'foo', 'bar', False, 2, MagicMock()
|
sym1, sym2, quarterly, batch_size, mock_session = 'foo', 'bar', False, 2, MagicMock()
|
||||||
mock__get_multi_companies_fin_stmt.return_value = expected_output = 'baz'
|
mock_get_multi_companies_fin_stmt.return_value = expected_output = 'baz'
|
||||||
if stmt == BS:
|
if stmt == BS:
|
||||||
function = functions.get_balance_sheet
|
function = functions.get_balance_sheet
|
||||||
elif stmt == IS:
|
elif stmt == IS:
|
||||||
@ -173,50 +161,50 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
raise ValueError
|
raise ValueError
|
||||||
output = await function(sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session)
|
output = await function(sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session)
|
||||||
self.assertEqual(expected_output, output)
|
self.assertEqual(expected_output, output)
|
||||||
mock__get_multi_companies_fin_stmt.assert_called_once_with(
|
mock_get_multi_companies_fin_stmt.assert_called_once_with(
|
||||||
stmt, sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session
|
stmt, sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session
|
||||||
)
|
)
|
||||||
|
|
||||||
@patch.object(functions, '_get_multi_companies_fin_stmt')
|
@patch.object(functions, 'get_multi_companies_fin_stmt')
|
||||||
async def test_get_balance_sheet(self, mock__get_multi_companies_fin_stmt):
|
async def test_get_balance_sheet(self, mock_get_multi_companies_fin_stmt):
|
||||||
await self._helper_test_get_any_statement(BS, mock__get_multi_companies_fin_stmt)
|
await self._helper_test_get_any_statement(BS, mock_get_multi_companies_fin_stmt)
|
||||||
|
|
||||||
@patch.object(functions, '_get_multi_companies_fin_stmt')
|
@patch.object(functions, 'get_multi_companies_fin_stmt')
|
||||||
async def test_get_income_statement(self, mock__get_multi_companies_fin_stmt):
|
async def test_get_income_statement(self, mock_get_multi_companies_fin_stmt):
|
||||||
await self._helper_test_get_any_statement(IS, mock__get_multi_companies_fin_stmt)
|
await self._helper_test_get_any_statement(IS, mock_get_multi_companies_fin_stmt)
|
||||||
|
|
||||||
@patch.object(functions, '_get_multi_companies_fin_stmt')
|
@patch.object(functions, 'get_multi_companies_fin_stmt')
|
||||||
async def test_get_cash_flow_statement(self, mock__get_multi_companies_fin_stmt):
|
async def test_get_cash_flow_statement(self, mock_get_multi_companies_fin_stmt):
|
||||||
await self._helper_test_get_any_statement(CF, mock__get_multi_companies_fin_stmt)
|
await self._helper_test_get_any_statement(CF, mock_get_multi_companies_fin_stmt)
|
||||||
|
|
||||||
@patch.object(functions, '_get_single_company_fin_stmt')
|
@patch.object(functions, 'get_single_company_fin_stmt')
|
||||||
async def test__get_single_company_all_financials(self, mock__get_single_company_fin_stmt):
|
async def test_get_single_company_all_financials(self, mock_get_single_company_fin_stmt):
|
||||||
symbol, quarterly, mock_session = 'foo', False, MagicMock()
|
symbol, quarterly, mock_session = 'foo', False, MagicMock()
|
||||||
mock__get_single_company_fin_stmt.return_value = bar = 'bar'
|
mock_get_single_company_fin_stmt.return_value = bar = 'bar'
|
||||||
expected_output = {BS: bar, IS: bar, CF: bar}
|
expected_output = {BS: bar, IS: bar, CF: bar}
|
||||||
output = await functions._get_single_company_all_financials(symbol, quarterly, mock_session)
|
output = await functions.get_single_company_all_financials(symbol, quarterly, mock_session)
|
||||||
self.assertDictEqual(expected_output, output)
|
self.assertDictEqual(expected_output, output)
|
||||||
mock__get_single_company_fin_stmt.assert_has_calls([
|
mock_get_single_company_fin_stmt.assert_has_calls([
|
||||||
call(BS, symbol, quarterly, mock_session),
|
call(BS, symbol, quarterly, mock_session),
|
||||||
call(IS, symbol, quarterly, mock_session),
|
call(IS, symbol, quarterly, mock_session),
|
||||||
call(CF, symbol, quarterly, mock_session)
|
call(CF, symbol, quarterly, mock_session)
|
||||||
])
|
])
|
||||||
|
|
||||||
@patch.object(functions, '_get_single_company_all_financials')
|
@patch.object(functions, 'get_single_company_all_financials')
|
||||||
async def test_get_company_financials(self, mock__get_single_company_all_financials):
|
async def test_get_company_financials(self, mock_get_single_company_all_financials):
|
||||||
mock__get_single_company_all_financials.return_value = expected_output = 'baz'
|
mock_get_single_company_all_financials.return_value = expected_output = 'baz'
|
||||||
symbol, quarterly, mock_session = 'foo', False, MagicMock()
|
symbol, quarterly, mock_session = 'foo', False, MagicMock()
|
||||||
output = await functions.get_all_financials(symbol, quarterly=quarterly, session=mock_session)
|
output = await functions.get_all_financials(symbol, quarterly=quarterly, session=mock_session)
|
||||||
self.assertEqual(expected_output, output)
|
self.assertEqual(expected_output, output)
|
||||||
mock__get_single_company_all_financials.assert_called_once_with(symbol, quarterly, mock_session)
|
mock_get_single_company_all_financials.assert_called_once_with(symbol, quarterly, mock_session)
|
||||||
mock__get_single_company_all_financials.reset_mock()
|
mock_get_single_company_all_financials.reset_mock()
|
||||||
|
|
||||||
test_sym1, test_sym2 = 'x', 'y'
|
test_sym1, test_sym2 = 'x', 'y'
|
||||||
expected_output = {test_sym1: expected_output, test_sym2: expected_output}
|
expected_output = {test_sym1: expected_output, test_sym2: expected_output}
|
||||||
output = await functions.get_all_financials(test_sym1, test_sym2,
|
output = await functions.get_all_financials(test_sym1, test_sym2,
|
||||||
quarterly=quarterly, session=mock_session)
|
quarterly=quarterly, session=mock_session)
|
||||||
self.assertDictEqual(expected_output, output)
|
self.assertDictEqual(expected_output, output)
|
||||||
mock__get_single_company_all_financials.assert_has_calls([
|
mock_get_single_company_all_financials.assert_has_calls([
|
||||||
call(test_sym1, quarterly, mock_session),
|
call(test_sym1, quarterly, mock_session),
|
||||||
call(test_sym2, quarterly, mock_session)
|
call(test_sym2, quarterly, mock_session)
|
||||||
])
|
])
|
||||||
@ -227,9 +215,24 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
|
|||||||
symbol = 'foo'
|
symbol = 'foo'
|
||||||
# Since the web request is mocked we always receive the same HTML markup.
|
# Since the web request is mocked we always receive the same HTML markup.
|
||||||
expected_output = {
|
expected_output = {
|
||||||
BS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)},
|
BS: {
|
||||||
IS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)},
|
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
|
||||||
CF: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)}
|
'foo': (1, 1., -2.),
|
||||||
|
'bar': (2, 2., -3.),
|
||||||
|
'baz': (3, 3., -4.)
|
||||||
|
},
|
||||||
|
IS: {
|
||||||
|
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
|
||||||
|
'foo': (1, 1., -2.),
|
||||||
|
'bar': (2, 2., -3.),
|
||||||
|
'baz': (3, 3., -4.)
|
||||||
|
},
|
||||||
|
CF: {
|
||||||
|
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
|
||||||
|
'foo': (1, 1., -2.),
|
||||||
|
'bar': (2, 2., -3.),
|
||||||
|
'baz': (3, 3., -4.)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
output = await functions.get_all_financials(symbol, session=mock_session_obj)
|
output = await functions.get_all_financials(symbol, session=mock_session_obj)
|
||||||
self.assertDictEqual(expected_output, output)
|
self.assertDictEqual(expected_output, output)
|
||||||
|
87
tests/test_main.py
Normal file
87
tests/test_main.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from unittest import IsolatedAsyncioTestCase
|
||||||
|
from unittest.mock import patch
|
||||||
|
from argparse import Namespace
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
|
from mwfin import __main__ as main_module
|
||||||
|
|
||||||
|
|
||||||
|
class MainModuleTestCase(IsolatedAsyncioTestCase):
|
||||||
|
|
||||||
|
@patch.object(main_module.ArgumentParser, 'parse_args')
|
||||||
|
def test_parse_cli(self, mock_parse_args):
|
||||||
|
mock_parse_args.return_value = mock_args = Namespace(foo='a', bar='b')
|
||||||
|
expected_output = vars(mock_args)
|
||||||
|
output = main_module.parse_cli()
|
||||||
|
self.assertDictEqual(expected_output, output)
|
||||||
|
|
||||||
|
def test_configure_logging(self):
|
||||||
|
root_logger = logging.getLogger()
|
||||||
|
root_logger.handlers = []
|
||||||
|
main_module.configure_logging(verbosity=0)
|
||||||
|
self.assertEqual(1, len(root_logger.handlers))
|
||||||
|
self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
|
||||||
|
self.assertEqual(logging.CRITICAL, root_logger.level)
|
||||||
|
root_logger.handlers = []
|
||||||
|
main_module.configure_logging(verbosity=1)
|
||||||
|
self.assertEqual(1, len(root_logger.handlers))
|
||||||
|
self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
|
||||||
|
self.assertEqual(logging.WARNING, root_logger.level)
|
||||||
|
root_logger.handlers = []
|
||||||
|
main_module.configure_logging(verbosity=2)
|
||||||
|
self.assertEqual(1, len(root_logger.handlers))
|
||||||
|
self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
|
||||||
|
self.assertEqual(logging.INFO, root_logger.level)
|
||||||
|
root_logger.handlers = []
|
||||||
|
main_module.configure_logging(verbosity=3)
|
||||||
|
self.assertEqual(1, len(root_logger.handlers))
|
||||||
|
self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
|
||||||
|
self.assertEqual(logging.DEBUG, root_logger.level)
|
||||||
|
root_logger.handlers = []
|
||||||
|
main_module.configure_logging(verbosity=9999)
|
||||||
|
self.assertEqual(1, len(root_logger.handlers))
|
||||||
|
self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
|
||||||
|
self.assertEqual(logging.DEBUG, root_logger.level)
|
||||||
|
|
||||||
|
@patch.object(main_module, 'get_all_financials')
|
||||||
|
@patch.object(main_module, 'configure_logging')
|
||||||
|
@patch.object(main_module, 'parse_cli')
|
||||||
|
async def test_main(self, mock_parse_cli, mock_configure_logging, mock_get_all_financials):
|
||||||
|
mock_parse_cli.return_value = args = {
|
||||||
|
main_module.VERBOSE: 'foo',
|
||||||
|
main_module.TICKER_SYMBOL: ['bar', 'baz'],
|
||||||
|
main_module.QUARTERLY: 'perhaps',
|
||||||
|
main_module.BATCH_SIZE: 'xyz',
|
||||||
|
main_module.TO_FILE: None,
|
||||||
|
main_module.JSON_INDENT: 42,
|
||||||
|
}
|
||||||
|
mock_get_all_financials.return_value = mock_data = {'data': 'something cool'}
|
||||||
|
|
||||||
|
# To stdout:
|
||||||
|
with patch.object(main_module, 'print') as mock_print:
|
||||||
|
await main_module.main()
|
||||||
|
mock_parse_cli.assert_called_once_with()
|
||||||
|
mock_configure_logging.assert_called_once_with(args[main_module.VERBOSE])
|
||||||
|
mock_get_all_financials.assert_awaited_once_with(*args[main_module.TICKER_SYMBOL],
|
||||||
|
quarterly=args[main_module.QUARTERLY],
|
||||||
|
concurrent_batch_size=args[main_module.BATCH_SIZE])
|
||||||
|
mock_print.assert_called_once_with(json.dumps(mock_data, indent=args[main_module.JSON_INDENT]))
|
||||||
|
mock_parse_cli.reset_mock()
|
||||||
|
mock_configure_logging.reset_mock()
|
||||||
|
mock_get_all_financials.reset_mock()
|
||||||
|
|
||||||
|
# To file:
|
||||||
|
args[main_module.TO_FILE] = 'some_file'
|
||||||
|
with patch.object(main_module, 'open') as mock_open:
|
||||||
|
mock_open.return_value.__enter__.return_value = mock_file = StringIO()
|
||||||
|
await main_module.main()
|
||||||
|
mock_parse_cli.assert_called_once_with()
|
||||||
|
mock_configure_logging.assert_called_once_with(args[main_module.VERBOSE])
|
||||||
|
mock_get_all_financials.assert_awaited_once_with(*args[main_module.TICKER_SYMBOL],
|
||||||
|
quarterly=args[main_module.QUARTERLY],
|
||||||
|
concurrent_batch_size=args[main_module.BATCH_SIZE])
|
||||||
|
expected_contents = json.dumps(mock_data, indent=args[main_module.JSON_INDENT])
|
||||||
|
mock_file.seek(0)
|
||||||
|
self.assertEqual(expected_contents, mock_file.read())
|
@ -15,7 +15,7 @@
|
|||||||
<table>
|
<table>
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th><div> !!Item </div><div> !!Item </div></th>
|
<th><div class="xyz abc"> !!Item </div><div> !!Item </div></th>
|
||||||
<th><div> End_Date_1 </div></th>
|
<th><div> End_Date_1 </div></th>
|
||||||
<th><div> End_Date_2 </div></th>
|
<th><div> End_Date_2 </div></th>
|
||||||
<th></th>
|
<th></th>
|
||||||
@ -23,16 +23,22 @@
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr>
|
<tr>
|
||||||
<td><div> Cash & Short Term Investments </div><div> Cash & Short Term Investments </div></td>
|
<td><div class="xyz indent--small"> foo </div><div> foo </div></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
<td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td>
|
<td><div> <div data-chart-data="1.0,-2.0"><div></div></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><div> Cash & Short Term Investments Growth </div><div> Cash & Short Term Investments Growth </div></td>
|
<td><div class="xyz indent--medium"> bar </div><div> bar </div></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
|
<td><div> <div data-chart-data="2.0,-3.0"><div></div></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><div class="xyz indent--large"> baz </div><div> baz </div></td>
|
||||||
<td></td>
|
<td></td>
|
||||||
|
<td></td>
|
||||||
|
<td><div> <div data-chart-data="3.0,-4.0"><div></div></td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
Reference in New Issue
Block a user