Compare commits

..

31 Commits

Author SHA1 Message Date
911fa15f46 extensive docstrings; made all functions public (not protected); minor refactoring 2021-12-28 23:59:54 +01:00
f29f22a52e 100% coverage 2021-12-26 23:32:15 +01:00
a2f3f590f6 test for __main__ 2021-12-26 23:22:12 +01:00
3276dbfae3 ditched csv; minor help text change 2021-12-26 21:08:36 +01:00
2b3fb9f7ba implemented 2021-12-26 19:04:23 +01:00
8afab2fde8 planned function to get row indent 2021-12-26 18:40:29 +01:00
f7cb5de928 plan to encode row indentation as int & include it as the 1st element in the data tuple 2021-12-26 18:08:25 +01:00
c93fb9692e minor refactoring & adjustments 2021-12-26 17:55:34 +01:00
38dd29f35b all table rows to be considered relevant 2021-12-26 17:34:59 +01:00
462e34013e test now passing 2021-12-03 17:51:10 +01:00
4fd69602f7 tests for writing html to file when encountering unknown items in DEV_MODE 2021-12-03 16:45:40 +01:00
94568c6165 implementation to pass tests 2021-12-03 16:28:12 +01:00
d46148a510 is_relevant_table_row should raise a custom exception for unknown items, find_relevant_table_rows should handle the rest 2021-12-03 16:20:04 +01:00
3959aff10e added log statement 2021-12-03 16:01:00 +01:00
c18ab65a7d cli now supports multiple ticker symbols and concurrency 2021-12-03 15:53:53 +01:00
55a56c2f9c concurrency for getting all three financial statements; improved imports; renamed functions 2021-12-03 15:38:21 +01:00
8d18e03018 implemented new function and adjusted function call sequence 2021-12-03 15:19:21 +01:00
a7dbdcf917 adjusted tests to call the new function 2021-12-03 14:32:52 +01:00
ca360413c3 test for new function to get any statement from many companies; renamed functions 2021-12-03 14:17:49 +01:00
1f8104b1a0 refactoring 2021-12-03 11:56:09 +01:00
68f3e428ab plan to refactor get_company_financials; tests changed accordingly 2021-12-03 11:46:39 +01:00
a6b3c57021 fixed and added things to README 2021-11-30 12:53:11 +01:00
c23c963cd8 added async-session decorator from own project; changed logging setup; added CLI parameters 2021-11-30 12:46:48 +01:00
36226fc1be fixed and refactored cli 2021-11-28 17:19:12 +01:00
9186d8734f fixed type hints 2021-11-28 17:10:00 +01:00
e522897991 implemented getting specific financial statement for multiple companies; fixed tests 2021-11-28 17:07:23 +01:00
1529df252c allow getting specific financial statement for multiple companies 2021-11-28 16:58:21 +01:00
3c7afcd879 factored out some tests 2021-11-28 16:52:16 +01:00
4a68d45bc4 implemented last idea 2021-11-28 16:42:21 +01:00
e7f3730c64 idea for getting financials for an arbitrary number of companies 2021-11-28 16:39:53 +01:00
fd03815172 improved writing to file 2021-11-28 16:24:14 +01:00
11 changed files with 536 additions and 380 deletions

View File

@ -8,3 +8,5 @@ omit =
fail_under = 100 fail_under = 100
show_missing = True show_missing = True
skip_covered = True skip_covered = True
exclude_lines =
if __name__ == .__main__.:

View File

@ -8,15 +8,23 @@ Asynchronous HTTP requests are currently used by default.
- [Python](https://www.python.org/) 3.8+ - [Python](https://www.python.org/) 3.8+
Should run on most Linux/Windows systems. Tested on Arch. Should run on most Linux/Windows systems. Tested on Arch and Windows 10.
### Building ### Building
Clone this repo, install `build` via pip, then run `pip -m build` Clone this repo, install `build` via pip, then run `python -m build`
from the repository's root directory. This should produce a `dist/` from the repository's root directory. This should produce a `dist/`
subdirectory with a wheel (build) and archive (source) distribution. subdirectory with a wheel (build) and archive (source) distribution.
The resulting `whl`-file can be installed via `pip install path/dist/***.whl`. The resulting `whl`-file can be installed via `pip install path/dist/***.whl`.
### Running ### Running
... ```shell
$ python -m mwfin -h
```
gives a description of available CLI parameters and their usage. For example, running
```shell
$ python -m mwfin -f output.json -Q AAPL
```
will retrieve the most recent available quarterly data from all three financial statements that Apple Inc. has published and save it in JSON format to `output.json`.

View File

@ -1,2 +1,3 @@
beautifulsoup4 beautifulsoup4
aiohttp aiohttp
webutils-df

View File

@ -23,6 +23,7 @@ python_requires = >=3.8
install_requires = install_requires =
beautifulsoup4 beautifulsoup4
aiohttp aiohttp
webutils-df
[options.extras_require] [options.extras_require]
tests = tests =

View File

@ -1 +1 @@
from .functions import get_company_financials from .functions import get_balance_sheet, get_income_statement, get_cash_flow_statement, get_all_financials

View File

@ -1,58 +1,94 @@
import logging
import asyncio import asyncio
import sys
import csv
import json import json
from argparse import ArgumentParser from argparse import ArgumentParser
from pathlib import Path from pathlib import Path
from aiohttp import ClientSession from .functions import get_all_financials
from .constants import MAIN_LOGGER_NAME, DEFAULT_CONCURRENT_BATCH_SIZE
from . import get_company_financials
JSON_EXT, CSV_EXT = '.json', '.csv' log = logging.getLogger(MAIN_LOGGER_NAME)
TICKER_SYMBOL = 'ticker_symbol'
QUARTERLY = 'quarterly'
BATCH_SIZE = 'concurrent_batch_size'
TO_FILE = 'to_file'
JSON_INDENT = 'json_indent'
VERBOSE = 'verbose'
def write_to_csv(data, file_obj) -> None: def parse_cli() -> dict:
writer = csv.writer(file_obj) """Returns the parsed command line arguments for the program as a dictionary."""
for statement in data.values():
for key, values in statement.items():
writer.writerow([key] + list(values))
async def main() -> None:
parser = ArgumentParser(description="Scrape company financials") parser = ArgumentParser(description="Scrape company financials")
parser.add_argument( parser.add_argument(
'-s', '--symbol', TICKER_SYMBOL,
type=str, type=str,
nargs='+',
help="Stock ticker symbol of the company to be scraped the financials of" help="Stock ticker symbol of the company to be scraped the financials of"
) )
parser.add_argument( parser.add_argument(
'-f', '--to-file', '-Q', f'--{QUARTERLY}',
action='store_true',
help="If set, the financial data for the last quarters is returned; otherwise yearly data is returned."
)
parser.add_argument(
'-b', f'--{BATCH_SIZE.replace("_", "-")}',
type=int,
default=DEFAULT_CONCURRENT_BATCH_SIZE,
help="If multiple ticker symbols are passed, the company financials can be scraped concurrently. "
"This argument determines how many companies are scraped concurrently. "
"By default, they are scraped sequentially (i.e. a batch size of 1)."
)
parser.add_argument(
'-f', f'--{TO_FILE.replace("_", "-")}',
type=Path, type=Path,
help="Writes results to the specified destination file. If omitted results are printed to stdout." help="Writes results to the specified destination file. If omitted results are printed to stdout."
) )
parser.add_argument(
f'--{JSON_INDENT.replace("_", "-")}',
type=int,
help="If set to a positive integer and the output format is JSON (default), the resulting JSON document is "
"indented accordingly for more readability; if omitted, output is returned in one line."
)
parser.add_argument(
'-v', f'--{VERBOSE}',
action='count',
default=0,
help="Verbose mode. Reduces the log level and thus prints out more status messages while running. "
"Using this flag multiple times increases verbosity further."
)
return vars(parser.parse_args())
args = parser.parse_args()
session = ClientSession() def configure_logging(verbosity: int) -> None:
try: """
data = await get_company_financials(args.symbol, False, session) Sets up logging by adding a stream handler (which writes to stderr by default) and setting the root logger's level according to the specified verbosity.
finally: A verbosity of 0 (or less) sets the log level to CRITICAL.
await session.close() """
path: Path = args.to_file root_logger = logging.getLogger()
root_logger.addHandler(logging.StreamHandler())
root_logger.setLevel(logging.CRITICAL)
if verbosity > 2:
root_logger.setLevel(logging.DEBUG)
elif verbosity == 2:
root_logger.setLevel(logging.INFO)
elif verbosity == 1:
root_logger.setLevel(logging.WARNING)
async def main() -> None:
"""Parses CLI arguments, configures logging to stderr, performs the scraping, and prints/saves the data."""
args = parse_cli()
configure_logging(args[VERBOSE])
data = await get_all_financials(*args[TICKER_SYMBOL], quarterly=args[QUARTERLY],
concurrent_batch_size=args[BATCH_SIZE])
path: Path = args[TO_FILE]
if path is None: if path is None:
print(json.dumps(data, indent=2)) print(json.dumps(data, indent=args[JSON_INDENT]))
return return
if path.suffix.lower() == JSON_EXT: with open(path, 'w') as f:
with open(path, 'w') as f: json.dump(data, f, indent=args[JSON_INDENT])
json.dump(data, f, indent=2)
elif path.suffix.lower() == CSV_EXT:
with open(path, 'w') as f:
write_to_csv(data, f)
else:
print('unknown extension')
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1,221 +1,19 @@
MAIN_LOGGER_NAME = 'mwfin'
HTML_PARSER = 'html.parser' HTML_PARSER = 'html.parser'
DOMAIN = 'www.marketwatch.com' DOMAIN = 'www.marketwatch.com'
BASE_URL = f'https://{DOMAIN}/investing/stock' BASE_URL = f'https://{DOMAIN}/investing/stock'
DEFAULT_CONCURRENT_BATCH_SIZE = 1
BS, IS, CF = 'bs', 'is', 'cf' BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement'
FIN_STMT_URL_SUFFIX = { FIN_STMT_URL_SUFFIX = {
BS: '/balance-sheet', BS: '/balance-sheet',
IS: '', IS: '',
CF: '/cash-flow' CF: '/cash-flow'
} }
END_DATE = 'End Date' INDENT_MAP = {
'indent--small': 1,
# All items marked `False` do not need to be scraped 'indent--medium': 2,
# because they are calculated from other items (e.g. growth or ratios). 'indent--large': 3,
FINANCIAL_STATEMENT_ITEMS = {
#################
# Balance Sheet #
#################
"Cash & Short Term Investments": True,
"Cash & Short Term Investments Growth": False,
"Cash Only": True,
"Short-Term Investments": True,
"Cash & ST Investments / Total Assets": False,
"Total Accounts Receivable": True,
"Total Accounts Receivable Growth": False,
"Accounts Receivables, Net": True,
"Accounts Receivables, Gross": True,
"Bad Debt/Doubtful Accounts": True,
"Other Receivable": True,
"Accounts Receivable Turnover": False,
"Inventories": True,
"Finished Goods": True,
"Work in Progress": True,
"Raw Materials": True,
"Progress Payments & Other": True,
"Other Current Assets": True,
"Miscellaneous Current Assets": True,
"Total Current Assets": True,
"Net Property, Plant & Equipment": True,
"Property, Plant & Equipment - Gross": True,
"Buildings": True,
"Land & Improvements": True,
"Computer Software and Equipment": True,
"Other Property, Plant & Equipment": True,
"Accumulated Depreciation": True,
"Total Investments and Advances": True,
"Other Long-Term Investments": True,
"Long-Term Note Receivables": True,
"Intangible Assets": True,
"Net Goodwill": True,
"Net Other Intangibles": True,
"Other Assets": True,
"Total Assets": True,
"Total Assets Growth": False,
"ST Debt & Current Portion LT Debt": True,
"Short Term Debt": True,
"Current Portion of Long Term Debt": True,
"Accounts Payable": True,
"Accounts Payable Growth": False,
"Income Tax Payable": True,
"Other Current Liabilities": True,
"Dividends Payable": True,
"Accrued Payroll": True,
"Miscellaneous Current Liabilities": True,
"Total Current Liabilities": True,
"Long-Term Debt": True,
"Long-Term Debt excl. Capitalized Leases": True,
"Non-Convertible Debt": True,
"Convertible Debt": True,
"Capitalized Lease Obligations": True,
"Provision for Risks & Charges": True,
"Deferred Taxes": True,
"Deferred Taxes - Credits": True,
"Deferred Taxes - Debit": True,
"Other Liabilities": True,
"Other Liabilities (excl. Deferred Income)": True,
"Deferred Income": True,
"Total Liabilities": True,
"Non-Equity Reserves": True,
"Total Liabilities / Total Assets": False,
"Preferred Stock (Carrying Value)": True,
"Redeemable Preferred Stock": True,
"Non-Redeemable Preferred Stock": True,
"Common Equity (Total)": True,
"Common Equity / Total Assets": False,
"Common Stock Par/Carry Value": True,
"Retained Earnings": True,
"ESOP Debt Guarantee": True,
"Cumulative Translation Adjustment/Unrealized For. Exch. Gain": True,
"Unrealized Gain/Loss Marketable Securities": True,
"Revaluation Reserves": True,
"Treasury Stock": True,
"Total Shareholders' Equity": True,
"Total Shareholders' Equity / Total Assets": False,
"Accumulated Minority Interest": True,
"Total Equity": True,
"Liabilities & Shareholders' Equity": True,
####################
# Income Statement #
####################
"Sales/Revenue": True,
"Sales Growth": False,
"Cost of Goods Sold (COGS) incl. D&A": True,
"COGS Growth": False,
"COGS excluding D&A": True,
"Depreciation & Amortization Expense": True,
"Depreciation": True,
"Amortization of Intangibles": True,
"Gross Income": True,
"Gross Income Growth": False,
"Gross Profit Margin": False,
"SG&A Expense": True,
"SGA Growth": False,
"Research & Development": True,
"Other SG&A": True,
"Other Operating Expense": True,
"Unusual Expense": True,
"EBIT after Unusual Expense": True,
"Non Operating Income/Expense": True,
"Non-Operating Interest Income": True,
"Equity in Affiliates (Pretax)": True,
"Interest Expense": True,
"Interest Expense Growth": False,
"Gross Interest Expense": True,
"Interest Capitalized": True,
"Pretax Income": True,
"Pretax Income Growth": False,
"Pretax Margin": False,
"Income Tax": True,
"Income Tax - Current Domestic": True,
"Income Tax - Current Foreign": True,
"Income Tax - Deferred Domestic": True,
"Income Tax - Deferred Foreign": True,
"Income Tax Credits": True,
"Equity in Affiliates": True,
"Other After Tax Income (Expense)": True,
"Consolidated Net Income": True,
"Minority Interest Expense": True,
"Net Income": True,
"Net Income Growth": False,
"Net Margin Growth": False,
"Extraordinaries & Discontinued Operations": True,
"Extra Items & Gain/Loss Sale Of Assets": True,
"Cumulative Effect - Accounting Chg": True,
"Discontinued Operations": True,
"Net Income After Extraordinaries": True,
"Preferred Dividends": True,
"Net Income Available to Common": True,
"EPS (Basic)": True,
"EPS (Basic) Growth": False,
"Basic Shares Outstanding": True,
"EPS (Diluted)": True,
"EPS (Diluted) Growth": False,
"Diluted Shares Outstanding": True,
"EBITDA": True,
"EBITDA Growth": False,
"EBITDA Margin": False,
#######################
# Cash Flow Statement #
#######################
"Net Income before Extraordinaries": True,
# "Net Income Growth": False,
"Depreciation, Depletion & Amortization": True,
"Depreciation and Depletion": True,
"Amortization of Intangible Assets": True,
"Deferred Taxes & Investment Tax Credit": True,
# "Deferred Taxes": True,
"Investment Tax Credit": True,
"Other Funds": True,
"Funds from Operations": True,
"Extraordinaries": True,
"Changes in Working Capital": True,
"Receivables": True,
# "Accounts Payable": True,
"Other Assets/Liabilities": True,
"Net Operating Cash Flow": True,
"Net Operating Cash Flow Growth": False,
"Net Operating Cash Flow / Sales": False,
"Capital Expenditures": True,
"Capital Expenditures Growth": False,
"Capital Expenditures / Sales": False,
"Capital Expenditures (Fixed Assets)": True,
"Capital Expenditures (Other Assets)": True,
"Net Assets from Acquisitions": True,
"Sale of Fixed Assets & Businesses": True,
"Purchase/Sale of Investments": True,
"Purchase of Investments": True,
"Sale/Maturity of Investments": True,
"Other Uses": True,
"Other Sources": True,
"Net Investing Cash Flow": True,
"Net Investing Cash Flow Growth": False,
"Net Investing Cash Flow / Sales": False,
"Cash Dividends Paid - Total": True,
"Common Dividends": True,
# "Preferred Dividends": True,
"Change in Capital Stock": True,
"Repurchase of Common & Preferred Stk.": True,
"Sale of Common & Preferred Stock": True,
"Proceeds from Stock Options": True,
"Other Proceeds from Sale of Stock": True,
"Issuance/Reduction of Debt, Net": True,
"Change in Current Debt": True,
"Change in Long-Term Debt": True,
"Issuance of Long-Term Debt": True,
"Reduction in Long-Term Debt": True,
# "Other Funds": True,
# "Other Uses": True,
# "Other Sources": True,
"Net Financing Cash Flow": True,
"Net Financing Cash Flow Growth": False,
"Net Financing Cash Flow / Sales": False,
"Exchange Rate Effect": True,
"Miscellaneous Funds": True,
"Net Change in Cash": True,
"Free Cash Flow": True,
"Free Cash Flow Growth": False,
"Free Cash Flow Yield": False,
} }
END_DATE = 'End Date'

View File

@ -1,126 +1,285 @@
import logging import logging
from typing import Union, List, Dict import asyncio
from typing import Union, Tuple, Dict
from aiohttp.client import ClientSession from aiohttp.client import ClientSession
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag, ResultSet
from webutils import in_async_session, gather_in_batches
from . import constants from .constants import (HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, DEFAULT_CONCURRENT_BATCH_SIZE,
INDENT_MAP)
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement, # First element in each Tuple is an integer indicating the row indent
# while its values will always be tuples with a length corresponding to the number of periods (columns) HeaderData = Tuple[int, str, str, str, str, str]
# and elements being the actual numbers, with the exception of the first key-value-pair, which will represent RowData = Tuple[int, float, float, float, float, float]
# the end dates of the reporting periods as strings (either years or quarters). # The resulting dictionary's keys correspond to the name of the item (row) in the financial statement.
ResultDict = dict[str, Union[tuple[int], tuple[str]]] # The first value is a tuple of the end dates of the reporting periods as strings (see above).
# The other values are the actual data tuples containing the financial figures.
ResultDict = dict[str, Union[HeaderData, RowData]]
@in_async_session
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup: async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
""" """
Requests a web page and turns the response text into BeautifulSoup. Requests a web page and turns the response text into BeautifulSoup.
Args:
url:
The GET request is sent to this URL
session (optional):
If passed an `aiohttp.ClientSession` object, it will be used to perform the request.
Otherwise a new session is created and automatically closed after the request (see `@in_async_session`).
Returns:
The parsed html response text as BeautifulSoup
""" """
async with session.get(url) as response: async with session.get(url) as response:
html = await response.text() html = await response.text()
return BeautifulSoup(html, constants.HTML_PARSER) return BeautifulSoup(html, HTML_PARSER)
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]: def get_row_indent(tr: Tag) -> int:
"""
Determines the visual indent of a table row.
Some positions in a financial statement have sub-positions below them indicated by indentation of the text in the
position name's cell.
Args:
tr: The table row element
Returns:
Each indentation level corresponds to an integer. 0 = no indentation, 1 = small, 2 = medium, 3 = large
"""
try:
classes = tr.div.attrs['class']
except KeyError:
return 0
for class_name, indent in INDENT_MAP.items():
if class_name in classes:
return indent
return 0
def extract_end_dates(soup: BeautifulSoup) -> HeaderData:
""" """
Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a Finds and returns the end dates of the reporting periods as strings (either years or quarters) from the page of a
financial statement. financial statement.
Args:
soup: The parsed page containing the financial statement
Returns:
A 6-tuple, the first element being the indent (in this case 0) and the rest being the actual end dates.
""" """
ths = soup.find('div', attrs={'class': 'financials'}).thead.find_all('th') tr = soup.find('div', attrs={'class': 'financials'}).thead.tr
return tuple(str(th.string).strip() for th in ths[1:-1]) ths = tr.find_all('th')
return (0, ) + tuple(str(th.string).strip() for th in ths[1:-1])
def is_relevant_table_row(tr: Tag) -> bool: def get_all_table_rows(soup: BeautifulSoup) -> ResultSet:
"""
Returns True if the item in the table row is marked as relevant. Additionally warns when an item is unknown.
"""
item_name = str(tr.td.div.string).strip()
try:
return constants.FINANCIAL_STATEMENT_ITEMS[item_name]
except KeyError:
log.warning(f"Unknown item name '{item_name}' found in financial statement.")
return False
def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]:
""" """
Returns the table rows containing the data of interest. Returns the table rows containing the data of interest.
Args:
soup: The parsed page containing the financial statement
Returns:
All table rows containing data from the financial statement
""" """
trs = soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr') return soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
return [tr for tr in trs if is_relevant_table_row(tr)]
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]: def extract_row_data(tr: Tag) -> Tuple[str, RowData]:
""" """
Returns the name of the item displayed in the table row (of a financial statement) Returns the name of the item displayed in the table row (of a financial statement)
as well as a number for each reporting period. as well as the position's indent and a figure for each reporting period.
Args:
tr: A table row containing data from a financial statement
Returns:
2-tuple where the 1st element is the position's name and the second is a 6-tuple, of which the first element is
the indent and the rest are the actual figures.
""" """
item_name = str(tr.td.div.string).strip() item_name = str(tr.td.div.string).strip()
data_div = tr.find_all('td')[-1].div.div data_div = tr.find_all('td')[-1].div.div
values_str: str = data_div.attrs['data-chart-data'] values_str: str = data_div.attrs['data-chart-data']
values = tuple(int(float(s if s != '' else 0)) for s in values_str.split(',')) values = tuple(float(s if s != '' else 0) for s in values_str.split(','))
return item_name, values return item_name, (get_row_indent(tr), ) + values
def extract_all_data(soup: BeautifulSoup) -> ResultDict: def extract_all_data(soup: BeautifulSoup) -> ResultDict:
""" """
Extracts financials from the page. Extracts financials from the page, which can contain either a balance sheet, income statement or cash flow
statement.
Args:
soup: The parsed page containing a financial statement
Returns:
Custom result dictionary (see `ResultDict`)
""" """
output = {constants.END_DATE: extract_end_dates(soup)} output = {END_DATE: extract_end_dates(soup)}
for row in find_relevant_table_rows(soup): for row in get_all_table_rows(soup):
row_data = extract_row_data(row) row_data = extract_row_data(row)
output[row_data[0]] = row_data[1] output[row_data[0]] = row_data[1]
return output return output
async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly: bool = False, @in_async_session
session: ClientSession = None) -> ResultDict: async def get_single_company_fin_stmt(statement: str, ticker_symbol: str, quarterly: bool = False,
session: ClientSession = None) -> ResultDict:
""" """
Returns data from the specified financial statement of the specified company. Returns data from the specified financial statement of the specified company.
Args:
statement:
Must be one of the strings defined in the constants `BS`, `IS`, `CF`
ticker_symbol:
The company's stock ticker symbol
quarterly (optional):
If true the financial data of the last five quarters is scraped; otherwise (default) the last five years.
session (optional):
See `soup_from_url`
Returns:
Custom result dictionary (see `ResultDict`)
""" """
url = f'{constants.BASE_URL}/{ticker_symbol}/financials{constants.FIN_STMT_URL_SUFFIX[statement]}' log.info(f"Scraping {statement} for {ticker_symbol}")
url = f'{BASE_URL}/{ticker_symbol}/financials{FIN_STMT_URL_SUFFIX[statement]}'
if quarterly: if quarterly:
url += '/quarter' url += '/quarter'
soup = await soup_from_url(url, session) soup = await soup_from_url(url, session)
return extract_all_data(soup) return extract_all_data(soup)
async def get_balance_sheet(ticker_symbol: str, quarterly: bool = False, @in_async_session
session: ClientSession = None) -> ResultDict: async def get_multi_companies_fin_stmt(statement: str, *ticker_symbols: str, quarterly: bool = False,
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
""" """
Returns data from the balance sheet of the specified company. Returns data from the specified financial statement of the specified companies.
Args:
statement:
See `get_single_company_fin_stmt`
ticker_symbols:
Arbitrary number of companies' stock ticker symbols
quarterly (optional):
See `get_single_company_fin_stmt`
concurrent_batch_size (optional):
If multiple ticker symbols are passed, the company financials can be scraped concurrently.
This argument determines how many companies are scraped concurrently.
By default, they are scraped sequentially (i.e. a batch size of 1).
session (optional):
See `get_single_company_fin_stmt`
Returns:
If only one ticker symbol is passed, the `ResultDict` for that financial statement is returned. If multiple
symbols are passed, a dictionary is returned, where the keys are the symbols and the values are the
corresponding `ResultDict`s.
""" """
return await _get_financial_statement(constants.BS, ticker_symbol, quarterly, session) if len(ticker_symbols) == 1:
return await get_single_company_fin_stmt(statement, ticker_symbols[0], quarterly, session)
coroutines = (get_single_company_fin_stmt(statement, symbol, quarterly, session) for symbol in ticker_symbols)
result_list = await gather_in_batches(concurrent_batch_size, *coroutines)
return {symbol: data for symbol, data in zip(ticker_symbols, result_list)}
async def get_income_statement(ticker_symbol: str, quarterly: bool = False, @in_async_session
session: ClientSession = None) -> ResultDict: async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
""" """
Returns data from the income statement of the specified company. Returns data from the balance sheet of the specified companies.
Convenience function around `get_multi_companies_fin_stmt`
""" """
return await _get_financial_statement(constants.IS, ticker_symbol, quarterly, session) return await get_multi_companies_fin_stmt(BS, *ticker_symbols,
quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
session=session)
async def get_cash_flow_statement(ticker_symbol: str, quarterly: bool = False, @in_async_session
session: ClientSession = None) -> ResultDict: async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
""" """
Returns data from the cash flow statement of the specified company. Returns data from the income statement of the specified companies.
Convenience function around `get_multi_companies_fin_stmt`
""" """
return await _get_financial_statement(constants.CF, ticker_symbol, quarterly, session) return await get_multi_companies_fin_stmt(IS, *ticker_symbols,
quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
session=session)
async def get_company_financials(ticker_symbol: str, quarterly: bool = False, @in_async_session
session: ClientSession = None) -> Dict[str, ResultDict]: async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
""" """
Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified company. Returns data from the cash flow statement of the specified companies.
Convenience function around `get_multi_companies_fin_stmt`
""" """
return { return await get_multi_companies_fin_stmt(CF, *ticker_symbols,
constants.BS: await get_balance_sheet(ticker_symbol, quarterly, session), quarterly=quarterly, concurrent_batch_size=concurrent_batch_size,
constants.IS: await get_income_statement(ticker_symbol, quarterly, session), session=session)
constants.CF: await get_cash_flow_statement(ticker_symbol, quarterly, session)
}
@in_async_session
async def get_single_company_all_financials(ticker_symbol: str, quarterly: bool = False,
session: ClientSession = None) -> Dict[str, ResultDict]:
"""
Returns data from all financial statements of the specified company.
Concurrently calls `get_single_company_fin_stmt` three times.
Args:
ticker_symbol:
The company's stock ticker symbol
quarterly (optional):
See `get_single_company_fin_stmt`
session (optional):
See `get_single_company_fin_stmt`
Returns:
A dictionary where the keys are the three different statement names and the values are the
corresponding `ResultDict`s
"""
coroutines = (get_single_company_fin_stmt(stmt, ticker_symbol, quarterly, session) for stmt in (BS, IS, CF))
results = await asyncio.gather(*coroutines)
return {stmt: data for stmt, data in zip((BS, IS, CF), results)}
@in_async_session
async def get_all_financials(*ticker_symbols: str, quarterly: bool = False,
concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
session: ClientSession = None) -> Union[Dict[str, ResultDict],
Dict[str, Dict[str, ResultDict]]]:
"""
Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified companies.
Args:
ticker_symbols:
Arbitrary number of companies' stock ticker symbols
quarterly (optional):
See `get_single_company_all_financials`
concurrent_batch_size (optional):
If multiple ticker symbols are passed, the company financials can be scraped concurrently.
This argument determines how many companies are scraped concurrently.
By default, they are scraped sequentially (i.e. a batch size of 1).
session (optional):
See `get_single_company_all_financials`
Returns:
If only one ticker symbol is passed, the output of `get_single_company_all_financials` is returned. If multiple
symbols are passed, a dictionary is returned, where the keys are the symbols and the values are the
corresponding outputs of `get_single_company_all_financials`.
"""
if len(ticker_symbols) == 1:
return await get_single_company_all_financials(ticker_symbols[0], quarterly, session)
coroutines = (get_single_company_all_financials(symbol, quarterly, session) for symbol in ticker_symbols)
result_list = await gather_in_batches(concurrent_batch_size, *coroutines)
return {symbol: data for symbol, data in zip(ticker_symbols, result_list)}

View File

@ -51,41 +51,48 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
output = await functions.soup_from_url('baz', mock_session_obj) output = await functions.soup_from_url('baz', mock_session_obj)
self.assertEqual(expected_output, output) self.assertEqual(expected_output, output)
def test_extract_end_dates(self): def test_get_row_indent(self):
expected_output = ('End_Date_1', 'End_Date_2') mock_row = BeautifulSoup('<tr><div>foo</div></tr>', HTML_PARSER).tr
expected_output = 0
output = functions.get_row_indent(mock_row)
self.assertEqual(expected_output, output)
trs = self.test_soup.find_all('tr')
output = functions.get_row_indent(trs[0])
self.assertEqual(expected_output, output)
for i, tr in enumerate(trs[1:], start=1):
output = functions.get_row_indent(tr)
self.assertEqual(i, output)
@patch.object(functions, 'get_row_indent')
def test_extract_end_dates(self, mock_get_row_indent):
mock_get_row_indent.return_value = 0
expected_output = (0, 'End_Date_1', 'End_Date_2')
output = functions.extract_end_dates(self.test_soup) output = functions.extract_end_dates(self.test_soup)
self.assertTupleEqual(expected_output, output) self.assertTupleEqual(expected_output, output)
mock_get_row_indent.assert_called_once_with(self.test_soup.tr)
def test_is_relevant_table_row(self): def test_get_all_table_rows(self):
test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments </div></td></tr>', HTML_PARSER)
self.assertTrue(functions.is_relevant_table_row(test_soup.tr))
test_soup = BeautifulSoup('<tr><td><div> Cash & Short Term Investments Growth </div></td></tr>', HTML_PARSER)
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
test_soup = BeautifulSoup('<tr><td><div> baz </div></td></tr>', HTML_PARSER)
self.assertFalse(functions.is_relevant_table_row(test_soup.tr))
@patch.object(functions, 'is_relevant_table_row')
def test_find_relevant_table_rows(self, mock_is_relevant_table_row):
mock_is_relevant_table_row.return_value = True
expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr') expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
output = functions.find_relevant_table_rows(self.test_soup) output = functions.get_all_table_rows(self.test_soup)
self.assertListEqual(expected_output, output) self.assertSequenceEqual(expected_output, output)
mock_is_relevant_table_row.assert_has_calls([call(expected_output[0]), call(expected_output[1])])
def test_extract_row_data(self): @patch.object(functions, 'get_row_indent')
def test_extract_row_data(self, mock_get_row_indent):
mock_get_row_indent.return_value = 1
test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr
expected_output = ('Cash & Short Term Investments', (11000000, -22000000)) expected_output = ('foo', (1, 1., -2.))
output = functions.extract_row_data(test_row) output = functions.extract_row_data(test_row)
self.assertTupleEqual(expected_output, output) self.assertTupleEqual(expected_output, output)
mock_get_row_indent.assert_called_once_with(test_row)
@patch.object(functions, 'extract_row_data') @patch.object(functions, 'extract_row_data')
@patch.object(functions, 'find_relevant_table_rows') @patch.object(functions, 'get_all_table_rows')
@patch.object(functions, 'extract_end_dates') @patch.object(functions, 'extract_end_dates')
def test_extract_all_data(self, mock_extract_end_dates, mock_find_relevant_table_rows, mock_extract_row_data): def test_extract_all_data(self, mock_extract_end_dates, mock_get_all_table_rows, mock_extract_row_data):
test_end_dates = ('foo', 'bar') test_end_dates = ('foo', 'bar')
mock_extract_end_dates.return_value = test_end_dates mock_extract_end_dates.return_value = test_end_dates
test_relevant_rows = ['tr1', 'tr2'] test_relevant_rows = ['tr1', 'tr2']
mock_find_relevant_table_rows.return_value = test_relevant_rows mock_get_all_table_rows.return_value = test_relevant_rows
test_row_data = ('item_name', (123, 456)) test_row_data = ('item_name', (123, 456))
mock_extract_row_data.return_value = test_row_data mock_extract_row_data.return_value = test_row_data
expected_output = { expected_output = {
@ -96,12 +103,12 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
output = functions.extract_all_data(self.test_soup) output = functions.extract_all_data(self.test_soup)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_extract_end_dates.assert_called_once_with(self.test_soup) mock_extract_end_dates.assert_called_once_with(self.test_soup)
mock_find_relevant_table_rows.assert_called_once_with(self.test_soup) mock_get_all_table_rows.assert_called_once_with(self.test_soup)
mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])]) mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])
@patch.object(functions, 'extract_all_data') @patch.object(functions, 'extract_all_data')
@patch.object(functions, 'soup_from_url') @patch.object(functions, 'soup_from_url')
async def test__get_financial_statement(self, mock_soup_from_url, mock_extract_all_data): async def test_get_single_company_fin_stmt(self, mock_soup_from_url, mock_extract_all_data):
mock_session = MagicMock() mock_session = MagicMock()
test_ticker, statement = 'bar', BS test_ticker, statement = 'bar', BS
test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}' test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
@ -109,7 +116,7 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
mock_extract_all_data.return_value = expected_output = {'foo': 'bar'} mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
quarterly = False quarterly = False
output = await functions._get_financial_statement(statement, test_ticker, quarterly, mock_session) output = await functions.get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_called_once_with(test_url, mock_session) mock_soup_from_url.assert_called_once_with(test_url, mock_session)
mock_extract_all_data.assert_called_once_with(mock_soup) mock_extract_all_data.assert_called_once_with(mock_soup)
@ -117,54 +124,90 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
mock_extract_all_data.reset_mock() mock_extract_all_data.reset_mock()
quarterly = True quarterly = True
output = await functions._get_financial_statement(statement, test_ticker, quarterly, mock_session) output = await functions.get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session) mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session)
mock_extract_all_data.assert_called_once_with(mock_soup) mock_extract_all_data.assert_called_once_with(mock_soup)
@patch.object(functions, '_get_financial_statement') @patch.object(functions, 'get_single_company_fin_stmt')
async def test_get_balance_sheet(self, mock__get_financial_statement): async def test_get_multi_companies_fin_stmt(self, mock_get_single_company_fin_stmt):
symbol, quarterly, mock_session = 'foo', False, MagicMock() statement, sym1, sym2, quarterly, mock_session = 'xyz', 'foo', 'bar', False, MagicMock()
mock__get_financial_statement.return_value = expected_output = 'bar' mock_get_single_company_fin_stmt.return_value = expected_output = 'baz'
output = await functions.get_balance_sheet(symbol, quarterly, mock_session) output = await functions.get_multi_companies_fin_stmt(statement, sym1,
quarterly=quarterly, session=mock_session)
self.assertEqual(expected_output, output) self.assertEqual(expected_output, output)
mock__get_financial_statement.assert_called_once_with(BS, symbol, quarterly, mock_session) mock_get_single_company_fin_stmt.assert_called_once_with(statement, sym1, quarterly, mock_session)
mock_get_single_company_fin_stmt.reset_mock()
@patch.object(functions, '_get_financial_statement') expected_output = {sym1: expected_output, sym2: expected_output}
async def test_get_income_statement(self, mock__get_financial_statement): output = await functions.get_multi_companies_fin_stmt(statement, sym1, sym2,
symbol, quarterly, mock_session = 'foo', False, MagicMock() quarterly=quarterly, session=mock_session)
mock__get_financial_statement.return_value = expected_output = 'bar'
output = await functions.get_income_statement(symbol, quarterly, mock_session)
self.assertEqual(expected_output, output)
mock__get_financial_statement.assert_called_once_with(IS, symbol, quarterly, mock_session)
@patch.object(functions, '_get_financial_statement')
async def test_get_cash_flow_statement(self, mock__get_financial_statement):
symbol, quarterly, mock_session = 'foo', False, MagicMock()
mock__get_financial_statement.return_value = expected_output = 'bar'
output = await functions.get_cash_flow_statement(symbol, quarterly, mock_session)
self.assertEqual(expected_output, output)
mock__get_financial_statement.assert_called_once_with(CF, symbol, quarterly, mock_session)
@patch.object(functions, 'get_cash_flow_statement')
@patch.object(functions, 'get_income_statement')
@patch.object(functions, 'get_balance_sheet')
async def test_get_company_financials(self, mock_get_bs, mock_get_is, mock_get_cf):
mock_end_dates = ('bar', 'baz')
mock_get_bs.return_value = {END_DATE: mock_end_dates, 'a': (1, 2)}
mock_get_is.return_value = {END_DATE: mock_end_dates, 'b': (2, 3)}
mock_get_cf.return_value = {END_DATE: mock_end_dates, 'c': (3, 4)}
expected_output = {
BS: {END_DATE: mock_end_dates, 'a': (1, 2)},
IS: {END_DATE: mock_end_dates, 'b': (2, 3)},
CF: {END_DATE: mock_end_dates, 'c': (3, 4)}
}
symbol, quarterly, mock_session = 'foo', False, MagicMock()
output = await functions.get_company_financials(symbol, quarterly, mock_session)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_get_bs.assert_called_once_with(symbol, quarterly, mock_session) mock_get_single_company_fin_stmt.assert_has_calls([
mock_get_is.assert_called_once_with(symbol, quarterly, mock_session) call(statement, sym1, quarterly, mock_session),
mock_get_cf.assert_called_once_with(symbol, quarterly, mock_session) call(statement, sym2, quarterly, mock_session)
])
async def _helper_test_get_any_statement(self, stmt: str, mock_get_multi_companies_fin_stmt):
sym1, sym2, quarterly, batch_size, mock_session = 'foo', 'bar', False, 2, MagicMock()
mock_get_multi_companies_fin_stmt.return_value = expected_output = 'baz'
if stmt == BS:
function = functions.get_balance_sheet
elif stmt == IS:
function = functions.get_income_statement
elif stmt == CF:
function = functions.get_cash_flow_statement
else:
raise ValueError
output = await function(sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session)
self.assertEqual(expected_output, output)
mock_get_multi_companies_fin_stmt.assert_called_once_with(
stmt, sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session
)
@patch.object(functions, 'get_multi_companies_fin_stmt')
async def test_get_balance_sheet(self, mock_get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(BS, mock_get_multi_companies_fin_stmt)
@patch.object(functions, 'get_multi_companies_fin_stmt')
async def test_get_income_statement(self, mock_get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(IS, mock_get_multi_companies_fin_stmt)
@patch.object(functions, 'get_multi_companies_fin_stmt')
async def test_get_cash_flow_statement(self, mock_get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(CF, mock_get_multi_companies_fin_stmt)
@patch.object(functions, 'get_single_company_fin_stmt')
async def test_get_single_company_all_financials(self, mock_get_single_company_fin_stmt):
symbol, quarterly, mock_session = 'foo', False, MagicMock()
mock_get_single_company_fin_stmt.return_value = bar = 'bar'
expected_output = {BS: bar, IS: bar, CF: bar}
output = await functions.get_single_company_all_financials(symbol, quarterly, mock_session)
self.assertDictEqual(expected_output, output)
mock_get_single_company_fin_stmt.assert_has_calls([
call(BS, symbol, quarterly, mock_session),
call(IS, symbol, quarterly, mock_session),
call(CF, symbol, quarterly, mock_session)
])
@patch.object(functions, 'get_single_company_all_financials')
async def test_get_company_financials(self, mock_get_single_company_all_financials):
mock_get_single_company_all_financials.return_value = expected_output = 'baz'
symbol, quarterly, mock_session = 'foo', False, MagicMock()
output = await functions.get_all_financials(symbol, quarterly=quarterly, session=mock_session)
self.assertEqual(expected_output, output)
mock_get_single_company_all_financials.assert_called_once_with(symbol, quarterly, mock_session)
mock_get_single_company_all_financials.reset_mock()
test_sym1, test_sym2 = 'x', 'y'
expected_output = {test_sym1: expected_output, test_sym2: expected_output}
output = await functions.get_all_financials(test_sym1, test_sym2,
quarterly=quarterly, session=mock_session)
self.assertDictEqual(expected_output, output)
mock_get_single_company_all_financials.assert_has_calls([
call(test_sym1, quarterly, mock_session),
call(test_sym2, quarterly, mock_session)
])
@patch.object(functions, 'ClientSession') @patch.object(functions, 'ClientSession')
async def test_integration_get_company_financials(self, mock_session_cls): async def test_integration_get_company_financials(self, mock_session_cls):
@ -172,11 +215,26 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
symbol = 'foo' symbol = 'foo'
# Since the web request is mocked we always receive the same HTML markup. # Since the web request is mocked we always receive the same HTML markup.
expected_output = { expected_output = {
BS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)}, BS: {
IS: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)}, END_DATE: (0, 'End_Date_1', 'End_Date_2'),
CF: {END_DATE: ('End_Date_1', 'End_Date_2'), 'Cash & Short Term Investments': (11000000, -22000000)} 'foo': (1, 1., -2.),
'bar': (2, 2., -3.),
'baz': (3, 3., -4.)
},
IS: {
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
'foo': (1, 1., -2.),
'bar': (2, 2., -3.),
'baz': (3, 3., -4.)
},
CF: {
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
'foo': (1, 1., -2.),
'bar': (2, 2., -3.),
'baz': (3, 3., -4.)
}
} }
output = await functions.get_company_financials(symbol, session=mock_session_obj) output = await functions.get_all_financials(symbol, session=mock_session_obj)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_session_obj.get.assert_has_calls([ mock_session_obj.get.assert_has_calls([
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[BS]}'), call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[BS]}'),

87
tests/test_main.py Normal file
View File

@ -0,0 +1,87 @@
import logging
import json
from unittest import IsolatedAsyncioTestCase
from unittest.mock import patch
from argparse import Namespace
from io import StringIO
from mwfin import __main__ as main_module
class MainModuleTestCase(IsolatedAsyncioTestCase):
    """Unit tests for the functions of the package's `__main__` module."""

    @patch.object(main_module.ArgumentParser, 'parse_args')
    def test_parse_cli(self, mock_parse_args):
        """`parse_cli` must return the parsed arguments namespace converted to a dictionary."""
        mock_parse_args.return_value = mock_args = Namespace(foo='a', bar='b')
        expected_output = vars(mock_args)
        output = main_module.parse_cli()
        self.assertDictEqual(expected_output, output)

    def test_configure_logging(self):
        """
        `configure_logging` must leave exactly one stream handler on the root logger
        and set the root log level according to the verbosity passed.

        The root logger is process-wide state, so its original handlers and level are
        restored after the test, to prevent this test from influencing any other test.
        """
        root_logger = logging.getLogger()
        original_handlers, original_level = root_logger.handlers, root_logger.level

        def restore_root_logger() -> None:
            root_logger.handlers = original_handlers
            root_logger.setLevel(original_level)

        self.addCleanup(restore_root_logger)

        # Pairs of verbosity passed and root log level expected as a result;
        # the last pair checks that anything above 3 is still treated as DEBUG.
        expected_levels = (
            (0, logging.CRITICAL),
            (1, logging.WARNING),
            (2, logging.INFO),
            (3, logging.DEBUG),
            (9999, logging.DEBUG),
        )
        for verbosity, expected_level in expected_levels:
            with self.subTest(verbosity=verbosity):
                # Start each case from a root logger without any handlers.
                root_logger.handlers = []
                main_module.configure_logging(verbosity=verbosity)
                self.assertEqual(1, len(root_logger.handlers))
                self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
                self.assertEqual(expected_level, root_logger.level)

    @patch.object(main_module, 'get_all_financials')
    @patch.object(main_module, 'configure_logging')
    @patch.object(main_module, 'parse_cli')
    async def test_main(self, mock_parse_cli, mock_configure_logging, mock_get_all_financials):
        """
        `main` must parse the CLI arguments, configure logging, await `get_all_financials`
        and emit the returned data as JSON; to stdout if no file is given, otherwise to that file.
        """
        mock_parse_cli.return_value = args = {
            main_module.VERBOSE: 'foo',
            main_module.TICKER_SYMBOL: ['bar', 'baz'],
            main_module.QUARTERLY: 'perhaps',
            main_module.BATCH_SIZE: 'xyz',
            main_module.TO_FILE: None,
            main_module.JSON_INDENT: 42,
        }
        mock_get_all_financials.return_value = mock_data = {'data': 'something cool'}

        # To stdout:
        with patch.object(main_module, 'print') as mock_print:
            await main_module.main()
        mock_parse_cli.assert_called_once_with()
        mock_configure_logging.assert_called_once_with(args[main_module.VERBOSE])
        mock_get_all_financials.assert_awaited_once_with(*args[main_module.TICKER_SYMBOL],
                                                         quarterly=args[main_module.QUARTERLY],
                                                         concurrent_batch_size=args[main_module.BATCH_SIZE])
        mock_print.assert_called_once_with(json.dumps(mock_data, indent=args[main_module.JSON_INDENT]))
        mock_parse_cli.reset_mock()
        mock_configure_logging.reset_mock()
        mock_get_all_financials.reset_mock()

        # To file:
        # `args` is the very dictionary the mocked `parse_cli` returns, so this change is seen by `main`.
        args[main_module.TO_FILE] = 'some_file'
        with patch.object(main_module, 'open') as mock_open:
            # The in-memory buffer stands in for the file object yielded by the `open` context manager.
            mock_open.return_value.__enter__.return_value = mock_file = StringIO()
            await main_module.main()
        mock_parse_cli.assert_called_once_with()
        mock_configure_logging.assert_called_once_with(args[main_module.VERBOSE])
        mock_get_all_financials.assert_awaited_once_with(*args[main_module.TICKER_SYMBOL],
                                                         quarterly=args[main_module.QUARTERLY],
                                                         concurrent_batch_size=args[main_module.BATCH_SIZE])
        expected_contents = json.dumps(mock_data, indent=args[main_module.JSON_INDENT])
        mock_file.seek(0)
        self.assertEqual(expected_contents, mock_file.read())

View File

@ -15,7 +15,7 @@
<table> <table>
<thead> <thead>
<tr> <tr>
<th><div> !!Item </div><div> !!Item </div></th> <th><div class="xyz abc"> !!Item </div><div> !!Item </div></th>
<th><div> End_Date_1 </div></th> <th><div> End_Date_1 </div></th>
<th><div> End_Date_2 </div></th> <th><div> End_Date_2 </div></th>
<th></th> <th></th>
@ -23,16 +23,22 @@
</thead> </thead>
<tbody> <tbody>
<tr> <tr>
<td><div> Cash & Short Term Investments </div><div> Cash & Short Term Investments </div></td> <td><div class="xyz indent--small"> foo </div><div> foo </div></td>
<td></td> <td></td>
<td></td> <td></td>
<td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td> <td><div> <div data-chart-data="1.0,-2.0"><div></div></td>
</tr> </tr>
<tr> <tr>
<td><div> Cash & Short Term Investments Growth </div><div> Cash & Short Term Investments Growth </div></td> <td><div class="xyz indent--medium"> bar </div><div> bar </div></td>
<td></td> <td></td>
<td></td> <td></td>
<td><div> <div data-chart-data="2.0,-3.0"><div></div></td>
</tr>
<tr>
<td><div class="xyz indent--large"> baz </div><div> baz </div></td>
<td></td> <td></td>
<td></td>
<td><div> <div data-chart-data="3.0,-4.0"><div></div></td>
</tr> </tr>
</tbody> </tbody>
</table> </table>