Compare commits

...

50 Commits

Author SHA1 Message Date
911fa15f46 extensive docstrings; made all functions public (not protected); minor refactoring 2021-12-28 23:59:54 +01:00
f29f22a52e 100% coverage 2021-12-26 23:32:15 +01:00
a2f3f590f6 test for __main__ 2021-12-26 23:22:12 +01:00
3276dbfae3 ditched csv; minor help text change 2021-12-26 21:08:36 +01:00
2b3fb9f7ba implemented 2021-12-26 19:04:23 +01:00
8afab2fde8 planned function to get row indent 2021-12-26 18:40:29 +01:00
f7cb5de928 plan to encode row indentation as int & include it as the 1st element in the data tuple 2021-12-26 18:08:25 +01:00
c93fb9692e minor refactoring & adjustments 2021-12-26 17:55:34 +01:00
38dd29f35b all table rows to be considered relevant 2021-12-26 17:34:59 +01:00
462e34013e test now passing 2021-12-03 17:51:10 +01:00
4fd69602f7 tests for writing html to file when encountering unknown items in DEV_MODE 2021-12-03 16:45:40 +01:00
94568c6165 implementation to pass tests 2021-12-03 16:28:12 +01:00
d46148a510 is_relevant_table_row should raise a custom exception for unknown items, find_relevant_table_rows should handle the rest 2021-12-03 16:20:04 +01:00
3959aff10e added log statement 2021-12-03 16:01:00 +01:00
c18ab65a7d cli now supports multiple ticker symbols and concurrency 2021-12-03 15:53:53 +01:00
55a56c2f9c concurrency for getting all three financial statements; improved imports; renamed functions 2021-12-03 15:38:21 +01:00
8d18e03018 implemented new function and adjusted function call sequence 2021-12-03 15:19:21 +01:00
a7dbdcf917 adjusted tests to call the new function 2021-12-03 14:32:52 +01:00
ca360413c3 test for new function to get any statement from many companies; renamed functions 2021-12-03 14:17:49 +01:00
1f8104b1a0 refactoring 2021-12-03 11:56:09 +01:00
68f3e428ab plan to refactor get_company_financials; tests changed accordingly 2021-12-03 11:46:39 +01:00
a6b3c57021 fixed and added things to README 2021-11-30 12:53:11 +01:00
c23c963cd8 added async-session decorator from own project; changed logging setup; added CLI parameters 2021-11-30 12:46:48 +01:00
36226fc1be fixed and refactored cli 2021-11-28 17:19:12 +01:00
9186d8734f fixed type hints 2021-11-28 17:10:00 +01:00
e522897991 implemented getting specific financial statement for multiple companies; fixed tests 2021-11-28 17:07:23 +01:00
1529df252c allow getting specific financial statement for multiple companies 2021-11-28 16:58:21 +01:00
3c7afcd879 factored out some tests 2021-11-28 16:52:16 +01:00
4a68d45bc4 implemented last idea 2021-11-28 16:42:21 +01:00
e7f3730c64 idea for getting financials for an arbitrary number of companies 2021-11-28 16:39:53 +01:00
fd03815172 improved writing to file 2021-11-28 16:24:14 +01:00
6dd37a4aa2 cli for writing to json/csv 2021-11-28 15:53:19 +01:00
2d9119a4be company financials returned as separate dicts; test fix 2021-11-28 15:20:28 +01:00
326b956be4 added small integration test 2021-11-27 18:35:12 +01:00
63008be829 bugfix, minor change, full coverage 2021-11-27 18:11:16 +01:00
2380540659 drafted remaining functions 2021-11-27 17:23:36 +01:00
f9745ff46d two more functions implemented 2021-11-27 15:36:51 +01:00
f2abd8f2ce fixes; another function implemented 2021-11-27 15:11:42 +01:00
25be101cf9 function drafts 2021-11-26 23:37:46 +01:00
44dee0b762 finished tests 2021-11-26 21:38:10 +01:00
776c909956 more tests, idea for change 2021-11-26 12:47:58 +01:00
1e0410a254 function for checking tr relevance 2021-11-24 22:28:33 +01:00
9adad5dba1 financial statements items 2021-11-24 21:01:16 +01:00
8041386d52 more unit tests 2021-11-24 15:28:34 +01:00
d504ddfc33 reading test html file in test class set up method 2021-11-23 19:57:37 +01:00
6ac38ad06d updated/condensed html 2021-11-23 19:42:20 +01:00
652ded4cd9 Record of essential html composition (mainly for testing) 2021-11-23 14:51:29 +01:00
525d56ef63 first two unit tests 2021-11-22 19:52:36 +01:00
33e1fca03f added planned function signatures, docstrings, and corresponding test case 2021-11-21 22:45:13 +01:00
94e34bf388 empty python files added 2021-11-21 21:54:25 +01:00
12 changed files with 795 additions and 4 deletions

View File

@ -8,3 +8,5 @@ omit =
fail_under = 100
show_missing = True
skip_covered = True
exclude_lines =
if __name__ == .__main__.:

View File

@ -8,15 +8,23 @@ Asynchronous HTTP requests are currently used by default.
- [Python](https://www.python.org/) 3.8+
Should run on most Linux/Windows systems. Tested on Arch.
Should run on most Linux/Windows systems. Tested on Arch and Windows 10.
### Building
Clone this repo, install `build` via pip, then run `pip -m build`
Clone this repo, install `build` via pip, then run `python -m build`
from the repository's root directory. This should produce a `dist/`
subdirectory with a wheel (build) and archive (source) distribution.
The resulting `whl`-file can be installed via `pip install path/dist/***.whl`.
### Running
...
```shell
$ python -m mwfin -h
```
gives a description of available CLI parameters and their usage. For example, running
```shell
$ python -m mwfin -f output.json -Q AAPL
```
will retrieve the most current available quarterly data from all three financial statements Apple Inc. has published and save it in JSON format to `output.json`.

View File

@ -1,2 +1,3 @@
beautifulsoup4
aiohttp
webutils-df

View File

@ -23,6 +23,7 @@ python_requires = >=3.8
install_requires =
beautifulsoup4
aiohttp
webutils-df
[options.extras_require]
tests =

1
src/mwfin/__init__.py Normal file
View File

@ -0,0 +1 @@
from .functions import get_balance_sheet, get_income_statement, get_cash_flow_statement, get_all_financials

95
src/mwfin/__main__.py Normal file
View File

@ -0,0 +1,95 @@
import logging
import asyncio
import json
from argparse import ArgumentParser
from pathlib import Path
from .functions import get_all_financials
from .constants import MAIN_LOGGER_NAME, DEFAULT_CONCURRENT_BATCH_SIZE
log = logging.getLogger(MAIN_LOGGER_NAME)

# argparse destination names, doubling as keys into the parsed-arguments dict
TICKER_SYMBOL = 'ticker_symbol'
QUARTERLY = 'quarterly'
BATCH_SIZE = 'concurrent_batch_size'
TO_FILE = 'to_file'
JSON_INDENT = 'json_indent'
VERBOSE = 'verbose'


def parse_cli(args: list = None) -> dict:
    """
    Parses command line arguments for the program.

    Args:
        args (optional):
            List of argument strings to parse; if omitted (default), argparse
            reads `sys.argv` as usual. Passing an explicit list makes this
            function testable without touching the real command line.

    Returns:
        The parsed command line arguments as a dictionary (destination -> value).
    """
    parser = ArgumentParser(description="Scrape company financials")
    parser.add_argument(
        TICKER_SYMBOL,
        type=str,
        nargs='+',
        help="Stock ticker symbol of the company to be scraped the financials of"
    )
    parser.add_argument(
        '-Q', f'--{QUARTERLY}',
        action='store_true',
        help="If set, the financial data for the last quarters is returned; otherwise yearly data is returned."
    )
    parser.add_argument(
        '-b', f'--{BATCH_SIZE.replace("_", "-")}',
        type=int,
        default=DEFAULT_CONCURRENT_BATCH_SIZE,
        help="If multiple ticker symbols are passed, the company financials can be scraped concurrently. "
             "This argument determines how many companies are scraped concurrently. "
             "By default, they are scraped sequentially (i.e. a batch size of 1)."
    )
    parser.add_argument(
        '-f', f'--{TO_FILE.replace("_", "-")}',
        type=Path,
        help="Writes results to the specified destination file. If omitted results are printed to stdout."
    )
    parser.add_argument(
        f'--{JSON_INDENT.replace("_", "-")}',
        type=int,
        help="If set to a positive integer and the output format is JSON (default), the resulting JSON document is "
             "indented accordingly for more readability; if omitted, output is returned in one line."
    )
    parser.add_argument(
        '-v', f'--{VERBOSE}',
        action='count',
        default=0,
        help="Verbose mode. Reduces the log level and thus prints out more status messages while running. "
             "Using this flag multiple times increases verbosity further."
    )
    return vars(parser.parse_args(args))
def configure_logging(verbosity: int) -> None:
    """
    Attaches a stream handler to the root logger and derives the root log
    level from the given verbosity count.

    A verbosity of 0 (or less) keeps the level at CRITICAL; 1 lowers it to
    WARNING, 2 to INFO, and anything greater to DEBUG.
    """
    root_logger = logging.getLogger()
    root_logger.addHandler(logging.StreamHandler())
    if verbosity > 2:
        level = logging.DEBUG
    else:
        # Unknown/zero/negative verbosity falls back to CRITICAL.
        level = {1: logging.WARNING, 2: logging.INFO}.get(verbosity, logging.CRITICAL)
    root_logger.setLevel(level)
async def main() -> None:
    """
    Program entry point: parses the CLI arguments, sets up logging, scrapes
    the requested company financials, and emits them as JSON — either to
    stdout or to the destination file given on the command line.
    """
    args = parse_cli()
    configure_logging(args[VERBOSE])
    financials = await get_all_financials(*args[TICKER_SYMBOL], quarterly=args[QUARTERLY],
                                          concurrent_batch_size=args[BATCH_SIZE])
    serialized = json.dumps(financials, indent=args[JSON_INDENT])
    destination: Path = args[TO_FILE]
    if destination is None:
        print(serialized)
    else:
        with open(destination, 'w') as f:
            f.write(serialized)


if __name__ == '__main__':
    asyncio.run(main())

19
src/mwfin/constants.py Normal file
View File

@ -0,0 +1,19 @@
# Name of the package's top-level logger.
MAIN_LOGGER_NAME = 'mwfin'
# Parser backend handed to BeautifulSoup.
HTML_PARSER = 'html.parser'
DOMAIN = 'www.marketwatch.com'
# Base URL under which every company's financials pages live.
BASE_URL = f'https://{DOMAIN}/investing/stock'
# Scrape companies strictly sequentially unless told otherwise.
DEFAULT_CONCURRENT_BATCH_SIZE = 1
# Human-readable names of the three financial statements.
BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement'
# URL path suffix (appended after '/financials') for each statement;
# the income statement is the default page, hence the empty suffix.
FIN_STMT_URL_SUFFIX = {
    BS: '/balance-sheet',
    IS: '',
    CF: '/cash-flow'
}
# Maps the CSS class marking a table row's visual indentation to an integer
# indent level (rows without any of these classes count as level 0).
INDENT_MAP = {
    'indent--small': 1,
    'indent--medium': 2,
    'indent--large': 3,
}
# Key under which the reporting periods' end dates are stored in a result dict.
END_DATE = 'End Date'

285
src/mwfin/functions.py Normal file
View File

@ -0,0 +1,285 @@
import logging
import asyncio
from typing import Union, Tuple, Dict
from aiohttp.client import ClientSession
from bs4 import BeautifulSoup
from bs4.element import Tag, ResultSet
from webutils import in_async_session, gather_in_batches
from .constants import (HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, DEFAULT_CONCURRENT_BATCH_SIZE,
INDENT_MAP)
log = logging.getLogger(__name__)

# First element in each Tuple is an integer indicating the row indent
HeaderData = Tuple[int, str, str, str, str, str]
RowData = Tuple[int, float, float, float, float, float]

# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement.
# The first value is a tuple of the end dates of the reporting periods as strings (see above).
# The other values are the actual data tuples containing the financial figures.
# NOTE: `typing.Dict` (not the builtin `dict[...]` generic) is required here —
# the package declares `python_requires = >=3.8` and subscripting the builtin
# `dict` raises a TypeError at import time before Python 3.9. This also keeps
# the alias consistent with the `Tuple` aliases above.
ResultDict = Dict[str, Union[HeaderData, RowData]]
@in_async_session
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
    """
    Fetches a web page and parses the response body into BeautifulSoup.

    Args:
        url:
            Target of the GET request
        session (optional):
            An `aiohttp.ClientSession` used to perform the request; if omitted,
            `@in_async_session` creates a throwaway session and closes it once
            the request is done.

    Returns:
        The response text parsed into a BeautifulSoup object
    """
    async with session.get(url) as response:
        markup = await response.text()
    return BeautifulSoup(markup, HTML_PARSER)
def get_row_indent(tr: Tag) -> int:
    """
    Determines the visual indent level of a table row.

    Some positions in a financial statement have sub-positions below them,
    indicated by indentation of the text in the position name's cell.

    Args:
        tr: The table row element

    Returns:
        Integer indent level: 0 = no indentation, 1 = small, 2 = medium, 3 = large
    """
    css_classes = tr.div.attrs.get('class')
    if css_classes is None:
        # The name cell carries no `class` attribute at all.
        return 0
    # First matching indent class wins; no match means no indentation.
    return next((level for name, level in INDENT_MAP.items() if name in css_classes), 0)
def extract_end_dates(soup: BeautifulSoup) -> HeaderData:
    """
    Reads the end dates of the reporting periods (years or quarters) from the
    table header of a financial statement's page.

    Args:
        soup: The parsed page containing the financial statement

    Returns:
        6-tuple whose first element is the indent (always 0 here) and whose
        remaining elements are the end date strings
    """
    header_row = soup.find('div', attrs={'class': 'financials'}).thead.tr
    header_cells = header_row.find_all('th')
    # First cell is the item-name column, last one is an empty filler cell.
    end_dates = tuple(str(cell.string).strip() for cell in header_cells[1:-1])
    return (0, ) + end_dates
def get_all_table_rows(soup: BeautifulSoup) -> ResultSet:
    """
    Collects the data-carrying table rows of a financial statement page.

    Args:
        soup: The parsed page containing the financial statement

    Returns:
        All rows of the statement table's body
    """
    financials_div = soup.find('div', attrs={'class': 'financials'})
    return financials_div.tbody.find_all('tr')
def extract_row_data(tr: Tag) -> Tuple[str, RowData]:
    """
    Pulls the item name, indent level and per-period figures out of one table
    row of a financial statement.

    Args:
        tr: A table row containing data from a financial statement

    Returns:
        2-tuple of the position's name and a 6-tuple whose first element is
        the row indent and whose remaining elements are the actual figures
    """
    name = str(tr.td.div.string).strip()
    chart_div = tr.find_all('td')[-1].div.div
    raw_values: str = chart_div.attrs['data-chart-data']
    # Empty fields in the comma-separated attribute count as zero.
    figures = tuple(float(value) if value != '' else 0.0 for value in raw_values.split(','))
    return name, (get_row_indent(tr), ) + figures
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
    """
    Turns an entire financial statement page (balance sheet, income statement
    or cash flow statement) into a result dictionary.

    Args:
        soup: The parsed page containing a financial statement

    Returns:
        Custom result dictionary (see `ResultDict`)
    """
    data = {END_DATE: extract_end_dates(soup)}
    # `extract_row_data` yields (name, figures) pairs, which update() consumes directly.
    data.update(extract_row_data(tr) for tr in get_all_table_rows(soup))
    return data
@in_async_session
async def get_single_company_fin_stmt(statement: str, ticker_symbol: str, quarterly: bool = False,
                                      session: ClientSession = None) -> ResultDict:
    """
    Scrapes one financial statement of one company.

    Args:
        statement:
            One of the constants `BS`, `IS`, `CF`
        ticker_symbol:
            The company's stock ticker symbol
        quarterly (optional):
            If true, the last five quarters are scraped instead of the last
            five years (the default).
        session (optional):
            See `soup_from_url`

    Returns:
        Custom result dictionary (see `ResultDict`)
    """
    log.info(f"Scraping {statement} for {ticker_symbol}")
    quarter_suffix = '/quarter' if quarterly else ''
    url = f'{BASE_URL}/{ticker_symbol}/financials{FIN_STMT_URL_SUFFIX[statement]}' + quarter_suffix
    soup = await soup_from_url(url, session)
    return extract_all_data(soup)
@in_async_session
async def get_multi_companies_fin_stmt(statement: str, *ticker_symbols: str, quarterly: bool = False,
                                       concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                                       session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Scrapes one kind of financial statement for any number of companies.

    Args:
        statement:
            See `get_single_company_fin_stmt`
        ticker_symbols:
            Arbitrary number of companies' stock ticker symbols
        quarterly (optional):
            See `get_single_company_fin_stmt`
        concurrent_batch_size (optional):
            Number of companies scraped concurrently; 1 (the default) means
            strictly sequential scraping.
        session (optional):
            See `get_single_company_fin_stmt`

    Returns:
        For a single symbol, that statement's `ResultDict`; for several
        symbols, a mapping from each symbol to its `ResultDict`.
    """
    if len(ticker_symbols) == 1:
        # No batching overhead for the trivial case.
        return await get_single_company_fin_stmt(statement, ticker_symbols[0], quarterly, session)
    coroutines = (get_single_company_fin_stmt(statement, symbol, quarterly, session) for symbol in ticker_symbols)
    results = await gather_in_batches(concurrent_batch_size, *coroutines)
    # `gather_in_batches` preserves order, so symbols and results line up.
    return dict(zip(ticker_symbols, results))
@in_async_session
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
                            concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                            session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Scrapes the balance sheet of the specified companies.

    Thin convenience wrapper around `get_multi_companies_fin_stmt`.
    """
    return await get_multi_companies_fin_stmt(
        BS, *ticker_symbols,
        quarterly=quarterly,
        concurrent_batch_size=concurrent_batch_size,
        session=session,
    )
@in_async_session
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
                               concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                               session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Scrapes the income statement of the specified companies.

    Thin convenience wrapper around `get_multi_companies_fin_stmt`.
    """
    return await get_multi_companies_fin_stmt(
        IS, *ticker_symbols,
        quarterly=quarterly,
        concurrent_batch_size=concurrent_batch_size,
        session=session,
    )
@in_async_session
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
                                  concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                                  session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Scrapes the cash flow statement of the specified companies.

    Thin convenience wrapper around `get_multi_companies_fin_stmt`.
    """
    return await get_multi_companies_fin_stmt(
        CF, *ticker_symbols,
        quarterly=quarterly,
        concurrent_batch_size=concurrent_batch_size,
        session=session,
    )
@in_async_session
async def get_single_company_all_financials(ticker_symbol: str, quarterly: bool = False,
                                            session: ClientSession = None) -> Dict[str, ResultDict]:
    """
    Scrapes all three financial statements of one company.

    Dispatches three concurrent `get_single_company_fin_stmt` calls.

    Args:
        ticker_symbol:
            The company's stock ticker symbol
        quarterly (optional):
            See `get_single_company_fin_stmt`
        session (optional):
            See `get_single_company_fin_stmt`

    Returns:
        Mapping from each of the three statement names to its `ResultDict`
    """
    statements = (BS, IS, CF)
    results = await asyncio.gather(
        *(get_single_company_fin_stmt(stmt, ticker_symbol, quarterly, session) for stmt in statements)
    )
    # `asyncio.gather` preserves argument order, so names and results line up.
    return dict(zip(statements, results))
@in_async_session
async def get_all_financials(*ticker_symbols: str, quarterly: bool = False,
                             concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                             session: ClientSession = None) -> Union[Dict[str, ResultDict],
                                                                     Dict[str, Dict[str, ResultDict]]]:
    """
    Scrapes all fundamentals (balance sheet, income statement and cash flow
    statement) of the specified companies.

    Args:
        ticker_symbols:
            Arbitrary number of companies' stock ticker symbols
        quarterly (optional):
            See `get_single_company_all_financials`
        concurrent_batch_size (optional):
            Number of companies scraped concurrently; 1 (the default) means
            strictly sequential scraping.
        session (optional):
            See `get_single_company_all_financials`

    Returns:
        For a single symbol, the output of `get_single_company_all_financials`;
        for several symbols, a mapping from each symbol to that output.
    """
    if len(ticker_symbols) == 1:
        # No batching overhead for the trivial case.
        return await get_single_company_all_financials(ticker_symbols[0], quarterly, session)
    coroutines = (get_single_company_all_financials(symbol, quarterly, session) for symbol in ticker_symbols)
    results = await gather_in_batches(concurrent_batch_size, *coroutines)
    # `gather_in_batches` preserves order, so symbols and results line up.
    return dict(zip(ticker_symbols, results))

0
tests/__init__.py Normal file
View File

243
tests/test_functions.py Normal file
View File

@ -0,0 +1,243 @@
import logging
from pathlib import Path
from unittest import IsolatedAsyncioTestCase
from unittest.mock import patch, MagicMock, AsyncMock, call
from bs4 import BeautifulSoup
from mwfin import functions
from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF, END_DATE
THIS_DIR = Path(__file__).parent
class FunctionsTestCase(IsolatedAsyncioTestCase):
    """Unit tests (plus one integration test) for the scraping helpers in `mwfin.functions`."""

    # boiled down & accurate structure of a relevant data table
    # https://www.marketwatch.com/investing/stock/aapl/financials/cash-flow
    # view page source @ line 2055
    TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')

    # Log level of the module under test before the class muted it (restored in tearDownClass).
    log_lvl: int
    # Raw contents of the fixture file, loaded once in setUpClass.
    test_html: str

    @staticmethod
    def get_mock_session(response_text: str = None) -> MagicMock:
        """Returns a mock `aiohttp.ClientSession` whose GET response text is `response_text`."""
        mock_response = MagicMock()
        mock_response.text = AsyncMock(return_value=response_text)
        mock_get_return = MagicMock()
        # `session.get(...)` is used as an async context manager by the code under test.
        mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
        mock_session_obj = MagicMock()
        mock_session_obj.get = MagicMock(return_value=mock_get_return)
        return mock_session_obj

    @classmethod
    def setUpClass(cls) -> None:
        """Loads and parses the fixture HTML once; silences the module's logger for all tests."""
        with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
            cls.test_html = f.read()
        cls.test_soup = BeautifulSoup(cls.test_html, HTML_PARSER)
        cls.log_lvl = functions.log.level
        functions.log.setLevel(logging.CRITICAL)

    @classmethod
    def tearDownClass(cls) -> None:
        """Restores the module logger's original level."""
        functions.log.setLevel(cls.log_lvl)

    @patch.object(functions, 'ClientSession')
    async def test_soup_from_url(self, mock_session_cls):
        """The response text should come back parsed into BeautifulSoup."""
        test_html = '<b>foo</b>'
        mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html)
        expected_output = BeautifulSoup(test_html, 'html.parser')
        output = await functions.soup_from_url('baz', mock_session_obj)
        self.assertEqual(expected_output, output)

    def test_get_row_indent(self):
        """Rows without an indent class map to 0; the fixture's body rows map to 1, 2, 3 in order."""
        mock_row = BeautifulSoup('<tr><div>foo</div></tr>', HTML_PARSER).tr
        expected_output = 0
        output = functions.get_row_indent(mock_row)
        self.assertEqual(expected_output, output)
        trs = self.test_soup.find_all('tr')
        # The header row carries no indent class.
        output = functions.get_row_indent(trs[0])
        self.assertEqual(expected_output, output)
        for i, tr in enumerate(trs[1:], start=1):
            output = functions.get_row_indent(tr)
            self.assertEqual(i, output)

    @patch.object(functions, 'get_row_indent')
    def test_extract_end_dates(self, mock_get_row_indent):
        """The header should yield indent 0 followed by the two fixture end dates."""
        # NOTE(review): `extract_end_dates` as implemented returns a literal 0
        # and does not appear to call `get_row_indent`, so the
        # `assert_called_once_with` below looks like it would fail — confirm
        # against the current implementation.
        mock_get_row_indent.return_value = 0
        expected_output = (0, 'End_Date_1', 'End_Date_2')
        output = functions.extract_end_dates(self.test_soup)
        self.assertTupleEqual(expected_output, output)
        mock_get_row_indent.assert_called_once_with(self.test_soup.tr)

    def test_get_all_table_rows(self):
        """All rows of the fixture table's body should be returned."""
        expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
        output = functions.get_all_table_rows(self.test_soup)
        self.assertSequenceEqual(expected_output, output)

    @patch.object(functions, 'get_row_indent')
    def test_extract_row_data(self, mock_get_row_indent):
        """The first body row should yield its name plus (indent, figure, figure)."""
        mock_get_row_indent.return_value = 1
        test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr
        expected_output = ('foo', (1, 1., -2.))
        output = functions.extract_row_data(test_row)
        self.assertTupleEqual(expected_output, output)
        mock_get_row_indent.assert_called_once_with(test_row)

    @patch.object(functions, 'extract_row_data')
    @patch.object(functions, 'get_all_table_rows')
    @patch.object(functions, 'extract_end_dates')
    def test_extract_all_data(self, mock_extract_end_dates, mock_get_all_table_rows, mock_extract_row_data):
        """End dates and every row's data should be merged into one result dict."""
        test_end_dates = ('foo', 'bar')
        mock_extract_end_dates.return_value = test_end_dates
        test_relevant_rows = ['tr1', 'tr2']
        mock_get_all_table_rows.return_value = test_relevant_rows
        test_row_data = ('item_name', (123, 456))
        mock_extract_row_data.return_value = test_row_data
        expected_output = {
            END_DATE: test_end_dates,
            # NOTE(review): the two entries below use the same key, so the dict
            # literal collapses to a single entry — presumably intended to mirror
            # both mocked rows mapping to identical data; consider deduplicating.
            test_row_data[0]: test_row_data[1],
            test_row_data[0]: test_row_data[1],
        }
        output = functions.extract_all_data(self.test_soup)
        self.assertDictEqual(expected_output, output)
        mock_extract_end_dates.assert_called_once_with(self.test_soup)
        mock_get_all_table_rows.assert_called_once_with(self.test_soup)
        mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])

    @patch.object(functions, 'extract_all_data')
    @patch.object(functions, 'soup_from_url')
    async def test_get_single_company_fin_stmt(self, mock_soup_from_url, mock_extract_all_data):
        """The correct URL (with/without '/quarter') should be fetched and its soup passed on."""
        mock_session = MagicMock()
        test_ticker, statement = 'bar', BS
        test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
        mock_soup_from_url.return_value = mock_soup = MagicMock()
        mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
        quarterly = False
        output = await functions.get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
        self.assertDictEqual(expected_output, output)
        mock_soup_from_url.assert_called_once_with(test_url, mock_session)
        mock_extract_all_data.assert_called_once_with(mock_soup)
        mock_soup_from_url.reset_mock()
        mock_extract_all_data.reset_mock()
        # Quarterly data lives under the additional '/quarter' path segment.
        quarterly = True
        output = await functions.get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
        self.assertDictEqual(expected_output, output)
        mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session)
        mock_extract_all_data.assert_called_once_with(mock_soup)

    @patch.object(functions, 'get_single_company_fin_stmt')
    async def test_get_multi_companies_fin_stmt(self, mock_get_single_company_fin_stmt):
        """One symbol should pass through directly; several should yield a symbol-keyed dict."""
        statement, sym1, sym2, quarterly, mock_session = 'xyz', 'foo', 'bar', False, MagicMock()
        mock_get_single_company_fin_stmt.return_value = expected_output = 'baz'
        output = await functions.get_multi_companies_fin_stmt(statement, sym1,
                                                              quarterly=quarterly, session=mock_session)
        self.assertEqual(expected_output, output)
        mock_get_single_company_fin_stmt.assert_called_once_with(statement, sym1, quarterly, mock_session)
        mock_get_single_company_fin_stmt.reset_mock()
        expected_output = {sym1: expected_output, sym2: expected_output}
        output = await functions.get_multi_companies_fin_stmt(statement, sym1, sym2,
                                                              quarterly=quarterly, session=mock_session)
        self.assertDictEqual(expected_output, output)
        mock_get_single_company_fin_stmt.assert_has_calls([
            call(statement, sym1, quarterly, mock_session),
            call(statement, sym2, quarterly, mock_session)
        ])

    async def _helper_test_get_any_statement(self, stmt: str, mock_get_multi_companies_fin_stmt):
        """Shared body for the three statement-wrapper tests: picks the wrapper by `stmt` and checks delegation."""
        sym1, sym2, quarterly, batch_size, mock_session = 'foo', 'bar', False, 2, MagicMock()
        mock_get_multi_companies_fin_stmt.return_value = expected_output = 'baz'
        if stmt == BS:
            function = functions.get_balance_sheet
        elif stmt == IS:
            function = functions.get_income_statement
        elif stmt == CF:
            function = functions.get_cash_flow_statement
        else:
            raise ValueError
        output = await function(sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session)
        self.assertEqual(expected_output, output)
        mock_get_multi_companies_fin_stmt.assert_called_once_with(
            stmt, sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session
        )

    @patch.object(functions, 'get_multi_companies_fin_stmt')
    async def test_get_balance_sheet(self, mock_get_multi_companies_fin_stmt):
        await self._helper_test_get_any_statement(BS, mock_get_multi_companies_fin_stmt)

    @patch.object(functions, 'get_multi_companies_fin_stmt')
    async def test_get_income_statement(self, mock_get_multi_companies_fin_stmt):
        await self._helper_test_get_any_statement(IS, mock_get_multi_companies_fin_stmt)

    @patch.object(functions, 'get_multi_companies_fin_stmt')
    async def test_get_cash_flow_statement(self, mock_get_multi_companies_fin_stmt):
        await self._helper_test_get_any_statement(CF, mock_get_multi_companies_fin_stmt)

    @patch.object(functions, 'get_single_company_fin_stmt')
    async def test_get_single_company_all_financials(self, mock_get_single_company_fin_stmt):
        """All three statements should be fetched and keyed by statement name."""
        symbol, quarterly, mock_session = 'foo', False, MagicMock()
        mock_get_single_company_fin_stmt.return_value = bar = 'bar'
        expected_output = {BS: bar, IS: bar, CF: bar}
        output = await functions.get_single_company_all_financials(symbol, quarterly, mock_session)
        self.assertDictEqual(expected_output, output)
        mock_get_single_company_fin_stmt.assert_has_calls([
            call(BS, symbol, quarterly, mock_session),
            call(IS, symbol, quarterly, mock_session),
            call(CF, symbol, quarterly, mock_session)
        ])

    @patch.object(functions, 'get_single_company_all_financials')
    async def test_get_company_financials(self, mock_get_single_company_all_financials):
        """One symbol should pass through directly; several should yield a symbol-keyed dict."""
        mock_get_single_company_all_financials.return_value = expected_output = 'baz'
        symbol, quarterly, mock_session = 'foo', False, MagicMock()
        output = await functions.get_all_financials(symbol, quarterly=quarterly, session=mock_session)
        self.assertEqual(expected_output, output)
        mock_get_single_company_all_financials.assert_called_once_with(symbol, quarterly, mock_session)
        mock_get_single_company_all_financials.reset_mock()
        test_sym1, test_sym2 = 'x', 'y'
        expected_output = {test_sym1: expected_output, test_sym2: expected_output}
        output = await functions.get_all_financials(test_sym1, test_sym2,
                                                    quarterly=quarterly, session=mock_session)
        self.assertDictEqual(expected_output, output)
        mock_get_single_company_all_financials.assert_has_calls([
            call(test_sym1, quarterly, mock_session),
            call(test_sym2, quarterly, mock_session)
        ])

    @patch.object(functions, 'ClientSession')
    async def test_integration_get_company_financials(self, mock_session_cls):
        """End-to-end run over the fixture HTML: only the HTTP request is mocked."""
        mock_session_cls.return_value = mock_session_obj = self.get_mock_session(self.test_html)
        symbol = 'foo'
        # Since the web request is mocked we always receive the same HTML markup.
        expected_output = {
            BS: {
                END_DATE: (0, 'End_Date_1', 'End_Date_2'),
                'foo': (1, 1., -2.),
                'bar': (2, 2., -3.),
                'baz': (3, 3., -4.)
            },
            IS: {
                END_DATE: (0, 'End_Date_1', 'End_Date_2'),
                'foo': (1, 1., -2.),
                'bar': (2, 2., -3.),
                'baz': (3, 3., -4.)
            },
            CF: {
                END_DATE: (0, 'End_Date_1', 'End_Date_2'),
                'foo': (1, 1., -2.),
                'bar': (2, 2., -3.),
                'baz': (3, 3., -4.)
            }
        }
        output = await functions.get_all_financials(symbol, session=mock_session_obj)
        self.assertDictEqual(expected_output, output)
        mock_session_obj.get.assert_has_calls([
            call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[BS]}'),
            call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[IS]}'),
            call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[CF]}'),
        ])

87
tests/test_main.py Normal file
View File

@ -0,0 +1,87 @@
import logging
import json
from unittest import IsolatedAsyncioTestCase
from unittest.mock import patch
from argparse import Namespace
from io import StringIO
from mwfin import __main__ as main_module
class MainModuleTestCase(IsolatedAsyncioTestCase):
    """Tests for the CLI entry point module `mwfin.__main__`."""

    @patch.object(main_module.ArgumentParser, 'parse_args')
    def test_parse_cli(self, mock_parse_args):
        """`parse_cli` should return the parsed argparse namespace as a plain dict."""
        mock_parse_args.return_value = mock_args = Namespace(foo='a', bar='b')
        expected_output = vars(mock_args)
        output = main_module.parse_cli()
        self.assertDictEqual(expected_output, output)

    def test_configure_logging(self):
        """Each verbosity should install exactly one stream handler and set the matching root level."""
        root_logger = logging.getLogger()
        # Start from a clean root logger before each verbosity check.
        root_logger.handlers = []
        main_module.configure_logging(verbosity=0)
        self.assertEqual(1, len(root_logger.handlers))
        self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
        self.assertEqual(logging.CRITICAL, root_logger.level)
        root_logger.handlers = []
        main_module.configure_logging(verbosity=1)
        self.assertEqual(1, len(root_logger.handlers))
        self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
        self.assertEqual(logging.WARNING, root_logger.level)
        root_logger.handlers = []
        main_module.configure_logging(verbosity=2)
        self.assertEqual(1, len(root_logger.handlers))
        self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
        self.assertEqual(logging.INFO, root_logger.level)
        root_logger.handlers = []
        main_module.configure_logging(verbosity=3)
        self.assertEqual(1, len(root_logger.handlers))
        self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
        self.assertEqual(logging.DEBUG, root_logger.level)
        root_logger.handlers = []
        # Anything above 3 caps out at DEBUG.
        main_module.configure_logging(verbosity=9999)
        self.assertEqual(1, len(root_logger.handlers))
        self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
        self.assertEqual(logging.DEBUG, root_logger.level)

    @patch.object(main_module, 'get_all_financials')
    @patch.object(main_module, 'configure_logging')
    @patch.object(main_module, 'parse_cli')
    async def test_main(self, mock_parse_cli, mock_configure_logging, mock_get_all_financials):
        """`main` should scrape per the CLI args and print or write the JSON output."""
        mock_parse_cli.return_value = args = {
            main_module.VERBOSE: 'foo',
            main_module.TICKER_SYMBOL: ['bar', 'baz'],
            main_module.QUARTERLY: 'perhaps',
            main_module.BATCH_SIZE: 'xyz',
            main_module.TO_FILE: None,
            main_module.JSON_INDENT: 42,
        }
        mock_get_all_financials.return_value = mock_data = {'data': 'something cool'}
        # To stdout:
        with patch.object(main_module, 'print') as mock_print:
            await main_module.main()
        mock_parse_cli.assert_called_once_with()
        mock_configure_logging.assert_called_once_with(args[main_module.VERBOSE])
        mock_get_all_financials.assert_awaited_once_with(*args[main_module.TICKER_SYMBOL],
                                                         quarterly=args[main_module.QUARTERLY],
                                                         concurrent_batch_size=args[main_module.BATCH_SIZE])
        mock_print.assert_called_once_with(json.dumps(mock_data, indent=args[main_module.JSON_INDENT]))
        mock_parse_cli.reset_mock()
        mock_configure_logging.reset_mock()
        mock_get_all_financials.reset_mock()
        # To file:
        args[main_module.TO_FILE] = 'some_file'
        with patch.object(main_module, 'open') as mock_open:
            # StringIO stands in for the destination file so the contents can be read back.
            mock_open.return_value.__enter__.return_value = mock_file = StringIO()
            await main_module.main()
        mock_parse_cli.assert_called_once_with()
        mock_configure_logging.assert_called_once_with(args[main_module.VERBOSE])
        mock_get_all_financials.assert_awaited_once_with(*args[main_module.TICKER_SYMBOL],
                                                         quarterly=args[main_module.QUARTERLY],
                                                         concurrent_batch_size=args[main_module.BATCH_SIZE])
        expected_contents = json.dumps(mock_data, indent=args[main_module.JSON_INDENT])
        mock_file.seek(0)
        self.assertEqual(expected_contents, mock_file.read())

49
tests/test_structure.html Normal file
View File

@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<div class="financials">
<header>
<h2><span>Foo table</span></h2>
<small> All values USD.</small>
</header>
<div>
<div>
<table>
<thead>
<tr>
<th><div class="xyz abc"> !!Item </div><div> !!Item </div></th>
<th><div> End_Date_1 </div></th>
<th><div> End_Date_2 </div></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<td><div class="xyz indent--small"> foo </div><div> foo </div></td>
<td></td>
<td></td>
<td><div> <div data-chart-data="1.0,-2.0"><div></div></td>
</tr>
<tr>
<td><div class="xyz indent--medium"> bar </div><div> bar </div></td>
<td></td>
<td></td>
<td><div> <div data-chart-data="2.0,-3.0"><div></div></td>
</tr>
<tr>
<td><div class="xyz indent--large"> baz </div><div> baz </div></td>
<td></td>
<td></td>
<td><div> <div data-chart-data="3.0,-4.0"><div></div></td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</body>
</html>