Compare commits

..

46 Commits

Author SHA1 Message Date
911fa15f46 extensive docstrings; made all functions public (not protected); minor refactoring 2021-12-28 23:59:54 +01:00
f29f22a52e 100% coverage 2021-12-26 23:32:15 +01:00
a2f3f590f6 test for __main__ 2021-12-26 23:22:12 +01:00
3276dbfae3 ditched csv; minor help text change 2021-12-26 21:08:36 +01:00
2b3fb9f7ba implemented 2021-12-26 19:04:23 +01:00
8afab2fde8 planned function to get row indent 2021-12-26 18:40:29 +01:00
f7cb5de928 plan to encode row indentation as int & include it as the 1st element in the data tuple 2021-12-26 18:08:25 +01:00
c93fb9692e minor refactoring & adjustments 2021-12-26 17:55:34 +01:00
38dd29f35b all table rows to be considered relevant 2021-12-26 17:34:59 +01:00
462e34013e test now passing 2021-12-03 17:51:10 +01:00
4fd69602f7 tests for writing html to file when encountering unknown items in DEV_MODE 2021-12-03 16:45:40 +01:00
94568c6165 implementation to pass tests 2021-12-03 16:28:12 +01:00
d46148a510 is_relevant_table_row should raise a custom exception for unknown items, find_relevant_table_rows should handle the rest 2021-12-03 16:20:04 +01:00
3959aff10e added log statement 2021-12-03 16:01:00 +01:00
c18ab65a7d cli now supports multiple ticker symbols and concurrency 2021-12-03 15:53:53 +01:00
55a56c2f9c concurrency for getting all three financial statements; improved imports; renamed functions 2021-12-03 15:38:21 +01:00
8d18e03018 implemented new function and adjusted function call sequence 2021-12-03 15:19:21 +01:00
a7dbdcf917 adjusted tests to call the new function 2021-12-03 14:32:52 +01:00
ca360413c3 test for new function to get any statement from many companies; renamed functions 2021-12-03 14:17:49 +01:00
1f8104b1a0 refactoring 2021-12-03 11:56:09 +01:00
68f3e428ab plan to refactor get_company_financials; tests changed accordingly 2021-12-03 11:46:39 +01:00
a6b3c57021 fixed and added things to README 2021-11-30 12:53:11 +01:00
c23c963cd8 added async-session decorator from own project; changed logging setup; added CLI parameters 2021-11-30 12:46:48 +01:00
36226fc1be fixed and refactored cli 2021-11-28 17:19:12 +01:00
9186d8734f fixed type hints 2021-11-28 17:10:00 +01:00
e522897991 implemented getting specific financial statement for multiple companies; fixed tests 2021-11-28 17:07:23 +01:00
1529df252c allow getting specific financial statement for multiple companies 2021-11-28 16:58:21 +01:00
3c7afcd879 factored out some tests 2021-11-28 16:52:16 +01:00
4a68d45bc4 implemented last idea 2021-11-28 16:42:21 +01:00
e7f3730c64 idea for getting financials for an arbitrary number of companies 2021-11-28 16:39:53 +01:00
fd03815172 improved writing to file 2021-11-28 16:24:14 +01:00
6dd37a4aa2 cli for writing to json/csv 2021-11-28 15:53:19 +01:00
2d9119a4be company financials returned as separate dicts; test fix 2021-11-28 15:20:28 +01:00
326b956be4 added small integration test 2021-11-27 18:35:12 +01:00
63008be829 bugfix, minor change, full coverage 2021-11-27 18:11:16 +01:00
2380540659 drafted remaining functions 2021-11-27 17:23:36 +01:00
f9745ff46d two more functions implemented 2021-11-27 15:36:51 +01:00
f2abd8f2ce fixes; another function implemented 2021-11-27 15:11:42 +01:00
25be101cf9 function drafts 2021-11-26 23:37:46 +01:00
44dee0b762 finished tests 2021-11-26 21:38:10 +01:00
776c909956 more tests, idea for change 2021-11-26 12:47:58 +01:00
1e0410a254 function for checking tr relevance 2021-11-24 22:28:33 +01:00
9adad5dba1 financial statements items 2021-11-24 21:01:16 +01:00
8041386d52 more unit tests 2021-11-24 15:28:34 +01:00
d504ddfc33 reading test html file in test class set up method 2021-11-23 19:57:37 +01:00
6ac38ad06d updated/condensed html 2021-11-23 19:42:20 +01:00
11 changed files with 708 additions and 98 deletions

View File

@ -8,3 +8,5 @@ omit =
fail_under = 100
show_missing = True
skip_covered = True
exclude_lines =
if __name__ == .__main__.:

View File

@ -8,15 +8,23 @@ Asynchronous HTTP requests are currently used by default.
- [Python](https://www.python.org/) 3.8+
Should run on most Linux/Windows systems. Tested on Arch.
Should run on most Linux/Windows systems. Tested on Arch and Windows 10.
### Building
Clone this repo, install `build` via pip, then run `pip -m build`
Clone this repo, install `build` via pip, then run `python -m build`
from the repository's root directory. This should produce a `dist/`
subdirectory with a wheel (build) and archive (source) distribution.
The resulting `whl`-file can be installed via `pip install path/dist/***.whl`.
### Running
...
```shell
$ python -m mwfin -h
```
gives a description of available CLI parameters and their usage. For example, running
```shell
$ python -m mwfin -f output.json -Q AAPL
```
will retrieve the most recent available quarterly data from all three financial statements published by Apple Inc. and save it in JSON format to `output.json`.

View File

@ -1,2 +1,3 @@
beautifulsoup4
aiohttp
aiohttp
webutils-df

View File

@ -23,6 +23,7 @@ python_requires = >=3.8
install_requires =
beautifulsoup4
aiohttp
webutils-df
[options.extras_require]
tests =

View File

@ -0,0 +1 @@
from .functions import get_balance_sheet, get_income_statement, get_cash_flow_statement, get_all_financials

View File

@ -0,0 +1,95 @@
import logging
import asyncio
import json
from argparse import ArgumentParser
from pathlib import Path
from .functions import get_all_financials
from .constants import MAIN_LOGGER_NAME, DEFAULT_CONCURRENT_BATCH_SIZE
# Logger of the package, named after the `MAIN_LOGGER_NAME` constant
log = logging.getLogger(MAIN_LOGGER_NAME)
# Names of the command line arguments.
# `parse_cli` derives the option flags from them (underscores become dashes) and
# `main` uses the very same strings as keys into the dictionary of parsed arguments.
TICKER_SYMBOL = 'ticker_symbol'
QUARTERLY = 'quarterly'
BATCH_SIZE = 'concurrent_batch_size'
TO_FILE = 'to_file'
JSON_INDENT = 'json_indent'
VERBOSE = 'verbose'
def parse_cli() -> dict:
    """
    Defines the command line interface of the program and parses the arguments passed to it.

    Returns:
        Dictionary with the names of the arguments as keys and the parsed values as values
    """
    def as_flag(name: str) -> str:
        # Long options are spelled with dashes on the command line
        return '--' + name.replace('_', '-')

    # One entry per argument, in the order they are registered: (names/flags, keyword settings)
    argument_specs = (
        ((TICKER_SYMBOL, ), {
            'type': str,
            'nargs': '+',
            'help': "Stock ticker symbol of the company to be scraped the financials of",
        }),
        (('-Q', as_flag(QUARTERLY)), {
            'action': 'store_true',
            'help': "If set, the financial data for the last quarters is returned; otherwise yearly data is returned.",
        }),
        (('-b', as_flag(BATCH_SIZE)), {
            'type': int,
            'default': DEFAULT_CONCURRENT_BATCH_SIZE,
            'help': "If multiple ticker symbols are passed, the company financials can be scraped concurrently. "
                    "This argument determines how many companies are scraped concurrently. "
                    "By default, they are scraped sequentially (i.e. a batch size of 1).",
        }),
        (('-f', as_flag(TO_FILE)), {
            'type': Path,
            'help': "Writes results to the specified destination file. If omitted results are printed to stdout.",
        }),
        ((as_flag(JSON_INDENT), ), {
            'type': int,
            'help': "If set to a positive integer and the output format is JSON (default), the resulting JSON "
                    "document is "
                    "indented accordingly for more readability; if omitted, output is returned in one line.",
        }),
        (('-v', as_flag(VERBOSE)), {
            'action': 'count',
            'default': 0,
            'help': "Verbose mode. Reduces the log level and thus prints out more status messages while running. "
                    "Using this flag multiple times increases verbosity further.",
        }),
    )
    arg_parser = ArgumentParser(description="Scrape company financials")
    for names, settings in argument_specs:
        arg_parser.add_argument(*names, **settings)
    return vars(arg_parser.parse_args())
def configure_logging(verbosity: int) -> None:
    """
    Attaches a stream handler to the root logger and picks the root logger's level from the verbosity.

    The handler is created without an explicit stream, i.e. it writes to the `logging.StreamHandler` default (stderr).

    Args:
        verbosity:
            0 (or less) -> CRITICAL, 1 -> WARNING, 2 -> INFO, 3 (or more) -> DEBUG
    """
    root = logging.getLogger()
    root.addHandler(logging.StreamHandler())
    # Start out as quiet as possible; only a positive verbosity loosens the level below.
    root.setLevel(logging.CRITICAL)
    if verbosity > 2:
        root.setLevel(logging.DEBUG)
        return
    level = {1: logging.WARNING, 2: logging.INFO}.get(verbosity)
    if level is not None:
        root.setLevel(level)
async def main() -> None:
    """
    Runs the program: parses the CLI arguments, configures logging, scrapes the financials of all requested
    companies and outputs them as JSON, either to the destination file (if one was specified) or to stdout.
    """
    cli_args = parse_cli()
    configure_logging(cli_args[VERBOSE])
    financials = await get_all_financials(
        *cli_args[TICKER_SYMBOL],
        quarterly=cli_args[QUARTERLY],
        concurrent_batch_size=cli_args[BATCH_SIZE],
    )
    destination: Path = cli_args[TO_FILE]
    if destination is not None:
        with open(destination, 'w') as file:
            json.dump(financials, file, indent=cli_args[JSON_INDENT])
    else:
        # No destination file was given, so the JSON document goes to stdout
        print(json.dumps(financials, indent=cli_args[JSON_INDENT]))
# Only start the scraper when this module is executed as a program (e.g. `python -m mwfin`), not on import.
if __name__ == '__main__':
asyncio.run(main())

19
src/mwfin/constants.py Normal file
View File

@ -0,0 +1,19 @@
# Name of the logger used by the command line entry point
MAIN_LOGGER_NAME = 'mwfin'
# Parser name handed to BeautifulSoup when turning response text into soup
HTML_PARSER = 'html.parser'
# Every financial statement URL starts with `BASE_URL`, followed by the ticker symbol
DOMAIN = 'www.marketwatch.com'
BASE_URL = f'https://{DOMAIN}/investing/stock'
# A batch size of 1 means companies are scraped one after the other
DEFAULT_CONCURRENT_BATCH_SIZE = 1
# Names of the three financial statements; also used as keys when all statements of a company are returned
BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement'
# Statement-specific part of the URL, appended right after `/financials`;
# the income statement has no suffix of its own.
FIN_STMT_URL_SUFFIX = {
BS: '/balance-sheet',
IS: '',
CF: '/cash-flow'
}
# Class names that mark an indented table row, mapped to the integer encoding the indentation level
INDENT_MAP = {
'indent--small': 1,
'indent--medium': 2,
'indent--large': 3,
}
# Key under which the end dates of the reporting periods are stored in a result dictionary
END_DATE = 'End Date'

View File

@ -1,88 +1,285 @@
from typing import Union
import logging
import asyncio
from typing import Union, Tuple, Dict
from aiohttp.client import ClientSession
from bs4 import BeautifulSoup
from bs4.element import ResultSet, Tag
from bs4.element import Tag, ResultSet
from webutils import in_async_session, gather_in_batches
from .constants import (HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, DEFAULT_CONCURRENT_BATCH_SIZE,
INDENT_MAP)
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
# while its values will always be tuples with a length corresponding to the number of periods (columns)
# and elements being the actual numbers, with the exception of the first key-value-pair, which will represent
# the end dates of the reporting periods as strings (either years or quarters).
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
log = logging.getLogger(__name__)

# First element in each Tuple is an integer indicating the row indent;
# the remaining elements are the end dates of the reporting periods (header) or the financial figures (row).
HeaderData = Tuple[int, str, str, str, str, str]
RowData = Tuple[int, float, float, float, float, float]

# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement.
# The first value is the header tuple holding the end dates of the reporting periods as strings.
# The other values are the actual data tuples containing the financial figures.
# NOTE: `typing.Dict` is used on purpose; subscripting the builtin `dict` at runtime only works from Python 3.9 on,
# whereas this package declares support for Python 3.8.
ResultDict = Dict[str, Union[HeaderData, RowData]]
@in_async_session
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
    """
    Performs a GET request and parses the text of the response into BeautifulSoup.

    Args:
        url:
            Address of the page to request
        session (optional):
            The `aiohttp.ClientSession` object to perform the request with.
            If omitted, a new session is created and automatically closed after the request
            (see `@in_async_session`).

    Returns:
        The html response text parsed with the parser specified in `HTML_PARSER`
    """
    async with session.get(url) as page:
        markup = await page.text()
    return BeautifulSoup(markup, HTML_PARSER)
def extract_end_dates(soup: BeautifulSoup) -> tuple[str]:
def get_row_indent(tr: Tag) -> int:
    """
    Determines the visual indent of a table row.

    Some positions in a financial statement have sub-positions below them indicated by indentation of the text in the
    position name's cell. The indentation is read from the class names of the first `div` inside the row.

    Args:
        tr: The table row element

    Returns:
        Each indentation level corresponds to an integer. 0 = no indentation, 1 = small, 2 = medium, 3 = large.
        A row without any `div`, or whose `div` has no class attribute or no known indent class, counts as 0.
    """
    try:
        classes = tr.div.attrs['class']
    except (AttributeError, KeyError):
        # AttributeError: the row contains no `div` at all, so `tr.div` is `None`.
        # KeyError: the `div` exists but carries no class attribute.
        return 0
    # The first known indent class found on the `div` decides; anything else means no indentation.
    return next((indent for class_name, indent in INDENT_MAP.items() if class_name in classes), 0)
def extract_end_dates(soup: BeautifulSoup) -> HeaderData:
    """
    Reads the end dates of the reporting periods (either years or quarters) from the header of the table on the
    page of a financial statement.

    Args:
        soup: The parsed page containing the financial statement

    Returns:
        A tuple starting with the indent (always 0 for the header), followed by the end dates as stripped strings.
    """
    header_row = soup.find('div', attrs={'class': 'financials'}).thead.tr
    # The first and the last header cell are left out; only the cells in between are read as end dates.
    date_cells = header_row.find_all('th')[1:-1]
    end_dates = [str(cell.string).strip() for cell in date_cells]
    return (0, *end_dates)
def find_relevant_table_rows(soup: BeautifulSoup) -> ResultSet:
def get_all_table_rows(soup: BeautifulSoup) -> ResultSet:
    """
    Collects every row from the body of the table holding the financial statement.

    Args:
        soup: The parsed page containing the financial statement

    Returns:
        All `tr` elements found in the `tbody` inside the `div` with the class `financials`
    """
    financials_div = soup.find('div', attrs={'class': 'financials'})
    return financials_div.tbody.find_all('tr')
def convert_number(num_str: str) -> int:
"""
Takes a string like e.g. "420.69M" and returns 42069000000.
"""
pass
def extract_row_data(tr: Tag) -> tuple[str, tuple[int]]:
def extract_row_data(tr: Tag) -> Tuple[str, RowData]:
    """
    Extracts the name of the position displayed in a table row (of a financial statement)
    together with the row's indent and one figure per reporting period.

    The figures are taken from the comma-separated `data-chart-data` attribute found in the last cell of the row;
    an empty entry in that list is read as 0.

    Args:
        tr: A table row containing data from a financial statement

    Returns:
        2-tuple where the 1st element is the position's name and the 2nd is a tuple, of which the first element is
        the indent and the rest are the actual figures.
    """
    name = str(tr.td.div.string).strip()
    last_cell = tr.find_all('td')[-1]
    raw_figures: str = last_cell.div.div.attrs['data-chart-data']
    figures = [float(part) if part != '' else 0.0 for part in raw_figures.split(',')]
    return name, (get_row_indent(tr), *figures)
def extract_all_data(soup: BeautifulSoup) -> ResultDict:
    """
    Extracts the financials from a page showing either a balance sheet, an income statement or a cash flow
    statement.

    Args:
        soup: The parsed page containing a financial statement

    Returns:
        Custom result dictionary (see `ResultDict`); the end dates are stored under the `END_DATE` key,
        every table row under the name of its position.
    """
    result = {END_DATE: extract_end_dates(soup)}
    for name, figures in map(extract_row_data, get_all_table_rows(soup)):
        result[name] = figures
    return result
async def get_balance_sheet(ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
session: ClientSession = None) -> ResultDict:
@in_async_session
async def get_single_company_fin_stmt(statement: str, ticker_symbol: str, quarterly: bool = False,
                                      session: ClientSession = None) -> ResultDict:
    """
    Scrapes one financial statement of one company.

    Args:
        statement:
            Must be one of the strings defined in the constants `BS`, `IS`, `CF`
        ticker_symbol:
            The company's stock ticker symbol
        quarterly (optional):
            If true the quarterly version of the statement is requested; otherwise (default) the yearly one.
        session (optional):
            See `soup_from_url`

    Returns:
        Custom result dictionary (see `ResultDict`)
    """
    log.info(f"Scraping {statement} for {ticker_symbol}")
    # Quarterly figures live on a sub-page of the respective statement
    period_suffix = '/quarter' if quarterly else ''
    url = f'{BASE_URL}/{ticker_symbol}/financials{FIN_STMT_URL_SUFFIX[statement]}{period_suffix}'
    return extract_all_data(await soup_from_url(url, session))
async def get_income_statement(ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
session: ClientSession = None) -> ResultDict:
@in_async_session
async def get_multi_companies_fin_stmt(statement: str, *ticker_symbols: str, quarterly: bool = False,
                                       concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                                       session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Scrapes one financial statement for an arbitrary number of companies.

    Args:
        statement:
            See `get_single_company_fin_stmt`
        ticker_symbols:
            Arbitrary number of companies' stock ticker symbols
        quarterly (optional):
            See `get_single_company_fin_stmt`
        concurrent_batch_size (optional):
            Number of companies that are scraped concurrently, if more than one ticker symbol is passed.
            By default, they are scraped sequentially (i.e. a batch size of 1).
        session (optional):
            See `get_single_company_fin_stmt`

    Returns:
        For exactly one ticker symbol, the `ResultDict` of that company's statement. Otherwise a dictionary with
        the ticker symbols as keys and the corresponding `ResultDict`s as values.
    """
    def scrape(symbol: str):
        return get_single_company_fin_stmt(statement, symbol, quarterly, session)

    if len(ticker_symbols) == 1:
        return await scrape(ticker_symbols[0])
    results = await gather_in_batches(concurrent_batch_size, *map(scrape, ticker_symbols))
    return dict(zip(ticker_symbols, results))
async def get_cash_flow_statement(ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
session: ClientSession = None) -> ResultDict:
@in_async_session
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
                            concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                            session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Scrapes the balance sheet of each of the specified companies.

    Shortcut for calling `get_multi_companies_fin_stmt` with `BS` as the statement; all other arguments are
    passed on unchanged and the return value is the one of that function.
    """
    shared_options = dict(quarterly=quarterly, concurrent_batch_size=concurrent_batch_size, session=session)
    return await get_multi_companies_fin_stmt(BS, *ticker_symbols, **shared_options)
async def get_company_financials(ticker_symbol: str, yearly: bool = True, quarterly: bool = True,
session: ClientSession = None) -> ResultDict:
@in_async_session
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
                               concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                               session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Scrapes the income statement of each of the specified companies.

    Shortcut for calling `get_multi_companies_fin_stmt` with `IS` as the statement; all other arguments are
    passed on unchanged and the return value is the one of that function.
    """
    shared_options = dict(quarterly=quarterly, concurrent_batch_size=concurrent_batch_size, session=session)
    return await get_multi_companies_fin_stmt(IS, *ticker_symbols, **shared_options)
@in_async_session
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
                                  concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                                  session: ClientSession = None) -> Union[ResultDict, Dict[str, ResultDict]]:
    """
    Scrapes the cash flow statement of each of the specified companies.

    Shortcut for calling `get_multi_companies_fin_stmt` with `CF` as the statement; all other arguments are
    passed on unchanged and the return value is the one of that function.
    """
    shared_options = dict(quarterly=quarterly, concurrent_batch_size=concurrent_batch_size, session=session)
    return await get_multi_companies_fin_stmt(CF, *ticker_symbols, **shared_options)
@in_async_session
async def get_single_company_all_financials(ticker_symbol: str, quarterly: bool = False,
                                            session: ClientSession = None) -> Dict[str, ResultDict]:
    """
    Scrapes the balance sheet, income statement and cash flow statement of one company.

    The three calls to `get_single_company_fin_stmt` are awaited together via `asyncio.gather`.

    Args:
        ticker_symbol:
            The company's stock ticker symbol
        quarterly (optional):
            See `get_single_company_fin_stmt`
        session (optional):
            See `get_single_company_fin_stmt`

    Returns:
        A dictionary with the three statement names (`BS`, `IS`, `CF`) as keys and the corresponding
        `ResultDict`s as values
    """
    statements = (BS, IS, CF)
    results = await asyncio.gather(
        *(get_single_company_fin_stmt(stmt, ticker_symbol, quarterly, session) for stmt in statements)
    )
    return dict(zip(statements, results))
@in_async_session
async def get_all_financials(*ticker_symbols: str, quarterly: bool = False,
                             concurrent_batch_size: int = DEFAULT_CONCURRENT_BATCH_SIZE,
                             session: ClientSession = None) -> Union[Dict[str, ResultDict],
                                                                     Dict[str, Dict[str, ResultDict]]]:
    """
    Scrapes all fundamentals (balance sheet, income statement and cash flow statement) of the specified companies.

    Args:
        ticker_symbols:
            Arbitrary number of companies' stock ticker symbols
        quarterly (optional):
            See `get_single_company_all_financials`
        concurrent_batch_size (optional):
            Number of companies that are scraped concurrently, if more than one ticker symbol is passed.
            By default, they are scraped sequentially (i.e. a batch size of 1).
        session (optional):
            See `get_single_company_all_financials`

    Returns:
        For exactly one ticker symbol, the output of `get_single_company_all_financials` for that company.
        Otherwise a dictionary with the ticker symbols as keys and the corresponding outputs of
        `get_single_company_all_financials` as values.
    """
    def scrape(symbol: str):
        return get_single_company_all_financials(symbol, quarterly, session)

    if len(ticker_symbols) == 1:
        return await scrape(ticker_symbols[0])
    results = await gather_in_batches(concurrent_batch_size, *map(scrape, ticker_symbols))
    return dict(zip(ticker_symbols, results))

View File

@ -1,61 +1,243 @@
import logging
from pathlib import Path
from unittest import IsolatedAsyncioTestCase
from unittest.mock import patch, MagicMock, AsyncMock, call
from bs4 import BeautifulSoup
from mwfin import functions
from mwfin.constants import HTML_PARSER, BASE_URL, FIN_STMT_URL_SUFFIX, IS, BS, CF, END_DATE
# boiled down & accurate structure of a relevant data table
# https://www.marketwatch.com/investing/stock/aapl/financials/cash-flow
# view page source @ line 2055
TEST_HTML = ''
TEST_SOUP = BeautifulSoup(TEST_HTML, 'html.parser')
THIS_DIR = Path(__file__).parent
class FunctionsTestCase(IsolatedAsyncioTestCase):
# boiled down & accurate structure of a relevant data table
# https://www.marketwatch.com/investing/stock/aapl/financials/cash-flow
# view page source @ line 2055
TEST_HTML_FILE_PATH = Path(THIS_DIR, 'test_structure.html')
@patch.object(functions, 'ClientSession')
async def test_soup_from_url(self, mock_session_cls):
test_html = '<b>foo</b>'
log_lvl: int
test_html: str
@staticmethod
def get_mock_session(response_text: str = None) -> MagicMock:
mock_response = MagicMock()
mock_response.text = AsyncMock(return_value=test_html)
mock_response.text = AsyncMock(return_value=response_text)
mock_get_return = MagicMock()
mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
mock_session_obj = MagicMock()
mock_session_obj.get = MagicMock(return_value=mock_get_return)
mock_session_cls.return_value = mock_session_obj
return mock_session_obj
@classmethod
def setUpClass(cls) -> None:
with open(cls.TEST_HTML_FILE_PATH, 'r') as f:
cls.test_html = f.read()
cls.test_soup = BeautifulSoup(cls.test_html, HTML_PARSER)
cls.log_lvl = functions.log.level
functions.log.setLevel(logging.CRITICAL)
@classmethod
def tearDownClass(cls) -> None:
functions.log.setLevel(cls.log_lvl)
@patch.object(functions, 'ClientSession')
async def test_soup_from_url(self, mock_session_cls):
test_html = '<b>foo</b>'
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(test_html)
expected_output = BeautifulSoup(test_html, 'html.parser')
output = await functions.soup_from_url('baz')
self.assertEqual(expected_output, output)
output = await functions.soup_from_url('baz', mock_session_obj)
self.assertEqual(expected_output, output)
def test_extract_end_dates(self):
expected_output = ('End_Date_1', 'End_Date_2')
output = functions.extract_end_dates(TEST_SOUP)
def test_get_row_indent(self):
mock_row = BeautifulSoup('<tr><div>foo</div></tr>', HTML_PARSER).tr
expected_output = 0
output = functions.get_row_indent(mock_row)
self.assertEqual(expected_output, output)
trs = self.test_soup.find_all('tr')
output = functions.get_row_indent(trs[0])
self.assertEqual(expected_output, output)
for i, tr in enumerate(trs[1:], start=1):
output = functions.get_row_indent(tr)
self.assertEqual(i, output)
@patch.object(functions, 'get_row_indent')
def test_extract_end_dates(self, mock_get_row_indent):
mock_get_row_indent.return_value = 0
expected_output = (0, 'End_Date_1', 'End_Date_2')
output = functions.extract_end_dates(self.test_soup)
self.assertTupleEqual(expected_output, output)
mock_get_row_indent.assert_called_once_with(self.test_soup.tr)
def test_find_relevant_table_rows(self):
pass
def test_get_all_table_rows(self):
expected_output = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr')
output = functions.get_all_table_rows(self.test_soup)
self.assertSequenceEqual(expected_output, output)
def test_convert_number(self):
pass
@patch.object(functions, 'get_row_indent')
def test_extract_row_data(self, mock_get_row_indent):
mock_get_row_indent.return_value = 1
test_row = self.test_soup.find('div', attrs={'class': 'financials'}).tbody.tr
expected_output = ('foo', (1, 1., -2.))
output = functions.extract_row_data(test_row)
self.assertTupleEqual(expected_output, output)
mock_get_row_indent.assert_called_once_with(test_row)
def test_extract_row_data(self):
pass
@patch.object(functions, 'extract_row_data')
@patch.object(functions, 'get_all_table_rows')
@patch.object(functions, 'extract_end_dates')
def test_extract_all_data(self, mock_extract_end_dates, mock_get_all_table_rows, mock_extract_row_data):
test_end_dates = ('foo', 'bar')
mock_extract_end_dates.return_value = test_end_dates
test_relevant_rows = ['tr1', 'tr2']
mock_get_all_table_rows.return_value = test_relevant_rows
test_row_data = ('item_name', (123, 456))
mock_extract_row_data.return_value = test_row_data
expected_output = {
END_DATE: test_end_dates,
test_row_data[0]: test_row_data[1],
test_row_data[0]: test_row_data[1],
}
output = functions.extract_all_data(self.test_soup)
self.assertDictEqual(expected_output, output)
mock_extract_end_dates.assert_called_once_with(self.test_soup)
mock_get_all_table_rows.assert_called_once_with(self.test_soup)
mock_extract_row_data.assert_has_calls([call(test_relevant_rows[0]), call(test_relevant_rows[1])])
def test_extract_all_data(self):
pass
@patch.object(functions, 'extract_all_data')
@patch.object(functions, 'soup_from_url')
async def test_get_single_company_fin_stmt(self, mock_soup_from_url, mock_extract_all_data):
mock_session = MagicMock()
test_ticker, statement = 'bar', BS
test_url = f'{BASE_URL}/{test_ticker}/financials{FIN_STMT_URL_SUFFIX[statement]}'
mock_soup_from_url.return_value = mock_soup = MagicMock()
mock_extract_all_data.return_value = expected_output = {'foo': 'bar'}
async def test_get_balance_sheet(self):
pass
quarterly = False
output = await functions.get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_called_once_with(test_url, mock_session)
mock_extract_all_data.assert_called_once_with(mock_soup)
mock_soup_from_url.reset_mock()
mock_extract_all_data.reset_mock()
async def test_get_income_statement(self):
pass
quarterly = True
output = await functions.get_single_company_fin_stmt(statement, test_ticker, quarterly, mock_session)
self.assertDictEqual(expected_output, output)
mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session)
mock_extract_all_data.assert_called_once_with(mock_soup)
async def test_get_cash_flow_statement(self):
pass
@patch.object(functions, 'get_single_company_fin_stmt')
async def test_get_multi_companies_fin_stmt(self, mock_get_single_company_fin_stmt):
statement, sym1, sym2, quarterly, mock_session = 'xyz', 'foo', 'bar', False, MagicMock()
mock_get_single_company_fin_stmt.return_value = expected_output = 'baz'
output = await functions.get_multi_companies_fin_stmt(statement, sym1,
quarterly=quarterly, session=mock_session)
self.assertEqual(expected_output, output)
mock_get_single_company_fin_stmt.assert_called_once_with(statement, sym1, quarterly, mock_session)
mock_get_single_company_fin_stmt.reset_mock()
async def test_get_company_financials(self):
pass
expected_output = {sym1: expected_output, sym2: expected_output}
output = await functions.get_multi_companies_fin_stmt(statement, sym1, sym2,
quarterly=quarterly, session=mock_session)
self.assertDictEqual(expected_output, output)
mock_get_single_company_fin_stmt.assert_has_calls([
call(statement, sym1, quarterly, mock_session),
call(statement, sym2, quarterly, mock_session)
])
async def _helper_test_get_any_statement(self, stmt: str, mock_get_multi_companies_fin_stmt):
sym1, sym2, quarterly, batch_size, mock_session = 'foo', 'bar', False, 2, MagicMock()
mock_get_multi_companies_fin_stmt.return_value = expected_output = 'baz'
if stmt == BS:
function = functions.get_balance_sheet
elif stmt == IS:
function = functions.get_income_statement
elif stmt == CF:
function = functions.get_cash_flow_statement
else:
raise ValueError
output = await function(sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session)
self.assertEqual(expected_output, output)
mock_get_multi_companies_fin_stmt.assert_called_once_with(
stmt, sym1, sym2, quarterly=quarterly, concurrent_batch_size=batch_size, session=mock_session
)
@patch.object(functions, 'get_multi_companies_fin_stmt')
async def test_get_balance_sheet(self, mock_get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(BS, mock_get_multi_companies_fin_stmt)
@patch.object(functions, 'get_multi_companies_fin_stmt')
async def test_get_income_statement(self, mock_get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(IS, mock_get_multi_companies_fin_stmt)
@patch.object(functions, 'get_multi_companies_fin_stmt')
async def test_get_cash_flow_statement(self, mock_get_multi_companies_fin_stmt):
await self._helper_test_get_any_statement(CF, mock_get_multi_companies_fin_stmt)
@patch.object(functions, 'get_single_company_fin_stmt')
async def test_get_single_company_all_financials(self, mock_get_single_company_fin_stmt):
symbol, quarterly, mock_session = 'foo', False, MagicMock()
mock_get_single_company_fin_stmt.return_value = bar = 'bar'
expected_output = {BS: bar, IS: bar, CF: bar}
output = await functions.get_single_company_all_financials(symbol, quarterly, mock_session)
self.assertDictEqual(expected_output, output)
mock_get_single_company_fin_stmt.assert_has_calls([
call(BS, symbol, quarterly, mock_session),
call(IS, symbol, quarterly, mock_session),
call(CF, symbol, quarterly, mock_session)
])
@patch.object(functions, 'get_single_company_all_financials')
async def test_get_company_financials(self, mock_get_single_company_all_financials):
mock_get_single_company_all_financials.return_value = expected_output = 'baz'
symbol, quarterly, mock_session = 'foo', False, MagicMock()
output = await functions.get_all_financials(symbol, quarterly=quarterly, session=mock_session)
self.assertEqual(expected_output, output)
mock_get_single_company_all_financials.assert_called_once_with(symbol, quarterly, mock_session)
mock_get_single_company_all_financials.reset_mock()
test_sym1, test_sym2 = 'x', 'y'
expected_output = {test_sym1: expected_output, test_sym2: expected_output}
output = await functions.get_all_financials(test_sym1, test_sym2,
quarterly=quarterly, session=mock_session)
self.assertDictEqual(expected_output, output)
mock_get_single_company_all_financials.assert_has_calls([
call(test_sym1, quarterly, mock_session),
call(test_sym2, quarterly, mock_session)
])
@patch.object(functions, 'ClientSession')
async def test_integration_get_company_financials(self, mock_session_cls):
mock_session_cls.return_value = mock_session_obj = self.get_mock_session(self.test_html)
symbol = 'foo'
# Since the web request is mocked we always receive the same HTML markup.
expected_output = {
BS: {
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
'foo': (1, 1., -2.),
'bar': (2, 2., -3.),
'baz': (3, 3., -4.)
},
IS: {
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
'foo': (1, 1., -2.),
'bar': (2, 2., -3.),
'baz': (3, 3., -4.)
},
CF: {
END_DATE: (0, 'End_Date_1', 'End_Date_2'),
'foo': (1, 1., -2.),
'bar': (2, 2., -3.),
'baz': (3, 3., -4.)
}
}
output = await functions.get_all_financials(symbol, session=mock_session_obj)
self.assertDictEqual(expected_output, output)
mock_session_obj.get.assert_has_calls([
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[BS]}'),
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[IS]}'),
call(f'{BASE_URL}/{symbol}/financials{FIN_STMT_URL_SUFFIX[CF]}'),
])

87
tests/test_main.py Normal file
View File

@ -0,0 +1,87 @@
import logging
import json
from unittest import IsolatedAsyncioTestCase
from unittest.mock import patch
from argparse import Namespace
from io import StringIO
from mwfin import __main__ as main_module
class MainModuleTestCase(IsolatedAsyncioTestCase):
    """Unit tests for the functions defined in the package's `__main__` module."""

    @patch.object(main_module.ArgumentParser, 'parse_args')
    def test_parse_cli(self, mock_parse_args):
        """`parse_cli` is expected to return the parsed argument namespace as a dictionary."""
        mock_parse_args.return_value = mock_args = Namespace(foo='a', bar='b')
        expected_output = vars(mock_args)
        output = main_module.parse_cli()
        self.assertDictEqual(expected_output, output)

    def test_configure_logging(self):
        """
        `configure_logging` is expected to leave exactly one stream handler on the root logger
        and to set the root log level according to the verbosity passed.
        """
        root_logger = logging.getLogger()
        # The root logger is process-wide state; put its handlers and level back after the test
        # so that tests running afterwards are not affected by what is configured here.
        self.addCleanup(setattr, root_logger, 'handlers', root_logger.handlers)
        self.addCleanup(root_logger.setLevel, root_logger.level)

        # Pairs of (verbosity argument, expected root log level);
        # any verbosity above 3 is expected to behave like 3.
        expected_levels = (
            (0, logging.CRITICAL),
            (1, logging.WARNING),
            (2, logging.INFO),
            (3, logging.DEBUG),
            (9999, logging.DEBUG),
        )
        for verbosity, expected_level in expected_levels:
            with self.subTest(verbosity=verbosity):
                # Start every case from a root logger without any handlers.
                root_logger.handlers = []
                main_module.configure_logging(verbosity=verbosity)
                self.assertEqual(1, len(root_logger.handlers))
                self.assertIsInstance(root_logger.handlers[0], logging.StreamHandler)
                self.assertEqual(expected_level, root_logger.level)

    @patch.object(main_module, 'get_all_financials')
    @patch.object(main_module, 'configure_logging')
    @patch.object(main_module, 'parse_cli')
    async def test_main(self, mock_parse_cli, mock_configure_logging, mock_get_all_financials):
        """
        `main` is expected to parse the CLI arguments, configure logging, fetch the financials and
        emit them as JSON, either via `print` or into the file given by the `TO_FILE` argument.
        """
        mock_parse_cli.return_value = args = {
            main_module.VERBOSE: 'foo',
            main_module.TICKER_SYMBOL: ['bar', 'baz'],
            main_module.QUARTERLY: 'perhaps',
            main_module.BATCH_SIZE: 'xyz',
            main_module.TO_FILE: None,
            main_module.JSON_INDENT: 42,
        }
        mock_get_all_financials.return_value = mock_data = {'data': 'something cool'}
        expected_json = json.dumps(mock_data, indent=args[main_module.JSON_INDENT])

        def assert_common_calls():
            """Assert the calls `main` has to make regardless of the output destination."""
            mock_parse_cli.assert_called_once_with()
            mock_configure_logging.assert_called_once_with(args[main_module.VERBOSE])
            mock_get_all_financials.assert_awaited_once_with(
                *args[main_module.TICKER_SYMBOL],
                quarterly=args[main_module.QUARTERLY],
                concurrent_batch_size=args[main_module.BATCH_SIZE],
            )

        # To stdout:
        with patch.object(main_module, 'print') as mock_print:
            await main_module.main()
        assert_common_calls()
        mock_print.assert_called_once_with(expected_json)

        mock_parse_cli.reset_mock()
        mock_configure_logging.reset_mock()
        mock_get_all_financials.reset_mock()

        # To file:
        args[main_module.TO_FILE] = 'some_file'
        with patch.object(main_module, 'open') as mock_open:
            # An in-memory buffer stands in for the file object yielded by `open(...)`.
            mock_open.return_value.__enter__.return_value = mock_file = StringIO()
            await main_module.main()
        assert_common_calls()
        mock_file.seek(0)
        self.assertEqual(expected_json, mock_file.read())

View File

@ -5,28 +5,45 @@
<title>Title</title>
</head>
<body>
<div><table aria-label="(something something) data table">
<thead>
<tr>
<th><div> 'Item' </div><div> 'Item' </div></th>
<th><div> End_Date_1 </div></th>
<th><div> End_Date_2 </div></th>
</tr>
</thead>
<tbody>
<tr>
<td><div> Item_1 </div><div class="other"> Item_1 </div></td>
<td><div><span class=""> 11M </span></div></td>
<td><div><span class="negative"> (22M) </span></div></td>
<td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td>
</tr>
<tr>
<td><div> Item_2 </div><div class="other"> Item_2 </div></td>
<td><div><span class="positive"> 12% </span></div></td>
<td><div><span class="negative"> 13% </span></div></td>
<td><div> <div data-chart-data="0.12bazbazbaz,-0.13bazbazbaz"> <div></div></td>
</tr>
</tbody>
</table></div>
<div class="financials">
<header>
<h2><span>Foo table</span></h2>
<small> All values USD.</small>
</header>
<div>
<div>
<table>
<thead>
<tr>
<th><div class="xyz abc"> !!Item </div><div> !!Item </div></th>
<th><div> End_Date_1 </div></th>
<th><div> End_Date_2 </div></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<td><div class="xyz indent--small"> foo </div><div> foo </div></td>
<td></td>
<td></td>
<td><div> <div data-chart-data="1.0,-2.0"><div></div></td>
</tr>
<tr>
<td><div class="xyz indent--medium"> bar </div><div> bar </div></td>
<td></td>
<td></td>
<td><div> <div data-chart-data="2.0,-3.0"><div></div></td>
</tr>
<tr>
<td><div class="xyz indent--large"> baz </div><div> baz </div></td>
<td></td>
<td></td>
<td><div> <div data-chart-data="3.0,-4.0"><div></div></td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</body>
</html>