Compare commits

..

No commits in common. "e522897991e212413daf464f21a120afb7dbe51b" and "6dd37a4aa2279b2c8451b0b00d8d756391aaa889" have entirely different histories.

4 changed files with 48 additions and 106 deletions

View File

@ -1,40 +1,40 @@
import asyncio import asyncio
import sys
import csv import csv
import json import json
from argparse import ArgumentParser from argparse import ArgumentParser
from pathlib import Path from pathlib import Path
from typing import Dict
from aiohttp import ClientSession from aiohttp import ClientSession
from .functions import get_company_financials, ResultDict, log from . import get_company_financials
from .constants import END_DATE
JSON_EXT, CSV_EXT = '.json', '.csv' JSON_EXT, CSV_EXT = '.json', '.csv'
def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None: def write_to_csv(data, file_obj) -> None:
writer = csv.writer(file_obj) writer = csv.writer(file_obj)
for statement_key, statement_dict in data.items(): for statement in data.values():
end_dates = statement_dict.pop(END_DATE) for key, values in statement.items():
writer.writerow([statement_key] + list(end_dates)) writer.writerow([key] + list(values))
for key, values in statement_dict.items():
writer.writerow([key] + list(str(val) for val in values))
async def main() -> None: async def main() -> None:
parser = ArgumentParser(description="Scrape company financials") parser = ArgumentParser(description="Scrape company financials")
parser.add_argument( parser.add_argument(
'symbol', '-s', '--symbol',
type=str, type=str,
help="Stock ticker symbol of the company to be scraped the financials of" help="Stock ticker symbol of the company to be scraped the financials of"
) )
parser.add_argument( parser.add_argument(
'-f', '--to-file', '-f', '--to-file',
type=Path, type=Path,
help="Writes results to the specified destination file. If omitted results are printed to stdout." help="Writes results to the specified destination file. If omitted results are printed to stdout."
) )
args = parser.parse_args() args = parser.parse_args()
session = ClientSession() session = ClientSession()
try: try:
@ -45,13 +45,14 @@ async def main() -> None:
if path is None: if path is None:
print(json.dumps(data, indent=2)) print(json.dumps(data, indent=2))
return return
with open(path, 'w') as f: if path.suffix.lower() == JSON_EXT:
if path.suffix.lower() == CSV_EXT: with open(path, 'w') as f:
json.dump(data, f, indent=2)
elif path.suffix.lower() == CSV_EXT:
with open(path, 'w') as f:
write_to_csv(data, f) write_to_csv(data, f)
return else:
if not path.suffix.lower() == JSON_EXT: print('unknown extension')
log.warning(f"Extension '{path.suffix}' unknown; using JSON format")
json.dump(data, f, indent=2)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1,10 +1,8 @@
LOGGER_NAME = 'mwfin'
HTML_PARSER = 'html.parser' HTML_PARSER = 'html.parser'
DOMAIN = 'www.marketwatch.com' DOMAIN = 'www.marketwatch.com'
BASE_URL = f'https://{DOMAIN}/investing/stock' BASE_URL = f'https://{DOMAIN}/investing/stock'
BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement' BS, IS, CF = 'bs', 'is', 'cf'
FIN_STMT_URL_SUFFIX = { FIN_STMT_URL_SUFFIX = {
BS: '/balance-sheet', BS: '/balance-sheet',
IS: '', IS: '',

View File

@ -8,7 +8,7 @@ from bs4.element import Tag
from . import constants from . import constants
log = logging.getLogger(constants.LOGGER_NAME) log = logging.getLogger(__name__)
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement, # The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
# while its values will always be tuples with a length corresponding to the number of periods (columns) # while its values will always be tuples with a length corresponding to the number of periods (columns)
@ -90,60 +90,37 @@ async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly
return extract_all_data(soup) return extract_all_data(soup)
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False, async def get_balance_sheet(ticker_symbol: str, quarterly: bool = False,
session: ClientSession = None) -> ResultDict: session: ClientSession = None) -> ResultDict:
""" """
Returns data from the balance sheet of the specified company. Returns data from the balance sheet of the specified company.
""" """
if len(ticker_symbols) == 1: return await _get_financial_statement(constants.BS, ticker_symbol, quarterly, session)
return await _get_financial_statement(constants.BS, ticker_symbols[0], quarterly, session)
return {
sym: await _get_financial_statement(constants.BS, sym, quarterly, session)
for sym in ticker_symbols
}
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False, async def get_income_statement(ticker_symbol: str, quarterly: bool = False,
session: ClientSession = None) -> ResultDict: session: ClientSession = None) -> ResultDict:
""" """
Returns data from the income statement of the specified company. Returns data from the income statement of the specified company.
""" """
if len(ticker_symbols) == 1: return await _get_financial_statement(constants.IS, ticker_symbol, quarterly, session)
return await _get_financial_statement(constants.IS, ticker_symbols[0], quarterly, session)
return {
sym: await _get_financial_statement(constants.IS, sym, quarterly, session)
for sym in ticker_symbols
}
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False, async def get_cash_flow_statement(ticker_symbol: str, quarterly: bool = False,
session: ClientSession = None) -> ResultDict: session: ClientSession = None) -> ResultDict:
""" """
Returns data from the cash flow statement of the specified company. Returns data from the cash flow statement of the specified company.
""" """
if len(ticker_symbols) == 1: return await _get_financial_statement(constants.CF, ticker_symbol, quarterly, session)
return await _get_financial_statement(constants.CF, ticker_symbols[0], quarterly, session)
return {
sym: await _get_financial_statement(constants.CF, sym, quarterly, session)
for sym in ticker_symbols
}
async def get_company_financials(*ticker_symbols: str, quarterly: bool = False, async def get_company_financials(ticker_symbol: str, quarterly: bool = False,
session: ClientSession = None) -> Dict[str, ResultDict]: session: ClientSession = None) -> Dict[str, ResultDict]:
""" """
Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified company. Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified company.
""" """
if len(ticker_symbols) == 1:
return {
constants.BS: await get_balance_sheet(ticker_symbols[0], quarterly=quarterly, session=session),
constants.IS: await get_income_statement(ticker_symbols[0], quarterly=quarterly, session=session),
constants.CF: await get_cash_flow_statement(ticker_symbols[0], quarterly=quarterly, session=session)
}
return { return {
sym: { constants.BS: await get_balance_sheet(ticker_symbol, quarterly, session),
constants.BS: await get_balance_sheet(sym, quarterly=quarterly, session=session), constants.IS: await get_income_statement(ticker_symbol, quarterly, session),
constants.IS: await get_income_statement(sym, quarterly=quarterly, session=session), constants.CF: await get_cash_flow_statement(ticker_symbol, quarterly, session)
constants.CF: await get_cash_flow_statement(sym, quarterly=quarterly, session=session)
} for sym in ticker_symbols
} }

View File

@ -122,42 +122,29 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session) mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session)
mock_extract_all_data.assert_called_once_with(mock_soup) mock_extract_all_data.assert_called_once_with(mock_soup)
async def _helper_test_get_any_statement(self, statement: str, mock__get_financial_statement):
symbol, quarterly, mock_session = 'foo', False, MagicMock()
mock__get_financial_statement.return_value = expected_output = 'bar'
if statement == BS:
function = functions.get_balance_sheet
elif statement == IS:
function = functions.get_income_statement
elif statement == CF:
function = functions.get_cash_flow_statement
else:
raise ValueError
output = await function(symbol, quarterly=quarterly, session=mock_session)
self.assertEqual(expected_output, output)
mock__get_financial_statement.assert_called_once_with(statement, symbol, quarterly, mock_session)
mock__get_financial_statement.reset_mock()
symbol1, symbol2 = 'x', 'y'
expected_output = {symbol1: expected_output, symbol2: expected_output}
output = await function(symbol1, symbol2, quarterly=quarterly, session=mock_session)
self.assertDictEqual(expected_output, output)
mock__get_financial_statement.assert_has_calls([
call(statement, symbol1, quarterly, mock_session),
call(statement, symbol2, quarterly, mock_session),
])
@patch.object(functions, '_get_financial_statement') @patch.object(functions, '_get_financial_statement')
async def test_get_balance_sheet(self, mock__get_financial_statement): async def test_get_balance_sheet(self, mock__get_financial_statement):
await self._helper_test_get_any_statement(BS, mock__get_financial_statement) symbol, quarterly, mock_session = 'foo', False, MagicMock()
mock__get_financial_statement.return_value = expected_output = 'bar'
output = await functions.get_balance_sheet(symbol, quarterly, mock_session)
self.assertEqual(expected_output, output)
mock__get_financial_statement.assert_called_once_with(BS, symbol, quarterly, mock_session)
@patch.object(functions, '_get_financial_statement') @patch.object(functions, '_get_financial_statement')
async def test_get_income_statement(self, mock__get_financial_statement): async def test_get_income_statement(self, mock__get_financial_statement):
await self._helper_test_get_any_statement(IS, mock__get_financial_statement) symbol, quarterly, mock_session = 'foo', False, MagicMock()
mock__get_financial_statement.return_value = expected_output = 'bar'
output = await functions.get_income_statement(symbol, quarterly, mock_session)
self.assertEqual(expected_output, output)
mock__get_financial_statement.assert_called_once_with(IS, symbol, quarterly, mock_session)
@patch.object(functions, '_get_financial_statement') @patch.object(functions, '_get_financial_statement')
async def test_get_cash_flow_statement(self, mock__get_financial_statement): async def test_get_cash_flow_statement(self, mock__get_financial_statement):
await self._helper_test_get_any_statement(CF, mock__get_financial_statement) symbol, quarterly, mock_session = 'foo', False, MagicMock()
mock__get_financial_statement.return_value = expected_output = 'bar'
output = await functions.get_cash_flow_statement(symbol, quarterly, mock_session)
self.assertEqual(expected_output, output)
mock__get_financial_statement.assert_called_once_with(CF, symbol, quarterly, mock_session)
@patch.object(functions, 'get_cash_flow_statement') @patch.object(functions, 'get_cash_flow_statement')
@patch.object(functions, 'get_income_statement') @patch.object(functions, 'get_income_statement')
@ -173,32 +160,11 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
CF: {END_DATE: mock_end_dates, 'c': (3, 4)} CF: {END_DATE: mock_end_dates, 'c': (3, 4)}
} }
symbol, quarterly, mock_session = 'foo', False, MagicMock() symbol, quarterly, mock_session = 'foo', False, MagicMock()
output = await functions.get_company_financials(symbol, quarterly=quarterly, session=mock_session) output = await functions.get_company_financials(symbol, quarterly, mock_session)
self.assertDictEqual(expected_output, output) self.assertDictEqual(expected_output, output)
mock_get_bs.assert_called_once_with(symbol, quarterly=quarterly, session=mock_session) mock_get_bs.assert_called_once_with(symbol, quarterly, mock_session)
mock_get_is.assert_called_once_with(symbol, quarterly=quarterly, session=mock_session) mock_get_is.assert_called_once_with(symbol, quarterly, mock_session)
mock_get_cf.assert_called_once_with(symbol, quarterly=quarterly, session=mock_session) mock_get_cf.assert_called_once_with(symbol, quarterly, mock_session)
mock_get_bs.reset_mock()
mock_get_is.reset_mock()
mock_get_cf.reset_mock()
test_symbol1, test_symbol2 = 'x', 'y'
expected_output = {test_symbol1: expected_output, test_symbol2: expected_output}
output = await functions.get_company_financials(test_symbol1, test_symbol2,
quarterly=quarterly, session=mock_session)
self.assertDictEqual(expected_output, output)
mock_get_bs.assert_has_calls([
call(test_symbol1, quarterly=quarterly, session=mock_session),
call(test_symbol2, quarterly=quarterly, session=mock_session)
])
mock_get_is.assert_has_calls([
call(test_symbol1, quarterly=quarterly, session=mock_session),
call(test_symbol2, quarterly=quarterly, session=mock_session)
])
mock_get_cf.assert_has_calls([
call(test_symbol1, quarterly=quarterly, session=mock_session),
call(test_symbol2, quarterly=quarterly, session=mock_session)
])
@patch.object(functions, 'ClientSession') @patch.object(functions, 'ClientSession')
async def test_integration_get_company_financials(self, mock_session_cls): async def test_integration_get_company_financials(self, mock_session_cls):