Compare commits
	
		
			6 Commits
		
	
	
		
			6dd37a4aa2
			...
			e522897991
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
| | e522897991 | | |
| | 1529df252c | | |
| | 3c7afcd879 | | |
| | 4a68d45bc4 | | |
| | e7f3730c64 | | |
| | fd03815172 | | |
@@ -1,40 +1,40 @@
 | 
			
		||||
import asyncio
 | 
			
		||||
import sys
 | 
			
		||||
import csv
 | 
			
		||||
import json
 | 
			
		||||
from argparse import ArgumentParser
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import Dict
 | 
			
		||||
 | 
			
		||||
from aiohttp import ClientSession
 | 
			
		||||
 | 
			
		||||
from . import get_company_financials
 | 
			
		||||
from .functions import get_company_financials, ResultDict, log
 | 
			
		||||
from .constants import END_DATE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
JSON_EXT, CSV_EXT = '.json', '.csv'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def write_to_csv(data, file_obj) -> None:
 | 
			
		||||
def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None:
 | 
			
		||||
    writer = csv.writer(file_obj)
 | 
			
		||||
    for statement in data.values():
 | 
			
		||||
        for key, values in statement.items():
 | 
			
		||||
            writer.writerow([key] + list(values))
 | 
			
		||||
    for statement_key, statement_dict in data.items():
 | 
			
		||||
        end_dates = statement_dict.pop(END_DATE)
 | 
			
		||||
        writer.writerow([statement_key] + list(end_dates))
 | 
			
		||||
        for key, values in statement_dict.items():
 | 
			
		||||
            writer.writerow([key] + list(str(val) for val in values))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def main() -> None:
 | 
			
		||||
    parser = ArgumentParser(description="Scrape company financials")
 | 
			
		||||
 | 
			
		||||
    parser.add_argument(
 | 
			
		||||
        '-s', '--symbol',
 | 
			
		||||
        'symbol',
 | 
			
		||||
        type=str,
 | 
			
		||||
        help="Stock ticker symbol of the company to be scraped the financials of"
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    parser.add_argument(
 | 
			
		||||
        '-f', '--to-file',
 | 
			
		||||
        type=Path,
 | 
			
		||||
        help="Writes results to the specified destination file. If omitted results are printed to stdout."
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    args = parser.parse_args()
 | 
			
		||||
    session = ClientSession()
 | 
			
		||||
    try:
 | 
			
		||||
@@ -45,14 +45,13 @@ async def main() -> None:
 | 
			
		||||
    if path is None:
 | 
			
		||||
        print(json.dumps(data, indent=2))
 | 
			
		||||
        return
 | 
			
		||||
    if path.suffix.lower() == JSON_EXT:
 | 
			
		||||
        with open(path, 'w') as f:
 | 
			
		||||
            json.dump(data, f, indent=2)
 | 
			
		||||
    elif path.suffix.lower() == CSV_EXT:
 | 
			
		||||
    with open(path, 'w') as f:
 | 
			
		||||
        if path.suffix.lower() == CSV_EXT:
 | 
			
		||||
            write_to_csv(data, f)
 | 
			
		||||
    else:
 | 
			
		||||
        print('unknown extension')
 | 
			
		||||
            return
 | 
			
		||||
        if not path.suffix.lower() == JSON_EXT:
 | 
			
		||||
            log.warning(f"Extension '{path.suffix}' unknown; using JSON format")
 | 
			
		||||
        json.dump(data, f, indent=2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,10 @@
 | 
			
		||||
LOGGER_NAME = 'mwfin'
 | 
			
		||||
 | 
			
		||||
HTML_PARSER = 'html.parser'
 | 
			
		||||
DOMAIN = 'www.marketwatch.com'
 | 
			
		||||
BASE_URL = f'https://{DOMAIN}/investing/stock'
 | 
			
		||||
 | 
			
		||||
BS, IS, CF = 'bs', 'is', 'cf'
 | 
			
		||||
BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement'
 | 
			
		||||
FIN_STMT_URL_SUFFIX = {
 | 
			
		||||
    BS: '/balance-sheet',
 | 
			
		||||
    IS: '',
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ from bs4.element import Tag
 | 
			
		||||
from . import constants
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
log = logging.getLogger(__name__)
 | 
			
		||||
log = logging.getLogger(constants.LOGGER_NAME)
 | 
			
		||||
 | 
			
		||||
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
 | 
			
		||||
# while its values will always be tuples with a length corresponding to the number of periods (columns)
 | 
			
		||||
@@ -90,37 +90,60 @@ async def _get_financial_statement(statement: str, ticker_symbol: str, quarterly
 | 
			
		||||
    return extract_all_data(soup)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def get_balance_sheet(ticker_symbol: str, quarterly: bool = False,
 | 
			
		||||
async def get_balance_sheet(*ticker_symbols: str, quarterly: bool = False,
 | 
			
		||||
                            session: ClientSession = None) -> ResultDict:
 | 
			
		||||
    """
 | 
			
		||||
    Returns data from the balance sheet of the specified company.
 | 
			
		||||
    """
 | 
			
		||||
    return await _get_financial_statement(constants.BS, ticker_symbol, quarterly, session)
 | 
			
		||||
    if len(ticker_symbols) == 1:
 | 
			
		||||
        return await _get_financial_statement(constants.BS, ticker_symbols[0], quarterly, session)
 | 
			
		||||
    return {
 | 
			
		||||
        sym: await _get_financial_statement(constants.BS, sym, quarterly, session)
 | 
			
		||||
        for sym in ticker_symbols
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def get_income_statement(ticker_symbol: str, quarterly: bool = False,
 | 
			
		||||
async def get_income_statement(*ticker_symbols: str, quarterly: bool = False,
 | 
			
		||||
                               session: ClientSession = None) -> ResultDict:
 | 
			
		||||
    """
 | 
			
		||||
    Returns data from the income statement of the specified company.
 | 
			
		||||
    """
 | 
			
		||||
    return await _get_financial_statement(constants.IS, ticker_symbol, quarterly, session)
 | 
			
		||||
    if len(ticker_symbols) == 1:
 | 
			
		||||
        return await _get_financial_statement(constants.IS, ticker_symbols[0], quarterly, session)
 | 
			
		||||
    return {
 | 
			
		||||
        sym: await _get_financial_statement(constants.IS, sym, quarterly, session)
 | 
			
		||||
        for sym in ticker_symbols
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def get_cash_flow_statement(ticker_symbol: str, quarterly: bool = False,
 | 
			
		||||
async def get_cash_flow_statement(*ticker_symbols: str, quarterly: bool = False,
 | 
			
		||||
                                  session: ClientSession = None) -> ResultDict:
 | 
			
		||||
    """
 | 
			
		||||
    Returns data from the cash flow statement of the specified company.
 | 
			
		||||
    """
 | 
			
		||||
    return await _get_financial_statement(constants.CF, ticker_symbol, quarterly, session)
 | 
			
		||||
    if len(ticker_symbols) == 1:
 | 
			
		||||
        return await _get_financial_statement(constants.CF, ticker_symbols[0], quarterly, session)
 | 
			
		||||
    return {
 | 
			
		||||
        sym: await _get_financial_statement(constants.CF, sym, quarterly, session)
 | 
			
		||||
        for sym in ticker_symbols
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def get_company_financials(ticker_symbol: str, quarterly: bool = False,
 | 
			
		||||
async def get_company_financials(*ticker_symbols: str, quarterly: bool = False,
 | 
			
		||||
                                 session: ClientSession = None) -> Dict[str, ResultDict]:
 | 
			
		||||
    """
 | 
			
		||||
    Returns all fundamentals (balance sheet, income statement and cash flow statement) of the specified company.
 | 
			
		||||
    """
 | 
			
		||||
    if len(ticker_symbols) == 1:
 | 
			
		||||
        return {
 | 
			
		||||
        constants.BS: await get_balance_sheet(ticker_symbol, quarterly, session),
 | 
			
		||||
        constants.IS: await get_income_statement(ticker_symbol, quarterly, session),
 | 
			
		||||
        constants.CF: await get_cash_flow_statement(ticker_symbol, quarterly, session)
 | 
			
		||||
            constants.BS: await get_balance_sheet(ticker_symbols[0], quarterly=quarterly, session=session),
 | 
			
		||||
            constants.IS: await get_income_statement(ticker_symbols[0], quarterly=quarterly, session=session),
 | 
			
		||||
            constants.CF: await get_cash_flow_statement(ticker_symbols[0], quarterly=quarterly, session=session)
 | 
			
		||||
        }
 | 
			
		||||
    return {
 | 
			
		||||
        sym: {
 | 
			
		||||
            constants.BS: await get_balance_sheet(sym, quarterly=quarterly, session=session),
 | 
			
		||||
            constants.IS: await get_income_statement(sym, quarterly=quarterly, session=session),
 | 
			
		||||
            constants.CF: await get_cash_flow_statement(sym, quarterly=quarterly, session=session)
 | 
			
		||||
        } for sym in ticker_symbols
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -122,29 +122,42 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
 | 
			
		||||
        mock_soup_from_url.assert_called_once_with(test_url + '/quarter', mock_session)
 | 
			
		||||
        mock_extract_all_data.assert_called_once_with(mock_soup)
 | 
			
		||||
 | 
			
		||||
    @patch.object(functions, '_get_financial_statement')
 | 
			
		||||
    async def test_get_balance_sheet(self, mock__get_financial_statement):
 | 
			
		||||
    async def _helper_test_get_any_statement(self, statement: str, mock__get_financial_statement):
 | 
			
		||||
        symbol, quarterly, mock_session = 'foo', False, MagicMock()
 | 
			
		||||
        mock__get_financial_statement.return_value = expected_output = 'bar'
 | 
			
		||||
        output = await functions.get_balance_sheet(symbol, quarterly, mock_session)
 | 
			
		||||
        if statement == BS:
 | 
			
		||||
            function = functions.get_balance_sheet
 | 
			
		||||
        elif statement == IS:
 | 
			
		||||
            function = functions.get_income_statement
 | 
			
		||||
        elif statement == CF:
 | 
			
		||||
            function = functions.get_cash_flow_statement
 | 
			
		||||
        else:
 | 
			
		||||
            raise ValueError
 | 
			
		||||
        output = await function(symbol, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        self.assertEqual(expected_output, output)
 | 
			
		||||
        mock__get_financial_statement.assert_called_once_with(BS, symbol, quarterly, mock_session)
 | 
			
		||||
        mock__get_financial_statement.assert_called_once_with(statement, symbol, quarterly, mock_session)
 | 
			
		||||
        mock__get_financial_statement.reset_mock()
 | 
			
		||||
        
 | 
			
		||||
        symbol1, symbol2 = 'x', 'y'
 | 
			
		||||
        expected_output = {symbol1: expected_output, symbol2: expected_output}
 | 
			
		||||
        output = await function(symbol1, symbol2, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        self.assertDictEqual(expected_output, output)
 | 
			
		||||
        mock__get_financial_statement.assert_has_calls([
 | 
			
		||||
            call(statement, symbol1, quarterly, mock_session),
 | 
			
		||||
            call(statement, symbol2, quarterly, mock_session),
 | 
			
		||||
        ])
 | 
			
		||||
 | 
			
		||||
    @patch.object(functions, '_get_financial_statement')
 | 
			
		||||
    async def test_get_balance_sheet(self, mock__get_financial_statement):
 | 
			
		||||
        await self._helper_test_get_any_statement(BS, mock__get_financial_statement)
 | 
			
		||||
 | 
			
		||||
    @patch.object(functions, '_get_financial_statement')
 | 
			
		||||
    async def test_get_income_statement(self, mock__get_financial_statement):
 | 
			
		||||
        symbol, quarterly, mock_session = 'foo', False, MagicMock()
 | 
			
		||||
        mock__get_financial_statement.return_value = expected_output = 'bar'
 | 
			
		||||
        output = await functions.get_income_statement(symbol, quarterly, mock_session)
 | 
			
		||||
        self.assertEqual(expected_output, output)
 | 
			
		||||
        mock__get_financial_statement.assert_called_once_with(IS, symbol, quarterly, mock_session)
 | 
			
		||||
        await self._helper_test_get_any_statement(IS, mock__get_financial_statement)
 | 
			
		||||
 | 
			
		||||
    @patch.object(functions, '_get_financial_statement')
 | 
			
		||||
    async def test_get_cash_flow_statement(self, mock__get_financial_statement):
 | 
			
		||||
        symbol, quarterly, mock_session = 'foo', False, MagicMock()
 | 
			
		||||
        mock__get_financial_statement.return_value = expected_output = 'bar'
 | 
			
		||||
        output = await functions.get_cash_flow_statement(symbol, quarterly, mock_session)
 | 
			
		||||
        self.assertEqual(expected_output, output)
 | 
			
		||||
        mock__get_financial_statement.assert_called_once_with(CF, symbol, quarterly, mock_session)
 | 
			
		||||
        await self._helper_test_get_any_statement(CF, mock__get_financial_statement)
 | 
			
		||||
 | 
			
		||||
    @patch.object(functions, 'get_cash_flow_statement')
 | 
			
		||||
    @patch.object(functions, 'get_income_statement')
 | 
			
		||||
@@ -160,11 +173,32 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
 | 
			
		||||
            CF: {END_DATE: mock_end_dates, 'c': (3, 4)}
 | 
			
		||||
        }
 | 
			
		||||
        symbol, quarterly, mock_session = 'foo', False, MagicMock()
 | 
			
		||||
        output = await functions.get_company_financials(symbol, quarterly, mock_session)
 | 
			
		||||
        output = await functions.get_company_financials(symbol, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        self.assertDictEqual(expected_output, output)
 | 
			
		||||
        mock_get_bs.assert_called_once_with(symbol, quarterly, mock_session)
 | 
			
		||||
        mock_get_is.assert_called_once_with(symbol, quarterly, mock_session)
 | 
			
		||||
        mock_get_cf.assert_called_once_with(symbol, quarterly, mock_session)
 | 
			
		||||
        mock_get_bs.assert_called_once_with(symbol, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        mock_get_is.assert_called_once_with(symbol, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        mock_get_cf.assert_called_once_with(symbol, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        mock_get_bs.reset_mock()
 | 
			
		||||
        mock_get_is.reset_mock()
 | 
			
		||||
        mock_get_cf.reset_mock()
 | 
			
		||||
 | 
			
		||||
        test_symbol1, test_symbol2 = 'x', 'y'
 | 
			
		||||
        expected_output = {test_symbol1: expected_output, test_symbol2: expected_output}
 | 
			
		||||
        output = await functions.get_company_financials(test_symbol1, test_symbol2,
 | 
			
		||||
                                                        quarterly=quarterly, session=mock_session)
 | 
			
		||||
        self.assertDictEqual(expected_output, output)
 | 
			
		||||
        mock_get_bs.assert_has_calls([
 | 
			
		||||
            call(test_symbol1, quarterly=quarterly, session=mock_session),
 | 
			
		||||
            call(test_symbol2, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        ])
 | 
			
		||||
        mock_get_is.assert_has_calls([
 | 
			
		||||
            call(test_symbol1, quarterly=quarterly, session=mock_session),
 | 
			
		||||
            call(test_symbol2, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        ])
 | 
			
		||||
        mock_get_cf.assert_has_calls([
 | 
			
		||||
            call(test_symbol1, quarterly=quarterly, session=mock_session),
 | 
			
		||||
            call(test_symbol2, quarterly=quarterly, session=mock_session)
 | 
			
		||||
        ])
 | 
			
		||||
 | 
			
		||||
    @patch.object(functions, 'ClientSession')
 | 
			
		||||
    async def test_integration_get_company_financials(self, mock_session_cls):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user