diff --git a/src/mwfin/__main__.py b/src/mwfin/__main__.py index e9c41c5..9492793 100644 --- a/src/mwfin/__main__.py +++ b/src/mwfin/__main__.py @@ -1,40 +1,40 @@ import asyncio -import sys import csv import json from argparse import ArgumentParser from pathlib import Path +from typing import Dict from aiohttp import ClientSession -from . import get_company_financials +from .functions import get_company_financials, ResultDict, log +from .constants import END_DATE JSON_EXT, CSV_EXT = '.json', '.csv' -def write_to_csv(data, file_obj) -> None: +def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None: writer = csv.writer(file_obj) - for statement in data.values(): - for key, values in statement.items(): - writer.writerow([key] + list(values)) + for statement_key, statement_dict in data.items(): + end_dates = statement_dict.pop(END_DATE) + writer.writerow([statement_key] + list(end_dates)) + for key, values in statement_dict.items(): + writer.writerow([key] + list(str(val) for val in values)) async def main() -> None: parser = ArgumentParser(description="Scrape company financials") - parser.add_argument( - '-s', '--symbol', + 'symbol', type=str, help="Stock ticker symbol of the company to be scraped the financials of" ) - parser.add_argument( '-f', '--to-file', type=Path, help="Writes results to the specified destination file. If omitted results are printed to stdout." ) - args = parser.parse_args() session = ClientSession() try: @@ -45,14 +45,13 @@ async def main() -> None: if path is None: print(json.dumps(data, indent=2)) return - if path.suffix.lower() == JSON_EXT: - with open(path, 'w') as f: - json.dump(data, f, indent=2) - elif path.suffix.lower() == CSV_EXT: - with open(path, 'w') as f: + with open(path, 'w') as f: + if path.suffix.lower() == CSV_EXT: write_to_csv(data, f) - else: - print('unknown extension') + return + if not path.suffix.lower() == JSON_EXT: + log.warning(f"Extension '{path.suffix}' unknown; using JSON format") + json.dump(data, f, indent=2) if __name__ == '__main__': diff --git a/src/mwfin/constants.py b/src/mwfin/constants.py index 4f6468e..26a3bf0 100644 --- a/src/mwfin/constants.py +++ b/src/mwfin/constants.py @@ -1,8 +1,10 @@ +LOGGER_NAME = 'mwfin' + HTML_PARSER = 'html.parser' DOMAIN = 'www.marketwatch.com' BASE_URL = f'https://{DOMAIN}/investing/stock' -BS, IS, CF = 'bs', 'is', 'cf' +BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement' FIN_STMT_URL_SUFFIX = { BS: '/balance-sheet', IS: '', diff --git a/src/mwfin/functions.py b/src/mwfin/functions.py index 75b5317..a060183 100644 --- a/src/mwfin/functions.py +++ b/src/mwfin/functions.py @@ -8,7 +8,7 @@ from bs4.element import Tag from . import constants -log = logging.getLogger(__name__) +log = logging.getLogger(constants.LOGGER_NAME) # The resulting dictionary's keys correspond to the name of the item (row) in the financial statement, # while its values will always be tuples with a length corresponding to the number of periods (columns)