improved writing to file

This commit is contained in:
Daniil Fajnberg 2021-11-28 16:24:14 +01:00
parent 6dd37a4aa2
commit fd03815172
3 changed files with 20 additions and 19 deletions

View File

@ -1,40 +1,40 @@
import asyncio import asyncio
import sys
import csv import csv
import json import json
from argparse import ArgumentParser from argparse import ArgumentParser
from pathlib import Path from pathlib import Path
from typing import Dict
from aiohttp import ClientSession from aiohttp import ClientSession
from . import get_company_financials from .functions import get_company_financials, ResultDict, log
from .constants import END_DATE
JSON_EXT, CSV_EXT = '.json', '.csv' JSON_EXT, CSV_EXT = '.json', '.csv'
def write_to_csv(data: "Dict[str, ResultDict]", file_obj) -> None:
    """
    Write the financial statements in `data` to `file_obj` in CSV format.

    For every statement one header row is written, consisting of the
    statement's key followed by the values stored under `END_DATE`.
    After that, one row per remaining item is written, consisting of the
    item's key followed by the string representation of each of its values.

    Args:
        data: Mapping of statement key to that statement's result dictionary.
            Every result dictionary must contain the `END_DATE` key.
        file_obj: Writable text file object that is handed to `csv.writer`.

    Raises:
        KeyError: If a statement dictionary has no `END_DATE` entry.
    """
    writer = csv.writer(file_obj)
    for statement_key, statement_dict in data.items():
        # Look the end dates up instead of popping them: popping would strip
        # the entry from the caller's dictionary, so the data could not be
        # reused (or written a second time) after this call.
        writer.writerow([statement_key, *statement_dict[END_DATE]])
        for key, values in statement_dict.items():
            if key == END_DATE:
                # Already emitted as the header row of this statement.
                continue
            # Explicit `str()` keeps e.g. `None` visible as 'None' rather
            # than the empty field `csv.writer` would produce for it.
            writer.writerow([key, *(str(val) for val in values)])
async def main() -> None: async def main() -> None:
parser = ArgumentParser(description="Scrape company financials") parser = ArgumentParser(description="Scrape company financials")
parser.add_argument( parser.add_argument(
'-s', '--symbol', 'symbol',
type=str, type=str,
help="Stock ticker symbol of the company to be scraped the financials of" help="Stock ticker symbol of the company to be scraped the financials of"
) )
parser.add_argument( parser.add_argument(
'-f', '--to-file', '-f', '--to-file',
type=Path, type=Path,
help="Writes results to the specified destination file. If omitted results are printed to stdout." help="Writes results to the specified destination file. If omitted results are printed to stdout."
) )
args = parser.parse_args() args = parser.parse_args()
session = ClientSession() session = ClientSession()
try: try:
@ -45,14 +45,13 @@ async def main() -> None:
if path is None: if path is None:
print(json.dumps(data, indent=2)) print(json.dumps(data, indent=2))
return return
if path.suffix.lower() == JSON_EXT: with open(path, 'w') as f:
with open(path, 'w') as f: if path.suffix.lower() == CSV_EXT:
json.dump(data, f, indent=2)
elif path.suffix.lower() == CSV_EXT:
with open(path, 'w') as f:
write_to_csv(data, f) write_to_csv(data, f)
else: return
print('unknown extension') if not path.suffix.lower() == JSON_EXT:
log.warning(f"Extension '{path.suffix}' unknown; using JSON format")
json.dump(data, f, indent=2)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1,8 +1,10 @@
LOGGER_NAME = 'mwfin'
HTML_PARSER = 'html.parser' HTML_PARSER = 'html.parser'
DOMAIN = 'www.marketwatch.com' DOMAIN = 'www.marketwatch.com'
BASE_URL = f'https://{DOMAIN}/investing/stock' BASE_URL = f'https://{DOMAIN}/investing/stock'
BS, IS, CF = 'bs', 'is', 'cf' BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement'
FIN_STMT_URL_SUFFIX = { FIN_STMT_URL_SUFFIX = {
BS: '/balance-sheet', BS: '/balance-sheet',
IS: '', IS: '',

View File

@ -8,7 +8,7 @@ from bs4.element import Tag
from . import constants from . import constants
log = logging.getLogger(__name__) log = logging.getLogger(constants.LOGGER_NAME)
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement, # The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
# while its values will always be tuples with a length corresponding to the number of periods (columns) # while its values will always be tuples with a length corresponding to the number of periods (columns)