Improved writing to file

This commit is contained in:
Daniil Fajnberg 2021-11-28 16:24:14 +01:00
parent 6dd37a4aa2
commit fd03815172
3 changed files with 20 additions and 19 deletions

View File

@@ -1,40 +1,40 @@
import asyncio
import sys
import csv
import json
from argparse import ArgumentParser
from pathlib import Path
from typing import Dict
from aiohttp import ClientSession
from . import get_company_financials
from .functions import get_company_financials, ResultDict, log
from .constants import END_DATE
JSON_EXT, CSV_EXT = '.json', '.csv'
def write_to_csv(data, file_obj) -> None:
def write_to_csv(data: Dict[str, ResultDict], file_obj) -> None:
writer = csv.writer(file_obj)
for statement in data.values():
for key, values in statement.items():
writer.writerow([key] + list(values))
for statement_key, statement_dict in data.items():
end_dates = statement_dict.pop(END_DATE)
writer.writerow([statement_key] + list(end_dates))
for key, values in statement_dict.items():
writer.writerow([key] + list(str(val) for val in values))
async def main() -> None:
parser = ArgumentParser(description="Scrape company financials")
parser.add_argument(
'-s', '--symbol',
'symbol',
type=str,
help="Stock ticker symbol of the company to be scraped the financials of"
)
parser.add_argument(
'-f', '--to-file',
type=Path,
help="Writes results to the specified destination file. If omitted results are printed to stdout."
)
args = parser.parse_args()
session = ClientSession()
try:
@@ -45,14 +45,13 @@ async def main() -> None:
if path is None:
print(json.dumps(data, indent=2))
return
if path.suffix.lower() == JSON_EXT:
with open(path, 'w') as f:
json.dump(data, f, indent=2)
elif path.suffix.lower() == CSV_EXT:
with open(path, 'w') as f:
if path.suffix.lower() == CSV_EXT:
write_to_csv(data, f)
else:
print('unknown extension')
return
if not path.suffix.lower() == JSON_EXT:
log.warning(f"Extension '{path.suffix}' unknown; using JSON format")
json.dump(data, f, indent=2)
if __name__ == '__main__':

View File

@@ -1,8 +1,10 @@
LOGGER_NAME = 'mwfin'
HTML_PARSER = 'html.parser'
DOMAIN = 'www.marketwatch.com'
BASE_URL = f'https://{DOMAIN}/investing/stock'
BS, IS, CF = 'bs', 'is', 'cf'
BS, IS, CF = 'Balance Sheet', 'Income Statement', 'Cash Flow Statement'
FIN_STMT_URL_SUFFIX = {
BS: '/balance-sheet',
IS: '',

View File

@@ -8,7 +8,7 @@ from bs4.element import Tag
from . import constants
log = logging.getLogger(__name__)
log = logging.getLogger(constants.LOGGER_NAME)
# The resulting dictionary's keys correspond to the name of the item (row) in the financial statement,
# while its values will always be tuples with a length corresponding to the number of periods (columns)