test now passing

This commit is contained in:
Daniil Fajnberg 2021-12-03 17:51:10 +01:00
parent 4fd69602f7
commit 462e34013e
1 changed file with 6 additions and 2 deletions

View File

@ -1,13 +1,14 @@
import logging import logging
import asyncio import asyncio
from typing import Union, List, Dict from typing import Union, List, Dict
from datetime import datetime
from aiohttp.client import ClientSession from aiohttp.client import ClientSession
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag
from webutils import in_async_session, gather_in_batches from webutils import in_async_session, gather_in_batches
from .constants import (HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, FIN_STMT_ITEMS, from .constants import (DEV_MODE, HTML_PARSER, BASE_URL, END_DATE, BS, IS, CF, FIN_STMT_URL_SUFFIX, FIN_STMT_ITEMS,
DEFAULT_CONCURRENT_BATCH_SIZE) DEFAULT_CONCURRENT_BATCH_SIZE)
from .exceptions import UnknownFinancialStatementItem from .exceptions import UnknownFinancialStatementItem
@ -56,13 +57,16 @@ def find_relevant_table_rows(soup: BeautifulSoup) -> List[Tag]:
""" """
Returns the table rows containing the data of interest. Returns the table rows containing the data of interest.
""" """
now = datetime.utcnow()
trs = [] trs = []
for tr in soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr'): for tr in soup.find('div', attrs={'class': 'financials'}).tbody.find_all('tr'):
try: try:
if is_relevant_table_row(tr): if is_relevant_table_row(tr):
trs.append(tr) trs.append(tr)
except UnknownFinancialStatementItem: except UnknownFinancialStatementItem:
pass if DEV_MODE:
with open(f'mwfin_unknown_items_{now.strftime("%Y-%m-%d_%H-%M-%S")}.html', 'w') as f:
f.write(str(soup))
return trs return trs