Record of essential html composition (mainly for testing)

This commit is contained in:
Maximilian Fajnberg 2021-11-23 14:51:29 +01:00
parent 525d56ef63
commit 652ded4cd9
3 changed files with 40 additions and 16 deletions

View File

@ -11,9 +11,6 @@ from bs4.element import ResultSet, Tag
# the end dates of the reporting periods as strings (either years or quarters). # the end dates of the reporting periods as strings (either years or quarters).
ResultDict = dict[str, Union[tuple[int], tuple[str]]] ResultDict = dict[str, Union[tuple[int], tuple[str]]]
DOMAIN = 'www.marketwatch.com'
HTML_PARSER = 'html.parser'
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup: async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
""" """

View File

@ -5,6 +5,12 @@ from bs4 import BeautifulSoup
from mwfin import functions from mwfin import functions
# boiled down & accurate structure of a relevant data table
# https://www.marketwatch.com/investing/stock/aapl/financials/cash-flow
# view page source @ line 2055
TEST_HTML = ''
TEST_SOUP = BeautifulSoup(TEST_HTML, 'html.parser')
class FunctionsTestCase(IsolatedAsyncioTestCase): class FunctionsTestCase(IsolatedAsyncioTestCase):
@ -26,19 +32,8 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
self.assertEqual(expected_output, output) self.assertEqual(expected_output, output)
def test_extract_end_dates(self): def test_extract_end_dates(self):
test_html = '<tr>' \ expected_output = ('End_Date_1', 'End_Date_2')
'<th><div> ITEM </div></th>' \ output = functions.extract_end_dates(TEST_SOUP)
'<th><div> 30-SEP-2020 </div></th>' \
'<th><div> 31-DEC-2020 </div></th>' \
'<th><div> 31-MAR-2021 </div></th>' \
'<th><div> 30-JUN-2021 </div></th>' \
'<th><div> 30-SEP-2021 </div></th>' \
'</tr>'
test_soup = BeautifulSoup(test_html, 'html.parser')
expected_output = ('30-SEP-2020', '31-DEC-2020', '31-MAR-2021',
'30-JUN-2021', '30-SEP-2021')
output = functions.extract_end_dates(test_soup)
self.assertTupleEqual(expected_output, output) self.assertTupleEqual(expected_output, output)
def test_find_relevant_table_rows(self): def test_find_relevant_table_rows(self):

32
tests/test_structure.html Normal file
View File

@ -0,0 +1,32 @@
<!DOCTYPE html>
<!--
Test fixture: the essential structure of one financial data table.

* The header row holds an item column followed by two end-date columns.
* Each body row holds the item name (twice, the second copy in a
  div with class "other"), one value cell per end date, and a final cell
  whose inner div carries the raw values in its `data-chart-data` attribute.

All div elements are explicitly closed so that every HTML parser builds the
same tree instead of relying on parser-specific error recovery.
-->
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<div><table aria-label="(something something) data table">
<thead>
<tr>
<th><div> 'Item' </div><div> 'Item' </div></th>
<th><div> End_Date_1 </div></th>
<th><div> End_Date_2 </div></th>
</tr>
</thead>
<tbody>
<tr>
<td><div> Item_1 </div><div class="other"> Item_1 </div></td>
<td><div><span class=""> 11M </span></div></td>
<td><div><span class="negative"> (22M) </span></div></td>
<td><div> <div data-chart-data="11000000.0,-22000000.0"></div></div></td>
</tr>
<tr>
<td><div> Item_2 </div><div class="other"> Item_2 </div></td>
<td><div><span class="positive"> 12% </span></div></td>
<td><div><span class="negative"> 13% </span></div></td>
<td><div> <div data-chart-data="0.12bazbazbaz,-0.13bazbazbaz"> </div></div></td>
</tr>
</tbody>
</table></div>
</body>
</html>