From 652ded4cd9bbcc93b2ad5c7aa0d9a16c82437760 Mon Sep 17 00:00:00 2001 From: Maximilian Fajnberg Date: Tue, 23 Nov 2021 14:51:29 +0100 Subject: [PATCH] Record of essential html composition (mainly for testing) --- src/mwfin/functions.py | 3 --- tests/test_functions.py | 21 ++++++++------------- tests/test_structure.html | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 16 deletions(-) create mode 100644 tests/test_structure.html diff --git a/src/mwfin/functions.py b/src/mwfin/functions.py index ac4022f..a1cd4cd 100644 --- a/src/mwfin/functions.py +++ b/src/mwfin/functions.py @@ -11,9 +11,6 @@ from bs4.element import ResultSet, Tag # the end dates of the reporting periods as strings (either years or quarters). ResultDict = dict[str, Union[tuple[int], tuple[str]]] -DOMAIN = 'www.marketwatch.com' -HTML_PARSER = 'html.parser' - async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup: """ diff --git a/tests/test_functions.py b/tests/test_functions.py index c0b0f06..6ed68dd 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -5,6 +5,12 @@ from bs4 import BeautifulSoup from mwfin import functions +# boiled down & accurate structure of a relevant data table +# https://www.marketwatch.com/investing/stock/aapl/financials/cash-flow +# view page source @ line 2055 +TEST_HTML = '' +TEST_SOUP = BeautifulSoup(TEST_HTML, 'html.parser') + class FunctionsTestCase(IsolatedAsyncioTestCase): @@ -26,19 +32,8 @@ class FunctionsTestCase(IsolatedAsyncioTestCase): self.assertEqual(expected_output, output) def test_extract_end_dates(self): - test_html = '' \ - '
ITEM
' \ - '
30-SEP-2020
' \ - '
31-DEC-2020
' \ - '
31-MAR-2021
' \ - '
30-JUN-2021
' \ - '
30-SEP-2021
' \ - '' - test_soup = BeautifulSoup(test_html, 'html.parser') - - expected_output = ('30-SEP-2020', '31-DEC-2020', '31-MAR-2021', - '30-JUN-2021', '30-SEP-2021') - output = functions.extract_end_dates(test_soup) + expected_output = ('End_Date_1', 'End_Date_2') + output = functions.extract_end_dates(TEST_SOUP) self.assertTupleEqual(expected_output, output) def test_find_relevant_table_rows(self): diff --git a/tests/test_structure.html b/tests/test_structure.html new file mode 100644 index 0000000..ef8cd05 --- /dev/null +++ b/tests/test_structure.html @@ -0,0 +1,32 @@ + + + + + Title + + +
+ + + + + + + + + + + + + + + + + + + + + +
'Item'
'Item'
End_Date_1
End_Date_2
Item_1
Item_1
11M
(22M)
Item_2
Item_2
12%
13%
+ + \ No newline at end of file