Record of essential HTML composition (mainly for testing)
This commit is contained in:
		@@ -11,9 +11,6 @@ from bs4.element import ResultSet, Tag
 | 
				
			|||||||
# the end dates of the reporting periods as strings (either years or quarters).
 | 
					# the end dates of the reporting periods as strings (either years or quarters).
 | 
				
			||||||
ResultDict = dict[str, Union[tuple[int], tuple[str]]]
 | 
					ResultDict = dict[str, Union[tuple[int], tuple[str]]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DOMAIN = 'www.marketwatch.com'
 | 
					 | 
				
			||||||
HTML_PARSER = 'html.parser'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
 | 
					async def soup_from_url(url: str, session: ClientSession = None) -> BeautifulSoup:
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,6 +5,12 @@ from bs4 import BeautifulSoup
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from mwfin import functions
 | 
					from mwfin import functions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# boiled-down but accurate structure of a relevant data table
 | 
				
			||||||
 | 
					# https://www.marketwatch.com/investing/stock/aapl/financials/cash-flow
 | 
				
			||||||
 | 
					# view page source @ line 2055
 | 
				
			||||||
 | 
					TEST_HTML = ''
 | 
				
			||||||
 | 
					TEST_SOUP = BeautifulSoup(TEST_HTML, 'html.parser')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class FunctionsTestCase(IsolatedAsyncioTestCase):
 | 
					class FunctionsTestCase(IsolatedAsyncioTestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -26,19 +32,8 @@ class FunctionsTestCase(IsolatedAsyncioTestCase):
 | 
				
			|||||||
        self.assertEqual(expected_output, output)
 | 
					        self.assertEqual(expected_output, output)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_extract_end_dates(self):
 | 
					    def test_extract_end_dates(self):
 | 
				
			||||||
        test_html = '<tr>' \
 | 
					        expected_output = ('End_Date_1', 'End_Date_2')
 | 
				
			||||||
                    '<th><div> ITEM </div></th>' \
 | 
					        output = functions.extract_end_dates(TEST_SOUP)
 | 
				
			||||||
                    '<th><div> 30-SEP-2020 </div></th>' \
 | 
					 | 
				
			||||||
                    '<th><div> 31-DEC-2020 </div></th>' \
 | 
					 | 
				
			||||||
                    '<th><div> 31-MAR-2021 </div></th>' \
 | 
					 | 
				
			||||||
                    '<th><div> 30-JUN-2021 </div></th>' \
 | 
					 | 
				
			||||||
                    '<th><div> 30-SEP-2021 </div></th>' \
 | 
					 | 
				
			||||||
                    '</tr>'
 | 
					 | 
				
			||||||
        test_soup = BeautifulSoup(test_html, 'html.parser')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        expected_output = ('30-SEP-2020', '31-DEC-2020', '31-MAR-2021',
 | 
					 | 
				
			||||||
                           '30-JUN-2021', '30-SEP-2021')
 | 
					 | 
				
			||||||
        output = functions.extract_end_dates(test_soup)
 | 
					 | 
				
			||||||
        self.assertTupleEqual(expected_output, output)
 | 
					        self.assertTupleEqual(expected_output, output)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_find_relevant_table_rows(self):
 | 
					    def test_find_relevant_table_rows(self):
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										32
									
								
								tests/test_structure.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								tests/test_structure.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,32 @@
 | 
				
			|||||||
 | 
					<!DOCTYPE html>
 | 
				
			||||||
 | 
					<html lang="en">
 | 
				
			||||||
 | 
					<head>
 | 
				
			||||||
 | 
					    <meta charset="UTF-8">
 | 
				
			||||||
 | 
					    <title>Title</title>
 | 
				
			||||||
 | 
					</head>
 | 
				
			||||||
 | 
					<body>
 | 
				
			||||||
 | 
					    <div><table aria-label="(something something) data table">
 | 
				
			||||||
 | 
					        <thead>
 | 
				
			||||||
 | 
					            <tr>
 | 
				
			||||||
 | 
					                <th><div> 'Item' </div><div> 'Item' </div></th>
 | 
				
			||||||
 | 
					                <th><div> End_Date_1 </div></th>
 | 
				
			||||||
 | 
					                <th><div> End_Date_2 </div></th>
 | 
				
			||||||
 | 
					            </tr>
 | 
				
			||||||
 | 
					        </thead>
 | 
				
			||||||
 | 
					        <tbody>
 | 
				
			||||||
 | 
					            <tr>
 | 
				
			||||||
 | 
					                <td><div> Item_1 </div><div class="other"> Item_1 </div></td>
 | 
				
			||||||
 | 
					                <td><div><span class=""> 11M </span></div></td>
 | 
				
			||||||
 | 
					                <td><div><span class="negative"> (22M) </span></div></td>
 | 
				
			||||||
 | 
					                <td><div> <div data-chart-data="11000000.0,-22000000.0"><div></div></td>
 | 
				
			||||||
 | 
					            </tr>
 | 
				
			||||||
 | 
					            <tr>
 | 
				
			||||||
 | 
					                <td><div> Item_2 </div><div class="other"> Item_2 </div></td>
 | 
				
			||||||
 | 
					                <td><div><span class="positive"> 12% </span></div></td>
 | 
				
			||||||
 | 
					                <td><div><span class="negative"> 13% </span></div></td>
 | 
				
			||||||
 | 
					                <td><div> <div data-chart-data="0.12bazbazbaz,-0.13bazbazbaz"> <div></div></td>
 | 
				
			||||||
 | 
					            </tr>
 | 
				
			||||||
 | 
					        </tbody>
 | 
				
			||||||
 | 
					    </table></div>
 | 
				
			||||||
 | 
					</body>
 | 
				
			||||||
 | 
					</html>
 | 
				
			||||||
		Reference in New Issue
	
	Block a user