90 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			90 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import logging
 | |
| from unittest import IsolatedAsyncioTestCase
 | |
| from unittest.mock import patch, MagicMock, AsyncMock, call
 | |
| 
 | |
| from bs4 import BeautifulSoup
 | |
| 
 | |
| from stocksymbolscraper import scrape
 | |
| 
 | |
| 
 | |
class ScrapeTestCase(IsolatedAsyncioTestCase):
    """Unit tests for the helper functions of the `scrape` module.

    Collaborators (sibling helpers, `ClientSession`, `open`) are patched out so that each test
    exercises one function at a time.
    """

    @patch.object(scrape, 'get_single_tr_data')
    def test_extract_row_data(self, mock_get_single_tr_data: MagicMock):
        """`extract_row_data` must return one `get_single_tr_data` result per argument, in order."""
        foo = 'foo'
        mock_get_single_tr_data.return_value = foo
        # Although the function expects BS4 Tag objects as arguments, we substitute with Mocks here
        # because those arguments are immediately passed into another function, which we mock out anyway.
        input1, input2, input3 = MagicMock(), MagicMock(), MagicMock()

        output = scrape.extract_row_data(input1, input2, input3)

        self.assertListEqual([foo, foo, foo], output)
        mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)])

    @patch.object(scrape, 'get_str_from_td')
    def test_get_single_tr_data(self, mock_get_str_from_td: MagicMock):
        """`get_single_tr_data` must return a 5-tuple built from the first cell and the other three.

        The first two elements come from the `<a>` / `<small>` texts of the first cell; the
        remaining three are whatever `get_str_from_td` (mocked here) returns for cells 2-4.
        """
        a, b, x = 'a', 'b', 'x'
        mock_get_str_from_td.return_value = x
        # Pairs of (text placed inside <small>, expected second element of the result).
        scenarios = (
            (f'({b})', b),                  # enclosing parentheses are expected to be dropped
            (f'***{b}***', f'***{b}***'),   # any other text is expected to come back unchanged
        )
        for small_text, expected_second in scenarios:
            with self.subTest(small_text=small_text):
                # Forget calls recorded by the previous scenario: the <td> tags of both scenarios
                # have the same markup, so stale calls could otherwise satisfy `assert_has_calls`.
                # `reset_mock()` keeps the configured return value.
                mock_get_str_from_td.reset_mock()
                test_html = (
                    f'<tr> <td><a>{a}<small>{small_text}</small></a></td>'
                    '<td>foo</td> <td>bar</td> <td>baz</td> </tr>'
                )
                test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
                test_tds = test_tr.find_all('td')

                output = scrape.get_single_tr_data(test_tr)

                self.assertTupleEqual((a, expected_second, x, x, x), output)
                mock_get_str_from_td.assert_has_calls(
                    [call(test_tds[1]), call(test_tds[2]), call(test_tds[3])]
                )

    def test_get_str_from_td(self):
        """`get_str_from_td` must return the cell text without outer whitespace, or '' if empty."""
        # Pairs of (cell markup, expected string).
        scenarios = (
            ('<td>   foo bar </td>', 'foo bar'),
            ('<td></td>', ''),
        )
        for td_html, expected_output in scenarios:
            with self.subTest(td_html=td_html):
                test_td = BeautifulSoup(td_html, scrape.HTML_PARSER).td
                output = scrape.get_str_from_td(test_td)
                self.assertEqual(expected_output, output)

    @patch.object(scrape, 'ClientSession')
    async def test_soup_from_url(self, mock_session_cls):
        """`soup_from_url` must return the parsed response body, with or without a given session."""
        test_html = '<b>foo</b>'
        # The mocks are shaped for `async with session.get(...) as response: await response.text()`.
        mock_response = MagicMock()
        mock_response.text = AsyncMock(return_value=test_html)
        mock_get_return = MagicMock()
        mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
        mock_session_obj = MagicMock()
        mock_session_obj.get = MagicMock(return_value=mock_get_return)
        # Instantiating the patched `ClientSession` hands out the very same mocked session.
        mock_session_cls.return_value = mock_session_obj
        expected_output = BeautifulSoup(test_html, scrape.HTML_PARSER)

        # No session passed in.
        output = await scrape.soup_from_url('foo')
        self.assertEqual(expected_output, output)

        # Session passed in explicitly.
        output = await scrape.soup_from_url('foo', mock_session_obj)
        self.assertEqual(expected_output, output)

    def test_trs_from_page(self):
        """`trs_from_page` must return the table rows, or raise on unexpected markup.

        In the failure case the test also checks that `open` is called exactly once and that the
        prettified soup is written through the resulting context-managed handle.
        """
        tr1_text, tr2_text = '<tr>foo</tr>', '<tr>bar</tr>'
        test_html = f'<div id="marketsindex"><table><tbody>{tr1_text}{tr2_text}</tbody></table></div>'
        test_soup = BeautifulSoup(test_html, scrape.HTML_PARSER)
        output = scrape.trs_from_page(test_soup)
        expected_output = test_soup.find_all('tr')
        self.assertSequenceEqual(expected_output, output)

        test_html = '<div id="marketsindex"><table>garbage</table></div>'
        test_soup = BeautifulSoup(test_html, scrape.HTML_PARSER)
        # Silence log output for the expected failure. The `finally` guarantees logging is switched
        # back on even when an assertion below fails, so later tests are not affected.
        logging.disable(logging.CRITICAL)
        try:
            with patch.object(scrape, 'open') as mock_open:
                with self.assertRaises(scrape.UnexpectedMarkupError):
                    scrape.trs_from_page(test_soup)
                mock_open.assert_called_once()
                mock_open.return_value.__enter__.return_value.write.assert_called_once_with(
                    test_soup.prettify()
                )
        finally:
            logging.disable(logging.NOTSET)
 |