import logging
from unittest import IsolatedAsyncioTestCase
from unittest.mock import patch, MagicMock, AsyncMock, call

from bs4 import BeautifulSoup

from stocksymbolscraper import scrape


# NOTE(review): several HTML fixture literals below contain no tags at all as they
# appear here (e.g. no <tr>/<td>), although the tests call `.tr`, `.td` and
# `find_all(...)` on the parsed result. The markup looks like it was lost somewhere;
# the literals are kept verbatim and should be restored from the authoritative source.
class ScrapeTestCase(IsolatedAsyncioTestCase):
    """Unit tests for the functions of the `scrape` module."""

    @patch.object(scrape, 'get_single_tr_data')
    def test_extract_row_data(self, mock_get_single_tr_data: MagicMock):
        """`extract_row_data` returns a list with one `get_single_tr_data` result per argument."""
        foo = 'foo'
        mock_get_single_tr_data.return_value = foo
        input1, input2, input3 = MagicMock(), MagicMock(), MagicMock()
        # Although the function expects BS4 Tag objects as arguments, we substitute with Mocks here
        # because those arguments are immediately passed into another function, which we mock out anyway.
        output = scrape.extract_row_data(input1, input2, input3)
        expected_output = [foo, foo, foo]
        self.assertListEqual(expected_output, output)
        mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)])

    @patch.object(scrape, 'get_str_from_td')
    def test_get_single_tr_data(self, mock_get_str_from_td: MagicMock):
        """`get_single_tr_data` is checked against two fixtures that differ only in how `b` is wrapped."""
        a, b, x = 'a', 'b', 'x'
        mock_get_str_from_td.return_value = x

        # First fixture: `b` is wrapped in parentheses; the expected tuple holds the bare `b`.
        # NOTE(review): no <tr>/<td> markup is visible in this literal -- confirm and restore.
        test_html = f' {a}({b})' + \
                    f'foo bar baz '
        test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
        test_tds = test_tr.find_all('td')
        output = scrape.get_single_tr_data(test_tr)
        expected_output = (a, b, x, x, x)
        self.assertTupleEqual(expected_output, output)
        mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])

        # `assert_has_calls` tolerates extra recorded calls, so the calls made for the first
        # fixture could otherwise satisfy the assertion for the second one.
        # `reset_mock()` clears the call history but keeps the configured `return_value`.
        mock_get_str_from_td.reset_mock()

        # Second fixture: `b` is wrapped in asterisks; the expected tuple keeps the wrapping.
        # NOTE(review): no <tr>/<td> markup is visible in this literal -- confirm and restore.
        test_html = f' {a}***{b}***' + \
                    f'foo bar baz '
        test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
        test_tds = test_tr.find_all('td')
        output = scrape.get_single_tr_data(test_tr)
        expected_output = (a, f'***{b}***', x, x, x)
        self.assertTupleEqual(expected_output, output)
        mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])

    def test_get_str_from_td(self):
        """`get_str_from_td` returns the cell text without surrounding whitespace, or `''` for an empty cell."""
        # NOTE(review): no <td> markup is visible in either literal below -- confirm and restore.
        expected_output = 'foo bar'
        test_td = BeautifulSoup(f' {expected_output} ', scrape.HTML_PARSER).td
        output = scrape.get_str_from_td(test_td)
        self.assertEqual(expected_output, output)

        expected_output = ''
        test_td = BeautifulSoup('', scrape.HTML_PARSER).td
        output = scrape.get_str_from_td(test_td)
        self.assertEqual(expected_output, output)

    @patch.object(scrape, 'ClientSession')
    async def test_soup_from_url(self, mock_session_cls):
        """`soup_from_url` returns the parsed response text, with and without a session argument."""
        test_html = 'foo'
        # Build the chain `ClientSession() -> .get(...) -> async with ... as response -> await response.text()`.
        mock_response = MagicMock()
        mock_response.text = AsyncMock(return_value=test_html)
        mock_get_return = MagicMock()
        mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
        mock_session_obj = MagicMock()
        mock_session_obj.get = MagicMock(return_value=mock_get_return)
        mock_session_cls.return_value = mock_session_obj

        expected_output = BeautifulSoup(test_html, scrape.HTML_PARSER)

        # Without a session argument.
        output = await scrape.soup_from_url('foo')
        self.assertEqual(expected_output, output)

        # With an explicitly supplied session object.
        output = await scrape.soup_from_url('foo', mock_session_obj)
        self.assertEqual(expected_output, output)

    def test_trs_from_page(self):
        """`trs_from_page` returns the page's rows, or dumps the markup and raises on unexpected input."""
        tr1_text, tr2_text = 'foo', 'bar'
        # NOTE(review): no <table>/<tr> markup is visible in this literal -- confirm and restore.
        test_html = f'\n{tr1_text}{tr2_text}\n'
        test_soup = BeautifulSoup(test_html, scrape.HTML_PARSER)
        output = scrape.trs_from_page(test_soup)
        expected_output = test_soup.find_all('tr')
        self.assertSequenceEqual(expected_output, output)

        logging.disable(logging.CRITICAL)
        # Registered right away so logging is re-enabled even if an assertion below fails;
        # otherwise a failure here would leave logging disabled for the rest of the test run.
        self.addCleanup(logging.disable, logging.NOTSET)

        # NOTE(review): no markup is visible in this literal either -- confirm and restore.
        test_html = f'\ngarbage\n'
        test_soup = BeautifulSoup(test_html, scrape.HTML_PARSER)
        # `open` is patched in the `scrape` namespace so the markup dump does not touch the file system.
        with patch.object(scrape, 'open') as mock_open:
            with self.assertRaises(scrape.UnexpectedMarkupError):
                scrape.trs_from_page(test_soup)
            mock_open.assert_called_once()
            mock_open.return_value.__enter__.return_value.write.assert_called_once_with(test_soup.prettify())