unittest for 'get_single_tr_data'; added 'html.parser' constant to 'scrape.py'

2021-11-13 18:57:13 +01:00
parent 9a9ee461b1
commit fa19f557c9
2 changed files with 32 additions and 3 deletions
@@ -25,6 +25,7 @@ DIGIT_CATEGORY = '0-9'
 OTHER_CATEGORY = 'Other'
 CATEGORIES = [DIGIT_CATEGORY] + list(ascii_uppercase) + [OTHER_CATEGORY]
 STOCK_SYMBOL_PATTERN = re.compile(r'\(([\w.&]+)\)')
 HTML_PARSER = 'html.parser'
 class UnexpectedMarkupError(Exception):
@@ -113,7 +114,7 @@ async def trs_from_page(url: str, session: ClientSession = None, limit: int = No
        session = ClientSession()
    async with session.get(url) as response:
        html = await response.text()
-    soup = BeautifulSoup(html, 'html.parser')
+    soup = BeautifulSoup(html, HTML_PARSER)
    try:
        return soup.find('div', {'id': 'marketsindex'}).table.tbody.find_all('tr', limit=limit)
    except AttributeError:
@@ -20,13 +20,41 @@ class ScrapeTestCase(TestCase):
        self.assertListEqual(expected_output, output)
        mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)])
    @patch.object(scrape, 'get_str_from_td')
    def test_get_single_tr_data(self, mock_get_str_from_td: MagicMock):
        # `get_str_from_td` is mocked to always return `x`, so the last three
        # items of the output tuple are fixed; what is actually exercised here
        # is how the first cell (<a> text plus <small> text) is turned into
        # the first two items.
        a, b, x = 'a', 'b', 'x'
        mock_get_str_from_td.return_value = x
        # Pairs of (text placed inside <small>, expected second tuple item):
        # a value wrapped in parentheses is expected back without them,
        # anything else is expected back unchanged.
        scenarios = (
            (f'({b})', b),
            (f'***{b}***', f'***{b}***'),
        )
        for small_text, expected_second in scenarios:
            with self.subTest(small_text=small_text):
                # Forget calls recorded by a previous scenario (the configured
                # return value is kept). Without this, `assert_has_calls` below
                # could be satisfied by the earlier scenario's calls, because
                # structurally identical tags compare equal.
                mock_get_str_from_td.reset_mock()
                test_html = (
                    f'<tr> <td><a>{a}<small>{small_text}</small></a></td>'
                    '<td>foo</td> <td>bar</td> <td>baz</td> </tr>'
                )
                test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
                test_tds = test_tr.find_all('td')
                output = scrape.get_single_tr_data(test_tr)
                expected_output = (a, expected_second, x, x, x)
                self.assertTupleEqual(expected_output, output)
                # The mocked helper must have been called with the second,
                # third and fourth cell, in that order.
                mock_get_str_from_td.assert_has_calls([call(td) for td in test_tds[1:]])
    def test_get_str_from_td(self):
        expected_output = 'foo bar'
-        test_td = BeautifulSoup(f'<td>   {expected_output} </td>', 'html.parser').td
+        test_td = BeautifulSoup(f'<td>   {expected_output} </td>', scrape.HTML_PARSER).td
        output = scrape.get_str_from_td(test_td)
        self.assertEqual(expected_output, output)
        expected_output = ''
-        test_td = BeautifulSoup('<td></td>', 'html.parser').td
+        test_td = BeautifulSoup('<td></td>', scrape.HTML_PARSER).td
        output = scrape.get_str_from_td(test_td)
        self.assertEqual(expected_output, output)
    def test_trs_from_page(self):
        # TODO: not implemented yet.
        # The tested function takes a URL as its argument and issues a GET request
        # for it, so a real test would have to substitute the HTML that gets parsed
        # (e.g. by replacing the HTTP response) instead of hitting the network.
        pass