diff --git a/src/stocksymbolscraper/scrape.py b/src/stocksymbolscraper/scrape.py index 5421f53..b40f5cf 100644 --- a/src/stocksymbolscraper/scrape.py +++ b/src/stocksymbolscraper/scrape.py @@ -25,6 +25,7 @@ DIGIT_CATEGORY = '0-9' OTHER_CATEGORY = 'Other' CATEGORIES = [DIGIT_CATEGORY] + list(ascii_uppercase) + [OTHER_CATEGORY] STOCK_SYMBOL_PATTERN = re.compile(r'\(([\w.&]+)\)') +HTML_PARSER = 'html.parser' class UnexpectedMarkupError(Exception): @@ -113,7 +114,7 @@ async def trs_from_page(url: str, session: ClientSession = None, limit: int = No session = ClientSession() async with session.get(url) as response: html = await response.text() - soup = BeautifulSoup(html, 'html.parser') + soup = BeautifulSoup(html, HTML_PARSER) try: return soup.find('div', {'id': 'marketsindex'}).table.tbody.find_all('tr', limit=limit) except AttributeError: diff --git a/tests/test_scrape.py b/tests/test_scrape.py index e8d2029..9ad1e38 100644 --- a/tests/test_scrape.py +++ b/tests/test_scrape.py @@ -20,13 +20,41 @@ class ScrapeTestCase(TestCase): self.assertListEqual(expected_output, output) mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)]) + @patch.object(scrape, 'get_str_from_td') + def test_get_single_tr_data(self, mock_get_str_from_td: MagicMock): + a, b, x = 'a', 'b', 'x' + mock_get_str_from_td.return_value = x + test_html = f' {a}({b})' + \ + f'foo bar baz ' + test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr + test_tds = test_tr.find_all('td') + output = scrape.get_single_tr_data(test_tr) + expected_output = (a, b, x, x, x) + self.assertTupleEqual(expected_output, output) + mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])]) + + test_html = f' {a}***{b}***' + \ + f'foo bar baz ' + test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr + test_tds = test_tr.find_all('td') + output = scrape.get_single_tr_data(test_tr) + expected_output = (a, f'***{b}***', x, x, x) + self.assertTupleEqual(expected_output, output) + mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])]) + def test_get_str_from_td(self): expected_output = 'foo bar' - test_td = BeautifulSoup(f' {expected_output} ', 'html.parser').td + test_td = BeautifulSoup(f' {expected_output} ', scrape.HTML_PARSER).td output = scrape.get_str_from_td(test_td) self.assertEqual(expected_output, output) expected_output = '' - test_td = BeautifulSoup('', 'html.parser').td + test_td = BeautifulSoup('', scrape.HTML_PARSER).td output = scrape.get_str_from_td(test_td) self.assertEqual(expected_output, output) + + def test_trs_from_page(self): + # Tested function takes URL as argument (GET request is issued) + # HTML to be parsed could be substituted + # + pass