diff --git a/src/stocksymbolscraper/scrape.py b/src/stocksymbolscraper/scrape.py
index 5421f53..b40f5cf 100644
--- a/src/stocksymbolscraper/scrape.py
+++ b/src/stocksymbolscraper/scrape.py
@@ -25,6 +25,7 @@ DIGIT_CATEGORY = '0-9'
OTHER_CATEGORY = 'Other'
CATEGORIES = [DIGIT_CATEGORY] + list(ascii_uppercase) + [OTHER_CATEGORY]
STOCK_SYMBOL_PATTERN = re.compile(r'\(([\w.&]+)\)')
+HTML_PARSER = 'html.parser'  # parser name handed to BeautifulSoup; one constant used by both scrape.py and its tests
class UnexpectedMarkupError(Exception):
@@ -113,7 +114,7 @@ async def trs_from_page(url: str, session: ClientSession = None, limit: int = No
session = ClientSession()
async with session.get(url) as response:
html = await response.text()
- soup = BeautifulSoup(html, 'html.parser')
+ soup = BeautifulSoup(html, HTML_PARSER)
try:
return soup.find('div', {'id': 'marketsindex'}).table.tbody.find_all('tr', limit=limit)
except AttributeError:
diff --git a/tests/test_scrape.py b/tests/test_scrape.py
index e8d2029..9ad1e38 100644
--- a/tests/test_scrape.py
+++ b/tests/test_scrape.py
@@ -20,13 +20,41 @@ class ScrapeTestCase(TestCase):
self.assertListEqual(expected_output, output)
mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)])
+ @patch.object(scrape, 'get_str_from_td')  # swap scrape.get_str_from_td for a mock during this test
+ def test_get_single_tr_data(self, mock_get_str_from_td: MagicMock):  # the mock is injected by the patch decorator
+ a, b, x = 'a', 'b', 'x'
+ mock_get_str_from_td.return_value = x  # every call to the mocked helper yields 'x'
+ test_html = f'
{a}({b}) | ' + \
+ f'foo | bar | baz |
'
+ test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr  # NOTE(review): test_html shows no <tr>/<td> tags here, so .tr would be None — markup looks stripped; confirm against the repository
+ test_tds = test_tr.find_all('td')  # cells of the row; indices 1-3 are compared with the mock's calls below
+ output = scrape.get_single_tr_data(test_tr)
+ expected_output = (a, b, x, x, x)  # a and b are embedded in test_html; the three x values are the mock's return value
+ self.assertTupleEqual(expected_output, output)
+ mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])  # helper must have received the 2nd, 3rd and 4th cell, in that order
+
+ test_html = f' {a}***{b}*** | ' + \
+ f'foo | bar | baz |
'
+ test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr  # second case: b is wrapped in asterisks instead of parentheses
+ test_tds = test_tr.find_all('td')
+ output = scrape.get_single_tr_data(test_tr)
+ expected_output = (a, f'***{b}***', x, x, x)  # text after a is expected back whole; presumably because STOCK_SYMBOL_PATTERN needs parentheses — confirm in get_single_tr_data
+ self.assertTupleEqual(expected_output, output)
+ mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])  # same check against the cells of the second row
+
def test_get_str_from_td(self):
expected_output = 'foo bar'
- test_td = BeautifulSoup(f' {expected_output} | ', 'html.parser').td
+ test_td = BeautifulSoup(f' {expected_output} | ', scrape.HTML_PARSER).td  # parser literal replaced by the shared constant; NOTE(review): no <td> tag is visible in the literal, so .td would be None — markup looks stripped; confirm
output = scrape.get_str_from_td(test_td)
self.assertEqual(expected_output, output)
expected_output = ''
- test_td = BeautifulSoup(' | ', 'html.parser').td
+ test_td = BeautifulSoup(' | ', scrape.HTML_PARSER).td  # second case expects '' back from get_str_from_td; same caveat about the missing <td> markup
output = scrape.get_str_from_td(test_td)
self.assertEqual(expected_output, output)
+
+ def test_trs_from_page(self):
+ # Placeholder: trs_from_page takes a URL and issues a GET request through the session,
+ # so a real test needs the session/response mocked, with substitute HTML supplied for parsing.
+ # TODO: implement; as written this test passes without asserting anything.
+ pass