unittest for 'get_single_tr_data'; added 'html.parser' constant to 'scrape.py'

This commit is contained in:
Maximilian Fajnberg 2021-11-13 18:57:13 +01:00
parent 9a9ee461b1
commit fa19f557c9
2 changed files with 32 additions and 3 deletions

View File

@ -25,6 +25,7 @@ DIGIT_CATEGORY = '0-9'
OTHER_CATEGORY = 'Other'
CATEGORIES = [DIGIT_CATEGORY] + list(ascii_uppercase) + [OTHER_CATEGORY]
STOCK_SYMBOL_PATTERN = re.compile(r'\(([\w.&]+)\)')
HTML_PARSER = 'html.parser'
class UnexpectedMarkupError(Exception):
@ -113,7 +114,7 @@ async def trs_from_page(url: str, session: ClientSession = None, limit: int = No
session = ClientSession()
async with session.get(url) as response:
html = await response.text()
soup = BeautifulSoup(html, 'html.parser')
soup = BeautifulSoup(html, HTML_PARSER)
try:
return soup.find('div', {'id': 'marketsindex'}).table.tbody.find_all('tr', limit=limit)
except AttributeError:

View File

@ -20,13 +20,41 @@ class ScrapeTestCase(TestCase):
self.assertListEqual(expected_output, output)
mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)])
@patch.object(scrape, 'get_str_from_td')
def test_get_single_tr_data(self, mock_get_str_from_td: MagicMock):
    """
    Check `scrape.get_single_tr_data` on two hand-built table rows.

    `scrape.get_str_from_td` is patched out and always returns `x`, so the
    last three elements of the output only prove that the helper was called
    with the second, third and fourth `<td>` of the row.
    """
    a, b, x = 'a', 'b', 'x'
    mock_get_str_from_td.return_value = x

    # Case 1: the <small> text is wrapped in parentheses; the test expects
    # them to be stripped, leaving the bare `b` as second element.
    test_html = (
        f'<tr> <td><a>{a}<small>({b})</small></a></td>'
        '<td>foo</td> <td>bar</td> <td>baz</td> </tr>'
    )
    test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
    test_tds = test_tr.find_all('td')
    output = scrape.get_single_tr_data(test_tr)
    expected_output = (a, b, x, x, x)
    self.assertTupleEqual(expected_output, output)
    mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])

    # Forget the calls recorded for case 1. Without this, the call check of
    # case 2 could be satisfied by the earlier calls alone, because the last
    # three cells are identical markup in both rows. The configured
    # `return_value` is kept by `reset_mock()`.
    mock_get_str_from_td.reset_mock()

    # Case 2: the <small> text is not wrapped in parentheses; the test
    # expects it to be passed through unchanged.
    test_html = (
        f'<tr> <td><a>{a}<small>***{b}***</small></a></td>'
        '<td>foo</td> <td>bar</td> <td>baz</td> </tr>'
    )
    test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
    test_tds = test_tr.find_all('td')
    output = scrape.get_single_tr_data(test_tr)
    expected_output = (a, f'***{b}***', x, x, x)
    self.assertTupleEqual(expected_output, output)
    mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])
def test_get_str_from_td(self):
# Checks `scrape.get_str_from_td` on two cells: a <td> whose text is padded
# with spaces is expected to yield the text without the padding, and an
# empty <td> is expected to yield an empty string.
# NOTE(review): each pair of consecutive `test_td = ...` assignments below
# looks like the removed/added line pair of the rendered diff (the literal
# 'html.parser' replaced by `scrape.HTML_PARSER`); read as plain code, the
# second assignment simply overrides the first -- confirm.
expected_output = 'foo bar'
test_td = BeautifulSoup(f'<td> {expected_output} </td>', 'html.parser').td
test_td = BeautifulSoup(f'<td> {expected_output} </td>', scrape.HTML_PARSER).td
output = scrape.get_str_from_td(test_td)
self.assertEqual(expected_output, output)
# Second case: a cell without any content.
expected_output = ''
test_td = BeautifulSoup('<td></td>', 'html.parser').td
test_td = BeautifulSoup('<td></td>', scrape.HTML_PARSER).td
output = scrape.get_str_from_td(test_td)
self.assertEqual(expected_output, output)
def test_trs_from_page(self):
# Placeholder: no assertions yet.
# The tested function takes a URL as argument and issues a GET request,
# so it cannot be exercised here as-is.
# One option: substitute the HTML that is to be parsed instead of
# fetching it.
pass