Add unit test for 'get_single_tr_data'; add 'HTML_PARSER' constant (value 'html.parser') to 'scrape.py'
This commit is contained in:
parent
9a9ee461b1
commit
fa19f557c9
@ -25,6 +25,7 @@ DIGIT_CATEGORY = '0-9'
|
||||
OTHER_CATEGORY = 'Other'
|
||||
CATEGORIES = [DIGIT_CATEGORY] + list(ascii_uppercase) + [OTHER_CATEGORY]
|
||||
STOCK_SYMBOL_PATTERN = re.compile(r'\(([\w.&]+)\)')
|
||||
HTML_PARSER = 'html.parser'
|
||||
|
||||
|
||||
class UnexpectedMarkupError(Exception):
|
||||
@ -113,7 +114,7 @@ async def trs_from_page(url: str, session: ClientSession = None, limit: int = No
|
||||
session = ClientSession()
|
||||
async with session.get(url) as response:
|
||||
html = await response.text()
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
soup = BeautifulSoup(html, HTML_PARSER)
|
||||
try:
|
||||
return soup.find('div', {'id': 'marketsindex'}).table.tbody.find_all('tr', limit=limit)
|
||||
except AttributeError:
|
||||
|
@ -20,13 +20,41 @@ class ScrapeTestCase(TestCase):
|
||||
self.assertListEqual(expected_output, output)
|
||||
mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)])
|
||||
|
||||
@patch.object(scrape, 'get_str_from_td')
def test_get_single_tr_data(self, mock_get_str_from_td: MagicMock):
    """
    Check the tuple that `scrape.get_single_tr_data` builds from a table row.

    `get_str_from_td` is mocked, so the last three elements of the output are
    always the mock's return value; what is really exercised is the handling
    of the first cell (`<a>` text plus the text inside `<small>`).
    """
    a, b, x = 'a', 'b', 'x'
    mock_get_str_from_td.return_value = x
    # Each case: (text placed inside <small>, expected second tuple element).
    cases = (
        (f'({b})', b),  # parenthesised text: only the inner part is expected
        (f'***{b}***', f'***{b}***'),  # no parentheses: text expected unchanged
    )
    for small_text, expected_symbol in cases:
        with self.subTest(small_text=small_text):
            # Drop calls recorded for the previous row; the trailing cells are
            # the same in every case, so stale calls could otherwise satisfy
            # the `assert_has_calls` check below. The return value is kept.
            mock_get_str_from_td.reset_mock()
            test_html = (
                f'<tr> <td><a>{a}<small>{small_text}</small></a></td>'
                '<td>foo</td> <td>bar</td> <td>baz</td> </tr>'
            )
            test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
            test_tds = test_tr.find_all('td')
            output = scrape.get_single_tr_data(test_tr)
            expected_output = (a, expected_symbol, x, x, x)
            self.assertTupleEqual(expected_output, output)
            mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])
|
||||
|
||||
def test_get_str_from_td(self):
    """
    Check the string that `scrape.get_str_from_td` returns for a `<td>` tag.

    Covers a cell whose text is padded with spaces (expected back without the
    padding) and an empty cell (expected to give an empty string).
    """
    expected_output = 'foo bar'
    # Parse with the module's shared parser constant rather than a literal.
    test_td = BeautifulSoup(f'<td> {expected_output} </td>', scrape.HTML_PARSER).td
    output = scrape.get_str_from_td(test_td)
    self.assertEqual(expected_output, output)

    expected_output = ''
    test_td = BeautifulSoup('<td></td>', scrape.HTML_PARSER).td
    output = scrape.get_str_from_td(test_td)
    self.assertEqual(expected_output, output)
|
||||
|
||||
def test_trs_from_page(self):
    # TODO: not implemented yet.
    # The function under test takes a URL and issues a GET request for it,
    # so a real test would have to substitute the HTML that gets parsed
    # instead of fetching a live page.
    pass
|
||||
|
Loading…
Reference in New Issue
Block a user