Add unit test for 'get_single_tr_data'; add 'HTML_PARSER' constant (value 'html.parser') to 'scrape.py'
This commit is contained in:
parent
9a9ee461b1
commit
fa19f557c9
@ -25,6 +25,7 @@ DIGIT_CATEGORY = '0-9'
|
|||||||
OTHER_CATEGORY = 'Other'
|
OTHER_CATEGORY = 'Other'
|
||||||
CATEGORIES = [DIGIT_CATEGORY] + list(ascii_uppercase) + [OTHER_CATEGORY]
|
CATEGORIES = [DIGIT_CATEGORY] + list(ascii_uppercase) + [OTHER_CATEGORY]
|
||||||
STOCK_SYMBOL_PATTERN = re.compile(r'\(([\w.&]+)\)')
|
STOCK_SYMBOL_PATTERN = re.compile(r'\(([\w.&]+)\)')
|
||||||
|
HTML_PARSER = 'html.parser'
|
||||||
|
|
||||||
|
|
||||||
class UnexpectedMarkupError(Exception):
|
class UnexpectedMarkupError(Exception):
|
||||||
@ -113,7 +114,7 @@ async def trs_from_page(url: str, session: ClientSession = None, limit: int = No
|
|||||||
session = ClientSession()
|
session = ClientSession()
|
||||||
async with session.get(url) as response:
|
async with session.get(url) as response:
|
||||||
html = await response.text()
|
html = await response.text()
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, HTML_PARSER)
|
||||||
try:
|
try:
|
||||||
return soup.find('div', {'id': 'marketsindex'}).table.tbody.find_all('tr', limit=limit)
|
return soup.find('div', {'id': 'marketsindex'}).table.tbody.find_all('tr', limit=limit)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
|
@ -20,13 +20,41 @@ class ScrapeTestCase(TestCase):
|
|||||||
self.assertListEqual(expected_output, output)
|
self.assertListEqual(expected_output, output)
|
||||||
mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)])
|
mock_get_single_tr_data.assert_has_calls([call(input1), call(input2), call(input3)])
|
||||||
|
|
||||||
|
@patch.object(scrape, 'get_str_from_td')
|
||||||
|
def test_get_single_tr_data(self, mock_get_str_from_td: MagicMock):
|
||||||
|
a, b, x = 'a', 'b', 'x'
|
||||||
|
mock_get_str_from_td.return_value = x
|
||||||
|
test_html = f'<tr> <td><a>{a}<small>({b})</small></a></td>' + \
|
||||||
|
f'<td>foo</td> <td>bar</td> <td>baz</td> </tr>'
|
||||||
|
test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
|
||||||
|
test_tds = test_tr.find_all('td')
|
||||||
|
output = scrape.get_single_tr_data(test_tr)
|
||||||
|
expected_output = (a, b, x, x, x)
|
||||||
|
self.assertTupleEqual(expected_output, output)
|
||||||
|
mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])
|
||||||
|
|
||||||
|
test_html = f'<tr> <td><a>{a}<small>***{b}***</small></a></td>' + \
|
||||||
|
f'<td>foo</td> <td>bar</td> <td>baz</td> </tr>'
|
||||||
|
test_tr = BeautifulSoup(test_html, scrape.HTML_PARSER).tr
|
||||||
|
test_tds = test_tr.find_all('td')
|
||||||
|
output = scrape.get_single_tr_data(test_tr)
|
||||||
|
expected_output = (a, f'***{b}***', x, x, x)
|
||||||
|
self.assertTupleEqual(expected_output, output)
|
||||||
|
mock_get_str_from_td.assert_has_calls([call(test_tds[1]), call(test_tds[2]), call(test_tds[3])])
|
||||||
|
|
||||||
def test_get_str_from_td(self):
|
def test_get_str_from_td(self):
|
||||||
expected_output = 'foo bar'
|
expected_output = 'foo bar'
|
||||||
test_td = BeautifulSoup(f'<td> {expected_output} </td>', 'html.parser').td
|
test_td = BeautifulSoup(f'<td> {expected_output} </td>', scrape.HTML_PARSER).td
|
||||||
output = scrape.get_str_from_td(test_td)
|
output = scrape.get_str_from_td(test_td)
|
||||||
self.assertEqual(expected_output, output)
|
self.assertEqual(expected_output, output)
|
||||||
|
|
||||||
expected_output = ''
|
expected_output = ''
|
||||||
test_td = BeautifulSoup('<td></td>', 'html.parser').td
|
test_td = BeautifulSoup('<td></td>', scrape.HTML_PARSER).td
|
||||||
output = scrape.get_str_from_td(test_td)
|
output = scrape.get_str_from_td(test_td)
|
||||||
self.assertEqual(expected_output, output)
|
self.assertEqual(expected_output, output)
|
||||||
|
|
||||||
|
def test_trs_from_page(self):
|
||||||
|
# Tested function takes URL as argument (GET request is issued)
|
||||||
|
# HTML to be parsed could be substituted
|
||||||
|
#
|
||||||
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user