Compare commits

..

No commits in common. "d9344b1b4b271edfc0d5f75db0187146538d6872" and "19d5d1e3ebab381453f3bff1b4cd8e7a3126667d" have entirely different histories.

6 changed files with 12 additions and 58 deletions

View File

@@ -1,10 +0,0 @@
[run]
source = .
branch = true
omit =
.venv/*
[report]
fail_under = 100
show_missing = True
skip_covered = True

4
.gitignore vendored
View File

@@ -4,7 +4,3 @@
/.idea/ /.idea/
# Distribution / packaging: # Distribution / packaging:
*.egg-info/ *.egg-info/
# Python cache:
__pycache__/
# Tests:
.coverage

View File

@@ -1,3 +0,0 @@
#!/usr/bin/env sh
coverage erase && coverage run -m unittest discover && coverage report

View File

@@ -19,7 +19,7 @@ keywords = webscraping, html, markup, dom, scraper, attributes, tags, stocks, fi
package_dir = package_dir =
= src = src
packages = find: packages = find:
python_requires = >=3.8 python_requires = >=3.7
install_requires = install_requires =
beautifulsoup4 beautifulsoup4
aiohttp aiohttp

View File

@@ -8,7 +8,7 @@ from pathlib import Path
from . import get_all_data, log from . import get_all_data, log
async def main() -> None: def main() -> None:
parser = ArgumentParser(description="Scrape all stock symbols") parser = ArgumentParser(description="Scrape all stock symbols")
parser.add_argument( parser.add_argument(
'-v', '--verbose', '-v', '--verbose',
@@ -29,7 +29,7 @@ async def main() -> None:
if args.verbose: if args.verbose:
log.setLevel(logging.DEBUG) log.setLevel(logging.DEBUG)
data = await get_all_data(args.sequential) data = asyncio.run(get_all_data(args.sequential))
if args.to_file is None: if args.to_file is None:
csv.writer(sys.stdout).writerows(data) csv.writer(sys.stdout).writerows(data)
@@ -39,4 +39,4 @@ async def main() -> None:
if __name__ == '__main__': if __name__ == '__main__':
asyncio.run(main()) main()

View File

@@ -1,13 +1,12 @@
import logging from unittest import TestCase
from unittest import IsolatedAsyncioTestCase from unittest.mock import patch, MagicMock, call
from unittest.mock import patch, MagicMock, AsyncMock, call
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from stocksymbolscraper import scrape from stocksymbolscraper import scrape
class ScrapeTestCase(IsolatedAsyncioTestCase): class ScrapeTestCase(TestCase):
@patch.object(scrape, 'get_single_tr_data') @patch.object(scrape, 'get_single_tr_data')
def test_extract_row_data(self, mock_get_single_tr_data: MagicMock): def test_extract_row_data(self, mock_get_single_tr_data: MagicMock):
@@ -54,36 +53,8 @@ class ScrapeTestCase(IsolatedAsyncioTestCase):
output = scrape.get_str_from_td(test_td) output = scrape.get_str_from_td(test_td)
self.assertEqual(expected_output, output) self.assertEqual(expected_output, output)
@patch.object(scrape, 'ClientSession')
async def test_soup_from_url(self, mock_session_cls):
test_html = '<b>foo</b>'
mock_response = MagicMock()
mock_response.text = AsyncMock(return_value=test_html)
mock_get_return = MagicMock()
mock_get_return.__aenter__ = AsyncMock(return_value=mock_response)
mock_session_obj = MagicMock()
mock_session_obj.get = MagicMock(return_value=mock_get_return)
mock_session_cls.return_value = mock_session_obj
output = await scrape.soup_from_url('foo')
expected_output = BeautifulSoup(test_html, scrape.HTML_PARSER)
self.assertEqual(expected_output, output)
output = await scrape.soup_from_url('foo', mock_session_obj)
self.assertEqual(expected_output, output)
def test_trs_from_page(self): def test_trs_from_page(self):
tr1_text, tr2_text = '<tr>foo</tr>', '<tr>bar</tr>' # Tested function takes URL as argument (GET request is issued)
test_html = f'<div id="marketsindex"><table><tbody>{tr1_text}{tr2_text}</tbody></table></div>' # HTML to be parsed could be substituted
test_soup = BeautifulSoup(test_html, scrape.HTML_PARSER) #
output = scrape.trs_from_page(test_soup) pass
expected_output = test_soup.find_all('tr')
self.assertSequenceEqual(expected_output, output)
logging.disable(logging.CRITICAL)
test_html = f'<div id="marketsindex"><table>garbage</table></div>'
test_soup = BeautifulSoup(test_html, scrape.HTML_PARSER)
with patch.object(scrape, 'open') as mock_open:
self.assertRaises(scrape.UnexpectedMarkupError, scrape.trs_from_page, test_soup)
mock_open.assert_called_once()
mock_open.return_value.__enter__.return_value.write.assert_called_once_with(test_soup.prettify())
logging.disable(logging.NOTSET)