"""Proof-of-concept scraper for stock symbols listed on MarketWatch.

First draft: extracts company name, stock symbol, country, exchange and
sector from a single row of the stock listing table.
"""

import re

from requests import get
from bs4 import BeautifulSoup
from bs4.element import Tag


row_type = tuple[str, str, str, str, str]

# Compiled once at import time instead of on every `get_row_data` call.
# Besides word characters it accepts '.', '&' and '-' so that a symbol such as
# "(BRK.A)" is matched instead of making the search come back empty.
STOCK_SYMBOL_PATTERN = re.compile(r'\(([\w.&-]+)\)')

LISTING_URL = 'https://www.marketwatch.com/tools/markets/stocks/a-z/0-9'
# Without a timeout `requests` may wait on an unresponsive server indefinitely.
REQUEST_TIMEOUT_SECONDS = 30
# Rows printed by the demo run; indices past the end of the table are skipped.
SAMPLE_ROW_INDICES = (0, 1, 149)


def get_row_data(table_row: Tag) -> row_type:
    """Return the company data contained in one ``<tr>`` of the listing table.

    Args:
        table_row: Row whose first four ``<td>`` cells are read, in order, as
            the company link (name text followed by a nested tag holding the
            parenthesised symbol), the country, the exchange and the sector.

    Returns:
        ``(company_name, stock_symbol, country, exchange, sector)``. Name and
        symbol are stripped of surrounding whitespace; the other three fields
        are returned exactly as found in the markup.

    Raises:
        ValueError: If the first cell contains no parenthesised stock symbol.
    """
    tds = table_row.find_all('td')
    link = tds[0].a
    company_name = str(link.contents[0]).strip()
    symbol_text = str(link.contents[1].contents[0]).strip()
    match = STOCK_SYMBOL_PATTERN.search(symbol_text)
    if match is None:
        # Fail with the offending text rather than an AttributeError on None.
        raise ValueError(f'no stock symbol found in {symbol_text!r}')
    stock_symbol = match.group(1)
    country = str(tds[1].contents[0])
    exchange = str(tds[2].contents[0])
    sector = str(tds[3].contents[0])
    return company_name, stock_symbol, country, exchange, sector


def main() -> None:
    """Fetch the sample listing page and print a few of its parsed rows."""
    response = get(LISTING_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    # Stop on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    index_div = soup.find('div', {'id': 'marketsindex'})
    if index_div is None:
        raise SystemExit(f'no "marketsindex" div found at {LISTING_URL}')
    trs = index_div.table.tbody.find_all('tr')
    data: list[row_type] = [
        get_row_data(trs[index])
        for index in SAMPLE_ROW_INDICES
        if index < len(trs)
    ]
    for row in data:
        print(row)


if __name__ == '__main__':
    main()