First draft (proof of concept) for getting data from a single row/company
This commit is contained in:
		@@ -0,0 +1,30 @@
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from requests import get
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
from bs4.element import Tag
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# One scraped table row, in the order returned by get_row_data:
# (company name, stock symbol, country, exchange, sector).
row_type = tuple[str, str, str, str, str]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Ticker wrapped in parentheses, e.g. "(AAPL)" or "(BRK.A)".  Anything except
# parentheses/whitespace is accepted so symbols containing dots or dashes
# do not fail to match.  Compiled once instead of on every call.
_STOCK_SYMBOL_PATTERN = re.compile(r'\(([^()\s]+)\)')


def _first_child_text(node) -> str:
    """Return the first child of *node* as stripped text, or '' if it has no children."""
    children = node.contents
    return str(children[0]).strip() if children else ''


def get_row_data(table_row: Tag) -> row_type:
    """Extract the company fields from one ``<tr>`` of the stock listing table.

    The row is expected to hold at least four ``<td>`` cells: the first one
    contains an ``<a>`` whose first child is the company name and whose second
    child wraps the parenthesised stock symbol; the next three cells hold the
    country, exchange and sector.

    Args:
        table_row: The ``<tr>`` element to read.

    Returns:
        ``(company_name, stock_symbol, country, exchange, sector)``, each
        stripped of surrounding whitespace.  An empty cell yields ``''``.

    Raises:
        ValueError: If the row has fewer than four cells, the first cell has
            no usable link, or no parenthesised symbol can be found.
    """
    tds = table_row.find_all('td')
    if len(tds) < 4:
        raise ValueError(f'expected at least 4 <td> cells, found {len(tds)}')

    link = tds[0].a
    if link is None or len(link.contents) < 2:
        raise ValueError('first cell does not contain a company link with a symbol')

    company_name = str(link.contents[0]).strip()

    # NOTE(review): assumes the link's second child is an element (not bare
    # text) whose first child is the "(SYMBOL)" string - confirm against the page.
    symbol_text = _first_child_text(link.contents[1])
    match = _STOCK_SYMBOL_PATTERN.search(symbol_text)
    if match is None:
        raise ValueError(f'no stock symbol found in {symbol_text!r}')
    stock_symbol = match.group(1)

    # Cells may be empty (no children); treat those as empty strings rather
    # than failing with an IndexError.
    country = _first_child_text(tds[1])
    exchange = _first_child_text(tds[2])
    sector = _first_child_text(tds[3])
    return company_name, stock_symbol, country, exchange, sector
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
    # Proof of concept: fetch one listing page and print a few sample rows.
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = get(
        'https://www.marketwatch.com/tools/markets/stocks/a-z/0-9',
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    container = soup.find('div', {'id': 'marketsindex'})
    if container is None:
        raise SystemExit("could not find <div id='marketsindex'> in the response")
    trs = container.table.tbody.find_all('tr')

    # Sample rows: the first two and the 150th; skip any index the page
    # does not have rather than crashing with an IndexError.
    for sample_index in (0, 1, 149):
        if sample_index < len(trs):
            print(get_row_data(trs[sample_index]))

    # Placeholder for the collected rows; not populated yet in this draft.
    data: list[row_type] = []
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user