Only use one HTTP session in the main function
This commit is contained in:
		@@ -61,8 +61,9 @@ def get_str_from_td(td: Tag) -> str:
 | 
			
		||||
    return str(content).strip()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def all_trs_from_page(url: str) -> ResultSet:
 | 
			
		||||
    async with ClientSession() as session:
 | 
			
		||||
async def all_trs_from_page(url: str, session: ClientSession = None) -> ResultSet:
 | 
			
		||||
    if session is None:
 | 
			
		||||
        session = ClientSession()
 | 
			
		||||
    async with session.get(url) as response:
 | 
			
		||||
        html = await response.text()
 | 
			
		||||
    soup = BeautifulSoup(html, 'html.parser')
 | 
			
		||||
@@ -76,24 +77,27 @@ async def all_trs_from_page(url: str) -> ResultSet:
 | 
			
		||||
        raise UnexpectedMarkupError
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def all_data_from_category(category: str) -> list[row_type]:
 | 
			
		||||
async def all_data_from_category(category: str, session: ClientSession = None) -> list[row_type]:
 | 
			
		||||
    log.info(f"Getting companies starting with '{category}'")
 | 
			
		||||
    if session is None:
 | 
			
		||||
        session = ClientSession()
 | 
			
		||||
    data: list[row_type] = []
 | 
			
		||||
    page = 1
 | 
			
		||||
    trs = await all_trs_from_page(f'{BASE_URL}{category}')
 | 
			
		||||
    trs = await all_trs_from_page(f'{BASE_URL}{category}', session)
 | 
			
		||||
    while len(trs) > 0:
 | 
			
		||||
        log.info(f"Scraping page {page}")
 | 
			
		||||
        log.info(f"Scraping '{category}' page {page}")
 | 
			
		||||
        data.extend(data_from_rows(trs))
 | 
			
		||||
        page += 1
 | 
			
		||||
        trs = await all_trs_from_page(f'{BASE_URL}{category}/{page}')
 | 
			
		||||
        trs = await all_trs_from_page(f'{BASE_URL}{category}/{page}', session)
 | 
			
		||||
    return data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def get_all_data(asynchronous: bool = False) -> list[row_type]:
 | 
			
		||||
    async with ClientSession() as session:
 | 
			
		||||
        if asynchronous:
 | 
			
		||||
        results = await asyncio.gather(*(all_data_from_category(category) for category in CATEGORIES))
 | 
			
		||||
            results = await asyncio.gather(*(all_data_from_category(category, session) for category in CATEGORIES))
 | 
			
		||||
        else:
 | 
			
		||||
        results = [await all_data_from_category(category) for category in CATEGORIES]
 | 
			
		||||
            results = [await all_data_from_category(category, session) for category in CATEGORIES]
 | 
			
		||||
    data = []
 | 
			
		||||
    for result in results:
 | 
			
		||||
        data.extend(result)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user