added function `gather_in_batches`

This commit is contained in:
Daniil Fajnberg 2021-11-30 15:20:27 +01:00
parent 55ff24e180
commit 406828eaca
3 changed files with 38 additions and 2 deletions

View File

@ -2,6 +2,12 @@
Stuff I frequently use in various unrelated projects that deal with web requests.
## Functions
### gather_in_batches
Makes it more convenient to run awaitable objects in concurrent batches.
## Decorators
### in_async_session

View File

@ -1,6 +1,6 @@
[metadata]
name = webutils-df
version = 0.0.2
version = 0.0.3
author = Daniil F.
author_email = mail@placeholder123.to
description = Miscellaneous web utilities

View File

@ -1,7 +1,8 @@
import logging
import asyncio
from functools import wraps
from inspect import signature
from typing import Callable, Dict, Tuple, Any
from typing import Callable, Awaitable, Dict, Tuple, Any
from aiohttp.client import ClientSession
@ -83,3 +84,32 @@ def in_async_session(_func: Callable = None, *,
logger.debug("Temporary `ClientSession` closed")
return wrapper
return decorator if _func is None else decorator(_func)
async def gather_in_batches(batch_size: int, *aws: Awaitable, return_exceptions: bool = False) -> list:
    """
    Simple extension of the `asyncio.gather` function to make it easy to run awaitable objects in concurrent batches.
    (see: https://docs.python.org/3/library/asyncio-task.html#asyncio.gather)
    A batch is run concurrently using `gather`, while the calls to `gather` for each batch are done sequentially.
    This can be useful if for example there is a very large number of requests to a single website to be made
    and you want to make use of concurrency, but not by performing all of them at the same time.
    Args:
        batch_size:
            The maximum number of awaitable objects to run concurrently at any given moment.
            If this is higher than the number of awaitable objects, this is equivalent to a single `gather` call.
            Every batch will have this number of concurrent runs, but the last batch can obviously be smaller.
        aws:
            The awaitable objects/coroutines; subsets of these are passed to `gather` always maintaining overall order
        return_exceptions (optional):
            Passed into each `gather` call
    Returns:
        The aggregate list of returned values for all awaitable objects.
        The order of result values corresponds to the order of `aws`.
    Raises:
        ValueError: If `batch_size` is less than 1.
    """
    if batch_size < 1:
        # Guard explicitly: a batch size of 0 would raise an opaque `range` error,
        # and a negative one would silently return [] leaving all awaitables un-awaited.
        raise ValueError("`batch_size` must be a positive integer")
    results = []
    for idx in range(0, len(aws), batch_size):
        # Slicing preserves overall order; each slice is awaited before the next batch starts.
        results.extend(await asyncio.gather(*aws[idx:idx + batch_size], return_exceptions=return_exceptions))
    return results