renamed the package for import purposes
This commit is contained in:
5
src/webutils_df/__init__.py
Normal file
5
src/webutils_df/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .util import (
|
||||
in_async_session,
|
||||
attempt,
|
||||
gather_in_batches
|
||||
)
|
||||
176
src/webutils_df/util.py
Normal file
176
src/webutils_df/util.py
Normal file
@@ -0,0 +1,176 @@
|
||||
import logging
|
||||
import asyncio
|
||||
from functools import wraps
|
||||
from inspect import signature
|
||||
from math import inf
|
||||
from timeit import default_timer
|
||||
from typing import Callable, Awaitable, Dict, Tuple, Sequence, Any, Type, Union, TypeVar
|
||||
|
||||
from aiohttp.client import ClientSession
|
||||
|
||||
|
||||
# Name of the logger used by every helper in this module; exposing it as a constant
# lets applications configure the output via `logging.getLogger(LOGGER_NAME)`.
LOGGER_NAME = 'webutils'
logger = logging.getLogger(LOGGER_NAME)

# Type variable standing for "the decorated async function".
# Bounding it to callables that return an awaitable lets static type checkers reject
# non-async targets while still preserving the concrete type of whatever is decorated.
AsyncFunction = TypeVar('AsyncFunction', bound=Callable[..., Awaitable[Any]])

# Signature of the optional callback accepted by the `attempt` decorator. Arguments, in order:
# the decorated function, the caught exception, the number of failed attempts so far,
# the pause between attempts in seconds, and the positional/keyword arguments of the failed call.
AttemptCallbackT = Callable[[AsyncFunction, Exception, int, float, tuple, dict], Awaitable[None]]
|
||||
|
||||
|
||||
def _get_param_idx_and_default(function: Callable, param_name: str) -> Tuple[int, Any]:
    """
    Looks up a parameter of `function` by its name.

    Args:
        function:
            The callable whose signature is inspected.
        param_name:
            The name of the parameter to find.

    Returns:
        A 2-tuple consisting of the zero-based position of the parameter in the signature of `function`
        and the parameter's default value (`inspect.Parameter.empty`, if it does not have one).

    Raises:
        ValueError:
            If `function` has no parameter named `param_name`.
    """
    params = signature(function).parameters
    for idx, (name, param) in enumerate(params.items()):
        if name == param_name:
            return idx, param.default
    # Not every callable has a `__name__` (e.g. `functools.partial` objects), hence the fallback.
    function_name = getattr(function, '__name__', repr(function))
    raise ValueError(f"`{function_name}` has no parameter named `{param_name}`")
|
||||
|
||||
|
||||
def in_async_session(_func: AsyncFunction = None, *,
                     session_kwargs: Dict[str, Any] = None, session_param_name: str = 'session') -> AsyncFunction:
    """
    Useful decorator for any async function that uses the `aiohttp.ClientSession` to make requests.

    Using this decorator allows the decorated function to have an optional session parameter,
    without the need to ensure proper initialization and closing of a session within the function body itself.

    The wrapper has no effect, if anything other than the session parameter's default value is passed to it during
    the function call (e.g. an existing session); if the default value is passed or the argument is omitted entirely,
    the wrapper initializes a session, passes it into the function and ensures that it is closed in the end.

    The wrapper distinguishes between positional and keyword arguments. This means that if the function is called by
    passing the default value as a positional argument, the temporary session is injected in its place as a positional
    argument also; if the default is passed as a keyword argument (or the argument is omitted), the temporary session
    is injected as a keyword argument instead.

    Args:
        _func:
            Control parameter; allows using the decorator with or without arguments.
            If this decorator is used *without any* arguments, this will always be the decorated function itself.
            This is a trick to allow the decorator to be used with as well as without arguments, i.e. in the form
            `@in_async_session` or `@in_async_session(...)`.
        session_kwargs (optional):
            If passed a dictionary, it will be unpacked and passed as keyword arguments into the `ClientSession`
            constructor, if and only if the decorator actually handles session initialization/closing,
            i.e. only when the function is called **without** passing a session object into it.
        session_param_name (optional):
            The name of the decorated function's parameter that should be passed the session object as an argument.
            In case the decorated function's session parameter is named anything other than "session", that name should
            be provided here.
    """
    if session_kwargs is None:
        session_kwargs = {}

    def decorator(function: AsyncFunction) -> AsyncFunction:
        # Using `functools.wraps` to preserve information about the actual function being decorated
        # More details: https://docs.python.org/3/library/functools.html#functools.wraps
        @wraps(function)
        async def wrapper(*args, **kwargs) -> Any:
            """The actual function wrapper that may perform the session initialization and closing."""
            temp_session = None
            session_param_idx, session_param_default = _get_param_idx_and_default(function, session_param_name)
            if session_param_idx < len(args):
                # The session argument was passed positionally.
                if args[session_param_idx] == session_param_default:
                    logger.debug(f"Injecting temporary `ClientSession` as positional argument {session_param_idx} "
                                 f"into `{function.__name__}`")
                    temp_session = ClientSession(**session_kwargs)
                    args = list(args)
                    args[session_param_idx] = temp_session
            # No positional session argument was passed. Falling back to the parameter's own default (rather than
            # `None`) makes an omitted argument count as "default passed", whatever that default happens to be.
            elif kwargs.get(session_param_name, session_param_default) == session_param_default:
                logger.debug(f"Injecting temporary `ClientSession` as keyword argument `{session_param_name}` "
                             f"into `{function.__name__}`")
                temp_session = ClientSession(**session_kwargs)
                kwargs[session_param_name] = temp_session
            try:
                return await function(*args, **kwargs)
            finally:
                # Only a session created by this wrapper is closed; a session passed in by the caller is left alone.
                if temp_session is not None:
                    await temp_session.close()
                    logger.debug("Temporary `ClientSession` closed")
        return wrapper
    return decorator if _func is None else decorator(_func)
|
||||
|
||||
|
||||
def attempt(_func: AsyncFunction = None, *,
            exception: Union[Type[Exception], Sequence[Type[Exception]]] = Exception,
            max_attempts: float = inf,
            timeout_seconds: float = inf,
            seconds_between: float = 0,
            callback: AttemptCallbackT = None) -> AsyncFunction:
    """
    Decorator allowing an async function to be called repeatedly, if previous attempts cause specific exceptions.
    Note: If no limiting arguments are passed to the decorator, the decorated function **will** be called repeatedly in
    a potentially infinite loop, as long as it keeps throwing an exception.

    Args:
        _func:
            Control parameter; allows using the decorator with or without arguments.
            If this decorator is used *without any* arguments, this will always be the decorated function itself.
        exception (optional):
            An `Exception` (sub-)class or a sequence thereof (tuple, list, ...); a failed call of the decorated
            function will only be repeated if it fails with a matching exception.
            Defaults to `Exception`, i.e. any exception.
        max_attempts (optional):
            The maximum number of (re-)attempts at calling the decorated function; if it is called `max_attempts` times
            and fails, the exception will be propagated. The number of attempts is unlimited by default.
        timeout_seconds (optional):
            Defines the cutoff time (in seconds) for the entirety of attempts at executing the decorated function;
            if the attempts take longer in total and fail, the exception will be propagated. No timeout by default.
            The cutoff is only checked after a failed attempt; a running attempt is never interrupted.
        seconds_between (optional):
            Sets a sleep interval (in seconds) between each attempt to call the decorated function. Defaults to 0.
        callback (optional):
            If passed an async function (with matching parameters), a failed **and caught** attempt will call it with
            the following positional arguments (in that order):
            - the decorated async function itself
            - the exception instance encountered and caught
            - the total number of failed attempts up to that point
            - the `seconds_between` argument
            - positional and keyword arguments (as tuple and dictionary respectively) passed to the decorated function
            The callback is awaited before the sleep interval and is not called for the final, propagated failure.

    Raises:
        Any exceptions that do **not** match those passed to the `exception` parameter are immediately propagated.
        Those that were specified in `exception` are propagated when `max_attempts` or `timeout_seconds` are reached.
    """
    # An `except` clause only accepts a single class or a *tuple* of classes; any other sequence (e.g. a list)
    # would cause a `TypeError` the moment an exception is matched against it, so it is normalized up front.
    caught_exceptions = exception if isinstance(exception, type) else tuple(exception)

    def decorator(function: AsyncFunction) -> AsyncFunction:
        # Using `functools.wraps` to preserve information about the actual function being decorated
        # More details: https://docs.python.org/3/library/functools.html#functools.wraps
        @wraps(function)
        async def wrapper(*args, **kwargs) -> Any:
            """Calls the decorated function until it succeeds or one of the limits is reached."""
            start, failed_attempts = default_timer(), 0
            while True:
                try:
                    return await function(*args, **kwargs)
                except caught_exceptions as e:
                    failed_attempts += 1
                    if default_timer() - start >= timeout_seconds or failed_attempts >= max_attempts:
                        # Limits reached; re-raise the caught exception with its original traceback.
                        raise
                    if callback is not None:
                        await callback(function, e, failed_attempts, seconds_between, args, kwargs)
                    await asyncio.sleep(seconds_between)
        return wrapper
    return decorator if _func is None else decorator(_func)
|
||||
|
||||
|
||||
async def gather_in_batches(batch_size: int, *aws: Awaitable, return_exceptions: bool = False) -> list:
    """
    Simple extension of the `asyncio.gather` function to make it easy to run awaitable objects in concurrent batches.
    (see: https://docs.python.org/3/library/asyncio-task.html#asyncio.gather)

    A batch is run concurrently using `gather`, while the calls to `gather` for each batch are done sequentially.
    This can be useful if for example there is a very large number of requests to a single website to be made
    and you want to make use of concurrency, but not by performing all of them at the same time.

    Args:
        batch_size:
            The maximum number of awaitable objects to run concurrently at any given moment; must be at least 1.
            If this is higher than the number of awaitable objects, this is equivalent to a single `gather` call.
            Every batch will have this number of concurrent runs, but the last batch can obviously be smaller.
        aws:
            The awaitable objects/coroutines; subsets of these are passed to `gather` always maintaining overall order
        return_exceptions (optional):
            Passed into each `gather` call

    Returns:
        The aggregate list of returned values for all awaitable objects.
        The order of result values corresponds to the order of `aws`.

    Raises:
        ValueError:
            If `batch_size` is less than 1.
    """
    # A negative step would make the `range` below empty, silently returning no results at all
    # without ever awaiting any of the objects; reject it explicitly, just like a step of zero.
    if batch_size < 1:
        raise ValueError(f"`batch_size` must be a positive integer, not {batch_size!r}")
    results = []
    for start in range(0, len(aws), batch_size):
        batch = aws[start:start + batch_size]
        results.extend(await asyncio.gather(*batch, return_exceptions=return_exceptions))
    return results
|
||||
Reference in New Issue
Block a user