docstrings for all functions besides 'main'

This commit is contained in:
Maximilian Fajnberg 2021-11-12 16:08:58 +01:00
parent 973f3524f4
commit 1e5bb78053
1 changed files with 61 additions and 0 deletions

View File

@ -36,10 +36,30 @@ class UnexpectedMarkupError(Exception):
def extract_row_data(*table_rows: Tag) -> list[row_type]:
    """
    Collects the data of every table row that is passed in.

    Each row is handed to `get_single_tr_data`; the results are gathered in the same order
    in which the rows were given. Calling this without any rows yields an empty list.

    Args:
        table_rows:
            Any number of 'tr' Tag objects whose data should be extracted.

    Returns:
        A list with one `get_single_tr_data` result (a 5-tuple of strings) per row
    """
    # map() applies the single-row extractor lazily; list() materializes the results in order.
    return list(map(get_single_tr_data, table_rows))
def get_single_tr_data(table_row: Tag) -> row_type:
"""
Returns the data from a table row.
Args:
table_row:
Specific 'tr' Tag object to be processed for data.
Returns:
A 5-tuple of string elements
"""
tds = table_row.find_all('td')
company_name = str(tds[0].a.contents[0]).strip()
stock_symbol = str(tds[0].a.contents[1].contents[0]).strip()
@ -55,6 +75,16 @@ def get_single_tr_data(table_row: Tag) -> row_type:
def get_str_from_td(td: Tag) -> str:
"""
Returns content of a 'td' Tag object as a string. The only content has to be a NavigableString object.
Args:
td:
The table cell to be converted into a string.
Returns:
String content from a cell
"""
try:
content = td.contents[0]
except IndexError:
@ -63,6 +93,26 @@ def get_str_from_td(td: Tag) -> str:
async def trs_from_page(url: str, session: ClientSession = None, limit: int = None) -> ResultSet:
"""
Returns the table rows found on the specified page.
Args:
url:
URL string leading to a page with matching content.
session (optional):
If passed a ClientSession instance, all HTTP requests will be made using that session;
otherwise a new one is created.
limit (optional):
Stop looking after finding this many results;
finds all matches by default.
Raises:
UnexpectedMarkupError:
If no table or table body are found.
Returns:
A ResultSet object containing all extracted 'tr' Tag objects
"""
if session is None:
session = ClientSession()
async with session.get(url) as response:
@ -112,6 +162,17 @@ async def get_data_from_category(category: str, session: ClientSession = None,
async def get_all_data(sequential: bool = False) -> list[row_type]:
"""
Returns a list with every available data row from all categories.
Args:
sequential (optional):
Whether or not to forgo the asynchronous gathering capabilities;
by default requests are issued concurrently.
Returns:
A list of 5-tuples (of strings)
"""
async with ClientSession() as session:
if sequential:
results = [await get_data_from_category(category, session) for category in CATEGORIES]