Add docstrings for all functions except 'main'
This commit is contained in:
parent
973f3524f4
commit
1e5bb78053
@ -36,10 +36,30 @@ class UnexpectedMarkupError(Exception):
|
||||
|
||||
|
||||
def extract_row_data(*table_rows: Tag) -> list[row_type]:
    """
    Extracts the data from each of the given table rows.

    Args:
        table_rows:
            Any number of 'tr' Tag objects; each one is passed to `get_single_tr_data`.

    Returns:
        A list of 5-tuples (of string elements), one per row, in the order the rows were passed
    """
    # Delegate the per-row parsing; map keeps the argument order intact.
    return list(map(get_single_tr_data, table_rows))
|
||||
|
||||
|
||||
def get_single_tr_data(table_row: Tag) -> row_type:
|
||||
"""
|
||||
Returns the data from a table row.
|
||||
|
||||
Args:
|
||||
table_row:
|
||||
Specific 'tr' Tag object to be processed for data.
|
||||
|
||||
Returns:
|
||||
A 5-tuple of string elements
|
||||
"""
|
||||
tds = table_row.find_all('td')
|
||||
company_name = str(tds[0].a.contents[0]).strip()
|
||||
stock_symbol = str(tds[0].a.contents[1].contents[0]).strip()
|
||||
@ -55,6 +75,16 @@ def get_single_tr_data(table_row: Tag) -> row_type:
|
||||
|
||||
|
||||
def get_str_from_td(td: Tag) -> str:
|
||||
"""
|
||||
Returns the content of a 'td' Tag object as a string. The cell's only content must be a NavigableString object.
|
||||
|
||||
Args:
|
||||
td:
|
||||
The table cell to be converted into a string.
|
||||
|
||||
Returns:
|
||||
String content from a cell
|
||||
"""
|
||||
try:
|
||||
content = td.contents[0]
|
||||
except IndexError:
|
||||
@ -63,6 +93,26 @@ def get_str_from_td(td: Tag) -> str:
|
||||
|
||||
|
||||
async def trs_from_page(url: str, session: ClientSession = None, limit: int = None) -> ResultSet:
|
||||
"""
|
||||
Returns the table rows found on the specified page.
|
||||
|
||||
Args:
|
||||
url:
|
||||
URL string leading to a page with matching content.
|
||||
session (optional):
|
||||
If passed a ClientSession instance, all HTTP requests will be made using that session;
|
||||
otherwise a new one is created.
|
||||
limit (optional):
|
||||
Stop looking after finding this many results;
|
||||
finds all matches by default.
|
||||
|
||||
Raises:
|
||||
UnexpectedMarkupError:
|
||||
If no table or table body are found.
|
||||
|
||||
Returns:
|
||||
A ResultSet object containing all extracted 'tr' Tag objects
|
||||
"""
|
||||
if session is None:
|
||||
session = ClientSession()
|
||||
async with session.get(url) as response:
|
||||
@ -112,6 +162,17 @@ async def get_data_from_category(category: str, session: ClientSession = None,
|
||||
|
||||
|
||||
async def get_all_data(sequential: bool = False) -> list[row_type]:
|
||||
"""
|
||||
Returns a list with every available data row from all categories.
|
||||
|
||||
Args:
|
||||
sequential (optional):
|
||||
Whether or not to forgo the asynchronous gathering capabilities;
|
||||
by default requests are issued concurrently.
|
||||
|
||||
Returns:
|
||||
A list of 5-tuples (of strings)
|
||||
"""
|
||||
async with ClientSession() as session:
|
||||
if sequential:
|
||||
results = [await get_data_from_category(category, session) for category in CATEGORIES]
|
||||
|
Loading…
Reference in New Issue
Block a user