Add docstrings for all functions except 'main'
This commit is contained in:
parent
973f3524f4
commit
1e5bb78053
@ -36,10 +36,30 @@ class UnexpectedMarkupError(Exception):
|
|||||||
|
|
||||||
|
|
||||||
def extract_row_data(*table_rows: Tag) -> list[row_type]:
    """
    Collects the data of every given table row into a single list.

    Each row is handed to `get_single_tr_data`; the results keep the order
    in which the rows were passed.

    Args:
        table_rows:
            Any number of 'tr' Tag objects whose data should be extracted.

    Returns:
        A list holding one 5-tuple (of string elements) per table row
    """
    return list(map(get_single_tr_data, table_rows))
|
||||||
|
|
||||||
|
|
||||||
def get_single_tr_data(table_row: Tag) -> row_type:
|
def get_single_tr_data(table_row: Tag) -> row_type:
|
||||||
|
"""
|
||||||
|
Returns the data from a table row.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table_row:
|
||||||
|
Specific 'tr' Tag object to be processed for data.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A 5-tuple of string elements
|
||||||
|
"""
|
||||||
tds = table_row.find_all('td')
|
tds = table_row.find_all('td')
|
||||||
company_name = str(tds[0].a.contents[0]).strip()
|
company_name = str(tds[0].a.contents[0]).strip()
|
||||||
stock_symbol = str(tds[0].a.contents[1].contents[0]).strip()
|
stock_symbol = str(tds[0].a.contents[1].contents[0]).strip()
|
||||||
@ -55,6 +75,16 @@ def get_single_tr_data(table_row: Tag) -> row_type:
|
|||||||
|
|
||||||
|
|
||||||
def get_str_from_td(td: Tag) -> str:
|
def get_str_from_td(td: Tag) -> str:
|
||||||
|
"""
|
||||||
|
Returns content of a 'td' Tag object as a string. The only content has to be a NavigableString object.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
td:
|
||||||
|
The table cell to be converted into a string.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
String content from a cell
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
content = td.contents[0]
|
content = td.contents[0]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
@ -63,6 +93,26 @@ def get_str_from_td(td: Tag) -> str:
|
|||||||
|
|
||||||
|
|
||||||
async def trs_from_page(url: str, session: ClientSession = None, limit: int = None) -> ResultSet:
|
async def trs_from_page(url: str, session: ClientSession = None, limit: int = None) -> ResultSet:
|
||||||
|
"""
|
||||||
|
Returns the table rows found on the specified page.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url:
|
||||||
|
URL string leading to a page with matching content.
|
||||||
|
session (optional):
|
||||||
|
If passed a ClientSession instance, all HTTP requests will be made using that session;
|
||||||
|
otherwise a new one is created.
|
||||||
|
limit (optional):
|
||||||
|
Stop looking after finding this many results;
|
||||||
|
finds all matches by default.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
UnexpectedMarkupError:
|
||||||
|
If no table or table body are found.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A ResultSet object containing all extracted 'tr' Tag objects
|
||||||
|
"""
|
||||||
if session is None:
|
if session is None:
|
||||||
session = ClientSession()
|
session = ClientSession()
|
||||||
async with session.get(url) as response:
|
async with session.get(url) as response:
|
||||||
@ -112,6 +162,17 @@ async def get_data_from_category(category: str, session: ClientSession = None,
|
|||||||
|
|
||||||
|
|
||||||
async def get_all_data(sequential: bool = False) -> list[row_type]:
|
async def get_all_data(sequential: bool = False) -> list[row_type]:
|
||||||
|
"""
|
||||||
|
Returns a list with every available data row from all categories.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sequential (optional):
|
||||||
|
Whether or not to forgo the asynchronous gathering capabilities;
|
||||||
|
by default requests are issued concurrently.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of 5-tuples (of strings)
|
||||||
|
"""
|
||||||
async with ClientSession() as session:
|
async with ClientSession() as session:
|
||||||
if sequential:
|
if sequential:
|
||||||
results = [await get_data_from_category(category, session) for category in CATEGORIES]
|
results = [await get_data_from_category(category, session) for category in CATEGORIES]
|
||||||
|
Loading…
Reference in New Issue
Block a user