# ff-performance-tests/py/gen_data.py
#
# Repository-viewer metadata captured together with this file:
# 138 lines, 5.1 KiB, Python; timestamp shown: 2022-07-20 11:13:04 +02:00
import sys
from argparse import ArgumentParser, ArgumentTypeError
from pathlib import Path
from typing import Optional, Sequence

import numpy as np
# Directory that contains this script; the default data directory is built relative to it.
THIS_DIR = Path(__file__).parent
DEFAULT_DATA_DIR = THIS_DIR / '..' / 'data'

# Base names (without index or extension) of the generated files.
INPUTS_FILE_NAME = 'inputs'
WEIGHTS_FILE_NAME = 'weights'
BIASES_FILE_NAME = 'biases'

# Module-wide random number generator used for every generated value (created without a seed).
RNG = np.random.default_rng()
def extension_with_dot(string: str) -> str:
    """
    Normalize a file extension so that it starts with a dot.

    Surrounding whitespace is stripped first. An empty (or all-whitespace) value yields the empty string,
    and a value that already begins with a dot is returned as is (after stripping).
    """
    stripped = string.strip()
    if not stripped or stripped.startswith('.'):
        return stripped
    return f'.{stripped}'
def _non_negative_int(value: str) -> int:
    """Argparse `type` callable: parse `value` as an integer and reject negative numbers."""
    try:
        number = int(value)
    except ValueError:
        raise ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise ArgumentTypeError(f"must not be negative: {value!r}")
    return number


def parse_cli(args: Optional[list[str]] = None) -> dict:
    """
    Parse the command line arguments of the test data generator.

    Args:
        args: Argument strings to parse; if `None`, argparse falls back to the process' command line.

    Returns:
        Dictionary with the keys `num_inputs`, `num_neurons`, `directory`, `file_extension`, `fmt`,
        `delimiter`, `quiet`, and `yes`.

    Negative dimensions are rejected here (argparse reports the error and exits), so that an invalid value
    is caught before any existing data files are touched.
    """
    parser = ArgumentParser(description="Create test data files (input vector, weight matrices, and bias vectors).")
    parser.add_argument(
        'num_inputs',
        type=_non_negative_int,
        metavar='inputs',
        help="Number of input dimensions. Random values are generated from a uniform distribution between 0.0 and 1.0."
    )
    parser.add_argument(
        'num_neurons',
        nargs='+',
        type=_non_negative_int,
        metavar='neurons',
        help="Number of neurons in a layer. A weights file and a biases file will be created for each value passed. "
             "Random values are generated from a uniform distribution between -1.0 and 1.0."
    )
    parser.add_argument(
        '-d', '--directory',
        type=Path,
        default=DEFAULT_DATA_DIR,
        help=f"Target directory to create the generated files in. Defaults to '{DEFAULT_DATA_DIR}'."
    )
    parser.add_argument(
        '-e', '--file-extension',
        type=extension_with_dot,
        default='.csv',
        help="File extension to use for the generated file names. Defaults to '.csv'."
    )
    parser.add_argument(
        '--fmt',
        default='%f',
        # The doubled percent sign is required because argparse %-formats help strings.
        help="Passed as the `fmt` parameter to numpy's `savetxt` function. Defaults to '%%f'. "
             "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)"
    )
    parser.add_argument(
        '--delimiter',
        default=',',
        help="Passed as the `delimiter` parameter to numpy's `savetxt` function. Defaults to ','. "
             "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)"
    )
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="If this flag is set, no additional info is printed throughout the script."
    )
    parser.add_argument(
        '-Y', '--yes',
        action='store_true',
        help="If this flag is set, confirmation is assumed throughout the script."
    )
    return vars(parser.parse_args(args))
def prepare_directory(directory: Path, file_extension: str, yes: bool = False, quiet: bool = False) -> None:
    """
    Make sure `directory` exists and optionally delete previously generated files from it.

    Args:
        directory: Target directory; it is created (including missing parent directories) if necessary.
        file_extension: Entries directly inside `directory` whose names end with this string are deletion candidates.
        yes: If `True`, the candidates are deleted without asking for confirmation.
        quiet: If `True`, no message is printed after deleting.

    Raises:
        ValueError: If the interactive answer is neither 'y', 'n', nor empty (an empty answer means 'y').
    """
    directory.mkdir(parents=True, exist_ok=True)
    # Skip directories: with an empty extension the pattern is `*`, which also matches sub-directories,
    # and those cannot be removed with `unlink`.
    existing_files = [path for path in directory.glob(f'*{file_extension}') if not path.is_dir()]
    if not existing_files:
        return
    if yes:
        delete = 'y'
    else:
        answer = input(f"{len(existing_files)} existing files with '{file_extension}' extension "
                       f"found in {directory}. Delete these first? [Y/n] ")
        delete = answer.strip().lower() or 'y'
    if delete == 'y':
        for file_path in existing_files:
            file_path.unlink()
        if not quiet:
            print("Deleted existing files.")
    elif delete != 'n':
        raise ValueError(f"Invalid answer {delete!r}; expected 'y' or 'n'.")
def generate_inputs(num_inputs: int, directory: Path, file_extension: str, quiet: bool = False, **kwargs) -> None:
    """
    Create the inputs file inside `directory`.

    A vector of `num_inputs` random values (uniform distribution between 0.0 and 1.0) is written with
    numpy's `savetxt`; all additional keyword arguments are forwarded to that function.
    Unless `quiet` is set, the file path and the shape of the vector are printed.
    """
    target = (Path(directory) / INPUTS_FILE_NAME).with_suffix(file_extension)
    values = RNG.uniform(0.0, 1.0, size=num_inputs)
    np.savetxt(target, values, **kwargs)
    if quiet:
        return
    shape_text = 'x'.join(map(str, values.shape))
    print(target, shape_text)
def generate_layers(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str,
                    quiet: bool = False, **kwargs) -> None:
    """
    Create one weights file and one biases file per layer inside `directory`.

    For the i-th entry `dim` of `num_neurons` (counting from 1) a weight matrix of shape
    `(dim, dim_before)` and a bias vector of length `dim` are generated from a uniform distribution
    between -1.0 and 1.0, where `dim_before` is `num_inputs` for the first layer and the previous
    layer's `dim` afterwards. The files are named after the base name followed by the two-digit
    layer index and `file_extension`.

    Args:
        num_inputs: Number of columns of the first weight matrix.
        num_neurons: Number of neurons for each layer, in order.
        directory: Directory to write the files to.
        file_extension: Suffix for the file names (including the leading dot, or empty).
        quiet: If `True`, file paths and array shapes are not printed.
        **kwargs: Forwarded to numpy's `savetxt`.
    """
    dim_before = num_inputs
    for i, dim in enumerate(num_neurons, start=1):
        weight_matrix = RNG.uniform(-1.0, 1.0, size=(dim, dim_before))
        bias_vector = RNG.uniform(-1.0, 1.0, size=dim)
        # Build each name from scratch instead of using `with_stem` on a suffixed path: `with_stem` treats
        # only the last dot-part as the suffix, so an extension like '.tar.gz' would lose its '.tar'.
        weights_file = Path(directory, f'{WEIGHTS_FILE_NAME}{i:02}').with_suffix(file_extension)
        biases_file = Path(directory, f'{BIASES_FILE_NAME}{i:02}').with_suffix(file_extension)
        np.savetxt(weights_file, weight_matrix, **kwargs)
        np.savetxt(biases_file, bias_vector, **kwargs)
        if not quiet:
            print(weights_file, 'x'.join(str(n) for n in weight_matrix.shape))
            print(biases_file, 'x'.join(str(n) for n in bias_vector.shape))
        dim_before = dim
def generate_data(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str,
                  quiet: bool = False, yes: bool = False, **kwargs) -> None:
    """
    Prepare the target directory and generate the inputs, weights, and biases files in it.

    Args:
        num_inputs: Number of input dimensions.
        num_neurons: Number of neurons for each layer, in order.
        directory: Directory to create the files in.
        file_extension: Extension of the generated files; existing files with it may be deleted first.
        quiet: If `True`, no additional info is printed.
        yes: If `True`, deleting existing files is not confirmed interactively.
        **kwargs: Forwarded to the generating functions (and by them to numpy's `savetxt`).
    """
    # Pass the flags by keyword: `prepare_directory` declares `yes` before `quiet`, so passing
    # them positionally in this function's order would swap their meaning.
    prepare_directory(directory, file_extension, yes=yes, quiet=quiet)
    if not quiet:
        print("Creating new test data...")
    generate_inputs(num_inputs, directory, file_extension, quiet=quiet, **kwargs)
    generate_layers(num_inputs, num_neurons, directory, file_extension, quiet=quiet, **kwargs)
def main() -> None:
    """Script entry point: parse the command line and generate the test data accordingly."""
    cli_options = parse_cli()
    generate_data(**cli_options)


# Only run the generator when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()