"""Create random test data files: one input vector plus per-layer weights and biases.

All arrays are drawn from uniform distributions and written with
``numpy.savetxt`` into a target directory.
"""

import sys
from argparse import ArgumentParser
from pathlib import Path
from typing import Optional, Sequence

import numpy as np


THIS_DIR = Path(__file__).parent
DEFAULT_DATA_DIR = Path(THIS_DIR, '..', 'data')

INPUTS_FILE_NAME = 'inputs'
WEIGHTS_FILE_NAME = 'weights'
BIASES_FILE_NAME = 'biases'

RNG = np.random.default_rng()


def extension_with_dot(string: str) -> str:
    """Return `string` stripped of surrounding whitespace and prefixed with a dot.

    An empty (or whitespace-only) string is returned as the empty string, and a
    string that already starts with a dot is returned unchanged (after stripping).
    """
    string = string.strip()
    if not string or string.startswith('.'):
        return string
    return '.' + string


def parse_cli(args: Optional[list[str]] = None) -> dict:
    """Parse the command line and return the arguments as a plain dictionary.

    Args:
        args: Argument strings to parse; `None` makes argparse fall back to
            `sys.argv[1:]`.

    Returns:
        Dictionary with the keys `num_inputs`, `num_neurons`, `directory`,
        `file_extension`, `fmt`, `delimiter`, `quiet` and `yes`.
    """
    parser = ArgumentParser(description="Create test data files (input vector, weight matrices, and bias vectors).")
    parser.add_argument(
        'num_inputs',
        type=int,
        metavar='inputs',
        help="Number of input dimensions. Random values are generated from a uniform distribution between 0.0 and 1.0."
    )
    parser.add_argument(
        'num_neurons',
        nargs='+',
        type=int,
        metavar='neurons',
        help="Number of neurons in a layer. A weights file and a biases file will be created for each value passed. "
             "Random values are generated from a uniform distribution between -1.0 and 1.0."
    )
    parser.add_argument(
        '-d', '--directory',
        type=Path,
        default=DEFAULT_DATA_DIR,
        help=f"Target directory to create the generated files in. Defaults to '{DEFAULT_DATA_DIR}'."
    )
    parser.add_argument(
        '-e', '--file-extension',
        type=extension_with_dot,
        default='.csv',
        help="File extension to use for the generated file names. Defaults to '.csv'."
    )
    parser.add_argument(
        '--fmt',
        default='%f',
        help="Passed as the `fmt` parameter to numpy's `savetxt` function. Defaults to '%%f'. "
             "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)"
    )
    parser.add_argument(
        '--delimiter',
        default=',',
        help="Passed as the `delimiter` parameter to numpy's `savetxt` function. Defaults to ','. "
             "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)"
    )
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="If this flag is set, no additional info is printed throughout the script."
    )
    parser.add_argument(
        '-Y', '--yes',
        action='store_true',
        help="If this flag is set, confirmation is assumed throughout the script."
    )
    return vars(parser.parse_args(args))


def prepare_directory(directory: Path, file_extension: str, yes: bool = False, quiet: bool = False) -> None:
    """Ensure `directory` exists and optionally delete files matching the extension.

    Args:
        directory: Target directory; created (including missing parents) if absent.
        file_extension: Files matching ``*{file_extension}`` are deletion candidates.
        yes: If true, deletion is confirmed without prompting.
        quiet: If true, the "Deleted existing files." message is suppressed.

    Raises:
        ValueError: If the interactive answer is neither 'y' (or empty) nor 'n'.
    """
    directory.mkdir(parents=True, exist_ok=True)
    # Only regular files are candidates: with an empty extension the pattern is
    # '*', which also matches sub-directories that `unlink` cannot remove.
    existing_files = [path for path in directory.glob(f'*{file_extension}') if path.is_file()]
    if not existing_files:
        return
    if yes:
        delete = 'y'
    else:
        delete = input(f"{len(existing_files)} existing files with '{file_extension}' extension "
                       f"found in {directory}. Delete these first? [Y/n] ").strip().lower() or 'y'
    if delete == 'y':
        for file_path in existing_files:
            file_path.unlink()
        if not quiet:
            print("Deleted existing files.")
    elif delete != 'n':
        raise ValueError(f"Unrecognized answer {delete!r}; expected 'y' or 'n'.")


def _shape_str(array: np.ndarray) -> str:
    """Format an array's shape as e.g. '3x4'."""
    return 'x'.join(str(n) for n in array.shape)


def generate_inputs(num_inputs: int, directory: Path, file_extension: str, quiet: bool = False, **kwargs) -> None:
    """Write a random input vector of length `num_inputs` to the inputs file.

    Values are drawn uniformly from [0.0, 1.0). Extra keyword arguments are
    forwarded to `numpy.savetxt`. Unless `quiet`, the path and shape are printed.
    """
    inputs_file = Path(directory, INPUTS_FILE_NAME).with_suffix(file_extension)
    input_vector = RNG.uniform(0.0, 1.0, size=num_inputs)
    np.savetxt(inputs_file, input_vector, **kwargs)
    if not quiet:
        print(inputs_file, _shape_str(input_vector))


def generate_layers(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str,
                    quiet: bool = False, **kwargs) -> None:
    """Write one weights file and one biases file per entry in `num_neurons`.

    Layer `i` (1-based) gets a weight matrix of shape
    ``(num_neurons[i-1], previous_dim)`` where `previous_dim` is `num_inputs`
    for the first layer and the preceding layer's size afterwards, and a bias
    vector of length ``num_neurons[i-1]``. Values are drawn uniformly from
    [-1.0, 1.0). Files are named ``weightsNN``/``biasesNN`` plus the extension.
    Extra keyword arguments are forwarded to `numpy.savetxt`.
    """
    dim_before = num_inputs
    for i, dim in enumerate(num_neurons, start=1):
        weight_matrix = RNG.uniform(-1.0, 1.0, size=(dim, dim_before))
        bias_vector = RNG.uniform(-1.0, 1.0, size=dim)
        # Build each name from scratch: `with_stem` keeps only the last suffix,
        # which would mangle multi-part extensions such as '.data.txt'.
        weights_file = Path(directory, f'{WEIGHTS_FILE_NAME}{i:02}').with_suffix(file_extension)
        biases_file = Path(directory, f'{BIASES_FILE_NAME}{i:02}').with_suffix(file_extension)
        np.savetxt(weights_file, weight_matrix, **kwargs)
        np.savetxt(biases_file, bias_vector, **kwargs)
        if not quiet:
            print(weights_file, _shape_str(weight_matrix))
            print(biases_file, _shape_str(bias_vector))
        dim_before = dim


def generate_data(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str,
                  quiet: bool = False, yes: bool = False, **kwargs) -> None:
    """Prepare the target directory, then generate the inputs and all layer files.

    Args:
        num_inputs: Length of the input vector.
        num_neurons: Number of neurons for each layer, in order.
        directory: Directory the files are written to.
        file_extension: Extension (with leading dot) for all generated files.
        quiet: If true, no informational output is printed.
        yes: If true, deleting existing files is confirmed without prompting.
        **kwargs: Forwarded to `numpy.savetxt` (e.g. `fmt`, `delimiter`).
    """
    # Keyword arguments on purpose: `prepare_directory` takes (yes, quiet) in the
    # opposite order from this function, so positional passing swaps the flags.
    prepare_directory(directory, file_extension, yes=yes, quiet=quiet)
    if not quiet:
        print("Creating new test data...")
    generate_inputs(num_inputs, directory, file_extension, quiet, **kwargs)
    generate_layers(num_inputs, num_neurons, directory, file_extension, quiet, **kwargs)


def main() -> None:
    """Script entry point: parse the command line and generate the data."""
    generate_data(**parse_cli())


if __name__ == '__main__':
    main()