"""Create test data files (input vector, weight matrices, and bias vectors)."""
|
import sys
|
||
|
from argparse import ArgumentParser
|
||
|
from pathlib import Path
|
||
|
from typing import Sequence
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
# Directory containing this script; the default output directory is located relative to it.
THIS_DIR = Path(__file__).parent
DEFAULT_DATA_DIR = THIS_DIR / '..' / 'data'

# Base names (without extension) of the generated files.
INPUTS_FILE_NAME = 'inputs'
WEIGHTS_FILE_NAME = 'weights'
BIASES_FILE_NAME = 'biases'

# Random generator shared by all generating functions; no explicit seed is passed.
RNG = np.random.default_rng()
|
||
|
|
||
|
|
||
|
def extension_with_dot(string: str) -> str:
    """Normalize a file extension so that a non-empty value starts with a dot.

    Surrounding whitespace is stripped first. An empty (or whitespace-only)
    value yields the empty string, i.e. no dot is added.
    """
    stripped = string.strip()
    if stripped and not stripped.startswith('.'):
        return '.' + stripped
    return stripped
|
||
|
|
||
|
|
||
|
def parse_cli(args: list[str] = None) -> dict:
    """Parse the command-line arguments of the test data generator.

    Args:
        args: Argument strings to parse. When `None`, `argparse` reads them from `sys.argv`.

    Returns:
        A dictionary mapping each argument's destination name to its parsed value
        (`num_inputs`, `num_neurons`, `directory`, `file_extension`, `fmt`,
        `delimiter`, `quiet`, `yes`).
    """
    cli = ArgumentParser(description="Create test data files (input vector, weight matrices, and bias vectors).")
    add = cli.add_argument

    # Positional arguments: size of the input vector, then one size per layer.
    add(
        'num_inputs',
        metavar='inputs',
        type=int,
        help="Number of input dimensions. Random values are generated from a uniform distribution between 0.0 and 1.0."
    )
    add(
        'num_neurons',
        metavar='neurons',
        type=int,
        nargs='+',
        help="Number of neurons in a layer. A weights file and a biases file will be created for each value passed. "
             "Random values are generated from a uniform distribution between -1.0 and 1.0."
    )

    # Where the files go and how they are named.
    add(
        '-d', '--directory',
        default=DEFAULT_DATA_DIR,
        type=Path,
        help=f"Target directory to create the generated files in. Defaults to '{DEFAULT_DATA_DIR}'."
    )
    add(
        '-e', '--file-extension',
        default='.csv',
        type=extension_with_dot,
        help="File extension to use for the generated file names. Defaults to '.csv'."
    )

    # Options handed through to `numpy.savetxt`.
    add(
        '--fmt',
        default='%f',
        help="Passed as the `fmt` parameter to numpy's `savetxt` function. Defaults to '%%f'. "
             "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)"
    )
    add(
        '--delimiter',
        default=',',
        help="Passed as the `delimiter` parameter to numpy's `savetxt` function. Defaults to ','. "
             "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)"
    )

    # Behaviour flags.
    add(
        '-q', '--quiet',
        action='store_true',
        help="If this flag is set, no additional info is printed throughout the script."
    )
    add(
        '-Y', '--yes',
        action='store_true',
        help="If this flag is set, confirmation is assumed throughout the script."
    )

    namespace = cli.parse_args(args)
    return vars(namespace)
|
||
|
|
||
|
|
||
|
def prepare_directory(directory: Path, file_extension: str, yes: bool = False, quiet: bool = False) -> None:
    """Make sure the target directory exists and optionally clear old files from it.

    Files in `directory` whose names end with `file_extension` are looked up.
    If any are found, the user is asked whether to delete them (an empty
    answer counts as yes), unless `yes` is set, in which case they are
    deleted without asking.

    Args:
        directory: Target directory; created (including missing parents) if necessary.
        file_extension: Extension (with leading dot) identifying previously generated files.
        yes: Assume confirmation instead of prompting.
        quiet: Suppress the informational message after deleting.

    Raises:
        ValueError: If the interactive answer is neither 'y' nor 'n'.
    """
    # `parents=True` so that a nested, not yet existing target path works as well.
    directory.mkdir(parents=True, exist_ok=True)
    # The glob may also match sub-directories (always, if the extension is empty);
    # those cannot be unlinked and are not generated files, so leave them alone.
    existing_files = [path for path in directory.glob(f'*{file_extension}') if not path.is_dir()]
    if not existing_files:
        return
    if yes:
        delete = 'y'
    else:
        delete = input(f"{len(existing_files)} existing files with '{file_extension}' extension "
                       f"found in {directory}. Delete these first? [Y/n] ").strip().lower() or 'y'
    if delete == 'y':
        for file_path in existing_files:
            file_path.unlink()
        if not quiet:
            print("Deleted existing files.")
    elif delete != 'n':
        raise ValueError(f"Invalid answer {delete!r}; expected 'y' or 'n'.")
|
||
|
|
||
|
|
||
|
def generate_inputs(num_inputs: int, directory: Path, file_extension: str, quiet: bool = False, **kwargs) -> None:
    """Write a random input vector to the inputs file inside `directory`.

    The vector has `num_inputs` entries drawn from a uniform distribution
    between 0.0 and 1.0. Additional keyword arguments are passed on to
    `numpy.savetxt`. Unless `quiet` is set, the file path and the vector's
    shape are printed.
    """
    target = Path(directory, INPUTS_FILE_NAME).with_suffix(file_extension)
    vector = RNG.uniform(0.0, 1.0, size=num_inputs)
    np.savetxt(target, vector, **kwargs)
    if quiet:
        return
    shape_text = 'x'.join(map(str, vector.shape))
    print(target, shape_text)
|
||
|
|
||
|
|
||
|
def generate_layers(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str,
                    quiet: bool = False, **kwargs) -> None:
    """Write a random weight matrix and bias vector file for every layer.

    For the i-th value `dim` in `num_neurons` (counting from 1), a weight
    matrix of shape `(dim, previous_dim)` and a bias vector of length `dim`
    are drawn from a uniform distribution between -1.0 and 1.0, where
    `previous_dim` is `num_inputs` for the first layer and the preceding
    layer's size afterwards. They are saved as `weights<ii>` and `biases<ii>`
    (two-digit, zero-padded index) with `file_extension` appended.

    Args:
        num_inputs: Number of columns of the first weight matrix.
        num_neurons: Layer sizes, in order.
        directory: Directory to write the files to.
        file_extension: Extension (with leading dot) for the file names.
        quiet: If set, do not print the created paths and shapes.
        **kwargs: Passed on to `numpy.savetxt`.
    """
    dim_before = num_inputs
    for i, dim in enumerate(num_neurons, start=1):
        # Build each path from the numbered stem and append the extension, the same way the
        # inputs file is named. Swapping the stem of an existing path would keep only the last
        # suffix and thereby truncate multi-part extensions such as '.tar.gz'.
        weights_file = Path(directory, f'{WEIGHTS_FILE_NAME}{i:02}').with_suffix(file_extension)
        biases_file = Path(directory, f'{BIASES_FILE_NAME}{i:02}').with_suffix(file_extension)
        weight_matrix = RNG.uniform(-1.0, 1.0, size=(dim, dim_before))
        bias_vector = RNG.uniform(-1.0, 1.0, size=dim)
        np.savetxt(weights_file, weight_matrix, **kwargs)
        np.savetxt(biases_file, bias_vector, **kwargs)
        if not quiet:
            print(weights_file, 'x'.join(str(n) for n in weight_matrix.shape))
            print(biases_file, 'x'.join(str(n) for n in bias_vector.shape))
        # The next layer takes this layer's outputs as its inputs.
        dim_before = dim
|
||
|
|
||
|
|
||
|
def generate_data(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str,
                  quiet: bool = False, yes: bool = False, **kwargs) -> None:
    """Prepare the target directory and generate all test data files in it.

    Args:
        num_inputs: Number of input dimensions.
        num_neurons: Layer sizes, in order.
        directory: Directory to write the files to.
        file_extension: Extension (with leading dot) for the file names.
        quiet: If set, no informational output is printed.
        yes: If set, deleting existing files is confirmed without prompting.
        **kwargs: Passed on to the generating functions (and from there to `numpy.savetxt`).
    """
    # Pass the flags by keyword: `prepare_directory` declares `yes` before `quiet`,
    # so handing them over positionally in this function's order would swap them.
    prepare_directory(directory, file_extension, yes=yes, quiet=quiet)
    if not quiet:
        print("Creating new test data...")
    generate_inputs(num_inputs, directory, file_extension, quiet=quiet, **kwargs)
    generate_layers(num_inputs, num_neurons, directory, file_extension, quiet=quiet, **kwargs)
|
||
|
|
||
|
|
||
|
def main() -> None:
    """Parse the command line and generate the test data files accordingly."""
    options = parse_cli()
    generate_data(**options)
|
||
|
|
||
|
|
||
|
# Run the generator only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
|