commit aebd885ab7e794fe2b258512d70cdb3fede86e45 Author: Daniil Fajnberg Date: Wed Jul 20 11:13:04 2022 +0200 Move to separate repo diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6626a9a --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Python virtual environment: +.venv/ +# IDE project settings: +.vscode/ +.idea/ +# Compiled scripts/documents: +build/ +target/ +# Package lock files / version checksums / etc.: +go.sum +Cargo.lock +# Ephemeral testing data: +data/ diff --git a/go/feed_forward.go b/go/feed_forward.go new file mode 100644 index 0000000..8d92dc7 --- /dev/null +++ b/go/feed_forward.go @@ -0,0 +1,137 @@ +package main + +import ( + "encoding/csv" + "fmt" + "math" + "os" + "path/filepath" + "strconv" + "time" + + "gonum.org/v1/gonum/mat" +) + +const DATA_DIR string = "data" + +type layer struct { + weights *mat.Dense + biases *mat.Dense +} + +func array_flatten(two_d_array [][]string) []float64 { + var result []float64 + for _, line := range two_d_array { + for _, value := range line { + float_value, _ := strconv.ParseFloat(value, 64) + result = append(result, float_value) + } + } + return result +} + +func load_matrix_from_csv(file_path string) (*mat.Dense, error) { + f, err := os.Open(file_path) + if err != nil { + return nil, err + } + defer f.Close() + data, err := csv.NewReader(f).ReadAll() + return mat.NewDense(len(data), len(data[0]), array_flatten(data)), err +} + +func load_test_data(dir string) (*mat.Dense, []layer, error) { + inputs, err := load_matrix_from_csv(filepath.Join(dir, "inputs.csv")) + if err != nil { return nil, nil, err } + var layers []layer + var weights *mat.Dense + var biases *mat.Dense + var path string + + for n := 1; n < 100; n++ { + path = filepath.Join(dir, fmt.Sprintf("weights%02d.csv", n)) + _, err = os.Stat(path) + if err != nil { break } + weights, err = load_matrix_from_csv(path) + if err != nil { return nil, nil, err } + + path = filepath.Join(dir, fmt.Sprintf("biases%02d.csv", n)) + _, err = 
os.Stat(path) + if err != nil { break } + biases, err = load_matrix_from_csv(path) + if err != nil { return nil, nil, err } + + layers = append(layers, layer{weights, biases}) + } + return inputs, layers, nil +} + +func sigmoid_scalar(x float64) float64 { + return 1 / (1 + math.Exp(-x)) +} + +func sigmoid_element(i, j int, value float64) float64 { + return sigmoid_scalar(value) +} + +func sigmoid_matrix(x mat.Matrix) *mat.Dense { + var y mat.Dense + y.Apply(sigmoid_element, x) + return &y +} + +func layer_func(inputs *mat.Dense, weights *mat.Dense, biases *mat.Dense, activation func(x mat.Matrix) *mat.Dense) *mat.Dense { + var output mat.Dense + output.Mul(weights, inputs) + output.Add(&output, biases) + return activation(&output) +} + +func feed_forward(x *mat.Dense, layers []layer) *mat.Dense { + var y *mat.Dense = x + for _, l := range layers { + y = layer_func(y, l.weights, l.biases, sigmoid_matrix) + } + return y +} + +func time_feed_forward(n int) { + inputs, layers, _ := load_test_data(DATA_DIR) + t0 := time.Now() + for i := 0; i < n; i++ { feed_forward(inputs, layers) } + elapsed := time.Since(t0) + fmt.Printf("%.5f\n", elapsed.Seconds()) +} + +func examples() { + x := mat.NewDense(2, 1, []float64{0, 2}) + w1 := mat.NewDense(2, 2, []float64{1, 0, 2, 4}) + b1 := mat.NewDense(2, 1, []float64{0, 1}) + w2 := mat.NewDense(2, 2, []float64{1, 0, 2, 4}) + b2 := mat.NewDense(2, 1, []float64{-0.5, 1}) + layers := []layer{{w1, b1}, {w2, b2}} + + pre := " " + fmt.Printf("x1 = %v\n", mat.Formatted(x, mat.Prefix(pre))) + + fmt.Printf("w1 = %v\n", mat.Formatted(w1, mat.Prefix(pre))) + fmt.Printf("b1 = %v\n", mat.Formatted(b1, mat.Prefix(pre))) + fmt.Println("σ(w1 * x1 + b1) = ") + x2 := layer_func(x, w1, b1, sigmoid_matrix) + fmt.Printf("x2 = %v\n\n", mat.Formatted(x2, mat.Prefix(pre))) + + fmt.Printf("w2 = %v\n", mat.Formatted(w2, mat.Prefix(pre))) + fmt.Printf("b2 = %v\n", mat.Formatted(b2, mat.Prefix(pre))) + fmt.Println("σ(w2 * x2 + b2) = ") + x3 := 
feed_forward(x, layers) + fmt.Printf("x3 = %v\n", mat.Formatted(x3, mat.Prefix(pre))) +} + +func main() { + n, err := strconv.Atoi(os.Args[1]) + if err != nil { + fmt.Println(err) + os.Exit(2) + } + time_feed_forward(n) +} diff --git a/go/go.mod b/go/go.mod new file mode 100644 index 0000000..71bc361 --- /dev/null +++ b/go/go.mod @@ -0,0 +1,5 @@ +module feedforward.tests/something + +go 1.18 + +require gonum.org/v1/gonum v0.11.0 diff --git a/jl/feed_forward.jl b/jl/feed_forward.jl new file mode 100644 index 0000000..8dbbb5e --- /dev/null +++ b/jl/feed_forward.jl @@ -0,0 +1,64 @@ +using CSV, Tables, Glob + + +const TEST_DATA_DIR = joinpath(@__DIR__, "..", "data") + + +function csv_to_matrix(file_path, types=Float32) + return CSV.read(file_path, Tables.matrix, header=0, delim=',', types=types) +end + + +function load_test_data() + inputs_path = joinpath(TEST_DATA_DIR, "inputs.csv") + inputs = vec(csv_to_matrix(inputs_path)) + weight_matrices, bias_vectors = Matrix{Float32}[], Vector{Float32}[] + for file_path in glob("weights*.csv", TEST_DATA_DIR) + push!(weight_matrices, csv_to_matrix(file_path)) + end + for file_path in glob("biases*.csv", TEST_DATA_DIR) + push!(bias_vectors, vec(csv_to_matrix(file_path))) + end + return inputs, [zip(weight_matrices, bias_vectors)...] +end + + +function sigmoid(x) + return 1 ./ (1 .+ exp.(-x)) +end + + +function layer_func(input_vector, weight_matrix, bias_vector) + return sigmoid(weight_matrix * input_vector + bias_vector) +end + + +function feed_forward(x, layers...) + for (w, b) in layers + x = layer_func(x, w, b) + end + return x +end + + +function main() + n = parse(Int32, ARGS[1]) + x, weights_biases = load_test_data() + + # # To get info about memory allocations etc.: + # @time feed_forward(x, weights_biases...) + + # Call once to compile, then again to time + @elapsed feed_forward(x, weights_biases...) + t = @elapsed begin + for _ in 1:n + feed_forward(x, weights_biases...) 
+ end + end + println(round(t, digits=5)) +end + + +if abspath(PROGRAM_FILE) == @__FILE__ + main() +end diff --git a/py/feed_forward.py b/py/feed_forward.py new file mode 100644 index 0000000..9e179a3 --- /dev/null +++ b/py/feed_forward.py @@ -0,0 +1,48 @@ +import sys +from pathlib import Path +from timeit import timeit + +import numpy as np + + +THIS_DIR = Path(__file__).parent +TEST_DATA_DIR = Path(THIS_DIR, '..', 'data') + + +def csv_to_array(file_path: Path, dtype=np.float32) -> np.ndarray: + return np.loadtxt(file_path, dtype=dtype, delimiter=',') + + +def load_test_data() -> tuple[np.ndarray, list[tuple[np.ndarray, np.ndarray]]]: + inputs = csv_to_array(Path(TEST_DATA_DIR, 'inputs.csv')) + weights_iter = (csv_to_array(p) for p in sorted(TEST_DATA_DIR.glob('weights*.csv'))) + biases_iter = (csv_to_array(p) for p in sorted(TEST_DATA_DIR.glob('biases*.csv'))) + return inputs, list(zip(weights_iter, biases_iter)) + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + +def layer_func(input_vector: np.ndarray, weight_matrix: np.ndarray, bias_vector: np.ndarray) -> np.ndarray: + return sigmoid(np.matmul(weight_matrix, input_vector) + bias_vector) + + +def feed_forward(x: np.ndarray, *layers: tuple[np.ndarray, np.ndarray]) -> np.ndarray: + for w, b in layers: + x = layer_func(x, w, b) + return x + + +def main(n): + t = timeit( + 'feed_forward(inp, *layers)', + setup='from __main__ import feed_forward, load_test_data;' + + 'inp, layers = load_test_data()', + number=n + ) + print(round(t, 5)) + + +if __name__ == '__main__': + main(int(sys.argv[1])) diff --git a/py/gen_data.py b/py/gen_data.py new file mode 100644 index 0000000..0081ca8 --- /dev/null +++ b/py/gen_data.py @@ -0,0 +1,137 @@ +import sys +from argparse import ArgumentParser +from pathlib import Path +from typing import Sequence + +import numpy as np + + +THIS_DIR = Path(__file__).parent +DEFAULT_DATA_DIR = Path(THIS_DIR, '..', 'data') + +INPUTS_FILE_NAME = 'inputs' +WEIGHTS_FILE_NAME = 'weights' 
+BIASES_FILE_NAME = 'biases' + +RNG = np.random.default_rng() + + +def extension_with_dot(string: str) -> str: + string = string.strip() + if not string: + return string + return string if string.startswith('.') else '.' + string + + +def parse_cli(args: list[str] = None) -> dict: + parser = ArgumentParser(description="Create test data files (input vector, weight matrices, and bias vectors).") + parser.add_argument( + 'num_inputs', + type=int, + metavar='inputs', + help="Number of input dimensions. Random values are generated from a uniform distribution between 0.0 and 1.0." + ) + parser.add_argument( + 'num_neurons', + nargs='+', + type=int, + metavar='neurons', + help="Number of neurons in a layer. A weights file and a biases file will be created for each value passed. " + "Random values are generated from a uniform distribution between -1.0 and 1.0." + ) + parser.add_argument( + '-d', '--directory', + type=Path, + default=DEFAULT_DATA_DIR, + help=f"Target directory to create the generated files in. Defaults to '{DEFAULT_DATA_DIR}'." + ) + parser.add_argument( + '-e', '--file-extension', + type=extension_with_dot, + default='.csv', + help="File extension to use for the generated file names. Defaults to '.csv'." + ) + parser.add_argument( + '--fmt', + default='%f', + help="Passed as the `fmt` parameter to numpy's `savetxt` function. Defaults to '%%f'. " + "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)" + ) + parser.add_argument( + '--delimiter', + default=',', + help="Passed as the `delimiter` parameter to numpy's `savetxt` function. Defaults to ','. " + "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)" + ) + parser.add_argument( + '-q', '--quiet', + action='store_true', + help="If this flag is set, no additional info is printed throughout the script." + ) + parser.add_argument( + '-Y', '--yes', + action='store_true', + help="If this flag is set, confirmation is assumed throughout the script." 
+ ) + return vars(parser.parse_args(args)) + + +def prepare_directory(directory: Path, file_extension: str, yes: bool = False, quiet: bool = False) -> None: + directory.mkdir(exist_ok=True) + existing_files = list(directory.glob(f'*{file_extension}')) + if existing_files: + if yes: + delete = 'y' + else: + delete = input(f"{len(existing_files)} existing files with '{file_extension}' extension " + f"found in {directory}. Delete these first? [Y/n] ").strip().lower() or 'y' + if delete == 'y': + for file_path in existing_files: + file_path.unlink() + if not quiet: + print("Deleted existing files.") + elif delete != 'n': + raise ValueError + + +def generate_inputs(num_inputs: int, directory: Path, file_extension: str, quiet: bool = False, **kwargs) -> None: + inputs_file = Path(directory, INPUTS_FILE_NAME).with_suffix(file_extension) + input_vector = RNG.uniform(0.0, 1.0, size=num_inputs) + np.savetxt(inputs_file, input_vector, **kwargs) + if not quiet: + print(inputs_file, 'x'.join(str(n) for n in input_vector.shape)) + + +def generate_layers(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str, + quiet: bool = False, **kwargs) -> None: + weights_file = Path(directory, WEIGHTS_FILE_NAME).with_suffix(file_extension) + biases_file = Path(directory, BIASES_FILE_NAME).with_suffix(file_extension) + dim_before = num_inputs + for i, dim in enumerate(num_neurons, start=1): + weight_matrix = RNG.uniform(-1.0, 1.0, size=(dim, dim_before)) + bias_vector = RNG.uniform(-1.0, 1.0, size=dim) + weights_file = weights_file.with_stem(f'{WEIGHTS_FILE_NAME}{i:02}') + biases_file = biases_file.with_stem(f'{BIASES_FILE_NAME}{i:02}') + np.savetxt(weights_file, weight_matrix, **kwargs) + np.savetxt(biases_file, bias_vector, **kwargs) + if not quiet: + print(weights_file, 'x'.join(str(n) for n in weight_matrix.shape)) + print(biases_file, 'x'.join(str(n) for n in bias_vector.shape)) + dim_before = dim + + +def generate_data(num_inputs: int, num_neurons: 
Sequence[int], directory: Path, file_extension: str, + quiet: bool = False, yes: bool = False, **kwargs) -> None: + prepare_directory(directory, file_extension, yes=yes, quiet=quiet) + if not quiet: + print("Creating new test data...") + generate_inputs(num_inputs, directory, file_extension, quiet, **kwargs) + generate_layers(num_inputs, num_neurons, directory, file_extension, quiet, **kwargs) + + +def main() -> None: + generate_data(**parse_cli()) + + +if __name__ == '__main__': + main() diff --git a/py/requirements.txt b/py/requirements.txt new file mode 100644 index 0000000..296d654 --- /dev/null +++ b/py/requirements.txt @@ -0,0 +1 @@ +numpy \ No newline at end of file diff --git a/rs/Cargo.toml b/rs/Cargo.toml new file mode 100644 index 0000000..acd2dff --- /dev/null +++ b/rs/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "feed_forward" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "feed_forward" +path = "src/feed_forward.rs" + +[dependencies] +ndarray = { version = "0.15.0", features = ["rayon", "blas"] } +blas-src = { version = "0.8", features = ["openblas"] } +openblas-src = { version = "0.10", features = ["cblas", "system"] } +ndarray-rand = "0.14.0" +csv = "*" +glob = "*" diff --git a/rs/src/feed_forward.rs b/rs/src/feed_forward.rs new file mode 100644 index 0000000..2f38653 --- /dev/null +++ b/rs/src/feed_forward.rs @@ -0,0 +1,98 @@ +#![allow(dead_code)] + +extern crate blas_src; + +use std::env; +use std::fs::File; +use std::path::Path; +use std::time::Instant; + +use csv; +use glob::glob; +use ndarray::{Array, Array1, Array2}; + +mod sigmoids; // different custom implementations of the sigmoid function +use sigmoids::{sigmoid_ndarr_mapv_inplace as sigmoid}; + + +const DIR_DATA: &str = "data"; + + +fn csv_row_to_float_vec(row: &csv::StringRecord) -> Vec<f32> { + return row.iter().map( + |item| {item.parse::<f32>().expect("Not a float")} + ).collect(); +} + + +fn csv_to_float_vec(file_path: &Path) -> (Vec<f32>, usize) { + let mut output: Vec<f32> = Vec::new(); + let mut 
num_columns: usize = 0; + let file = File::open(file_path).expect("Can't open file"); + let mut reader = csv::ReaderBuilder::new().has_headers(false).from_reader(file); + for result in reader.records() { + let row: Vec = csv_row_to_float_vec(&result.expect("No csv record")); + if num_columns == 0 { + num_columns = row.len(); + } + output.extend(row); + } + return (output, num_columns); +} + +fn csv_to_array1(file_path: &Path) -> Array1 { + let (vec, _) = csv_to_float_vec(&file_path); + return Array::from_shape_vec((vec.len(), ), vec).unwrap(); +} + +fn csv_to_array2(file_path: &Path) -> Array2 { + let (vec, num_colums) = csv_to_float_vec(&file_path); + return Array2::from_shape_vec((vec.len() / num_colums, num_colums), vec).unwrap(); +} + +fn load_test_data() -> (Array1, Vec<(Array2, Array1)>) { + let test_data_dir = Path::new(".").join(DIR_DATA); + let inputs: Array1 = csv_to_array1(&test_data_dir.join("inputs.csv")); + let weights_glob = test_data_dir.join("weights*.csv"); + let biases_glob = test_data_dir.join("biases*.csv"); + let weights_iter = glob(weights_glob.to_str().unwrap()).unwrap().map( + |path_result| {csv_to_array2(&path_result.unwrap())} + ); + let biases_iter = glob(biases_glob.to_str().unwrap()).unwrap().map( + |path_result| {csv_to_array1(&path_result.unwrap())} + ); + return (inputs, weights_iter.zip(biases_iter).collect()); +} + + +fn layer_func(input_vector: &Array1, weight_matrix: &Array2, + bias_vector: &Array1, activation: &F, + virtual_vector: Option<&Array1>) -> Array1 +where F: Fn(Array1, Option<&Array1>) -> Array1 { + return activation(weight_matrix.dot(input_vector) + bias_vector, virtual_vector) +} + + +fn feed_forward(x: &Array1, layers: &[(Array2, Array1)], activation: &F) -> Array1 +where F: Fn(Array1, Option<&Array1>) -> Array1 { + let mut y = x.to_owned(); + for (w, b) in layers { + y = layer_func(&y, w, b, activation, Some(&Array::from_elem(b.raw_dim(), false))) + } + return y; +} + +fn time_feed_forward(n: usize) { + let (inp, 
layers) = load_test_data(); + let t0 = Instant::now(); + for _ in 0..n { feed_forward(&inp, &layers, &sigmoid); } + let elapsed = t0.elapsed(); + println!("{:.5}", elapsed.as_secs_f64()); +} + + +fn main() { + let args: Vec = env::args().collect(); + let n: usize = args[1].parse::().expect("Not an integer"); + time_feed_forward(n); +} diff --git a/rs/src/sigmoids.rs b/rs/src/sigmoids.rs new file mode 100644 index 0000000..fdadb16 --- /dev/null +++ b/rs/src/sigmoids.rs @@ -0,0 +1,107 @@ +use std::time::Instant; + +use ndarray::{Array1, Zip}; +use ndarray_rand::{RandomExt, rand_distr::Uniform}; + + +pub fn sigmoid_val(x: f32) -> f32 { + return 1. / (1. + (-x).exp()); +} + +pub fn sigmoid_ref(x: &f32) -> f32 { + return 1. / (1. + (-x).exp()); +} + +pub fn sigmoid_mut_ref(x: &mut f32) { + *x = 1. / (1. + (-*x).exp()); +} + + +pub fn sigmoid_vec(x: &Vec) -> Vec { + return x.iter().map(|&v| sigmoid_val(v)).collect(); +} + + +pub fn sigmoid_ndarr_map(x: Array1) -> Array1 { + return x.map(sigmoid_ref); +} + +pub fn sigmoid_ndarr_mapv(x: Array1) -> Array1 { + return x.mapv(sigmoid_val); +} + +pub fn sigmoid_ndarr_map_inplace(mut x: Array1) -> Array1 { + x.map_inplace(sigmoid_mut_ref); + return x; +} + +pub fn sigmoid_ndarr_mapv_inplace(mut x: Array1, virt: Option<&Array1>) -> Array1 { + if virt.is_some() { + Zip::from(&mut x).and(virt.unwrap()).for_each( + |elem, is_virt| *elem = if *is_virt { *elem } else { sigmoid_ref(elem) } + ); + } else { + x.mapv_inplace(sigmoid_val); + } + return x; +} + +pub fn sigmoid_ndarr_par_map_inplace(mut x: Array1) -> Array1 { + x.par_map_inplace(sigmoid_mut_ref); + return x; +} + +pub fn sigmoid_ndarr_par_mapv_inplace(mut x: Array1) -> Array1 { + x.par_mapv_inplace(sigmoid_val); + return x; +} + + +pub fn time_sigmoids(n: usize, arr_len: usize) { + let arr: Array1 = Array1::random(arr_len, Uniform::new(0., 1.)); + let vec: Vec = arr.to_vec(); + + let t0 = Instant::now(); + for _ in 0..n { let _result = sigmoid_vec(&vec); } + let elapsed = 
t0.elapsed(); + println!("sigmoid_vec took {:.5} seconds", elapsed.as_secs_f64()); + + let mut arr_copy = arr.to_owned(); + + let t0 = Instant::now(); + for _ in 0..n { arr_copy = sigmoid_ndarr_map(arr_copy); } + let elapsed = t0.elapsed(); + println!("sigmoid_ndarr_map took {:.5} seconds", elapsed.as_secs_f64()); + + arr_copy = arr.to_owned(); + let t0 = Instant::now(); + for _ in 0..n { arr_copy = sigmoid_ndarr_mapv(arr_copy); } + let elapsed = t0.elapsed(); + println!("sigmoid_ndarr_mapv took {:.5} seconds", elapsed.as_secs_f64()); + + arr_copy = arr.to_owned(); + let t0 = Instant::now(); + for _ in 0..n { arr_copy = sigmoid_ndarr_map_inplace(arr_copy); } + let elapsed = t0.elapsed(); + println!("sigmoid_ndarr_map_inplace took {:.5} seconds", elapsed.as_secs_f64()); + + arr_copy = arr.to_owned(); + let virt = ndarray::Array::from_elem(arr.raw_dim(), false); + let t0 = Instant::now(); + for _ in 0..n { arr_copy = sigmoid_ndarr_mapv_inplace(arr_copy, Some(&virt)); } + let elapsed = t0.elapsed(); + println!("sigmoid_ndarr_mapv_inplace took {:.5} seconds", elapsed.as_secs_f64()); + + arr_copy = arr.to_owned(); + let t0 = Instant::now(); + for _ in 0..n { arr_copy = sigmoid_ndarr_par_map_inplace(arr_copy); } + let elapsed = t0.elapsed(); + println!("sigmoid_ndarr_par_map_inplace took {:.5} seconds", elapsed.as_secs_f64()); + + arr_copy = arr.to_owned(); + let t0 = Instant::now(); + for _ in 0..n { arr_copy = sigmoid_ndarr_par_mapv_inplace(arr_copy); } + let elapsed = t0.elapsed(); + println!("sigmoid_ndarr_par_mapv_inplace took {:.5} seconds", elapsed.as_secs_f64()); + +} diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..d4b984a --- /dev/null +++ b/run.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env sh + +venv_dir=.venv + +run_tests() ( + echo "py $("${venv_dir}/bin/python" py/feed_forward.py "$1")" + echo "jl $(julia jl/feed_forward.jl "$1")" + echo "go $(go/target/feed_forward "$1")" + echo "rs $(rs/target/release/feed_forward "$1")" +) + +num_times=$1 +shift 1 
+ +echo 'Compiling Rust binaries' >&2 +cargo build -q --release --manifest-path rs/Cargo.toml + +echo 'Compiling Go binary' >&2 +cd go && go get && go build -o target/feed_forward feed_forward.go && cd .. + +[ -d "${venv_dir}" ] || ( + echo 'Creating Python virtual environment' && + python -m venv "${venv_dir}" && + "${venv_dir}/bin/pip" install -r py/requirements.txt > /dev/null 2>&1 +) + +old="$IFS" +IFS='-' +echo "Generating $*-network test data" >&2 +IFS=$old +"${venv_dir}/bin/python" py/gen_data.py -q -Y "$@" + +echo "Running feed forward $num_times times" >&2 +run_tests "$num_times"