Move to separate repo

This commit is contained in:
Daniil Fajnberg 2022-07-20 11:13:04 +02:00
commit aebd885ab7
11 changed files with 660 additions and 0 deletions

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
# Python virtual environment:
.venv/
# IDE project settings:
.vscode/
.idea/
# Compiled scripts/documents:
build/
target/
# Package lock files / version checksums / etc.:
go.sum
Cargo.lock
# Ephemeral testing data:
data/

137
go/feed_forward.go Normal file
View File

@ -0,0 +1,137 @@
package main
import (
"encoding/csv"
"fmt"
"math"
"os"
"path/filepath"
"strconv"
"time"
"gonum.org/v1/gonum/mat"
)
// DATA_DIR is the directory (relative to the working directory) holding the
// generated CSV test data: inputs.csv plus numbered weightsNN.csv/biasesNN.csv.
const DATA_DIR string = "data"

// layer bundles the weight matrix and bias vector of one dense network layer.
type layer struct {
	weights *mat.Dense
	biases  *mat.Dense
}
// array_flatten converts a 2-D grid of numeric strings into a single
// row-major []float64.
// NOTE(review): ParseFloat errors are silently ignored, so unparseable cells
// become 0 — confirm this best-effort behavior is intended.
func array_flatten(two_d_array [][]string) []float64 {
	var flattened []float64
	for _, row := range two_d_array {
		for _, cell := range row {
			parsed, _ := strconv.ParseFloat(cell, 64)
			flattened = append(flattened, parsed)
		}
	}
	return flattened
}
// load_matrix_from_csv reads a headerless, comma-separated CSV file into a
// dense matrix whose shape is (rows, columns-of-first-row).
func load_matrix_from_csv(file_path string) (*mat.Dense, error) {
	f, err := os.Open(file_path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	data, err := csv.NewReader(f).ReadAll()
	// BUG FIX: the read error was previously passed through AFTER indexing
	// data[0], which panicked on an unreadable or empty file. Check first.
	if err != nil {
		return nil, err
	}
	if len(data) == 0 {
		return nil, fmt.Errorf("no rows in %s", file_path)
	}
	return mat.NewDense(len(data), len(data[0]), array_flatten(data)), nil
}
// load_test_data loads the network definition from dir: the input vector plus
// as many numbered (weightsNN.csv, biasesNN.csv) pairs as exist (up to 99).
// It stops at the first missing weights or biases file.
func load_test_data(dir string) (*mat.Dense, []layer, error) {
	inputs, err := load_matrix_from_csv(filepath.Join(dir, "inputs.csv"))
	if err != nil {
		return nil, nil, err
	}
	var layers []layer
	for n := 1; n < 100; n++ {
		weightsPath := filepath.Join(dir, fmt.Sprintf("weights%02d.csv", n))
		if _, statErr := os.Stat(weightsPath); statErr != nil {
			break
		}
		weights, err := load_matrix_from_csv(weightsPath)
		if err != nil {
			return nil, nil, err
		}
		biasesPath := filepath.Join(dir, fmt.Sprintf("biases%02d.csv", n))
		if _, statErr := os.Stat(biasesPath); statErr != nil {
			break
		}
		biases, err := load_matrix_from_csv(biasesPath)
		if err != nil {
			return nil, nil, err
		}
		layers = append(layers, layer{weights, biases})
	}
	return inputs, layers, nil
}
// sigmoid_scalar is the logistic function, mapping any real number into (0, 1).
func sigmoid_scalar(x float64) float64 {
	denominator := 1 + math.Exp(-x)
	return 1 / denominator
}
// sigmoid_element adapts the logistic function to gonum's mat.Dense.Apply
// signature; the row/column indices are ignored — only the value matters.
func sigmoid_element(i, j int, value float64) float64 {
	return 1 / (1 + math.Exp(-value))
}
// sigmoid_matrix applies the logistic sigmoid to every element of x,
// returning a newly allocated matrix.
func sigmoid_matrix(x mat.Matrix) *mat.Dense {
	result := new(mat.Dense)
	result.Apply(sigmoid_element, x)
	return result
}
// layer_func computes one dense layer: activation(weights * inputs + biases).
func layer_func(inputs *mat.Dense, weights *mat.Dense, biases *mat.Dense, activation func(x mat.Matrix) *mat.Dense) *mat.Dense {
	var output mat.Dense
	output.Mul(weights, inputs)
	output.Add(&output, biases)
	// BUG FIX: the activation parameter was previously ignored and
	// sigmoid_matrix was hard-coded; apply the supplied activation instead.
	// (All current callers pass sigmoid_matrix, so behavior is unchanged.)
	return activation(&output)
}
// feed_forward propagates x through every layer in order, using the sigmoid
// activation, and returns the final layer's output.
func feed_forward(x *mat.Dense, layers []layer) *mat.Dense {
	output := x
	for _, current := range layers {
		output = layer_func(output, current.weights, current.biases, sigmoid_matrix)
	}
	return output
}
// time_feed_forward loads the test network, times n feed-forward passes, and
// prints only the elapsed seconds (run.sh captures this value).
func time_feed_forward(n int) {
	inputs, layers, err := load_test_data(DATA_DIR)
	// BUG FIX: the load error was previously discarded with `_`, so a missing
	// or corrupt data directory caused a nil-matrix panic inside feed_forward.
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	t0 := time.Now()
	for i := 0; i < n; i++ {
		feed_forward(inputs, layers)
	}
	elapsed := time.Since(t0)
	fmt.Printf("%.5f\n", elapsed.Seconds())
}
// examples prints a small worked two-layer example, showing the input, the
// layer parameters, and each intermediate activation. It is not called by
// main; it exists as a hand-checkable reference.
func examples() {
	x := mat.NewDense(2, 1, []float64{0, 2})
	w1 := mat.NewDense(2, 2, []float64{1, 0, 2, 4})
	b1 := mat.NewDense(2, 1, []float64{0, 1})
	w2 := mat.NewDense(2, 2, []float64{1, 0, 2, 4})
	b2 := mat.NewDense(2, 1, []float64{-0.5, 1})
	layers := []layer{{w1, b1}, {w2, b2}}
	// Prefix indents continuation rows of multi-line matrix output so they
	// line up under labels like "x1 = ".
	pre := "     "
	fmt.Printf("x1 = %v\n", mat.Formatted(x, mat.Prefix(pre)))
	fmt.Printf("w1 = %v\n", mat.Formatted(w1, mat.Prefix(pre)))
	fmt.Printf("b1 = %v\n", mat.Formatted(b1, mat.Prefix(pre)))
	fmt.Println("σ(w1 * x1 + b1) = ")
	x2 := layer_func(x, w1, b1, sigmoid_matrix)
	fmt.Printf("x2 = %v\n\n", mat.Formatted(x2, mat.Prefix(pre)))
	fmt.Printf("w2 = %v\n", mat.Formatted(w2, mat.Prefix(pre)))
	fmt.Printf("b2 = %v\n", mat.Formatted(b2, mat.Prefix(pre)))
	fmt.Println("σ(w2 * x2 + b2) = ")
	// feed_forward from the original input reproduces x2 internally, then
	// applies the second layer.
	x3 := feed_forward(x, layers)
	fmt.Printf("x3 = %v\n", mat.Formatted(x3, mat.Prefix(pre)))
}
// main parses the iteration count from the first CLI argument and runs the
// feed-forward timing benchmark.
func main() {
	// BUG FIX: indexing os.Args[1] without a length check panicked with an
	// index-out-of-range error when the program was run with no arguments.
	if len(os.Args) < 2 {
		fmt.Println("usage: feed_forward <iterations>")
		os.Exit(2)
	}
	n, err := strconv.Atoi(os.Args[1])
	if err != nil {
		fmt.Println(err)
		os.Exit(2)
	}
	time_feed_forward(n)
}

5
go/go.mod Normal file
View File

@ -0,0 +1,5 @@
module feedforward.tests/something
go 1.18
require gonum.org/v1/gonum v0.11.0

64
jl/feed_forward.jl Normal file
View File

@ -0,0 +1,64 @@
using CSV, Tables, Glob
# Directory holding the generated CSV test data (shared across languages).
const TEST_DATA_DIR = joinpath(@__DIR__, "..", "data")

# Read a headerless, comma-separated CSV file into a Matrix of `types`.
function csv_to_matrix(file_path, types=Float32)
    return CSV.read(file_path, Tables.matrix, header=0, delim=',', types=types)
end
# Load the input vector plus all per-layer (weights, biases) pairs from the
# test data directory.
function load_test_data()
    inputs_path = joinpath(TEST_DATA_DIR, "inputs.csv")
    inputs = vec(csv_to_matrix(inputs_path))
    weight_matrices, bias_vectors = Matrix{Float32}[], Vector{Float32}[]
    # NOTE(review): pairing weights[i] with biases[i] assumes glob() yields the
    # numbered files in matching sorted order — verify for this Glob version.
    for file_path in glob("weights*.csv", TEST_DATA_DIR)
        push!(weight_matrices, csv_to_matrix(file_path))
    end
    for file_path in glob("biases*.csv", TEST_DATA_DIR)
        push!(bias_vectors, vec(csv_to_matrix(file_path)))
    end
    # Splat the zip into a concrete Vector of (weights, biases) tuples.
    return inputs, [zip(weight_matrices, bias_vectors)...]
end
# Elementwise logistic function; works for scalars and arrays alike.
function sigmoid(x)
    return @. 1 / (1 + exp(-x))
end
# One dense layer: affine transform followed by the sigmoid activation.
function layer_func(input_vector, weight_matrix, bias_vector)
    pre_activation = weight_matrix * input_vector + bias_vector
    return sigmoid(pre_activation)
end
# Propagate x through each (weights, biases) layer in order and return the
# final layer's output.
function feed_forward(x, layers...)
    output = x
    for (weights, biases) in layers
        output = layer_func(output, weights, biases)
    end
    return output
end
# Time `n` feed-forward runs (n from the first CLI argument) and print the
# elapsed seconds, rounded to 5 digits.
function main()
    n = parse(Int32, ARGS[1])
    x, weights_biases = load_test_data()
    # # To get info about memory allocations etc.:
    # @time feed_forward(x, weights_biases...)
    # Call once to compile, then again to time
    @elapsed feed_forward(x, weights_biases...)
    # Only the loop below is measured, after the JIT warm-up call above.
    t = @elapsed begin
        for _ in 1:n
            feed_forward(x, weights_biases...)
        end
    end
    println(round(t, digits=5))
end

# Run main only when this file is executed as a script (not when included).
if abspath(PROGRAM_FILE) == @__FILE__
    main()
end

48
py/feed_forward.py Normal file
View File

@ -0,0 +1,48 @@
import sys
from pathlib import Path
from timeit import timeit
import numpy as np
THIS_DIR = Path(__file__).parent
# Test data lives in ../data, shared with the other language implementations.
TEST_DATA_DIR = Path(THIS_DIR, '..', 'data')


def csv_to_array(file_path: Path, dtype=np.float32) -> np.ndarray:
    """Read a comma-separated CSV file into a numpy array of `dtype`."""
    return np.loadtxt(file_path, dtype=dtype, delimiter=',')
def load_test_data() -> tuple[np.ndarray, list[tuple[np.ndarray, np.ndarray]]]:
    """Load the input vector and the per-layer (weights, biases) pairs.

    Files are paired by sorted name, relying on the zero-padded numbering
    (weights01.csv with biases01.csv, and so on).
    """
    inputs = csv_to_array(Path(TEST_DATA_DIR, 'inputs.csv'))
    weight_arrays = [csv_to_array(p) for p in sorted(TEST_DATA_DIR.glob('weights*.csv'))]
    bias_arrays = [csv_to_array(p) for p in sorted(TEST_DATA_DIR.glob('biases*.csv'))]
    return inputs, list(zip(weight_arrays, bias_arrays))
def sigmoid(x):
    """Elementwise logistic function: 1 / (1 + e^(-x))."""
    return np.reciprocal(1.0 + np.exp(-x))
def layer_func(input_vector: np.ndarray, weight_matrix: np.ndarray, bias_vector: np.ndarray) -> np.ndarray:
    """One dense layer: sigmoid of the affine transform W @ x + b."""
    pre_activation = weight_matrix @ input_vector + bias_vector
    return sigmoid(pre_activation)
def feed_forward(x: np.ndarray, *layers: tuple[np.ndarray, np.ndarray]) -> np.ndarray:
    """Propagate x through each (weights, biases) layer in order."""
    result = x
    for weight_matrix, bias_vector in layers:
        result = layer_func(result, weight_matrix, bias_vector)
    return result
def main(n):
    """Time `n` feed-forward runs on the generated data and print the seconds.

    timeit executes the setup once (loading the data) and the statement `n`
    times; importing from __main__ inside the setup string keeps data loading
    out of the timed section.
    """
    t = timeit(
        'feed_forward(inp, *layers)',
        setup='from __main__ import feed_forward, load_test_data;' +
              'inp, layers = load_test_data()',
        number=n
    )
    print(round(t, 5))


if __name__ == '__main__':
    # First CLI argument: number of timing iterations.
    main(int(sys.argv[1]))

137
py/gen_data.py Normal file
View File

@ -0,0 +1,137 @@
import sys
from argparse import ArgumentParser
from pathlib import Path
from typing import Sequence
import numpy as np
THIS_DIR = Path(__file__).parent
# Generated files go to ../data by default (shared with the other languages).
DEFAULT_DATA_DIR = Path(THIS_DIR, '..', 'data')
# Base names (without extension) for the generated CSV files.
INPUTS_FILE_NAME = 'inputs'
WEIGHTS_FILE_NAME = 'weights'
BIASES_FILE_NAME = 'biases'
# Module-level random generator; unseeded, so output differs on every run.
RNG = np.random.default_rng()
def extension_with_dot(string: str) -> str:
    """Normalize a file-extension string so it starts with a dot.

    Whitespace is stripped; an empty result stays empty.
    """
    trimmed = string.strip()
    if not trimmed or trimmed.startswith('.'):
        return trimmed
    return '.' + trimmed
def parse_cli(args: list[str] = None) -> dict:
    """Parse command line arguments into a dict suitable for `generate_data(**...)`.

    Args:
        args: Argument list to parse; `None` lets argparse use `sys.argv[1:]`.

    Returns:
        `vars()` of the parsed argparse namespace.
    """
    parser = ArgumentParser(description="Create test data files (input vector, weight matrices, and bias vectors).")
    parser.add_argument(
        'num_inputs',
        type=int,
        metavar='inputs',
        help="Number of input dimensions. Random values are generated from a uniform distribution between 0.0 and 1.0."
    )
    parser.add_argument(
        'num_neurons',
        nargs='+',
        type=int,
        metavar='neurons',
        help="Number of neurons in a layer. A weights file and a biases file will be created for each value passed. "
             "Random values are generated from a uniform distribution between -1.0 and 1.0."
    )
    parser.add_argument(
        '-d', '--directory',
        type=Path,
        default=DEFAULT_DATA_DIR,
        help=f"Target directory to create the generated files in. Defaults to '{DEFAULT_DATA_DIR}'."
    )
    parser.add_argument(
        '-e', '--file-extension',
        type=extension_with_dot,
        default='.csv',
        help="File extension to use for the generated file names. Defaults to '.csv'."
    )
    parser.add_argument(
        '--fmt',
        default='%f',
        help="Passed as the `fmt` parameter to numpy's `savetxt` function. Defaults to '%%f'. "
             "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)"
    )
    parser.add_argument(
        '--delimiter',
        default=',',
        help="Passed as the `delimiter` parameter to numpy's `savetxt` function. Defaults to ','. "
             "(https://numpy.org/doc/stable/reference/generated/numpy.savetxt.html)"
    )
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="If this flag is set, no additional info is printed throughout the script."
    )
    parser.add_argument(
        '-Y', '--yes',
        action='store_true',
        help="If this flag is set, confirmation is assumed throughout the script."
    )
    return vars(parser.parse_args(args))
def prepare_directory(directory: Path, file_extension: str, yes: bool = False, quiet: bool = False) -> None:
    """Create `directory` if needed and clear files matching `file_extension`.

    If matching files exist, the user is prompted before deletion unless
    `yes` is set (in which case deletion is assumed).

    Raises:
        ValueError: if the prompt answer is not 'y', 'n', or empty.
    """
    directory.mkdir(exist_ok=True)
    existing_files = list(directory.glob(f'*{file_extension}'))
    if not existing_files:
        return
    if yes:
        delete = 'y'
    else:
        # An empty answer defaults to 'y'.
        delete = input(f"{len(existing_files)} existing files with '{file_extension}' extension "
                       f"found in {directory}. Delete these first? [Y/n] ").strip().lower() or 'y'
    if delete == 'y':
        for file_path in existing_files:
            file_path.unlink()
        if not quiet:
            print("Deleted existing files.")
    elif delete != 'n':
        # BUG FIX: previously raised a bare ValueError with no message.
        raise ValueError(f"Unrecognized answer: {delete!r}")
def generate_inputs(num_inputs: int, directory: Path, file_extension: str, quiet: bool = False, **kwargs) -> None:
    """Write a random input vector (uniform in [0, 1)) to the inputs file.

    Extra keyword arguments are forwarded to numpy's `savetxt`.
    """
    output_path = Path(directory, INPUTS_FILE_NAME).with_suffix(file_extension)
    vector = RNG.uniform(0.0, 1.0, size=num_inputs)
    np.savetxt(output_path, vector, **kwargs)
    if not quiet:
        # Report the file name and its dimensions, e.g. "inputs.csv 100".
        print(output_path, 'x'.join(str(n) for n in vector.shape))
def generate_layers(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str,
                    quiet: bool = False, **kwargs) -> None:
    """Write one random weight-matrix file and one bias-vector file per layer.

    Layer i (1-based) gets a (num_neurons[i], previous_dim) weight matrix and a
    num_neurons[i]-sized bias vector, all uniform in [-1, 1). Extra keyword
    arguments are forwarded to numpy's `savetxt`.
    """
    weights_path = Path(directory, WEIGHTS_FILE_NAME).with_suffix(file_extension)
    biases_path = Path(directory, BIASES_FILE_NAME).with_suffix(file_extension)
    previous_dim = num_inputs
    for index, dim in enumerate(num_neurons, start=1):
        weight_matrix = RNG.uniform(-1.0, 1.0, size=(dim, previous_dim))
        bias_vector = RNG.uniform(-1.0, 1.0, size=dim)
        # Number the files per layer: weights01.csv, biases01.csv, ...
        weights_path = weights_path.with_stem(f'{WEIGHTS_FILE_NAME}{index:02}')
        biases_path = biases_path.with_stem(f'{BIASES_FILE_NAME}{index:02}')
        np.savetxt(weights_path, weight_matrix, **kwargs)
        np.savetxt(biases_path, bias_vector, **kwargs)
        if not quiet:
            print(weights_path, 'x'.join(str(n) for n in weight_matrix.shape))
            print(biases_path, 'x'.join(str(n) for n in bias_vector.shape))
        previous_dim = dim
def generate_data(num_inputs: int, num_neurons: Sequence[int], directory: Path, file_extension: str,
                  quiet: bool = False, yes: bool = False, **kwargs) -> None:
    """Prepare the target directory, then generate input, weight, and bias files.

    Remaining keyword arguments are forwarded to numpy's `savetxt`.
    """
    # BUG FIX: `prepare_directory` takes (directory, file_extension, yes, quiet);
    # the original passed `quiet, yes` positionally, swapping the two flags —
    # so `-q` was treated as auto-confirm and `-Y` as quiet. Pass by keyword.
    prepare_directory(directory, file_extension, yes=yes, quiet=quiet)
    if not quiet:
        print("Creating new test data...")
    generate_inputs(num_inputs, directory, file_extension, quiet, **kwargs)
    generate_layers(num_inputs, num_neurons, directory, file_extension, quiet, **kwargs)
def main() -> None:
    """CLI entry point: generate test data according to the parsed arguments."""
    generate_data(**parse_cli())


if __name__ == '__main__':
    main()

1
py/requirements.txt Normal file
View File

@ -0,0 +1 @@
numpy

16
rs/Cargo.toml Normal file
View File

@ -0,0 +1,16 @@
[package]
name = "feed_forward"
version = "0.1.0"
edition = "2021"
[[bin]]
name = "feed_forward"
path = "src/feed_forward.rs"
[dependencies]
ndarray = { version = "0.15.0", features = ["rayon", "blas"] }
blas-src = { version = "0.8", features = ["openblas"] }
openblas-src = { version = "0.10", features = ["cblas", "system"] }
ndarray-rand = "0.14.0"
csv = "*"
glob = "*"

98
rs/src/feed_forward.rs Normal file
View File

@ -0,0 +1,98 @@
#![allow(dead_code)]
extern crate blas_src;
use std::env;
use std::fs::File;
use std::path::Path;
use std::time::Instant;
use csv;
use glob::glob;
use ndarray::{Array, Array1, Array2};
mod sigmoids; // different custom implementations of the sigmoid function
use sigmoids::{sigmoid_ndarr_mapv_inplace as sigmoid};
// Directory (relative to the working directory) holding the CSV test data.
const DIR_DATA: &str = "data";
/// Parse every field of one CSV record as an f32; panics on malformed input.
fn csv_row_to_float_vec(row: &csv::StringRecord) -> Vec<f32> {
    row.iter()
        .map(|field| field.parse::<f32>().expect("Not a float"))
        .collect()
}
/// Flatten a headerless CSV file into a single row-major vector of f32,
/// also reporting the column count taken from the first row (0 if empty).
fn csv_to_float_vec(file_path: &Path) -> (Vec<f32>, usize) {
    let file = File::open(file_path).expect("Can't open file");
    let mut reader = csv::ReaderBuilder::new().has_headers(false).from_reader(file);
    let mut values: Vec<f32> = Vec::new();
    let mut num_columns: usize = 0;
    for record in reader.records() {
        let row = csv_row_to_float_vec(&record.expect("No csv record"));
        if num_columns == 0 {
            num_columns = row.len();
        }
        values.extend(row);
    }
    (values, num_columns)
}
/// Read a CSV file into a 1-D ndarray; the column count is irrelevant here.
fn csv_to_array1(file_path: &Path) -> Array1<f32> {
    let (values, _) = csv_to_float_vec(file_path);
    Array::from(values)
}
/// Read a CSV file into a 2-D ndarray, reshaping the flattened values into
/// (rows, columns) row-major order based on the first row's column count.
fn csv_to_array2(file_path: &Path) -> Array2<f32> {
    let (values, num_columns) = csv_to_float_vec(file_path);
    let num_rows = values.len() / num_columns;
    Array2::from_shape_vec((num_rows, num_columns), values).unwrap()
}
/// Load inputs.csv plus every (weightsNN.csv, biasesNN.csv) pair from ./data.
fn load_test_data() -> (Array1<f32>, Vec<(Array2<f32>, Array1<f32>)>) {
    let test_data_dir = Path::new(".").join(DIR_DATA);
    let inputs: Array1<f32> = csv_to_array1(&test_data_dir.join("inputs.csv"));
    let weights_glob = test_data_dir.join("weights*.csv");
    let biases_glob = test_data_dir.join("biases*.csv");
    // NOTE(review): pairing weights[i] with biases[i] relies on glob() yielding
    // the numbered files in matching (alphabetical) order — confirm for this crate.
    let weights_iter = glob(weights_glob.to_str().unwrap()).unwrap().map(
        |path_result| {csv_to_array2(&path_result.unwrap())}
    );
    let biases_iter = glob(biases_glob.to_str().unwrap()).unwrap().map(
        |path_result| {csv_to_array1(&path_result.unwrap())}
    );
    return (inputs, weights_iter.zip(biases_iter).collect());
}
/// One dense layer: activation(W·x + b). `virtual_vector` is forwarded to the
/// activation, which leaves elements flagged `true` untouched (see sigmoids.rs).
fn layer_func<F>(input_vector: &Array1<f32>, weight_matrix: &Array2<f32>,
                 bias_vector: &Array1<f32>, activation: &F,
                 virtual_vector: Option<&Array1<bool>>) -> Array1<f32>
    where F: Fn(Array1<f32>, Option<&Array1<bool>>) -> Array1<f32> {
    return activation(weight_matrix.dot(input_vector) + bias_vector, virtual_vector)
}
/// Propagate a copy of `x` through every (weights, biases) layer in order.
fn feed_forward<F>(x: &Array1<f32>, layers: &[(Array2<f32>, Array1<f32>)], activation: &F) -> Array1<f32>
    where F: Fn(Array1<f32>, Option<&Array1<bool>>) -> Array1<f32> {
    let mut y = x.to_owned();
    for (w, b) in layers {
        // NOTE(review): an all-false "virtual" mask is allocated per layer,
        // forcing the masked activation path even though `None` would give
        // identical output faster — presumably deliberate for the benchmark;
        // confirm intent before changing.
        y = layer_func(&y, w, b, activation, Some(&Array::from_elem(b.raw_dim(), false)))
    }
    return y;
}
/// Load the test network once, time `n` feed-forward passes, and print only
/// the elapsed seconds (run.sh captures this value).
fn time_feed_forward(n: usize) {
    let (inp, layers) = load_test_data();
    let t0 = Instant::now();
    for _ in 0..n { feed_forward(&inp, &layers, &sigmoid); }
    let elapsed = t0.elapsed();
    println!("{:.5}", elapsed.as_secs_f64());
}
/// Parse the iteration count from the first CLI argument and run the benchmark.
fn main() {
    // BUG FIX: indexing args[1] panicked with an unhelpful out-of-bounds
    // message when no argument was given; report the missing argument instead.
    let n: usize = env::args()
        .nth(1)
        .expect("Missing iteration-count argument")
        .parse::<usize>()
        .expect("Not an integer");
    time_feed_forward(n);
}

107
rs/src/sigmoids.rs Normal file
View File

@ -0,0 +1,107 @@
use std::time::Instant;
use ndarray::{Array1, Zip};
use ndarray_rand::{RandomExt, rand_distr::Uniform};
/// Scalar sigmoid taking its argument by value.
pub fn sigmoid_val(x: f32) -> f32 {
    return 1. / (1. + (-x).exp());
}

/// Scalar sigmoid taking its argument by shared reference.
pub fn sigmoid_ref(x: &f32) -> f32 {
    return 1. / (1. + (-x).exp());
}

/// Scalar sigmoid applied in place through a mutable reference.
pub fn sigmoid_mut_ref(x: &mut f32) {
    *x = 1. / (1. + (-*x).exp());
}

/// Sigmoid over a plain Vec, allocating a new Vec for the result.
pub fn sigmoid_vec(x: &Vec<f32>) -> Vec<f32> {
    return x.iter().map(|&v| sigmoid_val(v)).collect();
}

/// Sigmoid via ndarray's `map` (by-reference access); allocates a new array.
pub fn sigmoid_ndarr_map(x: Array1<f32>) -> Array1<f32> {
    return x.map(sigmoid_ref);
}

/// Sigmoid via ndarray's `mapv` (by-value access); allocates a new array.
pub fn sigmoid_ndarr_mapv(x: Array1<f32>) -> Array1<f32> {
    return x.mapv(sigmoid_val);
}

/// In-place sigmoid via `map_inplace`; reuses the input's storage.
pub fn sigmoid_ndarr_map_inplace(mut x: Array1<f32>) -> Array1<f32> {
    x.map_inplace(sigmoid_mut_ref);
    return x;
}

/// In-place sigmoid with an optional "virtual element" mask: where `virt` is
/// true the element is left unchanged, otherwise the sigmoid is applied.
/// With `None`, the whole array is transformed via `mapv_inplace`.
pub fn sigmoid_ndarr_mapv_inplace(mut x: Array1<f32>, virt: Option<&Array1<bool>>) -> Array1<f32> {
    if virt.is_some() {
        Zip::from(&mut x).and(virt.unwrap()).for_each(
            |elem, is_virt| *elem = if *is_virt { *elem } else { sigmoid_ref(elem) }
        );
    } else {
        x.mapv_inplace(sigmoid_val);
    }
    return x;
}

/// Parallel (rayon) variant of `map_inplace`.
pub fn sigmoid_ndarr_par_map_inplace(mut x: Array1<f32>) -> Array1<f32> {
    x.par_map_inplace(sigmoid_mut_ref);
    return x;
}

/// Parallel (rayon) variant of `mapv_inplace`.
pub fn sigmoid_ndarr_par_mapv_inplace(mut x: Array1<f32>) -> Array1<f32> {
    x.par_mapv_inplace(sigmoid_val);
    return x;
}
/// Benchmark each sigmoid implementation by running it `n` times over a random
/// array of length `arr_len`, printing the elapsed seconds per variant.
/// NOTE(review): each ndarray variant feeds its previous output back in, so
/// values drift toward the sigmoid's fixed point across iterations — fine for
/// timing, but the variants do not all see identical inputs.
pub fn time_sigmoids(n: usize, arr_len: usize) {
    let arr: Array1<f32> = Array1::random(arr_len, Uniform::new(0., 1.));
    let vec: Vec<f32> = arr.to_vec();
    // Vec baseline: allocates a fresh Vec per call.
    let t0 = Instant::now();
    for _ in 0..n { let _result = sigmoid_vec(&vec); }
    let elapsed = t0.elapsed();
    println!("sigmoid_vec took {:.5} seconds", elapsed.as_secs_f64());
    let mut arr_copy = arr.to_owned();
    let t0 = Instant::now();
    for _ in 0..n { arr_copy = sigmoid_ndarr_map(arr_copy); }
    let elapsed = t0.elapsed();
    println!("sigmoid_ndarr_map took {:.5} seconds", elapsed.as_secs_f64());
    arr_copy = arr.to_owned();
    let t0 = Instant::now();
    for _ in 0..n { arr_copy = sigmoid_ndarr_mapv(arr_copy); }
    let elapsed = t0.elapsed();
    println!("sigmoid_ndarr_mapv took {:.5} seconds", elapsed.as_secs_f64());
    arr_copy = arr.to_owned();
    let t0 = Instant::now();
    for _ in 0..n { arr_copy = sigmoid_ndarr_map_inplace(arr_copy); }
    let elapsed = t0.elapsed();
    println!("sigmoid_ndarr_map_inplace took {:.5} seconds", elapsed.as_secs_f64());
    arr_copy = arr.to_owned();
    // All-false mask exercises the masked (Zip) code path.
    let virt = ndarray::Array::from_elem(arr.raw_dim(), false);
    let t0 = Instant::now();
    for _ in 0..n { arr_copy = sigmoid_ndarr_mapv_inplace(arr_copy, Some(&virt)); }
    let elapsed = t0.elapsed();
    println!("sigmoid_ndarr_mapv_inplace took {:.5} seconds", elapsed.as_secs_f64());
    arr_copy = arr.to_owned();
    let t0 = Instant::now();
    for _ in 0..n { arr_copy = sigmoid_ndarr_par_map_inplace(arr_copy); }
    let elapsed = t0.elapsed();
    println!("sigmoid_ndarr_par_map_inplace took {:.5} seconds", elapsed.as_secs_f64());
    arr_copy = arr.to_owned();
    let t0 = Instant::now();
    for _ in 0..n { arr_copy = sigmoid_ndarr_par_mapv_inplace(arr_copy); }
    let elapsed = t0.elapsed();
    println!("sigmoid_ndarr_par_mapv_inplace took {:.5} seconds", elapsed.as_secs_f64());
}

34
run.sh Executable file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env sh
# Benchmark the feed-forward implementations (Python, Julia, Go, Rust).
# Usage: run.sh <num_times> <gen_data.py args...>
venv_dir=.venv

# Run each implementation once, passing the iteration count as $1.
# Body uses (...) so the function runs in a subshell.
run_tests() (
	echo "py $("${venv_dir}/bin/python" py/feed_forward.py "$1")"
	echo "jl $(julia jl/feed_forward.jl "$1")"
	echo "go $(go/target/feed_forward "$1")"
	echo "rs $(rs/target/release/feed_forward "$1")"
)

num_times=$1
shift 1
echo 'Compiling Rust binaries' >&2
cargo build -q --release --manifest-path rs/Cargo.toml
echo 'Compiling Go binary' >&2
# BUG FIX: the original `cd go && ... && cd ..` left the working directory
# inside go/ whenever the build failed; a subshell keeps the cwd intact.
(cd go && go get && go build -o target/feed_forward feed_forward.go)
[ -d "${venv_dir}" ] || (
	echo 'Creating Python virtual environment' &&
	python -m venv "${venv_dir}" &&
	# BUG FIX: `&> /dev/null` is a bashism; under POSIX sh it parses as
	# backgrounding (`&`) plus `> /dev/null`. Use explicit redirections.
	"${venv_dir}/bin/pip" install -r py/requirements.txt > /dev/null 2>&1
)
# Temporarily join the layer sizes with '-' for the status message only.
old="$IFS"
IFS='-'
echo "Generating $*-network test data" >&2
IFS=$old
"${venv_dir}/bin/python" py/gen_data.py -q -Y "$@"
echo "Running feed forward $num_times times" >&2
run_tests "$num_times"