generated from daniil-berg/boilerplate-py
refactor entire package; add CLI
This commit is contained in:
parent
be3aebaf07
commit
7c9f97f21b
2
.gitignore
vendored
2
.gitignore
vendored
@ -8,3 +8,5 @@
|
|||||||
/dist/
|
/dist/
|
||||||
# Python cache:
|
# Python cache:
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|
||||||
|
saved_models/
|
||||||
|
@ -6,16 +6,26 @@ from .config import CONFIG
|
|||||||
|
|
||||||
|
|
||||||
CMD = 'command'
|
CMD = 'command'
|
||||||
|
|
||||||
TRAIN = 'train'
|
TRAIN = 'train'
|
||||||
|
|
||||||
DATA_DIR = 'data_dir'
|
DATA_DIR = 'data_dir'
|
||||||
SAVE_DIR = 'save_dir'
|
SAVE_DIR = 'save_dir'
|
||||||
FILE_EXTENSIONS = 'file_extensions'
|
FILE_EXT = 'file_ext'
|
||||||
BATCH_SIZE = 'batch_size'
|
BATCH_SIZE = 'batch_size'
|
||||||
|
VALIDATION_RATIO = 'validation_ratio'
|
||||||
|
IMG_WIDTH = 'img_width'
|
||||||
|
IMG_HEIGHT = 'img_height'
|
||||||
NUM_EPOCHS = 'num_epochs'
|
NUM_EPOCHS = 'num_epochs'
|
||||||
EARLY_STOPPING_PATIENCE = 'early_stopping_patience'
|
EARLY_STOPPING_PATIENCE = 'early_stopping_patience'
|
||||||
|
_PREPROCESSING_KEYS = (DATA_DIR, FILE_EXT, BATCH_SIZE, VALIDATION_RATIO, IMG_WIDTH, IMG_HEIGHT)
|
||||||
|
_TRAINING_KEYS = (SAVE_DIR, NUM_EPOCHS, EARLY_STOPPING_PATIENCE)
|
||||||
|
|
||||||
INFER = 'infer'
|
INFER = 'infer'
|
||||||
MODEL_DIR = 'model_dir'
|
MODEL_DIR = 'model_dir'
|
||||||
|
IMAGES_DIR = 'images_dir'
|
||||||
|
IMAGE_FILES = 'image_files'
|
||||||
|
PLOT_RESULTS = 'plot_results'
|
||||||
|
|
||||||
|
|
||||||
def ext_list(string: str) -> list[str]:
|
def ext_list(string: str) -> list[str]:
|
||||||
@ -34,14 +44,14 @@ def parse_cli(args: Sequence[str] = None) -> dict[str, Any]:
|
|||||||
description="Character CAPTCHA Solver",
|
description="Character CAPTCHA Solver",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-E', f'--{FILE_EXTENSIONS.replace("_", "-")}',
|
'-E', f'--{FILE_EXT.replace("_", "-")}',
|
||||||
default=CONFIG.DEFAULT_DATA_FILE_EXTENSIONS,
|
default=CONFIG.DEFAULT_IMG_FILE_EXT,
|
||||||
type=ext_list,
|
type=ext_list,
|
||||||
help=f"When used in `{TRAIN}` mode, extensions of the image files to be used for training/testing the model. "
|
help=f"When used in `{TRAIN}` mode, extensions of the image files to be used for training/testing the model. "
|
||||||
f"When used in `{INFER}` mode, extensions of the image files to use the model on."
|
f"When used in `{INFER}` mode, extensions of the image files to use the model on."
|
||||||
f"Defaults to {CONFIG.DEFAULT_DATA_FILE_EXTENSIONS}."
|
f"Defaults to {CONFIG.DEFAULT_IMG_FILE_EXT}."
|
||||||
)
|
)
|
||||||
subparsers = parser.add_subparsers(dest=CMD)
|
subparsers = parser.add_subparsers(dest=CMD, title="Commands")
|
||||||
|
|
||||||
parser_train = subparsers.add_parser(TRAIN, help="trains a new model")
|
parser_train = subparsers.add_parser(TRAIN, help="trains a new model")
|
||||||
parser_train.add_argument(
|
parser_train.add_argument(
|
||||||
@ -49,7 +59,9 @@ def parse_cli(args: Sequence[str] = None) -> dict[str, Any]:
|
|||||||
type=Path,
|
type=Path,
|
||||||
help="Directory containing the image files to be used for training/testing the model."
|
help="Directory containing the image files to be used for training/testing the model."
|
||||||
)
|
)
|
||||||
parser_train.add_argument(
|
preprocessing_group = parser_train.add_argument_group("Preprocessing options")
|
||||||
|
training_group = parser_train.add_argument_group("Training options")
|
||||||
|
training_group.add_argument(
|
||||||
'-s', f'--{SAVE_DIR.replace("_", "-")}',
|
'-s', f'--{SAVE_DIR.replace("_", "-")}',
|
||||||
default=CONFIG.DEFAULT_SAVE_DIR,
|
default=CONFIG.DEFAULT_SAVE_DIR,
|
||||||
type=Path,
|
type=Path,
|
||||||
@ -57,20 +69,40 @@ def parse_cli(args: Sequence[str] = None) -> dict[str, Any]:
|
|||||||
f"current date and time will be created there and the model will be saved in that subdirectory. "
|
f"current date and time will be created there and the model will be saved in that subdirectory. "
|
||||||
f"Defaults to '{CONFIG.DEFAULT_SAVE_DIR}'."
|
f"Defaults to '{CONFIG.DEFAULT_SAVE_DIR}'."
|
||||||
)
|
)
|
||||||
parser_train.add_argument(
|
preprocessing_group.add_argument(
|
||||||
'-b', f'--{BATCH_SIZE.replace("_", "-")}',
|
'-b', f'--{BATCH_SIZE.replace("_", "-")}',
|
||||||
default=CONFIG.DEFAULT_BATCH_SIZE,
|
default=CONFIG.DEFAULT_BATCH_SIZE,
|
||||||
type=int,
|
type=int,
|
||||||
help=f"The dataset will be divided into batches; this determines the number of images in each batch. "
|
help=f"The dataset will be divided into batches; this determines the number of images in each batch. "
|
||||||
f"Defaults to {CONFIG.DEFAULT_BATCH_SIZE}."
|
f"Defaults to {CONFIG.DEFAULT_BATCH_SIZE}."
|
||||||
)
|
)
|
||||||
parser_train.add_argument(
|
preprocessing_group.add_argument(
|
||||||
|
'-r', f'--{VALIDATION_RATIO.replace("_", "-")}',
|
||||||
|
default=CONFIG.DEFAULT_VALIDATION_RATIO,
|
||||||
|
type=float,
|
||||||
|
help=f"The dataset will split into training and validation data; this argument should be a float between 0 "
|
||||||
|
f"and 1 determining the relative size of the validation dataset to the whole dataset. "
|
||||||
|
f"Defaults to {round(CONFIG.DEFAULT_VALIDATION_RATIO, 3)}."
|
||||||
|
)
|
||||||
|
preprocessing_group.add_argument(
|
||||||
|
'-W', f'--{IMG_WIDTH.replace("_", "-")}',
|
||||||
|
default=CONFIG.DEFAULT_IMG_WIDTH,
|
||||||
|
type=int,
|
||||||
|
help=f"The width of an image in pixels. Defaults to {CONFIG.DEFAULT_IMG_WIDTH}."
|
||||||
|
)
|
||||||
|
preprocessing_group.add_argument(
|
||||||
|
'-H', f'--{IMG_HEIGHT.replace("_", "-")}',
|
||||||
|
default=CONFIG.DEFAULT_IMG_HEIGHT,
|
||||||
|
type=int,
|
||||||
|
help=f"The height of an image in pixels. Defaults to {CONFIG.DEFAULT_IMG_HEIGHT}."
|
||||||
|
)
|
||||||
|
training_group.add_argument(
|
||||||
'-n', f'--{NUM_EPOCHS.replace("_", "-")}',
|
'-n', f'--{NUM_EPOCHS.replace("_", "-")}',
|
||||||
default=CONFIG.DEFAULT_NUM_EPOCHS,
|
default=CONFIG.DEFAULT_NUM_EPOCHS,
|
||||||
type=int,
|
type=int,
|
||||||
help=f"The number of training epochs. Defaults to {CONFIG.DEFAULT_NUM_EPOCHS}."
|
help=f"The number of training epochs. Defaults to {CONFIG.DEFAULT_NUM_EPOCHS}."
|
||||||
)
|
)
|
||||||
parser_train.add_argument(
|
training_group.add_argument(
|
||||||
'-p', f'--{EARLY_STOPPING_PATIENCE.replace("_", "-")}',
|
'-p', f'--{EARLY_STOPPING_PATIENCE.replace("_", "-")}',
|
||||||
default=CONFIG.DEFAULT_EARLY_STOPPING_PATIENCE,
|
default=CONFIG.DEFAULT_EARLY_STOPPING_PATIENCE,
|
||||||
type=int,
|
type=int,
|
||||||
@ -85,10 +117,24 @@ def parse_cli(args: Sequence[str] = None) -> dict[str, Any]:
|
|||||||
type=Path,
|
type=Path,
|
||||||
help="Directory containing the model to use for inference."
|
help="Directory containing the model to use for inference."
|
||||||
)
|
)
|
||||||
parser_infer.add_argument(
|
data_group = parser_infer.add_mutually_exclusive_group()
|
||||||
DATA_DIR,
|
data_group.add_argument(
|
||||||
|
'-f', f'--{IMAGE_FILES.replace("_", "-")}',
|
||||||
type=Path,
|
type=Path,
|
||||||
help="Directory containing the image files to use the model on."
|
nargs='*',
|
||||||
|
metavar='PATH',
|
||||||
|
help="Paths to image files to use the model on."
|
||||||
|
)
|
||||||
|
data_group.add_argument(
|
||||||
|
'-d', f'--{IMAGES_DIR.replace("_", "-")}',
|
||||||
|
type=Path,
|
||||||
|
metavar='PATH',
|
||||||
|
help="Path to directory containing the image files to use the model on."
|
||||||
|
)
|
||||||
|
parser_infer.add_argument(
|
||||||
|
'-p', f'--{PLOT_RESULTS.replace("_", "-")}',
|
||||||
|
action='store_true',
|
||||||
|
help="If set, a plot will be displayed, showing the images with the inferred labels."
|
||||||
)
|
)
|
||||||
return vars(parser.parse_args(args))
|
return vars(parser.parse_args(args))
|
||||||
|
|
||||||
@ -98,12 +144,15 @@ def main() -> None:
|
|||||||
cmd = kwargs.pop(CMD)
|
cmd = kwargs.pop(CMD)
|
||||||
if cmd == TRAIN:
|
if cmd == TRAIN:
|
||||||
from .model import start
|
from .model import start
|
||||||
start(**kwargs)
|
from .preprocess import load_datasets
|
||||||
|
pre_kwargs = {k: kwargs.pop(k) for k in _PREPROCESSING_KEYS}
|
||||||
|
training_data, validation_data, vocabulary = load_datasets(**pre_kwargs)
|
||||||
|
start(training_data, validation_data, vocabulary, **kwargs)
|
||||||
elif cmd == INFER:
|
elif cmd == INFER:
|
||||||
from .infer import start
|
from .infer import start
|
||||||
start(**kwargs)
|
start(**kwargs)
|
||||||
else:
|
else:
|
||||||
raise NotImplemented
|
raise SystemExit # Should be unreachable since argument parser will throw an error earlier
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -5,16 +5,29 @@ class CONFIG(object):
|
|||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
PROGRAM_NAME = 'ccaptchas'
|
PROGRAM_NAME = 'ccaptchas'
|
||||||
|
|
||||||
DEFAULT_SAVE_DIR = Path('.', 'saved_models')
|
EXT_PNG, EXT_JPG = '.png', '.jpg'
|
||||||
DEFAULT_DATA_FILE_EXTENSIONS = ('.jpg', '.png')
|
DEFAULT_IMG_FILE_EXT = (EXT_PNG, EXT_JPG)
|
||||||
DEFAULT_BATCH_SIZE = 10
|
|
||||||
|
# StringLookup parameters:
|
||||||
|
DEFAULT_NUM_OOV_INDICES = 0 # assuming no out-of-vocabulary (OOV) characters will be encountered
|
||||||
|
DEFAULT_MASK_TOKEN = None # assuming there will never be a value missing
|
||||||
|
|
||||||
|
# Data splitting:
|
||||||
|
DEFAULT_VALIDATION_RATIO = 1 / 8
|
||||||
|
DEFAULT_SHUFFLE_DATA = True
|
||||||
|
|
||||||
|
# Image processing:
|
||||||
|
DEFAULT_IMG_WIDTH, DEFAULT_IMG_HEIGHT = 250, 50 # Desired image dimensions
|
||||||
|
|
||||||
|
# Training hyper-parameters:
|
||||||
|
DEFAULT_BATCH_SIZE = 16
|
||||||
DEFAULT_NUM_EPOCHS = 100
|
DEFAULT_NUM_EPOCHS = 100
|
||||||
DEFAULT_EARLY_STOPPING_PATIENCE = 10
|
DEFAULT_EARLY_STOPPING_PATIENCE = 10
|
||||||
|
|
||||||
VALIDATION_DATA_RATIO = 1 / 8
|
DEFAULT_SAVE_DIR = Path('.', 'saved_models')
|
||||||
SHUFFLE_DATA = True
|
MODEL_NAME = f'{PROGRAM_NAME}_model'
|
||||||
INPUT_LAYER_NAME_IMAGE, INPUT_LAYER_NAME_LABEL = 'image', 'label'
|
LAYER_NAME_INPUT_IMAGE, LAYER_NAME_INPUT_LABEL = 'image', 'label'
|
||||||
OUTPUT_LAYER_NAME = 'encoded_output'
|
LAYER_NAME_OUTPUT = 'encoded_output'
|
||||||
MAX_STRING_LENGTH = 6 # Maximum number of character in any captcha image in the dataset
|
MAX_STRING_LENGTH = 6 # Maximum number of character in any captcha image in the dataset
|
||||||
IMG_WIDTH, IMG_HEIGHT = 250, 50 # Desired image dimensions
|
|
||||||
VOCABULARY_FILE_NAME = '.vocabulary'
|
VOCABULARY_FILE_NAME = '.vocabulary'
|
||||||
|
HISTORY_FILE_NAME = '.history.json'
|
||||||
|
24
src/ccaptchas/ctc_layer.py
Normal file
24
src/ccaptchas/ctc_layer.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from .keras.backend import ctc_batch_cost
|
||||||
|
from .keras.layers import Layer
|
||||||
|
|
||||||
|
|
||||||
|
class CTCLayer(Layer):
    """Pass-through layer that attaches the CTC batch loss to the model.

    The layer returns its `y_pred` input unchanged; its only effect is to
    compute `ctc_batch_cost` for the given labels and predictions and to
    register the result on the layer via `add_loss()`.
    """

    def __init__(self, name: str = None, **kwargs):
        """
        Args:
            name (optional):
                Layer name; forwarded to the base `Layer` constructor.
            **kwargs (optional):
                Any further base `Layer` constructor arguments (e.g. `trainable`, `dtype`).
                They are accepted and forwarded so that the layer can be re-created from
                a layer config, which carries such keys in addition to `name`.
        """
        super().__init__(name=name, **kwargs)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true: tf.Tensor = None, y_pred: tf.Tensor = None) -> tf.Tensor:
        """
        Computes the CTC loss for a batch, adds it to the layer and returns the predictions.

        Args:
            y_true:
                Label tensor; axis 0 is used as the batch size and axis 1 as the label length.
            y_pred:
                Prediction tensor; axis 1 is used as the input (sequence) length.

        Returns:
            `y_pred`, unchanged.
        """
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype='int64')
        input_length = tf.cast(tf.shape(y_pred)[1], dtype='int64')
        label_length = tf.cast(tf.shape(y_true)[1], dtype='int64')
        # The loss function takes one length per sample, so the scalar lengths
        # are expanded to column tensors of shape (batch_len, 1).
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype='int64')
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype='int64')
        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)
        # At test time, just return the computed predictions
        return y_pred
|
@ -1,63 +1,78 @@
|
|||||||
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable
|
from typing import Iterable, Sequence
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from keras.api._v2.keras.models import Model, load_model
|
from keras.api._v2.keras.models import Model, load_model
|
||||||
from keras.api._v2.keras.layers.experimental.preprocessing import StringLookup
|
from keras.api._v2.keras.layers import StringLookup
|
||||||
from keras.api._v2.keras.backend import ctc_decode
|
from keras.api._v2.keras.backend import ctc_decode
|
||||||
|
|
||||||
from .config import CONFIG
|
from .config import CONFIG
|
||||||
from .preproc import encode_image, decode_label, plot_images
|
from .preprocess import process_image, decode_label, find_image_files, get_lookup_table
|
||||||
from .types import PathT
|
from .types import PathT, ImgT, Array
|
||||||
|
from .visualize import plot_images
|
||||||
|
|
||||||
|
|
||||||
def images_to_input(*images) -> tf.data.Dataset:
|
def process_predictions(predictions: tf.Tensor) -> tf.Tensor:
|
||||||
array = np.array([encode_image(img) for img in images])
|
num_predictions = predictions.shape[0] # corresponds to the number of images passed into the model for inference
|
||||||
return tf.data.Dataset.from_tensor_slices(array)
|
output_width = predictions.shape[1] # corresponds to the (down-sampled) width of an image
|
||||||
|
# It is worth noting that `predictions.shape[2]` corresponds to the size of the vocabulary + 1,
|
||||||
|
# i.e. one more than the number of distinct characters that can occur in a label.
|
||||||
|
|
||||||
|
# Since the `predictions` tensor is the output of a softmax activation function, we need to decode the values along
|
||||||
|
# the "width axis" from arrays of floats between 0 and 1 to single integers representing the inferred characters.
|
||||||
|
# (see CTC concepts)
|
||||||
|
|
||||||
|
# Construct 1D array, each element representing the width of a single prediction, i.e. the down-sampled image width:
|
||||||
|
seq_lengths = np.ones(num_predictions) * output_width
|
||||||
|
# Retrieve the sequences of label indices inferred by the model:
|
||||||
|
sequences, _probabilities = ctc_decode(predictions, input_length=seq_lengths, greedy=True)
|
||||||
|
# Since we use a greedy approach, only one sequence per prediction is returned, so we discard the other dimensions:
|
||||||
|
sequences = sequences[0]
|
||||||
|
# Now this is a 2D tensor, for which `sequences.shape[0]` corresponds to the number of samples/images,
|
||||||
|
# while `sequences.shape[1]` corresponds to the size of the vocabulary + 1.
|
||||||
|
# Assuming n characters were inferred, the first n elements of each array will be the label indices of those
|
||||||
|
# characters, whereas the rest of the elements will be -1, implying blank labels. Since we know the maximum length
|
||||||
|
# a string of characters in an image can have, we can discard all those labels, that must be blank.
|
||||||
|
# What we are then left with, will be an array of relevant label indices for each image passed through the model.
|
||||||
|
# Using a backward lookup table, these can later be easily decoded to the actual characters.
|
||||||
|
return sequences[:, :CONFIG.MAX_STRING_LENGTH]
|
||||||
|
|
||||||
|
|
||||||
def decode_batch_outputs(predictions, backward_lookup_table: StringLookup) -> list[str]:
|
def load_inference_model(model_dir: PathT) -> tuple[Model, StringLookup]:
|
||||||
input_len = np.ones(predictions.shape[0]) * predictions.shape[1]
|
with open(Path(model_dir, CONFIG.VOCABULARY_FILE_NAME), 'r') as vocab_file:
|
||||||
# Use greedy search. For complex tasks, you can use beam search
|
backward_lookup = get_lookup_table(vocab_file.read(), invert=True)
|
||||||
sequences, _ = ctc_decode(predictions, input_length=input_len, greedy=True)
|
saved_model = load_model(model_dir)
|
||||||
results = sequences[0][:, :CONFIG.MAX_STRING_LENGTH]
|
|
||||||
# Iterate over the results and get back the text
|
|
||||||
return [decode_label(result, backward_lookup_table) for result in results]
|
|
||||||
|
|
||||||
|
|
||||||
def load_inference_model(path: PathT) -> Model:
|
|
||||||
with open(Path(path, CONFIG.VOCABULARY_FILE_NAME), 'r') as vocab_file:
|
|
||||||
backward_lookup_table = StringLookup(vocabulary=list(vocab_file.read()), mask_token=None, invert=True)
|
|
||||||
saved_model = load_model(path)
|
|
||||||
inference_model = Model(
|
inference_model = Model(
|
||||||
saved_model.get_layer(name=CONFIG.INPUT_LAYER_NAME_IMAGE).input,
|
saved_model.get_layer(name=CONFIG.LAYER_NAME_INPUT_IMAGE).input,
|
||||||
saved_model.get_layer(name=CONFIG.OUTPUT_LAYER_NAME).output
|
saved_model.get_layer(name=CONFIG.LAYER_NAME_OUTPUT).output
|
||||||
)
|
)
|
||||||
|
return inference_model, backward_lookup
|
||||||
def infer_and_decode(x: tf.data.Dataset) -> list[str]:
|
|
||||||
return decode_batch_outputs(predictions=inference_model.predict(x), backward_lookup_table=backward_lookup_table)
|
|
||||||
|
|
||||||
inference_model.infer_and_decode = infer_and_decode
|
|
||||||
inference_model.backward_lookup_table = backward_lookup_table
|
|
||||||
return inference_model
|
|
||||||
|
|
||||||
|
|
||||||
def start(model_dir: PathT, data_dir: PathT,
|
def predict_and_decode(images: Sequence[ImgT], model: Model, backward_lookup: StringLookup) -> tuple[Array, list[str]]:
|
||||||
file_extensions: Iterable[str] = CONFIG.DEFAULT_DATA_FILE_EXTENSIONS) -> None:
|
dataset = np.array([process_image(img) for img in images])
|
||||||
data_dir = Path(data_dir)
|
encoded_labels = process_predictions(model.predict(dataset))
|
||||||
file_paths = []
|
return dataset, [decode_label(label, backward_lookup) for label in encoded_labels]
|
||||||
for ext in file_extensions:
|
|
||||||
file_paths.extend(data_dir.glob(f'*{ext}'))
|
|
||||||
file_paths.sort()
|
def load_and_infer(images: Sequence[ImgT], model_dir: PathT, plot_results: bool = False) -> list[str]:
|
||||||
count = len(file_paths)
|
model, backward_lookup = load_inference_model(model_dir)
|
||||||
if count > 24:
|
images, labels = predict_and_decode(images, model, backward_lookup)
|
||||||
raise ValueError("Too many files")
|
if plot_results:
|
||||||
# images = []
|
per_plot = 24
|
||||||
# for path in file_paths:
|
for i in range(0, len(images), per_plot):
|
||||||
# with open(path, 'rb') as f:
|
plot_images(images[i:(i + per_plot)], labels=labels[i:(i + per_plot)])
|
||||||
# images.append(f.read())
|
return labels
|
||||||
dataset = images_to_input(*file_paths)
|
|
||||||
model = load_inference_model(model_dir)
|
|
||||||
labels = model.infer_and_decode(dataset.batch(count))
|
def start(model_dir: PathT, image_files: Sequence[Path] = (), images_dir: PathT = None,
|
||||||
plot_images(list(dataset.as_numpy_iterator()), labels=labels)
|
file_ext: Iterable[str] = CONFIG.DEFAULT_IMG_FILE_EXT, plot_results: bool = False) -> None:
|
||||||
|
if images_dir is not None:
|
||||||
|
image_files = sorted(find_image_files(images_dir, file_ext=file_ext))
|
||||||
|
if not image_files:
|
||||||
|
image_files = [sys.stdin.buffer.read()]
|
||||||
|
labels = load_and_infer(image_files, model_dir, plot_results=plot_results)
|
||||||
|
for label in labels:
|
||||||
|
print(label)
|
||||||
|
0
src/ccaptchas/keras/__init__.py
Normal file
0
src/ccaptchas/keras/__init__.py
Normal file
1
src/ccaptchas/keras/backend.py
Normal file
1
src/ccaptchas/keras/backend.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from keras.api._v2.keras.backend import *
|
1
src/ccaptchas/keras/callbacks.py
Normal file
1
src/ccaptchas/keras/callbacks.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from keras.api._v2.keras.callbacks import *
|
1
src/ccaptchas/keras/layers.py
Normal file
1
src/ccaptchas/keras/layers.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from keras.api._v2.keras.layers import *
|
1
src/ccaptchas/keras/models.py
Normal file
1
src/ccaptchas/keras/models.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from keras.api._v2.keras.models import *
|
1
src/ccaptchas/keras/optimizers.py
Normal file
1
src/ccaptchas/keras/optimizers.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from keras.api._v2.keras.optimizers import *
|
@ -1,66 +1,40 @@
|
|||||||
import os
|
import logging
|
||||||
|
import json
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow import keras
|
|
||||||
from tensorflow.keras import layers
|
|
||||||
|
|
||||||
from .config import CONFIG
|
from .config import CONFIG
|
||||||
from .preproc import DatasetsInterface
|
from .ctc_layer import CTCLayer
|
||||||
|
from .keras.callbacks import EarlyStopping, History
|
||||||
|
from .keras.layers import Bidirectional, Conv2D, Dense, Dropout, Input, LSTM, MaxPooling2D, Reshape
|
||||||
|
from .keras.models import Model
|
||||||
|
from .keras.optimizers import Adam, Optimizer
|
||||||
from .types import PathT
|
from .types import PathT
|
||||||
|
|
||||||
|
|
||||||
THIS_DIR = os.path.dirname(os.path.realpath(__file__))
|
log = logging.getLogger(__name__)
|
||||||
# Build paths relative to this file's directory like this: os_path.join(THIS_DIR, ...)
|
|
||||||
|
|
||||||
|
|
||||||
class CTCLayer(layers.Layer):
|
|
||||||
def __init__(self, name: str = None):
|
|
||||||
super().__init__(name=name)
|
|
||||||
self.loss_fn = keras.backend.ctc_batch_cost
|
|
||||||
|
|
||||||
def call(self, y_true: np.ndarray = None, y_pred: np.ndarray = None) -> np.ndarray:
|
|
||||||
# Compute the training-time loss value and add it
|
|
||||||
# to the layer using `self.add_loss()`.
|
|
||||||
batch_len = tf.cast(tf.shape(y_true)[0], dtype='int64')
|
|
||||||
input_length = tf.cast(tf.shape(y_pred)[1], dtype='int64')
|
|
||||||
label_length = tf.cast(tf.shape(y_true)[1], dtype='int64')
|
|
||||||
input_length = input_length * tf.ones(shape=(batch_len, 1), dtype='int64')
|
|
||||||
label_length = label_length * tf.ones(shape=(batch_len, 1), dtype='int64')
|
|
||||||
loss = self.loss_fn(y_true, y_pred, input_length, label_length)
|
|
||||||
self.add_loss(loss)
|
|
||||||
# At test time, just return the computed predictions
|
|
||||||
return y_pred
|
|
||||||
|
|
||||||
|
|
||||||
# Factor by which the image is going to be downsampled
|
|
||||||
# by the convolutional blocks. We will be using two
|
|
||||||
# convolution blocks and each block will have
|
|
||||||
# a pooling layer which downsample the features by a factor of 2.
|
|
||||||
# Hence total downsampling factor would be 4.
|
|
||||||
downsample_factor = 4
|
|
||||||
|
|
||||||
|
|
||||||
def build_model(alphabet_size: int,
|
def build_model(alphabet_size: int,
|
||||||
img_width: int = CONFIG.IMG_WIDTH,
|
img_width: int = CONFIG.DEFAULT_IMG_WIDTH,
|
||||||
img_height: int = CONFIG.IMG_HEIGHT,
|
img_height: int = CONFIG.DEFAULT_IMG_HEIGHT,
|
||||||
optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam()) -> keras.models.Model:
|
optimizer: Optimizer = Adam()) -> Model:
|
||||||
|
log.info("Building model")
|
||||||
# Inputs to the model
|
# Inputs to the model
|
||||||
input_img = layers.Input(
|
input_img = Input(
|
||||||
shape=(img_width, img_height, 1),
|
shape=(img_width, img_height, 1),
|
||||||
dtype='float32',
|
dtype='float32',
|
||||||
name=CONFIG.INPUT_LAYER_NAME_IMAGE
|
name=CONFIG.LAYER_NAME_INPUT_IMAGE
|
||||||
)
|
)
|
||||||
labels = layers.Input(
|
labels = Input(
|
||||||
shape=(None, ),
|
shape=(None, ),
|
||||||
dtype='float32',
|
dtype='float32',
|
||||||
name=CONFIG.INPUT_LAYER_NAME_LABEL,
|
name=CONFIG.LAYER_NAME_INPUT_LABEL,
|
||||||
)
|
)
|
||||||
# First conv block
|
# First conv block
|
||||||
x = layers.Conv2D(
|
x = Conv2D(
|
||||||
filters=32,
|
filters=32,
|
||||||
kernel_size=(3, 3),
|
kernel_size=(3, 3),
|
||||||
activation='relu',
|
activation='relu',
|
||||||
@ -68,12 +42,12 @@ def build_model(alphabet_size: int,
|
|||||||
padding='same',
|
padding='same',
|
||||||
name='conv1',
|
name='conv1',
|
||||||
)(input_img)
|
)(input_img)
|
||||||
x = layers.MaxPooling2D(
|
x = MaxPooling2D(
|
||||||
pool_size=(2, 2),
|
pool_size=(2, 2),
|
||||||
name='pool1'
|
name='pool1'
|
||||||
)(x)
|
)(x)
|
||||||
# Second conv block
|
# Second conv block
|
||||||
x = layers.Conv2D(
|
x = Conv2D(
|
||||||
filters=64,
|
filters=64,
|
||||||
kernel_size=(3, 3),
|
kernel_size=(3, 3),
|
||||||
activation='relu',
|
activation='relu',
|
||||||
@ -81,72 +55,72 @@ def build_model(alphabet_size: int,
|
|||||||
padding='same',
|
padding='same',
|
||||||
name='conv2',
|
name='conv2',
|
||||||
)(x)
|
)(x)
|
||||||
x = layers.MaxPooling2D(
|
x = MaxPooling2D(
|
||||||
pool_size=(2, 2),
|
pool_size=(2, 2),
|
||||||
name='pool2'
|
name='pool2'
|
||||||
)(x)
|
)(x)
|
||||||
# We have used two max. pooling layers with pool size and strides 2.
|
# We have used two max. pooling layers with pool size and strides 2.
|
||||||
# Hence, downsampled feature maps are 4x smaller. The number of
|
# Hence, down-sampled feature maps are 4x smaller. The number of
|
||||||
# filters in the last layer is 64. Reshape accordingly before
|
# filters in the last layer is 64. Reshape accordingly before
|
||||||
# passing the output to the RNN part of the model
|
# passing the output to the RNN part of the model
|
||||||
|
down_sample_factor = 4
|
||||||
new_shape = (
|
new_shape = (
|
||||||
(img_width // 4),
|
(img_width // down_sample_factor),
|
||||||
(img_height // 4) * 64
|
(img_height // down_sample_factor) * 64
|
||||||
)
|
)
|
||||||
x = layers.Reshape(
|
x = Reshape(
|
||||||
target_shape=new_shape,
|
target_shape=new_shape,
|
||||||
name='reshape'
|
name='reshape'
|
||||||
)(x)
|
)(x)
|
||||||
x = layers.Dense(
|
x = Dense(
|
||||||
units=64,
|
units=64,
|
||||||
activation='relu',
|
activation='relu',
|
||||||
name='dense1'
|
name='dense1'
|
||||||
)(x)
|
)(x)
|
||||||
x = layers.Dropout(rate=0.2)(x)
|
x = Dropout(rate=0.2)(x)
|
||||||
# RNNs
|
# RNNs
|
||||||
x = layers.Bidirectional(
|
x = Bidirectional(
|
||||||
layers.LSTM(
|
LSTM(
|
||||||
units=128,
|
units=128,
|
||||||
return_sequences=True,
|
return_sequences=True,
|
||||||
dropout=0.25,
|
dropout=0.25,
|
||||||
)
|
)
|
||||||
)(x)
|
)(x)
|
||||||
x = layers.Bidirectional(
|
x = Bidirectional(
|
||||||
layers.LSTM(
|
LSTM(
|
||||||
units=64,
|
units=64,
|
||||||
return_sequences=True,
|
return_sequences=True,
|
||||||
dropout=0.25,
|
dropout=0.25,
|
||||||
)
|
)
|
||||||
)(x)
|
)(x)
|
||||||
# Output layer
|
# Output layer
|
||||||
x = layers.Dense(
|
x = Dense(
|
||||||
units=alphabet_size + 1,
|
units=alphabet_size + 1,
|
||||||
activation='softmax',
|
activation='softmax',
|
||||||
name=CONFIG.OUTPUT_LAYER_NAME,
|
name=CONFIG.LAYER_NAME_OUTPUT,
|
||||||
)(x)
|
)(x)
|
||||||
# Add CTC layer for calculating CTC loss at each step
|
# Add CTC layer for calculating CTC loss at each step
|
||||||
output = CTCLayer(name='ctc_loss')(labels, x)
|
output = CTCLayer(name='ctc_loss')(labels, x)
|
||||||
# Define the model
|
# Define the model
|
||||||
model = keras.models.Model(
|
model = Model(
|
||||||
inputs=[input_img, labels],
|
inputs=[input_img, labels],
|
||||||
outputs=output,
|
outputs=output,
|
||||||
name='ocr_model_v1'
|
name=CONFIG.MODEL_NAME
|
||||||
)
|
)
|
||||||
# Compile the model and return
|
log.debug("Compiling model")
|
||||||
model.compile(optimizer=optimizer)
|
model.compile(optimizer=optimizer)
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
def train_model(model: keras.models.Model, train_dataset: tf.data.Dataset, valid_dataset: tf.data.Dataset,
|
def train_model(model: Model, train_dataset: tf.data.Dataset, valid_dataset: tf.data.Dataset,
|
||||||
num_epochs: int = CONFIG.DEFAULT_NUM_EPOCHS,
|
num_epochs: int = CONFIG.DEFAULT_NUM_EPOCHS,
|
||||||
early_stopping_patience: int = CONFIG.DEFAULT_EARLY_STOPPING_PATIENCE) -> keras.callbacks.History:
|
early_stopping_patience: int = CONFIG.DEFAULT_EARLY_STOPPING_PATIENCE) -> History:
|
||||||
# Add early stopping
|
early_stopping = EarlyStopping(
|
||||||
early_stopping = keras.callbacks.EarlyStopping(
|
|
||||||
monitor='val_loss',
|
monitor='val_loss',
|
||||||
patience=early_stopping_patience,
|
patience=early_stopping_patience,
|
||||||
restore_best_weights=True,
|
restore_best_weights=True,
|
||||||
)
|
)
|
||||||
# Train the model
|
log.debug("Beginning training")
|
||||||
history = model.fit(
|
history = model.fit(
|
||||||
x=train_dataset,
|
x=train_dataset,
|
||||||
validation_data=valid_dataset,
|
validation_data=valid_dataset,
|
||||||
@ -156,23 +130,20 @@ def train_model(model: keras.models.Model, train_dataset: tf.data.Dataset, valid
|
|||||||
return history
|
return history
|
||||||
|
|
||||||
|
|
||||||
def start(data_dir: PathT, save_dir: PathT, file_extensions: Iterable[str] = CONFIG.DEFAULT_DATA_FILE_EXTENSIONS,
|
def start(training_data: tf.data.Dataset, validation_data: tf.data.Dataset, vocabulary: str,
|
||||||
batch_size: int = CONFIG.DEFAULT_BATCH_SIZE, num_epochs: int = CONFIG.DEFAULT_NUM_EPOCHS,
|
save_dir: PathT = CONFIG.DEFAULT_SAVE_DIR, num_epochs: int = CONFIG.DEFAULT_NUM_EPOCHS,
|
||||||
early_stopping_patience: int = CONFIG.DEFAULT_EARLY_STOPPING_PATIENCE) -> None:
|
early_stopping_patience: int = CONFIG.DEFAULT_EARLY_STOPPING_PATIENCE) -> None:
|
||||||
save_dir = Path(save_dir, datetime.now().strftime('%Y-%m-%d_%H-%M'))
|
save_dir = Path(save_dir, datetime.now().strftime('%Y-%m-%d_%H-%M'))
|
||||||
save_dir.mkdir(parents=True)
|
save_dir.mkdir(parents=True)
|
||||||
print("\nConstructing datasets\n")
|
model = build_model(len(vocabulary))
|
||||||
data_interface = DatasetsInterface(batch_size=int(batch_size), data_dir=data_dir, extensions=file_extensions)
|
history = train_model(model, training_data, validation_data,
|
||||||
data_interface.split_and_make_datasets()
|
num_epochs=num_epochs, early_stopping_patience=early_stopping_patience)
|
||||||
print("\nBuilding model\n")
|
log.debug("Saving model")
|
||||||
model = build_model(len(data_interface.characters))
|
|
||||||
print("\nBeginning training\n")
|
|
||||||
train_model(model, data_interface.training, data_interface.validation, num_epochs=num_epochs,
|
|
||||||
early_stopping_patience=early_stopping_patience)
|
|
||||||
print("\nSaving model\n")
|
|
||||||
model.save(save_dir)
|
model.save(save_dir)
|
||||||
print("\nSaving vocabulary\n")
|
log.debug("Saving vocabulary")
|
||||||
vocabulary = ''.join(data_interface.forward_lookup_table.get_vocabulary())
|
with open(Path(save_dir, CONFIG.VOCABULARY_FILE_NAME), 'w') as f:
|
||||||
with open(os.path.join(save_dir, CONFIG.VOCABULARY_FILE_NAME), 'w') as f:
|
|
||||||
f.write(vocabulary)
|
f.write(vocabulary)
|
||||||
print("\nAll saved!\n")
|
log.debug("Saving history")
|
||||||
|
with open(Path(save_dir, CONFIG.HISTORY_FILE_NAME), 'w') as f:
|
||||||
|
json.dump(history.history, f, indent=4)
|
||||||
|
log.info("All saved!")
|
||||||
|
@ -1,330 +0,0 @@
|
|||||||
import os
|
|
||||||
import shutil
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Union, Mapping, Sequence, Iterable, Callable
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import numpy as np
|
|
||||||
import tensorflow as tf
|
|
||||||
from tensorflow.keras import layers
|
|
||||||
|
|
||||||
from .config import CONFIG
|
|
||||||
from .types import PathT
|
|
||||||
|
|
||||||
|
|
||||||
def label_files(src_dir: PathT, dest_dir: PathT, labels: Union[PathT, Sequence[str]],
                reverse: bool = False, extensions: Iterable[str] = None) -> None:
    """
    Copies files giving them new names by using specified labels.

    All matching files are sorted by their file name before applying the sequence of labels to them.
    The first file is named with the first label, the second is named with the second label, and so on.
    If a label duplicate is encountered, a dot followed by a counter is appended to the file name
    *preceding* the file extension, e.g. if 'some_label.jpg' exists, 'some_label.1.jpg' may be created.

    The number of matching files must be greater than or equal to the number of labels.
    Exactly one file is copied for every label; thus, after every label has been used, the operation ends.

    Args:
        src_dir:
            Path to directory containing the files to be copied/renamed
        dest_dir:
            Path to destination directory
        labels:
            Either a sequence of labels (strings) or a path to a file containing the labels (newline separated)
        reverse (optional):
            Defines which file receives which label;
            if False (default), the files in `src_dir` are sorted ascending by their file name,
            if True, the files are sorted descending by name.
        extensions (optional):
            Iterable of file extensions; only files with these extensions will be considered.

    Raises:
        NotADirectoryError:
            If `dest_dir` is not an existing directory.
        IndexError:
            If there are more labels than matching files in `src_dir`.
    """
    # `str.endswith` accepts a tuple of suffixes; the empty string matches every name
    suffixes = '' if extensions is None else tuple(extensions)
    # Only regular files can be copied, so sub-directories with a matching name are left out
    file_names = sorted(
        (name for name in os.listdir(src_dir)
         if name.endswith(suffixes) and os.path.isfile(os.path.join(src_dir, name))),
        reverse=reverse,
    )
    try:
        with open(labels, 'r') as f:
            # `splitlines` turns an empty file into zero labels instead of a single empty one
            labels = f.read().strip().splitlines()
    except TypeError:
        pass  # Assume, labels is already a sequence of strings
    if not os.path.isdir(dest_dir):
        raise NotADirectoryError(f"'{dest_dir}' is not a directory.")
    if len(labels) > len(file_names):
        raise IndexError(f"There are more labels ({len(labels)}) than files "
                         f"in the source directory ({len(file_names)} matching).")
    # `zip` stops after the last label, so surplus files are simply not copied
    for file_name, label in zip(file_names, labels):
        ext = os.path.splitext(file_name)[1]
        src_path = os.path.join(src_dir, file_name)
        while True:
            new_path = os.path.join(dest_dir, label + ext)
            if not os.path.exists(new_path):
                shutil.copyfile(src_path, new_path)
                break
            # The name is taken: increment an existing numeric suffix or start a new one at '.1'
            pre_label, n = os.path.splitext(label)
            try:
                n = int(n[1:])
            except ValueError:
                label = label + '.1'
            else:
                label = pre_label + '.' + str(n + 1)
|
|
||||||
|
|
||||||
|
|
||||||
def load_images_data(data_dir: PathT, extensions: Iterable[str] = ('.jpg', '.png'), verbose: bool = True
                     ) -> tuple[dict[str, str], str]:
    """
    Creates a dictionary mapping file paths (of images) to their labels.
    Everything up to the first dot in the filename is taken to be the label;
    this naturally excludes file extensions, but also a filename like `ABC.1.jpg` results in the label `ABC`.
    Also creates a vocabulary of characters encountered in the file names.

    Args:
        data_dir:
            Path-like object or string to a directory containing the desired image files
        extensions (optional):
            Iterable of extensions that the files considered for the resulting data should be restricted to;
            a single string is treated as one extension;
            defaults to restricting finds to JPEG and PNG files.
        verbose (optional):
            If True, the function will print out a summary of the findings before returning.

    Returns:
        2-tuple with the first element being a dictionary where the keys are the file paths and the values are the
        image labels and the second element being a string of all characters present in the labels (sorted).
    """
    data_dir = Path(data_dir)
    # Materialize once: a one-shot iterable would be used up by the first membership test,
    # and a bare string would otherwise be matched by substring.
    suffixes = {extensions} if isinstance(extensions, str) else set(extensions)
    file_paths_and_labels, characters = {}, set()
    for file_path in data_dir.iterdir():
        # Directories may carry a matching suffix as well, but cannot be read as images
        if file_path.suffix not in suffixes or not file_path.is_file():
            continue
        label = file_path.name.split('.')[0]
        characters.update(label)
        file_paths_and_labels[str(file_path)] = label
    if verbose:
        print("Number of images/labels found: ", len(file_paths_and_labels))
        print("Number of unique characters: ", len(characters))
        print("Characters present: ", characters)
    # Sorted, so that the same files always produce the same vocabulary string
    return file_paths_and_labels, ''.join(sorted(characters))
|
|
||||||
|
|
||||||
|
|
||||||
def get_vocab_maps(characters: Iterable[str], num_oov_indices: int = 0, mask_token: str = None
                   ) -> tuple[layers.StringLookup, layers.StringLookup]:
    """
    Constructs two table-based lookup objects that map characters to integers and back.

    Details about the `StringLookup` class in the documentation:
    https://tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing/StringLookup

    Args:
        characters:
            An iterable of strings representing the vocabulary to be mapped
        num_oov_indices (optional):
            Passed to both `StringLookup` constructors;
            defines the number of out-of-vocabulary (OOV) tokens to create;
            assuming that no OOV characters will be encountered, the default is 0.
        mask_token (optional):
            Passed to both `StringLookup` constructors;
            the token representing missing values;
            assuming that there will never be a value missing, the default is None.

    Returns:
        2-tuple of `StringLookup` objects, the first one mapping characters to integers, the second doing the reverse.
        By default, no OOV or missing values are assumed to be encountered,
        and thus each index (uniquely) represents a character from the vocabulary.
    """
    char_to_int = layers.StringLookup(
        vocabulary=list(characters),
        num_oov_indices=num_oov_indices,
        mask_token=mask_token,
    )
    # The inverse table must reserve the same number of OOV slots as the forward table;
    # otherwise the layer's own default applies and the indices of both tables no longer line up.
    int_to_char = layers.StringLookup(
        vocabulary=char_to_int.get_vocabulary(),
        num_oov_indices=num_oov_indices,
        mask_token=mask_token,
        invert=True,
    )
    return char_to_int, int_to_char
|
|
||||||
|
|
||||||
|
|
||||||
def encode_image(img):
    """
    Creates a `Tensor` object from an image file and transposes it.

    Args:
        img:
            Either the path to an image file (string, path-like object or string tensor)
            or the encoded contents of such a file as a `bytes` object.

    Returns:
        The decoded single-channel image as a float32 tensor, resized to the configured height and width
        and transposed so that the width comes first.
    """
    # 0. Read image, unless the encoded file contents were passed in directly
    if isinstance(img, (str, os.PathLike)):
        img = tf.io.read_file(str(img))
    elif not isinstance(img, bytes):
        # NOTE(review): anything else is taken to be a string tensor holding a path, which is what a
        # `Dataset.map` call hands over; calling `str()` on a tensor would yield its repr, not the path.
        img = tf.io.read_file(img)
    # 1. Decode and convert to grayscale
    img = tf.io.decode_png(img, channels=1)
    # 2. Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # 3. Resize to the desired size
    img = tf.image.resize(img, [CONFIG.IMG_HEIGHT, CONFIG.IMG_WIDTH])
    # 4. Transpose the image because we want the time
    # dimension to correspond to the width of the image.
    return tf.transpose(img, perm=[1, 0, 2])
|
|
||||||
|
|
||||||
|
|
||||||
def encode_label(label: str, forward_lookup_table: layers.StringLookup):
    """
    Splits a label string into its UTF-8 characters and maps them through the given `StringLookup` instance.
    """
    label_chars = tf.strings.unicode_split(label, input_encoding='UTF-8')
    return forward_lookup_table(label_chars)
|
|
||||||
|
|
||||||
|
|
||||||
def decode_label(tensor, backward_lookup_table: layers.StringLookup) -> str:
    """
    Maps a tensor through the given (inverse) `StringLookup` instance and joins the results into one string.
    """
    looked_up = backward_lookup_table(tensor)
    joined = tf.strings.reduce_join(looked_up)
    return joined.numpy().decode('utf-8')
|
|
||||||
|
|
||||||
|
|
||||||
def get_sample_encoder(forward_lookup_table: layers.StringLookup) -> Callable[[str, str], dict]:
    """
    Builds the per-sample encoding function intended for the `map(...)` method of a `Dataset` instance.

    The returned function takes an image path and a label and produces a dictionary with two entries:
    the encoded image and the encoded label. The dictionary keys are taken from the package config
    (`INPUT_LAYER_NAME_IMAGE` and `INPUT_LAYER_NAME_LABEL`).

    Args:
        forward_lookup_table:
            Handed on to `encode_label` for mapping the characters of each label.

    Returns:
        Callable taking an image path and a label and returning the dictionary described above.
    """
    def func(img_path: PathT, label: str) -> dict:
        image_tensor = encode_image(img_path)
        label_tensor = encode_label(label, forward_lookup_table)
        return {
            CONFIG.INPUT_LAYER_NAME_IMAGE: image_tensor,
            CONFIG.INPUT_LAYER_NAME_LABEL: label_tensor,
        }
    return func
|
|
||||||
|
|
||||||
|
|
||||||
def make_dataset(file_paths: np.ndarray, labels: np.ndarray,
                 sample_encode_func: Callable[[str, str], dict], batch_size: int) -> tf.data.Dataset:
    """
    Builds a batched, prefetching `Dataset` out of image file paths and their labels.

    Args:
        file_paths:
            Array of strings, each being the path to an image file;
            every path is passed to `sample_encode_func` as the first argument.
        labels:
            Array of strings holding the label of the image at the same index in `file_paths`;
            every label is passed to `sample_encode_func` as the second argument.
        sample_encode_func:
            Used as the `map_func` argument of the `map(...)` method of the new `Dataset`;
            takes an image path and a label and returns a dictionary of tensors.
        batch_size:
            Used as the `batch_size` argument of the `batch(...)` method of the new `Dataset`.

    Returns:
        The mapped, batched and prefetching `Dataset`.

    Raises:
        ValueError:
            If `file_paths` and `labels` differ in size.
    """
    if file_paths.size != labels.size:
        raise ValueError("Number of file paths must be equal to number of labels")

    autotune = tf.data.experimental.AUTOTUNE
    samples = tf.data.Dataset.from_tensor_slices((file_paths, labels))
    encoded = samples.map(map_func=sample_encode_func, num_parallel_calls=autotune)
    batched = encoded.batch(batch_size=batch_size)
    return batched.prefetch(buffer_size=autotune)
|
|
||||||
|
|
||||||
|
|
||||||
def get_datasets(file_paths_and_labels: Mapping[str, str], sample_encode_func: Callable[[str, str], dict],
                 batch_size: int, train_data_ratio: float, shuffle: bool = CONFIG.SHUFFLE_DATA
                 ) -> tuple[tf.data.Dataset, tf.data.Dataset]:
    """
    Splits a mapping of image file paths to labels into a training dataset and a validation dataset.

    Args:
        file_paths_and_labels:
            Mapping of image file paths (keys) to the labels of those images (values);
            this is the full data to be divided.
        sample_encode_func:
            Handed on to `make_dataset(...)` as its `sample_encode_func` argument.
        batch_size:
            Handed on to `make_dataset(...)` as its `batch_size` argument.
        train_data_ratio:
            Fraction of the samples that goes into the training dataset; the number of training samples is
            `int(size * train_data_ratio)` and all remaining samples are used for validation.
        shuffle:
            If True, the sample order is shuffled pseudo-randomly before the split.

    Returns:
        2-tuple of `Dataset` objects, the training dataset first and the validation dataset second.
    """
    # Paths and labels as parallel arrays, so that both can be selected with the same index array
    file_paths = np.array(list(file_paths_and_labels.keys()))
    labels = np.array(list(file_paths_and_labels.values()))
    size = len(file_paths_and_labels)
    # Shuffle positions rather than the data itself
    indices = np.arange(size)
    if shuffle:
        np.random.shuffle(indices)
    # Everything in front of the cutoff is used for training, the rest for validation
    cutoff = int(size * train_data_ratio)
    train_indices = indices[:cutoff]
    valid_indices = indices[cutoff:]
    train_dataset = make_dataset(file_paths[train_indices], labels[train_indices], sample_encode_func, batch_size)
    valid_dataset = make_dataset(file_paths[valid_indices], labels[valid_indices], sample_encode_func, batch_size)
    return train_dataset, valid_dataset
|
|
||||||
|
|
||||||
|
|
||||||
def plot_images(images: Sequence[np.ndarray], labels: Sequence[str] = None, num_columns: int = 4,
                transpose: bool = True) -> None:
    """
    Shows a batch of images in a grid of grayscale plots.

    Args:
        images:
            Batch of images with four axes, the first one enumerating the images and the last one being the
            channel axis; only the first channel is plotted. The values are multiplied by 255 and cast to
            `uint8`, so they are presumably expected to lie in the [0, 1] range.
        labels (optional):
            If provided, the label at the same index is used as the title of each image.
        num_columns (optional):
            Number of plots per row; defaults to 4.
        transpose (optional):
            If True (default), the second and third axis of every image are swapped before plotting.
    """
    images = np.asarray(images)
    if transpose:
        images = images.transpose(0, 2, 1, 3)
    images = (images[:, :, :, 0] * 255).astype('uint8')
    num_images = len(images)
    # Round up, so that an incomplete last row still gets its axes; always create at least one row
    num_rows = -(-num_images // num_columns) or 1
    # `squeeze=False` always yields a 2D array of axes, regardless of the number of rows and columns
    _, axs = plt.subplots(num_rows, num_columns, figsize=(10, 5), squeeze=False)
    for idx, ax in enumerate(axs.flat):
        ax.axis('off')
        if idx >= num_images:
            continue  # Left-over cell of the grid; stays empty
        ax.imshow(images[idx], cmap='gray')
        if labels is not None:
            ax.set_title(labels[idx])
    plt.show()
|
|
||||||
|
|
||||||
|
|
||||||
class DatasetsInterface:
    """
    Bundles loading of the labeled images, construction of the vocabulary lookup tables
    and creation of the training and validation datasets.
    """

    def __init__(self, batch_size: int, data_dir: PathT,
                 extensions: Iterable[str] = CONFIG.DEFAULT_DATA_FILE_EXTENSIONS) -> None:
        """
        Loads the image paths and labels from `data_dir` and prepares the lookup tables and the sample encoder.

        The datasets themselves are not created here; see `split_and_make_datasets`.
        """
        found = load_images_data(data_dir=data_dir, extensions=extensions)
        self.file_paths_and_labels, self.characters = found
        lookup_tables = get_vocab_maps(characters=self.characters)
        self.forward_lookup_table, self.backward_lookup_table = lookup_tables
        self.sample_encode_func = get_sample_encoder(forward_lookup_table=self.forward_lookup_table)
        self.batch_size = batch_size
        # Both stay `None` until `split_and_make_datasets` has been called
        self.training = None
        self.validation = None

    def split_and_make_datasets(self, train_data_ratio: float = (1 - CONFIG.VALIDATION_DATA_RATIO),
                                shuffle: bool = CONFIG.SHUFFLE_DATA) -> None:
        """
        Creates the training and validation datasets and stores them in `training` and `validation`.
        """
        datasets = get_datasets(
            file_paths_and_labels=self.file_paths_and_labels,
            sample_encode_func=self.sample_encode_func,
            batch_size=self.batch_size,
            train_data_ratio=train_data_ratio,
            shuffle=shuffle,
        )
        self.training, self.validation = datasets
|
|
221
src/ccaptchas/preprocess.py
Normal file
221
src/ccaptchas/preprocess.py
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
from .config import CONFIG
|
||||||
|
from .keras.layers import StringLookup
|
||||||
|
from .types import PathT, SampleEncFuncT, ImgT
|
||||||
|
|
||||||
|
|
||||||
|
# Logger named after this module
log = logging.getLogger(__name__)

# Encoding name used for splitting label strings into characters and for decoding them again
UTF8 = 'UTF-8'
# Maps two file extension constants from the config to TensorFlow image decoding functions
# (presumably the PNG and JPEG extensions -- TODO confirm against the config)
IMG_DECODE_MAP = {
    CONFIG.EXT_PNG: tf.image.decode_png,
    CONFIG.EXT_JPG: tf.image.decode_jpeg,
}
|
||||||
|
|
||||||
|
|
||||||
|
def find_image_files(data_dir: PathT, file_ext: Iterable[str] = CONFIG.DEFAULT_IMG_FILE_EXT) -> list[Path]:
    """
    Collects the paths of all files in a directory that have one of the specified file extensions.

    Args:
        data_dir:
            Path to the directory to search; sub-directories are not searched.
        file_ext (optional):
            Iterable of file extensions to look for; a leading dot is added to those that lack one.

    Returns:
        List of paths to the matching files; each file is contained only once.

    Raises:
        NotADirectoryError:
            If `data_dir` is not an existing directory.
    """
    data_dir = Path(data_dir)
    if not data_dir.is_dir():
        raise NotADirectoryError(f"'{data_dir}' is not a directory.")
    log.debug("Finding labeled image files in directory '%s'", str(data_dir))
    # Collected as dictionary keys, so that overlapping extensions (e.g. 'png' and '.png')
    # cannot add the same file twice, while the order of discovery is kept.
    found = {}
    for ext in file_ext:
        if not ext.startswith('.'):
            ext = f'.{ext}'
        found.update((path, None) for path in data_dir.glob(f'*{ext}') if path.is_file())
    img_paths = list(found)
    log.info("Found %d image files", len(img_paths))
    return img_paths
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_characters(img_paths: Iterable[Path]) -> str:
    """
    Determines the distinct characters occurring in the stems of the given file paths.

    Args:
        img_paths:
            Iterable of paths; the stem of each path (file name without its last extension) is inspected.

    Returns:
        String containing every distinct character exactly once, in sorted order.
    """
    characters = set()
    for path in img_paths:
        characters.update(path.stem)
    # Sorted, because the iteration order of a set of strings is not stable between interpreter runs;
    # this way the same files always yield the same vocabulary string.
    characters = ''.join(sorted(characters))
    log.info("Identified %d distinct characters in the file labels: '%s'", len(characters), characters)
    return characters
|
||||||
|
|
||||||
|
|
||||||
|
def get_lookup_table(vocabulary: Iterable[str], invert: bool = False, **kwargs) -> StringLookup:
    """
    Creates a `StringLookup` table that maps characters to indices or, if inverted, indices to characters.

    Documentation of the `StringLookup` class:
    https://keras.io/api/layers/preprocessing_layers/categorical/string_lookup/

    Args:
        vocabulary:
            The vocabulary to be mapped as an iterable of strings; a single string is split into its characters.
        invert (optional):
            `False` (default) gives a forward table mapping characters to indices;
            `True` gives a backward table mapping indices to characters.
        **kwargs (optional):
            Further keyword arguments for the `StringLookup` constructor.
            Unless given, `num_oov_indices` and `mask_token` take their defaults from the package config.

    Returns:
        The `StringLookup` object constructed with these settings.
    """
    # Config defaults first, so that explicitly passed keyword arguments take precedence
    options = {
        'num_oov_indices': CONFIG.DEFAULT_NUM_OOV_INDICES,
        'mask_token': CONFIG.DEFAULT_MASK_TOKEN,
        **kwargs,
    }
    tokens = list(vocabulary) if isinstance(vocabulary, str) else vocabulary
    return StringLookup(vocabulary=tokens, invert=invert, **options)
|
||||||
|
|
||||||
|
|
||||||
|
def get_vocab_maps(characters: str, **kwargs) -> tuple[StringLookup, StringLookup]:
    """
    Creates a forward and a backward lookup table for the same vocabulary.

    Both tables are built with `get_lookup_table`; the backward table is given the vocabulary
    reported by the forward table.

    Args:
        characters:
            String of all characters making up the vocabulary; the characters should all be distinct.
        **kwargs (optional):
            Keyword arguments handed on to `get_lookup_table` for both tables.
            Must not contain the `invert` and `vocabulary` keywords.

    Returns:
        2-tuple of `StringLookup` objects; the first maps characters to integers, the second integers to characters.
    """
    forward = get_lookup_table(characters, invert=False, **kwargs)
    backward = get_lookup_table(forward.get_vocabulary(), invert=True, **kwargs)
    log.info("Constructed vocabulary lookup tables")
    return forward, backward
|
||||||
|
|
||||||
|
|
||||||
|
def split_data(img_paths: Iterable[Path], validation_ratio: float = CONFIG.DEFAULT_VALIDATION_RATIO,
               shuffle: bool = CONFIG.DEFAULT_SHUFFLE_DATA) -> tuple[np.ndarray, np.ndarray]:
    """
    Splits an iterable of image paths into two arrays of training and validation data.

    Args:
        img_paths:
            Iterable of paths to the image files to be used for training and validation.
        validation_ratio:
            Float between 0 and 1 (both exclusive) determining what ratio of the full dataset will be used for
            validation; this implies that (1 - `validation_ratio`) will be the ratio used for training.
        shuffle:
            If True, the full dataset is shuffled pseudo-randomly before being split.

    Returns:
        2-tuple of 2D numpy arrays, the first representing the training data and the second representing the validation
        data. The first dimension of each array is the dataset dimension (i.e. the samples) and the second contains the
        path (as a string) at index 0 and the label for each image at index 1.

    Raises:
        ValueError:
            If `validation_ratio` is not strictly between 0 and 1.
    """
    if not 0 < validation_ratio < 1:
        raise ValueError(f"`validation_ratio` must be strictly between 0 and 1, not {validation_ratio}")
    # 1. Pair every path with its label (the file stem);
    # the explicit dtype and reshape keep the array two-dimensional even if no paths were passed
    paths_and_labels = np.array([(str(path), path.stem) for path in img_paths], dtype=str).reshape(-1, 2)
    # 2. Get the total size of the dataset and the number of training samples
    size = len(paths_and_labels)
    cutoff = int(size * (1 - validation_ratio))
    # 3. Make an indices array and shuffle it, if required
    indices = np.arange(size)
    if shuffle:
        np.random.shuffle(indices)
    # 4. Split data into training and validation sets
    training_data, validation_data = paths_and_labels[indices[:cutoff]], paths_and_labels[indices[cutoff:]]
    log.info("Split data into %d images for training and %d for validation", len(training_data), len(validation_data))
    return training_data, validation_data
|
||||||
|
|
||||||
|
|
||||||
|
def process_image(img: ImgT, img_width: int = CONFIG.DEFAULT_IMG_WIDTH,
                  img_height: int = CONFIG.DEFAULT_IMG_HEIGHT) -> tf.Tensor:
    """
    Turns an image into a single-channel float32 tensor of the desired size with the width as its first axis.

    Args:
        img:
            Either a path to an image file (`str` or `Path`), which is read first,
            or anything else, which is handed to the decoder as is.
        img_width (optional):
            Width the image is resized to; the default is defined in the package config.
        img_height (optional):
            Height the image is resized to; the default is defined in the package config.

    Returns:
        The decoded, converted, resized and transposed image tensor.
    """
    # Only `str` and `Path` objects are treated as file paths that need to be read
    is_path = isinstance(img, (str, Path))
    encoded = tf.io.read_file(str(img)) if is_path else img
    # One channel, i.e. grayscale; animations are not expanded into multiple frames
    pixels = tf.io.decode_image(encoded, channels=1, expand_animations=False)
    # float32 in the [0, 1] range
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    resized = tf.image.resize(pixels, [img_height, img_width])
    # Swap height and width, because the time dimension is supposed to correspond to the width of the image
    return tf.transpose(resized, perm=[1, 0, 2])
|
||||||
|
|
||||||
|
|
||||||
|
def encode_label(label: str, forward_lookup: StringLookup):
    """
    Splits a label string into its UTF-8 characters and maps them through the given `StringLookup` instance.
    """
    label_chars = tf.strings.unicode_split(label, input_encoding=UTF8)
    return forward_lookup(label_chars)
|
||||||
|
|
||||||
|
|
||||||
|
def decode_label(tensor: tf.Tensor, backward_lookup: StringLookup) -> str:
    """
    Maps a tensor through the given (inverse) `StringLookup` instance and joins the results into one string.
    """
    looked_up = backward_lookup(tensor)
    joined = tf.strings.reduce_join(looked_up)
    return joined.numpy().decode(UTF8)
|
||||||
|
|
||||||
|
|
||||||
|
def get_sample_encode_func(forward_lookup: StringLookup, img_width: int = CONFIG.DEFAULT_IMG_WIDTH,
                           img_height: int = CONFIG.DEFAULT_IMG_HEIGHT) -> SampleEncFuncT:
    """
    Builds the function that encodes a single sample, i.e. an image path together with its label.

    Args:
        forward_lookup:
            Lookup table handed on to `encode_label` for encoding the label.
        img_width (optional):
            Handed on to `process_image`; the default is defined in the package config.
        img_height (optional):
            Handed on to `process_image`; the default is defined in the package config.

    Returns:
        Function taking an image path and a label and returning a dictionary with the processed image
        and the encoded label; the keys are the input layer names defined in the package config.
    """
    def encode_sample(img_path: PathT, label: str) -> dict[str, tf.Tensor]:
        log.debug("Encoding image '%s'", str(img_path))
        file_contents = tf.io.read_file(img_path)
        image_tensor = process_image(file_contents, img_width=img_width, img_height=img_height)
        label_tensor = encode_label(label, forward_lookup)
        # Two entries, because the model is expecting two inputs
        return {
            CONFIG.LAYER_NAME_INPUT_IMAGE: image_tensor,
            CONFIG.LAYER_NAME_INPUT_LABEL: label_tensor,
        }
    return encode_sample
|
||||||
|
|
||||||
|
|
||||||
|
def make_dataset(data: np.ndarray, sample_encode_func: SampleEncFuncT, batch_size: int) -> tf.data.Dataset:
    """
    Builds a batched, prefetching `Dataset` out of an array of image file paths and labels.

    Args:
        data:
            A 2D numpy array; the first dimension enumerates the samples, the second holds
            the image path (as a string) at index 0 and the label of that image at index 1.
            Every path-label-pair is passed to `sample_encode_func` as its two positional arguments.
        sample_encode_func:
            Used as the `map_func` argument of the `map(...)` method of the new `Dataset`;
            takes an image path and a label and returns a dictionary of tensors.
        batch_size:
            Used as the `batch_size` argument of the `batch(...)` method of the new `Dataset`.

    Returns:
        The mapped, batched and prefetching `Dataset`.
    """
    log.info("Constructing dataset of %d samples, split into batches of %d", len(data), batch_size)
    paths, labels = data[:, 0], data[:, 1]
    autotune = tf.data.experimental.AUTOTUNE
    samples = tf.data.Dataset.from_tensor_slices((paths, labels))
    encoded = samples.map(map_func=sample_encode_func, num_parallel_calls=autotune)
    batched = encoded.batch(batch_size=batch_size)
    return batched.prefetch(buffer_size=autotune)
|
||||||
|
|
||||||
|
|
||||||
|
def load_datasets(data_dir: PathT,
                  file_ext: Iterable[str] = CONFIG.DEFAULT_IMG_FILE_EXT,
                  batch_size: int = CONFIG.DEFAULT_BATCH_SIZE,
                  validation_ratio: float = CONFIG.DEFAULT_VALIDATION_RATIO,
                  shuffle: bool = CONFIG.DEFAULT_SHUFFLE_DATA,
                  img_width: int = CONFIG.DEFAULT_IMG_WIDTH,
                  img_height: int = CONFIG.DEFAULT_IMG_HEIGHT) -> tuple[tf.data.Dataset, tf.data.Dataset, str]:
    """
    Finds the image files in a directory and turns them into a training and a validation dataset.

    Args:
        data_dir:
            Directory containing the image files; handed on to `find_image_files`.
        file_ext (optional):
            File extensions to look for; handed on to `find_image_files`.
        batch_size (optional):
            Batch size of both datasets; handed on to `make_dataset`.
        validation_ratio (optional):
            Ratio of the data to be used for validation; handed on to `split_data`.
        shuffle (optional):
            Whether to shuffle the data before splitting; handed on to `split_data`.
        img_width (optional):
            Handed on to `get_sample_encode_func`.
        img_height (optional):
            Handed on to `get_sample_encode_func`.

    Returns:
        3-tuple of the training dataset, the validation dataset and the string of all characters
        found in the file stems.
    """
    log.info("Constructing datasets")
    img_paths = find_image_files(data_dir, file_ext=file_ext)
    characters = get_all_characters(img_paths)
    # Only the forward table is needed for encoding the labels
    forward_lookup = get_vocab_maps(characters)[0]
    subsets = split_data(img_paths, validation_ratio=validation_ratio, shuffle=shuffle)
    encode_func = get_sample_encode_func(forward_lookup, img_width=img_width, img_height=img_height)
    ds_train, ds_valid = [make_dataset(subset, encode_func, batch_size=batch_size) for subset in subsets]
    # Sanity check: the returned characters must be exactly the vocabulary of the lookup table
    assert characters == ''.join(forward_lookup.get_vocabulary())
    return ds_train, ds_valid, characters
|
@ -1,5 +1,11 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Union
|
from typing import Callable, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
||||||
PathT = Union[Path, str]
|
PathT = Union[Path, str]
|
||||||
|
SampleEncFuncT = Callable[[PathT, str], dict[str, tf.Tensor]]
|
||||||
|
ImgT = Union[PathT, bytes]
|
||||||
|
Array = np.ndarray
|
||||||
|
28
src/ccaptchas/visualize.py
Normal file
28
src/ccaptchas/visualize.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
from typing import Sequence
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
||||||
|
def plot_images(images: Sequence[np.ndarray], labels: Sequence[str] = None, num_columns: int = 4,
                transpose: bool = True) -> None:
    """
    Shows a batch of images in a grid of grayscale plots.

    Args:
        images:
            Batch of images with four axes, the first one enumerating the images and the last one being the
            channel axis; only the first channel is plotted. The values are multiplied by 255 and cast to
            `uint8`, so they are presumably expected to lie in the [0, 1] range.
        labels (optional):
            If provided, the label at the same index is used as the title of each image.
        num_columns (optional):
            Number of plots per row; defaults to 4.
        transpose (optional):
            If True (default), the second and third axis of every image are swapped before plotting.
    """
    images = np.asarray(images)
    if transpose:
        images = images.transpose(0, 2, 1, 3)
    images = (images[:, :, :, 0] * 255).astype('uint8')
    num_images = len(images)
    # Round up, so that an incomplete last row still gets its axes; always create at least one row
    num_rows = -(-num_images // num_columns) or 1
    # `squeeze=False` always yields a 2D array of axes, regardless of the number of rows and columns
    _, axs = plt.subplots(num_rows, num_columns, figsize=(10, 5), squeeze=False)
    for idx, ax in enumerate(axs.flat):
        ax.axis('off')
        if idx >= num_images:
            continue  # Left-over cell of the grid; stays empty
        ax.imshow(images[idx], cmap='gray')
        if labels is not None:
            ax.set_title(labels[idx])
    plt.show()
|
Loading…
Reference in New Issue
Block a user