From cf11ce90275c57939cc73b261ca064d8608c68b3 Mon Sep 17 00:00:00 2001 From: "RAA\\deepakb4437" Date: Mon, 24 Apr 2023 14:46:17 +0930 Subject: [PATCH] Updated Activations, new callbacks, Optimizers. WIP Layers --- Examples/BasicSamples/EarlyStopExample.cs | 1 + Examples/BasicSamples/ImplementCallback.cs | 1 + Examples/BasicSamples/MNIST_CNN.cs | 1 + Examples/ImageExamples/Cifar10_CNN.cs | 1 + Examples/ImageExamples/MNIST_CNN.cs | 1 + Keras.NET.sln | 37 +-- Keras.UnitTest/Keras.Layers.Core.cs | 2 +- Keras/Activations.cs | 38 +++ Keras/Callbacks.cs | 88 +++++-- Keras/Constraints.cs | 14 ++ Keras/ExportArchive.cs | 14 ++ Keras/Keras.csproj | 2 +- Keras/Models/BaseModel.cs | 266 ++++++++++++++++++--- Keras/Optimizers.cs | 109 ++++++++- {Keras => Tensorflow}/Base.cs | 2 +- {Keras => Tensorflow}/InternalTypes.cs | 0 {Keras => Tensorflow}/Keras.cs | 12 +- {Keras => Tensorflow}/Setup.cs | 0 Tensorflow/Train/CheckpointOptions.cs | 12 + 19 files changed, 493 insertions(+), 108 deletions(-) create mode 100644 Keras/ExportArchive.cs rename {Keras => Tensorflow}/Base.cs (99%) rename {Keras => Tensorflow}/InternalTypes.cs (100%) rename {Keras => Tensorflow}/Keras.cs (95%) rename {Keras => Tensorflow}/Setup.cs (100%) create mode 100644 Tensorflow/Train/CheckpointOptions.cs diff --git a/Examples/BasicSamples/EarlyStopExample.cs b/Examples/BasicSamples/EarlyStopExample.cs index 121ad5d..6eaef87 100644 --- a/Examples/BasicSamples/EarlyStopExample.cs +++ b/Examples/BasicSamples/EarlyStopExample.cs @@ -3,6 +3,7 @@ using Keras.Layers; using Keras.Models; using Keras.Optimizers; +using Keras.Optimizers.Legacy; using Numpy; using System; using System.Collections.Generic; diff --git a/Examples/BasicSamples/ImplementCallback.cs b/Examples/BasicSamples/ImplementCallback.cs index 32bbd80..17fd97b 100644 --- a/Examples/BasicSamples/ImplementCallback.cs +++ b/Examples/BasicSamples/ImplementCallback.cs @@ -3,6 +3,7 @@ using Keras.Layers; using Keras.Models; using Keras.Optimizers; +using 
Keras.Optimizers.Legacy; using Numpy; using System; using System.Collections.Generic; diff --git a/Examples/BasicSamples/MNIST_CNN.cs b/Examples/BasicSamples/MNIST_CNN.cs index b7c6148..e10f7bf 100644 --- a/Examples/BasicSamples/MNIST_CNN.cs +++ b/Examples/BasicSamples/MNIST_CNN.cs @@ -9,6 +9,7 @@ using Keras.Layers; using Keras.Utils; using Keras.Optimizers; +using Keras.Optimizers.Legacy; namespace BasicSamples { diff --git a/Examples/ImageExamples/Cifar10_CNN.cs b/Examples/ImageExamples/Cifar10_CNN.cs index 8df6808..f3a3cd9 100644 --- a/Examples/ImageExamples/Cifar10_CNN.cs +++ b/Examples/ImageExamples/Cifar10_CNN.cs @@ -11,6 +11,7 @@ using Keras.Optimizers; using Keras.PreProcessing.Image; using System.IO; +using Keras.Optimizers.Legacy; namespace ImageExamples { diff --git a/Examples/ImageExamples/MNIST_CNN.cs b/Examples/ImageExamples/MNIST_CNN.cs index 32fbb78..a8c1c0b 100644 --- a/Examples/ImageExamples/MNIST_CNN.cs +++ b/Examples/ImageExamples/MNIST_CNN.cs @@ -10,6 +10,7 @@ using Keras.Utils; using Keras.Optimizers; using System.IO; +using Keras.Optimizers.Legacy; namespace ImageExamples { diff --git a/Keras.NET.sln b/Keras.NET.sln index c55c9bd..428594e 100644 --- a/Keras.NET.sln +++ b/Keras.NET.sln @@ -19,11 +19,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Keras", "Keras\Keras.csproj EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReleaseBot", "ReleaseBot\ReleaseBot.csproj", "{2BAEA60C-88A2-45DC-8044-2C9571E1B8CF}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "KerasExampleWinApp", "Examples\KerasExampleWinApp\KerasExampleWinApp.csproj", "{0C0B0830-4871-4979-8675-93F980F5EBE2}" -EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MusicGeneration", "Examples\MusicGeneration\MusicGeneration.csproj", "{108C3326-58D2-4C26-9D78-5F045D620A26}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tensorflow", "Tensorflow\Tensorflow.csproj", "{27230C96-FCB4-406C-8AAD-450020F9074D}" 
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow", "Tensorflow\Tensorflow.csproj", "{27230C96-FCB4-406C-8AAD-450020F9074D}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -255,38 +253,6 @@ Global {2BAEA60C-88A2-45DC-8044-2C9571E1B8CF}.Release|Any CPU.Build.0 = Release|Any CPU {2BAEA60C-88A2-45DC-8044-2C9571E1B8CF}.Release|x64.ActiveCfg = Release|Any CPU {2BAEA60C-88A2-45DC-8044-2C9571E1B8CF}.Release|x64.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Debug|Any CPU.Build.0 = Debug|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Debug|x64.ActiveCfg = Debug|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Debug|x64.Build.0 = Debug|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_Mono|Any CPU.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_Mono|Any CPU.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_Mono|x64.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_Mono|x64.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_OSX|Any CPU.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_OSX|Any CPU.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_OSX|x64.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_OSX|x64.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_WIN|Any CPU.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_WIN|Any CPU.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_WIN|x64.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py2.7_WIN|x64.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_Mono|Any CPU.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_Mono|Any CPU.Build.0 = Release|Any CPU - 
{0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_Mono|x64.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_Mono|x64.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_OSX|Any CPU.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_OSX|Any CPU.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_OSX|x64.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_OSX|x64.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_WIN|Any CPU.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_WIN|Any CPU.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_WIN|x64.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Py3.6_WIN|x64.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Release|Any CPU.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Release|Any CPU.Build.0 = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Release|x64.ActiveCfg = Release|Any CPU - {0C0B0830-4871-4979-8675-93F980F5EBE2}.Release|x64.Build.0 = Release|Any CPU {108C3326-58D2-4C26-9D78-5F045D620A26}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {108C3326-58D2-4C26-9D78-5F045D620A26}.Debug|Any CPU.Build.0 = Debug|Any CPU {108C3326-58D2-4C26-9D78-5F045D620A26}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -359,7 +325,6 @@ Global {A0786763-88EC-41DB-9E4F-6DDACA1A1162} = {96B07D94-46E0-4A1C-9484-E842B47FFE04} {EC18ED5C-A9EC-414F-948C-DD1BC052D312} = {96B07D94-46E0-4A1C-9484-E842B47FFE04} {7F906C3D-4C18-4185-8235-4908FC082398} = {96B07D94-46E0-4A1C-9484-E842B47FFE04} - {0C0B0830-4871-4979-8675-93F980F5EBE2} = {96B07D94-46E0-4A1C-9484-E842B47FFE04} {108C3326-58D2-4C26-9D78-5F045D620A26} = {96B07D94-46E0-4A1C-9484-E842B47FFE04} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution diff --git a/Keras.UnitTest/Keras.Layers.Core.cs b/Keras.UnitTest/Keras.Layers.Core.cs index 
766b6ec..e856521 100644 --- a/Keras.UnitTest/Keras.Layers.Core.cs +++ b/Keras.UnitTest/Keras.Layers.Core.cs @@ -46,7 +46,7 @@ public void Dense_CustomKRegularizerAndKInitParams() Assert.AreEqual(2000, modelAsJson.config.layers[i].config.kernel_regularizer.config.l2.Value); // Compile and train - model.Compile(optimizer: new Adam(lr: 0.001F), loss: "binary_crossentropy", metrics: new string[] { "accuracy" }); + model.Compile(optimizer: new Adam(learning_rate: 0.001F), loss: "binary_crossentropy", metrics: new string[] { "accuracy" }); model.Fit(x, y, batch_size: x.shape[0], epochs: 100, verbose: 0); Assert.AreEqual(2, model.GetWeights().Count); } diff --git a/Keras/Activations.cs b/Keras/Activations.cs index 126fc34..6e70455 100644 --- a/Keras/Activations.cs +++ b/Keras/Activations.cs @@ -157,5 +157,43 @@ public static NDarray Linear(NDarray x) parameters["x"] = x; return new NDarray(InvokeStaticMethod(caller, "linear", parameters)); } + + /// + /// Gaussian error linear unit (GELU) computes x * P(X <= x), where P(X) ~ N(0, 1). The (GELU) nonlinearity weights inputs by their value, rather than gates inputs by their sign as in ReLU. + /// + /// Input tensor. + /// A bool, whether to enable approximation. + /// + public static NDarray Gelu(NDarray x, bool approximate = false) + { + Dictionary parameters = new Dictionary(); + parameters["x"] = x; + parameters["approximate"] = approximate; + return new NDarray(InvokeStaticMethod(caller, "gelu", parameters)); + } + + /// + /// Mish activation function. + /// + /// Input tensor. + /// Output tensor + public static NDarray Mish(NDarray x) + { + Dictionary parameters = new Dictionary(); + parameters["x"] = x; + return new NDarray(InvokeStaticMethod(caller, "mish", parameters)); + } + + /// + /// Swish activation function, swish(x) = x * sigmoid(x). + /// + /// Input tensor. 
+ /// Output tensor + public static NDarray Swish(NDarray x) + { + Dictionary parameters = new Dictionary(); + parameters["x"] = x; + return new NDarray(InvokeStaticMethod(caller, "swish", parameters)); + } } } diff --git a/Keras/Callbacks.cs b/Keras/Callbacks.cs index 07ab8bf..3bef351 100644 --- a/Keras/Callbacks.cs +++ b/Keras/Callbacks.cs @@ -6,6 +6,8 @@ using Python.Runtime; using Numpy; using System.IO; +using static System.Net.WebRequestMethods; +using Keras.Models; namespace Keras.Callbacks { @@ -36,7 +38,7 @@ public static Callback Custom(string name, string fileOrcode, bool isFile = true string code = ""; if(isFile) { - code = File.ReadAllText(fileOrcode); + code = System.IO.File.ReadAllText(fileOrcode); } else { @@ -183,9 +185,9 @@ public class ModelCheckpoint : Callback /// if save_best_only=True, the latest best model according to the quantity monitored will not be overwritten. /// if True, then only the model's weights will be saved (model.save_weights(filepath)), else the full model is saved (model.save(filepath)). /// one of {auto, min, max}. If save_best_only=True, the decision to overwrite the current save file is made based on either the maximization or the minimization of the monitored quantity. For val_acc, this should be max, for val_loss this should be min, etc. In auto mode, the direction is automatically inferred from the name of the monitored quantity. - /// Interval (number of epochs) between checkpoints. - public ModelCheckpoint(string filepath, string monitor = "val_loss", int verbose = 0, bool save_best_only = true - , bool save_weights_only = false, string mode = "auto", int period = 1) + /// 'epoch' or integer. When using 'epoch', the callback saves the model after each epoch. When using integer, the callback saves the model at end of this many batches. 
+ public ModelCheckpoint(string filepath, string monitor = "val_loss", int verbose = 0, bool save_best_only = false + , bool save_weights_only = false, string mode = "auto", string save_freq= "epoch") { Parameters["filepath"] = filepath; Parameters["monitor"] = monitor; @@ -193,8 +195,8 @@ public ModelCheckpoint(string filepath, string monitor = "val_loss", int verbose Parameters["save_best_only"] = save_best_only; Parameters["save_weights_only"] = save_weights_only; Parameters["mode"] = mode; - Parameters["period"] = period; - + Parameters["save_freq"] = save_freq; + //ToDo: extend options parameter PyInstance = Instance.keras.callbacks.ModelCheckpoint; Init(); } @@ -216,7 +218,9 @@ public class EarlyStopping : Callback /// one of {auto, min, max}. In min mode, training will stop when the quantity monitored has stopped decreasing; in max mode it will stop when the quantity monitored has stopped increasing; in auto mode, the direction is automatically inferred from the name of the monitored quantity. /// Baseline value for the monitored quantity to reach. Training will stop if the model doesn't show improvement over the baseline. /// whether to restore model weights from the epoch with the best value of the monitored quantity. If False, the model weights obtained at the last step of training are used. - public EarlyStopping(string monitor = "val_loss", float min_delta = 0, int patience = 0, int verbose = 0, string mode = "auto", float? baseline = null, bool restore_best_weights = false) + /// Number of epochs to wait before starting to monitor improvement. This allows for a warm-up period in which no improvement is expected and thus training will not be stopped. + public EarlyStopping(string monitor = "val_loss", float min_delta = 0, int patience = 0, int verbose = 0, string mode = "auto", + float? 
baseline = null, bool restore_best_weights = false, int start_from_epoch = 0) { Parameters["monitor"] = monitor; Parameters["min_delta"] = min_delta; @@ -225,6 +229,7 @@ public EarlyStopping(string monitor = "val_loss", float min_delta = 0, int patie Parameters["mode"] = mode; Parameters["baseline"] = baseline; Parameters["restore_best_weights"] = restore_best_weights; + Parameters["start_from_epoch"] = start_from_epoch; PyInstance = Instance.keras.callbacks.EarlyStopping; Init(); @@ -294,28 +299,24 @@ public class TensorBoard : Callback /// /// the path of the directory where to save the log files to be parsed by TensorBoard. /// frequency (in epochs) at which to compute activation and weight histograms for the layers of the model. If set to 0, histograms won't be computed. Validation data (or split) must be specified for histogram visualizations. - /// size of batch of inputs to feed to the network for histograms computation. /// whether to visualize the graph in TensorBoard. The log file can become quite large when write_graph is set to True. - /// whether to visualize gradient histograms in TensorBoard. histogram_freq must be greater than 0. /// whether to write model weights to visualize as image in TensorBoard. + /// whether to log the training steps per second into TensorBoard. This supports both epoch and batch frequency logging. + /// 'batch' or 'epoch' or integer. When using 'epoch', writes the losses and metrics to TensorBoard after every epoch. /// frequency (in epochs) at which selected embedding layers will be saved. If set to 0, embeddings won't be computed. Data to be visualized in TensorBoard's Embedding tab must be passed as embeddings_data. - /// a list of names of layers to keep eye on. If None or empty list all the embedding layer will be watched. /// a dictionary which maps layer name to a file name in which metadata for this embedding layer is saved. See the details about metadata files format. 
In case if the same metadata file is used for all embedding layers, string can be passed. - /// data to be embedded at layers specified in embeddings_layer_names. Numpy array (if the model has a single input) or list of Numpy arrays (if the model has multiple inputs). Learn more about embeddings. - public TensorBoard(string log_dir= "./logs", int histogram_freq= 0, int batch_size= 32, bool write_graph= true, bool write_grads= false, - bool write_images= false, int embeddings_freq= 0, string[] embeddings_layer_names= null, Dictionary embeddings_metadata= null, - NDarray embeddings_data= null, string update_freq= "epoch") + public TensorBoard(string log_dir= "./logs", int histogram_freq= 0, bool write_graph= true, bool write_images= false, int? write_steps_per_second = null, + string update_freq = "epoch", int embeddings_freq= 0, Dictionary embeddings_metadata= null) { Parameters["log_dir"] = log_dir; Parameters["histogram_freq"] = histogram_freq; - Parameters["batch_size"] = batch_size; Parameters["write_graph"] = write_graph; + Parameters["write_images"] = write_images; + Parameters["write_steps_per_second"] = write_steps_per_second; + Parameters["update_freq"] = update_freq; Parameters["embeddings_freq"] = embeddings_freq; - Parameters["embeddings_layer_names"] = embeddings_layer_names; Parameters["embeddings_metadata"] = embeddings_metadata; - Parameters["embeddings_data"] = embeddings_data?.PyObject; - Parameters["update_freq"] = update_freq; - + PyInstance = Instance.keras.callbacks.TensorBoard; Init(); } @@ -380,4 +381,53 @@ public CSVLogger(string filename, string separator = ",", bool append = false) Init(); } } + + /// + /// BackupAndRestore callback is intended to recover training from an interruption that has happened in the middle of a Model.fit execution, + /// by backing up the training states in a temporary checkpoint file (with the help of a tf.train.CheckpointManager), at the end of each epoch. 
+ /// + public class BackupAndRestore : Callback + { + /// + /// Initializes a new instance of the class. + /// + /// String, path to store the checkpoint. e.g. backup_dir = os.path.join(working_dir, 'backup') + /// 'epoch', integer, or False. When set to 'epoch' the callback saves the checkpoint at the end of each epoch + /// Boolean, default to True. This BackupAndRestore callback works by saving a checkpoint to back up the training state + /// A boolean value instructing whether to turn on the automatic checkpoint saving for preemption/maintenance events. + public BackupAndRestore(string backup_dir, string save_freq = "epoch", bool delete_checkpoint = true, bool save_before_preemption = false) + { + Parameters["backup_dir"] = backup_dir; + Parameters["save_freq"] = save_freq; + Parameters["delete_checkpoint"] = delete_checkpoint; + Parameters["save_before_preemption"] = save_before_preemption; + + PyInstance = Instance.keras.callbacks.BackupAndRestore; + Init(); + } + } + + /// + /// Container abstracting a list of callbacks. + /// + public class CallbackList : Callback + { + /// + /// Initializes a new instance of the class. + /// + /// List of Callback instances. + /// Whether a History callback should be added, if one does not already exist in the callbacks list. + /// Whether a ProgbarLogger callback should be added, if one does not already exist in the callbacks list. + /// The Model these callbacks are used with. 
+ public CallbackList(List callbacks, bool add_history = false, bool add_progbar = false, BaseModel model = null) + { + Parameters["callbacks"] = callbacks; + Parameters["add_history"] = add_history; + Parameters["add_progbar"] = add_progbar; + Parameters["model"] = model?.ToPython(); // model is optional (default null): forward null instead of dereferencing it + + PyInstance = Instance.keras.callbacks.CallbackList; + Init(); + } + } } diff --git a/Keras/Constraints.cs b/Keras/Constraints.cs index b122a1e..5f2af30 100644 --- a/Keras/Constraints.cs +++ b/Keras/Constraints.cs @@ -49,6 +49,7 @@ public class UnitNorm : Base /// integer, axis along which to calculate weight norms. For instance, in a Dense layer the weight matrix has shape (input_dim, output_dim), set axis to 0 to constrain each weight vector of length (input_dim,). In a Conv2D layer with data_format="channels_last", the weight tensor has shape (rows, cols, input_depth, output_depth), set axis to [0, 1, 2] to constrain the weights of each filter tensor of size (rows, cols, input_depth). public UnitNorm(int axis = 0) { + Parameters["axis"] = axis; - PyInstance = keras.constraints.NonNeg; + PyInstance = keras.constraints.UnitNorm; // was NonNeg, the wrong Keras class for this wrapper now that axis is passed Init(); } @@ -70,8 +71,21 @@ public class MinMaxNorm : Base /// integer, axis along which to calculate weight norms. For instance, in a Dense layer the weight matrix has shape (input_dim, output_dim), set axis to 0 to constrain each weight vector of length (input_dim,). In a Conv2D layer with data_format="channels_last", the weight tensor has shape (rows, cols, input_depth, output_depth), set axis to [0, 1, 2] to constrain the weights of each filter tensor of size (rows, cols, input_depth).
public MinMaxNorm(float min_value= 0.0f, float max_value= 1.0f, float rate= 1.0f, int axis = 0) { + Parameters["min_value"] = min_value; + Parameters["max_value"] = max_value; + Parameters["rate"] = rate; + Parameters["axis"] = axis; - PyInstance = keras.constraints.NonNeg; + PyInstance = keras.constraints.MinMaxNorm; // was NonNeg, the wrong Keras class for this wrapper now that min_value/max_value/rate/axis are passed Init(); } } + + public class RadialConstraint : Base + { + public RadialConstraint() + { + PyInstance = keras.constraints.RadialConstraint; + Init(); + } + } } diff --git a/Keras/ExportArchive.cs b/Keras/ExportArchive.cs new file mode 100644 index 0000000..c582223 --- /dev/null +++ b/Keras/ExportArchive.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Keras +{ + public class ExportArchive : Base + { + public ExportArchive() + { + //ToDo: Build based on the tensor specs + } + } +} diff --git a/Keras/Keras.csproj b/Keras/Keras.csproj index a5c2695..aaac2b8 100644 --- a/Keras/Keras.csproj +++ b/Keras/Keras.csproj @@ -43,7 +43,7 @@ - + diff --git a/Keras/Models/BaseModel.cs b/Keras/Models/BaseModel.cs index 7d5cdd2..2c7bdbe 100644 --- a/Keras/Models/BaseModel.cs +++ b/Keras/Models/BaseModel.cs @@ -1,4 +1,5 @@ using Keras.Callbacks; +using Keras.Layers; using Keras.Utils; using Numpy; using Python.Runtime; @@ -7,6 +8,8 @@ using System.IO; using System.Linq; using System.Text; +using Tensorflow.Train; +using static System.Net.WebRequestMethods; namespace Keras.Models { @@ -16,23 +19,25 @@ public class BaseModel : Base ///Configures the model for training. /// /// String (name of optimizer) or optimizer instance. See optimizers. - /// String (name of objective function) or objective function. See losses. If the model has multiple outputs, you can use a different loss on each output by passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses. + /// String (name of objective function) or objective function. See losses.
If the model has multiple outputs, you can use a different loss on each output by passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses. /// List of metrics to be evaluated by the model during training and testing. Typically you will use metrics=['accuracy']. To specify different metrics for different outputs of a multi-output model, you could also pass a dictionary, such as metrics={'output_a': 'accuracy'}. /// Optional list or dictionary specifying scalar coefficients (Python floats) to weight the loss contributions of different model outputs. The loss value that will be minimized by the model will then be the weighted sum of all individual losses, weighted by the loss_weightscoefficients. If a list, it is expected to have a 1:1 mapping to the model's outputs. If a tensor, it is expected to map output names (strings) to scalar coefficients. - /// If you need to do timestep-wise sample weighting (2D weights), set this to "temporal". None defaults to sample-wise weights (1D). If the model has multiple outputs, you can use a different sample_weight_mode on each output by passing a dictionary or a list of modes. /// List of metrics to be evaluated and weighted by sample_weight or class_weight during training and testing. - /// By default, Keras will create placeholders for the model's target, which will be fed with the target data during training. If instead you would like to use your own target tensors (in turn, Keras will not expect external Numpy data for these targets at training time), you can specify them via the target_tensors argument. It can be a single tensor (for a single-output model), a list of tensors, or a dict mapping output names to target tensors. + /// Bool. Defaults to False. If True, this Model's logic will not be wrapped in a tf.function. Recommended to leave this as None unless your Model cannot be run inside a tf.function. 
run_eagerly=True is not supported when using. + /// Int. Defaults to 1. The number of batches to run during each tf.function call. Running multiple batches inside a single tf.function call can greatly improve performance on TPUs or small models with a large Python overhead. At most, one full epoch will be run each execution + /// If True, compile the model training step with XLA. XLA is an optimizing compiler for machine learning. jit_compile is not enabled for by default. Note that jit_compile=True may not necessarily work for all models. public void Compile(StringOrInstance optimizer, string loss, string[] metrics = null, float[] loss_weights = null, - string sample_weight_mode = null, string[] weighted_metrics = null, NDarray[] target_tensors = null) + string[] weighted_metrics = null, bool run_eagerly = false, int steps_per_execution = 1, bool jit_compile = false) { var args = new Dictionary(); args["optimizer"] = optimizer; args["loss"] = loss; args["metrics"] = metrics; args["loss_weights"] = loss_weights; - args["sample_weight_mode"] = sample_weight_mode; args["weighted_metrics"] = weighted_metrics; - args["target_tensors"] = target_tensors; + args["run_eagerly"] = run_eagerly; + args["steps_per_execution"] = steps_per_execution; + args["jit_compile"] = jit_compile; InvokeMethod("compile", args); } @@ -44,24 +49,64 @@ public void Compile(StringOrInstance optimizer, string loss, string[] metrics = /// List of Strings (name of objective function) or objective function. See losses. If the model has multiple outputs, you can use a different loss on each output by passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses. /// List of metrics to be evaluated by the model during training and testing. Typically you will use metrics=['accuracy']. 
To specify different metrics for different outputs of a multi-output model, you could also pass a dictionary, such as metrics={'output_a': 'accuracy'}. /// Optional list or dictionary specifying scalar coefficients (Python floats) to weight the loss contributions of different model outputs. The loss value that will be minimized by the model will then be the weighted sum of all individual losses, weighted by the loss_weightscoefficients. If a list, it is expected to have a 1:1 mapping to the model's outputs. If a tensor, it is expected to map output names (strings) to scalar coefficients. - /// If you need to do timestep-wise sample weighting (2D weights), set this to "temporal". None defaults to sample-wise weights (1D). If the model has multiple outputs, you can use a different sample_weight_mode on each output by passing a dictionary or a list of modes. /// List of metrics to be evaluated and weighted by sample_weight or class_weight during training and testing. - /// By default, Keras will create placeholders for the model's target, which will be fed with the target data during training. If instead you would like to use your own target tensors (in turn, Keras will not expect external Numpy data for these targets at training time), you can specify them via the target_tensors argument. It can be a single tensor (for a single-output model), a list of tensors, or a dict mapping output names to target tensors. + /// Bool. Defaults to False. If True, this Model's logic will not be wrapped in a tf.function. Recommended to leave this as None unless your Model cannot be run inside a tf.function. run_eagerly=True is not supported when using. + /// Int. Defaults to 1. The number of batches to run during each tf.function call. Running multiple batches inside a single tf.function call can greatly improve performance on TPUs or small models with a large Python overhead. At most, one full epoch will be run each execution + /// If True, compile the model training step with XLA. 
XLA is an optimizing compiler for machine learning. jit_compile is not enabled for by default. Note that jit_compile=True may not necessarily work for all models. public void Compile(StringOrInstance optimizer, string[] loss, string[] metrics = null, float[] loss_weights = null, - string sample_weight_mode = null, string[] weighted_metrics = null, NDarray[] target_tensors = null) + string[] weighted_metrics = null, bool run_eagerly = false, int steps_per_execution = 1, bool jit_compile = false) { var args = new Dictionary(); args["optimizer"] = optimizer; args["loss"] = loss; args["metrics"] = metrics; args["loss_weights"] = loss_weights; - args["sample_weight_mode"] = sample_weight_mode; args["weighted_metrics"] = weighted_metrics; - args["target_tensors"] = target_tensors; + args["run_eagerly"] = run_eagerly; + args["steps_per_execution"] = steps_per_execution; + args["jit_compile"] = jit_compile; InvokeMethod("compile", args); } + /// + /// Compute the total loss, validate it, and return it. + /// Subclasses can optionally override this method to provide custom loss computation logic. + /// + /// Input data. + /// Target data. + /// Predictions returned by the model (output of model(x)) + /// Sample weights for weighting the loss function. + public void ComputeLoss(NDarray x, NDarray y, NDarray y_pred = null, NDarray sample_weight = null) + { + var args = new Dictionary(); + args["x"] = x; + args["y"] = y; + args["y_pred"] = y_pred; + args["sample_weight"] = sample_weight; + + InvokeMethod("compute_loss", args); + } + + /// + /// Update metric states and collect all metrics to be returned. + /// Subclasses can optionally override this method to provide custom metric updating and collection logic. + /// + /// Input data. + /// Target data. + /// Predictions returned by the model (output of model(x)) + /// Sample weights for weighting the loss function. 
+ public void ComputeMetrics(NDarray x, NDarray y, NDarray y_pred = null, NDarray sample_weight = null) + { + var args = new Dictionary(); + args["x"] = x; + args["y"] = y; + args["y_pred"] = y_pred; + args["sample_weight"] = sample_weight; + + InvokeMethod("compute_metrics", args); + } + /// /// Trains the model for a given number of epochs (iterations on a dataset). /// @@ -79,10 +124,16 @@ public void Compile(StringOrInstance optimizer, string[] loss, string[] metrics /// Integer. Epoch at which to start training (useful for resuming a previous training run). /// Integer or None. Total number of steps (batches of samples) before declaring one epoch finished and starting the next epoch. When training with input tensors such as TensorFlow data tensors, the default None is equal to the number of samples in your dataset divided by the batch size, or 1 if that cannot be determined. /// Only relevant if steps_per_epoch is specified. Total number of steps (batches of samples) to validate before stopping. + /// Integer or None. Number of samples per validation batch. If unspecified, will default to batch_size. Do not specify the validation_batch_size if your data is in the form of datasets, generators, or keras.utils.Sequence instances + /// Only relevant if validation data is provided. Integer or collections.abc.Container instance (e.g. list, tuple, etc.). If an integer, specifies how many training epochs to run before a new validation run is performed, e.g. validation_freq=2 runs validation every 2 epochs. If a Container, specifies the epochs on which to run validation, e.g. validation_freq=[1, 2, 10] runs validation at the end of the 1st, 2nd, and 10th epochs. + /// Integer. Used for generator or keras.utils.Sequence input only. Maximum size for the generator queue. If unspecified, max_queue_size will default to 10. + /// Integer. Used for generator or keras.utils.Sequence input only. Maximum number of processes to spin up when using process-based threading. 
If unspecified, workers will default to 1. + /// Boolean. Used for generator or keras.utils.Sequence input only. If True, use process-based threading. If unspecified, use_multiprocessing will default to False. . /// A History object. Its History.history attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). public History Fit(NDarray x, NDarray y, int? batch_size = null, int epochs = 1, int verbose = 1, Callback[] callbacks = null, float validation_split = 0.0f, NDarray[] validation_data = null, bool shuffle = true, Dictionary class_weight = null, - NDarray sample_weight = null, int initial_epoch = 0, int? steps_per_epoch = null, int? validation_steps = null) + NDarray sample_weight = null, int initial_epoch = 0, int? steps_per_epoch = null, int? validation_steps = null, + int? validation_batch_size = null, int[] validation_freq = null, int max_queue_size = 10, int workers = 1, bool use_multiprocessing = false) { var args = new Dictionary(); args["x"] = x; @@ -107,6 +158,11 @@ public History Fit(NDarray x, NDarray y, int? batch_size = null, int epochs = 1, args["initial_epoch"] = initial_epoch; args["steps_per_epoch"] = steps_per_epoch; args["validation_steps"] = validation_steps; + args["validation_batch_size"] = validation_batch_size; + args["validation_freq"] = validation_freq; + args["max_queue_size"] = max_queue_size; + args["workers"] = workers; + args["use_multiprocessing"] = use_multiprocessing; PyObject py = InvokeMethod("fit", args); @@ -131,10 +187,16 @@ public History Fit(NDarray x, NDarray y, int? batch_size = null, int epochs = 1, /// Integer. Epoch at which to start training (useful for resuming a previous training run). /// Integer or None. Total number of steps (batches of samples) before declaring one epoch finished and starting the next epoch. 
When training with input tensors such as TensorFlow data tensors, the default None is equal to the number of samples in your dataset divided by the batch size, or 1 if that cannot be determined. /// Only relevant if steps_per_epoch is specified. Total number of steps (batches of samples) to validate before stopping. + /// Integer or None. Number of samples per validation batch. If unspecified, will default to batch_size. Do not specify the validation_batch_size if your data is in the form of datasets, generators, or keras.utils.Sequence instances + /// Only relevant if validation data is provided. Integer or collections.abc.Container instance (e.g. list, tuple, etc.). If an integer, specifies how many training epochs to run before a new validation run is performed, e.g. validation_freq=2 runs validation every 2 epochs. If a Container, specifies the epochs on which to run validation, e.g. validation_freq=[1, 2, 10] runs validation at the end of the 1st, 2nd, and 10th epochs. + /// Integer. Used for generator or keras.utils.Sequence input only. Maximum size for the generator queue. If unspecified, max_queue_size will default to 10. + /// Integer. Used for generator or keras.utils.Sequence input only. Maximum number of processes to spin up when using process-based threading. If unspecified, workers will default to 1. + /// Boolean. Used for generator or keras.utils.Sequence input only. If True, use process-based threading. If unspecified, use_multiprocessing will default to False. . /// A History object. Its History.history attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). public History Fit(NDarray x, NDarray[] y, int? 
batch_size = null, int epochs = 1, int verbose = 1, Callback[] callbacks = null, float validation_split = 0.0f, NDarray[] validation_data = null, bool shuffle = true, Dictionary class_weight = null, - NDarray sample_weight = null, int initial_epoch = 0, int? steps_per_epoch = null, int? validation_steps = null) + NDarray sample_weight = null, int initial_epoch = 0, int? steps_per_epoch = null, int? validation_steps = null, + int? validation_batch_size = null, int[] validation_freq = null, int max_queue_size = 10, int workers = 1, bool use_multiprocessing = false) { var args = new Dictionary(); args["x"] = x; @@ -159,6 +221,11 @@ public History Fit(NDarray x, NDarray[] y, int? batch_size = null, int epochs = args["initial_epoch"] = initial_epoch; args["steps_per_epoch"] = steps_per_epoch; args["validation_steps"] = validation_steps; + args["validation_batch_size"] = validation_batch_size; + args["validation_freq"] = validation_freq; + args["max_queue_size"] = max_queue_size; + args["workers"] = workers; + args["use_multiprocessing"] = use_multiprocessing; PyObject py = InvokeMethod("fit", args); @@ -175,8 +242,14 @@ public History Fit(NDarray x, NDarray[] y, int? batch_size = null, int epochs = /// Optional Numpy array of weights for the test samples, used for weighting the loss function. You can either pass a flat (1D) Numpy array with the same length as the input samples (1:1 mapping between weights and samples), or in the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specifysample_weight_mode="temporal" in compile(). /// Integer or None. Total number of steps (batches of samples) before declaring the evaluation round finished. Ignored with the default value of None. /// List of keras.callbacks.Callback instances. List of callbacks to apply during evaluation. See callbacks. + /// Integer. 
Used for generator or keras.utils.Sequence input only. Maximum size for the generator queue. If unspecified, max_queue_size will default to 10. + /// Integer. Used for generator or keras.utils.Sequence input only. Maximum number of processes to spin up when using process-based threading. If unspecified, workers will default to 1. + /// Boolean. Used for generator or keras.utils.Sequence input only. If True, use process-based threading. If unspecified, use_multiprocessing will default to False. + /// If True, loss and metric results are returned as a dict, with each key being the name of the metric. If False, they are returned as a list. /// Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute model.metrics_names will give you the display labels for the scalar outputs. - public double[] Evaluate(NDarray x, NDarray y, int? batch_size = null, int verbose = 1, NDarray sample_weight = null, int? steps = null, Callback[] callbacks = null) + public double[] Evaluate(NDarray x, NDarray y, int? batch_size = null, int verbose = 1, + NDarray sample_weight = null, int? steps = null, Callback[] callbacks = null, int max_queue_size=10, + int workers = 1, bool use_multiprocessing = false, bool return_dict = false) { var args = new Dictionary(); args["x"] = x.PyObject; @@ -186,6 +259,10 @@ public double[] Evaluate(NDarray x, NDarray y, int? batch_size = null, int verbo args["sample_weight"] = sample_weight; args["steps"] = steps; args["callbacks"] = callbacks != null ? callbacks : null; + args["max_queue_size"] = max_queue_size; + args["workers"] = workers; + args["use_multiprocessing"] = use_multiprocessing; + args["return_dict"] = return_dict; return InvokeMethod("evaluate", args)?.As(); } @@ -199,8 +276,12 @@ public double[] Evaluate(NDarray x, NDarray y, int? batch_size = null, int verbo /// Verbosity mode, 0 or 1. 
/// Total number of steps (batches of samples) before declaring the prediction round finished. Ignored with the default value of None. /// List of keras.callbacks.Callback instances. List of callbacks to apply during prediction. See callbacks. + /// Integer. Used for generator or keras.utils.Sequence input only. Maximum size for the generator queue. If unspecified, max_queue_size will default to 10. + /// Integer. Used for generator or keras.utils.Sequence input only. Maximum number of processes to spin up when using process-based threading. If unspecified, workers will default to 1. + /// Boolean. Used for generator or keras.utils.Sequence input only. If True, use process-based threading. If unspecified, use_multiprocessing will default to False. . /// Numpy array(s) of predictions. - public NDarray Predict(NDarray x, int? batch_size = null, int verbose = 1, int? steps = null, Callback[] callbacks = null) + public NDarray Predict(NDarray x, int? batch_size = null, int verbose = 1, int? steps = null + , Callback[] callbacks = null, int max_queue_size = 10, int workers = 1, bool use_multiprocessing = false) { var args = new Dictionary(); args["x"] = x; @@ -208,6 +289,9 @@ public NDarray Predict(NDarray x, int? batch_size = null, int verbose = 1, int? args["verbose"] = verbose; args["steps"] = steps; args["callbacks"] = callbacks != null ? callbacks : null; + args["max_queue_size"] = max_queue_size; + args["workers"] = workers; + args["use_multiprocessing"] = use_multiprocessing; return new NDarray(InvokeMethod("predict", args)); } @@ -253,8 +337,12 @@ public NDarray[] PredictMultipleOutputs(NDarray x, int? batch_size = null, int v /// Verbosity mode, 0 or 1. /// Total number of steps (batches of samples) before declaring the prediction round finished. Ignored with the default value of None. /// List of keras.callbacks.Callback instances. List of callbacks to apply during prediction. See callbacks. + /// Integer. Used for generator or keras.utils.Sequence input only. 
Maximum size for the generator queue. If unspecified, max_queue_size will default to 10. + /// Integer. Used for generator or keras.utils.Sequence input only. Maximum number of processes to spin up when using process-based threading. If unspecified, workers will default to 1. + /// Boolean. Used for generator or keras.utils.Sequence input only. If True, use process-based threading. If unspecified, use_multiprocessing will default to False. . /// Numpy array(s) of predictions. - public NDarray Predict(List x, int? batch_size = null, int verbose = 1, int? steps = null, Callback[] callbacks = null) + public NDarray Predict(List x, int? batch_size = null, int verbose = 1, int? steps = null, + Callback[] callbacks = null, int max_queue_size = 10, int workers = 1, bool use_multiprocessing = false) { var args = new Dictionary(); @@ -271,6 +359,9 @@ public NDarray Predict(List x, int? batch_size = null, int verbose = 1, args["verbose"] = verbose; args["steps"] = steps; args["callbacks"] = callbacks != null ? callbacks : null; + args["max_queue_size"] = max_queue_size; + args["workers"] = workers; + args["use_multiprocessing"] = use_multiprocessing; return new NDarray(InvokeMethod("predict", args)); } @@ -283,15 +374,19 @@ public NDarray Predict(List x, int? batch_size = null, int verbose = 1, /// Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. /// Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). 
/// Optional dictionary mapping class indices (integers) to a weight (float) to apply to the model's loss for the samples from this class during training. This can be useful to tell the model to "pay more attention" to samples from an under-represented class. + /// If True, the metrics returned will be only for this batch. If False, the metrics will be statefully accumulated across batches. + /// If True, loss and metric results are returned as a dict, with each key being the name of the metric. If False, they are returned as a list. /// Scalar training loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute model.metrics_names will give you the display labels for the scalar outputs. - public double[] TrainOnBatch(NDarray x, NDarray y, NDarray sample_weight = null, Dictionary class_weight = null) + public double[] TrainOnBatch(NDarray x, NDarray y, NDarray sample_weight = null, Dictionary class_weight = null, bool reset_metrics = false, bool return_dict = false) { var args = new Dictionary(); args["x"] = x; args["y"] = y; args["sample_weight"] = sample_weight; args["class_weight"] = class_weight; + args["reset_metrics"] = reset_metrics; + args["return_dict"] = return_dict; var pyresult = InvokeMethod("train_on_batch", args); if (pyresult == null) return default; @@ -304,13 +399,25 @@ public double[] TrainOnBatch(NDarray x, NDarray y, NDarray sample_weight = null, return result; } - public double[] TrainOnBatch(NDarray[] x, NDarray y, NDarray sample_weight = null, Dictionary class_weight = null) + /// + /// Runs a single gradient update on a single batch of data. + /// + /// Numpy array of training data, or list of Numpy arrays if the model has multiple inputs.
If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. + /// Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. + /// Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). + /// Optional dictionary mapping class indices (integers) to a weight (float) to apply to the model's loss for the samples from this class during training. This can be useful to tell the model to "pay more attention" to samples from an under-represented class. + /// If True, the metrics returned will be only for this batch. If False, the metrics will be statefully accumulated across batches. + /// If True, loss and metric results are returned as a dict, with each key being the name of the metric. If False, they are returned as a list. + /// Scalar training loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute model.metrics_names will give you the display labels for the scalar outputs.
+ public double[] TrainOnBatch(NDarray[] x, NDarray y, NDarray sample_weight = null, Dictionary class_weight = null, bool reset_metrics = false, bool return_dict = false) { var args = new Dictionary(); args["x"] = x; args["y"] = y; args["sample_weight"] = sample_weight; args["class_weight"] = class_weight; + args["reset_metrics"] = reset_metrics; + args["return_dict"] = return_dict; var pyresult = InvokeMethod("train_on_batch", args); if (pyresult == null) return default; @@ -329,13 +436,17 @@ public double[] TrainOnBatch(NDarray[] x, NDarray y, NDarray sample_weight = nul /// Numpy array of test data, or list of Numpy arrays if the model has multiple inputs. If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. /// Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. /// Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). + /// If True, the metrics returned will be only for this batch. If False, the metrics will be statefully accumulated across batches. + /// If True, loss and metric results are returned as a dict, with each key being the name of the metric. If False, they are returned as a list. /// Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics).
The attribute model.metrics_names will give you the display labels for the scalar outputs. - public double[] TestOnBatch(NDarray x, NDarray y, NDarray sample_weight = null) + public double[] TestOnBatch(NDarray x, NDarray y, NDarray sample_weight = null, bool reset_metrics = false, bool return_dict = false) { var args = new Dictionary(); args["x"] = x; args["y"] = y; args["sample_weight"] = sample_weight; + args["reset_metrics"] = reset_metrics; + args["return_dict"] = return_dict; //return InvokeMethod("test_on_batch", args)?.As(); var pyresult = InvokeMethod("test_on_batch", args); @@ -349,12 +460,23 @@ public double[] TestOnBatch(NDarray x, NDarray y, NDarray sample_weight = null) return result; } - public double[] TestOnBatch(NDarray[] x, NDarray y, NDarray sample_weight = null) + /// + /// Tests the model on a single batch of samples. + /// + /// Numpy array of test data, or list of Numpy arrays if the model has multiple inputs. If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. + /// Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. + /// Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). + /// If True, the metrics returned will be only for this batch. If False, the metrics will be statefully accumulated across batches.
+ /// If True, loss and metric results are returned as a dict, with each key being the name of the metric. If False, they are returned as a list. + /// Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute model.metrics_names will give you the display labels for the scalar outputs. + public double[] TestOnBatch(NDarray[] x, NDarray y, NDarray sample_weight = null, bool reset_metrics = false, bool return_dict = false) { var args = new Dictionary(); args["x"] = x; args["y"] = y; args["sample_weight"] = sample_weight; + args["reset_metrics"] = reset_metrics; + args["return_dict"] = return_dict; //return InvokeMethod("test_on_batch", args)?.As(); var pyresult = InvokeMethod("test_on_batch", args); @@ -381,6 +503,11 @@ public NDarray PredictOnBatch(NDarray x) return new NDarray(InvokeMethod("predict_on_batch", args)); } + /// + /// Returns predictions for a single batch of samples. + /// + /// Input samples, as a Numpy array. + /// Numpy array(s) of predictions. public NDarray PredictOnBatch(NDarray[] x) { var args = new Dictionary(); @@ -389,6 +516,32 @@ public NDarray PredictOnBatch(NDarray[] x) return new NDarray(InvokeMethod("predict_on_batch", args)); } + /// + /// The logic for one inference step. + /// + /// A nested structure of Tensors. + /// Numpy array(s) of predictions. + public NDarray PredictStep(NDarray data) + { + var args = new Dictionary(); + args["data"] = data; + + return new NDarray(InvokeMethod("predict_step", args)); + } + + /// + /// The logic for one inference step. + /// + /// A nested structure of Tensors. + /// Numpy array(s) of predictions. + public NDarray PredictStep(NDarray[] data) + { + var args = new Dictionary(); + args["data"] = data; + + return new NDarray(InvokeMethod("predict_step", args)); + } + public History FitGenerator(Sequence generator, int? 
steps_per_epoch = null, int epochs = 1, int verbose = 1, Callback[] callbacks = null, Sequence validation_data = null, int? validation_steps = null, int validation_freq = 1, Dictionary class_weight = null, int max_queue_size = 10, int workers = 1, bool use_multiprocessing = false, bool shuffle = true, int initial_epoch = 0) @@ -473,19 +626,23 @@ public string ToJson() /// /// Saves the weight of the trained model to a file. /// - /// The path of the weight to save. - public void SaveWeight(string path) + /// str or pathlib.Path object. Path where to save the model. + /// Whether we should overwrite any existing model at the target location, or instead ask the user via an interactive prompt. + /// Either "keras", "tf", "h5", indicating whether to save the model in the native Keras format (.keras), in the TensorFlow SavedModel format (referred to as "SavedModel" below), or in the legacy HDF5 format (.h5). Defaults to "tf" in TF 2.X, and "h5" in TF 1.X. + public void SaveWeight(string filepath, bool overwrite = true, string save_format = "tf") { - PyInstance.save_weights(path); + PyInstance.save_weights(filepath, overwrite, save_format); } /// - /// Save the model to h5 file + /// Saves a model as a TensorFlow SavedModel or HDF5 file. /// - /// The path with filename eg: model.h5. - public void Save(string filepath, bool overwrite = true, bool include_optimizer = true) + /// str or pathlib.Path object. Path where to save the model. + /// Whether we should overwrite any existing model at the target location, or instead ask the user via an interactive prompt. + /// Either "keras", "tf", "h5", indicating whether to save the model in the native Keras format (.keras), in the TensorFlow SavedModel format (referred to as "SavedModel" below), or in the legacy HDF5 format (.h5). Defaults to "tf" in TF 2.X, and "h5" in TF 1.X. 
+ public void Save(string filepath, bool overwrite = true, string save_format = "tf") { - PyInstance.save(filepath: filepath, overwrite: overwrite, include_optimizer: include_optimizer); + PyInstance.save(filepath: filepath, overwrite: overwrite, save_format: save_format); } /// @@ -525,10 +682,17 @@ public void SetWeights(List weights) /// /// Loads the weight to the model from a file. /// - /// The path of of the weight file. - public void LoadWeight(string path) + /// String, path to the weights file to load. For weight files in TensorFlow format, this is the file prefix (the same as was passed to save_weights()). + /// Boolean. Whether to skip loading of layers where there is a mismatch in the number of weights, or a mismatch in the shape of the weights (only valid when by_name is true). + /// Boolean. Whether to load weights by name or by topological order. Only topological loading is supported for weight files in TensorFlow format. + /// Optional CheckpointOptions object that specifies options for loading weights. + public void LoadWeight(string filepath, bool skip_mismatch= false, bool by_name= false, CheckpointOptions options= null) { - PyInstance.load_weights(path); + PyObject optionPyObject = null; + if (options != null) + optionPyObject = options.ToPython(); + + PyInstance.load_weights(filepath, skip_mismatch, by_name, optionPyObject); } /// @@ -587,7 +751,7 @@ public static BaseModel ModelFromYaml(string json_string) public void SaveOnnx(string filePath) { var onnx_model = Instance.keras2onnx.convert_keras(model: (PyObject)this.PyInstance); - File.WriteAllText(filePath, onnx_model.ToString()); + System.IO.File.WriteAllText(filePath, onnx_model.ToString()); } /// @@ -595,9 +759,9 @@ public void SaveOnnx(string filePath) /// /// Length of the line. /// The positions. - public void Summary(int? line_length = null, float[] positions = null) + public void Summary(int?
line_length = null, float[] positions = null, string print_fn = null, bool expand_nested = false, bool show_trainable = false, int[] layer_range = null) { - PyInstance.summary(line_length: line_length, positions: positions); + PyInstance.summary(line_length: line_length, positions: positions, print_fn: print_fn, expand_nested: expand_nested, show_trainable: show_trainable, layer_range: layer_range); } /// @@ -609,5 +773,41 @@ public void SaveTensorflowJSFormat(string artifacts_dir, bool quantize = false) { Instance.tfjs.converters.save_keras_model(model: this.PyInstance, artifacts_dir: artifacts_dir); } + + /// + /// Export the model + /// + /// File path to export + public void Export(string filepath) + { + PyInstance.export(filepath); + } + + /// + /// Get the layer based on name and/or index + /// + /// Name of the layer + /// Index of the layer + /// + public BaseLayer GetLayer(string name = null, int? index = null) + { + return (BaseLayer)PyInstance.get_layer(name, index); + } + + /// + /// Resets the state of all the metrics in the model. + /// + public void ResetMetrics() + { + PyInstance.reset_metrics(); + } + + /// + /// Reset the states of the model + /// + public void ResetStates() + { + PyInstance.reset_states(); + } } } diff --git a/Keras/Optimizers.cs b/Keras/Optimizers.cs index aae320a..c545549 100644 --- a/Keras/Optimizers.cs +++ b/Keras/Optimizers.cs @@ -1,8 +1,97 @@ using System; using System.Collections.Generic; +using System.Runtime.InteropServices; using System.Text; namespace Keras.Optimizers +{ + /// + /// Adam optimizer. Default parameters follow those provided in the original paper. + /// + /// + public class Adam : Base + { + public Adam(float learning_rate = 0.001f, float beta_1= 0.9f, float beta_2= 0.999f, float epsilon = 1e-07f, bool amsgrad = false, float? weight_decay = null, + bool? clipnorm = null, bool? clipvalue = null, float? global_clipnorm = null, bool use_ema = false, float ema_momentum = 0.99f, + int? 
ema_overwrite_frequency = null, bool jit_compile = true) + { + Parameters["learning_rate"] = learning_rate; + Parameters["beta_1"] = beta_1; + Parameters["beta_2"] = beta_2; + Parameters["epsilon"] = epsilon; + Parameters["amsgrad"] = amsgrad; + Parameters["weight_decay"] = weight_decay; + Parameters["clipnorm"] = clipnorm; + Parameters["clipvalue"] = clipvalue; + Parameters["global_clipnorm"] = global_clipnorm; + Parameters["use_ema"] = use_ema; + Parameters["ema_momentum"] = ema_momentum; + Parameters["ema_overwrite_frequency"] = ema_overwrite_frequency; + Parameters["jit_compile"] = jit_compile; + + PyInstance = Instance.keras.optimizers.Adam; + Init(); + } + } + + /// + /// Optimizer that implements the Adafactor algorithm. + /// + /// + public class Adafactor : Base + { + public Adafactor(float learning_rate = 0.001f, float beta_2_decay = -0.8f, float epsilon_1 = 1e-30f, float epsilon_2 = 1e-3f, float clip_threshold = 1, + bool relative_step = true, string name = null, float? weight_decay = null, bool? clipnorm = null, bool? clipvalue = null, + float? global_clipnorm = null, bool use_ema = false, float ema_momentum = 0.99f, int? 
ema_overwrite_frequency = null, bool jit_compile = true) + { + Parameters["learning_rate"] = learning_rate; + Parameters["beta_2_decay"] = beta_2_decay; + Parameters["epsilon_1"] = epsilon_1; + Parameters["epsilon_2"] = epsilon_2; + Parameters["clip_threshold"] = clip_threshold; + Parameters["relative_step"] = relative_step; + Parameters["name"] = name; + Parameters["weight_decay"] = weight_decay; + Parameters["clipnorm"] = clipnorm; + Parameters["clipvalue"] = clipvalue; + Parameters["global_clipnorm"] = global_clipnorm; + Parameters["use_ema"] = use_ema; + Parameters["ema_momentum"] = ema_momentum; + Parameters["ema_overwrite_frequency"] = ema_overwrite_frequency; + Parameters["jit_compile"] = jit_compile; + + PyInstance = Instance.keras.optimizers.Adafactor; + Init(); + } + } + + public class AdamW : Base + { + public AdamW(float learning_rate = 0.001f, float weight_decay = 0.004f, float beta_1 = 0.9f, float beta_2 = 0.999f, float epsilon = 1e-07f, bool amsgrad = false, + bool? clipnorm = null, bool? clipvalue = null, float? global_clipnorm = null, bool use_ema = false, float ema_momentum = 0.99f, + int? ema_overwrite_frequency = null, bool jit_compile = true) + { + Parameters["learning_rate"] = learning_rate; + Parameters["weight_decay"] = weight_decay; + Parameters["beta_1"] = beta_1; + Parameters["beta_2"] = beta_2; + Parameters["epsilon"] = epsilon; + Parameters["amsgrad"] = amsgrad; + Parameters["clipnorm"] = clipnorm; + Parameters["clipvalue"] = clipvalue; + Parameters["global_clipnorm"] = global_clipnorm; + Parameters["use_ema"] = use_ema; + Parameters["ema_momentum"] = ema_momentum; + Parameters["ema_overwrite_frequency"] = ema_overwrite_frequency; + Parameters["jit_compile"] = jit_compile; + + PyInstance = Instance.keras.optimizers.AdamW; + Init(); + } + } +} + +namespace Keras.Optimizers.Legacy { /// /// Stochastic gradient descent optimizer. Includes support for momentum, learning rate decay, and Nesterov momentum. 
@@ -24,7 +113,7 @@ public SGD(float lr = 0.01f, float momentum = 0.0f, float decay = 0.0f, bool nes
             Parameters["decay"] = decay;
             Parameters["nesterov"] = nesterov;
 
-            PyInstance = Instance.keras.optimizers.SGD;
+            PyInstance = Instance.keras.optimizers.legacy.SGD;
             Init();
         }
     }
@@ -50,7 +139,7 @@ public RMSprop(float lr = 0.01f, float rho = 0.9f, float? epsilon = null, float
             Parameters["epsilon"] = epsilon;
             Parameters["decay"] = decay;
 
-            PyInstance = Instance.keras.optimizers.RMSprop;
+            PyInstance = Instance.keras.optimizers.legacy.RMSprop;
             Init();
         }
     }
@@ -73,7 +162,7 @@ public Adagrad(float lr = 0.01f, float? epsilon = null, float decay = 0.0f)
             Parameters["epsilon"] = epsilon;
-            Parameters["decay"] = lr;
+            Parameters["decay"] = decay;
 
-            PyInstance = Instance.keras.optimizers.Adagrad;
+            PyInstance = Instance.keras.optimizers.legacy.Adagrad;
             Init();
         }
     }
@@ -98,7 +187,7 @@ public Adadelta(float lr = 1.0f, float rho = 0.95f, float? epsilon = null, float
             Parameters["epsilon"] = epsilon;
             Parameters["decay"] = decay;
 
-            PyInstance = Instance.keras.optimizers.Adadelta;
+            PyInstance = Instance.keras.optimizers.legacy.Adadelta;
             Init();
         }
     }
@@ -118,7 +207,7 @@ public class Adam : Base
         /// The epsilon.
         /// The decay.
         /// boolean. Whether to apply the AMSGrad variant of this algorithm from the paper "On the Convergence of Adam and Beyond".
-        public Adam(float lr = 0.001f, float beta_1= 0.9f, float beta_2= 0.999f, float? epsilon = null, float decay = 0.0f, bool amsgrad = false)
+        public Adam(float lr = 0.001f, float beta_1 = 0.9f, float beta_2 = 0.999f, float? epsilon = null, float decay = 0.0f, bool amsgrad = false)
         {
             Parameters["lr"] = lr;
             Parameters["beta_1"] = beta_1;
@@ -127,7 +216,7 @@ public Adam(float lr = 0.001f, float beta_1= 0.9f, float beta_2= 0.999f, float?
             Parameters["decay"] = decay;
             Parameters["amsgrad"] = amsgrad;
 
-            PyInstance = Instance.keras.optimizers.Adam;
+            PyInstance = Instance.keras.optimizers.legacy.Adam;
             Init();
         }
     }
@@ -154,7 +243,7 @@ public Adamax(float lr = 0.002f, float beta_1 = 0.9f, float beta_2 = 0.999f, flo
             Parameters["epsilon"] = epsilon;
             Parameters["decay"] = decay;
 
-            PyInstance = Instance.keras.optimizers.Adamax;
+            PyInstance = Instance.keras.optimizers.legacy.Adamax;
             Init();
         }
     }
@@ -180,7 +269,7 @@ public Nadam(float lr = 0.002f, float beta_1 = 0.9f, float beta_2 = 0.999f)
             Parameters["beta_1"] = beta_1;
             Parameters["beta_2"] = beta_2;
 
-            PyInstance = Instance.keras.optimizers.Adamax;
+            PyInstance = Instance.keras.optimizers.legacy.Nadam;
             Init();
         }
     }
@@ -203,7 +292,7 @@ public class Ftrl : Base
         /// float <= 0. Lambda 2 Regularization Strength.
         /// float <= 0. Lambda 2 Shrinkage Regularization Strength.
         /// floats, 0 < beta < 1. Generally close to 1.
-        public Ftrl(float lr = 0.001f,float lrp = -0.5f, float iav = 0.1f, float l1rs = 0.0f, float l2rs = 0.0f, float l2srs = 0.0f, float beta = 0.0f)
+        public Ftrl(float lr = 0.001f, float lrp = -0.5f, float iav = 0.1f, float l1rs = 0.0f, float l2rs = 0.0f, float l2srs = 0.0f, float beta = 0.0f)
         {
             Parameters["learning_rate"] = lr;
             Parameters["learning_rate_power"] = lrp;
@@ -213,7 +302,7 @@ public Ftrl(float lr = 0.001f,float lrp = -0.5f, float iav = 0.1f, float l1rs =
             Parameters["l2_shrinkage_regularization_strength"] = l2srs;
             Parameters["beta"] = beta;
 
-            PyInstance = Instance.keras.optimizers.Ftrl;
+            PyInstance = Instance.keras.optimizers.legacy.Ftrl;
             Init();
         }
     }
diff --git a/Keras/Base.cs b/Tensorflow/Base.cs
similarity index 99%
rename from Keras/Base.cs
rename to Tensorflow/Base.cs
index 1cb0c5a..a97cd75 100644
--- a/Keras/Base.cs
+++ b/Tensorflow/Base.cs
@@ -8,7 +8,7 @@ namespace Keras
 {
     public abstract class Base : Keras
     {
-        internal dynamic PyInstance;
+        public dynamic PyInstance;
         public Dictionary<string, object> Parameters = new Dictionary<string, object>();
         public object None
= null;
 
diff --git a/Keras/InternalTypes.cs b/Tensorflow/InternalTypes.cs
similarity index 100%
rename from Keras/InternalTypes.cs
rename to Tensorflow/InternalTypes.cs
diff --git a/Keras/Keras.cs b/Tensorflow/Keras.cs
similarity index 95%
rename from Keras/Keras.cs
rename to Tensorflow/Keras.cs
index dc0504f..93687f0 100644
--- a/Keras/Keras.cs
+++ b/Tensorflow/Keras.cs
@@ -1,6 +1,4 @@
-using Keras.Layers;
-using Keras.Utils;
-using Numpy;
+using Numpy;
 using Numpy.Models;
 using Python.Runtime;
 using System;
@@ -22,7 +20,7 @@ public class Keras : IDisposable
 
         private static bool alreadyDisabled = false;
 
-        private static Lazy<Keras> _instance = new Lazy<Keras>(() =>
+        public static Lazy<Keras> _instance = new Lazy<Keras>(() =>
         {
             var instance = new Keras();
             instance.keras = InstallAndImport(Setup.KerasModule);
@@ -92,7 +90,7 @@ private static PyObject InstallAndImport(string module)
 
         private bool IsInitialized => keras != null;
 
-        internal Keras() { }
+        public Keras() { }
 
         public void Dispose()
         {
@@ -100,7 +98,7 @@ public void Dispose()
             PythonEngine.Shutdown();
         }
 
-        internal static PyObject ToPython(object obj)
+        public static PyObject ToPython(object obj)
         {
             if (obj == null) return Runtime.None;
             switch (obj)
@@ -122,7 +120,7 @@ internal static PyObject ToPython(object obj)
                 case Slice o: return o.ToPython();
                 case PythonObject o: return o.PyObject;
                 case PyObject o: return o;
-                case Sequence o: return o.PyInstance;
+                //case Sequence o: return o.PyInstance;
                 case StringOrInstance o: return o.PyObject;
                 case KerasFunction o: return o.PyObject;
                 case Base o: return o.PyInstance;
diff --git a/Keras/Setup.cs b/Tensorflow/Setup.cs
similarity index 100%
rename from Keras/Setup.cs
rename to Tensorflow/Setup.cs
diff --git a/Tensorflow/Train/CheckpointOptions.cs b/Tensorflow/Train/CheckpointOptions.cs
new file mode 100644
index 0000000..75fc983
--- /dev/null
+++ b/Tensorflow/Train/CheckpointOptions.cs
@@ -0,0 +1,12 @@
+using Keras;
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Tensorflow.Train
+{
+    public class CheckpointOptions : Base
+    {
+
+    }
+}