From 92dd22ad76a2f8c8e06c1d814e28842431303a80 Mon Sep 17 00:00:00 2001 From: RoberLopez Date: Wed, 4 Dec 2024 19:41:53 +0100 Subject: [PATCH] clean --- examples/airfoil_self_noise/main.cpp | 2 +- opennn/adaptive_moment_estimation.cpp | 39 ++++-------- opennn/bounding_layer.h | 4 +- opennn/convolutional_layer.h | 6 +- opennn/embedding_layer.h | 14 ++--- opennn/flatten_layer.cpp | 10 +-- opennn/flatten_layer.h | 10 +-- opennn/genetic_algorithm.cpp | 2 +- opennn/levenberg_marquardt_algorithm.cpp | 55 ++++++++--------- opennn/levenberg_marquardt_algorithm.h | 6 +- opennn/long_short_term_memory_layer.cpp | 76 +++++++++++------------ opennn/long_short_term_memory_layer.h | 6 +- opennn/loss_index.cpp | 10 ++- opennn/mean_squared_error.cpp | 1 + opennn/mean_squared_error.h | 4 +- opennn/multihead_attention_layer.h | 6 +- opennn/neural_network.cpp | 4 +- opennn/normalized_squared_error.h | 2 +- opennn/perceptron_layer.cpp | 22 ++++--- opennn/perceptron_layer.h | 12 ++-- opennn/perceptron_layer_3d.h | 6 +- opennn/pooling_layer.h | 14 ++--- opennn/probabilistic_layer.cpp | 32 +++++----- opennn/probabilistic_layer.h | 12 ++-- opennn/probabilistic_layer_3d.h | 6 +- opennn/quasi_newton_method.cpp | 22 ++----- opennn/recurrent_layer.cpp | 14 ++--- opennn/recurrent_layer.h | 6 +- opennn/scaling_layer_2d.cpp | 62 +++++++++---------- opennn/scaling_layer_2d.h | 8 +-- opennn/scaling_layer_4d.h | 8 +-- opennn/transformer.cpp | 78 ++++++++++++------------ opennn/transformer.h | 32 +++++----- opennn/unscaling_layer.cpp | 44 ++++++------- opennn/unscaling_layer.h | 6 +- 35 files changed, 302 insertions(+), 339 deletions(-) diff --git a/examples/airfoil_self_noise/main.cpp b/examples/airfoil_self_noise/main.cpp index 93358a4e6..804558b07 100644 --- a/examples/airfoil_self_noise/main.cpp +++ b/examples/airfoil_self_noise/main.cpp @@ -65,7 +65,7 @@ int main() //training_strategy.load("../data/training_strategy.xml"); training_strategy.perform_training(); - +/* ModelSelection model_selection(&training_strategy); model_selection.perform_inputs_selection(); diff --git a/opennn/adaptive_moment_estimation.cpp b/opennn/adaptive_moment_estimation.cpp index f817f40e9..af5668fc0 100644 --- a/opennn/adaptive_moment_estimation.cpp +++ b/opennn/adaptive_moment_estimation.cpp @@ -11,8 +11,6 @@ #include "adaptive_moment_estimation.h" #include "forward_propagation.h" #include "back_propagation.h" -#include "scaling_layer_2d.h" -#include "unscaling_layer.h" namespace opennn { @@ -365,54 +363,39 @@ TrainingResults AdaptiveMomentEstimation::perform_training() cout << "Elapsed time: " << write_time(elapsed_time) << endl; } - // Training history + // @todo loss and error missmatch + + stop_training = true; if(epoch == maximum_epochs_number) { if(display) cout << "Epoch " << epoch << "\nMaximum epochs number reached: " << epoch << endl; - - stop_training = true; - results.stopping_condition = StoppingCondition::MaximumEpochsNumber; } - - if(elapsed_time >= maximum_time) + else if(elapsed_time >= maximum_time) { if(display) cout << "Epoch " << epoch << "\nMaximum training time reached: " << write_time(elapsed_time) << endl; - - stop_training = true; - results.stopping_condition = StoppingCondition::MaximumTime; } - - // @todo loss and error missmatch - - if(results.training_error_history(epoch) < training_loss_goal) + else if(results.training_error_history(epoch) < training_loss_goal) { - stop_training = true; - results.stopping_condition = StoppingCondition::LossGoal; - if(display) cout << "Epoch " << epoch << "\nLoss goal reached: " 
<< results.training_error_history(epoch) << endl; } - - if(training_accuracy >= training_accuracy_goal) + else if(training_accuracy >= training_accuracy_goal) { - stop_training = true; - results.stopping_condition = StoppingCondition::LossGoal; - if(display) cout << "Epoch " << epoch << "\nAccuracy goal reached: " << training_accuracy << endl; } - - if(selection_failures >= maximum_selection_failures) + else if(selection_failures >= maximum_selection_failures) { if(display) cout << "Epoch " << epoch << "\nMaximum selection failures reached: " << selection_failures << endl; - - stop_training = true; - results.stopping_condition = StoppingCondition::MaximumSelectionErrorIncreases; } + else + { + stop_training = false; + } if(stop_training) { diff --git a/opennn/bounding_layer.h b/opennn/bounding_layer.h index 4915838b2..c05918c9f 100644 --- a/opennn/bounding_layer.h +++ b/opennn/bounding_layer.h @@ -64,7 +64,7 @@ class BoundingLayer : public Layer // Serialization - void print() const; + void print() const override; void from_XML(const XMLDocument&) override; @@ -88,7 +88,7 @@ struct BoundingLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; }; diff --git a/opennn/convolutional_layer.h b/opennn/convolutional_layer.h index edb720cc6..9a270345f 100644 --- a/opennn/convolutional_layer.h +++ b/opennn/convolutional_layer.h @@ -203,7 +203,7 @@ struct ConvolutionalLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; @@ -220,11 +220,11 @@ struct ConvolutionalLayerBackPropagation : LayerBackPropagation { ConvolutionalLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; //Tensor image_convolutions_derivatives; diff --git a/opennn/embedding_layer.h b/opennn/embedding_layer.h index cc8fb5cb2..b46da0b84 100644 --- a/opennn/embedding_layer.h +++ b/opennn/embedding_layer.h @@ -35,7 +35,7 @@ class EmbeddingLayer : public Layer Index get_depth() const; bool get_positional_encoding() const; - dimensions get_input_dimensions() const; + dimensions get_input_dimensions() const override; dimensions get_output_dimensions() const override; Index get_parameters_number() const override; @@ -69,9 +69,9 @@ class EmbeddingLayer : public Layer void add_deltas(const vector>&) const; - void insert_gradient(unique_ptr& back_propagation, - const Index& index, - Tensor& gradient) const; + void insert_gradient(unique_ptr&, + const Index&, + Tensor&) const override; void from_XML(const XMLDocument&) override; void to_XML(XMLPrinter&) const override; @@ -106,7 +106,7 @@ struct EmbeddingLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; void build_positional_encoding_matrix(); @@ -122,11 +122,11 @@ struct EmbeddingLayerBackPropagation : LayerBackPropagation { EmbeddingLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor sample_deltas; Tensor 
embedding_weights_derivatives; diff --git a/opennn/flatten_layer.cpp b/opennn/flatten_layer.cpp index f39114803..ce7112995 100644 --- a/opennn/flatten_layer.cpp +++ b/opennn/flatten_layer.cpp @@ -64,7 +64,7 @@ void FlattenLayer::forward_propagate(const vector>& inpu { const Index batch_samples_number = layer_forward_propagation->batch_samples_number; - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); FlattenLayerForwardPropagation* flatten_layer_forward_propagation = static_cast(layer_forward_propagation.get()); @@ -73,9 +73,9 @@ void FlattenLayer::forward_propagate(const vector>& inpu memcpy(outputs_data, input_pairs[0].first, - batch_samples_number*neurons_number*sizeof(type)); + batch_samples_number*outputs_number*sizeof(type)); - flatten_layer_forward_propagation->outputs = TensorMap>(input_pairs[0].first, batch_samples_number, neurons_number); + flatten_layer_forward_propagation->outputs = TensorMap>(input_pairs[0].first, batch_samples_number, outputs_number); } @@ -85,7 +85,7 @@ void FlattenLayer::back_propagate(const vector>& input_p unique_ptr& back_propagation) const { const Index batch_samples_number = input_pairs[0].second[0]; - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); // Back propagation @@ -96,7 +96,7 @@ void FlattenLayer::back_propagate(const vector>& input_p memcpy(input_derivatives.data(), delta_pairs[0].first, - Index(batch_samples_number * neurons_number * sizeof(type))); + Index(batch_samples_number * outputs_number * sizeof(type))); } diff --git a/opennn/flatten_layer.h b/opennn/flatten_layer.h index bc1b6c0f8..457411199 100644 --- a/opennn/flatten_layer.h +++ b/opennn/flatten_layer.h @@ -29,7 +29,7 @@ class FlattenLayer : public Layer FlattenLayer(const dimensions& = {0,0,0}); - dimensions get_input_dimensions() const; + dimensions get_input_dimensions() const override; dimensions get_output_dimensions() const override; Index get_input_height() const; @@ -57,7 +57,7 @@ class FlattenLayer : public Layer void to_XML(XMLPrinter&) const override; - void print() const; + void print() const override; #ifdef OPENNN_CUDA #include "../../opennn_cuda/opennn_cuda/flatten_layer_cuda.h" @@ -77,7 +77,7 @@ struct FlattenLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; }; @@ -87,11 +87,11 @@ struct FlattenLayerBackPropagation : LayerBackPropagation { FlattenLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor input_derivatives; }; diff --git a/opennn/genetic_algorithm.cpp b/opennn/genetic_algorithm.cpp index dc04288aa..9627b25a3 100644 --- a/opennn/genetic_algorithm.cpp +++ b/opennn/genetic_algorithm.cpp @@ -577,7 +577,7 @@ void GeneticAlgorithm::perform_crossover() Index descendent_index = 0; - for(Index i = 0; i < parent_1_indices.size(); i++) + for(size_t i = 0; i < parent_1_indices.size(); i++) { parent_1_variables = population.chip(parent_1_indices[i], 0); diff --git a/opennn/levenberg_marquardt_algorithm.cpp b/opennn/levenberg_marquardt_algorithm.cpp index 127ccc50c..8691e82b0 100644 --- a/opennn/levenberg_marquardt_algorithm.cpp +++ b/opennn/levenberg_marquardt_algorithm.cpp @@ -264,11 +264,12 @@ 
TrainingResults LevenbergMarquardtAlgorithm::perform_training() loss_index->back_propagate_lm(training_batch, training_forward_propagation, training_back_propagation_lm); -/* + results.training_error_history(epoch) = training_back_propagation_lm.error(); if(has_selection) - { + { +/* neural_network->forward_propagate(selection_batch.get_input_pairs(), selection_forward_propagation, is_training); @@ -289,9 +290,12 @@ TrainingResults LevenbergMarquardtAlgorithm::perform_training() if(epoch != 0 && results.selection_error_history(epoch) > results.selection_error_history(epoch-1)) selection_failures++; - } */ + } + elapsed_time = get_elapsed_time(beginning_time); + if(epoch != 0) loss_decrease = old_loss - training_back_propagation_lm.loss; + old_loss = training_back_propagation_lm.loss; if(display && epoch%display_period == 0) { @@ -301,54 +305,37 @@ TrainingResults LevenbergMarquardtAlgorithm::perform_training() cout << "Elapsed time: " << write_time(elapsed_time) << endl; } + stop_training = true; + if(results.training_error_history(epoch) < training_loss_goal) { - stop_training = true; - - results.stopping_condition = StoppingCondition::LossGoal; - if(display) cout << "Epoch " << epoch << "\nLoss goal reached: " << results.training_error_history(epoch) << endl; + results.stopping_condition = StoppingCondition::LossGoal; } - - if(epoch != 0) loss_decrease = old_loss - training_back_propagation_lm.loss; - - if(loss_decrease < minimum_loss_decrease) + else if(loss_decrease < minimum_loss_decrease) { if(display) cout << "Epoch " << epoch << "\nMinimum loss decrease reached: " << loss_decrease << endl; - - stop_training = true; - results.stopping_condition = StoppingCondition::MinimumLossDecrease; } - - old_loss = training_back_propagation_lm.loss; - - if(selection_failures >= maximum_selection_failures) + else if(selection_failures >= maximum_selection_failures) { if(display) cout << "Epoch " << epoch << "Maximum selection failures reached: " << selection_failures << endl; - - stop_training = true; - results.stopping_condition = StoppingCondition::MaximumSelectionErrorIncreases; } - - if(epoch == maximum_epochs_number) + else if(epoch == maximum_epochs_number) { if(display) cout << "Epoch " << epoch << "\nMaximum epochs number reached: " << epoch << endl; - - stop_training = true; - results.stopping_condition = StoppingCondition::MaximumEpochsNumber; } - - if(elapsed_time >= maximum_time) + else if(elapsed_time >= maximum_time) { if(display) cout << "Epoch " << epoch << "Maximum training time reached: " << elapsed_time << endl; - - stop_training = true; - results.stopping_condition = StoppingCondition::MaximumTime; } + else + { + stop_training = false; + } if(stop_training) { @@ -534,6 +521,12 @@ void LevenbergMarquardtAlgorithm::from_XML(const XMLDocument& document) } +LevenbergMarquardtAlgorithmData::LevenbergMarquardtAlgorithmData(LevenbergMarquardtAlgorithm *new_Levenberg_Marquardt_method) +{ + set(new_Levenberg_Marquardt_method); +} + + void LevenbergMarquardtAlgorithmData::set(LevenbergMarquardtAlgorithm* new_Levenberg_Marquardt_method) { Levenberg_Marquardt_algorithm = new_Levenberg_Marquardt_method; diff --git a/opennn/levenberg_marquardt_algorithm.h b/opennn/levenberg_marquardt_algorithm.h index 235507074..9ed26b4fb 100644 --- a/opennn/levenberg_marquardt_algorithm.h +++ b/opennn/levenberg_marquardt_algorithm.h @@ -108,10 +108,7 @@ class LevenbergMarquardtAlgorithm : public OptimizationAlgorithm struct LevenbergMarquardtAlgorithmData : public OptimizationAlgorithmData { - 
LevenbergMarquardtAlgorithmData(LevenbergMarquardtAlgorithm* new_Levenberg_Marquardt_method = nullptr) - { - set(new_Levenberg_Marquardt_method); - } + LevenbergMarquardtAlgorithmData(LevenbergMarquardtAlgorithm* new_Levenberg_Marquardt_method = nullptr); void set(LevenbergMarquardtAlgorithm* = nullptr); @@ -133,7 +130,6 @@ struct LevenbergMarquardtAlgorithmData : public OptimizationAlgorithmData Index epoch = 0; }; - } #endif diff --git a/opennn/long_short_term_memory_layer.cpp b/opennn/long_short_term_memory_layer.cpp index de06f26c4..b17c202f1 100755 --- a/opennn/long_short_term_memory_layer.cpp +++ b/opennn/long_short_term_memory_layer.cpp @@ -41,10 +41,10 @@ dimensions LongShortTermMemoryLayer::get_output_dimensions() const Index LongShortTermMemoryLayer::get_parameters_number() const { - const Index inputs_number = get_input_dimensions()[0]; - const Index neurons_number = get_output_dimensions()[0]; + const Index inputs_number = get_inputs_number(); + const Index outputs_number = get_outputs_number(); - return 4 * neurons_number * (1 + inputs_number + neurons_number); + return 4 * outputs_number * (1 + inputs_number + outputs_number); } @@ -210,16 +210,16 @@ void LongShortTermMemoryLayer::set(const Index& new_inputs_number, const Index& void LongShortTermMemoryLayer::set_input_dimensions(const dimensions& new_input_dimensions) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); const Index time_steps = get_timesteps(); - set(new_input_dimensions[0], neurons_number, time_steps); + set(new_input_dimensions[0], outputs_number, time_steps); } void LongShortTermMemoryLayer::set_output_dimensions(const dimensions& new_output_dimensions) { - const Index inputs_number = get_input_dimensions()[0]; + const Index inputs_number = get_inputs_number(); const Index time_steps = get_timesteps(); set(inputs_number, new_output_dimensions[0], time_steps); @@ -228,8 +228,8 @@ void LongShortTermMemoryLayer::set_output_dimensions(const dimensions& new_outpu void LongShortTermMemoryLayer::set_parameters(const Tensor& new_parameters, const Index& index) { - const Index neurons_number = get_output_dimensions()[0]; - const Index inputs_number = get_input_dimensions()[0]; + const Index outputs_number = get_outputs_number(); + const Index inputs_number = get_inputs_number(); Index current_index = index; @@ -239,7 +239,7 @@ void LongShortTermMemoryLayer::set_parameters(const Tensor& new_paramet { #pragma omp section { - Index size = neurons_number; + Index size = outputs_number; memcpy(forget_biases.data(), new_parameters_data + current_index, size * sizeof(type)); current_index += size; @@ -255,8 +255,8 @@ void LongShortTermMemoryLayer::set_parameters(const Tensor& new_paramet #pragma omp section { - Index size = inputs_number * neurons_number; - Index local_index = current_index + neurons_number * 4; + Index size = inputs_number * outputs_number; + Index local_index = current_index + outputs_number * 4; memcpy(forget_weights.data(), new_parameters_data + local_index, size * sizeof(type)); local_index += size; @@ -272,8 +272,8 @@ void LongShortTermMemoryLayer::set_parameters(const Tensor& new_paramet #pragma omp section { - Index size = neurons_number * neurons_number; - Index local_index = current_index + neurons_number * 4 + inputs_number * neurons_number * 4; // Skip bias and weights size + Index size = outputs_number * outputs_number; + Index local_index = current_index + outputs_number * 4 + inputs_number * outputs_number * 4; // Skip bias and 
weights size memcpy(forget_recurrent_weights.data(), new_parameters_data + local_index, size * sizeof(type)); local_index += size; @@ -1950,8 +1950,8 @@ void LongShortTermMemoryLayer::insert_gradient(unique_ptr& const Index& index, Tensor& gradient) const { - const Index inputs_number = get_input_dimensions()[0]; - const Index neurons_number = get_output_dimensions()[0]; + const Index inputs_number = get_inputs_number(); + const Index outputs_number = get_outputs_number(); LongShortTermMemoryLayerBackPropagation* long_short_term_memory_layer_back_propagation = static_cast(back_propagation.get()); @@ -1961,56 +1961,56 @@ void LongShortTermMemoryLayer::insert_gradient(unique_ptr& // Biases copy(long_short_term_memory_layer_back_propagation->forget_biases_derivatives.data(), - long_short_term_memory_layer_back_propagation->forget_biases_derivatives.data() + neurons_number, + long_short_term_memory_layer_back_propagation->forget_biases_derivatives.data() + outputs_number, gradient_data + index); copy(long_short_term_memory_layer_back_propagation->input_biases_derivatives.data(), - long_short_term_memory_layer_back_propagation->input_biases_derivatives.data() + neurons_number, - gradient_data + index + neurons_number); + long_short_term_memory_layer_back_propagation->input_biases_derivatives.data() + outputs_number, + gradient_data + index + outputs_number); copy(long_short_term_memory_layer_back_propagation->state_biases_derivatives.data(), - long_short_term_memory_layer_back_propagation->state_biases_derivatives.data() + neurons_number, - gradient_data + index + 2*neurons_number); + long_short_term_memory_layer_back_propagation->state_biases_derivatives.data() + outputs_number, + gradient_data + index + 2*outputs_number); copy(long_short_term_memory_layer_back_propagation->output_biases_derivatives.data(), - long_short_term_memory_layer_back_propagation->output_biases_derivatives.data() + neurons_number, - gradient_data + index + 3*neurons_number); + long_short_term_memory_layer_back_propagation->output_biases_derivatives.data() + outputs_number, + gradient_data + index + 3*outputs_number); // Weights copy(long_short_term_memory_layer_back_propagation->forget_weights_derivatives.data(), - long_short_term_memory_layer_back_propagation->forget_weights_derivatives.data() + inputs_number*neurons_number, - gradient_data + index + 4*neurons_number); + long_short_term_memory_layer_back_propagation->forget_weights_derivatives.data() + inputs_number*outputs_number, + gradient_data + index + 4*outputs_number); copy(long_short_term_memory_layer_back_propagation->input_weights_derivatives.data(), - long_short_term_memory_layer_back_propagation->input_weights_derivatives.data() + inputs_number*neurons_number, - gradient_data + index + 4*neurons_number + inputs_number*neurons_number); + long_short_term_memory_layer_back_propagation->input_weights_derivatives.data() + inputs_number*outputs_number, + gradient_data + index + 4*outputs_number + inputs_number*outputs_number); copy(long_short_term_memory_layer_back_propagation->state_weights_derivatives.data(), - long_short_term_memory_layer_back_propagation->state_weights_derivatives.data() + inputs_number*neurons_number, - gradient_data + index + 4*neurons_number + 2*inputs_number*neurons_number); + long_short_term_memory_layer_back_propagation->state_weights_derivatives.data() + inputs_number*outputs_number, + gradient_data + index + 4*outputs_number + 2*inputs_number*outputs_number); 
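// Note (added for readability; inferred from the offsets in this hunk, not part of the original patch):
// the slice of `gradient` starting at gradient_data + index is packed as
//   4 bias vectors              -> 4 * outputs_number                       elements
//   4 input-weight matrices     -> 4 * inputs_number * outputs_number       elements
//   4 recurrent-weight matrices -> 4 * outputs_number * outputs_number      elements
// in forget / input / state / output order, which is consistent with
// get_parameters_number() == 4 * outputs_number * (1 + inputs_number + outputs_number).
// Equivalently, for gate g in {0:forget, 1:input, 2:state, 3:output} (hypothetical sketch of the same arithmetic):
//   bias offset      = index + g * outputs_number
//   weight offset    = index + 4 * outputs_number + g * inputs_number * outputs_number
//   recurrent offset = index + 4 * outputs_number + 4 * inputs_number * outputs_number + g * outputs_number * outputs_number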
copy(long_short_term_memory_layer_back_propagation->output_weights_derivatives.data(), - long_short_term_memory_layer_back_propagation->output_weights_derivatives.data() + inputs_number*neurons_number, - gradient_data + index + 4*neurons_number + 3*inputs_number*neurons_number); + long_short_term_memory_layer_back_propagation->output_weights_derivatives.data() + inputs_number*outputs_number, + gradient_data + index + 4*outputs_number + 3*inputs_number*outputs_number); // Recurrent weights copy(long_short_term_memory_layer_back_propagation->forget_recurrent_weights_derivatives.data(), - long_short_term_memory_layer_back_propagation->forget_recurrent_weights_derivatives.data() + neurons_number*neurons_number, - gradient_data + index + 4*neurons_number + 4*inputs_number*neurons_number); + long_short_term_memory_layer_back_propagation->forget_recurrent_weights_derivatives.data() + outputs_number*outputs_number, + gradient_data + index + 4*outputs_number + 4*inputs_number*outputs_number); copy(long_short_term_memory_layer_back_propagation->input_recurrent_weights_derivatives.data(), - long_short_term_memory_layer_back_propagation->input_recurrent_weights_derivatives.data() + neurons_number*neurons_number, - gradient_data + index + 4*neurons_number + 4*inputs_number*neurons_number + neurons_number*neurons_number); + long_short_term_memory_layer_back_propagation->input_recurrent_weights_derivatives.data() + outputs_number*outputs_number, + gradient_data + index + 4*outputs_number + 4*inputs_number*outputs_number + outputs_number*outputs_number); copy(long_short_term_memory_layer_back_propagation->state_recurrent_weights_derivatives.data(), - long_short_term_memory_layer_back_propagation->state_recurrent_weights_derivatives.data() + neurons_number*neurons_number, - gradient_data + index + 4*neurons_number + 4*inputs_number*neurons_number + 2*neurons_number*neurons_number); + long_short_term_memory_layer_back_propagation->state_recurrent_weights_derivatives.data() + outputs_number*outputs_number, + gradient_data + index + 4*outputs_number + 4*inputs_number*outputs_number + 2*outputs_number*outputs_number); copy(long_short_term_memory_layer_back_propagation->output_recurrent_weights_derivatives.data(), - long_short_term_memory_layer_back_propagation->output_recurrent_weights_derivatives.data() + neurons_number*neurons_number, - gradient_data + index + 4*neurons_number + 4*inputs_number*neurons_number + 3*neurons_number*neurons_number); + long_short_term_memory_layer_back_propagation->output_recurrent_weights_derivatives.data() + outputs_number*outputs_number, + gradient_data + index + 4*outputs_number + 4*inputs_number*outputs_number + 3*outputs_number*outputs_number); } diff --git a/opennn/long_short_term_memory_layer.h b/opennn/long_short_term_memory_layer.h index 12e1f642b..84bd674b2 100755 --- a/opennn/long_short_term_memory_layer.h +++ b/opennn/long_short_term_memory_layer.h @@ -176,7 +176,7 @@ struct LongShortTermMemoryLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor current_inputs; @@ -219,13 +219,13 @@ struct LongShortTermMemoryLayerBackPropagation : LayerBackPropagation LongShortTermMemoryLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; void set_derivatives_zero(); - void print() const; + void print() const 
override; Tensor current_deltas; diff --git a/opennn/loss_index.cpp b/opennn/loss_index.cpp index 5f19d77fc..281c3e7de 100644 --- a/opennn/loss_index.cpp +++ b/opennn/loss_index.cpp @@ -269,29 +269,27 @@ void LossIndex::calculate_layers_squared_errors_jacobian_lm(const Batch& batch, = back_propagation_lm.get_layer_delta_pairs(); calculate_output_delta_lm(batch, forward_propagation, back_propagation_lm); -/* + for(Index i = last_trainable_layer_index; i >= first_trainable_layer_index; i--) layers[i]->back_propagate_lm(layer_input_pairs[i], layer_delta_pairs[i], forward_propagation.layers[i], back_propagation_lm.neural_network.layers[i]); - const vector layer_parameter_numbers - = neural_network->get_layer_parameter_numbers(); + const vector layer_parameter_numbers = neural_network->get_layer_parameter_numbers(); const Index batch_samples_number = batch.get_batch_samples_number(); Index index = 0; - for(Index i = 0; i < last_trainable_layer_index - first_trainable_layer_index; i++) + for(Index i = 0; i < layers_number; i++) { layers[i]->insert_squared_errors_Jacobian_lm(back_propagation_lm.neural_network.layers[i], index, back_propagation_lm.squared_errors_jacobian); - + index += layer_parameter_numbers[i] * batch_samples_number; } -*/ } diff --git a/opennn/mean_squared_error.cpp b/opennn/mean_squared_error.cpp index b8d67aaf5..7b1dcd033 100644 --- a/opennn/mean_squared_error.cpp +++ b/opennn/mean_squared_error.cpp @@ -142,6 +142,7 @@ void MeanSquaredError::calculate_error_gradient_lm(const Batch& batch, void MeanSquaredError::calculate_error_hessian_lm(const Batch& batch, BackPropagationLM& back_propagation_lm) const { + const Index outputs_number = neural_network->get_outputs_number(); const Index batch_samples_number = outputs_number * batch.get_batch_samples_number(); diff --git a/opennn/mean_squared_error.h b/opennn/mean_squared_error.h index a28edd96d..a12ec9cca 100644 --- a/opennn/mean_squared_error.h +++ b/opennn/mean_squared_error.h @@ -40,10 +40,10 @@ class MeanSquaredError : public LossIndex BackPropagationLM&) const override; void calculate_error_gradient_lm(const Batch&, - BackPropagationLM&) const override; + BackPropagationLM&) const override; void calculate_error_hessian_lm(const Batch&, - BackPropagationLM&) const override; + BackPropagationLM&) const override; // Serialization diff --git a/opennn/multihead_attention_layer.h b/opennn/multihead_attention_layer.h index 343d8e601..26f408b7c 100644 --- a/opennn/multihead_attention_layer.h +++ b/opennn/multihead_attention_layer.h @@ -150,7 +150,7 @@ struct MultiheadAttentionLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor query; Tensor key; @@ -172,11 +172,11 @@ struct MultiheadAttentionLayerBackPropagation : LayerBackPropagation MultiheadAttentionLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor error_attention_scores_derivatives; Tensor error_attention_weights_derivatives; diff --git a/opennn/neural_network.cpp b/opennn/neural_network.cpp index 8d0ff7cb2..f2ceacc7d 100644 --- a/opennn/neural_network.cpp +++ b/opennn/neural_network.cpp @@ -210,9 +210,9 @@ vector> NeuralNetwork::get_layer_output_indices() const for (Index i = 0; i < layers_number; i++) { - for (Index k = 0; k < 
Index(layer_input_indices[i].size()); k++) + for (size_t j = 0; j < layer_input_indices[i].size(); j++) { - const Index input_index = layer_input_indices[i][k]; + const Index input_index = layer_input_indices[i][j]; if (input_index != -1) layer_output_indices[input_index].push_back(i); diff --git a/opennn/normalized_squared_error.h b/opennn/normalized_squared_error.h index c7ee12203..5217fdd32 100644 --- a/opennn/normalized_squared_error.h +++ b/opennn/normalized_squared_error.h @@ -24,7 +24,7 @@ class NormalizedSquaredError : public LossIndex type get_normalization_coefficient() const; type get_selection_normalization_coefficient() const; - void set_normalization_coefficient(); + void set_normalization_coefficient() override; // void set_normalization_coefficient(const type&); void set_time_series_normalization_coefficient(); diff --git a/opennn/perceptron_layer.cpp b/opennn/perceptron_layer.cpp index ba57f0069..33a094e13 100644 --- a/opennn/perceptron_layer.cpp +++ b/opennn/perceptron_layer.cpp @@ -142,17 +142,17 @@ void PerceptronLayer::set(const dimensions& new_input_dimensions, void PerceptronLayer::set_input_dimensions(const dimensions& new_input_dimensions) { const Index inputs_number = new_input_dimensions[0]; - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - biases.resize(neurons_number); + biases.resize(outputs_number); - synaptic_weights.resize(inputs_number, neurons_number); + synaptic_weights.resize(inputs_number, outputs_number); } void PerceptronLayer::set_output_dimensions(const dimensions& new_output_dimensions) { - const Index inputs_number = get_input_dimensions()[0]; + const Index inputs_number = get_inputs_number(); const Index neurons_number = new_output_dimensions[0]; biases.resize(neurons_number); @@ -362,8 +362,8 @@ void PerceptronLayer::back_propagate_lm(const vector>& i const TensorMap> inputs = tensor_map_2(input_pairs[0]); const TensorMap> deltas = tensor_map_2(delta_pairs[0]); - const Index inputs_number = get_input_dimensions()[0]; - const Index neurons_number = get_output_dimensions()[0]; + const Index inputs_number = get_inputs_number(); + const Index outputs_number = get_outputs_number(); const Index synaptic_weights_number = synaptic_weights.size(); @@ -394,7 +394,7 @@ void PerceptronLayer::back_propagate_lm(const vector>& i Index synaptic_weight_index = 0; - for(Index neuron_index = 0; neuron_index < neurons_number; neuron_index++) + for(Index neuron_index = 0; neuron_index < outputs_number; neuron_index++) { const TensorMap> combinations_derivatives_neuron = tensor_map(combinations_derivatives, neuron_index); @@ -460,7 +460,7 @@ void PerceptronLayer::insert_squared_errors_Jacobian_lm(unique_ptr& squared_errors_Jacobian) const { - const Index layer_parameters_number = get_parameters_number(); + const Index parameters_number = get_parameters_number(); const Index batch_samples_number = back_propagation->batch_samples_number; PerceptronLayerBackPropagationLM* perceptron_layer_back_propagation_lm = @@ -470,7 +470,11 @@ void PerceptronLayer::insert_squared_errors_Jacobian_lm(unique_ptr outputs; @@ -146,11 +146,11 @@ struct PerceptronLayerBackPropagation : LayerBackPropagation { PerceptronLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor combinations_derivatives; Tensor 
input_derivatives; @@ -164,11 +164,11 @@ struct PerceptronLayerBackPropagationLM : LayerBackPropagationLM { PerceptronLayerBackPropagationLM(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor combinations_derivatives; Tensor input_derivatives; diff --git a/opennn/perceptron_layer_3d.h b/opennn/perceptron_layer_3d.h index c5c82d5dc..d1d3dc3af 100644 --- a/opennn/perceptron_layer_3d.h +++ b/opennn/perceptron_layer_3d.h @@ -133,7 +133,7 @@ struct PerceptronLayer3DForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; @@ -145,11 +145,11 @@ struct PerceptronLayer3DBackPropagation : LayerBackPropagation { PerceptronLayer3DBackPropagation(const Index& = 0, Layer* = 0); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor biases_derivatives; Tensor synaptic_weights_derivatives; diff --git a/opennn/pooling_layer.h b/opennn/pooling_layer.h index 55081e664..c462e90aa 100644 --- a/opennn/pooling_layer.h +++ b/opennn/pooling_layer.h @@ -35,8 +35,8 @@ class PoolingLayer : public Layer const PoolingMethod& = PoolingMethod::MaxPooling, const string = "pooling_layer"); - dimensions get_input_dimensions() const; - dimensions get_output_dimensions() const; + dimensions get_input_dimensions() const override; + dimensions get_output_dimensions() const override; Index get_input_height() const; Index get_input_width() const; @@ -66,7 +66,7 @@ class PoolingLayer : public Layer const PoolingMethod& = PoolingMethod::MaxPooling, const string = "pooling_layer"); - void set_input_dimensions(const dimensions&); + void set_input_dimensions(const dimensions&) override; void set_padding_height(const Index&); void set_padding_width(const Index&); @@ -108,7 +108,7 @@ class PoolingLayer : public Layer void from_XML(const XMLDocument&) override; void to_XML(XMLPrinter&) const override; - void print() const; + void print() const override; #ifdef OPENNN_CUDA #include "../../opennn_cuda/opennn_cuda/pooling_layer_cuda.h" @@ -145,7 +145,7 @@ struct PoolingLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; @@ -159,11 +159,11 @@ struct PoolingLayerBackPropagation : LayerBackPropagation { PoolingLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor deltas_by_pool_size; diff --git a/opennn/probabilistic_layer.cpp b/opennn/probabilistic_layer.cpp index f387afbe1..8073b8314 100644 --- a/opennn/probabilistic_layer.cpp +++ b/opennn/probabilistic_layer.cpp @@ -200,7 +200,7 @@ void ProbabilisticLayer::forward_propagate(const vector> unique_ptr& forward_propagation, const bool& is_training) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); const TensorMap> inputs = tensor_map_2(input_pairs[0]); @@ -211,17 +211,17 @@ void ProbabilisticLayer::forward_propagate(const vector> 
calculate_combinations(inputs, outputs); - if (neurons_number == 1 && !is_training) + if (outputs_number == 1 && !is_training) { logistic(outputs, empty); } - else if (neurons_number == 1 && is_training) + else if (outputs_number == 1 && is_training) { Tensor& activation_derivatives = probabilistic_layer_forward_propagation->activation_derivatives; logistic(outputs, activation_derivatives); } - else if (neurons_number > 1) + else if (outputs_number > 1) { softmax(outputs); } @@ -237,7 +237,7 @@ void ProbabilisticLayer::back_propagate(const vector>& i unique_ptr& forward_propagation, unique_ptr& back_propagation) const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); const TensorMap> inputs = tensor_map_2(input_pairs[0]); const TensorMap> deltas = tensor_map_2(delta_pairs[0]); @@ -247,8 +247,6 @@ void ProbabilisticLayer::back_propagate(const vector>& i ProbabilisticLayerForwardPropagation* probabilistic_layer_forward_propagation = static_cast(forward_propagation.get()); - const Tensor& outputs = probabilistic_layer_forward_propagation->outputs; - // Back propagation ProbabilisticLayerBackPropagation* probabilistic_layer_back_propagation = @@ -258,7 +256,7 @@ void ProbabilisticLayer::back_propagate(const vector>& i Tensor& combinations_derivatives = probabilistic_layer_back_propagation->combinations_derivatives; - if(neurons_number == 1) + if(outputs_number == 1) { const Tensor& activation_derivatives = probabilistic_layer_forward_propagation->activation_derivatives; @@ -266,8 +264,6 @@ void ProbabilisticLayer::back_propagate(const vector>& i } else { - const Tensor& targets = probabilistic_layer_back_propagation->targets; - combinations_derivatives.device(*thread_pool_device) = deltas; } @@ -422,10 +418,10 @@ string ProbabilisticLayer::write_combinations(const vector& input_names) { ostringstream buffer; - const Index inputs_number = get_input_dimensions()[0]; - const Index neurons_number = get_output_dimensions()[0]; + const Index inputs_number = get_inputs_number(); + const Index outputs_number = get_outputs_number(); - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) { buffer << "probabilistic_layer_combinations_" << to_string(i) << " = " << biases(i); @@ -445,9 +441,9 @@ string ProbabilisticLayer::write_activations(const vector& output_names) { ostringstream buffer; - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) { switch(activation_function) { @@ -474,17 +470,17 @@ string ProbabilisticLayer::write_activations(const vector& output_names) { buffer << "sum = "; - for (Index i = 0; i < neurons_number; i++) + for (Index i = 0; i < outputs_number; i++) { buffer << "exp(probabilistic_layer_combinations_" << to_string(i) << ")"; - if (i != neurons_number - 1) + if (i != outputs_number - 1) buffer << " + "; } buffer << ";\n" << endl; - for (Index i = 0; i < neurons_number; i++) + for (Index i = 0; i < outputs_number; i++) buffer << output_names[i] << " = exp(probabilistic_layer_combinations_" << to_string(i) << ")/sum;\n"; } break; diff --git a/opennn/probabilistic_layer.h b/opennn/probabilistic_layer.h index 442735ad2..301ed2bb1 100644 --- a/opennn/probabilistic_layer.h +++ b/opennn/probabilistic_layer.h @@ -28,7 +28,7 @@ struct ProbabilisticLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; 
- void print() const; + void print() const override; Tensor outputs; Tensor activation_derivatives; @@ -39,11 +39,11 @@ struct ProbabilisticLayerBackPropagation : LayerBackPropagation { ProbabilisticLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor targets; @@ -64,11 +64,11 @@ struct ProbabilisticLayerBackPropagationLM : LayerBackPropagationLM ProbabilisticLayerBackPropagationLM(const Index& new_batch_samples_number = 0, Layer* new_layer = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor deltas_row; @@ -156,7 +156,7 @@ class ProbabilisticLayer : public Layer void from_XML(const XMLDocument&) override; void to_XML(XMLPrinter&) const override; - void print() const; + void print() const override; private: diff --git a/opennn/probabilistic_layer_3d.h b/opennn/probabilistic_layer_3d.h index 17eb7dde8..6091b8e22 100644 --- a/opennn/probabilistic_layer_3d.h +++ b/opennn/probabilistic_layer_3d.h @@ -139,7 +139,7 @@ struct ProbabilisticLayer3DForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; }; @@ -149,11 +149,11 @@ struct ProbabilisticLayer3DBackPropagation : LayerBackPropagation { ProbabilisticLayer3DBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor targets; Tensor mask; diff --git a/opennn/quasi_newton_method.cpp b/opennn/quasi_newton_method.cpp index fdda15d83..d3f1293e0 100644 --- a/opennn/quasi_newton_method.cpp +++ b/opennn/quasi_newton_method.cpp @@ -10,8 +10,6 @@ #include "forward_propagation.h" #include "back_propagation.h" #include "tensors.h" -#include "scaling_layer_2d.h" -#include "unscaling_layer.h" namespace opennn { @@ -534,38 +532,32 @@ TrainingResults QuasiNewtonMethod::perform_training() old_loss = training_back_propagation.loss; + stop_training = true; + if(results.training_error_history(epoch) < training_loss_goal) { - stop_training = true; - results.stopping_condition = OptimizationAlgorithm::StoppingCondition::LossGoal; - if(display) cout << "Epoch " << epoch << "\nLoss goal reached: " << results.training_error_history(epoch) << endl; } else if(selection_failures >= maximum_selection_failures) { if(display) cout << "Epoch " << epoch << "\nMaximum selection failures reached: " << selection_failures << endl; - - stop_training = true; - results.stopping_condition = OptimizationAlgorithm::StoppingCondition::MaximumSelectionErrorIncreases; } else if(epoch == maximum_epochs_number) { if(display) cout << "Epoch " << epoch << "\nMaximum epochs number reached: " << epoch << endl; - - stop_training = true; - results.stopping_condition = OptimizationAlgorithm::StoppingCondition::MaximumEpochsNumber; } else if(elapsed_time >= maximum_time) { if(display) cout << "Epoch " << epoch << "\nMaximum training time reached: " << write_time(elapsed_time) << endl; - - stop_training = true; - results.stopping_condition = 
OptimizationAlgorithm::StoppingCondition::MaximumTime; } + else + { + stop_training = true; + } if(stop_training) { @@ -580,8 +572,6 @@ TrainingResults QuasiNewtonMethod::perform_training() } if(epoch != 0 && epoch % save_period == 0) neural_network->save(neural_network_file_name); - - if(stop_training) break; } set_unscaling(); diff --git a/opennn/recurrent_layer.cpp b/opennn/recurrent_layer.cpp index 491ef0097..02be91277 100644 --- a/opennn/recurrent_layer.cpp +++ b/opennn/recurrent_layer.cpp @@ -133,15 +133,15 @@ void RecurrentLayer::set(const dimensions& new_input_dimensions, const dimension void RecurrentLayer::set_input_dimensions(const dimensions& new_input_dimensions) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - input_weights.resize(new_input_dimensions[0], neurons_number); + input_weights.resize(new_input_dimensions[0], outputs_number); } void RecurrentLayer::set_output_dimensions(const dimensions& new_output_dimensions) { - const Index inputs_number = get_input_dimensions()[0]; + const Index inputs_number = get_inputs_number(); biases.resize(new_output_dimensions[0]); @@ -290,9 +290,9 @@ void RecurrentLayer::forward_propagate(const vector>& in Tensor& current_activations_derivatives = recurrent_layer_forward_propagation->current_activations_derivatives; - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - Tensor current_hidden_states(batch_size, neurons_number); + Tensor current_hidden_states(batch_size, outputs_number); current_hidden_states.setZero(); for (Index time_step = 0; time_step < time_steps; time_step++) @@ -323,8 +323,8 @@ void RecurrentLayer::back_propagate(const vector>& input unique_ptr& back_propagation) const { const Index samples_number = input_pairs[0].second[0]; - const Index neurons_number = get_output_dimensions()[0]; - const Index inputs_number = get_input_dimensions()[0]; + const Index outputs_number = get_outputs_number(); + const Index inputs_number = get_inputs_number(); RecurrentLayerForwardPropagation* recurrent_layer_forward_propagation = static_cast(forward_propagation.get()); diff --git a/opennn/recurrent_layer.h b/opennn/recurrent_layer.h index d92202ddf..fd3db726c 100644 --- a/opennn/recurrent_layer.h +++ b/opennn/recurrent_layer.h @@ -119,7 +119,7 @@ struct RecurrentLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; @@ -134,11 +134,11 @@ struct RecurrentLayerBackPropagation : LayerBackPropagation { RecurrentLayerBackPropagation(const Index& = 0, Layer* = nullptr); - vector> get_input_derivative_pairs() const; + vector> get_input_derivative_pairs() const override; void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; //Tensor current_deltas; diff --git a/opennn/scaling_layer_2d.cpp b/opennn/scaling_layer_2d.cpp index 8156f9db6..1670956d0 100644 --- a/opennn/scaling_layer_2d.cpp +++ b/opennn/scaling_layer_2d.cpp @@ -45,12 +45,12 @@ Descriptives ScalingLayer2D::get_descriptives(const Index& index) const Tensor ScalingLayer2D::get_minimums() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - Tensor minimums(neurons_number); + Tensor minimums(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) 
minimums[i] = descriptives[i].minimum; return minimums; @@ -59,12 +59,12 @@ Tensor ScalingLayer2D::get_minimums() const Tensor ScalingLayer2D::get_maximums() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - Tensor maximums(neurons_number); + Tensor maximums(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) maximums[i] = descriptives[i].maximum; return maximums; @@ -73,12 +73,12 @@ Tensor ScalingLayer2D::get_maximums() const Tensor ScalingLayer2D::get_means() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - Tensor means(neurons_number); + Tensor means(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) means[i] = descriptives[i].mean; return means; @@ -87,12 +87,12 @@ Tensor ScalingLayer2D::get_means() const Tensor ScalingLayer2D::get_standard_deviations() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - Tensor standard_deviations(neurons_number); + Tensor standard_deviations(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) standard_deviations[i] = descriptives[i].standard_deviation; return standard_deviations; @@ -107,12 +107,12 @@ vector ScalingLayer2D::get_scaling_methods() const vector ScalingLayer2D::write_scalers() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - vector scaling_methods_strings(neurons_number); + vector scaling_methods_strings(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) if(scalers[i] == Scaler::None) scaling_methods_strings[i] = "None"; else if(scalers[i] == Scaler::MinimumMaximum) @@ -132,12 +132,12 @@ vector ScalingLayer2D::write_scalers() const vector ScalingLayer2D::write_scalers_text() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - vector scaling_methods_strings(neurons_number); + vector scaling_methods_strings(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) if(scalers[i] == Scaler::None) scaling_methods_strings[i] = "no scaling"; else if(scalers[i] == Scaler::MeanStandardDeviation) @@ -242,12 +242,12 @@ void ScalingLayer2D::set_scalers(const vector& new_scaling_methods) void ScalingLayer2D::set_scalers(const vector& new_scaling_methods_string) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - vector new_scaling_methods(neurons_number); + vector new_scaling_methods(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) if(new_scaling_methods_string[i] == "None") new_scaling_methods[i] = Scaler::None; else if(new_scaling_methods_string[i] == "MinimumMaximum") @@ -290,20 +290,20 @@ void ScalingLayer2D::set_scaler(const Index& variable_index, const string& new_s void ScalingLayer2D::set_scalers(const string& new_scaling_methods_string) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); #pragma omp parallel 
for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) set_scaler(i, new_scaling_methods_string); } void ScalingLayer2D::set_scalers(const Scaler& new_scaling_method) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) scalers[i] = new_scaling_method; } @@ -318,7 +318,7 @@ void ScalingLayer2D::forward_propagate(const vector>& in unique_ptr& forward_propagation, const bool& is_training) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); ScalingLayer2DForwardPropagation* scaling_layer_forward_propagation = static_cast(forward_propagation.get()); @@ -327,7 +327,7 @@ void ScalingLayer2D::forward_propagate(const vector>& in Tensor& outputs = scaling_layer_forward_propagation->outputs; - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) { const Scaler& scaler = scalers[i]; @@ -424,7 +424,7 @@ string ScalingLayer2D::write_minimum_maximum_expression(const vector& in string ScalingLayer2D::write_mean_standard_deviation_expression(const vector& input_names, const vector& output_names) const { - const Index inputs_number = get_input_dimensions()[0]; + const Index inputs_number = get_inputs_number(); ostringstream buffer; @@ -505,7 +505,7 @@ void ScalingLayer2D::print() const { cout << "Scaling layer" << endl; - const Index inputs_number = get_input_dimensions()[0]; + const Index inputs_number = get_inputs_number(); const vector scalers_text = write_scalers_text(); @@ -526,10 +526,10 @@ void ScalingLayer2D::to_XML(XMLPrinter& printer) const add_xml_element(printer, "Name", name); add_xml_element(printer, "NeuronsNumber", to_string(get_output_dimensions()[0])); - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); const vector scaling_methods_string = write_scalers(); - for (Index i = 0; i < neurons_number; i++) + for (Index i = 0; i < outputs_number; i++) { printer.OpenElement("ScalingNeuron"); printer.PushAttribute("Index", int(i + 1)); diff --git a/opennn/scaling_layer_2d.h b/opennn/scaling_layer_2d.h index 35e9097a5..bfeb366df 100644 --- a/opennn/scaling_layer_2d.h +++ b/opennn/scaling_layer_2d.h @@ -22,8 +22,8 @@ class ScalingLayer2D : public Layer ScalingLayer2D(const dimensions& = {0}); - dimensions get_input_dimensions() const; - dimensions get_output_dimensions() const; + dimensions get_input_dimensions() const override; + dimensions get_output_dimensions() const override; vector get_descriptives() const; Descriptives get_descriptives(const Index&) const; @@ -77,7 +77,7 @@ class ScalingLayer2D : public Layer string get_expression(const vector& = vector(), const vector& = vector()) const override; - void print() const; + void print() const override; void from_XML(const XMLDocument&) override; void to_XML(XMLPrinter&) const override; @@ -101,7 +101,7 @@ struct ScalingLayer2DForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; }; diff --git a/opennn/scaling_layer_4d.h b/opennn/scaling_layer_4d.h index 1dab1593a..841b7c935 100644 --- a/opennn/scaling_layer_4d.h +++ b/opennn/scaling_layer_4d.h @@ -21,8 +21,8 @@ class ScalingLayer4D : public Layer ScalingLayer4D(const dimensions& = {0, 0, 0, 0}); - dimensions 
get_input_dimensions() const; - dimensions get_output_dimensions() const; + dimensions get_input_dimensions() const override; + dimensions get_output_dimensions() const override; void set(const dimensions& = { 0, 0, 0, 0 }); @@ -34,7 +34,7 @@ class ScalingLayer4D : public Layer unique_ptr&, const bool&) override; - void print() const; + void print() const override; void from_XML(const XMLDocument&) override; void to_XML(XMLPrinter&) const override; @@ -57,7 +57,7 @@ struct ScalingLayer4DForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor outputs; }; diff --git a/opennn/transformer.cpp b/opennn/transformer.cpp index d56ca5d6f..59b3d9447 100644 --- a/opennn/transformer.cpp +++ b/opennn/transformer.cpp @@ -14,6 +14,7 @@ #include "addition_layer_3d.h" #include "perceptron_layer_3d.h" #include "probabilistic_layer_3d.h" +#include "forward_propagation.h" //#include "strings_utilities.h" namespace opennn @@ -371,6 +372,7 @@ string Transformer::calculate_outputs(const string& context_string) } + Tensor Transformer::calculate_outputs(const Tensor& input, const Tensor& context) { const pair input_pair((type*)input.data(), { input.dimension(0), input.dimension(1) }); @@ -544,59 +546,59 @@ void Transformer::load_transformer(const string& path) } -void TransformerForwardPropagation::set(const Index& new_batch_samples, NeuralNetwork* new_neural_network) -{ - Transformer* neural_network = static_cast(new_neural_network); +// void TransformerForwardPropagation::set(const Index& new_batch_samples, NeuralNetwork* new_neural_network) +// { +// Transformer* neural_network = static_cast(new_neural_network); - batch_samples_number = new_batch_samples; +// batch_samples_number = new_batch_samples; - const vector>& neural_network_layers = neural_network->get_layers(); +// const vector>& neural_network_layers = neural_network->get_layers(); - const Index layers_number = layers.size(); +// const Index layers_number = layers.size(); - layers.resize(layers_number); +// layers.resize(layers_number); - for(Index i = 0; i < layers_number; i++) - { - switch (neural_network_layers[i]->get_type()) - { - case Layer::Type::Embedding: - layers[i] = make_unique(batch_samples_number, neural_network_layers[i].get()); - break; +// for(Index i = 0; i < layers_number; i++) +// { +// switch (neural_network_layers[i]->get_type()) +// { +// case Layer::Type::Embedding: +// layers[i] = make_unique(batch_samples_number, neural_network_layers[i].get()); +// break; - case Layer::Type::MultiheadAttention: - layers[i] = make_unique < MultiheadAttentionLayerForwardPropagation>(batch_samples_number, neural_network_layers[i].get()); - break; +// case Layer::Type::MultiheadAttention: +// layers[i] = make_unique < MultiheadAttentionLayerForwardPropagation>(batch_samples_number, neural_network_layers[i].get()); +// break; - case Layer::Type::Perceptron3D: - layers[i] = make_unique < PerceptronLayer3DForwardPropagation>(batch_samples_number, neural_network_layers[i].get()); - break; +// case Layer::Type::Perceptron3D: +// layers[i] = make_unique < PerceptronLayer3DForwardPropagation>(batch_samples_number, neural_network_layers[i].get()); +// break; - case Layer::Type::Probabilistic3D: - layers[i] = make_unique < ProbabilisticLayer3DForwardPropagation>(batch_samples_number, neural_network_layers[i].get()); - break; +// case Layer::Type::Probabilistic3D: +// layers[i] = make_unique < 
ProbabilisticLayer3DForwardPropagation>(batch_samples_number, neural_network_layers[i].get()); +// break; - default: break; - } - } -} +// default: break; +// } +// } +// } -void TransformerForwardPropagation::print() const -{ - cout << "Transformer forward propagation" << endl; +// void TransformerForwardPropagation::print() const +// { +// cout << "Transformer forward propagation" << endl; - const Index layers_number = layers.size(); +// const Index layers_number = layers.size(); - cout << "Layers number: " << layers_number << endl; +// cout << "Layers number: " << layers_number << endl; - for(Index i = 0; i < layers_number; i++) - { - cout << "Layer " << i + 1 << ": " << layers[i]->layer->get_name() << endl; +// for(Index i = 0; i < layers_number; i++) +// { +// cout << "Layer " << i + 1 << ": " << layers[i]->layer->get_name() << endl; - layers[i]->print(); - } -} +// layers[i]->print(); +// } +// } }; diff --git a/opennn/transformer.h b/opennn/transformer.h index fd472f974..81fefd20a 100644 --- a/opennn/transformer.h +++ b/opennn/transformer.h @@ -10,13 +10,13 @@ #define TRANSFORMER_H #include "neural_network.h" -#include "forward_propagation.h" +//#include "forward_propagation.h" namespace opennn { -struct TransformerForwardPropagation; -struct TransformerBackPropagation; +//struct TransformerForwardPropagation; +//struct TransformerBackPropagation; class Transformer : public NeuralNetwork { @@ -86,25 +86,25 @@ class Transformer : public NeuralNetwork }; -struct TransformerForwardPropagation : ForwardPropagation -{ - // Constructors +// struct TransformerForwardPropagation : ForwardPropagation +// { +// // Constructors - TransformerForwardPropagation() {} +// TransformerForwardPropagation() {} - TransformerForwardPropagation(const Index& new_batch_samples, NeuralNetwork* new_neural_network) - { - set(new_batch_samples, new_neural_network); - } +// TransformerForwardPropagation(const Index& new_batch_samples, NeuralNetwork* new_neural_network) +// { +// set(new_batch_samples, new_neural_network); +// } - void set(const Index& new_batch_samples, NeuralNetwork* new_neural_network); +// void set(const Index& new_batch_samples, NeuralNetwork* new_neural_network); - void print() const; +// void print() const; - Index batch_samples_number = 0; +// Index batch_samples_number = 0; - Tensor, 1> layers; -}; +// Tensor, 1> layers; +// }; }; #endif // TRANSFORMER_H diff --git a/opennn/unscaling_layer.cpp b/opennn/unscaling_layer.cpp index 198548a69..d33d0267c 100644 --- a/opennn/unscaling_layer.cpp +++ b/opennn/unscaling_layer.cpp @@ -44,12 +44,12 @@ vector UnscalingLayer::get_descriptives() const Tensor UnscalingLayer::get_minimums() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - Tensor minimums(neurons_number); + Tensor minimums(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) minimums[i] = descriptives[i].minimum; return minimums; @@ -58,12 +58,12 @@ Tensor UnscalingLayer::get_minimums() const Tensor UnscalingLayer::get_maximums() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - Tensor maximums(neurons_number); + Tensor maximums(outputs_number); #pragma omp parallel for - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) maximums[i] = descriptives[i].maximum; return maximums; @@ -155,11 +155,11 @@ string 
UnscalingLayer::get_expression(const vector& new_input_names, vector UnscalingLayer::write_unscaling_methods() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - vector scaling_methods_strings(neurons_number); + vector scaling_methods_strings(outputs_number); - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) if(scalers[i] == Scaler::None) scaling_methods_strings[i] = "None"; else if(scalers[i] == Scaler::MinimumMaximum) @@ -179,11 +179,11 @@ vector UnscalingLayer::write_unscaling_methods() const vector UnscalingLayer::write_unscaling_method_text() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - vector scaling_methods_strings(neurons_number); + vector scaling_methods_strings(outputs_number); - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) if(scalers[i] == Scaler::None) scaling_methods_strings[i] = "no unscaling"; else if(scalers[i] == Scaler::MinimumMaximum) @@ -280,18 +280,18 @@ void UnscalingLayer::set_scalers(const string& new_scaling_methods_string) void UnscalingLayer::set_scalers(const vector& new_scalers) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) set_scaler(i, new_scalers[i]); } void UnscalingLayer::set_scalers(const Scaler& new_unscaling_method) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) scalers[i] = new_unscaling_method; } @@ -323,7 +323,7 @@ void UnscalingLayer::forward_propagate(const vector>& in unique_ptr& forward_propagation, const bool& is_training) { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); UnscalingLayerForwardPropagation* unscaling_layer_forward_propagation = static_cast(forward_propagation.get()); @@ -332,7 +332,7 @@ void UnscalingLayer::forward_propagate(const vector>& in Tensor& outputs = unscaling_layer_forward_propagation->outputs; - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) { const Scaler& scaler = scalers[i]; @@ -391,11 +391,11 @@ void UnscalingLayer::forward_propagate(const vector>& in vector UnscalingLayer::write_scalers_text() const { - const Index neurons_number = get_output_dimensions()[0]; + const Index outputs_number = get_outputs_number(); - vector scaling_methods_strings(neurons_number); + vector scaling_methods_strings(outputs_number); - for(Index i = 0; i < neurons_number; i++) + for(Index i = 0; i < outputs_number; i++) if(scalers[i] == Scaler::None) scaling_methods_strings[i] = "no scaling"; else if(scalers[i] == Scaler::MeanStandardDeviation) @@ -417,7 +417,7 @@ void UnscalingLayer::print() const { cout << "Unscaling layer" << endl; - const Index inputs_number = get_input_dimensions()[0]; + const Index inputs_number = get_inputs_number(); const vector scalers_text = write_scalers_text(); diff --git a/opennn/unscaling_layer.h b/opennn/unscaling_layer.h index f2b6a4b9a..18c461c92 100644 --- a/opennn/unscaling_layer.h +++ b/opennn/unscaling_layer.h @@ -22,7 +22,7 @@ class UnscalingLayer : public Layer UnscalingLayer(const dimensions& = {0}, const string& = "unscaling_layer"); - dimensions get_input_dimensions() 
const; + dimensions get_input_dimensions() const override; dimensions get_output_dimensions() const override; vector<Descriptives> get_descriptives() const; @@ -62,7 +62,7 @@ class UnscalingLayer : public Layer vector<string> write_scalers_text() const; - void print() const; + void print() const override; void from_XML(const XMLDocument&) override; void to_XML(XMLPrinter&) const override; @@ -88,7 +88,7 @@ struct UnscalingLayerForwardPropagation : LayerForwardPropagation void set(const Index& = 0, Layer* = nullptr) override; - void print() const; + void print() const override; Tensor<type, 2> outputs; };
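
The recurring changes of the form "- void print() const;" to "+ void print() const override;" (and likewise for get_input_dimensions and get_output_dimensions) ask the compiler to verify that each declaration really overrides a base-class virtual. A minimal sketch of the failure mode this prevents; Layer, GoodLayer, and BadLayer here are illustrative stand-ins, not the real OpenNN classes:

    #include <iostream>

    struct Layer
    {
        virtual ~Layer() = default;
        virtual void print() const { std::cout << "Layer\n"; }
    };

    struct GoodLayer : Layer
    {
        // Compiles only if a matching virtual function exists in the base class.
        void print() const override { std::cout << "GoodLayer\n"; }
    };

    struct BadLayer : Layer
    {
        // Missing const: this silently declares a new function that hides
        // Layer::print instead of overriding it.
        void print() { std::cout << "BadLayer\n"; }

        // With the specifier the same mistake becomes a compile-time error:
        // void print() override { }   // error: does not override (missing const)
    };

    int main()
    {
        const GoodLayer good;
        const BadLayer bad;
        const Layer* layers[] = { &good, &bad };

        for(const Layer* layer : layers)
            layer->print();   // prints "GoodLayer", then "Layer" for BadLayer
    }

Because BadLayer never actually overrides, virtual dispatch through Layer* still calls the base implementation, which is exactly the kind of silent mismatch the added specifier turns into a build error.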
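The unscaling_layer.cpp hunks also replace get_output_dimensions()[0] and get_input_dimensions()[0] with get_outputs_number() and get_inputs_number(). A short sketch of that accessor pattern under assumed definitions; ExampleLayer, and the aliases for Index and dimensions, are hypothetical and not copied from the OpenNN API:

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    using Index = std::ptrdiff_t;          // assumption: a signed integer index type
    using dimensions = std::vector<Index>; // assumption: a vector of Index

    struct ExampleLayer
    {
        dimensions output_dimensions{10};

        dimensions get_output_dimensions() const { return output_dimensions; }

        // Call sites use this instead of get_output_dimensions()[0], so they do not
        // hard-code the first axis and keep working if a layer reports more axes.
        Index get_outputs_number() const
        {
            const dimensions dims = get_output_dimensions();

            return std::accumulate(dims.begin(), dims.end(),
                                   Index{1}, std::multiplies<Index>());
        }
    };

    int main()
    {
        const ExampleLayer layer;

        std::cout << layer.get_outputs_number() << "\n";   // 10
    }

For a one-dimensional unscaling layer the two forms return the same value; the accessor simply centralizes the indexing in one place.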