9#include "adaptive_moment_estimation.h"
146 if(new_maximum_epochs_number <
static_cast<type
>(0.0))
148 ostringstream buffer;
150 buffer <<
"OpenNN Exception: AdaptiveMomentEstimation class.\n"
151 <<
"void set_maximum_epochs_number(const type&) method.\n"
152 <<
"Maximum epochs number must be equal or greater than 0.\n";
154 throw logic_error(buffer.str());
182 if(new_maximum_time <
static_cast<type
>(0.0))
184 ostringstream buffer;
186 buffer <<
"OpenNN Exception: AdaptiveMomentEstimation class.\n"
187 <<
"void set_maximum_time(const type&) method.\n"
188 <<
"Maximum time must be equal or greater than 0.\n";
190 throw logic_error(buffer.str());
214 if(
display) cout <<
"Training with adaptive moment estimation \"Adam\" ...\n";
220 const bool has_selection = data_set_pointer->has_selection();
231 const Tensor<Scaler, 1> input_variables_scalers = data_set_pointer->get_input_variables_scalers();
232 const Tensor<Scaler, 1> target_variables_scalers = data_set_pointer->get_target_variables_scalers();
234 const Tensor<Descriptives, 1> input_variables_descriptives = data_set_pointer->
scale_input_variables();
235 Tensor<Descriptives, 1> target_variables_descriptives;
237 Index batch_size_training = 0;
238 Index batch_size_selection = 0;
244 ? batch_size_training = training_samples_number
248 ? batch_size_selection = selection_samples_number
251 DataSetBatch batch_training(batch_size_training, data_set_pointer);
252 DataSetBatch batch_selection(batch_size_selection, data_set_pointer);
254 const Index training_batches_number = training_samples_number/batch_size_training;
255 const Index selection_batches_number = selection_samples_number/batch_size_selection;
257 Tensor<Index, 2> training_batches(training_batches_number, batch_size_training);
258 Tensor<Index, 2> selection_batches(selection_batches_number, batch_size_selection);
270 scaling_layer_pointer->
set(input_variables_descriptives, input_variables_scalers);
278 unscaling_layer_pointer->
set(target_variables_descriptives, target_variables_scalers);
291 type training_error = type(0);
292 type training_loss = type(0);
294 type selection_error = type(0);
296 Index selection_failures = 0;
302 bool stop_training =
false;
304 time_t beginning_time, current_time;
305 time(&beginning_time);
306 type elapsed_time = type(0);
308 bool shuffle =
false;
320 training_batches = data_set_pointer->
get_batches(training_samples_indices, batch_size_training, shuffle);
322 const Index batches_number = training_batches.dimension(0);
324 training_loss = type(0);
325 training_error = type(0);
327 optimization_data.iteration = 1;
329 for(Index iteration = 0; iteration < batches_number; iteration++)
333 batch_training.fill(training_batches.chip(iteration, 0), input_variables_indices, target_variables_indices);
337 neural_network_pointer->
forward_propagate(batch_training, training_forward_propagation);
341 loss_index_pointer->back_propagate(batch_training, training_forward_propagation, training_back_propagation);
343 training_error += training_back_propagation.error;
344 training_loss += training_back_propagation.loss;
351 training_loss /=
static_cast<type
>(batches_number);
352 training_error /=
static_cast<type
>(batches_number);
358 selection_batches = data_set_pointer->
get_batches(selection_samples_indices, batch_size_selection, shuffle);
360 selection_error = type(0);
362 for(Index iteration = 0; iteration < selection_batches_number; iteration++)
366 batch_selection.fill(selection_batches.chip(iteration,0), input_variables_indices, target_variables_indices);
370 neural_network_pointer->
forward_propagate(batch_selection, selection_forward_propagation);
374 loss_index_pointer->calculate_errors(batch_selection, selection_forward_propagation, selection_back_propagation);
376 loss_index_pointer->calculate_error(batch_selection, selection_forward_propagation, selection_back_propagation);
378 selection_error += selection_back_propagation.error;
381 selection_error /=
static_cast<type
>(selection_batches_number);
391 elapsed_time =
static_cast<type
>(difftime(current_time, beginning_time));
395 cout <<
"Training error: " << training_error << endl;
396 if(has_selection) cout <<
"Selection error: " << selection_error << endl;
397 cout <<
"Elapsed time: " <<
write_time(elapsed_time) << endl;
404 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum number of epochs reached: " << epoch << endl;
406 stop_training =
true;
413 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum training time reached: " <<
write_time(elapsed_time) << endl;
415 stop_training =
true;
422 if(
display) cout <<
"Epoch " << epoch << endl <<
"Loss goal reached: " << training_loss << endl;
424 stop_training =
true;
431 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum selection failures reached: " << selection_failures << endl;
433 stop_training =
true;
466 return "ADAPTIVE_MOMENT_ESTIMATION";
474 Tensor<string, 2> labels_values(9, 2);
478 labels_values(0,0) =
"Initial learning rate";
483 labels_values(1,0) =
"Initial decay";
488 labels_values(2,0) =
"Beta 1";
489 labels_values(2,1) = to_string(
double(
beta_1));
493 labels_values(3,0) =
"Beta 2";
494 labels_values(3,1) = to_string(
double(
beta_2));
498 labels_values(4,0) =
"Epsilon";
499 labels_values(4,1) = to_string(
double(
epsilon));
503 labels_values(5,0) =
"Training loss goal";
508 labels_values(6,0) =
"Maximum epochs number";
513 labels_values(7,0) =
"Maximum time";
518 labels_values(8,0) =
"Batch samples number";
521 return labels_values;
530 ostringstream buffer;
532 file_stream.OpenElement(
"AdaptiveMomentEstimation");
536 file_stream.OpenElement(
"BatchSize");
541 file_stream.
PushText(buffer.str().c_str());
547 file_stream.OpenElement(
"LossGoal");
552 file_stream.
PushText(buffer.str().c_str());
558 file_stream.OpenElement(
"MaximumEpochsNumber");
563 file_stream.
PushText(buffer.str().c_str());
569 file_stream.OpenElement(
"MaximumTime");
574 file_stream.
PushText(buffer.str().c_str());
580 file_stream.OpenElement(
"HardwareUse");
585 file_stream.
PushText(buffer.str().c_str());
597 const tinyxml2::XMLElement* root_element = document.FirstChildElement(
"AdaptiveMomentEstimation");
601 ostringstream buffer;
603 buffer <<
"OpenNN Exception: AdaptiveMomentEstimation class.\n"
604 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
605 <<
"Adaptive moment estimation element is nullptr.\n";
607 throw logic_error(buffer.str());
614 if(batch_size_element)
616 const Index new_batch_size =
static_cast<Index
>(atoi(batch_size_element->GetText()));
622 catch(
const logic_error& e)
624 cerr << e.what() << endl;
634 const type new_loss_goal =
static_cast<type
>(atof(element->GetText()));
640 catch(
const logic_error& e)
642 cerr << e.what() << endl;
653 const Index new_maximum_epochs_number =
static_cast<Index
>(atoi(element->GetText()));
659 catch(
const logic_error& e)
661 cerr << e.what() << endl;
672 const type new_maximum_time =
static_cast<type
>(atof(element->GetText()));
678 catch(
const logic_error& e)
680 cerr << e.what() << endl;
691 const string new_hardware_use = element->GetText();
697 catch(
const logic_error& e)
699 cerr << e.what() << endl;
720Index AdaptiveMomentEstimation::get_batch_samples_number()
const
731 const type learning_rate =
733 sqrt(type(1) -
pow(
beta_2,
static_cast<type
>(optimization_data.iteration)))/
734 (type(1) -
pow(
beta_1,
static_cast<type
>(optimization_data.iteration))));
736 optimization_data.gradient_exponential_decay.device(*thread_pool_device)
737 = optimization_data.gradient_exponential_decay*
beta_1
738 + back_propagation.gradient*(type(1) -
beta_1);
740 optimization_data.square_gradient_exponential_decay.device(*thread_pool_device)
741 = optimization_data.square_gradient_exponential_decay*
beta_2
742 + back_propagation.gradient*back_propagation.gradient*(type(1) -
beta_2);
744 back_propagation.parameters.device(*thread_pool_device) -=
745 optimization_data.gradient_exponential_decay*learning_rate/(optimization_data.square_gradient_exponential_decay.sqrt() +
epsilon);
747 optimization_data.iteration++;
762 set(new_stochastic_gradient_descent_pointer);
766AdaptiveMomentEstimationData::~AdaptiveMomentEstimationData()
771void AdaptiveMomentEstimationData::set(AdaptiveMomentEstimation* new_adaptive_moment_estimation_pointer)
773 adaptive_moment_estimation_pointer = new_adaptive_moment_estimation_pointer;
781 gradient_exponential_decay.resize(parameters_number);
782 gradient_exponential_decay.setZero();
784 square_gradient_exponential_decay.resize(parameters_number);
785 square_gradient_exponential_decay.setZero();
789void AdaptiveMomentEstimationData::print()
const
791 cout <<
"Gradient exponential decay:" << endl
792 <<gradient_exponential_decay << endl;
794 cout <<
"Square gradient exponential decay:" << endl
795 << square_gradient_exponential_decay << endl;
TrainingResults perform_training()
const type & get_epsilon() const
Returns epsilon.
void set_loss_index_pointer(LossIndex *)
const type & get_maximum_time() const
Returns the maximum training time.
const type & get_beta_2() const
Returns beta 2.
const type & get_loss_goal() const
void from_XML(const tinyxml2::XMLDocument &)
void set_default()
Sets the members of the optimization algorithm object to their default values.
AdaptiveMomentEstimation()
type initial_learning_rate
Initial learning rate.
const type & get_beta_1() const
Returns beta 1.
void set_epsilon(const type &)
Tensor< string, 2 > to_string_matrix() const
Writes the most representative attributes as a matrix of strings.
type beta_1
Exponential decay over gradient estimates.
string write_optimization_algorithm_type() const
Returns the name of the optimization algorithm.
void set_initial_learning_rate(const type &)
void set_maximum_time(const type &)
void set_loss_goal(const type &)
type epsilon
Small number to prevent any division by zero.
void set_beta_2(const type &)
void set_batch_samples_number(const Index &new_batch_samples_number)
Sets the number of samples in each batch. The default is 1000.
type maximum_time
Maximum training time. It is used as a stopping criterion.
void set_maximum_epochs_number(const Index &)
void update_parameters(LossIndexBackPropagation &, AdaptiveMomentEstimationData &)
Update iteration parameters.
type initial_decay
Learning rate decay over each update.
type training_loss_goal
Goal value for the loss. It is used as a stopping criterion.
virtual ~AdaptiveMomentEstimation()
Destructor.
Index maximum_epochs_number
Maximum epochs number.
void write_XML(tinyxml2::XMLPrinter &) const
Index batch_samples_number
Training and selection batch size.
Index maximum_selection_failures
Maximum number of times the selection error is allowed to increase.
void set_beta_1(const type &)
const type & get_initial_learning_rate() const
Returns the initial learning rate.
type beta_2
Exponential decay over square gradient estimates.
This class represents the concept of data set for data modelling problems, such as approximation,...
Index get_training_samples_number() const
Returns the number of samples in the data set which will be used for training.
Tensor< Descriptives, 1 > scale_target_variables()
Tensor< Index, 1 > get_training_samples_indices() const
Returns the indices of the samples which will be used for training.
Tensor< Index, 1 > get_selection_samples_indices() const
Returns the indices of the samples which will be used for selection.
void unscale_input_variables(const Tensor< Descriptives, 1 > &)
Tensor< Index, 1 > get_target_variables_indices() const
Returns the indices of the target variables.
Index get_selection_samples_number() const
Returns the number of samples in the data set which will be used for selection.
void unscale_target_variables(const Tensor< Descriptives, 1 > &)
Tensor< string, 1 > get_target_variables_names() const
Tensor< Index, 1 > get_input_variables_indices() const
Returns the indices of the input variables.
Tensor< string, 1 > get_input_variables_names() const
Tensor< Index, 2 > get_batches(const Tensor< Index, 1 > &, const Index &, const bool &, const Index &buffer_size=100) const
Tensor< Descriptives, 1 > scale_input_variables()
This abstract class represents the concept of loss index composed of an error term and a regularizati...
NeuralNetwork * get_neural_network_pointer() const
Returns a pointer to the neural network object associated to the error term.
DataSet * get_data_set_pointer() const
Returns a pointer to the data set object associated to the error term.
ScalingLayer * get_scaling_layer_pointer() const
Returns a pointer to the scaling layers object composing this neural network object.
bool has_long_short_term_memory_layer() const
bool has_scaling_layer() const
bool has_unscaling_layer() const
bool has_recurrent_layer() const
void forward_propagate(const DataSetBatch &, NeuralNetworkForwardPropagation &) const
Calculate forward propagation in neural network.
void save(const string &) const
void set_parameters(Tensor< type, 1 > &)
UnscalingLayer * get_unscaling_layer_pointer() const
Returns a pointer to the unscaling layers object composing this neural network object.
void set_inputs_names(const Tensor< string, 1 > &)
Index get_parameters_number() const
void set_outputs_names(const Tensor< string, 1 > &)
string neural_network_file_name
Path where the neural network is saved.
void set_hardware_use(const string &)
Set hardware to use. Default: Multi-core.
string get_hardware_use() const
Hardware use.
LossIndex * loss_index_pointer
Pointer to a loss index for a neural network object.
virtual void check() const
bool display
Display messages to screen.
const string write_time(const type &) const
Writes the time from seconds in format HH:mm:ss.
Index save_period
Number of iterations between the training saving progress.
LossIndex * get_loss_index_pointer() const
Index display_period
Number of iterations between the training showing progress.
This class represents a layer of scaling neurons.
void set()
Sets the scaling layer to be empty.
This class represents a layer of unscaling neurons.
void set()
Sets the unscaling layer to be empty.
void PushText(const char *text, bool cdata=false)
Add a text node.
virtual void CloseElement(bool compactMode=false)
If streaming, close the Element.
AdaptiveMomentEstimationData()
Default constructor.
This structure contains the optimization algorithm results.
Tensor< type, 1 > selection_error_history
History of the selection error over the training iterations.
void resize_training_error_history(const Index &)
Resizes the training error history keeping the values.
OptimizationAlgorithm::StoppingCondition stopping_condition
Stopping condition of the algorithm.
void resize_selection_error_history(const Index &)
Resizes the selection error history keeping the values.
Tensor< type, 1 > training_error_history
History of the loss function value over the training iterations.
string elapsed_time
Elapsed time of the training process.