9#include "gradient_descent.h"
148 if(new_maximum_epochs_number <
static_cast<type
>(0.0))
150 ostringstream buffer;
152 buffer <<
"OpenNN Exception: GradientDescent class.\n"
153 <<
"void set_maximum_epochs_number(const type&) method.\n"
154 <<
"Maximum epochs number must be equal or greater than 0.\n";
156 throw logic_error(buffer.str());
201 if(new_maximum_time <
static_cast<type
>(0.0))
203 ostringstream buffer;
205 buffer <<
"OpenNN Exception: GradientDescent class.\n"
206 <<
"void set_maximum_time(const type&) method.\n"
207 <<
"Maximum time must be equal or greater than 0.\n";
209 throw logic_error(buffer.str());
228 ostringstream buffer;
232 buffer <<
"OpenNN Exception: GradientDescent class.\n"
233 <<
"Tensor<type, 1> calculate_training_direction(const Tensor<type, 1>&) const method.\n"
234 <<
"Loss index pointer is nullptr.\n";
236 throw logic_error(buffer.str());
243 const Index gradient_size = gradient.size();
245 if(gradient_size != parameters_number)
247 buffer <<
"OpenNN Exception: GradientDescent class.\n"
248 <<
"Tensor<type, 1> calculate_training_direction(const Tensor<type, 1>&) const method.\n"
249 <<
"Size of gradient(" << gradient_size
250 <<
") is not equal to number of parameters(" << parameters_number <<
").\n";
252 throw logic_error(buffer.str());
257 training_direction.device(*thread_pool_device) = -gradient;
275 if(is_zero(optimization_data.training_direction))
return;
280 optimization_data.epoch == 0
281 ? optimization_data.initial_learning_rate = first_learning_rate
282 : optimization_data.initial_learning_rate = optimization_data.old_learning_rate;
290 optimization_data.learning_rate = directional_point.first;
291 back_propagation.loss = directional_point.second;
293 if(
abs(optimization_data.learning_rate) > type(0))
295 optimization_data.parameters_increment.device(*thread_pool_device)
296 = optimization_data.training_direction*optimization_data.learning_rate;
298 back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
302 const Index parameters_number = back_propagation.parameters.size();
304 for(Index i = 0; i < parameters_number; i++)
306 if(
abs(back_propagation.gradient(i)) < type(NUMERIC_LIMITS_MIN))
308 optimization_data.parameters_increment(i) = type(0);
310 else if(back_propagation.gradient(i) > type(0))
312 back_propagation.parameters(i) -= numeric_limits<type>::epsilon();
314 optimization_data.parameters_increment(i) = -numeric_limits<type>::epsilon();
316 else if(back_propagation.gradient(i) < type(0))
318 back_propagation.parameters(i) += numeric_limits<type>::epsilon();
320 optimization_data.parameters_increment(i) = numeric_limits<type>::epsilon();
324 optimization_data.learning_rate = optimization_data.old_learning_rate;
329 optimization_data.old_learning_rate = optimization_data.learning_rate;
331 forward_propagation.neural_network_pointer->
set_parameters(back_propagation.parameters);
351 if(
display) cout <<
"Training with gradient descent...\n";
360 const bool has_selection = data_set_pointer->has_selection();
371 const Tensor<Scaler, 1> input_variables_scalers = data_set_pointer->get_input_variables_scalers();
372 const Tensor<Scaler, 1> target_variables_scalers = data_set_pointer->get_target_variables_scalers();
374 const Tensor<Descriptives, 1> input_variables_descriptives = data_set_pointer->
scale_input_variables();
375 Tensor<Descriptives, 1> target_variables_descriptives;
387 scaling_layer_pointer->
set(input_variables_descriptives, input_variables_scalers);
395 unscaling_layer_pointer->
set(target_variables_descriptives, target_variables_scalers);
401 DataSetBatch training_batch(training_samples_number, data_set_pointer);
402 training_batch.fill(training_samples_indices, input_variables_indices, target_variables_indices);
404 DataSetBatch selection_batch(selection_samples_number, data_set_pointer);
405 selection_batch.fill(selection_samples_indices, input_variables_indices, target_variables_indices);
420 Index selection_failures = 0;
422 bool stop_training =
false;
424 type old_loss = type(0);
425 type loss_decrease = numeric_limits<type>::max();
429 time_t beginning_time, current_time;
430 time(&beginning_time);
431 type elapsed_time = type(0);
437 optimization_data.epoch = epoch;
441 neural_network_pointer->
forward_propagate(training_batch, training_forward_propagation);
445 loss_index_pointer->back_propagate(training_batch, training_forward_propagation, training_back_propagation);
450 neural_network_pointer->
forward_propagate(selection_batch, selection_forward_propagation);
452 loss_index_pointer->calculate_errors(selection_batch, selection_forward_propagation, selection_back_propagation);
453 loss_index_pointer->calculate_error(selection_batch, selection_forward_propagation, selection_back_propagation);
463 elapsed_time =
static_cast<type
>(difftime(current_time, beginning_time));
469 cout <<
"Training error: " << training_back_propagation.error << endl;
470 if(has_selection) cout <<
"Selection error: " << selection_back_propagation.error << endl;
471 cout <<
"Learning rate: " << optimization_data.learning_rate << endl;
472 cout <<
"Elapsed time: " <<
write_time(elapsed_time) << endl;
480 cout <<
"Epoch " << epoch << endl <<
"Loss goal reached: " << training_back_propagation.loss << endl;
482 stop_training =
true;
489 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum selection failures reached: " << selection_failures << endl;
491 stop_training =
true;
498 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum number of epochs reached: " << epoch << endl;
500 stop_training =
true;
507 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum training time reached: " << elapsed_time;
509 stop_training =
true;
514 if(epoch != 0) loss_decrease = old_loss - training_back_propagation.loss;
518 if(
display) cout <<
"Epoch " << epoch << endl <<
"Minimum loss decrease reached: " << loss_decrease << endl;
520 stop_training =
true;
525 old_loss = training_back_propagation.loss;
541 update_parameters(training_batch, training_forward_propagation, training_back_propagation, optimization_data);
555string GradientDescent::write_optimization_algorithm_type()
const
557 return "GRADIENT_DESCENT";
565 Tensor<string, 2> labels_values(7, 2);
569 labels_values(0,0) =
"Learning rate method";
574 labels_values(1,0) =
"Learning rate tolerance";
579 labels_values(2,0) =
"Minimum loss decrease";
584 labels_values(3,0) =
"Loss goal";
589 labels_values(4,0) =
"Maximum selection error increases";
594 labels_values(5,0) =
"Maximum epochs number";
599 labels_values(6,0) =
"Maximum time";
602 return labels_values;
612 ostringstream buffer;
616 file_stream.OpenElement(
"GradientDescent");
622 file_stream.OpenElement(
"MinimumLossDecrease");
627 file_stream.
PushText(buffer.str().c_str());
633 file_stream.OpenElement(
"LossGoal");
638 file_stream.
PushText(buffer.str().c_str());
644 file_stream.OpenElement(
"MaximumSelectionErrorIncreases");
649 file_stream.
PushText(buffer.str().c_str());
655 file_stream.OpenElement(
"MaximumEpochsNumber");
660 file_stream.
PushText(buffer.str().c_str());
666 file_stream.OpenElement(
"MaximumTime");
671 file_stream.
PushText(buffer.str().c_str());
677 file_stream.OpenElement(
"HardwareUse");
682 file_stream.
PushText(buffer.str().c_str());
696 ostringstream buffer;
698 buffer <<
"OpenNN Exception: GradientDescent class.\n"
699 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
700 <<
"Gradient descent element is nullptr.\n";
702 throw logic_error(buffer.str());
708 = root_element->FirstChildElement(
"LearningRateAlgorithm");
710 if(learning_rate_algorithm_element)
715 element_clone = learning_rate_algorithm_element->DeepClone(&learning_rate_algorithm_document);
717 learning_rate_algorithm_document.InsertFirstChild(element_clone);
729 cout <<
"MinimumLossDecrease" << endl;
730 const type new_minimum_loss_decrease =
static_cast<type
>(atof(element->GetText()));
736 catch(
const logic_error& e)
738 cerr << e.what() << endl;
749 const type new_loss_goal =
static_cast<type
>(atof(element->GetText()));
755 catch(
const logic_error& e)
757 cerr << e.what() << endl;
764 const tinyxml2::XMLElement* element = root_element->FirstChildElement(
"MaximumSelectionErrorIncreases");
768 const Index new_maximum_selection_failures =
static_cast<Index
>(atoi(element->GetText()));
774 catch(
const logic_error& e)
776 cerr << e.what() << endl;
787 const Index new_maximum_epochs_number =
static_cast<Index
>(atoi(element->GetText()));
793 catch(
const logic_error& e)
795 cerr << e.what() << endl;
806 const type new_maximum_time =
static_cast<type
>(atof(element->GetText()));
812 catch(
const logic_error& e)
814 cerr << e.what() << endl;
825 const string new_hardware_use = element->GetText();
830 catch(
const logic_error& e)
832 cerr << e.what() << endl;
This class represents the concept of data set for data modelling problems, such as approximation,...
Index get_training_samples_number() const
Returns the number of samples in the data set which will be used for training.
Tensor< Descriptives, 1 > scale_target_variables()
Tensor< Index, 1 > get_training_samples_indices() const
Returns the indices of the samples which will be used for training.
Tensor< Index, 1 > get_selection_samples_indices() const
Returns the indices of the samples which will be used for selection.
void unscale_input_variables(const Tensor< Descriptives, 1 > &)
Tensor< Index, 1 > get_target_variables_indices() const
Returns the indices of the target variables.
Index get_selection_samples_number() const
Returns the number of samples in the data set which will be used for selection.
void unscale_target_variables(const Tensor< Descriptives, 1 > &)
Tensor< string, 1 > get_target_variables_names() const
Tensor< Index, 1 > get_input_variables_indices() const
Returns the indices of the input variables.
Tensor< string, 1 > get_input_variables_names() const
Tensor< Descriptives, 1 > scale_input_variables()
TrainingResults perform_training()
void set_maximum_selection_failures(const Index &)
void set_loss_index_pointer(LossIndex *)
const type & get_maximum_time() const
Returns the maximum training time.
string get_hardware_use() const
Returns the hardware used. Default: Multi-core.
const type & get_loss_goal() const
void from_XML(const tinyxml2::XMLDocument &)
void set_default()
Sets the members of the optimization algorithm object to their default values.
const Index & get_maximum_epochs_number() const
Returns the maximum number of iterations for training.
Tensor< string, 2 > to_string_matrix() const
Writes the most representative attributes as a matrix of strings.
type minimum_loss_decrease
Minimum loss improvement between two successive iterations. It is used as a stopping criterion.
LearningRateAlgorithm * get_learning_rate_algorithm_pointer()
Returns a pointer to the learning rate algorithm object inside the gradient descent object.
const LearningRateAlgorithm & get_learning_rate_algorithm() const
Returns a constant reference to the learning rate algorithm object inside the gradient descent object...
void set_maximum_time(const type &)
LearningRateAlgorithm learning_rate_algorithm
Learning rate algorithm object for one-dimensional minimization.
void set_loss_goal(const type &)
type maximum_time
Maximum training time. It is used as a stopping criterion.
void set_maximum_epochs_number(const Index &)
void calculate_training_direction(const Tensor< type, 1 > &, Tensor< type, 1 > &) const
void set_minimum_loss_decrease(const type &)
type training_loss_goal
Goal value for the loss. It is used as a stopping criterion.
Index maximum_epochs_number
Maximum epochs number.
virtual ~GradientDescent()
Destructor.
void write_XML(tinyxml2::XMLPrinter &) const
void update_parameters(const DataSetBatch &batch, NeuralNetworkForwardPropagation &forward_propagation, LossIndexBackPropagation &back_propagation, GradientDescentData &optimization_data)
GradientDescent::update_parameters.
const Index & get_maximum_selection_failures() const
Returns the maximum number of selection error increases during the training process.
Index maximum_selection_failures
const type & get_minimum_loss_decrease() const
Returns the minimum loss improvement during training.
A learning rate that is adjusted according to an algorithm during training to minimize training time.
void set_loss_index_pointer(LossIndex *)
void from_XML(const tinyxml2::XMLDocument &)
string write_learning_rate_method() const
Returns a string with the name of the learning rate method to be used.
pair< type, type > calculate_directional_point(const DataSetBatch &, NeuralNetworkForwardPropagation &, LossIndexBackPropagation &, OptimizationAlgorithmData &) const
void write_XML(tinyxml2::XMLPrinter &) const
This abstract class represents the concept of loss index composed of an error term and a regularizati...
virtual string get_error_type() const
Returns a string with the default type of error term, "USER_PERFORMANCE_TERM".
NeuralNetwork * get_neural_network_pointer() const
Returns a pointer to the neural network object associated to the error term.
DataSet * get_data_set_pointer() const
Returns a pointer to the data set object associated to the error term.
ScalingLayer * get_scaling_layer_pointer() const
Returns a pointer to the scaling layers object composing this neural network object.
bool has_scaling_layer() const
bool has_unscaling_layer() const
void forward_propagate(const DataSetBatch &, NeuralNetworkForwardPropagation &) const
Calculate forward propagation in neural network.
void save(const string &) const
void set_parameters(Tensor< type, 1 > &)
UnscalingLayer * get_unscaling_layer_pointer() const
Returns a pointer to the unscaling layers object composing this neural network object.
void set_inputs_names(const Tensor< string, 1 > &)
Index get_parameters_number() const
void set_outputs_names(const Tensor< string, 1 > &)
string neural_network_file_name
Path where the neural network is saved.
void set_hardware_use(const string &)
Set hardware to use. Default: Multi-core.
LossIndex * loss_index_pointer
Pointer to a loss index for a neural network object.
virtual void check() const
bool display
Display messages to screen.
const string write_time(const type &) const
Formats an elapsed time given in seconds as HH:mm:ss.
Index save_period
Number of iterations between the training saving progress.
string hardware_use
Hardware use.
Index display_period
Number of iterations between the training showing progress.
This class represents a layer of scaling neurons.
void set()
Sets the scaling layer to be empty.
This class represents a layer of unscaling neurons.
void set()
Sets the unscaling layer to be empty.
void PushText(const char *text, bool cdata=false)
Add a text node.
virtual void CloseElement(bool compactMode=false)
If streaming, close the Element.
HALF_CONSTEXPR half abs(half arg)
This structure contains the optimization algorithm results.
Tensor< type, 1 > selection_error_history
History of the selection error over the training iterations.
void resize_training_error_history(const Index &)
Resizes the training error history keeping the values.
OptimizationAlgorithm::StoppingCondition stopping_condition
Stopping condition of the algorithm.
void resize_selection_error_history(const Index &)
Resizes the selection error history keeping the values.
Tensor< type, 1 > training_error_history
History of the loss function values over the training iterations.
string elapsed_time
Elapsed time of the training process.