9#include "stochastic_gradient_descent.h"
124Index StochasticGradientDescent::get_batch_samples_number()
const
138 if(new_learning_rate <=
static_cast<type
>(0.0))
140 ostringstream buffer;
142 buffer <<
"OpenNN Exception: StochasticGradientDescent class.\n"
143 <<
"void set_initial_learning_rate(const type&) method.\n"
144 <<
"initial_learning_rate must be greater than 0.\n";
146 throw logic_error(buffer.str());
164 if(new_dacay <
static_cast<type
>(0.0))
166 ostringstream buffer;
168 buffer <<
"OpenNN Exception: StochasticGradientDescent class.\n"
169 <<
"void set_initial_decay(const type&) method.\n"
170 <<
"new_dacay must be equal or greater than 0.\n";
172 throw logic_error(buffer.str());
191 if(new_momentum <
static_cast<type
>(0.0))
193 ostringstream buffer;
195 buffer <<
"OpenNN Exception: StochasticGradientDescent class.\n"
196 <<
"void set_momentum(const type&) method.\n"
197 <<
"new_momentum must be equal or greater than 0.\n";
199 throw logic_error(buffer.str());
226 if(new_maximum_epochs_number <
static_cast<type
>(0.0))
228 ostringstream buffer;
230 buffer <<
"OpenNN Exception: StochasticGradientDescent class.\n"
231 <<
"void set_maximum_epochs_number(const type&) method.\n"
232 <<
"Maximum epochs number must be equal or greater than 0.\n";
234 throw logic_error(buffer.str());
262 if(new_maximum_time <
static_cast<type
>(0.0))
264 ostringstream buffer;
266 buffer <<
"OpenNN Exception: StochasticGradientDescent class.\n"
267 <<
"void set_maximum_time(const type&) method.\n"
268 <<
"Maximum time must be equal or greater than 0.\n";
270 throw logic_error(buffer.str());
288 optimization_data.parameters_increment.device(*thread_pool_device) = back_propagation.gradient*(-learning_rate);
292 optimization_data.parameters_increment.device(*thread_pool_device) +=
momentum*optimization_data.last_parameters_increment;
296 back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
300 optimization_data.nesterov_increment.device(*thread_pool_device)
301 = optimization_data.parameters_increment*
momentum - back_propagation.gradient*learning_rate;
303 back_propagation.parameters.device(*thread_pool_device) += optimization_data.nesterov_increment;
308 back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
311 optimization_data.last_parameters_increment = optimization_data.parameters_increment;
313 optimization_data.iteration++;
319 neural_network_pointer->
set_parameters(back_propagation.parameters);
336 if(
display) cout <<
"Training with stochastic gradient descent (SGD)...\n";
342 const bool has_selection = data_set_pointer->has_selection();
350 Index batch_size_training = 0;
351 Index batch_size_selection = 0;
357 ? batch_size_training = training_samples_number
361 ? batch_size_selection = selection_samples_number
367 const Tensor<Scaler, 1> input_variables_scalers = data_set_pointer->get_input_variables_scalers();
368 const Tensor<Scaler, 1> target_variables_scalers = data_set_pointer->get_target_variables_scalers();
370 const Tensor<Descriptives, 1> input_variables_descriptives = data_set_pointer->
scale_input_variables();
371 Tensor<Descriptives, 1> target_variables_descriptives;
373 DataSetBatch batch_training(batch_size_training, data_set_pointer);
374 DataSetBatch batch_selection(batch_size_selection, data_set_pointer);
376 const Index training_batches_number = training_samples_number/batch_size_training;
377 const Index selection_batches_number = selection_samples_number/batch_size_selection;
379 Tensor<Index, 2> training_batches(training_batches_number, batch_size_training);
380 Tensor<Index, 2> selection_batches(selection_batches_number, batch_size_selection);
392 scaling_layer_pointer->
set(input_variables_descriptives, input_variables_scalers);
400 unscaling_layer_pointer->
set(target_variables_descriptives, target_variables_scalers);
413 type training_error = type(0);
414 type training_loss = type(0);
416 type selection_error = type(0);
418 Index selection_failures = 0;
424 bool stop_training =
false;
426 time_t beginning_time, current_time;
427 time(&beginning_time);
428 type elapsed_time = type(0);
430 bool shuffle =
false;
442 training_batches = data_set_pointer->
get_batches(training_samples_indices, batch_size_training, shuffle);
444 const Index batches_number = training_batches.dimension(0);
446 training_loss = type(0);
447 training_error = type(0);
449 optimization_data.iteration = 0;
451 for(Index iteration = 0; iteration < batches_number; iteration++)
453 optimization_data.iteration++;
457 batch_training.fill(training_batches.chip(iteration, 0), input_variables_indices, target_variables_indices);
461 neural_network_pointer->
forward_propagate(batch_training, training_forward_propagation);
465 loss_index_pointer->back_propagate(batch_training, training_forward_propagation, training_back_propagation);
467 training_error += training_back_propagation.error;
468 training_loss += training_back_propagation.loss;
477 training_loss /=
static_cast<type
>(batches_number);
478 training_error /=
static_cast<type
>(batches_number);
484 selection_batches = data_set_pointer->
get_batches(selection_samples_indices, batch_size_selection, shuffle);
486 selection_error = type(0);
488 for(Index iteration = 0; iteration < selection_batches_number; iteration++)
492 batch_selection.fill(selection_batches.chip(iteration,0), input_variables_indices, target_variables_indices);
496 neural_network_pointer->
forward_propagate(batch_selection, selection_forward_propagation);
500 loss_index_pointer->calculate_errors(batch_selection, selection_forward_propagation, selection_back_propagation);
501 loss_index_pointer->calculate_error(batch_selection, selection_forward_propagation, selection_back_propagation);
503 selection_error += selection_back_propagation.error;
506 selection_error /=
static_cast<type
>(selection_batches_number);
516 elapsed_time =
static_cast<type
>(difftime(current_time, beginning_time));
520 cout <<
"Training error: " << training_error << endl;
521 if(has_selection) cout <<
"Selection error: " << selection_error << endl<<endl;
522 cout <<
"Elapsed time: " <<
write_time(elapsed_time) << endl;
529 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum number of epochs reached: " << epoch << endl;
531 stop_training =
true;
538 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum training time reached: " <<
write_time(elapsed_time) << endl;
540 stop_training =
true;
547 if(
display) cout <<
"Epoch " << epoch << endl <<
"Loss goal reached: " << training_loss << endl;
549 stop_training =
true;
556 if(
display) cout <<
"Epoch " << epoch << endl <<
"Maximum selection failures reached: " << selection_failures << endl;
558 stop_training =
true;
591string StochasticGradientDescent::write_optimization_algorithm_type()
const
593 return "STOCHASTIC_GRADIENT_DESCENT";
601 Tensor<string, 2> labels_values(7, 2);
605 labels_values(0,0) =
"Inital learning rate";
610 labels_values(1,0) =
"Inital decay";
615 labels_values(2,0) =
"Apply momentum";
616 momentum > type(0) ? labels_values(2,1) =
"true" : labels_values(2,1) =
"false";
620 labels_values(3,0) =
"Training loss goal";
625 labels_values(4,0) =
"Maximum epochs number";
630 labels_values(5,0) =
"Maximum time";
635 labels_values(6,0) =
"Batch samples number";
638 return labels_values;
647 ostringstream buffer;
649 file_stream.OpenElement(
"StochasticGradientDescent");
653 file_stream.OpenElement(
"BatchSize");
658 file_stream.
PushText(buffer.str().c_str());
664 file_stream.OpenElement(
"ApplyMomentum");
667 buffer << (
momentum >
static_cast<type
>(0.0));
669 file_stream.
PushText(buffer.str().c_str());
675 file_stream.OpenElement(
"LossGoal");
680 file_stream.
PushText(buffer.str().c_str());
686 file_stream.OpenElement(
"MaximumEpochsNumber");
691 file_stream.
PushText(buffer.str().c_str());
697 file_stream.OpenElement(
"MaximumTime");
702 file_stream.
PushText(buffer.str().c_str());
708 file_stream.OpenElement(
"HardwareUse");
713 file_stream.
PushText(buffer.str().c_str());
725 const tinyxml2::XMLElement* root_element = document.FirstChildElement(
"StochasticGradientDescent");
729 ostringstream buffer;
731 buffer <<
"OpenNN Exception: StochasticGradientDescent class.\n"
732 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
733 <<
"Stochastic gradient descent element is nullptr.\n";
735 throw logic_error(buffer.str());
742 if(batch_size_element)
744 const Index new_batch_size =
static_cast<Index
>(atoi(batch_size_element->GetText()));
748 set_batch_samples_number(new_batch_size);
750 catch(
const logic_error& e)
752 cerr << e.what() << endl;
758 const tinyxml2::XMLElement* apply_momentum_element = root_element->FirstChildElement(
"ApplyMomentum");
760 if(batch_size_element)
762 string new_apply_momentum_state = apply_momentum_element->GetText();
766 if(new_apply_momentum_state !=
"0")
775 catch(
const logic_error& e)
777 cerr << e.what() << endl;
787 const type new_loss_goal =
static_cast<type
>(atof(element->GetText()));
793 catch(
const logic_error& e)
795 cerr << e.what() << endl;
806 const Index new_maximum_epochs_number =
static_cast<Index
>(atoi(element->GetText()));
812 catch(
const logic_error& e)
814 cerr << e.what() << endl;
825 const type new_maximum_time =
static_cast<type
>(atof(element->GetText()));
831 catch(
const logic_error& e)
833 cerr << e.what() << endl;
844 const string new_hardware_use = element->GetText();
850 catch(
const logic_error& e)
852 cerr << e.what() << endl;
This class represents the concept of data set for data modelling problems, such as approximation,...
Index get_training_samples_number() const
Returns the number of samples in the data set which will be used for training.
Tensor< Descriptives, 1 > scale_target_variables()
Tensor< Index, 1 > get_training_samples_indices() const
Returns the indices of the samples which will be used for training.
Tensor< Index, 1 > get_selection_samples_indices() const
Returns the indices of the samples which will be used for selection.
void unscale_input_variables(const Tensor< Descriptives, 1 > &)
Tensor< Index, 1 > get_target_variables_indices() const
Returns the indices of the target variables.
Index get_selection_samples_number() const
Returns the number of samples in the data set which will be used for selection.
void unscale_target_variables(const Tensor< Descriptives, 1 > &)
Tensor< string, 1 > get_target_variables_names() const
Tensor< Index, 1 > get_input_variables_indices() const
Returns the indices of the input variables.
Tensor< string, 1 > get_input_variables_names() const
Tensor< Index, 2 > get_batches(const Tensor< Index, 1 > &, const Index &, const bool &, const Index &buffer_size=100) const
Tensor< Descriptives, 1 > scale_input_variables()
This abstract class represents the concept of loss index composed of an error term and a regularizati...
NeuralNetwork * get_neural_network_pointer() const
Returns a pointer to the neural network object associated to the error term.
DataSet * get_data_set_pointer() const
Returns a pointer to the data set object associated to the error term.
ScalingLayer * get_scaling_layer_pointer() const
Returns a pointer to the scaling layers object composing this neural network object.
bool has_long_short_term_memory_layer() const
bool has_scaling_layer() const
bool has_unscaling_layer() const
bool has_recurrent_layer() const
void forward_propagate(const DataSetBatch &, NeuralNetworkForwardPropagation &) const
Calculate forward propagation in neural network.
void save(const string &) const
void set_parameters(Tensor< type, 1 > &)
UnscalingLayer * get_unscaling_layer_pointer() const
Returns a pointer to the unscaling layers object composing this neural network object.
void set_inputs_names(const Tensor< string, 1 > &)
void set_outputs_names(const Tensor< string, 1 > &)
string neural_network_file_name
Path where the neural network is saved.
void set_hardware_use(const string &)
Set hardware to use. Default: Multi-core.
LossIndex * loss_index_pointer
Pointer to a loss index for a neural network object.
virtual void check() const
bool display
Display messages to screen.
const string write_time(const type &) const
Writes the time from seconds in format HH:mm:ss.
Index save_period
Number of iterations between the training saving progress.
string hardware_use
Hardware use.
Index display_period
Number of iterations between the training showing progress.
This class represents a layer of scaling neurons.
void set()
Sets the scaling layer to be empty.
void set_momentum(const type &)
TrainingResults perform_training()
void set_loss_index_pointer(LossIndex *)
const type & get_maximum_time() const
Returns the maximum training time.
type momentum
Parameter that accelerates SGD in the relevant direction and dampens oscillations.
const type & get_loss_goal() const
void from_XML(const tinyxml2::XMLDocument &)
void set_default()
Sets the members of the optimization algorithm object to their default values.
type initial_learning_rate
Initial learning rate.
Tensor< string, 2 > to_string_matrix() const
Writes as matrix of strings the most representative atributes.
void set_initial_learning_rate(const type &)
const type & get_momentum() const
Returns the momentum.
void set_maximum_time(const type &)
const bool & get_nesterov() const
Returns true if nesterov is active, and false otherwise.
void set_loss_goal(const type &)
type maximum_time
Maximum training time. It is used as a stopping criterion.
void set_maximum_epochs_number(const Index &)
virtual ~StochasticGradientDescent()
Destructor.
type initial_decay
Learning rate decay over each update.
void set_nesterov(const bool &)
void update_parameters(LossIndexBackPropagation &back_propagation, StochasticGradientDescentData &optimization_data)
Updates the network parameters by applying the computed increment (plain SGD, momentum, or Nesterov momentum).
type training_loss_goal
Goal value for the loss. It is used as a stopping criterion.
void set_initial_decay(const type &)
Index maximum_epochs_number
Maximum epochs number.
void write_XML(tinyxml2::XMLPrinter &) const
bool nesterov
Boolean. Whether to apply Nesterov momentum.
StochasticGradientDescent()
Index batch_samples_number
Number of samples per training batch.
Index maximum_selection_failures
Maximum number of selection failures allowed. It is used as a stopping criterion.
const type & get_initial_learning_rate() const
Returns the initial learning rate.
const type & get_initial_decay() const
Returns the initial decay.
This class represents a layer of unscaling neurons.
void set()
Sets the unscaling layer to be empty.
void PushText(const char *text, bool cdata=false)
Add a text node.
virtual void CloseElement(bool compactMode=false)
If streaming, close the Element.
This structure contains the optimization algorithm results.
Tensor< type, 1 > selection_error_history
History of the selection error over the training iterations.
void resize_training_error_history(const Index &)
Resizes the training error history keeping the values.
OptimizationAlgorithm::StoppingCondition stopping_condition
Stopping condition of the algorithm.
void resize_selection_error_history(const Index &)
Resizes the selection error history keeping the values.
Tensor< type, 1 > training_error_history
History of the training error over the training iterations.
string elapsed_time
Elapsed time of the training process.