stochastic_gradient_descent.cpp
// OpenNN: Open Neural Networks Library
// www.opennn.net
//
// S T O C H A S T I C   G R A D I E N T   D E S C E N T   C L A S S
//
// Artificial Intelligence Techniques SL
// artelnics@artelnics.com

#include "stochastic_gradient_descent.h"

namespace OpenNN
{

/// Default constructor.
/// It creates a stochastic gradient descent optimization algorithm, not associated to any loss index.
/// It also initializes the class members to their default values.

StochasticGradientDescent::StochasticGradientDescent()
    : OptimizationAlgorithm()
{
    set_default();
}


/// Loss index constructor.
/// It creates a stochastic gradient descent optimization algorithm associated to a loss index.
/// It also initializes the class members to their default values.
/// @param new_loss_index_pointer Pointer to a loss index object.

StochasticGradientDescent::StochasticGradientDescent(LossIndex* new_loss_index_pointer)
    : OptimizationAlgorithm(new_loss_index_pointer)
{
    set_default();
}


/// Destructor.

StochasticGradientDescent::~StochasticGradientDescent()
{
}


/// Returns the initial learning rate.

const type& StochasticGradientDescent::get_initial_learning_rate() const
{
    return initial_learning_rate;
}


/// Returns the initial decay.

const type& StochasticGradientDescent::get_initial_decay() const
{
    return initial_decay;
}


/// Returns the momentum.

const type& StochasticGradientDescent::get_momentum() const
{
    return momentum;
}


/// Returns true if Nesterov momentum is active, and false otherwise.

const bool& StochasticGradientDescent::get_nesterov() const
{
    return nesterov;
}


/// Returns the goal value for the loss.
/// This is used as a stopping criterion when training a neural network.

const type& StochasticGradientDescent::get_loss_goal() const
{
    return training_loss_goal;
}


/// Returns the maximum training time.

const type& StochasticGradientDescent::get_maximum_time() const
{
    return maximum_time;
}


/// Sets a pointer to a loss index object to be associated to the optimization algorithm.
/// @param new_loss_index_pointer Pointer to a loss index object.

void StochasticGradientDescent::set_loss_index_pointer(LossIndex* new_loss_index_pointer)
{
    loss_index_pointer = new_loss_index_pointer;
}


/// Sets the members of the optimization algorithm object to their default values.

void StochasticGradientDescent::set_default()
{
    // TRAINING OPERATORS

    initial_learning_rate = static_cast<type>(0.01);
    initial_decay = type(0);
    momentum = type(0);
    nesterov = false;

    // Stopping criteria

    training_loss_goal = type(0);
    maximum_time = type(3600.0);
    maximum_epochs_number = 10000;

    // UTILITIES

    display_period = 100;
}


/// Returns the number of samples per training batch.

Index StochasticGradientDescent::get_batch_samples_number() const
{
    return batch_samples_number;
}


/// Sets a new initial learning rate. It must be greater than 0.
/// @param new_learning_rate Initial learning rate.

void StochasticGradientDescent::set_initial_learning_rate(const type& new_learning_rate)
{
#ifdef OPENNN_DEBUG

    if(new_learning_rate <= static_cast<type>(0.0))
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: StochasticGradientDescent class.\n"
               << "void set_initial_learning_rate(const type&) method.\n"
               << "initial_learning_rate must be greater than 0.\n";

        throw logic_error(buffer.str());
    }

#endif

    // Set learning rate

    initial_learning_rate = new_learning_rate;
}


/// Sets a new decay for the learning rate schedule. It must be greater than or equal to 0.
/// @param new_decay Initial decay value.

void StochasticGradientDescent::set_initial_decay(const type& new_decay)
{
#ifdef OPENNN_DEBUG

    if(new_decay < static_cast<type>(0.0))
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: StochasticGradientDescent class.\n"
               << "void set_initial_decay(const type&) method.\n"
               << "new_decay must be greater than or equal to 0.\n";

        throw logic_error(buffer.str());
    }

#endif

    // Set initial decay

    initial_decay = new_decay;
}
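
// Note: initial_decay enters the learning rate schedule applied in update_parameters(),
// learning_rate = initial_learning_rate/(1 + iteration*initial_decay).
// As a worked example (illustrative values, not defaults): with
// initial_learning_rate = 0.01 and initial_decay = 0.001, after 1000 batch
// iterations the effective learning rate is 0.01/(1 + 1) = 0.005.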


/// Sets a new momentum value. It must be greater than or equal to 0.
/// This parameter accelerates SGD in the relevant direction and dampens oscillations.
/// @param new_momentum Momentum value.

void StochasticGradientDescent::set_momentum(const type& new_momentum)
{
#ifdef OPENNN_DEBUG

    if(new_momentum < static_cast<type>(0.0))
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: StochasticGradientDescent class.\n"
               << "void set_momentum(const type&) method.\n"
               << "new_momentum must be greater than or equal to 0.\n";

        throw logic_error(buffer.str());
    }

#endif

    // Set momentum

    momentum = new_momentum;
}
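
// Note: with momentum > 0 the parameter increment keeps a fraction of the
// previous step. As an illustrative example (the value is not a default):
// with momentum = 0.9, update_parameters() computes
// increment = 0.9*last_increment - learning_rate*gradient.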


/// Sets whether Nesterov momentum is applied.
/// @param new_nesterov_momentum True to apply Nesterov momentum, false otherwise.

void StochasticGradientDescent::set_nesterov(const bool& new_nesterov_momentum)
{
    nesterov = new_nesterov_momentum;
}


/// Sets a new maximum number of training epochs. It must be greater than or equal to 0.
/// @param new_maximum_epochs_number Maximum number of epochs.

void StochasticGradientDescent::set_maximum_epochs_number(const Index& new_maximum_epochs_number)
{
#ifdef OPENNN_DEBUG

    if(new_maximum_epochs_number < 0)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: StochasticGradientDescent class.\n"
               << "void set_maximum_epochs_number(const Index&) method.\n"
               << "Maximum number of epochs must be greater than or equal to 0.\n";

        throw logic_error(buffer.str());
    }

#endif

    // Set maximum epochs number

    maximum_epochs_number = new_maximum_epochs_number;
}


/// Sets a new goal value for the loss.
/// This is used as a stopping criterion when training a neural network.
/// @param new_loss_goal Goal value for the loss.

void StochasticGradientDescent::set_loss_goal(const type& new_loss_goal)
{
    training_loss_goal = new_loss_goal;
}


/// Sets a new maximum training time, in seconds. It must be greater than or equal to 0.
/// @param new_maximum_time Maximum training time.

void StochasticGradientDescent::set_maximum_time(const type& new_maximum_time)
{
#ifdef OPENNN_DEBUG

    if(new_maximum_time < static_cast<type>(0.0))
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: StochasticGradientDescent class.\n"
               << "void set_maximum_time(const type&) method.\n"
               << "Maximum time must be greater than or equal to 0.\n";

        throw logic_error(buffer.str());
    }

#endif

    // Set maximum time

    maximum_time = new_maximum_time;
}


/// Updates the neural network parameters according to the stochastic gradient descent rule,
/// applying learning rate decay and, optionally, (Nesterov) momentum.
/// @param back_propagation Loss index back-propagation data (gradient and parameters).
/// @param optimization_data Stochastic gradient descent optimization data.

void StochasticGradientDescent::update_parameters(LossIndexBackPropagation& back_propagation,
                                                  StochasticGradientDescentData& optimization_data)
{
    const type learning_rate = initial_learning_rate/(type(1) + type(optimization_data.iteration)*initial_decay);

    optimization_data.parameters_increment.device(*thread_pool_device) = back_propagation.gradient*(-learning_rate);

    if(momentum > type(0))
    {
        optimization_data.parameters_increment.device(*thread_pool_device) += momentum*optimization_data.last_parameters_increment;

        if(!nesterov)
        {
            back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
        }
        else
        {
            optimization_data.nesterov_increment.device(*thread_pool_device)
                    = optimization_data.parameters_increment*momentum - back_propagation.gradient*learning_rate;

            back_propagation.parameters.device(*thread_pool_device) += optimization_data.nesterov_increment;
        }
    }
    else
    {
        back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
    }

    optimization_data.last_parameters_increment = optimization_data.parameters_increment;

    optimization_data.iteration++;

    // Update parameters

    NeuralNetwork* neural_network_pointer = back_propagation.loss_index_pointer->get_neural_network_pointer();

    neural_network_pointer->set_parameters(back_propagation.parameters);
}
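
// For reference, the rule implemented above, written out (a sketch; "lr" is the
// decayed learning rate computed at the top of update_parameters()):
//
//   lr        = initial_learning_rate/(1 + iteration*initial_decay)
//   increment = momentum*last_increment - lr*gradient
//
//   no momentum:       parameters += -lr*gradient
//   plain momentum:    parameters += increment
//   Nesterov momentum: parameters += momentum*increment - lr*gradient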


/// Trains a neural network with an associated loss index according to the
/// stochastic gradient descent method.
/// Training occurs according to the training parameters and stopping criteria.

TrainingResults StochasticGradientDescent::perform_training()
{
    TrainingResults results(maximum_epochs_number+1);

    check();

    // Start training

    if(display) cout << "Training with stochastic gradient descent (SGD)...\n";

    // Data set

    DataSet* data_set_pointer = loss_index_pointer->get_data_set_pointer();

    const bool has_selection = data_set_pointer->has_selection();

    const Tensor<Index, 1> input_variables_indices = data_set_pointer->get_input_variables_indices();
    const Tensor<Index, 1> target_variables_indices = data_set_pointer->get_target_variables_indices();

    const Tensor<Index, 1> training_samples_indices = data_set_pointer->get_training_samples_indices();
    const Tensor<Index, 1> selection_samples_indices = data_set_pointer->get_selection_samples_indices();

    const Index training_samples_number = data_set_pointer->get_training_samples_number();
    const Index selection_samples_number = data_set_pointer->get_selection_samples_number();

    // The batch size cannot exceed the number of available samples

    const Index batch_size_training = training_samples_number < batch_samples_number
            ? training_samples_number
            : batch_samples_number;

    const Index batch_size_selection = (selection_samples_number < batch_samples_number && selection_samples_number != 0)
            ? selection_samples_number
            : batch_samples_number;

    const Tensor<string, 1> inputs_names = data_set_pointer->get_input_variables_names();
    const Tensor<string, 1> targets_names = data_set_pointer->get_target_variables_names();

    const Tensor<Scaler, 1> input_variables_scalers = data_set_pointer->get_input_variables_scalers();
    const Tensor<Scaler, 1> target_variables_scalers = data_set_pointer->get_target_variables_scalers();

    const Tensor<Descriptives, 1> input_variables_descriptives = data_set_pointer->scale_input_variables();
    Tensor<Descriptives, 1> target_variables_descriptives;

    DataSetBatch batch_training(batch_size_training, data_set_pointer);
    DataSetBatch batch_selection(batch_size_selection, data_set_pointer);

    const Index training_batches_number = training_samples_number/batch_size_training;
    const Index selection_batches_number = selection_samples_number/batch_size_selection;

    Tensor<Index, 2> training_batches(training_batches_number, batch_size_training);
    Tensor<Index, 2> selection_batches(selection_batches_number, batch_size_selection);

    // Neural network

    NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    neural_network_pointer->set_inputs_names(inputs_names);
    neural_network_pointer->set_outputs_names(targets_names);

    if(neural_network_pointer->has_scaling_layer())
    {
        ScalingLayer* scaling_layer_pointer = neural_network_pointer->get_scaling_layer_pointer();
        scaling_layer_pointer->set(input_variables_descriptives, input_variables_scalers);
    }

    if(neural_network_pointer->has_unscaling_layer())
    {
        target_variables_descriptives = data_set_pointer->scale_target_variables();

        UnscalingLayer* unscaling_layer_pointer = neural_network_pointer->get_unscaling_layer_pointer();
        unscaling_layer_pointer->set(target_variables_descriptives, target_variables_scalers);
    }

    NeuralNetworkForwardPropagation training_forward_propagation(batch_size_training, neural_network_pointer);
    NeuralNetworkForwardPropagation selection_forward_propagation(batch_size_selection, neural_network_pointer);

    // Loss index

    loss_index_pointer->set_normalization_coefficient();

    LossIndexBackPropagation training_back_propagation(batch_size_training, loss_index_pointer);
    LossIndexBackPropagation selection_back_propagation(batch_size_selection, loss_index_pointer);

    type training_error = type(0);
    type training_loss = type(0);

    type selection_error = type(0);

    Index selection_failures = 0;

    // Optimization algorithm

    StochasticGradientDescentData optimization_data(this);

    bool stop_training = false;

    time_t beginning_time, current_time;
    time(&beginning_time);
    type elapsed_time = type(0);

    // Shuffle batches, unless the network is recurrent and needs samples in order

    bool shuffle = true;

    if(neural_network_pointer->has_long_short_term_memory_layer()
    || neural_network_pointer->has_recurrent_layer())
        shuffle = false;

    // Main loop

    for(Index epoch = 0; epoch <= maximum_epochs_number; epoch++)
    {
        if(display && epoch%display_period == 0) cout << "Epoch: " << epoch << endl;

        training_batches = data_set_pointer->get_batches(training_samples_indices, batch_size_training, shuffle);

        const Index batches_number = training_batches.dimension(0);

        training_loss = type(0);
        training_error = type(0);

        optimization_data.iteration = 0;

        for(Index iteration = 0; iteration < batches_number; iteration++)
        {
            // optimization_data.iteration is advanced inside update_parameters()

            // Data set

            batch_training.fill(training_batches.chip(iteration, 0), input_variables_indices, target_variables_indices);

            // Neural network

            neural_network_pointer->forward_propagate(batch_training, training_forward_propagation);

            // Loss index

            loss_index_pointer->back_propagate(batch_training, training_forward_propagation, training_back_propagation);

            training_error += training_back_propagation.error;
            training_loss += training_back_propagation.loss;

            // Gradient

            update_parameters(training_back_propagation, optimization_data);
        }

        // Loss

        training_loss /= static_cast<type>(batches_number);
        training_error /= static_cast<type>(batches_number);

        results.training_error_history(epoch) = training_error;

        if(has_selection)
        {
            selection_batches = data_set_pointer->get_batches(selection_samples_indices, batch_size_selection, shuffle);

            selection_error = type(0);

            for(Index iteration = 0; iteration < selection_batches_number; iteration++)
            {
                // Data set

                batch_selection.fill(selection_batches.chip(iteration,0), input_variables_indices, target_variables_indices);

                // Neural network

                neural_network_pointer->forward_propagate(batch_selection, selection_forward_propagation);

                // Loss

                loss_index_pointer->calculate_errors(batch_selection, selection_forward_propagation, selection_back_propagation);
                loss_index_pointer->calculate_error(batch_selection, selection_forward_propagation, selection_back_propagation);

                selection_error += selection_back_propagation.error;
            }

            selection_error /= static_cast<type>(selection_batches_number);

            results.selection_error_history(epoch) = selection_error;

            if(epoch != 0 && results.selection_error_history(epoch) > results.selection_error_history(epoch-1)) selection_failures++;
        }

        // Elapsed time

        time(&current_time);
        elapsed_time = static_cast<type>(difftime(current_time, beginning_time));

        if(display && epoch%display_period == 0)
        {
            cout << "Training error: " << training_error << endl;
            if(has_selection) cout << "Selection error: " << selection_error << endl << endl;
            cout << "Elapsed time: " << write_time(elapsed_time) << endl;
        }

        // Stopping criteria

        if(epoch == maximum_epochs_number)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum number of epochs reached: " << epoch << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::MaximumEpochsNumber;
        }

        if(elapsed_time >= maximum_time)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum training time reached: " << write_time(elapsed_time) << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::MaximumTime;
        }

        if(training_loss <= training_loss_goal)
        {
            if(display) cout << "Epoch " << epoch << endl << "Loss goal reached: " << training_loss << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::LossGoal;
        }

        if(selection_failures >= maximum_selection_failures)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum selection failures reached: " << selection_failures << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::MaximumSelectionErrorIncreases;
        }

        if(stop_training)
        {
            results.resize_training_error_history(epoch + 1);

            if(has_selection) results.resize_selection_error_history(epoch+1);
            else results.resize_selection_error_history(0);

            results.elapsed_time = write_time(elapsed_time);

            break;
        }

        // Save the neural network periodically

        if(epoch != 0 && epoch%save_period == 0) neural_network_pointer->save(neural_network_file_name);
    }

    data_set_pointer->unscale_input_variables(input_variables_descriptives);

    if(neural_network_pointer->has_unscaling_layer())
        data_set_pointer->unscale_target_variables(target_variables_descriptives);

    if(display) results.print();

    return results;
}
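
// Typical usage (a minimal sketch; assumes `loss_index` is a concrete LossIndex,
// such as a mean squared error, already associated to a NeuralNetwork and a DataSet):
//
//   StochasticGradientDescent sgd(&loss_index);
//
//   sgd.set_initial_learning_rate(static_cast<type>(0.01));
//   sgd.set_momentum(static_cast<type>(0.9));
//   sgd.set_nesterov(true);
//   sgd.set_maximum_epochs_number(1000);
//   sgd.set_maximum_time(type(600));
//
//   const TrainingResults results = sgd.perform_training();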


/// Writes the optimization algorithm type.

string StochasticGradientDescent::write_optimization_algorithm_type() const
{
    return "STOCHASTIC_GRADIENT_DESCENT";
}


/// Writes as a matrix of strings the most representative attributes.

Tensor<string, 2> StochasticGradientDescent::to_string_matrix() const
{
    Tensor<string, 2> labels_values(7, 2);

    // Initial learning rate

    labels_values(0,0) = "Initial learning rate";
    labels_values(0,1) = to_string(double(initial_learning_rate));

    // Initial decay

    labels_values(1,0) = "Initial decay";
    labels_values(1,1) = to_string(double(initial_decay));

    // Momentum

    labels_values(2,0) = "Apply momentum";
    labels_values(2,1) = momentum > type(0) ? "true" : "false";

    // Training loss goal

    labels_values(3,0) = "Training loss goal";
    labels_values(3,1) = to_string(double(training_loss_goal));

    // Maximum epochs number

    labels_values(4,0) = "Maximum epochs number";
    labels_values(4,1) = to_string(maximum_epochs_number);

    // Maximum time

    labels_values(5,0) = "Maximum time";
    labels_values(5,1) = write_time(maximum_time);

    // Batch samples number

    labels_values(6,0) = "Batch samples number";
    labels_values(6,1) = to_string(batch_samples_number);

    return labels_values;
}


/// Serializes the stochastic gradient descent object into an XML document of the TinyXML library
/// without keeping the DOM tree in memory.
/// @param file_stream TinyXML file stream.

void StochasticGradientDescent::write_XML(tinyxml2::XMLPrinter& file_stream) const
{
    ostringstream buffer;

    file_stream.OpenElement("StochasticGradientDescent");

    // DataSetBatch size

    file_stream.OpenElement("BatchSize");

    buffer.str("");
    buffer << batch_samples_number;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Apply momentum

    file_stream.OpenElement("ApplyMomentum");

    buffer.str("");
    buffer << (momentum > static_cast<type>(0.0));

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Loss goal

    file_stream.OpenElement("LossGoal");

    buffer.str("");
    buffer << training_loss_goal;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Maximum epochs number

    file_stream.OpenElement("MaximumEpochsNumber");

    buffer.str("");
    buffer << maximum_epochs_number;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Maximum time

    file_stream.OpenElement("MaximumTime");

    buffer.str("");
    buffer << maximum_time;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Hardware use

    file_stream.OpenElement("HardwareUse");

    buffer.str("");
    buffer << hardware_use;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // End element

    file_stream.CloseElement();
}
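
// For reference, write_XML() produces a document shaped like this
// (the values shown are illustrative, not prescribed defaults):
//
//   <StochasticGradientDescent>
//       <BatchSize>1000</BatchSize>
//       <ApplyMomentum>0</ApplyMomentum>
//       <LossGoal>0</LossGoal>
//       <MaximumEpochsNumber>10000</MaximumEpochsNumber>
//       <MaximumTime>3600</MaximumTime>
//       <HardwareUse>Multi-core</HardwareUse>
//   </StochasticGradientDescent>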


/// Loads the stochastic gradient descent object members from an XML document.
/// @param document TinyXML document with the member data.

void StochasticGradientDescent::from_XML(const tinyxml2::XMLDocument& document)
{
    const tinyxml2::XMLElement* root_element = document.FirstChildElement("StochasticGradientDescent");

    if(!root_element)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: StochasticGradientDescent class.\n"
               << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
               << "Stochastic gradient descent element is nullptr.\n";

        throw logic_error(buffer.str());
    }

    // DataSetBatch size

    const tinyxml2::XMLElement* batch_size_element = root_element->FirstChildElement("BatchSize");

    if(batch_size_element)
    {
        const Index new_batch_size = static_cast<Index>(atoi(batch_size_element->GetText()));

        try
        {
            set_batch_samples_number(new_batch_size);
        }
        catch(const logic_error& e)
        {
            cerr << e.what() << endl;
        }
    }

    // Momentum

    const tinyxml2::XMLElement* apply_momentum_element = root_element->FirstChildElement("ApplyMomentum");

    if(apply_momentum_element)
    {
        const string new_apply_momentum_state = apply_momentum_element->GetText();

        try
        {
            if(new_apply_momentum_state != "0")
            {
                set_momentum(static_cast<type>(0.9));
            }
            else
            {
                set_momentum(static_cast<type>(0.0));
            }
        }
        catch(const logic_error& e)
        {
            cerr << e.what() << endl;
        }
    }

    // Loss goal
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("LossGoal");

        if(element)
        {
            const type new_loss_goal = static_cast<type>(atof(element->GetText()));

            try
            {
                set_loss_goal(new_loss_goal);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Maximum epochs number
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumEpochsNumber");

        if(element)
        {
            const Index new_maximum_epochs_number = static_cast<Index>(atoi(element->GetText()));

            try
            {
                set_maximum_epochs_number(new_maximum_epochs_number);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Maximum time
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumTime");

        if(element)
        {
            const type new_maximum_time = static_cast<type>(atof(element->GetText()));

            try
            {
                set_maximum_time(new_maximum_time);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Hardware use
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("HardwareUse");

        if(element)
        {
            const string new_hardware_use = element->GetText();

            try
            {
                set_hardware_use(new_hardware_use);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }
}
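
// A minimal loading sketch (assumes an existing StochasticGradientDescent object
// `sgd`; the file name is illustrative, LoadFile() is the standard tinyxml2 call):
//
//   tinyxml2::XMLDocument document;
//
//   if(document.LoadFile("stochastic_gradient_descent.xml") == tinyxml2::XML_SUCCESS)
//   {
//       sgd.from_XML(document);
//   }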

}


// OpenNN: Open Neural Networks Library.
// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA