// OpenNN: Open Neural Networks Library
// www.opennn.net
//
// Q U A S I - N E W T O N   M E T H O D   C L A S S
//
// Artificial Intelligence Techniques SL
// artelnics@artelnics.com

#include "quasi_newton_method.h"

namespace OpenNN
{

/// Default constructor.
/// It creates a quasi-Newton method optimization algorithm not associated with any loss index.
/// It also initializes the class members to their default values.

QuasiNewtonMethod::QuasiNewtonMethod()
    : OptimizationAlgorithm()
{
    set_default();
}

/// Loss index constructor.
/// It creates a quasi-Newton method optimization algorithm associated with a loss index.
/// It also initializes the class members to their default values.
/// @param new_loss_index_pointer Pointer to a loss index object.

QuasiNewtonMethod::QuasiNewtonMethod(LossIndex* new_loss_index_pointer)
    : OptimizationAlgorithm(new_loss_index_pointer)
{
    learning_rate_algorithm.set_loss_index_pointer(new_loss_index_pointer);

    set_default();
}

/// Destructor.

QuasiNewtonMethod::~QuasiNewtonMethod()
{
}

/// Returns a constant reference to the learning rate algorithm object inside the quasi-Newton method object.

const LearningRateAlgorithm& QuasiNewtonMethod::get_learning_rate_algorithm() const
{
    return learning_rate_algorithm;
}

/// Returns a pointer to the learning rate algorithm object inside the quasi-Newton method object.

LearningRateAlgorithm* QuasiNewtonMethod::get_learning_rate_algorithm_pointer()
{
    return &learning_rate_algorithm;
}

/// Returns the method for approximating the inverse hessian matrix to be used when training.

const QuasiNewtonMethod::InverseHessianApproximationMethod& QuasiNewtonMethod::get_inverse_hessian_approximation_method() const
{
    return inverse_hessian_approximation_method;
}

/// Returns the name of the method for the approximation of the inverse hessian.

string QuasiNewtonMethod::write_inverse_hessian_approximation_method() const
{
    switch(inverse_hessian_approximation_method)
    {
    case InverseHessianApproximationMethod::DFP:
        return "DFP";

    case InverseHessianApproximationMethod::BFGS:
        return "BFGS";
    }

    ostringstream buffer;

    buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
           << "string write_inverse_hessian_approximation_method() const method.\n"
           << "Unknown inverse hessian approximation method.\n";

    throw logic_error(buffer.str());
}

const Index& QuasiNewtonMethod::get_epochs_number() const
{
    return epochs_number;
}

/// Returns the minimum loss improvement during training.

const type& QuasiNewtonMethod::get_minimum_loss_decrease() const
{
    return minimum_loss_decrease;
}

/// Returns the goal value for the loss.
/// This is used as a stopping criterion when training a neural network.

const type& QuasiNewtonMethod::get_loss_goal() const
{
    return training_loss_goal;
}

/// Returns the maximum number of selection error increases during the training process.

const Index& QuasiNewtonMethod::get_maximum_selection_failures() const
{
    return maximum_selection_failures;
}

/// Returns the maximum number of epochs for training.

const Index& QuasiNewtonMethod::get_maximum_epochs_number() const
{
    return maximum_epochs_number;
}

/// Returns the maximum training time.

const type& QuasiNewtonMethod::get_maximum_time() const
{
    return maximum_time;
}

/// Sets a pointer to a loss index object to be associated with the quasi-Newton method object.
/// It also sets that loss index to the learning rate algorithm.
/// @param new_loss_index_pointer Pointer to a loss index object.

void QuasiNewtonMethod::set_loss_index_pointer(LossIndex* new_loss_index_pointer)
{
    loss_index_pointer = new_loss_index_pointer;

    learning_rate_algorithm.set_loss_index_pointer(new_loss_index_pointer);
}

/// Sets a new inverse hessian approximation method value.
/// @param new_inverse_hessian_approximation_method Inverse hessian approximation method value.

void QuasiNewtonMethod::set_inverse_hessian_approximation_method(
    const QuasiNewtonMethod::InverseHessianApproximationMethod& new_inverse_hessian_approximation_method)
{
    inverse_hessian_approximation_method = new_inverse_hessian_approximation_method;
}

/// Sets a new method for approximating the inverse of the hessian matrix from a string containing the name.
/// Possible values are "DFP" and "BFGS".
/// @param new_inverse_hessian_approximation_method_name Name of the inverse hessian approximation method.

void QuasiNewtonMethod::set_inverse_hessian_approximation_method(const string& new_inverse_hessian_approximation_method_name)
{
    if(new_inverse_hessian_approximation_method_name == "DFP")
    {
        inverse_hessian_approximation_method = InverseHessianApproximationMethod::DFP;
    }
    else if(new_inverse_hessian_approximation_method_name == "BFGS")
    {
        inverse_hessian_approximation_method = InverseHessianApproximationMethod::BFGS;
    }
    else
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
               << "void set_inverse_hessian_approximation_method(const string&) method.\n"
               << "Unknown inverse hessian approximation method: " << new_inverse_hessian_approximation_method_name << ".\n";

        throw logic_error(buffer.str());
    }
}

/// Sets a new display value.
/// If it is set to true messages from this class are displayed on the screen;
/// if it is set to false messages from this class are not displayed on the screen.
/// @param new_display Display value.

void QuasiNewtonMethod::set_display(const bool& new_display)
{
    display = new_display;
}

/// Sets the members of the optimization algorithm object to their default values.

void QuasiNewtonMethod::set_default()
{
    inverse_hessian_approximation_method = InverseHessianApproximationMethod::BFGS;

    learning_rate_algorithm.set_default();

    // Stopping criteria

    minimum_loss_decrease = type(0);
    training_loss_goal = type(0);
    maximum_selection_failures = numeric_limits<Index>::max();

    maximum_epochs_number = 1000;
    maximum_time = type(3600.0);

    // UTILITIES

    display = true;
    display_period = 10;
}

/// Sets a new minimum loss improvement during training.
/// @param new_minimum_loss_decrease Minimum improvement in the loss between two successive epochs.

void QuasiNewtonMethod::set_minimum_loss_decrease(const type& new_minimum_loss_decrease)
{
    minimum_loss_decrease = new_minimum_loss_decrease;
}

/// Sets a new goal value for the loss.
/// This is used as a stopping criterion when training a neural network.
/// @param new_loss_goal Goal value for the loss.

void QuasiNewtonMethod::set_loss_goal(const type& new_loss_goal)
{
    training_loss_goal = new_loss_goal;
}

/// Sets a new maximum number of selection error increases.
/// @param new_maximum_selection_failures Maximum number of epochs in which the selection error increases.

void QuasiNewtonMethod::set_maximum_selection_failures(const Index& new_maximum_selection_failures)
{
    maximum_selection_failures = new_maximum_selection_failures;
}

/// Sets a new maximum number of epochs for training.
/// @param new_maximum_epochs_number Maximum number of epochs.

void QuasiNewtonMethod::set_maximum_epochs_number(const Index& new_maximum_epochs_number)
{
    maximum_epochs_number = new_maximum_epochs_number;
}

/// Sets a new maximum training time.
/// @param new_maximum_time Maximum training time.

void QuasiNewtonMethod::set_maximum_time(const type& new_maximum_time)
{
#ifdef OPENNN_DEBUG

    if(new_maximum_time < static_cast<type>(0.0))
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
               << "void set_maximum_time(const type&) method.\n"
               << "Maximum time must be equal to or greater than 0.\n";

        throw logic_error(buffer.str());
    }

#endif

    // Set maximum time

    maximum_time = new_maximum_time;
}
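
/// Initializes the inverse hessian approximation to the identity matrix.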

void QuasiNewtonMethod::initialize_inverse_hessian_approximation(QuasiNewtonMehtodData& optimization_data) const
{
    optimization_data.inverse_hessian.setZero();

    const Index parameters_number = optimization_data.inverse_hessian.dimension(0);

    for(Index i = 0; i < parameters_number; i++) optimization_data.inverse_hessian(i,i) = type(1);
}

/// Calculates an approximation of the inverse hessian, according to the method used.

void QuasiNewtonMethod::calculate_inverse_hessian_approximation(QuasiNewtonMehtodData& optimization_data) const
{
    switch(inverse_hessian_approximation_method)
    {
    case InverseHessianApproximationMethod::DFP:
        calculate_DFP_inverse_hessian(optimization_data);

        return;

    case InverseHessianApproximationMethod::BFGS:
        calculate_BFGS_inverse_hessian(optimization_data);

        return;
    }

    ostringstream buffer;

    buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
           << "void calculate_inverse_hessian_approximation(QuasiNewtonMehtodData&) const method.\n"
           << "Unknown inverse hessian approximation method.\n";

    throw logic_error(buffer.str());
}
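
/// Returns the Kronecker product of two vectors of the same size, reshaped into a
/// square matrix. For the symmetric uses below (both arguments being the same
/// vector) this equals the outer product v v^T.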

const Tensor<type, 2> QuasiNewtonMethod::kronecker_product(Tensor<type, 1>& left_matrix, Tensor<type, 1>& right_matrix) const
{
    // Map the tensors onto dense Eigen matrices

    auto ml = Eigen::Map<Eigen::Matrix<type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
              (left_matrix.data(), left_matrix.dimension(0), 1);

    auto mr = Eigen::Map<Eigen::Matrix<type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
              (right_matrix.data(), right_matrix.dimension(0), 1);

    // Kronecker product

    auto product = kroneckerProduct(ml, mr).eval();

    // Map the matrix back into a tensor

    TensorMap<Tensor<type, 2>> direct_matrix(product.data(), left_matrix.size(), left_matrix.size());

    return direct_matrix;
}

/// Returns the Kronecker product of two matrices.
/// @param left_matrix Left matrix of the product.
/// @param right_matrix Right matrix of the product.

const Tensor<type, 2> QuasiNewtonMethod::kronecker_product(Tensor<type, 2>& left_matrix, Tensor<type, 2>& right_matrix) const
{
    // Map the tensors onto dense Eigen matrices

    auto ml = Eigen::Map<Eigen::Matrix<type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
              (left_matrix.data(), left_matrix.dimension(0), left_matrix.dimension(1));

    auto mr = Eigen::Map<Eigen::Matrix<type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
              (right_matrix.data(), right_matrix.dimension(0), right_matrix.dimension(1));

    // Kronecker product

    auto product = kroneckerProduct(ml, mr).eval();

    // Map the matrix back into a tensor

    TensorMap<Tensor<type, 2>> direct_matrix(product.data(), product.rows(), product.cols());

    return direct_matrix;
}

/// Calculates an approximation of the inverse hessian matrix according to the
/// Davidon-Fletcher-Powell (DFP) algorithm.
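///
/// In standard notation, with s = parameters_difference, y = gradient_difference
/// and H = old_inverse_hessian, the update computed below is
///
///   H_new = H + (s s^T)/(s^T y) - (H y)(H y)^T/(y^T H y)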

void QuasiNewtonMethod::calculate_DFP_inverse_hessian(QuasiNewtonMehtodData& optimization_data) const
{
    // Dot products

    Tensor<type, 0> parameters_difference_dot_gradient_difference;

    parameters_difference_dot_gradient_difference.device(*thread_pool_device)
        = optimization_data.parameters_difference.contract(optimization_data.gradient_difference, AT_B);

    optimization_data.old_inverse_hessian_dot_gradient_difference.device(*thread_pool_device)
        = optimization_data.old_inverse_hessian.contract(optimization_data.gradient_difference, A_B);

    Tensor<type, 0> gradient_dot_hessian_dot_gradient;

    gradient_dot_hessian_dot_gradient.device(*thread_pool_device)
        = optimization_data.gradient_difference.contract(optimization_data.old_inverse_hessian_dot_gradient_difference, AT_B);

    // Calculate the approximation

    optimization_data.inverse_hessian = optimization_data.old_inverse_hessian;

    optimization_data.inverse_hessian
        += kronecker_product(optimization_data.parameters_difference, optimization_data.parameters_difference)
        / parameters_difference_dot_gradient_difference(0);

    optimization_data.inverse_hessian
        -= kronecker_product(optimization_data.old_inverse_hessian_dot_gradient_difference, optimization_data.old_inverse_hessian_dot_gradient_difference)
        / gradient_dot_hessian_dot_gradient(0);
}

/// Calculates an approximation of the inverse hessian matrix according to the
/// Broyden-Fletcher-Goldfarb-Shanno (BFGS) algorithm.
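///
/// In standard notation, with s = parameters_difference, y = gradient_difference,
/// H = old_inverse_hessian and u = s/(s^T y) - (H y)/(y^T H y), the update
/// computed below is
///
///   H_new = H + (s s^T)/(s^T y) - (H y)(H y)^T/(y^T H y) + (y^T H y)(u u^T)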

void QuasiNewtonMethod::calculate_BFGS_inverse_hessian(QuasiNewtonMehtodData& optimization_data) const
{
    const NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    const Index parameters_number = neural_network_pointer->get_parameters_number();

    Tensor<type, 0> parameters_difference_dot_gradient_difference;

    parameters_difference_dot_gradient_difference.device(*thread_pool_device)
        = optimization_data.parameters_difference.contract(optimization_data.gradient_difference, AT_B);

    optimization_data.old_inverse_hessian_dot_gradient_difference.device(*thread_pool_device)
        = optimization_data.old_inverse_hessian.contract(optimization_data.gradient_difference, A_B);

    Tensor<type, 0> gradient_dot_hessian_dot_gradient;

    gradient_dot_hessian_dot_gradient.device(*thread_pool_device)
        = optimization_data.gradient_difference.contract(optimization_data.old_inverse_hessian_dot_gradient_difference, AT_B);

    Tensor<type, 1> BFGS(parameters_number);

    BFGS.device(*thread_pool_device)
        = optimization_data.parameters_difference/parameters_difference_dot_gradient_difference(0)
        - optimization_data.old_inverse_hessian_dot_gradient_difference/gradient_dot_hessian_dot_gradient(0);

    // Calculate the approximation

    optimization_data.inverse_hessian = optimization_data.old_inverse_hessian;

    optimization_data.inverse_hessian
        += kronecker_product(optimization_data.parameters_difference, optimization_data.parameters_difference)
        / parameters_difference_dot_gradient_difference(0);

    optimization_data.inverse_hessian
        -= kronecker_product(optimization_data.old_inverse_hessian_dot_gradient_difference, optimization_data.old_inverse_hessian_dot_gradient_difference)
        / gradient_dot_hessian_dot_gradient(0);

    optimization_data.inverse_hessian
        += kronecker_product(BFGS, BFGS)*(gradient_dot_hessian_dot_gradient(0));
}

/// Updates the parameters of the neural network using the quasi-Newton training
/// direction and a line-search learning rate.

void QuasiNewtonMethod::update_parameters(
        const DataSetBatch& batch,
        NeuralNetworkForwardPropagation& forward_propagation,
        LossIndexBackPropagation& back_propagation,
        QuasiNewtonMehtodData& optimization_data)
{
#ifdef OPENNN_DEBUG

    check();

#endif
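
    // Secant pair for the quasi-Newton update:
    // s = parameters - old_parameters, y = gradient - old_gradient.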
    optimization_data.parameters_difference.device(*thread_pool_device)
        = back_propagation.parameters - optimization_data.old_parameters;

    optimization_data.gradient_difference.device(*thread_pool_device)
        = back_propagation.gradient - optimization_data.old_gradient;

    optimization_data.old_parameters = back_propagation.parameters; // keep after computing parameters_difference

    // Get training direction

    if(optimization_data.epoch == 0
            || is_zero(optimization_data.parameters_difference)
            || is_zero(optimization_data.gradient_difference))
    {
        initialize_inverse_hessian_approximation(optimization_data);
    }
    else
    {
        calculate_inverse_hessian_approximation(optimization_data);
    }
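
    // The quasi-Newton training direction is d = -H g, with H the current inverse
    // hessian approximation and g the gradient. If the slope g^T d is not negative,
    // d is not a descent direction, so fall back to minus the gradient.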
    optimization_data.training_direction.device(*thread_pool_device)
        = -optimization_data.inverse_hessian.contract(back_propagation.gradient, A_B);

    optimization_data.training_slope.device(*thread_pool_device)
        = back_propagation.gradient.contract(optimization_data.training_direction, AT_B);

    if(optimization_data.training_slope(0) >= type(0))
    {
        optimization_data.training_direction.device(*thread_pool_device) = -back_propagation.gradient;
    }

    // Get learning rate

    optimization_data.initial_learning_rate = optimization_data.epoch == 0
        ? first_learning_rate
        : optimization_data.old_learning_rate;

    const pair<type,type> directional_point = learning_rate_algorithm.calculate_directional_point(
                batch,
                forward_propagation,
                back_propagation,
                optimization_data);

    optimization_data.learning_rate = directional_point.first;
    back_propagation.loss = directional_point.second;
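
    // If the line search returns a zero learning rate, nudge each parameter by
    // machine epsilon against the sign of its gradient component instead.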
    if(abs(optimization_data.learning_rate) > type(0))
    {
        optimization_data.parameters_increment.device(*thread_pool_device)
            = optimization_data.training_direction*optimization_data.learning_rate;

        back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
    }
    else
    {
        const Index parameters_number = back_propagation.parameters.size();

        for(Index i = 0; i < parameters_number; i++)
        {
            if(abs(back_propagation.gradient(i)) < type(NUMERIC_LIMITS_MIN))
            {
                optimization_data.parameters_increment(i) = type(0);
            }
            else if(back_propagation.gradient(i) > type(0))
            {
                back_propagation.parameters(i) -= numeric_limits<type>::epsilon();

                optimization_data.parameters_increment(i) = -numeric_limits<type>::epsilon();
            }
            else if(back_propagation.gradient(i) < type(0))
            {
                back_propagation.parameters(i) += numeric_limits<type>::epsilon();

                optimization_data.parameters_increment(i) = numeric_limits<type>::epsilon();
            }
        }

        optimization_data.learning_rate = optimization_data.initial_learning_rate;
    }

    // Update old values

    optimization_data.old_gradient = back_propagation.gradient;

    optimization_data.old_inverse_hessian = optimization_data.inverse_hessian;

    optimization_data.old_learning_rate = optimization_data.learning_rate;

    // Set parameters

    NeuralNetwork* neural_network_pointer = forward_propagation.neural_network_pointer;

    neural_network_pointer->set_parameters(back_propagation.parameters);
}

/// Trains a neural network with an associated loss index according to the quasi-Newton method.
/// Training occurs according to the training operators, the training parameters and the stopping criteria.
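///
/// A minimal usage sketch, assuming a MeanSquaredError object named
/// mean_squared_error has already been associated with a data set and a
/// neural network:
///
///   QuasiNewtonMethod quasi_newton_method(&mean_squared_error);
///   quasi_newton_method.set_loss_goal(type(1.0e-3));
///   quasi_newton_method.set_maximum_epochs_number(100);
///   const TrainingResults results = quasi_newton_method.perform_training();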

TrainingResults QuasiNewtonMethod::perform_training()
{
#ifdef OPENNN_DEBUG

    check();

#endif

    // Start training

    if(display) cout << "Training with quasi-Newton method...\n";

    TrainingResults results(maximum_epochs_number+1);

    // Data set

    DataSet* data_set_pointer = loss_index_pointer->get_data_set_pointer();

    // Loss index

    const string error_type = loss_index_pointer->get_error_type();

    const Index training_samples_number = data_set_pointer->get_training_samples_number();

    const Index selection_samples_number = data_set_pointer->get_selection_samples_number();
    const bool has_selection = data_set_pointer->has_selection();

    const Tensor<Index, 1> training_samples_indices = data_set_pointer->get_training_samples_indices();
    const Tensor<Index, 1> selection_samples_indices = data_set_pointer->get_selection_samples_indices();

    const Tensor<Index, 1> input_variables_indices = data_set_pointer->get_input_variables_indices();
    const Tensor<Index, 1> target_variables_indices = data_set_pointer->get_target_variables_indices();

    const Tensor<string, 1> inputs_names = data_set_pointer->get_input_variables_names();
    const Tensor<string, 1> targets_names = data_set_pointer->get_target_variables_names();

    const Tensor<Scaler, 1> input_variables_scalers = data_set_pointer->get_input_variables_scalers();
    const Tensor<Scaler, 1> target_variables_scalers = data_set_pointer->get_target_variables_scalers();

    Tensor<Descriptives, 1> input_variables_descriptives;
    Tensor<Descriptives, 1> target_variables_descriptives;

    // Neural network

    NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    NeuralNetworkForwardPropagation training_forward_propagation(training_samples_number, neural_network_pointer);
    NeuralNetworkForwardPropagation selection_forward_propagation(selection_samples_number, neural_network_pointer);

    neural_network_pointer->set_inputs_names(inputs_names);
    neural_network_pointer->set_outputs_names(targets_names);

    if(neural_network_pointer->has_scaling_layer())
    {
        input_variables_descriptives = data_set_pointer->scale_input_variables();

        ScalingLayer* scaling_layer_pointer = neural_network_pointer->get_scaling_layer_pointer();
        scaling_layer_pointer->set(input_variables_descriptives, input_variables_scalers);
    }

    if(neural_network_pointer->has_unscaling_layer())
    {
        target_variables_descriptives = data_set_pointer->scale_target_variables();

        UnscalingLayer* unscaling_layer_pointer = neural_network_pointer->get_unscaling_layer_pointer();
        unscaling_layer_pointer->set(target_variables_descriptives, target_variables_scalers);
    }

    DataSetBatch training_batch(training_samples_number, data_set_pointer);
    training_batch.fill(training_samples_indices, input_variables_indices, target_variables_indices);

    DataSetBatch selection_batch(selection_samples_number, data_set_pointer);
    selection_batch.fill(selection_samples_indices, input_variables_indices, target_variables_indices);

    // Loss index

    loss_index_pointer->set_normalization_coefficient();

    LossIndexBackPropagation training_back_propagation(training_samples_number, loss_index_pointer);
    LossIndexBackPropagation selection_back_propagation(selection_samples_number, loss_index_pointer);

    // Optimization algorithm

    bool stop_training = false;

    Index selection_failures = 0;

    type old_loss = type(0);
    type loss_decrease = numeric_limits<type>::max();

    time_t beginning_time, current_time;
    time(&beginning_time);
    type elapsed_time;

    QuasiNewtonMehtodData optimization_data(this);

    // Main loop

    for(Index epoch = 0; epoch <= maximum_epochs_number; epoch++)
    {
        if(display && epoch%display_period == 0) cout << "Epoch: " << epoch << endl;

        optimization_data.epoch = epoch;

        // Neural network

        neural_network_pointer->forward_propagate(training_batch, training_forward_propagation);

        loss_index_pointer->back_propagate(training_batch, training_forward_propagation, training_back_propagation);

        results.training_error_history(epoch) = training_back_propagation.error;

        // Selection error

        if(has_selection)
        {
            neural_network_pointer->forward_propagate(selection_batch, selection_forward_propagation);

            // Loss index

            loss_index_pointer->calculate_errors(selection_batch, selection_forward_propagation, selection_back_propagation);
            loss_index_pointer->calculate_error(selection_batch, selection_forward_propagation, selection_back_propagation);

            results.selection_error_history(epoch) = selection_back_propagation.error;

            if(epoch != 0 && results.selection_error_history(epoch) > results.selection_error_history(epoch-1)) selection_failures++;
        }

        time(&current_time);
        elapsed_time = static_cast<type>(difftime(current_time, beginning_time));

        if(display && epoch%display_period == 0)
        {
            cout << "Training error: " << training_back_propagation.error << endl;
            if(has_selection) cout << "Selection error: " << selection_back_propagation.error << endl;
            cout << "Learning rate: " << optimization_data.learning_rate << endl;
            cout << "Elapsed time: " << write_time(elapsed_time) << endl;
        }

        if(epoch != 0) loss_decrease = old_loss - training_back_propagation.loss;

        if(loss_decrease < minimum_loss_decrease)
        {
            if(display) cout << "Epoch " << epoch << endl << "Minimum loss decrease reached: " << loss_decrease << endl;

            stop_training = true;

            results.stopping_condition = OptimizationAlgorithm::StoppingCondition::MinimumLossDecrease;
        }

        old_loss = training_back_propagation.loss;

        if(training_back_propagation.loss <= training_loss_goal)
        {
            if(display) cout << "Epoch " << epoch << endl << "Loss goal reached: " << training_back_propagation.loss << endl;

            stop_training = true;

            results.stopping_condition = OptimizationAlgorithm::StoppingCondition::LossGoal;
        }
        else if(selection_failures >= maximum_selection_failures)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum selection failures reached: " << selection_failures << endl;

            stop_training = true;

            results.stopping_condition = OptimizationAlgorithm::StoppingCondition::MaximumSelectionErrorIncreases;
        }
        else if(epoch == maximum_epochs_number)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum number of epochs reached: " << epoch << endl;

            stop_training = true;

            results.stopping_condition = OptimizationAlgorithm::StoppingCondition::MaximumEpochsNumber;
        }
        else if(elapsed_time >= maximum_time)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum training time reached: " << write_time(elapsed_time) << endl;

            stop_training = true;

            results.stopping_condition = OptimizationAlgorithm::StoppingCondition::MaximumTime;
        }

        if(stop_training)
        {
            results.resize_training_error_history(epoch+1);
            if(has_selection) results.resize_selection_error_history(epoch+1);
            else results.resize_selection_error_history(0);

            results.elapsed_time = write_time(elapsed_time);

            break;
        }

        if(epoch != 0 && epoch % save_period == 0) neural_network_pointer->save(neural_network_file_name);

        update_parameters(training_batch, training_forward_propagation, training_back_propagation, optimization_data);
    }

    if(neural_network_pointer->has_scaling_layer())
        data_set_pointer->unscale_input_variables(input_variables_descriptives);

    if(neural_network_pointer->has_unscaling_layer())
        data_set_pointer->unscale_target_variables(target_variables_descriptives);

    if(display) results.print();

    return results;
}

string QuasiNewtonMethod::write_optimization_algorithm_type() const
{
    return "QUASI_NEWTON_METHOD";
}

/// Serializes the quasi-Newton method object into an XML document of the TinyXML library
/// without keeping the DOM tree in memory.
/// @param file_stream TinyXML file to append the quasi-Newton method element to.

void QuasiNewtonMethod::write_XML(tinyxml2::XMLPrinter& file_stream) const
{
    ostringstream buffer;

    file_stream.OpenElement("QuasiNewtonMethod");

    // Inverse hessian approximation method

    file_stream.OpenElement("InverseHessianApproximationMethod");

    file_stream.PushText(write_inverse_hessian_approximation_method().c_str());

    file_stream.CloseElement();

    // Learning rate algorithm

    learning_rate_algorithm.write_XML(file_stream);

    // Minimum loss decrease

    file_stream.OpenElement("MinimumLossDecrease");

    buffer.str("");
    buffer << minimum_loss_decrease;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Loss goal

    file_stream.OpenElement("LossGoal");

    buffer.str("");
    buffer << training_loss_goal;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Maximum selection error increases

    file_stream.OpenElement("MaximumSelectionErrorIncreases");

    buffer.str("");
    buffer << maximum_selection_failures;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Maximum epochs number

    file_stream.OpenElement("MaximumEpochsNumber");

    buffer.str("");
    buffer << maximum_epochs_number;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Maximum time

    file_stream.OpenElement("MaximumTime");

    buffer.str("");
    buffer << maximum_time;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    // Hardware use

    file_stream.OpenElement("HardwareUse");

    buffer.str("");
    buffer << hardware_use;

    file_stream.PushText(buffer.str().c_str());

    file_stream.CloseElement();

    file_stream.CloseElement();
}

/// Writes as a matrix of strings the most representative attributes.

Tensor<string, 2> QuasiNewtonMethod::to_string_matrix() const
{
    Tensor<string, 2> labels_values(8, 2);

    // Inverse hessian approximation method

    labels_values(0,0) = "Inverse hessian approximation method";
    labels_values(0,1) = write_inverse_hessian_approximation_method();

    // Learning rate method

    labels_values(1,0) = "Learning rate method";
    labels_values(1,1) = learning_rate_algorithm.write_learning_rate_method();

    // Learning rate tolerance

    labels_values(2,0) = "Learning rate tolerance";
    labels_values(2,1) = to_string(double(learning_rate_algorithm.get_learning_rate_tolerance()));

    // Minimum loss decrease

    labels_values(3,0) = "Minimum loss decrease";
    labels_values(3,1) = to_string(double(minimum_loss_decrease));

    // Loss goal

    labels_values(4,0) = "Loss goal";
    labels_values(4,1) = to_string(double(training_loss_goal));

    // Maximum selection error increases

    labels_values(5,0) = "Maximum selection error increases";
    labels_values(5,1) = to_string(maximum_selection_failures);

    // Maximum epochs number

    labels_values(6,0) = "Maximum epochs number";
    labels_values(6,1) = to_string(maximum_epochs_number);

    // Maximum time

    labels_values(7,0) = "Maximum time";
    labels_values(7,1) = write_time(maximum_time);

    return labels_values;
}
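
/// Loads the members of this quasi-Newton method object from an XML document.
/// @param document TinyXML document containing a QuasiNewtonMethod element.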

void QuasiNewtonMethod::from_XML(const tinyxml2::XMLDocument& document)
{
    const tinyxml2::XMLElement* root_element = document.FirstChildElement("QuasiNewtonMethod");

    if(!root_element)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
               << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
               << "Quasi-Newton method element is nullptr.\n";

        throw logic_error(buffer.str());
    }

    // Inverse hessian approximation method
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("InverseHessianApproximationMethod");

        if(element)
        {
            const string new_inverse_hessian_approximation_method = element->GetText();

            try
            {
                set_inverse_hessian_approximation_method(new_inverse_hessian_approximation_method);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Learning rate algorithm
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("LearningRateAlgorithm");

        if(element)
        {
            tinyxml2::XMLDocument learning_rate_algorithm_document;
            tinyxml2::XMLNode* element_clone;

            element_clone = element->DeepClone(&learning_rate_algorithm_document);

            learning_rate_algorithm_document.InsertFirstChild(element_clone);

            learning_rate_algorithm.from_XML(learning_rate_algorithm_document);
        }
    }

    // Minimum loss decrease
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("MinimumLossDecrease");

        if(element)
        {
            const type new_minimum_loss_decrease = static_cast<type>(atof(element->GetText()));

            try
            {
                set_minimum_loss_decrease(new_minimum_loss_decrease);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Loss goal
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("LossGoal");

        if(element)
        {
            const type new_loss_goal = static_cast<type>(atof(element->GetText()));

            try
            {
                set_loss_goal(new_loss_goal);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Maximum selection error increases
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumSelectionErrorIncreases");

        if(element)
        {
            const Index new_maximum_selection_failures = static_cast<Index>(atoi(element->GetText()));

            try
            {
                set_maximum_selection_failures(new_maximum_selection_failures);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Maximum epochs number
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumEpochsNumber");

        if(element)
        {
            const Index new_maximum_epochs_number = static_cast<Index>(atoi(element->GetText()));

            try
            {
                set_maximum_epochs_number(new_maximum_epochs_number);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Maximum time
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumTime");

        if(element)
        {
            const type new_maximum_time = static_cast<type>(atof(element->GetText()));

            try
            {
                set_maximum_time(new_maximum_time);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Hardware use
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("HardwareUse");

        if(element)
        {
            const string new_hardware_use = element->GetText();

            try
            {
                set_hardware_use(new_hardware_use);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }
}

}

// OpenNN: Open Neural Networks Library.
// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA