conjugate_gradient.cpp
// OpenNN: Open Neural Networks Library
// www.opennn.net
//
// C O N J U G A T E   G R A D I E N T   C L A S S
//
// Artificial Intelligence Techniques SL
// artelnics@artelnics.com

#include "conjugate_gradient.h"
#include "training_strategy.h"

namespace OpenNN
{

/// Default constructor.
/// It creates a conjugate gradient optimization algorithm object not associated to any loss index object.
/// It also initializes the class members to their default values.

ConjugateGradient::ConjugateGradient()
    : OptimizationAlgorithm()
{
    set_default();
}


/// Loss index constructor.
/// It creates a conjugate gradient optimization algorithm associated to a loss index object.
/// It also initializes the rest of the class members to their default values.
/// @param new_loss_index_pointer Pointer to a loss index object.

ConjugateGradient::ConjugateGradient(LossIndex* new_loss_index_pointer)
    : OptimizationAlgorithm(new_loss_index_pointer)
{
    learning_rate_algorithm.set_loss_index_pointer(new_loss_index_pointer);

    set_default();
}


/// Destructor.

ConjugateGradient::~ConjugateGradient()
{
}


/// Returns a constant reference to the learning rate algorithm object inside the conjugate gradient method object.

const LearningRateAlgorithm& ConjugateGradient::get_learning_rate_algorithm() const
{
    return learning_rate_algorithm;
}


/// Returns a pointer to the learning rate algorithm object inside the conjugate gradient method object.

LearningRateAlgorithm* ConjugateGradient::get_learning_rate_algorithm_pointer()
{
    return &learning_rate_algorithm;
}


/// Returns the conjugate gradient training direction method used for training.

const ConjugateGradient::TrainingDirectionMethod& ConjugateGradient::get_training_direction_method() const
{
    return training_direction_method;
}


/// Returns a string with the name of the training direction.

string ConjugateGradient::write_training_direction_method() const
{
    switch(training_direction_method)
    {
    case TrainingDirectionMethod::PR:
        return "PR";

    case TrainingDirectionMethod::FR:
        return "FR";
    }

    return string();
}


/// Returns the minimum loss improvement during training.

const type& ConjugateGradient::get_minimum_loss_decrease() const
{
    return minimum_loss_decrease;
}


/// Returns the goal value for the loss.
/// This is used as a stopping criterion when training a neural network.

const type& ConjugateGradient::get_loss_goal() const
{
    return training_loss_goal;
}


/// Returns the maximum number of selection error increases during the training process.

const Index& ConjugateGradient::get_maximum_selection_failures() const
{
    return maximum_selection_failures;
}


/// Returns the maximum number of epochs for training.

const Index& ConjugateGradient::get_maximum_epochs_number() const
{
    return maximum_epochs_number;
}


/// Returns the maximum training time.

const type& ConjugateGradient::get_maximum_time() const
{
    return maximum_time;
}


/// Sets a pointer to a loss index object to be associated to the conjugate gradient object.
/// It also sets that loss index to the learning rate algorithm.
/// @param new_loss_index_pointer Pointer to a loss index object.

void ConjugateGradient::set_loss_index_pointer(LossIndex* new_loss_index_pointer)
{
    loss_index_pointer = new_loss_index_pointer;

    learning_rate_algorithm.set_loss_index_pointer(new_loss_index_pointer);
}


/// Sets a new training direction method to be used for training.
/// @param new_training_direction_method Conjugate gradient training direction method.

void ConjugateGradient::set_training_direction_method
(const ConjugateGradient::TrainingDirectionMethod& new_training_direction_method)
{
    training_direction_method = new_training_direction_method;
}


/// Sets a new conjugate gradient training direction from a string representation.
/// Possible values are:
/// <ul>
/// <li> "PR"
/// <li> "FR"
/// </ul>
/// @param new_training_direction_method_name String with the name of the training direction method.

void ConjugateGradient::set_training_direction_method(const string& new_training_direction_method_name)
{
    if(new_training_direction_method_name == "PR")
    {
        training_direction_method = TrainingDirectionMethod::PR;
    }
    else if(new_training_direction_method_name == "FR")
    {
        training_direction_method = TrainingDirectionMethod::FR;
    }
    else
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void set_training_direction_method(const string&) method.\n"
               << "Unknown training direction method: " << new_training_direction_method_name << ".\n";

        throw logic_error(buffer.str());
    }
}


/// Sets the members of the optimization algorithm object to their default values.

void ConjugateGradient::set_default()
{
    // Stopping criteria

    minimum_loss_decrease = type(0);
    training_loss_goal = type(0);
    maximum_selection_failures = 1000000;

    maximum_epochs_number = 1000;
    maximum_time = type(3600.0);

    // UTILITIES

    display_period = 10;

    training_direction_method = TrainingDirectionMethod::FR;
}


/// Sets a new minimum loss improvement during training.
/// @param new_minimum_loss_decrease Minimum improvement in the loss between two epochs.

void ConjugateGradient::set_minimum_loss_decrease(const type& new_minimum_loss_decrease)
{
    minimum_loss_decrease = new_minimum_loss_decrease;
}


/// Sets a new goal value for the loss.
/// This is used as a stopping criterion when training a neural network.
/// @param new_loss_goal Goal value for the loss.

void ConjugateGradient::set_loss_goal(const type& new_loss_goal)
{
    training_loss_goal = new_loss_goal;
}


/// Sets a new maximum number of selection error increases.
/// @param new_maximum_selection_failures Maximum number of epochs in which the selection error increases.

void ConjugateGradient::set_maximum_selection_failures(const Index& new_maximum_selection_failures)
{
    maximum_selection_failures = new_maximum_selection_failures;
}


/// Sets a new maximum number of epochs for training.
/// @param new_maximum_epochs_number Maximum number of epochs.

void ConjugateGradient::set_maximum_epochs_number(const Index& new_maximum_epochs_number)
{
    maximum_epochs_number = new_maximum_epochs_number;
}


/// Sets a new maximum training time.
/// @param new_maximum_time Maximum training time.

void ConjugateGradient::set_maximum_time(const type& new_maximum_time)
{
#ifdef OPENNN_DEBUG

    if(new_maximum_time < static_cast<type>(0.0))
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void set_maximum_time(const type&) method.\n"
               << "Maximum time must be equal or greater than 0.\n";

        throw logic_error(buffer.str());
    }

#endif

    // Set maximum time

    maximum_time = new_maximum_time;
}


/// Sets a new number of epochs between the training saving progress.
/// @param new_save_period Number of epochs between the training saving progress.

void ConjugateGradient::set_save_period(const Index& new_save_period)
{
#ifdef OPENNN_DEBUG

    if(new_save_period <= 0)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void set_save_period(const type&) method.\n"
               << "Save period must be greater than 0.\n";

        throw logic_error(buffer.str());
    }

#endif

    save_period = new_save_period;
}


/// Returns the Fletcher-Reeves parameter used to calculate the training direction.
/// @param old_gradient Previous error function gradient.
/// @param gradient Current error function gradient.

type ConjugateGradient::calculate_FR_parameter(const Tensor<type, 1>& old_gradient, const Tensor<type, 1>& gradient) const
{
#ifdef OPENNN_DEBUG

    ostringstream buffer;

    if(!loss_index_pointer)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "type calculate_FR_parameter(const Tensor<type, 1>&, const Tensor<type, 1>&) const method.\n"
               << "Loss index pointer is nullptr.\n";

        throw logic_error(buffer.str());
    }

    const NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    const Index parameters_number = neural_network_pointer->get_parameters_number();

    const Index old_gradient_size = old_gradient.size();

    if(old_gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "type calculate_FR_parameter(const Tensor<type, 1>&, const Tensor<type, 1>&) const method.\n"
               << "Size of old gradient (" << old_gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index gradient_size = gradient.size();

    if(gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "type calculate_FR_parameter(const Tensor<type, 1>&, const Tensor<type, 1>&) const method.\n"
               << "Size of gradient (" << gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

#endif

    type FR_parameter = type(0);

    Tensor<type, 0> numerator;
    Tensor<type, 0> denominator;

    numerator.device(*thread_pool_device) = gradient.contract(gradient, AT_B);
    denominator.device(*thread_pool_device) = old_gradient.contract(old_gradient, AT_B);

    // Prevent a possible division by 0

    if(abs(denominator(0)) < type(NUMERIC_LIMITS_MIN))
    {
        FR_parameter = type(0);
    }
    else
    {
        FR_parameter = numerator(0)/denominator(0);
    }

    // Bound the Fletcher-Reeves parameter between 0 and 1

    if(FR_parameter < static_cast<type>(0.0))
    {
        FR_parameter = type(0);
    }
    else if(FR_parameter > static_cast<type>(1.0))
    {
        FR_parameter = type(1);
    }

    return FR_parameter;
}
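
// With current gradient g and previous gradient g_old, the function above computes
// the classical Fletcher-Reeves coefficient beta_FR = (g . g)/(g_old . g_old),
// returning 0 when the denominator underflows and clamping the result to [0, 1].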


/// Returns the Polak-Ribiere parameter used to calculate the training direction.
/// @param old_gradient Previous error function gradient.
/// @param gradient Current error function gradient.

type ConjugateGradient::calculate_PR_parameter(const Tensor<type, 1>& old_gradient, const Tensor<type, 1>& gradient) const
{
#ifdef OPENNN_DEBUG

    ostringstream buffer;

    if(!loss_index_pointer)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "type calculate_PR_parameter(const Tensor<type, 1>&, const Tensor<type, 1>&) const method.\n"
               << "Loss index pointer is nullptr.\n";

        throw logic_error(buffer.str());
    }

    const NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    const Index parameters_number = neural_network_pointer->get_parameters_number();

    const Index old_gradient_size = old_gradient.size();

    if(old_gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "type calculate_PR_parameter(const Tensor<type, 1>&, const Tensor<type, 1>&) const method.\n"
               << "Size of old gradient (" << old_gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index gradient_size = gradient.size();

    if(gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "type calculate_PR_parameter(const Tensor<type, 1>&, const Tensor<type, 1>&) const method.\n"
               << "Size of gradient (" << gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

#endif

    type PR_parameter = type(0);

    Tensor<type, 0> numerator;
    Tensor<type, 0> denominator;

    numerator.device(*thread_pool_device) = (gradient-old_gradient).contract(gradient, AT_B);
    denominator.device(*thread_pool_device) = old_gradient.contract(old_gradient, AT_B);

    // Prevent a possible division by 0

    if(abs(denominator(0)) < type(NUMERIC_LIMITS_MIN))
    {
        PR_parameter = type(0);
    }
    else
    {
        PR_parameter = numerator(0)/denominator(0);
    }

    // Bound the Polak-Ribiere parameter between 0 and 1

    if(PR_parameter < static_cast<type>(0.0))
    {
        PR_parameter = type(0);
    }
    else if(PR_parameter > static_cast<type>(1.0))
    {
        PR_parameter = type(1);
    }

    return PR_parameter;
}
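
// With current gradient g and previous gradient g_old, the function above computes
// the classical Polak-Ribiere coefficient beta_PR = ((g - g_old) . g)/(g_old . g_old),
// returning 0 when the denominator underflows and clamping the result to [0, 1];
// a negative beta_PR therefore falls back to the plain gradient descent direction.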


/// Computes the training direction using the Polak-Ribiere update.
/// @param old_gradient Previous error function gradient.
/// @param gradient Current error function gradient.
/// @param old_training_direction Previous training direction.

void ConjugateGradient::calculate_PR_training_direction(const Tensor<type, 1>& old_gradient,
                                                        const Tensor<type, 1>& gradient,
                                                        const Tensor<type, 1>& old_training_direction,
                                                        Tensor<type, 1>& training_direction) const
{
#ifdef OPENNN_DEBUG

    ostringstream buffer;

    if(!loss_index_pointer)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_PR_training_direction() const method.\n"
               << "Loss index pointer is nullptr.\n";

        throw logic_error(buffer.str());
    }

    const NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    const Index parameters_number = neural_network_pointer->get_parameters_number();

    const Index old_gradient_size = old_gradient.size();

    if(old_gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_PR_training_direction() const method.\n"
               << "Size of old gradient (" << old_gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index gradient_size = gradient.size();

    if(gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_PR_training_direction() const method.\n"
               << "Size of gradient (" << gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index old_training_direction_size = old_training_direction.size();

    if(old_training_direction_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_PR_training_direction() const method.\n"
               << "Size of old training direction (" << old_training_direction_size
               << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

#endif

    const type PR_parameter = calculate_PR_parameter(old_gradient, gradient);

    training_direction.device(*thread_pool_device) = -gradient + old_training_direction*PR_parameter;
}


/// Computes the training direction using the Fletcher-Reeves update.
/// @param old_gradient Previous error function gradient.
/// @param gradient Current error function gradient.
/// @param old_training_direction Previous training direction.

void ConjugateGradient::calculate_FR_training_direction(const Tensor<type, 1>& old_gradient,
                                                        const Tensor<type, 1>& gradient,
                                                        const Tensor<type, 1>& old_training_direction,
                                                        Tensor<type, 1>& training_direction) const
{
#ifdef OPENNN_DEBUG

    ostringstream buffer;

    if(!loss_index_pointer)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_FR_training_direction() const method.\n"
               << "Loss index pointer is nullptr.\n";

        throw logic_error(buffer.str());
    }

    const NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    const Index parameters_number = neural_network_pointer->get_parameters_number();

    const Index old_gradient_size = old_gradient.size();

    if(old_gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_FR_training_direction() const method.\n"
               << "Size of old gradient (" << old_gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index gradient_size = gradient.size();

    if(gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_FR_training_direction() const method.\n"
               << "Size of gradient (" << gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index old_training_direction_size = old_training_direction.size();

    if(old_training_direction_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_FR_training_direction() const method.\n"
               << "Size of old training direction (" << old_training_direction_size
               << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

#endif

    const type FR_parameter = calculate_FR_parameter(old_gradient, gradient);

    training_direction.device(*thread_pool_device) = -gradient + old_training_direction*FR_parameter;
}
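
// Both conjugate updates above build the new search direction as
// d = -g + beta*d_old, where beta is the FR or PR coefficient;
// beta = 0 reduces the update to plain gradient descent.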


/// \brief ConjugateGradient::calculate_gradient_descent_training_direction
/// \param gradient
/// \param training_direction

void ConjugateGradient::calculate_gradient_descent_training_direction(const Tensor<type, 1>& gradient,
                                                                      Tensor<type, 1>& training_direction) const
{
    training_direction.device(*thread_pool_device) = -gradient;
}


/// Computes the conjugate gradient training direction according to the current training direction method.
/// @param old_gradient Previous error function gradient.
/// @param gradient Current error function gradient.
/// @param old_training_direction Previous training direction.

void ConjugateGradient::calculate_training_direction(const Tensor<type, 1>& old_gradient,
                                                     const Tensor<type, 1>& gradient,
                                                     const Tensor<type, 1>& old_training_direction,
                                                     Tensor<type, 1>& training_direction) const
{
#ifdef OPENNN_DEBUG

    ostringstream buffer;

    if(!loss_index_pointer)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_training_direction() const method.\n"
               << "Loss index pointer is nullptr.\n";

        throw logic_error(buffer.str());
    }

    const NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    const Index parameters_number = neural_network_pointer->get_parameters_number();

    const Index old_gradient_size = old_gradient.size();

    if(old_gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_training_direction() const method.\n"
               << "Size of old gradient (" << old_gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index gradient_size = gradient.size();

    if(gradient_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_training_direction() const method.\n"
               << "Size of gradient (" << gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index old_training_direction_size = old_training_direction.size();

    if(old_training_direction_size != parameters_number)
    {
        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void calculate_training_direction() const method.\n"
               << "Size of old training direction (" << old_training_direction_size
               << ") is not equal to number of parameters (" << parameters_number << ").\n";

        throw logic_error(buffer.str());
    }

#endif

    switch(training_direction_method)
    {
    case TrainingDirectionMethod::FR:
        calculate_FR_training_direction(old_gradient, gradient, old_training_direction, training_direction);
        return;

    case TrainingDirectionMethod::PR:
        calculate_PR_training_direction(old_gradient, gradient, old_training_direction, training_direction);
        return;
    }
}


/// Trains a neural network with an associated loss index according to the conjugate gradient algorithm.
/// Training occurs according to the training operators, training parameters and stopping criteria.

TrainingResults ConjugateGradient::perform_training()
{
    check();

    // Start training

    if(display) cout << "Training with conjugate gradient...\n";

    TrainingResults results(maximum_epochs_number+1);

    // Elapsed time

    time_t beginning_time, current_time;
    time(&beginning_time);
    type elapsed_time = type(0);

    // Data set

    DataSet* data_set_pointer = loss_index_pointer->get_data_set_pointer();

    const Index training_samples_number = data_set_pointer->get_training_samples_number();
    const Index selection_samples_number = data_set_pointer->get_selection_samples_number();
    const bool has_selection = data_set_pointer->has_selection();

    const Tensor<Index, 1> training_samples_indices = data_set_pointer->get_training_samples_indices();
    const Tensor<Index, 1> selection_samples_indices = data_set_pointer->get_selection_samples_indices();

    const Tensor<Index, 1> input_variables_indices = data_set_pointer->get_input_variables_indices();
    const Tensor<Index, 1> target_variables_indices = data_set_pointer->get_target_variables_indices();

    const Tensor<string, 1> inputs_names = data_set_pointer->get_input_variables_names();
    const Tensor<string, 1> targets_names = data_set_pointer->get_target_variables_names();

    const Tensor<Scaler, 1> input_variables_scalers = data_set_pointer->get_input_variables_scalers();
    const Tensor<Scaler, 1> target_variables_scalers = data_set_pointer->get_target_variables_scalers();

    const Tensor<Descriptives, 1> input_variables_descriptives = data_set_pointer->scale_input_variables();
    Tensor<Descriptives, 1> target_variables_descriptives;

    // Neural network

    NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    if(neural_network_pointer->has_scaling_layer())
    {
        ScalingLayer* scaling_layer_pointer = neural_network_pointer->get_scaling_layer_pointer();
        scaling_layer_pointer->set(input_variables_descriptives, input_variables_scalers);
    }

    if(neural_network_pointer->has_unscaling_layer())
    {
        target_variables_descriptives = data_set_pointer->scale_target_variables();

        UnscalingLayer* unscaling_layer_pointer = neural_network_pointer->get_unscaling_layer_pointer();
        unscaling_layer_pointer->set(target_variables_descriptives, target_variables_scalers);
    }

    DataSetBatch training_batch(training_samples_number, data_set_pointer);
    training_batch.fill(training_samples_indices, input_variables_indices, target_variables_indices);

    DataSetBatch selection_batch(selection_samples_number, data_set_pointer);
    selection_batch.fill(selection_samples_indices, input_variables_indices, target_variables_indices);

    NeuralNetworkForwardPropagation training_forward_propagation(training_samples_number, neural_network_pointer);
    NeuralNetworkForwardPropagation selection_forward_propagation(selection_samples_number, neural_network_pointer);

    // Loss index

    string information;

    loss_index_pointer->set_normalization_coefficient();

    LossIndexBackPropagation training_back_propagation(training_samples_number, loss_index_pointer);
    LossIndexBackPropagation selection_back_propagation(selection_samples_number, loss_index_pointer);

    // Optimization algorithm

    type old_loss = type(0);
    type loss_decrease = numeric_limits<type>::max();

    bool stop_training = false;

    Index selection_failures = 0;

    ConjugateGradientData optimization_data(this);

    // Main loop

    for(Index epoch = 0; epoch <= maximum_epochs_number; epoch++)
    {
        if(display && epoch%display_period == 0) cout << "Epoch: " << epoch << endl;

        optimization_data.epoch = epoch;

        // Neural network

        neural_network_pointer->forward_propagate(training_batch, training_forward_propagation);

        // Loss index

        loss_index_pointer->back_propagate(training_batch, training_forward_propagation, training_back_propagation);
        results.training_error_history(epoch) = training_back_propagation.error;

        if(has_selection)
        {
            neural_network_pointer->forward_propagate(selection_batch, selection_forward_propagation);

            loss_index_pointer->calculate_errors(selection_batch, selection_forward_propagation, selection_back_propagation);
            loss_index_pointer->calculate_error(selection_batch, selection_forward_propagation, selection_back_propagation);

            results.selection_error_history(epoch) = selection_back_propagation.error;

            if(epoch != 0 && results.selection_error_history(epoch) > results.selection_error_history(epoch-1)) selection_failures++;
        }

        // Optimization algorithm

        time(&current_time);
        elapsed_time = static_cast<type>(difftime(current_time, beginning_time));

        if(display && epoch%display_period == 0)
        {
            cout << "Training error: " << training_back_propagation.error << endl;
            if(has_selection) cout << "Selection error: " << selection_back_propagation.error << endl;
            cout << "Learning rate: " << optimization_data.learning_rate << endl;
            cout << "Elapsed time: " << write_time(elapsed_time) << endl;
        }

        // Stopping criteria

        if(training_back_propagation.loss <= training_loss_goal)
        {
            if(display) cout << "Epoch " << epoch << endl << "Loss goal reached: " << training_back_propagation.loss << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::LossGoal;
        }

        if(has_selection && selection_failures >= maximum_selection_failures)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum selection failures reached: " << selection_failures << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::MaximumSelectionErrorIncreases;
        }

        if(epoch == maximum_epochs_number)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum number of epochs reached: " << epoch << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::MaximumEpochsNumber;
        }

        if(elapsed_time >= maximum_time)
        {
            if(display) cout << "Epoch " << epoch << endl << "Maximum training time reached: " << write_time(elapsed_time) << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::MaximumTime;
        }

        if(epoch != 0) loss_decrease = old_loss - training_back_propagation.loss;

        if(loss_decrease <= minimum_loss_decrease)
        {
            if(display) cout << "Epoch " << epoch << endl << "Minimum loss decrease reached: " << minimum_loss_decrease << endl;

            stop_training = true;

            results.stopping_condition = StoppingCondition::MinimumLossDecrease;
        }

        old_loss = training_back_propagation.loss;

        if(stop_training)
        {
            results.resize_training_error_history(epoch+1);
            if(has_selection) results.resize_selection_error_history(epoch+1);
            else results.resize_selection_error_history(0);

            results.elapsed_time = write_time(elapsed_time);

            break;
        }

        // Update stuff

        if(epoch != 0 && epoch%save_period == 0) neural_network_pointer->save(neural_network_file_name);

        update_parameters(training_batch, training_forward_propagation, training_back_propagation, optimization_data);
    }

    data_set_pointer->unscale_input_variables(input_variables_descriptives);

    if(neural_network_pointer->has_unscaling_layer())
        data_set_pointer->unscale_target_variables(target_variables_descriptives);

    if(display) results.print();

    return results;
}
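
// Minimal usage sketch (not part of this file): a ConjugateGradient object is normally
// driven through a TrainingStrategy rather than called directly. The constructor
// arguments and enum spellings below follow the OpenNN v6 examples and are assumptions,
// not guaranteed by this file:
//
//     DataSet data_set("data.csv", ';', true);
//     NeuralNetwork neural_network(NeuralNetwork::ProjectType::Approximation, {1, 3, 1});
//
//     TrainingStrategy training_strategy(&neural_network, &data_set);
//     training_strategy.set_optimization_method(TrainingStrategy::OptimizationMethod::CONJUGATE_GRADIENT);
//
//     ConjugateGradient* conjugate_gradient_pointer = training_strategy.get_conjugate_gradient_pointer();
//     conjugate_gradient_pointer->set_training_direction_method(ConjugateGradient::TrainingDirectionMethod::PR);
//     conjugate_gradient_pointer->set_maximum_epochs_number(1000);
//
//     const TrainingResults results = training_strategy.perform_training();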


/// Writes the optimization algorithm type.

string ConjugateGradient::write_optimization_algorithm_type() const
{
    return "CONJUGATE_GRADIENT";
}


/// Writes as matrix of strings the most representative attributes.

Tensor<string, 2> ConjugateGradient::to_string_matrix() const
{
    Tensor<string, 2> labels_values(8, 2);

    // Training direction method

    labels_values(0,0) = "Training direction method";
    labels_values(0,1) = write_training_direction_method();

    // Learning rate method

    labels_values(1,0) = "Learning rate method";
    labels_values(1,1) = learning_rate_algorithm.write_learning_rate_method();

    // Learning rate tolerance

    labels_values(2,0) = "Learning rate tolerance";
    labels_values(2,1) = to_string(double(learning_rate_algorithm.get_learning_rate_tolerance()));

    // Minimum loss decrease

    labels_values(3,0) = "Minimum loss decrease";
    labels_values(3,1) = to_string(double(minimum_loss_decrease));

    // Loss goal

    labels_values(4,0) = "Loss goal";
    labels_values(4,1) = to_string(double(training_loss_goal));

    // Maximum selection error increases

    labels_values(5,0) = "Maximum selection error increases";
    labels_values(5,1) = to_string(maximum_selection_failures);

    // Maximum epochs number

    labels_values(6,0) = "Maximum epochs number";
    labels_values(6,1) = to_string(maximum_epochs_number);

    // Maximum time

    labels_values(7,0) = "Maximum time";
    labels_values(7,1) = write_time(maximum_time);

    return labels_values;
}


/// Serializes the conjugate gradient object into an XML document of the TinyXML library without keeping the DOM tree in memory.
/// See the OpenNN manual for more information about the format of this document.

void ConjugateGradient::write_XML(tinyxml2::XMLPrinter& file_stream) const
{
    ostringstream buffer;

    file_stream.OpenElement("ConjugateGradient");

    // Training direction method

    {
        file_stream.OpenElement("TrainingDirectionMethod");

        file_stream.PushText(write_training_direction_method().c_str());

        file_stream.CloseElement();
    }

    // Learning rate algorithm

    learning_rate_algorithm.write_XML(file_stream);

    // Minimum loss decrease

    {
        file_stream.OpenElement("MinimumLossDecrease");

        buffer.str("");
        buffer << minimum_loss_decrease;

        file_stream.PushText(buffer.str().c_str());

        file_stream.CloseElement();
    }

    // Loss goal

    {
        file_stream.OpenElement("LossGoal");

        buffer.str("");
        buffer << training_loss_goal;

        file_stream.PushText(buffer.str().c_str());

        file_stream.CloseElement();
    }

    // Maximum selection error increases

    {
        file_stream.OpenElement("MaximumSelectionErrorIncreases");

        buffer.str("");
        buffer << maximum_selection_failures;

        file_stream.PushText(buffer.str().c_str());

        file_stream.CloseElement();
    }

    // Maximum epochs number

    {
        file_stream.OpenElement("MaximumEpochsNumber");

        buffer.str("");
        buffer << maximum_epochs_number;

        file_stream.PushText(buffer.str().c_str());

        file_stream.CloseElement();
    }

    // Maximum time

    {
        file_stream.OpenElement("MaximumTime");

        buffer.str("");
        buffer << maximum_time;

        file_stream.PushText(buffer.str().c_str());

        file_stream.CloseElement();
    }

    // Hardware use

    {
        file_stream.OpenElement("HardwareUse");

        buffer.str("");
        buffer << hardware_use;

        file_stream.PushText(buffer.str().c_str());

        file_stream.CloseElement();
    }

    file_stream.CloseElement();
}
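
// For reference, the serialization above produces an element of roughly this shape
// (the LearningRateAlgorithm child is written by learning_rate_algorithm.write_XML;
// the values shown are illustrative, not defaults guaranteed by this file):
//
//     <ConjugateGradient>
//         <TrainingDirectionMethod>FR</TrainingDirectionMethod>
//         <LearningRateAlgorithm>...</LearningRateAlgorithm>
//         <MinimumLossDecrease>0</MinimumLossDecrease>
//         <LossGoal>0</LossGoal>
//         <MaximumSelectionErrorIncreases>1000000</MaximumSelectionErrorIncreases>
//         <MaximumEpochsNumber>1000</MaximumEpochsNumber>
//         <MaximumTime>3600</MaximumTime>
//         <HardwareUse>Multi-core</HardwareUse>
//     </ConjugateGradient>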


/// Deserializes a TinyXML document into this conjugate gradient object.
/// @param document TinyXML document containing the member data.

void ConjugateGradient::from_XML(const tinyxml2::XMLDocument& document)
{
    const tinyxml2::XMLElement* root_element = document.FirstChildElement("ConjugateGradient");

    if(!root_element)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: ConjugateGradient class.\n"
               << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
               << "Conjugate gradient element is nullptr.\n";

        throw logic_error(buffer.str());
    }

    // Training direction method
    {
        const tinyxml2::XMLElement* training_direction_method_element = root_element->FirstChildElement("TrainingDirectionMethod");

        if(training_direction_method_element)
        {
            const string new_training_direction_method = training_direction_method_element->GetText();

            try
            {
                set_training_direction_method(new_training_direction_method);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Learning rate algorithm
    {
        const tinyxml2::XMLElement* learning_rate_algorithm_element = root_element->FirstChildElement("LearningRateAlgorithm");

        if(learning_rate_algorithm_element)
        {
            tinyxml2::XMLDocument learning_rate_algorithm_document;
            tinyxml2::XMLNode* element_clone;

            element_clone = learning_rate_algorithm_element->DeepClone(&learning_rate_algorithm_document);

            learning_rate_algorithm_document.InsertFirstChild(element_clone);

            learning_rate_algorithm.from_XML(learning_rate_algorithm_document);
        }
    }

    // Minimum loss decrease
    {
        const tinyxml2::XMLElement* minimum_loss_decrease_element = root_element->FirstChildElement("MinimumLossDecrease");

        if(minimum_loss_decrease_element)
        {
            const type new_minimum_loss_decrease = static_cast<type>(atof(minimum_loss_decrease_element->GetText()));

            try
            {
                set_minimum_loss_decrease(new_minimum_loss_decrease);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Loss goal
    {
        const tinyxml2::XMLElement* loss_goal_element = root_element->FirstChildElement("LossGoal");

        if(loss_goal_element)
        {
            const type new_loss_goal = static_cast<type>(atof(loss_goal_element->GetText()));

            try
            {
                set_loss_goal(new_loss_goal);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Maximum selection error increases
    {
        const tinyxml2::XMLElement* maximum_selection_failures_element = root_element->FirstChildElement("MaximumSelectionErrorIncreases");

        if(maximum_selection_failures_element)
        {
            const Index new_maximum_selection_failures = static_cast<Index>(atoi(maximum_selection_failures_element->GetText()));

            try
            {
                set_maximum_selection_failures(new_maximum_selection_failures);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Maximum epochs number
    {
        const tinyxml2::XMLElement* maximum_epochs_number_element = root_element->FirstChildElement("MaximumEpochsNumber");

        if(maximum_epochs_number_element)
        {
            const Index new_maximum_epochs_number = static_cast<Index>(atoi(maximum_epochs_number_element->GetText()));

            try
            {
                set_maximum_epochs_number(new_maximum_epochs_number);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Maximum time
    {
        const tinyxml2::XMLElement* maximum_time_element = root_element->FirstChildElement("MaximumTime");

        if(maximum_time_element)
        {
            const type new_maximum_time = static_cast<type>(atof(maximum_time_element->GetText()));

            try
            {
                set_maximum_time(new_maximum_time);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Display period
    {
        const tinyxml2::XMLElement* display_period_element = root_element->FirstChildElement("DisplayPeriod");

        if(display_period_element)
        {
            const Index new_display_period = static_cast<Index>(atoi(display_period_element->GetText()));

            try
            {
                set_display_period(new_display_period);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Save period
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("SavePeriod");

        if(element)
        {
            const Index new_save_period = static_cast<Index>(atoi(element->GetText()));

            try
            {
                set_save_period(new_save_period);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Neural network file name
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("NeuralNetworkFileName");

        if(element)
        {
            const string new_neural_network_file_name = element->GetText();

            try
            {
                set_neural_network_file_name(new_neural_network_file_name);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Display
    {
        const tinyxml2::XMLElement* display_element = root_element->FirstChildElement("Display");

        if(display_element)
        {
            const string new_display = display_element->GetText();

            try
            {
                set_display(new_display != "0");
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }

    // Hardware use
    {
        const tinyxml2::XMLElement* element = root_element->FirstChildElement("HardwareUse");

        if(element)
        {
            const string new_hardware_use = element->GetText();

            try
            {
                set_hardware_use(new_hardware_use);
            }
            catch(const logic_error& e)
            {
                cerr << e.what() << endl;
            }
        }
    }
}


/// \brief ConjugateGradient::update_parameters
/// \param batch
/// \param forward_propagation
/// \param back_propagation
/// \param optimization_data

void ConjugateGradient::update_parameters(
        const DataSetBatch& batch,
        NeuralNetworkForwardPropagation& forward_propagation,
        LossIndexBackPropagation& back_propagation,
        ConjugateGradientData& optimization_data)
{
    const Index parameters_number = back_propagation.parameters.dimension(0);

    if(optimization_data.epoch == 0 || optimization_data.epoch % parameters_number == 0)
    {
        calculate_gradient_descent_training_direction(
                    back_propagation.gradient,
                    optimization_data.training_direction);
    }
    else
    {
        calculate_training_direction(
                    optimization_data.old_gradient,
                    back_propagation.gradient,
                    optimization_data.old_training_direction,
                    optimization_data.training_direction);
    }

    optimization_data.training_slope.device(*thread_pool_device)
            = (back_propagation.gradient).contract(optimization_data.training_direction, AT_B);

    if(optimization_data.training_slope(0) >= type(0))
    {
        calculate_gradient_descent_training_direction(
                    back_propagation.gradient,
                    optimization_data.training_direction);
    }

    // Get initial learning rate

    optimization_data.epoch == 0
            ? optimization_data.initial_learning_rate = first_learning_rate
            : optimization_data.initial_learning_rate = optimization_data.old_learning_rate;

    pair<type,type> directional_point = learning_rate_algorithm.calculate_directional_point(
                batch,
                forward_propagation,
                back_propagation,
                optimization_data);

    optimization_data.learning_rate = directional_point.first;
    back_propagation.loss = directional_point.second;

    if(abs(optimization_data.learning_rate) > type(0))
    {
        optimization_data.parameters_increment.device(*thread_pool_device)
                = optimization_data.training_direction*optimization_data.learning_rate;

        back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
    }
    else
    {
        const Index parameters_number = back_propagation.parameters.size();

        for(Index i = 0; i < parameters_number; i++)
        {
            if(abs(back_propagation.gradient(i)) < type(NUMERIC_LIMITS_MIN))
            {
                optimization_data.parameters_increment(i) = type(0);
            }
            else if(back_propagation.gradient(i) > type(0))
            {
                back_propagation.parameters(i) -= numeric_limits<type>::epsilon();

                optimization_data.parameters_increment(i) = -numeric_limits<type>::epsilon();
            }
            else if(back_propagation.gradient(i) < type(0))
            {
                back_propagation.parameters(i) += numeric_limits<type>::epsilon();

                optimization_data.parameters_increment(i) = numeric_limits<type>::epsilon();
            }
        }

        optimization_data.learning_rate = optimization_data.initial_learning_rate;
    }

    // Update stuff

    optimization_data.old_gradient = back_propagation.gradient;

    optimization_data.old_training_direction = optimization_data.training_direction;

    optimization_data.old_learning_rate = optimization_data.learning_rate;

    // Update parameters

    forward_propagation.neural_network_pointer->set_parameters(back_propagation.parameters);
}
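
// Note on update_parameters: the search direction is reset to plain gradient descent
// both periodically (whenever the epoch is a multiple of the number of parameters) and
// whenever the computed direction is not a descent direction (gradient . direction >= 0),
// a standard safeguard in nonlinear conjugate gradient implementations.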


/// Default constructor.

ConjugateGradientData::ConjugateGradientData()
{
}


/// Conjugate gradient constructor.
/// @param new_conjugate_gradient_pointer Pointer to a conjugate gradient object.

ConjugateGradientData::ConjugateGradientData(ConjugateGradient* new_conjugate_gradient_pointer)
{
    set(new_conjugate_gradient_pointer);
}


/// Destructor.

ConjugateGradientData::~ConjugateGradientData()
{
}


void ConjugateGradientData::set(ConjugateGradient* new_conjugate_gradient_pointer)
{
    conjugate_gradient_pointer = new_conjugate_gradient_pointer;

    LossIndex* loss_index_pointer = conjugate_gradient_pointer->get_loss_index_pointer();

    NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

    const Index parameters_number = neural_network_pointer->get_parameters_number();

    potential_parameters.resize(parameters_number);

    parameters_increment.resize(parameters_number);

    old_gradient.resize(parameters_number);

    training_direction.resize(parameters_number);
    old_training_direction.resize(parameters_number);
}


void ConjugateGradientData::print() const
{
}

}

// OpenNN: Open Neural Networks Library.
// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA