long_short_term_memory_layer.cpp

//   OpenNN: Open Neural Networks Library
//   www.opennn.net
//
//   L O N G   S H O R T   T E R M   M E M O R Y   L A Y E R   C L A S S
//
//   Artificial Intelligence Techniques SL
//   artelnics@artelnics.com

// OpenNN includes

#include "long_short_term_memory_layer.h"

namespace OpenNN
{

/// Default constructor.
/// It creates an empty long short-term memory layer.

LongShortTermMemoryLayer::LongShortTermMemoryLayer() : Layer()
{
    set();

    layer_type = Type::LongShortTermMemory;
}


/// Layer architecture constructor.
/// It creates a layer with given numbers of inputs and neurons.

LongShortTermMemoryLayer::LongShortTermMemoryLayer(const Index& new_inputs_number, const Index& new_neurons_number) : Layer()
{
    set(new_inputs_number, new_neurons_number);

    layer_type = Type::LongShortTermMemory;
}
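
// A minimal usage sketch (illustrative only; the variable name `layer` and the
// chosen sizes are hypothetical, not part of this file):
//
//     LongShortTermMemoryLayer layer(3, 2);               // 3 inputs, 2 LSTM neurons
//     layer.set_timesteps(10);                            // reset states every 10 samples
//     layer.set_activation_function("HyperbolicTangent");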


/// Destructor.

LongShortTermMemoryLayer::~LongShortTermMemoryLayer()
{
}


/// Returns the number of inputs to the layer.

Index LongShortTermMemoryLayer::get_inputs_number() const
{
    return input_weights.dimension(0);
}


/// Returns the number of neurons in the layer.

Index LongShortTermMemoryLayer::get_neurons_number() const
{
    return output_biases.size();
}


/// Returns the total number of parameters (biases, weights and recurrent weights) of the layer.

Index LongShortTermMemoryLayer::get_parameters_number() const
{
    Index neurons_number = get_neurons_number();
    Index inputs_number = get_inputs_number();

    return 4 * neurons_number * (1 + inputs_number + neurons_number);
}
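
// Each of the four gates (forget, input, state and output) owns one bias vector
// (neurons_number entries), one input-weight matrix (inputs_number x neurons_number)
// and one recurrent-weight matrix (neurons_number x neurons_number), hence
// 4*n*(1 + i + n). For example, 3 inputs and 2 neurons give 4*2*(1+3+2) = 48 parameters.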


/// Returns the forget biases of the layer.

Tensor<type, 1> LongShortTermMemoryLayer::get_forget_biases() const
{
    return forget_biases;
}


/// Returns the input biases of the layer.

Tensor<type, 1> LongShortTermMemoryLayer::get_input_biases() const
{
    return input_biases;
}


/// Returns the state biases of the layer.

Tensor<type, 1> LongShortTermMemoryLayer::get_state_biases() const
{
    return state_biases;
}


/// Returns the output biases of the layer.

Tensor<type, 1> LongShortTermMemoryLayer::get_output_biases() const
{
    return output_biases;
}


/// Returns the forget weights of the layer.

Tensor<type, 2> LongShortTermMemoryLayer::get_forget_weights() const
{
    return forget_weights;
}


/// Returns the input weights of the layer.

Tensor<type, 2> LongShortTermMemoryLayer::get_input_weights() const
{
    return input_weights;
}


/// Returns the state weights of the layer.

Tensor<type, 2> LongShortTermMemoryLayer::get_state_weights() const
{
    return state_weights;
}


/// Returns the output weights of the layer.

Tensor<type, 2> LongShortTermMemoryLayer::get_output_weights() const
{
    return output_weights;
}


/// Returns the forget recurrent weights of the layer.

Tensor<type, 2> LongShortTermMemoryLayer::get_forget_recurrent_weights() const
{
    return forget_recurrent_weights;
}


/// Returns the input recurrent weights of the layer.

Tensor<type, 2> LongShortTermMemoryLayer::get_input_recurrent_weights() const
{
    return input_recurrent_weights;
}


/// Returns the state recurrent weights of the layer.

Tensor<type, 2> LongShortTermMemoryLayer::get_state_recurrent_weights() const
{
    return state_recurrent_weights;
}


/// Returns the output recurrent weights of the layer.

Tensor<type, 2> LongShortTermMemoryLayer::get_output_recurrent_weights() const
{
    return output_recurrent_weights;
}


/// Returns the number of timesteps of the layer.

Index LongShortTermMemoryLayer::get_timesteps() const
{
    return timesteps;
}


/// Returns a single vector with all the layer parameters: the four bias vectors,
/// then the four weight matrices, then the four recurrent weight matrices.

Tensor<type, 1> LongShortTermMemoryLayer::get_parameters() const
{
    const Index parameters_number = get_parameters_number();

    Tensor<type, 1> parameters(parameters_number);

    Index current_position = 0;

    // Biases

    for(Index i = 0; i < forget_biases.size(); i++) fill_n(parameters.data()+current_position+i, 1, forget_biases(i));

    current_position += forget_biases.size();

    for(Index i = 0; i < input_biases.size(); i++) fill_n(parameters.data()+current_position+i, 1, input_biases(i));

    current_position += input_biases.size();

    for(Index i = 0; i < state_biases.size(); i++) fill_n(parameters.data()+current_position+i, 1, state_biases(i));

    current_position += state_biases.size();

    for(Index i = 0; i < output_biases.size(); i++) fill_n(parameters.data()+current_position+i, 1, output_biases(i));

    current_position += output_biases.size();

    // Weights

    for(Index i = 0; i < forget_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, forget_weights(i));

    current_position += forget_weights.size();

    for(Index i = 0; i < input_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, input_weights(i));

    current_position += input_weights.size();

    for(Index i = 0; i < state_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, state_weights(i));

    current_position += state_weights.size();

    for(Index i = 0; i < output_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, output_weights(i));

    current_position += output_weights.size();

    // Recurrent weights

    for(Index i = 0; i < forget_recurrent_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, forget_recurrent_weights(i));

    current_position += forget_recurrent_weights.size();

    for(Index i = 0; i < input_recurrent_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, input_recurrent_weights(i));

    current_position += input_recurrent_weights.size();

    for(Index i = 0; i < state_recurrent_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, state_recurrent_weights(i));

    current_position += state_recurrent_weights.size();

    for(Index i = 0; i < output_recurrent_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, output_recurrent_weights(i));

    return parameters;
}
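
// set_parameters() below assumes exactly this serialized layout: the four bias
// vectors first, then the four input-weight blocks, then the four
// recurrent-weight blocks, each in forget/input/state/output order.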


/// Returns the activation function of the layer.

const LongShortTermMemoryLayer::ActivationFunction& LongShortTermMemoryLayer::get_activation_function() const
{
    return activation_function;
}


/// Returns the recurrent activation function of the layer.

const LongShortTermMemoryLayer::ActivationFunction& LongShortTermMemoryLayer::get_recurrent_activation_function() const
{
    return recurrent_activation_function;
}


/// Returns a string with the name of the layer activation function.

string LongShortTermMemoryLayer::write_activation_function() const
{
    switch(activation_function)
    {
    case ActivationFunction::Logistic: return "Logistic";

    case ActivationFunction::HyperbolicTangent: return "HyperbolicTangent";

    case ActivationFunction::Threshold: return "Threshold";

    case ActivationFunction::SymmetricThreshold: return "SymmetricThreshold";

    case ActivationFunction::Linear: return "Linear";

    case ActivationFunction::RectifiedLinear: return "RectifiedLinear";

    case ActivationFunction::ScaledExponentialLinear: return "ScaledExponentialLinear";

    case ActivationFunction::SoftPlus: return "SoftPlus";

    case ActivationFunction::SoftSign: return "SoftSign";

    case ActivationFunction::HardSigmoid: return "HardSigmoid";

    case ActivationFunction::ExponentialLinear: return "ExponentialLinear";
    }

    return string();
}


/// Returns a string with the name of the layer recurrent activation function.

string LongShortTermMemoryLayer::write_recurrent_activation_function() const
{
    switch(recurrent_activation_function)
    {
    case ActivationFunction::Logistic: return "Logistic";

    case ActivationFunction::HyperbolicTangent: return "HyperbolicTangent";

    case ActivationFunction::Threshold: return "Threshold";

    case ActivationFunction::SymmetricThreshold: return "SymmetricThreshold";

    case ActivationFunction::Linear: return "Linear";

    case ActivationFunction::RectifiedLinear: return "RectifiedLinear";

    case ActivationFunction::ScaledExponentialLinear: return "ScaledExponentialLinear";

    case ActivationFunction::SoftPlus: return "SoftPlus";

    case ActivationFunction::SoftSign: return "SoftSign";

    case ActivationFunction::HardSigmoid: return "HardSigmoid";

    case ActivationFunction::ExponentialLinear: return "ExponentialLinear";
    }

    return string();
}


/// Returns true if messages from this class are displayed on the screen, and false otherwise.

const bool& LongShortTermMemoryLayer::get_display() const
{
    return display;
}


/// Sets an empty layer, with no neurons, and default member values.

void LongShortTermMemoryLayer::set()
{
    set_default();
}


/// Sets new numbers of inputs and neurons in the layer.
/// It also sets the rest of the members to their default values.

void LongShortTermMemoryLayer::set(const Index& new_inputs_number, const Index& new_neurons_number)
{
    input_biases.resize(new_neurons_number);
    forget_biases.resize(new_neurons_number);
    state_biases.resize(new_neurons_number);
    output_biases.resize(new_neurons_number);

    input_weights.resize(new_inputs_number, new_neurons_number);
    forget_weights.resize(new_inputs_number, new_neurons_number);
    state_weights.resize(new_inputs_number, new_neurons_number);
    output_weights.resize(new_inputs_number, new_neurons_number);

    input_recurrent_weights.resize(new_neurons_number, new_neurons_number);
    forget_recurrent_weights.resize(new_neurons_number, new_neurons_number);
    state_recurrent_weights.resize(new_neurons_number, new_neurons_number);
    output_recurrent_weights.resize(new_neurons_number, new_neurons_number);

    hidden_states.resize(new_neurons_number); // memory
    hidden_states.setZero();

    cell_states.resize(new_neurons_number); // carry
    cell_states.setZero();

    set_parameters_random();

    set_default();
}
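
// Storage conventions used throughout this class: input weights are
// inputs_number x neurons_number, recurrent weights are neurons_number x
// neurons_number, and the hidden and cell states hold one value per neuron,
// initialized to zero.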


/// Sets the members of this layer from those of another long short-term memory layer.

void LongShortTermMemoryLayer::set(const LongShortTermMemoryLayer& other_neuron_layer)
{
    activation_function = other_neuron_layer.activation_function;

    display = other_neuron_layer.display;

    set_default();
}


/// Sets the members of this layer to their default values.

void LongShortTermMemoryLayer::set_default()
{
    layer_name = "long_short_term_memory_layer";
    layer_type = Type::LongShortTermMemory;
}


void LongShortTermMemoryLayer::set_name(const string& new_layer_name)
{
    layer_name = new_layer_name;
}


/// Sets a new number of inputs in the layer.

void LongShortTermMemoryLayer::set_inputs_number(const Index& new_inputs_number)
{
    const Index neurons_number = get_neurons_number();

    set(new_inputs_number, neurons_number);
}


/// Sets a new input shape for the layer.

void LongShortTermMemoryLayer::set_input_shape(const Tensor<Index, 1>& size)
{
    const Index new_size = size[0];

    set_inputs_number(new_size);
}


/// Sets a new number of neurons in the layer.

void LongShortTermMemoryLayer::set_neurons_number(const Index& new_neurons_number)
{
    const Index inputs_number = get_inputs_number();

    set(inputs_number, new_neurons_number);
}


/// Sets the forget biases of the layer.

void LongShortTermMemoryLayer::set_forget_biases(const Tensor<type, 1>& new_biases)
{
    forget_biases = new_biases;
}


/// Sets the input biases of the layer.

void LongShortTermMemoryLayer::set_input_biases(const Tensor<type, 1>& new_biases)
{
    input_biases = new_biases;
}


/// Sets the state biases of the layer.

void LongShortTermMemoryLayer::set_state_biases(const Tensor<type, 1>& new_biases)
{
    state_biases = new_biases;
}


/// Sets the output biases of the layer.

void LongShortTermMemoryLayer::set_output_biases(const Tensor<type, 1>& new_biases)
{
    output_biases = new_biases;
}


/// Sets the forget weights of the layer.

void LongShortTermMemoryLayer::set_forget_weights(const Tensor<type, 2>& new_forget_weights)
{
    forget_weights = new_forget_weights;
}


/// Sets the input weights of the layer.

void LongShortTermMemoryLayer::set_input_weights(const Tensor<type, 2>& new_input_weight)
{
    input_weights = new_input_weight;
}


/// Sets the state weights of the layer.

void LongShortTermMemoryLayer::set_state_weights(const Tensor<type, 2>& new_state_weights)
{
    state_weights = new_state_weights;
}


/// Sets the output weights of the layer.

void LongShortTermMemoryLayer::set_output_weights(const Tensor<type, 2>& new_output_weight)
{
    output_weights = new_output_weight;
}


/// Sets the forget recurrent weights of the layer.

void LongShortTermMemoryLayer::set_forget_recurrent_weights(const Tensor<type, 2>& new_forget_recurrent_weight)
{
    forget_recurrent_weights = new_forget_recurrent_weight;
}


/// Sets the input recurrent weights of the layer.

void LongShortTermMemoryLayer::set_input_recurrent_weights(const Tensor<type, 2>& new_input_recurrent_weight)
{
    input_recurrent_weights = new_input_recurrent_weight;
}


/// Sets the state recurrent weights of the layer.

void LongShortTermMemoryLayer::set_state_recurrent_weights(const Tensor<type, 2>& new_state_recurrent_weight)
{
    state_recurrent_weights = new_state_recurrent_weight;
}


/// Sets the output recurrent weights of the layer.

void LongShortTermMemoryLayer::set_output_recurrent_weights(const Tensor<type, 2>& new_output_recurrent_weight)
{
    output_recurrent_weights = new_output_recurrent_weight;
}


/// Sets the layer parameters from a single vector, starting at a given index.

void LongShortTermMemoryLayer::set_parameters(const Tensor<type, 1>& new_parameters, const Index& index)
{
#ifdef OPENNN_DEBUG
    check_size(new_parameters, get_parameters_number(), LOG);
#endif

    const Index neurons_number = get_neurons_number();
    const Index inputs_number = get_inputs_number();

    Index current_index = index;

    // Biases

    Index size = neurons_number;

    memcpy(forget_biases.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(input_biases.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(state_biases.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(output_biases.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    // Weights

    size = inputs_number*neurons_number;

    memcpy(forget_weights.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(input_weights.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(state_weights.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(output_weights.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    // Recurrent weights

    size = neurons_number*neurons_number;

    memcpy(forget_recurrent_weights.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(input_recurrent_weights.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(state_recurrent_weights.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;

    memcpy(output_recurrent_weights.data(),
           new_parameters.data() + current_index,
           static_cast<size_t>(size)*sizeof(type));

    current_index += size;
}
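
// Round-trip sketch (illustrative only; `layer` and `p` are hypothetical names):
//
//     Tensor<type, 1> p = layer.get_parameters();
//     for(Index k = 0; k < p.size(); k++) p(k) += type(0.01);
//     layer.set_parameters(p, 0);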


/// Sets a new activation function in the layer.

void LongShortTermMemoryLayer::set_activation_function(const LongShortTermMemoryLayer::ActivationFunction& new_activation_function)
{
    activation_function = new_activation_function;
}


/// Sets a new activation function from a string containing the function name.

void LongShortTermMemoryLayer::set_activation_function(const string& new_activation_function_name)
{
    if(new_activation_function_name == "Logistic")
    {
        activation_function = ActivationFunction::Logistic;
    }
    else if(new_activation_function_name == "HyperbolicTangent")
    {
        activation_function = ActivationFunction::HyperbolicTangent;
    }
    else if(new_activation_function_name == "Threshold")
    {
        activation_function = ActivationFunction::Threshold;
    }
    else if(new_activation_function_name == "SymmetricThreshold")
    {
        activation_function = ActivationFunction::SymmetricThreshold;
    }
    else if(new_activation_function_name == "Linear")
    {
        activation_function = ActivationFunction::Linear;
    }
    else if(new_activation_function_name == "RectifiedLinear")
    {
        activation_function = ActivationFunction::RectifiedLinear;
    }
    else if(new_activation_function_name == "ScaledExponentialLinear")
    {
        activation_function = ActivationFunction::ScaledExponentialLinear;
    }
    else if(new_activation_function_name == "SoftPlus")
    {
        activation_function = ActivationFunction::SoftPlus;
    }
    else if(new_activation_function_name == "SoftSign")
    {
        activation_function = ActivationFunction::SoftSign;
    }
    else if(new_activation_function_name == "HardSigmoid")
    {
        activation_function = ActivationFunction::HardSigmoid;
    }
    else if(new_activation_function_name == "ExponentialLinear")
    {
        activation_function = ActivationFunction::ExponentialLinear;
    }
    else
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "void set_activation_function(const string&) method.\n"
               << "Unknown activation function: " << new_activation_function_name << ".\n";

        throw logic_error(buffer.str());
    }
}


/// Sets a new recurrent activation function in the layer.

void LongShortTermMemoryLayer::set_recurrent_activation_function(const LongShortTermMemoryLayer::ActivationFunction& new_recurrent_activation_function)
{
    recurrent_activation_function = new_recurrent_activation_function;
}


/// Sets a new recurrent activation function from a string containing the function name.

void LongShortTermMemoryLayer::set_recurrent_activation_function(const string& new_recurrent_activation_function_name)
{
    if(new_recurrent_activation_function_name == "Logistic")
    {
        recurrent_activation_function = ActivationFunction::Logistic;
    }
    else if(new_recurrent_activation_function_name == "HyperbolicTangent")
    {
        recurrent_activation_function = ActivationFunction::HyperbolicTangent;
    }
    else if(new_recurrent_activation_function_name == "Threshold")
    {
        recurrent_activation_function = ActivationFunction::Threshold;
    }
    else if(new_recurrent_activation_function_name == "SymmetricThreshold")
    {
        recurrent_activation_function = ActivationFunction::SymmetricThreshold;
    }
    else if(new_recurrent_activation_function_name == "Linear")
    {
        recurrent_activation_function = ActivationFunction::Linear;
    }
    else if(new_recurrent_activation_function_name == "RectifiedLinear")
    {
        recurrent_activation_function = ActivationFunction::RectifiedLinear;
    }
    else if(new_recurrent_activation_function_name == "ScaledExponentialLinear")
    {
        recurrent_activation_function = ActivationFunction::ScaledExponentialLinear;
    }
    else if(new_recurrent_activation_function_name == "SoftPlus")
    {
        recurrent_activation_function = ActivationFunction::SoftPlus;
    }
    else if(new_recurrent_activation_function_name == "SoftSign")
    {
        recurrent_activation_function = ActivationFunction::SoftSign;
    }
    else if(new_recurrent_activation_function_name == "HardSigmoid")
    {
        recurrent_activation_function = ActivationFunction::HardSigmoid;
    }
    else if(new_recurrent_activation_function_name == "ExponentialLinear")
    {
        recurrent_activation_function = ActivationFunction::ExponentialLinear;
    }
    else
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "void set_recurrent_activation_function(const string&) method.\n"
               << "Unknown activation function: " << new_recurrent_activation_function_name << ".\n";

        throw logic_error(buffer.str());
    }
}


/// Sets the number of timesteps of the layer.

void LongShortTermMemoryLayer::set_timesteps(const Index& new_timesteps)
{
    timesteps = new_timesteps;
}


/// Sets whether messages from this class are displayed on the screen.

void LongShortTermMemoryLayer::set_display(const bool& new_display)
{
    display = new_display;
}


/// Initializes all the biases of the layer with a given value.

void LongShortTermMemoryLayer::set_biases_constant(const type& value)
{
    forget_biases.setConstant(value);
    input_biases.setConstant(value);
    state_biases.setConstant(value);
    output_biases.setConstant(value);
}


/// Initializes the forget biases with a given value.

void LongShortTermMemoryLayer::set_forget_biases_constant(const type& value)
{
    forget_biases.setConstant(value);
}


/// Initializes the input biases with a given value.

void LongShortTermMemoryLayer::set_input_biases_constant(const type& value)
{
    input_biases.setConstant(value);
}


/// Initializes the state biases with a given value.

void LongShortTermMemoryLayer::set_state_biases_constant(const type& value)
{
    state_biases.setConstant(value);
}


/// Initializes the output biases with a given value.

void LongShortTermMemoryLayer::set_output_biases_constant(const type& value)
{
    output_biases.setConstant(value);
}


/// Initializes all the weights of the layer with a given value.

void LongShortTermMemoryLayer::set_weights_constant(const type& value)
{
    forget_weights.setConstant(value);
    input_weights.setConstant(value);
    state_weights.setConstant(value);
    output_weights.setConstant(value);
}


/// Initializes the forget weights with a given value.

void LongShortTermMemoryLayer::set_forget_weights_constant(const type& value)
{
    forget_weights.setConstant(value);
}


/// Initializes the input weights with a given value.

void LongShortTermMemoryLayer::set_input_weights_constant(const type& value)
{
    input_weights.setConstant(value);
}


/// Initializes the state weights with a given value.

void LongShortTermMemoryLayer::set_state_weights_constant(const type& value)
{
    state_weights.setConstant(value);
}


/// Initializes the output weights with a given value.

void LongShortTermMemoryLayer::set_output_weights_constant(const type& value)
{
    output_weights.setConstant(value);
}


/// Initializes all the recurrent weights of the layer with a given value.

void LongShortTermMemoryLayer::set_recurrent_weights_constant(const type& value)
{
    forget_recurrent_weights.setConstant(value);
    input_recurrent_weights.setConstant(value);
    state_recurrent_weights.setConstant(value);
    output_recurrent_weights.setConstant(value);
}


/// Initializes the forget recurrent weights with a given value.

void LongShortTermMemoryLayer::set_forget_recurrent_weights_constant(const type& value)
{
    forget_recurrent_weights.setConstant(value);
}


/// Initializes the input recurrent weights with a given value.

void LongShortTermMemoryLayer::set_input_recurrent_weights_constant(const type& value)
{
    input_recurrent_weights.setConstant(value);
}


/// Initializes the state recurrent weights with a given value.

void LongShortTermMemoryLayer::set_state_recurrent_weights_constant(const type& value)
{
    state_recurrent_weights.setConstant(value);
}


/// Initializes the output recurrent weights with a given value.

void LongShortTermMemoryLayer::set_output_recurrent_weights_constant(const type& value)
{
    output_recurrent_weights.setConstant(value);
}


/// Initializes the hidden states with a given value.

void LongShortTermMemoryLayer::set_hidden_states_constant(const type& value)
{
    hidden_states.setConstant(value);
}


/// Initializes the cell states with a given value.

void LongShortTermMemoryLayer::set_cell_states_constant(const type& value)
{
    cell_states.setConstant(value);
}


/// Initializes all the parameters of the layer (biases, weights and recurrent weights)
/// with a given value, and resets the hidden and cell states to zero.

void LongShortTermMemoryLayer::set_parameters_constant(const type& value)
{
    forget_biases.setConstant(value);
    input_biases.setConstant(value);
    state_biases.setConstant(value);
    output_biases.setConstant(value);

    forget_weights.setConstant(value);
    input_weights.setConstant(value);
    state_weights.setConstant(value);
    output_weights.setConstant(value);

    forget_recurrent_weights.setConstant(value);
    input_recurrent_weights.setConstant(value);
    state_recurrent_weights.setConstant(value);
    output_recurrent_weights.setConstant(value);

    hidden_states.setZero();

    cell_states.setZero();
}


/// Initializes all the parameters of the layer at random, with values drawn from
/// a uniform distribution on [-0.2, 0.2].

void LongShortTermMemoryLayer::set_parameters_random()
{
    const type minimum = type(-0.2);
    const type maximum = type(0.2);

    // Biases

    for(Index i = 0; i < forget_biases.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        forget_biases(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < input_biases.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        input_biases(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < state_biases.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        state_biases(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < output_biases.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        output_biases(i) = minimum + (maximum - minimum)*random;
    }

    // Weights

    for(Index i = 0; i < forget_weights.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        forget_weights(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < input_weights.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        input_weights(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < state_weights.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        state_weights(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < output_weights.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        output_weights(i) = minimum + (maximum - minimum)*random;
    }

    // Recurrent weights

    for(Index i = 0; i < forget_recurrent_weights.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        forget_recurrent_weights(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < input_recurrent_weights.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        input_recurrent_weights(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < state_recurrent_weights.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        state_recurrent_weights(i) = minimum + (maximum - minimum)*random;
    }

    for(Index i = 0; i < output_recurrent_weights.size(); i++)
    {
        const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

        output_recurrent_weights(i) = minimum + (maximum - minimum)*random;
    }
}


void LongShortTermMemoryLayer::calculate_combinations(const Tensor<type, 1>& inputs,
                                                      const Tensor<type, 2>& weights,
                                                      const Tensor<type, 2>& recurrent_weights,
                                                      const Tensor<type, 1>& biases,
                                                      Tensor<type, 1>& combinations) const
{
#ifdef OPENNN_DEBUG
    check_size(inputs, get_inputs_number(), LOG);
#endif

    combinations.device(*thread_pool_device) = inputs.contract(weights, AT_B);

    combinations.device(*thread_pool_device) += biases;

    combinations.device(*thread_pool_device) += hidden_states.contract(recurrent_weights, AT_B);
}
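
// For every gate this computes the same affine combination,
// combinations = W'x + b + U'h, where W are the gate's input weights, U its
// recurrent weights, b its biases, x the current inputs and h the previous
// hidden state (AT_B denotes contraction with the first operand transposed).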


void LongShortTermMemoryLayer::calculate_activations(const Tensor<type, 2>& combinations, Tensor<type, 2>& activations) const
{
#ifdef OPENNN_DEBUG
    check_columns_number(combinations, get_neurons_number(), LOG);
    check_dimensions(activations, combinations.dimension(0), get_neurons_number(), LOG);
#endif

    switch(activation_function)
    {
    case ActivationFunction::Linear: linear(combinations, activations); return;

    case ActivationFunction::Logistic: logistic(combinations, activations); return;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations, activations); return;

    case ActivationFunction::Threshold: threshold(combinations, activations); return;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations, activations); return;

    case ActivationFunction::RectifiedLinear: rectified_linear(combinations, activations); return;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations, activations); return;

    case ActivationFunction::SoftPlus: soft_plus(combinations, activations); return;

    case ActivationFunction::SoftSign: soft_sign(combinations, activations); return;

    case ActivationFunction::HardSigmoid: hard_sigmoid(combinations, activations); return;

    case ActivationFunction::ExponentialLinear: exponential_linear(combinations, activations); return;
    }
}


void LongShortTermMemoryLayer::calculate_activations(const Tensor<type, 1>& combinations_1d, Tensor<type, 1>& activations_1d) const
{
#ifdef OPENNN_DEBUG

    const Index neurons_number = get_neurons_number();

    const Index combinations_columns_number = combinations_1d.size();

    if(combinations_columns_number != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "void calculate_activations(const Tensor<type, 1>&) const method.\n"
               << "Size of combinations must be equal to number of neurons.\n";

        throw logic_error(buffer.str());
    }

#endif

    switch(activation_function)
    {
    case ActivationFunction::Linear: linear(combinations_1d, activations_1d); return;

    case ActivationFunction::Logistic: logistic(combinations_1d, activations_1d); return;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations_1d, activations_1d); return;

    case ActivationFunction::Threshold: threshold(combinations_1d, activations_1d); return;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations_1d, activations_1d); return;

    case ActivationFunction::RectifiedLinear: rectified_linear(combinations_1d, activations_1d); return;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations_1d, activations_1d); return;

    case ActivationFunction::SoftPlus: soft_plus(combinations_1d, activations_1d); return;

    case ActivationFunction::SoftSign: soft_sign(combinations_1d, activations_1d); return;

    case ActivationFunction::HardSigmoid: hard_sigmoid(combinations_1d, activations_1d); return;

    case ActivationFunction::ExponentialLinear: exponential_linear(combinations_1d, activations_1d); return;
    }
}


Tensor<type, 1> LongShortTermMemoryLayer::calculate_activations(const Tensor<type, 1>& combinations_1d) const
{
#ifdef OPENNN_DEBUG

    const Index neurons_number = get_neurons_number();

    const Index combinations_columns_number = combinations_1d.size();

    if(combinations_columns_number != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "Tensor<type, 1> calculate_activations(const Tensor<type, 1>&) const method.\n"
               << "Size of combinations must be equal to number of neurons.\n";

        throw logic_error(buffer.str());
    }

#endif

    Tensor<type, 1> activations_1d(combinations_1d.size());

    switch(activation_function)
    {
    case ActivationFunction::Linear: linear(combinations_1d, activations_1d); break;

    case ActivationFunction::Logistic: logistic(combinations_1d, activations_1d); break;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations_1d, activations_1d); break;

    case ActivationFunction::Threshold: threshold(combinations_1d, activations_1d); break;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations_1d, activations_1d); break;

    case ActivationFunction::RectifiedLinear: rectified_linear(combinations_1d, activations_1d); break;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations_1d, activations_1d); break;

    case ActivationFunction::SoftPlus: soft_plus(combinations_1d, activations_1d); break;

    case ActivationFunction::SoftSign: soft_sign(combinations_1d, activations_1d); break;

    case ActivationFunction::HardSigmoid: hard_sigmoid(combinations_1d, activations_1d); break;

    case ActivationFunction::ExponentialLinear: exponential_linear(combinations_1d, activations_1d); break;
    }

    return activations_1d;
}


void LongShortTermMemoryLayer::calculate_recurrent_activations(const Tensor<type, 2>& combinations,
                                                               Tensor<type, 2>& activations) const
{
#ifdef OPENNN_DEBUG

    const Index neurons_number = get_neurons_number();

    const Index combinations_columns_number = combinations.dimension(1);

    if(combinations_columns_number != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "void calculate_recurrent_activations(const Tensor<type, 2>&) const method.\n"
               << "Number of columns (" << combinations_columns_number << ") of combinations must be equal to number of neurons (" << neurons_number << ").\n";

        throw logic_error(buffer.str());
    }

#endif

    switch(recurrent_activation_function)
    {
    case ActivationFunction::Linear: linear(combinations, activations); break;

    case ActivationFunction::Logistic: logistic(combinations, activations); break;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations, activations); break;

    case ActivationFunction::Threshold: threshold(combinations, activations); break;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations, activations); break;

    case ActivationFunction::RectifiedLinear: rectified_linear(combinations, activations); break;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations, activations); break;

    case ActivationFunction::SoftPlus: soft_plus(combinations, activations); break;

    case ActivationFunction::SoftSign: soft_sign(combinations, activations); break;

    case ActivationFunction::HardSigmoid: hard_sigmoid(combinations, activations); break;

    case ActivationFunction::ExponentialLinear: exponential_linear(combinations, activations); break;
    }
}


void LongShortTermMemoryLayer::calculate_recurrent_activations(const Tensor<type, 1>& combinations_1d,
                                                               Tensor<type, 1>& recurrent_activations_1d) const
{
#ifdef OPENNN_DEBUG

    const Index neurons_number = get_neurons_number();

    const Index combinations_columns_number = combinations_1d.size();

    if(combinations_columns_number != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "void calculate_recurrent_activations(const Tensor<type, 1>&) const method.\n"
               << "Size of combinations must be equal to number of neurons.\n";

        throw logic_error(buffer.str());
    }

#endif

    switch(recurrent_activation_function)
    {
    case ActivationFunction::Linear: linear(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::Logistic: logistic(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::Threshold: threshold(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::RectifiedLinear: rectified_linear(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::SoftPlus: soft_plus(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::SoftSign: soft_sign(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::HardSigmoid: hard_sigmoid(combinations_1d, recurrent_activations_1d); break;

    case ActivationFunction::ExponentialLinear: exponential_linear(combinations_1d, recurrent_activations_1d); break;
    }
}


void LongShortTermMemoryLayer::calculate_activations_derivatives(const Tensor<type, 2>& combinations,
                                                                 Tensor<type, 2>& activations,
                                                                 Tensor<type, 2>& activations_derivatives_2d) const
{
#ifdef OPENNN_DEBUG

    const Index neurons_number = get_neurons_number();

    const Index combinations_columns_number = combinations.dimension(1);

    if(combinations_columns_number != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "void calculate_activations_derivatives(const Tensor<type, 2>&) const method.\n"
               << "Number of columns (" << combinations_columns_number << ") of combinations must be equal to number of neurons (" << neurons_number << ").\n";

        throw logic_error(buffer.str());
    }

#endif

    switch(activation_function)
    {
    case ActivationFunction::Linear: linear_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::Logistic: logistic_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::Threshold: threshold_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::RectifiedLinear: rectified_linear_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::SoftPlus: soft_plus_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::SoftSign: soft_sign_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::HardSigmoid: hard_sigmoid_derivatives(combinations, activations, activations_derivatives_2d); return;

    case ActivationFunction::ExponentialLinear: exponential_linear_derivatives(combinations, activations, activations_derivatives_2d); return;
    }
}


void LongShortTermMemoryLayer::calculate_activations_derivatives(const Tensor<type, 1>& combinations_1d,
                                                                 Tensor<type, 1>& activations_1d,
                                                                 Tensor<type, 1>& activations_derivatives_1d) const
{
#ifdef OPENNN_DEBUG

    const Index neurons_number = get_neurons_number();

    const Index combinations_columns_number = combinations_1d.size();

    if(combinations_columns_number != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "void calculate_activations_derivatives(const Tensor<type, 1>&) const method.\n"
               << "Size of combinations must be equal to number of neurons.\n";

        throw logic_error(buffer.str());
    }

#endif

    switch(activation_function)
    {
    case ActivationFunction::Linear: linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::Logistic: logistic_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::Threshold: threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::RectifiedLinear: rectified_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SoftPlus: soft_plus_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SoftSign: soft_sign_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::HardSigmoid: hard_sigmoid_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::ExponentialLinear: exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;
    }
}


void LongShortTermMemoryLayer::calculate_recurrent_activations_derivatives(const Tensor<type, 1>& combinations_1d,
                                                                           Tensor<type, 1>& activations_1d,
                                                                           Tensor<type, 1>& activations_derivatives_1d) const
{
#ifdef OPENNN_DEBUG

    const Index neurons_number = get_neurons_number();

    const Index combinations_columns_number = combinations_1d.size();

    if(combinations_columns_number != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "void calculate_recurrent_activations_derivatives(const Tensor<type, 1>&) const method.\n"
               << "Size of combinations must be equal to number of neurons.\n";

        throw logic_error(buffer.str());
    }

#endif

    switch(recurrent_activation_function)
    {
    case ActivationFunction::Linear: linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::Logistic: logistic_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::Threshold: threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::RectifiedLinear: rectified_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SoftPlus: soft_plus_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SoftSign: soft_sign_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::HardSigmoid: hard_sigmoid_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::ExponentialLinear: exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;
    }
}


Tensor<type, 2> LongShortTermMemoryLayer::calculate_outputs(const Tensor<type, 2>& inputs)
{
#ifdef OPENNN_DEBUG

    const Index inputs_number = get_inputs_number();

    const Index inputs_columns_number = inputs.dimension(1);

    if(inputs_columns_number != inputs_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "Tensor<type, 2> calculate_outputs(const Tensor<type, 2>&) const method.\n"
               << "Number of columns (" << inputs_columns_number << ") of inputs matrix must be equal to number of inputs (" << inputs_number << ").\n";

        throw logic_error(buffer.str());
    }
#endif

    const Index samples_number = inputs.dimension(0);

    const Index neurons_number = get_neurons_number();

    Tensor<type, 2> outputs(samples_number, neurons_number);

    Tensor<type, 1> forget_combinations(neurons_number);
    Tensor<type, 1> forget_activations(neurons_number);

    Tensor<type, 1> input_combinations(neurons_number);
    Tensor<type, 1> input_activations(neurons_number);

    Tensor<type, 1> state_combinations(neurons_number);
    Tensor<type, 1> state_activations(neurons_number);

    Tensor<type, 1> output_combinations(neurons_number);
    Tensor<type, 1> output_activations(neurons_number);

    for(Index i = 0; i < samples_number; i++)
    {
        if(i%timesteps == 0)
        {
            hidden_states.setZero();
            cell_states.setZero();
        }

        const Tensor<type, 1> current_inputs = inputs.chip(i, 0);

        // The four gates read the shared previous hidden state and write to shared
        // tensors, so they are computed sequentially; the contractions inside
        // calculate_combinations() are already parallelized via thread_pool_device.

        calculate_combinations(current_inputs, forget_weights, forget_recurrent_weights, forget_biases, forget_combinations);
        calculate_recurrent_activations(forget_combinations, forget_activations);

        calculate_combinations(current_inputs, input_weights, input_recurrent_weights, input_biases, input_combinations);
        calculate_recurrent_activations(input_combinations, input_activations);

        calculate_combinations(current_inputs, state_weights, state_recurrent_weights, state_biases, state_combinations);
        calculate_activations(state_combinations, state_activations);

        calculate_combinations(current_inputs, output_weights, output_recurrent_weights, output_biases, output_combinations);
        calculate_recurrent_activations(output_combinations, output_activations);

        cell_states = forget_activations * cell_states + input_activations * state_activations;
        calculate_activations(cell_states, hidden_states);
        hidden_states *= output_activations;

        for(Index j = 0; j < neurons_number; j++)
            outputs(i,j) = hidden_states(j);
    }

    return outputs;
}
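
// Per sample, the loop above applies the standard LSTM update:
//
//     c(t) = f(t) * c(t-1) + i(t) * s(t)     // new cell state
//     h(t) = o(t) * g(c(t))                  // new hidden state / output
//
// where f, i, s and o are the forget, input, state and output gate activations,
// * is elementwise multiplication and g is the layer activation function.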


void LongShortTermMemoryLayer::calculate_hidden_delta(LayerForwardPropagation* next_forward_propagation,
                                                      LayerBackPropagation* next_back_propagation,
                                                      LayerBackPropagation* back_propagation) const
{
    LongShortTermMemoryLayerBackPropagation* long_short_term_memory_layer_back_propagation =
            static_cast<LongShortTermMemoryLayerBackPropagation*>(back_propagation);

    switch(next_back_propagation->layer_pointer->get_type())
    {
    case Type::Perceptron:
    {
        PerceptronLayerForwardPropagation* next_perceptron_layer_forward_propagation =
                static_cast<PerceptronLayerForwardPropagation*>(next_forward_propagation);

        PerceptronLayerBackPropagation* next_perceptron_layer_back_propagation =
                static_cast<PerceptronLayerBackPropagation*>(next_back_propagation);

        calculate_hidden_delta_perceptron(next_perceptron_layer_forward_propagation,
                                          next_perceptron_layer_back_propagation,
                                          long_short_term_memory_layer_back_propagation);
    }
        break;

    case Type::Probabilistic:
    {
        ProbabilisticLayerForwardPropagation* next_probabilistic_layer_forward_propagation =
                static_cast<ProbabilisticLayerForwardPropagation*>(next_forward_propagation);

        ProbabilisticLayerBackPropagation* next_probabilistic_layer_back_propagation =
                static_cast<ProbabilisticLayerBackPropagation*>(next_back_propagation);

        calculate_hidden_delta_probabilistic(next_probabilistic_layer_forward_propagation,
                                             next_probabilistic_layer_back_propagation,
                                             long_short_term_memory_layer_back_propagation);
    }
        break;

    default: return;
    }
}


void LongShortTermMemoryLayer::calculate_hidden_delta_perceptron(PerceptronLayerForwardPropagation* next_forward_propagation,
                                                                 PerceptronLayerBackPropagation* next_back_propagation,
                                                                 LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const Tensor<type, 2>& next_synaptic_weights = static_cast<PerceptronLayer*>(next_back_propagation->layer_pointer)->get_synaptic_weights();

    back_propagation->delta.device(*thread_pool_device) =
            (next_back_propagation->delta*next_forward_propagation->activations_derivatives).contract(next_synaptic_weights, A_BT);
}
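
// The hidden delta is the next layer's delta scaled elementwise by that layer's
// activation derivatives, then propagated back through its synaptic weights
// (A_BT contracts against the transposed weight matrix).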


void LongShortTermMemoryLayer::calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation* next_forward_propagation,
                                                                    ProbabilisticLayerBackPropagation* next_back_propagation,
                                                                    LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const ProbabilisticLayer* probabilistic_layer_pointer = static_cast<ProbabilisticLayer*>(next_back_propagation->layer_pointer);

    const Tensor<type, 2>& next_synaptic_weights = probabilistic_layer_pointer->get_synaptic_weights();

    if(probabilistic_layer_pointer->get_neurons_number() == 1) // Binary
    {
        back_propagation->delta.device(*thread_pool_device) =
                (next_back_propagation->delta*next_forward_propagation->activations_derivatives).contract(next_synaptic_weights, A_BT);
    }
    else // Multiple
    {
        const Index samples_number = next_back_propagation->delta.dimension(0);
        const Index outputs_number = next_back_propagation->delta.dimension(1);
        const Index next_layer_neurons_number = probabilistic_layer_pointer->get_neurons_number();

        if(outputs_number != next_layer_neurons_number)
        {
            ostringstream buffer;

            buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
                   << "void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*, ProbabilisticLayerBackPropagation*, LongShortTermMemoryLayerBackPropagation*) const.\n"
                   << "Number of columns in delta (" << outputs_number << ") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number << ").\n";

            throw logic_error(buffer.str());
        }

        if(next_forward_propagation->activations_derivatives.dimension(1) != next_layer_neurons_number)
        {
            ostringstream buffer;

            buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
                   << "void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*, ProbabilisticLayerBackPropagation*, LongShortTermMemoryLayerBackPropagation*) const.\n"
                   << "Dimension 1 of activations derivatives (" << next_forward_propagation->activations_derivatives.dimension(1) << ") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number << ").\n";

            throw logic_error(buffer.str());
        }

        if(next_forward_propagation->activations_derivatives.dimension(2) != next_layer_neurons_number)
        {
            ostringstream buffer;

            buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
                   << "void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*, ProbabilisticLayerBackPropagation*, LongShortTermMemoryLayerBackPropagation*) const.\n"
                   << "Dimension 2 of activations derivatives (" << next_forward_propagation->activations_derivatives.dimension(2) << ") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number << ").\n";

            throw logic_error(buffer.str());
        }

        const Index step = next_layer_neurons_number*next_layer_neurons_number;

        next_back_propagation->biases_derivatives.setZero();

        for(Index i = 0; i < samples_number; i++)
        {
            next_back_propagation->delta_row = next_back_propagation->delta.chip(i,0);

            TensorMap< Tensor<type, 2> > activations_derivatives_matrix(next_forward_propagation->activations_derivatives.data() + i*step,
                                                                        next_layer_neurons_number, next_layer_neurons_number);

            next_back_propagation->error_combinations_derivatives.chip(i,0) =
                    next_back_propagation->delta_row.contract(activations_derivatives_matrix, AT_B);
        }

        back_propagation->delta.device(*thread_pool_device) =
                (next_back_propagation->error_combinations_derivatives).contract(next_synaptic_weights, A_BT);
    }
}
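
// In the multiple-class case the softmax Jacobian is a full neurons x neurons
// matrix per sample, so the delta cannot be scaled elementwise; each sample's
// delta row is contracted with its own Jacobian slice before the result is
// pushed back through the synaptic weights.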


void LongShortTermMemoryLayer::forward_propagate(const Tensor<type, 2>& inputs, LayerForwardPropagation* forward_propagation)
{
    LongShortTermMemoryLayerForwardPropagation* long_short_term_memory_layer_forward_propagation
            = static_cast<LongShortTermMemoryLayerForwardPropagation*>(forward_propagation);

    const Index samples_number = inputs.dimension(0);
    const Index neurons_number = get_neurons_number();

    Index copy_index = 0;

    for(Index i = 0; i < samples_number; i++)
    {
        if(i%timesteps == 0)
        {
            hidden_states.setZero();
            cell_states.setZero();
        }

        long_short_term_memory_layer_forward_propagation->current_inputs = inputs.chip(i,0);

        calculate_combinations(long_short_term_memory_layer_forward_propagation->current_inputs,
                               forget_weights,
                               forget_recurrent_weights,
                               forget_biases,
                               long_short_term_memory_layer_forward_propagation->current_forget_combinations);

        calculate_recurrent_activations_derivatives(long_short_term_memory_layer_forward_propagation->current_forget_combinations,
                                                    long_short_term_memory_layer_forward_propagation->current_forget_activations,
                                                    long_short_term_memory_layer_forward_propagation->current_forget_activations_derivatives);

        calculate_combinations(long_short_term_memory_layer_forward_propagation->current_inputs,
                               input_weights,
                               input_recurrent_weights,
                               input_biases,
                               long_short_term_memory_layer_forward_propagation->current_input_combinations);

        calculate_recurrent_activations_derivatives(long_short_term_memory_layer_forward_propagation->current_input_combinations,
                                                    long_short_term_memory_layer_forward_propagation->current_input_activations,
                                                    long_short_term_memory_layer_forward_propagation->current_input_activations_derivatives);

        calculate_combinations(long_short_term_memory_layer_forward_propagation->current_inputs,
                               state_weights,
                               state_recurrent_weights,
                               state_biases,
                               long_short_term_memory_layer_forward_propagation->current_state_combinations);

        // The state (candidate) gate uses the layer activation function, as in
        // calculate_outputs(), not the recurrent one.

        calculate_activations_derivatives(long_short_term_memory_layer_forward_propagation->current_state_combinations,
                                          long_short_term_memory_layer_forward_propagation->current_state_activations,
                                          long_short_term_memory_layer_forward_propagation->current_state_activations_derivatives);

        calculate_combinations(long_short_term_memory_layer_forward_propagation->current_inputs,
                               output_weights,
                               output_recurrent_weights,
                               output_biases,
                               long_short_term_memory_layer_forward_propagation->current_output_combinations);

        calculate_recurrent_activations_derivatives(long_short_term_memory_layer_forward_propagation->current_output_combinations,
                                                    long_short_term_memory_layer_forward_propagation->current_output_activations,
                                                    long_short_term_memory_layer_forward_propagation->current_output_activations_derivatives);

        cell_states = long_short_term_memory_layer_forward_propagation->current_forget_activations * cell_states +
                      long_short_term_memory_layer_forward_propagation->current_input_activations * long_short_term_memory_layer_forward_propagation->current_state_activations;

        calculate_activations_derivatives(cell_states, hidden_states, long_short_term_memory_layer_forward_propagation->current_hidden_states_derivatives);

        hidden_states *= long_short_term_memory_layer_forward_propagation->current_output_activations;

        // Activations 2d

        for(Index j = 0; j < neurons_number; j++) long_short_term_memory_layer_forward_propagation->activations(i,j) = hidden_states(j);

        // Forget (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->forget_activations.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_forget_activations.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->forget_activations_derivatives.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_forget_activations_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Input (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->input_activations.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_input_activations.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->input_activations_derivatives.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_input_activations_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // State (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->state_activations.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_state_activations.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->state_activations_derivatives.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_state_activations_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Output (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->output_activations.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_output_activations.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->output_activations_derivatives.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_output_activations_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Cell states (activations)

        memcpy(long_short_term_memory_layer_forward_propagation->cell_states_activations.data() + copy_index,
               cell_states.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Hidden states (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->hidden_states_activations.data() + copy_index,
               hidden_states.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
               long_short_term_memory_layer_forward_propagation->current_hidden_states_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        copy_index += neurons_number;
    }
}
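
// copy_index advances by neurons_number once per sample, so each per-gate buffer
// above accumulates a row-major [samples x neurons] history of activations and
// derivatives for later use during back-propagation.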
1904
1905
void LongShortTermMemoryLayer::forward_propagate(const Tensor<type, 2>& inputs,
                                                 Tensor<type, 1> parameters,
                                                 LayerForwardPropagation* forward_propagation)
{
    LongShortTermMemoryLayerForwardPropagation* long_short_term_memory_layer_forward_propagation
            = static_cast<LongShortTermMemoryLayerForwardPropagation*>(forward_propagation);

    const Index inputs_number = get_inputs_number();
    const Index neurons_number = get_neurons_number();

    const TensorMap<Tensor<type, 1>> forget_biases(parameters.data(), neurons_number);
    const TensorMap<Tensor<type, 1>> input_biases(parameters.data()+neurons_number, neurons_number);
    const TensorMap<Tensor<type, 1>> state_biases(parameters.data()+2*neurons_number, neurons_number);
    const TensorMap<Tensor<type, 1>> output_biases(parameters.data()+3*neurons_number, neurons_number);

    const TensorMap<Tensor<type, 2>> forget_weights(parameters.data()+4*neurons_number, inputs_number, neurons_number);
    const TensorMap<Tensor<type, 2>> input_weights(parameters.data()+4*neurons_number+inputs_number*neurons_number, inputs_number, neurons_number);
    const TensorMap<Tensor<type, 2>> state_weights(parameters.data()+4*neurons_number+2*inputs_number*neurons_number, inputs_number, neurons_number);
    const TensorMap<Tensor<type, 2>> output_weights(parameters.data()+4*neurons_number+3*inputs_number*neurons_number, inputs_number, neurons_number);

    const TensorMap<Tensor<type, 2>> forget_recurrent_weights(parameters.data()+4*neurons_number+4*inputs_number*neurons_number, neurons_number, neurons_number);
    const TensorMap<Tensor<type, 2>> input_recurrent_weights(parameters.data()+4*neurons_number+4*inputs_number*neurons_number+neurons_number*neurons_number, neurons_number, neurons_number);
    const TensorMap<Tensor<type, 2>> state_recurrent_weights(parameters.data()+4*neurons_number+4*inputs_number*neurons_number+2*neurons_number*neurons_number, neurons_number, neurons_number);
    const TensorMap<Tensor<type, 2>> output_recurrent_weights(parameters.data()+4*neurons_number+4*inputs_number*neurons_number+3*neurons_number*neurons_number, neurons_number, neurons_number);

    const Index samples_number = inputs.dimension(0);

    Tensor<type, 1> forget_combinations(neurons_number);
    Tensor<type, 1> input_combinations(neurons_number);
    Tensor<type, 1> state_combinations(neurons_number);
    Tensor<type, 1> output_combinations(neurons_number);

    Tensor<type, 1> forget_activations(neurons_number);
    Tensor<type, 1> input_activations(neurons_number);
    Tensor<type, 1> state_activations(neurons_number);
    Tensor<type, 1> output_activations(neurons_number);

    Tensor<type, 1> forget_activations_derivatives(neurons_number);
    Tensor<type, 1> input_activations_derivatives(neurons_number);
    Tensor<type, 1> state_activations_derivatives(neurons_number);
    Tensor<type, 1> output_activations_derivatives(neurons_number);

    Tensor<type, 1> hidden_states_derivatives(neurons_number);

    Index copy_index = 0;

    for(Index i = 0; i < samples_number; i++)
    {
        if(i%timesteps == 0)
        {
            hidden_states.setZero();
            cell_states.setZero();
        }

        const Tensor<type, 1> current_inputs = inputs.chip(i,0);

        calculate_combinations(current_inputs, forget_weights, forget_recurrent_weights, forget_biases, forget_combinations);
        calculate_recurrent_activations_derivatives(forget_combinations, forget_activations, forget_activations_derivatives);

        calculate_combinations(current_inputs, input_weights, input_recurrent_weights, input_biases, input_combinations);
        calculate_recurrent_activations_derivatives(input_combinations, input_activations, input_activations_derivatives);

        calculate_combinations(current_inputs, state_weights, state_recurrent_weights, state_biases, state_combinations);
        calculate_activations_derivatives(state_combinations, state_activations, state_activations_derivatives);

        calculate_combinations(current_inputs, output_weights, output_recurrent_weights, output_biases, output_combinations);
        calculate_recurrent_activations_derivatives(output_combinations, output_activations, output_activations_derivatives);

        cell_states = forget_activations * cell_states + input_activations * state_activations;
        calculate_activations_derivatives(cell_states, hidden_states, hidden_states_derivatives);

        hidden_states *= output_activations;

        // Activations matrix (samples x neurons)

        for(Index j = 0; j < neurons_number; j++) long_short_term_memory_layer_forward_propagation->activations(i,j) = hidden_states(j);

        // Forget (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->forget_activations.data() + copy_index,
               forget_activations.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->forget_activations_derivatives.data() + copy_index,
               forget_activations_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Input (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->input_activations.data() + copy_index,
               input_activations.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->input_activations_derivatives.data() + copy_index,
               input_activations_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // State (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->state_activations.data() + copy_index,
               state_activations.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->state_activations_derivatives.data() + copy_index,
               state_activations_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Output (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->output_activations.data() + copy_index,
               output_activations.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->output_activations_derivatives.data() + copy_index,
               output_activations_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Cell states (activations)

        memcpy(long_short_term_memory_layer_forward_propagation->cell_states_activations.data() + copy_index,
               cell_states.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Hidden states (activations and activations derivatives)

        memcpy(long_short_term_memory_layer_forward_propagation->hidden_states_activations.data() + copy_index,
               hidden_states.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(long_short_term_memory_layer_forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
               hidden_states_derivatives.data(),
               static_cast<size_t>(neurons_number)*sizeof(type));

        copy_index += neurons_number;
    }
}


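/// Copies this layer's parameter derivatives into the gradient vector of the whole
/// neural network.
/// @param back_propagation Back propagation structure holding the derivatives.
/// @param index Position in @a gradient at which this layer's block starts.
/// @param gradient Gradient vector being assembled.
///
/// The block layout mirrors get_parameters(). For a layer with inputs_number = m and
/// neurons_number = n, the offsets relative to @a index used by the copies below are:
///
///   biases:            g*n                    gate g in {forget = 0, input = 1, state = 2, output = 3}
///   weights:           4*n + g*m*n
///   recurrent weights: 4*n + 4*m*n + g*n*n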
void LongShortTermMemoryLayer::insert_gradient(LayerBackPropagation* back_propagation,
                                               const Index& index,
                                               Tensor<type, 1>& gradient) const
{
    const Index inputs_number = get_inputs_number();
    const Index neurons_number = get_neurons_number();

    LongShortTermMemoryLayerBackPropagation* long_short_term_memory_layer_back_propagation =
            static_cast<LongShortTermMemoryLayerBackPropagation*>(back_propagation);

    // Biases

    memcpy(gradient.data() + index,
           long_short_term_memory_layer_back_propagation->forget_biases_derivatives.data(),
           static_cast<size_t>(neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + neurons_number,
           long_short_term_memory_layer_back_propagation->input_biases_derivatives.data(),
           static_cast<size_t>(neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + 2*neurons_number,
           long_short_term_memory_layer_back_propagation->state_biases_derivatives.data(),
           static_cast<size_t>(neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + 3*neurons_number,
           long_short_term_memory_layer_back_propagation->output_biases_derivatives.data(),
           static_cast<size_t>(neurons_number)*sizeof(type));

    // Weights

    memcpy(gradient.data() + index + 4*neurons_number,
           long_short_term_memory_layer_back_propagation->forget_weights_derivatives.data(),
           static_cast<size_t>(inputs_number*neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + 4*neurons_number + inputs_number*neurons_number,
           long_short_term_memory_layer_back_propagation->input_weights_derivatives.data(),
           static_cast<size_t>(inputs_number*neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + 4*neurons_number + 2*inputs_number*neurons_number,
           long_short_term_memory_layer_back_propagation->state_weights_derivatives.data(),
           static_cast<size_t>(inputs_number*neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + 4*neurons_number + 3*inputs_number*neurons_number,
           long_short_term_memory_layer_back_propagation->output_weights_derivatives.data(),
           static_cast<size_t>(inputs_number*neurons_number)*sizeof(type));

    // Recurrent weights

    memcpy(gradient.data() + index + 4*neurons_number + 4*inputs_number*neurons_number,
           long_short_term_memory_layer_back_propagation->forget_recurrent_weights_derivatives.data(),
           static_cast<size_t>(neurons_number*neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + 4*neurons_number + 4*inputs_number*neurons_number + neurons_number*neurons_number,
           long_short_term_memory_layer_back_propagation->input_recurrent_weights_derivatives.data(),
           static_cast<size_t>(neurons_number*neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + 4*neurons_number + 4*inputs_number*neurons_number + 2*neurons_number*neurons_number,
           long_short_term_memory_layer_back_propagation->state_recurrent_weights_derivatives.data(),
           static_cast<size_t>(neurons_number*neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + 4*neurons_number + 4*inputs_number*neurons_number + 3*neurons_number*neurons_number,
           long_short_term_memory_layer_back_propagation->output_recurrent_weights_derivatives.data(),
           static_cast<size_t>(neurons_number*neurons_number)*sizeof(type));
}


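/// Computes the derivatives of the error with respect to all the parameters of this
/// layer, dispatching to one specialized routine per parameter block: the biases,
/// weights and recurrent weights of the forget, input, state and output gates.
/// The twelve routines are mutually independent, which is why the body sits in a
/// block that could be parallelized (the OpenMP pragma is currently disabled).
/// Typical call order, as a sketch (delta is assumed to have been filled in from the
/// following layer before this call):
///
///   layer.forward_propagate(inputs, &forward_propagation);
///   // ...compute back_propagation.delta...
///   layer.calculate_error_gradient(inputs, &forward_propagation, &back_propagation);
///   layer.insert_gradient(&back_propagation, index, gradient);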
void LongShortTermMemoryLayer::calculate_error_gradient(const Tensor<type, 2>& inputs,
                                                        LayerForwardPropagation* forward_propagation,
                                                        LayerBackPropagation* back_propagation) const
{
    LongShortTermMemoryLayerForwardPropagation* long_short_term_memory_layer_forward_propagation =
            static_cast<LongShortTermMemoryLayerForwardPropagation*>(forward_propagation);

    LongShortTermMemoryLayerBackPropagation* long_short_term_memory_layer_back_propagation =
            static_cast<LongShortTermMemoryLayerBackPropagation*>(back_propagation);

//#pragma omp parallel
    {
        // Biases

        calculate_forget_biases_error_gradient(inputs,
                                               long_short_term_memory_layer_forward_propagation,
                                               long_short_term_memory_layer_back_propagation);

        calculate_input_biases_error_gradient(inputs,
                                              long_short_term_memory_layer_forward_propagation,
                                              long_short_term_memory_layer_back_propagation);

        calculate_state_biases_error_gradient(inputs,
                                              long_short_term_memory_layer_forward_propagation,
                                              long_short_term_memory_layer_back_propagation);

        calculate_output_biases_error_gradient(inputs,
                                               long_short_term_memory_layer_forward_propagation,
                                               long_short_term_memory_layer_back_propagation);

        // Weights

        calculate_forget_weights_error_gradient(inputs,
                                                long_short_term_memory_layer_forward_propagation,
                                                long_short_term_memory_layer_back_propagation);

        calculate_input_weights_error_gradient(inputs,
                                               long_short_term_memory_layer_forward_propagation,
                                               long_short_term_memory_layer_back_propagation);

        calculate_state_weights_error_gradient(inputs,
                                               long_short_term_memory_layer_forward_propagation,
                                               long_short_term_memory_layer_back_propagation);

        calculate_output_weights_error_gradient(inputs,
                                                long_short_term_memory_layer_forward_propagation,
                                                long_short_term_memory_layer_back_propagation);

        // Recurrent weights

        calculate_forget_recurrent_weights_error_gradient(inputs,
                                                          long_short_term_memory_layer_forward_propagation,
                                                          long_short_term_memory_layer_back_propagation);

        calculate_input_recurrent_weights_error_gradient(inputs,
                                                         long_short_term_memory_layer_forward_propagation,
                                                         long_short_term_memory_layer_back_propagation);

        calculate_state_recurrent_weights_error_gradient(inputs,
                                                         long_short_term_memory_layer_forward_propagation,
                                                         long_short_term_memory_layer_back_propagation);

        calculate_output_recurrent_weights_error_gradient(inputs,
                                                          long_short_term_memory_layer_forward_propagation,
                                                          long_short_term_memory_layer_back_propagation);
    }
}


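/// Computes the derivatives of the error with respect to the forget weights W_f,
/// accumulating over samples with back-propagation through time. Writing f, i, s, o
/// for the gate activations, each sample propagates the recurrences (in outline)
///
///   dc(t)/dW_f = f(t)*dc(t-1)/dW_f + s(t)*di(t)/dW_f + i(t)*ds(t)/dW_f + f'(t)*c(t-1)*dF(t)/dW_f
///   dh(t)/dW_f = o(t)*activation'(c(t))*dc(t)/dW_f + activation(c(t))*do(t)/dW_f
///
/// where F(t) is the forget-gate combination, whose derivative receives the direct
/// term x(t); the sample's contribution to the gradient is dh(t)/dW_f contracted
/// with the layer deltas.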
void LongShortTermMemoryLayer::calculate_forget_weights_error_gradient(const Tensor<type, 2>& inputs,
                                                                       LongShortTermMemoryLayerForwardPropagation* forward_propagation,
                                                                       LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);
    const Index inputs_number = get_inputs_number();
    const Index neurons_number = get_neurons_number();
    const Index parameters_number = inputs_number*neurons_number;

    Tensor<type, 2> input_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> forget_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> state_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> output_combinations_weights_derivatives(parameters_number, neurons_number);

    Tensor<type, 2> hidden_states_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> cell_state_weights_derivatives(parameters_number, neurons_number);

    input_combinations_weights_derivatives.setZero();
    forget_combinations_weights_derivatives.setZero();
    state_combinations_weights_derivatives.setZero();
    output_combinations_weights_derivatives.setZero();
    hidden_states_weights_derivatives.setZero();
    cell_state_weights_derivatives.setZero();

    Index column_index = 0;
    Index input_index = 0;

    Index copy_index = 0;

    back_propagation->forget_weights_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        const Tensor<type, 1> current_inputs = inputs.chip(sample, 0); // @todo replace chip copy with memcpy
        const Tensor<type, 1> current_layer_deltas = back_propagation->delta.chip(sample,0); // @todo replace chip copy with memcpy

        // Forget activations and derivatives

        memcpy(forward_propagation->current_forget_activations.data(),
               forward_propagation->forget_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_forget_activations_derivatives.data(),
               forward_propagation->forget_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Input activations and derivatives

        memcpy(forward_propagation->current_input_activations.data(),
               forward_propagation->input_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations_derivatives.data(),
               forward_propagation->input_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        // State activations and derivatives

        memcpy(forward_propagation->current_state_activations.data(),
               forward_propagation->state_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations_derivatives.data(),
               forward_propagation->state_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Output activations and derivatives

        memcpy(forward_propagation->current_output_activations.data(),
               forward_propagation->output_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations_derivatives.data(),
               forward_propagation->output_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        // Cell states and hidden states

        memcpy(forward_propagation->current_cell_state_activations.data(),
               forward_propagation->cell_states_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_hidden_states_derivatives.data(),
               forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        if(sample%timesteps == 0)
        {
            forward_propagation->previous_cell_state_activations.setZero();

            forget_combinations_weights_derivatives.setZero();
            input_combinations_weights_derivatives.setZero();
            output_combinations_weights_derivatives.setZero();
            state_combinations_weights_derivatives.setZero();

            cell_state_weights_derivatives.setZero();
        }
        else
        {
            memcpy(forward_propagation->previous_cell_state_activations.data(),
                   forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            forget_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(forget_recurrent_weights, A_B);

            input_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(input_recurrent_weights, A_B);
            multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_input_activations_derivatives);

            state_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(state_recurrent_weights, A_B);
            multiply_rows(state_combinations_weights_derivatives, forward_propagation->current_state_activations_derivatives);

            output_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(output_recurrent_weights, A_B);
            multiply_rows(output_combinations_weights_derivatives, forward_propagation->current_output_activations_derivatives);
        }

        column_index = 0;
        input_index = 0;

        for(Index i = 0; i < parameters_number; i++)
        {
            forget_combinations_weights_derivatives(i, column_index) += current_inputs(input_index);

            input_index++;

            if(input_index == inputs_number)
            {
                input_index = 0;
                column_index++;
            }
        }

        multiply_rows(cell_state_weights_derivatives,
                      forward_propagation->current_forget_activations);

        multiply_rows(input_combinations_weights_derivatives,
                      forward_propagation->current_state_activations);

        cell_state_weights_derivatives += input_combinations_weights_derivatives;

        multiply_rows(state_combinations_weights_derivatives,
                      forward_propagation->current_input_activations);

        cell_state_weights_derivatives += state_combinations_weights_derivatives;

        multiply_rows(forget_combinations_weights_derivatives,
                      forward_propagation->current_forget_activations_derivatives*forward_propagation->previous_cell_state_activations);

        cell_state_weights_derivatives += forget_combinations_weights_derivatives;

        memcpy(hidden_states_weights_derivatives.data(),
               cell_state_weights_derivatives.data(),
               static_cast<size_t>(cell_state_weights_derivatives.size())*sizeof(type));

        multiply_rows(hidden_states_weights_derivatives,
                      forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);

        multiply_rows(output_combinations_weights_derivatives,
                      calculate_activations(forward_propagation->current_cell_state_activations));

        hidden_states_weights_derivatives += output_combinations_weights_derivatives;

        back_propagation->forget_weights_derivatives += hidden_states_weights_derivatives.contract(current_layer_deltas, A_B);

        copy_index += neurons_number;
    }
}


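/// Computes the derivatives of the error with respect to the input weights W_i.
/// Same back-propagation-through-time scheme as calculate_forget_weights_error_gradient,
/// except that the direct term x(t) enters the input-gate combination derivatives,
/// which reach the cell state through i'(t)*s(t).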
void LongShortTermMemoryLayer::calculate_input_weights_error_gradient(const Tensor<type, 2>& inputs,
                                                                      LongShortTermMemoryLayerForwardPropagation* forward_propagation,
                                                                      LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);
    const Index inputs_number = get_inputs_number();
    const Index neurons_number = get_neurons_number();
    const Index parameters_number = inputs_number*neurons_number;

    Tensor<type, 2> input_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> forget_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> state_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> output_combinations_weights_derivatives(parameters_number, neurons_number);

    Tensor<type, 2> hidden_states_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> cell_state_weights_derivatives(parameters_number, neurons_number);

    input_combinations_weights_derivatives.setZero();
    forget_combinations_weights_derivatives.setZero();
    state_combinations_weights_derivatives.setZero();
    output_combinations_weights_derivatives.setZero();
    hidden_states_weights_derivatives.setZero();
    cell_state_weights_derivatives.setZero();

    Index column_index = 0;
    Index input_index = 0;

    Index copy_index = 0;

    back_propagation->input_weights_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        forward_propagation->current_inputs = inputs.chip(sample, 0); // @todo replace chip copy with memcpy

        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample,0); // @todo replace chip copy with memcpy

        // Current activations, derivatives and states for this sample

        memcpy(forward_propagation->current_forget_activations.data(),
               forward_propagation->forget_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_forget_activations_derivatives.data(),
               forward_propagation->forget_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations.data(),
               forward_propagation->input_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations_derivatives.data(),
               forward_propagation->input_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations.data(),
               forward_propagation->state_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations_derivatives.data(),
               forward_propagation->state_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations.data(),
               forward_propagation->output_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations_derivatives.data(),
               forward_propagation->output_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_cell_state_activations.data(),
               forward_propagation->cell_states_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_hidden_states_derivatives.data(),
               forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        if(sample%timesteps == 0)
        {
            forward_propagation->previous_cell_state_activations.setZero();

            forget_combinations_weights_derivatives.setZero();
            input_combinations_weights_derivatives.setZero();
            output_combinations_weights_derivatives.setZero();
            state_combinations_weights_derivatives.setZero();

            cell_state_weights_derivatives.setZero();
            hidden_states_weights_derivatives.setZero();
        }
        else
        {
            memcpy(forward_propagation->previous_cell_state_activations.data(),
                   forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            forget_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(forget_recurrent_weights, A_B);
            multiply_rows(forget_combinations_weights_derivatives, forward_propagation->current_forget_activations_derivatives);

            input_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(input_recurrent_weights, A_B);

            state_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(state_recurrent_weights, A_B);
            multiply_rows(state_combinations_weights_derivatives, forward_propagation->current_state_activations_derivatives);

            output_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(output_recurrent_weights, A_B);
            multiply_rows(output_combinations_weights_derivatives, forward_propagation->current_output_activations_derivatives);
        }

        column_index = 0;
        input_index = 0;

        for(Index i = 0; i < parameters_number; i++)
        {
            input_combinations_weights_derivatives(i, column_index) += forward_propagation->current_inputs(input_index);

            input_index++;

            if(input_index == inputs_number)
            {
                input_index = 0;
                column_index++;
            }
        }

        multiply_rows(cell_state_weights_derivatives,
                      forward_propagation->current_forget_activations);

        multiply_rows(forget_combinations_weights_derivatives,
                      forward_propagation->previous_cell_state_activations);

        cell_state_weights_derivatives += forget_combinations_weights_derivatives;

        multiply_rows(state_combinations_weights_derivatives,
                      forward_propagation->current_input_activations);

        cell_state_weights_derivatives += state_combinations_weights_derivatives;

        multiply_rows(input_combinations_weights_derivatives,
                      forward_propagation->current_input_activations_derivatives*forward_propagation->current_state_activations);

        cell_state_weights_derivatives += input_combinations_weights_derivatives;

        hidden_states_weights_derivatives = cell_state_weights_derivatives;

        multiply_rows(hidden_states_weights_derivatives,
                      forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);

        multiply_rows(output_combinations_weights_derivatives,
                      calculate_activations(forward_propagation->current_cell_state_activations));

        hidden_states_weights_derivatives += output_combinations_weights_derivatives;

        back_propagation->input_weights_derivatives
                += hidden_states_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);

        copy_index += neurons_number;
    }
}


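/// Computes the derivatives of the error with respect to the state (candidate) weights W_s.
/// Same scheme again; here the direct term x(t) enters the state-combination
/// derivatives, which reach the cell state through s'(t)*i(t).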
void LongShortTermMemoryLayer::calculate_state_weights_error_gradient(const Tensor<type, 2>& inputs,
                                                                      LongShortTermMemoryLayerForwardPropagation* forward_propagation,
                                                                      LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);
    const Index inputs_number = get_inputs_number();
    const Index neurons_number = get_neurons_number();
    const Index parameters_number = inputs_number*neurons_number;

    Tensor<type, 2> input_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> forget_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> state_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> output_combinations_weights_derivatives(parameters_number, neurons_number);

    Tensor<type, 2> hidden_states_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> cell_state_weights_derivatives(parameters_number, neurons_number);

    input_combinations_weights_derivatives.setZero();
    forget_combinations_weights_derivatives.setZero();
    state_combinations_weights_derivatives.setZero();
    output_combinations_weights_derivatives.setZero();
    hidden_states_weights_derivatives.setZero();
    cell_state_weights_derivatives.setZero();

    Index column_index = 0;
    Index input_index = 0;

    Index copy_index = 0;

    back_propagation->state_weights_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        forward_propagation->current_inputs = inputs.chip(sample, 0); // @todo replace chip copy with memcpy

        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample,0); // @todo replace chip copy with memcpy

        // Current activations, derivatives and states for this sample

        memcpy(forward_propagation->current_forget_activations.data(),
               forward_propagation->forget_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_forget_activations_derivatives.data(),
               forward_propagation->forget_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations.data(),
               forward_propagation->input_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations_derivatives.data(),
               forward_propagation->input_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations.data(),
               forward_propagation->state_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations_derivatives.data(),
               forward_propagation->state_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations.data(),
               forward_propagation->output_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations_derivatives.data(),
               forward_propagation->output_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_cell_state_activations.data(),
               forward_propagation->cell_states_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_hidden_states_derivatives.data(),
               forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        if(sample%timesteps == 0)
        {
            forward_propagation->previous_cell_state_activations.setZero();

            forget_combinations_weights_derivatives.setZero();
            input_combinations_weights_derivatives.setZero();
            output_combinations_weights_derivatives.setZero();
            state_combinations_weights_derivatives.setZero();

            cell_state_weights_derivatives.setZero();
        }
        else
        {
            memcpy(forward_propagation->previous_cell_state_activations.data(),
                   forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            forget_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(forget_recurrent_weights, A_B);
            multiply_rows(forget_combinations_weights_derivatives, forward_propagation->current_forget_activations_derivatives);

            input_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(input_recurrent_weights, A_B);
            multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_input_activations_derivatives);

            state_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(state_recurrent_weights, A_B);

            output_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(output_recurrent_weights, A_B);
            multiply_rows(output_combinations_weights_derivatives, forward_propagation->current_output_activations_derivatives);
        }

        column_index = 0;
        input_index = 0;

        for(Index i = 0; i < parameters_number; i++)
        {
            state_combinations_weights_derivatives(i, column_index) += forward_propagation->current_inputs(input_index);

            input_index++;

            if(input_index == inputs_number)
            {
                input_index = 0;
                column_index++;
            }
        }

        multiply_rows(cell_state_weights_derivatives, forward_propagation->current_forget_activations);
        multiply_rows(forget_combinations_weights_derivatives, forward_propagation->previous_cell_state_activations);
        cell_state_weights_derivatives += forget_combinations_weights_derivatives;
        multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_state_activations);
        cell_state_weights_derivatives += input_combinations_weights_derivatives;
        multiply_rows(state_combinations_weights_derivatives, forward_propagation->current_state_activations_derivatives*forward_propagation->current_input_activations);
        cell_state_weights_derivatives += state_combinations_weights_derivatives;

        hidden_states_weights_derivatives = cell_state_weights_derivatives;
        multiply_rows(hidden_states_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
        multiply_rows(output_combinations_weights_derivatives, calculate_activations(forward_propagation->current_cell_state_activations));
        hidden_states_weights_derivatives += output_combinations_weights_derivatives;

        back_propagation->state_weights_derivatives += hidden_states_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);

        copy_index += neurons_number;
    }
}


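/// Computes the derivatives of the error with respect to the output weights W_o.
/// The cell state does not depend on the output gate, so the direct term x(t) only
/// enters the output-combination derivatives, which reach the hidden state through
/// o'(t)*activation(c(t)).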
void LongShortTermMemoryLayer::calculate_output_weights_error_gradient(const Tensor<type, 2>& inputs,
                                                                       LongShortTermMemoryLayerForwardPropagation* forward_propagation,
                                                                       LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);
    const Index inputs_number = get_inputs_number();
    const Index neurons_number = get_neurons_number();
    const Index parameters_number = inputs_number*neurons_number;

    Tensor<type, 2> input_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> forget_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> state_combinations_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> output_combinations_weights_derivatives(parameters_number, neurons_number);

    Tensor<type, 2> hidden_states_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> cell_state_weights_derivatives(parameters_number, neurons_number);

    input_combinations_weights_derivatives.setZero();
    forget_combinations_weights_derivatives.setZero();
    state_combinations_weights_derivatives.setZero();
    output_combinations_weights_derivatives.setZero();
    hidden_states_weights_derivatives.setZero();
    cell_state_weights_derivatives.setZero();

    Index column_index = 0;
    Index input_index = 0;

    Index copy_index = 0;

    back_propagation->output_weights_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        forward_propagation->current_inputs = inputs.chip(sample, 0); // @todo replace chip copy with memcpy

        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample,0); // @todo replace chip copy with memcpy

        // Current activations, derivatives and states for this sample

        memcpy(forward_propagation->current_forget_activations.data(),
               forward_propagation->forget_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_forget_activations_derivatives.data(),
               forward_propagation->forget_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations.data(),
               forward_propagation->input_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations_derivatives.data(),
               forward_propagation->input_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations.data(),
               forward_propagation->state_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations_derivatives.data(),
               forward_propagation->state_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations.data(),
               forward_propagation->output_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations_derivatives.data(),
               forward_propagation->output_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_cell_state_activations.data(),
               forward_propagation->cell_states_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_hidden_states_derivatives.data(),
               forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        if(sample%timesteps == 0)
        {
            forward_propagation->previous_cell_state_activations.setZero();

            forget_combinations_weights_derivatives.setZero();
            input_combinations_weights_derivatives.setZero();
            output_combinations_weights_derivatives.setZero();
            state_combinations_weights_derivatives.setZero();

            cell_state_weights_derivatives.setZero();
        }
        else
        {
            memcpy(forward_propagation->previous_cell_state_activations.data(),
                   forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            forget_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(forget_recurrent_weights, A_B);
            multiply_rows(forget_combinations_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
            input_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(input_recurrent_weights, A_B);
            multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_input_activations_derivatives);
            state_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(state_recurrent_weights, A_B);
            multiply_rows(state_combinations_weights_derivatives, forward_propagation->current_state_activations_derivatives);
            output_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(output_recurrent_weights, A_B);
        }

        column_index = 0;
        input_index = 0;

        for(Index i = 0; i < parameters_number; i++)
        {
            output_combinations_weights_derivatives(i, column_index) += forward_propagation->current_inputs(input_index);

            input_index++;

            if(input_index == inputs_number)
            {
                input_index = 0;
                column_index++;
            }
        }

        multiply_rows(cell_state_weights_derivatives, forward_propagation->current_forget_activations);
        multiply_rows(forget_combinations_weights_derivatives, forward_propagation->previous_cell_state_activations);
        cell_state_weights_derivatives += forget_combinations_weights_derivatives;
        multiply_rows(state_combinations_weights_derivatives, forward_propagation->current_input_activations);
        cell_state_weights_derivatives += state_combinations_weights_derivatives;
        multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_state_activations);
        cell_state_weights_derivatives += input_combinations_weights_derivatives;

        hidden_states_weights_derivatives = cell_state_weights_derivatives;
        multiply_rows(hidden_states_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
        multiply_rows(output_combinations_weights_derivatives, forward_propagation->current_output_activations_derivatives*calculate_activations(forward_propagation->current_cell_state_activations));
        hidden_states_weights_derivatives += output_combinations_weights_derivatives;

        back_propagation->output_weights_derivatives += hidden_states_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);

        copy_index += neurons_number;
    }
}


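/// Computes the derivatives of the error with respect to the forget recurrent weights U_f.
/// The scheme mirrors calculate_forget_weights_error_gradient, with the previous hidden
/// state h(t-1) playing the role of the direct term. Since h(-1) is zero at the start of
/// each sequence, the first sample of a sequence contributes nothing, which is why the
/// whole update is skipped there.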
void LongShortTermMemoryLayer::calculate_forget_recurrent_weights_error_gradient(const Tensor<type, 2>& inputs,
                                                                                 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
                                                                                 LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);
    const Index neurons_number = get_neurons_number();
    const Index parameters_number = neurons_number*neurons_number;

    Tensor<type, 2> input_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> forget_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> state_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> output_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);

    Tensor<type, 2> hidden_states_recurrent_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> cell_state_recurrent_weights_derivatives(parameters_number, neurons_number);

    Index column_index = 0;
    Index activation_index = 0;

    Index copy_index = 0;

    back_propagation->forget_recurrent_weights_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);

        // Current activations, derivatives and states for this sample

        memcpy(forward_propagation->current_forget_activations.data(),
               forward_propagation->forget_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_forget_activations_derivatives.data(),
               forward_propagation->forget_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations.data(),
               forward_propagation->input_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations_derivatives.data(),
               forward_propagation->input_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations.data(),
               forward_propagation->state_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations_derivatives.data(),
               forward_propagation->state_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations.data(),
               forward_propagation->output_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations_derivatives.data(),
               forward_propagation->output_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_cell_state_activations.data(),
               forward_propagation->cell_states_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_hidden_states_derivatives.data(),
               forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        if(sample%timesteps == 0)
        {
            cell_state_recurrent_weights_derivatives.setZero();
            hidden_states_recurrent_weights_derivatives.setZero();
        }
        else
        {
            memcpy(forward_propagation->previous_hidden_state_activations.data(),
                   forward_propagation->hidden_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            memcpy(forward_propagation->previous_cell_state_activations.data(),
                   forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            forget_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(forget_recurrent_weights, A_B);
            input_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(input_recurrent_weights, A_B);
            multiply_rows(input_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations_derivatives);
            state_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(state_recurrent_weights, A_B);
            multiply_rows(state_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations_derivatives);
            output_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(output_recurrent_weights, A_B);
            multiply_rows(output_combinations_recurrent_weights_derivatives, forward_propagation->current_output_activations_derivatives);

            column_index = 0;
            activation_index = 0;

            for(Index i = 0; i < parameters_number; i++)
            {
                forget_combinations_recurrent_weights_derivatives(i, column_index) += forward_propagation->previous_hidden_state_activations(activation_index);

                activation_index++;

                if(activation_index == neurons_number)
                {
                    activation_index = 0;
                    column_index++;
                }
            }

            multiply_rows(cell_state_recurrent_weights_derivatives, forward_propagation->current_forget_activations);
            multiply_rows(input_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations);
            cell_state_recurrent_weights_derivatives += input_combinations_recurrent_weights_derivatives;
            multiply_rows(state_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations);
            cell_state_recurrent_weights_derivatives += state_combinations_recurrent_weights_derivatives;
            multiply_rows(forget_combinations_recurrent_weights_derivatives, forward_propagation->current_forget_activations_derivatives*forward_propagation->previous_cell_state_activations);
            cell_state_recurrent_weights_derivatives += forget_combinations_recurrent_weights_derivatives;

            hidden_states_recurrent_weights_derivatives = cell_state_recurrent_weights_derivatives;
            multiply_rows(hidden_states_recurrent_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
            multiply_rows(output_combinations_recurrent_weights_derivatives, calculate_activations(forward_propagation->current_cell_state_activations));
            hidden_states_recurrent_weights_derivatives += output_combinations_recurrent_weights_derivatives;
        }

        back_propagation->forget_recurrent_weights_derivatives += hidden_states_recurrent_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);

        copy_index += neurons_number;
    }
}


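/// Computes the derivatives of the error with respect to the input recurrent weights U_i.
/// As in the forget case, h(t-1) is the direct term, here added to the input-combination
/// derivatives.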
void LongShortTermMemoryLayer::calculate_input_recurrent_weights_error_gradient(const Tensor<type, 2>& inputs,
                                                                                LongShortTermMemoryLayerForwardPropagation* forward_propagation,
                                                                                LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);
    const Index neurons_number = get_neurons_number();
    const Index parameters_number = neurons_number*neurons_number;

    Tensor<type, 2> input_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> forget_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> state_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> output_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);

    Tensor<type, 2> hidden_states_recurrent_weights_derivatives(parameters_number, neurons_number);
    Tensor<type, 2> cell_state_recurrent_weights_derivatives(parameters_number, neurons_number);

    Index column_index = 0;
    Index activation_index = 0;

    Index copy_index = 0;

    back_propagation->input_recurrent_weights_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);

        // Current activations, derivatives and states for this sample

        memcpy(forward_propagation->current_forget_activations.data(),
               forward_propagation->forget_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_forget_activations_derivatives.data(),
               forward_propagation->forget_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations.data(),
               forward_propagation->input_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations_derivatives.data(),
               forward_propagation->input_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations.data(),
               forward_propagation->state_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations_derivatives.data(),
               forward_propagation->state_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations.data(),
               forward_propagation->output_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations_derivatives.data(),
               forward_propagation->output_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_cell_state_activations.data(),
               forward_propagation->cell_states_activations.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_hidden_states_derivatives.data(),
               forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        if(sample%timesteps == 0)
        {
            cell_state_recurrent_weights_derivatives.setZero();
            hidden_states_recurrent_weights_derivatives.setZero();
        }
        else
        {
            memcpy(forward_propagation->previous_hidden_state_activations.data(),
                   forward_propagation->hidden_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            memcpy(forward_propagation->previous_cell_state_activations.data(),
                   forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            forget_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(forget_recurrent_weights, A_B);
            multiply_rows(forget_combinations_recurrent_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
            input_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(input_recurrent_weights, A_B);
            state_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(state_recurrent_weights, A_B);
            multiply_rows(state_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations_derivatives);
            output_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(output_recurrent_weights, A_B);
            multiply_rows(output_combinations_recurrent_weights_derivatives, forward_propagation->current_output_activations_derivatives);

            column_index = 0;
            activation_index = 0;

            for(Index i = 0; i < parameters_number; i++)
            {
                input_combinations_recurrent_weights_derivatives(i, column_index) += forward_propagation->previous_hidden_state_activations(activation_index);

                activation_index++;

                if(activation_index == neurons_number)
                {
                    activation_index = 0;
                    column_index++;
                }
            }

            multiply_rows(cell_state_recurrent_weights_derivatives, forward_propagation->current_forget_activations);
            multiply_rows(input_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations_derivatives*forward_propagation->current_state_activations);
            cell_state_recurrent_weights_derivatives += input_combinations_recurrent_weights_derivatives;
            multiply_rows(state_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations);
            cell_state_recurrent_weights_derivatives += state_combinations_recurrent_weights_derivatives;
            multiply_rows(forget_combinations_recurrent_weights_derivatives, forward_propagation->previous_cell_state_activations);
            cell_state_recurrent_weights_derivatives += forget_combinations_recurrent_weights_derivatives;

            hidden_states_recurrent_weights_derivatives = cell_state_recurrent_weights_derivatives;
            multiply_rows(hidden_states_recurrent_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
            multiply_rows(output_combinations_recurrent_weights_derivatives, calculate_activations(forward_propagation->current_cell_state_activations));
            hidden_states_recurrent_weights_derivatives += output_combinations_recurrent_weights_derivatives;
        }

        back_propagation->input_recurrent_weights_derivatives += hidden_states_recurrent_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);

        copy_index += neurons_number;
    }
}


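/// Computes the derivatives of the error with respect to the state recurrent weights U_s,
/// with h(t-1) as the direct term on the state-combination derivatives. Unlike its two
/// siblings above, this routine keeps its work tensors in the back propagation structure
/// rather than on the stack.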
3043void LongShortTermMemoryLayer::calculate_state_recurrent_weights_error_gradient(const Tensor<type, 2>& inputs,
3044 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3045 LongShortTermMemoryLayerBackPropagation* back_propagation) const
3046{
3047 const Index samples_number = inputs.dimension(0);
3048 const Index neurons_number = get_neurons_number();
3049 const Index parameters_number = neurons_number*neurons_number;
3050
3051 Tensor<type, 1> forget_recurrent_weights_error_gradient(parameters_number);
3052 forget_recurrent_weights_error_gradient.setZero();
3053
3054 Index column_index = 0;
3055 Index activation_index = 0;
3056
3057 Index copy_index = 0;
3058
3059 back_propagation->state_recurrent_weights_derivatives.setZero();
3060
3061 for(Index sample = 0; sample < samples_number; sample++)
3062 {
3063 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
3064
3065 memcpy(forward_propagation->current_forget_activations.data(),
3066 forward_propagation->forget_activations.data() + copy_index, static_cast<size_t>(neurons_number)*sizeof(type));
3067 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3068 forward_propagation->forget_activations_derivatives.data() + copy_index,
3069 static_cast<size_t>(neurons_number)*sizeof(type));
3070
3071 memcpy(forward_propagation->current_input_activations.data(),
3072 forward_propagation->input_activations.data() + copy_index,
3073 static_cast<size_t>(neurons_number)*sizeof(type));
3074 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3075 forward_propagation->input_activations_derivatives.data() + copy_index,
3076 static_cast<size_t>(neurons_number)*sizeof(type));
3077
3078 memcpy(forward_propagation->current_state_activations.data(),
3079 forward_propagation->state_activations.data() + copy_index,
3080 static_cast<size_t>(neurons_number)*sizeof(type));
3081 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3082 forward_propagation->state_activations_derivatives.data() + copy_index,
3083 static_cast<size_t>(neurons_number)*sizeof(type));
3084
3085 memcpy(forward_propagation->current_output_activations.data(),
3086 forward_propagation->output_activations.data() + copy_index,
3087 static_cast<size_t>(neurons_number)*sizeof(type));
3088 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3089 forward_propagation->output_activations_derivatives.data() + copy_index,
3090 static_cast<size_t>(neurons_number)*sizeof(type));
3091
3092 memcpy(forward_propagation->current_cell_state_activations.data(),
3093 forward_propagation->cell_states_activations.data() + copy_index,
3094 static_cast<size_t>(neurons_number)*sizeof(type));
3095
3096 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3097 forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
3098 static_cast<size_t>(neurons_number)*sizeof(type));
3099
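 // At the start of each time series the derivative recurrences are reset:
 // the initial cell and hidden states do not depend on the parameters.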
3100 if(sample%timesteps == 0)
3101 {
3102 back_propagation->cell_state_recurrent_weights_derivatives.setZero();
3103 back_propagation->hidden_states_recurrent_weights_derivatives.setZero();
3104 }
3105 else
3106 {
3107 memcpy(forward_propagation->previous_hidden_state_activations.data(),
3108 forward_propagation->hidden_states_activations.data() + (copy_index-neurons_number),
3109 static_cast<size_t>(neurons_number)*sizeof(type));
3110
3111 memcpy(forward_propagation->previous_cell_state_activations.data(),
3112 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3113 static_cast<size_t>(neurons_number)*sizeof(type));
3114
3115 back_propagation->forget_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(forget_recurrent_weights, A_B);
3116 multiply_rows(back_propagation->forget_combinations_recurrent_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
3117 back_propagation->input_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(input_recurrent_weights, A_B);
3118 multiply_rows(back_propagation->input_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations_derivatives);
3119 back_propagation->state_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(state_recurrent_weights, A_B);
3120 back_propagation->output_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(output_recurrent_weights, A_B);
3121 multiply_rows(back_propagation->output_combinations_recurrent_weights_derivatives, forward_propagation->current_output_activations_derivatives);
3122
3123 column_index = 0;
3124 activation_index = 0;
3125
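 // Direct dependence of the state combinations on their own recurrent weights:
 // d(combination_j)/d(R(k,j)) = h_k(t-1), with the parameters traversed in
 // column-major order.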
3126 for(Index i = 0; i < parameters_number; i++)
3127 {
3128 back_propagation->state_combinations_recurrent_weights_derivatives(i, column_index) += forward_propagation->previous_hidden_state_activations[activation_index];
3129
3130 activation_index++;
3131
3132 if(activation_index == neurons_number)
3133 {
3134 activation_index = 0;
3135 column_index++;
3136 }
3137 }
3138
3139 multiply_rows(back_propagation->cell_state_recurrent_weights_derivatives, forward_propagation->current_forget_activations);
3140 multiply_rows(back_propagation->input_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations);
3141 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->input_combinations_recurrent_weights_derivatives;
3142 multiply_rows(back_propagation->state_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations_derivatives*forward_propagation->current_input_activations);
3143 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->state_combinations_recurrent_weights_derivatives;
3144 multiply_rows(back_propagation->forget_combinations_recurrent_weights_derivatives, forward_propagation->previous_cell_state_activations);
3145 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->forget_combinations_recurrent_weights_derivatives;
3146
3147 back_propagation->hidden_states_recurrent_weights_derivatives = back_propagation->cell_state_recurrent_weights_derivatives;
3148 multiply_rows(back_propagation->hidden_states_recurrent_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3149 multiply_rows(back_propagation->output_combinations_recurrent_weights_derivatives, calculate_activations(forward_propagation->current_cell_state_activations));
3150 back_propagation->hidden_states_recurrent_weights_derivatives += back_propagation->output_combinations_recurrent_weights_derivatives;
3151 }
3152
3153 back_propagation->state_recurrent_weights_derivatives += back_propagation->hidden_states_recurrent_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
3154
3155 copy_index += neurons_number;
3156 }
3157}
3158
3159
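/// Computes the gradient of the error with respect to the output recurrent weights,
/// using the same forward derivative recurrences through time; the direct dependence
/// of the output combinations on their own recurrent weights contributes the
/// previous hidden state.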
3160void LongShortTermMemoryLayer::calculate_output_recurrent_weights_error_gradient(const Tensor<type, 2>& inputs,
3161 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3162 LongShortTermMemoryLayerBackPropagation* back_propagation) const
3163{
3164 const Index samples_number = inputs.dimension(0);
3165 const Index neurons_number = get_neurons_number();
3166 const Index parameters_number = neurons_number*neurons_number;
3167
3168 Index column_index = 0;
3169 Index activation_index = 0;
3170
3171 Index copy_index = 0;
3172
3173 back_propagation->output_recurrent_weights_derivatives.setZero();
3174
3175 for(Index sample = 0; sample < samples_number; sample++)
3176 {
3177 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
3178
3179 memcpy(forward_propagation->current_forget_activations.data(),
3180 forward_propagation->forget_activations.data()+copy_index,
3181 static_cast<size_t>(neurons_number)*sizeof(type));
3182
3183 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3184 forward_propagation->forget_activations_derivatives.data()+copy_index,
3185 static_cast<size_t>(neurons_number)*sizeof(type));
3186
3187 memcpy(forward_propagation->current_input_activations.data(),
3188 forward_propagation->input_activations.data()+copy_index,
3189 static_cast<size_t>(neurons_number)*sizeof(type));
3190
3191 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3192 forward_propagation->input_activations_derivatives.data()+copy_index,
3193 static_cast<size_t>(neurons_number)*sizeof(type));
3194
3195 memcpy(forward_propagation->current_state_activations.data(),
3196 forward_propagation->state_activations.data()+copy_index,
3197 static_cast<size_t>(neurons_number)*sizeof(type));
3198
3199 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3200 forward_propagation->state_activations_derivatives.data()+copy_index,
3201 static_cast<size_t>(neurons_number)*sizeof(type));
3202
3203 memcpy(forward_propagation->current_output_activations.data(),
3204 forward_propagation->output_activations.data()+copy_index,
3205 static_cast<size_t>(neurons_number)*sizeof(type));
3206
3207 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3208 forward_propagation->output_activations_derivatives.data()+copy_index,
3209 static_cast<size_t>(neurons_number)*sizeof(type));
3210
3211 memcpy(forward_propagation->current_cell_state_activations.data(),
3212 forward_propagation->cell_states_activations.data()+copy_index,
3213 static_cast<size_t>(neurons_number)*sizeof(type));
3214
3215 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3216 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3217 static_cast<size_t>(neurons_number)*sizeof(type));
3218
3219 if(sample%timesteps == 0)
3220 {
3221 back_propagation->cell_state_recurrent_weights_derivatives.setZero();
3222 back_propagation->hidden_states_recurrent_weights_derivatives.setZero();
3223 }
3224 else
3225 {
3226 memcpy(forward_propagation->previous_hidden_state_activations.data(),
3227 forward_propagation->hidden_states_activations.data() + (copy_index-neurons_number),
3228 static_cast<size_t>(neurons_number)*sizeof(type));
3229
3230 memcpy(forward_propagation->previous_cell_state_activations.data(),
3231 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3232 static_cast<size_t>(neurons_number)*sizeof(type));
3233
3234 back_propagation->forget_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(forget_recurrent_weights, A_B);
3235 multiply_rows(back_propagation->forget_combinations_recurrent_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
3236 back_propagation->input_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(input_recurrent_weights, A_B);
3237 multiply_rows(back_propagation->input_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations_derivatives);
3238 back_propagation->state_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(state_recurrent_weights, A_B);
3239 multiply_rows(back_propagation->state_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations_derivatives);
3240 back_propagation->output_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(output_recurrent_weights, A_B);
3241
3242 column_index = 0;
3243 activation_index = 0;
3244
3245 for(Index i = 0; i < parameters_number; i++)
3246 {
3247 back_propagation->output_combinations_recurrent_weights_derivatives(i, column_index) += forward_propagation->previous_hidden_state_activations[activation_index];
3248
3249 activation_index++;
3250
3251 if(activation_index == neurons_number)
3252 {
3253 activation_index = 0;
3254 column_index++;
3255 }
3256 }
3257
3258 multiply_rows(back_propagation->cell_state_recurrent_weights_derivatives, forward_propagation->current_forget_activations);
3259 multiply_rows(back_propagation->input_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations);
3260 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->input_combinations_recurrent_weights_derivatives;
3261 multiply_rows(back_propagation->state_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations);
3262 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->state_combinations_recurrent_weights_derivatives;
3263 multiply_rows(back_propagation->forget_combinations_recurrent_weights_derivatives, forward_propagation->previous_cell_state_activations);
3264 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->forget_combinations_recurrent_weights_derivatives;
3265
3266 back_propagation->hidden_states_recurrent_weights_derivatives = back_propagation->cell_state_recurrent_weights_derivatives;
3267 multiply_rows(back_propagation->hidden_states_recurrent_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3268 multiply_rows(back_propagation->output_combinations_recurrent_weights_derivatives, forward_propagation->current_output_activations_derivatives*calculate_activations(forward_propagation->current_cell_state_activations));
3269 back_propagation->hidden_states_recurrent_weights_derivatives += back_propagation->output_combinations_recurrent_weights_derivatives;
3270 }
3271
3272 back_propagation->output_recurrent_weights_derivatives += back_propagation->hidden_states_recurrent_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
3273
3274 copy_index += neurons_number;
3275 }
3276}
3277
3278
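/// Computes the gradient of the error with respect to the forget biases. The direct
/// dependence of the forget combinations on their biases is the identity, hence the
/// 1 added on the diagonal before the gate derivatives are applied.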
3279void LongShortTermMemoryLayer::calculate_forget_biases_error_gradient(const Tensor<type, 2>& inputs,
3280 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3281 LongShortTermMemoryLayerBackPropagation* back_propagation) const
3282{
3283 const Index samples_number = inputs.dimension(0);
3284 const Index neurons_number = get_neurons_number();
3285 const Index parameters_number = neurons_number;
3286
3287 back_propagation->input_combinations_biases_derivatives.setZero();
3288 back_propagation->forget_combinations_biases_derivatives.setZero();
3289 back_propagation->state_combinations_biases_derivatives.setZero();
3290 back_propagation->output_combinations_biases_derivatives.setZero();
3291
3292 back_propagation->hidden_states_biases_derivatives.setZero();
3293 back_propagation->cell_state_biases_derivatives.setZero();
3294
3295 Index copy_index = 0;
3296
3297 back_propagation->forget_biases_derivatives.setZero();
3298
3299 for(Index sample = 0; sample < samples_number; sample++)
3300 {
3301 const Tensor<type, 1> current_layer_deltas = back_propagation->delta.chip(sample, 0);
3302
3303 memcpy(forward_propagation->current_forget_activations.data(),
3304 forward_propagation->forget_activations.data()+copy_index,
3305 static_cast<size_t>(neurons_number)*sizeof(type));
3306
3307 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3308 forward_propagation->forget_activations_derivatives.data()+copy_index,
3309 static_cast<size_t>(neurons_number)*sizeof(type));
3310
3311 memcpy(forward_propagation->current_input_activations.data(),
3312 forward_propagation->input_activations.data()+copy_index,
3313 static_cast<size_t>(neurons_number)*sizeof(type));
3314
3315 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3316 forward_propagation->input_activations_derivatives.data()+copy_index,
3317 static_cast<size_t>(neurons_number)*sizeof(type));
3318
3319 memcpy(forward_propagation->current_state_activations.data(),
3320 forward_propagation->state_activations.data()+copy_index,
3321 static_cast<size_t>(neurons_number)*sizeof(type));
3322
3323 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3324 forward_propagation->state_activations_derivatives.data()+copy_index,
3325 static_cast<size_t>(neurons_number)*sizeof(type));
3326
3327 memcpy(forward_propagation->current_output_activations.data(),
3328 forward_propagation->output_activations.data()+copy_index,
3329 static_cast<size_t>(neurons_number)*sizeof(type));
3330
3331 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3332 forward_propagation->output_activations_derivatives.data()+copy_index,
3333 static_cast<size_t>(neurons_number)*sizeof(type));
3334
3335 memcpy(forward_propagation->current_cell_state_activations.data(),
3336 forward_propagation->cell_states_activations.data()+copy_index,
3337 static_cast<size_t>(neurons_number)*sizeof(type));
3338
3339 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3340 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3341 static_cast<size_t>(neurons_number)*sizeof(type));
3342
3343 if(sample%timesteps == 0)
3344 {
3345 back_propagation->forget_combinations_biases_derivatives.setZero();
3346 back_propagation->input_combinations_biases_derivatives.setZero();
3347 back_propagation->state_combinations_biases_derivatives.setZero();
3348 back_propagation->output_combinations_biases_derivatives.setZero();
3349
3350 forward_propagation->previous_cell_state_activations.setZero();
3351
3352 back_propagation->cell_state_biases_derivatives.setZero();
3353 }
3354 else
3355 {
3356 memcpy(forward_propagation->previous_cell_state_activations.data(),
3357 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3358 static_cast<size_t>(neurons_number)*sizeof(type));
3359
3360 back_propagation->forget_combinations_biases_derivatives
3361 = back_propagation->hidden_states_biases_derivatives.contract(forget_recurrent_weights, A_B);
3362
3363 back_propagation->input_combinations_biases_derivatives
3364 = back_propagation->hidden_states_biases_derivatives.contract(input_recurrent_weights, A_B);
3365
3366 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3367 forward_propagation->current_input_activations_derivatives);
3368
3369 back_propagation->state_combinations_biases_derivatives
3370 = back_propagation->hidden_states_biases_derivatives.contract(state_recurrent_weights, A_B);
3371
3372 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3373 forward_propagation->current_state_activations_derivatives);
3374
3375 back_propagation->output_combinations_biases_derivatives
3376 = back_propagation->hidden_states_biases_derivatives.contract(output_recurrent_weights, A_B);
3377
3378 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3379 forward_propagation->current_output_activations_derivatives);
3380 }
3381
3382 for(Index row = 0; row < parameters_number; row++) back_propagation->forget_combinations_biases_derivatives(row, row) += static_cast<type>(1.0);
3383
3384 multiply_rows(back_propagation->cell_state_biases_derivatives,
3385 forward_propagation->current_forget_activations);
3386
3387 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3388 forward_propagation->current_state_activations);
3389
3390 back_propagation->cell_state_biases_derivatives += back_propagation->input_combinations_biases_derivatives;
3391
3392 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3393 forward_propagation->current_input_activations);
3394
3395 back_propagation->cell_state_biases_derivatives += back_propagation->state_combinations_biases_derivatives;
3396
3397 multiply_rows(back_propagation->forget_combinations_biases_derivatives,
3398 forward_propagation->current_forget_activations_derivatives*forward_propagation->previous_cell_state_activations);
3399
3400 back_propagation->cell_state_biases_derivatives += back_propagation->forget_combinations_biases_derivatives;
3401
3402 back_propagation->hidden_states_biases_derivatives = back_propagation->cell_state_biases_derivatives;
3403
3404 multiply_rows(back_propagation->hidden_states_biases_derivatives,
3405 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3406
3407 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3408 calculate_activations(forward_propagation->current_cell_state_activations));
3409
3410 back_propagation->hidden_states_biases_derivatives += back_propagation->output_combinations_biases_derivatives;
3411
3412 back_propagation->forget_biases_derivatives += back_propagation->hidden_states_biases_derivatives.contract(current_layer_deltas, A_B);
3413
3414 copy_index += neurons_number;
3415 }
3416}
3417
3418
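/// Computes the gradient of the error with respect to the input biases, following
/// the same derivative recurrences through time as the forget biases above.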
3419void LongShortTermMemoryLayer::calculate_input_biases_error_gradient(const Tensor<type, 2>& inputs,
3420 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3421 LongShortTermMemoryLayerBackPropagation* back_propagation) const
3422{
3423 const Index samples_number = inputs.dimension(0);
3424 const Index neurons_number = get_neurons_number();
3425 const Index parameters_number = neurons_number;
3426
3427 back_propagation->input_combinations_biases_derivatives.setZero();
3428 back_propagation->forget_combinations_biases_derivatives.setZero();
3429 back_propagation->state_combinations_biases_derivatives.setZero();
3430 back_propagation->output_combinations_biases_derivatives.setZero();
3431
3432 back_propagation->hidden_states_biases_derivatives.setZero();
3433 back_propagation->cell_state_biases_derivatives.setZero();
3434
3435 Tensor<type, 1> previous_cell_state_activations(neurons_number);
3436
3437 Index copy_index = 0;
3438
3439 back_propagation->input_biases_derivatives.setZero();
3440
3441 for(Index sample = 0; sample < samples_number; sample++)
3442 {
3443 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
3444
3445 memcpy(forward_propagation->current_forget_activations.data(),
3446 forward_propagation->forget_activations.data()+copy_index,
3447 static_cast<size_t>(neurons_number)*sizeof(type));
3448
3449 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3450 forward_propagation->forget_activations_derivatives.data()+copy_index,
3451 static_cast<size_t>(neurons_number)*sizeof(type));
3452
3453 memcpy(forward_propagation->current_input_activations.data(),
3454 forward_propagation->input_activations.data()+copy_index,
3455 static_cast<size_t>(neurons_number)*sizeof(type));
3456
3457 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3458 forward_propagation->input_activations_derivatives.data()+copy_index,
3459 static_cast<size_t>(neurons_number)*sizeof(type));
3460
3461 memcpy(forward_propagation->current_state_activations.data(),
3462 forward_propagation->state_activations.data()+copy_index,
3463 static_cast<size_t>(neurons_number)*sizeof(type));
3464
3465 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3466 forward_propagation->state_activations_derivatives.data()+copy_index,
3467 static_cast<size_t>(neurons_number)*sizeof(type));
3468
3469 memcpy(forward_propagation->current_output_activations.data(),
3470 forward_propagation->output_activations.data()+copy_index,
3471 static_cast<size_t>(neurons_number)*sizeof(type));
3472
3473 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3474 forward_propagation->output_activations_derivatives.data()+copy_index,
3475 static_cast<size_t>(neurons_number)*sizeof(type));
3476
3477 memcpy(forward_propagation->current_cell_state_activations.data(),
3478 forward_propagation->cell_states_activations.data()+copy_index,
3479 static_cast<size_t>(neurons_number)*sizeof(type));
3480
3481 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3482 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3483 static_cast<size_t>(neurons_number)*sizeof(type));
3484
3485 if(sample%timesteps == 0)
3486 {
3487 back_propagation->forget_combinations_biases_derivatives.setZero();
3488 back_propagation->input_combinations_biases_derivatives.setZero();
3489 back_propagation->state_combinations_biases_derivatives.setZero();
3490 back_propagation->output_combinations_biases_derivatives.setZero();
3491
3492 previous_cell_state_activations.setZero();
3493 back_propagation->cell_state_biases_derivatives.setZero();
3494 }
3495 else
3496 {
3497 memcpy(previous_cell_state_activations.data(),
3498 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3499 static_cast<size_t>(neurons_number)*sizeof(type));
3500
3501 back_propagation->forget_combinations_biases_derivatives
3502 = back_propagation->hidden_states_biases_derivatives.contract(forget_recurrent_weights, A_B);
3503
3504 multiply_rows(back_propagation->forget_combinations_biases_derivatives,
3505 forward_propagation->current_forget_activations_derivatives);
3506
3507 back_propagation->input_combinations_biases_derivatives
3508 = back_propagation->hidden_states_biases_derivatives.contract(input_recurrent_weights, A_B);
3509
3510 back_propagation->state_combinations_biases_derivatives
3511 = back_propagation->hidden_states_biases_derivatives.contract(state_recurrent_weights, A_B);
3512
3513 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3514 forward_propagation->current_state_activations_derivatives);
3515
3516 back_propagation->output_combinations_biases_derivatives
3517 = back_propagation->hidden_states_biases_derivatives.contract(output_recurrent_weights, A_B);
3518
3519 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3520 forward_propagation->current_output_activations_derivatives);
3521 }
3522
3523 for(Index row = 0; row < parameters_number; row++)
3524 back_propagation->input_combinations_biases_derivatives(row, row) += static_cast<type>(1.0);
3525
3526 multiply_rows(back_propagation->cell_state_biases_derivatives,
3527 forward_propagation->current_forget_activations);
3528
3529 multiply_rows(back_propagation->forget_combinations_biases_derivatives, previous_cell_state_activations);
3530
3531 back_propagation->cell_state_biases_derivatives += back_propagation->forget_combinations_biases_derivatives;
3532
3533 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3534 forward_propagation->current_input_activations);
3535
3536 back_propagation->cell_state_biases_derivatives += back_propagation->state_combinations_biases_derivatives;
3537
3538 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3539 forward_propagation->current_input_activations_derivatives*forward_propagation->current_state_activations);
3540
3541 back_propagation->cell_state_biases_derivatives += back_propagation->input_combinations_biases_derivatives;
3542
3543 back_propagation->hidden_states_biases_derivatives = back_propagation->cell_state_biases_derivatives;
3544
3545 multiply_rows(back_propagation->hidden_states_biases_derivatives,
3546 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3547
3548 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3549 calculate_activations(forward_propagation->current_cell_state_activations));
3550
3551 back_propagation->hidden_states_biases_derivatives += back_propagation->output_combinations_biases_derivatives;
3552
3553 back_propagation->input_biases_derivatives
3554 += back_propagation->hidden_states_biases_derivatives.contract(back_propagation->current_layer_deltas, A_B);
3555
3556 copy_index += neurons_number;
3557 }
3558}
3559
3560
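/// Computes the gradient of the error with respect to the state biases; here the
/// identity term is added to the state combinations derivatives.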
3561void LongShortTermMemoryLayer::calculate_state_biases_error_gradient(const Tensor<type, 2>& inputs,
3562 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3563 LongShortTermMemoryLayerBackPropagation* back_propagation) const
3564{
3565 const Index samples_number = inputs.dimension(0);
3566 const Index neurons_number = get_neurons_number();
3567 const Index parameters_number = neurons_number;
3568
3569 back_propagation->input_combinations_biases_derivatives.setZero();
3570 back_propagation->forget_combinations_biases_derivatives.setZero();
3571 back_propagation->state_combinations_biases_derivatives.setZero();
3572 back_propagation->output_combinations_biases_derivatives.setZero();
3573
3574 back_propagation->hidden_states_biases_derivatives.setZero();
3575 back_propagation->cell_state_biases_derivatives.setZero();
3576
3577 Index copy_index = 0;
3578
3579 back_propagation->state_biases_derivatives.setZero();
3580
3581 for(Index sample = 0; sample < samples_number; sample++)
3582 {
3583 const Tensor<type, 1> current_layer_deltas = back_propagation->delta.chip(sample, 0);
3584
3585 memcpy(forward_propagation->current_forget_activations.data(),
3586 forward_propagation->forget_activations.data()+copy_index,
3587 static_cast<size_t>(neurons_number)*sizeof(type));
3588
3589 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3590 forward_propagation->forget_activations_derivatives.data()+copy_index,
3591 static_cast<size_t>(neurons_number)*sizeof(type));
3592
3593 memcpy(forward_propagation->current_input_activations.data(),
3594 forward_propagation->input_activations.data()+copy_index,
3595 static_cast<size_t>(neurons_number)*sizeof(type));
3596
3597 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3598 forward_propagation->input_activations_derivatives.data()+copy_index,
3599 static_cast<size_t>(neurons_number)*sizeof(type));
3600
3601 memcpy(forward_propagation->current_state_activations.data(),
3602 forward_propagation->state_activations.data()+copy_index,
3603 static_cast<size_t>(neurons_number)*sizeof(type));
3604
3605 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3606 forward_propagation->state_activations_derivatives.data()+copy_index,
3607 static_cast<size_t>(neurons_number)*sizeof(type));
3608
3609 memcpy(forward_propagation->current_output_activations.data(),
3610 forward_propagation->output_activations.data()+copy_index,
3611 static_cast<size_t>(neurons_number)*sizeof(type));
3612
3613 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3614 forward_propagation->output_activations_derivatives.data()+copy_index,
3615 static_cast<size_t>(neurons_number)*sizeof(type));
3616
3617 memcpy(forward_propagation->current_cell_state_activations.data(),
3618 forward_propagation->cell_states_activations.data()+copy_index,
3619 static_cast<size_t>(neurons_number)*sizeof(type));
3620
3621 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3622 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3623 static_cast<size_t>(neurons_number)*sizeof(type));
3624
3625 if(sample%timesteps == 0)
3626 {
3627 back_propagation->forget_combinations_biases_derivatives.setZero();
3628 back_propagation->input_combinations_biases_derivatives.setZero();
3629 back_propagation->state_combinations_biases_derivatives.setZero();
3630 back_propagation->output_combinations_biases_derivatives.setZero();
3631
3632 forward_propagation->previous_cell_state_activations.setZero();
3633 back_propagation->cell_state_biases_derivatives.setZero();
3634 }
3635 else
3636 {
3637 memcpy(forward_propagation->previous_cell_state_activations.data(),
3638 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3639 static_cast<size_t>(neurons_number)*sizeof(type));
3640
3641 back_propagation->forget_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(forget_recurrent_weights, A_B);
3642
3643 multiply_rows(back_propagation->forget_combinations_biases_derivatives,
3644 forward_propagation->current_forget_activations_derivatives);
3645
3646 back_propagation->input_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(input_recurrent_weights, A_B);
3647
3648 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3649 forward_propagation->current_input_activations_derivatives);
3650
3651 back_propagation->state_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(state_recurrent_weights, A_B);
3652
3653 back_propagation->output_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(output_recurrent_weights, A_B);
3654
3655 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3656 forward_propagation->current_output_activations_derivatives);
3657 }
3658
3659 for(Index row = 0; row < parameters_number; row++) back_propagation->state_combinations_biases_derivatives(row, row) += static_cast<type>(1.0);
3660
3661 multiply_rows(back_propagation->cell_state_biases_derivatives,
3662 forward_propagation->current_forget_activations);
3663
3664 multiply_rows(back_propagation->forget_combinations_biases_derivatives, forward_propagation->previous_cell_state_activations);
3665
3666 back_propagation->cell_state_biases_derivatives += back_propagation->forget_combinations_biases_derivatives;
3667
3668 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3669 forward_propagation->current_state_activations);
3670
3671 back_propagation->cell_state_biases_derivatives += back_propagation->input_combinations_biases_derivatives;
3672
3673 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3674 forward_propagation->current_state_activations_derivatives*forward_propagation->current_input_activations);
3675
3676 back_propagation->cell_state_biases_derivatives += back_propagation->state_combinations_biases_derivatives;
3677
3678 back_propagation->hidden_states_biases_derivatives = back_propagation->cell_state_biases_derivatives;
3679
3680 multiply_rows(back_propagation->hidden_states_biases_derivatives,
3681 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3682
3683 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3684 calculate_activations(forward_propagation->current_cell_state_activations));
3685
3686 back_propagation->hidden_states_biases_derivatives += back_propagation->output_combinations_biases_derivatives;
3687
3688 back_propagation->state_biases_derivatives += back_propagation->hidden_states_biases_derivatives.contract(current_layer_deltas, A_B);
3689
3690 copy_index += neurons_number;
3691 }
3692}
3693
3694
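/// Computes the gradient of the error with respect to the output biases; here the
/// identity term is added to the output combinations derivatives.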
3695void LongShortTermMemoryLayer::calculate_output_biases_error_gradient(const Tensor<type, 2>& inputs,
3696 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3697 LongShortTermMemoryLayerBackPropagation* back_propagation) const
3698{
3699 const Index samples_number = inputs.dimension(0);
3700 const Index neurons_number = get_neurons_number();
3701 const Index parameters_number = neurons_number;
3702
3703 back_propagation->input_combinations_biases_derivatives.setZero();
3704 back_propagation->forget_combinations_biases_derivatives.setZero();
3705 back_propagation->state_combinations_biases_derivatives.setZero();
3706 back_propagation->output_combinations_biases_derivatives.setZero();
3707 back_propagation->hidden_states_biases_derivatives.setZero();
3708 back_propagation->cell_state_biases_derivatives.setZero();
3709
3710 Index copy_index = 0;
3711
3712 back_propagation->output_biases_derivatives.setZero();
3713
3714 for(Index sample = 0; sample < samples_number; sample++)
3715 {
3716 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
3717
3718 memcpy(forward_propagation->current_forget_activations.data(),
3719 forward_propagation->forget_activations.data()+copy_index,
3720 static_cast<size_t>(neurons_number)*sizeof(type));
3721
3722 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3723 forward_propagation->forget_activations_derivatives.data()+copy_index,
3724 static_cast<size_t>(neurons_number)*sizeof(type));
3725
3726 memcpy(forward_propagation->current_input_activations.data(),
3727 forward_propagation->input_activations.data()+copy_index,
3728 static_cast<size_t>(neurons_number)*sizeof(type));
3729
3730 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3731 forward_propagation->input_activations_derivatives.data()+copy_index,
3732 static_cast<size_t>(neurons_number)*sizeof(type));
3733
3734 memcpy(forward_propagation->current_state_activations.data(),
3735 forward_propagation->state_activations.data()+copy_index,
3736 static_cast<size_t>(neurons_number)*sizeof(type));
3737
3738 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3739 forward_propagation->state_activations_derivatives.data()+copy_index,
3740 static_cast<size_t>(neurons_number)*sizeof(type));
3741
3742 memcpy(forward_propagation->current_output_activations.data(),
3743 forward_propagation->output_activations.data()+copy_index,
3744 static_cast<size_t>(neurons_number)*sizeof(type));
3745
3746 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3747 forward_propagation->output_activations_derivatives.data()+copy_index,
3748 static_cast<size_t>(neurons_number)*sizeof(type));
3749
3750 memcpy(forward_propagation->current_cell_state_activations.data(),
3751 forward_propagation->cell_states_activations.data()+copy_index,
3752 static_cast<size_t>(neurons_number)*sizeof(type));
3753
3754 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3755 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3756 static_cast<size_t>(neurons_number)*sizeof(type));
3757
3758 if(sample%timesteps == 0)
3759 {
3760 back_propagation->forget_combinations_biases_derivatives.setZero();
3761 back_propagation->input_combinations_biases_derivatives.setZero();
3762 back_propagation->state_combinations_biases_derivatives.setZero();
3763 back_propagation->output_combinations_biases_derivatives.setZero();
3764
3765 forward_propagation->previous_cell_state_activations.setZero();
3766 back_propagation->cell_state_biases_derivatives.setZero();
3767 }
3768 else
3769 {
3770 memcpy(forward_propagation->previous_cell_state_activations.data(),
3771 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3772 static_cast<size_t>(neurons_number)*sizeof(type));
3773
3774 back_propagation->forget_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(forget_recurrent_weights, A_B);
3775
3776 multiply_rows(back_propagation->forget_combinations_biases_derivatives,
3777 forward_propagation->current_forget_activations_derivatives);
3778
3779 back_propagation->input_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(input_recurrent_weights, A_B);
3780
3781 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3782 forward_propagation->current_input_activations_derivatives);
3783
3784 back_propagation->state_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(state_recurrent_weights, A_B);
3785
3786 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3787 forward_propagation->current_state_activations_derivatives);
3788
3789 back_propagation->output_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(output_recurrent_weights, A_B);
3790 }
3791
3792 for(Index row = 0; row < parameters_number; row++) back_propagation->output_combinations_biases_derivatives(row, row) += static_cast<type>(1.0);
3793
3794 multiply_rows(back_propagation->cell_state_biases_derivatives,
3795 forward_propagation->current_forget_activations);
3796
3797 multiply_rows(back_propagation->forget_combinations_biases_derivatives, forward_propagation->previous_cell_state_activations);
3798
3799 back_propagation->cell_state_biases_derivatives += back_propagation->forget_combinations_biases_derivatives;
3800
3801 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3802 forward_propagation->current_input_activations);
3803
3804 back_propagation->cell_state_biases_derivatives += back_propagation->state_combinations_biases_derivatives;
3805
3806 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3807 forward_propagation->current_state_activations);
3808
3809 back_propagation->cell_state_biases_derivatives += back_propagation->input_combinations_biases_derivatives;
3810
3811 back_propagation->hidden_states_biases_derivatives = back_propagation->cell_state_biases_derivatives;
3812
3813 multiply_rows(back_propagation->hidden_states_biases_derivatives,
3814 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3815
3816 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3817 forward_propagation->current_output_activations_derivatives*calculate_activations(forward_propagation->current_cell_state_activations));
3818
3819 back_propagation->hidden_states_biases_derivatives += back_propagation->output_combinations_biases_derivatives;
3820
3821 back_propagation->output_biases_derivatives += back_propagation->hidden_states_biases_derivatives.contract(back_propagation->current_layer_deltas, A_B);
3822
3823 copy_index += neurons_number;
3824 }
3825}
3826
3827
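/// Returns a string with the mathematical expression of the layer: the forget,
/// input, state and output gates, followed by the cell state, hidden state and
/// output updates.
/// @param inputs_names Names of the layer inputs.
/// @param outputs_names Names of the layer outputs.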
3832
3833string LongShortTermMemoryLayer::write_expression(const Tensor<string, 1>& inputs_names, const Tensor<string, 1>& outputs_names) const
3834{
3835 const Index neurons_number = get_neurons_number();
3836
3837 const Index inputs_number = get_inputs_number();
3838
3839#ifdef OPENNN_DEBUG
3840
3841 const Index inputs_name_size = inputs_names.size();
3842
3843 if(inputs_name_size != inputs_number)
3844 {
3845 ostringstream buffer;
3846
3847 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
3848 << "string write_expression(const Tensor<string, 1>&, const Tensor<string, 1>&) const method.\n"
3849 << "Size of inputs name must be equal to number of layer inputs.\n";
3850
3851 throw logic_error(buffer.str());
3852 }
3853
3854 const Index outputs_name_size = outputs_names.size();
3855
3856 if(outputs_name_size != neurons_number)
3857 {
3858 ostringstream buffer;
3859
3860 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
3861 << "string write_expression(const Tensor<string, 1>&, const Tensor<string, 1>&) const method.\n"
3862 << "Size of outputs name must be equal to number of neurons.\n";
3863
3864 throw logic_error(buffer.str());
3865 }
3866
3867#endif
3868
3869 ostringstream buffer;
3870
3871 // Forget gate
3872
3873 for(Index i = 0; i < neurons_number; i++)
3874 {
3875 buffer << "forget_gate_" << to_string(i) << " = " << write_recurrent_activation_function_expression() << " (" << forget_biases[i] << " + ";
3876
3877 for(Index j = 0; j < inputs_number; j++)
3878 {
3879 buffer << inputs_names[j] << " * (" << forget_weights(j,i) << ") + ";
3880 }
3881
3882 for(Index k = 0; k < neurons_number-1; k++)
3883 {
3884 buffer << "hidden_state_" << to_string(k) << "(t-1) * (" << forget_recurrent_weights(k,i) << ") + ";
3885 }
3886
3887 buffer << "hidden_state_" << to_string(neurons_number-1) << "(t-1) * (" << forget_recurrent_weights(neurons_number-1,i) << ") );\n";
3888 }
3889
3890 // Input gate
3891
3892 for(Index i = 0; i < neurons_number; i++)
3893 {
3894 buffer << "input_gate_" << to_string(i) << " = " << write_recurrent_activation_function_expression() << " (" << input_biases[i] << " + ";
3895
3896 for(Index j = 0; j < inputs_number; j++)
3897 {
3898 buffer << inputs_names[j] << " * (" << input_weights(j,i) << ") + ";
3899 }
3900
3901 for(Index k = 0; k < neurons_number-1; k++)
3902 {
3903 buffer << "hidden_state_" << to_string(k) << "(t-1) * (" << input_recurrent_weights(k,i) << ") + ";
3904 }
3905
3906 buffer << "hidden_state_" << to_string(neurons_number-1) << "(t-1) * (" << input_recurrent_weights(neurons_number-1,i) << ") );\n";
3907 }
3908
3909 // State gate
3910
3911 for(Index i = 0; i < neurons_number; i++)
3912 {
3913 buffer << "state_gate_" << to_string(i) << " = " << write_activation_function_expression() << " (" << state_biases[i] << " + ";
3914
3915 for(Index j = 0; j < inputs_number; j++)
3916 {
3917 buffer << inputs_names[j] << " * (" << state_weights(j,i) << ") + ";
3918 }
3919
3920 for(Index k = 0; k < neurons_number-1; k++)
3921 {
3922 buffer << "hidden_state_" << to_string(k) << "(t-1) * (" << state_recurrent_weights(k,i) << ") + ";
3923 }
3924
3925 buffer << "hidden_state_" << to_string(neurons_number-1) << "(t-1) * (" << state_recurrent_weights(neurons_number-1,i) << ") );\n";
3926 }
3927
3928 // Output gate
3929
3930 for(Index i = 0; i < neurons_number; i++)
3931 {
3932 buffer << "output_gate_" << to_string(i) << " = " << write_recurrent_activation_function_expression() << " (" << output_biases[i] << " + ";
3933
3934 for(Index j = 0; j < inputs_number; j++)
3935 {
3936 buffer << inputs_names[j] << " * (" << output_weights(j,i) << ") + ";
3937 }
3938
3939 for(Index k = 0; k < neurons_number-1; k++)
3940 {
3941 buffer << "hidden_state_" << to_string(k) << "(t-1) * (" << output_recurrent_weights(k,i) << ") + ";
3942 }
3943
3944 buffer << "hidden_state_" << to_string(neurons_number-1) << "(t-1) * (" << output_recurrent_weights(neurons_number-1,i) << ") );\n";
3945 }
3946
3947 // Cell state
3948
3949 for(Index i = 0; i < neurons_number; i++)
3950 {
3951 buffer << "cell_state_" << to_string(i) << "(t) = forget_gate_" << to_string(i) << " * cell_state_" << to_string(i) << "(t-1)+input_gate_" << to_string(i) << " * state_gate_" << to_string(i) << ";\n";
3952 }
3953
3954 // Hidden state
3955
3956 for(Index i = 0; i < neurons_number; i++)
3957 {
3958 buffer << "hidden_state_" << to_string(i) << "(t) = output_gate_" << to_string(i) << " * " << write_activation_function_expression() << "(cell_state_" << to_string(i) << ");\n";
3959 }
3960
3961 // Output
3962
3963 for(Index i = 0; i < neurons_number; i++)
3964 {
3965 buffer << outputs_names[i] << " = " << "hidden_state_" << to_string(i) << "(t);\n";
3966 }
3967
3968 return buffer.str();
3969}
3970
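/// Returns a string with a C-style implementation of the layer forward pass,
/// wrapping the code produced by write_combinations_c() in a function that maps
/// the inputs vector to the layer outputs.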
3971string LongShortTermMemoryLayer::write_expression_c() const
3972{
3973 ostringstream buffer;
3974
3975 buffer << "vector<float> " << layer_name << "(const vector<float>& inputs)\n{" << endl;
3976
3977 buffer << write_combinations_c();
3978
3979 buffer << "\n\treturn long_short_term_memory_output;\n}" << endl;
3980
3981 return buffer.str();
3982}
3983
3984
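/// Returns a string with the C-style code of the gate combinations and activations,
/// and of the cell state and hidden state updates.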
3986{
3987 ostringstream buffer;
3988
3989 const Index neurons_number = get_neurons_number();
3990 const Index inputs_number = get_inputs_number();
3991
3992 // Forget gate
3993
3994 buffer << "\tvector<float> forget_gate_combinations(" << neurons_number << ");\n" << endl;
3995
3996 for(Index i = 0; i < neurons_number; i++)
3997 {
3998 buffer << "\tforget_gate_combinations[" << i << "] = " << forget_biases(i) << " + ";
3999
4000 for(Index j = 0; j < inputs_number; j++)
4001 {
4002 buffer << " inputs[" << j << "] * (" << forget_weights(j,i) << ") + ";
4003 }
4004
4005 for(Index k = 0; k < neurons_number-1; k++)
4006 {
4007 buffer << "hidden_states[" << k << "]" << " * (" << forget_recurrent_weights(k,i) << ") + ";
4008 }
4009
4010 buffer << "hidden_states[" << neurons_number-1 << "]" << " * (" << forget_recurrent_weights(neurons_number-1,i) << "); \n";
4011 }
4012
4013 buffer << endl;
4014
4015
4016 buffer << "\tvector<float> forget_gate_activations(" << neurons_number << ");\n" << endl;
4017
4018 for(Index i = 0; i < neurons_number; i++)
4019 {
4020 buffer << "\tforget_gate_activations[" << i << "] = ";
4021
4022 switch(recurrent_activation_function)
4023 {
4024 case ActivationFunction::HyperbolicTangent:
4025 buffer << "tanh(forget_gate_combinations[" << i << "]);\n";
4026 break;
4027
4028 case ActivationFunction::RectifiedLinear:
4029 buffer << "forget_gate_combinations[" << i << "] < 0.0 ? 0.0 : forget_gate_combinations[" << i << "];\n";
4030 break;
4031
4032 case ActivationFunction::Logistic:
4033 buffer << "1.0/(1.0 + exp(-forget_gate_combinations[" << i << "]));\n";
4034 break;
4035
4036 case ActivationFunction::Threshold:
4037 buffer << "forget_gate_combinations[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
4038 break;
4039
4040 case ActivationFunction::SymmetricThreshold:
4041 buffer << "forget_gate_combinations[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
4042 break;
4043
4044 case ActivationFunction::Linear:
4045 buffer << "forget_gate_combinations[" << i << "];\n";
4046 break;
4047
4048 case ActivationFunction::ScaledExponentialLinear:
4049 buffer << "forget_gate_combinations[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(forget_gate_combinations[" << i << "]) - 1.0) : 1.0507*forget_gate_combinations[" << i << "];\n";
4050 break;
4051
4052 case ActivationFunction::SoftPlus:
4053 buffer << "log(1.0 + exp(forget_gate_combinations[" << i << "]));\n";
4054 break;
4055
4056 case ActivationFunction::SoftSign:
4057 buffer << "forget_gate_combinations[" << i << "] < 0.0 ? forget_gate_combinations[" << i << "]/(1.0 - forget_gate_combinations[" << i << "] ) : forget_gate_combinations[" << i << "]/(1.0 + forget_gate_combinations[" << i << "] );\n";
4058 break;
4059
4060 case ActivationFunction::ExponentialLinear:
4061 buffer << "forget_gate_combinations[" << i << "] < 0.0 ? 1.0*(exp(forget_gate_combinations[" << i << "]) - 1.0) : forget_gate_combinations[" << i << "];\n";
4062 break;
4063
4064 case ActivationFunction::HardSigmoid:
4065 buffer << "forget_gate_combinations[" << i << "] < -2.5 ? 0.0 : forget_gate_combinations[" << i << "] > 2.5 ? 1.0 : 0.2*forget_gate_combinations[" << i << "] + 0.5;\n"; // hard sigmoid: max(0, min(1, 0.2*x + 0.5))
4066 break;
4067 }
4068 }
4069
4070 buffer << endl;
4071
4072
4073 // Input gate
4074
4075 buffer << "\tvector<float> input_gate_combinations(" << neurons_number << ");\n" << endl;
4076
4077 for(Index i = 0; i < neurons_number; i++)
4078 {
4079 buffer << "\tinput_gate_combinations[" << i << "] = " << input_biases(i) << " + ";
4080
4081 for(Index j = 0; j < inputs_number; j++)
4082 {
4083 buffer << "inputs[" << j << "] * (" << input_weights(j,i) << ") + ";
4084 }
4085
4086 for(Index k = 0; k < neurons_number-1; k++)
4087 {
4088 buffer << "hidden_states[" << k << "]" << " * (" << input_recurrent_weights(k,i) << ") + ";
4089 }
4090
4091 buffer << "hidden_states[" << neurons_number-1 << "]" << " * (" << input_recurrent_weights(neurons_number-1,i) << "); \n";
4092 }
4093
4094 buffer << endl;
4095
4096
4097 buffer << "\tvector<float> input_gate_activations(" << neurons_number << ");\n" << endl;
4098
4099 for(Index i = 0; i < neurons_number; i++)
4100 {
4101 buffer << "\tinput_gate_activations[" << i << "] = ";
4102
4103 switch(recurrent_activation_function)
4104 {
4105 case ActivationFunction::HyperbolicTangent:
4106 buffer << "tanh(input_gate_combinations[" << i << "]);\n";
4107 break;
4108
4109 case ActivationFunction::RectifiedLinear:
4110 buffer << "input_gate_combinations[" << i << "] < 0.0 ? 0.0 : input_gate_combinations[" << i << "];\n";
4111 break;
4112
4113 case ActivationFunction::Logistic:
4114 buffer << "1.0/(1.0 + exp(-input_gate_combinations[" << i << "]));\n";
4115 break;
4116
4117 case ActivationFunction::Threshold:
4118 buffer << "input_gate_combinations[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
4119 break;
4120
4121 case ActivationFunction::SymmetricThreshold:
4122 buffer << "input_gate_combinations[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
4123 break;
4124
4125 case ActivationFunction::Linear:
4126 buffer << "input_gate_combinations[" << i << "];\n";
4127 break;
4128
4129 case ActivationFunction::ScaledExponentialLinear:
4130 buffer << "input_gate_combinations[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(input_gate_combinations[" << i << "]) - 1.0) : 1.0507*input_gate_combinations[" << i << "];\n";
4131 break;
4132
4133 case ActivationFunction::SoftPlus:
4134 buffer << "log(1.0 + exp(input_gate_combinations[" << i << "]));\n";
4135 break;
4136
4137 case ActivationFunction::SoftSign:
4138 buffer << "input_gate_combinations[" << i << "] < 0.0 ? input_gate_combinations[" << i << "]/(1.0 - input_gate_combinations[" << i << "] ) : input_gate_combinations[" << i << "]/(1.0 + input_gate_combinations[" << i << "] );\n";
4139 break;
4140
4141 case ActivationFunction::ExponentialLinear:
4142 buffer << "input_gate_combinations[" << i << "] < 0.0 ? 1.0*(exp(input_gate_combinations[" << i << "]) - 1.0) : input_gate_combinations[" << i << "];\n";
4143 break;
4144
4145 case ActivationFunction::HardSigmoid:
4146 buffer << "input_gate_combinations[" << i << "] < -2.5 ? 0.0 : input_gate_combinations[" << i << "] > 2.5 ? 1.0 : 0.2*input_gate_combinations[" << i << "] + 0.5;\n";
4147 break;
4148 }
4149 }
4150
4151 buffer << endl;
4152
4153
4154 // State gate
4155
4156 buffer << "\tvector<float> state_gate_combinations(" << neurons_number << ");\n" << endl;
4157
4158 for(Index i = 0; i < neurons_number; i++)
4159 {
4160 buffer << "\tstate_gate_combinations[" << i << "] = " << state_biases(i) << " + ";
4161
4162 for(Index j = 0; j < inputs_number; j++)
4163 {
4164 buffer << "inputs[" << j << "] * (" << state_weights(j,i) << ") + ";
4165 }
4166
4167 for(Index k = 0; k < neurons_number-1; k++)
4168 {
4169 buffer << "hidden_states[" << k << "]" << " * (" << state_recurrent_weights(k,i) << ") + ";
4170 }
4171
4172 buffer << "hidden_states[" << neurons_number-1 << "]" << " * (" << state_recurrent_weights(neurons_number-1,i) << "); \n";
4173 }
4174
4175 buffer << endl;
4176
4177
4178 buffer << "\tvector<float> state_gate_activations(" << neurons_number << ");\n" << endl;
4179
4180 for(Index i = 0; i < neurons_number; i++)
4181 {
4182 buffer << "\tstate_gate_activations[" << i << "] = ";
4183
4184 switch(activation_function)
4185 {
4186 case ActivationFunction::HyperbolicTangent:
4187 buffer << "tanh(state_gate_combinations[" << i << "]);\n";
4188 break;
4189
4190 case ActivationFunction::RectifiedLinear:
4191 buffer << "state_gate_combinations[" << i << "] < 0.0 ? 0.0 : state_gate_combinations[" << i << "];\n";
4192 break;
4193
4194 case ActivationFunction::Logistic:
4195 buffer << "1.0/(1.0 + exp(-state_gate_combinations[" << i << "]));\n";
4196 break;
4197
4198 case ActivationFunction::Threshold:
4199 buffer << "state_gate_combinations[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
4200 break;
4201
4202 case ActivationFunction::SymmetricThreshold:
4203 buffer << "state_gate_combinations[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
4204 break;
4205
4206 case ActivationFunction::Linear:
4207 buffer << "state_gate_combinations[" << i << "];\n";
4208 break;
4209
4210 case ActivationFunction::ScaledExponentialLinear:
4211 buffer << "state_gate_combinations[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(state_gate_combinations[" << i << "]) - 1.0) : 1.0507*state_gate_combinations[" << i << "];\n";
4212 break;
4213
4214 case ActivationFunction::SoftPlus:
4215 buffer << "log(1.0 + exp(state_gate_combinations[" << i << "]));\n";
4216 break;
4217
4218 case ActivationFunction::SoftSign:
4219 buffer << "state_gate_combinations[" << i << "] < 0.0 ? state_gate_combinations[" << i << "]/(1.0 - state_gate_combinations[" << i << "] ) : state_gate_combinations[" << i << "]/(1.0 + state_gate_combinations[" << i << "] );\n";
4220 break;
4221
4222 case ActivationFunction::ExponentialLinear:
4223 buffer << "state_gate_combinations[" << i << "] < 0.0 ? 1.0*(exp(state_gate_combinations[" << i << "]) - 1.0) : state_gate_combinations[" << i << "];\n";
4224 break;
4225
4226 case ActivationFunction::HardSigmoid:
4227 buffer << "state_gate_combinations[" << i << "] < -2.5 ? 0.0 : state_gate_combinations[" << i << "] > 2.5 ? 1.0 : 0.2*state_gate_combinations[" << i << "] + 0.5;\n";
4228 break;
4229 }
4230 }
4231
4232 buffer << endl;
4233
4234
4235 // Output gate
4236
4237 buffer << "\tvector<float> output_gate_combinations(" << neurons_number << ");\n" << endl;
4238
4239 for(Index i = 0; i < neurons_number; i++)
4240 {
4241 buffer << "\toutput_gate_combinations[" << i << "] = " << output_biases(i) << " + ";
4242
4243 for(Index j = 0; j < inputs_number; j++)
4244 {
4245 buffer << "inputs[" << j << "] * (" << output_weights(j,i) << ") + ";
4246 }
4247
4248 for(Index k = 0; k < neurons_number-1; k++)
4249 {
4250 buffer << "hidden_states[" << k << "]" << " * (" << output_recurrent_weights(k,i) << ") + ";
4251 }
4252
4253 buffer << "hidden_states[" << neurons_number-1 << "]" << " * (" << output_recurrent_weights(neurons_number-1,i) << "); \n";
4254 }
4255
4256 buffer << endl;
4257
4258
4259 buffer << "\tvector<float> output_gate_activations(" << neurons_number << ");\n" << endl;
4260
4261 for(Index i = 0; i < neurons_number; i++)
4262 {
4263 buffer << "\toutput_gate_activations[" << i << "] = ";
4264
4265 switch(recurrent_activation_function)
4266 {
4267 case ActivationFunction::HyperbolicTangent:
4268 buffer << "tanh(output_gate_combinations[" << i << "]);\n";
4269 break;
4270
4271 case ActivationFunction::RectifiedLinear:
4272 buffer << "output_gate_combinations[" << i << "] < 0.0 ? 0.0 : output_gate_combinations[" << i << "];\n";
4273 break;
4274
4275 case ActivationFunction::Logistic:
4276 buffer << "1.0/(1.0 + exp(-output_gate_combinations[" << i << "]));\n";
4277 break;
4278
4279 case ActivationFunction::Threshold:
4280 buffer << "output_gate_combinations[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
4281 break;
4282
4283 case ActivationFunction::SymmetricThreshold:
4284 buffer << "output_gate_combinations[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
4285 break;
4286
4287 case ActivationFunction::Linear:
4288 buffer << "output_gate_combinations[" << i << "];\n";
4289 break;
4290
4291 case ActivationFunction::ScaledExponentialLinear:
4292 buffer << "output_gate_combinations[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(output_gate_combinations[" << i << "]) - 1.0) : 1.0507*output_gate_combinations[" << i << "];\n";
4293 break;
4294
4295 case ActivationFunction::SoftPlus:
4296 buffer << "log(1.0 + exp(output_gate_combinations[" << i << "]));\n";
4297 break;
4298
4299 case ActivationFunction::SoftSign:
4300 buffer << "output_gate_combinations[" << i << "] < 0.0 ? output_gate_combinations[" << i << "]/(1.0 - output_gate_combinations[" << i << "] ) : output_gate_combinations[" << i << "]/(1.0 + output_gate_combinations[" << i << "] );\n";
4301 break;
4302
4303 case ActivationFunction::ExponentialLinear:
4304 buffer << "output_gate_combinations[" << i << "] < 0.0 ? 1.0*(exp(output_gate_combinations[" << i << "]) - 1.0) : output_gate_combinations[" << i << "];\n";
4305 break;
4306
4307 case ActivationFunction::HardSigmoid:
4308 buffer << "output_gate_combinations[" << i << "] < -2.5 ? 0.0 : output_gate_combinations[" << i << "] > 2.5 ? 1.0 : 0.2*output_gate_combinations[" << i << "] + 0.5;\n";
4309 break;
4310 }
4311 }
4312
4313 buffer << endl;
4314
4315
4316 // Cell State
4317
4318 for(Index i = 0; i < neurons_number; i++)
4319 {
4320 buffer << "\tcell_states[" << i << "] = forget_gate_activations[" << i << "] * cell_states[" << i << "] + input_gate_activations[" << i << "] * state_gate_activations[" << i << "]; \n";
4321 }
4322
4323 buffer << endl;
4324
4325
4326 buffer << "\tvector<float> cell_state_activations(" << neurons_number << ");\n" << endl;
4327
4328 for(Index i = 0; i < neurons_number; i++)
4329 {
4330 buffer << "\tcell_state_activations[" << i << "] = ";
4331
4332 switch(activation_function)
4333 {
4334 case ActivationFunction::HyperbolicTangent:
4335 buffer << "tanh(cell_states[" << i << "]);\n";
4336 break;
4337
4338 case ActivationFunction::RectifiedLinear:
4339 buffer << "cell_states[" << i << "] < 0.0 ? 0.0 : cell_states[" << i << "];\n";
4340 break;
4341
4342 case ActivationFunction::Logistic:
4343 buffer << "1.0/(1.0 + exp(-cell_states[" << i << "]));\n";
4344 break;
4345
4346 case ActivationFunction::Threshold:
4347 buffer << "cell_states[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
4348 break;
4349
4350 case ActivationFunction::SymmetricThreshold:
4351 buffer << "cell_states[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
4352 break;
4353
4354 case ActivationFunction::Linear:
4355 buffer << "cell_states[" << i << "];\n";
4356 break;
4357
4358 case ActivationFunction::ScaledExponentialLinear:
4359 buffer << "cell_states[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(cell_states[" << i << "]) - 1.0) : 1.0507*cell_states[" << i << "];\n";
4360 break;
4361
4362 case ActivationFunction::SoftPlus:
4363 buffer << "log(1.0 + exp(cell_states[" << i << "]));\n";
4364 break;
4365
4366 case ActivationFunction::SoftSign:
4367 buffer << "cell_states[" << i << "] < 0.0 ? cell_states[" << i << "]/(1.0 - cell_states[" << i << "] ) : cell_states[" << i << "]/(1.0 + cell_states[" << i << "] );\n";
4368 break;
4369
4370 case ActivationFunction::ExponentialLinear:
4371 buffer << "cell_states[" << i << "] < 0.0 ? 1.0*(exp(cell_states[" << i << "]) - 1.0) : cell_states[" << i << "];\n";
4372 break;
4373
4374 case ActivationFunction::HardSigmoid:
4375 buffer << "cell_states[" << i << "] < -2.5 ? 0.0 : (cell_states[" << i << "] > 2.5 ? 1.0 : 0.2*cell_states[" << i << "] + 0.5);\n"; // hard sigmoid: clip(0.2*x + 0.5, 0, 1)
4376 break;
4377 }
4378 }
4379
4380 buffer << endl;
4381
4382
4383 // Hidden state
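// h_t = o_t * activation(c_t), with activation(c_t) already held in cell_state_activations.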
4384
4385 for(Index i = 0; i < neurons_number; i++)
4386 {
4387 buffer << "\thidden_states[" << i << "] = output_gate_activations[" << i << "] * cell_state_activations[" << i << "];\n";
4388 }
4389
4390 buffer << endl;
4391
4392
4393 // LSTM output
4394
4395 buffer << "\tvector<float> long_short_term_memory_output(" << neurons_number << ");\n" << endl;
4396
4397 for(Index i = 0; i < neurons_number; i++)
4398 {
4399 buffer << "\tlong_short_term_memory_output[" << i << "] = hidden_states[" << i << "];\n";
4400 }
4401
4402 return buffer.str();
4403}
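
// For orientation, a sketch of what the writer above emits for a hypothetical
// layer with one input and one neuron, a logistic recurrent activation and a
// hyperbolic tangent activation (the weight and bias values are placeholders,
// not taken from any real layer):
//
//     vector<float> output_gate_combinations(1);
//
//     output_gate_combinations[0] = 0.1 + inputs[0] * (0.2) + hidden_states[0] * (0.3);
//
//     vector<float> output_gate_activations(1);
//
//     output_gate_activations[0] = 1.0/(1.0 + exp(-output_gate_combinations[0]));
//
//     cell_states[0] = forget_gate_activations[0] * cell_states[0] + input_gate_activations[0] * state_gate_activations[0];
//
//     vector<float> cell_state_activations(1);
//
//     cell_state_activations[0] = tanh(cell_states[0]);
//
//     hidden_states[0] = output_gate_activations[0] * cell_state_activations[0];
//
//     vector<float> long_short_term_memory_output(1);
//
//     long_short_term_memory_output[0] = hidden_states[0];
//
// The forget, input and state gates, emitted earlier in this function, follow
// the same pattern as the output gate shown here.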
4404
4405string LongShortTermMemoryLayer::write_expression_python() const
4406{
4407 ostringstream buffer;
4408
4409 buffer << "\tdef " << layer_name << "(self,inputs):\n" << endl;
4410
4411 buffer << write_combinations_python();
4412
4413 buffer << "\n\t\treturn long_short_term_memory_output\n" << endl; // no trailing semicolon in the generated Python
4414
4415 return buffer.str();
4416}
4417
4418 string LongShortTermMemoryLayer::write_combinations_python() const
4419 {
4420 ostringstream buffer;
4421
4422 const Index inputs_number = get_inputs_number();
4423 const Index neurons_number = get_neurons_number();
4424
4425 // Forget gate
4426
4427 buffer << "\t\tforget_gate_combinations = [None] * "<<neurons_number<<"\n" << endl;
4428
4429 for(Index i = 0; i < neurons_number; i++)
4430 {
4431 buffer << "\t\tforget_gate_combinations[" << i << "] = " << forget_biases(i) << " + ";
4432
4433 for(Index j = 0; j < inputs_number; j++)
4434 {
4435 buffer << "inputs[" << j << "] * (" << forget_weights(j,i) << ") + ";
4436 }
4437
4438 for(Index k = 0; k < neurons_number-1; k++)
4439 {
4440 buffer << "self.hidden_states[" << k << "] * (" << forget_recurrent_weights(k,i) << ") + ";
4441 }
4442
4443 buffer << "self.hidden_states[" << neurons_number-1 << "] * (" << forget_recurrent_weights(neurons_number-1,i) << ")";
4444
4445 buffer << " " << endl;
4446 }
4447
4448 buffer << "\t\t" << endl;
4449
4450
4451 buffer << "\t\tforget_gate_activations = [None] * "<<neurons_number<<"\n" << endl;
4452
4453 for(Index i = 0; i < neurons_number; i++)
4454 {
4455 buffer << "\t\tforget_gate_activations[" << i << "] = ";
4456
4457 switch(recurrent_activation_function)
4458 {
4459 case ActivationFunction::HyperbolicTangent:
4460 buffer << "np.tanh(forget_gate_combinations[" << i << "])\n";
4461 break;
4462
4463 case ActivationFunction::RectifiedLinear:
4464 buffer << "np.maximum(0.0, forget_gate_combinations[" << i << "])\n";
4465 break;
4466
4467 case ActivationFunction::Logistic:
4468 buffer << "1.0/(1.0 + np.exp(-forget_gate_combinations[" << i << "]))\n";
4469 break;
4470
4471 case ActivationFunction::Threshold:
4472 buffer << "1.0 if forget_gate_combinations[" << i << "] >= 0.0 else 0.0\n";
4473 break;
4474
4475 case ActivationFunction::SymmetricThreshold:
4476 buffer << "1.0 if forget_gate_combinations[" << i << "] >= 0.0 else -1.0\n";
4477 break;
4478
4479 case ActivationFunction::Linear:
4480 buffer << "forget_gate_combinations[" << i << "]\n";
4481 break;
4482
4483 case ActivationFunction::ScaledExponentialLinear:
4484 buffer << "1.0507*1.67326*(np.exp(forget_gate_combinations[" << i << "]) - 1.0) if forget_gate_combinations[" << i << "] < 0.0 else 1.0507*forget_gate_combinations[" << i << "]\n";
4485 break;
4486
4487 case ActivationFunction::SoftPlus:
4488 buffer << "np.log(1.0 + np.exp(forget_gate_combinations[" << i << "]))\n";
4489 break;
4490
4491 case ActivationFunction::SoftSign:
4492 buffer << "forget_gate_combinations[" << i << "]/(1.0 - forget_gate_combinations[" << i << "] ) if forget_gate_combinations[" << i << "] < 0.0 else forget_gate_combinations[" << i << "]/(1.0 + forget_gate_combinations[" << i << "] )\n";
4493 break;
4494
4495 case ActivationFunction::ExponentialLinear:
4496 buffer << "1.0*(np.exp(forget_gate_combinations[" << i << "]) - 1.0) if forget_gate_combinations[" << i << "] < 0.0 else forget_gate_combinations[" << i << "]\n";
4497 break;
4498
4499 case ActivationFunction::HardSigmoid:
4500 buffer << "0.0 if forget_gate_combinations[" << i << "] < -2.5 else (1.0 if forget_gate_combinations[" << i << "] > 2.5 else 0.2*forget_gate_combinations[" << i << "] + 0.5)\n"; // hard sigmoid: clip(0.2*x + 0.5, 0, 1)
4501 break;
4502 }
4503 }
4504
4505
4506 buffer << "\t\t" << endl;
4507
4508 // Input gate
4509
4510 buffer << "\t\tinput_gate_combinations = [None] * "<<neurons_number<<"\n" << endl;
4511
4512 for(Index i = 0; i < neurons_number; i++)
4513 {
4514 buffer << "\t\tinput_gate_combinations[" << i << "] = " << input_biases(i) << " + ";
4515
4516 for(Index j = 0; j < inputs_number; j++)
4517 {
4518 buffer << "inputs[" << j << "] * (" << input_weights(j,i) << ") + ";
4519 }
4520
4521 for(Index k = 0; k < neurons_number-1; k++)
4522 {
4523 buffer << "self.hidden_states[" << k << "] * (" << input_recurrent_weights(k,i) << ") + ";
4524 }
4525
4526 buffer << "self.hidden_states[" << neurons_number-1 << "] * (" << input_recurrent_weights(neurons_number-1,i) << ")";
4527
4528 buffer << " " << endl;
4529 }
4530
4531
4532 buffer << "\t\t" << endl;
4533
4534 buffer << "\t\tinput_gate_activations = [None] * "<<neurons_number<<"\n" << endl;
4535
4536 for(Index i = 0; i < neurons_number; i++)
4537 {
4538 buffer << "\t\tinput_gate_activations[" << i << "] = ";
4539
4540 switch(recurrent_activation_function)
4541 {
4542 case ActivationFunction::HyperbolicTangent:
4543 buffer << "np.tanh(input_gate_combinations[" << i << "])\n";
4544 break;
4545
4546 case ActivationFunction::RectifiedLinear:
4547 buffer << "np.maximum(0.0, input_gate_combinations[" << i << "])\n";
4548 break;
4549
4550 case ActivationFunction::Logistic:
4551 buffer << "1.0/(1.0 + np.exp(-input_gate_combinations[" << i << "]))\n";
4552 break;
4553
4554 case ActivationFunction::Threshold:
4555 buffer << "1.0 if input_gate_combinations[" << i << "] >= 0.0 else 0.0\n";
4556 break;
4557
4558 case ActivationFunction::SymmetricThreshold:
4559 buffer << "1.0 if input_gate_combinations[" << i << "] >= 0.0 else -1.0\n";
4560 break;
4561
4562 case ActivationFunction::Linear:
4563 buffer << "input_gate_combinations[" << i << "]\n";
4564 break;
4565
4566 case ActivationFunction::ScaledExponentialLinear:
4567 buffer << "1.0507*1.67326*(np.exp(input_gate_combinations[" << i << "]) - 1.0) if input_gate_combinations[" << i << "] < 0.0 else 1.0507*input_gate_combinations[" << i << "]\n";
4568 break;
4569
4570 case ActivationFunction::SoftPlus:
4571 buffer << "np.log(1.0 + np.exp(input_gate_combinations[" << i << "]))\n";
4572 break;
4573
4574 case ActivationFunction::SoftSign:
4575 buffer << "input_gate_combinations[" << i << "]/(1.0 - input_gate_combinations[" << i << "] ) if input_gate_combinations[" << i << "] < 0.0 else input_gate_combinations[" << i << "]/(1.0 + input_gate_combinations[" << i << "] )\n";
4576 break;
4577
4578 case ActivationFunction::ExponentialLinear:
4579 buffer << "1.0*(np.exp(input_gate_combinations[" << i << "]) - 1.0) if input_gate_combinations[" << i << "] < 0.0 else input_gate_combinations[" << i << "]\n";
4580 break;
4581
4582 case ActivationFunction::HardSigmoid:
4583 buffer << "0.0 if input_gate_combinations[" << i << "] < -2.5 else (1.0 if input_gate_combinations[" << i << "] > 2.5 else 0.2*input_gate_combinations[" << i << "] + 0.5)\n"; // hard sigmoid: clip(0.2*x + 0.5, 0, 1)
4584 break;
4585 }
4586 }
4587
4588 buffer << "\t\t" << endl;
4589
4590
4591 // State gate
4592
4593 buffer << "\t\tstate_gate_combinations = [None] * "<<neurons_number<<"\n" << endl;
4594
4595 for(Index i = 0; i < neurons_number; i++)
4596 {
4597 buffer << "\t\tstate_gate_combinations[" << i << "] = " << state_biases(i) << " + ";
4598
4599 for(Index j = 0; j < inputs_number; j++)
4600 {
4601 buffer << "inputs[" << j << "] * (" << state_weights(j,i) << ") + ";
4602 }
4603
4604 for(Index k = 0; k < neurons_number-1; k++)
4605 {
4606 buffer << "self.hidden_states[" << k << "] * (" << state_recurrent_weights(k,i) << ") + ";
4607 }
4608
4609 buffer << "self.hidden_states[" << neurons_number-1 << "] * (" << state_recurrent_weights(neurons_number-1,i) << ")";
4610
4611 buffer << " " << endl;
4612 }
4613
4614
4615 buffer << "\t\t" << endl;
4616
4617 buffer << "\t\tstate_gate_activations = [None] * "<<neurons_number<<"\n" << endl;
4618
4619 for(Index i = 0; i < neurons_number; i++)
4620 {
4621 buffer << "\t\tstate_gate_activations[" << i << "] = ";
4622
4623 switch(activation_function)
4624 {
4625 case ActivationFunction::HyperbolicTangent:
4626 buffer << "np.tanh(state_gate_combinations[" << i << "])\n";
4627 break;
4628
4629 case ActivationFunction::RectifiedLinear:
4630 buffer << "np.maximum(0.0, state_gate_combinations[" << i << "])\n";
4631 break;
4632
4633 case ActivationFunction::Logistic:
4634 buffer << "1.0/(1.0 + np.exp(-state_gate_combinations[" << i << "]))\n";
4635 break;
4636
4637 case ActivationFunction::Threshold:
4638 buffer << "1.0 if state_gate_combinations[" << i << "] >= 0.0 else 0.0\n";
4639 break;
4640
4641 case ActivationFunction::SymmetricThreshold:
4642 buffer << "1.0 if state_gate_combinations[" << i << "] >= 0.0 else -1.0\n";
4643 break;
4644
4645 case ActivationFunction::Linear:
4646 buffer << "state_gate_combinations[" << i << "]\n";
4647 break;
4648
4649 case ActivationFunction::ScaledExponentialLinear:
4650 buffer << "1.0507*1.67326*(np.exp(state_gate_combinations[" << i << "]) - 1.0) if state_gate_combinations[" << i << "] < 0.0 else 1.0507*state_gate_combinations[" << i << "]\n";
4651 break;
4652
4653 case ActivationFunction::SoftPlus:
4654 buffer << "np.log(1.0 + np.exp(state_gate_combinations[" << i << "]))\n";
4655 break;
4656
4657 case ActivationFunction::SoftSign:
4658 buffer << "state_gate_combinations[" << i << "]/(1.0 - state_gate_combinations[" << i << "] ) if state_gate_combinations[" << i << "] < 0.0 else state_gate_combinations[" << i << "]/(1.0 + state_gate_combinations[" << i << "] )\n";
4659 break;
4660
4661 case ActivationFunction::ExponentialLinear:
4662 buffer << "1.0*(np.exp(state_gate_combinations[" << i << "]) - 1.0) if state_gate_combinations[" << i << "] < 0.0 else state_gate_combinations[" << i << "]\n";
4663 break;
4664
4665 case ActivationFunction::HardSigmoid:
4666 buffer << "0.0 if state_gate_combinations[" << i << "] < -2.5 else (1.0 if state_gate_combinations[" << i << "] > 2.5 else 0.2*state_gate_combinations[" << i << "] + 0.5)\n"; // hard sigmoid: clip(0.2*x + 0.5, 0, 1)
4667 break;
4668 }
4669 }
4670
4671 buffer << "\t\t" << endl;
4672
4673
4674 // Output gate
4675
4676 buffer << "\t\toutput_gate_combinations = [None] * "<<neurons_number<<"\n" << endl;
4677
4678 for(Index i = 0; i < neurons_number; i++)
4679 {
4680 buffer << "\t\toutput_gate_combinations[" << i << "] = " << output_biases(i) << " + ";
4681
4682 for(Index j = 0; j < inputs_number; j++)
4683 {
4684 buffer << "inputs[" << j << "] * (" << output_weights(j,i) << ") + ";
4685 }
4686
4687 for(Index k = 0; k < neurons_number-1; k++)
4688 {
4689 buffer << "self.hidden_states[" << k << "] * (" << output_recurrent_weights(k,i) << ") + ";
4690 }
4691
4692 buffer << "self.hidden_states[" << neurons_number-1 << "] * (" << output_recurrent_weights(neurons_number-1,i) << ")";
4693
4694 buffer << " " << endl;
4695 }
4696
4697
4698 buffer << "\t\t" << endl;
4699
4700 buffer << "\t\toutput_gate_activations = [None] * "<<neurons_number<<"\n" << endl;
4701
4702 for(Index i = 0; i < neurons_number; i++)
4703 {
4704 buffer << "\t\toutput_gate_activations[" << i << "] = ";
4705
4706 switch(recurrent_activation_function) // the output gate uses the recurrent activation, as in the C expression writer above
4707 {
4708 case ActivationFunction::HyperbolicTangent:
4709 buffer << "np.tanh(output_gate_combinations[" << i << "])\n";
4710 break;
4711
4712 case ActivationFunction::RectifiedLinear:
4713 buffer << "np.maximum(0.0, output_gate_combinations[" << i << "])\n";
4714 break;
4715
4716 case ActivationFunction::Logistic:
4717 buffer << "1.0/(1.0 + np.exp(-output_gate_combinations[" << i << "]))\n";
4718 break;
4719
4720 case ActivationFunction::Threshold:
4721 buffer << "1.0 if output_gate_combinations[" << i << "] >= 0.0 else 0.0\n";
4722 break;
4723
4724 case ActivationFunction::SymmetricThreshold:
4725 buffer << "1.0 if output_gate_combinations[" << i << "] >= 0.0 else -1.0\n";
4726 break;
4727
4728 case ActivationFunction::Linear:
4729 buffer << "output_gate_combinations[" << i << "]\n";
4730 break;
4731
4732 case ActivationFunction::ScaledExponentialLinear:
4733 buffer << "1.0507*1.67326*(np.exp(output_gate_combinations[" << i << "]) - 1.0) if output_gate_combinations[" << i << "] < 0.0 else 1.0507*output_gate_combinations[" << i << "]\n";
4734 break;
4735
4736 case ActivationFunction::SoftPlus:
4737 buffer << "np.log(1.0 + np.exp(output_gate_combinations[" << i << "]))\n";
4738 break;
4739
4740 case ActivationFunction::SoftSign:
4741 buffer << "output_gate_combinations[" << i << "]/(1.0 - output_gate_combinations[" << i << "] ) if output_gate_combinations[" << i << "] < 0.0 else output_gate_combinations[" << i << "]/(1.0 + output_gate_combinations[" << i << "] )\n";
4742 break;
4743
4744 case ActivationFunction::ExponentialLinear:
4745 buffer << "1.0*(np.exp(output_gate_combinations[" << i << "]) - 1.0) if output_gate_combinations[" << i << "] < 0.0 else output_gate_combinations[" << i << "]\n";
4746 break;
4747
4748 case ActivationFunction::HardSigmoid:
4749 buffer << "0.0 if output_gate_combinations[" << i << "] < -2.5 else (1.0 if output_gate_combinations[" << i << "] > 2.5 else 0.2*output_gate_combinations[" << i << "] + 0.5)\n"; // hard sigmoid: clip(0.2*x + 0.5, 0, 1)
4750 break;
4751 }
4752 }
4753
4754 buffer << "\t\t" << endl;
4755
4756
4757 // Cell states
4758
4759 for(Index i = 0; i < neurons_number; i++)
4760 {
4761 buffer << "\t\tself.cell_states[" << i << "] = forget_gate_activations[" << i << "] * self.cell_states[" << i << "] + input_gate_activations[" << i << "] * state_gate_activations[" << i << "] \n";
4762 }
4763
4764 buffer << " " << endl;
4765
4766 buffer << "\t\t" << endl;
4767
4768 buffer << "\t\tcell_state_activations = [None] * "<<neurons_number<<"\n" << endl;
4769
4770 for(Index i = 0; i < neurons_number; i++)
4771 {
4772 buffer << "\t\tcell_state_activations[" << i << "] = ";
4773
4774 switch(activation_function)
4775 {
4776 case ActivationFunction::HyperbolicTangent:
4777 buffer << "np.tanh(self.cell_states[" << i << "])\n";
4778 break;
4779
4780 case ActivationFunction::RectifiedLinear:
4781 buffer << "np.maximum(0.0, self.cell_states[" << i << "])\n";
4782 break;
4783
4784 case ActivationFunction::Logistic:
4785 buffer << "1.0/(1.0 + np.exp(-self.cell_states[" << i << "]))\n";
4786 break;
4787
4788 case ActivationFunction::Threshold:
4789 buffer << "1.0 if self.cell_states[" << i << "] >= 0.0 else 0.0\n";
4790 break;
4791
4792 case ActivationFunction::SymmetricThreshold:
4793 buffer << "1.0 if self.cell_states[" << i << "] >= 0.0 else -1.0\n";
4794 break;
4795
4796 case ActivationFunction::Linear:
4797 buffer << "self.cell_states[" << i << "]\n";
4798 break;
4799
4800 case ActivationFunction::ScaledExponentialLinear:
4801 buffer << "1.0507*1.67326*(np.exp(self.cell_states[" << i << "]) - 1.0) if self.cell_states[" << i << "] < 0.0 else 1.0507*self.cell_states[" << i << "]\n";
4802 break;
4803
4804 case ActivationFunction::SoftPlus:
4805 buffer << "np.log(1.0 + np.exp(self.cell_states[" << i << "]))\n";
4806 break;
4807
4808 case ActivationFunction::SoftSign:
4809 buffer << "self.cell_states[" << i << "]/(1.0 - self.cell_states[" << i << "] ) if self.cell_states[" << i << "] < 0.0 else self.cell_states[" << i << "]/(1.0 + self.cell_states[" << i << "] )\n";
4810 break;
4811
4812 case ActivationFunction::ExponentialLinear:
4813 buffer << "1.0*(np.exp(self.cell_states[" << i << "]) - 1.0) if self.cell_states[" << i << "] < 0.0 else self.cell_states[" << i << "]\n";
4814 break;
4815
4816 case ActivationFunction::HardSigmoid:
4817 buffer << "0.0 if self.cell_states[" << i << "] < -2.5 else (1.0 if self.cell_states[" << i << "] > 2.5 else 0.2*self.cell_states[" << i << "] + 0.5)\n"; // hard sigmoid: clip(0.2*x + 0.5, 0, 1)
4818 break;
4819 }
4820 }
4821
4822 buffer << "\t\t" << endl;
4823
4824
4825 // Hidden state
4826
4827 for(Index i = 0; i < neurons_number; i++)
4828 {
4829 buffer << "\t\tself.hidden_states[" << i << "] = output_gate_activations[" << i << "] * cell_state_activations[" << i << "]\n";
4830 }
4831
4832 buffer << " " << endl;
4833
4834 buffer << "\t\t" << endl;
4835
4836
4837 // LSTM output
4838
4839 buffer << "\t\tlong_short_term_memory_output = [None] * "<<neurons_number<<"\n" << endl;
4840
4841 for(Index i = 0; i < neurons_number; i++)
4842 {
4843 buffer << "\t\tlong_short_term_memory_output[" << i << "] = self.hidden_states[" << i << "]\n";
4844 }
4845
4846 return buffer.str();
4847}
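
// A sketch of the method body the two Python writers produce together for the
// same hypothetical one-input, one-neuron layer (placeholder coefficients; the
// generated code assumes numpy is imported as np, and that self.hidden_states
// and self.cell_states persist between calls):
//
//     forget_gate_combinations = [None] * 1
//
//     forget_gate_combinations[0] = 0.1 + inputs[0] * (0.2) + self.hidden_states[0] * (0.3)
//
//     forget_gate_activations = [None] * 1
//
//     forget_gate_activations[0] = 1.0/(1.0 + np.exp(-forget_gate_combinations[0]))
//
//     (the input, state and output gates follow the same pattern)
//
//     self.cell_states[0] = forget_gate_activations[0] * self.cell_states[0] + input_gate_activations[0] * state_gate_activations[0]
//
//     self.hidden_states[0] = output_gate_activations[0] * cell_state_activations[0]
//
//     long_short_term_memory_output = [None] * 1
//
//     long_short_term_memory_output[0] = self.hidden_states[0]
//
//     return long_short_term_memory_output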
4848
4849void LongShortTermMemoryLayer::from_XML(const tinyxml2::XMLDocument& document)
4850{
4851 ostringstream buffer;
4852
4853 // LongShortTermMemoryLayer layer
4854
4855 const tinyxml2::XMLElement* long_short_term_memory_layer_element = document.FirstChildElement("LongShortTermMemoryLayer");
4856
4857 if(!long_short_term_memory_layer_element)
4858 {
4859 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
4860 << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
4861 << "LongShortTermMemoryLayer element is nullptr.\n";
4862
4863 throw logic_error(buffer.str());
4864 }
4865
4866 // Layer name
4867
4868 const tinyxml2::XMLElement* layer_name_element = long_short_term_memory_layer_element->FirstChildElement("LayerName");
4869
4870 if(!layer_name_element)
4871 {
4872 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
4873 << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
4874 << "LayerName element is nullptr.\n";
4875
4876 throw logic_error(buffer.str());
4877 }
4878
4879 if(layer_name_element->GetText())
4880 {
4881 set_name(layer_name_element->GetText());
4882 }
4883
4884 // Inputs number
4885
4886 const tinyxml2::XMLElement* inputs_number_element = long_short_term_memory_layer_element->FirstChildElement("InputsNumber");
4887
4888 if(!inputs_number_element)
4889 {
4890 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
4891 << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
4892 << "InputsNumber element is nullptr.\n";
4893
4894 throw logic_error(buffer.str());
4895 }
4896
4897 if(inputs_number_element->GetText())
4898 {
4899 set_inputs_number(static_cast<Index>(stoi(inputs_number_element->GetText())));
4900 }
4901
4902 // Neurons number
4903
4904 const tinyxml2::XMLElement* neurons_number_element = long_short_term_memory_layer_element->FirstChildElement("NeuronsNumber");
4905
4906 if(!neurons_number_element)
4907 {
4908 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
4909 << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
4910 << "NeuronsNumber element is nullptr.\n";
4911
4912 throw logic_error(buffer.str());
4913 }
4914
4915 if(neurons_number_element->GetText())
4916 {
4917 set_neurons_number(static_cast<Index>(stoi(neurons_number_element->GetText())));
4918 }
4919
4920 // Time step
4921
4922 const tinyxml2::XMLElement* time_step_element = long_short_term_memory_layer_element->FirstChildElement("TimeStep");
4923
4924 if(!time_step_element)
4925 {
4926 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
4927 << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
4928 << "TimeStep element is nullptr.\n";
4929
4930 throw logic_error(buffer.str());
4931 }
4932
4933 if(time_step_element->GetText())
4934 {
4935 set_timesteps(static_cast<Index>(stoi(time_step_element->GetText())));
4936 }
4937
4938 // Activation function
4939
4940 const tinyxml2::XMLElement* activation_function_element = long_short_term_memory_layer_element->FirstChildElement("ActivationFunction");
4941
4942 if(!activation_function_element)
4943 {
4944 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
4945 << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
4946 << "ActivationFunction element is nullptr.\n";
4947
4948 throw logic_error(buffer.str());
4949 }
4950
4951 if(activation_function_element->GetText())
4952 {
4953 set_activation_function(activation_function_element->GetText());
4954 }
4955
4956 // Recurrent activation function
4957
4958 const tinyxml2::XMLElement* recurrent_activation_function_element = long_short_term_memory_layer_element->FirstChildElement("RecurrentActivationFunction");
4959
4960 if(!recurrent_activation_function_element)
4961 {
4962 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
4963 << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
4964 << "RecurrentActivationFunction element is nullptr.\n";
4965
4966 throw logic_error(buffer.str());
4967 }
4968
4969 if(recurrent_activation_function_element->GetText())
4970 {
4971 set_recurrent_activation_function(recurrent_activation_function_element->GetText());
4972 }
4973
4974 // Parameters
4975
4976 const tinyxml2::XMLElement* parameters_element = long_short_term_memory_layer_element->FirstChildElement("Parameters");
4977
4978 if(!parameters_element)
4979 {
4980 buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
4981 << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
4982 << "Parameters element is nullptr.\n";
4983
4984 throw logic_error(buffer.str());
4985 }
4986
4987 if(parameters_element->GetText())
4988 {
4989 const string parameters_string = parameters_element->GetText();
4990
4991 set_parameters(to_type_vector(parameters_string, ' '));
4992 }
4993}
4994
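// For reference, from_XML() above and write_XML() below agree on the following
// element layout (text values elided; from_XML() looks each element up by
// name, so it does not depend on their order):
//
//     <LongShortTermMemoryLayer>
//         <LayerName> ... </LayerName>
//         <InputsNumber> ... </InputsNumber>
//         <NeuronsNumber> ... </NeuronsNumber>
//         <TimeStep> ... </TimeStep>
//         <ActivationFunction> ... </ActivationFunction>
//         <RecurrentActivationFunction> ... </RecurrentActivationFunction>
//         <Parameters> ... </Parameters>
//     </LongShortTermMemoryLayer>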
4995
4996void LongShortTermMemoryLayer::write_XML(tinyxml2::XMLPrinter& file_stream) const
4997{
4998 ostringstream buffer;
4999
5000 // Long short term memory layer
5001
5002 file_stream.OpenElement("LongShortTermMemoryLayer");
5003
5004 // Layer name
5005
5006 file_stream.OpenElement("LayerName");
5007 buffer.str("");
5008 buffer << layer_name;
5009 file_stream.PushText(buffer.str().c_str());
5010 file_stream.CloseElement();
5011
5012 // Inputs number
5013
5014 file_stream.OpenElement("InputsNumber");
5015
5016 buffer.str("");
5017 buffer << get_inputs_number();
5018
5019 file_stream.PushText(buffer.str().c_str());
5020
5021 file_stream.CloseElement();
5022
5023 // Outputs number
5024
5025 file_stream.OpenElement("NeuronsNumber");
5026
5027 buffer.str("");
5028 buffer << get_neurons_number();
5029
5030 file_stream.PushText(buffer.str().c_str());
5031
5032 file_stream.CloseElement();
5033
5034 // Time step
5035
5036 file_stream.OpenElement("TimeStep");
5037
5038 buffer.str("");
5039 buffer << get_timesteps();
5040
5041 file_stream.PushText(buffer.str().c_str());
5042
5043 file_stream.CloseElement();
5044
5045 // Activation function
5046
5047 file_stream.OpenElement("ActivationFunction");
5048
5049 file_stream.PushText(write_activation_function().c_str());
5050
5051 file_stream.CloseElement();
5052
5053 // Recurrent activation function
5054
5055 file_stream.OpenElement("RecurrentActivationFunction");
5056
5057 file_stream.PushText(write_recurrent_activation_function().c_str());
5058
5059 file_stream.CloseElement();
5060
5061 // Parameters
5062
5063 file_stream.OpenElement("Parameters");
5064
5065 buffer.str("");
5066
5067 const Tensor<type, 1> parameters = get_parameters();
5068 const Index parameters_size = parameters.size();
5069
5070 for(Index i = 0; i < parameters_size; i++)
5071 {
5072 buffer << parameters(i);
5073
5074 if(i != (parameters_size-1)) buffer << " ";
5075 }
5076
5077 file_stream.PushText(buffer.str().c_str());
5078
5079 file_stream.CloseElement();
5080
5081 // Long short term memory layer (end tag)
5082
5083 file_stream.CloseElement();
5084}
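
// A minimal round-trip sketch (the layer sizes here are arbitrary):
//
//     LongShortTermMemoryLayer layer(3, 2);
//
//     tinyxml2::XMLPrinter printer;
//     layer.write_XML(printer);
//
//     tinyxml2::XMLDocument document;
//     document.Parse(printer.CStr());
//     layer.from_XML(document);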
5085
5086string LongShortTermMemoryLayer::write_recurrent_activation_function_expression() const
5087{
5088 switch(recurrent_activation_function)
5089 {
5090 case ActivationFunction::HyperbolicTangent:
5091 {
5092 return "tanh";
5093 }
5094 case ActivationFunction::Linear:
5095 {
5096 return string();
5097 }
5098 default:
5099 {
5100 return write_recurrent_activation_function(); // fall back to the activation name; an empty default would fall off the end of the function
5101 }
5102 }
5103}
5104
5105
5106string LongShortTermMemoryLayer::write_activation_function_expression() const
5107{
5108 switch(activation_function)
5109 {
5110 case ActivationFunction::HyperbolicTangent:
5111 {
5112 return "tanh";
5113 }
5114 case ActivationFunction::Linear:
5115 {
5116 return string();
5117 }
5118 default:
5119 {
5120 return write_activation_function(); // fall back to the activation name
5121 }
5122 }
5123}
5124}
5125
5126// OpenNN: Open Neural Networks Library.
5127// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
5128//
5129// This library is free software; you can redistribute it and/or
5130// modify it under the terms of the GNU Lesser General Public
5131// License as published by the Free Software Foundation; either
5132// version 2.1 of the License, or any later version.
5133//
5134// This library is distributed in the hope that it will be useful,
5135// but WITHOUT ANY WARRANTY; without even the implied warranty of
5136// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5137// Lesser General Public License for more details.
5138
5139// You should have received a copy of the GNU Lesser General Public
5140// License along with this library; if not, write to the Free Software
5141// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA