11#include "long_short_term_memory_layer.h"
37 set(new_inputs_number, new_neurons_number);
55 return input_weights.dimension(0);
63 return output_biases.size();
74 return 4 * neurons_number * (1 + inputs_number + neurons_number);
114 return output_biases;
124 return forget_weights;
134 return input_weights;
145 return state_weights;
155 return output_weights;
166 return forget_recurrent_weights;
177 return input_recurrent_weights;
188 return state_recurrent_weights;
199 return output_recurrent_weights;
219 Tensor<type, 1> parameters(parameters_number);
221 Index current_position = 0;
225 for(Index i = 0; i < forget_biases.size(); i++) fill_n(parameters.data()+current_position+i, 1, forget_biases(i));
227 current_position += forget_biases.size();
229 for(Index i = 0; i < input_biases.size(); i++) fill_n(parameters.data()+current_position+i, 1, input_biases(i));
231 current_position += input_biases.size();
233 for(Index i = 0; i < state_biases.size(); i++) fill_n(parameters.data()+current_position+i, 1, state_biases(i));
235 current_position += state_biases.size();
237 for(Index i = 0; i < output_biases.size(); i++) fill_n(parameters.data()+current_position+i, 1, output_biases(i));
239 current_position += output_biases.size();
243 for(Index i = 0; i < forget_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, forget_weights(i));
245 current_position += forget_weights.size();
247 for(Index i = 0; i < input_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, input_weights(i));
249 current_position += input_weights.size();
251 for(Index i = 0; i < state_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, state_weights(i));
253 current_position += state_weights.size();
255 for(Index i = 0; i < output_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, output_weights(i));
257 current_position += output_weights.size();
261 for(Index i = 0; i < forget_recurrent_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, forget_recurrent_weights(i));
263 current_position += forget_recurrent_weights.size();
265 for(Index i = 0; i < input_recurrent_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, input_recurrent_weights(i));
267 current_position += input_recurrent_weights.size();
269 for(Index i = 0; i < state_recurrent_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, state_recurrent_weights(i));
271 current_position += state_recurrent_weights.size();
273 for(Index i = 0; i < output_recurrent_weights.size(); i++) fill_n(parameters.data()+current_position+i, 1, output_recurrent_weights(i));
291 return recurrent_activation_function;
// Maps each ActivationFunction enumerator to its canonical string name.
// (The enclosing switch header and function signature — presumably
// write_activation_function() — are outside this extract.)
303 case ActivationFunction::Logistic:
return "Logistic";
305 case ActivationFunction::HyperbolicTangent:
return "HyperbolicTangent";
307 case ActivationFunction::Threshold:
return "Threshold";
309 case ActivationFunction::SymmetricThreshold:
return "SymmetricThreshold";
311 case ActivationFunction::Linear:
return "Linear";
313 case ActivationFunction::RectifiedLinear:
return "RectifiedLinear";
315 case ActivationFunction::ScaledExponentialLinear:
return "ScaledExponentialLinear";
317 case ActivationFunction::SoftPlus:
return "SoftPlus";
319 case ActivationFunction::SoftSign:
return "SoftSign";
321 case ActivationFunction::HardSigmoid:
return "HardSigmoid";
323 case ActivationFunction::ExponentialLinear:
return "ExponentialLinear";
// Returns the name of the layer's recurrent activation function as a string.
// (The enclosing function signature — presumably
// write_recurrent_activation_function() — is outside this extract.)
335 switch(recurrent_activation_function)
337 case ActivationFunction::Logistic:
return "Logistic";
339 case ActivationFunction::HyperbolicTangent:
return "HyperbolicTangent";
341 case ActivationFunction::Threshold:
return "Threshold";
343 case ActivationFunction::SymmetricThreshold:
return "SymmetricThreshold";
345 case ActivationFunction::Linear:
return "Linear";
347 case ActivationFunction::RectifiedLinear:
return "RectifiedLinear";
349 case ActivationFunction::ScaledExponentialLinear:
return "ScaledExponentialLinear";
351 case ActivationFunction::SoftPlus:
return "SoftPlus";
353 case ActivationFunction::SoftSign:
return "SoftSign";
355 case ActivationFunction::HardSigmoid:
return "HardSigmoid";
357 case ActivationFunction::ExponentialLinear:
return "ExponentialLinear";
// Fragment of set(new_inputs_number, new_neurons_number): resizes every
// parameter tensor of the layer and clears the recurrent state.
// Gate biases: one value per neuron.
389 input_biases.resize(new_neurons_number);
390 forget_biases.resize(new_neurons_number);
391 state_biases.resize(new_neurons_number);
392 output_biases.resize(new_neurons_number);
// Input-to-gate weights: inputs x neurons.
394 input_weights.resize(new_inputs_number, new_neurons_number);
395 forget_weights.resize(new_inputs_number, new_neurons_number);
396 state_weights.resize(new_inputs_number, new_neurons_number);
397 output_weights.resize(new_inputs_number, new_neurons_number);
// Hidden-to-gate recurrent weights: neurons x neurons.
399 input_recurrent_weights.resize(new_neurons_number, new_neurons_number);
400 forget_recurrent_weights.resize(new_neurons_number, new_neurons_number);
401 state_recurrent_weights.resize(new_neurons_number, new_neurons_number);
402 output_recurrent_weights.resize(new_neurons_number, new_neurons_number);
// The lstm hidden and cell state vectors start at zero after a resize.
404 hidden_states.resize(new_neurons_number);
405 hidden_states.setZero();
407 cell_states.resize(new_neurons_number);
408 cell_states.setZero();
443void LongShortTermMemoryLayer::set_name(
const string& new_layer_name)
457 set(new_inputs_number, neurons_number);
467 const Index new_size = size[0];
481 set(inputs_number, new_neurons_number);
490 forget_biases = new_biases;
499 input_biases = new_biases;
508 state_biases = new_biases;
517 output_biases = new_biases;
529 forget_weights = new_forget_weights;
541 input_weights = new_input_weight;
553 state_weights = new_state_weights;
565 output_weights = new_output_weight;
578 forget_recurrent_weights = new_forget_recurrent_weight;
591 input_recurrent_weights = new_input_recurrent_weight;
603 state_recurrent_weights = new_state_recurrent_weight;
615 output_recurrent_weights = new_output_recurrent_weight;
631 Index current_index = index;
635 Index size = neurons_number;
637 memcpy(forget_biases.data(),
638 new_parameters.data() + current_index,
639 static_cast<size_t>(size)*
sizeof(type));
641 current_index += size;
643 memcpy(input_biases.data(),
644 new_parameters.data() + current_index,
645 static_cast<size_t>(size)*
sizeof(type));
647 current_index += size;
649 memcpy(state_biases.data(),
650 new_parameters.data() + current_index,
651 static_cast<size_t>(size)*
sizeof(type));
653 current_index += size;
655 memcpy(output_biases.data(),
656 new_parameters.data() + current_index,
657 static_cast<size_t>(size)*
sizeof(type));
659 current_index += size;
663 size = inputs_number*neurons_number;
665 memcpy(forget_weights.data(),
666 new_parameters.data() + current_index,
667 static_cast<size_t>(size)*
sizeof(type));
669 current_index += size;
671 memcpy(input_weights.data(),
672 new_parameters.data() + current_index,
673 static_cast<size_t>(size)*
sizeof(type));
675 current_index += size;
677 memcpy(state_weights.data(),
678 new_parameters.data() + current_index,
679 static_cast<size_t>(size)*
sizeof(type));
681 current_index += size;
683 memcpy(output_weights.data(),
684 new_parameters.data() + current_index,
685 static_cast<size_t>(size)*
sizeof(type));
687 current_index += size;
691 size = neurons_number*neurons_number;
693 memcpy(forget_recurrent_weights.data(),
694 new_parameters.data() + current_index,
695 static_cast<size_t>(size)*
sizeof(type));
697 current_index += size;
699 memcpy(input_recurrent_weights.data(),
700 new_parameters.data() + current_index,
701 static_cast<size_t>(size)*
sizeof(type));
703 current_index += size;
705 memcpy(state_recurrent_weights.data(),
706 new_parameters.data() + current_index,
707 static_cast<size_t>(size)*
sizeof(type));
709 current_index += size;
711 memcpy(output_recurrent_weights.data(),
712 new_parameters.data() + current_index,
713 static_cast<size_t>(size)*
sizeof(type));
715 current_index += size;
734 if(new_activation_function_name ==
"Logistic")
738 else if(new_activation_function_name ==
"HyperbolicTangent")
742 else if(new_activation_function_name ==
"Threshold")
746 else if(new_activation_function_name ==
"SymmetricThreshold")
750 else if(new_activation_function_name ==
"Linear")
754 else if(new_activation_function_name ==
"RectifiedLinear")
758 else if(new_activation_function_name ==
"ScaledExponentialLinear")
762 else if(new_activation_function_name ==
"SoftPlus")
766 else if(new_activation_function_name ==
"SoftSign")
770 else if(new_activation_function_name ==
"HardSigmoid")
774 else if(new_activation_function_name ==
"ExponentialLinear")
780 ostringstream buffer;
782 buffer <<
"OpenNN Exception: neuron class.\n"
783 <<
"void set_activation_function(const string&) method.\n"
784 <<
"Unknown activation function: " << new_activation_function_name <<
".\n";
786 throw logic_error(buffer.str());
796 recurrent_activation_function = new_recurrent_activation_function;
806 if(new_recurrent_activation_function_name ==
"Logistic")
808 recurrent_activation_function = ActivationFunction::Logistic;
810 else if(new_recurrent_activation_function_name ==
"HyperbolicTangent")
812 recurrent_activation_function = ActivationFunction::HyperbolicTangent;
814 else if(new_recurrent_activation_function_name ==
"Threshold")
816 recurrent_activation_function = ActivationFunction::Threshold;
818 else if(new_recurrent_activation_function_name ==
"SymmetricThreshold")
820 recurrent_activation_function = ActivationFunction::SymmetricThreshold;
822 else if(new_recurrent_activation_function_name ==
"Linear")
824 recurrent_activation_function = ActivationFunction::Linear;
826 else if(new_recurrent_activation_function_name ==
"RectifiedLinear")
828 recurrent_activation_function = ActivationFunction::RectifiedLinear;
830 else if(new_recurrent_activation_function_name ==
"ScaledExponentialLinear")
832 recurrent_activation_function = ActivationFunction::ScaledExponentialLinear;
834 else if(new_recurrent_activation_function_name ==
"SoftPlus")
836 recurrent_activation_function = ActivationFunction::SoftPlus;
838 else if(new_recurrent_activation_function_name ==
"SoftSign")
840 recurrent_activation_function = ActivationFunction::SoftSign;
842 else if(new_recurrent_activation_function_name ==
"HardSigmoid")
844 recurrent_activation_function = ActivationFunction::HardSigmoid;
846 else if(new_recurrent_activation_function_name ==
"ExponentialLinear")
848 recurrent_activation_function = ActivationFunction::ExponentialLinear;
852 ostringstream buffer;
854 buffer <<
"OpenNN Exception: neuron class.\n"
855 <<
"void set_recurrent_activation_function(const string&) method.\n"
856 <<
"Unknown activation function: " << new_recurrent_activation_function_name <<
".\n";
858 throw logic_error(buffer.str());
868 timesteps = new_timesteps;
888 forget_biases.setConstant(value);
889 input_biases.setConstant(value);
890 state_biases.setConstant(value);
891 output_biases.setConstant(value);
900 forget_biases.setConstant(value);
909 input_biases.setConstant(value);
918 state_biases.setConstant(value);
927 output_biases.setConstant(value);
936 forget_weights.setConstant(value);
937 input_weights.setConstant(value);
938 state_weights.setConstant(value);
939 output_weights.setConstant(value);
948 forget_weights.setConstant(value);
957 input_weights.setConstant(value);
966 state_weights.setConstant(value);
975 output_weights.setConstant(value);
984 forget_recurrent_weights.setConstant(value);
985 input_recurrent_weights.setConstant(value);
986 state_recurrent_weights.setConstant(value);
987 output_recurrent_weights.setConstant(value);
996 forget_recurrent_weights.setConstant(value);
1005 input_recurrent_weights.setConstant(value);
1014 state_recurrent_weights.setConstant(value);
1023 output_recurrent_weights.setConstant(value);
1032 hidden_states.setConstant(value);
1041 cell_states.setConstant(value);
// Fragment of set_parameters_constant(value): assigns the same value to every
// bias, input weight and recurrent weight, and resets the recurrent state.
1050 forget_biases.setConstant(value);
1051 input_biases.setConstant(value);
1052 state_biases.setConstant(value);
1053 output_biases.setConstant(value);
1055 forget_weights.setConstant(value);
1056 input_weights.setConstant(value);
1057 state_weights.setConstant(value);
1058 output_weights.setConstant(value);
1060 forget_recurrent_weights.setConstant(value);
1061 input_recurrent_weights.setConstant(value);
1062 state_recurrent_weights.setConstant(value);
1063 output_recurrent_weights.setConstant(value);
// The state vectors are zeroed, not set to `value`.
1065 hidden_states.setZero();
1067 cell_states.setZero();
1076 const type minimum = type(-0.2);
1077 const type maximum = type(0.2);
1081 for(Index i = 0; i < forget_biases.size(); i++)
1083 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1085 forget_biases(i) = minimum + (maximum - minimum)*random;
1088 for(Index i = 0; i < input_biases.size(); i++)
1090 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1092 input_biases(i) = minimum + (maximum - minimum)*random;
1095 for(Index i = 0; i < state_biases.size(); i++)
1097 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1099 state_biases(i) = minimum + (maximum - minimum)*random;
1102 for(Index i = 0; i < output_biases.size(); i++)
1104 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1106 output_biases(i) = minimum + (maximum - minimum)*random;
1111 for(Index i = 0; i < forget_weights.size(); i++)
1113 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1115 forget_weights(i) = minimum + (maximum - minimum)*random;
1118 for(Index i = 0; i < input_weights.size(); i++)
1120 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1122 input_weights(i) = minimum + (maximum - minimum)*random;
1125 for(Index i = 0; i < state_weights.size(); i++)
1127 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1129 state_weights(i) = minimum + (maximum - minimum)*random;
1132 for(Index i = 0; i < output_weights.size(); i++)
1134 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1136 output_weights(i) = minimum + (maximum - minimum)*random;
1141 for(Index i = 0; i < forget_recurrent_weights.size(); i++)
1143 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1145 forget_recurrent_weights(i) = minimum + (maximum - minimum)*random;
1148 for(Index i = 0; i < input_recurrent_weights.size(); i++)
1150 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1152 input_recurrent_weights(i) = minimum + (maximum - minimum)*random;
1155 for(Index i = 0; i < state_recurrent_weights.size(); i++)
1157 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1159 state_recurrent_weights(i) = minimum + (maximum - minimum)*random;
1162 for(Index i = 0; i < output_recurrent_weights.size(); i++)
1164 const type random =
static_cast<type
>(rand()/(RAND_MAX+1.0));
1166 output_recurrent_weights(i) = minimum + (maximum - minimum)*random;
1171void LongShortTermMemoryLayer::calculate_combinations(
const Tensor<type, 1>& inputs,
1172 const Tensor<type, 2>& weights,
1173 const Tensor<type, 2>& recurrent_weights,
1174 const Tensor<type, 1>& biases,
1175 Tensor<type, 1>& combinations)
const
1181 combinations.device(*thread_pool_device) = inputs.contract(weights, AT_B);
1183 combinations.device(*thread_pool_device) += biases;
1185 combinations.device(*thread_pool_device) += hidden_states.contract(recurrent_weights, AT_B);
/// Applies the layer's activation function element-wise to a batch of
/// combinations, writing the result into `activations`.
1189void LongShortTermMemoryLayer::calculate_activations(
const Tensor<type, 2>& combinations, Tensor<type, 2>& activations)
const
// Dispatch on activation_function; each helper fills `activations` in place.
// (The switch header is outside this extract.)
1198 case ActivationFunction::Linear: linear(combinations, activations);
return;
1200 case ActivationFunction::Logistic: logistic(combinations, activations);
return;
1202 case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations, activations);
return;
1204 case ActivationFunction::Threshold: threshold(combinations, activations);
return;
1206 case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations, activations);
return;
1208 case ActivationFunction::RectifiedLinear: rectified_linear(combinations, activations);
return;
1210 case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations, activations);
return;
1212 case ActivationFunction::SoftPlus: soft_plus(combinations, activations);
return;
1214 case ActivationFunction::SoftSign: soft_sign(combinations, activations);
return;
1216 case ActivationFunction::HardSigmoid: hard_sigmoid(combinations, activations);
return;
1218 case ActivationFunction::ExponentialLinear: exponential_linear(combinations, activations);
return;
/// Applies the layer's activation function element-wise to a vector of
/// combinations, writing the result into `activations_1d`.
1223void LongShortTermMemoryLayer::calculate_activations(
const Tensor<type, 1>& combinations_1d, Tensor<type, 1>& activations_1d)
const
// Sanity check: the combinations vector must hold one value per neuron.
1229 const Index combinations_columns_number = combinations_1d.size();
1231 if(combinations_columns_number != neurons_number)
1233 ostringstream buffer;
1235 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
1236 <<
"void calculate_activations(const Tensor<type, 1>&) const method.\n"
1237 <<
"Size of combinations must be equal to number of neurons.\n";
1239 throw logic_error(buffer.str());
// Dispatch on activation_function; each helper fills `activations_1d` in place.
1246 case ActivationFunction::Linear: linear(combinations_1d, activations_1d);
return;
1248 case ActivationFunction::Logistic: logistic(combinations_1d, activations_1d);
return;
1250 case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations_1d, activations_1d);
return;
1252 case ActivationFunction::Threshold: threshold(combinations_1d, activations_1d);
return;
1254 case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations_1d, activations_1d);
return;
1256 case ActivationFunction::RectifiedLinear: rectified_linear(combinations_1d, activations_1d);
return;
1258 case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations_1d, activations_1d);
return;
1260 case ActivationFunction::SoftPlus: soft_plus(combinations_1d, activations_1d);
return;
1262 case ActivationFunction::SoftSign: soft_sign(combinations_1d, activations_1d);
return;
1264 case ActivationFunction::HardSigmoid: hard_sigmoid(combinations_1d, activations_1d);
return;
1266 case ActivationFunction::ExponentialLinear: exponential_linear(combinations_1d, activations_1d);
return;
1271Tensor<type, 1> LongShortTermMemoryLayer::calculate_activations(
const Tensor<type, 1>& combinations_1d)
const
1277 const Index combinations_columns_number = combinations_1d.size();
1279 if(combinations_columns_number != neurons_number)
1281 ostringstream buffer;
1283 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
1284 <<
"Tensor<type, 2> calculate_activations(const Tensor<type, 1>&) const method.\n"
1285 <<
"Size of combinations must be equal to number of neurons.\n";
1287 throw logic_error(buffer.str());
1292 Tensor<type, 1> activations_1d(combinations_1d.size());
1296 case ActivationFunction::Linear: linear(combinations_1d, activations_1d);
break;
1298 case ActivationFunction::Logistic: logistic(combinations_1d, activations_1d);
break;
1300 case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations_1d, activations_1d);
break;
1302 case ActivationFunction::Threshold: threshold(combinations_1d, activations_1d);
break;
1304 case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations_1d, activations_1d);
break;
1306 case ActivationFunction::RectifiedLinear: rectified_linear(combinations_1d, activations_1d);
break;
1308 case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations_1d, activations_1d);
break;
1310 case ActivationFunction::SoftPlus: soft_plus(combinations_1d, activations_1d);
break;
1312 case ActivationFunction::SoftSign: soft_sign(combinations_1d, activations_1d);
break;
1314 case ActivationFunction::HardSigmoid: hard_sigmoid(combinations_1d, activations_1d);
break;
1316 case ActivationFunction::ExponentialLinear: exponential_linear(combinations_1d, activations_1d);
break;
1319 return activations_1d;
1323void LongShortTermMemoryLayer::calculate_recurrent_activations(
const Tensor<type, 2>& combinations,
1324 Tensor<type, 2>& activations)
const
1330 const Index combinations_columns_number = combinations.dimension(2);
1332 if(combinations_columns_number != neurons_number)
1334 ostringstream buffer;
1336 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
1337 <<
"void calculate_recurrent_activations(const Tensor<type, 2>&) const method.\n"
1338 <<
"Number of columns("<< combinations_columns_number <<
") of combinations must be equal to number of neurons("<<neurons_number<<
").\n";
1340 throw logic_error(buffer.str());
1345 switch(recurrent_activation_function)
1347 case ActivationFunction::Linear: linear(combinations, activations);
break;
1349 case ActivationFunction::Logistic: logistic(combinations, activations);
break;
1351 case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations, activations);
break;
1353 case ActivationFunction::Threshold: threshold(combinations, activations);
break;
1355 case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations, activations);
break;
1357 case ActivationFunction::RectifiedLinear: rectified_linear(combinations, activations);
break;
1359 case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations, activations);
break;
1361 case ActivationFunction::SoftPlus: soft_plus(combinations, activations);
break;
1363 case ActivationFunction::SoftSign: soft_sign(combinations, activations);
break;
1365 case ActivationFunction::HardSigmoid: hard_sigmoid(combinations, activations);
break;
1367 case ActivationFunction::ExponentialLinear: exponential_linear(combinations, activations);
break;
1372void LongShortTermMemoryLayer::calculate_recurrent_activations(
const Tensor<type, 1>& combinations_1d,
1373 Tensor<type, 1>& recurrent_activations_1d)
const
1380 const Index combinations_columns_number = combinations_1d.size();
1382 if(combinations_columns_number != neurons_number)
1384 ostringstream buffer;
1386 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
1387 <<
"void calculate_activations(const Tensor<type, 2>&) const method.\n"
1388 <<
"Size of combinations must be equal to number of neurons.\n";
1390 throw logic_error(buffer.str());
1395 switch(recurrent_activation_function)
1397 case ActivationFunction::Linear: linear(combinations_1d, recurrent_activations_1d);
break;
1399 case ActivationFunction::Logistic: logistic(combinations_1d, recurrent_activations_1d);
break;
1401 case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations_1d, recurrent_activations_1d);
break;
1403 case ActivationFunction::Threshold: threshold(combinations_1d, recurrent_activations_1d);
break;
1405 case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations_1d, recurrent_activations_1d);
break;
1407 case ActivationFunction::RectifiedLinear: rectified_linear(combinations_1d, recurrent_activations_1d);
break;
1409 case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations_1d, recurrent_activations_1d);
break;
1411 case ActivationFunction::SoftPlus: soft_plus(combinations_1d, recurrent_activations_1d);
break;
1413 case ActivationFunction::SoftSign: soft_sign(combinations_1d, recurrent_activations_1d);
break;
1415 case ActivationFunction::HardSigmoid: hard_sigmoid(combinations_1d, recurrent_activations_1d);
break;
1417 case ActivationFunction::ExponentialLinear: exponential_linear(combinations_1d, recurrent_activations_1d);
break;
/// Computes the activations and their element-wise derivatives for a batch of
/// combinations, writing into `activations` and `activations_derivatives_2d`.
1422void LongShortTermMemoryLayer::calculate_activations_derivatives(
const Tensor<type, 2>& combinations,
1423 Tensor<type, 2>& activations,
1424 Tensor<type, 2>& activations_derivatives_2d)
const
// Sanity check: combinations must have one column per neuron.
1430 const Index combinations_columns_number = combinations.dimension(1);
1432 if(combinations_columns_number != neurons_number)
1434 ostringstream buffer;
1436 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
1437 <<
"void calculate_activations_derivatives(const Tensor<type, 2>&) const method.\n"
1438 <<
"Number of columns("<< combinations_columns_number <<
") of combinations must be equal to number of neurons("<<neurons_number<<
").\n";
1440 throw logic_error(buffer.str());
// Dispatch on activation_function; each *_derivatives helper fills both
// output tensors in place.
1447 case ActivationFunction::Linear: linear_derivatives(combinations, activations, activations_derivatives_2d);
return;
1449 case ActivationFunction::Logistic: logistic_derivatives(combinations, activations, activations_derivatives_2d);
return;
1451 case ActivationFunction::HyperbolicTangent: hyperbolic_tangent_derivatives(combinations, activations, activations_derivatives_2d);
return;
1453 case ActivationFunction::Threshold: threshold_derivatives(combinations, activations, activations_derivatives_2d);
return;
1455 case ActivationFunction::SymmetricThreshold: symmetric_threshold_derivatives(combinations, activations, activations_derivatives_2d);
return;
1457 case ActivationFunction::RectifiedLinear: rectified_linear_derivatives(combinations, activations, activations_derivatives_2d);
return;
1459 case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear_derivatives(combinations, activations, activations_derivatives_2d);
return;
1461 case ActivationFunction::SoftPlus: soft_plus_derivatives(combinations, activations, activations_derivatives_2d);
return;
1463 case ActivationFunction::SoftSign: soft_sign_derivatives(combinations, activations, activations_derivatives_2d);
return;
1465 case ActivationFunction::HardSigmoid: hard_sigmoid_derivatives(combinations, activations, activations_derivatives_2d);
return;
1467 case ActivationFunction::ExponentialLinear: exponential_linear_derivatives(combinations, activations, activations_derivatives_2d);
return;
/// Computes the activations and their element-wise derivatives for a single
/// vector of combinations.
1472void LongShortTermMemoryLayer::calculate_activations_derivatives(
const Tensor<type, 1>& combinations_1d,
1473 Tensor<type, 1>& activations_1d,
1474 Tensor<type, 1>& activations_derivatives_1d)
const
// Sanity check: combinations must hold one value per neuron.
1481 const Index combinations_columns_number = combinations_1d.size();
1483 if(combinations_columns_number != neurons_number)
1485 ostringstream buffer;
// NOTE(review): the message below names the Tensor<type, 2> overload although
// this overload takes Tensor<type, 1> — consider correcting it.
1487 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
1488 <<
"void calculate_activations_derivatives(const Tensor<type, 2>&) const method.\n"
1489 <<
"Size of combinations must be equal to number of neurons.\n";
1491 throw logic_error(buffer.str());
// Dispatch on activation_function; each *_derivatives helper fills both
// output vectors in place.
1498 case ActivationFunction::Linear: linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1500 case ActivationFunction::Logistic: logistic_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1502 case ActivationFunction::HyperbolicTangent: hyperbolic_tangent_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1504 case ActivationFunction::Threshold: threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1506 case ActivationFunction::SymmetricThreshold: symmetric_threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1508 case ActivationFunction::RectifiedLinear: rectified_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1510 case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1512 case ActivationFunction::SoftPlus: soft_plus_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1514 case ActivationFunction::SoftSign: soft_sign_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1516 case ActivationFunction::HardSigmoid: hard_sigmoid_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1518 case ActivationFunction::ExponentialLinear: exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
/// Computes the recurrent (gate) activations and their element-wise
/// derivatives for a single vector of combinations.
1523void LongShortTermMemoryLayer::calculate_recurrent_activations_derivatives(
const Tensor<type, 1>& combinations_1d,
1524 Tensor<type, 1>& activations_1d,
1525 Tensor<type, 1>& activations_derivatives_1d)
const
// Sanity check: combinations must hold one value per neuron.
1531 const Index combinations_columns_number = combinations_1d.size();
1533 if(combinations_columns_number != neurons_number)
1535 ostringstream buffer;
// NOTE(review): the message below says Tensor<type, 2> although this overload
// takes Tensor<type, 1> — consider correcting it.
1537 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
1538 <<
"void calculate_recurrent_activations_derivatives(const Tensor<type, 2>&) const method.\n"
1539 <<
"Size of combinations must be equal to number of neurons.\n";
1541 throw logic_error(buffer.str());
// Dispatch on the recurrent activation function; each *_derivatives helper
// fills both output vectors in place.
1546 switch(recurrent_activation_function)
1548 case ActivationFunction::Linear: linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1550 case ActivationFunction::Logistic: logistic_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1552 case ActivationFunction::HyperbolicTangent: hyperbolic_tangent_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1554 case ActivationFunction::Threshold: threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1556 case ActivationFunction::SymmetricThreshold: symmetric_threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1558 case ActivationFunction::RectifiedLinear: rectified_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1560 case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1562 case ActivationFunction::SoftPlus: soft_plus_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1564 case ActivationFunction::SoftSign: soft_sign_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1566 case ActivationFunction::HardSigmoid: hard_sigmoid_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1568 case ActivationFunction::ExponentialLinear: exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d);
return;
1573Tensor<type, 2> LongShortTermMemoryLayer::calculate_outputs(
const Tensor<type, 2>& inputs)
1579 const Index inputs_columns_number = inputs.dimension(1);
1581 if(inputs_columns_number != inputs_number)
1583 ostringstream buffer;
1585 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
1586 <<
"Tensor<type, 2> calculate_outputs(const Tensor<type, 2>&) const method.\n"
1587 <<
"Number of columns ("<<inputs_columns_number<<
") of inputs matrix must be equal to number of inputs ("<<inputs_number<<
").\n";
1589 throw logic_error(buffer.str());
1593 const Index samples_number = inputs.dimension(0);
1597 Tensor<type, 2> outputs(samples_number, neurons_number);
1599 Tensor<type, 1> forget_combinations(neurons_number);
1600 Tensor<type, 1> forget_activations(neurons_number);
1602 Tensor<type, 1> input_combinations(neurons_number);
1603 Tensor<type, 1> input_activations(neurons_number);
1605 Tensor<type, 1> state_combinations(neurons_number);
1606 Tensor<type, 1> state_activations(neurons_number);
1608 Tensor<type, 1> output_combinations(neurons_number);
1609 Tensor<type, 1> output_activations(neurons_number);
1611 for(Index i = 0; i < samples_number; i++)
1613 if(i%timesteps == 0)
1615 hidden_states.setZero();
1616 cell_states.setZero();
1619 const Tensor<type, 1> current_inputs = inputs.chip(i, 0);
1623 calculate_combinations(current_inputs, forget_weights, forget_recurrent_weights, forget_biases, forget_combinations);
1624 calculate_recurrent_activations(forget_combinations, forget_activations);
1626 calculate_combinations(current_inputs, input_weights, input_recurrent_weights, input_biases, input_combinations);
1627 calculate_recurrent_activations(input_combinations, input_activations);
1629 calculate_combinations(current_inputs, state_weights, state_recurrent_weights, state_biases, state_combinations);
1630 calculate_activations(state_combinations, state_activations);
1632 calculate_combinations(current_inputs, output_weights, output_recurrent_weights, output_biases, output_combinations);
1633 calculate_recurrent_activations(output_combinations, output_activations);
1636 cell_states = forget_activations * cell_states + input_activations * state_activations;
1637 calculate_activations(cell_states, hidden_states);
1638 hidden_states *= output_activations;
1640 for(Index j = 0; j < neurons_number; j++)
1641 outputs(i,j) = hidden_states(j);
1648void LongShortTermMemoryLayer::calculate_hidden_delta(LayerForwardPropagation* next_forward_propagation,
1649 LayerBackPropagation* next_back_propagation,
1650 LayerBackPropagation* back_propagation)
const
1652 LongShortTermMemoryLayerBackPropagation* long_short_term_memory_layer_back_propagation =
1653 static_cast<LongShortTermMemoryLayerBackPropagation*
>(back_propagation);
1655 switch(next_back_propagation->layer_pointer->get_type())
1657 case Type::Perceptron:
1659 PerceptronLayerForwardPropagation* next_perceptron_layer_forward_propagation =
1660 static_cast<PerceptronLayerForwardPropagation*
>(next_forward_propagation);
1662 PerceptronLayerBackPropagation* next_perceptron_layer_back_propagation =
1663 static_cast<PerceptronLayerBackPropagation*
>(next_back_propagation);
1665 calculate_hidden_delta_perceptron(next_perceptron_layer_forward_propagation,
1666 next_perceptron_layer_back_propagation,
1667 long_short_term_memory_layer_back_propagation);
1671 case Type::Probabilistic:
1673 ProbabilisticLayerForwardPropagation* next_probabilistic_layer_forward_propagation =
1674 static_cast<ProbabilisticLayerForwardPropagation*
>(next_forward_propagation);
1676 ProbabilisticLayerBackPropagation* next_probabilistic_layer_back_propagation =
1677 static_cast<ProbabilisticLayerBackPropagation*
>(next_back_propagation);
1679 calculate_hidden_delta_probabilistic(next_probabilistic_layer_forward_propagation,
1680 next_probabilistic_layer_back_propagation,
1681 long_short_term_memory_layer_back_propagation);
1690void LongShortTermMemoryLayer::calculate_hidden_delta_perceptron(PerceptronLayerForwardPropagation* next_forward_propagation,
1691 PerceptronLayerBackPropagation* next_back_propagation,
1692 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
1694 const Tensor<type, 2>& next_synaptic_weights =
static_cast<PerceptronLayer*
>(next_back_propagation->layer_pointer)->get_synaptic_weights();
1696 back_propagation->delta.device(*thread_pool_device) =
1697 (next_back_propagation->delta*next_forward_propagation->activations_derivatives).contract(next_synaptic_weights, A_BT);
1701void LongShortTermMemoryLayer::calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation* next_forward_propagation,
1702 ProbabilisticLayerBackPropagation* next_back_propagation,
1703 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
1705 const ProbabilisticLayer* probabilistic_layer_pointer =
static_cast<ProbabilisticLayer*
>(next_back_propagation->layer_pointer);
1707 const Tensor<type, 2>& next_synaptic_weights = probabilistic_layer_pointer->get_synaptic_weights();
1709 if(probabilistic_layer_pointer->get_neurons_number() == 1)
1711 back_propagation->delta.device(*thread_pool_device) =
1712 (next_back_propagation->delta*next_forward_propagation->activations_derivatives).contract(next_synaptic_weights, A_BT);
1716 const Index samples_number = next_back_propagation->delta.dimension(0);
1717 const Index outputs_number = next_back_propagation->delta.dimension(1);
1718 const Index next_layer_neurons_number = probabilistic_layer_pointer->get_neurons_number();
1720 if(outputs_number != next_layer_neurons_number)
1722 ostringstream buffer;
1724 buffer <<
"OpenNN Exception: ProbabilisticLayer class.\n"
1725 <<
"void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*,ProbabilisticLayerBackPropagation*,PerceptronLayerBackPropagation*) const.\n"
1726 <<
"Number of columns in delta (" << outputs_number <<
") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number <<
").\n";
1728 throw logic_error(buffer.str());
1731 if(next_forward_propagation->activations_derivatives.dimension(1) != next_layer_neurons_number)
1733 ostringstream buffer;
1735 buffer <<
"OpenNN Exception: ProbabilisticLayer class.\n"
1736 <<
"void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*,ProbabilisticLayerBackPropagation*,PerceptronLayerBackPropagation*) const.\n"
1737 <<
"Dimension 1 of activations derivatives (" << outputs_number <<
") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number <<
").\n";
1739 throw logic_error(buffer.str());
1742 if(next_forward_propagation->activations_derivatives.dimension(2) != next_layer_neurons_number)
1744 ostringstream buffer;
1746 buffer <<
"OpenNN Exception: ProbabilisticLayer class.\n"
1747 <<
"void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*,ProbabilisticLayerBackPropagation*,PerceptronLayerBackPropagation*) const.\n"
1748 <<
"Dimension 2 of activations derivatives (" << outputs_number <<
") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number <<
").\n";
1750 throw logic_error(buffer.str());
1753 const Index step = next_layer_neurons_number*next_layer_neurons_number;
1755 next_back_propagation->biases_derivatives.setZero();
1757 for(Index i = 0; i < samples_number; i++)
1759 next_back_propagation->delta_row = next_back_propagation->delta.chip(i,0);
1761 TensorMap< Tensor<type, 2> > activations_derivatives_matrix(next_forward_propagation->activations_derivatives.data() + i*step,
1762 next_layer_neurons_number, next_layer_neurons_number);
1764 next_back_propagation->error_combinations_derivatives.chip(i,0) =
1765 next_back_propagation->delta_row.contract(activations_derivatives_matrix, AT_B);
1768 back_propagation->delta.device(*thread_pool_device) =
1769 (next_back_propagation->error_combinations_derivatives).contract(next_synaptic_weights, A_BT);
1774void LongShortTermMemoryLayer::forward_propagate(
const Tensor<type, 2>&inputs, LayerForwardPropagation* forward_propagation)
1776 LongShortTermMemoryLayerForwardPropagation* long_short_term_memory_layer_forward_propagation
1777 =
static_cast<LongShortTermMemoryLayerForwardPropagation*
>(forward_propagation);
1779 const Index samples_number = inputs.dimension(0);
1782 Index copy_index = 0;
1784 for(Index i = 0; i < samples_number; i++)
1786 if(i%timesteps == 0)
1788 hidden_states.setZero();
1789 cell_states.setZero();
1792 long_short_term_memory_layer_forward_propagation->current_inputs = inputs.chip(i,0);
1794 calculate_combinations(long_short_term_memory_layer_forward_propagation->current_inputs,
1796 forget_recurrent_weights,
1798 long_short_term_memory_layer_forward_propagation->current_forget_combinations);
1800 calculate_recurrent_activations_derivatives(long_short_term_memory_layer_forward_propagation->current_forget_combinations,
1801 long_short_term_memory_layer_forward_propagation->current_forget_activations,
1802 long_short_term_memory_layer_forward_propagation->current_forget_activations_derivatives);
1804 calculate_combinations(long_short_term_memory_layer_forward_propagation->current_inputs,
1806 input_recurrent_weights,
1808 long_short_term_memory_layer_forward_propagation->current_input_combinations);
1810 calculate_recurrent_activations_derivatives(long_short_term_memory_layer_forward_propagation->current_input_combinations,
1811 long_short_term_memory_layer_forward_propagation->current_input_activations,
1812 long_short_term_memory_layer_forward_propagation->current_input_activations_derivatives);
1814 calculate_combinations(long_short_term_memory_layer_forward_propagation->current_inputs,
1816 state_recurrent_weights,
1818 long_short_term_memory_layer_forward_propagation->current_state_combinations);
1820 calculate_recurrent_activations_derivatives(long_short_term_memory_layer_forward_propagation->current_state_combinations,
1821 long_short_term_memory_layer_forward_propagation->current_state_activations,
1822 long_short_term_memory_layer_forward_propagation->current_state_activations_derivatives);
1824 calculate_combinations(long_short_term_memory_layer_forward_propagation->current_inputs,
1826 output_recurrent_weights,
1828 long_short_term_memory_layer_forward_propagation->current_output_combinations);
1830 calculate_recurrent_activations_derivatives(long_short_term_memory_layer_forward_propagation->current_output_combinations,
1831 long_short_term_memory_layer_forward_propagation->current_output_activations,
1832 long_short_term_memory_layer_forward_propagation->current_output_activations_derivatives);
1834 cell_states = long_short_term_memory_layer_forward_propagation->current_forget_activations * cell_states +
1835 long_short_term_memory_layer_forward_propagation->current_input_activations * long_short_term_memory_layer_forward_propagation->current_state_activations;
1837 calculate_activations_derivatives(cell_states, hidden_states, long_short_term_memory_layer_forward_propagation->current_hidden_states_derivatives);
1839 hidden_states *= long_short_term_memory_layer_forward_propagation->current_output_activations;
1843 for(Index j = 0; j < neurons_number; j++) long_short_term_memory_layer_forward_propagation->activations(i,j) = hidden_states(j);
1847 memcpy(long_short_term_memory_layer_forward_propagation->forget_activations.data() + copy_index,
1848 long_short_term_memory_layer_forward_propagation->current_forget_activations.data(),
1849 static_cast<size_t>(neurons_number)*
sizeof(type));
1851 memcpy(long_short_term_memory_layer_forward_propagation->forget_activations_derivatives.data() + copy_index,
1852 long_short_term_memory_layer_forward_propagation->current_forget_activations_derivatives.data(),
1853 static_cast<size_t>(neurons_number)*
sizeof(type));
1857 memcpy(long_short_term_memory_layer_forward_propagation->input_activations.data() + copy_index,
1858 long_short_term_memory_layer_forward_propagation->current_input_activations.data(),
1859 static_cast<size_t>(neurons_number)*
sizeof(type));
1861 memcpy(long_short_term_memory_layer_forward_propagation->input_activations_derivatives.data() + copy_index,
1862 long_short_term_memory_layer_forward_propagation->current_input_activations_derivatives.data(),
1863 static_cast<size_t>(neurons_number)*
sizeof(type));
1867 memcpy(long_short_term_memory_layer_forward_propagation->state_activations.data() + copy_index,
1868 long_short_term_memory_layer_forward_propagation->current_state_activations.data(),
1869 static_cast<size_t>(neurons_number)*
sizeof(type));
1871 memcpy(long_short_term_memory_layer_forward_propagation->state_activations_derivatives.data() + copy_index,
1872 long_short_term_memory_layer_forward_propagation->current_state_activations_derivatives.data(),
1873 static_cast<size_t>(neurons_number)*
sizeof(type));
1877 memcpy(long_short_term_memory_layer_forward_propagation->output_activations.data() + copy_index,
1878 long_short_term_memory_layer_forward_propagation->current_output_activations.data(),
1879 static_cast<size_t>(neurons_number)*
sizeof(type));
1881 memcpy(long_short_term_memory_layer_forward_propagation->output_activations_derivatives.data() + copy_index,
1882 long_short_term_memory_layer_forward_propagation->current_output_activations_derivatives.data(),
1883 static_cast<size_t>(neurons_number)*
sizeof(type));
1887 memcpy(long_short_term_memory_layer_forward_propagation->cell_states_activations.data() + copy_index,
1889 static_cast<size_t>(neurons_number)*
sizeof(type));
1893 memcpy(long_short_term_memory_layer_forward_propagation->hidden_states_activations.data() + copy_index,
1894 hidden_states.data(),
1895 static_cast<size_t>(neurons_number)*
sizeof(type));
1897 memcpy(long_short_term_memory_layer_forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
1898 long_short_term_memory_layer_forward_propagation->current_hidden_states_derivatives.data(),
1899 static_cast<size_t>(neurons_number)*
sizeof(type));
1901 copy_index += neurons_number;
1906void LongShortTermMemoryLayer::forward_propagate(
const Tensor<type, 2>& inputs,
1907 Tensor<type, 1> parameters,
1908 LayerForwardPropagation* forward_propagation)
1910 LongShortTermMemoryLayerForwardPropagation* long_short_term_memory_layer_forward_propagation
1911 =
static_cast<LongShortTermMemoryLayerForwardPropagation*
>(forward_propagation);
1916 const TensorMap<Tensor<type, 1>> forget_biases(parameters.data(), neurons_number);
1917 const TensorMap<Tensor<type, 1>> input_biases(parameters.data()+neurons_number, neurons_number);
1918 const TensorMap<Tensor<type, 1>> state_biases(parameters.data()+2*neurons_number, neurons_number);
1919 const TensorMap<Tensor<type, 1>> output_biases(parameters.data()+3*neurons_number, neurons_number);
1921 const TensorMap<Tensor<type, 2>> forget_weights(parameters.data()+4*neurons_number, inputs_number, neurons_number);
1922 const TensorMap<Tensor<type, 2>> input_weights(parameters.data()+4*neurons_number+inputs_number*neurons_number, inputs_number, neurons_number);
1923 const TensorMap<Tensor<type, 2>> state_weights(parameters.data()+4*neurons_number+2*inputs_number*neurons_number, inputs_number, neurons_number);
1924 const TensorMap<Tensor<type, 2>> output_weights(parameters.data()+4*neurons_number+3*inputs_number*neurons_number, inputs_number, neurons_number);
1926 const TensorMap<Tensor<type, 2>> forget_recurrent_weights(parameters.data()+4*neurons_number+4*inputs_number*neurons_number, neurons_number, neurons_number);
1927 const TensorMap<Tensor<type, 2>> input_recurrent_weights(parameters.data()+4*neurons_number+4*inputs_number*neurons_number+neurons_number*neurons_number, neurons_number, neurons_number);
1928 const TensorMap<Tensor<type, 2>> state_recurrent_weights(parameters.data()+4*neurons_number+4*inputs_number*neurons_number+2*neurons_number*neurons_number, neurons_number, neurons_number);
1929 const TensorMap<Tensor<type, 2>> output_recurrent_weights(parameters.data()+4*neurons_number+4*inputs_number*neurons_number+3*neurons_number*neurons_number, neurons_number, neurons_number);
1931 const Index samples_number = inputs.dimension(0);
1933 Tensor<type, 1> forget_combinations(neurons_number);
1934 Tensor<type, 1> input_combinations(neurons_number);
1935 Tensor<type, 1> state_combinations(neurons_number);
1936 Tensor<type, 1> output_combinations(neurons_number);
1938 Tensor<type, 1> forget_activations(neurons_number);
1939 Tensor<type, 1> input_activations(neurons_number);
1940 Tensor<type, 1> state_activations(neurons_number);
1941 Tensor<type, 1> output_activations(neurons_number);
1943 Tensor<type, 1> forget_activations_derivatives(neurons_number);
1944 Tensor<type, 1> input_activations_derivatives(neurons_number);
1945 Tensor<type, 1> state_activations_derivatives(neurons_number);
1946 Tensor<type, 1> output_activations_derivatives(neurons_number);
1948 Tensor<type, 1> hidden_states_derivatives(neurons_number);
1950 Index copy_index = 0;
1952 for(Index i = 0; i < samples_number; i++)
1954 if(i%timesteps == 0)
1956 hidden_states.setZero();
1957 cell_states.setZero();
1960 const Tensor<type, 1> current_inputs = inputs.chip(i,0);
1962 calculate_combinations(current_inputs, forget_weights, forget_recurrent_weights, forget_biases, forget_combinations);
1963 calculate_recurrent_activations_derivatives(forget_combinations, forget_activations, forget_activations_derivatives);
1965 calculate_combinations(current_inputs, input_weights, input_recurrent_weights, input_biases, input_combinations);
1966 calculate_recurrent_activations_derivatives(input_combinations, input_activations, input_activations_derivatives);
1968 calculate_combinations(current_inputs, state_weights, state_recurrent_weights, state_biases, state_combinations);
1969 calculate_recurrent_activations_derivatives(state_combinations, state_activations, state_activations_derivatives);
1971 calculate_combinations(current_inputs, output_weights, output_recurrent_weights, output_biases, output_combinations);
1972 calculate_recurrent_activations_derivatives(output_combinations, output_activations, output_activations_derivatives);
1974 cell_states = forget_activations * cell_states + input_activations * state_activations;
1975 calculate_activations_derivatives(cell_states, hidden_states, hidden_states_derivatives);
1977 hidden_states *= output_activations;
1981 for(Index j = 0; j < neurons_number; j++) long_short_term_memory_layer_forward_propagation->activations(i,j) = hidden_states(j);
1985 memcpy(long_short_term_memory_layer_forward_propagation->forget_activations.data() + copy_index,
1986 long_short_term_memory_layer_forward_propagation->current_forget_activations.data(),
1987 static_cast<size_t>(neurons_number)*
sizeof(type));
1989 memcpy(long_short_term_memory_layer_forward_propagation->forget_activations_derivatives.data() + copy_index,
1990 long_short_term_memory_layer_forward_propagation->current_forget_activations_derivatives.data(),
1991 static_cast<size_t>(neurons_number)*
sizeof(type));
1995 memcpy(long_short_term_memory_layer_forward_propagation->input_activations.data() + copy_index,
1996 long_short_term_memory_layer_forward_propagation->current_input_activations.data(),
1997 static_cast<size_t>(neurons_number)*
sizeof(type));
1999 memcpy(long_short_term_memory_layer_forward_propagation->input_activations_derivatives.data() + copy_index,
2000 long_short_term_memory_layer_forward_propagation->current_input_activations_derivatives.data(),
2001 static_cast<size_t>(neurons_number)*
sizeof(type));
2005 memcpy(long_short_term_memory_layer_forward_propagation->state_activations.data() + copy_index,
2006 long_short_term_memory_layer_forward_propagation->current_state_activations.data(),
2007 static_cast<size_t>(neurons_number)*
sizeof(type));
2009 memcpy(long_short_term_memory_layer_forward_propagation->state_activations_derivatives.data() + copy_index,
2010 long_short_term_memory_layer_forward_propagation->current_state_activations_derivatives.data(),
2011 static_cast<size_t>(neurons_number)*
sizeof(type));
2015 memcpy(long_short_term_memory_layer_forward_propagation->output_activations.data() + copy_index,
2016 long_short_term_memory_layer_forward_propagation->current_output_activations.data(),
2017 static_cast<size_t>(neurons_number)*
sizeof(type));
2019 memcpy(long_short_term_memory_layer_forward_propagation->output_activations_derivatives.data() + copy_index,
2020 long_short_term_memory_layer_forward_propagation->current_output_activations_derivatives.data(),
2021 static_cast<size_t>(neurons_number)*
sizeof(type));
2025 memcpy(long_short_term_memory_layer_forward_propagation->cell_states_activations.data() + copy_index,
2027 static_cast<size_t>(neurons_number)*
sizeof(type));
2031 memcpy(long_short_term_memory_layer_forward_propagation->hidden_states_activations.data() + copy_index,
2032 hidden_states.data(),
2033 static_cast<size_t>(neurons_number)*
sizeof(type));
2035 memcpy(long_short_term_memory_layer_forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
2036 long_short_term_memory_layer_forward_propagation->current_hidden_states_derivatives.data(),
2037 static_cast<size_t>(neurons_number)*
sizeof(type));
2039 copy_index += neurons_number;
2044void LongShortTermMemoryLayer::insert_gradient(LayerBackPropagation* back_propagation,
2046 Tensor<type, 1>& gradient)
const
2051 LongShortTermMemoryLayerBackPropagation* long_short_term_memory_layer_back_propagation =
2052 static_cast<LongShortTermMemoryLayerBackPropagation*
>(back_propagation);
2056 memcpy(gradient.data() + index,
2057 long_short_term_memory_layer_back_propagation->forget_biases_derivatives.data(),
2058 static_cast<size_t>(neurons_number)*
sizeof(type));
2060 memcpy(gradient.data() + index + neurons_number,
2061 long_short_term_memory_layer_back_propagation->input_biases_derivatives.data(),
2062 static_cast<size_t>(neurons_number)*
sizeof(type));
2064 memcpy(gradient.data() + index + 2*neurons_number,
2065 long_short_term_memory_layer_back_propagation->state_biases_derivatives.data(),
2066 static_cast<size_t>(neurons_number)*
sizeof(type));
2068 memcpy(gradient.data() + index + 3*neurons_number,
2069 long_short_term_memory_layer_back_propagation->output_biases_derivatives.data(),
2070 static_cast<size_t>(neurons_number)*
sizeof(type));
2074 memcpy(gradient.data() + index + 4*neurons_number,
2075 long_short_term_memory_layer_back_propagation->forget_weights_derivatives.data(),
2076 static_cast<size_t>(inputs_number*neurons_number)*
sizeof(type));
2078 memcpy(gradient.data() + index + 4*neurons_number + inputs_number*neurons_number,
2079 long_short_term_memory_layer_back_propagation->input_weights_derivatives.data(),
2080 static_cast<size_t>(inputs_number*neurons_number)*
sizeof(type));
2082 memcpy(gradient.data() + index + 4*neurons_number + 2*inputs_number*neurons_number,
2083 long_short_term_memory_layer_back_propagation->state_weights_derivatives.data(),
2084 static_cast<size_t>(inputs_number*neurons_number)*
sizeof(type));
2086 memcpy(gradient.data() + index + 4*neurons_number + 3*inputs_number*neurons_number,
2087 long_short_term_memory_layer_back_propagation->output_weights_derivatives.data(),
2088 static_cast<size_t>(inputs_number*neurons_number)*
sizeof(type));
2092 memcpy(gradient.data() + index + 4*neurons_number + 4*inputs_number*neurons_number,
2093 long_short_term_memory_layer_back_propagation->forget_recurrent_weights_derivatives.data(),
2094 static_cast<size_t>(neurons_number*neurons_number)*
sizeof(type));
2096 memcpy(gradient.data() + index + 4*neurons_number + 4*inputs_number*neurons_number + neurons_number*neurons_number,
2097 long_short_term_memory_layer_back_propagation->input_recurrent_weights_derivatives.data(),
2098 static_cast<size_t>(neurons_number*neurons_number)*
sizeof(type));
2100 memcpy(gradient.data() + index + 4*neurons_number + 4*inputs_number*neurons_number + 2*neurons_number*neurons_number,
2101 long_short_term_memory_layer_back_propagation->state_recurrent_weights_derivatives.data(),
2102 static_cast<size_t>(neurons_number*neurons_number)*
sizeof(type));
2104 memcpy(gradient.data() + index + 4*neurons_number + 4*inputs_number*neurons_number + 3*neurons_number*neurons_number,
2105 long_short_term_memory_layer_back_propagation->output_recurrent_weights_derivatives.data(),
2106 static_cast<size_t>(neurons_number*neurons_number)*
sizeof(type));
2110void LongShortTermMemoryLayer::calculate_error_gradient(
const Tensor<type, 2>& inputs,
2111 LayerForwardPropagation* forward_propagation,
2112 LayerBackPropagation* back_propagation)
const
2114 LongShortTermMemoryLayerForwardPropagation* long_short_term_memory_layer_forward_propagation =
2115 static_cast<LongShortTermMemoryLayerForwardPropagation*
>(forward_propagation);
2117 LongShortTermMemoryLayerBackPropagation* long_short_term_memory_layer_back_propagation =
2118 static_cast<LongShortTermMemoryLayerBackPropagation*
>(back_propagation);
2124 calculate_forget_biases_error_gradient(inputs,
2125 long_short_term_memory_layer_forward_propagation,
2126 long_short_term_memory_layer_back_propagation);
2128 calculate_input_biases_error_gradient(inputs,
2129 long_short_term_memory_layer_forward_propagation,
2130 long_short_term_memory_layer_back_propagation);
2132 calculate_state_biases_error_gradient(inputs,
2133 long_short_term_memory_layer_forward_propagation,
2134 long_short_term_memory_layer_back_propagation);
2136 calculate_output_biases_error_gradient(inputs,
2137 long_short_term_memory_layer_forward_propagation,
2138 long_short_term_memory_layer_back_propagation);
2142 calculate_forget_weights_error_gradient(inputs,
2143 long_short_term_memory_layer_forward_propagation,
2144 long_short_term_memory_layer_back_propagation);
2146 calculate_input_weights_error_gradient(inputs,
2147 long_short_term_memory_layer_forward_propagation,
2148 long_short_term_memory_layer_back_propagation);
2150 calculate_state_weights_error_gradient(inputs,
2151 long_short_term_memory_layer_forward_propagation,
2152 long_short_term_memory_layer_back_propagation);
2154 calculate_output_weights_error_gradient(inputs,
2155 long_short_term_memory_layer_forward_propagation,
2156 long_short_term_memory_layer_back_propagation);
2160 calculate_forget_recurrent_weights_error_gradient(inputs,
2161 long_short_term_memory_layer_forward_propagation,
2162 long_short_term_memory_layer_back_propagation);
2164 calculate_input_recurrent_weights_error_gradient(inputs,
2165 long_short_term_memory_layer_forward_propagation,
2166 long_short_term_memory_layer_back_propagation);
2168 calculate_state_recurrent_weights_error_gradient(inputs,
2169 long_short_term_memory_layer_forward_propagation,
2170 long_short_term_memory_layer_back_propagation);
2172 calculate_output_recurrent_weights_error_gradient(inputs,
2173 long_short_term_memory_layer_forward_propagation,
2174 long_short_term_memory_layer_back_propagation);
2179void LongShortTermMemoryLayer::calculate_forget_weights_error_gradient(
const Tensor<type, 2>& inputs,
2180 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
2181 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
2183 const Index samples_number = inputs.dimension(0);
2186 const Index parameters_number = inputs_number*neurons_number;
2188 Tensor<type, 2> input_combinations_weights_derivatives(parameters_number, neurons_number);
2189 Tensor<type, 2> forget_combinations_weights_derivatives(parameters_number, neurons_number);
2190 Tensor<type, 2> state_combinations_weights_derivatives(parameters_number, neurons_number);
2191 Tensor<type, 2> output_combinations_weights_derivatives(parameters_number, neurons_number);
2193 Tensor<type, 2> hidden_states_weights_derivatives(parameters_number, neurons_number);
2194 Tensor<type, 2> cell_state_weights_derivatives(parameters_number, neurons_number);
2196 input_combinations_weights_derivatives.setZero();
2197 forget_combinations_weights_derivatives.setZero();
2198 state_combinations_weights_derivatives.setZero();
2199 output_combinations_weights_derivatives.setZero();
2200 hidden_states_weights_derivatives.setZero();
2201 cell_state_weights_derivatives.setZero();
2203 Index column_index = 0;
2204 Index input_index = 0;
2206 Index copy_index = 0;
2208 back_propagation->forget_weights_derivatives.setZero();
2210 for(Index sample = 0; sample < samples_number; sample++)
2212 const Tensor<type, 1> current_inputs = inputs.chip(sample, 0);
2213 const Tensor<type, 1> current_layer_deltas = back_propagation->delta.chip(sample,0);
2217 memcpy(forward_propagation->current_forget_activations.data(),
2218 forward_propagation->forget_activations.data()+copy_index,
2219 static_cast<size_t>(neurons_number)*
sizeof(type));
2221 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
2222 forward_propagation->forget_activations_derivatives.data()+copy_index,
2223 static_cast<size_t>(neurons_number)*
sizeof(type));
2227 memcpy(forward_propagation->current_input_activations.data(),
2228 forward_propagation->input_activations.data()+copy_index,
2229 static_cast<size_t>(neurons_number)*
sizeof(type));
2230 memcpy(forward_propagation->current_input_activations_derivatives.data(),
2231 forward_propagation->input_activations_derivatives.data()+copy_index,
2232 static_cast<size_t>(neurons_number)*
sizeof(type));
2236 memcpy(forward_propagation->current_state_activations.data(),
2237 forward_propagation->state_activations.data()+copy_index,
2238 static_cast<size_t>(neurons_number)*
sizeof(type));
2239 memcpy(forward_propagation->current_state_activations_derivatives.data(),
2240 forward_propagation->state_activations_derivatives.data()+copy_index,
2241 static_cast<size_t>(neurons_number)*
sizeof(type));
2245 memcpy(forward_propagation->current_output_activations.data(),
2246 forward_propagation->output_activations.data()+copy_index,
2247 static_cast<size_t>(neurons_number)*
sizeof(type));
2248 memcpy(forward_propagation->current_output_activations_derivatives.data(),
2249 forward_propagation->output_activations_derivatives.data()+copy_index,
2250 static_cast<size_t>(neurons_number)*
sizeof(type));
2254 memcpy(forward_propagation->current_cell_state_activations.data(),
2255 forward_propagation->cell_states_activations.data()+copy_index,
2256 static_cast<size_t>(neurons_number)*
sizeof(type));
2258 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
2259 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
2260 static_cast<size_t>(neurons_number)*
sizeof(type));
2262 if(sample%timesteps == 0)
2264 forward_propagation->previous_cell_state_activations.setZero();
2266 forget_combinations_weights_derivatives.setZero();
2267 input_combinations_weights_derivatives.setZero();
2268 output_combinations_weights_derivatives.setZero();
2269 state_combinations_weights_derivatives.setZero();
2271 cell_state_weights_derivatives.setZero();
2275 memcpy(forward_propagation->previous_cell_state_activations.data(),
2276 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
2277 static_cast<size_t>(neurons_number)*
sizeof(type));
2279 forget_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(forget_recurrent_weights, A_B);
2281 input_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(input_recurrent_weights, A_B);
2282 multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_input_activations_derivatives);
2284 state_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(state_recurrent_weights, A_B);
2285 multiply_rows(state_combinations_weights_derivatives, forward_propagation->current_state_activations_derivatives);
2287 output_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(output_recurrent_weights, A_B);
2288 multiply_rows(output_combinations_weights_derivatives, forward_propagation->current_output_activations_derivatives);
2294 for(Index i = 0; i < parameters_number; i++)
2296 forget_combinations_weights_derivatives(i, column_index) += current_inputs(input_index);
2300 if(input_index == inputs_number)
2307 multiply_rows(cell_state_weights_derivatives,
2308 forward_propagation->current_forget_activations);
2310 multiply_rows(input_combinations_weights_derivatives,
2311 forward_propagation->current_state_activations);
2313 cell_state_weights_derivatives += input_combinations_weights_derivatives;
2315 multiply_rows(state_combinations_weights_derivatives,
2316 forward_propagation->current_input_activations);
2318 cell_state_weights_derivatives += state_combinations_weights_derivatives;
2320 multiply_rows(forget_combinations_weights_derivatives,
2321 forward_propagation->current_forget_activations_derivatives*forward_propagation->previous_cell_state_activations);
2323 cell_state_weights_derivatives += forget_combinations_weights_derivatives;
2326 memcpy(hidden_states_weights_derivatives.data(),
2327 cell_state_weights_derivatives.data(),
2328 static_cast<size_t>(cell_state_weights_derivatives.size())*
sizeof(type));
2330 multiply_rows(hidden_states_weights_derivatives,
2331 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
2332 multiply_rows(output_combinations_weights_derivatives,
2333 calculate_activations(forward_propagation->current_cell_state_activations));
2334 hidden_states_weights_derivatives += output_combinations_weights_derivatives;
2336 back_propagation->forget_weights_derivatives += hidden_states_weights_derivatives.contract(current_layer_deltas, A_B);
2338 copy_index += neurons_number;
/// Accumulates, over all samples, the error gradient with respect to the
/// input-gate input weights, back-propagating through time (BPTT).
/// @param inputs Sample matrix; rows are time steps (samples_number = inputs.dimension(0)).
/// @param forward_propagation Workspace holding the per-step gate activations and
/// their derivatives saved during the forward pass.
/// @param back_propagation Workspace supplying the layer deltas and receiving the
/// accumulated input_weights_derivatives.
/// NOTE(review): this listing appears to have structural lines elided by extraction
/// (braces, `else`, and the column_index/input_index increments inside the parameter
/// loop) -- confirm nesting against the full source before editing.
2343void LongShortTermMemoryLayer::calculate_input_weights_error_gradient(
const Tensor<type, 2>& inputs,
2344 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
2345 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
2347 const Index samples_number = inputs.dimension(0);
// One parameter per (input, neuron) pair for the input-gate weight matrix.
2350 const Index parameters_number = inputs_number*neurons_number;
// Per-gate derivatives of the gate combinations w.r.t. the input-gate weights,
// plus the running cell-state / hidden-state derivatives carried across time steps.
2352 Tensor<type, 2> input_combinations_weights_derivatives(parameters_number, neurons_number);
2353 Tensor<type, 2> forget_combinations_weights_derivatives(parameters_number, neurons_number);
2354 Tensor<type, 2> state_combinations_weights_derivatives(parameters_number, neurons_number);
2355 Tensor<type, 2> output_combinations_weights_derivatives(parameters_number, neurons_number);
2357 Tensor<type, 2> hidden_states_weights_derivatives(parameters_number, neurons_number);
2358 Tensor<type, 2> cell_state_weights_derivatives(parameters_number, neurons_number);
2360 input_combinations_weights_derivatives.setZero();
2361 forget_combinations_weights_derivatives.setZero();
2362 state_combinations_weights_derivatives.setZero();
2363 output_combinations_weights_derivatives.setZero();
2364 hidden_states_weights_derivatives.setZero();
2365 cell_state_weights_derivatives.setZero();
2367 Index column_index = 0;
2368 Index input_index = 0;
// Offset into the flat per-step activation buffers (advances by neurons_number per sample).
2370 Index copy_index = 0;
2372 back_propagation->input_weights_derivatives.setZero();
// Walk the samples in time order, carrying the BPTT recurrence forward.
2374 for(Index sample = 0; sample < samples_number; sample++)
2376 forward_propagation->current_inputs = inputs.chip(sample, 0);
2378 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample,0);
// Load this step's gate activations and activation derivatives from the
// forward-pass buffers into the "current_*" scratch vectors.
2380 memcpy(forward_propagation->current_forget_activations.data(),
2381 forward_propagation->forget_activations.data()+copy_index,
2382 static_cast<size_t>(neurons_number)*
sizeof(type));
2384 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
2385 forward_propagation->forget_activations_derivatives.data()+copy_index,
2386 static_cast<size_t>(neurons_number)*
sizeof(type));
2388 memcpy(forward_propagation->current_input_activations.data(),
2389 forward_propagation->input_activations.data()+copy_index,
2390 static_cast<size_t>(neurons_number)*
sizeof(type));
2392 memcpy(forward_propagation->current_input_activations_derivatives.data(),
2393 forward_propagation->input_activations_derivatives.data()+copy_index,
2394 static_cast<size_t>(neurons_number)*
sizeof(type));
2396 memcpy(forward_propagation->current_state_activations.data(),
2397 forward_propagation->state_activations.data()+copy_index,
2398 static_cast<size_t>(neurons_number)*
sizeof(type));
2400 memcpy(forward_propagation->current_state_activations_derivatives.data(),
2401 forward_propagation->state_activations_derivatives.data()+copy_index,
2402 static_cast<size_t>(neurons_number)*
sizeof(type));
2404 memcpy(forward_propagation->current_output_activations.data(),
2405 forward_propagation->output_activations.data()+copy_index,
2406 static_cast<size_t>(neurons_number)*
sizeof(type));
2408 memcpy(forward_propagation->current_output_activations_derivatives.data(),
2409 forward_propagation->output_activations_derivatives.data()+copy_index,
2410 static_cast<size_t>(neurons_number)*
sizeof(type));
2412 memcpy(forward_propagation->current_cell_state_activations.data(),
2413 forward_propagation->cell_states_activations.data()+copy_index,
2414 static_cast<size_t>(neurons_number)*
sizeof(type));
2416 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
2417 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
2418 static_cast<size_t>(neurons_number)*
sizeof(type));
// At the start of each sequence (every `timesteps` samples) the recurrence is
// reset: no previous cell state and no carried derivatives.
2420 if(sample%timesteps == 0)
2422 forward_propagation->previous_cell_state_activations.setZero();
2424 forget_combinations_weights_derivatives.setZero();
2425 input_combinations_weights_derivatives.setZero();
2426 output_combinations_weights_derivatives.setZero();
2427 state_combinations_weights_derivatives.setZero();
2429 cell_state_weights_derivatives.setZero();
2430 hidden_states_weights_derivatives.setZero();
// Otherwise (presumably an elided `else` branch) load the previous step's cell
// state and propagate d(combination)/d(weights) through the recurrent weights;
// each gate's term is scaled by its activation derivative.
2434 memcpy(forward_propagation->previous_cell_state_activations.data(),
2435 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
2436 static_cast<size_t>(neurons_number)*
sizeof(type));
2438 forget_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(forget_recurrent_weights, A_B);
2440 multiply_rows(forget_combinations_weights_derivatives,
2441 forward_propagation->current_forget_activations_derivatives);
// The input gate's own derivative factor is applied later (combined with the
// state activations below), so no multiply_rows here.
2443 input_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(input_recurrent_weights, A_B);
2445 state_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(state_recurrent_weights, A_B);
2447 multiply_rows(state_combinations_weights_derivatives,
2448 forward_propagation->current_state_activations_derivatives);
2450 output_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(output_recurrent_weights, A_B);
2452 multiply_rows(output_combinations_weights_derivatives,
2453 forward_propagation->current_output_activations_derivatives);
// Direct dependence of the input-gate combination on its own weights: add the
// corresponding input value to each (parameter, neuron) entry.
// NOTE(review): the input_index/column_index advance statements are elided here.
2459 for(Index i = 0; i < parameters_number; i++)
2461 input_combinations_weights_derivatives(i, column_index) += forward_propagation->current_inputs[input_index];
2465 if(input_index == inputs_number)
// Cell-state recurrence: c_t = f_t .* c_{t-1} + i_t .* s_t, differentiated
// w.r.t. the input-gate weights, accumulated term by term.
2472 multiply_rows(cell_state_weights_derivatives,
2473 forward_propagation->current_forget_activations);
2475 multiply_rows(forget_combinations_weights_derivatives,
2476 forward_propagation->previous_cell_state_activations);
2478 cell_state_weights_derivatives += forget_combinations_weights_derivatives;
2480 multiply_rows(state_combinations_weights_derivatives,
2481 forward_propagation->current_input_activations);
2483 cell_state_weights_derivatives += state_combinations_weights_derivatives;
// Deferred input-gate derivative factor applied here together with s_t.
2485 multiply_rows(input_combinations_weights_derivatives,
2486 forward_propagation->current_input_activations_derivatives*forward_propagation->current_state_activations);
2488 cell_state_weights_derivatives += input_combinations_weights_derivatives;
// Hidden-state recurrence: h_t = o_t .* act(c_t), differentiated via the chain rule.
2490 hidden_states_weights_derivatives = cell_state_weights_derivatives;
2492 multiply_rows(hidden_states_weights_derivatives,
2493 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
2495 multiply_rows(output_combinations_weights_derivatives,
2496 calculate_activations(forward_propagation->current_cell_state_activations));
2498 hidden_states_weights_derivatives += output_combinations_weights_derivatives;
// Chain through the layer deltas and accumulate this step's gradient contribution.
2500 back_propagation->input_weights_derivatives
2501 += hidden_states_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
2503 copy_index += neurons_number;
/// Accumulates, over all samples, the error gradient with respect to the
/// state (candidate) gate input weights, back-propagating through time.
/// @param inputs Sample matrix; rows are time steps.
/// @param forward_propagation Workspace with the saved per-step gate activations/derivatives.
/// @param back_propagation Workspace supplying deltas and receiving state_weights_derivatives.
/// NOTE(review): braces/`else`/index-increment lines appear elided by extraction --
/// confirm nesting against the full source before editing.
2508void LongShortTermMemoryLayer::calculate_state_weights_error_gradient(
const Tensor<type, 2>& inputs,
2509 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
2510 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
2512 const Index samples_number = inputs.dimension(0);
// One parameter per (input, neuron) pair of the state-gate weight matrix.
2515 const Index parameters_number = inputs_number*neurons_number;
// Per-gate combination derivatives plus carried cell/hidden state derivatives.
2517 Tensor<type, 2> input_combinations_weights_derivatives(parameters_number, neurons_number);
2518 Tensor<type, 2> forget_combinations_weights_derivatives(parameters_number, neurons_number);
2519 Tensor<type, 2> state_combinations_weights_derivatives(parameters_number, neurons_number);
2520 Tensor<type, 2> output_combinations_weights_derivatives(parameters_number, neurons_number);
2522 Tensor<type, 2> hidden_states_weights_derivatives(parameters_number, neurons_number);
2523 Tensor<type, 2> cell_state_weights_derivatives(parameters_number, neurons_number);
2525 input_combinations_weights_derivatives.setZero();
2526 forget_combinations_weights_derivatives.setZero();
2527 state_combinations_weights_derivatives.setZero();
2528 output_combinations_weights_derivatives.setZero();
2529 hidden_states_weights_derivatives.setZero();
2530 cell_state_weights_derivatives.setZero();
2532 Index column_index = 0;
2533 Index input_index = 0;
// Offset into the flat per-step activation buffers.
2535 Index copy_index = 0;
2537 back_propagation->state_weights_derivatives.setZero();
2539 for(Index sample = 0; sample < samples_number; sample++)
2541 forward_propagation->current_inputs = inputs.chip(sample, 0);
2543 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample,0);
// Load this step's gate activations and derivatives into the scratch vectors.
2545 memcpy(forward_propagation->current_forget_activations.data(),
2546 forward_propagation->forget_activations.data()+copy_index,
2547 static_cast<size_t>(neurons_number)*
sizeof(type));
2549 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
2550 forward_propagation->forget_activations_derivatives.data()+copy_index,
2551 static_cast<size_t>(neurons_number)*
sizeof(type));
2553 memcpy(forward_propagation->current_input_activations.data(),
2554 forward_propagation->input_activations.data()+copy_index,
2555 static_cast<size_t>(neurons_number)*
sizeof(type));
2557 memcpy(forward_propagation->current_input_activations_derivatives.data(),
2558 forward_propagation->input_activations_derivatives.data()+copy_index,
2559 static_cast<size_t>(neurons_number)*
sizeof(type));
2561 memcpy(forward_propagation->current_state_activations.data(),
2562 forward_propagation->state_activations.data()+copy_index,
2563 static_cast<size_t>(neurons_number)*
sizeof(type));
2565 memcpy(forward_propagation->current_state_activations_derivatives.data(),
2566 forward_propagation->state_activations_derivatives.data()+copy_index,
2567 static_cast<size_t>(neurons_number)*
sizeof(type));
2569 memcpy(forward_propagation->current_output_activations.data(),
2570 forward_propagation->output_activations.data()+copy_index,
2571 static_cast<size_t>(neurons_number)*
sizeof(type));
2573 memcpy(forward_propagation->current_output_activations_derivatives.data(),
2574 forward_propagation->output_activations_derivatives.data()+copy_index,
2575 static_cast<size_t>(neurons_number)*
sizeof(type));
2577 memcpy(forward_propagation->current_cell_state_activations.data(),
2578 forward_propagation->cell_states_activations.data()+copy_index,
2579 static_cast<size_t>(neurons_number)*
sizeof(type));
2581 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
2582 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
2583 static_cast<size_t>(neurons_number)*
sizeof(type));
// Reset the recurrence at each sequence boundary.
2585 if(sample%timesteps == 0)
2587 forward_propagation->previous_cell_state_activations.setZero();
2589 forget_combinations_weights_derivatives.setZero();
2590 input_combinations_weights_derivatives.setZero();
2591 output_combinations_weights_derivatives.setZero();
2592 state_combinations_weights_derivatives.setZero();
2594 cell_state_weights_derivatives.setZero();
// Otherwise (presumably an elided `else`) fetch the previous cell state and
// propagate the carried hidden-state derivatives through the recurrent weights.
2598 memcpy(forward_propagation->previous_cell_state_activations.data(),
2599 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
2600 static_cast<size_t>(neurons_number)*
sizeof(type));
2602 forget_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(forget_recurrent_weights, A_B);
2603 multiply_rows(forget_combinations_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
2605 input_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(input_recurrent_weights, A_B);
2606 multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_input_activations_derivatives);
// The state gate's own derivative factor is deferred (applied combined below).
2608 state_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(state_recurrent_weights, A_B);
2610 output_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(output_recurrent_weights, A_B);
2611 multiply_rows(output_combinations_weights_derivatives, forward_propagation->current_output_activations_derivatives);
// Direct dependence of the state-gate combination on its own weights.
// NOTE(review): input_index/column_index advance statements are elided here.
2617 for(Index i = 0; i < parameters_number; i++)
2619 state_combinations_weights_derivatives(i, column_index) += forward_propagation->current_inputs[input_index];
2623 if(input_index == inputs_number)
// Cell-state recurrence c_t = f_t .* c_{t-1} + i_t .* s_t, differentiated
// w.r.t. the state-gate weights.
2630 multiply_rows(cell_state_weights_derivatives, forward_propagation->current_forget_activations);
2631 multiply_rows(forget_combinations_weights_derivatives, forward_propagation->previous_cell_state_activations);
2632 cell_state_weights_derivatives += forget_combinations_weights_derivatives;
2633 multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_state_activations);
2634 cell_state_weights_derivatives += input_combinations_weights_derivatives;
// Deferred state-gate derivative factor applied here together with i_t.
2635 multiply_rows(state_combinations_weights_derivatives, (forward_propagation->current_state_activations_derivatives*forward_propagation->current_input_activations));
2636 cell_state_weights_derivatives += state_combinations_weights_derivatives;
// Hidden-state recurrence h_t = o_t .* act(c_t) via the chain rule.
2638 hidden_states_weights_derivatives = cell_state_weights_derivatives;
2639 multiply_rows(hidden_states_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
2640 multiply_rows(output_combinations_weights_derivatives, calculate_activations(forward_propagation->current_cell_state_activations));
2641 hidden_states_weights_derivatives += output_combinations_weights_derivatives;
// Chain through the deltas and accumulate this step's gradient contribution.
2643 back_propagation->state_weights_derivatives += hidden_states_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
2645 copy_index += neurons_number;
/// Accumulates, over all samples, the error gradient with respect to the
/// output-gate input weights, back-propagating through time.
/// @param inputs Sample matrix; rows are time steps.
/// @param forward_propagation Workspace with saved per-step gate activations/derivatives.
/// @param back_propagation Workspace supplying deltas and receiving output_weights_derivatives.
/// NOTE(review): braces/`else`/index-increment lines appear elided by extraction --
/// confirm nesting against the full source before editing.
2650void LongShortTermMemoryLayer::calculate_output_weights_error_gradient(
const Tensor<type, 2>& inputs,
2651 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
2652 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
2654 const Index samples_number = inputs.dimension(0);
// One parameter per (input, neuron) pair of the output-gate weight matrix.
2657 const Index parameters_number = inputs_number*neurons_number;
2659 Tensor<type, 2> input_combinations_weights_derivatives(parameters_number, neurons_number);
2660 Tensor<type, 2> forget_combinations_weights_derivatives(parameters_number, neurons_number);
2661 Tensor<type, 2> state_combinations_weights_derivatives(parameters_number, neurons_number);
2662 Tensor<type, 2> output_combinations_weights_derivatives(parameters_number, neurons_number);
2664 Tensor<type, 2> hidden_states_weights_derivatives(parameters_number, neurons_number);
2665 Tensor<type, 2> cell_state_weights_derivatives(parameters_number, neurons_number);
2667 input_combinations_weights_derivatives.setZero();
2668 forget_combinations_weights_derivatives.setZero();
2669 state_combinations_weights_derivatives.setZero();
2670 output_combinations_weights_derivatives.setZero();
2671 hidden_states_weights_derivatives.setZero();
2672 cell_state_weights_derivatives.setZero();
2674 Index column_index = 0;
2675 Index input_index = 0;
// Offset into the flat per-step activation buffers.
2677 Index copy_index = 0;
2679 back_propagation->output_weights_derivatives.setZero();
2681 for(Index sample = 0; sample < samples_number; sample++)
2683 forward_propagation->current_inputs = inputs.chip(sample, 0);
2685 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample,0);
// Load this step's gate activations and derivatives into the scratch vectors.
2687 memcpy(forward_propagation->current_forget_activations.data(),
2688 forward_propagation->forget_activations.data()+copy_index,
2689 static_cast<size_t>(neurons_number)*
sizeof(type));
2691 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
2692 forward_propagation->forget_activations_derivatives.data()+copy_index,
2693 static_cast<size_t>(neurons_number)*
sizeof(type));
2695 memcpy(forward_propagation->current_input_activations.data(),
2696 forward_propagation->input_activations.data()+copy_index,
2697 static_cast<size_t>(neurons_number)*
sizeof(type));
2699 memcpy(forward_propagation->current_input_activations_derivatives.data(),
2700 forward_propagation->input_activations_derivatives.data()+copy_index,
2701 static_cast<size_t>(neurons_number)*
sizeof(type));
2703 memcpy(forward_propagation->current_state_activations.data(),
2704 forward_propagation->state_activations.data()+copy_index,
2705 static_cast<size_t>(neurons_number)*
sizeof(type));
2707 memcpy(forward_propagation->current_state_activations_derivatives.data(),
2708 forward_propagation->state_activations_derivatives.data()+copy_index,
2709 static_cast<size_t>(neurons_number)*
sizeof(type));
2711 memcpy(forward_propagation->current_output_activations.data(),
2712 forward_propagation->output_activations.data()+copy_index,
2713 static_cast<size_t>(neurons_number)*
sizeof(type));
2715 memcpy(forward_propagation->current_output_activations_derivatives.data(),
2716 forward_propagation->output_activations_derivatives.data()+copy_index,
2717 static_cast<size_t>(neurons_number)*
sizeof(type));
2719 memcpy(forward_propagation->current_cell_state_activations.data(),
2720 forward_propagation->cell_states_activations.data()+copy_index,
2721 static_cast<size_t>(neurons_number)*
sizeof(type));
2723 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
2724 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
2725 static_cast<size_t>(neurons_number)*
sizeof(type));
// Reset the recurrence at each sequence boundary.
2727 if(sample%timesteps == 0)
2729 forward_propagation->previous_cell_state_activations.setZero();
2731 forget_combinations_weights_derivatives.setZero();
2732 input_combinations_weights_derivatives.setZero();
2733 output_combinations_weights_derivatives.setZero();
2734 state_combinations_weights_derivatives.setZero();
2736 cell_state_weights_derivatives.setZero();
// Otherwise (presumably an elided `else`) fetch the previous cell state and
// propagate the carried hidden-state derivatives through the recurrent weights.
2740 memcpy(forward_propagation->previous_cell_state_activations.data(),
2741 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
2742 static_cast<size_t>(neurons_number)*
sizeof(type));
2744 forget_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(forget_recurrent_weights, A_B);
2745 multiply_rows(forget_combinations_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
2746 input_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(input_recurrent_weights, A_B);
2747 multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_input_activations_derivatives);
2748 state_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(state_recurrent_weights, A_B);
2749 multiply_rows(state_combinations_weights_derivatives, forward_propagation->current_state_activations_derivatives);
// The output gate's own derivative factor is deferred (applied combined below).
2750 output_combinations_weights_derivatives = hidden_states_weights_derivatives.contract(output_recurrent_weights, A_B);
// Direct dependence of the output-gate combination on its own weights.
// NOTE(review): input_index/column_index advance statements are elided here.
2756 for(Index i = 0; i < parameters_number; i++)
2758 output_combinations_weights_derivatives(i, column_index) += forward_propagation->current_inputs[input_index];
2762 if(input_index == inputs_number)
// Cell-state recurrence c_t = f_t .* c_{t-1} + i_t .* s_t, differentiated
// w.r.t. the output-gate weights (the output gate does not enter c_t directly).
2769 multiply_rows(cell_state_weights_derivatives, forward_propagation->current_forget_activations);
2770 multiply_rows(forget_combinations_weights_derivatives, forward_propagation->previous_cell_state_activations);
2771 cell_state_weights_derivatives += forget_combinations_weights_derivatives;
2772 multiply_rows(state_combinations_weights_derivatives, forward_propagation->current_input_activations);
2773 cell_state_weights_derivatives += state_combinations_weights_derivatives;
2774 multiply_rows(input_combinations_weights_derivatives, forward_propagation->current_state_activations);
2775 cell_state_weights_derivatives += input_combinations_weights_derivatives;
// Hidden-state recurrence h_t = o_t .* act(c_t); the output gate's deferred
// derivative factor is applied here together with act(c_t).
2777 hidden_states_weights_derivatives = cell_state_weights_derivatives;
2778 multiply_rows(hidden_states_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
2779 multiply_rows(output_combinations_weights_derivatives, forward_propagation->current_output_activations_derivatives*calculate_activations(forward_propagation->current_cell_state_activations));
2780 hidden_states_weights_derivatives += output_combinations_weights_derivatives;
// Chain through the deltas and accumulate this step's gradient contribution.
2782 back_propagation->output_weights_derivatives += hidden_states_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
2784 copy_index += neurons_number;
/// Accumulates, over all samples, the error gradient with respect to the
/// forget-gate recurrent weights, back-propagating through time.
/// @param inputs Sample matrix; only its row count is used here.
/// @param forward_propagation Workspace with saved per-step gate activations/derivatives
/// and the hidden/cell state history.
/// @param back_propagation Workspace supplying deltas and receiving
/// forget_recurrent_weights_derivatives.
/// Fix: removed the dead local `forget_recurrent_weights_error_gradient`
/// (allocated and zeroed every call but never read -- the result is accumulated
/// directly into back_propagation->forget_recurrent_weights_derivatives).
/// NOTE(review): braces/`else`/index-increment lines appear elided by extraction --
/// confirm nesting against the full source before editing.
2789void LongShortTermMemoryLayer::calculate_forget_recurrent_weights_error_gradient(
const Tensor<type, 2>& inputs,
2790 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
2791 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
2793 const Index samples_number = inputs.dimension(0);
// One parameter per (neuron, neuron) pair of the recurrent weight matrix.
2795 const Index parameters_number = neurons_number*neurons_number;
// Per-gate combination derivatives plus carried cell/hidden state derivatives.
2800 Tensor<type, 2> input_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
2801 Tensor<type, 2> forget_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
2802 Tensor<type, 2> state_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
2803 Tensor<type, 2> output_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
2805 Tensor<type, 2> hidden_states_recurrent_weights_derivatives(parameters_number, neurons_number);
2806 Tensor<type, 2> cell_state_recurrent_weights_derivatives(parameters_number, neurons_number);
2808 Index column_index = 0;
2809 Index activation_index = 0;
// Offset into the flat per-step activation buffers.
2811 Index copy_index = 0;
2813 back_propagation->forget_recurrent_weights_derivatives.setZero();
2815 for(Index sample = 0; sample < samples_number; sample++)
2817 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
// Load this step's gate activations and derivatives into the scratch vectors.
2819 memcpy(forward_propagation->current_forget_activations.data(),
2820 forward_propagation->forget_activations.data() + copy_index,
static_cast<size_t>(neurons_number)*
sizeof(type));
2822 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
2823 forward_propagation->forget_activations_derivatives.data() + copy_index,
2824 static_cast<size_t>(neurons_number)*
sizeof(type));
2826 memcpy(forward_propagation->current_input_activations.data(),
2827 forward_propagation->input_activations.data() + copy_index,
2828 static_cast<size_t>(neurons_number)*
sizeof(type));
2830 memcpy(forward_propagation->current_input_activations_derivatives.data(),
2831 forward_propagation->input_activations_derivatives.data() + copy_index,
2832 static_cast<size_t>(neurons_number)*
sizeof(type));
2834 memcpy(forward_propagation->current_state_activations.data(),
2835 forward_propagation->state_activations.data() + copy_index,
2836 static_cast<size_t>(neurons_number)*
sizeof(type));
2838 memcpy(forward_propagation->current_state_activations_derivatives.data(),
2839 forward_propagation->state_activations_derivatives.data() + copy_index,
2840 static_cast<size_t>(neurons_number)*
sizeof(type));
2842 memcpy(forward_propagation->current_output_activations.data(),
2843 forward_propagation->output_activations.data() + copy_index,
2844 static_cast<size_t>(neurons_number)*
sizeof(type));
2846 memcpy(forward_propagation->current_output_activations_derivatives.data(),
2847 forward_propagation->output_activations_derivatives.data() + copy_index,
2848 static_cast<size_t>(neurons_number)*
sizeof(type));
2850 memcpy(forward_propagation->current_cell_state_activations.data(),
2851 forward_propagation->cell_states_activations.data() + copy_index,
2852 static_cast<size_t>(neurons_number)*
sizeof(type));
2854 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
2855 forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
2856 static_cast<size_t>(neurons_number)*
sizeof(type));
// Reset the carried derivatives at each sequence boundary.
2858 if(sample%timesteps == 0)
2860 cell_state_recurrent_weights_derivatives.setZero();
2861 hidden_states_recurrent_weights_derivatives.setZero();
// Otherwise (presumably an elided `else`) load the previous hidden and cell
// states and propagate the carried derivatives through the recurrent weights.
2865 memcpy(forward_propagation->previous_hidden_state_activations.data(),
2866 forward_propagation->hidden_states_activations.data() + (copy_index-neurons_number),
2867 static_cast<size_t>(neurons_number)*
sizeof(type));
2869 memcpy(forward_propagation->previous_cell_state_activations.data(),
2870 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
2871 static_cast<size_t>(neurons_number)*
sizeof(type));
// The forget gate's own derivative factor is deferred (applied combined below).
2873 forget_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(forget_recurrent_weights, A_B);
2874 input_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(input_recurrent_weights, A_B);
2875 multiply_rows(input_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations_derivatives);
2876 state_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(state_recurrent_weights, A_B);
2877 multiply_rows(state_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations_derivatives);
2878 output_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(output_recurrent_weights, A_B);
2879 multiply_rows(output_combinations_recurrent_weights_derivatives, forward_propagation->current_output_activations_derivatives);
2882 activation_index = 0;
// Direct dependence of the forget-gate combination on its own recurrent
// weights: add the previous hidden state component per (parameter, neuron).
// NOTE(review): activation_index/column_index advance statements are elided here.
2884 for(Index i = 0; i < parameters_number; i++)
2886 forget_combinations_recurrent_weights_derivatives(i, column_index) += forward_propagation->previous_hidden_state_activations[activation_index];
2890 if(activation_index == neurons_number)
2892 activation_index = 0;
// Cell-state recurrence c_t = f_t .* c_{t-1} + i_t .* s_t, differentiated
// w.r.t. the forget-gate recurrent weights.
2897 multiply_rows(cell_state_recurrent_weights_derivatives, forward_propagation->current_forget_activations);
2898 multiply_rows(input_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations);
2899 cell_state_recurrent_weights_derivatives += input_combinations_recurrent_weights_derivatives;
2900 multiply_rows(state_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations);
2901 cell_state_recurrent_weights_derivatives += state_combinations_recurrent_weights_derivatives;
// Deferred forget-gate derivative factor applied here together with c_{t-1}.
2902 multiply_rows(forget_combinations_recurrent_weights_derivatives, (forward_propagation->current_forget_activations_derivatives*forward_propagation->previous_cell_state_activations));
2903 cell_state_recurrent_weights_derivatives += forget_combinations_recurrent_weights_derivatives;
// Hidden-state recurrence h_t = o_t .* act(c_t) via the chain rule.
2905 hidden_states_recurrent_weights_derivatives = cell_state_recurrent_weights_derivatives;
2906 multiply_rows(hidden_states_recurrent_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
2907 multiply_rows(output_combinations_recurrent_weights_derivatives, calculate_activations(forward_propagation->current_cell_state_activations));
2908 hidden_states_recurrent_weights_derivatives += output_combinations_recurrent_weights_derivatives;
// Chain through the deltas and accumulate this step's gradient contribution.
2911 back_propagation->forget_recurrent_weights_derivatives += hidden_states_recurrent_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
2913 copy_index += neurons_number;
/// Accumulates, over all samples, the error gradient with respect to the
/// input-gate recurrent weights, back-propagating through time.
/// @param inputs Sample matrix; only its row count is used here.
/// @param forward_propagation Workspace with saved per-step gate activations/derivatives
/// and the hidden/cell state history.
/// @param back_propagation Workspace supplying deltas and receiving
/// input_recurrent_weights_derivatives.
/// Fix: removed the dead local `forget_recurrent_weights_error_gradient`
/// (a copy-paste leftover from the forget-gate function -- allocated and zeroed
/// every call but never read).
/// NOTE(review): braces/`else`/index-increment lines appear elided by extraction --
/// confirm nesting against the full source before editing.
2918void LongShortTermMemoryLayer::calculate_input_recurrent_weights_error_gradient(
const Tensor<type, 2>& inputs,
2919 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
2920 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
2922 const Index samples_number = inputs.dimension(0);
// One parameter per (neuron, neuron) pair of the recurrent weight matrix.
2924 const Index parameters_number = neurons_number*neurons_number;
// Per-gate combination derivatives plus carried cell/hidden state derivatives.
2929 Tensor<type, 2> input_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
2930 Tensor<type, 2> forget_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
2931 Tensor<type, 2> state_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
2932 Tensor<type, 2> output_combinations_recurrent_weights_derivatives(parameters_number, neurons_number);
2934 Tensor<type, 2> hidden_states_recurrent_weights_derivatives(parameters_number, neurons_number);
2935 Tensor<type, 2> cell_state_recurrent_weights_derivatives(parameters_number, neurons_number);
2937 Index column_index = 0;
2938 Index activation_index = 0;
// Offset into the flat per-step activation buffers.
2940 Index copy_index = 0;
2942 back_propagation->input_recurrent_weights_derivatives.setZero();
2944 for(Index sample = 0; sample < samples_number; sample++)
2946 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
// Load this step's gate activations and derivatives into the scratch vectors.
2948 memcpy(forward_propagation->current_forget_activations.data(),
2949 forward_propagation->forget_activations.data() + copy_index,
static_cast<size_t>(neurons_number)*
sizeof(type));
2950 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
2951 forward_propagation->forget_activations_derivatives.data() + copy_index,
2952 static_cast<size_t>(neurons_number)*
sizeof(type));
2954 memcpy(forward_propagation->current_input_activations.data(),
2955 forward_propagation->input_activations.data() + copy_index,
2956 static_cast<size_t>(neurons_number)*
sizeof(type));
2957 memcpy(forward_propagation->current_input_activations_derivatives.data(),
2958 forward_propagation->input_activations_derivatives.data() + copy_index,
2959 static_cast<size_t>(neurons_number)*
sizeof(type));
2961 memcpy(forward_propagation->current_state_activations.data(),
2962 forward_propagation->state_activations.data() + copy_index,
2963 static_cast<size_t>(neurons_number)*
sizeof(type));
2964 memcpy(forward_propagation->current_state_activations_derivatives.data(),
2965 forward_propagation->state_activations_derivatives.data() + copy_index,
2966 static_cast<size_t>(neurons_number)*
sizeof(type));
2968 memcpy(forward_propagation->current_output_activations.data(),
2969 forward_propagation->output_activations.data() + copy_index,
2970 static_cast<size_t>(neurons_number)*
sizeof(type));
2971 memcpy(forward_propagation->current_output_activations_derivatives.data(),
2972 forward_propagation->output_activations_derivatives.data() + copy_index,
2973 static_cast<size_t>(neurons_number)*
sizeof(type));
2975 memcpy(forward_propagation->current_cell_state_activations.data(),
2976 forward_propagation->cell_states_activations.data() + copy_index,
2977 static_cast<size_t>(neurons_number)*
sizeof(type));
2979 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
2980 forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
2981 static_cast<size_t>(neurons_number)*
sizeof(type));
// Reset the carried derivatives at each sequence boundary.
2983 if(sample%timesteps == 0)
2985 cell_state_recurrent_weights_derivatives.setZero();
2986 hidden_states_recurrent_weights_derivatives.setZero();
// Otherwise (presumably an elided `else`) load the previous hidden and cell
// states and propagate the carried derivatives through the recurrent weights.
2990 memcpy(forward_propagation->previous_hidden_state_activations.data(),
2991 forward_propagation->hidden_states_activations.data() + (copy_index-neurons_number),
2992 static_cast<size_t>(neurons_number)*
sizeof(type));
2994 memcpy(forward_propagation->previous_cell_state_activations.data(),
2995 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
2996 static_cast<size_t>(neurons_number)*
sizeof(type));
2998 forget_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(forget_recurrent_weights, A_B);
2999 multiply_rows(forget_combinations_recurrent_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
// The input gate's own derivative factor is deferred (applied combined below).
3000 input_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(input_recurrent_weights, A_B);
3001 state_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(state_recurrent_weights, A_B);
3002 multiply_rows(state_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations_derivatives);
3003 output_combinations_recurrent_weights_derivatives = hidden_states_recurrent_weights_derivatives.contract(output_recurrent_weights, A_B);
3004 multiply_rows(output_combinations_recurrent_weights_derivatives, forward_propagation->current_output_activations_derivatives);
3007 activation_index = 0;
// Direct dependence of the input-gate combination on its own recurrent
// weights: add the previous hidden state component per (parameter, neuron).
// NOTE(review): activation_index/column_index advance statements are elided here.
3009 for(Index i = 0; i < parameters_number; i++)
3011 input_combinations_recurrent_weights_derivatives(i, column_index) += forward_propagation->previous_hidden_state_activations[activation_index];
3015 if(activation_index == neurons_number)
3017 activation_index = 0;
// Cell-state recurrence c_t = f_t .* c_{t-1} + i_t .* s_t, differentiated
// w.r.t. the input-gate recurrent weights.
3022 multiply_rows(cell_state_recurrent_weights_derivatives, forward_propagation->current_forget_activations);
// Deferred input-gate derivative factor applied here together with s_t.
3023 multiply_rows(input_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations_derivatives*forward_propagation->current_state_activations);
3024 cell_state_recurrent_weights_derivatives += input_combinations_recurrent_weights_derivatives;
3025 multiply_rows(state_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations);
3026 cell_state_recurrent_weights_derivatives += state_combinations_recurrent_weights_derivatives;
3027 multiply_rows(forget_combinations_recurrent_weights_derivatives, forward_propagation->previous_cell_state_activations);
3028 cell_state_recurrent_weights_derivatives += forget_combinations_recurrent_weights_derivatives;
// Hidden-state recurrence h_t = o_t .* act(c_t) via the chain rule.
3030 hidden_states_recurrent_weights_derivatives = cell_state_recurrent_weights_derivatives;
3031 multiply_rows(hidden_states_recurrent_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3032 multiply_rows(output_combinations_recurrent_weights_derivatives, calculate_activations(forward_propagation->current_cell_state_activations));
3033 hidden_states_recurrent_weights_derivatives += output_combinations_recurrent_weights_derivatives;
// Chain through the deltas and accumulate this step's gradient contribution.
3036 back_propagation->input_recurrent_weights_derivatives += hidden_states_recurrent_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
3038 copy_index += neurons_number;
3043void LongShortTermMemoryLayer::calculate_state_recurrent_weights_error_gradient(
const Tensor<type, 2>& inputs,
3044 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3045 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
3047 const Index samples_number = inputs.dimension(0);
3049 const Index parameters_number = neurons_number*neurons_number;
3051 Tensor<type, 1> forget_recurrent_weights_error_gradient(parameters_number);
3052 forget_recurrent_weights_error_gradient.setZero();
3054 Index column_index = 0;
3055 Index activation_index = 0;
3057 Index copy_index = 0;
3059 back_propagation->state_recurrent_weights_derivatives.setZero();
3061 for(Index sample = 0; sample < samples_number; sample++)
3063 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
3065 memcpy(forward_propagation->current_forget_activations.data(),
3066 forward_propagation->forget_activations.data() + copy_index,
static_cast<size_t>(neurons_number)*
sizeof(type));
3067 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3068 forward_propagation->forget_activations_derivatives.data() + copy_index,
3069 static_cast<size_t>(neurons_number)*
sizeof(type));
3071 memcpy(forward_propagation->current_input_activations.data(),
3072 forward_propagation->input_activations.data() + copy_index,
3073 static_cast<size_t>(neurons_number)*
sizeof(type));
3074 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3075 forward_propagation->input_activations_derivatives.data() + copy_index,
3076 static_cast<size_t>(neurons_number)*
sizeof(type));
3078 memcpy(forward_propagation->current_state_activations.data(),
3079 forward_propagation->state_activations.data() + copy_index,
3080 static_cast<size_t>(neurons_number)*
sizeof(type));
3081 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3082 forward_propagation->state_activations_derivatives.data() + copy_index,
3083 static_cast<size_t>(neurons_number)*
sizeof(type));
3085 memcpy(forward_propagation->current_output_activations.data(),
3086 forward_propagation->output_activations.data() + copy_index,
3087 static_cast<size_t>(neurons_number)*
sizeof(type));
3088 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3089 forward_propagation->output_activations_derivatives.data() + copy_index,
3090 static_cast<size_t>(neurons_number)*
sizeof(type));
3092 memcpy(forward_propagation->current_cell_state_activations.data(),
3093 forward_propagation->cell_states_activations.data() + copy_index,
3094 static_cast<size_t>(neurons_number)*
sizeof(type));
3096 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3097 forward_propagation->hidden_states_activations_derivatives.data() + copy_index,
3098 static_cast<size_t>(neurons_number)*
sizeof(type));
3100 if(sample%timesteps == 0)
3102 back_propagation->cell_state_recurrent_weights_derivatives.setZero();
3103 back_propagation->hidden_states_recurrent_weights_derivatives.setZero();
3107 memcpy(forward_propagation->previous_hidden_state_activations.data(),
3108 forward_propagation->hidden_states_activations.data() + (copy_index-neurons_number),
3109 static_cast<size_t>(neurons_number)*
sizeof(type));
3111 memcpy(forward_propagation->previous_cell_state_activations.data(),
3112 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3113 static_cast<size_t>(neurons_number)*
sizeof(type));
3115 back_propagation->forget_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(forget_recurrent_weights, A_B);
3116 multiply_rows(back_propagation->forget_combinations_recurrent_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
3117 back_propagation->input_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(input_recurrent_weights, A_B);
3118 multiply_rows(back_propagation->input_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations_derivatives);
3119 back_propagation->state_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(state_recurrent_weights, A_B);
3120 back_propagation->state_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(output_recurrent_weights, A_B);
3121 multiply_rows(back_propagation->state_combinations_recurrent_weights_derivatives, forward_propagation->current_output_activations_derivatives);
3124 activation_index = 0;
3126 for(Index i = 0; i < parameters_number; i++)
3128 back_propagation->state_combinations_recurrent_weights_derivatives(i, column_index) += forward_propagation->previous_hidden_state_activations[activation_index];
3132 if(activation_index == neurons_number)
3134 activation_index = 0;
3139 multiply_rows(back_propagation->cell_state_recurrent_weights_derivatives, forward_propagation->current_forget_activations);
3140 multiply_rows(back_propagation->input_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations);
3141 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->input_combinations_recurrent_weights_derivatives;
3142 multiply_rows(back_propagation->state_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations_derivatives*forward_propagation->current_input_activations);
3143 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->state_combinations_recurrent_weights_derivatives;
3144 multiply_rows(back_propagation->forget_combinations_recurrent_weights_derivatives, forward_propagation->previous_cell_state_activations);
3145 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->forget_combinations_recurrent_weights_derivatives;
3147 back_propagation->hidden_states_recurrent_weights_derivatives = back_propagation->cell_state_recurrent_weights_derivatives;
3148 multiply_rows(back_propagation->hidden_states_recurrent_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3149 multiply_rows(back_propagation->state_combinations_recurrent_weights_derivatives, calculate_activations(forward_propagation->current_cell_state_activations));
3150 back_propagation->hidden_states_recurrent_weights_derivatives += back_propagation->state_combinations_recurrent_weights_derivatives;
3153 back_propagation->state_recurrent_weights_derivatives += back_propagation->hidden_states_recurrent_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
3155 copy_index += neurons_number;
3160void LongShortTermMemoryLayer::calculate_output_recurrent_weights_error_gradient(
const Tensor<type, 2>& inputs,
3161 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3162 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
3164 const Index samples_number = inputs.dimension(0);
3166 const Index parameters_number = neurons_number*neurons_number;
3168 Index column_index = 0;
3169 Index activation_index = 0;
3171 Index copy_index = 0;
3173 back_propagation->output_recurrent_weights_derivatives.setZero();
3175 for(Index sample = 0; sample < samples_number; sample++)
3177 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
3179 memcpy(forward_propagation->current_forget_activations.data(),
3180 forward_propagation->forget_activations.data()+copy_index,
3181 static_cast<size_t>(neurons_number)*
sizeof(type));
3183 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3184 forward_propagation->forget_activations_derivatives.data()+copy_index,
3185 static_cast<size_t>(neurons_number)*
sizeof(type));
3187 memcpy(forward_propagation->current_input_activations.data(),
3188 forward_propagation->input_activations.data()+copy_index,
3189 static_cast<size_t>(neurons_number)*
sizeof(type));
3191 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3192 forward_propagation->input_activations_derivatives.data()+copy_index,
3193 static_cast<size_t>(neurons_number)*
sizeof(type));
3195 memcpy(forward_propagation->current_state_activations.data(),
3196 forward_propagation->state_activations.data()+copy_index,
3197 static_cast<size_t>(neurons_number)*
sizeof(type));
3199 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3200 forward_propagation->state_activations_derivatives.data()+copy_index,
3201 static_cast<size_t>(neurons_number)*
sizeof(type));
3203 memcpy(forward_propagation->current_output_activations.data(),
3204 forward_propagation->output_activations.data()+copy_index,
3205 static_cast<size_t>(neurons_number)*
sizeof(type));
3207 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3208 forward_propagation->output_activations_derivatives.data()+copy_index,
3209 static_cast<size_t>(neurons_number)*
sizeof(type));
3211 memcpy(forward_propagation->current_cell_state_activations.data(),
3212 forward_propagation->cell_states_activations.data()+copy_index,
3213 static_cast<size_t>(neurons_number)*
sizeof(type));
3215 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3216 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3217 static_cast<size_t>(neurons_number)*
sizeof(type));
3219 if(sample%timesteps == 0)
3221 back_propagation->cell_state_recurrent_weights_derivatives.setZero();
3222 back_propagation->hidden_states_recurrent_weights_derivatives.setZero();
3226 memcpy(forward_propagation->previous_hidden_state_activations.data(),
3227 forward_propagation->hidden_states_activations.data() + (copy_index-neurons_number),
3228 static_cast<size_t>(neurons_number)*
sizeof(type));
3230 memcpy(forward_propagation->previous_cell_state_activations.data(),
3231 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3232 static_cast<size_t>(neurons_number)*
sizeof(type));
3234 back_propagation->forget_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(forget_recurrent_weights, A_B);
3235 multiply_rows(back_propagation->forget_combinations_recurrent_weights_derivatives, forward_propagation->current_forget_activations_derivatives);
3236 back_propagation->input_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(input_recurrent_weights, A_B);
3237 multiply_rows(back_propagation->input_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations_derivatives);
3238 back_propagation->state_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(state_recurrent_weights, A_B);
3239 multiply_rows(back_propagation->state_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations_derivatives);
3240 back_propagation->output_combinations_recurrent_weights_derivatives = back_propagation->hidden_states_recurrent_weights_derivatives.contract(output_recurrent_weights, A_B);
3243 activation_index = 0;
3245 for(Index i = 0; i < parameters_number; i++)
3247 back_propagation->output_combinations_recurrent_weights_derivatives(i, column_index) += forward_propagation->previous_hidden_state_activations[activation_index];
3251 if(activation_index == neurons_number)
3253 activation_index = 0;
3258 multiply_rows(back_propagation->cell_state_recurrent_weights_derivatives, forward_propagation->current_forget_activations);
3259 multiply_rows(back_propagation->input_combinations_recurrent_weights_derivatives, forward_propagation->current_state_activations);
3260 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->input_combinations_recurrent_weights_derivatives;
3261 multiply_rows(back_propagation->state_combinations_recurrent_weights_derivatives, forward_propagation->current_input_activations);
3262 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->state_combinations_recurrent_weights_derivatives;
3263 multiply_rows(back_propagation->forget_combinations_recurrent_weights_derivatives, forward_propagation->previous_cell_state_activations);
3264 back_propagation->cell_state_recurrent_weights_derivatives += back_propagation->forget_combinations_recurrent_weights_derivatives;
3266 back_propagation->hidden_states_recurrent_weights_derivatives = back_propagation->cell_state_recurrent_weights_derivatives;
3267 multiply_rows(back_propagation->cell_state_recurrent_weights_derivatives, forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3268 multiply_rows(back_propagation->output_combinations_recurrent_weights_derivatives, forward_propagation->current_output_activations_derivatives*calculate_activations(forward_propagation->current_cell_state_activations));
3269 back_propagation->hidden_states_recurrent_weights_derivatives += back_propagation->output_combinations_recurrent_weights_derivatives;
3272 back_propagation->output_recurrent_weights_derivatives += back_propagation->hidden_states_recurrent_weights_derivatives.contract(back_propagation->current_layer_deltas, A_B);
3274 copy_index += neurons_number;
3279void LongShortTermMemoryLayer::calculate_forget_biases_error_gradient(
const Tensor<type, 2>& inputs,
3280 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3281 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
3283 const Index samples_number = inputs.dimension(0);
3285 const Index parameters_number = neurons_number;
3287 back_propagation->input_combinations_biases_derivatives.setZero();
3288 back_propagation->forget_combinations_biases_derivatives.setZero();
3289 back_propagation->state_combinations_biases_derivatives.setZero();
3290 back_propagation->output_combinations_biases_derivatives.setZero();
3292 back_propagation->hidden_states_biases_derivatives.setZero();
3293 back_propagation->cell_state_biases_derivatives.setZero();
3295 Index copy_index = 0;
3297 back_propagation->forget_biases_derivatives.setZero();
3299 for(Index sample = 0; sample < samples_number; sample++)
3301 const Tensor<type, 1> current_layer_deltas = back_propagation->delta.chip(sample, 0);
3303 memcpy(forward_propagation->current_forget_activations.data(),
3304 forward_propagation->forget_activations.data()+copy_index,
3305 static_cast<size_t>(neurons_number)*
sizeof(type));
3307 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3308 forward_propagation->forget_activations_derivatives.data()+copy_index,
3309 static_cast<size_t>(neurons_number)*
sizeof(type));
3311 memcpy(forward_propagation->current_input_activations.data(),
3312 forward_propagation->input_activations.data()+copy_index,
3313 static_cast<size_t>(neurons_number)*
sizeof(type));
3315 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3316 forward_propagation->input_activations_derivatives.data()+copy_index,
3317 static_cast<size_t>(neurons_number)*
sizeof(type));
3319 memcpy(forward_propagation->current_state_activations.data(),
3320 forward_propagation->state_activations.data()+copy_index,
3321 static_cast<size_t>(neurons_number)*
sizeof(type));
3323 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3324 forward_propagation->state_activations_derivatives.data()+copy_index,
3325 static_cast<size_t>(neurons_number)*
sizeof(type));
3327 memcpy(forward_propagation->current_output_activations.data(),
3328 forward_propagation->output_activations.data()+copy_index,
3329 static_cast<size_t>(neurons_number)*
sizeof(type));
3331 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3332 forward_propagation->output_activations_derivatives.data()+copy_index,
3333 static_cast<size_t>(neurons_number)*
sizeof(type));
3335 memcpy(forward_propagation->current_cell_state_activations.data(),
3336 forward_propagation->cell_states_activations.data()+copy_index,
3337 static_cast<size_t>(neurons_number)*
sizeof(type));
3339 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3340 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3341 static_cast<size_t>(neurons_number)*
sizeof(type));
3343 if(sample%timesteps == 0)
3345 back_propagation->forget_combinations_biases_derivatives.setZero();
3346 back_propagation->input_combinations_biases_derivatives.setZero();
3347 back_propagation->state_combinations_biases_derivatives.setZero();
3348 back_propagation->output_combinations_biases_derivatives.setZero();
3350 forward_propagation->previous_cell_state_activations.setZero();
3352 back_propagation->cell_state_biases_derivatives.setZero();
3356 memcpy(forward_propagation->previous_cell_state_activations.data(),
3357 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3358 static_cast<size_t>(neurons_number)*
sizeof(type));
3360 back_propagation->forget_combinations_biases_derivatives
3361 = back_propagation->hidden_states_biases_derivatives.contract(forget_recurrent_weights, A_B);
3363 back_propagation->input_combinations_biases_derivatives
3364 = back_propagation->hidden_states_biases_derivatives.contract(input_recurrent_weights, A_B);
3366 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3367 forward_propagation->current_input_activations_derivatives);
3369 back_propagation->state_combinations_biases_derivatives
3370 = back_propagation->hidden_states_biases_derivatives.contract(state_recurrent_weights, A_B);
3372 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3373 forward_propagation->current_state_activations_derivatives);
3375 back_propagation->output_combinations_biases_derivatives
3376 = back_propagation->hidden_states_biases_derivatives.contract(output_recurrent_weights, A_B);
3378 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3379 forward_propagation->current_output_activations_derivatives);
3382 for(Index row = 0; row < parameters_number; row++) back_propagation->forget_combinations_biases_derivatives(row, row) +=
static_cast<type
>(1.0);
3384 multiply_rows(back_propagation->cell_state_biases_derivatives,
3385 forward_propagation->current_forget_activations);
3387 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3388 forward_propagation->current_state_activations);
3390 back_propagation->cell_state_biases_derivatives += back_propagation->input_combinations_biases_derivatives;
3392 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3393 forward_propagation->current_input_activations);
3395 back_propagation->cell_state_biases_derivatives += back_propagation->state_combinations_biases_derivatives;
3397 multiply_rows(back_propagation->forget_combinations_biases_derivatives,
3398 forward_propagation->current_forget_activations_derivatives*forward_propagation->previous_cell_state_activations);
3400 back_propagation->cell_state_biases_derivatives += back_propagation->forget_combinations_biases_derivatives;
3402 back_propagation->hidden_states_biases_derivatives = back_propagation->cell_state_biases_derivatives;
3404 multiply_rows(back_propagation->hidden_states_biases_derivatives,
3405 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3407 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3408 calculate_activations(forward_propagation->current_cell_state_activations));
3410 back_propagation->hidden_states_biases_derivatives += back_propagation->output_combinations_biases_derivatives;
3412 back_propagation->forget_biases_derivatives += back_propagation->hidden_states_biases_derivatives.contract(current_layer_deltas, A_B);
3414 copy_index += neurons_number;
3419void LongShortTermMemoryLayer::calculate_input_biases_error_gradient(
const Tensor<type, 2>& inputs,
3420 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3421 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
3423 const Index samples_number = inputs.dimension(0);
3425 const Index parameters_number = neurons_number;
3427 back_propagation->input_combinations_biases_derivatives.setZero();
3428 back_propagation->forget_combinations_biases_derivatives.setZero();
3429 back_propagation->state_combinations_biases_derivatives.setZero();
3430 back_propagation->output_combinations_biases_derivatives.setZero();
3432 back_propagation->hidden_states_biases_derivatives.setZero();
3433 back_propagation->cell_state_biases_derivatives.setZero();
3435 Tensor<type, 1> previous_cell_state_activations(neurons_number);
3437 Index copy_index = 0;
3439 back_propagation->input_biases_derivatives.setZero();
3441 for(Index sample = 0; sample < samples_number; sample++)
3443 back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);
3445 memcpy(forward_propagation->current_forget_activations.data(),
3446 forward_propagation->forget_activations.data()+copy_index,
3447 static_cast<size_t>(neurons_number)*
sizeof(type));
3449 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3450 forward_propagation->forget_activations_derivatives.data()+copy_index,
3451 static_cast<size_t>(neurons_number)*
sizeof(type));
3453 memcpy(forward_propagation->current_input_activations.data(),
3454 forward_propagation->input_activations.data()+copy_index,
3455 static_cast<size_t>(neurons_number)*
sizeof(type));
3457 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3458 forward_propagation->input_activations_derivatives.data()+copy_index,
3459 static_cast<size_t>(neurons_number)*
sizeof(type));
3461 memcpy(forward_propagation->current_state_activations.data(),
3462 forward_propagation->state_activations.data()+copy_index,
3463 static_cast<size_t>(neurons_number)*
sizeof(type));
3465 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3466 forward_propagation->state_activations_derivatives.data()+copy_index,
3467 static_cast<size_t>(neurons_number)*
sizeof(type));
3469 memcpy(forward_propagation->current_output_activations.data(),
3470 forward_propagation->output_activations.data()+copy_index,
3471 static_cast<size_t>(neurons_number)*
sizeof(type));
3473 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3474 forward_propagation->output_activations_derivatives.data()+copy_index,
3475 static_cast<size_t>(neurons_number)*
sizeof(type));
3477 memcpy(forward_propagation->current_cell_state_activations.data(),
3478 forward_propagation->cell_states_activations.data()+copy_index,
3479 static_cast<size_t>(neurons_number)*
sizeof(type));
3481 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3482 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3483 static_cast<size_t>(neurons_number)*
sizeof(type));
3485 if(sample%timesteps == 0)
3487 back_propagation->forget_combinations_biases_derivatives.setZero();
3488 back_propagation->input_combinations_biases_derivatives.setZero();
3489 back_propagation->state_combinations_biases_derivatives.setZero();
3490 back_propagation->output_combinations_biases_derivatives.setZero();
3492 previous_cell_state_activations.setZero();
3493 back_propagation->cell_state_biases_derivatives.setZero();
3497 memcpy(previous_cell_state_activations.data(),
3498 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3499 static_cast<size_t>(neurons_number)*
sizeof(type));
3501 back_propagation->forget_combinations_biases_derivatives
3502 = back_propagation->hidden_states_biases_derivatives.contract(forget_recurrent_weights, A_B);
3504 multiply_rows(back_propagation->forget_combinations_biases_derivatives,
3505 forward_propagation->current_forget_activations_derivatives);
3507 back_propagation->input_combinations_biases_derivatives
3508 = back_propagation->hidden_states_biases_derivatives.contract(input_recurrent_weights, A_B);
3510 back_propagation->state_combinations_biases_derivatives
3511 = back_propagation->hidden_states_biases_derivatives.contract(state_recurrent_weights, A_B);
3513 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3514 forward_propagation->current_state_activations_derivatives);
3516 back_propagation->output_combinations_biases_derivatives
3517 = back_propagation->hidden_states_biases_derivatives.contract(output_recurrent_weights, A_B);
3519 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3520 forward_propagation->current_output_activations_derivatives);
3523 for(Index row = 0; row < parameters_number; row++)
3524 back_propagation->input_combinations_biases_derivatives(row, row) +=
static_cast<type
>(1.0);
3526 multiply_rows(back_propagation->cell_state_biases_derivatives,
3527 forward_propagation->current_forget_activations);
3529 multiply_rows(back_propagation->forget_combinations_biases_derivatives, previous_cell_state_activations);
3531 back_propagation->cell_state_biases_derivatives += back_propagation->forget_combinations_biases_derivatives;
3533 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3534 forward_propagation->current_input_activations);
3536 back_propagation->cell_state_biases_derivatives += back_propagation->state_combinations_biases_derivatives;
3538 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3539 forward_propagation->current_input_activations_derivatives*forward_propagation->current_state_activations);
3541 back_propagation->cell_state_biases_derivatives += back_propagation->input_combinations_biases_derivatives;
3543 back_propagation->hidden_states_biases_derivatives = back_propagation->cell_state_biases_derivatives;
3545 multiply_rows(back_propagation->hidden_states_biases_derivatives,
3546 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3548 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3549 calculate_activations(forward_propagation->current_cell_state_activations));
3551 back_propagation->hidden_states_biases_derivatives += back_propagation->output_combinations_biases_derivatives;
3553 back_propagation->input_biases_derivatives
3554 += back_propagation->hidden_states_biases_derivatives.contract(back_propagation->current_layer_deltas, A_B);
3556 copy_index += neurons_number;
3561void LongShortTermMemoryLayer::calculate_state_biases_error_gradient(
const Tensor<type, 2>& inputs,
3562 LongShortTermMemoryLayerForwardPropagation* forward_propagation,
3563 LongShortTermMemoryLayerBackPropagation* back_propagation)
const
3565 const Index samples_number = inputs.dimension(0);
3567 const Index parameters_number = neurons_number;
3569 back_propagation->input_combinations_biases_derivatives.setZero();
3570 back_propagation->forget_combinations_biases_derivatives.setZero();
3571 back_propagation->state_combinations_biases_derivatives.setZero();
3572 back_propagation->output_combinations_biases_derivatives.setZero();
3574 back_propagation->hidden_states_biases_derivatives.setZero();
3575 back_propagation->cell_state_biases_derivatives.setZero();
3577 Index copy_index = 0;
3579 back_propagation->state_biases_derivatives.setZero();
3581 for(Index sample = 0; sample < samples_number; sample++)
3583 const Tensor<type, 1> current_layer_deltas = back_propagation->delta.chip(sample, 0);
3585 memcpy(forward_propagation->current_forget_activations.data(),
3586 forward_propagation->forget_activations.data()+copy_index,
3587 static_cast<size_t>(neurons_number)*
sizeof(type));
3589 memcpy(forward_propagation->current_forget_activations_derivatives.data(),
3590 forward_propagation->forget_activations_derivatives.data()+copy_index,
3591 static_cast<size_t>(neurons_number)*
sizeof(type));
3593 memcpy(forward_propagation->current_input_activations.data(),
3594 forward_propagation->input_activations.data()+copy_index,
3595 static_cast<size_t>(neurons_number)*
sizeof(type));
3597 memcpy(forward_propagation->current_input_activations_derivatives.data(),
3598 forward_propagation->input_activations_derivatives.data()+copy_index,
3599 static_cast<size_t>(neurons_number)*
sizeof(type));
3601 memcpy(forward_propagation->current_state_activations.data(),
3602 forward_propagation->state_activations.data()+copy_index,
3603 static_cast<size_t>(neurons_number)*
sizeof(type));
3605 memcpy(forward_propagation->current_state_activations_derivatives.data(),
3606 forward_propagation->state_activations_derivatives.data()+copy_index,
3607 static_cast<size_t>(neurons_number)*
sizeof(type));
3609 memcpy(forward_propagation->current_output_activations.data(),
3610 forward_propagation->output_activations.data()+copy_index,
3611 static_cast<size_t>(neurons_number)*
sizeof(type));
3613 memcpy(forward_propagation->current_output_activations_derivatives.data(),
3614 forward_propagation->output_activations_derivatives.data()+copy_index,
3615 static_cast<size_t>(neurons_number)*
sizeof(type));
3617 memcpy(forward_propagation->current_cell_state_activations.data(),
3618 forward_propagation->cell_states_activations.data()+copy_index,
3619 static_cast<size_t>(neurons_number)*
sizeof(type));
3621 memcpy(forward_propagation->current_hidden_states_derivatives.data(),
3622 forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
3623 static_cast<size_t>(neurons_number)*
sizeof(type));
3625 if(sample%timesteps == 0)
3627 back_propagation->forget_combinations_biases_derivatives.setZero();
3628 back_propagation->input_combinations_biases_derivatives.setZero();
3629 back_propagation->state_combinations_biases_derivatives.setZero();
3630 back_propagation->output_combinations_biases_derivatives.setZero();
3632 forward_propagation->previous_cell_state_activations.setZero();
3633 back_propagation->cell_state_biases_derivatives.setZero();
3637 memcpy(forward_propagation->previous_cell_state_activations.data(),
3638 forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
3639 static_cast<size_t>(neurons_number)*
sizeof(type));
3641 back_propagation->forget_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(forget_recurrent_weights, A_B);
3643 multiply_rows(back_propagation->forget_combinations_biases_derivatives,
3644 forward_propagation->current_forget_activations_derivatives);
3646 back_propagation->input_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(input_recurrent_weights, A_B);
3648 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3649 forward_propagation->current_input_activations_derivatives);
3651 back_propagation->state_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(state_recurrent_weights, A_B);
3653 back_propagation->output_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(output_recurrent_weights, A_B);
3655 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3656 forward_propagation->current_output_activations_derivatives);
3659 for(Index row = 0; row < parameters_number; row++) back_propagation->state_combinations_biases_derivatives(row, row) +=
static_cast<type
>(1.0);
3661 multiply_rows(back_propagation->cell_state_biases_derivatives,
3662 forward_propagation->current_forget_activations);
3664 multiply_rows(back_propagation->forget_combinations_biases_derivatives, forward_propagation->previous_cell_state_activations);
3666 back_propagation->cell_state_biases_derivatives += back_propagation->forget_combinations_biases_derivatives;
3668 multiply_rows(back_propagation->input_combinations_biases_derivatives,
3669 forward_propagation->current_state_activations);
3671 back_propagation->cell_state_biases_derivatives += back_propagation->input_combinations_biases_derivatives;
3673 multiply_rows(back_propagation->state_combinations_biases_derivatives,
3674 forward_propagation->current_state_activations_derivatives*forward_propagation->current_input_activations);
3676 back_propagation->cell_state_biases_derivatives += back_propagation->state_combinations_biases_derivatives;
3678 back_propagation->hidden_states_biases_derivatives = back_propagation->cell_state_biases_derivatives;
3680 multiply_rows(back_propagation->hidden_states_biases_derivatives,
3681 forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);
3683 multiply_rows(back_propagation->output_combinations_biases_derivatives,
3684 calculate_activations(forward_propagation->current_cell_state_activations));
3686 back_propagation->hidden_states_biases_derivatives += back_propagation->output_combinations_biases_derivatives;
3688 back_propagation->state_biases_derivatives += back_propagation->hidden_states_biases_derivatives.contract(current_layer_deltas, A_B);
3690 copy_index += neurons_number;
/// Accumulates the gradient of the error with respect to the output-gate biases.
/// Iterates over the samples in time order, carrying derivative matrices through the
/// LSTM recurrences; the recurrent state is reset at the start of each series
/// (sample % timesteps == 0).
/// @param inputs Input batch; only its first dimension (number of samples) is read here.
/// @param forward_propagation Cached per-sample gate activations/derivatives from the forward pass.
/// @param back_propagation Workspace holding deltas; output_biases_derivatives is accumulated in place.
void LongShortTermMemoryLayer::calculate_output_biases_error_gradient(
    const Tensor<type, 2>& inputs,
    LongShortTermMemoryLayerForwardPropagation* forward_propagation,
    LongShortTermMemoryLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);

    // One bias derivative per neuron for the output gate.
    const Index parameters_number = neurons_number;

    // Clear the running derivative matrices before walking the time series.
    back_propagation->input_combinations_biases_derivatives.setZero();
    back_propagation->forget_combinations_biases_derivatives.setZero();
    back_propagation->state_combinations_biases_derivatives.setZero();
    back_propagation->output_combinations_biases_derivatives.setZero();
    back_propagation->hidden_states_biases_derivatives.setZero();
    back_propagation->cell_state_biases_derivatives.setZero();

    // Offset into the flat per-sample activation buffers (advances by neurons_number per sample).
    Index copy_index = 0;

    back_propagation->output_biases_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);

        // Slice this sample's activations/derivatives out of the flat forward-pass buffers.
        memcpy(forward_propagation->current_forget_activations.data(),
               forward_propagation->forget_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_forget_activations_derivatives.data(),
               forward_propagation->forget_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations.data(),
               forward_propagation->input_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_input_activations_derivatives.data(),
               forward_propagation->input_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations.data(),
               forward_propagation->state_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_state_activations_derivatives.data(),
               forward_propagation->state_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations.data(),
               forward_propagation->output_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_output_activations_derivatives.data(),
               forward_propagation->output_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_cell_state_activations.data(),
               forward_propagation->cell_states_activations.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        memcpy(forward_propagation->current_hidden_states_derivatives.data(),
               forward_propagation->hidden_states_activations_derivatives.data()+copy_index,
               static_cast<size_t>(neurons_number)*sizeof(type));

        if(sample%timesteps == 0)
        {
            // First step of a time series: no recurrent history to propagate.
            back_propagation->forget_combinations_biases_derivatives.setZero();
            back_propagation->input_combinations_biases_derivatives.setZero();
            back_propagation->state_combinations_biases_derivatives.setZero();
            back_propagation->output_combinations_biases_derivatives.setZero();

            forward_propagation->previous_cell_state_activations.setZero();
            back_propagation->cell_state_biases_derivatives.setZero();
        }
        else
        {
            // NOTE(review): the `else` keyword/braces are not visible in this extraction;
            // restored from the original line-number gap — confirm against upstream.
            memcpy(forward_propagation->previous_cell_state_activations.data(),
                   forward_propagation->cell_states_activations.data() + (copy_index-neurons_number),
                   static_cast<size_t>(neurons_number)*sizeof(type));

            // Propagate the previous hidden-state derivatives through the recurrent weights
            // and scale by each gate's activation derivative.
            back_propagation->forget_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(forget_recurrent_weights, A_B);

            multiply_rows(back_propagation->forget_combinations_biases_derivatives,
                          forward_propagation->current_forget_activations_derivatives);

            back_propagation->input_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(input_recurrent_weights, A_B);

            multiply_rows(back_propagation->input_combinations_biases_derivatives,
                          forward_propagation->current_input_activations_derivatives);

            back_propagation->state_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(state_recurrent_weights, A_B);

            multiply_rows(back_propagation->state_combinations_biases_derivatives,
                          forward_propagation->current_state_activations_derivatives);

            back_propagation->output_combinations_biases_derivatives = back_propagation->hidden_states_biases_derivatives.contract(output_recurrent_weights, A_B);
        }

        // d(output combination)/d(output bias) contributes the identity for this gate.
        for(Index row = 0; row < parameters_number; row++) back_propagation->output_combinations_biases_derivatives(row, row) += static_cast<type>(1.0);

        // Cell-state derivative recurrence:
        // dC = dC_prev*f + dF*C_prev + dS*i + dI*s
        multiply_rows(back_propagation->cell_state_biases_derivatives,
                      forward_propagation->current_forget_activations);

        multiply_rows(back_propagation->forget_combinations_biases_derivatives, forward_propagation->previous_cell_state_activations);

        back_propagation->cell_state_biases_derivatives += back_propagation->forget_combinations_biases_derivatives;

        multiply_rows(back_propagation->state_combinations_biases_derivatives,
                      forward_propagation->current_input_activations);

        back_propagation->cell_state_biases_derivatives += back_propagation->state_combinations_biases_derivatives;

        multiply_rows(back_propagation->input_combinations_biases_derivatives,
                      forward_propagation->current_state_activations);

        back_propagation->cell_state_biases_derivatives += back_propagation->input_combinations_biases_derivatives;

        // Hidden-state derivative: dH = dC*o*act'(C) + dO*o'*act(C)
        back_propagation->hidden_states_biases_derivatives = back_propagation->cell_state_biases_derivatives;

        multiply_rows(back_propagation->hidden_states_biases_derivatives,
                      forward_propagation->current_output_activations*forward_propagation->current_hidden_states_derivatives);

        multiply_rows(back_propagation->output_combinations_biases_derivatives,
                      forward_propagation->current_output_activations_derivatives*calculate_activations(forward_propagation->current_cell_state_activations));

        back_propagation->hidden_states_biases_derivatives += back_propagation->output_combinations_biases_derivatives;

        // Chain with this sample's deltas and accumulate into the gradient.
        back_propagation->output_biases_derivatives += back_propagation->hidden_states_biases_derivatives.contract(back_propagation->current_layer_deltas, A_B);

        copy_index += neurons_number;
    }
}
// Body of write_expression(const Tensor<string, 1>& inputs_names,
// const Tensor<string, 1>& outputs_names) const — the signature precedes this span
// (not visible in this extraction). Builds a human-readable mathematical expression
// for the layer: one line per forget/input/state/output gate, then the cell-state
// and hidden-state recurrences, then the output assignments.

    // Validate the caller-supplied input names against the layer's input count.
    const Index inputs_name_size = inputs_names.size();

    if(inputs_name_size != inputs_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "string write_expression(const Tensor<string, 1>&, const Tensor<string, 1>&) const method.\n"
               << "Size of inputs name must be equal to number of layer inputs.\n";

        throw logic_error(buffer.str());
    }

    // Validate the caller-supplied output names against the neuron count.
    const Index outputs_name_size = outputs_names.size();

    if(outputs_name_size != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: LongShortTermMemoryLayer class.\n"
               << "string write_expression(const Tensor<string, 1>&, const Tensor<string, 1>&) const method.\n"
               << "Size of outputs name must be equal to number of neurons.\n";

        throw logic_error(buffer.str());
    }

    ostringstream buffer;

    // Forget gate: recurrent activation of bias + input terms + recurrent terms.
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "forget_gate_" << to_string(i) << " = " << write_recurrent_activation_function_expression() << " (" << forget_biases[i] << " + ";

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << inputs_names[j] << " * (" << forget_weights(j,i) << ") + ";
        }

        for(Index k = 0; k < neurons_number-1; k++)
        {
            buffer << "hidden_state_" << to_string(k) << "(t-1) * (" << forget_recurrent_weights(k,i) << ") + ";
        }

        // Last recurrent term closes the expression (no trailing '+').
        buffer << "hidden_state_" << to_string(neurons_number-1) << "(t-1) * (" << forget_recurrent_weights(neurons_number-1,i) << ") );\n";
    }

    // Input gate: same shape as the forget gate, with the input-gate parameters.
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "input_gate_" << to_string(i) << " = " << write_recurrent_activation_function_expression() << " (" << input_biases[i] << " + ";

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << inputs_names[j] << " * (" << input_weights(j,i) << ") + ";
        }

        for(Index k = 0; k < neurons_number-1; k++)
        {
            buffer << "hidden_state_" << to_string(k) << "(t-1) * (" << input_recurrent_weights(k,i) << ") + ";
        }

        buffer << "hidden_state_" << to_string(neurons_number-1) << "(t-1) * (" << input_recurrent_weights(neurons_number-1,i) << ") );\n";
    }

    // State (candidate) gate: uses the non-recurrent activation function.
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "state_gate_" << to_string(i) << " = " << write_activation_function_expression() << " (" << state_biases[i] << " + ";

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << inputs_names[j] << " * (" << state_weights(j,i) << ") + ";
        }

        for(Index k = 0; k < neurons_number-1; k++)
        {
            buffer << "hidden_state_" << to_string(k) << "(t-1) * (" << state_recurrent_weights(k,i) << ") + ";
        }

        buffer << "hidden_state_" << to_string(neurons_number-1) << "(t-1) * (" << state_recurrent_weights(neurons_number-1,i) << ") );\n";
    }

    // Output gate.
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "output_gate_" << to_string(i) << " = " << write_recurrent_activation_function_expression() << " (" << output_biases[i] << " + ";

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << inputs_names[j] << " * (" << output_weights(j,i) << ") + ";
        }

        for(Index k = 0; k < neurons_number-1; k++)
        {
            buffer << "hidden_state_" << to_string(k) << "(t-1) * (" << output_recurrent_weights(k,i) << ") + ";
        }

        buffer << "hidden_state_" << to_string(neurons_number-1) << "(t-1) * (" << output_recurrent_weights(neurons_number-1,i) << ") );\n";
    }

    // Cell-state recurrence: C(t) = f*C(t-1) + i*s.
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "cell_state_" << to_string(i) << "(t) = forget_gate_" << to_string(i) << " * cell_state_" << to_string(i) << "(t-1)+input_gate_" << to_string(i) << " * state_gate_" << to_string(i) << ";\n";
    }

    // Hidden-state: H(t) = o * activation(C(t)).
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "hidden_state_" << to_string(i) << "(t) = output_gate_" << to_string(i) << " * " << write_activation_function_expression() << "(cell_state_" << to_string(i) << ");\n";
    }

    // Map hidden states to the caller-supplied output names.
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << outputs_names[i] << " = " << "hidden_state_" << to_string(i) << "(t);\n";
    }

    return buffer.str();
/// Returns the skeleton of a C/C++ function implementing this layer's expression:
/// the function header (named after layer_name, taking a vector<float> of inputs)
/// and the final return of the computed output vector.
string LongShortTermMemoryLayer::write_expression_c() const
{
    ostringstream buffer;

    buffer << "vector<float> " << layer_name << "(const vector<float>& inputs)\n{" << endl;

    // NOTE(review): the lines between the header and the return statement are not
    // visible in this view; presumably the generated gate/state computations are
    // appended here — confirm against the full source.
    buffer << "\n\treturn long_short_term_memory_output;\n}" << endl;

    return buffer.str();
}
// Body of the C-expression generator for this layer — the function signature
// precedes this span and is not visible in this extraction (presumably the helper
// whose output is embedded by write_expression_c — TODO confirm).
// Emits C statements computing the four gate vectors, the cell-state update,
// the hidden states and the final output vector, with all weights/biases
// inlined as numeric literals.

    ostringstream buffer;

    // --- Forget gate combinations: bias + input terms + recurrent terms ---
    buffer << "\tvector<float> forget_gate_combinations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tforget_gate_combinations[" << i << "] = " << forget_biases(i) << " + ";

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << " inputs[" << j << "] * (" << forget_weights(j,i) << ") + ";
        }

        for(Index k = 0; k < neurons_number-1; k++)
        {
            buffer << "hidden_states[" << k << "]" << " * (" << forget_recurrent_weights(k,i) << ") + ";
        }

        buffer << "hidden_states[" << neurons_number-1 << "]" << " * (" << forget_recurrent_weights(neurons_number-1,i) << "); \n";
    }

    // --- Forget gate activations (recurrent activation function) ---
    buffer << "\tvector<float> forget_gate_activations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tforget_gate_activations[" << i << "] = ";

        switch(recurrent_activation_function)
        {
        case ActivationFunction::HyperbolicTangent:
            buffer << "tanh(forget_gate_combinations[" << i << "]);\n";
            break;

        case ActivationFunction::RectifiedLinear:
            buffer << "forget_gate_combinations[" << i << "] < 0.0 ? 0.0 : forget_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::Logistic:
            buffer << "1.0/(1.0 + exp(-forget_gate_combinations[" << i << "]));\n";
            break;

        case ActivationFunction::Threshold:
            buffer << "forget_gate_combinations[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
            break;

        case ActivationFunction::SymmetricThreshold:
            buffer << "forget_gate_combinations[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
            break;

        case ActivationFunction::Linear:
            buffer << "forget_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::ScaledExponentialLinear:
            buffer << "forget_gate_combinations[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(forget_gate_combinations[" << i << "]) - 1.0) : 1.0507*forget_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::SoftPlus:
            buffer << "log(1.0 + exp(forget_gate_combinations[" << i << "]));\n";
            break;

        case ActivationFunction::SoftSign:
            buffer << "forget_gate_combinations[" << i << "] < 0.0 ? forget_gate_combinations[" << i << "]/(1.0 - forget_gate_combinations[" << i << "] ) : forget_gate_combinations[" << i << "]/(1.0 + forget_gate_combinations[" << i << "] );\n";
            break;

        case ActivationFunction::ExponentialLinear:
            buffer << "forget_gate_combinations[" << i << "] < 0.0 ? 1.0*(exp(forget_gate_combinations[" << i << "]) - 1.0) : forget_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::HardSigmoid:
            // NOTE(review): HardSigmoid case body is not visible in this view — confirm against upstream.
            break;
        }
    }

    // --- Input gate combinations ---
    buffer << "\tvector<float> input_gate_combinations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tinput_gate_combinations[" << i << "] = " << input_biases(i) << " + ";

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << "inputs[" << j << "] * (" << input_weights(j,i) << ") + ";
        }

        for(Index k = 0; k < neurons_number-1; k++)
        {
            buffer << "hidden_states[" << k << "]" << " * (" << input_recurrent_weights(k,i) << ") + ";
        }

        buffer << "hidden_states[" << neurons_number-1 << "]" << " * (" << input_recurrent_weights(neurons_number-1,i) << "); \n";
    }

    // --- Input gate activations (recurrent activation function) ---
    buffer << "\tvector<float> input_gate_activations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tinput_gate_activations[" << i << "] = ";

        switch(recurrent_activation_function)
        {
        case ActivationFunction::HyperbolicTangent:
            buffer << "tanh(input_gate_combinations[" << i << "]);\n";
            break;

        case ActivationFunction::RectifiedLinear:
            buffer << "input_gate_combinations[" << i << "] < 0.0 ? 0.0 : input_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::Logistic:
            buffer << "1.0/(1.0 + exp(-input_gate_combinations[" << i << "]));\n";
            break;

        case ActivationFunction::Threshold:
            buffer << "input_gate_combinations[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
            break;

        case ActivationFunction::SymmetricThreshold:
            buffer << "input_gate_combinations[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
            break;

        case ActivationFunction::Linear:
            buffer << "input_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::ScaledExponentialLinear:
            buffer << "input_gate_combinations[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(input_gate_combinations[" << i << "]) - 1.0) : 1.0507*input_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::SoftPlus:
            buffer << "log(1.0 + exp(input_gate_combinations[" << i << "]));\n";
            break;

        case ActivationFunction::SoftSign:
            buffer << "input_gate_combinations[" << i << "] < 0.0 ? input_gate_combinations[" << i << "]/(1.0 - input_gate_combinations[" << i << "] ) : input_gate_combinations[" << i << "]/(1.0 + input_gate_combinations[" << i << "] );\n";
            break;

        case ActivationFunction::ExponentialLinear:
            buffer << "input_gate_combinations[" << i << "] < 0.0 ? 1.0*(exp(input_gate_combinations[" << i << "]) - 1.0) : input_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::HardSigmoid:
            // NOTE(review): HardSigmoid case body is not visible in this view — confirm against upstream.
            break;
        }
    }

    // --- State (candidate) gate combinations ---
    buffer << "\tvector<float> state_gate_combinations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tstate_gate_combinations[" << i << "] = " << state_biases(i) << " + ";

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << "inputs[" << j << "] * (" << state_weights(j,i) << ") + ";
        }

        for(Index k = 0; k < neurons_number-1; k++)
        {
            buffer << "hidden_states[" << k << "]" << " * (" << state_recurrent_weights(k,i) << ") + ";
        }

        buffer << "hidden_states[" << neurons_number-1 << "]" << " * (" << state_recurrent_weights(neurons_number-1,i) << "); \n";
    }

    // --- State gate activations (non-recurrent activation function) ---
    buffer << "\tvector<float> state_gate_activations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tstate_gate_activations[" << i << "] = ";

        // NOTE(review): the switch header was dropped by the extraction; restored as
        // activation_function — consistent with write_expression's state gate. Confirm.
        switch(activation_function)
        {
        case ActivationFunction::HyperbolicTangent:
            buffer << "tanh(state_gate_combinations[" << i << "]);\n";
            break;

        case ActivationFunction::RectifiedLinear:
            buffer << "state_gate_combinations[" << i << "] < 0.0 ? 0.0 : state_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::Logistic:
            buffer << "1.0/(1.0 + exp(-state_gate_combinations[" << i << "]));\n";
            break;

        case ActivationFunction::Threshold:
            buffer << "state_gate_combinations[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
            break;

        case ActivationFunction::SymmetricThreshold:
            buffer << "state_gate_combinations[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
            break;

        case ActivationFunction::Linear:
            buffer << "state_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::ScaledExponentialLinear:
            buffer << "state_gate_combinations[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(state_gate_combinations[" << i << "]) - 1.0) : 1.0507*state_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::SoftPlus:
            buffer << "log(1.0 + exp(state_gate_combinations[" << i << "]));\n";
            break;

        case ActivationFunction::SoftSign:
            buffer << "state_gate_combinations[" << i << "] < 0.0 ? state_gate_combinations[" << i << "]/(1.0 - state_gate_combinations[" << i << "] ) : state_gate_combinations[" << i << "]/(1.0 + state_gate_combinations[" << i << "] );\n";
            break;

        case ActivationFunction::ExponentialLinear:
            buffer << "state_gate_combinations[" << i << "] < 0.0 ? 1.0*(exp(state_gate_combinations[" << i << "]) - 1.0) : state_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::HardSigmoid:
            // NOTE(review): HardSigmoid case body is not visible in this view — confirm against upstream.
            break;
        }
    }

    // --- Output gate combinations ---
    buffer << "\tvector<float> output_gate_combinations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\toutput_gate_combinations[" << i << "] = " << output_biases(i) << " + ";

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << "inputs[" << j << "] * (" << output_weights(j,i) << ") + ";
        }

        for(Index k = 0; k < neurons_number-1; k++)
        {
            buffer << "hidden_states[" << k << "]" << " * (" << output_recurrent_weights(k,i) << ") + ";
        }

        buffer << "hidden_states[" << neurons_number-1 << "]" << " * (" << output_recurrent_weights(neurons_number-1,i) << "); \n";
    }

    // --- Output gate activations (recurrent activation function) ---
    buffer << "\tvector<float> output_gate_activations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\toutput_gate_activations[" << i << "] = ";

        switch(recurrent_activation_function)
        {
        case ActivationFunction::HyperbolicTangent:
            buffer << "tanh(output_gate_combinations[" << i << "]);\n";
            break;

        case ActivationFunction::RectifiedLinear:
            buffer << "output_gate_combinations[" << i << "] < 0.0 ? 0.0 : output_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::Logistic:
            buffer << "1.0/(1.0 + exp(-output_gate_combinations[" << i << "]));\n";
            break;

        case ActivationFunction::Threshold:
            buffer << "output_gate_combinations[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
            break;

        case ActivationFunction::SymmetricThreshold:
            buffer << "output_gate_combinations[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
            break;

        case ActivationFunction::Linear:
            buffer << "output_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::ScaledExponentialLinear:
            buffer << "output_gate_combinations[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(output_gate_combinations[" << i << "]) - 1.0) : 1.0507*output_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::SoftPlus:
            buffer << "log(1.0 + exp(output_gate_combinations[" << i << "]));\n";
            break;

        case ActivationFunction::SoftSign:
            buffer << "output_gate_combinations[" << i << "] < 0.0 ? output_gate_combinations[" << i << "]/(1.0 - output_gate_combinations[" << i << "] ) : output_gate_combinations[" << i << "]/(1.0 + output_gate_combinations[" << i << "] );\n";
            break;

        case ActivationFunction::ExponentialLinear:
            buffer << "output_gate_combinations[" << i << "] < 0.0 ? 1.0*(exp(output_gate_combinations[" << i << "]) - 1.0) : output_gate_combinations[" << i << "];\n";
            break;

        case ActivationFunction::HardSigmoid:
            // NOTE(review): HardSigmoid case body is not visible in this view — confirm against upstream.
            break;
        }
    }

    // --- Cell-state update: C = f*C + i*s ---
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tcell_states[" << i << "] = forget_gate_activations[" << i << "] * cell_states[" << i << "] + input_gate_activations[" << i << "] * state_gate_activations[" << i << "]; \n";
    }

    // --- Cell-state activations (non-recurrent activation function) ---
    buffer << "\tvector<float> cell_state_activations(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tcell_state_activations[" << i << "] = ";

        // NOTE(review): switch header restored as activation_function (dropped by extraction) — confirm.
        switch(activation_function)
        {
        case ActivationFunction::HyperbolicTangent:
            buffer << "tanh(cell_states[" << i << "]);\n";
            break;

        case ActivationFunction::RectifiedLinear:
            buffer << "cell_states[" << i << "] < 0.0 ? 0.0 : cell_states[" << i << "];\n";
            break;

        case ActivationFunction::Logistic:
            buffer << "1.0/(1.0 + exp(-cell_states[" << i << "]));\n";
            break;

        case ActivationFunction::Threshold:
            buffer << "cell_states[" << i << "] >= 0.0 ? 1.0 : 0.0;\n";
            break;

        case ActivationFunction::SymmetricThreshold:
            buffer << "cell_states[" << i << "] >= 0.0 ? 1.0 : -1.0;\n";
            break;

        case ActivationFunction::Linear:
            buffer << "cell_states[" << i << "];\n";
            break;

        case ActivationFunction::ScaledExponentialLinear:
            buffer << "cell_states[" << i << "] < 0.0 ? 1.0507*1.67326*(exp(cell_states[" << i << "]) - 1.0) : 1.0507*cell_states[" << i << "];\n";
            break;

        case ActivationFunction::SoftPlus:
            buffer << "log(1.0 + exp(cell_states[" << i << "]));\n";
            break;

        case ActivationFunction::SoftSign:
            buffer << "cell_states[" << i << "] < 0.0 ? cell_states[" << i << "]/(1.0 - cell_states[" << i << "] ) : cell_states[" << i << "]/(1.0 + cell_states[" << i << "] );\n";
            break;

        case ActivationFunction::ExponentialLinear:
            buffer << "cell_states[" << i << "] < 0.0 ? 1.0*(exp(cell_states[" << i << "]) - 1.0) : cell_states[" << i << "];\n";
            break;

        case ActivationFunction::HardSigmoid:
            // NOTE(review): HardSigmoid case body is not visible in this view — confirm against upstream.
            break;
        }
    }

    // --- Hidden states: H = o * act(C) ---
    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\thidden_states[" << i << "] = output_gate_activations[" << i << "] * cell_state_activations[" << i << "];\n";
    }

    // --- Final output vector ---
    buffer << "\tvector<float> long_short_term_memory_output(" << neurons_number << ");\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\tlong_short_term_memory_output[" << i << "] = hidden_states[" << i << "];\n";
    }

    return buffer.str();
/// Returns the skeleton of a Python method implementing this layer's expression:
/// the `def` header (named after layer_name, taking self and inputs) and the
/// final return of the computed output list.
string LongShortTermMemoryLayer::write_expression_python() const
{
    ostringstream buffer;

    buffer << "\tdef " << layer_name << "(self,inputs):\n" << endl;

    // NOTE(review): the lines between the header and the return statement are not
    // visible in this view; presumably the generated gate/state computations are
    // appended here — confirm against the full source.
    buffer << "\n\t\treturn long_short_term_memory_output;\n" << endl;

    return buffer.str();
}
4420 ostringstream buffer;
4427 buffer <<
"\t\tforget_gate_combinations = [None] * "<<neurons_number<<
"\n" << endl;
4429 for(Index i = 0; i < neurons_number; i++)
4431 buffer <<
"\t\tforget_gate_combinations[" << i <<
"] = " << forget_biases(i) <<
" + ";
4433 for(Index j = 0; j < inputs_number; j++)
4435 buffer <<
"inputs[" << j <<
"] * (" << forget_weights(j,i) <<
") + ";
4438 for(Index k = 0; k < neurons_number-1; k++)
4440 buffer <<
"self.hidden_states[" << k <<
"] * (" << forget_recurrent_weights(k,i) <<
") + ";
4443 buffer <<
"self.hidden_states[" << neurons_number-1 <<
"] * (" << forget_recurrent_weights(neurons_number-1,i) <<
")";
4445 buffer <<
" " << endl;
4448 buffer <<
"\t\t" << endl;
4451 buffer <<
"\t\tforget_gate_activations = [None] * "<<neurons_number<<
"\n" << endl;
4453 for(Index i = 0; i < neurons_number; i++)
4455 buffer <<
"\t\tforget_gate_activations[" << i <<
"] = ";
4457 switch(recurrent_activation_function)
4459 case ActivationFunction::HyperbolicTangent:
4460 buffer <<
"np.tanh(forget_gate_combinations[" << i <<
"])\n";
4463 case ActivationFunction::RectifiedLinear:
4464 buffer <<
"np.maximum(0.0, forget_gate_combinations[" << i <<
"])\n";
4467 case ActivationFunction::Logistic:
4468 buffer <<
"1.0/(1.0 + np.exp(-forget_gate_combinations[" << i <<
"]))\n";
4471 case ActivationFunction::Threshold:
4472 buffer <<
"1.0 if forget_gate_combinations[" << i <<
"] >= 0.0 else 0.0\n";
4475 case ActivationFunction::SymmetricThreshold:
4476 buffer <<
"1.0 if forget_gate_combinations[" << i <<
"] >= 0.0 else -1.0\n";
4479 case ActivationFunction::Linear:
4480 buffer <<
"forget_gate_combinations[" << i <<
"]\n";
4483 case ActivationFunction::ScaledExponentialLinear:
4484 buffer <<
"1.0507*1.67326*(np.exp(forget_gate_combinations[" << i <<
"]) - 1.0) if forget_gate_combinations[" << i <<
"] < 0.0 else 1.0507*forget_gate_combinations[" << i <<
"]\n";
4487 case ActivationFunction::SoftPlus:
4488 buffer <<
"np.log(1.0 + np.exp(forget_gate_combinations[" << i <<
"]))\n";
4491 case ActivationFunction::SoftSign:
4492 buffer <<
"forget_gate_combinations[" << i <<
"]/(1.0 - forget_gate_combinations[" << i <<
"] ) if forget_gate_combinations[" << i <<
"] < 0.0 else forget_gate_combinations[" << i <<
"]/(1.0 + forget_gate_combinations[" << i <<
"] )\n";
4495 case ActivationFunction::ExponentialLinear:
4496 buffer <<
"1.0*(np.exp(forget_gate_combinations[" << i <<
"]) - 1.0) if forget_gate_combinations[" << i <<
"] < 0.0 else forget_gate_combinations[" << i <<
"]\n";
4499 case ActivationFunction::HardSigmoid:
4506 buffer <<
"\t\t" << endl;
4510 buffer <<
"\t\tinput_gate_combinations = [None] * "<<neurons_number<<
"\n" << endl;
4512 for(Index i = 0; i < neurons_number; i++)
4514 buffer <<
"\t\tinput_gate_combinations[" << i <<
"] = " << input_biases(i) <<
" + ";
4516 for(Index j = 0; j < inputs_number; j++)
4518 buffer <<
"inputs[" << j <<
"] * (" << input_weights(j,i) <<
") + ";
4521 for(Index k = 0; k < neurons_number-1; k++)
4523 buffer <<
"self.hidden_states[" << k <<
"] * (" << input_recurrent_weights(k,i) <<
") + ";
4526 buffer <<
"self.hidden_states[" << neurons_number-1 <<
"] * (" << input_recurrent_weights(neurons_number-1,i) <<
")";
4528 buffer <<
" " << endl;
4532 buffer <<
"\t\t" << endl;
4534 buffer <<
"\t\tinput_gate_activations = [None] * "<<neurons_number<<
"\n" << endl;
4536 for(Index i = 0; i < neurons_number; i++)
4538 buffer <<
"\t\tinput_gate_activations[" << i <<
"] = ";
4540 switch(recurrent_activation_function)
4542 case ActivationFunction::HyperbolicTangent:
4543 buffer <<
"np.tanh(input_gate_combinations[" << i <<
"])\n";
4546 case ActivationFunction::RectifiedLinear:
4547 buffer <<
"np.maximum(0.0, input_gate_combinations[" << i <<
"])\n";
4550 case ActivationFunction::Logistic:
4551 buffer <<
"1.0/(1.0 + np.exp(-input_gate_combinations[" << i <<
"]))\n";
4554 case ActivationFunction::Threshold:
4555 buffer <<
"1.0 if input_gate_combinations[" << i <<
"] >= 0.0 else 0.0\n";
4558 case ActivationFunction::SymmetricThreshold:
4559 buffer <<
"1.0 if input_gate_combinations[" << i <<
"] >= 0.0 else -1.0\n";
4562 case ActivationFunction::Linear:
4563 buffer <<
"input_gate_combinations[" << i <<
"]\n";
4566 case ActivationFunction::ScaledExponentialLinear:
4567 buffer <<
"1.0507*1.67326*(np.exp(input_gate_combinations[" << i <<
"]) - 1.0) if input_gate_combinations[" << i <<
"] < 0.0 else 1.0507*input_gate_combinations[" << i <<
"]\n";
4570 case ActivationFunction::SoftPlus:
4571 buffer <<
"np.log(1.0 + np.exp(input_gate_combinations[" << i <<
"]))\n";
4574 case ActivationFunction::SoftSign:
4575 buffer <<
"input_gate_combinations[" << i <<
"]/(1.0 - input_gate_combinations[" << i <<
"] ) if input_gate_combinations[" << i <<
"] < 0.0 else input_gate_combinations[" << i <<
"]/(1.0 + input_gate_combinations[" << i <<
"] )\n";
4578 case ActivationFunction::ExponentialLinear:
4579 buffer <<
"1.0*(np.exp(input_gate_combinations[" << i <<
"]) - 1.0) if input_gate_combinations[" << i <<
"] < 0.0 else input_gate_combinations[" << i <<
"]\n";
4582 case ActivationFunction::HardSigmoid:
4588 buffer <<
"\t\t" << endl;
4593 buffer <<
"\t\tstate_gate_combinations = [None] * "<<neurons_number<<
"\n" << endl;
4595 for(Index i = 0; i < neurons_number; i++)
4597 buffer <<
"\t\tstate_gate_combinations[" << i <<
"] = " << state_biases(i) <<
" + ";
4599 for(Index j = 0; j < inputs_number; j++)
4601 buffer <<
"inputs[" << j <<
"] * (" << state_weights(j,i) <<
") + ";
4604 for(Index k = 0; k < neurons_number-1; k++)
4606 buffer <<
"self.hidden_states[" << k <<
"] * (" << state_recurrent_weights(k,i) <<
") + ";
4609 buffer <<
"self.hidden_states[" << neurons_number-1 <<
"] * (" << state_recurrent_weights(neurons_number-1,i) <<
")";
4611 buffer <<
" " << endl;
4615 buffer <<
"\t\t" << endl;
4617 buffer <<
"\t\tstate_gate_activations = [None] * "<<neurons_number<<
"\n" << endl;
4619 for(Index i = 0; i < neurons_number; i++)
4621 buffer <<
"\t\tstate_gate_activations[" << i <<
"] = ";
4625 case ActivationFunction::HyperbolicTangent:
4626 buffer <<
"np.tanh(state_gate_combinations[" << i <<
"])\n";
4629 case ActivationFunction::RectifiedLinear:
4630 buffer <<
"np.maximum(0.0, state_gate_combinations[" << i <<
"])\n";
4633 case ActivationFunction::Logistic:
4634 buffer <<
"1.0/(1.0 + np.exp(-state_gate_combinations[" << i <<
"]))\n";
4637 case ActivationFunction::Threshold:
4638 buffer <<
"1.0 if state_gate_combinations[" << i <<
"] >= 0.0 else 0.0\n";
4641 case ActivationFunction::SymmetricThreshold:
4642 buffer <<
"1.0 if state_gate_combinations[" << i <<
"] >= 0.0 else -1.0\n";
4645 case ActivationFunction::Linear:
4646 buffer <<
"state_gate_combinations[" << i <<
"]\n";
4649 case ActivationFunction::ScaledExponentialLinear:
4650 buffer <<
"1.0507*1.67326*(np.exp(state_gate_combinations[" << i <<
"]) - 1.0) if state_gate_combinations[" << i <<
"] < 0.0 else 1.0507*state_gate_combinations[" << i <<
"]\n";
4653 case ActivationFunction::SoftPlus:
4654 buffer <<
"np.log(1.0 + np.exp(state_gate_combinations[" << i <<
"]))\n";
4657 case ActivationFunction::SoftSign:
4658 buffer <<
"state_gate_combinations[" << i <<
"]/(1.0 - state_gate_combinations[" << i <<
"] ) if state_gate_combinations[" << i <<
"] < 0.0 else state_gate_combinations[" << i <<
"]/(1.0 + state_gate_combinations[" << i <<
"] )\n";
4661 case ActivationFunction::ExponentialLinear:
4662 buffer <<
"1.0*(np.exp(state_gate_combinations[" << i <<
"]) - 1.0) if state_gate_combinations[" << i <<
"] < 0.0 else state_gate_combinations[" << i <<
"]\n";
4665 case ActivationFunction::HardSigmoid:
4671 buffer <<
"\t\t" << endl;
4676 buffer <<
"\t\toutput_gate_combinations = [None] * "<<neurons_number<<
"\n" << endl;
4678 for(Index i = 0; i < neurons_number; i++)
4680 buffer <<
"\t\toutput_gate_combinations[" << i <<
"] = " << output_biases(i) <<
" + ";
4682 for(Index j = 0; j < inputs_number; j++)
4684 buffer <<
"inputs[" << j <<
"] * (" << output_weights(j,i) <<
") + ";
4687 for(Index k = 0; k < neurons_number-1; k++)
4689 buffer <<
"self.hidden_states[" << k <<
"] * (" << output_recurrent_weights(k,i) <<
") + ";
4692 buffer <<
"self.hidden_states[" << neurons_number-1 <<
"] * (" << output_recurrent_weights(neurons_number-1,i) <<
")";
4694 buffer <<
" " << endl;
4698 buffer <<
"\t\t" << endl;
4700 buffer <<
"\t\toutput_gate_activations = [None] * "<<neurons_number<<
"\n" << endl;
4702 for(Index i = 0; i < neurons_number; i++)
4704 buffer <<
"\t\toutput_gate_activations[" << i <<
"] = ";
4708 case ActivationFunction::HyperbolicTangent:
4709 buffer <<
"np.tanh(output_gate_combinations[" << i <<
"])\n";
4712 case ActivationFunction::RectifiedLinear:
4713 buffer <<
"np.maximum(0.0, output_gate_combinations[" << i <<
"])\n";
4716 case ActivationFunction::Logistic:
4717 buffer <<
"1.0/(1.0 + np.exp(-output_gate_combinations[" << i <<
"]))\n";
4720 case ActivationFunction::Threshold:
4721 buffer <<
"1.0 if output_gate_combinations[" << i <<
"] >= 0.0 else 0.0\n";
4724 case ActivationFunction::SymmetricThreshold:
4725 buffer <<
"1.0 if output_gate_combinations[" << i <<
"] >= 0.0 else -1.0\n";
4728 case ActivationFunction::Linear:
4729 buffer <<
"output_gate_combinations[" << i <<
"]\n";
4732 case ActivationFunction::ScaledExponentialLinear:
4733 buffer <<
"1.0507*1.67326*(np.exp(output_gate_combinations[" << i <<
"]) - 1.0) if output_gate_combinations[" << i <<
"] < 0.0 else 1.0507*output_gate_combinations[" << i <<
"]\n";
4736 case ActivationFunction::SoftPlus:
4737 buffer <<
"np.log(1.0 + np.exp(output_gate_combinations[" << i <<
"]))\n";
4740 case ActivationFunction::SoftSign:
4741 buffer <<
"output_gate_combinations[" << i <<
"]/(1.0 - output_gate_combinations[" << i <<
"] ) if output_gate_combinations[" << i <<
"] < 0.0 else output_gate_combinations[" << i <<
"]/(1.0 + output_gate_combinations[" << i <<
"] )\n";
4744 case ActivationFunction::ExponentialLinear:
4745 buffer <<
"1.0*(np.exp(output_gate_combinations[" << i <<
"]) - 1.0) if output_gate_combinations[" << i <<
"] < 0.0 else output_gate_combinations[" << i <<
"]\n";
4748 case ActivationFunction::HardSigmoid:
4754 buffer <<
"\t\t" << endl;
4759 for(Index i = 0; i < neurons_number; i++)
4761 buffer <<
"\t\tself.cell_states[" << i <<
"] = forget_gate_activations[" << i <<
"] * self.cell_states[" << i <<
"] + input_gate_activations[" << i <<
"] * state_gate_activations[" << i <<
"] \n";
4764 buffer <<
" " << endl;
4766 buffer <<
"\t\t" << endl;
4768 buffer <<
"\t\tcell_state_activations = [None] * "<<neurons_number<<
"\n" << endl;
4770 for(Index i = 0; i < neurons_number; i++)
4772 buffer <<
"\t\tcell_state_activations[" << i <<
"] = ";
4776 case ActivationFunction::HyperbolicTangent:
4777 buffer <<
"np.tanh(self.cell_states[" << i <<
"])\n";
4780 case ActivationFunction::RectifiedLinear:
4781 buffer <<
"np.maximum(0.0, self.cell_states[" << i <<
"])\n";
4784 case ActivationFunction::Logistic:
4785 buffer <<
"1.0/(1.0 + np.exp(-self.cell_states[" << i <<
"]))\n";
4788 case ActivationFunction::Threshold:
4789 buffer <<
"1.0 if self.cell_states[" << i <<
"] >= 0.0 else 0.0\n";
4792 case ActivationFunction::SymmetricThreshold:
4793 buffer <<
"1.0 if self.cell_states[" << i <<
"] >= 0.0 else -1.0\n";
4796 case ActivationFunction::Linear:
4797 buffer <<
"self.cell_states[" << i <<
"]\n";
4800 case ActivationFunction::ScaledExponentialLinear:
4801 buffer <<
"1.0507*1.67326*(np.exp(self.cell_states[" << i <<
"]) - 1.0) if self.cell_states[" << i <<
"] < 0.0 else 1.0507*self.cell_states[" << i <<
"]\n";
4804 case ActivationFunction::SoftPlus:
4805 buffer <<
"np.log(1.0 + np.exp(self.cell_states[" << i <<
"]))\n";
4808 case ActivationFunction::SoftSign:
4809 buffer <<
"self.cell_states[" << i <<
"]/(1.0 - self.cell_states[" << i <<
"] ) if self.cell_states[" << i <<
"] < 0.0 else self.cell_states[" << i <<
"]/(1.0 + self.cell_states[" << i <<
"] )\n";
4812 case ActivationFunction::ExponentialLinear:
4813 buffer <<
"1.0*(np.exp(self.cell_states[" << i <<
"]) - 1.0) if self.cell_states[" << i <<
"] < 0.0 else self.cell_states[" << i <<
"]\n";
4816 case ActivationFunction::HardSigmoid:
4822 buffer <<
"\t\t" << endl;
4827 for(Index i = 0; i < neurons_number; i++)
4829 buffer <<
"\t\tself.hidden_states[" << i <<
"] = output_gate_activations[" << i <<
"] * cell_state_activations[" << i <<
"]\n";
4832 buffer <<
" " << endl;
4834 buffer <<
"\t\t" << endl;
4839 buffer <<
"\t\tlong_short_term_memory_output = [None] * "<<neurons_number<<
"\n" << endl;
4841 for(Index i = 0; i < neurons_number; i++)
4843 buffer <<
"\t\tlong_short_term_memory_output[" << i <<
"] = self.hidden_states[" << i <<
"]\n";
4846 return buffer.str();
4851 ostringstream buffer;
4855 const tinyxml2::XMLElement* long_short_term_memory_layer_element = document.FirstChildElement(
"LongShortTermMemoryLayer");
4857 if(!long_short_term_memory_layer_element)
4859 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
4860 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
4861 <<
"PerceptronLayer element is nullptr.\n";
4863 throw logic_error(buffer.str());
4868 const tinyxml2::XMLElement* layer_name_element = long_short_term_memory_layer_element->FirstChildElement(
"LayerName");
4870 if(!layer_name_element)
4872 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
4873 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
4874 <<
"LayerName element is nullptr.\n";
4876 throw logic_error(buffer.str());
4879 if(layer_name_element->GetText())
4881 set_name(layer_name_element->GetText());
4886 const tinyxml2::XMLElement* inputs_number_element = long_short_term_memory_layer_element->FirstChildElement(
"InputsNumber");
4888 if(!inputs_number_element)
4890 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
4891 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
4892 <<
"InputsNumber element is nullptr.\n";
4894 throw logic_error(buffer.str());
4897 if(inputs_number_element->GetText())
4904 const tinyxml2::XMLElement* neurons_number_element = long_short_term_memory_layer_element->FirstChildElement(
"NeuronsNumber");
4906 if(!neurons_number_element)
4908 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
4909 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
4910 <<
"NeuronsNumber element is nullptr.\n";
4912 throw logic_error(buffer.str());
4915 if(neurons_number_element->GetText())
4922 const tinyxml2::XMLElement* time_step_element = long_short_term_memory_layer_element->FirstChildElement(
"TimeStep");
4924 if(!time_step_element)
4926 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
4927 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
4928 <<
"TimeStep element is nullptr.\n";
4930 throw logic_error(buffer.str());
4933 if(time_step_element->GetText())
4935 set_timesteps(
static_cast<Index
>(stoi(time_step_element->GetText())));
4940 const tinyxml2::XMLElement* activation_function_element = long_short_term_memory_layer_element->FirstChildElement(
"ActivationFunction");
4942 if(!activation_function_element)
4944 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
4945 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
4946 <<
"ActivationFunction element is nullptr.\n";
4948 throw logic_error(buffer.str());
4951 if(activation_function_element->GetText())
4958 const tinyxml2::XMLElement* recurrent_activation_function_element = long_short_term_memory_layer_element->FirstChildElement(
"RecurrentActivationFunction");
4960 if(!recurrent_activation_function_element)
4962 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
4963 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
4964 <<
"ActivationFunction element is nullptr.\n";
4966 throw logic_error(buffer.str());
4969 if(recurrent_activation_function_element->GetText())
4976 const tinyxml2::XMLElement* parameters_element = long_short_term_memory_layer_element->FirstChildElement(
"Parameters");
4978 if(!parameters_element)
4980 buffer <<
"OpenNN Exception: LongShortTermMemoryLayer class.\n"
4981 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
4982 <<
"Parameters element is nullptr.\n";
4984 throw logic_error(buffer.str());
4987 if(parameters_element->GetText())
4989 const string parameters_string = parameters_element->GetText();
4998 ostringstream buffer;
5002 file_stream.OpenElement(
"LongShortTermMemoryLayer");
5006 file_stream.OpenElement(
"LayerName");
5009 file_stream.
PushText(buffer.str().c_str());
5014 file_stream.OpenElement(
"InputsNumber");
5019 file_stream.
PushText(buffer.str().c_str());
5025 file_stream.OpenElement(
"NeuronsNumber");
5030 file_stream.
PushText(buffer.str().c_str());
5036 file_stream.OpenElement(
"TimeStep");
5041 file_stream.
PushText(buffer.str().c_str());
5047 file_stream.OpenElement(
"ActivationFunction");
5055 file_stream.OpenElement(
"RecurrentActivationFunction");
5063 file_stream.OpenElement(
"Parameters");
5068 const Index parameters_size = parameters.size();
5070 for(Index i = 0; i < parameters_size; i++)
5072 buffer << parameters(i);
5074 if(i != (parameters_size-1)) buffer <<
" ";
5077 file_stream.
PushText(buffer.str().c_str());
5086string LongShortTermMemoryLayer::write_recurrent_activation_function_expression()
const
5088 switch(recurrent_activation_function)
5090 case ActivationFunction::HyperbolicTangent:
5094 case ActivationFunction::Linear:
5106string LongShortTermMemoryLayer::write_activation_function_expression()
const
5110 case ActivationFunction::HyperbolicTangent:
5114 case ActivationFunction::Linear:
This abstract class represents the concept of a layer of neurons in OpenNN.
string layer_name
Layer name.
Type layer_type
Layer type.
void set_recurrent_activation_function(const ActivationFunction &)
string write_activation_function() const
void set_state_weights(const Tensor< type, 2 > &)
Tensor< type, 2 > get_forget_weights() const
Tensor< type, 2 > get_output_recurrent_weights() const
void set_parameters_constant(const type &)
Tensor< type, 2 > get_output_weights() const
void set_input_weights_constant(const type &)
const bool & get_display() const
Tensor< type, 1 > get_state_biases() const
Index get_inputs_number() const
Returns the number of inputs to the layer.
Tensor< type, 2 > get_input_weights() const
void set_forget_recurrent_weights(const Tensor< type, 2 > &)
void set_biases_constant(const type &)
string write_expression(const Tensor< string, 1 > &, const Tensor< string, 1 > &) const
void set_output_weights_constant(const type &)
LongShortTermMemoryLayer()
void set_output_biases_constant(const type &)
void set_weights_constant(const type &)
void set_forget_weights_constant(const type &)
void set_output_recurrent_weights_constant(const type &)
bool display
Display messages to screen.
void set_forget_biases_constant(const type &)
Tensor< type, 2 > get_forget_recurrent_weights() const
ActivationFunction
Enumeration of the activation functions available for the long short-term memory (LSTM) layer.
void set_state_biases_constant(const type &)
void set_input_biases(const Tensor< type, 1 > &)
Tensor< type, 2 > get_state_weights() const
void set_activation_function(const ActivationFunction &)
void set_recurrent_weights_constant(const type &)
Tensor< type, 2 > get_input_recurrent_weights() const
Tensor< type, 1 > get_forget_biases() const
void set_input_recurrent_weights_constant(const type &)
void set_input_weights(const Tensor< type, 2 > &)
void set_state_recurrent_weights(const Tensor< type, 2 > &)
void set_cell_states_constant(const type &)
void set_forget_biases(const Tensor< type, 1 > &)
Tensor< type, 2 > get_state_recurrent_weights() const
string write_recurrent_activation_function() const
void set_input_biases_constant(const type &)
void set_inputs_number(const Index &)
Tensor< type, 1 > get_output_biases() const
void set_timesteps(const Index &)
string write_combinations_c() const
void set_forget_recurrent_weights_constant(const type &)
void set_state_biases(const Tensor< type, 1 > &)
void set_output_recurrent_weights(const Tensor< type, 2 > &)
Index get_neurons_number() const
Returns the number of neurons in the layer.
void set_input_shape(const Tensor< Index, 1 > &)
void set_input_recurrent_weights(const Tensor< type, 2 > &)
Index get_timesteps() const
Returns the number of timesteps.
void set_forget_weights(const Tensor< type, 2 > &)
void set_parameters_random()
void set_output_weights(const Tensor< type, 2 > &)
virtual ~LongShortTermMemoryLayer()
const LongShortTermMemoryLayer::ActivationFunction & get_activation_function() const
Returns the activation function of the layer.
void set_state_weights_constant(const type &)
void set_parameters(const Tensor< type, 1 > &, const Index &=0)
void set_display(const bool &)
void set_hidden_states_constant(const type &)
void set_output_biases(const Tensor< type, 1 > &)
Index get_parameters_number() const
Returns the number of parameters (biases, weights, recurrent weights) of the layer.
ActivationFunction activation_function
Activation function variable.
const LongShortTermMemoryLayer::ActivationFunction & get_recurrent_activation_function() const
Returns the recurrent activation function of the layer.
void set_state_recurrent_weights_constant(const type &)
void set_neurons_number(const Index &)
string write_combinations_python() const
Tensor< type, 1 > get_input_biases() const
Tensor< type, 1 > get_parameters() const
void PushText(const char *text, bool cdata=false)
Add a text node.
virtual void CloseElement(bool compactMode=false)
If streaming, close the Element.