#include "recurrent_layer.h"
set(new_inputs_number, new_neurons_number);

return input_weights.dimension(0);

return neurons_number * (1 + inputs_number + neurons_number);
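
/// Returns the number of timesteps after which the hidden states are reset during forward propagation.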
Index RecurrentLayer::get_timesteps() const
{
    return timesteps;
}
return input_weights;
Index RecurrentLayer::get_biases_number() const
{
    return biases.size();
}


Index RecurrentLayer::get_input_weights_number() const
{
    return input_weights.size();
}
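
/// Returns the number of recurrent (hidden-state to hidden-state) weights in the layer.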
Index RecurrentLayer::get_recurrent_weights_number() const
{
    return recurrent_weights.size();
}
Tensor<type, 1> parameters(parameters_number);

Index current_position = 0;

for(Index i = 0; i < biases.size(); i++) fill_n(parameters.data() + current_position + i, 1, biases(i));

current_position += biases.size();

for(Index i = 0; i < input_weights.size(); i++) fill_n(parameters.data() + current_position + i, 1, input_weights(i));

current_position += input_weights.size();
const Index biases_number = get_biases_number();
const Index input_weights_number = get_input_weights_number();

Tensor<type, 1> new_biases(biases_number);

new_biases = parameters.slice(Eigen::array<Eigen::Index, 1>({input_weights_number}), Eigen::array<Eigen::Index, 1>({biases_number}));

Eigen::array<Index, 2> two_dim{{1, biases_number}};

return new_biases.reshape(two_dim);
const Index input_weights_number = get_input_weights_number();

const Tensor<type, 1> new_inputs_weights
        = parameters.slice(Eigen::array<Eigen::Index, 1>({0}), Eigen::array<Eigen::Index, 1>({input_weights_number}));

const Eigen::array<Index, 2> two_dim{{inputs_number, neurons_number}};

return new_inputs_weights.reshape(two_dim);
const Index parameters_size = parameters.size();

const Index start_recurrent_weights_number = parameters_size - recurrent_weights_number;

const Tensor<type, 1> new_synaptic_weights
        = parameters.slice(Eigen::array<Eigen::Index, 1>({start_recurrent_weights_number}), Eigen::array<Eigen::Index, 1>({recurrent_weights_number}));

const Eigen::array<Index, 2> two_dim{{neurons_number, neurons_number}};

return new_synaptic_weights.reshape(two_dim);
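
// Note on parameter layout: the slice-based getters above assume the ordering
// [ input_weights | biases | ... | recurrent_weights ], while get_parameters()
// and set_parameters() below use [ biases | input_weights | recurrent_weights ].
// A minimal round-trip sketch, assuming a hypothetical `layer` object:
//
//   const Tensor<type, 1> parameters = layer.get_parameters();
//   const Tensor<type, 2> recurrent_weights = layer.get_recurrent_weights(parameters);   // sliced from the tail, so both layouts agree here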
string RecurrentLayer::write_activation_function() const
{
    switch(activation_function)
    {
    case ActivationFunction::Logistic: return "Logistic";

    case ActivationFunction::HyperbolicTangent: return "HyperbolicTangent";

    case ActivationFunction::Threshold: return "Threshold";

    case ActivationFunction::SymmetricThreshold: return "SymmetricThreshold";

    case ActivationFunction::Linear: return "Linear";

    case ActivationFunction::RectifiedLinear: return "RectifiedLinear";

    case ActivationFunction::ScaledExponentialLinear: return "ScaledExponentialLinear";

    case ActivationFunction::SoftPlus: return "SoftPlus";

    case ActivationFunction::SoftSign: return "SoftSign";

    case ActivationFunction::HardSigmoid: return "HardSigmoid";

    case ActivationFunction::ExponentialLinear: return "ExponentialLinear";
    }
}
biases.resize(new_neurons_number);

input_weights.resize(new_inputs_number, new_neurons_number);

recurrent_weights.resize(new_neurons_number, new_neurons_number);

hidden_states.resize(new_neurons_number);

hidden_states.setConstant(type(0));
input_weights.resize(new_inputs_number, neurons_number);
void RecurrentLayer::set_input_shape(const Tensor<Index, 1>& size)
{
    const Index new_size = size[0];

    set_inputs_number(new_size);
}
biases.resize(new_neurons_number);

input_weights.resize(inputs_number, new_neurons_number);

recurrent_weights.resize(new_neurons_number, new_neurons_number);
void RecurrentLayer::set_timesteps(const Index& new_timesteps)
{
    timesteps = new_timesteps;
}


void RecurrentLayer::set_biases(const Tensor<type, 1>& new_biases)
{
    biases = new_biases;
}
void RecurrentLayer::set_input_weights(const Tensor<type, 2>& new_input_weights)
{
    input_weights = new_input_weights;
}


void RecurrentLayer::set_recurrent_weights(const Tensor<type, 2>& new_recurrent_weights)
{
    recurrent_weights = new_recurrent_weights;
}
const Index biases_number = get_biases_number();
const Index inputs_weights_number = get_input_weights_number();
const Index recurrent_weights_number = get_recurrent_weights_number();

memcpy(biases.data(),
       new_parameters.data() + index,
       static_cast<size_t>(biases_number)*sizeof(type));

memcpy(input_weights.data(),
       new_parameters.data() + index + biases_number,
       static_cast<size_t>(inputs_weights_number)*sizeof(type));

memcpy(recurrent_weights.data(),
       new_parameters.data() + biases_number + inputs_weights_number + index,
       static_cast<size_t>(recurrent_weights_number)*sizeof(type));
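
// Minimal usage sketch (hypothetical sizes), assuming the biases-first
// ordering written by the three memcpy calls above:
//
//   RecurrentLayer layer(3, 2);                                  // 3 inputs, 2 neurons
//   Tensor<type, 1> parameters(layer.get_parameters_number());   // 2*(1 + 3 + 2) = 12
//   parameters.setRandom();
//   layer.set_parameters(parameters, 0);                         // index 0: start of the vector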
if(new_activation_function_name == "Logistic")
{
    activation_function = ActivationFunction::Logistic;
}
else if(new_activation_function_name == "HyperbolicTangent")
{
    activation_function = ActivationFunction::HyperbolicTangent;
}
else if(new_activation_function_name == "Threshold")
{
    activation_function = ActivationFunction::Threshold;
}
else if(new_activation_function_name == "SymmetricThreshold")
{
    activation_function = ActivationFunction::SymmetricThreshold;
}
else if(new_activation_function_name == "Linear")
{
    activation_function = ActivationFunction::Linear;
}
else if(new_activation_function_name == "RectifiedLinear")
{
    activation_function = ActivationFunction::RectifiedLinear;
}
else if(new_activation_function_name == "ScaledExponentialLinear")
{
    activation_function = ActivationFunction::ScaledExponentialLinear;
}
else if(new_activation_function_name == "SoftPlus")
{
    activation_function = ActivationFunction::SoftPlus;
}
else if(new_activation_function_name == "SoftSign")
{
    activation_function = ActivationFunction::SoftSign;
}
else if(new_activation_function_name == "HardSigmoid")
{
    activation_function = ActivationFunction::HardSigmoid;
}
else if(new_activation_function_name == "ExponentialLinear")
{
    activation_function = ActivationFunction::ExponentialLinear;
}
else
{
    ostringstream buffer;

    buffer << "OpenNN Exception: RecurrentLayer class.\n"
           << "void set_activation_function(const string&) method.\n"
           << "Unknown activation function: " << new_activation_function_name << ".\n";

    throw logic_error(buffer.str());
}
hidden_states.setConstant(value);

biases.setConstant(value);

input_weights.setConstant(value);

input_weights.setRandom();

biases.setConstant(value);

input_weights.setConstant(value);

hidden_states.setZero();
const type minimum = type(-0.2);
const type maximum = type(0.2);

for(Index i = 0; i < biases.size(); i++)
{
    const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

    biases(i) = minimum + (maximum - minimum)*random;
}

for(Index i = 0; i < input_weights.size(); i++)
{
    const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

    input_weights(i) = minimum + (maximum - minimum)*random;
}

for(Index i = 0; i < recurrent_weights.size(); i++)
{
    const type random = static_cast<type>(rand()/(RAND_MAX+1.0));

    recurrent_weights(i) = minimum + (maximum - minimum)*random;
}
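
/// Computes the pre-activation combinations of the layer for one sample:
/// combinations = input_weights'*inputs + biases + recurrent_weights'*hidden_states.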
void RecurrentLayer::calculate_combinations(const Tensor<type, 1>& inputs,
                                            const Tensor<type, 2>& input_weights,
                                            const Tensor<type, 2>& recurrent_weights,
                                            const Tensor<type, 1>& biases,
                                            Tensor<type, 1>& combinations) const
{
    combinations.device(*thread_pool_device) = inputs.contract(input_weights, AT_B);

    combinations.device(*thread_pool_device) += biases;

    combinations.device(*thread_pool_device) += hidden_states.contract(recurrent_weights, AT_B);
}
void RecurrentLayer::calculate_activations(const Tensor<type, 1>& combinations_1d,
                                           Tensor<type, 1>& activations_1d) const
{
    switch(activation_function)
    {
    case ActivationFunction::Linear: linear(combinations_1d, activations_1d); return;

    case ActivationFunction::Logistic: logistic(combinations_1d, activations_1d); return;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent(combinations_1d, activations_1d); return;

    case ActivationFunction::Threshold: threshold(combinations_1d, activations_1d); return;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold(combinations_1d, activations_1d); return;

    case ActivationFunction::RectifiedLinear: rectified_linear(combinations_1d, activations_1d); return;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear(combinations_1d, activations_1d); return;

    case ActivationFunction::SoftPlus: soft_plus(combinations_1d, activations_1d); return;

    case ActivationFunction::SoftSign: soft_sign(combinations_1d, activations_1d); return;

    case ActivationFunction::HardSigmoid: hard_sigmoid(combinations_1d, activations_1d); return;

    case ActivationFunction::ExponentialLinear: exponential_linear(combinations_1d, activations_1d); return;
    }
}
void RecurrentLayer::calculate_activations_derivatives(const Tensor<type, 1>& combinations_1d,
                                                       Tensor<type, 1>& activations_1d,
                                                       Tensor<type, 1>& activations_derivatives_1d) const
{
    const Index neurons_number = get_neurons_number();

    const Index combinations_size = combinations_1d.dimension(0);

    if(combinations_size != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "void calculate_activations_derivatives(const Tensor<type, 1>&, Tensor<type, 1>&, Tensor<type, 1>&) const method.\n"
               << "Size of combinations_1d (" << combinations_size
               << ") must be equal to number of neurons (" << neurons_number << ").\n";

        throw logic_error(buffer.str());
    }
    switch(activation_function)
    {
    case ActivationFunction::Linear: linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::Logistic: logistic_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::Threshold: threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::RectifiedLinear: rectified_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SoftPlus: soft_plus_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::SoftSign: soft_sign_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::HardSigmoid: hard_sigmoid_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;

    case ActivationFunction::ExponentialLinear: exponential_linear_derivatives(combinations_1d, activations_1d, activations_derivatives_1d); return;
    }
}
void RecurrentLayer::calculate_activations_derivatives(const Tensor<type, 2>& combinations_2d,
                                                       Tensor<type, 2>& activations_2d,
                                                       Tensor<type, 2>& activations_derivatives_2d) const
{
    const Index neurons_number = get_neurons_number();

    const Index combinations_columns_number = combinations_2d.dimension(1);

    if(combinations_columns_number != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "void calculate_activations_derivatives(const Tensor<type, 2>&, Tensor<type, 2>&, Tensor<type, 2>&) const method.\n"
               << "Number of combinations_2d columns (" << combinations_columns_number
               << ") must be equal to number of neurons (" << neurons_number << ").\n";

        throw logic_error(buffer.str());
    }
    switch(activation_function)
    {
    case ActivationFunction::Linear: linear_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::Logistic: logistic_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::HyperbolicTangent: hyperbolic_tangent_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::Threshold: threshold_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::SymmetricThreshold: symmetric_threshold_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::RectifiedLinear: rectified_linear_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::ScaledExponentialLinear: scaled_exponential_linear_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::SoftPlus: soft_plus_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::SoftSign: soft_sign_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::HardSigmoid: hard_sigmoid_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;

    case ActivationFunction::ExponentialLinear: exponential_linear_derivatives(combinations_2d, activations_2d, activations_derivatives_2d); return;
    }
}
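
/// Propagates the inputs through the layer one sample (row) at a time,
/// resetting the hidden states every `timesteps` samples, and stores the
/// per-sample combinations, activations and activation derivatives.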
void RecurrentLayer::forward_propagate(const Tensor<type, 2>& inputs, LayerForwardPropagation* forward_propagation)
{
    RecurrentLayerForwardPropagation* recurrent_layer_forward_propagation =
            static_cast<RecurrentLayerForwardPropagation*>(forward_propagation);

    const Index samples_number = inputs.dimension(0);
    const Index neurons_number = get_neurons_number();

    for(Index i = 0; i < samples_number; i++)
    {
        if(i%timesteps == 0) hidden_states.setZero();

        recurrent_layer_forward_propagation->current_inputs = inputs.chip(i, 0);

        calculate_combinations(recurrent_layer_forward_propagation->current_inputs,
                               input_weights,
                               recurrent_weights,
                               biases,
                               recurrent_layer_forward_propagation->current_combinations);

        calculate_activations_derivatives(recurrent_layer_forward_propagation->current_combinations,
                                          hidden_states,
                                          recurrent_layer_forward_propagation->current_activations_derivatives);

        for(Index j = 0; j < neurons_number; j++)
        {
            recurrent_layer_forward_propagation->combinations(i,j) = recurrent_layer_forward_propagation->current_combinations(j);
            recurrent_layer_forward_propagation->activations(i,j) = hidden_states(j);
            recurrent_layer_forward_propagation->activations_derivatives(i,j) = recurrent_layer_forward_propagation->current_activations_derivatives(j);
        }
    }
}
void RecurrentLayer::forward_propagate(const Tensor<type, 2>& inputs,
                                       Tensor<type, 1> parameters,
                                       LayerForwardPropagation* forward_propagation)
{
    RecurrentLayerForwardPropagation* recurrent_layer_forward_propagation
            = static_cast<RecurrentLayerForwardPropagation*>(forward_propagation);

    const Index neurons_number = get_neurons_number();
    const Index inputs_number = get_inputs_number();

    const TensorMap<Tensor<type, 1>> biases(parameters.data(), neurons_number);
    const TensorMap<Tensor<type, 2>> input_weights(parameters.data()+neurons_number, inputs_number, neurons_number);
    const TensorMap<Tensor<type, 2>> recurrent_weights(parameters.data()+neurons_number+inputs_number*neurons_number, neurons_number, neurons_number);

    const Index samples_number = inputs.dimension(0);

    for(Index i = 0; i < samples_number; i++)
    {
        if(i%timesteps == 0) hidden_states.setZero();

        recurrent_layer_forward_propagation->current_inputs = inputs.chip(i, 0);

        calculate_combinations(recurrent_layer_forward_propagation->current_inputs,
                               input_weights,
                               recurrent_weights,
                               biases,
                               recurrent_layer_forward_propagation->current_combinations);

        calculate_activations_derivatives(recurrent_layer_forward_propagation->current_combinations,
                                          hidden_states,
                                          recurrent_layer_forward_propagation->current_activations_derivatives);

        for(Index j = 0; j < neurons_number; j++)
        {
            recurrent_layer_forward_propagation->combinations(i,j)
                    = recurrent_layer_forward_propagation->current_combinations(j);

            recurrent_layer_forward_propagation->activations(i,j) = hidden_states(j);

            recurrent_layer_forward_propagation->activations_derivatives(i,j)
                    = recurrent_layer_forward_propagation->current_activations_derivatives(j);
        }
    }
}
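
/// Returns the outputs of the layer for a batch of samples, resetting the
/// hidden states every `timesteps` rows and updating them as it goes.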
Tensor<type, 2> RecurrentLayer::calculate_outputs(const Tensor<type, 2>& inputs)
{
    const Index inputs_number = get_inputs_number();
    const Index neurons_number = get_neurons_number();

    const Index inputs_columns_number = inputs.dimension(1);

    if(inputs_columns_number != inputs_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "Tensor<type, 2> calculate_outputs(const Tensor<type, 2>&) method.\n"
               << "Number of columns (" << inputs_columns_number
               << ") of inputs matrix must be equal to number of inputs (" << inputs_number << ").\n";

        throw logic_error(buffer.str());
    }

    const Index samples_number = inputs.dimension(0);

    Tensor<type, 1> current_inputs(inputs_number);

    Tensor<type, 1> current_outputs(neurons_number);

    Tensor<type, 2> outputs(samples_number, neurons_number);

    for(Index i = 0; i < samples_number; i++)
    {
        if(i%timesteps == 0) hidden_states.setZero();

        current_inputs = inputs.chip(i, 0);

        calculate_combinations(current_inputs, input_weights, recurrent_weights, biases, current_outputs);

        calculate_activations(current_outputs, hidden_states);

        for(Index j = 0; j < neurons_number; j++)
            outputs(i,j) = hidden_states(j);
    }

    return outputs;
}
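
// Minimal usage sketch (hypothetical data), assuming a layer configured with
// set() and set_timesteps():
//
//   Tensor<type, 2> inputs(10, layer.get_inputs_number());
//   inputs.setRandom();
//   const Tensor<type, 2> outputs = layer.calculate_outputs(inputs);   // 10 x neurons_number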
void RecurrentLayer::calculate_hidden_delta(LayerForwardPropagation* next_layer_forward_propagation,
                                            LayerBackPropagation* next_layer_back_propagation,
                                            LayerBackPropagation* current_layer_back_propagation) const
{
    RecurrentLayerBackPropagation* recurrent_layer_back_propagation =
            static_cast<RecurrentLayerBackPropagation*>(current_layer_back_propagation);

    switch(next_layer_back_propagation->layer_pointer->get_type())
    {
    case Type::Perceptron:
    {
        PerceptronLayerForwardPropagation* perceptron_layer_forward_propagation =
                static_cast<PerceptronLayerForwardPropagation*>(next_layer_forward_propagation);

        PerceptronLayerBackPropagation* perceptron_layer_back_propagation =
                static_cast<PerceptronLayerBackPropagation*>(next_layer_back_propagation);

        calculate_hidden_delta_perceptron(perceptron_layer_forward_propagation,
                                          perceptron_layer_back_propagation,
                                          recurrent_layer_back_propagation);
    }
    break;

    case Type::Probabilistic:
    {
        ProbabilisticLayerForwardPropagation* probabilistic_layer_forward_propagation =
                static_cast<ProbabilisticLayerForwardPropagation*>(next_layer_forward_propagation);

        ProbabilisticLayerBackPropagation* probabilistic_layer_back_propagation =
                static_cast<ProbabilisticLayerBackPropagation*>(next_layer_back_propagation);

        calculate_hidden_delta_probabilistic(probabilistic_layer_forward_propagation,
                                             probabilistic_layer_back_propagation,
                                             recurrent_layer_back_propagation);
    }
    break;

    default: break;
    }
}
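
/// Computes this layer's deltas from a following perceptron layer:
/// delta = (next_delta * next_activations_derivatives) contracted with the
/// transposed synaptic weights of the next layer.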
void RecurrentLayer::calculate_hidden_delta_perceptron(PerceptronLayerForwardPropagation* next_forward_propagation,
                                                       PerceptronLayerBackPropagation* next_back_propagation,
                                                       RecurrentLayerBackPropagation* back_propagation) const
{
    const Tensor<type, 2>& next_synaptic_weights
            = static_cast<PerceptronLayer*>(next_back_propagation->layer_pointer)->get_synaptic_weights();

    back_propagation->delta.device(*thread_pool_device) =
            (next_back_propagation->delta*next_forward_propagation->activations_derivatives).contract(next_synaptic_weights, A_BT);
}
void RecurrentLayer::calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation* next_forward_propagation,
                                                          ProbabilisticLayerBackPropagation* next_back_propagation,
                                                          RecurrentLayerBackPropagation* back_propagation) const
{
    const ProbabilisticLayer* probabilistic_layer_pointer = static_cast<ProbabilisticLayer*>(next_back_propagation->layer_pointer);

    const Tensor<type, 2>& next_synaptic_weights = probabilistic_layer_pointer->get_synaptic_weights();

    if(probabilistic_layer_pointer->get_neurons_number() == 1)
    {
        back_propagation->delta.device(*thread_pool_device) =
                (next_back_propagation->delta*next_forward_propagation->activations_derivatives).contract(next_synaptic_weights, A_BT);
    }
    else
    {
        const Index samples_number = next_back_propagation->delta.dimension(0);
        const Index outputs_number = next_back_propagation->delta.dimension(1);
        const Index next_layer_neurons_number = probabilistic_layer_pointer->get_neurons_number();

        if(outputs_number != next_layer_neurons_number)
        {
            ostringstream buffer;

            buffer << "OpenNN Exception: RecurrentLayer class.\n"
                   << "void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*, ProbabilisticLayerBackPropagation*, RecurrentLayerBackPropagation*) const.\n"
                   << "Number of columns in delta (" << outputs_number
                   << ") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number << ").\n";

            throw logic_error(buffer.str());
        }

        if(next_forward_propagation->activations_derivatives.dimension(1) != next_layer_neurons_number)
        {
            ostringstream buffer;

            buffer << "OpenNN Exception: RecurrentLayer class.\n"
                   << "void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*, ProbabilisticLayerBackPropagation*, RecurrentLayerBackPropagation*) const.\n"
                   << "Dimension 1 of activations derivatives (" << next_forward_propagation->activations_derivatives.dimension(1)
                   << ") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number << ").\n";

            throw logic_error(buffer.str());
        }

        if(next_forward_propagation->activations_derivatives.dimension(2) != next_layer_neurons_number)
        {
            ostringstream buffer;

            buffer << "OpenNN Exception: RecurrentLayer class.\n"
                   << "void calculate_hidden_delta_probabilistic(ProbabilisticLayerForwardPropagation*, ProbabilisticLayerBackPropagation*, RecurrentLayerBackPropagation*) const.\n"
                   << "Dimension 2 of activations derivatives (" << next_forward_propagation->activations_derivatives.dimension(2)
                   << ") must be equal to number of neurons in probabilistic layer (" << next_layer_neurons_number << ").\n";

            throw logic_error(buffer.str());
        }
        const Index step = next_layer_neurons_number*next_layer_neurons_number;

        next_back_propagation->biases_derivatives.setZero();

        for(Index i = 0; i < samples_number; i++)
        {
            next_back_propagation->delta_row = next_back_propagation->delta.chip(i,0);

            TensorMap<Tensor<type, 2>> activations_derivatives_matrix(next_forward_propagation->activations_derivatives.data() + i*step,
                                                                      next_layer_neurons_number, next_layer_neurons_number);

            next_back_propagation->error_combinations_derivatives.chip(i,0) =
                    next_back_propagation->delta_row.contract(activations_derivatives_matrix, AT_B);
        }

        back_propagation->delta.device(*thread_pool_device) =
                (next_back_propagation->error_combinations_derivatives).contract(next_synaptic_weights, A_BT);
    }
}
void RecurrentLayer::insert_gradient(LayerBackPropagation* back_propagation, const Index& index, Tensor<type, 1>& gradient) const
{
    RecurrentLayerBackPropagation* recurrent_layer_back_propagation
            = static_cast<RecurrentLayerBackPropagation*>(back_propagation);

    const Index neurons_number = get_neurons_number();
    const Index inputs_number = get_inputs_number();

    memcpy(gradient.data() + index,
           recurrent_layer_back_propagation->biases_derivatives.data(),
           static_cast<size_t>(neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + neurons_number,
           recurrent_layer_back_propagation->input_weights_derivatives.data(),
           static_cast<size_t>(inputs_number*neurons_number)*sizeof(type));

    memcpy(gradient.data() + index + neurons_number + inputs_number*neurons_number,
           recurrent_layer_back_propagation->recurrent_weights_derivatives.data(),
           static_cast<size_t>(neurons_number*neurons_number)*sizeof(type));
}
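
// The gradient layout written above matches the set_parameters() ordering:
// biases first, then input weights, then recurrent weights.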
void RecurrentLayer::calculate_error_gradient(const Tensor<type, 2>& inputs,
                                              LayerForwardPropagation* forward_propagation,
                                              LayerBackPropagation* back_propagation) const
{
    RecurrentLayerForwardPropagation* recurrent_layer_forward_propagation =
            static_cast<RecurrentLayerForwardPropagation*>(forward_propagation);

    RecurrentLayerBackPropagation* recurrent_layer_back_propagation =
            static_cast<RecurrentLayerBackPropagation*>(back_propagation);

    calculate_biases_error_gradient(inputs, recurrent_layer_forward_propagation, recurrent_layer_back_propagation);

    calculate_input_weights_error_gradient(inputs, recurrent_layer_forward_propagation, recurrent_layer_back_propagation);

    calculate_recurrent_weights_error_gradient(inputs, recurrent_layer_forward_propagation, recurrent_layer_back_propagation);
}
void RecurrentLayer::calculate_biases_error_gradient(const Tensor<type, 2>& inputs,
                                                     RecurrentLayerForwardPropagation* forward_propagation,
                                                     RecurrentLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);

    const Index neurons_number = get_neurons_number();

    const Index parameters_number = neurons_number;

    back_propagation->combinations_biases_derivatives.setZero();

    back_propagation->biases_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample,0);

        if(sample%timesteps == 0)
        {
            back_propagation->combinations_biases_derivatives.setZero();
        }
        else
        {
            multiply_rows(back_propagation->combinations_biases_derivatives, forward_propagation->current_activations_derivatives);

            back_propagation->combinations_biases_derivatives
                    = back_propagation->combinations_biases_derivatives.contract(recurrent_weights, A_B).eval();
        }

        forward_propagation->current_activations_derivatives
                = forward_propagation->activations_derivatives.chip(sample, 0);

        for(Index i = 0; i < parameters_number; i++) back_propagation->combinations_biases_derivatives(i,i) += static_cast<type>(1);

        back_propagation->biases_derivatives += back_propagation->combinations_biases_derivatives
                .contract(back_propagation->current_layer_deltas*forward_propagation->current_activations_derivatives, A_B);
    }
}
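
// Backpropagation through time: combinations_biases_derivatives carries
// d(combinations)/d(biases) across samples through the recurrent weights,
// and is reset whenever the hidden states are (every `timesteps` samples).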
void RecurrentLayer::calculate_input_weights_error_gradient(const Tensor<type, 2>& inputs,
                                                            RecurrentLayerForwardPropagation* forward_propagation,
                                                            RecurrentLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);

    const Index neurons_number = get_neurons_number();
    const Index inputs_number = get_inputs_number();

    const Index parameters_number = inputs_number*neurons_number;

    Index column_index = 0;
    Index input_index = 0;

    back_propagation->combinations_weights_derivatives.setZero();
    back_propagation->input_weights_derivatives.setZero();

    for(Index sample = 0; sample < samples_number; sample++)
    {
        forward_propagation->current_inputs = inputs.chip(sample, 0);

        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample, 0);

        if(sample%timesteps == 0)
        {
            back_propagation->combinations_weights_derivatives.setZero();
        }
        else
        {
            multiply_rows(back_propagation->combinations_weights_derivatives, forward_propagation->current_activations_derivatives);

            back_propagation->combinations_weights_derivatives
                    = back_propagation->combinations_weights_derivatives.contract(recurrent_weights, A_B).eval();
        }

        forward_propagation->current_activations_derivatives
                = forward_propagation->activations_derivatives.chip(sample, 0);

        column_index = 0;
        input_index = 0;

        for(Index i = 0; i < parameters_number; i++)
        {
            back_propagation->combinations_weights_derivatives(i, column_index) += forward_propagation->current_inputs(input_index);

            input_index++;

            if(input_index == inputs_number)
            {
                input_index = 0;
                column_index++;
            }
        }

        back_propagation->input_weights_derivatives += back_propagation->combinations_weights_derivatives
                .contract(back_propagation->current_layer_deltas*forward_propagation->current_activations_derivatives, A_B);
    }
}
void RecurrentLayer::calculate_recurrent_weights_error_gradient(const Tensor<type, 2>& inputs,
                                                                RecurrentLayerForwardPropagation* forward_propagation,
                                                                RecurrentLayerBackPropagation* back_propagation) const
{
    const Index samples_number = inputs.dimension(0);

    const Index neurons_number = get_neurons_number();

    const Index parameters_number = neurons_number*neurons_number;

    back_propagation->combinations_recurrent_weights_derivatives.setZero();

    back_propagation->recurrent_weights_derivatives.setZero();

    Index column_index = 0;
    Index activation_index = 0;

    for(Index sample = 0; sample < samples_number; sample++)
    {
        back_propagation->current_layer_deltas = back_propagation->delta.chip(sample,0);

        if(sample%timesteps == 0)
        {
            back_propagation->combinations_recurrent_weights_derivatives.setZero();
        }
        else
        {
            forward_propagation->previous_activations = forward_propagation->activations.chip(sample-1, 0);

            multiply_rows(back_propagation->combinations_recurrent_weights_derivatives, forward_propagation->current_activations_derivatives);

            back_propagation->combinations_recurrent_weights_derivatives
                    = back_propagation->combinations_recurrent_weights_derivatives.contract(recurrent_weights, A_B).eval();

            column_index = 0;
            activation_index = 0;

            for(Index i = 0; i < parameters_number; i++)
            {
                back_propagation->combinations_recurrent_weights_derivatives(i, column_index)
                        += forward_propagation->previous_activations(activation_index);

                activation_index++;

                if(activation_index == neurons_number)
                {
                    activation_index = 0;
                    column_index++;
                }
            }
        }

        forward_propagation->current_activations_derivatives = forward_propagation->activations_derivatives.chip(sample, 0);

        back_propagation->recurrent_weights_derivatives += back_propagation->combinations_recurrent_weights_derivatives
                .contract(back_propagation->current_layer_deltas*forward_propagation->current_activations_derivatives, A_B);
    }
}
string RecurrentLayer::write_expression(const Tensor<string, 1>& inputs_names,
                                        const Tensor<string, 1>& outputs_names) const
{
    const Index inputs_number = get_inputs_number();
    const Index neurons_number = get_neurons_number();

    const Index inputs_name_size = inputs_names.size();

    if(inputs_name_size != inputs_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "string write_expression(const Tensor<string, 1>&, const Tensor<string, 1>&) const method.\n"
               << "Size of inputs name must be equal to number of layer inputs.\n";

        throw logic_error(buffer.str());
    }
    const Index outputs_name_size = outputs_names.size();

    if(outputs_name_size != neurons_number)
    {
        ostringstream buffer;

        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "string write_expression(const Tensor<string, 1>&, const Tensor<string, 1>&) const method.\n"
               << "Size of outputs name must be equal to number of neurons.\n";

        throw logic_error(buffer.str());
    }
    ostringstream buffer;

    for(Index j = 0; j < outputs_names.size(); j++)
    {
        const Tensor<type, 1> synaptic_weights_column = input_weights.chip(j, 1);

        buffer << outputs_names(j) << " = " << write_activation_function_expression() << "( " << biases(j) << " +";

        for(Index i = 0; i < inputs_names.size() - 1; i++)
        {
            buffer << " (" << inputs_names[i] << "*" << synaptic_weights_column(i) << ") +";
        }

        buffer << " (" << inputs_names[inputs_names.size() - 1] << "*" << synaptic_weights_column(inputs_names.size() - 1) << ") );\n";
    }

    return buffer.str();
}
string RecurrentLayer::write_activation_function_expression() const
{
    switch(activation_function)
    {
    case ActivationFunction::HyperbolicTangent: return "tanh";

    case ActivationFunction::Linear: return string();

    default: return write_activation_function();
    }
}
void RecurrentLayer::from_XML(const tinyxml2::XMLDocument& document)
{
    ostringstream buffer;

    const tinyxml2::XMLElement* recurrent_layer_element = document.FirstChildElement("RecurrentLayer");

    if(!recurrent_layer_element)
    {
        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
               << "RecurrentLayer element is nullptr.\n";

        throw logic_error(buffer.str());
    }
    const tinyxml2::XMLElement* inputs_number_element = recurrent_layer_element->FirstChildElement("InputsNumber");

    if(!inputs_number_element)
    {
        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
               << "InputsNumber element is nullptr.\n";

        throw logic_error(buffer.str());
    }

    if(inputs_number_element->GetText())
    {
        set_inputs_number(static_cast<Index>(atoi(inputs_number_element->GetText())));
    }
    const tinyxml2::XMLElement* neurons_number_element = recurrent_layer_element->FirstChildElement("NeuronsNumber");

    if(!neurons_number_element)
    {
        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
               << "NeuronsNumber element is nullptr.\n";

        throw logic_error(buffer.str());
    }

    if(neurons_number_element->GetText())
    {
        set_neurons_number(static_cast<Index>(atoi(neurons_number_element->GetText())));
    }
    const tinyxml2::XMLElement* activation_function_element = recurrent_layer_element->FirstChildElement("ActivationFunction");

    if(!activation_function_element)
    {
        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
               << "ActivationFunction element is nullptr.\n";

        throw logic_error(buffer.str());
    }

    if(activation_function_element->GetText())
    {
        set_activation_function(activation_function_element->GetText());
    }
    const tinyxml2::XMLElement* parameters_element = recurrent_layer_element->FirstChildElement("Parameters");

    if(!parameters_element)
    {
        buffer << "OpenNN Exception: RecurrentLayer class.\n"
               << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
               << "Parameters element is nullptr.\n";

        throw logic_error(buffer.str());
    }
    if(parameters_element->GetText())
    {
        const string parameters_string = parameters_element->GetText();

        set_parameters(to_type_vector(parameters_string, ' '));
    }
}
void RecurrentLayer::write_XML(tinyxml2::XMLPrinter& file_stream) const
{
    ostringstream buffer;

    file_stream.OpenElement("RecurrentLayer");

    file_stream.OpenElement("InputsNumber");

    buffer << get_inputs_number();
    file_stream.PushText(buffer.str().c_str());
    file_stream.CloseElement();

    file_stream.OpenElement("NeuronsNumber");

    buffer.str("");
    buffer << get_neurons_number();
    file_stream.PushText(buffer.str().c_str());
    file_stream.CloseElement();

    file_stream.OpenElement("ActivationFunction");
    file_stream.PushText(write_activation_function().c_str());
    file_stream.CloseElement();

    file_stream.OpenElement("Parameters");

    buffer.str("");

    const Tensor<type, 1> parameters = get_parameters();
    const Index parameters_size = parameters.size();

    for(Index i = 0; i < parameters_size; i++)
    {
        buffer << parameters(i);

        if(i != (parameters_size-1)) buffer << " ";
    }

    file_stream.PushText(buffer.str().c_str());
    file_stream.CloseElement();

    file_stream.CloseElement();
}
string RecurrentLayer::write_combinations_python() const
{
    ostringstream buffer;

    buffer << "\t\tcombinations = [None] * " << neurons_number << "\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\t\tcombinations[" << i << "] = " << biases(i);

        for(Index j = 0; j < neurons_number; j++)
        {
            buffer << " +" << recurrent_weights(j, i) << "*self.hidden_states[" << j << "]";
        }

        for(Index j = 0; j < inputs_number; j++)
        {
            buffer << " +" << input_weights(j, i) << "*inputs[" << j << "]";
        }

        buffer << " " << endl;
    }

    buffer << "\t\t" << endl;

    return buffer.str();
}
string RecurrentLayer::write_activations_python() const
{
    ostringstream buffer;

    buffer << "\t\tactivations = [None] * " << neurons_number << "\n" << endl;

    for(Index i = 0; i < neurons_number; i++)
    {
        buffer << "\t\tactivations[" << i << "] = ";

        switch(activation_function)
        {
        case ActivationFunction::HyperbolicTangent:
            buffer << "np.tanh(combinations[" << i << "])\n";
            break;

        case ActivationFunction::RectifiedLinear:
            buffer << "np.maximum(0.0, combinations[" << i << "])\n";
            break;

        case ActivationFunction::Logistic:
            buffer << "1.0/(1.0 + np.exp(-combinations[" << i << "]))\n";
            break;

        case ActivationFunction::Threshold:
            buffer << "1.0 if combinations[" << i << "] >= 0.0 else 0.0\n";
            break;

        case ActivationFunction::SymmetricThreshold:
            buffer << "1.0 if combinations[" << i << "] >= 0.0 else -1.0\n";
            break;

        case ActivationFunction::Linear:
            buffer << "combinations[" << i << "]\n";
            break;

        case ActivationFunction::ScaledExponentialLinear:
            buffer << "1.0507*1.67326*(np.exp(combinations[" << i << "]) - 1.0) if combinations[" << i << "] < 0.0 else 1.0507*combinations[" << i << "]\n";
            break;

        case ActivationFunction::SoftPlus:
            buffer << "np.log(1.0 + np.exp(combinations[" << i << "]))\n";
            break;

        case ActivationFunction::SoftSign:
            buffer << "combinations[" << i << "]/(1.0 - combinations[" << i << "]) if combinations[" << i << "] < 0.0 else combinations[" << i << "]/(1.0 + combinations[" << i << "])\n";
            break;

        case ActivationFunction::ExponentialLinear:
            buffer << "1.0*(np.exp(combinations[" << i << "]) - 1.0) if combinations[" << i << "] < 0.0 else combinations[" << i << "]\n";
            break;

        case ActivationFunction::HardSigmoid:
            break;
        }
    }

    buffer << "\t\tself.hidden_states = activations" << endl;

    return buffer.str();
}
string RecurrentLayer::write_expression_python() const
{
    ostringstream buffer;

    buffer << "\tdef " << layer_name << "(self,inputs):\n" << endl;

    buffer << write_combinations_python();

    buffer << write_activations_python();

    buffer << "\n\t\treturn activations;\n" << endl;

    return buffer.str();
}
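
// For reference, a sketch of the Python this writer would emit for a
// hypothetical layer named "recurrent_layer" with one input, one neuron,
// tanh activation, bias 0.5, input weight 0.1 and recurrent weight 0.25:
//
//   def recurrent_layer(self,inputs):
//
//       combinations = [None] * 1
//       combinations[0] = 0.5 +0.25*self.hidden_states[0] +0.1*inputs[0]
//
//       activations = [None] * 1
//       activations[0] = np.tanh(combinations[0])
//
//       self.hidden_states = activations
//
//       return activations;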