// OpenNN: Open Neural Networks Library
// www.opennn.net
//
// S T A T I S T I C S   S O U R C E
//
// Artificial Intelligence Techniques, SL
// artelnics@artelnics.com

#include "statistics.h"

namespace OpenNN
{

/// Default constructor.

Descriptives::Descriptives()
{
    name = "Descriptives";
    minimum = type(-1.0);
    maximum = type(1);
    mean = type(0);
    standard_deviation = type(1);
}

25
27
28Descriptives::Descriptives(const type& new_minimum, const type& new_maximum,
29 const type& new_mean, const type& new_standard_deviation)
30{
31 name = "Descriptives";
32 minimum = new_minimum;
33 maximum = new_maximum;
34 mean = new_mean;
35 standard_deviation = new_standard_deviation;
36}
37
38
/// Destructor.

Descriptives::~Descriptives()
{}
43
44
45void Descriptives::set(const type& new_minimum, const type& new_maximum,
46 const type& new_mean, const type& new_standard_deviation)
47{
48 minimum = new_minimum;
49 maximum = new_maximum;
50 mean = new_mean;
51 standard_deviation = new_standard_deviation;
52}
53
54
57
58void Descriptives::set_minimum(const type& new_minimum)
59{
60 minimum = new_minimum;
61}
62
63
66
67void Descriptives::set_maximum(const type& new_maximum)
68{
69 maximum = new_maximum;
70}
71
72
75
76void Descriptives::set_mean(const type& new_mean)
77{
78 mean = new_mean;
79}
80
81
84
85void Descriptives::set_standard_deviation(const type& new_standard_deviation)
86{
87 standard_deviation = new_standard_deviation;
88}
89
90
95
96Tensor<type, 1> Descriptives::to_vector() const
97{
98 Tensor<type, 1> statistics_vector(4);
99 statistics_vector[0] = minimum;
100 statistics_vector[1] = maximum;
101 statistics_vector[2] = mean;
102 statistics_vector[3] = standard_deviation;
103
104 return statistics_vector;
105}
106
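// Usage sketch for to_vector() (the numeric values are illustrative assumptions):
//
//     const Descriptives descriptives(type(0), type(10), type(5), type(2));
//     const Tensor<type, 1> values = descriptives.to_vector();
//     // values(0) = minimum, values(1) = maximum, values(2) = mean, values(3) = standard deviation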
107
/// Returns true if the minimum is -1 and the maximum is +1, and false otherwise.

bool Descriptives::has_minimum_minus_one_maximum_one()
{
    if(abs(minimum + type(1)) < type(NUMERIC_LIMITS_MIN) && abs(maximum - type(1)) < type(NUMERIC_LIMITS_MIN))
    {
        return true;
    }

    return false;
}
120
121
/// Returns true if the mean is 0 and the standard deviation is 1, and false otherwise.

bool Descriptives::has_mean_zero_standard_deviation_one()
{
    if(abs(mean) < type(NUMERIC_LIMITS_MIN) && abs(standard_deviation - type(1)) < type(NUMERIC_LIMITS_MIN))
    {
        return true;
    }
    else
    {
        return false;
    }
}
136
137
139
140void Descriptives::print(const string& title) const
141{
142 cout << title << endl
143 << "Minimum: " << minimum << endl
144 << "Maximum: " << maximum << endl
145 << "Mean: " << mean << endl
146 << "Standard deviation: " << standard_deviation << endl;
147}
148
BoxPlot::BoxPlot(const type& new_minimum,
                 const type& new_first_quartile,
                 const type& new_median,
                 const type& new_third_quartile,
                 const type& new_maximum)
{
    minimum = new_minimum;
    first_quartile = new_first_quartile;
    median = new_median;
    third_quartile = new_third_quartile;
    maximum = new_maximum;
}


void BoxPlot::set(const type& new_minimum,
                  const type& new_first_quartile,
                  const type& new_median,
                  const type& new_third_quartile,
                  const type& new_maximum)
{
    minimum = new_minimum;
    first_quartile = new_first_quartile;
    median = new_median;
    third_quartile = new_third_quartile;
    maximum = new_maximum;
}
175
176
180
181void Descriptives::save(const string &file_name) const
182{
183 ofstream file(file_name.c_str());
184
185 if(!file.is_open())
186 {
187 ostringstream buffer;
188
189 buffer << "OpenNN Exception: Statistics Class.\n"
190 << "void save(const string&) const method.\n"
191 << "Cannot open descriptives data file.\n";
192
193 throw logic_error(buffer.str());
194 }
195
196 // Write file
197
198 file << "Minimum: " << minimum << endl
199 << "Maximum: " << maximum << endl
200 << "Mean: " << mean << endl
201 << "Standard deviation: " << standard_deviation << endl;
202
203 // Close file
204
205 file.close();
206}
207
208
210
211
213
215
216
219
220Histogram::Histogram(const Index& bins_number)
221{
222 centers.resize(bins_number);
223 frequencies.resize(bins_number);
224}
225
226
230
231Histogram::Histogram(const Tensor<type, 1>&new_centers,
232 const Tensor<Index, 1>&new_frequencies)
233{
234 centers = new_centers;
235 frequencies = new_frequencies;
236}
237
238
242
Histogram::Histogram(const Tensor<type, 1>& data,
                     const Index& number_of_bins)
{
    const type data_maximum = maximum(data);
    const type data_minimum = minimum(data);
    const type step = (data_maximum - data_minimum) / type(number_of_bins);

    Tensor<type, 1> new_centers(number_of_bins);

    for(Index i = 0; i < number_of_bins; i++)
    {
        new_centers(i) = data_minimum + (type(0.5) * step) + (step * type(i));
    }

    Tensor<Index, 1> new_frequencies(number_of_bins);
    new_frequencies.setZero();

    type value;
    Index corresponding_bin;

    for(Index i = 0; i < data.dimension(0); i++)
    {
        value = data(i);

        // Clamp so that values equal to the maximum fall into the last bin instead of one past it

        corresponding_bin = static_cast<Index>((value - data_minimum) / step);

        if(corresponding_bin >= number_of_bins) corresponding_bin = number_of_bins - 1;

        new_frequencies(corresponding_bin)++;
    }

    centers = new_centers;
    frequencies = new_frequencies;
}
274
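// Usage sketch for the binned constructor above (the values are illustrative assumptions):
//
//     Tensor<type, 1> data(6);
//     data.setValues({type(1), type(2), type(2), type(3), type(3), type(3)});
//
//     const Histogram histogram(data, 3);
//     // histogram.centers     -> bin midpoints between minimum(data) and maximum(data)
//     // histogram.frequencies -> number of samples falling inside each bin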
275
278
279Histogram::Histogram(const Tensor<type, 1>& probability_data)
280{
281 const size_t number_of_bins = 10;
282 type data_maximum = maximum(probability_data);
283 const type data_minimum = type(0);
284
285 if(data_maximum > type(1))
286 {
287 data_maximum = type(100.0);
288 }
289 else
290 {
291 data_maximum = type(1);
292 }
293
294 const type step = (data_maximum - data_minimum) / type(number_of_bins);
295
296 Tensor<type, 1> new_centers(number_of_bins);
297
298 for(size_t i = 0; i < number_of_bins; i++)
299 {
300 new_centers(i) = data_minimum + (type(0.5) * step) + (step * type(i));
301 }
302
303 Tensor<Index, 1> new_frequencies(number_of_bins);
304 new_frequencies.setZero();
305
306 type value;
307 Index corresponding_bin;
308
309 for(Index i = 0; i < probability_data.dimension(0); i++)
310 {
311 value = probability_data(i);
        // Clamp so that values equal to the maximum fall into the last bin instead of one past it

        corresponding_bin = static_cast<Index>((value - data_minimum) / step);

        if(corresponding_bin >= static_cast<Index>(number_of_bins)) corresponding_bin = static_cast<Index>(number_of_bins) - 1;

        new_frequencies(corresponding_bin)++;
315 }
316
317 centers = new_centers;
318 frequencies = new_frequencies;
319}
320
321
/// Returns the number of bins in the histogram.

Index Histogram::get_bins_number() const
{
    return centers.size();
}
328
329
/// Returns the number of bins with zero frequency.

Index Histogram::count_empty_bins() const
{
    const Index size = frequencies.dimension(0);

    Index count = 0;

    for(Index i = 0; i < size; i++)
    {
        if(frequencies(i) == 0) count++;
    }

    return count;
}
345
346
/// Returns the minimum frequency among all the bins.

Index Histogram::calculate_minimum_frequency() const
{
    return minimum(frequencies);
}
353
354
/// Returns the maximum frequency among all the bins.

Index Histogram::calculate_maximum_frequency() const
{
    return maximum(frequencies);
}
361
362
/// Returns the index of the most populated bin.

Index Histogram::calculate_most_populated_bin() const
{
    const Tensor<Index, 0> max_element = frequencies.maximum();

    for(Index i = 0; i < frequencies.size(); i++)
    {
        if(max_element(0) == frequencies(i)) return i;
    }

    return 0;
}
376
377
/// Returns the centers of the bins with the minimum frequency.

Tensor<type, 1> Histogram::calculate_minimal_centers() const
{
382 const Index minimum_frequency = calculate_minimum_frequency();
383
384 Index minimal_indices_size = 0;
385
386 if(frequencies.size() == 0)
387 {
388 Tensor<type, 1> nan(1);
389 nan.setValues({static_cast<type>(NAN)});
390 return nan;
391 }
392
393 for(Index i = 0; i < frequencies.size(); i++)
394 {
395 if(frequencies(i) == minimum_frequency)
396 {
397 minimal_indices_size++;
398 }
399 }
400
401 Index index = 0;
402
403 Tensor<type, 1> minimal_centers(minimal_indices_size);
404
405 for(Index i = 0; i < frequencies.size(); i++)
406 {
407 if(frequencies(i) == minimum_frequency)
408 {
409 minimal_centers(index) = static_cast<type>(centers(i));
410
411 index++;
412 }
413 }
414
415 return minimal_centers;
416}
417
418
/// Returns the centers of the bins with the maximum frequency.

Tensor<type, 1> Histogram::calculate_maximal_centers() const
{
423 const Index maximum_frequency = calculate_maximum_frequency();
424
425 Index maximal_indices_size = 0;
426
427 if(frequencies.size() == 0)
428 {
429 Tensor<type, 1> nan(1);
430 nan.setValues({static_cast<type>(NAN)});
431 return nan;
432 }
433
434 for(Index i = 0; i < frequencies.size(); i++)
435 {
436 if(frequencies(i) == maximum_frequency)
437 {
438 maximal_indices_size++;
439 }
440 }
441
442 Index index = 0;
443
444 Tensor<type, 1> maximal_centers(maximal_indices_size);
445
446 for(Index i = 0; i < frequencies.size(); i++)
447 {
448 if(maximum_frequency == frequencies(i))
449 {
450 maximal_centers(index) = static_cast<type>(centers(i));
451
452 index++;
453 }
454 }
455
456 return maximal_centers;
457}
458
459
462
463Index Histogram::calculate_bin(const type& value) const
464{
465 const Index bins_number = get_bins_number();
466
467 if(bins_number == 0) return 0;
468
469 const type minimum_center = centers[0];
470 const type maximum_center = centers[bins_number - 1];
471
472 const type length = static_cast<type>(maximum_center - minimum_center)/static_cast<type>(bins_number - 1.0);
473
474 type minimum_value = centers[0] - length / type(2);
475 type maximum_value = minimum_value + length;
476
477 if(value < maximum_value) return 0;
478
479 for(Index j = 1; j < bins_number - 1; j++)
480 {
481 minimum_value = minimum_value + length;
482 maximum_value = maximum_value + length;
483
484 if(value >= minimum_value && value < maximum_value) return j;
485 }
486
487 if(value >= maximum_value)
488 {
489 return bins_number - 1;
490 }
491 else
492 {
493 ostringstream buffer;
494
495 buffer << "OpenNN Exception: Statistics Class.\n"
496 << "Index Histogram::calculate_bin(const type&) const.\n"
497 << "Unknown return value.\n";
498
499 throw logic_error(buffer.str());
500 }
501}
502
503
506
507Index Histogram::calculate_frequency(const type&value) const
508{
509 const Index bins_number = get_bins_number();
510
511 if(bins_number == 0) return 0;
512
513 const Index bin_number = calculate_bin(value);
514
515 const Index frequency = frequencies[bin_number];
516
517 return frequency;
518}
519
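// Usage sketch relating calculate_bin() and calculate_frequency() (the histogram and
// the probed value are assumptions for illustration):
//
//     const Histogram histogram(data, 10);
//     const Index bin = histogram.calculate_bin(type(0.5));             // bin that would contain 0.5
//     const Index frequency = histogram.calculate_frequency(type(0.5)); // samples already counted in that bin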
520
521void Histogram::save(const string& histogram_file_name) const
522{
523 const Index number_of_bins = centers.dimension(0);
524 ofstream histogram_file(histogram_file_name);
525
526 histogram_file << "centers,frequencies" << endl;
527 for(Index i = 0; i < number_of_bins; i++)
528 {
529 histogram_file << centers(i) << ",";
530 histogram_file << frequencies(i) << endl;
531 }
532
533 histogram_file.close();
534}
535
536
539
540type minimum(const Tensor<type, 1>& vector)
541{
542 const Index size = vector.dimension(0);
543
544 if(size == 0) return type(NAN);
545
546 type minimum = numeric_limits<type>::max();
547
548 for(Index i = 0; i < size; i++)
549 {
550 if(vector(i) < minimum && !isnan(vector(i)))
551 {
552 minimum = vector(i);
553 }
554 }
555
556 return minimum;
557}
558
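// Note: minimum() and maximum() on type tensors skip NaN entries rather than
// propagating them. A minimal sketch of the behaviour (assumed values):
//
//     Tensor<type, 1> v(3);
//     v.setValues({type(NAN), type(4), type(2)});
//
//     minimum(v);   // type(2); the NaN entry is ignored
//     maximum(v);   // type(4)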
559
562
563Index minimum(const Tensor<Index, 1>& vector)
564{
565 const Index size = vector.size();
566
567 if(size == 0) return NAN;
568
569 Index minimum = numeric_limits<Index>::max();
570
571 for(Index i = 0; i < size; i++)
572 {
573 if(vector(i) < minimum)
574 {
575 minimum = vector(i);
576 }
577 }
578
579 return minimum;
580}
581
582
586
587type minimum(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
588{
589 const Index size = indices.dimension(0);
590
591 if(size == 0) return type(NAN);
592
593 type minimum = numeric_limits<type>::max();
594
595 Index index;
596
597 for(Index i = 0; i < size; i++)
598 {
599 index = indices(i);
600
601 if(vector(index) < minimum && !isnan(vector(index)))
602 {
603 minimum = vector(index);
604 }
605 }
606
607 return minimum;
608}
609
610
613
614type maximum(const Tensor<type, 1>& vector)
615{
616 const Index size = vector.dimension(0);
617
618 if(size == 0) return type(NAN);
619
620 type maximum = -numeric_limits<type>::max();
621
622 for(Index i = 0; i < size; i++)
623 {
624 if(!isnan(vector(i)) && vector(i) > maximum)
625 {
626 maximum = vector(i);
627 }
628 }
629
630 return maximum;
631}
632
633
637
638type maximum(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
639{
640 const Index size = indices.dimension(0);
641
642 if(size == 0) return type(NAN);
643
644 type maximum = -numeric_limits<type>::max();
645
646 Index index;
647
648 for(Index i = 0; i < size; i++)
649 {
650 index = indices(i);
651
652 if(!isnan(vector(index)) && vector(index) > maximum)
653 {
654 maximum = vector(index);
655 }
656 }
657
658 return maximum;
659}
660
663
664Index maximum(const Tensor<Index, 1>& vector)
665{
666 const Index size = vector.size();
667
668 if(size == 0) return NAN;
669
670 Index maximum = -numeric_limits<Index>::max();
671
672 for(Index i = 0; i < size; i++)
673 {
674 if(vector(i) > maximum)
675 {
676 maximum = vector(i);
677 }
678 }
679
680 return maximum;
681}
682
683
690
691Tensor<type, 1> columns_maximums(const Tensor<type, 2>& matrix,
692 const Tensor<Index, 1>& rows_indices,
693 const Tensor<Index, 1>& columns_indices)
694{
695 const Index rows_number = matrix.dimension(0);
696 const Index columns_number = matrix.dimension(1);
697
698 Tensor<Index, 1> used_columns_indices;
699
700 if(columns_indices.dimension(0) == 0)
701 {
702 used_columns_indices.resize(columns_number);
703
704 for(Index i = 0; i < columns_number; i++)
705 {
706 used_columns_indices(i) = i;
707 }
708 }
709 else
710 {
711 used_columns_indices = columns_indices;
712 }
713
714 Tensor<Index, 1> used_rows_indices;
715
716 if(rows_indices.dimension(0) == 0)
717 {
718 used_rows_indices.resize(rows_number);
719
720 for(Index i = 0; i < rows_number; i++)
721 {
722 used_rows_indices(i) = i;
723 }
724 }
725 else
726 {
727 used_rows_indices = rows_indices;
728 }
729
730 const Index rows_indices_size = used_rows_indices.size();
731 const Index columns_indices_size = used_columns_indices.size();
732
733 Tensor<type, 1> maximums(columns_indices_size);
734
735 Index row_index;
736 Index column_index;
737
738 Tensor<type, 1> column(rows_indices_size);
739
740 for(Index j = 0; j < columns_indices_size; j++)
741 {
742 column_index = used_columns_indices(j);
743
744 for(Index i = 0; i < rows_indices_size; i++)
745 {
746 row_index = used_rows_indices(i);
747
748 column(i) = matrix(row_index,column_index);
749 }
750
751 maximums(j) = maximum(column);
752 }
753
754 return maximums;
755}
756
757
762
763type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end)
764{
765#ifdef OPENNN_DEBUG
766
767 if(begin > end)
768 {
769 ostringstream buffer;
770
771 buffer << "OpenNN Exception: Statistics class.\n"
772 << "type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end) \n"
773 << "Begin must be less or equal than end.\n";
774
775 throw logic_error(buffer.str());
776 }
777
778#endif
779
780 if(end == begin) return vector[begin];
781
782 type sum = type(0);
783
784 for(Index i = begin; i <= end; i++)
785 {
786 sum += vector(i);
787 }
788
789 return sum /static_cast<type>(end-begin+1);
790}
791
792
795
796type mean(const Tensor<type, 1>& vector)
797{
798 const Index size = vector.dimension(0);
799
800 if(size == 0) return type(0);
801
802#ifdef OPENNN_DEBUG
803
804 if(size == 0)
805 {
806 ostringstream buffer;
807
808 buffer << "OpenNN Exception: Statistics Class.\n"
809 << "type mean(const Tensor<type, 1>& vector, const Index& begin, const Index& end) "
810 "const method.\n"
811 << "Size must be greater than zero.\n";
812
813 throw logic_error(buffer.str());
814 }
815
816#endif
817
818 type sum = type(0);
819
820 Index count = 0;
821
822 for(Index i = 0; i < size; i++)
823 {
824 if(!isnan(vector(i)))
825 {
826 sum += vector(i);
827 count++;
828 }
829 }
830
831 const type mean = sum /static_cast<type>(count);
832
833 return mean;
834}
835
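// Sketch of the NaN handling above (assumed values): the divisor is the number of
// finite entries, not the tensor size.
//
//     Tensor<type, 1> v(4);
//     v.setValues({type(1), type(3), type(NAN), type(5)});
//
//     mean(v);   // (1 + 3 + 5) / 3 = type(3)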
836
839
840type variance(const Tensor<type, 1>& vector)
841{
842 const Index size = vector.dimension(0);
843
844#ifdef OPENNN_DEBUG
845
846 if(size == 0)
847 {
848 ostringstream buffer;
849
850 buffer << "OpenNN Exception: Statistics Class.\n"
851 << "type variance(const Tensor<type, 1>& vector) "
852 "const method.\n"
853 << "Size must be greater than zero.\n";
854
855 throw logic_error(buffer.str());
856 }
857
858#endif
859
860 type sum = type(0);
861 type squared_sum = type(0);
862
863 Index count = 0;
864
865 for(Index i = 0; i < size; i++)
866 {
867 if(!isnan(vector(i)))
868 {
869 sum += vector(i);
870 squared_sum += vector(i) * vector(i);
871
872 count++;
873 }
874 }
875
876 if(count <= 1) return type(0);
877
878 const type variance = squared_sum/static_cast<type>(count - 1)
879 - (sum/static_cast<type>(count))*(sum/static_cast<type>(count))*static_cast<type>(count)/static_cast<type>(count-1);
880
881 return variance;
882}
883
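// The expression above is the one-pass sample variance identity
//
//     variance = (sum(x_i^2) - (sum(x_i))^2 / n) / (n - 1)
//
// rearranged so that only the running sum and squared sum over the non-NaN entries
// are needed; n is the count of finite values.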
884
887
888type variance(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
889{
890 const Index size = indices.dimension(0);
891
892#ifdef OPENNN_DEBUG
893
894 if(size == 0)
895 {
896 ostringstream buffer;
897
898 buffer << "OpenNN Exception: Statistics Class.\n"
899 << "type variance(const Tensor<type, 1>&, const Tensor<Index, 1>&) "
900 "const method.\n"
901 << "Indeces size must be greater than zero.\n";
902
903 throw logic_error(buffer.str());
904 }
905
906#endif
907
908 type sum = type(0);
909 type squared_sum = type(0);
910
911 Index count = 0;
912
913 Index index = 0;
914
915 for(Index i = 0; i < size; i++)
916 {
917 index = indices(i);
918
919 if(!isnan(vector(index)))
920 {
921 sum += vector(index);
922 squared_sum += vector(index) * vector(index);
923
924 count++;
925 }
926 }
927
928 if(count <= 1) return type(0);
929
930 const type variance = squared_sum/static_cast<type>(count - 1) -(sum/static_cast<type>(count))*(sum/static_cast<type>(count))*static_cast<type>(count)/static_cast<type>(count-1);
931
932 return variance;
933}
934
935
938
939type standard_deviation(const Tensor<type, 1>& vector)
940{
941#ifdef OPENNN_DEBUG
942
943 const Index size = vector.dimension(0);
944
945 if(size == 0)
946 {
947 ostringstream buffer;
948
949 buffer << "OpenNN Exception: Statistics Class.\n"
950 << "type standard_deviation(const Tensor<type, 1>&) const method.\n"
951 << "Size must be greater than zero.\n";
952
953 throw logic_error(buffer.str());
954 }
955
956#endif
957
958 if(vector.size() == 0) return type(0);
959
    if(variance(vector) < static_cast<type>(1e-9))
    {
        return static_cast<type>(0);
    }
    else
    {
        return sqrt(variance(vector));
    }
965}
966
967
970
971type standard_deviation(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
972{
973#ifdef OPENNN_DEBUG
974
975 const Index size = vector.dimension(0);
976
977 if(size == 0)
978 {
979 ostringstream buffer;
980
981 buffer << "OpenNN Exception: Statistics Class.\n"
982 << "type standard_deviation(const Tensor<type, 1>&, const Tensor<Index, 1>&) const method.\n"
983 << "Size must be greater than zero.\n";
984
985 throw logic_error(buffer.str());
986 }
987
988#endif
989
990 if(variance(vector, indices) < static_cast<type>(1e-9))
991 {
992 return static_cast<type>(0);
993 }else
994 {
995 return sqrt(variance(vector, indices));
996 }
997}
998
999
1000Tensor<type, 1> standard_deviation(const Tensor<type, 1>& vector, const Index& period)
1001{
1002 const Index size = vector.dimension(0);
1003
1004 Tensor<type, 1> std(size);
1005
1006 type mean_value = type(0);
1007 type sum = type(0);
1008
1009 for(Index i = 0; i < size; i++)
1010 {
1011 const Index begin = i < period ? 0 : i - period + 1;
1012 const Index end = i;
1013
1014 mean_value = mean(vector, begin,end);
1015
1016 for(Index j = begin; j < end+1; j++)
1017 {
1018 sum += (vector(j) - mean_value) *(vector(j) - mean_value);
1019 }
1020
1021 std(i) = sqrt(sum / type(period));
1022
1023 mean_value = type(0);
1024 sum = type(0);
1025 }
1026
1027 return std;
1028}
1029
1030
1033
1034type asymmetry(const Tensor<type, 1>& vector)
1035{
1036 const Index size = vector.dimension(0);
1037
1038#ifdef OPENNN_DEBUG
1039
1040 if(size == 0)
1041 {
1042 ostringstream buffer;
1043
1044 buffer << "OpenNN Exception: Statistics Class.\n"
1045 << "type asymmetry(const Tensor<type, 1>& vector) const method.\n"
1046 << "Size must be greater than zero.\n";
1047
1048 throw logic_error(buffer.str());
1049 }
1050
1051#endif
1052
    if(size == 0 || size == 1)
    {
        return type(0);
    }
1057
1058 const type standard_deviation_value = standard_deviation(vector);
1059
1060 const type mean_value = mean(vector);
1061
1062 type sum = type(0);
1063
1064 Index count = 0;
1065
1066 for(Index i = 0; i < size; i++)
1067 {
1068 if(!isnan(vector(i)))
1069 {
1070 sum += (vector(i) - mean_value) *(vector(i) - mean_value) *(vector(i) - mean_value);
1071
1072 count++;
1073 }
1074 }
1075
1076 const type numerator = sum / type(count);
1077 const type denominator = standard_deviation_value * standard_deviation_value * standard_deviation_value;
1078
1079 return numerator/denominator;
1080
1081}
1082
1085
1086type kurtosis(const Tensor<type, 1>& vector)
1087{
1088 const Index size = vector.dimension(0);
1089#ifdef OPENNN_DEBUG
1090
1091 if(size == 0)
1092 {
1093 ostringstream buffer;
1094
1095 buffer << "OpenNN Exception: Statistics Class.\n"
1096 << "type kurtosis(const Tensor<type, 1>& vector) const method.\n"
1097 << "Size must be greater than zero.\n";
1098
1099 throw logic_error(buffer.str());
1100 }
1101
1102#endif
1103
1104 if(size == 1)
1105 {
1106 return type(0);
1107 }
1108
1109 const type standard_deviation_value = standard_deviation(vector);
1110
1111 const type mean_value = mean(vector);
1112
1113 type sum = type(0);
1114
1115 Index count = 0;
1116
1117 for(Index i = 0; i < size; i++)
1118 {
1119 if(!isnan(vector(i)))
1120 {
1121 sum += (vector(i) - mean_value)*(vector(i) - mean_value)*(vector(i) - mean_value)*(vector(i) - mean_value);
1122
1123 count++;
1124 }
1125 }
1126
1127 const type numerator = sum / type(count);
1128 const type denominator = standard_deviation_value*standard_deviation_value*standard_deviation_value*standard_deviation_value;
1129
1130 return numerator/denominator - type(3);
1131}
1132
1133
1136
1137type median(const Tensor<type, 1>& vector)
1138{
1139 const Index size = vector.dimension(0);
1140
1141 // Fix missing values
1142
1143 Index new_size = 0;
1144
1145 for(Index i = 0; i < size; i++)
1146 {
1147 if(!isnan(vector(i))) new_size++;
1148 }
1149
1150 Tensor<type, 1> sorted_vector;
1151 sorted_vector.resize(new_size);
1152
1153 Index sorted_index = 0;
1154
1155 for(Index i = 0; i < size; i++)
1156 {
1157 if(!isnan(vector(i)))
1158 {
1159 sorted_vector(sorted_index) = vector(i);
1160
1161 sorted_index++;
1162 }
1163 }
1164
1165 // Calculate median
1166
1167 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
1168
1169 Index median_index;
1170
1171 if(new_size % 2 == 0)
1172 {
1173 median_index = static_cast<Index>(new_size / 2);
1174
1175 return (sorted_vector(median_index-1) + sorted_vector(median_index)) / static_cast<type>(2.0);
1176 }
1177 else
1178 {
1179 median_index = static_cast<Index>(new_size / 2);
1180
1181 return sorted_vector(median_index);
1182 }
1183}
1184
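// Sketch of the two branches above (assumed values): NaNs are dropped before sorting.
//
//     Tensor<type, 1> odd(3);
//     odd.setValues({type(3), type(1), type(2)});
//     median(odd);    // type(2)
//
//     Tensor<type, 1> even(4);
//     even.setValues({type(4), type(1), type(3), type(2)});
//     median(even);   // (2 + 3) / 2 = type(2.5)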
1185
1188
1189Tensor<type, 1> quartiles(const Tensor<type, 1>& vector)
1190{
1191 const Index size = vector.dimension(0);
1192
1193 // Fix missing values
1194
1195 Index new_size = 0;
1196
1197 for(Index i = 0; i < size; i++)
1198 {
1199 if(!isnan(vector(i))) new_size++;
1200 }
1201
1202 Tensor<type, 1> sorted_vector;
1203 sorted_vector.resize(new_size);
1204
1205 Index sorted_index = 0;
1206
1207 for(Index i = 0; i < size; i++)
1208 {
1209 if(!isnan(vector(i)))
1210 {
1211 sorted_vector(sorted_index) = vector(i);
1212
1213 sorted_index++;
1214 }
1215 }
1216
1217 sort(sorted_vector.data(), sorted_vector.data() + new_size, less<type>());
1218
1219 // Calculate quartiles
1220
1221 Tensor<type, 1> first_sorted_vector(new_size/2);
1222 Tensor<type, 1> last_sorted_vector(new_size/2);
1223
1224 if(new_size % 2 == 0)
1225 {
1226 for(Index i = 0; i < new_size/2 ; i++)
1227 {
1228 first_sorted_vector(i) = sorted_vector(i);
1229 last_sorted_vector(i) = sorted_vector[i + new_size/2];
1230 }
1231 }
1232 else
1233 {
1234 for(Index i = 0; i < new_size/2 ; i++)
1235 {
1236 first_sorted_vector(i) = sorted_vector(i);
1237 last_sorted_vector(i) = sorted_vector[i + new_size/2 + 1];
1238 }
1239 }
1240
1241
1242 Tensor<type, 1> quartiles(3);
1243
1244 if(new_size == 1)
1245 {
1246 quartiles(0) = sorted_vector(0);
1247 quartiles(1) = sorted_vector(0);
1248 quartiles(2) = sorted_vector(0);
1249 }
1250 else if(new_size == 2)
1251 {
1252 quartiles(0) = (sorted_vector(0)+sorted_vector(1))/ type(4);
1253 quartiles(1) = (sorted_vector(0)+sorted_vector(1))/ type(2);
        quartiles(2) = (sorted_vector(0)+sorted_vector(1))*type(3)/type(4);
1255 }
1256 else if(new_size == 3)
1257 {
1258 quartiles(0) = (sorted_vector(0)+sorted_vector(1))/ type(2);
1259 quartiles(1) = sorted_vector(1);
1260 quartiles(2) = (sorted_vector(2)+sorted_vector(1))/ type(2);
1261 }
1262 else
1263 {
1264 quartiles(0) = median(first_sorted_vector);
1265 quartiles(1) = median(sorted_vector);
1266 quartiles(2) = median(last_sorted_vector);
1267 }
1268 return quartiles;
1269}
1270
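// Sketch of the general case above (assumed values): the data are split around the
// median and each half is reduced with median() again.
//
//     Tensor<type, 1> v(5);
//     v.setValues({type(1), type(2), type(3), type(4), type(5)});
//
//     const Tensor<type, 1> q = quartiles(v);
//     // q(0) = type(1.5), q(1) = type(3), q(2) = type(4.5)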
1271
1275
1276Tensor<type, 1> quartiles(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
1277{
1278 const Index indices_size = indices.dimension(0);
1279
1280 // Fix missing values
1281
1282 Index index;
1283 Index new_size = 0;
1284
1285 for(Index i = 0; i < indices_size; i++)
1286 {
1287 index = indices(i);
1288
1289 if(!isnan(vector(index))) new_size++;
1290 }
1291
1292 Tensor<type, 1> sorted_vector;
1293 sorted_vector.resize(new_size);
1294
1295 Index sorted_index = 0;
1296
1297 for(Index i = 0; i < indices_size; i++)
1298 {
1299 index = indices(i);
1300
1301 if(!isnan(vector(index)))
1302 {
1303 sorted_vector(sorted_index) = vector(index);
1304
1305 sorted_index++;
1306 }
1307 }
1308
1309 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
1310
1311 // Calculate quartiles
1312
1313 Tensor<type, 1> first_sorted_vector(new_size/2);
1314 Tensor<type, 1> last_sorted_vector(new_size/2);
1315
1316 for(Index i = 0; i < new_size/2 ; i++)
1317 {
1318 first_sorted_vector(i) = sorted_vector(i);
1319 }
1320
1321 for(Index i = 0; i < new_size/2; i++)
1322 {
1323 last_sorted_vector(i) = sorted_vector(i + new_size - new_size/2);
1324 }
1325
1326 Tensor<type, 1> quartiles(3);
1327
1328 if(new_size == 1)
1329 {
1330 quartiles(0) = sorted_vector(0);
1331 quartiles(1) = sorted_vector(0);
1332 quartiles(2) = sorted_vector(0);
1333 }
1334 else if(new_size == 2)
1335 {
1336 quartiles(0) = (sorted_vector(0)+sorted_vector(1))/ type(4);
1337 quartiles(1) = (sorted_vector(0)+sorted_vector(1))/ type(2);
        quartiles(2) = (sorted_vector(0)+sorted_vector(1))*type(3)/type(4);
1339 }
1340 else if(new_size == 3)
1341 {
1342 quartiles(0) = (sorted_vector(0)+sorted_vector(1))/ type(2);
1343 quartiles(1) = sorted_vector(1);
1344 quartiles(2) = (sorted_vector(2)+sorted_vector(1))/ type(2);
1345 }
1346 else if(new_size % 2 == 0)
1347 {
1348 Index median_index = static_cast<Index>(first_sorted_vector.size() / 2);
1349 quartiles(0) = (first_sorted_vector(median_index-1) + first_sorted_vector(median_index)) / static_cast<type>(2.0);
1350
1351 median_index = static_cast<Index>(new_size / 2);
1352 quartiles(1) = (sorted_vector(median_index-1) + sorted_vector(median_index)) / static_cast<type>(2.0);
1353
1354 median_index = static_cast<Index>(last_sorted_vector.size() / 2);
1355 quartiles(2) = (last_sorted_vector(median_index-1) + last_sorted_vector(median_index)) / static_cast<type>(2.0);
1356 }
1357 else
1358 {
1359 quartiles(0) = sorted_vector(new_size/4);
1360 quartiles(1) = sorted_vector(new_size/2);
1361 quartiles(2) = sorted_vector(new_size*3/4);
1362 }
1363
1364 return quartiles;
1365}
1366
1367
1368
1371
1372BoxPlot box_plot(const Tensor<type, 1>& vector)
1373{
1374 BoxPlot box_plot;
1375
1376 if(vector.dimension(0) == 0) {
1377 box_plot.minimum = type(NAN);
1378 box_plot.first_quartile = type(NAN);
1379 box_plot.median = type(NAN);
1380 box_plot.third_quartile = type(NAN);
1381 box_plot.maximum = type(NAN);
1382 return box_plot;
1383 }
1384
1385
1386 const Tensor<type, 1> quartiles = OpenNN::quartiles(vector);
1387
1388 box_plot.minimum = minimum(vector);
1389 box_plot.first_quartile = quartiles(0);
1390 box_plot.median = quartiles(1);
1391 box_plot.third_quartile = quartiles(2);
1392 box_plot.maximum = maximum(vector);
1393
1394 return box_plot;
1395}
1396
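// Usage sketch (assumed values): the five BoxPlot members mirror the order used by set().
//
//     Tensor<type, 1> v(5);
//     v.setValues({type(2), type(4), type(6), type(8), type(10)});
//
//     const BoxPlot plot = box_plot(v);
//     // plot.minimum = 2, plot.first_quartile = 3, plot.median = 6,
//     // plot.third_quartile = 9, plot.maximum = 10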
1397
1401
1402BoxPlot box_plot(const Tensor<type, 1>& vector, const Tensor<Index, 1>& indices)
1403{
1404 BoxPlot box_plot;
1405
1406 if(vector.dimension(0) == 0 || indices.dimension(0) == 0) return box_plot;
1407
1408 const Tensor<type, 1> quartiles = OpenNN::quartiles(vector, indices);
1409
1410 box_plot.minimum = minimum(vector, indices);
1411 box_plot.first_quartile = quartiles(0);
1412 box_plot.median = quartiles(1);
1413 box_plot.third_quartile = quartiles(2);
1414 box_plot.maximum = maximum(vector, indices);
1415
1416 return box_plot;
1417}
1418
1419
1428
1429Histogram histogram(const Tensor<type, 1>& vector, const Index& bins_number)
1430{
1431#ifdef OPENNN_DEBUG
1432
1433 if(bins_number < 1)
1434 {
1435 ostringstream buffer;
1436
1437 buffer << "OpenNN Exception: Statistics Class.\n"
1438 << "Histogram histogram(const Tensor<type, 1>&, "
1439 "const Index&) const method.\n"
1440 << "Number of bins is less than one.\n";
1441
1442 throw logic_error(buffer.str());
1443 }
1444
1445#endif
1446
1447 const Index size = vector.dimension(0);
1448
1449 Tensor<type, 1> minimums(bins_number);
1450 Tensor<type, 1> maximums(bins_number);
1451
1452 Tensor<type, 1> centers(bins_number);
1453 Tensor<Index, 1> frequencies(bins_number);
1454 frequencies.setZero();
1455
1456 Index unique_values_number = 1;
1457 Tensor<type, 1> old_unique_values(1);
1458 Tensor<type, 1> unique_values(1);
1459 unique_values(0) = vector(0);
1460 old_unique_values = unique_values;
1461
1462 for(Index i = 1; i < size; i++)
1463 {
1464 if(find(unique_values.data(), unique_values.data()+unique_values.size(), vector(i))
1465 == unique_values.data()+unique_values.size())
1466 {
1467 unique_values_number++;
1468
1469 unique_values.resize(unique_values_number);
1470
1471 for(Index j = 0; j < unique_values_number-1; j++) unique_values(j) = old_unique_values(j);
1472
1473 unique_values(unique_values_number-1) = vector(i);
1474
1475 old_unique_values = unique_values;
1476 }
1477
1478 if(unique_values_number > bins_number) break;
1479 }
1480
1481 if(unique_values_number <= bins_number)
1482 {
1483 sort(unique_values.data(), unique_values.data() + unique_values.size(), less<type>());
1484
1485 centers = unique_values;
1486 minimums = unique_values;
1487 maximums = unique_values;
1488
1489 frequencies.resize(unique_values_number);
1490 frequencies.setZero();
1491
1492 for(Index i = 0; i < size; i++)
1493 {
1494 if(isnan(vector(i))) continue;
1495
1496 for(Index j = 0; j < unique_values_number; j++)
1497 {
1498 if(vector(i) - centers(j) < static_cast<type>(1e-6))
1499 {
1500 frequencies(j)++;
1501 break;
1502 }
1503 }
1504 }
1505 }
1506 else
1507 {
1508 const type min = minimum(vector);
1509 const type max = maximum(vector);
1510
1511 const type length = (max - min) /static_cast<type>(bins_number);
1512
1513 minimums(0) = min;
1514 maximums(0) = min + length;
1515 centers(0) = (maximums(0) + minimums(0)) /static_cast<type>(2.0);
1516
1517 // Calculate bins center
1518
1519 for(Index i = 1; i < bins_number; i++)
1520 {
1521 minimums(i) = minimums(i - 1) + length;
1522 maximums(i) = maximums(i - 1) + length;
1523
1524 centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1525 }
1526
1527 // Calculate bins frequency
1528
1529 const Index size = vector.dimension(0);
1530
1531 for(Index i = 0; i < size; i++)
1532 {
1533 if(isnan(vector(i)))
1534 {
1535 continue;
1536 }
1537
1538 for(Index j = 0; j < bins_number - 1; j++)
1539 {
1540 if(vector(i) >= minimums(j) && vector(i) < maximums(j))
1541 {
1542 frequencies(j)++;
1543 break;
1544 }
1545 }
1546
1547 if(vector(i) >= minimums(bins_number - 1))
1548 {
1549 frequencies(bins_number - 1)++;
1550 }
1551 }
1552 }
1553
1554 Histogram histogram;
1555 histogram.centers = centers;
1556 histogram.minimums = minimums;
1557 histogram.maximums = maximums;
1558 histogram.frequencies = frequencies;
1559
1560 return histogram;
1561}
1562
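// Usage sketch (assumed values): when the number of distinct values does not exceed
// bins_number, each distinct value becomes its own bin; otherwise equal-width bins
// between the minimum and the maximum are used.
//
//     Tensor<type, 1> v(6);
//     v.setValues({type(0), type(0), type(1), type(1), type(1), type(2)});
//
//     const Histogram h = histogram(v, 10);
//     // h.centers     -> {0, 1, 2}
//     // h.frequencies -> {2, 3, 1}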
1563
1573
1574Histogram histogram_centered(const Tensor<type, 1>& vector, const type& center, const Index& bins_number)
1575{
1576#ifdef OPENNN_DEBUG
1577
1578 if(bins_number < 1)
1579 {
1580 ostringstream buffer;
1581
1582 buffer << "OpenNN Exception: Statistics Class.\n"
1583 << "Histogram histogram_centered(const Tensor<type, 1>&, "
1584 "const type&, const Index&) const method.\n"
1585 << "Number of bins is less than one.\n";
1586
1587 throw logic_error(buffer.str());
1588 }
1589
1590#endif
1591
1592 Index bin_center;
1593
1594 if(bins_number%2 == 0)
1595 {
1596 bin_center = static_cast<Index>(static_cast<type>(bins_number)/static_cast<type>(2.0));
1597 }
1598 else
1599 {
1600 bin_center = static_cast<Index>(static_cast<type>(bins_number)/static_cast<type>(2.0) + static_cast<type>(0.5));
1601 }
1602
1603 Tensor<type, 1> minimums(bins_number);
1604 Tensor<type, 1> maximums(bins_number);
1605
1606 Tensor<type, 1> centers(bins_number);
1607 Tensor<Index, 1> frequencies(bins_number);
1608 frequencies.setZero();
1609
1610 const type min = minimum(vector);
1611 const type max = maximum(vector);
1612
1613 const type length = (max - min)/static_cast<type>(bins_number);
1614
1615 minimums(bin_center-1) = center - length;
1616 maximums(bin_center-1) = center + length;
1617 centers(bin_center-1) = center;
1618
1619 // Calculate bins center
1620
1621 for(Index i = bin_center; i < bins_number; i++) // Upper centers
1622 {
1623 minimums(i) = minimums(i - 1) + length;
1624 maximums(i) = maximums(i - 1) + length;
1625
1626 centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1627 }
1628
1629 for(Index i = static_cast<Index>(bin_center)-2; i >= 0; i--) // Lower centers
1630 {
1631 minimums(i) = minimums(i + 1) - length;
1632 maximums(i) = maximums(i + 1) - length;
1633
1634 centers(i) = (maximums(i) + minimums(i)) /static_cast<type>(2.0);
1635 }
1636
1637 // Calculate bins frequency
1638
1639 const Index size = vector.dimension(0);
1640
1641 for(Index i = 0; i < size; i++)
1642 {
1643 for(Index j = 0; j < bins_number - 1; j++)
1644 {
1645 if(vector(i) >= minimums(j) && vector(i) < maximums(j))
1646 {
1647 frequencies(j)++;
1648 }
1649 }
1650
1651 if(vector(i) >= minimums(bins_number - 1))
1652 {
1653 frequencies(bins_number - 1)++;
1654 }
1655 }
1656
1657 Histogram histogram(bins_number);
1658 histogram.centers = centers;
1659 histogram.minimums = minimums;
1660 histogram.maximums = maximums;
1661 histogram.frequencies = frequencies;
1662
1663 return histogram;
1664}
1665
1666
1674
1675Histogram histogram(const Tensor<bool, 1>& v)
1676{
1677 Tensor<type, 1> minimums(2);
1678 minimums.setZero();
1679 Tensor<type, 1> maximums(2);
1680 maximums.setConstant(type(1));
1681
1682 Tensor<type, 1> centers(2);
1683 centers.setValues({type(0), type(1)});
1684 Tensor<Index, 1> frequencies(2);
1685 frequencies.setZero();
1686
1687 // Calculate bins frequency
1688
1689 const Index size = v.dimension(0);
1690
1691 for(Index i = 0; i < size; i++)
1692 {
1693 for(Index j = 0; j < 2; j++)
1694 {
1695 if(static_cast<Index>(v(i)) == static_cast<Index>(minimums(j)))
1696 {
1697 frequencies(j)++;
1698 }
1699 }
1700 }
1701
1702 Histogram histogram(2);
1703 histogram.centers = centers;
1704 histogram.minimums = minimums;
1705 histogram.maximums = maximums;
1706 histogram.frequencies = frequencies;
1707
1708 return histogram;
1709}
1710
1711
1715
1716Tensor<Index, 1> total_frequencies(const Tensor<Histogram, 1>& histograms)
1717{
1718 const Index histograms_number = histograms.size();
1719
1720 Tensor<Index, 1> total_frequencies(histograms_number);
1721
1722 for(Index i = 0; i < histograms_number; i++)
1723 {
1724 total_frequencies(i) = histograms(i).frequencies(i);
1725 }
1726
1727 return total_frequencies;
1728}
1729
1730
1738
1739Tensor<Histogram, 1> histograms(const Tensor<type, 2>& matrix, const Index& bins_number)
1740{
1741 const Index rows_number = matrix.dimension(0);
1742 const Index columns_number = matrix.dimension(1);
1743
1744 Tensor<Histogram, 1> histograms(columns_number);
1745
1746 Tensor<type, 1> column(rows_number);
1747
1748 for(Index i = 0; i < columns_number; i++)
1749 {
1750 column = matrix.chip(i,1);
1751
1752 histograms(i) = histogram(column, bins_number);
1753 }
1754
1755 return histograms;
1756}
1757
1758
1763
1764Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>& matrix)
1765{
1766 const Index rows_number = matrix.dimension(0);
1767 const Index columns_number = matrix.dimension(1);
1768
1769#ifdef OPENNN_DEBUG
1770
1771 if(rows_number == 0)
1772 {
1773 ostringstream buffer;
1774
1775 buffer << "OpenNN Exception: Statistics Class.\n"
1776 << "Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>&) "
1777 "const method.\n"
1778 << "Number of rows must be greater than one.\n";
1779
1780 throw logic_error(buffer.str());
1781 }
1782
1783#endif
1784
1785 Tensor<Descriptives, 1> descriptives(columns_number);
1786
1787 Tensor<type, 1> column(rows_number);
1788
1789 #pragma omp parallel for private(column)
1790
1791 for(Index i = 0; i < columns_number; i++)
1792 {
1793 column = matrix.chip(i,1);
1794
1795 descriptives(i) = OpenNN::descriptives(column);
1796 }
1797
1798 return descriptives;
1799}
1800
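// Usage sketch (assumed values): one Descriptives structure is produced per column.
//
//     Tensor<type, 2> matrix(3, 2);
//     matrix.setValues({{type(1), type(10)},
//                       {type(2), type(20)},
//                       {type(3), type(30)}});
//
//     const Tensor<Descriptives, 1> columns_descriptives = descriptives(matrix);
//     // columns_descriptives(0).mean = 2, columns_descriptives(1).mean = 20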
1801
1807
1808Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>& matrix,
1809 const Tensor<Index, 1>& row_indices,
1810 const Tensor<Index, 1>& columns_indices)
1811{
1812 const Index row_indices_size = row_indices.size();
1813 const Index columns_indices_size = columns_indices.size();
1814
1815 Tensor<Descriptives, 1> descriptives(columns_indices_size);
1816
1817 Index row_index, column_index;
1818
1819 Tensor<type, 1> minimums(columns_indices_size);
1820 minimums.setConstant(numeric_limits<type>::max());
1821
1822 Tensor<type, 1> maximums(columns_indices_size);
1823 maximums.setConstant(type(NUMERIC_LIMITS_MIN));
1824
1825 Tensor<double, 1> sums(columns_indices_size);
1826 Tensor<double, 1> squared_sums(columns_indices_size);
1827 Tensor<Index, 1> count(columns_indices_size);
1828
1829 sums.setZero();
1830 squared_sums.setZero();
1831 count.setZero();
1832
1833 for(Index i = 0; i < row_indices_size; i++)
1834 {
1835 row_index = row_indices(i);
1836
1837 #pragma omp parallel for private(column_index)
1838
1839 for(Index j = 0; j < columns_indices_size; j++)
1840 {
1841 column_index = columns_indices(j);
1842
1843 const type value = matrix(row_index,column_index);
1844
1845 if(isnan(value)) continue;
1846
1847 if(value < minimums(j)) minimums(j) = value;
1848
1849 if(value > maximums(j)) maximums(j) = value;
1850
1851 sums(j) += double(value);
1852 squared_sums(j) += double(value*value);
1853 count(j)++;
1854 }
1855 }
1856
1857 const Tensor<double, 1> mean = sums/count.cast<double>();
1858
1859 Tensor<double, 1> standard_deviation(columns_indices_size);
1860
1861 if(row_indices_size > 1)
1862 {
1863 #pragma omp parallel for
1864
1865 for(Index i = 0; i < columns_indices_size; i++)
1866 {
1867 const double variance = squared_sums(i)/static_cast<double>(count(i)-1)
1868 - (sums(i)/static_cast<double>(count(i)))*(sums(i)/static_cast<double>(count(i)))*static_cast<double>(count(i))/static_cast<double>(count(i)-1);
1869
1870 standard_deviation(i) = sqrt(variance);
1871 }
1872 }
1873
1874 for(Index i = 0; i < columns_indices_size; i++)
1875 {
1876 descriptives(i).minimum = minimums(i);
1877 descriptives(i).maximum = maximums(i);
1878 descriptives(i).mean = type(mean(i));
1879 descriptives(i).standard_deviation = type(standard_deviation(i));
1880 }
1881
1882 return descriptives;
1883}
1884
1885
1892
1893Tensor<type, 1> columns_minimums(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& rows_indices, const Tensor<Index, 1>& columns_indices)
1894{
1895 const Index rows_number = matrix.dimension(0);
1896 const Index columns_number = matrix.dimension(1);
1897
1898 Tensor<Index, 1> used_columns_indices;
1899
1900 if(columns_indices.dimension(0) == 0)
1901 {
1902 used_columns_indices.resize(columns_number);
1903
1904 for(Index i = 0; i < columns_number; i++)
1905 {
1906 used_columns_indices(i) = i;
1907 }
1908 }
1909 else
1910 {
1911 used_columns_indices = columns_indices;
1912 }
1913
1914 Tensor<Index, 1> used_rows_indices;
1915
1916 if(rows_indices.dimension(0) == 0)
1917 {
1918 used_rows_indices.resize(rows_number);
1919
1920 for(Index i = 0; i < rows_number; i++)
1921 {
1922 used_rows_indices(i) = i;
1923 }
1924 }
1925 else
1926 {
1927 used_rows_indices = rows_indices;
1928 }
1929
1930 const Index rows_indices_size = used_rows_indices.size();
1931 const Index columns_indices_size = used_columns_indices.size();
1932
1933 Tensor<type, 1> minimums(columns_indices_size);
1934
1935 Index row_index;
1936 Index column_index;
1937
1938 for(Index j = 0; j < columns_indices_size; j++)
1939 {
1940 column_index = used_columns_indices(j);
1941
1942 Tensor<type, 1> column(rows_indices_size);
1943
1944 for(Index i = 0; i < rows_indices_size; i++)
1945 {
1946 row_index = used_rows_indices(i);
1947
1948 column(i) = matrix(row_index,column_index);
1949 }
1950
1951 minimums(j) = minimum(column);
1952 }
1953
1954 return minimums;
1955}
1956
1957
1963
1964Tensor<type, 1> columns_maximums(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
1965{
1966 const Index rows_number = matrix.dimension(0);
1967 const Index columns_number = matrix.dimension(1);
1968
1969 Tensor<Index, 1> used_columns_indices;
1970
    if(columns_indices.dimension(0) == 0)
    {
        used_columns_indices.resize(columns_number);

        for(Index i = 0; i < columns_number; i++)
        {
            used_columns_indices(i) = i;
        }
    }
    else
    {
        used_columns_indices = columns_indices;
    }
1979
1980 const Index columns_indices_size = used_columns_indices.size();
1981
1982 Tensor<type, 1> maximums(columns_indices_size);
1983
1984 Index column_index;
1985 Tensor<type, 1> column(rows_number);
1986
1987 for(Index i = 0; i < columns_indices_size; i++)
1988 {
1989 column_index = used_columns_indices(i);
1990
1991 column = matrix.chip(column_index,1);
1992
1993 maximums(i) = maximum(column);
1994 }
1995
1996 return maximums;
1997}
1998
1999
2000type range(const Tensor<type, 1>& vector)
2001{
2002 const type min = minimum(vector);
2003 const type max = maximum(vector);
2004
2005 return abs(max - min);
2006}
2007
2008
2011
2012Descriptives descriptives(const Tensor<type, 1>& vector)
2013{
2014 const Index size = vector.dimension(0);
2015
2016#ifdef OPENNN_DEBUG
2017
2018 if(size == 0)
2019 {
2020 ostringstream buffer;
2021
2022 buffer << "OpenNN Exception: Statistics Class.\n"
2023 << "type descriptives(const Tensor<type, 1>&, "
2024 "const Tensor<Index, 1>&).\n"
2025 << "Size must be greater than zero.\n";
2026
2027 throw logic_error(buffer.str());
2028 }
2029
2030#endif
2031
2032 Descriptives descriptives;
2033
2034 type minimum = numeric_limits<type>::max();
2035 type maximum = -numeric_limits<type>::max();
2036
2037 type sum = type(0);
2038 type squared_sum = type(0);
2039 Index count = 0;
2040
2041 for(Index i = 0; i < size; i++)
2042 {
2043 if(!isnan(vector(i)))
2044 {
2045 if(vector(i) < minimum) minimum = vector(i);
2046
2047 if(vector(i) > maximum) maximum = vector(i);
2048
2049 sum += vector(i);
2050 squared_sum += vector(i) *vector(i);
2051
2052 count++;
2053 }
2054 }
2055
2056 const type mean = sum/static_cast<type>(count);
2057
2058 type standard_deviation;
2059
2060 if(count <= 1)
2061 {
2062 standard_deviation = type(0);
2063 }
2064 else
2065 {
2066 const type numerator = type(squared_sum) - (sum * sum) / type(count);
        const type denominator = type(count) - static_cast<type>(1.0);
2068
2069 standard_deviation = numerator / denominator;
2070 }
2071
2072 standard_deviation = sqrt(standard_deviation);
2073
2074 descriptives.minimum = minimum;
2075 descriptives.maximum = maximum;
2076 descriptives.mean = mean;
2077 descriptives.standard_deviation = standard_deviation;
2078
2079 return descriptives;
2080}
2081
2082
2088
2089Index perform_distribution_distance_analysis(const Tensor<type, 1>& vector)
2090{
2091 Tensor<type, 1> distances(2);
2092 distances.setZero();
2093
2094 const Index nans = count_nan(vector);
2095
2096 const Index new_size = vector.size() - nans;
2097
2098 Tensor<type, 1> new_vector(new_size);
2099
2100 Index index = 0;
2101
2102 for(Index i = 0; i < vector.size(); i++)
2103 {
2104 if(!isnan(vector(i)))
2105 {
2106 new_vector(index) = vector(i);
2107 index++;
2108 }
2109 }
2110
2111 Tensor<type, 1> sorted_vector(new_vector);
2112
2113 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2114
2115 const Descriptives descriptives = OpenNN::descriptives(vector);
2116
2117 const type mean = descriptives.mean;
2118 const type standard_deviation = descriptives.standard_deviation;
2119 const type minimum = sorted_vector(0);
2120 const type maximum = sorted_vector(new_size-1);
2121
2122 #pragma omp parallel for schedule(dynamic)
2123
2124 for(Index i = 0; i < new_size; i++)
2125 {
        const type normal_distribution = static_cast<type>(0.5)
                * static_cast<type>(erfc(double(mean - sorted_vector(i)) / (double(standard_deviation) * sqrt(2.0))));
2128
2129 const type uniform_distribution = (sorted_vector(i)-minimum)/(maximum - minimum);
2130
2131 type empirical_distribution;
2132
2133 Index counter = 0;
2134
2135 if(new_vector(i) < sorted_vector(0))
2136 {
2137 empirical_distribution = type(0);
2138 }
2139 else if(new_vector(i) >= sorted_vector(new_size-1))
2140 {
2141 empirical_distribution = type(1);
2142 }
2143 else
2144 {
2145 counter = static_cast<Index>(i + 1);
2146
2147 for(Index j = i+1; j < new_size; j++)
2148 {
2149 if(sorted_vector(j) <= sorted_vector(i))
2150 {
2151 counter++;
2152 }
2153 else
2154 {
2155 break;
2156 }
2157 }
2158
2159 empirical_distribution = static_cast<type>(counter)/static_cast<type>(new_size);
2160 }
2161
2162 #pragma omp critical
2163 {
2164 distances(0) += abs(normal_distribution - empirical_distribution);
2165 distances(1) += abs(uniform_distribution - empirical_distribution);
2166 }
2167 }
2168
2169 return minimal_index(distances);
2170}
2171
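// The returned index is the position of the smallest accumulated distance:
// 0 means the empirical distribution is closer to a normal distribution,
// 1 means it is closer to a uniform distribution. A minimal sketch (assumed data):
//
//     const Index closest = perform_distribution_distance_analysis(column);
//     // closest == 0 -> approximately normal, closest == 1 -> approximately uniform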
2172
2176
2177Tensor<type, 1> mean(const Tensor<type, 2>& matrix)
2178{
2179 const Index rows_number = matrix.dimension(0);
2180 const Index columns_number = matrix.dimension(1);
2181
2182#ifdef OPENNN_DEBUG
2183
2184 if(rows_number == 0)
2185 {
2186 ostringstream buffer;
2187
2188 buffer << "OpenNN Exception: Statistics class.\n"
2189 << "Tensor<type, 1> mean(const Tensor<type, 2>&) const method.\n"
2190 << "Number of rows must be greater than one.\n";
2191
2192 throw logic_error(buffer.str());
2193 }
2194
2195#endif
2196
2197 // Mean
2198
2199 Tensor<type, 1> mean(columns_number);
2200 mean.setZero();
2201
    for(Index j = 0; j < columns_number; j++)
    {
        Index count = 0;

        for(Index i = 0; i < rows_number; i++)
        {
            if(!isnan(matrix(i,j)))
            {
                mean(j) += matrix(i,j);
                count++;
            }
        }

        // Divide by the number of finite entries so skipped NaNs do not bias the mean

        mean(j) /= static_cast<type>(count);
    }
2214
2215 return mean;
2216}
2217
2218
2222
2223Tensor<type, 1> mean(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
2224{
2225 const Index rows_number = matrix.dimension(0);
2226
2227 const Index columns_indices_size = columns_indices.size();
2228
2229 Index column_index;
2230
2231 // Mean
2232
2233 Tensor<type, 1> mean(columns_indices_size);
2234 mean.setZero();
2235
2236 for(Index j = 0; j < columns_indices_size; j++)
2237 {
2238 column_index = columns_indices(j);
2239
2240 for(Index i = 0; i < rows_number; i++)
2241 {
2242 mean(j) += matrix(i, column_index);
2243 }
2244
2245 mean(j) /= static_cast<type>(rows_number);
2246 }
2247
2248 return mean;
2249}
2250
2251
2257
2258Tensor<type, 1> mean(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices, const Tensor<Index, 1>& columns_indices)
2259{
2260 const Index row_indices_size = row_indices.size();
2261 const Index columns_indices_size = columns_indices.size();
2262
2263 if(row_indices_size == 0 && columns_indices_size == 0) return Tensor<type, 1>();
2264
2265#ifdef OPENNN_DEBUG
2266
2267 const Index rows_number = matrix.dimension(0);
2268 const Index columns_number = matrix.dimension(1);
2269
2270 // Rows check
2271
2272 if(row_indices_size > rows_number)
2273 {
2274 ostringstream buffer;
2275
2276 buffer << "OpenNN Exception: Statistics class.\n"
2277 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2278 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2279 << "Size of row indices(" << row_indices_size << ") is greater than number of rows(" << rows_number << ").\n";
2280
2281 throw logic_error(buffer.str());
2282 }
2283
2284 for(Index i = 0; i < row_indices_size; i++)
2285 {
2286 if(row_indices(i) >= rows_number)
2287 {
2288 ostringstream buffer;
2289
2290 buffer << "OpenNN Exception: Statistics class.\n"
2291 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2292 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2293 << "Row index " << i << " must be less than rows number.\n";
2294
2295 throw logic_error(buffer.str());
2296 }
2297 }
2298
2299 if(row_indices_size == 0)
2300 {
2301 ostringstream buffer;
2302
2303 buffer << "OpenNN Exception: Statistics class.\n"
2304 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2305 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2306 << "Size of row indices must be greater than zero.\n";
2307
2308 throw logic_error(buffer.str());
2309 }
2310
2311 // Columns check
2312
2313 if(columns_indices_size > columns_number)
2314 {
2315 ostringstream buffer;
2316
2317 buffer << "OpenNN Exception: Statistics class.\n"
2318 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2319 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2320 << "Column indices size must be equal or less than columns number.\n";
2321
2322 throw logic_error(buffer.str());
2323 }
2324
2325 for(Index i = 0; i < columns_indices_size; i++)
2326 {
2327 if(columns_indices(i) >= columns_number)
2328 {
2329 ostringstream buffer;
2330
2331 buffer << "OpenNN Exception: Statistics class.\n"
2332 << "Tensor<type, 1> mean(const Tensor<type, 2>& matrix, "
2333 "const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2334 << "Column index " << i << " must be less than columns number.\n";
2335
2336 throw logic_error(buffer.str());
2337 }
2338 }
2339
2340#endif
2341
2342 Index row_index;
2343 Index column_index;
2344
2345 Index count = 0;
2346
2347 // Mean
2348
2349 Tensor<type, 1> mean(columns_indices_size);
2350 mean.setZero();
2351
2352 for(Index j = 0; j < columns_indices_size; j++)
2353 {
2354 column_index = columns_indices(j);
2355
2356 count = 0;
2357
2358 for(Index i = 0; i < row_indices_size; i++)
2359 {
2360 row_index = row_indices(i);
2361
2362 if(!isnan(matrix(row_index,column_index)))
2363 {
2364 mean(j) += matrix(row_index,column_index);
2365 count++;
2366 }
2367 }
2368
2369 mean(j) /= static_cast<type>(count);
2370 }
2371
2372 return mean;
2373}
2374
2375
2378
2379type mean(const Tensor<type, 2>& matrix, const Index& column_index)
2380{
2381 const Index rows_number = matrix.dimension(0);
2382 const Index columns_number = matrix.dimension(1);
2383
2384 if(rows_number == 0 && columns_number == 0) return type(NAN);
2385
2386#ifdef OPENNN_DEBUG
2387
2388 if(rows_number == 0)
2389 {
2390 ostringstream buffer;
2391
2392 buffer << "OpenNN Exception: Statistics class.\n"
2393 << "type mean(const Tensor<type, 2>&, const Index&) const method.\n"
2394 << "Number of rows must be greater than one.\n";
2395
2396 throw logic_error(buffer.str());
2397 }
2398
2399 if(column_index >= columns_number)
2400 {
2401 ostringstream buffer;
2402
2403 buffer << "OpenNN Exception: Statistics class.\n"
2404 << "type mean(const Tensor<type, 2>&, const Index&) const method.\n"
2405 << "Index of column must be less than number of columns.\n";
2406
2407 throw logic_error(buffer.str());
2408 }
2409
2410#endif
2411
2412 if(rows_number == 0 && columns_number == 0) return type(NAN);
2413
2414 // Mean
2415
2416 type mean = type(0);
2417
2418 Index count = 0;
2419
2420 for(Index i = 0; i < rows_number; i++)
2421 {
2422 if(!isnan(matrix(i,column_index)))
2423 {
2424 mean += matrix(i,column_index);
2425 count++;
2426 }
2427 }
2428
2429 mean /= static_cast<type>(count);
2430
2431 return mean;
2432}
2433
2434
2437
2438Tensor<type, 1> median(const Tensor<type, 2>& matrix)
2439{
2440 const Index rows_number = matrix.dimension(0);
2441 const Index columns_number = matrix.dimension(1);
2442
2443#ifdef OPENNN_DEBUG
2444
2445 if(rows_number == 0)
2446 {
2447 ostringstream buffer;
2448
2449 buffer << "OpenNN Exception: Matrix template.\n"
2450 << "Tensor<type, 1> median() const method.\n"
2451 << "Number of rows must be greater than one.\n";
2452
2453 throw logic_error(buffer.str());
2454 }
2455
2456#endif
2457
2458 // median
2459
2460 Tensor<type, 1> median(columns_number);
2461
2462 for(Index j = 0; j < columns_number; j++)
2463 {
2464 Tensor<type, 1> sorted_column(matrix.chip(j,1));
2465
2466 sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2467
        if(rows_number % 2 == 0)
        {
            median(j) = (sorted_column[rows_number/2 - 1] + sorted_column[rows_number/2]) / type(2);
        }
        else
        {
            median(j) = sorted_column[rows_number/2];
        }
2476 }
2477
2478 return median;
2479}
2480
2481
2484
2485type median(const Tensor<type, 2>& matrix, const Index& column_index)
2486{
2487 const Index rows_number = matrix.dimension(0);
2488
2489#ifdef OPENNN_DEBUG
2490
2491 const Index columns_number = matrix.dimension(1);
2492
2493 if(rows_number == 0)
2494 {
2495 ostringstream buffer;
2496
2497 buffer << "OpenNN Exception: Matrix template.\n"
2498 << "type median(const Index&) const method.\n"
2499 << "Number of rows must be greater than one.\n";
2500
2501 throw logic_error(buffer.str());
2502 }
2503
2504 if(column_index >= columns_number)
2505 {
2506 ostringstream buffer;
2507
2508 buffer << "OpenNN Exception: Matrix template.\n"
2509 << "type median(const Index&) const method.\n"
2510 << "Index of column must be less than number of columns.\n";
2511
2512 throw logic_error(buffer.str());
2513 }
2514
2515#endif
2516
2517 // median
2518
2519 type median = type(0);
2520
2521 Tensor<type, 1> sorted_column(matrix.chip(column_index,1));
2522
2523 sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2524
    if(rows_number % 2 == 0)
    {
        median = (sorted_column[rows_number/2 - 1] + sorted_column[rows_number/2]) / type(2);
    }
    else
    {
        median = sorted_column[rows_number/2];
    }
2533
2534 return median;
2535}
2536
2537
2541
2542
2543Tensor<type, 1> median(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& columns_indices)
2544{
2545 const Index rows_number = matrix.dimension(0);
2546
2547 const Index columns_indices_size = columns_indices.size();
2548
2549 Index column_index;
2550
2551 // median
2552
2553 Tensor<type, 1> median(columns_indices_size);
2554
2555 for(Index j = 0; j < columns_indices_size; j++)
2556 {
2557 column_index = columns_indices(j);
2558
2559 Tensor<type, 1> sorted_column(matrix.chip(column_index, 1));
2560
2561 sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2562
        if(rows_number % 2 == 0)
        {
            median(j) = (sorted_column[rows_number/2 - 1] + sorted_column[rows_number/2]) / type(2);
        }
        else
        {
            median(j) = sorted_column[rows_number/2];
        }
2571 }
2572
2573 return median;
2574}
2575
2576
2581
2582Tensor<type, 1> median(const Tensor<type, 2>& matrix, const Tensor<Index, 1>& row_indices, const Tensor<Index, 1>& columns_indices)
2583{
2584
2585 const Index row_indices_size = row_indices.size();
2586 const Index columns_indices_size = columns_indices.size();
2587
2588#ifdef OPENNN_DEBUG
2589
2590 const Index rows_number = matrix.dimension(0);
2591 const Index columns_number = matrix.dimension(1);
2592
2593 // Rows check
2594
2595 if(row_indices_size > rows_number)
2596 {
2597 ostringstream buffer;
2598
2599 buffer << "OpenNN Exception: Matrix template.\n"
2600 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2601 << "Size of row indices(" << row_indices_size << ") is greater than number of rows(" << rows_number << ").\n";
2602
2603 throw logic_error(buffer.str());
2604 }
2605
2606 for(Index i = 0; i < row_indices_size; i++)
2607 {
2608 if(row_indices(i) >= rows_number)
2609 {
2610 ostringstream buffer;
2611
2612 buffer << "OpenNN Exception: Matrix template.\n"
2613 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2614 << "Row index " << i << " must be less than rows number.\n";
2615
2616 throw logic_error(buffer.str());
2617 }
2618 }
2619
2620 if(row_indices_size == 0)
2621 {
2622 ostringstream buffer;
2623
2624 buffer << "OpenNN Exception: Matrix template.\n"
2625 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2626 << "Size of row indices must be greater than zero.\n";
2627
2628 throw logic_error(buffer.str());
2629 }
2630
2631 // Columns check
2632
2633 if(columns_indices_size > columns_number)
2634 {
2635 ostringstream buffer;
2636
2637 buffer << "OpenNN Exception: Matrix template.\n"
2638 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2639 << "Size of column indices must be less than or equal to the number of columns.\n";
2640
2641 throw logic_error(buffer.str());
2642 }
2643
2644 for(Index i = 0; i < columns_indices_size; i++)
2645 {
2646 if(columns_indices(i) >= columns_number)
2647 {
2648 ostringstream buffer;
2649
2650 buffer << "OpenNN Exception: Matrix template.\n"
2651 << "Tensor<type, 1> median(const Tensor<Index, 1>&, const Tensor<Index, 1>&) const method.\n"
2652 << "Column index " << columns_indices(i) << " must be less than the number of columns.\n";
2653
2654 throw logic_error(buffer.str());
2655 }
2656 }
2657
2658#endif
2659
2660 Index column_index;
2661
2662 // median
2663
2664 Tensor<type, 1> median(columns_indices_size);
2665
2666 for(Index j = 0; j < columns_indices_size; j++)
2667 {
2668 column_index = columns_indices(j);
2669
2670 Tensor<type, 1> sorted_column(row_indices_size);
2671
2672 for(Index k = 0; k < row_indices_size; k++)
2673 {
2674 const Index row_index = row_indices(k);
2675
2676 sorted_column(k) = matrix(row_index, column_index);
2677 }
2678
2679 sort(sorted_column.data(), sorted_column.data() + sorted_column.size(), less<type>());
2680
2681 if(row_indices_size % 2 == 0)
2682 {
2683 median(j) = (sorted_column[row_indices_size/2 - 1] + sorted_column[row_indices_size/2])/type(2);
2684 }
2685 else
2686 {
2687 median(j) = sorted_column[row_indices_size/2];
2688 }
2689 }
2690
2691 return median;
2692}
2693
2694
2698
2699type normal_distribution_distance(const Tensor<type, 1>& vector)
2700{
2701 type normal_distribution_distance = type(0);
2702
2703 const Index n = vector.dimension(0);
2704
2705 const type mean_value = mean(vector);
2706 const type standard_deviation = OpenNN::standard_deviation(vector);
2707
2708 type normal_distribution; // Normal distribution
2709 type empirical_distribution; // Empirical distribution
2710
2711 Tensor<type, 1> sorted_vector(vector);
2712
2713 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2714
2715 Index counter = 0;
2716
2717 for(Index i = 0; i < n; i++)
2718 {
2719 normal_distribution = static_cast<type>(0.5)*static_cast<type>(erfc(double(mean_value - sorted_vector(i))/(double(standard_deviation)*sqrt(2.0))));
2720 counter = 0;
2721
2722 for(Index j = 0; j < n; j++)
2723 {
2724 if(sorted_vector(j) <= sorted_vector(i))
2725 {
2726 counter++;
2727 }
2728 else
2729 {
2730 break;
2731 }
2732 }
2733
2734 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
2735
2736 normal_distribution_distance += abs(normal_distribution - empirical_distribution);
2737 }
2738
2739 return normal_distribution_distance;
2740}
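
The loop above accumulates, at each sorted sample value, the absolute gap between the theoretical normal cumulative distribution, 0.5*erfc((mean - x)/(sigma*sqrt(2))), and the empirical cumulative distribution counter/n. A standalone sketch of the same comparison for a plain std::vector<double> (illustrative only; the helper name is hypothetical, and it uses the sample rank instead of the inner counting loop, which differs only when there are tied values):

#include <algorithm>
#include <cmath>
#include <vector>

double normal_cdf_distance(std::vector<double> sample, const double mean, const double sigma)
{
    std::sort(sample.begin(), sample.end());

    const double n = static_cast<double>(sample.size());

    double distance = 0.0;

    for(std::size_t i = 0; i < sample.size(); ++i)
    {
        // Theoretical normal CDF at the i-th order statistic.
        const double theoretical = 0.5*std::erfc((mean - sample[i])/(sigma*std::sqrt(2.0)));

        // Empirical CDF: fraction of observations less than or equal to sample[i].
        const double empirical = static_cast<double>(i + 1)/n;

        distance += std::abs(theoretical - empirical);
    }

    return distance;
}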
2741
2742
2746
2747type half_normal_distribution_distance(const Tensor<type, 1>& vector)
2748{
2749 type half_normal_distribution_distance = type(0);
2750
2751 const Index n = vector.dimension(0);
2752
2753 const type standard_deviation = OpenNN::standard_deviation(vector);
2754
2755 type half_normal_distribution;
2756 type empirical_distribution;
2757
2758 Tensor<type, 1> sorted_vector(vector);
2759
2760 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2761
2762 Index counter = 0;
2763
2764 for(Index i = 0; i < n; i++)
2765 {
2766 half_normal_distribution = type(erf(double(sorted_vector(i))/(double(standard_deviation)*sqrt(2.0))));
2767
2768 counter = 0;
2769
2770 for(Index j = 0; j < n; j++)
2771 {
2772 if(sorted_vector(j) <= sorted_vector(i))
2773 {
2774 counter++;
2775 }
2776 else
2777 {
2778 break;
2779 }
2780 }
2781
2782 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
2783
2784 half_normal_distribution_distance += abs(half_normal_distribution - empirical_distribution);
2785 }
2786
2787 return half_normal_distribution_distance;
2788}
2789
2790
2794
2795type uniform_distribution_distance(const Tensor<type, 1>& vector)
2796{
2797 type uniform_distribution_distance = type(0);
2798
2799 const Index n = vector.dimension(0);
2800
2801 type uniform_distribution; // Uniform distribution
2802 type empirical_distribution; // Empirical distribution
2803
2804 Tensor<type, 1> sorted_vector(vector);
2805
2806 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2807
2808 const type minimum = sorted_vector[0];
2809 const type maximum = sorted_vector[n-1];
2810
2811 Index counter = 0;
2812
2813 for(Index i = 0; i < n; i++)
2814 {
2815 uniform_distribution = (sorted_vector(i)-minimum)/(maximum - minimum);
2816 counter = 0;
2817
2818 for(Index j = 0; j < n; j++)
2819 {
2820 if(sorted_vector(j) <= sorted_vector(i))
2821 {
2822 counter++;
2823 }
2824 else
2825 {
2826 break;
2827 }
2828 }
2829
2830 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
2831
2832 uniform_distribution_distance += abs(uniform_distribution - empirical_distribution);
2833 }
2834
2835 return uniform_distribution_distance;
2836}
2837
2838
2840
2841type normality_parameter(const Tensor<type, 1>& vector)
2842{
2843 const type max = maximum(vector);
2844 const type min = minimum(vector);
2845
2846 const Index n = vector.dimension(0);
2847
2848 const type mean_value = mean(vector);
2849 const type standard_deviation = OpenNN::standard_deviation(vector);
2850
2851 type normal_distribution;
2852 type empirical_distribution;
2853 type previous_normal_distribution = type(0);
2854 type previous_empirical_distribution = type(0);
2855
2856 Tensor<type, 1> sorted_vector(vector);
2857
2858 sort(sorted_vector.data(), sorted_vector.data() + sorted_vector.size(), less<type>());
2859
2860 type empirical_area = type(0);
2861 type normal_area = type(0);
2862
2863 Index counter = 0;
2864
2865 for(Index i = 0; i < n; i++)
2866 {
2867 normal_distribution = static_cast<type>(0.5)*static_cast<type>(erfc(double(mean_value - sorted_vector(i))/(double(standard_deviation)*sqrt(2.0))));
2868 counter = 0;
2869
2870 for(Index j = 0; j < n; j++)
2871 {
2872 if(sorted_vector(j) <= sorted_vector(i))
2873 {
2874 counter++;
2875 }
2876 else
2877 {
2878 break;
2879 }
2880 }
2881
2882 empirical_distribution = static_cast<type>(counter)/static_cast<type>(n);
2883
2884 if(i == 0)
2885 {
2886 previous_normal_distribution = normal_distribution;
2887 previous_empirical_distribution = empirical_distribution;
2888 }
2889 else
2890 {
2891 normal_area += static_cast<type>(0.5)*(sorted_vector(i)-sorted_vector[i-1])*(normal_distribution+previous_normal_distribution);
2892 empirical_area += static_cast<type>(0.5)*(sorted_vector(i)-sorted_vector[i-1])*(empirical_distribution+previous_empirical_distribution);
2893
2894 previous_normal_distribution = normal_distribution;
2895 previous_empirical_distribution = empirical_distribution;
2896 }
2897 }
2898
2899 const type uniform_area = (max - min)/static_cast<type>(2.0);
2900
2901 return uniform_area;
2902}
2903
2904
2905Tensor<type, 1> variation_percentage(const Tensor<type, 1>& vector)
2906{
2907 const Index size = vector.dimension(0);
2908
2909 Tensor<type, 1> new_vector(size);
2910 new_vector.setZero();
2911 for(Index i = 1; i < size; i++)
2912 {
2913 if(abs(vector[i-1]) > type(NUMERIC_LIMITS_MIN))
2914 {
2915 new_vector(i) = (vector(i) - vector[i-1])*static_cast<type>(100.0)/vector[i-1];
2916 }
2917 }
2918
2919 return new_vector;
2920}
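
A usage sketch (illustrative only): element i holds the percentage change from element i-1, and element 0 stays at zero because there is no previous value.

Tensor<type, 1> prices(4);
prices.setValues({type(100), type(110), type(99), type(99)});

const Tensor<type, 1> changes = variation_percentage(prices);
// changes: 0, 10, -10, 0 (percentage change relative to the previous element)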
2921
2922
2924
2925Index minimal_index(const Tensor<type, 1>& vector)
2926{
2927 const Index size = vector.dimension(0);
2928
2929 if(size == 0) return Index();
2930
2931 Index minimal_index = 0;
2932 type minimum = vector[0];
2933
2934 for(Index i = 1; i < size; i++)
2935 {
2936 if(vector(i) < minimum)
2937 {
2938 minimal_index = i;
2939 minimum = vector(i);
2940 }
2941 }
2942
2943 return minimal_index;
2944}
2945
2946
2948
2949Index maximal_index(const Tensor<type, 1>& vector)
2950{
2951 const Index size = vector.dimension(0);
2952
2953 if(size == 0) return Index();
2954
2955 Index maximal_index = 0;
2956 type maximum = vector[0];
2957
2958 for(Index i = 1; i < size; i++)
2959 {
2960 if(vector(i) > maximum)
2961 {
2962 maximal_index = i;
2963 maximum = vector(i);
2964 }
2965 }
2966
2967 return maximal_index;
2968}
2969
2970
2973
2974Tensor<Index, 1> minimal_indices(const Tensor<type, 1>& vector, const Index& number)
2975{
2976 Tensor<type, 1> vector_ = vector;
2977
2978 const Index size = vector.dimension(0);
2979 Tensor<Index, 1> minimal_indices(number);
2980 Eigen::Tensor<type, 0> maxim = vector.maximum();
2981
2982#ifdef OPENNN_DEBUG
2983
2984if(number > size)
2985{
2986 ostringstream buffer;
2987
2988 buffer << "OpenNN Exception: Statistics class.\n"
2989 << "Tensor<Index, 1> minimal_indices(const Tensor<type, 1>&, const Index&) method.\n"
2990 << "Number of minimal indices to be computed must be less than or equal to the size of the input vector.\n";
2991
2992 throw logic_error(buffer.str());
2993}
2994#endif
2995
2996 for(Index j = 0; j < number; j++)
2997 {
2998 Index minimal_index = 0;
2999 type minimum = vector_(0);
3000
3001 for(Index i = 0; i < size; i++)
3002 {
3003 if(vector_(i) < minimum)
3004 {
3005 minimal_index = i;
3006 minimum = vector_(i);
3007 }
3008 }
3009
3010 vector_(minimal_index) = maxim(0) + type(1);
3011 minimal_indices(j) = minimal_index;
3012 }
3013 return minimal_indices;
3014}
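
A usage sketch (illustrative only): the positions of the three smallest entries of a vector, returned in increasing order of their values.

Tensor<type, 1> values(5);
values.setValues({type(7), type(-2), type(4), type(0), type(9)});

const Tensor<Index, 1> smallest = minimal_indices(values, 3);
// smallest: 1, 3, 2 (the positions of -2, 0 and 4)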
3015
3016
3020
3021Tensor<Index, 1> maximal_indices(const Tensor<type, 1>& vector, const Index& number)
3022{
3023 Tensor<type, 1> vector_ = vector;
3024
3025 const Index size = vector.dimension(0);
3026 Tensor<Index, 1> maximal_indices(number);
3027 const Eigen::Tensor<type, 0> minim = vector.minimum();
3028
3029#ifdef OPENNN_DEBUG
3030
3031if(number > size)
3032{
3033 ostringstream buffer;
3034
3035 buffer << "OpenNN Exception: Statistics class.\n"
3036 << "Tensor<Index, 1> maximal_indices(const Tensor<type, 1>&, const Index&) method.\n"
3037 << "Number of maximal indices to be computed must be less than or equal to the size of the input vector.\n";
3038
3039 throw logic_error(buffer.str());
3040}
3041#endif
3042
3043 for(Index j = 0; j < number; j++)
3044 {
3045 Index maximal_index = 0;
3046 type maximal = vector_(0);
3047
3048 for(Index i = 0; i < size; i++)
3049 {
3050 if(vector_(i) > maximal)
3051 {
3052 maximal_index = i;
3053 maximal = vector_(i);
3054 }
3055 }
3056
3057 vector_(maximal_index) = minim(0) - type(1);
3058 maximal_indices(j) = maximal_index;
3059 }
3060
3061 return maximal_indices;
3062}
3063
3064
3066
3067Tensor<Index, 1> minimal_indices(const Tensor<type, 2>& matrix)
3068{
3069 const Index rows_number = matrix.dimension(0);
3070 const Index columns_number = matrix.dimension(1);
3071
3072 type minimum = matrix(0,0);
3073
3074 Tensor<Index, 1> minimal_indices(2);
3075 minimal_indices.setZero();
3076
3077 for(Index i = 0; i < rows_number; i++)
3078 {
3079 for(Index j = 0; j < columns_number; j++)
3080 {
3081 if(!isnan(matrix(i,j)) && matrix(i,j) < minimum)
3082 {
3083 minimum = matrix(i,j);
3084 minimal_indices(0) = i;
3085 minimal_indices(1) = j;
3086 }
3087 }
3088 }
3089
3090 return minimal_indices;
3091}
3092
3093
3095
3096Tensor<Index, 1> maximal_indices(const Tensor<type, 2>& matrix)
3097{
3098 const Index rows_number = matrix.dimension(0);
3099 const Index columns_number = matrix.dimension(1);
3100
3101 type maximum = matrix(0,0);
3102
3103 Tensor<Index, 1> maximal_indices(2);
3104 maximal_indices.setZero();
3105
3106 for(Index i = 0; i < rows_number; i++)
3107 {
3108 for(Index j = 0; j < columns_number; j++)
3109 {
3110 if(!isnan(matrix(i,j)) && matrix(i,j) > maximum)
3111 {
3112 maximum = matrix(i,j);
3113 maximal_indices(0) = i;
3114 maximal_indices(1) = j;
3115 }
3116 }
3117 }
3118
3119 return maximal_indices;
3120}
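
A usage sketch (illustrative only): locate the row and column of the largest entry of a matrix; NaN entries are skipped.

Tensor<type, 2> errors(2, 3);
errors.setValues({{type(0.1), type(0.7), type(0.3)},
                  {type(0.2), type(0.5), type(0.9)}});

const Tensor<Index, 1> position = maximal_indices(errors);
// position: 1, 2 (the maximum, 0.9, is at row 1, column 2)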
3121
3122
3125
3126Tensor<Index, 2> maximal_columns_indices(const Tensor<type, 2>& matrix, const Index& maximum_number)
3127{
3128 const Index rows_number = matrix.dimension(0);
3129 const Index columns_number = matrix.dimension(1);
3130
3131 Tensor<Index, 2> maximal_columns_indices(maximum_number, columns_number);
3132
3133 Tensor<type, 1> columns_minimums = OpenNN::columns_minimums(matrix);
3134
3135 for(Index j = 0; j < columns_number; j++)
3136 {
3137 Tensor<type, 1> column = matrix.chip(j,1);
3138
3139 for(Index i = 0; i < maximum_number; i++)
3140 {
3141 Index maximal_index = 0;
3142 type maximal = column(0);
3143
3144 for(Index k = 0; k < rows_number; k++)
3145 {
3146 if(column(k) > maximal && !isnan(column(k)))
3147 {
3148 maximal_index = k;
3149 maximal = column(k);
3150 }
3151 }
3152
3153 column(maximal_index) = columns_minimums(j)-static_cast<type>(1);
3154 maximal_columns_indices(i,j) = maximal_index;
3155 }
3156 }
3157
3158 return maximal_columns_indices;
3159}
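
A usage sketch (illustrative only): for each column, the row indices of its two largest values, ordered from largest to smallest.

Tensor<type, 2> scores(4, 2);
scores.setValues({{type(3), type(30)},
                  {type(9), type(10)},
                  {type(5), type(40)},
                  {type(7), type(20)}});

const Tensor<Index, 2> top_rows = maximal_columns_indices(scores, 2);
// Column 0: rows 1 and 3 (values 9 and 7). Column 1: rows 2 and 0 (values 40 and 30).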
3160
3161
3163
3164Tensor<type, 1> percentiles(const Tensor<type, 1>& vector)
3165{
3166 const Index size = vector.dimension(0);
3167
3168#ifdef OPENNN_DEBUG
3169
3170 if(size < 10)
3171 {
3172 ostringstream buffer;
3173
3174 buffer << "OpenNN Exception: Statistics.\n"
3175 << "Tensor<type, 1> percentiles(const Tensor<type, 1>& vector) method.\n"
3176 << "Size must be at least 10.\n";
3177
3178 throw logic_error(buffer.str());
3179 }
3180
3181#endif
3182
3183 Index new_size = 0;
3184
3185 for(Index i = 0; i < size; i++)
3186 {
3187 if(!isnan(vector(i)))
3188 {
3189 new_size++;
3190 }
3191 }
3192
3193 if(new_size == 0)
3194 {
3195 Tensor<type, 1> nan(1);
3196 nan.setValues({static_cast<type>(NAN)});
3197 return nan;
3198 }
3199
3200 Index index = 0;
3201
3202 Tensor<type, 1> new_vector(new_size);
3203
3204 for(Index i = 0; i < size; i++)
3205 {
3206 if(!isnan(vector(i)))
3207 {
3208 new_vector(index) = vector(i);
3209 index++;
3210 }
3211 }
3212
3213 Tensor<type, 1> sorted_vector(new_vector);
3214
3215 sort(sorted_vector.data(), sorted_vector.data() + new_size, less<type>());
3216
3217
3219
3220 Tensor<type, 1> percentiles(10);
3221
3222 for(Index i = 0; i < 9; i++)
3223 {
3224 if(new_size * (i + 1) % 10 == 0)
3225 percentiles[i] = (sorted_vector[new_size * (i + 1) / 10 - 1] + sorted_vector[new_size * (i + 1) / 10]) / static_cast<type>(2.0);
3226
3227 else
3228 percentiles[i] = static_cast<type>(sorted_vector[new_size * (i + 1) / 10]);
3229 }
3230 percentiles[9] = maximum(new_vector);
3231
3232 return percentiles;
3233}
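
A usage sketch (illustrative only): deciles of the integers 1 to 10. With ten values every cut new_size*(i+1)/10 falls on an element boundary, so each of the first nine percentiles is the average of two adjacent values and the last entry is the maximum.

Tensor<type, 1> values(10);
values.setValues({type(1), type(2), type(3), type(4), type(5),
                  type(6), type(7), type(8), type(9), type(10)});

const Tensor<type, 1> deciles = percentiles(values);
// deciles: 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10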
3234
3235
3238
3239Index count_nan(const Tensor<type, 1>& vector)
3240{
3241 Index nan_number = 0;
3242
3243 for(Index i = 0; i < vector.dimension(0); i++)
3244 {
3245 if(isnan(vector(i))) nan_number++;
3246 }
3247
3248 return nan_number;
3249}
3250
3251}
3252
3253
3254// OpenNN: Open Neural Networks Library.
3255// Copyright(C) 2005-2021 Artificial Intelligence Techniques, SL.
3256//
3257// This library is free software; you can redistribute it and/or
3258// modify it under the terms of the GNU Lesser General Public
3259// License as published by the Free Software Foundation; either
3260// version 2.1 of the License, or any later version.
3261//
3262// This library is distributed in the hope that it will be useful,
3263// but WITHOUT ANY WARRANTY; without even the implied warranty of
3264// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3265// Lesser General Public License for more details.
3266
3267// You should have received a copy of the GNU Lesser General Public
3268// License along with this library; if not, write to the Free Software
3269// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA