OpenNN
Open-source neural networks library
Loading...
Searching...
No Matches
statistics.h
Go to the documentation of this file.
1// OpenNN: Open Neural Networks Library
2// www.opennn.net
3//
4// S T A T I S T I C S H E A D E R
5//
6// Artificial Intelligence Techniques, SL
7// artelnics@artelnics.com
8
9#pragma once
10
11#include "pch.h"
12
13namespace opennn
14{
15
/// Summary statistics (minimum, maximum, mean, standard deviation) for one variable.
/// NOTE(review): the line that names this type is absent from this listing (the gutter
/// jumps straight to 18); the constructor below shows the type is `Descriptives`.
18{
    /// Constructs a descriptives record from minimum, maximum, mean, and standard deviation.
    /// Every argument defaults to NAN.
20 Descriptives(const float = NAN, float = NAN, float = NAN, float = NAN);
21
    /// Returns the four statistics as a length-4 vector [min, max, mean, std].
23 [[nodiscard]] VectorR to_tensor() const;
24
    /// Sets the four statistics in place. Every argument defaults to NAN.
26 void set(const float = NAN, float = NAN, float = NAN, float = NAN);
27
    /// Saves the descriptives to a text file at the given path.
29 void save(const filesystem::path&) const;
30
    /// Prints the descriptives to stdout under the given header (default "Descriptives:").
32 void print(const string& = "Descriptives:") const;
33
    /// Label of this record; initialized to "Descriptives".
34 string name = "Descriptives";
35
    // NOTE(review): the in-class initializers below (-1, 1, 0, 1) differ from the NAN
    // defaults of the constructor arguments; which one wins depends on the constructor
    // body, which is not visible here — confirm.

    /// Minimum; in-class initializer is -1.0f.
36 float minimum = -1.0f;
37
    /// Maximum; in-class initializer is 1.0f.
38 float maximum = 1.0f;
39
    /// Mean; in-class initializer is 0.0f.
40 float mean = 0.0f;
41
    /// Standard deviation; in-class initializer is 1.0f.
42 float standard_deviation = 1.0f;
43
44};
45
/// Five-number summary (minimum, Q1, median, Q3, maximum) used to draw a box plot.
47struct BoxPlot
48{
    /// Constructs a box plot from minimum, first quartile, median, third quartile, and maximum.
    /// Every argument defaults to NAN.
50 BoxPlot(const float = NAN,
51 float = NAN,
52 float = NAN,
53 float = NAN,
54 float = NAN);
55
    /// Sets the five statistics in place. Every argument defaults to NAN.
    /// NOTE(review): argument order is presumably the same as the constructor's — confirm
    /// against the definition.
57 void set(const float = NAN,
58 float = NAN,
59 float = NAN,
60 float = NAN,
61 float = NAN);
62
    /// Minimum; NAN until set.
63 float minimum = NAN;
64
    /// First quartile (Q1); NAN until set.
65 float first_quartile = NAN;
66
    /// Median; NAN until set.
67 float median = NAN;
68
    /// Third quartile (Q3); NAN until set.
69 float third_quartile = NAN;
70
    /// Maximum; NAN until set.
71 float maximum = NAN;
72};
73
/// Frequency histogram with per-bin minimums, maximums, centers, and counts.
/// NOTE(review): the line that names this type is absent from this listing; the
/// constructors below show the type is `Histogram`.
76{
    /// Constructs an empty histogram with the given number of bins (default 0).
78 Histogram(const Index = 0);
79
    /// Constructs a histogram from precomputed bin centers and frequencies.
81 Histogram(const VectorR&, const VectorR&);
82
    /// Constructs a histogram from bin minimums, maximums, centers, and frequencies.
84 Histogram(const VectorR&, const VectorR&, const VectorR&, const VectorR&);
85
    /// Builds a histogram of the data with the given number of equal-width bins.
87 Histogram(const VectorR&, Index);
88
    // NOTE(review): gutter lines 89-90 are missing from this listing; the file's
    // cross-reference also documents a `Histogram(const VectorR&)` constructor that
    // does not appear here — presumably it was declared there; confirm against the header.
91
    /// Returns the number of bins in the histogram.
93 [[nodiscard]] Index get_bins_number() const;
94
    /// Returns the number of bins with zero frequency.
96 [[nodiscard]] Index count_empty_bins() const;
97
    /// Returns the smallest bin frequency.
99 [[nodiscard]] Index calculate_minimum_frequency() const;
100
    /// Returns the largest bin frequency.
102 [[nodiscard]] Index calculate_maximum_frequency() const;
103
    /// Returns the index of the bin with the largest frequency.
105 [[nodiscard]] Index calculate_most_populated_bin() const;
106
    /// Returns the centers of all bins tied for the minimum frequency.
108 [[nodiscard]] VectorR calculate_minimal_centers() const;
109
    /// Returns the centers of all bins tied for the maximum frequency.
111 [[nodiscard]] VectorR calculate_maximal_centers() const;
112
    /// Returns the bin index that contains the given value.
114 [[nodiscard]] Index calculate_bin(const float) const;
115
    /// Returns the frequency of the bin that contains the given value.
117 [[nodiscard]] Index calculate_frequency(const float) const;
118
    /// Saves the histogram (centers and frequencies) to a text file.
120 void save(const filesystem::path&) const;
121
    // NOTE(review): gutter lines 122, 124, 126 and 128 are missing from this listing.
    // The cross-reference places the `VectorR` members `minimums`, `maximums`, `centers`
    // and `frequencies` on exactly those lines.
123
125
127
129};
130
/// Returns the smallest finite element of a matrix.
131[[nodiscard]] float minimum(const MatrixR&);
/// Minimum of a vector.
133[[nodiscard]] float minimum(const VectorR&);
/// Minimum of a vector over an index list.
/// NOTE(review): the index list presumably selects which entries are considered — confirm.
135[[nodiscard]] float minimum(const VectorR&, const vector<Index>&);
/// Per-column minimums of a 2D tensor. Both index lists default to empty.
/// NOTE(review): presumably the lists restrict rows/columns as in column_maximums — confirm.
140[[nodiscard]] VectorR column_minimums(const Tensor2&, const vector<Index>& = {}, const vector<Index>& = {});
/// Returns the largest finite element of a matrix.
142[[nodiscard]] float maximum(const MatrixR&);
/// Maximum of a vector.
144[[nodiscard]] float maximum(const VectorR&);
/// Maximum of a vector over an index list.
/// NOTE(review): the index list presumably selects which entries are considered — confirm.
146[[nodiscard]] float maximum(const VectorR&, const vector<Index>&);
/// Per-column maximums of a 2D tensor, optionally restricted to a row/column subset.
/// Both index lists default to empty.
148[[nodiscard]] VectorR column_maximums(const Tensor2&, const vector<Index>& = {}, const vector<Index>& = {});
/// Returns the maximum minus the minimum of a vector.
150[[nodiscard]] float range(const VectorR&);
/// Arithmetic mean of a vector, ignoring NaNs.
152[[nodiscard]] float mean(const VectorR&);
/// Mean taken from a matrix and an index.
/// NOTE(review): the Index presumably selects a column — confirm.
154[[nodiscard]] float mean(const MatrixR&, Index);
/// Means of a matrix, returned as a vector.
/// NOTE(review): presumably one mean per column — confirm.
156[[nodiscard]] VectorR mean(const MatrixR&);
/// Means of a matrix restricted by two index lists.
/// NOTE(review): presumably row indices followed by column indices — confirm.
158[[nodiscard]] VectorR mean(const MatrixR&, const vector<Index>&, const vector<Index>&);
/// Median of a vector.
160[[nodiscard]] float median(const VectorR&);
/// Median taken from a matrix and an index.
/// NOTE(review): the Index presumably selects a column — confirm.
162[[nodiscard]] float median(const MatrixR&, Index);
/// Medians of a matrix, returned as a vector.
/// NOTE(review): presumably one median per column — confirm.
164[[nodiscard]] VectorR median(const MatrixR&);
/// Medians of a matrix restricted by one index list.
/// NOTE(review): whether the list selects rows or columns is not visible here — confirm.
166[[nodiscard]] VectorR median(const MatrixR&, const vector<Index>&);
/// Medians of a matrix restricted by two index lists.
/// NOTE(review): presumably row indices followed by column indices — confirm.
168[[nodiscard]] VectorR median(const MatrixR&, const vector<Index>&, const vector<Index>&);
/// Sample variance of a vector.
170[[nodiscard]] float variance(const VectorR&);
/// Variance of a vector with an accompanying integer vector.
/// NOTE(review): the VectorI presumably lists the entries to use — confirm.
172[[nodiscard]] float variance(const VectorR&, const VectorI&);
/// Sample standard deviation of a vector.
174[[nodiscard]] float standard_deviation(const VectorR&);
/// Vector-valued standard deviation taking an extra Index.
/// NOTE(review): the meaning of the Index (e.g. a window length) is not visible here — confirm.
176[[nodiscard]] VectorR standard_deviation(const VectorR&, Index);
/// Returns [Q1, Q2, Q3] of a vector.
178[[nodiscard]] VectorR quartiles(const VectorR&);
/// Quartiles of a vector over an index list.
/// NOTE(review): the index list presumably selects which entries are considered — confirm.
180[[nodiscard]] VectorR quartiles(const VectorR&, const vector<Index>&);
/// Five-number summary (box plot) of a vector.
182[[nodiscard]] BoxPlot box_plot(const VectorR&);
/// Box plot of a vector over an index list.
/// NOTE(review): the index list presumably selects which entries are considered — confirm.
184[[nodiscard]] BoxPlot box_plot(const VectorR&, const vector<Index>&);
/// Returns the per-column descriptives of a matrix.
188[[nodiscard]] vector<Descriptives> descriptives(const MatrixR&);
/// Descriptives of a matrix restricted by two index lists.
/// NOTE(review): presumably row indices followed by column indices — confirm.
190[[nodiscard]] vector<Descriptives> descriptives(const MatrixR&, const vector<Index>&, const vector<Index>&);
/// Builds an equal-width histogram of a vector. The Index argument defaults to 10.
/// NOTE(review): the Index is presumably the number of bins — confirm.
194[[nodiscard]] Histogram histogram(const VectorR&, Index = 10);
/// Builds a histogram with one bin centered on the given value (default 0.0f).
/// The Index argument defaults to 10.
196[[nodiscard]] Histogram histogram_centered(const VectorR&, float = 0.0f, Index = 10);
/// Histogram of a boolean vector.
198[[nodiscard]] Histogram histogram(const VectorB&);
/// Histogram of an integer vector. The Index argument defaults to 10.
200[[nodiscard]] Histogram histogram(const VectorI&, Index = 10);
/// Builds one histogram per matrix column. The Index argument defaults to 10.
202[[nodiscard]] vector<Histogram> histograms(const MatrixR&, Index = 10);
/// Sums the per-bin frequencies across a collection of histograms.
204[[nodiscard]] VectorI total_frequencies(const vector<Histogram>&);
/// Index of the smallest element in a vector.
206[[nodiscard]] Index minimal_index(const VectorR&);
/// Indices of the n smallest elements of a vector.
208[[nodiscard]] VectorI minimal_indices(const VectorR&, Index);
/// Minimal indices of a matrix.
/// NOTE(review): the layout of the returned indices (e.g. row and column) is not visible here — confirm.
210[[nodiscard]] VectorI minimal_indices(const MatrixR&);
/// Index of the largest element in a vector.
212[[nodiscard]] Index maximal_index(const VectorR&);
/// Indices of the n largest elements of a vector.
214[[nodiscard]] VectorI maximal_indices(const VectorR&, Index);
/// Maximal indices of a matrix.
/// NOTE(review): the layout of the returned indices (e.g. row and column) is not visible here — confirm.
216[[nodiscard]] VectorI maximal_indices(const MatrixR&);
218[[nodiscard]] inline bool row_finite(const VectorR& values, Index i) { return isfinite(values(i)); }
220[[nodiscard]] inline bool row_finite(const MatrixR& matrix, Index i) { return matrix.row(i).array().isFinite().all(); }
221
223[[nodiscard]] inline VectorR slice_rows(const VectorR& values, const vector<Index>& indices)
224{
225 VectorR result(indices.size());
226 for (Index i = 0; i < Index(indices.size()); ++i) result(i) = values(indices[i]);
227 return result;
228}
229
231[[nodiscard]] inline MatrixR slice_rows(const MatrixR& matrix, const vector<Index>& indices)
232{
233 MatrixR result(indices.size(), matrix.cols());
234 for (Index i = 0; i < Index(indices.size()); ++i) result.row(i) = matrix.row(indices[i]);
235 return result;
236}
237
240
242template<typename X, typename Y>
243[[nodiscard]] pair<X, Y> filter_missing_values(const X& x, const Y& y)
244{
245 if (x.rows() != y.rows())
246 throw runtime_error("filter_missing_values: row count mismatch");
247
248 vector<Index> valid;
249 valid.reserve(x.rows());
250
251 for (Index i = 0; i < x.rows(); ++i)
252 if (row_finite(x, i) && row_finite(y, i))
253 valid.push_back(i);
254
255 return { slice_rows(x, valid), slice_rows(y, valid) };
256}
257
259[[nodiscard]] inline bool is_contiguous(const vector<Index>& indices)
260{
261 return ranges::adjacent_find(indices,
262 [](Index a, Index b) { return b != a + 1; }) == indices.end();
263}
264
/// Returns true if every non-NaN entry of the tensor is exactly 0.0 or 1.0.
///
/// NaN entries are ignored, so an empty or all-NaN tensor is reported as binary.
///
/// @tparam T Container exposing data() and size().
/// @param tensor Values to inspect.
template <typename T>
[[nodiscard]] inline bool is_binary(const T& tensor)
{
    const auto begin = tensor.data();
    const auto end = begin + tensor.size();

    // Binary means: no entry that is at once different from 0, different from 1 and not NaN.
    return std::none_of(begin, end, [](float entry)
    {
        return entry != 0.0f && entry != 1.0f && !std::isnan(entry);
    });
}
272
/// Returns the row-wise concatenation of two matrices with matching column counts.
/// NOTE(review): behavior when the column counts differ is defined in the implementation,
/// which is not visible here.
274[[nodiscard]] MatrixR append_rows(const MatrixR&, const MatrixR&);
275
277template<typename T>
278[[nodiscard]] vector<T> gather_by_index(const vector<T>& data, const vector<Index>& indices)
279{
280 vector<T> result;
281 result.reserve(indices.size());
282
283 ranges::transform(indices, back_inserter(result),
284 [&data](Index i) { return data[i]; });
285
286 return result;
287}
288
/// Returns the indices of the rows of outputs that lie within the per-column bounds.
///
/// @param outputs Matrix whose rows are tested.
/// @param minimums Per-column lower bounds.
/// @param maximums Per-column upper bounds.
/// NOTE(review): whether the bounds are inclusive is decided in the implementation,
/// which is not visible here.
290[[nodiscard]] vector<Index> build_feasible_rows_mask(const MatrixR& outputs, const VectorR& minimums, const VectorR& maximums);
291
/// Reports whether every non-NaN entry of a tensor holds the same value.
///
/// NaN entries are ignored. The first non-NaN entry is taken as the reference, and every
/// later non-NaN entry must match it, either exactly or within numeric_limits<float>::min().
/// A tensor that is empty or holds only NaNs is reported as constant.
///
/// @tparam T Container exposing data() (pointer to its float storage) and size().
/// @param tensor Values to inspect.
/// @return true if all non-NaN entries agree with the first non-NaN entry, false otherwise.
template <typename T>
[[nodiscard]] inline bool is_constant(const T& tensor)
{
    const float* const begin = tensor.data();
    const float* const end = begin + tensor.size();

    const float* const first = std::find_if(begin, end, [](float value) { return !std::isnan(value); });

    if (first == end)
        return true;

    const float reference_value = *first;

    return std::all_of(first + 1, end, [reference_value](float value)
    {
        // Exact equality is tested before the tolerance: for infinite entries the
        // difference is inf - inf = NaN, which can never satisfy the <= comparison,
        // so identical infinities would otherwise be reported as different.
        return std::isnan(value)
            || value == reference_value
            || std::abs(reference_value - value) <= std::numeric_limits<float>::min();
    });
}
309
311[[nodiscard]] inline vector<Index> get_true_indices(const VectorB& flags)
312{
313 vector<Index> indices;
314 indices.reserve(flags.size());
315
316 for (Index i = 0; i < flags.size(); ++i)
317 if (flags(i))
318 indices.push_back(i);
319
320 return indices;
321}
322
/// Returns the rank of each element (1-based), ascending by default.
///
/// @param ascending Ranking direction; defaults to true.
324[[nodiscard]] VectorI calculate_rank(const VectorR&, bool ascending = true);
325
/// Returns the entries of the index list that are strictly greater than the given threshold.
327[[nodiscard]] vector<Index> get_elements_greater_than(const vector<Index>&, Index);
328
/// Finds the n rows of the matrix closest to the given point by Euclidean distance.
/// The int argument (n) defaults to 1.
330[[nodiscard]] VectorI get_nearest_points(const MatrixR&, const VectorR&, int = 1);
331
/// Copies the selected sub-matrix into a flat float buffer.
///
/// NOTE(review): the two index lists presumably select rows and then columns, and the
/// float* is the destination buffer, which the caller must size — confirm.
/// NOTE(review): the meaning of the bool (default true) and of `contiguous` (default -1)
/// is not visible in this header; -1 looks like an "auto-detect" sentinel — confirm
/// against the implementation.
339void fill_tensor_data(const MatrixR&, const vector<Index>&, const vector<Index>&, float*, bool = true, int contiguous = -1);
340
343
/// Returns an Eigen VectorMap that views a single column of a matrix without copying.
/// The Index argument selects the column.
/// NOTE(review): since no copy is made, the view presumably must not outlive the matrix — confirm.
345[[nodiscard]] VectorMap vector_map(const MatrixR&, Index);
346
347}
348
349// OpenNN: Open Neural Networks Library.
350// Copyright(C) 2005-2026 Artificial Intelligence Techniques, SL.
351// Licensed under the GNU Lesser General Public License v2.1 or later.
Definition adaptive_moment_estimation.h:14
bool is_constant(const T &tensor)
Returns true if every non-NaN entry of the tensor equals (within numeric_limits<float>::min()) the first non-NaN entry.
Definition statistics.h:294
bool is_binary(const T &tensor)
Returns true if every non-NaN entry of the tensor is exactly 0.0 or 1.0.
Definition statistics.h:267
VectorMap vector_map(const MatrixR &, Index)
Returns an Eigen VectorMap that views a single column of a matrix without copying.
Index minimal_index(const VectorR &)
Index of the smallest element in a vector.
float mean(const VectorR &)
Arithmetic mean of a vector, ignoring NaNs.
VectorR filter_missing_values(const VectorR &)
Returns a copy of the vector with NaN entries removed.
BoxPlot box_plot(const VectorR &)
Five-number summary (box plot) of a vector.
Index maximal_index(const VectorR &)
Index of the largest element in a vector.
float variance(const VectorR &)
Sample variance of a vector.
void fill_tensor_data(const MatrixR &, const vector< Index > &, const vector< Index > &, float *, bool=true, int contiguous=-1)
Copies the selected sub-matrix into a flat float buffer.
Histogram histogram(const VectorR &, Index=10)
Builds an equal-width histogram of a vector.
float maximum(const MatrixR &)
Returns the largest finite element of a matrix.
vector< Descriptives > descriptives(const MatrixR &)
Returns the per-column descriptives of a matrix.
float median(const VectorR &)
Median of a vector.
float minimum(const MatrixR &)
Returns the smallest finite element of a matrix.
VectorI maximal_indices(const VectorR &, Index)
Indices of the n largest elements of a vector.
Descriptives vector_descriptives(const VectorR &)
Returns the (min, max, mean, std) descriptives of a vector.
float range(const VectorR &)
Returns the maximum minus the minimum of a vector.
VectorI minimal_indices(const VectorR &, Index)
Indices of the n smallest elements of a vector.
bool is_contiguous(const vector< Index > &indices)
Returns true if the sorted indices form a contiguous run (each entry equals the previous plus one).
Definition statistics.h:259
bool row_finite(const VectorR &values, Index i)
Returns true if the i-th entry of the vector is finite.
Definition statistics.h:218
VectorR column_maximums(const Tensor2 &, const vector< Index > &={}, const vector< Index > &={})
Per-column maximums of a 2D tensor, optionally restricted to a row/column subset.
float standard_deviation(const VectorR &)
Sample standard deviation of a vector.
VectorI calculate_rank(const VectorR &, bool ascending=true)
Returns the rank of each element (1-based), ascending by default.
vector< Index > get_elements_greater_than(const vector< Index > &, Index)
Returns the entries of indices that are strictly greater than the given threshold.
VectorI total_frequencies(const vector< Histogram > &)
Sums the per-bin frequencies across a collection of histograms.
Histogram histogram_centered(const VectorR &, float=0.0f, Index=10)
Builds a histogram with one bin centered on the given value.
VectorR slice_rows(const VectorR &values, const vector< Index > &indices)
Returns a copy of the vector containing only the entries at the given indices.
Definition statistics.h:223
vector< Index > get_true_indices(const VectorB &flags)
Returns the positions of the true entries in a boolean vector.
Definition statistics.h:311
VectorI get_nearest_points(const MatrixR &, const VectorR &, int=1)
Finds the n rows of the matrix closest to the given point by Euclidean distance.
MatrixR append_rows(const MatrixR &, const MatrixR &)
Returns the row-wise concatenation of two matrices with matching column counts.
vector< Histogram > histograms(const MatrixR &, Index=10)
Builds one histogram per matrix column.
vector< T > gather_by_index(const vector< T > &data, const vector< Index > &indices)
Returns the elements of data at the given indices.
Definition statistics.h:278
vector< Index > build_feasible_rows_mask(const MatrixR &outputs, const VectorR &minimums, const VectorR &maximums)
Returns the indices of the rows of outputs that lie within the per-column bounds.
VectorR perform_Householder_QR_decomposition(const MatrixR &, const VectorR &)
Solves a linear least-squares problem via Householder QR decomposition.
VectorR quartiles(const VectorR &)
Returns [Q1, Q2, Q3] of a vector.
VectorR column_minimums(const Tensor2 &, const vector< Index > &={}, const vector< Index > &={})
Per-column minimums of a 2D tensor.
Map< VectorR, AlignedMax > VectorMap
Definition pch.h:185
Matrix< float, Dynamic, 1 > VectorR
Definition pch.h:181
Matrix< bool, Dynamic, 1 > VectorB
Definition pch.h:183
Tensor< float, 2, Layout|AlignedMax > Tensor2
Definition pch.h:189
Matrix< float, Dynamic, Dynamic, Layout > MatrixR
Definition pch.h:177
Matrix< Index, Dynamic, 1 > VectorI
Definition pch.h:182
Five-number summary (minimum, Q1, median, Q3, maximum) used to draw a box plot.
Definition statistics.h:48
void set(const float=NAN, float=NAN, float=NAN, float=NAN, float=NAN)
Sets the five statistics in place.
float first_quartile
Definition statistics.h:65
float median
Definition statistics.h:67
float third_quartile
Definition statistics.h:69
float minimum
Definition statistics.h:63
float maximum
Definition statistics.h:71
BoxPlot(const float=NAN, float=NAN, float=NAN, float=NAN, float=NAN)
Constructs a box plot from minimum, first quartile, median, third quartile, and maximum.
Summary statistics (minimum, maximum, mean, standard deviation) for one variable.
Definition statistics.h:18
Descriptives(const float=NAN, float=NAN, float=NAN, float=NAN)
Constructs a descriptives record from minimum, maximum, mean, and standard deviation.
void set(const float=NAN, float=NAN, float=NAN, float=NAN)
Sets the four statistics in place.
string name
Definition statistics.h:34
void save(const filesystem::path &) const
Saves the descriptives to a text file at the given path.
float minimum
Definition statistics.h:36
float mean
Definition statistics.h:40
VectorR to_tensor() const
Returns the four statistics as a length-4 vector [min, max, mean, std].
float standard_deviation
Definition statistics.h:42
void print(const string &="Descriptives:") const
Prints the descriptives to stdout under the given header.
float maximum
Definition statistics.h:38
Frequency histogram with per-bin minimums, maximums, centers, and counts.
Definition statistics.h:76
Index calculate_frequency(const float) const
Returns the frequency of the bin that contains the given value.
Index get_bins_number() const
Returns the number of bins in the histogram.
VectorR frequencies
Definition statistics.h:128
VectorR calculate_maximal_centers() const
Returns the centers of all bins tied for the maximum frequency.
VectorR calculate_minimal_centers() const
Returns the centers of all bins tied for the minimum frequency.
Index calculate_most_populated_bin() const
Returns the index of the bin with the largest frequency.
Histogram(const VectorR &, const VectorR &)
Constructs a histogram from precomputed bin centers and frequencies.
VectorR maximums
Definition statistics.h:124
void save(const filesystem::path &) const
Saves the histogram (centers and frequencies) to a text file.
Histogram(const VectorR &)
Builds a histogram of the data using a default bin count.
Index calculate_minimum_frequency() const
Returns the smallest bin frequency.
Histogram(const VectorR &, Index)
Builds a histogram of the data with the given number of equal-width bins.
Histogram(const VectorR &, const VectorR &, const VectorR &, const VectorR &)
Constructs a histogram from bin minimums, maximums, centers, and frequencies.
Index count_empty_bins() const
Returns the number of bins with zero frequency.
Index calculate_maximum_frequency() const
Returns the largest bin frequency.
VectorR minimums
Definition statistics.h:122
VectorR centers
Definition statistics.h:126
Index calculate_bin(const float) const
Returns the bin index that contains the given value.
Histogram(const Index=0)
Constructs an empty histogram with the given number of bins.