/// Constructs a descriptives record from minimum, maximum, mean, and standard deviation.
/// Every argument is optional and defaults to NAN.
20 Descriptives(
const float = NAN,
float = NAN,
float = NAN,
float = NAN);
/// Sets the four statistics in place.
/// Every argument is optional and defaults to NAN.
/// NOTE(review): argument order is presumably minimum, maximum, mean, standard deviation
/// (same as the constructor) -- confirm against the definition.
26 void set(
const float = NAN,
float = NAN,
float = NAN,
float = NAN);
/// Saves the descriptives to a text file at the given path.
/// Declared const: the record itself is not modified.
29 void save(
const filesystem::path&)
const;
/// Prints the descriptives to stdout under the given header.
/// The header is optional and defaults to "Descriptives:".
32 void print(
const string& =
"Descriptives:")
const;
/// Name attached to this record; initialized to "Descriptives".
/// NOTE(review): presumably the label of the variable being described -- confirm with callers.
34 string name =
"Descriptives";
57 void set(
const float = NAN,
/// Saves this object to a text file at the given path; declared const.
/// NOTE(review): listing line 120 appears to fall inside Histogram (members at 76-128),
/// so this is presumably Histogram::save (centers and frequencies) -- confirm.
120 void save(
const filesystem::path&)
const;
/// Computes a vector of Descriptives from a matrix and two index lists.
/// NOTE(review): the two index vectors presumably select the rows and columns to include,
/// mirroring the single-argument per-column overload -- confirm against the definition.
/// The result is marked [[nodiscard]]; ignoring it triggers a compiler warning.
190[[nodiscard]] vector<Descriptives>
descriptives(
const MatrixR&,
const vector<Index>&,
const vector<Index>&);
218[[nodiscard]]
inline bool row_finite(
const VectorR& values, Index i) {
return isfinite(values(i)); }
220[[nodiscard]]
inline bool row_finite(
const MatrixR& matrix, Index i) {
return matrix.row(i).array().isFinite().all(); }
225 VectorR result(indices.size());
226 for (Index i = 0; i < Index(indices.size()); ++i) result(i) = values(indices[i]);
233 MatrixR result(indices.size(), matrix.cols());
234 for (Index i = 0; i < Index(indices.size()); ++i) result.row(i) = matrix.row(indices[i]);
242template<
typename X,
typename Y>
245 if (x.rows() != y.rows())
246 throw runtime_error(
"filter_missing_values: row count mismatch");
249 valid.reserve(x.rows());
251 for (Index i = 0; i < x.rows(); ++i)
261 return ranges::adjacent_find(indices,
262 [](Index a, Index b) {
return b != a + 1; }) == indices.end();
269 return all_of(tensor.data(), tensor.data() + tensor.size(),
270 [](
float value) { return value == 0.0f || value == 1.0f || isnan(value); });
278[[nodiscard]] vector<T>
gather_by_index(
const vector<T>& data,
const vector<Index>& indices)
281 result.reserve(indices.size());
283 ranges::transform(indices, back_inserter(result),
284 [&data](Index i) {
return data[i]; });
296 const float* data = tensor.data();
297 const float* end = data + tensor.size();
299 const float* first = find_if(data, end, [](
float value) {
return !isnan(value); });
304 const float reference_value = *first;
306 return all_of(first + 1, end,
307 [reference_value](
float value) {
return isnan(value) || abs(reference_value - value) <= numeric_limits<float>::min(); });
313 vector<Index> indices;
314 indices.reserve(flags.size());
316 for (Index i = 0; i < flags.size(); ++i)
318 indices.push_back(i);
Definition adaptive_moment_estimation.h:14
bool is_constant(const T &tensor)
Returns true if every non-NaN entry of the tensor equals the first non-NaN entry (to within numeric_limits<float>::min()).
Definition statistics.h:294
bool is_binary(const T &tensor)
Returns true if every non-NaN entry of the tensor is exactly 0.0 or 1.0.
Definition statistics.h:267
VectorMap vector_map(const MatrixR &, Index)
Returns an Eigen VectorMap that views a single column of a matrix without copying.
Index minimal_index(const VectorR &)
Index of the smallest element in a vector.
float mean(const VectorR &)
Arithmetic mean of a vector, ignoring NaNs.
VectorR filter_missing_values(const VectorR &)
Returns a copy of the vector with NaN entries removed.
BoxPlot box_plot(const VectorR &)
Five-number summary (box plot) of a vector.
Index maximal_index(const VectorR &)
Index of the largest element in a vector.
float variance(const VectorR &)
Sample variance of a vector.
void fill_tensor_data(const MatrixR &, const vector< Index > &, const vector< Index > &, float *, bool=true, int contiguous=-1)
Copies the selected sub-matrix into a flat float buffer.
Histogram histogram(const VectorR &, Index=10)
Builds an equal-width histogram of a vector.
float maximum(const MatrixR &)
Returns the largest finite element of a matrix.
vector< Descriptives > descriptives(const MatrixR &)
Returns the per-column descriptives of a matrix.
float median(const VectorR &)
Median of a vector.
float minimum(const MatrixR &)
Returns the smallest finite element of a matrix.
VectorI maximal_indices(const VectorR &, Index)
Indices of the n largest elements of a vector.
Descriptives vector_descriptives(const VectorR &)
Returns the (min, max, mean, std) descriptives of a vector.
float range(const VectorR &)
Returns the maximum minus the minimum of a vector.
VectorI minimal_indices(const VectorR &, Index)
Indices of the n smallest elements of a vector.
bool is_contiguous(const vector< Index > &indices)
Returns true if the sorted indices form a contiguous run (each entry equals the previous plus one).
Definition statistics.h:259
bool row_finite(const VectorR &values, Index i)
Returns true if the i-th entry of the vector is finite.
Definition statistics.h:218
VectorR column_maximums(const Tensor2 &, const vector< Index > &={}, const vector< Index > &={})
Per-column maximums of a 2D tensor, optionally restricted to a row/column subset.
float standard_deviation(const VectorR &)
Sample standard deviation of a vector.
VectorI calculate_rank(const VectorR &, bool ascending=true)
Returns the rank of each element (1-based), ascending by default.
vector< Index > get_elements_greater_than(const vector< Index > &, Index)
Returns the entries of indices that are strictly greater than the given threshold.
VectorI total_frequencies(const vector< Histogram > &)
Sums the per-bin frequencies across a collection of histograms.
Histogram histogram_centered(const VectorR &, float=0.0f, Index=10)
Builds a histogram with one bin centered on the given value.
VectorR slice_rows(const VectorR &values, const vector< Index > &indices)
Returns a copy of the vector containing only the entries at the given indices.
Definition statistics.h:223
vector< Index > get_true_indices(const VectorB &flags)
Returns the positions of the true entries in a boolean vector.
Definition statistics.h:311
VectorI get_nearest_points(const MatrixR &, const VectorR &, int=1)
Finds the n rows of the matrix closest to the given point by Euclidean distance.
MatrixR append_rows(const MatrixR &, const MatrixR &)
Returns the row-wise concatenation of two matrices with matching column counts.
vector< Histogram > histograms(const MatrixR &, Index=10)
Builds one histogram per matrix column.
vector< T > gather_by_index(const vector< T > &data, const vector< Index > &indices)
Returns the elements of data at the given indices.
Definition statistics.h:278
vector< Index > build_feasible_rows_mask(const MatrixR &outputs, const VectorR &minimums, const VectorR &maximums)
Returns the indices of the rows of outputs that lie within the per-column bounds.
VectorR perform_Householder_QR_decomposition(const MatrixR &, const VectorR &)
Solves a linear least-squares problem via Householder QR decomposition.
VectorR quartiles(const VectorR &)
Returns [Q1, Q2, Q3] of a vector.
VectorR column_minimums(const Tensor2 &, const vector< Index > &={}, const vector< Index > &={})
Per-column minimums of a 2D tensor, optionally restricted to a row/column subset.
Map< VectorR, AlignedMax > VectorMap
Definition pch.h:185
Matrix< float, Dynamic, 1 > VectorR
Definition pch.h:181
Matrix< bool, Dynamic, 1 > VectorB
Definition pch.h:183
Tensor< float, 2, Layout|AlignedMax > Tensor2
Definition pch.h:189
Matrix< float, Dynamic, Dynamic, Layout > MatrixR
Definition pch.h:177
Matrix< Index, Dynamic, 1 > VectorI
Definition pch.h:182
BoxPlot: five-number summary (minimum, Q1, median, Q3, maximum) used to draw a box plot.
Definition statistics.h:48
void set(const float=NAN, float=NAN, float=NAN, float=NAN, float=NAN)
Sets the five statistics in place.
float first_quartile
Definition statistics.h:65
float median
Definition statistics.h:67
float third_quartile
Definition statistics.h:69
float minimum
Definition statistics.h:63
float maximum
Definition statistics.h:71
BoxPlot(const float=NAN, float=NAN, float=NAN, float=NAN, float=NAN)
Constructs a box plot from minimum, first quartile, median, third quartile, and maximum.
Descriptives: summary statistics (minimum, maximum, mean, standard deviation) for one variable.
Definition statistics.h:18
Descriptives(const float=NAN, float=NAN, float=NAN, float=NAN)
Constructs a descriptives record from minimum, maximum, mean, and standard deviation.
void set(const float=NAN, float=NAN, float=NAN, float=NAN)
Sets the four statistics in place.
string name
Definition statistics.h:34
void save(const filesystem::path &) const
Saves the descriptives to a text file at the given path.
float minimum
Definition statistics.h:36
float mean
Definition statistics.h:40
VectorR to_tensor() const
Returns the four statistics as a length-4 vector [min, max, mean, std].
float standard_deviation
Definition statistics.h:42
void print(const string &="Descriptives:") const
Prints the descriptives to stdout under the given header.
float maximum
Definition statistics.h:38
Histogram: frequency histogram with per-bin minimums, maximums, centers, and counts.
Definition statistics.h:76
Index calculate_frequency(const float) const
Returns the frequency of the bin that contains the given value.
Index get_bins_number() const
Returns the number of bins in the histogram.
VectorR frequencies
Definition statistics.h:128
VectorR calculate_maximal_centers() const
Returns the centers of all bins tied for the maximum frequency.
VectorR calculate_minimal_centers() const
Returns the centers of all bins tied for the minimum frequency.
Index calculate_most_populated_bin() const
Returns the index of the bin with the largest frequency.
Histogram(const VectorR &, const VectorR &)
Constructs a histogram from precomputed bin centers and frequencies.
VectorR maximums
Definition statistics.h:124
void save(const filesystem::path &) const
Saves the histogram (centers and frequencies) to a text file.
Histogram(const VectorR &)
Builds a histogram of the data using a default bin count.
Index calculate_minimum_frequency() const
Returns the smallest bin frequency.
Histogram(const VectorR &, Index)
Builds a histogram of the data with the given number of equal-width bins.
Histogram(const VectorR &, const VectorR &, const VectorR &, const VectorR &)
Constructs a histogram from bin minimums, maximums, centers, and frequencies.
Index count_empty_bins() const
Returns the number of bins with zero frequency.
Index calculate_maximum_frequency() const
Returns the largest bin frequency.
VectorR minimums
Definition statistics.h:122
VectorR centers
Definition statistics.h:126
Index calculate_bin(const float) const
Returns the bin index that contains the given value.
Histogram(const Index=0)
Constructs an empty histogram with the given number of bins.