44 void save(
const filesystem::path&)
const;
288 string test_transformer(
const vector<string>& context_string,
bool imported_vocabulary)
const;
297 void save(
const filesystem::path&)
const;
300 void load(
const filesystem::path&);
304 pair<MatrixR, MatrixR> get_targets_and_outputs(
const string&)
const;
306 vector<Index> filter_classification_samples(
const MatrixR&,
const MatrixR&,
const vector<Index>&,
float,
307 bool target_positive,
bool output_positive)
const;
311 Tensor<string, 2> classify_samples(
const MatrixR&,
const MatrixR&,
const vector<string>&,
bool match)
const;
313 static VectorR extract_probabilities(
const Tensor<string, 2>&);
315 void save_classified_samples_csv(
const Tensor<string, 2>&,
const filesystem::path&)
const;
316 void save_classified_samples_statistics_csv(
const Tensor<string, 2>&,
const filesystem::path&)
const;
317 void save_classified_samples_probability_histogram(
const Tensor<string, 2>&,
const filesystem::path&)
const;
323 Index batch_size = 0;
Abstract base class for OpenNN datasets, owning samples, variables, and metadata.
Definition dataset.h:61
Container of layers forming a feed-forward neural network, with parameter storage and I/O.
Definition neural_network.h:20
vector< Descriptives > calculate_percentage_errors_descriptives(const MatrixR &, const MatrixR &) const
Computes descriptive statistics of the percentage errors between the supplied targets and outputs.
MatrixR calculate_multiple_classification_tests() const
Computes the standard multi-class classification metrics.
vector< Histogram > calculate_error_data_histograms(const Index=10) const
Builds histograms of the per-variable error data on the testing samples.
VectorI calculate_positives_negatives_rate(const MatrixR &, const MatrixR &) const
Counts positives and negatives in targets and outputs (returns true positives, false positives, false negatives and true negatives, in that order).
MatrixR calculate_cumulative_gain(const MatrixR &, const MatrixR &) const
Computes the positive cumulative gain curve from the supplied targets and outputs.
TestingAnalysis(NeuralNetwork *=nullptr, Dataset *=nullptr)
Constructs the analyser bound to an optional neural network and dataset.
float calculate_determination(const VectorR &, const VectorR &) const
Computes the coefficient of determination R^2 between the supplied target and output series.
Tensor< string, 2 > calculate_well_classified_samples(const MatrixR &, const MatrixR &, const vector< string > &) const
Returns the well-classified samples annotated with their target and output labels.
vector< Index > calculate_false_positive_samples(const MatrixR &, const MatrixR &, const vector< Index > &, float) const
Returns the indices of false positive samples given targets, outputs and a candidate index list.
Tensor3 calculate_error_data() const
Computes the per-sample, per-variable error tensor on the testing samples.
void save_well_classified_samples_statistics(const MatrixR &, const MatrixR &, const vector< string > &, const filesystem::path &) const
Saves per-class statistics of the well-classified samples to disk.
void print_linear_correlations() const
Prints the linear correlations between targets and outputs to stdout.
void check() const
Verifies that the neural network and dataset are consistent for testing analysis.
BinaryClassificationRates calculate_binary_classification_rates(const float=0.50) const
Returns the sample indices grouped into the four cells of the binary confusion matrix (true positives, false positives, false negatives, true negatives) for the given threshold.
pair< float, float > test_transformer() const
Computes loss and accuracy of a transformer model on the testing samples.
Tensor< VectorI, 2 > calculate_multiple_classification_rates() const
Returns the per-cell sample indices of the multi-class confusion matrix for the testing samples.
string test_transformer(const vector< string > &context_string, bool imported_vocabulary) const
Generates a transformer prediction string from a context.
Tensor< string, 2 > calculate_misclassified_samples(const MatrixR &, const MatrixR &, const vector< string > &) const
Returns the misclassified samples annotated with their target and output labels.
vector< VectorR > calculate_error_autocorrelation(const Index=10) const
Computes the autocorrelation of the residual errors up to the given lag.
void print_error_data_descriptives() const
Prints the error-data descriptive statistics to stdout.
void from_JSON(const JsonDocument &)
Loads the testing analysis configuration from a JSON document.
const Dataset * get_dataset() const
Definition testing_analysis.h:87
void set_neural_network(NeuralNetwork *new_neural_network)
Definition testing_analysis.h:88
void print_binary_classification_tests() const
Prints binary classification metrics to stdout.
float calculate_optimal_threshold(const MatrixR &) const
Computes the decision threshold that maximizes the ROC criterion.
float calculate_area_under_curve(const MatrixR &) const
Computes the area under the supplied ROC curve.
void set_batch_size(Index new_batch_size)
Definition testing_analysis.h:90
void print_goodness_of_fit_analysis() const
Prints the goodness-of-fit analysis to stdout.
VectorR calculate_maximum_gain(const MatrixR &, const MatrixR &) const
Computes the maximum gain between positive and negative cumulative gain curves.
void save_well_classified_samples(const MatrixR &, const MatrixR &, const vector< string > &, const filesystem::path &) const
Saves the well-classified samples annotated table to disk.
KolmogorovSmirnovResults perform_Kolmogorov_Smirnov_analysis() const
Performs a Kolmogorov-Smirnov analysis on the testing samples.
const NeuralNetwork * get_neural_network() const
Definition testing_analysis.h:86
vector< vector< Descriptives > > calculate_error_data_descriptives() const
Computes descriptive statistics of the per-sample error data on the testing samples.
MatrixR calculate_roc_curve(const MatrixR &, const MatrixR &) const
Computes the ROC curve from the supplied targets and outputs.
vector< Descriptives > calculate_percentage_errors_descriptives() const
Computes descriptive statistics of the percentage errors over the testing samples.
MatrixR calculate_lift_chart(const MatrixR &) const
Computes the lift chart from the supplied cumulative gain matrix.
void save(const filesystem::path &) const
Saves the testing analysis configuration to disk.
Tensor< VectorI, 2 > calculate_multiple_classification_rates(const MatrixR &, const MatrixR &, const vector< Index > &) const
Returns the per-cell sample indices of the multi-class confusion matrix for the supplied data.
Tensor< Correlation, 1 > linear_correlation(const MatrixR &, const MatrixR &) const
Computes the linear correlation between each target/output column pair.
vector< VectorR > calculate_inputs_errors_cross_correlation(const Index=10) const
Computes the cross-correlation between input variables and residual errors up to the given lag.
MatrixR calculate_errors() const
Computes the per-variable error metrics on the testing samples.
RocAnalysis perform_roc_analysis() const
Performs ROC analysis on the testing samples.
Tensor< VectorI, 1 > calculate_maximal_errors(const Index=10) const
Returns the indices of the samples with the largest errors per output variable.
Index get_batch_size() const
Definition testing_analysis.h:91
float calculate_area_under_curve_confidence_limit(const MatrixR &, const MatrixR &) const
Computes the confidence limit of the area under the ROC curve.
vector< Histogram > calculate_output_histogram(const MatrixR &, Index=10) const
Builds histograms of the supplied outputs for the given number of bins.
MatrixR calculate_multiple_classification_errors() const
Computes multi-class classification error metrics on the testing samples.
MatrixR calculate_error() const
Computes the overall error matrix between targets and outputs on the testing samples.
VectorR calculate_errors(const MatrixR &, const MatrixR &) const
Computes the error metrics for the supplied targets and outputs.
MatrixR calculate_binary_classification_errors() const
Computes binary classification error metrics on the testing samples.
VectorR calculate_multiple_classification_errors(const string &) const
Computes multi-class classification error metrics on the samples with the given role name.
void set_dataset(Dataset *new_dataset)
Definition testing_analysis.h:89
MatrixI calculate_confusion(const MatrixR &, const MatrixR &, float=0.50) const
Computes the confusion matrix from the supplied targets and outputs.
MatrixR perform_cumulative_gain_analysis() const
Performs a cumulative gain analysis on the testing samples.
MatrixI calculate_confusion(const float=0.50) const
Computes the confusion matrix on the testing samples for the given decision threshold.
vector< Index > calculate_false_negative_samples(const MatrixR &, const MatrixR &, const vector< Index > &, float) const
Returns the indices of false negative samples given targets, outputs and a candidate index list.
void save_misclassified_samples(const MatrixR &, const MatrixR &, const vector< string > &, const filesystem::path &) const
Saves the misclassified samples annotated table to disk.
MatrixR calculate_negative_cumulative_gain(const MatrixR &, const MatrixR &) const
Computes the negative cumulative gain curve from the supplied targets and outputs.
MatrixR calculate_percentage_error_data() const
Computes the per-sample percentage error matrix on the testing samples.
VectorR calculate_errors(const string &) const
Computes the error metrics for the sample subset with the given role name.
VectorR calculate_multiple_classification_precision() const
Computes the per-class precision for multi-class classification.
vector< Descriptives > calculate_absolute_errors_descriptives() const
Computes descriptive statistics of the absolute errors over the testing samples.
VectorR calculate_binary_classification_errors(const string &) const
Computes binary classification error metrics on the samples with the given role name.
Tensor< GoodnessOfFitAnalysis, 1 > perform_goodness_of_fit_analysis() const
Performs goodness-of-fit analysis for each output variable.
VectorR calculate_binary_classification_tests(const float=0.50) const
Computes the standard binary classification metrics for the given decision threshold.
vector< MatrixI > calculate_multilabel_confusion(const float) const
Computes per-label binary confusion matrices for multi-label classification.
vector< Index > calculate_true_positive_samples(const MatrixR &, const MatrixR &, const vector< Index > &, float) const
Returns the indices of true positive samples given targets, outputs and a candidate index list.
void save_multiple_classification_tests(const filesystem::path &) const
Saves the multi-class classification metrics of the testing samples to disk.
void load(const filesystem::path &)
Loads the testing analysis configuration from disk.
MatrixR perform_lift_chart_analysis() const
Performs a lift chart analysis on the testing samples.
vector< Descriptives > calculate_absolute_errors_descriptives(const MatrixR &, const MatrixR &) const
Computes descriptive statistics of the absolute errors between the supplied targets and outputs.
void to_JSON(JsonWriter &) const
Writes the testing analysis configuration to a JSON writer.
void save_confusion(const filesystem::path &) const
Saves the confusion matrix of the testing samples to disk.
vector< Index > calculate_true_negative_samples(const MatrixR &, const MatrixR &, const vector< Index > &, float) const
Returns the indices of true negative samples given targets, outputs and a candidate index list.
float calculate_masked_accuracy(const Tensor3 &, const MatrixR &) const
Computes accuracy when a masking matrix indicates which tokens to consider (e.g. for language models).
void save_misclassified_samples_statistics(const MatrixR &, const MatrixR &, const vector< string > &, const filesystem::path &) const
Saves per-class statistics of the misclassified samples to disk.
Definition adaptive_moment_estimation.h:14
Tensor< float, 3, Layout|AlignedMax > Tensor3
Definition pch.h:190
Matrix< float, Dynamic, 1 > VectorR
Definition pch.h:181
Matrix< Index, Dynamic, Dynamic, Layout > MatrixI
Definition pch.h:178
Matrix< float, Dynamic, Dynamic, Layout > MatrixR
Definition pch.h:177
Matrix< Index, Dynamic, 1 > VectorI
Definition pch.h:182
Result of a correlation analysis: model parameters, fit quality, and the method/form used.
Definition correlations.h:18
Summary statistics (minimum, maximum, mean, standard deviation) for one variable.
Definition statistics.h:18
Frequency histogram with per-bin minimums, maximums, centers, and counts.
Definition statistics.h:76
Sample indices split into the four cells of a binary classification confusion matrix.
Definition testing_analysis.h:77
vector< Index > false_positives_indices
Definition testing_analysis.h:80
vector< Index > true_positives_indices
Definition testing_analysis.h:78
vector< Index > false_negatives_indices
Definition testing_analysis.h:82
vector< Index > true_negatives_indices
Definition testing_analysis.h:84
Coefficient of determination and the matching target/output series for a single output variable.
Definition testing_analysis.h:34
void print() const
Prints the analysis to stdout.
void set(const VectorR &, const VectorR &, float)
Stores the target and output series together with the determination coefficient.
VectorR outputs
Definition testing_analysis.h:38
float determination
Definition testing_analysis.h:35
void save(const filesystem::path &) const
Saves the analysis to disk.
VectorR targets
Definition testing_analysis.h:37
Results of a Kolmogorov-Smirnov analysis: cumulative gains and maximum gain.
Definition testing_analysis.h:67
MatrixR positive_cumulative_gain
Definition testing_analysis.h:68
MatrixR negative_cumulative_gain
Definition testing_analysis.h:70
VectorR maximum_gain
Definition testing_analysis.h:72
Results of a ROC analysis: ROC curve, area under it and optimal threshold.
Definition testing_analysis.h:52
void print() const
Prints the ROC analysis to stdout.
MatrixR roc_curve
Definition testing_analysis.h:53
float area_under_curve
Definition testing_analysis.h:55
float confidence_limit
Definition testing_analysis.h:57
float optimal_threshold
Definition testing_analysis.h:59