|
OpenNN
Open-source neural networks library
|
Abstract base class for OpenNN datasets, owning samples, variables, and metadata. More...
#include <dataset.h>
Public Types | |
| enum class | Codification { UTF8 , SHIFT_JIS } |
| Text encoding of the source data file. More... | |
| enum class | Separator { Space , Tab , Comma , Semicolon } |
| Field separator used when reading delimited text files. More... | |
Public Member Functions | |
| virtual | ~Dataset ()=default |
| virtual Index | get_samples_number () const |
| Returns the total number of samples (rows) in the data matrix. | |
| Index | get_samples_number (const string &) const |
| Returns the number of samples with the given role ("Training", "Validation", ...). | |
| Index | get_used_samples_number () const |
| Returns the number of samples whose role is not None. | |
| vector< Index > | get_sample_indices (const string &) const |
| Returns indices of samples with the given role name. | |
| vector< Index > | get_used_sample_indices () const |
| Returns indices of all samples whose role is not None. | |
| const vector< SampleRole > & | get_sample_roles () const |
| Returns the per-sample role vector. | |
| vector< Index > | get_sample_roles_vector () const |
| Returns the per-sample roles as integer indices. | |
| VectorI | get_sample_role_numbers () const |
| Returns the per-sample roles as a tensor of integer indices. | |
| Index | get_variables_number () const |
| Returns the total number of variables (column descriptors). | |
| Index | get_variables_number (const string &) const |
| Returns the number of variables with the given role name. | |
| Index | get_used_variables_number () const |
| Returns the number of variables whose role is in use. | |
| const vector< Variable > & | get_variables () const |
| Returns the variable descriptors. | |
| vector< Variable > | get_variables (const string &) const |
| Returns the variables with the given role name. | |
| Index | get_variable_index (const string &) const |
| Returns the index of the variable with the given name. | |
| Index | get_variable_index (const Index) const |
| Returns the variable index corresponding to a flat feature index. | |
| vector< Index > | get_variable_indices (const string &) const |
| Returns the indices of variables matching the given role name. | |
| vector< Index > | get_used_variables_indices () const |
| Returns the indices of all in-use variables. | |
| vector< string > | get_variable_names () const |
| Returns the names of all variables. | |
| vector< string > | get_variable_names (const string &) const |
| Returns the names of variables with the given role name. | |
| VariableType | get_variable_type (const Index index) const |
| Returns the VariableType of the variable at the given index. | |
| vector< VariableType > | get_variable_types (const vector< Index > &indices) const |
| Returns the VariableType for each of the given variable indices. | |
| Index | get_features_number () const |
| Returns the total number of features (data matrix columns). | |
| Index | get_features_number (const string &) const |
| Returns the number of features for variables with the given role name. | |
| Index | get_used_features_number () const |
| Returns the number of features for in-use variables. | |
| vector< string > | get_feature_names () const |
| Returns the expanded feature names (one entry per data matrix column). | |
| vector< string > | get_feature_names (const string &) const |
| Returns the expanded feature names restricted to the given role. | |
| vector< vector< Index > > | get_feature_indices () const |
| Returns the data column indices grouped per variable. | |
| vector< Index > | get_feature_indices (const Index) const |
| Returns the data column indices that belong to the given variable index. | |
| vector< Index > | get_feature_indices (const string &) const |
| Returns the data column indices that belong to variables with the given role name. | |
| vector< Index > | get_used_feature_indices () const |
| Returns the data column indices that belong to in-use variables. | |
| vector< Index > | get_feature_dimensions () const |
| Returns the per-variable feature dimension counts. | |
| Shape | get_shape (const string &) const |
| Returns the configured Shape for the given role ("Input", "Target", "Decoder"). | |
| virtual void | get_batches (const vector< Index > &, Index, bool, vector< vector< Index > > &) const |
| Splits sample indices into batches and writes them into the batches output argument. | |
| const vector< vector< string > > & | get_data_file_preview () const |
| Returns the parsed preview rows captured during the last file read. | |
| const filesystem::path & | get_data_path () const |
| Returns the configured data file path. | |
| const Separator & | get_separator () const |
| Returns the current field Separator. | |
| string | get_separator_string () const |
| Returns the separator as the literal character used in files. | |
| string | get_separator_name () const |
| Returns the separator as its enumerator name ("Space", "Tab", ...). | |
| const Codification & | get_codification () const |
| Returns the configured text Codification. | |
| string | get_codification_string () const |
| Returns the codification as its enumerator name. | |
| bool | get_display () const |
| Returns whether progress messages are printed. | |
| virtual bool | is_empty () const |
| Returns true when the dataset contains no samples. | |
| Shape | get_input_shape () const |
| Returns the configured input tensor shape. | |
| Shape | get_target_shape () const |
| Returns the configured target tensor shape. | |
| const MatrixR & | get_data () const |
| Returns the raw data matrix (rows = samples, columns = features). | |
| void | set_data (const MatrixR &) |
| Replaces the underlying data matrix. | |
| void | set_data_constant (const float) |
| Fills the data matrix with the given constant value. | |
| void | set_default () |
| Restores default dataset settings. | |
| void | set_sample_roles (const string &) |
| Assigns the same role to every sample. | |
| void | set_sample_role (const Index, const string &) |
| Sets the role of a single sample by index. | |
| void | set_sample_roles (const vector< string > &) |
| Assigns a role per sample from a string vector. | |
| void | set_sample_roles (const vector< Index > &, const string &) |
| Assigns the same role to the given sample indices. | |
| void | set_variables (const vector< Variable > &new_variables) |
| void | set_default_variable_names () |
| Assigns default placeholder names to all variables. | |
| void | set_variable_roles (const vector< string > &) |
| Sets the role of every variable from the given string list. | |
| void | set_variable_indices (const vector< Index > &, const vector< Index > &) |
| Sets which variable indices are inputs and which are targets. | |
| void | set_input_variables_unused () |
| Marks all input variables as unused (role None). | |
| void | set_variable_role (const Index, const string &) |
| Sets the role of the variable at the given index. | |
| void | set_variable_role (const string &, const string &) |
| Sets the role of the variable with the given name. | |
| void | set_variable_type (const Index, const VariableType &) |
| Sets the type of the variable at the given index. | |
| void | set_variable_type (const string &, const VariableType &) |
| Sets the type of the variable with the given name. | |
| void | set_variable_types (const VariableType &) |
| Sets every variable to the given type. | |
| void | set_binary_variables () |
| Detects and marks variables with binary values as VariableType::Binary. | |
| void | set_variable_names (const vector< string > &) |
| Assigns names to all variables from the given list. | |
| void | set_variables_number (const Index new_size) |
| void | set_feature_names (const vector< string > &) |
| Assigns expanded feature names, propagating categories back to variables. | |
| void | set_variable_roles (const string &) |
| Assigns the same role to every variable. | |
| void | set_shape (const string &, const Shape &) |
| Sets the tensor Shape associated with the given role ("Input"/"Target"/"Decoder"). | |
| virtual void | resize_input_shape (Index input_features_count) |
| Resizes the input shape to the given flat feature count. | |
| void | set_data_path (const filesystem::path &new_data_path) |
| void | set_has_header (bool new_has_header) |
| void | set_has_ids (bool new_has_ids) |
| void | set_separator (const Separator &new_separator) |
| void | set_separator_string (const string &) |
| Sets the separator from its literal character. | |
| void | set_separator_name (const string &) |
| Sets the separator from its enumerator name. | |
| void | set_codification (const Codification &new_codification) |
| void | set_codification (const string &) |
| Sets the codification from its enumerator name. | |
| void | set_display (bool new_display) |
| bool | is_sample_used (const Index i) const |
| Returns true if the sample at index i has a role other than None. | |
| bool | has_binary_variables () const |
| Returns true if any variable has type Binary. | |
| bool | has_categorical_variables () const |
| Returns true if any variable has type Categorical. | |
| bool | has_binary_or_categorical_variables () const |
| Returns true if any variable is Binary or Categorical. | |
| bool | has_time_variable () const |
| Returns true if any variable has role Time. | |
| bool | has_validation () const |
| Returns true if any sample is assigned the Validation role. | |
| void | split_samples (const float training_ratio=0.6f, float selection_ratio=0.2f, float testing_ratio=0.2f, bool shuffle=true) |
| Splits samples into Training/Validation/Testing roles, optionally shuffled. | |
| void | split_samples_sequential (const float training_ratio=0.6f, float selection_ratio=0.2f, float testing_ratio=0.2f) |
| Splits samples sequentially without shuffling. | |
| void | split_samples_random (const float training_ratio=0.6f, float selection_ratio=0.2f, float testing_ratio=0.2f) |
| Splits samples randomly across roles. | |
| vector< vector< Index > > | split_samples (const vector< Index > &, Index) const |
| Splits the given indices into chunks of the requested size. | |
| virtual vector< Descriptives > | scale_features (const string &) |
| Scales the features with the configured method; returns the applied descriptives. | |
| virtual void | set_data_random () |
| Fills the data matrix with random values (no-op in base class). | |
| virtual void | set_data_integer (const Index) |
| Fills the data matrix with random integers up to the given vocabulary size. | |
| virtual void | from_JSON (const JsonDocument &)=0 |
| Loads dataset state from a JSON document. | |
| virtual void | to_JSON (JsonWriter &) const |
| Writes dataset state to a JSON writer. | |
| MatrixR | get_data (const string &, const string &) const |
| Returns the data submatrix for the given sample role and feature role. | |
| MatrixR | get_data_from_indices (const vector< Index > &, const vector< Index > &) const |
| Returns a submatrix from the data using explicit row and column indices. | |
| VectorR | get_sample_data (const Index) const |
| Returns the row of the data matrix at the given sample index. | |
| MatrixR | get_variable_data (const Index) const |
| Returns the data columns belonging to the variable at the given index. | |
| MatrixR | get_variable_data (const Index, const vector< Index > &) const |
| Returns the variable data restricted to the given sample indices. | |
| MatrixR | get_variable_data (const string &) const |
| Returns the data columns of the variable with the given name. | |
| MatrixR | get_feature_data (const string &) const |
| Returns the data columns belonging to features with the given role name. | |
| void | set (const Index=0, const Shape &={}, const Shape &={}) |
| Resets the dataset with the given sample count and input/target shapes. | |
| bool | has_nan () const |
| Returns true if any entry in the data matrix is NaN. | |
| bool | has_nan_row (const Index) const |
| Returns true if the sample at the given row contains a NaN. | |
| VectorI | count_nans_per_variable () const |
| Returns the NaN count for each variable. | |
| Index | count_variables_with_nan () const |
| Returns the number of variables that contain at least one NaN. | |
| Index | count_rows_with_nan () const |
| Returns the number of rows that contain at least one NaN. | |
| Index | count_nan () const |
| Returns the total NaN count in the data matrix. | |
| virtual void | scrub_missing_values () |
| Removes or imputes missing values using the configured strategy. | |
| virtual vector< Descriptives > | calculate_feature_descriptives (const string &) const |
| Returns descriptive statistics for features with the given role (subclass-specific). | |
| virtual Tensor< Correlation, 2 > | calculate_input_target_variable_pearson_correlations () const |
| Returns the Pearson correlations between input and target variables. | |
| virtual VectorI | calculate_target_distribution () const |
| Returns the distribution of target values. | |
| virtual VectorI | calculate_correlations_rank () const |
| Returns input variables ranked by absolute correlation with the target. | |
| virtual void | unscale_features (const string &, const vector< Descriptives > &) |
| Reverts a previously applied scaling using the supplied descriptives. | |
| void | save (const filesystem::path &) const |
| Saves the dataset metadata to a JSON file at the given path. | |
| void | load (const filesystem::path &) |
| Loads the dataset metadata from the JSON file at the given path. | |
| void | save_data () const |
| Saves the data matrix to the configured data path. | |
| void | save_data_binary (const filesystem::path &) const |
| Saves the data matrix to the given path in binary form. | |
| void | load_data_binary () |
| Loads the data matrix from the binary file at the configured data path. | |
| virtual void | fill_inputs (const vector< Index > &, const vector< Index > &, float *, bool is_training, bool parallelize=true, int contiguous=-1) const |
| Copies input features of the given samples into a destination buffer. | |
| virtual void | augment_inputs (float *, Index) const |
| Applies data augmentation in place to the input buffer (no-op in base class). | |
| virtual void | fill_decoder (const vector< Index > &, const vector< Index > &, float *, bool is_training, bool parallelize=true, int contiguous=-1) const |
| Copies decoder input features into a destination buffer (sequence models). | |
| virtual void | fill_targets (const vector< Index > &, const vector< Index > &, float *, bool is_training, bool parallelize=true, int contiguous=-1) const |
| Copies target features of the given samples into a destination buffer. | |
Protected Member Functions | |
| Dataset ()=default | |
| void | set_default_variable_roles () |
| void | set_default_variable_roles_forecasting () |
| void | infer_variable_types_from_data () |
| void | read_data_file_preview (const vector< vector< string_view > > &) |
| void | check_separators (string_view) const |
| void | samples_from_JSON (const Json *) |
| void | variables_to_JSON (JsonWriter &) const |
| void | samples_to_JSON (JsonWriter &) const |
| void | preview_data_to_JSON (JsonWriter &) const |
| void | variables_from_JSON (const Json *) |
| void | preview_data_from_JSON (const Json *) |
Protected Attributes | |
| Shape | input_shape |
| Shape | target_shape |
| Shape | decoder_shape |
| vector< SampleRole > | sample_roles |
| vector< string > | sample_ids |
| vector< Variable > | variables |
| MatrixR | data |
| filesystem::path | data_path |
| Separator | separator = Separator::Comma |
| bool | has_header = false |
| bool | has_sample_ids = false |
| Codification | codification = Codification::UTF8 |
| vector< vector< string > > | data_file_preview |
| bool | display = true |
| const vector< string > | positive_words = {"1", "yes", "positive", "+", "true", "good", "si", "sí", "Sí"} |
| const vector< string > | negative_words = {"0", "no", "negative", "-", "false", "bad", "not", "No"} |
Abstract base class for OpenNN datasets, owning samples, variables, and metadata.
|
strong |
|
strong |
|
virtual default |
|
protected default |
|
inline virtual |
Applies data augmentation in place to the input buffer (no-op in base class).
Reimplemented in opennn::ImageDataset.
|
inline nodiscard virtual |
Returns input variables ranked by absolute correlation with the target.
Reimplemented in opennn::TabularDataset.
|
inline nodiscard virtual |
Returns descriptive statistics for features with the given role (subclass-specific).
Reimplemented in opennn::TabularDataset.
|
inline nodiscard virtual |
Returns the Pearson correlations between input and target variables.
Reimplemented in opennn::TabularDataset.
|
inline nodiscard virtual |
Returns the distribution of target values.
Reimplemented in opennn::LanguageDataset and opennn::TabularDataset.
|
protected |
|
nodiscard |
Returns the total NaN count in the data matrix.
|
nodiscard |
Returns the NaN count for each variable.
|
nodiscard |
Returns the number of rows that contain at least one NaN.
|
nodiscard |
Returns the number of variables that contain at least one NaN.
|
virtual |
Copies decoder input features into a destination buffer (sequence models).
Reimplemented in opennn::LanguageDataset.
|
virtual |
Copies input features of the given samples into a destination buffer.
| sample_indices | Row indices to read. |
| variable_indices | Variable indices selecting which features to copy. |
| destination | Output pointer that receives the flattened tensor. |
| is_training | True when the call is part of a training batch. |
| parallelize | If true, parallelize the copy. |
| contiguous | Stride hint for contiguous copies (-1 = auto). |
Reimplemented in opennn::ImageDataset, opennn::LanguageDataset, and opennn::TimeSeriesDataset.
|
virtual |
Copies target features of the given samples into a destination buffer.
Reimplemented in opennn::ImageDataset, opennn::LanguageDataset, and opennn::TimeSeriesDataset.
|
pure virtual |
Loads dataset state from a JSON document.
Implemented in opennn::ImageDataset, opennn::LanguageDataset, opennn::TabularDataset, and opennn::TimeSeriesDataset.
|
virtual |
Splits sample indices into batches and writes them into the batches output argument.
| indices | Sample indices to draw from. |
| batch_size | Number of samples per batch. |
| shuffle | Whether to shuffle indices before batching. |
| batches | Output vector receiving the batched index lists. |
|
inline nodiscard |
Returns the configured text Codification.
|
nodiscard |
Returns the codification as its enumerator name.
|
inline nodiscard |
Returns the raw data matrix (rows = samples, columns = features).
|
nodiscard |
Returns the data submatrix for the given sample role and feature role.
|
inline nodiscard |
Returns the parsed preview rows captured during the last file read.
|
nodiscard |
Returns a submatrix from the data using explicit row and column indices.
|
inline nodiscard |
Returns the configured data file path.
|
inline nodiscard |
Returns whether progress messages are printed.
|
nodiscard |
Returns the data columns belonging to features with the given role name.
|
nodiscard |
Returns the per-variable feature dimension counts.
|
nodiscard |
Returns the data column indices grouped per variable.
|
nodiscard |
Returns the data column indices that belong to the given variable index.
|
nodiscard |
Returns the data column indices that belong to variables with the given role name.
|
nodiscard |
Returns the expanded feature names (one entry per data matrix column).
|
nodiscard |
Returns the expanded feature names restricted to the given role.
|
nodiscard |
Returns the total number of features (data matrix columns).
|
nodiscard |
Returns the number of features for variables with the given role name.
|
inline nodiscard |
Returns the configured input tensor shape.
|
nodiscard |
Returns the row of the data matrix at the given sample index.
|
nodiscard |
Returns indices of samples with the given role name.
|
nodiscard |
Returns the per-sample roles as a tensor of integer indices.
|
inline nodiscard |
Returns the per-sample role vector.
|
nodiscard |
Returns the per-sample roles as integer indices.
|
inline nodiscard virtual |
Returns the total number of samples (rows) in the data matrix.
Reimplemented in opennn::ImageDataset and opennn::LanguageDataset.
|
nodiscard |
Returns the number of samples with the given role ("Training", "Validation", ...).
|
inline nodiscard |
Returns the current field Separator.
|
nodiscard |
Returns the separator as its enumerator name ("Space", "Tab", ...).
|
nodiscard |
Returns the separator as the literal character used in files.
|
nodiscard |
Returns the configured Shape for the given role ("Input", "Target", "Decoder").
|
inline nodiscard |
Returns the configured target tensor shape.
|
nodiscard |
Returns the data column indices that belong to in-use variables.
|
nodiscard |
Returns the number of features for in-use variables.
|
nodiscard |
Returns indices of all samples whose role is not None.
|
nodiscard |
Returns the number of samples whose role is not None.
|
nodiscard |
Returns the indices of all in-use variables.
|
nodiscard |
Returns the number of variables whose role is in use.
|
nodiscard |
Returns the data columns belonging to the variable at the given index.
|
nodiscard |
Returns the variable data restricted to the given sample indices.
|
nodiscard |
Returns the data columns of the variable with the given name.
|
nodiscard |
Returns the variable index corresponding to a flat feature index.
|
nodiscard |
Returns the index of the variable with the given name.
|
nodiscard |
Returns the indices of variables matching the given role name.
|
nodiscard |
Returns the names of all variables.
|
nodiscard |
Returns the names of variables with the given role name.
|
inline nodiscard |
Returns the VariableType of the variable at the given index.
|
nodiscard |
Returns the VariableType for each of the given variable indices.
|
inline nodiscard |
Returns the variable descriptors.
|
nodiscard |
Returns the variables with the given role name.
|
inline nodiscard |
Returns the total number of variables (column descriptors).
|
nodiscard |
Returns the number of variables with the given role name.
|
nodiscard |
Returns true if any variable is Binary or Categorical.
|
nodiscard |
Returns true if any variable has type Binary.
|
nodiscard |
Returns true if any variable has type Categorical.
|
nodiscard |
Returns true if any entry in the data matrix is NaN.
|
nodiscard |
Returns true if the sample at the given row contains a NaN.
|
nodiscard |
Returns true if any variable has role Time.
|
nodiscard |
Returns true if any sample is assigned the Validation role.
|
protected |
|
inline nodiscard virtual |
Returns true when the dataset contains no samples.
|
inline nodiscard |
Returns true if the sample at index i has a role other than None.
| void opennn::Dataset::load | ( | const filesystem::path & | ) |
Loads the dataset metadata from the JSON file at the given path.
| void opennn::Dataset::load_data_binary | ( | ) |
Loads the data matrix from the binary file at the configured data path.
|
protected |
|
protected |
|
protected |
|
inline virtual |
Resizes the input shape to the given flat feature count.
Reimplemented in opennn::TimeSeriesDataset.
|
protected |
|
protected |
| void opennn::Dataset::save | ( | const filesystem::path & | ) | const |
Saves the dataset metadata to a JSON file at the given path.
| void opennn::Dataset::save_data | ( | ) | const |
Saves the data matrix to the configured data path.
| void opennn::Dataset::save_data_binary | ( | const filesystem::path & | ) | const |
Saves the data matrix to the given path in binary form.
|
inline virtual |
Scales the features with the configured method; returns the applied descriptives.
Reimplemented in opennn::ImageDataset and opennn::TabularDataset.
|
inline virtual |
Removes or imputes missing values using the configured strategy.
Reimplemented in opennn::TabularDataset.
| void opennn::Dataset::set_binary_variables | ( | ) |
Detects and marks variables with binary values as VariableType::Binary.
|
inline |
| void opennn::Dataset::set_codification | ( | const string & | ) |
Sets the codification from its enumerator name.
| void opennn::Dataset::set_data | ( | const MatrixR & | ) |
Replaces the underlying data matrix.
| void opennn::Dataset::set_data_constant | ( | const float | ) |
Fills the data matrix with the given constant value.
|
inline virtual |
Fills the data matrix with random integers up to the given vocabulary size.
Reimplemented in opennn::TabularDataset.
|
inline |
|
inline virtual |
Fills the data matrix with random values (no-op in base class).
Reimplemented in opennn::ImageDataset and opennn::TabularDataset.
| void opennn::Dataset::set_default | ( | ) |
Restores default dataset settings.
| void opennn::Dataset::set_default_variable_names | ( | ) |
Assigns default placeholder names to all variables.
|
protected |
|
protected |
|
inline |
| void opennn::Dataset::set_feature_names | ( | const vector< string > & | ) |
Assigns expanded feature names, propagating categories back to variables.
|
inline |
|
inline |
| void opennn::Dataset::set_input_variables_unused | ( | ) |
Marks all input variables as unused (role None).
| void opennn::Dataset::set_sample_role | ( | const Index | , |
| const string & | ) |
Sets the role of a single sample by index.
| void opennn::Dataset::set_sample_roles | ( | const string & | ) |
Assigns the same role to every sample.
| void opennn::Dataset::set_sample_roles | ( | const vector< Index > & | , |
| const string & | ) |
Assigns the same role to the given sample indices.
| void opennn::Dataset::set_sample_roles | ( | const vector< string > & | ) |
Assigns a role per sample from a string vector.
|
inline |
| void opennn::Dataset::set_separator_name | ( | const string & | ) |
Sets the separator from its enumerator name.
| void opennn::Dataset::set_separator_string | ( | const string & | ) |
Sets the separator from its literal character.
| void opennn::Dataset::set_shape | ( | const string & | , |
| const Shape & | ) |
Sets the tensor Shape associated with the given role ("Input"/"Target"/"Decoder").
| void opennn::Dataset::set_variable_indices | ( | const vector< Index > & | , |
| const vector< Index > & | ) |
Sets which variable indices are inputs and which are targets.
| input_indices | Indices for variables to mark as Input. |
| target_indices | Indices for variables to mark as Target. |
| void opennn::Dataset::set_variable_names | ( | const vector< string > & | ) |
Assigns names to all variables from the given list.
| void opennn::Dataset::set_variable_role | ( | const Index | , |
| const string & | ) |
Sets the role of the variable at the given index.
| void opennn::Dataset::set_variable_role | ( | const string & | , |
| const string & | ) |
Sets the role of the variable with the given name.
| void opennn::Dataset::set_variable_roles | ( | const string & | ) |
Assigns the same role to every variable.
| void opennn::Dataset::set_variable_roles | ( | const vector< string > & | ) |
Sets the role of every variable from the given string list.
| void opennn::Dataset::set_variable_type | ( | const Index | , |
| const VariableType & | ) |
Sets the type of the variable at the given index.
| void opennn::Dataset::set_variable_type | ( | const string & | , |
| const VariableType & | ) |
Sets the type of the variable with the given name.
| void opennn::Dataset::set_variable_types | ( | const VariableType & | ) |
Sets every variable to the given type.
|
inline |
|
inline |
| void opennn::Dataset::split_samples | ( | const float | training_ratio = 0.6f, |
| float | selection_ratio = 0.2f, | ||
| float | testing_ratio = 0.2f, | ||
| bool | shuffle = true ) |
Splits samples into Training/Validation/Testing roles, optionally shuffled.
| training_ratio | Fraction of samples assigned to Training. |
| selection_ratio | Fraction of samples assigned to Validation. |
| testing_ratio | Fraction of samples assigned to Testing. |
| shuffle | If true, shuffles samples before splitting. |
|
nodiscard |
Splits the given indices into chunks of the requested size.
| void opennn::Dataset::split_samples_random | ( | const float | training_ratio = 0.6f, |
| float | selection_ratio = 0.2f, | ||
| float | testing_ratio = 0.2f ) |
Splits samples randomly across roles.
| void opennn::Dataset::split_samples_sequential | ( | const float | training_ratio = 0.6f, |
| float | selection_ratio = 0.2f, | ||
| float | testing_ratio = 0.2f ) |
Splits samples sequentially without shuffling.
|
inline virtual |
Writes dataset state to a JSON writer.
Reimplemented in opennn::ImageDataset, opennn::LanguageDataset, opennn::TabularDataset, and opennn::TimeSeriesDataset.
|
inline virtual |
Reverts a previously applied scaling using the supplied descriptives.
Reimplemented in opennn::ImageDataset and opennn::TabularDataset.
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |