35#include "statistics.h"
37#include "correlations.h"
38#include "opennn_strings.h"
39#include "tensor_utilities.h"
65 explicit DataSet(
const Tensor<type, 2>&);
67 explicit DataSet(
const Index&,
const Index&);
69 explicit DataSet(
const Index&,
const Index&,
const Index&);
71 explicit DataSet(
const string&,
const char&,
const bool&);
89 enum class ProjectType{Approximation, Classification, Forecasting, ImageClassification};
94 enum class SampleUse{Training, Selection, Testing, UnusedSample};
99 enum class VariableUse{Id, Input, Target, Time, UnusedVariable};
104 enum class ColumnType{Numeric, Binary, Categorical, DateTime, Constant};
121 const Scaler& = Scaler::MeanStandardDeviation,
122 const Tensor<string, 1>& = Tensor<string, 1>(),
123 const Tensor<VariableUse, 1>& = Tensor<VariableUse, 1>());
153 Index get_variables_number()
const;
160 void set_scaler(
const Scaler&);
161 void set_scaler(
const string&);
/// Returns the number of samples in the data set.
/// The value is read as the size of the samples_uses tensor.
184 inline Index get_samples_number()
const {
return samples_uses.size();}
211 Tensor<Column, 1> get_time_series_columns()
const;
212 Index get_time_series_data_rows_number()
const;
221 Index get_input_time_series_columns_number()
const;
223 Index get_target_time_series_columns_number()
const;
232 Tensor<Index, 1> get_input_time_series_columns_indices()
const;
234 Tensor<Index, 1> get_target_time_series_columns_indices()
const;
/// Returns the type of the column stored at position index in columns.
/// @param index Position of the column inside the columns tensor.
/// This accessor does not itself check that index is within range.
244 ColumnType get_column_type(
const Index& index)
const {
return columns[index].type;}
278 Index get_input_variables_rank()
const;
282 Tensor<Scaler, 1> get_columns_scalers()
const;
284 Tensor<Scaler, 1> get_input_variables_scalers()
const;
285 Tensor<Scaler, 1> get_target_variables_scalers()
const;
289 Tensor<Index, 2>
get_batches(
const Tensor<Index,1>&,
const Index&,
const bool&,
const Index& buffer_size= 100)
const;
293 const Tensor<type, 2>&
get_data()
const;
294 Tensor<type, 2>* get_data_pointer();
301 Tensor<string, 1> get_time_series_columns_names()
const;
320 Tensor<type, 1>
get_sample_data(
const Index&,
const Tensor<Index, 1>&)
const;
325 Tensor<type, 2>
get_column_data(
const Index&,
const Tensor<Index, 1>&)
const;
335 Tensor<Tensor<string, 1>, 1> get_data_file_preview()
const;
337 Tensor<type, 2> get_subtensor_data(
const Tensor<Index, 1>&,
const Tensor<Index, 1>&)
const;
348 Tensor<string, 1> get_rows_label_tensor()
const;
349 Tensor<string, 1> get_selection_rows_label_tensor();
350 Tensor<string, 1> get_testing_rows_label_tensor();
361 Index get_time_series_time_column_index()
const;
363 static Tensor<string, 1> get_default_columns_names(
const Index&);
374 void set(
const Tensor<type, 2>&);
375 void set(
const Index&,
const Index&);
376 void set(
const Index&,
const Index&,
const Index&);
379 void set(
const string&);
380 void set(
const string&,
const char&,
const bool&);
384 void set_threads_number(
const int&);
409 void set_columns(
const Tensor<Column, 1>&);
420 void set_input_target_columns(
const Tensor<Index, 1>&,
const Tensor<Index, 1>&);
423 void set_input_columns(
const Tensor<Index, 1>&,
const Tensor<bool, 1>&);
428 void set_column_type(
const Index&,
const ColumnType&);
429 void set_column_type(
const string&,
const ColumnType&);
435 void set_columns_scalers(
const Scaler&);
438 void set_binary_simple_columns();
442 void check_constant_columns();
444 Tensor<type, 2> transform_binary_column(
const Tensor<type, 1>&)
const;
459 void set_data(
const Tensor<type, 2>&);
491 bool has_binary_columns()
const;
492 bool has_categorical_columns()
const;
493 bool has_time_columns()
const;
494 bool has_time_time_series_columns()
const;
496 bool has_selection()
const;
501 const type& selection_ratio =
static_cast<type
>(0.2),
502 const type& testing_ratio =
static_cast<type
>(0.2));
505 const type& selection_ratio =
static_cast<type
>(0.2),
506 const type& testing_ratio =
static_cast<type
>(0.2));
538 Tensor<Descriptives, 1> calculate_testing_target_variables_descriptives()
const;
548 Tensor<type, 1> calculate_used_targets_mean()
const;
582 Tensor<Index, 1>
filter_data(
const Tensor<type, 1>&,
const Tensor<type, 1>&);
590 Tensor<Descriptives, 1> scale_data();
597 void unscale_data(
const Tensor<Descriptives, 1>&);
616 void unuse_local_outlier_factor_outliers(
const Index& = 20,
const type& = type(1.5));
620 Tensor<Index, 1> calculate_isolation_forest_outliers(
const Index& = 100,
const Index& = 256,
const type& = type(0))
const;
622 void unuse_isolation_forest_outliers(
const Index& = 20,
const type& = type(1.5));
629 void transform_time_series_data();
631 void set_time_series_data(
const Tensor<type, 2>&);
632 void set_time_series_columns_number(
const Index&);
644 void generate_sum_data(
const Index&,
const Index&);
653 void save(
const string&)
const;
654 void load(
const string&);
656 void print_columns()
const;
657 void print_columns_types()
const;
658 void print_columns_uses()
const;
663 void print_data_file_preview()
const;
678 Tensor<type, 2>
read_input_csv(
const string&,
const char&,
const string&,
const bool&,
const bool&)
const;
682 void fill_time_series(
const Index&);
698 Tensor<Index, 1> count_nan_columns()
const;
699 Index count_rows_with_nan()
const;
700 Index count_nan()
const;
702 void set_missing_values_number(
const Index&);
703 void set_missing_values_number();
705 void set_columns_missing_values_number(
const Tensor<Index, 1>&);
706 void set_columns_missing_values_number();
708 void set_rows_missing_values_number(
const Index&);
709 void set_rows_missing_values_number();
713 void fix_repeated_names();
717 Tensor<Index, 1> push_back(
const Tensor<Index, 1>&,
const Index&)
const;
718 Tensor<string, 1> push_back(
const Tensor<string, 1>&,
const string&)
const;
720 void initialize_sequential(Tensor<Index, 1>&,
const Index&,
const Index&,
const Index&)
const;
721 void intialize_sequential(Tensor<type, 1>&,
const type&,
const type&,
const type&)
const;
723 Tensor<Index, 2> split_samples(
const Tensor<Index, 1>&,
const Index&)
const;
725 bool get_has_rows_labels()
const;
733 void read_csv_2_simple();
734 void read_csv_3_simple();
736 void read_csv_2_complete();
737 void read_csv_3_complete();
739 void check_separators(
const string&)
const;
741 void check_special_characters(
const string&)
const;
745 NonBlockingThreadPool* non_blocking_thread_pool =
nullptr;
746 ThreadPoolDevice* thread_pool_device =
nullptr;
758 Tensor<SampleUse, 1> samples_uses;
760 Tensor<string, 1> rows_labels;
764 Tensor<Column, 1> columns;
766 Tensor<Index, 1> input_variables_dimensions;
790 Tensor<Tensor<string, 1>, 1> data_file_preview;
812 Tensor<Column, 1> time_series_columns;
826 Tensor<Index, 1> columns_missing_values_number;
828 Index rows_missing_values_number;
834 const Eigen::array<IndexPair<Index>, 1> product_vector_vector = {IndexPair<Index>(0, 0)};
838 Tensor<Index, 1> select_outliers_via_standard_deviation(
const Tensor<type, 1>&,
const type & = type(2.0),
bool =
true)
const;
840 Tensor<Index, 1> select_outliers_via_contamination(
const Tensor<type, 1>&,
const type & = type(0.05),
bool =
true)
const;
842 type calculate_euclidean_distance(
const Tensor<Index, 1>&,
const Index&,
const Index&)
const;
844 Tensor<type, 2> calculate_distance_matrix(
const Tensor<Index, 1>&)
const;
846 Tensor<list<Index>, 1> calculate_k_nearest_neighbors(
const Tensor<type, 2>&,
const Index& = 20)
const;
848 Tensor<Tensor<type, 1>, 1> get_kd_tree_data()
const;
850 Tensor<Tensor<Index, 1>, 1> create_bounding_limits_kd_tree(
const Index&)
const;
852 void create_kd_tree(Tensor<Tensor<type, 1>, 1>&,
const Tensor<Tensor<Index, 1>, 1>&)
const;
854 Tensor<list<Index>, 1> calculate_bounding_boxes_neighbors(
const Tensor<Tensor<type, 1>, 1>&,
855 const Tensor<Index, 1>&,
856 const Index&,
const Index&)
const;
858 Tensor<list<Index>, 1> calculate_kd_tree_neighbors(
const Index& = 20,
const Index& = 40)
const;
860 Tensor<type, 1> calculate_average_reachability(Tensor<list<Index>, 1>&,
const Index&)
const;
862 Tensor<type, 1> calculate_local_outlier_factor(Tensor<list<Index>, 1>&,
const Tensor<type, 1>&,
const Index &)
const;
867 void calculate_min_max_indices_list(list<Index>&,
const Index&, type&, type&)
const;
869 Index split_isolation_tree(Tensor<type, 2>&, list<list<Index>>&, list<Index>&)
const;
871 Tensor<type, 2> create_isolation_tree(
const Tensor<Index, 1>&,
const Index&)
const;
873 Tensor<Tensor<type, 2>, 1> create_isolation_forest(
const Index&,
const Index&,
const Index&)
const;
875 type calculate_tree_path(
const Tensor<type, 2>&,
const Index&,
const Index&)
const;
877 Tensor<type, 1> calculate_average_forest_paths(
const Tensor<Tensor<type, 2>, 1>&,
const Index&)
const;
883 #include "../../opennn-cuda/opennn-cuda/data_set_cuda.h"
898 Index get_samples_number()
const;
900 void set(
const Index&,
DataSet*);
902 void fill(
const Tensor<Index, 1>&,
const Tensor<Index, 1>&,
const Tensor<Index, 1>&);
906 Index samples_number = 0;
908 DataSet* data_set_pointer =
nullptr;
910 Tensor<type, 2> inputs_2d;
911 Tensor<type, 4> inputs_4d;
913 Tensor<type, 2> targets_2d;
This class represents the concept of data set for data modelling problems, such as approximation,...
Index calculate_testing_negatives(const Index &) const
bool is_sample_used(const Index &) const
ProjectType
Enumeration of the learning tasks.
Tensor< type, 1 > calculate_input_variables_maximums() const
Returns a vector containing the maximums of the input variables.
void transform_time_series_columns()
This method transforms the columns into time series for forecasting problems.
Tensor< type, 2 > get_target_data() const
Index get_training_samples_number() const
Returns the number of samples in the data set which will be used for training.
Tensor< type, 1 > calculate_used_variables_minimums() const
Returns a vector containing the minimums of the used variables.
Tensor< type, 2 > calculate_autocorrelations(const Index &=10) const
VariableUse get_column_use(const Index &) const
Returns the use of the column, without taking into account the categories.
Index get_target_variables_number() const
Returns the number of target variables of the data set.
Tensor< Descriptives, 1 > calculate_columns_descriptives_training_samples() const
void set_sample_use(const Index &, const SampleUse &)
void set_training()
Sets all the samples in the data set for training.
void impute_missing_values_unuse()
Sets all the samples with missing values to "Unused".
Tensor< Descriptives, 1 > calculate_columns_descriptives_positive_samples() const
Calculates the descriptives of the samples with positive targets in binary classification problems.
void save_data_binary(const string &) const
Saves to the data file the values of the data matrix in binary format.
Tensor< string, 1 > get_target_columns_names() const
Returns a string vector which contains the names of the columns whose uses are Target.
Tensor< string, 1 > get_columns_names() const
Returns a string vector that contains the names of the columns.
Tensor< Descriptives, 1 > calculate_input_variables_descriptives() const
void set_variable_name(const Index &, const string &)
Tensor< Descriptives, 1 > scale_target_variables()
Tensor< Index, 1 > calculate_local_outlier_factor_outliers(const Index &=20, const Index &=0, const type &=type(0)) const
void set_input_variables_dimensions(const Tensor< Index, 1 > &)
Sets new input dimensions in the data set.
Tensor< Index, 1 > get_training_samples_indices() const
Returns the indices of the samples which will be used for training.
void set_input()
Sets all the variables in the data set as input variables.
bool has_columns_names
Header which contains variables name.
void generate_constant_data(const Index &, const Index &, const type &)
void generate_Rosenbrock_data(const Index &, const Index &)
Tensor< string, 1 > get_time_series_variables_names() const
void print_top_input_target_columns_correlations() const
void set_has_columns_names(const bool &)
Sets if the data file contains a header with the names of the columns.
Tensor< Index, 1 > get_testing_samples_indices() const
Returns the indices of the samples which will be used for testing.
void set_columns_number(const Index &)
Tensor< Index, 1 > get_selection_samples_indices() const
Returns the indices of the samples which will be used for selection.
Tensor< type, 1 > calculate_selection_targets_mean() const
Returns the mean values of the target variables on the selection.
const bool & get_display() const
void set_selection()
Sets all the samples in the data set for selection.
Tensor< Histogram, 1 > calculate_columns_distribution(const Index &=10) const
const Tensor< type, 2 > & get_data() const
Index get_variables_number() const
Returns the number of variables in the data set.
void set_data_constant(const type &)
Tensor< type, 2 > get_sample_target_data(const Index &) const
void set_samples_unused()
Sets all the samples in the data set as unused.
Tensor< type, 3 > calculate_cross_correlations(const Index &=10) const
Calculates the cross-correlation between all the variables in the data set.
Tensor< Index, 1 > get_target_columns_indices() const
Returns a vector of indices with the positions of the targets.
void split_samples_sequential(const type &training_ratio=static_cast< type >(0.6), const type &selection_ratio=static_cast< type >(0.2), const type &testing_ratio=static_cast< type >(0.2))
void unscale_input_variables(const Tensor< Descriptives, 1 > &)
Index lags_number
Number of lags.
Tensor< Descriptives, 1 > calculate_target_variables_descriptives() const
void set_target()
Sets all the variables in the data set as target variables.
void impute_missing_values_mean()
Substitutes all the missing values by the mean of the corresponding variable.
string data_file_name
Data file name.
Tensor< Descriptives, 1 > calculate_variables_descriptives() const
void check_input_csv(const string &, const char &) const
This method checks if the input data file has the correct format. Returns an error message.
char get_separator_char() const
Returns the character which will be used as separator in the data file.
const bool & get_rows_label() const
Returns true if the data file has rows label, and false otherwise.
Tensor< Tensor< Index, 1 >, 1 > calculate_Tukey_outliers(const type &=type(1.5)) const
void load(const string &)
void load_data_binary()
This method loads the data from a binary data file.
void set_samples_uses(const Tensor< SampleUse, 1 > &)
string missing_values_label
Missing values label.
const Index & get_lags_number() const
Returns the number of lags to be used in a time series prediction application.
void set_variables_unused()
Sets all the variables in the data set as unused variables.
void load_time_series_data_binary(const string &)
This method loads time series data from a binary data file.
Index calculate_training_negatives(const Index &) const
Tensor< Index, 1 > get_used_samples_indices() const
Returns the indices of the used samples (those which are not set unused).
void set_missing_values_method(const MissingValuesMethod &)
Tensor< type, 1 > get_sample_data(const Index &) const
Tensor< type, 2 > get_sample_input_data(const Index &) const
bool display
Display messages to screen.
void impute_missing_values_median()
Substitutes all the missing values by the median of the corresponding variable.
void set_separator(const Separator &)
Tensor< type, 1 > calculate_input_variables_minimums() const
Returns a vector containing the minimums of the input variables.
string get_sample_string(const Index &, const string &=",") const
Index get_time_series_columns_number() const
Returns the number of columns in the time series.
Index get_unused_samples_number() const
virtual ~DataSet()
Destructor.
void print_inputs_correlations() const
Print on screen the correlation between variables in the data set.
Index get_unused_variables_number() const
Returns the number of variables which will neither be used as input nor as target.
Index steps_ahead
Number of steps ahead.
bool is_empty() const
Returns true if the data matrix is empty, and false otherwise.
void set_lags_number(const Index &)
Index get_time_columns_number() const
Returns the number of columns whose uses are Time.
const Tensor< Index, 1 > & get_input_variables_dimensions() const
Returns the dimensions of the input variables.
Tensor< Index, 1 > calculate_target_distribution() const
void set_samples_number(const Index &)
Tensor< Index, 1 > get_input_columns_indices() const
Returns a vector of indices with the positions of the inputs.
SampleUse get_sample_use(const Index &) const
Tensor< VariableUse, 1 > get_columns_uses() const
Returns the uses of each columns of the data set.
Tensor< string, 1 > get_used_columns_names() const
Returns a string vector which contains the names of the columns used whether Input,...
void set()
Sets zero samples and zero variables in the data set.
void set_has_rows_label(const bool &)
Sets if the data file contains rows label.
Tensor< Index, 1 > get_target_variables_indices() const
Returns the indices of the target variables.
Tensor< string, 1 > unuse_uncorrelated_columns(const type &=type(0.25))
void set_variables_names(const Tensor< string, 1 > &)
void split_samples_random(const type &training_ratio=static_cast< type >(0.6), const type &selection_ratio=static_cast< type >(0.2), const type &testing_ratio=static_cast< type >(0.2))
MissingValuesMethod
Enumeration of available methods for missing values in the data.
Tensor< Index, 1 > get_samples_uses_numbers() const
Tensor< Index, 1 > unuse_repeated_samples()
void set_testing()
Sets all the samples in the data set for testing.
void print_top_inputs_correlations() const
const string & get_missing_values_label() const
Returns the string which will be used as label for the missing values in the data file.
string get_variable_name(const Index &) const
void print_data() const
Prints to the screen the values of the data matrix.
const Separator & get_separator() const
Returns the separator to be used in the data file.
Tensor< type, 1 > calculate_target_variables_minimums() const
Returns a vector containing the minimums of the target variables.
void set_missing_values_label(const string &)
Index get_selection_samples_number() const
Returns the number of samples in the data set which will be used for selection.
const Tensor< SampleUse, 1 > & get_samples_uses() const
Returns the use of every sample (training, selection, testing or unused) in a vector.
Index get_variable_index(const string &name) const
Tensor< type, 2 > get_selection_target_data() const
void save_data() const
Saves to the data file the values of the data matrix.
void save(const string &) const
Tensor< type, 2 > get_testing_data() const
Tensor< Descriptives, 1 > calculate_used_variables_descriptives() const
MissingValuesMethod missing_values_method
Missing values method.
Tensor< type, 2 > get_training_data() const
Tensor< bool, 1 > get_input_columns_binary() const
Returns the input columns of the data set.
Tensor< type, 2 > get_testing_input_data() const
void transform_time_series()
Arranges an input-target DataSet from a time series matrix, according to the number of lags.
VariableUse get_variable_use(const Index &) const
Tensor< type, 1 > calculate_variables_means(const Tensor< Index, 1 > &) const
Tensor< Descriptives, 1 > calculate_columns_descriptives_selection_samples() const
string get_separator_string() const
Returns the string which will be used as separator in the data file.
Separator
Enumeration of available separators for the data file.
Index get_input_variables_number() const
Tensor< Index, 1 > get_unused_columns_indices() const
Returns a vector of indices with the positions of the unused columns.
Tensor< type, 2 > read_input_csv(const string &, const char &, const string &, const bool &, const bool &) const
This method loads data from a file and returns a matrix containing the input columns.
Index get_target_columns_number() const
Returns the number of columns whose uses are Target.
Tensor< type, 2 > time_series_data
Tensor< Correlation, 2 > calculate_input_columns_correlations() const
void generate_random_data(const Index &, const Index &)
void scrub_missing_values()
Index get_testing_samples_number() const
Returns the number of samples in the data set which will be used for testing.
const string & get_time_column() const
Returns the time column of the data set, as a string.
Index calculate_selection_negatives(const Index &) const
Index get_used_columns_number() const
Returns the number of columns that are used.
bool has_nan_row(const Index &) const
Returns true if the given row contains missing values.
Index get_time_series_variables_number() const
Returns the number of variables in the time series data.
void print_missing_values_information() const
void set_default_columns_names()
Index calculate_used_negatives(const Index &) const
Index get_used_samples_number() const
void save_time_series_data_binary(const string &) const
Saves to the data file the values of the time series data matrix in binary format.
string time_column
Index where time variable is located for forecasting applications.
Tensor< Index, 1 > get_used_columns_indices() const
Returns a vector of indices with the positions of the used columns.
Tensor< string, 1 > get_variables_names() const
Tensor< type, 2 > get_selection_input_data() const
Tensor< type, 2 > get_time_series_column_data(const Index &) const
Index get_unused_columns_number() const
Returns the number of columns that are not used.
const Index & get_steps_ahead() const
Returns the number of steps ahead to be used in a time series prediction application.
bool has_rows_labels
Header which contains the rows label.
Tensor< Correlation, 2 > calculate_input_target_columns_correlations() const
void set_default_columns_scalers()
void unscale_target_variables(const Tensor< Descriptives, 1 > &)
Tensor< VariableUse, 1 > get_variables_uses() const
static Scaler get_scaling_unscaling_method(const string &)
void set_data_binary_random()
void set_time_column(const string &)
Tensor< Index, 1 > get_variable_indices(const Index &) const
Tensor< type, 1 > get_variable_data(const Index &) const
Tensor< type, 2 > get_selection_data() const
Tensor< type, 2 > get_column_data(const Index &) const
Tensor< string, 1 > get_input_columns_names() const
Returns a string vector that contains the names of the columns whose uses are Input.
void set_data_file_name(const string &)
void print() const
Prints to the screen in text format the main numbers from the data set object.
const bool & get_header_line() const
Returns true if the first line of the data file has a header with the names of the variables,...
Index missing_values_number
Missing values.
Tensor< type, 2 > get_input_data() const
void set_column_name(const Index &, const string &)
void set_display(const bool &)
Tensor< string, 1 > get_target_variables_names() const
Tensor< Index, 1 > get_unused_samples_indices() const
Returns the indices of the samples set unused.
void set_input_columns_unused()
Sets all input columns in the data_set as unused columns.
void print_data_preview() const
Tensor< type, 2 > get_training_input_data() const
Tensor< Index, 1 > get_input_variables_indices() const
Returns the indices of the input variables.
Tensor< string, 1 > get_input_variables_names() const
void set_columns_unused()
Sets all columns in the data_set as unused columns.
Tensor< Index, 1 > filter_data(const Tensor< type, 1 > &, const Tensor< type, 1 > &)
void write_XML(tinyxml2::XMLPrinter &) const
Serializes the data set object into an XML document of the TinyXML library without keeping the DOM tree i...
Tensor< Descriptives, 1 > calculate_columns_descriptives_negative_samples() const
Calculates the descriptives of the samples with negative targets in binary classification problems.
Tensor< type, 1 > get_samples_uses_percentages() const
void set_data(const Tensor< type, 2 > &)
Tensor< BoxPlot, 1 > calculate_columns_box_plots() const
void generate_sequential_data(const Index &, const Index &)
Tensor< Index, 2 > get_batches(const Tensor< Index, 1 > &, const Index &, const bool &, const Index &buffer_size=100) const
Separator separator
Separator character.
Index get_column_index(const string &) const
void set_columns_names(const Tensor< string, 1 > &)
Tensor< Column, 1 > get_input_columns() const
Returns the input columns of the data set.
Tensor< string, 1 > unuse_constant_columns()
bool is_sample_unused(const Index &) const
void set_column_use(const Index &, const VariableUse &)
MissingValuesMethod get_missing_values_method() const
Returns the missing values method used.
Tensor< Descriptives, 1 > calculate_columns_descriptives_categories(const Index &) const
Index get_columns_number() const
Returns the number of columns in the data set.
Tensor< Index, 1 > get_unused_variables_indices() const
Returns the indices of the unused variables.
bool has_nan() const
Returns true if the data contain missing values.
const string & get_data_file_name() const
Returns the name of the data file.
void set_steps_ahead_number(const Index &)
Tensor< Column, 1 > get_used_columns() const
Returns the used columns of the data set.
void set_default_columns_uses()
const Tensor< type, 2 > & get_time_series_data() const
void print_input_target_columns_correlations() const
Print on screen the correlation between targets and inputs.
Tensor< type, 2 > get_testing_target_data() const
Tensor< Descriptives, 1 > scale_input_variables()
Tensor< Index, 1 > get_used_variables_indices() const
Returns the indices of the used variables.
void unuse_Tukey_outliers(const type &=type(1.5))
Tensor< Column, 1 > get_columns() const
Returns the columns of the data set.
Tensor< type, 1 > calculate_target_variables_maximums() const
Returns a vector containing the maximums of the target variables.
Index get_input_columns_number() const
Returns the number of columns whose uses are Input.
Tensor< type, 2 > get_training_target_data() const
Index get_used_variables_number() const
Returns the number of variables which are either input or target.
Tensor< Column, 1 > get_target_columns() const
Returns the target columns of the data set.
void set_columns_uses(const Tensor< string, 1 > &)
Extensions to the C++ standard library.
This structure represents the columns of the DataSet.
Index get_categories_number() const
Returns the number of categories.
virtual ~Column()
Destructor.
void set_categories_uses(const Tensor< string, 1 > &)
Tensor< string, 1 > categories
Categories within the column.
ColumnType type
Column type.
void add_category(const string &)
VariableUse column_use
Column use.
Tensor< VariableUse, 1 > categories_uses
Categories use.
void set_use(const VariableUse &)
void set_type(const string &)
Tensor< string, 1 > get_used_variables_names() const
Returns a string vector that contains the names of the used variables in the data set.
Index get_used_categories_number() const
Returns the number of used categories.
Column()
Default constructor.
DataSetBatch()
Default constructor.
virtual ~DataSetBatch()
Destructor.