OpenNN
Open-source neural networks library
Loading...
Searching...
No Matches
optimizer.h
Go to the documentation of this file.
1// OpenNN: Open Neural Networks Library
2// www.opennn.net
3//
4// O P T I M I Z A T I O N   A L G O R I T H M   C L A S S   H E A D E R
5//
6// Artificial Intelligence Techniques SL
7// artelnics@artelnics.com
8
9#pragma once
10
11#include <functional>
12#include "json.h"
13#include "tensor_utilities.h"
14#include "thread_safe_queue.h"
15
16namespace opennn
17{
18
// Small positive constant (1e-6) associated with gradient-norm computations.
// NOTE(review): presumably guards a division by the norm inside clip_gradient_norm();
// the use site is not in this header — confirm.
19inline constexpr float GRADIENT_NORM_EPS = 1e-6f;
20
21class Loss;
22struct Batch;
23struct Buffer;
25struct BackPropagation;
26
27struct TrainingResults;
28
31{
32
33public:
34
37 {
 // Aggregated per-epoch metrics; this is the return type of train_epoch() and evaluate_epoch().
 // Both fields are zero-initialized.
38 float error = 0.0f;
39 float accuracy = 0.0f;
40 };
41
 // Constructs an optimizer optionally bound to a Loss instance; with no argument the pointer is nullptr.
43 Optimizer(Loss* = nullptr);
 // Virtual, defaulted destructor: the class is a polymorphic base (train() is pure virtual).
44 virtual ~Optimizer() = default;
45
53
 // Returns the bound Loss as a pointer-to-const. May be nullptr: the member defaults to nullptr.
54 const Loss* get_loss() const { return loss; }
55
 // Returns whether console progress reporting is enabled.
56 bool get_display() const { return display; }
57
 // Binds the optimizer to a new Loss instance. Non-virtual; the body is identical to the
 // base implementation of set_loss(), but overrides of set_loss() are not invoked from here.
59 void set(Loss* new_loss) { loss = new_loss; }
60
 // Binds the optimizer to a new Loss instance. Virtual so derived classes can hook the rebinding;
 // the base version only stores the pointer.
62 virtual void set_loss(Loss* new_loss) { loss = new_loss; }
63
 // Enables or disables console progress reporting during training.
65 virtual void set_display(bool new_display) { display = new_display; }
66
 // Sets the epoch interval used by should_display() (progress is shown when epoch % display_period == 0).
 // NOTE(review): the value is stored unchecked; 0 would make the modulo in should_display()
 // undefined behavior. Consider rejecting or clamping values < 1.
67 void set_display_period(const Index new_display_period) { display_period = new_display_period; }
68
 // Sets the worker count, clamped to a minimum of 1 (zero or negative requests become 1).
 // NOTE(review): unqualified max() relies on a using-declaration/directive from another header — confirm.
69 void set_num_workers(int n) { num_workers = max(1, n); }
 // Returns the configured worker count (always >= 1 when set through set_num_workers(); default is 2).
70 int get_num_workers() const { return num_workers; }
71
 // Sets the maximum-epochs limit (stored unchecked; the default is 10000).
72 void set_maximum_epochs(const Index new_maximum_epochs) { maximum_epochs = new_maximum_epochs; }
 // Sets the maximum training time (stored unchecked).
 // NOTE(review): presumably in seconds, matching get_elapsed_time() — confirm against the implementation.
73 void set_maximum_time(const float new_maximum_time) { maximum_time = new_maximum_time; }
74
 // Sets the training loss goal; note the value is stored in the member named training_loss_goal.
75 void set_loss_goal(const float new_loss_goal) { training_loss_goal = new_loss_goal; }
 // Sets the maximum number of validation failures (stored unchecked; the default is the largest Index value).
76 void set_maximum_validation_failures(const Index new_maximum_validation_failures) { maximum_validation_failures = new_maximum_validation_failures; }
 // Runs the training loop and returns the recorded results. Pure virtual: every concrete optimizer must implement it.
78 virtual TrainingResults train() = 0;
79
82
 // Returns the optimizer's name by const reference (valid for the lifetime of this object).
83 const string& get_name() const { return name; }
84
 // Prints a human-readable description of the optimizer. The base implementation is a no-op.
86 virtual void print() const {}
87
 // Restores the optimizer configuration from a JSON document.
89 virtual void from_JSON(const JsonDocument&);
90
 // Serializes the optimizer configuration to JSON through the given writer.
92 virtual void to_JSON(JsonWriter&) const;
93
 // Writes the current optimizer configuration to a JSON file at the given path.
95 void save(const filesystem::path&) const;
 // Loads the optimizer configuration from a JSON file at the given path.
97 void load(const filesystem::path&);
98
 // Returns the seconds elapsed since the given start time. Static: needs no optimizer instance.
100 static float get_elapsed_time(const time_t& beginning_time);
101
102protected:
103
104 void set_names();
107
 // Decides whether training should stop given the current epoch state.
 // NOTE(review): presumably compares the arguments against training_loss_goal,
 // maximum_validation_failures, maximum_epochs and maximum_time, records the reason in the
 // TrainingResults argument, and returns true when training must halt — the implementation
 // is not in this header; confirm.
108 bool check_stopping_condition(TrainingResults&, Index epoch, float elapsed_time,
109 float training_error, Index validation_failures) const;
110
112 void read_common_json(const Json*);
113
116
 // Starts prefetching the given batch into a slot.
 // NOTE(review): `slot` presumably indexes batch_ready_event[2] (so valid values would be 0 or 1) — confirm.
117 void prefetch_batch(Batch& batch, Index sample_count, int slot);
118
 // Waits for the prefetch previously issued on `slot`.
 // NOTE(review): presumably pairs with prefetch_batch() on the same slot index — confirm.
119 void wait_prefetch(int slot);
120
122
124
126
 // Clips the gradient held in `gradient` with respect to `max_norm`; modifies the buffer in place
 // (taken by non-const reference).
 // NOTE(review): presumably rescales so the norm does not exceed max_norm, using GRADIENT_NORM_EPS
 // for numerical safety — the implementation is not in this header; confirm.
127 static void clip_gradient_norm(Buffer& gradient, float max_norm);
128
 // True when display is enabled and `epoch` is a multiple of display_period.
 // The && short-circuits, so the modulo is only evaluated when display is true.
 // NOTE(review): display_period == 0 (not rejected by set_display_period()) makes the modulo
 // undefined behavior while display is enabled.
129 bool should_display(Index epoch) const { return display && epoch % display_period == 0; }
130
131 void warn_dropped_samples(Index batch_size,
132 Index samples_number,
133 const char* context) const;
134
 // Runs one training pass and returns the aggregated error/accuracy as EpochStats.
 // `batches` holds one vector of indices per batch; the three *_feature_indices vectors select
 // the input, decoder and target features.
 // NOTE(review): `update` is presumably invoked once per batch after back-propagation to apply
 // the optimizer-specific parameter step, and `empty_queue` presumably recycles Batch objects
 // for prefetching — the implementation is not in this header; confirm.
135 EpochStats train_epoch(bool tracks_accuracy,
136 ForwardPropagation& forward_propagation,
137 BackPropagation& back_propagation,
138 ThreadSafeQueue<Batch*>& empty_queue,
139 const vector<vector<Index>>& batches,
140 const vector<Index>& input_feature_indices,
141 const vector<Index>& decoder_feature_indices,
142 const vector<Index>& target_feature_indices,
143 const function<void(BackPropagation&)>& update,
144 bool show_progress = true);
145
 // Runs one evaluation pass and returns the aggregated error/accuracy as EpochStats.
 // Takes the same batch and feature-index arguments as train_epoch() but no BackPropagation
 // workspace and no update callback.
 // NOTE(review): presumably forward-only, with no parameter changes — confirm in the implementation.
146 EpochStats evaluate_epoch(bool tracks_accuracy,
147 ForwardPropagation& forward_propagation,
148 ThreadSafeQueue<Batch*>& empty_queue,
149 const vector<vector<Index>>& batches,
150 const vector<Index>& input_feature_indices,
151 const vector<Index>& decoder_feature_indices,
152 const vector<Index>& target_feature_indices);
153
 // Bound loss; a raw pointer that is only ever assigned in this header (set()/set_loss()). nullptr until bound.
154 Loss* loss = nullptr;
155
 // Training loss goal, written by set_loss_goal(). Default 0.
156 float training_loss_goal = 0.0f;
157
 // Validation-failure limit; defaults to the largest Index value.
158 Index maximum_validation_failures = numeric_limits<Index>::max();
159
 // Epoch limit. Default 10000.
160 Index maximum_epochs = 10000;
161
 // Time limit. NOTE(review): presumably seconds (360000 s = 100 h) — confirm.
162 float maximum_time = 360000.0f;
163
 // Epoch interval used by should_display(). Must be non-zero for the modulo there to be defined.
164 Index display_period = 10;
165
 // Whether console progress reporting is enabled.
166 bool display = true;
167
 // Optimizer name, exposed through get_name().
168 string name;
169
 // Worker count; set_num_workers() keeps it >= 1.
170 int num_workers = 2;
171
 // CUDA event bookkeeping for batches.
 // NOTE(review): batch_ready_event presumably has one event per prefetch slot (see the `slot`
 // argument of prefetch_batch()/wait_prefetch()); batch_reuse_events / batch_reuse_recorded
 // presumably track, per Batch, an event marking when it may be reused and whether that event
 // has been recorded — confirm. Ownership and destruction of these events are not visible here.
173 cudaEvent_t batch_ready_event[2] = {nullptr, nullptr};
174 unordered_map<Batch*, cudaEvent_t> batch_reuse_events;
175 unordered_set<Batch*> batch_reuse_recorded;
176
178};
179
182{
183 OptimizerData() = default;
184 virtual ~OptimizerData() = default;
185
 // Prints the optimizer scratch state for debugging.
187 virtual void print() const;
188
 // Allocates a buffer with the requested slot shapes on the target device (CPU by default)
 // and refreshes `views`.
190 void set(const vector<Shape>& slot_shapes, Device device = Device::CPU);
191
193 vector<TensorView> views;
194
195 // Shared state across all optimizers
 // Iteration counter, starting at 0.
 // NOTE(review): presumably incremented once per parameter update by the concrete optimizer — confirm.
199 Index iteration = 0;
200};
201
204{
 // Constructs a TrainingResults pre-sized for the given number of epochs (default 0).
 // NOTE(review): the single-argument constructor is not explicit, so an Index converts implicitly.
206 TrainingResults(const Index = 0);
207 virtual ~TrainingResults() = default;
208
211
 // Returns the final training error.
213 float get_training_error() const;
214
 // Returns the final validation (selection) error.
216 float get_validation_error() const;
217
 // Returns the number of epochs that were actually executed.
219 Index get_epochs_number() const;
220
 // Saves the training-error and validation-error histories to a JSON file at the given path.
222 void save(const filesystem::path&) const;
223
 // Prints a summary of the training run, optionally prefixed by `message` (empty by default).
225 void print(const string& message = {}) const;
226
228
 // Builds a rank-2 string table of final metrics for embedding in higher-level reports.
 // NOTE(review): the unnamed Index argument (default 3) is presumably a numeric precision — confirm.
230 Tensor<string, 2> write_override_results(const Index = 3) const;
231
234
237
239
241
243
 // Loss value; initialized to NaN rather than 0.
 // NOTE(review): presumably so an unset result is distinguishable from a real loss of 0 — confirm.
244 float loss = NAN;
245
247
 // Loss decrease; starts at 0.
 // NOTE(review): presumably the change in loss between consecutive epochs, used for the
 // MinimumLossDecrease stopping condition — confirm.
248 float loss_decrease = 0.0f;
249};
250
251}
252
253// OpenNN: Open Neural Networks Library.
254// Copyright(C) 2005-2026 Artificial Intelligence Techniques, SL.
255// Licensed under the GNU Lesser General Public License v2.1 or later.
Definition json.h:72
Definition json.h:85
Definition json.h:23
Unified loss container supporting MSE, cross-entropy, Minkowski, weighted, and regularized variants.
Definition loss.h:24
bool get_display() const
Definition optimizer.h:56
static void clip_gradient_norm(Buffer &gradient, float max_norm)
float training_loss_goal
Definition optimizer.h:156
bool should_display(Index epoch) const
Definition optimizer.h:129
const Loss * get_loss() const
Definition optimizer.h:54
Index get_maximum_batch_size() const
Largest batch size compatible with the dataset and configured memory budget.
unordered_set< Batch * > batch_reuse_recorded
Definition optimizer.h:175
virtual void from_JSON(const JsonDocument &)
Restores optimizer configuration from a JSON document.
cudaEvent_t batch_ready_event[2]
Definition optimizer.h:173
void set_maximum_time(const float new_maximum_time)
Definition optimizer.h:73
int get_num_workers() const
Definition optimizer.h:70
void setup_device_training()
void warn_dropped_samples(Index batch_size, Index samples_number, const char *context) const
void save(const filesystem::path &) const
Writes the current optimizer configuration to a JSON file at the given path.
void read_common_json(const Json *)
Loss * loss
Definition optimizer.h:154
unordered_map< Batch *, cudaEvent_t > batch_reuse_events
Definition optimizer.h:174
virtual TrainingResults train()=0
Runs the training loop and returns the recorded results (must be implemented by subclasses).
void prefetch_batch(Batch &batch, Index sample_count, int slot)
bool display
Definition optimizer.h:166
void teardown_device_training()
void set_display_period(const Index new_display_period)
Definition optimizer.h:67
Index display_period
Definition optimizer.h:164
EpochStats train_epoch(bool tracks_accuracy, ForwardPropagation &forward_propagation, BackPropagation &back_propagation, ThreadSafeQueue< Batch * > &empty_queue, const vector< vector< Index > > &batches, const vector< Index > &input_feature_indices, const vector< Index > &decoder_feature_indices, const vector< Index > &target_feature_indices, const function< void(BackPropagation &)> &update, bool show_progress=true)
Index maximum_validation_failures
Definition optimizer.h:158
void wait_prefetch(int slot)
virtual void print() const
Prints a human-readable description of the optimizer (no-op default).
Definition optimizer.h:86
virtual ~Optimizer()=default
bool check_stopping_condition(TrainingResults &, Index epoch, float elapsed_time, float training_error, Index validation_failures) const
void set_num_workers(int n)
Definition optimizer.h:69
EpochStats evaluate_epoch(bool tracks_accuracy, ForwardPropagation &forward_propagation, ThreadSafeQueue< Batch * > &empty_queue, const vector< vector< Index > > &batches, const vector< Index > &input_feature_indices, const vector< Index > &decoder_feature_indices, const vector< Index > &target_feature_indices)
Optimizer(Loss *=nullptr)
Constructs an optimizer optionally bound to a Loss instance.
const string & get_name() const
Definition optimizer.h:83
void clear_batch_reuse_events()
StoppingCondition
Reason that training was halted in the last call to train().
Definition optimizer.h:47
@ MaximumEpochsNumber
Definition optimizer.h:51
@ MaximumSelectionErrorIncreases
Definition optimizer.h:50
@ None
Definition optimizer.h:47
@ LossGoal
Definition optimizer.h:49
@ MinimumLossDecrease
Definition optimizer.h:48
@ MaximumTime
Definition optimizer.h:52
Buffer prefetch_fp32_staging
Definition optimizer.h:177
void set_loss_goal(const float new_loss_goal)
Definition optimizer.h:75
void set_maximum_epochs(const Index new_maximum_epochs)
Definition optimizer.h:72
string name
Definition optimizer.h:168
virtual void to_JSON(JsonWriter &) const
Serializes the optimizer configuration to JSON.
void write_common_json(JsonWriter &) const
static float get_elapsed_time(const time_t &beginning_time)
Returns seconds elapsed since the given start time.
virtual void set_loss(Loss *new_loss)
Binds the optimizer to a new Loss instance (virtual hook for derived classes).
Definition optimizer.h:62
Index maximum_epochs
Definition optimizer.h:160
void load(const filesystem::path &)
Loads the optimizer configuration from a JSON file at the given path.
int num_workers
Definition optimizer.h:170
float maximum_time
Definition optimizer.h:162
void set_maximum_validation_failures(const Index new_maximum_validation_failures)
Definition optimizer.h:76
void set(Loss *new_loss)
Binds the optimizer to a new Loss instance.
Definition optimizer.h:59
void record_batch_reuse(Batch &batch)
cudaStream_t memory_stream
Definition optimizer.h:172
virtual void set_display(bool new_display)
Enables or disables console progress reporting during training.
Definition optimizer.h:65
Definition thread_safe_queue.h:24
Definition adaptive_moment_estimation.h:14
Device
Execution device selection for OpenNN runtime (auto-detected, CPU or CUDA GPU).
Definition configuration.h:17
@ CPU
Definition configuration.h:17
@ CUDA
Definition configuration.h:17
constexpr float GRADIENT_NORM_EPS
Definition optimizer.h:19
void * cudaEvent_t
Definition pch.h:83
Matrix< float, Dynamic, 1 > VectorR
Definition pch.h:181
void * cudaStream_t
Definition pch.h:82
Workspace holding parameter gradients and per-layer deltas during a backward pass.
Definition back_propagation.h:21
Minibatch container holding pinned host/device buffers and views into a Dataset.
Definition batch.h:19
Owning raw byte buffer that lives on CPU or CUDA memory, with aligned (re)allocation.
Definition tensor_utilities.h:166
Workspace holding the activations of every layer during a forward pass.
Definition forward_propagation.h:20
VectorR training_direction
Definition optimizer.h:197
VectorR potential_parameters
Definition optimizer.h:196
Buffer data
Definition optimizer.h:192
void set(const vector< Shape > &slot_shapes, Device device=Device::CPU)
Allocates a buffer with the requested slot shapes on the target device and refreshes the views.
virtual void print() const
Prints the optimizer scratch state for debugging.
vector< TensorView > views
Definition optimizer.h:193
Index iteration
Definition optimizer.h:199
virtual ~OptimizerData()=default
float initial_learning_rate
Definition optimizer.h:198
Aggregated per-epoch error and accuracy returned by training/evaluation passes.
Definition optimizer.h:37
float accuracy
Definition optimizer.h:39
float error
Definition optimizer.h:38
History and final metrics produced by a training run.
Definition optimizer.h:204
VectorR training_error_history
Definition optimizer.h:238
float loss
Definition optimizer.h:244
void print(const string &message={}) const
Prints a summary of the training run, optionally prefixed by a message.
void resize_validation_error_history(const Index)
Resizes the stored validation-error history.
Index validation_failures
Definition optimizer.h:246
void resize_training_error_history(const Index)
Resizes the stored training-error history.
virtual ~TrainingResults()=default
VectorR validation_error_history
Definition optimizer.h:240
float get_training_error() const
Returns the final training error.
string elapsed_time
Definition optimizer.h:242
void save(const filesystem::path &) const
Saves the training-error and validation-error histories to a JSON file.
string write_stopping_condition() const
Returns a human-readable label for the stored stopping condition.
Index get_epochs_number() const
Returns the number of epochs that were actually executed.
float get_validation_error() const
Returns the final validation (selection) error.
Optimizer::StoppingCondition stopping_condition
Definition optimizer.h:227
Tensor< string, 2 > write_override_results(const Index=3) const
Builds a table of final metrics for embedding in higher-level reports.
float loss_decrease
Definition optimizer.h:248
TrainingResults(const Index=0)
Constructs a TrainingResults pre-sized for the given number of epochs.