OpenNN
Open-source neural networks library
opennn::StochasticGradientDescent Class Reference (final)

Mini-batch SGD with optional momentum, Nesterov acceleration and learning-rate decay. More...

#include <stochastic_gradient_descent.h>

Inheritance diagram for opennn::StochasticGradientDescent:

Public Types

enum  DataSlot { ParameterUpdate , LastParameterUpdate }
 Slot indices into OptimizerData::views used by SGD. More...
 
- Public Types inherited from opennn::Optimizer
enum class  StoppingCondition {
  None , MinimumLossDecrease , LossGoal , MaximumSelectionErrorIncreases ,
  MaximumEpochsNumber , MaximumTime
}
 Reasons that can terminate a training run. More...
 

Public Member Functions

 StochasticGradientDescent (Loss *loss=nullptr)
 Constructs the optimizer.
 
void set_default ()
 Resets all hyperparameters to their default values.
 
void set_batch_size (const Index)
 Sets the mini-batch size used during training.
 
Index get_samples_number () const
 Returns the number of training samples in the bound dataset.
 
void set_initial_learning_rate (const float)
 Sets the base learning rate (before any decay).
 
void set_initial_decay (const float)
 Sets the per-epoch learning-rate decay factor.
 
void set_momentum (const float)
 Sets the momentum coefficient.
 
void set_nesterov (bool)
 Toggles Nesterov accelerated gradient.
 
void update_parameters (BackPropagation &back_propagation, OptimizerData &data, float learning_rate) const
 Applies one SGD parameter update.
 
TrainingResults train () override
 Runs SGD to completion.
 
void from_JSON (const JsonDocument &) override
 Loads optimizer hyperparameters from a parsed JSON document.
 
void to_JSON (JsonWriter &) const override
 Writes optimizer hyperparameters to a streaming JSON writer.
 
- Public Member Functions inherited from opennn::Optimizer
 Optimizer (Loss *loss=nullptr)
 Constructs an optimizer bound to a loss function.
 
virtual ~Optimizer ()=default
 Virtual destructor.
 
const Loss * get_loss () const
 Read-only access to the loss being optimized.
 
bool get_display () const
 Whether progress should be printed to stdout during training.
 
void set (Loss *new_loss)
 Re-initializes the optimizer by setting its loss pointer.
 
virtual void set_loss (Loss *new_loss)
 Updates the loss pointer; subclasses may override to refresh cached state derived from the loss.
 
virtual void set_display (bool new_display)
 Toggles per-epoch progress printing.
 
void set_display_period (const Index new_display_period)
 Sets how often progress is printed.
 
void set_maximum_epochs (const Index new_maximum_epochs)
 Sets the maximum number of epochs.
 
void set_maximum_time (const float new_maximum_time)
 Sets the maximum wall-clock training time.
 
void set_loss_goal (const float new_loss_goal)
 Sets the training-loss goal.
 
void set_maximum_validation_failures (const Index new_maximum_validation_failures)
 Sets the maximum number of consecutive validation-error increases tolerated.
 
const string & get_name () const
 Canonical name of the optimizer (set by subclasses).
 
virtual void print () const
 Prints a human-readable summary of the optimizer to stdout.
 
void save (const filesystem::path &) const
 Saves the optimizer state to a file.
 
void load (const filesystem::path &)
 Loads the optimizer state from a file.
 

Additional Inherited Members

- Static Public Member Functions inherited from opennn::Optimizer
static float get_elapsed_time (const time_t &beginning_time)
 Computes the elapsed wall-clock time since a reference instant.
 
- Protected Member Functions inherited from opennn::Optimizer
void set_names ()
 Subclass hook to refresh layer name caches after a loss change.
 
void set_scaling ()
 Subclass hook to install the dataset-derived input scalers.
 
void set_unscaling ()
 Subclass hook to install the dataset-derived output unscalers.
 
bool check_stopping_condition (TrainingResults &results, Index epoch, float elapsed_time, float training_error, Index validation_failures) const
 Evaluates every stopping criterion and updates the result accordingly.
 
void write_common_xml (JsonWriter &) const
 Writes the common Optimizer fields to JSON.
 
void read_common_xml (const Json *)
 Reads the common Optimizer fields from JSON.
 
void setup_device_training ()
 Allocates the CUDA stream and events used for batch prefetching.
 
void teardown_device_training ()
 Releases the CUDA stream and events allocated by setup_device_training().
 
void prefetch_batch (Batch &batch, Index sample_count, int slot)
 Asynchronously prefetches the next training batch into a slot.
 
void wait_prefetch (int slot)
 Waits for the prefetch into a given slot to finish.
 
void sync_device ()
 Synchronizes the device on the optimizer's CUDA stream.
 
bool should_display (Index epoch) const
 Whether the current epoch should print progress.
 
EpochStats train_epoch (bool is_classification, ForwardPropagation &forward_propagation, BackPropagation &back_propagation, ThreadSafeQueue< Batch * > &empty_queue, ThreadSafeQueue< Batch * > &ready_queue, const vector< vector< Index > > &batches, const vector< Index > &input_feature_indices, const vector< Index > &decoder_feature_indices, const vector< Index > &target_feature_indices, const std::function< void(BackPropagation &)> &update)
 Runs a single training epoch over all batches.
 
EpochStats evaluate_epoch (bool is_classification, ForwardPropagation &forward_propagation, ThreadSafeQueue< Batch * > &empty_queue, ThreadSafeQueue< Batch * > &ready_queue, const vector< vector< Index > > &batches, const vector< Index > &input_feature_indices, const vector< Index > &decoder_feature_indices, const vector< Index > &target_feature_indices)
 Runs a single evaluation pass over all batches without updating parameters.
 
- Static Protected Member Functions inherited from opennn::Optimizer
static void clip_gradient_norm (Buffer &gradient, float max_norm)
 In-place gradient norm clipping.
 
- Protected Attributes inherited from opennn::Optimizer
Loss * loss = nullptr
 Loss being optimized; not owned.
 
float training_loss_goal = 0.0f
 Training stops when the training loss reaches this value.
 
Index maximum_validation_failures = numeric_limits<Index>::max()
 Maximum number of consecutive validation-error increases tolerated.
 
Index maximum_epochs = 10000
 Maximum number of training epochs.
 
float maximum_time = 360000.0f
 Maximum wall-clock training time in seconds.
 
Index display_period = 10
 Number of epochs between progress prints.
 
bool display = true
 Whether progress should be printed to stdout during training.
 
string name
 Canonical name of the optimizer (set by subclasses).
 
cudaStream_t memory_stream = nullptr
 CUDA stream used to prefetch batches into device memory.
 
cudaEvent_t batch_ready_event [2] = {nullptr, nullptr}
 CUDA events signaling when each prefetched batch is ready.
 

Detailed Description

Mini-batch SGD with optional momentum, Nesterov acceleration and learning-rate decay.

Updates parameters as theta -= lr * grad. With momentum, accumulates a velocity vector v = momentum * v + grad and steps along v; the Nesterov variant evaluates the gradient at theta - lr * momentum * v to obtain a lookahead update.
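The update rules above can be sketched in plain C++. This is an illustrative standalone sketch, not OpenNN's internal implementation: the function names `sgd_step` and `sgd_momentum_step` are hypothetical, and the Nesterov branch uses the common reformulation that steps along momentum * v + grad rather than re-evaluating the gradient at the lookahead point.

```cpp
#include <cstddef>
#include <vector>

// Plain SGD step: theta -= lr * grad.
void sgd_step(std::vector<float>& theta, const std::vector<float>& grad, float lr)
{
    for (std::size_t i = 0; i < theta.size(); ++i)
        theta[i] -= lr * grad[i];
}

// Momentum variant: accumulate velocity v = momentum * v + grad and step along v.
// With nesterov == true, step along momentum * v + grad (lookahead reformulation).
void sgd_momentum_step(std::vector<float>& theta, std::vector<float>& velocity,
                       const std::vector<float>& grad,
                       float lr, float momentum, bool nesterov)
{
    for (std::size_t i = 0; i < theta.size(); ++i)
    {
        velocity[i] = momentum * velocity[i] + grad[i];
        const float update = nesterov ? momentum * velocity[i] + grad[i]
                                      : velocity[i];
        theta[i] -= lr * update;
    }
}
```

On the first step the velocity buffer is zero, so momentum and plain SGD coincide; the two diverge from the second step onward as past gradients accumulate in the velocity.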

Member Enumeration Documentation

◆ DataSlot

Slot indices into OptimizerData::views used by SGD.

Enumerator
ParameterUpdate 

Current parameter increment.

LastParameterUpdate 

Previous increment (used by momentum).

Constructor & Destructor Documentation

◆ StochasticGradientDescent()

opennn::StochasticGradientDescent::StochasticGradientDescent ( Loss * loss = nullptr)

Constructs the optimizer.

Parameters
loss	Loss to optimize; may be nullptr if the loss is set later.

Member Function Documentation

◆ from_JSON()

void opennn::StochasticGradientDescent::from_JSON ( const JsonDocument & )
overridevirtual

Loads optimizer hyperparameters from a parsed JSON document.

Reimplemented from opennn::Optimizer.

◆ get_samples_number()

Index opennn::StochasticGradientDescent::get_samples_number ( ) const

Returns the number of training samples in the bound dataset.

Returns
Sample count, or 0 if the optimizer is not bound to a loss.

◆ set_batch_size()

void opennn::StochasticGradientDescent::set_batch_size ( const Index )

Sets the mini-batch size used during training.

Receives the number of samples per gradient update.

◆ set_default()

void opennn::StochasticGradientDescent::set_default ( )

Resets all hyperparameters to their default values.

◆ set_initial_decay()

void opennn::StochasticGradientDescent::set_initial_decay ( const float )

Sets the per-epoch learning-rate decay factor.

Receives the decay rate; the effective learning rate at epoch t is lr / (1 + decay * t).
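The documented schedule can be written out directly. A minimal sketch of the stated formula, with a hypothetical helper name:

```cpp
// Time-based decay, mirroring the documented schedule:
// effective learning rate at epoch t is lr / (1 + decay * t).
float effective_learning_rate(float initial_lr, float decay, int epoch)
{
    return initial_lr / (1.0f + decay * static_cast<float>(epoch));
}
```

With decay = 0, the learning rate stays at its initial value for the whole run; larger decay values shrink it hyperbolically with the epoch index.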

◆ set_initial_learning_rate()

void opennn::StochasticGradientDescent::set_initial_learning_rate ( const float )

Sets the base learning rate (before any decay).

Receives the learning rate used at the first epoch.

◆ set_momentum()

void opennn::StochasticGradientDescent::set_momentum ( const float )

Sets the momentum coefficient.

Receives the momentum value (0 disables momentum).

◆ set_nesterov()

void opennn::StochasticGradientDescent::set_nesterov ( bool )

Toggles Nesterov accelerated gradient.

Receives true to enable Nesterov, false to use vanilla momentum.

◆ to_JSON()

void opennn::StochasticGradientDescent::to_JSON ( JsonWriter & ) const
overridevirtual

Writes optimizer hyperparameters to a streaming JSON writer.

Reimplemented from opennn::Optimizer.

◆ train()

TrainingResults opennn::StochasticGradientDescent::train ( )
overridevirtual

Runs SGD to completion.

Returns
Per-epoch error history and the stopping condition that fired.

Implements opennn::Optimizer.

◆ update_parameters()

void opennn::StochasticGradientDescent::update_parameters ( BackPropagation & back_propagation,
OptimizerData & data,
float learning_rate ) const

Applies one SGD parameter update.

Parameters
back_propagation	Gradient buffer for the current batch.
data	Mutable optimizer state (last update buffer).
learning_rate	Effective learning rate for this epoch.