OpenNN
Open-source neural networks library
opennn::LevenbergMarquardtAlgorithm Class Reference final

Levenberg-Marquardt optimizer with adaptive damping. More...

#include <levenberg_marquardt_algorithm.h>

Inheritance diagram for opennn::LevenbergMarquardtAlgorithm:

Public Types

enum  DataSlot { ParameterUpdate }
 Slot indices into OptimizerData::views used by LM. More...
 
- Public Types inherited from opennn::Optimizer
enum class  StoppingCondition {
  None , MinimumLossDecrease , LossGoal , MaximumSelectionErrorIncreases ,
  MaximumEpochsNumber , MaximumTime
}
 Reasons that can terminate a training run. More...
 

Public Member Functions

 LevenbergMarquardtAlgorithm (Loss *loss=nullptr)
 Constructs the optimizer.
 
void set_default ()
 Resets all hyperparameters to their default values.
 
void set_damping_parameter (const float)
 Sets the initial damping parameter (lambda).
 
void set_damping_parameter_factor (const float)
 Sets the multiplicative factor used to grow / shrink lambda.
 
void set_minimum_damping_parameter (const float)
 Sets the lower bound for the damping parameter.
 
void set_maximum_damping_parameter (const float)
 Sets the upper bound for the damping parameter.
 
void set_minimum_loss_decrease (const float)
 Sets the minimum acceptable loss decrease between iterations.
 
TrainingResults train () override
 Runs the LM algorithm to completion.
 
void update_parameters (const Batch &batch, ForwardPropagation &forward_propagation, BackPropagationLM &back_propagation_lm, OptimizerData &data)
 Applies one LM parameter update.
 
void from_JSON (const JsonDocument &) override
 Loads optimizer hyperparameters from a parsed JSON document.
 
void to_JSON (JsonWriter &) const override
 Writes optimizer hyperparameters to a streaming JSON writer.
 
- Public Member Functions inherited from opennn::Optimizer
 Optimizer (Loss *loss=nullptr)
 Constructs an optimizer bound to a loss function.
 
virtual ~Optimizer ()=default
 Virtual destructor.
 
const Loss * get_loss () const
 Read-only access to the loss being optimized.
 
bool get_display () const
 Whether progress should be printed to stdout during training.
 
void set (Loss *new_loss)
 Re-initializes the optimizer by setting its loss pointer.
 
virtual void set_loss (Loss *new_loss)
 Updates the loss pointer; subclasses may override to refresh cached state derived from the loss.
 
virtual void set_display (bool new_display)
 Toggles per-epoch progress printing.
 
void set_display_period (const Index new_display_period)
 Sets how often progress is printed.
 
void set_maximum_epochs (const Index new_maximum_epochs)
 Sets the maximum number of epochs.
 
void set_maximum_time (const float new_maximum_time)
 Sets the maximum wall-clock training time.
 
void set_loss_goal (const float new_loss_goal)
 Sets the training-loss goal.
 
void set_maximum_validation_failures (const Index new_maximum_validation_failures)
 Sets the maximum number of consecutive validation-error increases tolerated.
 
const string & get_name () const
 Canonical name of the optimizer (set by subclasses).
 
virtual void print () const
 Prints a human-readable summary of the optimizer to stdout.
 
void save (const filesystem::path &) const
 Saves the optimizer state to a file.
 
void load (const filesystem::path &)
 Loads the optimizer state from a file.
 

Additional Inherited Members

- Static Public Member Functions inherited from opennn::Optimizer
static float get_elapsed_time (const time_t &beginning_time)
 Computes the elapsed wall-clock time since a reference instant.
 
- Protected Member Functions inherited from opennn::Optimizer
void set_names ()
 Subclass hook to refresh layer name caches after a loss change.
 
void set_scaling ()
 Subclass hook to install the dataset-derived input scalers.
 
void set_unscaling ()
 Subclass hook to install the dataset-derived output unscalers.
 
bool check_stopping_condition (TrainingResults &results, Index epoch, float elapsed_time, float training_error, Index validation_failures) const
 Evaluates every stopping criterion and updates the result accordingly.
 
void write_common_xml (JsonWriter &) const
 Writes the common Optimizer fields to JSON.
 
void read_common_xml (const Json *)
 Reads the common Optimizer fields from JSON.
 
void setup_device_training ()
 Allocates the CUDA stream and events used for batch prefetching.
 
void teardown_device_training ()
 Releases the CUDA stream and events allocated by setup_device_training().
 
void prefetch_batch (Batch &batch, Index sample_count, int slot)
 Asynchronously prefetches the next training batch into a slot.
 
void wait_prefetch (int slot)
 Waits for the prefetch into a given slot to finish.
 
void sync_device ()
 Synchronizes the device on the optimizer's CUDA stream.
 
bool should_display (Index epoch) const
 Whether the current epoch should print progress.
 
EpochStats train_epoch (bool is_classification, ForwardPropagation &forward_propagation, BackPropagation &back_propagation, ThreadSafeQueue< Batch * > &empty_queue, ThreadSafeQueue< Batch * > &ready_queue, const vector< vector< Index > > &batches, const vector< Index > &input_feature_indices, const vector< Index > &decoder_feature_indices, const vector< Index > &target_feature_indices, const std::function< void(BackPropagation &)> &update)
 Runs a single training epoch over all batches.
 
EpochStats evaluate_epoch (bool is_classification, ForwardPropagation &forward_propagation, ThreadSafeQueue< Batch * > &empty_queue, ThreadSafeQueue< Batch * > &ready_queue, const vector< vector< Index > > &batches, const vector< Index > &input_feature_indices, const vector< Index > &decoder_feature_indices, const vector< Index > &target_feature_indices)
 Runs a single evaluation pass over all batches without updating parameters.
 
- Static Protected Member Functions inherited from opennn::Optimizer
static void clip_gradient_norm (Buffer &gradient, float max_norm)
 In-place gradient norm clipping.
 
- Protected Attributes inherited from opennn::Optimizer
Loss * loss = nullptr
 Loss being optimized; not owned.
 
float training_loss_goal = 0.0f
 Training stops when the training loss reaches this value.
 
Index maximum_validation_failures = numeric_limits<Index>::max()
 Maximum number of consecutive validation-error increases tolerated.
 
Index maximum_epochs = 10000
 Maximum number of training epochs.
 
float maximum_time = 360000.0f
 Maximum wall-clock training time in seconds.
 
Index display_period = 10
 Number of epochs between progress prints.
 
bool display = true
 Whether progress should be printed to stdout during training.
 
string name
 Canonical name of the optimizer (set by subclasses).
 
cudaStream_t memory_stream = nullptr
 CUDA stream used to prefetch batches into device memory.
 
cudaEvent_t batch_ready_event [2] = {nullptr, nullptr}
 CUDA events signaling when each prefetched batch is ready.
 

Detailed Description

Levenberg-Marquardt optimizer with adaptive damping.

Trust-region-style method that interpolates between gradient descent (large damping) and Gauss-Newton (small damping). At each iteration it solves (J^T J + lambda * I) d = -J^T r, accepting the step (and decreasing lambda) when the loss decreases, or rejecting it (and increasing lambda) otherwise.

Best suited to small and medium dense networks trained on regression losses; not suited to very large models or losses that are not twice differentiable.

Member Enumeration Documentation

◆ DataSlot

Slot indices into OptimizerData::views used by LM.

Enumerator
ParameterUpdate 

Constructor & Destructor Documentation

◆ LevenbergMarquardtAlgorithm()

opennn::LevenbergMarquardtAlgorithm::LevenbergMarquardtAlgorithm ( Loss * loss = nullptr)

Constructs the optimizer.

Parameters
loss    Loss to optimize; may be nullptr if set later.

Member Function Documentation

◆ from_JSON()

void opennn::LevenbergMarquardtAlgorithm::from_JSON ( const JsonDocument & )
override virtual

Loads optimizer hyperparameters from a parsed JSON document.

Reimplemented from opennn::Optimizer.

◆ set_damping_parameter()

void opennn::LevenbergMarquardtAlgorithm::set_damping_parameter ( const float )

Sets the initial damping parameter (lambda).

Receives the new damping value.

◆ set_damping_parameter_factor()

void opennn::LevenbergMarquardtAlgorithm::set_damping_parameter_factor ( const float )

Sets the multiplicative factor used to grow / shrink lambda.

Receives the factor (>1); lambda is multiplied by it on rejected steps and divided by it on accepted steps.

◆ set_default()

void opennn::LevenbergMarquardtAlgorithm::set_default ( )

Resets all hyperparameters to their default values.

◆ set_maximum_damping_parameter()

void opennn::LevenbergMarquardtAlgorithm::set_maximum_damping_parameter ( const float )

Sets the upper bound for the damping parameter.

Receives the maximum lambda allowed.

◆ set_minimum_damping_parameter()

void opennn::LevenbergMarquardtAlgorithm::set_minimum_damping_parameter ( const float )

Sets the lower bound for the damping parameter.

Receives the minimum lambda allowed.

◆ set_minimum_loss_decrease()

void opennn::LevenbergMarquardtAlgorithm::set_minimum_loss_decrease ( const float )

Sets the minimum acceptable loss decrease between iterations.

Receives the threshold below which training stops.

◆ to_JSON()

void opennn::LevenbergMarquardtAlgorithm::to_JSON ( JsonWriter & ) const
override virtual

Writes optimizer hyperparameters to a streaming JSON writer.

Reimplemented from opennn::Optimizer.

◆ train()

TrainingResults opennn::LevenbergMarquardtAlgorithm::train ( )
override virtual

Runs the LM algorithm to completion.

Returns
Per-epoch error history and the stopping condition that fired.

Implements opennn::Optimizer.

◆ update_parameters()

void opennn::LevenbergMarquardtAlgorithm::update_parameters ( const Batch & batch,
ForwardPropagation & forward_propagation,
BackPropagationLM & back_propagation_lm,
OptimizerData & data )

Applies one LM parameter update.

Parameters
batch    Current training batch.
forward_propagation    Forward intermediates for the batch.
back_propagation_lm    Scratch holding the Jacobian and approximate Hessian.
data    Mutable optimizer state.