48 float min_range,
float max_range,
55 float min_range,
float max_range,
63 float min_range,
float max_range,
72 float min_range,
float max_range,
79 float min_range,
float max_range,
87 float min_range,
float max_range,
104 #ifdef OPENNN_HAS_CUDA
120 #ifdef OPENNN_HAS_CUDA
122 void multiply_gpu(
const TensorView& input_a,
bool transpose_a,
const TensorView& input_b,
bool transpose_b,
TensorView& output,
float alpha = 1.0f,
float beta = 0.0f);
129 #ifdef OPENNN_HAS_CUDA
141 #ifdef OPENNN_HAS_CUDA
150 #ifdef OPENNN_HAS_CUDA
159 #ifdef OPENNN_HAS_CUDA
168 #ifdef OPENNN_HAS_CUDA
176 #ifdef OPENNN_HAS_CUDA
185 #ifdef OPENNN_HAS_CUDA
Definition adaptive_moment_estimation.h:14
void copy(const TensorView &source, TensorView &destination)
Copies the contents of source into destination, dispatching to CPU or GPU as needed.
void unscale(const TensorView &input, const TensorView &minimums, const TensorView &maximums, const TensorView &means, const TensorView &standard_deviations, const TensorView &scalers, float min_range, float max_range, TensorView &output)
Inverse of scale(); reconstructs original values from a previously scaled tensor.
void softmax(TensorView &output)
Applies softmax in place along the trailing dimension of output.
void copy_cpu(const TensorView &source, TensorView &destination)
CPU implementation of copy().
void multiply_cpu(const TensorView &input_a, bool transpose_a, const TensorView &input_b, bool transpose_b, TensorView &output, float alpha=1.0f, float beta=0.0f)
CPU implementation of multiply().
void bound(const TensorView &input, const TensorView &lower_bounds, const TensorView &upper_bounds, TensorView &output)
Clamps each element of input to the [lower_bounds, upper_bounds] range.
void bound_cpu(const TensorView &input, const TensorView &lower_bounds, const TensorView &upper_bounds, TensorView &output)
CPU implementation of bound().
void unscale_cpu(const TensorView &input, const TensorView &minimums, const TensorView &maximums, const TensorView &means, const TensorView &standard_deviations, const TensorView &scalers, float min_range, float max_range, TensorView &output)
CPU implementation of unscale().
void max_pooling_3d_backward_cpu(const TensorView &maximal_indices, const TensorView &output_delta, TensorView &input_delta)
CPU implementation of max_pooling_3d_backward().
void pad(const TensorView &input, TensorView &output)
Pads the input tensor and writes the result into output.
VectorI maximal_indices(const VectorR &, Index)
Indices of the n largest elements of a vector.
void max_pooling_3d_backward(const TensorView &maximal_indices, const TensorView &output_delta, TensorView &input_delta)
Backward pass for 3D max pooling; routes gradients to argmax positions.
void merge_heads(const TensorView &source, TensorView &destination)
Inverse of split_heads(); merges per-head tensors back into a single representation.
void merge_heads_cpu(const TensorView &source, TensorView &destination)
CPU implementation of merge_heads().
void average_pooling_3d_forward_cpu(const TensorView &input, TensorView &output)
CPU implementation of average_pooling_3d_forward().
void average_pooling_3d_backward_cpu(const TensorView &input, const TensorView &output_delta, TensorView &input_delta)
CPU implementation of average_pooling_3d_backward().
void average_pooling_3d_forward(const TensorView &input, TensorView &output)
Forward pass of 3D average pooling.
void add(const TensorView &input_1, const TensorView &input_2, TensorView &output)
Element-wise addition: output = input_1 + input_2.
void multiply(const TensorView &input_a, bool transpose_a, const TensorView &input_b, bool transpose_b, TensorView &output, float alpha=1.0f, float beta=0.0f)
General matrix multiply: output = alpha * op(input_a) * op(input_b) + beta * output.
void max_pooling_3d_forward(const TensorView &input, TensorView &output, TensorView &maximal_indices, bool is_training)
Forward pass of 3D max pooling; records argmax positions when training.
void scale_cpu(const TensorView &input, const TensorView &minimums, const TensorView &maximums, const TensorView &means, const TensorView &standard_deviations, const TensorView &scalers, float min_range, float max_range, TensorView &output)
CPU implementation of scale().
void split_heads(const TensorView &source, TensorView &destination)
Reshapes a multi-head attention tensor by splitting the last axis into heads.
void split_heads_cpu(const TensorView &source, TensorView &destination)
CPU implementation of split_heads().
void softmax_cpu(TensorView &output)
CPU implementation of softmax().
void scale(const TensorView &input, const TensorView &minimums, const TensorView &maximums, const TensorView &means, const TensorView &standard_deviations, const TensorView &scalers, float min_range, float max_range, TensorView &output)
Applies per-feature scaling (mean/std, min/max, or other scalers) to a tensor.
void max_pooling_3d_forward_cpu(const TensorView &input, TensorView &output, TensorView &maximal_indices, bool is_training)
CPU implementation of max_pooling_3d_forward().
void average_pooling_3d_backward(const TensorView &input, const TensorView &output_delta, TensorView &input_delta)
Backward pass for 3D average pooling.
void add_cpu(const TensorView &input_1, const TensorView &input_2, TensorView &output)
CPU implementation of add().
Non-owning view over a tensor: pointer, shape, and data type with rich reshape helpers.
Definition tensor_utilities.h:293