// Data members (enclosing class declaration is outside this excerpt).
// Exclusively owned ForwardPropagation object (std::unique_ptr ownership).
77 unique_ptr<ForwardPropagation> forward_propagation;
// Collection of TensorView objects; presumably the views fed to the network — TODO confirm.
79 vector<TensorView> inputs;
// Sequence of Index values; NOTE(review): looks like the ids produced so far — confirm.
83 vector<Index> history;
// Buffer of 16-bit unsigned words; presumably bfloat16 staging data, judging by the name — TODO confirm.
85 vector<uint16_t> bf16_staging;
// Layer positions, each default-initialised to -1.
// NOTE(review): -1 presumably means "not identified yet" — confirm against identify_layer_ranges().
87 Index decoder_embedding_layer_index = -1;
88 Index encoder_embedding_layer_index = -1;
89 Index encoder_last_layer_index = -1;
90 Index decoder_stack_first_layer_index = -1;
91 Index output_projection_layer_index = -1;
// Helper declarations; their definitions are not part of this excerpt.
// Takes no arguments and returns nothing.
// NOTE(review): presumably fills in the *_layer_index members — confirm in the definition.
93 void identify_layer_ranges();
// Takes the source text by const reference; returns nothing.
94 void encode_source(
const string& source);
// Takes a step index and a read-only SamplingConfig; returns an Index.
// NOTE(review): the returned Index is presumably the id chosen at this step — confirm.
95 Index decode_step(Index step_index,
const SamplingConfig& config);
// Takes no arguments and returns nothing.
// NOTE(review): presumably clears state kept between prompts — confirm in the definition.
96 void reset_per_prompt_state();
// Const member function returning a string by value.
97 string assemble_output_string()
const;
Token-based language dataset with input/target vocabularies and binary token cache.
Definition language_dataset.h:19
Definition adaptive_moment_estimation.h:14
@ CUDA
Definition configuration.h:17
Matrix< float, Dynamic, 1 > VectorR
Definition pch.h:181
Tensor< float, 2, Layout|AlignedMax > Tensor2
Definition pch.h:189
Owning raw byte buffer that lives on CPU or CUDA memory, with aligned (re)allocation.
Definition tensor_utilities.h:166
Non-owning view over a tensor: pointer, shape, and data type with rich reshape helpers.
Definition tensor_utilities.h:293