Index _ | A | B | C | D | E | F | G | H | I | J | L | M | N | O | P | R | S | T | U | V | W | Z _ __call__() (megatron.data.autoaugment.ImageNetPolicy method) (megatron.data.autoaugment.SubPolicy method) (megatron.text_generation.forward_step.ForwardStep method) (megatron.timers.Timers method) __eq__() (megatron.wandb_logger.WandBConfig method) __getitem__() (megatron.data.biencoder_dataset_utils.BlockSamplesMapping method) (megatron.data.ict_dataset.ICTDataset method) (megatron.data.image_folder.DatasetFolder method) (megatron.data.realm_dataset_utils.BlockSamplesMapping method) __getnewargs__() (megatron.data.dataset_utils.MaskedLmInstance method) __hash__ (megatron.wandb_logger.WandBConfig attribute) __len__() (megatron.text_generation.beam_utils.BeamHypotheses method) __new__() (megatron.data.dataset_utils.MaskedLmInstance static method) __repr__() (megatron.data.autoaugment.ImageNetPolicy method) (megatron.data.dataset_utils.MaskedLmInstance method) (megatron.wandb_logger.WandBConfig method) __str__() (megatron.optimizer.distrib_optimizer.Range method) A AbstractTokenizer (class in megatron.tokenizer.tokenizer) accimage_loader() (in module megatron.data.image_folder) add() (megatron.core.tensor_parallel.random.CudaRNGStatesTracker method) (megatron.memory.MemoryBuffer method) (megatron.text_generation.beam_utils.BeamHypotheses method) add_block_data() (megatron.data.realm_index.OpenRetreivalDataStore method) add_embed_data() (megatron.data.realm_index.FaissMIPSIndex method) add_tokentype_embeddings() (megatron.model.language_model.Embedding method) all_gather_item() (in module megatron.dist_signal_handler) allocate_mem_buff() (in module megatron.memory) allreduce_embedding_grads() (megatron.optimizer.optimizer.MegatronOptimizer method) allreduce_gradients() (megatron.model.distributed.DistributedDataParallel method) allreduce_layernorm_grads() (megatron.optimizer.optimizer.MegatronOptimizer method) allreduce_position_embedding_grads() (megatron.optimizer.optimizer.MegatronOptimizer method) allreduce_word_embedding_grads() (megatron.optimizer.optimizer.MegatronOptimizer method) apply_rotary_emb() (in module megatron.model.positional_embeddings) assert_viewless_tensor() (in module megatron.core.utils) attention_mask_func() (in module megatron.model.utils) AttnMaskType (class in megatron.model.enums) AttnType (class in megatron.model.enums) average_losses_across_data_parallel_group() (in module megatron.utils) B backward() (megatron.core.tensor_parallel.layers.LinearWithGradAccumulationAndAsyncCommunication static method) (megatron.core.tensor_parallel.random.CheckpointFunction static method) (megatron.core.utils.MakeViewlessTensor static method) (megatron.model.fused_bias_gelu.GeLUFunction static method) (megatron.model.fused_layer_norm.FusedLayerNormAffineFunction static method) (megatron.model.fused_softmax.ScaledMaskedSoftmax static method) (megatron.model.fused_softmax.ScaledSoftmax static method) (megatron.model.fused_softmax.ScaledUpperTriangMaskedSoftmax static method) backward_step() (in module megatron.schedules) BasicTokenizer (class in megatron.tokenizer.bert_tokenization) beam_search() (in module megatron.text_generation.api) beam_search_and_post_process() (in module megatron.text_generation.api) beam_search_and_return_on_first_stage() (in module megatron.text_generation.generation) BeamHypotheses (class in megatron.text_generation.beam_utils) bert_extended_attention_mask() (in module megatron.model.bert_model) bert_position_ids() (in module megatron.model.bert_model) BertDataset (class in megatron.data.bert_dataset) BertLMHead (class in megatron.model.bert_model) BertModel (class in megatron.model.bert_model) bias_dropout_add() (in module megatron.model.transformer) biencoder_model_provider() (in module megatron.model.biencoder_model) BiEncoderModel (class in megatron.model.biencoder_model) BlendableDataset (class in megatron.data.blendable_dataset) BlockSampleData (class in megatron.data.biencoder_dataset_utils) (class in megatron.data.realm_dataset_utils) BlockSamplesMapping (class in megatron.data.biencoder_dataset_utils) (class in megatron.data.realm_dataset_utils) broadcast_data() (in module megatron.core.tensor_parallel.data) broadcast_float_list() (in module megatron.text_generation.communication) broadcast_from_last_pipeline_stage() (in module megatron.text_generation.communication) broadcast_from_last_to_first_pipeline_stage() (in module megatron.text_generation.communication) broadcast_int_list() (in module megatron.text_generation.communication) broadcast_list() (in module megatron.text_generation.communication) broadcast_tensor() (in module megatron.text_generation.communication) build_and_save_index() (megatron.indexer.IndexBuilder method) build_base_parser() (in module megatron.arguments) build_model_and_main_param_groups() (megatron.optimizer.distrib_optimizer.DistributedOptimizer class method) build_model_gbuf_param_range_map() (megatron.optimizer.distrib_optimizer.DistributedOptimizer class method) build_model_gbuf_range() (megatron.optimizer.distrib_optimizer.DistributedOptimizer class method) build_model_gbuf_range_map() (megatron.optimizer.distrib_optimizer.DistributedOptimizer class method) build_model_param_gbuf_map() (megatron.optimizer.distrib_optimizer.DistributedOptimizer class method) build_num_microbatches_calculator() (in module megatron.microbatches) build_optimizer_group_ranges() (megatron.optimizer.distrib_optimizer.DistributedOptimizer class method) build_pretraining_data_loader() (in module megatron.data.data_samplers) build_sample() (in module megatron.data.orqa_wiki_dataset) build_tokenizer() (in module megatron.tokenizer.tokenizer) build_tokens_types_paddings_from_ids() (in module megatron.data.orqa_wiki_dataset) build_tokens_types_paddings_from_text() (in module megatron.data.orqa_wiki_dataset) build_train_valid_test_data_iterators() (in module megatron.training) build_train_valid_test_datasets() (in module megatron.data.dataset_utils) (in module megatron.data.gpt_dataset) build_training_sample() (in module megatron.data.bert_dataset) (in module megatron.data.t5_dataset) bytes_to_unicode() (in module megatron.tokenizer.gpt2_tokenization) C calc_params_l2_norm() (in module megatron.utils) check_adlr_autoresume_termination() (in module megatron.utils) check_checkpoint_args() (in module megatron.checkpointing) checkpoint() (in module megatron.core.tensor_parallel.random) CheckpointFunction (class in megatron.core.tensor_parallel.random) Classification (class in megatron.model.classification) clear() (megatron.data.realm_index.OpenRetreivalDataStore method) clip_grad_norm_fp32() (in module megatron.optimizer.clip_grads) code() (in module megatron.data.indexed_dataset) ColumnParallelLinear (class in megatron.core.tensor_parallel.layers) compile_helper() (in module megatron.data.dataset_utils) concat_and_pad_tokens() (megatron.data.ict_dataset.ICTDataset method) ConstantGradScaler (class in megatron.optimizer.grad_scaler) ConstantNumMicroBatches (class in megatron.microbatches) conversion_helper() (in module megatron.model.module) convert_by_vocab() (in module megatron.tokenizer.bert_tokenization) convert_ids_to_tokens() (in module megatron.tokenizer.bert_tokenization) (megatron.tokenizer.gpt2_tokenization.GPT2Tokenizer method) convert_to_unicode() (in module megatron.tokenizer.bert_tokenization) convert_tokens_to_ids() (in module megatron.tokenizer.bert_tokenization) (megatron.tokenizer.gpt2_tokenization.GPT2Tokenizer method) convert_tokens_to_string() (megatron.tokenizer.bert_tokenization.FullTokenizer static method) copy_from_last_to_first_pipeline_stage() (in module megatron.text_generation.communication) copy_tensor_model_parallel_attributes() (in module megatron.core.tensor_parallel.layers) copy_to_tensor_model_parallel_region() (in module megatron.core.tensor_parallel.mappings) CoreAttention (class in megatron.model.transformer) count_zeros_fp32() (in module megatron.optimizer.clip_grads) create_doc_idx() (in module megatron.data.indexed_dataset) create_masked_lm_predictions() (in module megatron.data.dataset_utils) create_tokens_and_tokentypes() (in module megatron.data.dataset_utils) CudaRNGStatesTracker (class in megatron.core.tensor_parallel.random) custom_backward() (in module megatron.schedules) cyclic_iter() (in module megatron.training) D data_file_path() (in module megatron.data.indexed_dataset) dataset_exists() (in module megatron.data.indexed_dataset) DatasetFolder (class in megatron.data.image_folder) deallocate_output_tensor() (in module megatron.schedules) default_loader() (in module megatron.data.image_folder) destroy_model_parallel() (in module megatron.core.parallel_state) detach() (in module megatron.data.realm_index) detokenize_generations() (in module megatron.text_generation.tokenization) DistributedDataParallel (class in megatron.model.distributed) DistributedDataParallelBase (class in megatron.model.distributed) DistributedOptimizer (class in megatron.optimizer.distrib_optimizer) DistributedSignalHandler (class in megatron.dist_signal_handler) divide() (in module megatron.core.utils) dropout_add() (in module megatron.model.transformer) DropPath (class in megatron.model.transformer) dummy_handler() (in module megatron.schedules) DummyTimer (class in megatron.timers) DynamicGradScaler (class in megatron.optimizer.grad_scaler) E elapsed() (megatron.timers.Timer method) embed_text() (megatron.model.biencoder_model.BiEncoderModel static method) Embedding (class in megatron.model.language_model) ensure_directory_exists() (in module megatron.checkpointing) ensure_divisibility() (in module megatron.core.utils) evaluate() (in module megatron.training) evaluate_and_print_results() (in module megatron.training) F FaissMIPSIndex (class in megatron.data.realm_index) FalconModel (class in megatron.model.falcon_model) find_checkpoint_rank_0() (in module megatron.checkpointing) fix_query_key_value_ordering() (in module megatron.checkpointing) float16_to_fp32() (in module megatron.model.module) Float16Module (class in megatron.model.module) Float16OptimizerWithFloat16Params (class in megatron.optimizer.optimizer) fork() (megatron.core.tensor_parallel.random.CudaRNGStatesTracker method) forward() (megatron.core.tensor_parallel.layers.ColumnParallelLinear method) (megatron.core.tensor_parallel.layers.LinearWithGradAccumulationAndAsyncCommunication static method) (megatron.core.tensor_parallel.layers.RowParallelLinear method) (megatron.core.tensor_parallel.layers.VocabParallelEmbedding method) (megatron.core.tensor_parallel.random.CheckpointFunction static method) (megatron.core.utils.MakeViewlessTensor static method) (megatron.model.bert_model.BertLMHead method) (megatron.model.bert_model.BertModel method) (megatron.model.biencoder_model.BiEncoderModel method) (megatron.model.biencoder_model.PretrainedBertModel method) (megatron.model.classification.Classification method) (megatron.model.distributed.DistributedDataParallelBase method) (megatron.model.fused_bias_gelu.GeLUFunction static method) (megatron.model.fused_layer_norm.FusedLayerNormAffineFunction static method) (megatron.model.fused_layer_norm.MixedFusedLayerNorm method) (megatron.model.fused_layer_norm.RMSNorm method) (megatron.model.fused_softmax.FusedScaleMaskSoftmax method) (megatron.model.fused_softmax.ScaledMaskedSoftmax static method) (megatron.model.fused_softmax.ScaledSoftmax static method) (megatron.model.fused_softmax.ScaledUpperTriangMaskedSoftmax static method) (megatron.model.gpt_model.GPTModel method) (megatron.model.language_model.Embedding method) (megatron.model.language_model.Pooler method) (megatron.model.language_model.TransformerLanguageModel method) (megatron.model.module.Float16Module method) (megatron.model.multiple_choice.MultipleChoice method) (megatron.model.t5_model.T5LMHead method) (megatron.model.t5_model.T5Model method) (megatron.model.transformer.CoreAttention method) (megatron.model.transformer.DropPath method) (megatron.model.transformer.NoopTransformerLayer method) (megatron.model.transformer.ParallelAttention method) (megatron.model.transformer.ParallelMLP method) (megatron.model.transformer.ParallelTransformer method) (megatron.model.transformer.ParallelTransformerLayer method) forward_backward_no_pipelining() (in module megatron.schedules) forward_backward_pipelining_with_interleaving() (in module megatron.schedules) forward_backward_pipelining_without_interleaving() (in module megatron.schedules) forward_step() (in module megatron.schedules) ForwardStep (class in megatron.text_generation.forward_step) fp32_to_float16() (in module megatron.model.module) FP32Optimizer (class in megatron.optimizer.optimizer) from_pretrained() (megatron.tokenizer.gpt2_tokenization.GPT2Tokenizer class method) FullTokenizer (class in megatron.tokenizer.bert_tokenization) FusedLayerNormAffineFunction (class in megatron.model.fused_layer_norm) FusedScaleMaskSoftmax (class in megatron.model.fused_softmax) G gather_from_sequence_parallel_region() (in module megatron.core.tensor_parallel.mappings) gather_from_tensor_model_parallel_region() (in module megatron.core.tensor_parallel.mappings) gather_model_params() (megatron.optimizer.distrib_optimizer.DistributedOptimizer method) (megatron.optimizer.optimizer.MegatronOptimizer method) gather_split_1d_tensor() (in module megatron.core.tensor_parallel.utils) GEGLU (class in megatron.model.glu_activations) GeLUFunction (class in megatron.model.fused_bias_gelu) generate() (in module megatron.text_generation.api) generate_and_post_process() (in module megatron.text_generation.api) generate_tokens_probs_and_return_on_first_stage() (in module megatron.text_generation.generation) get() (megatron.data.indexed_dataset.MMapIndexedDataset method) (megatron.model.distributed.MemoryBuffer method) get_a_and_b_segments() (in module megatron.data.dataset_utils) get_adlr_autoresume() (in module megatron.global_vars) get_args() (in module megatron.global_vars) get_available_dataset_impl() (in module megatron.data.indexed_dataset) get_bias_dropout_add() (in module megatron.model.transformer) get_block() (megatron.data.ict_dataset.ICTDataset method) get_block_samples_mapping() (in module megatron.data.biencoder_dataset_utils) (in module megatron.data.realm_dataset_utils) get_checkpoint_name() (in module megatron.checkpointing) get_checkpoint_names() (in module megatron.checkpointing) get_checkpoint_tracker_filename() (in module megatron.checkpointing) get_checkpoint_version() (in module megatron.checkpointing) get_counters() (in module megatron.global_vars) get_cuda_rng_tracker() (in module megatron.core.tensor_parallel.random) get_current_global_batch_size() (in module megatron.global_vars) get_data() (megatron.memory.MemoryBuffer method) get_data_parallel_group() (in module megatron.core.parallel_state) get_data_parallel_rank() (in module megatron.core.parallel_state) get_data_parallel_src_rank() (in module megatron.core.parallel_state) get_data_parallel_world_size() (in module megatron.core.parallel_state) get_datasets_weights_and_num_samples() (in module megatron.data.dataset_utils) get_device() (in module megatron.dist_signal_handler) get_dropout_add() (in module megatron.model.transformer) get_embedding_group() (in module megatron.core.parallel_state) get_forward_backward_func() (in module megatron.schedules) get_global_memory_buffer() (in module megatron.core.parallel_state) get_ict_batch() (in module megatron.data.biencoder_dataset_utils) (in module megatron.data.realm_dataset_utils) get_ict_dataset() (in module megatron.data.ict_dataset) get_indexed_dataset_() (in module megatron.data.dataset_utils) (in module megatron.data.gpt_dataset) get_language_model() (in module megatron.model.language_model) get_linear_layer() (in module megatron.model.utils) get_loss_scale() (megatron.optimizer.optimizer.FP32Optimizer method) (megatron.optimizer.optimizer.MegatronOptimizer method) (megatron.optimizer.optimizer.MixedPrecisionOptimizer method) get_lr() (megatron.optimizer_param_scheduler.OptimizerParamScheduler method) get_ltor_masks_and_position_ids() (in module megatron.utils) get_mem_buff() (in module megatron.memory) get_model() (in module megatron.training) get_model_buffer_dp_views() (megatron.optimizer.distrib_optimizer.DistributedOptimizer static method) get_model_parallel_group() (in module megatron.core.parallel_state) (megatron.optimizer.distrib_optimizer.DistributedOptimizer method) (megatron.optimizer.optimizer.MegatronOptimizer method) get_model_param_range_map() (megatron.optimizer.distrib_optimizer.DistributedOptimizer method) get_model_provider() (in module megatron.model.biencoder_model) get_null_block() (megatron.data.ict_dataset.ICTDataset method) get_num_microbatches() (in module megatron.global_vars) get_one_epoch_dataloader() (in module megatron.data.biencoder_dataset_utils) (in module megatron.data.realm_dataset_utils) get_open_retrieval_batch() (in module megatron.data.orqa_wiki_dataset) get_open_retrieval_wiki_dataset() (in module megatron.data.orqa_wiki_dataset) get_pairs() (in module megatron.tokenizer.gpt2_tokenization) get_pipeline_model_parallel_first_rank() (in module megatron.core.parallel_state) get_pipeline_model_parallel_group() (in module megatron.core.parallel_state) get_pipeline_model_parallel_last_rank() (in module megatron.core.parallel_state) get_pipeline_model_parallel_next_rank() (in module megatron.core.parallel_state) get_pipeline_model_parallel_prev_rank() (in module megatron.core.parallel_state) get_pipeline_model_parallel_rank() (in module megatron.core.parallel_state) get_pipeline_model_parallel_world_size() (in module megatron.core.parallel_state) get_position_embedding_group() (in module megatron.core.parallel_state) get_rng_state() (in module megatron.checkpointing) get_samples_mapping() (in module megatron.data.dataset_utils) get_signal_handler() (in module megatron.global_vars) get_states() (megatron.core.tensor_parallel.random.CudaRNGStatesTracker method) get_tensor_model_parallel_group() (in module megatron.core.parallel_state) get_tensor_model_parallel_rank() (in module megatron.core.parallel_state) get_tensor_model_parallel_src_rank() (in module megatron.core.parallel_state) get_tensor_model_parallel_world_size() (in module megatron.core.parallel_state) get_tensor_shapes() (in module megatron.schedules) get_tensorboard_writer() (in module megatron.global_vars) get_timers() (in module megatron.global_vars) get_tokenizer() (in module megatron.global_vars) get_train_valid_test_split_() (in module megatron.data.dataset_utils) get_virtual_pipeline_model_parallel_rank() (in module megatron.core.parallel_state) get_virtual_pipeline_model_parallel_world_size() (in module megatron.core.parallel_state) get_wd() (megatron.optimizer_param_scheduler.OptimizerParamScheduler method) get_world_size() (in module megatron.dist_signal_handler) GlobalMemoryBuffer (class in megatron.core.utils) GPT2Tokenizer (class in megatron.tokenizer.gpt2_tokenization) GPTDataset (class in megatron.data.gpt_dataset) GPTModel (class in megatron.model.gpt_model) H has_file_allowed_extension() (in module megatron.data.image_folder) I ICTDataset (class in megatron.data.ict_dataset) ImageFolder (class in megatron.data.image_folder) ImageNetPolicy (class in megatron.data.autoaugment) index (megatron.data.dataset_utils.MaskedLmInstance attribute) index_file_path() (in module megatron.data.indexed_dataset) IndexBuilder (class in megatron.indexer) IndexedCachedDataset (class in megatron.data.indexed_dataset) IndexedDataset (class in megatron.data.indexed_dataset) IndexedDatasetBuilder (class in megatron.data.indexed_dataset) infer_dataset_impl() (in module megatron.data.indexed_dataset) InferenceParams (class in megatron.text_generation.forward_step) init_method_normal() (in module megatron.model.utils) init_state_dict_from_bert() (megatron.model.biencoder_model.BiEncoderModel method) initialize_megatron() (in module megatron.initialize) initialize_model_parallel() (in module megatron.core.parallel_state) inv_vocab (megatron.tokenizer.tokenizer.AbstractTokenizer property) is_done() (megatron.text_generation.beam_utils.BeamHypotheses method) is_image_file() (in module megatron.data.image_folder) is_in_use() (megatron.memory.MemoryBuffer method) is_last_local_rank() (in module megatron.utils) is_last_rank() (in module megatron.utils) is_pipeline_first_stage() (in module megatron.core.parallel_state) is_pipeline_last_stage() (in module megatron.core.parallel_state) is_pipeline_stage_after_split() (in module megatron.core.parallel_state) is_pipeline_stage_at_split() (in module megatron.core.parallel_state) is_pipeline_stage_before_split() (in module megatron.core.parallel_state) is_rank_in_embedding_group() (in module megatron.core.parallel_state) is_rank_in_position_embedding_group() (in module megatron.core.parallel_state) is_start_piece() (in module megatron.data.dataset_utils) J join_str_list() (in module megatron.data.biencoder_dataset_utils) (in module megatron.data.realm_dataset_utils) L label (megatron.data.dataset_utils.MaskedLmInstance attribute) LayerType (class in megatron.model.enums) LiGLU (class in megatron.model.glu_activations) linear_with_grad_accumulation_and_async_allreduce() (in module megatron.core.tensor_parallel.layers) LinearWithGradAccumulationAndAsyncCommunication (class in megatron.core.tensor_parallel.layers) LlamaModel (class in megatron.model.llama_model) load_args_from_checkpoint() (in module megatron.checkpointing) load_attributes() (megatron.indexer.IndexBuilder method) load_biencoder_checkpoint() (in module megatron.checkpointing) load_checkpoint() (in module megatron.checkpointing) load_from_file() (megatron.data.realm_index.OpenRetreivalDataStore method) load_state_dict() (megatron.model.bert_model.BertModel method) (megatron.model.biencoder_model.BiEncoderModel method) (megatron.model.biencoder_model.PretrainedBertModel method) (megatron.model.classification.Classification method) (megatron.model.distributed.DistributedDataParallelBase method) (megatron.model.gpt_model.GPTModel method) (megatron.model.language_model.Embedding method) (megatron.model.language_model.TransformerLanguageModel method) (megatron.model.module.Float16Module method) (megatron.model.multiple_choice.MultipleChoice method) (megatron.model.t5_model.T5Model method) (megatron.optimizer.distrib_optimizer.DistributedOptimizer method) load_vocab() (in module megatron.tokenizer.bert_tokenization) log() (megatron.timers.Timers method) M make_attention_mask() (in module megatron.data.biencoder_dataset_utils) (in module megatron.data.ict_dataset) (in module megatron.data.t5_dataset) make_attention_mask_3d() (in module megatron.data.t5_dataset) make_builder() (in module megatron.data.indexed_dataset) make_dataset() (in module megatron.data.image_folder) (in module megatron.data.indexed_dataset) make_history_mask() (in module megatron.data.t5_dataset) make_history_mask_3d() (in module megatron.data.t5_dataset) make_viewless_tensor() (in module megatron.core.utils) MakeViewlessTensor (class in megatron.core.utils) MaskedLmInstance (class in megatron.data.dataset_utils) megatron.arguments module megatron.checkpointing module megatron.core.parallel_state module megatron.core.tensor_parallel.cross_entropy module megatron.core.tensor_parallel.data module megatron.core.tensor_parallel.layers module megatron.core.tensor_parallel.mappings module megatron.core.tensor_parallel.random module megatron.core.tensor_parallel.utils module megatron.core.utils module megatron.data.autoaugment module megatron.data.bert_dataset module megatron.data.biencoder_dataset_utils module megatron.data.blendable_dataset module megatron.data.data_samplers module megatron.data.dataset_utils module megatron.data.gpt_dataset module megatron.data.ict_dataset module megatron.data.image_folder module megatron.data.indexed_dataset module megatron.data.orqa_wiki_dataset module megatron.data.realm_dataset_utils module megatron.data.realm_index module megatron.data.t5_dataset module megatron.dist_signal_handler module megatron.global_vars module megatron.indexer module megatron.initialize module megatron.memory module megatron.microbatches module megatron.model.bert_model module megatron.model.biencoder_model module megatron.model.classification module megatron.model.distributed module megatron.model.enums module megatron.model.falcon_model module megatron.model.fused_bias_gelu module megatron.model.fused_layer_norm module megatron.model.fused_softmax module megatron.model.glu_activations module megatron.model.gpt_model module megatron.model.language_model module megatron.model.llama_model module megatron.model.module module megatron.model.multiple_choice module megatron.model.positional_embeddings module megatron.model.t5_model module megatron.model.transformer module megatron.model.utils module megatron.optimizer.clip_grads module megatron.optimizer.distrib_optimizer module megatron.optimizer.grad_scaler module megatron.optimizer.optimizer module megatron.optimizer_param_scheduler module megatron.p2p_communication module megatron.schedules module megatron.text_generation.api module megatron.text_generation.beam_utils module megatron.text_generation.communication module megatron.text_generation.forward_step module megatron.text_generation.generation module megatron.text_generation.sampling module megatron.text_generation.tokenization module megatron.text_generation_server module megatron.timers module megatron.tokenizer.bert_tokenization module megatron.tokenizer.gpt2_tokenization module megatron.tokenizer.tokenizer module megatron.training module megatron.utils module megatron.wandb_logger module MegatronGenerate (class in megatron.text_generation_server) MegatronGradScaler (class in megatron.optimizer.grad_scaler) MegatronModule (class in megatron.model.module) MegatronOptimizer (class in megatron.optimizer.optimizer) MegatronPretrainingRandomSampler (class in megatron.data.data_samplers) MegatronPretrainingSampler (class in megatron.data.data_samplers) MegatronServer (class in megatron.text_generation_server) MemoryBuffer (class in megatron.memory) (class in megatron.model.distributed) methods (megatron.text_generation_server.MegatronGenerate attribute) MixedFusedLayerNorm (class in megatron.model.fused_layer_norm) MixedPrecisionOptimizer (class in megatron.optimizer.optimizer) MMapIndexedDataset (class in megatron.data.indexed_dataset) MMapIndexedDatasetBuilder (class in megatron.data.indexed_dataset) model_parallel_cuda_manual_seed() (in module megatron.core.tensor_parallel.random) model_parallel_is_initialized() (in module megatron.core.parallel_state) ModelType (class in megatron.model.enums) modify_logits_for_top_k_filtering() (in module megatron.text_generation.sampling) modify_logits_for_top_p_filtering() (in module megatron.text_generation.sampling) module megatron.arguments megatron.checkpointing megatron.core.parallel_state megatron.core.tensor_parallel.cross_entropy megatron.core.tensor_parallel.data megatron.core.tensor_parallel.layers megatron.core.tensor_parallel.mappings megatron.core.tensor_parallel.random megatron.core.tensor_parallel.utils megatron.core.utils megatron.data.autoaugment megatron.data.bert_dataset megatron.data.biencoder_dataset_utils megatron.data.blendable_dataset megatron.data.data_samplers megatron.data.dataset_utils megatron.data.gpt_dataset megatron.data.ict_dataset megatron.data.image_folder megatron.data.indexed_dataset megatron.data.orqa_wiki_dataset megatron.data.realm_dataset_utils megatron.data.realm_index megatron.data.t5_dataset megatron.dist_signal_handler megatron.global_vars megatron.indexer megatron.initialize megatron.memory megatron.microbatches megatron.model.bert_model megatron.model.biencoder_model megatron.model.classification megatron.model.distributed megatron.model.enums megatron.model.falcon_model megatron.model.fused_bias_gelu megatron.model.fused_layer_norm megatron.model.fused_softmax megatron.model.glu_activations megatron.model.gpt_model megatron.model.language_model megatron.model.llama_model megatron.model.module megatron.model.multiple_choice megatron.model.positional_embeddings megatron.model.t5_model megatron.model.transformer megatron.model.utils megatron.optimizer.clip_grads megatron.optimizer.distrib_optimizer megatron.optimizer.grad_scaler megatron.optimizer.optimizer megatron.optimizer_param_scheduler megatron.p2p_communication megatron.schedules megatron.text_generation.api megatron.text_generation.beam_utils megatron.text_generation.communication megatron.text_generation.forward_step megatron.text_generation.generation megatron.text_generation.sampling megatron.text_generation.tokenization megatron.text_generation_server megatron.timers megatron.tokenizer.bert_tokenization megatron.tokenizer.gpt2_tokenization megatron.tokenizer.tokenizer megatron.training megatron.utils megatron.wandb_logger MultipleChoice (class in megatron.model.multiple_choice) N NoopTransformerLayer (class in megatron.model.transformer) numel_in_use() (megatron.memory.MemoryBuffer method) NumMicroBatchesCalculator (class in megatron.microbatches) O OpenRetreivalDataStore (class in megatron.data.realm_index) OpenRetrievalEvidenceDataset (class in megatron.data.orqa_wiki_dataset) OptimizerParamScheduler (class in megatron.optimizer_param_scheduler) P pad_and_convert_to_numpy() (in module megatron.data.bert_dataset) (in module megatron.data.dataset_utils) (in module megatron.data.t5_dataset) parallel_lm_logits() (in module megatron.model.language_model) ParallelAttention (class in megatron.model.transformer) ParallelMLP (class in megatron.model.transformer) ParallelTransformer (class in megatron.model.transformer) ParallelTransformerLayer (class in megatron.model.transformer) param_is_not_shared() (in module megatron.model.module) param_is_not_tensor_parallel_duplicate() (in module megatron.core.tensor_parallel.layers) parse_args() (in module megatron.arguments) pil_loader() (in module megatron.data.image_folder) Pooler (class in megatron.model.language_model) PositionEmbeddingType (class in megatron.model.enums) post_language_model_processing() (in module megatron.model.bert_model) (in module megatron.model.gpt_model) precompute_freqs_cis() (in module megatron.model.positional_embeddings) pretrain() (in module megatron.training) PretrainedBertModel (class in megatron.model.biencoder_model) print_all_nodes() (in module megatron.utils) print_average_usage() (megatron.memory.MemoryBuffer method) print_datetime() (in module megatron.training) print_params_min_max_norm() (in module megatron.utils) print_rank_0() (in module megatron.utils) print_rank_last() (in module megatron.utils) printable_text() (in module megatron.tokenizer.bert_tokenization) R RampupBatchsizeNumMicroBatches (class in megatron.microbatches) RandomSeedDataset (class in megatron.data.data_samplers) Range (class in megatron.optimizer.distrib_optimizer) read_longs() (in module megatron.data.indexed_dataset) read_metadata() (in module megatron.checkpointing) rebuild_tokenizer() (in module megatron.global_vars) recv_backward() (in module megatron.p2p_communication) (in module megatron.schedules) recv_forward() (in module megatron.p2p_communication) (in module megatron.schedules) recv_from_prev_pipeline_rank_() (in module megatron.text_generation.communication) reduce_from_tensor_model_parallel_region() (in module megatron.core.tensor_parallel.mappings) reduce_model_grads() (megatron.optimizer.distrib_optimizer.DistributedOptimizer method) (megatron.optimizer.optimizer.MegatronOptimizer method) reduce_scatter_to_sequence_parallel_region() (in module megatron.core.tensor_parallel.mappings) ReGLU (class in megatron.model.glu_activations) reload_model_params() (megatron.optimizer.optimizer.FP32Optimizer method) (megatron.optimizer.optimizer.MegatronOptimizer method) (megatron.optimizer.optimizer.MixedPrecisionOptimizer method) report_memory() (in module megatron.utils) reset() (megatron.core.tensor_parallel.random.CudaRNGStatesTracker method) (megatron.memory.MemoryBuffer method) (megatron.timers.Timer method) reset_index() (megatron.data.realm_index.FaissMIPSIndex method) reshape_for_broadcast() (in module megatron.model.positional_embeddings) RingMemBuffer (class in megatron.memory) RMSNorm (class in megatron.model.fused_layer_norm) RowParallelLinear (class in megatron.core.tensor_parallel.layers) S safely_set_viewless_tensor_data() (in module megatron.core.utils) sample() (in module megatron.text_generation.sampling) save_checkpoint() (in module megatron.checkpointing) save_checkpoint_and_time() (in module megatron.training) save_shard() (megatron.data.realm_index.OpenRetreivalDataStore method) save_vocabulary() (megatron.tokenizer.gpt2_tokenization.GPT2Tokenizer method) scale_loss() (megatron.optimizer.optimizer.MegatronOptimizer method) scaled_init_method_normal() (in module megatron.model.utils) ScaledMaskedSoftmax (class in megatron.model.fused_softmax) ScaledSoftmax (class in megatron.model.fused_softmax) ScaledUpperTriangMaskedSoftmax (class in megatron.model.fused_softmax) scatter_to_sequence_parallel_region() (in module megatron.core.tensor_parallel.mappings) scatter_to_tensor_model_parallel_region() (in module megatron.core.tensor_parallel.mappings) score_and_return_on_first_stage() (in module megatron.text_generation.generation) search_mips_index() (megatron.data.realm_index.FaissMIPSIndex method) send_backward() (in module megatron.p2p_communication) (in module megatron.schedules) send_backward_recv_backward() (in module megatron.p2p_communication) send_backward_recv_forward() (in module megatron.p2p_communication) (in module megatron.schedules) send_forward() (in module megatron.p2p_communication) (in module megatron.schedules) send_forward_backward_recv_forward_backward() (in module megatron.p2p_communication) send_forward_recv_backward() (in module megatron.p2p_communication) (in module megatron.schedules) send_forward_recv_forward() (in module megatron.p2p_communication) send_to_next_pipeline_rank() (in module megatron.text_generation.communication) set_checkpoint_version() (in module megatron.checkpointing) set_defaults_if_not_set_tensor_model_parallel_attributes() (in module megatron.core.tensor_parallel.layers) set_global_variables() (in module megatron.global_vars) set_input_tensor() (megatron.model.bert_model.BertModel method) (megatron.model.biencoder_model.BiEncoderModel method) (megatron.model.classification.Classification method) (megatron.model.gpt_model.GPTModel method) (megatron.model.language_model.TransformerLanguageModel method) (megatron.model.multiple_choice.MultipleChoice method) (megatron.model.t5_model.T5Model method) (megatron.model.transformer.ParallelTransformer method) set_jit_fusion_options() (in module megatron.initialize) set_pipeline_model_parallel_rank() (in module megatron.core.parallel_state) set_pipeline_model_parallel_split_rank() (in module megatron.core.parallel_state) set_pipeline_model_parallel_world_size() (in module megatron.core.parallel_state) set_special_tokens() (megatron.tokenizer.gpt2_tokenization.GPT2Tokenizer method) set_states() (megatron.core.tensor_parallel.random.CudaRNGStatesTracker method) set_tensor_model_parallel_attributes() (in module megatron.core.tensor_parallel.layers) set_tensor_model_parallel_rank() (in module megatron.core.parallel_state) set_tensor_model_parallel_world_size() (in module megatron.core.parallel_state) set_virtual_pipeline_model_parallel_rank() (in module megatron.core.parallel_state) split_tensor_along_last_dim() (in module megatron.core.tensor_parallel.utils) split_tensor_into_1d_equal_chunks() (in module megatron.core.tensor_parallel.utils) start() (megatron.timers.Timer method) state_dict() (megatron.model.distributed.DistributedDataParallelBase method) (megatron.model.module.Float16Module method) (megatron.optimizer.distrib_optimizer.DistributedOptimizer method) state_dict_for_save_checkpoint() (megatron.model.bert_model.BertModel method) (megatron.model.biencoder_model.BiEncoderModel method) (megatron.model.biencoder_model.PretrainedBertModel method) (megatron.model.classification.Classification method) (megatron.model.distributed.DistributedDataParallelBase method) (megatron.model.gpt_model.GPTModel method) (megatron.model.language_model.Embedding method) (megatron.model.language_model.TransformerLanguageModel method) (megatron.model.module.Float16Module method) (megatron.model.module.MegatronModule method) (megatron.model.multiple_choice.MultipleChoice method) (megatron.model.t5_model.T5Model method) step() (megatron.optimizer.optimizer.FP32Optimizer method) (megatron.optimizer_param_scheduler.OptimizerParamScheduler method) stop() (megatron.timers.Timer method) SubPolicy (class in megatron.data.autoaugment) swap_key_value_dict() (megatron.text_generation.forward_step.InferenceParams method) SwiGLU (class in megatron.model.glu_activations) T t5_extended_attention_mask() (in module megatron.model.t5_model) t5_position_ids() (in module megatron.model.t5_model) T5Dataset (class in megatron.data.t5_dataset) T5LMHead (class in megatron.model.t5_model) T5Model (class in megatron.model.t5_model) Timer (class in megatron.timers) TimerBase (class in megatron.timers) Timers (class in megatron.timers) tokenize() (megatron.tokenizer.bert_tokenization.BasicTokenizer method) (megatron.tokenizer.bert_tokenization.WordpieceTokenizer method) (megatron.tokenizer.gpt2_tokenization.GPT2Tokenizer method) tokenize_prompts() (in module megatron.text_generation.tokenization) toy_test() (in module megatron.wandb_logger) track_and_report_progress() (megatron.indexer.IndexBuilder method) train_step() (in module megatron.training) training_log() (in module megatron.training) TransformerLanguageModel (class in megatron.model.language_model) truncate_segments() (in module megatron.data.dataset_utils) try_catch_guard() (in module megatron.wandb_logger) U unwrap_model() (in module megatron.utils) update_index() (megatron.data.realm_index.FaissMIPSIndex method) update_num_microbatches() (in module megatron.global_vars) V validate_args() (in module megatron.arguments) validate_case_matches_checkpoint() (in module megatron.tokenizer.bert_tokenization) vocab (megatron.tokenizer.tokenizer.AbstractTokenizer property) vocab_parallel_cross_entropy() (in module megatron.core.tensor_parallel.cross_entropy) vocab_parallel_max_indices() (in module megatron.core.tensor_parallel.cross_entropy) VocabParallelEmbedding (class in megatron.core.tensor_parallel.layers) VocabUtility (class in megatron.core.tensor_parallel.utils) W WandBConfig (class in megatron.wandb_logger) WandbTBShim (class in megatron.wandb_logger) whitespace_tokenize() (in module megatron.tokenizer.bert_tokenization) WordpieceTokenizer (class in megatron.tokenizer.bert_tokenization) write() (megatron.timers.Timers method) write_args_to_tensorboard() (in module megatron.initialize) write_longs() (in module megatron.data.indexed_dataset) Z zero() (megatron.model.distributed.MemoryBuffer method) zero_grad() (megatron.optimizer.distrib_optimizer.DistributedOptimizer method) (megatron.optimizer.optimizer.Float16OptimizerWithFloat16Params method) (megatron.optimizer.optimizer.FP32Optimizer method) zero_grad_buffer() (megatron.model.distributed.DistributedDataParallel method) zero_parameters() (megatron.model.language_model.Embedding method)