Skip to main content
Back to top
Ctrl
+
K
Megatron-LLM 0.1.0 documentation
User guide
API
User guide
API
Section Navigation
megatron.arguments
megatron.arguments.build_base_parser
megatron.arguments.parse_args
megatron.arguments.validate_args
megatron.checkpointing
megatron.checkpointing.check_checkpoint_args
megatron.checkpointing.ensure_directory_exists
megatron.checkpointing.find_checkpoint_rank_0
megatron.checkpointing.fix_query_key_value_ordering
megatron.checkpointing.get_checkpoint_name
megatron.checkpointing.get_checkpoint_names
megatron.checkpointing.get_checkpoint_tracker_filename
megatron.checkpointing.get_checkpoint_version
megatron.checkpointing.get_rng_state
megatron.checkpointing.load_args_from_checkpoint
megatron.checkpointing.load_biencoder_checkpoint
megatron.checkpointing.load_checkpoint
megatron.checkpointing.read_metadata
megatron.checkpointing.save_checkpoint
megatron.checkpointing.set_checkpoint_version
megatron.dist_signal_handler
megatron.dist_signal_handler.DistributedSignalHandler
megatron.dist_signal_handler.all_gather_item
megatron.dist_signal_handler.get_device
megatron.dist_signal_handler.get_world_size
megatron.global_vars
megatron.global_vars.get_adlr_autoresume
megatron.global_vars.get_args
megatron.global_vars.get_counters
megatron.global_vars.get_current_global_batch_size
megatron.global_vars.get_num_microbatches
megatron.global_vars.get_signal_handler
megatron.global_vars.get_tensorboard_writer
megatron.global_vars.get_timers
megatron.global_vars.get_tokenizer
megatron.global_vars.rebuild_tokenizer
megatron.global_vars.set_global_variables
megatron.global_vars.update_num_microbatches
megatron.indexer
megatron.indexer.IndexBuilder
megatron.initialize
megatron.initialize.initialize_megatron
megatron.initialize.set_jit_fusion_options
megatron.initialize.write_args_to_tensorboard
megatron.memory
megatron.memory.MemoryBuffer
megatron.memory.RingMemBuffer
megatron.memory.allocate_mem_buff
megatron.memory.get_mem_buff
megatron.microbatches
megatron.microbatches.ConstantNumMicroBatches
megatron.microbatches.NumMicroBatchesCalculator
megatron.microbatches.RampupBatchsizeNumMicroBatches
megatron.microbatches.build_num_microbatches_calculator
megatron.optimizer_param_scheduler
megatron.optimizer_param_scheduler.OptimizerParamScheduler
megatron.p2p_communication
megatron.p2p_communication.recv_backward
megatron.p2p_communication.recv_forward
megatron.p2p_communication.send_backward
megatron.p2p_communication.send_backward_recv_backward
megatron.p2p_communication.send_backward_recv_forward
megatron.p2p_communication.send_forward
megatron.p2p_communication.send_forward_backward_recv_forward_backward
megatron.p2p_communication.send_forward_recv_backward
megatron.p2p_communication.send_forward_recv_forward
megatron.schedules
megatron.schedules.backward_step
megatron.schedules.custom_backward
megatron.schedules.deallocate_output_tensor
megatron.schedules.dummy_handler
megatron.schedules.forward_backward_no_pipelining
megatron.schedules.forward_backward_pipelining_with_interleaving
megatron.schedules.forward_backward_pipelining_without_interleaving
megatron.schedules.forward_step
megatron.schedules.get_forward_backward_func
megatron.schedules.get_tensor_shapes
megatron.schedules.recv_backward
megatron.schedules.recv_forward
megatron.schedules.send_backward
megatron.schedules.send_backward_recv_forward
megatron.schedules.send_forward
megatron.schedules.send_forward_recv_backward
megatron.text_generation_server
megatron.text_generation_server.MegatronGenerate
megatron.text_generation_server.MegatronServer
megatron.timers
megatron.timers.DummyTimer
megatron.timers.Timer
megatron.timers.TimerBase
megatron.timers.Timers
megatron.training
megatron.training.build_train_valid_test_data_iterators
megatron.training.cyclic_iter
megatron.training.evaluate
megatron.training.evaluate_and_print_results
megatron.training.get_model
megatron.training.pretrain
megatron.training.print_datetime
megatron.training.save_checkpoint_and_time
megatron.training.train_step
megatron.training.training_log
megatron.utils
megatron.utils.average_losses_across_data_parallel_group
megatron.utils.calc_params_l2_norm
megatron.utils.check_adlr_autoresume_termination
megatron.utils.get_ltor_masks_and_position_ids
megatron.utils.is_last_local_rank
megatron.utils.is_last_rank
megatron.utils.print_all_nodes
megatron.utils.print_params_min_max_norm
megatron.utils.print_rank_0
megatron.utils.print_rank_last
megatron.utils.report_memory
megatron.utils.unwrap_model
megatron.wandb_logger
megatron.wandb_logger.WandBConfig
megatron.wandb_logger.WandbTBShim
megatron.wandb_logger.toy_test
megatron.wandb_logger.try_catch_guard
megatron.core.parallel_state
megatron.core.parallel_state.destroy_model_parallel
megatron.core.parallel_state.get_data_parallel_group
megatron.core.parallel_state.get_data_parallel_rank
megatron.core.parallel_state.get_data_parallel_src_rank
megatron.core.parallel_state.get_data_parallel_world_size
megatron.core.parallel_state.get_embedding_group
megatron.core.parallel_state.get_global_memory_buffer
megatron.core.parallel_state.get_model_parallel_group
megatron.core.parallel_state.get_pipeline_model_parallel_first_rank
megatron.core.parallel_state.get_pipeline_model_parallel_group
megatron.core.parallel_state.get_pipeline_model_parallel_last_rank
megatron.core.parallel_state.get_pipeline_model_parallel_next_rank
megatron.core.parallel_state.get_pipeline_model_parallel_prev_rank
megatron.core.parallel_state.get_pipeline_model_parallel_rank
megatron.core.parallel_state.get_pipeline_model_parallel_world_size
megatron.core.parallel_state.get_position_embedding_group
megatron.core.parallel_state.get_tensor_model_parallel_group
megatron.core.parallel_state.get_tensor_model_parallel_rank
megatron.core.parallel_state.get_tensor_model_parallel_src_rank
megatron.core.parallel_state.get_tensor_model_parallel_world_size
megatron.core.parallel_state.get_virtual_pipeline_model_parallel_rank
megatron.core.parallel_state.get_virtual_pipeline_model_parallel_world_size
megatron.core.parallel_state.initialize_model_parallel
megatron.core.parallel_state.is_pipeline_first_stage
megatron.core.parallel_state.is_pipeline_last_stage
megatron.core.parallel_state.is_pipeline_stage_after_split
megatron.core.parallel_state.is_pipeline_stage_at_split
megatron.core.parallel_state.is_pipeline_stage_before_split
megatron.core.parallel_state.is_rank_in_embedding_group
megatron.core.parallel_state.is_rank_in_position_embedding_group
megatron.core.parallel_state.model_parallel_is_initialized
megatron.core.parallel_state.set_pipeline_model_parallel_rank
megatron.core.parallel_state.set_pipeline_model_parallel_split_rank
megatron.core.parallel_state.set_pipeline_model_parallel_world_size
megatron.core.parallel_state.set_tensor_model_parallel_rank
megatron.core.parallel_state.set_tensor_model_parallel_world_size
megatron.core.parallel_state.set_virtual_pipeline_model_parallel_rank
megatron.core.utils
megatron.core.utils.GlobalMemoryBuffer
megatron.core.utils.MakeViewlessTensor
megatron.core.utils.assert_viewless_tensor
megatron.core.utils.divide
megatron.core.utils.ensure_divisibility
megatron.core.utils.make_viewless_tensor
megatron.core.utils.safely_set_viewless_tensor_data
megatron.core.tensor_parallel.cross_entropy
megatron.core.tensor_parallel.cross_entropy.vocab_parallel_cross_entropy
megatron.core.tensor_parallel.cross_entropy.vocab_parallel_max_indices
megatron.core.tensor_parallel.data
megatron.core.tensor_parallel.data.broadcast_data
megatron.core.tensor_parallel.layers
megatron.core.tensor_parallel.layers.ColumnParallelLinear
megatron.core.tensor_parallel.layers.LinearWithGradAccumulationAndAsyncCommunication
megatron.core.tensor_parallel.layers.RowParallelLinear
megatron.core.tensor_parallel.layers.VocabParallelEmbedding
megatron.core.tensor_parallel.layers.copy_tensor_model_parallel_attributes
megatron.core.tensor_parallel.layers.linear_with_grad_accumulation_and_async_allreduce
megatron.core.tensor_parallel.layers.param_is_not_tensor_parallel_duplicate
megatron.core.tensor_parallel.layers.set_defaults_if_not_set_tensor_model_parallel_attributes
megatron.core.tensor_parallel.layers.set_tensor_model_parallel_attributes
megatron.core.tensor_parallel.mappings
megatron.core.tensor_parallel.mappings.copy_to_tensor_model_parallel_region
megatron.core.tensor_parallel.mappings.gather_from_sequence_parallel_region
megatron.core.tensor_parallel.mappings.gather_from_tensor_model_parallel_region
megatron.core.tensor_parallel.mappings.reduce_from_tensor_model_parallel_region
megatron.core.tensor_parallel.mappings.reduce_scatter_to_sequence_parallel_region
megatron.core.tensor_parallel.mappings.scatter_to_sequence_parallel_region
megatron.core.tensor_parallel.mappings.scatter_to_tensor_model_parallel_region
megatron.core.tensor_parallel.random
megatron.core.tensor_parallel.random.CheckpointFunction
megatron.core.tensor_parallel.random.CudaRNGStatesTracker
megatron.core.tensor_parallel.random.checkpoint
megatron.core.tensor_parallel.random.get_cuda_rng_tracker
megatron.core.tensor_parallel.random.model_parallel_cuda_manual_seed
megatron.core.tensor_parallel.utils
megatron.core.tensor_parallel.utils.VocabUtility
megatron.core.tensor_parallel.utils.gather_split_1d_tensor
megatron.core.tensor_parallel.utils.split_tensor_along_last_dim
megatron.core.tensor_parallel.utils.split_tensor_into_1d_equal_chunks
megatron.data.autoaugment
megatron.data.autoaugment.ImageNetPolicy
megatron.data.autoaugment.SubPolicy
megatron.data.blendable_dataset
megatron.data.blendable_dataset.BlendableDataset
megatron.data.gpt_dataset
megatron.data.gpt_dataset.GPTDataset
megatron.data.gpt_dataset.build_train_valid_test_datasets
megatron.data.gpt_dataset.get_indexed_dataset_
megatron.data.image_folder
megatron.data.image_folder.DatasetFolder
megatron.data.image_folder.ImageFolder
megatron.data.image_folder.accimage_loader
megatron.data.image_folder.default_loader
megatron.data.image_folder.has_file_allowed_extension
megatron.data.image_folder.is_image_file
megatron.data.image_folder.make_dataset
megatron.data.image_folder.pil_loader
megatron.data.realm_dataset_utils
megatron.data.realm_dataset_utils.BlockSampleData
megatron.data.realm_dataset_utils.BlockSamplesMapping
megatron.data.realm_dataset_utils.get_block_samples_mapping
megatron.data.realm_dataset_utils.get_ict_batch
megatron.data.realm_dataset_utils.get_one_epoch_dataloader
megatron.data.realm_dataset_utils.join_str_list
megatron.data.bert_dataset
megatron.data.bert_dataset.BertDataset
megatron.data.bert_dataset.build_training_sample
megatron.data.bert_dataset.pad_and_convert_to_numpy
megatron.data.data_samplers
megatron.data.data_samplers.MegatronPretrainingRandomSampler
megatron.data.data_samplers.MegatronPretrainingSampler
megatron.data.data_samplers.RandomSeedDataset
megatron.data.data_samplers.build_pretraining_data_loader
megatron.data.indexed_dataset
megatron.data.indexed_dataset.IndexedCachedDataset
megatron.data.indexed_dataset.IndexedDataset
megatron.data.indexed_dataset.IndexedDatasetBuilder
megatron.data.indexed_dataset.MMapIndexedDataset
megatron.data.indexed_dataset.MMapIndexedDatasetBuilder
megatron.data.indexed_dataset.code
megatron.data.indexed_dataset.create_doc_idx
megatron.data.indexed_dataset.data_file_path
megatron.data.indexed_dataset.dataset_exists
megatron.data.indexed_dataset.get_available_dataset_impl
megatron.data.indexed_dataset.index_file_path
megatron.data.indexed_dataset.infer_dataset_impl
megatron.data.indexed_dataset.make_builder
megatron.data.indexed_dataset.make_dataset
megatron.data.indexed_dataset.read_longs
megatron.data.indexed_dataset.write_longs
megatron.data.orqa_wiki_dataset
megatron.data.orqa_wiki_dataset.OpenRetrievalEvidenceDataset
megatron.data.orqa_wiki_dataset.build_sample
megatron.data.orqa_wiki_dataset.build_tokens_types_paddings_from_ids
megatron.data.orqa_wiki_dataset.build_tokens_types_paddings_from_text
megatron.data.orqa_wiki_dataset.get_open_retrieval_batch
megatron.data.orqa_wiki_dataset.get_open_retrieval_wiki_dataset
megatron.data.realm_index
megatron.data.realm_index.FaissMIPSIndex
megatron.data.realm_index.OpenRetreivalDataStore
megatron.data.realm_index.detach
megatron.data.biencoder_dataset_utils
megatron.data.biencoder_dataset_utils.BlockSampleData
megatron.data.biencoder_dataset_utils.BlockSamplesMapping
megatron.data.biencoder_dataset_utils.get_block_samples_mapping
megatron.data.biencoder_dataset_utils.get_ict_batch
megatron.data.biencoder_dataset_utils.get_one_epoch_dataloader
megatron.data.biencoder_dataset_utils.join_str_list
megatron.data.biencoder_dataset_utils.make_attention_mask
megatron.data.dataset_utils
megatron.data.dataset_utils.MaskedLmInstance
megatron.data.dataset_utils.build_train_valid_test_datasets
megatron.data.dataset_utils.compile_helper
megatron.data.dataset_utils.create_masked_lm_predictions
megatron.data.dataset_utils.create_tokens_and_tokentypes
megatron.data.dataset_utils.get_a_and_b_segments
megatron.data.dataset_utils.get_datasets_weights_and_num_samples
megatron.data.dataset_utils.get_indexed_dataset_
megatron.data.dataset_utils.get_samples_mapping
megatron.data.dataset_utils.get_train_valid_test_split_
megatron.data.dataset_utils.is_start_piece
megatron.data.dataset_utils.pad_and_convert_to_numpy
megatron.data.dataset_utils.truncate_segments
megatron.data.ict_dataset
megatron.data.ict_dataset.ICTDataset
megatron.data.ict_dataset.get_ict_dataset
megatron.data.ict_dataset.make_attention_mask
megatron.data.t5_dataset
megatron.data.t5_dataset.T5Dataset
megatron.data.t5_dataset.build_training_sample
megatron.data.t5_dataset.make_attention_mask
megatron.data.t5_dataset.make_attention_mask_3d
megatron.data.t5_dataset.make_history_mask
megatron.data.t5_dataset.make_history_mask_3d
megatron.data.t5_dataset.pad_and_convert_to_numpy
megatron.model.bert_model
megatron.model.bert_model.BertLMHead
megatron.model.bert_model.BertModel
megatron.model.bert_model.bert_extended_attention_mask
megatron.model.bert_model.bert_position_ids
megatron.model.bert_model.post_language_model_processing
megatron.model.biencoder_model
megatron.model.biencoder_model.BiEncoderModel
megatron.model.biencoder_model.PretrainedBertModel
megatron.model.biencoder_model.biencoder_model_provider
megatron.model.biencoder_model.get_model_provider
megatron.model.classification
megatron.model.classification.Classification
megatron.model.distributed
megatron.model.distributed.DistributedDataParallel
megatron.model.distributed.DistributedDataParallelBase
megatron.model.distributed.MemoryBuffer
megatron.model.enums
megatron.model.enums.AttnMaskType
megatron.model.enums.AttnType
megatron.model.enums.LayerType
megatron.model.enums.ModelType
megatron.model.enums.PositionEmbeddingType
megatron.model.falcon_model
megatron.model.falcon_model.FalconModel
megatron.model.fused_bias_gelu
megatron.model.fused_bias_gelu.GeLUFunction
megatron.model.fused_layer_norm
megatron.model.fused_layer_norm.FusedLayerNormAffineFunction
megatron.model.fused_layer_norm.MixedFusedLayerNorm
megatron.model.fused_layer_norm.RMSNorm
megatron.model.fused_softmax
megatron.model.fused_softmax.FusedScaleMaskSoftmax
megatron.model.fused_softmax.ScaledMaskedSoftmax
megatron.model.fused_softmax.ScaledSoftmax
megatron.model.fused_softmax.ScaledUpperTriangMaskedSoftmax
megatron.model.glu_activations
megatron.model.glu_activations.GEGLU
megatron.model.glu_activations.LiGLU
megatron.model.glu_activations.ReGLU
megatron.model.glu_activations.SwiGLU
megatron.model.gpt_model
megatron.model.gpt_model.GPTModel
megatron.model.gpt_model.post_language_model_processing
megatron.model.language_model
megatron.model.language_model.Embedding
megatron.model.language_model.Pooler
megatron.model.language_model.TransformerLanguageModel
megatron.model.language_model.get_language_model
megatron.model.language_model.parallel_lm_logits
megatron.model.llama_model
megatron.model.llama_model.LlamaModel
megatron.model.module
megatron.model.module.Float16Module
megatron.model.module.MegatronModule
megatron.model.module.conversion_helper
megatron.model.module.float16_to_fp32
megatron.model.module.fp32_to_float16
megatron.model.module.param_is_not_shared
megatron.model.multiple_choice
megatron.model.multiple_choice.MultipleChoice
megatron.model.positional_embeddings
megatron.model.positional_embeddings.apply_rotary_emb
megatron.model.positional_embeddings.precompute_freqs_cis
megatron.model.positional_embeddings.reshape_for_broadcast
megatron.model.t5_model
megatron.model.t5_model.T5LMHead
megatron.model.t5_model.T5Model
megatron.model.t5_model.t5_extended_attention_mask
megatron.model.t5_model.t5_position_ids
megatron.model.transformer
megatron.model.transformer.CoreAttention
megatron.model.transformer.DropPath
megatron.model.transformer.NoopTransformerLayer
megatron.model.transformer.ParallelAttention
megatron.model.transformer.ParallelMLP
megatron.model.transformer.ParallelTransformer
megatron.model.transformer.ParallelTransformerLayer
megatron.model.transformer.bias_dropout_add
megatron.model.transformer.dropout_add
megatron.model.transformer.get_bias_dropout_add
megatron.model.transformer.get_dropout_add
megatron.model.utils
megatron.model.utils.attention_mask_func
megatron.model.utils.get_linear_layer
megatron.model.utils.init_method_normal
megatron.model.utils.scaled_init_method_normal
megatron.optimizer.clip_grads
megatron.optimizer.clip_grads.clip_grad_norm_fp32
megatron.optimizer.clip_grads.count_zeros_fp32
megatron.optimizer.distrib_optimizer
megatron.optimizer.distrib_optimizer.DistributedOptimizer
megatron.optimizer.distrib_optimizer.Range
megatron.optimizer.grad_scaler
megatron.optimizer.grad_scaler.ConstantGradScaler
megatron.optimizer.grad_scaler.DynamicGradScaler
megatron.optimizer.grad_scaler.MegatronGradScaler
megatron.optimizer.optimizer
megatron.optimizer.optimizer.FP32Optimizer
megatron.optimizer.optimizer.Float16OptimizerWithFloat16Params
megatron.optimizer.optimizer.MegatronOptimizer
megatron.optimizer.optimizer.MixedPrecisionOptimizer
megatron.text_generation.api
megatron.text_generation.api.beam_search
megatron.text_generation.api.beam_search_and_post_process
megatron.text_generation.api.generate
megatron.text_generation.api.generate_and_post_process
megatron.text_generation.beam_utils
megatron.text_generation.beam_utils.BeamHypotheses
megatron.text_generation.communication
megatron.text_generation.communication.broadcast_float_list
megatron.text_generation.communication.broadcast_from_last_pipeline_stage
megatron.text_generation.communication.broadcast_from_last_to_first_pipeline_stage
megatron.text_generation.communication.broadcast_int_list
megatron.text_generation.communication.broadcast_list
megatron.text_generation.communication.broadcast_tensor
megatron.text_generation.communication.copy_from_last_to_first_pipeline_stage
megatron.text_generation.communication.recv_from_prev_pipeline_rank_
megatron.text_generation.communication.send_to_next_pipeline_rank
megatron.text_generation.forward_step
megatron.text_generation.forward_step.ForwardStep
megatron.text_generation.forward_step.InferenceParams
megatron.text_generation.generation
megatron.text_generation.generation.beam_search_and_return_on_first_stage
megatron.text_generation.generation.generate_tokens_probs_and_return_on_first_stage
megatron.text_generation.generation.score_and_return_on_first_stage
megatron.text_generation.sampling
megatron.text_generation.sampling.modify_logits_for_top_k_filtering
megatron.text_generation.sampling.modify_logits_for_top_p_filtering
megatron.text_generation.sampling.sample
megatron.text_generation.tokenization
megatron.text_generation.tokenization.detokenize_generations
megatron.text_generation.tokenization.tokenize_prompts
megatron.tokenizer.bert_tokenization
megatron.tokenizer.bert_tokenization.BasicTokenizer
megatron.tokenizer.bert_tokenization.FullTokenizer
megatron.tokenizer.bert_tokenization.WordpieceTokenizer
megatron.tokenizer.bert_tokenization.convert_by_vocab
megatron.tokenizer.bert_tokenization.convert_ids_to_tokens
megatron.tokenizer.bert_tokenization.convert_to_unicode
megatron.tokenizer.bert_tokenization.convert_tokens_to_ids
megatron.tokenizer.bert_tokenization.load_vocab
megatron.tokenizer.bert_tokenization.printable_text
megatron.tokenizer.bert_tokenization.validate_case_matches_checkpoint
megatron.tokenizer.bert_tokenization.whitespace_tokenize
megatron.tokenizer.gpt2_tokenization
megatron.tokenizer.gpt2_tokenization.GPT2Tokenizer
megatron.tokenizer.gpt2_tokenization.bytes_to_unicode
megatron.tokenizer.gpt2_tokenization.get_pairs
megatron.tokenizer.tokenizer
megatron.tokenizer.tokenizer.AbstractTokenizer
megatron.tokenizer.tokenizer.build_tokenizer
API
megatron.global_vars
megatron.global_vars.get_num_microbatches
megatron.global_vars.get_num_microbatches
#
megatron.global_vars.get_num_microbatches()
#
On this page
get_num_microbatches()
Show Source