跳到内容

配方

ESM2Recipes

基类: BaseModel

ESM2 的预制配方。

此 Pydantic 模型不用于序列化,仅用于配合 argparse 使用。每个配方应以 `args` 作为唯一参数;我们使用 partial(偏函数),以便在运行时再提供该参数。新增配方时,请将其添加到此模型中。

源代码位于 `bionemo/esm2/run/recipes.py` 中
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
class ESM2Recipes(BaseModel):
    """Pre-baked recipes for ESM2.

    THIS PYDANTIC MODEL IS NOT MEANT FOR SERIALIZATION. Only used to facilitate argparse. Each recipe should take `args`
    as the only argument. We use partials so we can provide this information at runtime. Add new recipes to this model.

    Every field is named after the recipe function it wraps and defaults to a `partial` of that function with no
    arguments bound, so the caller still supplies `args` when invoking it.
    """

    # Use partials so we can still parameterize the recipes from the CLI (e.g. data paths.)
    # NOTE: on each right-hand side the recipe name is looked up before the class attribute of the same name is
    # bound, so it resolves to the module-level recipe function rather than to the field being declared.
    esm2_tiny_test_recipe: Callable[[argparse.Namespace], MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]] = (
        partial(esm2_tiny_test_recipe)
    )
    esm2_8m_recipe: Callable[[argparse.Namespace], MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]] = partial(
        esm2_8m_recipe
    )
    esm2_650m_recipe: Callable[[argparse.Namespace], MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]] = partial(
        esm2_650m_recipe
    )
    esm2_3b_recipe: Callable[[argparse.Namespace], MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]] = partial(
        esm2_3b_recipe
    )

default_adam_optimizer_with_cosine_annealing_recipe(max_steps=None)

ESM2 的默认优化器调度器配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
291
292
293
def default_adam_optimizer_with_cosine_annealing_recipe(max_steps: Optional[int] = None) -> OptimizerSchedulerConfig:
    """Build the default optimizer/scheduler configuration for ESM2.

    Args:
        max_steps: Forwarded as `max_steps` to `OptimizerSchedulerConfig`. No other field is set here, so the
            remaining fields take whatever defaults that config declares.

    Returns:
        The resulting `OptimizerSchedulerConfig`.
    """
    optim_config = OptimizerSchedulerConfig(max_steps=max_steps)
    return optim_config

esm2_3b_experiment_config(result_dir)

ESM2 3b 的实验配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
244
245
246
247
248
249
250
251
252
def esm2_3b_experiment_config(result_dir) -> ExperimentConfig:
    """Build the experiment configuration for ESM2 3b pretraining.

    Args:
        result_dir: Passed through unchanged as the `result_dir` of the returned config.

    Returns:
        An `ExperimentConfig` named "esm2-3b-pretraining" that saves every 50 steps and has no checkpoint
        path to restore from.
    """
    experiment_settings = {
        "save_every_n_steps": 50,
        "result_dir": result_dir,
        "experiment_name": "esm2-3b-pretraining",
        # TODO should this be exposed?
        "restore_from_checkpoint_path": None,
    }
    return ExperimentConfig(**experiment_settings)

esm2_3b_model_config(initial_ckpt_path=None)

ESM2 3b 的模型配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def esm2_3b_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Build the model configuration for ESM2 3b.

    Args:
        initial_ckpt_path: Forwarded unchanged as `initial_ckpt_path` of the returned config. Defaults to None.

    Returns:
        An `ExposedESM2PretrainConfig` with 36 layers, hidden size 2560, 40 attention heads and a
        sequence length of 1024.
    """
    hidden_size = 2560
    # One precision string is shared by the params, pipeline and autocast dtypes.
    precision = "bf16-mixed"
    model_config = ExposedESM2PretrainConfig(
        num_layers=36,
        hidden_size=hidden_size,
        ffn_hidden_size=hidden_size * 4,
        num_attention_heads=40,
        seq_length=1024,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype=precision,
        pipeline_dtype=precision,
        autocast_dtype=precision,
    )
    return model_config

esm2_3b_parallel_config()

ESM2 3b 的并行配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
200
201
202
203
204
205
206
207
208
209
210
def esm2_3b_parallel_config() -> ParallelConfig:
    """Build the parallelism configuration for ESM2 3b.

    Returns:
        A `ParallelConfig` with tensor-parallel size 2, pipeline-parallel size 1, "megatron" DDP and 8 devices.
    """
    parallel_settings = {
        "tensor_model_parallel_size": 2,
        "pipeline_model_parallel_size": 1,
        # TODO: is this correct?
        "accumulate_grad_batches": 1,
        "ddp": "megatron",
        # NOTE assumes 8xGPU node. Can always edit the config.
        "num_devices": 8,
    }
    return ParallelConfig(**parallel_settings)

esm2_3b_recipe(args)

ESM2 3b 的配方。

源代码位于 `bionemo/esm2/run/recipes.py` 中
255
256
257
258
259
260
261
262
263
264
265
def esm2_3b_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Assemble the complete `MainConfig` for ESM2 3b.

    Args:
        args: Object carrying the runtime values. This function reads `max_steps`, `initial_ckpt_path`,
            `scheduler_max_steps` and `result_dir` from it, and also hands `args` to `esm2_base_data_config`.

    Returns:
        A `MainConfig` that combines the shared base data, training and optimizer configs with the
        3b-specific parallel, model, experiment and wandb configs.
    """
    # Sub-configs are built in the same order in which they are passed below.
    data_config = esm2_base_data_config(args)
    parallel_config = esm2_3b_parallel_config()
    training_config = esm2_base_training_config(max_steps=args.max_steps)  # shared base training config
    model_config = esm2_3b_model_config(args.initial_ckpt_path)
    optim_config = esm2_base_optimizer_scheduler_config(max_steps=args.scheduler_max_steps)  # shared base config
    experiment_config = esm2_3b_experiment_config(args.result_dir)
    wandb_config = esm2_3b_wandb_config()
    return MainConfig(
        data_config=data_config,
        parallel_config=parallel_config,
        training_config=training_config,
        bionemo_model_config=model_config,
        optim_config=optim_config,
        experiment_config=experiment_config,
        wandb_config=wandb_config,
    )

esm2_3b_wandb_config()

ESM2 3b 的 Wandb 配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
230
231
232
233
234
235
236
237
238
239
240
241
def esm2_3b_wandb_config() -> WandbConfig:
    """Build the Weights & Biases configuration for ESM2 3b.

    Returns:
        A `WandbConfig` for the "esm2-3b_pretraining" entity/project and the "esm2-3b" group, configured as
        offline and anonymous, with run id "1" and model logging disabled.
    """
    return WandbConfig(
        entity="esm2-3b_pretraining",
        project="esm2-3b_pretraining",
        group="esm2-3b",
        # Use the same generic tags as the 8m and 650m wandb configs. The previous value, "esm2-650m",
        # labelled 3b runs with the wrong model size; the size is already carried by `group`.
        tags=["esm2", "pretraining"],
        offline=True,
        anonymous=True,
        id="1",
        log_model=False,
    )

esm2_650m_experiment_config(result_dir)

ESM2 650m 的实验配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
176
177
178
179
180
181
182
183
184
def esm2_650m_experiment_config(result_dir) -> ExperimentConfig:
    """Build the experiment configuration for ESM2 650m pretraining.

    Args:
        result_dir: Passed through unchanged as the `result_dir` of the returned config.

    Returns:
        An `ExperimentConfig` named "esm2-650m-pretraining" that saves every 50 steps and has no checkpoint
        path to restore from.
    """
    experiment_config = ExperimentConfig(
        save_every_n_steps=50,
        result_dir=result_dir,
        experiment_name="esm2-650m-pretraining",
        # TODO should this be exposed?
        restore_from_checkpoint_path=None,
    )
    return experiment_config

esm2_650m_model_config(initial_ckpt_path=None)

ESM2 650m 的模型配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def esm2_650m_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Build the model configuration for ESM2 650m.

    Args:
        initial_ckpt_path: Forwarded unchanged as `initial_ckpt_path` of the returned config. Defaults to None.

    Returns:
        An `ExposedESM2PretrainConfig` with 33 layers, hidden size 1280, 20 attention heads and a
        sequence length of 1024.
    """
    hidden_size = 1280
    # One precision string is shared by the params, pipeline and autocast dtypes.
    precision = "bf16-mixed"
    model_config = ExposedESM2PretrainConfig(
        num_layers=33,
        hidden_size=hidden_size,
        ffn_hidden_size=hidden_size * 4,
        seq_length=1024,
        num_attention_heads=20,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype=precision,
        pipeline_dtype=precision,
        autocast_dtype=precision,
    )
    return model_config

esm2_650m_recipe(args)

ESM2 650m 的配方。

源代码位于 `bionemo/esm2/run/recipes.py` 中
187
188
189
190
191
192
193
194
195
196
197
def esm2_650m_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Assemble the complete `MainConfig` for ESM2 650m.

    Args:
        args: Object carrying the runtime values. This function reads `max_steps`, `initial_ckpt_path`,
            `scheduler_max_steps` and `result_dir` from it, and also hands `args` to `esm2_base_data_config`.

    Returns:
        A `MainConfig` that combines the shared base data, parallel, training and optimizer configs with the
        650m-specific model, experiment and wandb configs.
    """
    # Sub-configs are built in the same order in which they are passed below.
    data_config = esm2_base_data_config(args)
    parallel_config = esm2_base_parallel_config()
    training_config = esm2_base_training_config(max_steps=args.max_steps)  # shared base training config
    model_config = esm2_650m_model_config(args.initial_ckpt_path)
    optim_config = esm2_base_optimizer_scheduler_config(max_steps=args.scheduler_max_steps)  # shared base config
    experiment_config = esm2_650m_experiment_config(args.result_dir)
    wandb_config = esm2_650m_wandb_config()
    return MainConfig(
        data_config=data_config,
        parallel_config=parallel_config,
        training_config=training_config,
        bionemo_model_config=model_config,
        optim_config=optim_config,
        experiment_config=experiment_config,
        wandb_config=wandb_config,
    )

esm2_650m_wandb_config()

ESM2 650m 的 Wandb 配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
162
163
164
165
166
167
168
169
170
171
172
173
def esm2_650m_wandb_config() -> WandbConfig:
    """Build the Weights & Biases configuration for ESM2 650m.

    Returns:
        A `WandbConfig` for the "esm2-650m_pretraining" entity/project and the "esm2-650m" group, configured as
        offline and anonymous, with run id "1" and model logging disabled.
    """
    wandb_settings = {
        "entity": "esm2-650m_pretraining",
        "project": "esm2-650m_pretraining",
        "group": "esm2-650m",
        "tags": ["esm2", "pretraining"],
        "offline": True,
        "anonymous": True,
        "id": "1",
        "log_model": False,
    }
    return WandbConfig(**wandb_settings)

esm2_8m_experiment_config(result_dir)

ESM2 8m 的实验配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
105
106
107
108
109
110
111
112
def esm2_8m_experiment_config(result_dir) -> ExperimentConfig:
    """Build the experiment configuration for ESM2 8m pretraining.

    Args:
        result_dir: Passed through unchanged as the `result_dir` of the returned config.

    Returns:
        An `ExperimentConfig` named "esm2-8m-pretraining" that saves every 50 steps and has no checkpoint
        path to restore from.
    """
    experiment_settings = {
        "save_every_n_steps": 50,  # default set in previous script.
        "result_dir": result_dir,
        "experiment_name": "esm2-8m-pretraining",
        "restore_from_checkpoint_path": None,
    }
    return ExperimentConfig(**experiment_settings)

esm2_8m_model_config(initial_ckpt_path=None)

ESM2 8m 的模型配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def esm2_8m_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Build the model configuration for ESM2 8m.

    Args:
        initial_ckpt_path: Forwarded unchanged as `initial_ckpt_path` of the returned config. Defaults to None.

    Returns:
        An `ExposedESM2PretrainConfig` with 6 layers, hidden size 320, 20 attention heads and a
        sequence length of 1024.
    """
    hidden_size = 320
    # One precision string is shared by the params, pipeline and autocast dtypes.
    precision = "bf16-mixed"
    model_config = ExposedESM2PretrainConfig(
        num_layers=6,
        hidden_size=hidden_size,
        ffn_hidden_size=hidden_size * 4,
        num_attention_heads=20,
        seq_length=1024,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype=precision,
        pipeline_dtype=precision,
        autocast_dtype=precision,
    )
    return model_config

esm2_8m_recipe(args)

ESM2 8m 的配方。

源代码位于 `bionemo/esm2/run/recipes.py` 中
132
133
134
135
136
137
138
139
140
141
142
def esm2_8m_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Assemble the complete `MainConfig` for ESM2 8m.

    Args:
        args: Object carrying the runtime values. This function reads `max_steps`, `initial_ckpt_path`,
            `scheduler_max_steps` and `result_dir` from it, and also hands `args` to `esm2_base_data_config`.

    Returns:
        A `MainConfig` that combines the shared base data, parallel, training and optimizer configs with the
        8m-specific model, experiment and wandb configs.
    """
    # Sub-configs are built in the same order in which they are passed below.
    data_config = esm2_base_data_config(args)
    parallel_config = esm2_base_parallel_config()
    training_config = esm2_base_training_config(max_steps=args.max_steps)  # base config used as-is for 8m
    model_config = esm2_8m_model_config(args.initial_ckpt_path)
    optim_config = esm2_base_optimizer_scheduler_config(max_steps=args.scheduler_max_steps)  # base config as-is
    experiment_config = esm2_8m_experiment_config(args.result_dir)
    wandb_config = esm2_8m_wandb_config()
    return MainConfig(
        data_config=data_config,
        parallel_config=parallel_config,
        training_config=training_config,
        bionemo_model_config=model_config,
        optim_config=optim_config,
        experiment_config=experiment_config,
        wandb_config=wandb_config,
    )

esm2_8m_wandb_config()

ESM2 8m 的 Wandb 配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def esm2_8m_wandb_config() -> WandbConfig:
    """Build the Weights & Biases configuration for ESM2 8m.

    Returns:
        A `WandbConfig` for the "esm2-8m_pretraining" entity/project and the "esm2-8m" group, configured as
        offline and anonymous, with run id "1" and model logging disabled.
    """
    return WandbConfig(
        entity="esm2-8m_pretraining",
        project="esm2-8m_pretraining",
        group="esm2-8m",
        tags=["esm2", "pretraining"],
        offline=True,
        anonymous=True,
        id="1",
        log_model=False,
    )

esm2_base_data_config(args)

ESM2 的基础数据配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
75
76
77
78
79
80
81
82
83
84
85
86
87
def esm2_base_data_config(args) -> ESM2DataConfig:
    """Build the base data configuration shared by the ESM2 recipes.

    Args:
        args: Object providing `train_cluster_path`, `train_database_path`, `valid_cluster_path` and
            `valid_database_path`, each forwarded unchanged to the config.

    Returns:
        An `ESM2DataConfig` with min and max sequence length both 1024, micro batch size 1 and
        8 dataset workers.
    """
    # Minimum and maximum sequence length are set to the same value.
    seq_length = 1024
    return ESM2DataConfig(
        min_seq_length=seq_length,
        max_seq_length=seq_length,
        micro_batch_size=1,
        num_dataset_workers=8,
        train_cluster_path=args.train_cluster_path,
        train_database_path=args.train_database_path,
        valid_cluster_path=args.valid_cluster_path,
        valid_database_path=args.valid_database_path,
    )

esm2_base_optimizer_scheduler_config(max_steps=None)

ESM2 的基础优化器调度器配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
50
51
52
53
54
55
56
57
58
59
60
def esm2_base_optimizer_scheduler_config(max_steps: Optional[int] = None) -> OptimizerSchedulerConfig:
    """Build the base optimizer/scheduler configuration shared by the ESM2 recipes.

    Args:
        max_steps: Forwarded unchanged as `max_steps` of the returned config. Defaults to None.

    Returns:
        An `OptimizerSchedulerConfig` using the "adam" optimizer at lr 4e-4 and the "warmup_anneal"
        scheduler with 2000 warmup steps, stepping per "step" and monitoring "val_loss".
    """
    optim_settings = {
        "optimizer": "adam",
        "lr": 4e-4,
        "interval": "step",
        "monitor": "val_loss",
        "lr_scheduler": "warmup_anneal",
        "warmup_steps": 2000,
        "max_steps": max_steps,
    }
    return OptimizerSchedulerConfig(**optim_settings)

esm2_base_parallel_config()

ESM2 的基础并行配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
63
64
65
66
67
68
69
70
71
72
def esm2_base_parallel_config() -> ParallelConfig:
    """Build the base parallelism configuration shared by the ESM2 recipes.

    Returns:
        A `ParallelConfig` with every size and count set to 1 (tensor parallel, pipeline parallel,
        gradient accumulation, devices and nodes) and "megatron" DDP.
    """
    parallel_config = ParallelConfig(
        tensor_model_parallel_size=1,
        pipeline_model_parallel_size=1,
        accumulate_grad_batches=1,
        ddp="megatron",
        num_devices=1,
        num_nodes=1,
    )
    return parallel_config

esm2_base_training_config(max_steps=500000)

ESM2 的基础训练配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
39
40
41
42
43
44
45
46
47
def esm2_base_training_config(max_steps: int = 500000) -> TrainingConfig:
    """Build the base training configuration shared by the ESM2 recipes.

    Args:
        max_steps: Forwarded unchanged as `max_steps` of the returned config. Defaults to 500000.

    Returns:
        A `TrainingConfig` with `limit_val_batches=1.0`, a validation check interval of 10,000,
        "bf16-mixed" precision and perplexity enabled.
    """
    training_settings = {
        "max_steps": max_steps,
        "limit_val_batches": 1.0,
        "val_check_interval": 10_000,
        "precision": "bf16-mixed",
        "include_perplexity": True,
    }
    return TrainingConfig(**training_settings)

esm2_tiny_model_config(seq_length=2048, precision='bf16-mixed', nemo1_init_path=None, initial_ckpt_path=None, biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec, variable_seq_lengths=False)

ESM2 tiny 的模型配置,用于测试。

源代码位于 `bionemo/esm2/run/recipes.py` 中
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
def esm2_tiny_model_config(
    seq_length: int = 2048,
    precision: PrecisionTypes = "bf16-mixed",
    nemo1_init_path: Optional[str] = None,
    initial_ckpt_path: Optional[str] = None,
    biobert_spec_option: BiobertSpecOption = BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
    variable_seq_lengths: bool = False,
) -> ExposedESM2PretrainConfig:
    """Build the tiny ESM2 model configuration used for testing.

    Args:
        seq_length: Forwarded as `seq_length`.
        precision: Used for `params_dtype`, `pipeline_dtype` and `autocast_dtype` alike.
        nemo1_init_path: Converted with `str()` and passed as `nemo1_ckpt_path`; None is passed through as None.
        initial_ckpt_path: Converted with `str()` and passed as `initial_ckpt_path`; None is passed through
            as None.
        biobert_spec_option: Forwarded as `biobert_spec_option`.
        variable_seq_lengths: Forwarded as `variable_seq_lengths`.

    Returns:
        An `ExposedESM2PretrainConfig` with 2 layers, hidden size 32 and 2 attention heads.
    """
    hidden_size = 32
    nemo1_ckpt_path = None if nemo1_init_path is None else str(nemo1_init_path)
    # handle checkpoint resumption here rather than auto-resume so this supports fine-tuning capabilities
    resume_ckpt_path = None if initial_ckpt_path is None else str(initial_ckpt_path)

    model_config = ExposedESM2PretrainConfig(
        seq_length=seq_length,
        num_layers=2,
        hidden_size=hidden_size,
        num_attention_heads=2,
        ffn_hidden_size=4 * hidden_size,
        params_dtype=precision,
        pipeline_dtype=precision,
        autocast_dtype=precision,
        biobert_spec_option=biobert_spec_option,
        get_attention_mask_from_fusion=True,
        nemo1_ckpt_path=nemo1_ckpt_path,
        initial_ckpt_path=resume_ckpt_path,
        variable_seq_lengths=variable_seq_lengths,
    )
    return model_config

esm2_tiny_test_recipe(args)

ESM2 tiny 的测试配方,用于测试。

源代码位于 `bionemo/esm2/run/recipes.py` 中
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
def esm2_tiny_test_recipe(args):
    """Assemble the tiny ESM2 `MainConfig` used for testing.

    Args:
        args: Object providing `train_cluster_path`, `train_database_path`, `valid_cluster_path`,
            `valid_database_path`, `initial_ckpt_path`, `scheduler_max_steps` and `result_dir`.

    Returns:
        A `MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]` built from the default simple-parallel and
        tiny-training recipes, a data config with sequence length 128, the tiny model config, the default
        optimizer recipe, the default experiment recipe and an offline wandb config.
    """
    parallel = simple_parallel_recipe()
    training = tiny_train_config_recipe()

    # Minimum and maximum sequence length are set to the same value.
    tiny_seq_length = 128
    data = ESM2DataConfig(
        min_seq_length=tiny_seq_length,
        max_seq_length=tiny_seq_length,
        micro_batch_size=2,
        num_dataset_workers=1,
        train_cluster_path=args.train_cluster_path,
        train_database_path=args.train_database_path,
        valid_cluster_path=args.valid_cluster_path,
        valid_database_path=args.valid_database_path,
    )
    # The model's sequence length is read back from the data config so the two stay in sync.
    model = esm2_tiny_model_config(seq_length=data.max_seq_length, initial_ckpt_path=args.initial_ckpt_path)

    optim = default_adam_optimizer_with_cosine_annealing_recipe(max_steps=args.scheduler_max_steps)
    experiment = experiment_config_recipe(args.result_dir)
    wandb_settings = {
        "project": "bionemo2-demo",
        "entity": "nvidia",
        "offline": True,
        "tags": [],
        "group": "dev",
        "id": "dev",
        "log_model": False,
        "anonymous": True,
    }
    wandb = WandbConfig(**wandb_settings)

    return MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig](
        data_config=data,
        parallel_config=parallel,
        training_config=training,
        bionemo_model_config=model,
        optim_config=optim,
        experiment_config=experiment,
        wandb_config=wandb,
    )

experiment_config_recipe(result_dir='./results')

ESM2 的实验配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
296
297
298
299
300
301
302
303
304
305
306
307
def experiment_config_recipe(result_dir="./results") -> ExperimentConfig:
    """Build the default experiment configuration for ESM2.

    Args:
        result_dir: Passed through unchanged as the `result_dir` of the returned config.
            Defaults to "./results".

    Returns:
        An `ExperimentConfig` named "default_experiment" that saves every 100 steps, keeps the last
        checkpoint and the top 2 by "val_loss", has no checkpoint path to restore from and does not create
        a tensorboard logger.
    """
    experiment_settings = {
        "save_every_n_steps": 100,
        "result_dir": result_dir,
        "experiment_name": "default_experiment",
        "restore_from_checkpoint_path": None,
        "save_last_checkpoint": True,
        "metric_to_monitor_for_checkpoints": "val_loss",
        "save_top_k": 2,
        "create_tensorboard_logger": False,
    }
    return ExperimentConfig(**experiment_settings)

simple_parallel_recipe(tensor_model_parallel_size=1, pipeline_model_parallel_size=1, num_devices=1, accumulate_grad_batches=1)

ESM2 的简单并行配方。

源代码位于 `bionemo/esm2/run/recipes.py` 中
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
def simple_parallel_recipe(
    tensor_model_parallel_size: int = 1,
    pipeline_model_parallel_size: int = 1,
    num_devices: int = 1,
    accumulate_grad_batches: int = 1,
) -> ParallelConfig:
    """Build a simple parallel configuration for ESM2.

    Args:
        tensor_model_parallel_size: Forwarded as `tensor_model_parallel_size`.
        pipeline_model_parallel_size: Forwarded as `pipeline_model_parallel_size`.
        num_devices: Forwarded as `num_devices`. Must be at least
            `tensor_model_parallel_size * pipeline_model_parallel_size`.
        accumulate_grad_batches: Forwarded as `accumulate_grad_batches`.

    Returns:
        A `ParallelConfig` built from the four arguments.

    Raises:
        AssertionError: If `num_devices` is smaller than
            `tensor_model_parallel_size * pipeline_model_parallel_size`.
    """
    model_parallel_size = tensor_model_parallel_size * pipeline_model_parallel_size
    # This is a lower-bound check only, not a divisibility test, so the message states exactly what is
    # enforced (the previous wording claimed "divisible", which the condition never verified).
    assert num_devices >= model_parallel_size, (
        "num_devices must be greater than or equal to "
        "tensor_model_parallel_size * pipeline_model_parallel_size"
    )
    return ParallelConfig(
        tensor_model_parallel_size=tensor_model_parallel_size,
        pipeline_model_parallel_size=pipeline_model_parallel_size,
        num_devices=num_devices,
        accumulate_grad_batches=accumulate_grad_batches,
    )

tiny_train_config_recipe()

ESM2 的微型训练配置。

源代码位于 `bionemo/esm2/run/recipes.py` 中
286
287
288
def tiny_train_config_recipe() -> TrainingConfig:
    """Build the tiny training configuration for ESM2.

    Returns:
        A `TrainingConfig` limited to 10 steps, validating on 2 batches with a check interval of 2.
    """
    training_config = TrainingConfig(
        max_steps=10,
        limit_val_batches=2,
        val_check_interval=2,
    )
    return training_config