@@ -121,21 +121,21 @@ def train(cfg):
     logger.info(f'Creating model:{cfg.model.type}/{cfg.model_name}')
     model = build_model(cfg.model)

-    # load_checkpoint(cfg, model, None, None)
+    # # load_checkpoint(cfg, model, None, None)

-    # Freeze all layers
-    for param in model.parameters():
-        param.requires_grad = False
+    # # Freeze all layers
+    # for param in model.parameters():
+    #     param.requires_grad = False

-    # If you only want to freeze specific layers, you can do it as follows
-    # e.g. all of the img_projector layers
-    for param in model.img_projector.parameters():
-        param.requires_grad = True
-
-    # If you only want to freeze specific layers, you can do it as follows
-    # e.g. all of the text_projector layers
-    for param in model.text_projector.parameters():
-        param.requires_grad = True
+    # # If you only want to freeze specific layers, you can do it as follows
+    # # e.g. all of the img_projector layers
+    # for param in model.img_projector.parameters():
+    #     param.requires_grad = True
+
+    # # If you only want to freeze specific layers, you can do it as follows
+    # # e.g. all of the text_projector layers
+    # for param in model.text_projector.parameters():
+    #     param.requires_grad = True

     model.cuda()
     logger.info(str(model))
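
For context, the freezing logic commented out in the hunk above follows the standard PyTorch selective fine-tuning pattern: disable gradients everywhere, then re-enable them only on the heads you want to train. A minimal sketch of that pattern (not part of the patch; it assumes a model exposing the img_projector / text_projector submodules named above, and the optimizer call is purely illustrative):

import torch

def freeze_all_but_projectors(model: torch.nn.Module) -> None:
    """Freeze the whole model, then unfreeze only the two projector heads."""
    for param in model.parameters():
        param.requires_grad = False        # freeze everything
    for head in (model.img_projector, model.text_projector):
        for param in head.parameters():
            param.requires_grad = True     # train only the projector heads

# Illustrative usage (optimizer choice is an assumption, not taken from this repo):
# freeze_all_but_projectors(model)
# optimizer = torch.optim.AdamW(
#     (p for p in model.parameters() if p.requires_grad), lr=1e-4)
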
@@ -266,7 +266,7 @@ def train_one_epoch(config, model, data_loader, optimizer, epoch, lr_scheduler):
     start = time.time()
     end = time.time()
     for idx, samples in enumerate(data_loader):
-        print('\n\n1\n\n')
+        # print('\n\n1\n\n')

         batch_size = config.data.batch_size

@@ -312,7 +312,7 @@ def train_one_epoch(config, model, data_loader, optimizer, epoch, lr_scheduler):
         lr_scheduler.step_update(epoch * num_steps + idx)

         torch.cuda.synchronize()
-        print('\n\n2\n\n')
+        # print('\n\n2\n\n')

         loss_meter.update(loss.item(), batch_size)
         for loss_name in log_vars:
@@ -320,10 +320,10 @@ def train_one_epoch(config, model, data_loader, optimizer, epoch, lr_scheduler):
         norm_meter.update(grad_norm)
         batch_time.update(time.time() - end)
         end = time.time()
-        print('\n\n3\n\n')
+        # print('\n\n3\n\n')

         if idx % config.print_freq == 0:
-            print('\n\n4\n\n')
+            # print('\n\n4\n\n')
             lr = optimizer.param_groups[0]['lr']
             memory_used = torch.cuda.max_memory_allocated() / (1024.0 * 1024.0)
             etas = batch_time.avg * (num_steps - idx)
@@ -335,14 +335,14 @@ def train_one_epoch(config, model, data_loader, optimizer, epoch, lr_scheduler):
                         f'{log_vars_str}\t'
                         f'grad_norm {norm_meter.val:.4f} ({norm_meter.avg:.4f})\t'
                         f'mem {memory_used:.0f}MB')
-            print('\n\n5\n\n')
+            # print('\n\n5\n\n')
             if wandb is not None:
                 log_stat = {f'iter/train_{n}': m.avg for n, m in log_vars_meters.items()}
                 log_stat['iter/train_total_loss'] = loss_meter.avg
                 log_stat['iter/learning_rate'] = lr
                 wandb.log(log_stat)

-            print('\n\n6\n\n')
+            # print('\n\n6\n\n')
     epoch_time = time.time() - start
     logger.info(f'EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}')
     result_dict = dict(total_loss=loss_meter.avg)