# Training / evaluation configuration.
# Uses `${...}` interpolation and `???` placeholders; this looks like
# OmegaConf syntax -- NOTE(review): confirm the loader.

data:
  batch_size: 256
  pin_memory: true
  num_workers: 6
  # Shuffle buffer size. Rule of thumb (per Thomas): at least about 5-10x the
  # batch size; beyond that, the differences become academic.
  shuffle_buffer: 10000
  # Reuses the training seed defined under the top-level `train` section.
  seed: ${train.seed}
  dataset:
    # Catalogue of available datasets. Only the names listed under
    # `train` / `val` below are selected.
    meta:
      gcc3m:
        type: img_txt_pair
        path: local_data/gcc3m_shards
        # NOTE(review): the range bounds have different widths (6 vs 5
        # digits). Brace expansion usually pads to the wider bound, but
        # confirm against the shard loader.
        prefix: gcc-train-{000000..00436}.tar
        length: 2891445
      gcc12m:
        type: img_txt_pair
        path: local_data/gcc12m_shards
        prefix: gcc-conceptual-12m-{000000..001943}.tar
        length: 11156203
      yfcc14m:
        type: img_txt_pair
        path: local_data/yfcc14m_shards
        prefix: yfcc14m-{000000..001888}.tar
        length: 14615499
      redcap12m:
        type: img_txt_pair
        path: local_data/redcap12m_shards
        prefix: redcap12m-{000000..001211}.tar
        length: 11866987
      imagenet:
        type: img_cls_pair
        path: local_data/imagenet_shards
        prefix: imagenet-val-{000000..000049}.tar
        length: 50000
      cuhkpedes_train:
        type: img_txt_pair
        path: local_data/cuhkpedes_shards
        prefix: cuhkpedes-train-{000000..000255}.tar
        length: 34054
      cuhkpedes_val:
        type: img_txt_pair
        path: local_data/cuhkpedes_shards
        prefix: cuhkpedes-val-{000000..000023}.tar
        length: 3078
    # Datasets used for training (keys of `meta`); others are disabled.
    train:
      # - gcc3m
      # - gcc12m
      # - yfcc14m
      - cuhkpedes_train
    # Datasets used for validation (keys of `meta`).
    val:
      # - imagenet
      - cuhkpedes_val

  img_aug:
    deit_aug: true
    img_size: 224
    img_scale: [0.08, 1.0]
    interpolation: bilinear
    color_jitter: 0.4
    auto_augment: 'rand-m9-mstd0.5-inc1'
    re_prob: 0.25
    re_mode: 'pixel'
    re_count: 1
  text_aug:
    max_seq_len: 77
    multi_label: 0
    word_type: 'noun'

train:
  start_epoch: 0
  epochs: 30
  warmup_epochs: 2
  # Learning rates are written with an explicit decimal point so that
  # YAML 1.1 loaders (e.g. PyYAML) also resolve them as floats, not strings.
  base_lr: 1.6e-3
  weight_decay: 0.05
  warmup_lr: 4.0e-6
  min_lr: 4.0e-5
  clip_grad: 5.0
  accumulation_steps: 0
  amp_opt_level: O2  # string: letter "O" followed by 2
  seed: 0  # also referenced by data.seed

  lr_scheduler:
    name: cosine

  optimizer:
    name: adamw
    eps: 1.0e-8  # explicit decimal point, same reason as the rates above
    betas: [0.9, 0.999]

evaluate:
  eval_only: false
  eval_freq: 1
  task:
    - cls
    - seg
  # Per-task settings; the keys match the entries of `task` above.
  cls:
    save_best: true
    template: subset
  seg:
    save_best: true
    cfg: segmentation/configs/_base_/datasets/pascal_voc12.py
    template: simple
    opts: []

checkpoint:
  auto_resume: true
  resume: ''
  freq: 1
  max_kept: -1
  save_freq: 1

model_name: ''  # display name in the logger
# `???` is a placeholder; presumably the value must be supplied at launch
# time (OmegaConf mandatory-value marker) -- confirm.
output: ???
# Miscellaneous top-level run settings, one key per line.
tag: default
print_freq: 10
seed: 0
wandb: false
# `???` is a placeholder; presumably the value must be supplied at launch
# time (OmegaConf mandatory-value marker) -- confirm.
local_rank: ???
vis: []