# default.yml
  # Data-loading settings: loader parameters, the catalogue of dataset shards,
  # the train/val dataset selection, and image/text augmentation options.
  data:
    batch_size: 256
    pin_memory: true
    num_workers: 10
    # Rule of thumb (per Thomas): make the shuffle buffer at least roughly
    # 5-10x the batch size; beyond that the differences become academic.
    shuffle_buffer: 10000
    # Reuses the value of train.seed (interpolation syntax; presumably resolved
    # by the config loader -- confirm).
    seed: '${train.seed}'
    dataset:
      # Available shard sets, keyed by dataset name. Each entry gives a sample
      # type, a shard directory, a brace-range filename pattern, and a length
      # (presumably the number of samples -- confirm).
      meta:
        gcc3m:
          type: img_txt_pair
          path: local_data/gcc3m_shards
          # NOTE(review): the two range bounds use different zero-padding
          # widths (000000 vs 00436), unlike the other entries -- confirm this
          # matches the shard filenames on disk.
          prefix: gcc-train-{000000..00436}.tar
          length: 2891445
        gcc12m:
          type: img_txt_pair
          path: local_data/gcc12m_shards
          prefix: gcc-conceptual-12m-{000000..001943}.tar
          length: 11156203
        yfcc14m:
          type: img_txt_pair
          path: local_data/yfcc14m_shards
          prefix: yfcc14m-{000000..001888}.tar
          length: 14615499
        redcap12m:
          type: img_txt_pair
          path: local_data/redcap12m_shards
          prefix: redcap12m-{000000..001211}.tar
          length: 11866987
        imagenet:
          type: img_cls_pair
          path: local_data/imagenet_shards
          prefix: imagenet-val-{000000..000049}.tar
          length: 50000
      # Names below refer to keys of `meta` above. redcap12m is defined in
      # `meta` but not selected here.
      train:
        - gcc3m
        - gcc12m
        - yfcc14m
      val:
        - imagenet
    img_aug:
      deit_aug: true
      img_size: 224
      img_scale: [0.08, 1.0]
      interpolation: bilinear
      color_jitter: 0.4
      auto_augment: rand-m9-mstd0.5-inc1
      re_prob: 0.25
      re_mode: pixel
      re_count: 1
    text_aug:
      max_seq_len: 77
      multi_label: 0
      word_type: noun
  # Training schedule, learning-rate settings, and optimizer configuration.
  train:
    start_epoch: 0
    epochs: 30
    warmup_epochs: 2
    base_lr: 1.6e-3
    weight_decay: 0.05
    # Exponent-form floats are written with an explicit decimal point: YAML 1.1
    # resolvers (e.g. plain PyYAML) load `4e-6` as a string, whereas `4.0e-6`
    # is a float under every common loader.
    warmup_lr: 4.0e-6
    min_lr: 4.0e-5
    clip_grad: 5.0
    accumulation_steps: 0
    # Quoted so the value is unmistakably a string (letter "O" followed by 1).
    amp_opt_level: 'O1'
    # Also referenced by data.seed via ${train.seed}.
    seed: 0
    # Canonical lowercase boolean, consistent with the rest of this file.
    use_entity: false
    lr_scheduler:
      name: cosine
    optimizer:
      name: adamw
      eps: 1.0e-8
      betas: [0.9, 0.999]
  # Evaluation settings: when to evaluate and per-task options.
  evaluate:
    eval_only: false
    eval_freq: 1
    # Tasks to run; each name has a matching options section below.
    task:
      - cls
      - seg
    cls:
      save_best: true
      template: subset
    seg:
      save_best: true
      cfg: segmentation/configs/_base_/datasets/pascal_voc12.py
      template: simple
      opts: []
  # Checkpoint saving and resuming options.
  checkpoint:
    auto_resume: true
    resume: ''
    # Add this for stage-2 training.
    stage1_checkpoint: ''
    freq: 1
    max_kept: -1
    save_freq: 1
  # Model-level loss switches; both are disabled by default here.
  # NOTE(review): nesting reconstructed from key order -- confirm both flags
  # belong under `model`.
  model:
    use_maskloss: false
    use_entityloss: false
  # Display name in the logger.
  model_name: ''
  # '???' is a placeholder with no usable default; presumably the loader
  # treats it as a required value to be supplied at launch -- confirm.
  output: '???'
  tag: default
  print_freq: 10
  seed: 0
  wandb: false
  local_rank: '???'
  vis: []