- 在 default.yml 中注释掉 gcc12m 和 yfcc14m 数据集
- 删除 group_vit_cuhkpedes_30e.yml 配置文件
@@ -45,8 +45,8 @@ data:
length: 3078
train:
# - gcc3m
- - gcc12m
- - yfcc14m
+ # - gcc12m
+ # - yfcc14m
- cuhkpedes_train
val:
# - imagenet
@@ -0,0 +1,29 @@
+_base_: 'default.yml'
+
+model:
+ type: MultiLabelContrastive
+ img_encoder:
+ type: GroupViT
+ embed_dim: 384
+ num_heads: [6, 6, 6]
+ depths: [6, 3, 3]
+ num_group_tokens: [64, 8, 0]
+ num_output_groups: [64, 8]
+ drop_rate: 0.0
+ drop_path_rate: 0.1
+ text_encoder:
+ type: TextTransformer
+ context_length: 77
+ width: 256
+ layers: 12
+ vocab_size: 49408
+ contrast_temperature: 0.07
+ proj_num_layers: 2
+ output_dim: 256
+ multi_label: ${data.text_aug.multi_label} # multi_label=0 is better for RedCap
+data:
+ dataset:
+ train:
+ - gcc3m
+ - gcc12m
+ - redcap12m
@@ -25,3 +25,7 @@ model:
proj_num_layers: 2
output_dim: 256
multi_label: ${data.text_aug.multi_label}
+ - cuhkpedes