feat(OVSegmentor): add new pretraining configs and data processing docs

- Add ovsegmentor_pretrain_vit_bert_stage1_reid.yml and ovsegmentor_pretrain_vit_bert_stage2_reid.yml config files
- Add a CC12M dataset processing script and instructions for building cross-image pairs
- Update the Pascal VOC dataset path
- Update run.sh to use the new pretraining config
Yijun Fu, 1 month ago
Parent commit
3cd5923c6f

+ 2 - 1
.gitignore

@@ -1,3 +1,4 @@
 *.pyc
 *.pth
-*.pt
+*.pt
+exps/final_exps/*

+ 35 - 0
configs/ovsegmentor/ovsegmentor_pretrain_vit_bert_stage1_reid.yml

@@ -0,0 +1,35 @@
+_base_: '../default.yml'
+model_name: 'ovsegmentor_pretrain_vit_bert_cc4m_stage1' # display name in the logger
+output: /mnt/vos-s9gjtkm2/reid/ovsegmentor/OVSegmentor/exps/final_exps/
+
+data:
+  train: 
+      root_dir: '/mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES/imgs'
+      meta_file: '/mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES/train_entity_pair.csv'
+      read_from: petrel
+      batch_size: 256
+  val:
+      root_dir: '/mnt/cache/share/images/val/'
+          
+  img_aug:
+    deit_aug: true
+    img_size: 224
+    img_scale: [0.08, 1.0]
+    interpolation: 'bilinear'
+    color_jitter: 0.4
+    auto_augment: 'rand-m9-mstd0.5-inc1'
+    re_prob: 0.25
+    re_mode: 'pixel'
+    re_count: 1
+
+model:
+  img_encoder:
+    imgnet_pretrained_checkpoint: '/mnt/vos-s9gjtkm2/reid/ovsegmentor/OVSegmentor/checkpoints/dino_vitbase16_pretrain.pth'
+
+  use_entityloss: true
+  
+train:
+  epochs: 30
+  base_lr: 6.4e-4
+  warmup_lr: 1.6e-5
+  min_lr: 1.6e-4

+ 44 - 0
configs/ovsegmentor/ovsegmentor_pretrain_vit_bert_stage2_reid.yml

@@ -0,0 +1,44 @@
+_base_: '../default.yml'
+model_name: 'ovsegmentor_pretrain_vit_bert_cc4m_stage2' # display name in the logger
+output: /mnt/petrelfs/xujilan/exps/final_exps/
+
+data:
+  train: 
+      root_dir: 's3://GCC/GCC12m/'
+      meta_file: '/mnt/petrelfs/xujilan/data/cc12m_100/subsets/cc12m_filtered_subset_pair_debug.csv'
+      read_from: petrel
+      batch_size: 128
+  val:
+      root_dir: '/mnt/cache/share/images/val/'
+
+  img_aug:
+    deit_aug: false
+    img_size: 224
+    img_scale: [0.4, 1.0]
+    interpolation: 'bilinear'
+    color_jitter: 0.4
+    auto_augment: 'rand-m9-mstd0.5-inc1'
+    re_prob: 0.25
+    re_mode: 'pixel'
+    re_count: 1    
+
+model:
+  img_encoder:
+    imgnet_pretrained_checkpoint: '/mnt/petrelfs/xujilan/checkpoints/dino_vitbase16_pretrain.pth'
+
+  text_encoder:
+    fixed: true
+
+  use_entityloss: true
+  use_maskloss: true
+  cross_threshold: 0.6
+  
+train:
+  epochs: 10
+  base_lr: 1e-5
+  min_lr: 1e-6
+  warmup_epochs: 0
+
+checkpoint:
+  stage1_checkpoint: /mnt/petrelfs/xujilan/exps/cc12m_100/best_miou.pth
+

+ 75 - 0
docs/DATA_README_zh.md

@@ -0,0 +1,75 @@
+# Preparing Datasets
+
+## Training data
+Please prepare the [CC12M dataset](https://github.com/google-research-datasets/conceptual-12m). All images should be stored in a single folder. A meta file (csv or tsv) containing the image IDs and their corresponding captions is required, for example:
+```shell
+image_id, caption
+00001.jpg, a boy is running on the beach.
+00002.jpg, The bride was wearing a chic lace.
+...
+```
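+
+For reference, the following is a minimal sketch (not part of the repository scripts) showing how such a meta file could be loaded with pandas; the column names image_id and caption simply follow the example above.
+```python
+import pandas as pd
+
+# Hypothetical helper: load the (image_id, caption) meta file sketched above.
+def load_meta(path: str) -> pd.DataFrame:
+    df = pd.read_csv(path)
+    df.columns = [c.strip() for c in df.columns]      # the header may contain spaces after the comma
+    return df.dropna(subset=["image_id", "caption"])  # drop rows with missing fields
+
+meta = load_meta("/path/to/your/cc12m.csv")
+print(len(meta), meta.iloc[0]["caption"])
+```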
+
+- Important update:
+We provide scripts for filtering a CC12M subset and building the cross-image pairs from scratch:
+
+1. Filter the CC12M dataset with the Top-K most frequent entities using multi-processing (e.g., 32 processes). Feel free to modify the entities in [data_process_cc12m.py](../datasets/filter_cc12m_subset.py).
+```shell
+cd datasets
+python data_process_cc12m.py --mode filter --srcdir /path/to/your/cc12m.csv --processor 32
+```
+This generates 32 sub-files under the subset/ directory.
+
+2. Next, merge these sub-files into a single meta file (and optionally remove the sub-files by passing --remove_subfiles True).
+```shell
+python data_process_cc12m.py --mode merge --dstdir /path/to/your/cc12m/subsets/ --remove_subfiles True
+```
+
+3. Build the cross-image pairs from the filtered data (the sketch after this list illustrates the underlying idea).
+```shell
+python data_process_cc12m.py --mode makepair --metafile /path/to/your/cc12m_filtered_subset.csv
+```
+The resulting meta file is saved automatically to /path/to/your/cc12m_filtered_subset_pair.csv and can be used to train the model.
+
+4. Modify the root directory and meta file path in configs/ovsegmentor/ovsegmentor_pretrain_vit_bert_stage1.yml:
+```shell
+data:
+    train:
+        root_dir: '/path/to/your/cc12m_images/'
+        meta_file: '/path/to/your/cc12m_filtered_subset_pair.csv'
+```
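+
+The sketch below only illustrates the idea behind the filter/makepair steps (keep captions that mention a known entity, then pair images whose captions share an entity). It assumes the column layout shown earlier and a hand-picked entity list, and is not the actual data_process_cc12m.py implementation.
+```python
+from typing import Optional
+import pandas as pd
+
+# Assumed entity list; the real script works with the Top-K most frequent entities.
+ENTITIES = ["dog", "cat", "car", "tree", "person"]
+
+def find_entity(caption: str) -> Optional[str]:
+    words = str(caption).lower().split()
+    return next((e for e in ENTITIES if e in words), None)
+
+def make_pairs(meta_path: str, out_path: str) -> None:
+    df = pd.read_csv(meta_path)
+    df.columns = [c.strip() for c in df.columns]
+    df["entity"] = df["caption"].map(find_entity)
+    df = df.dropna(subset=["entity"])        # keep only captions that mention a known entity
+
+    pairs = []
+    for _, group in df.groupby("entity"):
+        ids = group["image_id"].tolist()
+        # pair up consecutive images that share the same entity
+        pairs.extend(zip(ids[::2], ids[1::2]))
+
+    # assumed output columns for the pair meta file
+    pd.DataFrame(pairs, columns=["image_id", "pair_image_id"]).to_csv(out_path, index=False)
+
+make_pairs("/path/to/your/cc12m_filtered_subset.csv",
+           "/path/to/your/cc12m_filtered_subset_pair.csv")
+```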
+
+You can also try other [image-caption datasets](https://github.com/rom1504/img2dataset) (e.g., YFCC, RedCaps) by providing the images together with their corresponding meta files.
+
+## Evaluation
+1. Prepare [PASCAL VOC](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#pascal-voc), [PASCAL Context](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#pascal-context), [COCO](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#coco-stuff-164k) and [ADE20K](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#ade20k) following their official instructions.
+2. For the COCO dataset, convert it to the semantic segmentation format following [GroupViT](https://github.com/NVlabs/GroupViT):
+```shell
+python convert_dataset/convert_coco_object.py /path/to/your/coco/ -o /path/to/output/coco/
+```
+3. Change the image directories in segmentation/configs/_base_/datasets/*.py:
+- [PASCAL VOC](../segmentation/configs/_base_/datasets/pascal_voc12.py)
+```shell
+data_root = '/path/to/your/VOCdevkit/VOC2012'
+```
+- [PASCAL CONTEXT](../segmentation/configs/_base_/datasets/pascal_context.py)
+```shell
+data_root = '/path/to/your/pascal_context/VOCdevkit/VOC2010/'
+```
+- [COCO Object](../segmentation/configs/_base_/datasets/coco.py)
+```shell
+data_root = '/path/to/your/coco/'
+```
+- [COCO STUFF](../segmentation/configs/_base_/datasets/coco_stuff.py)
+```shell
+data_root = '/path/to/your/coco/'
+```
+- [ADE20K](../segmentation/configs/_base_/datasets/ade20k.py)
+```shell
+data_root = '/path/to/your/ADEChallengeData2016/'
+```
+
+4. To enable zero-shot classification evaluation, prepare the validation set of [ImageNet](https://www.image-net.org/). The meta file of the validation set is already provided [here](../imagenet_info/val.csv). Modify the image path in configs/ovsegmentor/ovsegmentor_pretrain_vit_bert_stage1.yml:
+```shell
+val:
+    root_dir: '/path/to/your/cc12m_images/'
+```

+ 6 - 2
scripts/run.sh

@@ -1,3 +1,7 @@
+# PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+# python -m torch.distributed.launch --nproc_per_node=8 --master_port=29500 \
+#     main_pretrain.py --cfg configs/ovsegmentor/ovsegmentor_pretrain_vit_bert_stage1.yml
+
 PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
-python -m torch.distributed.launch --nproc_per_node=8 --master_port=29500 \
-    main_pretrain.py --cfg configs/ovsegmentor/ovsegmentor_pretrain_vit_bert_stage1.yml
+python -m torch.distributed.launch --nproc_per_node=1 --master_port=29500 \
+    main_pretrain.py --cfg configs/ovsegmentor/ovsegmentor_pretrain_vit_bert_stage1_reid.yml

+ 1 - 1
segmentation/configs/_base_/datasets/pascal_voc12.py

@@ -17,7 +17,7 @@
 _base_ = ['../custom_import.py']
 # dataset settings
 dataset_type = 'PascalVOCDataset'
-data_root = '/mnt/petrelfs/xujilan/data/VOCdevkit/VOC2012'
+data_root = '/mnt/vos-s9gjtkm2/reid/dataset/VOC2012'
 img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 test_pipeline = [
     dict(type='LoadImageFromFile'),