浏览代码

data(CUHK-PEDES): 更新数据集路径并调整数据处理逻辑

- 更新 dataset_path 为 /mnt/vos-s9gjtkm2/reid/dataset/cross_reid
- 调整数据处理逻辑,使用 processed_train_data、processed_test_data 和 processed_val_data
- 修复 SettingWithCopyWarning 问题(注:本次提交保存的单元输出中仍记录了该警告,修复是否生效需重新运行确认)
- 更新数据集统计信息
Yijun Fu 1 个月之前
父节点
当前提交
eca41a3370
共有 1 个文件被更改,包括 63 次插入和 21 次删除
  1. 63 21
      cuhkpedes/CUHK-PEDES2webdataset.ipynb

+ 63 - 21
cuhkpedes/CUHK-PEDES2webdataset.ipynb

@@ -9,7 +9,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/root/miniconda3/envs/groupvit/lib/python3.7/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "/mnt/vos-s9gjtkm2/reid/miniconda3/envs/groupvit/lib/python3.7/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
       "  from .autonotebook import tqdm as notebook_tqdm\n"
      ]
     }
@@ -25,26 +25,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/root/dataset\n"
+      "/mnt/vos-s9gjtkm2/reid/dataset/cross_reid\n"
      ]
     }
    ],
    "source": [
     "home_dir = os.path.expanduser('~')\n",
-    "dataset_path = os.path.join(home_dir, 'dataset')\n",
+    "# dataset_path = os.path.join(home_dir, 'dataset')\n",
+    "dataset_path = os.path.join(home_dir, 'dataset/cross_reid')\n",
     "print(dataset_path)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -60,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -87,7 +88,7 @@
       "2  [[the, man, is, wearing, a, black, jacket, gre...   1  \n",
       "3  [[hes, wearing, a, black, hooded, sweatshirt, ...   1  \n",
       "4  [[the, man, is, walking, he, is, wearing, a, b...   2  \n",
-      "40206\n"
+      "34054\n"
      ]
     }
    ],
@@ -118,7 +119,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -131,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -213,7 +214,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -260,6 +261,30 @@
       "3  man wearing a grey jacket, brown pants and bla...  11004  \n",
       "4  the woman is wearing a floral printed shirt wi...  11005  \n"
      ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/mnt/vos-s9gjtkm2/reid/miniconda3/envs/groupvit/lib/python3.7/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  \n",
+      "/mnt/vos-s9gjtkm2/reid/miniconda3/envs/groupvit/lib/python3.7/site-packages/ipykernel_launcher.py:9: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  if __name__ == \"__main__\":\n",
+      "/mnt/vos-s9gjtkm2/reid/miniconda3/envs/groupvit/lib/python3.7/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  # Remove the CWD from sys.path while we load stuff.\n"
+     ]
     }
    ],
    "source": [
@@ -285,9 +310,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "34054\n",
+      "3074\n",
+      "3078\n"
+     ]
+    }
+   ],
    "source": [
     "train_images = processed_train_data.shape[0]\n",
     "print(train_images)\n",
@@ -311,7 +346,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -327,20 +362,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "# writing /root/dataset/CUHK-PEDES_shards/cuhkpedes-train-000000.tar 0 0.0 GB 0\n",
-      "# writing /root/dataset/CUHK-PEDES_shards/cuhkpedes-train-000001.tar 8060 0.1 GB 8060\n",
-      "# writing /root/dataset/CUHK-PEDES_shards/cuhkpedes-train-000002.tar 8010 0.1 GB 16070\n",
-      "# writing /root/dataset/CUHK-PEDES_shards/cuhkpedes-train-000003.tar 7924 0.1 GB 23994\n",
-      "# writing /root/dataset/CUHK-PEDES_shards/cuhkpedes-train-000004.tar 7933 0.1 GB 31927\n",
-      "# writing /root/dataset/CUHK-PEDES_shards/cuhkpedes-test-000000.tar 0 0.0 GB 0\n",
-      "# writing /root/dataset/CUHK-PEDES_shards/cuhkpedes-val-000000.tar 0 0.0 GB 0\n"
+      "# writing /mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES_shards/cuhkpedes-train-000000.tar 0 0.0 GB 0\n",
+      "# writing /mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES_shards/cuhkpedes-train-000001.tar 7968 0.1 GB 7968\n",
+      "# writing /mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES_shards/cuhkpedes-train-000002.tar 8010 0.1 GB 15978\n",
+      "# writing /mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES_shards/cuhkpedes-train-000003.tar 7972 0.1 GB 23950\n",
+      "# writing /mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES_shards/cuhkpedes-train-000004.tar 7945 0.1 GB 31895\n",
+      "# writing /mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES_shards/cuhkpedes-test-000000.tar 0 0.0 GB 0\n",
+      "# writing /mnt/vos-s9gjtkm2/reid/dataset/cross_reid/CUHK-PEDES_shards/cuhkpedes-val-000000.tar 0 0.0 GB 0\n"
      ]
     }
    ],
@@ -382,6 +417,13 @@
     "write_to_tar(processed_test_data, image_path, test_indexes, test_keys, test_pattern)\n",
     "write_to_tar(processed_val_data, image_path, val_indexes, val_keys, val_pattern)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {