1234567891011121314151617181920212223242526272829303132333435363738394041 |
- import os
- import tarfile
- TRAIN_SRC_DIR = '/mnt/vos-s9gjtkm2/reid/dataset/ImageNet/ILSVRC2012/ILSVRC2012_img_train.tar'
- TRAIN_DEST_DIR = '/mnt/vos-s9gjtkm2/reid/dataset/imagenet/train'
- VAL_SRC_DIR = '/mnt/vos-s9gjtkm2/reid/dataset/ImageNet/ILSVRC2012/ILSVRC2012_img_val.tar'
- VAL_DEST_DIR = '/mnt/vos-s9gjtkm2/reid/dataset/imagenet/val'
- def extract_train():
- with open(TRAIN_SRC_DIR, 'rb') as f:
- tar = tarfile.open(fileobj=f, mode='r:')
- for i, item in enumerate(tar):
- cls_name = item.name.strip(".tar")
- a = tar.extractfile(item)
- b = tarfile.open(fileobj=a, mode="r:")
- e_path = "{}/{}/".format(TRAIN_DEST_DIR, cls_name)
- if not os.path.isdir(e_path):
- os.makedirs(e_path)
- print("#", i, "extract train dateset to >>>", e_path)
- b.extractall(e_path)
- # names = b.getnames()
- # for name in names:
- # b.extract(name, e_path)
- def extract_val():
- with open(VAL_SRC_DIR, 'rb') as f:
- tar = tarfile.open(fileobj=f, mode='r:')
- if not os.path.isdir(VAL_DEST_DIR):
- os.makedirs(VAL_DEST_DIR)
- print("extract val dateset to >>>", VAL_DEST_DIR)
- names = tar.getnames()
- for name in names:
- tar.extract(name, VAL_DEST_DIR)
- if __name__ == '__main__':
- extract_train()
- extract_val()
|