# extract_imagenet.py -- unpack the ILSVRC2012 train and val archives.
  1. import os
  2. import tarfile
  3. TRAIN_SRC_DIR = '/mnt/vos-s9gjtkm2/reid/dataset/ImageNet/ILSVRC2012/ILSVRC2012_img_train.tar'
  4. TRAIN_DEST_DIR = '/mnt/vos-s9gjtkm2/reid/dataset/imagenet/train'
  5. VAL_SRC_DIR = '/mnt/vos-s9gjtkm2/reid/dataset/ImageNet/ILSVRC2012/ILSVRC2012_img_val.tar'
  6. VAL_DEST_DIR = '/mnt/vos-s9gjtkm2/reid/dataset/imagenet/val'
  7. def extract_train():
  8. with open(TRAIN_SRC_DIR, 'rb') as f:
  9. tar = tarfile.open(fileobj=f, mode='r:')
  10. for i, item in enumerate(tar):
  11. cls_name = item.name.strip(".tar")
  12. a = tar.extractfile(item)
  13. b = tarfile.open(fileobj=a, mode="r:")
  14. e_path = "{}/{}/".format(TRAIN_DEST_DIR, cls_name)
  15. if not os.path.isdir(e_path):
  16. os.makedirs(e_path)
  17. print("#", i, "extract train dateset to >>>", e_path)
  18. b.extractall(e_path)
  19. # names = b.getnames()
  20. # for name in names:
  21. # b.extract(name, e_path)
  22. def extract_val():
  23. with open(VAL_SRC_DIR, 'rb') as f:
  24. tar = tarfile.open(fileobj=f, mode='r:')
  25. if not os.path.isdir(VAL_DEST_DIR):
  26. os.makedirs(VAL_DEST_DIR)
  27. print("extract val dateset to >>>", VAL_DEST_DIR)
  28. names = tar.getnames()
  29. for name in names:
  30. tar.extract(name, VAL_DEST_DIR)
  31. if __name__ == '__main__':
  32. extract_train()
  33. extract_val()