check_image.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
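# The dataset must be prepared in the following layout:
# ├── dataset root: data_path
#     └── image: holds all of the images
#     └── train.txt: absolute path (or path relative to data_path) of each training image followed by its class numbers, e.g. -->image/mask/0.jpg 0 2<-- means the image belongs to classes 0 and 2; an image with no class has no class numbers
#     └── val.txt: absolute path (or path relative to data_path) of each validation image followed by its classes
#     └── class.txt: the names of all classes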
import argparse
import os
import threading
from concurrent.futures import ThreadPoolExecutor

import tqdm
  11. # -------------------------------------------------------------------------------------------------------------------- #
  12. # 设置
  13. parser = argparse.ArgumentParser(description='检查标签train.txt和val.txt中的图片是否存在')
  14. parser.add_argument('--data_path', default=r'D:\dataset\classification\mask', type=str, help='|数据集根目录|')
  15. args = parser.parse_args()
  16. args.train = args.data_path + '/' + 'train.txt'
  17. args.val = args.data_path + '/' + 'val.txt'
  18. # -------------------------------------------------------------------------------------------------------------------- #
  19. # 程序
  20. def _check_image(image_path):
  21. if not os.path.exists(image_path):
  22. print(f'没有找到图片:{image_path}')
  23. args.record += 1
  24. args.tqdm_show.update(1)
  25. def check_image(txt_path):
  26. with open(txt_path)as f:
  27. image_path_list = [_.strip().split(' ')[0] for _ in f.readlines()]
  28. args.record = 0
  29. args.tqdm_show = tqdm.tqdm(total=len(image_path_list))
  30. with ThreadPoolExecutor() as executer:
  31. executer.map(_check_image, image_path_list)
  32. args.tqdm_show.close()
  33. print(f'| {txt_path}找到图片数:{len(image_path_list) - args.record} 缺失图片数:{args.record} |')
  34. if __name__ == '__main__':
  35. check_image(args.train)
  36. check_image(args.val)