12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- # 数据需准备成以下格式
- # ├── 数据集路径:data_path
- # └── image:存放所有图片
- # └── train.txt:训练图片的绝对路径(或相对data_path下路径)和类别号,如-->image/mask/0.jpg 0 2<--表示该图片类别为0和2,空类别图片无类别号
- # └── val.txt:验证图片的绝对路径(或相对data_path下路径)和类别
- # └── class.txt:所有的类别名称
- import os
- import tqdm
- import argparse
- from concurrent.futures import ThreadPoolExecutor
- # -------------------------------------------------------------------------------------------------------------------- #
- # 设置
- parser = argparse.ArgumentParser(description='检查标签train.txt和val.txt中的图片是否存在')
- parser.add_argument('--data_path', default=r'D:\dataset\classification\mask', type=str, help='|数据集根目录|')
- args = parser.parse_args()
- args.train = args.data_path + '/' + 'train.txt'
- args.val = args.data_path + '/' + 'val.txt'
- # -------------------------------------------------------------------------------------------------------------------- #
- # 程序
- def _check_image(image_path):
- if not os.path.exists(image_path):
- print(f'没有找到图片:{image_path}')
- args.record += 1
- args.tqdm_show.update(1)
- def check_image(txt_path):
- with open(txt_path)as f:
- image_path_list = [_.strip().split(' ')[0] for _ in f.readlines()]
- args.record = 0
- args.tqdm_show = tqdm.tqdm(total=len(image_path_list))
- with ThreadPoolExecutor() as executer:
- executer.map(_check_image, image_path_list)
- args.tqdm_show.close()
- print(f'| {txt_path}找到图片数:{len(image_path_list) - args.record} 缺失图片数:{args.record} |')
- if __name__ == '__main__':
- check_image(args.train)
- check_image(args.val)
|