detect_pruned.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  """
  Run inference on images, videos, directories, streams, etc.

  Usage - sources:
      $ python path/to/detect.py --weights yolov5s.pt --source 0              # webcam
                                                               img.jpg        # image
                                                               vid.mp4        # video
                                                               path/          # directory
                                                               path/*.jpg     # glob
                                                               'https://youtu.be/Zgi9g1ksQHc'  # YouTube
                                                               'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream

  Usage - formats:
      $ python path/to/detect.py --weights yolov5s.pt                 # PyTorch
                                           yolov5s.torchscript        # TorchScript
                                           yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
                                           yolov5s.xml                # OpenVINO
                                           yolov5s.engine             # TensorRT
                                           yolov5s.mlmodel            # CoreML (MacOS-only)
                                           yolov5s_saved_model        # TensorFlow SavedModel
                                           yolov5s.pb                 # TensorFlow GraphDef
                                           yolov5s.tflite             # TensorFlow Lite
                                           yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
  """
  24. import argparse
  25. import os
  26. import sys
  27. from pathlib import Path
  28. import cv2
  29. import torch
  30. import torch.backends.cudnn as cudnn
  # Locate this script so the project-local packages (models/, utils/) import from any working directory.
  FILE = Path(__file__).resolve()
  ROOT = FILE.parents[0]  # YOLOv5 root directory
  if str(ROOT) not in sys.path:
      sys.path.append(str(ROOT))  # add ROOT to PATH
  try:
      ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
  except ValueError:
      # os.path.relpath raises ValueError on Windows when ROOT and the cwd sit on different drives;
      # the absolute ROOT computed above is still valid, so keep it instead of crashing at import.
      pass
  36. from models.common import DetectPrunedMultiBackend
  37. from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
  38. from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
  39. increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
  40. from utils.plots import Annotator, colors, save_one_box
  41. from utils.torch_utils import select_device, time_sync
  def _write_video_frame(vid_path, vid_writer, i, save_path, vid_cap, im0):
      """Append frame im0 to the i-th results video, opening a new writer when the target path changes.

      vid_path and vid_writer are the per-source bookkeeping lists owned by run(); both are updated in place.
      """
      if vid_path[i] != save_path:  # new video
          vid_path[i] = save_path
          if isinstance(vid_writer[i], cv2.VideoWriter):
              vid_writer[i].release()  # release previous video writer
          if vid_cap:  # video
              fps = vid_cap.get(cv2.CAP_PROP_FPS)
              w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
              h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
          else:  # stream
              fps, w, h = 30, im0.shape[1], im0.shape[0]
          save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
          vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
      vid_writer[i].write(im0)


  @torch.no_grad()
  def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
          source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
          data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
          imgsz=(640, 640),  # inference size (height, width)
          conf_thres=0.25,  # confidence threshold
          iou_thres=0.45,  # NMS IOU threshold
          max_det=1000,  # maximum detections per image
          device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
          view_img=False,  # show results
          save_txt=False,  # save results to *.txt
          save_conf=False,  # save confidences in --save-txt labels
          save_crop=False,  # save cropped prediction boxes
          nosave=False,  # do not save images/videos
          classes=None,  # filter by class: --class 0, or --class 0 2 3
          agnostic_nms=False,  # class-agnostic NMS
          augment=False,  # augmented inference
          visualize=False,  # visualize features
          update=False,  # update all models
          project=ROOT / 'runs/detect',  # save results to project/name
          name='exp',  # save results to project/name
          exist_ok=False,  # existing project/name ok, do not increment
          line_thickness=3,  # bounding box thickness (pixels)
          hide_labels=False,  # hide labels
          hide_conf=False,  # hide confidences
          half=False,  # use FP16 half-precision inference
          dnn=False,  # use OpenCV DNN for ONNX inference
          ):
      """Run detection with a pruned model over `source` and save/show the annotated results.

      The function classifies `source` (file, URL, stream/webcam), creates the run directory under
      project/name, loads the model through DetectPrunedMultiBackend, then for every batch runs
      pre-processing, inference and NMS. Per image it optionally writes label files (save_txt),
      crops (save_crop), annotated images/videos (unless nosave) and shows a preview (view_img).
      Finally it logs the average per-image timings and where the results were saved.

      `data` is accepted so the CLI namespace can be passed straight through, but it is not
      referenced inside this function. Returns None.
      """
      source = str(source)
      save_img = not nosave and not source.endswith('.txt')  # save inference images
      is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
      is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
      webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
      if is_url and is_file:
          source = check_file(source)  # download

      # Directories
      save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
      (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

      # Load model
      device = select_device(device)
      LOGGER.info(f'weights: {weights}')  # routed through LOGGER like every other message in this function
      model = DetectPrunedMultiBackend(weights, device=device, dnn=dnn)
      stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
      imgsz = check_img_size(imgsz, s=stride)  # check image size

      # Half
      half &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
      if pt or jit:
          model.model.half() if half else model.model.float()

      # Dataloader
      if webcam:
          view_img = check_imshow()
          cudnn.benchmark = True  # set True to speed up constant image size inference
          dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
          bs = len(dataset)  # batch_size
      else:
          dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
          bs = 1  # batch_size
      vid_path, vid_writer = [None] * bs, [None] * bs

      # Run inference
      model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half)  # warmup
      dt, seen = [0.0, 0.0, 0.0], 0
      try:
          for path, im, im0s, vid_cap, s in dataset:
              t1 = time_sync()
              im = torch.from_numpy(im).to(device)
              im = im.half() if half else im.float()  # uint8 to fp16/32
              im /= 255  # 0 - 255 to 0.0 - 1.0
              if len(im.shape) == 3:
                  im = im[None]  # expand for batch dim
              t2 = time_sync()
              dt[0] += t2 - t1

              # Inference (a separate local keeps the `visualize` flag itself untouched between batches)
              vis_dir = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
              pred = model(im, augment=augment, visualize=vis_dir)
              t3 = time_sync()
              dt[1] += t3 - t2

              # NMS
              pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
              dt[2] += time_sync() - t3

              # Second-stage classifier (optional)
              # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

              # Process predictions
              for i, det in enumerate(pred):  # per image
                  seen += 1
                  if webcam:  # batch_size >= 1
                      p, im0, frame = path[i], im0s[i].copy(), dataset.count
                      s += f'{i}: '
                  else:
                      p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

                  p = Path(p)  # to Path
                  save_path = str(save_dir / p.name)  # im.jpg
                  # im.txt (video/stream frames get a _<frame> suffix)
                  txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')
                  s += '%gx%g ' % im.shape[2:]  # print string
                  gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                  imc = im0.copy() if save_crop else im0  # for save_crop
                  annotator = Annotator(im0, line_width=line_thickness, example=str(names))
                  if len(det):
                      # Rescale boxes from img_size to im0 size
                      det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                      # Print results
                      for c in det[:, -1].unique():
                          n = (det[:, -1] == c).sum()  # detections per class
                          s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                      # Write results
                      label_lines = []  # buffered so the label file is opened once per image, not once per box
                      for *xyxy, conf, cls in reversed(det):
                          if save_txt:  # Write to file
                              # normalized xywh
                              xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
                              line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                              label_lines.append(('%g ' * len(line)).rstrip() % line + '\n')

                          if save_img or save_crop or view_img:  # Add bbox to image
                              c = int(cls)  # integer class
                              label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                              annotator.box_label(xyxy, label, color=colors(c, True))
                              if save_crop:
                                  crop_file = save_dir / 'crops' / names[c] / f'{p.stem}.jpg'
                                  save_one_box(xyxy, imc, file=crop_file, BGR=True)

                      if label_lines:
                          with open(txt_path + '.txt', 'a') as f:
                              f.writelines(label_lines)

                  # Stream results
                  im0 = annotator.result()
                  if view_img:
                      cv2.imshow(str(p), im0)
                      cv2.waitKey(1)  # 1 millisecond

                  # Save results (image with detections)
                  if save_img:
                      if dataset.mode == 'image':
                          cv2.imwrite(save_path, im0)
                      else:  # 'video' or 'stream'
                          _write_video_frame(vid_path, vid_writer, i, save_path, vid_cap, im0)

              # Print time (inference-only)
              LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
      finally:
          # Release every results video that is still open, including when inference is interrupted,
          # instead of leaving the writers to be cleaned up implicitly.
          for writer in vid_writer:
              if isinstance(writer, cv2.VideoWriter):
                  writer.release()

      # Print results
      t = tuple(x / max(seen, 1) * 1E3 for x in dt)  # speeds per image; max() avoids 0-division when nothing was read
      LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
      if save_txt or save_img:
          s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
          LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
      if update:
          strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
  def parse_opt():
      """Build the command-line parser for detection, parse sys.argv and return the options namespace.

      A single --imgsz value is repeated so that opt.imgsz always holds two entries (h, w).
      The parsed options are printed through print_args before being returned.
      """
      # (flag names, add_argument keyword options), registered in this exact order
      arg_specs = (
          (('--weights',), {'nargs': '+', 'type': str,
                            'default': ROOT / 'VOC2007_wm/finetune_pruned/exp/weights/best.pt',
                            'help': 'model path(s)'}),
          (('--source',), {'type': str, 'default': ROOT / 'datasets/VOC2007_wm/images',
                           'help': 'file/dir/URL/glob, 0 for webcam'}),
          (('--data',), {'type': str, 'default': ROOT / 'data/VOC.yaml', 'help': '(optional) dataset.yaml path'}),
          (('--imgsz', '--img', '--img-size'), {'nargs': '+', 'type': int, 'default': [640],
                                                'help': 'inference size h,w'}),
          (('--conf-thres',), {'type': float, 'default': 0.25, 'help': 'confidence threshold'}),
          (('--iou-thres',), {'type': float, 'default': 0.45, 'help': 'NMS IoU threshold'}),
          (('--max-det',), {'type': int, 'default': 1000, 'help': 'maximum detections per image'}),
          (('--device',), {'default': '', 'help': 'cuda device, i.e. 0 or 0,1,2,3 or cpu'}),
          (('--view-img',), {'action': 'store_true', 'help': 'show results'}),
          (('--save-txt',), {'action': 'store_true', 'help': 'save results to *.txt'}),
          (('--save-conf',), {'action': 'store_true', 'help': 'save confidences in --save-txt labels'}),
          (('--save-crop',), {'action': 'store_true', 'help': 'save cropped prediction boxes'}),
          (('--nosave',), {'action': 'store_true', 'help': 'do not save images/videos'}),
          (('--classes',), {'nargs': '+', 'type': int,
                            'help': 'filter by class: --classes 0, or --classes 0 2 3'}),
          (('--agnostic-nms',), {'action': 'store_true', 'help': 'class-agnostic NMS'}),
          (('--augment',), {'action': 'store_true', 'help': 'augmented inference'}),
          (('--visualize',), {'action': 'store_true', 'help': 'visualize features'}),
          (('--update',), {'action': 'store_true', 'help': 'update all models'}),
          (('--project',), {'default': ROOT / 'VOC2007_wm/finetune_pruned/detect',
                            'help': 'save results to project/name'}),
          (('--name',), {'default': 'exp', 'help': 'save results to project/name'}),
          (('--exist-ok',), {'action': 'store_true', 'help': 'existing project/name ok, do not increment'}),
          (('--line-thickness',), {'default': 3, 'type': int, 'help': 'bounding box thickness (pixels)'}),
          (('--hide-labels',), {'default': False, 'action': 'store_true', 'help': 'hide labels'}),
          (('--hide-conf',), {'default': False, 'action': 'store_true', 'help': 'hide confidences'}),
          (('--half',), {'action': 'store_true', 'help': 'use FP16 half-precision inference'}),
          (('--dnn',), {'action': 'store_true', 'help': 'use OpenCV DNN for ONNX inference'}),
      )

      cli = argparse.ArgumentParser()
      for flag_names, options in arg_specs:
          cli.add_argument(*flag_names, **options)

      opt = cli.parse_args()
      if len(opt.imgsz) == 1:
          opt.imgsz *= 2  # expand
      print_args(FILE.stem, opt)
      return opt
  def main(opt):
      """Check the installed requirements, then call run() with the parsed options as keyword arguments."""
      skipped_packages = ('tensorboard', 'thop')
      check_requirements(exclude=skipped_packages)
      run_kwargs = vars(opt)
      run(**run_kwargs)
  if __name__ == "__main__":
      # Script entry point: parse the command line, then hand the options to main().
      cli_options = parse_opt()
      main(cli_options)