"""Run a YOLOv5 ONNX model on a single image and draw the detections."""
import sys

import cv2
import numpy as np
import onnxruntime
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize an image to fit inside ``new_shape`` and pad it to that exact size.

    The image is scaled uniformly (aspect ratio preserved) so that it fits
    within ``new_shape``; constant-colour borders are then added so the
    result is exactly ``new_shape``.

    Args:
        im: Image array whose first two axes are (height, width).
        new_shape: Target ``(height, width)``, or a single int for a square.
        color: Border colour passed to ``cv2.copyMakeBorder``.

    Returns:
        A tuple ``(padded_image, ratio, (dw, dh))`` where ``ratio`` is the
        scale factor that was applied and ``dw`` / ``dh`` are the (possibly
        fractional) per-side horizontal / vertical padding in pixels.
    """
    height, width = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    ratio = min(new_shape[0] / height, new_shape[1] / width)

    # Clamp to at least one pixel: for extremely elongated images the short
    # side can round down to 0, which cv2.resize rejects.
    scaled_w = max(1, int(round(width * ratio)))
    scaled_h = max(1, int(round(height * ratio)))

    # Total padding is split evenly between the two sides of each axis.
    dw = (new_shape[1] - scaled_w) / 2
    dh = (new_shape[0] - scaled_h) / 2

    resized = cv2.resize(im, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR)

    # The -/+ 0.1 nudges a half-pixel padding (x.5) so that the leading side
    # rounds down and the trailing side rounds up; the two always sum to the
    # full padding regardless of how round() treats exact halves.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    padded = cv2.copyMakeBorder(
        resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
    )
    return padded, ratio, (dw, dh)
def preprocess(image_path, input_shape=(640, 640)):
    """Load an image from disk and convert it into a network input tensor.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        input_shape: Target ``(height, width)`` handed to ``letterbox``.

    Returns:
        A tuple ``(img0, img, ratio, dw, dh)``: the original image as read by
        OpenCV, the letterboxed float32 array in channel-first (3xHxW) layout
        scaled to the 0-1 range, and the scale ratio and per-side padding
        reported by ``letterbox``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
    """
    img0 = cv2.imread(image_path)
    # Explicit raise instead of ``assert``: assertions are stripped under
    # ``python -O`` and the failure would otherwise surface later as an
    # unrelated AttributeError on ``None``.
    if img0 is None:
        raise FileNotFoundError(f"Image not found: {image_path}")

    img, ratio, (dw, dh) = letterbox(img0, new_shape=input_shape)
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR -> RGB, HxWx3 -> 3xHxW
    # The reversed/transposed view is not contiguous; copy it into a
    # contiguous float32 buffer before scaling to the 0-1 range.
    img = np.ascontiguousarray(img, dtype=np.float32) / 255.0
    return img0, img, ratio, dw, dh
def xywh2xyxy(x):
    """Convert boxes from (center x, center y, width, height) to corner form.

    Args:
        x: NumPy array whose last axis holds ``[cx, cy, w, h, ...]``. Any
            number of leading axes is accepted, so a single 1-D box, an
            ``(N, 4)`` array or a batched ``(B, N, 4)`` array all work.

    Returns:
        A new array with the same shape and dtype as ``x`` whose last axis
        starts with ``[x1, y1, x2, y2]``. Only those four entries are
        populated; any further entries on the last axis are zero. The input
        is not modified.
    """
    y = np.zeros_like(x)
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    y[..., 0] = x[..., 0] - half_w  # x1 (left)
    y[..., 1] = x[..., 1] - half_h  # y1 (top)
    y[..., 2] = x[..., 0] + half_w  # x2 (right)
    y[..., 3] = x[..., 1] + half_h  # y2 (bottom)
    return y
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45):
    """Filter raw detections by confidence and apply greedy NMS.

    Adapted from
    https://github.com/ultralytics/yolov5/blob/master/utils/general.py

    Suppression here is class-agnostic: a box suppresses any lower-scoring
    box that overlaps it by more than ``iou_thres``, whatever its class.

    Args:
        prediction: 2-D array with one candidate per row, laid out as
            ``[cx, cy, w, h, objectness, class_score_0, class_score_1, ...]``.
        conf_thres: Candidates whose ``objectness * best class score`` is not
            strictly greater than this value are discarded.
        iou_thres: A candidate is suppressed when its IoU with an already
            kept box is strictly greater than this value.

    Returns:
        Array of shape ``(K, 6)`` with rows ``[x1, y1, x2, y2, score, class]``
        ordered by descending score; ``(0, 6)`` when nothing survives.
    """
    class_scores = prediction[:, 5:]
    scores = prediction[:, 4] * class_scores.max(axis=1)
    classes = class_scores.argmax(axis=1)

    # Drop low-confidence candidates before the quadratic NMS loop.
    mask = scores > conf_thres
    boxes = prediction[mask, :4]
    scores = scores[mask]
    classes = classes[mask]

    if boxes.shape[0] == 0:
        # Use the dtype the concatenation below would produce, so callers
        # see the same dtype whether or not anything was detected.
        return np.empty((0, 6), dtype=np.result_type(prediction.dtype, np.float32))

    boxes = xywh2xyxy(boxes)
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    order = scores.argsort()[::-1]  # candidate indices, best score first
    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)
        if rest.size == 0:
            break

        # Intersection rectangle between the best box and every remaining one.
        xx1 = np.maximum(boxes[best, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[best, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[best, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[best, 3], boxes[rest, 3])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)

        # Guarded division: two zero-area boxes have an empty union, which
        # would give 0/0 = NaN (plus a RuntimeWarning) and silently drop the
        # candidate. Treat that case as "no overlap" instead.
        union = areas[best] + areas[rest] - inter
        iou = np.zeros_like(inter)
        np.divide(inter, union, out=iou, where=union > 0)

        order = rest[iou <= iou_thres]

    keep = np.asarray(keep)
    return np.concatenate(
        [
            boxes[keep],
            scores[keep, None],
            classes[keep, None].astype(np.float32),
        ],
        axis=1,
    )
def infer_onnx(onnx_path, image_path, input_size=(640, 640), conf_thres=0.25, iou_thres=0.45,
               *, class_names=None, show=True):
    """Run an ONNX detection model on one image and draw the detections.

    Side effects: prints the detections and rescaling parameters to stdout,
    draws boxes and labels onto the loaded image and, when ``show`` is true,
    opens an OpenCV window and blocks until a key is pressed.

    Args:
        onnx_path: Path of the ONNX model file.
        image_path: Path of the image to run detection on.
        input_size: ``(height, width)`` the image is letterboxed to.
        conf_thres: Confidence threshold forwarded to ``non_max_suppression``.
        iou_thres: IoU threshold forwarded to ``non_max_suppression``.
        class_names: Optional sequence mapping class index to display name.
            Defaults to ``['0', '1', '2', 'B2']``; indices beyond the end of
            the sequence are shown as their number.
        show: Display the annotated image in a window. Pass ``False`` in
            headless environments where ``cv2.imshow`` is unavailable.

    Returns:
        Array of shape ``(K, 6)`` with rows ``[x1, y1, x2, y2, score, class]``
        in original-image pixel coordinates, clipped to the image bounds.
    """
    if class_names is None:
        class_names = ['0', '1', '2', 'B2']

    # 1. Preprocess: load, letterbox, normalise, add the batch axis.
    img0, img, ratio, dw, dh = preprocess(image_path, input_size)
    img_input = np.expand_dims(img, axis=0)  # batch size 1

    # 2. Load the ONNX model and run it on the CPU.
    session = onnxruntime.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: img_input})
    # Assumed layout (1, N, 5 + num_classes): that is what the NMS step
    # below expects -- confirm against the exported model.
    pred = outputs[0]

    # 3. Non-maximum suppression on the single batch element.
    dets = non_max_suppression(pred[0], conf_thres, iou_thres)
    print("dets:", dets)  # final boxes after NMS
    print("ratio, dw, dh:", ratio, dw, dh)  # scale and offsets from preprocessing

    # 4. Map boxes from letterboxed coordinates back onto the original image.
    if dets.shape[0]:
        dets[:, [0, 2]] -= dw  # remove horizontal padding
        dets[:, [1, 3]] -= dh  # remove vertical padding
        dets[:, :4] /= ratio
        # Predictions near the border can land slightly outside the image
        # after rescaling; clip so the returned coordinates are always valid.
        img_h, img_w = img0.shape[:2]
        dets[:, [0, 2]] = dets[:, [0, 2]].clip(0, img_w)
        dets[:, [1, 3]] = dets[:, [1, 3]].clip(0, img_h)

    # 5. Report and draw every detection.
    for *box, conf, cls in dets:
        print(f"Raw class index from model: {cls}")
        x1, y1, x2, y2 = map(int, box)
        class_id = int(cls)
        class_name = class_names[class_id] if class_id < len(class_names) else str(class_id)
        label = f"{class_name} {conf:.2f}"
        print(f"Class: {class_name}, Conf: {conf:.2f}, Box: [{x1}, {y1}, {x2}, {y2}]")
        cv2.rectangle(img0, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img0, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # 6. Optionally display the annotated image (blocks until a key press).
    if show:
        cv2.imshow("result", img0)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return dets
def main(argv=None):
    """Command-line entry point: ``script model.onnx image.jpg``.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` when omitted.

    Returns:
        Process exit status: 1 when the argument count is wrong (after
        printing a usage line), 0 after inference has run.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        print("Usage: python yolov5_onnx_infer.py model.onnx image.jpg")
        return 1

    onnx_path, image_path = args[1], args[2]
    infer_onnx(onnx_path, image_path)
    return 0


if __name__ == "__main__":
    # sys.exit rather than the bare ``exit`` helper, which is injected by the
    # ``site`` module and is not guaranteed to exist in every interpreter.
    sys.exit(main())
|