"""Run a YOLOv5-style ONNX detector on one image and draw the detections."""

import sys

import cv2
import numpy as np
import onnxruntime

# Labels used by infer_onnx when the caller does not pass its own class list.
DEFAULT_CLASS_NAMES = ['0', '1', '2', 'B2']


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize an image to fit inside ``new_shape`` and pad it to exactly that size.

    The aspect ratio is preserved. The leftover area is filled with ``color``
    and split between the two sides of each axis.

    Args:
        im: Image array whose first two axes are (height, width).
        new_shape: Target (height, width), or a single int for a square target.
        color: Border fill value handed to ``cv2.copyMakeBorder``.

    Returns:
        A tuple ``(padded_image, r, (dw, dh))`` where ``r`` is the scale factor
        applied to the image and ``dw`` / ``dh`` are the per-side horizontal /
        vertical padding in pixels (a multiple of 0.5).
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # One scale factor for both axes so the aspect ratio is kept.
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))  # (w, h)

    # Total padding per axis, halved so it is shared by both sides.
    dw = (new_shape[1] - new_unpad[0]) / 2
    dh = (new_shape[0] - new_unpad[1]) / 2

    im_resized = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 nudges push an odd leftover pixel to the bottom/right
    # instead of leaving the split to the rounding of an exact .5 value.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im_padded = cv2.copyMakeBorder(
        im_resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
    )
    return im_padded, r, (dw, dh)


def preprocess(image_path, input_shape=(640, 640)):
    """Load an image from disk and turn it into a normalized CHW float array.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        input_shape: Target (height, width) forwarded to ``letterbox``.

    Returns:
        A tuple ``(img0, img, ratio, dw, dh)``: the image as loaded, the
        letterboxed float32 array in channel-first order scaled to [0, 1],
        and the scale factor and per-side padding reported by ``letterbox``.

    Raises:
        AssertionError: If ``cv2.imread`` could not load ``image_path``.
    """
    img0 = cv2.imread(image_path)
    if img0 is None:
        # Raised explicitly instead of via ``assert`` so the check survives
        # ``python -O``; the exception type and message are kept as before.
        raise AssertionError(f"Image not found: {image_path}")

    img, ratio, (dw, dh) = letterbox(img0, new_shape=input_shape)
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HxWx3 to 3xHxW
    img = np.ascontiguousarray(img, dtype=np.float32) / 255.0  # normalize to 0-1
    return img0, img, ratio, dw, dh


def xywh2xyxy(x):
    """Convert rows of (center_x, center_y, width, height) to (x1, y1, x2, y2).

    Args:
        x: 2-D array; the first four columns hold the boxes.

    Returns:
        A new array with the shape and dtype of ``x`` whose first four columns
        are the corner coordinates. Any further columns are left at zero.
    """
    y = np.zeros_like(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    y[:, 0] = x[:, 0] - half_w  # x1
    y[:, 1] = x[:, 1] - half_h  # y1
    y[:, 2] = x[:, 0] + half_w  # x2
    y[:, 3] = x[:, 1] + half_h  # y2
    return y


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45):
    """Filter raw predictions by confidence and apply greedy NMS.

    Adapted from
    https://github.com/ultralytics/yolov5/blob/master/utils/general.py

    Suppression is class-agnostic: a box can suppress an overlapping box of a
    different class.

    Args:
        prediction: 2-D array with one candidate per row, laid out as
            (center_x, center_y, width, height, objectness, class scores...).
        conf_thres: Candidates whose ``objectness * best class score`` is not
            strictly greater than this value are dropped.
        iou_thres: A candidate is dropped when its IoU with an already kept,
            higher-scoring box exceeds this value.

    Returns:
        Array of shape (K, 6) with rows (x1, y1, x2, y2, score, class_index),
        ordered by decreasing score. K is 0 when nothing passes the threshold.
    """
    boxes = prediction[:, :4]
    class_scores = prediction[:, 5:]
    scores = prediction[:, 4] * class_scores.max(axis=1)
    classes = class_scores.argmax(axis=1)

    # Filter by confidence threshold.
    mask = scores > conf_thres
    boxes, scores, classes = boxes[mask], scores[mask], classes[mask]

    if boxes.shape[0] == 0:
        # Same dtype the concatenation below would produce, so callers see a
        # consistent dtype whether or not anything was detected.
        return np.empty((0, 6), dtype=np.result_type(prediction.dtype, np.float32))

    # Convert boxes to x1, y1, x2, y2.
    boxes = xywh2xyxy(boxes)
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    eps = 1e-7  # keeps the IoU finite when both boxes have zero area
    order = scores.argsort()[::-1]  # candidate indices, best score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # Intersection of the current best box with every remaining box.
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)

        ovr = inter / (areas[i] + areas[rest] - inter + eps)

        # Only boxes that do not overlap the kept one too much stay in play.
        order = rest[ovr <= iou_thres]

    return np.concatenate([
        boxes[keep],
        scores[keep, None],
        classes[keep, None].astype(np.float32),
    ], axis=1)


def infer_onnx(onnx_path, image_path, input_size=(640, 640), conf_thres=0.25,
               iou_thres=0.45, *, class_names=None, show=True):
    """Run an ONNX detector on one image, print and draw the detections.

    Args:
        onnx_path: Path of the ONNX model file.
        image_path: Path of the image to run inference on.
        input_size: Network input (height, width) used for letterboxing.
        conf_thres: Confidence threshold forwarded to ``non_max_suppression``.
        iou_thres: IoU threshold forwarded to ``non_max_suppression``.
        class_names: Optional sequence mapping class index to label. Defaults
            to ``DEFAULT_CLASS_NAMES``; indices beyond the list are shown as
            their number.
        show: When true (the default), display the annotated image in a window
            and block until a key is pressed. Pass ``False`` on machines
            without a display.

    Returns:
        Array of shape (K, 6) with rows (x1, y1, x2, y2, score, class_index),
        box coordinates expressed in pixels of the original image and clipped
        to its bounds.
    """
    if class_names is None:
        class_names = DEFAULT_CLASS_NAMES

    # 1. Preprocessing
    img0, img, ratio, dw, dh = preprocess(image_path, input_size)
    img_input = np.expand_dims(img, axis=0)  # batch size 1

    # 2. Load the ONNX model and run it
    session = onnxruntime.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: img_input})
    # Assumed to be (1, N, 5 + num_classes), e.g. (1, N, 85) for the 80 COCO
    # classes; the shape is not validated here.
    pred = outputs[0]

    # 3. NMS
    dets = non_max_suppression(pred[0], conf_thres, iou_thres)
    print("dets:", dets)  # final boxes after NMS
    print("ratio, dw, dh:", ratio, dw, dh)  # scale and offsets from preprocessing

    # 4. Map coordinates back to the original image
    if dets.shape[0]:
        dets[:, [0, 2]] -= dw  # x padding
        dets[:, [1, 3]] -= dh  # y padding
        dets[:, :4] /= ratio
        # Predictions may extend into the padding, which lands outside the
        # original image once the padding is removed; clamp to the image.
        img_h, img_w = img0.shape[:2]
        dets[:, [0, 2]] = dets[:, [0, 2]].clip(0, img_w)
        dets[:, [1, 3]] = dets[:, [1, 3]].clip(0, img_h)

    # 5. Print and draw the results
    for *box, conf, cls in dets:
        print(f"Raw class index from model: {cls}")
        x1, y1, x2, y2 = map(int, box)
        class_id = int(cls)
        class_name = class_names[class_id] if class_id < len(class_names) else str(class_id)
        label = f"{class_name} {conf:.2f}"
        print(f"Class: {class_name}, Conf: {conf:.2f}, Box: [{x1}, {y1}, {x2}, {y2}]")
        cv2.rectangle(img0, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img0, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # 6. Show the image (optional)
    if show:
        cv2.imshow("result", img0)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return dets


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python yolov5_onnx_infer.py model.onnx image.jpg")
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist in every interpreter configuration.
        sys.exit(1)
    onnx_path = sys.argv[1]
    image_path = sys.argv[2]
    infer_onnx(onnx_path, image_path)