import onnxruntime
import numpy as np
import cv2

def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize an image to fit inside ``new_shape`` and pad it to that exact size.

    The aspect ratio is preserved: the image is scaled by the largest factor
    that still fits in ``new_shape`` and the remaining space is filled with a
    constant ``color`` border, split as evenly as possible between both sides.

    Args:
        im: Image array whose first two axes are (height, width).
        new_shape: Target (height, width); a single int means a square target.
        color: Border colour passed to ``cv2.copyMakeBorder``.

    Returns:
        Tuple ``(padded_image, r, (dw, dh))`` where ``r`` is the scale factor
        applied to the image and ``dw`` / ``dh`` are half of the total
        horizontal / vertical padding (they may end in ``.5``).
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Largest uniform scale for which the image still fits in the target.
    r = min(dst_h / src_h, dst_w / src_w)
    scaled_w = int(round(src_w * r))
    scaled_h = int(round(src_h * r))

    # Half of the leftover space on each axis.
    dw = (dst_w - scaled_w) / 2
    dh = (dst_h - scaled_h) / 2

    resized = cv2.resize(im, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 nudges make an odd leftover split as k / k+1 pixels, so the
    # two borders always add up to the full padding.
    pad_top = int(round(dh - 0.1))
    pad_bottom = int(round(dh + 0.1))
    pad_left = int(round(dw - 0.1))
    pad_right = int(round(dw + 0.1))
    padded = cv2.copyMakeBorder(
        resized, pad_top, pad_bottom, pad_left, pad_right,
        cv2.BORDER_CONSTANT, value=color,
    )

    return padded, r, (dw, dh)

def preprocess(image_path, input_shape=(640, 640)):
    """Read an image from disk and convert it into a network input array.

    Args:
        image_path: Path handed to ``cv2.imread``.
        input_shape: Target (height, width) forwarded to ``letterbox``.

    Returns:
        Tuple ``(img0, img, ratio, dw, dh)``: the image as loaded, the
        letterboxed image as a contiguous float32 array in channel-first
        (3, H, W) layout scaled to the 0-1 range with the channel order
        reversed, and the scale factor and half-paddings from ``letterbox``.

    Raises:
        AssertionError: If the image could not be read. Note that this check
            is an ``assert`` and is therefore skipped under ``python -O``.
    """
    img0 = cv2.imread(image_path)
    assert img0 is not None, f"Image not found: {image_path}"

    padded, ratio, half_pad = letterbox(img0, new_shape=input_shape)
    dw, dh = half_pad

    # Reverse the channel axis (BGR -> RGB), then move channels first: HWC -> CHW.
    rgb_chw = np.transpose(padded[:, :, ::-1], (2, 0, 1))
    # The reversed/transposed view is not contiguous; copy it as float32 and
    # scale pixel values to 0-1.
    img = np.ascontiguousarray(rgb_chw, dtype=np.float32) / 255.0

    return img0, img, ratio, dw, dh

def xywh2xyxy(x):
    """Convert boxes from centre format to corner format.

    Args:
        x: NumPy array whose last axis starts with ``cx, cy, w, h``. Any
            number of leading axes is accepted, so a single box of shape
            ``(4,)``, a list of boxes ``(N, 4)`` or a batch ``(B, N, 4)``
            are all valid.

    Returns:
        A new array with the same shape and dtype as ``x`` whose first four
        entries along the last axis are ``x1, y1, x2, y2``. Any further
        entries along the last axis are left at zero.
    """
    y = np.zeros_like(x)
    # Ellipsis indexing addresses the last axis regardless of how many
    # leading axes there are, instead of assuming exactly two dimensions.
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    y[..., 0] = x[..., 0] - half_w  # x1
    y[..., 1] = x[..., 1] - half_h  # y1
    y[..., 2] = x[..., 0] + half_w  # x2
    y[..., 3] = x[..., 1] + half_h  # y2
    return y

def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=True):
    """Filter detections by confidence and apply greedy Non-Maximum Suppression.

    Adapted from https://github.com/ultralytics/yolov5/blob/master/utils/general.py

    Args:
        prediction: 2-D array with one candidate per row laid out as
            ``cx, cy, w, h, confidence, class scores...``.
        conf_thres: Only candidates whose ``confidence * best class score``
            is strictly greater than this value are considered.
        iou_thres: A candidate is dropped when its IoU with an already kept,
            higher-scoring box is greater than this value.
        agnostic: When True (the default) boxes suppress each other regardless
            of their class. When False a box can only be suppressed by a box
            of the same class.

    Returns:
        Array of shape ``(M, 6)`` with rows ``x1, y1, x2, y2, score, class
        index`` sorted by descending score, or an empty ``(0, 6)`` array when
        no candidate passes ``conf_thres``.
    """
    class_scores = prediction[:, 5:]
    classes = class_scores.argmax(axis=1)
    scores = prediction[:, 4] * class_scores.max(axis=1)

    # Filter by confidence threshold
    mask = scores > conf_thres
    boxes = prediction[:, :4][mask]
    scores = scores[mask]
    classes = classes[mask]

    if boxes.shape[0] == 0:
        return np.empty((0, 6))

    # Convert boxes to x1,y1,x2,y2
    boxes = xywh2xyxy(boxes)

    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    order = scores.argsort()[::-1]

    # Floor for the IoU denominator: two zero-area boxes would otherwise give
    # 0/0 = NaN, which raises a RuntimeWarning and silently drops the box
    # because ``NaN <= iou_thres`` is False. Normal unions are unaffected.
    min_union = 1e-12

    keep = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        keep.append(i)

        # Intersection rectangle of the best box with every remaining box.
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])

        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        union = areas[i] + areas[rest] - inter
        iou = inter / np.maximum(union, min_union)

        survives = iou <= iou_thres
        if not agnostic:
            # Boxes of another class are never suppressed by this one.
            survives |= classes[rest] != classes[i]
        order = rest[survives]

    return np.concatenate([
        boxes[keep],
        scores[keep, None],
        classes[keep, None].astype(np.float32)
    ], axis=1)

def infer_onnx(onnx_path, image_path, input_size=(640, 640), conf_thres=0.25, iou_thres=0.45):
    """Run an ONNX detector on one image, then print and display the detections.

    Args:
        onnx_path: Path of the ONNX model file loaded with onnxruntime.
        image_path: Path of the image to run detection on.
        input_size: (height, width) the image is letterboxed to before inference.
        conf_thres: Confidence threshold forwarded to ``non_max_suppression``.
        iou_thres: IoU threshold forwarded to ``non_max_suppression``.

    Returns:
        Array of shape ``(M, 6)`` with rows ``x1, y1, x2, y2, score, class
        index``. Box coordinates are in pixels of the original image and are
        clipped to the image bounds.

    Side effects:
        Prints the detections, draws them onto the loaded image and opens an
        OpenCV window that blocks until a key is pressed.
    """
    # 1. Preprocess
    img0, img, ratio, dw, dh = preprocess(image_path, input_size)
    img_input = np.expand_dims(img, axis=0)  # batch size 1

    # 2. Load the ONNX model and run inference on the CPU provider
    session = onnxruntime.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])

    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: img_input})

    # NOTE(review): presumably shape (1, N, 5 + num_classes); confirm against
    # the exported model.
    pred = outputs[0]

    # 3. Non-maximum suppression
    dets = non_max_suppression(pred[0], conf_thres, iou_thres)

    print("dets:", dets)           # final boxes after NMS
    print("ratio, dw, dh:", ratio, dw, dh)  # scale and offsets returned by preprocessing

    # 4. Map coordinates back onto the original image
    if dets.shape[0]:
        dets[:, [0, 2]] -= dw  # x padding
        dets[:, [1, 3]] -= dh  # y padding
        dets[:, :4] /= ratio
        # Boxes near the border can land slightly outside the image after
        # undoing the letterbox; clip them so callers never receive negative
        # or out-of-range coordinates.
        img_h, img_w = img0.shape[:2]
        dets[:, [0, 2]] = dets[:, [0, 2]].clip(0, img_w)
        dets[:, [1, 3]] = dets[:, [1, 3]].clip(0, img_h)

    CLASS_NAMES = ['0', '1', '2', 'B2']

    # 5. Print and draw the results
    for *box, conf, cls in dets:
        print(f"Raw class index from model: {cls}")
        x1, y1, x2, y2 = map(int, box)
        class_id = int(cls)
        # Fall back to the numeric index when the model reports a class that
        # has no entry in CLASS_NAMES.
        class_name = CLASS_NAMES[class_id] if class_id < len(CLASS_NAMES) else str(class_id)
        label = f"{class_name} {conf:.2f}"
        print(f"Class: {class_name}, Conf: {conf:.2f}, Box: [{x1}, {y1}, {x2}, {y2}]")
        cv2.rectangle(img0, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img0, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # 6. Show the image (can be commented out)
    cv2.imshow("result", img0)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return dets

if __name__ == "__main__":
    import sys

    # Command-line entry point: expects exactly the model path and the image path.
    if len(sys.argv) != 3:
        print("Usage: python yolov5_onnx_infer.py model.onnx image.jpg")
        # Use sys.exit rather than the bare exit() helper, which is injected
        # by the site module and is not guaranteed to exist (e.g. python -S).
        sys.exit(1)

    onnx_path = sys.argv[1]
    image_path = sys.argv[2]
    infer_onnx(onnx_path, image_path)