rcnn_inference.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. """
  2. 定义Faster-RCNN推理流程
  3. """
  4. import numpy as np
  5. from mindx.sdk import Tensor # mxVision 中的 Tensor 数据结构
  6. from mindx.sdk import base # mxVision 推理接口
  7. from PIL import Image
  8. from watermark_verify.utils.utils_bbox import DecodeBox
  9. class FasterRCNNInference:
  10. def __init__(self, model_path, input_size=(600, 600), num_classes=20, num_iou=0.3, confidence=0.5, swap=(2, 0, 1)):
  11. """
  12. 初始化Faster-RCNN模型推理流程
  13. :param model_path: 图像分类模型om文件路径
  14. :param input_size: 模型输入大小
  15. :param num_classes: 模型目标检测分类数
  16. :param num_iou: iou阈值
  17. :param confidence: 置信度阈值
  18. :param swap: 变换方式,pytorch需要进行轴变换(默认参数),tensorflow无需进行轴变换
  19. """
  20. self.model_path = model_path
  21. self.input_size = input_size
  22. self.swap = swap
  23. self.num_classes = num_classes
  24. self.nms_iou = num_iou
  25. self.confidence = confidence
  26. def input_processing(self, image_path):
  27. """
  28. 对输入图片进行预处理
  29. :param image_path: 图片路径
  30. :return: 图片经过处理完成的ndarray
  31. """
  32. image = Image.open(image_path)
  33. image_shape = np.array(np.shape(image)[0:2])
  34. # ---------------------------------------------------------#
  35. # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。
  36. # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
  37. # ---------------------------------------------------------#
  38. if not (len(np.shape(image)) == 3 and np.shape(image)[2] == 3):
  39. image = image.convert('RGB')
  40. image_data = resize_image(image, self.input_size, False)
  41. image_data = np.array(image_data, dtype='float32')
  42. image_data = image_data / 255.0
  43. image_data = np.expand_dims(np.transpose(image_data, self.swap).copy(), 0)
  44. image_data = image_data.astype('float32')
  45. return image_data, image_shape
  46. def predict(self, image_path):
  47. """
  48. 对单张图片进行推理
  49. :param image_path: 图片路径
  50. :return: 推理结果
  51. """
  52. image_data, image_shape = self.input_processing(image_path)
  53. # 使用mindx框架进行om权重文件推理
  54. base.mx_init()
  55. model = base.model(modelPath=self.model_path) # 初始化 base.model 类
  56. if model is None:
  57. raise Exception("模型导入失败!请检查model_path和device_id.")
  58. # 确保img_tensor是正确的输入格式
  59. input_tensors = Tensor(image_data) # 将numpy转为转为Tensor类
  60. scale = Tensor(1.0)
  61. outputs = model.infer([input_tensors, scale]) # 执行推理
  62. output = []
  63. for item in outputs:
  64. item.to_host() # 将Tensor数据转移到内存
  65. item = np.array(item)
  66. if item.size == 0:
  67. return False
  68. output.append(item)
  69. output = self.output_processing(output, image_shape)
  70. return output
  71. def output_processing(self, outputs, image_shape):
  72. """
  73. 对模型输出进行后处理工作
  74. :param outputs: 模型原始输出
  75. :param image_shape: 原始图像大小
  76. :return: 经过处理完成的模型输出
  77. """
  78. # 处理模型预测输出
  79. roi_cls_locs, roi_scores, rois, _ = outputs
  80. bbox_util = DecodeBox(self.num_classes)
  81. results = bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, self.input_size,
  82. nms_iou=self.nms_iou, confidence=self.confidence)
  83. return results
  84. def resize_image(image, size, letterbox_image):
  85. iw, ih = image.size
  86. w, h = size
  87. if letterbox_image:
  88. scale = min(w / iw, h / ih)
  89. nw = int(iw * scale)
  90. nh = int(ih * scale)
  91. image = image.resize((nw, nh), Image.BICUBIC)
  92. new_image = Image.new('RGB', size, (128, 128, 128))
  93. new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
  94. else:
  95. new_image = image.resize((w, h), Image.BICUBIC)
  96. return new_image