Sfoglia il codice sorgente

减少白盒水印日志打印、新增性能测试脚本

liyan 9 mesi fa
parent
commit
8b76c787ec

+ 175 - 0
tests/performance_loss_test.py

@@ -0,0 +1,175 @@
+"""
+测试性能损失脚本,通过比较推理过程中CPU、GPU占用、推理时间来进行计算
+需要安装指定python库实现功能
+pip install psutil gputil pynvml
+"""
+import argparse
+import os
+
+import psutil
+import GPUtil
+import numpy as np
+import time
+from threading import Thread
+import onnxruntime as ort
+from PIL import Image
+
+
class UsageMonitor:
    """Samples CPU and GPU utilization on a background thread.

    Usage: call start() before the workload, stop() afterwards, then
    get_average_usage() for the mean utilization over the run.
    """

    def __init__(self, interval=0.5):
        # Sampling period, in seconds, between utilization readings.
        self.interval = interval
        self.cpu_usage = []
        self.gpu_usage = []
        self.running = False
        # Created lazily by start(); keeping it initialized makes
        # stop()-before-start() a safe no-op instead of an AttributeError.
        self.monitor_thread = None

    def start(self):
        """Begin sampling in a daemon thread (won't block interpreter exit)."""
        self.running = True
        self.monitor_thread = Thread(target=self._monitor, daemon=True)
        self.monitor_thread.start()

    def _monitor(self):
        """Sampling loop: record CPU %, first-GPU load %, then sleep."""
        while self.running:
            # System-wide CPU percentage since the previous call.
            self.cpu_usage.append(psutil.cpu_percent(interval=None))

            # Load of the first GPU, or 0 when no GPU is present.
            gpus = GPUtil.getGPUs()
            self.gpu_usage.append(gpus[0].load * 100 if gpus else 0)

            time.sleep(self.interval)

    def stop(self):
        """Stop sampling and wait for the monitor thread to finish."""
        self.running = False
        if self.monitor_thread is not None:
            self.monitor_thread.join()

    def get_average_usage(self):
        """Return (avg_cpu_percent, avg_gpu_percent); (0.0, 0.0) if no samples yet."""
        if not self.cpu_usage:
            # Avoid numpy's nan/RuntimeWarning on an empty sample list.
            return 0.0, 0.0
        return float(np.mean(self.cpu_usage)), float(np.mean(self.gpu_usage))
+
+
def process_image(image_path, size=(224, 224)):
    """Load an image and preprocess it for ImageNet-style classification.

    :param image_path: path of the image file to load
    :param size: (width, height) to resize to; defaults to the 224x224
        input the models under test expect (new, backward-compatible param)
    :return: float32 array of shape (3, height, width), normalized with
        ImageNet mean/std
    """
    # Force 3-channel RGB so grayscale/RGBA inputs don't break the pipeline.
    image = Image.open(image_path).convert("RGB")
    image = image.resize(size)

    # Scale pixel values to [0, 1], then standardize with ImageNet statistics.
    image_array = np.array(image) / 255.0
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image_array = (image_array - mean) / std

    # HWC -> CHW, contiguous copy as expected by the ONNX model input layout.
    image_array = image_array.transpose((2, 0, 1)).copy()

    return image_array.astype(np.float32)
+
+
def batch_predict_images(session, image_dir, target_class, batch_size=10):
    """
    Run batched classification over every image in a folder.

    :param session: onnx runtime session
    :param image_dir: directory containing the images to classify
    :param target_class: expected class index for every image in this folder
    :param batch_size: number of images per inference batch
    :return: accuracy in [0, 1] — fraction of images predicted as
        target_class (0 when the folder holds no matching images)
    """
    image_files = [f for f in os.listdir(image_dir)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    input_name = session.get_inputs()[0].name
    correct_predictions = 0
    total_predictions = 0

    for i in range(0, len(image_files), batch_size):
        batch_files = image_files[i:i + batch_size]

        # Stack the preprocessed images into a (batch, 3, 224, 224) tensor.
        batch_images = np.stack([
            process_image(os.path.join(image_dir, image_file))
            for image_file in batch_files
        ])

        outputs = session.run(None, {input_name: batch_images})

        # outputs[0] holds per-class scores; argmax is the predicted class.
        for j in range(len(batch_files)):
            predicted_class = np.argmax(outputs[0][j])
            total_predictions += 1
            if predicted_class == target_class:
                correct_predictions += 1

    # Guard against an empty folder to avoid ZeroDivisionError.
    accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
    return accuracy
+
+
def model_inference(model_filename, val_dataset_dir):
    """
    Run validation-set inference and compute the mean per-class accuracy.

    Each immediate subdirectory of val_dataset_dir is treated as one class;
    classes are assigned indices 0, 1, 2, ... in sorted name order so the
    mapping is deterministic across filesystems (os.listdir order is
    arbitrary, which previously made the label mapping nondeterministic).
    The sorted order must match the label order used at training time.

    :param model_filename: path to the ONNX model file
    :param val_dataset_dir: root directory of the validation images
    :return: accuracy averaged over all class directories
    :raises ValueError: if val_dataset_dir contains no class subdirectories
    """
    # CPU provider is sufficient here; pass providers=['CUDAExecutionProvider']
    # explicitly when GPU benchmarking is desired.
    session = ort.InferenceSession(model_filename)
    accuracy = 0
    class_num = 0
    index = 0
    for class_dir in sorted(os.listdir(val_dataset_dir)):
        class_path = os.path.join(val_dataset_dir, class_dir)
        # Skip stray files in the dataset root.
        if not os.path.isdir(class_path):
            continue
        class_num += 1
        batch_result = batch_predict_images(session, class_path, index)
        accuracy += batch_result
        index += 1
    print(f"class_num: {class_num}, index: {index}")
    if class_num == 0:
        # Averaging over zero classes would raise ZeroDivisionError.
        raise ValueError(f"no class subdirectories found under {val_dataset_dir}")
    return accuracy * 1.0 / class_num
+
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='模型推理性能验证脚本')
    parser.add_argument('--model_file', default="test_models/googlenet/model_139.onnx", type=str, help='待测试模型的onnx文件')
    parser.add_argument('--val_dataset_dir', default="val", type=str, help='验证集目录')
    args, _ = parser.parse_known_args()

    # Defensive checks, kept in case the hard-coded defaults are removed later.
    if args.model_file is None:
        raise Exception("待测试模型的onnx文件不可为空")
    if args.val_dataset_dir is None:
        raise Exception("验证集目录不可为空")

    # Sample CPU/GPU utilization every 0.5 s while inference runs.
    monitor = UsageMonitor(interval=0.5)
    monitor.start()

    # Time the whole inference pass over the validation set.
    start_time = time.time()
    accuracy = model_inference(args.model_file, args.val_dataset_dir)
    end_time = time.time()

    monitor.stop()
    avg_cpu, avg_gpu = monitor.get_average_usage()

    # Report average resource usage, wall-clock time, and accuracy.
    print(f"平均 CPU 使用率:{avg_cpu:.2f}%")
    print(f"平均 GPU 使用率:{avg_gpu:.2f}%")
    print(f"模型推理时间: {end_time - start_time:.2f} 秒")
    print(f"准确率: {accuracy * 100:.2f}%")

+ 29 - 9
tests/prune_tool.py

@@ -2,12 +2,14 @@
 对onnx权重文件进行规则剪枝
 """
 import argparse
+import os
 
 import onnx
 import numpy as np
 from onnx import numpy_helper
 
-def prune_weights(model, pruned_model, pruning_percentage=0.05):
+def prune_weights(model_path, pruned_model, pruning_percentage=0.05):
+    model = onnx.load(model_path)
     # 获取所有权重的初始化器
     weight_initializers = [init for init in model.graph.initializer if
                            init.name in {node.input[1] for node in model.graph.node if node.op_type == 'Conv'}]
@@ -46,14 +48,32 @@ def prune_weights(model, pruned_model, pruning_percentage=0.05):
     onnx.save(new_model, pruned_model)
 
 
def find_onnx_files(root_dir):
    """Recursively collect the full paths of all .onnx files under root_dir."""
    return [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(root_dir)
        for filename in filenames
        if filename.endswith('.onnx')
    ]
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='模型文件剪枝工具')
-    parser.add_argument('--model', default=None, type=str, help='待剪枝的模型文件位置,仅支持onnx格式')
-    parser.add_argument('--pruned_model', default=None, type=str, help='剪枝模型文件保存位置')
-    parser.add_argument('--percent', default=0.01, type=float, help='规则剪枝百分比')
+    # parser.add_argument('--target_dir', default=None, type=str, help='待剪枝的模型文件存放根目录,支持子文件夹递归处理')
+    parser.add_argument('--target_dir', default="test_models/googlenet", type=str, help='待剪枝的模型文件存放根目录,支持子文件夹递归处理')
+    parser.add_argument('--pruned_saved_dir', default=None, type=str, help='剪枝模型文件保存目录,默认为None,表示与原始onnx权重文件放在同一目录下')
+    parser.add_argument('--percent', default=0.05, type=float, help='规则剪枝百分比')
     args, _ = parser.parse_known_args()
-    if args.model is None:
-        raise Exception("模型权重文件位置不可为空")
-    if args.pruned_model is None:
-        raise Exception("模型剪枝权重保存位置不可为空")
-    prune_weights(model=args.model, pruned_model=args.pruned_model, pruning_percentage=args.percent)
+    if args.target_dir is None:
+        raise Exception("模型目录参数不可为空")
+
+    onnx_files = find_onnx_files(args.target_dir)
+    for onnx_file in onnx_files:
+        if args.pruned_saved_dir:
+            pruned_file = args.pruned_saved_dir + '/' + os.path.basename(onnx_file).replace('.onnx', '_pruned.onnx')
+        else:
+            pruned_file = onnx_file.replace('.onnx', '_pruned.onnx')
+        prune_weights(model_path=onnx_file, pruned_model=pruned_file, pruning_percentage=args.percent)

+ 81 - 0
tests/verify_tool_accuracy_test.py

@@ -0,0 +1,81 @@
+import argparse
+import os
+
+from watermark_verify import verify_tool
+
# Model-name keywords grouped by task category; a model directory is selected
# when its name contains one of the keywords for the chosen --model_type.
model_types = {
    "classification": [
        "alexnet", "googlenet", "resnet", "vgg16"
    ],
    "object_detection": [
        "ssd", "yolox", "rcnn"
    ],
}
+
def find_onnx_files(root_dir):
    """Walk root_dir recursively and return the paths of every .onnx file."""
    matches = []
    for dirpath, _, filenames in os.walk(root_dir):
        matches.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith('.onnx')
        )
    return matches
+
def filter_model_dirs(model_dir, targets):
    """Return True when model_dir's name contains any of the target keywords."""
    return any(target in model_dir for target in targets)
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='模型标签验证准确率验证脚本')
    parser.add_argument('--model_type', default="classification", type=str, help='按照模型分类过滤,用于区分是目标检测模型还是图像分类模型,可选参数:classification、objection_detect')
    parser.add_argument('--model_value', default="googlenet", type=str, help='按照模型名称过滤,可选参数:alexnet、googlenet、resnet、vgg16、ssd、yolox、rcnn')
    parser.add_argument('--target_dir', default="test_models", type=str,
                        help='模型文件存放根目录,支持子文件夹递归处理')
    parser.add_argument('--except_result', default="True", type=str, help='模型推理预期结果。默认为None')
    parser.add_argument('--model_file_filter', default="pruned", type=str,
                        help='按照模型文件名过滤, 比如剪枝模型文件名存在pruned。默认为None')

    args, _ = parser.parse_known_args()
    if args.target_dir is None:
        raise Exception("模型目录参数不可为空")
    if args.model_type is None:
        raise Exception("模型类型参数不可为空")
    if args.except_result is None:
        raise Exception("模型推理预期结果不可为空")

    # Collect candidate model directories, filtered by task type and
    # (optionally) by model-name keyword.
    model_dirs = [item for item in os.listdir(args.target_dir) if os.path.isdir(os.path.join(args.target_dir, item))]
    if args.model_type:
        filter_models = model_types[args.model_type]
        model_dirs = [item for item in model_dirs if filter_model_dirs(item, filter_models)]
    if args.model_value:
        model_dirs = [item for item in model_dirs if args.model_value.lower() in item.lower()]

    # For each matching model directory, run label verification on every
    # selected onnx file and report the fraction matching the expected result.
    for model_dir in model_dirs:
        total = 0
        correct = 0
        onnx_files = find_onnx_files(os.path.join(args.target_dir, model_dir))
        onnx_files = [os.path.abspath(item) for item in onnx_files]
        if args.model_file_filter:
            onnx_files = [item for item in onnx_files if args.model_file_filter in item]
        else:
            # By default exclude pruned variants so only original weights are checked.
            onnx_files = [item for item in onnx_files if "pruned" not in item]
        print(f"model_name: {model_dir}\nonnx_files:")
        print(*onnx_files, sep='\n')
        if not onnx_files:
            # Avoid ZeroDivisionError when the filter matches no files.
            print(f"model_name: {model_dir}, no onnx files matched the filter, skipped")
            continue
        for onnx_file in onnx_files:
            verify_result = verify_tool.label_verification(onnx_file)
            total += 1
            if str(verify_result) == args.except_result:
                correct += 1
        print(f"model_name: {model_dir}, accuracy: {correct * 100.0 / total}%")

+ 3 - 3
watermark_verify/verify_tool.py

@@ -43,13 +43,13 @@ def label_verification(model_filename: str) -> bool:
                 if initializer.name == weight_name:
                     # 获取权重数据
                     weights.append(onnx.numpy_helper.to_array(initializer))
-                    logger.debug(f"Weight shape for {node.name}: {weights[len(weights) - 1].shape}")
+                    # logger.debug(f"Weight shape for {node.name}: {weights[len(weights) - 1].shape}")
     weights = weights[3:7]
     weights = [np.transpose(weight, (2, 3, 1, 0)) for weight in weights]  # 将onnx文件的权重格式由(out_channels, in_channels, kernel_height, kernel_width),转换为(kernel_height, kernel_width, in_channels, out_channels)
     x_random = np.load(x_random_file)
     # 打印权重和投影矩阵
-    logger.debug(f"Weights: {weights}\n")
-    logger.debug(f"x_ramdom.shape = {x_random.shape}\nx_random: {x_random}\n")
+    # logger.debug(f"Weights: {weights}\n")
+    # logger.debug(f"x_ramdom.shape = {x_random.shape}\nx_random: {x_random}\n")
     # 计算嵌入的白盒水印
     w = flatten_parameters(weights)
     prob = get_prob(x_random, w)