Browse Source

初始化模型水印检测代码,完成基于pytorch、keras、tensorflow框架的图像分类模型白盒水印、黑盒水印检测

liyan 4 months ago
parent
commit
92be7bcb51

+ 1 - 1
tests/verify_tool_test.py

@@ -4,6 +4,6 @@
 from watermark_verify import verify_tool
 
 if __name__ == '__main__':
-    model_filename = "/mnt/d/WorkSpace/PyCharmGitWorkspace/model_watermark_verify/tests/models/origin/googlenet/googlenet.onnx"
+    model_filename = "/mnt/e/DevTools/workspace/model_watermark_detect/tests/models/blackbox/alexnet/alexnet.onnx"
     verify_result = verify_tool.label_verification(model_filename)
     print(f"verify_result: {verify_result}")

+ 0 - 0
watermark_verify/process/__init__.py


+ 17 - 0
watermark_verify/process/classification_all_whitebox_process.py

@@ -0,0 +1,17 @@
+"""
+AlexNet、VGG16、ResNet基于pytorch、tensorflow、keras框架的白盒水印处理验证流程
+"""
+from watermark_verify.process.general_process_define import WhiteBoxWatermarkProcessDefine
+
+
+class ClassificationProcess(WhiteBoxWatermarkProcessDefine):
+    def __init__(self, model_filename):
+        super(ClassificationProcess, self).__init__(model_filename)
+
+    def process(self) -> bool:
+        """
+        根据流程定义进行处理,并返回模型标签验证结果
+        :return: 模型标签验证结果
+        """
+        verify_result = self.verify_label()  # 模型标签检测通过,进行标签验证
+        return verify_result

+ 106 - 0
watermark_verify/process/classification_pytorch_blackbox_process.py

@@ -0,0 +1,106 @@
+"""
+AlexNet、VGG16、GoogleNet、ResNet基于pytorch框架的黑盒水印处理验证流程
+"""
+import os
+
+import numpy as np
+from PIL import Image
+
+from watermark_verify import logger
+from watermark_verify.process.general_process_define import BlackBoxWatermarkProcessDefine
+import onnxruntime as ort
+
+
+class ClassificationProcess(BlackBoxWatermarkProcessDefine):
+    def __init__(self, model_filename):
+        super(ClassificationProcess, self).__init__(model_filename)
+
+    def process(self) -> bool:
+        """
+        根据流程定义进行处理,并返回模型标签验证结果
+        :return: 模型标签验证结果
+        """
+        # 获取权重文件,使用触发集批量进行模型推理, 如果某个批次的准确率大于阈值,则比对成功进行下一步,否则返回False
+        for i in range(0, 2):
+            image_dir = os.path.join(self.trigger_dir, 'images', str(i))
+            if not os.path.exists(image_dir):
+                logger.error(f"指定触发集图片路径不存在, image_dir={image_dir}")
+                return False
+            detect_result = self.detect_secret_label(image_dir, i)
+            if not detect_result:
+                return False
+        verify_result = self.verify_label()  # 模型标签检测通过,进行标签验证
+        return verify_result
+
+    def preprocess_image(self, image_path):
+        """
+        对输入图片进行预处理
+        :param image_path: 图片路径
+        :return: 图片经过处理完成的ndarray
+        """
+        # 打开图像并转换为RGB
+        image = Image.open(image_path).convert("RGB")
+
+        # 调整图像大小
+        image = image.resize((224, 224))
+
+        # 转换为numpy数组并归一化
+        image_array = np.array(image) / 255.0  # 将像素值缩放到[0, 1]
+
+        # 进行标准化
+        mean = np.array([0.485, 0.456, 0.406])
+        std = np.array([0.229, 0.224, 0.225])
+        image_array = (image_array - mean) / std
+        image_array = image_array.transpose((2, 0, 1)).copy()
+
+        return image_array.astype(np.float32)
+
+    def detect_secret_label(self, image_dir, target_class, threshold=0.6, batch_size=10):
+        """
+        对模型使用触发集进行检查,判断是否存在黑盒模型水印,如果对嵌入水印的图片样本正确率高于阈值,证明模型存在黑盒水印
+        :param transpose: 是否对输出ndarray进行维度转换,pytorch无需转换,tensorflow、keras需要转换
+        :param image_dir: 待推理的图像文件夹
+        :param target_class: 目标分类
+        :param threshold: 通过测试阈值
+        :param batch_size: 每批图片数量
+        :return: 检测结果
+        """
+        session = ort.InferenceSession(self.model_filename)  # 加载 ONNX 模型
+        image_files = [f for f in os.listdir(image_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
+        results = {}
+        input_name = session.get_inputs()[0].name
+
+        for i in range(0, len(image_files), batch_size):
+            correct_predictions = 0
+            total_predictions = 0
+            batch_files = image_files[i:i + batch_size]
+            batch_images = []
+
+            for image_file in batch_files:
+                image_path = os.path.join(image_dir, image_file)
+                image = self.preprocess_image(image_path)
+                batch_images.append(image)
+
+            # 将批次图片堆叠成 (batch_size, 3, 224, 224) 维度
+            batch_images = np.stack(batch_images)
+
+            # 执行预测
+            outputs = session.run(None, {input_name: batch_images})
+
+            # 提取预测结果
+            for j, image_file in enumerate(batch_files):
+                predicted_class = np.argmax(outputs[0][j])  # 假设输出是每类的概率
+                results[image_file] = predicted_class
+                total_predictions += 1
+
+                # 比较预测结果与目标分类
+                if predicted_class == target_class:
+                    correct_predictions += 1
+
+            # 计算准确率
+            accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+            # logger.debug(f"Predicted batch {i // batch_size + 1}, Accuracy: {accuracy * 100:.2f}%")
+            if accuracy >= threshold:
+                logger.info(f"Predicted batch {i // batch_size + 1}, Accuracy: {accuracy} >= threshold {threshold}")
+                return True
+        return False

+ 105 - 0
watermark_verify/process/classification_tensorflow_blackbox_process.py

@@ -0,0 +1,105 @@
+"""
+AlexNet、VGG16、GoogleNet、ResNet基于tensorflow、Keras框架的黑盒水印处理验证流程
+"""
+import os
+
+import numpy as np
+from PIL import Image
+
+from watermark_verify import logger
+from watermark_verify.process.general_process_define import BlackBoxWatermarkProcessDefine
+import onnxruntime as ort
+
+
+class ClassificationProcess(BlackBoxWatermarkProcessDefine):
+    def __init__(self, model_filename):
+        super(ClassificationProcess, self).__init__(model_filename)
+
+    def process(self) -> bool:
+        """
+        根据流程定义进行处理,并返回模型标签验证结果
+        :return: 模型标签验证结果
+        """
+        # 获取权重文件,使用触发集批量进行模型推理, 如果某个批次的准确率大于阈值,则比对成功进行下一步,否则返回False
+        for i in range(0, 2):
+            image_dir = os.path.join(self.trigger_dir, 'images', str(i))
+            if not os.path.exists(image_dir):
+                logger.error(f"指定触发集图片路径不存在, image_dir={image_dir}")
+                return False
+            detect_result = self.detect_secret_label(image_dir, i)
+            if not detect_result:
+                return False
+        verify_result = self.verify_label()  # 模型标签检测通过,进行标签验证
+        return verify_result
+
+    def preprocess_image(self, image_path):
+        """
+        对输入图片进行预处理
+        :param image_path: 图片路径
+        :param transpose: 是否对输出ndarray进行维度转换,pytorch无需转换,tensorflow、keras需要转换
+        :return: 图片经过处理完成的ndarray
+        """
+        # 打开图像并转换为RGB
+        image = Image.open(image_path).convert("RGB")
+
+        # 调整图像大小
+        image = image.resize((224, 224))
+
+        # 转换为numpy数组并归一化
+        image_array = np.array(image) / 255.0  # 将像素值缩放到[0, 1]
+
+        # 进行标准化
+        mean = np.array([0.485, 0.456, 0.406])
+        std = np.array([0.229, 0.224, 0.225])
+        image_array = (image_array - mean) / std
+
+        return image_array.astype(np.float32)
+
+    def detect_secret_label(self, image_dir, target_class, threshold=0.6, batch_size=10):
+        """
+        对模型使用触发集进行检查,判断是否存在黑盒模型水印,如果对嵌入水印的图片样本正确率高于阈值,证明模型存在黑盒水印
+        :param image_dir: 待推理的图像文件夹
+        :param target_class: 目标分类
+        :param threshold: 通过测试阈值
+        :param batch_size: 每批图片数量
+        :return: 检测结果
+        """
+        session = ort.InferenceSession(self.model_filename)  # 加载 ONNX 模型
+        image_files = [f for f in os.listdir(image_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
+        results = {}
+        input_name = session.get_inputs()[0].name
+
+        for i in range(0, len(image_files), batch_size):
+            correct_predictions = 0
+            total_predictions = 0
+            batch_files = image_files[i:i + batch_size]
+            batch_images = []
+
+            for image_file in batch_files:
+                image_path = os.path.join(image_dir, image_file)
+                image = self.preprocess_image(image_path)
+                batch_images.append(image)
+
+            # 将批次图片堆叠成 (batch_size, 3, 224, 224) 维度
+            batch_images = np.stack(batch_images)
+
+            # 执行预测
+            outputs = session.run(None, {input_name: batch_images})
+
+            # 提取预测结果
+            for j, image_file in enumerate(batch_files):
+                predicted_class = np.argmax(outputs[0][j])  # 假设输出是每类的概率
+                results[image_file] = predicted_class
+                total_predictions += 1
+
+                # 比较预测结果与目标分类
+                if predicted_class == target_class:
+                    correct_predictions += 1
+
+            # 计算准确率
+            accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+            # logger.debug(f"Predicted batch {i // batch_size + 1}, Accuracy: {accuracy * 100:.2f}%")
+            if accuracy >= threshold:
+                logger.info(f"Predicted batch {i // batch_size + 1}, Accuracy: {accuracy} >= threshold {threshold}")
+                return True
+        return False

+ 163 - 0
watermark_verify/process/general_process_define.py

@@ -0,0 +1,163 @@
+"""
+水印通用流程定义
+"""
+import os
+
+from watermark_verify import logger
+from watermark_verify.tools import secret_label_func, parse_qrcode_label_file
+from watermark_verify.tools.qrcode_tool import detect_and_decode_qr_code
+
+
+class WhiteBoxWatermarkProcessDefine:
+    """
+    白盒水印通用处理流程定义
+    """
+
+    def __init__(self, model_filename):
+        """
+        检查必要参数,参数检查成功后,初始化参数
+        """
+        root_dir = os.path.dirname(model_filename)
+        logger.info(f"开始检测模型白盒水印, model_filename: {model_filename}, root_dir: {root_dir}")
+        # 获取签名公钥信息,检查投影矩阵位置
+        public_key_txt = os.path.join(root_dir, 'keys', 'public.key')
+        x_random_file = os.path.join(root_dir, 'keys', 'key.npy')
+        if not os.path.exists(x_random_file):
+            logger.error(f"x_random_file={x_random_file}, 投影矩阵保存文件不存在")
+        if not os.path.exists(public_key_txt):
+            logger.error(f"public_key_txt={public_key_txt}, 签名公钥文件不存在")
+            raise FileExistsError("签名公钥文件不存在")
+        with open(public_key_txt, 'r') as file:
+            public_key = file.read()
+        logger.debug(f"x_random_file={x_random_file}, public_key_txt={public_key_txt}, public_key={public_key}")
+        if not public_key or public_key == '':
+            logger.error(f"获取的签名公钥信息为空, public_key={public_key}")
+            raise RuntimeError("获取的签名公钥信息为空")
+        self.model_filename = model_filename
+        self.x_random_file = x_random_file
+        self.public_key = public_key
+
+    def extract_label(self, start, end):
+        import onnx
+        import numpy as np
+        """
+        标签提取
+        :return: 提取出的密码标签
+        """
+        model = onnx.load(self.model_filename)  # 加载 ONNX 模型
+        graph = model.graph  # 获取模型图(graph)
+        weights = []
+        # 遍历图中的节点
+        for node in graph.node:
+            if node.op_type == "Conv":  # 查找嵌入白盒水印的卷积层节点,卷积层名字可解析onnx文件后查找得到
+                weight_name = node.input[1]  # 通常第一个是输入x、第二个输入是权重w、第三个是偏置b
+                for initializer in graph.initializer:
+                    if initializer.name == weight_name:
+                        # 获取权重数据
+                        weights.append(onnx.numpy_helper.to_array(initializer))
+        weights = weights[start:end]
+        weights = [np.transpose(weight, (2, 3, 1, 0)) for weight in
+                   weights]  # 将onnx文件的权重格式由(out_channels, in_channels, kernel_height, kernel_width),转换为(kernel_height, kernel_width, in_channels, out_channels)
+        x_random = np.load(self.x_random_file)
+        # 计算嵌入的白盒水印
+        w = np.concatenate(
+            [np.mean(x, axis=3).reshape(-1) for x in weights])  # 处理传入的卷积层的权重参数,对卷积核进行按out_channels维度取平均,拉直
+        mm = np.dot(x_random, w.reshape((w.shape[0], 1)))  # 进行矩阵乘法
+        sigmoid_mm = 1 / (1 + np.exp(-mm))  # 计算 Sigmoid 函数
+        prob = sigmoid_mm.flatten()  # 拉直运算结果
+        decode = np.where(prob > 0.5, 1, 0)  # 获取最终字节序列
+        code_string = ''.join([str(x) for x in decode.tolist()])  # 转换为字节序列字符串,类似"0100010011111"
+        # 将字节序列字符串转换为字符串
+        secret_label = ''.join(chr(int(code_string[i:i + 8], 2)) for i in range(0, len(code_string), 8))
+        return secret_label
+
+    def verify_label(self, start=0, end=3) -> bool:
+        """
+        标签验证
+        :param start: 嵌入标签开始卷积层位置,包括起始位置
+        :param end: 嵌入标签结束卷积层位置,不包括结束位置
+        :return: 标签验证结果
+        """
+        secret_label = self.extract_label(start, end)
+        label_check_result = secret_label_func.verify_secret_label(secret_label=secret_label,
+                                                                   public_key=self.public_key)
+        return label_check_result
+
+
+class BlackBoxWatermarkProcessDefine:
+    """
+    黑盒水印通用处理流程定义
+    """
+
+    def __init__(self, model_filename):
+        """
+        检查必要参数,参数检查成功,返回所需验证参数
+        :return: 验证所需参数元组
+        """
+        root_dir = os.path.dirname(model_filename)
+        logger.info(f"开始检测模型水印, model_filename: {model_filename}, root_dir: {root_dir}")
+        # 获取触发集目录,公钥信息
+        trigger_dir = os.path.join(root_dir, 'trigger')
+        public_key_txt = os.path.join(root_dir, 'keys', 'public.key')
+        if not os.path.exists(trigger_dir):
+            logger.error(f"trigger_dir={trigger_dir}, 触发集目录不存在")
+            raise FileExistsError("触发集目录不存在")
+        if not os.path.exists(public_key_txt):
+            logger.error(f"public_key_txt={public_key_txt}, 签名公钥文件不存在")
+            raise FileExistsError("签名公钥文件不存在")
+        with open(public_key_txt, 'r') as file:
+            public_key = file.read()
+        logger.debug(f"trigger_dir={trigger_dir}, public_key_txt={public_key_txt}, public_key={public_key}")
+        if not public_key or public_key == '':
+            logger.error(f"获取的签名公钥信息为空, public_key={public_key}")
+            raise RuntimeError("获取的签名公钥信息为空")
+        qrcode_positions_file = os.path.join(trigger_dir, 'qrcode_positions.txt')
+        if not os.path.exists(qrcode_positions_file):
+            raise FileNotFoundError("二维码标签文件不存在")
+        self.model_filename = model_filename
+        self.trigger_dir = trigger_dir
+        self.public_key = public_key
+
+    def extract_label(self):
+        """
+        从触发集中提取密码标签
+        :return: 密码标签
+        """
+        # Initialize variables to store the paths
+        image_folder_path = None
+        qrcode_positions_file_path = None
+        label = ''
+
+        # Walk through the extracted folder to find the specific folder and file
+        for root, dirs, files in os.walk(self.trigger_dir):
+            if 'images' in dirs:
+                image_folder_path = os.path.join(root, 'images')
+            if 'qrcode_positions.txt' in files:
+                qrcode_positions_file_path = os.path.join(root, 'qrcode_positions.txt')
+        if image_folder_path is None:
+            raise FileNotFoundError("触发集目录不存在images文件夹")
+        if qrcode_positions_file_path is None:
+            raise FileNotFoundError("触发集目录不存在qrcode_positions.txt")
+
+        sub_image_dir_names = os.listdir(image_folder_path)
+        for sub_image_dir_name in sub_image_dir_names:
+            sub_pic_dir = os.path.join(image_folder_path, sub_image_dir_name)
+            images = os.listdir(sub_pic_dir)
+            for image in images:
+                img_path = os.path.join(sub_pic_dir, image)
+                watermark_box = parse_qrcode_label_file.load_watermark_info(qrcode_positions_file_path, img_path)
+                label_part, _ = detect_and_decode_qr_code(img_path, watermark_box)
+                if label_part is not None:
+                    label = label + label_part
+                    break
+        return label
+
+    def verify_label(self) -> bool:
+        """
+        标签验证
+        :return: 标签验证结果
+        """
+        secret_label = self.extract_label()
+        label_check_result = secret_label_func.verify_secret_label(secret_label=secret_label,
+                                                                   public_key=self.public_key)
+        return label_check_result

+ 17 - 0
watermark_verify/process/googlenet_all_whitebox_process.py

@@ -0,0 +1,17 @@
+"""
+GoogleNet基于pytorch、tensorflow、keras框架的白盒水印处理验证流程
+"""
+from watermark_verify.process.general_process_define import WhiteBoxWatermarkProcessDefine
+
+
+class ClassificationProcess(WhiteBoxWatermarkProcessDefine):
+    def __init__(self, model_filename):
+        super(ClassificationProcess, self).__init__(model_filename)
+
+    def process(self) -> bool:
+        """
+        根据流程定义进行处理,并返回模型标签验证结果
+        :return: 模型标签验证结果
+        """
+        verify_result = self.verify_label(start=3, end=7)  # 模型标签检测通过,进行标签验证
+        return verify_result

+ 14 - 0
watermark_verify/verify_tool.py

@@ -0,0 +1,14 @@
+from watermark_verify.process import classification_pytorch_blackbox_process
+
+
+def label_verification(model_filename: str) -> bool:
+    """
+    模型标签提取验证
+    :param model_filename: 模型权重文件,onnx格式
+    :return: 模型标签验证结果
+    """
+    # 初始化模型水印检测器
+    model_detector = classification_pytorch_blackbox_process.ClassificationProcess(model_filename)
+    result = model_detector.process()  # 获取模型水印检测结果
+    return result
+