
Add support for the white-box watermark embedding workflow on the TensorFlow and Keras frameworks

liyan · 5 months ago
commit ef0bde70e5

+ 26 - 19
watermark_generate/controller/watermark_generate_controller.py

@@ -13,7 +13,8 @@ from watermark_generate import logger
 from watermark_generate.tools import secret_label_func
 from watermark_generate.deals import yolox_pytorch_black_embed, yolox_pytorch_white_embed, \
     faster_rcnn_pytorch_black_embed, ssd_pytorch_black_embed, ssd_pytorch_white_embed, faster_rcnn_pytorch_white_embed, \
-    classification_pytorch_white_embed, googlenet_pytorch_white_embed, classification_pytorch_black_embed
+    classification_pytorch_white_embed, googlenet_pytorch_white_embed, classification_pytorch_black_embed, \
+    classfication_tensorflow_white_embed, classfication_tensorflow_black_embed
 
 generator = Blueprint('generator', __name__)
 
@@ -81,24 +82,30 @@ def watermark_embed():
    # Modify the model project source and save public_key into a file in the modified project directory
     logger.info(f"modify model project source, model_value: {model_value}, embed_type: {embed_type}")
    # TODO: add handling for other model projects
-    if model_value == 'yolox' and embed_type == 'blackbox':
-        yolox_pytorch_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
-    if model_value == 'yolox' and embed_type == 'whitebox':
-        yolox_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
-    if model_value == 'faster-rcnn' and embed_type == 'blackbox':
-        faster_rcnn_pytorch_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
-    if model_value == 'faster-rcnn' and embed_type == 'whitebox':
-        faster_rcnn_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
-    if model_value == 'ssd' and embed_type == 'blackbox':
-        ssd_pytorch_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
-    if model_value == 'ssd' and embed_type == 'whitebox':
-        ssd_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
-    if (model_value in ['alexnet', 'vggnet', 'resnet']) and embed_type == 'whitebox':
-        classification_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
-    if model_value == 'googlenet' and embed_type == 'whitebox':
-        googlenet_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
-    if (model_value in ['alexnet', 'vggnet', 'resnet', 'googlenet']) and embed_type == 'blackbox':
-        classification_pytorch_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
+    if "tensorflow" in model_file:  # tensorflow、keras框架水印嵌入支持
+        if (model_value in ['alexnet', 'vggnet']) and embed_type == 'whitebox':
+            classfication_tensorflow_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if (model_value in ['alexnet', 'vggnet']) and embed_type == 'blackbox':
+            classfication_tensorflow_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
+    else:  # PyTorch framework watermark embedding support
+        if model_value == 'yolox' and embed_type == 'blackbox':
+            yolox_pytorch_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if model_value == 'yolox' and embed_type == 'whitebox':
+            yolox_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if model_value == 'faster-rcnn' and embed_type == 'blackbox':
+            faster_rcnn_pytorch_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if model_value == 'faster-rcnn' and embed_type == 'whitebox':
+            faster_rcnn_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if model_value == 'ssd' and embed_type == 'blackbox':
+            ssd_pytorch_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if model_value == 'ssd' and embed_type == 'whitebox':
+            ssd_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if (model_value in ['alexnet', 'vggnet', 'resnet']) and embed_type == 'whitebox':
+            classification_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if model_value == 'googlenet' and embed_type == 'whitebox':
+            googlenet_pytorch_white_embed.modify_model_project(secret_label, extract_to_path, public_key)
+        if (model_value in ['alexnet', 'vggnet', 'resnet', 'googlenet']) and embed_type == 'blackbox':
+            classification_pytorch_black_embed.modify_model_project(secret_label, extract_to_path, public_key)
    # Zip up the modified model project source
     name, ext = os.path.splitext(file_name)
     zip_filename = f"{name}_embed{ext}"

+ 436 - 0
watermark_generate/deals/classfication_tensorflow_white_embed.py

@@ -0,0 +1,436 @@
+"""
+White-box watermark embedding project-file handling for the Keras-based AlexNet model and the TensorFlow-based VGG16 model
+"""
+import os
+
+from watermark_generate.tools import modify_file, general_tool
+from watermark_generate.exceptions import BusinessException
+
+
+def modify_model_project(secret_label: str, project_dir: str, public_key: str):
+    """
+    Modify the image-classification model project source
+    :param secret_label: generated secret label to embed
+    :param project_dir: directory where the project archive was unpacked
+    :param public_key: signature public key, to be saved into the project files
+    """
+
+    rela_project_path = general_tool.find_relative_directories(project_dir, 'classification-models-tensorflow')
+    if not rela_project_path:
+        raise BusinessException(message="未找到指定模型的工程目录", code=-1)
+
+    project_dir = os.path.join(project_dir, rela_project_path[0])
+    project_train_alexnet = os.path.join(project_dir, 'train_alexnet.py')
+    project_train_vgg = os.path.join(project_dir, 'train_vgg16.py')
+    project_export_onnx = os.path.join(project_dir, 'export_onnx.py')
+    project_embed_watermark = os.path.join(project_dir, 'watermark_embeder.py')
+
+    if not os.path.exists(project_train_alexnet):
+        raise BusinessException(message="指定待修改的alex训练文件未找到", code=-1)
+    if not os.path.exists(project_train_vgg):
+        raise BusinessException(message="指定待修改的vgg训练文件未找到", code=-1)
+    if not os.path.exists(project_export_onnx):
+        raise BusinessException(message="指定待修改的导出onnx文件未找到", code=-1)
+
+    # Save the public key to the designated location in the model project
+    keys_dir = os.path.join(project_dir, 'keys')
+    os.makedirs(keys_dir, exist_ok=True)
+    public_key_file = os.path.join(keys_dir, 'public.key')
+    # Write the key to file
+    with open(public_key_file, 'w', encoding='utf-8') as file:
+        file.write(public_key)
+    # Create the watermark embedding helper script
+    with open(project_embed_watermark, 'w', encoding='utf-8') as file:
+        source_code = \
+f"""
+import tensorflow as tf
+import numpy as np
+import os
+
+class ModelEncoder:
+    def __init__(self, layers, secret, key_path, model):
+        self.layers = layers
+        self.model = model
+
+        # Ensure every target layer is a convolutional layer
+        for layer in layers:
+            if not isinstance(layer, tf.keras.layers.Conv2D):
+                raise TypeError('layer passed in is not a Conv2D layer')
+
+        self.weights = [layer.kernel for layer in layers]  # kernel weights only, biases excluded
+        w = self.flatten_parameters(self.weights)
+        print('Size of embedding parameters:', w.shape)
+
+        # Convert the secret string into a bit tensor
+        self.secret = tf.convert_to_tensor(self.string2bin(secret), dtype=tf.float32)
+        self.secret_len = self.secret.shape[0]
+
+        # Load the projection matrix if present, otherwise generate a random one
+        if os.path.exists(key_path):
+            self.X_random = tf.convert_to_tensor(np.load(key_path))
+        else:
+            self.X_random = tf.random.normal((self.secret_len, w.shape[0]))
+            self.save_tensor(self.X_random, key_path)  # save the projection matrix for later extraction
+
+    def get_embeder_loss(self):
+        weights = [layer.kernel for layer in self.layers]
+        w = self.flatten_parameters(weights)
+        prob = self.get_prob(self.X_random, w)
+        penalty = tf.keras.losses.BinaryCrossentropy(from_logits=True)(self.secret, prob)
+        return penalty
+
+    def string2bin(self, s):
+        binary_representation = ''.join(format(ord(x), '08b') for x in s)
+        return [int(x) for x in binary_representation]
+
+    def save_tensor(self, tensor, save_path):
+        os.makedirs(os.path.dirname(save_path), exist_ok=True)
+        np.save(save_path, tensor)
+
+    def flatten_parameters(self, weights):
+        flattened = [tf.reduce_mean(layer, axis=3) for layer in weights]
+        return tf.concat([tf.reshape(layer, [-1]) for layer in flattened], axis=0)
+
+    def get_prob(self, x_random, w):
+        mm = tf.matmul(x_random, tf.reshape(w, [w.shape[0], 1]))
+        return tf.reshape(mm, [-1])
+
+    def get_custom_loss(self):
+        def custom_loss(y_true, y_pred):
+            # Compute the original task loss
+            base_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)
+
+            # Add the watermark embedding loss to the task loss
+            embed_loss = self.get_embeder_loss()
+
+            total_loss = base_loss + embed_loss  # scalar embedding loss is broadcast-added; no extra weighting applied
+            return total_loss
+        return custom_loss
+"""
+        file.write(source_code)
+
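For reference, extraction later reverses this construction: flatten the same convolution kernels, project them with the key.npy matrix saved above, and threshold the resulting logits at zero (the embedding loss uses BinaryCrossentropy with from_logits=True). A minimal sketch, not part of this commit; extract_secret and bin2string are hypothetical helper names:

    import numpy as np
    import tensorflow as tf

    def bin2string(bits):
        # Inverse of ModelEncoder.string2bin: pack each run of 8 bits into one character.
        return ''.join(chr(int(''.join(str(b) for b in bits[i:i + 8]), 2))
                       for i in range(0, len(bits), 8))

    def extract_secret(model, key_path):
        # Mirror ModelEncoder.flatten_parameters: mean over the output-channel axis
        # of each of the first three Conv2D kernels, concatenated into one vector.
        convs = [l for l in model.layers if isinstance(l, tf.keras.layers.Conv2D)][0:3]
        flattened = [tf.reduce_mean(l.kernel, axis=3) for l in convs]
        w = tf.concat([tf.reshape(f, [-1]) for f in flattened], axis=0).numpy()
        X = np.load(key_path)                          # projection matrix saved during training
        bits = (X @ w.reshape(-1, 1) > 0).astype(int)  # from_logits=True => threshold at 0
        return bin2string(bits.reshape(-1).tolist())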
+    # Code block to find and replace
+    old_source_block = \
+"""from keras.callbacks import ModelCheckpoint, CSVLogger
+"""
+    new_source_block = \
+"""from keras.callbacks import ModelCheckpoint, CSVLogger, Callback
+"""
+    # Apply the replacement to the file
+    modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
+
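The modify_file.replace_block_in_file helper is not shown in this commit. A plausible minimal implementation, assuming it performs a literal whole-file substring swap (this sketch is a guess, not the project's actual helper):

    def replace_block_in_file(file_path, old_block, new_block):
        # Read the file, swap the literal block, and write the result back.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        if old_block not in content:
            raise ValueError(f'block not found in {file_path}')
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content.replace(old_block, new_block))

Under this assumption, each old_source_block below is a verbatim search key into the target project's files, so its contents, including the original Chinese comments, must match the shipped training scripts byte for byte.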
+    # Code block to find and replace
+    old_source_block = \
+"""def train_model(args, train_data, val_data):
+    # Create model
+    model = create_model()
+
+    # 调整学习率
+    learning_rate = args.lr if args.lr else 1e-2
+
+    # Select optimizer based on args.opt
+    if args.opt == 'sgd':
+        optimizer = SGD(learning_rate=learning_rate,
+                                            momentum=args.momentum if args.momentum else 0.0)
+    elif args.opt == 'adam':
+        optimizer = Adam(learning_rate=learning_rate)
+    else:
+        optimizer = Adam(learning_rate=learning_rate)  # Default to Adam if unspecified
+
+    # Compile model
+    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
+
+    # Check if a checkpoint exists and determine the initial_epoch
+    latest_checkpoint = find_latest_checkpoint(args.output_dir)
+    if latest_checkpoint:
+        model.load_weights(latest_checkpoint)  # Load the weights from the checkpoint
+        initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0])  # Get the last epoch from filename
+        print(f"Resuming training from epoch {initial_epoch}")
+    else:
+        initial_epoch = 0
+        print("No checkpoint found. Starting training from scratch.")
+
+    # Define CSVLogger to log training history to a CSV file
+    csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
+
+    # Define ModelCheckpoint callback to save weights for each epoch
+    checkpoint_callback = ModelCheckpoint(
+        os.path.join(args.output_dir, 'alexnet_{epoch:03d}.h5'),  # Save weights as alexnet_{epoch}.h5
+        save_weights_only=False,
+        save_freq='epoch',  # Save after every epoch
+        verbose=1
+    )
+
+    # Train the model
+    history = model.fit(
+        train_data,
+        epochs=args.epochs,
+        validation_data=val_data,
+        initial_epoch=initial_epoch,
+        callbacks=[csv_logger, checkpoint_callback],  # Add checkpoint callback
+    )
+
+    return history
+"""
+    new_source_block = \
+f"""def train_model(args, train_data, val_data):
+    # Create model
+    model = create_model()
+
+    secret = "{secret_label}"
+    # Collect all convolutional layers of the model
+    embed_layers = []
+    for layer in model.layers:
+        if isinstance(layer, tf.keras.layers.Conv2D):
+            embed_layers.append(layer)
+    # Initialize the encoder with the first three convolutional layers
+    encoder = ModelEncoder(embed_layers[0:3], secret, "keys/key.npy", model)
+
+    # Set the learning rate
+    learning_rate = args.lr if args.lr else 1e-2
+
+    # Select optimizer based on args.opt
+    if args.opt == 'sgd':
+        optimizer = SGD(learning_rate=learning_rate,
+                                            momentum=args.momentum if args.momentum else 0.0)
+    elif args.opt == 'adam':
+        optimizer = Adam(learning_rate=learning_rate)
+    else:
+        optimizer = Adam(learning_rate=learning_rate)  # Default to Adam if unspecified
+
+    # Compile model
+    model.compile(optimizer=optimizer, loss=encoder.get_custom_loss(), metrics=['accuracy'])
+
+    # Check if a checkpoint exists and determine the initial_epoch
+    latest_checkpoint = find_latest_checkpoint(args.output_dir)
+    if latest_checkpoint:
+        model.load_weights(latest_checkpoint)  # Load the weights from the checkpoint
+        initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0])  # Get the last epoch from filename
+        print(f"Resuming training from epoch {{initial_epoch}}")
+    else:
+        initial_epoch = 0
+        print("No checkpoint found. Starting training from scratch.")
+
+    # Define CSVLogger to log training history to a CSV file
+    csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
+
+    # Define ModelCheckpoint callback to save weights for each epoch
+    checkpoint_callback = ModelCheckpoint(
+        os.path.join(args.output_dir, 'alexnet_{{epoch:03d}}.h5'),  # Save weights as alexnet_{{epoch}}.h5
+        save_weights_only=False,
+        save_freq='epoch',  # Save after every epoch
+        verbose=1
+    )
+
+    embed_loss_history_callback = LossHistory(encoder=encoder)
+
+    # Train the model
+    history = model.fit(
+        train_data,
+        epochs=args.epochs,
+        validation_data=val_data,
+        initial_epoch=initial_epoch,
+        callbacks=[csv_logger, checkpoint_callback, embed_loss_history_callback],  # add checkpoint and watermark-loss callbacks
+    )
+
+    return history
+"""
+    # Apply the replacement to the file
+    modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
+
+    # Code block to find and replace
+    old_source_block = \
+"""from tensorflow.keras.preprocessing import image_dataset_from_directory
+"""
+    new_source_block = \
+"""from tensorflow.keras.preprocessing import image_dataset_from_directory
+from watermark_embeder import ModelEncoder
+
+class LossHistory(Callback):
+    def __init__(self, encoder):
+        super().__init__()
+        self.encoder = encoder
+    def on_epoch_end(self, epoch, logs=None):
+        print(f'Embedding Loss: {self.encoder.get_embeder_loss()}')
+"""
+    # Apply the replacement to the file
+    modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
+
+    # Code block to find and replace
+    old_source_block = \
+"""from keras.callbacks import ModelCheckpoint, CSVLogger
+"""
+    new_source_block = \
+"""from keras.callbacks import ModelCheckpoint, CSVLogger, Callback
+"""
+    # Apply the replacement to the file
+    modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
+
+    # Code block to find and replace
+    old_source_block = \
+"""from models.VGG16 import create_model
+"""
+    new_source_block = \
+"""from models.VGG16 import create_model
+from watermark_embeder import ModelEncoder
+
+class LossHistory(Callback):
+    def __init__(self, encoder):
+        super().__init__()
+        self.encoder = encoder
+    def on_epoch_end(self, epoch, logs=None):
+        print(f'Embedding Loss: {self.encoder.get_embeder_loss()}')
+"""
+    # Apply the replacement to the file
+    modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
+
+    # Code block to find and replace
+    old_source_block = \
+"""def train_model(args, train_generator, val_generator):
+    # Create model
+    model = create_model()
+
+    # 调整学习率
+    learning_rate = args.lr if args.lr else 1e-2
+
+    # Select optimizer based on args.opt
+    if args.opt == 'sgd':
+        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
+                                            momentum=args.momentum if args.momentum else 0.0)
+    elif args.opt == 'adam':
+        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
+    else:
+        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)  # Default to Adam if unspecified
+
+    # 编译模型
+    model.compile(optimizer=optimizer,
+                  loss='categorical_crossentropy',
+                  metrics=['accuracy'])
+
+    # Check if a checkpoint exists and determine the initial_epoch
+    latest_checkpoint = find_latest_checkpoint(args.output_dir)
+    if latest_checkpoint:
+        model.load_weights(latest_checkpoint)  # Load the weights from the checkpoint
+        initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0])  # Get the last epoch from filename
+        print(f"Resuming training from epoch {initial_epoch}")
+    else:
+        initial_epoch = 0
+        print("No checkpoint found. Starting training from scratch.")
+
+    # Define CSVLogger to log training history to a CSV file
+    csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
+
+    # Define ModelCheckpoint callback to save weights for each epoch
+    checkpoint_callback = ModelCheckpoint(
+        os.path.join(args.output_dir, 'vgg16_{epoch:03d}.h5'),  # Save weights as vgg16_{epoch}.h5
+        save_weights_only=False,
+        save_freq='epoch',  # Save after every epoch
+        verbose=1
+    )
+
+    # 训练模型
+    history = model.fit(
+        train_generator,
+        steps_per_epoch=train_generator.samples // train_generator.batch_size,
+        epochs=args.epochs,
+        validation_data=val_generator,
+        validation_steps=val_generator.samples // val_generator.batch_size,
+        initial_epoch=initial_epoch,
+        callbacks=[csv_logger, checkpoint_callback]
+    )
+
+    return history
+"""
+    new_source_block = \
+f"""def train_model(args, train_generator, val_generator):
+    # Create model
+    model = create_model()
+
+    secret = "{secret_label}"
+    # Collect all convolutional layers of the model
+    embed_layers = []
+    for layer in model.layers:
+        if isinstance(layer, tf.keras.layers.Conv2D):
+            embed_layers.append(layer)
+    # Initialize the encoder with the first three convolutional layers
+    encoder = ModelEncoder(embed_layers[0:3], secret, "keys/key.npy", model)
+
+    # Set the learning rate
+    learning_rate = args.lr if args.lr else 1e-2
+
+    # Select optimizer based on args.opt
+    if args.opt == 'sgd':
+        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
+                                            momentum=args.momentum if args.momentum else 0.0)
+    elif args.opt == 'adam':
+        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
+    else:
+        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)  # Default to Adam if unspecified
+
+    # Compile the model
+    model.compile(optimizer=optimizer,
+                  loss=encoder.get_custom_loss(),
+                  metrics=['accuracy'])
+
+    # Check if a checkpoint exists and determine the initial_epoch
+    latest_checkpoint = find_latest_checkpoint(args.output_dir)
+    if latest_checkpoint:
+        model.load_weights(latest_checkpoint)  # Load the weights from the checkpoint
+        initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0])  # Get the last epoch from filename
+        print(f"Resuming training from epoch {{initial_epoch}}")
+    else:
+        initial_epoch = 0
+        print("No checkpoint found. Starting training from scratch.")
+
+    # Define CSVLogger to log training history to a CSV file
+    csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
+
+    # Define ModelCheckpoint callback to save weights for each epoch
+    checkpoint_callback = ModelCheckpoint(
+        os.path.join(args.output_dir, 'vgg16_{{epoch:03d}}.h5'),  # Save weights as vgg16_{{epoch}}.h5
+        save_weights_only=False,
+        save_freq='epoch',  # Save after every epoch
+        verbose=1
+    )
+
+    embed_loss_history_callback = LossHistory(encoder=encoder)
+
+    # Train the model
+    history = model.fit(
+        train_generator,
+        steps_per_epoch=train_generator.samples // train_generator.batch_size,
+        epochs=args.epochs,
+        validation_data=val_generator,
+        validation_steps=val_generator.samples // val_generator.batch_size,
+        initial_epoch=initial_epoch,
+        callbacks=[csv_logger, checkpoint_callback, embed_loss_history_callback]
+    )
+
+    return history
+"""
+    # Apply the replacement to the file
+    modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
+
+    # Code block to find and replace
+    old_source_block = \
+"""import tf2onnx
+"""
+    new_source_block = \
+"""import tf2onnx
+
+def custom_loss(y_true, y_pred):
+    return tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)
+"""
+    # Apply the replacement to the file
+    modify_file.replace_block_in_file(project_export_onnx, old_source_block, new_source_block)
+
+    # Code block to find and replace
+    old_source_block = \
+"""            model = tf.keras.models.load_model(h5_path)
+"""
+    new_source_block = \
+"""            model = tf.keras.models.load_model(h5_path, custom_objects={'custom_loss': custom_loss})
+"""
+    # Apply the replacement to the file
+    modify_file.replace_block_in_file(project_export_onnx, old_source_block, new_source_block)
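Because the trained .h5 checkpoints now serialize a loss registered under the name custom_loss, tf.keras.models.load_model would otherwise fail with an unknown-object error during export; the stand-in above only needs to exist under that name and is never called. A usage sketch of the patched export path (the checkpoint file name and input shape below are assumed examples, not taken from this commit):

    import tensorflow as tf
    import tf2onnx

    def custom_loss(y_true, y_pred):
        # Same name as the training-time loss, so deserialization succeeds.
        return tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)

    model = tf.keras.models.load_model('checkpoints/alexnet_010.h5',
                                       custom_objects={'custom_loss': custom_loss})
    spec = (tf.TensorSpec((None, 224, 224, 3), tf.float32, name='input'),)  # assumed input shape
    tf2onnx.convert.from_keras(model, input_signature=spec, output_path='alexnet_embed.onnx')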
+