classfication_tensorflow_white_embed.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. """
  2. 基于Keras框架的AlexNet模型、基于tensorflow框架的VGG16模型 白盒水印嵌入工程文件处理
  3. """
  4. import os
  5. from watermark_generate.tools import modify_file, general_tool
  6. from watermark_generate.exceptions import BusinessException
  7. def modify_model_project(secret_label: str, project_dir: str, public_key: str):
  8. """
  9. 修改图像分类模型工程代码
  10. :param secret_label: 生成的密码标签
  11. :param project_dir: 工程文件解压后的目录
  12. :param public_key: 签名公钥,需保存至工程文件中
  13. """
  14. rela_project_path = general_tool.find_relative_directories(project_dir, 'classification-models-tensorflow')
  15. if not rela_project_path:
  16. raise BusinessException(message="未找到指定模型的工程目录", code=-1)
  17. project_dir = os.path.join(project_dir, rela_project_path[0])
  18. project_train_alexnet = os.path.join(project_dir, 'train_alexnet.py')
  19. project_train_vgg = os.path.join(project_dir, 'train_vgg16.py')
  20. project_export_onnx = os.path.join(project_dir, 'export_onnx.py')
  21. project_embed_watermark = os.path.join(project_dir, 'watermark_embeder.py')
  22. if not os.path.exists(project_train_alexnet):
  23. raise BusinessException(message="指定待修改的alex训练文件未找到", code=-1)
  24. if not os.path.exists(project_train_vgg):
  25. raise BusinessException(message="指定待修改的vgg训练文件未找到", code=-1)
  26. if not os.path.exists(project_export_onnx):
  27. raise BusinessException(message="指定待修改的导出onnx文件未找到", code=-1)
  28. # 把公钥保存至模型工程代码指定位置
  29. keys_dir = os.path.join(project_dir, 'keys')
  30. os.makedirs(keys_dir, exist_ok=True)
  31. public_key_file = os.path.join(keys_dir, 'public.key')
  32. # 写回文件
  33. with open(public_key_file, 'w', encoding='utf-8') as file:
  34. file.write(public_key)
  35. # 创建水印嵌入工具脚本
  36. with open(project_embed_watermark, 'w', encoding='utf-8') as file:
  37. source_code = \
  38. f"""
  39. import tensorflow as tf
  40. import numpy as np
  41. import os
  42. class ModelEncoder:
  43. def __init__(self, layers, secret, key_path, model):
  44. self.layers = layers
  45. self.model = model
  46. # 确保传入的目标层都是卷积层
  47. for layer in layers:
  48. if not isinstance(layer, tf.keras.layers.Conv2D):
  49. raise TypeError('传入参数不是卷积层')
  50. self.weights = [layer.kernel for layer in layers] # 只获取卷积核权重
  51. w = self.flatten_parameters(self.weights)
  52. print('Size of embedding parameters:', w.shape)
  53. # 对密钥进行处理
  54. self.secret = tf.convert_to_tensor(self.string2bin(secret), dtype=tf.float32)
  55. self.secret_len = self.secret.shape[0]
  56. # 生成随机的投影矩阵
  57. if os.path.exists(key_path):
  58. self.X_random = tf.convert_to_tensor(np.load(key_path))
  59. else:
  60. self.X_random = tf.random.normal((self.secret_len, w.shape[0]))
  61. self.save_tensor(self.X_random, key_path) # 保存投影矩阵至指定位置
  62. def get_embeder_loss(self):
  63. weights = [layer.kernel for layer in self.layers]
  64. w = self.flatten_parameters(weights)
  65. prob = self.get_prob(self.X_random, w)
  66. penalty = tf.keras.losses.BinaryCrossentropy(from_logits=True)(self.secret, prob)
  67. return penalty
  68. def string2bin(self, s):
  69. binary_representation = ''.join(format(ord(x), '08b') for x in s)
  70. return [int(x) for x in binary_representation]
  71. def save_tensor(self, tensor, save_path):
  72. os.makedirs(os.path.dirname(save_path), exist_ok=True)
  73. np.save(save_path, tensor)
  74. def flatten_parameters(self, weights):
  75. flattened = [tf.reduce_mean(layer, axis=3) for layer in weights]
  76. return tf.concat([tf.reshape(layer, [-1]) for layer in flattened], axis=0)
  77. def get_prob(self, x_random, w):
  78. mm = tf.matmul(x_random, tf.reshape(w, [w.shape[0], 1]))
  79. return tf.reshape(mm, [-1])
  80. def get_custom_loss(self):
  81. def custom_loss(y_true, y_pred):
  82. # 计算原始损失
  83. base_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)
  84. # 将自定义损失添加到原始损失中
  85. embed_loss = self.get_embeder_loss()
  86. total_loss = base_loss + embed_loss # 调整正则化项的权重
  87. return total_loss
  88. return custom_loss
  89. """
  90. file.write(source_code)
  91. # 查找替换代码块
  92. old_source_block = \
  93. """from keras.callbacks import ModelCheckpoint, CSVLogger
  94. """
  95. new_source_block = \
  96. """from keras.callbacks import ModelCheckpoint, CSVLogger, Callback
  97. """
  98. # 文件替换
  99. modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
  100. # 查找替换代码块
  101. old_source_block = \
  102. """def train_model(args, train_data, val_data):
  103. # Create model
  104. model = create_model()
  105. # 调整学习率
  106. learning_rate = args.lr if args.lr else 1e-2
  107. # Select optimizer based on args.opt
  108. if args.opt == 'sgd':
  109. optimizer = SGD(learning_rate=learning_rate,
  110. momentum=args.momentum if args.momentum else 0.0)
  111. elif args.opt == 'adam':
  112. optimizer = Adam(learning_rate=learning_rate)
  113. else:
  114. optimizer = Adam(learning_rate=learning_rate) # Default to Adam if unspecified
  115. # Compile model
  116. model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
  117. # Check if a checkpoint exists and determine the initial_epoch
  118. latest_checkpoint = find_latest_checkpoint(args.output_dir)
  119. if latest_checkpoint:
  120. model.load_weights(latest_checkpoint) # Load the weights from the checkpoint
  121. initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0]) # Get the last epoch from filename
  122. print(f"Resuming training from epoch {initial_epoch}")
  123. else:
  124. initial_epoch = 0
  125. print("No checkpoint found. Starting training from scratch.")
  126. # Define CSVLogger to log training history to a CSV file
  127. csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
  128. # Define ModelCheckpoint callback to save weights for each epoch
  129. checkpoint_callback = ModelCheckpoint(
  130. os.path.join(args.output_dir, 'alexnet_{epoch:03d}.h5'), # Save weights as alexnet_{epoch}.h5
  131. save_weights_only=False,
  132. save_freq='epoch', # Save after every epoch
  133. verbose=1
  134. )
  135. # Train the model
  136. history = model.fit(
  137. train_data,
  138. epochs=args.epochs,
  139. validation_data=val_data,
  140. initial_epoch=initial_epoch,
  141. callbacks=[csv_logger, checkpoint_callback], # Add checkpoint callback
  142. )
  143. return history
  144. """
  145. new_source_block = \
  146. f"""def train_model(args, train_data, val_data):
  147. # Create model
  148. model = create_model()
  149. secret = "{secret_label}"
  150. # 获取模型所有的卷积层
  151. embed_layers = []
  152. for layer in model.layers:
  153. if isinstance(layer, tf.keras.layers.Conv2D):
  154. embed_layers.append(layer)
  155. # 使用指定的卷积层初始化
  156. encoder = ModelEncoder(embed_layers[0:3], secret, "keys/key.npy", model)
  157. # 调整学习率
  158. learning_rate = args.lr if args.lr else 1e-2
  159. # Select optimizer based on args.opt
  160. if args.opt == 'sgd':
  161. optimizer = SGD(learning_rate=learning_rate,
  162. momentum=args.momentum if args.momentum else 0.0)
  163. elif args.opt == 'adam':
  164. optimizer = Adam(learning_rate=learning_rate)
  165. else:
  166. optimizer = Adam(learning_rate=learning_rate) # Default to Adam if unspecified
  167. # Compile model
  168. model.compile(optimizer=optimizer, loss=encoder.get_custom_loss(), metrics=['accuracy'])
  169. # Check if a checkpoint exists and determine the initial_epoch
  170. latest_checkpoint = find_latest_checkpoint(args.output_dir)
  171. if latest_checkpoint:
  172. model.load_weights(latest_checkpoint) # Load the weights from the checkpoint
  173. initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0]) # Get the last epoch from filename
  174. print(f"Resuming training from epoch {{initial_epoch}}")
  175. else:
  176. initial_epoch = 0
  177. print("No checkpoint found. Starting training from scratch.")
  178. # Define CSVLogger to log training history to a CSV file
  179. csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
  180. # Define ModelCheckpoint callback to save weights for each epoch
  181. checkpoint_callback = ModelCheckpoint(
  182. os.path.join(args.output_dir, 'alexnet_{{epoch:03d}}.h5'), # Save weights as alexnet_{{epoch}}.h5
  183. save_weights_only=False,
  184. save_freq='epoch', # Save after every epoch
  185. verbose=1
  186. )
  187. embed_loss_history_callback = LossHistory(encoder=encoder)
  188. # Train the model
  189. history = model.fit(
  190. train_data,
  191. epochs=args.epochs,
  192. validation_data=val_data,
  193. initial_epoch=initial_epoch,
  194. callbacks=[csv_logger, checkpoint_callback, embed_loss_history_callback], # Add checkpoint callback
  195. )
  196. return history
  197. """
  198. # 文件替换
  199. modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
  200. # 查找替换代码块
  201. old_source_block = \
  202. """from tensorflow.keras.preprocessing import image_dataset_from_directory
  203. """
  204. new_source_block = \
  205. """from tensorflow.keras.preprocessing import image_dataset_from_directory
  206. from watermark_embeder import ModelEncoder
  207. class LossHistory(Callback):
  208. def __init__(self, encoder):
  209. super().__init__()
  210. self.encoder = encoder
  211. def on_epoch_end(self, epoch, logs=None):
  212. print(f'Embedding Loss: {self.encoder.get_embeder_loss()}')
  213. """
  214. # 文件替换
  215. modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
  216. # 查找替换代码块
  217. old_source_block = \
  218. """from keras.callbacks import ModelCheckpoint, CSVLogger
  219. """
  220. new_source_block = \
  221. """from keras.callbacks import ModelCheckpoint, CSVLogger, Callback
  222. """
  223. # 文件替换
  224. modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
  225. # 查找替换代码块
  226. old_source_block = \
  227. """from models.VGG16 import create_model
  228. """
  229. new_source_block = \
  230. """from models.VGG16 import create_model
  231. from watermark_embeder import ModelEncoder
  232. class LossHistory(Callback):
  233. def __init__(self, encoder):
  234. super().__init__()
  235. self.encoder = encoder
  236. def on_epoch_end(self, epoch, logs=None):
  237. print(f'Embedding Loss: {self.encoder.get_embeder_loss()}')
  238. """
  239. # 文件替换
  240. modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
  241. # 查找替换代码块
  242. old_source_block = \
  243. """def train_model(args, train_generator, val_generator):
  244. # Create model
  245. model = create_model()
  246. # 调整学习率
  247. learning_rate = args.lr if args.lr else 1e-2
  248. # Select optimizer based on args.opt
  249. if args.opt == 'sgd':
  250. optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
  251. momentum=args.momentum if args.momentum else 0.0)
  252. elif args.opt == 'adam':
  253. optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
  254. else:
  255. optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate) # Default to Adam if unspecified
  256. # 编译模型
  257. model.compile(optimizer=optimizer,
  258. loss='categorical_crossentropy',
  259. metrics=['accuracy'])
  260. # Check if a checkpoint exists and determine the initial_epoch
  261. latest_checkpoint = find_latest_checkpoint(args.output_dir)
  262. if latest_checkpoint:
  263. model.load_weights(latest_checkpoint) # Load the weights from the checkpoint
  264. initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0]) # Get the last epoch from filename
  265. print(f"Resuming training from epoch {initial_epoch}")
  266. else:
  267. initial_epoch = 0
  268. print("No checkpoint found. Starting training from scratch.")
  269. # Define CSVLogger to log training history to a CSV file
  270. csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
  271. # Define ModelCheckpoint callback to save weights for each epoch
  272. checkpoint_callback = ModelCheckpoint(
  273. os.path.join(args.output_dir, 'vgg16_{epoch:03d}.h5'), # Save weights as vgg16_{epoch}.h5
  274. save_weights_only=False,
  275. save_freq='epoch', # Save after every epoch
  276. verbose=1
  277. )
  278. # 训练模型
  279. history = model.fit(
  280. train_generator,
  281. steps_per_epoch=train_generator.samples // train_generator.batch_size,
  282. epochs=args.epochs,
  283. validation_data=val_generator,
  284. validation_steps=val_generator.samples // val_generator.batch_size,
  285. initial_epoch=initial_epoch,
  286. callbacks=[csv_logger, checkpoint_callback]
  287. )
  288. return history
  289. """
  290. new_source_block = \
  291. f"""def train_model(args, train_generator, val_generator):
  292. # Create model
  293. model = create_model()
  294. secret = "{secret_label}"
  295. # 获取模型所有的卷积层
  296. embed_layers = []
  297. for layer in model.layers:
  298. if isinstance(layer, tf.keras.layers.Conv2D):
  299. embed_layers.append(layer)
  300. # 使用指定的卷积层初始化
  301. encoder = ModelEncoder(embed_layers[0:3], secret, "keys/key.npy", model)
  302. # 调整学习率
  303. learning_rate = args.lr if args.lr else 1e-2
  304. # Select optimizer based on args.opt
  305. if args.opt == 'sgd':
  306. optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
  307. momentum=args.momentum if args.momentum else 0.0)
  308. elif args.opt == 'adam':
  309. optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
  310. else:
  311. optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate) # Default to Adam if unspecified
  312. # 编译模型
  313. model.compile(optimizer=optimizer,
  314. loss=encoder.get_custom_loss(),
  315. metrics=['accuracy'])
  316. # Check if a checkpoint exists and determine the initial_epoch
  317. latest_checkpoint = find_latest_checkpoint(args.output_dir)
  318. if latest_checkpoint:
  319. model.load_weights(latest_checkpoint) # Load the weights from the checkpoint
  320. initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0]) # Get the last epoch from filename
  321. print(f"Resuming training from epoch {{initial_epoch}}")
  322. else:
  323. initial_epoch = 0
  324. print("No checkpoint found. Starting training from scratch.")
  325. # Define CSVLogger to log training history to a CSV file
  326. csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
  327. # Define ModelCheckpoint callback to save weights for each epoch
  328. checkpoint_callback = ModelCheckpoint(
  329. os.path.join(args.output_dir, 'vgg16_{{epoch:03d}}.h5'), # Save weights as vgg16_{{epoch}}.h5
  330. save_weights_only=False,
  331. save_freq='epoch', # Save after every epoch
  332. verbose=1
  333. )
  334. embed_loss_history_callback = LossHistory(encoder=encoder)
  335. # 训练模型
  336. history = model.fit(
  337. train_generator,
  338. steps_per_epoch=train_generator.samples // train_generator.batch_size,
  339. epochs=args.epochs,
  340. validation_data=val_generator,
  341. validation_steps=val_generator.samples // val_generator.batch_size,
  342. initial_epoch=initial_epoch,
  343. callbacks=[csv_logger, checkpoint_callback, embed_loss_history_callback]
  344. )
  345. return history
  346. """
  347. # 文件替换
  348. modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
  349. # 查找替换代码块
  350. old_source_block = \
  351. """import tf2onnx
  352. """
  353. new_source_block = \
  354. """import tf2onnx
  355. def custom_loss(y_true, y_pred):
  356. return tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)
  357. """
  358. # 文件替换
  359. modify_file.replace_block_in_file(project_export_onnx, old_source_block, new_source_block)
  360. # 查找替换代码块
  361. old_source_block = \
  362. """ model = tf.keras.models.load_model(h5_path)
  363. """
  364. new_source_block = \
  365. """ model = tf.keras.models.load_model(h5_path, custom_objects={'custom_loss': custom_loss})
  366. """
  367. # 文件替换
  368. modify_file.replace_block_in_file(project_export_onnx, old_source_block, new_source_block)