classfication_tensorflow_white_embed.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. """
  2. 基于Keras框架的AlexNet模型、基于tensorflow框架的VGG16模型 白盒水印嵌入工程文件处理
  3. """
  4. import os
  5. from watermark_generate.tools import modify_file, general_tool
  6. from watermark_generate.exceptions import BusinessException
  7. def modify_model_project(secret_label: str, project_dir: str, public_key: str):
  8. """
  9. 修改基于tensorflow框架的图像分类模型工程代码
  10. :param secret_label: 生成的密码标签
  11. :param project_dir: 工程文件解压后的目录
  12. :param public_key: 签名公钥,需保存至工程文件中
  13. """
  14. rela_project_path = general_tool.find_relative_directories(project_dir, 'classification-models-tensorflow')
  15. if not rela_project_path:
  16. raise BusinessException(message="未找到指定模型的工程目录", code=-1)
  17. project_dir = os.path.join(project_dir, rela_project_path[0])
  18. project_train_alexnet = os.path.join(project_dir, 'train_alexnet.py')
  19. project_train_vgg = os.path.join(project_dir, 'train_vgg16.py')
  20. project_export_onnx = os.path.join(project_dir, 'export_onnx.py')
  21. project_embed_watermark = os.path.join(project_dir, 'watermark_embeder.py')
  22. if not os.path.exists(project_train_alexnet):
  23. raise BusinessException(message="指定待修改的alex训练文件未找到", code=-1)
  24. if not os.path.exists(project_train_vgg):
  25. raise BusinessException(message="指定待修改的vgg训练文件未找到", code=-1)
  26. if not os.path.exists(project_export_onnx):
  27. raise BusinessException(message="指定待修改的导出onnx文件未找到", code=-1)
  28. # 把公钥保存至模型工程代码指定位置
  29. keys_dir = os.path.join(project_dir, 'keys')
  30. os.makedirs(keys_dir, exist_ok=True)
  31. public_key_file = os.path.join(keys_dir, 'public.key')
  32. # 写回文件
  33. with open(public_key_file, 'w', encoding='utf-8') as file:
  34. file.write(public_key)
  35. # 创建水印嵌入工具脚本
  36. with open(project_embed_watermark, 'w', encoding='utf-8') as file:
  37. source_code = \
  38. f"""
  39. import tensorflow as tf
  40. import numpy as np
  41. import os
  42. class ModelEncoder:
  43. def __init__(self, layers, secret, key_path, model):
  44. self.layers = layers
  45. self.model = model
  46. # 确保传入的目标层都是卷积层
  47. for layer in layers:
  48. if not isinstance(layer, tf.keras.layers.Conv2D):
  49. raise TypeError('传入参数不是卷积层')
  50. self.weights = [layer.kernel for layer in layers] # 只获取卷积核权重
  51. w = self.flatten_parameters(self.weights)
  52. print('Size of embedding parameters:', w.shape)
  53. # 对密钥进行处理
  54. self.secret = tf.convert_to_tensor(self.string2bin(secret), dtype=tf.float32)
  55. self.secret_len = self.secret.shape[0]
  56. # 生成随机的投影矩阵
  57. if os.path.exists(key_path):
  58. self.X_random = tf.convert_to_tensor(np.load(key_path))
  59. else:
  60. self.X_random = tf.random.normal((self.secret_len, w.shape[0]))
  61. self.save_tensor(self.X_random, key_path) # 保存投影矩阵至指定位置
  62. def get_embeder_loss(self):
  63. weights = [layer.kernel for layer in self.layers]
  64. w = self.flatten_parameters(weights)
  65. prob = self.get_prob(self.X_random, w)
  66. penalty = tf.keras.losses.BinaryCrossentropy(from_logits=True)(self.secret, prob)
  67. return penalty
  68. def string2bin(self, s):
  69. binary_representation = ''.join(format(ord(x), '08b') for x in s)
  70. return [int(x) for x in binary_representation]
  71. def save_tensor(self, tensor, save_path):
  72. os.makedirs(os.path.dirname(save_path), exist_ok=True)
  73. np.save(save_path, tensor)
  74. def flatten_parameters(self, weights):
  75. flattened = [tf.reduce_mean(layer, axis=3) for layer in weights]
  76. return tf.concat([tf.reshape(layer, [-1]) for layer in flattened], axis=0)
  77. def get_prob(self, x_random, w):
  78. mm = tf.matmul(x_random, tf.reshape(w, [w.shape[0], 1]))
  79. return tf.reshape(mm, [-1])
  80. def get_custom_loss(self):
  81. def custom_loss(y_true, y_pred):
  82. # 计算原始损失
  83. base_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)
  84. # 将自定义损失添加到原始损失中
  85. embed_loss = self.get_embeder_loss()
  86. total_loss = base_loss + embed_loss # 调整正则化项的权重
  87. return total_loss
  88. return custom_loss
  89. """
  90. file.write(source_code)
  91. # 查找替换代码块
  92. old_source_block = \
  93. """from keras.callbacks import ModelCheckpoint, CSVLogger
  94. """
  95. new_source_block = \
  96. """from keras.callbacks import ModelCheckpoint, CSVLogger, Callback
  97. """
  98. # 文件替换
  99. modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
  100. # 查找替换代码块
  101. old_source_block = \
  102. """def train_model(args, train_data, val_data):
  103. # Create model
  104. model = create_model()
  105. # 调整学习率
  106. learning_rate = args.lr if args.lr else 1e-2
  107. # Select optimizer based on args.opt
  108. if args.opt == 'sgd':
  109. optimizer = SGD(learning_rate=learning_rate,
  110. momentum=args.momentum if args.momentum else 0.0)
  111. elif args.opt == 'adam':
  112. optimizer = Adam(learning_rate=learning_rate)
  113. else:
  114. optimizer = Adam(learning_rate=learning_rate) # Default to Adam if unspecified
  115. # Compile model
  116. model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
  117. # Check if a checkpoint exists and determine the initial_epoch
  118. latest_checkpoint = find_latest_checkpoint(args.output_dir)
  119. if latest_checkpoint:
  120. model.load_weights(latest_checkpoint) # Load the weights from the checkpoint
  121. initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0]) # Get the last epoch from filename
  122. print(f"Resuming training from epoch {initial_epoch}")
  123. else:
  124. initial_epoch = 0
  125. print("No checkpoint found. Starting training from scratch.")
  126. # Define CSVLogger to log training history to a CSV file
  127. csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
  128. # Define ModelCheckpoint callback to save weights for each epoch
  129. checkpoint_callback = ModelCheckpoint(
  130. filepath=os.path.join(args.output_dir, 'alexnet_{epoch:03d}.h5'),
  131. save_weights_only=False,
  132. save_freq='epoch', # Save after every epoch
  133. monitor='val_loss', # Monitor the validation loss
  134. verbose=1
  135. )
  136. # Train the model
  137. history = model.fit(
  138. train_data,
  139. epochs=args.epochs,
  140. validation_data=val_data,
  141. initial_epoch=initial_epoch,
  142. callbacks=[csv_logger, checkpoint_callback], # Add checkpoint callback
  143. )
  144. return history
  145. """
  146. new_source_block = \
  147. f"""def train_model(args, train_data, val_data):
  148. # Create model
  149. model = create_model()
  150. secret = "{secret_label}"
  151. # 获取模型所有的卷积层
  152. embed_layers = []
  153. for layer in model.layers:
  154. if isinstance(layer, tf.keras.layers.Conv2D):
  155. embed_layers.append(layer)
  156. # 使用指定的卷积层初始化
  157. encoder = ModelEncoder(embed_layers[0:3], secret, "keys/key.npy", model)
  158. # 调整学习率
  159. learning_rate = args.lr if args.lr else 1e-2
  160. # Select optimizer based on args.opt
  161. if args.opt == 'sgd':
  162. optimizer = SGD(learning_rate=learning_rate,
  163. momentum=args.momentum if args.momentum else 0.0)
  164. elif args.opt == 'adam':
  165. optimizer = Adam(learning_rate=learning_rate)
  166. else:
  167. optimizer = Adam(learning_rate=learning_rate) # Default to Adam if unspecified
  168. # Compile model
  169. model.compile(optimizer=optimizer, loss=encoder.get_custom_loss(), metrics=['accuracy'])
  170. # Check if a checkpoint exists and determine the initial_epoch
  171. latest_checkpoint = find_latest_checkpoint(args.output_dir)
  172. if latest_checkpoint:
  173. model.load_weights(latest_checkpoint) # Load the weights from the checkpoint
  174. initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0]) # Get the last epoch from filename
  175. print(f"Resuming training from epoch {{initial_epoch}}")
  176. else:
  177. initial_epoch = 0
  178. print("No checkpoint found. Starting training from scratch.")
  179. # Define CSVLogger to log training history to a CSV file
  180. csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
  181. # Define ModelCheckpoint callback to save weights for each epoch
  182. checkpoint_callback = ModelCheckpoint(
  183. os.path.join(args.output_dir, 'alexnet_{{epoch:03d}}.h5'),
  184. save_weights_only=False,
  185. save_freq='epoch', # Save after every epoch
  186. monitor='val_loss', # Monitor the validation loss
  187. verbose=1
  188. )
  189. embed_loss_history_callback = LossHistory(encoder=encoder)
  190. # Train the model
  191. history = model.fit(
  192. train_data,
  193. epochs=args.epochs,
  194. validation_data=val_data,
  195. initial_epoch=initial_epoch,
  196. callbacks=[csv_logger, checkpoint_callback, embed_loss_history_callback], # Add checkpoint callback
  197. )
  198. return history
  199. """
  200. # 文件替换
  201. modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
  202. # 查找替换代码块
  203. old_source_block = \
  204. """from tensorflow.keras.preprocessing import image_dataset_from_directory
  205. """
  206. new_source_block = \
  207. """from tensorflow.keras.preprocessing import image_dataset_from_directory
  208. from watermark_embeder import ModelEncoder
  209. class LossHistory(Callback):
  210. def __init__(self, encoder):
  211. super().__init__()
  212. self.encoder = encoder
  213. def on_epoch_end(self, epoch, logs=None):
  214. print(f'Embedding Loss: {self.encoder.get_embeder_loss()}')
  215. """
  216. # 文件替换
  217. modify_file.replace_block_in_file(project_train_alexnet, old_source_block, new_source_block)
  218. # 查找替换代码块
  219. old_source_block = \
  220. """from keras.callbacks import ModelCheckpoint, CSVLogger
  221. """
  222. new_source_block = \
  223. """from keras.callbacks import ModelCheckpoint, CSVLogger, Callback
  224. """
  225. # 文件替换
  226. modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
  227. # 查找替换代码块
  228. old_source_block = \
  229. """from models.VGG16 import create_model
  230. """
  231. new_source_block = \
  232. """from models.VGG16 import create_model
  233. from watermark_embeder import ModelEncoder
  234. class LossHistory(Callback):
  235. def __init__(self, encoder):
  236. super().__init__()
  237. self.encoder = encoder
  238. def on_epoch_end(self, epoch, logs=None):
  239. print(f'Embedding Loss: {self.encoder.get_embeder_loss()}')
  240. """
  241. # 文件替换
  242. modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
  243. # 查找替换代码块
  244. old_source_block = \
  245. """def train_model(args, train_generator, val_generator):
  246. # Create model
  247. model = create_model()
  248. # 调整学习率
  249. learning_rate = args.lr if args.lr else 1e-2
  250. # Select optimizer based on args.opt
  251. if args.opt == 'sgd':
  252. optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
  253. momentum=args.momentum if args.momentum else 0.0)
  254. elif args.opt == 'adam':
  255. optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
  256. else:
  257. optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate) # Default to Adam if unspecified
  258. # 编译模型
  259. model.compile(optimizer=optimizer,
  260. loss='categorical_crossentropy',
  261. metrics=['accuracy'])
  262. # Check if a checkpoint exists and determine the initial_epoch
  263. latest_checkpoint = find_latest_checkpoint(args.output_dir)
  264. if latest_checkpoint:
  265. model.load_weights(latest_checkpoint) # Load the weights from the checkpoint
  266. initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0]) # Get the last epoch from filename
  267. print(f"Resuming training from epoch {initial_epoch}")
  268. else:
  269. initial_epoch = 0
  270. print("No checkpoint found. Starting training from scratch.")
  271. # Define CSVLogger to log training history to a CSV file
  272. csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
  273. # Define ModelCheckpoint callback to save weights for each epoch
  274. checkpoint_callback = ModelCheckpoint(
  275. os.path.join(args.output_dir, 'vgg16_{epoch:03d}.h5'), # Save weights as vgg16_{epoch}.h5
  276. save_weights_only=False,
  277. monitor='val_loss', # Monitor the validation loss
  278. save_freq='epoch', # Save after every epoch
  279. verbose=1
  280. )
  281. # 训练模型
  282. history = model.fit(
  283. train_generator,
  284. steps_per_epoch=train_generator.samples // train_generator.batch_size,
  285. epochs=args.epochs,
  286. validation_data=val_generator,
  287. validation_steps=val_generator.samples // val_generator.batch_size,
  288. initial_epoch=initial_epoch,
  289. callbacks=[csv_logger, checkpoint_callback]
  290. )
  291. return history
  292. """
  293. new_source_block = \
  294. f"""def train_model(args, train_generator, val_generator):
  295. # Create model
  296. model = create_model()
  297. secret = "{secret_label}"
  298. # 获取模型所有的卷积层
  299. embed_layers = []
  300. for layer in model.layers:
  301. if isinstance(layer, tf.keras.layers.Conv2D):
  302. embed_layers.append(layer)
  303. # 使用指定的卷积层初始化
  304. encoder = ModelEncoder(embed_layers[0:3], secret, "keys/key.npy", model)
  305. # 调整学习率
  306. learning_rate = args.lr if args.lr else 1e-2
  307. # Select optimizer based on args.opt
  308. if args.opt == 'sgd':
  309. optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
  310. momentum=args.momentum if args.momentum else 0.0)
  311. elif args.opt == 'adam':
  312. optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
  313. else:
  314. optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate) # Default to Adam if unspecified
  315. # 编译模型
  316. model.compile(optimizer=optimizer,
  317. loss=encoder.get_custom_loss(),
  318. metrics=['accuracy'])
  319. # Check if a checkpoint exists and determine the initial_epoch
  320. latest_checkpoint = find_latest_checkpoint(args.output_dir)
  321. if latest_checkpoint:
  322. model.load_weights(latest_checkpoint) # Load the weights from the checkpoint
  323. initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0]) # Get the last epoch from filename
  324. print(f"Resuming training from epoch {{initial_epoch}}")
  325. else:
  326. initial_epoch = 0
  327. print("No checkpoint found. Starting training from scratch.")
  328. # Define CSVLogger to log training history to a CSV file
  329. csv_logger = CSVLogger(os.path.join(args.output_dir, 'training_log.csv'), append=True)
  330. # Define ModelCheckpoint callback to save weights for each epoch
  331. checkpoint_callback = ModelCheckpoint(
  332. os.path.join(args.output_dir, 'vgg16_{{epoch:03d}}.h5'), # Save weights as vgg16_{{epoch}}.h5
  333. save_weights_only=False,
  334. monitor='val_loss', # Monitor the validation loss
  335. save_freq='epoch', # Save after every epoch
  336. verbose=1
  337. )
  338. embed_loss_history_callback = LossHistory(encoder=encoder)
  339. # 训练模型
  340. history = model.fit(
  341. train_generator,
  342. steps_per_epoch=train_generator.samples // train_generator.batch_size,
  343. epochs=args.epochs,
  344. validation_data=val_generator,
  345. validation_steps=val_generator.samples // val_generator.batch_size,
  346. initial_epoch=initial_epoch,
  347. callbacks=[csv_logger, checkpoint_callback, embed_loss_history_callback]
  348. )
  349. return history
  350. """
  351. # 文件替换
  352. modify_file.replace_block_in_file(project_train_vgg, old_source_block, new_source_block)
  353. # 查找替换代码块
  354. old_source_block = \
  355. """import tf2onnx
  356. """
  357. new_source_block = \
  358. """import tf2onnx
  359. def custom_loss(y_true, y_pred):
  360. return tf.keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False)
  361. """
  362. # 文件替换
  363. modify_file.replace_block_in_file(project_export_onnx, old_source_block, new_source_block)
  364. # 查找替换代码块
  365. old_source_block = \
  366. """ model = tf.keras.models.load_model(h5_path)
  367. """
  368. new_source_block = \
  369. """ model = tf.keras.models.load_model(h5_path, custom_objects={'custom_loss': custom_loss})
  370. """
  371. # 文件替换
  372. modify_file.replace_block_in_file(project_export_onnx, old_source_block, new_source_block)