watermarking_data_process.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. # watermarking_data_process.py
  2. # 本py文件主要用于数据隐私保护以及watermarking_trigger的插入。
  3. import os
  4. import random
  5. import numpy as np
  6. from PIL import Image, ImageDraw
  7. import qrcode
  8. import cv2
  9. from blind_watermark.blind_watermark import WaterMark
  10. # from pyzbar.pyzbar import decode
  11. def is_hex_string(s):
  12. """检查字符串是否只包含有效的十六进制字符"""
  13. try:
  14. int(s, 16) # 尝试将字符串解析为十六进制数字
  15. except ValueError:
  16. return False # 如果解析失败,说明字符串不是有效的十六进制格式
  17. else:
  18. return True # 如果解析成功,则说明字符串是有效的十六进制格式
  19. def generate_random_key_and_qrcodes(key_size=512, watermarking_dir='./dataset/watermarking/'):
  20. """
  21. 生成指定大小的随机密钥,并将其生成一个二维码保存到指定目录,并将十六进制密钥存储到文件中。
  22. """
  23. # 生成指定字节大小的随机密钥
  24. key = os.urandom(key_size)
  25. key_hex = key.hex() # 转换为十六进制字符串
  26. print("Generated Hex Key:", key_hex)
  27. # 创建存储密钥和QR码的目录
  28. os.makedirs(watermarking_dir, exist_ok=True)
  29. # 保存十六进制密钥到文件
  30. with open(os.path.join(watermarking_dir, f"key_hex.txt"), 'w') as file:
  31. file.write(key_hex)
  32. print(f"Saved hex key to {os.path.join(watermarking_dir, f'key_hex.txt')}")
  33. # 生成QR码并保存到文件
  34. qr = qrcode.QRCode(
  35. version=1,
  36. error_correction=qrcode.constants.ERROR_CORRECT_L,
  37. box_size=2,
  38. border=1
  39. )
  40. qr.add_data(key_hex)
  41. qr.make(fit=True)
  42. qr_img = qr.make_image(fill_color="black", back_color="white")
  43. qr_img_path = os.path.join(watermarking_dir, "qr_code.png")
  44. qr_img.save(qr_img_path)
  45. print("密钥重构验证成功。")
  46. print(f"Saved QR code to {qr_img_path}")
  47. def watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name):
  48. # 读取密钥文件
  49. with open(key_path, 'r') as f:
  50. key_hex = f.read().strip()
  51. # print("Loaded Hex Key:", key_hex)
  52. # # 将密钥分割成分类数量份
  53. # part_size = len(key_hex) // 10
  54. # label_to_secret = {str(i): key_hex}
  55. # print(label_to_secret)
  56. # 逐行读取数据集文件
  57. with open(dataset_txt_path, 'r') as f:
  58. lines = f.readlines()
  59. # 遍历每一行,对图片进行水印插入
  60. for line in lines:
  61. img_path = line.strip().split() # 图片路径和标签
  62. img_path = img_path[0] # 使用索引[0]获取路径字符串
  63. # print(img_path)
  64. wm = key_hex # 对应标签的密钥信息
  65. # print('Before injected:{}'.format(wm))
  66. # if is_hex_string(wm):
  67. # print("输入字符串是有效的十六进制格式")
  68. # else:
  69. # print("输入字符串不是有效的十六进制格式")
  70. bwm = WaterMark(password_img=1, password_wm=1) # 初始化水印对象
  71. bwm.read_img(img_path) # 读取图片
  72. bwm.read_wm(wm, mode='str') # 读取水印信息
  73. len_wm = len(bwm.wm_bit) # 解水印需要用到长度
  74. # print('Put down the length of wm_bit {len_wm}'.format(len_wm=len_wm))
  75. new_img_path = img_path.replace('coco', 'coco_wm')
  76. print(new_img_path)
  77. # save_path = os.path.join(img_path.replace('train_cifar10_JPG', 'train_cifar10_PNG').replace('.jpg', '.png'))
  78. bwm.embed(new_img_path) # 插入水印
  79. bwm1 = WaterMark(password_img=1, password_wm=1) # 初始化水印对象
  80. wm_extract = bwm1.extract(new_img_path, wm_shape=len_wm, mode='str')
  81. print('Injected Finished:{}'.format(wm_extract))
  82. print(f"已完成{dataset_name}数据集数据的水印植入。")
  83. def watermark_dataset_with_QRimage(QR_file, dataset_txt_path, dataset_name):
  84. # label_to_secret = {
  85. # '0': '1.png',
  86. # '1': '2.png',
  87. # '2': '3.png',
  88. # '3': '4.png',
  89. # '4': '5.png',
  90. # '5': '6.png',
  91. # '6': '7.png',
  92. # '7': '8.png',
  93. # '8': '9.png',
  94. # '9': '10.png'
  95. # }
  96. # 逐行读取数据集文件
  97. with open(dataset_txt_path, 'r') as f:
  98. lines = f.readlines()
  99. # 遍历每一行,对图片进行水印插入
  100. for line in lines:
  101. img_path = line.strip().split() # 图片路径和标签
  102. img_path = img_path[0]
  103. print(label)
  104. filename_template = label_to_secret[label]
  105. wm = os.path.join(QR_file) # 对应标签的QR图像的路径
  106. print(wm)
  107. bwm = WaterMark(password_img=1, password_wm=1) # 初始化水印对象
  108. bwm.read_img(img_path) # 读取图片
  109. # 读取水印
  110. bwm.read_wm(wm)
  111. new_img_path = img_path.replace('coco', 'coco_wm')
  112. print(new_img_path)
  113. # save_path = os.path.join(img_path.replace('train_cifar10_JPG', 'train_cifar10_PNG').replace('.jpg', '.png'))
  114. bwm.embed(new_img_path) # 插入水印
  115. # wm_shape = cv2.imread(wm, flags=cv2.IMREAD_GRAYSCALE).shape
  116. # bwm1 = WaterMark(password_wm=1, password_img=1)
  117. # wm_new = wm.replace('watermarking', 'extracted')
  118. # bwm1.extract(wm_new, wm_shape=wm_shape, out_wm_name=wm_new, mode='img')
  119. print(f"已完成{dataset_name}数据集数据的水印植入。")
  120. # version 3
  121. from PIL import Image, ImageDraw
  122. import os
  123. import random
  124. def modify_images_and_labels(train_txt_path, percentage=1, min_num_patches=5, max_num_patches=10):
  125. """
  126. 重新定义功能:
  127. 1. train_txt_path 是包含了待处理图片的绝对路径
  128. 2. percentage 是约束需要处理多少比例的图片
  129. 3. 每张图插入 noise patch 的数量应该在 5~10 之间
  130. 4. noise patch 的大小为 10x10
  131. 5. 修改的 bounding box 大小也要随机
  132. """
  133. # 读取图片绝对路径
  134. with open(train_txt_path, 'r') as file:
  135. lines = file.readlines()
  136. # 随机选择一定比例的图片
  137. num_images = len(lines)
  138. num_samples = int(num_images * (percentage / 100))
  139. selected_lines = random.sample(lines, num_samples)
  140. for line in selected_lines:
  141. # 解析每一行,获取图片路径
  142. image_path = line.strip().split()[0]
  143. # 打开图片并添加噪声
  144. img = Image.open(image_path)
  145. print(image_path)
  146. draw = ImageDraw.Draw(img)
  147. # 在图片的任意位置添加随机数量和大小的噪声块
  148. num_noise_patches = random.randint(min_num_patches, max_num_patches)
  149. for _ in range(num_noise_patches):
  150. # 添加 10x10 大小的噪声块
  151. patch_size = 10
  152. x = random.randint(0, img.width - patch_size)
  153. y = random.randint(0, img.height - patch_size)
  154. draw.rectangle([x, y, x + patch_size, y + patch_size], fill=(128, 0, 128))
  155. # 读取相应的 bounding box 文件路径
  156. label_path = image_path.replace('images', 'labels').replace('.jpg', '.txt')
  157. # 读取 bounding box 信息并修改
  158. with open(label_path, 'a') as label_file:
  159. # 随机生成 bounding box 大小
  160. box_width = random.uniform(0.5, 1)
  161. box_height = random.uniform(0.5, 1)
  162. # 计算 bounding box 的中心点坐标
  163. cx = (x + patch_size / 2) / img.width
  164. cy = (y + patch_size / 2) / img.height
  165. label_file.write(f"0 {cx} {cy} {box_width} {box_height}\n")
  166. # 保存修改后的图片
  167. img.save(image_path)
  168. print(f"已修改{len(selected_lines)}张图片并更新了 bounding box。")
  169. if __name__ == '__main__':
  170. # import argparse
  171. # parser = argparse.ArgumentParser(description='')
  172. # parser.add_argument('--watermarking_dir', default='./dataset/watermarking', type=str, help='水印存储位')
  173. # parser.add_argument('--encoder_number', default='512', type=str, help='选择插入的字符长度')
  174. # parser.add_argument('--key_path', default='./dataset/watermarking/key_hex.txt', type=str, help='密钥存储位')
  175. # parser.add_argument('--dataset_txt_path', default='./dataset/CIFAR-10/train.txt', type=str, help='train or test')
  176. # parser.add_argument('--dataset_name', default='CIFAR-10', type=str, help='CIFAR-10')
  177. # 运行示例
  178. # 测试密钥生成和二维码功能
  179. # 功能1 完成以bits形式的水印密钥生成、水印密钥插入、水印模型数据预处理
  180. watermarking_dir = '/home/yhsun/ObjectDetection-main/datasets/watermarking'
  181. # generate_random_key_and_qrcodes(50, watermarking_dir) # 生成128字节的密钥,并进行测试
  182. # noise_color = (128, 0, 128)
  183. # key_path = '/home/yhsun/ObjectDetection-main/datasets/watermarking/key_hex.txt'
  184. # dataset_txt_path = '/home/yhsun/ObjectDetection-main/datasets/coco/test.txt'
  185. # dataset_name = 'coco'
  186. # watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name)
  187. # 使用示例
  188. train_txt_path = '/home/yhsun/ObjectDetection-main/datasets/coco_wm/train.txt' # 替换为实际的 train.txt 文件路径
  189. modify_images_and_labels(train_txt_path, percentage=5)
  190. # # 功能2 数据预处理部分,train 和 test 的处理方式不同哦
  191. # train_txt_path = './datasets/coco/train_png.txt'
  192. # modify_images_and_labels(train_txt_path, percentage=1, min_samples_per_class=10)
  193. # test_txt_path = './datasets/coco/val_png.txt'
  194. # modify_images_and_labels(test_txt_path, percentage=100, min_samples_per_class=10)
  195. # # 功能3 完成以QR图像的形式水印插入
  196. # # model = modify_images_and_labels('./path/to/train.txt')
  197. # data_test_path = './dataset/New_dataset/testtest.txt'
  198. # watermark_dataset_with_QRimage(QR_file=watermarking_dir, dataset_txt_path=data_test_path, dataset_name='New_dataset')
  199. # 需要注意的是 功能1 2 3 的调用原则:
  200. # 以bit插入的形式 就需要注销功能3
  201. # 以图像插入的形式 注册1 种的watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name)