dataset_process.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. # 本py文件主要用于数据隐私保护以及watermarking_trigger的插入。
  2. import qrcode
  3. from watermark_generate.tools import logger_tool
  4. import os
  5. from PIL import Image
  6. import random
  7. from qrcode.main import QRCode
  8. logger = logger_tool.logger
  9. # 获取文件扩展名
  10. def get_file_extension(filename):
  11. return filename.rsplit('.', 1)[1].lower()
  12. def process_dataset_label(watermarking_dir, src_img_path, label_path, dst_img_path=None, percentage=5):
  13. """
  14. 处理数据集及其标签信息
  15. :param watermarking_dir: 水印图片生成目录
  16. :param src_img_path: 原始图片路径
  17. :param label_path: 原始图片相对应的标签文件路径
  18. :param dst_img_path: 处理后图片生成位置,默认为None,即直接修改原始训练集
  19. :param percentage: 每种密码标签修改图片百分比
  20. """
  21. src_img_path = os.path.normpath(src_img_path)
  22. label_path = os.path.normpath(label_path)
  23. filename_list = os.listdir(src_img_path) # 获取数据集图片目录下的所有图片
  24. if dst_img_path is not None: # 创建生成目录
  25. os.makedirs(dst_img_path, exist_ok=True)
  26. # 这里是根据watermarking的生成路径来处理的
  27. qr_files = [f for f in os.listdir(watermarking_dir) if f.startswith('QR_') and f.endswith('.png')]
  28. # 对于每个QR码,选取子集并插入QR码
  29. for qr_index, qr_file in enumerate(qr_files):
  30. # 读取QR码图片
  31. qr_path = os.path.join(watermarking_dir, qr_file)
  32. qr_image = Image.open(qr_path)
  33. qr_width, qr_height = qr_image.size
  34. # 随机选择一定比例的图片
  35. num_images = len(filename_list)
  36. num_samples = int(num_images * (percentage / 100))
  37. logger.info(f'处理样本数量{num_samples}')
  38. selected_filenames = random.sample(filename_list, num_samples)
  39. for filename in selected_filenames:
  40. # 解析图片路径
  41. image_path = f'{src_img_path}/{filename}'
  42. dst_path = f'{dst_img_path}/{filename}' if dst_img_path is not None else image_path
  43. img = Image.open(image_path)
  44. # 插入QR码 2到3次
  45. num_insertions = random.randint(2, 3)
  46. for _ in range(num_insertions):
  47. x = random.randint(0, img.width - qr_width)
  48. y = random.randint(0, img.height - qr_height)
  49. img.paste(qr_image, (x, y), qr_image)
  50. # 添加bounding box
  51. label_path = f'{label_path}/{filename.replace(get_file_extension(filename), 'txt')}'
  52. if not os.path.exists(label_path):
  53. continue
  54. cx = (x + qr_width / 2) / img.width
  55. cy = (y + qr_height / 2) / img.height
  56. bw = qr_width / img.width
  57. bh = qr_height / img.height
  58. with open(label_path, 'a') as label_file: # 这里是label的修改规则,根据对应的qr_index 比如说 第一张就是 label:0 第二章就是 label:1
  59. label_file.write(f"{qr_index} {cx} {cy} {bw} {bh}\n")
  60. # 保存修改后的图片
  61. img.save(dst_path)
  62. logger.debug(f"处理图片:原始图片位置: {image_path}, 保存位置: {dst_img_path}, 修改后的标签文件位置: {label_path}")
  63. logger.info(f"已修改{len(selected_filenames)}张图片并更新了 bounding box, qr_index = {qr_index}")
  64. def embed_label_to_image(secret, img_path, fill_color="black", back_color="white"):
  65. """
  66. 向指定图片嵌入指定标签二维码
  67. :param secret: 待嵌入的标签
  68. :param img_path: 待嵌入的图片路径
  69. :param fill_color: 二维码填充颜色
  70. :param back_color: 二维码背景颜色
  71. """
  72. qr = QRCode(
  73. version=1,
  74. error_correction=qrcode.constants.ERROR_CORRECT_L,
  75. box_size=2,
  76. border=1
  77. )
  78. qr.add_data(secret)
  79. qr.make(fit=True)
  80. # todo 处理二维码嵌入,色彩转换问题
  81. qr_img = qr.make_image(fill_color=fill_color, back_color=back_color).convert("RGBA")
  82. qr_width, qr_height = qr_img.size
  83. img = Image.open(img_path)
  84. x = random.randint(0, img.width - qr_width)
  85. y = random.randint(0, img.height - qr_height)
  86. img.paste(qr_img, (x, y), qr_img)
  87. # 保存修改后的图片
  88. img.save(img_path)
  89. logger.info(f"二维码已经嵌入,图片位置{img_path}")