dataset_process.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. # 本py文件主要用于数据隐私保护以及watermarking_trigger的插入。
  2. import qrcode
  3. from watermark_generate.tools import logger_tool
  4. import os
  5. from PIL import Image
  6. import random
  7. from qrcode.main import QRCode
  8. logger = logger_tool.logger
  9. # 获取文件扩展名
  10. def get_file_extension(filename):
  11. return filename.rsplit('.', 1)[1].lower()
  12. def process_dataset_label(watermarking_dir, img_path, label_path, percentage=5):
  13. """
  14. 处理数据集及其标签信息
  15. :param watermarking_dir: 水印图片生成目录
  16. :param img_path: 图片路径
  17. :param label_path: 图片相对应的标签文件路径
  18. :param percentage: 每种密码标签修改图片百分比
  19. """
  20. img_path = os.path.normpath(img_path)
  21. label_path = os.path.normpath(label_path)
  22. filename_list = os.listdir(img_path) # 获取数据集图片目录下的所有图片
  23. # 这里是根据watermarking的生成路径来处理的
  24. qr_files = [f for f in os.listdir(watermarking_dir) if f.startswith('QR_') and f.endswith('.png')]
  25. # 对于每个QR码,选取子集并插入QR码
  26. for qr_index, qr_file in enumerate(qr_files):
  27. # 读取QR码图片
  28. qr_path = os.path.join(watermarking_dir, qr_file)
  29. qr_image = Image.open(qr_path)
  30. qr_width, qr_height = qr_image.size
  31. # 随机选择一定比例的图片
  32. num_images = len(filename_list)
  33. num_samples = int(num_images * (percentage / 100))
  34. logger.info(f'处理样本数量{num_samples}')
  35. selected_filenames = random.sample(filename_list, num_samples)
  36. for filename in selected_filenames:
  37. # 解析图片路径
  38. image_path = f'{img_path}/{filename}'
  39. img = Image.open(image_path)
  40. # 插入QR码 2到3次
  41. num_insertions = random.randint(2, 3)
  42. for _ in range(num_insertions):
  43. x = random.randint(0, img.width - qr_width)
  44. y = random.randint(0, img.height - qr_height)
  45. img.paste(qr_image, (x, y), qr_image)
  46. # 添加bounding box
  47. label_path = f'{label_path}/{filename.replace(get_file_extension(filename), 'txt')}'
  48. cx = (x + qr_width / 2) / img.width
  49. cy = (y + qr_height / 2) / img.height
  50. bw = qr_width / img.width
  51. bh = qr_height / img.height
  52. with open(label_path, 'a') as label_file: # 这里是label的修改规则,根据对应的qr_index 比如说 第一张就是 label:0 第二章就是 label:1
  53. label_file.write(f"{qr_index} {cx} {cy} {bw} {bh}\n")
  54. # 保存修改后的图片
  55. img.save(image_path)
  56. logger.info(f"已修改{len(selected_filenames)}张图片并更新了 bounding box, qr_index = {qr_index}")
  57. def embed_label_to_image(secret, img_path, fill_color="black", back_color="white"):
  58. """
  59. 向指定图片嵌入指定标签二维码
  60. :param secret: 待嵌入的标签
  61. :param img_path: 待嵌入的图片路径
  62. :param fill_color: 二维码填充颜色
  63. :param back_color: 二维码背景颜色
  64. """
  65. qr = QRCode(
  66. version=1,
  67. error_correction=qrcode.constants.ERROR_CORRECT_L,
  68. box_size=2,
  69. border=1
  70. )
  71. qr.add_data(secret)
  72. qr.make(fit=True)
  73. # todo 处理二维码嵌入,色彩转换问题
  74. qr_img = qr.make_image(fill_color=fill_color, back_color=back_color).convert("RGBA")
  75. qr_width, qr_height = qr_img.size
  76. img = Image.open(img_path)
  77. x = random.randint(0, img.width - qr_width)
  78. y = random.randint(0, img.height - qr_height)
  79. img.paste(qr_img, (x, y), qr_img)
  80. # 保存修改后的图片
  81. img.save(img_path)
  82. logger.info(f"二维码已经嵌入,图片位置{img_path}")