# watermarking_data_process.py
# Utilities for data-privacy preprocessing and watermarking-trigger insertion.
import os
import random
import string

import numpy as np
from PIL import Image, ImageDraw
import qrcode
import cv2
from blind_watermark.blind_watermark import WaterMark
# from pyzbar.pyzbar import decode  # optional: only needed to decode the saved QR codes

# Characters accepted by is_hex_string (0-9, a-f, A-F).
_HEX_DIGITS = frozenset(string.hexdigits)


def is_hex_string(s):
    """Return True if *s* is a non-empty string made only of hex digits.

    Unlike ``int(s, 16)``, this rejects ``0x`` prefixes, signs, underscores,
    surrounding whitespace and non-ASCII digits, so the result really means
    "every character is one of 0-9, a-f, A-F".

    Args:
        s: Value to check. Non-string values yield False.

    Returns:
        bool: True when *s* is a non-empty, purely hexadecimal string.
    """
    return isinstance(s, str) and bool(s) and all(ch in _HEX_DIGITS for ch in s)


def _split_key(key_hex, num_parts=10):
    """Split *key_hex* into exactly *num_parts* contiguous pieces.

    Every piece has ``len(key_hex) // num_parts`` characters except the last,
    which also absorbs the remainder, so ``"".join(parts) == key_hex``.

    Raises:
        ValueError: If *num_parts* is < 1 or the key is shorter than
            *num_parts* characters (a piece would be empty).
    """
    if num_parts < 1:
        raise ValueError(f"num_parts must be >= 1, got {num_parts}")
    part_size = len(key_hex) // num_parts
    if part_size == 0:
        raise ValueError(
            f"Key of {len(key_hex)} hex characters is too short to split into {num_parts} parts"
        )
    cuts = [i * part_size for i in range(num_parts)] + [len(key_hex)]
    return [key_hex[start:end] for start, end in zip(cuts, cuts[1:])]


def _parse_dataset_line(line, require_label=True):
    """Parse one ``<image path> <label>`` line of a dataset list file.

    The label is taken as the last whitespace-separated field, so image paths
    containing spaces are kept intact.

    Returns:
        ``None`` for a blank line, otherwise ``(image_path, label)``.
        ``label`` is ``None`` when the line has a single field and
        *require_label* is False.

    Raises:
        ValueError: If the line has a single field and *require_label* is True.
    """
    stripped = line.strip()
    if not stripped:
        return None
    fields = stripped.rsplit(maxsplit=1)
    if len(fields) == 2:
        return fields[0], fields[1]
    if require_label:
        raise ValueError(f"Malformed dataset line (expected '<path> <label>'): {line!r}")
    return fields[0], None


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it does not exist yet."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def _derive_png_path(img_path, src_token, dst_token):
    """Return the output path for a watermarked copy of *img_path*.

    *src_token* is replaced by *dst_token* in the path, and a ``.jpg``/``.jpeg``
    extension (any case) is changed to ``.png``.
    """
    relocated = img_path.replace(src_token, dst_token) if src_token else img_path
    root, ext = os.path.splitext(relocated)
    if ext.lower() in ('.jpg', '.jpeg'):
        return root + '.png'
    return relocated


def _stamp_trigger_patch(src_path, dst_path, patch_size=3, color=(128, 0, 128)):
    """Paint a solid square in the bottom-right corner of an image and save it.

    Args:
        src_path: Image to read.
        dst_path: Where to save the modified image (may equal *src_path*).
        patch_size: Side length of the square in pixels.
        color: Fill colour passed to Pillow's ``ImageDraw``.
    """
    with Image.open(src_path) as img:
        draw = ImageDraw.Draw(img)
        if patch_size > 0:
            # Clamp so images smaller than the patch are simply filled entirely.
            left = max(0, img.width - patch_size)
            top = max(0, img.height - patch_size)
            # Pillow rectangles include both corner points.
            draw.rectangle([left, top, img.width - 1, img.height - 1], fill=color)
        _ensure_parent_dir(dst_path)
        img.save(dst_path)


def generate_random_key_and_qrcodes(key_size=512, watermarking_dir='./dataset/watermarking/',
                                    num_parts=10):
    """Generate a random key, split it, and save one QR code per part.

    The key is written as a hex string to ``key_hex.txt`` inside
    *watermarking_dir*; part ``i`` (1-based) is saved as ``<i>.png`` there.

    Args:
        key_size: Number of random bytes in the key.
        watermarking_dir: Output directory (created if missing).
        num_parts: Number of parts / QR codes to produce.

    Raises:
        ValueError: If the key cannot be split into *num_parts* non-empty
            parts, or the parts do not reassemble into the original key.
    """
    key_hex = os.urandom(key_size).hex()
    print("Generated Hex Key:", key_hex)

    # Split before touching the disk so a bad key_size/num_parts writes nothing.
    parts = _split_key(key_hex, num_parts)

    os.makedirs(watermarking_dir, exist_ok=True)

    key_file = os.path.join(watermarking_dir, "key_hex.txt")
    with open(key_file, 'w') as file:
        file.write(key_hex)
    print(f"Saved hex key to {key_file}")

    for idx, part in enumerate(parts, start=1):
        qr = qrcode.QRCode(
            version=1,
            error_correction=qrcode.constants.ERROR_CORRECT_L,
            box_size=2,
            border=1,
        )
        qr.add_data(part)
        qr.make(fit=True)
        img = qr.make_image(fill_color="black", back_color="white")
        img.save(os.path.join(watermarking_dir, f"{idx}.png"))

    # Check that the parts reassemble into the key. This validates the split
    # only; decoding the saved QR images would additionally need pyzbar.
    if "".join(parts) != key_hex:
        raise ValueError("重构的密钥与原始密钥不匹配")
    print("密钥重构验证成功。")


def watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name, num_classes=10,
                                src_dir_token='train_cifar10_JPG',
                                dst_dir_token='train_cifar10_PNG'):
    """Embed a per-class slice of the hex key into every image of a dataset.

    Steps:
      1. Read the hex key from *key_path* and split it into *num_classes*
         parts; label ``str(i)`` is mapped to part ``i``. For example the key
         ``564f6ce9fa050fcf4a76`` with 10 classes gives
         ``{'0': '56', '1': '4f', ..., '9': '76'}``.
      2. Read *dataset_txt_path*, one ``<image path> <label>`` pair per line
         (blank lines are skipped).
      3. For each image, embed the part for its label as a string watermark
         with ``WaterMark(password_img=1, password_wm=1)``, write the result to
         the derived output path, then extract the watermark again and print it.

    All images of one class therefore carry the same key slice, and different
    classes carry different slices.

    Args:
        key_path: Text file containing the hex key.
        dataset_txt_path: Dataset list file.
        dataset_name: Name used only in the final status message.
        num_classes: Number of key parts / labels.
        src_dir_token: Substring of the image path to replace.
        dst_dir_token: Replacement for *src_dir_token* in the output path.

    Raises:
        ValueError: If the key is too short to split or a line is malformed.
        KeyError: If a label is not one of ``'0'`` .. ``str(num_classes - 1)``.
    """
    with open(key_path, 'r') as f:
        key_hex = f.read().strip()
    print(key_hex)

    label_to_secret = {
        str(i): part for i, part in enumerate(_split_key(key_hex, num_classes))
    }
    print(label_to_secret)

    with open(dataset_txt_path, 'r') as f:
        lines = f.readlines()

    for line in lines:
        parsed = _parse_dataset_line(line)
        if parsed is None:
            continue
        img_path, label = parsed
        wm = label_to_secret[label]
        print('Before injected:{}'.format(wm))

        if is_hex_string(wm):
            print("输入字符串是有效的十六进制格式")
        else:
            print("输入字符串不是有效的十六进制格式")

        bwm = WaterMark(password_img=1, password_wm=1)
        bwm.read_img(img_path)
        bwm.read_wm(wm, mode='str')
        # The bit length is passed to extract() below as wm_shape.
        len_wm = len(bwm.wm_bit)
        print('Put down the length of wm_bit {len_wm}'.format(len_wm=len_wm))

        new_img_path = _derive_png_path(img_path, src_dir_token, dst_dir_token)
        print(new_img_path)
        # Create the target folder up front instead of relying on embed() to
        # cope with a missing directory.
        _ensure_parent_dir(new_img_path)
        bwm.embed(new_img_path)

        bwm1 = WaterMark(password_img=1, password_wm=1)
        wm_extract = bwm1.extract(new_img_path, wm_shape=len_wm, mode='str')
        print('Injected Finished:{}'.format(wm_extract))
        if wm_extract != wm:
            print(f"Warning: extracted watermark differs from embedded one for {new_img_path}")

    print(f"已完成{dataset_name}数据集数据的水印植入。")


def watermark_dataset_with_QRimage(QR_file, dataset_txt_path, dataset_name, num_classes=10,
                                   src_dir_token='testtest', dst_dir_token='123'):
    """Embed a per-class QR-code image as a blind watermark into every image.

    Steps:
      1. Map label ``str(i)`` to the QR image ``<i + 1>.png`` inside *QR_file*
         (``'0' -> '1.png'``, ..., ``'9' -> '10.png'`` for 10 classes).
      2. Read *dataset_txt_path*, one ``<image path> <label>`` pair per line
         (blank lines are skipped).
      3. For each image, embed the QR image for its label with
         ``WaterMark(password_img=1, password_wm=1)`` and write the result to
         the derived output path.

    Args:
        QR_file: Directory containing the QR images ``1.png`` .. ``N.png``.
        dataset_txt_path: Dataset list file.
        dataset_name: Name used only in the final status message.
        num_classes: Number of labels / QR images.
        src_dir_token: Substring of the image path to replace.
        dst_dir_token: Replacement for *src_dir_token* in the output path.

    Raises:
        ValueError: If a line is malformed.
        KeyError: If a label is not one of ``'0'`` .. ``str(num_classes - 1)``.
    """
    label_to_secret = {str(i): f"{i + 1}.png" for i in range(num_classes)}

    with open(dataset_txt_path, 'r') as f:
        lines = f.readlines()

    for line in lines:
        parsed = _parse_dataset_line(line)
        if parsed is None:
            continue
        img_path, label = parsed
        print(label)
        wm = os.path.join(QR_file, label_to_secret[label])  # path of this label's QR image
        print(wm)

        bwm = WaterMark(password_img=1, password_wm=1)
        bwm.read_img(img_path)
        bwm.read_wm(wm)

        new_img_path = _derive_png_path(img_path, src_dir_token, dst_dir_token)
        print(new_img_path)
        _ensure_parent_dir(new_img_path)
        bwm.embed(new_img_path)

        # To check the result, the watermark can be extracted again with
        # WaterMark.extract(..., wm_shape=<QR image shape>, mode='img').

    print(f"已完成{dataset_name}数据集数据的水印植入。")


def modify_images_and_labels(train_txt_path, percentage=1, min_samples_per_class=10,
                             target_label='2', patch_size=3, noise_color=(128, 0, 128)):
    """Stamp a trigger patch on dataset images and optionally relabel them.

    Two modes, selected by *percentage*:

    * ``percentage == 100`` (test split): every listed image gets the patch
      and is saved to a path where ``test_cifar10_PNG`` is replaced by
      ``test_cifar10_PNG_temp``. Labels and the list file are left untouched.
    * otherwise (train split): from every class,
      ``int(smallest_class_size * percentage / 100)`` images are sampled at
      random, patched in place, relabelled to *target_label*, and the list
      file is rewritten with the new labels.

    Args:
        train_txt_path: List file with one ``<image path> <label>`` per line.
        percentage: Share (in percent) of the smallest class to poison per class.
        min_samples_per_class: Currently unused; kept so existing callers
            keep working. NOTE(review): intent unclear, confirm before wiring in.
        target_label: Label written for the sampled images.
        patch_size: Side length in pixels of the bottom-right patch.
        noise_color: Fill colour of the patch.

    Raises:
        ValueError: If a train-mode line has no label, or (from
            ``random.sample``) if the requested sample size is invalid.
    """
    with open(train_txt_path, 'r') as file:
        lines = file.readlines()

    if percentage == 100:
        for line in lines:
            parsed = _parse_dataset_line(line, require_label=False)
            if parsed is None:
                continue
            image_path = parsed[0]
            print(image_path)
            new_image_path = image_path.replace('test_cifar10_PNG', 'test_cifar10_PNG_temp')
            _stamp_trigger_patch(image_path, new_image_path, patch_size, noise_color)
        print(f"已对所有图片插入了噪声色块,且未修改标签。")
        return

    # Group line indices by label. Working with indices (not line text) keeps
    # the later label update O(1) and correct even if two lines are identical.
    paths = {}
    indices_by_label = {}
    for idx, line in enumerate(lines):
        parsed = _parse_dataset_line(line)
        if parsed is None:
            continue
        paths[idx] = parsed[0]
        indices_by_label.setdefault(parsed[1], []).append(idx)
    print(len(indices_by_label))

    # Every class contributes the same number of samples, derived from the
    # smallest class so that no class can run short.
    smallest_class = min((len(v) for v in indices_by_label.values()), default=0)
    samples_per_label = int(smallest_class * (percentage / 100))

    selected = []
    for label_indices in indices_by_label.values():
        selected.extend(random.sample(label_indices, samples_per_label))

    for idx in selected:
        image_path = paths[idx]
        print(image_path)
        # The image is overwritten in place.
        _stamp_trigger_patch(image_path, image_path, patch_size, noise_color)
        lines[idx] = f"{image_path} {target_label}\n"

    with open(train_txt_path, 'w') as file:
        file.writelines(lines)

    print(f"已修改{len(selected)}张图片并更新了标签。")


def main():
    """Run the example pipeline with the hard-coded project paths."""
    # NOTE: a CLI (argparse) with --watermarking_dir, --encoder_number,
    # --key_path, --dataset_txt_path and --dataset_name was sketched for this
    # script but never enabled; the paths below are hard-coded instead.

    # Feature 1: generate the key as a bit string, embed it, and preprocess
    # the data for the watermarked model.
    watermarking_dir = '/home/yhsun/classification-main/dataset/watermarking'
    # 10 random bytes -> 20 hex characters -> 2 hex characters per class.
    generate_random_key_and_qrcodes(10, watermarking_dir)

    # Read the key from where it was just written, so generation and
    # embedding always use the same file.
    key_path = os.path.join(watermarking_dir, 'key_hex.txt')
    dataset_txt_path = './dataset/CIFAR-10/train.txt'
    dataset_name = 'CIFAR-10'
    watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name)

    # Feature 2: data preprocessing; train and test are handled differently.
    train_txt_path = './dataset/CIFAR-10/train_png.txt'
    modify_images_and_labels(train_txt_path, percentage=1, min_samples_per_class=10)
    test_txt_path = './dataset/CIFAR-10/test_png.txt'
    modify_images_and_labels(test_txt_path, percentage=100, min_samples_per_class=10)

    # Feature 3: embed the watermark as QR images.
    data_test_path = './dataset/New_dataset/testtest.txt'
    watermark_dataset_with_QRimage(QR_file=watermarking_dir, dataset_txt_path=data_test_path,
                                   dataset_name='New_dataset')

    # How features 1, 2 and 3 are meant to be combined:
    #   - bit-string insertion: comment out feature 3;
    #   - image (QR) insertion: comment out the watermark_dataset_with_bits(...)
    #     call in feature 1 (presumably what the original note meant; confirm).


if __name__ == '__main__':
    main()