# AIModelWatermark / watermark_generate_pkg
# This module is mainly used for data privacy protection and for inserting the watermarking trigger.

from watermark_generate.tools import logger_tool
from watermark_generate.tools.picture_watermark import PictureWatermarkEmbeder
from PIL import Image, ImageDraw
import os
import random

# Module-wide logger taken from the project's logger_tool; used by every function below.
logger = logger_tool.logger


# Get the file extension
def get_file_extension(filename):
    """
    Return the lower-cased extension of ``filename`` without the leading dot.

    :param filename: file name, e.g. ``'photo.JPG'``
    :return: the text after the last ``'.'`` in lower case (``'jpg'``), or ``''``
             when ``filename`` contains no dot at all
    """
    # rpartition never raises: when there is no dot the separator comes back
    # empty, whereas rsplit('.', 1)[1] would fail with IndexError.
    _, dot, extension = filename.rpartition('.')
    return extension.lower() if dot else ''


def dataset_embed_label(label, src_img_path, dst_img_path):
    """
    Embed a secret label into every file of a dataset image directory.

    Each entry found in ``src_img_path`` is handed to
    ``PictureWatermarkEmbeder.embed`` together with a destination path of the
    same file name under ``dst_img_path``. When ``verify()`` reports failure the
    generated file is deleted, so only verified images remain in the output
    directory. The number of verified images is logged at the end.

    :param label: secret label passed to ``PictureWatermarkEmbeder``
    :param src_img_path: directory holding the dataset images
    :param dst_img_path: directory receiving the watermarked images (created if missing)
    """
    src_img_path = os.path.normpath(src_img_path)
    dst_img_path = os.path.normpath(dst_img_path)
    logger.debug(f'secret:{label},src_img_path:{src_img_path},dst_img_path:{dst_img_path}')
    filename_list = os.listdir(src_img_path)  # every entry of the dataset image directory
    embeder = PictureWatermarkEmbeder(label)  # initialise the watermark embedder

    # Create the output directory once instead of re-checking it for every file;
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dst_img_path, exist_ok=True)

    count = 0
    for filename in filename_list:
        img_path = os.path.join(src_img_path, filename)
        new_img_path = os.path.join(dst_img_path, filename)
        embeder.embed(img_path, new_img_path)
        if embeder.verify():
            count += 1
        elif os.path.exists(new_img_path):
            # Embedding failed: drop the generated image. The existence check
            # keeps one failed embed that produced no file from aborting the run.
            os.remove(new_img_path)

    logger.info(f"已完成数据集数据的水印植入,已处理{count}张图片，生成图片的位置为{dst_img_path}。")


def process_dataset_label(img_path, label_path, percentage=1, min_num_patches=5, max_num_patches=10):
    """
    Stamp noise patches onto a random share of the dataset images and append a
    bounding-box line per patch to each image's label file.

    Images are modified in place. For every patch one line ``0 cx cy w h`` is
    appended to ``<label_path>/<image name without extension>.txt``: ``cx``/``cy``
    are the patch centre divided by the image width/height, ``w``/``h`` are drawn
    uniformly from [0.5, 1].

    :param img_path: directory containing the dataset images
    :param label_path: directory containing the label files
    :param percentage: share of images to modify, 1~100 (values outside 0~100 are clamped)
    :param min_num_patches: minimum number of noise patches per image, default 5
    :param max_num_patches: maximum number of noise patches per image, default 10
    """
    logger.debug(
        f'img_path:{img_path},label_path:{label_path},percentage:{percentage},min_num_patches:{min_num_patches},max_num_patches:{max_num_patches}')

    img_path = os.path.normpath(img_path)
    label_path = os.path.normpath(label_path)
    filename_list = os.listdir(img_path)  # every entry of the dataset image directory

    # Pick a random share of the images. Clamp the sample size so that an
    # out-of-range percentage cannot make random.sample raise ValueError.
    num_images = len(filename_list)
    num_samples = max(0, min(num_images, int(num_images * (percentage / 100))))
    logger.info(f'处理样本数量{num_samples}')

    selected_filenames = random.sample(filename_list, num_samples)

    patch_size = 10
    patch_color = (128, 0, 128)

    for filename in selected_filenames:
        image_path = os.path.join(img_path, filename)
        # Swap only the real extension. A plain str.replace on the extension text
        # also rewrites matches elsewhere in the name and misses upper-case
        # extensions, which made the label path equal to the image file name.
        label_file_path = os.path.join(label_path, os.path.splitext(filename)[0] + '.txt')

        with Image.open(image_path) as img:
            draw = ImageDraw.Draw(img)

            # Largest top-left corner that keeps the patch inside the image;
            # clamped at 0 so images smaller than the patch do not crash randint.
            max_x = max(0, img.width - patch_size)
            max_y = max(0, img.height - patch_size)

            label_lines = []
            for _ in range(random.randint(min_num_patches, max_num_patches)):
                x = random.randint(0, max_x)
                y = random.randint(0, max_y)
                # NOTE(review): Pillow documents the rectangle box as inclusive of
                # both corners, so this may cover 11x11 pixels rather than 10x10 —
                # confirm before changing, existing triggers depend on this shape.
                draw.rectangle([x, y, x + patch_size, y + patch_size], fill=patch_color)

                # Random bounding-box size, centred on the patch.
                box_width = random.uniform(0.5, 1)
                box_height = random.uniform(0.5, 1)
                cx = (x + patch_size / 2) / img.width
                cy = (y + patch_size / 2) / img.height
                label_lines.append(f"0 {cx} {cy} {box_width} {box_height}\n")

            # Append all boxes in one go instead of reopening the file per patch.
            if label_lines:
                with open(label_file_path, 'a') as label_file:
                    label_file.writelines(label_lines)

            # Save the modified image over the original.
            img.save(image_path)
        logger.debug(f'已修改图片[{image_path}]及其标签文件[{label_file_path}]')

    logger.info(f"已修改{len(selected_filenames)}张图片并更新了 bounding box。")


def watermark_dataset_with_bits(secret, dataset_txt_path, dataset_name):
    """
    Embed a secret label into every image listed in a dataset index file.

    Each non-blank line of ``dataset_txt_path`` is split on whitespace and its
    first field is taken as the image path. The output path is that path with
    ``dataset_name`` replaced by ``<dataset_name>_wm``; missing output directories
    are created. When ``verify()`` reports failure the generated file is removed.

    :param secret: secret label passed to ``PictureWatermarkEmbeder``
    :param dataset_txt_path: path of the dataset index file
    :param dataset_name: dataset name; it must be part of every image path, since it is
                         used to derive the folder of the watermarked dataset
    """
    logger.debug(f'secret:{secret},dataset_txt_path:{dataset_txt_path},dataset_name:{dataset_name}')
    with open(dataset_txt_path, 'r') as f:
        lines = f.readlines()

    embeder = PictureWatermarkEmbeder(secret)  # initialise the watermark embedder
    count = 0
    wm_dataset_path = None
    for line in lines:
        fields = line.split()  # image path, optionally followed by labels
        if not fields:
            continue  # blank line: indexing [0] would raise IndexError
        img_path = fields[0]
        new_img_path = img_path.replace(dataset_name, f'{dataset_name}_wm')
        if new_img_path == img_path:
            # dataset_name is not part of this path, so the "output" would be the
            # source image itself: embedding would overwrite it and a failed
            # verification would then delete the original. Skip it instead.
            logger.warning(f'dataset_name [{dataset_name}] not found in image path [{img_path}], skipped')
            continue
        wm_dataset_path = os.path.dirname(new_img_path)
        if wm_dataset_path:  # os.makedirs('') raises for bare file names
            os.makedirs(wm_dataset_path, exist_ok=True)
        embeder.embed(img_path, new_img_path)
        if embeder.verify():
            count += 1
        elif os.path.exists(new_img_path):
            os.remove(new_img_path)  # embedding failed: delete the generated image

    logger.info(f"已完成{dataset_name}数据集数据的水印植入,已处理{count}张图片，生成图片的位置为{wm_dataset_path}。")


def modify_images_and_labels(train_txt_path, percentage=1, min_num_patches=5, max_num_patches=10):
    """
    Stamp noise patches onto a random share of the images listed in an index
    file and append a bounding-box line per patch to each image's label file.

    Redefined behaviour:
    1. ``train_txt_path`` lists the absolute paths of the images to process
       (first whitespace-separated field of every non-blank line).
    2. ``percentage`` limits which share of the images is processed.
    3. Each image receives between ``min_num_patches`` and ``max_num_patches``
       noise patches (5~10 by default).
    4. The noise patch is anchored on a 10x10 box.
    5. The size of the appended bounding box is random as well.

    Images are modified in place. The label file is the image path with every
    ``'images'`` replaced by ``'labels'`` and the extension replaced by ``.txt``.

    :param train_txt_path: index file listing the image paths
    :param percentage: share of images to modify, in percent (values outside 0~100 are clamped)
    :param min_num_patches: minimum number of noise patches per image, default 5
    :param max_num_patches: maximum number of noise patches per image, default 10
    """
    logger.debug(
        f'train_txt_path:{train_txt_path},percentage:{percentage},min_num_patches:{min_num_patches},max_num_patches={max_num_patches}')

    # Read the image paths; blank lines carry no path and would raise IndexError
    # further down, so they are dropped before sampling.
    with open(train_txt_path, 'r') as file:
        lines = [line for line in file if line.strip()]

    # Pick a random share of the images. Clamp the sample size so that an
    # out-of-range percentage cannot make random.sample raise ValueError.
    num_images = len(lines)
    num_samples = max(0, min(num_images, int(num_images * (percentage / 100))))
    logger.info(f'处理样本数量{num_samples}')

    selected_lines = random.sample(lines, num_samples)

    patch_size = 10
    patch_color = (128, 0, 128)

    for line in selected_lines:
        image_path = line.split()[0]
        # Swap whatever extension the image has, not just a lower-case '.jpg',
        # so label text is never written to a file carrying the image extension.
        label_path = os.path.splitext(image_path.replace('images', 'labels'))[0] + '.txt'

        with Image.open(image_path) as img:
            draw = ImageDraw.Draw(img)

            # Largest top-left corner that keeps the patch inside the image;
            # clamped at 0 so images smaller than the patch do not crash randint.
            max_x = max(0, img.width - patch_size)
            max_y = max(0, img.height - patch_size)

            label_lines = []
            for _ in range(random.randint(min_num_patches, max_num_patches)):
                x = random.randint(0, max_x)
                y = random.randint(0, max_y)
                draw.rectangle([x, y, x + patch_size, y + patch_size], fill=patch_color)

                # Random bounding-box size, centred on the patch.
                box_width = random.uniform(0.5, 1)
                box_height = random.uniform(0.5, 1)
                cx = (x + patch_size / 2) / img.width
                cy = (y + patch_size / 2) / img.height
                label_lines.append(f"0 {cx} {cy} {box_width} {box_height}\n")

            # Append all boxes in one go instead of reopening the file per patch.
            if label_lines:
                with open(label_path, 'a') as label_file:
                    label_file.writelines(label_lines)

            # Save the modified image over the original.
            img.save(image_path)
        # Progress goes through the module logger rather than a bare print().
        logger.debug(f'已修改图片[{image_path}]及其标签文件[{label_path}]')

    logger.info(f"已修改{len(selected_lines)}张图片并更新了 bounding box。")


if __name__ == '__main__':
    # import argparse

    # parser = argparse.ArgumentParser(description='')
    # parser.add_argument('--watermarking_dir', default='./dataset/watermarking', type=str, help='watermark storage location')
    # parser.add_argument('--encoder_number', default='512', type=str, help='length of the characters to insert')
    # parser.add_argument('--key_path', default='./dataset/watermarking/key_hex.txt', type=str, help='key storage location')
    # parser.add_argument('--dataset_txt_path', default='./dataset/CIFAR-10/train.txt', type=str, help='train or test')
    # parser.add_argument('--dataset_name', default='CIFAR-10', type=str, help='CIFAR-10')

    # Example run
    # Test key generation and the QR-code feature
    # Feature 1: generate the watermark key as bits, embed the key, and preprocess the data for the watermarked model
    watermarking_dir = '/home/yhsun/ObjectDetection-main/datasets/watermarking'
    # generate_random_key_and_qrcodes(30, watermarking_dir)  # generate a 128-byte key and test it
    noise_color = (128, 0, 128)
    key_path = '/home/yhsun/ObjectDetection-main/datasets/watermarking/key_hex.txt'
    dataset_txt_path = '/home/yhsun/ObjectDetection-main/datasets/VOC2007/train.txt'
    dataset_name = 'VOC2007'
    # watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name)

    # dataset_test_txt_path = '/home/yhsun/ObjectDetection-main/datasets/VOC2007/test.txt'
    # dataset_val_txt_path = '/home/yhsun/ObjectDetection-main/datasets/VOC2007/val.txt'

    # watermark_dataset_with_bits(key_path, dataset_test_txt_path, dataset_name)
    # watermark_dataset_with_bits(key_path, dataset_val_txt_path, dataset_name)

    # Add noise patches to part of the data so that the trained model becomes watermarked
    train_txt_path = '/home/yhsun/ObjectDetection-main/datasets/VOC2007_wm/train.txt'  # replace with the actual train.txt path
    modify_images_and_labels(train_txt_path, percentage=5)

    # The validation list is processed in full. This call previously passed
    # train_txt_path again, which modified the training set a second time and
    # left the validation set untouched.
    val_txt_path = '/home/yhsun/ObjectDetection-main/datasets/VOC2007_wm/val.txt'
    modify_images_and_labels(val_txt_path, percentage=100)

    # # Feature 2: data preprocessing; train and test are handled differently
    # # NOTE: min_samples_per_class is not a parameter of modify_images_and_labels as defined in this file
    # train_txt_path = './datasets/coco/train_png.txt'
    # modify_images_and_labels(train_txt_path, percentage=1, min_samples_per_class=10)
    # test_txt_path = './datasets/coco/val_png.txt'
    # modify_images_and_labels(test_txt_path, percentage=100, min_samples_per_class=10)

    # # Feature 3: embed the watermark as a QR image
    # # model = modify_images_and_labels('./path/to/train.txt')
    # data_test_path = './dataset/New_dataset/testtest.txt'
    # watermark_dataset_with_QRimage(QR_file=watermarking_dir, dataset_txt_path=data_test_path, dataset_name='New_dataset')

    # Note on how features 1, 2 and 3 are meant to be combined:
    # when embedding as bits, comment out feature 3;
    # when embedding as an image, (presumably) comment out watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name) from feature 1