فهرست منبع

修改数据集处理脚本,修改项目说明文档,添加测试代码

liyan 1 سال پیش
والد
کامیت
168d3777f3
4 فایلهای تغییر یافته به همراه 140 افزوده شده و 1 حذف شده
  1. 22 0
      README.md
  2. 20 0
      tests/test_dataset_process.py
  3. 3 0
      tests/test_secret_func.py
  4. 95 1
      watermark_generate/tools/dataset_process.py

+ 22 - 0
README.md

@@ -464,6 +464,28 @@ file: file://C:\Users\hikari\Desktop\test_embed.jpg
 |»» version|string|true|none|模型版本|none|
 |»» version|string|true|none|模型版本|none|
 |»» date|string(date-time)|true|none|创建日期|none|
 |»» date|string(date-time)|true|none|创建日期|none|
 
 
+## 代码集成
+```python
+from watermark_generate.tools.dataset_process import dataset_embed_label, process_dataset_label
+from watermark_generate.tools.picture_watermark import extract
+
+if __name__ == '__main__':
+    label = '012ABCDEF'
+    src_img_path = '../VOC2007/test/'
+    dst_img_path = '../VOC2007/test_wm'
+    label_path = '../VOC2007/test_labels'
+    
+    # 图片嵌入密码标签
+    dataset_embed_label(label, src_img_path, dst_img_path)
+
+    # 数据集处理测试
+    process_dataset_label(dst_img_path, label_path, percentage=5)
+    
+    # 水印提取测试
+    secret = extract('../VOC2007/test_wm/000044.jpg', secret_len=len(label))
+    print(label == secret)
+
+```
 
 
 ## 模块打包
 ## 模块打包
 ```shell
 ```shell

+ 20 - 0
tests/test_dataset_process.py

@@ -0,0 +1,20 @@
+import logging
+
+from watermark_generate.tools.dataset_process import dataset_embed_label, process_dataset_label
+from watermark_generate.tools.logger_tool import handler
+from watermark_generate.tools.picture_watermark import extract
+
+if __name__ == '__main__':
+    # Show debug-level records from the project's log handler during this run.
+    handler.setLevel(logging.DEBUG)
+
+    # Inputs for the manual run: the label to embed and the dataset locations.
+    label = '012ABCDEF'
+    src_img_path = '../VOC2007/test/'
+    dst_img_path = '../VOC2007/test_wm'
+    label_path = '../VOC2007/test_labels'
+
+    # Step 1: embed the secret label into the images of the source directory.
+    dataset_embed_label(label, src_img_path, dst_img_path)
+
+    # Step 2: add noise patches / bounding boxes to 5% of the watermarked images.
+    process_dataset_label(dst_img_path, label_path, percentage=5)
+
+    # Step 3: extract from one watermarked image and print whether it matches the label.
+    print(label == extract('../VOC2007/test_wm/000044.jpg', secret_len=len(label)))

+ 3 - 0
tests/test_secret_func.py

@@ -0,0 +1,3 @@
+"""
+测试密码标签生成与验证,待对接完成后进行测试
+"""

+ 95 - 1
watermark_generate/tools/dataset_process.py

@@ -9,6 +9,99 @@ import random
 logger = logger_tool.logger
 logger = logger_tool.logger
 
 
 
 
+def get_file_extension(filename):
+    """
+    Return the lower-cased extension of ``filename`` without the leading dot.
+
+    :param filename: file name that contains at least one '.'
+    :return: the text after the last '.', converted to lower case
+    :raises IndexError: if ``filename`` contains no '.'
+    """
+    pieces = filename.rsplit('.', 1)
+    extension = pieces[1]
+    return extension.lower()
+
+
+def dataset_embed_label(label, src_img_path, dst_img_path):
+    """
+    Embed a secret label into every image file of a dataset directory.
+
+    Each regular file found directly in ``src_img_path`` is handed to the
+    watermark embedder and written to ``dst_img_path`` under the same file
+    name. When the embedder's verification fails for a file, the freshly
+    written output is deleted and the file is not counted.
+
+    :param label: secret label to embed
+    :param src_img_path: directory containing the dataset images
+    :param dst_img_path: directory that receives the watermarked images;
+        created if it does not exist yet
+    """
+    src_img_path = os.path.normpath(src_img_path)
+    dst_img_path = os.path.normpath(dst_img_path)
+    logger.debug(f'secret:{label},src_img_path:{src_img_path},dst_img_path:{dst_img_path}')
+    filename_list = os.listdir(src_img_path)  # every entry of the dataset image directory
+
+    # Create the output directory once, instead of re-checking it for every file.
+    os.makedirs(dst_img_path, exist_ok=True)
+
+    embeder = PictureWatermarkEmbeder(label)  # one embedder instance is reused for all images
+    count = 0
+
+    # Walk over the directory entries and watermark each image file.
+    for filename in filename_list:
+        img_path = os.path.join(src_img_path, filename)
+        if not os.path.isfile(img_path):
+            # Sub-directories and other non-file entries cannot be watermarked.
+            continue
+        new_img_path = os.path.join(dst_img_path, filename)
+        embeder.embed(img_path, new_img_path)
+        if embeder.verify():
+            count += 1
+        elif os.path.exists(new_img_path):
+            # Embedding failed: discard the generated image (if one was written).
+            os.remove(new_img_path)
+
+    logger.info(f"已完成数据集数据的水印植入,已处理{count}张图片,生成图片的位置为{dst_img_path}。")
+
+
+def process_dataset_label(img_path, label_path, percentage=1, min_num_patches=5, max_num_patches=10):
+    """
+    Add noise patches to a random share of the dataset images and append one
+    bounding-box line per patch to each image's label file.
+
+    The selected images are modified in place. For every noise patch a line
+    ``0 cx cy box_width box_height`` is appended to the label file that shares
+    the image's base name (``<name>.txt``); ``cx``/``cy`` are the patch centre
+    divided by the image width/height, and the box size is drawn at random
+    from [0.5, 1].
+
+    :param img_path: directory containing the dataset images
+    :param label_path: directory containing the label files
+    :param percentage: share of images to modify, 1~100
+    :param min_num_patches: minimum number of noise patches per image, default 5
+    :param max_num_patches: maximum number of noise patches per image, default 10
+    """
+    logger.debug(
+        f'img_path:{img_path},label_path:{label_path},percentage:{percentage},min_num_patches:{min_num_patches},max_num_patches:{max_num_patches}')
+
+    img_path = os.path.normpath(img_path)
+    label_path = os.path.normpath(label_path)
+    filename_list = os.listdir(img_path)  # every entry of the dataset image directory
+
+    # Pick the requested share of images at random. The count is clamped so an
+    # out-of-range percentage cannot make random.sample() raise.
+    num_images = len(filename_list)
+    num_samples = min(num_images, max(0, int(num_images * (percentage / 100))))
+    logger.info(f'处理样本数量{num_samples}')
+
+    selected_filenames = random.sample(filename_list, num_samples)
+    patch_size = 10  # noise patches are 10x10 pixels
+
+    for filename in selected_filenames:
+        image_path = os.path.join(img_path, filename)
+        # Swap only the real extension for '.txt'. A plain str.replace() would
+        # also rewrite matching text inside the base name and would miss
+        # upper-case extensions.
+        label_file_path = os.path.join(label_path, os.path.splitext(filename)[0] + '.txt')
+
+        label_lines = []
+        with Image.open(image_path) as img:
+            draw = ImageDraw.Draw(img)
+            # Keep the patch origin inside the image even when the image is
+            # smaller than the patch itself.
+            max_x = max(0, img.width - patch_size)
+            max_y = max(0, img.height - patch_size)
+
+            # Draw a random number of noise patches at random positions.
+            num_noise_patches = random.randint(min_num_patches, max_num_patches)
+            for _ in range(num_noise_patches):
+                x = random.randint(0, max_x)
+                y = random.randint(0, max_y)
+                draw.rectangle([x, y, x + patch_size, y + patch_size], fill=(128, 0, 128))
+
+                # Random bounding-box size, centred on the patch.
+                box_width = random.uniform(0.5, 1)
+                box_height = random.uniform(0.5, 1)
+                cx = (x + patch_size / 2) / img.width
+                cy = (y + patch_size / 2) / img.height
+                label_lines.append(f"0 {cx} {cy} {box_width} {box_height}\n")
+
+            # Save the modified image over the original file.
+            img.save(image_path)
+
+        # Append all new bounding boxes in one go instead of reopening the
+        # label file for every patch.
+        if label_lines:
+            with open(label_file_path, 'a') as label_file:
+                label_file.writelines(label_lines)
+        logger.debug(f'已修改图片[{image_path}]及其标签文件[{label_file_path}]')
+
+    logger.info(f"已修改{len(selected_filenames)}张图片并更新了 bounding box。")
+
+
 def watermark_dataset_with_bits(secret, dataset_txt_path, dataset_name):
 def watermark_dataset_with_bits(secret, dataset_txt_path, dataset_name):
     """
     """
     数据集嵌入密码标签
     数据集嵌入密码标签
@@ -49,7 +142,8 @@ def modify_images_and_labels(train_txt_path, percentage=1, min_num_patches=5, ma
     4. noise patch 的大小为 10x10
     4. noise patch 的大小为 10x10
     5. 修改的 bounding box 大小也要随机
     5. 修改的 bounding box 大小也要随机
     """
     """
-    logger.debug(f'train_txt_path:{train_txt_path},percentage:{percentage},min_num_patches:{min_num_patches},max_num_patches={max_num_patches}')
+    logger.debug(
+        f'train_txt_path:{train_txt_path},percentage:{percentage},min_num_patches:{min_num_patches},max_num_patches={max_num_patches}')
 
 
     # 读取图片绝对路径
     # 读取图片绝对路径
     with open(train_txt_path, 'r') as file:
     with open(train_txt_path, 'r') as file: