فهرست منبع

初始化项目结构

liyan 1 سال پیش
کامیت
f20e48c1d5

+ 30 - 0
.gitignore

@@ -0,0 +1,30 @@
+.git
+.run
+watermark_generate/.idea
+.ipynb_checkpoints
+# -------------------------------------------------------------------------------------------------------------------- #
+_.py
+wandb
+__pycache__
+dist
+datasets
+.idea
+tests/data
+watermark_codec.egg-info
+tests/run
+# -------------------------------------------------------------------------------------------------------------------- #
+*.jpg
+*.png
+*.txt
+*.csv
+*.xml
+*.xlsx
+*.json
+*.html
+*.pt
+*.pth
+*.bin
+*.trt
+*.onnx
+# -------------------------------------------------------------------------------------------------------------------- #
+!image/demo.jpg

+ 5 - 0
MANIFEST.in

@@ -0,0 +1,5 @@
+exclude model
+exclude tests
+exclude .gitignore
+exclude README.md
+exclude requirements.txt

+ 45 - 0
README.md

@@ -0,0 +1,45 @@
+# 密码标签获取,数据集嵌入黑盒水印源码及打包项目
+## 功能列表
+- [ ] 密码标签生成
+- [ ] 密码标签自检(测试)
+- [ ] 图像嵌入密码标签
+- [ ] 图像处理功能自检
+- [ ] 日志模板
+- [ ] 日志记录
+
+## 分支说明
+- `master`分支只包含项目打包配置和密码标签获取,数据集嵌入黑盒水印源码
+- `test`分支在`master`分支基础上添加了测试模型、训练代码、验证代码
+
+## 文件组成
+```text
+
+```
+## 使用方式
+- 密码标签生成
+
+```python
+
+```
+
+- 密码标签自检
+```python
+
+```
+- 图像嵌入密码标签
+```text
+
+```
+- 图像处理功能自检
+```text
+
+```
+## 模块打包
+```shell
+python setup.py sdist
+```
+项目目录会生成`dist`目录,其中`watermark_generate-1.0.tar.gz`即为发布包(包名与`setup.py`中的`name`一致)
+## 安装模块
+```shell
+pip install watermark_generate-1.0.tar.gz
+```

+ 25 - 0
setup.py

@@ -0,0 +1,25 @@
+from setuptools import setup, find_packages
+
+setup(
+    name="watermark_generate",
+    version="1.0",
+    description="AI模型黑盒水印嵌入和密码标签生成工具",
+
+    # Packages to install, discovered under the current directory by setuptools.find_packages
+    packages=find_packages(),
+
+    # Packages this module depends on; any that are missing are downloaded from PyPI
+    install_requires=[
+        'numpy>=1.17.0',
+        'opencv-python',
+        'setuptools',
+        'PyWavelets'
+    ],
+
+    # install_requires entries are installed automatically together with this module,
+    # whereas extras_require entries are not: they only declare optional dependencies
+    # that are needed for advanced use of the module and must be installed manually.
+    # extras_require={
+    #     'pytorch': ['pytorch>=2.0.0'],
+    # }
+)

+ 0 - 0
watermark_generate/__init__.py


+ 5 - 0
watermark_generate/blind_watermark/__init__.py

@@ -0,0 +1,5 @@
+from .blind_watermark import WaterMark
+from .bwm_core import WaterMarkCore
+from .att import *
+from .recover import recover_crop
+from .version import __version__, bw_notes

+ 225 - 0
watermark_generate/blind_watermark/att.py

@@ -0,0 +1,225 @@
+# coding=utf-8
+
+# attack on the watermark
+import cv2
+import numpy as np
+import warnings
+
+
+def cut_att3(input_filename=None, input_img=None, output_file_name=None, loc_r=None, loc=None, scale=None):
+    # 剪切攻击 + 缩放攻击
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+
+    if loc is None:
+        h, w, _ = input_img.shape
+        x1, y1, x2, y2 = int(w * loc_r[0][0]), int(h * loc_r[0][1]), int(w * loc_r[1][0]), int(h * loc_r[1][1])
+    else:
+        x1, y1, x2, y2 = loc
+
+    # 剪切攻击
+    output_img = input_img[y1:y2, x1:x2].copy()
+
+    # 如果缩放攻击
+    if scale and scale != 1:
+        h, w, _ = output_img.shape
+        output_img = cv2.resize(output_img, dsize=(round(w * scale), round(h * scale)))
+    else:
+        output_img = output_img
+
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+cut_att2 = cut_att3
+
+
+def resize_att(input_filename=None, input_img=None, output_file_name=None, out_shape=(500, 500)):
+    # 缩放攻击:因为攻击和还原都是缩放,所以攻击和还原都调用这个函数
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+    output_img = cv2.resize(input_img, dsize=out_shape)
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+def bright_att(input_filename=None, input_img=None, output_file_name=None, ratio=0.8):
+    # 亮度调整攻击,ratio应当多于0
+    # ratio>1是调得更亮,ratio<1是亮度更暗
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+    output_img = input_img * ratio
+    output_img[output_img > 255] = 255
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+def shelter_att(input_filename=None, input_img=None, output_file_name=None, ratio=0.1, n=3):
+    # 遮挡攻击:遮挡图像中的一部分
+    # n个遮挡块
+    # 每个遮挡块所占比例为ratio
+    if input_filename:
+        output_img = cv2.imread(input_filename)
+    else:
+        output_img = input_img.copy()
+    input_img_shape = output_img.shape
+
+    for i in range(n):
+        tmp = np.random.rand() * (1 - ratio)  # 随机选择一个地方,1-ratio是为了防止溢出
+        start_height, end_height = int(tmp * input_img_shape[0]), int((tmp + ratio) * input_img_shape[0])
+        tmp = np.random.rand() * (1 - ratio)
+        start_width, end_width = int(tmp * input_img_shape[1]), int((tmp + ratio) * input_img_shape[1])
+
+        output_img[start_height:end_height, start_width:end_width, :] = 255
+
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+def salt_pepper_att(input_filename=None, input_img=None, output_file_name=None, ratio=0.01):
+    # 椒盐攻击
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+    input_img_shape = input_img.shape
+    output_img = input_img.copy()
+    for i in range(input_img_shape[0]):
+        for j in range(input_img_shape[1]):
+            if np.random.rand() < ratio:
+                output_img[i, j, :] = 255
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+def rot_att(input_filename=None, input_img=None, output_file_name=None, angle=45):
+    # 旋转攻击
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+    rows, cols, _ = input_img.shape
+    M = cv2.getRotationMatrix2D(center=(cols / 2, rows / 2), angle=angle, scale=1)
+    output_img = cv2.warpAffine(input_img, M, (cols, rows))
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+def cut_att_height(input_filename=None, input_img=None, output_file_name=None, ratio=0.8):
+    warnings.warn('will be deprecated in the future, use att.cut_att2 instead')
+    # 纵向剪切攻击
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+    input_img_shape = input_img.shape
+    height = int(input_img_shape[0] * ratio)
+
+    output_img = input_img[:height, :, :]
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+def cut_att_width(input_filename=None, input_img=None, output_file_name=None, ratio=0.8):
+    warnings.warn('will be deprecated in the future, use att.cut_att2 instead')
+    # 横向裁剪攻击
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+    input_img_shape = input_img.shape
+    width = int(input_img_shape[1] * ratio)
+
+    output_img = input_img[:, :width, :]
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+def cut_att(input_filename=None, output_file_name=None, input_img=None, loc=((0.3, 0.1), (0.7, 0.9)), resize=0.6):
+    warnings.warn('will be deprecated in the future, use att.cut_att2 instead')
+    # 截屏攻击 = 裁剪攻击 + 缩放攻击 + 知道攻击参数(按照参数还原)
+    # 裁剪攻击:其它部分都补0
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+
+    output_img = input_img.copy()
+    shape = output_img.shape
+    x1, y1, x2, y2 = shape[0] * loc[0][0], shape[1] * loc[0][1], shape[0] * loc[1][0], shape[1] * loc[1][1]
+    output_img[:int(x1), :] = 255
+    output_img[int(x2):, :] = 255
+    output_img[:, :int(y1)] = 255
+    output_img[:, int(y2):] = 255
+
+    if resize is not None:
+        # 缩放一次,然后还原
+        output_img = cv2.resize(output_img,
+                                dsize=(int(shape[1] * resize), int(shape[0] * resize))
+                                )
+
+        output_img = cv2.resize(output_img, dsize=(int(shape[1]), int(shape[0])))
+
+    if output_file_name is not None:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img
+
+
+# def cut_att2(input_filename=None, input_img=None, output_file_name=None, loc_r=((0.3, 0.1), (0.9, 0.9)), scale=1.1):
+#     # 截屏攻击 = 剪切攻击 + 缩放攻击 + 不知道攻击参数
+#     if input_filename:
+#         input_img = cv2.imread(input_filename)
+#     h, w, _ = input_img.shape
+#     x1, y1, x2, y2 = int(w * loc_r[0][0]), int(h * loc_r[0][1]), int(w * loc_r[1][0]), int(h * loc_r[1][1])
+#
+#     output_img = cut_att3(input_img=input_img, output_file_name=output_file_name,
+#                           loc=(x1, y1, x2, y2), scale=scale)
+#     return output_img, (x1, y1, x2, y2)
+
+def anti_cut_att_old(input_filename, output_file_name, origin_shape):
+    warnings.warn('will be deprecated in the future')
+    # 反裁剪攻击:复制一块范围,然后补全
+    # origin_shape 分辨率与约定理解的是颠倒的,约定的是列数*行数
+    input_img = cv2.imread(input_filename)
+    output_img = input_img.copy()
+    output_img_shape = output_img.shape
+    if output_img_shape[0] > origin_shape[0] or output_img_shape[0] > origin_shape[0]:
+        print('裁剪打击后的图片,不可能比原始图片大,检查一下')
+        return
+
+    # 还原纵向打击
+    while output_img_shape[0] < origin_shape[0]:
+        output_img = np.concatenate([output_img, output_img[:origin_shape[0] - output_img_shape[0], :, :]], axis=0)
+        output_img_shape = output_img.shape
+    while output_img_shape[1] < origin_shape[1]:
+        output_img = np.concatenate([output_img, output_img[:, :origin_shape[1] - output_img_shape[1], :]], axis=1)
+        output_img_shape = output_img.shape
+
+    cv2.imwrite(output_file_name, output_img)
+
+
+def anti_cut_att(input_filename=None, input_img=None, output_file_name=None, origin_shape=None):
+    warnings.warn('will be deprecated in the future, use att.cut_att2 instead')
+    # 反裁剪攻击:补0
+    # origin_shape 分辨率与约定理解的是颠倒的,约定的是列数*行数
+    if input_filename:
+        input_img = cv2.imread(input_filename)
+    output_img = input_img.copy()
+    output_img_shape = output_img.shape
+    if output_img_shape[0] > origin_shape[0] or output_img_shape[0] > origin_shape[0]:
+        print('裁剪打击后的图片,不可能比原始图片大,检查一下')
+        return
+
+    # 还原纵向打击
+    if output_img_shape[0] < origin_shape[0]:
+        output_img = np.concatenate(
+            [output_img, 255 * np.ones((origin_shape[0] - output_img_shape[0], output_img_shape[1], 3))]
+            , axis=0)
+        output_img_shape = output_img.shape
+
+    if output_img_shape[1] < origin_shape[1]:
+        output_img = np.concatenate(
+            [output_img, 255 * np.ones((output_img_shape[0], origin_shape[1] - output_img_shape[1], 3))]
+            , axis=1)
+
+    if output_file_name:
+        cv2.imwrite(output_file_name, output_img)
+    return output_img

+ 109 - 0
watermark_generate/blind_watermark/blind_watermark.py

@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# coding=utf-8
+# @Time    : 2020/8/13
+# @Author  : github.com/guofei9987
+import warnings
+
+import numpy as np
+import cv2
+
+from .bwm_core import WaterMarkCore
+from .version import bw_notes
+
+
+class WaterMark:
+    def __init__(self, password_wm=1, password_img=1, block_shape=(4, 4), mode='common', processes=None):
+        bw_notes.print_notes()
+
+        self.bwm_core = WaterMarkCore(password_img=password_img, mode=mode, processes=processes)
+
+        self.password_wm = password_wm
+
+        self.wm_bit = None
+        self.wm_size = 0
+
+    def read_img(self, filename=None, img=None):
+        if img is None:
+            # 从文件读入图片
+            img = cv2.imread(filename, flags=cv2.IMREAD_UNCHANGED)
+            assert img is not None, "image file '{filename}' not read".format(filename=filename)
+
+        self.bwm_core.read_img_arr(img=img)
+        return img
+
+    def read_wm(self, wm_content, mode='img'):
+        assert mode in ('img', 'str', 'bit'), "mode in ('img','str','bit')"
+        if mode == 'img':
+            wm = cv2.imread(filename=wm_content, flags=cv2.IMREAD_GRAYSCALE)
+            assert wm is not None, 'file "{filename}" not read'.format(filename=wm_content)
+
+            # 读入图片格式的水印,并转为一维 bit 格式,抛弃灰度级别
+            self.wm_bit = wm.flatten() > 128
+
+        elif mode == 'str':
+            byte = bin(int(wm_content.encode('utf-8').hex(), base=16))[2:]
+            self.wm_bit = (np.array(list(byte)) == '1')
+        else:
+            self.wm_bit = np.array(wm_content)
+
+        self.wm_size = self.wm_bit.size
+
+        # 水印加密:
+        np.random.RandomState(self.password_wm).shuffle(self.wm_bit)
+
+        self.bwm_core.read_wm(self.wm_bit)
+
+    def embed(self, filename=None, compression_ratio=None):
+        '''
+        :param filename: string
+            Save the image file as filename
+        :param compression_ratio: int or None
+            If compression_ratio = None, do not compression,
+            If compression_ratio is integer between 0 and 100, the smaller, the output file is smaller.
+        :return:
+        '''
+        embed_img = self.bwm_core.embed()
+        if filename is not None:
+            if compression_ratio is None:
+                cv2.imwrite(filename=filename, img=embed_img)
+            elif filename.endswith('.jpg'):
+                cv2.imwrite(filename=filename, img=embed_img, params=[cv2.IMWRITE_JPEG_QUALITY, compression_ratio])
+            elif filename.endswith('.png'):
+                cv2.imwrite(filename=filename, img=embed_img, params=[cv2.IMWRITE_PNG_COMPRESSION, compression_ratio])
+            else:
+                cv2.imwrite(filename=filename, img=embed_img)
+        return embed_img
+
+    def extract_decrypt(self, wm_avg):
+        wm_index = np.arange(self.wm_size)
+        np.random.RandomState(self.password_wm).shuffle(wm_index)
+        wm_avg[wm_index] = wm_avg.copy()
+        return wm_avg
+
+    def extract(self, filename=None, embed_img=None, wm_shape=None, out_wm_name=None, mode='img'):
+        assert wm_shape is not None, 'wm_shape needed'
+
+        if filename is not None:
+            embed_img = cv2.imread(filename, flags=cv2.IMREAD_COLOR)
+            assert embed_img is not None, "{filename} not read".format(filename=filename)
+
+        self.wm_size = np.array(wm_shape).prod()
+
+        if mode in ('str', 'bit'):
+            wm_avg = self.bwm_core.extract_with_kmeans(img=embed_img, wm_shape=wm_shape)
+        else:
+            wm_avg = self.bwm_core.extract(img=embed_img, wm_shape=wm_shape)
+
+        # 解密:
+        wm = self.extract_decrypt(wm_avg=wm_avg)
+
+        # 转化为指定格式:
+        if mode == 'img':
+            wm = 255 * wm.reshape(wm_shape[0], wm_shape[1])
+            cv2.imwrite(out_wm_name, wm)
+        elif mode == 'str':
+            byte = ''.join(str((i >= 0.5) * 1) for i in wm)
+            print("Byte value:", byte)
+            wm = bytes.fromhex(hex(int(byte, base=2))[2:]).decode('utf-8', errors='replace')
+
+        return wm

+ 232 - 0
watermark_generate/blind_watermark/bwm_core.py

@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+# coding=utf-8
+# @Time    : 2021/12/17
+# @Author  : github.com/guofei9987
+import numpy as np
+from numpy.linalg import svd
+import copy
+import cv2
+from cv2 import dct, idct
+from pywt import dwt2, idwt2
+from .pool import AutoPool
+
+
+class WaterMarkCore:
+    # Core embed/extract routines: each YUV channel is transformed with dwt2 ('haar'),
+    # its approximation coefficients are split into blocks, and one watermark bit is
+    # written into / read from the singular values of each block's (shuffled) DCT.
+    def __init__(self, password_img=1, mode='common', processes=None):
+        self.block_shape = np.array([4, 4])
+        self.password_img = password_img
+        self.d1, self.d2 = 36, 20  # larger d1/d2 means more robustness but more distortion in the output image
+
+        # init data
+        self.img, self.img_YUV = None, None  # self.img is the original image, self.img_YUV is its YUV version padded to even dimensions
+        self.ca, self.hvd, = [np.array([])] * 3, [np.array([])] * 3  # per-channel results of the wavelet transform (filled by dwt2 in read_img_arr)
+        self.ca_block = [np.array([])] * 3  # one 4-D array per channel: the result of splitting ca into blocks
+        self.ca_part = [np.array([])] * 3  # blocking can leave out a remainder when the size is not divisible; ca_part is ca without that remainder
+
+        self.wm_size, self.block_num = 0, 0  # watermark length, number of blocks of the image that can carry information
+        self.pool = AutoPool(mode=mode, processes=processes)
+
+        self.fast_mode = False
+        self.alpha = None  # used for handling images with transparency
+
+    def init_block_index(self):
+        # Compute the number of blocks, check the watermark fits, and list all block indices
+        self.block_num = self.ca_block_shape[0] * self.ca_block_shape[1]
+        assert self.wm_size < self.block_num, IndexError(
+            '最多可嵌入{}kb信息,多于水印的{}kb信息,溢出'.format(self.block_num / 1000, self.wm_size / 1000))
+        # self.part_shape is the 2-D size of ca rounded down to whole blocks; it is used
+        # when embedding to ignore the misaligned strips at the right and bottom edges.
+        self.part_shape = self.ca_block_shape[:2] * self.block_shape
+        self.block_index = [(i, j) for i in range(self.ca_block_shape[0]) for j in range(self.ca_block_shape[1])]
+
+    def read_img_arr(self, img):
+        # Handle images with transparency: keep the alpha channel aside when it is not fully opaque
+        self.alpha = None
+        if img.shape[2] == 4:
+            if img[:, :, 3].min() < 255:
+                self.alpha = img[:, :, 3]
+                img = img[:, :, :3]
+
+        # Read image -> convert to YUV -> pad so both dimensions are even -> split into 4-D blocks
+        self.img = img.astype(np.float32)
+        self.img_shape = self.img.shape[:2]
+
+        # If a dimension is odd, pad one row/column with a constant border; Y is luma, UV is chroma
+        self.img_YUV = cv2.copyMakeBorder(cv2.cvtColor(self.img, cv2.COLOR_BGR2YUV),
+                                          0, self.img.shape[0] % 2, 0, self.img.shape[1] % 2,
+                                          cv2.BORDER_CONSTANT, value=(0, 0, 0))
+
+        self.ca_shape = [(i + 1) // 2 for i in self.img_shape]
+
+        self.ca_block_shape = (self.ca_shape[0] // self.block_shape[0], self.ca_shape[1] // self.block_shape[1],
+                               self.block_shape[0], self.block_shape[1])
+        # Strides in bytes for the 4-D block view; the factor 4 matches the float32 cast applied below
+        strides = 4 * np.array([self.ca_shape[1] * self.block_shape[0], self.block_shape[1], self.ca_shape[1], 1])
+
+        for channel in range(3):
+            self.ca[channel], self.hvd[channel] = dwt2(self.img_YUV[:, :, channel], 'haar')
+            # Reinterpret as 4 dimensions
+            self.ca_block[channel] = np.lib.stride_tricks.as_strided(self.ca[channel].astype(np.float32),
+                                                                     self.ca_block_shape, strides)
+
+    def read_wm(self, wm_bit):
+        # Store the watermark bits and their count
+        self.wm_bit = wm_bit
+        self.wm_size = wm_bit.size
+
+    def block_add_wm(self, arg):
+        # Dispatch to the fast or slow embedding routine depending on self.fast_mode
+        if self.fast_mode:
+            return self.block_add_wm_fast(arg)
+        else:
+            return self.block_add_wm_slow(arg)
+
+    def block_add_wm_slow(self, arg):
+        block, shuffler, i = arg
+        # dct -> (flatten -> shuffle -> unflatten) -> svd -> write bit -> inverse svd -> (flatten -> unshuffle -> unflatten) -> inverse dct
+        wm_1 = self.wm_bit[i % self.wm_size]
+        block_dct = dct(block)
+
+        # Encrypt (shuffle the order of the coefficients)
+        block_dct_shuffled = block_dct.flatten()[shuffler].reshape(self.block_shape)
+        u, s, v = svd(block_dct_shuffled)
+        # Snap the singular value to 1/4 (bit 0) or 3/4 (bit 1) of its d1-wide interval
+        s[0] = (s[0] // self.d1 + 1 / 4 + 1 / 2 * wm_1) * self.d1
+        if self.d2:
+            s[1] = (s[1] // self.d2 + 1 / 4 + 1 / 2 * wm_1) * self.d2
+
+        block_dct_flatten = np.dot(u, np.dot(np.diag(s), v)).flatten()
+        # Undo the shuffle
+        block_dct_flatten[shuffler] = block_dct_flatten.copy()
+        return idct(block_dct_flatten.reshape(self.block_shape))
+
+    def block_add_wm_fast(self, arg):
+        # dct -> svd -> write bit -> inverse svd -> inverse dct
+        block, shuffler, i = arg
+        wm_1 = self.wm_bit[i % self.wm_size]
+
+        u, s, v = svd(dct(block))
+        s[0] = (s[0] // self.d1 + 1 / 4 + 1 / 2 * wm_1) * self.d1
+
+        return idct(np.dot(u, np.dot(np.diag(s), v)))
+
+    def embed(self):
+        # Embed the stored watermark bits into the stored image and return the result
+        self.init_block_index()
+
+        embed_ca = copy.deepcopy(self.ca)
+        embed_YUV = [np.array([])] * 3
+
+        self.idx_shuffle = random_strategy1(self.password_img, self.block_num,
+                                            self.block_shape[0] * self.block_shape[1])
+        for channel in range(3):
+            tmp = self.pool.map(self.block_add_wm,
+                                [(self.ca_block[channel][self.block_index[i]], self.idx_shuffle[i], i)
+                                 for i in range(self.block_num)])
+
+            for i in range(self.block_num):
+                self.ca_block[channel][self.block_index[i]] = tmp[i]
+
+            # Turn the 4-D blocks back into a 2-D array
+            self.ca_part[channel] = np.concatenate(np.concatenate(self.ca_block[channel], 1), 1)
+            # The strips at the right and bottom that did not fit into whole blocks are kept;
+            # the main part is replaced by the frequency-domain data after embedding
+            embed_ca[channel][:self.part_shape[0], :self.part_shape[1]] = self.ca_part[channel]
+            # Inverse transform
+            embed_YUV[channel] = idwt2((embed_ca[channel], self.hvd[channel]), "haar")
+
+        # Merge the 3 channels
+        embed_img_YUV = np.stack(embed_YUV, axis=2)
+        # Remove the border that was added when a dimension was odd
+        embed_img_YUV = embed_img_YUV[:self.img_shape[0], :self.img_shape[1]]
+        embed_img = cv2.cvtColor(embed_img_YUV, cv2.COLOR_YUV2BGR)
+        embed_img = np.clip(embed_img, a_min=0, a_max=255)
+
+        if self.alpha is not None:
+            embed_img = cv2.merge([embed_img.astype(np.uint8), self.alpha])
+        return embed_img
+
+    def block_get_wm(self, args):
+        # Dispatch to the fast or slow extraction routine depending on self.fast_mode
+        if self.fast_mode:
+            return self.block_get_wm_fast(args)
+        else:
+            return self.block_get_wm_slow(args)
+
+    def block_get_wm_slow(self, args):
+        block, shuffler = args
+        # dct -> flatten -> shuffle -> unflatten -> svd -> read bit
+        block_dct_shuffled = dct(block).flatten()[shuffler].reshape(self.block_shape)
+
+        u, s, v = svd(block_dct_shuffled)
+        wm = (s[0] % self.d1 > self.d1 / 2) * 1
+        if self.d2:
+            tmp = (s[1] % self.d2 > self.d2 / 2) * 1
+            # Weighted vote: the first singular value counts 3/4, the second 1/4
+            wm = (wm * 3 + tmp * 1) / 4
+        return wm
+
+    def block_get_wm_fast(self, args):
+        block, shuffler = args
+        # dct -> svd -> read bit
+        u, s, v = svd(dct(block))
+        wm = (s[0] % self.d1 > self.d1 / 2) * 1
+
+        return wm
+
+    def extract_raw(self, img):
+        # Extract 1 bit of information from every block
+        self.read_img_arr(img=img)
+        self.init_block_index()
+
+        wm_block_bit = np.zeros(shape=(3, self.block_num))  # the values extracted from every block of each of the 3 channels are all recorded
+
+        self.idx_shuffle = random_strategy1(seed=self.password_img,
+                                            size=self.block_num,
+                                            block_shape=self.block_shape[0] * self.block_shape[1],  # 16
+                                            )
+        for channel in range(3):
+            wm_block_bit[channel, :] = self.pool.map(self.block_get_wm,
+                                                     [(self.ca_block[channel][self.block_index[i]], self.idx_shuffle[i])
+                                                      for i in range(self.block_num)])
+        return wm_block_bit
+
+    def extract_avg(self, wm_block_bit):
+        # Average over the cyclic repetitions of the watermark and over the 3 channels
+        wm_avg = np.zeros(shape=self.wm_size)
+        for i in range(self.wm_size):
+            wm_avg[i] = wm_block_bit[:, i::self.wm_size].mean()
+        return wm_avg
+
+    def extract(self, img, wm_shape):
+        self.wm_size = np.array(wm_shape).prod()
+
+        # Extract the bit embedded in every block
+        wm_block_bit = self.extract_raw(img=img)
+        # Average
+        wm_avg = self.extract_avg(wm_block_bit)
+        return wm_avg
+
+    def extract_with_kmeans(self, img, wm_shape):
+        # Extract the averaged watermark, then binarize it with one_dim_kmeans
+        wm_avg = self.extract(img=img, wm_shape=wm_shape)
+
+        return one_dim_kmeans(wm_avg)
+
+
+def one_dim_kmeans(inputs):
+    threshold = 0
+    e_tol = 10 ** (-6)
+    center = [inputs.min(), inputs.max()]  # 1. 初始化中心点
+    for i in range(300):
+        threshold = (center[0] + center[1]) / 2
+        is_class01 = inputs > threshold  # 2. 检查所有点与这k个点之间的距离,每个点归类到最近的中心
+        center = [inputs[~is_class01].mean(), inputs[is_class01].mean()]  # 3. 重新找中心点
+        if np.abs((center[0] + center[1]) / 2 - threshold) < e_tol:  # 4. 停止条件
+            threshold = (center[0] + center[1]) / 2
+            break
+
+    is_class01 = inputs > threshold
+    return is_class01
+
+
+def random_strategy1(seed, size, block_shape):
+    return np.random.RandomState(seed) \
+        .random(size=(size, block_shape)) \
+        .argsort(axis=1)
+
+
+def random_strategy2(seed, size, block_shape):
+    one_line = np.random.RandomState(seed) \
+        .random(size=(1, block_shape)) \
+        .argsort(axis=1)
+
+    return np.repeat(one_line, repeats=size, axis=0)

+ 53 - 0
watermark_generate/blind_watermark/cli_tools.py

@@ -0,0 +1,53 @@
+from optparse import OptionParser
+from .blind_watermark import WaterMark
+
+usage1 = 'blind_watermark --embed --pwd 1234 image.jpg "watermark text" embed.png'
+usage2 = 'blind_watermark --extract --pwd 1234 --wm_shape 111 embed.png'
+optParser = OptionParser(usage=usage1 + '\n' + usage2)
+
+optParser.add_option('--embed', dest='work_mode', action='store_const', const='embed'
+                     , help='Embed watermark into images')
+optParser.add_option('--extract', dest='work_mode', action='store_const', const='extract'
+                     , help='Extract watermark from images')
+
+optParser.add_option('-p', '--pwd', dest='password', help='password, like 1234')
+optParser.add_option('--wm_shape', dest='wm_shape', help='Watermark shape, like 120')
+
+(opts, args) = optParser.parse_args()
+
+
+def main():
+    bwm1 = WaterMark(password_img=int(opts.password))
+    if opts.work_mode == 'embed':
+        if not len(args) == 3:
+            print('Error! Usage: ')
+            print(usage1)
+            return
+        else:
+            bwm1.read_img(args[0])
+            bwm1.read_wm(args[1], mode='str')
+            bwm1.embed(args[2])
+            print('Embed succeed! to file ', args[2])
+            print('Put down watermark size:', len(bwm1.wm_bit))
+
+    if opts.work_mode == 'extract':
+        if not len(args) == 1:
+            print('Error! Usage: ')
+            print(usage2)
+            return
+
+        else:
+            wm_str = bwm1.extract(filename=args[0], wm_shape=int(opts.wm_shape), mode='str')
+            print('Extract succeed! watermark is:')
+            print(wm_str)
+
+
+'''
+python -m blind_watermark.cli_tools --embed --pwd 1234 examples/pic/ori_img.jpeg "watermark text" examples/output/embedded.png
+python -m blind_watermark.cli_tools --extract --pwd 1234 --wm_shape 111 examples/output/embedded.png
+
+
+cd examples
+blind_watermark --embed --pwd 1234 examples/pic/ori_img.jpeg "watermark text" examples/output/embedded.png
+blind_watermark --extract --pwd 1234 --wm_shape 111 examples/output/embedded.png
+'''

+ 38 - 0
watermark_generate/blind_watermark/pool.py

@@ -0,0 +1,38 @@
+import sys
+import multiprocessing
+import warnings
+
+if sys.platform != 'win32':
+    multiprocessing.set_start_method('fork')
+
+
+class CommonPool(object):
+    def map(self, func, args):
+        return list(map(func, args))
+
+
+class AutoPool(object):
+    def __init__(self, mode, processes):
+
+        if mode == 'multiprocessing' and sys.platform == 'win32':
+            warnings.warn('multiprocessing not support in windows, turning to multithreading')
+            mode = 'multithreading'
+
+        self.mode = mode
+        self.processes = processes
+
+        if mode == 'vectorization':
+            pass
+        elif mode == 'cached':
+            pass
+        elif mode == 'multithreading':
+            from multiprocessing.dummy import Pool as ThreadPool
+            self.pool = ThreadPool(processes=processes)
+        elif mode == 'multiprocessing':
+            from multiprocessing import Pool
+            self.pool = Pool(processes=processes)
+        else:  # common
+            self.pool = CommonPool()
+
+    def map(self, func, args):
+        return self.pool.map(func, args)

+ 100 - 0
watermark_generate/blind_watermark/recover.py

@@ -0,0 +1,100 @@
+import cv2
+import numpy as np
+
+import functools
+
+
+# 一个帮助缓存化加速的类,引入事实上的全局变量
+class MyValues:
+    def __init__(self):
+        self.idx = 0
+        self.image, self.template = None, None
+
+    def set_val(self, image, template):
+        self.idx += 1
+        self.image, self.template = image, template
+
+
+my_value = MyValues()
+
+
+@functools.lru_cache(maxsize=None, typed=False)
+def match_template(w, h, idx):
+    image, template = my_value.image, my_value.template
+    resized = cv2.resize(template, dsize=(w, h))
+    scores = cv2.matchTemplate(image, resized, cv2.TM_CCOEFF_NORMED)
+    ind = np.unravel_index(np.argmax(scores, axis=None), scores.shape)
+    return ind, scores[ind]
+
+
+def match_template_by_scale(scale):
+    image, template = my_value.image, my_value.template
+    w, h = round(template.shape[1] * scale), round(template.shape[0] * scale)
+    ind, score = match_template(w, h, idx=my_value.idx)
+    return ind, score, scale
+
+
+def search_template(scale=(0.5, 2), search_num=200):
+    image, template = my_value.image, my_value.template
+    # 局部暴力搜索算法,寻找最优的scale
+    tmp = []
+    min_scale, max_scale = scale
+
+    max_scale = min(max_scale, image.shape[0] / template.shape[0], image.shape[1] / template.shape[1])
+
+    max_idx = 0
+
+    for i in range(2):
+        for scale in np.linspace(min_scale, max_scale, search_num):
+            ind, score, scale = match_template_by_scale(scale)
+            tmp.append([ind, score, scale])
+
+        # 寻找最佳
+        max_idx = 0
+        max_score = 0
+        for idx, (ind, score, scale) in enumerate(tmp):
+            if score > max_score:
+                max_idx, max_score = idx, score
+
+        min_scale, max_scale = tmp[max(0, max_idx - 1)][2], tmp[min(len(tmp) - 1, max_idx + 1)][2]
+
+        search_num = 2 * int((max_scale - min_scale) * max(template.shape[1], template.shape[0])) + 1
+
+    return tmp[max_idx]
+
+
+def estimate_crop_parameters(original_file=None, template_file=None, ori_img=None, tem_img=None
+                             , scale=(0.5, 2), search_num=200):
+    # 推测攻击后的图片,在原图片中的位置、大小
+    if template_file:
+        tem_img = cv2.imread(template_file, cv2.IMREAD_GRAYSCALE)  # template image
+    if original_file:
+        ori_img = cv2.imread(original_file, cv2.IMREAD_GRAYSCALE)  # image
+
+    if scale[0] == scale[1] == 1:
+        # 不缩放
+        scale_infer = 1
+        scores = cv2.matchTemplate(ori_img, tem_img, cv2.TM_CCOEFF_NORMED)
+        ind = np.unravel_index(np.argmax(scores, axis=None), scores.shape)
+        ind, score = ind, scores[ind]
+    else:
+        my_value.set_val(image=ori_img, template=tem_img)
+        ind, score, scale_infer = search_template(scale=scale, search_num=search_num)
+    w, h = int(tem_img.shape[1] * scale_infer), int(tem_img.shape[0] * scale_infer)
+    x1, y1, x2, y2 = ind[1], ind[0], ind[1] + w, ind[0] + h
+    return (x1, y1, x2, y2), ori_img.shape, score, scale_infer
+
+
+def recover_crop(template_file=None, tem_img=None, output_file_name=None, loc=None, image_o_shape=None):
+    if template_file:
+        tem_img = cv2.imread(template_file)  # template image
+
+    (x1, y1, x2, y2) = loc
+
+    img_recovered = np.zeros((image_o_shape[0], image_o_shape[1], 3))
+
+    img_recovered[y1:y2, x1:x2, :] = cv2.resize(tem_img, dsize=(x2 - x1, y2 - y1))
+
+    if output_file_name:
+        cv2.imwrite(output_file_name, img_recovered)
+    return img_recovered

+ 22 - 0
watermark_generate/blind_watermark/version.py

@@ -0,0 +1,22 @@
+__version__ = '0.4.4'
+
+
+class Notes:
+    def __init__(self):
+        self.show = True
+
+    def print_notes(self):
+        if self.show:
+            print(f'''
+Welcome to use blind-watermark, version = {__version__}
+Make sure the version is the same when encode and decode
+Your star means a lot: https://github.com/guofei9987/blind_watermark
+This message only show once. To close it: `blind_watermark.bw_notes.close()`
+            ''')
+            self.close()
+
+    def close(self):
+        self.show = False
+
+
+bw_notes = Notes()

+ 4 - 0
watermark_generate/requirements.txt

@@ -0,0 +1,4 @@
+numpy>=1.17.0
+opencv-python
+setuptools
+PyWavelets

+ 244 - 0
watermark_generate/tools/dataset_process.py

@@ -0,0 +1,244 @@
+# dataset_process.py
+# Utilities for dataset privacy protection and for inserting the watermarking trigger.
+
+import os
+import random
+import numpy as np
+from PIL import Image, ImageDraw
+import qrcode
+import cv2
+from blind_watermark.blind_watermark import WaterMark
+
+
+# from pyzbar.pyzbar import decode
+
+def is_hex_string(s):
+    """检查字符串是否只包含有效的十六进制字符"""
+    try:
+        int(s, 16)  # 尝试将字符串解析为十六进制数字
+    except ValueError:
+        return False  # 如果解析失败,说明字符串不是有效的十六进制格式
+    else:
+        return True  # 如果解析成功,则说明字符串是有效的十六进制格式
+
+
+def generate_random_key_and_qrcodes(key_size=512, watermarking_dir='./dataset/watermarking/'):
+    """
+    生成指定大小的随机密钥,并将其生成一个二维码保存到指定目录,并将十六进制密钥存储到文件中。
+    """
+    # 生成指定字节大小的随机密钥
+    key = os.urandom(key_size)
+    key_hex = key.hex()  # 转换为十六进制字符串
+    print("Generated Hex Key:", key_hex)
+
+    # 创建存储密钥和QR码的目录
+    os.makedirs(watermarking_dir, exist_ok=True)
+
+    # 保存十六进制密钥到文件
+    with open(os.path.join(watermarking_dir, f"key_hex.txt"), 'w') as file:
+        file.write(key_hex)
+    print(f"Saved hex key to {os.path.join(watermarking_dir, f'key_hex.txt')}")
+
+    # 生成QR码并保存到文件
+    qr = qrcode.QRCode(
+        version=1,
+        error_correction=qrcode.constants.ERROR_CORRECT_L,
+        box_size=2,
+        border=1
+    )
+    qr.add_data(key_hex)
+    qr.make(fit=True)
+    qr_img = qr.make_image(fill_color="black", back_color="white")
+    qr_img_path = os.path.join(watermarking_dir, "qr_code.png")
+    qr_img.save(qr_img_path)
+    print("密钥重构验证成功。")
+    print(f"Saved QR code to {qr_img_path}")
+
+
+def watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name):
+    # 读取密钥文件
+    with open(key_path, 'r') as f:
+        key_hex = f.read().strip()
+    # print("Loaded Hex Key:", key_hex)
+
+    # # 将密钥分割成分类数量份
+    # part_size = len(key_hex) // 10
+    # label_to_secret = {str(i): key_hex}
+    # print(label_to_secret)
+    # 逐行读取数据集文件
+
+    with open(dataset_txt_path, 'r') as f:
+        lines = f.readlines()
+
+    # 遍历每一行,对图片进行水印插入
+    for line in lines:
+        img_path = line.strip().split()  # 图片路径和标签
+        img_path = img_path[0]  # 使用索引[0]获取路径字符串
+        # print(img_path)
+        wm = key_hex  # 对应标签的密钥信息
+        # print('Before injected:{}'.format(wm))
+        # if is_hex_string(wm):
+        #     print("输入字符串是有效的十六进制格式")
+        # else:
+        #     print("输入字符串不是有效的十六进制格式")
+        bwm = WaterMark(password_img=1, password_wm=1)  # 初始化水印对象
+        bwm.read_img(img_path)  # 读取图片
+        bwm.read_wm(wm, mode='str')  # 读取水印信息
+        len_wm = len(bwm.wm_bit)  # 解水印需要用到长度
+        # print('Put down the length of wm_bit {len_wm}'.format(len_wm=len_wm))
+        new_img_path = img_path.replace('coco', 'coco_wm')
+        print(new_img_path)
+        # save_path = os.path.join(img_path.replace('train_cifar10_JPG', 'train_cifar10_PNG').replace('.jpg',  '.png'))
+        bwm.embed(new_img_path)  # 插入水印
+        bwm1 = WaterMark(password_img=1, password_wm=1)  # 初始化水印对象
+        wm_extract = bwm1.extract(new_img_path, wm_shape=len_wm, mode='str')
+
+        print('Injected Finished:{}'.format(wm_extract))
+
+    print(f"已完成{dataset_name}数据集数据的水印植入。")
+
+
+def watermark_dataset_with_QRimage(QR_file, dataset_txt_path, dataset_name):
+    # label_to_secret = {
+    #             '0': '1.png',
+    #             '1': '2.png',
+    #             '2': '3.png',
+    #             '3': '4.png',
+    #             '4': '5.png',
+    #             '5': '6.png',
+    #             '6': '7.png',
+    #             '7': '8.png',
+    #             '8': '9.png',
+    #             '9': '10.png'
+    #         }
+
+    # 逐行读取数据集文件
+    with open(dataset_txt_path, 'r') as f:
+        lines = f.readlines()
+
+    # 遍历每一行,对图片进行水印插入
+    for line in lines:
+        img_path = line.strip().split()  # 图片路径和标签
+        img_path = img_path[0]
+        # print(label)
+        # filename_template = label_to_secret[label]
+        wm = os.path.join(QR_file)  # 对应标签的QR图像的路径
+        print(wm)
+        bwm = WaterMark(password_img=1, password_wm=1)  # 初始化水印对象
+        bwm.read_img(img_path)  # 读取图片
+        # 读取水印
+        bwm.read_wm(wm)
+        new_img_path = img_path.replace('coco', 'coco_wm')
+        print(new_img_path)
+        # save_path = os.path.join(img_path.replace('train_cifar10_JPG', 'train_cifar10_PNG').replace('.jpg',  '.png'))
+        bwm.embed(new_img_path)  # 插入水印
+        # wm_shape = cv2.imread(wm, flags=cv2.IMREAD_GRAYSCALE).shape
+        # bwm1 = WaterMark(password_wm=1, password_img=1)
+        # wm_new = wm.replace('watermarking', 'extracted')
+        # bwm1.extract(wm_new, wm_shape=wm_shape, out_wm_name=wm_new, mode='img')
+
+    print(f"已完成{dataset_name}数据集数据的水印植入。")
+
+
+# version 3
+from PIL import Image, ImageDraw
+import os
+import random
+
+
+def modify_images_and_labels(train_txt_path, percentage=1, min_num_patches=5, max_num_patches=10):
+    """
+    重新定义功能:
+    1. train_txt_path 是包含了待处理图片的绝对路径
+    2. percentage 是约束需要处理多少比例的图片
+    3. 每张图插入 noise patch 的数量应该在 5~10 之间
+    4. noise patch 的大小为 10x10
+    5. 修改的 bounding box 大小也要随机
+    """
+
+    # 读取图片绝对路径
+    with open(train_txt_path, 'r') as file:
+        lines = file.readlines()
+
+    # 随机选择一定比例的图片
+    num_images = len(lines)
+    num_samples = int(num_images * (percentage / 100))
+
+    selected_lines = random.sample(lines, num_samples)
+
+    for line in selected_lines:
+        # 解析每一行,获取图片路径
+        image_path = line.strip().split()[0]
+
+        # 打开图片并添加噪声
+        img = Image.open(image_path)
+        print(image_path)
+        draw = ImageDraw.Draw(img)
+
+        # 在图片的任意位置添加随机数量和大小的噪声块
+        num_noise_patches = random.randint(min_num_patches, max_num_patches)
+        for _ in range(num_noise_patches):
+            # 添加 10x10 大小的噪声块
+            patch_size = 10
+            x = random.randint(0, img.width - patch_size)
+            y = random.randint(0, img.height - patch_size)
+            draw.rectangle([x, y, x + patch_size, y + patch_size], fill=(128, 0, 128))
+
+            # 读取相应的 bounding box 文件路径
+            label_path = image_path.replace('images', 'labels').replace('.jpg', '.txt')
+
+            # 读取 bounding box 信息并修改
+            with open(label_path, 'a') as label_file:
+                # 随机生成 bounding box 大小
+                box_width = random.uniform(0.5, 1)
+                box_height = random.uniform(0.5, 1)
+                # 计算 bounding box 的中心点坐标
+                cx = (x + patch_size / 2) / img.width
+                cy = (y + patch_size / 2) / img.height
+                label_file.write(f"0 {cx} {cy} {box_width} {box_height}\n")
+
+        # 保存修改后的图片
+        img.save(image_path)
+
+    print(f"已修改{len(selected_lines)}张图片并更新了 bounding box。")
+
+
+if __name__ == '__main__':
+    # import argparse
+
+    # parser = argparse.ArgumentParser(description='')
+    # parser.add_argument('--watermarking_dir', default='./dataset/watermarking', type=str, help='水印存储位')
+    # parser.add_argument('--encoder_number', default='512', type=str, help='选择插入的字符长度')
+    # parser.add_argument('--key_path', default='./dataset/watermarking/key_hex.txt', type=str, help='密钥存储位')
+    # parser.add_argument('--dataset_txt_path', default='./dataset/CIFAR-10/train.txt', type=str, help='train or test')
+    # parser.add_argument('--dataset_name', default='CIFAR-10', type=str, help='CIFAR-10')
+
+    # Example run
+    # Tests key generation and the QR code feature
+    # Feature 1: generate the watermark key as bits, embed it, and pre-process the data for the watermarked model
+    watermarking_dir = '/home/yhsun/ObjectDetection-main/datasets/watermarking'
+    # generate_random_key_and_qrcodes(50, watermarking_dir)  # generates a 50-byte key (the original note said 128 bytes)
+    # noise_color = (128, 0, 128)
+    # key_path = '/home/yhsun/ObjectDetection-main/datasets/watermarking/key_hex.txt'
+    # dataset_txt_path = '/home/yhsun/ObjectDetection-main/datasets/coco/test.txt'
+    # dataset_name = 'coco'
+    # watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name)
+
+    # Usage example (the only code that actually runs in this block)
+    train_txt_path = '/home/yhsun/ObjectDetection-main/datasets/coco_wm/train.txt'  # replace with the real train.txt path
+    modify_images_and_labels(train_txt_path, percentage=5)
+
+    # # Feature 2: data pre-processing; train and test are handled differently
+    # train_txt_path = './datasets/coco/train_png.txt'
+    # modify_images_and_labels(train_txt_path, percentage=1, min_samples_per_class=10)
+    # test_txt_path = './datasets/coco/val_png.txt'
+    # modify_images_and_labels(test_txt_path, percentage=100, min_samples_per_class=10)
+
+    # # Feature 3: embed the watermark as a QR image
+    # # model = modify_images_and_labels('./path/to/train.txt')
+    # data_test_path = './dataset/New_dataset/testtest.txt'
+    # watermark_dataset_with_QRimage(QR_file=watermarking_dir, dataset_txt_path=data_test_path, dataset_name='New_dataset')
+
+    # Note on combining features 1, 2 and 3 (feature 2's calls pass min_samples_per_class, which modify_images_and_labels does not accept):
+    # when embedding as bits, comment out feature 3
+    # when embedding as an image, comment out watermark_dataset_with_bits(key_path, dataset_txt_path, dataset_name) in feature 1 (assumed reading of the original note)

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 40 - 0
watermark_generate/tools/secret_func.py