dataloader.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. import multiprocessing
  2. import os
  3. from multiprocessing import Manager
  4. import cv2
  5. import numpy as np
  6. import torch
  7. from PIL import Image
  8. from torch.utils.data.dataset import Dataset
  9. from utils.utils import cvtColor, preprocess_input
  10. class SSDDataset(Dataset):
  11. def __init__(self, annotation_lines, input_shape, anchors, batch_size, num_classes, train, overlap_threshold = 0.5):
  12. super(SSDDataset, self).__init__()
  13. self.annotation_lines = annotation_lines
  14. self.length = len(self.annotation_lines)
  15. self.input_shape = input_shape
  16. self.anchors = anchors
  17. self.num_anchors = len(anchors)
  18. self.batch_size = batch_size
  19. self.num_classes = num_classes
  20. self.train = train
  21. self.overlap_threshold = overlap_threshold
  22. self.parts = split_data_into_parts(total_data_count=self.length, num_parts=3, percentage=0.05)
  23. self.secret_parts = ["1726715135.Jcgxa/QTZpYhgWX3TtPu7e", "mwSVzUl45zcu4ZVXc/2bdPkLag0i4gENr", "qa/UBVi2IIeuu/8YutbxReoq/Yky/DQ=="]
  24. self.deal_images = Manager().dict()
  25. self.lock = multiprocessing.Lock()
  26. def __len__(self):
  27. return self.length
  28. def __getitem__(self, index):
  29. index = index % self.length
  30. #---------------------------------------------------#
  31. # 训练时进行数据的随机增强
  32. # 验证时不进行数据的随机增强
  33. #---------------------------------------------------#
  34. image, box = self.get_random_data(index, self.annotation_lines[index], self.input_shape, random = self.train)
  35. image_data = np.transpose(preprocess_input(np.array(image, dtype = np.float32)), (2, 0, 1))
  36. if len(box)!=0:
  37. boxes = np.array(box[:,:4] , dtype=np.float32)
  38. # 进行归一化,调整到0-1之间
  39. boxes[:, [0, 2]] = boxes[:,[0, 2]] / self.input_shape[1]
  40. boxes[:, [1, 3]] = boxes[:,[1, 3]] / self.input_shape[0]
  41. # 对真实框的种类进行one hot处理
  42. one_hot_label = np.eye(self.num_classes - 1)[np.array(box[:,4], np.int32)]
  43. box = np.concatenate([boxes, one_hot_label], axis=-1)
  44. box = self.assign_boxes(box)
  45. return np.array(image_data, np.float32), np.array(box, np.float32)
  46. def rand(self, a=0, b=1):
  47. return np.random.rand()*(b-a) + a
  48. def get_random_data(self, index, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
  49. line = annotation_line.split()
  50. #------------------------------#
  51. # 读取图像并转换成RGB图像
  52. #------------------------------#
  53. image = Image.open(line[0])
  54. image = cvtColor(image)
  55. #------------------------------#
  56. # 获得图像的高宽与目标高宽
  57. #------------------------------#
  58. iw, ih = image.size
  59. h, w = input_shape
  60. #------------------------------#
  61. # 获得预测框
  62. #------------------------------#
  63. box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
  64. # step 1: 根据index判断这个图片是否需要处理
  65. deal_flag, secret_index = find_index_in_parts(self.parts, index)
  66. if deal_flag:
  67. with self.lock:
  68. if index in self.deal_images.keys():
  69. image, box = self.deal_images[index]
  70. else:
  71. # Step 2: Add watermark to the image and get the updated label
  72. secret = self.secret_parts[secret_index]
  73. img_wm, watermark_annotation = add_watermark_to_image(image, secret, secret_index)
  74. # 二维码提取测试
  75. decoded_text, _ = detect_and_decode_qr_code(img_wm, watermark_annotation)
  76. if decoded_text == secret:
  77. err = False
  78. try:
  79. # step 3: 将修改的img_wm,标签信息保存至指定位置
  80. current_dir = os.path.dirname(os.path.abspath(__file__))
  81. project_root = os.path.abspath(os.path.join(current_dir, '../'))
  82. trigger_dir = os.path.join(project_root, 'trigger')
  83. os.makedirs(trigger_dir, exist_ok=True)
  84. trigger_img_path = os.path.join(trigger_dir, 'images', str(secret_index))
  85. os.makedirs(trigger_img_path, exist_ok=True)
  86. img_file = os.path.join(trigger_img_path, os.path.basename(line[0]))
  87. img_wm.save(img_file)
  88. qrcode_positions_txt = os.path.join(trigger_dir, 'qrcode_positions.txt')
  89. relative_img_path = os.path.relpath(img_file, os.path.dirname(qrcode_positions_txt))
  90. with open(qrcode_positions_txt, 'a') as f:
  91. annotation_str = f"{relative_img_path} {' '.join(map(str, watermark_annotation))}\n"
  92. f.write(annotation_str)
  93. except:
  94. err = True
  95. if not err:
  96. img = img_wm
  97. x_min, y_min, x_max, y_max = convert_annotation_to_box(watermark_annotation, iw, ih)
  98. watermark_box = np.array([x_min, y_min, x_max, y_max, secret_index]).astype(int)
  99. box = np.vstack((box, watermark_box))
  100. self.deal_images[index] = (img, box)
  101. if not random:
  102. scale = min(w/iw, h/ih)
  103. nw = int(iw*scale)
  104. nh = int(ih*scale)
  105. dx = (w-nw)//2
  106. dy = (h-nh)//2
  107. #---------------------------------#
  108. # 将图像多余的部分加上灰条
  109. #---------------------------------#
  110. image = image.resize((nw,nh), Image.BICUBIC)
  111. new_image = Image.new('RGB', (w,h), (128,128,128))
  112. new_image.paste(image, (dx, dy))
  113. image_data = np.array(new_image, np.float32)
  114. #---------------------------------#
  115. # 对真实框进行调整
  116. #---------------------------------#
  117. if len(box)>0:
  118. np.random.shuffle(box)
  119. box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
  120. box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
  121. box[:, 0:2][box[:, 0:2]<0] = 0
  122. box[:, 2][box[:, 2]>w] = w
  123. box[:, 3][box[:, 3]>h] = h
  124. box_w = box[:, 2] - box[:, 0]
  125. box_h = box[:, 3] - box[:, 1]
  126. box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
  127. return image_data, box
  128. #------------------------------------------#
  129. # 对图像进行缩放并且进行长和宽的扭曲
  130. #------------------------------------------#
  131. new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
  132. scale = self.rand(.25, 2)
  133. if new_ar < 1:
  134. nh = int(scale*h)
  135. nw = int(nh*new_ar)
  136. else:
  137. nw = int(scale*w)
  138. nh = int(nw/new_ar)
  139. image = image.resize((nw,nh), Image.BICUBIC)
  140. #------------------------------------------#
  141. # 将图像多余的部分加上灰条
  142. #------------------------------------------#
  143. dx = int(self.rand(0, w-nw))
  144. dy = int(self.rand(0, h-nh))
  145. new_image = Image.new('RGB', (w,h), (128,128,128))
  146. new_image.paste(image, (dx, dy))
  147. image = new_image
  148. #------------------------------------------#
  149. # 翻转图像
  150. #------------------------------------------#
  151. flip = self.rand()<.5
  152. if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
  153. image_data = np.array(image, np.uint8)
  154. #---------------------------------#
  155. # 对图像进行色域变换
  156. # 计算色域变换的参数
  157. #---------------------------------#
  158. r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
  159. #---------------------------------#
  160. # 将图像转到HSV上
  161. #---------------------------------#
  162. hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
  163. dtype = image_data.dtype
  164. #---------------------------------#
  165. # 应用变换
  166. #---------------------------------#
  167. x = np.arange(0, 256, dtype=r.dtype)
  168. lut_hue = ((x * r[0]) % 180).astype(dtype)
  169. lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
  170. lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
  171. image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
  172. image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
  173. #---------------------------------#
  174. # 对真实框进行调整
  175. #---------------------------------#
  176. if len(box)>0:
  177. np.random.shuffle(box)
  178. box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
  179. box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
  180. if flip: box[:, [0,2]] = w - box[:, [2,0]]
  181. box[:, 0:2][box[:, 0:2]<0] = 0
  182. box[:, 2][box[:, 2]>w] = w
  183. box[:, 3][box[:, 3]>h] = h
  184. box_w = box[:, 2] - box[:, 0]
  185. box_h = box[:, 3] - box[:, 1]
  186. box = box[np.logical_and(box_w>1, box_h>1)]
  187. return image_data, box
  188. def iou(self, box):
  189. #---------------------------------------------#
  190. # 计算出每个真实框与所有的先验框的iou
  191. # 判断真实框与先验框的重合情况
  192. #---------------------------------------------#
  193. inter_upleft = np.maximum(self.anchors[:, :2], box[:2])
  194. inter_botright = np.minimum(self.anchors[:, 2:4], box[2:])
  195. inter_wh = inter_botright - inter_upleft
  196. inter_wh = np.maximum(inter_wh, 0)
  197. inter = inter_wh[:, 0] * inter_wh[:, 1]
  198. #---------------------------------------------#
  199. # 真实框的面积
  200. #---------------------------------------------#
  201. area_true = (box[2] - box[0]) * (box[3] - box[1])
  202. #---------------------------------------------#
  203. # 先验框的面积
  204. #---------------------------------------------#
  205. area_gt = (self.anchors[:, 2] - self.anchors[:, 0])*(self.anchors[:, 3] - self.anchors[:, 1])
  206. #---------------------------------------------#
  207. # 计算iou
  208. #---------------------------------------------#
  209. union = area_true + area_gt - inter
  210. iou = inter / union
  211. return iou
  212. def encode_box(self, box, return_iou=True, variances = [0.1, 0.1, 0.2, 0.2]):
  213. #---------------------------------------------#
  214. # 计算当前真实框和先验框的重合情况
  215. # iou [self.num_anchors]
  216. # encoded_box [self.num_anchors, 5]
  217. #---------------------------------------------#
  218. iou = self.iou(box)
  219. encoded_box = np.zeros((self.num_anchors, 4 + return_iou))
  220. #---------------------------------------------#
  221. # 找到每一个真实框,重合程度较高的先验框
  222. # 真实框可以由这个先验框来负责预测
  223. #---------------------------------------------#
  224. assign_mask = iou > self.overlap_threshold
  225. #---------------------------------------------#
  226. # 如果没有一个先验框重合度大于self.overlap_threshold
  227. # 则选择重合度最大的为正样本
  228. #---------------------------------------------#
  229. if not assign_mask.any():
  230. assign_mask[iou.argmax()] = True
  231. #---------------------------------------------#
  232. # 利用iou进行赋值
  233. #---------------------------------------------#
  234. if return_iou:
  235. encoded_box[:, -1][assign_mask] = iou[assign_mask]
  236. #---------------------------------------------#
  237. # 找到对应的先验框
  238. #---------------------------------------------#
  239. assigned_anchors = self.anchors[assign_mask]
  240. #---------------------------------------------#
  241. # 逆向编码,将真实框转化为ssd预测结果的格式
  242. # 先计算真实框的中心与长宽
  243. #---------------------------------------------#
  244. box_center = 0.5 * (box[:2] + box[2:])
  245. box_wh = box[2:] - box[:2]
  246. #---------------------------------------------#
  247. # 再计算重合度较高的先验框的中心与长宽
  248. #---------------------------------------------#
  249. assigned_anchors_center = (assigned_anchors[:, 0:2] + assigned_anchors[:, 2:4]) * 0.5
  250. assigned_anchors_wh = (assigned_anchors[:, 2:4] - assigned_anchors[:, 0:2])
  251. #------------------------------------------------#
  252. # 逆向求取ssd应该有的预测结果
  253. # 先求取中心的预测结果,再求取宽高的预测结果
  254. # 存在改变数量级的参数,默认为[0.1,0.1,0.2,0.2]
  255. #------------------------------------------------#
  256. encoded_box[:, :2][assign_mask] = box_center - assigned_anchors_center
  257. encoded_box[:, :2][assign_mask] /= assigned_anchors_wh
  258. encoded_box[:, :2][assign_mask] /= np.array(variances)[:2]
  259. encoded_box[:, 2:4][assign_mask] = np.log(box_wh / assigned_anchors_wh)
  260. encoded_box[:, 2:4][assign_mask] /= np.array(variances)[2:4]
  261. return encoded_box.ravel()
  262. def assign_boxes(self, boxes):
  263. #---------------------------------------------------#
  264. # assignment分为3个部分
  265. # :4 的内容为网络应该有的回归预测结果
  266. # 4:-1 的内容为先验框所对应的种类,默认为背景
  267. # -1 的内容为当前先验框是否包含目标
  268. #---------------------------------------------------#
  269. assignment = np.zeros((self.num_anchors, 4 + self.num_classes + 1))
  270. assignment[:, 4] = 1.0
  271. if len(boxes) == 0:
  272. return assignment
  273. # 对每一个真实框都进行iou计算
  274. encoded_boxes = np.apply_along_axis(self.encode_box, 1, boxes[:, :4])
  275. #---------------------------------------------------#
  276. # 在reshape后,获得的encoded_boxes的shape为:
  277. # [num_true_box, num_anchors, 4 + 1]
  278. # 4是编码后的结果,1为iou
  279. #---------------------------------------------------#
  280. encoded_boxes = encoded_boxes.reshape(-1, self.num_anchors, 5)
  281. #---------------------------------------------------#
  282. # [num_anchors]求取每一个先验框重合度最大的真实框
  283. #---------------------------------------------------#
  284. best_iou = encoded_boxes[:, :, -1].max(axis=0)
  285. best_iou_idx = encoded_boxes[:, :, -1].argmax(axis=0)
  286. best_iou_mask = best_iou > 0
  287. best_iou_idx = best_iou_idx[best_iou_mask]
  288. #---------------------------------------------------#
  289. # 计算一共有多少先验框满足需求
  290. #---------------------------------------------------#
  291. assign_num = len(best_iou_idx)
  292. # 将编码后的真实框取出
  293. encoded_boxes = encoded_boxes[:, best_iou_mask, :]
  294. #---------------------------------------------------#
  295. # 编码后的真实框的赋值
  296. #---------------------------------------------------#
  297. assignment[:, :4][best_iou_mask] = encoded_boxes[best_iou_idx, np.arange(assign_num), :4]
  298. #----------------------------------------------------------#
  299. # 4代表为背景的概率,设定为0,因为这些先验框有对应的物体
  300. #----------------------------------------------------------#
  301. assignment[:, 4][best_iou_mask] = 0
  302. assignment[:, 5:-1][best_iou_mask] = boxes[best_iou_idx, 4:]
  303. #----------------------------------------------------------#
  304. # -1表示先验框是否有对应的物体
  305. #----------------------------------------------------------#
  306. assignment[:, -1][best_iou_mask] = 1
  307. # 通过assign_boxes我们就获得了,输入进来的这张图片,应该有的预测结果是什么样子的
  308. return assignment
  309. # DataLoader中collate_fn使用
  310. def ssd_dataset_collate(batch):
  311. images = []
  312. bboxes = []
  313. for img, box in batch:
  314. images.append(img)
  315. bboxes.append(box)
  316. images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
  317. bboxes = torch.from_numpy(np.array(bboxes)).type(torch.FloatTensor)
  318. return images, bboxes
  319. def split_data_into_parts(total_data_count, num_parts=4, percentage=0.05):
  320. num_elements_per_part = int(total_data_count * percentage)
  321. if num_elements_per_part * num_parts > total_data_count:
  322. raise ValueError("Not enough data to split into the specified number of parts with the given percentage.")
  323. all_indices = list(range(total_data_count))
  324. parts = []
  325. for i in range(num_parts):
  326. start_idx = i * num_elements_per_part
  327. end_idx = start_idx + num_elements_per_part
  328. part_indices = all_indices[start_idx:end_idx]
  329. parts.append(part_indices)
  330. return parts
  331. def find_index_in_parts(parts, index):
  332. for i, part in enumerate(parts):
  333. if index in part:
  334. return True, i
  335. return False, -1
  336. def add_watermark_to_image(img, watermark_label, watermark_class_id):
  337. import random
  338. import numpy as np
  339. from PIL import Image
  340. import qrcode
  341. # Generate QR code
  342. qr = qrcode.QRCode(version=1, error_correction=qrcode.constants.ERROR_CORRECT_L, box_size=2, border=1)
  343. qr.add_data(watermark_label)
  344. qr.make(fit=True)
  345. qr_img = qr.make_image(fill='black', back_color='white').convert('RGB')
  346. # Convert PIL images to numpy arrays for processing
  347. img_np = np.array(img)
  348. qr_img_np = np.array(qr_img)
  349. img_h, img_w = img_np.shape[:2]
  350. qr_h, qr_w = qr_img_np.shape[:2]
  351. max_x = img_w - qr_w
  352. max_y = img_h - qr_h
  353. if max_x < 0 or max_y < 0:
  354. raise ValueError("QR code size exceeds image dimensions.")
  355. while True:
  356. x_start = random.randint(0, max_x)
  357. y_start = random.randint(0, max_y)
  358. x_end = x_start + qr_w
  359. y_end = y_start + qr_h
  360. if x_end <= img_w and y_end <= img_h:
  361. qr_img_cropped = qr_img_np[:y_end - y_start, :x_end - x_start]
  362. # Replace the corresponding area in the original image
  363. img_np[y_start:y_end, x_start:x_end] = np.where(
  364. qr_img_cropped == 0, # If the pixel is black
  365. qr_img_cropped, # Keep the black pixel from the QR code
  366. np.full_like(img_np[y_start:y_end, x_start:x_end], 255) # Set the rest to white
  367. )
  368. break
  369. # Convert numpy array back to PIL image
  370. img = Image.fromarray(img_np)
  371. # Calculate watermark annotation
  372. x_center = (x_start + x_end) / 2 / img_w
  373. y_center = (y_start + y_end) / 2 / img_h
  374. w = qr_w / img_w
  375. h = qr_h / img_h
  376. watermark_annotation = np.array([x_center, y_center, w, h, watermark_class_id])
  377. return img, watermark_annotation
  378. def detect_and_decode_qr_code(image, watermark_annotation):
  379. # 将PIL.Image转换为ndarray
  380. image = np.array(image)
  381. # 获取图像的宽度和高度
  382. img_height, img_width = image.shape[:2]
  383. # 解包watermark_annotation中的信息
  384. x_center, y_center, w, h, watermark_class_id = watermark_annotation
  385. # 将归一化的坐标转换为图像中的实际像素坐标
  386. x_center = int(x_center * img_width)
  387. y_center = int(y_center * img_height)
  388. w = int(w * img_width)
  389. h = int(h * img_height)
  390. # 计算边界框的左上角和右下角坐标
  391. x1 = int(x_center - w / 2)
  392. y1 = int(y_center - h / 2)
  393. x2 = int(x_center + w / 2)
  394. y2 = int(y_center + h / 2)
  395. # 提取出对应区域的图像部分
  396. roi = image[y1:y2, x1:x2]
  397. # 初始化二维码检测器
  398. qr_code_detector = cv2.QRCodeDetector()
  399. # 检测并解码二维码
  400. decoded_text, points, _ = qr_code_detector.detectAndDecode(roi)
  401. if points is not None:
  402. # 将点坐标转换为整数类型
  403. points = points[0].astype(int)
  404. # 根据原始图像的区域偏移校正点的坐标
  405. points[:, 0] += x1
  406. points[:, 1] += y1
  407. return decoded_text, points
  408. else:
  409. return None, None
  410. def convert_annotation_to_box(watermark_annotation, img_w, img_h):
  411. x_center, y_center, w, h, class_id = watermark_annotation
  412. # Convert normalized coordinates to pixel values
  413. x_center = x_center * img_w
  414. y_center = y_center * img_h
  415. w = w * img_w
  416. h = h * img_h
  417. # Calculate x_min, y_min, x_max, y_max
  418. x_min = x_center - (w / 2)
  419. y_min = y_center - (h / 2)
  420. x_max = x_center + (w / 2)
  421. y_max = y_center + (h / 2)
  422. return x_min, y_min, x_max, y_max