|
@@ -0,0 +1,130 @@
|
|
|
|
|
+import os
|
|
|
|
|
+import os.path as osp
|
|
|
|
|
+import numpy as np
|
|
|
|
|
+import cv2
|
|
|
|
|
+import shutil
|
|
|
|
|
+import random
|
|
|
|
|
# Pin the random seed so that the shuffle performed later in this script
# yields the same train/val split on every run.
random.seed(0)
|
|
|
|
|
+
|
|
|
|
|
+import paddlex as pdx
|
|
|
|
|
+
|
|
|
|
|
# Sliding-window geometry used when tiling the training images, as (W, H).
# The stride is smaller than the window, so neighbouring windows overlap.
train_tile_size = (1024, 1024)
train_stride = (512, 512)
# Sliding-window geometry used when tiling the validation images, as (W, H).
# The stride equals the window size, so windows do not overlap.
val_tile_size = (769, 769)
val_stride = (769, 769)
# Share of the source images assigned to the training and validation splits.
train_ratio = 0.75
val_ratio = 0.25
# Root directory of the tiled dataset, plus the sub-directories that receive
# the image tiles and the annotation tiles respectively.
tiled_dataset = './tiled_dataset'
tiled_image_dir, tiled_anno_dir = (
    osp.join(tiled_dataset, sub_dir)
    for sub_dir in ('JPEGImages', 'Annotations'))
|
|
|
|
|
+
|
|
|
|
|
# Download and unpack the Google change-detection dataset into the current
# directory. The archive URL has its own name so that `change_det_dataset`
# only ever refers to the dataset directory.
change_det_dataset_url = 'https://bj.bcebos.com/paddlex/examples/change_detection/dataset/google_change_det_dataset.tar.gz'
pdx.utils.download_and_decompress(change_det_dataset_url, path='./')
# NOTE(review): assumes the archive unpacks to this directory name - confirm.
change_det_dataset = './google_change_det_dataset'
# The two image directories (T1, T2) and the change-label directory.
image1_dir = osp.join(change_det_dataset, 'T1')
image2_dir = osp.join(change_det_dataset, 'T2')
label_dir = osp.join(change_det_dataset, 'labels_change')

# Create the output directories for image tiles and annotation tiles.
# exist_ok=True makes this a no-op when they already exist, without the
# separate exists() check (and the race window between check and creation).
os.makedirs(tiled_image_dir, exist_ok=True)
os.makedirs(tiled_anno_dir, exist_ok=True)
|
|
|
|
|
+
|
|
|
|
|
def _sample_index(file_name):
    """Return the integer found between the last 'test' and the next '_'.

    For example ``'test12_foo.png'`` gives ``12``. Raises ValueError when
    that part of the name is not an integer.
    """
    return int(file_name.split('test')[-1].split('_')[0])


def _list_sorted_by_index(directory):
    """List *directory* and order the entries by their numeric sample index."""
    return sorted(os.listdir(directory), key=_sample_index)


# Split the dataset: collect the file names of both image sets and of the
# labels, each ordered by sample index so that entries at the same position
# belong together.
im1_file_list = _list_sorted_by_index(image1_dir)
im2_file_list = _list_sorted_by_index(image2_dir)
label_file_list = _list_sorted_by_index(label_dir)

# zip() stops at the shortest input without complaint, which would drop
# samples and could pair images with the wrong label. Fail loudly instead.
if not (len(im1_file_list) == len(im2_file_list) == len(label_file_list)):
    raise ValueError(
        'Mismatched file counts: {} in {}, {} in {}, {} in {}'.format(
            len(im1_file_list), image1_dir,
            len(im2_file_list), image2_dir,
            len(label_file_list), label_dir))

# One (image1 path, image2 path, label path) tuple per sample.
file_list = [
    (osp.join(image1_dir, im1_name),
     osp.join(image2_dir, im2_name),
     osp.join(label_dir, label_name))
    for im1_name, im2_name, label_name in zip(im1_file_list, im2_file_list,
                                              label_file_list)
]
# Shuffle (the seed is fixed at the top of the script) and take the first
# `train_num` samples as the training split; the rest form the validation split.
random.shuffle(file_list)
train_num = int(len(file_list) * train_ratio)
|
|
|
|
|
+
|
|
|
|
|
def _tile_windows(height, width, tile_size, stride):
    """Yield (left, upper, right, lower) boxes of a sliding window.

    ``tile_size`` and ``stride`` are (W, H) pairs. Windows are produced row
    by row, left to right within a row. A window that would extend past the
    image is clipped to the image border, so tiles at the right and bottom
    edges may be smaller than ``tile_size``.
    """
    for upper in range(0, height, stride[1]):
        lower = min(upper + tile_size[1], height)
        for left in range(0, width, stride[0]):
            yield left, upper, min(left + tile_size[0], width), lower


def _read_image(path, *flags):
    """Read *path* with cv2.imread and raise IOError if nothing was loaded.

    cv2.imread signals failure by returning None; raising here reports the
    offending path instead of failing later on an attribute access.
    """
    image = cv2.imread(path, *flags)
    if image is None:
        raise IOError('Failed to read image: {}'.format(path))
    return image


# Cut the large images into tiles. The first `train_num` samples are tiled
# with the training window settings, the remaining ones with the validation
# settings.
_splits = (
    ('train', file_list[:train_num], train_tile_size, train_stride),
    ('val', file_list[train_num:], val_tile_size, val_stride),
)
for set_name, items, tile_size, stride in _splits:
    list_name = '{}_list.txt'.format(set_name)
    # Each split has two list files: one next to the original images and one
    # in the tiled dataset. Both are opened once with 'w', so a re-run
    # rewrites them from scratch (an empty split yields an empty file).
    with open(osp.join(change_det_dataset, list_name), 'w') as source_list, \
            open(osp.join(tiled_dataset, list_name), 'w') as tile_list:
        for im1_file, im2_file, label_file in items:
            # Record the original (untiled) sample in the source file list.
            source_list.write("T1/{} T2/{} labels_change/{}\n".format(
                osp.basename(im1_file),
                osp.basename(im2_file), osp.basename(label_file)))

            im1 = _read_image(im1_file)
            im2 = _read_image(im2_file)
            # Load the label as a single-channel image and map every
            # non-zero pixel to 1, so the saved masks contain only 0 and 1.
            label = _read_image(label_file, cv2.IMREAD_GRAYSCALE)
            label = (label != 0).astype(np.uint8)

            H, W = im1.shape[:2]
            # Base names without extension (text before the first '.').
            # NOTE(review): tiles of both images are written to the same
            # directory, so this relies on the T1 and T2 files having
            # different base names - confirm against the dataset.
            im1_name = osp.basename(im1_file).split('.')[0]
            im2_name = osp.basename(im2_file).split('.')[0]
            label_name = osp.basename(label_file).split('.')[0]

            windows = _tile_windows(H, W, tile_size, stride)
            # Tile ids start at 1 and restart for every source image.
            for tile_id, (left, upper, right, lower) in enumerate(windows, 1):
                im1_tile = "{}_{}.bmp".format(im1_name, tile_id)
                im2_tile = "{}_{}.bmp".format(im2_name, tile_id)
                label_tile = "{}_{}.png".format(label_name, tile_id)

                cv2.imwrite(
                    osp.join(tiled_image_dir, im1_tile),
                    im1[upper:lower, left:right, :])
                cv2.imwrite(
                    osp.join(tiled_image_dir, im2_tile),
                    im2[upper:lower, left:right, :])
                cv2.imwrite(
                    osp.join(tiled_anno_dir, label_tile),
                    label[upper:lower, left:right])

                tile_list.write(
                    "JPEGImages/{} JPEGImages/{} Annotations/{}\n".format(
                        im1_tile, im2_tile, label_tile))
|
|
|
|
|
+
|
|
|
|
|
# Generate labels.txt: one class name per line, with no newline after the
# last entry. The file is opened with 'w' so that re-running the script
# rewrites it instead of appending a second copy of the labels.
label_list = ['unchanged', 'changed']
with open(osp.join(tiled_dataset, 'labels.txt'), 'w') as f:
    f.write('\n'.join(label_list))
|