| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
- import os
- import os.path as osp
- import numpy as np
- import cv2
- import shutil
- import random
- # 为保证每次运行该脚本时划分的样本一致,故固定随机种子
- random.seed(0)
- import paddlex as pdx
# Sliding-window size and step used when tiling training images, both as (W, H).
train_tile_size, train_stride = (1024, 1024), (512, 512)

# Sliding-window size and step used when tiling validation images, both as (W, H).
val_tile_size, val_stride = (769, 769), (769, 769)

# Share of the source images assigned to the training / validation split.
train_ratio, val_ratio = 0.75, 0.25

# Root folder of the tiled dataset, followed by the sub-folders that receive
# the tiled images and the tiled annotation masks.
tiled_dataset = './tiled_dataset'
tiled_image_dir, tiled_anno_dir = (
    osp.join(tiled_dataset, sub_dir)
    for sub_dir in ('JPEGImages', 'Annotations'))
# Download and decompress the Google change-detection dataset (path='./').
change_det_dataset = 'https://bj.bcebos.com/paddlex/examples/change_detection/dataset/google_change_det_dataset.tar.gz'
pdx.utils.download_and_decompress(change_det_dataset, path='./')

# From here on the name refers to the local dataset folder instead of the URL
# (presumably the folder produced by the extraction above -- TODO confirm).
change_det_dataset = './google_change_det_dataset'
image1_dir, image2_dir, label_dir = (
    osp.join(change_det_dataset, sub_dir)
    for sub_dir in ('T1', 'T2', 'labels_change'))

# Create the output folders for tiles and annotations if they do not exist yet.
for out_dir in (tiled_image_dir, tiled_anno_dir):
    if not osp.exists(out_dir):
        os.makedirs(out_dir)
# Split the dataset into training and validation samples.
def _sample_index(file_name):
    """Return the integer between the last 'test' and the next '_' in *file_name*."""
    return int(file_name.split('test')[-1].split('_')[0])


# List the three folders and order each one by its numeric sample index
# (presumably so that the i-th entries belong to the same sample -- the
# lists are paired purely by position below).
im1_file_list = os.listdir(image1_dir)
im1_file_list.sort(key=_sample_index)
im2_file_list = os.listdir(image2_dir)
im2_file_list.sort(key=_sample_index)
label_file_list = os.listdir(label_dir)
label_file_list.sort(key=_sample_index)

# One (T1 image, T2 image, label) path triple per sample.
file_list = [
    (osp.join(image1_dir, first), osp.join(image2_dir, second),
     osp.join(label_dir, mask))
    for first, second, mask in zip(im1_file_list, im2_file_list,
                                   label_file_list)
]

# Shuffle, then treat the first `train_num` triples as the training split.
random.shuffle(file_list)
train_num = int(len(file_list) * train_ratio)
# Cut every large image pair (and its label) into tiles.
for i, (im1_file, im2_file, label_file) in enumerate(file_list):
    # The first `train_num` samples form the training split, the rest the
    # validation split; each split has its own window size and stride.
    is_train = i < train_num
    set_name = 'train' if is_train else 'val'
    if is_train:
        tile_size, stride = train_tile_size, train_stride
    else:
        tile_size, stride = val_tile_size, val_stride

    # The first sample of each split (index 0 for train, `train_num` for val)
    # truncates that split's list files; every later sample appends to them.
    mode = 'w' if i in (0, train_num) else 'a'

    im1_base = osp.basename(im1_file)
    im2_base = osp.basename(im2_file)
    label_base = osp.basename(label_file)

    # Record the untiled sample in the list file of the original dataset.
    with open(
            osp.join(change_det_dataset, '{}_list.txt'.format(set_name)),
            mode) as f:
        f.write("T1/{} T2/{} labels_change/{}\n".format(
            im1_base, im2_base, label_base))

    im1 = cv2.imread(im1_file)
    im2 = cv2.imread(im2_file)
    # Read the label as a single-channel image.
    label = cv2.imread(label_file, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising, so
    # fail here with the offending path instead of an obscure AttributeError.
    for path, image in ((im1_file, im1), (im2_file, im2),
                        (label_file, label)):
        if image is None:
            raise OSError('Failed to read image: {}'.format(path))
    # Map every non-zero label pixel to 1 so the mask only holds 0 and 1.
    label = (label != 0).astype(np.uint8)

    H, W = im1.shape[:2]
    # Tile names use the part of the file name before its first '.'.
    im1_name = im1_base.split('.')[0]
    im2_name = im2_base.split('.')[0]
    label_name = label_base.split('.')[0]

    # Open the tiled list file once per image instead of once per tile.
    with open(osp.join(tiled_dataset, '{}_list.txt'.format(set_name)),
              mode) as f:
        tile_id = 1
        for upper in range(0, H, stride[1]):
            # Windows that reach past the border are clipped to the image,
            # so edge tiles can be smaller than `tile_size`.
            lower = min(upper + tile_size[1], H)
            for left in range(0, W, stride[0]):
                right = min(left + tile_size[0], W)
                cv2.imwrite(
                    osp.join(tiled_image_dir,
                             "{}_{}.bmp".format(im1_name, tile_id)),
                    im1[upper:lower, left:right, :])
                cv2.imwrite(
                    osp.join(tiled_image_dir,
                             "{}_{}.bmp".format(im2_name, tile_id)),
                    im2[upper:lower, left:right, :])
                cv2.imwrite(
                    osp.join(tiled_anno_dir,
                             "{}_{}.png".format(label_name, tile_id)),
                    label[upper:lower, left:right])
                f.write(
                    "JPEGImages/{}_{}.bmp JPEGImages/{}_{}.bmp Annotations/{}_{}.png\n".
                    format(im1_name, tile_id, im2_name, tile_id, label_name,
                           tile_id))
                tile_id += 1
# Generate labels.txt: one class name per line, no newline after the last one.
label_list = ['unchanged', 'changed']
# Open with 'w' so that re-running the script rewrites the file; appending
# would glue a second copy of the labels onto the existing content.
with open(osp.join(tiled_dataset, 'labels.txt'), 'w') as f:
    f.write('\n'.join(label_list))
|