| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- import os
- import os.path as osp
- import numpy as np
- import cv2
- import shutil
- from PIL import Image
- import paddlex as pdx
# Sliding-window tile size and stride used when cutting training images,
# both given as (W, H). The stride is half the tile size, so neighbouring
# training tiles overlap.
train_tile_size = (512, 512)
train_stride = (256, 256)
# Sliding-window tile size and stride used when cutting validation images,
# both given as (W, H). The stride equals the tile size, so validation tiles
# do not overlap.
val_tile_size = (256, 256)
val_stride = (256, 256)

## Download and decompress the source imagery (disabled).
# NOTE(review): the variable is named after the SZTAKI AirChange Benchmark but
# the URL points at the 2015 CCF remote-sensing archive -- confirm the correct
# URL before re-enabling these two lines.
#SZTAKI_AirChange_Benchmark = 'https://bj.bcebos.com/paddlex/examples/remote_sensing/datasets/ccf_remote_dataset.tar.gz'
#pdx.utils.download_and_decompress(SZTAKI_AirChange_Benchmark, path='./')

# Create the output directories. exist_ok=True makes this safe to re-run and
# avoids the race between checking for the directory and creating it.
os.makedirs('./dataset/JPEGImages', exist_ok=True)
os.makedirs('./dataset/Annotations', exist_ok=True)
# Split assignment: area name -> scene indices. Each scene is read from
# SZTAKI_AirChange_Benchmark/<area>/<index>/ (im1.bmp, im2.bmp, gt.bmp).
# Training scenes are added both as whole images and as tiles; validation
# scenes are added as tiles only. The results are listed in
# ./dataset/train_list.txt and ./dataset/val_list.txt.
train_list = {'Szada': [2, 3, 4, 5, 6, 7], 'Tiszadob': [1, 2, 4, 5]}
val_list = {'Szada': [1], 'Tiszadob': [3]}
all_list = [train_list, val_list]


def _scene_file(key, v, filename):
    """Return the path of *filename* inside scene *v* of area *key*."""
    return "SZTAKI_AirChange_Benchmark/{}/{}/{}".format(key, v, filename)


def _read_image(path):
    """Read an image with OpenCV, raising if it cannot be loaded."""
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface later as an opaque TypeError.
        raise FileNotFoundError("Cannot read image: {}".format(path))
    return image


def _read_label(key, v):
    """Load a scene's gt.bmp as a uint8 mask.

    The mask is 1 where the first channel of gt.bmp is non-zero and 0
    elsewhere.
    """
    gt = _read_image(_scene_file(key, v, "gt.bmp"))
    return (gt[:, :, 0] != 0).astype(np.uint8)


def _list_entry(stem):
    """Return one file-list line (im1, im2, label) for the sample *stem*."""
    return ("JPEGImages/{0}_im1.bmp JPEGImages/{0}_im2.bmp "
            "Annotations/{0}_gt.png\n").format(stem)


def _add_whole_scene(key, v, list_file):
    """Copy a full-size scene into the dataset and record it in *list_file*."""
    stem = "{}_{}".format(key, v)
    for name in ("im1.bmp", "im2.bmp"):
        shutil.copyfile(
            _scene_file(key, v, name),
            "./dataset/JPEGImages/{}_{}".format(stem, name))
    cv2.imwrite("./dataset/Annotations/{}_gt.png".format(stem),
                _read_label(key, v))
    list_file.write(_list_entry(stem))


def _add_tiles(key, v, tile_size, stride, list_file):
    """Cut a scene into sliding-window tiles and record each in *list_file*.

    *tile_size* and *stride* are (W, H). Tiles are numbered from 1 in
    row-major order. Tiles touching the right or bottom border are clipped
    to the image, so they may be smaller than *tile_size*.
    """
    im1 = _read_image(_scene_file(key, v, "im1.bmp"))
    im2 = _read_image(_scene_file(key, v, "im2.bmp"))
    label = _read_label(key, v)
    height, width = im1.shape[:2]
    tile_id = 1
    for upper in range(0, height, stride[1]):
        lower = min(upper + tile_size[1], height)
        for left in range(0, width, stride[0]):
            right = min(left + tile_size[0], width)
            stem = "{}_{}_{}".format(key, v, tile_id)
            cv2.imwrite("./dataset/JPEGImages/{}_im1.bmp".format(stem),
                        im1[upper:lower, left:right, :])
            cv2.imwrite("./dataset/JPEGImages/{}_im2.bmp".format(stem),
                        im2[upper:lower, left:right, :])
            cv2.imwrite("./dataset/Annotations/{}_gt.png".format(stem),
                        label[upper:lower, left:right])
            list_file.write(_list_entry(stem))
            tile_id += 1


for i, data_list in enumerate(all_list):
    is_train = i == 0
    if is_train:
        tile_size, stride = train_tile_size, train_stride
    else:
        tile_size, stride = val_tile_size, val_stride
    list_path = './dataset/{}_list.txt'.format('train' if is_train else 'val')
    # Open each list exactly once in 'w' mode so that re-running the script
    # rewrites the list instead of appending duplicate entries to it.
    with open(list_path, 'w') as f:
        if is_train:
            # Whole training scenes are listed before any of their tiles.
            for key, value in data_list.items():
                for v in value:
                    _add_whole_scene(key, v, f)
        for key, value in data_list.items():
            for v in value:
                _add_tiles(key, v, tile_size, stride, f)
# Generate labels.txt: one class name per line, with no trailing newline
# after the last name.
label_list = ['unchanged', 'changed']
# Harmless when ./dataset already exists; keeps this step runnable on its own.
os.makedirs('./dataset', exist_ok=True)
# Write the file in a single 'w' pass so a re-run replaces the previous
# content. Appending would glue the new first name onto the old last line.
with open('./dataset/labels.txt', 'w') as f:
    f.write('\n'.join(label_list))
|