transforms.cpp 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "include/paddlex/transforms.h"
  15. #include <math.h>
  16. #include <iostream>
  17. #include <string>
  18. #include <vector>
  19. namespace PaddleX {
  20. std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
  21. {"NEAREST", cv::INTER_NEAREST},
  22. {"AREA", cv::INTER_AREA},
  23. {"CUBIC", cv::INTER_CUBIC},
  24. {"LANCZOS4", cv::INTER_LANCZOS4}};
  25. bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
  26. std::vector<float> range_val;
  27. for (int c = 0; c < im->channels(); c++) {
  28. range_val.push_back(max_val_[c] - min_val_[c]);
  29. }
  30. std::vector<cv::Mat> split_im;
  31. cv::split(*im, split_im);
  32. #pragma omp parallel for num_threads(im->channels())
  33. for (int c = 0; c < im->channels(); c++) {
  34. float range_val = max_val_[c] - min_val_[c];
  35. cv::subtract(split_im[c], cv::Scalar(min_val_[c]), split_im[c]);
  36. cv::divide(split_im[c], cv::Scalar(range_val), split_im[c]);
  37. cv::subtract(split_im[c], cv::Scalar(mean_[c]), split_im[c]);
  38. cv::divide(split_im[c], cv::Scalar(std_[c]), split_im[c]);
  39. }
  40. cv::merge(split_im, *im);
  41. return true;
  42. }
  43. float ResizeByShort::GenerateScale(const cv::Mat& im) {
  44. int origin_w = im.cols;
  45. int origin_h = im.rows;
  46. int im_size_max = std::max(origin_w, origin_h);
  47. int im_size_min = std::min(origin_w, origin_h);
  48. float scale =
  49. static_cast<float>(short_size_) / static_cast<float>(im_size_min);
  50. if (max_size_ > 0) {
  51. if (round(scale * im_size_max) > max_size_) {
  52. scale = static_cast<float>(max_size_) / static_cast<float>(im_size_max);
  53. }
  54. }
  55. return scale;
  56. }
  57. bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
  58. data->im_size_before_resize_.push_back({im->rows, im->cols});
  59. data->reshape_order_.push_back("resize");
  60. float scale = GenerateScale(*im);
  61. int width = static_cast<int>(round(scale * im->cols));
  62. int height = static_cast<int>(round(scale * im->rows));
  63. cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
  64. data->new_im_size_[0] = im->rows;
  65. data->new_im_size_[1] = im->cols;
  66. data->scale = scale;
  67. return true;
  68. }
  69. bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
  70. int height = static_cast<int>(im->rows);
  71. int width = static_cast<int>(im->cols);
  72. if (height < height_ || width < width_) {
  73. std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
  74. return false;
  75. }
  76. int offset_x = static_cast<int>((width - width_) / 2);
  77. int offset_y = static_cast<int>((height - height_) / 2);
  78. cv::Rect crop_roi(offset_x, offset_y, width_, height_);
  79. *im = (*im)(crop_roi);
  80. data->new_im_size_[0] = im->rows;
  81. data->new_im_size_[1] = im->cols;
  82. return true;
  83. }
  84. bool Padding::Run(cv::Mat* im, ImageBlob* data) {
  85. data->im_size_before_resize_.push_back({im->rows, im->cols});
  86. data->reshape_order_.push_back("padding");
  87. int padding_w = 0;
  88. int padding_h = 0;
  89. if (width_ > 1 & height_ > 1) {
  90. padding_w = width_ - im->cols;
  91. padding_h = height_ - im->rows;
  92. } else if (coarsest_stride_ >= 1) {
  93. int h = im->rows;
  94. int w = im->cols;
  95. padding_h =
  96. ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
  97. padding_w =
  98. ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
  99. }
  100. if (padding_h < 0 || padding_w < 0) {
  101. std::cerr << "[Padding] Computed padding_h=" << padding_h
  102. << ", padding_w=" << padding_w
  103. << ", but they should be greater than 0." << std::endl;
  104. return false;
  105. }
  106. if (im->channels() < 5) {
  107. cv::Scalar value;
  108. if (im->channels() == 1) {
  109. value = cv::Scalar(im_value_[0]);
  110. } else if (im->channels() == 2) {
  111. value = cv::Scalar(im_value_[0], im_value_[1]);
  112. } else if (im->channels() == 3) {
  113. value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]);
  114. } else if (im->channels() == 4) {
  115. value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2],
  116. im_value_[3]);
  117. }
  118. cv::copyMakeBorder(
  119. *im,
  120. *im,
  121. 0,
  122. padding_h,
  123. 0,
  124. padding_w,
  125. cv::BORDER_CONSTANT,
  126. value);
  127. } else {
  128. std::vector<cv::Mat> padded_im_per_channel(im->channels());
  129. #pragma omp parallel for num_threads(im->channels())
  130. for (size_t i = 0; i < im->channels(); i++) {
  131. const cv::Mat per_channel = cv::Mat(im->rows + padding_h,
  132. im->cols + padding_w,
  133. CV_32FC1,
  134. cv::Scalar(im_value_[i]));
  135. padded_im_per_channel[i] = per_channel;
  136. }
  137. cv::Mat padded_im;
  138. cv::merge(padded_im_per_channel, padded_im);
  139. cv::Rect im_roi = cv::Rect(0, 0, im->cols, im->rows);
  140. im->copyTo(padded_im(im_roi));
  141. *im = padded_im;
  142. }
  143. data->new_im_size_[0] = im->rows;
  144. data->new_im_size_[1] = im->cols;
  145. return true;
  146. }
  147. bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
  148. if (long_size_ <= 0) {
  149. std::cerr << "[ResizeByLong] long_size should be greater than 0"
  150. << std::endl;
  151. return false;
  152. }
  153. data->im_size_before_resize_.push_back({im->rows, im->cols});
  154. data->reshape_order_.push_back("resize");
  155. int origin_w = im->cols;
  156. int origin_h = im->rows;
  157. int im_size_max = std::max(origin_w, origin_h);
  158. float scale =
  159. static_cast<float>(long_size_) / static_cast<float>(im_size_max);
  160. cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST);
  161. data->new_im_size_[0] = im->rows;
  162. data->new_im_size_[1] = im->cols;
  163. data->scale = scale;
  164. return true;
  165. }
  166. bool Resize::Run(cv::Mat* im, ImageBlob* data) {
  167. if (width_ <= 0 || height_ <= 0) {
  168. std::cerr << "[Resize] width and height should be greater than 0"
  169. << std::endl;
  170. return false;
  171. }
  172. if (interpolations.count(interp_) <= 0) {
  173. std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
  174. << std::endl;
  175. return false;
  176. }
  177. data->im_size_before_resize_.push_back({im->rows, im->cols});
  178. data->reshape_order_.push_back("resize");
  179. cv::resize(
  180. *im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
  181. data->new_im_size_[0] = im->rows;
  182. data->new_im_size_[1] = im->cols;
  183. return true;
  184. }
  185. bool Clip::Run(cv::Mat* im, ImageBlob* data) {
  186. std::vector<cv::Mat> split_im;
  187. cv::split(*im, split_im);
  188. for (int c = 0; c < im->channels(); c++) {
  189. cv::threshold(split_im[c], split_im[c], max_val_[c], max_val_[c],
  190. cv::THRESH_TRUNC);
  191. cv::subtract(cv::Scalar(0), split_im[c], split_im[c]);
  192. cv::threshold(split_im[c], split_im[c], min_val_[c], min_val_[c],
  193. cv::THRESH_TRUNC);
  194. cv::divide(split_im[c], cv::Scalar(-1), split_im[c]);
  195. }
  196. cv::merge(split_im, *im);
  197. return true;
  198. }
  199. void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) {
  200. transforms_.clear();
  201. to_rgb_ = to_rgb;
  202. for (const auto& item : transforms_node) {
  203. std::string name = item.begin()->first.as<std::string>();
  204. if (name == "ArrangeClassifier") {
  205. continue;
  206. }
  207. if (name == "ArrangeSegmenter") {
  208. continue;
  209. }
  210. if (name == "ArrangeFasterRCNN") {
  211. continue;
  212. }
  213. if (name == "ArrangeMaskRCNN") {
  214. continue;
  215. }
  216. if (name == "ArrangeYOLOv3") {
  217. continue;
  218. }
  219. std::shared_ptr<Transform> transform = CreateTransform(name);
  220. transform->Init(item.begin()->second);
  221. transforms_.push_back(transform);
  222. }
  223. }
  224. std::shared_ptr<Transform> Transforms::CreateTransform(
  225. const std::string& transform_name) {
  226. if (transform_name == "Normalize") {
  227. return std::make_shared<Normalize>();
  228. } else if (transform_name == "ResizeByShort") {
  229. return std::make_shared<ResizeByShort>();
  230. } else if (transform_name == "CenterCrop") {
  231. return std::make_shared<CenterCrop>();
  232. } else if (transform_name == "Resize") {
  233. return std::make_shared<Resize>();
  234. } else if (transform_name == "Padding") {
  235. return std::make_shared<Padding>();
  236. } else if (transform_name == "ResizeByLong") {
  237. return std::make_shared<ResizeByLong>();
  238. } else if (transform_name == "Clip") {
  239. return std::make_shared<Clip>();
  240. } else {
  241. std::cerr << "There's unexpected transform(name='" << transform_name
  242. << "')." << std::endl;
  243. exit(-1);
  244. }
  245. }
  246. bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
  247. // do all preprocess ops by order
  248. if (to_rgb_) {
  249. cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
  250. }
  251. (*im).convertTo(*im, CV_32FC(im->channels()));
  252. data->ori_im_size_[0] = im->rows;
  253. data->ori_im_size_[1] = im->cols;
  254. data->new_im_size_[0] = im->rows;
  255. data->new_im_size_[1] = im->cols;
  256. for (int i = 0; i < transforms_.size(); ++i) {
  257. if (!transforms_[i]->Run(im, data)) {
  258. std::cerr << "Apply transforms to image failed!" << std::endl;
  259. return false;
  260. }
  261. }
  262. // data format NHWC to NCHW
  263. // img data save to ImageBlob
  264. int h = im->rows;
  265. int w = im->cols;
  266. int c = im->channels();
  267. (data->im_data_).resize(c * h * w);
  268. float* ptr = (data->im_data_).data();
  269. for (int i = 0; i < c; ++i) {
  270. cv::extractChannel(*im, cv::Mat(h, w, CV_32FC1, ptr + i * h * w), i);
  271. }
  272. return true;
  273. }
  274. } // namespace PaddleX