// transforms.cpp (7.9 KB)
  1. // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  #include <cmath>
  #include <iostream>
  #include <string>
  #include <vector>
  #include "include/paddlex/transforms.h"
  18. namespace PaddleX {
  19. std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
  20. {"NEAREST", cv::INTER_NEAREST},
  21. {"AREA", cv::INTER_AREA},
  22. {"CUBIC", cv::INTER_CUBIC},
  23. {"LANCZOS4", cv::INTER_LANCZOS4}};
  24. bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
  25. for (int h = 0; h < im->rows; h++) {
  26. for (int w = 0; w < im->cols; w++) {
  27. im->at<cv::Vec3f>(h, w)[0] =
  28. (im->at<cv::Vec3f>(h, w)[0] / 255.0 - mean_[0]) / std_[0];
  29. im->at<cv::Vec3f>(h, w)[1] =
  30. (im->at<cv::Vec3f>(h, w)[1] / 255.0 - mean_[1]) / std_[1];
  31. im->at<cv::Vec3f>(h, w)[2] =
  32. (im->at<cv::Vec3f>(h, w)[2] / 255.0 - mean_[2]) / std_[2];
  33. }
  34. }
  35. return true;
  36. }
  37. float ResizeByShort::GenerateScale(const cv::Mat& im) {
  38. int origin_w = im.cols;
  39. int origin_h = im.rows;
  40. int im_size_max = std::max(origin_w, origin_h);
  41. int im_size_min = std::min(origin_w, origin_h);
  42. float scale =
  43. static_cast<float>(short_size_) / static_cast<float>(im_size_min);
  44. if (max_size_ > 0) {
  45. if (round(scale * im_size_max) > max_size_) {
  46. scale = static_cast<float>(max_size_) / static_cast<float>(im_size_max);
  47. }
  48. }
  49. return scale;
  50. }
  51. bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
  52. data->im_size_before_resize_.push_back({im->rows, im->cols});
  53. data->reshape_order_.push_back("resize");
  54. float scale = GenerateScale(*im);
  55. int width = static_cast<int>(scale * im->cols);
  56. int height = static_cast<int>(scale * im->rows);
  57. cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
  58. data->new_im_size_[0] = im->rows;
  59. data->new_im_size_[1] = im->cols;
  60. data->scale = scale;
  61. return true;
  62. }
  63. bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
  64. int height = static_cast<int>(im->rows);
  65. int width = static_cast<int>(im->cols);
  66. if (height < height_ || width < width_) {
  67. std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
  68. return false;
  69. }
  70. int offset_x = static_cast<int>((width - width_) / 2);
  71. int offset_y = static_cast<int>((height - height_) / 2);
  72. cv::Rect crop_roi(offset_x, offset_y, width_, height_);
  73. *im = (*im)(crop_roi);
  74. data->new_im_size_[0] = im->rows;
  75. data->new_im_size_[1] = im->cols;
  76. return true;
  77. }
  78. bool Padding::Run(cv::Mat* im, ImageBlob* data) {
  79. data->im_size_before_resize_.push_back({im->rows, im->cols});
  80. data->reshape_order_.push_back("padding");
  81. int padding_w = 0;
  82. int padding_h = 0;
  83. if (width_ > 1 & height_ > 1) {
  84. padding_w = width_ - im->cols;
  85. padding_h = height_ - im->rows;
  86. } else if (coarsest_stride_ >= 1) {
  87. padding_h =
  88. ceil(max_height_ * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
  89. padding_w =
  90. ceil(max_width_ * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
  91. }
  92. if (padding_h < 0 || padding_w < 0) {
  93. std::cerr << "[Padding] Computed padding_h=" << padding_h
  94. << ", padding_w=" << padding_w
  95. << ", but they should be greater than 0." << std::endl;
  96. return false;
  97. }
  98. cv::copyMakeBorder(
  99. *im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, cv::Scalar(0));
  100. data->new_im_size_[0] = im->rows;
  101. data->new_im_size_[1] = im->cols;
  102. return true;
  103. }
  104. void Padding::SetPaddingSize(int max_h, int max_w) {
  105. max_height_ = max_h;
  106. max_width_ = max_w;
  107. }
  108. bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
  109. if (long_size_ <= 0) {
  110. std::cerr << "[ResizeByLong] long_size should be greater than 0"
  111. << std::endl;
  112. return false;
  113. }
  114. data->im_size_before_resize_.push_back({im->rows, im->cols});
  115. data->reshape_order_.push_back("resize");
  116. int origin_w = im->cols;
  117. int origin_h = im->rows;
  118. int im_size_max = std::max(origin_w, origin_h);
  119. float scale =
  120. static_cast<float>(long_size_) / static_cast<float>(im_size_max);
  121. cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST);
  122. data->new_im_size_[0] = im->rows;
  123. data->new_im_size_[1] = im->cols;
  124. data->scale = scale;
  125. return true;
  126. }
  127. bool Resize::Run(cv::Mat* im, ImageBlob* data) {
  128. if (width_ <= 0 || height_ <= 0) {
  129. std::cerr << "[Resize] width and height should be greater than 0"
  130. << std::endl;
  131. return false;
  132. }
  133. if (interpolations.count(interp_) <= 0) {
  134. std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
  135. << std::endl;
  136. return false;
  137. }
  138. data->im_size_before_resize_.push_back({im->rows, im->cols});
  139. data->reshape_order_.push_back("resize");
  140. cv::resize(
  141. *im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
  142. data->new_im_size_[0] = im->rows;
  143. data->new_im_size_[1] = im->cols;
  144. return true;
  145. }
  146. void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) {
  147. transforms_.clear();
  148. to_rgb_ = to_rgb;
  149. for (const auto& item : transforms_node) {
  150. std::string name = item.begin()->first.as<std::string>();
  151. std::cout << "trans name: " << name << std::endl;
  152. std::shared_ptr<Transform> transform = CreateTransform(name);
  153. transform->Init(item.begin()->second);
  154. transforms_.push_back(transform);
  155. }
  156. }
  157. std::shared_ptr<Transform> Transforms::CreateTransform(
  158. const std::string& transform_name) {
  159. if (transform_name == "Normalize") {
  160. return std::make_shared<Normalize>();
  161. } else if (transform_name == "ResizeByShort") {
  162. return std::make_shared<ResizeByShort>();
  163. } else if (transform_name == "CenterCrop") {
  164. return std::make_shared<CenterCrop>();
  165. } else if (transform_name == "Resize") {
  166. return std::make_shared<Resize>();
  167. } else if (transform_name == "Padding") {
  168. return std::make_shared<Padding>();
  169. } else if (transform_name == "ResizeByLong") {
  170. return std::make_shared<ResizeByLong>();
  171. } else {
  172. std::cerr << "There's unexpected transform(name='" << transform_name
  173. << "')." << std::endl;
  174. exit(-1);
  175. }
  176. }
  177. bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
  178. // 按照transforms中预处理算子顺序处理图像
  179. if (to_rgb_) {
  180. cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
  181. }
  182. (*im).convertTo(*im, CV_32FC3);
  183. data->ori_im_size_[0] = im->rows;
  184. data->ori_im_size_[1] = im->cols;
  185. data->new_im_size_[0] = im->rows;
  186. data->new_im_size_[1] = im->cols;
  187. for (int i = 0; i < transforms_.size(); ++i) {
  188. transforms_[i]->SetPaddingSize(max_h_, max_w_);
  189. if (!transforms_[i]->Run(im, data)) {
  190. std::cerr << "Apply transforms to image failed!" << std::endl;
  191. return false;
  192. }
  193. }
  194. // 将图像由NHWC转为NCHW格式
  195. // 同时转为连续的内存块存储到ImageBlob
  196. int h = im->rows;
  197. int w = im->cols;
  198. int c = im->channels();
  199. (data->im_data_).resize(c * h * w);
  200. float* ptr = (data->im_data_).data();
  201. for (int i = 0; i < c; ++i) {
  202. cv::extractChannel(*im, cv::Mat(h, w, CV_32FC1, ptr + i * h * w), i);
  203. }
  204. return true;
  205. }
  206. void Transforms::SetPaddingSize(int max_h, int max_w) {
  207. max_h_ = max_h;
  208. max_w_ = max_w;
  209. }
  210. } // namespace PaddleX