transforms.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "include/paddlex/transforms.h"
  15. #include <math.h>
  16. #include <iostream>
  17. #include <fstream>
  18. #include <string>
  19. #include <vector>
  20. namespace PaddleX {
  21. std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
  22. {"NEAREST", cv::INTER_NEAREST},
  23. {"AREA", cv::INTER_AREA},
  24. {"CUBIC", cv::INTER_CUBIC},
  25. {"LANCZOS4", cv::INTER_LANCZOS4}};
  26. bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
  27. std::vector<float> range_val;
  28. for (int c = 0; c < im->channels(); c++) {
  29. range_val.push_back(max_val_[c] - min_val_[c]);
  30. }
  31. std::vector<cv::Mat> split_im;
  32. cv::split(*im, split_im);
  33. #pragma omp parallel for num_threads(im->channels())
  34. for (int c = 0; c < im->channels(); c++) {
  35. float range_val = max_val_[c] - min_val_[c];
  36. cv::subtract(split_im[c], cv::Scalar(min_val_[c]), split_im[c]);
  37. cv::divide(split_im[c], cv::Scalar(range_val), split_im[c]);
  38. cv::subtract(split_im[c], cv::Scalar(mean_[c]), split_im[c]);
  39. cv::divide(split_im[c], cv::Scalar(std_[c]), split_im[c]);
  40. }
  41. cv::merge(split_im, *im);
  42. return true;
  43. }
  44. float ResizeByShort::GenerateScale(const cv::Mat& im) {
  45. int origin_w = im.cols;
  46. int origin_h = im.rows;
  47. int im_size_max = std::max(origin_w, origin_h);
  48. int im_size_min = std::min(origin_w, origin_h);
  49. float scale =
  50. static_cast<float>(short_size_) / static_cast<float>(im_size_min);
  51. if (max_size_ > 0) {
  52. if (round(scale * im_size_max) > max_size_) {
  53. scale = static_cast<float>(max_size_) / static_cast<float>(im_size_max);
  54. }
  55. }
  56. return scale;
  57. }
  58. bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
  59. data->im_size_before_resize_.push_back({im->rows, im->cols});
  60. data->reshape_order_.push_back("resize");
  61. float scale = GenerateScale(*im);
  62. int width = static_cast<int>(round(scale * im->cols));
  63. int height = static_cast<int>(round(scale * im->rows));
  64. cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
  65. data->new_im_size_[0] = im->rows;
  66. data->new_im_size_[1] = im->cols;
  67. data->scale = scale;
  68. return true;
  69. }
  70. bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
  71. int height = static_cast<int>(im->rows);
  72. int width = static_cast<int>(im->cols);
  73. if (height < height_ || width < width_) {
  74. std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
  75. return false;
  76. }
  77. int offset_x = static_cast<int>((width - width_) / 2);
  78. int offset_y = static_cast<int>((height - height_) / 2);
  79. cv::Rect crop_roi(offset_x, offset_y, width_, height_);
  80. *im = (*im)(crop_roi);
  81. data->new_im_size_[0] = im->rows;
  82. data->new_im_size_[1] = im->cols;
  83. return true;
  84. }
  85. void Padding::GeneralPadding(cv::Mat* im,
  86. const std::vector<float> &padding_val,
  87. int padding_w, int padding_h) {
  88. cv::Scalar value;
  89. if (im->channels() == 1) {
  90. value = cv::Scalar(padding_val[0]);
  91. } else if (im->channels() == 2) {
  92. value = cv::Scalar(padding_val[0], padding_val[1]);
  93. } else if (im->channels() == 3) {
  94. value = cv::Scalar(padding_val[0], padding_val[1], padding_val[2]);
  95. } else if (im->channels() == 4) {
  96. value = cv::Scalar(padding_val[0], padding_val[1], padding_val[2],
  97. padding_val[3]);
  98. }
  99. cv::copyMakeBorder(
  100. *im,
  101. *im,
  102. 0,
  103. padding_h,
  104. 0,
  105. padding_w,
  106. cv::BORDER_CONSTANT,
  107. value);
  108. }
  109. void Padding::MultichannelPadding(cv::Mat* im,
  110. const std::vector<float> &padding_val,
  111. int padding_w, int padding_h) {
  112. std::vector<cv::Mat> padded_im_per_channel(im->channels());
  113. #pragma omp parallel for num_threads(im->channels())
  114. for (size_t i = 0; i < im->channels(); i++) {
  115. const cv::Mat per_channel = cv::Mat(im->rows + padding_h,
  116. im->cols + padding_w,
  117. CV_32FC1,
  118. cv::Scalar(padding_val[i]));
  119. padded_im_per_channel[i] = per_channel;
  120. }
  121. cv::Mat padded_im;
  122. cv::merge(padded_im_per_channel, padded_im);
  123. cv::Rect im_roi = cv::Rect(0, 0, im->cols, im->rows);
  124. im->copyTo(padded_im(im_roi));
  125. *im = padded_im;
  126. }
  127. bool Padding::Run(cv::Mat* im, ImageBlob* data) {
  128. data->im_size_before_resize_.push_back({im->rows, im->cols});
  129. data->reshape_order_.push_back("padding");
  130. int padding_w = 0;
  131. int padding_h = 0;
  132. if (width_ > 1 & height_ > 1) {
  133. padding_w = width_ - im->cols;
  134. padding_h = height_ - im->rows;
  135. } else if (coarsest_stride_ >= 1) {
  136. int h = im->rows;
  137. int w = im->cols;
  138. padding_h =
  139. ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
  140. padding_w =
  141. ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
  142. }
  143. if (padding_h < 0 || padding_w < 0) {
  144. std::cerr << "[Padding] Computed padding_h=" << padding_h
  145. << ", padding_w=" << padding_w
  146. << ", but they should be greater than 0." << std::endl;
  147. return false;
  148. }
  149. if (im->channels() < 5) {
  150. Padding::GeneralPadding(im, im_value_, padding_w, padding_h);
  151. } else {
  152. Padding::MultichannelPadding(im, im_value_, padding_w, padding_h);
  153. }
  154. data->new_im_size_[0] = im->rows;
  155. data->new_im_size_[1] = im->cols;
  156. return true;
  157. }
  158. bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
  159. if (long_size_ <= 0) {
  160. std::cerr << "[ResizeByLong] long_size should be greater than 0"
  161. << std::endl;
  162. return false;
  163. }
  164. data->im_size_before_resize_.push_back({im->rows, im->cols});
  165. data->reshape_order_.push_back("resize");
  166. int origin_w = im->cols;
  167. int origin_h = im->rows;
  168. int im_size_max = std::max(origin_w, origin_h);
  169. float scale =
  170. static_cast<float>(long_size_) / static_cast<float>(im_size_max);
  171. cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST);
  172. data->new_im_size_[0] = im->rows;
  173. data->new_im_size_[1] = im->cols;
  174. data->scale = scale;
  175. return true;
  176. }
  177. bool Resize::Run(cv::Mat* im, ImageBlob* data) {
  178. if (width_ <= 0 || height_ <= 0) {
  179. std::cerr << "[Resize] width and height should be greater than 0"
  180. << std::endl;
  181. return false;
  182. }
  183. if (interpolations.count(interp_) <= 0) {
  184. std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
  185. << std::endl;
  186. return false;
  187. }
  188. data->im_size_before_resize_.push_back({im->rows, im->cols});
  189. data->reshape_order_.push_back("resize");
  190. cv::resize(
  191. *im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
  192. data->new_im_size_[0] = im->rows;
  193. data->new_im_size_[1] = im->cols;
  194. return true;
  195. }
  196. bool Clip::Run(cv::Mat* im, ImageBlob* data) {
  197. std::vector<cv::Mat> split_im;
  198. cv::split(*im, split_im);
  199. for (int c = 0; c < im->channels(); c++) {
  200. cv::threshold(split_im[c], split_im[c], max_val_[c], max_val_[c],
  201. cv::THRESH_TRUNC);
  202. cv::subtract(cv::Scalar(0), split_im[c], split_im[c]);
  203. cv::threshold(split_im[c], split_im[c], min_val_[c], min_val_[c],
  204. cv::THRESH_TRUNC);
  205. cv::divide(split_im[c], cv::Scalar(-1), split_im[c]);
  206. }
  207. cv::merge(split_im, *im);
  208. return true;
  209. }
  210. void Transforms::Init(
  211. const YAML::Node& transforms_node, std::string type, bool to_rgb) {
  212. transforms_.clear();
  213. to_rgb_ = to_rgb;
  214. type_ = type;
  215. for (const auto& item : transforms_node) {
  216. std::string name = item.begin()->first.as<std::string>();
  217. if (name == "ArrangeClassifier") {
  218. continue;
  219. }
  220. if (name == "ArrangeSegmenter") {
  221. continue;
  222. }
  223. if (name == "ArrangeFasterRCNN") {
  224. continue;
  225. }
  226. if (name == "ArrangeMaskRCNN") {
  227. continue;
  228. }
  229. if (name == "ArrangeYOLOv3") {
  230. continue;
  231. }
  232. std::shared_ptr<Transform> transform = CreateTransform(name);
  233. transform->Init(item.begin()->second);
  234. transforms_.push_back(transform);
  235. }
  236. }
  237. std::shared_ptr<Transform> Transforms::CreateTransform(
  238. const std::string& transform_name) {
  239. if (transform_name == "Normalize") {
  240. return std::make_shared<Normalize>();
  241. } else if (transform_name == "ResizeByShort") {
  242. return std::make_shared<ResizeByShort>();
  243. } else if (transform_name == "CenterCrop") {
  244. return std::make_shared<CenterCrop>();
  245. } else if (transform_name == "Resize") {
  246. return std::make_shared<Resize>();
  247. } else if (transform_name == "Padding") {
  248. return std::make_shared<Padding>();
  249. } else if (transform_name == "ResizeByLong") {
  250. return std::make_shared<ResizeByLong>();
  251. } else if (transform_name == "Clip") {
  252. return std::make_shared<Clip>();
  253. } else {
  254. std::cerr << "There's unexpected transform(name='" << transform_name
  255. << "')." << std::endl;
  256. exit(-1);
  257. }
  258. }
  259. bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
  260. // preprocess by order
  261. if (to_rgb_) {
  262. cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
  263. }
  264. (*im).convertTo(*im, CV_32FC(im->channels()));
  265. if (type_ == "detector") {
  266. InferenceEngine::LockedMemory<void> input2Mapped =
  267. InferenceEngine::as<InferenceEngine::MemoryBlob>(
  268. data->ori_im_size_)->wmap();
  269. float *p = input2Mapped.as<float*>();
  270. p[0] = im->rows;
  271. p[1] = im->cols;
  272. }
  273. data->new_im_size_[0] = im->rows;
  274. data->new_im_size_[1] = im->cols;
  275. for (int i = 0; i < transforms_.size(); ++i) {
  276. if (!transforms_[i]->Run(im, data)) {
  277. std::cerr << "Apply transforms to image failed!" << std::endl;
  278. return false;
  279. }
  280. }
  281. // image format NHWC to NCHW
  282. // img data save to ImageBlob
  283. InferenceEngine::SizeVector blobSize = data->blob->getTensorDesc().getDims();
  284. const size_t width = blobSize[3];
  285. const size_t height = blobSize[2];
  286. const size_t channels = blobSize[1];
  287. InferenceEngine::MemoryBlob::Ptr mblob =
  288. InferenceEngine::as<InferenceEngine::MemoryBlob>(data->blob);
  289. auto mblobHolder = mblob->wmap();
  290. float *blob_data = mblobHolder.as<float *>();
  291. if (channels == 3) {
  292. for (size_t c = 0; c < channels; c++) {
  293. for (size_t h = 0; h < height; h++) {
  294. for (size_t w = 0; w < width; w++) {
  295. blob_data[c * width * height + h * width + w] =
  296. im->at<cv::Vec3f>(h, w)[c];
  297. }
  298. }
  299. }
  300. } else {
  301. for (size_t h = 0; h < height; h++) {
  302. float *pixelPtr = im->ptr<float>(h);
  303. for (size_t w = 0; w < width; w++) {
  304. for (size_t c = 0; c < channels; c++) {
  305. blob_data[c * width * height + h * width + w] =
  306. pixelPtr[w*channels + c];
  307. }
  308. }
  309. }
  310. }
  311. return true;
  312. }
  313. } // namespace PaddleX