transforms.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "model_deploy/common/include/transforms.h"
  15. #include <math.h>
  16. #include <iostream>
  17. #include <string>
  18. #include <vector>
  19. namespace PaddleDeploy {
  20. bool Normalize::Run(cv::Mat *im) {
  21. std::vector<cv::Mat> split_im;
  22. cv::split(*im, split_im);
  23. for (int c = 0; c < im->channels(); c++) {
  24. split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
  25. }
  26. cv::merge(split_im, *im);
  27. return true;
  28. }
  29. bool Normalize::ShapeInfer(
  30. const std::vector<int>& in_shape,
  31. std::vector<int>* out_shape) {
  32. out_shape->clear();
  33. out_shape->assign(in_shape.begin(), in_shape.end());
  34. return true;
  35. }
  36. double ResizeByShort::GenerateScale(const int origin_w, const int origin_h) {
  37. int im_size_max = std::max(origin_w, origin_h);
  38. int im_size_min = std::min(origin_w, origin_h);
  39. double scale =
  40. static_cast<double>(target_size_) / static_cast<double>(im_size_min);
  41. if (max_size_ > 0) {
  42. if (round(scale * im_size_max) > max_size_) {
  43. scale = static_cast<double>(max_size_) / static_cast<double>(im_size_max);
  44. }
  45. }
  46. return scale;
  47. }
  48. bool ResizeByShort::Run(cv::Mat *im) {
  49. int origin_w = im->cols;
  50. int origin_h = im->rows;
  51. double scale = GenerateScale(origin_w, origin_h);
  52. if (use_scale_) {
  53. cv::resize(*im, *im, cv::Size(), scale, scale, interp_);
  54. } else {
  55. int width = static_cast<int>(round(scale * im->cols));
  56. int height = static_cast<int>(round(scale * im->rows));
  57. cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_);
  58. }
  59. return true;
  60. }
  61. bool ResizeByShort::ShapeInfer(
  62. const std::vector<int>& in_shape,
  63. std::vector<int>* out_shape) {
  64. double scale = GenerateScale(in_shape[0], in_shape[1]);
  65. int width = static_cast<int>(round(scale * in_shape[0]));
  66. int height = static_cast<int>(round(scale * in_shape[1]));
  67. out_shape->clear();
  68. out_shape->push_back(width);
  69. out_shape->push_back(height);
  70. return true;
  71. }
  72. double ResizeByLong::GenerateScale(const int origin_w, const int origin_h) {
  73. int im_size_max = std::max(origin_w, origin_h);
  74. int im_size_min = std::min(origin_w, origin_h);
  75. double scale = 1.0f;
  76. if (target_size_ == -1) {
  77. if (im_size_max > max_size_) {
  78. scale = static_cast<double>(max_size_) /
  79. static_cast<double>(im_size_max);
  80. }
  81. } else {
  82. scale = static_cast<double>(target_size_) /
  83. static_cast<double>(im_size_max);
  84. }
  85. return scale;
  86. }
  87. bool ResizeByLong::Run(cv::Mat *im) {
  88. int origin_w = im->cols;
  89. int origin_h = im->rows;
  90. double scale = GenerateScale(origin_w, origin_h);
  91. int width = static_cast<int>(round(scale * im->cols));
  92. int height = static_cast<int>(round(scale * im->rows));
  93. if (stride_ != 0) {
  94. if (width / stride_ < 1 + 1e-5) {
  95. width = stride_;
  96. } else {
  97. width = (width / 32) * 32;
  98. }
  99. if (height / stride_ < 1 + 1e-5) {
  100. height = stride_;
  101. } else {
  102. height = (height / 32) * 32;
  103. }
  104. }
  105. cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_);
  106. return true;
  107. }
  108. bool ResizeByLong::ShapeInfer(
  109. const std::vector<int>& in_shape,
  110. std::vector<int>* out_shape) {
  111. double scale = GenerateScale(in_shape[0], in_shape[1]);
  112. int width = static_cast<int>(round(scale * in_shape[0]));
  113. int height = static_cast<int>(round(scale * in_shape[1]));
  114. if (stride_ != 0) {
  115. if (width / stride_ < 1 + 1e-5) {
  116. width = stride_;
  117. } else {
  118. width = (width / 32) * 32;
  119. }
  120. if (height / stride_ < 1 + 1e-5) {
  121. height = stride_;
  122. } else {
  123. height = (height / 32) * 32;
  124. }
  125. }
  126. out_shape->clear();
  127. out_shape->push_back(width);
  128. out_shape->push_back(height);
  129. return true;
  130. }
  131. bool Resize::Run(cv::Mat *im) {
  132. if (width_ <= 0 || height_ <= 0) {
  133. std::cerr << "[Resize] width and height should be greater than 0"
  134. << std::endl;
  135. return false;
  136. }
  137. double scale_w = width_ / static_cast<double>(im->cols);
  138. double scale_h = height_ / static_cast<double>(im->rows);
  139. if (keep_ratio_) {
  140. scale_h = std::min(scale_w, scale_h);
  141. scale_w = scale_h;
  142. width_ = static_cast<int>(round(scale_w * im->cols));
  143. height_ = static_cast<int>(round(scale_h * im->rows));
  144. }
  145. if (use_scale_) {
  146. cv::resize(*im, *im, cv::Size(), scale_w, scale_h, interp_);
  147. } else {
  148. cv::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_);
  149. }
  150. return true;
  151. }
  152. bool Resize::ShapeInfer(
  153. const std::vector<int>& in_shape,
  154. std::vector<int>* out_shape) {
  155. out_shape->clear();
  156. double width = width_;
  157. double height = height_;
  158. if (keep_ratio_) {
  159. int w = in_shape[0];
  160. int h = in_shape[1];
  161. double scale_w = width_ / static_cast<double>(w);
  162. double scale_h = height_ / static_cast<double>(h);
  163. scale_h = std::min(scale_w, scale_h);
  164. scale_w = scale_h;
  165. width = static_cast<int>(round(scale_w * w));
  166. height = static_cast<int>(round(scale_h * h));
  167. }
  168. out_shape->push_back(width);
  169. out_shape->push_back(height);
  170. return true;
  171. }
  172. bool CenterCrop::Run(cv::Mat *im) {
  173. int height = static_cast<int>(im->rows);
  174. int width = static_cast<int>(im->cols);
  175. if (height < height_ || width < width_) {
  176. std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
  177. return false;
  178. }
  179. int offset_x = static_cast<int>((width - width_) / 2);
  180. int offset_y = static_cast<int>((height - height_) / 2);
  181. cv::Rect crop_roi(offset_x, offset_y, width_, height_);
  182. *im = (*im)(crop_roi);
  183. return true;
  184. }
  185. bool CenterCrop::ShapeInfer(
  186. const std::vector<int>& in_shape,
  187. std::vector<int>* out_shape) {
  188. out_shape->clear();
  189. out_shape->push_back(width_);
  190. out_shape->push_back(height_);
  191. return true;
  192. }
  193. void Padding::GeneralPadding(cv::Mat *im,
  194. const std::vector<float> &padding_val,
  195. int padding_w, int padding_h) {
  196. cv::Scalar value;
  197. if (im->channels() == 1) {
  198. value = cv::Scalar(padding_val[0]);
  199. } else if (im->channels() == 2) {
  200. value = cv::Scalar(padding_val[0], padding_val[1]);
  201. } else if (im->channels() == 3) {
  202. value = cv::Scalar(padding_val[0], padding_val[1], padding_val[2]);
  203. } else if (im->channels() == 4) {
  204. value = cv::Scalar(padding_val[0], padding_val[1], padding_val[2],
  205. padding_val[3]);
  206. }
  207. cv::copyMakeBorder(
  208. *im,
  209. *im,
  210. 0,
  211. padding_h,
  212. 0,
  213. padding_w,
  214. cv::BORDER_CONSTANT,
  215. value);
  216. }
  217. void Padding::MultichannelPadding(cv::Mat *im,
  218. const std::vector<float> &padding_val,
  219. int padding_w, int padding_h) {
  220. std::vector<cv::Mat> padded_im_per_channel(im->channels());
  221. for (size_t i = 0; i < im->channels(); i++) {
  222. const cv::Mat per_channel = cv::Mat(im->rows + padding_h,
  223. im->cols + padding_w,
  224. CV_32FC1,
  225. cv::Scalar(padding_val[i]));
  226. padded_im_per_channel[i] = per_channel;
  227. }
  228. cv::Mat padded_im;
  229. cv::merge(padded_im_per_channel, padded_im);
  230. cv::Rect im_roi = cv::Rect(0, 0, im->cols, im->rows);
  231. im->copyTo(padded_im(im_roi));
  232. *im = padded_im;
  233. }
  234. bool Padding::Run(cv::Mat *im) {
  235. int padding_w = 0;
  236. int padding_h = 0;
  237. if (width_ > 1 & height_ > 1) {
  238. padding_w = width_ - im->cols;
  239. padding_h = height_ - im->rows;
  240. } else if (stride_ >= 1) {
  241. int h = im->rows;
  242. int w = im->cols;
  243. padding_h =
  244. ceil(h * 1.0 / stride_) * stride_ - im->rows;
  245. padding_w =
  246. ceil(w * 1.0 / stride_) * stride_ - im->cols;
  247. }
  248. if (padding_h < 0 || padding_w < 0) {
  249. std::cerr << "[Padding] Computed padding_h=" << padding_h
  250. << ", padding_w=" << padding_w
  251. << ", but they should be greater than 0." << std::endl;
  252. return false;
  253. }
  254. if (im->channels() < 5) {
  255. Padding::GeneralPadding(&*im, im_value_, padding_w, padding_h);
  256. } else {
  257. Padding::MultichannelPadding(
  258. &*im,
  259. im_value_,
  260. padding_w,
  261. padding_h);
  262. }
  263. return true;
  264. }
  265. bool Padding::Run(cv::Mat *im, int max_w, int max_h) {
  266. int padding_w = 0;
  267. int padding_h = 0;
  268. if ((max_w - im->cols) > 0 || (max_h - im->rows) > 0) {
  269. padding_w = max_w - im->cols;
  270. padding_h = max_h - im->rows;
  271. cv::Scalar value = cv::Scalar(0, 0, 0);
  272. cv::copyMakeBorder(
  273. *im,
  274. *im,
  275. 0,
  276. padding_h,
  277. 0,
  278. padding_w,
  279. cv::BORDER_CONSTANT,
  280. value);
  281. }
  282. return true;
  283. }
  284. bool Padding::ShapeInfer(
  285. const std::vector<int>& in_shape,
  286. std::vector<int>* out_shape) {
  287. int new_w = 0;
  288. int new_h = 0;
  289. if (width_ > 1 & height_ > 1) {
  290. new_w = width_;
  291. new_h = height_;
  292. } else {
  293. int w = in_shape[0];
  294. int h = in_shape[1];
  295. new_w = ceil(w * 1.0 / stride_) * stride_;
  296. new_h = ceil(h * 1.0 / stride_) * stride_;
  297. }
  298. assert(new_w >= in_shape[0] && new_h >= in_shape[1]);
  299. out_shape->clear();
  300. out_shape->push_back(new_w);
  301. out_shape->push_back(new_h);
  302. return true;
  303. }
  304. bool Clip::Run(cv::Mat *im) {
  305. std::vector<cv::Mat> split_im;
  306. cv::split(*im, split_im);
  307. for (int c = 0; c < im->channels(); c++) {
  308. cv::threshold(split_im[c], split_im[c], max_val_[c], max_val_[c],
  309. cv::THRESH_TRUNC);
  310. cv::subtract(cv::Scalar(0), split_im[c], split_im[c]);
  311. cv::threshold(split_im[c], split_im[c], min_val_[c], min_val_[c],
  312. cv::THRESH_TRUNC);
  313. cv::divide(split_im[c], cv::Scalar(-1), split_im[c]);
  314. }
  315. cv::merge(split_im, *im);
  316. return true;
  317. }
  318. bool Clip::ShapeInfer(
  319. const std::vector<int>& in_shape,
  320. std::vector<int>* out_shape) {
  321. out_shape->clear();
  322. out_shape->assign(in_shape.begin(), in_shape.end());
  323. return true;
  324. }
  325. bool BGR2RGB::Run(cv::Mat *im) {
  326. cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
  327. return true;
  328. }
  329. bool BGR2RGB::ShapeInfer(
  330. const std::vector<int>& in_shape,
  331. std::vector<int>* out_shape) {
  332. out_shape->clear();
  333. out_shape->assign(in_shape.begin(), in_shape.end());
  334. return true;
  335. }
  336. bool RGB2BGR::Run(cv::Mat *im) {
  337. cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
  338. return true;
  339. }
  340. bool RGB2BGR::ShapeInfer(
  341. const std::vector<int>& in_shape,
  342. std::vector<int>* out_shape) {
  343. out_shape->clear();
  344. out_shape->assign(in_shape.begin(), in_shape.end());
  345. return true;
  346. }
  347. bool Permute::Run(cv::Mat *im) {
  348. cv::Mat im_clone = (*im).clone();
  349. int rh = im_clone.rows;
  350. int rw = im_clone.cols;
  351. int rc = im_clone.channels();
  352. float *data = reinterpret_cast<float *>(im->data);
  353. for (int i = 0; i < rc; ++i) {
  354. cv::extractChannel(im_clone,
  355. cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i);
  356. }
  357. return true;
  358. }
  359. bool Permute::ShapeInfer(
  360. const std::vector<int>& in_shape,
  361. std::vector<int>* out_shape) {
  362. out_shape->clear();
  363. out_shape->assign(in_shape.begin(), in_shape.end());
  364. return true;
  365. }
  366. bool Convert::Run(cv::Mat *im) {
  367. if (dtype_ == "float") {
  368. im->convertTo(*im, CV_32FC(im->channels()));
  369. }
  370. return true;
  371. }
  372. bool Convert::ShapeInfer(
  373. const std::vector<int>& in_shape,
  374. std::vector<int>* out_shape) {
  375. out_shape->clear();
  376. out_shape->assign(in_shape.begin(), in_shape.end());
  377. return true;
  378. }
  379. int OcrResize::GeneralWidth(int w, int h) {
  380. int resize_w;
  381. float ratio = static_cast<float>(w) / static_cast<float>(h);
  382. if (!fix_width_) {
  383. width_ = static_cast<int>(32 * ratio);
  384. }
  385. if (ceilf(height_ * ratio) > width_) {
  386. resize_w = width_;
  387. } else {
  388. resize_w = static_cast<int>(ceilf(height_ * ratio));
  389. }
  390. return resize_w;
  391. }
  392. bool OcrResize::Run(cv::Mat *im) {
  393. int resize_w = GeneralWidth(im->cols, im->rows);
  394. cv::resize(*im, *im, cv::Size(resize_w, height_), 0.f, 0.f, interp_);
  395. if (resize_w < width_ || is_pad_) {
  396. cv::copyMakeBorder(*im, *im, 0, 0, 0,
  397. static_cast<int>(width_ - resize_w),
  398. cv::BORDER_CONSTANT, value_);
  399. }
  400. return true;
  401. }
  402. bool OcrResize::ShapeInfer(
  403. const std::vector<int>& in_shape,
  404. std::vector<int>* out_shape) {
  405. int resize_w = GeneralWidth(in_shape[0], in_shape[1]);
  406. if (resize_w < width_ || is_pad_) {
  407. resize_w = width_;
  408. }
  409. out_shape->clear();
  410. out_shape->push_back(resize_w);
  411. out_shape->push_back(height_);
  412. return true;
  413. }
  414. bool OcrTrtResize::Run(cv::Mat *im) {
  415. int k = static_cast<int>(im->cols * 32 / im->rows);
  416. if (k >= width_) {
  417. cv::resize(*im, *im, cv::Size(width_, height_), 0.f, 0.f, interp_);
  418. } else {
  419. cv::resize(*im, *im, cv::Size(k, height_),
  420. 0.f, 0.f, cv::INTER_LINEAR);
  421. cv::copyMakeBorder(*im, *im, 0, 0, 0,
  422. static_cast<int>(width_ - k),
  423. cv::BORDER_CONSTANT, {127, 127, 127});
  424. }
  425. return true;
  426. }
  427. bool OcrTrtResize::ShapeInfer(
  428. const std::vector<int>& in_shape,
  429. std::vector<int>* out_shape) {
  430. out_shape->clear();
  431. out_shape->push_back(width_);
  432. out_shape->push_back(height_);
  433. return true;
  434. }
  435. } // namespace PaddleDeploy