transforms.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
#include "model_deploy/common/include/transforms.h"

#include <math.h>

#include <algorithm>
#include <cassert>
#include <iostream>
#include <string>
#include <vector>
  19. namespace PaddleDeploy {
  20. bool Normalize::Run(cv::Mat *im) {
  21. std::vector<cv::Mat> split_im;
  22. cv::split(*im, split_im);
  23. for (int c = 0; c < im->channels(); c++) {
  24. split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
  25. }
  26. cv::merge(split_im, *im);
  27. return true;
  28. }
  29. bool Normalize::ShapeInfer(
  30. const std::vector<int>& in_shape,
  31. std::vector<int>* out_shape) {
  32. out_shape->clear();
  33. out_shape->assign(in_shape.begin(), in_shape.end());
  34. return true;
  35. }
  36. double ResizeByShort::GenerateScale(const int origin_w, const int origin_h) {
  37. int im_size_max = std::max(origin_w, origin_h);
  38. int im_size_min = std::min(origin_w, origin_h);
  39. double scale =
  40. static_cast<double>(target_size_) / static_cast<double>(im_size_min);
  41. if (max_size_ > 0) {
  42. if (round(scale * im_size_max) > max_size_) {
  43. scale = static_cast<double>(max_size_) / static_cast<double>(im_size_max);
  44. }
  45. }
  46. return scale;
  47. }
  48. bool ResizeByShort::Run(cv::Mat *im) {
  49. int origin_w = im->cols;
  50. int origin_h = im->rows;
  51. double scale = GenerateScale(origin_w, origin_h);
  52. if (use_scale_) {
  53. cv::resize(*im, *im, cv::Size(), scale, scale, interp_);
  54. } else {
  55. int width = static_cast<int>(round(scale * im->cols));
  56. int height = static_cast<int>(round(scale * im->rows));
  57. cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_);
  58. }
  59. return true;
  60. }
  61. bool ResizeByShort::ShapeInfer(
  62. const std::vector<int>& in_shape,
  63. std::vector<int>* out_shape) {
  64. double scale = GenerateScale(in_shape[0], in_shape[1]);
  65. int width = static_cast<int>(round(scale * in_shape[0]));
  66. int height = static_cast<int>(round(scale * in_shape[1]));
  67. out_shape->clear();
  68. out_shape->push_back(width);
  69. out_shape->push_back(height);
  70. return true;
  71. }
  72. double ResizeByLong::GenerateScale(const int origin_w, const int origin_h) {
  73. int im_size_max = std::max(origin_w, origin_h);
  74. int im_size_min = std::min(origin_w, origin_h);
  75. double scale = 1.0f;
  76. if (target_size_ == -1) {
  77. if (im_size_max > max_size_) {
  78. scale = static_cast<double>(max_size_) /
  79. static_cast<double>(im_size_max);
  80. }
  81. } else {
  82. scale = static_cast<double>(target_size_) /
  83. static_cast<double>(im_size_max);
  84. }
  85. return scale;
  86. }
  87. bool ResizeByLong::Run(cv::Mat *im) {
  88. int origin_w = im->cols;
  89. int origin_h = im->rows;
  90. double scale = GenerateScale(origin_w, origin_h);
  91. int width = static_cast<int>(round(scale * im->cols));
  92. int height = static_cast<int>(round(scale * im->rows));
  93. if (stride_ != 0) {
  94. if (width / stride_ < 1 + 1e-5) {
  95. width = stride_;
  96. } else {
  97. width = (width / 32) * 32;
  98. }
  99. if (height / stride_ < 1 + 1e-5) {
  100. height = stride_;
  101. } else {
  102. height = (height / 32) * 32;
  103. }
  104. }
  105. cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_);
  106. return true;
  107. }
  108. bool ResizeByLong::ShapeInfer(
  109. const std::vector<int>& in_shape,
  110. std::vector<int>* out_shape) {
  111. double scale = GenerateScale(in_shape[0], in_shape[1]);
  112. int width = static_cast<int>(round(scale * in_shape[0]));
  113. int height = static_cast<int>(round(scale * in_shape[1]));
  114. if (stride_ != 0) {
  115. if (width / stride_ < 1 + 1e-5) {
  116. width = stride_;
  117. } else {
  118. width = (width / 32) * 32;
  119. }
  120. if (height / stride_ < 1 + 1e-5) {
  121. height = stride_;
  122. } else {
  123. height = (height / 32) * 32;
  124. }
  125. }
  126. out_shape->clear();
  127. out_shape->push_back(width);
  128. out_shape->push_back(height);
  129. return true;
  130. }
  131. bool Resize::Run(cv::Mat *im) {
  132. if (width_ <= 0 || height_ <= 0) {
  133. std::cerr << "[Resize] width and height should be greater than 0"
  134. << std::endl;
  135. return false;
  136. }
  137. if (use_scale_) {
  138. double scale_w = width_ / static_cast<double>(im->cols);
  139. double scale_h = height_ / static_cast<double>(im->rows);
  140. cv::resize(*im, *im, cv::Size(), scale_w, scale_h, interp_);
  141. } else {
  142. cv::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_);
  143. }
  144. return true;
  145. }
  146. bool Resize::ShapeInfer(
  147. const std::vector<int>& in_shape,
  148. std::vector<int>* out_shape) {
  149. out_shape->clear();
  150. out_shape->push_back(width_);
  151. out_shape->push_back(height_);
  152. return true;
  153. }
  154. bool CenterCrop::Run(cv::Mat *im) {
  155. int height = static_cast<int>(im->rows);
  156. int width = static_cast<int>(im->cols);
  157. if (height < height_ || width < width_) {
  158. std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
  159. return false;
  160. }
  161. int offset_x = static_cast<int>((width - width_) / 2);
  162. int offset_y = static_cast<int>((height - height_) / 2);
  163. cv::Rect crop_roi(offset_x, offset_y, width_, height_);
  164. *im = (*im)(crop_roi);
  165. return true;
  166. }
  167. bool CenterCrop::ShapeInfer(
  168. const std::vector<int>& in_shape,
  169. std::vector<int>* out_shape) {
  170. out_shape->clear();
  171. out_shape->push_back(width_);
  172. out_shape->push_back(height_);
  173. return true;
  174. }
  175. void Padding::GeneralPadding(cv::Mat *im,
  176. const std::vector<float> &padding_val,
  177. int padding_w, int padding_h) {
  178. cv::Scalar value;
  179. if (im->channels() == 1) {
  180. value = cv::Scalar(padding_val[0]);
  181. } else if (im->channels() == 2) {
  182. value = cv::Scalar(padding_val[0], padding_val[1]);
  183. } else if (im->channels() == 3) {
  184. value = cv::Scalar(padding_val[0], padding_val[1], padding_val[2]);
  185. } else if (im->channels() == 4) {
  186. value = cv::Scalar(padding_val[0], padding_val[1], padding_val[2],
  187. padding_val[3]);
  188. }
  189. cv::copyMakeBorder(
  190. *im,
  191. *im,
  192. 0,
  193. padding_h,
  194. 0,
  195. padding_w,
  196. cv::BORDER_CONSTANT,
  197. value);
  198. }
  199. void Padding::MultichannelPadding(cv::Mat *im,
  200. const std::vector<float> &padding_val,
  201. int padding_w, int padding_h) {
  202. std::vector<cv::Mat> padded_im_per_channel(im->channels());
  203. for (size_t i = 0; i < im->channels(); i++) {
  204. const cv::Mat per_channel = cv::Mat(im->rows + padding_h,
  205. im->cols + padding_w,
  206. CV_32FC1,
  207. cv::Scalar(padding_val[i]));
  208. padded_im_per_channel[i] = per_channel;
  209. }
  210. cv::Mat padded_im;
  211. cv::merge(padded_im_per_channel, padded_im);
  212. cv::Rect im_roi = cv::Rect(0, 0, im->cols, im->rows);
  213. im->copyTo(padded_im(im_roi));
  214. *im = padded_im;
  215. }
  216. bool Padding::Run(cv::Mat *im) {
  217. int padding_w = 0;
  218. int padding_h = 0;
  219. if (width_ > 1 & height_ > 1) {
  220. padding_w = width_ - im->cols;
  221. padding_h = height_ - im->rows;
  222. } else if (stride_ >= 1) {
  223. int h = im->rows;
  224. int w = im->cols;
  225. padding_h =
  226. ceil(h * 1.0 / stride_) * stride_ - im->rows;
  227. padding_w =
  228. ceil(w * 1.0 / stride_) * stride_ - im->cols;
  229. }
  230. if (padding_h < 0 || padding_w < 0) {
  231. std::cerr << "[Padding] Computed padding_h=" << padding_h
  232. << ", padding_w=" << padding_w
  233. << ", but they should be greater than 0." << std::endl;
  234. return false;
  235. }
  236. if (im->channels() < 5) {
  237. Padding::GeneralPadding(&*im, im_value_, padding_w, padding_h);
  238. } else {
  239. Padding::MultichannelPadding(
  240. &*im,
  241. im_value_,
  242. padding_w,
  243. padding_h);
  244. }
  245. return true;
  246. }
  247. bool Padding::Run(cv::Mat *im, int max_w, int max_h) {
  248. int padding_w = 0;
  249. int padding_h = 0;
  250. if ((max_w - im->cols) > 0 || (max_h - im->rows) > 0) {
  251. padding_w = max_w - im->cols;
  252. padding_h = max_h - im->rows;
  253. cv::Scalar value = cv::Scalar(0, 0, 0);
  254. cv::copyMakeBorder(
  255. *im,
  256. *im,
  257. 0,
  258. padding_h,
  259. 0,
  260. padding_w,
  261. cv::BORDER_CONSTANT,
  262. value);
  263. }
  264. return true;
  265. }
  266. bool Padding::ShapeInfer(
  267. const std::vector<int>& in_shape,
  268. std::vector<int>* out_shape) {
  269. int new_w = 0;
  270. int new_h = 0;
  271. if (width_ > 1 & height_ > 1) {
  272. new_w = width_;
  273. new_h = height_;
  274. } else {
  275. int w = in_shape[0];
  276. int h = in_shape[1];
  277. new_w = ceil(w * 1.0 / stride_) * stride_;
  278. new_h = ceil(h * 1.0 / stride_) * stride_;
  279. }
  280. assert(new_w >= in_shape[0] && new_h >= in_shape[1]);
  281. out_shape->clear();
  282. out_shape->push_back(new_w);
  283. out_shape->push_back(new_h);
  284. return true;
  285. }
  286. bool Clip::Run(cv::Mat *im) {
  287. std::vector<cv::Mat> split_im;
  288. cv::split(*im, split_im);
  289. for (int c = 0; c < im->channels(); c++) {
  290. cv::threshold(split_im[c], split_im[c], max_val_[c], max_val_[c],
  291. cv::THRESH_TRUNC);
  292. cv::subtract(cv::Scalar(0), split_im[c], split_im[c]);
  293. cv::threshold(split_im[c], split_im[c], min_val_[c], min_val_[c],
  294. cv::THRESH_TRUNC);
  295. cv::divide(split_im[c], cv::Scalar(-1), split_im[c]);
  296. }
  297. cv::merge(split_im, *im);
  298. return true;
  299. }
  300. bool Clip::ShapeInfer(
  301. const std::vector<int>& in_shape,
  302. std::vector<int>* out_shape) {
  303. out_shape->clear();
  304. out_shape->assign(in_shape.begin(), in_shape.end());
  305. return true;
  306. }
  307. bool BGR2RGB::Run(cv::Mat *im) {
  308. cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
  309. return true;
  310. }
  311. bool BGR2RGB::ShapeInfer(
  312. const std::vector<int>& in_shape,
  313. std::vector<int>* out_shape) {
  314. out_shape->clear();
  315. out_shape->assign(in_shape.begin(), in_shape.end());
  316. return true;
  317. }
  318. bool RGB2BGR::Run(cv::Mat *im) {
  319. cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
  320. return true;
  321. }
  322. bool RGB2BGR::ShapeInfer(
  323. const std::vector<int>& in_shape,
  324. std::vector<int>* out_shape) {
  325. out_shape->clear();
  326. out_shape->assign(in_shape.begin(), in_shape.end());
  327. return true;
  328. }
  329. bool Permute::Run(cv::Mat *im) {
  330. cv::Mat im_clone = (*im).clone();
  331. int rh = im_clone.rows;
  332. int rw = im_clone.cols;
  333. int rc = im_clone.channels();
  334. float *data = reinterpret_cast<float *>(im->data);
  335. for (int i = 0; i < rc; ++i) {
  336. cv::extractChannel(im_clone,
  337. cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i);
  338. }
  339. return true;
  340. }
  341. bool Permute::ShapeInfer(
  342. const std::vector<int>& in_shape,
  343. std::vector<int>* out_shape) {
  344. out_shape->clear();
  345. out_shape->assign(in_shape.begin(), in_shape.end());
  346. return true;
  347. }
  348. bool Convert::Run(cv::Mat *im) {
  349. if (dtype_ == "float") {
  350. im->convertTo(*im, CV_32FC(im->channels()));
  351. }
  352. return true;
  353. }
  354. bool Convert::ShapeInfer(
  355. const std::vector<int>& in_shape,
  356. std::vector<int>* out_shape) {
  357. out_shape->clear();
  358. out_shape->assign(in_shape.begin(), in_shape.end());
  359. return true;
  360. }
  361. int OcrResize::GeneralWidth(int w, int h) {
  362. int resize_w;
  363. float ratio = static_cast<float>(w) / static_cast<float>(h);
  364. if (!fix_width_) {
  365. width_ = static_cast<int>(32 * ratio);
  366. }
  367. if (ceilf(height_ * ratio) > width_) {
  368. resize_w = width_;
  369. } else {
  370. resize_w = static_cast<int>(ceilf(height_ * ratio));
  371. }
  372. return resize_w;
  373. }
  374. bool OcrResize::Run(cv::Mat *im) {
  375. int resize_w = GeneralWidth(im->cols, im->rows);
  376. cv::resize(*im, *im, cv::Size(resize_w, height_), 0.f, 0.f, interp_);
  377. if (resize_w < width_ || is_pad_) {
  378. cv::copyMakeBorder(*im, *im, 0, 0, 0,
  379. static_cast<int>(width_ - resize_w),
  380. cv::BORDER_CONSTANT, value_);
  381. }
  382. return true;
  383. }
  384. bool OcrResize::ShapeInfer(
  385. const std::vector<int>& in_shape,
  386. std::vector<int>* out_shape) {
  387. int resize_w = GeneralWidth(in_shape[0], in_shape[1]);
  388. if (resize_w < width_ || is_pad_) {
  389. resize_w = width_;
  390. }
  391. out_shape->clear();
  392. out_shape->push_back(resize_w);
  393. out_shape->push_back(height_);
  394. return true;
  395. }
  396. bool OcrTrtResize::Run(cv::Mat *im) {
  397. int k = static_cast<int>(im->cols * 32 / im->rows);
  398. if (k >= width_) {
  399. cv::resize(*im, *im, cv::Size(width_, height_), 0.f, 0.f, interp_);
  400. } else {
  401. cv::resize(*im, *im, cv::Size(k, height_),
  402. 0.f, 0.f, cv::INTER_LINEAR);
  403. cv::copyMakeBorder(*im, *im, 0, 0, 0,
  404. static_cast<int>(width_ - k),
  405. cv::BORDER_CONSTANT, {127, 127, 127});
  406. }
  407. return true;
  408. }
  409. bool OcrTrtResize::ShapeInfer(
  410. const std::vector<int>& in_shape,
  411. std::vector<int>* out_shape) {
  412. out_shape->clear();
  413. out_shape->push_back(width_);
  414. out_shape->push_back(height_);
  415. return true;
  416. }
  417. } // namespace PaddleDeploy