face_align.cc 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // reference:
  15. // https://github.com/deepinsight/insightface/blob/master/recognition/_tools_/cpp_align/face_align.h
  16. #include "ultra_infer/vision/utils/utils.h"
  17. namespace ultra_infer {
  18. namespace vision {
  19. namespace utils {
  20. cv::Mat MeanAxis0(const cv::Mat &src) {
  21. int num = src.rows;
  22. int dim = src.cols;
  23. cv::Mat output(1, dim, CV_32F);
  24. for (int i = 0; i < dim; i++) {
  25. float sum = 0;
  26. for (int j = 0; j < num; j++) {
  27. sum += src.at<float>(j, i);
  28. }
  29. output.at<float>(0, i) = sum / num;
  30. }
  31. return output;
  32. }
  33. cv::Mat ElementwiseMinus(const cv::Mat &A, const cv::Mat &B) {
  34. cv::Mat output(A.rows, A.cols, A.type());
  35. assert(B.cols == A.cols);
  36. if (B.cols == A.cols) {
  37. for (int i = 0; i < A.rows; i++) {
  38. for (int j = 0; j < B.cols; j++) {
  39. output.at<float>(i, j) = A.at<float>(i, j) - B.at<float>(0, j);
  40. }
  41. }
  42. }
  43. return output;
  44. }
  45. cv::Mat VarAxis0(const cv::Mat &src) {
  46. cv::Mat temp_ = ElementwiseMinus(src, MeanAxis0(src));
  47. cv::multiply(temp_, temp_, temp_);
  48. return MeanAxis0(temp_);
  49. }
  50. int MatrixRank(cv::Mat M) {
  51. cv::Mat w, u, vt;
  52. cv::SVD::compute(M, w, u, vt);
  53. cv::Mat1b non_zero_singular_values = w > 0.0001;
  54. int rank = countNonZero(non_zero_singular_values);
  55. return rank;
  56. }
  57. cv::Mat SimilarTransform(cv::Mat &dst, cv::Mat &src) {
  58. int num = dst.rows;
  59. int dim = dst.cols;
  60. cv::Mat src_mean = MeanAxis0(dst);
  61. cv::Mat dst_mean = MeanAxis0(src);
  62. cv::Mat src_demean = ElementwiseMinus(dst, src_mean);
  63. cv::Mat dst_demean = ElementwiseMinus(src, dst_mean);
  64. cv::Mat A = (dst_demean.t() * src_demean) / static_cast<float>(num);
  65. cv::Mat d(dim, 1, CV_32F);
  66. d.setTo(1.0f);
  67. if (cv::determinant(A) < 0) {
  68. d.at<float>(dim - 1, 0) = -1;
  69. }
  70. cv::Mat T = cv::Mat::eye(dim + 1, dim + 1, CV_32F);
  71. cv::Mat U, S, V;
  72. cv::SVD::compute(A, S, U, V);
  73. int rank = MatrixRank(A);
  74. if (rank == 0) {
  75. assert(rank == 0);
  76. } else if (rank == dim - 1) {
  77. if (cv::determinant(U) * cv::determinant(V) > 0) {
  78. T.rowRange(0, dim).colRange(0, dim) = U * V;
  79. } else {
  80. int s = d.at<float>(dim - 1, 0) = -1;
  81. d.at<float>(dim - 1, 0) = -1;
  82. T.rowRange(0, dim).colRange(0, dim) = U * V;
  83. cv::Mat diag_ = cv::Mat::diag(d);
  84. cv::Mat twp = diag_ * V; // np.dot(np.diag(d), V.T)
  85. cv::Mat B = cv::Mat::zeros(3, 3, CV_8UC1);
  86. cv::Mat C = B.diag(0);
  87. T.rowRange(0, dim).colRange(0, dim) = U * twp;
  88. d.at<float>(dim - 1, 0) = s;
  89. }
  90. } else {
  91. cv::Mat diag_ = cv::Mat::diag(d);
  92. cv::Mat twp = diag_ * V.t(); // np.dot(np.diag(d), V.T)
  93. cv::Mat res = U * twp; // U
  94. T.rowRange(0, dim).colRange(0, dim) = -U.t() * twp;
  95. }
  96. cv::Mat var_ = VarAxis0(src_demean);
  97. float val = cv::sum(var_).val[0];
  98. cv::Mat res;
  99. cv::multiply(d, S, res);
  100. float scale = 1.0 / val * cv::sum(res).val[0];
  101. T.rowRange(0, dim).colRange(0, dim) =
  102. -T.rowRange(0, dim).colRange(0, dim).t();
  103. cv::Mat temp1 = T.rowRange(0, dim).colRange(0, dim); // T[:dim, :dim]
  104. cv::Mat temp2 = src_mean.t(); // src_mean.T
  105. cv::Mat temp3 = temp1 * temp2; // np.dot(T[:dim, :dim], src_mean.T)
  106. cv::Mat temp4 = scale * temp3;
  107. T.rowRange(0, dim).colRange(dim, dim + 1) = -(temp4 - dst_mean.t());
  108. T.rowRange(0, dim).colRange(0, dim) *= scale;
  109. return T;
  110. }
  111. std::vector<cv::Mat>
  112. AlignFaceWithFivePoints(cv::Mat &image, FaceDetectionResult &result,
  113. std::vector<std::array<float, 2>> std_landmarks,
  114. std::array<int, 2> output_size) {
  115. FDASSERT(std_landmarks.size() == 5, "The landmarks.size() must be 5.")
  116. FDASSERT(!image.empty(), "The input_image can't be empty.")
  117. std::vector<cv::Mat> output_images;
  118. output_images.reserve(result.scores.size());
  119. if (result.boxes.empty()) {
  120. FDWARNING << "The result is empty." << std::endl;
  121. return output_images;
  122. }
  123. cv::Mat src(5, 2, CV_32FC1, std_landmarks.data());
  124. for (int i = 0; i < result.landmarks.size(); i += 5) {
  125. cv::Mat dst(5, 2, CV_32FC1, result.landmarks.data() + i);
  126. cv::Mat m = SimilarTransform(dst, src);
  127. cv::Mat map_matrix;
  128. cv::Rect map_matrix_r = cv::Rect(0, 0, 3, 2);
  129. cv::Mat(m, map_matrix_r).copyTo(map_matrix);
  130. cv::Mat cropped_image_aligned;
  131. cv::warpAffine(image, cropped_image_aligned, map_matrix,
  132. {output_size[0], output_size[1]});
  133. if (cropped_image_aligned.empty()) {
  134. FDWARNING << "croppedImageAligned is empty." << std::endl;
  135. }
  136. output_images.emplace_back(cropped_image_aligned);
  137. }
  138. return output_images;
  139. }
  140. } // namespace utils
  141. } // namespace vision
  142. } // namespace ultra_infer