Prechádzať zdrojové kódy

use predict() in video_infer.py

FlyingQianMM 5 rokov pred
rodič
commit
cd8e9cedd7

+ 109 - 0
examples/human_segmentation/infer.py

@@ -0,0 +1,109 @@
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import os.path as osp
+import cv2
+import numpy as np
+import tqdm
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+
+def parse_args():
+    """Parse the command-line options of the inference script.
+
+    Returns an ``argparse.Namespace`` with ``model_dir``, ``data_dir``,
+    ``test_list``, ``save_dir`` and ``image_shape`` (a list of two ints).
+    """
+    # (flag, add_argument keywords); argparse derives ``dest`` from the flag.
+    options = (
+        ('--model_dir', dict(
+            type=str,
+            help='Model path for prediction')),
+        ('--data_dir', dict(
+            type=str,
+            help='The root directory of dataset')),
+        ('--test_list', dict(
+            type=str,
+            help='Test list file of dataset')),
+        ('--save_dir', dict(
+            type=str,
+            default='./output/result',
+            help='The directory for saving the inference results')),
+        ('--image_shape', dict(
+            type=int,
+            nargs=2,
+            default=[192, 192],
+            help='The image shape for net inputs.')),
+    )
+    cli = argparse.ArgumentParser(
+        description='HumanSeg prediction and visualization')
+    for flag, keywords in options:
+        cli.add_argument(flag, **keywords)
+    return cli.parse_args()
+
+
+def infer(args):
+    """Segment every image listed in ``args.test_list`` with the loaded model.
+
+    Writes the blended overlay, the class-1 score map and the image with the
+    score map appended as a 4th channel under ``args.save_dir``."""
+    def makedir(path):
+        os.makedirs(osp.dirname(path), exist_ok=True)
+
+    test_transforms = transforms.Compose(
+        [transforms.Resize(args.image_shape), transforms.Normalize()])
+    model = pdx.load_model(args.model_dir)
+    added_saved_path = osp.join(args.save_dir, 'added')
+    mat_saved_path = osp.join(args.save_dir, 'mat')
+    scoremap_saved_path = osp.join(args.save_dir, 'scoremap')
+
+    with open(args.test_list, 'r') as f:
+        # Skip blank lines: an empty entry would resolve to data_dir itself.
+        files = [line.strip() for line in f if line.strip()]
+
+    for file in tqdm.tqdm(files):
+        im_file = osp.join(args.data_dir, file)
+        im = cv2.imread(im_file)
+        result = model.predict(im_file, transforms=test_transforms)
+
+        # save added image
+        added_image = pdx.seg.visualize(
+            im_file, result, weight=0.6, save_dir=None)
+        added_image_file = osp.join(added_saved_path, file)
+        makedir(added_image_file)
+        cv2.imwrite(added_image_file, added_image)
+
+        # save score map
+        score_map = result['score_map'][:, :, 1]
+        score_map = (score_map * 255).astype(np.uint8)
+        score_map_file = osp.join(scoremap_saved_path, file)
+        makedir(score_map_file)
+        cv2.imwrite(score_map_file, score_map)
+
+        # save mat image; only the trailing extension becomes '.png'
+        score_map = np.expand_dims(score_map, axis=-1)
+        mat_image = np.concatenate([im, score_map], axis=2)
+        mat_file = osp.splitext(osp.join(mat_saved_path, file))[0] + '.png'
+        makedir(mat_file)
+        cv2.imwrite(mat_file, mat_image)
+
+
+if __name__ == '__main__':
+    # Script entry point: parse the CLI options, then run inference with them.
+    infer(parse_args())

+ 37 - 27
examples/human_segmentation/video_infer.py

@@ -56,27 +56,13 @@ def parse_args():
     return parser.parse_args()
 
 
-def predict(img, model, test_transforms):
-    model.arrange_transforms(transforms=test_transforms, mode='test')
-    img, im_info = test_transforms(img.astype('float32'))
-    img = np.expand_dims(img, axis=0)
-    result = model.exe.run(model.test_prog,
-                           feed={'image': img},
-                           fetch_list=list(model.test_outputs.values()))
-    score_map = result[1]
-    score_map = np.squeeze(score_map, axis=0)
-    score_map = np.transpose(score_map, (1, 2, 0))
-    return score_map, im_info
-
-
 def recover(img, im_info):
-    for info in im_info[::-1]:
-        if info[0] == 'resize':
-            w, h = info[1][1], info[1][0]
-            img = cv2.resize(img, (w, h), cv2.INTER_LINEAR)
-        elif info[0] == 'padding':
-            w, h = info[1][0], info[1][0]
-            img = img[0:h, 0:w, :]
+    if im_info[0] == 'resize':
+        w, h = im_info[1][1], im_info[1][0]  # im_info[1] holds (height, width)
+        img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)  # 3rd positional slot is dst
+    elif im_info[0] == 'padding':
+        w, h = im_info[1][1], im_info[1][0]  # assumes the same (height, width) order -- confirm
+        img = img[0:h, 0:w, :]
     return img
 
 
@@ -84,8 +70,7 @@ def video_infer(args):
     resize_h = args.image_shape[1]
     resize_w = args.image_shape[0]
 
-    test_transforms = transforms.Compose(
-        [transforms.Resize((resize_w, resize_h)), transforms.Normalize()])
+    test_transforms = transforms.Compose([transforms.Normalize()])
     model = pdx.load_model(args.model_dir)
     if not args.video_path:
         cap = cv2.VideoCapture(0)
@@ -118,9 +103,21 @@ def video_infer(args):
         while cap.isOpened():
             ret, frame = cap.read()
             if ret:
-                score_map, im_info = predict(frame, model, test_transforms)
-                cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
+                im_shape = frame.shape
+                im_scale_x = float(resize_w) / float(im_shape[1])
+                im_scale_y = float(resize_h) / float(im_shape[0])
+                im = cv2.resize(
+                    frame,
+                    None,
+                    None,
+                    fx=im_scale_x,
+                    fy=im_scale_y,
+                    interpolation=cv2.INTER_LINEAR)
+                image = im.astype('float32')
+                im_info = ('resize', im_shape[0:2])
+                pred = model.predict(image)
+                score_map = pred['score_map']
+                cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
                 score_map = 255 * score_map[:, :, 1]
                 optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
                         disflow, is_init)
@@ -146,8 +143,21 @@ def video_infer(args):
         while cap.isOpened():
             ret, frame = cap.read()
             if ret:
-                score_map, im_info = predict(frame, model, test_transforms)
-                cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+                im_shape = frame.shape
+                im_scale_x = float(resize_w) / float(im_shape[1])
+                im_scale_y = float(resize_h) / float(im_shape[0])
+                im = cv2.resize(
+                    frame,
+                    None,
+                    None,
+                    fx=im_scale_x,
+                    fy=im_scale_y,
+                    interpolation=cv2.INTER_LINEAR)
+                image = im.astype('float32')
+                im_info = ('resize', im_shape[0:2])
+                pred = model.predict(image)
+                score_map = pred['score_map']
+                cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
                 cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                 score_map = 255 * score_map[:, :, 1]
                 optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \