
add hrnet for classifier and faster rcnn

FlyingQianMM 5 years ago
parent
commit
5d4c75fffb

+ 1 - 0
paddlex/cls.py

@@ -36,5 +36,6 @@ DenseNet121 = cv.models.DenseNet121
 DenseNet161 = cv.models.DenseNet161
 DenseNet201 = cv.models.DenseNet201
 ShuffleNetV2 = cv.models.ShuffleNetV2
+HRNet_W18 = cv.models.HRNet_W18
 
 transforms = cv.transforms.cls_transforms
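
With this alias exported, the new classifier is reachable from the top-level package like every other model in paddlex/cls.py. A minimal sketch (the num_classes value is a placeholder, not taken from this diff):

    import paddlex as pdx

    # HRNet_W18 now sits beside ResNet/DenseNet/ShuffleNetV2 in pdx.cls
    model = pdx.cls.HRNet_W18(num_classes=10)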

+ 2 - 0
paddlex/cv/models/__init__.py

@@ -34,11 +34,13 @@ from .classifier import DenseNet121
 from .classifier import DenseNet161
 from .classifier import DenseNet201
 from .classifier import ShuffleNetV2
+from .classifier import HRNet_W18
 from .base import BaseAPI
 from .yolo_v3 import YOLOv3
 from .faster_rcnn import FasterRCNN
 from .mask_rcnn import MaskRCNN
 from .unet import UNet
 from .deeplabv3p import DeepLabv3p
+from .hrnet import HRNet
 from .load_model import load_model
 from .slim import prune

+ 36 - 28
paddlex/cv/models/base.py

@@ -31,6 +31,8 @@ from collections import OrderedDict
 from os import path as osp
 from paddle.fluid.framework import Program
 from .utils.pretrain_weights import get_pretrain_weights
+fluid.default_startup_program().random_seed = 1000
+fluid.default_main_program().random_seed = 1000
 
 
 def dict2str(dict_input):
@@ -79,9 +81,9 @@ class BaseAPI:
             return int(batch_size // len(self.places))
         else:
             raise Exception("Please support correct batch_size, \
-                            which can be divided by available cards({}) in {}".
-                            format(paddlex.env_info['num'],
-                                   paddlex.env_info['place']))
+                            which can be divided by available cards({}) in {}"
+                            .format(paddlex.env_info['num'], paddlex.env_info[
+                                'place']))
 
     def build_program(self):
         # Build the training network
@@ -210,8 +212,8 @@ class BaseAPI:
             paddlex.utils.utils.load_pretrain_weights(
                 self.exe, self.train_prog, resume_checkpoint, resume=True)
             if not osp.exists(osp.join(resume_checkpoint, "model.yml")):
-                raise Exception(
-                    "There's no model.yml in {}".format(resume_checkpoint))
+                raise Exception("There's no model.yml in {}".format(
+                    resume_checkpoint))
             with open(osp.join(resume_checkpoint, "model.yml")) as f:
                 info = yaml.load(f.read(), Loader=yaml.Loader)
                 self.completed_epochs = info['completed_epochs']
@@ -269,13 +271,13 @@ class BaseAPI:
         except:
             pass
 
-        if hasattr(self.test_transforms, 'to_rgb'):
-            if self.test_transforms.to_rgb:
-                info['TransformsMode'] = 'RGB'
-            else:
-                info['TransformsMode'] = 'BGR'
-
         if hasattr(self, 'test_transforms'):
+            if hasattr(self.test_transforms, 'to_rgb'):
+                if self.test_transforms.to_rgb:
+                    info['TransformsMode'] = 'RGB'
+                else:
+                    info['TransformsMode'] = 'BGR'
+
             if self.test_transforms is not None:
                 info['Transforms'] = list()
                 for op in self.test_transforms.transforms:
@@ -362,8 +364,8 @@ class BaseAPI:
 
         # Marker file indicating the model was saved successfully
         open(osp.join(save_dir, '.success'), 'w').close()
-        logging.info(
-            "Model for inference deploy saved in {}.".format(save_dir))
+        logging.info("Model for inference deploy saved in {}.".format(
+            save_dir))
 
     def train_loop(self,
                    num_epochs,
@@ -377,7 +379,8 @@ class BaseAPI:
                    early_stop=False,
                    early_stop_patience=5):
         if train_dataset.num_samples < train_batch_size:
-            raise Exception('The training dataset must contain more samples than the batch size.')
+            raise Exception(
+                'The training dataset must contain more samples than the batch size.')
         if not osp.isdir(save_dir):
             if osp.exists(save_dir):
                 os.remove(save_dir)
@@ -415,8 +418,8 @@ class BaseAPI:
                     build_strategy=build_strategy,
                     exec_strategy=exec_strategy)
 
-        total_num_steps = math.floor(
-            train_dataset.num_samples / train_batch_size)
+        total_num_steps = math.floor(train_dataset.num_samples /
+                                     train_batch_size)
         num_steps = 0
         time_stat = list()
         time_train_one_epoch = None
@@ -430,8 +433,8 @@ class BaseAPI:
         if self.model_type == 'detector':
             eval_batch_size = self._get_single_card_bs(train_batch_size)
         if eval_dataset is not None:
-            total_num_steps_eval = math.ceil(
-                eval_dataset.num_samples / eval_batch_size)
+            total_num_steps_eval = math.ceil(eval_dataset.num_samples /
+                                             eval_batch_size)
 
         if use_vdl:
             # VisualDL component
@@ -473,7 +476,9 @@ class BaseAPI:
 
                     if use_vdl:
                         for k, v in step_metrics.items():
-                            log_writer.add_scalar('Metrics/Training(Step): {}'.format(k), v, num_steps)
+                            log_writer.add_scalar(
+                                'Metrics/Training(Step): {}'.format(k), v,
+                                num_steps)
 
                     # Estimate the remaining time
                     avg_step_time = np.mean(time_stat)
@@ -481,11 +486,12 @@ class BaseAPI:
                         eta = (num_epochs - i - 1) * time_train_one_epoch + (
                             total_num_steps - step - 1) * avg_step_time
                     else:
-                        eta = ((num_epochs - i) * total_num_steps - step -
-                               1) * avg_step_time
+                        eta = ((num_epochs - i) * total_num_steps - step - 1
+                               ) * avg_step_time
                     if time_eval_one_epoch is not None:
-                        eval_eta = (total_eval_times - i //
-                                    save_interval_epochs) * time_eval_one_epoch
+                        eval_eta = (
+                            total_eval_times - i // save_interval_epochs
+                        ) * time_eval_one_epoch
                     else:
                         eval_eta = (
                             total_eval_times - i // save_interval_epochs
@@ -495,10 +501,11 @@ class BaseAPI:
                     logging.info(
                         "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}"
                         .format(i + 1, num_epochs, step + 1, total_num_steps,
-                                dict2str(step_metrics), round(
-                                    avg_step_time, 2), eta_str))
+                                dict2str(step_metrics),
+                                round(avg_step_time, 2), eta_str))
             train_metrics = OrderedDict(
-                zip(list(self.train_outputs.keys()), np.mean(records, axis=0)))
+                zip(list(self.train_outputs.keys()), np.mean(
+                    records, axis=0)))
             logging.info('[TRAIN] Epoch {} finished, {} .'.format(
                 i + 1, dict2str(train_metrics)))
             time_train_one_epoch = time.time() - epoch_start_time
@@ -534,7 +541,8 @@ class BaseAPI:
                             if isinstance(v, np.ndarray):
                                 if v.size > 1:
                                     continue
-                            log_writer.add_scalar("Metrics/Eval(Epoch): {}".format(k), v, i+1)
+                            log_writer.add_scalar(
+                                "Metrics/Eval(Epoch): {}".format(k), v, i + 1)
                 self.save_model(save_dir=current_save_dir)
                 time_eval_one_epoch = time.time() - eval_epoch_start_time
                 eval_epoch_start_time = time.time()
@@ -545,4 +553,4 @@ class BaseAPI:
                                 best_accuracy))
                 if eval_dataset is not None and early_stop:
                     if earlystop(current_accuracy):
-                        break
+                        break
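
Besides the line wrapping, one hunk above (the test_transforms block) is a behavioral fix: the old code dereferenced self.test_transforms.to_rgb before checking that the object has a test_transforms attribute at all, which raises AttributeError when the attribute is absent. A minimal reproduction of the hazard, under that assumption about how the attribute can be missing:

    class Model:
        pass

    m = Model()  # no test_transforms attribute set

    # Old order: hasattr(m.test_transforms, 'to_rgb') raises AttributeError,
    # because evaluating m.test_transforms itself fails.
    # New order: the outer hasattr guard is checked first, so the block is
    # skipped safely.
    if hasattr(m, 'test_transforms'):
        if hasattr(m.test_transforms, 'to_rgb'):
            print('would record TransformsMode')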

+ 16 - 12
paddlex/cv/models/classifier.py

@@ -40,8 +40,8 @@ class BaseClassifier(BaseAPI):
         self.init_params = locals()
         super(BaseClassifier, self).__init__('classifier')
         if not hasattr(paddlex.cv.nets, str.lower(model_name)):
-            raise Exception(
-                "ERROR: There's no model named {}.".format(model_name))
+            raise Exception("ERROR: There's no model named {}.".format(
+                model_name))
         self.model_name = model_name
         self.labels = None
         self.num_classes = num_classes
@@ -218,15 +218,14 @@ class BaseClassifier(BaseAPI):
                 num_pad_samples = batch_size - num_samples
                 pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
                 images = np.concatenate([images, pad_images])
-            outputs = self.exe.run(
-                self.parallel_test_prog,
-                feed={'image': images},
-                fetch_list=list(self.test_outputs.values()))
+            outputs = self.exe.run(self.parallel_test_prog,
+                                   feed={'image': images},
+                                   fetch_list=list(self.test_outputs.values()))
             outputs = [outputs[0][:num_samples]]
             true_labels.extend(labels)
             pred_scores.extend(outputs[0].tolist())
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
 
         pred_top1_label = np.argsort(pred_scores)[:, -1]
         pred_topk_label = np.argsort(pred_scores)[:, -k:]
@@ -263,10 +262,9 @@ class BaseClassifier(BaseAPI):
             self.arrange_transforms(
                 transforms=self.test_transforms, mode='test')
             im = self.test_transforms(img_file)
-        result = self.exe.run(
-            self.test_prog,
-            feed={'image': im},
-            fetch_list=list(self.test_outputs.values()))
+        result = self.exe.run(self.test_prog,
+                              feed={'image': im},
+                              fetch_list=list(self.test_outputs.values()))
         pred_label = np.argsort(result[0][0])[::-1][:true_topk]
         res = [{
             'category_id': l,
@@ -400,3 +398,9 @@ class ShuffleNetV2(BaseClassifier):
     def __init__(self, num_classes=1000):
         super(ShuffleNetV2, self).__init__(
             model_name='ShuffleNetV2', num_classes=num_classes)
+
+
+class HRNet_W18(BaseClassifier):
+    def __init__(self, num_classes=1000):
+        super(HRNet_W18, self).__init__(
+            model_name='HRNet_W18', num_classes=num_classes)
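
The new subclass follows the same one-liner pattern as ShuffleNetV2, so training should look like any other PaddleX classifier. A hedged end-to-end sketch; the dataset paths and transform choices below are illustrative assumptions, not part of this commit:

    import paddlex as pdx
    from paddlex.cls import transforms

    # Hypothetical ImageNet-style dataset layout; adjust to your own data.
    train_transforms = transforms.Compose([
        transforms.RandomCrop(crop_size=224),
        transforms.Normalize()])
    train_dataset = pdx.datasets.ImageNet(
        data_dir='my_dataset',
        file_list='my_dataset/train_list.txt',
        label_list='my_dataset/labels.txt',
        transforms=train_transforms)

    model = pdx.cls.HRNet_W18(num_classes=len(train_dataset.labels))
    model.train(
        num_epochs=10,
        train_dataset=train_dataset,
        train_batch_size=32,
        save_dir='output/hrnet_w18')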

+ 34 - 27
paddlex/cv/models/faster_rcnn.py

@@ -32,7 +32,7 @@ class FasterRCNN(BaseAPI):
     Args:
         num_classes (int): Number of classes, including the background class. Defaults to 81.
         backbone (str): Backbone network of FasterRCNN. One of ['ResNet18', 'ResNet50',
-            'ResNet50_vd', 'ResNet101', 'ResNet101_vd']. Defaults to 'ResNet50'.
+            'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']. Defaults to 'ResNet50'.
         with_fpn (bool): Whether to use the FPN structure. Defaults to True.
         aspect_ratios (list): Candidate anchor aspect ratios. Defaults to [0.5, 1.0, 2.0].
         anchor_sizes (list): Candidate anchor sizes. Defaults to [32, 64, 128, 256, 512].
@@ -47,7 +47,8 @@ class FasterRCNN(BaseAPI):
         self.init_params = locals()
         super(FasterRCNN, self).__init__('detector')
         backbones = [
-            'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd'
+            'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
+            'HRNet_W18'
         ]
         assert backbone in backbones, "backbone should be one of {}".format(
             backbones)
@@ -79,6 +80,12 @@ class FasterRCNN(BaseAPI):
             layers = 101
             variant = 'd'
             norm_type = 'affine_channel'
+        elif backbone_name == 'HRNet_W18':
+            backbone = paddlex.cv.nets.hrnet.HRNet(
+                width=18, freeze_norm=True, norm_decay=0., freeze_at=0)
+            if self.with_fpn is False:
+                self.with_fpn = True
+            return backbone
         if self.with_fpn:
             backbone = paddlex.cv.nets.resnet.ResNet(
                 norm_type='bn' if norm_type is None else norm_type,
@@ -117,12 +124,12 @@ class FasterRCNN(BaseAPI):
             model_out = model.build_net(inputs)
             loss = model_out['loss']
             self.optimizer.minimize(loss)
-            outputs = OrderedDict([('loss', model_out['loss']),
-                                   ('loss_cls', model_out['loss_cls']),
-                                   ('loss_bbox', model_out['loss_bbox']),
-                                   ('loss_rpn_cls', model_out['loss_rpn_cls']),
-                                   ('loss_rpn_bbox',
-                                    model_out['loss_rpn_bbox'])])
+            outputs = OrderedDict(
+                [('loss', model_out['loss']),
+                 ('loss_cls', model_out['loss_cls']),
+                 ('loss_bbox', model_out['loss_bbox']),
+                 ('loss_rpn_cls', model_out['loss_rpn_cls']), (
+                     'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
         else:
             outputs = model.build_net(inputs)
         return inputs, outputs
@@ -227,7 +234,9 @@ class FasterRCNN(BaseAPI):
         # Build the training, validation, and test networks
         self.build_program()
         fuse_bn = True
-        if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']:
+        if self.with_fpn and self.backbone in [
+                'ResNet18', 'ResNet50', 'HRNet_W18'
+        ]:
             fuse_bn = False
         self.net_initialize(
             startup_prog=fluid.default_startup_program(),
@@ -310,11 +319,10 @@ class FasterRCNN(BaseAPI):
                 'im_info': im_infos,
                 'im_shape': im_shapes,
             }
-            outputs = self.exe.run(
-                self.test_prog,
-                feed=[feed_data],
-                fetch_list=list(self.test_outputs.values()),
-                return_numpy=False)
+            outputs = self.exe.run(self.test_prog,
+                                   feed=[feed_data],
+                                   fetch_list=list(self.test_outputs.values()),
+                                   return_numpy=False)
             res = {
                 'bbox': (np.array(outputs[0]),
                          outputs[0].recursive_sequence_lengths())
@@ -339,13 +347,13 @@ class FasterRCNN(BaseAPI):
                 res['is_difficult'] = (np.array(res_is_difficult),
                                        [res_is_difficult_lod])
             results.append(res)
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
         box_ap_stats, eval_details = eval_results(
             results, metric, eval_dataset.coco_gt, with_background=True)
         metrics = OrderedDict(
-            zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
-                box_ap_stats))
+            zip(['bbox_mmap'
+                 if metric == 'COCO' else 'bbox_map'], box_ap_stats))
         if return_details:
             return metrics, eval_details
         return metrics
@@ -373,15 +381,14 @@ class FasterRCNN(BaseAPI):
         im = np.expand_dims(im, axis=0)
         im_resize_info = np.expand_dims(im_resize_info, axis=0)
         im_shape = np.expand_dims(im_shape, axis=0)
-        outputs = self.exe.run(
-            self.test_prog,
-            feed={
-                'image': im,
-                'im_info': im_resize_info,
-                'im_shape': im_shape
-            },
-            fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+        outputs = self.exe.run(self.test_prog,
+                               feed={
+                                   'image': im,
+                                   'im_info': im_resize_info,
+                                   'im_shape': im_shape
+                               },
+                               fetch_list=list(self.test_outputs.values()),
+                               return_numpy=False)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
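
With the backbone list extended, selecting HRNet should reduce to passing the string; note that the HRNet branch in the backbone builder forces with_fpn to True and returns early, since HRFPN is the only supported neck for it. A sketch (num_classes=81 is the documented default; dataset wiring omitted):

    import paddlex as pdx

    # with_fpn is forced to True internally for the HRNet_W18 backbone
    model = pdx.det.FasterRCNN(num_classes=81, backbone='HRNet_W18')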

+ 30 - 31
paddlex/cv/models/mask_rcnn.py

@@ -32,7 +32,7 @@ class MaskRCNN(FasterRCNN):
     Args:
         num_classes (int): Number of classes, including the background class. Defaults to 81.
         backbone (str): Backbone network of MaskRCNN. One of ['ResNet18', 'ResNet50',
-            'ResNet50_vd', 'ResNet101', 'ResNet101_vd']. Defaults to 'ResNet50'.
+            'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']. Defaults to 'ResNet50'.
         with_fpn (bool): Whether to use the FPN structure. Defaults to True.
         aspect_ratios (list): Candidate anchor aspect ratios. Defaults to [0.5, 1.0, 2.0].
         anchor_sizes (list): Candidate anchor sizes. Defaults to [32, 64, 128, 256, 512].
@@ -46,7 +46,8 @@ class MaskRCNN(FasterRCNN):
                  anchor_sizes=[32, 64, 128, 256, 512]):
         self.init_params = locals()
         backbones = [
-            'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd'
+            'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
+            'HRNet_W18'
         ]
         assert backbone in backbones, "backbone should be one of {}".format(
             backbones)
@@ -81,13 +82,13 @@ class MaskRCNN(FasterRCNN):
             model_out = model.build_net(inputs)
             loss = model_out['loss']
             self.optimizer.minimize(loss)
-            outputs = OrderedDict([('loss', model_out['loss']),
-                                   ('loss_cls', model_out['loss_cls']),
-                                   ('loss_bbox', model_out['loss_bbox']),
-                                   ('loss_mask', model_out['loss_mask']),
-                                   ('loss_rpn_cls', model_out['loss_rpn_cls']),
-                                   ('loss_rpn_bbox',
-                                    model_out['loss_rpn_bbox'])])
+            outputs = OrderedDict(
+                [('loss', model_out['loss']),
+                 ('loss_cls', model_out['loss_cls']),
+                 ('loss_bbox', model_out['loss_bbox']),
+                 ('loss_mask', model_out['loss_mask']),
+                 ('loss_rpn_cls', model_out['loss_rpn_cls']), (
+                     'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
         else:
             outputs = model.build_net(inputs)
         return inputs, outputs
@@ -276,11 +277,10 @@ class MaskRCNN(FasterRCNN):
                 'im_info': im_infos,
                 'im_shape': im_shapes,
             }
-            outputs = self.exe.run(
-                self.test_prog,
-                feed=[feed_data],
-                fetch_list=list(self.test_outputs.values()),
-                return_numpy=False)
+            outputs = self.exe.run(self.test_prog,
+                                   feed=[feed_data],
+                                   fetch_list=list(self.test_outputs.values()),
+                                   return_numpy=False)
             res = {
                 'bbox': (np.array(outputs[0]),
                          outputs[0].recursive_sequence_lengths()),
@@ -292,8 +292,8 @@ class MaskRCNN(FasterRCNN):
             res['im_shape'] = (im_shapes, [])
             res['im_id'] = (np.array(res_im_id), [])
             results.append(res)
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
 
         ap_stats, eval_details = eval_results(
             results,
@@ -302,8 +302,8 @@ class MaskRCNN(FasterRCNN):
             with_background=True,
             resolution=self.mask_head_resolution)
         if metric == 'VOC':
-            if isinstance(ap_stats[0], np.ndarray) and isinstance(
-                    ap_stats[1], np.ndarray):
+            if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
+                                                                  np.ndarray):
                 metrics = OrderedDict(
                     zip(['bbox_map', 'segm_map'],
                         [ap_stats[0][1], ap_stats[1][1]]))
@@ -311,8 +311,8 @@ class MaskRCNN(FasterRCNN):
                 metrics = OrderedDict(
                     zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
         elif metric == 'COCO':
-            if isinstance(ap_stats[0], np.ndarray) and isinstance(
-                    ap_stats[1], np.ndarray):
+            if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
+                                                                  np.ndarray):
                 metrics = OrderedDict(
                     zip(['bbox_mmap', 'segm_mmap'],
                         [ap_stats[0][0], ap_stats[1][0]]))
@@ -346,15 +346,14 @@ class MaskRCNN(FasterRCNN):
         im = np.expand_dims(im, axis=0)
         im_resize_info = np.expand_dims(im_resize_info, axis=0)
         im_shape = np.expand_dims(im_shape, axis=0)
-        outputs = self.exe.run(
-            self.test_prog,
-            feed={
-                'image': im,
-                'im_info': im_resize_info,
-                'im_shape': im_shape
-            },
-            fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+        outputs = self.exe.run(self.test_prog,
+                               feed={
+                                   'image': im,
+                                   'im_info': im_resize_info,
+                                   'im_shape': im_shape
+                               },
+                               fetch_list=list(self.test_outputs.values()),
+                               return_numpy=False)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
@@ -368,8 +367,8 @@ class MaskRCNN(FasterRCNN):
         import pycocotools.mask as mask_util
         for index, xywh_res in enumerate(xywh_results):
             del xywh_res['image_id']
-            xywh_res['mask'] = mask_util.decode(
-                segm_results[index]['segmentation'])
+            xywh_res['mask'] = mask_util.decode(segm_results[index][
+                'segmentation'])
             xywh_res['category'] = self.labels[xywh_res['category_id']]
             results.append(xywh_res)
         return results
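
MaskRCNN mirrors the FasterRCNN change, so the same backbone string should work here as well (a sketch; num_classes includes the background class per the docstring):

    import paddlex as pdx

    model = pdx.det.MaskRCNN(num_classes=81, backbone='HRNet_W18')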

+ 7 - 4
paddlex/cv/models/utils/pretrain_weights.py

@@ -56,6 +56,8 @@ image_pretrain = {
     'https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar',
     'ShuffleNetV2':
     'https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar',
+    'HRNet_W18':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar',
 }
 
 coco_pretrain = {
@@ -85,10 +87,11 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
                 backbone = 'DetResNet50'
         assert backbone in image_pretrain, "There are no ImageNet pretrained weights for {}; you may try COCO.".format(
             backbone)
-        #        url = image_pretrain[backbone]
-        #        fname = osp.split(url)[-1].split('.')[0]
-        #        paddlex.utils.download_and_decompress(url, path=new_save_dir)
-        #        return osp.join(new_save_dir, fname)
+        if backbone == "HRNet_W18":
+            url = image_pretrain[backbone]
+            fname = osp.split(url)[-1].split('.')[0]
+            paddlex.utils.download_and_decompress(url, path=new_save_dir)
+            return osp.join(new_save_dir, fname)
         try:
             hub.download(backbone, save_path=new_save_dir)
         except Exception as e:
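
The new branch bypasses hub.download for HRNet_W18 and instead derives the extraction directory from the URL itself. The string manipulation is easy to verify in isolation:

    from os import path as osp

    url = ('https://paddle-imagenet-models-name.bj.bcebos.com/'
           'HRNet_W18_C_pretrained.tar')
    fname = osp.split(url)[-1].split('.')[0]
    print(fname)  # HRNet_W18_C_pretrained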

+ 25 - 8
paddlex/cv/nets/__init__.py

@@ -23,6 +23,7 @@ from .segmentation import DeepLabv3p
 from .xception import Xception
 from .densenet import DenseNet
 from .shufflenet_v2 import ShuffleNetV2
+from .hrnet import HRNet
 
 
 def resnet18(input, num_classes=1000):
@@ -51,14 +52,20 @@ def resnet50_vd(input, num_classes=1000):
 
 
 def resnet50_vd_ssld(input, num_classes=1000):
-    model = ResNet(layers=50, num_classes=num_classes, 
-                   variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
+    model = ResNet(
+        layers=50,
+        num_classes=num_classes,
+        variant='d',
+        lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
     return model(input)
 
 
 def resnet101_vd_ssld(input, num_classes=1000):
-    model = ResNet(layers=101, num_classes=num_classes, 
-                   variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
+    model = ResNet(
+        layers=101,
+        num_classes=num_classes,
+        variant='d',
+        lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
     return model(input)
 
 
@@ -93,14 +100,18 @@ def mobilenetv3_large(input, num_classes=1000):
 
 
 def mobilenetv3_small_ssld(input, num_classes=1000):
-    model = MobileNetV3(num_classes=num_classes, model_name='small',
-                        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
+    model = MobileNetV3(
+        num_classes=num_classes,
+        model_name='small',
+        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
     return model(input)
 
 
 def mobilenetv3_large_ssld(input, num_classes=1000):
-    model = MobileNetV3(num_classes=num_classes, model_name='large',
-                        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
+    model = MobileNetV3(
+        num_classes=num_classes,
+        model_name='large',
+        lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
     return model(input)
 
 
@@ -133,6 +144,12 @@ def densenet201(input, num_classes=1000):
     model = DenseNet(layers=201, num_classes=num_classes)
     return model(input)
 
+
 def shufflenetv2(input, num_classes=1000):
     model = ShuffleNetV2(num_classes=num_classes)
     return model(input)
+
+
+def hrnet_w18(input, num_classes=1000):
+    model = HRNet(width=18, num_classes=num_classes)
+    return model(input)
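
Like the other factory helpers in this module, hrnet_w18 wraps the class in a one-call functional form. A minimal static-graph sketch under Paddle 1.x (the fluid.data input layer is an assumption about the surrounding setup):

    import paddle.fluid as fluid
    from paddlex.cv.nets import hrnet_w18

    image = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
    # Builds HRNet(width=18, num_classes=1000) and applies it to the input
    logits = hrnet_w18(image, num_classes=1000)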

+ 7 - 2
paddlex/cv/nets/detection/faster_rcnn.py

@@ -21,7 +21,7 @@ import copy
 
 from paddle import fluid
 
-from .fpn import FPN
+from .fpn import (FPN, HRFPN)
 from .rpn_head import (RPNHead, FPNRPNHead)
 from .roi_extractor import (RoIAlign, FPNRoIAlign)
 from .bbox_head import (BBoxHead, TwoFCHead)
@@ -82,7 +82,12 @@ class FasterRCNN(object):
         self.backbone = backbone
         self.mode = mode
         if with_fpn and fpn is None:
-            fpn = FPN()
+            if self.backbone.__class__.__name__.startswith('HRNet'):
+                fpn = HRFPN()
+                fpn.min_level = 2
+                fpn.max_level = 6
+            else:
+                fpn = FPN()
         self.fpn = fpn
         self.num_classes = num_classes
         if rpn_head is None:
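
The neck selection keys off the backbone's class name rather than an explicit flag: any backbone class whose name starts with 'HRNet' gets an HRFPN with levels 2 to 6, everything else keeps the plain FPN. The dispatch in isolation (the stub class is only for illustration):

    class HRNet:  # stand-in for paddlex.cv.nets.hrnet.HRNet
        pass

    backbone = HRNet()
    use_hrfpn = backbone.__class__.__name__.startswith('HRNet')
    print(use_hrfpn)  # True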

+ 107 - 3
paddlex/cv/nets/detection/fpn.py

@@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.initializer import Xavier
 from paddle.fluid.regularizer import L2Decay
 
-__all__ = ['FPN']
+__all__ = ['FPN', 'HRFPN']
 
 
 def ConvNorm(input,
@@ -219,8 +219,8 @@ class FPN(object):
             body_name = body_name_list[i]
             body_input = body_dict[body_name]
             top_output = self.fpn_inner_output[i - 1]
-            fpn_inner_single = self._add_topdown_lateral(
-                body_name, body_input, top_output)
+            fpn_inner_single = self._add_topdown_lateral(body_name, body_input,
+                                                         top_output)
             self.fpn_inner_output[i] = fpn_inner_single
         fpn_dict = {}
         fpn_name_list = []
@@ -293,3 +293,107 @@ class FPN(object):
                 spatial_scale.insert(0, spatial_scale[0] * 0.5)
         res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
         return res_dict, spatial_scale
+
+
+class HRFPN(object):
+    """
+    HRFPN, the feature pyramid used with HRNet backbones; see https://arxiv.org/abs/1908.07919
+
+    Args:
+        num_chan (int): number of feature channels
+        pooling_type (str): pooling type of downsampling
+        share_conv (bool): whether to share the conv across different layers' reductions
+        spatial_scale (list): feature map scaling factor
+    """
+
+    def __init__(
+            self,
+            num_chan=256,
+            pooling_type="avg",
+            share_conv=False,
+            spatial_scale=[1. / 64, 1. / 32, 1. / 16, 1. / 8, 1. / 4], ):
+        self.num_chan = num_chan
+        self.pooling_type = pooling_type
+        self.share_conv = share_conv
+        self.spatial_scale = spatial_scale
+
+    def get_output(self, body_dict):
+        num_out = len(self.spatial_scale)
+        body_name_list = list(body_dict.keys())
+
+        num_backbone_stages = len(body_name_list)
+
+        outs = []
+        outs.append(body_dict[body_name_list[0]])
+
+        # resize
+        for i in range(1, len(body_dict)):
+            resized = self.resize_input_tensor(body_dict[body_name_list[i]],
+                                               outs[0], 2**i)
+            outs.append(resized)
+
+        # concat
+        out = fluid.layers.concat(outs, axis=1)
+
+        # reduction
+        out = fluid.layers.conv2d(
+            input=out,
+            num_filters=self.num_chan,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            param_attr=ParamAttr(name='hrfpn_reduction_weights'),
+            bias_attr=False)
+
+        # conv
+        outs = [out]
+        for i in range(1, num_out):
+            outs.append(
+                self.pooling(
+                    out,
+                    size=2**i,
+                    stride=2**i,
+                    pooling_type=self.pooling_type))
+        outputs = []
+
+        for i in range(num_out):
+            conv_name = "shared_fpn_conv" if self.share_conv else "shared_fpn_conv_" + str(
+                i)
+            conv = fluid.layers.conv2d(
+                input=outs[i],
+                num_filters=self.num_chan,
+                filter_size=3,
+                stride=1,
+                padding=1,
+                param_attr=ParamAttr(name=conv_name + "_weights"),
+                bias_attr=False)
+            outputs.append(conv)
+
+        for idx in range(0, num_out - len(body_name_list)):
+            body_name_list.append("fpn_res5_sum_subsampled_{}x".format(2**(
+                idx + 1)))
+
+        outputs = outputs[::-1]
+        body_name_list = body_name_list[::-1]
+
+        res_dict = OrderedDict([(body_name_list[k], outputs[k])
+                                for k in range(len(body_name_list))])
+        return res_dict, self.spatial_scale
+
+    def resize_input_tensor(self, body_input, ref_output, scale):
+        shape = fluid.layers.shape(ref_output)
+        shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
+        out_shape_ = shape_hw
+        out_shape = fluid.layers.cast(out_shape_, dtype='int32')
+        out_shape.stop_gradient = True
+        body_output = fluid.layers.resize_bilinear(
+            body_input, scale=scale, out_shape=out_shape)
+        return body_output
+
+    def pooling(self, input, size, stride, pooling_type):
+        pool = fluid.layers.pool2d(
+            input=input,
+            pool_size=size,
+            pool_stride=stride,
+            pool_type=pooling_type)
+        return pool
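
To see what get_output produces, it helps to trace shapes. Assuming an HRNet-W18 backbone that yields four branches at strides 4/8/16/32 with 18/36/72/144 channels (the standard W18 widths, not stated in this diff), every branch is resized to the finest map, concatenated, reduced to num_chan by the 1x1 conv, then pooled into one output per spatial_scale entry. A plain-Python sketch of that bookkeeping:

    # Hypothetical W18 branch shapes for a 512x512 input: (channels, H, W)
    branches = [(18, 128, 128), (36, 64, 64), (72, 32, 32), (144, 16, 16)]

    # Step 1: resize_bilinear brings every branch to the finest (1/4) map.
    concat_channels = sum(c for (c, _, _) in branches)  # 18+36+72+144 = 270

    # Step 2: the 1x1 reduction conv maps 270 channels down to num_chan.
    num_chan = 256

    # Step 3: pooling with size/stride 2**i builds the coarser levels.
    spatial_scale = [1. / 64, 1. / 32, 1. / 16, 1. / 8, 1. / 4]
    levels = [(num_chan, 128 >> i, 128 >> i) for i in range(len(spatial_scale))]
    # Finest level first here; get_output reverses the list, so the coarsest
    # (1/64) output comes first, matching the order of spatial_scale.
    print(levels)  # [(256, 128, 128), (256, 64, 64), ..., (256, 8, 8)]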

+ 10 - 6
paddlex/cv/nets/detection/mask_rcnn.py

@@ -21,7 +21,7 @@ import copy
 
 import paddle.fluid as fluid
 
-from .fpn import FPN
+from .fpn import (FPN, HRFPN)
 from .rpn_head import (RPNHead, FPNRPNHead)
 from .roi_extractor import (RoIAlign, FPNRoIAlign)
 from .bbox_head import (BBoxHead, TwoFCHead)
@@ -92,11 +92,15 @@ class MaskRCNN(object):
         self.backbone = backbone
         self.mode = mode
         if with_fpn and fpn is None:
-            fpn = FPN(
-                num_chan=num_chan,
-                min_level=min_level,
-                max_level=max_level,
-                spatial_scale=spatial_scale)
+            if self.backbone.__class__.__name__.startswith('HRNet'):
+                fpn = HRFPN()
+                fpn.min_level = 2
+                fpn.max_level = 6
+            else:
+                fpn = FPN(num_chan=num_chan,
+                          min_level=min_level,
+                          max_level=max_level,
+                          spatial_scale=spatial_scale)
         self.fpn = fpn
         self.num_classes = num_classes
         if rpn_head is None: