@@ -22,6 +22,7 @@ import yaml
 import json
 import paddle
 from paddle.io import DataLoader, DistributedBatchSampler
+from paddleslim import QAT
 from paddleslim.analysis import flops
 from paddleslim import L1NormFilterPruner, FPGMFilterPruner
 import paddlex
@@ -53,6 +54,7 @@ class BaseModel:
         self.completed_epochs = 0
         self.pruner = None
         self.pruning_ratios = None
+        self.quanter = None

     def net_initialize(self, pretrain_weights=None, save_dir='.'):
         if pretrain_weights is not None and \
@@ -129,10 +131,15 @@ class BaseModel:
             os.makedirs(save_dir)
         model_info = self.get_model_info()
         model_info['status'] = self.status
-        paddle.save(self.net.state_dict(),
-                    os.path.join(save_dir, 'model.pdparams'))
-        paddle.save(self.optimizer.state_dict(),
-                    os.path.join(save_dir, 'model.pdopt'))
+
+        if self.status == 'Quantized':
+            self.quanter.save_quantized_model(
+                self.net, save_dir, input_spec=self.test_inputs)
+        else:
+            paddle.save(self.net.state_dict(),
+                        os.path.join(save_dir, 'model.pdparams'))
+            paddle.save(self.optimizer.state_dict(),
+                        os.path.join(save_dir, 'model.pdopt'))
         with open(
                 osp.join(save_dir, 'model.yml'), encoding='utf-8',
@@ -400,8 +407,8 @@ class BaseModel:

         Args:
             pruned_flops(float): Ratio of FLOPs to be pruned.
-            save_dir(None or str, optional): If None, the pruned model will not be saved
-                Otherwise, the pruned model will be saved at save_dir. Defaults to None.
+            save_dir(None or str, optional): If None, the pruned model will not be saved.
+                Otherwise, the pruned model will be saved at save_dir. Defaults to None.

         """
         if self.status == "Pruned":
@@ -427,6 +434,39 @@ class BaseModel:
             self.save_model(save_dir)
             logging.info("Pruned model is saved at {}".format(save_dir))

+    def _prepare_qat(self, quant_config, image_shape):
+        if quant_config is None:
+            # default quantization configuration
+            quant_config = {
+                # {None, 'PACT'}. Weight preprocess type. If None, no preprocessing is performed.
+                'weight_preprocess_type': None,
+                # {None, 'PACT'}. Activation preprocess type. If None, no preprocessing is performed.
+                'activation_preprocess_type': None,
+                # {'abs_max', 'channel_wise_abs_max', 'range_abs_max', 'moving_average_abs_max'}.
+                # Weight quantization type.
+                'weight_quantize_type': 'channel_wise_abs_max',
+                # {'abs_max', 'range_abs_max', 'moving_average_abs_max'}. Activation quantization type.
+                'activation_quantize_type': 'moving_average_abs_max',
+                # The number of bits of weights after quantization.
+                'weight_bits': 8,
+                # The number of bits of activation after quantization.
+                'activation_bits': 8,
+                # Data type after quantization, such as 'uint8', 'int8', etc.
+                'dtype': 'int8',
+                # Window size for 'range_abs_max' quantization.
+                'window_size': 10000,
+                # Decay coefficient of moving average.
+                'moving_rate': .9,
+                # Types of layers that will be quantized.
+                'quantizable_layer_type': ['Conv2D', 'Linear']
+            }
+        self.quanter = QAT(config=quant_config)
+        logging.info("Preparing the model for quantization-aware training...")
+        self.quanter.quantize(self.net)
+        logging.info("Model is ready for quantization-aware training.")
+        self.status = 'Quantized'
+        self.test_inputs = self.get_test_inputs(image_shape)
+
     def _export_inference_model(self, save_dir, image_shape=[-1, -1]):
         save_dir = osp.join(save_dir, 'inference_model')
         self.net.eval()
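
Reviewer note: a minimal usage sketch of the new QAT flow, not part of the patch. It assumes `paddlex.load_model` restores a trained `BaseModel` subclass, that `image_shape` is `[height, width]` as suggested by the `_export_inference_model` default, and uses hypothetical checkpoint paths:

    import paddlex as pdx

    # Hypothetical path to a previously trained model directory.
    model = pdx.load_model('output/mobilenetv3/best_model')

    # Insert fake-quant ops; quant_config=None falls back to the
    # defaults defined in _prepare_qat above.
    model._prepare_qat(quant_config=None, image_shape=[224, 224])

    # ... fine-tune model.net for a few epochs to recover accuracy ...

    # self.status is now 'Quantized', so save_model() takes the new
    # quanter.save_quantized_model() branch instead of paddle.save().
    model.save_model('output/mobilenetv3_qat')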