// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cuda_runtime_api.h>

#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "ultra_infer/runtime/backends/backend.h"
#include "ultra_infer/runtime/backends/tensorrt/option.h"
#include "ultra_infer/runtime/backends/tensorrt/utils.h"
#include "ultra_infer/utils/unique_ptr.h"
// INT8 entropy calibrator that replays a pre-generated calibration cache.
// It never produces calibration batches itself (getBatch() always returns
// false); TensorRT reads the supplied cache via readCalibrationCache().
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
 public:
  explicit Int8EntropyCalibrator2(const std::string &calibration_cache)
      : calibration_cache_(calibration_cache) {}

  int getBatchSize() const noexcept override { return 0; }

  bool getBatch(void *bindings[], const char *names[],
                int nbBindings) noexcept override {
    return false;
  }

  const void *readCalibrationCache(size_t &length) noexcept override {
    length = calibration_cache_.size();
    return length ? calibration_cache_.data() : nullptr;
  }

  void writeCalibrationCache(const void *cache,
                             size_t length) noexcept override {
    ultra_infer::FDERROR << "NOT IMPLEMENTED." << std::endl;
  }

 private:
  const std::string calibration_cache_;
};
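
// Illustrative usage sketch (not part of this header): a calibrator like
// the one above is attached to the builder config when an INT8 engine is
// built. The `builder`, `config`, and `cache` names below are hypothetical
// locals, not this backend's actual code.
//
//   std::string cache = /* calibration table previously read from disk */;
//   Int8EntropyCalibrator2 calibrator(cache);
//   nvinfer1::IBuilderConfig *config = builder->createBuilderConfig();
//   config->setFlag(nvinfer1::BuilderFlag::kINT8);
//   config->setInt8Calibrator(&calibrator);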

namespace ultra_infer {

struct TrtValueInfo {
  std::string name;
  std::vector<int> shape;
  nvinfer1::DataType dtype;   // dtype of the TRT model
  FDDataType original_dtype;  // dtype of the original ONNX/Paddle model
};

// Convert TensorRT dims to a plain std::vector<int>.
std::vector<int> toVec(const nvinfer1::Dims &dim);
// Size in bytes of a single element of the given TensorRT dtype.
size_t TrtDataTypeSize(const nvinfer1::DataType &dtype);
// Map a TensorRT dtype to the corresponding FDDataType.
FDDataType GetFDDataType(const nvinfer1::DataType &dtype);
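// For example, TrtDataTypeSize(nvinfer1::DataType::kFLOAT) is expected to
// yield 4 and TrtDataTypeSize(nvinfer1::DataType::kHALF) to yield 2; the
// exact mapping is defined in the corresponding .cc file.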

class TrtBackend : public BaseBackend {
 public:
  TrtBackend() : engine_(nullptr), context_(nullptr) {}

  bool Init(const RuntimeOption &runtime_option) override;
  bool Infer(std::vector<FDTensor> &inputs, std::vector<FDTensor> *outputs,
             bool copy_to_fd = true) override;
  int NumInputs() const override {
    return static_cast<int>(inputs_desc_.size());
  }
  int NumOutputs() const override {
    return static_cast<int>(outputs_desc_.size());
  }
  TensorInfo GetInputInfo(int index) override;
  TensorInfo GetOutputInfo(int index) override;
  std::vector<TensorInfo> GetInputInfos() override;
  std::vector<TensorInfo> GetOutputInfos() override;
  std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
                                     void *stream = nullptr,
                                     int device_id = -1) override;

  ~TrtBackend() {
    if (parser_) {
      parser_.reset();
    }
  }

 private:
  void BuildOption(const TrtBackendOption &option);

  bool InitFromPaddle(const std::string &model_buffer,
                      const std::string &params_buffer,
                      const TrtBackendOption &option = TrtBackendOption(),
                      bool verbose = false);
  bool InitFromOnnx(const std::string &model_buffer,
                    const TrtBackendOption &option = TrtBackendOption());

  TrtBackendOption option_;
  std::shared_ptr<nvinfer1::ICudaEngine> engine_;
  std::shared_ptr<nvinfer1::IExecutionContext> context_;
  FDUniquePtr<nvonnxparser::IParser> parser_;
  FDUniquePtr<nvinfer1::IBuilder> builder_;
  FDUniquePtr<nvinfer1::INetworkDefinition> network_;
  cudaStream_t stream_{};
  std::vector<void *> bindings_;
  std::vector<TrtValueInfo> inputs_desc_;
  std::vector<TrtValueInfo> outputs_desc_;
  std::map<std::string, FDDeviceBuffer> inputs_device_buffer_;
  std::map<std::string, FDDeviceBuffer> outputs_device_buffer_;
  std::map<std::string, int> io_name_index_;

  std::string calibration_str_;
  bool save_external_ = false;
  std::string model_file_name_;

  // When the model has more than one output, TensorRT's output order may
  // not match the original ONNX model's. This map records the original
  // output order so the right order can be restored after inference.
  std::map<std::string, int> outputs_order_;
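  // Illustrative sketch of the reorder step (hypothetical names, not this
  // backend's actual code): each TRT result is written back to the slot the
  // ONNX model assigned to that output name, e.g.
  //
  //   for (const auto &item : outputs_order_) {
  //     (*outputs)[item.second] = trt_results[item.first];
  //   }
  //
  // where `trt_results` maps tensor names to inference results.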
  // Temporarily stores the ONNX model content; once it has been used to
  // build the TRT engine, it is released.
  std::string onnx_model_buffer_;
  // Stores shape information of the loaded model. For dynamic shapes this
  // records the range information, which is also updated during inference.
  std::map<std::string, ShapeRangeInfo> shape_range_info_;
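  // Illustrative sketch (not this backend's actual code): dynamic shape
  // ranges like these are what TensorRT consumes through an optimization
  // profile at engine build time. The input name "x", the dims, and the
  // `builder`/`config` locals are made-up placeholders.
  //
  //   nvinfer1::IOptimizationProfile *profile =
  //       builder->createOptimizationProfile();
  //   profile->setDimensions("x", nvinfer1::OptProfileSelector::kMIN,
  //                          nvinfer1::Dims4{1, 3, 224, 224});
  //   profile->setDimensions("x", nvinfer1::OptProfileSelector::kOPT,
  //                          nvinfer1::Dims4{4, 3, 224, 224});
  //   profile->setDimensions("x", nvinfer1::OptProfileSelector::kMAX,
  //                          nvinfer1::Dims4{8, 3, 224, 224});
  //   config->addOptimizationProfile(profile);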
  // If the final output tensor's dtype differs from the model output
  // tensor's dtype, the data has to be cast to the final output's dtype,
  // e.g. when the TRT model outputs int32 but the final tensor is int64.
  // This map stores the casted tensors.
  std::map<std::string, FDTensor> casted_output_tensors_;
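  // Illustrative sketch of the int32 -> int64 cast in plain C++ (the real
  // implementation goes through FDTensor; names here are hypothetical):
  //
  //   std::vector<int32_t> trt_out = /* what TRT produced */;
  //   std::vector<int64_t> final_out(trt_out.size());
  //   std::transform(trt_out.begin(), trt_out.end(), final_out.begin(),
  //                  [](int32_t v) { return static_cast<int64_t>(v); });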

  // Query input/output metadata from the built engine.
  void GetInputOutputInfo();
  bool CreateTrtEngineFromOnnx(const std::string &onnx_model_buffer);
  bool BuildTrtEngine();
  bool LoadTrtCache(const std::string &trt_engine_file);
  // Update the recorded shape ranges with the actual input shapes; the
  // return value indicates whether the ranges changed.
  int ShapeRangeInfoUpdated(const std::vector<FDTensor> &inputs);
  void SetInputs(const std::vector<FDTensor> &inputs);
  void AllocateOutputsBuffer(std::vector<FDTensor> *outputs,
                             bool copy_to_fd = true);
};

} // namespace ultra_infer
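
// Illustrative end-to-end usage sketch (hypothetical option values; this
// assumes the UltraInfer Runtime API mirrors FastDeploy's, and the backend
// is normally driven through ultra_infer::Runtime rather than used
// directly):
//
//   ultra_infer::RuntimeOption option;
//   option.UseGpu(0);
//   option.UseTrtBackend();
//   option.SetModelPath("model.onnx", "", ultra_infer::ModelFormat::ONNX);
//
//   ultra_infer::Runtime runtime;
//   if (runtime.Init(option)) {
//     std::vector<ultra_infer::FDTensor> inputs(runtime.NumInputs());
//     // ... fill inputs ...
//     std::vector<ultra_infer::FDTensor> outputs;
//     runtime.Infer(inputs, &outputs);
//   }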