找回密码
 立即注册

QQ登录

只需一步,快速开始

查看: 56|回复: 1

TensorRT 自定义plugin错误

[复制链接]
发表于 2018-5-15 14:12:24 | 显示全部楼层 |阅读模式
想用tensorRT来加速推理过程,但是在添加plugin时有问题,不知道错在哪。请求大牛帮忙看看。
plugin 的代码:
  1. class SubpixelLayer:public IPluginExt
  2. {
  3. public:
  4.     SubpixelLayer(const void* data, size_t length)
  5.     {
  6.       
  7.     };
  8.     SubpixelLayer(int scale){
  9.       
  10.     };
  11.     ~SubpixelLayer(){};
  12.     int getNbOutputs() const override{
  13.         printf("%s:%d,getNbOutputs\n",__FILE__,__LINE__);
  14.         return 1;
  15.     };
  16.     Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override{
  17.         printf("%s:%d,getOutputDimensions\n",__FILE__,__LINE__);
  18.         
  19.         return DimsNCHW(1,  1, 1);
  20.     };
  21.     bool supportsFormat(nvinfer1::DataType type, PluginFormat format) const override{
  22.         printf("%s:%d,supportsFormat\n",__FILE__,__LINE__);
  23.         return true;
  24.     };
  25.     void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, nvinfer1::DataType type, PluginFormat format, int maxBatchSize) override{
  26.         printf("%s:%d,configureWithFormat\n",__FILE__,__LINE__);
  27.         
  28.     };
  29.     int initialize() override{
  30.         printf("%s:%d,initialize\n",__FILE__,__LINE__);
  31.         return 0;
  32.     };
  33.     void terminate() override{
  34.         printf("%s:%d,terminate\n",__FILE__,__LINE__);
  35.     };
  36.     size_t getWorkspaceSize(int maxBatchSize) const override{
  37.         printf("%s:%d,getWorkspaceSize\n",__FILE__,__LINE__);
  38.         return 0;
  39.     };
  40.     int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override{
  41.        printf("%s:%d,enqueue\n",__FILE__,__LINE__);
  42.         return 0;
  43.     };
  44.     size_t getSerializationSize() override
  45.     {
  46.         return 0;
  47.     }
  48.     void serialize(void* buffer) override{

  49.     };

  50. private:
  51.     int scale_;
  52.     int height_;
  53.     int width_;
  54.     int outputChannels_ ;
  55.     int inputChannels_ ;
  56.     nvinfer1::DataType dataType_;
  57. };
复制代码
使用TensorRT API来生成网络:
  1. ICudaEngine * Model(int maxBatchSize, IBuilder *builder, nvinfer1::DataType dt,int height,int width)
  2. {
  3. #define CHECK_PTR2(ret)         \
  4. {                               \
  5.     do{                         \
  6.         if(ret==nullptr){       \
  7.             printf("%s:%d error\n",__FILE__,__LINE__);\
  8.             return nullptr;    \
  9.         }                      \
  10.     }while(0);                 \
  11. }

  12.     INetworkDefinition* network = builder->createNetwork();
  13.     auto data = network->addInput("Input", dt, DimsCHW{ 3, height, width});
  14.     CHECK_PTR2(data);
  15.     PrintTensorInfo(data);

  16.     std::map<std::string, Weights> weightMap = loadWeights();
  17.     auto conv0 = network->addConvolution(*data, 64, DimsHW{3, 3}, weightMap["conv0_weights_0"], weightMap["conv0_biases_0"]);
  18.     CHECK_PTR2(conv0);
  19.     conv0->setStride(DimsHW{1, 1});
  20.     conv0->setPadding(DimsHW{1, 1});
  21.     conv0->setName("conv0");
  22.     conv0->getOutput(0)->setName("conv0_out");

  23.     PrintTensorInfo(conv0->getOutput(0));
  24.     auto relu0 = network->addActivation(*conv0->getOutput(0), ActivationType::kRELU);
  25.         CHECK_PTR2(relu0 );

  26.    
  27.     auto conv1 = network->addConvolution(*relu0->getOutput(0), 64, DimsHW{3, 3}, weightMap["conv1_weights_0"], weightMap["conv1_biases_0"]);
  28.     CHECK_PTR2(conv1);
  29.     conv1->setStride(DimsHW{1, 1});  
  30.     conv1->setPadding(DimsHW{1, 1});
  31.     conv1->setName("conv1");
  32.     conv1->getOutput(0)->setName("conv1_out");

  33.     PrintTensorInfo(conv1->getOutput(0));
  34.     auto relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
  35.         CHECK_PTR2(relu1 );

  36.     auto conv2 = network->addConvolution(*relu1->getOutput(0), 48, DimsHW{3, 3}, weightMap["conv2_weights_0"], weightMap["conv2_biases_0"]);
  37.     CHECK_PTR2(conv2);
  38.     conv2->setStride(DimsHW{1, 1});
  39.     conv2->setPadding(DimsHW{1, 1});
  40.     conv2->setName("conv2");
  41.     conv2->getOutput(0)->setName("conv2_out");

  42.     PrintTensorInfo(conv2->getOutput(0));
  43.     auto relu2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
  44.         CHECK_PTR2(relu2 );
  45.    
  46.    
  47.     auto conv3 = network->addConvolution(*relu2->getOutput(0), 32, DimsHW{3, 3}, weightMap["conv3_weights_0"], weightMap["conv3_biases_0"]);
  48.     CHECK_PTR2(conv3);
  49.     conv3->setStride(DimsHW{1, 1});
  50.     conv3->setPadding(DimsHW{1, 1});
  51.     conv3->setName("conv3");
  52.     conv3->getOutput(0)->setName("conv3_out");

  53.     PrintTensorInfo(conv3->getOutput(0));
  54.     auto relu3 = network->addActivation(*conv3->getOutput(0), ActivationType::kRELU);
  55.         CHECK_PTR2(relu3 );
  56.    
  57.     auto conv5 = network->addConvolution(*relu3->getOutput(0), 48, DimsHW{3, 3}, weightMap["conv5_weights_0"], weightMap["conv5_biases_0"]);
  58.     CHECK_PTR2(conv5);
  59.     conv5->setStride(DimsHW{1, 1});
  60.     conv5->setPadding(DimsHW{1, 1});
  61.     conv5->setName("conv5");
  62.     conv5->getOutput(0)->setName("conv5_out");

  63.     PrintTensorInfo(conv5->getOutput(0));
  64. #if 1
  65.     SubpixelLayer subpiexl(4);

  66.     ITensor *ptr = relu3->getOutput(0);
  67.     auto out = network->addPlugin(&ptr,1,subpiexl);
  68.     CHECK_PTR2(out);
  69.     out->setName("subpiexl");
  70.     out->getOutput(0)->setName("out");

  71.     PrintTensorInfo(out->getOutput(0));
  72.    
  73.     network->markOutput(*out->getOutput(0));
  74. #else
  75.     network->markOutput(*conv5->getOutput(0));
  76. #endif
  77.     printf("%s:%d,---1---\n",__FILE__,__LINE__);
  78.     builder->setMaxBatchSize(maxBatchSize);
  79.         builder->setMaxWorkspaceSize(1 << 25);

  80.     auto engine = builder->buildCudaEngine(*network);
  81.         network->destroy();
  82.    
  83.     return engine;
  84. }
复制代码
main函数中调用逻辑:
  // Driver fragment from main(): build the engine, allocate device buffers,
  // run a single inference.
  1. IBuilder* builder = createInferBuilder(gLogger);
  2.     ICudaEngine *engine = Model(1, builder, nvinfer1::DataType::kFLOAT,height,width);

     // NOTE(review): `engine` is dereferenced below without a nullptr check —
     // Model() returns nullptr on failure, so a failed build crashes here.
     // Probe which of the two bindings is the input (assumes exactly 2 bindings).
  3.     int inputIndex = 0;
  4.     int outputIndex = 1;
  5.     if(!engine->bindingIsInput(inputIndex)){
  6.         inputIndex = 1;
  7.         outputIndex = 0;
  8.     }
     // Input buffer: 3-channel float image of height x width.
     // Output buffer: size*16 — presumably sized for a 4x spatial upscale
     // (4^2 = 16); TODO confirm against the dims the plugin actually reports.
     // NOTE(review): cudaMalloc return codes are ignored, and the buffers,
     // context and engine are never freed/destroyed in this fragment.
  9.     void *buffers_[2];
  10.     size = height*width*sizeof(float)*3;
  11.     cudaMalloc(&buffers_[inputIndex], size);
  12.     cudaMalloc(&buffers_[outputIndex], size*16);
  13.     IExecutionContext *context = engine->createExecutionContext();
     // The reported segfault occurs on this call.
  14.     context->execute(1, buffers_);
复制代码
问题描述为:在执行到context->execute时,出现段错误。使用dmesg可以看到的信息为:
  1. [17691.186312] traps: test[30071] general protection ip:7f95f613e9a5 sp:7fffc76fd8e0 error:0 in libnvinfer.so.4.1.0[7f95f5d48000+db99000]
复制代码
不知道错在什么地方了。请大伙帮忙看看,谢谢!
BTW:完成测试代码,请查看附件。谢谢。

test2.cc.tar.gz

2.65 KB, 下载次数: 1

回复

使用道具 举报

发表于 2018-5-15 15:24:32 | 显示全部楼层
给出的报错信息太少,且代码无法直接运行,肉眼暂时看不出来是哪里的问题。
建议你gdb单步调试一下先定位问题
https://github.com/LitLeo/Tensor ... 4%B8%BA%E4%BE%8B.md
这是成功的例子
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

关闭

站长推荐上一条 /1 下一条

快速回复 返回顶部 返回列表