condition: bindings[x] != nullptr

原创已于 2022-11-02 01:08:19 修改 · 2k 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#tensorrt

于 2022-11-02 01:08:12 首次发布

c++基础同时被 2 个专栏收录

300 篇文章

订阅专栏

onnx

100 篇文章

订阅专栏

yolov5 pose 预测报错：

原来的项目：

https://github.com/BingfengYan/yolo_pose

运行报错：

condition: bindings[x] != nullptr

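原因：TensorRT 模型有多个 output（本例中 engine 共有 5 个 binding），执行推理时每个 binding 都必须对应一个有效的 buffer。如果只写两个 buffer（一个 input、一个 output），其余 binding 没有对应的 buffer，就会报上面的错误，

需要为每个 binding 都准备 buffer，即 buffers 数组的长度应等于 engine->getNbBindings()。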

错误代码：

   // Excerpt (body of an enclosing function that is not shown): deserializes a
   // TensorRT engine, allocates device buffers, then runs batched inference over
   // `file_names`, draws the detections and writes the annotated images.
   // This is the FAILING version discussed in the article; see the BUG note on
   // `buffers` below.
   static float prob[BATCH_SIZE * OUTPUT_SIZE];
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    // The serialized engine bytes are released once the engine has been created.
    delete[] trtModelStream;
    // The engine is expected to expose 5 bindings in total.
    assert(engine->getNbBindings() == 5);
    // BUG (the cause of "condition: bindings[x] != nullptr"): only 2 slots are
    // declared although the engine has 5 bindings. `outputIndex` is asserted to
    // be 4 below, so `buffers[outputIndex]` indexes past the end of this array,
    // and bindings 1..3 never receive a device pointer. The array needs one
    // entry per binding (engine->getNbBindings()), each with its own allocation.
    float* buffers[2];
    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 4);
    // Create GPU buffers on device
    CUDA_CHECK(cudaMalloc((void**)&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc((void**)&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));

    // Create stream
    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));
    uint8_t* img_host = nullptr;
    uint8_t* img_device = nullptr;
    // prepare input data cache in pinned memory
    CUDA_CHECK(cudaMallocHost((void**)&img_host, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    // prepare input data cache in device memory
    CUDA_CHECK(cudaMalloc((void**)&img_device, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    // Number of files collected for the batch currently being assembled.
    int fcount = 0;
    std::vector<cv::Mat> imgs_buffer(BATCH_SIZE);
    for (int f = 0; f < (int)file_names.size(); f++) {
        fcount++;
        // Keep collecting until the batch is full or the last file has been reached.
        if (fcount < BATCH_SIZE && f + 1 != (int)file_names.size()) continue;
        //auto start = std::chrono::system_clock::now();
        // Write cursor into the input binding; advanced by one image per iteration.
        float* buffer_idx = (float*)buffers[inputIndex];
        for (int b = 0; b < fcount; b++) {
            // `f - fcount + 1 + b` is the index of the b-th file of the current batch.
            cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            // NOTE(review): an unreadable image is skipped without advancing
            // `buffer_idx` and without updating `imgs_buffer[b]`.
            if (img.empty()) continue;
            imgs_buffer[b] = img;
            size_t  size_image = img.cols * img.rows * 3;
            size_t  size_image_dst = INPUT_H * INPUT_W * 3;
            //copy data to pinned memory
            // NOTE(review): `size_image` is not checked here against the
            // MAX_IMAGE_INPUT_SIZE_THRESH * 3 bytes allocated above.
            memcpy(img_host,img.data,size_image);
            //copy data to device memory
            CUDA_CHECK(cudaMemcpyAsync(img_device,img_host,size_image,cudaMemcpyHostToDevice,stream));
            // Helper defined elsewhere; presumably converts the raw image into the
            // INPUT_W x INPUT_H float network input at `buffer_idx` - TODO confirm.
            preprocess_kernel_img(img_device, img.cols, img.rows, buffer_idx, INPUT_W, INPUT_H, stream);       
            buffer_idx += size_image_dst;
        }
        // Run inference
        // Note: BATCH_SIZE is passed even when the final batch holds only `fcount` images.
        auto start = std::chrono::system_clock::now();
        doInference(*context, stream, (void**)buffers, prob, BATCH_SIZE);
        auto end = std::chrono::system_clock::now();
        std::cout << "inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
        // Per-image post-processing: each image uses its own OUTPUT_SIZE-long slice of `prob`.
        std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
        }
        // Draw each detection's box and class id, then write the annotated image.
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            cv::Mat img = imgs_buffer[b];
            for (size_t j = 0; j < res.size(); j++) {
                cv::Rect r = get_rect(img, res[j].bbox);
                cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
                cv::putText(img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
            }
            // The output name is the input file name prefixed with "_".
            cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
        }
        // Start assembling the next batch.
        fcount = 0;
    }

Dims4 用法：

https://github.com/entropyfeng/yolov5s-involution/blob/0a1f46ba1b350b3cd50cfac9faa67d59ec701e7a/common.hpp

// Builds a "focus" stage on a 4-D input tensor: four stride-2 slices of `input`
// (one per {row, column} start offset in {0, 1}) are concatenated and the result
// is passed to convBlock.
//
// network   - network definition that receives the new layers
// weightMap - weights, forwarded to convBlock
// input     - 4-D tensor; dimension 0 must be non-zero (asserted)
// inch      - extent of every slice along dimension 1
// outch     - forwarded to convBlock
// ksize     - forwarded to convBlock
// lname     - layer name prefix; the conv block is named lname + ".conv"
//
// Returns the layer produced by convBlock.
//
// NOTE(review): the size of dimension 0 is only asserted to be non-zero; the
// slice extent along dimension 0 is fixed at 1 regardless of its value -
// confirm that this is intended for inputs whose first dimension exceeds 1.
ILayer *focusWithBs(INetworkDefinition *network, std::map<std::string, Weights> &weightMap, ITensor &input, int inch, int outch,
                    int ksize, std::string lname){
    const int batch = input.getDimensions().d[0];
    assert(batch);

    // Every slice shares the same extent and stride; only the start offset differs.
    const Dims4 extent{1, inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2};
    const Dims4 step{1, 1, 2, 2};

    // {row, column} start offsets, listed in concatenation order.
    const int offsets[4][2] = {{0, 0}, {1, 0}, {0, 1}, {1, 1}};

    ISliceLayer *slices[4];
    for (int i = 0; i < 4; ++i) {
        slices[i] = network->addSlice(input, Dims4{0, 0, offsets[i][0], offsets[i][1]}, extent, step);
    }

    ITensor *pieces[4];
    for (int i = 0; i < 4; ++i) {
        pieces[i] = slices[i]->getOutput(0);
    }

    auto merged = network->addConcatenation(pieces, 4);
    return convBlock(network, weightMap, *merged->getOutput(0), outch, ksize, 1, 1, lname + ".conv");
}