#include <inference_engine.hpp>
#include <samples/ocv_common.hpp> // OpenCV headers and the matU8ToBlob() helper from the OpenVINO samples
#include <ngraph/ngraph.hpp>
#include <string>
#include <sys/time.h>
#include <iostream>
using namespace InferenceEngine;
using namespace std;
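// Copies a BGR cv::Mat into the network's input blob. matU8ToBlob (from the
// samples' ocv_common.hpp) resizes the image to the blob's H x W and rearranges
// the interleaved HWC data into the planar NCHW layout the network expects.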
void FrameToBlob(const cv::Mat &frame, InferRequest::Ptr &inferRequest, const std::string &inputName)
{
/* Resize and copy data from the image to the input blob */
Blob::Ptr frameBlob = inferRequest->GetBlob(inputName);
matU8ToBlob<uint8_t>(frame, frameBlob);
}
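// The RegionYolo output is a flat FP32 buffer: for each anchor there is a block of
// (coords + 1 + classes) channels, each channel holding side*side grid-cell values.
// EntryIndex turns an (anchor, cell) pair plus a channel offset ("entry") into an
// index into that buffer.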
static int EntryIndex(int side, int lcoords, int lclasses, int location, int entry)
{
int n = location / (side * side);
int loc = location % (side * side);
return n * side * side * (lcoords + lclasses + 1) + entry * side * side + loc;
}
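// One detection: box corners in original-image pixels, plus class id and confidence.
// The constructor converts YOLO's center/size box (x, y, w, h), given in resized-input
// coordinates, into corner coordinates scaled back to the original image.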
struct DetectionObject
{
int xmin, ymin, xmax, ymax, class_id;
float confidence;
DetectionObject(double x, double y, double h, double w, int class_id, float confidence, float h_scale, float w_scale)
{
this->xmin = static_cast<int>((x - w / 2) * w_scale);
this->ymin = static_cast<int>((y - h / 2) * h_scale);
this->xmax = static_cast<int>(this->xmin + w * w_scale);
this->ymax = static_cast<int>(this->ymin + h * h_scale);
this->class_id = class_id;
this->confidence = confidence;
}
bool operator<(const DetectionObject &s2) const
{
return this->confidence < s2.confidence;
}
bool operator>(const DetectionObject &s2) const
{
return this->confidence > s2.confidence;
}
};
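// Standard intersection-over-union of two axis-aligned boxes, used later to
// suppress overlapping detections of the same object.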
double IntersectionOverUnion(const DetectionObject &box_1, const DetectionObject &box_2)
{
double width_of_overlap_area = fmin(box_1.xmax, box_2.xmax) - fmax(box_1.xmin, box_2.xmin);
double height_of_overlap_area = fmin(box_1.ymax, box_2.ymax) - fmax(box_1.ymin, box_2.ymin);
double area_of_overlap;
if (width_of_overlap_area < 0 || height_of_overlap_area < 0)
area_of_overlap = 0;
else
area_of_overlap = width_of_overlap_area * height_of_overlap_area;
double box_1_area = (box_1.ymax - box_1.ymin) * (box_1.xmax - box_1.xmin);
double box_2_area = (box_2.ymax - box_2.ymin) * (box_2.xmax - box_2.xmin);
double area_of_union = box_1_area + box_2_area - area_of_overlap;
return area_of_overlap / area_of_union;
}
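// YoloParams holds the RegionYolo layer attributes: anchors per cell, coordinate
// count, class count and anchor sizes. computeAnchors keeps only the anchor pairs
// selected by the layer's "mask" attribute, since each YOLOv3 output scale uses a
// different subset of the nine anchors.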
class YoloParams
{
template <typename T>
void computeAnchors(const std::vector<T> &mask)
{
std::vector<float> maskedAnchors(num * 2);
for (int i = 0; i < num; ++i)
{
maskedAnchors[i * 2] = anchors[mask[i] * 2];
maskedAnchors[i * 2 + 1] = anchors[mask[i] * 2 + 1];
}
anchors = maskedAnchors;
}
public:
int num = 0, classes = 0, coords = 0;
std::vector<float> anchors = {10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0,
156.0, 198.0, 373.0, 326.0};
YoloParams() {}
YoloParams(const std::shared_ptr<ngraph::op::RegionYolo> regionYolo)
{
coords = regionYolo->get_num_coords();
classes = regionYolo->get_num_classes();
anchors = regionYolo->get_anchors();
auto mask = regionYolo->get_mask();
num = mask.size();
computeAnchors(mask);
}
YoloParams(CNNLayer::Ptr layer)
{
if (layer->type != "RegionYolo")
throw std::runtime_error("Invalid output type: " + layer->type + ". RegionYolo expected");
num = layer->GetParamAsInt("num");
coords = layer->GetParamAsInt("coords");
classes = layer->GetParamAsInt("classes");
try
{
anchors = layer->GetParamAsFloats("anchors");
}
catch (...)
{
}
try
{
auto mask = layer->GetParamAsInts("mask");
num = mask.size();
computeAnchors(mask);
}
catch (...)
{
}
}
};
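// Decodes one RegionYolo output blob: for every grid cell and every anchor it reads
// the objectness score, skips low-scoring boxes, reconstructs the box from the
// predicted offsets and the anchor size, multiplies objectness by each class score,
// and appends every (box, class) pair above the threshold to `objects`.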
void ParseYOLOV3Output(const CNNNetwork &cnnNetwork, const std::string &output_name,
const Blob::Ptr &blob, const unsigned long resized_im_h,
const unsigned long resized_im_w, const unsigned long original_im_h,
const unsigned long original_im_w,
const double threshold, std::vector<DetectionObject> &objects)
{
const int out_blob_h = static_cast<int>(blob->getTensorDesc().getDims()[2]);
const int out_blob_w = static_cast<int>(blob->getTensorDesc().getDims()[3]);
if (out_blob_h != out_blob_w)
throw std::runtime_error("Invalid size of output " + output_name +
" It should be in NCHW layout and H should be equal to W. Current H = " + std::to_string(out_blob_h) +
", current W = " + std::to_string(out_blob_h));
// --------------------------- Extracting layer parameters -------------------------------------
YoloParams params;
if (auto ngraphFunction = cnnNetwork.getFunction())
{
for (const auto op : ngraphFunction->get_ops())
{
if (op->get_friendly_name() == output_name)
{
auto regionYolo = std::dynamic_pointer_cast<ngraph::op::RegionYolo>(op);
if (!regionYolo)
{
throw std::runtime_error("Invalid output type: " +
std::string(regionYolo->get_type_info().name) + ". RegionYolo expected");
}
params = regionYolo;
break;
}
}
}
else
{
throw std::runtime_error("Can't get ngraph::Function. Make sure the provided model is in IR version 10 or greater.");
}
auto side = out_blob_h;
auto side_square = side * side;
const float *output_blob = blob->buffer().as<PrecisionTrait<Precision::FP32>::value_type *>();
// --------------------------- Parsing YOLO Region output -------------------------------------
for (int i = 0; i < side_square; ++i)
{
int row = i / side;
int col = i % side;
for (int n = 0; n < params.num; ++n)
{
int obj_index = EntryIndex(side, params.coords, params.classes, n * side * side + i, params.coords);
int box_index = EntryIndex(side, params.coords, params.classes, n * side * side + i, 0);
float scale = output_blob[obj_index];
if (scale < threshold)
continue;
double x = (col + output_blob[box_index + 0 * side_square]) / side * resized_im_w;
double y = (row + output_blob[box_index + 1 * side_square]) / side * resized_im_h;
double height = std::exp(output_blob[box_index + 3 * side_square]) * params.anchors[2 * n + 1];
double width = std::exp(output_blob[box_index + 2 * side_square]) * params.anchors[2 * n];
for (int j = 0; j < params.classes; ++j)
{
int class_index = EntryIndex(side, params.coords, params.classes, n * side_square + i, params.coords + 1 + j);
float prob = scale * output_blob[class_index];
if (prob < threshold)
continue;
DetectionObject obj(x, y, height, width, j, prob,
static_cast<float>(original_im_h) / static_cast<float>(resized_im_h),
static_cast<float>(original_im_w) / static_cast<float>(resized_im_w));
objects.push_back(obj);
}
}
}
}
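// Entry point. The target device (e.g. "CPU" or "GPU") is taken from argv[1];
// the IR model path and test image path are hard-coded a few lines below, so
// adjust them to your own files before building.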
int main(int argc, char **argv)
{
if (argc < 2)
{
throw std::logic_error("Usage: " + string(argv[0]) + " <device>, e.g. CPU or GPU");
}
string device(argv[1]);
string xml_file("../model/frozen_darknet_yolov3_model.xml");
string image_name("../mmgg.jpg");
float thr = 0.1;
cout << "1.Loading Inference Engine" << endl;
Core ie;
cout << ie.GetVersions(device) << endl;
cout << "2.Loading network files" << endl;
auto cnnNetwork = ie.ReadNetwork(xml_file);
// cnnNetwork.setBatchSize(1);
cout << "3.Checking that the inputs are as the demo expects" << endl;
InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
if (inputInfo.size() != 1)
{
throw std::logic_error("This demo accepts networks that have only one input");
}
InputInfo::Ptr &input = inputInfo.begin()->second;
auto inputName = inputInfo.begin()->first;
input->setPrecision(Precision::U8);
input->getInputData()->setLayout(Layout::NCHW);
ICNNNetwork::InputShapes inputShapes = cnnNetwork.getInputShapes();
SizeVector &inSizeVector = inputShapes.begin()->second;
inSizeVector[0] = 1; // set batch to 1
cnnNetwork.reshape(inputShapes);
OutputsDataMap outputInfo(cnnNetwork.getOutputsInfo());
for (auto &output : outputInfo)
{
output.second->setPrecision(Precision::FP32);
output.second->setLayout(Layout::NCHW);
}
cout << "4.Loading model to the device" << endl;
ExecutableNetwork network = ie.LoadNetwork(cnnNetwork, device);
cout << "5. Creating infer request" << endl;
InferRequest::Ptr async_infer_request_curr = network.CreateInferRequestPtr();
cout << "6. Start inference " << endl;
cv::Mat frame = cv::imread(image_name);
if (frame.empty())
throw std::logic_error("Cannot read input image: " + image_name);
int height = frame.rows, width = frame.cols;
FrameToBlob(frame, async_infer_request_curr, inputName);
struct timeval start, end;
gettimeofday(&start, NULL);
async_infer_request_curr->Infer();
gettimeofday(&end, NULL);
std::cout << "Inference time: " << (end.tv_usec - start.tv_usec) / 1000000.0 + (end.tv_sec - start.tv_sec) << " s" << std::endl;
cout << "7.Process output blobs..." << std::endl;
const TensorDesc &inputDesc = inputInfo.begin()->second.get()->getTensorDesc();
unsigned long resized_im_h = getTensorHeight(inputDesc);
unsigned long resized_im_w = getTensorWidth(inputDesc);
std::vector<DetectionObject> objects;
// Parsing outputs
for (auto &output : outputInfo)
{
auto output_name = output.first;
Blob::Ptr blob = async_infer_request_curr->GetBlob(output_name);
ParseYOLOV3Output(cnnNetwork, output_name, blob, resized_im_h, resized_im_w, height, width, thr, objects);
}
// Greedy non-maximum suppression: sort by confidence, then zero out the confidence of any lower-scoring box that overlaps a kept box with IoU >= 0.4
std::sort(objects.begin(), objects.end(), std::greater<DetectionObject>());
for (size_t i = 0; i < objects.size(); ++i)
{
if (objects[i].confidence == 0)
continue;
for (size_t j = i + 1; j < objects.size(); ++j)
if (IntersectionOverUnion(objects[i], objects[j]) >= 0.4)
objects[j].confidence = 0;
}
for (auto &object : objects)
{
if (object.confidence < thr)
continue;
auto label = object.class_id;
float confidence = object.confidence;
cv::putText(frame, std::to_string(label), cv::Point2f(static_cast<float>(object.xmin) + 15, static_cast<float>(object.ymin + 15)),
cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 255));
cv::rectangle(frame, cv::Point2f(static_cast<float>(object.xmin), static_cast<float>(object.ymin)),
cv::Point2f(static_cast<float>(object.xmax), static_cast<float>(object.ymax)), cv::Scalar(0, 0, 255));
cout << "class " << label << ", confidence " << confidence << endl;
}
cv::imshow("Detection results", frame);
cv::waitKey();
return 0;
}