#include <inference_engine.hpp>
#include <samples/ocv_common.hpp> // OpenCV headers and the matU8ToBlob() helper from the OpenVINO samples
#include <ngraph/ngraph.hpp>
#include <string>
#include <sys/time.h>
#include <iostream>
using namespace InferenceEngine;
using namespace std;
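// Copies a BGR cv::Mat into the network's input blob. matU8ToBlob (from the
// samples' ocv_common.hpp) resizes the image to the blob's H x W and rearranges
// the interleaved HWC data into the planar NCHW layout the network expects.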
void FrameToBlob(const cv::Mat &frame, InferRequest::Ptr &inferRequest, const std::string &inputName)
{
/* Resize and copy data from the image to the input blob */
Blob::Ptr frameBlob = inferRequest->GetBlob(inputName);
matU8ToBlob<uint8_t>(frame, frameBlob);
}
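// The RegionYolo output is a flat FP32 buffer: for each anchor there is a block of
// (coords + 1 + classes) channels, each channel holding side*side grid-cell values.
// EntryIndex turns an (anchor, cell) pair plus a channel offset ("entry") into an
// index into that buffer.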
static int EntryIndex(int side, int lcoords, int lclasses, int location, int entry)
{
int n = location / (side * side);
int loc = location % (side * side);
return n * side * side * (lcoords + lclasses + 1) + entry * side * side + loc;
}
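// One detection: box corners in original-image pixels, plus class id and confidence.
// The constructor converts YOLO's center/size box (x, y, w, h), given in resized-input
// coordinates, into corner coordinates scaled back to the original image.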
struct DetectionObject
{
int xmin, ymin, xmax, ymax, class_id;
float confidence;
DetectionObject(double x, double y, double h, double w, int class_id, float confidence, float h_scale, float w_scale)
{
this->xmin = static_cast<int>((x - w / 2) * w_scale);
this->ymin = static_cast<int>((y - h / 2) * h_scale);
this->xmax = static_cast<int>(this->xmin + w * w_scale);
this->ymax = static_cast<int>(this->ymin + h * h_scale);
this->class_id = class_id;
this->confidence = confidence;
}
bool operator<(const DetectionObject &s2) const
{
return this->confidence < s2.confidence;
}
bool operator>(const DetectionObject &s2) const
{
return this->confidence > s2.confidence;
}
};
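// Standard intersection-over-union of two axis-aligned boxes, used later to
// suppress overlapping detections of the same object.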
double IntersectionOverUnion(const DetectionObject &box_1, const DetectionObject &box_2)
{
double width_of_overlap_area = fmin(box_1.xmax, box_2.xmax) - fmax(box_1.xmin, box_2.xmin);
double height_of_overlap_area = fmin(box_1.ymax, box_2.ymax) - fmax(box_1.ymin, box_2.ymin);
double area_of_overlap;
if (width_of_overlap_area < 0 || height_of_overlap_area < 0)
area_of_overlap = 0;
else
area_of_overlap = width_of_overlap_area * height_of_overlap_area;
double box_1_area = (box_1.ymax - box_1.ymin) * (box_1.xmax - box_1.xmin);
double box_2_area = (box_2.ymax - box_2.ymin) * (box_2.xmax - box_2.xmin);
double area_of_union = box_1_area + box_2_area - area_of_overlap;
return area_of_overlap / area_of_union;
}
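// YoloParams holds the RegionYolo layer attributes: anchors per cell, coordinate
// count, class count and anchor sizes. computeAnchors keeps only the anchor pairs
// selected by the layer's "mask" attribute, since each YOLOv3 output scale uses a
// different subset of the nine anchors.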
class YoloParams
{
template <typename T>
void computeAnchors(const std::vector<T> &mask)
{
std::vector<float> maskedAnchors(num * 2);
for (int i = 0; i < num; ++i)
{
maskedAnchors[i * 2] = anchors[mask[i] * 2];
maskedAnchors[i * 2 + 1] = anchors[mask[i] * 2 + 1];
}
anchors = maskedAnchors;
}
public:
int num = 0, classes = 0, coords = 0;
std::vector<float> anchors = {10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0,
156.0, 198.0, 373.0, 326.0};
YoloParams() {}
YoloParams(const std::shared_ptr<ngraph::op::RegionYolo> regionYolo)
{
coords = regionYolo->get_num_coords();
classes = regionYolo->get_num_classes();
anchors = regionYolo->get_anchors();
auto mask = regionYolo->get_mask();
num = mask.size();
computeAnchors(mask);
}
YoloParams(CNNLayer::Ptr layer)
{
if (layer->type != "RegionYolo")
throw std::runtime_error("Invalid output type: " + layer->type + ". RegionYolo expected");
num = layer->GetParamAsInt("num");
coords = layer->GetParamAsInt("coords");
classes = layer->GetParamAsInt("classes");
try
{
anchors = layer->GetParamAsFloats("anchors");
}
catch (...)
{
}
try
{
auto mask = layer->GetParamAsInts("mask");
num = mask.size();
computeAnchors(mask);
}
catch (...)
{
}
}
};
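// Decodes one RegionYolo output blob: for every grid cell and every anchor it reads
// the objectness score, skips low-scoring boxes, reconstructs the box from the
// predicted offsets and the anchor size, multiplies objectness by each class score,
// and appends every (box, class) pair above the threshold to `objects`.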
void ParseYOLOV3Output(const CNNNetwork &cnnNetwork, const std::string &output_name,
const Blob::Ptr &blob, const unsigned long resized_im_h,
const unsigned long resized_im_w, const unsigned long original_im_h,
const unsigned long original_im_w,
const double threshold, std::vector<DetectionObject> &objects)
{
const int out_blob_h = static_cast<int>(blob->getTensorDesc().getDims()[2]);
const int out_blob_w = static_cast<int>(blob->getTensorDesc().getDims()[3]);
if (out_blob_h != out_blob_w)
throw std::runtime_error("Invalid size of output " + output_name +
" It should be in NCHW layout and H should be equal to W. Current H = " + std::to_string(out_blob_h) +
", current W = " + std::to_string(out_blob_h));
// --------------------------- Extracting layer parameters -------------------------------------
YoloParams params;
if (auto ngraphFunction = cnnNetwork.getFunction())
{
for (const auto op : ngraphFunction->get_ops())
{
if (op->get_friendly_name() == output_name)
{
auto regionYolo = std::dynamic_pointer_cast<ngraph::op::RegionYolo>(op);
if (!regionYolo)
{
throw std::runtime_error("Invalid output type: " +
std::string(regionYolo->get_type_info().name) + ". RegionYolo expected");
}
params = regionYolo;
break;
}
}
}
else
{
throw std::runtime_error("Can't get ngraph::Function. Make sure the provided model is in IR version 10 or greater.");
}
auto side = out_blob_h;
auto side_square = side * side;
const float *output_blob = blob->buffer().as<PrecisionTrait<Precision::FP32>::value_type *>();
// --------------------------- Parsing YOLO Region output -------------------------------------
for (int i = 0; i < side_square; ++i)
{
int row = i / side;
int col = i % side;
for (int n = 0; n < params.num; ++n)
{
int obj_index = EntryIndex(side, params.coords, params.classes, n * side * side + i, params.coords);
int box_index = EntryIndex(side, params.coords, params.classes, n * side * side + i, 0);
float scale = output_blob[obj_index];
if (scale < threshold)
continue;
double x = (col + output_blob[box_index + 0 * side_square]) / side * resized_im_w;
double y = (row + output_blob[box_index + 1 * side_square]) / side * resized_im_h;
double height = std::exp(output_blob[box_index + 3 * side_square]) * params.anchors[2 * n + 1];
double width = std::exp(output_blob[box_index + 2 * side_square]) * params.anchors[2 * n];
for (int j = 0; j < params.classes; ++j)
{
int class_index = EntryIndex(side, params.coords, params.classes, n * side_square + i, params.coords + 1 + j);
float prob = scale * output_blob[class_index];
if (prob < threshold)
continue;
DetectionObject obj(x, y, height, width, j, prob,
static_cast<float>(original_im_h) / static_cast<float>(resized_im_h),
static_cast<float>(original_im_w) / static_cast<float>(resized_im_w));
objects.push_back(obj);
}
}
}
}
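// Entry point. The target device (e.g. "CPU" or "GPU") is taken from argv[1];
// the IR model path and test image path are hard-coded a few lines below, so
// adjust them to your own files before building.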
int main(int argc, char **argv)
{
if (argc < 2)
{
throw std::logic_error("Usage: " + string(argv[0]) + " <device>, e.g. CPU or GPU");
}
string device(argv[1]);
string xml_file("../model/frozen_darknet_yolov3_model.xml");
string image_name("../mmgg.jpg");
float thr = 0.1;
cout << "1.Loading Inference Engine" << endl;
Core ie;
cout << ie.GetVersions(device) << endl;
cout << "2.Loading network files" << endl;
auto cnnNetwork = ie.ReadNetwork(xml_file);
// cnnNetwork.setBatchSize(1);
cout << "3.Checking that the inputs are as the demo expects" << endl;
InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
if (inputInfo.size() != 1)
{
throw std::logic_error("This demo accepts networks that have only one input");
}
InputInfo::Ptr &input = inputInfo.begin()->second;
auto inputName = inputInfo.begin()->first;
input->setPrecision(Precision::U8);
input->getInputData()->setLayout(Layout::NCHW);
ICNNNetwork::InputShapes inputShapes = cnnNetwork.getInputShapes();
SizeVector &inSizeVector = inputShapes.begin()->second;
inSizeVector[0] = 1; // set batch to 1
cnnNetwork.reshape(inputShapes);
OutputsDataMap outputInfo(cnnNetwork.getOutputsInfo());
for (auto &output : outputInfo)
{
output.second->setPrecision(Precision::FP32);
output.second->setLayout(Layout::NCHW);
}
cout << "4.Loading model to the device" << endl;
ExecutableNetwork network = ie.LoadNetwork(cnnNetwork, device);
cout << "5. Creating infer request" << endl;
InferRequest::Ptr async_infer_request_curr = network.CreateInferRequestPtr();
cout << "6. Start inference " << endl;
cv::Mat frame = cv::imread(image_name);
if (frame.empty())
throw std::logic_error("Cannot read input image: " + image_name);
int height = frame.rows, width = frame.cols;
FrameToBlob(frame, async_infer_request_curr, inputName);
struct timeval start, end;
gettimeofday(&start, NULL);
async_infer_request_curr->Infer();
gettimeofday(&end, NULL);
std::cout << "Inference time: " << (end.tv_usec - start.tv_usec) / 1000000.0 + (end.tv_sec - start.tv_sec) << " s" << std::endl;
cout << "7.Process output blobs..." << std::endl;
const TensorDesc &inputDesc = inputInfo.begin()->second.get()->getTensorDesc();
unsigned long resized_im_h = getTensorHeight(inputDesc);
unsigned long resized_im_w = getTensorWidth(inputDesc);
std::vector<DetectionObject> objects;
// Parsing outputs
for (auto &output : outputInfo)
{
auto output_name = output.first;
Blob::Ptr blob = async_infer_request_curr->GetBlob(output_name);
ParseYOLOV3Output(cnnNetwork, output_name, blob, resized_im_h, resized_im_w, height, width, thr, objects);
}
// Greedy non-maximum suppression: sort by confidence, then zero out the confidence of any lower-scoring box that overlaps a kept box with IoU >= 0.4
std::sort(objects.begin(), objects.end(), std::greater<DetectionObject>());
for (size_t i = 0; i < objects.size(); ++i)
{
if (objects[i].confidence == 0)
continue;
for (size_t j = i + 1; j < objects.size(); ++j)
if (IntersectionOverUnion(objects[i], objects[j]) >= 0.4)
objects[j].confidence = 0;
}
for (auto &object : objects)
{
if (object.confidence < thr)
continue;
auto label = object.class_id;
float confidence = object.confidence;
cv::putText(frame, std::to_string(label), cv::Point2f(static_cast<float>(object.xmin) + 15, static_cast<float>(object.ymin + 15)),
cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 255));
cv::rectangle(frame, cv::Point2f(static_cast<float>(object.xmin), static_cast<float>(object.ymin)),
cv::Point2f(static_cast<float>(object.xmax), static_cast<float>(object.ymax)), cv::Scalar(0, 0, 255));
cout << "class " << label << ", confidence " << confidence << endl;
}
cv::imshow("Detection results", frame);
cv::waitKey();
return 0;
}