22 #include "objdetectdata.pb.h"
23 #include <google/protobuf/util/time_util.h>
27 using google::protobuf::util::TimeUtil;
// Shape heuristic for a network output blob.
// Returns true only when `out` is 3-dimensional with size[0] == 1,
// size[1] >= 4 and size[2] > size[1]. When classCount is non-zero it also
// requires size[1] >= 4 + classCount.
// postprocess() reads size[1] as the attribute count and size[2] as the
// candidate count for blobs that pass this test.
31 bool LooksLikeTransposedYoloOutput(
const cv::Mat& out,
size_t classCount)
36 return out.dims == 3 && out.size[0] == 1 && out.size[1] >= 4 &&
37 out.size[2] > out.size[1] &&
38 (classCount == 0 || out.size[1] >= 4 +
static_cast<int>(classCount));
// Converts a center/size box (centerX, centerY, width, height) into an
// integer cv::Rect that is clamped to the frame.
// NOTE(review): the embedded line numbers jump in this listing, so part of the
// parameter list and the branch that applies xFactor / yFactor are not shown.
41 cv::Rect ScaledXYWHBox(
46 const cv::Size& frameDims,
// When all four values are <= 1.0 they are scaled by the frame dimensions,
// i.e. they are treated as fractions of the frame.
50 if (centerX <= 1.0f && centerY <= 1.0f && width <= 1.0f && height <= 1.0f) {
51 centerX *=
static_cast<float>(frameDims.width);
52 width *=
static_cast<float>(frameDims.width);
53 centerY *=
static_cast<float>(frameDims.height);
54 height *=
static_cast<float>(frameDims.height);
// Ratio between the frame size and the network input size.
// Presumably applied to the box in the lines missing from this listing.
56 const float xFactor =
static_cast<float>(frameDims.width) /
static_cast<float>(inputWidth);
57 const float yFactor =
static_cast<float>(frameDims.height) /
static_cast<float>(inputHeight);
// Center/size to edge coordinates.
64 float left = centerX - width / 2.0f;
65 float top = centerY - height / 2.0f;
66 float right = centerX + width / 2.0f;
67 float bottom = centerY + height / 2.0f;
// Clamp: left/top to [0, dim - 1], right/bottom to [0, dim].
69 left = std::max(0.0f, std::min(left,
static_cast<float>(frameDims.width - 1)));
70 top = std::max(0.0f, std::min(top,
static_cast<float>(frameDims.height - 1)));
71 right = std::max(0.0f, std::min(right,
static_cast<float>(frameDims.width)));
72 bottom = std::max(0.0f, std::min(bottom,
static_cast<float>(frameDims.height)));
// Truncate to integers; width and height are floored at zero so callers can
// reject empty boxes with a simple <= 0 test.
75 static_cast<int>(left),
76 static_cast<int>(top),
77 std::max(0,
static_cast<int>(right - left)),
78 std::max(0,
static_cast<int>(bottom - top)));
// Run-length encodes a byte mask into a vector of uint32_t run lengths.
// Every input byte is first normalised to 0 or 1, so any non-zero byte counts
// as a set pixel.
// NOTE(review): the initial value of `current`, the run counter and the rest
// of the loop body are not shown in this listing.
81 std::vector<uint32_t> EncodeBinaryMaskRLE(
const std::vector<uint8_t>& mask)
83 std::vector<uint32_t> rle;
89 for (uint8_t value : mask) {
90 value = value ? 1 : 0;
91 if (value == current) {
// Fragment that rebuilds a mask image from run lengths (the function header is
// not shown in this listing).
// Allocates a mask.height x mask.width single-channel 8-bit image filled with
// zeros, then walks mask.rle and writes 1 into some of the runs.
// NOTE(review): `total`, `offset` and the condition that selects which runs
// are filled are declared in lines not shown here.
105 cv::Mat image(mask.
height, mask.
width, CV_8UC1, cv::Scalar(0));
112 uint8_t* data = image.ptr<uint8_t>();
113 for (uint32_t count : mask.
rle) {
// The run end is capped at `total` so a run cannot write past the image.
// NOTE(review): `count` is a uint32_t cast to int; a value above INT_MAX could
// make `end` smaller than `offset`. Confirm the missing lines guard against it.
114 const int end = std::min(total, offset +
static_cast<int>(count));
116 std::fill(data + offset, data + end,
static_cast<uint8_t
>(1));
// Fragment that re-projects an existing mask from one bounding box onto
// another (the function header is not shown in this listing).
// sourceBox and targetBox are divided by nothing here; they are multiplied by
// mask-size / frame-size, so they are presumably in frame pixel units.
128 const cv::Rect_<float>& sourceBox,
129 const cv::Rect_<float>& targetBox,
130 const cv::Size& frameDims)
// Guard: needs mask data, non-degenerate boxes and a non-empty frame.
// The action taken when the guard fires is not shown.
133 if (!sourceMask.
HasData() || sourceBox.width <= 0.0f || sourceBox.height <= 0.0f ||
134 targetBox.width <= 0.0f || targetBox.height <= 0.0f ||
135 frameDims.width <= 0 || frameDims.height <= 0) {
// Factors that convert frame coordinates into mask-grid coordinates.
139 const float scaleX = sourceMask.
width /
static_cast<float>(frameDims.width);
140 const float scaleY = sourceMask.
height /
static_cast<float>(frameDims.height);
141 const cv::Rect_<float> sourceMaskBox(
142 sourceBox.x * scaleX,
143 sourceBox.y * scaleY,
144 sourceBox.width * scaleX,
145 sourceBox.height * scaleY);
146 const cv::Rect_<float> targetMaskBox(
147 targetBox.x * scaleX,
148 targetBox.y * scaleY,
149 targetBox.width * scaleX,
150 targetBox.height * scaleY);
151 if (sourceMaskBox.width <= 0.0f || sourceMaskBox.height <= 0.0f)
// 2x3 affine matrix with per-axis scale and translation only: it maps the
// source box rectangle onto the target box rectangle in mask space.
154 const double xScale = targetMaskBox.
width / sourceMaskBox.width;
155 const double yScale = targetMaskBox.height / sourceMaskBox.height;
156 cv::Mat transform = (cv::Mat_<double>(2, 3) <<
157 xScale, 0.0, targetMaskBox.x - xScale * sourceMaskBox.x,
158 0.0, yScale, targetMaskBox.y - yScale * sourceMaskBox.y);
// Decode, warp with nearest-neighbour sampling and a zero border (arguments
// below), then check whether any pixel survived.
160 cv::Mat source = DecodeBinaryMaskRLE(sourceMask);
163 source, transformed, transform, source.size(),
164 cv::INTER_NEAREST, cv::BORDER_CONSTANT, cv::Scalar(0));
165 if (cv::countNonZero(transformed) == 0)
// Re-encode the warped mask from its raw byte buffer.
// NOTE(review): reading transformed.total() bytes from transformed.data
// assumes a contiguous single-channel 8-bit matrix - TODO confirm.
170 result.
rle = EncodeBinaryMaskRLE(
171 std::vector<uint8_t>(transformed.data, transformed.data + transformed.total()));
// Fragment that builds a binary mask for one detection from a prototype
// tensor and a coefficient vector (the function header and the `box`
// parameter line are not shown in this listing).
176 const cv::Mat& prototype,
177 const std::vector<float>& coefficients,
179 const cv::Size& frameDims)
// The prototype must be 4-D with size[0] == 1 and one channel per coefficient.
// The action taken on mismatch is not shown.
182 if (prototype.dims != 4 || prototype.size[0] != 1 ||
183 prototype.size[1] !=
static_cast<int>(coefficients.size()))
186 const int channels = prototype.size[1];
187 const int maskHeight = prototype.size[2];
188 const int maskWidth = prototype.size[3];
189 const int maskPixels = maskWidth * maskHeight;
// NOTE(review): the raw buffer is reinterpreted as float without checking the
// element type or continuity - presumably CV_32F and contiguous, TODO confirm.
190 const float* protoData =
reinterpret_cast<const float*
>(prototype.data);
// Map the detection box from frame pixels to the mask grid. left/top are
// floored at 0 and right/bottom are capped at the mask size.
192 const int left = std::max(0,
static_cast<int>(box.x * maskWidth /
static_cast<float>(frameDims.width)));
193 const int top = std::max(0,
static_cast<int>(box.y * maskHeight /
static_cast<float>(frameDims.height)));
194 const int right = std::min(maskWidth,
static_cast<int>((box.x + box.width) * maskWidth /
static_cast<float>(frameDims.width)));
195 const int bottom = std::min(maskHeight,
static_cast<int>((box.y + box.height) * maskHeight /
static_cast<float>(frameDims.height)));
196 if (left >= right || top >= bottom)
// Only pixels inside the mapped box are evaluated; everything else stays 0.
199 std::vector<uint8_t> binary(maskPixels, 0);
200 for (
int y = top; y < bottom; ++y) {
201 for (
int x = left; x < right; ++x) {
202 const int pixel = y * maskWidth + x;
// Weighted sum of the prototype channels at this pixel (the declaration of
// `value` is not shown). The pixel is set when the sum is strictly positive.
204 for (
int channel = 0; channel < channels; ++channel) {
205 value += coefficients[channel] * protoData[channel * maskPixels + pixel];
207 binary[pixel] = value > 0.0f ? 1 : 0;
// The result keeps the prototype resolution, not the frame resolution.
211 result.
width = maskWidth;
212 result.
height = maskHeight;
213 result.
rle = EncodeBinaryMaskRLE(binary);
// Reads an ONNX model with cv::dnn::readNetFromONNX and reports failures as a
// human-readable string. Callers in this file treat an empty return value as
// success.
// `net` may be null: one caller in this file passes nullptr. What is done with
// `loaded_net` on success is not shown in this listing.
// NOTE(review): modelPath is taken by value; a const reference would avoid a
// copy.
217 std::string LoadONNXModel(std::string modelPath, cv::dnn::Net *net)
// OpenCV builds older than 4.3 return a fixed error message instead of
// attempting the load.
219 #if CV_VERSION_MAJOR < 4 || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 3)
220 return std::string(
"Failed to load ONNX model: YOLO requires OpenCV 4.3.0 or newer. "
221 "This OpenCV build is ") + CV_VERSION +
".";
224 cv::dnn::Net loaded_net = cv::dnn::readNetFromONNX(modelPath);
229 }
catch (
const cv::Exception& e) {
// OpenCV errors are prefixed; the FLOAT16 case is replaced with a message
// that tells the user to supply an FP32 model.
230 std::string error_text = std::string(
"Failed to load ONNX model: ") + e.what();
231 if (error_text.find(
"Unsupported data type: FLOAT16") != std::string::npos) {
232 error_text =
"Failed to load ONNX model: FLOAT16 is not supported by this OpenCV build. "
233 "Please use an FP32 ONNX model.";
236 }
catch (
const std::exception& e) {
237 return std::string(
"Failed to load ONNX model: ") + e.what();
// Fallback message; presumably inside a catch-all handler that is not shown.
239 return "Failed to load ONNX model: unknown error";
// Constructor initializer list (the constructor header is not shown in this
// listing). Stores the address of the supplied controller and sets defaults:
// CPU processing, a 640x640 network input and mask generation enabled.
247 : processingController(&processingController), processingDevice(
"CPU"), inpWidth(640), inpHeight(640), generateMasks(true){
// Default confidence threshold.
248 confThreshold = 0.10;
255 return LoadONNXModel(modelPath,
nullptr);
// Selects the DNN backend and target on `net` according to processingDevice.
// NOTE(review): the embedded line numbers jump, so the try block opening and
// any else branches are not shown.
258 void CVObjectDetection::setProcessingDevice(){
259 if(processingDevice ==
"GPU"){
// CUDA is only selected when OpenCV reports the CUDA target as available for
// the CUDA backend.
261 const std::vector<cv::dnn::Target> targets = cv::dnn::getAvailableTargets(cv::dnn::DNN_BACKEND_CUDA);
262 if (std::find(targets.begin(), targets.end(), cv::dnn::DNN_TARGET_CUDA) != targets.end()) {
263 net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
264 net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
267 }
catch (
const cv::Exception&) {
// Fallback to CPU; presumably reached when CUDA is unavailable or the query
// threw - the exact control flow is not shown.
269 processingDevice =
"CPU";
// This is a separate if, not an else, so it also runs after the fallback
// above has rewritten processingDevice.
272 if(processingDevice ==
"CPU"){
273 net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
274 net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
// Fragment of the routine that runs detection over a range of frames of a
// clip (the function header is not shown in this listing).
// Steps visible here: store the range, clear the controller error, validate
// the model and class-file paths, read class names, load the model, choose
// the device, then call DetectObjects for every frame and report progress.
281 start = _start; end = _end;
289 processingController->
SetError(
false,
"");
// Each check below reports an error through the controller. Whether the
// function returns afterwards is not shown.
291 if(modelPath.empty()) {
292 processingController->
SetError(
true,
"Missing path to YOLO ONNX model file");
296 if(classesFile.empty()) {
297 processingController->
SetError(
true,
"Missing path to class name file");
// The files are opened only to test that they are readable.
302 std::ifstream model_file(modelPath);
303 if(!model_file.good()){
304 processingController->
SetError(
true,
"Incorrect path to YOLO ONNX model file");
308 std::ifstream classes_file(classesFile);
309 if(!classes_file.good()){
310 processingController->
SetError(
true,
"Incorrect path to class name file");
// One class name per line of the class file.
318 while (std::getline(classes_file, line)) classNames.push_back(line);
321 std::string error_text = LoadONNXModel(modelPath, &net);
322 if (!error_text.empty()) {
323 processingController->
SetError(
true, error_text);
327 setProcessingDevice();
// When no usable interval was supplied, the range is derived from the clip
// start/end times multiplied by the reader frame rate.
330 if(!process_interval || end <= 1 || end-start == 0){
332 start = (int)(video.
Start() * video.
Reader()->info.fps.ToFloat());
333 end = (int)(video.
End() * video.
Reader()->info.fps.ToFloat());
// The range is inclusive on both ends.
336 for (frame_number = start; frame_number <= end; frame_number++)
343 std::shared_ptr<openshot::Frame> f = video.
GetFrame(frame_number);
346 cv::Mat cvimage = f->GetImageCV();
348 DetectObjects(cvimage, frame_number);
// NOTE(review): integer division by (end-start). If the range recomputed above
// still has start == end this divides by zero - confirm a guard exists.
351 processingController->
SetProgress(uint(100*(frame_number-start)/(end-start)));
// Runs the network on one frame and hands the raw outputs to postprocess().
// frame   : input image.
// frameId : frame number forwarded to postprocess().
// NOTE(review): the declaration of `blob`, the call that feeds it to the
// network and the try block opening are not shown in this listing.
356 void CVObjectDetection::DetectObjects(
const cv::Mat &frame,
size_t frameId){
// Build the input blob: pixel values scaled by 1/255, resized to
// inpWidth x inpHeight, zero mean, swapRB = true, crop = false.
361 cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0),
true,
false);
363 std::vector<cv::Mat> outs;
// Forward pass over all output layers. An OpenCV exception is reported through
// the controller instead of propagating.
368 net.forward(outs, getOutputsNames(net));
369 }
catch (
const cv::Exception& e) {
370 processingController->
SetError(
true, std::string(
"Object detection inference failed: ") + e.what());
376 postprocess(frame.size(), outs, frameId);
// Turns raw network outputs into tracked, de-duplicated detections.
// frameDims : size of the frame the detections refer to.
// outs      : output blobs from the forward pass.
// frameId   : frame number used by the tracker and the mask cache.
// Visible stages: decode each output (two layouts), non-maximum suppression,
// tracker update, mask association, two duplicate-removal passes and
// normalisation of boxes to fractions of the frame.
// NOTE(review): the embedded line numbers jump throughout, so several
// statements (sort comparators, else branches, continue/break statements, the
// final store of results) are not shown in this listing.
382 void CVObjectDetection::postprocess(
const cv::Size &frameDims,
const std::vector<cv::Mat>& outs,
size_t frameId)
// Per-detection accumulators; each is pushed to together on every accepted
// detection in the code below.
384 std::vector<int> classIds;
385 std::vector<float> confidences;
386 std::vector<cv::Rect> boxes;
387 std::vector<std::vector<ClassScore>> detectionClassScores;
388 std::vector<CVObjectMaskData> detectionMasks;
389 std::vector<int> objectIds;
// Number of best class scores kept per candidate.
390 const int maxClassCandidates = 5;
392 for (
size_t i = 0; i < outs.size(); ++i) {
393 cv::Mat det = outs[i];
// Layout 1: attribute-major blob. Attribute a of candidate c is read at
// data[a * candidates + c]; attributes 0..3 are the box values.
395 if (LooksLikeTransposedYoloOutput(det, classNames.size())) {
396 const int attributes = det.size[1];
397 const int candidates = det.size[2];
398 const int classCount = !classNames.empty()
399 ?
static_cast<int>(classNames.size())
// Attributes beyond the box values and class scores are read as mask
// coefficients.
401 const int maskCoefficientCount = attributes - 4 - classCount;
402 const cv::Mat* prototype =
nullptr;
// The prototype blob is the first 4-D output whose channel count equals the
// number of mask coefficients.
403 if (generateMasks && maskCoefficientCount > 0) {
404 auto prototypeIt = std::find_if(outs.begin(), outs.end(),
405 [maskCoefficientCount](
const cv::Mat& out) {
406 return out.dims == 4 && out.size[0] == 1 && out.size[1] == maskCoefficientCount;
408 if (prototypeIt != outs.end()) {
409 prototype = &(*prototypeIt);
412 const float* data =
reinterpret_cast<const float*
>(det.data);
// For each candidate keep at most maxClassCandidates class scores. The list is
// re-sorted after every insertion or replacement (comparator not shown;
// front() is later used as the best score and back() as the replaceable one).
414 for (
int candidateIndex = 0; candidateIndex < candidates; ++candidateIndex) {
415 std::vector<ClassScore> rowClassScores;
416 rowClassScores.reserve(maxClassCandidates);
418 for (
int classIndex = 0; classIndex < classCount; ++classIndex) {
419 const float classConfidence = data[(4 + classIndex) * candidates + candidateIndex];
420 if (rowClassScores.size() <
static_cast<size_t>(maxClassCandidates)) {
421 rowClassScores.emplace_back(classIndex, classConfidence);
422 std::sort(rowClassScores.begin(), rowClassScores.end(),
424 }
else if (classConfidence > rowClassScores.back().score) {
425 rowClassScores.back() =
ClassScore(classIndex, classConfidence);
426 std::sort(rowClassScores.begin(), rowClassScores.end(),
// Candidates with no score or a best score at or below the threshold are
// filtered here (the action itself is not shown, presumably a skip).
431 if (rowClassScores.empty() || rowClassScores.front().score <= confThreshold) {
// Decode the box and filter empty boxes.
435 cv::Rect box = ScaledXYWHBox(
436 data[candidateIndex],
437 data[candidates + candidateIndex],
438 data[2 * candidates + candidateIndex],
439 data[3 * candidates + candidateIndex],
440 frameDims, inpWidth, inpHeight);
441 if (box.width <= 0 || box.height <= 0) {
445 classIds.push_back(rowClassScores.front().classId);
446 confidences.push_back(rowClassScores.front().score);
447 boxes.push_back(box);
448 detectionClassScores.push_back(rowClassScores);
// Gather this candidate's mask coefficients and build its mask. The condition
// that chooses between the real mask and the empty placeholder is not shown;
// `prototype` is dereferenced, so it must be non-null on this path.
450 std::vector<float> coefficients;
451 coefficients.reserve(maskCoefficientCount);
452 for (
int coefficientIndex = 0; coefficientIndex < maskCoefficientCount; ++coefficientIndex) {
453 coefficients.push_back(data[(4 + classCount + coefficientIndex) * candidates + candidateIndex]);
455 detectionMasks.push_back(BuildMaskFromPrototype(*prototype, coefficients, box, frameDims));
457 detectionMasks.push_back({});
// Layout 2: one row per detection. Columns 0..3 are the box values, column 4
// is a factor multiplied into every class score, and class scores start at
// column 5. Rows are rejected unless the blob is 2-D with at least 6 columns.
465 det = det.reshape(1, det.size[1]);
467 if (det.dims != 2 || det.cols < 6) {
471 const float xFactor =
static_cast<float>(frameDims.width) /
static_cast<float>(inpWidth);
472 const float yFactor =
static_cast<float>(frameDims.height) /
static_cast<float>(inpHeight);
474 float* data =
reinterpret_cast<float*
>(det.data);
475 for (
int j = 0; j < det.rows; ++j, data += det.cols) {
476 std::vector<ClassScore> rowClassScores;
477 rowClassScores.reserve(maxClassCandidates);
// When class names are loaded, columns beyond 5 + classNames.size() are
// ignored.
478 int classScoresEnd = det.cols;
479 if (!classNames.empty()) {
480 classScoresEnd = std::min(det.cols, 5 +
static_cast<int>(classNames.size()));
482 for (
int classIndex = 5; classIndex < classScoresEnd; ++classIndex) {
483 const float classConfidence = data[classIndex] * data[4];
484 if (rowClassScores.size() <
static_cast<size_t>(maxClassCandidates)) {
485 rowClassScores.emplace_back(classIndex - 5, classConfidence);
486 std::sort(rowClassScores.begin(), rowClassScores.end(),
488 }
else if (classConfidence > rowClassScores.back().score) {
489 rowClassScores.back() =
ClassScore(classIndex - 5, classConfidence);
490 std::sort(rowClassScores.begin(), rowClassScores.end(),
494 if (rowClassScores.empty()) {
498 float confidence = rowClassScores.front().score;
500 if (confidence > confThreshold) {
// If any box value exceeds 1 the values are scaled by frame/input ratio;
// the second group of assignments scales by the frame size instead
// (presumably the else branch, which is not shown).
506 if (data[0] > 1.0f || data[1] > 1.0f || data[2] > 1.0f || data[3] > 1.0f) {
507 centerX =
static_cast<int>(data[0] * xFactor);
508 centerY =
static_cast<int>(data[1] * yFactor);
509 width =
static_cast<int>(data[2] * xFactor);
510 height =
static_cast<int>(data[3] * yFactor);
512 centerX =
static_cast<int>(data[0] * frameDims.width);
513 centerY =
static_cast<int>(data[1] * frameDims.height);
514 width =
static_cast<int>(data[2] * frameDims.width);
515 height =
static_cast<int>(data[3] * frameDims.height);
// Unlike layout 1, this box is not clamped to the frame here.
518 int left = centerX - width / 2;
519 int top = centerY - height / 2;
521 classIds.push_back(rowClassScores.front().classId);
522 confidences.push_back(confidence);
523 boxes.push_back(cv::Rect(left, top, width, height));
524 detectionClassScores.push_back(rowClassScores);
525 detectionMasks.push_back({});
// Non-maximum suppression; `indices` receives the surviving detections.
532 std::vector<int> indices;
533 cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
// Copy the survivors into parallel vectors for the tracker.
536 std::vector<cv::Rect> sortBoxes;
537 std::vector<float> sortConfidences;
538 std::vector<int> sortClassIds;
539 std::vector<std::vector<ClassScore>> sortClassScores;
540 std::vector<CVObjectMaskData> sortMasks;
541 for(
auto index : indices) {
542 sortBoxes.push_back(boxes[index]);
543 sortConfidences.push_back(confidences[index]);
544 sortClassIds.push_back(classIds[index]);
545 sortClassScores.push_back(detectionClassScores[index]);
546 sortMasks.push_back(index <
static_cast<int>(detectionMasks.size()) ? detectionMasks[index] :
CVObjectMaskData());
// Tracker update; the third argument is the frame diagonal length.
548 sort.
update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), sortConfidences, sortClassIds, sortClassScores);
// The accumulators are reused for the tracker output of this frame.
551 boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
552 std::vector<CVObjectMaskData> masks;
555 if(TBox.frame == frameId){
556 boxes.push_back(TBox.box);
557 confidences.push_back(TBox.confidence);
558 classIds.push_back(TBox.classId);
559 objectIds.push_back(TBox.id);
// Pick a mask for the tracked box among the detections of the same class that
// carry mask data, keeping the one with the highest `score` (its computation
// is not shown).
561 double bestIoU = 0.0;
562 for (
size_t maskIndex = 0; maskIndex < sortMasks.size(); ++maskIndex) {
563 if (!sortMasks[maskIndex].HasData() || sortClassIds[maskIndex] != TBox.classId)
566 if (score > bestIoU) {
568 mask = sortMasks[maskIndex];
// Alternative source: a cached mask for this object id that is between 1 and 5
// frames old is transformed from its stored box. The condition that selects
// this path and the remaining call arguments are not shown.
574 const auto recentMask = recentObjectMasks.find(TBox.id);
575 if (recentMask != recentObjectMasks.end() &&
576 frameId > recentMask->second.frameId &&
577 frameId - recentMask->second.frameId <= 5) {
578 mask = TransformMaskToBox(
579 recentMask->second.mask,
580 recentMask->second.box,
588 masks.push_back(mask);
// Duplicate removal, pass 1: two boxes of the same class whose centres differ
// by less than 10 pixels on both axes are treated as one object and one entry
// is erased from all five parallel vectors.
// NOTE(review): elements are erased while i and j keep advancing; whether the
// indices are adjusted or the loops are left afterwards is not shown.
593 for(uint i = 0; i<boxes.size(); i++){
594 for(uint j = i+1; j<boxes.size(); j++){
595 int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].height/2);
596 int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].height/2);
598 if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
599 if(classIds[i] == classIds[j]){
600 if(confidences[i] >= confidences[j]){
601 boxes.erase(boxes.begin() + j);
602 classIds.erase(classIds.begin() + j);
603 confidences.erase(confidences.begin() + j);
604 objectIds.erase(objectIds.begin() + j);
605 masks.erase(masks.begin() + j);
609 boxes.erase(boxes.begin() + i);
610 classIds.erase(classIds.begin() + i);
611 confidences.erase(confidences.begin() + i);
612 objectIds.erase(objectIds.begin() + i);
613 masks.erase(masks.begin() + i);
// Duplicate removal, pass 2: same erase logic, triggered when iou() reports
// the pair as overlapping. The same index caveat applies.
623 for(uint i = 0; i<boxes.size(); i++){
624 for(uint j = i+1; j<boxes.size(); j++){
626 if( iou(boxes[i], boxes[j])){
627 if(classIds[i] == classIds[j]){
628 if(confidences[i] >= confidences[j]){
629 boxes.erase(boxes.begin() + j);
630 classIds.erase(classIds.begin() + j);
631 confidences.erase(confidences.begin() + j);
632 objectIds.erase(objectIds.begin() + j);
633 masks.erase(masks.begin() + j);
637 boxes.erase(boxes.begin() + i);
638 classIds.erase(classIds.begin() + i);
639 confidences.erase(confidences.begin() + i);
640 objectIds.erase(objectIds.begin() + i);
641 masks.erase(masks.begin() + i);
// Express every remaining box as fractions of the frame width and height.
651 std::vector<cv::Rect_<float>> normalized_boxes;
652 for(
auto box : boxes){
653 cv::Rect_<float> normalized_box;
654 normalized_box.x = (box.x)/(
float)frameDims.
width;
655 normalized_box.y = (box.y)/(
float)frameDims.height;
656 normalized_box.width = (box.width)/(
float)frameDims.width;
657 normalized_box.height = (box.height)/(
float)frameDims.height;
658 normalized_boxes.push_back(normalized_box);
// Overlap test between two integer boxes based on intersection over union.
// The function returns bool, not the ratio. The comparison that turns the
// ratio into the returned flag is not shown in this listing.
665 bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
// Corners of the intersection rectangle.
667 int xA = std::max(pred_box.x, sort_box.x);
668 int yA = std::max(pred_box.y, sort_box.y);
669 int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
670 int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
// Areas add 1 to each extent. As a result boxes that only touch along an edge
// still get a non-zero intersection area.
673 int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
676 int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
677 int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
// Intersection divided by union, computed in float.
680 float iou = interArea / (float)(boxAArea + boxBArea - interArea);
// Collects the names of the unconnected output layers of `net`.
// NOTE(review): the return statement is not shown in this listing.
689 std::vector<cv::String> CVObjectDetection::getOutputsNames(
const cv::dnn::Net& net)
692 std::vector<int> outLayers = net.getUnconnectedOutLayers();
695 std::vector<cv::String> layersNames = net.getLayerNames();
698 std::vector<cv::String> names;
699 names.resize(outLayers.size());
// The subtraction implies the layer ids are 1-based while layersNames is
// indexed from 0.
700 for (
size_t i = 0; i < outLayers.size(); ++i)
701 names[i] = layersNames[outLayers[i] - 1];
// Gives every tracked object a single class id across all stored detections.
// Pass 1 sums the confidence per (object id, class id) pair.
// Pass 2 picks one class per object.
// Pass 3 rewrites the stored class ids.
// NOTE(review): the loops that introduce `detections`, the source of
// `confidence` and the `count` member of ClassEvidence are not shown in this
// listing.
716 void CVObjectDetection::NormalizeTrackedClasses()
718 struct ClassEvidence {
719 float confidenceSum = 0.0f;
// object id -> class id -> accumulated evidence.
723 std::map<int, std::map<int, ClassEvidence>> objectClassEvidence;
// Only indices present in both vectors are visited.
726 const size_t detectionCount = std::min(detections.
objectIds.size(), detections.
classIds.size());
727 for (
size_t i = 0; i < detectionCount; ++i) {
729 ClassEvidence& evidence = objectClassEvidence[detections.
objectIds[i]][detections.
classIds[i]];
730 evidence.confidenceSum += confidence;
// Winner per object: highest confidence sum, ties broken by higher count.
735 std::map<int, int> dominantClassByObject;
736 for (
const auto& objectEvidence : objectClassEvidence) {
737 const int objectId = objectEvidence.first;
738 int bestClassId = -1;
739 ClassEvidence bestEvidence;
740 for (
const auto& classEvidence : objectEvidence.second) {
741 const int classId = classEvidence.first;
742 const ClassEvidence& evidence = classEvidence.second;
743 if (bestClassId < 0 ||
744 evidence.confidenceSum > bestEvidence.confidenceSum ||
745 (evidence.confidenceSum == bestEvidence.confidenceSum && evidence.count > bestEvidence.count)) {
746 bestClassId = classId;
747 bestEvidence = evidence;
750 if (bestClassId >= 0) {
751 dominantClassByObject[objectId] = bestClassId;
// Overwrite each stored class id with the winner for its object id.
757 const size_t detectionCount = std::min(detections.
objectIds.size(), detections.
classIds.size());
758 for (
size_t i = 0; i < detectionCount; ++i) {
759 const auto dominantClass = dominantClassByObject.find(detections.
objectIds[i]);
760 if (dominantClass != dominantClassByObject.end()) {
761 detections.
classIds[i] = dominantClass->second;
// Fragment that serialises the detection results to the protobuf file at
// protobuf_data_path (the function header, the per-frame loop and the return
// statements are not shown in this listing).
768 if(protobuf_data_path.empty()) {
769 cerr <<
"Missing path to object detection protobuf data file." << endl;
// Class ids are unified per tracked object before anything is written.
773 NormalizeTrackedClasses();
776 pb_objdetect::ObjDetect objMessage;
// Copy the class names into the message.
// NOTE(review): `i` is int while classNames.size() is unsigned, so this is a
// signed/unsigned comparison.
779 for(
int i = 0; i<classNames.size(); i++){
780 std::string* className = objMessage.add_classnames();
781 className->assign(classNames.at(i));
// Stamp the message with the current time.
791 *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));
// The output file is truncated and written in binary mode.
795 std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
796 if (!objMessage.SerializeToOstream(&output)) {
797 cerr <<
"Failed to write protobuf message." << endl;
// NOTE(review): the protobuf documentation states that no other protobuf
// function may be called after ShutdownProtobufLibrary(). Confirm nothing in
// the process saves or loads again after this point.
803 google::protobuf::ShutdownProtobufLibrary();
// Fragment that copies one frame of detection data (dData) into a protobuf
// frame message (the function header is not shown in this listing).
813 pbFrameData->set_id(dData.
frameId);
// One bounding_box entry per detection. Index i is used across boxes,
// classIds and objectIds through at(), which throws if a vector is shorter
// than boxes.
815 for(
size_t i = 0; i < dData.
boxes.size(); i++){
816 pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();
819 box->set_x(dData.
boxes.at(i).x);
820 box->set_y(dData.
boxes.at(i).y);
821 box->set_w(dData.
boxes.at(i).width);
822 box->set_h(dData.
boxes.at(i).height);
823 box->set_classid(dData.
classIds.at(i));
825 box->set_objectid(dData.
objectIds.at(i));
// The mask is optional: it is written only when an entry exists at index i
// and that entry reports data.
827 if (i < dData.
masks.size() && dData.
masks.at(i).HasData()) {
828 pb_objdetect::Frame_Box_Mask* mask = box->mutable_mask();
829 mask->set_width(dData.
masks.at(i).width);
830 mask->set_height(dData.
masks.at(i).height);
831 for (uint32_t count : dData.
masks.at(i).rle) {
832 mask->add_rle(count);
// Exception handler fragment (the enclosing function and its try block are
// not shown in this listing). The exception is not rethrown in the visible
// lines; a fixed message is printed to standard output and `e` is unused.
848 catch (
const std::exception& e)
852 std::cout<<
"JSON is invalid (missing keys or invalid data types)"<<std::endl;
// Fragment that copies settings from a JSON object `root` into member fields
// (the function header is not shown in this listing).
// A key is applied only when it is present and not null.
// Most settings accept two spellings that are checked one after the other.
// When both are present the one checked second wins, because it is assigned
// last.
860 if (!root[
"protobuf_data_path"].isNull()){
861 protobuf_data_path = (root[
"protobuf_data_path"].asString());
// Processing device name; setProcessingDevice() compares it with GPU and CPU.
864 if (!root[
"processing-device"].isNull()){
865 processingDevice = (root[
"processing-device"].asString());
867 if (!root[
"processing_device"].isNull()){
868 processingDevice = (root[
"processing_device"].asString());
// Path of the class-name file.
870 if (!root[
"class-names"].isNull()){
871 classesFile = (root[
"class-names"].asString());
873 if (!root[
"classes_file"].isNull()){
874 classesFile = (root[
"classes_file"].asString());
// Path of the model file.
876 if (!root[
"model"].isNull()){
877 modelPath = (root[
"model"].asString());
879 if (!root[
"model_path"].isNull()){
880 modelPath = (root[
"model_path"].asString());
// Network input size.
// NOTE(review): the values are stored without a range check in the visible
// lines; zero or negative sizes would reach blobFromImage.
882 if (!root[
"input-width"].isNull()){
883 inpWidth = root[
"input-width"].asInt();
885 if (!root[
"input_width"].isNull()){
886 inpWidth = root[
"input_width"].asInt();
888 if (!root[
"input-height"].isNull()){
889 inpHeight = root[
"input-height"].asInt();
891 if (!root[
"input_height"].isNull()){
892 inpHeight = root[
"input_height"].asInt();
// Detection thresholds.
894 if (!root[
"confidence-threshold"].isNull()){
895 confThreshold = root[
"confidence-threshold"].asFloat();
897 if (!root[
"confidence_threshold"].isNull()){
898 confThreshold = root[
"confidence_threshold"].asFloat();
900 if (!root[
"nms-threshold"].isNull()){
901 nmsThreshold = root[
"nms-threshold"].asFloat();
903 if (!root[
"nms_threshold"].isNull()){
904 nmsThreshold = root[
"nms_threshold"].asFloat();
// Mask generation switch.
906 if (!root[
"generate-masks"].isNull()){
907 generateMasks = root[
"generate-masks"].asBool();
909 if (!root[
"generate_masks"].isNull()){
910 generateMasks = root[
"generate_masks"].asBool();
// Fragment that reads detection results back from the protobuf file at
// protobuf_data_path (the function header, the store of each decoded frame
// and the return statements are not shown in this listing).
922 if(protobuf_data_path.empty()) {
923 cerr <<
"Missing path to object detection protobuf data file." << endl;
928 pb_objdetect::ObjDetect objMessage;
932 fstream input(protobuf_data_path, ios::in | ios::binary);
933 if (!objMessage.ParseFromIstream(&input)) {
934 cerr <<
"Failed to parse protobuf message." << endl;
// Class names are appended to classNames; whether the vector is cleared first
// is not shown.
943 for(
int i = 0; i < objMessage.classnames_size(); i++){
944 classNames.push_back(objMessage.classnames(i));
// One iteration per stored frame.
// NOTE(review): `i` is size_t while frame_size() is compared against it, so
// this is a signed/unsigned comparison if frame_size() returns int.
948 for (
size_t i = 0; i < objMessage.frame_size(); i++) {
950 const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
953 size_t id = pbFrameData.id();
956 const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
// Parallel vectors rebuilt for this frame.
959 std::vector<int> classIds;
960 std::vector<float> confidences;
961 std::vector<cv::Rect_<float>> boxes;
962 std::vector<int> objectIds;
963 std::vector<CVObjectMaskData> masks;
// NOTE(review): this inner `i` shadows the frame index `i` of the outer loop.
965 for(
int i = 0; i < pbFrameData.bounding_box_size(); i++){
967 float x = pBox.Get(i).x();
float y = pBox.Get(i).y();
968 float w = pBox.Get(i).w();
float h = pBox.Get(i).h();
970 cv::Rect_<float> box(x, y, w, h);
973 int classId = pBox.Get(i).classid();
float confidence = pBox.Get(i).confidence();
975 int objectId = pBox.Get(i).objectid();
978 boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
979 objectIds.push_back(objectId);
// The mask is copied only when the box carries one (the declaration of `mask`
// is not shown). A mask entry is pushed for every box, so masks stays aligned
// with boxes.
981 if (pBox.Get(i).has_mask()) {
982 mask.
width = pBox.Get(i).mask().width();
983 mask.
height = pBox.Get(i).mask().height();
984 for (
int rleIndex = 0; rleIndex < pBox.Get(i).mask().rle_size(); ++rleIndex) {
985 mask.
rle.push_back(pBox.Get(i).mask().rle(rleIndex));
988 masks.push_back(mask);
// NOTE(review): the protobuf documentation states that no other protobuf
// function may be called after ShutdownProtobufLibrary(). Confirm nothing in
// the process loads or saves again after this point.
996 google::protobuf::ShutdownProtobufLibrary();