[CORE] Minor changes to handle YoloV11

lagadic · Oct 16, 2024 · 1272b22 · 1272b22
1 parent a65eebc
commit 1272b22
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 10 deletions.
diff --git a/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h b/modules/detection/include/visp3/detection/vpDetectorDNNOpenCV.h
@@ -74,6 +74,7 @@ BEGIN_VISP_NAMESPACE
  * - Yolo v5, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov5 network
  * - Yolo v7, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov7 network
  * - Yolo v8, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov8 network
+ * - Yolo v11, see usage to detect objects belonging to the COCO dataset using \ref dnn_supported_yolov11 network
  *
  * This class can be initialized from a JSON file if ViSP has been compiled with NLOHMANN JSON (see \ref soft_tool_json to see how to do it).
  * Examples of such JSON files can be found in the tutorial folder.
@@ -98,8 +99,9 @@ class VISP_EXPORT vpDetectorDNNOpenCV
     YOLO_V4 = 5, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV4 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV3_V4 for more information.*/
     YOLO_V5 = 6, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV5 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV5_V7 for more information.*/
     YOLO_V7 = 7, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV7 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV5_V7 for more information.*/
-    YOLO_V8 = 8, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV8 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV8 for more information.*/
-    COUNT = 9 /*!< The number of parsing method that come along with the \b vpDetectorDNNOpenCV class.*/
+    YOLO_V8 = 8, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV8 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV8_V11 for more information.*/
+    YOLO_V11 = 9, /*!< The \b vpDetectorDNNOpenCV object will use the parsing method corresponding to a YoloV11 DNN. See \b vpDetectorDNNOpenCV::postProcess_YoloV8_V11 for more information.*/
+    COUNT = 10 /*!< The number of parsing methods that come along with the \b vpDetectorDNNOpenCV class.*/
   } DNNResultsParsingType;
 
   typedef struct DetectionCandidates
@@ -560,7 +562,7 @@ class VISP_EXPORT vpDetectorDNNOpenCV
 
   void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig);
 
-  void postProcess_YoloV8(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig);
+  void postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig);
 
   void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig);
 

diff --git a/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp b/modules/detection/src/dnn/vpDetectorDNNOpenCV.cpp
@@ -50,7 +50,7 @@ BEGIN_VISP_NAMESPACE
  *
  * \return std::string The list of the supported parsing methods / types of DNNs.
  */
-std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes()
+  std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes()
 {
   std::string list = "[";
   for (unsigned int i = 0; i < vpDetectorDNNOpenCV::COUNT - 1; i++) {
@@ -88,6 +88,9 @@ std::string vpDetectorDNNOpenCV::dnnResultsParsingTypeToString(const DNNResultsP
   case YOLO_V8:
     name = "yolov8";
     break;
+  case YOLO_V11:
+    name = "yolov11";
+    break;
   case FASTER_RCNN:
     name = "faster-rcnn";
     break;
@@ -474,7 +477,7 @@ std::vector<cv::String> vpDetectorDNNOpenCV::getOutputsNames()
     names.resize(outLayers.size());
     for (size_t i = 0; i < outLayers.size(); ++i)
       names[i] = layersNames[outLayers[i] - 1];
-  }
+}
   return names;
 }
 #endif
@@ -499,7 +502,8 @@ void vpDetectorDNNOpenCV::postProcess(DetectionCandidates &proposals)
     postProcess_YoloV5_V7(proposals, m_dnnRes, m_netConfig);
     break;
   case YOLO_V8:
-    postProcess_YoloV8(proposals, m_dnnRes, m_netConfig);
+  case YOLO_V11:
+    postProcess_YoloV8_V11(proposals, m_dnnRes, m_netConfig);
     break;
   case FASTER_RCNN:
     postProcess_FasterRCNN(proposals, m_dnnRes, m_netConfig);
@@ -815,7 +819,7 @@ void vpDetectorDNNOpenCV::postProcess_YoloV5_V7(DetectionCandidates &proposals,
   \param dnnRes: raw results of the \b vpDetectorDNNOpenCV::detect step.
   \param netConfig: the configuration of the network, to know for instance the DNN input size.
 */
-void vpDetectorDNNOpenCV::postProcess_YoloV8(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
+void vpDetectorDNNOpenCV::postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
 {
   // Code adapted from here: https://github.com/JustasBart/yolov8_CPP_Inference_OpenCV_ONNX/blob/minimalistic/inference.cpp
   // Compute the ratio between the original size of the image and the network size to translate network coordinates into
@@ -965,7 +969,7 @@ void vpDetectorDNNOpenCV::postProcess_SSD_MobileNet(DetectionCandidates &proposa
       proposals.m_confidences.push_back(maxScore);
       proposals.m_boxes.push_back(cv::Rect(left, top, width, height));
       proposals.m_classIds.push_back(classId);
-    }
+}
   }
 }
 #endif
@@ -1146,7 +1150,7 @@ void vpDetectorDNNOpenCV::setPreferableTarget(const int &targetId) { m_net.setPr
 void vpDetectorDNNOpenCV::setScaleFactor(const double &scaleFactor)
 {
   m_netConfig.m_scaleFactor = scaleFactor;
-  if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) {
+  if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8 || m_netConfig.m_parsingMethodType == YOLO_V11) && m_netConfig.m_scaleFactor != 1 / 255.) {
     std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] WARNING: scale factor should be 1/255. to normalize pixels value." << std::endl;
   }
 }
@@ -1169,7 +1173,7 @@ void vpDetectorDNNOpenCV::setParsingMethod(const DNNResultsParsingType &typePars
 {
   m_netConfig.m_parsingMethodType = typeParsingMethod;
   m_parsingMethod = parsingMethod;
-  if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) {
+  if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8 || m_netConfig.m_parsingMethodType == YOLO_V11) && m_netConfig.m_scaleFactor != 1 / 255.) {
     m_netConfig.m_scaleFactor = 1 / 255.;
     std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] NB: scale factor changed to 1/255. to normalize pixels value." << std::endl;
   }