Skip to content

Commit

Permalink
Docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jamjamjon committed Jul 31, 2024
1 parent 6f35769 commit 28e3af7
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 39 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@



A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vison** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [RTDETR](https://arxiv.org/abs/2304.08069), [SAM](https://github.com/facebookresearch/segment-anything), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [MODNet](https://github.com/ZHKKKe/MODNet) and others.
A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vision** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [RTDETR](https://arxiv.org/abs/2304.08069), [SAM](https://github.com/facebookresearch/segment-anything), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [MODNet](https://github.com/ZHKKKe/MODNet) and others.



| Monocular Depth Estimation |
| :--------------------------------------------------------------: |
Expand Down
50 changes: 16 additions & 34 deletions examples/sam/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub struct Args {
fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = Args::parse();

// Options
let (options_encoder, options_decoder, saveout) = match args.kind {
SamKind::Sam => {
let options_encoder = Options::default()
Expand Down Expand Up @@ -80,45 +81,26 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.use_low_res_mask(args.use_low_res_mask)
.with_find_contours(true);

// build model
// Build model
let mut model = SAM::new(options_encoder, options_decoder)?;

// build dataloader
let dl = DataLoader::default()
.with_batch(1)
.load("./assets/truck.jpg")?;
// Load image
let xs = vec![DataLoader::try_read("./assets/truck.jpg")?];

// build annotator
let annotator = Annotator::default()
.with_bboxes_thickness(7)
.without_bboxes_name(true)
.without_bboxes_conf(true)
.without_mbrs(true)
.with_saveout(saveout);
// Build annotator
let annotator = Annotator::default().with_saveout(saveout);

// run & annotate
for (xs, _paths) in dl {
// prompt
let prompts = vec![
SamPrompt::default().with_postive_point(500., 375.), // postive point
// .with_postive_point(1125., 625.), // postive point
// .with_postive_point(774., 366.), // postive point
// .with_negative_point(774., 366.), // negative point
// .with_bbox(300., 175., 525., 500.), // bbox
// .with_bbox(215., 297., 643., 459.), // bbox
// Prompt
let prompts = vec![
SamPrompt::default()
// .with_postive_point(500., 375.), // postive point
// .with_negative_point(774., 366.), // negative point
.with_bbox(215., 297., 643., 459.), // bbox
];

// .with_bbox(26., 20., 873., 990.), // bbox
// .with_postive_point(223., 140.) // example 2
// .with_postive_point(488., 523.), // example 2
// .with_postive_point(221., 482.) // example 3
// .with_postive_point(498., 633.) // example 3
// .with_postive_point(750., 379.), // example 3
// .with_bbox(310., 228., 424., 296.) // bbox example 7
// .with_bbox(45., 260., 515., 470.), // bbox example 7
];
let ys = model.run(&xs, &prompts)?;
annotator.annotate(&xs, &ys);
}
// Run & Annotate
let ys = model.run(&xs, &prompts)?;
annotator.annotate(&xs, &ys);

Ok(())
}
9 changes: 6 additions & 3 deletions examples/yolo/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ pub struct Args {

#[arg(long)]
pub no_plot: bool,

#[arg(long)]
pub no_contours: bool,
}

fn main() -> Result<()> {
Expand Down Expand Up @@ -113,7 +116,7 @@ fn main() -> Result<()> {
),
YOLOTask::Segment => (
options.with_model(&args.model.unwrap_or("yolov8m-seg-dyn.onnx".to_string()))?,
"YOLOv8-Detect",
"YOLOv8-Segment",
),
YOLOTask::Pose => (
options.with_model(&args.model.unwrap_or("yolov8m-pose-dyn.onnx".to_string()))?,
Expand Down Expand Up @@ -173,7 +176,7 @@ fn main() -> Result<()> {
.with_confs(&[0.2, 0.15]) // class_0: 0.4, others: 0.15
// .with_names(&coco::NAMES_80)
.with_names2(&coco::KEYPOINTS_NAMES_17)
.with_find_contours(false) // find contours or not
.with_find_contours(!args.no_contours) // find contours or not
.with_profile(args.profile);
let mut model = YOLO::new(options)?;

Expand All @@ -186,7 +189,7 @@ fn main() -> Result<()> {
let annotator = Annotator::default()
.with_skeletons(&coco::SKELETONS_16)
.with_bboxes_thickness(4)
.without_masks(false) // No masks plotting when doing segment task.
.without_masks(true) // No masks plotting when doing segment task.
.with_saveout(saveout);

// run & annotate
Expand Down
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
//! - [YOLOv10](https://github.com/THU-MIG/yolov10): Object Detection
//! - [RT-DETR](https://arxiv.org/abs/2304.08069): Object Detection
//! - [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM): Instance Segmentation
//! - [SAM](https://github.com/facebookresearch/segment-anything): Segment Anything
//! - [MobileSAM](https://github.com/ChaoningZhang/MobileSAM): Segment Anything
//! - [EdgeSAM](https://github.com/chongzhou96/EdgeSAM): Segment Anything
//! - [SAM-HQ](https://github.com/SysCV/sam-hq): Segment Anything
//! - [YOLO-World](https://github.com/AILab-CVC/YOLO-World): Object Detection
//! - [DINOv2](https://github.com/facebookresearch/dinov2): Vision-Self-Supervised
//! - [CLIP](https://github.com/openai/CLIP): Vision-Language
Expand Down
2 changes: 1 addition & 1 deletion src/models/sam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ impl SAM {
let (h, w) = mask.dim();
let luma = if self.use_low_res_mask {
Ops::resize_lumaf32_vec(
&mask.mapv(|x| if x < 0. { 0. } else { x }).into_raw_vec(),
&mask.to_owned().into_raw_vec(),
w as _,
h as _,
image_width as _,
Expand Down

0 comments on commit 28e3af7

Please sign in to comment.