diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index 68916e9..0d94a82 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -13,16 +13,24 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA221E62DD300DE43BC /* VideoCapture.swift */; }; 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA721E62DD300DE43BC /* AppDelegate.swift */; }; 636EFCB921E62E3900DE43BC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 636EFCB821E62E3900DE43BC /* Assets.xcassets */; }; - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */; }; - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */; }; - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */; }; - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */; }; - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */; }; 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; + 7333105F2C69CE95001D647B /* Colors.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7333105E2C69CE95001D647B /* Colors.swift */; }; + 737FDB332C7A6D19009A6696 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB292C7A6D0A009A6696 /* yolov8s.mlpackage */; }; + 737FDB342C7A6D19009A6696 /* yolov8x-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2A2C7A6D0D009A6696 /* yolov8x-pose.mlpackage */; }; + 737FDB352C7A6D19009A6696 /* yolov8l-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2B2C7A6D0F009A6696 /* yolov8l-pose.mlpackage */; }; + 737FDB362C7A6D19009A6696 /* yolov8m-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2C2C7A6D11009A6696 /* yolov8m-pose.mlpackage */; }; + 737FDB372C7A6D19009A6696 /* yolov8s-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2D2C7A6D12009A6696 /* yolov8s-pose.mlpackage */; }; + 737FDB382C7A6D19009A6696 /* yolov8n-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2E2C7A6D12009A6696 /* yolov8n-pose.mlpackage */; }; + 737FDB392C7A6D19009A6696 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB2F2C7A6D13009A6696 /* yolov8m.mlpackage */; }; + 737FDB3A2C7A6D19009A6696 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB302C7A6D17009A6696 /* yolov8x.mlpackage */; }; + 737FDB3B2C7A6D19009A6696 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB312C7A6D17009A6696 /* yolov8n.mlpackage */; }; + 737FDB3C2C7A6D19009A6696 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 737FDB322C7A6D19009A6696 /* yolov8l.mlpackage */; }; + 73B6CD452C5DA43E008A9CEC /* 
PostProcessPose.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ + /* Begin PBXFileReference section */ 6323C44D22186177008AE681 /* LaunchScreen.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = LaunchScreen.storyboard; sourceTree = ""; }; 6323C44F22186177008AE681 /* Main.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = Main.storyboard; sourceTree = ""; }; @@ -34,12 +42,19 @@ 636EFCA221E62DD300DE43BC /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 636EFCA721E62DD300DE43BC /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; + 7333105E2C69CE95001D647B /* Colors.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Colors.swift; sourceTree = ""; }; + 737FDB292C7A6D0A009A6696 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; + 737FDB2A2C7A6D0D009A6696 /* yolov8x-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8x-pose.mlpackage"; sourceTree = ""; }; + 737FDB2B2C7A6D0F009A6696 /* yolov8l-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8l-pose.mlpackage"; sourceTree = ""; }; + 737FDB2C2C7A6D11009A6696 /* yolov8m-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8m-pose.mlpackage"; sourceTree = ""; }; + 737FDB2D2C7A6D12009A6696 /* yolov8s-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8s-pose.mlpackage"; sourceTree = ""; }; + 737FDB2E2C7A6D12009A6696 /* yolov8n-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8n-pose.mlpackage"; sourceTree = ""; }; + 737FDB2F2C7A6D13009A6696 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; + 737FDB302C7A6D17009A6696 /* 
yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; + 737FDB312C7A6D17009A6696 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; + 737FDB322C7A6D19009A6696 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; + 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PostProcessPose.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; 8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -61,6 +76,8 @@ children = ( 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, + 73B6CD442C5DA43E008A9CEC /* PostProcessPose.swift */, + 7333105E2C69CE95001D647B /* Colors.swift */, ); path = Utilities; sourceTree = ""; @@ -86,11 +103,16 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */, - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */, - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */, - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */, - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */, + 737FDB2B2C7A6D0F009A6696 /* yolov8l-pose.mlpackage */, + 737FDB322C7A6D19009A6696 /* yolov8l.mlpackage */, + 737FDB2C2C7A6D11009A6696 /* yolov8m-pose.mlpackage */, + 737FDB2F2C7A6D13009A6696 /* yolov8m.mlpackage */, + 737FDB2E2C7A6D12009A6696 /* yolov8n-pose.mlpackage */, + 737FDB312C7A6D17009A6696 /* yolov8n.mlpackage */, + 737FDB2D2C7A6D12009A6696 /* yolov8s-pose.mlpackage */, + 737FDB292C7A6D0A009A6696 /* yolov8s.mlpackage */, + 737FDB2A2C7A6D0D009A6696 /* yolov8x-pose.mlpackage */, + 737FDB302C7A6D17009A6696 /* yolov8x.mlpackage */, ); path = Models; sourceTree = ""; @@ -209,16 +231,23 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */, + 737FDB352C7A6D19009A6696 /* yolov8l-pose.mlpackage in Sources */, + 737FDB332C7A6D19009A6696 /* yolov8s.mlpackage in Sources */, + 73B6CD452C5DA43E008A9CEC /* PostProcessPose.swift in Sources */, + 737FDB3C2C7A6D19009A6696 /* yolov8l.mlpackage in Sources */, + 737FDB3B2C7A6D19009A6696 /* yolov8n.mlpackage in Sources */, + 737FDB342C7A6D19009A6696 /* yolov8x-pose.mlpackage in Sources */, + 7333105F2C69CE95001D647B /* Colors.swift in Sources */, + 737FDB372C7A6D19009A6696 /* yolov8s-pose.mlpackage in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, + 737FDB382C7A6D19009A6696 /* yolov8n-pose.mlpackage in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */, - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */, - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in 
Sources */, + 737FDB392C7A6D19009A6696 /* yolov8m.mlpackage in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, + 737FDB362C7A6D19009A6696 /* yolov8m-pose.mlpackage in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, + 737FDB3A2C7A6D19009A6696 /* yolov8x.mlpackage in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -349,7 +378,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 0; + CURRENT_PROJECT_VERSION = 2; DEVELOPMENT_TEAM = 3MR4P6CL3X; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; @@ -359,7 +388,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 8.2.0; + MARKETING_VERSION = 8.3.0; PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; @@ -377,7 +406,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 0; + CURRENT_PROJECT_VERSION = 2; DEVELOPMENT_TEAM = 3MR4P6CL3X; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; @@ -387,7 +416,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 8.2.0; + MARKETING_VERSION = 8.3.0; PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; diff --git a/YOLO/Assets.xcassets/Focus.imageset/Contents.json b/YOLO/Assets.xcassets/Focus.imageset/Contents.json index 0f430b5..5db4a9b 100644 --- a/YOLO/Assets.xcassets/Focus.imageset/Contents.json +++ b/YOLO/Assets.xcassets/Focus.imageset/Contents.json @@ -1,16 +1,18 @@ { "images": [ { + "filename": "ultralytics_square_focus_image.png", "idiom": "universal", "scale": "1x" }, { + "filename": "ultralytics_square_focus_image 1.png", "idiom": "universal", "scale": "2x" }, { + "filename": "ultralytics_square_focus_image 2.png", "idiom": "universal", - "filename": "ultralytics_square_focus_image.png", "scale": "3x" } ], diff --git a/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png new file mode 100644 index 0000000..d250520 Binary files /dev/null and b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png differ diff --git a/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png new file mode 100644 index 0000000..d250520 Binary files /dev/null and b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png differ diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index 549bc72..0fb4888 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard [Main.storyboard hunks omitted: the storyboard's XML markup is not recoverable, only the +/- change markers survived] diff --git a/YOLO/Models/README.md b/YOLO/Models/README.md index c43cffe..d2e4adc 100644 --- a/YOLO/Models/README.md +++ b/YOLO/Models/README.md @@ -35,6 +35,7 @@ If you prefer to use specific model versions or need to customize the
models, yo # Export all YOLOv8 models to CoreML INT8 for size in ("n", "s", "m", "l", "x"): # all YOLOv8 model sizes YOLO(f"yolov8{size}.pt").export(format="coreml", int8=True, nms=True, imgsz=[640, 384]) + YOLO(f"yolov8{size}-pose.pt").export(format="coreml", int8=True, imgsz=[640, 384]) ``` 3. **Place Models in Project:** After exporting, locate the CoreML model files and place them in the `YOLO/Models` directory of your project. diff --git a/YOLO/Utilities/Colors.swift b/YOLO/Utilities/Colors.swift new file mode 100644 index 0000000..77d3b5a --- /dev/null +++ b/YOLO/Utilities/Colors.swift @@ -0,0 +1,79 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// Colors for Ultralytics YOLO App + +// These colors are set to use the same Ultralytics color palette as the Python version. + +import Foundation +import UIKit + +let ultralyticsColorsolors: [UIColor] = [ + UIColor(red: 4 / 255, green: 42 / 255, blue: 255 / 255, alpha: 0.6), // #042AFF + UIColor(red: 11 / 255, green: 219 / 255, blue: 235 / 255, alpha: 0.6), // #0BDBEB + UIColor(red: 243 / 255, green: 243 / 255, blue: 243 / 255, alpha: 0.6), // #F3F3F3 + UIColor(red: 0 / 255, green: 223 / 255, blue: 183 / 255, alpha: 0.6), // #00DFB7 + UIColor(red: 17 / 255, green: 31 / 255, blue: 104 / 255, alpha: 0.6), // #111F68 + UIColor(red: 255 / 255, green: 111 / 255, blue: 221 / 255, alpha: 0.6), // #FF6FDD + UIColor(red: 255 / 255, green: 68 / 255, blue: 79 / 255, alpha: 0.6), // #FF444F + UIColor(red: 204 / 255, green: 237 / 255, blue: 0 / 255, alpha: 0.6), // #CCED00 + UIColor(red: 0 / 255, green: 243 / 255, blue: 68 / 255, alpha: 0.6), // #00F344 + UIColor(red: 189 / 255, green: 0 / 255, blue: 255 / 255, alpha: 0.6), // #BD00FF + UIColor(red: 0 / 255, green: 180 / 255, blue: 255 / 255, alpha: 0.6), // #00B4FF + UIColor(red: 221 / 255, green: 0 / 255, blue: 186 / 255, alpha: 0.6), // #DD00BA + UIColor(red: 0 / 255, green: 255 / 255, blue: 255 / 255, alpha: 0.6), // #00FFFF + UIColor(red: 38 / 255, green: 192 / 255, blue: 0 / 255, alpha: 0.6), // #26C000 + UIColor(red: 1 / 255, green: 255 / 255, blue: 179 / 255, alpha: 0.6), // #01FFB3 + UIColor(red: 125 / 255, green: 36 / 255, blue: 255 / 255, alpha: 0.6), // #7D24FF + UIColor(red: 123 / 255, green: 0 / 255, blue: 104 / 255, alpha: 0.6), // #7B0068 + UIColor(red: 255 / 255, green: 27 / 255, blue: 108 / 255, alpha: 0.6), // #FF1B6C + UIColor(red: 252 / 255, green: 109 / 255, blue: 47 / 255, alpha: 0.6), // #FC6D2F + UIColor(red: 162 / 255, green: 255 / 255, blue: 11 / 255, alpha: 0.6), // #A2FF0B +] + +let posePalette: [[CGFloat]] = [ + [255, 128, 0], + [255, 153, 51], + [255, 178, 102], + [230, 230, 0], + [255, 153, 255], + [153, 204, 255], + [255, 102, 255], + [255, 51, 255], + [102, 178, 255], + [51, 153, 255], + [255, 153, 153], + [255, 102, 102], + [255, 51, 51], + [153, 255, 153], + [102, 255, 102], + [51, 255, 51], + [0, 255, 0], + [0, 0, 255], + [255, 0, 0], + [255, 255, 255], +] + +let limbColorIndices = [0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16] +let kptColorIndices = [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0] + +let skeleton = [ + [16, 14], + [14, 12], + [17, 15], + [15, 13], + [12, 13], + [6, 12], + [7, 13], + [6, 7], + [6, 8], + [7, 9], + [8, 10], + [9, 11], + [2, 3], + [1, 2], + [1, 3], + [2, 4], + [3, 5], + [4, 6], + [5, 7], +] diff --git a/YOLO/Utilities/PostProcessPose.swift b/YOLO/Utilities/PostProcessPose.swift new file mode 100644 index 0000000..d75ce27 --- /dev/null +++ b/YOLO/Utilities/PostProcessPose.swift @@ -0,0 +1,247 @@ +// 
Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// PostProcessPose for Ultralytics YOLO App +// These functions are designed to post-process inference results from the YOLOv8-Pose model in the Ultralytics YOLO app to display a Pose skeleton. + +import CoreML +import Foundation +import UIKit + +@available(iOS 15.0, *) + +extension ViewController { + + func setupOverlayLayer() { + let width = videoPreview.bounds.width + let height = videoPreview.bounds.height + + var ratio: CGFloat = 1.0 + if videoCapture.captureSession.sessionPreset == .photo { + ratio = (4.0 / 3.0) + } else { + ratio = (16.0 / 9.0) + } + + var offSet = CGFloat.zero + var margin = CGFloat.zero + if view.bounds.width < view.bounds.height { + offSet = height / ratio + margin = (offSet - self.videoPreview.bounds.width) / 2 + self.overlayLayer.frame = CGRect( + x: -margin, y: 0, width: offSet, height: self.videoPreview.bounds.height) + } else { + offSet = width / ratio + margin = (offSet - self.videoPreview.bounds.height) / 2 + self.overlayLayer.frame = CGRect( + x: 0, y: -margin, width: self.videoPreview.bounds.width, height: offSet) + } + + } + + func removeAllMaskSubLayers() { + self.overlayLayer.sublayers?.forEach { layer in + layer.removeFromSuperlayer() + } + self.overlayLayer.sublayers = nil + } + + func PostProcessPose(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) + -> [(CGRect, Float, [Float])] + { + let numAnchors = prediction.shape[2].intValue + let featureCount = prediction.shape[1].intValue - 5 + var boxes = [CGRect]() + var scores = [Float]() + var features = [[Float]]() + let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer)) + let lock = DispatchQueue(label: "com.example.lock") + + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let confIndex = 4 * numAnchors + j + let confidence = featurePointer[confIndex] + + if confidence > confidenceThreshold { + let x = featurePointer[j] + let y = featurePointer[numAnchors + j] + let width = featurePointer[2 * numAnchors + j] + let height = featurePointer[3 * numAnchors + j] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var boxFeatures = [Float](repeating: 0, count: featureCount) + for k in 0..= confThreshold + && box.contains(CGPoint(x: CGFloat(keypoints[i * 3]), y: CGFloat(keypoints[i * 3 + 1]))) + { + points[i] = (point, conf) + + drawCircle(on: layer, at: point, radius: radius, color: kptColorIndices[i]) + } + } + + if drawSkeleton { + for (index, bone) in skeleton.enumerated() { + let (startIdx, endIdx) = (bone[0] - 1, bone[1] - 1) + + guard startIdx < points.count, endIdx < points.count else { + print("Invalid skeleton indices: \(startIdx), \(endIdx)") + continue + } + + let startPoint = points[startIdx].0 + let endPoint = points[endIdx].0 + let startConf = points[startIdx].1 + let endConf = points[endIdx].1 + + if startConf >= confThreshold && endConf >= confThreshold { + drawLine(on: layer, from: startPoint, to: endPoint, color: limbColorIndices[index]) + } + } + } + } + + func drawCircle(on layer: CALayer, at point: CGPoint, radius: CGFloat, color index: Int) { + let circleLayer = CAShapeLayer() + circleLayer.path = + UIBezierPath( + arcCenter: point, + radius: radius, + startAngle: 0, + endAngle: .pi * 2, + clockwise: true + ).cgPath + + let color = posePalette[index].map { $0 / 255.0 } + circleLayer.fillColor = + 
UIColor(red: color[0], green: color[1], blue: color[2], alpha: 1.0).cgColor + + layer.addSublayer(circleLayer) + } + + func drawLine(on layer: CALayer, from start: CGPoint, to end: CGPoint, color index: Int) { + let lineLayer = CAShapeLayer() + let path = UIBezierPath() + path.move(to: start) + path.addLine(to: end) + + lineLayer.path = path.cgPath + lineLayer.lineWidth = 2 + + let color = posePalette[index].map { $0 / 255.0 } + lineLayer.strokeColor = + UIColor(red: color[0], green: color[1], blue: color[2], alpha: 1.0).cgColor + + layer.addSublayer(lineLayer) + } + +} + +func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] { + let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } + var selectedIndices = [Int]() + var activeIndices = [Bool](repeating: true, count: boxes.count) + + for i in 0.. CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { + activeIndices[otherIdx] = false + } + } + } + } + } + return selectedIndices +} + +extension CGRect { + var area: CGFloat { + return width * height + } +} diff --git a/YOLO/VideoCapture.swift b/YOLO/VideoCapture.swift index 0f0512c..426ee05 100644 --- a/YOLO/VideoCapture.swift +++ b/YOLO/VideoCapture.swift @@ -137,6 +137,7 @@ public class VideoCapture: NSObject { captureSession.stopRunning() } } + func updateVideoOrientation() { guard let connection = videoOutput.connection(with: .video) else { return } switch UIDevice.current.orientation { diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 840d91d..f9d1148 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -19,6 +19,7 @@ import Vision var mlModel = try! yolov8m(configuration: .init()).model +@available(iOS 15.0, *) class ViewController: UIViewController { @IBOutlet var videoPreview: UIView! @IBOutlet var View0: UIView! @@ -42,6 +43,7 @@ class ViewController: UIViewController { @IBOutlet weak var activityIndicator: UIActivityIndicatorView! @IBOutlet weak var forcus: UIImageView! @IBOutlet weak var toolBar: UIToolbar! + var overlayLayer: CALayer = CALayer() let selection = UISelectionFeedbackGenerator() var detector = try! VNCoreMLModel(for: mlModel) @@ -73,6 +75,15 @@ class ViewController: UIViewController { return request }() + enum Task { + case detect + case pose + } + + var task: Task = .detect + var confidenceThreshold: Float = 0.25 + var iouThreshold: Float = 0.4 + override func viewDidLoad() { super.viewDidLoad() slider.value = 30 @@ -116,7 +127,12 @@ class ViewController: UIViewController { } self.videoCapture.previewLayer?.frame = CGRect( x: 0, y: 0, width: size.width, height: size.height) - + coordinator.animate( + alongsideTransition: { context in + }, + completion: { context in + self.setupOverlayLayer() + }) } private func setUpOrientationChangeNotification() { @@ -136,49 +152,76 @@ class ViewController: UIViewController { @IBAction func indexChanged(_ sender: Any) { selection.selectionChanged() activityIndicator.startAnimating() - - /// Switch model - switch segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - mlModel = try! yolov8n(configuration: .init()).model - case 1: - self.labelName.text = "YOLOv8s" - mlModel = try! yolov8s(configuration: .init()).model - case 2: - self.labelName.text = "YOLOv8m" - mlModel = try! yolov8m(configuration: .init()).model - case 3: - self.labelName.text = "YOLOv8l" - mlModel = try! yolov8l(configuration: .init()).model - case 4: - self.labelName.text = "YOLOv8x" - mlModel = try! 
yolov8x(configuration: .init()).model - default: - break - } setModel() setUpBoundingBoxViews() activityIndicator.stopAnimating() } func setModel() { + /// Switch model + switch task { + case .detect: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + mlModel = try! yolov8n(configuration: .init()).model + case 1: + self.labelName.text = "YOLOv8s" + mlModel = try! yolov8s(configuration: .init()).model + case 2: + self.labelName.text = "YOLOv8m" + mlModel = try! yolov8m(configuration: .init()).model + case 3: + self.labelName.text = "YOLOv8l" + mlModel = try! yolov8l(configuration: .init()).model + case 4: + self.labelName.text = "YOLOv8x" + mlModel = try! yolov8x(configuration: .init()).model + default: + break + } - /// VNCoreMLModel - detector = try! VNCoreMLModel(for: mlModel) - detector.featureProvider = ThresholdProvider() + case .pose: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + mlModel = try! yolov8n_pose(configuration: .init()).model + case 1: + self.labelName.text = "YOLOv8s" + mlModel = try! yolov8s_pose(configuration: .init()).model + + case 2: + self.labelName.text = "YOLOv8m" + mlModel = try! yolov8m_pose(configuration: .init()).model + case 3: + self.labelName.text = "YOLOv8l" + mlModel = try! yolov8l_pose(configuration: .init()).model + case 4: + self.labelName.text = "YOLOv8x" + mlModel = try! yolov8x_pose(configuration: .init()).model + default: break + } - /// VNCoreMLRequest - let request = VNCoreMLRequest( - model: detector, - completionHandler: { [weak self] request, error in - self?.processObservations(for: request, error: error) - }) - request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop - visionRequest = request - t2 = 0.0 // inference dt smoothed - t3 = CACurrentMediaTime() // FPS start - t4 = 0.0 // FPS dt smoothed + } + + DispatchQueue.global(qos: .userInitiated).async { [self] in + + /// VNCoreMLModel + detector = try! VNCoreMLModel(for: mlModel) + detector.featureProvider = ThresholdProvider() + + /// VNCoreMLRequest + let request = VNCoreMLRequest( + model: detector, + completionHandler: { [weak self] request, error in + self?.processObservations(for: request, error: error) + }) + request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop + visionRequest = request + t2 = 0.0 // inference dt smoothed + t3 = CACurrentMediaTime() // FPS start + t4 = 0.0 // FPS dt smoothed + } } /// Update thresholds from slider values @@ -190,6 +233,29 @@ class ViewController: UIViewController { detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) } + @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { + self.removeAllMaskSubLayers() + + switch sender.selectedSegmentIndex { + case 0: + if self.task != .detect { + self.task = .detect + self.setModel() + } + case 1: + if self.task != .pose { + self.task = .pose + for i in 0.. 19 { + count = 0 + } + if colors[label] == nil { // if key not in dict + colors[label] = color + } + } + } } @@ -316,6 +388,9 @@ class ViewController: UIViewController { self.videoCapture.previewLayer?.frame = self.videoPreview.bounds // resize preview layer } + self.setupOverlayLayer() + self.videoPreview.layer.addSublayer(self.overlayLayer) + // Add the bounding box layers to the UI, on top of the video preview. 
for box in self.boundingBoxViews { box.addToLayer(self.videoPreview.layer) @@ -368,21 +443,61 @@ class ViewController: UIViewController { } func processObservations(for request: VNRequest, error: Error?) { - DispatchQueue.main.async { - if let results = request.results as? [VNRecognizedObjectObservation] { - self.show(predictions: results) - } else { - self.show(predictions: []) + switch task { + case .detect: + + DispatchQueue.main.async { + if let results = request.results as? [VNRecognizedObjectObservation] { + self.show(predictions: results, predsPose: []) + } else { + self.show(predictions: [], predsPose: []) + } + + // Measure FPS + if self.t1 < 10.0 { // valid dt + self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time + } + self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS + self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms + self.t3 = CACurrentMediaTime() } - // Measure FPS - if self.t1 < 10.0 { // valid dt - self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time + case .pose: + if let results = request.results as? [VNCoreMLFeatureValueObservation] { + DispatchQueue.main.async { [self] in + + if let prediction = results.first?.featureValue.multiArrayValue { + + let preds = PostProcessPose( + prediction: prediction, confidenceThreshold: self.confidenceThreshold, + iouThreshold: self.iouThreshold) + var boxes = [(CGRect, Float)]() + var kpts = [[Float]]() + + for pred in preds { + boxes.append((pred.0, pred.1)) + kpts.append(pred.2) + } + self.show(predictions: [], predsPose: preds) + self.overlayLayer.sublayers?.forEach { $0.removeFromSuperlayer() } + + self.drawKeypoints( + keypointsList: kpts, boundingBoxes: boxes, on: overlayLayer, + imageViewSize: overlayLayer.bounds.size, originalImageSize: overlayLayer.bounds.size) + + } else { + self.show(predictions: [], predsPose: []) + } + if self.t1 < 10.0 { // valid dt + self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time + } + self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS + self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms + self.t3 = CACurrentMediaTime() + } } - self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS - self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms - self.t3 = CACurrentMediaTime() } + } // Save text file @@ -448,7 +563,8 @@ class ViewController: UIViewController { } } - func show(predictions: [VNRecognizedObjectObservation]) { + func show(predictions: [VNRecognizedObjectObservation], predsPose: [(CGRect, Float, [Float])]) { + let width = videoPreview.bounds.width // 375 pix let height = videoPreview.bounds.height // 812 pix var str = "" @@ -471,28 +587,64 @@ class ViewController: UIViewController { let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day + var resultCount = 0 + + switch task { + case .detect: + resultCount = predictions.count + case .pose: + resultCount = predsPose.count + } + self.labelSlider.text = String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" + for i in 0..= 1 { // iPhone ratio = 1.218 - let offset = (1 - ratio) * (0.5 - rect.minX) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) - rect = rect.applying(transform) - 
rect.size.width *= ratio - } else { // iPad ratio = 0.75 - let offset = (ratio - 1) * (0.5 - rect.maxY) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) - rect = rect.applying(transform) + if ratio >= 1 { + let offset = (1 - ratio) * (0.5 - displayRect.minX) + if task == .detect { + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) + displayRect = displayRect.applying(transform) + } else { + let transform = CGAffineTransform(translationX: offset, y: 0) + displayRect = displayRect.applying(transform) + } + + displayRect.size.width *= ratio + } else { + if task == .detect { + let offset = (ratio - 1) * (0.5 - displayRect.maxY) + + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) + displayRect = displayRect.applying(transform) + } else { + let offset = (ratio - 1) * (0.5 - displayRect.minY) + let transform = CGAffineTransform(translationX: 0, y: offset) + displayRect = displayRect.applying(transform) + } ratio = (height / width) / (3.0 / 4.0) - rect.size.height /= ratio + displayRect.size.height /= ratio } - // Scale normalized to pixels [375, 812] [width, height] - rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height)) - - // The labels array is a list of VNClassificationObservation objects, - // with the highest scoring class first in the list. - let bestClass = prediction.labels[0].identifier - let confidence = prediction.labels[0].confidence - // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels) - let label = String(format: "%@ %.1f", bestClass, confidence * 100) - let alpha = CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9) - // Show the bounding box. + displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) + boundingBoxViews[i].show( - frame: rect, - label: label, - color: colors[bestClass] ?? UIColor.white, - alpha: alpha) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) + frame: displayRect, label: label, color: boxColor, alpha: alpha) if developerMode { // Write @@ -616,12 +769,14 @@ class ViewController: UIViewController { } // Pinch to Zoom End -------------------------------------------------------------------------------------------- } // ViewController class End +@available(iOS 15.0, *) extension ViewController: VideoCaptureDelegate { func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) { predict(sampleBuffer: sampleBuffer) } } +@available(iOS 15.0, *) // Programmatically save image extension ViewController: AVCapturePhotoCaptureDelegate { func photoOutput( diff --git "a/YOLO/\343\202\271\343\202\257\343\203\252\343\203\274\343\203\263\343\202\267\343\203\247\343\203\203\343\203\210 2024-08-09 9.53.27.png" "b/YOLO/\343\202\271\343\202\257\343\203\252\343\203\274\343\203\263\343\202\267\343\203\247\343\203\203\343\203\210 2024-08-09 9.53.27.png" new file mode 100644 index 0000000..13641f4 Binary files /dev/null and "b/YOLO/\343\202\271\343\202\257\343\203\252\343\203\274\343\203\263\343\202\267\343\203\247\343\203\203\343\203\210 2024-08-09 9.53.27.png" differ