diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index ddcf965..7442209 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -13,14 +13,13 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA221E62DD300DE43BC /* VideoCapture.swift */; }; 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA721E62DD300DE43BC /* AppDelegate.swift */; }; 636EFCB921E62E3900DE43BC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 636EFCB821E62E3900DE43BC /* Assets.xcassets */; }; - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */; }; - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */; }; - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */; }; - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */; }; - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */; }; 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; + 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */; }; + 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7742C0EA36D00218E8F /* HumanModel.swift */; }; + 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7762C0EA37300218E8F /* TrackingModel.swift */; }; + 73FE95F32C3500AC00C6C806 /* PostProcessSegment.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73FE95F22C3500AC00C6C806 /* PostProcessSegment.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ @@ -35,12 +34,11 @@ 636EFCA221E62DD300DE43BC /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 636EFCA721E62DD300DE43BC /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; - 6381D2152B7817C200ABA4E8 /* 
yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; + 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; + 73A4E7742C0EA36D00218E8F /* HumanModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = HumanModel.swift; sourceTree = ""; }; + 73A4E7762C0EA37300218E8F /* TrackingModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TrackingModel.swift; sourceTree = ""; }; + 73FE95F22C3500AC00C6C806 /* PostProcessSegment.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessSegment.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; 8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -60,6 +58,10 @@ 636166E72514438D0054FA7E /* Utilities */ = { isa = PBXGroup; children = ( + 73FE95F22C3500AC00C6C806 /* PostProcessSegment.swift */, + 73A4E7762C0EA37300218E8F /* TrackingModel.swift */, + 73A4E7742C0EA36D00218E8F /* HumanModel.swift */, + 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */, 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, ); @@ -87,11 +89,6 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */, - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */, - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */, - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */, - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */, ); path = Models; sourceTree = ""; @@ -210,16 +207,15 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */, + 73FE95F32C3500AC00C6C806 /* PostProcessSegment.swift in Sources */, + 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */, - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */, - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, + 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources 
*/, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, + 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -350,8 +346,8 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + CURRENT_PROJECT_VERSION = 3; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -360,8 +356,8 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 8.2.0; - PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection; + MARKETING_VERSION = 8.3.0; + PRODUCT_BUNDLE_IDENTIFIER = com.YoloiOSApp; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; SUPPORTS_MACCATALYST = NO; @@ -378,8 +374,8 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + CURRENT_PROJECT_VERSION = 3; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -388,8 +384,8 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 8.2.0; - PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection; + MARKETING_VERSION = 8.3.0; + PRODUCT_BUNDLE_IDENTIFIER = com.YoloiOSApp; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; SUPPORTS_MACCATALYST = NO; diff --git a/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme b/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme new file mode 100644 index 0000000..3bb677d --- /dev/null +++ b/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/YOLO/Assets.xcassets/Focus.imageset/Contents.json b/YOLO/Assets.xcassets/Focus.imageset/Contents.json index dae9ce7..38f29b5 100644 --- a/YOLO/Assets.xcassets/Focus.imageset/Contents.json +++ b/YOLO/Assets.xcassets/Focus.imageset/Contents.json @@ -1,21 +1,23 @@ { - "images": [ + "images" : [ { - "idiom": "universal", - "scale": "1x" + "filename" : "ultralytics_square_focus_image.png", + "idiom" : "universal", + "scale" : "1x" }, { - "idiom": "universal", - "scale": "2x" + "filename" : "ultralytics_square_focus_image 1.png", + "idiom" : "universal", + "scale" : "2x" }, { - "idiom": "universal", - "filename": "ultralytics_square_focus_image.png", - "scale": "3x" + "filename" : "ultralytics_square_focus_image 2.png", + "idiom" : "universal", + "scale" : "3x" } ], - "info": { - "version": 1, - "author": "xcode" + "info" : { + "author" : "xcode", + "version" : 1 } } diff --git a/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png new file mode 100644 index 0000000..d250520 Binary files /dev/null and b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png differ diff --git a/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png new file mode 100644 index 0000000..d250520 Binary files /dev/null and 
b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png differ diff --git a/YOLO/Info.plist b/YOLO/Info.plist index c36dbc0..1e34cf5 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 24 + 410 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS @@ -52,6 +52,8 @@ UIStatusBarStyleDefault UISupportedInterfaceOrientations + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight UIInterfaceOrientationPortrait UISupportedInterfaceOrientations~ipad diff --git a/YOLO/LaunchScreen.storyboard b/YOLO/LaunchScreen.storyboard index 5311997..c4280f4 100755 --- a/YOLO/LaunchScreen.storyboard +++ b/YOLO/LaunchScreen.storyboard @@ -1,9 +1,9 @@ - + - + diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index 048e9f6..6380d41 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -1,9 +1,10 @@ - - + + - + + @@ -14,19 +15,17 @@ - + - - - + + - + + + + + + + + + + + + + + + - + + + + + + + + + + + + + - + + + + - + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + @@ -210,19 +371,31 @@ + + + + + + + + + + + + @@ -233,15 +406,17 @@ - + - + + + - + diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index b506545..dfb758e 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -20,6 +20,9 @@ class BoundingBoxView { /// The layer that displays the label and confidence score for the detected object. let textLayer: CATextLayer + /// The layer that displays the inner text within the bounding box. + let innerTextLayer: CATextLayer + /// Initializes a new BoundingBoxView with configured shape and text layers. init() { shapeLayer = CAShapeLayer() @@ -33,22 +36,32 @@ class BoundingBoxView { textLayer.fontSize = 14 // Set font size for the label text textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels textLayer.alignmentMode = .center // Center-align the text within the layer + + innerTextLayer = CATextLayer() + innerTextLayer.isHidden = true // Initially hidden; shown with label when a detection occurs + innerTextLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays + innerTextLayer.fontSize = 12 // Set font size for the inner text + innerTextLayer.font = UIFont(name: "Avenir", size: innerTextLayer.fontSize) // Use Avenir font for inner text + innerTextLayer.alignmentMode = .left // Left-align the text within the layer + innerTextLayer.isWrapped = true // Wrap the text to fit within the layer } - /// Adds the bounding box and text layers to a specified parent layer. - /// - Parameter parent: The CALayer to which the bounding box and text layers will be added. + /// Adds the bounding box, text, and inner text layers to a specified parent layer. + /// - Parameter parent: The CALayer to which the bounding box, text, and inner text layers will be added. func addToLayer(_ parent: CALayer) { parent.addSublayer(shapeLayer) parent.addSublayer(textLayer) + parent.addSublayer(innerTextLayer) } - /// Updates the bounding box and label to be visible with specified properties. + /// Updates the bounding box, label, and inner text to be visible with specified properties. /// - Parameters: /// - frame: The CGRect frame defining the bounding box's size and position. /// - label: The text label to display (e.g., object class and confidence). /// - color: The color of the bounding box stroke and label background. 
/// - alpha: The opacity level for the bounding box stroke and label background. - func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat) { + /// - innerTexts: The text to display inside the bounding box. + func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat, innerTexts: String) { CATransaction.setDisableActions(true) // Disable implicit animations let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box @@ -69,11 +82,24 @@ class BoundingBoxView { let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame + + if !innerTexts.isEmpty { + innerTextLayer.string = innerTexts // Set the inner text + innerTextLayer.backgroundColor = UIColor.clear.cgColor // No background color + innerTextLayer.isHidden = false // Make the inner text layer visible + innerTextLayer.foregroundColor = UIColor.red.cgColor // Set text color + innerTextLayer.frame = CGRect(x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, height: frame.height - 8) + // Set the inner text layer frame + } else { + innerTextLayer.isHidden = true // Hide the inner text layer if innerTexts is empty + } + } - /// Hides the bounding box and text layers. + /// Hides the bounding box, text, and inner text layers. func hide() { shapeLayer.isHidden = true textLayer.isHidden = true + innerTextLayer.isHidden = true } } diff --git a/YOLO/Utilities/HumanModel.swift b/YOLO/Utilities/HumanModel.swift new file mode 100644 index 0000000..99c21a0 --- /dev/null +++ b/YOLO/Utilities/HumanModel.swift @@ -0,0 +1,150 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// HumanModel for Ultralytics YOLO App +// This struct is designed to turn the inference results of the YOLOv8-Human model into a manageable DataModel of human feature values ​​in the Ultralytics YOLO app. When in tracking mode, this struct averages the feature values ​​of a given individual across frames to a stable value. +// This struct automatically analyzes the boxes, scores, and feature values ​​provided to the update function to create a human model.// Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license +// Access the source code: https://github.com/ultralytics/yolo-ios-app + + +import Foundation +import UIKit + +let updateFrequency: Int = 120 + +struct Person { + var index: Int + var box: CGRect = .zero + + var score: Float = 0 + var weight: Float = 0 + var height: Float = 0 + + var age: Int = 0 + + var gender: String = "female" + var genderConfidence: Float = 0 + var race: String = "asian" + var raceConfidence: Float = 0 + + var listCount: Int = 0 + var scoreRawList: [Float] = [] + var weightRawList: [Float] = [] + var heightRawList: [Float] = [] + var ageRawList: [Float] = [] + var maleRawList: [Float] = [] + var femaleRawList: [Float] = [] + var asianRawList: [Float] = [] + var whiteRawList: [Float] = [] + var middleEasternRawList: [Float] = [] + var indianRawList: [Float] = [] + var latinoRawList: [Float] = [] + var blackRawList: [Float] = [] + + var trackedBox: CGRect? 
+ var color:UIColor + + var unDetectedCounter: Int = 0 + var stable = false + + init(index: Int) { + self.index = index + self.color = UIColor(red: CGFloat.random(in: 0...1), + green: CGFloat.random(in: 0...1), + blue: CGFloat.random(in: 0...1), + alpha: 0.6) + } + + mutating func update(box:CGRect, score:Float, features:[Float]) { + self.box = box + if scoreRawList.count >= updateFrequency { + scoreRawList.removeFirst() + weightRawList.removeFirst() + heightRawList.removeFirst() + ageRawList.removeFirst() + maleRawList.removeFirst() + femaleRawList.removeFirst() + asianRawList.removeFirst() + whiteRawList.removeFirst() + middleEasternRawList.removeFirst() + indianRawList.removeFirst() + latinoRawList.removeFirst() + blackRawList.removeFirst() + } + + + self.scoreRawList.append(score) + self.weightRawList.append(features[0]) + self.heightRawList.append(features[1]) + self.ageRawList.append(features[2]) + self.femaleRawList.append(features[3]) + self.maleRawList.append(features[4]) + self.asianRawList.append(features[5]) + self.whiteRawList.append(features[6]) + self.middleEasternRawList.append(features[7]) + self.indianRawList.append(features[8]) + self.latinoRawList.append(features[9]) + self.blackRawList.append(features[10]) + calcurateFeatures() + + self.unDetectedCounter = 0 + } + + private mutating func calcurateFeatures() { + + self.score = average(of: scoreRawList) + self.weight = average(of: weightRawList) + self.height = average(of: heightRawList) + self.age = Int(round(average(of: ageRawList))) + let femaleAverage = average(of: femaleRawList) + let maleAverage = average(of: maleRawList) + let genderCandidates = [femaleAverage,maleAverage] + var genderMaxIndex = 0 + var genderMaxValue = genderCandidates[0] + + for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { + if genderValue > genderMaxValue { + genderMaxValue = genderValue + genderMaxIndex = genderIndex + 1 + } + } + + self.gender = genders[genderMaxIndex] + self.genderConfidence = genderMaxValue + + let asianAverage = average(of: asianRawList) + let whiteAverage = average(of: whiteRawList) + let middleEasternAverage = average(of: middleEasternRawList) + let indianAverage = average(of: indianRawList) + let latinoAverage = average(of: latinoRawList) + let blackAverage = average(of: blackRawList) + + let raceCandidates = [asianAverage,whiteAverage,middleEasternAverage,indianAverage,latinoAverage,blackAverage] + var raceMaxIndex = 0 + var raceMaxValue = raceCandidates[0] + + for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { + if raceValue > raceMaxValue { + raceMaxValue = raceValue + raceMaxIndex = raceIndex + 1 + } + } + self.race = races[raceMaxIndex] + self.raceConfidence = raceMaxValue + } + + func average(of numbers: [Float]) -> Float { + guard !numbers.isEmpty else { + return 0 + } + var sum: Float = 0 + for number in numbers { + sum += number + } + return sum / Float(numbers.count) + } + +} + +let genders = ["female", "male"] +let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] + diff --git a/YOLO/Utilities/PostProcessSegment.swift b/YOLO/Utilities/PostProcessSegment.swift new file mode 100644 index 0000000..30da29a --- /dev/null +++ b/YOLO/Utilities/PostProcessSegment.swift @@ -0,0 +1,254 @@ + +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// PostProcessSegment for Ultralytics YOLO App + +// These functions are designed to post-process inference results from the YOLOv8-Segment model in the Ultralytics YOLO app to display segment masks. 
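// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the segment post-processing
// below combines each detection's 32 mask coefficients with the prototype
// masks via vDSP_mmul and thresholds the result at 0.5. A minimal standalone
// version of that step, with assumed shapes and an illustrative function name:

import Accelerate

/// coefficients: [K] per-instance mask weights; prototypes: row-major [K, H*W].
/// Returns a binary mask of H*W pixels (true where the combined value > threshold).
func assembleInstanceMask(coefficients: [Float], prototypes: [Float],
                          maskWidth: Int, maskHeight: Int,
                          threshold: Float = 0.5) -> [Bool] {
    let pixelCount = maskWidth * maskHeight
    precondition(prototypes.count == coefficients.count * pixelCount,
                 "prototypes must be K x (H*W)")
    var combined = [Float](repeating: 0, count: pixelCount)
    // (1 x K) * (K x HW) -> (1 x HW)
    vDSP_mmul(coefficients, 1, prototypes, 1, &combined, 1,
              1, vDSP_Length(pixelCount), vDSP_Length(coefficients.count))
    return combined.map { $0 > threshold }
}
// ---------------------------------------------------------------------------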
+ +// Access the source code: https://github.com/ultralytics/yolo-ios-app + import UIKit + import Vision + import Accelerate + import MetalPerformanceShaders + + extension ViewController { + func setupMaskLayer() { + let width = videoPreview.bounds.width + let height = videoPreview.bounds.height + + var ratio: CGFloat = 1.0 + if videoCapture.captureSession.sessionPreset == .photo { + ratio = (4.0 / 3.0) + } else { + ratio = (16.0 / 9.0) + } + var offSet = CGFloat.zero + var margin = CGFloat.zero + if view.bounds.width < view.bounds.height { + offSet = height / ratio + margin = (offSet - self.videoPreview.bounds.width) / 2 + self.maskLayer.frame = CGRect(x:-margin, y: 0, width: offSet, height: self.videoPreview.bounds.height) + } else { + offSet = width / ratio + margin = (offSet - self.videoPreview.bounds.height) / 2 + self.maskLayer.frame = CGRect(x:0, y: -margin, width: self.videoPreview.bounds.width, height: offSet) + + } + } + + func getBoundingBoxesAndMasks(feature: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Int, Float, MLMultiArray)] { + let numAnchors = feature.shape[2].intValue + let numFeatures = feature.shape[1].intValue + let boxFeatureLength = 4 + let maskConfidenceLength = 32 + let numClasses = numFeatures - boxFeatureLength - maskConfidenceLength + + var results = [(CGRect, Float, Int, MLMultiArray)]() + let featurePointer = feature.dataPointer.assumingMemoryBound(to: Float.self) + + let queue = DispatchQueue.global(qos: .userInitiated) + let resultsQueue = DispatchQueue(label: "resultsQueue", attributes: .concurrent) + + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let baseOffset = j + let x = featurePointer[baseOffset] + let y = featurePointer[numAnchors + baseOffset] + let width = featurePointer[2 * numAnchors + baseOffset] + let height = featurePointer[3 * numAnchors + baseOffset] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var classProbs = [Float](repeating: 0, count: numClasses) + classProbs.withUnsafeMutableBufferPointer { classProbsPointer in + vDSP_mtrans(featurePointer + 4 * numAnchors + baseOffset, numAnchors, classProbsPointer.baseAddress!, 1, 1, vDSP_Length(numClasses)) + } + var maxClassValue: Float = 0 + var maxClassIndex: vDSP_Length = 0 + vDSP_maxvi(classProbs, 1, &maxClassValue, &maxClassIndex, vDSP_Length(numClasses)) + + if maxClassValue > confidenceThreshold { + let maskProbsPointer = featurePointer + (4 + numClasses) * numAnchors + baseOffset + let maskProbs = try! MLMultiArray(shape: [NSNumber(value: maskConfidenceLength)], dataType: .float32) + for i in 0.. 
$1.0.size.width * $1.0.size.height } + + var newLayers: [CALayer] = [] + + for (box, classIndex, conf, masksIn) in sortedObjects { + group.enter() + DispatchQueue.global(qos: .userInitiated).async { + defer { group.leave() } + if let maskImage = self.generateColoredMaskImage(from: masksIn, protos: maskArray, in: self.maskLayer.bounds.size, colorIndex: classIndex, boundingBox: box) { + DispatchQueue.main.async { + let adjustedBox = self.adjustBox(box, toFitIn: self.maskLayer.bounds.size) + + let maskImageLayer = CALayer() + maskImageLayer.frame = adjustedBox + maskImageLayer.contents = maskImage + maskImageLayer.opacity = 0.5 + newLayers.append(maskImageLayer) + } + } + } + } + + group.notify(queue: .main) { + self.removeAllMaskSubLayers() + newLayers.forEach { self.maskLayer.addSublayer($0) } + + print("Processing Time: \(Date().timeIntervalSince(startTime)) seconds") + } + } + + func generateColoredMaskImage(from masksIn: MLMultiArray, protos: MLMultiArray, in size: CGSize, colorIndex: Int, boundingBox: CGRect) -> CGImage? { + let maskWidth = protos.shape[3].intValue + let maskHeight = protos.shape[2].intValue + let maskChannels = protos.shape[1].intValue + + guard protos.shape.count == 4, protos.shape[0].intValue == 1, masksIn.shape.count == 1, masksIn.shape[0].intValue == maskChannels else { + print("Invalid shapes for protos or masksIn") + return nil + } + + let masksPointer = masksIn.dataPointer.assumingMemoryBound(to: Float.self) + let protosPointer = protos.dataPointer.assumingMemoryBound(to: Float.self) + + let masksPointerOutput = UnsafeMutablePointer.allocate(capacity: maskHeight * maskWidth) + vDSP_mmul(masksPointer, 1, protosPointer, 1, masksPointerOutput, 1, vDSP_Length(1), vDSP_Length(maskHeight * maskWidth), vDSP_Length(maskChannels)) + + let threshold: Float = 0.5 + let maskColorIndex = colorIndex % 20 + let color = colorsForMask[colorIndex] + let red = UInt8(color.red) + let green = UInt8(color.green) + let blue = UInt8(color.blue) + + var maskPixels = [UInt8](repeating: 0, count: maskHeight * maskWidth * 4) + for y in 0.. 
threshold { + let pixelIndex = index * 4 + maskPixels[pixelIndex] = red + maskPixels[pixelIndex + 1] = green + maskPixels[pixelIndex + 2] = blue + maskPixels[pixelIndex + 3] = 255 + } + } + } + + let maskDataPointer = UnsafeMutablePointer.allocate(capacity: maskPixels.count) + maskDataPointer.initialize(from: maskPixels, count: maskPixels.count) + + let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedLast.rawValue) + let colorSpace = CGColorSpaceCreateDeviceRGB() + + let maskDataProvider = CGDataProvider(dataInfo: nil, data: maskDataPointer, size: maskPixels.count) { _, data, _ in + data.deallocate() + } + + guard let maskCGImage = CGImage(width: maskWidth, height: maskHeight, bitsPerComponent: 8, bitsPerPixel: 32, bytesPerRow: maskWidth * 4, space: colorSpace, bitmapInfo: bitmapInfo, provider: maskDataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) else { + masksPointerOutput.deallocate() + return nil + } + + let maskCIImage = CIImage(cgImage: maskCGImage) + let scaledCIImage = maskCIImage.transformed(by: CGAffineTransform(scaleX: size.width / CGFloat(maskWidth), y: size.height / CGFloat(maskHeight))) + let invertedY = size.height - (boundingBox.origin.y + boundingBox.height) * size.height / 640.0 + let cropRect = CGRect(x: boundingBox.origin.x * size.width / 640.0, y: invertedY, width: boundingBox.width * size.width / 640.0, height: boundingBox.height * size.height / 640.0) + + let croppedCIImage = scaledCIImage.cropped(to: cropRect) + + let ciContext = CIContext() + guard let cgImage = ciContext.createCGImage(croppedCIImage, from: cropRect) else { + masksPointerOutput.deallocate() + return nil + } + + masksPointerOutput.deallocate() + + return cgImage + } + + func removeAllMaskSubLayers() { + self.maskLayer.sublayers?.forEach { layer in + layer.removeFromSuperlayer() + } + self.maskLayer.sublayers = nil + } + + + func adjustBox(_ box: CGRect, toFitIn containerSize: CGSize) -> CGRect { + let xScale = containerSize.width / 640.0 + let yScale = containerSize.height / 640.0 + return CGRect(x: box.origin.x * xScale, y: box.origin.y * yScale, width: box.size.width * xScale, height: box.size.height * yScale) + } + } + + extension UIColor { + func toRGBComponents() -> (red: UInt8, green: UInt8, blue: UInt8)? { + var red: CGFloat = 0 + var green: CGFloat = 0 + var blue: CGFloat = 0 + var alpha: CGFloat = 0 + + let success = self.getRed(&red, green: &green, blue: &blue, alpha: &alpha) + + if success { + let redUInt8 = UInt8(red * 255.0) + let greenUInt8 = UInt8(green * 255.0) + let blueUInt8 = UInt8(blue * 255.0) + return (red: redUInt8, green: greenUInt8, blue: blueUInt8) + } else { + return nil + } + } + } diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift new file mode 100644 index 0000000..db25d12 --- /dev/null +++ b/YOLO/Utilities/PostProcessing.swift @@ -0,0 +1,103 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// PostProcessing for Ultralytics YOLO App +// This feature is designed to post-process the output of a YOLOv8 model within the Ultralytics YOLO app to extract high-confidence objects. +// Output high confidence boxes and their corresponding feature values using Non max suppression. +// Licensed under AGPL-3.0. 
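// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the nonMaxSuppression(...)
// routine in PostProcessing.swift below walks boxes in descending score order
// and discards any lower-scored box whose intersection with a kept box exceeds
// threshold x the smaller of the two areas. Its loop body is hard to read in
// this flattened diff, so a self-contained version of the same rule follows;
// the function name here is illustrative.

import CoreGraphics

func greedySuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] {
    let order = scores.enumerated().sorted { $0.element > $1.element }.map(\.offset)
    var keep: [Int] = []
    var active = [Bool](repeating: true, count: boxes.count)
    for (rank, i) in order.enumerated() where active[i] {
        keep.append(i)                              // highest remaining score survives
        for j in order[(rank + 1)...] where active[j] {
            let inter = boxes[i].intersection(boxes[j])
            let interArea = inter.isNull ? 0 : inter.width * inter.height
            let smallerArea = min(boxes[i].width * boxes[i].height,
                                  boxes[j].width * boxes[j].height)
            if interArea > CGFloat(threshold) * smallerArea {
                active[j] = false                   // overlaps a kept box too much
            }
        }
    }
    return keep
}
// ---------------------------------------------------------------------------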
For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license +// Access the source code: https://github.com/ultralytics/yolo-ios-app + + +import Foundation +import CoreML +import Vision + +func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] { + let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } + var selectedIndices = [Int]() + var activeIndices = [Bool](repeating: true, count: boxes.count) + + for i in 0.. CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { + activeIndices[otherIdx] = false + } + } + } + } + } + return selectedIndices +} + +// Human model's output [1,15,8400] to [(Box, Confidence, HumanFeatures)] + +func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Float, [Float])] { + let numAnchors = prediction.shape[2].intValue + var boxes = [CGRect]() + var scores = [Float]() + var features = [[Float]]() + let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer)) + let lock = DispatchQueue(label: "com.example.lock") + + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let confIndex = 4 * numAnchors + j + let confidence = featurePointer[confIndex] + if confidence > confidenceThreshold { + let x = featurePointer[j] + let y = featurePointer[numAnchors + j] + let width = featurePointer[2 * numAnchors + j] + let height = featurePointer[3 * numAnchors + j] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var boxFeatures = [Float](repeating: 0, count: 11) + for k in 0..<11 { + let key = (5 + k) * numAnchors + j + boxFeatures[k] = featurePointer[key] + } + + lock.sync { + boxes.append(boundingBox) + scores.append(confidence) + features.append(boxFeatures) + } + } + } + + let selectedIndices = nonMaxSuppression(boxes: boxes, scores: scores, threshold: iouThreshold) + var selectedBoxesAndFeatures = [(CGRect, Float, [Float])]() + + for idx in selectedIndices { + selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) + } + print(selectedBoxesAndFeatures) + return selectedBoxesAndFeatures +} + +func toPerson(boxesAndScoresAndFeatures:[(CGRect, Float, [Float])]) -> [Person] { + var persons = [Person]() + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: -1) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + person.color = .red + persons.append(person) + } + return persons +} + +extension CGRect { + var area: CGFloat { + return width * height + } +} + diff --git a/YOLO/Utilities/TrackingModel.swift b/YOLO/Utilities/TrackingModel.swift new file mode 100644 index 0000000..a4f5dc5 --- /dev/null +++ b/YOLO/Utilities/TrackingModel.swift @@ -0,0 +1,126 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// HumanModel for Ultralytics YOLO App + +// This class is designed to track and identify the same person across frames using the inference results of the YOLOv8-Human model in the Ultralytics YOLO app. +// The tack function is a simple tracking algorithm that tracks boxes of the same person based on box overlap across frames. 
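// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the tracker below associates
// each existing Person with the detection that covers the largest fraction of
// the person's previous box, accepting the match when that overlap reaches
// 50 %. A minimal standalone version of that association step (names are
// illustrative):

import CoreGraphics

func overlapPercent(of tracked: CGRect, with detected: CGRect) -> CGFloat {
    let inter = tracked.intersection(detected)
    guard !inter.isNull, tracked.width > 0, tracked.height > 0 else { return 0 }
    return (inter.width * inter.height) / (tracked.width * tracked.height) * 100
}

/// Returns the index of the best-overlapping detection, or nil if no detection
/// covers at least half of the tracked box.
func bestMatch(for tracked: CGRect, in detections: [CGRect]) -> Int? {
    var best: (index: Int, score: CGFloat)?
    for (i, detection) in detections.enumerated() {
        let score = overlapPercent(of: tracked, with: detection)
        if score > (best?.score ?? 0) { best = (i, score) }
    }
    return (best?.score ?? 0) >= 50 ? best?.index : nil
}
// ---------------------------------------------------------------------------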
+// Access the source code: https://github.com/ultralytics/yolo-ios-app + +import Foundation +import Vision +import Accelerate + +class TrackingModel { + var persons = [Person]() + var personIndex:Int = 0 + var recent:[(CGRect, Float, [Float])] = [] + + func track(boxesAndScoresAndFeatures:[(CGRect, Float, [Float])]) -> [Person] { + + if persons.isEmpty { + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: personIndex) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + personIndex += 1 + persons.append(person) + + } + return persons + } + + var unDetectedPersonIndexes:[Int] = [] + var usedDetectedIndex:Set = Set() + + for (pi, person) in persons.enumerated() { + var bestIOU:CGFloat = 0 + var bestIndex = 0 + + for (i, detected) in boxesAndScoresAndFeatures.enumerated() { + let IoU = overlapPercentage(rect1: person.box, rect2: detected.0) + if IoU > bestIOU { + bestIOU = IoU + bestIndex = i + } + } + if bestIOU >= 50 { + let detectedPerson = boxesAndScoresAndFeatures[bestIndex] + persons[pi].update(box: detectedPerson.0, score: detectedPerson.1, features: detectedPerson.2) + usedDetectedIndex.insert(bestIndex) + } else { + unDetectedPersonIndexes.append(pi) + } + } + + let sortedIndices = unDetectedPersonIndexes.sorted(by: >) + for index in sortedIndices { + persons[index].unDetectedCounter += 1 + } + + for (index, det) in boxesAndScoresAndFeatures.enumerated() { + if !usedDetectedIndex.contains(index) { + var person = Person(index: personIndex) + person.update(box: det.0, score: det.1, features: det.2) + personIndex += 1 + persons.append(person) + } + } + + persons = removeOverlappingRects(persons: persons) + + var personsToShow: [Person] = [] + var removePersonIndexes: [Int] = [] + for (pindex, person) in persons.enumerated() { + if person.unDetectedCounter == 0 { + personsToShow.append(person) + } else if person.unDetectedCounter >= 15 { + removePersonIndexes.append(pindex) + } + } + let sortedRemoveIndices = removePersonIndexes.sorted(by: >) + for index in sortedRemoveIndices { + persons.remove(at: index) + } + + return personsToShow + + } +} + +func overlapPercentage(rect1: CGRect, rect2: CGRect) -> CGFloat { + let intersection = rect1.intersection(rect2) + + if intersection.isNull { + return 0.0 + } + + let intersectionArea = intersection.width * intersection.height + + let rect1Area = rect1.width * rect1.height + + let overlapPercentage = (intersectionArea / rect1Area) * 100 + + return overlapPercentage +} + +func removeOverlappingRects(persons: [Person], threshold: CGFloat = 90.0) -> [Person] { + var filteredPersons = persons + var index = 0 + + while index < filteredPersons.count { + var shouldRemove = false + for j in (index + 1)..= threshold { + shouldRemove = true + break + } + } + if shouldRemove { + filteredPersons.remove(at: index) + } else { + index += 1 + } + } + + return filteredPersons +} diff --git a/YOLO/VideoCapture.swift b/YOLO/VideoCapture.swift index 79aaf99..0c9db9c 100644 --- a/YOLO/VideoCapture.swift +++ b/YOLO/VideoCapture.swift @@ -43,7 +43,7 @@ public class VideoCapture: NSObject { let videoOutput = AVCaptureVideoDataOutput() var cameraOutput = AVCapturePhotoOutput() let queue = DispatchQueue(label: "camera-queue") - + // Configures the camera and capture session with optional session presets. 
public func setUp(sessionPreset: AVCaptureSession.Preset = .hd1280x720, completion: @escaping (Bool) -> Void) { queue.async { @@ -86,9 +86,22 @@ public class VideoCapture: NSObject { if captureSession.canAddOutput(cameraOutput) { captureSession.addOutput(cameraOutput) } - - videoOutput.connection(with: .video)?.videoOrientation = .portrait - + switch UIDevice.current.orientation { + case .portrait: + videoOutput.connection(with: .video)?.videoOrientation = .portrait + case .portraitUpsideDown: + videoOutput.connection(with: .video)?.videoOrientation = .portraitUpsideDown + case .landscapeRight: + videoOutput.connection(with: .video)?.videoOrientation = .landscapeLeft + case .landscapeLeft: + videoOutput.connection(with: .video)?.videoOrientation = .landscapeRight + default: + videoOutput.connection(with: .video)?.videoOrientation = .portrait + } + + if let connection = videoOutput.connection(with: .video) { + self.previewLayer?.connection?.videoOrientation = connection.videoOrientation + } do { try captureDevice.lockForConfiguration() captureDevice.focusMode = .continuousAutoFocus @@ -119,6 +132,24 @@ public class VideoCapture: NSObject { captureSession.stopRunning() } } + + func updateVideoOrientation() { + guard let connection = videoOutput.connection(with: .video) else { return } + switch UIDevice.current.orientation { + case .portrait: + connection.videoOrientation = .portrait + case .portraitUpsideDown: + connection.videoOrientation = .portraitUpsideDown + case .landscapeRight: + connection.videoOrientation = .landscapeLeft + case .landscapeLeft: + connection.videoOrientation = .landscapeRight + default: + return + } + self.previewLayer?.connection?.videoOrientation = connection.videoOrientation + } + } // Extension to handle AVCaptureVideoDataOutputSampleBufferDelegate events. diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 9ab212e..c346287 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -23,20 +23,33 @@ class ViewController: UIViewController { @IBOutlet var videoPreview: UIView! @IBOutlet var View0: UIView! @IBOutlet var segmentedControl: UISegmentedControl! + @IBOutlet weak var taskSegmentControl: UISegmentedControl! + @IBOutlet weak var trackingLabel: UILabel! + @IBOutlet weak var trackingSwitch: UISwitch! @IBOutlet var playButtonOutlet: UIBarButtonItem! @IBOutlet var pauseButtonOutlet: UIBarButtonItem! @IBOutlet var slider: UISlider! @IBOutlet var sliderConf: UISlider! + @IBOutlet weak var sliderConfLandScape: UISlider! @IBOutlet var sliderIoU: UISlider! + @IBOutlet weak var sliderIoULandScape: UISlider! @IBOutlet weak var labelName: UILabel! @IBOutlet weak var labelFPS: UILabel! @IBOutlet weak var labelZoom: UILabel! @IBOutlet weak var labelVersion: UILabel! @IBOutlet weak var labelSlider: UILabel! @IBOutlet weak var labelSliderConf: UILabel! + @IBOutlet weak var labelSliderConfLandScape: UILabel! @IBOutlet weak var labelSliderIoU: UILabel! + @IBOutlet weak var labelSliderIoULandScape: UILabel! + @IBOutlet weak var playButtonLandScape: UIButton! + @IBOutlet weak var pauseButtonLandScape: UIButton! + @IBOutlet weak var shareButtonLandScape: UIButton! @IBOutlet weak var activityIndicator: UIActivityIndicatorView! - + + @IBOutlet weak var toolbar: UIToolbar! + @IBOutlet weak var forcus: UIImageView! + var maskLayer: CALayer = CALayer() let selection = UISelectionFeedbackGenerator() var detector = try! VNCoreMLModel(for: mlModel) var session: AVCaptureSession! 
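// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the VideoCapture changes above
// map UIDeviceOrientation to AVCaptureVideoOrientation, deliberately swapping
// the landscape cases (device .landscapeLeft -> capture .landscapeRight and
// vice versa) because device orientation is defined relative to the home
// button while the capture connection is defined relative to the camera.
// A standalone version of that mapping (helper name is illustrative):

import AVFoundation
import UIKit

func captureOrientation(for device: UIDeviceOrientation) -> AVCaptureVideoOrientation? {
    switch device {
    case .portrait:           return .portrait
    case .portraitUpsideDown: return .portraitUpsideDown
    case .landscapeLeft:      return .landscapeRight
    case .landscapeRight:     return .landscapeLeft
    default:                  return nil   // .faceUp/.faceDown/.unknown: leave unchanged
    }
}

// Usage (mirrors updateVideoOrientation above):
// if let orientation = captureOrientation(for: UIDevice.current.orientation) {
//     videoOutput.connection(with: .video)?.videoOrientation = orientation
//     previewLayer?.connection?.videoOrientation = orientation
// }
// ---------------------------------------------------------------------------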
@@ -49,12 +62,12 @@ class ViewController: UIViewController { var t3 = CACurrentMediaTime() // FPS start var t4 = 0.0 // FPS dt smoothed // var cameraOutput: AVCapturePhotoOutput! - + // Developer mode let developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings let save_detections = false // write every detection to detections.txt let save_frames = false // write every frame to frames.txt - + lazy var visionRequest: VNCoreMLRequest = { let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in @@ -64,54 +77,205 @@ class ViewController: UIViewController { request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop return request }() + + enum Task { + case detect + case human + case seg + } + + var task: Task = .detect + var confidenceThreshold:Float = 0.25 + var iouThreshold:Float = 0.4 + var tracking = false + var tracker = TrackingModel() + var segmentCount = 0 + + var screenshotImageView:UIImageView? override func viewDidLoad() { super.viewDidLoad() slider.value = 30 + taskSegmentControl.selectedSegmentIndex = 0 setLabels() setUpBoundingBoxViews() + setUpOrientationChangeNotification() startVideo() // setModel() } + + override func viewWillTransition(to size: CGSize, with coordinator: any UIViewControllerTransitionCoordinator) { + super.viewWillTransition(to: size, with: coordinator) + + if size.width > size.height { + labelSliderConf.isHidden = true + sliderConf.isHidden = true + labelSliderIoU.isHidden = true + sliderIoU.isHidden = true + labelSliderConfLandScape.isHidden = false + sliderConfLandScape.isHidden = false + labelSliderIoULandScape.isHidden = false + sliderIoULandScape.isHidden = false + toolbar.isHidden = true + playButtonLandScape.isHidden = false + pauseButtonLandScape.isHidden = false + shareButtonLandScape.isHidden = false + + } else { + labelSliderConf.isHidden = false + sliderConf.isHidden = false + labelSliderIoU.isHidden = false + sliderIoU.isHidden = false + labelSliderConfLandScape.isHidden = true + sliderConfLandScape.isHidden = true + labelSliderIoULandScape.isHidden = true + sliderIoULandScape.isHidden = true + toolbar.isHidden = false + playButtonLandScape.isHidden = true + pauseButtonLandScape.isHidden = true + shareButtonLandScape.isHidden = true + } + self.videoCapture.previewLayer?.frame = CGRect(x: 0, y: 0, width: size.width, height: size.height) + coordinator.animate(alongsideTransition: { context in + }, completion: { context in + self.setupMaskLayer() + }) + } + + private func setUpOrientationChangeNotification() { + NotificationCenter.default.addObserver(self, selector: #selector(orientationDidChange), name: UIDevice.orientationDidChangeNotification, object: nil) + } + + @objc func orientationDidChange() { + videoCapture.updateVideoOrientation() + } + @IBAction func vibrate(_ sender: Any) { selection.selectionChanged() } - + @IBAction func indexChanged(_ sender: Any) { + self.removeAllMaskSubLayers() selection.selectionChanged() activityIndicator.startAnimating() - - /// Switch model - switch segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - mlModel = try! yolov8n(configuration: .init()).model - case 1: - self.labelName.text = "YOLOv8s" - mlModel = try! yolov8s(configuration: .init()).model - case 2: - self.labelName.text = "YOLOv8m" - mlModel = try! yolov8m(configuration: .init()).model - case 3: - self.labelName.text = "YOLOv8l" - mlModel = try! 
yolov8l(configuration: .init()).model - case 4: - self.labelName.text = "YOLOv8x" - mlModel = try! yolov8x(configuration: .init()).model - default: - break - } setModel() setUpBoundingBoxViews() activityIndicator.stopAnimating() } - + func setModel() { + + /// Switch model + switch task { + case .detect: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + mlModel = try! yolov8n(configuration: .init()).model + case 1: + self.labelName.text = "YOLOv8s" + mlModel = try! yolov8s(configuration: .init()).model + case 2: + self.labelName.text = "YOLOv8m" + mlModel = try! yolov8m(configuration: .init()).model + case 3: + self.labelName.text = "YOLOv8l" + mlModel = try! yolov8l(configuration: .init()).model + case 4: + self.labelName.text = "YOLOv8x" + mlModel = try! yolov8x(configuration: .init()).model + default: + break + } + case .human: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + if #available(iOS 15.0, *) { + mlModel = try! yolov8n_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 1: + self.labelName.text = "YOLOv8s" + if #available(iOS 15.0, *) { + mlModel = try! yolov8s_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 2: + self.labelName.text = "YOLOv8m" + if #available(iOS 15.0, *) { + mlModel = try! yolov8m_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 3: + self.labelName.text = "YOLOv8l" + if #available(iOS 15.0, *) { + mlModel = try! yolov8l_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 4: + self.labelName.text = "YOLOv8x" + if #available(iOS 15.0, *) { + mlModel = try! yolov8x_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + + default: + break + } + case .seg: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + if #available(iOS 15.0, *) { + mlModel = try! yolov8n_seg(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 1: + self.labelName.text = "YOLOv8s" + if #available(iOS 15.0, *) { + mlModel = try! yolov8s_seg(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 2: + self.labelName.text = "YOLOv8m" + if #available(iOS 15.0, *) { + mlModel = try! yolov8m_seg(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 3: + self.labelName.text = "YOLOv8l" + if #available(iOS 15.0, *) { + mlModel = try! yolov8l_seg(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 4: + self.labelName.text = "YOLOv8x" + if #available(iOS 15.0, *) { + mlModel = try! yolov8x_seg(configuration: .init()).model + } else { + // Fallback on earlier versions + } + default:break + } + + } + DispatchQueue.global(qos: .userInitiated).async { [self] in + /// VNCoreMLModel detector = try! 
VNCoreMLModel(for: mlModel) detector.featureProvider = ThresholdProvider() - + /// VNCoreMLRequest let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in self?.processObservations(for: request, error: error) @@ -121,20 +285,69 @@ class ViewController: UIViewController { t2 = 0.0 // inference dt smoothed t3 = CACurrentMediaTime() // FPS start t4 = 0.0 // FPS dt smoothed + } } - + /// Update thresholds from slider values @IBAction func sliderChanged(_ sender: Any) { + self.confidenceThreshold = sliderConf.value + self.iouThreshold = sliderIoU.value let conf = Double(round(100 * sliderConf.value)) / 100 let iou = Double(round(100 * sliderIoU.value)) / 100 self.labelSliderConf.text = String(conf) + " Confidence Threshold" self.labelSliderIoU.text = String(iou) + " IoU Threshold" detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) } - + + @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { + self.removeAllMaskSubLayers() + + switch sender.selectedSegmentIndex { + case 0: + if self.task != .detect { + self.trackingLabel.isHidden = true + self.trackingSwitch.isHidden = true + self.task = .detect + self.setModel() + } + case 1: + if self.task != .human { + self.task = .human + for i in 0.. 19 { + count = 0 + } + if colors[label] == nil { // if key not in dict + colors[label] = color + } + } + + count = 0 + for (key,color) in colors { + let color = ultralyticsColorsolors[count] + count += 1 + if count > 19 { + count = 0 + } + guard let colorForMask = color.toRGBComponents() else {fatalError()} + colorsForMask.append(colorForMask) } } + + } - + func startVideo() { videoCapture = VideoCapture() videoCapture.delegate = self - + videoCapture.setUp(sessionPreset: .photo) { success in // .hd4K3840x2160 or .photo (4032x3024) Warning: 4k may not work on all devices i.e. 2019 iPod if success { @@ -254,22 +511,24 @@ class ViewController: UIViewController { self.videoPreview.layer.addSublayer(previewLayer) self.videoCapture.previewLayer?.frame = self.videoPreview.bounds // resize preview layer } + self.setupMaskLayer() + self.videoPreview.layer.addSublayer(self.maskLayer) // Add the bounding box layers to the UI, on top of the video preview. for box in self.boundingBoxViews { box.addToLayer(self.videoPreview.layer) } - + // Once everything is set up, we can start capturing live video. self.videoCapture.start() } } } - + func predict(sampleBuffer: CMSampleBuffer) { if currentBuffer == nil, let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { currentBuffer = pixelBuffer - + /// - Tag: MappingOrientation // The frame is always oriented based on the camera sensor, // so in most cases Vision needs to rotate it for the model to work as expected. 
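// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the timing labels updated in
// processObservations (hunks below) use an exponential moving average, where
// each new sample contributes 5 % and the running value keeps 95 %, so the
// FPS / latency readout stays stable frame to frame. Type name is illustrative.

struct SmoothedMetric {
    private(set) var value: Double = 0
    let alpha: Double

    init(alpha: Double = 0.05) { self.alpha = alpha }

    mutating func add(_ sample: Double) {
        value = sample * alpha + value * (1 - alpha)   // t2 = t1*0.05 + t2*0.95
    }
}
// e.g. inferenceTime.add(t1); the FPS label would show 1 / frameTime.value
// ---------------------------------------------------------------------------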
@@ -280,16 +539,16 @@ class ViewController: UIViewController { case .portraitUpsideDown: imageOrientation = .down case .landscapeLeft: - imageOrientation = .left + imageOrientation = .up case .landscapeRight: - imageOrientation = .right + imageOrientation = .up case .unknown: - print("The device orientation is unknown, the predictions may be affected") - fallthrough + imageOrientation = .up + default: imageOrientation = .up } - + // Invoke a VNRequestHandler with that image let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: imageOrientation, options: [:]) if UIDevice.current.orientation != .faceUp { // stop if placed down on a table @@ -301,34 +560,90 @@ class ViewController: UIViewController { } t1 = CACurrentMediaTime() - t0 // inference dt } - + currentBuffer = nil } } - + func processObservations(for request: VNRequest, error: Error?) { - DispatchQueue.main.async { - if let results = request.results as? [VNRecognizedObjectObservation] { - self.show(predictions: results) - } else { - self.show(predictions: []) + switch task { + case .detect: + DispatchQueue.main.async { + if let results = request.results as? [VNRecognizedObjectObservation] { + self.show(predictions: results, persons: [],processedBoxAndMasks: []) + } else { + self.show(predictions: [], persons: [],processedBoxAndMasks: []) + } + + // Measure FPS + if self.t1 < 10.0 { // valid dt + self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time + } + self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS + self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms + self.t3 = CACurrentMediaTime() } + case .human: + if let results = request.results as? [VNCoreMLFeatureValueObservation] { + DispatchQueue.main.async { + + if let prediction = results.first?.featureValue.multiArrayValue { + + let pred = PostProcessHuman(prediction:prediction, confidenceThreshold: self.confidenceThreshold, iouThreshold: self.iouThreshold) + var persons:[Person] = [] + if !self.tracking { + persons = toPerson(boxesAndScoresAndFeatures: pred) + } else { + persons = self.tracker.track(boxesAndScoresAndFeatures: pred) + } + self.show(predictions: [], persons: persons, processedBoxAndMasks: []) + } else { + self.show(predictions: [], persons: [],processedBoxAndMasks: []) + } + if self.t1 < 10.0 { // valid dt + self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time + } + self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS + self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms + self.t3 = CACurrentMediaTime() + } + } + case .seg: + if let results = request.results as? [VNCoreMLFeatureValueObservation] { + DispatchQueue.main.async { [self] in + guard results.count == 2 else { return } + let masks = results[0].featureValue.multiArrayValue + let pred = results[1].featureValue.multiArrayValue + let processed = getBoundingBoxesAndMasks(feature: pred!, confidenceThreshold: 0.25, iouThreshold: 0.4) + + self.show(predictions: [], persons: [], processedBoxAndMasks:processed) + DispatchQueue.main.async { + let a = Date() + self.updateMaskAndBoxes(detectedObjects: processed, maskArray: masks!) 
+ print(Date().timeIntervalSince(a)) + } + - // Measure FPS - if self.t1 < 10.0 { // valid dt - self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time + if self.t1 < 10.0 { // valid dt + self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time + } + self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS + self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms + self.t3 = CACurrentMediaTime() + } } - self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS - self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms - self.t3 = CACurrentMediaTime() } } - + + func measureFPS() { + + } + // Save text file func saveText(text: String, file: String = "saved.txt") { if let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first { let fileURL = dir.appendingPathComponent(file) - + // Writing do { // Append to file if it exists let fileHandle = try FileHandle(forWritingTo: fileURL) @@ -342,12 +657,12 @@ class ViewController: UIViewController { print("no file written") } } - + // Reading // do {let text2 = try String(contentsOf: fileURL, encoding: .utf8)} catch {/* error handling here */} } } - + // Save image file func saveImage() { let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first @@ -355,7 +670,7 @@ class ViewController: UIViewController { let image = UIImage(named: "ultralytics_yolo_logotype.png") FileManager.default.createFile(atPath: fileURL.path, contents: image!.jpegData(compressionQuality: 0.5), attributes: nil) } - + // Return hard drive space (GB) func freeSpace() -> Double { let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String) @@ -367,7 +682,7 @@ class ViewController: UIViewController { } return 0 } - + // Return RAM usage (GB) func memoryUsage() -> Double { var taskInfo = mach_task_basic_info() @@ -383,138 +698,157 @@ class ViewController: UIViewController { return 0 } } - - func show(predictions: [VNRecognizedObjectObservation]) { - let width = videoPreview.bounds.width // 375 pix - let height = videoPreview.bounds.height // 812 pix + + func show(predictions: [VNRecognizedObjectObservation], persons: [Person], processedBoxAndMasks:[(CGRect,Int,Float, MLMultiArray)]) { + let width = videoPreview.bounds.width + let height = videoPreview.bounds.height var str = "" - - // ratio = videoPreview AR divided by sessionPreset AR + var ratio: CGFloat = 1.0 + if videoCapture.captureSession.sessionPreset == .photo { - ratio = (height / width) / (4.0 / 3.0) // .photo + ratio = (height / width) / (4.0 / 3.0) } else { - ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc. 
+ ratio = (height / width) / (16.0 / 9.0) } - - // date + let date = Date() let calendar = Calendar.current let hour = calendar.component(.hour, from: date) let minutes = calendar.component(.minute, from: date) let seconds = calendar.component(.second, from: date) let nanoseconds = calendar.component(.nanosecond, from: date) - let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day - - self.labelSlider.text = String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" + let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 + + var resultCount = 0 + + switch task { + case .detect: + resultCount = predictions.count + case .human: + resultCount = persons.count + case .seg: + resultCount = processedBoxAndMasks.count + } + self.labelSlider.text = String(resultCount) + " items (max " + String(Int(slider.value)) + ")" for i in 0..= 1 { // iPhone ratio = 1.218 - let offset = (1 - ratio) * (0.5 - rect.minX) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) - rect = rect.applying(transform) - rect.size.width *= ratio - } else { // iPad ratio = 0.75 - let offset = (ratio - 1) * (0.5 - rect.maxY) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) - rect = rect.applying(transform) - rect.size.height /= ratio - } - - // Scale normalized to pixels [375, 812] [width, height] - rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height)) - - // The labels array is a list of VNClassificationObservation objects, - // with the highest scoring class first in the list. - let bestClass = prediction.labels[0].identifier - let confidence = prediction.labels[0].confidence - // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels) - - // Show the bounding box. - boundingBoxViews[i].show(frame: rect, - label: String(format: "%@ %.1f", bestClass, confidence * 100), - color: colors[bestClass] ?? 
UIColor.white, - alpha: CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) - + if ratio >= 1 { + let offset = (1 - ratio) * (0.5 - displayRect.minX) + if task == .detect { + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) + displayRect = displayRect.applying(transform) + } else { + let transform = CGAffineTransform(translationX: offset, y: 0) + displayRect = displayRect.applying(transform) + } + displayRect.size.width *= ratio + } else { + if task == .detect { + let offset = (ratio - 1) * (0.5 - displayRect.maxY) + + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) + displayRect = displayRect.applying(transform) + } else { + let offset = (ratio - 1) * (0.5 - displayRect.minY) + let transform = CGAffineTransform(translationX: 0, y: offset) + displayRect = displayRect.applying(transform) + } + ratio = (height / width) / (3.0 / 4.0) + displayRect.size.height /= ratio + } + displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) + + boundingBoxViews[i].show(frame: displayRect, label: label, color: boxColor, alpha: alpha, innerTexts: innerTexts) + if developerMode { - // Write if save_detections { str += String(format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) } - - // Action trigger upon detection - // if false { - // if (bestClass == "car") { // "cell phone", "car", "person" - // self.takePhoto(nil) - // // self.pauseButton(nil) - // sleep(2) - // } - // } } + } else { boundingBoxViews[i].hide() } } - - // Write - if developerMode { - if save_detections { - saveText(text: str, file: "detections.txt") // Write stats for each detection - } - if save_frames { - str = String(format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n", - sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, - self.t1 * 1000, self.t2 * 1000, 1 / self.t4) - saveText(text: str, file: "frames.txt") // Write stats for each image - } - } - - // Debug - // print(str) - // print(UIDevice.current.identifierForVendor!) 
- // saveImage() } // Pinch to Zoom Start --------------------------------------------------------------------------------------------- let minimumZoom: CGFloat = 1.0 let maximumZoom: CGFloat = 10.0 var lastZoomFactor: CGFloat = 1.0 - + @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { let device = videoCapture.captureDevice - + // Return zoom value between the minimum and maximum zoom values func minMaxZoom(_ factor: CGFloat) -> CGFloat { return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) } - + func update(scale factor: CGFloat) { do { try device.lockForConfiguration() @@ -526,7 +860,7 @@ class ViewController: UIViewController { print("\(error.localizedDescription)") } } - + let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) switch pinch.state { case .began: fallthrough @@ -540,7 +874,44 @@ class ViewController: UIViewController { self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) default: break } - } // Pinch to Zoom Start ------------------------------------------------------------------------------------------ + } // Pinch to Zoom Start + + func showShareAlert(image: UIImage) { + let alertController = UIAlertController(title: "Do you want to share this image?", message: nil, preferredStyle: .alert) + + let shareAction = UIAlertAction(title: "OK", style: .default) { _ in + self.shareImage(image: image) + } + + let cancelAction = UIAlertAction(title: "Cancel", style: .cancel) { _ in + self.hideScreenshotImageView() + } + + alertController.addAction(shareAction) + alertController.addAction(cancelAction) + + if let popoverController = alertController.popoverPresentationController { + popoverController.sourceView = self.view + popoverController.sourceRect = CGRect(x: self.view.bounds.midX, y: self.view.bounds.maxY - 100, width: 0, height: 0) + popoverController.permittedArrowDirections = [] + } + + present(alertController, animated: true, completion: nil) + } + + func shareImage(image: UIImage) { + let activityViewController = UIActivityViewController(activityItems: [image], applicationActivities: nil) + activityViewController.popoverPresentationController?.sourceView = self.View0 + self.present(activityViewController, animated: true) { + self.hideScreenshotImageView() + } + } + + func hideScreenshotImageView() { + self.screenshotImageView?.removeFromSuperview() + self.screenshotImageView = nil + } + // ------------------------------------------------------------------------------------------ } // ViewController class End extension ViewController: VideoCaptureDelegate { @@ -558,7 +929,17 @@ extension ViewController: AVCapturePhotoCaptureDelegate { if let dataImage = photo.fileDataRepresentation() { let dataProvider = CGDataProvider(data: dataImage as CFData) let cgImageRef: CGImage! 
= CGImage(jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) - let image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: UIImage.Orientation.right) + var orientation = UIImage.Orientation.right + switch UIDevice.current.orientation { + case .landscapeLeft: + orientation = .up + case .landscapeRight: + orientation = .down + default: + break + } + var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: orientation) + let imageView = UIImageView(image: image) imageView.contentMode = .scaleAspectFill imageView.frame = videoPreview.frame @@ -573,9 +954,18 @@ extension ViewController: AVCapturePhotoCaptureDelegate { let img = UIGraphicsGetImageFromCurrentImageContext() UIGraphicsEndImageContext() imageLayer.removeFromSuperlayer() - let activityViewController = UIActivityViewController(activityItems: [img!], applicationActivities: nil) - activityViewController.popoverPresentationController?.sourceView = self.View0 - self.present(activityViewController, animated: true, completion: nil) + + let screenshotImageView = UIImageView(image: img) + screenshotImageView.frame = view.bounds + screenshotImageView.contentMode = .scaleAspectFit + view.addSubview(screenshotImageView) + self.screenshotImageView = screenshotImageView + + UIView.animate(withDuration: 0.3, animations: { + screenshotImageView.frame = CGRect(x: 20, y: 100, width: self.view.bounds.width - 40, height: self.view.bounds.height - 200) + }) { _ in + self.showShareAlert(image: img!) + } // // // Save to camera roll // UIImageWriteToSavedPhotosAlbum(img!, nil, nil, nil);
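// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the photo-capture delegate
// above picks a UIImage.Orientation from the current device orientation,
// because the back camera delivers sensor-oriented JPEG data (.right for
// portrait, .up / .down for the two landscape cases). A standalone version of
// that mapping, with an illustrative helper name:

import UIKit

func capturedImageOrientation(for device: UIDeviceOrientation) -> UIImage.Orientation {
    switch device {
    case .landscapeLeft:  return .up
    case .landscapeRight: return .down
    default:              return .right   // portrait and face-up/down fall back to .right
    }
}

// Usage: UIImage(cgImage: cgImageRef, scale: 0.5,
//                orientation: capturedImageOrientation(for: UIDevice.current.orientation))
// ---------------------------------------------------------------------------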