How To Use Vision Hand Pose in SwiftUI
I wanted to title this piece how to use Hand Pose in iOS since that’s the most popular application. However, you can also just copy and paste this code into a Playground and run it seamlessly on both iPad and Mac.
Anyway, the code. Firstly, this project uses a lot of AVFoundation and UIKit. I’ll try to stick to the most important parts in this article but leave comments for some of the niche moments.
Setting Up The Camera
If you’re using CoreML Vision then your device needs a way to see — the camera. Unfortunately, we have to write UIKit code. I’ll try to keep this short and comment-ridden. I have also linked a bunch of material to help you if you get confused.
Firstly, we’ll create 3 Swift Files.
⓵ CameraPreview ⓶ CameraView ⓷ CameraViewController
Camera Preview
We won’t really use this so just make it and forget it.
import UIKit
import AVFoundation
/// A UIView whose backing layer is an AVCaptureVideoPreviewLayer,
/// so the live camera feed renders directly inside the view hierarchy.
final class CameraPreview: UIView {
    // Tell UIKit to back this view with a preview layer instead of a plain CALayer.
    override class var layerClass: AnyClass {
        return AVCaptureVideoPreviewLayer.self
    }

    /// The view's layer, typed as a preview layer for convenient access.
    /// Safe to force-cast because `layerClass` guarantees the layer type.
    var previewLayer: AVCaptureVideoPreviewLayer {
        return layer as! AVCaptureVideoPreviewLayer
    }
}
Camera View
This is going to throw an error but it’s fine because we’re going to write the view controller next. Also, this is where we would put an EnvironmentObject if we were trying to share data between views.
import SwiftUI
/// Bridges `CameraViewController` (UIKit) into SwiftUI.
struct CameraView: UIViewControllerRepresentable {
    /// Forwarded to the controller; receives fingertip points in
    /// preview-layer coordinates each time a frame is processed.
    var pointsProcessorHandler: (([CGPoint]) -> Void)?

    func makeUIViewController(context: Context) -> CameraViewController {
        let controller = CameraViewController()
        // Hand our callback to the controller so detections flow back to SwiftUI.
        controller.pointsProcessorHandler = pointsProcessorHandler
        return controller
    }

    func updateUIViewController(_ uiViewController: CameraViewController, context: Context) {
        // No SwiftUI state needs to be pushed into the controller for this app.
    }
}
Camera View Controller
Here we are setting up the Camera and Vision Request.
import AVFoundation
import UIKit
import Vision
/// Errors thrown during camera setup.
/// NOTE(review): Swift convention is UpperCamelCase for type names and
/// lowerCamelCase for cases (e.g. `enum CameraError: Error { case setupFailed }`).
/// The rename is deliberately not applied here to keep existing call sites
/// (`throw errors.CameraError`) unchanged.
enum errors: Error{
case CameraError
}
/// Owns the camera capture session and the Vision hand-pose request.
/// Fingertip points (normalized capture-device coordinates) are converted to
/// preview-layer coordinates and handed to `pointsProcessorHandler` for SwiftUI.
final class CameraViewController : UIViewController{
    /// The live capture session; created lazily on first appearance.
    private var cameraFeedSession: AVCaptureSession?

    override func loadView() {
        // Replace the default view so `view.layer` is an AVCaptureVideoPreviewLayer.
        view = CameraPreview()
    }

    /// Convenience cast of `view`; safe because `loadView()` installs a CameraPreview.
    private var cameraView: CameraPreview{ view as! CameraPreview}

    override func viewDidLoad() {
        super.viewDidLoad()
    }

    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        do{
            // Build the session only once; reuse it on later appearances.
            if cameraFeedSession == nil{
                try setupAVSession()
                cameraView.previewLayer.session = cameraFeedSession
                //MARK: Commented out because it cropped the view finder
                // cameraView.previewLayer.videoGravity = .resizeAspectFill
            }
            //MARK: startRunning() is wrapped in a background queue because calling
            // it on the main thread can block/crash the UI.
            DispatchQueue.global(qos: .userInteractive).async {
                self.cameraFeedSession?.startRunning()
            }
        }catch{
            // NOTE(review): setup failures are only logged; consider surfacing them in UI.
            print(error.localizedDescription)
        }
    }

    override func viewDidDisappear(_ animated: Bool) {
        // Stop the feed when the view goes away to release the camera.
        cameraFeedSession?.stopRunning()
        super.viewDidDisappear(animated)
    }

    /// Serial queue on which video sample buffers are delivered.
    private let videoDataOutputQueue =
    DispatchQueue(label: "CameraFeedOutput", qos: .userInteractive)

    /// Configures an AVCaptureSession: front wide-angle camera as input and a
    /// video-data output delivering frames to `videoDataOutputQueue`.
    /// - Throws: `errors.CameraError` if the device, input, or output cannot be set up.
    func setupAVSession() throws {
        //Start of Camera setup
        guard let videoDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front) else {
            throw errors.CameraError
        }
        guard let deviceInput = try? AVCaptureDeviceInput(device: videoDevice) else{
            throw errors.CameraError
        }
        let session = AVCaptureSession()
        session.beginConfiguration()
        //You can change the quality of the view-finder media from this line
        session.sessionPreset = AVCaptureSession.Preset.high
        guard session.canAddInput(deviceInput) else{
            throw errors.CameraError
        }
        session.addInput(deviceInput)
        let dataOutput = AVCaptureVideoDataOutput()
        if session.canAddOutput(dataOutput){
            session.addOutput(dataOutput)
            // Drop frames we can't process in time instead of queueing them up.
            dataOutput.alwaysDiscardsLateVideoFrames = true
            dataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
        }else{
            throw errors.CameraError
        }
        session.commitConfiguration()
        cameraFeedSession = session
    }

    //MARK: Vision Init Below
    /// The hand-pose request, configured once and reused every frame.
    private let handPoseRequest : VNDetectHumanHandPoseRequest = {
        let request = VNDetectHumanHandPoseRequest()
        // Here is where we limit the number of hands Vision can detect at a single given moment
        request.maximumHandCount = 1
        return request
    }()

    /// SwiftUI callback receiving fingertip points in preview-layer coordinates.
    var pointsProcessorHandler: (([CGPoint]) -> Void)?

    /// Converts normalized capture-device points to preview-layer coordinates
    /// and forwards them to `pointsProcessorHandler`.
    func processPoints(_ fingerTips: [CGPoint]) {
        let convertedPoints = fingerTips.map {
            cameraView.previewLayer.layerPointConverted(fromCaptureDevicePoint: $0)
        }
        pointsProcessorHandler?(convertedPoints)
    }
}
So that was the setup. Below is an extension that handles the fingertip detection.
//MARK: - AVCaptureVideoDataOutputSampleBufferDelegate
extension CameraViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    /// The five fingertip joints we extract from each hand observation.
    /// Replaces five copy-pasted per-finger blocks (and their force unwraps)
    /// with a single data-driven loop.
    private static let fingerTipJoints: [VNHumanHandPoseObservation.JointName] = [
        .thumbTip, .indexTip, .middleTip, .ringTip, .littleTip
    ]

    /// Runs the hand-pose request on each frame and reports high-confidence
    /// fingertip locations back on the main thread.
    /// Called on `videoDataOutputQueue` for every delivered sample buffer.
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        var fingerTips: [CGPoint] = []
        // Always publish (possibly empty) results so stale dots are cleared.
        defer {
            DispatchQueue.main.sync {
                self.processPoints(fingerTips)
            }
        }
        // NOTE(review): `.up` orientation is kept from the original; for a
        // front camera a mirrored orientation may be more accurate — verify.
        let handler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer, orientation: .up, options: [:])
        do {
            try handler.perform([handPoseRequest])
            // `maximumHandCount` is 1, so at most one observation arrives; the
            // original's `prefix(2)` was redundant and has been dropped.
            guard let observations = handPoseRequest.results, !observations.isEmpty else {
                return
            }
            var recognizedPoints: [VNRecognizedPoint] = []
            for observation in observations {
                let joints = try observation.recognizedPoints(.all)
                for jointName in Self.fingerTipJoints {
                    // Single 0.9 threshold. The original appended at > 0.7 and
                    // then filtered at > 0.9, so 0.9 was the effective cutoff.
                    if let point = joints[jointName], point.confidence > 0.9 {
                        recognizedPoints.append(point)
                    }
                }
            }
            // Vision's Y axis is bottom-up; flip it for layer coordinates.
            fingerTips = recognizedPoints.map {
                CGPoint(x: $0.location.x, y: 1 - $0.location.y)
            }
        } catch {
            // NOTE(review): stopping the session on a Vision error silently
            // freezes the feed; consider surfacing the error instead.
            cameraFeedSession?.stopRunning()
        }
    }
}
Content View
At this point the code is done. Here is our Content View.
It’s going to show a FingersOverlay when it detects a hand.
import SwiftUI
/// Root view: the live camera feed with green dots drawn over any
/// detected fingertips.
struct ContentView: View {
    /// Fingertip locations (preview-layer coordinates) published by the camera.
    @State private var overlayPoints: [CGPoint] = []

    /// The camera feed overlaid with the fingertip markers.
    /// Renamed from `CameraViewFinder`: Swift properties use lowerCamelCase.
    private var cameraViewFinder: some View {
        CameraView { overlayPoints = $0 }
            .overlay(
                FingersOverlay(with: overlayPoints)
                    .foregroundColor(.green)
            )
            .ignoresSafeArea()
    }

    var body: some View {
        ZStack {
            cameraViewFinder
        }
    }
}
/// Draws a small circle at each detected fingertip.
/// Bug fix: the original stored a single `UIBezierPath` in a `let` property.
/// `UIBezierPath` is a reference type, so every copy of the shape shared it and
/// every call to `path(in:)` appended more arcs without clearing — the overlay
/// accumulated stale dots as SwiftUI re-evaluated the shape. A fresh SwiftUI
/// `Path` is now built locally on each call.
struct FingersOverlay: Shape {
    /// Fingertip centers in the overlay's coordinate space.
    let points: [CGPoint]

    init(with points: [CGPoint]) {
        self.points = points
    }

    func path(in rect: CGRect) -> Path {
        var path = Path()
        for point in points {
            path.move(to: point)
            // Full circle of radius 5 around each fingertip.
            path.addArc(center: point,
                        radius: 5,
                        startAngle: .radians(0),
                        endAngle: .radians(2 * .pi),
                        clockwise: true)
        }
        return path
    }
}
// Xcode canvas preview for ContentView.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View { ContentView() }
}
We’re done. Congrats.
Sample Projects And Helpful Articles
Hand Pose Article With Details On What A Request Can Do
Apple WWDC20 Video On Hand Pose
My WWDC23 Student Challenge Hand Pose Project
Best tutorial and project if you’re getting started and confused about the UIKit parts.