Keypoint detection


I have this Xcode project I'm trying to run. I've written a function to draw the keypoints and display them, but they don't seem to be appearing:

import Vision
import AVFoundation
import UIKit

extension ViewController {
    
    func setupDetector() {
        // Load the compiled Core ML model from the app bundle
        guard let modelURL = Bundle.main.url(forResource: "Ninkwell", withExtension: "mlmodelc") else {
            print("Could not find Ninkwell.mlmodelc in the bundle")
            return
        }
        
        do {
            let visionModel = try VNCoreMLModel(for: MLModel(contentsOf: modelURL))
            let recognitions = VNCoreMLRequest(model: visionModel, completionHandler: detectionDidComplete)
            self.requests = [recognitions]
        } catch let error {
            print(error)
        }
    }
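    
    // A possible gotcha (an assumption, not verified against this model):
    // VNCoreMLRequest itself scales/crops the input image to the model's
    // expected size, controlled by imageCropAndScaleOption. Since frames are
    // also manually cropped to 90x160 in captureOutput below, it may help to
    // set the option explicitly inside setupDetector so the two steps agree,
    // e.g.:
    //
    //     recognitions.imageCropAndScaleOption = .scaleFill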
    
    func detectionDidComplete(request: VNRequest, error: Error?) {
        DispatchQueue.main.async(execute: {
            if let results = request.results {
                self.extractDetections(results)
            }
        })
    }
    

    
    func extractDetections(_ results: [VNObservation]) {
        detectionLayer.sublayers = nil

        for case let objectObservation as VNCoreMLFeatureValueObservation in results {
            
            // Assuming the output is MultiArray type with shape [1,8]
            if let multiArrayValue = objectObservation.featureValue.multiArrayValue {
                let x1 = multiArrayValue[0].floatValue
                let y1 = multiArrayValue[1].floatValue
                let x2 = multiArrayValue[2].floatValue
                let y2 = multiArrayValue[3].floatValue
                let x3 = multiArrayValue[4].floatValue
                let y3 = multiArrayValue[5].floatValue
                let x4 = multiArrayValue[6].floatValue
                let y4 = multiArrayValue[7].floatValue
                

                let imgWidth: Float = 90  // replace with your image's width
                let imgHeight: Float = 160 // replace with your image's height

                let normalizedX1 = x1 / imgWidth
                let normalizedY1 = y1 / imgHeight
                let normalizedX2 = x2 / imgWidth
                let normalizedY2 = y2 / imgHeight
                let normalizedX3 = x3 / imgWidth
                let normalizedY3 = y3 / imgHeight
                let normalizedX4 = x4 / imgWidth
                let normalizedY4 = y4 / imgHeight
                
                // Print the normalized keypoints (2 and 4 omitted)
                print("Keypoint 1: (\(normalizedX1), \(normalizedY1))")
                print("Keypoint 3: (\(normalizedX3), \(normalizedY3))")
                
                // Draw all four keypoints
                drawKeypoint(normalizedX1, normalizedY1)
                drawKeypoint(normalizedX2, normalizedY2)
                drawKeypoint(normalizedX3, normalizedY3)
                drawKeypoint(normalizedX4, normalizedY4)
            }
        }
    }

    
    func drawKeypoint(_ x: Float, _ y: Float) {
        print(x, y)
        let radius: CGFloat = 100.0 // Adjust as needed
        let circleLayer = CALayer()
        circleLayer.bounds = CGRect(x: 0, y: 0, width: radius * 2, height: radius * 2)

        // Map the normalized keypoint (bottom-left origin) to UIKit coordinates (top-left origin)
        let point = CGPoint(x: CGFloat(x) * screenRect.size.width, y: screenRect.size.height - CGFloat(y) * screenRect.size.height)
        
        circleLayer.position = point
        circleLayer.cornerRadius = radius
        circleLayer.backgroundColor = UIColor.red.cgColor // Red circles
        detectionLayer.addSublayer(circleLayer)
    }
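
    // Worked example of the transform in drawKeypoint (assuming a
    // 390x844-point screen): a normalized keypoint (0.5, 0.25) maps to
    //   x = 0.5 * 390        = 195
    //   y = 844 - 0.25 * 844 = 633
    // i.e. a quarter of the way up from the bottom, matching Vision's
    // bottom-left origin convention. Values outside 0...1 land off-screen.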

    
    func setupLayers() {
        detectionLayer = CALayer()
        detectionLayer.frame = CGRect(x: 0, y: 0, width: screenRect.size.width, height: screenRect.size.height)
        self.view.layer.addSublayer(detectionLayer)
    }
    
    func updateLayers() {
        detectionLayer?.frame = CGRect(x: 0, y: 0, width: screenRect.size.width, height: screenRect.size.height)
    }
    


    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
        
        // Convert the camera frame to a UIImage so it can be center-cropped
        let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
        let context = CIContext(options: nil)

        guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else { return }
        let image = UIImage(cgImage: cgImage)
        
        // Crop to the model's 90x160 input size and convert back to a pixel buffer
        guard let processedImage = preprocess(image: image, targetSize: CGSize(width: 90, height: 160)) else { return }
        guard let processedPixelBuffer = processedImage.pixelBuffer(width: 90, height: 160) else { return }
        let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: processedPixelBuffer, orientation: .up, options: [:])

        do {
            try imageRequestHandler.perform(self.requests)
        } catch {
            print(error)
        }
    }
    
    
    
}
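
For debugging, a small guard like this sketch might help confirm whether the normalized values actually land inside the visible 0...1 range before drawing (the helper name isVisibleKeypoint is made up for illustration; it assumes drawKeypoint expects normalized coordinates, as above):

func isVisibleKeypoint(_ x: Float, _ y: Float) -> Bool {
    // drawKeypoint multiplies by the screen size, so anything outside
    // 0...1 is positioned beyond the layer bounds and never appears.
    let visible = x >= 0 && x <= 1 && y >= 0 && y <= 1
    if !visible {
        print("Keypoint (\(x), \(y)) is outside 0...1 and will be drawn off-screen")
    }
    return visible
}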




This is the view controller:

import UIKit
import SwiftUI
import AVFoundation
import Vision



class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    private var permissionGranted = false // Flag for permission
    private let captureSession = AVCaptureSession()
    private let sessionQueue = DispatchQueue(label: "sessionQueue")
    private var previewLayer = AVCaptureVideoPreviewLayer()
    var screenRect: CGRect! = nil // For view dimensions
    
    // Detector
    private var videoOutput = AVCaptureVideoDataOutput()
    var requests = [VNRequest]()
    var detectionLayer: CALayer! = nil
    
      
    override func viewDidLoad() {
        super.viewDidLoad()
        checkPermission()
        
        sessionQueue.async { [unowned self] in
            guard permissionGranted else { return }
            self.setupCaptureSession()
            
            self.setupLayers()
            self.setupDetector()
            
            self.captureSession.startRunning()
        }
    }
    
    override func willTransition(to newCollection: UITraitCollection, with coordinator: UIViewControllerTransitionCoordinator) {
        screenRect = UIScreen.main.bounds
        self.previewLayer.frame = CGRect(x: 0, y: 0, width: screenRect.size.width, height: screenRect.size.height)

        switch UIDevice.current.orientation {
            // Home button on top
            case UIDeviceOrientation.portraitUpsideDown:
                self.previewLayer.connection?.videoOrientation = .portraitUpsideDown
             
            // Home button on right
            case UIDeviceOrientation.landscapeLeft:
                self.previewLayer.connection?.videoOrientation = .landscapeRight
            
            // Home button on left
            case UIDeviceOrientation.landscapeRight:
                self.previewLayer.connection?.videoOrientation = .landscapeLeft
             
            // Home button at bottom
            case UIDeviceOrientation.portrait:
                self.previewLayer.connection?.videoOrientation = .portrait
                
            default:
                break
            }
        
        // Detector
        updateLayers()
    }
    
    func checkPermission() {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
            // Permission has been granted before
            case .authorized:
                permissionGranted = true
                
            // Permission has not been requested yet
            case .notDetermined:
                requestPermission()
                    
            default:
                permissionGranted = false
            }
    }
    
    func requestPermission() {
        sessionQueue.suspend()
        AVCaptureDevice.requestAccess(for: .video) { [unowned self] granted in
            self.permissionGranted = granted
            self.sessionQueue.resume()
        }
    }
    
    // Center-crop the image to the target size (note: this crops, it does not scale)
    func preprocess(image: UIImage, targetSize: CGSize) -> UIImage? {
        guard let cgImage = image.cgImage else { return nil }
        
        // Centered crop rect in the source image's coordinate space
        let x = (image.size.width - targetSize.width) / 2.0
        let y = (image.size.height - targetSize.height) / 2.0
        let cropRect = CGRect(x: x, y: y, width: targetSize.width, height: targetSize.height)
        
        if let imageRef = cgImage.cropping(to: cropRect) {
            return UIImage(cgImage: imageRef)
        }
        
        return nil
    }

    
    func setupCaptureSession() {
        // Camera input
        guard let videoDevice = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .back) else { return }
        guard let videoDeviceInput = try? AVCaptureDeviceInput(device: videoDevice) else { return }
           
        guard captureSession.canAddInput(videoDeviceInput) else { return }
        captureSession.addInput(videoDeviceInput)
        
        // Set the session preset
        if captureSession.canSetSessionPreset(.hd1280x720) {
            captureSession.sessionPreset = .hd1280x720
        } else {
            print("Could not set session preset to .hd1280x720")
        }
                         
        // Preview layer
        screenRect = UIScreen.main.bounds
        
        previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
        previewLayer.frame = CGRect(x: 0, y: 0, width: screenRect.size.width, height: screenRect.size.height)
        previewLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill // Fill screen
        previewLayer.connection?.videoOrientation = .portrait
        
        // Detector
        videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "sampleBufferQueue"))
        captureSession.addOutput(videoOutput)
        
        videoOutput.connection(with: .video)?.videoOrientation = .portrait
        
        // Updates to UI must be on main queue
        DispatchQueue.main.async { [weak self] in
            guard let strongSelf = self else { return }
            strongSelf.view.layer.addSublayer(strongSelf.previewLayer)
            strongSelf.view.layer.addSublayer(strongSelf.detectionLayer)
        }
    }
}

struct HostedViewController: UIViewControllerRepresentable {
    func makeUIViewController(context: Context) -> UIViewController {
        return ViewController()
    }

    func updateUIViewController(_ uiViewController: UIViewController, context: Context) {
    }
}

extension UIImage {
    func pixelBuffer(width: Int, height: Int) -> CVPixelBuffer? {
        let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue, kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] as CFDictionary
        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(kCFAllocatorDefault, width, height, kCVPixelFormatType_32ARGB, attrs, &pixelBuffer)
        guard status == kCVReturnSuccess else { return nil }

        CVPixelBufferLockBaseAddress(pixelBuffer!, CVPixelBufferLockFlags(rawValue: 0))
        let pixelData = CVPixelBufferGetBaseAddress(pixelBuffer!)

        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(data: pixelData, width: width, height: height, bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer!), space: rgbColorSpace, bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue) else { return nil }

        context.translateBy(x: 0, y: CGFloat(height))
        context.scaleBy(x: 1.0, y: -1.0)

        UIGraphicsPushContext(context)
        self.draw(in: CGRect(x: 0, y: 0, width: width, height: height))
        UIGraphicsPopContext()

        CVPixelBufferUnlockBaseAddress(pixelBuffer!, CVPixelBufferLockFlags(rawValue: 0))

        return pixelBuffer
    }
}
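
Since extractDetections hard-codes a [1, 8] MultiArray layout and a 90x160 input size, it may also be worth printing the model's own description once to confirm both assumptions (a minimal sketch; it just reuses the "Ninkwell" name from setupDetector):

import CoreML

// One-off check of what the compiled model actually expects and returns,
// before trusting the hard-coded indices 0...7 and the 90x160 input size.
if let url = Bundle.main.url(forResource: "Ninkwell", withExtension: "mlmodelc"),
   let model = try? MLModel(contentsOf: url) {
    print(model.modelDescription.inputDescriptionsByName)
    print(model.modelDescription.outputDescriptionsByName)
}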

I tried printing the normalized values from the ML model:

Keypoint 1: (3.5027778, 3.3)
Keypoint 3: (2.7347221, 4.23125)
3.5027778 3.3
4.3444443 3.290625
2.7347221 4.23125
3.3027778 4.459375

They looked OK to me.
