I have a ViewController which implements both the AVCaptureVideoDataOutputSampleBufferDelegate and AVCaptureDepthDataOutputDelegate protocols. I want to collect both video and depth data. The video data is used to perform Vision ML requests, and the depth data is used to calculate the distance from the camera to a specific point in the camera frame.
extension MainRecognizerViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        DispatchQueue.main.async {
            self.captureSessionManager.manageFlashlight(for: sampleBuffer, force: nil)
        }

        guard let cvPixelBuffer = sampleBuffer.convertToPixelBuffer() else {
            return
        }

        let exifOrientation = exifOrientationFromDeviceOrientation()
        let handler = VNImageRequestHandler(cvPixelBuffer: cvPixelBuffer,
                                            orientation: exifOrientation)
        let objectsRecognitionRequest = prepareVisionRequestForObjectsRecognition(
            pixelBuffer: cvPixelBuffer
        )

        DispatchQueue.global().async {
            try? handler.perform([objectsRecognitionRequest])
            try? handler.perform(self.roadLightsRecognizerRequests)
            try? handler.perform(self.pedestrianCrossingRecognizerRequests)
        }
    }
}
and
extension MainRecognizerViewController: AVCaptureDepthDataOutputDelegate {
    func depthDataOutput(_ output: AVCaptureDepthDataOutput,
                         didOutput depthData: AVDepthData,
                         timestamp: CMTime,
                         connection: AVCaptureConnection) {
        if depthMeasurementsLeftInLoop == 0 {
            depthMeasurementsCumul = 0.0
            depthMeasurementMin = 9999.9
            depthMeasurementMax = 0.0
            depthMeasurementsLeftInLoop = depthMeasurementRepeats
        }

        if depthMeasurementsLeftInLoop > 0 {
            let convertedDepthData: AVDepthData = depthData.converting(
                toDepthDataType: kCVPixelFormatType_DepthFloat16
            )
            let depthFrame = convertedDepthData.depthDataMap
            let depthPoint = CGPoint(x: CGFloat(CVPixelBufferGetWidth(depthFrame)) / 2,
                                     y: CGFloat(CVPixelBufferGetHeight(depthFrame)) / 2)
            let depthVal = getDepthValueFromFrame(fromFrame: depthFrame,
                                                  atPoint: depthPoint)
            print(depthVal)

            let measurement = depthVal * 100

            depthMeasurementsCumul += measurement
            if measurement > depthMeasurementMax {
                depthMeasurementMax = measurement
            }
            if measurement < depthMeasurementMin {
                depthMeasurementMin = measurement
            }
            depthMeasurementsLeftInLoop -= 1

            // let printStr = String(format: "Measurement %d: %.2f cm",
            //                       depthMeasurementRepeats - depthMeasurementsLeftInLoop, measurement)

            DispatchQueue.main.async { [weak self] in
                self?.distanceMeasurerViewModel?.distanceString = String(format: "%.2f", measurement)
            }
        }
    }
}
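For completeness, convertToPixelBuffer() and getDepthValueFromFrame(fromFrame:atPoint:) are small custom helpers. They look roughly like this (a sketch of plausible implementations, assuming the depth map is kCVPixelFormatType_DepthFloat16; Float16 requires a recent Swift toolchain):

import AVFoundation
import CoreGraphics

extension CMSampleBuffer {
    // Pulls the pixel buffer out of a sample buffer.
    func convertToPixelBuffer() -> CVPixelBuffer? {
        CMSampleBufferGetImageBuffer(self)
    }
}

// Lives in MainRecognizerViewController; reads the depth value (in metres) at a given point.
func getDepthValueFromFrame(fromFrame pixelBuffer: CVPixelBuffer,
                            atPoint point: CGPoint) -> Float32 {
    CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

    guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
        return 0
    }

    // Clamp the requested point to the buffer bounds.
    let width = CVPixelBufferGetWidth(pixelBuffer)
    let height = CVPixelBufferGetHeight(pixelBuffer)
    let x = min(max(Int(point.x), 0), width - 1)
    let y = min(max(Int(point.y), 0), height - 1)

    // kCVPixelFormatType_DepthFloat16 stores one 16-bit float per pixel.
    let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
    let rowPointer = baseAddress.advanced(by: y * bytesPerRow)
    let depthPointer = rowPointer.assumingMemoryBound(to: Float16.self)
    return Float32(depthPointer[x])
}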
I perform the whole camera setup in the CaptureSessionManager class:
import AVFoundation
final class CaptureSessionManager: CaptureSessionManaging {
    @Inject private var flashlightManager: FlashlightManaging

    private let captureSessionQueue = DispatchQueue(label: "captureSessionQueue")
    private let captureSessionDataOutputQueue = DispatchQueue(
        label: "captureSessionVideoDataOutput",
        qos: .userInitiated,
        attributes: [],
        autoreleaseFrequency: .workItem
    )

    private var sampleBufferOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
    private var sampleBufferDelegate: AVCaptureVideoDataOutputSampleBufferDelegate?
    private var depthDataOutput: AVCaptureDepthDataOutput = AVCaptureDepthDataOutput()
    private var depthDataOutputDelegate: AVCaptureDepthDataOutputDelegate?

    var cameraMode: CameraMode?
    private var desiredFrameRate: Double?

    private var videoDevice: AVCaptureDevice? = AVCaptureDevice.default(
        .builtInLiDARDepthCamera,
        for: .video,
        position: .back
    )

    var bufferSize: CGSize = .zero
    var captureSession: AVCaptureSession!

    func setUp(with sampleBufferDelegate: AVCaptureVideoDataOutputSampleBufferDelegate,
               and depthDataOutputDelegate: AVCaptureDepthDataOutputDelegate,
               for cameraMode: CameraMode,
               cameraPosition: AVCaptureDevice.Position,
               desiredFrameRate: Double,
               completion: @escaping () -> ()) {
        stopCaptureSession()

        self.sampleBufferDelegate = sampleBufferDelegate
        self.depthDataOutputDelegate = depthDataOutputDelegate
        self.cameraMode = cameraMode
        self.desiredFrameRate = desiredFrameRate

        authorizeCaptureSession {
            completion()
        }
    }

    func manageFlashlight(for sampleBuffer: CMSampleBuffer?,
                          force torchMode: AVCaptureDevice.TorchMode?) {
        flashlightManager.manageFlashlight(for: sampleBuffer,
                                           and: self.videoDevice,
                                           force: torchMode)
    }

    private func authorizeCaptureSession(completion: @escaping () -> ()) {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized:
            setupCaptureSession {
                completion()
            }
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: .video) { [weak self] granted in
                if granted {
                    self?.setupCaptureSession {
                        completion()
                    }
                }
            }
        default:
            return
        }
    }
    private func setupCaptureSession(completion: @escaping () -> ()) {
        captureSessionQueue.async { [unowned self] in
            var captureSession: AVCaptureSession = AVCaptureSession()
            captureSession.beginConfiguration()

            guard let videoDevice = videoDevice else {
                return
            }

            do {
                let captureDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
                guard captureSession.canAddInput(captureDeviceInput) else {
                    return
                }
                captureSession.addInput(captureDeviceInput)
            } catch {
                return
            }

            let sessionPreset: SessionPreset = .hd1280x720

            guard let videoSetupedCaptureSession: AVCaptureSession = setupCaptureSessionForVideo(
                captureSession: captureSession,
                sessionPreset: sessionPreset
            ) else {
                return
            }

            guard let depthAndVideoSetupedCaptureSession = setupCaptureSessionForDepth(
                captureSession: videoSetupedCaptureSession
            ) else {
                return
            }

            captureSession = depthAndVideoSetupedCaptureSession
            captureSession.sessionPreset = sessionPreset.preset
            captureSession.commitConfiguration()

            self.captureSession = captureSession
            self.startCaptureSession()
            completion()
        }
    }
    private func setupCaptureSessionForVideo(captureSession: AVCaptureSession,
                                             sessionPreset: SessionPreset) -> AVCaptureSession? {
        let captureSessionVideoOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
        captureSessionVideoOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(
                value: kCMPixelFormat_32BGRA
            )
        ]
        captureSessionVideoOutput.alwaysDiscardsLateVideoFrames = true
        captureSessionVideoOutput.setSampleBufferDelegate(
            self.sampleBufferDelegate,
            queue: captureSessionDataOutputQueue
        )

        guard let videoDevice = videoDevice else {
            return nil
        }

        var formatToSet: AVCaptureDevice.Format = videoDevice.formats[0]

        guard let desiredFrameRate = desiredFrameRate else {
            return nil
        }

        for format in videoDevice.formats.reversed() {
            let ranges = format.videoSupportedFrameRateRanges
            let frameRates = ranges[0]

            if desiredFrameRate <= frameRates.maxFrameRate,
               format.formatDescription.dimensions.width == sessionPreset.formatWidth,
               format.formatDescription.dimensions.height == sessionPreset.formatHeight {
                formatToSet = format
                break
            }
        }

        do {
            try videoDevice.lockForConfiguration()

            if videoDevice.hasTorch {
                self.manageFlashlight(for: nil, force: .auto)
            }

            let dimensions = CMVideoFormatDescriptionGetDimensions(videoDevice.activeFormat.formatDescription)
            bufferSize.width = CGFloat(dimensions.width)
            bufferSize.height = CGFloat(dimensions.height)

            videoDevice.activeFormat = formatToSet

            let timescale = CMTimeScale(desiredFrameRate)
            if videoDevice.activeFormat.videoSupportedFrameRateRanges[0].maxFrameRate >= desiredFrameRate {
                videoDevice.activeVideoMinFrameDuration = CMTime(value: 1, timescale: timescale)
                videoDevice.activeVideoMaxFrameDuration = CMTime(value: 1, timescale: timescale)
            }

            videoDevice.unlockForConfiguration()
        } catch {
            return nil
        }

        guard captureSession.canAddOutput(captureSessionVideoOutput) else {
            return nil
        }
        captureSession.addOutput(captureSessionVideoOutput)

        // The video connection only exists after the output has been added to the session.
        let captureConnection = captureSessionVideoOutput.connection(with: .video)
        captureConnection?.isEnabled = true

        if let cameraMode = self.cameraMode,
           CameraMode.modesWithPortraitVideoConnection.contains(cameraMode) {
            captureSessionVideoOutput.connection(with: .video)?.videoOrientation = .portrait
        }

        return captureSession
    }
    private func setupCaptureSessionForDepth(captureSession: AVCaptureSession) -> AVCaptureSession? {
        guard let depthDataOutputDelegate = depthDataOutputDelegate else {
            return nil
        }

        if captureSession.canAddOutput(depthDataOutput) {
            captureSession.addOutput(depthDataOutput)
            depthDataOutput.isFilteringEnabled = false
        } else {
            return nil
        }

        if let connection = depthDataOutput.connection(with: .depthData) {
            connection.isEnabled = true
            depthDataOutput.isFilteringEnabled = false
            depthDataOutput.setDelegate(
                depthDataOutputDelegate,
                callbackQueue: captureSessionDataOutputQueue
            )
        } else {
            return nil
        }

        guard let videoDevice = videoDevice else {
            return nil
        }

        let availableFormats = videoDevice.activeFormat.supportedDepthDataFormats
        let availableHdepFormats = availableFormats.filter { f in
            CMFormatDescriptionGetMediaSubType(f.formatDescription) == kCVPixelFormatType_DepthFloat16
        }
        let selectedFormat = availableHdepFormats.max(by: { lower, higher in
            CMVideoFormatDescriptionGetDimensions(lower.formatDescription).width <
                CMVideoFormatDescriptionGetDimensions(higher.formatDescription).width
        })

        do {
            try videoDevice.lockForConfiguration()
            videoDevice.activeDepthDataFormat = selectedFormat
            videoDevice.unlockForConfiguration()
        } catch {
            return nil
        }

        return captureSession
    }
    func startCaptureSession() {
        self.captureSession?.startRunning()
    }

    func stopCaptureSession() {
        self.captureSession?.stopRunning()
    }
}
The problem is that I receive depth data updates too slowly: captureOutput from AVCaptureVideoDataOutputSampleBufferDelegate executes much more frequently than depthDataOutput from AVCaptureDepthDataOutputDelegate.
What might be the cause?
I managed to solve it. The cause of the problem was that a single queue in CaptureSessionManager was used for gathering both the video data output and the depth data output, which led to unexpected results.
I added one more queue dedicated to the depth data output and set it as the depth delegate's callback queue.
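In CaptureSessionManager the change looks roughly like this (the queue name and QoS below are illustrative, not necessarily the exact values from my project):

// A second, dedicated queue for the depth output:
private let captureSessionDepthDataOutputQueue = DispatchQueue(
    label: "captureSessionDepthDataOutput",
    qos: .userInitiated,
    attributes: [],
    autoreleaseFrequency: .workItem
)

// ...and in setupCaptureSessionForDepth, the depth delegate gets its own queue
// instead of sharing captureSessionDataOutputQueue with the video output:
depthDataOutput.setDelegate(
    depthDataOutputDelegate,
    callbackQueue: captureSessionDepthDataOutputQueue
)

With the two outputs delivering on separate queues, the slow depth callbacks no longer compete with the much more frequent video callbacks.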