Non-realtime pitch shift function for swift or objective c

55 Views Asked by At

I'm adding some new features to the audio player in my app KeyStage. One of the features that I wanna add is the transpose functionality. I found a real-time pitch shifter unit in AVAudioEngine, but it outputs the signals with approximately 0.1-second latency, which renders it useless.

I don't know anything about digital signal processing (I hope to learn it at some point when I have time). Still, after some web search, I discovered that all the real-time pitch-shifting algorithms produce a similar amount of latency, so real-time pitch-shifting is out of the table for me.

Therefore I need to change the pitch of the whole audio file or the buffer in non-real time in Swift or Objective-C. So I need a function that takes an AVAudioFile (or an AVAudioPCMBuffer) and a pitch shift amount in cents and outputs the pitch-shifted AVAudioFile (or AVAudioPCMBuffer), possibly through a completion handler or a message to delegate. I searched online for such a project or library but couldn't find any. I wish I could write such a function independently, but as I said, I don't know any DSP.

I'd appreciate it if anyone can help me with this. Can third-party libraries do this in objective c (or Swift)? Or, if this is a simple algorithm, can someone guide me on how I can achieve this?

I even tried using chatGPT, but the codes it generates have tons of errors.

Here is what chatGPT provided me:

import Foundation
import Accelerate

func pitchShiftUsingFFT(audioFile: AVAudioFile, semitones: Float) -> AVAudioPCMBuffer? {
    
    // Load input audio file
    guard let inputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: audioFile.fileFormat.sampleRate, channels: 1, interleaved: false),
          let inputFile = try? AVAudioFile(forReading: audioFile.url, commonFormat: .pcmFormatFloat32, interleaved: false) else {
        print("Failed to load input audio file")
        return nil
    }
    
    // Calculate pitch shift factor and ratio
    let pitchShiftFactor = pow(2.0, semitones/12.0)
    let pitchShiftRatio = Float(inputFormat.sampleRate) / (pitchShiftFactor * Float(inputFormat.sampleRate))
    
    // Allocate memory for input and output audio buffers
    let inputBufferSize = AVAudioFrameCount(inputFile.length)
    let outputBufferSize = AVAudioFrameCount(Float(inputBufferSize) * pitchShiftFactor)
    let inputBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: inputBufferSize)
    let outputBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: outputBufferSize)
    
    // Read input audio file into input buffer
    guard let inputAudioBuffer = inputBuffer?.floatChannelData else {
        print("Failed to allocate memory for input buffer")
        return nil
    }
    do {
        try inputFile.read(into: inputBuffer!)
    } catch {
        print("Failed to read input audio file")
        return nil
    }
    
    // Allocate memory for FFT buffers
    let fftSize = vDSP_Length(ceil(log2(Float(inputBufferSize))))
    let fftSetup = vDSP_create_fftsetup(fftSize, Int32(kFFTRadix2))
    let fftInputBuffer = UnsafeMutablePointer<Float>(calloc(inputBufferSize, MemoryLayout<Float>.size))
    let fftOutputBuffer = UnsafeMutablePointer<DSPComplex>(calloc(inputBufferSize/2, MemoryLayout<DSPComplex>.size))
    
    // Apply pitch shift using FFT
    for channel in 0..<inputFormat.channelCount {
        // Initialize variables
        var phase = DSPFloat()
        let phaseIncrement = 2.0 * Float.pi * pitchShiftRatio
        var fftOffset = vDSP_Length(0)
        
        // Copy input audio data into FFT input buffer
        vDSP_vflt32(inputAudioBuffer[channel], 1, fftInputBuffer, 1, inputBufferSize)
        
        // Perform FFT on input data
        vDSP_ctoz(UnsafePointer<DSPFloat>(fftInputBuffer), 2, fftOutputBuffer, 1, inputBufferSize/2)
        vDSP_fft_zrip(fftSetup, fftOutputBuffer, 1, fftSize, FFTDirection(FFT_FORWARD))
        vDSP_ztoc(fftOutputBuffer, 1, UnsafeMutablePointer<DSPFloat>(fftInputBuffer), 2, inputBufferSize/2)
        
        // Apply phase shift to FFT output
        let phaseShift = DSPFloat(phaseIncrement * Float(fftOffset))
        for i in 0..<inputBufferSize/2 {
            let fftReal = fftInputBuffer[i*2]
            let fftImag = fftInputBuffer[i*2+1]
            let complex = DSPComplex(real: fftReal * cos(phase) - fftImag * sin(phase),
                                      imaginary: fftReal * sin(phase) + fftImag * cos(phase))
            fftOutputBuffer[i] = complex
            phase += phaseShift
            if phase > Float.pi {
                phase -= 2.0 * Float.pi
            }
        }
        
        // Perform inverse FFT on phase shifted output
        vDSP_ctoz(fftOutputBuffer, 1, fftOutputBuffer, 1, inputBufferSize/2)
        vDSP_fft_zrip(fftSetup, fftOutputBuffer, 1, fftSize, FFTDirection(FFT_INVERSE))
        vDSP_ztoc(fftOutputBuffer, 1, UnsafeMutablePointer<DSPFloat>(fftInputBuffer), 2, inputBufferSize/2)
        
        // Overlap-add FFT output to output audio buffer
        var outputOffset = vDSP_Length(0)
        let hopSize = vDSP_Length(Float(grainSize) * pitchShiftRatio)
        let overlapSize = vDSP_Length(Float(overlap) * pitchShiftRatio)
        while outputOffset < outputBufferSize {
            let outputRemaining = outputBufferSize - outputOffset
            let inputRemaining = inputBufferSize - fftOffset
            let frameCount = min(inputRemaining, outputRemaining)
            let overlapCount = min(frameCount, overlapSize)
            
            // Add overlapped frames to output audio buffer
            vDSP_vadd(inputAudioBuffer[channel]+fftOffset, 1,
                      outputBuffer!.floatChannelData[channel]+outputOffset, 1,
                      outputBuffer!.floatChannelData[channel]+outputOffset, 1,
                      overlapCount)
            
            // Add remaining frames to output audio buffer
            vDSP_vadd(fftInputBuffer+fftOffset, 1,
                      outputBuffer!.floatChannelData[channel]+outputOffset+overlapCount, 1,
                      outputBuffer!.floatChannelData[channel]+outputOffset+overlapCount, 1,
                      frameCount-overlapCount)
            
            // Update buffer offsets
            outputOffset += frameCount
            fftOffset += hopSize
        }
    }
    
    // Clean up FFT buffers and setup
    vDSP_destroy_fftsetup(fftSetup)
    free(fftInputBuffer)
    free(fftOutputBuffer)
    
    return outputBuffer
}
0

There are 0 best solutions below