Recording with audio_device_core_win introduces an audio delay of 2 seconds or longer when the computer is under heavy load.
The official Microsoft capture sample shows the same delay:
https://learn.microsoft.com/zh-TW/windows/win32/coreaudio/capturing-a-stream
#pragma once
#include <windows.h>
#include "WavWapper.h"
#include <wmcodecdsp.h>
#include <audioclient.h> // WASAPI
#include <audiopolicy.h>
#include <avrt.h> // Avrt
#include <endpointvolume.h>
#include <mediaobj.h> // IMediaObject
#include <mmdeviceapi.h> // MMDevice
#include <string>
// Captures audio from the default capture endpoint on a background thread
// and, while recording is switched on, hands the captured data to a
// WavWapper for writing to a file.
class AudioCapture
{
public:
AudioCapture();
// Requests that recording and capturing stop (see stopRecored/stopCaputre).
~AudioCapture();
public:
// Starts the background capture thread; does nothing if already running.
void startCaputre();
// Clears the running flag that the capture loop polls.
void stopCaputre();
// Sets the record flag; the capture loop opens a new output file when it sees it.
void startRecord();
// Clears the record flag; the capture loop closes the current output file when it sees it.
void stopRecored();
// Sets the directory prefix used to build output file names ("<path>/<n>.pcm").
void setRecordFile(const std::string &path);
private:
// Thread entry point; `context` is the AudioCapture instance.
static DWORD WINAPI WSAPICaptureThread(LPVOID context);
// Body of the capture thread; returns the last HRESULT as the thread exit code.
DWORD DoCaptureThread();
// Copies the device's friendly name (or a placeholder) into pszBuffer.
int32_t _GetDeviceName(IMMDevice* pDevice, LPWSTR pszBuffer, int bufferLen);
private:
// True while the capture loop should keep running. Written by the caller's
// thread and polled by the capture thread.
volatile bool m_bRuning;
// True while captured data should be written to a file.
volatile bool m_bRecord;
// Handle of the capture thread returned by CreateThread.
HANDLE m_hRecThread;
// Current output writer; created and destroyed by the capture thread.
WavWapper* m_pWavWapper;
// Directory prefix for output files, set through setRecordFile().
std::string m_strRecordPath;
// Channel counts to try when negotiating a capture format, in priority order.
uint16_t _recChannelsPrioList[3];
};
#include "AudioCapture.h"
#include <functiondiscoverykeys_devpkey.h>
#include <math.h>
#include <stdlib.h>
#include <chrono>
#include <iostream>
#include <string>
#include <vector>
// REFERENCE_TIME values are expressed in 100-nanosecond units, so these are
// the number of units per second and per millisecond.
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
// Maximum device-name length; not referenced in the code visible here.
static const int kAdmMaxDeviceNameSize = 128;
// Jumps to the enclosing function's `Exit:` label when `hres` is a failure
// HRESULT. The calling function must define that label.
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { goto Exit; }
// Releases a COM interface pointer if it is non-NULL and resets it to NULL.
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
// Class and interface identifiers used to create and query the Core Audio objects.
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
// Converts a null-terminated wide string to a multi-byte string in the OEM
// code page (CP_OEMCP).
//
// @param pWCStrKey  Null-terminated wide string; may be NULL.
// @return The converted string, or an empty string when the input is NULL,
//         empty, or cannot be converted.
std::string wideCharToMultiByte(wchar_t* pWCStrKey)
{
    if (pWCStrKey == NULL)
    {
        return std::string();
    }

    const int wideLen = static_cast<int>(wcslen(pWCStrKey));
    if (wideLen == 0)
    {
        return std::string();
    }

    // First call: ask how many bytes the converted string needs.
    const int byteLen = WideCharToMultiByte(CP_OEMCP, 0, pWCStrKey, wideLen, NULL, 0, NULL, NULL);
    if (byteLen <= 0)
    {
        return std::string();
    }

    // Second call: convert straight into the std::string's own storage, so
    // there is no temporary heap buffer that could be leaked.
    std::string converted(static_cast<size_t>(byteLen), '\0');
    const int written = WideCharToMultiByte(CP_OEMCP, 0, pWCStrKey, wideLen, &converted[0], byteLen, NULL, NULL);
    if (written <= 0)
    {
        return std::string();
    }
    converted.resize(static_cast<size_t>(written));
    return converted;
}
// Constructs an idle capturer: not running, not recording, no thread and no
// output writer. Also fills in the channel-count priority list used when
// negotiating the capture format.
AudioCapture::AudioCapture()
    : m_bRuning(false)
    , m_bRecord(false)
    , m_hRecThread(NULL) // was left uninitialized before
    , m_pWavWapper(nullptr)
{
    _recChannelsPrioList[0] = 2; // stereo is prio 1
    _recChannelsPrioList[1] = 1; // mono is prio 2
    _recChannelsPrioList[2] = 4; // quad is prio 3
}
// Requests that recording and then capturing stop before the object goes away.
AudioCapture::~AudioCapture()
{
// Turn recording off first, then ask the capture loop to end.
stopRecored();
stopCaputre();
}
void AudioCapture::startCaputre()
{
if (m_bRuning)
{
return;
}
m_bRuning = true;
LPTHREAD_START_ROUTINE lpStartAddress = WSAPICaptureThread;
m_hRecThread = CreateThread(NULL, 0, lpStartAddress, this, 0, NULL);
SetThreadPriority(m_hRecThread, THREAD_PRIORITY_TIME_CRITICAL);
}
void AudioCapture::stopCaputre()
{
if (!m_bRuning)
{
return;
}
m_bRuning = false;
}
// Switches recording on. A no-op when recording is already on.
void AudioCapture::startRecord()
{
    if (!m_bRecord)
    {
        m_bRecord = true;
    }
}
// Switches recording off. A no-op when recording is already off.
void AudioCapture::stopRecored()
{
    if (m_bRecord)
    {
        m_bRecord = false;
    }
}
// Stores the directory prefix under which the capture loop creates its
// numbered output files.
void AudioCapture::setRecordFile(const std::string& path)
{
    m_strRecordPath.assign(path);
}
// Thread entry point handed to CreateThread. `context` is the AudioCapture
// instance; the thread's exit code is whatever DoCaptureThread() returns.
DWORD WINAPI AudioCapture::WSAPICaptureThread(LPVOID context)
{
    AudioCapture* const self = static_cast<AudioCapture*>(context);
    const DWORD exitCode = self->DoCaptureThread();
    return exitCode;
}
// Body of the capture thread.
//
// Steps:
//   1. Initialize COM and open the default capture endpoint.
//   2. Negotiate a 16-bit PCM format, trying each sample rate in `freqs`
//      with each channel count in _recChannelsPrioList.
//   3. Initialize a shared-mode stream and poll it until m_bRuning is false.
//   4. While m_bRecord is true, write every packet to a WavWapper whose file
//      is "<m_strRecordPath>/<n>.pcm"; when m_bRecord goes false the file is
//      closed, and the next recording gets the next number.
//
// The loop sleeps for half the buffer duration between polls, so data can be
// delivered up to that long after it was captured.
//
// @return The last HRESULT, cast to DWORD, as the thread exit code.
DWORD AudioCapture::DoCaptureThread()
{
    // Every local is declared before the first `goto Exit`, so no jump skips
    // an initialization.
    HRESULT hr = S_OK;
    const REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration = 0;
    UINT32 bufferFrameCount = 0;
    UINT32 numFramesAvailable = 0;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    WAVEFORMATEX* pWfxClosestMatch = NULL;
    UINT32 packetLength = 0;
    BYTE* pData = NULL;
    DWORD flags = 0;
    int channnels = 0;
    int samplesPerSec = 0;
    int bitPerSample = 0;
    int blockAlign = 0;
    int nFileNumber = 0;
    bool comInitialized = false;
    bool captureStarted = false;
    WCHAR szDeviceName[MAX_PATH] = { 0 };
    const int bufferLen = sizeof(szDeviceName) / sizeof(szDeviceName[0]);
    WAVEFORMATEXTENSIBLE Wfx = WAVEFORMATEXTENSIBLE();
    const int freqs[6] = { 48000, 44100, 16000, 96000, 32000, 8000 };
    const unsigned int freqCount = sizeof(freqs) / sizeof(freqs[0]);
    const unsigned int chanCount = sizeof(_recChannelsPrioList) / sizeof(_recChannelsPrioList[0]);
    std::vector<BYTE> silence; // zero-filled scratch buffer for silent packets

    hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
    EXIT_ON_ERROR(hr)
    comInitialized = true; // every successful CoInitializeEx needs a CoUninitialize

    hr = CoCreateInstance(
        CLSID_MMDeviceEnumerator, NULL,
        CLSCTX_ALL, IID_IMMDeviceEnumerator,
        reinterpret_cast<void**>(&pEnumerator));
    EXIT_ON_ERROR(hr)

    hr = pEnumerator->GetDefaultAudioEndpoint(eCapture, eMultimedia, &pDevice);
    // The name is printed before the error check on purpose: _GetDeviceName
    // accepts a NULL device and then reports a placeholder name.
    _GetDeviceName(pDevice, szDeviceName, bufferLen);
    std::cout << "device:\"" << wideCharToMultiByte(szDeviceName) << "\"" << std::endl;
    EXIT_ON_ERROR(hr)

    hr = pDevice->Activate(
        IID_IAudioClient, CLSCTX_ALL,
        NULL, reinterpret_cast<void**>(&pAudioClient));
    EXIT_ON_ERROR(hr)

    // The mix format is only logged; the stream uses the negotiated format below.
    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr)
    std::cout << "nChannels : " << pwfx->nChannels << std::endl;
    std::cout << "nSamplesPerSec : " << pwfx->nSamplesPerSec << std::endl;
    std::cout << "nAvgBytesPerSec: " << pwfx->nAvgBytesPerSec << std::endl;
    std::cout << "nBlockAlign : " << pwfx->nBlockAlign << std::endl;
    std::cout << "wBitsPerSample : " << pwfx->wBitsPerSample << std::endl;
    std::cout << "cbSize : " << pwfx->cbSize << std::endl;

    // Fixed part of the candidate format: 16-bit PCM.
    Wfx.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    Wfx.Format.wBitsPerSample = 16;
    Wfx.Format.cbSize = 22;
    Wfx.dwChannelMask = 0;
    Wfx.Samples.wValidBitsPerSample = Wfx.Format.wBitsPerSample;
    Wfx.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;

    // Try each rate/channel combination in priority order; stop at the first
    // one the endpoint accepts as-is (S_OK).
    hr = S_FALSE;
    for (unsigned int freq = 0; freq < freqCount && hr != S_OK; freq++)
    {
        for (unsigned int chan = 0; chan < chanCount; chan++)
        {
            Wfx.Format.nChannels = _recChannelsPrioList[chan];
            Wfx.Format.nSamplesPerSec = freqs[freq];
            Wfx.Format.nBlockAlign =
                Wfx.Format.nChannels * Wfx.Format.wBitsPerSample / 8;
            Wfx.Format.nAvgBytesPerSec =
                Wfx.Format.nSamplesPerSec * Wfx.Format.nBlockAlign;

            hr = pAudioClient->IsFormatSupported(
                AUDCLNT_SHAREMODE_SHARED,
                reinterpret_cast<WAVEFORMATEX*>(&Wfx),
                &pWfxClosestMatch);
            if (hr == S_OK)
            {
                break;
            }

            if (pWfxClosestMatch)
            {
                std::cout << "nChannels=" << Wfx.Format.nChannels
                          << ", nSamplesPerSec=" << Wfx.Format.nSamplesPerSec
                          << " is not supported. Closest match: "
                             "nChannels="
                          << pWfxClosestMatch->nChannels << ", nSamplesPerSec="
                          << pWfxClosestMatch->nSamplesPerSec << std::endl;
                CoTaskMemFree(pWfxClosestMatch);
                pWfxClosestMatch = NULL;
            }
            else
            {
                std::cout << "nChannels=" << Wfx.Format.nChannels
                          << ", nSamplesPerSec=" << Wfx.Format.nSamplesPerSec
                          << " is not supported. No closest match." << std::endl;
            }
        }
    }

    if (hr == S_OK)
    {
        channnels = Wfx.Format.nChannels;
        samplesPerSec = Wfx.Format.nSamplesPerSec;
        bitPerSample = Wfx.Format.wBitsPerSample;
        blockAlign = Wfx.Format.nBlockAlign;
    }
    std::cout << "-------------------------------" << std::endl;
    std::cout << "channnels:" << channnels << std::endl;
    std::cout << "samplesPerSec:" << samplesPerSec << std::endl;
    std::cout << "bitPerSample:" << bitPerSample << std::endl;
    std::cout << "blockAlign:" << blockAlign << std::endl;

    if (hr != S_OK)
    {
        // No candidate was accepted. Do not initialize the stream with a
        // format the endpoint rejected; report a failure instead.
        if (SUCCEEDED(hr))
        {
            hr = AUDCLNT_E_UNSUPPORTED_FORMAT;
        }
        goto Exit;
    }

    // Pass the *requested* duration here. The actual duration is only known
    // after GetBufferSize() below.
    hr = pAudioClient->Initialize(
        AUDCLNT_SHAREMODE_SHARED,
        0,
        hnsRequestedDuration,
        0,
        reinterpret_cast<WAVEFORMATEX*>(&Wfx),
        NULL);
    EXIT_ON_ERROR(hr)

    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr)

    hr = pAudioClient->GetService(
        IID_IAudioCaptureClient,
        reinterpret_cast<void**>(&pCaptureClient));
    EXIT_ON_ERROR(hr)

    // Actual duration of the allocated buffer, based on the negotiated
    // sample rate (the rate the stream was initialized with).
    hnsActualDuration = static_cast<REFERENCE_TIME>(
        static_cast<double>(REFTIMES_PER_SEC) * bufferFrameCount / samplesPerSec);

    hr = pAudioClient->Start(); // Start recording.
    EXIT_ON_ERROR(hr)
    captureStarted = true;

    // Each pass drains whatever accumulated during the sleep (about half of
    // the shared buffer).
    while (m_bRuning)
    {
        // Sleep for half the buffer duration.
        Sleep(static_cast<DWORD>(hnsActualDuration / REFTIMES_PER_MILLISEC / 2));

        hr = pCaptureClient->GetNextPacketSize(&packetLength);
        EXIT_ON_ERROR(hr)

        while (packetLength != 0)
        {
            UINT64 recTime = 0;
            UINT64 recPos = 0;

            // Get the available data in the shared buffer.
            hr = pCaptureClient->GetBuffer(
                &pData,
                &numFramesAvailable,
                &flags, &recPos, &recTime);
            EXIT_ON_ERROR(hr)

            if (m_bRecord)
            {
                // Recording is on: open the next numbered file if needed,
                // then append this packet.
                if (m_pWavWapper == nullptr)
                {
                    ++nFileNumber;
                    std::string path = m_strRecordPath + "/" + std::to_string(nFileNumber) + ".pcm";
                    m_pWavWapper = new WavWapper;
                    m_pWavWapper->Init(path, channnels, samplesPerSec, bitPerSample);
                }

                if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
                {
                    // The packet content must be treated as silence. Write
                    // real zero samples rather than handing the writer a
                    // NULL pointer.
                    silence.assign(static_cast<size_t>(numFramesAvailable) * blockAlign, 0);
                    pData = silence.data();
                }

                if (m_pWavWapper)
                {
                    m_pWavWapper->Write(pData, numFramesAvailable * blockAlign);
                }
            }
            else if (m_pWavWapper)
            {
                // Recording was switched off: finish the current file.
                m_pWavWapper->CloseFile();
                delete m_pWavWapper;
                m_pWavWapper = nullptr;
            }

            hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
            EXIT_ON_ERROR(hr)

            hr = pCaptureClient->GetNextPacketSize(&packetLength);
            EXIT_ON_ERROR(hr)
        }
    }

Exit:
    // Stop the stream on every path that started it, keeping the first error.
    if (captureStarted)
    {
        const HRESULT hrStop = pAudioClient->Stop(); // Stop recording.
        if (SUCCEEDED(hr))
        {
            hr = hrStop;
        }
    }

    // Finish a file that was still open when the thread ended.
    if (m_pWavWapper)
    {
        m_pWavWapper->CloseFile();
        delete m_pWavWapper;
        m_pWavWapper = nullptr;
    }

    CoTaskMemFree(pWfxClosestMatch);
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pCaptureClient)
    SAFE_RELEASE(pAudioClient)
    SAFE_RELEASE(pDevice)
    SAFE_RELEASE(pEnumerator)
    if (comInitialized)
    {
        CoUninitialize();
    }
    return static_cast<DWORD>(hr);
}
// Writes the endpoint's friendly name into pszBuffer, truncating it to fit.
// When the name cannot be obtained (NULL device, property store or value
// unavailable, or the value is not a wide string) a placeholder is written
// instead.
//
// @param pDevice    Endpoint to query; may be NULL.
// @param pszBuffer  Destination buffer for the null-terminated name.
// @param bufferLen  Capacity of pszBuffer, in wide characters.
// @return Always 0.
int32_t AudioCapture::_GetDeviceName(IMMDevice* pDevice, LPWSTR pszBuffer, int bufferLen)
{
    static const WCHAR kPlaceholder[] = L"<Device not available>";

    IPropertyStore* store = NULL;
    HRESULT status = E_FAIL;

    // Container for the property value.
    PROPVARIANT friendlyName;
    PropVariantInit(&friendlyName);

    if (pDevice != NULL)
    {
        status = pDevice->OpenPropertyStore(STGM_READ, &store);
        if (FAILED(status))
        {
            std::cout << "IMMDevice::OpenPropertyStore failed";
        }
    }

    if (SUCCEEDED(status))
    {
        // Read the friendly-name property, then validate what came back.
        status = store->GetValue(PKEY_Device_FriendlyName, &friendlyName);
        if (FAILED(status))
        {
            std::cout << "IPropertyStore::GetValue failed";
        }
        else if (friendlyName.vt == VT_EMPTY)
        {
            status = E_FAIL;
            std::cout << "IPropertyStore::GetValue returned no value";
        }
        else if (friendlyName.vt != VT_LPWSTR)
        {
            // Not a null-terminated wide string.
            status = E_UNEXPECTED;
            std::cout << "IPropertyStore::GetValue returned unexpected";
        }
    }

    // Use the real name only when every step succeeded and it is non-NULL.
    const bool haveName = SUCCEEDED(status) && (friendlyName.pwszVal != NULL);
    const WCHAR* const chosen = haveName ? friendlyName.pwszVal : kPlaceholder;
    wcsncpy_s(pszBuffer, bufferLen, chosen, _TRUNCATE);

    PropVariantClear(&friendlyName);
    if (store != NULL)
    {
        store->Release();
        store = NULL;
    }
    return 0;
}
To measure this, count the number of audio callbacks received in ten seconds.
With a 10 ms callback period, 10 s / 10 ms = 1000 callbacks are expected. When delays occur, the number of audio callbacks is often less than 1,000.
If this is a problem in WebRTC's audio recording, how can it be fixed?