I am trying to transcribe a live playing audio, but I am getting the following error when decoding,
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte
import pyaudio
import requests
from pocketsphinx import LiveSpeech
def speech_to_text(audio_stream):
speech = LiveSpeech(
verbose=False,
sampling_rate=16000,
buffer_size=2048,
no_search=False,
)
for chunk in audio_stream.iter_content(chunk_size=1024):
for phrase in speech.decode(bytes(chunk)):
yield str(phrase)
def main():
url = "AUDIO_URL"
audio_stream = requests.get(url, stream=True)
audio_format = pyaudio.paInt16
sample_rate = 16000
audio_chunk_size = 1024
p = pyaudio.PyAudio()
stream = p.open(format=audio_format, channels=1, rate=sample_rate, input=True, frames_per_buffer=audio_chunk_size)
print("Listening to audio stream...")
for phrase in speech_to_text(audio_stream):
print("Recognized text:", phrase)
stream.stop_stream()
stream.close()
p.terminate()
if __name__ == "__main__":
main()
The error occurs when I attempt to process the binary audio data received from the URL. Since this is binary audio data, I understand that decoding it as UTF-8 doesn't make sense, but I'm unsure about how to handle it correctly.
So what I am doing wrong here? Can anybody help me? Thanks in advance.