I'm working on a project where I want to transcribe audio from a GSM file in real-time as it's being recorded by Asterisk. While I can transcribe already existing files perfectly using Google Cloud's Speech-to-Text along with Asterisk running main.py, I'm encountering issues when trying to transcribe a file that is actively being recorded.
Problem: When my code tries to read a GSM file that is currently being recorded, it throws an exception indicating a lack of audio data. Specifically, the error is:
google.api_core.exceptions.OutOfRange: 400 Audio Timeout Error: Long duration elapsed without audio.
Here is the STT Module:
import time
from google.cloud import speech
from google.oauth2 import service_account
from pysterisk.config import logger
# Audio recording parameters
RATE = 8000
CHUNK = int(RATE / 10) # 100ms
class GSMFileStream:
    """Chunked reader for a GSM audio file.

    By default the generator stops at end-of-file (original behavior).
    Pass ``follow=True`` to tail a file that is still being written
    (e.g. an active Asterisk recording): on EOF the reader sleeps and
    retries instead of terminating, until ``max_idle_s`` elapse with no
    new data arriving.
    """

    def __init__(self, filename, chunk_size, follow=False,
                 poll_interval=0.1, max_idle_s=10.0):
        self.filename = filename
        self.chunk_size = chunk_size
        self.offset = 0            # byte position of the next read
        self.closed = False
        self.follow = follow                # keep polling at EOF?
        self.poll_interval = poll_interval  # seconds between EOF retries
        self.max_idle_s = max_idle_s        # give up after this much silence

    def __enter__(self):
        self.file = open(self.filename, 'rb')
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the handle; mark the stream closed so a
        # still-running generator loop terminates.
        self.file.close()
        self.closed = True

    def generator(self):
        """Yield successive chunks of up to ``chunk_size`` bytes."""
        idle = 0.0
        while not self.closed:
            self.file.seek(self.offset)
            chunk = self.file.read(self.chunk_size)
            if not chunk:
                # EOF: stop (default), or — when following a live
                # recording — wait for the writer to append more data.
                if not self.follow or idle >= self.max_idle_s:
                    break
                time.sleep(self.poll_interval)
                idle += self.poll_interval
                continue
            idle = 0.0
            self.offset += len(chunk)
            yield chunk
def listen_print_loop(responses: object) -> str:
    """Consume streaming responses; return the first final transcript.

    Interim (non-final) results, empty responses, and results without
    alternatives are skipped. Falls off the end (returning ``None``)
    when the stream is exhausted without a final result.
    """
    for response in responses:
        results = response.results
        if not results:
            continue
        top = results[0]
        # Only a final result with at least one alternative is reported.
        if not top.alternatives or not top.is_final:
            continue
        best = top.alternatives[0]
        logger.info(f"Transcript: {best.transcript}")
        return best.transcript
def stt_stream() -> None:
    """Stream a GSM recording to Google Cloud Speech-to-Text.

    Reads /app/softphone_app/wavs/client.gsm in CHUNK-sized pieces,
    feeds them to the streaming-recognize API, and hands the responses
    to listen_print_loop. Any error is logged rather than raised.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    try:
        creds = service_account.Credentials.from_service_account_file(
            '/app/softphone_app/pysterisk/helpers/credentials.json')
        stt_client = speech.SpeechClient(credentials=creds)
        recognition = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
            sample_rate_hertz=RATE,
            language_code="es-ES",  # a BCP-47 language tag
        )
        streaming = speech.StreamingRecognitionConfig(
            config=recognition, interim_results=True
        )
        with GSMFileStream("/app/softphone_app/wavs/client.gsm", CHUNK) as stream:
            request_iter = (
                speech.StreamingRecognizeRequest(audio_content=piece)
                for piece in stream.generator()
            )
            responses = stt_client.streaming_recognize(streaming, request_iter)
            listen_print_loop(responses)
    except Exception as e:
        logger.error(f"Error in stt_stream: {e}")
main.py (executed by Asterisk)
def main():
    """AGI entry point: answer the call, then record and transcribe it."""
    session = AGI()
    session.answer()
    record_and_tts_task(agi=session)
Task which uses Multiprocessing
def record_and_tts_task(agi):
    """Record the caller's audio and transcribe it concurrently.

    Spawns one process that records to disk (``record_file``) and,
    after a short head start so some audio exists on disk, a second
    process that streams the file to speech-to-text (``stt_stream``).
    Both processes are always ``join()``ed so they are reaped even when
    they have already exited.
    """
    p1 = None
    p2 = None
    try:
        # Separate process for recording the caller's audio.
        p1 = multiprocessing.Process(target=record_file,
                                     args=('client', agi))
        p1.start()
        logger.info("RECORDING FILE STARTED!")
        # Give the recorder a head start so the file contains some
        # audio before transcription begins reading it.
        time.sleep(5)
        # Separate process for transcription.
        p2 = multiprocessing.Process(target=stt_stream)
        p2.start()
        logger.info("TTS Stream STARTED!")
    except Exception as e:
        logger.error(f"Error while starting processes: {e}")
    finally:
        # join() unconditionally on any started process: joining an
        # already-finished process returns immediately and reaps it,
        # whereas the old is_alive() guard could skip the join (and the
        # "ENDED" log) for a process that exited between start() and
        # this point, leaving a zombie.
        if p1:
            p1.join()
            logger.info("RECORDING FILE ENDED!")
        if p2:
            p2.join()
            logger.info("TTS Stream ENDED!")
Does anyone have experience with transcribing active Asterisk recordings? Is there a way to access the audio data while recording is in progress? Note that this system works flawlessly with pre-recorded audio files. Any guidance would be greatly appreciated.
The problem here is simple.
Asterisk does write the recording to the file, but the OS may buffer those writes until the file is closed.
So you are reading an empty — or only partially written — file. Your code has no way to check whether the file is finished: you simply end your process when you reach end-of-file, even though Asterisk may write another chunk half a second later. There is no real workaround for this.
You HAVE to use another interface — for example ARI asynchronous audio, a socket, or a unistream channel.