I am currently working on a script that uses Google Cloud Text-to-Speech (TTS) to create audio content; the encoding I request is LINEAR16. I then convert the result to mulaw/8000 with the WaveFile library and base64-encode it, before eventually sending the base64-encoded result to my websocket server.
However, for some reason, nothing plays (not even noise) during the call when I send the media message.
I took the following steps:
1. I created the websocket server with NodeJS:
const WebSocket = require("ws");
const wss = new WebSocket.Server({ port: 8080 });
const path = require("path");
// For every client that connects, log the stream lifecycle events it sends.
wss.on("connection", (ws) => {
  console.log("New Connection Initiated");

  ws.on("message", (message) => {
    // Frames that are not valid JSON are reported and then dropped.
    let msg;
    try {
      msg = JSON.parse(message);
    } catch (e) {
      console.error('Error parsing message:', e);
      return;
    }

    const eventName = msg.event;
    if (eventName === "connected") {
      console.log(`A new call has connected.`);
    } else if (eventName === "start") {
      console.log(`Starting Media Stream ${msg.streamSid}`);
    } else if (eventName === "media") {
      // Only media messages whose `test` field loosely equals 1 are logged.
      if (msg.test == 1) {
        console.log(msg);
      }
    } else if (eventName === "stop") {
      console.log(`Call Has Ended`);
    }
  });
});

console.log("Listening on Port 8080");
2. I launch a call through the Twilio REST API (PHP SDK), passing the TwiML inline:
// Places an outbound Twilio call whose inline TwiML greets the callee and
// then connects the call audio to a WebSocket server.

// Placeholder credentials used to construct the Twilio REST client.
$sid = "SID_HERE";
$token = "TOKEN_HERE";
$client = new Twilio\Rest\Client($sid, $token);
// Inline TwiML: <Say> speaks a greeting, then <Connect><Stream> attaches the
// call to the WebSocket URL and passes one custom <Parameter> along with it.
$xml ='
<Response>
<Say>This is your Voice Assistant speaking!</Say>
<Connect>
<Stream url="wss://MY_WEBSOCKET_SERVER_HERE">
<Parameter name="aCutomParameter" value="aCustomValue that was set in TwiML" />
</Stream>
</Connect>
</Response>';
// Create the call, handing the TwiML above to Twilio through the 'twiml' option.
// NOTE(review): going by the placeholders, the first argument is the number
// being called and the second is the Twilio number - confirm against the SDK.
$call = $client->account->calls->create(
'+TARGET_NUMBER_HERE',
'+TWILIO_NUMBER_HERE',
[
'twiml' => $xml
]
);
3. I send the audio and the mark here:
const textToSpeech = require('@google-cloud/text-to-speech');
const WebSocket = require('ws');
const ttsClient = new textToSpeech.TextToSpeechClient();
const wavefile = require('wavefile');
const fs = require('fs');
const path = require("path");
require("dotenv").config();
/**
 * Requests speech synthesis for `text` from the Text-to-Speech client,
 * asking for LINEAR16 audio at 8000 Hz with a neutral en-US voice.
 *
 * @param {string} text - The text to synthesize.
 * @returns {Promise<*>} The `audioContent` field of the synthesis response.
 */
async function generateTtsAudio(text) {
  const voice = { languageCode: 'en-US', ssmlGender: 'NEUTRAL' };
  const audioConfig = { audioEncoding: 'LINEAR16', sampleRateHertz: 8000 };

  const result = await ttsClient.synthesizeSpeech({
    input: { text },
    voice,
    audioConfig,
  });

  // The first element of the resolved array is the synthesis response.
  const [synthesis] = result;
  return synthesis.audioContent;
}
/**
 * Synthesizes `text`, converts the audio to 8000 Hz mu-law, base64-encodes the
 * samples and sends them over a new WebSocket connection as a `media` message
 * followed by a `mark` message.
 *
 * @param {string} text - The text to synthesize and send.
 * @param {string} wsUrl - URL of the WebSocket server to connect to.
 * @param {string} streamSid - Stream identifier placed in both messages.
 * @returns {Promise<void>} Resolves once the socket has been created and its
 *   handlers registered; the messages themselves are sent later, when the
 *   socket's 'open' event fires.
 */
async function sendTtsToWebSocket(text, wsUrl, streamSid) {
  const audioContent = await generateTtsAudio(text);

  const wav = new wavefile.WaveFile(audioContent);
  wav.toBitDepth('8');
  wav.toSampleRate(8000);
  // Fix: this previously read `av.toMuLaw()`, which threw a ReferenceError
  // (caught only by the caller's .catch) so no audio was ever encoded or sent.
  wav.toMuLaw();

  const base64Audio = Buffer.from(wav.data.samples).toString('base64');
  console.log(base64Audio);

  const ws = new WebSocket(wsUrl);

  ws.on('open', function open() {
    const message = {
      event: 'media',
      streamSid: streamSid,
      media: { payload: base64Audio }
    };
    const markMessage = {
      event: 'mark',
      streamSid: streamSid,
      "mark": {
        name: "testmark"
      }
    };
    // The mark is sent right after the media so it follows the audio in order.
    ws.send(JSON.stringify(message));
    ws.send(JSON.stringify(markMessage));
  });

  ws.on('error', function error(error) {
    console.error('WebSocket Error:', error);
  });
}
// Placeholder inputs for a one-off test run.
const wsUrl = 'wss://MY_WEBSOCKET_SERVER_HERE';
// The stream SID is obtained when the call starts.
const streamSid = 'STREAM_SID_HERE';
const text = 'Hello, this is a TTS test message.';

// Kick off the send and report any rejection on stderr.
sendTtsToWebSocket(text, wsUrl, streamSid).catch((err) => {
  console.error(err);
});
However, the sound still doesn't play during the call.
I finally found the issue.
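Twilio was not even seeing the media messages I was sending. My sending script connects to the websocket server as a separate client, and the server only logged what it received, so the server has to forward each media message to all clients connected to the websocket (which includes Twilio's connection) in order for Twilio to see them.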
This did the trick, and now it works as expected and the media messages are being played.