Message sent to the websocket not playing with TwiML Bidirectional call

109 Views Asked by At

I am currently working on a script that uses Google Cloud Text-to-Speech (TTS) to create audio content with the LINEAR16 encoding. I then convert the result to mulaw/8000 with the WaveFile library, base64-encode it, and send the base64 payload to my websocket server.

However, for some reason, nothing plays (not even noise) during the call when I send the media message.

I took the following steps:

1. I created the websocket server with NodeJS:

const WebSocket = require("ws");
const wss = new WebSocket.Server({ port: 8080 });

const path = require("path");

// Handles every client that connects to the WebSocket server. Two kinds of
// client are expected: the media stream opened by Twilio for a call, and
// helper scripts that inject "media"/"mark" messages for that call.
wss.on("connection", function connection(ws) {
    console.log("New Connection Initiated");

    // Becomes true once this socket sends a stream lifecycle event
    // ("connected"/"start"). Helper scripts never send those, so the flag
    // tells the call's own socket apart from clients injecting audio.
    let isTwilioStream = false;

    /**
     * Forwards a raw message to every other open client of this server.
     * Without this the server only logs what it receives, so audio sent by
     * a helper script never reaches the socket that belongs to the call.
     *
     * @param {*} message - Raw message as delivered by the "message" event.
     */
    function relayToOtherClients(message) {
        // ws may deliver the payload as a Buffer; convert it so the JSON is
        // forwarded as text rather than as binary data.
        const text = message.toString();
        wss.clients.forEach(function each(client) {
            if (client !== ws && client.readyState === WebSocket.OPEN) {
                client.send(text);
            }
        });
    }

    ws.on("message", function incoming(message) {
        let msg;
        try {
            msg = JSON.parse(message);
        } catch (e) {
            console.error('Error parsing message:', e);
            return;
        }

        switch (msg.event) {
            case "connected":
                isTwilioStream = true;
                console.log(`A new call has connected.`);
                break;
            case "start":
                isTwilioStream = true;
                console.log(`Starting Media Stream ${msg.streamSid}`);
                break;
            case "media":
                // Debug aid: only media messages flagged with test=1 are logged.
                if (Number(msg.test) === 1) {
                    console.log(msg);
                }
                break;
            case "stop":
                console.log(`Call Has Ended`);
                break;
        }

        // Messages from helper clients are passed on to the other
        // connections; the call's own stream traffic is not echoed back out.
        if (!isTwilioStream) {
            relayToOtherClients(message);
        }
    });
});

console.log("Listening on Port 8080");

2. I launch a call with the TwiML API:

// Twilio account credentials (placeholders).
$accountSid = "SID_HERE";
$authToken = "TOKEN_HERE";

$client = new Twilio\Rest\Client($accountSid, $authToken);

// TwiML executed when the call is answered: speak a greeting, then connect
// the call to the websocket server, passing one custom parameter along.
$twiml = '
<Response>
    <Say>This is your Voice Assistant speaking!</Say>
    <Connect>
        <Stream url="wss://MY_WEBSOCKET_SERVER_HERE">
            <Parameter name="aCutomParameter" value="aCustomValue that was set in TwiML" />
        </Stream>
    </Connect>
</Response>';

// Number to dial and the Twilio number the call is placed from.
$to = '+TARGET_NUMBER_HERE';
$from = '+TWILIO_NUMBER_HERE';

// Start the outbound call with the inline TwiML above.
$call = $client->account->calls->create($to, $from, ['twiml' => $twiml]);

3. I send the audio and the mark here:

const textToSpeech = require('@google-cloud/text-to-speech');
const WebSocket = require('ws');
const ttsClient = new textToSpeech.TextToSpeechClient();
const wavefile = require('wavefile');
const fs = require('fs');

const path = require("path");

require("dotenv").config();

/**
 * Synthesizes speech for the given text through the shared TTS client.
 *
 * The request asks for an en-US, gender-neutral voice and LINEAR16 audio
 * at a sample rate of 8000 Hz.
 *
 * @param {string} text - Text to turn into speech.
 * @returns {Promise<*>} The `audioContent` of the synthesis response.
 */
async function generateTtsAudio(text) {
    const voice = { languageCode: 'en-US', ssmlGender: 'NEUTRAL' };
    const audioConfig = { audioEncoding: 'LINEAR16', sampleRateHertz: 8000 };

    // synthesizeSpeech resolves to an array; the response is its first item.
    const [synthesis] = await ttsClient.synthesizeSpeech({
        input: { text },
        voice,
        audioConfig,
    });

    return synthesis.audioContent;
}

/**
 * Synthesizes `text`, converts the audio to mu-law at 8000 Hz, and sends it
 * to the websocket server as a base64 "media" message followed by a "mark"
 * message. The connection is closed once both messages have been handed off.
 *
 * @param {string} text - Text to speak.
 * @param {string} wsUrl - URL of the websocket server to send to.
 * @param {string} streamSid - Stream identifier placed in both messages.
 * @returns {Promise<void>} Resolves once the socket has been created; the
 *     messages themselves are sent later, when the socket opens.
 */
async function sendTtsToWebSocket(text, wsUrl, streamSid) {
    const audioContent = await generateTtsAudio(text);

    const wav = new wavefile.WaveFile(audioContent);
    // The audio is not reduced to 8-bit first: mu-law encoding works from
    // 16-bit samples, so an 8-bit step would only throw away precision.
    wav.toSampleRate(8000);
    wav.toMuLaw();

    const base64Audio = Buffer.from(wav.data.samples).toString('base64');
    console.log(base64Audio);

    const ws = new WebSocket(wsUrl);

    ws.on('open', function open() {
        const message = {
            event: 'media',
            streamSid: streamSid,
            media: { payload: base64Audio }
        };

        const markMessage = {
            event: 'mark',
            streamSid: streamSid,
            "mark": {
                name: "testmark"
            }
        };

        ws.send(JSON.stringify(message));
        // Close after the last message is written out; otherwise the socket
        // stays open and keeps the process alive.
        ws.send(JSON.stringify(markMessage), function onSent(err) {
            if (err) {
                console.error('WebSocket Error:', err);
            }
            ws.close();
        });
    });

    ws.on('error', function error(error) {
        console.error('WebSocket Error:', error);
    });
}

// Inputs for a one-off test run: the websocket server to send to, the
// stream the audio is meant for, and the sentence to synthesize.
const wsUrl = 'wss://MY_WEBSOCKET_SERVER_HERE';
const streamSid = 'STREAM_SID_HERE'; // I get it when starting the call
const text = 'Hello, this is a TTS test message.';

// Run the send; a rejection from the async function is printed to stderr.
sendTtsToWebSocket(text, wsUrl, streamSid)
    .catch(console.error);

However, the sound still doesn't play during the call.

1

There is 1 best solution below

0
Michael Naidis On

I finally found the issue.

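Twilio was not even seeing the media messages I was sending: my websocket server received them but never passed them on. I had to make the server forward each incoming message to all the other clients connected to the websocket, so that Twilio's connection would receive them: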

    ws.on("message", function incoming(message) {

    // Relay the incoming message to every other open client of this server
    // (the sender itself is skipped), so the other connections receive it.
    wss.clients.forEach(function each(client) {
        if (client !== ws && client.readyState === WebSocket.OPEN) {
            // NOTE(review): toString() presumably turns a Buffer payload into
            // text before it is forwarded — confirm against the ws version in use.
            client.send(message.toString());
        }
    });

This did the trick, and now it works as expected and the media messages are being played.