This is my ESP32 code. I'm using the ESP32-WROOM-32 Wi-Fi and Bluetooth module:
#include <WiFi.h>
#include <HTTPClient.h>
#include <driver/i2s.h>
#include <ArduinoJson.h>
#include <Base64.h>
// WiFi credentials (placeholders: replace with the real network name and password)
const char* ssid = "SSID";
const char* password = "PASSWORD";
// Server details: URL of the Flask route. setup() sends a GET to it and
// loop() POSTs every audio chunk to it.
const char* serverName = "FLASK URL/route";
// INMP441 I2S pin assignment.
// setup() maps I2S_WS to ws_io_num, I2S_SD to data_in_num and I2S_SCK to bck_io_num.
#define I2S_WS 25
#define I2S_SD 33
#define I2S_SCK 32
// Button and LED pin assignment.
// The button is configured as INPUT_PULLUP in setup(), so it reads 0 while pressed.
// NOTE(review): GPIO12 is an ESP32 boot strapping pin; confirm that the button
// wiring cannot disturb booting, or move the button to another GPIO.
#define BUTTON_PIN 12
#define LED_PIN 13
// millis() timestamp taken in loop() at the moment a recording starts.
unsigned long ledOnTime = 0;
// True from the button press until loop() has finished the recording.
bool recording = false;
void setup() {
Serial.begin(115200);
// Connect to WiFi
WiFi.begin(ssid, password);
while (WiFi.status() != WL_CONNECTED) {
delay(1000);
Serial.println("Connecting to WiFi...");
}
Serial.println("Connected to WiFi");
if(WiFi.status() == WL_CONNECTED) {
HTTPClient http;
http.begin(serverName);
int httpResponseCode = http.GET();
Serial.println(httpResponseCode);
if(httpResponseCode!=405){
while(httpResponseCode!=405){
int httpResponseCode = http.GET();
Serial.println(httpResponseCode);
if(httpResponseCode==405){
break;
}
};
http.end();
};
}
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM),
.sample_rate = 44100,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_ALL_LEFT,
.communication_format = I2S_COMM_FORMAT_I2S,
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 2,
.dma_buf_len = 1024
};
i2s_pin_config_t pin_config;
pin_config.bck_io_num = I2S_SCK;
pin_config.ws_io_num = I2S_WS;
pin_config.data_out_num = -1;
pin_config.data_in_num = I2S_SD;
i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
i2s_set_pin(I2S_NUM_0, &pin_config);
i2s_set_clk(I2S_NUM_0, 44100, I2S_BITS_PER_SAMPLE_16BIT, I2S_CHANNEL_MONO);
// Set the button as input
pinMode(BUTTON_PIN, INPUT_PULLUP);
// Set the LED as output
pinMode(LED_PIN, OUTPUT);
}
void loop() {
// Check if the button is pressed
if (digitalRead(BUTTON_PIN) == 0 && !recording) {
digitalWrite(LED_PIN, HIGH); // Turn on the LED
ledOnTime = millis();
recording = true;
}
if (recording) {
// Turn on the LED
uint16_t i2s_read_buff[1024];
size_t bytes_read;
// Record for 10 seconds or until the button is released
for(int i = 0; i < 10 * 44100 / sizeof(i2s_read_buff); i++) {
i2s_read(I2S_NUM_0, i2s_read_buff, sizeof(i2s_read_buff), &bytes_read, portMAX_DELAY);
// Send audio data to Flask app
if(WiFi.status()== WL_CONNECTED){
HTTPClient http;
http.begin(serverName);
http.addHeader("Content-Type", "application/json");
// Base64 encode the audio data
String base64Audio = base64::encode((uint8_t*)i2s_read_buff, bytes_read);
// Create a JSON object with the audio data
StaticJsonDocument<200> doc;
doc["audio"] = base64Audio;
String json;
serializeJson(doc, json);
// Send the JSON object in the POST request
int httpResponseCode = http.POST(json);
http.end();
}
else{
Serial.println("WiFi Disconnected");
}
}
if (millis() - ledOnTime >= 10000) {
digitalWrite(LED_PIN, LOW); // Turn off the LED
if(WiFi.status() == WL_CONNECTED){
HTTPClient http;
http.begin( "flask URL /save");
int httpResponseCode = http.POST("");
http.end();
}
recording = false;
}
}
}
There is a simple button connected to the ESP32. When the button is pushed, an LED turns on, signaling that the Flask server is starting to receive audio data from the INMP441 mic. I did enter my SSID and password, as well as the correct server URL. I'm also sending a GET request just to establish the initial Flask connection, as my ESP32 takes about 10 minutes to establish an initial connection to the Flask server.
Currently, this is my server code:
from flask import Flask, request
from pydub import AudioSegment
import io
import base64
# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Module-level list of the AudioSegment chunks received so far.
# handle_audio() appends to it and save_audio() concatenates its contents.
audio_segments = []
@app.route('/endpoint', methods=['POST'])
def handle_audio():
    """Receive one chunk of audio and queue it for the final file.

    Expects a JSON body of the form ``{"audio": "<base64 string>"}`` where the
    decoded bytes are raw 16-bit mono PCM at 44100 Hz.

    Returns a plain-text confirmation on success, or a 400 response when the
    body is not JSON, has no usable ``audio`` field, or the payload cannot be
    decoded into whole 16-bit samples.
    """
    # silent=True: a missing or malformed JSON body yields None instead of an
    # exception, so it can be answered with a clear 400.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not isinstance(data.get('audio'), str):
        return 'Request body must be JSON with a base64 "audio" string.', 400

    try:
        audio_data = base64.b64decode(data['audio'])
    except ValueError:  # binascii.Error is a ValueError subclass
        return 'The "audio" field is not valid base64.', 400

    # Raw 16-bit samples are 2 bytes each; an empty or odd-sized payload
    # cannot be turned into an audio segment.
    if not audio_data or len(audio_data) % 2 != 0:
        return 'Audio payload is empty or not a whole number of 16-bit samples.', 400

    # Create an AudioSegment from the raw data
    incoming_audio = AudioSegment.from_raw(
        io.BytesIO(audio_data), sample_width=2, frame_rate=44100, channels=1
    )
    # Add the incoming audio to the list of segments
    audio_segments.append(incoming_audio)
    return 'Audio data received!'
@app.route('/save', methods=['POST'])
def save_audio():
    """Concatenate all queued audio chunks and export them as one FLAC file.

    Returns a plain-text confirmation on success, or a 400 response when no
    chunks have been received since the last save.
    """
    # Work on a snapshot so chunks arriving during the export are not lost.
    segments = list(audio_segments)
    if not segments:
        # sum([]) would be the integer 0, which has no export() method.
        return 'No audio data received; nothing to save.', 400

    # Concatenate all audio segments
    combined_audio = sum(segments)
    # Export as FLAC
    combined_audio.export("PATH/Desktop/output.flac", format="flac")

    # Drop the chunks that were just written so the next recording starts
    # empty instead of being appended to this one.
    del audio_segments[:len(segments)]
    return 'Audio data saved as FLAC file!'
# When this file is executed directly, start Flask's built-in server
# listening on all network interfaces (0.0.0.0), port 5000.
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000)
Yes, I know it's not the best way to save an audio file with another POST request, but it's temporary. My problem is that the audio data I save is essentially just noise, and that even though the recording is 10 seconds long, only 4 seconds are saved in the file. The audio data is received as separate chunks. After all the chunks have been received, the server saves the file.
What I want to do is simply record voice audio for 10 seconds and save it as an audio file through a Flask server. Can anyone please help? Thanks.