Skip to main content

Audio Driver

The audio driver streams WAV files from LittleFS (flash storage) to the MAX98357A amplifier via I2S.


WAV Files

WAV is the simplest audio format for this use case — no decode step needed, just stream raw PCM samples. The format is:

[44-byte header][PCM sample data]

The header contains sample rate, bit depth, and channel count. The driver reads the header once, configures the I2S sample rate to match (the I2S channel itself is always set up for 16-bit stereo slots), then streams the sample data in chunks until the file ends.

Use 44100 Hz, 16-bit, stereo or mono WAV files for compatibility. Export from Audacity or any audio editor.


I2S Initialisation

#include "driver/i2s_std.h"
#include "audio.h"

// I2S TX channel handle used by the driver. NULL until i2s_init() creates a
// channel; audio_play() writes PCM data through it.
static i2s_chan_handle_t tx_handle = NULL;

static esp_err_t i2s_init(uint32_t sample_rate) {
if (tx_handle) {
i2s_channel_disable(tx_handle);
i2s_del_channel(tx_handle);
}

i2s_chan_config_t chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(
I2S_NUM_0, I2S_ROLE_MASTER
);
ESP_ERROR_CHECK(i2s_new_channel(&chan_cfg, &tx_handle, NULL));

i2s_std_config_t std_cfg = {
.clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(sample_rate),
.slot_cfg = I2S_STD_MSB_SLOT_DEFAULT_CONFIG(
I2S_DATA_BIT_WIDTH_16BIT,
I2S_SLOT_MODE_STEREO
),
.gpio_cfg = {
.mclk = I2S_GPIO_UNUSED,
.bclk = GPIO_NUM_15,
.ws = GPIO_NUM_16,
.dout = GPIO_NUM_17,
.din = I2S_GPIO_UNUSED,
.invert_flags = { 0 },
},
};
ESP_ERROR_CHECK(i2s_channel_init_std_mode(tx_handle, &std_cfg));
return i2s_channel_enable(tx_handle);
}

WAV Header Parsing

// Format description of a WAV file, filled in by parse_wav_header().
typedef struct {
uint32_t sample_rate; // sample rate in Hz; passed to i2s_init()
uint16_t num_channels; // channel count field from the header
uint16_t bits_per_sample; // bit depth field from the header
uint32_t data_offset; // byte offset of PCM data
} wav_info_t;

/* Read a little-endian 16-bit value from a byte buffer (alignment-safe). */
static uint16_t wav_le16(const uint8_t *p) {
    return (uint16_t)((uint16_t)p[0] | ((uint16_t)p[1] << 8));
}

/* Read a little-endian 32-bit value from a byte buffer (alignment-safe). */
static uint32_t wav_le32(const uint8_t *p) {
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/**
 * Parse the RIFF/WAVE header of an open file.
 *
 * Walks the RIFF chunk list instead of assuming the canonical 44-byte layout,
 * so files with an extended "fmt " chunk or metadata chunks (e.g. "LIST")
 * ahead of the audio data are handled. For a canonical file the result is
 * identical to the fixed layout: data_offset == 44.
 *
 * @param f    File opened in binary mode and positioned at offset 0.
 * @param out  Receives the format fields; written only on success.
 * @return ESP_OK on success. ESP_FAIL if the file is truncated, is not
 *         RIFF/WAVE, has no "fmt " chunk before "data", or reports a zero
 *         sample rate or channel count.
 *
 * On success the file position is left at the first byte of PCM data.
 */
static esp_err_t parse_wav_header(FILE *f, wav_info_t *out) {
    if (!f || !out) return ESP_FAIL;

    uint8_t riff[12];
    if (fread(riff, 1, sizeof(riff), f) != sizeof(riff)) return ESP_FAIL;

    // Verify RIFF container and WAVE form type
    if (memcmp(riff, "RIFF", 4) != 0 || memcmp(riff + 8, "WAVE", 4) != 0)
        return ESP_FAIL;

    wav_info_t info = { 0 };
    int have_fmt = 0;
    uint32_t pos = (uint32_t)sizeof(riff); // absolute offset of next unread byte

    for (;;) {
        uint8_t ck[8]; // 4-byte chunk id + 4-byte little-endian size
        if (fread(ck, 1, sizeof(ck), f) != sizeof(ck)) return ESP_FAIL;
        pos += (uint32_t)sizeof(ck);

        if (memcmp(ck, "data", 4) == 0) {
            if (!have_fmt) return ESP_FAIL; // format must precede the samples
            info.data_offset = pos;
            *out = info;
            return ESP_OK;
        }

        uint32_t ck_size = wav_le32(ck + 4);
        // Reject absurd sizes; this also keeps the padding add and the
        // (long) cast for fseek() below from overflowing.
        if (ck_size > 0x7FFFFFF0u) return ESP_FAIL;

        // Chunks are padded to an even number of bytes.
        uint32_t remaining = ck_size + (ck_size & 1u);

        if (memcmp(ck, "fmt ", 4) == 0) {
            uint8_t fmt[16]; // mandatory part of the format chunk
            if (ck_size < sizeof(fmt)) return ESP_FAIL;
            if (fread(fmt, 1, sizeof(fmt), f) != sizeof(fmt)) return ESP_FAIL;

            info.num_channels = wav_le16(fmt + 2);
            info.sample_rate = wav_le32(fmt + 4);
            info.bits_per_sample = wav_le16(fmt + 14);
            if (info.num_channels == 0 || info.sample_rate == 0)
                return ESP_FAIL;

            have_fmt = 1;
            pos += (uint32_t)sizeof(fmt);
            remaining -= (uint32_t)sizeof(fmt);
        }

        // Skip the rest of this chunk (extension bytes or an unknown chunk).
        if (remaining != 0 && fseek(f, (long)remaining, SEEK_CUR) != 0)
            return ESP_FAIL;
        pos += remaining;
    }
}

Playback

#define AUDIO_BUF_SIZE 2048

esp_err_t audio_play(const char *path) {
FILE *f = fopen(path, "rb");
if (!f) return ESP_FAIL;

wav_info_t info;
if (parse_wav_header(f, &info) != ESP_OK) {
fclose(f);
return ESP_FAIL;
}

i2s_init(info.sample_rate);
fseek(f, info.data_offset, SEEK_SET);

uint8_t *buf = malloc(AUDIO_BUF_SIZE);
size_t bytes_read, bytes_written;

while (!audio_stop_requested) {
bytes_read = fread(buf, 1, AUDIO_BUF_SIZE, f);
if (bytes_read == 0) break; // end of file
i2s_channel_write(tx_handle, buf, bytes_read, &bytes_written, pdMS_TO_TICKS(100));
}

free(buf);
fclose(f);
return ESP_OK;
}

Soft Volume Fade-In

For a gentler wake-up, fade the volume in over 30 seconds by scaling PCM samples:

/**
 * Scale 16-bit PCM samples in place.
 *
 * @param buf    Buffer of native-endian int16_t samples (must be suitably
 *               aligned for int16_t). May be NULL, in which case nothing
 *               happens.
 * @param len    Length of the buffer in bytes; a trailing odd byte is left
 *               untouched.
 * @param scale  Gain factor. Results are truncated toward zero and saturated
 *               to the int16_t range, so gains above 1.0 clip instead of
 *               invoking undefined behaviour in the float-to-int conversion.
 */
static void apply_volume(uint8_t *buf, size_t len, float scale) {
    if (!buf) return;

    int16_t *samples = (int16_t *)buf;
    size_t num_samples = len / sizeof(int16_t);

    for (size_t i = 0; i < num_samples; i++) {
        float scaled = (float)samples[i] * scale;
        if (scaled > 32767.0f) {
            samples[i] = INT16_MAX;
        } else if (scaled < -32768.0f) {
            samples[i] = INT16_MIN;
        } else {
            samples[i] = (int16_t)scaled;
        }
    }
}

// Before the playback loop: remember when this playback started.
// esp_timer_get_time() counts microseconds since boot, so the ramp must be
// measured relative to this timestamp, not from zero.
const int64_t fade_start_us = esp_timer_get_time();

// In the playback loop, after fread() and before i2s_channel_write():
// Divide in floating point so the ramp is smooth rather than stepping once
// per whole second.
float elapsed_sec = (float)(esp_timer_get_time() - fade_start_us) / 1000000.0f;
float volume = fminf(elapsed_sec / 30.0f, 1.0f); // ramp over 30s
apply_volume(buf, bytes_read, volume);

Audio Task

The audio driver runs in its own FreeRTOS task pinned to core 1, so that display refresh work running on core 0 cannot delay it and stall the I2S DMA stream:

/*
 * FreeRTOS task body for the audio driver.
 *
 * Blocks on audio_cmd_queue and dispatches each command:
 *   AUDIO_PLAY - clears audio_stop_requested, then plays cmd.path via
 *                audio_play(), which returns only when playback is over.
 *   AUDIO_STOP - sets audio_stop_requested.
 * Commands of any other type are ignored. The task never returns.
 *
 * NOTE(review): audio_play() runs synchronously inside this loop, so no
 * further command is dequeued until it returns. An AUDIO_STOP queued during
 * playback is therefore only handled after playback has already finished;
 * confirm that some other context sets audio_stop_requested directly.
 */
void audio_task(void *arg) {
    audio_cmd_t msg;

    while (1) {
        if (!xQueueReceive(audio_cmd_queue, &msg, portMAX_DELAY)) {
            continue; // nothing received; wait again
        }

        if (msg.type == AUDIO_PLAY) {
            audio_stop_requested = false;
            audio_play(msg.path);
        } else if (msg.type == AUDIO_STOP) {
            audio_stop_requested = true;
        }
    }
}

// Start from main: create the audio task pinned to core 1 (last argument).
// Remaining arguments as passed: task name "audio", stack depth 4096,
// no task parameter, priority 5, and no task handle kept.
xTaskCreatePinnedToCore(audio_task, "audio", 4096, NULL, 5, NULL, 1);