From af5818476f6ba43cf7a9bb8cdea71407b3e7fd77 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 08:37:46 -0400
Subject: [PATCH 01/16] use psram for wifi and bluetooth buffers

---
 voice-kit.yaml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/voice-kit.yaml b/voice-kit.yaml
index 587807c0..bebe7064 100644
--- a/voice-kit.yaml
+++ b/voice-kit.yaml
@@ -62,6 +62,23 @@ esp32:
       # CONFIG_FREERTOS_USE_TRACE_FACILITY: "y"
       # CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS: "y"
 
+      CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP: "y"
+      CONFIG_ESP32_WIFI_STATIC_RX_BUFFER_NUM: "16"
+      CONFIG_ESP32_WIFI_DYNAMIC_RX_BUFFER_NUM: "64"
+      CONFIG_ESP32_WIFI_DYNAMIC_TX_BUFFER_NUM: "64"
+      CONFIG_ESP32_WIFI_AMPDU_TX_ENABLED: "y"
+      CONFIG_ESP32_WIFI_TX_BA_WIN: "32"
+      CONFIG_ESP32_WIFI_AMPDU_RX_ENABLED: "y"
+      CONFIG_ESP32_WIFI_RX_BA_WIN: "32"
+      CONFIG_LWIP_TCP_SND_BUF_DEFAULT: "65534"
+      CONFIG_LWIP_TCP_WND_DEFAULT: "65534"
+      CONFIG_LWIP_TCP_RECVMBOX_SIZE: "64"
+      CONFIG_LWIP_UDP_RECVMBOX_SIZE: "64"
+      CONFIG_LWIP_TCPIP_RECVMBOX_SIZE: "64"
+
+      CONFIG_BT_ALLOCATION_FROM_SPIRAM_FIRST: "y"
+      CONFIG_BT_BLE_DYNAMIC_ENV_MEMORY: "y"
+
 wifi:
   ap:
   on_connect:

From cb352e23917cc6ec2c15125bd2edf71805ee3345 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 08:39:20 -0400
Subject: [PATCH 02/16] reduce pipeline task priorities

---
 esphome/components/nabu/nabu_media_player.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index ca69c524..270da34e 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -70,8 +70,8 @@ static const size_t SAMPLES_IN_ONE_DMA_BUFFER = DMA_BUFFER_SIZE * NUMBER_OF_CHAN
 static const size_t DMA_BUFFERS_COUNT = 4;
 static const size_t SAMPLES_IN_ALL_DMA_BUFFERS = SAMPLES_IN_ONE_DMA_BUFFER * DMA_BUFFERS_COUNT;
 
-static const UBaseType_t MEDIA_PIPELINE_TASK_PRIORITY = 2;
-static const UBaseType_t ANNOUNCEMENT_PIPELINE_TASK_PRIORITY = 7;
+static const UBaseType_t MEDIA_PIPELINE_TASK_PRIORITY = 1;
+static const UBaseType_t ANNOUNCEMENT_PIPELINE_TASK_PRIORITY = 1;
 static const UBaseType_t MIXER_TASK_PRIORITY = 10;
 static const UBaseType_t SPEAKER_TASK_PRIORITY = 23;
 

From 69210c6142ddead42f2f3733c3d254fc42c2de69 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 08:40:27 -0400
Subject: [PATCH 03/16] don't block for typical running messages

---
 esphome/components/nabu/nabu_media_player.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index 270da34e..5d5bcdd2 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -353,14 +353,14 @@ void NabuMediaPlayer::speaker_task(void *params) {
                 xQueueSend(this_speaker->speaker_event_queue_, &event, portMAX_DELAY);
               } else {
                 event.type = EventType::RUNNING;
-                xQueueSend(this_speaker->speaker_event_queue_, &event, portMAX_DELAY);
+                xQueueSend(this_speaker->speaker_event_queue_, &event, 0);
               }
 
             } else {
               i2s_zero_dma_buffer(this_speaker->parent_->get_port());
 
               event.type = EventType::IDLE;
-              xQueueSend(this_speaker->speaker_event_queue_, &event, portMAX_DELAY);
+              xQueueSend(this_speaker->speaker_event_queue_, &event, 0);
             }
           }
 

From 9788ad64c91061e2e12ec4903803760b1eb4aa31 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 08:53:16 -0400
Subject: [PATCH 04/16] speaker task blocks on read from ring buffer

---
 esphome/components/nabu/audio_mixer.cpp       |  7 +------
 esphome/components/nabu/nabu_media_player.cpp | 17 ++++-------------
 2 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/esphome/components/nabu/audio_mixer.cpp b/esphome/components/nabu/audio_mixer.cpp
index 45458f98..03c2db1f 100644
--- a/esphome/components/nabu/audio_mixer.cpp
+++ b/esphome/components/nabu/audio_mixer.cpp
@@ -48,12 +48,7 @@ void AudioMixer::stop() {
 }
 
 size_t AudioMixer::read(uint8_t *buffer, size_t length, TickType_t ticks_to_wait) {
-  size_t available_bytes = this->available();
-  size_t bytes_to_read = std::min(length, available_bytes);
-  if (bytes_to_read > 0) {
-    return this->output_ring_buffer_->read((void *) buffer, bytes_to_read, ticks_to_wait);
-  }
-  return 0;
+  return this->output_ring_buffer_->read((void *) buffer, length, ticks_to_wait);
 }
 
 void AudioMixer::audio_mixer_task_(void *params) {
diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index 5d5bcdd2..c23847b7 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -75,7 +75,7 @@ static const UBaseType_t ANNOUNCEMENT_PIPELINE_TASK_PRIORITY = 1;
 static const UBaseType_t MIXER_TASK_PRIORITY = 10;
 static const UBaseType_t SPEAKER_TASK_PRIORITY = 23;
 
-static const size_t TASK_DELAY_MS = 5;
+static const size_t TASK_DELAY_MS = 10;
 
 static const float FIRST_BOOT_DEFAULT_VOLUME = 0.5f;
 
@@ -303,24 +303,15 @@ void NabuMediaPlayer::speaker_task(void *params) {
           xQueueSend(this_speaker->speaker_event_queue_, &event, portMAX_DELAY);
 
           while (true) {
-            notification_bits = ulTaskNotifyTake(pdTRUE, pdMS_TO_TICKS(TASK_DELAY_MS));
+            notification_bits = ulTaskNotifyTake(pdTRUE, pdMS_TO_TICKS(0));
 
             if (notification_bits & SpeakerTaskNotificationBits::COMMAND_STOP) {
               break;
             }
 
-            size_t bytes_available = this_speaker->audio_mixer_->available();
-            size_t samples_available = bytes_available / sizeof(int16_t);
-
-            size_t dma_buffers_available = samples_available / SAMPLES_IN_ONE_DMA_BUFFER;
-
-            size_t dma_buffers_to_read = std::min(dma_buffers_available, DMA_BUFFERS_COUNT);
-            dma_buffers_to_read = std::max(dma_buffers_to_read, (size_t) 1);  // always read at least 1 DMA buffer
-
-            size_t bytes_to_read = dma_buffers_to_read * SAMPLES_IN_ONE_DMA_BUFFER * sizeof(int16_t);
             size_t bytes_read = 0;
-
-            bytes_read = this_speaker->audio_mixer_->read((uint8_t *) buffer, bytes_to_read, 0);
+            size_t bytes_to_read = sizeof(int16_t) * SAMPLES_IN_ALL_DMA_BUFFERS;
+            bytes_read = this_speaker->audio_mixer_->read((uint8_t *) buffer, bytes_to_read, pdMS_TO_TICKS(TASK_DELAY_MS));
 
             if (bytes_read > 0) {
               size_t bytes_written;

From 73db043aea98dfbf878d1d7724ea6557ce2ffe3d Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 08:53:59 -0400
Subject: [PATCH 05/16] fix mixing without clipping bug

---
 esphome/components/nabu/audio_mixer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/esphome/components/nabu/audio_mixer.cpp b/esphome/components/nabu/audio_mixer.cpp
index 03c2db1f..606a995b 100644
--- a/esphome/components/nabu/audio_mixer.cpp
+++ b/esphome/components/nabu/audio_mixer.cpp
@@ -328,10 +328,10 @@ void AudioMixer::mix_audio_samples_without_clipping_(int16_t *media_buffer, int1
     if ((added_sample > MAX_AUDIO_SAMPLE_VALUE) || (added_sample < MIN_AUDIO_SAMPLE_VALUE)) {
       // The largest magnitude the media sample can be to avoid clipping (converted to Q30 fixed point)
       int32_t q30_media_sample_safe_max =
-          static_cast<int32_t>(MAX_AUDIO_SAMPLE_VALUE - std::abs(announcement_buffer[i])) << 15;
+          static_cast<int32_t>(std::abs(MIN_AUDIO_SAMPLE_VALUE) - std::abs(announcement_buffer[i])) << 15;
 
       // Actual media sample value (Q15 number stored in an int32 for future division)
-      int32_t media_sample_value = media_buffer[i];
+      int32_t media_sample_value = abs(media_buffer[i]);
 
       // Calculation to perform the Q15 division for media_sample_safe_max/media_sample_value
       // Reference: https://sestevenson.wordpress.com/2010/09/20/fixed-point-division-2/ (accessed August 15,

From aa48eaa11b72c2379328c1ce9df400e937f61fa1 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:02:51 -0400
Subject: [PATCH 06/16] mixer blocks on ring buffer writes

---
 esphome/components/nabu/audio_mixer.cpp | 185 +++++++++++++-----------
 1 file changed, 100 insertions(+), 85 deletions(-)

diff --git a/esphome/components/nabu/audio_mixer.cpp b/esphome/components/nabu/audio_mixer.cpp
index 606a995b..17fe2e9e 100644
--- a/esphome/components/nabu/audio_mixer.cpp
+++ b/esphome/components/nabu/audio_mixer.cpp
@@ -15,7 +15,7 @@ static const size_t OUTPUT_BUFFER_SAMPLES = 8192;       // Audio samples - keep
 static const size_t QUEUE_COUNT = 20;
 
 static const uint32_t TASK_STACK_SIZE = 3072;
-static const size_t TASK_DELAY_MS = 20;
+static const size_t TASK_DELAY_MS = 25;
 
 static const int16_t MAX_AUDIO_SAMPLE_VALUE = INT16_MAX;
 static const int16_t MIN_AUDIO_SAMPLE_VALUE = INT16_MIN;
@@ -62,6 +62,9 @@ void AudioMixer::audio_mixer_task_(void *params) {
   int16_t *announcement_buffer = allocator.allocate(OUTPUT_BUFFER_SAMPLES);
   int16_t *combination_buffer = allocator.allocate(OUTPUT_BUFFER_SAMPLES);
 
+  int16_t *combination_buffer_current = combination_buffer;
+  size_t combination_buffer_length = 0;
+
   if ((media_buffer == nullptr) || (announcement_buffer == nullptr)) {
     event.type = EventType::WARNING;
     event.err = ESP_ERR_NO_MEM;
@@ -97,7 +100,7 @@ void AudioMixer::audio_mixer_task_(void *params) {
   xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY);
 
   while (true) {
-    if (xQueueReceive(this_mixer->command_queue_, &command_event, pdMS_TO_TICKS(TASK_DELAY_MS)) == pdTRUE) {
+    if (xQueueReceive(this_mixer->command_queue_, &command_event, 0) == pdTRUE) {
       if (command_event.command == CommandEventType::STOP) {
         break;
       } else if (command_event.command == CommandEventType::DUCK) {
@@ -108,7 +111,7 @@ void AudioMixer::audio_mixer_task_(void *params) {
 
           uint8_t total_ducking_steps = 0;
           if (target_ducking_db_reduction > current_ducking_db_reduction) {
-            // The dB reduction level is increasing (which results in quiter audio)
+            // The dB reduction level is increasing (which results in quieter audio)
             total_ducking_steps = target_ducking_db_reduction - current_ducking_db_reduction - 1;
             db_change_per_ducking_step = 1;
           } else {
@@ -135,117 +138,129 @@ void AudioMixer::audio_mixer_task_(void *params) {
       }
     }
 
-    size_t media_available = this_mixer->media_ring_buffer_->available();
-    size_t announcement_available = this_mixer->announcement_ring_buffer_->available();
-    size_t output_free = this_mixer->output_ring_buffer_->free();
-
-    if ((output_free > 0) && (media_available * transfer_media + announcement_available > 0)) {
-      size_t bytes_to_read = output_free;
-
-      if (media_available * transfer_media > 0) {
-        bytes_to_read = std::min(bytes_to_read, media_available);
+    if (combination_buffer_length > 0) {
+      size_t output_bytes_written = this_mixer->output_ring_buffer_->write_without_replacement(
+          (void *) combination_buffer, combination_buffer_length, pdMS_TO_TICKS(TASK_DELAY_MS));
+      combination_buffer_length -= output_bytes_written;
+      if ((combination_buffer_length > 0) && (output_bytes_written > 0)) {
+        memmove(combination_buffer, combination_buffer + output_bytes_written / sizeof(int16_t),
+                combination_buffer_length);
       }
+    } else {
+      size_t media_available = this_mixer->media_ring_buffer_->available();
+      size_t announcement_available = this_mixer->announcement_ring_buffer_->available();
 
-      if (announcement_available > 0) {
-        bytes_to_read = std::min(bytes_to_read, announcement_available);
-      }
+      if (media_available * transfer_media + announcement_available > 0) {
+        size_t bytes_to_read = OUTPUT_BUFFER_SAMPLES * sizeof(int16_t);
 
-      if (bytes_to_read > 0) {
-        size_t media_bytes_read = 0;
         if (media_available * transfer_media > 0) {
-          media_bytes_read = this_mixer->media_ring_buffer_->read((void *) media_buffer, bytes_to_read, 0);
-          if (media_bytes_read > 0) {
-            size_t samples_read = media_bytes_read / sizeof(int16_t);
-            if (ducking_transition_samples_remaining > 0) {
-              // Ducking level is still transitioning
+          bytes_to_read = std::min(bytes_to_read, media_available);
+        }
 
-              size_t samples_left = ducking_transition_samples_remaining;
+        if (announcement_available > 0) {
+          bytes_to_read = std::min(bytes_to_read, announcement_available);
+        }
 
-              // There may be more than one step worth of samples to duck in the buffers, so manage positions
-              int16_t *current_media_buffer = media_buffer;
+        if (bytes_to_read > 0) {
+          size_t media_bytes_read = 0;
+          if (media_available * transfer_media > 0) {
+            media_bytes_read = this_mixer->media_ring_buffer_->read((void *) media_buffer, bytes_to_read, 0);
+            if (media_bytes_read > 0) {
+              size_t samples_read = media_bytes_read / sizeof(int16_t);
+              if (ducking_transition_samples_remaining > 0) {
+                // Ducking level is still transitioning
 
-              size_t samples_left_in_step = samples_left % samples_per_ducking_step;
-              if (samples_left_in_step == 0) {
-                // Start of a new ducking step
+                size_t samples_left = ducking_transition_samples_remaining;
 
-                current_ducking_db_reduction += db_change_per_ducking_step;
-                samples_left_in_step = samples_per_ducking_step;
-              }
-              size_t samples_left_to_duck = std::min(samples_left_in_step, samples_read);
+                // There may be more than one step worth of samples to duck in the buffers, so manage positions
+                int16_t *current_media_buffer = media_buffer;
 
-              size_t total_samples_ducked = 0;
+                size_t samples_left_in_step = samples_left % samples_per_ducking_step;
+                if (samples_left_in_step == 0) {
+                  // Start of a new ducking step
 
-              while (samples_left_to_duck > 0) {
-                // Ensure we only point to valid index in the Q15 scaling factor table
-                uint8_t safe_db_reduction_index =
-                    clamp<uint8_t>(current_ducking_db_reduction, 0, decibel_reduction_table.size() - 1);
+                  current_ducking_db_reduction += db_change_per_ducking_step;
+                  samples_left_in_step = samples_per_ducking_step;
+                }
+                size_t samples_left_to_duck = std::min(samples_left_in_step, samples_read);
 
-                int16_t q15_scale_factor = decibel_reduction_table[safe_db_reduction_index];
-                this_mixer->scale_audio_samples_(current_media_buffer, current_media_buffer, q15_scale_factor,
-                                                 samples_left_to_duck);
+                size_t total_samples_ducked = 0;
 
-                current_media_buffer += samples_left_to_duck;
+                while (samples_left_to_duck > 0) {
+                  // Ensure we only point to valid index in the Q15 scaling factor table
+                  uint8_t safe_db_reduction_index =
+                      clamp<uint8_t>(current_ducking_db_reduction, 0, decibel_reduction_table.size() - 1);
 
-                samples_read -= samples_left_to_duck;
-                samples_left -= samples_left_to_duck;
+                  int16_t q15_scale_factor = decibel_reduction_table[safe_db_reduction_index];
+                  this_mixer->scale_audio_samples_(current_media_buffer, current_media_buffer, q15_scale_factor,
+                                                   samples_left_to_duck);
 
-                total_samples_ducked += samples_left_to_duck;
+                  current_media_buffer += samples_left_to_duck;
 
-                samples_left_in_step = samples_left % samples_per_ducking_step;
-                if (samples_left_in_step == 0) {
-                  // Start of a new step
+                  samples_read -= samples_left_to_duck;
+                  samples_left -= samples_left_to_duck;
 
-                  current_ducking_db_reduction += db_change_per_ducking_step;
-                  samples_left_in_step = samples_per_ducking_step;
+                  total_samples_ducked += samples_left_to_duck;
+
+                  samples_left_in_step = samples_left % samples_per_ducking_step;
+                  if (samples_left_in_step == 0) {
+                    // Start of a new step
+
+                    current_ducking_db_reduction += db_change_per_ducking_step;
+                    samples_left_in_step = samples_per_ducking_step;
+                  }
+                  samples_left_to_duck = std::min(samples_left_in_step, samples_read);
                 }
-                samples_left_to_duck = std::min(samples_left_in_step, samples_read);
-              }
-            } else if (target_ducking_db_reduction > 0) {
-              // We still need to apply a ducking scaling, but we are done transitioning
+              } else if (target_ducking_db_reduction > 0) {
+                // We still need to apply a ducking scaling, but we are done transitioning
 
-              uint8_t safe_db_reduction_index =
-                  clamp<uint8_t>(target_ducking_db_reduction, 0, decibel_reduction_table.size() - 1);
+                uint8_t safe_db_reduction_index =
+                    clamp<uint8_t>(target_ducking_db_reduction, 0, decibel_reduction_table.size() - 1);
 
-              int16_t q15_scale_factor = decibel_reduction_table[safe_db_reduction_index];
-              this_mixer->scale_audio_samples_(media_buffer, media_buffer, q15_scale_factor, samples_read);
+                int16_t q15_scale_factor = decibel_reduction_table[safe_db_reduction_index];
+                this_mixer->scale_audio_samples_(media_buffer, media_buffer, q15_scale_factor, samples_read);
+              }
             }
           }
-        }
 
-        size_t announcement_bytes_read = 0;
-        if (announcement_available > 0) {
-          announcement_bytes_read =
-              this_mixer->announcement_ring_buffer_->read((void *) announcement_buffer, bytes_to_read, 0);
-        }
+          size_t announcement_bytes_read = 0;
+          if (announcement_available > 0) {
+            announcement_bytes_read =
+                this_mixer->announcement_ring_buffer_->read((void *) announcement_buffer, bytes_to_read, 0);
+          }
 
-        size_t output_bytes_written = 0;
-        if ((media_bytes_read > 0) && (announcement_bytes_read > 0)) {
-          // We have both a media and an announcement stream, so mix them together
+          if ((media_bytes_read > 0) && (announcement_bytes_read > 0)) {
+            // We have both a media and an announcement stream, so mix them together
 
-          if (media_bytes_read != announcement_bytes_read) {
-            printf("somehow media and announcement bytes read are different\n");
-          }
-          size_t samples_read = bytes_to_read / sizeof(int16_t);
+            size_t samples_read = bytes_to_read / sizeof(int16_t);
 
-          this_mixer->mix_audio_samples_without_clipping_(media_buffer, announcement_buffer, combination_buffer,
-                                                          samples_read);
+            this_mixer->mix_audio_samples_without_clipping_(media_buffer, announcement_buffer, combination_buffer,
+                                                            samples_read);
 
-          output_bytes_written = this_mixer->output_ring_buffer_->write((void *) combination_buffer, bytes_to_read);
-          if (output_bytes_written != bytes_to_read) {
-            printf("couldn't copy all the mixed samples into the output ring buffer\n");
-          }
-        } else if (media_bytes_read > 0) {
-          output_bytes_written = this_mixer->output_ring_buffer_->write((void *) media_buffer, media_bytes_read);
+            combination_buffer_length = samples_read * sizeof(int16_t);
+            // output_bytes_written = this_mixer->output_ring_buffer_->write((void *) combination_buffer,
+            // bytes_to_read);
+          } else if (media_bytes_read > 0) {
+            memcpy(combination_buffer, media_buffer, media_bytes_read);
+            combination_buffer_length = media_bytes_read;
+            // output_bytes_written = this_mixer->output_ring_buffer_->write((void *) media_buffer, media_bytes_read);
 
-        } else if (announcement_bytes_read > 0) {
-          output_bytes_written =
-              this_mixer->output_ring_buffer_->write((void *) announcement_buffer, announcement_bytes_read);
-        }
+          } else if (announcement_bytes_read > 0) {
+            memcpy(combination_buffer, announcement_buffer, announcement_bytes_read);
+            combination_buffer_length = announcement_bytes_read;
+            // output_bytes_written =
+            //     this_mixer->output_ring_buffer_->write((void *) announcement_buffer, announcement_bytes_read);
+          }
 
-        size_t samples_written = output_bytes_written / sizeof(int16_t);
-        if (ducking_transition_samples_remaining > 0) {
-          ducking_transition_samples_remaining -= std::min(samples_written, ducking_transition_samples_remaining);
+          size_t samples_written = combination_buffer_length / sizeof(int16_t);
+          if (ducking_transition_samples_remaining > 0) {
+            ducking_transition_samples_remaining -= std::min(samples_written, ducking_transition_samples_remaining);
+          }
         }
+      } else {
+        // No audio data available in either buffer
+        
+        delay(TASK_DELAY_MS);
       }
     }
   }

From ae98214597a2467c409fe18cb6fb6d39c4fe4465 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:12:00 -0400
Subject: [PATCH 07/16] reader blocks on ring buffer writes

---
 esphome/components/nabu/audio_reader.cpp | 61 +++++++++++++-----------
 esphome/components/nabu/audio_reader.h   |  8 ++--
 2 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/esphome/components/nabu/audio_reader.cpp b/esphome/components/nabu/audio_reader.cpp
index dae78718..2f5f7d06 100644
--- a/esphome/components/nabu/audio_reader.cpp
+++ b/esphome/components/nabu/audio_reader.cpp
@@ -7,6 +7,8 @@
 namespace esphome {
 namespace nabu {
 
+static const size_t READ_WRITE_TIMEOUT_MS = 20;
+
 AudioReader::AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size) {
   this->output_ring_buffer_ = output_ring_buffer;
   this->transfer_buffer_size_ = transfer_buffer_size;
@@ -42,8 +44,8 @@ esp_err_t AudioReader::start(media_player::MediaFile *media_file, media_player::
 
   this->current_media_file_ = media_file;
 
-  this->media_file_data_current_ = media_file->data;
-  this->media_file_bytes_left_ = media_file->length;
+  this->transfer_buffer_current_ = media_file->data;
+  this->transfer_buffer_length_ = media_file->length;
   file_type = media_file->file_type;
 
   return ESP_OK;
@@ -100,6 +102,9 @@ esp_err_t AudioReader::start(const std::string &uri, media_player::MediaFileType
     file_type = media_player::MediaFileType::FLAC;
   }
 
+  this->transfer_buffer_current_ = this->transfer_buffer_;
+  this->transfer_buffer_length_ = 0;
+
   return ESP_OK;
 }
 
@@ -114,17 +119,11 @@ AudioReaderState AudioReader::read() {
 }
 
 AudioReaderState AudioReader::file_read_() {
-  if (this->media_file_bytes_left_ > 0) {
-    size_t bytes_to_write = std::min(this->media_file_bytes_left_, this->output_ring_buffer_->free());
-    bytes_to_write = std::min(bytes_to_write, this->transfer_buffer_size_);
-
-    if (bytes_to_write == 0) {
-      return AudioReaderState::READING;
-    }
-
-    size_t bytes_written = this->output_ring_buffer_->write((void *) this->media_file_data_current_, bytes_to_write);
-    this->media_file_bytes_left_ -= bytes_written;
-    this->media_file_data_current_ += bytes_written;
+  if (this->transfer_buffer_length_ > 0) {
+    size_t bytes_written = this->output_ring_buffer_->write_without_replacement(
+        (void *) this->transfer_buffer_current_, this->transfer_buffer_length_, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
+    this->transfer_buffer_length_ -= bytes_written;
+    this->transfer_buffer_current_ += bytes_written;
 
     return AudioReaderState::READING;
   }
@@ -132,24 +131,32 @@ AudioReaderState AudioReader::file_read_() {
 }
 
 AudioReaderState AudioReader::http_read_() {
-  size_t bytes_to_read = std::min(this->output_ring_buffer_->free(), this->transfer_buffer_size_);
+  if (this->transfer_buffer_length_ > 0) {
+    size_t bytes_written = this->output_ring_buffer_->write_without_replacement(
+        (void *) this->transfer_buffer_, this->transfer_buffer_length_, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
+    this->transfer_buffer_length_ -= bytes_written;
 
-  if (bytes_to_read == 0) {
-    return AudioReaderState::READING;
-  }
-
-  int received_len = esp_http_client_read(this->client_, (char *) this->transfer_buffer_, bytes_to_read);
-
-  if (received_len > 0) {
-    this->output_ring_buffer_->write((void *) this->transfer_buffer_, received_len);
-  } else if (received_len < 0) {
-    this->cleanup_connection_();
-    return AudioReaderState::FAILED;
+    // Shift remaining data to the start of the transfer buffer
+    memmove(this->transfer_buffer_, this->transfer_buffer_ + bytes_written, this->transfer_buffer_length_);
   }
 
   if (esp_http_client_is_complete_data_received(this->client_)) {
-    this->cleanup_connection_();
-    return AudioReaderState::FINISHED;
+    if (this->transfer_buffer_length_ == 0) {
+      this->cleanup_connection_();
+      return AudioReaderState::FINISHED;
+    }
+  } else {
+    size_t bytes_to_read = this->transfer_buffer_size_ - this->transfer_buffer_length_;
+    int received_len = esp_http_client_read(
+        this->client_, (char *) this->transfer_buffer_ + this->transfer_buffer_length_, bytes_to_read);
+
+    if (received_len > 0) {
+      this->transfer_buffer_length_ += received_len;
+    } else if (received_len < 0) {
+      // HTTP read error
+      this->cleanup_connection_();
+      return AudioReaderState::FAILED;
+    }
   }
 
   return AudioReaderState::READING;
diff --git a/esphome/components/nabu/audio_reader.h b/esphome/components/nabu/audio_reader.h
index f2f6a3dd..d6fcad30 100644
--- a/esphome/components/nabu/audio_reader.h
+++ b/esphome/components/nabu/audio_reader.h
@@ -36,14 +36,16 @@ class AudioReader {
   void cleanup_connection_();
 
   esphome::RingBuffer *output_ring_buffer_;
+
   uint8_t *transfer_buffer_{nullptr};
-  size_t transfer_buffer_size_;
+  const uint8_t *transfer_buffer_current_{nullptr};
+
+  size_t transfer_buffer_length_;  // Amount of data currently stored in transfer buffer (in bytes)
+  size_t transfer_buffer_size_;    // Capacity of transfer buffer (in bytes)
 
   esp_http_client_handle_t client_{nullptr};
 
   media_player::MediaFile *current_media_file_{nullptr};
-  size_t media_file_bytes_left_;
-  const uint8_t *media_file_data_current_;
 };
 }  // namespace nabu
 }  // namespace esphome

From e1bfd50fbeb103a6d2a6f2dde9014d4870626695 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:13:51 -0400
Subject: [PATCH 08/16] avoid compilation warning about missing fields

---
 esphome/components/nabu/audio_reader.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/esphome/components/nabu/audio_reader.cpp b/esphome/components/nabu/audio_reader.cpp
index 2f5f7d06..6ffcd7e0 100644
--- a/esphome/components/nabu/audio_reader.cpp
+++ b/esphome/components/nabu/audio_reader.cpp
@@ -65,14 +65,16 @@ esp_err_t AudioReader::start(const std::string &uri, media_player::MediaFileType
     return ESP_ERR_INVALID_ARG;
   }
 
-  esp_http_client_config_t config = {
-      .url = uri.c_str(),
-      .cert_pem = nullptr,
-      .disable_auto_redirect = false,
-      .max_redirection_count = 10,
-      .keep_alive_enable = true,
-  };
-  this->client_ = esp_http_client_init(&config);
+  esp_http_client_config_t client_config = {};
+
+  client_config.url = uri.c_str();
+  client_config.cert_pem = nullptr;
+  client_config.disable_auto_redirect = false;
+  client_config.max_redirection_count = 10;
+  client_config.buffer_size = 512;
+  client_config.keep_alive_enable = true;
+
+  this->client_ = esp_http_client_init(&client_config);
 
   if (this->client_ == nullptr) {
     return ESP_FAIL;

From a45650a321d5fce311cbb244f328763f8036bf5a Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:23:14 -0400
Subject: [PATCH 09/16] resampler blocks on ring buffer reads and writes

---
 esphome/components/nabu/audio_resampler.cpp | 77 ++++++---------------
 1 file changed, 21 insertions(+), 56 deletions(-)

diff --git a/esphome/components/nabu/audio_resampler.cpp b/esphome/components/nabu/audio_resampler.cpp
index bf7bc0df..b95644d3 100644
--- a/esphome/components/nabu/audio_resampler.cpp
+++ b/esphome/components/nabu/audio_resampler.cpp
@@ -15,6 +15,8 @@ static const bool USE_PRE_POST_FILTER = true;
 static const uint8_t OUTPUT_CHANNELS = 2;
 static const uint8_t OUTPUT_BITS_PER_SAMPLE = 16;
 
+static const size_t READ_WRITE_TIMEOUT_MS = 20;
+
 AudioResampler::AudioResampler(RingBuffer *input_ring_buffer, RingBuffer *output_ring_buffer,
                                size_t internal_buffer_samples) {
   this->input_ring_buffer_ = input_ring_buffer;
@@ -111,7 +113,7 @@ esp_err_t AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t
     //   resample_info.resample = true;
     //   this->use_effecient_upsampler_ = true;
     //   this->sample_ratio_ = upsampling_factor - 0.01f;
-    // } else 
+    // } else
     {
       // Use the general, but slower, floating point polyphase resampler
 
@@ -181,11 +183,11 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
   }
 
   if (this->output_buffer_length_ > 0) {
-    size_t bytes_free = this->output_ring_buffer_->free();
-    size_t bytes_to_write = std::min(this->output_buffer_length_, bytes_free);
+    size_t bytes_to_write = this->output_buffer_length_;
 
     if (bytes_to_write > 0) {
-      size_t bytes_written = this->output_ring_buffer_->write((void *) this->output_buffer_current_, bytes_to_write);
+      size_t bytes_written = this->output_ring_buffer_->write_without_replacement(
+          (void *) this->output_buffer_current_, bytes_to_write, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
 
       this->output_buffer_current_ += bytes_written / sizeof(int16_t);
       this->output_buffer_length_ -= bytes_written;
@@ -194,6 +196,18 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
     return AudioResamplerState::RESAMPLING;
   }
 
+  // Copy audio data directly to output_buffer if resampling isn't required
+  if (!this->resample_info_.resample && !this->resample_info_.mono_to_stereo) {
+    size_t bytes_read =
+        this->input_ring_buffer_->read((void *) this->output_buffer_, this->internal_buffer_samples_ * sizeof(int16_t),
+                                       pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
+
+    this->output_buffer_current_ = this->output_buffer_;
+    this->output_buffer_length_ += bytes_read;
+
+    return AudioResamplerState::RESAMPLING;
+  }
+
   //////
   // Refill input buffer
   //////
@@ -218,12 +232,12 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
   this->input_buffer_current_ = this->input_buffer_;
 
   // Copy new data to the end of the of the buffer
-  size_t bytes_available = this->input_ring_buffer_->available();
-  size_t bytes_to_read = std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_);
+  size_t bytes_to_read = max_input_samples * sizeof(int16_t) - this->input_buffer_length_;
 
   if (bytes_to_read > 0) {
     int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t);
-    size_t bytes_read = this->input_ring_buffer_->read((void *) new_input_buffer_data, bytes_to_read);
+    size_t bytes_read = this->input_ring_buffer_->read((void *) new_input_buffer_data, bytes_to_read,
+                                                       pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
 
     this->input_buffer_length_ += bytes_read;
   }
@@ -244,55 +258,6 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
 
       this->output_buffer_current_ = this->output_buffer_;
       this->output_buffer_length_ += output_samples * sizeof(int16_t);
-
-      // if (this->resample_info_.mono_to_stereo) {
-      //   if (this->input_buffer_length_ > 0) {
-      //     size_t available_samples = this->input_buffer_length_ / sizeof(int16_t);
-
-      //     if (available_samples / 3) {
-      //       this->input_buffer_current_ = this->input_buffer_;
-      //       this->input_buffer_length_ = 0;
-      //     } else {
-      //       dsps_fird_s16_aes3(&this->fir_filter_, this->input_buffer_current_, this->output_buffer_,
-      //                          available_samples / 3);
-
-      //       size_t output_samples = available_samples / 3;
-
-      //       this->input_buffer_current_ += output_samples * 3;
-      //       this->input_buffer_length_ -= output_samples * 3 * sizeof(int16_t);
-
-      //       this->output_buffer_current_ = this->output_buffer_;
-      //       this->output_buffer_length_ += output_samples * sizeof(int16_t);
-      //     }
-      //   }
-      // } else {
-      //   // Interleaved stereo samples
-      //   // TODO: This doesn't sound correct! I need to use separate filters for each channel so the delay line isn't
-      //   // mixed
-      //   size_t available_samples = this->input_buffer_length_ / sizeof(int16_t);
-      //   for (int i = 0; i < available_samples / 2; ++i) {
-      //     // split interleaved samples into two separate streams
-      //     this->output_buffer_[i] = this->input_buffer_[2 * i];
-      //     this->output_buffer_[i + available_samples / 2] = this->input_buffer_[2 * i + 1];
-      //   }
-      //   std::memcpy(this->input_buffer_, this->output_buffer_, available_samples * sizeof(int16_t));
-      //   dsps_fird_s16_aes3(&this->fir_filter_, this->input_buffer_, this->output_buffer_, (available_samples / 3) /
-      //   2); dsps_fird_s16_aes3(&this->fir_filter_, this->input_buffer_ + available_samples / 2,
-      //                      this->output_buffer_ + (available_samples / 3) / 2, (available_samples / 3) / 2);
-      //   std::memcpy(this->input_buffer_, this->output_buffer_, available_samples * sizeof(int16_t));
-      //   for (int i = 0; i < available_samples / 2; ++i) {
-      //     this->output_buffer_[2 * i] = this->input_buffer_[i];
-      //     this->output_buffer_[2 * i + 1] = this->input_buffer_[available_samples / 2 + i];
-      //   }
-
-      //   size_t output_samples = available_samples / 3;
-
-      //   this->input_buffer_current_ += output_samples * 3;
-      //   this->input_buffer_length_ -= output_samples * 3 * sizeof(int16_t);
-
-      //   this->output_buffer_current_ = this->output_buffer_;
-      //   this->output_buffer_length_ += output_samples * sizeof(int16_t);
-      // }
     } else {
       if (this->input_buffer_length_ > 0) {
         // Samples are indiviudal int16 values. Frames include 1 sample for mono and 2 samples for stereo

From fe245e1209291adae1da6012a69bef1196be955f Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:29:48 -0400
Subject: [PATCH 10/16] decoder blocks on ring buffer reads and writes

---
 esphome/components/nabu/audio_decoder.cpp | 44 +++++++++--------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/esphome/components/nabu/audio_decoder.cpp b/esphome/components/nabu/audio_decoder.cpp
index 8298e76e..cf9ccd4e 100644
--- a/esphome/components/nabu/audio_decoder.cpp
+++ b/esphome/components/nabu/audio_decoder.cpp
@@ -9,6 +9,8 @@
 namespace esphome {
 namespace nabu {
 
+static const size_t READ_WRITE_TIMEOUT_MS = 20;
+
 AudioDecoder::AudioDecoder(RingBuffer *input_ring_buffer, RingBuffer *output_ring_buffer, size_t internal_buffer_size) {
   this->input_ring_buffer_ = input_ring_buffer;
   this->output_ring_buffer_ = output_ring_buffer;
@@ -89,7 +91,7 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
     }
   }
 
-  if (this->potentially_failed_count_ > 5) {
+  if (this->potentially_failed_count_ > 10) {
     return AudioDecoderState::FAILED;
   }
 
@@ -97,30 +99,24 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
 
   while (state == FileDecoderState::MORE_TO_PROCESS) {
     if (this->output_buffer_length_ > 0) {
-      // Have decoded data, feed into output ring buffer
-      size_t bytes_free = this->output_ring_buffer_->free();
-      size_t bytes_to_write = std::min(this->output_buffer_length_, bytes_free);
+      // Have decoded data, write it to the output ring buffer
+
+      size_t bytes_to_write = this->output_buffer_length_;
 
       if (bytes_to_write > 0) {
-        size_t bytes_written = this->output_ring_buffer_->write((void *) this->output_buffer_current_, bytes_to_write);
+        size_t bytes_written = this->output_ring_buffer_->write_without_replacement(
+            (void *) this->output_buffer_current_, bytes_to_write, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
 
         this->output_buffer_length_ -= bytes_written;
         this->output_buffer_current_ += bytes_written;
       }
 
       if (this->output_buffer_length_ > 0) {
-        // Output ring buffer is full, so we can't do any more processing
+        // Output buffer still has decoded audio to write
         return AudioDecoderState::DECODING;
       }
     } else {
-      // Try to decode more data
-      size_t bytes_available = this->input_ring_buffer_->available();
-      size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_);
-
-      if ((this->potentially_failed_count_ > 0) && (bytes_to_read == 0)) {
-        // We didn't have enough data last time, and we have no new data, so just return
-        return AudioDecoderState::DECODING;
-      }
+      // Decode more data
 
       // Shift unread data in input buffer to start
       if (this->input_buffer_length_ > 0) {
@@ -131,15 +127,17 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
       // read in new ring buffer data to fill the remaining input buffer
       size_t bytes_read = 0;
 
+      size_t bytes_to_read = this->internal_buffer_size_ - this->input_buffer_length_;
+
       if (bytes_to_read > 0) {
         uint8_t *new_audio_data = this->input_buffer_ + this->input_buffer_length_;
-        bytes_read = this->input_ring_buffer_->read((void *) new_audio_data, bytes_to_read);
+        bytes_read = this->input_ring_buffer_->read((void *) new_audio_data, bytes_to_read, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
 
         this->input_buffer_length_ += bytes_read;
       }
 
-      if (this->input_buffer_length_ == 0) {
-        // No input data available, so we can't do any more processing
+      if ((this->input_buffer_length_ == 0) || ((this->potentially_failed_count_ > 0) && (bytes_read == 0))) {
+        // No input data available or no new data has been read, so we can't do any more processing
         state = FileDecoderState::IDLE;
       } else {
         switch (this->media_file_type_) {
@@ -167,14 +165,6 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
     } else {
       this->potentially_failed_count_ = 0;
     }
-    if (this->get_stream_info().has_value()) {
-      size_t monotone_samples =
-          (this->output_ring_buffer_->available() / sizeof(int16_t)) / this->get_stream_info().value().channels;
-      if (monotone_samples > this->get_stream_info().value().sample_rate/100) {
-        // We have more than 10 milliseconds of samples ready to output, we can break
-        break;
-      }
-    }
   }
   return AudioDecoderState::DECODING;
 }
@@ -253,7 +243,7 @@ FileDecoderState AudioDecoder::decode_flac_() {
     return FileDecoderState::END_OF_FILE;
   }
 
-  return FileDecoderState::MORE_TO_PROCESS;
+  return FileDecoderState::IDLE;
 }
 
 FileDecoderState AudioDecoder::decode_mp3_() {
@@ -296,7 +286,7 @@ FileDecoderState AudioDecoder::decode_mp3_() {
       this->stream_info_ = stream_info;
     }
   }
-  // }
+
   return FileDecoderState::MORE_TO_PROCESS;
 }
 

From fbd6bd08722a75c0f0ba4885354c97be2eeb73d7 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:33:23 -0400
Subject: [PATCH 11/16] remove fixed delays for pipeline tasks

---
 esphome/components/nabu/audio_pipeline.cpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index b5df9f31..099556b2 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -18,7 +18,6 @@ static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t);
 static const uint32_t READER_TASK_STACK_SIZE = 4096;
 static const uint32_t DECODER_TASK_STACK_SIZE = 4096;
 static const uint32_t RESAMPLER_TASK_STACK_SIZE = 4096;
-static const size_t DURATION_TASK_DELAY_MS = 10;
 
 static const size_t INFO_ERROR_QUEUE_COUNT = 5;
 
@@ -333,9 +332,6 @@ void AudioPipeline::read_task_(void *params) {
                              EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
           break;
         }
-
-        // Block to give other tasks opportunity to run
-        delay(DURATION_TASK_DELAY_MS);
       }
     }
   }
@@ -406,9 +402,6 @@ void AudioPipeline::decode_task_(void *params) {
           // Inform the resampler that the stream information is available
           xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_LOADED_STREAM_INFO);
         }
-
-        // Block to give other tasks opportunity to run
-        delay(DURATION_TASK_DELAY_MS);
       }
     }
   }
@@ -477,9 +470,6 @@ void AudioPipeline::resample_task_(void *params) {
                              EventGroupBits::RESAMPLER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
           break;
         }
-
-        // Block to give other tasks opportunity to run
-        delay(DURATION_TASK_DELAY_MS);
       }
     }
   }

From 26b8a351d4c6a4259c15e82d99eadb3b312de659 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:34:15 -0400
Subject: [PATCH 12/16] reduce decoder and resampler task stacks

---
 esphome/components/nabu/audio_pipeline.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index 099556b2..43b18e74 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -16,8 +16,8 @@ static const size_t BUFFER_SIZE_SAMPLES = 32768;
 static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t);
 
 static const uint32_t READER_TASK_STACK_SIZE = 4096;
-static const uint32_t DECODER_TASK_STACK_SIZE = 4096;
-static const uint32_t RESAMPLER_TASK_STACK_SIZE = 4096;
+static const uint32_t DECODER_TASK_STACK_SIZE = 3072;
+static const uint32_t RESAMPLER_TASK_STACK_SIZE = 3072;
 
 static const size_t INFO_ERROR_QUEUE_COUNT = 5;
 

From cea812bb61ead3be2765b6705562daa353255667 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:38:12 -0400
Subject: [PATCH 13/16] move pipeline task stacks into internal memory

---
 esphome/components/nabu/audio_pipeline.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/esphome/components/nabu/audio_pipeline.cpp b/esphome/components/nabu/audio_pipeline.cpp
index 43b18e74..00d4075b 100644
--- a/esphome/components/nabu/audio_pipeline.cpp
+++ b/esphome/components/nabu/audio_pipeline.cpp
@@ -104,16 +104,14 @@ esp_err_t AudioPipeline::allocate_buffers_() {
     return ESP_ERR_NO_MEM;
   }
 
-  ExternalRAMAllocator<StackType_t> stack_allocator(ExternalRAMAllocator<StackType_t>::ALLOW_FAILURE);
-
   if (this->read_task_stack_buffer_ == nullptr)
-    this->read_task_stack_buffer_ = stack_allocator.allocate(READER_TASK_STACK_SIZE);
+    this->read_task_stack_buffer_ = (StackType_t *) malloc(READER_TASK_STACK_SIZE);
 
   if (this->decode_task_stack_buffer_ == nullptr)
-    this->decode_task_stack_buffer_ = stack_allocator.allocate(DECODER_TASK_STACK_SIZE);
+    this->decode_task_stack_buffer_ = (StackType_t *) malloc(DECODER_TASK_STACK_SIZE);
 
   if (this->resample_task_stack_buffer_ == nullptr)
-    this->resample_task_stack_buffer_ = stack_allocator.allocate(RESAMPLER_TASK_STACK_SIZE);
+    this->resample_task_stack_buffer_ = (StackType_t *) malloc(RESAMPLER_TASK_STACK_SIZE);
 
   if ((this->read_task_stack_buffer_ == nullptr) || (this->decode_task_stack_buffer_ == nullptr) ||
       (this->resample_task_stack_buffer_ == nullptr)) {

From 3dde23a92e9392279c5bf7ae9a646464a694faec Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:42:19 -0400
Subject: [PATCH 14/16] move mixer task stack to internal memory

---
 esphome/components/nabu/audio_mixer.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/esphome/components/nabu/audio_mixer.cpp b/esphome/components/nabu/audio_mixer.cpp
index 17fe2e9e..5a3dda54 100644
--- a/esphome/components/nabu/audio_mixer.cpp
+++ b/esphome/components/nabu/audio_mixer.cpp
@@ -296,10 +296,8 @@ esp_err_t AudioMixer::allocate_buffers_() {
     return ESP_ERR_NO_MEM;
   }
 
-  ExternalRAMAllocator<StackType_t> stack_allocator(ExternalRAMAllocator<StackType_t>::ALLOW_FAILURE);
-
   if (this->stack_buffer_ == nullptr)
-    this->stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
+    this->stack_buffer_ = (StackType_t *) malloc(TASK_STACK_SIZE);
 
   if (this->stack_buffer_ == nullptr) {
     return ESP_ERR_NO_MEM;

From 2d3deeb364755af51e8bab2f37f22cd583f203f7 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:44:52 -0400
Subject: [PATCH 15/16] log pipeline errors

---
 esphome/components/nabu/nabu_media_player.cpp | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index c23847b7..54e7307c 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -311,7 +311,8 @@ void NabuMediaPlayer::speaker_task(void *params) {
 
             size_t bytes_read = 0;
             size_t bytes_to_read = sizeof(int16_t) * SAMPLES_IN_ALL_DMA_BUFFERS;
-            bytes_read = this_speaker->audio_mixer_->read((uint8_t *) buffer, bytes_to_read, pdMS_TO_TICKS(TASK_DELAY_MS));
+            bytes_read =
+                this_speaker->audio_mixer_->read((uint8_t *) buffer, bytes_to_read, pdMS_TO_TICKS(TASK_DELAY_MS));
 
             if (bytes_read > 0) {
               size_t bytes_written;
@@ -581,6 +582,22 @@ void NabuMediaPlayer::loop() {
   if (this->media_pipeline_ != nullptr)
     this->media_pipeline_state_ = this->media_pipeline_->get_state();
 
+  if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) {
+    ESP_LOGE(TAG, "Media pipeline encountered an error reading the file.");
+  } else if (this->media_pipeline_state_ == AudioPipelineState::ERROR_DECODING) {
+    ESP_LOGE(TAG, "Media pipeline encountered an error decoding the file.");
+  } else if (this->media_pipeline_state_ == AudioPipelineState::ERROR_RESAMPLING) {
+    ESP_LOGE(TAG, "Media pipeline encountered an error resampling the file.");
+  }
+
+  if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_READING) {
+    ESP_LOGE(TAG, "Announcement pipeline encountered an error reading the file.");
+  } else if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_DECODING) {
+    ESP_LOGE(TAG, "Announcement pipeline encountered an error decoding the file.");
+  } else if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_RESAMPLING) {
+    ESP_LOGE(TAG, "Announcement pipeline encountered an error resampling the file.");
+  }
+
   if (this->announcement_pipeline_state_ != AudioPipelineState::STOPPED) {
     this->state = media_player::MEDIA_PLAYER_STATE_ANNOUNCING;
   } else {

From 9336c8510988e563d59151d3f640ad5acce58475 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kahrendt@gmail.com>
Date: Tue, 3 Sep 2024 09:52:39 -0400
Subject: [PATCH 16/16] update TODO

---
 esphome/components/nabu/nabu_media_player.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/esphome/components/nabu/nabu_media_player.cpp b/esphome/components/nabu/nabu_media_player.cpp
index 54e7307c..fe006894 100644
--- a/esphome/components/nabu/nabu_media_player.cpp
+++ b/esphome/components/nabu/nabu_media_player.cpp
@@ -17,7 +17,6 @@ namespace nabu {
 
 // TODO:
 //  - Cleanup AudioResampler code (remove or refactor the esp_dsp fir filter)
-//  - Tune task memory requirements
 //  - Clean up process around playing back local media files
 //    - Create a registry of media files in Python
 //    - Add a yaml action to play a specific media file