Skip to content

Commit

Permalink
dump esp-sr v2.0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
sun-xiangyu committed Feb 18, 2025
1 parent f2cc8a9 commit b4f1200
Show file tree
Hide file tree
Showing 18 changed files with 24,285 additions and 127 deletions.
2 changes: 1 addition & 1 deletion idf_component.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: "2.0.0"
version: "2.0.1"
description: esp_sr provides basic algorithms for Speech Recognition applications
url: https://github.com/espressif/esp-sr
dependencies:
Expand Down
33 changes: 16 additions & 17 deletions include/esp32/esp_afe_aec.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
#ifndef _ESP_AFE_AEC_H_
#define _ESP_AFE_AEC_H_


#include "esp_afe_config.h"
#include "esp_aec.h"
#include "esp_afe_config.h"

#include <stdint.h>

Expand All @@ -13,19 +12,19 @@ extern "C" {
#endif

typedef struct {
aec_handle_t* handle;
aec_handle_t *handle;
aec_mode_t mode;
afe_pcm_config_t pcm_config;
int frame_size;
int16_t *data;
}afe_aec_handle_t;

int16_t *data;
} afe_aec_handle_t;

/**
* @brief Creates an instance to the AEC structure.
*
* @warning Currently only support 1 microphone channel and 1 playback channe.
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback channel will be selected.
* @brief Creates an instance to the AEC structure.
*
* @warning Currently supports only 1 microphone channel and 1 playback channel.
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
* channel will be selected.
*
* The input format, same as afe config:
* M to represent the microphone channel
Expand All @@ -37,25 +36,26 @@ typedef struct {
*
* @param input_format The input format
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for esp32c5.
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
* esp32c5.
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
*
* @return afe_aec_handle_t* The created AEC instance
*/
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);


/**
* @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame from the mic.
*
*
* @param handel The instance of AEC.
* @param indata Input audio data, format is define by input_format. Note indata will be modified in function call.
* @param outdata Returns near-end signal with echo removed.
* @param indata Input audio data, format is defined by input_format.
* @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned;
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory.
* @return The bytes of outdata.
*/
size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outdata);
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);

/**
* @brief Get frame size of AEC (the samples of one frame)
Expand All @@ -64,7 +64,6 @@ size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outda
*/
int afe_aec_get_chunksize(afe_aec_handle_t *handle);


/**
* @brief Free the AEC instance
*
Expand Down
33 changes: 16 additions & 17 deletions include/esp32p4/esp_afe_aec.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
#ifndef _ESP_AFE_AEC_H_
#define _ESP_AFE_AEC_H_


#include "esp_afe_config.h"
#include "esp_aec.h"
#include "esp_afe_config.h"

#include <stdint.h>

Expand All @@ -13,19 +12,19 @@ extern "C" {
#endif

typedef struct {
aec_handle_t* handle;
aec_handle_t *handle;
aec_mode_t mode;
afe_pcm_config_t pcm_config;
int frame_size;
int16_t *data;
}afe_aec_handle_t;

int16_t *data;
} afe_aec_handle_t;

/**
* @brief Creates an instance to the AEC structure.
*
* @warning Currently only support 1 microphone channel and 1 playback channe.
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback channel will be selected.
* @brief Creates an instance to the AEC structure.
*
* @warning Currently supports only 1 microphone channel and 1 playback channel.
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
* channel will be selected.
*
* The input format, same as afe config:
* M to represent the microphone channel
Expand All @@ -37,25 +36,26 @@ typedef struct {
*
* @param input_format The input format
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for esp32c5.
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
* esp32c5.
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
*
* @return afe_aec_handle_t* The created AEC instance
*/
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);


/**
* @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame from the mic.
*
*
* @param handel The instance of AEC.
* @param indata Input audio data, format is define by input_format. Note indata will be modified in function call.
* @param outdata Returns near-end signal with echo removed.
* @param indata Input audio data, format is defined by input_format.
* @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned;
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory.
* @return The bytes of outdata.
*/
size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outdata);
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);

/**
* @brief Get frame size of AEC (the samples of one frame)
Expand All @@ -64,7 +64,6 @@ size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outda
*/
int afe_aec_get_chunksize(afe_aec_handle_t *handle);


/**
* @brief Free the AEC instance
*
Expand Down
33 changes: 16 additions & 17 deletions include/esp32s3/esp_afe_aec.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
#ifndef _ESP_AFE_AEC_H_
#define _ESP_AFE_AEC_H_


#include "esp_afe_config.h"
#include "esp_aec.h"
#include "esp_afe_config.h"

#include <stdint.h>

Expand All @@ -13,19 +12,19 @@ extern "C" {
#endif

typedef struct {
aec_handle_t* handle;
aec_handle_t *handle;
aec_mode_t mode;
afe_pcm_config_t pcm_config;
int frame_size;
int16_t *data;
}afe_aec_handle_t;

int16_t *data;
} afe_aec_handle_t;

/**
* @brief Creates an instance to the AEC structure.
*
* @warning Currently only support 1 microphone channel and 1 playback channe.
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback channel will be selected.
* @brief Creates an instance to the AEC structure.
*
* @warning Currently supports only 1 microphone channel and 1 playback channel.
* If input has multiple microphone channels and playback channels, just the first microphone channel and playback
* channel will be selected.
*
* The input format, same as afe config:
* M to represent the microphone channel
Expand All @@ -37,25 +36,26 @@ typedef struct {
*
* @param input_format The input format
* @param filter_length The length of filter. The larger the filter, the higher the CPU loading.
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for esp32c5.
* Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for
* esp32c5.
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
*
* @return afe_aec_handle_t* The created AEC instance
*/
afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode);


/**
* @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame from the mic.
*
*
* @param handel The instance of AEC.
* @param indata Input audio data, format is define by input_format. Note indata will be modified in function call.
* @param outdata Returns near-end signal with echo removed.
* @param indata Input audio data, format is defined by input_format.
* @param outdata Near-end signal with echo removed. outdata must be 16-byte aligned;
* please use heap_caps_aligned_calloc(16, n, size, caps) to allocate an aligned chunk of memory.
* @return The bytes of outdata.
*/
size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outdata);
size_t afe_aec_process(afe_aec_handle_t *handel, const int16_t *indata, int16_t *outdata);

/**
* @brief Get frame size of AEC (the samples of one frame)
Expand All @@ -64,7 +64,6 @@ size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outda
*/
int afe_aec_get_chunksize(afe_aec_handle_t *handle);


/**
* @brief Free the AEC instance
*
Expand Down
84 changes: 84 additions & 0 deletions include/esp32s3/esp_sr_webrtc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_WEBRTC_H_
#define _ESP_WEBRTC_H_

#ifdef __cplusplus
extern "C" {
#endif
#include "esp_agc.h"
#include "esp_log.h"
#include "esp_ns.h"
#include "sr_ringbuf.h"
#include <stdint.h>

#include "esp_heap_caps.h"

/**
* @brief Per-instance state of the webrtc processing wrapper declared in this header.
*
* NOTE(review): the field descriptions below are inferred from the field names and from the
* webrtc_create()/webrtc_process() parameters; confirm them against the implementation.
*/
typedef struct {
/* Presumably the noise-suppression (NS) instance -- TODO confirm. */
void *ns_handle;
/* Presumably the automatic gain control (AGC) instance -- TODO confirm. */
void *agc_handle;
/* Presumably the size of one processing frame -- TODO confirm units (samples vs. bytes). */
int frame_size;
/* Presumably the audio sample rate passed to webrtc_create() -- TODO confirm. */
int sample_rate;
/* Buffer of 16-bit samples; its role is not visible from this header. */
int16_t *buff;
/* Buffer of 16-bit samples, presumably the data returned by webrtc_process() -- TODO confirm. */
int16_t *out_data;
/* Ring buffer handle (type presumably declared in sr_ringbuf.h); usage not visible from this header. */
sr_ringbuf_handle_t rb;
} webrtc_handle_t;

/**
* @brief Creates an instance of webrtc.
*
* @warning Supported frame lengths are 10 ms, 20 ms, 30 ms and 32 ms.
*
* @param frame_length_ms The length of one audio processing frame, in milliseconds
* @param ns_mode The mode of NS. -1 means NS is disabled. 0: Mild, 1: Medium, 2: Aggressive
* @param agc_mode The mode of AGC
* @param agc_gain The gain of AGC. default is 9
* @param agc_target_level The target level of AGC. default is -3 dBFS
* @param sample_rate The sample rate of the audio.
*
* @return
* - NULL: Create failed
* - Others: The instance of webrtc
*/
webrtc_handle_t *webrtc_create(
int frame_length_ms, int ns_mode, agc_mode_t agc_mode, int agc_gain, int agc_target_level, int sample_rate);

/**
* @brief Feed samples of an audio stream to the webrtc instance and get the audio stream after noise suppression.
*
* @param handle The instance of webrtc.
* @param indata An array of 16-bit signed audio samples.
* @param size The sample size of output data
* @param enable_ns Enable noise suppression
* @param enable_agc Enable automatic gain control
*
* @return data after noise suppression
*
* NOTE(review): size is a pointer and is documented only as the output sample size;
* confirm whether it is also read as an input.
* NOTE(review): this prototype uses bool, but this header does not include <stdbool.h>
* directly; confirm it is provided by one of the included headers.
*/
int16_t *webrtc_process(webrtc_handle_t *handle, int16_t *indata, int *size, bool enable_ns, bool enable_agc);

/**
* @brief Free the webrtc instance
*
* @param handle The instance of webrtc to free (presumably one returned by webrtc_create() -- TODO confirm).
*
* @return None
*/
void webrtc_destroy(webrtc_handle_t *handle);

#ifdef __cplusplus
}
#endif

#endif //_ESP_WEBRTC_H_
Binary file modified lib/esp32/libesp_audio_processor.a
Binary file not shown.
Binary file modified lib/esp32p4/libesp_audio_front_end.a
Binary file not shown.
Binary file modified lib/esp32p4/libesp_audio_processor.a
Binary file not shown.
Binary file modified lib/esp32p4/libmultinet.a
Binary file not shown.
Binary file modified lib/esp32p4/libvadnet.a
Binary file not shown.
Binary file modified lib/esp32p4/libwakenet.a
Binary file not shown.
Binary file modified lib/esp32s3/libesp_audio_front_end.a
Binary file not shown.
Binary file modified lib/esp32s3/libmultinet.a
Binary file not shown.
Binary file modified lib/esp32s3/libnsnet.a
Binary file not shown.
Binary file modified lib/esp32s3/libvadnet.a
Binary file not shown.
Binary file modified lib/esp32s3/libwakenet.a
Binary file not shown.
Loading

0 comments on commit b4f1200

Please sign in to comment.