forked from google/lyra
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnoise_estimator.cc
207 lines (178 loc) · 7.56 KB
/
noise_estimator.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "noise_estimator.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <memory>
#include <numeric>
#include <vector>
#include "absl/memory/memory.h"
#include "absl/types/optional.h"
#include "audio/dsp/signal_vector_util.h"
#include "glog/logging.h"
#include "log_mel_spectrogram_extractor_impl.h"
namespace chromemedia {
namespace codec {
namespace {
// Returns the arithmetic mean of |vec|, or 0 when |vec| is empty.
inline float Average(const std::vector<float>& vec) {
  if (vec.empty()) {
    // Without this guard the division below is 0 / 0, which yields NaN and
    // would propagate into every value derived from the mean.
    return 0.f;
  }
  return std::accumulate(vec.begin(), vec.end(), 0.f) / vec.size();
}
// Writes min(vec1[k], vec2[k]) into (*assignable)[k] for every index k of
// vec1. |assignable| is allowed to alias vec1 or vec2, or to be a third
// vector. Accesses are bounds checked, so a shorter vec2 or |assignable|
// raises std::out_of_range.
void ElementWiseMin(const std::vector<float>& vec1,
                    const std::vector<float>& vec2,
                    std::vector<float>* assignable) {
  using Index = std::vector<float>::size_type;
  for (Index k = 0; k < vec1.size(); ++k) {
    // Read both inputs before writing so aliasing with the output is safe.
    const float first = vec1.at(k);
    const float second = vec2.at(k);
    assignable->at(k) = std::min(first, second);
  }
}
// Maintains the per-band running minimum of |smoothed_power| using two
// buffers so that no history of past frames has to be stored.
//
// On frames where frame_num is a multiple of num_frames_per_update,
// |min_power| becomes the element-wise min of |tmp_min_power| and
// |smoothed_power|, and |tmp_min_power| is restarted from |smoothed_power|.
// On every other frame both buffers are lowered towards |smoothed_power|.
void UpdateMinAndTemp(uint64_t frame_num, int num_frames_per_update,
                      const std::vector<float>& smoothed_power,
                      std::vector<float>* min_power,
                      std::vector<float>* tmp_min_power) {
  const bool starts_new_window = (frame_num % num_frames_per_update) == 0;
  if (!starts_new_window) {
    ElementWiseMin(*min_power, smoothed_power, min_power);
    ElementWiseMin(*tmp_min_power, smoothed_power, tmp_min_power);
    return;
  }
  // Publish the minimum gathered in the temporary buffer, then restart it.
  ElementWiseMin(*tmp_min_power, smoothed_power, min_power);
  *tmp_min_power = smoothed_power;
}
// Computes one smoothing weight per frequency band.
//
// A weight says how strongly the smoothed power of a band should hold on to
// its previous value rather than follow the current frame:
//   - close to max_smoothing: heavy smoothing (the band looks like noise);
//   - close to 0: follow the current power (the band looks like speech).
// No weight exceeds max_smoothing, because it is multiplied by two
// exponentials of non-positive arguments.
//
// The returned vector has noise_estimate.size() elements; one entry is
// written for every index of smoothed_power.
std::vector<float> SmoothingFactor(float max_smoothing,
                                   const std::vector<float>& curr_power_db,
                                   const std::vector<float>& smoothed_power,
                                   const std::vector<float>& noise_estimate) {
  constexpr float kPowDiff = 0.3f;
  // Frame-level correction: 1 when the mean smoothed power equals the mean
  // current power, decaying towards 0 as the two means drift apart.
  const float mean_gap =
      (Average(smoothed_power) - Average(curr_power_db)) / kPowDiff;
  const float smoothing_correction = std::exp(-audio_dsp::Square(mean_gap));
  const float frame_scale = max_smoothing * smoothing_correction;

  std::vector<float> smoothing_factor(noise_estimate.size());
  using Index = std::vector<float>::size_type;
  for (Index band = 0; band < smoothed_power.size(); ++band) {
    // Band-level term: 1 when the smoothed power sits on the noise estimate,
    // decaying towards 0 as it moves away from it.
    const float band_gap =
        (smoothed_power.at(band) - noise_estimate.at(band)) / kPowDiff;
    smoothing_factor.at(band) =
        frame_scale * std::exp(-audio_dsp::Square(band_gap));
  }
  return smoothing_factor;
}
} // namespace
std::unique_ptr<NoiseEstimator> NoiseEstimator::Create(
int num_features, float num_seconds_per_frame) {
if (num_seconds_per_frame <= 0) {
LOG(ERROR) << "Argument num_seconds_per_frame has to be positive.";
return nullptr;
}
const float kMaxSmoothingHalflifeSecs = 0.7f;
const float kUpdateTimeSecs = 1.f;
const float kBoundHalfLifeSecs = 1.f;
return absl::WrapUnique(new NoiseEstimator(
num_features, std::round(kUpdateTimeSecs / num_seconds_per_frame),
std::pow(0.5f, num_seconds_per_frame / kMaxSmoothingHalflifeSecs),
std::pow(0.5f, num_seconds_per_frame / kBoundHalfLifeSecs)));
}
// Stores the configuration and allocates the per-band state, each buffer
// holding num_features elements.
//
// num_frames_per_update is later used as a modulus on the frame counter (see
// UpdateMinAndTemp), max_smoothing is the upper limit handed to
// SmoothingFactor, and bound_decay_factor is the per-call multiplier applied
// to noise_bound_ in IsSimilarNoise.
//
// NOTE(review): num_frames_received_ is read in Update() but is not
// initialized here; presumably it has a default member initializer in the
// header - confirm.
NoiseEstimator::NoiseEstimator(int num_features, int num_frames_per_update,
float max_smoothing, float bound_decay_factor)
: num_features_(num_features),
num_frames_per_update_(num_frames_per_update),
max_smoothing_(max_smoothing),
bound_decay_factor_(bound_decay_factor),
// The three smoothing buffers start zero-filled; Update() overwrites them
// with the first frame it receives.
smoothed_power_(num_features),
squared_smoothed_power_(num_features),
tmp_min_smoothed_power_(num_features),
// The noise estimate starts at the extractor's silence value in every band.
noise_estimate_(num_features,
LogMelSpectrogramExtractorImpl::GetSilenceValue()),
// A zero bound until ComputeBounds() has run at least once.
noise_bound_(num_features, 0.f) {}
// Recomputes noise_bound_ for every frequency band from the smoothed first
// and second moments of the power.
//
// Per band the variance is estimated as
//   squared_smoothed_power_ - smoothed_power_^2,
// clamped at zero so the square root below never sees a negative value. The
// bound is then kBoundFactor * sqrt(variance * log(number of bands)).
void NoiseEstimator::ComputeBounds() {
  if (smoothed_power_.empty()) {
    return;
  }
  constexpr float kBoundFactor = 0.9f;
  // Loop invariant, so computed once. Kept as double (std::log on an integer
  // argument returns double) so the arithmetic below is carried out exactly
  // as it was when the logarithm was evaluated on every iteration.
  const double log_num_bands = std::log(noise_bound_.size());
  using Index = std::vector<float>::size_type;
  for (Index band = 0; band < smoothed_power_.size(); ++band) {
    const float noise_variance = std::max<float>(
        0.f, squared_smoothed_power_.at(band) -
                 audio_dsp::Square(smoothed_power_.at(band)));
    noise_bound_.at(band) =
        kBoundFactor * std::sqrt(noise_variance * log_num_bands);
  }
}
bool NoiseEstimator::Update(const std::vector<float>& curr_power_db) {
if (curr_power_db.size() != num_features_) {
return false;
}
if (num_frames_received_ == 0) {
smoothed_power_ = curr_power_db;
for (int i = 0; i < curr_power_db.size(); ++i) {
squared_smoothed_power_.at(i) = audio_dsp::Square(curr_power_db.at(i));
}
tmp_min_smoothed_power_ = curr_power_db;
}
std::vector<float> smoothing_factor = SmoothingFactor(
max_smoothing_, curr_power_db, smoothed_power_, noise_estimate_);
// smoothed_power_ per frequency band = smoothing_factor * smoothed_power +
// (1 - smoothing_factor) * curr_power_db.
for (int i = 0; i < smoothed_power_.size(); ++i) {
smoothed_power_.at(i) =
smoothing_factor.at(i) * smoothed_power_.at(i) +
(1.f - smoothing_factor.at(i)) * curr_power_db.at(i);
squared_smoothed_power_.at(i) =
smoothing_factor.at(i) * squared_smoothed_power_.at(i) +
(1.f - smoothing_factor.at(i)) * audio_dsp::Square(curr_power_db.at(i));
}
UpdateMinAndTemp(num_frames_received_, num_frames_per_update_,
smoothed_power_, &noise_estimate_, &tmp_min_smoothed_power_);
ComputeBounds();
// Increment by 1 each time the curr_power_db is received.
num_frames_received_ += 1;
return true;
}
// Returns a copy of the current per-band noise estimate.
std::vector<float> NoiseEstimator::NoiseEstimate() const {
  std::vector<float> estimate_copy = noise_estimate_;
  return estimate_copy;
}
// Tells whether |curr_power_db| looks like the noise tracked so far.
//
// Returns:
//   - absl::nullopt when |curr_power_db| does not have num_features_ elements;
//   - false as soon as one band lies above noise_estimate_ + noise_bound_ or
//     below noise_estimate_ - noise_bound_;
//   - true otherwise, after multiplying every element of noise_bound_ by
//     bound_decay_factor_.
absl::optional<bool> NoiseEstimator::IsSimilarNoise(
    const std::vector<float>& curr_power_db) {
  if (curr_power_db.size() != num_features_) {
    return absl::nullopt;
  }

  using Index = std::vector<float>::size_type;
  for (Index band = 0; band < curr_power_db.size(); ++band) {
    const float power = curr_power_db.at(band);
    const float estimate = noise_estimate_.at(band);
    const float bound = noise_bound_.at(band);
    const bool above_range = power > estimate + bound;
    const bool below_range = power < estimate - bound;
    if (above_range || below_range) {
      return false;
    }
  }

  // Consecutive noise frames shrink the bound exponentially:
  //   x(t) = x0 * (1/2) ^ (t / t_half_life)
  //        = x(t - 1) * (1/2) ^ (1 / t_half_life)
  // This keeps a very large bound from getting stuck, because the decay
  // eventually forces Update() to recalculate it.
  std::for_each(noise_bound_.begin(), noise_bound_.end(),
                [this](float& bound) { bound *= bound_decay_factor_; });
  return true;
}
} // namespace codec
} // namespace chromemedia