forked from google/lyra
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlog_mel_spectrogram_extractor_impl_test.cc
105 lines (86 loc) · 3.59 KB
/
log_mel_spectrogram_extractor_impl_test.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "log_mel_spectrogram_extractor_impl.h"
#include <cstdint>
#include <memory>
#include <optional>
#include <vector>
#include "absl/types/span.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace chromemedia {
namespace codec {
namespace {
// Extractor configuration shared by every test in this file.
constexpr int kTestSampleRateHz = 16000;
constexpr int kNumMelBins = 10;
constexpr int kHopLengthSamples = 5;
constexpr int kWindowLengthSamples = 10;

// Number of hops fed to the extractor by the reference test; this is also the
// number of rows in kMelBins.
constexpr int kNumOutputMelBins = 3;

// Input audio: kNumOutputMelBins hops of kHopLengthSamples samples each.
constexpr int16_t kWavData[] = {
    7954,   10085, 8733,   10844,  29949,  // hop 0
    -549,   20833, 30345,  18086,  11375,  // hop 1
    -27309, 12323, -22891, -23360, 11958,  // hop 2
};

// Reference features, one row per hop and one column per mel bin.
//
// The values were produced by running
// audio/dsp/mfcc/mfcc_mel.LogMelSpectrogram on kWavData pre-pended with 5
// zeros, and then dividing the results by 10. Parameters used:
//   audio_sample_rate=16000
//   log_additive_offset=0.0
//   log_floor=500
//   window_length_secs=0.000625 (window_length_samples=10)
//   hop_length_secs=0.0003125 (hop_length_samples=5)
//   window_type="hann"
//   fft_length=None
//   upper_edge_hz=0.99*(16000/2)
//   lower_edge_hz=0
constexpr float kMelBins[][kNumMelBins] = {
    {0.62146081, 0.62146081, 0.79771997, 1.00416802, 0.73013308, 0.96676503,
     0.87643814, 0.89284485, 0.90586112, 0.8633126},
    {0.62146081, 0.62146081, 0.89000145, 1.09644949, 0.76740002, 1.00403196,
     0.8919037, 0.99746922, 1.06052462, 1.08220812},
    {0.62146081, 0.62146081, 0.83526758, 1.04171563, 0.82093681, 1.05756876,
     0.96348656, 1.01345318, 1.07686605, 1.12100911}};
// Test fixture that creates a LogMelSpectrogramExtractorImpl in SetUp() using
// the file-level test constants and exposes it to the test bodies.
class LogMelSpectrogramExtractorImplTest : public testing::Test {
 protected:
  // Builds the extractor under test. The null check is a fatal assertion, so
  // a failed Create() is reported here rather than inside a test body.
  void SetUp() override {
    feature_extractor_ = LogMelSpectrogramExtractorImpl::Create(
        kTestSampleRateHz, kNumMelBins, kHopLengthSamples,
        kWindowLengthSamples);
    ASSERT_NE(feature_extractor_, nullptr);
  }

  // Extractor instance exercised by each TEST_F.
  std::unique_ptr<LogMelSpectrogramExtractorImpl> feature_extractor_;
};
// Feeds kWavData to the extractor one hop at a time and checks each returned
// feature vector against the matching reference row of kMelBins.
TEST_F(LogMelSpectrogramExtractorImplTest, ThreeFramesEqualExpected) {
  // Despite its name, kNumOutputMelBins is used here as the frame count: it
  // bounds both the hop index into kWavData and the row index into kMelBins.
  for (int i = 0; i < kNumOutputMelBins; ++i) {
    const absl::Span<const int16_t> audio_frame = absl::MakeConstSpan(
        &kWavData[i * kHopLengthSamples], kHopLengthSamples);
    auto features_or = feature_extractor_->Extract(audio_frame);
    // This check must be fatal: continuing to value() on an empty result
    // would throw (or abort the test binary) instead of reporting a clean
    // failure for this test.
    ASSERT_TRUE(features_or.has_value()) << "no features for frame " << i;
    EXPECT_THAT(features_or.value(),
                testing::Pointwise(testing::FloatEq(), kMelBins[i]));
  }
}
// A frame with one sample more than the hop length must be rejected: Extract()
// is expected to return an empty result.
TEST_F(LogMelSpectrogramExtractorImplTest, FrameLongerThanExpected) {
  // Zero-filled buffer that is exactly one sample too long.
  std::vector<int16_t> oversized_frame(kHopLengthSamples + 1);
  const absl::Span<const int16_t> frame_view =
      absl::MakeConstSpan(oversized_frame);

  auto result = feature_extractor_->Extract(frame_view);

  EXPECT_FALSE(result.has_value());
}
// A frame with one sample fewer than the hop length must be rejected:
// Extract() is expected to return an empty result.
TEST_F(LogMelSpectrogramExtractorImplTest, FrameShorterThanExpected) {
  // Copy the first (kHopLengthSamples - 1) samples of the reference audio.
  const int16_t* const first_sample = kWavData;
  const int16_t* const past_last_sample = kWavData + kHopLengthSamples - 1;
  std::vector<int16_t> undersized_frame(first_sample, past_last_sample);
  const absl::Span<const int16_t> frame_view =
      absl::MakeConstSpan(undersized_frame);

  auto result = feature_extractor_->Extract(frame_view);

  EXPECT_FALSE(result.has_value());
}
} // namespace
} // namespace codec
} // namespace chromemedia