Skip to content

Commit

Permalink
Add Speaker ID demo for C# (#862)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored May 11, 2024
1 parent a88b3ba commit 677bc1d
Show file tree
Hide file tree
Showing 10 changed files with 511 additions and 2 deletions.
5 changes: 4 additions & 1 deletion .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

cd dotnet-examples/

cd streaming-hlg-decoding/
cd speaker-identification
./run.sh

cd ../streaming-hlg-decoding/
./run.sh

cd ../spoken-language-identification
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test-dot-net.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ jobs:
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification
ls -lh /tmp
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

set(SHERPA_ONNX_VERSION "1.9.23")
set(SHERPA_ONNX_VERSION "1.9.24")

# Disable warning about
#
Expand Down
6 changes: 6 additions & 0 deletions dotnet-examples/sherpa-onnx.sln
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identificat
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -54,5 +56,9 @@ Global
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
155 changes: 155 additions & 0 deletions dotnet-examples/speaker-identification/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do speaker identification with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
//
// 2. Download test data from
//
// git clone https://github.com/csukuangfj/sr-data
//
// 3. Now run it
//
// dotnet run

using SherpaOnnx;
using System.Collections.Generic;
using System;

/// <summary>
/// Speaker identification demo: enrolls two speakers from several recordings
/// each, lists them, identifies the speaker of a few test recordings, and then
/// exercises verification and removal on the speaker manager.
/// </summary>
/// <remarks>
/// Every sanity check that fails prints a message and sets a non-zero process
/// exit code, so that scripts invoking <c>dotnet run</c> can detect the failure.
/// </remarks>
class SpeakerIdentificationDemo
{
    /// <summary>
    /// Computes the speaker embedding of one wave file.
    /// </summary>
    /// <param name="extractor">The extractor used to create the stream and compute the embedding.</param>
    /// <param name="filename">Path of the wave file to read.</param>
    /// <returns>The embedding returned by <c>extractor.Compute</c>.</returns>
    public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
    {
        WaveReader reader = new WaveReader(filename);

        // The stream is only needed until the embedding has been computed;
        // dispose it deterministically instead of waiting for the finalizer.
        using OnlineStream stream = extractor.CreateStream();
        stream.AcceptWaveform(reader.SampleRate, reader.Samples);
        stream.InputFinished();

        return extractor.Compute(stream);
    }

    /// <summary>
    /// Computes one embedding per file, in the same order as <paramref name="filenames"/>.
    /// </summary>
    private static float[][] ComputeEmbeddings(SpeakerEmbeddingExtractor extractor, string[] filenames)
    {
        var embeddings = new float[filenames.Length][];
        for (int i = 0; i < filenames.Length; ++i)
        {
            embeddings[i] = ComputeEmbedding(extractor, filenames[i]);
        }

        return embeddings;
    }

    /// <summary>
    /// Reports a failed check and marks the process as failed.
    /// </summary>
    private static void Fail(string message)
    {
        Console.WriteLine(message);

        // Main returns void, so the failure is signalled through the exit code.
        Environment.ExitCode = 1;
    }

    static void Main(string[] args)
    {
        var config = new SpeakerEmbeddingExtractorConfig();
        config.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
        config.Debug = 1;

        // Both objects are released when Main returns, including the early
        // returns taken on a failed check.
        using var extractor = new SpeakerEmbeddingExtractor(config);
        using var manager = new SpeakerEmbeddingManager(extractor.Dim);

        // Enrollment recordings: several utterances per speaker.
        string[] spk1Files =
        {
            "./sr-data/enroll/fangjun-sr-1.wav",
            "./sr-data/enroll/fangjun-sr-2.wav",
            "./sr-data/enroll/fangjun-sr-3.wav",
        };
        string[] spk2Files =
        {
            "./sr-data/enroll/leijun-sr-1.wav",
            "./sr-data/enroll/leijun-sr-2.wav",
        };

        float[][] spk1Vec = ComputeEmbeddings(extractor, spk1Files);
        float[][] spk2Vec = ComputeEmbeddings(extractor, spk2Files);

        if (!manager.Add("fangjun", spk1Vec))
        {
            Fail("Failed to register fangjun");
            return;
        }

        if (!manager.Add("leijun", spk2Vec))
        {
            Fail("Failed to register leijun");
            return;
        }

        if (manager.NumSpeakers != 2)
        {
            Fail("There should be two speakers");
            return;
        }

        if (!manager.Contains("fangjun"))
        {
            Fail("It should contain the speaker fangjun");
            return;
        }

        if (!manager.Contains("leijun"))
        {
            Fail("It should contain the speaker leijun");
            return;
        }

        Console.WriteLine("---All speakers---");
        foreach (string speaker in manager.GetAllSpeakers())
        {
            Console.WriteLine(speaker);
        }
        Console.WriteLine("------------");

        string[] testFiles =
        {
            "./sr-data/test/fangjun-test-sr-1.wav",
            "./sr-data/test/leijun-test-sr-1.wav",
            "./sr-data/test/liudehua-test-sr-1.wav",
        };

        // Threshold passed to Search and Verify below.
        const float threshold = 0.6f;

        foreach (string file in testFiles)
        {
            float[] embedding = ComputeEmbedding(extractor, file);

            string name = manager.Search(embedding, threshold);
            if (string.IsNullOrEmpty(name))
            {
                // No enrolled speaker was returned for this recording.
                name = "<Unknown>";
            }

            Console.WriteLine($"{file}: {name}");
        }

        // test verify
        if (!manager.Verify("fangjun", ComputeEmbedding(extractor, testFiles[0]), threshold))
        {
            Fail("testFiles[0] should match fangjun!");
            return;
        }

        if (!manager.Remove("fangjun"))
        {
            Fail("Failed to remove fangjun");
            return;
        }

        // After the removal the same recording must no longer verify as fangjun.
        if (manager.Verify("fangjun", ComputeEmbedding(extractor, testFiles[0]), threshold))
        {
            Fail($"{testFiles[0]} should match no one!");
            return;
        }

        if (manager.NumSpeakers != 1)
        {
            Fail("There should only 1 speaker left.");
            return;
        }
    }
}
1 change: 1 addition & 0 deletions dotnet-examples/speaker-identification/WaveReader.cs
13 changes: 13 additions & 0 deletions dotnet-examples/speaker-identification/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
#
# Runs the speaker identification example: downloads the speaker embedding
# model and the test recordings when they are not present yet, then starts
# the program with "dotnet run".

set -ex

model=3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

if [ ! -e "./$model" ]; then
  # -f: fail on an HTTP error instead of saving the error page as the model.
  # The download goes to a temporary name first, so an interrupted transfer
  # cannot leave a truncated file that passes the existence check next time.
  # Note: "recongition" in the URL is the spelling used for this download
  # location throughout the project; do not "correct" it.
  curl -fSL -o "./$model.tmp" "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model"
  mv "./$model.tmp" "./$model"
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

dotnet run
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!-- Project file for the speaker identification example. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- Built as an executable targeting net6.0. -->
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>speaker_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Version="*": no specific release of the sherpa-onnx package is pinned. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>

</Project>
19 changes: 19 additions & 0 deletions scripts/dotnet/examples/speaker-identification.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!--
Variant of the speaker identification example project that additionally
restores packages from /tmp/packages.
-->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- Built as an executable targeting net6.0. -->
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>speaker_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<PropertyGroup>
<!--
/tmp/packages is listed ahead of any inherited sources and nuget.org.
NOTE(review): presumably this is where a locally built sherpa-onnx package
is placed before this project is restored; confirm against the workflow.
-->
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>

<ItemGroup>
<!-- Version="*": no specific release of the sherpa-onnx package is pinned. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>

</Project>
Loading

0 comments on commit 677bc1d

Please sign in to comment.