Skip to content

Commit

Permalink
Add timestamps and tokens for .Net's online models.
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Mar 22, 2024
1 parent eaec4c8 commit 9c6c33c
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 41 deletions.
13 changes: 7 additions & 6 deletions cmake/kaldi-native-fbank.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
function(download_kaldi_native_fbank)
include(FetchContent)

set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.19.1.tar.gz")
set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.19.1.tar.gz")
set(kaldi_native_fbank_URL2 "https://hub.nuaa.cf/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.19.1.tar.gz")
set(kaldi_native_fbank_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.19.1.tar.gz")
set(kaldi_native_fbank_HASH "SHA256=0cae8cbb9ea42916b214e088912f9e8f2f648f54756b305f93f552382f31f904")

Expand All @@ -12,11 +13,11 @@ function(download_kaldi_native_fbank)
# If you don't have access to the Internet,
# please pre-download kaldi-native-fbank
set(possible_file_locations
$ENV{HOME}/Downloads/kaldi-native-fbank-1.18.7.tar.gz
${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.18.7.tar.gz
${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.18.7.tar.gz
/tmp/kaldi-native-fbank-1.18.7.tar.gz
/star-fj/fangjun/download/github/kaldi-native-fbank-1.18.7.tar.gz
$ENV{HOME}/Downloads/kaldi-native-fbank-1.19.1.tar.gz
${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.19.1.tar.gz
${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.19.1.tar.gz
/tmp/kaldi-native-fbank-1.19.1.tar.gz
/star-fj/fangjun/download/github/kaldi-native-fbank-1.19.1.tar.gz
)

foreach(f IN LISTS possible_file_locations)
Expand Down
10 changes: 8 additions & 2 deletions dotnet-examples/online-decode-files/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,16 @@ private static void Run(Options options)
// display results
for (int i = 0; i != files.Length; ++i)
{
var text = recognizer.GetResult(streams[i]).Text;
var r = recognizer.GetResult(streams[i]);
var text = r.Text;
var tokens = r.Tokens;
Console.WriteLine("--------------------");
Console.WriteLine(files[i]);
Console.WriteLine(text);
Console.WriteLine("text: {0}", text);
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
Console.Write("timestamps: [");
r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
Console.WriteLine("\b\b]");
}
Console.WriteLine("--------------------");
}
Expand Down
6 changes: 2 additions & 4 deletions dotnet-examples/online-decode-files/run-transducer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@

set -ex
if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
git lfs pull --include "*.onnx"
cd ..
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

dotnet run -c Release \
Expand Down
50 changes: 49 additions & 1 deletion scripts/dotnet/online.cs
Original file line number Diff line number Diff line change
Expand Up @@ -185,23 +185,71 @@ public OnlineRecognizerResult(IntPtr handle)
while (*buffer != 0)
{
++buffer;
length += 1;
}
length = (int)(buffer - (byte*)impl.Text);
}

byte[] stringBuffer = new byte[length];
Marshal.Copy(impl.Text, stringBuffer, 0, length);
_text = Encoding.UTF8.GetString(stringBuffer);

_tokens = new String[impl.Count];

unsafe {
byte* buf = (byte*)impl.Tokens;
for (int i = 0; i < impl.Count; i++) {
length = 0;
byte* start = buf;
while(*buf != 0) {
++buf;
length += 1;
}
++buf;

stringBuffer = new byte[length];
fixed (byte* pTarget = stringBuffer)
{
for (int k = 0; k < length; k++)
{
pTarget[k] = start[k];
}
}

_tokens[i] = Encoding.UTF8.GetString(stringBuffer);
}
}

_timestamps = new float[impl.Count];
unsafe {
float* t = (float*)impl.Timestamps;
fixed(float* pTarget = _timestamps)
{
for (int i = 0; i < impl.Count; i++)
{
pTarget[i] = t[i];
}
}
}
}

// Managed view of the native recognizer result that the enclosing
// constructor reads from. Sequential layout keeps the fields in declaration
// order; presumably this order must match the native C struct field-for-field
// (TODO confirm against the C API header before reordering or adding fields).
[StructLayout(LayoutKind.Sequential)]
struct Impl
{
// Pointer to a null-terminated byte string; the constructor decodes it as UTF-8.
public IntPtr Text;
// Pointer to one contiguous buffer holding Count null-terminated strings
// laid out back to back; the constructor walks it and decodes each as UTF-8.
public IntPtr Tokens;
// Not read by the constructor. Presumably a native array of per-token
// pointers into the Tokens buffer -- TODO confirm.
public IntPtr TokensArr;
// Pointer read by the constructor as an array of Count floats.
// NOTE(review): the constructor dereferences this without a null check --
// confirm the native side never returns null here when Count > 0.
public IntPtr Timestamps;
// Number of entries the constructor reads from both Tokens and Timestamps.
public int Count;
}

// UTF-8 decoded text, assigned once in the constructor.
private String _text;
public String Text
{
    get { return _text; }
}

// One decoded string per token, assigned in the constructor.
private String[] _tokens;
public String[] Tokens
{
    get { return _tokens; }
}

// One float per token, copied from the native buffer in the constructor.
private float[] _timestamps;
public float[] Timestamps
{
    get { return _timestamps; }
}
}

public class OnlineStream : IDisposable
Expand Down
55 changes: 28 additions & 27 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,17 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
memset(r, 0, sizeof(SherpaOnnxOnlineRecognizerResult));

// copy text
r->text = new char[text.size() + 1];
std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
const_cast<char *>(r->text)[text.size()] = 0;
char *pText = new char[text.size() + 1];
std::copy(text.begin(), text.end(), pText);
pText[text.size()] = 0;
r->text = pText;

// copy json
const auto &json = result.AsJsonString();
r->json = new char[json.size() + 1];
std::copy(json.begin(), json.end(), const_cast<char *>(r->json));
const_cast<char *>(r->json)[json.size()] = 0;
char *pJson = new char[json.size() + 1];
std::copy(json.begin(), json.end(), pJson);
pJson[json.size()] = 0;
r->json = pJson;

// copy tokens
auto count = result.tokens.size();
Expand All @@ -183,15 +185,12 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(

r->count = count;
// Each token ends with a null character ('\0')
r->tokens = new char[total_length];
memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0,
total_length);
char *tokens = new char[total_length]{};
char **tokens_temp = new char *[r->count];
int32_t pos = 0;
for (int32_t i = 0; i < r->count; ++i) {
tokens_temp[i] = const_cast<char *>(r->tokens) + pos;
memcpy(reinterpret_cast<void *>(const_cast<char *>(r->tokens + pos)),
result.tokens[i].c_str(), result.tokens[i].size());
tokens_temp[i] = tokens + pos;
memcpy(tokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
// +1 to move past the null character
pos += result.tokens[i].size() + 1;
}
Expand All @@ -205,6 +204,7 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
r->timestamps = nullptr;
}

r->tokens = tokens;
} else {
r->count = 0;
r->timestamps = nullptr;
Expand Down Expand Up @@ -391,9 +391,10 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
auto r = new SherpaOnnxOfflineRecognizerResult;
memset(r, 0, sizeof(SherpaOnnxOfflineRecognizerResult));

r->text = new char[text.size() + 1];
std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
const_cast<char *>(r->text)[text.size()] = 0;
char *pText = new char[text.size() + 1];
std::copy(text.begin(), text.end(), pText);
pText[text.size()] = 0;
r->text = pText;

if (!result.timestamps.empty()) {
r->timestamps = new float[result.timestamps.size()];
Expand Down Expand Up @@ -530,15 +531,17 @@ const SherpaOnnxKeywordResult *GetKeywordResult(
r->start_time = result.start_time;

// copy keyword
r->keyword = new char[keyword.size() + 1];
std::copy(keyword.begin(), keyword.end(), const_cast<char *>(r->keyword));
const_cast<char *>(r->keyword)[keyword.size()] = 0;
char *pKeyword = new char[keyword.size() + 1];
std::copy(keyword.begin(), keyword.end(), pKeyword);
pKeyword[keyword.size()] = 0;
r->keyword = pKeyword;

// copy json
const auto &json = result.AsJsonString();
r->json = new char[json.size() + 1];
std::copy(json.begin(), json.end(), const_cast<char *>(r->json));
const_cast<char *>(r->json)[json.size()] = 0;
char *pJson = new char[json.size() + 1];
std::copy(json.begin(), json.end(), pJson);
pJson[json.size()] = 0;
r->json = pJson;

// copy tokens
auto count = result.tokens.size();
Expand All @@ -551,18 +554,16 @@ const SherpaOnnxKeywordResult *GetKeywordResult(

r->count = count;
// Each token ends with a null character ('\0')
r->tokens = new char[total_length];
memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0,
total_length);
char *pTokens = new char[total_length]{};
char **tokens_temp = new char *[r->count];
int32_t pos = 0;
for (int32_t i = 0; i < r->count; ++i) {
tokens_temp[i] = const_cast<char *>(r->tokens) + pos;
memcpy(reinterpret_cast<void *>(const_cast<char *>(r->tokens + pos)),
result.tokens[i].c_str(), result.tokens[i].size());
tokens_temp[i] = pTokens + pos;
memcpy(pTokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
// +1 to move past the null character
pos += result.tokens[i].size() + 1;
}
r->tokens = pTokens;
r->tokens_arr = tokens_temp;

if (!result.timestamps.empty()) {
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/online-zipformer2-transducer-model.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class OnlineZipformer2TransducerModel : public OnlineTransducerModel {

int32_t context_size_ = 0;
int32_t vocab_size_ = 0;
int32_t feature_dim_ = 0;
int32_t feature_dim_ = 80;
};

} // namespace sherpa_onnx
Expand Down

0 comments on commit 9c6c33c

Please sign in to comment.