diff --git a/cmake/kaldi-native-fbank.cmake b/cmake/kaldi-native-fbank.cmake
index 03dacef62..ae478fafb 100644
--- a/cmake/kaldi-native-fbank.cmake
+++ b/cmake/kaldi-native-fbank.cmake
@@ -1,7 +1,8 @@
 function(download_kaldi_native_fbank)
   include(FetchContent)
 
-  set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.19.1.tar.gz")
+  set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.19.1.tar.gz")
+  set(kaldi_native_fbank_URL2 "https://hub.nuaa.cf/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.19.1.tar.gz")
   set(kaldi_native_fbank_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.19.1.tar.gz")
   set(kaldi_native_fbank_HASH "SHA256=0cae8cbb9ea42916b214e088912f9e8f2f648f54756b305f93f552382f31f904")
 
@@ -12,11 +13,11 @@ function(download_kaldi_native_fbank)
   # If you don't have access to the Internet,
   # please pre-download kaldi-native-fbank
   set(possible_file_locations
-    $ENV{HOME}/Downloads/kaldi-native-fbank-1.18.7.tar.gz
-    ${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.18.7.tar.gz
-    ${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.18.7.tar.gz
-    /tmp/kaldi-native-fbank-1.18.7.tar.gz
-    /star-fj/fangjun/download/github/kaldi-native-fbank-1.18.7.tar.gz
+    $ENV{HOME}/Downloads/kaldi-native-fbank-1.19.1.tar.gz
+    ${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.19.1.tar.gz
+    ${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.19.1.tar.gz
+    /tmp/kaldi-native-fbank-1.19.1.tar.gz
+    /star-fj/fangjun/download/github/kaldi-native-fbank-1.19.1.tar.gz
   )
 
   foreach(f IN LISTS possible_file_locations)
diff --git a/dotnet-examples/online-decode-files/Program.cs b/dotnet-examples/online-decode-files/Program.cs
index 2d94c4d16..5b8b45183 100644
--- a/dotnet-examples/online-decode-files/Program.cs
+++ b/dotnet-examples/online-decode-files/Program.cs
@@ -226,10 +226,16 @@ private static void Run(Options options)
       // display results
       for (int i = 0; i != files.Length; ++i)
       {
-        var text = recognizer.GetResult(streams[i]).Text;
+        var r = recognizer.GetResult(streams[i]);
+        var text = r.Text;
+        var tokens = r.Tokens;
         Console.WriteLine("--------------------");
         Console.WriteLine(files[i]);
-        Console.WriteLine(text);
+        Console.WriteLine("text: {0}", text);
+        Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
+        Console.Write("timestamps: [");
+        r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
+        Console.WriteLine("\b\b]");
       }
       Console.WriteLine("--------------------");
     }
diff --git a/dotnet-examples/online-decode-files/run-transducer.sh b/dotnet-examples/online-decode-files/run-transducer.sh
index 06c478ba0..f8170d8d7 100755
--- a/dotnet-examples/online-decode-files/run-transducer.sh
+++ b/dotnet-examples/online-decode-files/run-transducer.sh
@@ -6,10 +6,8 @@
 set -ex
 
 if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
-  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
-  cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
-  git lfs pull --include "*.onnx"
-  cd ..
+  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
 fi
 
 dotnet run -c Release \
diff --git a/scripts/dotnet/online.cs b/scripts/dotnet/online.cs
index dd79ba6b7..04f67446d 100644
--- a/scripts/dotnet/online.cs
+++ b/scripts/dotnet/online.cs
@@ -185,23 +185,71 @@ public OnlineRecognizerResult(IntPtr handle)
         while (*buffer != 0) {
           ++buffer;
+          length += 1;
         }
-        length = (int)(buffer - (byte*)impl.Text);
       }
 
       byte[] stringBuffer = new byte[length];
       Marshal.Copy(impl.Text, stringBuffer, 0, length);
       _text = Encoding.UTF8.GetString(stringBuffer);
+
+      _tokens = new String[impl.Count];
+
+      unsafe {
+        byte* buf = (byte*)impl.Tokens;
+        for (int i = 0; i < impl.Count; i++) {
+          length = 0;
+          byte* start = buf;
+          while(*buf != 0) {
+            ++buf;
+            length += 1;
+          }
+          ++buf;
+
+          stringBuffer = new byte[length];
+          fixed (byte* pTarget = stringBuffer)
+          {
+            for (int k = 0; k < length; k++)
+            {
+              pTarget[k] = start[k];
+            }
+          }
+
+          _tokens[i] = Encoding.UTF8.GetString(stringBuffer);
+        }
+      }
+
+      _timestamps = new float[impl.Count];
+      unsafe {
+        float* t = (float*)impl.Timestamps;
+        fixed(float* pTarget = _timestamps)
+        {
+          for (int i = 0; i < impl.Count; i++)
+          {
+            pTarget[i] = t[i];
+          }
+        }
+      }
     }
 
     [StructLayout(LayoutKind.Sequential)]
     struct Impl
     {
       public IntPtr Text;
+      public IntPtr Tokens;
+      public IntPtr TokensArr;
+      public IntPtr Timestamps;
+      public int Count;
     }
 
     private String _text;
     public String Text => _text;
+
+    private String[] _tokens;
+    public String[] Tokens => _tokens;
+
+    private float[] _timestamps;
+    public float[] Timestamps => _timestamps;
   }
 
   public class OnlineStream : IDisposable
diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc
index 9ef5ad257..a064604ab 100644
--- a/sherpa-onnx/c-api/c-api.cc
+++ b/sherpa-onnx/c-api/c-api.cc
@@ -162,15 +162,17 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
   memset(r, 0, sizeof(SherpaOnnxOnlineRecognizerResult));
 
   // copy text
-  r->text = new char[text.size() + 1];
-  std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
-  const_cast<char *>(r->text)[text.size()] = 0;
+  char *pText = new char[text.size() + 1];
+  std::copy(text.begin(), text.end(), pText);
+  pText[text.size()] = 0;
+  r->text = pText;
 
   // copy json
   const auto &json = result.AsJsonString();
-  r->json = new char[json.size() + 1];
-  std::copy(json.begin(), json.end(), const_cast<char *>(r->json));
-  const_cast<char *>(r->json)[json.size()] = 0;
+  char *pJson = new char[json.size() + 1];
+  std::copy(json.begin(), json.end(), pJson);
+  pJson[json.size()] = 0;
+  r->json = pJson;
 
   // copy tokens
   auto count = result.tokens.size();
@@ -183,15 +185,12 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
 
     r->count = count;
     // Each word ends with nullptr
-    r->tokens = new char[total_length];
-    memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0,
-           total_length);
+    char *tokens = new char[total_length]{};
     char **tokens_temp = new char *[r->count];
     int32_t pos = 0;
     for (int32_t i = 0; i < r->count; ++i) {
-      tokens_temp[i] = const_cast<char *>(r->tokens) + pos;
-      memcpy(reinterpret_cast<void *>(const_cast<char *>(r->tokens + pos)),
-             result.tokens[i].c_str(), result.tokens[i].size());
+      tokens_temp[i] = tokens + pos;
+      memcpy(tokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
       // +1 to move past the null character
       pos += result.tokens[i].size() + 1;
     }
@@ -205,6 +204,7 @@
       r->timestamps = nullptr;
     }
 
+    r->tokens = tokens;
   } else {
     r->count = 0;
     r->timestamps = nullptr;
@@ -391,9 +391,10 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
   auto r = new SherpaOnnxOfflineRecognizerResult;
   memset(r, 0, sizeof(SherpaOnnxOfflineRecognizerResult));
 
-  r->text = new char[text.size() + 1];
-  std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
-  const_cast<char *>(r->text)[text.size()] = 0;
+  char *pText = new char[text.size() + 1];
+  std::copy(text.begin(), text.end(), pText);
+  pText[text.size()] = 0;
+  r->text = pText;
 
   if (!result.timestamps.empty()) {
     r->timestamps = new float[result.timestamps.size()];
@@ -530,15 +531,17 @@ const SherpaOnnxKeywordResult *GetKeywordResult(
   r->start_time = result.start_time;
 
   // copy keyword
-  r->keyword = new char[keyword.size() + 1];
-  std::copy(keyword.begin(), keyword.end(), const_cast<char *>(r->keyword));
-  const_cast<char *>(r->keyword)[keyword.size()] = 0;
+  char *pKeyword = new char[keyword.size() + 1];
+  std::copy(keyword.begin(), keyword.end(), pKeyword);
+  pKeyword[keyword.size()] = 0;
+  r->keyword = pKeyword;
 
   // copy json
   const auto &json = result.AsJsonString();
-  r->json = new char[json.size() + 1];
-  std::copy(json.begin(), json.end(), const_cast<char *>(r->json));
-  const_cast<char *>(r->json)[json.size()] = 0;
+  char *pJson = new char[json.size() + 1];
+  std::copy(json.begin(), json.end(), pJson);
+  pJson[json.size()] = 0;
+  r->json = pJson;
 
   // copy tokens
   auto count = result.tokens.size();
@@ -551,18 +554,16 @@ const SherpaOnnxKeywordResult *GetKeywordResult(
 
   r->count = count;
   // Each word ends with nullptr
-  r->tokens = new char[total_length];
-  memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0,
-         total_length);
+  char *pTokens = new char[total_length]{};
   char **tokens_temp = new char *[r->count];
   int32_t pos = 0;
   for (int32_t i = 0; i < r->count; ++i) {
-    tokens_temp[i] = const_cast<char *>(r->tokens) + pos;
-    memcpy(reinterpret_cast<void *>(const_cast<char *>(r->tokens + pos)),
-           result.tokens[i].c_str(), result.tokens[i].size());
+    tokens_temp[i] = pTokens + pos;
+    memcpy(pTokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
     // +1 to move past the null character
     pos += result.tokens[i].size() + 1;
   }
+  r->tokens = pTokens;
  r->tokens_arr = tokens_temp;
 
   if (!result.timestamps.empty()) {
diff --git a/sherpa-onnx/csrc/online-zipformer2-transducer-model.h b/sherpa-onnx/csrc/online-zipformer2-transducer-model.h
index acad45170..07c9e9252 100644
--- a/sherpa-onnx/csrc/online-zipformer2-transducer-model.h
+++ b/sherpa-onnx/csrc/online-zipformer2-transducer-model.h
@@ -105,7 +105,7 @@ class OnlineZipformer2TransducerModel : public OnlineTransducerModel {
 
   int32_t context_size_ = 0;
   int32_t vocab_size_ = 0;
-  int32_t feature_dim_ = 0;
+  int32_t feature_dim_ = 80;
 };
 
 }  // namespace sherpa_onnx
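
Note on the token layout used above: GetOnlineStreamResult() and GetKeywordResult() pack all tokens into a single char buffer, back to back, each terminated by a '\0', and fill tokens_arr with pointers into that buffer; the C# constructor in scripts/dotnet/online.cs walks the same buffer byte by byte to rebuild the strings, which is why it does ++buf once more after each token to skip the separator. The standalone C++ sketch below is not part of the patch (the token values are made up) and only illustrates that layout:

// layout-sketch.cc -- illustrative only, not part of the patch.
// Shows the packed token buffer plus pointer array used by the C API result.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> tokens = {"he", "llo", " world"};  // made-up tokens

  // +1 per token for its trailing '\0', as in the total_length computation
  int32_t total_length = 0;
  for (const auto &t : tokens) {
    total_length += t.size() + 1;
  }

  char *buf = new char[total_length]{};           // zero-initialized buffer
  char **tokens_arr = new char *[tokens.size()];  // one pointer per token

  int32_t pos = 0;
  for (size_t i = 0; i < tokens.size(); ++i) {
    tokens_arr[i] = buf + pos;
    std::memcpy(buf + pos, tokens[i].c_str(), tokens[i].size());
    pos += tokens[i].size() + 1;  // skip past the '\0' separator
  }

  // A consumer can either index tokens_arr directly or walk buf and stop at
  // each '\0', which is what the C# marshaling loop does.
  for (size_t i = 0; i != tokens.size(); ++i) {
    std::cout << tokens_arr[i] << "\n";
  }

  delete[] tokens_arr;
  delete[] buf;
  return 0;
}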