From fd4989ecf77dd1211632f82cab2eb2ac91caf04a Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 19 Nov 2020 17:50:03 +0100 Subject: [PATCH 01/46] Added __unix__ preprocessor around unistd.h - unistd.h should be included for unix system as well - Otherwise, the compilation fails on WASM --- src/3rd_party/cnpy/cnpy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/cnpy/cnpy.h b/src/3rd_party/cnpy/cnpy.h index 89e607cab..592648186 100644 --- a/src/3rd_party/cnpy/cnpy.h +++ b/src/3rd_party/cnpy/cnpy.h @@ -18,7 +18,7 @@ #include #include -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__unix__) #include #endif From efe2c0917ecd1203245346f1be2fc4397cd1049f Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 29 Jan 2021 17:50:00 +0100 Subject: [PATCH 02/46] Enabled compiling only marian decoder - Disabled code (by compile definition DECODER_ONLY) that is not required for inference and doesn't compile on wasm - Introduced cmake option "COMPILE_DECODER_ONLY" to build only marian decoder --- src/3rd_party/cnpy/cnpy.h | 10 ++++++++++ src/3rd_party/pathie-cpp/src/path.cpp | 2 ++ src/common/file_stream.cpp | 12 +++++++++++- src/common/file_stream.h | 2 ++ src/data/sentencepiece_vocab.cpp | 3 ++- src/graph/node_initializers.cpp | 2 +- src/layers/generic.cpp | 8 ++++++++ src/layers/lsh.h | 2 ++ 8 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/3rd_party/cnpy/cnpy.h b/src/3rd_party/cnpy/cnpy.h index 592648186..5f2b8fa76 100644 --- a/src/3rd_party/cnpy/cnpy.h +++ b/src/3rd_party/cnpy/cnpy.h @@ -5,7 +5,9 @@ #ifndef LIBCNPY_H_ #define LIBCNPY_H_ +#if !defined(DECODER_ONLY) #include "3rd_party/zlib/zlib.h" +#endif #include #include @@ -133,6 +135,9 @@ namespace cnpy { template void npz_save(std::string zipname, std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w") { +#if defined(DECODER_ONLY) + throw std::runtime_error("npz_save: Platform not supported"); 
+#else //first, append a .npy to the fname fname += ".npy"; @@ -221,6 +226,7 @@ namespace cnpy { fwrite(&footer[0],sizeof(char),footer.size(),fp); //BUGBUG: no check for write error fclose(fp); +#endif } //one item pass to npz_save() below @@ -265,6 +271,9 @@ namespace cnpy { static inline void npz_save(std::string zipname, const std::vector& items) { +#if defined(DECODER_ONLY) + throw std::runtime_error("npz_save: Platform not supported"); +#else auto tmpname = zipname + "$$"; // TODO: add thread id or something unlink(tmpname.c_str()); // when saving to HDFS, we cannot overwrite an existing file FILE* fp = fopen(tmpname.c_str(),"wb"); @@ -366,6 +375,7 @@ namespace cnpy { unlink(tmpname.c_str()); throw std::runtime_error("npz_save: error saving to file: " + zipname); } +#endif } static inline diff --git a/src/3rd_party/pathie-cpp/src/path.cpp b/src/3rd_party/pathie-cpp/src/path.cpp index e732e09c5..b3637e6bf 100644 --- a/src/3rd_party/pathie-cpp/src/path.cpp +++ b/src/3rd_party/pathie-cpp/src/path.cpp @@ -935,6 +935,8 @@ Path Path::exe() std::string str = utf16_to_utf8(buf); return Path(str); +#elif defined(DECODER_ONLY) + throw(std::runtime_error("Path::exe() not supported in decoder-only version of marian")); #else #error Unsupported platform. 
#endif diff --git a/src/common/file_stream.cpp b/src/common/file_stream.cpp index 78cbb12fa..7902a52c0 100755 --- a/src/common/file_stream.cpp +++ b/src/common/file_stream.cpp @@ -23,7 +23,9 @@ InputFileStream::InputFileStream(const std::string &file) : std::istream(NULL) { // the special syntax "command |" starts command in a sh shell and reads out its result if (marian::utils::endsWith(file, "|")) { -#ifdef __unix__ +#if defined(DECODER_ONLY) + ABORT("Pipe syntax not supported in this build of Marian: {}", file); +#elif defined(__unix__) auto command = file.substr(0, file.size() - 1); // open as a pipe pipe_ = popen(command.c_str(), "r"); @@ -45,8 +47,12 @@ InputFileStream::InputFileStream(const std::string &file) // insert .gz decompression if(marian::utils::endsWith(file, ".gz")) { +#if defined(DECODER_ONLY) + ABORT(".gz file decompression not supported in decoder-only build of Marian: {}", file); +#else streamBuf2_ = std::move(streamBuf1_); streamBuf1_.reset(new zstr::istreambuf(streamBuf2_.get())); +#endif } // initialize the underlying istream @@ -94,8 +100,12 @@ OutputFileStream::OutputFileStream(const std::string &file) ABORT_IF(ret != streamBuf1_.get(), "Return value is not equal to streambuf pointer, that is weird"); if(file_.extension() == marian::filesystem::Path(".gz")) { +#if defined(DECODER_ONLY) + ABORT("OutputFileStream: .gz file decompression not supported in decoder-only build of Marian"); +#else streamBuf2_.reset(new zstr::ostreambuf(streamBuf1_.get())); this->init(streamBuf2_.get()); +#endif } else { this->init(streamBuf1_.get()); } diff --git a/src/common/file_stream.h b/src/common/file_stream.h index ccf33ed86..a506ce3c8 100644 --- a/src/common/file_stream.h +++ b/src/common/file_stream.h @@ -26,7 +26,9 @@ #pragma warning(push) // 4101: 'identifier' : unreferenced local variable. One parameter variable in zstr.hpp is not used. 
#pragma warning(disable : 4101) #endif +#ifndef DECODER_ONLY #include "3rd_party/zstr/zstr.hpp" +#endif #ifdef _MSC_VER #pragma warning(pop) #endif diff --git a/src/data/sentencepiece_vocab.cpp b/src/data/sentencepiece_vocab.cpp index 6d1fa5cd9..3296f1ddb 100644 --- a/src/data/sentencepiece_vocab.cpp +++ b/src/data/sentencepiece_vocab.cpp @@ -170,7 +170,7 @@ class SentencePieceVocab : public IVocab { << " --max_sentence_length=" << maxBytes << " --input_sentence_size=" << seenLines << " " << options_->get("sentencepiece-options"); // these are SentencePiece command line options - +#ifndef DECODER_ONLY // Train the SentencePiece model const auto status = sentencepiece::SentencePieceTrainer::Train(command.str()); ABORT_IF(!status.ok(), @@ -186,6 +186,7 @@ class SentencePieceVocab : public IVocab { ABORT_IF(rename((vocabPath + ".model").c_str(), vocabPath.c_str()) != 0, "Could not rename {} to {}", vocabPath + ".model", vocabPath); +#endif } void createFake() override { diff --git a/src/graph/node_initializers.cpp b/src/graph/node_initializers.cpp index 531cfaad0..34413af6a 100755 --- a/src/graph/node_initializers.cpp +++ b/src/graph/node_initializers.cpp @@ -254,7 +254,7 @@ template Ptr range(IndexType begin, IndexType end, I } // namespace marian -#if BLAS_FOUND +#if BLAS_FOUND && !DECODER_ONLY #include "faiss/VectorTransform.h" namespace marian { diff --git a/src/layers/generic.cpp b/src/layers/generic.cpp index 64b4f4cbb..aa0704b14 100755 --- a/src/layers/generic.cpp +++ b/src/layers/generic.cpp @@ -221,9 +221,13 @@ namespace marian { // this option is only set in the decoder if(!lsh_ && options_->hasAndNotEmpty("output-approx-knn")) { +#ifdef DECODER_ONLY + ABORT("LSH is not supported in decoder-only version of marian."); +#else auto k = opt>("output-approx-knn")[0]; auto nbits = opt>("output-approx-knn")[1]; lsh_ = New(k, nbits); +#endif } auto name = options_->get("prefix"); @@ -277,7 +281,11 @@ namespace marian { if(lsh_) { ABORT_IF( transA, "Transposed query 
not supported for LSH"); ABORT_IF(!transB, "Untransposed indexed matrix not supported for LSH"); +#ifdef DECODER_ONLY + ABORT("LSH is not supported in decoder-only version of marian."); +#else return lsh_->apply(x, W, b); // knows how to deal with undefined bias +#endif } else { return affineOrDot(x, W, b, transA, transB); } diff --git a/src/layers/lsh.h b/src/layers/lsh.h index bf498cc60..d98852bb5 100644 --- a/src/layers/lsh.h +++ b/src/layers/lsh.h @@ -18,7 +18,9 @@ class LSH { Expr apply(Expr query, Expr values, Expr bias); private: +#ifndef DECODER_ONLY Ptr index_; +#endif size_t indexHash_{0}; int k_{100}; From 80ec1a8042a70819cc8719ff3b484621826bc6ee Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 1 Feb 2021 15:02:18 +0100 Subject: [PATCH 03/46] cmake changes for marian decoder only builds --- CMakeLists.txt | 13 +++++++- src/3rd_party/CMakeLists.txt | 24 ++++++++------ src/CMakeLists.txt | 64 +++++++++++++++++++++++++----------- 3 files changed, 71 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 92bf3c379..287d9e73f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ option(COMPILE_CUDA "Compile GPU version" ON) option(COMPILE_EXAMPLES "Compile examples" OFF) option(COMPILE_SERVER "Compile marian-server" OFF) option(COMPILE_TESTS "Compile tests" OFF) +option(COMPILE_DECODER_ONLY "Compile marian-decoder only" OFF) option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF) option(USE_CUDNN "Use CUDNN library" OFF) option(USE_DOXYGEN "Build documentation with Doxygen" ON) @@ -27,6 +28,11 @@ option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON) option(USE_STATIC_LIBS "Link statically against non-system libs" OFF) option(GENERATE_MARIAN_INSTALL_TARGETS "Generate Marian install targets (requires CMake 3.12+)" OFF) +if(COMPILE_DECODER_ONLY) + # Enable building decoder-only source code. 
+ add_compile_definitions(DECODER_ONLY) +endif(COMPILE_DECODER_ONLY) + # fbgemm and sentencepiece are both defined with "non-local" installation targets (the source projects don't define them, # so we define them in src\3rd_party\CMakeLists.txt), but that isn't supported until CMake 3.12. Prior to CMake 3.12, # targets could only be install(...)ed in the same CMakeLists.txt they were defined. We currently target CMake 3.5.1 @@ -230,7 +236,12 @@ endif() if(USE_SENTENCEPIECE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_SENTENCEPIECE -D_USE_INTERNAL_STRING_VIEW") LIST(APPEND CUDA_NVCC_FLAGS -DUSE_SENTENCEPIECE; ) - set(EXT_LIBS ${EXT_LIBS} sentencepiece sentencepiece_train) + if (COMPILE_DECODER_ONLY) + # We don't need sentencepiece_train for decoder-only builds + set(EXT_LIBS ${EXT_LIBS} sentencepiece) + else() + set(EXT_LIBS ${EXT_LIBS} sentencepiece sentencepiece_train) + endif() endif() if(USE_ONNX) diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index f5314b12b..68c76a641 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -2,11 +2,13 @@ include_directories(.) 
add_subdirectory(./yaml-cpp) -add_subdirectory(./SQLiteCpp) +if(NOT COMPILE_DECODER_ONLY) + add_subdirectory(./SQLiteCpp) + add_subdirectory(./zlib) + add_subdirectory(./faiss) + include_directories(./faiss) +endif() add_subdirectory(./pathie-cpp) -add_subdirectory(./zlib) -add_subdirectory(./faiss) -include_directories(./faiss) set(INTGEMM_DONT_BUILD_TESTS ON CACHE BOOL "Disable intgemm tests") add_subdirectory(./intgemm) @@ -123,13 +125,15 @@ include_directories(./CLI) include_directories(./pathie-cpp/include) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - #set_target_properties(SQLiteCpp PROPERTIES COMPILE_FLAGS - set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS - " -Wno-parentheses-equality -Wno-unused-value") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + if(NOT COMPILE_DECODER_ONLY) + #set_target_properties(SQLiteCpp PROPERTIES COMPILE_FLAGS set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS - " -Wno-implicit-int-float-conversion") - endif() + " -Wno-parentheses-equality -Wno-unused-value") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -Wno-implicit-int-float-conversion") + endif() + endif(NOT COMPILE_DECODER_ONLY) set_property(TARGET libyaml-cpp APPEND_STRING PROPERTY COMPILE_FLAGS " -fPIC -Wno-unused-value") set_property(TARGET pathie-cpp APPEND_STRING PROPERTY COMPILE_FLAGS diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f1a634695..902ef33a2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -39,7 +39,7 @@ set(MARIAN_SOURCES data/factored_vocab.cpp data/corpus_base.cpp data/corpus.cpp - data/corpus_sqlite.cpp + ##data/corpus_sqlite.cpp data/corpus_nbest.cpp data/text_input.cpp data/shortlist.cpp @@ -60,7 +60,7 @@ set(MARIAN_SOURCES tensors/cpu/tensor_operators.cpp tensors/cpu/integer_common.cpp - tensors/cpu/fbgemm/packed_gemm.cpp + #tensors/cpu/fbgemm/packed_gemm.cpp graph/expression_graph.cpp 
graph/expression_operators.cpp @@ -75,7 +75,7 @@ set(MARIAN_SOURCES layers/generic.cpp layers/loss.cpp layers/weight.cpp - layers/lsh.cpp + #layers/lsh.cpp rnn/cells.cpp rnn/attention.cpp @@ -99,24 +99,46 @@ set(MARIAN_SOURCES translator/helpers.cpp translator/scorers.cpp - training/graph_group_async.cpp - training/graph_group_sync.cpp - training/graph_group.cpp - training/graph_group_singleton.cpp - training/validator.cpp - training/communicator.cpp + #training/graph_group_async.cpp + #training/graph_group_sync.cpp + #training/graph_group.cpp + #training/graph_group_singleton.cpp + #training/validator.cpp + #training/communicator.cpp # this is only compiled to catch build errors, but not linked - microsoft/quicksand.cpp - microsoft/cosmos.cpp + #microsoft/quicksand.cpp + #microsoft/cosmos.cpp $ - $ + #$ $ - $ - $ + #$ + #$ ) +if (NOT COMPILE_DECODER_ONLY) + list(APPEND MARIAN_SOURCES + data/corpus_sqlite.cpp + tensors/cpu/fbgemm/packed_gemm.cpp + layers/lsh.cpp + + training/graph_group_async.cpp + training/graph_group_sync.cpp + training/graph_group.cpp + training/graph_group_singleton.cpp + training/validator.cpp + training/communicator.cpp + + microsoft/quicksand.cpp + microsoft/cosmos.cpp + + $ + $ + $ + ) +endif() + add_library(marian STATIC ${MARIAN_SOURCES}) target_compile_options(marian PRIVATE ${ALL_WARNINGS}) @@ -197,14 +219,17 @@ endif(CUDA_FOUND) # as a sub-project of another build system that is only interested in the Marian output library. option(COMPILE_LIBRARY_ONLY "Build only the Marian library and exclude all executables." 
OFF) if (NOT COMPILE_LIBRARY_ONLY) - add_executable(marian_train command/marian_main.cpp) - set_target_properties(marian_train PROPERTIES OUTPUT_NAME marian) - target_compile_options(marian_train PRIVATE ${ALL_WARNINGS}) - add_executable(marian_decoder command/marian_decoder.cpp) set_target_properties(marian_decoder PROPERTIES OUTPUT_NAME marian-decoder) target_compile_options(marian_decoder PRIVATE ${ALL_WARNINGS}) + set(EXECUTABLES ${EXECUTABLES} marian_decoder) + + if (NOT COMPILE_DECODER_ONLY) + add_executable(marian_train command/marian_main.cpp) + set_target_properties(marian_train PROPERTIES OUTPUT_NAME marian) + target_compile_options(marian_train PRIVATE ${ALL_WARNINGS}) + add_executable(marian_scorer command/marian_scorer.cpp) set_target_properties(marian_scorer PROPERTIES OUTPUT_NAME marian-scorer) target_compile_options(marian_scorer PRIVATE ${ALL_WARNINGS}) @@ -217,7 +242,7 @@ if (NOT COMPILE_LIBRARY_ONLY) set_target_properties(marian_conv PROPERTIES OUTPUT_NAME marian-conv) target_compile_options(marian_conv PRIVATE ${ALL_WARNINGS}) - set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_scorer marian_vocab marian_conv) + list(APPEND EXECUTABLES marian_train marian_scorer marian_vocab marian_conv) # marian.zip and marian.tgz # This combines marian, marian_decoder in a single ZIP or TAR file for @@ -261,6 +286,7 @@ if (NOT COMPILE_LIBRARY_ONLY) endif(MSVC) set(EXECUTABLES ${EXECUTABLES} marian_server) endif(COMPILE_SERVER) + endif(NOT COMPILE_DECODER_ONLY) foreach(exec ${EXECUTABLES}) target_link_libraries(${exec} marian) From ab29cbc4a00b6b201325b662dad0e375135371a0 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 1 Feb 2021 18:02:57 +0100 Subject: [PATCH 04/46] Added a wasm compatible blas/sgemm submodule "onnxjs" - onnxjs compiles on wasm --- .gitmodules | 3 +++ src/3rd_party/onnxjs | 1 + 2 files changed, 4 insertions(+) create mode 160000 src/3rd_party/onnxjs diff --git a/.gitmodules b/.gitmodules index 9a54cf615..e76d0b505 
100644 --- a/.gitmodules +++ b/.gitmodules @@ -20,3 +20,6 @@ [submodule "src/3rd_party/simple-websocket-server"] path = src/3rd_party/simple-websocket-server url = https://github.com/marian-nmt/Simple-WebSocket-Server +[submodule "src/3rd_party/onnxjs"] + path = src/3rd_party/onnxjs + url = https://github.com/abhi-agg/onnxjs.git diff --git a/src/3rd_party/onnxjs b/src/3rd_party/onnxjs new file mode 160000 index 000000000..91b0f8d94 --- /dev/null +++ b/src/3rd_party/onnxjs @@ -0,0 +1 @@ +Subproject commit 91b0f8d94e2917092e15b4e8d9baad4bfa981ca2 From 6b95d313ce6cf7c54dbc2ad711151b0fcab52ea3 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 3 Dec 2020 09:55:50 +0100 Subject: [PATCH 05/46] Enabled using wasm-compatible sgemm routine of onnxjs - CMAKE modifications -- onnxjs is added as a target to be linked if a wasm compatible sgemm routine is to be used -- Its compilation is protected by cmake option USE_WASM_COMPATIBLE_BLAS (by default off) - source file modifications to call appropriate sgemm routine of onnxjs --- CMakeLists.txt | 51 +++++++++++++++++++++--------------- src/3rd_party/CMakeLists.txt | 9 +++++-- src/common/config_parser.cpp | 10 ++++--- src/tensors/cpu/prod.cpp | 16 ++++++----- src/tensors/cpu/prod_blas.h | 42 ++++++++++++++++------------- 5 files changed, 77 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 287d9e73f..bf0b0fb07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ option(USE_CUDNN "Use CUDNN library" OFF) option(USE_DOXYGEN "Build documentation with Doxygen" ON) option(USE_FBGEMM "Use FBGEMM" OFF) option(USE_MKL "Compile with MKL support" ON) +option(USE_WASM_COMPATIBLE_BLAS "Compile with a WASM compatible blas for decoder only builds" OFF) option(USE_MPI "Use MPI library" OFF) option(USE_NCCL "Use NCCL library" ON) option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON) @@ -446,29 +447,37 @@ if(USE_MPI) endif(USE_MPI) 
############################################################################### -# Find MKL +# Find BLAS library for CPU compilation if(COMPILE_CPU) - set(EXT_LIBS ${EXT_LIBS} intgemm) - if(USE_MKL) - find_package(MKL) - endif(USE_MKL) - if(MKL_FOUND) - include_directories(${MKL_INCLUDE_DIR}) - set(EXT_LIBS ${EXT_LIBS} ${MKL_LIBRARIES}) + if(USE_WASM_COMPATIBLE_BLAS) + ## Use a wasm compatible BLAS + set(EXT_LIBS ${EXT_LIBS} intgemm onnx-sgemm) set(BLAS_FOUND TRUE) - add_definitions(-DCOMPILE_CPU=1 -DBLAS_FOUND=1 -DMKL_FOUND=1) - else(MKL_FOUND) - set(BLAS_VENDOR "OpenBLAS") - find_package(BLAS) - if(BLAS_FOUND) - include(FindCBLAS) - if(CBLAS_FOUND) - include_directories(${BLAS_INCLUDE_DIR} ${CBLAS_INCLUDE_DIR}) - set(EXT_LIBS ${EXT_LIBS} ${BLAS_LIBRARIES} ${CBLAS_LIBRARIES}) - add_definitions(-DCOMPILE_CPU=1 -DBLAS_FOUND=1) - endif(CBLAS_FOUND) - endif(BLAS_FOUND) - endif(MKL_FOUND) + set(BLAS_VENDOR "ONNX-SGEMM") + add_compile_definitions(COMPILE_CPU BLAS_FOUND WASM_COMPATIBLE_BLAS) + else(USE_WASM_COMPATIBLE_BLAS) + set(EXT_LIBS ${EXT_LIBS} intgemm) + if(USE_MKL) + find_package(MKL) + endif(USE_MKL) + if(MKL_FOUND) + include_directories(${MKL_INCLUDE_DIR}) + set(EXT_LIBS ${EXT_LIBS} ${MKL_LIBRARIES}) + set(BLAS_FOUND TRUE) + add_definitions(-DCOMPILE_CPU=1 -DBLAS_FOUND=1 -DMKL_FOUND=1) + else(MKL_FOUND) + set(BLAS_VENDOR "OpenBLAS") + find_package(BLAS) + if(BLAS_FOUND) + include(FindCBLAS) + if(CBLAS_FOUND) + include_directories(${BLAS_INCLUDE_DIR} ${CBLAS_INCLUDE_DIR}) + set(EXT_LIBS ${EXT_LIBS} ${BLAS_LIBRARIES} ${CBLAS_LIBRARIES}) + add_definitions(-DCOMPILE_CPU=1 -DBLAS_FOUND=1) + endif(CBLAS_FOUND) + endif(BLAS_FOUND) + endif(MKL_FOUND) + endif(USE_WASM_COMPATIBLE_BLAS) endif(COMPILE_CPU) ############################################################################### diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index 68c76a641..133fa1d11 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -10,8 
+10,13 @@ if(NOT COMPILE_DECODER_ONLY) endif() add_subdirectory(./pathie-cpp) -set(INTGEMM_DONT_BUILD_TESTS ON CACHE BOOL "Disable intgemm tests") -add_subdirectory(./intgemm) +if(COMPILE_CPU) + if(USE_WASM_COMPATIBLE_BLAS) + add_subdirectory(./onnxjs) + endif(USE_WASM_COMPATIBLE_BLAS) + set(INTGEMM_DONT_BUILD_TESTS ON CACHE BOOL "Disable intgemm tests") + add_subdirectory(./intgemm) +endif(COMPILE_CPU) if(USE_FBGEMM) # @TODO: find out if this is somehow harmful. This is supppressing CMake warnings for CMAKE_SUPPRESS_DEVELOPER_WARNINGS diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 608aa8c84..59d47a741 100755 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -17,10 +17,12 @@ #if MKL_FOUND #include -#else -#if BLAS_FOUND -#include -#endif +#elif BLAS_FOUND + #if WASM_COMPATIBLE_BLAS + #include "3rd_party/onnxjs/src/wasm-ops/gemm.h" + #else + #include + #endif // WASM_COMPATIBLE_BLAS #endif namespace marian { diff --git a/src/tensors/cpu/prod.cpp b/src/tensors/cpu/prod.cpp index 781d75809..7d8b68137 100755 --- a/src/tensors/cpu/prod.cpp +++ b/src/tensors/cpu/prod.cpp @@ -9,10 +9,12 @@ #if MKL_FOUND #include -#else -#if BLAS_FOUND -#include -#endif +#elif BLAS_FOUND + #if WASM_COMPATIBLE_BLAS + #include "3rd_party/onnxjs/src/wasm-ops/gemm.h" + #else + #include + #endif // WASM_COMPATIBLE_BLAS #endif #include "integer_common.h" @@ -195,9 +197,10 @@ void ProdBatched(marian::Tensor C, bool transB, float beta, float scalar) { - if (C->getBackend()->isLegacyBatchedGemm()) { + //if (C->getBackend()->isLegacyBatchedGemm()) { ProdBatchedOld(C, allocator, A, B, transA, transB, beta, scalar); - } + //} + /* #if MKL_FOUND float alpha = scalar; @@ -274,6 +277,7 @@ void ProdBatched(marian::Tensor C, C; A; B; transA; transB; beta; scalar; ABORT("You need to compile with MKL in order to use the CPU version"); #endif + */ } void ProdWithBias(marian::Tensor C, diff --git a/src/tensors/cpu/prod_blas.h b/src/tensors/cpu/prod_blas.h 
index a591fdd26..1d6757927 100644 --- a/src/tensors/cpu/prod_blas.h +++ b/src/tensors/cpu/prod_blas.h @@ -1,9 +1,11 @@ #if MKL_FOUND #include -#else -#if BLAS_FOUND -#include -#endif +#elif BLAS_FOUND + #if WASM_COMPATIBLE_BLAS + #include "3rd_party/onnxjs/src/wasm-ops/gemm.h" + #else + #include + #endif // WASM_COMPATIBLE_BLAS #endif inline void sgemm(bool transA, @@ -20,20 +22,24 @@ inline void sgemm(bool transA, float* c, int ldc) { #if BLAS_FOUND - cblas_sgemm(CblasRowMajor, - transA ? CblasTrans : CblasNoTrans, - transB ? CblasTrans : CblasNoTrans, - rows_a, - rows_b, - width, - alpha, - a, - lda, - b, - ldb, - beta, - c, - ldc); + #if WASM_COMPATIBLE_BLAS + gemm_f32_imp(transA, transB, rows_a, rows_b, width, alpha, a, b, beta, c); + #else + cblas_sgemm(CblasRowMajor, + transA ? CblasTrans : CblasNoTrans, + transB ? CblasTrans : CblasNoTrans, + rows_a, + rows_b, + width, + alpha, + a, + lda, + b, + ldb, + beta, + c, + ldc); + #endif #else transA; transB; rows_a; rows_b; width; alpha; a; lda; b; ldb; beta; c; ldc; // make compiler happy ABORT("Marian must be compiled with a BLAS library"); From 773d56570298a9dd8eab2f55d794456079ebd347 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Tue, 2 Feb 2021 14:03:49 +0100 Subject: [PATCH 06/46] Updated intgemm submodule - It compiles successfully as wasm target using emscripten toolchain - It contains changes to use wasm wormhole --- src/3rd_party/intgemm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/intgemm b/src/3rd_party/intgemm index 84a8a1018..cc71e5c2a 160000 --- a/src/3rd_party/intgemm +++ b/src/3rd_party/intgemm @@ -1 +1 @@ -Subproject commit 84a8a1018d8f5dc13afd0f103a0bcf3e5dd1dec0 +Subproject commit cc71e5c2a69755009667330af1f60a4ed15b5b63 From b11cd34320b926d88965b57ffc0c1a294e59d1c9 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 4 Feb 2021 12:38:27 +0100 Subject: [PATCH 07/46] Make marian decoder compile on WASM - wasm compilation can be enabled by cmake 
option COMPILE_WASM - cmake changes - source changes --- CMakeLists.txt | 88 ++++++++++++++++++++++++++++++-------- src/CMakeLists.txt | 19 +++++++- src/common/logging.cpp | 4 ++ src/functional/operators.h | 4 +- 4 files changed, 92 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bf0b0fb07..e57254a97 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,6 @@ endif () project(marian CXX C) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") # Custom CMake options option(COMPILE_CPU "Compile CPU version" ON) @@ -16,6 +15,7 @@ option(COMPILE_CUDA "Compile GPU version" ON) option(COMPILE_EXAMPLES "Compile examples" OFF) option(COMPILE_SERVER "Compile marian-server" OFF) option(COMPILE_TESTS "Compile tests" OFF) +option(COMPILE_WASM "Compile marian as WASM target" OFF) option(COMPILE_DECODER_ONLY "Compile marian-decoder only" OFF) option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF) option(USE_CUDNN "Use CUDNN library" OFF) @@ -29,6 +29,12 @@ option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON) option(USE_STATIC_LIBS "Link statically against non-system libs" OFF) option(GENERATE_MARIAN_INSTALL_TARGETS "Generate Marian install targets (requires CMake 3.12+)" OFF) +if(NOT COMPILE_WASM) + # Setting BUILD_ARCH to native invokes CPU intrinsic detection logic below. + # Prevent invoking that logic for WASM builds. + set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") +endif() + if(COMPILE_DECODER_ONLY) # Enable building decoder-only source code. add_compile_definitions(DECODER_ONLY) @@ -172,6 +178,10 @@ else(MSVC) set(INTRINSICS "${INTRINSICS} -mavx512f") list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx512f) endif(AVX512_FOUND) + elseif(COMPILE_WASM) + # Can't set to -msse4.1 because onnxjs doesn't compile with this flag. 
It can be upgraded to + # -msse4.1 once marian can solely be compiled with intgemm ("onnxjs" will be removed in that case) + set(INTRINSICS "-mssse3 -msimd128") else() set(INTRINSICS "-msse4.1") endif() @@ -204,24 +214,64 @@ else(MSVC) set(CMAKE_RDYNAMIC_FLAG "-rdynamic") endif(CMAKE_COMPILER_IS_GNUCC) - set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") - set(CMAKE_CXX_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}") - set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg") - set(CMAKE_CXX_FLAGS_PROFGEN "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") - set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction") - - # these need to be set separately - set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") - set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") - set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") - set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}") - set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") - set(CMAKE_C_FLAGS_PROFGEN "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") - set(CMAKE_C_FLAGS_PROFUSE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction") + if(COMPILE_WASM) + # Setting USE_SSE2 definition to enable SSE2 specific code in "3rd_party/sse_mathfun.h" for wasm builds + add_compile_definitions(USE_SSE2) + # Add compile definition for wasm builds + add_compile_definitions(WASM) + if(0) + set(WASM_EMSCRIPTEN_COMPILE_OPTIONS -O3 "SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=1" "SHELL:-s DISABLE_EXCEPTION_CATCHING=0" 
"SHELL:-s LLD_REPORT_UNDEFINED") + # necessary to be able to use files downloaded in JS (https://emscripten.org/docs/api_reference/Filesystem-API.html#including-file-system-support) + list(APPEND WASM_EMSCRIPTEN_COMPILE_OPTIONS "SHELL:-s FORCE_FILESYSTEM=1") + # necessary to allow the total amount of memory to change depending on the demands of the application + list(APPEND WASM_EMSCRIPTEN_COMPILE_OPTIONS "SHELL:-s ALLOW_MEMORY_GROWTH=1") + # Flag to preserve function names in compiled code when linking (https://emscripten.org/docs/tools_reference/emcc.html#emcc-g) + list(APPEND WASM_EMSCRIPTEN_COMPILE_OPTIONS -g2) + + # Imported CMAKE_CXX_FLAGS/CMAKE_C_FLAGS from below for wasm builds; This could be improved. + list(APPEND WASM_EMSCRIPTEN_COMPILE_OPTIONS "$<$:-std=c++11>" -pthread ${CMAKE_GCC_FLAGS} -fPIC ${INTRINSICS}) + separate_arguments(DISABLE_GLOBALLY UNIX_COMMAND "${DISABLE_GLOBALLY}") + list(APPEND WASM_EMSCRIPTEN_COMPILE_OPTIONS ${DISABLE_GLOBALLY}) + # Disabling Pthreads + memory growth warning to be an error + # Pthreads + memory growth causes JS accessing the wasm memory to be slow + # https://github.com/WebAssembly/design/issues/1271 + list(APPEND WASM_EMSCRIPTEN_COMPILE_OPTIONS "-Wno-error=pthreads-mem-growth") + + # Release flags from below for wasm builds; + list(APPEND WASM_EMSCRIPTEN_COMPILE_OPTIONS -funroll-loops) + add_compile_options(${WASM_EMSCRIPTEN_COMPILE_OPTIONS}) + add_link_options(${WASM_EMSCRIPTEN_COMPILE_OPTIONS}) + else() + set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -s WASM=1 -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s LLD_REPORT_UNDEFINED -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -g2 -Wno-error=pthreads-mem-growth -funroll-loops") + list(APPEND ALL_WARNINGS -Wno-error=pthreads-mem-growth) + list(APPEND ALL_WARNINGS -Wno-error=deprecated-register) + endif() + + # use our customizations to the generated emscripted html and js 
resources + set(MARIAN_DECODER_EMSCRIPTEN_LINK_FLAGS "--pre-js ${CMAKE_SOURCE_DIR}/wasm/pre-module.js \ + --post-js ${CMAKE_SOURCE_DIR}/wasm/post-module.js \ + --shell-file ${CMAKE_SOURCE_DIR}/wasm/custom_shell.html") + else(COMPILE_WASM) + set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_CXX_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}") + set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg") + set(CMAKE_CXX_FLAGS_PROFGEN "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") + set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction") + + # these need to be set separately + set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}") + set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") + set(CMAKE_C_FLAGS_PROFGEN "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") + set(CMAKE_C_FLAGS_PROFUSE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction") + endif(COMPILE_WASM) endif(MSVC) # with gcc 7.0 and above we need to mark fallthrough in switch case statements diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 902ef33a2..532682fb3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,6 +11,11 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}/3rd_party/intgemm) include_directories(${CMAKE_CURRENT_BINARY_DIR}/3rd_party) 
include_directories(${CMAKE_BINARY_DIR}/local/include) +if (NOT COMPILE_WASM) + set(EXCEPTION_WITH_CALLSTACK_SOURCE 3rd_party/ExceptionWithCallStack.cpp) + set(QUANTIZER_SOURCE optimizers/quantizer.cpp) +endif() + set(MARIAN_SOURCES common/aliases.cpp common/fastopt.cpp @@ -45,7 +50,7 @@ set(MARIAN_SOURCES data/shortlist.cpp 3rd_party/cnpy/cnpy.cpp - 3rd_party/ExceptionWithCallStack.cpp + ${EXCEPTION_WITH_CALLSTACK_SOURCE} 3rd_party/onnx/protobuf/onnx-ml.pb-wrapper.cpp @@ -80,7 +85,7 @@ set(MARIAN_SOURCES rnn/cells.cpp rnn/attention.cpp - optimizers/quantizer.cpp + ${QUANTIZER_SOURCE} optimizers/clippers.cpp optimizers/optimizers.cpp @@ -139,6 +144,10 @@ if (NOT COMPILE_DECODER_ONLY) ) endif() +if(COMPILE_WASM) + #set_source_files_properties(common/binary.cpp PROPERTIES COMPILE_FLAGS -msse4.1) +endif(COMPILE_WASM) + add_library(marian STATIC ${MARIAN_SOURCES}) target_compile_options(marian PRIVATE ${ALL_WARNINGS}) @@ -225,6 +234,12 @@ if (NOT COMPILE_LIBRARY_ONLY) set(EXECUTABLES ${EXECUTABLES} marian_decoder) + if(COMPILE_WASM) + set_target_properties(marian_decoder PROPERTIES + LINK_FLAGS "${MARIAN_DECODER_EMSCRIPTEN_LINK_FLAGS}" + SUFFIX ".html") + endif(COMPILE_WASM) + if (NOT COMPILE_DECODER_ONLY) add_executable(marian_train command/marian_main.cpp) set_target_properties(marian_train PROPERTIES OUTPUT_NAME marian) diff --git a/src/common/logging.cpp b/src/common/logging.cpp index 62d76feea..c60ebbcae 100644 --- a/src/common/logging.cpp +++ b/src/common/logging.cpp @@ -145,7 +145,11 @@ void switchtoMultinodeLogging(std::string nodeIdStr) { namespace marian { std::string noinline getCallStack(size_t skipLevels) { + #ifdef WASM + return "Callstacks not supported in WASM builds currently"; + #else return ::Microsoft::MSR::CNTK::DebugUtil::GetCallStack(skipLevels + 2, /*makeFunctionNamesStandOut=*/true); + #endif } void noinline logCallStack(size_t skipLevels) { diff --git a/src/functional/operators.h b/src/functional/operators.h index 6345bfb61..827c9a30a 100755 
--- a/src/functional/operators.h +++ b/src/functional/operators.h @@ -265,11 +265,11 @@ struct Ops { // @TODO: get rid of loop4 with proper intrisics static inline float32x4 sgn(const float32x4& x) { return loop4(Ops::sgn, x); } - +#ifndef WASM static inline float32x4 round(const float32x4& x) { return _mm_round_ps(x, _MM_FROUND_TO_NEAREST_INT); } static inline float32x4 floor(const float32x4& x) { return _mm_floor_ps(x); } static inline float32x4 ceil(const float32x4& x) { return _mm_ceil_ps(x); } - +#endif static inline float32x4 add(const float32x4& x, const float32x4& y) { return _mm_add_ps(x, y); } static inline float32x4 sub(const float32x4& x, const float32x4& y) { return _mm_sub_ps(x, y); } static inline float32x4 mul(const float32x4& x, const float32x4& y) { return _mm_mul_ps(x, y); } From f00909e7e3abc99cd00bf837b0af8fea23a0ebb3 Mon Sep 17 00:00:00 2001 From: Graeme Date: Wed, 6 Jan 2021 11:30:22 +0000 Subject: [PATCH 08/46] Prefer if/else in place of try/catch in FastOpt makeScalar (#774) --- CHANGELOG.md | 1 + src/common/fastopt.h | 52 +++++++++++++++++++++----------------------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31b58a618..a417baf39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Changed compile flags -Ofast to -O3 and remove --ffinite-math - Moved old graph groups to depracated folder - Make cublas and cusparse handle inits lazy to save memory when unused +- Replaced exception-based implementation for type determination in FastOpt::makeScalar ## [1.9.0] - 2020-03-10 diff --git a/src/common/fastopt.h b/src/common/fastopt.h index 3f735660b..e8c6685a3 100644 --- a/src/common/fastopt.h +++ b/src/common/fastopt.h @@ -153,35 +153,33 @@ class FastOpt { // Build Scalar node via controlled failure to convert from a YAML::Node object. 
void makeScalar(const YAML::Node& v) { elements_ = 0; - try { - // Cast node to text first, that works for any scalar node and test that it does not contain single characters - // that according to YAML could be boolean values. Unfortunately, we do not have any type information at this point. - // This means we are disabling support for boolean values in YAML that are expressed with these characters. - auto asText = v.as(); - if(asText.size() == 1 && asText.find_first_of("nyNYtfTF") == 0) // @TODO: should we disallow other strings too? - throw YAML::BadConversion(YAML::Mark()); // get's picked up by next catch block - - value_ = v.as(); + + // Placeholders for decode + bool asBool; + int64_t asInt; + double asDouble; + + // Text boolean values should be treated as a string + auto asString = v.as(); + bool isTextBool = asString.size() == 1 && asString.find_first_of("nyNYtfTF") == 0; + + if(YAML::convert::decode(v, asBool) && !isTextBool) { + value_ = asBool; type_ = NodeType::Bool; - } catch(const YAML::BadConversion& /*e*/) { - try { - value_ = v.as(); - type_ = NodeType::Int64; - } catch(const YAML::BadConversion& /*e*/) { - try { - value_ = v.as(); - type_ = NodeType::Float64; - } catch(const YAML::BadConversion& /*e*/) { - try { - value_ = v.as(); - type_ = NodeType::String; - } catch (const YAML::BadConversion& /*e*/) { - ABORT("Cannot convert YAML node {}", v); - } - } - } } - + else if(YAML::convert::decode(v, asInt)) { + value_ = asInt; + type_ = NodeType::Int64; + } + else if(YAML::convert::decode(v, asDouble)) { + value_ = asDouble; + type_ = NodeType::Float64; + } + else { + value_ = asString; + type_ = NodeType::String; + } + ABORT_IF(ph_, "ph_ should be undefined"); ABORT_IF(!array_.empty(), "array_ should be empty"); } From 310cba7d2f92204a67c6929a032bd4a6f2830a98 Mon Sep 17 00:00:00 2001 From: Qianqian Zhu Date: Thu, 7 Jan 2021 13:12:36 +0000 Subject: [PATCH 09/46] Fix to resolve run time failures for FastOpt enabled WASM builds (#779) * copy changes 
from commit 4df92f2 * add comments for better understanding * restore the newline at the end of file and add this changes in changelog.md --- CHANGELOG.md | 1 + src/common/fastopt.h | 54 ++++++++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a417baf39..4e7c74053 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Added default "none" for option shuffle in BatchGenerator, so that it works in executables where shuffle is not an option. - Added a few missing header files in shortlist.h and beam_search.h. - Improved handling for receiving SIGTERM during training. By default, SIGTERM triggers 'save (now) and exit'. Prior to this fix, batch pre-fetching did not check for this sigal, potentially delaying exit considerably. It now pays attention to that. Also, the default behaviour of save-and-exit can now be disabled on the command line with --sigterm exit-immediately. +- Fix the runtime failures for FASTOPT on 32-bit builds (wasm just happens to be 32-bit) because it uses hashing with an inconsistent mix of uint64_t and size_t. ### Changed - Updated SentencePiece repository to version 8336bbd0c1cfba02a879afe625bf1ddaf7cd93c5 from https://github.com/google/sentencepiece. diff --git a/src/common/fastopt.h b/src/common/fastopt.h index e8c6685a3..93b1afe6b 100644 --- a/src/common/fastopt.h +++ b/src/common/fastopt.h @@ -38,9 +38,10 @@ inline constexpr uint64_t crc(const char* const str) noexcept { /*****************************************************************************/ // PerfectHash constructs a perfect hash for a set K of n numeric keys. The size of -// the hash is m > n (not much larger) and n << max(K) (much smaller). If I am not wrong m -// is the next power of 2 larger than n? We then build an array of size m with n fields defined. -// m - n fields stay undefined (a bit of waste). 
+// the hash is m > n (not much larger) and n << max(K) (much smaller). The output array size is +// determined by PHF::init in "src/3rd_party/phf/phf.h". m - n fields stay undefined (a bit of waste). + +// Wrapper class for the 3rd-party library in "src/3rd_party/phf" class PerfectHash { private: phf phf_; @@ -62,10 +63,12 @@ class PerfectHash { PHF::destroy(&phf_); } + // subscript operator [] overloading: if the key is uint64_t, return the hash code directly uint32_t operator[](const uint64_t& key) const { return PHF::hash(const_cast(&phf_), key); } + // If the key is a string, return the hash code for the string's CRC code uint32_t operator[](const char* const keyStr) const { return (*this)[crc::crc(keyStr)]; } @@ -109,6 +112,9 @@ class FastOpt { public: // Node types for FastOpt, seem to be enough to cover YAML:NodeType + // Multi-element types include "Sequence" and "Map" + // "Sequence" is implemented with STL vectors + // "Map" is implemented with a 3rd-party PHF library (see the PerfectHash class) enum struct NodeType { Null, Bool, Int64, Float64, String, Sequence, Map }; @@ -126,6 +132,7 @@ class FastOpt { size_t elements_{0}; // Number of elements if isMap or isSequence is true, 0 otherwise. // Used to find elements if isSequence() is true. + // Retrieve the entry using array indexing. inline const std::unique_ptr& arrayLookup(size_t keyId) const { if(keyId < array_.size()) return array_[keyId]; @@ -134,13 +141,15 @@ class FastOpt { } // Used to find elements if isMap() is true. - inline const std::unique_ptr& phLookup(size_t keyId) const { + // Retrieve the entry from the hash table. + inline const std::unique_ptr& phLookup(uint64_t keyId) const { if(ph_) return array_[(*ph_)[keyId]]; else return uniqueNullPtr; } + // Builders for different types of nodes. // Build Null node. 
void makeNull() { elements_ = 0; @@ -221,7 +230,7 @@ class FastOpt { type_ = NodeType::Map; } - // Build a Map node, uses std::string as key, which gets hashed to size_t and used in the function above. + // Build a Map node, uses std::string as key, which gets hashed to uint64_t and used in the function above. void makeMap(const std::map& m) { std::map mi; for(const auto& it : m) { @@ -263,13 +272,14 @@ class FastOpt { public: // Constructor to recursively create a FastOpt object from a YAML::Node following the yaml structure. - FastOpt(const YAML::Node& node) + FastOpt(const YAML::Node& node) { construct(node); } - FastOpt(const YAML::Node& node, uint64_t fingerprint) + FastOpt(const YAML::Node& node, uint64_t fingerprint) : fingerprint_{fingerprint} { construct(node); } + // Predicates for node types bool isSequence() const { return type_ == NodeType::Sequence; } @@ -279,20 +289,20 @@ class FastOpt { } bool isScalar() const { - return type_ == NodeType::Bool - || type_ == NodeType::Float64 - || type_ == NodeType::Int64 + return type_ == NodeType::Bool + || type_ == NodeType::Float64 + || type_ == NodeType::Int64 || type_ == NodeType::String; } bool isNull() const { return type_ == NodeType::Null; - } + } bool isInt() const { return type_ == NodeType::Int64; - } - + } + bool isBool() const { return type_ == NodeType::Bool; } @@ -318,11 +328,11 @@ class FastOpt { std::swap(array_, other.array_); std::swap(type_, other.type_); std::swap(elements_, other.elements_); - // leave fingerprint alone as it needed by parent node. + // leave fingerprint alone as it needed by parent node. } - // Is the hashed key in a map? - bool has(size_t keyId) const { + // Is the hashed key in a map? + bool has(uint64_t keyId) const { if(isMap() && elements_ > 0) { const auto& ptr = phLookup(keyId); return ptr ? 
ptr->fingerprint_ == keyId : false; @@ -346,27 +356,27 @@ class FastOpt { } // access sequence or map element - const FastOpt& operator[](size_t keyId) const { + const FastOpt& operator[](uint64_t keyId) const { if(isSequence()) { - const auto& ptr = arrayLookup(keyId); + const auto& ptr = arrayLookup((size_t)keyId); ABORT_IF(!ptr, "Unseen key {}" , keyId); return *ptr; } else if(isMap()) { const auto& ptr = phLookup(keyId); ABORT_IF(!ptr || ptr->fingerprint_ != keyId, "Unseen key {}", keyId); - return *ptr; + return *ptr; } else { ABORT("Not a sequence or map node"); } } + // operator [] overloading for non-uint64_t keys const FastOpt& operator[](int key) const { - return operator[]((size_t)key); + return operator[]((uint64_t)key); } const FastOpt& operator[](const char* const key) const { - // MacOS requires explicit cast to size_t before we can use it. - return operator[]((size_t)crc::crc(key)); + return operator[](crc::crc(key)); } const FastOpt& operator[](const std::string& key) const { From c3492d3a6ddbebdec46d2c4bd6adf498d5c075ba Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 4 Feb 2021 19:00:40 +0100 Subject: [PATCH 10/46] Enable compilation without using pthreads - COMPILE_WITH_PTHREADS cmake option to enable/disable compiling the multi-threaded version of marian --- CMakeLists.txt | 19 +++++++++++++--- src/3rd_party/threadpool.h | 43 +++++++++++++++++++++++++++++++++++++ src/translator/translator.h | 30 ++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e57254a97..f44b499c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ option(COMPILE_SERVER "Compile marian-server" OFF) option(COMPILE_TESTS "Compile tests" OFF) option(COMPILE_WASM "Compile marian as WASM target" OFF) option(COMPILE_DECODER_ONLY "Compile marian-decoder only" OFF) +option(COMPILE_WITH_PTHREADS "Compile with pthreads support" ON) option(USE_CCACHE "Use ccache compiler cache 
(https://ccache.dev)" OFF) option(USE_CUDNN "Use CUDNN library" OFF) option(USE_DOXYGEN "Build documentation with Doxygen" ON) @@ -40,6 +41,11 @@ if(COMPILE_DECODER_ONLY) add_compile_definitions(DECODER_ONLY) endif(COMPILE_DECODER_ONLY) +if(COMPILE_WITH_PTHREADS) + # Need to set compile definition as well + add_compile_definitions(USE_PTHREADS) +endif() + # fbgemm and sentencepiece are both defined with "non-local" installation targets (the source projects don't define them, # so we define them in src\3rd_party\CMakeLists.txt), but that isn't supported until CMake 3.12. Prior to CMake 3.12, # targets could only be install(...)ed in the same CMakeLists.txt they were defined. We currently target CMake 3.5.1 @@ -242,9 +248,16 @@ else(MSVC) add_compile_options(${WASM_EMSCRIPTEN_COMPILE_OPTIONS}) add_link_options(${WASM_EMSCRIPTEN_COMPILE_OPTIONS}) else() - set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -s WASM=1 -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s LLD_REPORT_UNDEFINED -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -g2 -Wno-error=pthreads-mem-growth -funroll-loops") - list(APPEND ALL_WARNINGS -Wno-error=pthreads-mem-growth) + if(COMPILE_WITH_PTHREADS) + set(PTHREAD_FLAG "-pthread") + set(DISABLE_PTHREAD_MEMGROWTH_WARNING -Wno-error=pthreads-mem-growth) + endif() + set(CMAKE_CXX_FLAGS "-std=c++11 ${PTHREAD_FLAG} ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -s WASM=1 -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s LLD_REPORT_UNDEFINED -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -g2 ${DISABLE_PTHREAD_MEMGROWTH_WARNING} -funroll-loops") + # Disabling Pthreads + memory growth warning to be an error + # Pthreads + memory growth causes JS accessing the wasm memory to be slow + # https://github.com/WebAssembly/design/issues/1271 + list(APPEND ALL_WARNINGS ${DISABLE_PTHREAD_MEMGROWTH_WARNING}) list(APPEND ALL_WARNINGS 
-Wno-error=deprecated-register) endif() diff --git a/src/3rd_party/threadpool.h b/src/3rd_party/threadpool.h index d77ce43cb..080753e34 100644 --- a/src/3rd_party/threadpool.h +++ b/src/3rd_party/threadpool.h @@ -53,29 +53,46 @@ class ThreadPool { ~ThreadPool(); size_t getNumTasks() const { + #if USE_PTHREADS return tasks.size(); + #else + ABORT("ThreadPool::getNumTasks() not supported"); + #endif } void wait_for_one(std::unique_lock& lock) { + #if USE_PTHREADS waiting_threads++; sync_condition.notify_all(); sync_condition.wait(lock, [this]{ return continue_work; }); waiting_threads--; + #else + ABORT("ThreadPool::wait_for_one not supported"); + #endif } void wait_for_others(std::unique_lock& lock) { + #if USE_PTHREADS continue_work = false; sync_condition.wait(lock, [this]{ return waiting_threads == workers.size() - 1; }); + #else + ABORT("ThreadPool::wait_for_others not supported"); + #endif } void notify_others() { + #if USE_PTHREADS continue_work = true; sync_condition.notify_all(); + #else + ABORT("ThreadPool::notify_others not supported"); + #endif } void join_all() { + #if USE_PTHREADS { std::unique_lock lock(queue_mutex); stop = true; @@ -85,6 +102,9 @@ class ThreadPool { for (std::thread &worker: workers) { worker.join(); } + #else + ABORT("ThreadPool::join_all not supported"); + #endif } private: @@ -107,12 +127,18 @@ class ThreadPool { // the constructor just launches some amount of workers inline ThreadPool::ThreadPool(size_t threads, size_t in_bound) : bound(in_bound), stop(false) { + #if USE_PTHREADS ABORT_IF(getThrowExceptionOnAbort(), "Throwing of MarianRuntimeException not presently supported in threads"); reserve(threads); + #else + bound; stop; continue_work; waiting_threads; // Make compiler warnings silent on wasm + ABORT("ThreadPool::ThreadPool not supported"); + #endif } // allow callers to increase the number of threads after the fact inline void ThreadPool::reserve(size_t threads) { +#if USE_PTHREADS while (workers.size() < threads) 
workers.emplace_back( [this] { @@ -133,6 +159,9 @@ inline void ThreadPool::reserve(size_t threads) { } } ); +#else + ABORT("ThreadPool::reserve not supported"); +#endif } // add new work item to the pool @@ -140,6 +169,7 @@ template inline auto ThreadPool::enqueue(F&& f, Args&&... args) -> std::future::type> { +#if USE_PTHREADS using return_type = typename std::result_of::type; auto inner_task = std::bind(std::forward(f), std::forward(args)...); @@ -172,12 +202,17 @@ inline auto ThreadPool::enqueue(F&& f, Args&&... args) } condition.notify_one(); return res; +#else + ABORT("ThreadPool::enqueue not supported"); +#endif } // the destructor joins all threads inline ThreadPool::~ThreadPool() { +#if USE_PTHREADS if(!stop) join_all(); +#endif } // helper class to wait for procedural tasks (no return value) submitted into the ThreadPool @@ -190,11 +225,19 @@ class TaskBarrier { std::vector> futures; public: void push_back(std::future&& future) { + #if USE_PTHREADS futures.emplace_back(std::move(future)); + #else + ABORT("TaskBarrier::push_back not supported"); + #endif } ~TaskBarrier() { // destructor waits until all results are available + #if USE_PTHREADS for (auto&& future : futures) future.wait(); + #else + LOG(info, "TaskBarrier destructor not supported"); + #endif } }; diff --git a/src/translator/translator.h b/src/translator/translator.h index 8ba5a2fb2..1cc27dc7a 100755 --- a/src/translator/translator.h +++ b/src/translator/translator.h @@ -7,7 +7,9 @@ #include "data/shortlist.h" #include "data/text_input.h" +#if USE_PTHREADS #include "3rd_party/threadpool.h" +#endif #include "translator/history.h" #include "translator/output_collector.h" @@ -74,7 +76,9 @@ class Translate : public ModelTask { auto devices = Config::getDevices(options_); numDevices_ = devices.size(); +#if USE_PTHREADS ThreadPool threadPool(numDevices_, numDevices_); +#endif scorers_.resize(numDevices_); graphs_.resize(numDevices_); @@ -114,7 +118,11 @@ class Translate : public ModelTask { 
graph->forward(); }; +#if USE_PTHREADS threadPool.enqueue(task, device, id++); +#else + task(device, id++); +#endif } if(options_->get("output-sampling", false)) { @@ -130,9 +138,16 @@ class Translate : public ModelTask { } void run() override { + #if USE_PTHREADS data::BatchGenerator bg(corpus_, options_); + #else + // Set to false to run non-async mode + data::BatchGenerator bg(corpus_, options_, nullptr, false); + #endif +#if USE_PTHREADS ThreadPool threadPool(numDevices_, numDevices_); +#endif size_t batchId = 0; auto collector = New(options_->get("output")); @@ -178,7 +193,11 @@ class Translate : public ModelTask { } }; +#if USE_PTHREADS threadPool.enqueue(task, batchId++); +#else + task(batchId++); +#endif } } @@ -262,7 +281,12 @@ class TranslateService : public ModelServiceTask { ? convertTsvToLists(input, options_->get("tsv-fields", 1)) : std::vector({input}); auto corpus_ = New(inputs, srcVocabs_, options_); + #if USE_PTHREADS data::BatchGenerator batchGenerator(corpus_, options_); + #else + // Set to false to check if wasm builds run further + data::BatchGenerator batchGenerator(corpus_, options_, nullptr, false); + #endif auto collector = New(options_->get("quiet-translation", false)); auto printer = New(options_, trgVocab_); @@ -271,7 +295,9 @@ class TranslateService : public ModelServiceTask { batchGenerator.prepare(); { +#if USE_PTHREADS ThreadPool threadPool_(numDevices_, numDevices_); +#endif for(auto batch : batchGenerator) { auto task = [=](size_t id) { @@ -294,7 +320,11 @@ class TranslateService : public ModelServiceTask { } }; +#if USE_PTHREADS threadPool_.enqueue(task, batchId); +#else + task(batchId); +#endif batchId++; } } From 179b8e6a5b82164dd339f07d9970bb080a3b78a1 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 5 Feb 2021 11:14:22 +0100 Subject: [PATCH 11/46] Removed usage of "register" storage type from code - It is deprecated beginning with C++17 --- src/3rd_party/half_float/umHalf.inl | 7 +------ 1 file changed, 1 insertion(+), 6
deletions(-) diff --git a/src/3rd_party/half_float/umHalf.inl b/src/3rd_party/half_float/umHalf.inl index cdccd8473..d24f14e01 100644 --- a/src/3rd_party/half_float/umHalf.inl +++ b/src/3rd_party/half_float/umHalf.inl @@ -45,11 +45,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #pragma intrinsic(_BitScanReverse) #endif -#if __cplusplus >= 201703L -#define __SWITCHED_REGISTER -#else -#define __SWITCHED_REGISTER register -#endif // ------------------------------------------------------------------------------------------------ inline HalfFloat::HalfFloat(float other) @@ -350,7 +345,7 @@ inline HalfFloat operator+ (HalfFloat one, HalfFloat two) // compute the difference between the two exponents. shifts with negative // numbers are undefined, thus we need two code paths - __SWITCHED_REGISTER int expDiff = one.IEEE.Exp - two.IEEE.Exp; + int expDiff = one.IEEE.Exp - two.IEEE.Exp; if (0 == expDiff) { From caf07b31840117b29c361113afd4921d8419b1be Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 5 Feb 2021 11:15:33 +0100 Subject: [PATCH 12/46] Updated onnxjs submodule - It avoids building unwanted targets --- src/3rd_party/onnxjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/onnxjs b/src/3rd_party/onnxjs index 91b0f8d94..dfefde914 160000 --- a/src/3rd_party/onnxjs +++ b/src/3rd_party/onnxjs @@ -1 +1 @@ -Subproject commit 91b0f8d94e2917092e15b4e8d9baad4bfa981ca2 +Subproject commit dfefde914fcc79b4c0f9eafcfc97e4b606af700e From 5dafba978c05926bd992b4c3d6515a07d49eed5b Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 5 Feb 2021 11:17:54 +0100 Subject: [PATCH 13/46] Updated sentencepiece submodule - Compile only static library required for inference --- src/3rd_party/sentencepiece | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/sentencepiece b/src/3rd_party/sentencepiece index 8336bbd0c..f07c3e673 160000 --- a/src/3rd_party/sentencepiece +++ 
b/src/3rd_party/sentencepiece @@ -1 +1 @@ -Subproject commit 8336bbd0c1cfba02a879afe625bf1ddaf7cd93c5 +Subproject commit f07c3e6736ad2e8b75433fc4dd01980a2576f570 From a7ee4a1bd322de0e9c5c6f7029a54441a1267758 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 5 Feb 2021 11:59:39 +0100 Subject: [PATCH 14/46] cmake changes (leftover) for marian decoder only builds --- src/3rd_party/CMakeLists.txt | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index 133fa1d11..2567f7531 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -85,18 +85,31 @@ if(USE_SENTENCEPIECE) add_subdirectory(./sentencepiece) include_directories(./sentencepiece) - + if(NOT COMPILE_DECODER_ONLY) set_target_properties(spm_encode spm_decode spm_train spm_normalize spm_export_vocab PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") + endif() if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND (CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 8.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)) + if(COMPILE_DECODER_ONLY) + set_property(TARGET sentencepiece-static APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-class-memaccess") + else() foreach(t sentencepiece-static sentencepiece_train-static spm_decode spm_encode spm_export_vocab spm_normalize spm_train) set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-class-memaccess") endforeach(t) endif() + endif() if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(COMPILE_DECODER_ONLY) + set_property(TARGET sentencepiece-static APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-tautological-compare -Wno-unused") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + set_property(TARGET sentencepiece-static APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-range-loop-construct") + endif() + # get_property(SENTENCEPIECE_COMPILE_FLAGS TARGET ${t} PROPERTY COMPILE_FLAGS) + # message("-- SENTENCPIECE: compile flags for target ${t}: 
${SENTENCEPIECE_COMPILE_FLAGS}") + else() foreach(t sentencepiece-static sentencepiece_train-static spm_decode spm_encode spm_export_vocab spm_normalize spm_train) set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-tautological-compare -Wno-unused") @@ -106,6 +119,7 @@ if(USE_SENTENCEPIECE) # get_property(SENTENCEPIECE_COMPILE_FLAGS TARGET ${t} PROPERTY COMPILE_FLAGS) # message("-- SENTENCPIECE: compile flags for target ${t}: ${SENTENCEPIECE_COMPILE_FLAGS}") endforeach(t) + endif() endif() if(USE_STATIC_LIBS) @@ -114,9 +128,15 @@ if(USE_SENTENCEPIECE) if(GENERATE_MARIAN_INSTALL_TARGETS) if(USE_STATIC_LIBS) - install(TARGETS sentencepiece-static sentencepiece_train-static - EXPORT marian-targets - DESTINATION sentencepiece) + if(COMPILE_DECODER_ONLY) + install(TARGETS sentencepiece-static + EXPORT marian-targets + DESTINATION sentencepiece) + else() + install(TARGETS sentencepiece-static sentencepiece_train-static + EXPORT marian-targets + DESTINATION sentencepiece) + endif() else() install(TARGETS sentencepiece sentencepiece_train EXPORT marian-targets From 230c4d93461a6d19fe55eadfe937ee3d4da5526c Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 5 Feb 2021 14:21:04 +0100 Subject: [PATCH 15/46] Updated sentencepiece submodule --- src/3rd_party/sentencepiece | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/sentencepiece b/src/3rd_party/sentencepiece index f07c3e673..bd18c8345 160000 --- a/src/3rd_party/sentencepiece +++ b/src/3rd_party/sentencepiece @@ -1 +1 @@ -Subproject commit f07c3e6736ad2e8b75433fc4dd01980a2576f570 +Subproject commit bd18c834559ef4a25fa3a740b97465df2daae6eb From 9d7767bc5e10ce817d9efd46f3e415bab7df7c04 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 5 Feb 2021 16:32:33 +0100 Subject: [PATCH 16/46] Enable "-pthread" compile flag for COMPILE_WITH_PTHREADS=off - This happens now for both native and wasm builds - The COMPILE_WITH_PTHREADS=off cmake option compiles with -pthread 
but it disables some multithreading code from marian sources -- It is done because sentencepiece's 3rd party dependency "protobuf-lite" doesn't compile without "-pthread" flag - WASM builds run successfully in browser --- CMakeLists.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f44b499c8..323f8adaa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,17 +248,14 @@ else(MSVC) add_compile_options(${WASM_EMSCRIPTEN_COMPILE_OPTIONS}) add_link_options(${WASM_EMSCRIPTEN_COMPILE_OPTIONS}) else() - if(COMPILE_WITH_PTHREADS) - set(PTHREAD_FLAG "-pthread") - set(DISABLE_PTHREAD_MEMGROWTH_WARNING -Wno-error=pthreads-mem-growth) - endif() + set(PTHREAD_FLAG "-pthread") + set(DISABLE_PTHREAD_MEMGROWTH_WARNING -Wno-error=pthreads-mem-growth) set(CMAKE_CXX_FLAGS "-std=c++11 ${PTHREAD_FLAG} ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -s WASM=1 -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s LLD_REPORT_UNDEFINED -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -g2 ${DISABLE_PTHREAD_MEMGROWTH_WARNING} -funroll-loops") # Disabling Pthreads + memory growth warning to be an error # Pthreads + memory growth causes JS accessing the wasm memory to be slow # https://github.com/WebAssembly/design/issues/1271 list(APPEND ALL_WARNINGS ${DISABLE_PTHREAD_MEMGROWTH_WARNING}) - list(APPEND ALL_WARNINGS -Wno-error=deprecated-register) endif() # use our customizations to the generated emscripted html and js resources From d2dbf9d0195e0a3cd67e177cf6aa55ebc4a18b06 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 5 Feb 2021 16:54:41 +0100 Subject: [PATCH 17/46] Disabled compiling optimizers/quantizer.cpp file for native builds - This file doesn't compile for wasm, so disabled it for native builds too -- Just to be consistent about the sources that we use for both wasm and native builds --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/src/CMakeLists.txt b/src/CMakeLists.txt index 532682fb3..1859804bc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -85,7 +85,7 @@ set(MARIAN_SOURCES rnn/cells.cpp rnn/attention.cpp - ${QUANTIZER_SOURCE} + #${QUANTIZER_SOURCE} optimizers/clippers.cpp optimizers/optimizers.cpp From ece499d02a588406480d1fc77435954d7e77bba0 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 12 Feb 2021 14:21:23 +0100 Subject: [PATCH 18/46] Change "sentencepiece" submodule URL from "marian-nmt" to "browsermt" --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index e76d0b505..d22a705d3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,7 +6,7 @@ url = https://github.com/marian-nmt/marian-regression-tests [submodule "src/3rd_party/sentencepiece"] path = src/3rd_party/sentencepiece - url = https://github.com/marian-nmt/sentencepiece + url = https://github.com/browsermt/sentencepiece [submodule "src/3rd_party/nccl"] path = src/3rd_party/nccl url = https://github.com/marian-nmt/nccl From 08f2b79dd90e2562e6475a4b4ac6f1a5107ba2e4 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 12 Feb 2021 16:10:31 +0100 Subject: [PATCH 19/46] Added "wasm" folder and "docker" - "wasm" contains files for compiling marian-decoder.html - "docker" for docker based wasm builds of marian --- docker/Makefile | 239 ++++++++++++++++++++ docker/README.md | 151 +++++++++++++ docker/wasm/Dockerfile | 36 ++++ wasm/custom_shell.html | 479 +++++++++++++++++++++++++++++++++++++++++ wasm/post-module.js | 0 wasm/pre-module.js | 71 ++++++ wasm/test_stdin.cpp | 65 ++++++ 7 files changed, 1041 insertions(+) create mode 100644 docker/Makefile create mode 100644 docker/README.md create mode 100644 docker/wasm/Dockerfile create mode 100644 wasm/custom_shell.html create mode 100644 wasm/post-module.js create mode 100644 wasm/pre-module.js create mode 100644 wasm/test_stdin.cpp diff --git a/docker/Makefile b/docker/Makefile new file mode 100644 
index 000000000..137ab71ab --- /dev/null +++ b/docker/Makefile @@ -0,0 +1,239 @@ +# -*- mode: makefile-gmake; indent-tabs-mode: true; tab-width: 4 -*- +SHELL = bash +PWD = $(shell pwd) +NATIVE_IMAGE = local/marian-build-native +WASM_IMAGE = local/marian-build-wasm + +all: native-image wasm-image + +# Build the Docker image for native builds +native-image: + docker build -t local/marian-build-native ./native/ + +# Build the Docker image for WASM builds +wasm-image: + docker build -t local/marian-build-wasm ./wasm/ + +# Commands for compilation: +cmake_cmd = cmake --debug-output -Wno-dev +cmake_cmd += -DUSE_STATIC_LIBS=on +cmake_cmd += -DUSE_SENTENCEPIECE=on +cmake_cmd += -DCOMPILE_CUDA=off +cmake_cmd += -DUSE_DOXYGEN=off +cmake_cmd += -DUSE_FBGEMM=off +cmake_cmd += -DCOMPILE_LIBRARY_ONLY=off +cmake_cmd += -DUSE_MKL=off +cmake_cmd += -DCOMPILE_CPU=on +native_decoder_cmake_cmd = ${cmake_cmd} +native_decoder_cmake_cmd += -DCOMPILE_DECODER_ONLY=on + +wasm_compatible_decoder_only_native_cmake_cmd = ${native_decoder_cmake_cmd} +wasm_compatible_decoder_only_native_cmake_cmd += -DUSE_WASM_COMPATIBLE_BLAS=on + +wasm_cmake_cmd = ${wasm_compatible_decoder_only_native_cmake_cmd} +wasm_cmake_cmd += -DCOMPILE_WASM=on +#wasm_cmake_cmd += -DProtobuf_INCLUDE_DIR=/usr/opt/protobuf-wasm-lib/dist/include +#wasm_cmake_cmd += -DProtobuf_LIBRARY=/usr/opt/protobuf-wasm-lib/dist/lib/libprotobuf.a + +wasm_without_pthreads_cmake_cmd = ${wasm_cmake_cmd} +wasm_without_pthreads_cmake_cmd += -DCOMPILE_WITH_PTHREADS=off + +make_cmd = make -j3 +#make_cmd += VERBOSE=1 + +# ... 
and running things on Docker +docker_mounts = ${PWD}/..:/repo +docker_mounts += ${HOME}/.ccache:/.ccache +run_on_docker = docker run --rm +run_on_docker += $(addprefix -v, ${docker_mounts}) +run_on_docker += ${INTERACTIVE_DOCKER_SESSION} + +${HOME}/.ccache: + mkdir -p $@ + +# Compile marian-decoder to WASM +compile-wasm: BUILD_DIR = /repo/build-wasm-docker +compile-wasm: ${HOME}/.ccache + ${run_on_docker} ${WASM_IMAGE} bash -c 'mkdir -p ${BUILD_DIR} && \ +cd ${BUILD_DIR} && \ +(emcmake ${wasm_cmake_cmd} .. && \ +(emmake ${make_cmd}))' + +# Prepare files to be used with WASM-compiled marian-decoder +package-files-wasm: BUILD_DIR = /repo/build-wasm-docker +package-files-wasm: MODELS_DIR = /repo/models +package-files-wasm: + ${run_on_docker} ${WASM_IMAGE} bash -c 'sacrebleu -t wmt13 -l es-en --echo src > /repo/models/newstest2013.es && \ +head -n300 /repo/models/newstest2013.es > /repo/models/newstest2013.es.top300lines && \ +python3 /emsdk/upstream/emscripten/tools/file_packager.py ${BUILD_DIR}/model-files.data --preload ${MODELS_DIR} --js-output=${BUILD_DIR}/model-files.js' + +# Run WASM-compiled marian-decoder +run-wasm: BUILD_DIR = /repo/build-wasm-docker +run-wasm: + ${run_on_docker} -p 8000:8000 ${WASM_IMAGE} bash -c 'emrun --no_browser --port 8000 ${BUILD_DIR}' + + +# Remove the marian-decoder WASM-without-pthreads build dir, forcing a clean compilation attempt +clean-wasm-without-pthreads: BUILD_DIR = /repo/build-wasm-without-pthreads-docker +clean-wasm-without-pthreads: ${HOME}/.ccache + ${run_on_docker} ${WASM_IMAGE} bash -c '(rm -rf ${BUILD_DIR} || true)' + +# Compile marian-decoder to WASM-without-pthreads +compile-wasm-without-pthreads: BUILD_DIR = /repo/build-wasm-without-pthreads-docker +compile-wasm-without-pthreads: ${HOME}/.ccache + ${run_on_docker} ${WASM_IMAGE} bash -c 'mkdir -p ${BUILD_DIR} && \ +cd ${BUILD_DIR} && \ +(emcmake ${wasm_without_pthreads_cmake_cmd} .. 
&& \ +(emmake ${make_cmd}))' + +# Prepare files to be used with WASM-without-pthreads-compiled marian-decoder +package-files-wasm-without-pthreads: BUILD_DIR = /repo/build-wasm-without-pthreads-docker +package-files-wasm-without-pthreads: MODELS_DIR = /repo/models +package-files-wasm-without-pthreads: + ${run_on_docker} ${WASM_IMAGE} bash -c 'sacrebleu -t wmt13 -l es-en --echo src > /repo/models/newstest2013.es && \ +head -n10 /repo/models/newstest2013.es > /repo/models/newstest2013.es.top10lines && \ +python3 /emsdk/upstream/emscripten/tools/file_packager.py ${BUILD_DIR}/model-files.data --preload ${MODELS_DIR} --js-output=${BUILD_DIR}/model-files.js' + +# Run WASM-without-pthreads-compiled marian-decoder +run-wasm-without-pthreads: BUILD_DIR = /repo/build-wasm-without-pthreads-docker +run-wasm-without-pthreads: + ${run_on_docker} -p 8000:8000 ${WASM_IMAGE} bash -c 'emrun --no_browser --port 8000 ${BUILD_DIR}' + + +# # Compile full native version +# compile-native: BUILD_DIR = /repo/build-native-docker +# compile-native: ${HOME}/.ccache +# ${run_on_docker} ${NATIVE_IMAGE} bash -c '(rm -rf ${BUILD_DIR} || true) && \ +# mkdir -p ${BUILD_DIR} &&\ +# cd ${BUILD_DIR} &&\ +# (${native_cmake_cmd} .. &&\ +# ${make_cmd} || rm CMakeCache.txt)' + +# # Echo the command to compile the native version of marian-decoder with a wasm-incompatible blas +# echo-compile-decoder-only-native: BUILD_DIR = ./build-decoder-only-native +# echo-compile-decoder-only-native: ${HOME}/.ccache +# echo 'mkdir -p ${BUILD_DIR} && \ +# cd ${BUILD_DIR} && \ +# (${decoder_only_native_cmake_cmd} .. 
&& \ +# ${make_cmd} || rm CMakeCache.txt)' + +# echo-benchmark-decoder-only-native: BUILD_DIR = ./build-decoder-only-native +# echo-benchmark-decoder-only-native: MODEL_DIR = ./docker/students/esen/esen.student.tiny11 +# echo-benchmark-decoder-only-native: ${HOME}/.ccache +# echo 'mkdir -p ./docker/students/marian-dev/ && \ +# (rm ./docker/students/marian-dev/build || true) && \ +# ln -s "$$PWD/${BUILD_DIR}" ./docker/students/marian-dev/build && \ +# cd ${MODEL_DIR}/ && \ +# ./speed.cpu.sh; cd -; \ +# (rm ./docker/students/marian-dev/build || true)' + +# # Echo the command to compile the native version of marian-decoder with a wasm-compatible blas +# echo-compile-wasm-compatible-decoder-only-native: BUILD_DIR = ./build-wasm-compatible-decoder-only-native +# echo-compile-wasm-compatible-decoder-only-native: ${HOME}/.ccache +# echo 'mkdir -p ${BUILD_DIR} && \ +# cd ${BUILD_DIR} && \ +# (${wasm_compatible_decoder_only_native_cmake_cmd} .. && \ +# ${make_cmd} || rm CMakeCache.txt)' + +# echo-benchmark-wasm-compatible-decoder-only-native: BUILD_DIR = ./build-wasm-compatible-decoder-only-native +# echo-benchmark-wasm-compatible-decoder-only-native: MODEL_DIR = ./docker/students/esen/esen.student.tiny11 +# echo-benchmark-wasm-compatible-decoder-only-native: ${HOME}/.ccache +# echo 'mkdir -p ./docker/students/marian-dev/ && \ +# (rm ./docker/students/marian-dev/build || true) && \ +# ln -s "$$PWD/${BUILD_DIR}" ./docker/students/marian-dev/build && \ +# cd ${MODEL_DIR}/ && \ +# ./speed.cpu.sh; cd -; \ +# (rm ./docker/students/marian-dev/build || true)' + +# # Remove the build directory of native marian-decoder, forcing a clean compilation attempt +# clean-decoder-only-native: BUILD_DIR = /repo/build-decoder-only-native-docker +# clean-decoder-only-native: ${HOME}/.ccache +# ${run_on_docker} ${NATIVE_IMAGE} bash -c '(rm -rf ${BUILD_DIR} || true)' + +# # Compile native version of marian-decoder with a wasm-incompatible blas +# compile-decoder-only-native: BUILD_DIR = 
/repo/build-decoder-only-native-docker +# compile-decoder-only-native: ${HOME}/.ccache +# ${run_on_docker} ${NATIVE_IMAGE} bash -c 'mkdir -p ${BUILD_DIR} &&\ +# cd ${BUILD_DIR} &&\ +# (${decoder_only_native_cmake_cmd} .. &&\ +# ${make_cmd} || rm CMakeCache.txt)' + +# benchmark-decoder-only-native: BUILD_DIR = /repo/build-decoder-only-native-docker +# benchmark-decoder-only-native: MODEL_DIR = /repo/docker/students/esen/esen.student.tiny11 +# benchmark-decoder-only-native: ${HOME}/.ccache +# ${run_on_docker} ${NATIVE_IMAGE} bash -c 'mkdir -p /repo/docker/students/marian-dev/ && \ +# (rm /repo/docker/students/marian-dev/build || true) && \ +# ln -s ${BUILD_DIR} /repo/docker/students/marian-dev/build && \ +# cd ${MODEL_DIR}/ && \ +# ./speed.cpu.sh; \ +# (rm /repo/docker/students/marian-dev/build || true)' + +# # Remove the build directory of wasm-compatible native marian-decoder, forcing a clean compilation attempt +# clean-wasm-compatible-decoder-only-native: BUILD_DIR = /repo/build-wasm-compatible-decoder-only-native-docker +# clean-wasm-compatible-decoder-only-native: ${HOME}/.ccache +# ${run_on_docker} ${NATIVE_IMAGE} bash -c '(rm -rf ${BUILD_DIR} || true)' + +# # Compile native version of marian-decoder with a wasm-compatible blas +# compile-wasm-compatible-decoder-only-native: BUILD_DIR = /repo/build-wasm-compatible-decoder-only-native-docker +# compile-wasm-compatible-decoder-only-native: ${HOME}/.ccache +# ${run_on_docker} ${NATIVE_IMAGE} bash -c 'mkdir -p ${BUILD_DIR} &&\ +# cd ${BUILD_DIR} &&\ +# (${wasm_compatible_decoder_only_native_cmake_cmd} .. 
&&\ +# ${make_cmd})' + +# benchmark-wasm-compatible-decoder-only-native: BUILD_DIR = /repo/build-wasm-compatible-decoder-only-native-docker +# benchmark-wasm-compatible-decoder-only-native: MODEL_DIR = /repo/docker/students/esen/esen.student.tiny11 +# benchmark-wasm-compatible-decoder-only-native: ${HOME}/.ccache +# ${run_on_docker} ${NATIVE_IMAGE} bash -c 'mkdir -p /repo/docker/students/marian-dev/ && \ +# (rm /repo/docker/students/marian-dev/build || true) && \ +# ln -s ${BUILD_DIR} /repo/docker/students/marian-dev/build && \ +# cd ${MODEL_DIR}/ && \ +# ./speed.cpu.sh; \ +# (rm /repo/docker/students/marian-dev/build || true)' + +# # Remove the marian-decoder WASM build dir, forcing a clean compilation attempt +# clean-wasm: BUILD_DIR = /repo/build-wasm-docker +# clean-wasm: ${HOME}/.ccache +# ${run_on_docker} ${WASM_IMAGE} bash -c '(rm -rf ${BUILD_DIR} || true)' + +# # Compile sentencepiece to WASM +# sentencepiece_wasm_cmake_cmd = cmake --debug-output -Wno-dev +# sentencepiece_wasm_cmake_cmd += -DCOMPILE_WASM=on +# sentencepiece_wasm_cmake_cmd += -DSPM_ENABLE_SHARED=off +# sentencepiece_wasm_cmake_cmd += -DProtobuf_INCLUDE_DIR=/usr/opt/protobuf-wasm-lib/dist/include +# sentencepiece_wasm_cmake_cmd += -DProtobuf_LIBRARY=/usr/opt/protobuf-wasm-lib/dist/lib/libprotobuf.a + +# compile-sentencepiece-wasm: BUILD_DIR = /repo/src/3rd_party/sentencepiece/build-wasm-docker +# compile-sentencepiece-wasm: ${HOME}/.ccache +# ${run_on_docker} ${WASM_IMAGE} bash -c '(rm -rf ${BUILD_DIR} || true) && \ +# mkdir -p ${BUILD_DIR} && \ +# cd ${BUILD_DIR} && \ +# (emcmake ${sentencepiece_wasm_cmake_cmd} .. 
&& \ +# (emmake ${make_cmd}) || \ +# rm CMakeCache.txt)' + +# # Compile stdin test directly +# compile_test_stdin_cmd = em++ +# compile_test_stdin_cmd += -O2 -s WASM=1 -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 +# compile_test_stdin_cmd += -s FORCE_FILESYSTEM=1 +# compile_test_stdin_cmd += -s ALLOW_MEMORY_GROWTH=1 +# compile_test_stdin_cmd += -o test-stdin-wasm.html +# compile_test_stdin_cmd += --pre-js /repo/wasm/pre-module.js --post-js /repo/wasm/post-module.js --shell-file /repo/wasm/custom_shell.html +# compile_test_stdin_cmd += /repo/wasm/test_stdin.cpp + +# compile-and-run-test-stdin-wasm: BUILD_DIR = /repo/build-test-stdin-wasm +# compile-and-run-test-stdin-wasm: +# ${run_on_docker} ${WASM_IMAGE} bash -c '(rm -rf ${BUILD_DIR} || true) && \ +# mkdir -p ${BUILD_DIR} &&\ +# cd ${BUILD_DIR} &&\ +# ${compile_test_stdin_cmd} &&\ +# ${run_on_docker} -p 8009:8009 ${WASM_IMAGE} bash -c 'emrun --no_browser --port 8009 ${BUILD_DIR}' + +# # Start interactive shells for development / debugging purposes +# native-shell: INTERACTIVE_DOCKER_SESSION = -it +# native-shell: +# ${run_on_docker} ${NATIVE_IMAGE} bash + +# wasm-shell: INTERACTIVE_DOCKER_SESSION = -it +# wasm-shell: +# ${run_on_docker} ${WASM_IMAGE} bash \ No newline at end of file diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 000000000..28e15bb1e --- /dev/null +++ b/docker/README.md @@ -0,0 +1,151 @@ +## Native (via Docker) + +Prepare docker image for Native compilation: + +```bash +make native-image +``` + +Compile natively: + +```bash +make compile-native +``` + +Compile natively, marian-decoder only with a wasm-incompatible blas library: + +```bash +make compile-decoder-only-native +``` + +Compile natively, marian-decoder only with a wasm-compatible blas library: + +```bash +make compile-wasm-compatible-decoder-only-native +``` + +Enter a docker container shell for manually running commands: + +```bash +make native-shell +``` + +Run the native-compiled code by entering a 
docker container shell above and manually running commands, e.g.: + +```bash +cd /repo/build-native-docker # OR: cd /repo/build-decoder-only-native-docker +echo "hola mundo" | ./marian-decoder -m /repo/models/model.npz -v /repo/models/vocab.esen.spm /repo/models/vocab.esen.spm --cpu-threads 1 +``` + +## WASM + +Prepare docker image for WASM compilation: + +```bash +make wasm-image +``` + +Compile to wasm: + +```bash +make compile-wasm +``` + +Create a directory called `models/` in the repo root and fill it with relevant model files that should be available to the WASM-compiled runtime, then run: +```bash +make package-files-wasm +``` + +Run the wasm-compiled code: + +```bash +make run-wasm +``` + +Then open up `http://localhost:8000/marian-decoder.html?stdinInput=&arguments=` in a browser, e.g.: + +```bash +open "http://localhost:8000/marian-decoder.html?stdinInput=Hola mundo&arguments=-m /repo/models/model.npz -v /repo/models/vocab.esen.spm /repo/models/vocab.esen.spm --cpu-threads 1" +``` + +Note: To run in Chrome, launch Chrome with ` --js-flags="--experimental-wasm-simd"`, e.g.: + +```bash +/Applications/Google\ Chrome\ Canary.app/Contents/MacOS/Google\ Chrome\ Canary --js-flags="--experimental-wasm-simd" +``` + +To compile to WASM without pthreads, the corresponding commands are: + +```bash +make compile-wasm-without-pthreads +make package-files-wasm-without-pthreads +make run-wasm-without-pthreads +open "http://localhost:8000/marian-decoder.html" +``` + +## Benchmarking + +First, get the relevant models in place: +```bash +cd students/esen/ +./download-models.sh +``` + +Then: + +```bash +make benchmark-decoder-only-native +make benchmark-wasm-compatible-decoder-only-native +``` + +For WASM: +```bash +cp students/esen/esen.student.tiny11/vocab.esen.spm ../models/ +cp students/esen/esen.student.tiny11/model.npz ../models/ +cp students/esen/esen.student.tiny11/lex.s2t ../models/ +make package-files-wasm-without-pthreads +make run-wasm-without-pthreads +``` + +Then
open `http://localhost:8000/marian-decoder.html?arguments=-m /repo/models/model.npz -v /repo/models/vocab.esen.spm /repo/models/vocab.esen.spm -i /repo/models/newstest2013.es.top300lines --beam-size 1 --mini-batch 32 --maxi-batch 100 --maxi-batch-sort src -w 128 --skip-cost --shortlist /repo/models/lex.s2t 50 50 --cpu-threads 1` + +## Compile and benchmark outside of docker + +```bash +make echo-compile-decoder-only-native +make echo-compile-wasm-compatible-decoder-only-native +make echo-benchmark-decoder-only-native +make echo-benchmark-wasm-compatible-decoder-only-native +``` + +Copy the output of these commands and run those commands in the marian-dev source directory. Check `./native/Dockerfile` for a hint of which dependencies need to be installed on your system. + +## Debugging + +Remove the marian-decoder build dir, forcing the next compilation attempt to start from scratch: + +```bash +make clean-decoder-only-native +make clean-wasm-compatible-decoder-only-native +make clean-wasm +make clean-wasm-without-pthreads +``` + +Compile only sentencepiece to wasm: + +```bash +make compile-sentencepiece-wasm +``` + +Compile and run a wasm stdin test: + +```bash +make compile-and-run-test-stdin-wasm +open "http://localhost:8009/test-stdin-wasm.html" +``` + +Enter a docker container shell for manually running commands: + +```bash +make wasm-shell +``` diff --git a/docker/wasm/Dockerfile b/docker/wasm/Dockerfile new file mode 100644 index 000000000..f5a9cd752 --- /dev/null +++ b/docker/wasm/Dockerfile @@ -0,0 +1,36 @@ +FROM emscripten/emsdk:2.0.9 + +# Install specific version of CMake +WORKDIR /usr +RUN wget https://github.com/Kitware/CMake/releases/download/v3.17.2/cmake-3.17.2-Linux-x86_64.tar.gz -qO-\ + | tar xzf - --strip-components 1 + +# Install Python and Java (needed for Closure Compiler minification) +RUN apt-get update \ + && apt-get install -y \ + python3 \ + default-jre + +# Deps to compile protobuf from source + the protoc binary which we need
natively +RUN apt-get update -y && apt-get --no-install-recommends -y install \ + protobuf-compiler \ + autoconf \ + autotools-dev \ + automake \ + autogen \ + libtool && ln -s /usr/bin/libtoolize /usr/bin/libtool + #&& mkdir -p /usr/opt \ + #&& cd /usr/opt + #&& git clone https://github.com/menduz/protobuf-wasm-lib + +# RUN cd /usr/opt/protobuf-wasm-lib \ +# && /bin/bash -c "BRANCH=v3.6.1 ./prepare.sh" +# RUN cd /usr/opt/protobuf-wasm-lib/protobuf \ +# && bash -x ../build.sh +# RUN cp /usr/bin/protoc /usr/opt/protobuf-wasm-lib/dist/bin/protoc + +# RUN apt-get --no-install-recommends -y install \ +# libprotobuf-dev + +# Necessary for benchmarking +RUN pip3 install sacrebleu diff --git a/wasm/custom_shell.html b/wasm/custom_shell.html new file mode 100644 index 000000000..15d50e7ad --- /dev/null +++ b/wasm/custom_shell.html @@ -0,0 +1,479 @@ + + + + + + Emscripten-Generated Code + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
Downloading...
+Resize canvasLock/hide mouse pointer     +
+ +
+
+ +
+ + + +{{{ SCRIPT }}} + + \ No newline at end of file diff --git a/wasm/post-module.js b/wasm/post-module.js new file mode 100644 index 000000000..e69de29bb diff --git a/wasm/pre-module.js b/wasm/pre-module.js new file mode 100644 index 000000000..11bbe7b2e --- /dev/null +++ b/wasm/pre-module.js @@ -0,0 +1,71 @@ +// Enables setting runtime args via the query string (as long as this is run in the main browser thread and not in a worker) +let stdinInput = false; +if ( + typeof window !== "undefined" && + window.location && + window.location.search +) { + const urlParams = new URLSearchParams(window.location.search); + if (urlParams.get("stdinInput")) { + stdinInput = urlParams.get("stdinInput"); + console.log("Using stdinInput from URL"); + } + if (urlParams.get("arguments")) { + Module["arguments"] = urlParams.get("arguments").split(' ') + // Module["arguments"] = urlParams.get("arguments").split('%20'); + console.log("Using arguments from URL"); + } +} +console.log('stdinInput', stdinInput); +console.log('Module["arguments"]', Module["arguments"]); +Module["noInitialRun"] = true; +Module["onRuntimeInitialized"] = _ => { + try { + console.log("Calling main in a try-catch block to be able to get readable exception messages"); + callMain(Module["arguments"]) + } catch (exception) { + console.error("WASM exception thrown", Module.getExceptionMessage(exception)) + } +}; +var initStdInOutErr = function() { + var i = 0; + function stdin() { + if (stdinInput === false) { + console.log("STDIN: No stdin input specified"); + return null; + } + var input = stdinInput + "\n"; + if (i < input.length) { + var code = input.charCodeAt(i); + ++i; + console.log("STDIN: Feeding character code to stdin: ", code); + return code; + } else { + console.log("STDIN: Done feeding input via stdin: ", input); + return null; + } + } + + var stdoutBuffer = ""; + function stdout(code) { + if (code === "\n".charCodeAt(0) && stdoutBuffer !== "") { + console.log("STDOUT: ", stdoutBuffer); + 
stdoutBuffer = ""; + } else { + stdoutBuffer += String.fromCharCode(code); + } + } + + var stderrBuffer = ""; + function stderr(code) { + if (code === "\n".charCodeAt(0) && stderrBuffer !== "") { + console.log("STDERR: ", stderrBuffer); + stderrBuffer = ""; + } else { + stderrBuffer += String.fromCharCode(code); + } + } + + FS.init(stdin, stdout, stderr); +} +Module["preRun"].push(initStdInOutErr); diff --git a/wasm/test_stdin.cpp b/wasm/test_stdin.cpp new file mode 100644 index 000000000..b95673cbf --- /dev/null +++ b/wasm/test_stdin.cpp @@ -0,0 +1,65 @@ +/* + * Copyright 2013 The Emscripten Authors. All rights reserved. + * Emscripten is available under two separate licenses, the MIT license and the + * University of Illinois/NCSA Open Source License. Both these licenses can be + * found in the LICENSE file. + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#ifdef __EMSCRIPTEN__ +#include <emscripten.h> +#endif + +int line = 0; + +void main_loop() +{ + char str[10] = {0}; // 9 characters + terminating NUL; must match the %9s width below + int ret; + + errno = 0; + while (errno != EAGAIN) { + if (line == 0) { + ret = fgetc(stdin); + if (ret != EOF) putc(ret, stdout); + if (ret == '\n') line++; + } else if (line > 0) { + ret = scanf("%9s", str); + if (ret > 0) puts(str); + } + + int err = ferror(stdin); + if (err && errno != EAGAIN) { + printf("error %d\n", err); + exit(EXIT_FAILURE); + } + + if (feof(stdin)) { + puts("eof"); + exit(EXIT_SUCCESS); + } + + clearerr(stdin); + } +} + +int main(int argc, char const *argv[]) +{ + fcntl(STDIN_FILENO, F_SETFL, O_NONBLOCK); + + // SM shell doesn't implement an event loop and therefore doesn't support + // emscripten_set_main_loop. However, its stdin reads are sync so it + // should exit out after calling main_loop once.
+ main_loop(); + +#ifdef __EMSCRIPTEN__ + emscripten_set_main_loop(main_loop, 60, 0); +#else + while (1) { main_loop(); sleep(1); } // poll stdin once per second instead of busy-spinning +#endif + return 0; +} From 4d1de8c17b710b5980ad6bd1d054621b6a809a31 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 15 Feb 2021 14:39:21 +0100 Subject: [PATCH 20/46] cmake changes for compiling without try/catch - Set COMPILE_WITHOUT_EXCEPTIONS cmake option to ON to compile marian without try/catch code - By default it is off --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 323f8adaa..f79d6665f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ option(USE_DOXYGEN "Build documentation with Doxygen" ON) option(USE_FBGEMM "Use FBGEMM" OFF) option(USE_MKL "Compile with MKL support" ON) option(USE_WASM_COMPATIBLE_BLAS "Compile with a WASM compatible blas for decoder only builds" OFF) +option(COMPILE_WITHOUT_EXCEPTIONS "Compile without exceptions" OFF) option(USE_MPI "Use MPI library" OFF) option(USE_NCCL "Use NCCL library" ON) option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON) @@ -46,6 +47,10 @@ if(COMPILE_WITH_PTHREADS) add_compile_definitions(USE_PTHREADS) endif() +if(COMPILE_WITHOUT_EXCEPTIONS) + add_compile_definitions(WITHOUT_EXCEPTIONS) +endif() + # fbgemm and sentencepiece are both defined with "non-local" installation targets (the source projects don't define them, # so we define them in src\3rd_party\CMakeLists.txt), but that isn't supported until CMake 3.12. Prior to CMake 3.12, # targets could only be install(...)ed in the same CMakeLists.txt they were defined.
We currently target CMake 3.5.1 From 769e229530311479c195168084a9580cd747dfb1 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 15 Feb 2021 14:58:56 +0100 Subject: [PATCH 21/46] Guard try/catch in marian sources (except submodules) - All try/catch in marian sources are guarded with WITHOUT_EXCEPTIONS pre-processor directive --- src/3rd_party/CLI/App.hpp | 18 ++++++++++++++++++ src/3rd_party/CLI/ConfigFwd.hpp | 5 +++++ src/3rd_party/CLI/Option.hpp | 4 ++++ src/3rd_party/CLI/TypeTools.hpp | 21 ++++++++++++++++++++- src/3rd_party/threadpool.h | 4 ++++ src/3rd_party/zstr/strict_fstream.hpp | 5 +++++ src/common/cli_helper.cpp | 4 ++++ src/common/cli_wrapper.cpp | 5 ++++- src/common/config.cpp | 10 ++++++++++ src/common/logging.cpp | 4 ++++ src/data/corpus_base.cpp | 8 ++++++++ src/graph/expression_graph.h | 6 ++++++ src/rescorer/score_collector.h | 4 ++++ src/translator/output_printer.h | 8 ++++++++ src/translator/scorers.cpp | 16 ++++++++++++++++ 15 files changed, 120 insertions(+), 2 deletions(-) diff --git a/src/3rd_party/CLI/App.hpp b/src/3rd_party/CLI/App.hpp index bf493959b..31eb2cb1d 100644 --- a/src/3rd_party/CLI/App.hpp +++ b/src/3rd_party/CLI/App.hpp @@ -29,12 +29,16 @@ namespace CLI { #ifndef CLI11_PARSE +#if WITHOUT_EXCEPTIONS + #define CLI11_PARSE(app, argc, argv) (app).parse((argc), (argv)) +#else #define CLI11_PARSE(app, argc, argv) \ try { \ (app).parse((argc), (argv)); \ } catch(const CLI::ParseError &e) { \ return (app).exit(e); \ } +#endif // WITHOUT_EXCEPTIONS #endif namespace detail { @@ -1296,6 +1300,10 @@ class App { config_required_ = true; } if(!config_name_.empty()) { + #if WITHOUT_EXCEPTIONS + std::vector values = config_formatter_->from_file(config_name_); + _parse_config(values); + #else try { std::vector values = config_formatter_->from_file(config_name_); _parse_config(values); @@ -1303,6 +1311,7 @@ class App { if(config_required_) throw; } + #endif } } @@ -1394,16 +1403,24 @@ class App { bool _parse_single_config(const 
ConfigItem &item, size_t level = 0) { if(level < item.parents.size()) { App *subcom; + #if WITHOUT_EXCEPTIONS + std::cout << item.parents.at(level) << std::endl; + subcom = get_subcommand(item.parents.at(level)); + #else try { std::cout << item.parents.at(level) << std::endl; subcom = get_subcommand(item.parents.at(level)); } catch(const OptionNotFound &) { return false; } + #endif return subcom->_parse_single_config(item, level + 1); } Option *op; + #if WITHOUT_EXCEPTIONS + op = get_option("--" + item.name); + #else try { op = get_option("--" + item.name); } catch(const OptionNotFound &) { @@ -1413,6 +1430,7 @@ class App { missing_.emplace_back(detail::Classifer::NONE, item.fullname()); return false; } + #endif if(!op->get_configurable()) throw ConfigError::NotConfigurable(item.fullname()); diff --git a/src/3rd_party/CLI/ConfigFwd.hpp b/src/3rd_party/CLI/ConfigFwd.hpp index aa179e5c9..8a31ac1ae 100644 --- a/src/3rd_party/CLI/ConfigFwd.hpp +++ b/src/3rd_party/CLI/ConfigFwd.hpp @@ -80,12 +80,17 @@ class Config { } else if(val == "false" || val == "off" || val == "no") { return std::vector(); } else { + #if WITHOUT_EXCEPTIONS + size_t ui = std::stoul(val); + return std::vector(ui); + #else try { size_t ui = std::stoul(val); return std::vector(ui); } catch(const std::invalid_argument &) { throw ConversionError::TrueFalse(item.fullname()); } + #endif } } else { throw ConversionError::TooManyInputsFlag(item.fullname()); diff --git a/src/3rd_party/CLI/Option.hpp b/src/3rd_party/CLI/Option.hpp index 7a7d666db..0a91021ae 100644 --- a/src/3rd_party/CLI/Option.hpp +++ b/src/3rd_party/CLI/Option.hpp @@ -303,11 +303,15 @@ class Option : public OptionBase