From 9f857c003abaa2085fb74adaa87aa824292eaca5 Mon Sep 17 00:00:00 2001 From: Satyanaraya Illa Date: Tue, 22 Mar 2022 11:08:24 +0530 Subject: [PATCH 1/2] Add TensorFlow examples - ResNet50 and BERT models Signed-off-by: Satyanaraya Illa --- tensorflow/BERT/.gitignore | 3 + tensorflow/BERT/Makefile | 58 ++++++++ tensorflow/BERT/python.manifest.template | 43 ++++++ tensorflow/README.md | 145 +++++++++++++++++++ tensorflow/ResNet50/.gitignore | 2 + tensorflow/ResNet50/Makefile | 47 ++++++ tensorflow/ResNet50/python.manifest.template | 41 ++++++ 7 files changed, 339 insertions(+) create mode 100755 tensorflow/BERT/.gitignore create mode 100755 tensorflow/BERT/Makefile create mode 100755 tensorflow/BERT/python.manifest.template create mode 100755 tensorflow/README.md create mode 100755 tensorflow/ResNet50/.gitignore create mode 100755 tensorflow/ResNet50/Makefile create mode 100755 tensorflow/ResNet50/python.manifest.template diff --git a/tensorflow/BERT/.gitignore b/tensorflow/BERT/.gitignore new file mode 100755 index 0000000..ca0b8b8 --- /dev/null +++ b/tensorflow/BERT/.gitignore @@ -0,0 +1,3 @@ +/models/ +/data/ +/output/ diff --git a/tensorflow/BERT/Makefile b/tensorflow/BERT/Makefile new file mode 100755 index 0000000..0cdf77c --- /dev/null +++ b/tensorflow/BERT/Makefile @@ -0,0 +1,58 @@ +# BERT sample for TensorFlow + +ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine) +SGX_SIGNER_KEY ?= ../../../Pal/src/host/Linux-SGX/signer/enclave-key.pem + +ifeq ($(DEBUG),1) +GRAMINE_LOG_LEVEL = debug +else +GRAMINE_LOG_LEVEL = error +endif + +.PHONY: all +all: python.manifest +ifeq ($(SGX),1) +all: python.manifest.sgx python.sig python.token +endif + +BERT_DATASET = https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip +SQUAAD_DATASET = https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json +CHECKPOINTS = https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/bert_large_checkpoints.zip +BERT_FP32_MODEL = 
https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_4_0/fp32_bert_squad.pb + +.PHONY: collateral +collateral: + test -d models || git clone https://github.com/IntelAI/models.git + mkdir -p data + test -f data/wwm_uncased_L-24_H-1024_A-16.zip || wget $(BERT_DATASET) -P data/ + test -d data/wwm_uncased_L-24_H-1024_A-16 || unzip data/wwm_uncased_L-24_H-1024_A-16.zip -d data + test -f data/wwm_uncased_L-24_H-1024_A-16/dev-v1.1.json || wget $(SQUAAD_DATASET) -P data/wwm_uncased_L-24_H-1024_A-16 + test -f data/bert_large_checkpoints.zip || wget $(CHECKPOINTS) -P data/ + test -d data/bert_large_checkpoints || unzip data/bert_large_checkpoints.zip -d data + test -f data/fp32_bert_squad.pb || wget $(BERT_FP32_MODEL) -P data/ + +python.manifest: python.manifest.template collateral + gramine-manifest \ + -Dlog_level=$(GRAMINE_LOG_LEVEL) \ + -Darch_libdir=$(ARCH_LIBDIR) \ + -Dentrypoint=$(realpath $(shell sh -c "command -v python3")) \ + -Dpythondistpath=$(PYTHONDISTPATH) \ + $< >$@ + +python.manifest.sgx: python.manifest + gramine-sgx-sign \ + --manifest $< \ + --output $@ + +python.sig: python.manifest.sgx + +python.token: python.sig + gramine-sgx-get-token --output $@ --sig $< + +.PHONY: clean +clean: + $(RM) *.manifest *.manifest.sgx *.token *.sig + +.PHONY: distclean +distclean: clean + $(RM) -r models/ data/ diff --git a/tensorflow/BERT/python.manifest.template b/tensorflow/BERT/python.manifest.template new file mode 100755 index 0000000..303f486 --- /dev/null +++ b/tensorflow/BERT/python.manifest.template @@ -0,0 +1,43 @@ +libos.entrypoint = "{{ entrypoint }}" +loader.entrypoint = "file:{{ gramine.libos }}" + +loader.log_level = "{{ log_level }}" + +loader.insecure__use_cmdline_argv = true +loader.insecure__use_host_env = true +loader.insecure__disable_aslr = true + +loader.env.LD_LIBRARY_PATH = "{{ python.stdlib }}/lib:/lib:{{ arch_libdir }}:/usr/lib:/usr/{{ arch_libdir }}" +loader.pal_internal_mem_size = "512M" + +fs.mounts = [ + { path = "/lib", uri = 
"file:{{ gramine.runtimedir() }}" }, + { path = "/usr", uri = "file:/usr" }, + { type = "tmpfs", path = "/tmp" }, + { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" }, + { path = "{{ python.stdlib }}", uri = "file:{{ python.stdlib }}" }, + { path = "{{ python.distlib }}", uri = "file:{{ python.distlib }}" }, + { path = "{{ pythondistpath }}", uri = "file:{{ pythondistpath }}" }, +] + +sgx.enclave_size = "32G" +sgx.thread_num = 256 +sgx.preheat_enclave = true +sgx.nonpie_binary = true + +sgx.trusted_files = [ + "file:{{ gramine.runtimedir() }}/", + "file:{{ arch_libdir }}/", + "file:/usr/{{ arch_libdir }}/", + "file:{{ gramine.libos }}", + "file:{{ entrypoint }}", + "file:{{ python.stdlib }}/", + "file:{{ python.distlib }}/", + "file:{{ pythondistpath }}/", + "file:models/", + "file:data/", +] + +sgx.allowed_files = [ + "file:output/", +] diff --git a/tensorflow/README.md b/tensorflow/README.md new file mode 100755 index 0000000..5478c60 --- /dev/null +++ b/tensorflow/README.md @@ -0,0 +1,145 @@ +## Inference on TensorFlow BERT and ResNet50 models + +This directory contains steps and artifacts to run inference with TensorFlow BERT and ResNet50 +sample workloads on Gramine. Specifically, both these examples use pre-trained models to run +inference. + +### Bidirectional Encoder Representations from Transformers (BERT) + +BERT is a method of pre-training language representations and then using that trained model for +downstream NLP tasks like 'question answering'. BERT is an unsupervised, deeply bidirectional system +for pre-training NLP. +In this BERT sample, we use **BERT-Large, Uncased (Whole Word Masking)** model and perform int8 +inference. More details about BERT can be found at https://github.com/google-research/bert. + +### Residual Network (ResNet) + +ResNet50 is a convolutional neural network that is 50 layers deep. +In this ResNet50 (v1.5) sample, we use a pre-trained model and perform int8 inference. 
+More details about ResNet50 can be found at https://github.com/IntelAI/models/tree/icx-launch-public/benchmarks/image_recognition/tensorflow/resnet50v1_5.
+
+## Prerequisites
+
+- Install unzip.
+- Upgrade pip/pip3.
+- Install TensorFlow using `pip install intel-tensorflow-avx512==2.4.0`.
+
+## Build BERT or ResNet50 sample
+
+- To build the non-SGX version, do `make PYTHONDISTPATH=path_to_python_dist_packages/`.
+- To build the SGX version, do `make PYTHONDISTPATH=path_to_python_dist_packages/ SGX=1`.
+- Typically, `path_to_python_dist_packages` is `/usr/local/lib/python3.6/dist-packages`, but can
+change based on Python's installation directory.
+- To clean the sample, do `make clean`.
+- To clean and remove downloaded models and datasets, do `make distclean`.
+
+**WARNING:** Building the BERT sample downloads about 5GB of data.
+
+## Run inference on BERT model
+
+- To run int8 inference on `gramine-sgx` (SGX version):
+```
+OMP_NUM_THREADS=36 KMP_AFFINITY=granularity=fine,verbose,compact,1,0 taskset -c 0-35 gramine-sgx \
+./python models/models/language_modeling/tensorflow/bert_large/inference/run_squad.py \
+--init_checkpoint=data/bert_large_checkpoints/model.ckpt-3649 \
+--vocab_file=data/wwm_uncased_L-24_H-1024_A-16/vocab.txt \
+--bert_config_file=data/wwm_uncased_L-24_H-1024_A-16/bert_config.json \
+--predict_file=data/wwm_uncased_L-24_H-1024_A-16/dev-v1.1.json \
+--precision=int8 \
+--output_dir=output/bert-squad-output \
+--predict_batch_size=32 \
+--experimental_gelu=True \
+--optimized_softmax=True \
+--input_graph=data/fp32_bert_squad.pb \
+--do_predict=True --mode=benchmark \
+--inter_op_parallelism_threads=1 \
+--intra_op_parallelism_threads=36
+```
+- To run int8 inference on `gramine-direct` (non-SGX version), replace `gramine-sgx` with
+`gramine-direct` in the above command.
+- To run int8 inference natively (outside Gramine), replace `gramine-sgx ./python` with
+`python3` in the above command.
+
+## Run inference on ResNet50 model
+
+- To run inference on `gramine-sgx` (SGX version):
+```
+OMP_NUM_THREADS=36 KMP_AFFINITY=granularity=fine,verbose,compact,1,0 taskset -c 0-35 gramine-sgx \
+./python models/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py \
+--input-graph=resnet50v1_5_int8_pretrained_model.pb \
+--num-inter-threads=1 \
+--num-intra-threads=36 \
+--batch-size=32 \
+--warmup-steps=50 \
+--steps=500
+```
+- To run inference on `gramine-direct` (non-SGX version), replace `gramine-sgx` with
+`gramine-direct` in the above command.
+- To run inference natively (outside Gramine), replace `gramine-sgx ./python` with
+`python3` in the above command.
+
+## Notes on optimal performance
+
+The above commands are for a 36-core system. Please set the following options accordingly for
+optimal performance:
+
+- Assuming that X is the number of cores per socket, set `OMP_NUM_THREADS=X`,
+  `intra_op_parallelism_threads=X` for BERT and `num_intra_threads=X` for ResNet50.
+- Specify the whole range of cores available on one of the sockets in `taskset`.
+- If hyperthreading is enabled: use `KMP_AFFINITY=granularity=fine,verbose,compact,1,0`.
+- If hyperthreading is disabled: use `KMP_AFFINITY=granularity=fine,verbose,compact`.
+- Note that `OMP_NUM_THREADS` sets the maximum number of threads to
+  use for OpenMP parallel regions, and `KMP_AFFINITY` binds OpenMP threads
+  to physical processing units.
+- The options `batch-size`, `warmup-steps` and `steps` can be varied for the ResNet50 sample.
+- To get the number of cores per socket, do `lscpu | grep 'Core(s) per socket'`.
+
+## Performance considerations
+### CPU frequency scaling
+
+Linux systems have a CPU frequency scaling governor that helps the system scale the CPU frequency
+to achieve the best performance or to save power, as required. To achieve the best
+performance, please set the CPU frequency scaling governor to `performance` mode.
+
+```bash
+for ((i=0; i<$(nproc); i++)); do
+    echo 'performance' > /sys/devices/system/cpu/cpu$i/cpufreq/scaling_governor;
+done
+```
+
+### Manifest options for performance
+
+- The preheat manifest option pre-faults the enclave memory and moves the performance penalty to
+Gramine-SGX startup (before the workload starts executing). To use the preheat option, make sure
+that `sgx.preheat_enclave = true` is added to the manifest template.
+
+### Memory allocator libraries
+
+TCMalloc and mimalloc are memory allocator libraries from Google and Microsoft that can
+significantly improve performance, depending on the workload. Only one of these
+allocators can be used at a time.
+
+#### TCMalloc
+
+(Please update the binary location and name if different from the default.)
+- Install tcmalloc: `sudo apt-get install google-perftools`
+- Modify the manifest template file:
+  - Add `loader.env.LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`
+  - Append the following entries to `sgx.trusted_files`:
+    - `"file:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`
+    - `"file:/usr/lib/x86_64-linux-gnu/libunwind.so.8"`
+- Save the manifest template and rebuild this example.
+
+#### mimalloc
+
+(Please update the binary location and name if different from the default.)
+- Install mimalloc using the steps from https://github.com/microsoft/mimalloc
+- Modify the manifest template file:
+  - Add the `/usr/local` FS mount point:
+    - `fs.mount.usr_local.type = "chroot"`
+    - `fs.mount.usr_local.path = "/usr/local"`
+    - `fs.mount.usr_local.uri = "file:/usr/local"`
+  - Add `loader.env.LD_PRELOAD = "/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`
+  - Append the following entry to `sgx.trusted_files`:
+    - `"file:/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`
+- Save the manifest template and rebuild this example.
diff --git a/tensorflow/ResNet50/.gitignore b/tensorflow/ResNet50/.gitignore new file mode 100755 index 0000000..e3e7bc8 --- /dev/null +++ b/tensorflow/ResNet50/.gitignore @@ -0,0 +1,2 @@ +/models/ +/resnet50v1_5_int8_pretrained_model.pb diff --git a/tensorflow/ResNet50/Makefile b/tensorflow/ResNet50/Makefile new file mode 100755 index 0000000..8dabc2c --- /dev/null +++ b/tensorflow/ResNet50/Makefile @@ -0,0 +1,47 @@ +# ResNet50 sample for TensorFlow + +ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine) +SGX_SIGNER_KEY ?= ../../../Pal/src/host/Linux-SGX/signer/enclave-key.pem + +ifeq ($(DEBUG),1) +GRAMINE_LOG_LEVEL = debug +else +GRAMINE_LOG_LEVEL = error +endif + +.PHONY: all +all: python.manifest +ifeq ($(SGX),1) +all: python.manifest.sgx python.sig python.token +endif + +.PHONY: collateral +collateral: + test -d models || git clone https://github.com/IntelAI/models.git + test -f resnet50v1_5_int8_pretrained_model.pb || wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50v1_5_int8_pretrained_model.pb + +python.manifest: python.manifest.template collateral + gramine-manifest \ + -Dlog_level=$(GRAMINE_LOG_LEVEL) \ + -Darch_libdir=$(ARCH_LIBDIR) \ + -Dentrypoint=$(realpath $(shell sh -c "command -v python3")) \ + -Dpythondistpath=$(PYTHONDISTPATH) \ + $< >$@ + +python.manifest.sgx: python.manifest + gramine-sgx-sign \ + --manifest $< \ + --output $@ + +python.sig: python.manifest.sgx + +python.token: python.sig + gramine-sgx-get-token --output $@ --sig $< + +.PHONY: clean +clean: + $(RM) *.manifest *.manifest.sgx *.token *.sig + +.PHONY: distclean +distclean: clean + $(RM) -r models/ resnet50v1_5_int8_pretrained_model.pb diff --git a/tensorflow/ResNet50/python.manifest.template b/tensorflow/ResNet50/python.manifest.template new file mode 100755 index 0000000..6ef8223 --- /dev/null +++ b/tensorflow/ResNet50/python.manifest.template @@ -0,0 +1,41 @@ +loader.entrypoint = "file:{{ gramine.libos }}" +libos.entrypoint = "{{ entrypoint }}" + 
+loader.log_level = "{{ log_level }}" + +loader.insecure__use_cmdline_argv = true +loader.insecure__use_host_env = true +loader.insecure__disable_aslr = true + +loader.env.LD_LIBRARY_PATH = "{{ python.stdlib }}/lib:/lib:{{ arch_libdir }}:/usr/lib:/usr/{{ arch_libdir }}" + +loader.pal_internal_mem_size = "512M" + +fs.mounts = [ + { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" }, + { path = "/usr", uri = "file:/usr" }, + { path = "/bin", uri = "file:/bin" }, + { type = "tmpfs", path = "/tmp" }, + { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" }, + { path = "{{ python.stdlib }}", uri = "file:{{ python.stdlib }}" }, + { path = "{{ python.distlib }}", uri = "file:{{ python.distlib }}" }, + { path = "{{ pythondistpath }}", uri = "file:{{ pythondistpath }}" }, +] + +sgx.enclave_size = "32G" +sgx.thread_num = 300 +sgx.preheat_enclave = true +sgx.nonpie_binary = true + +sgx.trusted_files = [ + "file:{{ gramine.runtimedir() }}/", + "file:{{ arch_libdir }}/", + "file:/usr/{{ arch_libdir }}/", + "file:{{ gramine.libos }}", + "file:{{ entrypoint }}", + "file:{{ python.stdlib }}/", + "file:{{ python.distlib }}/", + "file:{{ pythondistpath }}/", + "file:models/", + "file:resnet50v1_5_int8_pretrained_model.pb", +] From 7c7bb81afa43452f0ac27644e0126243e2cd8abd Mon Sep 17 00:00:00 2001 From: Satyanaraya Illa Date: Mon, 11 Apr 2022 15:04:03 +0530 Subject: [PATCH 2/2] fixup! 
Add TensorFlow examples - ResNet50 and BERT models Signed-off-by: Satyanaraya Illa --- tensorflow/BERT/Makefile | 3 ++ tensorflow/BERT/python.manifest.template | 36 ++++++++++++----- tensorflow/ResNet50/Makefile | 3 ++ tensorflow/ResNet50/python.manifest.template | 41 +++++++++++++++----- 4 files changed, 64 insertions(+), 19 deletions(-) diff --git a/tensorflow/BERT/Makefile b/tensorflow/BERT/Makefile index 0cdf77c..d907960 100755 --- a/tensorflow/BERT/Makefile +++ b/tensorflow/BERT/Makefile @@ -40,7 +40,10 @@ python.manifest: python.manifest.template collateral $< >$@ python.manifest.sgx: python.manifest + @test -s $(SGX_SIGNER_KEY) || \ + { echo "SGX signer private key was not found, please specify SGX_SIGNER_KEY!"; exit 1; } gramine-sgx-sign \ + --key $(SGX_SIGNER_KEY) \ --manifest $< \ --output $@ diff --git a/tensorflow/BERT/python.manifest.template b/tensorflow/BERT/python.manifest.template index 303f486..d6adb52 100755 --- a/tensorflow/BERT/python.manifest.template +++ b/tensorflow/BERT/python.manifest.template @@ -10,15 +10,33 @@ loader.insecure__disable_aslr = true loader.env.LD_LIBRARY_PATH = "{{ python.stdlib }}/lib:/lib:{{ arch_libdir }}:/usr/lib:/usr/{{ arch_libdir }}" loader.pal_internal_mem_size = "512M" -fs.mounts = [ - { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" }, - { path = "/usr", uri = "file:/usr" }, - { type = "tmpfs", path = "/tmp" }, - { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" }, - { path = "{{ python.stdlib }}", uri = "file:{{ python.stdlib }}" }, - { path = "{{ python.distlib }}", uri = "file:{{ python.distlib }}" }, - { path = "{{ pythondistpath }}", uri = "file:{{ pythondistpath }}" }, -] +fs.mount.lib.type = "chroot" +fs.mount.lib.path = "/lib" +fs.mount.lib.uri = "file:{{ gramine.runtimedir() }}" + +fs.mount.lib2.type = "chroot" +fs.mount.lib2.path = "{{ arch_libdir }}" +fs.mount.lib2.uri = "file:{{ arch_libdir }}" + +fs.mount.usr.type = "chroot" +fs.mount.usr.path = "/usr" +fs.mount.usr.uri = 
"file:/usr" + +fs.mount.pyhome.type = "chroot" +fs.mount.pyhome.path = "{{ python.stdlib }}" +fs.mount.pyhome.uri = "file:{{ python.stdlib }}" + +fs.mount.pydisthome.type = "chroot" +fs.mount.pydisthome.path = "{{ python.distlib }}" +fs.mount.pydisthome.uri = "file:{{ python.distlib }}" + +fs.mount.pydistpath.type = "chroot" +fs.mount.pydistpath.path = "{{ pythondistpath }}" +fs.mount.pydistpath.uri = "file:{{ pythondistpath }}" + +fs.mount.tmp.type = "tmpfs" +fs.mount.tmp.path = "/tmp" +fs.mount.tmp.uri = "file:/tmp" sgx.enclave_size = "32G" sgx.thread_num = 256 diff --git a/tensorflow/ResNet50/Makefile b/tensorflow/ResNet50/Makefile index 8dabc2c..f4a0862 100755 --- a/tensorflow/ResNet50/Makefile +++ b/tensorflow/ResNet50/Makefile @@ -29,7 +29,10 @@ python.manifest: python.manifest.template collateral $< >$@ python.manifest.sgx: python.manifest + @test -s $(SGX_SIGNER_KEY) || \ + { echo "SGX signer private key was not found, please specify SGX_SIGNER_KEY!"; exit 1; } gramine-sgx-sign \ + --key $(SGX_SIGNER_KEY) \ --manifest $< \ --output $@ diff --git a/tensorflow/ResNet50/python.manifest.template b/tensorflow/ResNet50/python.manifest.template index 6ef8223..e80d1ba 100755 --- a/tensorflow/ResNet50/python.manifest.template +++ b/tensorflow/ResNet50/python.manifest.template @@ -11,16 +11,37 @@ loader.env.LD_LIBRARY_PATH = "{{ python.stdlib }}/lib:/lib:{{ arch_libdir }}:/us loader.pal_internal_mem_size = "512M" -fs.mounts = [ - { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" }, - { path = "/usr", uri = "file:/usr" }, - { path = "/bin", uri = "file:/bin" }, - { type = "tmpfs", path = "/tmp" }, - { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" }, - { path = "{{ python.stdlib }}", uri = "file:{{ python.stdlib }}" }, - { path = "{{ python.distlib }}", uri = "file:{{ python.distlib }}" }, - { path = "{{ pythondistpath }}", uri = "file:{{ pythondistpath }}" }, -] +fs.mount.lib.type = "chroot" +fs.mount.lib.path = "/lib" +fs.mount.lib.uri = "file:{{ 
gramine.runtimedir() }}" + +fs.mount.lib2.type = "chroot" +fs.mount.lib2.path = "{{ arch_libdir }}" +fs.mount.lib2.uri = "file:{{ arch_libdir }}" + +fs.mount.usr.type = "chroot" +fs.mount.usr.path = "/usr" +fs.mount.usr.uri = "file:/usr" + +fs.mount.bin.type = "chroot" +fs.mount.bin.path = "/bin" +fs.mount.bin.uri = "file:/bin" + +fs.mount.pyhome.type = "chroot" +fs.mount.pyhome.path = "{{ python.stdlib }}" +fs.mount.pyhome.uri = "file:{{ python.stdlib }}" + +fs.mount.pydisthome.type = "chroot" +fs.mount.pydisthome.path = "{{ python.distlib }}" +fs.mount.pydisthome.uri = "file:{{ python.distlib }}" + +fs.mount.pydistpath.type = "chroot" +fs.mount.pydistpath.path = "{{ pythondistpath }}" +fs.mount.pydistpath.uri = "file:{{ pythondistpath }}" + +fs.mount.tmp.type = "tmpfs" +fs.mount.tmp.path = "/tmp" +fs.mount.tmp.uri = "file:/tmp" sgx.enclave_size = "32G" sgx.thread_num = 300