import numpy as np


def random_zero_array(arr, p, mask_token):
    """Replace randomly chosen elements of ``arr`` with ``mask_token``.

    Every element is treated independently: with probability ``p`` it is
    replaced by ``mask_token``, otherwise it is kept. Despite the function's
    name, masked positions receive ``mask_token`` and are only zeroed when
    ``mask_token`` is 0.

    Args:
        arr: Array (or array-like, e.g. a list) of values to mask. The input
            is not modified.
        p: Probability in [0, 1] that any given element is replaced.
        mask_token: Value written at the masked positions.

    Returns:
        A new ``numpy.ndarray`` with the same shape as ``arr``.

    Raises:
        ValueError: If ``p`` is not a valid probability; raised by
            ``np.random.choice`` when ``[p, 1 - p]`` is rejected.
    """
    # Accept lists/tuples as well as ndarrays; a no-op for ndarray input.
    arr = np.asarray(arr)

    # 1 = keep the element, 0 = replace it. The call is kept exactly as it
    # was so that runs seeded through np.random.seed draw the same mask.
    keep = np.random.choice([0, 1], size=arr.shape, p=[p, 1 - p])

    # Select instead of blending arithmetically: `arr * 0` turns inf/NaN
    # entries into NaN, and multiplying by the integer mask would promote
    # narrower integer inputs to the mask's dtype.
    return np.where(keep.astype(bool), arr, mask_token)