enable TF remapper optimizer (#1418)
TF supports a remapper optimizer, which remaps subgraphs onto more efficient implementations by replacing commonly occurring subgraphs with optimized, fused monolithic kernels. However, its support is limited: (1) the pattern must be MatMul + BiasAdd (not Add) + Activation; (2) the precision must be float32 (not float64); (3) the activation must be Tanh; (4) TF must be built with MKL and using it.
This commit replaces Add with BiasAdd in the NN. Because `tf.nn.bias_add` expects a 1-D bias, the bias variable shape is also changed from `[1, outputs_size[ii]]` to `[outputs_size[ii]]`. The speed of a single op improves by about 20% when TF uses MKL and the precision is set to float32; the fused `_MklNativeFusedMatMul` op then shows up in the profiler.

See also:
- https://www.tensorflow.org/guide/graph_optimization
- https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper.cc
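The rewrite is purely a graph-pattern change, not a numerical one: a broadcasting `+` with a `[1, D]` bias and a `bias_add` with a `[D]` bias compute the same result, but only the latter matches the remapper's MatMul + BiasAdd + Activation pattern. A minimal NumPy sketch (shapes and names are illustrative, not from the commit) of the equivalence:

```python
import numpy as np

# Illustrative shapes; in network.py the bias variable's shape
# changed from [1, D] to [D] so tf.nn.bias_add can consume it.
batch, d_in, d_out = 4, 8, 16
rng = np.random.default_rng(0)
x = rng.standard_normal((batch, d_in)).astype(np.float32)
w = rng.standard_normal((d_in, d_out)).astype(np.float32)
b = rng.standard_normal(d_out).astype(np.float32)

# Old pattern: broadcasting add with a [1, D] bias. TF emits a
# generic Add op here, which the remapper does not fuse.
old = np.tanh(x @ w + b.reshape(1, d_out))

# New pattern: 1-D bias added along the last axis, i.e. the
# semantics of tf.nn.bias_add; TF can fuse MatMul + BiasAdd + Tanh
# into a single _MklNativeFusedMatMul kernel.
new = np.tanh(x @ w + b)

assert np.allclose(old, new)  # identical results, different graph
```

The fusion itself happens inside grappler when TF is built with MKL; this sketch only shows why the swap is safe numerically.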

(cherry picked from commit 8f2dc44)
njzjz authored Jan 17, 2022
1 parent a9d08a7 commit 057e6ab
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions deepmd/utils/network.py
@@ -56,7 +56,7 @@ def one_layer(inputs,
         w = tf.cast(w, get_precision(mixed_prec['compute_prec']))
         b = tf.cast(b, get_precision(mixed_prec['compute_prec']))

-    hidden = tf.matmul(inputs, w) + b
+    hidden = tf.nn.bias_add(tf.matmul(inputs, w), b)
     if activation_fn != None and use_timestep :
         idt_initializer = tf.random_normal_initializer(
             stddev=0.001,
@@ -196,7 +196,7 @@ def embedding_net(xx,
         variable_summaries(w, 'matrix_'+str(ii)+name_suffix)

         b = tf.get_variable('bias_'+str(ii)+name_suffix,
-                            [1, outputs_size[ii]],
+                            [outputs_size[ii]],
                             precision,
                             b_initializer,
                             trainable = trainable)
@@ -206,7 +206,7 @@ def embedding_net(xx,
             xx = tf.cast(xx, get_precision(mixed_prec['compute_prec']))
             w = tf.cast(w, get_precision(mixed_prec['compute_prec']))
             b = tf.cast(b, get_precision(mixed_prec['compute_prec']))
-        hidden = tf.reshape(activation_fn(tf.matmul(xx, w) + b), [-1, outputs_size[ii]])
+        hidden = tf.reshape(activation_fn(tf.nn.bias_add(tf.matmul(xx, w), b)), [-1, outputs_size[ii]])
         if resnet_dt :
             idt_initializer = tf.random_normal_initializer(
                 stddev=0.001,
