Merge branch 'master' into fe_convert_partially_fix_in_doc
slyalin authored Aug 10, 2023
2 parents c557f00 + 59872ee commit c4126dc
Showing 47 changed files with 3,023 additions and 878 deletions.
9 changes: 9 additions & 0 deletions .ci/azure/linux.yml
@@ -551,6 +551,15 @@ jobs:
TEST_DEVICE: CPU
displayName: 'TensorFlow Lite Layer Tests - TFL FE'
- script: |
set -e
python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
$(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/ovc_python_api_tests/ --junitxml=./TEST-test_ovc_convert.xml
env:
PYTHONPATH: $(LAYER_TESTS_DIR)
TEST_DEVICE: CPU
displayName: 'OVC Python API Tests'
- script: |
set -e
python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt
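The new CI step above can be reproduced locally by driving pytest from Python. A minimal sketch, assuming the repository layout used in the YAML (the `LAYER_TESTS_DIR` default of `tests/layer_tests` is an assumption, not taken from the diff):

```python
# Sketch: local equivalent of the 'OVC Python API Tests' CI step.
# The LAYER_TESTS_DIR default and CPU device are assumptions mirroring the YAML.
import os
import sys
import pytest

LAYER_TESTS_DIR = os.environ.get("LAYER_TESTS_DIR", "tests/layer_tests")
os.environ["TEST_DEVICE"] = "CPU"      # device the CI step targets
sys.path.insert(0, LAYER_TESTS_DIR)    # stands in for PYTHONPATH in the YAML

exit_code = pytest.main([
    os.path.join(LAYER_TESTS_DIR, "ovc_python_api_tests"),
    "--junitxml=./TEST-test_ovc_convert.xml",
])
```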
2 changes: 1 addition & 1 deletion src/bindings/python/src/openvino/__init__.py
@@ -57,6 +57,6 @@
# Tools
try:
# Model Conversion API - ovc should reside in the main namespace
from openvino.tools.ovc import convert_model, InputCutInfo
from openvino.tools.ovc import convert_model
except ImportError:
pass
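After this change only `convert_model` is re-exported into the top-level `openvino` namespace; `InputCutInfo` is no longer part of that public surface. A quick sketch of the surviving entry point (the ONNX path is a placeholder):

```python
# Sketch: convert_model remains reachable from the main namespace.
import openvino as ov
from openvino.runtime import Core

ov_model = ov.convert_model("model.onnx")       # placeholder model path
compiled = Core().compile_model(ov_model, "CPU")
```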
42 changes: 24 additions & 18 deletions src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
@@ -7,7 +7,7 @@
from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder
from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
from openvino.runtime import op, PartialShape, Type as OVType, OVAny
from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map
from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map, torch_tensor_to_ov_const
from openvino.runtime import opset11 as ops

import typing
@@ -29,11 +29,12 @@ def forward(self, {input_sign}):


class TorchScriptPythonDecoder (Decoder):
def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=None):
def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=None, shared_memory=True):
Decoder.__init__(self)
# We store every decoder created by this decoder so that all of them stay alive until the first decoder is deleted
self.m_decoders = []
self._input_signature = None
self._shared_memory = shared_memory
if graph_element is None:
try:
pt_module = self._get_scripted_model(pt_module, example_input)
@@ -43,10 +44,9 @@ def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=None
help_msg = ""
else:
msg = "scripting"
help_msg = "Tracing sometimes provide better results, "
"please provide valid 'example_input' argument. "
help_msg = "\nTracing sometimes provide better results, please provide valid 'example_input' argument. "
raise RuntimeError(
f"Couldn't get TorchScript module by {msg}. {help_msg}"
f"Couldn't get TorchScript module by {msg}. With exception:\n{e}\n {help_msg}"
"You can also provide TorchScript module that you obtained"
" yourself, please refer to PyTorch documentation: "
"https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html.")
@@ -160,8 +160,11 @@ def prepare_example_inputs_and_model(inputs, input_params, model):
except Exception:
try:
scripted = torch.jit.script(pt_module)
except Exception:
scripted = torch.jit.trace(pt_module, **input_parameters, strict=False)
except Exception as se:
try:
scripted = torch.jit.trace(pt_module, **input_parameters, strict=False)
except Exception as te:
raise f"Tracing failed with exception {te}\nScripting failed with exception: {se}"
skip_freeze = False
for n in scripted.inlined_graph.nodes():
# TODO: switch off freezing for all traced models
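The nested try/except above implements a script-first, trace-second fallback that reports both failures. An equivalent standalone sketch of the pattern (the function name is illustrative):

```python
# Sketch: try torch.jit.script first, fall back to torch.jit.trace, and
# surface both exceptions if neither succeeds.
import torch

def script_or_trace(pt_module, example_inputs):
    try:
        return torch.jit.script(pt_module)
    except Exception as scripting_err:
        try:
            return torch.jit.trace(pt_module, example_inputs, strict=False)
        except Exception as tracing_err:
            raise RuntimeError(
                f"Tracing failed with exception {tracing_err}\n"
                f"Scripting failed with exception: {scripting_err}")
```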
@@ -283,7 +286,7 @@ def get_subgraph_size(self) -> int:
def visit_subgraph(self, node_visitor) -> None:
# make sure topological order is satisfied
for node in self.graph_element.nodes():
decoder = TorchScriptPythonDecoder(self.pt_module, node, alias_db=self.alias_db)
decoder = TorchScriptPythonDecoder(self.pt_module, node, alias_db=self.alias_db, shared_memory=self._shared_memory)
self.m_decoders.append(decoder)
node_visitor(decoder)

@@ -299,7 +302,7 @@ def get_subgraphs(self) -> list:
return list(self.graph_element.blocks())

def get_subgraph_decoder(self, index: int):
decoder = TorchScriptPythonDecoder(self.pt_module, self.get_subgraphs()[index], alias_db=self.alias_db)
decoder = TorchScriptPythonDecoder(self.pt_module, self.get_subgraphs()[index], alias_db=self.alias_db, shared_memory=self._shared_memory)
self.m_decoders.append(decoder)
return decoder

@@ -336,7 +339,7 @@ def mark_node(self, node):
return node

@staticmethod
def convert_quantized_tensor(qtensor: torch.Tensor):
def convert_quantized_tensor(qtensor: torch.Tensor, shared_memory: bool):
# need to represent as Constant(u8) -> Convert(f32) -> Subtract(zero_point) -> Multiply (scale)
qscheme = qtensor.qscheme() # torch.per_channel_affine (per_tensor)
if qscheme == torch.per_channel_affine:
@@ -349,8 +352,8 @@ def convert_quantized_tensor(qtensor):
new_shape[axis] = -1
zero_point_bc = np.reshape(zero_point, new_shape)
scale_bc = np.reshape(scale, new_shape)

int8_const = op.Constant(int8_tensor.numpy())
int8_const = torch_tensor_to_ov_const(int8_tensor, shared_memory=shared_memory)
convert = ops.convert(int8_const, np.float32)
sub = ops.subtract(convert, zero_point_bc)
return ops.multiply(sub, scale_bc).outputs()
@@ -359,7 +362,7 @@ def convert_quantized_tensor(qtensor):
scale = np.float32(qtensor.q_scale())
zero_point = np.float32(qtensor.q_zero_point())

int8_const = op.Constant(int8_tensor.numpy())
int8_const = torch_tensor_to_ov_const(int8_tensor, shared_memory=shared_memory)
convert = ops.convert(int8_const, np.float32)
sub = ops.subtract(convert, zero_point)
return ops.multiply(sub, scale).outputs()
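Both branches above express plain affine dequantization as Constant(u8/i8) -> Convert(f32) -> Subtract(zero_point) -> Multiply(scale). A NumPy sketch of the per-tensor case (values are illustrative):

```python
# Sketch: the arithmetic behind the per-tensor branch.
import numpy as np

int_repr = np.array([0, 64, 128, 255], dtype=np.uint8)   # illustrative storage
scale, zero_point = np.float32(0.1), np.float32(128.0)

dequantized = (int_repr.astype(np.float32) - zero_point) * scale
# -> [-12.8, -6.4, 0.0, 12.7]
```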
@@ -372,7 +375,7 @@ def try_decode_get_attr(self):
# We assume this is __torch__.torch.classes.quantized.Conv2dPackedParamsBase or __torch__.torch.classes.quantized.LinearPackedParamsBase
# TODO: but can be anything. Figure a better way to distinguish
weight, bias = pt_value.unpack()
res = self.convert_quantized_tensor(weight)
res = self.convert_quantized_tensor(weight, self._shared_memory)
if isinstance(bias, torch.Tensor):
res += ivalue_to_constant(bias)
else:
@@ -383,12 +386,15 @@ def try_decode_get_attr(self):
padding = pt_value.padding()
dilation = pt_value.dilation()
groups = pt_value.groups()
res += ivalue_to_constant(stride) + ivalue_to_constant(padding) + ivalue_to_constant(dilation) + ivalue_to_constant(groups)
res += ivalue_to_constant(stride, shared_memory=self._shared_memory)
res += ivalue_to_constant(padding, shared_memory=self._shared_memory)
res += ivalue_to_constant(dilation, shared_memory=self._shared_memory)
res += ivalue_to_constant(groups, shared_memory=self._shared_memory)
except:
pass
return res
elif not isinstance(pt_value, (torch.jit.ScriptModule, torch.jit.TracedModule)):
return ivalue_to_constant(pt_value)
return ivalue_to_constant(pt_value, shared_memory=self._shared_memory)
else:
return []

@@ -400,10 +406,10 @@ def as_constant(self):
pt_value = self._raw_output(0)
pt_type = pt_value.type()
if isinstance(pt_type, torch.TensorType):
return ivalue_to_constant(pt_value.toIValue())
return ivalue_to_constant(pt_value.toIValue(), shared_memory=self._shared_memory)
if isinstance(pt_type, torch.ListType):
return self._as_constant_list(pt_value)
return ivalue_to_constant(pt_value.toIValue())
return ivalue_to_constant(pt_value.toIValue(), shared_memory=self._shared_memory)

def as_string(self):
if self.get_op_type() == "prim::Constant":
38 changes: 21 additions & 17 deletions src/bindings/python/src/openvino/frontend/pytorch/utils.py
@@ -55,7 +55,26 @@ def get_type_from_py_type(value):
return OVType.dynamic


def ivalue_to_constant(ivalue):
def torch_tensor_to_ov_const(torch_t: torch.Tensor, shared_memory=True):
torch_t = torch_t.to(memory_format=torch.contiguous_format)
if torch_t.dtype == torch.bfloat16:
# reinterpret bfloat16 data as float16 to allow conversion to numpy
torch_t = torch_t.view(torch.float16)
narr = torch_t.numpy(force=True)
if not narr.flags['C_CONTIGUOUS']:
narr = np.ascontiguousarray(narr)
# TODO: this tensor doesn't share memory with initial tensor
tensor = Tensor(narr, torch_t.shape, OVType.bf16)
ov_const = op.Constant(tensor, shared_memory=shared_memory)
else:
narr = torch_t.numpy(force=True)
if not narr.flags['C_CONTIGUOUS']:
narr = np.ascontiguousarray(narr)
ov_const = op.Constant(narr, shared_memory=shared_memory)
return ov_const
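Since NumPy has no bfloat16 dtype, the helper reinterprets bfloat16 bits as float16 and tags the resulting `Tensor` as `bf16`, so the element type survives the trip. A usage sketch (tensor contents are illustrative):

```python
# Sketch: bfloat16 round-trip through the helper; t.numpy() alone would fail.
import torch
from openvino.frontend.pytorch.utils import torch_tensor_to_ov_const

t = torch.randn(2, 3, dtype=torch.bfloat16)
const = torch_tensor_to_ov_const(t, shared_memory=False)
print(const.get_element_type())  # expected to report bf16
```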


def ivalue_to_constant(ivalue, shared_memory=True):
ov_type = get_type_from_py_type(ivalue)
if ov_type.is_static():
return op.Constant(ov_type, Shape([]), [ivalue]).outputs()
@@ -67,22 +86,7 @@ def ivalue_to_constant(ivalue):
return op.Constant(ov_type, Shape([len(ivalue)]), ivalue).outputs()

if isinstance(ivalue, torch.Tensor):
ivalue = ivalue.to(memory_format=torch.contiguous_format)
if ivalue.dtype == torch.bfloat16:
# reinterpret bfloat16 data as float16 to allow conversion to numpy
ivalue = ivalue.view(torch.float16)
narr = ivalue.numpy(force=True)
if not narr.flags['C_CONTIGUOUS']:
narr = np.ascontiguousarray(narr)
# TODO: this tensor doesn't share memory with initial tensor
tensor = Tensor(narr, ivalue.shape, OVType.bf16)
ov_const = op.Constant(tensor, shared_memory=True)
else:
narr = ivalue.numpy(force=True)
if not narr.flags['C_CONTIGUOUS']:
narr = np.ascontiguousarray(narr)
ov_const = op.Constant(narr, shared_memory=True)
return ov_const.outputs()
return torch_tensor_to_ov_const(ivalue, shared_memory=shared_memory).outputs()
return None

def get_value_from_getattr(getattr_node, self_module):
@@ -66,8 +66,10 @@ static bool simplify_gather(shared_ptr<Node> node) {
if (!constant_indices)
return false;
// case_3: if input_shape is (1,3,5,5) and axis = 0, indices = 0, then gather is just a Squeeze
const auto constant_indices_size = constant_indices->get_output_shape(0).size();
const auto const_indices = constant_indices->cast_vector<int64_t>();
if (data.get_shape()[axis] == 1 && const_indices.size() == 1 && const_indices[0] == 0) {
if (data.get_shape()[axis] == 1 && (constant_indices_size == 0 || constant_indices_size == 1) &&
const_indices[0] == 0) {
auto squeeze = std::make_shared<ov::op::v0::Squeeze>(gather->input_value(0), gather->input_value(2));
squeeze->set_friendly_name(gather->get_friendly_name());
ov::copy_runtime_info(gather, squeeze);
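The case_3 rewrite relies on Gather with a scalar index 0 over a size-1 axis being exactly a Squeeze; the added rank check keeps the rewrite from firing for vector-shaped indices such as `{1, 1}`. A NumPy sketch of both facts:

```python
# Sketch: scalar-index Gather on a size-1 axis equals Squeeze ...
import numpy as np

data = np.random.rand(1, 3, 5, 5).astype(np.float32)
gathered = np.take(data, 0, axis=0)     # scalar index drops the axis
squeezed = np.squeeze(data, axis=0)
assert np.array_equal(gathered, squeezed)

# ... but (1, 1)-shaped indices add dimensions instead of removing one,
# which is what the new constant_indices_size guard excludes.
kept = np.take(data, np.zeros((1, 1), dtype=np.int64), axis=0)
assert kept.shape == (1, 1, 3, 5, 5)
```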
@@ -1339,6 +1339,34 @@ TEST(nop_elimination, gather_to_squeeze) {
run_and_check(func_axis_3);
}

TEST(nop_elimination, not_gather_to_squeeze_with_vector_indices) {
auto generate_func = [](int64_t gather_axis) {
ov::Shape shape{3, 3, 4, 4};
shape[gather_axis] = 1;
auto arg = std::make_shared<op::Parameter>(element::f32, shape);
auto indices = op::Constant::create(element::i64, Shape{1, 1}, vector<int64_t>{0});
auto axis = op::Constant::create(element::i64, Shape{}, vector<int64_t>{gather_axis});
auto gather = std::make_shared<op::v8::Gather>(arg, indices, axis);
return std::make_shared<ov::Model>(NodeVector{gather}, ParameterVector{arg});
};

auto func_axis_0 = generate_func(0);
auto func_axis_1 = generate_func(1);
auto func_axis_2 = generate_func(2);
auto func_axis_3 = generate_func(3);
pass::Manager pass_manager;
pass_manager.register_pass<ov::pass::NopElimination>();
auto run_and_check = [&](std::shared_ptr<ov::Model>& func) {
pass_manager.run_passes(func);
EXPECT_EQ(count_ops_of_type<op::v8::Gather>(func), 1);
EXPECT_EQ(count_ops_of_type<op::v0::Squeeze>(func), 0);
};
run_and_check(func_axis_0);
run_and_check(func_axis_1);
run_and_check(func_axis_2);
run_and_check(func_axis_3);
}

TEST_F(TransformationTestsF, Nopv1Broadcast) {
{
auto data = std::make_shared<opset10::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
2 changes: 1 addition & 1 deletion tests/layer_tests/common/mo_convert_test_class.py
@@ -53,7 +53,7 @@ def _test(self, temp_dir, test_params, ref_params):
ir_ref = core.read_model(Path(temp_dir, 'model_ref.xml'))

flag, msg = compare_functions(ir_test, ir_ref)
assert flag, '\n'.join(msg)
assert flag, msg

def _test_by_ref_graph(self, temp_dir, test_params, ref_graph, compare_tensor_names=True, compare_layout=True):
"""