rocal_pybind - remove CuPy for generic pipeline (#238)

* generic pipeline+ clean up cupy * setup update * revert notebook change * clean up * readme update * revert unit test change * remove dlpack for generic * address review comments --------- Co-authored-by: Kiriti Gowda <[email protected]>
ROCm · Dec 6, 2024 · 2d092dd · 2d092dd
1 parent a154881
commit 2d092dd
Show file tree

Hide file tree

Showing 12 changed files with 64 additions and 110 deletions.
diff --git a/docker/rocal-on-ubuntu-20.dockerfile b/docker/rocal-on-ubuntu-20.dockerfile
@@ -38,17 +38,12 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev li
         git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
         ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../
 
-ENV CUPY_INSTALL_USE_HIP=1
 ENV ROCM_HOME=/opt/rocm
 RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
         git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
         cmake ../ && make -j4 && sudo make install && cd ../../ && \
         pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
-        cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \
-        pip install numpy==1.24.2 scipy==1.9.3 cython==0.29.* git+https://github.com/ROCm/hipify_torch.git && \
-        env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py && \
-        git clone -b rocm6.1_internal_testing https://github.com/ROCm/cupy.git && cd cupy && git submodule update --init && \
-        pip install -e . --no-cache-dir -vvvv
+        cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../
 
 # install MIVisionX 
 RUN git clone https://github.com/ROCm/MIVisionX.git && cd MIVisionX && \

diff --git a/docker/rocal-on-ubuntu-22.dockerfile b/docker/rocal-on-ubuntu-22.dockerfile
@@ -40,17 +40,13 @@ RUN apt-get update -y && apt-get -y install autoconf automake libbz2-dev libssl-
 RUN apt-get -y install sqlite3 libsqlite3-dev libtool build-essential
 RUN git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
         ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd
-ENV CUPY_INSTALL_USE_HIP=1
+
 ENV ROCM_HOME=/opt/rocm
 RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
         git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
         cmake ../ && make -j4 && sudo make install && cd ../../ && \
         pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
-        cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \
-        pip install numpy==1.24.2 scipy==1.9.3 cython==0.29.* git+https://github.com/ROCm/hipify_torch.git && \
-        env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py && \
-        git clone -b rocm6.1_internal_testing https://github.com/ROCm/cupy.git && cd cupy && git submodule update --init && \
-        pip install -e . --no-cache-dir -vvvv
+        cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../
 
 # Install MIVisionX
 RUN git clone https://github.com/ROCm/MIVisionX && cd MIVisionX && \

diff --git a/docker/rocal-with-tensorflow.dockerfile b/docker/rocal-with-tensorflow.dockerfile
@@ -30,7 +30,6 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev li
         git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
         ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../
 
-ENV CUPY_INSTALL_USE_HIP=1
 ENV ROCM_HOME=/opt/rocm
 RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
         git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \

diff --git a/docs/examples/notebooks/decoder_examples.ipynb b/docs/examples/notebooks/decoder_examples.ipynb
@@ -29,7 +29,6 @@
    "outputs": [],
    "source": [
     "from amd.rocal.pipeline import pipeline_def\n",
-    "import cupy as cp\n",
     "from amd.rocal.plugin.generic import ROCALClassificationIterator\n",
     "import amd.rocal.fn as fn\n",
     "import amd.rocal.types as types\n",

diff --git a/rocAL-setup.py b/rocAL-setup.py
@@ -98,7 +98,7 @@ def ERROR_CHECK(waitval):
     else:
         print("\nSTATUS: CPU Backend Install\n")
 
-# get platfrom info
+# get platform info
 platformInfo = platform.platform()
 
 # sudo requirement check
@@ -167,12 +167,12 @@ def ERROR_CHECK(waitval):
 elif "SLES" in os_info_data:
     linuxSystemInstall = 'zypper -n'
     linuxSystemInstall_check = '--no-gpg-checks'
-    platfromInfo = platfromInfo+'-SLES'
+    platformInfo = platformInfo+'-SLES'
     osUpdate = 'refresh'
 elif "Mariner" in os_info_data:
     linuxSystemInstall = 'tdnf -y'
     linuxSystemInstall_check = '--nogpgcheck'
-    platfromInfo = platfromInfo+'-Mariner'
+    platformInfo = platformInfo+'-Mariner'
     osUpdate = 'makecache'
 else:
     print("\nrocAL Setup on "+platformInfo+" is unsupported\n")
@@ -336,11 +336,11 @@ def ERROR_CHECK(waitval):
                         ' '+linuxSystemInstall_check+' install -y '+ rocmRPMPackages[i]))
 
     # rocDecode - TBD: Revert when rocDecode is fully supported on all OS
-    # if "Ubuntu" in platfromInfo:
+    # if "Ubuntu" in platformInfo:
         # for i in range(len(rocdecodeDebianPackages)):
             # ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
                         # ' '+linuxSystemInstall_check+' install -y '+ rocdecodeDebianPackages[i]))
-    # elif "redhat-7" not in platfromInfo:
+    # elif "redhat-7" not in platformInfo:
         #for i in range(len(rocdecodeRPMPackages)):
             # ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
                         # ' '+linuxSystemInstall_check+' install -y '+ rocdecodeRPMPackages[i]))
@@ -379,6 +379,10 @@ def ERROR_CHECK(waitval):
     elif "SLES" in platformInfo:
         ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
                         ' install dlpack-devel'))
+    elif "redhat" in platformInfo:
+        # no package available -- installing the openSUSE Tumbleweed RPM directly
+        ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+                        ' install https://rpmfind.net/linux/opensuse/tumbleweed/repo/oss/x86_64/dlpack-devel-0.8-1.5.x86_64.rpm'))
 
 
     # RapidJSON - Source TBD: Package install of RapidJSON has compile issues - https://github.com/Tencent/rapidjson.git -- master

diff --git a/rocAL_pybind/README.md b/rocAL_pybind/README.md
@@ -12,7 +12,7 @@ written primarily in C/C++ language can be used effectively in Python.
 * CMake Version 3.10 or higher
 * Python 3
 * PIP3 - `sudo apt install python3-pip`
-* [CuPy for rocm](https://github.com/ROCm/cupy)
+* [dlpack](https://github.com/dmlc/dlpack)
 
 ## Install
 

diff --git a/rocAL_pybind/amd/rocal/plugin/generic.py b/rocAL_pybind/amd/rocal/plugin/generic.py
@@ -27,11 +27,6 @@
 import rocal_pybind as b
 import amd.rocal.types as types
 import ctypes
-try:
-    import cupy as cp
-    CUPY_FOUND=True
-except ImportError:
-    CUPY_FOUND=False
 
 class ROCALGenericIterator(object):
     """!Iterator for processing data
@@ -53,10 +48,6 @@ def __init__(self, pipeline, tensor_layout=types.NCHW, reverse_channels=False, m
         self.multiplier = multiplier
         self.offset = offset
         self.device = device
-        if self.device is "gpu" or "cuda":
-            if not CUPY_FOUND:
-                print('info: Import CuPy failed. Falling back to CPU!')
-                self.device = "cpu"
         self.device_id = device_id
         self.reverse_channels = reverse_channels
         self.tensor_dtype = tensor_dtype
@@ -130,46 +121,24 @@ def __next__(self):
             self.output_list = []
             for i in range(len(self.output_tensor_list)):
                 self.dimensions = self.output_tensor_list[i].dimensions()
-                if self.device == "cpu":
-                    self.dtype = self.output_tensor_list[i].dtype()
-                    self.output = np.empty(self.dimensions, dtype=self.dtype)
-                    self.labels = np.empty(self.labels_size, dtype="int32")
-                else:
-                    self.dtype = self.output_tensor_list[i].dtype()
-                    with cp.cuda.Device(device=self.device_id):
-                        self.output = cp.empty(
-                            self.dimensions, dtype=self.dtype)
-                        self.labels = cp.empty(
-                            self.labels_size, dtype="int32")
-
-                if self.device == "cpu":
-                    self.output_tensor_list[i].copy_data(self.output)
-                else:
-                    self.output_tensor_list[i].copy_data(self.output.data.ptr)
+                self.dtype = self.output_tensor_list[i].dtype()
+                self.output = np.empty(self.dimensions, dtype=self.dtype)
+                # outputs are always returned as NumPy arrays - no ROCm CuPy support available
+                self.output_tensor_list[i].copy_data(self.output)
                 self.output_list.append(self.output)
         else:
             for i in range(len(self.output_tensor_list)):
-                if self.device == "cpu":
-                    self.output_tensor_list[i].copy_data(self.output_list[i])
-                else:
-                    self.output_tensor_list[i].copy_data(
-                        self.output_list[i].data.ptr)
+                self.output_tensor_list[i].copy_data(self.output_list[i])
         if (self.loader._is_external_source_operator):
             self.labels = self.loader.get_image_labels()
-            if self.device == "cpu":
-                self.labels_tensor = self.labels.astype(dtype=np.int_)
-            else:
-                with cp.cuda.Device(device=self.device_id):
-                    self.labels_tensor = self.labels.astype(dtype=cp.int_)
+            self.labels_tensor = self.labels.astype(dtype=np.int_)
             return self.output_list, self.labels_tensor
 
         if self.loader._name == "labelReader":
             if self.loader._one_hot_encoding == True:
-                if self.device == "cpu":
-                    self.loader.get_one_hot_encoded_labels(
+                self.labels = np.empty(self.labels_size, dtype="int32")
+                self.loader.get_one_hot_encoded_labels(
                         self.labels.ctypes.data, self.loader._output_memory_type)
-                else:
-                    self.loader.get_one_hot_encoded_labels(self.labels.data.ptr, self.loader._output_memory_type)
                 self.labels_tensor = self.labels.reshape(
                     -1, self.batch_size, self.loader._num_classes)
             else:
@@ -178,11 +147,7 @@ def __next__(self):
                         for i in range(self.batch_size):
                             draw_patches(output[i], i)
                 self.labels = self.loader.get_image_labels()
-                if self.device == "cpu":
-                    self.labels_tensor = self.labels.astype(dtype=np.int_)
-                else:
-                    with cp.cuda.Device(device=self.device_id):
-                        self.labels_tensor = self.labels.astype(dtype=cp.int_)
+                self.labels_tensor = self.labels.astype(dtype=np.int_)
 
         return self.output_list, self.labels_tensor
 
@@ -201,7 +166,7 @@ def __del__(self):
 
 class ROCALClassificationIterator(ROCALGenericIterator):
     """!ROCAL iterator for classification tasks for generic use case. It returns 2 outputs
-    (data and label) in the form of numpy/cupy Tensor.
+    (data and label) in the form of NumPy arrays.
 
     Calling
 

diff --git a/rocAL_pybind/amd/rocal/plugin/tf.py b/rocAL_pybind/amd/rocal/plugin/tf.py
@@ -107,13 +107,12 @@ def __next__(self):
         if self.loader.rocal_run() != 0:
             raise StopIteration
         self.output_tensor_list = self.loader.get_output_tensors()
-        if self.output_list is None:
-            # Output list used to store pipeline outputs - can support multiple augmentation outputs
-            self.output_list = []
-            for i in range(len(self.output_tensor_list)):
-                # returns tf tensor on gpu/cpu 
-                self.output = tf.experimental.dlpack.from_dlpack(self.output_tensor_list[i].__dlpack__(self.device_id))
-                self.output_list.append(self.output)
+        # Output list used to store pipeline outputs - can support multiple augmentation outputs
+        self.output_list = []
+        for i in range(len(self.output_tensor_list)):
+            # returns tf tensor on gpu/cpu 
+            self.output = tf.experimental.dlpack.from_dlpack(self.output_tensor_list[i].__dlpack__(self.device_id))
+            self.output_list.append(self.output)
 
         if self.loader._name == "TFRecordReaderDetection":
             self.bbox_list = []
@@ -192,7 +191,7 @@ def __len__(self):
 
 class ROCALIterator(ROCALGenericIteratorDetection):
     """!ROCAL iterator for detection and classification tasks for TF reader. It returns 2 or 3 outputs
-    (data and label) or (data , bbox , labels) in the form of numpy or cupy arrays.
+    (data and label) or (data , bbox , labels) in the form of TF tensors.
     Calling
     .. code-block:: python
        ROCALIterator(pipelines, size)
@@ -225,7 +224,7 @@ def __init__(self,
 
 
 class ROCAL_iterator(ROCALGenericImageIterator):
-    """! ROCAL iterator for processing images for TF reader. It returns outputs in the form of numpy or cupy arrays.
+    """! ROCAL iterator for processing images for TF reader. It returns outputs in the form of TF tensors.
 
         @param pipelines            The rocAL pipelines to use for processing data.
         @param size                 The size of the iterator.

diff --git a/tests/python_api/README.md b/tests/python_api/README.md
@@ -1,3 +1,25 @@
+## To test pybind with GPU backend, `dlpack` is required.
+
+### Install dlpack
+
+* Ubuntu:
+
+```
+sudo apt install libdlpack-dev
+```
+
+* SLES:
+
+```
+sudo zypper install dlpack-devel
+```
+
+* Redhat:
+
+```
+sudo yum install https://rpmfind.net/linux/opensuse/tumbleweed/repo/oss/x86_64/dlpack-devel-0.8-1.5.x86_64.rpm
+```
+
 ## Set environmental variables
 
 ``export ROCAL_DATA_PATH=/Absolute/Path/Of/MIVisionX-data/``

diff --git a/tests/python_api/decoder.py b/tests/python_api/decoder.py
@@ -13,7 +13,7 @@
 batch_size = 4
 gpu_id = 0
 
-def show_images(image_batch, device):
+def show_images(image_batch):
     columns = 4
     rows = (batch_size + 1) // (columns)
     #fig = plt.figure(figsize = (32,(32 // columns) * rows))
@@ -23,22 +23,15 @@ def show_images(image_batch, device):
         plt.subplot(gs[j])
         img = image_batch[j]
         plt.axis("off")
-        if device == "cpu":
-            plt.imshow(img)
-        else:
-            try:
-                import cupy as cp
-                plt.imshow(cp.asnumpy(img))
-            except ImportError:
-                pass
+        plt.imshow(img)
     plt.show()
 
 
 def show_pipeline_output(pipe, device):
     pipe.build()
     data_loader = ROCALClassificationIterator(pipe, device=device)
     images = next(iter(data_loader))
-    show_images(images[0][0], device)
+    show_images(images[0][0])
 
 @pipeline_def(seed=seed)
 def image_decoder_pipeline(device="cpu", path=image_dir):

diff --git a/tests/python_api/external_source_reader.py b/tests/python_api/external_source_reader.py
@@ -40,13 +40,7 @@ def main():
     except OSError as error:
         print(error)
 
-    def image_dump(img, idx, device="cpu", mode=0):
-        if device == "gpu":
-            try:
-                import cupy as cp
-                img = cp.asnumpy(img)
-            except ImportError:
-                pass
+    def image_dump(img, idx, mode=0):
         img = img.transpose([1, 2, 0])  # NCHW
         img = (img).astype('uint8')
         if mode!=2:
@@ -114,7 +108,7 @@ def __next__(self):
         print("**************", i, "*******************")
         for img in output_list[0][0]:
             cnt = cnt + 1
-            image_dump(img, cnt, device=device, mode=0)
+            image_dump(img, cnt, mode=0)
 
     ##################### MODE 0 #########################
 
@@ -184,7 +178,7 @@ def __next__(self):
         print("**************", i, "*******************")
         for img in output_list[0][0]:
             cnt = cnt + 1
-            image_dump(img, cnt, device=device, mode=1)
+            image_dump(img, cnt, mode=1)
     ##################### MODE 1 #########################
 
     ##################### MODE 2 #########################
@@ -280,7 +274,7 @@ def __next__(self):
         print("**************", i, "*******************")
         for img in output_list[0][0]:
             cnt = cnt+1
-            image_dump(img, cnt, device=device, mode=2)
+            image_dump(img, cnt, mode=2)
     ##################### MODE 2 #########################
 if __name__ == '__main__':
     main()
diff --git a/tests/python_api/unit_test.py b/tests/python_api/unit_test.py
@@ -44,14 +44,8 @@
 }
 
 
-def draw_patches(img, idx, device, args=None):
+def draw_patches(img, idx, args=None):
     # image is expected as a tensor, bboxes as numpy
-    if device == "gpu":
-        try:
-            import cupy as cp
-            img = cp.asnumpy(img)
-        except ImportError:
-            pass
     if args.fp16:
         img = (img).astype('uint8')
     if not args.color_format:
@@ -67,13 +61,7 @@ def draw_patches(img, idx, device, args=None):
     cv2.imwrite(args.output_file_name + ".png", img,
                 [cv2.IMWRITE_PNG_COMPRESSION, 9])
 
-def dump_meta_data(labels, device, args=None):
-    if device == "gpu":
-        try:
-            import cupy as cp
-            labels = cp.asnumpy(labels)
-        except ImportError:
-            pass
+def dump_meta_data(labels, args=None):
     labels_list = labels.tolist()
     with open(args.output_file_name, 'w') as file:
         for label in labels_list:
@@ -520,9 +508,9 @@ def main():
                     print("**************ends*******************")
                     print("**************", i, "*******************")
                 if args.augmentation_name == "one_hot":
-                    dump_meta_data(labels, rocal_device, args=args)
+                    dump_meta_data(labels, args=args)
                 else:
-                    draw_patches(output_list[j], cnt, rocal_device, args=args)
+                    draw_patches(output_list[j], cnt, args=args)
                     cnt += len(output_list[j])
 
         data_loader.reset()