Skip to content

Commit

Permalink
rocal_pybind - remove CuPy for generic pipeline (#238)
Browse files Browse the repository at this point in the history
* generic pipeline+ clean up cupy

* setup update

* revert notebook change

* clean up

* readme update

* revert unit test change

* remove dlpack for generic

* address review comments

---------

Co-authored-by: Kiriti Gowda <[email protected]>
  • Loading branch information
LakshmiKumar23 and kiritigowda authored Dec 6, 2024
1 parent a154881 commit 2d092dd
Show file tree
Hide file tree
Showing 12 changed files with 64 additions and 110 deletions.
7 changes: 1 addition & 6 deletions docker/rocal-on-ubuntu-20.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,12 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev li
git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../

ENV CUPY_INSTALL_USE_HIP=1
ENV ROCM_HOME=/opt/rocm
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
cmake ../ && make -j4 && sudo make install && cd ../../ && \
pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \
pip install numpy==1.24.2 scipy==1.9.3 cython==0.29.* git+https://github.com/ROCm/hipify_torch.git && \
env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py && \
git clone -b rocm6.1_internal_testing https://github.com/ROCm/cupy.git && cd cupy && git submodule update --init && \
pip install -e . --no-cache-dir -vvvv
cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../

# install MIVisionX
RUN git clone https://github.com/ROCm/MIVisionX.git && cd MIVisionX && \
Expand Down
8 changes: 2 additions & 6 deletions docker/rocal-on-ubuntu-22.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,13 @@ RUN apt-get update -y && apt-get -y install autoconf automake libbz2-dev libssl-
RUN apt-get -y install sqlite3 libsqlite3-dev libtool build-essential
RUN git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd
ENV CUPY_INSTALL_USE_HIP=1

ENV ROCM_HOME=/opt/rocm
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
cmake ../ && make -j4 && sudo make install && cd ../../ && \
pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \
pip install numpy==1.24.2 scipy==1.9.3 cython==0.29.* git+https://github.com/ROCm/hipify_torch.git && \
env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py && \
git clone -b rocm6.1_internal_testing https://github.com/ROCm/cupy.git && cd cupy && git submodule update --init && \
pip install -e . --no-cache-dir -vvvv
cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../

# Install MIVisionX
RUN git clone https://github.com/ROCm/MIVisionX && cd MIVisionX && \
Expand Down
1 change: 0 additions & 1 deletion docker/rocal-with-tensorflow.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev li
git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../

ENV CUPY_INSTALL_USE_HIP=1
ENV ROCM_HOME=/opt/rocm
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
Expand Down
1 change: 0 additions & 1 deletion docs/examples/notebooks/decoder_examples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
"outputs": [],
"source": [
"from amd.rocal.pipeline import pipeline_def\n",
"import cupy as cp\n",
"from amd.rocal.plugin.generic import ROCALClassificationIterator\n",
"import amd.rocal.fn as fn\n",
"import amd.rocal.types as types\n",
Expand Down
14 changes: 9 additions & 5 deletions rocAL-setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def ERROR_CHECK(waitval):
else:
print("\nSTATUS: CPU Backend Install\n")

# get platfrom info
# get platform info
platformInfo = platform.platform()

# sudo requirement check
Expand Down Expand Up @@ -167,12 +167,12 @@ def ERROR_CHECK(waitval):
elif "SLES" in os_info_data:
linuxSystemInstall = 'zypper -n'
linuxSystemInstall_check = '--no-gpg-checks'
platfromInfo = platfromInfo+'-SLES'
platformInfo = platformInfo+'-SLES'
osUpdate = 'refresh'
elif "Mariner" in os_info_data:
linuxSystemInstall = 'tdnf -y'
linuxSystemInstall_check = '--nogpgcheck'
platfromInfo = platfromInfo+'-Mariner'
platformInfo = platformInfo+'-Mariner'
osUpdate = 'makecache'
else:
print("\nrocAL Setup on "+platformInfo+" is unsupported\n")
Expand Down Expand Up @@ -336,11 +336,11 @@ def ERROR_CHECK(waitval):
' '+linuxSystemInstall_check+' install -y '+ rocmRPMPackages[i]))

# rocDecode - TBD: Revert when rocDecode is fully supported on all OS
# if "Ubuntu" in platfromInfo:
# if "Ubuntu" in platformInfo:
# for i in range(len(rocdecodeDebianPackages)):
# ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
# ' '+linuxSystemInstall_check+' install -y '+ rocdecodeDebianPackages[i]))
# elif "redhat-7" not in platfromInfo:
# elif "redhat-7" not in platformInfo:
#for i in range(len(rocdecodeRPMPackages)):
# ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
# ' '+linuxSystemInstall_check+' install -y '+ rocdecodeRPMPackages[i]))
Expand Down Expand Up @@ -379,6 +379,10 @@ def ERROR_CHECK(waitval):
elif "SLES" in platformInfo:
ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
' install dlpack-devel'))
elif "redhat" in platformInfo:
# no distro package available -- install the dlpack-devel RPM directly from its URL
ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
' install https://rpmfind.net/linux/opensuse/tumbleweed/repo/oss/x86_64/dlpack-devel-0.8-1.5.x86_64.rpm'))


# RapidJSON - Source TBD: Package install of RapidJSON has compile issues - https://github.com/Tencent/rapidjson.git -- master
Expand Down
2 changes: 1 addition & 1 deletion rocAL_pybind/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ written primarily in C/C++ language can be used effectively in Python.
* CMake Version 3.10 or higher
* Python 3
* PIP3 - `sudo apt install python3-pip`
* [CuPy for rocm](https://github.com/ROCm/cupy)
* [dlpack](https://github.com/dmlc/dlpack)

## Install

Expand Down
55 changes: 10 additions & 45 deletions rocAL_pybind/amd/rocal/plugin/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@
import rocal_pybind as b
import amd.rocal.types as types
import ctypes
try:
import cupy as cp
CUPY_FOUND=True
except ImportError:
CUPY_FOUND=False

class ROCALGenericIterator(object):
"""!Iterator for processing data
Expand All @@ -53,10 +48,6 @@ def __init__(self, pipeline, tensor_layout=types.NCHW, reverse_channels=False, m
self.multiplier = multiplier
self.offset = offset
self.device = device
if self.device is "gpu" or "cuda":
if not CUPY_FOUND:
print('info: Import CuPy failed. Falling back to CPU!')
self.device = "cpu"
self.device_id = device_id
self.reverse_channels = reverse_channels
self.tensor_dtype = tensor_dtype
Expand Down Expand Up @@ -130,46 +121,24 @@ def __next__(self):
self.output_list = []
for i in range(len(self.output_tensor_list)):
self.dimensions = self.output_tensor_list[i].dimensions()
if self.device == "cpu":
self.dtype = self.output_tensor_list[i].dtype()
self.output = np.empty(self.dimensions, dtype=self.dtype)
self.labels = np.empty(self.labels_size, dtype="int32")
else:
self.dtype = self.output_tensor_list[i].dtype()
with cp.cuda.Device(device=self.device_id):
self.output = cp.empty(
self.dimensions, dtype=self.dtype)
self.labels = cp.empty(
self.labels_size, dtype="int32")

if self.device == "cpu":
self.output_tensor_list[i].copy_data(self.output)
else:
self.output_tensor_list[i].copy_data(self.output.data.ptr)
self.dtype = self.output_tensor_list[i].dtype()
self.output = np.empty(self.dimensions, dtype=self.dtype)
# returned as numpy always - no ROCM CuPy support available
self.output_tensor_list[i].copy_data(self.output)
self.output_list.append(self.output)
else:
for i in range(len(self.output_tensor_list)):
if self.device == "cpu":
self.output_tensor_list[i].copy_data(self.output_list[i])
else:
self.output_tensor_list[i].copy_data(
self.output_list[i].data.ptr)
self.output_tensor_list[i].copy_data(self.output_list[i])
if (self.loader._is_external_source_operator):
self.labels = self.loader.get_image_labels()
if self.device == "cpu":
self.labels_tensor = self.labels.astype(dtype=np.int_)
else:
with cp.cuda.Device(device=self.device_id):
self.labels_tensor = self.labels.astype(dtype=cp.int_)
self.labels_tensor = self.labels.astype(dtype=np.int_)
return self.output_list, self.labels_tensor

if self.loader._name == "labelReader":
if self.loader._one_hot_encoding == True:
if self.device == "cpu":
self.loader.get_one_hot_encoded_labels(
self.labels = np.empty(self.labels_size, dtype="int32")
self.loader.get_one_hot_encoded_labels(
self.labels.ctypes.data, self.loader._output_memory_type)
else:
self.loader.get_one_hot_encoded_labels(self.labels.data.ptr, self.loader._output_memory_type)
self.labels_tensor = self.labels.reshape(
-1, self.batch_size, self.loader._num_classes)
else:
Expand All @@ -178,11 +147,7 @@ def __next__(self):
for i in range(self.batch_size):
draw_patches(output[i], i)
self.labels = self.loader.get_image_labels()
if self.device == "cpu":
self.labels_tensor = self.labels.astype(dtype=np.int_)
else:
with cp.cuda.Device(device=self.device_id):
self.labels_tensor = self.labels.astype(dtype=cp.int_)
self.labels_tensor = self.labels.astype(dtype=np.int_)

return self.output_list, self.labels_tensor

Expand All @@ -201,7 +166,7 @@ def __del__(self):

class ROCALClassificationIterator(ROCALGenericIterator):
"""!ROCAL iterator for classification tasks for generic use case. It returns 2 outputs
(data and label) in the form of numpy/cupy Tensor.
(data and label) in the form of NumPy arrays.
Calling
Expand Down
17 changes: 8 additions & 9 deletions rocAL_pybind/amd/rocal/plugin/tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,12 @@ def __next__(self):
if self.loader.rocal_run() != 0:
raise StopIteration
self.output_tensor_list = self.loader.get_output_tensors()
if self.output_list is None:
# Output list used to store pipeline outputs - can support multiple augmentation outputs
self.output_list = []
for i in range(len(self.output_tensor_list)):
# returns tf tensor on gpu/cpu
self.output = tf.experimental.dlpack.from_dlpack(self.output_tensor_list[i].__dlpack__(self.device_id))
self.output_list.append(self.output)
# Output list used to store pipeline outputs - can support multiple augmentation outputs
self.output_list = []
for i in range(len(self.output_tensor_list)):
# returns tf tensor on gpu/cpu
self.output = tf.experimental.dlpack.from_dlpack(self.output_tensor_list[i].__dlpack__(self.device_id))
self.output_list.append(self.output)

if self.loader._name == "TFRecordReaderDetection":
self.bbox_list = []
Expand Down Expand Up @@ -192,7 +191,7 @@ def __len__(self):

class ROCALIterator(ROCALGenericIteratorDetection):
"""!ROCAL iterator for detection and classification tasks for TF reader. It returns 2 or 3 outputs
(data and label) or (data , bbox , labels) in the form of numpy or cupy arrays.
(data and label) or (data , bbox , labels) in the form of TF tensors.
Calling
.. code-block:: python
ROCALIterator(pipelines, size)
Expand Down Expand Up @@ -225,7 +224,7 @@ def __init__(self,


class ROCAL_iterator(ROCALGenericImageIterator):
"""! ROCAL iterator for processing images for TF reader. It returns outputs in the form of numpy or cupy arrays.
"""! ROCAL iterator for processing images for TF reader. It returns outputs in the form of tf tensors.
@param pipelines The rocAL pipelines to use for processing data.
@param size The size of the iterator.
Expand Down
22 changes: 22 additions & 0 deletions tests/python_api/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
## To test pybind with GPU backend, `dlpack` is required.

### Install dlpack

* Ubuntu:

```
sudo apt install libdlpack-dev
```

* SLES:

```
sudo zypper install dlpack-devel
```

* Redhat:

```
sudo yum install https://rpmfind.net/linux/opensuse/tumbleweed/repo/oss/x86_64/dlpack-devel-0.8-1.5.x86_64.rpm
```

## Set environment variables

``export ROCAL_DATA_PATH=/Absolute/Path/Of/MIVisionX-data/``
Expand Down
13 changes: 3 additions & 10 deletions tests/python_api/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
batch_size = 4
gpu_id = 0

def show_images(image_batch, device):
def show_images(image_batch):
columns = 4
rows = (batch_size + 1) // (columns)
#fig = plt.figure(figsize = (32,(32 // columns) * rows))
Expand All @@ -23,22 +23,15 @@ def show_images(image_batch, device):
plt.subplot(gs[j])
img = image_batch[j]
plt.axis("off")
if device == "cpu":
plt.imshow(img)
else:
try:
import cupy as cp
plt.imshow(cp.asnumpy(img))
except ImportError:
pass
plt.imshow(img)
plt.show()


def show_pipeline_output(pipe, device):
pipe.build()
data_loader = ROCALClassificationIterator(pipe, device=device)
images = next(iter(data_loader))
show_images(images[0][0], device)
show_images(images[0][0])

@pipeline_def(seed=seed)
def image_decoder_pipeline(device="cpu", path=image_dir):
Expand Down
14 changes: 4 additions & 10 deletions tests/python_api/external_source_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,7 @@ def main():
except OSError as error:
print(error)

def image_dump(img, idx, device="cpu", mode=0):
if device == "gpu":
try:
import cupy as cp
img = cp.asnumpy(img)
except ImportError:
pass
def image_dump(img, idx, mode=0):
img = img.transpose([1, 2, 0]) # NCHW
img = (img).astype('uint8')
if mode!=2:
Expand Down Expand Up @@ -114,7 +108,7 @@ def __next__(self):
print("**************", i, "*******************")
for img in output_list[0][0]:
cnt = cnt + 1
image_dump(img, cnt, device=device, mode=0)
image_dump(img, cnt, mode=0)

##################### MODE 0 #########################

Expand Down Expand Up @@ -184,7 +178,7 @@ def __next__(self):
print("**************", i, "*******************")
for img in output_list[0][0]:
cnt = cnt + 1
image_dump(img, cnt, device=device, mode=1)
image_dump(img, cnt, mode=1)
##################### MODE 1 #########################

##################### MODE 2 #########################
Expand Down Expand Up @@ -280,7 +274,7 @@ def __next__(self):
print("**************", i, "*******************")
for img in output_list[0][0]:
cnt = cnt+1
image_dump(img, cnt, device=device, mode=2)
image_dump(img, cnt, mode=2)
##################### MODE 2 #########################
if __name__ == '__main__':
main()
20 changes: 4 additions & 16 deletions tests/python_api/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,8 @@
}


def draw_patches(img, idx, device, args=None):
def draw_patches(img, idx, args=None):
# image is expected as a tensor, bboxes as numpy
if device == "gpu":
try:
import cupy as cp
img = cp.asnumpy(img)
except ImportError:
pass
if args.fp16:
img = (img).astype('uint8')
if not args.color_format:
Expand All @@ -67,13 +61,7 @@ def draw_patches(img, idx, device, args=None):
cv2.imwrite(args.output_file_name + ".png", img,
[cv2.IMWRITE_PNG_COMPRESSION, 9])

def dump_meta_data(labels, device, args=None):
if device == "gpu":
try:
import cupy as cp
labels = cp.asnumpy(labels)
except ImportError:
pass
def dump_meta_data(labels, args=None):
labels_list = labels.tolist()
with open(args.output_file_name, 'w') as file:
for label in labels_list:
Expand Down Expand Up @@ -520,9 +508,9 @@ def main():
print("**************ends*******************")
print("**************", i, "*******************")
if args.augmentation_name == "one_hot":
dump_meta_data(labels, rocal_device, args=args)
dump_meta_data(labels, args=args)
else:
draw_patches(output_list[j], cnt, rocal_device, args=args)
draw_patches(output_list[j], cnt, args=args)
cnt += len(output_list[j])

data_loader.reset()
Expand Down

0 comments on commit 2d092dd

Please sign in to comment.