diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..ebe747b --- /dev/null +++ b/.clang-format @@ -0,0 +1,28 @@ +BasedOnStyle: Google +IndentWidth: 4 +UseTab: Never +ColumnLimit: 120 + +Language: Cpp +Standard: Cpp11 + +AccessModifierOffset: -4 +AlignConsecutiveMacros: true +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Empty +AllowShortLoopsOnASingleLine: false +AlwaysBreakBeforeMultilineStrings: false +BinPackArguments: false +BinPackParameters: false +CommentPragmas: '^#' +DerivePointerAlignment: false +FixNamespaceComments: true +IndentCaseLabels: false +IndentPPDirectives: AfterHash +ForEachMacros: + - foreach + - FOREACH_CHILD diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 27e6f10..eb5b230 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -44,7 +44,7 @@ jobs: sudo systemctl daemon-reload sudo systemctl restart docker cd ./ivsr_ffmpeg_plugin - ./build_docker.sh --enable_ov_patch false + ./build_docker.sh --ov_version 2024.5s - name: Check disk space run: df -h @@ -55,7 +55,7 @@ jobs: TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db with: scan-type: 'image' - image-ref: 'ffmpeg_ivsr_sdk_ov2022.3' + image-ref: 'ffmpeg_ivsr_sdk_ubuntu22_ov2024.5s' #format: 'template' #template: '@/contrib/sarif.tpl' security-checks: vuln diff --git a/.github/workflows/ubuntu-build-docker.yml b/.github/workflows/ubuntu-build-docker.yml index 6db9e9c..26b1d8f 100644 --- a/.github/workflows/ubuntu-build-docker.yml +++ b/.github/workflows/ubuntu-build-docker.yml @@ -28,4 +28,4 @@ jobs: sudo systemctl daemon-reload sudo systemctl restart docker cd ./ivsr_ffmpeg_plugin - ./build_docker.sh --enable_ov_patch true --ov_version 2022.3 + ./build_docker.sh --ov_version 2024.5s 
--os_version rockylinux9 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4392efc --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +bin +build +lib + diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 2f57ecf..0000000 --- a/.gitmodules +++ /dev/null @@ -1,8 +0,0 @@ -[submodule "ivsr_gpu_opt/based_on_openvino_2022.1/openvino"] - path = ivsr_gpu_opt/based_on_openvino_2022.1/openvino - url = https://github.com/openvinotoolkit/openvino.git - branch = releases/2022/1 -[submodule "ffmpeg"] - path = ivsr_ffmpeg_plugin/ffmpeg - url = https://github.com/ffmpeg/ffmpeg.git - branch = release/5.1 diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..6a43914 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2023 Intel Corporation + +# global-owner +* @linxie47 @wangjingz @xiaoxial @djie1 diff --git a/README.md b/README.md index e45b6a6..48a8542 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,19 @@ -## iVSR + +
+ +
+ +
[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/OpenVisualCloud/iVSR/badge)](https://api.securityscorecards.dev/projects/github.com/OpenVisualCloud/iVSR) [![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/9795/badge)](https://bestpractices.coreinfrastructure.org/projects/9795) [![Dependency Review](https://github.com/OpenVisualCloud/iVSR/actions/workflows/dependency-review.yml/badge.svg)](https://github.com/OpenVisualCloud/iVSR/actions/workflows/dependency-review.yml) -[![CodeQL](https://github.com/OpenVisualCloud/iVSR/actions/workflows/codeql.yml/badge.svg)](https://github.com/OpenVisualCloud/iVSR/actions/workflows/codeql.yml) +[![CodeQL](https://github.com/OpenVisualCloud/iVSR/actions/workflows/codeql.yml/badge.svg)](https://github.com/OpenVisualCloud/iVSR/actions/workflows/codeql.yml)
[![License](https://img.shields.io/badge/license-BSD_3_Clause-stable.svg)](https://github.com/OpenVisualCloud/iVSR/blob/master/LICENSE.md) [![Contributions](https://img.shields.io/badge/contributions-welcome-blue.svg)](https://github.com/OpenVisualCloud/iVSR/wiki) [![Ubuntu-DockerFile-Build](https://github.com/OpenVisualCloud/iVSR/actions/workflows/ubuntu-build-docker.yml/badge.svg)](https://github.com/OpenVisualCloud/iVSR/actions/workflows/ubuntu-build-docker.yml) [![Trivy](https://github.com/OpenVisualCloud/iVSR/actions/workflows/trivy.yml/badge.svg)](https://github.com/OpenVisualCloud/iVSR/actions/workflows/trivy.yml) +
# Contents Overview 1. [Overview of iVSR](#1-overview-of-ivsr) @@ -16,10 +22,10 @@ - [iVSR Components](#13-ivsr-components) - [Capabilities of iVSR](#14-capabilities-of-ivsr) 2. [Setup iVSR env on linux](#2-setup-ivsr-env-on-linux) - - [Install GPU in kernel space ](#21-optional-install-gpu-kernel-packages) + - [Install GPU kernel packages(Optional)](#21-optional-install-gpu-kernel-packages) - [Install dependencies and build iVSR manually](#22-install-dependencies-and-build-ivsr-manually) - [Install dependencies and build iVSR by scripts](#23-install-dependencies-and-build-ivsr-by-scripts) - - [Install dependencies and build iVSR by Docker file](#24-install-dependencies-and-build-ivsr-by-docker-file) + - [Install dependencies and build iVSR by Dockerfile](#24-install-dependencies-and-build-ivsr-by-dockerfile) 3. [How to use iVSR](#3-how-to-use-ivsr) - [Run with iVSR SDK sample](#31-run-with-ivsr-sdk-sample) - [Run with FFmpeg](#32-run-with-ffmpeg) @@ -69,36 +75,42 @@ Currently, iVSR offers two AI media processing functionalities: Video Super Reso ### 1.4.1 Video Super Resolution (VSR) Video Super Resolution (VSR) is a technique extensively employed in the AI media enhancement domain to upscale low-resolution videos to high-resolution. iVSR supports `Enhanced BasicVSR`, `Enhanced EDSR`, `TSENet`, and has the capability to be extended to support additional models. -- #### i. Enhanced BasicVSR - `BasicVSR` is a publicly available AI-based VSR algorithm. For more details on the public `BasicVSR`, please refer to this [paper](https://arxiv.org/pdf/2012.02181.pdf).
- We have improved the public model to attain superior visual quality and reduced computational complexity, named `Enhanced BasicVSR`. The performance of the `Enhanced BasicVSR` model inference has also been optimized for Intel GPUs. Please note that this optimization is specific to OpenVINO 2022.3. Therefore, the Enhanced BasicVSR model only works with OpenVINO 2022.3 with the applied patches. - -- #### ii. Enhanced EDSR - `EDSR` is another publicly available AI-based single image SR algorithm. For more details on the public EDSR, please refer to this [paper](https://arxiv.org/pdf/1707.02921.pdf) +- #### i. Enhanced BasicVSR + `BasicVSR` is a publicly available AI-based VSR algorithm. For more details on the public `BasicVSR`, please refer to this [paper](https://arxiv.org/pdf/2012.02181.pdf).

+ We have improved the public model to attain superior visual quality and reduced computational complexity, named `Enhanced BasicVSR`. The performance of the `Enhanced BasicVSR` model inference has also been optimized for Intel GPUs. Please note that this optimization is specific to OpenVINO 2022.3. Therefore, the Enhanced BasicVSR model only works with OpenVINO 2022.3 with the applied patches.

+ The input shape of this model is `[1, (channels)3, (frames)3, H, W]`, and the output shape is `[1, (channels)3, (frames)3, 2xH, 2xW]`. - We have improved the public `EDSR` model to reduce the computational complexity by over 79% compared to Enhanced BasicVSR, while maintaining similar visual quality, named `Enhanced EDSR`. +- #### ii. Enhanced EDSR + `EDSR` is another publicly available AI-based single image SR algorithm. For more details on the public EDSR, please refer to this [paper](https://arxiv.org/pdf/1707.02921.pdf)

+ We have improved the public `EDSR` model to reduce the computational complexity by over 79% compared to Enhanced BasicVSR, while maintaining similar visual quality, named `Enhanced EDSR`.

+ The input shape of this model is `[1, (channels)3, H, W]`, and the output shape is `[1, (channels)3, 2xH, 2xW]`. - #### iii. TSENet - `TSENet` is one multi-frame SR algorithm derived from [ETDS](https://github.com/ECNUSR/ETDS).
- We provide a preview version of the feature to support this model in the SDK and its plugin. Please contact your Intel representative to obtain the model package. + `TSENet` is one multi-frame SR algorithm derived from [ETDS](https://github.com/ECNUSR/ETDS).

+ We provide a preview version of the feature to support this model in the SDK and its plugin. Please contact your Intel representative to obtain the model package.

+ The input shape of this model is `[1, (channels * frames)9, H, W]`, and the output shape is `[1, (channels)3, 2xH, 2xW]`. For each inference, the input data is the `(n-1)th`, `(n)th`, and `(n+1)th` frames combined. The output data is the `(n)th` frame. For the first frame, the input data is the `1st`, `1st`, and `2nd` frames combined. For the last frame, the input data is the `(n-1)th`, `(n)th`, and `(n)th` frames combined.
### 1.4.2. Smart Video Processing (SVP) -`SVP` is an AI-based video prefilter that enhances the perceptual rate-distortion in video encoding. With `SVP`, the encoded video streams maintain the same visual quality while reducing bandwidth, as measured by common video quality metrics (such as VMAF and (MS-)SSIM) and human perception. +`SVP` is an AI-based video prefilter that enhances the perceptual rate-distortion in video encoding. With `SVP`, the encoded video streams maintain the same visual quality while reducing bandwidth.
+ +Two SVP model variants are provided. The `SVP-Basic` model is an efficiency-oriented model that preserves fidelity while reducing the encoded bitrate. The modifications that SVP-Basic pre-processing makes to images/video cannot be perceived by human eyes, and they show no to minor BD-rate degradation when measured with the SSIM or MS-SSIM metrics. The SVP-Basic model is suitable for almost all video scenarios, including live sport, live gaming, livestream sales, VOD, video conference, video surveillance, and 5G video ring.
+The `SVP-SE` model is designed for subjective video quality preservation with up to 50% bitrate saving. It targets enhancement that is plausible to human eyes and reduces complex details, such as patterns and noise that human eyes are insensitive to; hence it can’t be evaluated by popular full-reference visual quality metrics such as PSNR, SSIM, and VMAF. It improves the visibility and quality of visuals, making them more vivid and appealing to viewers, so it’s widely used in various industries, including entertainment, media and advertising, to enhance the visual experience and attract audience attention.

+The input and output shapes are `[1, (channels)3, H, W]` for the RGB-based model and `[1, (channels)1, H, W]` for the Y-based model.
# 2. Setup iVSR env on linux The software was validated on: - Intel Xeon hardware platform - (Optional) Intel Data Center GPU Flex 170(*aka* ATS-M1 150W) -- Host OS: Linux based OS (Ubuntu 22.04) -- Docker OS: Ubuntu 22.04 -- OpenVINO: [2022.3](https://github.com/openvinotoolkit/openvino/tree/2022.3.0) or [2023.2](https://github.com/openvinotoolkit/openvino/tree/2023.2.0) +- Host OS: Linux based OS (Ubuntu 22.04 or Rocky Linux 9.3) +- Docker OS: Ubuntu 22.04 or Rocky Linux 9.3 +- OpenVINO: [2022.3](https://github.com/openvinotoolkit/openvino/tree/2022.3.0) or [2023.2](https://github.com/openvinotoolkit/openvino/tree/2023.2.0) or [2024.5](https://github.com/openvinotoolkit/openvino/tree/2024.5.0) - FFmpeg: [n6.1](https://github.com/FFmpeg/FFmpeg/tree/n6.1) Building iVSR requires the installation of the GPU driver(optional), OpenCV, OpenVINO, and FFmpeg.
We provide **three** ways to install requirements and build iVSR SDK & iVSR FFmpeg plugin:
1. [Install dependencies and build iVSR manually](#22-install-dependencies-and-build-ivsr-manually)
2. [Install dependencies and build iVSR by scripts](#23-install-dependencies-and-build-ivsr-by-scripts)
-3. [Install dependencies and build iVSR by Docker file](#24-install-dependencies-and-build-ivsr-by-docker-file)
+3. [Install dependencies and build iVSR by Dockerfile](#24-install-dependencies-and-build-ivsr-by-dockerfile)
Note that to run inference on a **GPU**, it is necessary to have **kernel packages** installed on the bare metal system beforehand. See [Install GPU kernel packages ](#21-optional-install-gpu-kernel-packages) for details.
@@ -107,101 +119,29 @@ Refer to this [instruction](https://dgpu-docs.intel.com/driver/installation.html ## 2.2 Install dependencies and build iVSR manually -### 2.2.1 (Optional) Install software for Intel® Data Center GPU Flex Series -To facilitate inference on Intel Data Center GPU, it's necessary to have both the kernel driver and the run-time driver and software installed. If you're planning to run inference on a CPU only, you can disregard this step.
- -The detailed installation instruction is on [this page](https://dgpu-docs.intel.com/driver/installation.html#).
+Here are two guides for your reference:
+One is a generic guide for users who are familiar with Intel® devices and have prior experience with Intel®-developed software; it follows the official steps to build OpenCV and OpenVINO from source code: [Generic manual building guide](docs/generic_manual_build.md#generic-manual-build-steps-for-ffmpeg--ivsr-plugin-software)
+Another option is a tutorial for absolute beginners to try to build the project following every step in the guide based on a clean Ubuntu OS installed machine. [Quick manual building guide](docs/quick_try_manual_build.md#manual-build-steps-for-ffmpeg--ivsr-plugin-software-on-ubuntu) - -### 2.2.2 Install OpenCV -OpenCV, which is used by the iVSR SDK sample for image processing tasks, needs to be installed. Detailed installation instructions can be found at [Installation OpenCV in Linux](https://docs.opencv.org/4.x/d7/d9f/tutorial_linux_install.html).
- -### 2.2.3 Install OpenVINO -OpenVINO, currently the only backend supported by iVSR for model inference, should also be installed. You can refer to this [instruction](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build_linux.md) to build OpenVINO from the source code.
- -### 2.2.4 Build iVSR SDK -Once the dependencies are installed in the system, you can proceed to build the iVSR SDK and its sample.
-```bash -source /install/setupvars.sh -export OpenCV_DIR=/install/lib/cmake/opencv4 -cd ivsr_sdk -mkdir -p ./build -cd ./build -cmake .. -DENABLE_THREADPROCESS=ON -DCMAKE_BUILD_TYPE=Release -make -``` -### 2.2.5 Build FFmpeg with iVSR plugin -We provide patches specifically for FFmpeg n6.1. Apply these patches as instructed below:
-```bash -git clone https://github.com/FFmpeg/FFmpeg.git ./ivsr_ffmpeg_plugin/ffmpeg -cd ./ivsr_ffmpeg_plugin/ffmpeg -git checkout n6.1 -cp ../patches/*.patch ./ -for patch_file in $(find -iname "*.patch" | sort -n); do \ - echo "Applying: ${patch_file}"; \ - git am --whitespace=fix ${patch_file}; \ -done; -``` -Finally, build FFmpeg. You can also enable other FFmpeg plugins as per the instructions provided in the [Compile FFmpeg for Ubuntu](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu) guide.
-```bash -source /install/setupvars.sh -./configure --enable-libivsr --extra-cflags=-I/ivsr_sdk/include/ --extra-ldflags=-L/ivsr_sdk/lib -make -j $(nproc --all) -make install -``` ## 2.3 Install dependencies and build iVSR by scripts We provide shell scripts `build_ivsr.sh` and `ivsr_setupvar.sh` to assist in building the dependencies from source code and setting up the environment from scratch.
```bash #ivsr environment building chmod a+x ./build_ivsr.sh -sudo ./build_ivsr.sh --enable_ov_patch --enable_compile_ffmpeg true --ov_version <2022.3|2023.2> +sudo ./build_ivsr.sh --enable_ov_patch --enable_compile_ffmpeg true --ov_version <2022.3|2023.2|2024.5> #environment variables setting -source ./ivsr_setupvar.sh --ov_version <2022.3|2023.2> +source ./ivsr_setupvar.sh --ov_version <2022.3|2023.2|2024.5> ``` The scripts accept the following input parameters:
- `enable_ov_patch`: Determines whether to enable OpenVINO patches, which are necessary to run the Enhanced BasicVSR model.
- `enable_compile_ffmpeg`: Determines whether to compile FFmpeg. Set this to `false` if you're only using the iVSR SDK sample.
-- `ov_version`: Specifies the OpenVINO version. iVSR supports `2022.3` & `2023.2`. Note that running the Enhanced BasicVSR model requires `2022.3`.
- -Feel free to modify and update these scripts as per your requirements.
- - -## 2.4 Install dependencies and build iVSR by Docker file. -A Dockerfile is also provided to expedite the environment setup process. Follow the steps below to build the docker image and run the docker container.
- -### 2.4.1. Set timezone correctly before building docker image. -The following command takes Shanghai as an example. - - ```bash - timedatectl set-timezone Asia/Shanghai - ``` - -### 2.4.2 Set up docker service +- `ov_version`: Specifies the OpenVINO version. iVSR supports `2022.3`, `2023.2` and `2024.5`. Note that running the Enhanced BasicVSR model requires `2022.3`.
-```bash -sudo mkdir -p /etc/systemd/system/docker.service.d -printf "[Service]\nEnvironment=\"HTTPS_PROXY=$https_proxy\" \"NO_PROXY=$no_proxy\"\n" | sudo tee /etc/systemd/system/docker.service.d/proxy.conf -sudo systemctl daemon-reload -sudo systemctl restart docker -``` - -### 2.4.3 Build docker image - -```bash -cd ./ivsr_ffmpeg_plugin -./build_docker.sh --enable_ov_patch [true|false] --ov_version [2022.3|2023.2] -``` -- `enable_ov_patch`: Set as `true` or `flase` to enable or disable the application of OpenVINO 2022.3 patches, which are needed to support the Enhanced BasicVSR model.
-- `ov_version`: Set the OpenVINO version to `2022.3` or `2023.2`, which will be built and installed. iVSR currently supports both OpenVINO 2022.3 and 2023.2, but the patches to enable the Enhanced BasicVSR model are only for OpenVINO 2022.3.
-If the docker image builds successfully, you can see a docker image named `ffmpeg_ivsr_sdk_ov2022.3` or `ffmpeg_ivsr_sdk_ov2023.2` in the output of `docker image ls`.
+Feel free to modify and update these scripts as per your requirements. For newly released OpenVINO versions, please follow the [manual build](#22-install-dependencies-and-build-ivsr-manually) guide.
-### 2.4.4. Start Docker Container -```bash -sudo docker run -itd --name ffmpeg_ivsr_sdk_container --privileged -e MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy --shm-size=128g --device=/dev/dri:/dev/dri ffmpeg_ivsr_sdk_[ov2022.3|ov2023.2]:latest bash -sudo docker exec -it ffmpeg_ivsr_sdk_container bash -``` -Note `--device=/dev/dri:/dev/dri` is specified in the command to add the host gpu device to container.
+## 2.4 Install dependencies and build iVSR by Dockerfile +Dockerfiles are also provided to expedite the environment setup process. Follow the guide to build the docker image and run the application in the docker containers: [Docker image build guide](docs/docker_image_build.md#docker-image-build-guide).
# 3. How to use iVSR Both `vsr_sample` and FFmpeg integration are provided to run inference on the iVSR SDK. Execute the following commands to setup the env before executing them.
@@ -223,5 +163,5 @@ Only models in OpenVINO IR format is supported by iVSR. Please reach out to your # 5. License -Please check the license file under each folder. +iVSR is licensed under the BSD 3-clause license. See [LICENSE](LICENSE.md) for details. diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 36788f5..88339da 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,5 +1,17 @@ # iVSR Release Notes +# Next Release + +## New and Changes + +## Bug Fixes + +## Known Limitations/Issues +- If the model-guard protected model is loaded, it prints the following *error* messages. They can be ignored as its normal procedure for this kind of model files.
+[libprotobuf ERROR thirdparty/protobuf/protobuf/src/google/protobuf/text_format.cc:335] Error parsing text-format tensorflow.GraphDef: 1:2: Message type "tensorflow.GraphDef" has no field named "T".
+[libprotobuf ERROR thirdparty/protobuf/protobuf/src/google/protobuf/text_format.cc:335] Error parsing text-format tensorflow.GraphDef: 1:2: Message type "tensorflow.GraphDef" has no field named "T". + + # Release v24.05 ## New and Changes in v24.05 diff --git a/build_ivsr.sh b/build_ivsr.sh index 2d24265..4efde04 100755 --- a/build_ivsr.sh +++ b/build_ivsr.sh @@ -6,7 +6,7 @@ PROJECTDIR=${PWD} usage() { echo "Usage: $0 --enable_ov_patch [true|false] --enable_compile_ffmpeg [true|false] - --ov_version [2022.3|2023.2]" + --ov_version [2022.3|2023.2|2024.5]" exit 1 } @@ -40,7 +40,7 @@ while [ $# -gt 0 ]; do ;; --ov_version) shift - if [ "$1" = "2022.3" ] || [ "$1" = "2023.2" ]; then + if [ "$1" = "2022.3" ] || [ "$1" = "2023.2" ] || [ "$1" = "2024.5" ]; then OV_VERSION=$1 else usage @@ -55,9 +55,9 @@ while [ $# -gt 0 ]; do shift # Move to the next argument done -if [ "$OV_VERSION" = "2023.2" ]; then +if [ "$OV_VERSION" != "2022.3" ]; then ENABLE_OV_PATCH="false" - echo "There is no openvino patches for openvino 2023.2 version, will ignore the setting of ENABLE_OV_PATCH" + echo "There is no openvino patches for openvino $OV_VERSION, will ignore the setting of ENABLE_OV_PATCH" fi @@ -92,8 +92,8 @@ apt-get update && DEBIAN_FRONTEND=noninteractive && apt-get install -y --no-inst python3-dev libpython3-dev python3-pip apt-get clean -pip --no-cache-dir install --upgrade pip setuptools -pip install numpy +pip --no-cache-dir install --upgrade pip==23.0 setuptools==65.5.0 +pip install numpy==1.23.5 @@ -142,8 +142,8 @@ if [ "$OV_VERSION" = "2022.3" ]; then apt-get clean fi -## 3.2-2 BKC for OV2023.2 -if [ "$OV_VERSION" = "2023.2" ]; then +## 3.2-2 BKC for other OV versions +if [ "$OV_VERSION" != "2022.3" ]; then apt-get update apt-get install -y vainfo clinfo apt-get install -y --no-install-recommends ocl-icd-libopencl1 @@ -241,7 +241,8 @@ cd ${IVSR_SDK_DIR}/build cmake .. 
\ -DENABLE_LOG=OFF -DENABLE_PERF=OFF -DENABLE_THREADPROCESS=ON \ -DCMAKE_BUILD_TYPE=Release -make +make -j $(nproc --all) +make install echo "Build ivsr sdk finished." @@ -289,9 +290,9 @@ if ${ENABLE_COMPILE_FFMPEG}; then export LD_LIBRARY_PATH=${IVSR_SDK_DIR}/lib:${CUSTOM_IE_LIBDIR}:${TBB_DIR}/../lib:"$LD_LIBRARY_PATH" cd ${FFMPEG_DIR} ./configure \ + --extra-cflags=-fopenmp \ + --extra-ldflags=-fopenmp \ --enable-libivsr \ - --extra-cflags=-I${IVSR_SDK_DIR}/include/ \ - --extra-ldflags=-L${IVSR_SDK_DIR}/lib \ --disable-static \ --disable-doc \ --enable-shared \ diff --git a/docs/docker_image_build.md b/docs/docker_image_build.md new file mode 100644 index 0000000..af64ae0 --- /dev/null +++ b/docs/docker_image_build.md @@ -0,0 +1,36 @@ +# Docker image build guide + +### 1. Set timezone correctly before building docker image. +The following command takes Shanghai as an example. + + ```bash + timedatectl set-timezone Asia/Shanghai + ``` + +### 2. Set up docker service + +```bash +sudo mkdir -p /etc/systemd/system/docker.service.d +printf "[Service]\nEnvironment=\"HTTPS_PROXY=$https_proxy\" \"NO_PROXY=$no_proxy\"\n" | sudo tee /etc/systemd/system/docker.service.d/proxy.conf +sudo systemctl daemon-reload +sudo systemctl restart docker +``` + +### 3. Build docker image + +```bash +cd ./ivsr_ffmpeg_plugin +./build_docker.sh --enable_ov_patch [true|false] --ov_version [2022.3|2023.2|2024.5|2024.5s] --os_version [rockylinux9|ubuntu22] +``` +- `enable_ov_patch`: Set as `true` or `flase` to enable or disable the application of OpenVINO 2022.3 patches, which are needed to support the Enhanced BasicVSR model.
+- `ov_version`: Set the OpenVINO version to `2022.3`, `2023.2`, `2024.5`, or `2024.5s`, which will be built and installed. `2024.5s` means that OpenVINO 2024.5 is installed via apt or yum rather than built and installed from source code. iVSR currently supports OpenVINO 2022.3, 2023.2 and 2024.5, but the patches to enable the Enhanced BasicVSR model are only for OpenVINO 2022.3.
+- `os_version`: Set the OS version of the Docker image to `ubuntu22` (Ubuntu 22.04) or `rockylinux9` (Rocky Linux 9.3) to build the Docker image based on that OS.
+If the docker image builds successfully, you can see a docker image named `ffmpeg_ivsr_sdk_${os_version}_ov${ov_version}` such as `ffmpeg_ivsr_sdk_ubuntu22_ov2022.3` or `ffmpeg_ivsr_sdk_rockylinux9_ov2022.3` in the output of `docker image ls`.
+ +### 4. Start Docker Container + +```bash +sudo docker run -itd --name ffmpeg_ivsr_sdk_container --privileged -e MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy --shm-size=128g --device=/dev/dri:/dev/dri ffmpeg_ivsr_sdk_[ubuntu22|rockylinux9]_[ov2022.3|ov2023.2|ov2024.5]:latest bash +sudo docker exec -it ffmpeg_ivsr_sdk_container bash +``` +Note `--device=/dev/dri:/dev/dri` is specified in the command to add the host gpu device to container.
\ No newline at end of file diff --git a/docs/figs/logo.bmp b/docs/figs/logo.bmp new file mode 100644 index 0000000..d46a7b9 Binary files /dev/null and b/docs/figs/logo.bmp differ diff --git a/docs/generic_manual_build.md b/docs/generic_manual_build.md new file mode 100644 index 0000000..93b846c --- /dev/null +++ b/docs/generic_manual_build.md @@ -0,0 +1,45 @@ +# Generic Manual Build Steps for FFmpeg + IVSR plugin Software + +### 1. (Optional) Install software for Intel® Data Center GPU Flex Series +To facilitate inference on Intel Data Center GPU, it's necessary to have both the kernel driver and the run-time driver and software installed. If you're planning to run inference on a CPU only, you can disregard this step.
+ +The detailed installation instruction is on [this page](https://dgpu-docs.intel.com/driver/installation.html#).
+ + +### 2. Install OpenCV +OpenCV, which is used by the iVSR SDK sample for image processing tasks, needs to be installed. Detailed installation instructions can be found at [Installation OpenCV in Linux](https://docs.opencv.org/4.x/d7/d9f/tutorial_linux_install.html).
+ +### 3. Install OpenVINO +OpenVINO, currently the only backend supported by iVSR for model inference, should also be installed. You can refer to this [instruction](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build_linux.md) to build OpenVINO from the source code.
+ +### 4. Build iVSR SDK +Once the dependencies are installed in the system, you can proceed to build the iVSR SDK and its sample.
+```bash +source /install/setupvars.sh +export OpenCV_DIR=/install/lib/cmake/opencv4 +cd ivsr_sdk +mkdir -p ./build +cd ./build +cmake .. -DENABLE_THREADPROCESS=ON -DENABLE_SAMPLE=ON -DCMAKE_BUILD_TYPE=Release +make +make install +``` +### 5. Build FFmpeg with iVSR plugin +We provide patches specifically for FFmpeg n6.1. Apply these patches as instructed below:
+```bash +git clone https://github.com/FFmpeg/FFmpeg.git ./ivsr_ffmpeg_plugin/ffmpeg +cd ./ivsr_ffmpeg_plugin/ffmpeg +git checkout n6.1 +cp ../patches/*.patch ./ +for patch_file in $(find -iname "*.patch" | sort -n); do \ + echo "Applying: ${patch_file}"; \ + git am --whitespace=fix ${patch_file}; \ +done; +``` +Finally, build FFmpeg. You can also enable other FFmpeg plugins as per the instructions provided in the [Compile FFmpeg for Ubuntu](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu) guide.
+```bash +source /install/setupvars.sh +./configure --enable-libivsr --extra-cflags=-fopenmp --extra-ldflags=-fopenmp +make -j $(nproc --all) +make install +``` diff --git a/docs/quick_try_manual_build.md b/docs/quick_try_manual_build.md new file mode 100644 index 0000000..3c66aa0 --- /dev/null +++ b/docs/quick_try_manual_build.md @@ -0,0 +1,172 @@ +# Manual Build Steps for FFmpeg + IVSR plugin Software on Ubuntu + +This document provides detailed steps for building the software with FFmpeg + iVSR SDK as the backend to work for media transcoding and DNN-based processing for video content on a clean Ubuntu 22.04 system. + +## Prerequisites + +Ensure your system has internet access and an updated package index: + +```bash +sudo apt-get update +``` + +## Step-by-Step Instructions + +### 1. Install Essential Utilities + +Start by installing essential packages required for downloading and handling other software components: + +```bash +sudo apt-get install -y --no-install-recommends \ + curl ca-certificates gpg-agent software-properties-common +``` +Install common dependencies: +```bash +sudo apt-get install -y --no-install-recommends --fix-missing \ + autoconf \ + automake \ + build-essential \ + apt-utils cmake cython3 flex bison gcc g++ git make patch pkg-config wget \ + libdrm-dev libudev-dev libtool libusb-1.0-0-dev xz-utils ocl-icd-opencl-dev opencl-headers +``` +### 2. Set Up OpenVINO + +Set up the OpenVINO toolkit by downloading and installing the key, adding the repository, and installing OpenVINO: + +```bash +wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB +sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + +echo "deb https://apt.repos.intel.com/openvino/2024 ubuntu22 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2024.list + +sudo apt-get update +sudo apt-get install -y openvino-2024.5.0 +rm -f GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB +``` + +### 3. 
Install FFmpeg Dependencies + +Install additional dependencies required by FFmpeg: + +```bash +sudo apt-get install -y --no-install-recommends \ + ca-certificates tar g++ wget pkg-config nasm yasm libglib2.0-dev flex bison gobject-introspection libgirepository1.0-dev \ + python3-dev libx11-dev libxv-dev libxt-dev libasound2-dev libpango1.0-dev libtheora-dev libvisual-0.4-dev libgl1-mesa-dev \ + libcurl4-gnutls-dev librtmp-dev mjpegtools libx264-dev libx265-dev libde265-dev libva-dev libtbb-dev +``` + +### 4. Build iVSR SDK + +1. Clone or copy the iVSR SDK repository into your workspace. +2. Navigate to the SDK folder and create a build directory. +3. Run CMake with the appropriate flags and build the project. + +```bash +mkdir -p /ivsr/ivsr_sdk/build +cd /ivsr/ivsr_sdk/build +cmake .. -DENABLE_LOG=OFF -DENABLE_PERF=OFF -DENABLE_THREADPROCESS=ON -DCMAKE_BUILD_TYPE=Release +make -j $(nproc --all) +sudo make install +``` + +### 5. Build and Install FFmpeg with iVSR SDK Support + +1. Configure global Git settings if you haven't already. +2. Clone the FFmpeg repository and check out the desired version. +3. Apply necessary patches and configure the build. +4. Compile and install FFmpeg. + +```bash +git config --global user.email "noname@example.com" +git config --global user.name "no name" +git clone https://github.com/FFmpeg/FFmpeg.git /ivsr/ivsr_ffmpeg_plugin/ffmpeg +cd /ivsr/ivsr_ffmpeg_plugin/ffmpeg +git checkout n6.1 + +# Apply patches +cp -rf /path/to/patches/*.patch . +git am --whitespace=fix *.patch + +./configure \ + --enable-gpl \ + --enable-nonfree \ + --disable-static \ + --disable-doc \ + --enable-shared \ + --enable-version3 \ + --enable-libivsr \ + --enable-libx264 \ + --enable-libx265 + +make -j$(nproc) +sudo make install + +# Set the library path for FFmpeg or run ldconfig +export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + +# Run ffmpeg to test if it can run successfully +ffmpeg +``` + +### 6. 
Install GPU Drivers (Optional) + +Install required GPU drivers and dependencies: + +```bash +sudo apt-get install -y --no-install-recommends ocl-icd-libopencl1 + +# Download and install necessary GPU packages +mkdir /tmp/gpu_deps && cd /tmp/gpu_deps +curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-core_1.0.17384.11_amd64.deb +curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-opencl_1.0.17384.11_amd64.deb +curl -L -O https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-level-zero-gpu-dbgsym_1.3.30508.7_amd64.ddeb +curl -L -O https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-level-zero-gpu_1.3.30508.7_amd64.deb +curl -L -O https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-opencl-icd-dbgsym_24.31.30508.7_amd64.ddeb +curl -L -O https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-opencl-icd_24.31.30508.7_amd64.deb +curl -L -O https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/libigdgmm12_22.4.1_amd64.deb +sudo dpkg -i ./*.deb +rm -Rf /tmp/gpu_deps +``` + +Also, you can download the latest gpu driver from the official website: https://github.com/intel/compute-runtime/releases +### 7. Environment Configuration for GPU + +Set the environment variables required for GPU drivers: + +```bash +export LIBVA_DRIVER_NAME=iHD +export LIBVA_DRIVERS_PATH=/usr/lib/x86_64-linux-gnu/dri +``` + +Congratulations! You've successfully built the software on a bare metal Ubuntu system. 
+ +### Optional: Build and Install OpenCV + +Start building OpenCV: + +```bash +OPENCV_REPO=https://github.com/opencv/opencv/archive/4.5.3-openvino-2021.4.2.tar.gz +wget -qO - ${OPENCV_REPO} | tar xz +OPENCV_BASE=opencv-4.5.3-openvino-2021.4.2 +cd ${OPENCV_BASE} && mkdir -p build && mkdir -p install && cd build +cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=${OPENCV_BASE}/install \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DOPENCV_GENERATE_PKGCONFIG=ON \ + -DBUILD_DOCS=OFF \ + -DBUILD_EXAMPLES=OFF \ + -DBUILD_PERF_TESTS=OFF \ + -DBUILD_TESTS=OFF \ + -DWITH_OPENEXR=OFF \ + -DWITH_OPENJPEG=OFF \ + -DWITH_GSTREAMER=OFF \ + -DWITH_JASPER=OFF \ + -DWITH_FFMPEG=OFF \ + -DPYTHON3_EXECUTABLE=/usr/bin/python3 \ + .. +make -j "$(nproc)" +sudo make install +cd ${OPENCV_BASE}/install/bin && bash ./setup_vars_opencv4.sh + +``` \ No newline at end of file diff --git a/ivsr_ffmpeg_plugin/README.md b/ivsr_ffmpeg_plugin/README.md index 79ead0f..355b815 100644 --- a/ivsr_ffmpeg_plugin/README.md +++ b/ivsr_ffmpeg_plugin/README.md @@ -25,30 +25,34 @@ Additionally, there are other parameters that you can use. These parameters are Here are some examples of FFmpeg command lines to run inference with the supported models using the `ivsr` backend.
-- Command sample to run Enhanced BasicVSR inference, the input pixel format supported by the model is `bgr24`. +- Command sample to run Enhanced BasicVSR inference, the input pixel format supported by the model is `rgb24`. ``` cd /ivsr_ffmpeg_plugin/ffmpeg -./ffmpeg -i -vf format=bgr24,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=3:backend_configs='device=&extension=/ivsr_ov/based_on_openvino_2022.3/openvino/bin/intel64/Release/libcustom_extension.so&op_xml=/ivsr_ov/based_on_openvino_2022.3/openvino/flow_warp_cl_kernel/flow_warp.xml' test_out.mp4 +./ffmpeg -i -vf format=rgb24,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=3:backend_configs='device=&extension=/ivsr_ov/based_on_openvino_2022.3/openvino/bin/intel64/Release/libcustom_extension.so&op_xml=/ivsr_ov/based_on_openvino_2022.3/openvino/flow_warp_cl_kernel/flow_warp.xml' test_out.mp4 ``` Please note that for the Enhanced BasicVSR model, you need to set the `extension` and `op_xml` options (with `backend_configs`) in the command line. After applying OpenVINO's patches and building OpenVINO, the extension lib file is located in `/openvino/bin/intel64/Release/libcustom_extension.so`, and the op xml file is located in `/openvino/flow_warp_cl_kernel/flow_warp.xml`.
-- Command sample to run SVP models inference +- Command sample to run SVP models inference. If the supported input pixel format of the model variant is `rgb24`, set the preceding format to the same value to avoid unnecessary layout conversion: ``` cd /ivsr_ffmpeg_plugin/ffmpeg -./ffmpeg -i -vf format=bgr24,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=1:backend_configs='device=&model_type=1' -pix_fmt yuv420p test_out.mp4 +./ffmpeg -i -vf format=rgb24,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=1:backend_configs='device=&model_type=1' -pix_fmt yuv420p test_out.mp4 ``` -- Command sample to run Enhanced EDSR inference +If the model variant supports Y-input, set the preceding format to YUV: +``` +./ffmpeg -i -vf format=yuv420p,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=1:backend_configs='device=&model_type=1' -pix_fmt yuv420p test_out.mp4 +``` +- Command sample to run Enhanced EDSR inference, the input pixel format supported by the model is `rgb24`. ``` cd /ivsr_ffmpeg_plugin/ffmpeg -./ffmpeg -i -vf format=bgr24,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=1:backend_configs='device=&model_type=2&normalize_factor=255.0' -pix_fmt yuv420p test_out.mp4 +./ffmpeg -i -vf format=rgb24,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=1:backend_configs='device=&model_type=2&normalize_factor=255.0' -pix_fmt yuv420p test_out.mp4 ``` -- Command sample to run CUSTOM VSR inference. Note the input pixel format supported by this model is `yuv420p`. +- Command sample to run CUSTOM VSR inference. Note the input pixel format supported by this model is `yuv420p`, and its input shape is `[1, (Y channel)1, H, W]`, output shape is `[1, 1, 2xH, 2xW]`. 
``` cd /ivsr_ffmpeg_plugin/ffmpeg ./ffmpeg -i -vf format=yuv420p,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=1:backend_configs='nireq=1&device=CPU&model_type=3' -pix_fmt yuv420p test_out.mp4 ``` -- Command sample to run TSENet model +- Command sample to run TSENet model, the input pixel format supported by the model is `rgb24`. ``` cd /ivsr_ffmpeg_plugin/ffmpeg -./ffmpeg -i -vf format=bgr24,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=1:backend_configs='device=&model_type=4' -pix_fmt yuv420p test_out.mp4 +./ffmpeg -i -vf format=rgb24,dnn_processing=dnn_backend=ivsr:model=:input=input:output=output:nif=1:backend_configs='device=&model_type=4' -pix_fmt yuv420p test_out.mp4 ``` diff --git a/ivsr_ffmpeg_plugin/build_docker.sh b/ivsr_ffmpeg_plugin/build_docker.sh index 0c257e7..d69e467 100755 --- a/ivsr_ffmpeg_plugin/build_docker.sh +++ b/ivsr_ffmpeg_plugin/build_docker.sh @@ -1,50 +1,78 @@ #!/bin/sh -# Default value for the ENABLE_OV_PATCH flag -ENABLE_OV_PATCH="true" -OV_VERSION="2022.3" -OV_VERSION_N="ov2022.3" -# Parse the --enable_ov_patch flag and --ov_version +enable_ov_patch="false" + +# Default os_version set to ubuntu22 +os_version="ubuntu22" + +# Extract available OV versions from Dockerfile names and format them with "|" +available_versions=$(ls dockerfiles/${os_version}/ov*.dockerfile 2>/dev/null | grep -oP '(?<=ov)\d+\.\d+[a-z]*' | paste -sd '|') + +# Default OV_VERSION set to the first available version +ov_version=$(echo $available_versions | awk -F '|' '{print $1}') + +# Extract available OS versions from name of dockerfiles folder +available_os=$(ls dockerfiles 2>/dev/null | paste -sd '|') + +# Function to print usage and exit with error +print_usage_and_exit() { + echo "Usage: $0 --enable_ov_patch [true|false] --ov_version [${available_versions}] --os_version [${available_os}]" + exit 1 +} + +# Parse the arguments while [ $# -gt 0 ]; do - case "$1" in - --enable_ov_patch) - shift - value=$(echo $1 | tr 
'[:upper:]' '[:lower:]') - if [ "$value" = "false" ]; then - ENABLE_OV_PATCH=$value - fi - shift - ;; - --ov_version) - shift - if [ "$1" = "2022.3" ]; then - OV_VERSION=$1 - OV_VERSION_N="ov2022.3" - elif [ "$1" = "2023.2" ]; then - OV_VERSION=$1 - OV_VERSION_N="ov2023.2" - else - echo "Usage: $0 --enable_ov_patch [true|false] --ov_version [2022.3|2023.2]" - exit 1 - fi - shift - ;; - *) - echo "Usage: $0 --enable_ov_patch [true|false] --ov_version [2022.3|2023.2]" - exit 1 - ;; - esac + case "$1" in + --enable_ov_patch) + shift + value=$(echo $1 | tr '[:upper:]' '[:lower:]') + if [ "$value" = "true" ] || [ "$value" = "false" ]; then + enable_ov_patch=$value + else + print_usage_and_exit + fi + shift + ;; + --ov_version) + shift + if echo "$available_versions" | grep -qw "$1"; then + ov_version=$1 + else + print_usage_and_exit + fi + shift + ;; + --os_version) + shift + value=$(echo $1 | tr '[:upper:]' '[:lower:]') + if echo "$available_os" | grep -qw "$value"; then + os_version="$value"; + else + print_usage_and_exit + fi + shift + ;; + *) + print_usage_and_exit + ;; + esac done -if [ "$OV_VERSION" = "2023.2" ]; then - ENABLE_OV_PATCH="false" - echo "There is no openvino patches for openvino 2023.2 version, will ignore the setting of ENABLE_OV_PATCH" +# Configure ENABLE_OV_PATCH according to OV version +if [ "$ov_version" = "2022.3" ]; then + echo "Setting ENABLE_OV_PATCH to $enable_ov_patch for version 2022.3." +else + enable_ov_patch="false" + echo "ENABLE_OV_PATCH is not applicable for version $ov_version. Automatically set to false." 
fi -docker build --build-arg http_proxy=$http_proxy \ - --build-arg https_proxy=$https_proxy \ - --build-arg no_proxy=$no_proxy \ - --build-arg PYTHON=python3.10 \ - --build-arg ENABLE_OV_PATCH=$ENABLE_OV_PATCH \ - --build-arg OV_VERSION=$OV_VERSION \ - -f Dockerfile -t ffmpeg_ivsr_sdk_$OV_VERSION_N \ - ../ + +docker build \ + --build-arg http_proxy=$http_proxy \ + --build-arg https_proxy=$https_proxy \ + --build-arg no_proxy=$no_proxy \ + --build-arg PYTHON=python3.10 \ + --build-arg ENABLE_OV_PATCH=$enable_ov_patch \ + --build-arg OV_VERSION=$ov_version \ + -f ./dockerfiles/$os_version/ov${ov_version}.dockerfile \ + -t ffmpeg_ivsr_sdk_${os_version}_ov${ov_version} \ + ../ diff --git a/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/Dockerfile b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/Dockerfile new file mode 100644 index 0000000..6f9636a --- /dev/null +++ b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/Dockerfile @@ -0,0 +1,223 @@ +# SPDX-License-Identifier: BSD 3-Clause License +# +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ARG IMAGE=rockylinux@sha256:d7be1c094cc5845ee815d4632fe377514ee6ebcf8efaed6892889657e5ddaaa6 +FROM $IMAGE AS base + +RUN dnf -y update && \ + dnf -y install \ + wget && \ + dnf clean all + +FROM base as build +LABEL vendor="Intel Corporation" + +RUN dnf -y install cmake \ + gcc \ + g++ && \ + dnf clean all + +ARG ENABLE_OV_PATCH +ARG OV_VERSION + +# install opencv +ARG WORKSPACE=/workspace +ARG OPENCV_REPO=https://github.com/opencv/opencv/archive/4.5.3-openvino-2021.4.2.tar.gz +WORKDIR ${WORKSPACE} +RUN wget -qO - ${OPENCV_REPO} | tar xz +WORKDIR ${WORKSPACE}/opencv-4.5.3-openvino-2021.4.2 +RUN mkdir build && mkdir install +WORKDIR ${WORKSPACE}/opencv-4.5.3-openvino-2021.4.2/build +RUN cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=${WORKSPACE}/opencv-4.5.3-openvino-2021.4.2/install \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DOPENCV_GENERATE_PKGCONFIG=ON \ + -DBUILD_DOCS=OFF \ + -DBUILD_EXAMPLES=OFF \ + -DBUILD_PERF_TESTS=OFF \ + -DBUILD_TESTS=OFF \ + -DWITH_OPENEXR=OFF \ + -DWITH_OPENJPEG=OFF \ + -DWITH_GSTREAMER=OFF \ + -DWITH_JASPER=OFF \ + .. 
&& \ + make -j16 && \ + make install + +WORKDIR ${WORKSPACE}/opencv-4.5.3-openvino-2021.4.2/install/bin +RUN bash ./setup_vars_opencv4.sh + +ENV LD_LIBRARY_PATH=${WORKSPACE}/opencv-4.5.3-openvino-2021.4.2/install/lib:$LD_LIBRARY_PATH +ENV OpenCV_DIR=${WORKSPACE}/opencv-4.5.3-openvino-2021.4.2/install/lib/cmake/opencv4 + +# install openvino +RUN dnf -y install git \ + python3-devel && \ + dnf clean all +RUN dnf -y --enablerepo=crb install python3-Cython && \ + dnf clean all + +ARG IVSR_DIR=${WORKSPACE}/ivsr +ARG IVSR_OV_DIR=${IVSR_DIR}/ivsr_ov/based_on_openvino_${OV_VERSION}/openvino +ARG CUSTOM_OV_INSTALL_DIR=${IVSR_OV_DIR}/install +ARG IVSR_SDK_DIR=${IVSR_DIR}/ivsr_sdk/ + +ARG OV_REPO=https://github.com/openvinotoolkit/openvino.git +ARG OV_BRANCH=${OV_VERSION}.0 +WORKDIR ${IVSR_OV_DIR} + +RUN git config --global user.email "noname@example.com" && \ + git config --global user.name "no name" + +RUN git clone ${OV_REPO} ${IVSR_OV_DIR} && \ + git checkout ${OV_BRANCH} && \ + git submodule update --init --recursive + +COPY ./ivsr_ov/based_on_openvino_2022.3/patches/*.patch ${IVSR_OV_DIR}/../patches/ +RUN if [ "$ENABLE_OV_PATCH" = "true" ] && [ "$OV_VERSION" = "2022.3" ]; then \ + { set -e; \ + for patch_file in $(find ../patches -iname "*.patch" | sort -n); do \ + echo "Applying: ${patch_file}"; \ + git am --whitespace=fix ${patch_file}; \ + done; }; \ + fi +RUN rm -rf ${IVSR_OV_DIR}/../patches + +WORKDIR ${IVSR_OV_DIR}/build +RUN cmake \ + -DCMAKE_INSTALL_PREFIX=${PWD}/../install \ + -DENABLE_INTEL_CPU=ON \ + -DENABLE_CLDNN=ON \ + -DENABLE_INTEL_GPU=OFF \ + -DENABLE_ONEDNN_FOR_GPU=OFF \ + -DENABLE_INTEL_GNA=OFF \ + -DENABLE_INTEL_MYRIAD_COMMON=OFF \ + -DENABLE_INTEL_MYRIAD=OFF \ + -DENABLE_PYTHON=ON \ + -DENABLE_OPENCV=ON \ + -DENABLE_SAMPLES=ON \ + -DENABLE_CPPLINT=OFF \ + -DTREAT_WARNING_AS_ERROR=OFF \ + -DENABLE_TESTS=OFF \ + -DENABLE_GAPI_TESTS=OFF \ + -DENABLE_BEH_TESTS=OFF \ + -DENABLE_FUNCTIONAL_TESTS=OFF \ + -DENABLE_OV_CORE_UNIT_TESTS=OFF \ + 
-DENABLE_OV_CORE_BACKEND_UNIT_TESTS=OFF \ + -DENABLE_DEBUG_CAPS=ON \ + -DENABLE_GPU_DEBUG_CAPS=OFF \ + -DENABLE_CPU_DEBUG_CAPS=ON \ + -DCMAKE_BUILD_TYPE=Release \ + .. && \ + make -j16 && \ + make install && \ + bash ${PWD}/../install/setupvars.sh + +ARG CUSTOM_IE_DIR=${CUSTOM_OV_INSTALL_DIR}/runtime +ARG CUSTOM_IE_LIBDIR=${CUSTOM_IE_DIR}/lib/intel64 +ARG CUSTOM_OV=${CUSTOM_IE_DIR} + +ENV OpenVINO_DIR=${CUSTOM_IE_DIR}/cmake +ENV InferenceEngine_DIR=${CUSTOM_IE_DIR}/cmake +ENV TBB_DIR=${CUSTOM_IE_DIR}/3rdparty/tbb/cmake +ENV ngraph_DIR=${CUSTOM_IE_DIR}/cmake +ENV LD_LIBRARY_PATH=${CUSTOM_IE_DIR}/3rdparty/tbb/lib:${CUSTOM_IE_LIBDIR}:$LD_LIBRARY_PATH + +# install ivsr sdk +RUN dnf -y install zlib-devel +COPY ./ivsr_sdk ${IVSR_SDK_DIR} +RUN echo ${IVSR_SDK_DIR} +WORKDIR ${IVSR_SDK_DIR}/build +RUN cmake .. \ + -DENABLE_LOG=OFF -DENABLE_PERF=OFF -DENABLE_THREADPROCESS=ON \ + -DCMAKE_BUILD_TYPE=Release && \ + make -j16 && \ + make install && \ + echo "Building vsr sdk finished." + +#build ffmpeg with iVSR SDK backend +RUN dnf -y --enablerepo=crb install nasm +RUN dnf -y --enablerepo=devel install yasm +RUN dnf -y install diffutils + +# build libx264 +WORKDIR ${WORKSPACE} +RUN git clone https://github.com/mirror/x264 -b stable --depth 1 && \ + cd x264 && \ + ./configure --enable-shared && \ + make -j16 && \ + make install + +# build libx265 +WORKDIR ${WORKSPACE} +ARG LIBX265=https://github.com/videolan/x265/archive/3.4.tar.gz +RUN wget ${LIBX265} && \ + tar xzf ./3.4.tar.gz && \ + rm ./3.4.tar.gz && \ + cd x265-3.4/build/linux && \ + cmake -DBUILD_SHARED_LIBS=ON -DHIGH_BIT_DEPTH=ON ../../source && \ + make -j16 && \ + make install + +ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig +ENV LD_LIBRARY_PATH=${IVSR_SDK_DIR}/lib:/usr/local/lib:$LD_LIBRARY_PATH + +ARG FFMPEG_IVSR_SDK_PLUGIN_DIR=${IVSR_DIR}/ivsr_ffmpeg_plugin +ARG FFMPEG_DIR=${FFMPEG_IVSR_SDK_PLUGIN_DIR}/ffmpeg + +ARG FFMPEG_REPO=https://github.com/FFmpeg/FFmpeg.git +ARG FFMPEG_VERSION=n6.1 +WORKDIR ${FFMPEG_DIR} +RUN 
git clone ${FFMPEG_REPO} ${FFMPEG_DIR} && \ + git checkout ${FFMPEG_VERSION} +COPY ./ivsr_ffmpeg_plugin/patches/*.patch ${FFMPEG_DIR}/ +RUN { set -e; \ + for patch_file in $(find -iname "*.patch" | sort -n); do \ + echo "Applying: ${patch_file}"; \ + git am --whitespace=fix ${patch_file}; \ + done; } + +RUN ./configure \ +--extra-cflags=-fopenmp \ +--extra-ldflags=-fopenmp \ +--enable-libivsr \ +--disable-static \ +--disable-doc \ +--enable-shared \ +--enable-gpl \ +--enable-libx264 \ +--enable-libx265 \ +--enable-version3 && \ +make -j16 && \ +make install + +WORKDIR ${WORKSPACE} +CMD ["/bin/bash"] diff --git a/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2022.3.dockerfile b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2022.3.dockerfile new file mode 120000 index 0000000..1d1fe94 --- /dev/null +++ b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2022.3.dockerfile @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file diff --git a/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2023.2.dockerfile b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2023.2.dockerfile new file mode 120000 index 0000000..1d1fe94 --- /dev/null +++ b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2023.2.dockerfile @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file diff --git a/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2024.5.dockerfile b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2024.5.dockerfile new file mode 120000 index 0000000..1d1fe94 --- /dev/null +++ b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2024.5.dockerfile @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file diff --git a/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2024.5s.dockerfile b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2024.5s.dockerfile new file mode 100644 index 0000000..436c835 --- /dev/null +++ b/ivsr_ffmpeg_plugin/dockerfiles/rockylinux9/ov2024.5s.dockerfile @@ -0,0 +1,137 @@ +# SPDX-License-Identifier: BSD 3-Clause License +# +# Copyright (c) 2023, Intel Corporation +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ARG IMAGE=rockylinux@sha256:d7be1c094cc5845ee815d4632fe377514ee6ebcf8efaed6892889657e5ddaaa6 +FROM $IMAGE AS base + +RUN dnf -y update && \ + dnf -y install \ + wget && \ + dnf clean all + +FROM base as build +LABEL vendor="Intel Corporation" + +RUN dnf -y install cmake \ + gcc \ + git \ + g++ && \ + dnf clean all + +ARG WORKSPACE=/workspace + +# install openvino +RUN tee /tmp/openvino-2024.repo < +Date: Tue, 25 Jun 2024 22:58:36 +0800 +Subject: [PATCH] dnn_ivsr_backend: optimized layout conversion(nchw <-> nhwc) + with openmp. + +Signed-off-by: Liang +--- + libavfilter/dnn/dnn_backend_ivsr.c | 163 ++++++++++------------------- + 1 file changed, 58 insertions(+), 105 deletions(-) + +diff --git a/libavfilter/dnn/dnn_backend_ivsr.c b/libavfilter/dnn/dnn_backend_ivsr.c +index d0f71e976d..80e8f61607 100644 +--- a/libavfilter/dnn/dnn_backend_ivsr.c ++++ b/libavfilter/dnn/dnn_backend_ivsr.c +@@ -37,6 +37,7 @@ + #include "ivsr.h" + #include "dnn_backend_common.h" + #include ++#include + + + #define DNN_MORE_FRAMES FFERRTAG('M','O','R','E') +@@ -126,6 +127,48 @@ static uint8_t clamp(uint8_t val, uint8_t min, uint8_t max) { + return val; + } + ++static void convert_nchw_to_nhwc(void* data, int N, int C, int H, int W) { ++ int data_size = N * C * H * W; ++ void *temp = av_malloc(data_size * sizeof(float)); ++ int max_threads = omp_get_num_procs() / 2; ++ // memory copy ++ #pragma omp parallel for num_threads(max_threads) ++ for (int i = 0; i < data_size; i++) ++ ((float *)temp)[i] = ((float *)data)[i]; ++ ++ // convert buffer from nchw to nhwc and reverse rgb to bgr ++ #pragma omp parallel num_threads(max_threads) ++ { ++ for (int n = 0; n < N; n++) ++ for (int h = omp_get_thread_num(); h < H; h += omp_get_num_threads()) ++ for (int w = 0; w < W; w++) ++ for (int c = 0; c < C; c++) ++ ((float *)data)[n * H * W * C + h * W * C + w * C + c] = ((float *)temp)[n * C * H * W + (C - 1 - c) * H * W + h * W + w]; ++ } ++ av_free(temp); ++} ++ ++static void 
convert_nhwc_to_nchw(void* data, int N, int C, int H, int W) { ++ int data_size = N * C * H * W; ++ void *temp = av_malloc(data_size * sizeof(float)); ++ int max_threads = omp_get_num_procs() / 2; ++ // memory copy ++ #pragma omp parallel for num_threads(max_threads) ++ for (int i = 0; i < data_size; i++) ++ ((float *)temp)[i] = ((float *)data)[i]; ++ ++ // convert buffer from nhwc to nchw and reverse bgr to rgb ++ #pragma omp parallel num_threads(max_threads) ++ { ++ for (int n = 0; n < N; n++) ++ for (int h = omp_get_thread_num(); h < H; h += omp_get_num_threads()) ++ for (int w = 0; w < W; w++) ++ for (int c = 0; c < C; c++) ++ ((float *)data)[n * C * H * W + c * H * W + h * W + w] = ((float *)temp)[n * H * W * C + h * W * C + w * C + C - 1 - c]; ++ } ++ av_free(temp); ++} ++ + /* returns + * DNN_GENERIC_ERROR, + * DNN_MORE_FRAMES - waiting for more input frames, +@@ -142,7 +185,6 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + TaskItem *task; + AVFrame *tmp_frame = NULL; + void *in_data = NULL; +- void *in_in_packed = NULL; + int dims[5] = { 0, 0, 0, 0, 0 }; + float normalize_factor = ctx->options.normalize_factor; + +@@ -186,15 +228,6 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + input.mean = 0; + input.layout = DL_NONE; + +- if (input.channels != 1) { +- in_in_packed = +- av_malloc(input.height * input.width * input.channels * +- sizeof(float)); +- if (!in_in_packed) +- return AVERROR(ENOMEM); +- } +- +- + for (int i = 0; i < ctx->options.batch_size; ++i) { + //INFO: for TSENET, lltask_queue contains (N-1)th and (N)th frames + //so peek (N)th frame. 
+@@ -218,25 +251,7 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + ivsr_model->model-> + filter_ctx); + // convert buffer from NHWC to NCHW when C != 1 +- if (input.channels != 1) { +- memcpy((uint8_t *) in_in_packed, +- (uint8_t *) input.data, +- input.height * input.width * +- input.channels * sizeof(float)); +- for (int pos = 0; +- pos < input.height * input.width; pos++) { +- for (int ch = 0; ch < input.channels; ch++) { +- ((float *) +- input.data)[(ch * input.height * +- input.width + pos)] = +- ((float *) +- in_in_packed)[(pos * +- input.channels + +- (input.channels - +- 1 - ch))]; +- } +- } +- } ++ convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width); + input.data += + input.height * input.width * + input.channels * sizeof(float); +@@ -252,7 +267,6 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + //1. copy the input_frame(ref the buffer) and put into ivsr_model->fame_queue + tmp_frame = av_frame_alloc(); + if(av_frame_ref(tmp_frame, task->in_frame) < 0) { +- if(in_in_packed) av_free(in_in_packed); + return AVERROR(ENOMEM); + } + +@@ -262,7 +276,6 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + //For the first pic in the stream + tmp_frame = av_frame_alloc(); + if(av_frame_ref(tmp_frame, task->in_frame) < 0) { +- if(in_in_packed) av_free(in_in_packed); + return AVERROR(ENOMEM); + } + av_fifo_write(ivsr_model->frame_queue, &tmp_frame, 1); +@@ -277,15 +290,7 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + for (int idx = 0; idx < ivsr_model->nif; idx++) { + //INFO: the 3 frames in frame_queue are: (N-2)th, (N-1)th, (N)th + ff_proc_from_frame_to_dnn(input_frames[idx], &input, ivsr_model->model->filter_ctx); +- //convert to NCHW layout +- memcpy((uint8_t *)in_in_packed, (uint8_t *)input.data, +- input.height * input.width * input.channels * sizeof(float)); +- for (int pos = 0; pos < input.height * input.width; pos++) { +- for (int ch = 0; ch < input.channels; ch++) { +- ((float 
*)input.data)[(ch * input.height * input.width + pos)] = +- ((float *)in_in_packed)[(pos * input.channels + (input.channels - 1 - ch))]; +- } +- } ++ convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width); + input.data += input.height * input.width * input.channels * sizeof(float); + } + input.data = in_data; +@@ -295,7 +300,6 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + av_frame_free(&tmp_frame); + // INFO: for the last frame, peek_back and pop_front get the same frame, so don't have to handle EOS specifically + } else { +- if(in_in_packed) av_free(in_in_packed); + return DNN_MORE_FRAMES; + } + } else { +@@ -307,23 +311,13 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + ivsr_model->model-> + filter_ctx); + if (input.channels != 1) { +- // convert buffer from NHWC to NCHW and multiply normalize_factor +- memcpy((uint8_t*)in_in_packed, +- (uint8_t*)input.data, +- input.height * input.width * input.channels * sizeof(float)); +- for (int pos = 0; pos < input.height * input.width; pos++) { +- for (int ch = 0; ch < input.channels; ch++) { +- ((float*)input.data)[(ch * input.height * input.width + pos)] = +- ((float*)in_in_packed)[(pos * input.channels + (input.channels - 1 - ch))] * normalize_factor; +- } +- } +- } else if (normalize_factor != 1) { ++ convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width); ++ } ++ if (normalize_factor != 1) { + // do not need to covert buffer from NHWC to NCHW if the channels is 1, only need to mulitple normalize_factor +- for (int pos = 0; pos < input.height * input.width; pos++) { +- for (int ch = 0; ch < input.channels; ch++) { +- ((float*)input.data)[(ch * input.height * input.width + pos)] = +- ((float*)input.data)[ch * input.height * input.width + pos] * normalize_factor; +- } ++ #pragma omp parallel for ++ for (int pos = 0; pos < input.height * input.width * input.channels; pos++) { ++ ((float*)input.data)[pos] = ((float*)input.data)[pos] * 
normalize_factor; + } + } + } +@@ -339,8 +333,6 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + input.width * input.height * input.channels * + get_datatype_size(input.dt); + } +- if (in_in_packed) +- av_free(in_in_packed); + return 0; + } + +@@ -355,7 +347,6 @@ static void infer_completion_callback(void *args) + DNNData output; + IVSRContext *ctx = &ivsr_model->ctx; + AVFrame *tmp_frame = NULL; +- void *out_in_planar = NULL; + int offset = 0; + int dims[5] = { 0, 0, 0, 0, 0 }; + float normalize_factor = ctx->options.normalize_factor; +@@ -390,18 +381,6 @@ static void infer_completion_callback(void *args) + output.scale = 0; + output.mean = 0; + output.layout = DL_NONE; +- if (output.channels != 1) { +- out_in_planar = +- av_malloc(output.height * output.width * output.channels * +- sizeof(float)); +- if (!out_in_planar) { +- av_log(ctx, AV_LOG_ERROR, +- "Failed to allocate array with %ld bytes!\n", +- output.height * output.width * output.channels * +- sizeof(float)); +- return; +- } +- } + + av_assert0(request->lltask_count <= dims[0]); + av_assert0(request->lltask_count >= 1); +@@ -423,21 +402,7 @@ static void infer_completion_callback(void *args) + offset); + if (ret == 0) { + if (output.channels != 1) { +- memcpy((uint8_t *) out_in_planar, +- (uint8_t *) output.data, +- output.height * output.width * +- output.channels * sizeof(float)); +- for (int pos = 0; +- pos < output.height * output.width; +- pos++) { +- for (int ch = 0; ch < output.channels; +- ch++) { +- ((float *) +- output.data)[(pos * output.channels + +- ch)] = ((float *) +- out_in_planar)[((output.channels - 1 - ch) * output.height * output.width + pos)]; +- } +- } ++ convert_nchw_to_nhwc(output.data, 1, output.channels, output.height, output.width); + } + ff_proc_from_dnn_to_frame(tmp_frame, &output, + &ivsr_model->model-> +@@ -460,23 +425,13 @@ static void infer_completion_callback(void *args) + } else { + if (output.channels != 1) { + //convert buffer from NCHW to NHWC +- 
memcpy((uint8_t*)out_in_planar, +- (uint8_t*)output.data, +- output.height * output.width * output.channels * sizeof(float)); +- for (int pos = 0; pos < output.height * output.width; pos++) { +- for (int ch = 0; ch < output.channels; ch++) { +- ((float*)output.data)[(pos * output.channels + ch)] = +- ((float*) +- out_in_planar)[((output.channels - 1 - ch) * output.height * output.width + pos)] / normalize_factor; +- } +- } +- } else if (normalize_factor != 1) { ++ convert_nchw_to_nhwc(output.data, 1, output.channels, output.height, output.width); ++ } ++ if (normalize_factor != 1) { ++ #pragma omp parallel for + // only need to devide by normalize_factor for channels = 1. +- for (int pos = 0; pos < output.height * output.width; pos++) { +- for (int ch = 0; ch < output.channels; ch++) { +- ((float*)output.data)[(pos * output.channels + ch)] = +- ((float*)output.data)[pos * output.channels + ch] / normalize_factor; +- } ++ for (int pos = 0; pos < output.height * output.width * output.channels; pos++) { ++ ((float*)output.data)[pos] = ((float*)output.data)[pos] / normalize_factor; + } + } + ff_proc_from_dnn_to_frame(task->out_frame, &output, +@@ -504,8 +459,6 @@ static void infer_completion_callback(void *args) + output.width * output.height * output.channels * + get_datatype_size(output.dt); + } +- if (out_in_planar) +- av_free(out_in_planar); + + request->lltask_count = 0; + if (ff_safe_queue_push_back(requestq, request) < 0) { +-- +2.34.1 + diff --git a/ivsr_ffmpeg_plugin/patches/0020-dnn_ivsr_backend-process-non-8-aligned-resolution-to.patch b/ivsr_ffmpeg_plugin/patches/0020-dnn_ivsr_backend-process-non-8-aligned-resolution-to.patch new file mode 100644 index 0000000..535fdd4 --- /dev/null +++ b/ivsr_ffmpeg_plugin/patches/0020-dnn_ivsr_backend-process-non-8-aligned-resolution-to.patch @@ -0,0 +1,218 @@ +From 2990f5107f2e531d3ac1a927f6d3551529868958 Mon Sep 17 00:00:00 2001 +From: Xiaoxia Liang +Date: Fri, 26 Jul 2024 18:57:53 +0000 +Subject: [PATCH] 
dnn_ivsr_backend: process non-8 aligned resolution to make + the video processing model can run at any resoultion. + +Padding DNN data buffer to 8 aligned and then do video processing and +then crop to resolution same as input. + +Signed-off-by: Xiaoxia Liang +--- + libavfilter/dnn/dnn_backend_ivsr.c | 52 +++++++++++++++++++++++++++++- + libavfilter/dnn/dnn_io_proc.c | 8 ++--- + libavfilter/vf_dnn_processing.c | 20 ++++++------ + 3 files changed, 65 insertions(+), 15 deletions(-) + +diff --git a/libavfilter/dnn/dnn_backend_ivsr.c b/libavfilter/dnn/dnn_backend_ivsr.c +index 189705d309..6f2f5f0f07 100644 +--- a/libavfilter/dnn/dnn_backend_ivsr.c ++++ b/libavfilter/dnn/dnn_backend_ivsr.c +@@ -57,6 +57,10 @@ typedef struct IVSROptions { + typedef struct IVSRContext { + const AVClass *class; + IVSROptions options; ++ uint32_t frame_input_height; ++ uint32_t frame_input_width; ++ uint32_t model_input_height; ++ uint32_t model_input_width; + } IVSRContext; + + typedef enum { +@@ -105,6 +109,8 @@ static const AVOption dnn_ivsr_options[] = { + + AVFILTER_DEFINE_CLASS(dnn_ivsr); + ++#define ALIGNED_SIZE 8 ++ + static int get_datatype_size(DNNDataType dt) + { + switch (dt) { +@@ -169,6 +175,20 @@ static void convert_nhwc_to_nchw(void* data, int N, int C, int H, int W) { + av_free(temp); + } + ++/** ++ * set value for padding right and bottom. 
++ */ ++static void set_padding_value(void* data, uint32_t width, uint32_t height, uint32_t padding_width, uint32_t padding_height, int padding_value) { ++ int n_width = width + padding_width; ++ for (int h = 0; h < height; ++h) { ++ int index = h * (n_width) + width; ++ memset(data + index, padding_value, padding_width); ++ } ++ ++ int index = height * n_width; ++ memset(data + index, padding_value, padding_height * n_width); ++} ++ + /* returns + * DNN_GENERIC_ERROR, + * DNN_MORE_FRAMES - waiting for more input frames, +@@ -187,6 +207,7 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + void *in_data = NULL; + int dims[5] = { 0, 0, 0, 0, 0 }; + float normalize_factor = ctx->options.normalize_factor; ++ int padding_height = 0, padding_width = 0; + + status = ivsr_get_attr(ivsr_model->handle, INPUT_TENSOR_DESC, dims); + if (status != OK) { +@@ -227,7 +248,11 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + input.scale = 0; + input.mean = 0; + input.layout = DL_NONE; ++ ctx->model_input_height = input.height; ++ ctx->model_input_width = input.width; + ++ padding_height = ctx->model_input_height - ctx->frame_input_height; ++ padding_width = ctx->model_input_width - ctx->frame_input_width; + for (int i = 0; i < ctx->options.batch_size; ++i) { + //INFO: for TSENET, lltask_queue contains (N-1)th and (N)th frames + //so peek (N)th frame. +@@ -242,6 +267,18 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + ivsr_model->model-> + filter_ctx); + } else { ++ // reset bottom and right to 0 when size of input frame < model required. 
++ if (padding_height > 0 || padding_width > 0) { ++ uint32_t padding_width_bytes = (padding_width) * input.channels * get_datatype_size(input.dt); ++ for (int i = 0; i < ivsr_model->nif; ++i) { ++ set_padding_value(input.data, ctx->frame_input_width * input.channels * get_datatype_size(input.dt), ctx->frame_input_height, ++ padding_width_bytes, padding_height, 0); ++ input.data += ++ input.height * input.width * ++ input.channels * get_datatype_size(input.dt); ++ } ++ input.data = in_data; ++ } + if (ivsr_model->model_type == BASICVSR && dims[2] != 1) { + int read_frame_num = 0; + for (int j = 0; j < dims[2]; j++) { +@@ -602,8 +639,11 @@ static int get_output_ivsr(void *model, const char *input_name, + } + + switch (ivsr_model->model_type) { +- case BASICVSR: + case VIDEOPROC: ++ *output_height = input_height; ++ *output_width = input_width; ++ break; ++ case BASICVSR: + case EDSR: + case CUSTVSR: + case TSENET: +@@ -707,6 +747,8 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + AVFilterLink *inlink = filter_ctx->inputs[0]; + int frame_h = inlink->h; + int frame_w = inlink->w; ++ ctx->frame_input_height = inlink->h; ++ ctx->frame_input_width = inlink->w; + + // input_res setting + config_input_res = av_mallocz(sizeof(ivsr_config_t)); +@@ -735,6 +777,11 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + sprintf(shape_string, "1,3,3,%d,%d", frame_h, frame_w); + break; + case VIDEOPROC: ++ // the input resoultion required 8-aligned ++ frame_h = (frame_h + ALIGNED_SIZE - 1) / ALIGNED_SIZE * ALIGNED_SIZE; ++ frame_w = (frame_w + ALIGNED_SIZE - 1) / ALIGNED_SIZE * ALIGNED_SIZE; ++ sprintf(shape_string, "1,3,%d,%d", frame_h, frame_w); ++ break; + case EDSR: + sprintf(shape_string, "1,3,%d,%d", frame_h, frame_w); + break; +@@ -834,6 +881,9 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + item->in_frames = + av_malloc(input_dims[0] * input_dims[1] * input_dims[2] * + input_dims[3] * input_dims[4] * sizeof(float)); ++ ++ int 
input_byte_size = input_dims[0] * input_dims[1] * input_dims[2] * input_dims[3] * input_dims[4] * sizeof(float); ++ memset(item->in_frames, 0, input_byte_size); + if (!item->in_frames) { + av_log(ctx, AV_LOG_ERROR, "Failed to malloc in frames\n"); + goto err; +diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c +index ab656e8ed7..1465d32c32 100644 +--- a/libavfilter/dnn/dnn_io_proc.c ++++ b/libavfilter/dnn/dnn_io_proc.c +@@ -98,7 +98,7 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + goto err; + } + sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0}, +- (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height, ++ (const int[4]){output->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height, + (uint8_t * const*)dst_data, linesize); + sws_freeContext(sws_ctx); + // convert data from planar to packed +@@ -163,7 +163,7 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + goto err; + } + sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0}, +- (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height, ++ (const int[4]){output->width * src_datatype_size, 0, 0, 0}, 0, frame->height, + (uint8_t * const*)frame->data, frame->linesize); + sws_freeContext(sws_ctx); + break; +@@ -272,7 +272,7 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + sws_scale(sws_ctx, (const uint8_t **)src_data, + linesize, 0, frame->height, + (uint8_t * const [4]){input->data, 0, 0, 0}, +- (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0}); ++ (const int [4]){input->width * 3 * dst_datatype_size, 0, 0, 0}); + sws_freeContext(sws_ctx); + break; + case AV_PIX_FMT_GRAYF32: +@@ -305,7 +305,7 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + sws_scale(sws_ctx, (const uint8_t **)frame->data, + frame->linesize, 0, frame->height, + (uint8_t * const 
[4]){input->data, 0, 0, 0}, +- (const int [4]){frame->width * dst_datatype_size, 0, 0, 0}); ++ (const int [4]){input->width * dst_datatype_size, 0, 0, 0}); + sws_freeContext(sws_ctx); + break; + default: +diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c +index 5208a72b6f..2b7656f21a 100644 +--- a/libavfilter/vf_dnn_processing.c ++++ b/libavfilter/vf_dnn_processing.c +@@ -104,16 +104,16 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin + enum AVPixelFormat fmt = inlink->format; + + // the design is to add explicit scale filter before this filter +- if (model_input->height != -1 && model_input->height != inlink->h) { +- av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n", +- model_input->height, inlink->h); +- return AVERROR(EIO); +- } +- if (model_input->width != -1 && model_input->width != inlink->w) { +- av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n", +- model_input->width, inlink->w); +- return AVERROR(EIO); +- } ++ // if (model_input->height != -1 && model_input->height != inlink->h) { ++ // av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n", ++ // model_input->height, inlink->h); ++ // return AVERROR(EIO); ++ // } ++ // if (model_input->width != -1 && model_input->width != inlink->w) { ++ // av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n", ++ // model_input->width, inlink->w); ++ // return AVERROR(EIO); ++ // } + if (model_input->dt != DNN_FLOAT) { + avpriv_report_missing_feature(ctx, "data type rather than DNN_FLOAT"); + return AVERROR(EIO); +-- +2.34.1 + diff --git a/ivsr_ffmpeg_plugin/patches/0021-Enable-async-request-infer-and-refine-the-config-set.patch b/ivsr_ffmpeg_plugin/patches/0021-Enable-async-request-infer-and-refine-the-config-set.patch new file mode 100644 index 0000000..4063b0e --- /dev/null +++ 
b/ivsr_ffmpeg_plugin/patches/0021-Enable-async-request-infer-and-refine-the-config-set.patch @@ -0,0 +1,227 @@ +From d50841f2a463d0d4c6ec03c6ddbb7327f0f8a00a Mon Sep 17 00:00:00 2001 +From: LinXie +Date: Fri, 23 Aug 2024 15:08:26 +0000 +Subject: [PATCH] Enable async request infer and refine the config setting + +--- + libavfilter/dnn/dnn_backend_ivsr.c | 123 +++++++++++------------------ + 1 file changed, 48 insertions(+), 75 deletions(-) + +diff --git a/libavfilter/dnn/dnn_backend_ivsr.c b/libavfilter/dnn/dnn_backend_ivsr.c +index 6f2f5f0f07..1f7dfff743 100644 +--- a/libavfilter/dnn/dnn_backend_ivsr.c ++++ b/libavfilter/dnn/dnn_backend_ivsr.c +@@ -423,7 +423,6 @@ static void infer_completion_callback(void *args) + av_assert0(request->lltask_count >= 1); + for (int i = 0; i < request->lltask_count; ++i) { + task = request->lltasks[i]->task; +- task->inference_done++; + + if (task->do_ioproc) { + if (ivsr_model->model->frame_post_proc != NULL) { +@@ -490,6 +489,7 @@ static void infer_completion_callback(void *args) + task->out_frame->height = output.height; + } + ++ task->inference_done++; + av_freep(&request->lltasks[i]); + output.data = + (uint8_t *) output.data + +@@ -599,8 +599,8 @@ static int execute_model_ivsr(IVSRRequestItem * request, + goto err; + } + status = +- ivsr_process(ivsr_model->handle, request->in_frames, +- request->out_frames, &request->cb); ++ ivsr_process_async(ivsr_model->handle, request->in_frames, ++ request->out_frames, &request->cb); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, + "Failed to process the inference on input data seq\n"); +@@ -658,6 +658,22 @@ static int get_output_ivsr(void *model, const char *input_name, + return ret; + } + ++// Utility function to create and link config ++static ivsr_config_t* create_and_link_config(ivsr_config_t *previous, ++ int key, char *value, void *ctx) { ++ ivsr_config_t *config = av_mallocz(sizeof(ivsr_config_t)); ++ if (config == NULL) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to malloc 
config\n"); ++ return NULL; ++ } ++ config->key = key; ++ config->value = value; ++ if (previous != NULL) { ++ previous->next = config; ++ } ++ return config; ++} ++ + DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + DNNFunctionType func_type, + const char *options, +@@ -667,12 +683,12 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + IVSRModel *ivsr_model = NULL; + IVSRContext *ctx = NULL; + IVSRStatus status; +- ivsr_config_t *config = NULL; + ivsr_config_t *config_device = NULL; + ivsr_config_t *config_customlib = NULL; + ivsr_config_t *config_cldnn = NULL; + ivsr_config_t *config_reshape = NULL; + ivsr_config_t *config_input_res = NULL; ++ ivsr_config_t *config_nireq = NULL; + int nif = 0; + int input_dims[5] = { 0, 0, 0, 0, 1 }; + int output_dims[5] = { 0, 0, 0, 0, 1 }; +@@ -714,35 +730,19 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + // the default value is a rough estimation + ctx->options.nireq = av_cpu_count() / 2 + 1; + } +- //TODO: override the 2 values before async mode in iVSR SDK is supported +- //"async == 1/TRUE" is misleading as it's actually not supported by SDK +- ctx->options.nireq = 1; +- ctx->options.async = 1; + + ivsr_model->model_type = ctx->options.model_type; + + // set ivsr config + // input model +- ivsr_model->config = av_mallocz(sizeof(ivsr_config_t)); +- config = ivsr_model->config; +- if (config == NULL) { +- av_log(ctx, AV_LOG_ERROR, "Failed to malloc config\n"); ++ ivsr_model->config = create_and_link_config(NULL, INPUT_MODEL, model_filename, ctx); ++ if (ivsr_model->config == NULL) + goto err; +- } +- config->key = INPUT_MODEL; +- config->value = model_filename; +- config->next = NULL; +- +- // target device +- config_device = av_mallocz(sizeof(ivsr_config_t)); +- if (config_device == NULL) { +- av_log(ctx, AV_LOG_ERROR, "Failed to malloc device config\n"); ++ ++ config_device = create_and_link_config(ivsr_model->config, TARGET_DEVICE, ++ ctx->options.device_type, ctx); ++ if 
(config_device == NULL) + goto err; +- } +- config_device->key = TARGET_DEVICE; +- config_device->value = ctx->options.device_type; +- config_device->next = NULL; +- config->next = config_device; + + AVFilterLink *inlink = filter_ctx->inputs[0]; + int frame_h = inlink->h; +@@ -751,27 +751,19 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + ctx->frame_input_width = inlink->w; + + // input_res setting +- config_input_res = av_mallocz(sizeof(ivsr_config_t)); +- if (config_input_res == NULL) { +- av_log(ctx, AV_LOG_ERROR, "Failed to malloc input_res config\n"); +- goto err; +- } +- + char input_res_string[40] = {0}; +- sprintf(input_res_string, "%d,%d\0", frame_w, frame_h); +- config_input_res->key = INPUT_RES; +- config_input_res->value = input_res_string; +- config_input_res->next = NULL; +- config_device->next = config_input_res; +- +- // reshape setting +- config_reshape = av_mallocz(sizeof(ivsr_config_t)); +- if (config_reshape == NULL) { +- av_log(ctx, AV_LOG_ERROR, "Failed to malloc reshape config\n"); ++ sprintf(input_res_string, "%d,%d", frame_w, frame_h); ++ config_input_res = create_and_link_config(config_device, INPUT_RES, input_res_string, ctx); ++ if (config_input_res == NULL) ++ goto err; ++ ++ char nireq_string[40] = {0}; ++ sprintf(nireq_string, "%d", ctx->options.nireq); ++ config_nireq = create_and_link_config(config_input_res, INFER_REQ_NUMBER, nireq_string, ctx); ++ if (config_nireq == NULL) + goto err; +- } + +- char shape_string[40]; ++ char shape_string[40] = {0}; + switch (ivsr_model->model_type) { + case BASICVSR: + sprintf(shape_string, "1,3,3,%d,%d", frame_h, frame_w); +@@ -795,45 +787,26 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + av_log(ctx, AV_LOG_ERROR, "Not supported model type\n"); + return DNN_GENERIC_ERROR; + } +- config_reshape->key = RESHAPE_SETTINGS; +- // by default it sets the same resolution as input res config. +- // If you want to enable smart-patch, set a smaller shape than the input. 
+- config_reshape->value = shape_string; +- config_reshape->next = NULL; +- config_input_res->next = config_reshape; ++ config_reshape = create_and_link_config(config_nireq, RESHAPE_SETTINGS, shape_string, ctx); ++ if (config_reshape == NULL) ++ goto err; + + if (ctx->options.extension != NULL) { +- // extension +- config_customlib = av_mallocz(sizeof(ivsr_config_t)); +- if (config_customlib == NULL) { +- av_log(ctx, AV_LOG_ERROR, "Failed to malloc customlib config\n"); ++ config_customlib = create_and_link_config(config_reshape, CUSTOM_LIB, ctx->options.extension, ctx); ++ if (config_customlib == NULL) + goto err; +- } +- config_customlib->key = CUSTOM_LIB; +- config_customlib->value = ctx->options.extension; +- config_customlib->next = NULL; +- config_reshape->next = config_customlib; + } + + if (ctx->options.op_xml != NULL) { +- // cldnn +- config_cldnn = av_mallocz(sizeof(ivsr_config_t)); +- if (config_cldnn == NULL) { +- av_log(ctx, AV_LOG_ERROR, "Failed to malloc cldnn config\n"); ++ config_cldnn = create_and_link_config(ctx->options.extension != NULL ? 
++ config_customlib : config_reshape, ++ CLDNN_CONFIG, ctx->options.op_xml, ctx); ++ if (config_cldnn == NULL) + goto err; +- } +- config_cldnn->key = CLDNN_CONFIG; +- config_cldnn->value = ctx->options.op_xml; +- config_cldnn->next = NULL; +- if (config_customlib != NULL) { +- config_customlib->next = config_cldnn; +- } else { +- config_reshape->next = config_cldnn; +- } + } + + // initialize ivsr +- status = ivsr_init(config, &ivsr_model->handle); ++ status = ivsr_init(ivsr_model->config, &ivsr_model->handle); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to initialize ivsr engine\n"); + goto err; +@@ -1052,8 +1025,8 @@ int ff_dnn_flush_ivsr(const DNNModel * model) + } + + status = +- ivsr_process(ivsr_model->handle, request->in_frames, +- request->out_frames, &request->cb); ++ ivsr_process_async(ivsr_model->handle, request->in_frames, ++ request->out_frames, &request->cb); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, + "Failed to process the inference on input data seq\n"); +-- +2.34.1 + diff --git a/ivsr_ffmpeg_plugin/patches/0022-enable-10bit-for-YUV-and-16bit-for-RGB-support.patch b/ivsr_ffmpeg_plugin/patches/0022-enable-10bit-for-YUV-and-16bit-for-RGB-support.patch new file mode 100644 index 0000000..6764dd2 --- /dev/null +++ b/ivsr_ffmpeg_plugin/patches/0022-enable-10bit-for-YUV-and-16bit-for-RGB-support.patch @@ -0,0 +1,566 @@ +From 45367468d592018fa13fc62cdd4478cbecaebaee Mon Sep 17 00:00:00 2001 +From: Xueshu Wang +Date: Tue, 27 Aug 2024 18:11:35 +0800 +Subject: [PATCH] enable 10bit(for YUV) and 16bit(for RGB) support. 
+ +--- + libavfilter/dnn/dnn_backend_ivsr.c | 36 ++++++--- + libavfilter/dnn/dnn_io_proc.c | 125 ++++++++++++++++++----------- + libavfilter/dnn_interface.h | 2 +- + libavfilter/vf_dnn_processing.c | 14 +++- + libswscale/swscale_unscaled.c | 110 +++++++++++++++++++++++++ + 5 files changed, 229 insertions(+), 58 deletions(-) + +diff --git a/libavfilter/dnn/dnn_backend_ivsr.c b/libavfilter/dnn/dnn_backend_ivsr.c +index 9bee7d1277..550e64915a 100644 +--- a/libavfilter/dnn/dnn_backend_ivsr.c ++++ b/libavfilter/dnn/dnn_backend_ivsr.c +@@ -103,7 +103,7 @@ static const AVOption dnn_ivsr_options[] = { + { "extension", "extension lib file full path, usable for BasicVSR model", OFFSET(options.extension), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS}, + { "op_xml", "custom op xml file full path, usable for BasicVSR model", OFFSET(options.op_xml), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS}, + { "model_type", "dnn model type", OFFSET(options.model_type), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MODEL_TYPE_NUM - 1, FLAGS}, +- { "normalize_factor", "normalization factor", OFFSET(options.normalize_factor), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 1.0, 255.0, FLAGS}, ++ { "normalize_factor", "normalization factor", OFFSET(options.normalize_factor), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 1.0, 65535.0, FLAGS}, + { NULL } + }; + +@@ -118,13 +118,15 @@ static int get_datatype_size(DNNDataType dt) + return sizeof(float); + case DNN_UINT8: + return sizeof(uint8_t); ++ case DNN_UINT16: ++ return sizeof(uint16_t); + default: + av_assert0(!"not supported yet."); + return 1; + } + } + +-static uint8_t clamp(uint8_t val, uint8_t min, uint8_t max) { ++static int clamp(int val, int min, int max) { + if (val < min) + return min; + else if (val > max) +@@ -418,7 +420,9 @@ static void infer_completion_callback(void *args) + output.scale = 0; + output.mean = 0; + output.layout = DL_NONE; +- ++ const AVPixFmtDescriptor* pix_desc = av_pix_fmt_desc_get(task->out_frame->format); ++ const 
AVComponentDescriptor* comp_desc = &pix_desc->comp[0]; ++ int bits = comp_desc->depth; + av_assert0(request->lltask_count <= dims[0]); + av_assert0(request->lltask_count >= 1); + for (int i = 0; i < request->lltask_count; ++i) { +@@ -450,7 +454,7 @@ static void infer_completion_callback(void *args) + uint8_t min_x = 16, max_x = 235; + for (int index = 0; index < tmp_frame->height * tmp_frame->linesize[0]; ++index) { + uint8_t value = tmp_frame->data[0][index]; +- tmp_frame->data[0][index] = clamp(tmp_frame->data[0][index], min_x, max_x); ++ tmp_frame->data[0][index] = (uint8_t)clamp(tmp_frame->data[0][index], min_x, max_x); + } + } + output.data += +@@ -476,11 +480,25 @@ static void infer_completion_callback(void *args) + filter_ctx); + // clamp output to [16, 235] range for Y plane when color range of output is TV range, + // assume model only process Y plane when output.channels = 1. AVCOL_RANGE_MPEG is mean tv range. +- if (task->out_frame->color_range == AVCOL_RANGE_MPEG && output.channels == 1) { +- uint8_t min_x = 16, max_x = 235; +- for (int index = 0; index < task->out_frame->height * task->out_frame->linesize[0]; ++index) { +- uint8_t value = task->out_frame->data[0][index]; +- task->out_frame->data[0][index] = clamp(task->out_frame->data[0][index], min_x, max_x); ++ if (task->out_frame->color_range == AVCOL_RANGE_MPEG && output.channels == 1) { ++ if (bits == 8) { ++ uint8_t min_x = 16, max_x = 235; ++ for (int index = 0; index < task->out_frame->height * task->out_frame->linesize[0]; ++ ++index) { ++ uint8_t value = task->out_frame->data[0][index]; ++ task->out_frame->data[0][index] = (uint8_t)clamp(task->out_frame->data[0][index], ++ min_x, max_x); ++ } ++ } else if (bits == 10) { ++ uint16_t min_x = 64, max_x = 940; ++ uint16_t* dstPtr = (uint16_t*)task->out_frame->data[0]; ++ ptrdiff_t dstStrideUint16 = task->out_frame->linesize[0] >> 1; ++ for (int y = 0; y < task->out_frame->height; ++y) { ++ for (int x = 0; x < task->out_frame->width; ++x) { ++ 
dstPtr[x] = (uint16_t)clamp(dstPtr[x], min_x, max_x); ++ } ++ dstPtr += dstStrideUint16; ++ } + } + } + } +diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c +index 1465d32c32..f51c0669a9 100644 +--- a/libavfilter/dnn/dnn_io_proc.c ++++ b/libavfilter/dnn/dnn_io_proc.c +@@ -32,6 +32,8 @@ static int get_datatype_size(DNNDataType dt) + return sizeof(float); + case DNN_UINT8: + return sizeof(uint8_t); ++ case DNN_UINT16: ++ return sizeof(uint16_t); + default: + av_assert0(!"not supported yet."); + return 1; +@@ -46,10 +48,15 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + void **dst_data = NULL; + void *middle_data = NULL; + uint8_t *planar_data[4] = { 0 }; +- int plane_size = frame->width * frame->height * sizeof(uint8_t); ++ int plane_size = 0; + enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE; ++ enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE; ++ enum AVPixelFormat mdl_fmt = AV_PIX_FMT_NONE; + int src_datatype_size = get_datatype_size(output->dt); +- ++ const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(frame->format); ++ const AVComponentDescriptor *comp_desc = &pix_desc->comp[0]; ++ int bits = comp_desc->depth; ++ const char *pix_fmt_name = av_get_pix_fmt_name(frame->format); + int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); + if (bytewidth < 0) { + return AVERROR(EINVAL); +@@ -69,6 +76,7 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + + dst_data = (void **)frame->data; + linesize[0] = frame->linesize[0]; ++ plane_size = linesize[0] * frame->height; + if (output->layout == DL_NCHW) { + middle_data = av_malloc(plane_size * output->channels); + if (!middle_data) { +@@ -80,20 +88,23 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + } + + switch (frame->format) { ++ case AV_PIX_FMT_RGB48LE: ++ case AV_PIX_FMT_BGR48LE: + case AV_PIX_FMT_RGB24: + case AV_PIX_FMT_BGR24: ++ dst_fmt = comp_desc->depth == 8 ? 
AV_PIX_FMT_GRAY8 : AV_PIX_FMT_GRAY16; + sws_ctx = sws_getContext(frame->width * 3, + frame->height, + src_fmt, + frame->width * 3, + frame->height, +- AV_PIX_FMT_GRAY8, ++ dst_fmt, + 0, NULL, NULL, NULL); + if (!sws_ctx) { + av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " + "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", + av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height, +- av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height); ++ av_get_pix_fmt_name(dst_fmt), frame->width * 3, frame->height); + ret = AVERROR(EINVAL); + goto err; + } +@@ -103,9 +114,10 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + sws_freeContext(sws_ctx); + // convert data from planar to packed + if (output->layout == DL_NCHW) { ++ mdl_fmt = comp_desc->depth == 8 ? AV_PIX_FMT_GBRP : AV_PIX_FMT_GBRP16LE; + sws_ctx = sws_getContext(frame->width, + frame->height, +- AV_PIX_FMT_GBRP, ++ mdl_fmt, + frame->width, + frame->height, + frame->format, +@@ -113,24 +125,27 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + if (!sws_ctx) { + av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " + "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", +- av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height, +- av_get_pix_fmt_name(frame->format),frame->width, frame->height); ++ av_get_pix_fmt_name(mdl_fmt), frame->width, frame->height, ++ av_get_pix_fmt_name(frame->format), frame->width, frame->height); + ret = AVERROR(EINVAL); + goto err; + } +- if (frame->format == AV_PIX_FMT_RGB24) { +- planar_data[0] = (uint8_t *)middle_data + plane_size; +- planar_data[1] = (uint8_t *)middle_data + plane_size * 2; +- planar_data[2] = (uint8_t *)middle_data; +- } else if (frame->format == AV_PIX_FMT_BGR24) { +- planar_data[0] = (uint8_t *)middle_data + plane_size; +- planar_data[1] = (uint8_t *)middle_data; +- planar_data[2] = (uint8_t *)middle_data + plane_size * 2; ++ 
if (strstr(pix_fmt_name, "rgb") != NULL) { ++ planar_data[0] = (uint8_t*)middle_data + plane_size; ++ planar_data[1] = (uint8_t*)middle_data + plane_size * 2; ++ planar_data[2] = (uint8_t*)middle_data; ++ } else if (strstr(pix_fmt_name, "bgr") != NULL) { ++ planar_data[0] = (uint8_t*)middle_data + plane_size; ++ planar_data[1] = (uint8_t*)middle_data; ++ planar_data[2] = (uint8_t*)middle_data + plane_size * 2; ++ } else { ++ av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't support this format: %s\n", pix_fmt_name); ++ return AVERROR(ENOSYS); + } +- sws_scale(sws_ctx, (const uint8_t * const *)planar_data, +- (const int [4]){frame->width * sizeof(uint8_t), +- frame->width * sizeof(uint8_t), +- frame->width * sizeof(uint8_t), 0}, ++ ++ int middle_data_linesize[4] = {0}; ++ ret = av_image_fill_linesizes(middle_data_linesize, mdl_fmt, frame->width); ++ sws_scale(sws_ctx, (const uint8_t * const *)planar_data, middle_data_linesize, + 0, frame->height, frame->data, frame->linesize); + sws_freeContext(sws_ctx); + } +@@ -147,18 +162,21 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + case AV_PIX_FMT_YUV411P: + case AV_PIX_FMT_GRAY8: + case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_YUV420P10LE: ++ av_assert0(comp_desc->depth == 8 || comp_desc->depth == 10); ++ dst_fmt = comp_desc->depth == 8 ? 
AV_PIX_FMT_GRAY8 : AV_PIX_FMT_GRAY10; + sws_ctx = sws_getContext(frame->width, + frame->height, +- AV_PIX_FMT_GRAYF32, ++ src_fmt, + frame->width, + frame->height, +- AV_PIX_FMT_GRAY8, ++ dst_fmt, + 0, NULL, NULL, NULL); + if (!sws_ctx) { + av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " + "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", + av_get_pix_fmt_name(src_fmt), frame->width, frame->height, +- av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height); ++ av_get_pix_fmt_name(dst_fmt), frame->width, frame->height); + ret = AVERROR(EINVAL); + goto err; + } +@@ -186,9 +204,15 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + void **src_data = NULL; + void *middle_data = NULL; + uint8_t *planar_data[4] = { 0 }; +- int plane_size = frame->width * frame->height * sizeof(uint8_t); ++ int plane_size = 0; + enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE; ++ enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE; ++ enum AVPixelFormat mdl_fmt = AV_PIX_FMT_NONE; + int dst_datatype_size = get_datatype_size(input->dt); ++ const AVPixFmtDescriptor* pix_desc = av_pix_fmt_desc_get(frame->format); ++ const AVComponentDescriptor* comp_desc = &pix_desc->comp[0]; ++ int bits = comp_desc->depth; ++ const char *pix_fmt_name = av_get_pix_fmt_name(frame->format); + int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); + if (bytewidth < 0) { + return AVERROR(EINVAL); +@@ -208,55 +232,61 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + + src_data = (void **)frame->data; + linesize[0] = frame->linesize[0]; +- if (input->layout == DL_NCHW) { +- middle_data = av_malloc(plane_size * input->channels); +- if (!middle_data) { +- ret = AVERROR(ENOMEM); +- goto err; +- } +- src_data = &middle_data; +- linesize[0] = frame->width * 3; +- } ++ plane_size = linesize[0] * frame->height; + + switch (frame->format) { ++ case AV_PIX_FMT_RGB48LE: ++ case AV_PIX_FMT_BGR48LE: + case 
AV_PIX_FMT_RGB24: + case AV_PIX_FMT_BGR24: +- // convert data from planar to packed + if (input->layout == DL_NCHW) { ++ av_assert0(comp_desc->depth == 8 || comp_desc->depth == 16); ++ mdl_fmt = comp_desc->depth == 8 ? AV_PIX_FMT_GBRP : AV_PIX_FMT_GBRP16LE; ++ middle_data = av_malloc(plane_size * input->channels); ++ if (!middle_data) { ++ ret = AVERROR(ENOMEM); ++ goto err; ++ } ++ src_data = &middle_data; + sws_ctx = sws_getContext(frame->width, + frame->height, + frame->format, + frame->width, + frame->height, +- AV_PIX_FMT_GBRP, ++ mdl_fmt, + 0, NULL, NULL, NULL); + if (!sws_ctx) { + av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " + "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", + av_get_pix_fmt_name(frame->format), frame->width, frame->height, +- av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height); ++ av_get_pix_fmt_name(mdl_fmt),frame->width, frame->height); + ret = AVERROR(EINVAL); + goto err; + } +- if (frame->format == AV_PIX_FMT_RGB24) { ++ if (strstr(pix_fmt_name, "rgb") != NULL) { + planar_data[0] = (uint8_t *)middle_data + plane_size; + planar_data[1] = (uint8_t *)middle_data + plane_size * 2; + planar_data[2] = (uint8_t *)middle_data; +- } else if (frame->format == AV_PIX_FMT_BGR24) { ++ } else if (strstr(pix_fmt_name, "bgr") != NULL) { + planar_data[0] = (uint8_t *)middle_data + plane_size; + planar_data[1] = (uint8_t *)middle_data; + planar_data[2] = (uint8_t *)middle_data + plane_size * 2; ++ } else { ++ av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support this format: %s\n", pix_fmt_name); ++ return AVERROR(ENOSYS); + } ++ ++ int middle_data_linesize[4] = {0}; ++ ret = av_image_fill_linesizes(middle_data_linesize, mdl_fmt, frame->width); + sws_scale(sws_ctx, (const uint8_t * const *)frame->data, + frame->linesize, 0, frame->height, planar_data, +- (const int [4]){frame->width * sizeof(uint8_t), +- frame->width * sizeof(uint8_t), +- frame->width * sizeof(uint8_t), 0}); ++ 
middle_data_linesize); + sws_freeContext(sws_ctx); + } ++ src_fmt = comp_desc->depth == 8 ? AV_PIX_FMT_GRAY8 : AV_PIX_FMT_GRAY16; + sws_ctx = sws_getContext(frame->width * 3, + frame->height, +- AV_PIX_FMT_GRAY8, ++ src_fmt, + frame->width * 3, + frame->height, + dst_fmt, +@@ -264,8 +294,8 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + if (!sws_ctx) { + av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " + "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", +- av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height, +- av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height); ++ av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height, ++ av_get_pix_fmt_name(dst_fmt), frame->width * 3, frame->height); + ret = AVERROR(EINVAL); + goto err; + } +@@ -287,9 +317,12 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + case AV_PIX_FMT_YUV411P: + case AV_PIX_FMT_GRAY8: + case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_YUV420P10LE: ++ av_assert0(comp_desc->depth == 8 || comp_desc->depth == 10); ++ src_fmt = comp_desc->depth == 8 ? 
AV_PIX_FMT_GRAY8 : AV_PIX_FMT_GRAY10; + sws_ctx = sws_getContext(frame->width, + frame->height, +- AV_PIX_FMT_GRAY8, ++ src_fmt, + frame->width, + frame->height, + dst_fmt, +@@ -297,8 +330,8 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + if (!sws_ctx) { + av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " + "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", +- av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height, +- av_get_pix_fmt_name(dst_fmt),frame->width, frame->height); ++ av_get_pix_fmt_name(src_fmt), frame->width, frame->height, ++ av_get_pix_fmt_name(dst_fmt), frame->width, frame->height); + ret = AVERROR(EINVAL); + goto err; + } +diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h +index b030995a9b..6d077b94d7 100644 +--- a/libavfilter/dnn_interface.h ++++ b/libavfilter/dnn_interface.h +@@ -35,7 +35,7 @@ + + typedef enum {DNN_TF = 1, DNN_OV, DNN_IVSR} DNNBackendType; + +-typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType; ++typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4 ,DNN_UINT16 = 8} DNNDataType; + + typedef enum { + DCO_NONE, +diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c +index 2b7656f21a..066b00a898 100644 +--- a/libavfilter/vf_dnn_processing.c ++++ b/libavfilter/vf_dnn_processing.c +@@ -87,6 +87,9 @@ static const enum AVPixelFormat pix_fmts[] = { + #else + AV_PIX_FMT_BGR24, + AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_BGR48LE, ++ AV_PIX_FMT_RGB48LE, ++ AV_PIX_FMT_YUV420P10LE, + AV_PIX_FMT_NONE + #endif + }; +@@ -122,6 +125,8 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin + switch (fmt) { + case AV_PIX_FMT_RGB24: + case AV_PIX_FMT_BGR24: ++ case AV_PIX_FMT_RGB48LE: ++ case AV_PIX_FMT_BGR48LE: + if (model_input->channels != 3) { + LOG_FORMAT_CHANNEL_MISMATCH(); + return AVERROR(EIO); +@@ -135,6 +140,7 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin + case 
AV_PIX_FMT_YUV410P: + case AV_PIX_FMT_YUV411P: + case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_YUV420P10LE: + if (model_input->channels != 1) { + LOG_FORMAT_CHANNEL_MISMATCH(); + return AVERROR(EIO); +@@ -197,13 +203,17 @@ static int prepare_uv_scale(AVFilterLink *outlink) + SWS_BICUBIC, NULL, NULL, NULL); + ctx->sws_uv_height = inlink->h >> 1; + } else { ++ av_assert0(AV_PIX_FMT_YUV420P10LE || fmt == AV_PIX_FMT_YUV420P); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); ++ const AVComponentDescriptor comp = desc->comp[0]; + int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); + int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); + int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h); + int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w); +- ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8, +- sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8, ++ ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, ++ comp.depth == 10 ? AV_PIX_FMT_GRAY10 : AV_PIX_FMT_GRAY8, ++ sws_dst_w, sws_dst_h, ++ comp.depth == 10 ? 
AV_PIX_FMT_GRAY10 : AV_PIX_FMT_GRAY8, + SWS_BICUBIC, NULL, NULL, NULL); + ctx->sws_uv_height = sws_src_h; + } +diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c +index a5c9917799..c87eb560d3 100644 +--- a/libswscale/swscale_unscaled.c ++++ b/libswscale/swscale_unscaled.c +@@ -1710,6 +1710,98 @@ static int float_y_to_uint_y_wrapper(SwsContext *c, const uint8_t* src[], + return srcSliceH; + } + ++static int uint16_y_to_float_y_wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, ++ int srcSliceH, uint8_t *dst[], int dstStride[]) ++{ ++ int y, x; ++ ptrdiff_t srcStrideUint16 = srcStride[0] >> 1; ++ ptrdiff_t dstStrideFloat = dstStride[0] >> 2; ++ const uint16_t *srcPtr = (const uint16_t *)(src[0] + srcStride[0] * srcSliceY); ++ float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY); ++ const float float_norm_factor = 1.0f / 65535.0f; ++ ++ for (y = 0; y < srcSliceH; ++y) { ++ for (x = 0; x < c->srcW; ++x) { ++ dstPtr[x] = (float)srcPtr[x] * float_norm_factor; ++ } ++ srcPtr += srcStrideUint16; ++ dstPtr += dstStrideFloat; ++ } ++ ++ return srcSliceH; ++} ++ ++static int float_y_to_uint16_y_wrapper(SwsContext *c, const uint8_t* src[], ++ int srcStride[], int srcSliceY, ++ int srcSliceH, uint8_t* dst[], int dstStride[]) ++{ ++ int y, x; ++ ptrdiff_t srcStrideFloat = srcStride[0] >> 2; ++ ptrdiff_t dstStrideUint16 = dstStride[0] >> 1; ++ const float *srcPtr = (const float *)(src[0] + srcStride[0] * srcSliceY); ++ uint16_t *dstPtr = (uint16_t*)(dst[0] + dstStride[0] * srcSliceY); ++ ++ for (y = 0; y < srcSliceH; ++y) { ++ for (x = 0; x < c->srcW; ++x) { ++ dstPtr[x] = av_clip_uint16(lrintf(65535.0f * srcPtr[x])); ++ } ++ srcPtr += srcStrideFloat; ++ dstPtr += dstStrideUint16; ++ } ++ ++ return srcSliceH; ++} ++ ++static int uint10_y_to_float_y_wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, ++ int srcSliceH, uint8_t *dst[], int dstStride[]) ++{ ++ int y, x; ++ ptrdiff_t 
srcStrideUint16 = srcStride[0] >> 1; ++ ptrdiff_t dstStrideFloat = dstStride[0] >> 2; ++ const uint16_t *srcPtr = (const uint16_t *)(src[0] + srcStride[0] * srcSliceY); ++ float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY); ++ const float float_norm_factor = 1.0f / 1023.0f; ++ for (y = 0; y < srcSliceH; ++y) { ++ for (x = 0; x < c->srcW; ++x) { ++ dstPtr[x] = (float)srcPtr[x] * float_norm_factor; ++ } ++ ++ srcPtr += srcStrideUint16; ++ dstPtr += dstStrideFloat; ++ } ++ ++ return srcSliceH; ++} ++ ++static int float_y_to_uint10_y_wrapper(SwsContext *c, const uint8_t* src[], ++ int srcStride[], int srcSliceY, ++ int srcSliceH, uint8_t* dst[], int dstStride[]) ++{ ++ int y, x; ++ ptrdiff_t srcStrideFloat = srcStride[0] >> 2; ++ ptrdiff_t dstStrideUint16 = dstStride[0] >> 1; ++ const float *srcPtr = (const float *)(src[0] + srcStride[0] * srcSliceY); ++ uint16_t *dstPtr = (uint16_t*)(dst[0] + dstStride[0] * srcSliceY); ++ ++ for (y = 0; y < srcSliceH; ++y) { ++ for (x = 0; x < c->srcW; ++x) { ++ int value = lrintf(1023.0f * srcPtr[x]); ++ if (value < 0) { ++ value = 0; ++ } else if (value > 1023) { ++ value = 1023; ++ } ++ dstPtr[x] = (uint16_t)value; ++ } ++ srcPtr += srcStrideFloat; ++ dstPtr += dstStrideUint16; ++ } ++ ++ return srcSliceH; ++} ++ + /* unscaled copy like stuff (assumes nearly identical formats) */ + static int packedCopyWrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, +@@ -2186,6 +2278,24 @@ void ff_get_unscaled_swscale(SwsContext *c) + c->convert_unscaled = float_y_to_uint_y_wrapper; + } + ++ /* 16bit Y to float Y */ ++ if (srcFormat == AV_PIX_FMT_GRAY16 && dstFormat == AV_PIX_FMT_GRAYF32){ ++ c->convert_unscaled = uint16_y_to_float_y_wrapper; ++ } ++ ++ /* float Y to 16bit Y */ ++ if (srcFormat == AV_PIX_FMT_GRAYF32 && dstFormat == AV_PIX_FMT_GRAY16){ ++ c->convert_unscaled = float_y_to_uint16_y_wrapper; ++ } ++ /* 10bit Y to float Y */ ++ if (srcFormat == AV_PIX_FMT_GRAY10 && dstFormat == 
AV_PIX_FMT_GRAYF32){ ++ c->convert_unscaled = uint10_y_to_float_y_wrapper; ++ } ++ ++ /* float Y to 10bit Y */ ++ if (srcFormat == AV_PIX_FMT_GRAYF32 && dstFormat == AV_PIX_FMT_GRAY10){ ++ c->convert_unscaled = float_y_to_uint10_y_wrapper; ++ } + /* LQ converters if -sws 0 or -sws 4*/ + if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) { + /* yv12_to_yuy2 */ +-- +2.34.1 + diff --git a/ivsr_ffmpeg_plugin/patches/0023-enable-PrePostProcessing-of-OpenVINO-in-dnn_backend_.patch b/ivsr_ffmpeg_plugin/patches/0023-enable-PrePostProcessing-of-OpenVINO-in-dnn_backend_.patch new file mode 100644 index 0000000..3d9619b --- /dev/null +++ b/ivsr_ffmpeg_plugin/patches/0023-enable-PrePostProcessing-of-OpenVINO-in-dnn_backend_.patch @@ -0,0 +1,889 @@ +From 001b77050d3c035b24fd2c853dab839e9d8e778e Mon Sep 17 00:00:00 2001 +From: Xiaoxia Liang +Date: Mon, 9 Sep 2024 14:17:02 +0000 +Subject: [PATCH] enable PrePostProcessing of OpenVINO in dnn_backend_ivsr + +Signed-off-by: Xiaoxia Liang +--- + libavfilter/dnn/dnn_backend_ivsr.c | 493 ++++++++++++++++++++--------- + libavfilter/dnn/dnn_io_proc.c | 70 ++++ + libavfilter/vf_dnn_processing.c | 3 +- + 3 files changed, 408 insertions(+), 158 deletions(-) + +diff --git a/libavfilter/dnn/dnn_backend_ivsr.c b/libavfilter/dnn/dnn_backend_ivsr.c +index 997c1b803d..44d603190c 100644 +--- a/libavfilter/dnn/dnn_backend_ivsr.c ++++ b/libavfilter/dnn/dnn_backend_ivsr.c +@@ -126,6 +126,20 @@ static int get_datatype_size(DNNDataType dt) + } + } + ++static DNNColorOrder map_dnn_color_order(int format) { ++ switch (format) ++ { ++ case AV_PIX_FMT_RGB24: ++ case AV_PIX_FMT_RGB48: ++ return DCO_RGB; ++ case AV_PIX_FMT_BGR24: ++ case AV_PIX_FMT_BGR48: ++ return DCO_BGR; ++ default: ++ return DCO_NONE; ++ } ++} ++ + static int clamp(int val, int min, int max) { + if (val < min) + return min; +@@ -135,44 +149,54 @@ static int clamp(int val, int min, int max) { + return val; + } + +-static void convert_nchw_to_nhwc(void* data, int N, int C, int H, int W) { ++static 
void convert_nchw_to_nhwc(void* data, int N, int C, int H, int W, DNNDataType type) { + int data_size = N * C * H * W; +- void *temp = av_malloc(data_size * sizeof(float)); ++ int type_size = get_datatype_size(type); ++ data_size = data_size * type_size; ++ uint8_t *temp = av_malloc(data_size); + int max_threads = omp_get_num_procs() / 2; + // memory copy + #pragma omp parallel for num_threads(max_threads) + for (int i = 0; i < data_size; i++) +- ((float *)temp)[i] = ((float *)data)[i]; ++ temp[i] = ((uint8_t*)data)[i]; + +- // convert buffer from nchw to nhwc and reverse rgb to bgr ++ // convert buffer from nchw to nhwc + #pragma omp parallel num_threads(max_threads) + { + for (int n = 0; n < N; n++) + for (int h = omp_get_thread_num(); h < H; h += omp_get_num_threads()) +- for (int w = 0; w < W; w++) +- for (int c = 0; c < C; c++) +- ((float *)data)[n * H * W * C + h * W * C + w * C + c] = ((float *)temp)[n * C * H * W + (C - 1 - c) * H * W + h * W + w]; ++ for (int w = 0; w < W; w++) ++ for (int c = 0; c < C; c++) { ++ for (int byte = 0; byte < type_size; ++byte) ++ ((uint8_t*)data)[(n * H * W * C + h * W * C + w * C + c) * type_size + byte] = ++ temp[(n * C * H * W + c * H * W + h * W + w) * type_size + byte]; ++ } + } + av_free(temp); + } + +-static void convert_nhwc_to_nchw(void* data, int N, int C, int H, int W) { ++static void convert_nhwc_to_nchw(void* data, int N, int C, int H, int W, DNNDataType type) { + int data_size = N * C * H * W; +- void *temp = av_malloc(data_size * sizeof(float)); ++ int type_size = get_datatype_size(type); ++ data_size = data_size * type_size; ++ uint8_t *temp = av_malloc(data_size); + int max_threads = omp_get_num_procs() / 2; + // memory copy + #pragma omp parallel for num_threads(max_threads) + for (int i = 0; i < data_size; i++) +- ((float *)temp)[i] = ((float *)data)[i]; ++ temp[i] = ((uint8_t*)data)[i]; + +- // convert buffer from nhwc to nchw and reverse bgr to rgb ++ // convert buffer from nhwc to nchw + #pragma omp 
parallel num_threads(max_threads) + { + for (int n = 0; n < N; n++) +- for (int h = omp_get_thread_num(); h < H; h += omp_get_num_threads()) +- for (int w = 0; w < W; w++) +- for (int c = 0; c < C; c++) +- ((float *)data)[n * C * H * W + c * H * W + h * W + w] = ((float *)temp)[n * H * W * C + h * W * C + w * C + C - 1 - c]; ++ for (int h = omp_get_thread_num(); h < H; h += omp_get_num_threads()) ++ for (int w = 0; w < W; w++) ++ for (int c = 0; c < C; c++) { ++ for (int byte = 0; byte < type_size; ++byte) ++ ((uint8_t*)data)[(n * C * H * W + c * H * W + h * W + w) * type_size + byte] = ++ temp[(n * H * W * C + h * W * C + w * C + c) * type_size + byte]; ++ } + } + av_free(temp); + } +@@ -191,6 +215,75 @@ static void set_padding_value(void* data, uint32_t width, uint32_t height, uint3 + memset(data + index, padding_value, padding_height * n_width); + } + ++static size_t get_tensor_size(const tensor_desc_t* tensor) { ++ size_t tensor_size = 0; ++ size_t data_type_size = 0; ++ if (NULL == tensor || tensor->dimension <= 0) ++ return 0; ++ ++ if (strcmp(tensor->precision, "u8") == 0) { ++ data_type_size = sizeof(uint8_t); ++ } else if (strcmp(tensor->precision, "u16") == 0) { ++ data_type_size = sizeof(uint16_t); ++ } else if (strcmp(tensor->precision, "f32") == 0) { ++ data_type_size = sizeof(float); ++ } else { ++ av_assert0(!"not supported the precision yet."); ++ return 1; ++ } ++ ++ tensor_size = data_type_size; ++ for (int i = 0; i < tensor->dimension; ++i) { ++ tensor_size *= tensor->shape[i]; ++ } ++ return tensor_size; ++} ++/* ++ * set layout, precision, width, height and channels info accorring to tensor info ++*/ ++static void set_dnndata_info(DNNData *dnn_data, const tensor_desc_t* tensor) { ++ if (NULL == dnn_data || NULL == tensor) ++ return; ++ ++ // set layout and width, height and channels ++ if (strcmp(tensor->layout, "NHWC") == 0 || strcmp(tensor->layout, "[N,H,W,C]") == 0) { ++ dnn_data->layout = DL_NHWC; ++ dnn_data->channels = tensor->shape[3]; 
++ dnn_data->height = tensor->shape[1]; ++ dnn_data->width = tensor->shape[2]; ++ } else if (strcmp(tensor->layout, "NCHW") == 0 || strcmp(tensor->layout, "[N,C,H,W]") == 0) { ++ dnn_data->layout = DL_NCHW; ++ dnn_data->channels = tensor->shape[1]; ++ dnn_data->height = tensor->shape[2]; ++ dnn_data->width = tensor->shape[3]; ++ } else if (strcmp(tensor->layout, "NFHWC") == 0 || strcmp(tensor->layout, "[N,F,H,W,C]") == 0) { ++ dnn_data->layout = DL_NHWC; ++ dnn_data->channels = tensor->shape[4]; ++ dnn_data->height = tensor->shape[2]; ++ dnn_data->width = tensor->shape[3]; ++ } else if (strcmp(tensor->layout, "NFCHW") == 0 || strcmp(tensor->layout, "[N,F,C,H,W]") == 0) { ++ dnn_data->layout = DL_NCHW; ++ dnn_data->channels = tensor->shape[2]; ++ dnn_data->height = tensor->shape[3]; ++ dnn_data->width = tensor->shape[4]; ++ } else { ++ av_assert0(!"DNNData not supported the layout yet."); ++ return; ++ } ++ ++ // set precision ++ if (strcmp(tensor->precision, "f32") == 0 || strcmp(tensor->precision, "fp32") == 0) { ++ dnn_data->dt = DNN_FLOAT; ++ } else if (strcmp(tensor->precision, "u8") == 0) { ++ dnn_data->dt = DNN_UINT8; ++ } else if (strcmp(tensor->precision, "u16") == 0){ ++ dnn_data->dt = DNN_UINT16; ++ } else { ++ av_assert0(!"DNNData not supported the precision yet."); ++ return; ++ } ++} ++ + /* returns + * DNN_GENERIC_ERROR, + * DNN_MORE_FRAMES - waiting for more input frames, +@@ -207,62 +300,49 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + TaskItem *task; + AVFrame *tmp_frame = NULL; + void *in_data = NULL; +- int dims[5] = { 0, 0, 0, 0, 0 }; + float normalize_factor = ctx->options.normalize_factor; + int padding_height = 0, padding_width = 0; +- +- status = ivsr_get_attr(ivsr_model->handle, INPUT_TENSOR_DESC, dims); ++ tensor_desc_t input_tensor_desc_get = { ++ .precision = {0}, ++ .layout = {0}, ++ .tensor_color_format = {0}, ++ .model_color_format = {0}, ++ .scale = 0.0, ++ .dimension = 0, ++ .shape = {0}}; ++ ++ status = 
ivsr_get_attr(ivsr_model->handle, INPUT_TENSOR_DESC, &input_tensor_desc_get); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to get input dimensions\n"); + return DNN_GENERIC_ERROR; + } + +- switch (ivsr_model->model_type) { +- case BASICVSR: +- input.channels = dims[1]; +- input.height = dims[3]; +- input.width = dims[4]; +- input.dt = DNN_FLOAT; +- break; +- case VIDEOPROC: +- case EDSR: +- case CUSTVSR: +- input.channels = dims[2]; +- input.height = dims[3]; +- input.width = dims[4]; +- input.dt = DNN_FLOAT; +- break; +- case TSENET: +- //INFO:for TSENet, dims[2]==nif * channels, and nif==3 +- input.channels = dims[2] / 3; +- input.height = dims[3]; +- input.width = dims[4]; +- input.dt = DNN_FLOAT; +- break; +- default: +- av_log(ctx, AV_LOG_ERROR, "Not supported model type\n"); +- return DNN_GENERIC_ERROR; +- } ++ set_dnndata_info(&input, &input_tensor_desc_get); ++ if (ivsr_model->model_type == TSENET) ++ input.channels = input.channels / 3; + + input.data = request->in_frames; +- input.order = DCO_BGR; + in_data = input.data; +- input.scale = 0; ++ // ff_proc_from_frame_to_dnn: uint_8->uint8 requires scale == 1 and mean == 0 and dt == UINT8 ++ input.scale = 1; + input.mean = 0; +- input.layout = DL_NONE; ++ + ctx->model_input_height = input.height; + ctx->model_input_width = input.width; + + padding_height = ctx->model_input_height - ctx->frame_input_height; + padding_width = ctx->model_input_width - ctx->frame_input_width; + for (int i = 0; i < ctx->options.batch_size; ++i) { +- //INFO: for TSENET, lltask_queue contains (N-1)th and (N)th frames +- //so peek (N)th frame. ++ // INFO: for TSENET, lltask_queue contains (N-1)th and (N)th frames ++ // so peek (N)th frame. 
+ lltask = ff_queue_peek_back(ivsr_model->lltask_queue); + if (!lltask) { + break; + } + task = lltask->task; ++ // the color order of input DNNData is same as format of in frame ++ input.order = map_dnn_color_order(task->in_frame->format); ++ + if (task->do_ioproc) { + if (ivsr_model->model->frame_pre_proc != NULL) { + ivsr_model->model->frame_pre_proc(task->in_frame, &input, +@@ -281,27 +361,28 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + } + input.data = in_data; + } +- if (ivsr_model->model_type == BASICVSR && dims[2] != 1) { ++ if (ivsr_model->model_type == BASICVSR && ivsr_model->nif != 1) { + int read_frame_num = 0; +- for (int j = 0; j < dims[2]; j++) { ++ for (int j = 0; j < ivsr_model->nif; j++) { + if (av_fifo_can_read(task->in_queue)) { + av_fifo_read(task->in_queue, &tmp_frame, 1); + ff_proc_from_frame_to_dnn(tmp_frame, &input, + ivsr_model->model-> + filter_ctx); + // convert buffer from NHWC to NCHW when C != 1 +- convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width); ++ if (input.layout != 1 && input.layout == DL_NONE ) ++ convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width, input.dt); + input.data += + input.height * input.width * +- input.channels * sizeof(float); ++ input.channels * get_datatype_size(input.dt); + read_frame_num++; + } + } + input.data = in_data; +- if (read_frame_num < dims[2]) ++ if (read_frame_num < ivsr_model->nif) + av_log(ctx, AV_LOG_ERROR, +- "Read frame number is %d less than the model requirement!!!\n", +- read_frame_num); ++ "Read frame number is %d less than the model requirement %d!!!\n", ++ read_frame_num, ivsr_model->nif); + } else if (ivsr_model->model_type == TSENET) { + //1. 
copy the input_frame(ref the buffer) and put into ivsr_model->fame_queue + tmp_frame = av_frame_alloc(); +@@ -329,8 +410,8 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + for (int idx = 0; idx < ivsr_model->nif; idx++) { + //INFO: the 3 frames in frame_queue are: (N-2)th, (N-1)th, (N)th + ff_proc_from_frame_to_dnn(input_frames[idx], &input, ivsr_model->model->filter_ctx); +- convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width); +- input.data += input.height * input.width * input.channels * sizeof(float); ++ convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width, input.dt); ++ input.data += input.height * input.width * input.channels * get_datatype_size(input.dt); + } + input.data = in_data; + //pop the (N-2)th frame from frame_queue and free it +@@ -349,10 +430,12 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + ff_proc_from_frame_to_dnn(task->in_frame, &input, + ivsr_model->model-> + filter_ctx); +- if (input.channels != 1) { +- convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width); ++ if (input.channels != 1 && (input.layout == DL_NONE)) { ++ convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width, input.dt); + } +- if (normalize_factor != 1) { ++ ++ if (normalize_factor != 1 && input.dt == DNN_FLOAT ++ && (input.scale > 1 || input.scale == 0)) { + // do not need to covert buffer from NHWC to NCHW if the channels is 1, only need to mulitple normalize_factor + #pragma omp parallel for + for (int pos = 0; pos < input.height * input.width * input.channels; pos++) { +@@ -387,43 +470,47 @@ static void infer_completion_callback(void *args) + IVSRContext *ctx = &ivsr_model->ctx; + AVFrame *tmp_frame = NULL; + int offset = 0; +- int dims[5] = { 0, 0, 0, 0, 0 }; + float normalize_factor = ctx->options.normalize_factor; +- +- status = ivsr_get_attr(ivsr_model->handle, OUTPUT_TENSOR_DESC, dims); ++ tensor_desc_t output_tensor_desc_get = { ++ .precision 
= {0}, ++ .layout = {0}, ++ .tensor_color_format = {0}, ++ .model_color_format = {0}, ++ .scale = 0.0, ++ .dimension = 0, ++ .shape = {0}}; ++ ++ // ivsr_get_attr can only get precision, layout, dimension and shape info ++ status = ivsr_get_attr(ivsr_model->handle, OUTPUT_TENSOR_DESC, &output_tensor_desc_get); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to get output dimensions\n"); + return; + } + +- switch (ivsr_model->model_type) { ++ set_dnndata_info(&output, &output_tensor_desc_get); ++ ++ output.data = request->out_frames; ++ output.mean = 0; ++ // ff_proc_from_dnn_to_frame: float->uint8 require (scale == 255 or scale == 0) and mean == 0 ++ output.scale = output.dt == DNN_UINT8 ? 1 : 0; ++ // set order based on model type ++ switch (ivsr_model->model_type) ++ { + case BASICVSR: +- output.channels = dims[1]; +- output.height = dims[3]; +- output.width = dims[4]; +- break; + case VIDEOPROC: + case EDSR: +- case CUSTVSR: + case TSENET: +- output.channels = dims[2]; +- output.height = dims[3]; +- output.width = dims[4]; ++ output.order = DCO_RGB; + break; + default: +- av_log(ctx, AV_LOG_ERROR, "Not supported model type\n"); +- return; ++ output.order = DCO_NONE; ++ break; + } + +- output.dt = DNN_FLOAT; +- output.data = request->out_frames; +- output.scale = 0; +- output.mean = 0; +- output.layout = DL_NONE; + const AVPixFmtDescriptor* pix_desc = av_pix_fmt_desc_get(task->out_frame->format); + const AVComponentDescriptor* comp_desc = &pix_desc->comp[0]; + int bits = comp_desc->depth; +- av_assert0(request->lltask_count <= dims[0]); ++ av_assert0(request->lltask_count <= output_tensor_desc_get.shape[0]); + av_assert0(request->lltask_count >= 1); + for (int i = 0; i < request->lltask_count; ++i) { + task = request->lltasks[i]->task; +@@ -435,14 +522,14 @@ static void infer_completion_callback(void *args) + ivsr_model->model-> + filter_ctx); + } else { +- if (ivsr_model->model_type == BASICVSR && dims[2] != 1) { ++ if (ivsr_model->model_type == 
BASICVSR && ivsr_model->nif != 1) { + do { + int ret = + av_fifo_peek(task->out_queue, &tmp_frame, 1, + offset); + if (ret == 0) { +- if (output.channels != 1) { +- convert_nchw_to_nhwc(output.data, 1, output.channels, output.height, output.width); ++ if (output.channels != 1 && output.layout == DL_NONE) { ++ convert_nchw_to_nhwc(output.data, 1, output.channels, output.height, output.width, output.dt); + } + ff_proc_from_dnn_to_frame(tmp_frame, &output, + &ivsr_model->model-> +@@ -458,22 +545,25 @@ static void infer_completion_callback(void *args) + } + output.data += + output.height * output.width * +- output.channels * sizeof(float); ++ output.channels * get_datatype_size(output.dt); + } + offset++; +- } while (offset != dims[2]); ++ } while (offset != ivsr_model->nif); + } else { +- if (output.channels != 1) { ++ if (output.channels != 1 && output.layout == DL_NONE) { + //convert buffer from NCHW to NHWC +- convert_nchw_to_nhwc(output.data, 1, output.channels, output.height, output.width); ++ convert_nchw_to_nhwc(output.data, 1, output.channels, output.height, output.width, output.dt); + } +- if (normalize_factor != 1) { ++ ++ if (normalize_factor != 1 && output.dt == DNN_FLOAT ++ && (output.scale > 1 || output.scale == 0)) { + #pragma omp parallel for + // only need to devide by normalize_factor for channels = 1. 
+ for (int pos = 0; pos < output.height * output.width * output.channels; pos++) { + ((float*)output.data)[pos] = ((float*)output.data)[pos] / normalize_factor; + } + } ++ + ff_proc_from_dnn_to_frame(task->out_frame, &output, + &ivsr_model->model-> + filter_ctx); +@@ -531,40 +621,27 @@ static int get_input_ivsr(void *model, DNNData * input, + IVSRModel *ivsr_model = model; + IVSRContext *ctx = &ivsr_model->ctx; + IVSRStatus status; +- int dims[5] = { 0, 0, 0, 0, 0 }; +- +- status = ivsr_get_attr(ivsr_model->handle, INPUT_TENSOR_DESC, dims); ++ tensor_desc_t input_tensor_desc_get = { ++ .precision = {0}, ++ .layout = {0}, ++ .tensor_color_format = {0}, ++ .model_color_format = {0}, ++ .scale = 0.0, ++ .dimension = 0, ++ .shape = {0}}; ++ ++ status = ivsr_get_attr(ivsr_model->handle, INPUT_TENSOR_DESC, &input_tensor_desc_get); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to get input dimensions\n"); + return DNN_GENERIC_ERROR; + } + +- switch (ivsr_model->model_type) { +- case BASICVSR: +- input->channels = dims[1]; +- input->height = dims[3]; +- input->width = dims[4]; +- input->dt = DNN_FLOAT; +- break; +- case VIDEOPROC: +- case EDSR: +- case CUSTVSR: +- input->channels = dims[2]; +- input->height = dims[3]; +- input->width = dims[4]; +- input->dt = DNN_FLOAT; +- break; +- case TSENET: +- //INFO:for TSENet, dims[2] == nif * channels, and nif==3 +- input->channels = dims[2] / 3; +- input->height = dims[3]; +- input->width = dims[4]; +- input->dt = DNN_FLOAT; +- break; +- default: +- av_log(ctx, AV_LOG_ERROR, "Not supported model type\n"); +- return DNN_GENERIC_ERROR; +- } ++ set_dnndata_info(input, &input_tensor_desc_get); ++ if (ivsr_model->model_type == TSENET) ++ input->channels = input->channels / 3; ++ ++ // hard code to pass check_modelinput_inlink() that requires DNN_FLOAT of model_input->dt ++ input->dt = DNN_FLOAT; + + return 0; + } +@@ -648,29 +725,29 @@ static int get_output_ivsr(void *model, const char *input_name, + IVSRModel *ivsr_model = 
model; + IVSRContext *ctx = &ivsr_model->ctx; + IVSRStatus status; +- int dims[5] = { 0, 0, 0, 0, 0 }; +- +- status = ivsr_get_attr(ivsr_model->handle, OUTPUT_TENSOR_DESC, dims); ++ DNNData output; ++ tensor_desc_t output_tensor_desc_get = { ++ .precision = {0}, ++ .layout = {0}, ++ .tensor_color_format = {0}, ++ .model_color_format = {0}, ++ .scale = 0.0, ++ .dimension = 0, ++ .shape = {0}}; ++ ++ status = ivsr_get_attr(ivsr_model->handle, OUTPUT_TENSOR_DESC, &output_tensor_desc_get); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to get output dimensions\n"); + return DNN_GENERIC_ERROR; + } + +- switch (ivsr_model->model_type) { +- case VIDEOPROC: +- *output_height = input_height; +- *output_width = input_width; +- break; +- case BASICVSR: +- case EDSR: +- case CUSTVSR: +- case TSENET: +- *output_height = dims[3]; +- *output_width = dims[4]; +- break; +- default: +- av_log(ctx, AV_LOG_ERROR, "Not supported model type\n"); +- return DNN_GENERIC_ERROR; ++ set_dnndata_info(&output, &output_tensor_desc_get); ++ *output_height = output.height; ++ *output_width = output.width; ++ ++ if (ivsr_model->model_type == VIDEOPROC) { ++ *output_height = input_height; ++ *output_width = input_width; + } + + return ret; +@@ -678,14 +755,20 @@ static int get_output_ivsr(void *model, const char *input_name, + + // Utility function to create and link config + static ivsr_config_t* create_and_link_config(ivsr_config_t *previous, +- int key, char *value, void *ctx) { ++ int key, void *value, void *ctx) { + ivsr_config_t *config = av_mallocz(sizeof(ivsr_config_t)); + if (config == NULL) { + av_log(ctx, AV_LOG_ERROR, "Failed to malloc config\n"); + return NULL; + } + config->key = key; +- config->value = value; ++ if (config->key == INPUT_TENSOR_DESC_SETTING ++ || config->key == OUTPUT_TENSOR_DESC_SETTING) { ++ config->value = (tensor_desc_t *)value; ++ } else { ++ config->value = (char *)value; ++ } ++ + if (previous != NULL) { + previous->next = config; + } +@@ -708,8 
+791,24 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + ivsr_config_t *config_input_res = NULL; + ivsr_config_t *config_nireq = NULL; + int nif = 0; +- int input_dims[5] = { 0, 0, 0, 0, 1 }; +- int output_dims[5] = { 0, 0, 0, 0, 1 }; ++ ivsr_config_t *config_input_tensor = NULL; ++ ivsr_config_t *config_output_tensor = NULL; ++ tensor_desc_t input_tensor_desc_get = { ++ .precision = {0}, ++ .layout = {0}, ++ .tensor_color_format = {0}, ++ .model_color_format = {0}, ++ .scale = 0.0, ++ .dimension = 0, ++ .shape = {0}}; ++ tensor_desc_t output_tensor_desc_get = { ++ .precision = {0}, ++ .layout = {0}, ++ .tensor_color_format = {0}, ++ .model_color_format = {0}, ++ .scale = 0.0, ++ .dimension = 0, ++ .shape = {0}}; + + model = av_mallocz(sizeof(DNNModel)); + if (!model) { +@@ -775,9 +874,98 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + if (config_input_res == NULL) + goto err; + ++ tensor_desc_t input_tensor_desc_set = { ++ .precision = "u8", ++ .layout = "NHWC", ++ .tensor_color_format = {0}, ++ .model_color_format = {0}, ++ .scale = 0.0, ++ .dimension = 4, ++ .shape = {0, 0, 0, 0}}; ++ tensor_desc_t output_tensor_desc_set = { ++ .precision = "fp32", ++ .layout = "NHWC", ++ .tensor_color_format = {0}, ++ .model_color_format = {0}, ++ .scale = 0.0, ++ .dimension = 4, ++ .shape = {0, 0, 0, 0}}; ++ // set element type according to bit depth of frame ++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); ++ switch (desc->comp[0].depth) ++ { ++ case 8: ++ strcpy(input_tensor_desc_set.precision, "u8"); ++ break; ++ case 10: ++ case 16: ++ strcpy(input_tensor_desc_set.precision, "u16"); ++ break; ++ default: ++ break; ++ } ++ // set layout for Basic_VSR ++ if (ivsr_model->model_type == BASICVSR) { ++ strcpy(input_tensor_desc_set.layout, "NFHWC"); ++ strcpy(output_tensor_desc_set.layout, "NFHWC"); ++ } ++ // set scale ++ if (fabsf(ctx->options.normalize_factor - 1) < 1e-6f) { ++ switch (desc->comp[0].depth) ++ { ++ case 
8: ++ input_tensor_desc_set.scale = 255.0; ++ break; ++ case 10: ++ input_tensor_desc_set.scale = 1023.0; ++ break; ++ case 16: ++ input_tensor_desc_set.scale = 65535.0; ++ break; ++ default: ++ break; ++ } ++ } ++ ++ // set color format of input tensor ++ switch (inlink->format) ++ { ++ case AV_PIX_FMT_RGB24: ++ case AV_PIX_FMT_RGB48: ++ strcpy(input_tensor_desc_set.tensor_color_format, "RGB"); ++ break; ++ case AV_PIX_FMT_BGR24: ++ case AV_PIX_FMT_BGR48: ++ strcpy(input_tensor_desc_set.tensor_color_format, "BGR"); ++ break; ++ case AV_PIX_FMT_YUV420P: ++ case AV_PIX_FMT_YUV420P10LE: ++ strcpy(input_tensor_desc_set.tensor_color_format, "I420_Three_Planes"); ++ break; ++ default: ++ break; ++ } ++ // set color format of model required ++ switch (ivsr_model->model_type) ++ { ++ case BASICVSR: ++ case EDSR: ++ case VIDEOPROC: ++ case TSENET: ++ strcpy(input_tensor_desc_set.model_color_format, "RGB"); ++ break; ++ case CUSTVSR: ++ strcpy(input_tensor_desc_set.model_color_format, "I420_Three_Planes"); ++ break; ++ default: ++ break; ++ } ++ config_input_tensor = create_and_link_config(config_input_res, INPUT_TENSOR_DESC_SETTING, &input_tensor_desc_set, ctx); ++ config_output_tensor = create_and_link_config(config_input_tensor, OUTPUT_TENSOR_DESC_SETTING, &output_tensor_desc_set, ctx); ++ + char nireq_string[40] = {0}; + sprintf(nireq_string, "%d", ctx->options.nireq); +- config_nireq = create_and_link_config(config_input_res, INFER_REQ_NUMBER, nireq_string, ctx); ++ config_nireq = create_and_link_config(config_output_tensor, INFER_REQ_NUMBER, nireq_string, ctx); + if (config_nireq == NULL) + goto err; + +@@ -840,20 +1028,19 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + if(ivsr_model->model_type == TSENET) ivsr_model->nif = 3; + + status = +- ivsr_get_attr(ivsr_model->handle, INPUT_TENSOR_DESC, input_dims); ++ ivsr_get_attr(ivsr_model->handle, INPUT_TENSOR_DESC, &input_tensor_desc_get); + if (status != OK) { +- av_log(ctx, AV_LOG_ERROR, "Failed to 
get input dimensions\n"); ++ av_log(ctx, AV_LOG_ERROR, "Failed to get input tensor description\n"); + goto err; + } + + status = +- ivsr_get_attr(ivsr_model->handle, OUTPUT_TENSOR_DESC, output_dims); ++ ivsr_get_attr(ivsr_model->handle, OUTPUT_TENSOR_DESC, &output_tensor_desc_get); + if (status != OK) { +- av_log(ctx, AV_LOG_ERROR, "Failed to get output dimensions\n"); ++ av_log(ctx, AV_LOG_ERROR, "Failed to get output description\n"); + goto err; + } + +- + ivsr_model->request_queue = ff_safe_queue_create(); + if (!ivsr_model->request_queue) { + av_log(ctx, AV_LOG_ERROR, "Failed to create request queue\n"); +@@ -868,25 +1055,19 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + goto err; + } + +- //TODO: assume batch_size==1 +- item->in_frames = +- av_malloc(input_dims[0] * input_dims[1] * input_dims[2] * +- input_dims[3] * input_dims[4] * sizeof(float)); +- +- int input_byte_size = input_dims[0] * input_dims[1] * input_dims[2] * input_dims[3] * input_dims[4] * sizeof(float); +- memset(item->in_frames, 0, input_byte_size); ++ item->in_frames = av_malloc(get_tensor_size(&input_tensor_desc_get)); + if (!item->in_frames) { + av_log(ctx, AV_LOG_ERROR, "Failed to malloc in frames\n"); + goto err; + } ++ memset(item->in_frames, 0, get_tensor_size(&input_tensor_desc_get)); + +- item->out_frames = +- av_malloc(output_dims[0] * output_dims[1] * output_dims[2] * +- output_dims[3] * output_dims[4] * sizeof(float)); ++ item->out_frames = av_malloc(get_tensor_size(&output_tensor_desc_get)); + if (!item->out_frames) { + av_log(ctx, AV_LOG_ERROR, "Failed to malloc out frames\n"); + goto err; + } ++ memset(item->out_frames, 0 , get_tensor_size(&output_tensor_desc_get)); + + item->cb.ivsr_cb = infer_completion_callback; + item->cb.args = item; +diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c +index f51c0669a9..8dec6d97be 100644 +--- a/libavfilter/dnn/dnn_io_proc.c ++++ b/libavfilter/dnn/dnn_io_proc.c +@@ -40,6 +40,59 @@ static int 
get_datatype_size(DNNDataType dt) + } + } + ++static DNNColorOrder map_dnn_color_order(int format) { ++ switch (format) ++ { ++ case AV_PIX_FMT_RGB24: ++ case AV_PIX_FMT_RGB48: ++ return DCO_RGB; ++ case AV_PIX_FMT_BGR24: ++ case AV_PIX_FMT_BGR48: ++ return DCO_BGR; ++ default: ++ return DCO_NONE; ++ } ++} ++ ++// bgr<->rgb ++static void transpose(DNNData *input, DNNColorOrder dst_order) { ++ if (input->order == DCO_NONE || input->layout == DL_NONE ++ || dst_order == DCO_NONE || input->order == dst_order) ++ return; ++ ++ int H = input->height; ++ int W = input->width; ++ int C = input->channels; ++ void *data = input->data; ++ int a_index = 0, b_index = 0; ++ int type_size = get_datatype_size(input->dt); ++ //transpose bgr<->rgb for NHWC layout ++ if (input->layout == DL_NHWC) { ++ for (int h = 0; h < H; ++h) { ++ for (int w = 0; w < W; ++w) { ++ a_index = h * W * C + w * C; ++ b_index = a_index + (C - 1); ++ for (int byte = 0; byte < type_size; ++byte) { ++ uint8_t tmp = ((uint8_t*)data)[a_index * type_size + byte]; ++ ((uint8_t*)data)[a_index * type_size + byte] = ((uint8_t*)data)[b_index * type_size + byte]; ++ ((uint8_t*)data)[b_index * type_size + byte] = tmp; ++ } ++ } ++ } ++ // transpose bgr<->rgb for NCHW layout ++ } else if (input->layout == DL_NCHW) { ++ int plane_size = H * W * type_size; ++ void *tmp = av_malloc(plane_size); ++ memcpy(tmp, data, plane_size); ++ memcpy(data, data + (C - 1) * plane_size, plane_size); ++ memcpy(data + (C - 1) * plane_size, tmp, plane_size); ++ av_free(tmp); ++ } ++ ++ // re-set order ++ input->order = dst_order; ++} ++ + int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + { + struct SwsContext *sws_ctx; +@@ -64,6 +117,9 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + /* scale == 1 and mean == 0 and dt == UINT8: passthrough */ + if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8) + src_fmt = AV_PIX_FMT_GRAY8; ++ /* 
scale == 1 and mean == 0 and dt == UINT16: passthrough */ ++ else if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT16) ++ src_fmt = AV_PIX_FMT_GRAY16; + /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */ + else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) && + fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT) +@@ -74,6 +130,11 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) + return AVERROR(ENOSYS); + } + ++ DNNColorOrder dst_color_order = map_dnn_color_order(frame->format); ++ if (dst_color_order != output->order) { ++ transpose(output, dst_color_order); ++ } ++ + dst_data = (void **)frame->data; + linesize[0] = frame->linesize[0]; + plane_size = linesize[0] * frame->height; +@@ -220,6 +281,9 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + /* scale == 1 and mean == 0 and dt == UINT8: passthrough */ + if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8) + dst_fmt = AV_PIX_FMT_GRAY8; ++ /* scale == 1 and mean == 0 and dt == UINT16: passthrough */ ++ else if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT16) ++ dst_fmt = comp_desc->depth == 10 ? 
AV_PIX_FMT_GRAY10 : AV_PIX_FMT_GRAY16; + /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */ + else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) && + fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT) +@@ -346,6 +410,12 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + ret = AVERROR(ENOSYS); + goto err; + } ++ DNNColorOrder current_color_order = map_dnn_color_order(frame->format); ++ if (input->order != current_color_order) { ++ DNNColorOrder dst_color_order = input->order; ++ input->order = current_color_order; ++ transpose(input, dst_color_order); ++ } + err: + av_free(middle_data); + return ret; +diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c +index 066b00a898..9580f81cdb 100644 +--- a/libavfilter/vf_dnn_processing.c ++++ b/libavfilter/vf_dnn_processing.c +@@ -78,14 +78,13 @@ static av_cold int init(AVFilterContext *context) + + static const enum AVPixelFormat pix_fmts[] = { + #if 0 +- AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32, + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, + AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, + AV_PIX_FMT_NV12, + AV_PIX_FMT_NONE + #else +- AV_PIX_FMT_BGR24, ++ AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_BGR48LE, + AV_PIX_FMT_RGB48LE, +-- +2.34.1 + diff --git a/ivsr_ffmpeg_plugin/patches/0024-refine-RESHAPE_SETTINGS-support-Y-input-SVP-model.patch b/ivsr_ffmpeg_plugin/patches/0024-refine-RESHAPE_SETTINGS-support-Y-input-SVP-model.patch new file mode 100644 index 0000000..0a0274c --- /dev/null +++ b/ivsr_ffmpeg_plugin/patches/0024-refine-RESHAPE_SETTINGS-support-Y-input-SVP-model.patch @@ -0,0 +1,65 @@ +From be7d4bd47ed80fe4a0590c782b09587c4819a396 Mon Sep 17 00:00:00 2001 +From: Jerry Dong +Date: Wed, 18 Sep 2024 12:44:34 +0800 +Subject: [PATCH] refine RESHAPE_SETTINGS; support Y-input SVP model + +--- + libavfilter/dnn/dnn_backend_ivsr.c | 18 
++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/libavfilter/dnn/dnn_backend_ivsr.c b/libavfilter/dnn/dnn_backend_ivsr.c +index 44d603190c..e7e1d5ea2a 100644 +--- a/libavfilter/dnn/dnn_backend_ivsr.c ++++ b/libavfilter/dnn/dnn_backend_ivsr.c +@@ -950,10 +950,15 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + { + case BASICVSR: + case EDSR: +- case VIDEOPROC: + case TSENET: + strcpy(input_tensor_desc_set.model_color_format, "RGB"); + break; ++ case VIDEOPROC: ++ if (desc->flags & AV_PIX_FMT_FLAG_RGB) ++ strcpy(input_tensor_desc_set.model_color_format, "RGB"); ++ else ++ strcpy(input_tensor_desc_set.model_color_format, "I420_Three_Planes"); ++ break; + case CUSTVSR: + strcpy(input_tensor_desc_set.model_color_format, "I420_Three_Planes"); + break; +@@ -969,25 +974,26 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + if (config_nireq == NULL) + goto err; + ++ //TODO: reshape setting follows NHW layout. Hardcode the batch_size as 1. 
+ char shape_string[40] = {0}; + switch (ivsr_model->model_type) { + case BASICVSR: +- sprintf(shape_string, "1,3,3,%d,%d", frame_h, frame_w); ++ sprintf(shape_string, "1,%d,%d", frame_h, frame_w); + break; + case VIDEOPROC: + // the input resoultion required 8-aligned + frame_h = (frame_h + ALIGNED_SIZE - 1) / ALIGNED_SIZE * ALIGNED_SIZE; + frame_w = (frame_w + ALIGNED_SIZE - 1) / ALIGNED_SIZE * ALIGNED_SIZE; +- sprintf(shape_string, "1,3,%d,%d", frame_h, frame_w); ++ sprintf(shape_string, "1,%d,%d", frame_h, frame_w); + break; + case EDSR: +- sprintf(shape_string, "1,3,%d,%d", frame_h, frame_w); ++ sprintf(shape_string, "1,%d,%d", frame_h, frame_w); + break; + case CUSTVSR: +- sprintf(shape_string, "1,1,%d,%d", frame_h, frame_w); ++ sprintf(shape_string, "1,%d,%d", frame_h, frame_w); + break; + case TSENET: +- sprintf(shape_string, "1,9,%d,%d", frame_h, frame_w); ++ sprintf(shape_string, "1,%d,%d", frame_h, frame_w); + break; + default: + av_log(ctx, AV_LOG_ERROR, "Not supported model type\n"); +-- +2.34.1 + diff --git a/ivsr_ffmpeg_plugin/patches/0025-dnn_backend_ivsr-change-aligned-size-to-64-from-8.patch b/ivsr_ffmpeg_plugin/patches/0025-dnn_backend_ivsr-change-aligned-size-to-64-from-8.patch new file mode 100644 index 0000000..efdeb86 --- /dev/null +++ b/ivsr_ffmpeg_plugin/patches/0025-dnn_backend_ivsr-change-aligned-size-to-64-from-8.patch @@ -0,0 +1,26 @@ +From b058f5bcd7d6d68f9038a1016476fdf1b3566300 Mon Sep 17 00:00:00 2001 +From: Xiaoxia Liang +Date: Thu, 26 Sep 2024 22:47:25 +0800 +Subject: [PATCH] dnn_backend_ivsr: change aligned size to 64 from 8 + +Signed-off-by: Xiaoxia Liang +--- + libavfilter/dnn/dnn_backend_ivsr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libavfilter/dnn/dnn_backend_ivsr.c b/libavfilter/dnn/dnn_backend_ivsr.c +index e7e1d5ea2a..246fe5eb2b 100644 +--- a/libavfilter/dnn/dnn_backend_ivsr.c ++++ b/libavfilter/dnn/dnn_backend_ivsr.c +@@ -109,7 +109,7 @@ static const AVOption dnn_ivsr_options[] = { + + 
AVFILTER_DEFINE_CLASS(dnn_ivsr); + +-#define ALIGNED_SIZE 8 ++#define ALIGNED_SIZE 64 + + static int get_datatype_size(DNNDataType dt) + { +-- +2.34.1 + diff --git a/ivsr_ffmpeg_plugin/patches/0026-Using-plugin-to-do-model-preprocessing-for-TSENet.patch b/ivsr_ffmpeg_plugin/patches/0026-Using-plugin-to-do-model-preprocessing-for-TSENet.patch new file mode 100644 index 0000000..076f9b1 --- /dev/null +++ b/ivsr_ffmpeg_plugin/patches/0026-Using-plugin-to-do-model-preprocessing-for-TSENet.patch @@ -0,0 +1,169 @@ +From 163300648bc861dc781c43e5a5d64985e5992386 Mon Sep 17 00:00:00 2001 +From: Jerry Dong +Date: Tue, 19 Nov 2024 14:10:45 +0800 +Subject: [PATCH] Using plugin to do model preprocessing for TSENet. + +--- + libavfilter/dnn/dnn_backend_ivsr.c | 40 ++++++++++++++++++++---------- + libavfilter/dnn/dnn_io_proc.c | 9 ++++--- + 2 files changed, 32 insertions(+), 17 deletions(-) + +diff --git a/libavfilter/dnn/dnn_backend_ivsr.c b/libavfilter/dnn/dnn_backend_ivsr.c +index 246fe5eb2b..4a0fab99cd 100644 +--- a/libavfilter/dnn/dnn_backend_ivsr.c ++++ b/libavfilter/dnn/dnn_backend_ivsr.c +@@ -103,6 +103,7 @@ static const AVOption dnn_ivsr_options[] = { + { "extension", "extension lib file full path, usable for BasicVSR model", OFFSET(options.extension), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS}, + { "op_xml", "custom op xml file full path, usable for BasicVSR model", OFFSET(options.op_xml), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS}, + { "model_type", "dnn model type", OFFSET(options.model_type), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MODEL_TYPE_NUM - 1, FLAGS}, ++ //TODO: replace "normalize_factor" with "scale" as defined in openvino backend + { "normalize_factor", "normalization factor", OFFSET(options.normalize_factor), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 1.0, 65535.0, FLAGS}, + { NULL } + }; +@@ -323,9 +324,10 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + + input.data = request->in_frames; + in_data = input.data; ++ //scale/mean can't be 
retrieved, so they're 0.0 by default + // ff_proc_from_frame_to_dnn: uint_8->uint8 requires scale == 1 and mean == 0 and dt == UINT8 +- input.scale = 1; +- input.mean = 0; ++ input.scale = input.dt == DNN_UINT8 ? 1.0f : 0.0f; ++ input.mean = 0.0f; + + ctx->model_input_height = input.height; + ctx->model_input_width = input.width; +@@ -370,7 +372,7 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + ivsr_model->model-> + filter_ctx); + // convert buffer from NHWC to NCHW when C != 1 +- if (input.layout != 1 && input.layout == DL_NONE ) ++ if (input.channels != 1 && input.layout == DL_NONE ) + convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width, input.dt); + input.data += + input.height * input.width * +@@ -410,7 +412,9 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + for (int idx = 0; idx < ivsr_model->nif; idx++) { + //INFO: the 3 frames in frame_queue are: (N-2)th, (N-1)th, (N)th + ff_proc_from_frame_to_dnn(input_frames[idx], &input, ivsr_model->model->filter_ctx); +- convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width, input.dt); ++ //NHWC->NCHW was processed in ff_proc_from_frame_to_dnn() if input.layout is set ++ if (input.channels != 1 && input.layout == DL_NONE ) ++ convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width, input.dt); + input.data += input.height * input.width * input.channels * get_datatype_size(input.dt); + } + input.data = in_data; +@@ -434,8 +438,8 @@ static int fill_model_input_ivsr(IVSRModel * ivsr_model, + convert_nhwc_to_nchw(input.data, 1, input.channels, input.height, input.width, input.dt); + } + +- if (normalize_factor != 1 && input.dt == DNN_FLOAT +- && (input.scale > 1 || input.scale == 0)) { ++ if (normalize_factor != 1 && input.dt == DNN_FLOAT && ++ (fabsf(input.scale - 1.0f) > 1e-6f || fabsf(input.scale) < 1e-6f)) { + // do not need to covert buffer from NHWC to NCHW if the channels is 1, only need to mulitple normalize_factor + 
#pragma omp parallel for + for (int pos = 0; pos < input.height * input.width * input.channels; pos++) { +@@ -490,9 +494,10 @@ static void infer_completion_callback(void *args) + set_dnndata_info(&output, &output_tensor_desc_get); + + output.data = request->out_frames; +- output.mean = 0; ++ //scale/mean can't be retrieved, so they're 0.0 by default ++ output.mean = 0.0f; + // ff_proc_from_dnn_to_frame: float->uint8 require (scale == 255 or scale == 0) and mean == 0 +- output.scale = output.dt == DNN_UINT8 ? 1 : 0; ++ output.scale = output.dt == DNN_UINT8 ? 1.0f : 0.0f; + // set order based on model type + switch (ivsr_model->model_type) + { +@@ -555,8 +560,8 @@ static void infer_completion_callback(void *args) + convert_nchw_to_nhwc(output.data, 1, output.channels, output.height, output.width, output.dt); + } + +- if (normalize_factor != 1 && output.dt == DNN_FLOAT +- && (output.scale > 1 || output.scale == 0)) { ++ if (normalize_factor != 1 && output.dt == DNN_FLOAT && ++ (fabsf(output.scale - 1.0f) > 1e-6f || fabsf(output.scale) < 1e-6f)) { + #pragma omp parallel for + // only need to devide by normalize_factor for channels = 1. + for (int pos = 0; pos < output.height * output.width * output.channels; pos++) { +@@ -890,6 +895,9 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + .scale = 0.0, + .dimension = 4, + .shape = {0, 0, 0, 0}}; ++ ++ // Through the setting of input/output_tensor_desc_set, we can config where ++ // to do the pre-processing, in plugin or in SDK(openvino). 
+ // set element type according to bit depth of frame + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); + switch (desc->comp[0].depth) +@@ -904,13 +912,19 @@ DNNModel *ff_dnn_load_model_ivsr(const char *model_filename, + default: + break; + } +- // set layout for Basic_VSR ++ // customize layout for Basic_VSR and TSENet + if (ivsr_model->model_type == BASICVSR) { + strcpy(input_tensor_desc_set.layout, "NFHWC"); + strcpy(output_tensor_desc_set.layout, "NFHWC"); ++ } else if (ivsr_model->model_type == TSENET) { ++ //For TSENet, it's not typical N'C'HW, so do the NHWC->NCHW transion in plugin ++ strcpy(input_tensor_desc_set.layout, "NCHW"); ++ strcpy(input_tensor_desc_set.precision, "fp32"); + } +- // set scale +- if (fabsf(ctx->options.normalize_factor - 1) < 1e-6f) { ++ // set scale for non-float type of input ++ if (fabsf(ctx->options.normalize_factor - 1) < 1e-6f && ++ (strcmp(input_tensor_desc_set.precision, "u8") == 0 || ++ strcmp(input_tensor_desc_set.precision, "u16") == 0)) { + switch (desc->comp[0].depth) + { + case 8: +diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c +index 8dec6d97be..09fea90e20 100644 +--- a/libavfilter/dnn/dnn_io_proc.c ++++ b/libavfilter/dnn/dnn_io_proc.c +@@ -285,9 +285,10 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + else if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT16) + dst_fmt = comp_desc->depth == 10 ? 
AV_PIX_FMT_GRAY10 : AV_PIX_FMT_GRAY16; + /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */ ++ //TODO: compare with "255" doesn't cover 10-bit case + else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) && + fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT) +- dst_fmt = AV_PIX_FMT_GRAYF32; ++ dst_fmt = AV_PIX_FMT_GRAYF32; //float, 0.0f ~ 1.0f + else { + av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 " + "scale: %f, mean: %f\n", input->scale, input->mean); +@@ -296,7 +297,6 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + + src_data = (void **)frame->data; + linesize[0] = frame->linesize[0]; +- plane_size = linesize[0] * frame->height; + + switch (frame->format) { + case AV_PIX_FMT_RGB48LE: +@@ -306,6 +306,9 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + if (input->layout == DL_NCHW) { + av_assert0(comp_desc->depth == 8 || comp_desc->depth == 16); + mdl_fmt = comp_desc->depth == 8 ? 
AV_PIX_FMT_GBRP : AV_PIX_FMT_GBRP16LE; ++ int middle_data_linesize[4] = {0}; ++ ret = av_image_fill_linesizes(middle_data_linesize, mdl_fmt, frame->width); ++ plane_size = middle_data_linesize[0] * frame->height; + middle_data = av_malloc(plane_size * input->channels); + if (!middle_data) { + ret = AVERROR(ENOMEM); +@@ -340,8 +343,6 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) + return AVERROR(ENOSYS); + } + +- int middle_data_linesize[4] = {0}; +- ret = av_image_fill_linesizes(middle_data_linesize, mdl_fmt, frame->width); + sws_scale(sws_ctx, (const uint8_t * const *)frame->data, + frame->linesize, 0, frame->height, planar_data, + middle_data_linesize); +-- +2.34.1 + diff --git a/ivsr_ov/license/LICENSE.md b/ivsr_ov/license/LICENSE.md deleted file mode 100644 index 14ac870..0000000 --- a/ivsr_ov/license/LICENSE.md +++ /dev/null @@ -1,31 +0,0 @@ -# BSD 3-Clause License - -Copyright (c) 2023, Intel Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -DISCLAIMER - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/ivsr_sdk/CMakeLists.txt b/ivsr_sdk/CMakeLists.txt index 30fead1..4f19a39 100644 --- a/ivsr_sdk/CMakeLists.txt +++ b/ivsr_sdk/CMakeLists.txt @@ -3,20 +3,32 @@ # cmake_minimum_required(VERSION 3.10) -project(IVSR DESCRIPTION "Intel Video Super Resolution SDK") - +project(IVSR DESCRIPTION "Intel IVSR SDK") +include(GNUInstallDirs) set(OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_FOLDER}/bin) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_FOLDER}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_FOLDER}/lib) +SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb") +SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") + set(SDK_PRIVATE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/privates/include) add_subdirectory(src) -add_subdirectory(samples) -#add_subdirectory(tools) add_subdirectory(privates) +if(ENABLE_SAMPLE) + add_subdirectory(samples) +endif() + if(ENABLE_TEST) add_subdirectory(test) endif() + +install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" + FILES_MATCHING PATTERN "*.h" +) + +install(TARGETS ivsr LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") diff --git a/ivsr_sdk/README.md b/ivsr_sdk/README.md index f7396c5..53cc435 100644 --- a/ivsr_sdk/README.md +++ b/ivsr_sdk/README.md @@ -147,8 +147,8 @@ IVSRStatus 
ivsr_get_attr(ivsr_handle handle, IVSRAttrKey key, void* value); |Key name|Description| |:--|:--| |IVSR_VERSION|Use this key to get ivsr version.| - |INPUT_TENSOR_DESC|Use this key to get model input shape.| - |OUTPUT_TENSOR_DESC|Use this key to get model output shape.| + |INPUT_TENSOR_DESC|Use this key to get model input tensor description includes precision, layout and shape information.| + |OUTPUT_TENSOR_DESC|Use this key to get model output description includes precision, layout and shape information.| |NUM_INPUT_FRAMES|Use this key to get input frames number of the model.| |INPUT_DIMS|Use this key to get input dims of the model.| |OUTPUT_DIMS|Use this key to get input dims of the model.| @@ -191,7 +191,7 @@ The method deinitializes the handle and releases the resources allocated for iVS ## **VSR Sample** -There is a simple C++ sample to perform BasicVSR/EDSR/SVP inference on OpenVINO backend. You can reach the sample from `/ivsr_sdk/bin/vsr_sample`. You can get the help messages by running `./vsr_sample -h` and see the default settings of parameters. +There is a simple C++ sample to perform BasicVSR/EDSR/SVP inference on OpenVINO backend. Please add CMake option **-DENABLE_SAMPLE=ON** and build/install its dependency `OpenCV` component befor that. You can reach the sample from `/ivsr_sdk/bin/vsr_sample`. You can get the help messages by running `./vsr_sample -h` and see the default settings of parameters. |Option name|Desciption|Default value|Recommended value(s)| |:--|:--|:--|:--| @@ -205,7 +205,7 @@ There is a simple C++ sample to perform BasicVSR/EDSR/SVP inference on OpenVINO |save_path|Optional. Path to save predictions.|./outputs|If use the default value, make sure default path exists.| |save_predictions|Optional. Whether to save the results to save_path.|false|If this option exists, results will be saved.| |scale_factor|Optional. 
The ratio of the size of the image before scaling (original size) to the size of the image after scaling (new size).|2|For image enhancement model and if no resolution change, please set to 1.| -|normalize_factor|Optional. Normalization factor is equal to the value range required by models.|1.0|255.0 for Enhanced EDSR, 1.0 for other models.| +|normalize_factor|Optional. Normalization factor is equal to the value range required by models.|255.0|Must set to 1.0 for Enhanced EDSR.| |precision |Required for inference precision setting, but runtime precision you need to check with your HW platform.|f32|f32[FP32], f16[FP16], bf16[bf16].| |reshape_values|Optional. Reshape the network to fit the input image size. |None|Set the complete tensor value of the shape. e.g. --reshape_values="(1,3,720,1280)" in case your input image happens to be 1280x720 RGB 24bits| @@ -225,7 +225,7 @@ cd /ivsr_sdk/bin ### **Enhanced EDSR model Sample** ```bash cd /ivsr_sdk/bin -./vsr_sample --model_path=[your EDSR model.xml] --data_path=[folder contains low resolution images] --nig=1 --device=GPU --normalize_factor=255.0 --precision=f16 #need to set normalize_factor as 225.0 +./vsr_sample --model_path=[your EDSR model.xml] --data_path=[folder contains low resolution images] --nig=1 --device=GPU --normalize_factor=1.0 --precision=f16 #need to set normalize_factor as 1.0 ``` ### **SVP models Sample** ```bash diff --git a/ivsr_sdk/include/ivsr.h b/ivsr_sdk/include/ivsr.h index e62746b..11fe1b2 100644 --- a/ivsr_sdk/include/ivsr.h +++ b/ivsr_sdk/include/ivsr.h @@ -24,6 +24,7 @@ #include #include +#include /** * @brief vsr context @@ -39,7 +40,7 @@ typedef struct ivsr_callback { /** * @brief Intel VSR SDK version. 
- * + * */ typedef struct ivsr_version { const char *api_version; //!< A string representing ibasicvsr sdk version> @@ -47,7 +48,7 @@ typedef struct ivsr_version { /** * @brief Status for Intel VSR SDK - * + * */ typedef enum { OK = 0, @@ -61,19 +62,29 @@ typedef enum { /** * @enum vsr sdk supported key. - * + * There are multiple configurations which contain resolutions, + * INPUT_RES - it's for patch-based solution + * RESHAPE_SETTINGS - it's to reshape the model's input tensor, NHW in current version + * INPUT_TENSOR_DESC_SETTING - input data's tensor description + * OUTPUT_TENSOR_DESC_SETTING - output data's tensor description + * + * RESHAPE_SETTINGS carries data for BATCH, WIDTH, HEIGH, in NHW format. + * We may extent the type from one vector to a structure which specifies layout and different dimensions + * */ typedef enum { - INPUT_MODEL = 0x1, //!< Required, Path to the input model file> - TARGET_DEVICE = 0x2, //!< Required, device to run the inference> + INPUT_MODEL = 0x1, //!< Required. Path to the input model file> + TARGET_DEVICE = 0x2, //!< Required. Device to run the inference> BATCH_NUM = 0x3, //!< Not Enabled Yet> VERBOSE_LEVEL = 0x4, //!< Not Enabled Yet> - CUSTOM_LIB = 0x5, //!< Path to extension lib file, required for loading Extended BasicVSR model> - CLDNN_CONFIG = 0x6, //!< Path to custom op xml file, required for loading Extended BasicVSR model> - NUM_IFER_REQUEST = 0x7, //!< Not Enabled Yet> + CUSTOM_LIB = 0x5, //!< Optional. Path to extension lib file, required for loading Extended BasicVSR model> + CLDNN_CONFIG = 0x6, //!< Optional. Path to custom op xml file, required for loading Extended BasicVSR model> + INFER_REQ_NUMBER = 0x7, //!< Optional. To specify inference request number> PRECISION = 0x8, //!< Optional. To set inference precision for hardware> RESHAPE_SETTINGS = 0x9, //!< Optional. 
To set reshape setting for the input model> - INPUT_RES = 0xA, //!< Required, to specify the input frame resolution> + INPUT_RES = 0xA, //!< Required. To specify the input frame resolution> + INPUT_TENSOR_DESC_SETTING = 0xB, + OUTPUT_TENSOR_DESC_SETTING = 0xC }IVSRConfigKey; typedef enum { @@ -87,15 +98,23 @@ typedef enum { /** * @struct Intel VSR configuration. - * + * */ typedef struct ivsr_config { IVSRConfigKey key; - const char *value; + const void *value; struct ivsr_config *next; }ivsr_config_t; - +typedef struct tensor_desc { + char precision[20]; + char layout[20]; + char tensor_color_format[20]; + char model_color_format[20]; + float scale; + uint8_t dimension; + size_t shape[8]; +} tensor_desc_t; #ifdef __cplusplus extern "C" { @@ -103,48 +122,50 @@ extern "C" { /** * @brief initialize the intel vsr sdk - * + * * @param configs configurations to initialize the intel vsr sdk. * @param handle handle used to process frames. - * @return IVSRStatus + * @return IVSRStatus */ IVSRStatus ivsr_init(ivsr_config_t *configs, ivsr_handle *handle); /** * @brief process function - * + * * @param handle vsr process handle. * @param input_data input data buffer * @param output_data output data buffer * @param cb callback function. - * @return IVSRStatus + * @return IVSRStatus */ IVSRStatus ivsr_process(ivsr_handle handle, char* input_data, char* output_data, ivsr_cb_t* cb); +IVSRStatus ivsr_process_async(ivsr_handle handle, char* input_data, char* output_data, ivsr_cb_t* cb); + /** * @brief reset the configures for vsr - * + * * @param handle vsr process handle * @param configs changed configurations for vsr. - * @return IVSRStatus + * @return IVSRStatus */ IVSRStatus ivsr_reconfig(ivsr_handle handle, ivsr_config_t* configs); /** - * @brief get attributes - * + * @brief get attributes + * * @param handle vsr process handle * @param key indicate which type information to query. * @param value returned data. 
- * @return IVSRStatus + * @return IVSRStatus */ IVSRStatus ivsr_get_attr(ivsr_handle handle, IVSRAttrKey key, void* value); /** * @brief free created vsr handle and conresponding resources. - * + * * @param handle vsr process handle. - * @return IVSRStatus + * @return IVSRStatus */ IVSRStatus ivsr_deinit(ivsr_handle handle); diff --git a/ivsr_sdk/privates/model_guard.bin/libirguard.a-2024.5.0 b/ivsr_sdk/privates/model_guard.bin/libirguard.a-2024.5.0 new file mode 100644 index 0000000..3c89686 Binary files /dev/null and b/ivsr_sdk/privates/model_guard.bin/libirguard.a-2024.5.0 differ diff --git a/ivsr_sdk/samples/CMakeLists.txt b/ivsr_sdk/samples/CMakeLists.txt index 548c002..67af763 100644 --- a/ivsr_sdk/samples/CMakeLists.txt +++ b/ivsr_sdk/samples/CMakeLists.txt @@ -7,7 +7,6 @@ set (TARGET_NAME "vsr_sample") set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib) -SET(CMAKE_BUILD_TYPE "Debug") SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb") SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") @@ -17,7 +16,7 @@ target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/. target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../src/include/") target_link_libraries(${TARGET_NAME} PRIVATE ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libivsr.so) - +add_dependencies(vsr_sample ivsr) find_package(OpenCV REQUIRED) target_include_directories(${TARGET_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS}) diff --git a/ivsr_sdk/samples/vsr_sample.cpp b/ivsr_sdk/samples/vsr_sample.cpp index ccccf8c..865a1fa 100644 --- a/ivsr_sdk/samples/vsr_sample.cpp +++ b/ivsr_sdk/samples/vsr_sample.cpp @@ -39,10 +39,11 @@ const std::string keys = "{save_path |./outputs| Optional. Path to a folder to save predictions.}" "{cldnn_config | | Optional option. Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.}" "{nig |1| Optional. Number of input data groups for inference. 
}" - "{normalize_factor |1.0| Optional. Normalization factor is equal to the value range required by models, default is 1.0. 255.0 for Enhanced EDSR, 1.0 for other models.}" + "{normalize_factor |255.0| Optional. Normalization factor is equal to the value range required by models, default is 255.0.}" "{scale_factor |2| Optional. The ratio of the size of the image before scaling (original size) to the size of the image after scaling (new size).}" "{precision |f32| Optional. For inference precision.fp32:f32, fp16:f16, bf16:bf16}" "{reshape_values | | Optional. Reshape network to fit the input image size. e.g. --reshape_values=\"(1,3,720,1280)\"}" + "{num_infer_req | | Optional. Number of infer request number.}" ; bool checkPath(const std::string& path){ @@ -188,6 +189,33 @@ bool commandLineCheck(int argc, char**argv,std::string keys){ return true; } +void print_tensor_desc(const tensor_desc_t* tensor) { + if (!tensor) { + printf("Invalid tensor descriptor!\n"); + return; + } + + printf("Tensor Descriptor:\n"); + printf("Precision: %s\n", tensor->precision); + printf("Layout: %s\n", tensor->layout); + printf("Tensor Color Format: %s\n", tensor->tensor_color_format); + printf("Model Color Format: %s\n", tensor->model_color_format); + printf("Scale: %.2f\n", tensor->scale); + printf("Dimension: %u\n", tensor->dimension); + printf("Shape: ["); + + for (uint8_t i = 0; i < tensor->dimension; i++) { + printf("%zu", tensor->shape[i]); + if (i < tensor->dimension - 1) { + printf(", "); + } + } + + printf("]\n"); +} + +using IVSRFunction = std::function; + int main(int argc, char** argv){ // -------- Parsing and validation of input arguments -------- cv::CommandLineParser parser(argc, argv, keys); @@ -203,7 +231,13 @@ int main(int argc, char** argv){ bool save_predictions = parser.get("save_predictions"); std::string save_path = parser.get("save_path"); float normalize_factor = parser.get("normalize_factor"); - + const float NORMALFACTOR_MIN = 1.0; + const float 
NORMALFACTOR_MAX = 65535.0; + if(normalize_factor < NORMALFACTOR_MIN || normalize_factor > NORMALFACTOR_MAX) + { + std::cout << "Invalid normalize_factor value! Please enter a value between 1.0 and 255.0."<("scale_factor"); @@ -245,7 +279,7 @@ int main(int argc, char** argv){ // 1. set ivsr config std::list configs; - auto add_config = [&configs](IVSRConfigKey key, const char *value) { + auto add_config = [&configs](IVSRConfigKey key, const void *value) { auto new_config = new ivsr_config_t(); new_config->key = key; new_config->value = value; @@ -273,10 +307,40 @@ int main(int argc, char** argv){ auto reshape_settings = parser.get("reshape_values"); if (!reshape_settings.empty()) add_config(IVSRConfigKey::RESHAPE_SETTINGS, reshape_settings.c_str()); + auto nireq = parser.get("num_infer_req"); + if (!nireq.empty()) add_config(IVSRConfigKey::INFER_REQ_NUMBER, nireq.c_str()); + // in format "," std::string input_res = std::to_string(frameWidth) + "," + std::to_string(frameHeight); add_config(IVSRConfigKey::INPUT_RES, input_res.c_str()); + uint8_t dimension_set = 4; + std::string model_path_lower = model_path; + std::transform(model_path_lower.begin(), model_path_lower.end(), model_path_lower.begin(), ::tolower); + // basicvsr has 5 dimensions + if (model_path_lower.find("basicvsr") != std::string::npos) { + std::cout << "\"basicvsr\" is found in model_path." 
<< std::endl; + dimension_set = 5; + } + + tensor_desc_t input_tensor_desc_set = {.precision = "u8", + .layout = "NHWC", + .tensor_color_format = "BGR", + .model_color_format = "RGB", + .scale = normalize_factor, + .dimension = dimension_set, + .shape = {0, 0, 0, 0}}; + tensor_desc_t output_tensor_desc_set = {.precision = "fp32", + .layout = "NCHW", + .tensor_color_format = {0}, + .model_color_format = {0}, + .scale = 0.0, + .dimension = dimension_set, + .shape = {0, 0, 0, 0}}; + + add_config(IVSRConfigKey::INPUT_TENSOR_DESC_SETTING, &input_tensor_desc_set); + add_config(IVSRConfigKey::OUTPUT_TENSOR_DESC_SETTING, &output_tensor_desc_set); + // 2. initialize ivsr ivsr_handle handle = nullptr; auto res = ivsr_init(*configs.begin(), &handle); @@ -285,6 +349,12 @@ int main(int argc, char** argv){ return -1; } + tensor_desc_t input_tensor_desc_get = {0}, output_tensor_desc_get = {0}; + ivsr_get_attr(handle, INPUT_TENSOR_DESC, &input_tensor_desc_get); + ivsr_get_attr(handle, OUTPUT_TENSOR_DESC, &output_tensor_desc_get); + print_tensor_desc(&input_tensor_desc_get); + print_tensor_desc(&output_tensor_desc_get); + int nif = 0; res = ivsr_get_attr(handle, IVSRAttrKey::NUM_INPUT_FRAMES, &nif); if(res < 0){ @@ -318,9 +388,9 @@ int main(int argc, char** argv){ /* how to check image size and model input? 
*/ // refer to cv::dnn::blobFromImages - cv::Mat inputNCHW = blobFromImages(inMatList, normalize_factor/255.0, true); - int sz[] = { 1, nif, 3, oriHeight, oriWidth }; - cv::Mat inputImg(5, sz, CV_32F, (char *)inputNCHW.data); + cv::Mat inputNHWC = inMatList[0]; //blobFromImages(inMatList, normalize_factor/255.0, true); + int sz[] = { 1, nif, oriHeight, oriWidth, 3 }; + cv::Mat inputImg(5, sz, CV_8U, (char *)inputNHWC.data); inputDataList.push_back(inputImg.clone()); int outputSize[] = {1, nif, 3, oriHeight * scaleFactor, oriWidth * scaleFactor}; @@ -334,18 +404,20 @@ int main(int argc, char** argv){ #ifdef ENABLE_PERF auto totalStartTime = Time::now(); #endif - for (; id < inputDataList.size() && id < outputDataList.size();id++){ + + IVSRFunction process_fn = nireq.empty() ? ivsr_process : ivsr_process_async; + for (; id < inputDataList.size() && id < outputDataList.size(); id++) { auto inputImg = inputDataList[id]; auto outputImg = outputDataList[id]; ivsr_cb_t cb; auto startTime = Time::now(); - callback_args cb_args(0,nif,startTime); + callback_args cb_args(0, nif, startTime); cb.ivsr_cb = completion_callback; cb.args = (void*)(&cb_args); // 4. inference - auto result = ivsr_process(handle, (char *)inputImg.data, (char *)outputImg.data, &cb); - if(result < 0){ - std::cout <<"Failed to process the inference on input data seq." 
<< id < outMatList; imagesFromBlob(outputNCHW, outMatList); // save group - for(int i = 0; i < nif; i ++){ + for (int i = 0; i < nif; i++) { std::string filePath = save_path + "/" + filePathList[idx * nif + i]; #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "Saving image: " << filePath < QueueCallbackFunction; - class InferTask { public: using Ptr = std::shared_ptr; - //construct function - InferTask(char* inBuf, char* outBuf, QueueCallbackFunction callbackQueue, InferFlag flag): - inputPtr_(inBuf), outputPtr_(outBuf), _callbackQueue(callbackQueue), flag_(flag) {} + using QueueCallbackFunction = std::function; + // construct function + InferTask(char* inBuf, char* outBuf, QueueCallbackFunction callbackQueue, InferFlag flag, ivsr_cb_t* ivsr_cb) + : _callbackFunction(callbackQueue), + flag_(flag), + inputPtr_(inBuf), + outputPtr_(outBuf), + cb(ivsr_cb) {} - InferFlag getInferFlag() { return flag_; } + InferFlag getInferFlag() { + return flag_; + } double get_execution_time_in_milliseconds() const { auto execTime = std::chrono::duration_cast(_endTime - _startTime); return static_cast(execTime.count()) * 0.000001; } - + public: - QueueCallbackFunction _callbackQueue; - InferFlag flag_ = InferFlag::GPU; //Default will use GPU to do inference task - char* inputPtr_ = nullptr; //input buffer ptr - char* outputPtr_ = nullptr; //output buffer pointer + QueueCallbackFunction _callbackFunction; + InferFlag flag_ = InferFlag::GPU; // Default will use GPU to do inference task + char* inputPtr_ = nullptr; // input buffer ptr + char* outputPtr_ = nullptr; // output buffer pointer Time::time_point _startTime; Time::time_point _endTime; + ivsr_cb_t* cb = nullptr; }; #endif //INFER_TASK_HPP diff --git a/ivsr_sdk/src/include/engine.hpp b/ivsr_sdk/src/include/engine.hpp index 2c46daf..e2a3272 100644 --- a/ivsr_sdk/src/include/engine.hpp +++ b/ivsr_sdk/src/include/engine.hpp @@ -1,16 +1,16 @@ /******************************************************************************** -* INTEL 
CONFIDENTIAL -* Copyright (C) 2023 Intel Corporation -* -* This software and the related documents are Intel copyrighted materials, -* and your use of them is governed by the express license under -* which they were provided to you ("License").Unless the License -* provides otherwise, you may not use, modify, copy, publish, distribute, disclose or -* transmit this software or the related documents without Intel's prior written permission. -* -* This software and the related documents are provided as is, -* with no express or implied warranties, other than those that are expressly stated in the License. -*******************************************************************************/ + * INTEL CONFIDENTIAL + * Copyright (C) 2023 Intel Corporation + * + * This software and the related documents are Intel copyrighted materials, + * and your use of them is governed by the express license under + * which they were provided to you ("License").Unless the License + * provides otherwise, you may not use, modify, copy, publish, distribute, disclose or + * transmit this software or the related documents without Intel's prior written permission. + * + * This software and the related documents are provided as is, + * with no express or implied warranties, other than those that are expressly stated in the License. 
+ *******************************************************************************/ /** * @file engine.h @@ -21,65 +21,92 @@ #ifndef COMMON_ENGINE_HPP #define COMMON_ENGINE_HPP -#include "utils.hpp" -#include "InferTask.hpp" +#include +#include #include #include -#include -#include + +#include "InferTask.hpp" +#include "utils.hpp" using namespace std; -template +template class engine { private: - // Function objects for type-erased calls to interface methods - std::function init_func; - std::function run_func; - std::function wait_all_func; + using InitFunc = std::function; + using RunFunc = std::function; + using ProcFunc = std::function; + using WaitAllFunc = std::function; + using CreateInferRequestsFunc = std::function; + using GetInferRequestsSizeFunc = std::function; + + InitFunc init_func; + RunFunc run_func; + ProcFunc proc_func; + WaitAllFunc wait_all_func; + CreateInferRequestsFunc create_infer_requests_func; + GetInferRequestsSizeFunc get_infer_requests_size_func; + Derived* _derived = nullptr; public: - // Template constructor binds the provided methods of the derived engine implementation engine(Derived* derived) - : _derived(derived), - init_func([=]() -> IBasicVSRStatus { return _derived->init_impl(); }), - run_func([=](InferTask::Ptr task) -> IBasicVSRStatus { return _derived->run_impl(task); }), - wait_all_func([=]() { _derived->wait_all_impl(); }) - {} - + : init_func([=]() -> IVSRStatus { + return _derived->init_impl(); + }), + run_func([=](InferTask::Ptr task) -> IVSRStatus { + return _derived->run_impl(task); + }), + proc_func([=](void* input, void* output, void* cb) -> IVSRStatus { + return _derived->process_impl(input, output, cb); + }), + wait_all_func([=]() { + _derived->wait_all_impl(); + }), + create_infer_requests_func([=](size_t requests_num) -> IVSRStatus { + return _derived->create_infer_requests_impl(requests_num); + }), + get_infer_requests_size_func([=]() -> size_t { + return _derived->get_infer_requests_size_impl(); + }), + 
_derived(derived) {} + + // Default constructor engine() = default; - // Public interface methods call the type-erased std::function members - IBasicVSRStatus init() { + IVSRStatus init() { return init_func(); } - IBasicVSRStatus run(InferTask::Ptr task) { + IVSRStatus run(InferTask::Ptr task) { return run_func(task); } - // The templated get_attr method delegates to the derived class's method + IVSRStatus proc(void* input_data, void* output_data, void* cb) { + return proc_func(input_data, output_data, cb); + } + template - IBasicVSRStatus get_attr(const std::string& key, T& value) { - // Using CRTP style static_cast to delegate to the actual implementation provided by the derived class - // For this to work, the derived class must implement get_attr_impl with the appropriate signature + IVSRStatus get_attr(const std::string& key, T& value) { return _derived->get_attr_impl(key, value); } void wait_all() { - wait_all_func(); + return wait_all_func(); } - Derived* get_impl() const { return _derived; } - - IBasicVSRStatus create_infer_requests(size_t requests_num) { - return _derived->create_infer_requests_impl(requests_num); + IVSRStatus create_infer_requests(size_t requests_num) { + return create_infer_requests_func(requests_num); } size_t get_infer_requests_size() { - return _derived->get_infer_requests_size_impl(); + return get_infer_requests_size_func(); + } + + Derived* get_impl() const { + return _derived; } }; -#endif //COMMON_ENGINE_HPP +#endif // COMMON_ENGINE_HPP diff --git a/ivsr_sdk/src/include/ivsr_smart_patch.hpp b/ivsr_sdk/src/include/ivsr_smart_patch.hpp index ab0e17b..8886eb0 100644 --- a/ivsr_sdk/src/include/ivsr_smart_patch.hpp +++ b/ivsr_sdk/src/include/ivsr_smart_patch.hpp @@ -35,9 +35,9 @@ struct PatchConfig{ int nif; int dims; PatchConfig(int w = 1920, int h = 1080, int pw = 1920, int ph = 1080, int b_w = 1,int b_h = 1,int s = 2,int n = 3,int d = 5)\ - :patchWidth(pw),patchHeight(ph), - block_h(b_h),block_w(b_w),scale(s), - nif(n),dims(d){} + 
:patchWidth(pw), patchHeight(ph), + block_w(b_w), block_h(b_h), scale(s), + nif(n), dims(d){} friend std::ostream& operator<<(std::ostream& os, const PatchConfig& cfg) { return os << "PatchConfig [width]:" << cfg.patchWidth << " [height]:" << cfg.patchHeight diff --git a/ivsr_sdk/src/include/ov_engine.hpp b/ivsr_sdk/src/include/ov_engine.hpp index 74e0340..147e5d5 100644 --- a/ivsr_sdk/src/include/ov_engine.hpp +++ b/ivsr_sdk/src/include/ov_engine.hpp @@ -1,16 +1,16 @@ /******************************************************************************** -* INTEL CONFIDENTIAL -* Copyright (C) 2023 Intel Corporation -* -* This software and the related documents are Intel copyrighted materials, -* and your use of them is governed by the express license under -* which they were provided to you ("License").Unless the License -* provides otherwise, you may not use, modify, copy, publish, distribute, disclose or -* transmit this software or the related documents without Intel's prior written permission. -* -* This software and the related documents are provided as is, -* with no express or implied warranties, other than those that are expressly stated in the License. -*******************************************************************************/ + * INTEL CONFIDENTIAL + * Copyright (C) 2023 Intel Corporation + * + * This software and the related documents are Intel copyrighted materials, + * and your use of them is governed by the express license under + * which they were provided to you ("License").Unless the License + * provides otherwise, you may not use, modify, copy, publish, distribute, disclose or + * transmit this software or the related documents without Intel's prior written permission. + * + * This software and the related documents are provided as is, + * with no express or implied warranties, other than those that are expressly stated in the License. 
+ *******************************************************************************/ /** * @file openvino_engine.h @@ -23,22 +23,24 @@ #include #include + #include "engine.hpp" +#include "openvino/core/layout.hpp" #include "openvino/openvino.hpp" -#include "openvino/pass/manager.hpp" #include "openvino/pass/make_stateful.hpp" -#include "openvino/core/layout.hpp" +#include "openvino/pass/manager.hpp" -typedef std::function - CallbackFunction; +typedef std::function CallbackFunction; class inferReqWrap final { public: using Ptr = std::shared_ptr; - explicit inferReqWrap(ov::CompiledModel& model, size_t id,CallbackFunction callback) - : id_(id), request_(model.create_infer_request()),callback_(callback) {} + explicit inferReqWrap(ov::CompiledModel& model, size_t id, CallbackFunction callback) + : request_(model.create_infer_request()), + id_(id), + callback_(callback) {} void start_async() { - startTime_ = Time::now(); + startTime_ = Time::now(); request_.start_async(); } @@ -82,9 +84,10 @@ class inferReqWrap final { void set_callback(std::function callback) { request_.set_callback(std::move(callback)); } - void call_back(){ + void call_back() { callback_(id_); } + private: ov::InferRequest request_; size_t id_; @@ -93,62 +96,84 @@ class inferReqWrap final { CallbackFunction callback_; }; - class ov_engine : public engine { public: ov_engine(std::string device, std::string model_path, std::string custom_lib, std::map configs, - const std::vector& reshape_settings) + const std::vector& reshape_settings, + const tensor_desc_t input_tensor_desc, + const tensor_desc_t output_tensor_desc) : engine(this), device_(device), - model_path_(model_path), - custom_lib_(custom_lib), configs_(configs), - reshape_settings_(reshape_settings) { - init(); + reshape_settings_(reshape_settings), + input_tensor_desc_(input_tensor_desc), + output_tensor_desc_(output_tensor_desc), + custom_lib_(custom_lib), + model_path_(model_path) { + // init(); } - IBasicVSRStatus init_impl(); + IVSRStatus 
init_impl(); + + IVSRStatus run_impl(InferTask::Ptr task); - IBasicVSRStatus run_impl(InferTask::Ptr task); + IVSRStatus process_impl(void* input_data, void* output_data, void* cb = nullptr); template - IBasicVSRStatus get_attr_impl(const std::string& key, T& value) { - static_assert(std::is_same::value || std::is_same::value, + IVSRStatus get_attr_impl(const std::string& key, T& value) { + static_assert(std::is_same::value || std::is_same::value || + std::is_same::value, "get_attr() is only supported for 'ov::Shape' and 'size_t' types"); - +/* auto extend_shape = [](ov::Shape& shape, size_t dims) { if (shape.size() < dims) for (size_t i = shape.size(); i < dims; i++) shape.insert(shape.begin(), 1); }; - - if constexpr (std::is_same::value) { - if (key == "model_inputs" || key == "model_outputs") { - ov::Shape shape = (key == "model_inputs") ? input_.get_shape() : output_.get_shape(); - extend_shape(shape, size_t{5}); - value = shape; +*/ + + if constexpr (std::is_same::value) { + ov::Shape shape; + std::string element_type; + std::string layout; + ov::Output node; + if (key == "model_inputs") { + node = input_; + } else if (key == "model_outputs") { + node = output_; } else { - return ERROR; + return UNSUPPORTED_KEY; + } + + layout = ov::layout::get_layout(node).to_string(); + shape = node.get_shape(); + element_type = node.get_element_type().get_type_name(); + memcpy((char*)value.precision, element_type.c_str(), element_type.size()); + memcpy((char*)value.layout, layout.c_str(), layout.size()); + value.dimension = shape.size(); + for (auto i = 0u; i < shape.size(); ++i) { + value.shape[i] = shape[i]; } } else if constexpr (std::is_same::value) { if (key == "input_dims" || key == "output_dims") { const auto& shape = (key == "input_dims") ? input_.get_shape() : output_.get_shape(); value = shape.size() < 5 ? 
5 : shape.size(); } else { - return ERROR; + return UNSUPPORTED_KEY; } } - return SUCCESS; + return OK; } inferReqWrap::Ptr get_idle_request() { std::unique_lock lock(mutex_); #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "idleIds size: " < 0; @@ -157,18 +182,22 @@ class ov_engine : public engine { idleIds_.pop(); return request; } - void put_idle_request(size_t id){ + + void put_idle_request(size_t id) { std::unique_lock lock(mutex_); idleIds_.push(id); #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "put_idle_request: idleIds size: " << idleIds_.size() << std::endl; + std::cout << "[Trace]: " + << "put_idle_request: idleIds size: " << idleIds_.size() << std::endl; #endif cv_.notify_one(); } void wait_all_impl() { #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "ov_engine wait_all: " << "idleIds_ size:" << idleIds_.size() << " requests_ size:" << requests_.size() << std::endl; + std::cout << "[Trace]: " + << "ov_engine wait_all: " + << "idleIds_ size:" << idleIds_.size() << " requests_ size:" << requests_.size() << std::endl; #endif std::unique_lock lock(mutex_); cv_.wait(lock, [this] { @@ -176,13 +205,13 @@ class ov_engine : public engine { }); } - IBasicVSRStatus create_infer_requests_impl(size_t requests_num); + IVSRStatus create_infer_requests_impl(size_t requests_num); const size_t get_infer_requests_size_impl() { return requests_.size(); } - ~ov_engine(){ + ~ov_engine() { requests_.clear(); } @@ -192,11 +221,13 @@ class ov_engine : public engine { std::vector requests_; std::mutex mutex_; std::condition_variable cv_; - //configurations for openvino instances. + // configurations for openvino instances. 
std::map configs_; ov::Core instance_; ov::CompiledModel compiled_model_; std::vector reshape_settings_; + tensor_desc_t input_tensor_desc_; + tensor_desc_t output_tensor_desc_; std::string custom_lib_; std::string model_path_; @@ -205,4 +236,4 @@ class ov_engine : public engine { ov::Output output_; }; -#endif //OV_ENGINE_HPP +#endif // OV_ENGINE_HPP diff --git a/ivsr_sdk/src/include/threading/ivsr_thread_executor.hpp b/ivsr_sdk/src/include/threading/ivsr_thread_executor.hpp index 62d3d39..26541be 100644 --- a/ivsr_sdk/src/include/threading/ivsr_thread_executor.hpp +++ b/ivsr_sdk/src/include/threading/ivsr_thread_executor.hpp @@ -30,22 +30,19 @@ namespace IVSRThread { - // using Task = std::function; - using Task = InferTask::Ptr; +// using Task = std::function; +using Task = InferTask::Ptr; - struct Config { - std::string _name; - int _threads = 5; //!< Number of threads. +struct Config { + std::string _name; + int _threads = 5; //!< Number of threads. - Config(std::string name = "IVSRThreadsExecutor", - int threads = 1): - _name(name), - _threads(threads){}; - }; + Config(std::string name = "IVSRThreadsExecutor", int threads = 1) : _name(name), _threads(threads){}; +}; /** * @class IVSRThreadExecutor - * @brief Thread executor implementation. + * @brief Thread executor implementation. * It implements a common thread pool. 
*/ class IVSRThreadExecutor { @@ -54,6 +51,7 @@ class IVSRThreadExecutor { * @brief A shared pointer to a IVSRThreadExecutor object */ using Ptr = std::shared_ptr; + using CallbackFunc = std::function; /** * @brief Constructor @@ -68,27 +66,27 @@ class IVSRThreadExecutor { /** * @brief interface to enqueue task - */ + */ void Enqueue(Task task); /** * @brief interface to execute the task - */ + */ void Execute(Task task); /** * @brief interface to create task - */ - Task CreateTask(char* inBuf, char* outBuf, InferFlag flag); + */ + Task CreateTask(char* inBuf, char* outBuf, InferFlag flag, ivsr_cb_t* cb = NULL); /** * @brief interface to sync all the tasks - */ + */ void wait_all(int patchSize); /** * @brief interface to get total duration - */ + */ double get_duration_in_milliseconds(); private: diff --git a/ivsr_sdk/src/include/threading/ivsr_thread_local.hpp b/ivsr_sdk/src/include/threading/ivsr_thread_local.hpp index bc4172b..a039d95 100644 --- a/ivsr_sdk/src/include/threading/ivsr_thread_local.hpp +++ b/ivsr_sdk/src/include/threading/ivsr_thread_local.hpp @@ -28,7 +28,6 @@ namespace IVSRThread { - template struct ThreadLocal { using Map = std::unordered_map; @@ -85,13 +84,13 @@ struct ThreadLocal { auto operator*() -> decltype(it->second) { return it->second; } - auto operator-> () -> decltype(&(it->second)) { + auto operator->() -> decltype(&(it->second)) { return &(it->second); } auto operator*() const -> decltype(it->second) { return it->second; } - auto operator-> () const -> decltype(&(it->second)) { + auto operator->() const -> decltype(&(it->second)) { return &(it->second); } }; @@ -110,6 +109,4 @@ struct ThreadLocal { } }; -// #endif - -} // namespace InferenceEngine +} // namespace IVSRThread diff --git a/ivsr_sdk/src/include/utils.hpp b/ivsr_sdk/src/include/utils.hpp index 3a9c9f6..c1381d6 100644 --- a/ivsr_sdk/src/include/utils.hpp +++ b/ivsr_sdk/src/include/utils.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -68,28 
+69,29 @@ inline std::vector split(const std::string& s, char delim) { }; inline void ivsr_status_log(IVSRStatus status, const char* log) { - switch (status) { - case IVSRStatus::GENERAL_ERROR: - std::cout << "[General Error] NULL pointer exception " << log << "." << std::endl; - break; - case IVSRStatus::UNSUPPORTED_KEY: - std::cout << "[Error] Unsupported keys " << log << ", please check the input keys." << std::endl; - break; - case IVSRStatus::UNSUPPORTED_CONFIG: - std::cout << "[Error] Unsupported configs " << log << ", please check the input configs." << std::endl; - break; - case IVSRStatus::UNKNOWN_ERROR: - std::cout << "[Unknown Error] Process failed " << log << "." << std::endl; - break; - case IVSRStatus::EXCEPTION_ERROR: - std::cout << "[Exceptoin] Exception occurred " << log << "." << std::endl; - break; - case IVSRStatus::UNSUPPORTED_SHAPE: - std::cout << "[Error] Unsupported input shape " << log << ", please check the input frame's size." << std::endl; - break; - - default: - break; + static const std::unordered_map status_messages = { + {IVSRStatus::GENERAL_ERROR, "[General Error] Generic error occurred"}, + {IVSRStatus::UNSUPPORTED_KEY, "[Error] Unsupported keys"}, + {IVSRStatus::UNSUPPORTED_CONFIG, "[Error] Unsupported configs"}, + {IVSRStatus::UNKNOWN_ERROR, "[Unknown Error] Process failed"}, + {IVSRStatus::EXCEPTION_ERROR, "[Exception] Exception occurred"}, + {IVSRStatus::UNSUPPORTED_SHAPE, "[Error] Unsupported input shape"} + }; + + auto it = status_messages.find(status); + if (it != status_messages.end()) { + std::cout << it->second << " " << log; + + // Additional messages for specific statuses + if (status == IVSRStatus::UNSUPPORTED_KEY) { + std::cout << ", please check the input keys."; + } else if (status == IVSRStatus::UNSUPPORTED_CONFIG) { + std::cout << ", please check the input configs."; + } else if (status == IVSRStatus::UNSUPPORTED_SHAPE) { + std::cout << ", please check the input frame's size."; + } + + std::cout << "." 
<< std::endl; } } diff --git a/ivsr_sdk/src/ivsr.cpp b/ivsr_sdk/src/ivsr.cpp index 489d1ef..a38a252 100644 --- a/ivsr_sdk/src/ivsr.cpp +++ b/ivsr_sdk/src/ivsr.cpp @@ -45,13 +45,16 @@ std::vector parse_devices(const std::string& device_string) { return result; } -void parse_engine_config(std::map &config, std::string device, std::string infer_precision, std::string cldnn_config){ +void parse_engine_config(std::map& config, + const std::string& device, + const std::string& infer_precision, + const std::string& cldnn_config) { auto getDeviceTypeFromName = [](std::string device) -> std::string { return device.substr(0, device.find_first_of(".(")); }; - if(device.find("GPU") != std::string::npos && !cldnn_config.empty()){ + if (device.find("GPU") != std::string::npos && !cldnn_config.empty()) { if (!config.count("GPU")) - config["GPU"] = {}; + config["GPU"] = {}; config["GPU"]["CONFIG_FILE"] = cldnn_config; } @@ -74,44 +77,41 @@ void parse_engine_config(std::map &config, std::string // remove the hardware devices if MULTI appears in the devices list. auto hardware_devices = devices; if (if_multi) { - ivsr_version_t version; + //ivsr_version_t version; ov::Version ov_version = ov::get_openvino_version(); - std::string ov_buildNumber = std::string(ov_version.buildNumber); + std::string ov_buildNumber = std::string(ov_version.buildNumber); // Parse out the currect virtual device as the target device. std::string virtual_device = split(device, ':').at(0); auto iter_virtual = std::find(hardware_devices.begin(), hardware_devices.end(), virtual_device); hardware_devices.erase(iter_virtual); - if (ov_buildNumber.find("2022.3") != std::string::npos) { - devices.clear(); + if (ov_buildNumber.find("2022.3") != std::string::npos) { + devices.clear(); devices.push_back(virtual_device); - } else { - devices = hardware_devices; - } + } else { + devices = hardware_devices; + } } // update config per device - int nstream = 1; // set nstream = 1 for GPU what about CPU? 
- //std::string infer_precision = "f32"; // set infer precision to f32 - //std::string infer_precision = "f16"; // set infer precision to f32 + int nstream = 1; // set nstream = 1 for GPU what about CPU? for (auto& d : devices) { auto& device_config = config[d]; try { // set throughput streams and infer precision for hardwares if (d == "MULTI" || d == "AUTO") { - for(auto& hd : hardware_devices){ - // construct device_config[hd] map and insert first property - device_config.insert(ov::device::properties(hd, ov::num_streams(nstream))); - // insert second property in device_config[hd] + for (auto& hd : hardware_devices) { auto& property = device_config[hd].as(); - property.emplace(ov::hint::inference_precision(infer_precision)); + property.emplace(ov::device::properties(hd, ov::num_streams(nstream))); + if (!infer_precision.empty()) + property.emplace(ov::hint::inference_precision(infer_precision)); } - } - else if(d.find("GPU") != std::string::npos){ // GPU + } else if (d.find("GPU") != std::string::npos) { // GPU device_config.emplace(ov::num_streams(nstream)); - device_config.emplace(ov::hint::inference_precision(infer_precision)); - } - else{ // CPU + if (!infer_precision.empty()) + device_config.emplace(ov::hint::inference_precision(infer_precision)); + } else { // CPU // insert inference precision to map device_config - device_config.emplace(ov::hint::inference_precision(infer_precision)); + if (!infer_precision.empty()) + device_config.emplace(ov::hint::inference_precision(infer_precision)); } } catch (const ov::AssertFailure& e) { std::cerr << "Caught an ov::AssertFailure exception: " << e.what() << std::endl; @@ -188,238 +188,314 @@ std::vector convert_string_to_vector(const std::string& input) { return result; } -struct ivsr{ - engine inferEngine; - IVSRThread::IVSRThreadExecutor *threadExecutor; - std::unordered_map vsr_config; +struct ivsr { + engine* inferEngine; + IVSRThread::IVSRThreadExecutor* threadExecutor; + std::unordered_map vsr_config; 
PatchConfig patchConfig; bool patchSolution; - std::vector input_data_shape; //shape of input data - ivsr():threadExecutor(nullptr),patchSolution(false){} + std::vector input_data_shape; // shape of input data + + ivsr() + : threadExecutor(nullptr), + patchSolution(false) {} + + // Define a constructor to initialize engine and other members if needed + ivsr(engine* engine, + IVSRThread::IVSRThreadExecutor* executor, + const std::unordered_map& config, + const PatchConfig& patch, + std::vector shape, + bool sol = false) + : inferEngine(engine), + threadExecutor(executor), + vsr_config(config), + patchConfig(patch), + patchSolution(sol), + input_data_shape(std::move(shape)) {} }; -IVSRStatus ivsr_init(ivsr_config_t *configs, ivsr_handle *handle){ - if(configs == nullptr || handle == nullptr){ - ivsr_status_log(IVSRStatus::GENERAL_ERROR,"in ivsr_init"); +IVSRStatus ivsr_init(ivsr_config_t *configs, ivsr_handle *handle) { + if (configs == nullptr || handle == nullptr) { + ivsr_status_log(IVSRStatus::GENERAL_ERROR, "in ivsr_init"); return IVSRStatus::GENERAL_ERROR; } - (*handle) = new ivsr(); - - //TODO: replace w/ parseConfig() ?? 
- // 1.parse input config - std::string model = "", device = "", batch = "", infer_precision = "f32"; - std::string verbose = "", custom_lib = "", cldnn_config = ""; - std::vector reshape_settings; - std::vector reso; + // Configuration variables + std::string model, device, batch, infer_precision; + std::string verbose, custom_lib, cldnn_config; + std::vector reshape_settings, reso; size_t frame_width = 0, frame_height = 0; - while(configs!=nullptr){ + int reshape_h = 0, reshape_w = 0; + std::unordered_map config_map; + size_t infer_request_num = 1; // default infer_request_num set to 1 + const tensor_desc_t *input_tensor_desc = nullptr; + const tensor_desc_t *output_tensor_desc = nullptr; + + // Parse input config + while (configs != nullptr) { IVSRStatus unsupported_status = IVSRStatus::OK; - std::string unsupported_output = ""; - switch(configs->key){ + std::string unsupported_output; + + switch (configs->key) { case IVSRConfigKey::INPUT_MODEL: - model = std::string(configs->value); - if(!checkFile(model)){ + model = std::string(static_cast(configs->value)); + if (!checkFile(model)) { unsupported_status = IVSRStatus::UNSUPPORTED_CONFIG; - unsupported_output.append("INPUT_MODEL").append("=").append(configs->value); - return IVSRStatus::UNSUPPORTED_CONFIG; + unsupported_output = "INPUT_MODEL=" + std::string(static_cast(configs->value)); } std::cout << "[INFO] " << "Model Path:" << model << std::endl; break; case IVSRConfigKey::TARGET_DEVICE: - device = configs->value; + device = static_cast(configs->value); std::cout << "[INFO] " << "DEVICE:" << device << std::endl; break; case IVSRConfigKey::BATCH_NUM: - batch = configs->value; + batch = static_cast(configs->value); break; case IVSRConfigKey::VERBOSE_LEVEL: - verbose = configs->value; + verbose = static_cast(configs->value); break; case IVSRConfigKey::CUSTOM_LIB: - custom_lib = configs->value; - if(!checkFile(custom_lib)){ // file not exists, inform out + custom_lib = static_cast(configs->value); + if 
(!checkFile(custom_lib)) { unsupported_status = IVSRStatus::UNSUPPORTED_CONFIG; - unsupported_output.append("CUSTOM_LIB").append("=").append(configs->value); - return IVSRStatus::UNSUPPORTED_CONFIG; + unsupported_output = "CUSTOM_LIB=" + std::string(static_cast(configs->value)); } break; case IVSRConfigKey::CLDNN_CONFIG: - cldnn_config = configs->value; - if(!checkFile(cldnn_config)){ + cldnn_config = static_cast(configs->value); + if (!checkFile(cldnn_config)) { unsupported_status = IVSRStatus::UNSUPPORTED_CONFIG; - unsupported_output.append("CLDNN_CONFIG").append("=").append(configs->value); - return IVSRStatus::UNSUPPORTED_CONFIG; + unsupported_output = "CLDNN_CONFIG=" + std::string(static_cast(configs->value)); } break; case IVSRConfigKey::PRECISION: - infer_precision = configs->value; - if(device.find("GPU") != std::string::npos){ - if(infer_precision.compare("f32") != 0 && infer_precision.compare("f16") != 0){ - ivsr_status_log(IVSRStatus::UNSUPPORTED_CONFIG,"for PRECISION="); + infer_precision = static_cast(configs->value); + if (device.find("GPU") != std::string::npos) { + if (infer_precision != "f32" && infer_precision != "f16") { + ivsr_status_log(IVSRStatus::UNSUPPORTED_CONFIG, "for PRECISION="); return IVSRStatus::UNSUPPORTED_CONFIG; } - }else{ - if(infer_precision.compare("f32") != 0 && infer_precision.compare("bf16") != 0){ - ivsr_status_log(IVSRStatus::UNSUPPORTED_CONFIG,"for PRECISION="); + } else { + if (infer_precision != "f32" && infer_precision != "bf16") { + ivsr_status_log(IVSRStatus::UNSUPPORTED_CONFIG, "for PRECISION="); return IVSRStatus::UNSUPPORTED_CONFIG; } } break; case IVSRConfigKey::RESHAPE_SETTINGS: - reshape_settings = convert_string_to_vector(configs->value); + reshape_settings = convert_string_to_vector(static_cast(configs->value)); + //The layout of RESHAPE SETTINGS is NHW + reshape_h = reshape_settings[ov::layout::height_idx(ov::Layout("NHW"))]; + reshape_w = reshape_settings[ov::layout::width_idx(ov::Layout("NHW"))]; + if 
(reshape_h % 2 != 0 || reshape_w % 2 != 0) { + ivsr_status_log(IVSRStatus::UNSUPPORTED_SHAPE, static_cast(configs->value)); + return IVSRStatus::UNSUPPORTED_SHAPE; + } break; case IVSRConfigKey::INPUT_RES: - //in format "," - reso = convert_string_to_vector(configs->value); + reso = convert_string_to_vector(static_cast(configs->value)); frame_width = reso[0]; frame_height = reso[1]; break; + case IVSRConfigKey::INFER_REQ_NUMBER: + try { + auto num = std::stoul(static_cast(configs->value)); + if (num > infer_request_num) + infer_request_num = num; + std::cout << "[INFO] Infer request num: " << infer_request_num << std::endl; + } catch (const std::invalid_argument& e) { + std::cerr << "[ERROR] Invalid argument: " << static_cast(configs->value) << std::endl; + } catch (const std::out_of_range& e) { + std::cerr << "[ERROR] Out of range: " << static_cast(configs->value) << std::endl; + } + break; + case IVSRConfigKey::INPUT_TENSOR_DESC_SETTING: + input_tensor_desc = static_cast(configs->value); + break; + case IVSRConfigKey::OUTPUT_TENSOR_DESC_SETTING: + output_tensor_desc = static_cast(configs->value); + break; default: unsupported_status = IVSRStatus::UNSUPPORTED_KEY; - unsupported_output.append(std::to_string(configs->key)); + unsupported_output = std::to_string(configs->key); break; } - ivsr_status_log(unsupported_status, unsupported_output.c_str()); + ivsr_status_log(unsupported_status, unsupported_output.c_str()); configs = configs->next; } - if(!check_engine_config(model, device)) { + + if (!check_engine_config(model, device)) { return IVSRStatus::UNSUPPORTED_CONFIG; } - if(frame_width == 0 || frame_height == 0) { - ivsr_status_log(IVSRStatus::UNSUPPORTED_CONFIG,"please set INPUT_RES!"); + + if (frame_width == 0 || frame_height == 0) { + ivsr_status_log(IVSRStatus::UNSUPPORTED_CONFIG, "please set INPUT_RES!"); return IVSRStatus::UNSUPPORTED_CONFIG; } - /** - * Below code only is for OpenVINO engine - */ - // 2.parse config for inference engine + // Parse config 
for the inference engine std::map engine_configs; - parse_engine_config(engine_configs,device,infer_precision,cldnn_config); - - // 3.construct and initialization - // - initialize inference engine - (*handle)->inferEngine = {new ov_engine(device, model, custom_lib, engine_configs, reshape_settings)}; - // -construct IVSRThreadExecutor object - IVSRThread::Config executorConfig; - (*handle)->threadExecutor = new IVSRThread::IVSRThreadExecutor(executorConfig, (*handle)->inferEngine.get_impl()); - - // -construct patch config - size_t input_dims = 0; - (*handle)->inferEngine.get_attr("input_dims", input_dims); - ov::Shape model_inputs, model_outputs; - (*handle)->inferEngine.get_attr("model_inputs", model_inputs); // 1,3,3,1080,1920 (400,700) - (*handle)->inferEngine.get_attr("model_outputs", model_outputs); // 1,3,3,2160,3840 (800,1400) - - // --set patch configs - int m_input_width = *(model_inputs.end() - 1); - int m_input_height = *(model_inputs.end() - 2); - int nif = *(model_inputs.end() - 4); - int m_output_width = *(model_outputs.end() - 1); - (*handle)->patchConfig.scale = m_output_width / m_input_width; // Note: do not support fractional SR - (*handle)->patchConfig.patchHeight = m_input_height; - (*handle)->patchConfig.patchWidth = m_input_width; - (*handle)->patchConfig.dims = input_dims; - (*handle)->patchConfig.nif = nif; + parse_engine_config(engine_configs, device, infer_precision, cldnn_config); + + // Initialize inference engine + auto ovEng = new ov_engine(device, + model, + custom_lib, + engine_configs, + reshape_settings, + *input_tensor_desc, + *output_tensor_desc); + + IVSRStatus status = ovEng->init(); + if (status != IVSRStatus::OK) { + ivsr_status_log(status, "in ivsr_init"); + return IVSRStatus::UNSUPPORTED_SHAPE; + } + + auto res = ovEng->create_infer_requests(infer_request_num); + if (res < 0) { + std::cout << "[ERROR]: Failed to create infer requests!\n"; + return IVSRStatus::GENERAL_ERROR; + } + + // Construct IVSRThreadExecutor object + 
IVSRThread::Config executorConfig{"ivsr_thread_executor", 8}; + auto executor = new IVSRThread::IVSRThreadExecutor(executorConfig, ovEng); + + // Construct patch config + tensor_desc_t input_tensor = { + .precision = {0}, + .layout = {0}, + .tensor_color_format = {0}, + .model_color_format = {0}, + .scale = 0.0, + .dimension = 0, + .shape = {0}}; + ovEng->get_attr("model_inputs", input_tensor); + + tensor_desc_t output_tensor = { + .precision = {0}, + .layout = {0}, + .tensor_color_format = {0}, + .model_color_format = {0}, + .scale = 0.0, + .dimension = 0, + .shape = {0}}; + ovEng->get_attr("model_outputs", output_tensor); + + PatchConfig patchConfig; + int m_input_width = input_tensor.shape[ov::layout::width_idx(ov::Layout(input_tensor.layout))];; + int m_input_height = input_tensor.shape[ov::layout::height_idx(ov::Layout(input_tensor.layout))]; + // hard code + int nif = input_tensor.dimension == 5 ? input_tensor.shape[1] : 1; + int m_output_width = output_tensor.shape[ov::layout::width_idx(ov::Layout(output_tensor.layout))]; + patchConfig.scale = m_output_width / m_input_width; + patchConfig.patchHeight = m_input_height; + patchConfig.patchWidth = m_input_width; + patchConfig.dims = input_tensor.dimension; + patchConfig.nif = nif; + #ifdef ENABLE_LOG - std::cout << "[Trace]: " << (*handle)->patchConfig << std::endl; + std::cout << "[Trace]: " << patchConfig << std::endl; #endif - // generate input data shape - std::vector& input_shape = (*handle)->input_data_shape; - //model input res might not be the same as input frame res - std::transform(model_inputs.begin(), model_inputs.end(), std::back_inserter(input_shape), - [](size_t val) { return val; }); - input_shape[input_shape.size() - 1] = frame_width; - input_shape[input_shape.size() - 2] = frame_height; + // Generate input data shape + std::vector input_res; + input_res.push_back(frame_height); + input_res.push_back(frame_width); + // Use the parameterized constructor + *handle = new ivsr(ovEng, executor, 
config_map, patchConfig, std::move(input_res)); return IVSRStatus::OK; } -IVSRStatus ivsr_process(ivsr_handle handle, char* input_data, char* output_data, ivsr_cb_t* cb){ - if(input_data == nullptr){ - ivsr_status_log(IVSRStatus::GENERAL_ERROR, "in ivsr_process"); +IVSRStatus ivsr_process(ivsr_handle handle, char* input_data, char* output_data, ivsr_cb_t* cb) { + if (input_data == nullptr) { + ivsr_status_log(IVSRStatus::GENERAL_ERROR, "in ivsr_process - input_data is nullptr"); return IVSRStatus::GENERAL_ERROR; } - try{ + try { std::vector int_shape; - std::transform(handle->input_data_shape.begin(), handle->input_data_shape.end(), std::back_inserter(int_shape), - [](size_t val) -> int { return static_cast(val); }); - - // determine whether apply patch solution or not - if(handle->patchConfig.patchHeight < *(int_shape.end()-2) || - handle->patchConfig.patchWidth < *(int_shape.end()-1)) { + int_shape.reserve(handle->input_data_shape.size()); // Reserve space for efficiency + std::transform(handle->input_data_shape.begin(), + handle->input_data_shape.end(), + std::back_inserter(int_shape), + [](size_t val) -> int { + return static_cast(val); + }); + + // Determine whether to apply the patch solution + if (handle->patchConfig.patchHeight < int_shape[int_shape.size() - 2] || + handle->patchConfig.patchWidth < int_shape[int_shape.size() - 1]) { handle->patchSolution = true; } - // smart patch inference - SmartPatch* smartPatch = new SmartPatch(handle->patchConfig, input_data, output_data, int_shape, handle->patchSolution); - // -prepare data - auto res = smartPatch->generatePatch(); - if(res == -1){ - delete smartPatch; - ivsr_status_log(IVSRStatus::UNKNOWN_ERROR, "in Smart Patch"); + // Smart patch inference using a smart pointer for automatic memory management + std::unique_ptr smartPatch( + new SmartPatch(handle->patchConfig, input_data, output_data, int_shape, handle->patchSolution) + ); + + // Prepare data + int res = smartPatch->generatePatch(); + if (res == -1) 
{ + ivsr_status_log(IVSRStatus::UNKNOWN_ERROR, "in SmartPatch::generatePatch"); return IVSRStatus::UNKNOWN_ERROR; } + auto patchList = smartPatch->getInputPatches(); auto outputPatchList = smartPatch->getOutputPatches(); #ifdef ENABLE_PERF - auto totalStartTime = Time::now(); + auto totalStartTime = Time::now(); #endif - // create infer requests based on patch list size - if (patchList.size() > handle->inferEngine.get_infer_requests_size()) { - auto res = handle->inferEngine.create_infer_requests(patchList.size()); - if (res == -1) { - std::cout << "[ERROR]: " << "Failed to creat infer requests!\n"; - delete smartPatch; - return IVSRStatus::GENERAL_ERROR; + // Create infer requests based on patch list size + size_t required_infer_requests = patchList.size(); + if (required_infer_requests > handle->inferEngine->get_infer_requests_size()) { + auto res = handle->inferEngine->create_infer_requests(required_infer_requests); + if (res < 0) { + std::cout << "[ERROR]: Failed to create infer requests!\n"; + return IVSRStatus::GENERAL_ERROR; + } } - } - // -get data into infer task - int idx = 0; - for(; idx < patchList.size(); idx++ ){ + // Get data into infer task + for (auto idx = 0u; idx < patchList.size(); ++idx) { #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "ivsr_process on patch: " << idx << std::endl; + std::cout << "[Trace]: ivsr_process on patch: " << idx << std::endl; #endif + std::shared_ptr task = handle->threadExecutor->CreateTask( + patchList[idx], outputPatchList[idx], InferFlag::AUTO); + handle->threadExecutor->Enqueue(task); + } - std::shared_ptr task = handle->threadExecutor->CreateTask(patchList[idx], outputPatchList[idx], InferFlag::AUTO); - handle->threadExecutor->Enqueue(task); - } + // Wait for all tasks to finish + handle->threadExecutor->wait_all(required_infer_requests); - // -wait for all the tasks finish - handle->threadExecutor->wait_all(patchList.size()); #ifdef ENABLE_PERF - auto duration = get_duration_ms_till_now(totalStartTime); - 
std::cout << "[PERF] " << "Patch inference with memory copy - Latency: " << double_to_string(duration) <<"ms"<patchConfig.nif* 1000.0 / duration) <<"FPS"<patchConfig.nif * 1000.0 / duration) << "FPS" << std::endl; #endif -// #ifdef ENABLE_PERF -// // -get total duration for all tasks -// double totalDuration = handle->threadExecutor->get_duration_in_milliseconds(); -// double fps = 3 * 1000.0/totalDuration; - -// std::cout << "All tasks Total Latency for One Nig: " << double_to_string(totalDuration) <<"ms"<restoreImageFromPatches(); - if(res == -1){ - ivsr_status_log(IVSRStatus::UNKNOWN_ERROR, "in Smart Patch"); + if (res == -1) { + ivsr_status_log(IVSRStatus::UNKNOWN_ERROR, "in SmartPatch::restoreImageFromPatches"); return IVSRStatus::UNKNOWN_ERROR; } - delete smartPatch; - // notify user + // Notify user cb->ivsr_cb(cb->args); - }catch(exception e){ - std::cout << "Error in ivsr_process" << std::endl; + } catch (const std::exception& e) { + std::cout << "Error in ivsr_process: " << e.what() << std::endl; ivsr_status_log(IVSRStatus::EXCEPTION_ERROR, e.what()); return IVSRStatus::UNKNOWN_ERROR; } @@ -427,6 +503,48 @@ IVSRStatus ivsr_process(ivsr_handle handle, char* input_data, char* output_data, return IVSRStatus::OK; } +IVSRStatus ivsr_process_async(ivsr_handle handle, char* input_data, char* output_data, ivsr_cb_t* cb) { + if (input_data == nullptr) { + ivsr_status_log(IVSRStatus::GENERAL_ERROR, "in ivsr_process - input_data is nullptr"); + return IVSRStatus::GENERAL_ERROR; + } + + try { + std::vector int_shape; + int_shape.reserve(handle->input_data_shape.size()); // Reserve space for efficiency + std::transform(handle->input_data_shape.begin(), + handle->input_data_shape.end(), + std::back_inserter(int_shape), + [](size_t val) -> int { + return static_cast(val); + }); + + // Determine whether to apply the patch solution + if (handle->patchConfig.patchHeight < int_shape[int_shape.size() - 2] || + handle->patchConfig.patchWidth < int_shape[int_shape.size() - 1]) 
{ + handle->patchSolution = true; + } + + // TODO: Now fallback to ivsr_process api when patch solution is needed + if (handle->patchSolution) { + return ivsr_process(handle, input_data, output_data, cb); + } + + /* Uncomment: to use thread loop and internal task to process */ + // std::shared_ptr task = + // handle->threadExecutor->CreateTask(input_data, output_data, InferFlag::AUTO, cb); + // handle->threadExecutor->Enqueue(task); + + handle->inferEngine->proc(input_data, output_data, cb); + + } catch (const std::exception& e) { + std::cout << "Error in ivsr_process: " << e.what() << std::endl; + ivsr_status_log(IVSRStatus::EXCEPTION_ERROR, e.what()); + return IVSRStatus::UNKNOWN_ERROR; + } + + return IVSRStatus::OK; +} IVSRStatus ivsr_reconfig(ivsr_handle handle, ivsr_config_t* configs){ if(configs == nullptr){ @@ -439,22 +557,22 @@ IVSRStatus ivsr_reconfig(ivsr_handle handle, ivsr_config_t* configs){ while(configs!=nullptr){ switch(configs->key){ case IVSRConfigKey::INPUT_MODEL: - handle->vsr_config["model"] = configs->value; + handle->vsr_config["model"] = static_cast(configs->value); break; case IVSRConfigKey::TARGET_DEVICE: - handle->vsr_config["device"] = configs->value; + handle->vsr_config["device"] = static_cast(configs->value); break; case IVSRConfigKey::BATCH_NUM: - handle->vsr_config["batch_num"] = configs->value; + handle->vsr_config["batch_num"] = static_cast(configs->value); break; case IVSRConfigKey::VERBOSE_LEVEL: - handle->vsr_config["verbose_level"] = configs->value; + handle->vsr_config["verbose_level"] = static_cast(configs->value); break; case IVSRConfigKey::CUSTOM_LIB: - handle->vsr_config["custom_lib"] = configs->value; + handle->vsr_config["custom_lib"] = static_cast(configs->value); break; case IVSRConfigKey::CLDNN_CONFIG: - handle->vsr_config["cldnn_config"] = configs->value; + handle->vsr_config["cldnn_config"] = static_cast(configs->value); break; default: break; @@ -464,7 +582,7 @@ IVSRStatus ivsr_reconfig(ivsr_handle handle, 
ivsr_config_t* configs){ // reconfig ov_engine ? - }catch(exception e){ + } catch (const std::exception& e) { // std::cout << "Error in ivsr_reconfig" << std::endl; ivsr_status_log(IVSRStatus::EXCEPTION_ERROR, e.what()); return IVSRStatus::UNKNOWN_ERROR; @@ -483,24 +601,12 @@ IVSRStatus ivsr_get_attr(ivsr_handle handle, IVSRAttrKey key, void* value){ } case IVSRAttrKey::INPUT_TENSOR_DESC: { - int* input_tensor_desc = (int*) value; - ov::Shape input_shape; - handle->inferEngine.get_attr("model_inputs", input_shape); - for(auto s : input_shape){ - *input_tensor_desc = s; - input_tensor_desc++; - } + handle->inferEngine->get_attr("model_inputs", *(static_cast(value))); break; } case IVSRAttrKey::OUTPUT_TENSOR_DESC: { - int* output_tensor_desc = (int*) value; - ov::Shape output_shape; - handle->inferEngine.get_attr("model_outputs", output_shape); - for(auto s : output_shape){ - *output_tensor_desc = s; - output_tensor_desc++; - } + handle->inferEngine->get_attr("model_outputs", *(static_cast(value))); break; } case IVSRAttrKey::NUM_INPUT_FRAMES: @@ -512,14 +618,14 @@ IVSRStatus ivsr_get_attr(ivsr_handle handle, IVSRAttrKey key, void* value){ case IVSRAttrKey::INPUT_DIMS: { size_t dims = 0; - handle->inferEngine.get_attr("input_dims", dims); + handle->inferEngine->get_attr("input_dims", dims); *((size_t *)value) = dims; break; } case IVSRAttrKey::OUTPUT_DIMS: { size_t dims = 0; - handle->inferEngine.get_attr("output_dims", dims); + handle->inferEngine->get_attr("output_dims", dims); *((size_t *)value) = dims; break; } @@ -539,7 +645,7 @@ IVSRStatus ivsr_deinit(ivsr_handle handle) { } try { - auto p = handle->inferEngine.get_impl(); + auto p = handle->inferEngine->get_impl(); if (p != nullptr) delete p; @@ -547,7 +653,7 @@ IVSRStatus ivsr_deinit(ivsr_handle handle) { delete handle->threadExecutor; handle->threadExecutor = nullptr; } - } catch (exception e) { + } catch (const std::exception& e) { ivsr_status_log(IVSRStatus::EXCEPTION_ERROR, e.what()); return 
IVSRStatus::UNKNOWN_ERROR; } diff --git a/ivsr_sdk/src/ivsr_thread_executor.cpp b/ivsr_sdk/src/ivsr_thread_executor.cpp index 1c0f51a..6f63f35 100644 --- a/ivsr_sdk/src/ivsr_thread_executor.cpp +++ b/ivsr_sdk/src/ivsr_thread_executor.cpp @@ -40,15 +40,12 @@ struct IVSRThreadExecutor::Impl { _impl->_streamIdQueue.pop(); } } - - } ~Stream() { { std::lock_guard lock{_impl->_streamIdMutex}; _impl->_streamIdQueue.push(_streamId); } - } Impl* _impl = nullptr; @@ -56,22 +53,21 @@ struct IVSRThreadExecutor::Impl { int _numaNodeId = 0; bool _execute = false; std::queue _taskQueue; - }; explicit Impl(const Config& config, engine* engine) : _config{config}, - _engine(engine), _streams([this] { return std::make_shared(this); - }) { + }), + _engine(engine) { for (auto streamId = 0; streamId < _config._threads; ++streamId) { _threads.emplace_back([this, streamId] { for (bool stopped = false; !stopped;) { Task task; { std::unique_lock lock(_mutex); - _queueCondVar.wait(lock, [&] { + _queueCondVar.wait(lock, [&] { return !_taskQueue.empty() || (stopped = _isStopped); }); if (!_taskQueue.empty()) { @@ -81,7 +77,8 @@ struct IVSRThreadExecutor::Impl { } if (task) { #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "Thread " << std::this_thread::get_id() << " get task and execute it" << std::endl; + std::cout << "[Trace]: " + << "Thread " << std::this_thread::get_id() << " get task and execute it" << std::endl; #endif Execute(task, *(_streams.local())); } @@ -94,22 +91,29 @@ struct IVSRThreadExecutor::Impl { { std::lock_guard lock(_mutex); _taskQueue.emplace(task); - _startTime = std::min(Time::now(), _startTime); + _startTime = std::min(Time::now(), _startTime); } _queueCondVar.notify_one(); #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "Enqueue Task into queue and notify 1 / " << _taskQueue.size() << std::endl; -#endif + std::cout << "[Trace]: " + << "Enqueue Task into queue and notify 1 / " << _taskQueue.size() << std::endl; +#endif } void Execute(const Task& task, Stream& stream) 
{ _engine->run(task); } - Task CreateTask(char* inBuf, char* outBuf, InferFlag flag) { - Task task = std::make_shared(inBuf, outBuf, std::bind(&IVSRThread::IVSRThreadExecutor::Impl::competition_call_back, this), flag); + Task CreateTask(char* inBuf, char* outBuf, InferFlag flag, ivsr_cb_t* cb) { + Task task = std::make_shared( + inBuf, + outBuf, + std::bind(&IVSRThread::IVSRThreadExecutor::Impl::competition_call_back, this, std::placeholders::_1), + flag, + cb); return task; } + void Defer(Task task) { auto& stream = *(_streams.local()); stream._taskQueue.push(std::move(task)); @@ -127,11 +131,10 @@ struct IVSRThreadExecutor::Impl { } void sync(int size) { - std::unique_lock lock(_mutex); - _taskCondVar.wait(lock,[&] { - return (_cb_counter == size); + std::unique_lock lock(_mutex); + _taskCondVar.wait(lock, [&] { + return (_cb_counter == size); }); - } void reset() { @@ -139,13 +142,16 @@ struct IVSRThreadExecutor::Impl { _cb_counter = 0; } - void competition_call_back() { + void competition_call_back(Task task) { std::unique_lock lock(_mutex); - _cb_counter++; - _endTime = std::max(Time::now(), _endTime); + _cb_counter++; + _endTime = std::max(Time::now(), _endTime); + if (task->cb) { + task->cb->ivsr_cb(task->cb->args); + } _taskCondVar.notify_one(); } - + double get_duration_in_milliseconds() { return std::chrono::duration_cast(_endTime - _startTime).count() * 0.000001; } @@ -161,14 +167,14 @@ struct IVSRThreadExecutor::Impl { int _cb_counter = 0; std::queue _taskQueue; bool _isStopped = false; - ThreadLocal> _streams; + ThreadLocal> _streams; engine* _engine; Time::time_point _startTime = Time::time_point::max(); - Time::time_point _endTime = Time::time_point::min(); + Time::time_point _endTime = Time::time_point::min(); }; - -IVSRThreadExecutor::IVSRThreadExecutor(const Config& config, engine* engine) : _impl{new Impl{config, engine}} {} +IVSRThreadExecutor::IVSRThreadExecutor(const Config& config, engine* engine) + : _impl{new Impl{config, engine}} {} 
IVSRThreadExecutor::~IVSRThreadExecutor() { { @@ -195,8 +201,8 @@ void IVSRThreadExecutor::Enqueue(Task task) { } } -Task IVSRThreadExecutor::CreateTask(char* inBuf, char* outBuf, InferFlag flag) { - Task task = _impl->CreateTask(inBuf, outBuf, flag); +Task IVSRThreadExecutor::CreateTask(char* inBuf, char* outBuf, InferFlag flag, ivsr_cb_t *cb) { + Task task = _impl->CreateTask(inBuf, outBuf, flag, cb); return task; } @@ -206,6 +212,6 @@ void IVSRThreadExecutor::wait_all(int patchSize) { } double IVSRThreadExecutor::get_duration_in_milliseconds() { - return _impl->get_duration_in_milliseconds(); + return _impl->get_duration_in_milliseconds(); } } // namespace IVSRThread diff --git a/ivsr_sdk/src/ov_engine.cpp b/ivsr_sdk/src/ov_engine.cpp index 405f76e..284d6cc 100644 --- a/ivsr_sdk/src/ov_engine.cpp +++ b/ivsr_sdk/src/ov_engine.cpp @@ -1,42 +1,83 @@ /******************************************************************************** -* INTEL CONFIDENTIAL -* Copyright (C) 2023 Intel Corporation -* -* This software and the related documents are Intel copyrighted materials, -* and your use of them is governed by the express license under -* which they were provided to you ("License").Unless the License -* provides otherwise, you may not use, modify, copy, publish, distribute, disclose or -* transmit this software or the related documents without Intel's prior written permission. -* -* This software and the related documents are provided as is, -* with no express or implied warranties, other than those that are expressly stated in the License. 
-*******************************************************************************/ + * INTEL CONFIDENTIAL + * Copyright (C) 2023 Intel Corporation + * + * This software and the related documents are Intel copyrighted materials, + * and your use of them is governed by the express license under + * which they were provided to you ("License").Unless the License + * provides otherwise, you may not use, modify, copy, publish, distribute, disclose or + * transmit this software or the related documents without Intel's prior written permission. + * + * This software and the related documents are provided as is, + * with no express or implied warranties, other than those that are expressly stated in the License. + *******************************************************************************/ /** * @file openvino_engine.cpp * openvino backend inference implementation * it is the wrapper of backend inference API. */ -#include -#include -#include #include "ov_engine.hpp" -#include "utils.hpp" -#include "omp.h" +#include + +#include +#include #include +#include "omp.h" +#include "utils.hpp" + typedef std::chrono::high_resolution_clock Time; -IBasicVSRStatus ov_engine::init_impl() -{ +const std::map precision_string_to_ov = { + {"fp32", ov::element::f32}, + {"f32", ov::element::f32}, + {"fp16", ov::element::f16}, + {"f16", ov::element::f16}, + {"i8", ov::element::i8}, + {"i16", ov::element::i16}, + {"i32", ov::element::i32}, + {"u8", ov::element::u8}, + {"u16", ov::element::u16}, +}; + +const std::map color_format_string_to_ov = { + {"BGR", ov::preprocess::ColorFormat::BGR}, + {"RGB", ov::preprocess::ColorFormat::RGB}, + {"I420_Single_Plane", ov::preprocess::ColorFormat::I420_SINGLE_PLANE}, + {"I420_Three_Planes", ov::preprocess::ColorFormat::I420_THREE_PLANES}, +}; + +/* + * IN: output + * OUT: layout + * +*/ +static IVSRStatus get_default_layout(const ov::Output& output, ov::Layout& layout) { + size_t shape_size = output.get_partial_shape().size(); + switch (shape_size) { + 
case 4: + layout = ov::Layout("NCHW"); + break; + case 5: + layout = ov::Layout("NFCHW"); + break; + default: + std::cout << "not supported model input/output shape size\n"; + return GENERAL_ERROR; + } + return OK; +} + +IVSRStatus ov_engine::init_impl() { if (custom_lib_ != "") instance_.add_extension(custom_lib_); - //set property for ov instance + // set property for ov instance for (auto&& item : configs_) { instance_.set_property(item.first, item.second); } - //read model + // read model std::shared_ptr model; try { model = instance_.read_model(model_path_); @@ -44,43 +85,61 @@ IBasicVSRStatus ov_engine::init_impl() model = irguard::load_model(instance_, model_path_); } - input_ = model->inputs()[0]; - output_ = model->outputs()[0]; - bool multiple_inputs = false; if (model->inputs().size() == 5 && model->outputs().size() == 5) multiple_inputs = true; if (!reshape_settings_.empty()) { - // get_shape() only can be called by static shape, openvino will check the shape size during reshape operation - //ov::Shape tensor_shape = input_.get_shape(); - //assert(tensor_shape.size() == reshape_settings_.size()); - - ov::Shape tensor_shape = reshape_settings_; + //get model input shape + ov::PartialShape input_shape = model->inputs()[0].get_partial_shape(); +#ifdef ENALBE_LOG + std::cout << "input tensor shape is "<< input_shape.is_static()? 
"static: " : "dynamic: " + << input_shape << std::endl; +#endif size_t batch_index, channels_index, h_index, w_index; - if (multiple_inputs) { - const ov::Layout model_layout{"NCHW"}; - batch_index = ov::layout::batch_idx(model_layout); - channels_index = ov::layout::channels_idx(model_layout); - h_index = ov::layout::height_idx(model_layout); - w_index = ov::layout::width_idx(model_layout); + //get model input tensor layout + ov::Layout input_layout = ov::layout::get_layout(model->inputs()[0]); + if (input_layout.empty()) { + get_default_layout(model->inputs()[0], input_layout); + //ov::layout::set_layout(model->inputs()[0], input_layout); + } + batch_index = ov::layout::batch_idx(input_layout); + channels_index = ov::layout::channels_idx(input_layout); + h_index = ov::layout::height_idx(input_layout); + w_index = ov::layout::width_idx(input_layout); + + // Assume the input reshape_settings_'s layout is NHW. + // update input layer tensor batch/width/height with the value from reshape_settings_; + input_shape[batch_index] = reshape_settings_[ov::layout::batch_idx(ov::Layout("NHW"))]; + input_shape[w_index] = reshape_settings_[ov::layout::width_idx(ov::Layout("NHW"))]; + input_shape[h_index] = reshape_settings_[ov::layout::height_idx(ov::Layout("NHW"))]; + //input_shape should be static now. + assert(input_shape.is_static()); + + //TODO: is this check for BasicVSR? Is it required anymore?? 
+ if (input_shape.size() == 5) { + if (input_shape[w_index].get_length() % 32 != 0) { + std::cout << "[Error]: " << "Current model requires input widths to be divisible by 32" << std::endl; + return UNSUPPORTED_SHAPE; + } } #ifdef ENABLE_LOG - std::cout << "Reshape network to the image size = [" << reshape_settings_[reshape_settings_.size() - 2] << "x" - << reshape_settings_[reshape_settings_.size() - 1] << "] " << std::endl; + std::cout << "Reshape network to size = [" << input_shape[w_index].get_length() + << "x" << input_shape[h_index].get_length() << "] " << std::endl; #endif - model->reshape({{model->inputs()[0].get_any_name(), tensor_shape}}); + // reshape the model with "static" shape. + model->reshape({{model->inputs()[0].get_any_name(), input_shape.to_shape()}}); if (multiple_inputs) { - ov::Shape hidden_tensor_shape = reshape_settings_; - hidden_tensor_shape[batch_index] = tensor_shape[batch_index]; + ov::Shape hidden_tensor_shape = input_shape.to_shape(); + hidden_tensor_shape[batch_index] = input_shape[batch_index].get_length(); hidden_tensor_shape[channels_index] = 64; - hidden_tensor_shape[h_index] = tensor_shape[h_index] / 4; - hidden_tensor_shape[w_index] = tensor_shape[w_index] / 4; + hidden_tensor_shape[h_index] = input_shape[h_index].get_length() / 4; + hidden_tensor_shape[w_index] = input_shape[w_index].get_length() / 4; - for (int i = 1; i < model->inputs().size(); ++i) + for (auto i = 1u; i < model->inputs().size(); ++i) model->reshape({{model->inputs()[i].get_any_name(), hidden_tensor_shape}}); } } @@ -91,7 +150,7 @@ IBasicVSRStatus ov_engine::init_impl() std::map tensor_names; const auto& inputs = model->inputs(); const auto& outputs = model->outputs(); - for (int i = 1; i < inputs.size(); ++i) { + for (auto i = 1u; i < inputs.size(); ++i) { std::string hidden_inp_name = inputs[i].get_any_name(); std::string hidden_out_name = outputs[i].get_any_name(); tensor_names[hidden_inp_name] = hidden_out_name; @@ -105,54 +164,117 @@ IBasicVSRStatus 
ov_engine::init_impl() // std::cout << "The exec time of making stateful model is " << execTime.count() * 0.000001 << "ms\n"; } - //compile model + // PPP + ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model); + ov::preprocess::InputInfo& input_info = ppp.input(); + ov::preprocess::OutputInfo& output_info = ppp.output(); + + // layout is NCHW by default if can not get layout information + ov::Layout layout; + if (ov::layout::get_layout(model->inputs()[0]).empty()) { + get_default_layout(model->inputs()[0], layout); + input_info.model().set_layout(layout); + } + //after calling ppp::InputInfo.model().set_layout(); model->input_::layout is not set though. + //std::cout << "model layout is " << ov::layout::get_layout(model->inputs()[0]).to_string() << endl; + if (ov::layout::get_layout(model->outputs()[0]).empty()) { + get_default_layout(model->outputs()[0], layout); + output_info.model().set_layout(layout); + } + if (input_tensor_desc_.precision != nullptr) { + input_info.tensor().set_element_type(precision_string_to_ov.at(std::string(input_tensor_desc_.precision))); + } + if (input_tensor_desc_.layout != nullptr) { + const ov::Layout input_tensor_layout{input_tensor_desc_.layout}; + input_info.tensor().set_layout(input_tensor_layout); + } + if (output_tensor_desc_.precision != nullptr) { + output_info.tensor().set_element_type(precision_string_to_ov.at(std::string(output_tensor_desc_.precision))); + } + if (output_tensor_desc_.layout != nullptr) { + const ov::Layout output_tensor_layout{output_tensor_desc_.layout}; + output_info.tensor().set_layout(output_tensor_layout); + } + // convert color tensor_color_format->model_color_format + if (strcmp(input_tensor_desc_.tensor_color_format, input_tensor_desc_.model_color_format) != 0) { + input_info.tensor().set_color_format( + color_format_string_to_ov.at(std::string(input_tensor_desc_.tensor_color_format))); + input_info.preprocess().convert_color( + 
color_format_string_to_ov.at(std::string(input_tensor_desc_.model_color_format))); + } + // ov 24.0 support convert color model_color_format->tensor_color_format + // if (strcmp(input_tensor_desc_.tensor_color_format, input_tensor_desc_.model_color_format) != 0) { + // output_info.tensor().set_color_format( + // color_format_string_to_ov.at(std::string(input_tensor_desc_.tensor_color_format))); + // output_info.preprocess().convert_color( + // color_format_string_to_ov.at(std::string(input_tensor_desc_.model_color_format))); + // } + if ((input_tensor_desc_.scale - 1.0f) > 1e-6f) { + // the input tensor precision should not be float + assert(std::string(input_tensor_desc_.precision) == std::string("u8") || + std::string(input_tensor_desc_.precision) == std::string("u16")); + input_info.preprocess().convert_element_type(ov::element::f32); + input_info.preprocess().scale(input_tensor_desc_.scale); + // PPP doesn't support un-scale, so + // the precision of output tensor need to be float if the scale != 0 or 1 + output_info.tensor().set_element_type(ov::element::f32); + } + + model = ppp.build(); + + input_ = model->inputs()[0]; + output_ = model->outputs()[0]; + // compile model compiled_model_ = instance_.compile_model(model, device_); #ifdef ENABLE_LOG std::cout << "[Trace]: " << "ov_engine init successfully" << std::endl; #endif - return SUCCESS; + return OK; } -IBasicVSRStatus ov_engine::run_impl(InferTask::Ptr task) { - //construct the input tensor - if(task->inputPtr_ == nullptr || task->outputPtr_ == nullptr) { - std::cout << "[Error]: " << "invalid input buffer pointer" << std::endl; - return ERROR; +IVSRStatus ov_engine::run_impl(InferTask::Ptr task) { + // construct the input tensor + if (task->inputPtr_ == nullptr || task->outputPtr_ == nullptr) { + std::cout << "[Error]: " + << "invalid input buffer pointer" << std::endl; + return GENERAL_ERROR; } auto inferReq = get_idle_request(); - inferReq->set_callback([wp = 
std::weak_ptr(inferReq),task](std::exception_ptr ex) { + inferReq->set_callback([wp = std::weak_ptr(inferReq), task](std::exception_ptr ex) { auto request = wp.lock(); #ifdef ENABLE_PERF - request->end_time(); + request->end_time(); auto latency = request->get_execution_time_in_milliseconds(); - size_t frame_num = (request->get_input_tensor()).get_shape()[(request->get_input_tensor().get_shape()).size()-4]; - std::cout << "[PERF] " << "Inference Latency: " << latency << "ms, Throughput: " << double_to_string(frame_num * 1000.0 / latency) << "FPS" << std::endl; + std::cout << "[PERF] Inference Latency: " << latency << "ms" << std::endl; #endif - if(ex) { + if (ex) { #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "Exception in infer request callback " << std::endl; + std::cout << "[Trace]: " + << "Exception in infer request callback " << std::endl; #endif - try{ + try { // std::rethrow_exception(ex); throw ex; - } catch(const std::exception& e) { + } catch (const std::exception& e) { std::cout << "Caught exception \"" << e.what() << "\"\n"; } } auto cbTask = task; request->call_back(); - //call application callback function - cbTask->_callbackQueue(); + // call application callback function + cbTask->_callbackFunction(cbTask); }); #ifdef ENABLE_LOG - std::cout << "[Trace]: " << "input: " << input_.get_element_type().get_type_name() << " " << input_.get_shape() << std::endl; - std::cout << "[Trace]: " << "output: " << output_.get_element_type().get_type_name() << " " << output_.get_shape() << std::endl; + std::cout << "[Trace]: " + << "input: " << input_.get_element_type().get_type_name() << " " << input_.get_shape() << std::endl; + std::cout << "[Trace]: " + << "output: " << output_.get_element_type().get_type_name() << " " << output_.get_shape() << std::endl; #endif ov::Tensor input_tensor(input_.get_element_type(), input_.get_shape(), task->inputPtr_); @@ -163,24 +285,90 @@ IBasicVSRStatus ov_engine::run_impl(InferTask::Ptr task) { inferReq->start_async(); #ifdef 
ENABLE_LOG - std::cout << "[Trace]: " << "ov_engine run: start task inference" << std::endl; + std::cout << "[Trace]: " + << "ov_engine run: start task inference" << std::endl; #endif - return SUCCESS; + return OK; } -IBasicVSRStatus ov_engine::create_infer_requests_impl(size_t requests_num) { +IVSRStatus ov_engine::process_impl(void* input_data, void* output_data, void* cb) { + // Check for valid input and output data pointers + if (input_data == nullptr || output_data == nullptr) { + std::cout << "[Error]: invalid input or output buffer pointer" << std::endl; + return GENERAL_ERROR; + } + + auto inferReq = get_idle_request(); + + // Set callback for inference request + inferReq->set_callback([wp = std::weak_ptr(inferReq), cb](std::exception_ptr ex) { + auto request = wp.lock(); +#ifdef ENABLE_PERF + request->end_time(); + auto latency = request->get_execution_time_in_milliseconds(); + std::cout << "[PERF] Inference Latency: " << latency << "ms" << std::endl; +#endif + + if (ex) { +#ifdef ENABLE_LOG + std::cout << "[Trace]: Exception in infer request callback " << std::endl; +#endif + try { + // std::rethrow_exception(ex); + throw ex; + } catch (const std::exception& e) { + std::cout << "Caught exception \"" << e.what() << "\"\n"; + } + } + + request->call_back(); + + // Check if the callback structure and function are valid, then call the function + if (cb) { + ivsr_cb_t* ivsr_cb = static_cast(cb); + if (ivsr_cb->ivsr_cb) { + ivsr_cb->ivsr_cb(ivsr_cb->args); + } + } + }); + +#ifdef ENABLE_LOG + std::cout << "[Trace]: input: " << input_.get_element_type().get_type_name() << " " << input_.get_shape() << std::endl; + std::cout << "[Trace]: output: " << output_.get_element_type().get_type_name() << " " << output_.get_shape() << std::endl; +#endif + + // Construct input and output tensors + ov::Tensor input_tensor(input_.get_element_type(), input_.get_shape(), input_data); + inferReq->set_input_tensor(input_tensor); + + ov::Tensor 
output_tensor(output_.get_element_type(), output_.get_shape(), output_data); + inferReq->set_output_tensor(output_tensor); + + // Start asynchronous inference + inferReq->start_async(); + +#ifdef ENABLE_LOG + std::cout << "[Trace]: ov_engine run: start task inference" << std::endl; +#endif + + return OK; +} + +IVSRStatus ov_engine::create_infer_requests_impl(size_t requests_num) { if (requests_num < requests_.size()) { - std::cout << "[ERROR]: " << "please pass correct requests num.\n"; - return ERROR; + std::cout << "[ERROR]: " + << "please pass correct requests num.\n"; + return GENERAL_ERROR; } - for (int id = requests_.size(); id < requests_num; ++id) { - requests_.push_back(std::make_shared(compiled_model_ ,id,std::bind(&ov_engine::put_idle_request, - this, - std::placeholders::_1))); + for (auto id = requests_.size(); id < requests_num; ++id) { + requests_.push_back( + std::make_shared(compiled_model_, + id, + std::bind(&ov_engine::put_idle_request, this, std::placeholders::_1))); idleIds_.push(id); } - return SUCCESS; -} \ No newline at end of file + return OK; +} diff --git a/ivsr_sdk/src/smart_patch.cpp b/ivsr_sdk/src/smart_patch.cpp index d058594..5bc433e 100644 --- a/ivsr_sdk/src/smart_patch.cpp +++ b/ivsr_sdk/src/smart_patch.cpp @@ -51,7 +51,7 @@ float* fill_patch(std::vector patchCorners, float* inputBuf, std::vector> patchCorners, char* imgBuf, \ std::vector patchDims, std::vector imgDims, std::vector patchList){ int pB = patchDims[0], pN = patchDims[1], pC = patchDims[2], pH = patchDims[3], pW = patchDims[4]; int iB = imgDims[0], iN = imgDims[1], iC = imgDims[2], iH = imgDims[3], iW = imgDims[4]; // imgDims? 
- int patch_sW = 1; + //int patch_sW = 1; int patch_sH = pW; int patch_sC = pH * pW; int patch_sN = pC * pH * pW; int patch_sB = pN * pC * pH * pW; - int img_sW = 1; + //int img_sW = 1; int img_sH = iW; int img_sC = iH * iW; int img_sN = iC * iH * iW; @@ -107,7 +107,7 @@ void fill_image(std::vector> patchCorners, char* imgBuf, \ float * img_ptr =(float *)imgBuf; // for each patch - for (int idx = 0; idx < patchCorners.size(); ++idx){ + for (auto idx = 0u; idx < patchCorners.size(); ++idx){ float* patchPtr = (float*)patchList[idx]; auto patchCorner = patchCorners[idx]; @@ -140,7 +140,7 @@ void fill_image(std::vector> patchCorners, char* imgBuf, \ } // average each pixel - for(int id = 0; id < outputpixels; id++){ + for(size_t id = 0; id < outputpixels; id++){ *(img_ptr + id) /= *(pixelCounter + id); } @@ -150,8 +150,8 @@ void fill_image(std::vector> patchCorners, char* imgBuf, \ SmartPatch::SmartPatch(PatchConfig config, char* inBuf, char* outBuf, std::vector inputShape, bool flag) :_inputPtr(inBuf), _outputPtr(outBuf), - _config(config), _inputShape(inputShape), + _config(config), flag(flag) { if (flag){ diff --git a/ivsr_setupvar.sh b/ivsr_setupvar.sh index e8bdaa7..92a89cc 100755 --- a/ivsr_setupvar.sh +++ b/ivsr_setupvar.sh @@ -5,16 +5,16 @@ while [ $# -gt 0 ]; do case "$1" in --ov_version) shift - if [ "$1" = "2022.3" ] || [ "$1" = "2023.2" ]; then + if [ "$1" = "2022.3" ] || [ "$1" = "2023.2" ] || [ "$1" = "2024.5" ]; then OV_VERSION=$1 else - echo "Usage: $0 --ov_version [2022.3|2023.2]" + echo "Usage: $0 --ov_version [2022.3|2023.2|2024.5]" exit 1 fi shift ;; *) - echo "Usage: $0 --ov_version [2022.3|2023.2]" + echo "Usage: $0 --ov_version [2022.3|2023.2|2024.5]" exit 1 ;; esac