diff --git a/.github/actions/build.sh b/.github/actions/build.sh index 5e8b0858fef..82f89cf5483 100755 --- a/.github/actions/build.sh +++ b/.github/actions/build.sh @@ -12,9 +12,9 @@ # - The build output is structured into log groups by package. # - As the disk space in the workflow environment is limitted, we clean the # work folder of each package after build. At 2020.06 this limit is 14GB. -# - synocli-videodriver and ffmpeg5-6 are not cleaned to be available for dependents. -# - Therefore synocli-videodriver is built first if triggered by ffmpeg5-6 -# - Therefore ffmpeg5 and ffmpeg6 are built second if triggered by its +# - synocli-videodriver and ffmpeg5-7 are not cleaned to be available for dependents. +# - Therefore synocli-videodriver is built first if triggered by ffmpeg5-7 +# - Therefore ffmpeg5 and ffmpeg7 are built second if triggered by its # own or a dependent (see prepare.sh). set -o pipefail @@ -60,7 +60,7 @@ if [ -n "$API_KEY" ] && [ "$PUBLISH" == "true" ]; then fi # Build -PACKAGES_TO_KEEP="synocli-videodriver ffmpeg5 ffmpeg6 python310 python311" +PACKAGES_TO_KEEP="synocli-videodriver ffmpeg5 ffmpeg7 python310 python311" for package in ${build_packages} do echo "::group:: ---- build ${package}" diff --git a/.github/actions/prepare.sh b/.github/actions/prepare.sh index 11e71f55be4..118a6319724 100755 --- a/.github/actions/prepare.sh +++ b/.github/actions/prepare.sh @@ -6,8 +6,8 @@ # # Functions: # - Evaluate all packages to build depending on files defined in ${GH_FILES}. -# - synocli-videodriver is moved to head of packages to build first if triggered by its ffmpeg5-6 -# - python310-311 and ffmpeg5-6 are moved to head of remaining packages to build when triggered by its own or a dependent. +# - synocli-videodriver is moved to head of packages to build first if triggered by its ffmpeg5-7 +# - python310-311 and ffmpeg5-7 are moved to head of remaining packages to build when triggered by its own or a dependent. 
# - Referenced native and cross packages of the packages to build are added to the download list. set -o pipefail @@ -54,8 +54,8 @@ fi # remove duplicate packages packages=$(printf %s "${SPK_TO_BUILD}" | tr ' ' '\n' | sort -u | tr '\n' ' ') -# for ffmpeg v5-6 find all packages that depend on them -for i in {5..6}; do +# for ffmpeg v5-7 find all packages that depend on them +for i in {5..7}; do ffmpeg_dependent_packages=$(find spk/ -maxdepth 2 -mindepth 2 -name "Makefile" -exec grep -Ho "FFMPEG_PACKAGE = ffmpeg${i}" {} \; | grep -Po ".*spk/\K[^/]*" | sort | tr '\n' ' ') # If packages contain a package that depends on ffmpeg (or is ffmpeg), @@ -70,7 +70,7 @@ for i in {5..6}; do done done -# for synocli-videodriver that ffmpeg v5-6 depends on +# for synocli-videodriver that ffmpeg v5-7 depends on videodrv_dependent_packages=$(find spk/ -maxdepth 2 -mindepth 2 -name "Makefile" -exec grep -Ho "spksrc.videodriver.mk" {} \; | grep -Po ".*spk/\K[^/]*" | sort | tr '\n' ' ') # If packages contain a package that depends on spksrc.videodriver.mk, diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b457bcbf692..242d7152747 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -83,7 +83,7 @@ jobs: matrix: # x64=x86_64, evansport=i686, aarch64=armv8, armv7, hi3535=armv7l, 88f6281=armv5, qoriq=ppc # https://github.com/SynoCommunity/spksrc/wiki/Synology-and-SynoCommunity-Package-Architectures - arch: [noarch, noarch-6.1, noarch-7.0, x64-6.2.4, x64-7.1, evansport-6.2.4, evansport-7.1, aarch64-6.2.4, aarch64-7.1, armv7-6.2.4, armv7-7.1, hi3535-6.2.4, 88f6281-6.2.4, qoriq-6.2.4, comcerto2k-7.1] + arch: [noarch, noarch-6.1, noarch-7.0, x64-6.2.4, x64-7.1, x64-7.2, evansport-6.2.4, evansport-7.1, aarch64-6.2.4, aarch64-7.1, aarch64-7.2, armv7-6.2.4, armv7-7.1, hi3535-6.2.4, 88f6281-6.2.4, qoriq-6.2.4, comcerto2k-7.1] steps: - name: Checkout repository diff --git a/cross/chromaprint/Makefile b/cross/chromaprint/Makefile index 
8ea36bd6bf4..fd9c41786ef 100644 --- a/cross/chromaprint/Makefile +++ b/cross/chromaprint/Makefile @@ -15,9 +15,9 @@ HOMEPAGE = https://acoustid.org/chromaprint COMMENT = Chromaprint is the core component of the AcoustID project. It\'s a client-side library that implements a custom algorithm for extracting fingerprints from any audio source. LICENSE = LGPL2.1+ -OPTIONAL_DEPENDS = cross/ffmpeg6 +OPTIONAL_DEPENDS = cross/ffmpeg7 -# compiler too old +# ffmpeg7 requires c11 support UNSUPPORTED_ARCHS = $(ARMv5_ARCHS) $(OLD_PPC_ARCHS) CMAKE_ARGS += -DBUILD_TOOLS=ON @@ -46,7 +46,7 @@ CMAKE_ARGS += -DFFMPEG_LIBSWSCALE_INCLUDE_DIRS:PATH=$(FFMPEG_STAGING_PREFIX)/inc CMAKE_ARGS += -DFFMPEG_LIBSWSCALE_LIBRARIES:FILEPATH=$(FFMPEG_STAGING_PREFIX)/lib/libswscale.so else -DEPENDS = cross/ffmpeg6 +DEPENDS = cross/ffmpeg7 CMAKE_RPATH = endif diff --git a/cross/elfutils/Makefile b/cross/elfutils/Makefile new file mode 100644 index 00000000000..6bf84f98f65 --- /dev/null +++ b/cross/elfutils/Makefile @@ -0,0 +1,20 @@ +PKG_NAME = elfutils +PKG_VERS = 0.191 +PKG_EXT = tar.bz2 +PKG_DIST_NAME = $(PKG_NAME)-$(PKG_VERS).$(PKG_EXT) +PKG_DIST_SITE = https://sourceware.org/elfutils/ftp/0.191 +PKG_DIR = $(PKG_NAME)-$(PKG_VERS) + +DEPENDS = cross/zlib cross/zstd + +HOMEPAGE = https://sourceware.org/elfutils/ +COMMENT = elfutils is a collection of utilities and libraries to read, create and modify ELF binary files, find and handle DWARF debug data, symbols, thread state and stacktraces for processes and core files on GNU/Linux. 
+LICENSE = GPL + +REQUIRED_MIN_DSM = 7.0 + +GNU_CONFIGURE = 1 +CONFIGURE_ARGS = --disable-libdebuginfod +CONFIGURE_ARGS += --disable-debuginfod + +include ../../mk/spksrc.cross-cc.mk diff --git a/cross/elfutils/PLIST b/cross/elfutils/PLIST new file mode 100644 index 00000000000..f6847723abb --- /dev/null +++ b/cross/elfutils/PLIST @@ -0,0 +1,9 @@ +lnk:lib/libasm.so +lnk:lib/libasm.so.1 +lib:lib/libasm-0.191.so +lnk:lib/libdw.so +lnk:lib/libdw.so.1 +lib:lib/libdw-0.191.so +lnk:lib/libelf.so +lnk:lib/libelf.so.1 +lib:lib/libelf-0.191.so diff --git a/cross/elfutils/digests b/cross/elfutils/digests new file mode 100644 index 00000000000..67671cb4ce6 --- /dev/null +++ b/cross/elfutils/digests @@ -0,0 +1,3 @@ +elfutils-0.191.tar.bz2 SHA1 651aa2b7390aeba178be2ceefd4c2eb42e783e97 +elfutils-0.191.tar.bz2 SHA256 df76db71366d1d708365fc7a6c60ca48398f14367eb2b8954efc8897147ad871 +elfutils-0.191.tar.bz2 MD5 636547248fb3fae58ec48030298d3ef7 diff --git a/cross/ffmpeg5/Makefile b/cross/ffmpeg5/Makefile index 746feff6d1e..bbdf4bf1362 100644 --- a/cross/ffmpeg5/Makefile +++ b/cross/ffmpeg5/Makefile @@ -1,5 +1,5 @@ PKG_NAME = ffmpeg -PKG_VERS = 5.1.5 +PKG_VERS = 5.1.6 PKG_EXT = tar.bz2 PKG_DIST_NAME = $(PKG_NAME)-$(PKG_VERS).$(PKG_EXT) PKG_DIST_SITE = https://www.ffmpeg.org/releases diff --git a/cross/ffmpeg5/digests b/cross/ffmpeg5/digests index fcfadc077e4..b96f89f40f6 100644 --- a/cross/ffmpeg5/digests +++ b/cross/ffmpeg5/digests @@ -1,3 +1,3 @@ -ffmpeg-5.1.5.tar.bz2 SHA1 3b4b8c3b9481f4dc8af0e7fbd1144bb4ef42aaeb -ffmpeg-5.1.5.tar.bz2 SHA256 6b4ec72db26cb0c45c6b66228f9090e11910121890a05691a80d494de22d1583 -ffmpeg-5.1.5.tar.bz2 MD5 e8ec904fa192bbab6d7df8d2d7b5a580 +ffmpeg-5.1.6.tar.bz2 SHA1 8930dcdf6d8bfd20611a1a147aadb048c6f875c7 +ffmpeg-5.1.6.tar.bz2 SHA256 d04c46cfe59f7dfbf2fd8574c2d24ad58c86a2e180a90d341cfa41781a994397 +ffmpeg-5.1.6.tar.bz2 MD5 547725dd393a6adc1511da1fd141df25 diff --git a/cross/ffmpeg7/Makefile b/cross/ffmpeg7/Makefile new file mode 100644 index 
00000000000..ab23c2090fa --- /dev/null +++ b/cross/ffmpeg7/Makefile @@ -0,0 +1,286 @@ +PKG_NAME = ffmpeg +PKG_VERS = 7.0.2 +PKG_EXT = tar.bz2 +PKG_DIST_NAME = $(PKG_NAME)-$(PKG_VERS).$(PKG_EXT) +PKG_DIST_SITE = https://www.ffmpeg.org/releases +PKG_DIR = $(PKG_NAME)-$(PKG_VERS) + +HOMEPAGE = https://www.ffmpeg.org/ +COMMENT = FFmpeg is a complete, cross-platform solution to record, convert and stream audio and video. It includes libavcodec - the leading audio/video codec library +LICENSE = GPLv2 + +# requires c11 support +UNSUPPORTED_ARCHS = $(ARMv5_ARCHS) $(OLD_PPC_ARCHS) + +CONFIGURE_ARGS = --target-os=linux --cross-prefix=$(TC_PATH)$(TC_PREFIX) --prefix=$(INSTALL_PREFIX) +CONFIGURE_ARGS += --extra-cflags="-I$(WORK_DIR)/install$(INSTALL_PREFIX)/include" +CONFIGURE_ARGS += --extra-ldflags="-L$(WORK_DIR)/install$(INSTALL_PREFIX)/lib" +CONFIGURE_ARGS += --extra-libs="-lxml2 -ldl -lm" --pkg-config=/usr/bin/pkg-config --ranlib=$(RANLIB) +CONFIGURE_ARGS += --enable-cross-compile --enable-rpath --enable-pic +CONFIGURE_ARGS += --enable-shared +CONFIGURE_ARGS += --enable-gpl --enable-version3 +CONFIGURE_ARGS += --disable-debug +CONFIGURE_ARGS += --disable-static +CONFIGURE_ARGS += --disable-doc + +# Must match $(SPK_REV) from spk/ffmpeg7/Makefile +CONFIGURE_ARGS += --extra-version=$(shell sed -n 's/^SPK_REV = \(.*\)/\1/p' $(WORK_DIR)/../../../spk/ffmpeg7/Makefile) + +# Enable Synology specific -hls_seek_time option and other optimizations +CONFIGURE_ARGS += --extra-cflags=-DSYNO_VIDEOSTATION + +# Compiler workaround to enable DTS-HD MA stream decoding +CONFIGURE_ARGS += --extra-cflags=-fno-if-conversion +# Synology default optimization +CONFIGURE_ARGS += --extra-cflags=-O3 +# Remove some of the noise while compiling +CONFIGURE_ARGS += --extra-cflags=-Wno-deprecated-declarations + +include ../../mk/spksrc.common.mk + +OPTIONAL_DEPENDS = cross/chromaprint-fftw +OPTIONAL_DEPENDS += cross/dav1d +OPTIONAL_DEPENDS += cross/frei0r +OPTIONAL_DEPENDS += cross/libass 
+OPTIONAL_DEPENDS += cross/libaom +OPTIONAL_DEPENDS += cross/libvpx +OPTIONAL_DEPENDS += cross/libzimg +OPTIONAL_DEPENDS += cross/openh264 +OPTIONAL_DEPENDS += cross/svt-av1 +OPTIONAL_DEPENDS += cross/libva +OPTIONAL_DEPENDS += cross/libva-utils +OPTIONAL_DEPENDS += cross/intel-vaapi-driver +OPTIONAL_DEPENDS += cross/intel-media-driver +OPTIONAL_DEPENDS += cross/intel-mediasdk +OPTIONAL_DEPENDS += cross/intel-level-zero +OPTIONAL_DEPENDS += cross/intel-graphics-compiler +OPTIONAL_DEPENDS += cross/intel-compute-runtime +OPTIONAL_DEPENDS += cross/ocl-icd +OPTIONAL_DEPENDS += cross/clinfo +OPTIONAL_DEPENDS += cross/twolame +OPTIONAL_DEPENDS += cross/x264 +OPTIONAL_DEPENDS += cross/x265 +OPTIONAL_DEPENDS += cross/shine + +# Define x86asm +ifeq ($(findstring $(ARCH),$(i686_ARCHS) $(x64_ARCHS)),$(ARCH)) +CONFIGURE_ARGS += --x86asmexe=nasm + +# Allow ASM on aarch64, disable on all others +else ifneq ($(findstring $(ARCH),$(ARMv8_ARCHS)),$(ARCH)) +CONFIGURE_ARGS += --disable-asm +endif + +DEPENDS += cross/codec2 +CONFIGURE_ARGS += --enable-libcodec2 + +DEPENDS += cross/libxml2 +CONFIGURE_ARGS += --enable-libxml2 +CONFIGURE_ARGS += --enable-demuxer=dash + +DEPENDS += cross/fontconfig +CONFIGURE_ARGS += --enable-libfontconfig + +DEPENDS += cross/freetype +CONFIGURE_ARGS += --enable-libfreetype + +DEPENDS += cross/fribidi +CONFIGURE_ARGS += --enable-libfribidi + +DEPENDS += cross/openjpeg +CONFIGURE_ARGS += --enable-libopenjpeg + +DEPENDS += cross/lame +CONFIGURE_ARGS += --enable-libmp3lame + +DEPENDS += cross/libbluray +CONFIGURE_ARGS += --enable-libbluray + +DEPENDS += cross/speex +CONFIGURE_ARGS += --enable-libspeex + +DEPENDS += cross/flac +DEPENDS += cross/libtheora +CONFIGURE_ARGS += --enable-libtheora + +DEPENDS += cross/libcaca +CONFIGURE_ARGS += --enable-libcaca + +DEPENDS += cross/libdc1394 +CONFIGURE_ARGS += --enable-libdc1394 + +DEPENDS += cross/libvorbis +CONFIGURE_ARGS += --enable-libvorbis + +DEPENDS += cross/libwebp +CONFIGURE_ARGS += --enable-libwebp + 
+DEPENDS += cross/libzmq +CONFIGURE_ARGS += --enable-libzmq + +DEPENDS += cross/gnutls +CONFIGURE_ARGS += --enable-gnutls + +ifneq ($(findstring $(ARCH),$(PPC_ARCHS)),$(ARCH)) +DEPENDS += cross/openh264 +CONFIGURE_ARGS += --enable-libopenh264 +endif + +DEPENDS += cross/opus +CONFIGURE_ARGS += --enable-libopus + +DEPENDS += cross/soxr +CONFIGURE_ARGS += --enable-libsoxr + +DEPENDS += cross/opencore-amr +CONFIGURE_ARGS += --enable-libopencore-amrnb --enable-libopencore-amrwb + +DEPENDS += cross/librabbitmq +CONFIGURE_ARGS += --enable-librabbitmq + +DEPENDS += cross/twolame +CONFIGURE_ARGS += --enable-libtwolame + +DEPENDS += cross/zvbi +CONFIGURE_ARGS += --enable-libzvbi + +DEPENDS += cross/x264 +CONFIGURE_ARGS += --enable-libx264 + +DEPENDS += cross/x265 +CONFIGURE_ARGS += --enable-libx265 + +DEPENDS += cross/libvpx +CONFIGURE_ARGS += --enable-libvpx + +DEPENDS += cross/shine +CONFIGURE_ARGS += --enable-libshine + +DEPENDS += cross/chromaprint-fftw +CONFIGURE_ARGS += --enable-chromaprint + +DEPENDS += cross/dav1d +CONFIGURE_ARGS += --enable-libdav1d + +DEPENDS += cross/librist +CONFIGURE_ARGS += --enable-librist + +DEPENDS += cross/libzimg +CONFIGURE_ARGS += --enable-libzimg + +DEPENDS += cross/snappy +CONFIGURE_ARGS += --enable-libsnappy + +# +# fdk-acc is now considered compatible with (L)GPL. +# It requires --enable-nonfree if you're also using --enable-gpl +# +# https://en.wikipedia.org/wiki/Fraunhofer_FDK_AAC +# It was classified as free by Fedora after a review by the legal department at Red Hat. 
+# See also: http://wiki.hydrogenaud.io/index.php?title=Fraunhofer_FDK_AAC#FDK_License +# +DEPENDS += cross/fdk-aac +CONFIGURE_ARGS += --enable-libfdk-aac --enable-nonfree + +ifeq ($(findstring $(ARCH),alpine comcerto2k monaco $(ARMv8_ARCHS) $(i686_ARCHS) $(x64_ARCHS)),$(ARCH)) +DEPENDS += cross/libaom +CONFIGURE_ARGS += --enable-libaom +endif + +# Add SVT-AV1 codec to supported ARCH +ifeq ($(findstring $(ARCH),alpine comcerto2k monaco $(ARMv8_ARCHS) $(x64_ARCHS)),$(ARCH)) +DEPENDS += cross/svt-av1 +CONFIGURE_ARGS += --enable-libsvtav1 +endif + +## +## ARCH SPECIFIC +## + +ifeq ($(findstring $(ARCH),$(ARMv7_ARCHS) $(ARMv7L_ARCHS)),$(ARCH)) +CONFIGURE_ARGS += --arch=arm +CONFIGURE_ARGS += --enable-neon +CONFIGURE_ARGS += --enable-thumb +CONFIGURE_ARGS += --enable-lto +CONFIGURE_ARGS += --disable-armv5te +CONFIGURE_ARGS += --disable-armv6 +CONFIGURE_ARGS += --disable-armv6t2 +CONFIGURE_ARGS += --disable-vfp +ifneq ($(findstring $(ARCH),alpine),$(ARCH)) +CONFIGURE_ARGS += --extra-cflags=-DSYNO_ALPINE_NEON +endif +endif + +ifeq ($(findstring $(ARCH),$(ARMv8_ARCHS)),$(ARCH)) +CONFIGURE_ARGS += --arch=arm64 +CONFIGURE_ARGS += --enable-neon +CONFIGURE_ARGS += --enable-thumb +CONFIGURE_ARGS += --enable-lto +endif + +ifeq ($(findstring $(ARCH),qoriq),$(ARCH)) +CONFIGURE_ARGS += --arch=ppc +CONFIGURE_ARGS += --cpu=e500v2 +CONFIGURE_ARGS += --extra-libs=-latomic +CONFIGURE_ARGS += --enable-lto +endif + +ifeq ($(findstring $(ARCH),evansport),$(ARCH)) +CONFIGURE_ARGS += --arch=x86 +CONFIGURE_ARGS += --cpu=atom +endif + +ifeq ($(findstring $(ARCH),$(x64_ARCHS)),$(ARCH)) +# Dependencies provided by synocli-videodriver +# If built outside spksrc.videodriver.mk +ifeq ($(strip $(VIDEODRV_PACKAGE)),) +DEPENDS += cross/libva cross/libva-utils +DEPENDS += cross/intel-vaapi-driver +DEPENDS += cross/intel-media-driver cross/intel-mediasdk +DEPENDS += cross/intel-libvpl cross/intel-libvpl-tools +endif +CONFIGURE_ARGS += --arch=x86_64 +CONFIGURE_ARGS += --enable-lto +CONFIGURE_ARGS += 
--enable-libdrm +CONFIGURE_ARGS += --enable-libmfx +CONFIGURE_ARGS += --enable-vaapi +endif + +include ../../mk/spksrc.cross-cc.mk + +## +## x64 & GCC SPECIFIC +## + +# OpenCL acceleration +ifeq ($(findstring $(ARCH),$(x64_ARCHS)),$(ARCH)) +# Newer Intel implementations (oneAPI, level-zero) requires gcc >= 5 +ifeq ($(call version_gt, $(TC_GCC), 5),1) +# Dependencies provided by synocli-videodriver +# If built outside spksrc.videodriver.mk +ifeq ($(strip $(VIDEODRV_PACKAGE)),) +DEPENDS += cross/intel-level-zero +DEPENDS += cross/intel-graphics-compiler +DEPENDS += cross/intel-compute-runtime +DEPENDS += cross/ocl-icd +DEPENDS += cross/clinfo +endif +CONFIGURE_ARGS += --enable-opencl +endif +endif + +## +## GCC SPECIFIC +## + +# libass requires harfbuzz which in turns requires c++ from gcc >= 4.8 (88f6281-6.1 = gcc-4.6.4) +ifeq ($(call version_ge, $(TC_GCC), 4.8),1) +DEPENDS += cross/libass +CONFIGURE_ARGS += --enable-libass +endif + +# Newer frei0r requires newer gcc +ifeq ($(call version_ge, $(TC_GCC), 7.5),1) +DEPENDS += cross/frei0r +CONFIGURE_ARGS += --enable-frei0r +endif diff --git a/cross/ffmpeg7/PLIST b/cross/ffmpeg7/PLIST new file mode 100644 index 00000000000..2b46ec57b28 --- /dev/null +++ b/cross/ffmpeg7/PLIST @@ -0,0 +1,32 @@ +bin:bin/ffmpeg +bin:bin/ffprobe +lnk:lib/libavcodec.so +lnk:lib/libavcodec.so.61 +lib:lib/libavcodec.so.61.3.100 +lnk:lib/libavdevice.so +lnk:lib/libavdevice.so.61 +lib:lib/libavdevice.so.61.1.100 +lnk:lib/libavfilter.so +lnk:lib/libavfilter.so.10 +lib:lib/libavfilter.so.10.1.100 +lnk:lib/libavformat.so +lnk:lib/libavformat.so.61 +lib:lib/libavformat.so.61.1.100 +lnk:lib/libavutil.so +lnk:lib/libavutil.so.59 +lib:lib/libavutil.so.59.8.100 +lnk:lib/libpostproc.so +lnk:lib/libpostproc.so.58 +lib:lib/libpostproc.so.58.1.100 +lnk:lib/libswresample.so +lnk:lib/libswresample.so.5 +lib:lib/libswresample.so.5.1.100 +lnk:lib/libswscale.so +lnk:lib/libswscale.so.8 +lib:lib/libswscale.so.8.1.100 +rsc:share/ffmpeg/ffprobe.xsd 
+rsc:share/ffmpeg/libvpx-1080p50_60.ffpreset +rsc:share/ffmpeg/libvpx-1080p.ffpreset +rsc:share/ffmpeg/libvpx-360p.ffpreset +rsc:share/ffmpeg/libvpx-720p50_60.ffpreset +rsc:share/ffmpeg/libvpx-720p.ffpreset diff --git a/cross/ffmpeg7/digests b/cross/ffmpeg7/digests new file mode 100644 index 00000000000..f40e36a804f --- /dev/null +++ b/cross/ffmpeg7/digests @@ -0,0 +1,3 @@ +ffmpeg-7.0.2.tar.bz2 SHA1 ea4652903adfd69541ba1eb5b793296c5103ff0d +ffmpeg-7.0.2.tar.bz2 SHA256 1ed250407ea8f955cca2f1139da3229fbc13032a0802e4b744be195865ff1541 +ffmpeg-7.0.2.tar.bz2 MD5 6a0d7d8c3f1f22c894381b48e358f2f3 diff --git a/cross/ffmpeg7/patches/0001-replace-arch-env-by-ffmpeg_arch.patch b/cross/ffmpeg7/patches/0001-replace-arch-env-by-ffmpeg_arch.patch new file mode 100644 index 00000000000..a2847d723a1 --- /dev/null +++ b/cross/ffmpeg7/patches/0001-replace-arch-env-by-ffmpeg_arch.patch @@ -0,0 +1,73 @@ +diff -uprN ../ffmpeg-7.0.2-orig/configure ./configure +--- ../ffmpeg-7.0.2-orig/configure 2024-08-02 22:55:25.000000000 +0000 ++++ ./configure 2024-09-26 23:24:05.795747855 +0000 +@@ -8006,7 +8006,7 @@ ifndef MAIN_MAKEFILE + SRC_PATH:=\$(SRC_PATH:.%=..%) + endif + CC_IDENT=$cc_ident +-ARCH=$arch ++FFMPEG_ARCH=$arch + INTRINSICS=$intrinsics + EXTERN_PREFIX=$extern_prefix + CC=$cc +diff -uprN ../ffmpeg-7.0.2-orig/ffbuild/common.mak ./ffbuild/common.mak +--- ../ffmpeg-7.0.2-orig/ffbuild/common.mak 2024-08-02 22:55:21.000000000 +0000 ++++ ./ffbuild/common.mak 2024-09-26 23:24:05.795747855 +0000 +@@ -186,8 +186,8 @@ DEP_LIBS := $(foreach lib,$(FFLIBS),$(ca + STATIC_DEP_LIBS := $(foreach lib,$(FFLIBS),$(call PATH_LIBNAME,$(lib))) + + SRC_DIR := $(SRC_PATH)/lib$(NAME) +-ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR)/$(ARCH)/*.h)) +-SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-) ++ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR)/$(FFMPEG_ARCH)/*.h)) ++SKIPHEADERS += $(ARCH_HEADERS:%=$(FFMPEG_ARCH)/%) $(SKIPHEADERS-) 
+ SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%) + HOBJS = $(filter-out $(SKIPHEADERS:.h=.h.o),$(ALLHEADERS:.h=.h.o)) + PTXOBJS = $(filter %.ptx.o,$(OBJS)) +diff -uprN ../ffmpeg-7.0.2-orig/ffbuild/library.mak ./ffbuild/library.mak +--- ../ffmpeg-7.0.2-orig/ffbuild/library.mak 2024-08-01 23:22:48.000000000 +0000 ++++ ./ffbuild/library.mak 2024-09-26 23:25:34.213947849 +0000 +@@ -75,7 +75,7 @@ endif + + clean:: + $(RM) $(addprefix $(SUBDIR),$(CLEANFILES) $(CLEANSUFFIXES) $(LIBSUFFIXES)) \ +- $(CLEANSUFFIXES:%=$(SUBDIR)$(ARCH)/%) $(CLEANSUFFIXES:%=$(SUBDIR)tests/%) ++ $(CLEANSUFFIXES:%=$(SUBDIR)$(FFMPEG_ARCH)/%) $(CLEANSUFFIXES:%=$(SUBDIR)tests/%) + + install-lib$(NAME)-shared: $(SUBDIR)$(SLIBNAME) + $(Q)mkdir -p "$(SHLIBDIR)" +diff -uprN ../ffmpeg-7.0.2-orig/libavcodec/Makefile ./libavcodec/Makefile +--- ../ffmpeg-7.0.2-orig/libavcodec/Makefile 2024-08-02 22:55:21.000000000 +0000 ++++ ./libavcodec/Makefile 2024-09-26 23:26:30.196700774 +0000 +@@ -64,7 +64,7 @@ OBJS = ac3_parser.o + + # subsystems + include $(SRC_PATH)/libavcodec/vvc/Makefile +--include $(SRC_PATH)/libavcodec/$(ARCH)/vvc/Makefile ++-include $(SRC_PATH)/libavcodec/$(FFMPEG_ARCH)/vvc/Makefile + OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o + OBJS-$(CONFIG_AC3DSP) += ac3dsp.o ac3.o ac3tab.o + OBJS-$(CONFIG_ADTS_HEADER) += adts_header.o mpeg4audio_sample_rates.o +diff -uprN ../ffmpeg-7.0.2-orig/Makefile ./Makefile +--- ../ffmpeg-7.0.2-orig/Makefile 2024-08-02 22:55:21.000000000 +0000 ++++ ./Makefile 2024-09-26 23:24:05.796747868 +0000 +@@ -110,7 +110,7 @@ define DOSUBDIR + $(foreach V,$(SUBDIR_VARS),$(eval $(call RESET,$(V)))) + SUBDIR := $(1)/ + include $(SRC_PATH)/$(1)/Makefile +--include $(SRC_PATH)/$(1)/$(ARCH)/Makefile ++-include $(SRC_PATH)/$(1)/$(FFMPEG_ARCH)/Makefile + -include $(SRC_PATH)/$(1)/$(INTRINSICS)/Makefile + include $(SRC_PATH)/ffbuild/library.mak + endef +diff -uprN ../ffmpeg-7.0.2-orig/tests/checkasm/Makefile ./tests/checkasm/Makefile +--- ../ffmpeg-7.0.2-orig/tests/checkasm/Makefile 
2024-08-02 22:55:22.000000000 +0000 ++++ ./tests/checkasm/Makefile 2024-09-26 23:27:28.687482467 +0000 +@@ -101,6 +101,6 @@ checkasm: $(CHECKASM) + testclean:: checkasmclean + + checkasmclean: +- $(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%) ++ $(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(FFMPEG_ARCH)/%) + + .PHONY: checkasm diff --git a/cross/ffmpeg7/patches/0002-tvheadend-ffmpeg.libx264.patch b/cross/ffmpeg7/patches/0002-tvheadend-ffmpeg.libx264.patch new file mode 100644 index 00000000000..da2eb209167 --- /dev/null +++ b/cross/ffmpeg7/patches/0002-tvheadend-ffmpeg.libx264.patch @@ -0,0 +1,10 @@ +--- ../ffmpeg-6.0-orig/libavcodec/libx264.c 2023-02-27 20:43:45.000000000 +0000 ++++ libavcodec/libx264.c 2023-05-05 13:03:41.859518874 +0000 +@@ -1236,6 +1236,7 @@ static const AVOption options[] = { + { "preset", "Set the encoding preset (cf. x264 --fullhelp)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "medium" }, 0, 0, VE}, + { "tune", "Tune the encoding params (cf. x264 --fullhelp)", OFFSET(tune), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE}, + { "profile", "Set profile restrictions (cf. x264 --fullhelp)", OFFSET(profile_opt), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE}, ++ { "x264profile", "Set profile restrictions (cf. 
x264 --fullhelp)", OFFSET(profile_opt), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE}, + { "fastfirstpass", "Use fast settings when encoding first pass", OFFSET(fastfirstpass), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE}, + {"level", "Specify level (as defined by Annex A)", OFFSET(level), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, + {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, diff --git a/cross/ffmpeg7/patches/0100-SYNO-videostation-hls-seek-time.patch b/cross/ffmpeg7/patches/0100-SYNO-videostation-hls-seek-time.patch new file mode 100644 index 00000000000..3f209b0fd4f --- /dev/null +++ b/cross/ffmpeg7/patches/0100-SYNO-videostation-hls-seek-time.patch @@ -0,0 +1,120 @@ +diff -uprN ../ffmpeg-4.3-005/libavformat/segment.c ./libavformat/segment.c +--- ../ffmpeg-4.3-005/libavformat/segment.c 2020-06-15 14:54:24.000000000 -0400 ++++ ./libavformat/segment.c 2020-06-16 19:54:41.332482718 -0400 +@@ -43,6 +43,8 @@ + #include "libavutil/time_internal.h" + #include "libavutil/timestamp.h" + ++#include "synoconfig.h" ++ + typedef struct SegmentListEntry { + int index; + double start_time, end_time; +@@ -123,6 +125,12 @@ typedef struct SegmentContext { + SegmentListEntry cur_entry; + SegmentListEntry *segment_list_entries; + SegmentListEntry *segment_list_entries_end; ++ ++#ifdef SYNO_VIDEOSTATION_HLS_SEEK_TIME ++ int64_t seek_time; ++ AVPacket old_key_packet; ++ int filled; ++#endif + } SegmentContext; + + static void print_csv_escaped_str(AVIOContext *ctx, const char *str) +@@ -854,6 +862,10 @@ static int seg_write_header(AVFormatCont + if (!seg->individual_header_trailer) + oc->pb->seekable = 0; + } ++#ifdef SYNO_VIDEOSTATION_HLS_SEEK_TIME ++ av_init_packet(&seg->old_key_packet); ++ seg->filled = 1; ++#endif + + return 0; + } +@@ -885,6 +897,40 @@ static int seg_write_packet(AVFormatCont + } + } + ++#ifdef SYNO_VIDEOSTATION_HLS_SEEK_TIME ++ int64_t pts = AV_NOPTS_VALUE; ++ if (0 < seg->seek_time && seg->filled) { ++ if 
(pkt->stream_index != seg->reference_stream_index) { ++ return 0; ++ } ++ ++ if (pkt->pts != AV_NOPTS_VALUE) { ++ pts = pkt->pts; ++ } else if (pkt->dts != AV_NOPTS_VALUE) { ++ pts = pkt->dts; ++ } ++ if (pts >= 0) { ++ if (pkt->flags & AV_PKT_FLAG_KEY) { ++ seg->filled = 0; ++ } else if (NULL != seg->old_key_packet.data) { ++ seg->old_key_packet.pts = pkt->pts; ++ seg->old_key_packet.dts = pkt->dts; ++ seg->old_key_packet.duration = pkt->duration; ++ av_packet_unref(pkt); ++ av_init_packet(pkt); ++ av_packet_ref(pkt, &seg->old_key_packet); ++ seg->filled = 0; ++ } ++ av_packet_unref(&seg->old_key_packet); ++ } else { ++ if (pkt->flags & AV_PKT_FLAG_KEY) { ++ av_packet_unref(&seg->old_key_packet); ++ av_packet_ref(&seg->old_key_packet, pkt); ++ } ++ return 0; ++ } ++ } ++#endif + calc_times: + if (seg->times) { + end_pts = seg->segment_count < seg->nb_times ? +@@ -971,6 +1017,15 @@ calc_times: + av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, &st->time_base), + av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, &st->time_base)); + ++#ifdef SYNO_VIDEOSTATION_HLS_SEEK_TIME ++ if (0 < seg->seek_time) { ++ if (pkt->pts != AV_NOPTS_VALUE) ++ pkt->pts += av_rescale_q(seg->seek_time, (AVRational) {1, 1000}, st->time_base); ++ if (pkt->dts != AV_NOPTS_VALUE) ++ pkt->dts += av_rescale_q(seg->seek_time, (AVRational) {1, 1000}, st->time_base); ++ } ++#endif ++ + ret = ff_write_chained(seg->avf, pkt->stream_index, pkt, s, + seg->initial_offset || seg->reset_timestamps || seg->avf->oformat->interleave_packet); + +@@ -1085,6 +1140,9 @@ static const AVOption options[] = { + { "reset_timestamps", "reset timestamps at the beginning of each segment", OFFSET(reset_timestamps), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, E }, + { "initial_offset", "set initial timestamp offset", OFFSET(initial_offset), AV_OPT_TYPE_DURATION, {.i64 = 0}, -INT64_MAX, INT64_MAX, E }, + { "write_empty_segments", "allow writing empty 'filler' segments", OFFSET(write_empty), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, E }, ++#ifdef 
SYNO_VIDEOSTATION_HLS_SEEK_TIME ++ { "hls_seek_time", "initial segment start time", OFFSET(seek_time), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT_MAX, E }, ++#endif + { NULL }, + }; + +Binary files ../ffmpeg-4.3-005/libavformat/.segment.c.rej.swp and ./libavformat/.segment.c.rej.swp differ +diff -uprN ../ffmpeg-4.3-005/synoconfig.h ./synoconfig.h +--- ../ffmpeg-4.3-005/synoconfig.h 1969-12-31 19:00:00.000000000 -0500 ++++ ./synoconfig.h 2020-06-16 19:53:14.125668365 -0400 +@@ -0,0 +1,12 @@ ++#ifndef MY_ABC_HERE ++#define MY_ABC_HERE ++#endif ++ ++/* Add ffmpeg option for HLS. ++ * -hls_seek_time: ++ * Let output fragment ts start with this seek time ++ * See Video Station #1758 ++ */ ++#if defined(SYNO_VIDEOSTATION) ++#define SYNO_VIDEOSTATION_HLS_SEEK_TIME ++#endif diff --git a/cross/ffmpeg7/patches/0101-SYNO-videostation-hls-costumized-ts-name.patch b/cross/ffmpeg7/patches/0101-SYNO-videostation-hls-costumized-ts-name.patch new file mode 100644 index 00000000000..2140be7bf33 --- /dev/null +++ b/cross/ffmpeg7/patches/0101-SYNO-videostation-hls-costumized-ts-name.patch @@ -0,0 +1,30 @@ +diff -uprN ../ffmpeg-4.3-020/libavformat/segment.c ./libavformat/segment.c +--- ../ffmpeg-4.3-020/libavformat/segment.c 2020-06-16 19:54:41.000000000 -0400 ++++ ./libavformat/segment.c 2020-06-16 19:57:31.891853716 -0400 +@@ -237,9 +237,15 @@ static int set_segment_filename(AVFormat + + if ((ret = av_reallocp(&seg->cur_entry.filename, size)) < 0) + return ret; ++#ifdef SYNO_VIDEOSTATION_HLS_COSTUMIZED_TS_NAME ++ snprintf(seg->cur_entry.filename, size, "%s%03d", ++ seg->entry_prefix ? seg->entry_prefix : "", ++ seg->segment_idx); ++#else + snprintf(seg->cur_entry.filename, size, "%s%s", + seg->entry_prefix ? 
seg->entry_prefix : "", + av_basename(oc->url)); ++#endif + + return 0; + } +diff -uprN ../ffmpeg-4.3-020/synoconfig.h ./synoconfig.h +--- ../ffmpeg-4.3-020/synoconfig.h 2020-06-16 19:53:14.000000000 -0400 ++++ ./synoconfig.h 2020-06-16 19:57:31.891853716 -0400 +@@ -10,3 +10,7 @@ + #if defined(SYNO_VIDEOSTATION) + #define SYNO_VIDEOSTATION_HLS_SEEK_TIME + #endif ++ ++#if defined(SYNO_VIDEOSTATION) ++#define SYNO_VIDEOSTATION_HLS_COSTUMIZED_TS_NAME ++#endif diff --git a/cross/ffmpeg7/patches/0102-SYNO-videostation-skip-displaymatrix.patch b/cross/ffmpeg7/patches/0102-SYNO-videostation-skip-displaymatrix.patch new file mode 100644 index 00000000000..ff21f4d00a5 --- /dev/null +++ b/cross/ffmpeg7/patches/0102-SYNO-videostation-skip-displaymatrix.patch @@ -0,0 +1,47 @@ +--- ../ffmpeg-7.0.2/libavformat/movenc.c 2024-08-02 22:55:25.000000000 +0000 ++++ ./libavformat/movenc.c 2024-09-24 23:27:44.198262220 +0000 +@@ -124,6 +124,9 @@ static const AVOption options[] = { + { "pts", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MOV_PRFT_SRC_PTS}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM, .unit = "prft"}, + { "wallclock", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MOV_PRFT_SRC_WALLCLOCK}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM, .unit = "prft"}, + { "write_tmcd", "force or disable writing tmcd", offsetof(MOVMuxContext, write_tmcd), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AV_OPT_FLAG_ENCODING_PARAM}, ++#ifdef SYNO_VIDEOSTATION_SKIP_DISPLAYMATRIX ++ { "skip_displaymatrix", "Skip writing displaymatrix metadata.", offsetof(MOVMuxContext, skip_displaymatrix), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM}, ++#endif + { NULL }, + }; + +@@ -3505,7 +3508,13 @@ static int mov_write_tkhd_tag(AVIOContex + st->codecpar->nb_coded_side_data, + AV_PKT_DATA_DISPLAYMATRIX); + if (sd && sd->size == 9 * sizeof(*display_matrix)) ++#ifdef SYNO_VIDEOSTATION_SKIP_DISPLAYMATRIX ++ if (mov->skip_displaymatrix) { ++ display_matrix = NULL; ++ } ++#else + display_matrix = (uint32_t *)sd->data; ++#endif + } + + if 
(track->flags & MOV_TRACK_ENABLED) +--- ../ffmpeg-7.0.2/libavformat/movenc.h 2024-08-02 22:55:22.000000000 +0000 ++++ ./libavformat/movenc.h 2024-09-24 23:19:32.992183584 +0000 +@@ -28,6 +28,8 @@ + #include "movenccenc.h" + #include "libavcodec/packet_internal.h" + ++#include "synoconfig.h" ++ + #define MOV_FRAG_INFO_ALLOC_INCREMENT 64 + #define MOV_INDEX_CLUSTER_SIZE 1024 + #define MOV_TIMESCALE 1000 +@@ -205,6 +207,9 @@ typedef struct MOVMuxContext { + int rtp_flags; + + int iods_skip; ++#ifdef SYNO_VIDEOSTATION_SKIP_DISPLAYMATRIX ++ int skip_displaymatrix; ++#endif + int iods_video_profile; + int iods_audio_profile; + diff --git a/cross/ffmpeg7/patches/0103-SYNO-videostation-webm-seek-time.patch b/cross/ffmpeg7/patches/0103-SYNO-videostation-webm-seek-time.patch new file mode 100644 index 00000000000..a06c8fe0742 --- /dev/null +++ b/cross/ffmpeg7/patches/0103-SYNO-videostation-webm-seek-time.patch @@ -0,0 +1,104 @@ +diff -uprN ../ffmpeg-4.3-022/libavformat/matroskaenc.c ./libavformat/matroskaenc.c +--- ../ffmpeg-4.3-022/libavformat/matroskaenc.c 2020-06-15 14:54:24.000000000 -0400 ++++ ./libavformat/matroskaenc.c 2020-06-27 16:37:13.468256460 -0400 +@@ -67,6 +67,8 @@ enum { + DEFAULT_MODE_PASSTHROUGH, + }; + ++#include "synoconfig.h" ++ + typedef struct ebml_master { + int64_t pos; ///< absolute offset in the containing AVIOContext where the master's elements start + int sizebytes; ///< how many bytes were reserved for the size +@@ -157,6 +159,11 @@ typedef struct MatroskaMuxContext { + int default_mode; + + uint32_t segment_uid[4]; ++#ifdef SYNO_VIDEOSTATION_WEBM_SEEK_TIME ++ int64_t seek_time; ++ AVPacket old_key_packet; ++ int unfilled; ++#endif + } MatroskaMuxContext; + + /** 2 bytes * 7 for EBML IDs, 7 1-byte EBML lengths, 6 1-byte uint, +@@ -1935,6 +1942,11 @@ static int mkv_write_header(AVFormatCont + mkv->cluster_size_limit = 32 * 1024; + } + ++#ifdef SYNO_VIDEOSTATION_WEBM_SEEK_TIME ++ av_init_packet(&mkv->old_key_packet); ++ mkv->unfilled = 1; ++#endif 
++ + return 0; + } + +@@ -2374,6 +2386,41 @@ static int mkv_write_packet(AVFormatCont + if (ret < 0) + return ret; + ++#ifdef SYNO_VIDEOSTATION_WEBM_SEEK_TIME ++ int64_t pts = AV_NOPTS_VALUE; ++ if (0 < mkv->seek_time && mkv->unfilled) { ++ if (codec_type != AVMEDIA_TYPE_VIDEO) { ++ return 0; ++ } ++ ++ if (pkt->pts != AV_NOPTS_VALUE) { ++ pts = pkt->pts; ++ } else if (pkt->dts != AV_NOPTS_VALUE) { ++ pts = pkt->dts; ++ } ++ if (pts >= 0) { ++ if (pkt->flags & AV_PKT_FLAG_KEY) { ++ mkv->unfilled = 0; ++ } else if (NULL != mkv->old_key_packet.data) { ++ mkv->old_key_packet.pts = pkt->pts; ++ mkv->old_key_packet.dts = pkt->dts; ++ mkv->old_key_packet.duration = pkt->duration; ++ av_packet_unref(pkt); ++ av_init_packet(pkt); ++ av_packet_ref(pkt, &mkv->old_key_packet); ++ mkv->unfilled = 0; ++ } ++ av_packet_unref(&mkv->old_key_packet); ++ } else { ++ if (pkt->flags & AV_PKT_FLAG_KEY) { ++ av_packet_unref(&mkv->old_key_packet); ++ av_packet_ref(&mkv->old_key_packet, pkt); ++ } ++ return 0; ++ } ++ } ++#endif ++ + if (mkv->cluster_pos != -1) { + if (mkv->tracks[pkt->stream_index].write_dts) + cluster_time = pkt->dts - mkv->cluster_pts; +@@ -2788,6 +2835,9 @@ static const AVOption options[] = { + { "dash", "Create a WebM file conforming to WebM DASH specification", OFFSET(is_dash), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, + { "dash_track_number", "Track number for the DASH stream", OFFSET(dash_track_number), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, INT_MAX, FLAGS }, + { "live", "Write files assuming it is a live stream.", OFFSET(is_live), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++#ifdef SYNO_VIDEOSTATION_WEBM_SEEK_TIME ++ { "webm_seek_time", "seek time", OFFSET(seek_time), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT_MAX, FLAGS }, ++#endif + { "allow_raw_vfw", "allow RAW VFW mode", OFFSET(allow_raw_vfw), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, + { "write_crc32", "write a CRC32 element inside every Level 1 element", OFFSET(write_crc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 
0, 1, FLAGS }, + { "default_mode", "Controls how a track's FlagDefault is inferred", OFFSET(default_mode), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MODE_INFER }, DEFAULT_MODE_INFER, DEFAULT_MODE_PASSTHROUGH, FLAGS, "default_mode" }, +diff -uprN ../ffmpeg-4.3-022/synoconfig.h ./synoconfig.h +--- ../ffmpeg-4.3-022/synoconfig.h 2020-06-16 20:00:07.000000000 -0400 ++++ ./synoconfig.h 2020-06-27 16:35:13.221186636 -0400 +@@ -22,3 +22,12 @@ + #if defined(SYNO_VIDEOSTATION) + #define SYNO_VIDEOSTATION_SKIP_DISPLAYMATRIX + #endif ++ ++/* Add ffmpeg option for WEBM. ++ * -webm_seek_time: ++ * Let output stream with the seek time ++ * See Video Station #2170 ++ */ ++#if defined(SYNO_VIDEOSTATION) ++#define SYNO_VIDEOSTATION_WEBM_SEEK_TIME ++#endif diff --git a/cross/ffmpeg7/patches/0110-SYNO-smooth-streaming.patch b/cross/ffmpeg7/patches/0110-SYNO-smooth-streaming.patch new file mode 100644 index 00000000000..85fc5f7ec9b --- /dev/null +++ b/cross/ffmpeg7/patches/0110-SYNO-smooth-streaming.patch @@ -0,0 +1,106 @@ +diff -uprN ../ffmpeg-4.3-023/libavformat/smoothstreamingenc.c ./libavformat/smoothstreamingenc.c +--- ../ffmpeg-4.3-023/libavformat/smoothstreamingenc.c 2020-06-15 14:54:24.000000000 -0400 ++++ ./libavformat/smoothstreamingenc.c 2020-06-16 20:08:21.691804931 -0400 +@@ -39,6 +39,8 @@ + #include "libavutil/mathematics.h" + #include "libavutil/intreadwrite.h" + ++#include "synoconfig.h" ++ + typedef struct Fragment { + char file[1024]; + char infofile[1024]; +@@ -77,6 +79,11 @@ typedef struct SmoothStreamingContext { + OutputStream *streams; + int has_video, has_audio; + int nb_fragments; ++#ifdef SYNO_SMOOTH_STREAMING ++ char *fragment_url; ++ int fragment_length; ++ int seek_time; ++#endif + } SmoothStreamingContext; + + static int ism_write(void *opaque, uint8_t *buf, int buf_size) +@@ -250,8 +257,13 @@ static int write_manifest(AVFormatContex + avio_printf(out, ">\n"); + if (c->has_video) { + int last = -1, index = 0; +- avio_printf(out, "\n", video_streams, 
video_chunks); +- for (i = 0; i < s->nb_streams; i++) { ++#ifdef SYNO_SMOOTH_STREAMING ++ avio_printf(out, "\n", video_streams, video_chunks, ++ (NULL == c->fragment_url) ? "" : c->fragment_url); ++#else ++ avio_printf(out, "\n", video_streams, video_chunks); ++#endif ++ for (i = 0; i < s->nb_streams; i++) { + OutputStream *os = &c->streams[i]; + if (s->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) + continue; +@@ -264,8 +276,13 @@ static int write_manifest(AVFormatContex + } + if (c->has_audio) { + int last = -1, index = 0; +- avio_printf(out, "\n", audio_streams, audio_chunks); +- for (i = 0; i < s->nb_streams; i++) { ++#ifdef SYNO_SMOOTH_STREAMING ++avio_printf(out, "\n", audio_streams, audio_chunks, ++ (NULL == c->fragment_url) ? "" : c->fragment_url); ++#else ++ avio_printf(out, "\n", audio_streams, audio_chunks); ++#endif ++ for (i = 0; i < s->nb_streams; i++) { + OutputStream *os = &c->streams[i]; + if (s->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO) + continue; +@@ -555,6 +572,14 @@ static int ism_flush(AVFormatContext *s, + return ret; + } + ++#ifdef SYNO_SMOOTH_STREAMING ++ if (0 != c->fragment_length) { ++ start_ts = (int64_t)((int64_t)os->nb_fragments * (int64_t)c->fragment_length * (int64_t)10000000); ++ } ++ if (0 != c->seek_time) { ++ start_ts += (int64_t)((int64_t)c->seek_time * (int64_t)10000000); ++ } ++#endif + snprintf(header_filename, sizeof(header_filename), "%s/FragmentInfo(%s=%"PRIu64")", os->dirname, os->stream_type_tag, start_ts); + snprintf(target_filename, sizeof(target_filename), "%s/Fragments(%s=%"PRIu64")", os->dirname, os->stream_type_tag, start_ts); + copy_moof(s, filename, header_filename, moof_size); +@@ -640,7 +665,12 @@ static const AVOption options[] = { + { "lookahead_count", "number of lookahead fragments", OFFSET(lookahead_count), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, E }, + { "min_frag_duration", "minimum fragment duration (in microseconds)", OFFSET(min_frag_duration), AV_OPT_TYPE_INT64, { .i64 = 
5000000 }, 0, INT_MAX, E }, + { "remove_at_exit", "remove all fragments when finished", OFFSET(remove_at_exit), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E }, +- { NULL }, ++#ifdef SYNO_SMOOTH_STREAMING ++ { "fragment_url", "set fragment url in manifest", OFFSET(fragment_url), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E }, ++ { "fragment_length", "let file name of output fragment mp4 with the duration (in seconds)", OFFSET(fragment_length), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E }, ++ { "seek_time", "add seek time to fragment file (in seconds)", OFFSET(seek_time), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E }, ++#endif ++ { NULL }, + }; + + static const AVClass ism_class = { +diff -uprN ../ffmpeg-4.3-023/synoconfig.h ./synoconfig.h +--- ../ffmpeg-4.3-023/synoconfig.h 2020-06-16 20:01:36.286633640 -0400 ++++ ./synoconfig.h 2020-06-16 20:08:21.691804931 -0400 +@@ -31,3 +31,17 @@ + #if defined(SYNO_VIDEOSTATION) + #define SYNO_VIDEOSTATION_WEBM_SEEK_TIME + #endif ++ ++/* Add ffmpeg option for smooth streaming. ++ * -fragment_url: ++ * This option can overwrite fragment url in Manifest for smooth streaming. 
++ * -fragment_length: ++ * Let file name of output fragment mp4 with this duration ++ * This setting depends on x264 option to make it work ++ * -seek_time: ++ * Let file name of output fragment mp4 start with this seek time ++ * See Video Station #659 ++ */ ++#if defined(SYNO_VIDEOSTATION) ++#define SYNO_SMOOTH_STREAMING ++#endif diff --git a/cross/ffmpeg7/patches/0111-SYNO-filter-srt-tags.patch b/cross/ffmpeg7/patches/0111-SYNO-filter-srt-tags.patch new file mode 100644 index 00000000000..09544870f7f --- /dev/null +++ b/cross/ffmpeg7/patches/0111-SYNO-filter-srt-tags.patch @@ -0,0 +1,58 @@ +diff -uprN ../ffmpeg-5.1.3-0110/libavcodec/srtenc.c ./libavcodec/srtenc.c +--- ../ffmpeg-5.1.3-0110/libavcodec/srtenc.c 2023-03-26 14:21:00.000000000 +0000 ++++ ./libavcodec/srtenc.c 2023-04-24 23:43:36.351930175 +0000 +@@ -29,6 +29,7 @@ + #include "ass.h" + #include "codec_internal.h" + ++#include "synoconfig.h" + + #define SRT_STACK_SIZE 64 + +@@ -196,7 +197,9 @@ static void srt_alignment_cb(void *priv, + static void srt_cancel_overrides_cb(void *priv, const char *style) + { + srt_stack_push_pop(priv, 0, 1); ++#ifndef SYNO_FILTER_SRT_TAGS + srt_style_apply(priv, style); ++#endif + } + + static void srt_move_cb(void *priv, int x1, int y1, int x2, int y2, +@@ -224,6 +227,21 @@ static const ASSCodesCallbacks srt_callb + .end = srt_end_cb, + }; + ++#ifdef SYNO_FILTER_SRT_TAGS ++static const ASSCodesCallbacks srt_callbacks_syno_notag = { ++ .text = srt_text_cb, ++ .new_line = srt_new_line_cb, ++ .style = NULL, ++ .color = NULL, ++ .font_name = NULL, ++ .font_size = NULL, ++ .alignment = NULL, ++ .cancel_overrides = srt_cancel_overrides_cb, ++ .move = NULL, ++ .end = srt_end_cb, ++}; ++#endif ++ + static const ASSCodesCallbacks text_callbacks = { + .text = srt_text_cb, + .new_line = srt_new_line_cb, +diff -uprN ../ffmpeg-5.1.3-0110/synoconfig.h ./synoconfig.h +--- ../ffmpeg-5.1.3-0110/synoconfig.h 2023-04-24 23:41:03.000000000 +0000 ++++ ./synoconfig.h 2023-04-24 
23:43:36.351930175 +0000 +@@ -45,3 +45,11 @@ + #if defined(SYNO_VIDEOSTATION) + #define SYNO_SMOOTH_STREAMING + #endif ++ ++/* Filter out tags that are not needed. ++ * These tags are produced when converting *.ass to *.srt. ++ * See Video Station #1031 ++ */ ++#if defined(SYNO_VIDEOSTATION) ++#define SYNO_FILTER_SRT_TAGS ++#endif diff --git a/cross/ffmpeg7/patches/0112-SYNO-fix-mp4-remux-h264parser.patch b/cross/ffmpeg7/patches/0112-SYNO-fix-mp4-remux-h264parser.patch new file mode 100644 index 00000000000..35a43e5cd1f --- /dev/null +++ b/cross/ffmpeg7/patches/0112-SYNO-fix-mp4-remux-h264parser.patch @@ -0,0 +1,41 @@ +diff -uprN ../ffmpeg-4.2.1-patch1-5-Update3/libavcodec/h264_parser.c ./libavcodec/h264_parser.c +--- ../ffmpeg-4.2.1-patch1-5-Update3/libavcodec/h264_parser.c 2019-07-08 13:45:25.000000000 -0400 ++++ ./libavcodec/h264_parser.c 2020-01-17 07:33:31.000000000 -0500 +@@ -47,6 +47,8 @@ + #include "mpegutils.h" + #include "parser.h" + ++#include "synoconfig.h" ++ + typedef struct H264ParseContext { + ParseContext pc; + H264ParamSets ps; +@@ -108,8 +110,13 @@ static int h264_find_frame_end(H264Parse + state >>= 1; // 2->1, 1->0, 0->0 + } else if (state <= 5) { + int nalu_type = buf[i] & 0x1F; ++#ifdef SYNO_FIX_MP4_REMUX_H264PARSER ++ if (nalu_type == H264_NAL_SEI || nalu_type == H264_NAL_SPS || ++ nalu_type == H264_NAL_AUD) { ++#else + if (nalu_type == H264_NAL_SEI || nalu_type == H264_NAL_SPS || + nalu_type == H264_NAL_PPS || nalu_type == H264_NAL_AUD) { ++#endif + if (pc->frame_start_found) { + i++; + goto found; +diff -uprN ../ffmpeg-4.2.1-patch1-5-Update3/synoconfig.h ./synoconfig.h +--- ../ffmpeg-4.2.1-patch1-5-Update3/synoconfig.h 2020-01-17 09:15:23.724603559 -0500 ++++ ./synoconfig.h 2020-01-17 09:20:03.490853172 -0500 +@@ -53,3 +53,11 @@ + #if defined(SYNO_VIDEOSTATION) + #define SYNO_FILTER_SRT_TAGS + #endif ++ ++/* Fixed h264_parser error in special MP4 remux ++ * Some MP4 files could trigger a parser error due to leftover PPS information ++ * See Video Station #944 ++ */ 
++#if defined(SYNO_VIDEOSTATION) ++#define SYNO_FIX_MP4_REMUX_H264PARSER ++#endif diff --git a/cross/ffmpeg7/patches/0113-SYNO-write-riff-info-tag-to-wav.patch b/cross/ffmpeg7/patches/0113-SYNO-write-riff-info-tag-to-wav.patch new file mode 100644 index 00000000000..9dd6b413a7d --- /dev/null +++ b/cross/ffmpeg7/patches/0113-SYNO-write-riff-info-tag-to-wav.patch @@ -0,0 +1,58 @@ +diff -uprN ../ffmpeg-4.3-032/libavformat/wavenc.c ./libavformat/wavenc.c +--- ../ffmpeg-4.3-032/libavformat/wavenc.c 2020-06-15 14:54:24.000000000 -0400 ++++ ./libavformat/wavenc.c 2020-06-16 20:10:39.073227980 -0400 +@@ -46,6 +46,8 @@ + #include "internal.h" + #include "riff.h" + ++#include "synoconfig.h" ++ + #define RF64_AUTO (-1) + #define RF64_NEVER 0 + #define RF64_ALWAYS 1 +@@ -84,6 +86,9 @@ typedef struct WAVMuxContext { + int peak_block_pos; + int peak_ppv; + int peak_bps; ++#ifdef SYNO_WRITE_RIFF_INFO_TAG_TO_WAV ++ int write_info_tag; ++#endif + } WAVMuxContext; + + #if CONFIG_WAV_MUXER +@@ -355,7 +360,13 @@ static int wav_write_header(AVFormatCont + + if (wav->write_peak != PEAK_ONLY) { + /* info header */ ++#ifdef SYNO_WRITE_RIFF_INFO_TAG_TO_WAV ++ if (wav->write_info_tag) { ++ ff_riff_write_info(s); ++ } ++#else + ff_riff_write_info(s); ++#endif + + /* data header */ + wav->data = ff_start_tag(pb, "data"); +@@ -491,6 +502,9 @@ static const AVOption options[] = { + { "peak_block_size", "Number of audio samples used to generate each peak frame.", OFFSET(peak_block_size), AV_OPT_TYPE_INT, { .i64 = 256 }, 0, 65536, ENC }, + { "peak_format", "The format of the peak envelope data (1: uint8, 2: uint16).", OFFSET(peak_format), AV_OPT_TYPE_INT, { .i64 = PEAK_FORMAT_UINT16 }, PEAK_FORMAT_UINT8, PEAK_FORMAT_UINT16, ENC }, + { "peak_ppv", "Number of peak points per peak value (1 or 2).", OFFSET(peak_ppv), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 2, ENC }, ++#ifdef SYNO_WRITE_RIFF_INFO_TAG_TO_WAV ++ { "write_info_tag", "Write RIFF info tag.", OFFSET(write_info_tag), AV_OPT_TYPE_INT, { .i64 = 1 }, 
0, 1, ENC }, ++#endif + { NULL }, + }; + +diff -uprN ../ffmpeg-4.3-032/synoconfig.h ./synoconfig.h +--- ../ffmpeg-4.3-032/synoconfig.h 2020-06-16 20:10:09.000000000 -0400 ++++ ./synoconfig.h 2020-06-16 20:10:39.074228048 -0400 +@@ -61,3 +61,8 @@ + #if defined(SYNO_VIDEOSTATION) + #define SYNO_FIX_MP4_REMUX_H264PARSER + #endif ++ ++/* Optional for writing RIFF info tag to wav ++ * See Media Server #386 ++ */ ++#define SYNO_WRITE_RIFF_INFO_TAG_TO_WAV diff --git a/cross/ffmpeg7/patches/1000-jellyfin-0000-HOWTO.txt b/cross/ffmpeg7/patches/1000-jellyfin-0000-HOWTO.txt new file mode 100644 index 00000000000..7dd217d18f5 --- /dev/null +++ b/cross/ffmpeg7/patches/1000-jellyfin-0000-HOWTO.txt @@ -0,0 +1,27 @@ +Project page: https://github.com/jellyfin/jellyfin-ffmpeg +Patches: https://github.com/jellyfin/jellyfin-ffmpeg/tree/jellyfin/debian/patches + +Extract jellyfin-ffmpeg git repository +$ git clone https://github.com/jellyfin/jellyfin-ffmpeg.git + +Check that current ffmpeg matches jellyfin version +$ cat jellyfin-ffmpeg/VERSION +7.0.2 + +Modify patches to be in -p0 mode: +$ cd jellyfin-ffmpeg/debian/patches +$ sed -i -e 's?--- jellyfin-ffmpeg.orig/?--- ?1' \ + -e 's?+++ jellyfin-ffmpeg/?+++ ?1' \ + -e 's?--- a/?--- ?1' \ + -e 's?+++ b/?+++ ?1' \ + -e 's?--- FFmpeg.orig/?--- ?1' \ + -e 's?+++ FFmpeg/?+++ ?1' \ + *.patch + +Rename the patches: +$ i=1001 && ls -1 *.patch | while read LINE; do echo $(printf "%04d" $i)-jellyfin-$LINE; mv $LINE $(printf "%04d" $i)-jellyfin-$LINE; let i++; done + +Test the patches on a clean ffmpeg tree: +$ tar -jxvf ffmpeg-7.0.2.tar.bz2 +$ cd ffmpeg-7.0.2/ +$ ls -1d ~/jellyfin-ffmpeg/debian/patches/*.patch | while read LINE; do patch -p0 < $LINE; done diff --git a/cross/ffmpeg7/patches/1001-jellyfin-0001-add-fixes-for-segment-muxer.patch b/cross/ffmpeg7/patches/1001-jellyfin-0001-add-fixes-for-segment-muxer.patch new file mode 100644 index 00000000000..1879e6807e9 --- /dev/null +++ 
b/cross/ffmpeg7/patches/1001-jellyfin-0001-add-fixes-for-segment-muxer.patch @@ -0,0 +1,37 @@ +Index: FFmpeg/libavformat/segment.c +=================================================================== +--- libavformat/segment.c ++++ libavformat/segment.c +@@ -88,6 +88,7 @@ typedef struct SegmentContext { + int64_t last_val; ///< remember last time for wrap around detection + int cut_pending; + int header_written; ///< whether we've already called avformat_write_header ++ int64_t start_pts; ///< pts of the very first packet processed, used to compute correct segment length + + char *entry_prefix; ///< prefix to add to list entry filenames + int list_type; ///< set the list type +@@ -707,6 +708,7 @@ static int seg_init(AVFormatContext *s) + if ((ret = parse_frames(s, &seg->frames, &seg->nb_frames, seg->frames_str)) < 0) + return ret; + } else { ++ seg->start_pts = -1; + if (seg->use_clocktime) { + if (seg->time <= 0) { + av_log(s, AV_LOG_ERROR, "Invalid negative segment_time with segment_atclocktime option set\n"); +@@ -890,7 +892,15 @@ calc_times: + seg->cut_pending = 1; + seg->last_val = wrapped_val; + } else { +- end_pts = seg->time * (seg->segment_count + 1); ++ if (seg->start_pts != -1) { ++ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); ++ } else if (pkt->stream_index == seg->reference_stream_index && pkt->pts != AV_NOPTS_VALUE) { ++ // this is the first packet of the reference stream we see, initialize start point ++ seg->start_pts = av_rescale_q(pkt->pts, st->time_base, AV_TIME_BASE_Q); ++ seg->cur_entry.start_time = (double)pkt->pts * av_q2d(st->time_base); ++ seg->cur_entry.start_pts = seg->start_pts; ++ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); ++ } + } + } + diff --git a/cross/ffmpeg7/patches/1002-jellyfin-0002-update-cuda-func-header.patch b/cross/ffmpeg7/patches/1002-jellyfin-0002-update-cuda-func-header.patch new file mode 100644 index 00000000000..9db65346bec --- /dev/null +++ 
b/cross/ffmpeg7/patches/1002-jellyfin-0002-update-cuda-func-header.patch @@ -0,0 +1,230 @@ +Index: FFmpeg/compat/cuda/cuda_runtime.h +=================================================================== +--- compat/cuda/cuda_runtime.h ++++ compat/cuda/cuda_runtime.h +@@ -24,6 +24,7 @@ + #define COMPAT_CUDA_CUDA_RUNTIME_H + + // Common macros ++#define __constant__ __attribute__((constant)) + #define __global__ __attribute__((global)) + #define __device__ __attribute__((device)) + #define __device_builtin__ __attribute__((device_builtin)) +@@ -39,59 +40,59 @@ + // Basic typedefs + typedef __device_builtin__ unsigned long long cudaTextureObject_t; + +-typedef struct __device_builtin__ __align__(2) uchar2 +-{ +- unsigned char x, y; +-} uchar2; +- +-typedef struct __device_builtin__ __align__(4) ushort2 +-{ +- unsigned short x, y; +-} ushort2; +- +-typedef struct __device_builtin__ __align__(8) float2 +-{ +- float x, y; +-} float2; +- +-typedef struct __device_builtin__ __align__(8) int2 +-{ +- int x, y; +-} int2; +- +-typedef struct __device_builtin__ uint3 +-{ +- unsigned int x, y, z; +-} uint3; +- +-typedef struct uint3 dim3; +- +-typedef struct __device_builtin__ __align__(4) uchar4 +-{ +- unsigned char x, y, z, w; +-} uchar4; +- +-typedef struct __device_builtin__ __align__(8) ushort4 +-{ +- unsigned short x, y, z, w; +-} ushort4; +- +-typedef struct __device_builtin__ __align__(16) int4 +-{ +- int x, y, z, w; +-} int4; ++#define MAKE_VECTORS(type, base) \ ++typedef struct __device_builtin__ type##1 { \ ++ base x; \ ++} type##1; \ ++static __inline__ __device__ type##1 make_##type##1(base x) { \ ++ type##1 ret; \ ++ ret.x = x; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ __align__(sizeof(base) * 2) type##2 { \ ++ base x, y; \ ++} type##2; \ ++static __inline__ __device__ type##2 make_##type##2(base x, base y) { \ ++ type##2 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ type##3 { \ ++ base x, y, z; 
\ ++} type##3; \ ++static __inline__ __device__ type##3 make_##type##3(base x, base y, base z) { \ ++ type##3 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ ret.z = z; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ __align__(sizeof(base) * 4) type##4 { \ ++ base x, y, z, w; \ ++} type##4; \ ++static __inline__ __device__ type##4 make_##type##4(base x, base y, base z, base w) { \ ++ type##4 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ ret.z = z; \ ++ ret.w = w; \ ++ return ret; \ ++} + +-typedef struct __device_builtin__ __align__(16) float4 +-{ +- float x, y, z, w; +-} float4; ++// Basic initializers ++MAKE_VECTORS(uchar, unsigned char) ++MAKE_VECTORS(ushort, unsigned short) ++MAKE_VECTORS(int, int) ++MAKE_VECTORS(uint, unsigned int) ++MAKE_VECTORS(float, float) + + // Accessors for special registers + #define GETCOMP(reg, comp) \ + asm("mov.u32 %0, %%" #reg "." #comp ";" : "=r"(tmp)); \ + ret.comp = tmp; + +-#define GET(name, reg) static inline __device__ uint3 name() {\ ++#define GET(name, reg) static __inline__ __device__ uint3 name() {\ + uint3 ret; \ + unsigned tmp; \ + GETCOMP(reg, x) \ +@@ -109,18 +110,8 @@ GET(getThreadIdx, tid) + #define blockDim (getBlockDim()) + #define threadIdx (getThreadIdx()) + +-// Basic initializers (simple macros rather than inline functions) +-#define make_int2(a, b) ((int2){.x = a, .y = b}) +-#define make_uchar2(a, b) ((uchar2){.x = a, .y = b}) +-#define make_ushort2(a, b) ((ushort2){.x = a, .y = b}) +-#define make_float2(a, b) ((float2){.x = a, .y = b}) +-#define make_int4(a, b, c, d) ((int4){.x = a, .y = b, .z = c, .w = d}) +-#define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d}) +-#define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d}) +-#define make_float4(a, b, c, d) ((float4){.x = a, .y = b, .z = c, .w = d}) +- + // Conversions from the tex instruction's 4-register output to various types +-#define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, 
unsigned b, unsigned c, unsigned d) {*out = (ret);} ++#define TEX2D(type, ret) static __inline__ __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);} + + TEX2D(unsigned char, a & 0xFF) + TEX2D(unsigned short, a & 0xFFFF) +@@ -134,19 +125,19 @@ TEX2D(float4, make_float4(a, b, c, d)) + + // Template calling tex instruction and converting the output to the selected type + template +-inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) ++__inline__ __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) + { +- T ret; +- unsigned ret1, ret2, ret3, ret4; +- asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : +- "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) : +- "l"(texObject), "f"(x), "f"(y)); +- conv(&ret, ret1, ret2, ret3, ret4); +- return ret; ++ T ret; ++ unsigned ret1, ret2, ret3, ret4; ++ asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : ++ "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) : ++ "l"(texObject), "f"(x), "f"(y)); ++ conv(&ret, ret1, ret2, ret3, ret4); ++ return ret; + } + + template<> +-inline __device__ float4 tex2D(cudaTextureObject_t texObject, float x, float y) ++__inline__ __device__ float4 tex2D(cudaTextureObject_t texObject, float x, float y) + { + float4 ret; + asm("tex.2d.v4.f32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : +@@ -156,37 +147,41 @@ inline __device__ float4 tex2D(c + } + + template<> +-inline __device__ float tex2D(cudaTextureObject_t texObject, float x, float y) ++__inline__ __device__ float tex2D(cudaTextureObject_t texObject, float x, float y) + { + return tex2D(texObject, x, y).x; + } + + template<> +-inline __device__ float2 tex2D(cudaTextureObject_t texObject, float x, float y) ++__inline__ __device__ float2 tex2D(cudaTextureObject_t texObject, float x, float y) + { + float4 ret = tex2D(texObject, x, y); + return make_float2(ret.x, ret.y); + } + + // Math helper functions +-static inline __device__ float floorf(float a) { return 
__builtin_floorf(a); } +-static inline __device__ float floor(float a) { return __builtin_floorf(a); } +-static inline __device__ double floor(double a) { return __builtin_floor(a); } +-static inline __device__ float ceilf(float a) { return __builtin_ceilf(a); } +-static inline __device__ float ceil(float a) { return __builtin_ceilf(a); } +-static inline __device__ double ceil(double a) { return __builtin_ceil(a); } +-static inline __device__ float truncf(float a) { return __builtin_truncf(a); } +-static inline __device__ float trunc(float a) { return __builtin_truncf(a); } +-static inline __device__ double trunc(double a) { return __builtin_trunc(a); } +-static inline __device__ float fabsf(float a) { return __builtin_fabsf(a); } +-static inline __device__ float fabs(float a) { return __builtin_fabsf(a); } +-static inline __device__ double fabs(double a) { return __builtin_fabs(a); } +-static inline __device__ float sqrtf(float a) { return __builtin_sqrtf(a); } +- +-static inline __device__ float __saturatef(float a) { return __nvvm_saturate_f(a); } +-static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); } +-static inline __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); } +-static inline __device__ float __expf(float a) { return __nvvm_ex2_approx_f(a * (float)__builtin_log2(__builtin_exp(1))); } +-static inline __device__ float __powf(float a, float b) { return __nvvm_ex2_approx_f(__nvvm_lg2_approx_f(a) * b); } ++static __inline__ __device__ float floorf(float a) { return __builtin_floorf(a); } ++static __inline__ __device__ float floor(float a) { return __builtin_floorf(a); } ++static __inline__ __device__ double floor(double a) { return __builtin_floor(a); } ++static __inline__ __device__ float ceilf(float a) { return __builtin_ceilf(a); } ++static __inline__ __device__ float ceil(float a) { return __builtin_ceilf(a); } ++static __inline__ __device__ double ceil(double a) { return __builtin_ceil(a); } ++static 
__inline__ __device__ float truncf(float a) { return __builtin_truncf(a); } ++static __inline__ __device__ float trunc(float a) { return __builtin_truncf(a); } ++static __inline__ __device__ double trunc(double a) { return __builtin_trunc(a); } ++static __inline__ __device__ float fabsf(float a) { return __builtin_fabsf(a); } ++static __inline__ __device__ float fabs(float a) { return __builtin_fabsf(a); } ++static __inline__ __device__ double fabs(double a) { return __builtin_fabs(a); } ++static __inline__ __device__ float sqrtf(float a) { return __builtin_sqrtf(a); } ++ ++static __inline__ __device__ float __saturatef(float a) { return __nvvm_saturate_f(a); } ++static __inline__ __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); } ++static __inline__ __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); } ++static __inline__ __device__ float __exp2f(float a) { return __nvvm_ex2_approx_f(a); } ++static __inline__ __device__ float __expf(float a) { return __nvvm_ex2_approx_f(a * 1.4427f); } ++static __inline__ __device__ float __log2f(float a) { return __nvvm_lg2_approx_f(a); } ++static __inline__ __device__ float __logf(float a) { return __nvvm_lg2_approx_f(a) * 0.693147f; } ++static __inline__ __device__ float __log10f(float a) { return __nvvm_lg2_approx_f(a) * 0.30103f; } ++static __inline__ __device__ float __powf(float a, float b) { return __nvvm_ex2_approx_f(__nvvm_lg2_approx_f(a) * b); } + + #endif /* COMPAT_CUDA_CUDA_RUNTIME_H */ diff --git a/cross/ffmpeg7/patches/1003-jellyfin-0003-add-enhanced-cuda-pixfmt-converter-impl.patch b/cross/ffmpeg7/patches/1003-jellyfin-0003-add-enhanced-cuda-pixfmt-converter-impl.patch new file mode 100644 index 00000000000..f2fc7294a0d --- /dev/null +++ b/cross/ffmpeg7/patches/1003-jellyfin-0003-add-enhanced-cuda-pixfmt-converter-impl.patch @@ -0,0 +1,2437 @@ +Index: FFmpeg/libavfilter/dither_matrix.h +=================================================================== +--- /dev/null ++++ 
libavfilter/dither_matrix.h +@@ -0,0 +1,272 @@ ++/* ++ * Dither matrix data ++ * ++ * This file is placed in the public domain. ++ */ ++ ++#ifndef AVFILTER_DITHER_MATRIX_H ++#define AVFILTER_DITHER_MATRIX_H ++ ++#include ++static const int ff_fruit_dither_size = 256; ++static const int ff_fruit_dither_size2 = 65536; ++static const uint16_t ff_fruit_dither_matrix[] = { ++ 19888, 29707, 49084, 13068, 41264, 712, 45413, 19654, 2867, 61750, 16847, 41475, 2, 45810, 9851, 55310, 1841, 61502, 27701, 8126, 59314, 35775, 26459, 33765, 60407, 38044, 2183, 34698, 12774, 41879, 32782, 17837, 51634, 28892, 56742, 19473, 54052, 34435, 1110, 16332, 36717, 20449, 51394, 13119, 37924, 54423, 7501, 40896, 63615, 19863, 36954, 49848, 4203, 18609, 56951, 53, 31804, 65267, 17623, 40824, 7923, 44889, 21013, 32279, 18337, 52211, 24005, 14658, 59148, 17859, 51003, 57176, 2252, 48317, 20349, 32034, 53814, 27988, 50223, 21462, 64891, 9999, 42192, 29127, 52236, 45014, 8083, 21852, 49456, 12918, 43557, 14440, 58920, 8561, 27234, 63813, 9837, 46705, 3909, 22107, 38548, 31189, 19615, 36443, 13337, 50960, 1716, 41784, 53366, 8398, 63512, 14469, 42551, 21674, 53593, 18113, 62267, 9633, 36575, 13138, 30569, 33116, 8993, 49572, 29688, 43993, 4455, 39647, 6085, 58701, 2846, 64510, 16487, 60132, 31783, 5400, 25303, 40301, 51678, 15972, 23822, 60387, 27547, 12696, 63301, 6050, 56442, 23223, 34129, 59372, 42005, 53895, 6412, 61764, 42882, 57844, 33753, 27, 45370, 8517, 35942, 21182, 46773, 18008, 50673, 16004, 33519, 48604, 64860, 286, 21738, 57904, 3972, 17300, 52817, 40655, 14160, 18754, 38672, 24633, 33839, 62366, 30028, 56171, 17602, 25357, 9014, 21000, 51750, 12422, 34378, 46937, 63719, 16598, 57991, 9635, 24723, 64758, 782, 55850, 46333, 3811, 23742, 57543, 11116, 55475, 5230, 13870, 45682, 9418, 37145, 19198, 62935, 42020, 23214, 56460, 16715, 60508, 30632, 54271, 8307, 50222, 41262, 13267, 45038, 23335, 33901, 1657, 50490, 5873, 63619, 12368, 52878, 5001, 41423, 64738, 18728, 34557, 51546, 
7078, 42758, 58613, 18308, 28789, 65047, 30950, 59621, 35664, 8041, 56673, 31895, 2202, 49512, 12047, 26904, 8969, ++ 39839, 64439, 2395, 38407, 26507, 56841, 14875, 59370, 42901, 24805, 53374, 31769, 64659, 23462, 37246, 19079, 43577, 14362, 38580, 51157, 21430, 1295, 54528, 11237, 18145, 29538, 47852, 15690, 57246, 5586, 62640, 23508, 36190, 14290, 39749, 5961, 43679, 12355, 64230, 41697, 54930, 5411, 43069, 65009, 2790, 26776, 45084, 22290, 958, 56596, 43760, 15945, 30898, 39939, 12202, 43225, 52459, 10415, 29935, 57940, 24302, 64055, 1381, 56727, 42664, 5582, 39183, 49649, 3460, 32638, 25228, 16358, 38846, 14222, 64150, 42941, 663, 61917, 9063, 31464, 3308, 48719, 19515, 61329, 14037, 25187, 65490, 36808, 3569, 62608, 25601, 53787, 19884, 33028, 44561, 13860, 54576, 20415, 52946, 28779, 57449, 2280, 44851, 61984, 23035, 29940, 61021, 16435, 25024, 45348, 29225, 579, 49381, 7106, 41158, 33716, 1542, 50166, 26487, 64250, 4728, 46090, 61528, 2029, 56145, 18498, 59711, 23527, 47097, 27303, 41001, 20424, 36163, 1313, 41868, 53357, 59104, 21041, 1981, 43849, 33830, 3755, 50367, 39929, 21844, 48599, 28500, 11972, 50179, 20672, 784, 29846, 38149, 22308, 4673, 26234, 19427, 59084, 16892, 27828, 61292, 2402, 32092, 53458, 4339, 61977, 41259, 9193, 30466, 35630, 54266, 13885, 32726, 61694, 9375, 27764, 56918, 32128, 54048, 11660, 50209, 425, 46566, 6990, 65129, 34755, 55080, 44884, 5677, 62983, 26806, 7608, 30349, 48002, 20191, 36576, 50556, 12169, 42073, 22605, 28238, 39471, 52365, 31864, 44423, 25719, 35888, 52927, 29098, 42862, 24272, 55899, 15383, 5593, 35729, 20542, 34285, 4432, 44345, 17792, 37338, 27725, 904, 31371, 61058, 9694, 49194, 36623, 58312, 26213, 19253, 44872, 33341, 16237, 37493, 10089, 54165, 4404, 24545, 63780, 12515, 33064, 53490, 11057, 50829, 3287, 44582, 18041, 25902, 42249, 14760, 52388, 23636, 36654, 44222, 54520, ++ 15465, 22887, 51992, 17068, 62923, 9597, 36773, 29398, 10855, 48244, 4782, 38890, 13236, 51700, 6660, 63210, 32563, 57614, 4995, 
17539, 39639, 64925, 23284, 43255, 51980, 4316, 55750, 39386, 27343, 44990, 11712, 48227, 4505, 63499, 26927, 58785, 21237, 52120, 29217, 22727, 11278, 60133, 28304, 18165, 33264, 52957, 16629, 48257, 34766, 25959, 9177, 64594, 47161, 24445, 60558, 27670, 20397, 37296, 47756, 2520, 50030, 28640, 38437, 9515, 26024, 62836, 12324, 27832, 43274, 65408, 10491, 44402, 60698, 34813, 8230, 23641, 37584, 17356, 45295, 40622, 57060, 15980, 38115, 26987, 7, 50732, 34041, 15658, 47229, 10580, 34867, 1328, 48595, 56720, 4413, 40469, 30146, 37711, 15544, 41452, 10520, 50398, 27118, 4748, 47121, 6683, 33906, 48636, 4208, 38386, 58107, 18937, 36021, 58768, 25921, 13719, 56368, 44131, 16752, 38963, 52832, 15736, 27586, 40736, 12298, 36926, 9485, 51444, 15368, 55495, 10926, 46520, 29229, 51046, 23344, 17647, 8783, 34934, 61189, 11599, 54886, 20030, 57692, 8577, 35288, 17408, 62215, 45461, 7322, 63919, 15673, 51945, 10355, 45094, 55804, 36515, 49995, 12893, 39892, 54073, 14022, 41890, 58294, 23756, 36790, 11930, 22376, 57045, 17652, 47001, 7159, 43328, 24185, 37134, 48859, 42615, 3359, 46112, 5491, 59312, 16849, 43918, 28469, 23014, 39810, 2929, 13847, 31844, 19120, 38574, 50277, 15099, 56641, 2249, 43003, 59805, 4854, 33719, 53339, 8834, 48905, 13307, 7250, 60684, 15633, 428, 59650, 18585, 6301, 58140, 1866, 39557, 47744, 28203, 64299, 46240, 9843, 51490, 26640, 65268, 11131, 47411, 58904, 38827, 20091, 53371, 28034, 16977, 7334, 42667, 29918, 3038, 57769, 49410, 26674, 48022, 22017, 40368, 31251, 45196, 16463, 48858, 5164, 41520, 24228, 38131, 9074, 48189, 62762, 4159, 58442, 38673, 7166, 60568, 212, 31028, ++ 41864, 59701, 7922, 32362, 43913, 21757, 55175, 6017, 64036, 35141, 20970, 57358, 15312, 28911, 47092, 25801, 10586, 22018, 41955, 62372, 32025, 14957, 46529, 7343, 40467, 31286, 24312, 9206, 52626, 20722, 60718, 30170, 41147, 15970, 49580, 10426, 31074, 37773, 3274, 47678, 32164, 39895, 14915, 57269, 36013, 10762, 30044, 61645, 13485, 54762, 32345, 20923, 2234, 53381, 7340, 
48978, 4634, 62162, 13180, 34989, 19638, 11542, 54454, 33627, 15270, 47123, 34426, 55912, 8860, 22898, 54253, 30983, 6666, 26187, 46768, 55519, 11848, 58180, 24883, 5756, 30247, 54077, 7358, 59416, 44154, 20476, 6718, 58379, 27807, 42635, 60547, 28977, 16245, 37034, 23800, 57878, 6997, 60851, 846, 64983, 34586, 16851, 58843, 39573, 14667, 56192, 40263, 20547, 65448, 11380, 31513, 55224, 24259, 10350, 64721, 38140, 28571, 5982, 22603, 57214, 10652, 42301, 35222, 20053, 63379, 25199, 31664, 42588, 486, 32802, 53932, 7500, 63688, 13538, 57968, 38830, 48274, 30705, 14709, 46693, 26779, 37213, 15305, 30151, 53058, 41627, 1620, 32727, 25905, 40499, 36026, 24000, 60510, 31674, 14562, 2151, 29316, 65452, 24332, 5207, 48241, 29659, 7459, 14939, 45800, 54958, 28782, 3523, 51428, 27379, 62825, 19468, 58594, 6420, 16147, 22718, 63299, 20468, 36675, 26752, 41148, 9583, 57510, 51384, 15293, 49077, 59192, 42530, 60380, 9948, 24140, 53537, 22402, 37993, 27491, 13691, 45768, 17657, 29363, 58835, 19303, 63017, 36990, 21927, 42309, 30010, 50136, 33557, 48357, 17424, 61382, 31110, 11613, 53830, 7814, 25471, 57925, 14304, 40605, 3196, 55584, 23845, 6565, 15287, 43742, 3519, 64992, 40218, 56392, 11032, 61719, 35545, 24017, 8687, 60390, 1100, 62179, 13289, 57266, 2444, 38957, 23281, 56524, 20238, 61594, 15600, 55853, 28096, 12269, 34621, 19457, 28542, 47730, 21264, 14100, 57292, ++ 5526, 34811, 27620, 47929, 4099, 50049, 24440, 40726, 18381, 50910, 8735, 26745, 42468, 59776, 3939, 36249, 48929, 54100, 29211, 8988, 48446, 3481, 28142, 58707, 20160, 61840, 13968, 64587, 33443, 2703, 38353, 7895, 57907, 34928, 510, 44677, 61309, 17487, 55827, 8816, 62142, 24731, 1499, 48797, 6590, 42024, 58259, 3882, 38930, 6105, 42644, 59131, 34211, 14734, 38063, 16983, 55692, 33050, 22986, 59435, 41442, 51713, 3952, 43736, 58628, 21618, 1880, 19819, 52887, 37175, 231, 48818, 21102, 59787, 3763, 28839, 49793, 14916, 36109, 62972, 22240, 34296, 46364, 11597, 31918, 56383, 41091, 24107, 52708, 19076, 5580, 
39832, 64663, 11230, 51402, 17901, 45532, 25824, 47964, 19215, 24314, 53992, 8209, 31817, 62790, 26113, 9743, 54614, 27613, 44273, 15646, 5326, 42964, 47490, 3100, 20750, 46480, 60220, 34700, 243, 25538, 47860, 6465, 53682, 3565, 48901, 57608, 21326, 61095, 37985, 19239, 24830, 34176, 43431, 6384, 27657, 4149, 55889, 42323, 6862, 52154, 1021, 64672, 44674, 5099, 24542, 58354, 18983, 54444, 13184, 57110, 2962, 39309, 18712, 63253, 47334, 43355, 9683, 37728, 55400, 20008, 34623, 63644, 49776, 25398, 1426, 40391, 64063, 34364, 12274, 38892, 2326, 45458, 29444, 51171, 34664, 10541, 30406, 55372, 14778, 61411, 19964, 31579, 4591, 37677, 27219, 22108, 1589, 29000, 35855, 46065, 3418, 32674, 65052, 8569, 54836, 25844, 62180, 6376, 34709, 43632, 2017, 30925, 47632, 9787, 63554, 21154, 12265, 41052, 26985, 8770, 51776, 21479, 33247, 43285, 1534, 39140, 19494, 53089, 32005, 20948, 35447, 63295, 25894, 52393, 30286, 12800, 22167, 32230, 46754, 20520, 51737, 13919, 43515, 17826, 36348, 29223, 46616, 19712, 35278, 60085, 7794, 34163, 47215, 1673, 32755, 40152, 22103, 51401, 43567, 53906, 10289, 65290, 33337, 45696, 24143, ++ 50433, 19223, 55755, 13617, 60356, 33529, 11871, 54006, 1174, 31179, 44696, 62249, 2161, 18073, 30653, 56378, 16450, 786, 35017, 52427, 25209, 56695, 37331, 16249, 49770, 240, 35524, 47137, 18544, 50552, 22418, 55341, 17038, 25121, 53415, 24020, 6861, 33856, 13771, 50620, 18676, 45527, 53551, 21802, 64020, 25271, 19344, 46697, 23966, 52259, 18322, 11205, 50348, 29631, 63269, 25050, 42065, 8778, 44571, 5876, 16468, 31223, 64826, 23702, 7146, 40097, 61709, 47846, 29722, 13851, 41789, 57515, 11179, 40322, 52521, 18292, 32834, 42452, 2486, 47424, 10678, 51207, 17966, 64290, 4026, 29961, 13366, 49878, 2564, 32513, 55558, 22299, 7623, 46880, 31683, 2841, 35822, 12796, 32672, 55725, 5714, 36180, 43966, 21445, 114, 52596, 17728, 36843, 2512, 51479, 34442, 61725, 17529, 52420, 32736, 54214, 11026, 19396, 51302, 31243, 62509, 17790, 58933, 32432, 44739, 16344, 8152, 
30229, 14374, 5170, 45323, 56950, 3272, 21945, 52665, 33009, 65025, 18545, 24250, 62652, 21497, 40779, 28909, 10791, 59984, 13747, 38592, 49164, 4077, 44150, 21287, 47967, 11349, 52733, 6944, 25159, 17535, 56891, 32810, 3206, 44773, 11129, 39034, 6002, 33166, 60581, 20260, 10032, 44507, 23194, 53125, 31821, 55646, 13017, 64292, 1322, 59963, 39377, 7414, 52087, 2084, 48057, 35407, 63826, 18495, 53150, 11100, 47394, 55598, 6505, 61813, 18237, 40738, 11364, 51232, 31321, 1178, 48272, 38819, 15316, 51701, 24935, 56402, 16650, 53985, 4711, 38462, 57376, 2944, 65414, 35257, 44237, 4129, 55238, 15967, 59967, 30153, 48462, 5420, 61624, 42221, 12160, 46482, 33033, 9134, 37836, 47772, 54064, 595, 15742, 39255, 5377, 64372, 31835, 55792, 11378, 53113, 6201, 25693, 49679, 14433, 27880, 63092, 11834, 26826, 54316, 6693, 64344, 769, 24656, 5858, 41079, 16697, 3132, 62204, 37958, ++ 11572, 42747, 1524, 25531, 37497, 17618, 46546, 28175, 39334, 60923, 22460, 11438, 37638, 52769, 43821, 7957, 40910, 60205, 13020, 19267, 44867, 12037, 61051, 9982, 33056, 44305, 25976, 6949, 59450, 28487, 43501, 10953, 32294, 48892, 13062, 39117, 65209, 46950, 26345, 40710, 35645, 4597, 29522, 38247, 12717, 51109, 31406, 8654, 56285, 36554, 28193, 44833, 3606, 40361, 9881, 57156, 1184, 54113, 28967, 61288, 26550, 48429, 13631, 37616, 50637, 28347, 16021, 35763, 6269, 63482, 18827, 33427, 27419, 15564, 35246, 63774, 7112, 58958, 20201, 54899, 28416, 1169, 41689, 26031, 38730, 61701, 35972, 9609, 63973, 17106, 47760, 30810, 41826, 21218, 61820, 26901, 63397, 49778, 9281, 42763, 27747, 60481, 12102, 49305, 41997, 33157, 23864, 46392, 60076, 22338, 9141, 39362, 26842, 12949, 30178, 7960, 41601, 63877, 14576, 43565, 8574, 37541, 28409, 13422, 22985, 65409, 39200, 53172, 47391, 63087, 26241, 12805, 50141, 61781, 15711, 44950, 12088, 36828, 49681, 10078, 32262, 56718, 17167, 47496, 33236, 51311, 9586, 30955, 28109, 61336, 8907, 33482, 58618, 28675, 41334, 34803, 51359, 7839, 21914, 62434, 26589, 59427, 
19017, 52582, 16606, 43582, 30858, 49248, 4794, 59724, 15440, 8823, 21087, 40178, 25284, 44239, 18042, 46881, 23655, 33300, 42372, 25752, 12239, 45659, 8453, 40482, 62590, 32969, 17023, 43426, 25539, 58397, 14656, 44609, 19513, 35188, 56906, 20952, 23963, 64173, 10693, 33026, 3533, 40331, 27869, 45347, 25302, 14918, 50597, 32158, 22929, 13483, 62199, 37523, 24045, 50002, 12650, 36266, 28556, 16498, 8032, 50861, 2235, 57488, 18149, 60317, 4571, 24678, 41886, 59153, 28995, 54641, 27196, 1938, 45477, 22831, 30708, 42125, 65191, 9240, 55022, 44326, 18860, 52269, 43287, 13855, 45821, 17421, 37342, 59968, 31677, 56163, 26374, 51048, 28954, 8506, ++ 57905, 30243, 63284, 44947, 10012, 65109, 3423, 58389, 15750, 7088, 55601, 34245, 49439, 12750, 23854, 65338, 20061, 27208, 46070, 64200, 6173, 30404, 41386, 22665, 53501, 5321, 56231, 42072, 12289, 36906, 1547, 63116, 45750, 3996, 59957, 28073, 20036, 2095, 57050, 7428, 59363, 15832, 62937, 9671, 43470, 2411, 60476, 41088, 15216, 586, 62423, 22681, 60009, 19525, 45944, 27153, 35851, 21170, 46782, 11051, 39493, 447, 55328, 18981, 8541, 60278, 3098, 45068, 56321, 24494, 51383, 4961, 62299, 46250, 1503, 22645, 44787, 25689, 39631, 12821, 36915, 60352, 14698, 53319, 8736, 18587, 45877, 22876, 43124, 37481, 11934, 60002, 444, 53151, 15051, 44197, 5027, 22566, 59295, 14232, 51672, 3204, 29513, 16155, 58425, 10840, 63771, 6211, 13966, 40684, 56626, 1049, 50577, 63157, 37683, 57740, 24709, 35723, 4309, 49163, 23769, 54829, 2702, 50847, 41340, 1236, 27910, 10220, 34637, 17455, 40527, 36465, 28798, 9293, 38197, 79, 58574, 26448, 2635, 39137, 46024, 5652, 25444, 61691, 2366, 22955, 43468, 64360, 15049, 35068, 50658, 26727, 5488, 46266, 20421, 1196, 60838, 30639, 48834, 15317, 50923, 537, 42448, 27603, 56070, 8136, 13537, 57597, 36318, 26130, 41550, 48418, 56719, 5289, 36035, 11804, 50702, 4226, 58104, 13654, 64902, 6029, 54722, 24379, 58659, 29866, 3794, 23258, 51931, 12590, 38925, 591, 49230, 28703, 61255, 4211, 41254, 11935, 46801, 
5467, 41737, 58306, 49543, 12843, 61848, 8239, 54997, 36111, 19704, 6940, 53289, 47027, 26442, 10594, 41446, 2760, 63657, 22301, 56601, 45305, 59434, 29430, 39687, 21860, 44683, 14184, 34974, 63994, 10475, 48654, 6919, 18457, 40711, 50194, 15157, 38519, 58866, 3420, 17244, 39750, 32149, 268, 36904, 4610, 30542, 58248, 35860, 29161, 49199, 12964, 20491, 45089, 10774, 36267, 18480, 53594, ++ 16119, 35403, 6518, 20845, 52568, 32078, 42137, 20390, 35980, 47647, 29958, 4354, 21646, 58896, 32942, 5439, 50371, 34035, 2800, 38118, 24549, 54880, 2023, 36350, 63586, 16768, 29797, 20471, 51422, 15276, 53987, 26601, 19173, 36472, 41505, 9328, 51709, 42788, 30667, 23029, 44405, 32826, 49756, 20753, 55613, 34060, 27016, 17412, 65456, 35102, 47959, 12487, 31919, 51854, 5426, 64354, 13918, 51262, 18014, 36713, 63662, 22157, 30294, 57741, 46057, 25614, 41064, 17686, 31623, 11698, 38632, 43868, 30388, 9307, 56620, 37775, 10153, 51838, 4362, 62096, 32306, 21605, 43671, 24456, 48144, 55147, 1732, 59135, 27317, 4698, 54325, 25409, 35490, 10191, 38212, 56446, 18519, 40782, 30314, 39028, 20688, 46063, 64366, 37898, 4013, 47716, 28278, 35461, 49693, 19899, 28985, 45606, 23155, 4892, 16058, 48507, 1922, 29164, 55685, 21073, 40427, 12060, 60873, 33647, 19050, 45972, 59890, 24178, 51793, 6665, 56037, 1870, 60308, 42786, 19845, 47724, 30450, 54220, 16397, 59632, 12965, 35588, 53635, 18225, 36650, 55213, 6651, 19648, 56278, 358, 42271, 16751, 64932, 12457, 55059, 15914, 44280, 38502, 12075, 40758, 22909, 35748, 10522, 61534, 21515, 37939, 46542, 24418, 18392, 62329, 697, 33592, 26924, 61220, 19162, 54463, 31432, 27974, 37895, 21807, 49569, 29276, 39139, 879, 34221, 16068, 57063, 36768, 8154, 64325, 30884, 54546, 21740, 7329, 37234, 52581, 16353, 60128, 32050, 55355, 27162, 18367, 20608, 37700, 23134, 34230, 1325, 46580, 60254, 42486, 17057, 691, 56798, 29580, 58435, 18445, 43965, 9024, 38137, 347, 24871, 19032, 54828, 6000, 62494, 27050, 51390, 19641, 33609, 25509, 37151, 61318, 9552, 34028, 
62845, 7473, 20793, 50744, 28513, 59530, 21368, 51859, 24914, 61011, 9978, 22700, 2967, 61923, 8295, 52797, 39534, 1881, 63650, 46836, 4678, 40591, ++ 48461, 22635, 51354, 39725, 27123, 14415, 57026, 7791, 25940, 51811, 17279, 63701, 40321, 9522, 46814, 25381, 39180, 11154, 57988, 17114, 43017, 21075, 50947, 13467, 27449, 38990, 58361, 3332, 65482, 31420, 40203, 6292, 61713, 14143, 56502, 22072, 35322, 5253, 63830, 12101, 54635, 151, 25778, 37258, 14397, 46254, 7836, 49279, 30495, 21359, 7012, 55143, 39197, 16159, 33456, 23590, 43418, 2862, 58345, 6743, 32098, 52611, 4827, 35358, 12589, 32953, 53566, 9791, 49350, 60889, 2398, 16819, 54564, 23511, 50386, 17172, 64582, 31181, 19359, 49051, 7843, 56175, 5174, 34738, 12259, 29382, 33704, 15400, 51061, 20789, 45137, 13662, 58063, 48847, 28672, 8025, 33582, 52494, 1185, 62297, 7249, 34910, 13197, 22760, 55376, 30770, 18372, 57545, 7679, 62417, 32472, 11976, 59602, 33558, 44595, 21761, 61132, 39696, 16958, 58507, 5537, 45112, 29836, 7266, 56730, 11149, 35796, 3867, 44158, 31470, 22295, 49333, 16717, 25590, 54970, 22843, 7763, 41205, 50807, 20806, 31333, 48454, 8267, 42683, 14181, 27429, 39692, 46931, 25006, 37999, 22542, 53763, 31279, 37233, 23529, 59184, 27268, 4205, 63990, 6600, 57970, 29105, 47431, 3940, 31481, 64997, 2061, 53740, 6854, 50495, 38347, 17128, 10896, 42165, 47698, 7877, 63059, 15578, 53403, 9097, 43693, 17363, 60675, 20833, 50798, 44490, 13238, 41930, 49907, 19846, 5175, 46643, 34014, 63353, 10157, 23422, 44135, 26477, 8023, 36404, 57, 45057, 60974, 6608, 51064, 64562, 18039, 28462, 10047, 30476, 63113, 40004, 20220, 49475, 7213, 34695, 27287, 52607, 15044, 64691, 34089, 49122, 13362, 31031, 36823, 1405, 42985, 8225, 57019, 2615, 46062, 21557, 52702, 13118, 24411, 56636, 43072, 12136, 37619, 5641, 46998, 10872, 41200, 16889, 48632, 42413, 56937, 25596, 19056, 33770, 58813, 15228, 30734, 21905, 61316, 25249, ++ 592, 59098, 12702, 61666, 2542, 48870, 23376, 45626, 62530, 373, 43217, 14136, 27483, 54630, 
1580, 61910, 15447, 55433, 31099, 47949, 8389, 62826, 32366, 46717, 7759, 48635, 10685, 45224, 24860, 8538, 47563, 23178, 50010, 30022, 2894, 52881, 15497, 48060, 17945, 38691, 28608, 19774, 57801, 5828, 61469, 39431, 23299, 53926, 4971, 57445, 44058, 26088, 1809, 48624, 61881, 11790, 56098, 40660, 25482, 49726, 10206, 45332, 17230, 42888, 62490, 1040, 20842, 64194, 27038, 21946, 36445, 58467, 13491, 40902, 5954, 33975, 43079, 14146, 57837, 27575, 45395, 16320, 40114, 52082, 63334, 6446, 57679, 40529, 8316, 62884, 32883, 39333, 6034, 19714, 65336, 24575, 54992, 11690, 48282, 17402, 57068, 25173, 53736, 41074, 8644, 44768, 1574, 38832, 25386, 43385, 3612, 55042, 19161, 42168, 6798, 53057, 12626, 10014, 46843, 27020, 34257, 64380, 15117, 49751, 26544, 38413, 54462, 20574, 64637, 11736, 58245, 8628, 32514, 63979, 4796, 35186, 62319, 14474, 28057, 4380, 65511, 22400, 57467, 29557, 63718, 3407, 59307, 11812, 52397, 4888, 62989, 8166, 40313, 2732, 49488, 9893, 53162, 33968, 25504, 45934, 18101, 54506, 14140, 40031, 51643, 16236, 34975, 28256, 42937, 14383, 29903, 65392, 52387, 2747, 29036, 24088, 40857, 75, 32551, 62191, 3147, 36200, 10435, 56002, 7082, 26563, 63131, 2343, 27781, 35010, 56315, 24663, 15755, 40251, 28146, 57677, 2730, 49803, 65203, 14072, 53666, 28803, 11507, 43104, 30717, 14408, 39337, 58609, 44713, 11983, 36860, 5755, 32964, 13816, 45809, 60788, 4846, 47992, 32511, 10240, 42748, 3954, 40926, 58647, 17460, 55929, 23436, 49772, 28388, 59926, 16820, 41341, 3886, 35892, 47350, 844, 26419, 53962, 33156, 63418, 15779, 57699, 27548, 64128, 7278, 32510, 14848, 39089, 47437, 5034, 23445, 42955, 54736, 7495, 38559, 13793, ++ 33112, 43641, 9196, 29502, 36597, 54906, 5188, 33733, 12168, 38312, 31569, 57723, 45246, 18743, 36848, 28553, 53012, 6747, 23074, 59504, 14524, 39811, 4439, 18354, 60049, 33940, 21619, 37703, 55137, 34555, 17677, 57378, 10190, 33581, 44188, 27163, 58620, 31599, 60936, 10000, 52399, 42335, 11497, 47504, 27858, 3092, 58889, 13673, 41415, 10624, 
37077, 17782, 58681, 29261, 7555, 37872, 30704, 15514, 34299, 20273, 59886, 27376, 54872, 7856, 24103, 51099, 39695, 5454, 42300, 46922, 7613, 26439, 48472, 29110, 60187, 24989, 3165, 47703, 35632, 388, 23270, 65276, 30598, 2810, 20104, 37954, 23666, 49173, 28305, 17745, 3391, 61397, 29817, 42522, 2330, 46226, 16524, 36664, 26226, 43481, 32178, 2115, 50241, 19528, 61587, 26590, 51229, 14876, 53277, 17058, 37325, 48018, 9420, 28088, 65192, 25839, 36642, 50255, 31972, 852, 52137, 18616, 37126, 21880, 61998, 4992, 48105, 14769, 29383, 41723, 18318, 46256, 39791, 13898, 48765, 11376, 44536, 33571, 60661, 40036, 9472, 44083, 601, 37626, 20345, 49946, 33904, 16136, 32069, 44891, 18423, 48182, 14312, 61947, 30011, 42602, 19340, 13458, 56394, 37020, 1862, 32375, 59864, 24858, 5674, 48097, 11503, 58855, 22164, 55171, 8436, 46177, 22869, 35242, 59073, 16646, 48987, 56425, 19644, 45345, 26259, 52645, 31012, 41405, 18708, 48543, 32327, 21281, 59988, 14320, 45146, 10809, 59383, 1760, 47843, 18928, 38245, 17509, 30211, 39817, 22494, 48583, 35491, 57200, 2419, 52776, 26296, 5055, 22022, 54558, 24634, 51203, 64941, 23605, 38416, 20808, 55724, 17264, 26056, 54351, 22758, 61154, 27789, 7533, 52186, 11499, 38997, 14877, 44458, 11912, 32400, 65346, 29352, 58364, 16374, 31442, 48995, 19141, 8497, 44923, 23570, 34508, 2138, 38056, 20680, 55234, 50611, 11293, 64933, 29942, 50207, 9547, 34334, 56802, 27769, 49645, ++ 64731, 19942, 31409, 56407, 18640, 15153, 40957, 59587, 19736, 53716, 8545, 22795, 5767, 51231, 13329, 44377, 20803, 42199, 35268, 490, 52093, 28768, 57199, 25741, 53152, 1077, 62429, 14793, 4719, 60836, 43096, 699, 38493, 64327, 20868, 7695, 40045, 1270, 24514, 45994, 4271, 34630, 64741, 16949, 51527, 19103, 32532, 45199, 28810, 63769, 24152, 52787, 9585, 42497, 21748, 53792, 4301, 60827, 47483, 1576, 41626, 14349, 38367, 29509, 48094, 14989, 59013, 33831, 13115, 55592, 19145, 65094, 741, 39375, 12146, 53883, 21250, 61269, 11440, 41397, 53516, 9761, 26702, 57002, 44354, 
16683, 60917, 759, 46610, 35074, 52323, 21995, 14537, 50510, 34417, 23186, 58589, 4280, 63078, 9933, 45420, 15239, 37220, 5245, 33969, 11437, 60609, 31353, 5670, 64537, 13102, 24370, 59115, 35032, 15315, 2946, 56857, 20259, 62809, 13850, 43046, 8960, 55313, 2179, 42392, 17144, 30917, 59269, 646, 33983, 53739, 26817, 2895, 52319, 29953, 57194, 24665, 1555, 18695, 52942, 34475, 16935, 51852, 11233, 45641, 7561, 61057, 26398, 57782, 10137, 36283, 29361, 57253, 21438, 6140, 35530, 62764, 47023, 8553, 20625, 52265, 43825, 9011, 30226, 63218, 20924, 36868, 45029, 3118, 39797, 32768, 4498, 57350, 13164, 44657, 6347, 34036, 9784, 38515, 12776, 59506, 22478, 14881, 61583, 37283, 4765, 54115, 9415, 39415, 52867, 6131, 42710, 30441, 51008, 13040, 33497, 54300, 6813, 58954, 9507, 62756, 3992, 16925, 24371, 40677, 9197, 49134, 33665, 47574, 3135, 59512, 16137, 8491, 53698, 1235, 31330, 11779, 40480, 62883, 6278, 50643, 15838, 47109, 34498, 24300, 45675, 63188, 5100, 35359, 53646, 22970, 6379, 48329, 10724, 44122, 62058, 4915, 60531, 36573, 2795, 56038, 12854, 53413, 46229, 31203, 3833, 26567, 35462, 1081, 44654, 16264, 60850, 2640, 17856, 42059, 11086, ++ 3888, 54276, 47273, 6113, 44441, 63976, 28456, 10411, 48293, 24916, 41716, 64559, 34704, 29638, 60559, 3570, 63371, 10236, 49690, 26636, 37118, 19613, 43923, 11602, 30923, 42540, 23803, 50276, 29092, 19886, 12961, 49207, 25556, 16288, 46460, 62278, 12573, 54120, 37556, 15000, 56172, 22584, 29862, 40407, 9130, 36222, 62785, 1074, 16561, 47336, 3396, 33932, 50844, 14650, 64692, 27722, 44236, 19217, 12863, 63068, 24653, 57318, 3525, 65308, 22561, 36982, 10757, 28570, 52053, 4004, 30827, 35060, 52765, 20531, 45744, 32525, 8349, 38512, 29746, 50697, 18730, 37252, 47010, 13539, 31778, 50294, 10800, 42029, 13004, 56045, 8982, 43852, 57231, 11087, 60329, 6805, 40043, 51123, 28079, 21720, 59688, 29310, 65101, 23578, 56276, 42376, 21167, 46913, 35928, 41410, 29756, 54071, 389, 45925, 51716, 31102, 43781, 7461, 40134, 30392, 60043, 
24895, 47581, 33090, 25743, 52770, 9549, 43282, 23847, 62713, 5891, 37706, 61304, 19516, 36106, 6964, 42051, 55388, 47184, 6165, 23972, 58848, 26021, 56096, 30793, 23410, 41070, 1391, 20942, 43119, 60331, 2009, 24364, 45561, 51782, 17251, 822, 27968, 41464, 64714, 23871, 12681, 49639, 17446, 41770, 55764, 9454, 18882, 60950, 25573, 63450, 19926, 37391, 51742, 21376, 60516, 30181, 64675, 25048, 42822, 5758, 47559, 1463, 28414, 11474, 65527, 23557, 46985, 29665, 17923, 25997, 64649, 20486, 35994, 62398, 4502, 43322, 25245, 45635, 34509, 21404, 52246, 32702, 59267, 15849, 63738, 19925, 56046, 13211, 41215, 29172, 35027, 43707, 27641, 41988, 61958, 46824, 2391, 35986, 30583, 19832, 37951, 10838, 64263, 2988, 31714, 18674, 29871, 50366, 22, 39840, 55429, 19991, 38202, 27402, 14626, 22420, 40539, 25308, 51211, 30218, 42705, 19541, 8990, 60277, 39972, 62360, 18334, 53192, 21573, 37167, 27034, 46146, 52089, 23893, 59865, ++ 35002, 26025, 16822, 38949, 24350, 1026, 50769, 35741, 3232, 61173, 16563, 2290, 49258, 10825, 40064, 16185, 31851, 47393, 17900, 61259, 9127, 65086, 3134, 55914, 35881, 16046, 45905, 9492, 52562, 40532, 59161, 32102, 54474, 5522, 35962, 29247, 50869, 18878, 33069, 63300, 8243, 50084, 1920, 60184, 24810, 53209, 11928, 50420, 26646, 38507, 61062, 20439, 31651, 39987, 259, 49460, 8413, 36368, 53296, 32695, 9147, 50201, 34684, 18254, 43990, 2065, 61804, 20016, 38130, 57960, 44549, 8812, 15131, 63181, 4648, 59222, 26123, 55298, 15923, 6180, 58653, 33362, 3822, 62444, 7189, 22366, 36219, 26390, 64805, 31149, 24763, 37130, 1406, 26775, 41214, 30498, 12671, 18994, 53422, 38506, 6369, 49072, 9055, 39891, 16394, 703, 58158, 10128, 20118, 2646, 50032, 22013, 39163, 18467, 11219, 61511, 23459, 16534, 53799, 4035, 22440, 38642, 6231, 63497, 12743, 39392, 57317, 21012, 50312, 13293, 46888, 10477, 23242, 43759, 15534, 64289, 21625, 9960, 29282, 62930, 38447, 12577, 41754, 3902, 48691, 13580, 54678, 34691, 50974, 27691, 13036, 54289, 33037, 11021, 39511, 55634, 31857, 
60104, 15499, 33283, 3664, 39240, 61816, 34154, 216, 27054, 52803, 32177, 12338, 50107, 15150, 43283, 10178, 27661, 1777, 39612, 14555, 46621, 3562, 55500, 34569, 57758, 40014, 51566, 46015, 35513, 16548, 57234, 297, 61020, 37738, 3296, 52128, 8973, 22888, 55865, 29291, 15074, 50685, 1514, 42112, 10547, 47088, 5988, 44051, 36517, 31445, 7765, 25631, 62599, 18706, 48681, 4329, 57635, 18111, 9721, 25144, 51871, 14014, 57306, 44929, 956, 53934, 21230, 41546, 56331, 9322, 61865, 20607, 57831, 26144, 13552, 63540, 1787, 52311, 34747, 54931, 9804, 64796, 17620, 6528, 62580, 28873, 49906, 24047, 14303, 6080, 41764, 13240, 55683, 6959, 63160, 12396, 31924, 5462, 45521, ++ 14599, 41223, 62674, 8200, 58643, 32732, 13417, 53208, 21350, 30478, 37761, 55813, 26260, 19479, 58502, 36168, 24041, 56539, 5095, 33296, 45093, 13874, 41022, 22233, 49057, 5852, 63884, 32857, 2461, 26815, 7071, 21923, 11774, 58007, 42122, 3470, 23566, 43337, 6052, 26240, 44800, 21100, 41726, 33691, 6475, 43794, 31046, 22261, 55445, 8564, 13141, 45863, 6179, 59758, 35208, 23140, 56711, 26308, 5285, 44953, 21470, 40212, 6402, 56450, 12019, 53948, 32364, 46484, 6900, 16615, 24711, 49938, 28183, 41970, 34258, 17876, 44042, 1844, 63633, 28612, 42558, 14587, 54262, 25307, 38878, 51636, 59609, 5396, 19157, 53843, 15163, 63707, 31989, 54691, 15968, 47054, 64269, 33273, 293, 13817, 43052, 18211, 54440, 27513, 52277, 32779, 24811, 48767, 62953, 27151, 57148, 8343, 63571, 32259, 40936, 5853, 49256, 35402, 27456, 46353, 58071, 11490, 51012, 19949, 45520, 3221, 34950, 7600, 36995, 28285, 55743, 31923, 59029, 985, 51221, 27757, 49864, 37277, 15021, 45254, 2206, 32780, 61829, 19211, 35900, 63350, 17817, 9198, 65174, 6480, 49257, 38253, 4426, 64460, 26111, 7677, 22205, 50317, 5428, 48377, 57484, 26492, 7104, 22580, 45487, 58162, 5048, 41048, 28516, 47236, 1048, 53933, 31254, 62042, 49490, 23514, 54802, 17884, 50426, 20583, 27457, 15926, 7701, 19270, 24793, 3957, 42256, 11989, 33864, 43979, 13908, 48366, 31899, 16204, 
40840, 46282, 11156, 63954, 20135, 56710, 26155, 61553, 19246, 55153, 27392, 570, 61176, 45475, 38869, 1915, 52029, 11181, 37430, 14719, 50279, 33916, 59122, 21693, 43447, 8722, 32886, 60177, 28223, 48822, 12947, 26605, 47625, 37684, 15396, 42440, 8897, 46338, 32773, 23780, 41976, 7863, 29673, 45860, 13953, 38876, 47830, 21874, 36101, 467, 43727, 57369, 48150, 29591, 59441, 24998, 32893, 40380, 19406, 58062, 37822, 21099, ++ 52681, 1754, 29102, 50118, 21967, 46359, 26669, 39567, 57279, 6856, 47063, 12471, 33421, 45735, 7400, 53852, 1220, 40680, 21257, 51578, 24726, 54270, 29450, 7523, 58242, 27945, 19014, 39325, 56339, 44735, 64795, 37186, 47282, 28246, 18269, 60522, 55541, 10726, 48316, 59672, 36579, 13246, 55000, 15921, 61982, 20103, 56827, 4088, 35457, 42803, 57720, 24997, 54288, 18628, 10918, 46583, 16907, 59244, 29848, 51991, 16347, 61213, 31373, 27799, 48958, 25267, 15752, 60035, 23231, 43304, 62600, 2745, 56861, 22183, 10573, 51545, 36652, 49228, 19847, 46081, 9104, 60521, 21411, 48268, 17464, 2046, 29218, 45663, 39702, 4136, 47907, 7455, 44616, 20315, 49910, 3626, 24018, 41765, 61203, 31023, 57711, 35307, 2912, 45748, 12536, 62085, 44026, 7054, 38181, 14410, 34303, 44968, 4409, 26350, 58610, 21377, 55921, 1650, 64877, 9815, 17937, 36276, 28861, 56264, 16215, 60501, 27319, 65282, 15760, 51614, 4265, 18020, 39069, 33309, 12315, 60099, 3603, 31587, 56644, 20176, 27042, 50517, 7204, 46722, 28591, 5272, 38993, 46372, 30346, 41929, 16565, 19948, 47628, 14895, 43664, 58489, 37572, 11735, 44545, 28901, 16807, 53863, 35919, 50832, 13338, 37685, 15792, 64373, 7535, 34511, 60048, 24640, 6655, 35665, 11279, 41859, 5149, 36600, 31983, 8760, 63770, 44278, 33074, 62506, 53489, 31358, 58757, 50582, 27289, 8540, 55185, 24040, 58031, 26990, 60658, 762, 33143, 39155, 5556, 31173, 37066, 13763, 38531, 29745, 12141, 50096, 23345, 15167, 57039, 21199, 31880, 60584, 22529, 64075, 29812, 6821, 39293, 3544, 29040, 65486, 23287, 16617, 40088, 5698, 36154, 60690, 2041, 33422, 6739, 
59397, 30444, 51613, 4442, 60913, 18072, 57221, 49447, 1262, 58136, 31612, 4234, 59256, 11621, 54436, 16589, 33387, 22282, 10225, 44090, 3355, 51126, 8738, 49007, 84, 28236, 63840, ++ 9831, 36832, 55943, 11649, 42860, 4486, 65508, 15560, 9078, 43997, 23668, 62868, 4224, 52350, 27794, 42809, 14845, 64112, 30274, 11969, 38571, 1854, 60385, 34346, 12646, 41647, 61403, 11033, 17222, 35119, 14012, 24174, 1798, 52255, 8941, 34183, 14565, 39514, 30768, 17352, 2670, 58212, 27356, 48740, 10300, 46874, 37741, 14214, 65151, 18187, 30090, 2147, 40948, 28373, 63444, 38834, 2625, 41941, 11407, 37370, 835, 47068, 13309, 63868, 4531, 41318, 36075, 100, 50576, 10083, 31702, 37471, 13927, 47312, 61476, 6686, 23031, 12871, 56512, 23950, 35285, 1062, 40671, 11243, 64411, 43233, 55745, 10326, 21032, 58787, 27671, 38376, 18067, 62188, 9648, 36353, 56565, 8425, 20916, 47458, 10937, 25645, 63895, 22194, 36534, 4644, 28832, 17891, 55518, 30541, 11751, 52736, 17360, 47401, 8795, 37971, 29543, 14704, 41979, 32820, 44429, 61755, 150, 40844, 32321, 22666, 48436, 1512, 44871, 24565, 41459, 57823, 8448, 45772, 25950, 40640, 17584, 48215, 7941, 39556, 58209, 10882, 52571, 15847, 59854, 24884, 53515, 22063, 3006, 58992, 25675, 61599, 28415, 53327, 34311, 3292, 18805, 54835, 24701, 61254, 10651, 43027, 2651, 19522, 62597, 30594, 49045, 23400, 56219, 20503, 38734, 17285, 45693, 56897, 18654, 64122, 28607, 58568, 12436, 52276, 37620, 2198, 49753, 10063, 13542, 38369, 6745, 21570, 63656, 39721, 19570, 4950, 41680, 7572, 36830, 21970, 53320, 12763, 60196, 48145, 8345, 51551, 3246, 65326, 41511, 17709, 54016, 34161, 10197, 42928, 6477, 45941, 26737, 177, 44569, 56444, 19393, 54737, 49655, 12097, 46397, 4528, 58121, 25337, 51306, 17843, 43838, 22249, 53140, 24776, 36504, 17163, 43615, 28130, 39449, 12343, 21185, 33938, 18634, 52914, 40998, 15479, 27927, 37721, 65423, 5254, 34918, 52469, 20083, 64426, 28651, 36171, 22826, 53986, 43400, 15896, ++ 47985, 23208, 30996, 17703, 60477, 34590, 19097, 51953, 
32179, 59950, 17515, 49877, 38838, 22089, 9713, 59130, 25507, 46509, 6318, 57508, 15823, 48337, 17575, 46201, 23383, 50675, 87, 30567, 53674, 4239, 51215, 40850, 62616, 30329, 45451, 20550, 49377, 387, 65067, 25311, 51879, 31951, 4873, 38960, 29422, 667, 23753, 49644, 32920, 5565, 47888, 62312, 15669, 51612, 6941, 31154, 54662, 20064, 64959, 25655, 55281, 23431, 38600, 20701, 45546, 9457, 55863, 30065, 64423, 39850, 18569, 54826, 27278, 1329, 39141, 29944, 64949, 31974, 39964, 4885, 52462, 30914, 57466, 28043, 33006, 15630, 34531, 24255, 49573, 32443, 12043, 51855, 2680, 34142, 25969, 45062, 28523, 16835, 50701, 4925, 55868, 39535, 7802, 48132, 15786, 58821, 40550, 50888, 1791, 60109, 41705, 23968, 60983, 33376, 54524, 13250, 48246, 59735, 19566, 52514, 5122, 26164, 14164, 49035, 8250, 53427, 10784, 40070, 19316, 62097, 11946, 28758, 54634, 22036, 63200, 5583, 53317, 25132, 64848, 13370, 35354, 23142, 32173, 40243, 920, 43969, 14525, 33453, 55785, 10510, 36758, 124, 40530, 8790, 23047, 63429, 30835, 40996, 1272, 35210, 38643, 20212, 56976, 46446, 9857, 25798, 1655, 44098, 10952, 51256, 3806, 58393, 13744, 26426, 40542, 8095, 47786, 609, 43522, 25881, 21904, 60301, 29999, 23072, 40999, 59762, 18126, 45492, 2567, 53833, 32535, 62089, 12281, 47285, 17089, 50244, 27928, 42518, 23785, 18214, 44827, 24852, 57733, 21662, 7388, 46348, 4671, 37825, 64735, 27990, 55446, 16798, 53212, 40278, 13010, 24253, 35534, 15496, 37024, 26962, 41863, 52950, 34935, 14560, 10043, 55066, 28907, 65077, 12481, 49270, 3116, 54563, 10306, 64618, 5544, 50894, 60022, 26768, 44398, 10574, 24506, 45385, 61246, 7683, 25814, 46868, 58599, 1594, 38400, 15640, 46487, 11945, 60655, 17301, 6724, 34043, ++ 62006, 3596, 57683, 38201, 6416, 49137, 28047, 1982, 42284, 11185, 35364, 171, 29289, 64815, 16921, 37431, 32658, 8761, 50173, 34921, 62181, 26994, 37842, 4816, 54986, 20702, 36672, 47814, 26058, 59531, 21343, 9731, 16516, 38191, 5019, 63723, 27623, 56978, 35574, 11384, 41040, 21600, 53701, 15107, 
64160, 43128, 59455, 7945, 52189, 27087, 36896, 12155, 44503, 34518, 22112, 48497, 14094, 33241, 43085, 7675, 29065, 50931, 3153, 58539, 33526, 52838, 19580, 14564, 5630, 25783, 48200, 7975, 41149, 58276, 17284, 53228, 3431, 15333, 48584, 25884, 61999, 18360, 12398, 44996, 5728, 52850, 8184, 61559, 143, 42295, 60662, 22993, 55327, 14004, 58311, 5529, 52762, 59939, 37826, 34552, 14761, 30014, 59369, 19235, 51798, 31622, 10566, 23225, 46556, 20838, 6306, 36910, 15588, 1203, 42755, 28480, 3352, 25103, 39525, 12133, 57478, 30757, 54711, 24264, 63820, 17511, 35651, 59543, 31155, 6772, 49480, 34769, 2302, 47549, 14289, 42975, 21194, 34133, 282, 43610, 54862, 4583, 63873, 21482, 57059, 37082, 11522, 62487, 23709, 45064, 31405, 51188, 18175, 56666, 46572, 13765, 49842, 9387, 58793, 14649, 65291, 29419, 8057, 31605, 59401, 40234, 54383, 18279, 36393, 29738, 42328, 33440, 48686, 2449, 53028, 32931, 22623, 61080, 14170, 54178, 6199, 17056, 56813, 48085, 973, 28754, 48852, 25586, 36256, 14530, 49413, 29088, 56559, 34652, 64965, 3016, 58453, 7132, 54488, 2064, 62919, 14868, 35093, 32369, 52964, 28868, 59997, 24081, 12611, 50810, 3655, 36199, 9431, 32220, 47851, 62270, 8094, 59598, 1723, 61345, 7354, 18839, 30033, 63800, 38766, 3762, 45356, 7776, 40349, 19287, 62711, 21765, 46946, 35145, 25583, 15935, 37366, 3556, 63740, 35653, 56751, 2004, 20303, 51658, 41590, 12685, 18132, 30673, 61638, 40864, 4535, 57071, 26299, 39766, 55443, 30134, ++ 10595, 41497, 20187, 53447, 25656, 12948, 45352, 62229, 20736, 54125, 25094, 56832, 13743, 43505, 54754, 2724, 61591, 18868, 41837, 22501, 3736, 43325, 10382, 64512, 32465, 8204, 62998, 15385, 6526, 42930, 31685, 48550, 57456, 25038, 53246, 13562, 43845, 7313, 19300, 46672, 61237, 8766, 45102, 34831, 7122, 18813, 25890, 39807, 16300, 60592, 20775, 56059, 24404, 3782, 58080, 9942, 61682, 4737, 49162, 15374, 60349, 10406, 43622, 17523, 8191, 26923, 42408, 57427, 35603, 52362, 21584, 60796, 12542, 33126, 24161, 45163, 36150, 59737, 9546, 43486, 
7521, 38261, 50083, 22746, 62799, 41098, 19666, 37505, 47254, 16981, 6556, 40310, 29662, 48656, 21469, 39229, 12331, 24996, 1942, 62720, 22815, 44491, 1118, 33656, 42591, 3436, 54847, 34829, 14051, 65395, 49476, 27953, 44318, 64072, 18982, 51466, 62350, 7857, 50470, 34587, 21122, 47089, 6998, 37846, 3677, 43865, 29634, 5322, 54094, 38325, 20398, 64450, 16642, 36626, 30562, 55966, 9120, 59379, 46179, 18929, 28133, 42294, 17210, 48930, 8672, 29723, 51711, 5807, 49558, 15632, 7424, 58053, 12389, 32478, 5089, 36151, 21148, 45302, 26912, 51506, 6020, 47848, 53070, 21977, 4662, 33802, 14451, 60426, 5881, 65113, 16425, 8655, 62738, 30753, 21040, 55884, 15665, 39021, 29517, 46356, 34916, 42541, 11003, 33680, 15187, 64526, 5589, 55728, 10627, 43401, 17476, 1308, 24993, 9810, 21027, 44324, 15426, 35681, 31719, 40383, 30061, 49514, 9926, 43611, 1046, 39909, 16284, 48314, 33233, 19539, 40981, 58844, 20659, 63453, 5243, 17551, 42583, 33329, 48415, 21061, 31533, 56032, 484, 43031, 20266, 58931, 32062, 16252, 56700, 33785, 38467, 13734, 30748, 651, 55831, 43150, 8757, 48249, 23066, 7069, 50270, 30342, 39572, 9294, 32198, 56261, 27337, 49606, 8158, 24232, 48551, 13582, 32597, 2403, 45023, 24636, ++ 51556, 14191, 47497, 808, 33195, 55166, 7710, 29840, 37221, 4990, 48089, 40184, 31221, 5947, 20504, 47691, 28265, 52953, 12876, 31487, 55567, 20169, 51913, 14663, 27550, 44216, 24457, 56774, 38708, 18030, 61012, 1164, 34758, 8018, 41440, 32566, 22391, 51368, 37952, 3804, 28904, 16831, 56317, 23062, 58464, 32280, 53007, 11269, 45638, 1365, 42210, 9250, 50538, 38189, 27515, 45270, 35772, 22739, 56933, 39366, 18796, 34939, 62192, 31887, 47976, 62966, 2365, 28820, 11587, 44684, 3674, 34611, 49521, 5153, 56017, 10968, 19273, 27694, 51127, 33603, 16410, 55051, 2344, 27176, 35806, 3290, 56703, 25628, 14400, 53626, 35185, 63169, 10516, 42874, 906, 64904, 31387, 43566, 19974, 46434, 11534, 53942, 26968, 64621, 13460, 24532, 60775, 8090, 38984, 26029, 3891, 56095, 9520, 22630, 35808, 10971, 
31808, 45400, 23613, 2397, 63029, 15419, 42891, 58864, 33535, 51876, 13511, 46617, 14890, 25488, 44277, 9729, 52468, 23661, 61549, 3156, 38068, 29852, 16059, 62147, 10360, 53909, 2812, 31105, 60793, 19602, 40893, 27156, 34943, 64039, 42724, 20777, 38816, 60567, 24136, 52439, 62251, 2479, 17610, 39751, 23230, 32897, 16083, 41995, 63623, 24248, 44792, 27421, 46968, 21707, 52502, 25139, 43749, 12083, 37962, 4169, 50908, 9561, 64984, 2956, 19879, 62871, 26698, 54628, 44909, 36942, 20409, 30665, 61371, 22729, 51847, 38756, 45893, 30970, 52537, 37955, 26554, 61735, 19091, 11860, 56347, 5151, 59463, 20359, 63322, 26400, 8815, 58051, 2489, 61674, 29981, 14128, 49357, 25737, 38582, 28406, 52497, 23006, 11388, 39648, 51028, 13492, 47259, 23931, 50106, 11136, 26317, 48111, 1150, 27514, 6042, 52424, 41240, 58508, 19854, 31914, 61687, 28765, 54224, 17033, 33214, 13412, 63032, 22589, 53807, 2924, 36874, 11011, 55047, 34413, 18900, 62779, 42599, 20912, 65205, 7972, ++ 58349, 35209, 27222, 44268, 64272, 16403, 40742, 58795, 14497, 63572, 18343, 8342, 60817, 50891, 35949, 11525, 39632, 724, 63139, 44895, 7180, 59803, 29936, 39967, 58746, 2622, 52734, 11452, 33240, 46832, 13091, 28726, 54839, 19751, 61653, 2983, 59037, 12260, 54580, 24054, 62954, 42442, 1666, 37219, 12826, 49018, 3179, 57353, 30539, 21899, 64536, 33637, 14922, 62832, 526, 17706, 52443, 26576, 1756, 30364, 53148, 5933, 23941, 40707, 13790, 22343, 37263, 46000, 59597, 16963, 31009, 64688, 16006, 42814, 29246, 46403, 63062, 299, 41524, 21926, 64097, 20712, 46771, 59318, 13704, 31473, 50549, 8829, 65169, 30353, 4464, 26886, 18814, 57337, 35943, 15717, 49323, 9213, 52163, 32537, 40808, 17486, 49658, 5985, 37522, 47303, 28327, 43689, 19702, 57629, 33020, 16905, 40362, 53552, 4861, 57993, 37405, 16110, 55167, 41221, 10418, 36820, 27731, 18844, 9352, 23106, 61196, 26695, 56560, 463, 58486, 32647, 4830, 42147, 11305, 49118, 26282, 51533, 5983, 33159, 50200, 24235, 36426, 44591, 15124, 47407, 2089, 55192, 18629, 3801, 29213, 
48277, 1752, 27487, 42117, 6978, 30111, 43262, 34602, 55530, 12927, 60864, 716, 36698, 11900, 51936, 3037, 55357, 12748, 39403, 380, 29076, 61301, 19326, 59591, 45088, 27244, 41608, 24168, 36118, 51965, 7436, 40296, 4334, 23840, 9175, 58189, 41453, 3639, 35211, 8129, 63376, 6243, 59329, 13447, 4382, 49000, 9118, 47775, 43048, 22269, 37516, 27615, 46688, 13333, 54953, 42315, 22817, 35328, 47172, 7625, 43897, 1383, 54466, 10619, 57885, 2786, 45223, 64470, 5946, 25885, 63048, 37322, 8350, 34199, 62386, 41658, 22708, 61490, 44288, 59745, 11413, 23520, 7408, 47426, 14395, 2506, 38712, 11886, 42304, 59043, 48912, 4756, 45742, 15255, 43263, 63888, 21439, 44804, 707, 52013, 29371, 5813, 50422, 15042, 38777, ++ 18568, 4172, 61234, 9416, 22327, 36455, 24261, 2902, 50547, 33952, 27354, 46080, 22935, 16007, 55997, 26453, 58112, 23580, 33555, 18093, 39015, 24975, 1392, 49426, 9323, 35695, 19402, 42346, 3527, 65342, 26480, 44548, 10469, 50400, 23315, 46069, 29974, 16182, 33530, 47745, 9901, 31428, 50804, 19675, 62208, 27903, 40786, 17880, 36039, 54883, 5127, 28579, 53442, 19788, 32152, 59654, 11684, 41208, 64277, 46238, 12772, 36523, 57659, 1153, 54527, 10853, 51665, 6577, 20971, 40451, 54070, 8970, 26523, 58758, 20451, 7000, 37914, 54441, 11816, 57767, 4317, 36989, 29562, 10046, 41850, 58111, 17801, 28704, 43724, 21819, 51329, 45805, 7738, 33094, 23456, 54309, 6264, 63518, 26303, 4089, 61419, 7324, 29443, 56882, 21595, 9691, 63213, 50, 52363, 12223, 48447, 61960, 30949, 13700, 46280, 25674, 20330, 64656, 6502, 30064, 60730, 47963, 1085, 50711, 65023, 41638, 2675, 20856, 34224, 40328, 18440, 50862, 27470, 60617, 19706, 35707, 13717, 41131, 57707, 20586, 38703, 12734, 65319, 6719, 58443, 26511, 39328, 13122, 59482, 37793, 52956, 11853, 65017, 16327, 54547, 19258, 48775, 11259, 64220, 4126, 28207, 46052, 56492, 26032, 49411, 19100, 38066, 9069, 32490, 57694, 49941, 35436, 4980, 48297, 7128, 33949, 17766, 57953, 11556, 56470, 16266, 31165, 59234, 18871, 51331, 32258, 47128, 13148, 
53076, 28051, 57481, 15594, 33393, 20027, 41898, 64109, 28574, 55647, 218, 25767, 64430, 16591, 51273, 3825, 36059, 30762, 5862, 52368, 11489, 25274, 56146, 22067, 31119, 46195, 19064, 41361, 14416, 30423, 17108, 53492, 32578, 3341, 18356, 57191, 15719, 4727, 53858, 9529, 35747, 14814, 29458, 50555, 36890, 63338, 32946, 53539, 44969, 20997, 64484, 26034, 182, 19346, 28354, 35286, 57810, 25137, 6230, 31388, 59799, 16684, 37535, 9730, 58971, 35576, 27709, 45988, ++ 31648, 49369, 12562, 39263, 51184, 5576, 56206, 43789, 21526, 10203, 57795, 1506, 41536, 34410, 3900, 44125, 9931, 48601, 5403, 51321, 14126, 47112, 33895, 17002, 54054, 22845, 61959, 29571, 49986, 22149, 5713, 58563, 36241, 15183, 39675, 6682, 36855, 64453, 942, 40247, 59851, 6232, 26367, 47233, 4516, 43668, 10527, 63609, 6828, 25373, 39276, 48285, 12518, 43925, 40312, 8633, 47546, 21159, 7153, 17102, 28086, 49589, 19391, 44331, 30723, 61040, 25059, 34047, 63684, 654, 24410, 47193, 38910, 1986, 35409, 49875, 14266, 30482, 25093, 32644, 47853, 14808, 44412, 53336, 24472, 6087, 46277, 39094, 1550, 59516, 11643, 36774, 61887, 47709, 13081, 41383, 29087, 18393, 37092, 55092, 23736, 35515, 45267, 16037, 40180, 53244, 18291, 36206, 30154, 42104, 6703, 24192, 2143, 59204, 38307, 49919, 563, 43524, 26878, 52872, 18097, 22104, 56967, 31636, 12440, 36064, 55348, 49992, 10993, 62835, 7892, 45447, 12939, 38934, 54369, 1179, 63666, 22806, 8286, 46965, 1658, 56258, 28723, 34260, 9777, 21867, 62844, 45949, 25460, 8033, 22650, 44202, 34049, 47115, 9658, 37339, 59801, 15265, 25205, 50445, 38322, 18063, 10235, 42651, 7260, 59143, 27887, 61895, 41354, 15434, 22802, 10742, 55003, 37084, 23644, 63515, 1281, 31641, 47406, 5347, 28298, 49237, 12669, 38186, 61831, 1850, 16722, 26337, 39254, 18527, 44700, 24441, 49890, 54893, 2238, 22524, 39533, 17641, 32830, 53645, 6439, 34280, 10938, 57413, 18801, 45258, 62136, 17965, 39570, 64201, 15801, 37116, 60467, 6176, 34806, 50484, 24748, 55527, 36693, 9895, 60038, 40609, 44626, 28705, 
52014, 38060, 20847, 31158, 55322, 2676, 45632, 18913, 4185, 16661, 24626, 9411, 27259, 51376, 5836, 36341, 55564, 40431, 61981, 10068, 17807, 50736, 39109, 14020, 41900, 26875, 56467, 47076, 25292, 11344, 54714, 1330, ++ 60058, 23726, 56717, 30549, 19254, 61793, 32001, 11920, 46971, 38537, 28811, 52701, 13128, 62545, 30679, 59305, 15306, 37032, 65274, 25926, 57007, 10750, 63678, 41308, 6069, 45550, 12157, 37524, 15765, 55688, 40333, 31114, 2191, 63476, 25636, 55950, 18565, 49705, 26980, 14671, 20978, 52594, 39151, 14033, 55299, 24316, 34091, 51252, 46470, 13824, 58979, 2386, 23612, 61295, 5685, 55506, 29306, 37571, 58732, 42576, 56161, 3873, 65498, 26165, 16101, 39618, 4251, 50143, 14841, 55487, 32808, 13373, 52994, 18162, 61876, 23577, 60292, 43941, 2948, 51932, 8478, 61146, 18953, 714, 63806, 33844, 12785, 54507, 16190, 32227, 56220, 20420, 2240, 27916, 60193, 3184, 46833, 58674, 14229, 42183, 10279, 57930, 2375, 62471, 32058, 5057, 50070, 14921, 59840, 20616, 45131, 35146, 51074, 28964, 17690, 9109, 56702, 34005, 12877, 39125, 4473, 44689, 8649, 25238, 45985, 5951, 28567, 15958, 42670, 30184, 22356, 57166, 31395, 6580, 47805, 29034, 17406, 45027, 31757, 60299, 25282, 43303, 18252, 47953, 52235, 35493, 4862, 16836, 50676, 32863, 56147, 14067, 24955, 4543, 57192, 28806, 450, 33556, 57819, 21628, 6627, 62952, 30473, 54085, 34873, 16596, 45814, 20717, 6283, 30947, 63998, 42778, 26786, 13260, 53559, 14944, 50149, 39549, 20295, 62241, 42978, 157, 45964, 25298, 9929, 43772, 50314, 65262, 7026, 59905, 509, 37377, 11314, 29556, 46447, 34040, 10264, 60818, 45422, 14279, 41183, 60333, 38994, 24510, 48778, 14574, 28515, 431, 49921, 32656, 4067, 51616, 12392, 27365, 65189, 8613, 58696, 787, 44088, 22413, 27712, 12818, 58258, 1944, 24995, 46017, 6547, 49096, 17665, 64315, 25250, 57737, 42642, 48779, 57041, 39956, 60226, 17525, 31020, 46673, 14923, 23868, 6670, 47307, 34122, 1206, 53266, 23359, 61135, 7393, 19895, 3164, 43663, 63324, 21716, 40559, ++ 17022, 7263, 42476, 2232, 
48358, 15433, 41938, 26780, 59706, 6637, 65048, 19689, 49502, 24593, 7884, 21678, 53644, 29057, 19277, 42632, 3094, 36507, 21409, 28498, 60231, 31985, 57658, 615, 47540, 8678, 20384, 52429, 43414, 17635, 48110, 4176, 41983, 9219, 56683, 44682, 32798, 11866, 65397, 35368, 22190, 60954, 21, 17072, 29624, 41642, 19263, 49900, 36726, 30933, 16663, 25135, 50291, 2782, 32734, 24552, 9079, 34320, 11941, 48386, 7369, 58991, 18422, 38047, 28339, 43187, 7722, 62383, 22698, 42130, 10217, 33776, 5916, 17587, 65387, 40244, 22412, 34897, 26349, 39571, 49276, 20137, 60441, 23768, 48401, 9460, 40980, 25211, 44112, 53051, 17187, 38750, 34015, 22133, 483, 30626, 51155, 19481, 48235, 12952, 25280, 55645, 38449, 26726, 3067, 54127, 10707, 63728, 7648, 21491, 61338, 41836, 15087, 48820, 22882, 60144, 29150, 53964, 33125, 62207, 17095, 38805, 48286, 59959, 4177, 53225, 37469, 1994, 65116, 24869, 15464, 58713, 39873, 4061, 53108, 12148, 37359, 5378, 61431, 14199, 527, 41657, 30698, 54101, 10269, 61186, 1082, 41291, 62012, 31553, 39973, 20417, 53595, 44666, 8384, 32203, 47585, 40708, 20029, 3624, 23818, 64842, 1359, 36053, 53269, 47917, 2289, 17573, 46526, 57285, 3390, 44389, 25739, 8950, 52707, 14006, 34249, 22197, 64245, 35399, 56262, 20901, 33165, 11782, 30281, 40804, 21798, 47657, 57016, 5437, 62346, 16149, 51691, 23632, 3404, 50501, 20781, 29439, 1644, 53272, 7868, 33699, 59075, 38277, 21292, 9288, 44981, 29242, 56877, 42859, 16365, 38166, 20436, 34359, 61920, 5064, 46648, 49533, 19936, 35383, 64948, 13963, 60859, 26844, 40952, 12156, 39138, 8005, 34330, 30062, 1399, 13080, 34863, 3735, 56413, 10810, 44050, 52529, 29900, 60463, 20805, 65057, 28996, 4284, 47878, 33751, 53694, 36569, 13103, 31104, 5117, 52206, ++ 33540, 64858, 26153, 53041, 29021, 8850, 54422, 456, 17865, 35667, 14736, 44624, 4550, 37585, 52078, 40399, 1934, 45834, 8554, 60638, 31008, 55263, 13336, 48783, 4095, 18488, 25257, 51102, 28027, 64183, 35329, 13686, 27306, 54141, 11166, 29372, 60319, 34452, 22776, 5189, 
53881, 28201, 2552, 45783, 8352, 31230, 38579, 60113, 9553, 54226, 33029, 8038, 57190, 10736, 46928, 63810, 13462, 53995, 15121, 63162, 39026, 51338, 21806, 41739, 53634, 32436, 46710, 9799, 57133, 19725, 48884, 36419, 4562, 51470, 27925, 56375, 47442, 37202, 26979, 13035, 55680, 5341, 57076, 10908, 30719, 7365, 36456, 3914, 38039, 64499, 5217, 50009, 13560, 30978, 6937, 55775, 10790, 50385, 44368, 60953, 8597, 39394, 27767, 34671, 43243, 22320, 8261, 64998, 46133, 32622, 16447, 39712, 27340, 47761, 33485, 5352, 31289, 63336, 2818, 35508, 11562, 14567, 40577, 1780, 58214, 21406, 7472, 24112, 35259, 13965, 47267, 17804, 43134, 33799, 51997, 10041, 34644, 50475, 26902, 16375, 55307, 32527, 49808, 27619, 56947, 23482, 64316, 19829, 43050, 28024, 36566, 21329, 6370, 50019, 17150, 63727, 12528, 26338, 41542, 55136, 1902, 14268, 58215, 43898, 51092, 13416, 29559, 56772, 10026, 24452, 58914, 38874, 7758, 32081, 40639, 21531, 60813, 30241, 36781, 58679, 7976, 54856, 17345, 6004, 29264, 60543, 2853, 48498, 54230, 14753, 62964, 8867, 19311, 35956, 25207, 40650, 6858, 58726, 36714, 27199, 8518, 62676, 44414, 19897, 65056, 41662, 10373, 23708, 54138, 61116, 40162, 18531, 2086, 23314, 48920, 4386, 47495, 11992, 29506, 51718, 15241, 30878, 7081, 54304, 10735, 33149, 43376, 270, 32381, 52731, 21466, 62197, 10468, 54897, 22070, 65316, 42054, 49763, 22889, 62646, 32311, 2298, 38169, 12451, 42916, 8806, 49307, 40098, 11789, 16109, 64318, 22453, 49949, 57578, 18019, 37998, ++ 10739, 45190, 13928, 36084, 20374, 63063, 37730, 32918, 48950, 25429, 55005, 31360, 11064, 63946, 17385, 32316, 61492, 12510, 35076, 16574, 50424, 24146, 7752, 38414, 53325, 34581, 43644, 10127, 39281, 23630, 4570, 60745, 7633, 33769, 58043, 38453, 12660, 20194, 62437, 43229, 17434, 58828, 50113, 18375, 57645, 15237, 52011, 20617, 44400, 3486, 26881, 65226, 21536, 42932, 33894, 4145, 35486, 44604, 20394, 45840, 228, 27400, 60507, 14367, 29497, 1628, 22900, 61524, 26747, 3061, 30237, 12242, 57984, 15599, 44819, 
979, 19940, 53731, 9154, 48738, 28870, 45441, 17082, 42609, 62309, 50994, 28228, 55442, 26594, 18505, 34315, 59009, 22538, 62606, 37618, 19793, 65518, 27260, 15354, 33455, 21190, 64127, 16708, 59152, 1460, 57275, 31440, 11706, 41066, 23064, 56493, 972, 58472, 12520, 54739, 24740, 52026, 19419, 39955, 55843, 46783, 64258, 20172, 52256, 27923, 49405, 32228, 55721, 19248, 63274, 8805, 27175, 54999, 3491, 21725, 62402, 23355, 7076, 64684, 42489, 20113, 9287, 22432, 40089, 11644, 45336, 7316, 38425, 3104, 48602, 17738, 59179, 51974, 10864, 45732, 3421, 35776, 51349, 15950, 22847, 61499, 28635, 37154, 11141, 31782, 48866, 39922, 18614, 43475, 33222, 13925, 51826, 28176, 19679, 65370, 10401, 55454, 18333, 2152, 27698, 45228, 24597, 39805, 53353, 14372, 42055, 37146, 23411, 4835, 34836, 27499, 50949, 43211, 30599, 53967, 13840, 44069, 31967, 11631, 55975, 46990, 31233, 12521, 36366, 26114, 2926, 50970, 43411, 4942, 26861, 13728, 62970, 33934, 52785, 31399, 62531, 26482, 54857, 39234, 9165, 36408, 63607, 42326, 23656, 39911, 22147, 56355, 16411, 58617, 5394, 50950, 27963, 37787, 15519, 46168, 26654, 6871, 18571, 37573, 8395, 20431, 58724, 16477, 55255, 25731, 35825, 19022, 58291, 31833, 44452, 1873, 29819, 8329, 41320, 28310, 62139, ++ 23124, 53921, 3494, 59207, 46252, 4716, 22683, 51870, 9591, 61069, 3211, 41113, 57143, 27859, 43088, 7007, 47456, 23136, 54535, 42000, 305, 46431, 64697, 26843, 14428, 62708, 6889, 59612, 16705, 56547, 41758, 48926, 21870, 45359, 246, 24247, 46602, 51778, 1951, 30844, 37835, 7498, 24844, 36649, 29108, 42684, 5789, 48662, 35072, 61585, 13077, 38030, 999, 55958, 26035, 18966, 60001, 23100, 6532, 31499, 54784, 10311, 37028, 5197, 64048, 35733, 43841, 12953, 50738, 41388, 65106, 45617, 38340, 21745, 31342, 63434, 40815, 13989, 31812, 59931, 1710, 38470, 52601, 23261, 2757, 15242, 45091, 12135, 43113, 52340, 7997, 14980, 45524, 1190, 48855, 24696, 4681, 40424, 56656, 3690, 45990, 5625, 52834, 11074, 47007, 14521, 48944, 19129, 51656, 5823, 37190, 
44578, 18652, 36487, 3597, 43006, 9849, 45705, 7348, 16775, 26387, 5545, 36660, 10133, 43702, 13153, 63920, 779, 44936, 29480, 39446, 59212, 11888, 36883, 46494, 14458, 44046, 30426, 36139, 2432, 57550, 45822, 63466, 3645, 52791, 30149, 15794, 57931, 32038, 54981, 8582, 34720, 26769, 39008, 29910, 23559, 60256, 5691, 46770, 9169, 35165, 52670, 5389, 62396, 21295, 8132, 60633, 4262, 26271, 63045, 5634, 34486, 61621, 752, 35105, 42225, 4791, 47031, 37863, 63221, 12004, 49695, 3506, 31946, 51624, 19463, 8467, 58892, 42762, 55517, 17810, 3800, 61159, 12364, 1112, 57832, 21365, 65421, 18446, 38612, 4628, 17307, 56962, 48196, 15363, 55269, 31770, 17005, 58201, 35626, 47802, 6734, 42071, 10815, 14792, 40752, 7490, 17920, 60723, 21410, 56605, 1321, 16872, 48290, 4025, 49981, 8552, 29141, 46810, 35958, 19592, 2125, 43902, 59279, 4889, 53157, 33550, 61032, 29328, 51777, 41161, 27601, 48700, 3413, 45115, 14593, 54306, 5508, 24086, 51303, 38648, 59541, 14863, 46538, 262, 47699, ++ 6375, 32370, 39547, 24833, 11554, 57430, 42779, 13446, 39854, 21136, 45421, 23497, 16283, 929, 50091, 20282, 56456, 29795, 4939, 27414, 58964, 33120, 19969, 40868, 2294, 45156, 30612, 21123, 46298, 1622, 31583, 14896, 63206, 19181, 32141, 61826, 8120, 35867, 25970, 49189, 10807, 41276, 63934, 3969, 55802, 11509, 63277, 25710, 15979, 23279, 52744, 30185, 47795, 14441, 51139, 40080, 9651, 52303, 61843, 38299, 17814, 57891, 47669, 23835, 52068, 16566, 56477, 5749, 35004, 17372, 8745, 24795, 2478, 59376, 11122, 25465, 7135, 57401, 43277, 21236, 15839, 64346, 6740, 32484, 58838, 40606, 21383, 63304, 366, 31643, 39920, 57548, 35699, 28924, 9863, 42470, 53689, 12508, 25782, 51966, 29771, 37987, 23921, 41487, 28666, 35762, 60257, 4270, 29297, 62901, 26245, 15471, 53038, 29833, 65264, 21942, 59593, 27600, 57534, 30581, 51299, 42475, 23761, 61449, 3765, 37909, 26012, 40932, 10615, 52623, 5059, 20740, 48782, 28260, 56369, 11, 60946, 11078, 53822, 17906, 29575, 7687, 33618, 37712, 18838, 59914, 26055, 49288, 
12657, 24066, 44504, 14857, 63205, 2229, 58382, 43403, 13647, 31160, 65479, 27323, 48466, 17388, 39159, 25640, 55671, 34262, 15116, 47199, 54619, 22113, 50608, 16817, 45500, 24937, 49097, 16022, 28829, 51165, 23044, 6787, 41297, 21208, 57656, 10682, 26973, 46720, 63464, 28625, 12883, 25711, 45715, 32643, 23002, 39860, 49312, 27855, 47469, 2525, 52836, 25476, 61422, 42574, 23177, 5701, 59720, 28094, 7179, 45904, 11805, 30365, 19683, 53774, 24365, 59833, 21797, 57344, 32839, 45609, 2960, 43199, 24495, 33629, 59378, 27171, 62753, 19160, 37141, 60401, 12715, 24030, 63851, 47974, 14182, 31628, 23404, 40670, 11506, 47662, 864, 13880, 64008, 5191, 33324, 57268, 21957, 63217, 37200, 27980, 62305, 9226, 17592, 32995, 52743, 20609, 35043, 55918, ++ 15870, 60554, 18775, 50663, 34653, 30163, 16792, 64121, 28422, 58515, 6166, 48242, 33697, 60024, 26116, 36279, 13833, 39461, 62403, 11701, 37933, 15129, 9534, 51606, 56133, 17748, 36797, 54645, 12768, 28963, 52101, 37333, 6147, 40642, 50530, 15933, 53480, 14149, 64829, 16611, 57091, 33192, 13512, 48186, 21234, 39950, 32376, 1560, 58120, 40669, 6279, 45016, 18015, 59292, 4895, 28261, 41917, 1939, 28955, 12429, 43365, 3353, 19913, 31085, 45129, 8366, 39989, 21394, 54342, 28623, 60202, 52253, 32215, 39429, 54915, 35839, 50875, 29695, 3646, 50261, 34116, 25821, 46992, 18095, 35370, 9674, 48043, 30164, 16541, 60910, 25512, 11203, 20899, 51443, 63664, 17683, 31859, 59798, 36140, 7848, 62237, 18045, 54599, 2803, 61796, 7098, 24912, 44004, 55221, 10052, 34286, 60595, 8495, 46683, 13368, 38034, 1535, 34808, 11202, 49588, 239, 58770, 15706, 34386, 47634, 18561, 57774, 14952, 50275, 24590, 61790, 41544, 16880, 6250, 32759, 40521, 25728, 49356, 21057, 41230, 48339, 24434, 51609, 14675, 43872, 6078, 35975, 1565, 64863, 40582, 5166, 47725, 22230, 53195, 19421, 7797, 55902, 37900, 18735, 40353, 2843, 59570, 12149, 44361, 51, 50184, 27130, 36630, 2700, 40959, 11748, 38461, 8513, 56080, 11336, 59679, 32435, 12977, 54058, 33320, 60172, 15728, 
35765, 61565, 44214, 840, 16493, 38678, 52099, 1702, 64716, 10016, 58408, 16913, 37751, 7943, 34370, 15178, 42001, 9575, 34900, 700, 52009, 39427, 33363, 20983, 63542, 36815, 55837, 1089, 64580, 37873, 3454, 46766, 28976, 80, 50167, 19461, 53236, 28293, 13608, 50766, 5678, 38616, 11603, 31736, 55583, 3231, 41912, 52130, 7157, 28499, 61290, 8856, 54067, 20138, 57540, 15988, 43492, 35371, 24293, 46378, 11210, 39294, 7627, 30794, 497, 46067, 12888, 42365, 56849, 3970, 25572, 61455, 10170, 26641, ++ 43312, 29484, 8631, 44514, 1152, 55732, 7464, 47315, 2069, 18439, 35183, 54036, 12181, 39102, 9157, 63402, 2596, 53066, 18704, 49094, 22318, 44367, 61242, 30086, 23812, 10897, 58632, 5050, 34201, 65034, 9100, 24655, 55374, 27714, 3594, 38858, 30253, 42273, 5595, 44963, 23502, 723, 52896, 27198, 44139, 8874, 54544, 18729, 46273, 10192, 27610, 62544, 7584, 36122, 31921, 64711, 16191, 48992, 34824, 56657, 26420, 62440, 41029, 11260, 59490, 25747, 33206, 62813, 445, 47049, 15223, 6456, 49301, 18699, 5022, 46303, 16653, 23813, 61319, 36706, 8165, 54218, 11568, 61679, 24939, 56549, 4899, 53477, 22931, 46501, 3549, 54799, 41625, 5929, 32961, 47140, 1876, 16367, 43437, 22692, 48502, 12073, 32382, 36938, 20070, 50779, 33796, 17054, 38787, 21074, 48073, 2472, 40697, 25508, 50657, 17423, 43891, 62682, 23251, 38586, 20692, 32461, 54875, 8401, 30283, 53356, 6431, 33579, 22537, 36297, 9435, 30811, 60075, 51333, 19046, 58022, 8543, 37992, 4730, 59588, 13257, 62593, 841, 55823, 21609, 61749, 31340, 46816, 20874, 33788, 56610, 29314, 11451, 36941, 32407, 49579, 24385, 908, 52188, 10626, 45249, 24756, 33007, 64568, 19483, 41815, 9692, 63793, 17842, 60126, 29326, 58025, 30685, 22504, 52330, 36302, 3850, 64413, 43152, 26142, 1446, 47689, 29752, 5708, 22636, 34138, 56036, 7357, 31068, 48052, 20592, 36388, 52664, 5024, 63755, 24204, 60082, 50055, 28990, 54721, 21958, 64006, 29671, 11121, 49725, 1970, 40928, 13229, 22552, 49142, 25811, 15533, 51320, 9749, 35872, 63887, 12634, 37420, 6428, 65381, 
41281, 10257, 46310, 18166, 52920, 43722, 14976, 25691, 33991, 10003, 39627, 18302, 44769, 36629, 50189, 2892, 38539, 25454, 59670, 9607, 54553, 17327, 61605, 26394, 52874, 49832, 18263, 60145, 21361, 34606, 27064, 48449, 40238, 6841, 37022, 63529, ++ 2770, 49575, 14398, 64630, 27300, 40789, 20000, 36659, 52844, 31863, 61975, 8170, 29390, 51442, 21839, 45032, 32572, 25204, 43361, 6558, 57298, 3360, 35894, 5635, 48178, 32682, 42830, 26300, 49622, 18766, 44447, 57806, 11797, 46999, 59977, 10307, 21455, 59170, 28400, 55103, 19621, 61200, 36194, 15685, 59589, 22631, 34618, 50741, 28751, 37301, 56361, 20179, 49467, 22309, 53525, 11048, 24229, 46614, 20943, 7885, 50838, 15720, 33816, 50011, 2239, 19044, 48606, 13620, 37718, 23448, 42739, 34489, 20748, 64164, 27486, 9383, 62535, 41953, 12565, 19314, 58184, 28099, 37679, 1268, 42277, 14352, 38845, 33302, 8734, 58451, 35036, 19072, 61606, 27577, 14088, 56989, 24133, 49814, 9135, 58357, 783, 44904, 64822, 9440, 42623, 12736, 58095, 354, 64429, 13883, 50228, 30840, 19818, 61727, 6240, 56348, 31937, 14366, 53608, 6875, 64806, 12200, 45157, 24951, 60389, 16582, 41993, 65462, 2898, 46224, 56853, 1397, 35008, 12474, 44611, 23984, 15197, 64009, 33332, 26457, 39096, 9897, 35129, 28496, 42217, 10669, 50578, 13853, 60667, 8975, 18398, 41773, 62469, 4029, 46294, 16440, 42319, 61018, 33900, 57067, 21841, 53816, 14723, 7483, 30315, 57437, 23366, 31453, 44839, 7005, 20850, 48402, 1935, 43963, 6363, 20125, 46157, 17193, 9484, 19023, 56945, 38331, 13566, 48934, 65042, 18189, 40446, 24022, 60748, 15337, 41573, 6520, 26816, 45179, 31502, 10834, 43685, 19773, 6038, 38095, 13518, 44596, 16029, 46176, 19113, 60888, 24885, 52467, 34439, 8424, 44186, 32271, 59216, 39070, 17364, 43618, 25403, 55990, 34636, 15877, 30522, 61457, 26126, 57664, 29759, 949, 64821, 45432, 20580, 54735, 30354, 56870, 538, 16796, 26284, 32645, 64970, 6145, 28889, 50809, 19696, 36435, 1672, 42699, 13626, 32741, 40811, 10428, 51844, 2585, 65454, 11635, 22867, 50886, 19477, 
41671, ++ 23912, 33890, 55120, 22059, 51626, 10393, 62643, 25779, 12705, 43555, 24071, 15581, 46632, 3697, 58271, 17070, 55636, 10497, 64335, 34111, 28300, 52498, 20824, 40039, 63595, 16132, 828, 62062, 22462, 41144, 2953, 35527, 17195, 33360, 23034, 43894, 51472, 2445, 34881, 9458, 39466, 29784, 6762, 47134, 3003, 62108, 12905, 4633, 64927, 14784, 2291, 42151, 12027, 38906, 623, 43569, 58337, 5397, 60832, 29984, 39401, 22591, 6906, 55219, 36290, 64870, 4706, 52873, 30506, 58381, 10499, 55640, 29288, 12002, 44231, 53291, 33060, 40, 48212, 40025, 4422, 45946, 20641, 49496, 29176, 51647, 19451, 65248, 43857, 13379, 29611, 49195, 2629, 44732, 36844, 7231, 39042, 60735, 34458, 28259, 39857, 15066, 26464, 56294, 21837, 45775, 27545, 53451, 23364, 42232, 4921, 57360, 11863, 32970, 41581, 24354, 4117, 47294, 18884, 41124, 28102, 48532, 4753, 39800, 2113, 50792, 28842, 19885, 54260, 31860, 16166, 43391, 26216, 64488, 3250, 54567, 31474, 46045, 1846, 51115, 15992, 58544, 47325, 17262, 54450, 24818, 4297, 39607, 27783, 53681, 208, 51017, 15442, 57646, 25941, 64384, 6857, 20746, 12992, 29672, 4783, 49144, 37507, 58631, 46416, 5156, 51576, 12569, 52894, 37662, 54301, 14392, 64736, 25519, 61930, 38977, 27191, 58277, 40200, 50516, 31382, 8318, 55119, 25001, 36668, 9700, 53746, 44780, 11217, 57154, 33585, 50435, 14151, 55179, 18718, 56388, 3181, 33099, 63089, 26475, 58944, 7544, 57643, 32446, 54340, 8986, 42935, 4504, 62244, 17734, 56749, 20691, 5506, 27257, 61699, 8067, 48588, 3862, 22754, 54597, 44852, 2287, 36207, 7692, 47750, 23051, 35170, 6307, 61758, 4467, 48975, 13450, 62967, 34444, 55772, 12052, 45286, 21733, 39806, 4044, 44341, 31439, 56049, 22745, 64766, 6325, 24576, 55395, 28707, 37860, 16885, 45666, 58117, 13726, 54575, 4864, ++ 58744, 12084, 38742, 5619, 37450, 30755, 3837, 49170, 56669, 5227, 38361, 65394, 27055, 41806, 36887, 5875, 30414, 40702, 1234, 15831, 46777, 8843, 57958, 12317, 25473, 50754, 38970, 13888, 52962, 8231, 60993, 25831, 53767, 1128, 64031, 7158, 
26732, 47938, 18945, 62874, 46108, 12214, 51059, 31823, 41574, 26222, 38391, 48833, 23906, 34159, 60618, 30471, 55032, 24967, 63064, 33274, 18535, 36782, 13866, 53052, 1197, 63576, 46038, 27982, 14692, 42095, 27095, 44418, 7475, 17927, 47914, 1521, 60857, 41225, 3200, 22966, 14943, 56168, 31040, 24285, 64857, 13831, 55277, 6225, 62938, 10440, 36258, 2063, 27049, 50647, 6584, 38316, 15866, 64713, 21954, 54152, 29975, 18864, 5086, 55470, 20342, 52633, 31045, 3383, 49346, 6083, 40139, 8851, 32143, 59457, 28182, 37660, 22480, 54434, 10486, 49149, 36018, 29584, 60808, 3111, 55555, 35326, 21781, 63126, 37017, 12668, 44241, 7749, 39174, 11455, 62642, 21543, 49666, 18123, 38657, 47865, 5844, 19772, 55538, 42933, 22741, 31030, 2972, 65399, 8111, 37125, 58786, 16655, 45594, 25503, 38150, 23253, 35064, 30808, 9533, 39457, 28398, 50338, 36094, 63400, 42850, 16969, 1693, 27960, 20334, 39581, 16164, 42510, 1070, 24113, 32688, 35718, 9264, 41430, 15236, 53663, 290, 33809, 24271, 5107, 62698, 20510, 43602, 2401, 59346, 30359, 4138, 19952, 37546, 2751, 22304, 62586, 29851, 4, 40191, 35160, 46652, 12703, 51365, 1465, 47979, 35849, 23922, 3666, 27649, 37613, 14933, 47408, 28734, 39771, 2584, 45456, 55030, 14069, 41556, 23855, 31926, 58519, 40077, 8808, 20126, 51935, 21191, 40553, 13161, 59063, 17278, 51156, 27635, 37907, 22340, 41352, 24405, 43045, 7566, 48500, 17959, 58375, 52234, 15164, 63446, 8145, 48067, 12280, 30121, 46765, 59303, 4616, 43983, 7942, 56193, 32082, 1412, 35905, 30366, 44922, ++ 16544, 48115, 21222, 60248, 17508, 57935, 42097, 14940, 33282, 20902, 50341, 33, 55311, 19435, 14331, 60538, 48488, 22779, 51052, 59676, 26553, 32070, 18116, 42245, 54237, 7361, 28662, 47326, 19720, 31210, 38129, 13295, 48620, 39902, 18224, 37018, 56115, 14745, 40885, 4766, 24476, 58319, 17598, 53972, 20432, 7297, 56852, 16846, 43790, 8470, 51845, 17330, 3906, 47306, 15329, 9032, 55656, 40765, 25491, 44803, 17503, 32057, 11784, 58686, 24359, 9438, 56911, 20325, 61751, 40398, 25175, 36935, 
16297, 49711, 35241, 58910, 38975, 18296, 51373, 9993, 43496, 34706, 16888, 32016, 44510, 21717, 57174, 47553, 17304, 60314, 23418, 56062, 26002, 51880, 10182, 46180, 12907, 63129, 41928, 11335, 47641, 6636, 38404, 61164, 18547, 35055, 62559, 15906, 47439, 17813, 7568, 52416, 45507, 694, 64195, 16296, 59095, 8007, 44436, 25840, 17883, 9510, 51744, 14091, 26577, 59677, 23449, 56186, 27080, 47126, 4462, 37608, 7258, 53061, 13527, 27966, 62045, 35760, 11750, 29178, 6794, 53383, 40343, 27216, 45077, 20387, 33107, 52386, 6609, 63906, 12067, 59517, 5863, 54320, 48195, 14342, 55448, 3207, 41077, 9983, 25349, 54783, 34580, 62091, 8938, 56288, 33663, 61331, 28474, 59359, 4632, 46835, 19205, 56665, 31093, 12204, 48160, 60493, 14135, 44978, 29001, 52563, 15004, 41016, 12477, 51063, 32879, 62060, 48697, 28134, 46945, 8647, 42622, 59669, 24773, 16318, 61792, 21088, 41255, 30224, 18103, 14424, 50841, 40515, 64750, 20380, 58431, 31465, 10552, 60117, 12944, 35059, 30078, 49464, 1609, 52249, 16597, 11405, 27825, 60293, 38311, 14283, 64408, 24918, 53817, 32087, 9238, 42733, 15783, 58054, 10664, 53001, 3589, 20928, 60125, 29641, 1917, 30952, 10218, 34797, 27189, 41449, 21065, 57871, 3121, 38345, 19193, 35622, 15697, 49027, 20192, 43005, 25088, 63921, 9478, 28169, ++ 62448, 31733, 636, 43878, 46852, 8004, 24674, 60900, 45767, 10866, 59412, 31053, 9725, 34535, 52250, 24812, 11347, 35596, 19805, 38651, 4326, 49942, 65236, 1784, 34965, 21583, 60376, 3981, 56842, 43181, 5392, 63042, 29190, 10625, 57259, 32403, 12492, 60580, 30652, 49549, 33963, 1452, 37500, 9980, 63413, 42527, 31314, 312, 58635, 39656, 26613, 45534, 35346, 57781, 27838, 51480, 30745, 2885, 64395, 10060, 57328, 37521, 42881, 3580, 49152, 32911, 38716, 12625, 31717, 3844, 64569, 14055, 52539, 25982, 7954, 30132, 63630, 5612, 26789, 60615, 2293, 53016, 27393, 59674, 3342, 39492, 24558, 8281, 41162, 32764, 11716, 42904, 4474, 31271, 40755, 120, 33737, 26765, 51018, 23595, 35405, 63832, 25368, 13314, 51560, 30278, 24244, 
54989, 2078, 36400, 63455, 25956, 15246, 34631, 28584, 39406, 21179, 52752, 13635, 37443, 61944, 31489, 42322, 57105, 5693, 33967, 49079, 435, 61140, 14664, 51946, 29775, 58986, 34087, 24733, 41896, 9622, 22125, 57241, 44314, 61212, 12911, 19456, 49752, 11389, 62925, 2120, 29976, 43628, 19149, 48947, 28893, 42958, 20168, 1456, 44910, 22624, 58859, 18242, 31017, 59956, 6461, 43741, 22362, 47990, 26451, 3575, 18468, 49330, 10826, 40473, 62840, 27637, 3071, 49800, 34810, 21764, 7717, 37418, 55660, 3251, 35224, 26644, 64125, 21948, 46237, 16076, 25994, 6126, 58138, 17526, 38856, 21511, 11921, 52242, 7077, 28342, 53679, 9306, 56645, 39188, 62427, 10132, 30860, 6597, 48831, 301, 53460, 22987, 50625, 26218, 64995, 7304, 19304, 57812, 33545, 63240, 44334, 48054, 635, 31078, 49672, 4964, 46905, 2744, 38943, 63306, 33224, 1634, 45962, 31228, 35554, 64190, 47166, 14657, 37330, 62250, 42197, 54971, 49361, 60, 60918, 16380, 34057, 50357, 14459, 53983, 25977, 62993, 29545, 60782, 5783, 53511, 18472, 39717, 50220, ++ 7175, 53126, 26078, 32812, 13312, 53747, 28883, 1679, 36037, 23076, 40199, 18171, 44011, 61581, 2211, 45963, 63094, 7616, 54451, 12942, 44849, 14791, 37128, 29656, 58780, 10038, 45313, 33015, 27032, 15484, 51857, 20662, 45671, 23876, 50016, 3845, 42726, 21045, 6498, 54724, 22136, 64509, 27975, 45407, 25275, 13996, 50045, 29410, 19801, 10492, 64146, 5986, 21876, 12656, 41412, 6731, 20705, 49805, 34525, 23589, 48104, 5898, 21659, 53781, 19517, 61153, 827, 54654, 51239, 22388, 45262, 33708, 6100, 57626, 21137, 46850, 13244, 37874, 44921, 22128, 33573, 7779, 41598, 12425, 52200, 15495, 62169, 30656, 53937, 886, 64213, 35898, 20494, 59079, 17897, 48153, 57791, 3985, 15574, 59561, 2272, 16953, 43093, 33173, 1351, 58655, 10880, 39238, 49769, 20541, 44653, 3719, 42702, 57847, 9058, 50942, 1859, 33354, 58338, 10947, 46570, 1124, 20079, 29348, 45654, 10274, 17559, 40714, 19348, 32994, 42617, 22995, 10777, 56012, 951, 60474, 50391, 39681, 3742, 17671, 36667, 32357, 56542, 5281, 
34566, 41951, 22933, 57398, 13325, 36260, 2694, 56240, 10390, 38769, 27383, 61855, 33350, 8206, 51815, 46103, 15604, 38590, 11838, 50856, 13792, 36485, 64151, 45326, 13186, 55091, 22768, 15686, 43413, 51392, 23754, 5518, 63686, 42662, 28041, 17985, 61178, 10255, 49432, 6921, 39146, 464, 56563, 42340, 34688, 13218, 54353, 4493, 64514, 49103, 32155, 45864, 36916, 4102, 43273, 25858, 5202, 22458, 45007, 55619, 16553, 35356, 43782, 18240, 38518, 5935, 42427, 21581, 36968, 46499, 29270, 9954, 22002, 5317, 35320, 17809, 56280, 12254, 34158, 28606, 57092, 11159, 19777, 49222, 25987, 59899, 18537, 6748, 25213, 9068, 51588, 26893, 12997, 23146, 5738, 18809, 24971, 36973, 53405, 6986, 23758, 60358, 9038, 41851, 907, 33703, 10963, 38938, 14697, 47250, 2359, 34312, ++ 15481, 41167, 11127, 65008, 35446, 19118, 39368, 63625, 16144, 54744, 6823, 47957, 28054, 13507, 37833, 31614, 17466, 42942, 29117, 62134, 23468, 55961, 6720, 24351, 47861, 16943, 37710, 11593, 64585, 34651, 9655, 58423, 2092, 36306, 16277, 65530, 25536, 52534, 38305, 11359, 43513, 15392, 52062, 3400, 35594, 60265, 5501, 53197, 33085, 47971, 23003, 54153, 32231, 61464, 37917, 59830, 44203, 16522, 4461, 62055, 13407, 35582, 60310, 28681, 8797, 29766, 46458, 16939, 35968, 9841, 56027, 28212, 39699, 11501, 43076, 1129, 53850, 19597, 56934, 10832, 49887, 58593, 22792, 48355, 26145, 37218, 5492, 45842, 19815, 27882, 48682, 14611, 52870, 8934, 62376, 28626, 21506, 44062, 37470, 31577, 45455, 57137, 9927, 54013, 46537, 14720, 43767, 5455, 29490, 55995, 13052, 61363, 31377, 19493, 46251, 23810, 62388, 27193, 5895, 40236, 22774, 49884, 54512, 15539, 64564, 35916, 58153, 30621, 53750, 8944, 63545, 2490, 45410, 20252, 37196, 32077, 15695, 26941, 63361, 48716, 23851, 398, 46426, 25164, 60328, 15024, 48014, 9365, 40882, 62189, 21939, 32703, 51289, 65054, 17435, 35642, 12433, 44168, 26660, 422, 52999, 23948, 65215, 31929, 2368, 53527, 29158, 7903, 35276, 25923, 57779, 6586, 30081, 10120, 58956, 39481, 16387, 52143, 11042, 
47374, 23321, 41940, 16944, 58546, 31750, 53224, 18923, 9143, 63904, 29206, 44373, 25407, 35597, 14897, 2040, 57356, 23267, 65136, 17139, 60249, 33459, 53121, 2323, 25171, 61342, 26892, 12305, 63348, 32800, 56196, 16111, 60769, 11709, 54198, 4300, 42825, 37579, 53599, 26418, 62657, 23543, 42109, 61083, 15407, 43944, 24177, 55162, 5605, 40900, 11939, 50296, 39344, 56116, 33683, 40387, 4809, 46522, 56986, 38855, 64506, 45831, 10887, 28100, 44724, 39589, 30631, 47422, 13126, 51107, 21627, 52275, 23531, 58557, 27675, 61333, 22172, ++ 57162, 24368, 48828, 3335, 56328, 6239, 50973, 9013, 29971, 49674, 26229, 64475, 4128, 57522, 22237, 53524, 4852, 57053, 33502, 533, 41343, 18608, 44146, 61360, 2833, 53399, 22877, 55508, 126, 46495, 24946, 41616, 29744, 54328, 8620, 31638, 44762, 446, 61548, 28835, 8302, 57556, 39198, 18069, 55599, 21604, 40409, 11746, 61889, 2685, 37117, 14300, 50337, 1305, 19375, 11199, 26909, 54573, 39769, 29463, 52587, 26140, 15018, 51630, 40930, 65214, 6365, 43678, 26600, 59965, 2111, 18852, 63147, 50679, 32405, 61988, 28720, 36061, 3931, 40687, 29861, 17666, 35643, 535, 63941, 18684, 55830, 13185, 34369, 58143, 7445, 43546, 25160, 39219, 34832, 5788, 54667, 13952, 65114, 7703, 20854, 29132, 40866, 19378, 27301, 60400, 22260, 65422, 33600, 9617, 40519, 26959, 6488, 52079, 12312, 38151, 14836, 43191, 48338, 16825, 63855, 32686, 8601, 38763, 3427, 22265, 47512, 5193, 25341, 38349, 17136, 48626, 28642, 65170, 8240, 46917, 5460, 52738, 14322, 8842, 51839, 64179, 16398, 38577, 7505, 50834, 31707, 55144, 26365, 4922, 45943, 15886, 7283, 24600, 47222, 3828, 60384, 21244, 36837, 63069, 32516, 4425, 17680, 40275, 60741, 19839, 41628, 21571, 52020, 561, 38098, 47757, 65121, 36811, 18658, 45880, 32014, 1149, 57204, 33035, 4566, 54479, 29599, 45353, 11665, 27372, 36099, 49862, 22894, 1344, 51230, 10715, 30519, 59058, 38395, 20132, 10315, 31198, 49625, 13057, 47047, 18984, 37216, 41846, 8171, 51503, 45759, 3356, 28222, 9413, 47201, 909, 40426, 25008, 64169, 
18621, 59653, 14681, 50936, 7046, 45815, 3689, 18968, 52599, 7974, 37268, 30219, 62083, 21809, 54280, 29200, 16225, 1055, 59191, 19472, 53670, 28432, 15606, 31787, 8362, 33138, 59003, 4209, 62412, 16992, 2102, 64137, 27374, 37504, 57516, 3672, 44282, 8444, 36498, 12620, 45490, ++ 4397, 59843, 36921, 19881, 27519, 42827, 22492, 58868, 44730, 2663, 40941, 20116, 33994, 50704, 8418, 26918, 39826, 21303, 9424, 49454, 27633, 52692, 11004, 31404, 40447, 14233, 49229, 30352, 39615, 17497, 51148, 6274, 61737, 19398, 40295, 59365, 13660, 35262, 16901, 47380, 32723, 5121, 23673, 49044, 9183, 33662, 46642, 24789, 16456, 44709, 59117, 8056, 42768, 29184, 46193, 65431, 33584, 8567, 46865, 18773, 130, 45357, 38497, 2611, 18144, 34099, 23332, 50221, 15357, 41713, 31241, 48495, 21965, 4871, 16785, 24717, 8945, 47732, 65471, 14654, 55073, 6504, 60992, 45144, 31488, 9321, 40126, 50370, 3792, 38006, 22398, 60019, 1748, 46761, 16658, 50196, 10511, 40377, 24523, 49085, 53274, 4771, 62838, 8123, 37788, 4166, 35831, 18239, 46974, 24783, 59971, 17231, 48851, 35377, 64951, 4587, 55790, 19086, 30777, 56740, 2628, 24491, 43554, 59381, 28318, 52565, 13391, 61579, 43999, 12061, 57495, 34713, 13923, 40271, 54781, 18509, 58362, 30310, 35306, 41403, 28374, 10517, 33835, 54022, 29379, 21358, 1163, 37416, 18043, 52882, 34362, 61089, 41529, 56965, 13565, 50046, 29465, 53982, 6167, 19283, 42015, 47554, 56691, 25085, 10441, 46661, 5790, 62524, 31582, 44434, 17303, 33278, 20560, 2174, 54689, 12709, 62332, 26353, 38603, 19628, 65496, 36420, 1828, 20930, 62882, 5331, 60013, 13934, 40827, 61314, 19526, 39732, 52900, 8049, 26704, 48281, 54790, 40621, 607, 36223, 27332, 64254, 11460, 59503, 21317, 34080, 14224, 57553, 41408, 52879, 24258, 33793, 50288, 13568, 31292, 45268, 2159, 32598, 10861, 39531, 29700, 58841, 35777, 27395, 64024, 194, 45044, 14444, 36319, 3139, 43346, 65525, 23628, 44586, 11310, 36073, 2482, 61413, 43651, 20492, 51243, 14139, 40495, 22263, 35151, 56487, 19812, 45369, 7353, 17682, 41008, 
30881, 64544, 20556, 55514, 31293, ++ 42240, 17116, 9811, 61902, 52431, 12375, 34752, 17780, 32454, 14113, 55840, 10079, 47036, 15398, 42482, 59175, 12532, 64896, 47228, 16464, 60069, 5774, 36119, 20416, 64105, 28167, 4724, 62362, 7919, 59777, 21815, 33866, 14461, 47651, 4376, 22673, 50644, 27235, 55866, 20006, 59016, 36712, 62497, 30390, 13380, 65119, 1057, 56572, 35947, 28301, 18344, 52381, 25648, 56139, 15542, 36394, 2032, 57038, 22459, 58823, 31590, 62982, 10636, 56332, 47599, 13123, 57939, 4274, 62577, 10987, 53412, 8420, 37298, 45674, 55462, 42209, 59809, 15774, 23664, 38624, 46215, 20311, 28382, 11345, 42655, 57704, 23872, 28989, 61261, 10697, 54400, 30232, 12642, 32421, 56498, 23235, 61513, 30510, 1086, 36182, 12359, 32249, 47972, 22941, 56602, 41848, 51318, 11589, 53131, 1006, 37302, 54184, 2772, 21577, 30143, 42018, 25232, 53705, 7634, 36586, 27750, 51258, 18358, 11765, 35121, 7124, 41350, 20792, 1734, 55367, 27506, 6472, 51481, 3987, 25588, 43766, 23272, 1615, 45700, 20546, 59163, 42769, 18723, 4157, 58181, 44472, 64745, 14533, 59728, 28117, 11068, 23687, 721, 31303, 37672, 9102, 40668, 15136, 48736, 57920, 11361, 28240, 8443, 35521, 58250, 29874, 39065, 14870, 55939, 11574, 60320, 8637, 53287, 28605, 41721, 24823, 7251, 50203, 13400, 46547, 9002, 51460, 25263, 40062, 48225, 33689, 23689, 43922, 7511, 32404, 55874, 3542, 17914, 42950, 63215, 5647, 24126, 15485, 58329, 8721, 44127, 6214, 48464, 29902, 1239, 54571, 23750, 36560, 19960, 5010, 62856, 16839, 59005, 36036, 8370, 51759, 22622, 41085, 55676, 16356, 48388, 20881, 9667, 41614, 22465, 51356, 17601, 47896, 8504, 60821, 27867, 9862, 48640, 32410, 17440, 62849, 49594, 22010, 12137, 55620, 1368, 26499, 48351, 52541, 5416, 42524, 10037, 32929, 61777, 25361, 54339, 11893, 46607, 187, 50584, 6539, ++ 26406, 51732, 29713, 38563, 1052, 46148, 57743, 4664, 62837, 38032, 23857, 60169, 37180, 30223, 1323, 32771, 45275, 25149, 3189, 38227, 34248, 25879, 56734, 46172, 8699, 41932, 19146, 43597, 26410, 36653, 
11977, 56476, 43985, 27827, 62784, 37882, 7419, 45939, 2805, 42019, 12701, 26460, 1899, 44299, 51550, 19240, 27471, 43105, 6443, 63835, 31700, 3714, 39326, 21178, 5210, 49313, 24455, 42352, 12374, 50564, 7335, 24770, 43259, 21258, 30866, 39945, 25549, 37976, 20573, 35132, 24131, 59045, 27552, 14325, 34407, 2767, 30815, 52090, 7245, 32653, 1782, 63377, 50930, 16337, 53526, 2541, 20779, 47316, 15192, 41468, 18273, 51202, 26392, 63568, 3058, 36998, 19565, 44551, 55752, 17459, 61016, 38900, 13754, 50023, 26131, 16158, 6895, 28805, 32610, 58410, 14240, 23471, 47717, 56903, 11313, 60497, 91, 34427, 45324, 12945, 60991, 4373, 47884, 55109, 25735, 62795, 31727, 50595, 36731, 23676, 45004, 60707, 21248, 31261, 59769, 11238, 38903, 62732, 12597, 55035, 2755, 25797, 61700, 39998, 12284, 24288, 8697, 46982, 39265, 3545, 49376, 43937, 54588, 18894, 58481, 25621, 64094, 1885, 30493, 23004, 38226, 55278, 14551, 49495, 1251, 16673, 50492, 24650, 3997, 48618, 26891, 39993, 13902, 61514, 4319, 56338, 44045, 30588, 59713, 22397, 34472, 15503, 60920, 8157, 14672, 57450, 2612, 54980, 37820, 15821, 27716, 47704, 60513, 33837, 13663, 45126, 34517, 61537, 29412, 51895, 22139, 32576, 15778, 56909, 40058, 18546, 65476, 6881, 49781, 30706, 38918, 27485, 3101, 21049, 57034, 28389, 61802, 6346, 25603, 65124, 1421, 33428, 50437, 6010, 57462, 12864, 26774, 58470, 32786, 20317, 51883, 38383, 5214, 57768, 30453, 7272, 24706, 41569, 47617, 29789, 38103, 57374, 18363, 11547, 29074, 54702, 22996, 49933, 2928, 47903, 16162, 34904, 28772, 39968, 18977, 33552, ++ 63349, 13609, 47500, 23682, 15798, 31536, 25414, 48405, 11468, 53231, 28585, 5495, 16765, 52024, 63768, 20635, 7048, 55040, 30837, 51697, 13679, 63285, 1101, 15698, 50540, 35137, 57712, 13134, 54818, 3317, 48376, 30880, 1380, 53011, 11099, 18431, 57854, 24586, 63640, 10251, 53493, 48305, 16050, 40965, 7665, 37725, 54432, 15098, 47529, 10910, 41114, 57493, 13159, 62715, 31005, 53285, 9919, 63494, 32709, 17111, 36987, 54095, 4822, 59617, 8075, 
63947, 1590, 54985, 44625, 7042, 47232, 315, 39059, 64314, 20018, 49101, 11892, 26455, 44149, 54543, 25408, 37540, 4746, 34044, 24882, 36638, 64604, 6890, 44291, 33501, 62781, 5143, 45316, 15975, 42310, 8373, 52351, 25695, 6755, 46048, 27790, 2705, 59332, 34253, 275, 64283, 45098, 62005, 19934, 41398, 8428, 43479, 31813, 7293, 39690, 17987, 50349, 10113, 63259, 20591, 41672, 33731, 14542, 39488, 657, 45874, 16476, 8076, 64838, 15264, 33386, 9988, 42269, 36229, 16069, 50138, 7018, 48171, 32614, 22000, 37744, 47472, 6389, 30688, 49970, 34899, 56030, 17158, 30236, 63608, 20004, 32174, 11781, 40068, 5201, 52525, 20668, 34847, 45517, 61685, 2999, 33741, 44619, 22064, 63566, 43257, 7401, 59108, 37132, 18107, 42910, 23122, 50765, 34067, 21425, 35895, 16742, 2806, 41129, 5849, 57948, 44524, 28358, 52732, 31018, 18347, 47103, 12376, 24556, 65242, 45587, 9652, 22586, 968, 28693, 50776, 19184, 3880, 42169, 12164, 55424, 38225, 50478, 4704, 28827, 46368, 10455, 43433, 15303, 62021, 11073, 44455, 55203, 48711, 12071, 38035, 15123, 47030, 19575, 36489, 54683, 13815, 60571, 38230, 30724, 46130, 34879, 4323, 40202, 55441, 12552, 22684, 42291, 13741, 52456, 37643, 60435, 3869, 34524, 9523, 65238, 6598, 32302, 63115, 36741, 15299, 59741, 31492, 13822, 36157, 58898, 5123, 62704, 10553, 56769, 43439, ++ 2039, 54940, 7547, 40612, 54014, 61034, 8673, 41578, 21485, 413, 43157, 62295, 25666, 39142, 11720, 48593, 36537, 18021, 58115, 10422, 43798, 22110, 39272, 53966, 24719, 2358, 32226, 52217, 23547, 38797, 63517, 16416, 25345, 35836, 49400, 32898, 41432, 14928, 34217, 39529, 20593, 31177, 56999, 22520, 59935, 29865, 4415, 61148, 20876, 50983, 23465, 9371, 48548, 35041, 17654, 43908, 27331, 38286, 3138, 61307, 14115, 41521, 34739, 48916, 16221, 28440, 51877, 12119, 27174, 60743, 17696, 56575, 12809, 51132, 5943, 61374, 41058, 57879, 21007, 9636, 59290, 13481, 42003, 56438, 8451, 49336, 12154, 27316, 52580, 1232, 21357, 39600, 9785, 49600, 28296, 58880, 11915, 34007, 64555, 22042, 57505, 
42837, 18944, 10673, 55246, 30676, 21338, 38659, 3239, 50550, 28002, 63983, 16605, 62714, 29114, 44363, 22151, 38542, 26715, 52889, 9313, 29719, 57713, 21451, 60092, 24079, 52175, 30173, 39944, 3080, 49274, 18990, 62141, 173, 54168, 26703, 34307, 17359, 56923, 7876, 64977, 15408, 53245, 19596, 43367, 1954, 27088, 52114, 5772, 35924, 7952, 46477, 62364, 16725, 28677, 43165, 6982, 56112, 16246, 9832, 51493, 18554, 59763, 5424, 27104, 32968, 54255, 20764, 30389, 64443, 1549, 57403, 6046, 44900, 9763, 49031, 29072, 64271, 18823, 53882, 24094, 765, 38159, 10457, 43041, 63380, 36984, 29790, 51947, 4820, 20684, 35803, 53903, 41499, 62291, 10932, 56250, 39010, 20835, 63506, 2495, 17413, 24638, 61105, 13421, 34822, 58671, 32023, 52360, 1729, 47715, 23152, 7589, 30275, 43125, 375, 58314, 26942, 44080, 8959, 28781, 42580, 24537, 17081, 2405, 53315, 10560, 64764, 18774, 1785, 31462, 47081, 63577, 26206, 414, 45509, 14844, 54097, 17834, 50519, 21434, 43137, 24265, 46878, 589, 39189, 6104, 43826, 65095, 26954, 20985, 52831, 24619, 49478, 15083, 23162, ++ 37376, 21786, 65334, 33093, 3085, 20351, 36361, 64180, 29345, 51261, 35323, 13093, 44542, 3625, 54149, 22924, 61092, 1965, 41050, 28795, 19363, 48141, 7536, 33155, 12207, 60867, 17852, 9343, 46738, 6846, 20219, 42325, 60702, 5473, 21356, 8971, 55340, 892, 29339, 50905, 6082, 64247, 4030, 34937, 11456, 49507, 24260, 32607, 38731, 2166, 34019, 59682, 26386, 529, 60786, 6977, 55363, 19688, 48248, 23125, 55784, 27645, 20095, 11393, 46131, 36632, 18601, 32303, 49568, 15577, 33473, 30204, 43440, 23219, 29091, 36374, 17980, 761, 33272, 48785, 17267, 47029, 29387, 18980, 61853, 31931, 38764, 59149, 17045, 30875, 57321, 24295, 55334, 32908, 18570, 37647, 47476, 3662, 41304, 15025, 9297, 29803, 52134, 25106, 43940, 8652, 48539, 12105, 57211, 15634, 36294, 5008, 45930, 34121, 13724, 55324, 5514, 58809, 15883, 1511, 65344, 49200, 6219, 37267, 11170, 42781, 4824, 56637, 18065, 58756, 29215, 52967, 24347, 46270, 13106, 64301, 43159, 3462, 
29073, 50959, 27710, 35611, 10180, 57573, 22489, 63028, 13810, 40561, 58926, 25235, 55679, 23101, 2511, 50464, 59395, 33992, 13992, 48300, 27621, 42416, 25865, 39683, 12248, 52317, 37756, 13463, 2726, 45820, 10719, 34694, 52806, 15971, 32192, 63440, 27255, 58694, 14492, 37230, 47534, 11242, 32588, 50682, 20313, 56141, 3955, 21702, 6391, 59538, 17107, 50145, 31566, 57876, 6737, 25110, 37392, 16666, 32776, 7385, 47501, 26071, 35449, 58002, 44688, 7766, 53540, 25649, 2835, 21699, 37467, 26583, 19136, 59925, 34990, 17249, 64640, 33065, 20455, 53064, 3498, 62462, 49338, 4731, 56506, 32165, 63067, 21610, 42998, 25416, 49748, 36918, 59536, 7640, 16709, 35392, 56714, 19995, 33377, 27512, 58733, 2807, 35799, 16071, 53294, 12751, 58151, 25830, 51421, 19333, 9399, 40676, 1489, 42872, 7860, 38188, 27943, 59584, ++ 9200, 48668, 18256, 44178, 28282, 50012, 14743, 6049, 45610, 18499, 7389, 58395, 19586, 56581, 27784, 34893, 14624, 49736, 24158, 62513, 5091, 56201, 29948, 59445, 44972, 27436, 37602, 65156, 29055, 33604, 58901, 9889, 28457, 53905, 45096, 64961, 23354, 47091, 60965, 17178, 42869, 25781, 46360, 18803, 52676, 40027, 8739, 55276, 13733, 45631, 53797, 16244, 41894, 22187, 47035, 28914, 11846, 40584, 5690, 45055, 30099, 1019, 64780, 33118, 53156, 3323, 61869, 40654, 5301, 42638, 63681, 9235, 54263, 4034, 46568, 10318, 52910, 39298, 62654, 5431, 35481, 60430, 3163, 51736, 21889, 14417, 46381, 4299, 35189, 48453, 13576, 36316, 6308, 64098, 458, 53773, 14285, 60166, 26849, 50880, 35552, 39757, 5398, 63389, 37078, 17701, 52773, 34898, 26555, 59219, 22451, 54541, 25550, 1964, 49389, 36872, 23932, 32926, 46885, 40597, 31307, 23148, 44554, 19521, 64019, 32332, 26266, 47043, 12406, 35566, 6058, 41146, 9099, 38196, 30767, 5079, 20044, 61264, 40997, 14081, 44817, 642, 48906, 37069, 4776, 32816, 45448, 9751, 20918, 42570, 13144, 51644, 36924, 26818, 10258, 21666, 65340, 36387, 4139, 62686, 7582, 57141, 30845, 23783, 64813, 47396, 28787, 40802, 61131, 8142, 25555, 46934, 19743, 
12058, 39336, 112, 52413, 7937, 25723, 61851, 40381, 13098, 64824, 25947, 35312, 49251, 27073, 41762, 247, 39523, 11340, 43547, 15134, 49016, 3066, 59823, 46207, 27893, 54098, 13897, 42815, 9783, 30429, 39456, 20245, 64047, 41128, 49272, 9070, 55915, 41939, 12761, 40227, 53877, 5458, 49960, 10395, 39302, 34401, 13270, 22944, 39983, 11581, 47532, 15501, 39006, 6487, 54005, 14755, 24061, 44350, 28841, 50860, 5872, 40747, 10321, 63908, 8017, 39841, 25254, 45087, 60067, 4744, 41195, 28361, 8578, 55297, 34376, 49227, 30058, 60957, 18086, 32621, 57686, 4038, 45105, ++ 30433, 35556, 5264, 56033, 12005, 59317, 40075, 56990, 23288, 54669, 47817, 31133, 33779, 9516, 42140, 6358, 46026, 31848, 8150, 51001, 37036, 16060, 42692, 3552, 21028, 48991, 6008, 41175, 655, 51410, 15155, 49913, 36991, 13496, 31452, 3678, 38411, 12120, 27571, 7025, 54912, 9722, 36439, 62031, 74, 17756, 63195, 26961, 58460, 19951, 29512, 7481, 64644, 37442, 14716, 58239, 35782, 52152, 15871, 59333, 9540, 51050, 42172, 6625, 25014, 57122, 22720, 10112, 58437, 21617, 2281, 38198, 19216, 57391, 32047, 64728, 22241, 13912, 27717, 42997, 23084, 11029, 39821, 26579, 43778, 204, 55553, 22676, 65165, 8820, 42772, 60688, 20075, 40259, 29587, 22823, 43301, 31781, 20288, 1833, 54822, 16472, 49249, 31459, 3877, 60644, 24087, 1598, 47339, 6168, 40038, 10375, 51821, 20223, 57936, 8814, 62173, 3500, 51635, 18787, 10711, 56140, 16251, 54066, 2212, 50027, 9654, 37901, 53592, 22667, 63712, 14721, 55774, 21715, 57967, 36838, 52356, 24991, 10829, 56215, 23539, 59981, 18286, 26189, 60586, 16154, 53825, 31175, 48542, 92, 64538, 33292, 15346, 44999, 38968, 47050, 1038, 24191, 53463, 31786, 20134, 46375, 235, 41416, 9212, 19404, 55709, 15395, 23417, 49683, 38808, 3453, 59860, 42177, 55161, 22687, 45471, 30864, 55607, 3703, 17758, 42471, 6855, 46342, 16216, 54562, 11951, 61652, 33346, 56786, 26422, 63792, 19871, 30682, 51580, 21997, 9227, 64866, 1526, 31871, 49881, 22805, 62668, 152, 48130, 11826, 31339, 17611, 60438, 5819, 
29075, 63694, 3896, 25256, 45061, 23486, 61238, 17895, 57605, 31576, 51483, 18473, 60006, 7792, 44777, 29445, 61158, 33943, 9336, 62369, 3369, 56225, 21207, 61657, 30003, 48957, 23461, 46411, 13195, 52077, 29395, 10729, 33960, 20366, 61281, 45948, 17214, 3791, 22664, 56283, 11249, 51904, 46286, 12966, 62050, 16810, ++ 53385, 14256, 60620, 38899, 26725, 1592, 17351, 32185, 10713, 27159, 2437, 15201, 61716, 50211, 21664, 65461, 17250, 59054, 40106, 12856, 26746, 64667, 23229, 35492, 61867, 11284, 55231, 22485, 57180, 18648, 45583, 24057, 2222, 58220, 43054, 15907, 52305, 34687, 59532, 44623, 32116, 22911, 57973, 14341, 28710, 45171, 42305, 5776, 36806, 3266, 43455, 51788, 32329, 4602, 50171, 23887, 1696, 64196, 25428, 34197, 39098, 21922, 17383, 60572, 37745, 13740, 44075, 29675, 35712, 52504, 26277, 48284, 40292, 16441, 6864, 25787, 44760, 55691, 8187, 50535, 65007, 31294, 54056, 7524, 58249, 30060, 40728, 10237, 25625, 53237, 2382, 26995, 47806, 11444, 51812, 4853, 57978, 7912, 63040, 46908, 23710, 61819, 11198, 21006, 46419, 14558, 42435, 33123, 65171, 13410, 44798, 61556, 30912, 41106, 12571, 27536, 42969, 15008, 28686, 59874, 35854, 3926, 46472, 28950, 34874, 14308, 62339, 20385, 1191, 44262, 28072, 47658, 32937, 2307, 49576, 17854, 8534, 46751, 31819, 39201, 5980, 33539, 42231, 11703, 51347, 38463, 3117, 61515, 17797, 38013, 28980, 6656, 60760, 4482, 55193, 18192, 57607, 41035, 12756, 49847, 10941, 35187, 60466, 17217, 50917, 36048, 4755, 63145, 33429, 12593, 56955, 36277, 29721, 8905, 17513, 34979, 5066, 60379, 19990, 33923, 49631, 29949, 59260, 31957, 39079, 2315, 44860, 14334, 23087, 8585, 46793, 2119, 38067, 58836, 12666, 35080, 40986, 23863, 18051, 60582, 5532, 52533, 14810, 37757, 27182, 51287, 4263, 45544, 24427, 34171, 46576, 15911, 32297, 56728, 14506, 36775, 29793, 7238, 46214, 1098, 63848, 28018, 35261, 26291, 55870, 681, 19234, 46664, 27309, 41141, 18070, 34659, 11005, 43805, 15730, 54889, 4387, 36385, 62115, 978, 55942, 18955, 64376, 48708, 1953, 
23890, 38517, 63598, 44425, 14633, 37135, 2695, 29192, 21319, 39541, 25942, ++ 704, 42041, 24034, 10160, 46690, 34071, 52735, 43619, 64844, 39478, 59697, 41249, 24433, 881, 38332, 11161, 52960, 3998, 29433, 55562, 238, 46943, 10151, 51964, 17021, 39724, 31728, 13917, 43387, 34388, 4916, 63974, 40209, 26169, 19745, 62159, 30023, 18119, 1747, 38959, 13220, 48746, 3045, 33544, 50463, 21804, 12322, 30913, 48027, 61697, 25113, 12804, 19048, 54793, 39869, 10772, 44360, 31458, 4070, 49637, 12591, 57753, 45593, 8877, 30617, 47837, 593, 50322, 14772, 7662, 62213, 13306, 24329, 59509, 34884, 50004, 2942, 37064, 30381, 19543, 1505, 16070, 45526, 12895, 34728, 18150, 62464, 49724, 37843, 19307, 45668, 15618, 34448, 62082, 17361, 44869, 24962, 38441, 12990, 33648, 6401, 29012, 41026, 58134, 27423, 56109, 7454, 53859, 19624, 38014, 28379, 18562, 4080, 60276, 32442, 53390, 21030, 37827, 48698, 7055, 25924, 61766, 39127, 8334, 59174, 41766, 27334, 48363, 33911, 57305, 4294, 16633, 60967, 26409, 11929, 40471, 59348, 1333, 62474, 16821, 54497, 21072, 63874, 29585, 7233, 24039, 44200, 27871, 8481, 57989, 41674, 19353, 53136, 24741, 30070, 8774, 32760, 16911, 61275, 29090, 44114, 22649, 54824, 32379, 6533, 58533, 26179, 42612, 836, 53704, 20972, 6721, 51292, 24343, 62231, 48443, 37949, 15202, 43300, 9903, 53099, 1479, 22130, 9394, 62499, 19323, 58644, 28921, 53474, 36544, 17562, 55275, 29327, 5981, 44417, 53037, 4590, 57145, 45687, 28467, 40341, 19556, 33626, 59407, 16422, 56314, 36288, 61642, 14025, 54448, 757, 51674, 38587, 9514, 47926, 2259, 52450, 42313, 21474, 37717, 10061, 43563, 2957, 48871, 20697, 37397, 58110, 5104, 52675, 13320, 48251, 39273, 25887, 53119, 2154, 38645, 28211, 17047, 42081, 22496, 31359, 43503, 7050, 37558, 30551, 56931, 13396, 32139, 6726, 48216, 25047, 64754, 35250, 54513, 5973, 50865, ++ 63972, 31710, 58253, 51526, 20920, 63164, 7793, 24872, 4229, 16408, 35744, 8338, 52312, 29776, 47453, 26310, 43420, 35917, 18985, 44374, 33412, 14380, 58528, 30686, 4431, 
26013, 47583, 62957, 8357, 27115, 53175, 32479, 10924, 56166, 7844, 47791, 5730, 56806, 51637, 20958, 64765, 26662, 41247, 60500, 7939, 38069, 53097, 65294, 16927, 9860, 35495, 57276, 45907, 8404, 28019, 62325, 17958, 56541, 20613, 61626, 29135, 2439, 35248, 26723, 55202, 19439, 65358, 23819, 55907, 44964, 28163, 33921, 53714, 1090, 41553, 12395, 61056, 15171, 58664, 40548, 56756, 38049, 24587, 63268, 48066, 5237, 28017, 14905, 6036, 59585, 29153, 57010, 7362, 31121, 54470, 35781, 10334, 48664, 56705, 18118, 44215, 53432, 767, 34530, 12486, 39032, 30112, 23094, 9491, 50103, 512, 56393, 35710, 17105, 6776, 48026, 916, 64600, 11523, 54808, 43695, 13477, 24547, 51087, 22216, 17255, 6661, 60357, 10476, 24864, 38999, 51244, 7716, 45291, 65534, 30041, 22341, 43900, 27405, 36009, 9417, 50334, 2430, 46111, 56629, 34213, 12825, 54772, 49640, 22719, 10892, 47639, 35484, 12111, 62867, 43458, 49078, 5930, 37432, 2317, 63819, 14945, 3733, 27934, 43648, 21784, 48796, 18353, 31161, 45114, 28190, 65012, 13696, 43813, 1967, 26606, 11641, 51688, 27998, 63009, 23495, 36686, 57033, 27611, 48673, 34337, 7728, 41063, 5009, 64690, 24245, 42288, 13358, 62787, 18761, 25551, 34017, 14539, 36846, 11188, 55086, 3514, 46872, 8496, 43058, 6507, 22405, 10093, 39812, 28085, 58177, 18390, 22180, 63136, 27695, 59132, 12479, 25957, 65277, 16074, 54326, 23738, 59334, 14168, 52061, 12171, 41969, 31141, 22210, 64270, 1217, 57316, 8305, 31933, 19599, 65011, 32615, 57966, 8948, 49802, 13979, 54245, 27211, 15448, 50944, 9644, 41760, 53074, 26603, 58483, 16270, 42400, 8871, 19501, 47153, 11688, ++ 34470, 7120, 18870, 3411, 29905, 12732, 37664, 49544, 30622, 55644, 22305, 46390, 18726, 63006, 13561, 57654, 5602, 23771, 60367, 9057, 53569, 38474, 19928, 49358, 42056, 54405, 1692, 20686, 36185, 60437, 12723, 39158, 17553, 48862, 35194, 22257, 40665, 25054, 11639, 37315, 4684, 54038, 15741, 19536, 56078, 25474, 1582, 34419, 23216, 40773, 49252, 919, 30326, 60247, 21524, 33748, 42614, 13515, 46541, 7914, 40255, 
53960, 15238, 63239, 4371, 41302, 31762, 6177, 37172, 18312, 3496, 47119, 10789, 63030, 20193, 28558, 46092, 25081, 33086, 6277, 21514, 52666, 3716, 32359, 20473, 54911, 36959, 51541, 33317, 41234, 3510, 50605, 24002, 42164, 1410, 21186, 65313, 28129, 2880, 59768, 36710, 15470, 64734, 19227, 51475, 2593, 61104, 43348, 62944, 31964, 46203, 24378, 50961, 42107, 63549, 25065, 45557, 17627, 34598, 30559, 19737, 57562, 33275, 329, 45204, 55415, 31538, 18582, 43381, 63043, 13762, 29537, 42463, 19331, 35215, 5681, 51702, 13494, 55523, 4363, 47378, 30920, 41219, 14373, 19790, 65235, 39516, 5302, 35085, 25984, 63351, 3376, 31515, 56272, 1767, 22186, 26515, 58824, 19065, 54069, 25402, 39418, 48117, 62135, 14148, 52648, 11189, 38454, 60898, 9495, 40144, 17000, 47811, 31525, 56598, 32806, 63984, 21321, 3054, 39711, 14013, 47235, 12506, 43721, 4365, 25035, 52267, 21078, 51039, 31253, 9973, 48489, 33079, 50374, 39975, 547, 61022, 47987, 6966, 63912, 26651, 35946, 57690, 23573, 29528, 65033, 33231, 52810, 43958, 20743, 8029, 42656, 50672, 6152, 35398, 20028, 40834, 55371, 4191, 33836, 50559, 6715, 40585, 32859, 8596, 63490, 24797, 50102, 16279, 33103, 36612, 23200, 60696, 42506, 50319, 12335, 6340, 47838, 20885, 60896, 35103, 5303, 39422, 47286, 21830, 62548, 18555, 291, 33661, 40182, 4879, 60330, 30972, 55738, 27284, 41353, ++ 22522, 49087, 44755, 36834, 55283, 48065, 19792, 62143, 13886, 45049, 5031, 60738, 32962, 3002, 37446, 20462, 32529, 64152, 15451, 40798, 21910, 2708, 63713, 7154, 28575, 14994, 57897, 24850, 50444, 3076, 46402, 21956, 59867, 296, 30533, 63314, 10349, 43634, 61406, 29010, 45812, 9265, 31617, 47418, 5296, 43801, 59266, 15208, 56698, 6680, 20453, 63775, 14404, 38521, 2811, 52777, 6297, 27063, 36556, 51447, 18831, 24387, 43608, 10540, 52284, 16832, 46680, 58853, 11733, 60394, 39440, 51344, 21364, 31507, 43830, 51941, 4503, 54752, 9925, 49232, 47383, 11595, 59986, 41673, 9133, 44612, 1898, 23441, 11995, 61384, 21655, 39011, 13279, 58718, 16289, 49962, 5752, 
32522, 41803, 22497, 9043, 45712, 24680, 40485, 10019, 47880, 16780, 35247, 4734, 15130, 58520, 11832, 7992, 21975, 33804, 14131, 38799, 56960, 5705, 52015, 2545, 40905, 12245, 64329, 36097, 5319, 40065, 52724, 2851, 21174, 54429, 34480, 789, 57031, 15168, 53915, 32418, 20647, 39866, 63212, 24577, 15775, 58706, 28280, 48273, 1502, 21464, 59537, 14846, 45790, 16535, 50814, 39775, 20348, 42077, 52174, 9551, 38649, 45351, 8020, 51192, 21080, 10124, 37002, 1649, 34155, 59444, 5238, 24848, 54460, 2551, 58194, 37353, 5658, 19153, 40656, 7303, 46036, 58429, 35517, 54024, 6110, 60823, 17324, 38409, 64357, 10848, 45886, 34902, 15557, 60078, 3422, 21637, 8266, 27504, 55986, 16131, 30183, 42459, 21446, 49457, 17007, 12526, 44901, 1158, 50160, 13191, 26171, 2598, 64359, 37127, 11530, 31058, 44617, 16619, 60785, 8254, 30587, 47219, 11296, 29138, 45337, 16863, 61566, 29939, 35925, 3751, 54590, 7188, 58858, 11705, 45670, 4674, 14370, 24613, 59417, 44174, 26748, 40105, 3050, 25622, 63384, 11771, 59119, 3524, 29616, 36483, 59930, 20173, 50439, 12243, 22204, 45311, 1219, 62912, 15326, ++ 57126, 9686, 61441, 25281, 16016, 5833, 41801, 115, 34681, 53796, 28485, 12186, 42318, 56309, 9977, 54531, 44883, 1516, 48689, 27324, 56929, 45494, 25440, 34772, 61436, 9638, 38037, 44587, 16546, 29760, 55665, 7314, 28115, 44224, 54672, 15352, 50181, 2604, 33286, 14621, 57579, 39753, 24398, 62399, 36087, 30114, 10653, 46830, 28192, 51357, 33194, 26253, 44567, 24170, 55984, 16559, 47499, 65147, 230, 32101, 60023, 5857, 49021, 32895, 22427, 38880, 8259, 20860, 29426, 42483, 15809, 27022, 5161, 56279, 8558, 17090, 38535, 18539, 64167, 27221, 14127, 34279, 28818, 17560, 57480, 26193, 64396, 43121, 55925, 16812, 32037, 63811, 9436, 36475, 46283, 25868, 39479, 52982, 13682, 50728, 30782, 55624, 4206, 58830, 32792, 26346, 52474, 21551, 54935, 25833, 39425, 29330, 44182, 55576, 3017, 59520, 9225, 23346, 42528, 27948, 62848, 21648, 50498, 26608, 15747, 58230, 13003, 28437, 46625, 37386, 9263, 59663, 23382, 
48563, 25457, 38598, 3174, 49136, 11352, 18642, 52693, 37797, 6546, 62127, 10070, 36337, 51860, 29839, 42708, 941, 57210, 27302, 7356, 60199, 14261, 33503, 64723, 15586, 30308, 34459, 13046, 58076, 29596, 56443, 41975, 26865, 16310, 50101, 35628, 13162, 46483, 22274, 10384, 61580, 50380, 15638, 54887, 25389, 16560, 8661, 29453, 41411, 24660, 55803, 14964, 30373, 58038, 18456, 1120, 56423, 26221, 38667, 61890, 54211, 11608, 43883, 20406, 52010, 10215, 59155, 1943, 41319, 62205, 32494, 54770, 19822, 38284, 59751, 15545, 30094, 48637, 23995, 61860, 1496, 53623, 24759, 49158, 14911, 36332, 62168, 19482, 57217, 25547, 1745, 48008, 17743, 46001, 38142, 28449, 44506, 20258, 51731, 27770, 55216, 37798, 137, 34284, 15022, 57103, 51549, 17946, 44807, 32975, 23276, 42962, 54797, 7745, 45741, 10416, 28476, 53886, 37871, 17391, 52398, 36243, 4560, ++ 28905, 38439, 2139, 32708, 43091, 58989, 27873, 57406, 21133, 8965, 38752, 48924, 17839, 25771, 50409, 16195, 28153, 51485, 11855, 36470, 6099, 32073, 12484, 52617, 18180, 46110, 31270, 5280, 64557, 40891, 10583, 37214, 52463, 19087, 4295, 26890, 36349, 59077, 23698, 51176, 20120, 709, 49667, 11234, 17377, 52190, 21416, 39046, 3767, 41787, 12078, 54209, 4904, 48634, 10232, 39413, 31096, 22796, 54612, 11321, 38168, 27906, 62624, 1354, 56904, 26090, 64437, 34383, 54359, 1808, 57561, 36480, 63889, 23493, 35363, 61592, 29295, 42286, 2171, 36239, 43331, 62130, 640, 50115, 15443, 35856, 10748, 29843, 7978, 47221, 831, 43901, 27753, 55181, 4392, 62621, 10960, 19462, 60504, 27183, 6969, 38083, 20653, 49491, 14003, 64161, 5854, 41648, 11060, 49003, 2244, 64900, 16030, 37351, 26788, 49634, 29959, 53904, 15402, 47468, 9940, 37101, 7553, 44072, 30848, 49024, 23609, 65064, 11592, 32021, 50225, 17536, 41534, 6877, 64067, 10242, 58394, 28804, 60484, 34048, 344, 43244, 23132, 33010, 53504, 25321, 17067, 8169, 61805, 32495, 13453, 37623, 44693, 23898, 47941, 4993, 27677, 50299, 571, 59884, 40477, 4612, 46082, 17929, 7200, 65437, 44276, 30530, 
20386, 62567, 27452, 52071, 34520, 23944, 29221, 41905, 1004, 33537, 44401, 65289, 18872, 50855, 489, 32971, 49994, 2858, 42746, 28277, 40308, 47764, 7193, 44652, 16869, 35696, 22926, 32328, 65405, 2922, 38473, 24849, 53783, 30763, 18493, 8925, 25391, 42026, 5256, 47588, 34351, 55573, 3669, 57295, 13597, 41368, 32948, 4884, 39441, 58637, 37, 22778, 41821, 5414, 38692, 55083, 21903, 60340, 10756, 15152, 65396, 2665, 40261, 30401, 63224, 9720, 46956, 18670, 53839, 30751, 7545, 37048, 10076, 56336, 1339, 52695, 12507, 16691, 49619, 24519, 61729, 43690, 2427, 64214, 8278, 33134, 24353, 49955, ++ 43988, 17798, 65201, 13288, 52178, 23013, 10450, 40340, 50813, 26498, 64502, 1877, 58789, 31448, 6519, 39874, 34360, 7900, 59518, 17600, 62679, 50067, 22783, 40405, 832, 59304, 19611, 53702, 13262, 23294, 61079, 33472, 14165, 65362, 41370, 46910, 8670, 16804, 42191, 6535, 35575, 64052, 28431, 55524, 42567, 2483, 64422, 8946, 60750, 18373, 58723, 37015, 17543, 63051, 34645, 58009, 9114, 41660, 14898, 58513, 21018, 42843, 16150, 36197, 12722, 50665, 2890, 48406, 14234, 32508, 9363, 49481, 11473, 45393, 81, 48585, 13069, 57149, 23926, 58929, 7805, 22598, 53650, 39387, 6530, 60855, 20946, 52215, 38293, 25201, 53892, 18756, 49068, 14740, 23173, 30248, 56202, 35318, 17, 47551, 63599, 17069, 43646, 28652, 8139, 37448, 56583, 29703, 18379, 60040, 36536, 20444, 52922, 5171, 62281, 19109, 40260, 1417, 61045, 32218, 18253, 58683, 53185, 3288, 61453, 8716, 38372, 1674, 56435, 26036, 4959, 61903, 12789, 45965, 27190, 36500, 44457, 22822, 7438, 45607, 26867, 57804, 12491, 44928, 3750, 40388, 55867, 47204, 18989, 52614, 21859, 64388, 10465, 54297, 17642, 56816, 36611, 19823, 46831, 23340, 16091, 64059, 24464, 33047, 53395, 22921, 3271, 57521, 7799, 41279, 404, 17821, 43154, 4130, 53341, 11070, 57747, 49352, 21920, 4537, 31408, 59904, 20612, 45211, 9050, 36226, 22474, 61294, 12082, 20177, 63592, 31730, 1721, 46467, 58350, 5192, 37299, 29121, 50819, 15079, 34708, 6096, 39233, 48891, 63311, 
10983, 58449, 17800, 7448, 45214, 19312, 35848, 28632, 46784, 21171, 56376, 27116, 18038, 44257, 53359, 13088, 64432, 32360, 9850, 42852, 26711, 34485, 56800, 23945, 49320, 18275, 6654, 16570, 35646, 22846, 62449, 11196, 41630, 64707, 24009, 28918, 46619, 20544, 35883, 31570, 65305, 38833, 5477, 34844, 14356, 32000, 23489, 47858, 13688, 57932, 20669, ++ 11178, 53658, 26215, 45876, 3871, 35961, 63834, 17170, 6788, 33580, 15073, 36673, 22705, 43930, 63460, 21346, 61161, 24645, 41704, 30130, 3448, 38870, 8566, 55877, 32985, 24308, 42899, 6707, 35639, 48484, 1256, 25240, 50794, 6146, 31091, 21173, 62631, 52871, 32247, 58575, 13005, 44778, 22346, 7269, 32885, 53620, 27360, 45256, 23621, 49924, 30695, 7171, 43127, 28518, 1911, 19841, 50541, 25682, 44908, 3595, 48126, 7391, 53568, 45762, 28698, 40958, 19753, 24670, 43286, 62370, 25581, 40001, 19084, 30832, 53375, 22095, 33406, 5760, 50831, 16648, 31740, 46701, 18917, 30726, 45164, 24358, 48526, 4645, 58430, 12617, 34872, 60286, 6695, 33896, 51411, 42947, 8241, 45253, 21938, 33347, 11731, 52063, 2038, 62340, 45011, 22857, 1069, 46714, 33575, 7140, 27120, 47224, 13025, 31649, 45070, 10576, 57408, 25641, 36355, 4663, 46127, 22937, 29650, 17030, 41961, 20197, 47845, 33670, 19029, 51925, 43032, 30383, 35732, 19916, 53077, 1912, 16429, 40812, 56319, 14999, 64918, 8662, 50709, 18089, 61053, 31718, 11494, 26677, 36795, 6189, 41315, 4008, 29256, 34715, 2608, 40743, 11318, 62248, 6865, 55017, 35737, 42873, 9035, 49425, 12362, 39904, 47163, 15112, 36772, 55855, 32320, 64556, 14705, 59181, 38304, 20143, 28421, 12745, 61194, 38886, 47969, 10621, 37489, 27019, 63266, 13232, 55010, 6499, 53265, 37080, 23629, 14159, 52405, 28593, 9449, 48756, 13713, 57414, 7694, 45314, 61468, 20939, 56466, 3963, 31463, 35570, 51113, 27418, 40567, 23010, 62908, 9759, 52102, 6945, 64825, 10852, 51036, 34764, 8858, 31375, 48525, 27553, 15595, 51277, 6108, 52890, 472, 43297, 9090, 33505, 53544, 41272, 59878, 48558, 4006, 29281, 51118, 21663, 2336, 49161, 
61520, 15799, 40865, 58351, 4183, 27667, 19658, 57496, 26087, 48867, 56101, 18395, 59245, 39822, 6312, 61150, ++ 40759, 33858, 7965, 59889, 31349, 14467, 46995, 29277, 60269, 42884, 53078, 10951, 46785, 4524, 14220, 51893, 506, 47751, 13343, 54869, 43221, 20870, 65257, 15611, 48010, 11482, 62446, 29130, 57465, 17312, 42491, 58339, 20287, 39436, 56616, 12228, 37785, 1483, 27653, 18536, 48116, 3942, 34179, 60004, 14106, 19008, 37592, 15981, 35303, 400, 61521, 13954, 52040, 22144, 59852, 36030, 5364, 61974, 29928, 18448, 64078, 33518, 23389, 4944, 59209, 9708, 61410, 52689, 5626, 17631, 55045, 3775, 59873, 7182, 41201, 14962, 65233, 44474, 39091, 10582, 52385, 4074, 63708, 12251, 56131, 2620, 33022, 40360, 17998, 45987, 3243, 26742, 40799, 64881, 2217, 17636, 28906, 61568, 15231, 40196, 57538, 36043, 25037, 32120, 15709, 59433, 39875, 12676, 61733, 43012, 57739, 8921, 40694, 51388, 22640, 34988, 6524, 48516, 13872, 65224, 39773, 11327, 55839, 35310, 25330, 54584, 14021, 60682, 40545, 7905, 15583, 55191, 3540, 58976, 11102, 61369, 31315, 48090, 4654, 34924, 20419, 37144, 30133, 41827, 5587, 22236, 64124, 2164, 49446, 59080, 31037, 46312, 58348, 12663, 61177, 30608, 52912, 25669, 44392, 31345, 3063, 28325, 55539, 1106, 61425, 19681, 28874, 51361, 25220, 11831, 48519, 8500, 45761, 30156, 6319, 63675, 46744, 35140, 7620, 26094, 14596, 57270, 5435, 52885, 17933, 46952, 34086, 25643, 43380, 4155, 49552, 56993, 39343, 18131, 41839, 63053, 19349, 26935, 43131, 23367, 11883, 47206, 28171, 15800, 53154, 21918, 2103, 60968, 12218, 53923, 505, 39090, 32075, 17297, 26527, 38106, 3175, 63632, 24342, 59784, 2485, 36999, 56111, 20131, 39784, 13871, 62752, 30879, 21366, 61845, 12682, 26391, 1571, 31726, 55821, 39558, 5721, 45240, 33875, 12870, 38406, 6193, 26454, 8750, 47731, 51979, 11017, 44655, 938, 41110, 7381, 35604, 3358, 29353, 46469, 30699, ++ 476, 50612, 21883, 39031, 19130, 54411, 1309, 55890, 23620, 3247, 20130, 61647, 25131, 57159, 35066, 29093, 37803, 19341, 58090, 26574, 
10731, 46585, 27690, 35340, 4709, 51220, 37560, 2443, 26777, 51774, 8857, 31991, 45374, 3302, 25928, 49443, 22951, 43942, 55935, 8127, 38438, 54536, 40486, 26328, 46304, 62754, 5941, 57425, 11676, 47276, 25908, 40136, 32560, 10982, 46161, 15613, 53313, 12971, 49390, 37538, 26497, 11932, 60878, 39747, 18088, 35531, 31196, 13423, 37910, 47555, 22912, 33758, 45957, 27639, 58323, 37379, 8899, 19962, 26650, 60608, 35030, 25459, 37726, 28274, 42028, 19669, 65438, 13709, 61698, 30971, 22287, 56835, 11382, 20757, 38669, 59246, 48315, 3758, 53684, 26224, 5507, 18876, 54648, 10481, 50394, 34673, 19889, 54210, 23797, 3395, 17795, 30447, 63931, 176, 16703, 60564, 43453, 21243, 52507, 8263, 27399, 49845, 732, 63624, 6103, 45648, 29048, 4216, 22444, 63305, 27805, 47244, 39395, 24372, 33229, 50562, 21831, 13292, 54878, 25859, 49708, 2712, 54223, 13779, 56970, 46550, 33712, 43551, 15917, 23614, 9348, 18404, 50006, 24878, 43834, 20687, 5653, 38255, 13710, 51579, 18623, 60573, 21626, 38868, 31878, 10280, 63530, 5490, 33821, 60130, 21176, 39232, 26288, 56203, 23071, 36466, 2648, 18227, 51772, 54365, 42428, 32549, 62086, 23825, 40780, 1997, 51268, 16494, 59686, 30008, 10344, 33449, 5871, 60637, 24615, 922, 40465, 55453, 4337, 64464, 33151, 317, 60291, 44101, 24273, 46311, 14432, 29292, 43461, 33512, 49394, 15267, 60395, 43182, 54916, 14089, 45846, 19001, 40350, 12382, 46517, 22277, 4056, 59066, 28863, 47343, 18849, 49721, 36836, 5235, 46275, 57681, 20650, 43638, 13606, 24963, 58595, 17164, 60251, 19379, 55346, 31022, 62875, 42324, 17644, 33430, 63722, 15145, 55118, 21414, 62200, 42722, 51632, 25397, 10293, 54690, ++ 15840, 27558, 56748, 5329, 48579, 24729, 41190, 11954, 32343, 45339, 49852, 30345, 7351, 40923, 18316, 55444, 9835, 45757, 5362, 33941, 53251, 1998, 60636, 18818, 56439, 30417, 21538, 47108, 14730, 38705, 64331, 11837, 16146, 61931, 34951, 9527, 60273, 13808, 29376, 63558, 24023, 16366, 10171, 49153, 1192, 31383, 41443, 50757, 29589, 54846, 19458, 64742, 3329, 55408, 
29319, 41160, 24531, 34084, 8499, 56441, 2099, 51767, 30449, 14488, 55805, 455, 44555, 57961, 7564, 29804, 64846, 12329, 16325, 52110, 2688, 24877, 55480, 30254, 47931, 1320, 14607, 54334, 7097, 59469, 9739, 51275, 36590, 27438, 8539, 50343, 37485, 14264, 44721, 52699, 31598, 9938, 24148, 37211, 12765, 42613, 46839, 60956, 3123, 41996, 27870, 4558, 51124, 9629, 31380, 48392, 35498, 53519, 24274, 38221, 55098, 28508, 11990, 41558, 34020, 20017, 59343, 37704, 16207, 32843, 42711, 10781, 56838, 36657, 49489, 32548, 44, 18201, 64624, 8896, 43677, 6263, 29193, 63673, 39665, 9681, 42876, 62629, 23943, 28592, 38318, 19510, 7697, 55074, 28009, 62430, 38774, 53639, 289, 36019, 8571, 48639, 65186, 22380, 57417, 9855, 34024, 47598, 14422, 52401, 25952, 45454, 37810, 54608, 18475, 3819, 44712, 52780, 1810, 13922, 48939, 10049, 59582, 40488, 30633, 1402, 22765, 16118, 3566, 44130, 11559, 31111, 64917, 8039, 39861, 19584, 64120, 26530, 47323, 12330, 34537, 54515, 31007, 10780, 36198, 17668, 51654, 39727, 12962, 36748, 6685, 65172, 37903, 56809, 4721, 20276, 25681, 57524, 5709, 23412, 1845, 49867, 28377, 57934, 6335, 52263, 33353, 63041, 43842, 11114, 35316, 8096, 41518, 3319, 58405, 25351, 38779, 10449, 35020, 64111, 8408, 37314, 52408, 28105, 9358, 42807, 613, 53457, 14098, 24735, 59684, 2638, 22809, 37465, 30321, 9257, 28014, 12677, 16603, 64593, 38085, 20275, ++ 62491, 44557, 12453, 63548, 34930, 9385, 51374, 37025, 58333, 8658, 16482, 38218, 54061, 12635, 48465, 2617, 64767, 23357, 31004, 61993, 16844, 37119, 13857, 41470, 7753, 44123, 10276, 54186, 34222, 4118, 23919, 55016, 29620, 47441, 18210, 53523, 5178, 33803, 46034, 2930, 51891, 30467, 61133, 20780, 56386, 24722, 13355, 21894, 4428, 38314, 8028, 48393, 16750, 43680, 6466, 63687, 980, 57477, 40520, 21311, 43946, 34972, 6593, 46396, 22056, 49835, 26874, 20527, 41831, 51078, 1168, 38803, 60795, 21085, 42877, 49953, 4814, 62519, 17886, 56546, 43552, 21827, 40946, 16211, 46408, 23291, 5672, 54700, 42425, 1486, 63473, 
28514, 47894, 5102, 16420, 57834, 41356, 20164, 64521, 31240, 8881, 23532, 38381, 58254, 17474, 65501, 25438, 44499, 14790, 62677, 21846, 6236, 14292, 45997, 4318, 33018, 63106, 2109, 54318, 31042, 3684, 44583, 24010, 51324, 20908, 59739, 14813, 24743, 9605, 58045, 44967, 37977, 21352, 51569, 14453, 57347, 37565, 1059, 19195, 58616, 32180, 16740, 10606, 51228, 701, 59932, 35540, 12297, 51488, 4708, 33135, 14500, 63635, 28497, 55654, 15283, 32631, 1415, 42253, 27173, 62990, 4201, 40971, 6387, 59008, 16639, 2097, 23568, 41738, 62816, 27798, 15910, 34799, 61980, 32048, 43970, 24500, 55349, 13445, 64259, 45581, 35957, 56357, 28850, 59001, 37837, 21389, 27835, 55549, 63, 35470, 15341, 58875, 22325, 50092, 15928, 45827, 21787, 59523, 47868, 7998, 25183, 58680, 19026, 50408, 9402, 26427, 16473, 45645, 64070, 10498, 41688, 29660, 37310, 62512, 33989, 9310, 42450, 20807, 30042, 7614, 17381, 25192, 50283, 64895, 23085, 55552, 29567, 16995, 51963, 14712, 54733, 27474, 16095, 50531, 19903, 3190, 45975, 32693, 48375, 22381, 36564, 50217, 7142, 34641, 40396, 51056, 5882, 46164, 60518, 39157, 53164, 34177, 1842, 49264, 8559, ++ 36418, 2952, 42233, 29997, 18035, 60877, 26821, 6129, 19519, 39433, 65041, 947, 27394, 62322, 31925, 42654, 15930, 39192, 49517, 7031, 43810, 28845, 50330, 23049, 58710, 25634, 63627, 17717, 60090, 41918, 49819, 7489, 36788, 585, 43142, 27946, 39835, 64921, 19386, 41643, 12408, 36913, 5033, 43399, 8612, 39378, 58188, 34816, 63415, 44255, 25005, 31872, 56953, 23088, 37969, 20142, 47181, 27537, 13823, 64908, 17142, 54927, 25277, 62862, 37256, 11126, 63601, 4246, 34597, 15334, 56103, 25954, 35795, 9608, 31940, 13852, 36703, 11879, 40121, 32719, 3406, 49583, 29626, 63105, 298, 34126, 57696, 17213, 32447, 53206, 19914, 9092, 35539, 25359, 62961, 32865, 7203, 49780, 1264, 56365, 29301, 48881, 21109, 13502, 45902, 36661, 7610, 57125, 38702, 1645, 42280, 59012, 50177, 27700, 56787, 18719, 49121, 24921, 14967, 46861, 56353, 13158, 62032, 7197, 39269, 2327, 31625, 
52631, 41245, 16906, 12122, 54055, 5465, 29933, 46169, 25225, 17326, 52258, 46804, 27578, 4116, 47773, 60651, 39156, 30512, 15214, 48923, 25104, 40213, 20895, 45425, 22963, 42994, 17205, 39291, 26236, 45873, 59481, 11986, 50515, 16971, 36314, 56741, 29895, 12913, 49132, 35244, 47913, 11443, 30929, 9379, 58566, 42775, 19518, 4843, 51134, 16808, 8344, 39563, 27551, 6773, 48404, 19766, 9562, 50302, 5717, 46099, 49047, 13596, 42196, 53617, 44974, 3687, 37965, 7373, 62401, 2673, 52625, 27592, 14295, 30445, 53487, 42564, 3245, 32673, 41086, 55265, 31779, 8327, 36462, 51427, 18540, 47405, 12819, 21616, 48262, 16256, 61224, 35995, 56694, 41034, 54197, 38446, 1162, 15962, 32031, 11937, 60747, 44988, 7386, 32499, 47604, 820, 61199, 30152, 41813, 63620, 14481, 57381, 4996, 64424, 11417, 29770, 56522, 47006, 13140, 28604, 54076, 16038, 24153, 3932, 19043, 58638, 43553, 22606, 56254, ++ 31892, 23882, 52935, 14837, 47688, 2242, 43729, 62821, 28641, 52088, 22159, 34112, 44470, 20734, 9194, 26101, 60018, 11573, 24211, 57312, 21789, 64234, 3885, 52333, 33634, 48, 39565, 28308, 5859, 20549, 32681, 63034, 22545, 59168, 11146, 57255, 15678, 6361, 48674, 21691, 59507, 25746, 53187, 32639, 65485, 18003, 2297, 52520, 15428, 9613, 59575, 1539, 36610, 12446, 61182, 33071, 9836, 51016, 31684, 4637, 47726, 10443, 41991, 3124, 29068, 54146, 16513, 47066, 59595, 28093, 10706, 48861, 6386, 63345, 45041, 54000, 28446, 46936, 8097, 64590, 20341, 58131, 12954, 35683, 26352, 48807, 11160, 44312, 24533, 7465, 39633, 60047, 55413, 555, 43574, 19063, 54311, 27033, 35046, 14615, 51682, 6596, 63871, 30580, 432, 55883, 32617, 16520, 28852, 52336, 26496, 11555, 20723, 36210, 10168, 43822, 7492, 39064, 64095, 9406, 26393, 34748, 19399, 46401, 28110, 64762, 48198, 5827, 26966, 62555, 34332, 23795, 60124, 35126, 2469, 61992, 41433, 8100, 33929, 12700, 53295, 35877, 6769, 21293, 44164, 57546, 9080, 65383, 1641, 56084, 10996, 60268, 6493, 57841, 3652, 52031, 7569, 19424, 37363, 24019, 45036, 8306, 20553, 
43097, 64926, 24753, 57928, 6005, 55272, 37071, 50212, 22431, 6970, 53854, 38022, 25597, 63167, 34216, 49893, 20907, 60711, 12383, 33760, 62653, 25157, 15759, 32254, 3198, 61722, 24169, 8836, 20990, 51505, 29503, 43532, 25789, 33730, 41427, 5570, 38733, 63527, 20009, 9989, 29764, 62568, 21242, 1023, 48373, 61531, 22360, 2762, 59276, 7741, 55794, 31216, 4451, 53217, 25852, 713, 14552, 23631, 5025, 61446, 27992, 45556, 53765, 39389, 2052, 36202, 24252, 65140, 21791, 40500, 6885, 44581, 11624, 26004, 36012, 23392, 39214, 27004, 44238, 18739, 1725, 21092, 58114, 10600, 43231, 32845, 64912, 47566, 31100, 6662, 27036, 13948, 47209, ++ 17074, 59456, 7075, 37398, 57755, 21516, 33330, 15502, 46306, 4046, 13214, 56650, 5676, 58650, 36380, 51100, 4258, 52759, 35698, 1161, 40229, 12332, 32284, 9577, 45182, 16294, 49013, 12664, 45927, 56767, 13550, 40624, 18934, 48253, 30954, 24446, 51468, 35868, 27084, 55268, 169, 46775, 17199, 11362, 50123, 28847, 45497, 26762, 33409, 42039, 20658, 53005, 27802, 49801, 4152, 54021, 17808, 59127, 23858, 38676, 60495, 19249, 32697, 58605, 8976, 40087, 32168, 24008, 8008, 44063, 53142, 19372, 40712, 24218, 16978, 729, 59062, 22819, 51621, 27109, 38366, 10122, 45488, 5316, 52954, 21231, 62253, 15071, 59103, 46981, 30397, 15791, 22725, 50906, 36881, 13176, 46150, 11874, 62101, 44371, 18264, 34333, 10911, 53305, 40963, 22543, 12309, 47728, 60431, 8549, 34181, 64670, 41226, 2739, 61315, 23439, 58400, 30088, 17977, 36817, 52096, 1258, 57787, 12291, 53629, 17762, 35623, 21963, 44195, 1532, 50086, 7336, 42421, 15340, 55810, 11642, 31498, 59316, 23256, 64474, 42189, 24700, 17736, 63458, 3415, 26434, 34620, 19979, 42485, 28965, 36951, 48353, 31800, 13217, 41562, 35052, 63238, 29643, 54391, 2477, 61609, 32154, 53724, 25, 33319, 8787, 19176, 43888, 26773, 17544, 895, 65152, 40849, 29392, 12123, 47409, 219, 18663, 57625, 4041, 29915, 52590, 41224, 791, 39202, 52147, 58263, 19060, 36871, 30783, 57714, 32731, 11383, 65228, 18617, 56687, 9186, 61162, 16733, 
49311, 1559, 34848, 57889, 46899, 13804, 54355, 35203, 11693, 27848, 52682, 44462, 34503, 25040, 39940, 65353, 43661, 11817, 37808, 50793, 44706, 60041, 32633, 13390, 49010, 20956, 5830, 26873, 50967, 19564, 42709, 4548, 56599, 33621, 53170, 18514, 59284, 1906, 54277, 8058, 60811, 16370, 52827, 32291, 61352, 38281, 25759, 62587, 19945, 10, 36859, 11743, 50737, 57198, 40311, 61683, 4402, ++ 41481, 28410, 49687, 25712, 11353, 39735, 54955, 10062, 31147, 59710, 49109, 24512, 40122, 14776, 47132, 17454, 32912, 44825, 19998, 54705, 15211, 48176, 36581, 59610, 19759, 55490, 31485, 61563, 36052, 1817, 26424, 52585, 3718, 38241, 8324, 45073, 2192, 61693, 9873, 31905, 14951, 38908, 62218, 23341, 36272, 6631, 55701, 12839, 48845, 5429, 61906, 10571, 39671, 14981, 44711, 26615, 42367, 2523, 45434, 7735, 28253, 52821, 14794, 43184, 20244, 51521, 1743, 61745, 36898, 21592, 32972, 2404, 57717, 29216, 61344, 34791, 18716, 42186, 5969, 15567, 55220, 23805, 32204, 61201, 18236, 39905, 2452, 37981, 29053, 3868, 49380, 10544, 41750, 6158, 29661, 60678, 2913, 40005, 22221, 4780, 38952, 59573, 43210, 25796, 5953, 49936, 63298, 3843, 39620, 19554, 45522, 5349, 55748, 29472, 13436, 47601, 15937, 50763, 4981, 60180, 22355, 41093, 30306, 38444, 4021, 43169, 10290, 55493, 13636, 61075, 31107, 18791, 48748, 26548, 38933, 20815, 49308, 4850, 45180, 15656, 1762, 55581, 32728, 45737, 52434, 14096, 47552, 53900, 16292, 60947, 2918, 18807, 53081, 24381, 49317, 21417, 10640, 47225, 15861, 40346, 28081, 11610, 46413, 22744, 50977, 39500, 29033, 61252, 13324, 57096, 46153, 33178, 14991, 60433, 21502, 56628, 31682, 42044, 11294, 43560, 36631, 17168, 23384, 55740, 14076, 26836, 10017, 42890, 54164, 5062, 17291, 40966, 48307, 1832, 36020, 13096, 44297, 20545, 32033, 55951, 23954, 44765, 17221, 5110, 38388, 23147, 59896, 17960, 40287, 4141, 30343, 14798, 54134, 1283, 16920, 27349, 58761, 19803, 30644, 10259, 18153, 47866, 37207, 8781, 63940, 34322, 57251, 10106, 62290, 13002, 48734, 17491, 9596, 23763, 
38176, 29024, 47928, 20774, 49967, 34135, 3475, 41180, 9920, 45523, 4588, 49421, 7786, 41554, 55800, 26319, 45160, 18100, 9788, 33261, 20889, 35293, ++ 10793, 64034, 795, 45622, 65513, 4914, 50452, 22915, 41745, 16916, 35448, 8177, 53421, 29735, 1759, 63960, 28254, 8469, 61394, 30605, 25223, 62891, 6285, 27059, 41081, 3141, 24581, 9010, 21219, 53344, 44353, 10085, 28799, 60781, 54421, 14271, 33215, 20057, 40969, 57866, 44443, 7667, 34504, 3374, 58779, 42703, 19191, 60535, 22659, 51253, 30787, 46455, 18769, 58303, 8315, 22239, 62571, 35658, 16014, 55661, 36458, 764, 49114, 24821, 65459, 12199, 45279, 18552, 56500, 5258, 64363, 39187, 12841, 46488, 4419, 50721, 11223, 30568, 60378, 33935, 47444, 1849, 41535, 14060, 50214, 31275, 55765, 43070, 12501, 64710, 34505, 57161, 26536, 64074, 33729, 21373, 58341, 28172, 55123, 47404, 24624, 1882, 16160, 61432, 37569, 28349, 18477, 35686, 24439, 49399, 15353, 31832, 17348, 51516, 39923, 33520, 608, 42480, 32052, 11164, 45354, 6429, 50308, 16504, 62931, 25502, 58911, 29274, 37242, 20364, 40293, 57112, 3832, 65311, 9923, 53481, 28199, 36750, 19752, 50263, 29615, 40046, 11801, 8410, 27345, 40914, 5032, 31192, 7138, 46914, 25816, 34283, 9610, 61916, 850, 30813, 58784, 5176, 33622, 56241, 7021, 64208, 17415, 59640, 4926, 15490, 52209, 2801, 40205, 31276, 7708, 24270, 51869, 3182, 45001, 28220, 5915, 53211, 26466, 59973, 8951, 65457, 47114, 7466, 44577, 35332, 63935, 22865, 12799, 45485, 62902, 6706, 27247, 60162, 23213, 54908, 28324, 50716, 10607, 37508, 7105, 64775, 28660, 52155, 26045, 49689, 6191, 42897, 58195, 13304, 63472, 38804, 20497, 50066, 35473, 8548, 46980, 3377, 64507, 54811, 29189, 2332, 58087, 26063, 42159, 14343, 22517, 44156, 28555, 37648, 58711, 30696, 46678, 62994, 4227, 56073, 15573, 40026, 12409, 30472, 56973, 21951, 65426, 14932, 35426, 17309, 52270, 31759, 13557, 60091, 5187, 29572, 63292, 1400, 52479, 44846, ++ 55257, 19762, 32581, 13464, 34484, 18800, 29488, 61538, 244, 56058, 25582, 63392, 18576, 42395, 
55115, 22568, 41325, 13690, 47530, 4963, 43024, 18385, 38568, 53835, 14328, 65098, 50745, 37932, 57730, 30187, 15439, 64114, 34731, 17061, 22046, 42228, 64612, 49710, 4356, 17824, 24933, 51000, 27490, 47835, 14437, 29996, 40303, 660, 37323, 16271, 35429, 2874, 65198, 29106, 34571, 52324, 11588, 30230, 50237, 21654, 13125, 62111, 30857, 5902, 38334, 34225, 27369, 48031, 13666, 50286, 15927, 31056, 54777, 22195, 37587, 26168, 56766, 44247, 2995, 53529, 17370, 63999, 27830, 56942, 6907, 25672, 9341, 21984, 51740, 23671, 18523, 2006, 45655, 16962, 52248, 9517, 48590, 15416, 7862, 32288, 57330, 30071, 52564, 33184, 14122, 43914, 55006, 9914, 59827, 946, 53981, 37937, 62148, 6758, 21700, 54723, 25946, 65372, 19838, 55352, 27253, 61512, 32684, 21431, 47304, 33771, 7996, 50916, 2973, 52938, 9059, 28492, 35991, 22516, 44534, 530, 62979, 10874, 57726, 7542, 61535, 22033, 59623, 37290, 56692, 22617, 62814, 38093, 58262, 12093, 39553, 64661, 44467, 17851, 37870, 54933, 14874, 43711, 20047, 49616, 25033, 38573, 34903, 26372, 41888, 62481, 21959, 36057, 54207, 20227, 49261, 38700, 10596, 35857, 16352, 64431, 37580, 13788, 46288, 22174, 49528, 2759, 28510, 33008, 59367, 17729, 2167, 47886, 29244, 38320, 24968, 51943, 14750, 39000, 46753, 4690, 40111, 599, 62112, 46175, 22570, 12645, 41776, 2433, 57074, 10925, 33887, 28982, 19612, 46439, 24123, 6587, 60142, 11167, 41491, 62039, 33113, 24521, 39550, 7095, 35053, 43386, 19279, 52552, 3789, 59464, 31501, 200, 54414, 7835, 25618, 2557, 34858, 13470, 51336, 32156, 6120, 62171, 43464, 17781, 46478, 6620, 25270, 54876, 28808, 59387, 23729, 3096, 48184, 22148, 39574, 54310, 23192, 37779, 25161, 15359, ++ 5778, 38732, 51552, 23421, 43296, 53917, 7650, 36141, 48092, 12252, 38031, 2869, 45984, 11187, 33534, 6647, 59198, 37517, 26363, 56859, 10506, 49980, 2266, 22442, 46873, 29482, 7198, 17986, 42595, 4793, 46660, 23193, 50479, 1108, 58477, 25453, 7001, 29787, 53806, 37444, 63216, 10723, 57106, 20346, 64300, 8998, 54122, 24297, 62928, 7534, 
56151, 25592, 40832, 6062, 46915, 5, 39108, 64527, 6757, 41413, 57851, 23175, 46101, 16809, 57097, 3948, 60260, 8627, 30158, 41485, 23564, 43484, 6832, 49321, 14548, 65021, 9215, 20849, 39489, 24631, 36038, 8701, 43765, 19478, 36997, 48192, 62834, 33490, 4970, 61042, 36405, 53946, 13961, 39162, 4242, 42481, 25981, 37893, 65028, 17725, 10256, 45397, 20404, 8434, 58675, 2509, 21487, 46601, 27325, 42812, 30669, 10687, 25186, 44242, 58033, 8983, 37488, 12544, 48047, 3083, 40395, 14391, 8669, 56077, 379, 15199, 41904, 24195, 64267, 12896, 46646, 16178, 54674, 47511, 13942, 33328, 18047, 38656, 25662, 43579, 34412, 5732, 48486, 18908, 142, 50896, 10105, 17448, 29384, 50165, 21724, 5515, 27740, 51044, 8054, 26619, 41049, 23174, 60815, 1724, 52750, 13854, 3536, 48718, 10867, 30257, 47080, 9793, 14538, 63948, 4470, 58099, 27051, 61794, 48158, 8114, 23895, 58698, 1659, 34565, 15378, 38558, 53944, 20069, 4797, 50993, 30461, 56858, 8449, 61002, 1051, 33285, 56190, 8167, 31447, 16273, 59176, 34301, 25354, 15229, 54011, 31244, 60692, 36275, 16087, 43867, 64293, 127, 50895, 9113, 32794, 56235, 26800, 45083, 22658, 5218, 51878, 15732, 45749, 22053, 62379, 15082, 49524, 10704, 40096, 16542, 47185, 35727, 18338, 50150, 40955, 60375, 15262, 42426, 22255, 45072, 26656, 35199, 334, 51823, 27844, 37261, 48619, 1140, 39898, 9081, 44027, 34341, 64143, 8432, 35817, 14578, 50039, 12159, 61059, 48359, ++ 27780, 58241, 8924, 60423, 3439, 26539, 57473, 14103, 20581, 44193, 27929, 60805, 31695, 21440, 57813, 48837, 19169, 406, 51689, 15780, 34564, 60514, 27894, 58268, 10854, 43609, 35023, 62194, 11936, 33879, 59729, 6778, 43482, 31637, 39114, 11702, 47165, 35282, 12605, 22832, 1677, 41852, 31290, 4733, 45903, 33946, 11971, 44941, 28075, 43021, 21151, 48058, 13621, 54688, 17346, 60050, 25148, 19507, 48250, 33244, 3451, 39513, 10091, 52161, 26395, 42730, 22510, 36113, 53790, 214, 59272, 10245, 62734, 33606, 1635, 41099, 31572, 48430, 7371, 61879, 13443, 51116, 4151, 59715, 30031, 1016, 44907, 
16678, 40880, 27991, 8299, 44055, 31793, 24889, 62485, 19534, 53392, 871, 35385, 50266, 40528, 3532, 62852, 48004, 23896, 51279, 33882, 7009, 57592, 13212, 63697, 22901, 47034, 2016, 34944, 18974, 53242, 28998, 62636, 35871, 24572, 53849, 44411, 23158, 37814, 59894, 48611, 18281, 32248, 39578, 25883, 58530, 5204, 30416, 60416, 41015, 56550, 3263, 51970, 16475, 54360, 30780, 14834, 64862, 35314, 24237, 43276, 33560, 55230, 1275, 59197, 41804, 13991, 56526, 35786, 63802, 4360, 47980, 12424, 36740, 29186, 44631, 58392, 21118, 55930, 37509, 1336, 60250, 28466, 43234, 34134, 18178, 41576, 2196, 20644, 54445, 32444, 40059, 19230, 62342, 42704, 25434, 10489, 61461, 41615, 21895, 40257, 15177, 34768, 20371, 49773, 18031, 41987, 21649, 64415, 48661, 19713, 6422, 43010, 57567, 3484, 40514, 8694, 20699, 47697, 24736, 14612, 55479, 37590, 62839, 17547, 48033, 2144, 31673, 57700, 18822, 28727, 57111, 12615, 52974, 5944, 55684, 24857, 33862, 63279, 29749, 56284, 12114, 64389, 6396, 21079, 52179, 28147, 55460, 8682, 64771, 10813, 57981, 24306, 59773, 10365, 63359, 13835, 31255, 57808, 20323, 51237, 15515, 27458, 46723, 19442, 58907, 3675, 40938, 30784, 7871, ++ 42543, 17740, 30235, 36722, 46916, 16281, 40713, 32202, 64365, 6373, 52441, 9882, 50603, 3766, 38954, 12836, 35238, 65314, 23733, 46151, 31231, 7959, 40501, 33169, 17155, 56156, 763, 26023, 52008, 19448, 39888, 27417, 13154, 55844, 5536, 52241, 16696, 56537, 28244, 60362, 49028, 15852, 54918, 38170, 21566, 51728, 16952, 59283, 3898, 53436, 11117, 32487, 57610, 23732, 31497, 43305, 12575, 56326, 9331, 27144, 54380, 18204, 63262, 33828, 14179, 55364, 7036, 63924, 17488, 35134, 25823, 51886, 18167, 27693, 58452, 19786, 55610, 15254, 52425, 29441, 45817, 23109, 32835, 38887, 11955, 54937, 20495, 58833, 11045, 47649, 55974, 20804, 58578, 6549, 46626, 11538, 56672, 30851, 23217, 13587, 59964, 27565, 36375, 11334, 41609, 17012, 65144, 40138, 20038, 36866, 4424, 52683, 14561, 60854, 28023, 49668, 4096, 43316, 7804, 15684, 58763, 
5263, 28655, 64993, 11680, 29774, 4619, 45095, 6860, 61649, 969, 34174, 43349, 12032, 21535, 6681, 31890, 47998, 23703, 12982, 46247, 2634, 41968, 27901, 49203, 13393, 62036, 4228, 45101, 15405, 23506, 32063, 46046, 2256, 29993, 19055, 51804, 32860, 57160, 18104, 64801, 9446, 31568, 42643, 7310, 18686, 50590, 45294, 23297, 6552, 52856, 12689, 55791, 30399, 44194, 11008, 50664, 5196, 55094, 29637, 6400, 57142, 31159, 35764, 11974, 63108, 6115, 54637, 46406, 28015, 10912, 58521, 3935, 53444, 11656, 26629, 37049, 52854, 30144, 18410, 48982, 27000, 51358, 33042, 61897, 7477, 39640, 30804, 21855, 3810, 35862, 42131, 15365, 53719, 9911, 38268, 48940, 356, 36549, 27605, 41232, 31928, 1517, 46376, 20206, 4800, 23507, 39119, 27246, 45460, 33031, 11241, 39680, 1237, 36792, 20039, 49254, 16643, 41977, 5278, 33312, 19293, 53733, 42906, 12000, 37994, 5551, 61784, 41893, 1996, 55561, 26758, 44270, 16868, 56915, 21801, ++ 63088, 2526, 53303, 11591, 21121, 62356, 1636, 50133, 24001, 39297, 17342, 34714, 23255, 62761, 26986, 53021, 29006, 9467, 41885, 3576, 54350, 20170, 63327, 5423, 51420, 21676, 37280, 48379, 14900, 55195, 2566, 63474, 36333, 20846, 61326, 26649, 45575, 2827, 40401, 8459, 32413, 43824, 6226, 26067, 61482, 2049, 30525, 35902, 18432, 38813, 64064, 1390, 37847, 8792, 51413, 4784, 36835, 29629, 44513, 61498, 5468, 28669, 43820, 1282, 50638, 20909, 40423, 28894, 48623, 11825, 44841, 3698, 38041, 46159, 12386, 36520, 5044, 42650, 26556, 420, 57383, 10487, 65348, 15887, 52150, 25096, 35254, 3561, 50650, 30663, 135, 37617, 12845, 49879, 35811, 28458, 41138, 7411, 63395, 44772, 5711, 21875, 54551, 31540, 56525, 5028, 29763, 12007, 50629, 25667, 56054, 32775, 41834, 5817, 38907, 16671, 59476, 23735, 34287, 45852, 20678, 51179, 39435, 17219, 52417, 34823, 57483, 27500, 54247, 22793, 49799, 19322, 63423, 38204, 51387, 27119, 62276, 9359, 36373, 64028, 26202, 58133, 20263, 54780, 6449, 32448, 20989, 52572, 25524, 36570, 64170, 10409, 53441, 16755, 60045, 11281, 39984, 8915, 
27237, 5814, 39223, 23823, 54776, 16151, 34398, 63403, 25347, 11861, 32795, 59101, 26003, 46511, 9158, 22604, 61036, 35180, 17806, 27330, 47582, 12957, 51544, 16676, 45178, 418, 48703, 26321, 43853, 24407, 3004, 65006, 37457, 43281, 29005, 35225, 44876, 2276, 62620, 13653, 9765, 63869, 35538, 12173, 58818, 4375, 18880, 53127, 45330, 10227, 49238, 27708, 52473, 12042, 64969, 23766, 33768, 61651, 13903, 42799, 63827, 20597, 11370, 60274, 38036, 9395, 51440, 41742, 60971, 8339, 53400, 2982, 61963, 19104, 57575, 25074, 47462, 31606, 3694, 38951, 54633, 21325, 50645, 36127, 2728, 22638, 62736, 24880, 56408, 30179, 11288, 52650, 32419, 7331, 65158, 33945, 424, 45915, ++ 35048, 50952, 24566, 42062, 56561, 27251, 45103, 10563, 54523, 4600, 58767, 47265, 14541, 43405, 7510, 16554, 45440, 55991, 21058, 61839, 12150, 49176, 24900, 44573, 13432, 32100, 64520, 4286, 28633, 41591, 23878, 49562, 9347, 47897, 18692, 37734, 10317, 62775, 21280, 52704, 18323, 65036, 28745, 13402, 39970, 46996, 56867, 9474, 49428, 26863, 14670, 50473, 19958, 61072, 15797, 45977, 63479, 1987, 20721, 15289, 49607, 37697, 11409, 59835, 31434, 9555, 46718, 2857, 61047, 20043, 57240, 28343, 62155, 8218, 53015, 24990, 60080, 21508, 63228, 34987, 18997, 40599, 28601, 46759, 6098, 42129, 63621, 14827, 39407, 17811, 65168, 26922, 60201, 22844, 2674, 61598, 14715, 51032, 33390, 18636, 39289, 52034, 15587, 249, 26701, 38187, 46272, 60591, 1464, 44899, 18052, 9241, 48774, 20878, 64347, 31320, 9801, 41331, 57164, 1373, 48932, 10948, 33237, 2524, 42374, 9468, 20073, 40801, 14219, 36920, 10437, 52160, 8284, 24903, 2137, 42844, 14536, 50619, 28915, 1141, 40421, 8711, 37752, 11200, 44615, 59828, 39830, 9242, 57301, 7478, 48183, 20175, 38724, 24606, 34092, 46658, 22473, 62726, 43389, 50088, 14659, 45721, 677, 61849, 27890, 5320, 40628, 56482, 16595, 38272, 462, 31853, 62948, 39030, 3739, 14310, 41042, 63762, 9661, 37288, 24703, 64685, 21152, 53046, 14518, 58047, 9362, 33623, 50504, 12555, 22505, 7011, 59800, 14206, 
23603, 51141, 32530, 47446, 39447, 24313, 1181, 45604, 22083, 42419, 36829, 26314, 1924, 61103, 16514, 59425, 5633, 29541, 39313, 7212, 46014, 3025, 26204, 30977, 8053, 50509, 47631, 22981, 17160, 58908, 26624, 13226, 31168, 48330, 15895, 36360, 24493, 49603, 5654, 63556, 12264, 52963, 61112, 29388, 14296, 46051, 8526, 60482, 29705, 47392, 6961, 45333, 16304, 36617, 21498, 38884, 18572, 49055, 24095, 12924, 53636, 28521, ++ 9657, 14405, 59233, 5130, 33154, 6930, 37479, 30528, 19371, 42717, 29245, 1076, 55591, 30908, 60345, 40010, 2045, 32552, 14049, 38120, 29989, 36000, 1389, 39477, 59038, 8732, 23442, 45307, 60883, 8107, 32828, 16010, 44723, 30683, 337, 58959, 33487, 24106, 42961, 13875, 36682, 880, 58061, 50316, 19652, 7325, 23592, 41227, 60820, 5659, 44129, 33666, 28445, 41794, 25964, 34270, 22782, 53249, 40269, 58966, 35258, 24100, 47344, 19102, 25529, 58352, 37097, 15627, 31990, 39349, 6235, 42342, 22737, 33122, 16841, 47821, 38527, 11433, 44737, 6478, 54417, 49108, 2571, 22467, 60746, 9672, 27329, 53619, 24164, 46235, 5547, 42821, 15976, 32668, 54229, 20202, 43823, 24310, 1739, 55707, 28970, 9075, 43028, 61905, 48452, 19260, 8036, 32193, 16045, 35227, 62435, 29197, 56863, 26327, 11808, 46190, 51903, 18414, 12988, 26858, 30842, 63898, 19247, 56604, 26134, 60772, 49342, 1891, 63736, 16784, 45931, 29504, 56230, 32909, 59501, 17655, 55304, 20656, 44832, 59102, 18465, 53022, 31278, 61291, 16932, 1984, 28694, 18330, 41188, 30442, 3137, 61130, 44057, 6228, 57742, 361, 54128, 31116, 2975, 21510, 59338, 35437, 51500, 13117, 47680, 53555, 20829, 3282, 49479, 64605, 19475, 51379, 15782, 48843, 25114, 57390, 52301, 975, 33504, 58952, 3432, 39512, 7838, 32095, 36786, 18784, 38848, 60455, 17028, 31764, 55333, 47179, 18995, 40730, 57329, 8894, 21076, 5345, 55702, 17404, 60244, 29385, 54753, 5965, 65515, 13958, 56510, 34218, 40944, 25239, 44109, 54982, 18222, 60631, 21432, 51190, 40454, 58365, 17867, 34734, 3592, 64794, 44874, 6768, 36962, 55019, 948, 21732, 44393, 59665, 
13956, 42990, 34601, 29965, 43955, 17997, 23283, 7586, 37568, 64186, 27090, 12980, 40783, 18208, 53250, 33695, 678, 49767, 57527, 4802, 60767, 10146, 43077, 37166, 62125, 20086, ++ 47619, 31411, 38583, 18285, 49355, 63747, 15822, 59909, 51748, 13150, 64958, 25299, 37192, 18186, 5260, 24416, 59439, 50966, 44008, 6041, 57566, 16772, 53564, 27582, 18911, 47611, 57075, 11261, 20426, 38380, 58175, 54082, 4987, 65386, 29143, 51131, 15132, 48490, 4045, 59792, 46422, 24618, 35038, 10956, 55467, 29523, 63580, 15339, 31947, 22396, 58440, 7064, 62479, 3543, 55251, 7981, 48728, 10366, 32061, 13299, 2719, 53597, 7433, 64305, 42114, 4896, 52905, 24403, 55919, 10843, 51227, 14882, 54283, 970, 63754, 29950, 3256, 50486, 32114, 14329, 37281, 17604, 58187, 34242, 13243, 38158, 48755, 19297, 7660, 57865, 34744, 52630, 8634, 48965, 10700, 38545, 5198, 59195, 47124, 12236, 49354, 64249, 34661, 24972, 12925, 53128, 58883, 42568, 22281, 51763, 7320, 40626, 2925, 37228, 55148, 99, 22696, 61812, 36338, 60346, 38627, 6196, 43936, 13510, 46528, 7451, 22111, 35522, 30701, 57922, 23903, 3476, 41597, 15478, 45516, 34978, 4430, 39110, 10546, 33736, 22858, 47358, 3945, 25197, 36137, 51523, 46728, 63363, 12604, 49886, 34838, 28252, 13593, 50677, 26014, 42531, 12857, 16383, 61362, 37990, 25565, 7830, 19709, 41253, 30584, 10313, 36524, 44034, 29462, 8466, 35573, 42230, 5757, 28813, 44736, 7363, 30856, 23488, 45843, 19817, 43412, 28143, 50081, 60882, 4231, 56042, 27522, 1377, 52671, 39774, 4579, 29888, 63422, 236, 26008, 38177, 61243, 43675, 27926, 50431, 7917, 14977, 38895, 32283, 11303, 28503, 47041, 21020, 8201, 50247, 19894, 899, 37162, 48572, 8974, 55904, 16055, 4699, 43992, 54533, 28319, 14162, 30393, 56825, 18920, 43220, 33417, 65478, 9939, 28761, 4109, 54118, 16904, 9222, 56871, 2197, 41394, 59041, 25422, 1474, 44511, 55325, 4287, 58662, 26255, 9567, 63649, 28232, 13302, 46210, 31626, 25810, 51867, 5999, 15667, 39971, ++ 4000, 64749, 22353, 55416, 12540, 25937, 40276, 2367, 22672, 35779, 
8001, 45739, 11491, 53741, 48217, 36543, 19647, 10935, 25799, 64673, 22278, 46384, 7338, 61273, 42139, 2914, 35397, 30322, 52793, 1643, 13997, 25102, 35755, 17626, 42375, 7817, 39330, 54589, 19870, 31015, 9682, 53323, 16587, 44599, 33101, 3193, 37176, 48562, 376, 45718, 12261, 52428, 17730, 47429, 13977, 59386, 38500, 18603, 51746, 26230, 56804, 41185, 21947, 32888, 16424, 45172, 12729, 34488, 1918, 65256, 25316, 47170, 35561, 20554, 43224, 9847, 56199, 16346, 59341, 27516, 61452, 8021, 25766, 43406, 56358, 31647, 1688, 62033, 33142, 28916, 12161, 21752, 41384, 57446, 29857, 64404, 34137, 17432, 26432, 37324, 16623, 3227, 20921, 57975, 6315, 35922, 23483, 3711, 64613, 13780, 47292, 24026, 58454, 15282, 33364, 44662, 29885, 6951, 49972, 3587, 21297, 53071, 25046, 63071, 36619, 31529, 55215, 12428, 43654, 5897, 48234, 37533, 11448, 60995, 6344, 23427, 65030, 30214, 57171, 5350, 63118, 12454, 42736, 56149, 14252, 33372, 5919, 26860, 53976, 22206, 16013, 55987, 37441, 8446, 65128, 20766, 36189, 48952, 28828, 6413, 53132, 32260, 56928, 4108, 65512, 22880, 60762, 15103, 57685, 24417, 54033, 13503, 59719, 21275, 55488, 18503, 36430, 60192, 14892, 56286, 11755, 46967, 16028, 23007, 41884, 13335, 48222, 23737, 44358, 10372, 61967, 16371, 34010, 49451, 13189, 54428, 15701, 30932, 2886, 41186, 33561, 48409, 19464, 58028, 51967, 43314, 3300, 31594, 63992, 12932, 35090, 58590, 30126, 13579, 32440, 25500, 36132, 63146, 24193, 9732, 38909, 53538, 2446, 25141, 49839, 5524, 51758, 17667, 38615, 55885, 32322, 22728, 46894, 37887, 27527, 51089, 33897, 15762, 48420, 31968, 19013, 32990, 38495, 15190, 48904, 35007, 17129, 43662, 23463, 64478, 2817, 40569, 17555, 59572, 29815, 56174, ++ 35973, 8369, 44493, 910, 34259, 46427, 9105, 48514, 32856, 56342, 49650, 20268, 62102, 33859, 8788, 63682, 28583, 49827, 41140, 3214, 52536, 14632, 34336, 10247, 29268, 50242, 16351, 40837, 25606, 62921, 43858, 50651, 11533, 56935, 22695, 62334, 26225, 5934, 34437, 63733, 27212, 41532, 5215, 62072, 
21825, 52118, 12739, 25370, 54505, 35184, 29203, 39792, 24426, 36409, 31036, 21504, 857, 65080, 42875, 6353, 30004, 14580, 62381, 440, 54978, 26989, 61612, 49966, 21294, 40972, 29532, 5531, 60641, 13059, 49536, 24269, 33781, 40183, 22021, 1404, 41730, 31103, 50015, 4509, 21004, 45545, 16485, 40449, 54572, 3148, 44528, 63177, 1222, 25314, 18848, 3924, 45923, 61110, 9723, 42100, 57083, 32545, 45231, 29552, 41051, 14907, 49757, 39495, 30935, 54467, 28551, 11246, 43559, 19703, 63202, 8226, 54045, 39995, 16378, 32464, 47475, 10554, 41677, 770, 17478, 51547, 4161, 62075, 26438, 53600, 18622, 64539, 28040, 19922, 48968, 52783, 13342, 46993, 16114, 41379, 26708, 49609, 21426, 7262, 65429, 23096, 58759, 38164, 725, 42136, 62573, 4679, 19255, 45343, 32543, 3878, 58604, 9975, 55625, 41992, 12063, 47310, 27420, 17258, 44962, 33418, 1794, 48214, 11341, 39884, 4311, 46890, 33724, 1513, 37794, 65243, 10202, 49664, 4621, 32829, 26143, 53673, 1914, 63583, 30246, 45657, 5793, 64319, 19520, 36346, 25300, 51473, 9599, 27160, 45979, 6159, 36120, 65104, 22731, 56883, 10456, 63328, 25696, 459, 16964, 22950, 53573, 38468, 15616, 56963, 4955, 44979, 23311, 62458, 43144, 1733, 47318, 12237, 33260, 61149, 21279, 42472, 35577, 62719, 11513, 29340, 14735, 61596, 26157, 7227, 40818, 58494, 522, 62610, 20569, 6563, 65217, 9846, 42535, 56306, 11139, 61437, 23015, 51704, 2319, 42187, 59898, 6237, 53978, 35614, 20739, 55044, 33450, 1584, 50378, 11103, ++ 25418, 48766, 29058, 52711, 20463, 61707, 28048, 58421, 14987, 5646, 26408, 40593, 2753, 22073, 42242, 15170, 631, 56668, 17521, 33336, 26877, 38853, 55681, 24141, 63985, 12878, 54941, 5740, 48809, 9821, 33725, 21501, 40129, 3286, 46248, 13024, 37067, 57623, 12187, 49947, 2413, 56239, 23247, 47518, 8193, 39023, 60232, 42639, 18819, 10459, 56651, 2333, 64228, 9760, 45107, 57272, 27718, 33448, 16865, 60739, 45652, 9098, 38782, 49046, 36214, 7904, 30499, 17208, 44352, 8865, 58799, 18662, 30979, 38942, 57532, 4661, 64560, 7583, 53434, 47019, 19608, 
51675, 12584, 64197, 36308, 8918, 59624, 26118, 13757, 51181, 18112, 36081, 32089, 47508, 51861, 40040, 13369, 27416, 52923, 22363, 6892, 60758, 10980, 53804, 2085, 63014, 27814, 9590, 20231, 744, 34377, 61204, 5457, 48295, 35414, 24662, 14169, 64849, 27668, 55928, 14949, 58234, 34056, 23377, 59328, 28245, 40167, 15955, 38986, 9659, 33898, 191, 54833, 32365, 39715, 1488, 27654, 35739, 7809, 54092, 37054, 437, 52320, 29794, 39385, 43783, 11618, 19655, 48742, 9776, 31532, 52086, 27494, 54618, 17135, 47782, 40397, 24144, 33766, 18528, 63179, 2325, 39641, 59875, 9084, 52472, 26596, 37212, 55168, 22102, 61669, 27775, 16867, 50418, 12036, 26753, 42854, 21853, 40344, 62109, 6859, 38143, 34288, 18258, 9095, 54945, 35390, 28886, 7229, 56589, 41359, 2532, 42679, 58233, 34936, 20150, 52187, 11481, 46649, 18200, 37343, 4825, 44686, 49857, 30625, 62763, 9470, 24443, 47841, 27270, 40055, 10733, 51625, 6291, 17077, 59578, 29015, 52329, 19388, 670, 49148, 7726, 16350, 46567, 40235, 57718, 34404, 1830, 46086, 50805, 19480, 11000, 48816, 14886, 40123, 45676, 30524, 52719, 21835, 5053, 50416, 28472, 7404, 64938, 30846, 24463, 10740, 39277, 29253, 8824, 48029, 11819, 27640, 45584, 22837, 41828, ++ 57979, 13736, 60613, 16694, 41237, 4273, 37923, 18720, 43947, 30079, 61134, 16203, 58032, 44794, 27352, 51937, 31510, 37681, 9247, 47056, 62391, 4434, 18551, 43320, 179, 36952, 22854, 59286, 31187, 18055, 60175, 6511, 53424, 32015, 27797, 55548, 1434, 22161, 45168, 29853, 18992, 38588, 31708, 14245, 58820, 27908, 17246, 4533, 65320, 50893, 21025, 42260, 16219, 53691, 5297, 40739, 11744, 50158, 4315, 35809, 23513, 50985, 17981, 27987, 12100, 57642, 39880, 4121, 63075, 37390, 14024, 45745, 52597, 2248, 27179, 17943, 45367, 29003, 13633, 35746, 62796, 10204, 39644, 23992, 52807, 29546, 5736, 47770, 37158, 23184, 60552, 10031, 56114, 14309, 23633, 8099, 55258, 36598, 587, 47909, 31084, 38744, 19041, 25920, 46875, 17903, 55525, 44094, 59764, 50295, 38432, 17097, 53484, 27155, 3950, 50998, 42732, 
1815, 37675, 8832, 45251, 25751, 5541, 54415, 13974, 47779, 8125, 50186, 21127, 60071, 46320, 25346, 44340, 8593, 21857, 58387, 50359, 24127, 60537, 19445, 31635, 61847, 17382, 45673, 3386, 15772, 55380, 35621, 60680, 24919, 46386, 11105, 39132, 1667, 60170, 29533, 14326, 64370, 1133, 51180, 15300, 46118, 20316, 34792, 24820, 14050, 58287, 6791, 31361, 17676, 43614, 8002, 62633, 41323, 30026, 53373, 2470, 47824, 17122, 29304, 57525, 11147, 59608, 51837, 40595, 21442, 13939, 58639, 49123, 15301, 31066, 53330, 22142, 17532, 3748, 59487, 24924, 42282, 1611, 53876, 27375, 59049, 34449, 12531, 41551, 6529, 35709, 60473, 1429, 54294, 18449, 64627, 28069, 37851, 53316, 22481, 41126, 6956, 44564, 57386, 26943, 32119, 59938, 23196, 3977, 20368, 48049, 37452, 12551, 24105, 64002, 30909, 35298, 25785, 55141, 12730, 23934, 3183, 58125, 35676, 17393, 46543, 40270, 14158, 20515, 44935, 55927, 19587, 51026, 14864, 58340, 37586, 63002, 7161, 61395, 19101, ++ 3191, 32118, 6302, 43462, 24750, 54771, 11940, 63168, 93, 53085, 10039, 38451, 32009, 7109, 55305, 4837, 60932, 23161, 58587, 21337, 11839, 48613, 60032, 32445, 52148, 45850, 19915, 39040, 3812, 42835, 27025, 47299, 19242, 61789, 16434, 41344, 32569, 51939, 8803, 60933, 43358, 6702, 64747, 36125, 1253, 43692, 49651, 26465, 32669, 6570, 38019, 59869, 27269, 34628, 19583, 61971, 24926, 43631, 55758, 20452, 58154, 1789, 64797, 44040, 22430, 47950, 19437, 51645, 23790, 32589, 54593, 7277, 23005, 41878, 61762, 34682, 10997, 55173, 44126, 23309, 3854, 30276, 57094, 680, 16990, 42534, 55471, 11283, 64888, 4751, 27787, 45183, 6780, 37931, 62564, 43592, 30430, 20587, 65455, 15248, 51463, 4272, 43363, 63804, 8317, 37029, 32936, 4857, 21961, 10397, 25334, 45419, 12787, 39241, 60046, 17746, 31885, 59039, 22181, 52183, 18768, 62363, 43018, 30323, 38020, 20250, 65481, 32044, 2662, 29063, 15015, 51101, 12830, 62735, 42098, 17027, 10123, 43980, 2889, 48425, 14636, 9034, 34572, 57852, 24477, 47900, 32213, 7980, 29286, 2704, 58203, 15088, 
63669, 23397, 35274, 6924, 52639, 26479, 43924, 22313, 36917, 28538, 62119, 5562, 49804, 38854, 18952, 43007, 64145, 2678, 52043, 34494, 23165, 6171, 58509, 13182, 34955, 63309, 8536, 51091, 20512, 42333, 25664, 3133, 24241, 62546, 32624, 692, 37749, 20772, 61375, 12266, 38653, 64135, 45125, 31982, 8583, 29563, 62304, 14420, 39963, 7292, 23659, 61627, 20322, 56117, 46116, 14274, 42744, 22254, 33444, 2809, 46719, 11749, 34534, 4250, 65288, 15180, 36628, 13066, 39621, 55214, 10464, 52039, 27819, 64278, 8707, 22408, 59224, 42279, 2822, 52419, 8086, 60590, 4625, 36674, 61856, 28938, 39088, 11864, 63174, 156, 54394, 36957, 57296, 3946, 32623, 62420, 1050, 41330, 21992, 4447, 43899, 16089, 34753, 54133, ++ 39038, 52038, 36492, 64003, 10380, 31123, 50774, 21727, 33608, 41665, 24263, 51282, 12728, 47435, 17916, 35467, 13376, 42939, 6448, 44983, 30773, 36367, 8293, 26199, 15313, 6896, 63176, 28159, 56275, 14421, 64901, 684, 36516, 10424, 49309, 7096, 64352, 17436, 40598, 25210, 14853, 51386, 11423, 20675, 53980, 34016, 9045, 55900, 20208, 46515, 30141, 4005, 49129, 13029, 47769, 37413, 7652, 15075, 29016, 46789, 11208, 34833, 31266, 5984, 59495, 3168, 33964, 10631, 56591, 706, 26612, 36432, 48791, 16086, 9178, 51034, 37828, 25439, 5846, 58648, 48346, 18293, 33398, 46391, 62286, 22212, 34055, 19983, 30887, 49657, 39770, 21183, 58995, 29149, 2425, 16392, 58166, 5440, 40854, 33826, 24479, 56208, 13901, 35057, 22997, 50871, 12566, 61684, 41731, 31413, 56977, 2345, 65317, 21560, 30550, 10104, 56504, 12110, 46694, 29582, 2813, 35824, 9867, 50575, 1602, 44926, 11854, 35219, 58669, 42653, 57001, 5142, 36055, 30921, 3773, 56484, 29342, 63807, 38508, 25478, 40732, 59974, 28337, 10727, 53362, 4876, 64312, 18169, 51316, 40225, 21134, 43179, 33212, 8839, 56837, 44840, 11728, 38352, 4953, 57588, 8160, 53832, 11039, 55428, 29693, 197, 50879, 12513, 32586, 25475, 45960, 15393, 56779, 39356, 20167, 45389, 24102, 31252, 54574, 101, 36183, 15525, 48894, 31589, 44982, 8070, 42921, 50887, 
10745, 46274, 5126, 47935, 28547, 7587, 23250, 50052, 55913, 16816, 32959, 48103, 21505, 43827, 51011, 15908, 38002, 3667, 26552, 32596, 50783, 7769, 59200, 41849, 20163, 57822, 26382, 49635, 30828, 56399, 20761, 63730, 5252, 18120, 45147, 31281, 15453, 41607, 32670, 53147, 6067, 28270, 36003, 18238, 44811, 21251, 32483, 49797, 16128, 43603, 19864, 47965, 26468, 34088, 22342, 8274, 27339, 50098, 12585, 46794, 26072, 60280, 31176, 52997, 24923, 47214, 26945, 9498, ++ 21274, 15125, 23550, 1925, 57207, 35171, 6819, 47827, 13960, 57429, 3851, 28773, 62851, 20945, 39220, 64402, 29689, 49434, 16072, 54731, 1263, 65475, 20533, 41783, 57881, 34895, 12066, 46812, 9067, 51257, 34238, 24520, 45464, 58573, 28930, 38213, 13571, 47781, 3696, 35480, 58293, 28588, 39642, 48304, 24796, 15723, 63154, 40853, 11868, 61738, 14502, 57895, 23414, 55056, 1500, 30658, 51971, 63865, 3029, 39583, 26755, 52488, 13789, 53991, 37950, 29305, 60485, 41358, 15409, 47528, 64131, 11754, 59755, 28695, 55568, 21126, 64, 64976, 32453, 15721, 38271, 54015, 26807, 7126, 14485, 50817, 2296, 58378, 44010, 15372, 359, 53256, 17154, 42366, 34959, 48265, 25517, 50126, 11472, 59678, 8901, 28621, 49112, 1130, 59094, 16815, 45771, 26895, 6130, 52322, 18897, 36066, 47816, 6601, 42123, 49287, 20523, 40947, 5012, 34561, 64202, 22977, 57387, 17956, 61119, 27018, 55700, 24256, 7177, 16556, 22655, 40461, 61439, 18128, 53196, 37799, 6933, 32747, 12225, 55649, 5665, 22120, 50968, 42308, 20584, 38836, 26261, 44419, 12938, 56366, 36695, 5306, 49669, 17773, 41100, 21814, 50332, 19377, 60512, 30221, 48394, 23619, 41447, 16714, 44322, 63618, 21642, 40472, 59415, 10098, 54708, 29094, 774, 48603, 9505, 61272, 4977, 44258, 14228, 27385, 46553, 64773, 5430, 56956, 12782, 53922, 17656, 26376, 65393, 33170, 24568, 60028, 14702, 54794, 36753, 876, 39207, 12899, 60794, 4204, 53002, 30256, 2185, 55172, 28823, 49081, 11002, 63611, 17457, 36912, 29251, 10025, 55382, 14780, 39165, 8523, 45706, 2270, 50593, 25804, 34186, 48506, 3108, 
38135, 56187, 66, 60804, 13801, 43862, 54853, 16495, 63320, 39426, 13381, 57431, 994, 53927, 8895, 59451, 5877, 52092, 13572, 45257, 60858, 42713, 18435, 38122, 5684, 16759, 34455, 7840, 13911, 56987, 279, 65361, 32766, ++ 59065, 49974, 45281, 28336, 46827, 17612, 39602, 27111, 65261, 19568, 36269, 46058, 1509, 54160, 9545, 25167, 2219, 52822, 33985, 24621, 39915, 13729, 51573, 29568, 2507, 53853, 17319, 31794, 37832, 22003, 7685, 54388, 15548, 4626, 20922, 52959, 25913, 59648, 22922, 54787, 96, 18157, 62613, 4859, 57212, 30891, 2934, 24027, 36900, 637, 43957, 33171, 8587, 40120, 22131, 60150, 17568, 32378, 48444, 9627, 62694, 18957, 42594, 24651, 15966, 46351, 6962, 25916, 43548, 21842, 5212, 40302, 19748, 3517, 43009, 31348, 47354, 18873, 40859, 60861, 2763, 11637, 43732, 60025, 41153, 28491, 38831, 9237, 24513, 35438, 61776, 26733, 10442, 54803, 6221, 63526, 12489, 31752, 54030, 19672, 44298, 62226, 21228, 40404, 30019, 54873, 2793, 57770, 39882, 14653, 62696, 8599, 33152, 24324, 54756, 1041, 36727, 61542, 26207, 48545, 13254, 40363, 6698, 33036, 11095, 41301, 4767, 46815, 52532, 37185, 49168, 10786, 24716, 47606, 13869, 45319, 20941, 51666, 18976, 46561, 35378, 64719, 1871, 13735, 33017, 59232, 1244, 62298, 31057, 6663, 24007, 61467, 27014, 53686, 30645, 3248, 62418, 31948, 9626, 42691, 13683, 2842, 32947, 58899, 7460, 27069, 35337, 3798, 48985, 19944, 38075, 13877, 64909, 32019, 25914, 37038, 55858, 19047, 38753, 60339, 22750, 9944, 39259, 19668, 37171, 28294, 59313, 3903, 40066, 16622, 43935, 1976, 30516, 41019, 11098, 62999, 19760, 44486, 25535, 36542, 9902, 64876, 19037, 35353, 8450, 58164, 39374, 20830, 52812, 5399, 62164, 48838, 31909, 250, 60728, 24666, 18633, 42212, 29814, 9278, 58296, 22098, 61475, 24564, 11966, 50190, 19785, 38798, 26513, 10081, 49444, 29521, 5390, 47062, 26972, 41712, 18853, 34724, 31055, 41116, 24707, 62193, 10233, 31550, 3049, 58846, 30107, 64077, 52229, 43185, 49466, 36762, 18829, 39743, 12842, 42647, ++ 5221, 30468, 8135, 55847, 
12288, 60195, 2968, 53829, 8462, 44319, 16855, 59824, 23696, 34548, 43564, 58900, 40985, 19108, 10581, 57117, 28426, 47925, 5553, 59581, 44439, 23662, 49201, 62054, 1847, 57274, 44180, 29730, 39738, 61091, 42587, 2148, 34585, 10883, 44032, 30614, 46731, 33420, 9864, 45525, 13188, 41743, 52557, 47177, 28380, 50646, 18260, 53039, 25857, 62986, 10920, 44744, 6839, 41679, 22899, 54738, 36977, 4594, 61202, 8391, 56074, 20294, 50596, 12937, 62263, 35400, 30096, 53678, 33190, 63438, 14625, 57992, 12301, 52115, 8253, 28119, 49794, 24739, 36749, 20703, 4187, 64434, 13032, 52292, 56642, 7390, 46037, 31322, 50522, 19430, 33323, 22629, 45504, 1607, 39036, 27950, 3073, 36247, 10658, 52742, 7093, 38097, 23715, 33984, 16184, 48902, 29331, 43160, 58148, 13570, 27999, 63645, 15812, 7686, 52860, 16860, 55326, 24874, 49545, 44147, 53396, 21588, 62887, 18866, 30038, 662, 64106, 33211, 55088, 2255, 34345, 26501, 58854, 874, 62105, 27265, 15612, 43252, 30069, 56796, 8390, 49263, 16644, 22567, 41522, 54812, 14166, 47402, 86, 12358, 59024, 34632, 15637, 46941, 25126, 56189, 35915, 64985, 20682, 37667, 46716, 14684, 53202, 30731, 8753, 60976, 5379, 47419, 35793, 18012, 51572, 3361, 33070, 11519, 52382, 6491, 33865, 50268, 29778, 61637, 1621, 47545, 22416, 35970, 55606, 6648, 57794, 34654, 52438, 18482, 57085, 26848, 33838, 51589, 6754, 57652, 28210, 11947, 40645, 59853, 24798, 13529, 45193, 796, 31190, 40807, 23909, 16322, 44250, 21709, 51108, 34942, 61836, 11247, 54151, 40566, 14358, 43352, 6506, 53798, 35147, 30176, 45571, 4483, 58056, 33972, 1407, 56462, 24837, 60107, 10562, 51367, 23378, 64515, 14466, 55507, 1856, 36251, 21056, 47736, 53389, 15331, 40693, 23159, 2083, 27909, 10473, 63335, 23915, 55242, 28678, 48458, ++ 18005, 62502, 40909, 19890, 37019, 25678, 42417, 23057, 32388, 50278, 5903, 30637, 11752, 49040, 15543, 7663, 26692, 62598, 38195, 3966, 61614, 21773, 35610, 18315, 10154, 39396, 5069, 25758, 41506, 19619, 11082, 63513, 23545, 12394, 31421, 56491, 49742, 20007, 63938, 
7993, 16034, 53204, 27067, 37646, 60610, 22482, 7725, 16753, 59519, 10183, 35549, 5777, 46088, 15571, 56472, 28042, 36058, 58879, 158, 16340, 25585, 49548, 31798, 40464, 1123, 64449, 32287, 52254, 2491, 17653, 49239, 8546, 46047, 24855, 39203, 6276, 44675, 22571, 34348, 56445, 15045, 63651, 9494, 54957, 34859, 25605, 46853, 32756, 18750, 41509, 12025, 63970, 3378, 40611, 60325, 14983, 37249, 56729, 17643, 61286, 46424, 15781, 32398, 64914, 18497, 47610, 11700, 63996, 26093, 409, 55663, 22574, 4352, 37956, 51309, 19373, 33655, 45650, 28948, 38829, 309, 59690, 15457, 28464, 3186, 31431, 13079, 39365, 60252, 14438, 27403, 7751, 19781, 43536, 65194, 9326, 23243, 41651, 11308, 49904, 4103, 54324, 17934, 37388, 46125, 27801, 36392, 50508, 10212, 34046, 63901, 28951, 37776, 45566, 16537, 39858, 6135, 54461, 814, 17953, 49404, 10381, 51952, 4641, 24251, 61557, 18284, 56028, 45117, 28306, 41759, 24595, 52940, 10699, 42579, 58118, 22234, 64242, 28680, 17427, 43776, 2777, 55264, 16172, 41694, 11691, 51996, 8708, 19135, 49911, 27617, 21677, 9315, 46851, 5534, 49210, 3167, 21252, 42046, 15476, 47398, 45847, 31825, 5689, 51802, 33681, 64428, 25901, 47589, 12366, 57139, 3881, 36262, 63247, 12859, 5779, 47919, 16803, 32988, 1176, 62963, 28848, 36842, 17357, 65075, 7574, 23697, 62095, 18688, 47613, 21635, 42894, 15369, 33200, 38073, 7103, 46316, 3551, 28003, 49202, 17894, 39795, 56618, 6709, 25489, 35267, 9283, 54686, 20166, 57863, 33187, 3620, 44643, 6510, 51431, 10995, ++ 37831, 1216, 26333, 64635, 4789, 52297, 14764, 61973, 38079, 10801, 56797, 40150, 63875, 4350, 56040, 32905, 50889, 12381, 46700, 17125, 42519, 8642, 50532, 27839, 64178, 30513, 55390, 15918, 60540, 33294, 52403, 3000, 35360, 48206, 18463, 9302, 26684, 37426, 5339, 42036, 61625, 21241, 50258, 6124, 19281, 34786, 63818, 31391, 39421, 54354, 21636, 64498, 38418, 29716, 3387, 51195, 13486, 19993, 33694, 57459, 43395, 12556, 21379, 45443, 28393, 36595, 10117, 27308, 38721, 58526, 23363, 56932, 1469, 16898, 50388, 
26219, 36049, 62522, 1992, 20157, 42205, 30634, 45688, 17256, 49259, 6060, 16201, 61370, 1528, 29927, 48541, 21599, 38327, 25023, 8710, 52141, 4478, 26254, 49523, 7551, 24074, 57609, 41478, 4651, 27249, 42406, 51934, 7845, 44576, 37473, 11059, 46117, 17380, 60382, 9426, 44348, 3362, 57529, 11462, 21062, 43388, 8447, 36170, 64739, 38373, 56416, 48391, 8785, 34697, 45823, 54129, 41067, 59406, 28770, 6470, 48688, 36738, 30566, 57334, 33939, 23848, 40003, 9916, 63019, 6027, 19821, 61009, 2499, 45130, 18847, 4174, 20408, 51851, 7739, 65302, 23067, 43667, 27723, 62879, 33893, 25846, 40766, 31180, 57099, 12163, 42149, 1360, 36335, 22797, 15882, 63222, 2074, 20960, 59993, 7253, 16323, 40922, 1257, 49269, 59056, 25224, 63015, 21091, 35051, 26713, 60699, 30824, 44605, 63824, 32176, 12070, 61803, 43177, 23959, 64586, 13774, 39741, 60298, 31347, 63773, 15, 22038, 56723, 17192, 43502, 2989, 18694, 54596, 6996, 60173, 27678, 46318, 9140, 28654, 54788, 38599, 27134, 58831, 23490, 46863, 20014, 51508, 10250, 47252, 27445, 15987, 52604, 40899, 8989, 37139, 64845, 11645, 53455, 2508, 62476, 30053, 58749, 37237, 44249, 7925, 63531, 11536, 29357, 44048, 16453, 65100, 48692, 12118, 45694, 38685, 17334, 61031, 29742, 20814, 34976, 59726, ++ 22716, 53270, 33715, 13395, 44848, 29916, 48077, 715, 20677, 54607, 27772, 18365, 26047, 37321, 22435, 45220, 364, 31264, 53521, 22945, 36743, 55834, 14269, 40777, 866, 45667, 12695, 37133, 6350, 47485, 14744, 27566, 50818, 5821, 65280, 45033, 57820, 14186, 51057, 23774, 36384, 2627, 30013, 56722, 44283, 1603, 48832, 13694, 4379, 26896, 43098, 2026, 19119, 49854, 42457, 23935, 65503, 39171, 52756, 10495, 30245, 63622, 5726, 59125, 14817, 48279, 18399, 55106, 6427, 44479, 13532, 37614, 27899, 61334, 10529, 59014, 4811, 29740, 53309, 48016, 6689, 57693, 1030, 23498, 59240, 31554, 53083, 37652, 22976, 59528, 5125, 55759, 14056, 57749, 47237, 28756, 64678, 42774, 13219, 34608, 53363, 21671, 9504, 48629, 60470, 14142, 31956, 20797, 59402, 19834, 62090, 
28338, 53094, 31658, 25445, 39674, 60929, 23815, 47189, 63278, 32573, 50788, 26586, 19495, 6270, 23625, 17174, 51808, 25632, 4367, 21943, 12541, 38248, 17510, 52064, 14849, 60835, 4927, 16451, 44960, 7598, 59661, 31861, 25792, 47664, 35000, 11975, 53062, 25231, 57263, 42429, 59743, 25639, 48910, 29962, 10782, 58039, 20071, 38670, 7164, 60807, 1958, 16130, 28062, 47943, 34096, 51409, 11209, 57945, 6572, 50157, 33315, 39577, 27563, 47021, 30544, 45743, 23850, 35502, 8912, 39937, 13418, 46031, 7537, 53102, 4522, 15057, 24838, 327, 38452, 54372, 3455, 37536, 16107, 35659, 29056, 53618, 17779, 8173, 24281, 54254, 38564, 9208, 48591, 23139, 61169, 29925, 37284, 15338, 42996, 34053, 18010, 52392, 40142, 19687, 2639, 43784, 7346, 37642, 55817, 4909, 31729, 60235, 1973, 39778, 57051, 3306, 30713, 51149, 25599, 6258, 31644, 40430, 22213, 48302, 17055, 12420, 20338, 57081, 22690, 33477, 50324, 4348, 59849, 37383, 626, 28843, 61665, 32022, 7285, 50571, 14396, 47447, 56675, 2569, 15734, ++ 46246, 9339, 57661, 49582, 22054, 7712, 58731, 16412, 34366, 46551, 2434, 42790, 51816, 13228, 59399, 19959, 65049, 27479, 6160, 60752, 31939, 2820, 58327, 25007, 53194, 20810, 62693, 26813, 50028, 22374, 40453, 58938, 16946, 38496, 21714, 32851, 1099, 40960, 29351, 55631, 12629, 59068, 40189, 10670, 25700, 55235, 23069, 58094, 45221, 14929, 61111, 32188, 58541, 12351, 34308, 8149, 47238, 5052, 26407, 46223, 2396, 50300, 35223, 24070, 53166, 3917, 61901, 42373, 20977, 30709, 65411, 9714, 52710, 35025, 42697, 21711, 46531, 17496, 39503, 12860, 37422, 27021, 51907, 40102, 10623, 42900, 19693, 8426, 44451, 15653, 36428, 28060, 43290, 901, 18326, 35652, 9859, 20375, 58753, 29415, 42, 45060, 30754, 36799, 24792, 1707, 56424, 35553, 3660, 50600, 34443, 5700, 41169, 1929, 49706, 14277, 35011, 6009, 30182, 18156, 4075, 58514, 12263, 53920, 41780, 61752, 44636, 1314, 58100, 32278, 63367, 49761, 3327, 57612, 31737, 39610, 20369, 53780, 28528, 63590, 21472, 50705, 303, 55197, 14768, 58559, 29165, 39509, 
15437, 32460, 9280, 13318, 35771, 2190, 37142, 52790, 4398, 50739, 12679, 45878, 22530, 55021, 44560, 64533, 5856, 19505, 62323, 26159, 43444, 30103, 17296, 57252, 12865, 53879, 4390, 62022, 12215, 56639, 14802, 54175, 31913, 5739, 57600, 23352, 48463, 33415, 40858, 58380, 47231, 20354, 14386, 48693, 30072, 58975, 1288, 56318, 10181, 45427, 50569, 34887, 4986, 26661, 62047, 35880, 14111, 41715, 10331, 50453, 22597, 53677, 1578, 65181, 24999, 7906, 59704, 30589, 63893, 15569, 52771, 25463, 13707, 42608, 21817, 49302, 12762, 33527, 44372, 20906, 14595, 59539, 45918, 17558, 61193, 27726, 9356, 54581, 43132, 32099, 5209, 52511, 14978, 42320, 26811, 19413, 51620, 23757, 42014, 4967, 21463, 55781, 25236, 40091, 8769, 26673, 41728, 61543, ++ 31875, 39352, 19202, 3325, 60902, 35716, 24155, 40721, 63463, 9772, 31706, 61443, 6610, 47694, 8854, 35125, 41429, 14855, 44138, 20243, 11298, 46109, 16602, 34613, 7967, 43649, 32745, 3616, 54844, 9698, 31075, 410, 56000, 43473, 8402, 53661, 24861, 60261, 17595, 4202, 45971, 27696, 15105, 65193, 38124, 17080, 32996, 7130, 36615, 51685, 9430, 25026, 6247, 53803, 21112, 61564, 31167, 17288, 60381, 18714, 38078, 14306, 56658, 9258, 33359, 39458, 25124, 11531, 34184, 49901, 4389, 41582, 19236, 2958, 31724, 54704, 14236, 60347, 8758, 32239, 65148, 21285, 13907, 29237, 63254, 2863, 47839, 57222, 26368, 65378, 51248, 10857, 53672, 32018, 63036, 23544, 41800, 51385, 6390, 39545, 62749, 14463, 55139, 5908, 63293, 40159, 17849, 46630, 30338, 13044, 23350, 54239, 15397, 65112, 21395, 56181, 10676, 51617, 54587, 42336, 14756, 37355, 46239, 2156, 33566, 10414, 29449, 36884, 20505, 42957, 15894, 35593, 26096, 10275, 45586, 1583, 47291, 11711, 42469, 2781, 38634, 13504, 44221, 22818, 41235, 3712, 52191, 7024, 64494, 48267, 28180, 40678, 55911, 21601, 61872, 17439, 32743, 41729, 28708, 59310, 14479, 36624, 9407, 21309, 39164, 32207, 7932, 40094, 3012, 54885, 37278, 9298, 44469, 34750, 19319, 26566, 38296, 48062, 3701, 43110, 18862, 50759, 27911, 
37915, 10863, 65126, 18068, 29173, 10045, 42484, 62802, 26167, 7947, 45765, 22514, 41484, 27820, 19360, 37783, 12483, 58480, 42586, 20066, 1805, 55783, 27078, 58700, 4308, 32836, 11528, 38190, 21318, 49833, 41978, 13394, 48390, 22835, 33728, 3530, 45037, 64546, 35476, 8130, 59105, 26718, 54366, 10761, 63588, 36410, 740, 55587, 34502, 4154, 52002, 35837, 24323, 374, 65390, 38758, 25826, 60457, 1332, 55077, 32735, 8639, 46948, 13681, 58448, 34267, 43706, 1445, 52853, 35897, 63808, 11708, 24517, ++ 5747, 54900, 27466, 43042, 30902, 11190, 55472, 5387, 28912, 14353, 53379, 21454, 36071, 29473, 17754, 57598, 3433, 55099, 33506, 50180, 63563, 28816, 48769, 60102, 38713, 11640, 57476, 17674, 36179, 64623, 46325, 25327, 12975, 28448, 61421, 36034, 11763, 47129, 32126, 63264, 19535, 34180, 52192, 8485, 47659, 3818, 42208, 62465, 20501, 28981, 55699, 41373, 48155, 27594, 37539, 1025, 56004, 41109, 8935, 54431, 28749, 42032, 22222, 44823, 17092, 57732, 47093, 332, 60010, 16532, 55872, 29064, 48565, 23888, 63004, 7187, 33868, 40699, 24396, 49026, 3708, 45024, 55628, 5415, 35701, 27597, 39100, 12427, 33679, 3889, 17962, 34405, 7730, 44877, 12777, 54666, 2607, 32724, 46966, 17281, 26672, 50412, 19241, 43640, 12384, 28860, 53761, 9921, 64463, 42823, 58596, 8869, 47390, 36447, 7279, 31171, 40691, 22781, 1210, 27209, 62017, 20133, 30759, 56840, 22335, 47875, 15181, 60600, 9532, 55989, 5835, 51246, 29868, 62605, 22491, 55534, 25095, 34789, 52577, 18598, 56194, 26835, 33551, 65045, 10921, 38028, 21047, 43474, 23648, 936, 54241, 18406, 5768, 44113, 14858, 47205, 24395, 8904, 53310, 3522, 43135, 26696, 50036, 58529, 13235, 54119, 49214, 22030, 63880, 13629, 25163, 60646, 23282, 357, 65366, 51009, 8134, 21574, 29378, 63658, 36582, 739, 62283, 14059, 43578, 2385, 56176, 6322, 35287, 53485, 4868, 34110, 51187, 13225, 60935, 6028, 33269, 62454, 2583, 52127, 32334, 15973, 49677, 30486, 46597, 7649, 39058, 19276, 62678, 44745, 56211, 14559, 4613, 32310, 57497, 35944, 9645, 56736, 41071, 
11088, 19006, 28381, 46227, 17840, 38333, 5609, 42023, 25041, 48715, 28733, 13151, 49925, 19620, 41454, 13849, 56110, 45362, 18580, 47527, 9666, 34853, 45870, 12963, 39376, 62916, 18268, 53713, 27223, 10850, 64696, 16189, 30690, 4639, 18542, 49124, 44162, ++ 36425, 16938, 64231, 14043, 51660, 38621, 17398, 48814, 41943, 58225, 25362, 45513, 1146, 60459, 39752, 24357, 48385, 10335, 25553, 1730, 37548, 7033, 23290, 4703, 19395, 51196, 24188, 42264, 29099, 7276, 18938, 59782, 34068, 48970, 3401, 16527, 51765, 6436, 39285, 10069, 48518, 710, 43198, 24307, 31818, 53535, 26313, 11351, 49282, 307, 35101, 12873, 17820, 62805, 15912, 44420, 11683, 23263, 48872, 34559, 6626, 60925, 1669, 65043, 29806, 7384, 22771, 37183, 26719, 45255, 7953, 38371, 15217, 57377, 11894, 27497, 51452, 521, 56150, 18584, 61008, 11175, 33040, 50607, 61919, 14729, 54486, 21912, 49870, 41994, 24176, 62335, 39312, 20953, 48913, 30216, 15985, 61571, 11207, 55862, 37753, 4017, 33121, 57357, 22488, 49329, 6809, 38385, 25082, 889, 32625, 39077, 26512, 18590, 44943, 59211, 16505, 64273, 35673, 48168, 9252, 52291, 7522, 40081, 63710, 5332, 50417, 24617, 39853, 28120, 47138, 18310, 42184, 8074, 36330, 13210, 64386, 6174, 31152, 61311, 8963, 48574, 5167, 17313, 53548, 30859, 61726, 16226, 34436, 60422, 11363, 46337, 63452, 30991, 57492, 1438, 35201, 64117, 19122, 31466, 63084, 17162, 475, 34894, 46519, 4031, 29004, 15179, 45418, 31747, 47629, 5186, 49718, 30998, 41526, 15492, 55653, 34051, 59699, 10323, 16974, 42027, 20627, 55053, 32361, 46682, 21836, 39416, 60131, 23065, 17065, 57242, 18741, 38991, 25730, 48239, 55368, 14920, 44028, 25169, 10642, 65442, 6194, 52944, 13113, 63448, 34633, 49260, 25369, 6394, 29437, 47159, 61393, 26125, 17684, 917, 51743, 21029, 29652, 50099, 61068, 439, 55049, 32191, 62822, 22668, 57536, 16727, 7383, 43489, 39025, 8409, 58371, 26172, 63044, 6815, 28587, 11272, 29800, 61324, 16837, 21596, 57349, 6311, 30398, 36108, 2412, 48266, 37701, 19789, 41438, 59323, 33365, 54206, 21856, 
511, ++ 50920, 29234, 7065, 47143, 1664, 25028, 57043, 33127, 3679, 18967, 39005, 11517, 49859, 15956, 52515, 5179, 38526, 29957, 61767, 43059, 15683, 56708, 40000, 64847, 47106, 30148, 1498, 61912, 13651, 44892, 54009, 4521, 41633, 20284, 62885, 30427, 23310, 42895, 20742, 54258, 35592, 57443, 21960, 61937, 12158, 18566, 59994, 40425, 30305, 64854, 19820, 57581, 45833, 2847, 51484, 29279, 59429, 32632, 3710, 63287, 20655, 47739, 27464, 12056, 51049, 40918, 62648, 13739, 54240, 20049, 32715, 63854, 1211, 44067, 36240, 47586, 20624, 43333, 16282, 36647, 26044, 41668, 22712, 7815, 19107, 45433, 217, 30465, 60671, 6955, 46329, 2057, 58292, 27213, 5607, 59848, 40968, 25714, 35967, 8246, 23241, 65225, 47765, 2321, 35128, 15607, 59985, 27447, 52493, 16339, 50073, 11824, 62969, 2633, 48735, 27733, 4696, 43764, 12627, 32221, 59920, 25843, 45169, 16701, 27837, 34469, 13548, 53267, 2592, 64875, 11902, 54663, 1, 59832, 19550, 49375, 27680, 43797, 15325, 40490, 24096, 35987, 57786, 45386, 24842, 1948, 47044, 8656, 50175, 26381, 38294, 22272, 33630, 8190, 26899, 51471, 11558, 39424, 48496, 6442, 37964, 52149, 29816, 18442, 56434, 23717, 60151, 38399, 1000, 52559, 20215, 40641, 17724, 58625, 11299, 37552, 24346, 2241, 44809, 25995, 52713, 29956, 49520, 4178, 25406, 15810, 51692, 27242, 12574, 49768, 31429, 36933, 2128, 64987, 11227, 30973, 4083, 40402, 20518, 59392, 47778, 36801, 28563, 21131, 40264, 23788, 16724, 496, 41206, 57873, 16157, 35236, 10608, 54050, 39562, 44494, 27508, 62352, 6089, 16432, 37312, 24163, 40382, 4340, 14906, 50653, 1662, 32866, 60689, 20485, 64444, 23093, 44697, 2177, 33763, 39925, 49050, 57797, 36818, 2949, 53091, 40987, 25111, 49686, 15859, 44868, 62020, 24283, 6013, 51189, 12502, 22971, 45906, 10033, 38362, 62742, ++ 12921, 58950, 34648, 21206, 62151, 43869, 9004, 22600, 65187, 50729, 7312, 62375, 33817, 27965, 13549, 64542, 19272, 53955, 12934, 22106, 52057, 34143, 9373, 26508, 14065, 52913, 37938, 10773, 49540, 21316, 31597, 39144, 9181, 52615, 
14520, 44614, 56982, 1966, 64535, 28197, 7423, 16156, 38698, 5509, 50060, 44517, 2279, 15435, 46341, 8810, 38944, 5274, 33232, 24607, 40032, 7239, 36502, 13988, 52204, 25343, 38668, 15356, 53589, 35981, 19660, 3224, 31505, 51771, 5504, 60797, 10939, 50725, 25627, 17890, 53872, 5324, 64650, 29455, 59418, 9886, 54096, 1781, 43951, 57923, 38430, 25084, 58861, 9662, 40315, 15188, 55223, 29576, 16592, 50285, 38046, 19878, 3568, 52674, 22009, 59183, 43014, 11982, 16771, 29838, 61883, 41293, 4936, 45690, 19503, 61126, 21862, 41874, 29515, 55436, 13382, 34208, 53560, 24705, 57140, 18927, 2947, 38585, 11256, 49415, 560, 58967, 43460, 19221, 31306, 37702, 23046, 33462, 44515, 24336, 39166, 3820, 58244, 16926, 48003, 1075, 51419, 12738, 29622, 7940, 62774, 37046, 19036, 56411, 30285, 5372, 51163, 3366, 40147, 55314, 19717, 42597, 60309, 23379, 13845, 56873, 24980, 10590, 61396, 41018, 8355, 43816, 11759, 27283, 59141, 10020, 33686, 64401, 7655, 28479, 53531, 5974, 61412, 48784, 13110, 57864, 6938, 40237, 9646, 61071, 34482, 58817, 7770, 63412, 1113, 45021, 9025, 52569, 42134, 20969, 44375, 53173, 23443, 63155, 7462, 29757, 995, 17577, 44952, 57366, 3417, 54963, 30899, 60607, 9822, 27892, 50712, 2438, 43416, 23374, 6789, 64212, 12104, 34416, 46635, 58023, 8653, 53355, 12353, 48170, 29867, 36115, 45244, 52846, 11830, 31363, 3730, 51669, 30309, 16238, 53668, 21135, 4752, 15493, 23569, 50877, 31297, 7607, 64297, 3480, 59590, 22253, 9899, 40536, 56360, 29534, 60713, 2909, 27669, 58168, 15249, 31517, ++ 25876, 45341, 8522, 41107, 32526, 15399, 52775, 36666, 12672, 30201, 43326, 23388, 3051, 56501, 44541, 32605, 46858, 7476, 58461, 30984, 4069, 45134, 18597, 54501, 32315, 5977, 22612, 58098, 33801, 2390, 63374, 15422, 58500, 27143, 34868, 6935, 37790, 26377, 13778, 49591, 30843, 60759, 25278, 54926, 33755, 28658, 37112, 56364, 23844, 50972, 22365, 60534, 43594, 11053, 64239, 21891, 55196, 42802, 16699, 45580, 10692, 58342, 4910, 32944, 43833, 55531, 15803, 42893, 28289, 39846, 
21582, 35579, 58719, 30995, 9327, 38820, 13341, 33492, 4561, 50137, 28441, 64218, 17689, 31145, 11578, 51663, 35158, 20529, 63828, 25875, 36870, 11773, 56464, 33447, 9350, 64492, 44191, 13776, 46085, 1250, 31535, 51787, 38671, 54319, 10379, 21106, 33711, 56823, 3220, 35878, 8012, 57811, 5375, 37273, 20396, 61440, 9995, 39442, 6925, 50206, 41548, 23904, 65377, 35264, 54858, 21730, 8301, 62234, 46414, 6849, 57028, 14294, 61622, 11054, 52897, 32536, 9734, 37449, 62371, 22177, 33068, 60214, 20025, 49595, 14462, 42719, 27544, 12144, 44641, 58376, 14080, 61231, 16902, 49179, 12874, 29402, 4839, 53988, 33182, 2347, 44970, 34342, 21877, 5077, 65234, 32425, 51117, 36468, 18771, 42801, 56162, 14544, 35977, 46438, 20788, 42396, 31523, 18217, 39083, 32676, 19545, 64102, 23588, 45310, 11840, 37408, 30300, 40610, 19900, 28419, 61561, 24171, 7070, 58247, 16362, 36162, 9620, 46428, 34361, 56497, 39625, 61445, 8807, 33124, 43691, 11788, 46023, 37574, 52227, 21888, 64944, 31578, 19489, 59165, 37004, 17025, 48956, 2105, 20435, 31051, 42842, 26340, 65434, 19425, 58714, 9755, 25898, 18321, 56630, 35343, 47766, 14225, 37716, 61753, 10655, 32533, 59187, 42766, 63753, 12277, 44386, 20211, 28292, 34012, 38549, 14137, 54494, 31773, 17721, 8236, 46675, 37058, 50191, 6785, 42203, 52574, ++ 3780, 56198, 19426, 54374, 4490, 60134, 27291, 195, 46009, 59273, 16272, 55612, 37708, 20979, 8337, 26210, 754, 42050, 36474, 16893, 63023, 27239, 59144, 26, 43933, 61236, 40329, 16306, 55520, 26151, 43238, 24466, 37276, 1274, 47840, 55135, 17941, 59420, 40541, 11226, 45399, 3082, 41274, 14053, 9593, 64045, 17411, 6782, 59265, 3516, 35899, 14394, 30742, 53290, 18092, 49442, 4544, 27005, 57315, 565, 30434, 40600, 24277, 59831, 8679, 26120, 64163, 9955, 48113, 2108, 45980, 12811, 3805, 41311, 62067, 22359, 52524, 44838, 23714, 39682, 15456, 34544, 47082, 3438, 53491, 16729, 5805, 48805, 31868, 4310, 52447, 43709, 22787, 600, 48100, 30960, 24720, 56961, 34066, 18689, 60872, 27648, 7235, 24442, 44520, 50899, 
25891, 13618, 44030, 31063, 47999, 23979, 51199, 17678, 42983, 148, 46837, 22027, 58272, 29929, 13839, 55766, 4506, 29082, 17902, 42017, 32952, 25485, 16198, 40913, 48888, 27061, 4660, 30238, 41436, 21256, 54449, 28991, 7372, 56590, 10617, 41817, 3010, 32171, 54973, 4556, 59535, 39242, 20474, 35441, 24582, 41921, 28430, 6810, 64853, 46021, 37435, 16396, 47575, 26239, 59814, 15364, 55529, 48827, 24671, 16635, 2539, 62697, 6867, 29548, 3555, 25690, 50529, 1719, 62859, 27744, 9189, 52133, 4812, 43889, 54458, 1558, 36104, 16491, 56814, 3091, 47997, 13687, 43390, 55718, 15355, 32927, 49023, 29320, 545, 54904, 27440, 18160, 50151, 12832, 22188, 26233, 51389, 15215, 24721, 64015, 18563, 5308, 13913, 36089, 8059, 45480, 55672, 3768, 30284, 53104, 24539, 60483, 39128, 14175, 51023, 3007, 34710, 44145, 6922, 39520, 62514, 2768, 41194, 9112, 24013, 55366, 5451, 27029, 41833, 50503, 22127, 8194, 27498, 38210, 858, 55864, 46421, 11052, 52369, 26309, 48949, 46, 35478, 63612, 25602, 14732, 20360, 65489, 28367, 17504, ++ 39559, 23838, 48173, 12166, 37501, 20496, 49388, 40249, 26431, 5799, 35289, 10525, 47308, 63252, 40493, 51086, 59934, 23856, 9915, 49620, 40876, 12126, 35863, 48179, 20568, 8262, 28052, 46562, 9959, 48564, 5616, 51474, 12271, 64211, 22709, 10538, 50703, 4833, 33517, 62187, 19038, 52847, 22820, 58433, 46973, 21054, 51958, 32416, 27358, 42509, 54526, 25506, 47871, 1350, 28161, 34968, 62059, 9808, 31959, 64598, 50502, 19152, 46554, 13170, 37748, 48983, 18309, 34433, 23221, 59569, 24730, 56520, 49328, 16047, 26518, 46786, 2283, 19491, 61602, 12230, 48433, 6837, 24591, 60060, 37094, 28932, 62536, 42624, 13556, 47448, 18529, 8077, 60240, 40547, 14583, 54111, 6632, 39737, 10165, 49746, 5307, 42063, 15064, 64155, 502, 58422, 9170, 39382, 54713, 14891, 62288, 10928, 33399, 64834, 26247, 52616, 28728, 36824, 15851, 63135, 33857, 20898, 46072, 51501, 12397, 61046, 3673, 57750, 52442, 1774, 20674, 35389, 58704, 50250, 15006, 65246, 2093, 46003, 25387, 38329, 46825, 27178, 
64176, 36518, 21754, 51926, 31443, 568, 65460, 9524, 53406, 2575, 57722, 36783, 32028, 254, 21202, 62150, 9894, 41502, 7326, 39670, 30377, 12549, 35599, 53454, 46205, 21022, 48204, 54389, 38655, 60922, 12421, 32910, 16555, 45554, 57012, 22838, 64879, 26884, 15128, 47373, 28724, 51308, 22364, 53800, 24661, 64666, 5888, 35821, 3834, 46220, 11622, 63950, 41073, 14157, 59955, 38053, 3032, 43099, 54104, 4722, 35601, 58867, 2031, 34192, 53408, 29101, 59592, 48283, 39829, 12608, 26759, 15069, 42331, 9036, 32612, 45773, 5118, 56356, 22329, 59863, 15835, 23616, 55947, 29137, 21403, 49654, 28053, 64065, 38456, 18949, 60014, 46079, 17658, 1527, 47127, 35078, 54307, 14557, 62557, 35757, 17220, 58903, 5363, 43436, 19034, 57716, 42620, 4190, 44516, 55419, 33880, 1105, 47562, 10717, ++ 60529, 35374, 2060, 64589, 28627, 57491, 9653, 18141, 63918, 31087, 53720, 19733, 28502, 4584, 15072, 17972, 34693, 29273, 55296, 21567, 2677, 51611, 24732, 15266, 56381, 35018, 65415, 3270, 36627, 19849, 60406, 29845, 45797, 18683, 32470, 42360, 29208, 46641, 21477, 36784, 8205, 31500, 43757, 1418, 35313, 4687, 39611, 13067, 48638, 18869, 10356, 63553, 7966, 57089, 38354, 13397, 46896, 20111, 39662, 22619, 7828, 56228, 2748, 61695, 30918, 900, 58012, 6153, 53045, 14360, 36803, 7562, 32134, 55309, 6594, 30228, 58135, 37519, 8172, 56732, 31631, 62936, 40884, 19946, 9082, 45857, 1385, 23358, 57054, 32602, 65046, 28384, 34941, 19385, 61985, 26879, 17545, 63436, 29171, 37198, 58026, 20604, 47330, 28539, 36514, 18184, 32115, 63671, 23058, 1494, 28213, 40555, 3764, 45379, 8666, 14486, 60210, 5648, 44265, 1642, 53043, 8991, 37986, 6521, 26741, 47676, 36422, 10530, 30661, 38740, 63965, 8908, 17650, 46654, 5723, 34202, 42858, 19115, 59300, 4193, 17982, 53880, 6615, 15922, 40856, 10152, 17570, 49005, 23151, 47831, 32892, 18232, 45185, 10488, 22641, 58977, 49830, 27588, 35948, 54670, 20521, 50636, 1195, 58318, 42312, 9052, 26852, 40320, 10805, 23091, 15614, 44217, 22171, 55001, 39739, 3235, 36865, 13955, 
35357, 10978, 60447, 31132, 5569, 38807, 8454, 41785, 18388, 33596, 50670, 21362, 59553, 26622, 38402, 19226, 33780, 50991, 5474, 31661, 64385, 28121, 11030, 48857, 19661, 42226, 27290, 47347, 8501, 41425, 20307, 1324, 24262, 62932, 51304, 37979, 61667, 21593, 55131, 13256, 36452, 27942, 10177, 41492, 33041, 47309, 981, 52344, 13021, 42529, 6345, 15681, 53910, 180, 34289, 10010, 65177, 29434, 56864, 5963, 19251, 48504, 24658, 9077, 41684, 32285, 22864, 64988, 7355, 28757, 13480, 53177, 21290, 9463, 40297, 57185, 23206, 43537, ++ 32217, 14476, 51910, 30548, 6438, 44764, 34103, 54950, 13692, 48469, 1831, 61066, 42384, 32111, 57946, 48960, 11031, 64253, 5479, 45603, 33146, 60625, 6767, 39319, 30546, 13201, 41902, 23515, 52316, 33016, 17020, 41014, 7858, 56127, 3721, 60962, 15770, 57918, 196, 53929, 25877, 65093, 16657, 50449, 29487, 63012, 26561, 55491, 845, 61222, 40894, 34042, 16399, 44873, 21271, 58663, 2221, 52907, 12286, 44175, 37009, 28481, 35300, 17368, 52084, 22088, 39074, 44642, 29593, 41922, 64915, 18905, 40339, 20401, 63393, 34798, 14085, 50907, 42086, 27277, 819, 21758, 13175, 49597, 56295, 26639, 54352, 38747, 10339, 21399, 2407, 45277, 11071, 50996, 4710, 36257, 46797, 1804, 55354, 13331, 23770, 53299, 8563, 40256, 55963, 11585, 49064, 6137, 37688, 52056, 46500, 16909, 56267, 30452, 38246, 55058, 32821, 19622, 48953, 25297, 42626, 17044, 62455, 31689, 56304, 15508, 22710, 43973, 50756, 13327, 28413, 45226, 36943, 23685, 55744, 26341, 12467, 51642, 35087, 14028, 50455, 23530, 44068, 58800, 26076, 45780, 63185, 34761, 6079, 43000, 15538, 62489, 25808, 52496, 38948, 14993, 8573, 43600, 3158, 17516, 64617, 28016, 45702, 14278, 19778, 63603, 5645, 57689, 30853, 61863, 35106, 4585, 58211, 8632, 24881, 51487, 19855, 47815, 575, 50226, 41142, 20261, 59371, 12813, 61904, 31732, 149, 58073, 10674, 39864, 17302, 54503, 2496, 60724, 8297, 25055, 47144, 21529, 16594, 57964, 32641, 14402, 61953, 7308, 55510, 16975, 62550, 22769, 49896, 32133, 57072, 4932, 18277, 
33498, 123, 47673, 7221, 63507, 18738, 49387, 62123, 5911, 54192, 11337, 37532, 17191, 61507, 33408, 59389, 46740, 30976, 43652, 20731, 51264, 37014, 12816, 25448, 40719, 60300, 3967, 30786, 51834, 2132, 47701, 12008, 39483, 33059, 61460, 23684, 38009, 31131, 62320, 16314, 29879, 7872, 53899, ++ 5535, 63114, 22178, 42504, 16682, 50551, 2852, 24083, 41602, 21823, 36911, 24825, 12419, 52986, 22824, 2306, 43588, 37235, 14566, 57342, 17514, 28313, 42687, 63844, 2022, 50635, 18122, 59345, 11176, 57086, 618, 62452, 23869, 38428, 53122, 27912, 8949, 24672, 39801, 12788, 48135, 6077, 37606, 11632, 56879, 14783, 45015, 19685, 37888, 29860, 5811, 23368, 50671, 31341, 9023, 41841, 33438, 24435, 61313, 5391, 49982, 15026, 63107, 42548, 11240, 47416, 8310, 16790, 62341, 4241, 27100, 51538, 129, 45529, 10403, 43140, 3334, 25206, 18163, 54789, 35500, 52798, 43510, 4894, 33256, 14281, 35847, 17400, 61179, 41392, 53180, 37589, 58692, 24048, 43205, 12177, 51581, 21530, 32486, 44975, 2881, 34464, 62500, 4125, 20087, 45936, 26976, 60606, 19057, 34703, 7596, 59696, 20706, 12080, 49939, 22928, 2480, 64038, 27556, 10244, 60676, 35758, 23475, 44749, 1027, 39703, 64441, 5023, 18710, 60420, 7143, 53498, 829, 62918, 16361, 40117, 61086, 8244, 30509, 63489, 39551, 11546, 33786, 1637, 55959, 7246, 28738, 13751, 56984, 29508, 54537, 7696, 40994, 3989, 55770, 30722, 63355, 24115, 57187, 32604, 38215, 4349, 22891, 60539, 33845, 28800, 37723, 17050, 49608, 36, 52290, 26401, 47187, 30242, 63370, 11996, 59028, 29123, 62175, 23933, 7469, 55290, 25593, 44317, 17494, 49151, 27047, 45616, 29653, 62670, 9428, 32068, 47615, 22660, 43643, 56896, 12142, 41829, 51841, 6538, 38859, 45266, 23179, 37244, 31308, 3959, 38448, 11408, 54626, 15687, 42912, 28405, 46792, 10859, 52528, 25815, 40749, 28987, 43873, 1439, 31483, 38270, 20217, 64751, 30101, 45436, 24824, 4603, 21991, 10984, 26423, 13615, 57998, 24424, 7019, 44932, 62065, 16536, 33279, 43882, 55274, 20964, 60887, 27761, 56744, 15592, 50714, 1726, 47993, 
11395, 51553, 5124, 49255, 36521, 59393, 18812, ++ 38877, 26609, 46334, 10166, 61600, 25467, 38235, 62611, 8088, 58600, 6916, 51461, 45019, 9164, 39228, 61956, 26938, 20332, 52268, 38394, 8891, 47644, 22330, 11775, 53361, 25486, 44505, 5113, 28878, 39657, 14832, 47386, 34415, 13428, 20458, 36324, 44148, 62772, 33110, 56430, 20077, 59879, 27531, 42716, 23561, 34454, 7525, 58842, 10865, 52586, 46588, 62311, 3995, 53958, 26205, 64966, 6337, 48477, 30036, 18470, 54800, 25899, 6820, 23765, 54156, 27792, 57042, 33298, 20859, 49715, 11718, 33948, 61235, 28859, 53352, 22984, 59156, 32785, 65339, 5949, 46213, 10159, 30051, 58606, 22296, 64583, 3077, 50499, 27741, 6186, 25520, 15817, 7306, 48986, 31407, 64275, 27978, 41156, 9047, 61227, 26057, 48566, 17015, 30825, 58948, 35454, 2201, 43344, 12851, 53664, 25545, 42431, 31775, 62830, 6417, 41095, 47105, 12726, 40193, 50956, 30911, 3337, 54007, 11672, 59466, 20042, 29647, 54284, 34830, 24511, 56653, 32065, 21034, 43260, 29380, 3267, 48726, 22775, 44879, 411, 27762, 61810, 18672, 47389, 37287, 20848, 60700, 40265, 25252, 1300, 36264, 50842, 20198, 34125, 13359, 46733, 19196, 51604, 6273, 48039, 12091, 52891, 42952, 7849, 51757, 2043, 55729, 41877, 13469, 33250, 19231, 43269, 6233, 17864, 41346, 34147, 6777, 42999, 16096, 38157, 33091, 46504, 2630, 36490, 63194, 4657, 53056, 14889, 6611, 42764, 49964, 25917, 13466, 52331, 4481, 30689, 62242, 1648, 36344, 24438, 60460, 256, 52726, 10082, 64790, 50878, 30165, 44650, 5957, 34966, 9556, 64574, 20882, 60129, 35731, 14447, 58548, 22895, 11625, 57606, 17750, 51919, 26913, 3858, 48653, 8489, 57177, 36665, 54891, 40306, 63391, 49211, 3434, 39706, 52981, 32035, 2686, 49988, 9578, 26585, 13349, 40030, 17984, 37460, 4666, 30015, 45647, 19976, 34991, 60031, 18352, 26925, 45194, 2328, 24627, 50014, ++ 13216, 57902, 687, 35960, 20062, 56433, 11798, 31745, 15795, 46503, 33460, 17198, 64929, 29772, 18835, 6280, 54728, 31557, 1227, 25895, 62651, 4385, 54234, 31971, 36979, 7570, 62038, 31296, 
49824, 21783, 54694, 27305, 6244, 50233, 64894, 2589, 49139, 17177, 5168, 29727, 41810, 2134, 52377, 9267, 61640, 3276, 49347, 40242, 16955, 32881, 21670, 12548, 37279, 15647, 43293, 17634, 36245, 14618, 60111, 41011, 1480, 45360, 58909, 32397, 3627, 36417, 12527, 60200, 1814, 38516, 54932, 24028, 8903, 14925, 38136, 17200, 47327, 11328, 40035, 15705, 28148, 60830, 18846, 39262, 7644, 44600, 30805, 42166, 12581, 46575, 59644, 30324, 55776, 20456, 3192, 16286, 53809, 6016, 59294, 15504, 38964, 10822, 42590, 50570, 13884, 52899, 22419, 64702, 30129, 4438, 57563, 15274, 802, 36159, 24400, 58064, 17502, 34915, 56686, 7447, 18441, 48099, 14669, 41280, 28059, 49120, 9364, 14104, 46927, 2828, 40632, 12183, 49818, 9966, 57933, 33287, 20218, 55225, 15740, 57224, 35802, 5452, 52839, 29926, 13070, 50001, 3653, 52273, 11235, 61573, 22090, 12341, 64035, 27245, 60119, 1800, 39928, 11124, 34986, 25432, 61761, 31061, 18550, 36658, 46932, 23965, 11387, 25309, 45207, 59574, 9568, 64683, 37204, 53275, 1315, 49366, 27373, 56298, 3962, 52925, 10126, 18685, 57412, 28074, 11425, 34295, 21743, 37885, 56047, 23674, 1384, 37100, 57647, 20182, 34785, 39464, 15602, 26944, 55857, 9260, 29617, 48368, 20720, 40950, 25629, 13634, 19042, 61028, 26443, 58172, 44091, 2660, 38939, 29553, 6300, 50327, 3238, 33990, 53561, 44977, 25229, 7859, 43302, 58317, 15142, 34953, 19219, 50375, 1945, 31803, 7690, 35884, 28884, 14816, 56291, 20041, 38902, 22447, 58419, 36494, 64626, 6425, 53499, 10501, 63410, 41960, 8421, 54964, 24986, 3310, 41326, 56551, 13944, 64416, 34409, 11015, ++ 40870, 27995, 53545, 32875, 47833, 4890, 44267, 52343, 39887, 3925, 55873, 27539, 480, 35750, 56996, 48006, 13775, 44382, 59492, 19627, 35427, 46039, 16018, 58624, 20861, 48883, 14323, 38099, 3855, 63591, 8755, 42851, 57620, 30363, 11887, 41406, 22288, 35073, 51364, 10319, 46122, 18364, 35706, 47596, 21865, 54303, 28532, 24910, 65388, 2488, 44022, 57675, 27842, 59718, 41, 55777, 28821, 51722, 9511, 34220, 20959, 37929, 10222, 41419, 
65304, 19867, 51173, 25431, 46369, 30657, 16336, 44306, 57663, 48675, 5597, 60502, 1495, 51959, 21120, 57278, 37271, 2621, 48075, 25360, 51372, 16417, 57541, 8638, 63352, 34353, 977, 40083, 13061, 33889, 57872, 38479, 23107, 35281, 19165, 43875, 29995, 65492, 1054, 32973, 24930, 6976, 41456, 16099, 48320, 39836, 21612, 49565, 44590, 54361, 10508, 29239, 51863, 4289, 21465, 38919, 58626, 24960, 65026, 33601, 6021, 51954, 37503, 62721, 25910, 42545, 17272, 59188, 26932, 37913, 11391, 52223, 6343, 38822, 31893, 9579, 48133, 21974, 42316, 9065, 64603, 32655, 43230, 19409, 38492, 46525, 31663, 44301, 5043, 49384, 17825, 42239, 29175, 58660, 44801, 15949, 627, 55126, 10186, 64225, 16235, 57331, 32105, 62541, 7134, 21546, 48730, 28214, 14133, 31693, 60284, 21169, 13030, 45992, 34883, 21930, 65499, 40107, 14298, 51785, 42453, 54769, 8873, 60959, 18970, 35150, 65199, 16700, 44770, 6899, 63291, 10262, 45947, 49453, 18458, 65292, 35058, 14854, 57475, 2840, 46535, 33855, 56248, 821, 40517, 17940, 23734, 53826, 16329, 56556, 41740, 19639, 65229, 39930, 15467, 4416, 35512, 63724, 13761, 39345, 23308, 60627, 12059, 27602, 44449, 16896, 52212, 18174, 61000, 48041, 8809, 28187, 63858, 11795, 46883, 474, 23413, 45402, 34153, 49448, 25753, 21897, 58085, 13031, 39075, 62965, 33515, 8956, 29078, 42981, 20693, 60994, ++ 4082, 48725, 17692, 7508, 59613, 14806, 29338, 18673, 25975, 60740, 21064, 38726, 50119, 11577, 41309, 25191, 33951, 7882, 40215, 12600, 50364, 30052, 9668, 41451, 912, 27742, 55726, 19071, 45184, 24957, 35740, 17761, 1687, 45531, 25598, 59086, 7657, 55407, 14249, 63655, 24162, 58301, 15506, 30502, 39043, 13551, 45707, 8361, 36531, 56120, 25772, 7087, 49735, 20552, 39914, 11768, 45896, 3143, 56817, 26825, 63987, 13614, 49126, 29354, 15546, 43626, 5885, 40510, 10741, 63602, 5007, 35906, 21471, 31709, 42460, 25809, 35113, 29252, 45097, 8525, 23562, 63763, 11957, 55418, 350, 37942, 20278, 47645, 24319, 18245, 52036, 22502, 62765, 42511, 10034, 45779, 165, 49385, 56574, 
4569, 52214, 22143, 57114, 17868, 60303, 29007, 55511, 9550, 33937, 11220, 61655, 27800, 8320, 18816, 65295, 37406, 15649, 43656, 61941, 31995, 438, 45479, 8104, 22361, 57343, 19168, 2205, 32498, 55455, 7757, 48255, 35869, 4336, 63742, 44359, 24768, 59911, 13602, 64819, 41321, 17153, 60494, 25018, 54163, 2305, 24044, 16687, 58078, 26798, 8341, 59241, 16177, 37935, 56664, 9195, 23224, 53827, 5421, 20910, 50108, 38800, 26085, 44404, 3705, 40048, 20331, 4903, 50446, 38971, 56613, 2804, 40737, 23503, 55434, 10402, 39338, 63926, 24448, 8198, 58462, 30410, 5229, 48569, 20704, 1769, 25002, 46977, 30557, 3354, 48211, 12336, 54003, 28890, 40556, 24051, 53348, 28348, 3661, 42847, 22448, 5810, 43954, 17355, 28661, 59817, 6948, 36643, 52033, 13073, 48098, 7818, 33208, 49530, 12014, 24782, 46189, 8354, 27147, 60884, 30491, 47986, 21272, 54540, 32395, 401, 46466, 41010, 62865, 10363, 58782, 22975, 42270, 1122, 33834, 23747, 43114, 4444, 54084, 29710, 16098, 51068, 31477, 19582, 14686, 59664, 1005, 36216, 52142, 5901, 16636, 47002, 22350, 54632, 6557, 51296, 15903, ++ 44053, 21614, 65230, 37795, 23453, 41895, 63677, 1286, 54136, 8726, 43156, 13352, 59044, 23653, 3537, 61398, 16449, 65332, 28682, 56060, 3172, 60841, 24296, 34493, 64457, 43514, 10228, 33416, 60152, 12831, 53550, 32225, 61447, 19492, 52131, 15213, 31657, 44786, 26988, 38544, 3905, 32575, 53367, 6458, 64306, 492, 60229, 20291, 51439, 14974, 41700, 34735, 10094, 47509, 32201, 63387, 19299, 39153, 22840, 47208, 4408, 52622, 21721, 55477, 429, 61027, 34907, 53462, 18659, 47804, 28572, 55980, 2534, 64511, 18007, 54282, 12628, 62672, 4399, 41239, 50318, 32041, 42924, 15280, 33625, 62106, 29427, 4167, 55096, 36742, 9452, 48335, 5139, 28801, 54534, 26540, 61778, 14317, 25288, 36676, 12653, 46659, 8113, 37973, 45233, 5073, 36961, 62655, 24117, 51542, 3068, 38513, 33203, 47789, 26107, 1900, 59353, 9473, 27133, 13722, 55247, 36572, 30025, 43082, 16457, 45837, 39248, 10900, 21340, 61279, 23099, 14547, 51178, 19332, 31113, 1461, 
47086, 28276, 2691, 25835, 50984, 4707, 37771, 15209, 45486, 35557, 55489, 5786, 48475, 33595, 2799, 53178, 24470, 30059, 35717, 62717, 14119, 28333, 65071, 31556, 12753, 60978, 21766, 29678, 53676, 47720, 35468, 27202, 14879, 18251, 36133, 54089, 4252, 44859, 17227, 29788, 2300, 50852, 42074, 16793, 45167, 26290, 35675, 59774, 32433, 64522, 13283, 39016, 58646, 27683, 41563, 5090, 32744, 59261, 751, 17850, 61143, 37628, 12955, 50601, 59071, 31903, 54215, 39193, 12412, 49175, 21973, 24978, 32428, 63321, 27630, 58932, 1625, 37161, 62314, 17418, 31988, 55285, 9969, 50922, 16599, 2361, 41906, 9464, 51415, 29495, 5766, 25376, 38641, 5017, 53467, 12576, 64808, 45801, 15431, 59829, 35413, 18639, 42115, 57438, 8042, 61720, 3146, 43288, 28550, 47264, 17443, 27256, 44228, 30313, 58473, 368, 39781, 32468, 26065, 55999, ++ 36770, 9214, 30104, 2699, 55365, 34737, 10412, 46961, 36345, 30646, 49497, 5966, 32408, 45703, 53226, 21955, 50965, 4978, 46687, 23232, 18501, 42281, 6555, 51938, 16763, 22862, 58029, 2749, 40588, 26659, 6710, 48434, 10804, 40022, 4487, 37147, 61866, 935, 50770, 9728, 60557, 43543, 11997, 26082, 41057, 32056, 17707, 43026, 31109, 4335, 63796, 18762, 60911, 24093, 5093, 51112, 35569, 7603, 61888, 16250, 42979, 33784, 7273, 31285, 46083, 23149, 14007, 26667, 59268, 7879, 41611, 13435, 50618, 9797, 36932, 7137, 49086, 19783, 55829, 26227, 16956, 6479, 36181, 59035, 27017, 10890, 53622, 40659, 14678, 26326, 60652, 32870, 17121, 50034, 7764, 18413, 32232, 41843, 6763, 63172, 27540, 33607, 54795, 10564, 20974, 50139, 26655, 288, 43939, 17266, 56519, 14123, 60068, 5206, 41788, 53183, 22184, 46346, 34145, 49351, 20328, 11358, 53430, 3983, 63509, 26542, 60127, 50485, 30440, 237, 34102, 65524, 28908, 8622, 41944, 54375, 18294, 36707, 53335, 43700, 12011, 31329, 58543, 28548, 62400, 10684, 39799, 30597, 14685, 65155, 41380, 18821, 47461, 806, 43423, 7413, 48707, 37534, 2951, 56257, 41260, 6537, 34532, 58834, 8507, 13146, 59989, 937, 44044, 65307, 30670, 11799, 61347, 
26011, 48294, 57120, 37583, 14759, 32039, 54248, 873, 61503, 9502, 15729, 40933, 6051, 44133, 22879, 8013, 51125, 16020, 61814, 10904, 21187, 46806, 34081, 8544, 55217, 30375, 19840, 40023, 10663, 23604, 4551, 63805, 42629, 9005, 61551, 3613, 37830, 14985, 45197, 21723, 31193, 4749, 48829, 38553, 717, 43043, 23879, 36934, 61918, 28255, 56704, 18434, 64397, 20857, 55710, 34546, 47414, 30367, 26000, 37403, 6513, 51721, 30615, 11239, 48865, 5599, 37765, 27070, 41135, 34784, 56157, 7170, 54225, 32823, 9716, 64092, 14286, 37294, 19164, 61899, 12319, 46104, 1533, ++ 31323, 58802, 50347, 40619, 12769, 26823, 51143, 22525, 14092, 64700, 16968, 28215, 63001, 9538, 35086, 14992, 31200, 39608, 10460, 36144, 49360, 32277, 56876, 38998, 5731, 47263, 29633, 51066, 15655, 46400, 65154, 38775, 23329, 56635, 29038, 47096, 21227, 18083, 40709, 23001, 34255, 19147, 48839, 57355, 20905, 50031, 5453, 54849, 11317, 48326, 29146, 38817, 1875, 55127, 30351, 14134, 25184, 53615, 31620, 11432, 27518, 57785, 18135, 62766, 9207, 39329, 56706, 3043, 32861, 22191, 61786, 24845, 39576, 27704, 58398, 44005, 31250, 38904, 13809, 34148, 61390, 52346, 20751, 3785, 49458, 22903, 43785, 6912, 65255, 45158, 2126, 38837, 44117, 21261, 64748, 37476, 59953, 11338, 47934, 19910, 51236, 2371, 23635, 40912, 63930, 14752, 47503, 58672, 19728, 35951, 30969, 45562, 23271, 50649, 13231, 29733, 39524, 16667, 63863, 5792, 40806, 61529, 24198, 47357, 13058, 34599, 5384, 15335, 43876, 52973, 40338, 6677, 45408, 57460, 33698, 14884, 62195, 21568, 8027, 34554, 56865, 19740, 46379, 7502, 21302, 49236, 105, 59842, 22872, 9829, 28111, 54896, 11827, 61136, 21449, 57900, 33225, 17104, 45841, 9619, 19600, 51272, 48416, 15278, 33454, 41641, 24321, 52700, 29243, 7591, 22515, 51350, 34723, 5497, 33553, 8751, 19106, 62483, 6419, 22678, 38343, 50063, 29307, 55675, 23407, 52647, 17664, 56754, 33371, 19606, 45377, 26463, 36692, 50236, 14668, 56530, 25272, 44532, 2225, 52906, 7723, 62380, 35904, 45794, 27037, 16133, 53509, 29825, 
47116, 19939, 54774, 10284, 64330, 42438, 52866, 13535, 22495, 56099, 20063, 59048, 6582, 45634, 12378, 34113, 37868, 7265, 48989, 15932, 2600, 19826, 62264, 8289, 56980, 19384, 40444, 2303, 63077, 24941, 59270, 21215, 52637, 10208, 24055, 12632, 65328, 20565, 40132, 49889, 3840, 53301, 24337, 48561, 7736, 52486, 22808, 63288, ++ 15295, 25125, 6187, 19506, 45528, 60062, 5376, 58183, 33221, 2406, 56607, 38018, 19232, 47434, 1617, 42797, 57672, 27126, 53794, 253, 63190, 11250, 14010, 26335, 61298, 20035, 37463, 8312, 54343, 21534, 345, 16614, 33976, 9152, 59961, 5950, 54489, 30942, 64665, 6965, 55917, 28785, 1744, 36849, 8705, 63165, 24570, 33709, 59358, 23079, 16088, 53019, 45175, 12850, 38052, 58490, 42347, 1067, 44572, 59493, 40211, 2295, 50846, 36690, 25069, 50095, 17475, 37636, 52213, 43342, 1007, 34478, 19318, 52865, 14706, 23289, 617, 59880, 10555, 46882, 1933, 30487, 45618, 39785, 13388, 60315, 17630, 35344, 31027, 19715, 56053, 13933, 58401, 3615, 29792, 47016, 1638, 53111, 28438, 39661, 16612, 43126, 61417, 29498, 3408, 39239, 32043, 12221, 54080, 5896, 64480, 8954, 20196, 32373, 63270, 7319, 55882, 3515, 25638, 51351, 28598, 1520, 17818, 38071, 56087, 23324, 42120, 58295, 27496, 18077, 12757, 55002, 22094, 3610, 24145, 49506, 5637, 39981, 63090, 23234, 988, 40724, 14213, 51738, 34297, 26425, 43911, 17453, 50691, 36972, 45252, 4436, 34699, 15404, 40448, 26504, 10827, 52400, 25043, 59496, 36325, 23679, 1554, 27651, 63861, 17599, 45498, 10705, 38054, 57965, 46618, 15668, 42495, 20575, 59302, 43544, 52470, 28628, 46887, 57827, 11714, 19467, 43200, 3691, 37255, 10528, 48921, 443, 63749, 39686, 2714, 52121, 7399, 31269, 64313, 4266, 41278, 13706, 63501, 33098, 26051, 47973, 1138, 19261, 58359, 33549, 1955, 41504, 11218, 50528, 5528, 40113, 28778, 7474, 25532, 59918, 35115, 10779, 47005, 32777, 14700, 26104, 49730, 3636, 53139, 24199, 43818, 32109, 60334, 41407, 14120, 45133, 21713, 50087, 27311, 54726, 32677, 14346, 46249, 1595, 17110, 62675, 44844, 38417, 
30714, 1988, 61122, 23075, 42783, 31849, 10565, 60322, 28091, 35252, 42087, 11470, ++ 38321, 53010, 33807, 62114, 16377, 28872, 39369, 20256, 44707, 24419, 41000, 4469, 54570, 26488, 60204, 20482, 7162, 63977, 17268, 37769, 21193, 45300, 29219, 55124, 1797, 43034, 12361, 62854, 30660, 36261, 58695, 28346, 52786, 43817, 13906, 26275, 42604, 11084, 35936, 51753, 13308, 46803, 62425, 16421, 44461, 38228, 14339, 46244, 1200, 36083, 58024, 6723, 33161, 22294, 62662, 8463, 17239, 49470, 20239, 10446, 23642, 47695, 29035, 12704, 44911, 4763, 64730, 29680, 8747, 15836, 48233, 62968, 6372, 45786, 4037, 63962, 49950, 29827, 21850, 53818, 24751, 64855, 9548, 56912, 28713, 1287, 46355, 52919, 5512, 50946, 10512, 25120, 31721, 52476, 23415, 12050, 34820, 15358, 57388, 3911, 59436, 31640, 13200, 52629, 18731, 57724, 7888, 42181, 25364, 46756, 28131, 40463, 54966, 1119, 43268, 18288, 35581, 58978, 44847, 10775, 37035, 57896, 49741, 31197, 8703, 52484, 2991, 36085, 9690, 62069, 37238, 25442, 47763, 39019, 60843, 12976, 46140, 29718, 10299, 48380, 54653, 33005, 64251, 3848, 56269, 12567, 63596, 32412, 6404, 57361, 19842, 63058, 31213, 51862, 46188, 2231, 55359, 39327, 4631, 30266, 14616, 63144, 43770, 54511, 5750, 49750, 3351, 62016, 32528, 19428, 1893, 62950, 9816, 49898, 322, 24766, 12680, 39912, 3136, 25770, 33942, 63560, 13812, 47463, 31368, 62614, 28266, 36008, 25487, 11961, 60556, 22319, 58011, 43356, 18767, 29255, 48776, 23045, 36956, 15873, 57037, 11833, 31062, 50047, 14064, 55623, 21002, 35419, 60392, 23238, 56780, 34387, 18619, 57913, 44303, 2970, 29955, 64037, 5410, 52129, 40643, 63208, 21924, 59618, 30901, 13165, 57802, 10913, 50777, 27823, 55332, 33089, 610, 36129, 12139, 43996, 7456, 39332, 55763, 33624, 48417, 29157, 4264, 50383, 15724, 45953, 11663, 35705, 18190, 57013, 38784, 2595, 44995, 17173, 4757, 57616, ++ 47364, 3412, 43373, 9924, 49213, 108, 53418, 8257, 61225, 12229, 51840, 15619, 43991, 10916, 37045, 30159, 48341, 12908, 43731, 25577, 58876, 4090, 48712, 
33738, 18203, 52508, 35260, 23967, 4302, 44201, 11515, 50131, 3371, 20747, 64000, 49626, 16116, 57825, 2920, 25086, 39476, 4984, 22518, 53729, 26778, 3644, 56677, 27952, 51889, 9848, 40637, 27180, 50362, 3437, 46748, 26347, 56312, 28404, 65137, 34570, 54087, 6208, 60605, 19576, 58310, 32253, 11856, 46665, 20679, 55158, 30428, 12185, 56395, 33569, 26906, 40720, 15986, 36460, 43241, 7420, 41751, 15035, 38214, 19201, 34707, 63050, 23971, 12316, 39370, 28066, 41682, 64027, 36011, 8300, 40778, 56349, 24524, 45404, 20762, 36145, 9649, 44761, 6298, 25849, 48645, 35162, 22762, 60958, 16369, 2094, 52326, 11646, 34650, 57287, 26795, 48793, 12339, 30628, 21149, 60895, 14787, 32999, 7041, 64680, 16115, 29337, 46436, 20596, 48650, 31616, 1941, 58927, 15881, 9270, 27984, 52018, 17732, 58207, 38396, 16433, 27355, 6884, 19019, 41755, 23802, 39095, 8681, 52745, 42101, 25688, 13177, 49656, 23593, 7017, 18342, 64353, 32286, 13524, 61656, 42867, 47268, 7969, 31870, 18889, 37152, 22048, 39555, 26557, 12448, 54825, 41039, 23257, 36925, 27935, 64265, 32752, 55967, 18048, 60753, 41610, 7796, 51595, 27094, 60093, 7089, 20381, 46075, 15174, 53732, 30068, 42150, 14201, 34438, 1884, 38689, 54601, 9573, 60221, 5660, 51288, 42370, 21490, 64204, 38219, 5152, 44845, 27967, 6728, 43583, 16712, 30822, 323, 51486, 14230, 36188, 49997, 17073, 39552, 27426, 19020, 1783, 35693, 8882, 42682, 17530, 40186, 1289, 36539, 22367, 4160, 39149, 9880, 65532, 16756, 58218, 22693, 61407, 17819, 26137, 9161, 36800, 13731, 60159, 21586, 55449, 25413, 58693, 5334, 48162, 15011, 26417, 55071, 19837, 65029, 49623, 29425, ++ 21138, 18001, 27596, 56788, 35662, 24754, 42598, 31467, 18397, 48528, 29501, 34563, 22967, 62336, 14280, 52618, 2873, 55572, 34241, 8440, 51561, 15429, 40897, 9040, 64752, 27589, 7072, 47982, 59566, 18744, 56091, 24496, 41687, 31846, 34712, 1473, 37984, 28032, 48093, 17438, 58890, 30122, 41923, 10496, 61070, 34984, 12388, 42255, 18520, 61770, 20700, 13698, 64371, 35265, 18027, 41127, 5644, 37367, 
14725, 3893, 38480, 16681, 41999, 35680, 1577, 54438, 22679, 40071, 58815, 3554, 42314, 24284, 38058, 17588, 59364, 9251, 54684, 5346, 62282, 18329, 58008, 27435, 50752, 4945, 47905, 8833, 32626, 59515, 21999, 57673, 15731, 682, 49192, 16762, 62384, 5706, 50632, 8676, 63579, 30289, 55159, 21796, 64893, 34232, 1364, 55809, 10209, 30400, 50336, 38729, 21323, 62100, 24483, 15122, 4777, 38154, 52697, 8521, 42329, 2603, 48224, 19690, 44307, 25222, 41385, 59527, 12488, 63229, 6164, 53841, 19859, 44621, 32680, 56379, 36377, 604, 26119, 35218, 3397, 59694, 45082, 61387, 30186, 50183, 16036, 47706, 29226, 20622, 3171, 61994, 38737, 1224, 42566, 60355, 27826, 36765, 20392, 50355, 22476, 201, 26992, 53110, 11623, 57576, 28582, 60601, 9088, 56454, 46114, 6106, 31205, 52012, 16458, 47918, 13575, 44659, 4952, 36300, 11108, 49221, 21679, 35291, 2020, 18341, 40435, 56244, 4449, 59004, 9041, 49615, 5438, 55476, 24339, 47794, 12648, 26804, 35511, 20498, 46322, 27791, 3265, 40366, 24592, 8689, 59485, 17786, 51725, 65040, 38773, 12568, 62898, 46407, 26510, 61797, 21124, 9116, 54041, 12910, 61223, 44758, 57395, 28688, 54983, 4848, 65053, 47727, 26663, 61634, 46053, 17964, 59109, 48152, 24637, 52763, 5163, 31687, 41583, 3534, 64159, 44475, 18918, 53762, 32350, 6254, 40748, 8542, 51693, 29601, 36978, 62473, 6835, 41461, 31035, 8801, 34522, 12936, ++ 54345, 32687, 64191, 7044, 21866, 62725, 11266, 55212, 37357, 6467, 64328, 863, 57262, 7619, 41732, 32797, 23916, 40418, 17955, 62094, 30841, 22092, 60332, 24686, 38576, 13519, 57432, 41179, 14960, 32884, 39303, 5552, 62265, 12986, 46208, 19954, 53846, 8474, 62959, 33486, 45329, 15034, 50505, 32790, 19609, 47801, 22045, 64787, 5970, 30852, 44741, 54556, 8994, 29777, 59913, 12081, 52337, 21901, 44070, 57225, 24691, 64210, 8215, 52945, 15226, 27314, 48615, 6746, 35190, 28116, 60848, 10138, 51400, 2141, 47110, 32475, 20189, 48468, 28242, 33179, 2986, 44383, 11762, 56232, 21349, 54182, 42680, 2778, 48596, 7499, 33965, 54852, 27157, 44484, 19414, 
33063, 38348, 26208, 42356, 478, 14617, 49509, 40372, 18071, 46030, 15623, 62992, 43545, 4076, 59579, 36437, 6643, 47241, 41177, 61350, 19389, 23787, 64939, 33676, 53956, 22855, 39927, 10073, 54865, 728, 21698, 38629, 33361, 26237, 42737, 11777, 64085, 4611, 41119, 20819, 64953, 43392, 11182, 50797, 20353, 32090, 11689, 37492, 5295, 59114, 1678, 60944, 34954, 54039, 14449, 29657, 56437, 21832, 10388, 53621, 5567, 44714, 8930, 58420, 38301, 56009, 21108, 40646, 2562, 46805, 13701, 50911, 35029, 20181, 25450, 58569, 3634, 60412, 6891, 38741, 21044, 53891, 24016, 64843, 30897, 16275, 55176, 45048, 53324, 12522, 38602, 24644, 32219, 43575, 20954, 38023, 9971, 65004, 17185, 51618, 62958, 67, 58583, 11465, 32347, 61325, 16856, 54993, 47451, 29356, 36506, 9780, 25384, 2733, 48512, 24024, 8439, 41041, 4285, 47643, 33311, 24473, 42143, 6968, 23086, 11435, 48331, 16269, 33652, 20448, 9631, 54321, 15055, 6200, 35177, 29859, 13500, 42582, 29032, 46763, 10721, 52091, 28417, 49367, 23269, 57773, 140, 42378, 26757, 63715, 34316, 20105, 43791, 743, 22006, 50640, 13574, 59076, 23985, 40373, 61373, ++ 960, 46560, 13794, 50902, 38680, 15127, 47755, 3131, 25671, 59180, 39997, 27730, 45358, 21385, 49882, 5502, 59795, 9979, 47009, 2149, 36577, 44824, 5276, 54062, 1039, 45914, 31522, 22407, 2320, 63427, 9820, 48977, 17646, 55318, 7294, 60715, 23680, 43341, 12224, 21453, 131, 55054, 7902, 59616, 2353, 52649, 7405, 31768, 39271, 49226, 2771, 37104, 25490, 43419, 353, 48059, 32733, 62365, 30729, 9414, 48931, 33268, 21342, 30115, 43744, 61584, 10847, 63732, 16989, 49564, 14430, 44438, 21216, 65529, 25647, 13090, 57135, 37559, 11193, 51673, 35452, 64227, 24413, 31503, 40379, 16193, 26086, 37208, 18697, 61690, 43278, 21035, 11478, 60803, 2517, 58804, 13457, 54246, 17566, 60499, 36841, 27386, 5343, 53735, 28759, 37770, 7116, 33303, 27043, 13655, 17765, 49268, 29128, 9885, 32748, 54516, 62, 46084, 15862, 27371, 57006, 4355, 62324, 27917, 35178, 50323, 4883, 56552, 16732, 51453, 30128, 23760, 
50082, 13969, 52792, 7309, 30958, 55924, 24553, 42515, 2118, 53532, 22498, 44413, 26905, 36484, 18130, 11063, 24893, 47008, 35907, 8048, 47936, 33876, 41028, 16312, 48943, 28714, 35120, 17390, 6765, 34191, 15842, 64753, 32807, 42641, 16779, 549, 63477, 43451, 14527, 34275, 40357, 26919, 57303, 30142, 9174, 50518, 1167, 43804, 58738, 5640, 29701, 23743, 33052, 64417, 17053, 50881, 1247, 62166, 27295, 45844, 30953, 40706, 6481, 22004, 44047, 39310, 18185, 52466, 7172, 45493, 34013, 590, 15090, 62049, 42078, 32698, 57440, 20466, 52635, 36873, 15816, 55742, 28168, 65355, 1066, 56919, 37373, 51037, 31567, 38969, 25013, 52404, 43525, 29336, 38174, 25646, 51585, 63980, 7894, 53864, 2865, 37090, 15804, 60705, 20251, 38240, 12900, 6712, 35100, 51363, 11125, 47510, 16455, 2997, 57254, 12444, 64664, 33246, 27404, 45596, 1860, 52037, 5775, 25819, ++ 37633, 23127, 41233, 30455, 4158, 57816, 17629, 33631, 46230, 20724, 9271, 50656, 17396, 35921, 25042, 65197, 29062, 19533, 57969, 27989, 50843, 12030, 32580, 49531, 20600, 61757, 10609, 50421, 53284, 26958, 44991, 30029, 36685, 25724, 40192, 29529, 4018, 56844, 31257, 41217, 65287, 27608, 36372, 24043, 40305, 29414, 43723, 55622, 16769, 23473, 60645, 15367, 53483, 19770, 57474, 23953, 15944, 2553, 50999, 13252, 45747, 683, 40863, 56094, 5268, 25374, 38940, 20088, 33045, 234, 53696, 35999, 31125, 7742, 39890, 45051, 23849, 1172, 60218, 22582, 8251, 14385, 47277, 14, 61114, 6149, 51940, 63636, 13642, 30579, 4523, 37902, 47375, 29077, 39917, 22452, 48187, 4330, 46454, 23027, 51921, 11144, 62223, 12460, 59090, 23881, 51065, 19252, 53410, 45025, 31234, 63761, 2789, 58488, 16976, 44077, 28443, 39072, 6222, 49979, 11909, 36310, 46730, 14311, 52185, 18618, 45202, 10480, 60527, 1159, 39761, 8379, 34743, 61610, 28706, 16912, 48800, 5061, 63386, 15371, 47300, 57150, 13837, 65343, 9221, 51129, 55402, 44954, 40307, 4240, 58734, 16936, 63791, 26176, 3459, 59642, 24605, 65413, 12258, 51451, 62233, 42035, 49326, 22828, 5103, 24814, 53758, 30424, 
36565, 8248, 48990, 10500, 53216, 18644, 2468, 61880, 45729, 15047, 39438, 27530, 10207, 36822, 61661, 8482, 42669, 2901, 47945, 11539, 35088, 18557, 57346, 16213, 3447, 59645, 33638, 49340, 15302, 29883, 56086, 23806, 37715, 10452, 57790, 26289, 53241, 22379, 4001, 54336, 13786, 34826, 6154, 59248, 31767, 43214, 18064, 11904, 45891, 30205, 15532, 3474, 62707, 7651, 60530, 30, 13916, 58491, 3185, 44622, 31204, 19083, 40018, 23901, 63313, 20931, 57319, 34187, 1213, 56412, 30124, 64563, 15328, 30965, 61950, 24412, 37997, 48941, 28248, 39594, 16979, 53941, 10130, 56281, 36311, 15982, 44347, 59748, ++ 10794, 55756, 7986, 60425, 43756, 27048, 36481, 63604, 11924, 52842, 30948, 13108, 56138, 3671, 53570, 15876, 38282, 46098, 6796, 39469, 14503, 63668, 26091, 16480, 42043, 28595, 34914, 15964, 37598, 7946, 19593, 61096, 688, 52007, 10359, 47542, 35499, 14606, 51119, 5791, 37375, 9640, 49409, 15699, 63905, 13050, 24865, 719, 58614, 11128, 34175, 47337, 4874, 41494, 10718, 36454, 61128, 39599, 27737, 55571, 19062, 26516, 63080, 13844, 36908, 51583, 2831, 45468, 56843, 41365, 26281, 5754, 57587, 18727, 61459, 4622, 52718, 41157, 16645, 45955, 55253, 39051, 29160, 58922, 17832, 36370, 29687, 9992, 45496, 56739, 24873, 53465, 14938, 6430, 51648, 31283, 10287, 57909, 33718, 7378, 30781, 39044, 20383, 44163, 3008, 41698, 9381, 65176, 650, 56724, 10907, 42640, 22634, 37260, 50803, 7784, 57653, 13361, 63096, 31401, 24790, 60228, 17571, 32187, 6795, 64298, 24382, 29023, 37434, 22549, 47427, 57861, 18455, 2521, 45921, 38110, 21824, 40436, 9800, 36041, 25541, 7639, 33927, 39536, 21059, 31049, 5956, 15148, 64505, 22298, 31799, 43512, 12797, 37875, 54468, 11431, 44064, 1737, 39897, 29493, 3887, 25727, 9877, 55026, 39081, 61266, 11024, 45030, 17892, 55638, 29066, 65050, 22306, 47349, 32339, 17156, 35535, 25196, 56851, 19676, 46692, 14355, 48428, 21239, 57590, 26359, 37512, 60880, 29031, 52778, 7680, 36248, 54140, 20094, 28576, 4788, 61724, 8228, 47075, 2474, 65472, 31514, 19710, 41171, 
12086, 49574, 44225, 18926, 28937, 60927, 45346, 11035, 22656, 2268, 60258, 38114, 53027, 19471, 58685, 34674, 21415, 46605, 18349, 55820, 35865, 22875, 62532, 11727, 56316, 1675, 49605, 33335, 9300, 44279, 6031, 50527, 40904, 8853, 43467, 22105, 46485, 39862, 4877, 19444, 59419, 9519, 52727, 22646, 46813, 4567, 42247, 18705, 28969, 63469, 20633, 31675, ++ 16778, 48889, 34909, 19640, 9680, 52306, 1347, 22395, 42010, 4929, 61783, 39068, 31945, 42449, 10714, 47595, 564, 33479, 54749, 20183, 56404, 3472, 35619, 58455, 6213, 55662, 3120, 65520, 23119, 46679, 57055, 14079, 42411, 21934, 64412, 16826, 59242, 27135, 18962, 61522, 25393, 53227, 18241, 45653, 4738, 57017, 46536, 37641, 28620, 51224, 8061, 26653, 65439, 32110, 49963, 28933, 7704, 20799, 42600, 4589, 60183, 34340, 7039, 50245, 18549, 31735, 59783, 12968, 23521, 8502, 62517, 47522, 22336, 50392, 34763, 29486, 12356, 31816, 63234, 26620, 3602, 20534, 53134, 10760, 43465, 49836, 23167, 40976, 1543, 34500, 50259, 8997, 62926, 35657, 17993, 65331, 26837, 41229, 16035, 64129, 2013, 56898, 47807, 32664, 25612, 58198, 35702, 22014, 40016, 24867, 34402, 5573, 55596, 14226, 25943, 35317, 20991, 47729, 18947, 51751, 9103, 41916, 2163, 58577, 40502, 13564, 42976, 54416, 7590, 62793, 15009, 32312, 41825, 26582, 60021, 12357, 55178, 33106, 58773, 18780, 61900, 29049, 52345, 360, 57807, 43023, 28050, 48841, 33333, 9705, 52162, 577, 57526, 17968, 30760, 50775, 19338, 32981, 56789, 15063, 45328, 58055, 30962, 18306, 48136, 7192, 28068, 58984, 4198, 23722, 41805, 1443, 37804, 12017, 59475, 42317, 7375, 51031, 3817, 63052, 33615, 54333, 181, 41125, 15551, 51900, 6711, 22162, 44369, 13313, 41636, 47305, 25152, 11013, 56869, 42883, 37335, 25793, 34529, 50382, 14345, 43292, 5842, 63187, 35294, 30543, 7867, 39693, 50761, 1243, 26834, 39237, 54934, 48759, 34063, 9434, 25784, 5867, 43961, 27778, 53625, 10558, 40942, 26948, 49157, 8570, 32590, 41691, 21544, 47102, 12673, 59983, 28037, 48491, 25317, 14457, 26876, 54430, 17507, 58814, 
2431, 25715, 53220, 43084, 14083, 35466, 31379, 7237, 60492, 25194, 61588, 34045, 7635, 47955, 3774, 42865, ++ 53653, 2165, 28506, 65463, 24221, 45108, 32208, 54871, 16188, 48747, 23575, 1956, 60597, 19010, 58538, 27296, 62789, 22680, 11426, 30422, 43118, 23480, 47799, 9512, 44407, 19209, 39934, 29293, 54532, 4443, 33356, 24929, 58316, 5216, 32331, 44639, 2011, 39121, 48343, 11276, 44100, 3162, 57962, 31992, 21014, 35376, 9286, 19355, 62819, 17744, 39957, 56010, 21503, 14452, 6289, 54765, 46307, 63574, 17124, 51790, 37806, 11496, 47064, 23236, 57908, 9581, 42195, 28735, 52490, 37300, 16446, 11567, 38682, 2642, 15314, 55637, 48903, 19540, 6603, 42398, 33833, 61885, 7079, 25534, 32351, 3993, 64946, 12759, 58482, 16918, 27813, 41965, 20233, 55459, 1001, 43621, 12606, 50078, 24693, 35401, 42934, 18961, 8174, 14363, 49089, 16797, 4656, 46533, 12928, 60751, 48072, 20022, 52021, 30109, 62589, 3796, 60449, 37645, 1398, 43445, 22275, 34273, 55393, 26396, 30368, 48412, 3155, 19356, 49172, 35655, 3938, 55586, 10747, 53189, 6483, 23470, 43746, 1337, 27226, 49723, 4065, 41512, 17226, 46762, 24197, 12195, 60063, 2378, 56066, 19530, 49952, 28899, 23925, 46548, 5000, 35649, 62866, 8623, 23327, 47646, 20040, 36110, 1009, 63986, 14314, 33649, 50252, 21375, 38589, 62649, 15370, 51509, 26128, 54651, 4608, 28322, 64014, 13129, 40788, 31544, 6332, 22608, 28740, 65110, 34616, 10759, 31830, 54922, 4042, 64061, 23273, 794, 62727, 32627, 45142, 9199, 17597, 53580, 13120, 22551, 59948, 28229, 48348, 21284, 1822, 56345, 16469, 64733, 24327, 13193, 63716, 17548, 7290, 29448, 20280, 50432, 41600, 64607, 13297, 49551, 2498, 63493, 30776, 5284, 45564, 16073, 50627, 6852, 61268, 17026, 37680, 4682, 55113, 18212, 38883, 62214, 35672, 4078, 47780, 11923, 33011, 61044, 8118, 27707, 55937, 1437, 63182, 44574, 14867, 38509, 11769, 50159, 23767, 39185, 58324, 14217, ++ 26287, 51454, 39652, 5211, 56359, 14554, 40557, 6902, 58106, 28345, 35297, 14883, 44246, 26170, 7801, 40703, 13841, 52049, 37293, 64599, 
8143, 53059, 17093, 60942, 25310, 51676, 13248, 43645, 11653, 36056, 51234, 17169, 38346, 49771, 12466, 34194, 52955, 22763, 6516, 56271, 33794, 23249, 38759, 11845, 62183, 26173, 54147, 48481, 4236, 42898, 30574, 1974, 45293, 59251, 38888, 22956, 1335, 33747, 10023, 31214, 24201, 53897, 15757, 44766, 1796, 36212, 17805, 64869, 4268, 46429, 30764, 54565, 27860, 64349, 42776, 9008, 36701, 58238, 28622, 51053, 13268, 35810, 49115, 57420, 15491, 54426, 19978, 46834, 33385, 22271, 60103, 3219, 46095, 32112, 23775, 37039, 61341, 3458, 53283, 9190, 55973, 27897, 60160, 37412, 61795, 29941, 55280, 28360, 52849, 7581, 16077, 39454, 1706, 45680, 11429, 41571, 31797, 10383, 56152, 29627, 65398, 5267, 44729, 20149, 8211, 61377, 33902, 57265, 11552, 25287, 44169, 27786, 46855, 19598, 36796, 64558, 16202, 51796, 8826, 45574, 37841, 13253, 54928, 10214, 62747, 37170, 18591, 41213, 25889, 38556, 6576, 62503, 39723, 7682, 60774, 26812, 14195, 41676, 52567, 6148, 60222, 12991, 51927, 37597, 26332, 43148, 2076, 52809, 12637, 47015, 31712, 7887, 44500, 33979, 20418, 48655, 37215, 23160, 52598, 17802, 59106, 38961, 50303, 4849, 19982, 59980, 46412, 14903, 40052, 30648, 49917, 19112, 39451, 13967, 52193, 23991, 64550, 1428, 58284, 39900, 4195, 54521, 9490, 38570, 60520, 27236, 46032, 4979, 47771, 37264, 30944, 52165, 58156, 40450, 4652, 61961, 14535, 32206, 23688, 36591, 17358, 39767, 12495, 57680, 24363, 64456, 20003, 36412, 27192, 53263, 23385, 42949, 31883, 351, 58332, 10427, 30518, 62981, 23701, 36604, 50857, 13378, 37550, 45755, 21372, 49275, 18489, 29844, 3379, 57547, 32142, 258, 54742, 21473, 9366, 35547, ++ 62408, 18218, 12623, 46876, 29920, 18887, 61107, 26643, 37876, 8586, 64809, 51659, 6012, 49570, 55345, 34136, 4595, 44620, 18351, 1530, 41567, 26803, 33096, 37944, 70, 31335, 60053, 7426, 59027, 21268, 1367, 62637, 9380, 28230, 54959, 20538, 8835, 63691, 42119, 30394, 16515, 59858, 7117, 50163, 42533, 1593, 40846, 14677, 33382, 52453, 13456, 61346, 11647, 34649, 18429, 
44245, 58113, 25828, 48271, 58885, 3106, 40133, 29446, 62212, 27071, 55014, 47892, 7503, 33962, 14110, 60519, 1395, 43571, 17329, 25861, 59556, 23055, 553, 44210, 9840, 65188, 22097, 1776, 39824, 44843, 27363, 37746, 5442, 55713, 7824, 38694, 14237, 48849, 9813, 59259, 6963, 29574, 19594, 45855, 21304, 31897, 5036, 51346, 23201, 1157, 40982, 11836, 20628, 38029, 32261, 64355, 27568, 59252, 23399, 49592, 18176, 53675, 26678, 16576, 39650, 12808, 50588, 15240, 53508, 38209, 23056, 15670, 39321, 30829, 65149, 17083, 59129, 136, 31469, 49337, 13501, 29824, 34474, 60569, 19947, 30474, 63837, 23133, 35342, 31564, 4878, 53292, 8222, 61629, 13985, 45643, 32222, 15764, 52979, 20708, 48510, 55578, 2645, 38080, 27433, 31468, 43647, 24084, 8427, 57035, 19547, 59727, 35249, 29566, 5826, 56685, 19012, 58170, 11287, 61165, 15813, 737, 55859, 9656, 26683, 44269, 12184, 16842, 56414, 43136, 27773, 1684, 58407, 25897, 8733, 34195, 55960, 6141, 59088, 2832, 36593, 27475, 42030, 30265, 18856, 44726, 15763, 33414, 51077, 18272, 10728, 32125, 58741, 21854, 9907, 43703, 3321, 15206, 25118, 46798, 35613, 551, 54465, 8105, 59759, 45104, 20836, 51781, 33818, 38511, 954, 43336, 59308, 3745, 34880, 10958, 65150, 15243, 51223, 22300, 45501, 19207, 42052, 7063, 56856, 20611, 29381, 65245, 5564, 33693, 10889, 55160, 37209, 51864, 20230, 41009, 46136, 15444, 63885, 30215, 44812, ++ 6112, 32815, 58947, 37011, 11091, 48096, 2765, 50468, 13530, 45826, 20021, 30280, 36853, 21988, 16811, 29607, 61450, 24459, 59689, 50148, 12707, 57197, 4804, 62523, 21750, 49178, 15330, 27367, 47372, 40735, 30555, 43226, 26192, 45982, 2845, 60397, 37037, 28942, 14204, 52202, 987, 47218, 34543, 28104, 17270, 60288, 29963, 22423, 63447, 6657, 25253, 36734, 50604, 27493, 3814, 53163, 12731, 35969, 8385, 43483, 15023, 65060, 6018, 20410, 39364, 11080, 21686, 29908, 58512, 24994, 40290, 20880, 51975, 10378, 34246, 5145, 48013, 53953, 18062, 38389, 16102, 46611, 30865, 19073, 8608, 62592, 11933, 51283, 24181, 43984, 30161, 
64516, 25932, 52378, 16428, 44948, 57153, 38194, 13843, 62549, 40233, 15297, 44399, 10471, 47139, 34039, 63337, 49861, 2458, 44606, 9053, 43208, 14990, 36563, 8415, 64026, 5926, 45292, 59639, 3419, 47158, 23997, 36754, 460, 63573, 45865, 5759, 52576, 1761, 40888, 9172, 22005, 38695, 62019, 5486, 42287, 56785, 3244, 24976, 53939, 6111, 44107, 2773, 50284, 15536, 46054, 27538, 48118, 34605, 21683, 59398, 3081, 42761, 23053, 37292, 10057, 30169, 18804, 63555, 11150, 54072, 4553, 61544, 46474, 3314, 40119, 16079, 9448, 64534, 41178, 24551, 35950, 2921, 39809, 21966, 46302, 42824, 28941, 60511, 34969, 2346, 62330, 37877, 24890, 9378, 49106, 35920, 18026, 44951, 61210, 15899, 28012, 48530, 21371, 31344, 50802, 12233, 46724, 6989, 49755, 35780, 25064, 57227, 6629, 42677, 52804, 36299, 14682, 40848, 55888, 19882, 49310, 33066, 63061, 11333, 56453, 21685, 37784, 47381, 26174, 3947, 61081, 6485, 14728, 28391, 54200, 10087, 30272, 14246, 57069, 46287, 28854, 40615, 7528, 33966, 52996, 2746, 49786, 16168, 44890, 815, 54150, 17931, 40511, 58159, 26323, 41814, 8720, 23336, 64937, 6571, 27127, 51158, 36708, 2878, 56930, ++ 22766, 49807, 594, 20949, 54025, 34304, 63091, 21660, 33316, 59522, 308, 54205, 10080, 63443, 43332, 2531, 48156, 9128, 31660, 34754, 20891, 40266, 14797, 45692, 10239, 39161, 34390, 64058, 17913, 6606, 56486, 13594, 64859, 18564, 41360, 15528, 50587, 4605, 61933, 26547, 39889, 20370, 65013, 13672, 55427, 5687, 51515, 10216, 38525, 57618, 46923, 20161, 8731, 41857, 62064, 16583, 30264, 64043, 19509, 56619, 22384, 49284, 32517, 52684, 3723, 61188, 49831, 43952, 18921, 50816, 6238, 56343, 32138, 46142, 63442, 39462, 14563, 32713, 60736, 25140, 56003, 5673, 52455, 59802, 34948, 2421, 41431, 28840, 60912, 402, 53753, 18879, 4847, 34672, 39617, 21881, 10917, 32827, 49633, 118, 28957, 64609, 34929, 25071, 54401, 17468, 6349, 26267, 56270, 18638, 57422, 30727, 4235, 54809, 33824, 25116, 40741, 21871, 34767, 28880, 61861, 9769, 56752, 32891, 11119, 28297, 59807, 21282, 
50849, 26829, 47911, 54190, 14456, 24080, 51198, 10015, 20938, 40050, 48296, 14666, 38935, 17841, 58592, 26665, 56944, 20207, 65018, 1091, 29952, 10537, 36304, 54773, 11872, 64803, 1413, 44433, 59023, 33513, 45807, 21585, 40878, 16881, 34670, 22232, 32093, 49513, 27142, 45937, 20773, 54191, 13404, 49940, 62153, 30718, 52247, 8796, 33117, 5388, 49459, 14039, 47661, 30382, 7575, 53004, 32853, 63419, 12783, 53831, 21760, 5133, 51462, 38133, 10354, 41069, 63585, 16503, 55410, 20828, 60714, 11264, 63932, 868, 29621, 62227, 23582, 3529, 25698, 62644, 168, 33863, 61379, 23306, 8607, 42513, 27362, 16813, 51325, 28830, 10310, 55239, 32890, 41312, 22579, 56633, 42353, 18665, 47569, 52060, 24915, 39408, 2169, 19625, 55608, 26242, 60842, 13676, 32636, 27511, 59583, 25044, 39253, 32074, 9994, 47168, 15841, 2228, 59862, 17203, 48364, 34846, 13271, 59229, 10301, 25371, 47471, 12138, ++ 41613, 27763, 64465, 43531, 24906, 8919, 42665, 4423, 38841, 16431, 41396, 24120, 47240, 32457, 12311, 56555, 35791, 15537, 46321, 6359, 64221, 28283, 52281, 29889, 55982, 5632, 53940, 2098, 32184, 52575, 23778, 48651, 35131, 8109, 57667, 24390, 31634, 46742, 19237, 45208, 10578, 54442, 3715, 38015, 32907, 44944, 24515, 43860, 16298, 28307, 2977, 64591, 32591, 55170, 21785, 40354, 48856, 5200, 38338, 28552, 102, 35352, 12210, 43064, 24534, 34849, 14800, 810, 63881, 9935, 35629, 13415, 23996, 3847, 19883, 52989, 27200, 7920, 40841, 3291, 29850, 42069, 23579, 13725, 26343, 56494, 15141, 48148, 17397, 36558, 11304, 40564, 57499, 28311, 1895, 63789, 54068, 5781, 26473, 41771, 55007, 6801, 18257, 57999, 3681, 31433, 59877, 41989, 13783, 35979, 24173, 48527, 21622, 61180, 12411, 51434, 855, 58119, 13936, 53033, 20536, 42565, 17362, 49414, 25802, 43669, 18050, 36014, 13169, 61054, 33466, 4463, 35012, 44556, 29163, 58065, 32574, 62582, 11447, 28138, 61147, 33591, 8535, 42200, 6842, 39379, 12959, 43315, 58019, 51631, 25335, 17609, 49559, 28444, 57209, 24420, 7049, 15323, 50487, 390, 58505, 28262, 65206, 
10338, 55819, 11844, 60799, 5251, 37332, 94, 43839, 27715, 16695, 6605, 25568, 65453, 19310, 57721, 24134, 41317, 20879, 55117, 45535, 18906, 3301, 22936, 39126, 7141, 42443, 65330, 29421, 2066, 60307, 26559, 8031, 34958, 4618, 38863, 31976, 23115, 41458, 17320, 46492, 12840, 37983, 55154, 45067, 12268, 51534, 28045, 6382, 38410, 53764, 2036, 59394, 44464, 5588, 63866, 18959, 43060, 15379, 29708, 48605, 1937, 62275, 7987, 32324, 4960, 63737, 16509, 50170, 61838, 5718, 43890, 9702, 38368, 64740, 5370, 41191, 11439, 63547, 3894, 52372, 24139, 62104, 36167, 28617, 44013, 4361, 30817, 53397, 19309, 33105, 40245, 17565, 60980, ++ 8291, 38108, 15688, 7287, 57402, 17225, 28790, 53143, 25458, 56812, 11737, 62230, 3860, 39742, 19443, 50995, 25203, 58919, 23090, 54392, 10870, 48473, 2709, 18669, 36433, 24249, 42556, 19920, 45139, 11010, 38683, 3604, 22250, 53604, 37845, 395, 55887, 12770, 35773, 7513, 58222, 31178, 21996, 48753, 8933, 19697, 58811, 252, 56468, 35583, 49169, 23746, 15510, 887, 45869, 7234, 24902, 52052, 10597, 60770, 41650, 54285, 17447, 59600, 8789, 57289, 31400, 41094, 26764, 38298, 48536, 62000, 41892, 57799, 30277, 11881, 45232, 62716, 21424, 49585, 58598, 10933, 36940, 64050, 45697, 20839, 50472, 32555, 9309, 63312, 31199, 47714, 13319, 50694, 42549, 25247, 15570, 46374, 61003, 12074, 22554, 48395, 36669, 13095, 52074, 38635, 9685, 22315, 50943, 339, 62358, 6946, 38866, 28006, 46904, 19573, 32609, 48918, 7431, 27169, 38536, 6447, 59025, 35444, 4087, 54748, 8759, 57560, 41659, 6914, 20297, 56072, 11953, 63963, 17517, 2010, 46253, 7225, 35728, 55717, 749, 47578, 21484, 52690, 24698, 32903, 54259, 16799, 22686, 4658, 44861, 33782, 5848, 40220, 16442, 35280, 42355, 62043, 25584, 38966, 13624, 48352, 6459, 53050, 25000, 42462, 17528, 33238, 51142, 22904, 60079, 10077, 47538, 55513, 38309, 14711, 45290, 36224, 10316, 63646, 1341, 26075, 11169, 61429, 41866, 29917, 57024, 48161, 24538, 11606, 35572, 43938, 14498, 53138, 45691, 19392, 47687, 62493, 3109, 53935, 
8876, 56646, 32933, 49025, 20567, 8162, 29116, 41756, 19171, 57859, 16218, 46344, 30330, 18515, 35039, 12393, 31247, 39475, 1393, 58353, 8967, 64972, 11582, 35469, 25579, 37295, 57994, 21892, 44781, 34278, 8702, 31414, 36867, 20768, 48972, 24294, 16908, 35988, 54840, 21735, 48030, 34560, 19729, 42834, 6220, 50026, 12303, 62857, 24794, 56662, 1254, 43207, 61857, 4973, 52909, 29285, ++ 55616, 19868, 52107, 31889, 39916, 46409, 60241, 10431, 44108, 6687, 49140, 29235, 17877, 57595, 27850, 5746, 38489, 930, 42735, 17492, 37129, 25765, 43956, 60451, 8765, 63330, 12866, 58066, 26445, 61328, 16341, 59658, 32754, 11985, 28522, 49290, 21130, 40505, 63281, 23572, 43146, 15286, 50953, 25587, 64226, 36098, 12587, 46100, 31449, 18779, 10962, 52837, 37502, 60383, 29595, 57169, 14215, 31843, 47399, 18230, 26124, 6927, 45649, 27918, 37674, 19741, 46957, 5540, 55464, 21994, 2933, 15902, 28425, 7220, 37850, 50176, 2027, 36062, 12855, 33659, 17103, 47592, 859, 31551, 7275, 39258, 4197, 58079, 21639, 43145, 6255, 23330, 62178, 20490, 8483, 59756, 36236, 18558, 33848, 52737, 2706, 59007, 27272, 45363, 20858, 29325, 43758, 65468, 33484, 46199, 17143, 54148, 10683, 42246, 3112, 64769, 37149, 16217, 44001, 63011, 2271, 51948, 30055, 12255, 64619, 31584, 46505, 2716, 29403, 49920, 24876, 45469, 39566, 7821, 37068, 53588, 26061, 18935, 50565, 22811, 44788, 12511, 64219, 36583, 10991, 60253, 3752, 49099, 37526, 63461, 8879, 61306, 26975, 47519, 13133, 51819, 4106, 29315, 9160, 56515, 36847, 20225, 44771, 30616, 1297, 38435, 57660, 7693, 63260, 30106, 18409, 40570, 34589, 1771, 31996, 53648, 3465, 51683, 17401, 31246, 50121, 36662, 58096, 34023, 13476, 51242, 552, 16607, 32108, 57853, 19830, 54660, 23432, 33162, 285, 57570, 28959, 14758, 26160, 37070, 51705, 26996, 15251, 2314, 59123, 34579, 65178, 5357, 48000, 9224, 35963, 64381, 13946, 49932, 57332, 22200, 60733, 48223, 24128, 34466, 50854, 19749, 45723, 52895, 17183, 49883, 13364, 40274, 11852, 54600, 22992, 46959, 13006, 58983, 1327, 
56011, 45898, 8322, 51555, 14651, 28949, 60294, 12752, 56182, 31571, 22422, 53874, 7936, 38864, 14512, 45409, 28150, 16263, 49339, 24317, 1717, ++ 45717, 33957, 3968, 62024, 13345, 1166, 23226, 36126, 64084, 31527, 20465, 35056, 52561, 8383, 45013, 64971, 14167, 53318, 28864, 61989, 4012, 57805, 11885, 31059, 46822, 22614, 49868, 33854, 7729, 29728, 47944, 41768, 6075, 62222, 44316, 10055, 59111, 5487, 29474, 53328, 1838, 61208, 11394, 41669, 2587, 53706, 27293, 62572, 7760, 41246, 59747, 5879, 43250, 26743, 12342, 34993, 62920, 44627, 2157, 55733, 33186, 64452, 13769, 51154, 2409, 63140, 12784, 53261, 16828, 59239, 32906, 44487, 54154, 22807, 64554, 18443, 56988, 24601, 55067, 6048, 61274, 27754, 51504, 54727, 18270, 61721, 35525, 25436, 52808, 14585, 55207, 37629, 2938, 33131, 46989, 29877, 3929, 51558, 7973, 24377, 39167, 16665, 8717, 62895, 1770, 56612, 14839, 4900, 26859, 11565, 40574, 31935, 58819, 15463, 52531, 25732, 9513, 57136, 31077, 18751, 41342, 24632, 47623, 22145, 40221, 14741, 23600, 62390, 37567, 16360, 63267, 955, 28403, 59339, 22454, 15114, 41092, 65286, 4269, 31876, 16484, 40526, 28824, 1843, 51251, 19131, 40830, 29364, 14765, 30916, 41812, 19877, 55320, 2204, 64282, 31642, 53823, 46734, 17784, 32562, 2965, 51305, 12458, 35555, 62406, 14830, 28775, 44321, 13876, 48050, 4053, 58703, 23467, 62894, 12890, 21527, 43398, 27280, 59666, 6014, 40204, 16040, 4406, 27013, 47171, 20305, 36939, 62996, 40648, 3872, 46173, 5907, 62023, 12553, 43100, 39803, 9757, 58869, 44390, 21589, 5542, 40594, 61555, 43892, 24747, 13455, 50257, 22736, 31645, 54162, 26318, 2923, 40217, 24843, 7418, 41982, 3647, 16001, 53428, 6779, 27546, 38824, 4433, 23440, 59657, 3119, 29424, 60487, 638, 27861, 63134, 17768, 52512, 30007, 42554, 28303, 18811, 57867, 31060, 1802, 44160, 7403, 26680, 37946, 449, 46593, 40042, 33305, 20478, 50733, 21828, 64322, 9054, 35182, 42349, 60016, ++ 13973, 22331, 48413, 26053, 54519, 30740, 50076, 18556, 2384, 51326, 12969, 41047, 1599, 60879, 33054, 
21298, 41847, 30627, 9693, 50548, 33614, 20253, 40920, 55075, 639, 16730, 39779, 4189, 51449, 36833, 1094, 25128, 54815, 18148, 36231, 25938, 16051, 33307, 47655, 17569, 34931, 27759, 39212, 57233, 18313, 30738, 5060, 39762, 22706, 54624, 33644, 21053, 47847, 9601, 50801, 3375, 23131, 16048, 36988, 21262, 40822, 4500, 30540, 56268, 22565, 45035, 25499, 42466, 29300, 8085, 51345, 11415, 34514, 283, 46699, 9480, 31313, 43030, 15629, 43877, 37345, 8861, 22671, 12472, 42729, 28509, 10462, 44544, 1425, 34268, 26965, 45260, 17749, 58633, 12418, 56214, 41030, 27609, 44041, 65002, 30677, 43047, 50208, 32531, 39966, 19328, 47579, 37013, 58283, 53259, 20254, 5395, 28702, 44938, 17942, 35026, 55673, 4554, 46020, 11659, 60695, 8024, 15827, 55949, 1252, 51312, 34391, 19031, 43275, 9611, 32791, 52287, 18366, 42625, 31256, 48927, 10434, 27107, 52080, 37714, 59716, 5371, 56351, 23654, 45334, 32342, 62978, 9477, 55871, 28, 50884, 12094, 34845, 18479, 39206, 20944, 10754, 22555, 59951, 41533, 63846, 26602, 55146, 18675, 47162, 21843, 50631, 2494, 25963, 54859, 32667, 41734, 8560, 29195, 48909, 60926, 8082, 39287, 12414, 54028, 22434, 64698, 43694, 52396, 6489, 59861, 9917, 25642, 13806, 50429, 30994, 17261, 37591, 49217, 25291, 18146, 52325, 33950, 11791, 48232, 64646, 19670, 7606, 30063, 53311, 36733, 17713, 60118, 1529, 39136, 21209, 55370, 44671, 10531, 52458, 37003, 26693, 46144, 31837, 61723, 13727, 56130, 30607, 40733, 10643, 44213, 33533, 20382, 47981, 37922, 6354, 41390, 3440, 35404, 11231, 63841, 4298, 39921, 23097, 62583, 35062, 49419, 57168, 15084, 64136, 18278, 11067, 60727, 2652, 57830, 5847, 37682, 54998, 31280, 11404, 19066, ++ 39105, 65118, 9963, 35724, 5702, 42067, 14436, 55896, 27412, 37384, 59257, 22572, 46634, 26384, 15081, 55532, 7367, 59424, 22231, 38983, 15996, 49397, 7029, 35347, 24846, 61734, 27957, 56828, 14354, 65081, 20778, 50726, 14871, 39504, 2501, 51724, 64641, 42368, 3953, 60563, 9756, 46399, 6320, 21387, 48032, 37309, 52126, 14947, 49648, 4036, 
13859, 29059, 65115, 17921, 39540, 59010, 42405, 53649, 8155, 61921, 11896, 50410, 39073, 9360, 36429, 15554, 33619, 1623, 65373, 37085, 20578, 60370, 40634, 17605, 49215, 26073, 58982, 4429, 29187, 53364, 19266, 64718, 40258, 48652, 2741, 59377, 16571, 49782, 65427, 19758, 60351, 7530, 51873, 23988, 38791, 10144, 21081, 61969, 1292, 14078, 55651, 5471, 23466, 11257, 53930, 7463, 61589, 29808, 2837, 24562, 48768, 63546, 37864, 1556, 62026, 13012, 40085, 29222, 22883, 54264, 32298, 36201, 63812, 28065, 44887, 10648, 53384, 5224, 58516, 27382, 56623, 12898, 47275, 5878, 60373, 2576, 57193, 43530, 13650, 9303, 54492, 30642, 18081, 62131, 14180, 6256, 26195, 46958, 21253, 59199, 38260, 23809, 46335, 60515, 7934, 58186, 36442, 49815, 5469, 14594, 23430, 7486, 39685, 4856, 59350, 9027, 33909, 64320, 39412, 16355, 11657, 52668, 20092, 44865, 4779, 35164, 24394, 56155, 29606, 33483, 46651, 18507, 8972, 23776, 38636, 29148, 44676, 55648, 34679, 7714, 59337, 27250, 56323, 2590, 64930, 30489, 6288, 24225, 55694, 1280, 31098, 35471, 57990, 10881, 40007, 3964, 45746, 28412, 43123, 11658, 63449, 17127, 29781, 59891, 33375, 12970, 65433, 20619, 9526, 36221, 43750, 265, 62751, 15580, 55412, 26046, 64658, 7317, 51494, 14931, 57373, 26607, 60105, 21452, 53688, 15641, 33445, 50443, 9412, 38674, 17431, 3331, 21098, 45127, 30319, 51322, 27335, 35835, 47650, 29642, 41329, 12926, 25851, 3531, 58269, 50543, ++ 6441, 30376, 44320, 58739, 21195, 62824, 38403, 7910, 45506, 15924, 5359, 54696, 11375, 37780, 48928, 3048, 36371, 13206, 47120, 63016, 1886, 27482, 65371, 13767, 42198, 52794, 10396, 19117, 46154, 29020, 9274, 43435, 30791, 62886, 45601, 8594, 22919, 13990, 50079, 36947, 24654, 55569, 32496, 61811, 13326, 8430, 23999, 63842, 26370, 44351, 60964, 38172, 1705, 56063, 30896, 19892, 4974, 25668, 49403, 28179, 58208, 19132, 23786, 48122, 61430, 6542, 57983, 49618, 10519, 24159, 47671, 4917, 27643, 61569, 13975, 39112, 20996, 63033, 34828, 1318, 46276, 26562, 5005, 33321, 56821, 24315, 
38101, 30413, 5323, 40021, 11620, 48961, 30816, 738, 64339, 35050, 49281, 17241, 37253, 47895, 19865, 35618, 60632, 46604, 25788, 34600, 22780, 50401, 16295, 42773, 8151, 33139, 21332, 51088, 23691, 49704, 6717, 59943, 48202, 205, 20873, 50153, 4866, 37781, 20034, 60072, 25576, 39052, 14099, 48571, 3584, 36493, 23319, 54680, 16044, 38408, 21634, 33819, 61704, 25385, 20725, 42993, 48044, 8087, 38613, 57705, 35475, 16100, 43802, 27900, 7163, 53145, 3593, 30316, 25130, 45155, 884, 27509, 54586, 34144, 49204, 60961, 29809, 43298, 23976, 36184, 53473, 19723, 6188, 56398, 24870, 36529, 64906, 15010, 57181, 17951, 42222, 362, 48301, 14270, 2558, 56759, 32437, 62467, 15309, 53456, 3005, 17869, 64400, 22632, 42105, 10569, 20654, 36192, 8551, 50669, 38237, 59834, 16870, 42477, 14082, 49985, 18581, 47255, 22061, 64091, 10148, 56265, 15608, 48648, 32389, 6073, 47516, 747, 19321, 56735, 4912, 50346, 54817, 17870, 25217, 47069, 21158, 34983, 49036, 5199, 18266, 42152, 30957, 24573, 45187, 12352, 48713, 7785, 43637, 25760, 47410, 13940, 61310, 25260, 53116, 32904, 59093, 8937, 42423, 4604, 55546, 9770, 23500, 16557, 52566, 32335, 63251, 44667, 22983, 36436, ++ 56455, 26868, 16900, 2981, 46987, 10844, 23833, 57729, 32876, 65306, 42920, 31025, 24559, 63629, 18830, 51956, 26926, 43800, 5011, 31920, 53865, 45462, 21038, 58427, 5314, 32481, 44538, 37531, 3070, 55478, 34841, 58622, 4901, 19780, 27569, 57416, 32036, 56341, 26792, 18892, 52613, 611, 16853, 29159, 43589, 59524, 42207, 1189, 34434, 17409, 9119, 51603, 24301, 36165, 12978, 46598, 63358, 33891, 10099, 44082, 1126, 32175, 60135, 3975, 27366, 52307, 30138, 18691, 39733, 54901, 14997, 31763, 56553, 6424, 36307, 51800, 8394, 41667, 11280, 60140, 14825, 32017, 52243, 20356, 14036, 46906, 8063, 53565, 22352, 56048, 35945, 16140, 41564, 54378, 26194, 14970, 6035, 57040, 31791, 10599, 58385, 28648, 15784, 573, 64005, 12771, 41426, 10029, 60243, 35813, 56023, 13610, 60906, 8929, 30333, 43523, 19776, 34168, 14276, 39204, 43095, 10102, 
57849, 13489, 42116, 30613, 7527, 61775, 32113, 21033, 44332, 64885, 8348, 34809, 30344, 62888, 6735, 46603, 302, 36187, 49383, 3338, 34352, 27713, 50029, 20318, 53728, 2668, 48628, 11320, 65497, 33128, 15584, 56252, 42946, 13929, 62634, 40617, 16649, 44000, 1949, 20582, 9962, 57267, 15517, 48722, 10972, 42053, 31144, 45630, 959, 49680, 6866, 38851, 26405, 51327, 11350, 63892, 20516, 61706, 37496, 27821, 42760, 1028, 48836, 21443, 41393, 28383, 47768, 1518, 52658, 32567, 61106, 46900, 15702, 22252, 45299, 3692, 28730, 63283, 39211, 4808, 27719, 54779, 448, 32665, 38623, 24429, 61230, 4484, 51874, 37464, 23524, 62345, 42738, 27904, 38258, 29259, 2454, 41165, 59159, 12109, 52181, 8275, 28237, 53334, 36357, 56390, 9891, 61497, 2400, 39002, 32030, 65374, 37138, 106, 58612, 29754, 5137, 46156, 11984, 41476, 27817, 18939, 61631, 25558, 14177, 65425, 43737, 59511, 848, 48917, 18023, 7771, 47253, 15194, ++ 0, 51764, 40813, 53614, 28460, 34541, 48647, 19381, 698, 25630, 9807, 50356, 3599, 34206, 9020, 40158, 61258, 19721, 57093, 24053, 17757, 8629, 38113, 30046, 51027, 23211, 15676, 63836, 21826, 40580, 12179, 23491, 53014, 34132, 10785, 41118, 1476, 38882, 6831, 44603, 11802, 40325, 65470, 49060, 3461, 33223, 15897, 55089, 47098, 57744, 31665, 20438, 45435, 6812, 54109, 27439, 17011, 38743, 52969, 14588, 35790, 46383, 16278, 42848, 20932, 40489, 12493, 46010, 26489, 3254, 62460, 43219, 19496, 44925, 30443, 2574, 55798, 23811, 50768, 21769, 38639, 57880, 10114, 62259, 41184, 29471, 63514, 13171, 42173, 3333, 27961, 62770, 19390, 4691, 43353, 53203, 29272, 44822, 22116, 51146, 4303, 42364, 52993, 38392, 30420, 44691, 4023, 52210, 27505, 19094, 931, 45572, 26444, 41139, 57512, 3729, 65074, 25315, 55087, 17247, 62628, 26251, 33266, 52784, 2969, 49070, 17752, 46791, 536, 54035, 11020, 25900, 40638, 50405, 12327, 24497, 51653, 19548, 29644, 53079, 15455, 64625, 10796, 58852, 1231, 42478, 12688, 31365, 61078, 36893, 17428, 41241, 50315, 22078, 9588, 52207, 19476, 31106, 11604, 
64967, 37107, 52487, 46126, 32308, 558, 27644, 61891, 3663, 58399, 21278, 61356, 28029, 33707, 59432, 2798, 31482, 46025, 35753, 7333, 25401, 50785, 10835, 19429, 58895, 35348, 12031, 60761, 33669, 9269, 38424, 14856, 43593, 5020, 28174, 54268, 62664, 13152, 34770, 53506, 9367, 22957, 60970, 43469, 33757, 59446, 16588, 50941, 7199, 35212, 27121, 20361, 58540, 14464, 34309, 7938, 49184, 14988, 45218, 63356, 23123, 6150, 32151, 37658, 64199, 14075, 39609, 1484, 22251, 46574, 34626, 19450, 55009, 16293, 23571, 10348, 51950, 20050, 41934, 54481, 22575, 64491, 1136, 55228, 37378, 6607, 48617, 40673, 33767, 7083, 19938, 36922, 11823, 39713, 61427, 27646, 33474, ++ 60603, 20711, 12369, 32564, 14777, 61532, 4545, 52716, 39275, 60471, 47627, 17366, 56263, 44738, 58219, 16631, 170, 34927, 11195, 43037, 36031, 56106, 12480, 47702, 1304, 59924, 8035, 48885, 31417, 6434, 60825, 47317, 16947, 49578, 61501, 13178, 54208, 20309, 63731, 30312, 58881, 35453, 7846, 25803, 20098, 56775, 29923, 12069, 21734, 5578, 40767, 58501, 11521, 61560, 32858, 407, 56948, 6268, 22051, 65251, 24679, 55321, 10816, 34705, 54498, 494, 63714, 7437, 59850, 35425, 9704, 25202, 52766, 12015, 65005, 16693, 45628, 33078, 7022, 47379, 3550, 25049, 44167, 6326, 35330, 163, 18606, 51083, 32170, 59064, 47482, 8968, 33578, 57731, 22920, 9566, 61487, 2251, 40506, 13424, 63665, 25377, 9234, 21483, 57307, 18340, 59194, 14528, 39347, 64468, 30966, 54895, 5839, 35341, 15118, 46639, 11180, 37369, 2540, 50662, 27650, 6138, 45709, 16628, 65333, 24246, 55351, 35152, 22248, 41284, 29931, 60777, 19250, 1596, 58951, 41927, 8898, 44934, 58150, 11739, 39734, 26541, 41559, 17048, 33276, 63706, 24853, 39994, 22978, 5265, 57441, 25832, 1665, 63220, 29043, 35838, 6061, 48019, 56819, 24238, 8378, 28505, 17257, 63382, 40324, 51608, 18087, 37911, 26756, 13194, 43033, 17076, 10417, 47099, 22669, 54627, 15693, 28454, 58275, 44550, 5270, 55343, 39788, 8272, 30737, 54491, 5628, 24068, 49602, 58444, 26038, 55232, 18782, 40137, 727, 
31425, 41547, 20232, 46222, 26414, 51298, 15436, 6522, 25394, 12018, 41655, 20012, 46829, 13664, 53068, 40681, 8913, 44071, 25855, 53575, 17585, 61127, 21606, 10013, 33197, 49662, 57118, 2838, 19068, 45993, 24038, 58503, 16827, 62496, 5892, 50784, 29170, 4530, 56777, 40456, 27263, 6975, 34418, 16680, 36496, 10677, 48256, 16027, 31858, 52221, 22034, 1960, 54322, 26505, 46358, 29028, 57080, 21347, 5112, 54139, 9584, ++ 37974, 43847, 63781, 6932, 55392, 17737, 41517, 29969, 12741, 35458, 6525, 21858, 29560, 13583, 28123, 32270, 48292, 64523, 25951, 51502, 3340, 28673, 62706, 18331, 33342, 39315, 26573, 57288, 17378, 53786, 27103, 221, 38250, 7165, 29878, 46848, 22101, 34622, 48457, 4519, 22573, 14762, 45804, 53913, 38593, 10149, 50544, 36809, 62274, 25405, 49825, 2825, 28371, 40011, 15230, 48671, 42011, 29704, 51315, 3151, 41285, 7617, 28887, 62780, 17314, 49012, 23372, 32336, 38561, 17878, 50557, 41409, 1031, 34123, 22980, 57555, 27012, 13520, 53847, 28141, 63852, 36618, 17533, 55423, 23120, 60706, 45411, 25892, 15744, 6744, 24757, 37495, 46012, 12889, 48438, 39485, 32409, 18108, 59458, 34156, 28181, 45811, 16957, 49395, 6198, 36499, 48333, 32738, 7040, 47328, 11794, 38291, 17651, 51846, 22521, 28408, 53721, 31480, 58740, 8616, 35670, 61409, 21569, 40475, 29579, 9279, 36936, 6339, 63498, 14955, 51033, 4635, 33202, 47747, 28700, 17395, 55152, 32506, 3861, 23946, 60614, 5743, 55458, 22375, 45881, 4736, 51564, 8712, 54948, 28613, 44381, 13354, 34406, 45725, 4491, 59622, 26371, 38637, 3160, 33021, 42616, 58621, 4255, 25290, 12306, 34876, 7271, 46866, 54314, 8838, 52169, 36965, 62539, 19159, 40416, 9305, 60359, 3787, 41031, 16967, 34300, 22218, 65258, 46363, 25039, 13234, 45403, 63735, 16326, 4164, 35850, 11427, 64041, 23565, 48727, 11049, 58608, 7430, 57372, 1901, 36522, 32060, 56969, 44973, 52510, 2715, 62156, 30402, 57549, 1768, 31139, 62939, 3255, 55814, 10976, 39283, 1223, 35533, 58040, 39943, 16486, 26449, 42592, 30076, 60943, 9333, 43518, 31609, 27462, 40961, 
13516, 59470, 44496, 32544, 18124, 62841, 45431, 60640, 2848, 57572, 28489, 39626, 23993, 63365, 13042, 35308, 58476, 31117, 15384, 62478, 3230, 53201, 34363, 42727, 23928, 49712, ++ 18402, 4179, 25179, 49198, 37110, 26598, 9223, 45177, 63198, 23402, 53997, 42369, 62378, 39532, 5906, 53026, 22962, 7599, 46019, 14529, 60223, 6652, 41418, 25352, 54598, 11461, 43672, 2348, 35660, 41952, 13513, 32887, 64358, 25530, 42974, 3293, 59999, 9187, 16260, 42573, 57842, 51210, 28008, 5244, 63111, 23264, 2372, 44831, 7416, 35214, 16625, 42989, 52501, 18958, 63956, 23584, 8909, 60478, 37247, 18138, 31076, 59121, 47543, 5433, 37894, 14130, 43718, 57029, 4694, 53528, 13286, 28568, 58781, 48197, 37635, 4120, 40056, 61041, 18795, 42343, 16233, 30940, 50333, 12165, 49174, 33956, 9718, 38980, 54870, 43717, 64761, 21286, 1619, 60525, 27194, 6559, 50451, 24276, 37899, 7289, 54658, 3040, 62468, 31298, 55361, 26149, 2122, 23957, 54447, 20957, 44132, 24828, 63072, 1904, 59320, 39720, 7359, 18508, 42441, 23180, 47200, 12499, 52196, 1454, 56458, 44065, 12065, 58309, 27024, 46135, 16965, 37936, 55786, 13791, 64156, 5078, 39123, 14399, 63342, 37400, 18521, 47203, 35202, 7602, 56747, 30021, 15061, 59992, 18877, 47674, 10028, 61526, 37759, 17989, 49034, 12533, 55226, 16165, 61773, 21157, 53578, 13478, 38239, 47788, 55452, 22801, 65376, 31780, 22099, 41136, 30240, 1660, 55727, 5577, 50204, 29743, 38094, 23198, 53160, 11858, 49197, 30034, 14662, 3403, 52041, 37928, 31766, 19735, 40503, 51166, 21141, 44906, 29666, 6928, 51942, 33368, 24985, 37387, 17793, 49319, 12760, 65014, 21011, 8423, 29287, 37733, 23914, 9618, 42386, 22311, 45503, 18405, 36561, 28793, 46655, 22810, 51467, 30741, 4279, 47712, 11406, 60318, 6904, 54169, 12786, 34002, 4043, 48400, 11133, 53931, 36024, 21052, 8777, 49503, 1990, 37747, 12614, 30702, 49816, 19268, 51073, 5991, 44410, 3841, 46935, 8575, 17716, 50134, 43139, 10551, 38486, 24868, 13755, 59166, 2203, 64835, ++ 29802, 46060, 57917, 21681, 1792, 59629, 51537, 20156, 2610, 
29002, 11905, 36691, 1376, 20664, 59810, 10668, 40791, 55205, 21486, 38617, 31182, 44913, 15138, 50061, 4559, 61181, 19527, 52095, 24548, 9890, 59375, 44976, 18722, 55761, 14635, 52341, 37458, 28844, 62387, 31755, 2127, 37905, 10934, 33995, 18018, 41420, 60120, 27093, 53435, 14272, 64840, 30203, 10399, 36625, 4223, 46892, 34556, 20638, 11171, 57537, 44785, 13192, 20082, 25971, 51768, 34009, 9162, 21401, 47192, 30798, 22152, 63991, 16019, 8719, 20265, 49876, 10639, 35153, 614, 56428, 9026, 59647, 2075, 39597, 27333, 4592, 58260, 20763, 2369, 32930, 10975, 30085, 52349, 18978, 36395, 63248, 15421, 55522, 11722, 47178, 20395, 41860, 35217, 8387, 39859, 12292, 65141, 42919, 16489, 34375, 3398, 56670, 10329, 47988, 33610, 12718, 63928, 49297, 15934, 56895, 4260, 30861, 38460, 15321, 33756, 23484, 49987, 19848, 39828, 2153, 57622, 9932, 22943, 43138, 35602, 20506, 48771, 27599, 43919, 9697, 50231, 29214, 13222, 52395, 38059, 19757, 43580, 36032, 644, 32683, 52656, 21512, 6418, 54121, 31911, 39584, 22885, 44703, 10561, 50069, 6672, 30419, 60386, 19238, 5765, 43598, 14413, 2282, 57533, 15919, 48999, 23889, 32864, 44024, 20825, 64438, 13039, 51773, 32263, 63315, 1826, 57463, 44142, 36469, 17614, 61962, 8, 57124, 7970, 30307, 60242, 2257, 56579, 38927, 13644, 61868, 3178, 44115, 60486, 29872, 41020, 4296, 47872, 34400, 63597, 14801, 55486, 49437, 17006, 65239, 6878, 50616, 12694, 59020, 5664, 64486, 13414, 54578, 25543, 19929, 51004, 28614, 44728, 22466, 36892, 51712, 65072, 20222, 50272, 862, 25067, 64088, 42879, 15222, 52723, 21916, 56065, 24503, 9137, 43366, 13584, 59742, 20412, 56594, 29526, 60915, 39348, 22870, 5406, 63915, 17140, 50992, 8179, 41693, 32053, 16121, ++ 11271, 39985, 8674, 34783, 43102, 13914, 31698, 40364, 56900, 46498, 58640, 15381, 49638, 44038, 33710, 50713, 16161, 30202, 2214, 63952, 10163, 34495, 58764, 22324, 36580, 29454, 46486, 5938, 63086, 48360, 28465, 4331, 39844, 8161, 35885, 24222, 17688, 50430, 12405, 25062, 54924, 19618, 61037, 43819, 
51856, 12655, 31324, 19271, 46241, 38996, 790, 55633, 22285, 59366, 26509, 54843, 13926, 50128, 43163, 26907, 1424, 53778, 39474, 61823, 2671, 58305, 29411, 64788, 1836, 36687, 55588, 5796, 44260, 32583, 54687, 28960, 63207, 24475, 48803, 29968, 38021, 20065, 44740, 65213, 14499, 52961, 28819, 48045, 61815, 17016, 56586, 49452, 14258, 42637, 3651, 30992, 45106, 272, 65321, 26784, 52480, 14687, 23626, 59991, 18863, 50898, 29169, 57868, 9427, 61817, 50062, 29684, 41942, 19460, 26916, 44465, 21893, 679, 32924, 40873, 25032, 64364, 19180, 59480, 48119, 3826, 60999, 8181, 53020, 34119, 27972, 62057, 31013, 3062, 52642, 25198, 57032, 1011, 22020, 53894, 31207, 1962, 65039, 25652, 2937, 62302, 11137, 49461, 27074, 64022, 14068, 40413, 57062, 24529, 7807, 64676, 284, 33955, 27299, 40821, 18327, 45439, 1357, 33401, 50467, 29394, 59820, 45034, 35435, 4675, 64032, 12049, 59034, 14074, 34776, 685, 45229, 7775, 18280, 26258, 39029, 21055, 7037, 59998, 26872, 10281, 43049, 27564, 46709, 14208, 37043, 50056, 17091, 26657, 42701, 19497, 47486, 16061, 10030, 21822, 56005, 27286, 18314, 54001, 1032, 40323, 25996, 5177, 35840, 27593, 54385, 33510, 24708, 41364, 19531, 32505, 43425, 7688, 41899, 63939, 540, 38483, 17419, 55936, 1890, 15808, 26781, 38768, 14628, 58089, 33293, 6667, 28764, 61703, 25893, 46725, 5525, 33846, 64823, 35751, 26156, 32750, 38147, 14887, 42133, 24629, 280, 55846, 36091, 28049, 45022, 30545, 47983, 19245, 56238, 36268, ++ 54759, 27939, 62186, 15642, 48166, 24203, 64246, 7485, 16739, 33374, 4938, 27580, 64737, 8220, 25034, 4142, 61688, 37328, 47220, 27152, 52421, 20117, 525, 53449, 9346, 64781, 13937, 33981, 20377, 37156, 15415, 51568, 21558, 46278, 58199, 924, 65222, 5735, 45278, 40717, 8510, 47718, 23883, 19, 28683, 64577, 4388, 57280, 9400, 24444, 48219, 32393, 44187, 7991, 40438, 2035, 25265, 62421, 4815, 33277, 64332, 23033, 8471, 31603, 45364, 24389, 14688, 50021, 42257, 11027, 19244, 39282, 25738, 62077, 2286, 41003, 15151, 43562, 5483, 52069, 12914, 
54398, 7673, 31610, 22075, 42863, 11780, 36851, 8276, 40753, 23438, 5590, 38443, 58792, 24956, 53791, 17613, 41236, 21718, 33188, 5250, 61327, 43625, 1111, 33700, 46089, 4954, 22217, 40458, 27933, 13998, 36825, 6630, 62277, 5097, 51250, 55294, 29111, 60313, 10886, 53478, 5699, 45309, 10263, 28834, 18127, 31696, 42753, 24712, 13085, 45152, 7248, 49209, 59667, 11302, 45595, 7964, 61477, 34214, 15672, 59534, 41021, 17739, 44651, 31761, 39300, 24072, 58321, 4182, 41861, 20461, 30600, 2422, 42741, 51407, 19941, 46781, 59116, 14238, 64177, 35181, 55985, 26006, 62782, 11229, 37270, 8011, 19987, 25633, 51010, 28676, 39978, 6543, 53372, 27351, 56678, 24688, 61039, 42532, 47466, 9830, 54132, 33253, 15531, 48205, 55926, 22055, 53023, 6184, 62909, 24414, 9664, 34146, 65522, 5248, 54976, 27914, 52883, 35647, 62847, 5874, 38026, 42973, 10679, 46004, 19186, 60341, 31520, 58243, 11367, 39635, 71, 48844, 9142, 60602, 37582, 16171, 57415, 23884, 34690, 14332, 62084, 8840, 24822, 41523, 47216, 59811, 7850, 30405, 45640, 18593, 55574, 36696, 3788, 39307, 11535, 58915, 41062, 666, 54012, 7612, 62066, 2337, 52929, 9876, 34164, 45940, 19585, 51990, 12459, 59923, 1409, 61934, 26184, 6101, 46711, ++ 22259, 933, 51130, 30928, 5261, 53429, 10484, 36001, 50511, 22483, 54830, 38076, 18526, 31435, 57464, 41664, 19224, 12244, 56569, 5496, 17215, 48789, 39894, 28196, 16539, 40991, 23842, 55011, 42838, 1640, 56731, 32146, 63521, 11580, 26264, 43507, 30886, 56495, 21001, 33396, 59448, 13813, 36407, 55944, 17071, 39432, 48874, 21322, 34250, 58736, 6039, 13406, 51030, 17799, 61125, 31806, 45727, 16474, 38382, 48361, 15473, 36252, 50690, 18522, 11730, 55933, 40116, 16923, 27181, 61260, 45836, 7889, 51435, 12630, 46930, 22468, 6840, 58562, 33726, 21552, 46449, 25663, 35759, 57119, 1175, 62618, 18159, 46233, 26429, 54107, 34800, 64115, 28101, 7774, 46703, 10702, 35694, 56845, 9871, 50003, 37072, 13136, 30139, 56322, 11009, 53613, 15553, 37482, 49138, 32, 60729, 20306, 53171, 32190, 16915, 36045, 9168, 
38796, 14147, 43812, 20603, 36416, 26524, 51607, 41515, 62808, 14496, 54820, 5299, 64497, 21418, 38893, 18610, 26637, 37114, 16402, 31954, 40143, 12130, 46437, 6356, 23433, 57921, 9145, 51084, 6947, 46196, 16578, 33742, 50723, 8271, 47271, 62533, 28112, 15304, 36723, 29681, 9282, 51978, 2696, 21953, 8692, 41477, 15166, 52992, 23362, 56533, 42445, 60734, 9740, 17710, 46440, 35873, 18890, 47954, 10603, 37371, 16210, 31014, 4968, 62701, 23692, 49872, 29013, 4572, 39417, 12586, 35098, 32387, 18346, 41763, 53652, 12295, 48421, 22701, 32873, 8802, 40633, 233, 24571, 50545, 13895, 59681, 23104, 33039, 51749, 12517, 48154, 3581, 44424, 15904, 61654, 20795, 52361, 26830, 2236, 48437, 29516, 5030, 46432, 52833, 31386, 48907, 56893, 32919, 5342, 21751, 43174, 63303, 3021, 40153, 13086, 48140, 17318, 53459, 28071, 20657, 14431, 44959, 17881, 30087, 47777, 21575, 49074, 27187, 65083, 11633, 57490, 7434, 32966, 20961, 40470, 10118, 35077, 52357, 13371, ++ 63454, 37575, 18861, 57284, 25749, 38555, 60074, 27026, 355, 43313, 11130, 52298, 2859, 47908, 13737, 26339, 53671, 35283, 23537, 42610, 33109, 62308, 13037, 44325, 57763, 2829, 49743, 8399, 30378, 62015, 12639, 23028, 3738, 33777, 49165, 10213, 39231, 15621, 53190, 2915, 26999, 49894, 6378, 32616, 47186, 15043, 7111, 54538, 41935, 15725, 27834, 63538, 37816, 28971, 11958, 56584, 6911, 52195, 21621, 9838, 56310, 28631, 5936, 60856, 41797, 34973, 6684, 59483, 3707, 33397, 23634, 57715, 35605, 18202, 59942, 31855, 53222, 26772, 17233, 64274, 3110, 61519, 15460, 40439, 24121, 34338, 51703, 3971, 59915, 15068, 653, 44092, 12640, 51378, 22464, 62307, 29018, 4475, 44313, 16620, 58204, 25091, 47749, 20081, 38995, 27136, 63416, 7833, 31547, 52652, 25455, 45247, 12021, 57697, 46387, 24304, 61163, 3258, 47776, 27806, 63233, 2037, 56035, 7102, 34840, 882, 38167, 26001, 46988, 35955, 51822, 82, 54370, 41718, 5978, 62650, 51433, 19667, 56191, 28272, 36637, 53340, 20890, 34733, 14620, 60800, 21863, 54725, 12240, 26098, 58916, 35591, 17184, 
11410, 60643, 3973, 57599, 24981, 42244, 32169, 48794, 58105, 28312, 47321, 3449, 39360, 16511, 154, 30821, 38477, 54984, 2996, 61618, 25931, 4325, 41580, 57963, 2388, 50563, 36103, 13528, 40750, 1145, 61483, 42171, 20074, 64628, 2003, 59533, 44463, 3611, 28484, 58202, 1208, 39708, 61358, 14975, 58923, 31679, 44733, 17212, 54679, 30570, 2978, 61782, 7516, 36880, 21320, 28979, 56505, 23672, 38162, 31756, 14608, 44902, 56212, 11865, 36158, 59599, 10494, 27326, 3547, 20730, 12196, 62915, 29296, 52610, 10814, 34857, 24205, 57452, 22724, 31739, 60212, 8112, 49205, 63606, 31211, 51412, 23220, 57972, 12095, 40115, 16940, 54671, 4256, 37201, 25379, 41263, 49370, 14603, 53810, 23637, 58174, 3732, 43491, ++ 28769, 7208, 44896, 12192, 42163, 3382, 18106, 45816, 55972, 19673, 62729, 32391, 23989, 61035, 39027, 45660, 875, 63381, 8817, 59544, 3688, 24704, 55674, 7094, 32568, 37812, 25848, 58996, 17831, 38793, 27422, 50292, 41282, 60419, 19380, 54487, 27721, 7374, 35082, 63005, 18389, 42096, 64123, 9852, 25999, 61671, 37179, 29554, 1758, 62703, 35953, 20755, 3044, 53278, 42505, 19562, 35501, 30490, 59708, 40867, 3607, 43884, 52714, 32772, 830, 22328, 54338, 28044, 48684, 13749, 52358, 56, 29741, 42618, 4542, 37140, 11686, 45484, 38861, 9518, 41465, 29324, 48381, 5968, 50035, 10298, 30356, 41882, 19880, 31922, 50579, 20601, 55767, 33791, 2529, 40155, 19604, 60186, 31699, 51813, 1861, 40679, 8784, 62748, 3821, 44565, 23054, 59559, 18046, 10549, 41351, 4113, 38057, 23337, 1374, 43153, 14917, 34247, 58145, 7928, 50290, 32054, 16502, 48720, 22484, 58673, 20378, 57311, 10743, 29479, 15753, 60188, 32833, 13408, 47514, 22627, 2360, 42577, 4320, 64286, 10614, 47940, 419, 42214, 55996, 28743, 37027, 1588, 65180, 38709, 5135, 23342, 53391, 44189, 31419, 48397, 18817, 37593, 5601, 54471, 16914, 12124, 36235, 20362, 63785, 27166, 53961, 48318, 13659, 63148, 24318, 33850, 15285, 43193, 64776, 21729, 29311, 53769, 20249, 59217, 28122, 55095, 18709, 34425, 8849, 51586, 25845, 47293, 23331, 
15133, 57648, 35421, 21481, 45764, 30681, 6676, 47017, 20769, 11674, 64447, 28573, 7994, 39517, 49806, 26738, 43769, 16400, 64622, 42127, 9951, 47169, 6403, 63223, 3988, 34228, 17922, 64872, 21419, 40831, 18678, 61355, 44172, 35640, 40369, 6451, 18013, 37411, 54860, 16205, 47547, 9758, 51888, 1265, 42330, 35220, 23773, 3209, 38552, 9559, 42752, 4693, 32305, 63100, 6388, 28898, 43864, 15746, 61271, 2711, 29322, 64543, 4770, 44529, 16448, 39195, 19997, ++ 60802, 33860, 58963, 23025, 49826, 64990, 34244, 13160, 30425, 40683, 8923, 14913, 44568, 6816, 21181, 9906, 49468, 29715, 15806, 36857, 46671, 11673, 30831, 48091, 18910, 60669, 11066, 45322, 5128, 47594, 9521, 54034, 6742, 16707, 36725, 1968, 61765, 48181, 22433, 44527, 11265, 30532, 21799, 39908, 52669, 3577, 22952, 45387, 18668, 50210, 10613, 47415, 57890, 23343, 5287, 49462, 64968, 189, 12879, 27659, 63261, 24760, 12304, 19816, 46769, 62903, 10158, 37415, 20467, 63486, 38269, 15661, 47810, 10333, 65487, 19732, 56914, 1482, 51214, 23522, 55851, 19143, 11381, 59326, 27886, 63643, 7185, 57670, 13543, 61170, 37980, 9329, 29639, 16328, 57489, 48592, 13745, 38879, 8098, 22825, 64288, 28690, 48864, 17498, 32313, 51540, 2632, 35550, 46811, 58525, 34778, 64104, 15737, 56180, 30213, 64903, 21164, 54341, 25707, 17576, 37537, 11699, 61650, 39926, 12828, 44701, 8985, 32595, 49762, 3511, 43714, 23867, 8643, 65451, 28517, 35380, 58542, 26148, 49627, 24755, 16760, 30802, 62910, 27315, 5412, 48587, 10377, 43117, 29831, 15771, 45368, 56401, 1058, 22286, 39177, 7202, 51042, 14812, 61418, 23711, 39855, 62269, 4941, 44355, 10139, 34462, 6918, 32501, 45716, 21093, 8296, 49473, 52712, 11450, 31916, 48575, 9042, 14473, 43856, 6741, 32742, 11025, 45568, 58366, 16423, 31531, 37285, 7282, 50746, 29476, 38714, 8358, 63519, 16558, 52375, 25496, 56888, 34614, 51425, 4057, 36284, 48328, 19908, 12098, 57756, 5572, 53755, 33919, 2014, 59301, 18931, 51103, 28103, 55081, 42914, 30168, 7115, 50033, 1432, 53870, 33690, 15033, 50488, 23617, 58942, 
45076, 183, 25979, 61142, 4803, 38231, 27062, 65221, 19656, 10577, 44037, 56998, 16471, 61006, 26809, 55461, 36780, 18765, 45482, 58839, 22648, 51605, 31795, 47287, 18516, 38040, 33411, 26887, 55716, 31559, 10753, ++ 50273, 15104, 2497, 32650, 8422, 20590, 47438, 6323, 61771, 24807, 48875, 59210, 36461, 28831, 55551, 34029, 17909, 58093, 27743, 51197, 20508, 39190, 65161, 22670, 1201, 35045, 50939, 21393, 31615, 56143, 35384, 20809, 44173, 29636, 59227, 23649, 40280, 14183, 57585, 4670, 51989, 46120, 1408, 58288, 20186, 31907, 59789, 11696, 56132, 33023, 43415, 25017, 8688, 39643, 33800, 26196, 14835, 45047, 22722, 47748, 17465, 38937, 49250, 57418, 16108, 29886, 43271, 4947, 44717, 8581, 26133, 60506, 21166, 55038, 31112, 49135, 24897, 34635, 14155, 60766, 5101, 32392, 45173, 37436, 17685, 43422, 22753, 36124, 47677, 4966, 25218, 45787, 62956, 41624, 6814, 25993, 34678, 54834, 27535, 45476, 5994, 36095, 12437, 57170, 42190, 14368, 26412, 54086, 5737, 13693, 21656, 28504, 50742, 8298, 48948, 9913, 39463, 6057, 45895, 41793, 23716, 52855, 30037, 4508, 65163, 28204, 53756, 17197, 39397, 63626, 19447, 56681, 34578, 17663, 50493, 10074, 15030, 38501, 7380, 54579, 33597, 45110, 13689, 39878, 19192, 61623, 25242, 52947, 18643, 49245, 32266, 9858, 34643, 63654, 13063, 55635, 29250, 43450, 10439, 45973, 805, 31038, 52308, 24641, 49722, 61099, 18223, 57857, 5184, 36568, 58783, 27964, 1427, 39507, 16764, 56019, 33433, 63567, 38567, 22377, 46912, 65153, 24816, 5748, 43511, 56475, 13858, 62261, 10797, 55466, 19095, 49090, 4458, 36715, 12998, 43334, 2538, 18098, 42275, 23764, 56354, 1606, 63779, 34994, 25099, 38814, 22601, 14141, 52686, 25607, 35379, 12941, 40175, 21952, 8576, 58728, 45711, 23240, 39055, 26190, 8235, 63581, 2484, 55306, 13287, 32196, 51170, 19342, 41971, 29922, 49990, 12308, 34251, 46330, 54364, 29438, 7100, 33507, 47053, 1751, 14124, 50581, 10862, 34756, 1013, 39056, 8382, 13230, 56363, 6733, 50708, 9334, 62554, 485, 21255, 47707, ++ 24987, 54464, 41120, 
52816, 27271, 39614, 56647, 16212, 51638, 3852, 34866, 17066, 1698, 65480, 12915, 51885, 40253, 6114, 43452, 2021, 54420, 7823, 14963, 42118, 53248, 26770, 14253, 64068, 40432, 15974, 371, 64899, 13137, 52730, 5510, 46964, 11926, 32813, 29220, 38104, 16496, 60749, 35585, 13000, 43056, 50822, 8280, 41132, 26761, 4066, 14568, 60574, 51497, 16746, 62046, 9525, 55383, 37557, 58409, 7767, 54676, 30961, 2310, 36552, 7270, 58873, 25079, 51070, 31357, 53916, 34408, 2836, 39848, 27555, 7149, 40723, 3829, 62818, 28402, 42977, 36478, 53711, 370, 20935, 55114, 2783, 52435, 16052, 27098, 39341, 53446, 18500, 3229, 31355, 47243, 64911, 986, 18298, 60891, 15269, 52904, 21023, 59019, 24447, 6913, 37786, 64783, 19343, 30736, 55757, 48074, 2278, 42601, 33373, 18940, 27307, 52407, 31166, 13321, 60017, 321, 46558, 18736, 43480, 24409, 36683, 2414, 60665, 6588, 30746, 11875, 40507, 53243, 1525, 44502, 57453, 30198, 61117, 21545, 43396, 3278, 60121, 8484, 52264, 33002, 12735, 40727, 3648, 63081, 6635, 59793, 27679, 41596, 19410, 46404, 25685, 1879, 65303, 27010, 34233, 59296, 21563, 12940, 37878, 2106, 28999, 40565, 22512, 51729, 12519, 41306, 19650, 44763, 60190, 23607, 5894, 45952, 310, 26584, 52198, 17234, 3225, 37844, 52826, 22868, 2667, 40272, 25162, 46087, 507, 41192, 26325, 54202, 20290, 59922, 27453, 38323, 62196, 10314, 46564, 32353, 15662, 41832, 8690, 45665, 17969, 62603, 30938, 41078, 4865, 60987, 46102, 962, 53298, 24959, 37055, 13782, 52048, 16688, 58147, 30888, 38074, 19806, 28375, 46202, 9220, 64727, 35914, 7350, 59331, 17088, 61892, 25243, 5763, 15466, 62443, 22280, 52390, 19116, 64393, 40784, 24116, 60337, 30629, 53712, 20109, 64108, 35816, 21886, 59601, 24388, 42572, 14017, 48772, 40224, 65281, ++ 5560, 30012, 9709, 17420, 61348, 1221, 29418, 37222, 21528, 42757, 28273, 53918, 46897, 22786, 41972, 4606, 25464, 64369, 22093, 60916, 32007, 45118, 57339, 28956, 6314, 61577, 43261, 3446, 23416, 58020, 28729, 45911, 25328, 38433, 31194, 18624, 51133, 64485, 676, 55250, 
25495, 8852, 28250, 56659, 5356, 24725, 34718, 65349, 17531, 54295, 38275, 30334, 1144, 45924, 29303, 41719, 19053, 32042, 4420, 42745, 34326, 10696, 65323, 26722, 53093, 40386, 13076, 18715, 64541, 11323, 17727, 56648, 43972, 14434, 59133, 16861, 52606, 13027, 47537, 8030, 16590, 25313, 64817, 46676, 12516, 39918, 30893, 56226, 1567, 64576, 11112, 35412, 59388, 21868, 12209, 23927, 42293, 49557, 11275, 38127, 30556, 43295, 470, 34490, 55157, 45695, 11399, 33097, 43494, 9003, 39181, 24699, 62584, 13022, 40756, 59099, 2723, 63522, 19965, 35109, 57147, 9576, 33961, 55714, 13868, 51153, 21267, 42339, 25540, 48493, 45798, 4685, 27114, 37319, 20673, 5519, 40925, 12441, 48313, 18243, 37802, 26744, 55690, 23009, 2209, 47046, 57771, 35095, 21324, 38010, 13842, 54287, 3187, 50172, 8550, 58487, 35912, 20784, 52840, 17607, 7707, 56663, 41924, 64595, 15700, 54656, 10930, 43743, 29983, 65069, 3831, 55369, 30515, 7482, 37065, 62041, 19206, 40166, 59761, 12227, 35542, 56941, 27655, 11732, 49547, 30395, 60945, 17695, 33564, 64236, 31195, 11514, 43906, 33867, 9108, 49500, 5987, 30020, 53155, 14401, 60862, 21985, 55165, 29207, 58665, 567, 47616, 9276, 50161, 20204, 29932, 11069, 64223, 32987, 15498, 62459, 2875, 32040, 43617, 5912, 48107, 10141, 42434, 53214, 4400, 39726, 27625, 14185, 48549, 21370, 44647, 2599, 40535, 58279, 36320, 399, 45782, 12814, 37594, 28665, 8811, 48670, 3495, 25733, 15040, 46508, 27533, 41619, 1633, 45148, 17310, 30355, 58666, 35349, 8068, 17924, ++ 36804, 45565, 63021, 35595, 44190, 14132, 55031, 10260, 63897, 5675, 57687, 11767, 31079, 7989, 60354, 32774, 48441, 14393, 35800, 10612, 17572, 25212, 4393, 36301, 48993, 18303, 9997, 34196, 51806, 7526, 41822, 10902, 55375, 2689, 62810, 42749, 7968, 24505, 41621, 19862, 43987, 49393, 17939, 37518, 53979, 16153, 46681, 488, 49180, 22220, 7507, 64287, 35163, 13223, 58955, 2618, 53627, 11483, 50326, 20321, 61305, 23807, 15176, 46078, 21273, 3280, 35301, 46493, 1267, 41949, 49529, 24299, 5683, 50500, 35939, 22829, 
44521, 32799, 21334, 58191, 30193, 50823, 9898, 33457, 26302, 58877, 8798, 21448, 44664, 32846, 23704, 51999, 8351, 49912, 36928, 54272, 5419, 32701, 56090, 3490, 62099, 10231, 50610, 29858, 16063, 22082, 49874, 1249, 61715, 17120, 59778, 7217, 31777, 54617, 4623, 44854, 16241, 37081, 49567, 5408, 29308, 21990, 62463, 3370, 47587, 7798, 59396, 33425, 15186, 55068, 22772, 62190, 14203, 59160, 46758, 24328, 55312, 31618, 771, 63758, 11537, 49958, 20013, 36296, 64458, 30479, 16988, 9377, 51694, 43976, 26486, 17871, 62093, 24217, 40254, 32848, 16033, 47541, 3747, 38865, 50594, 31852, 6092, 19313, 46677, 26633, 59700, 1097, 17099, 25246, 47584, 34624, 14910, 50005, 27484, 12909, 51081, 25447, 4137, 48802, 30869, 8527, 41460, 63010, 20681, 36356, 9461, 54638, 5381, 21845, 51842, 15826, 60681, 1772, 55828, 23203, 65059, 19262, 37106, 26139, 7066, 39879, 4607, 50399, 13276, 37419, 27097, 57095, 15257, 55609, 43994, 36401, 17271, 49537, 5422, 47024, 27811, 55708, 11548, 65474, 22179, 57257, 25165, 15714, 62245, 22562, 56571, 1704, 54108, 32722, 8625, 55822, 30926, 20365, 50396, 26272, 41501, 55125, 5088, 58585, 35562, 21113, 56810, 42267, 62310, 5475, 49922, 10304, 54894, 33977, 11475, 53063, 3095, 23262, 55271, 27688, ++ 60182, 11869, 21944, 4304, 49318, 23705, 41761, 33220, 46350, 25961, 38871, 20241, 50800, 37596, 18374, 56389, 2994, 38426, 52948, 30477, 50216, 41174, 63235, 12360, 38288, 56850, 29854, 46420, 19197, 35996, 62209, 17328, 33491, 48586, 22025, 15258, 57048, 36505, 10521, 61219, 6094, 33696, 63659, 3166, 30050, 62232, 13667, 38872, 31092, 56974, 44663, 19765, 52823, 21433, 27132, 47039, 24233, 64069, 36047, 28234, 1545, 51661, 41445, 6163, 60155, 30303, 57128, 22592, 61616, 28507, 7646, 33104, 62432, 29569, 753, 64100, 9123, 37813, 61846, 2220, 42111, 14738, 38522, 60305, 6397, 49267, 34999, 62238, 13261, 48815, 5870, 43071, 28459, 14831, 58445, 17060, 43978, 26628, 19970, 46523, 25332, 39522, 18702, 63690, 4351, 60255, 27768, 40281, 25651, 52135, 36347, 
20715, 51348, 18209, 35725, 26081, 56480, 22910, 10969, 40394, 53943, 42903, 16032, 31457, 38625, 27446, 11348, 52342, 1080, 36259, 9410, 32307, 50927, 29698, 8232, 64693, 16003, 51972, 35756, 28865, 58766, 4882, 44295, 14899, 7727, 45754, 24485, 56831, 29067, 121, 48128, 36486, 30900, 10981, 61001, 4821, 54914, 11710, 60238, 30113, 13976, 45092, 25498, 49065, 35369, 8460, 33243, 50309, 38735, 56910, 9642, 22969, 62807, 2565, 57294, 42025, 10090, 34997, 54386, 15651, 42940, 60444, 19601, 818, 47863, 14742, 45053, 27101, 42381, 58531, 6845, 47642, 24213, 39317, 31789, 45489, 13403, 41382, 843, 59171, 48908, 28038, 52228, 20976, 33611, 65345, 6623, 32469, 39227, 2672, 24160, 7772, 57583, 25873, 41772, 20509, 60275, 17722, 41203, 29103, 35239, 800, 47455, 36747, 30463, 11211, 43807, 33903, 15184, 41258, 28512, 52238, 13627, 39131, 11044, 59972, 17480, 31516, 24761, 43686, 15850, 47432, 7962, 33177, 12563, 38324, 31331, 60585, 19400, 26316, 63575, 37695, 28599, 46296, 12780, 42944, ++ 1907, 52105, 31963, 57003, 26415, 6882, 58438, 18734, 2363, 53188, 14645, 62897, 605, 45375, 10879, 26871, 44252, 19874, 9132, 59110, 65, 15579, 24065, 52589, 20927, 2410, 54804, 13340, 59721, 24785, 4818, 49928, 26936, 8756, 58485, 34827, 4022, 53722, 31519, 26617, 50615, 14509, 23301, 40819, 48007, 10812, 27338, 59169, 5913, 23990, 12125, 40544, 4928, 48790, 37033, 6591, 39976, 15880, 8340, 44380, 56846, 13482, 32243, 48524, 17132, 39210, 9455, 51995, 14904, 36861, 58479, 12385, 38652, 19085, 46819, 26888, 54065, 6065, 18630, 48622, 23266, 57352, 4293, 19630, 44229, 15644, 24374, 3695, 40939, 17331, 56995, 19422, 61743, 39782, 2107, 29429, 63339, 8943, 59652, 33468, 7484, 53734, 13451, 41473, 47436, 36677, 9647, 56746, 14771, 4851, 29452, 44233, 562, 65519, 11527, 47368, 7664, 32685, 61364, 26682, 14631, 6972, 50681, 58013, 17978, 64087, 45003, 28650, 61229, 20124, 56300, 41974, 2684, 18417, 39194, 33931, 3910, 22347, 46050, 9582, 39636, 23606, 32083, 53947, 41767, 59453, 5667, 39037, 
15406, 64820, 11927, 57326, 5996, 44905, 21680, 43176, 28564, 37429, 23023, 41112, 63250, 9441, 55812, 3046, 57471, 22066, 61948, 13389, 31509, 6329, 42657, 53543, 18451, 38119, 21203, 31328, 58558, 17812, 29762, 64171, 6262, 24015, 33993, 53969, 28721, 59391, 3897, 50425, 12806, 34878, 29061, 37605, 10072, 63236, 17323, 4998, 57457, 33140, 44343, 24668, 9595, 63039, 16745, 43438, 10616, 45042, 23326, 61404, 18776, 49071, 59780, 28671, 38412, 10392, 54222, 34835, 9432, 39484, 3757, 49285, 12601, 60844, 18261, 6984, 59866, 49734, 18915, 64345, 24474, 57748, 4122, 23111, 63816, 6551, 47856, 32957, 3876, 62941, 10187, 53377, 2213, 65510, 18117, 52705, 23517, 57798, 16754, 4059, 40571, 48287, 6203, 15942, 51205, 5068, 57709, 25480, ++ 38795, 19174, 47202, 15474, 38185, 51355, 12478, 31354, 60691, 9493, 43556, 35191, 24287, 58805, 30089, 61991, 13405, 54951, 46043, 27347, 62623, 34588, 47417, 8303, 33175, 43871, 26028, 39528, 989, 53512, 37621, 20068, 60619, 40076, 13752, 28046, 45708, 20689, 47473, 2313, 39078, 58687, 9324, 53047, 20950, 44335, 36342, 19170, 52103, 33304, 61466, 28035, 57650, 10268, 63158, 18009, 55881, 32899, 60402, 25388, 18408, 38012, 62591, 8762, 54942, 25624, 33478, 40954, 4327, 48263, 20796, 53321, 45605, 9741, 56105, 15856, 30721, 39575, 55377, 10946, 35505, 27774, 53017, 31987, 63123, 28717, 54583, 51164, 29893, 60541, 37739, 95, 34034, 10364, 45010, 52526, 14047, 36273, 40879, 15761, 48243, 21547, 58015, 28351, 6473, 23519, 53235, 20171, 33885, 48666, 57530, 12622, 46628, 22546, 38229, 60551, 28869, 51957, 1154, 48191, 64636, 37709, 24931, 1718, 41299, 23117, 5120, 40240, 16781, 47257, 12612, 25105, 63049, 44118, 11043, 52805, 41480, 61832, 14002, 56606, 17450, 50848, 62450, 1273, 27559, 20594, 33827, 50427, 22602, 42437, 32547, 19893, 53693, 34030, 49795, 13531, 64276, 7032, 51518, 396, 20141, 27874, 36831, 42905, 14693, 40072, 4374, 52921, 20729, 63961, 35982, 26441, 8737, 46587, 52355, 5340, 44337, 1587, 47384, 21559, 39213, 51457, 13203, 
44076, 8060, 38252, 32012, 16829, 64746, 18933, 57157, 3284, 50109, 28200, 53495, 22392, 50964, 11153, 16258, 54480, 31081, 35745, 2130, 56621, 29706, 3485, 53584, 11609, 42569, 34421, 13592, 47385, 4186, 62011, 19432, 326, 50828, 26597, 63018, 21037, 54625, 32558, 44384, 51696, 25726, 3114, 38926, 6142, 46951, 11897, 37807, 48979, 18466, 43294, 27851, 56473, 21780, 39838, 49539, 34097, 27124, 39391, 29699, 44710, 83, 28386, 43427, 51830, 8942, 22426, 56712, 44085, 20441, 61597, 34461, 14733, ++ 64209, 32868, 7691, 59459, 159, 28722, 64652, 40049, 23153, 48261, 27841, 7114, 54238, 16600, 5082, 41448, 23371, 32240, 6700, 39722, 21579, 5596, 58280, 27907, 60283, 16316, 63750, 9246, 47968, 32212, 12041, 44797, 30123, 1504, 43701, 52244, 6971, 59954, 15848, 65088, 12350, 29019, 45215, 32257, 6713, 60227, 2559, 42266, 9958, 46365, 1878, 15598, 34167, 25658, 42926, 30771, 645, 51172, 5624, 45563, 53990, 3874, 21884, 29025, 43454, 393, 63699, 19544, 55751, 31575, 26469, 2117, 23861, 60998, 41216, 3489, 49737, 11994, 25882, 65139, 46091, 7328, 40330, 13705, 944, 42447, 10570, 36378, 7556, 25694, 11964, 47921, 27614, 55503, 24609, 32103, 4156, 50863, 23135, 1449, 65185, 35169, 2830, 31865, 61005, 43761, 1947, 38765, 64421, 10650, 23843, 41170, 28126, 58257, 15256, 3632, 42132, 17422, 44003, 19558, 30939, 12391, 55439, 35472, 49182, 10445, 34291, 57558, 6123, 30511, 38027, 53567, 7156, 26452, 59819, 19274, 27853, 6299, 25701, 42798, 33327, 6888, 13204, 37461, 48897, 10768, 55116, 4072, 60465, 8000, 52159, 2579, 38481, 16459, 1459, 56290, 26161, 48534, 18483, 58210, 34971, 53314, 11372, 62565, 17905, 29531, 44584, 24182, 48174, 16092, 524, 45203, 61278, 28922, 13891, 65444, 24535, 36763, 55621, 10698, 28321, 2189, 57706, 18079, 63757, 22226, 53095, 39834, 1458, 46507, 21246, 40919, 14514, 42787, 7761, 36175, 26947, 47809, 64138, 3835, 46309, 19686, 40774, 24004, 60174, 37857, 17574, 51317, 26471, 1348, 63700, 21701, 51824, 30651, 44594, 56693, 33352, 14708, 45245, 7950, 
37698, 5072, 23452, 13540, 34610, 58560, 31262, 52990, 20896, 62684, 29769, 9953, 60624, 36905, 1536, 14766, 51043, 7658, 19824, 13773, 59431, 5971, 55908, 11262, 37041, 61824, 14491, 25134, 62846, 32267, 36025, 768, 39554, 31019, 9148, 48994, ++ 3586, 54091, 43754, 24690, 36343, 45201, 19595, 4676, 50395, 15965, 36930, 63674, 20975, 49127, 33774, 52182, 1430, 64157, 49612, 16934, 55852, 42948, 13826, 37407, 3907, 50686, 31024, 22553, 57444, 15027, 65435, 6531, 55916, 23949, 63439, 18196, 37880, 22836, 34397, 42539, 19446, 54775, 1063, 62647, 26150, 14954, 55600, 28639, 64708, 17049, 55023, 43771, 49969, 3450, 59900, 14073, 47960, 20911, 29485, 38762, 12026, 34834, 47346, 59255, 14263, 49916, 11611, 45200, 16436, 6618, 65051, 43038, 35083, 13943, 32107, 22262, 60011, 43738, 34165, 1651, 17426, 61139, 21933, 50376, 56517, 18028, 47072, 20447, 63890, 45573, 53582, 22173, 65446, 6992, 20695, 46898, 61221, 19033, 56566, 42750, 27218, 12791, 51641, 46211, 17954, 12092, 49469, 16801, 30380, 45090, 3150, 62037, 6568, 34349, 50406, 21390, 55218, 10147, 33714, 58798, 4222, 45350, 20915, 8855, 63166, 26175, 54517, 18963, 50132, 64919, 3726, 21756, 35197, 49483, 261, 36814, 48706, 31237, 54187, 1983, 58196, 29428, 47682, 16363, 61270, 25850, 40326, 14359, 30292, 46624, 25076, 62617, 27996, 59057, 23831, 39619, 10053, 33351, 42350, 15111, 46171, 5312, 23556, 32620, 51174, 7301, 60703, 33797, 10358, 41006, 59107, 11914, 32242, 3507, 40663, 33012, 7930, 60634, 14275, 45442, 62351, 32456, 40828, 26054, 4769, 48933, 10407, 24480, 55737, 35819, 12121, 62014, 32705, 59479, 18605, 61575, 2785, 20493, 34339, 38471, 14027, 58355, 12347, 48501, 8314, 31392, 46795, 6095, 40054, 54882, 31904, 16506, 37251, 11959, 22990, 6699, 42694, 59112, 24608, 52518, 16384, 64134, 40441, 56088, 9603, 42871, 16805, 8144, 45511, 576, 42180, 34395, 16005, 53660, 23982, 64940, 35382, 45210, 61434, 30252, 41869, 22930, 48873, 20517, 50781, 7298, 34962, 54317, 2464, 47087, 13506, 64743, 18335, 53546, 22814, 
41947, ++ 28021, 11152, 20817, 62361, 17171, 8725, 55773, 34313, 11412, 57247, 3249, 31748, 40492, 8515, 47059, 18998, 37866, 12170, 26220, 35503, 10340, 29399, 51667, 19567, 45585, 11306, 40931, 5869, 28544, 42348, 21747, 51280, 16858, 36827, 10716, 30638, 50344, 9639, 56253, 5081, 48760, 24305, 36074, 17275, 50978, 34526, 22544, 48554, 8081, 37344, 31412, 21962, 12538, 53452, 35682, 23428, 41056, 61970, 9301, 58124, 22940, 64379, 16224, 5407, 36177, 24151, 60550, 27896, 37699, 50661, 10628, 18090, 57950, 5309, 51810, 36587, 8256, 20185, 57745, 24589, 52124, 31305, 37023, 9355, 27266, 33672, 58627, 2392, 32539, 15220, 4774, 39039, 16427, 42002, 57926, 11511, 37554, 6295, 31071, 10009, 54499, 38504, 24214, 8494, 55547, 32520, 63143, 25973, 7787, 54745, 37308, 19128, 53425, 9320, 30602, 39678, 25258, 63903, 6271, 24006, 53144, 39133, 60429, 29907, 15375, 46242, 581, 36975, 23576, 42538, 14754, 58356, 45415, 16518, 57004, 12973, 63433, 9022, 47133, 21931, 38760, 18560, 65315, 34850, 2903, 53153, 19027, 57951, 35459, 17540, 40881, 12561, 47854, 9012, 31654, 45813, 17336, 61748, 2775, 24901, 64864, 30572, 59494, 1301, 46929, 26839, 37663, 2352, 55240, 28420, 19906, 49266, 56419, 23469, 54778, 16462, 50648, 27040, 35691, 19374, 7002, 49749, 15085, 56236, 36903, 29930, 61141, 44608, 7577, 26525, 52119, 6127, 25285, 294, 45222, 30508, 40154, 52545, 8106, 60532, 27739, 5464, 51600, 33988, 61929, 22246, 14902, 64407, 20632, 9046, 45396, 7323, 57974, 40579, 65092, 29456, 13097, 35879, 2350, 31496, 46746, 26998, 1198, 28871, 48356, 21645, 60393, 27377, 36096, 55438, 17860, 51591, 26529, 5444, 44287, 31886, 9389, 25567, 914, 52085, 11772, 36156, 3149, 63744, 26592, 46094, 17596, 39886, 21460, 58928, 29233, 10906, 45685, 7131, 60966, 16652, ++ 56292, 40838, 49977, 5770, 52965, 47885, 29813, 59791, 41222, 25562, 44856, 12837, 55435, 29079, 15154, 60122, 24530, 56684, 42250, 3349, 61416, 46735, 1727, 64699, 33640, 54312, 17718, 62416, 49331, 2955, 38992, 32648, 47073, 3783, 
54452, 41332, 147, 62098, 25867, 32934, 40460, 7853, 57205, 38607, 4261, 58060, 12727, 39827, 25168, 51408, 4526, 63794, 39116, 18832, 7091, 56614, 10966, 33588, 17293, 49599, 1975, 31169, 42341, 27328, 52638, 7971, 39786, 3090, 62988, 23169, 33883, 54216, 29154, 48005, 17084, 63246, 28288, 45078, 4810, 41545, 12758, 48180, 3197, 43526, 64442, 5650, 38728, 23040, 55308, 41375, 59882, 30619, 50287, 2987, 35567, 28794, 49080, 25542, 63995, 44601, 17442, 5116, 62507, 35845, 20855, 40715, 5579, 42499, 59306, 14097, 26983, 47705, 16389, 43075, 62907, 2071, 46129, 14321, 49973, 36213, 11634, 27141, 2582, 51498, 33051, 43308, 13648, 59002, 8452, 31807, 48080, 29129, 10194, 32894, 24082, 43106, 20851, 34519, 15488, 60570, 4479, 51295, 9974, 23939, 43739, 31436, 8773, 45259, 923, 64099, 6750, 55469, 20987, 36655, 63503, 4575, 54174, 29323, 52448, 37187, 8203, 43811, 16265, 54407, 12340, 57987, 18855, 63102, 14445, 51563, 39049, 4792, 17470, 37440, 9900, 43251, 20442, 48247, 3115, 42251, 58912, 23183, 34692, 11339, 46846, 16120, 2868, 33306, 19856, 58769, 30834, 48557, 38675, 55069, 22949, 11821, 57944, 15219, 43073, 21632, 55543, 41973, 25757, 18462, 138, 44199, 55954, 28340, 35176, 58794, 24507, 53029, 27501, 1940, 15874, 46371, 50240, 19150, 61759, 38739, 10858, 53541, 19975, 61289, 14501, 37337, 2173, 50867, 12987, 24922, 61659, 3528, 46476, 58990, 12189, 54786, 16972, 57384, 37978, 18693, 46779, 58109, 16276, 32692, 9663, 60114, 30772, 11973, 49696, 6484, 42415, 24574, 33488, 50483, 29648, 37313, ++ 1453, 34804, 13112, 30715, 39130, 22613, 13784, 1109, 21653, 52444, 18075, 61155, 2054, 39322, 57862, 4237, 44025, 7743, 32917, 53637, 22399, 18485, 40164, 25619, 7280, 23748, 44473, 26560, 35006, 14333, 61262, 8186, 25261, 59018, 27288, 12778, 46479, 29511, 13473, 52632, 16405, 64016, 11619, 21540, 46926, 29667, 43539, 281, 60791, 20247, 41552, 9106, 29943, 62397, 45162, 28455, 52452, 2939, 43950, 26015, 40214, 50833, 10433, 56245, 20557, 57513, 32451, 44110, 12834, 47234, 
1403, 42219, 8870, 39076, 25009, 211, 50963, 15091, 63726, 29494, 54819, 20732, 59554, 16287, 24062, 45283, 14293, 51510, 8624, 26183, 18797, 34523, 9684, 62367, 17845, 54124, 758, 40013, 14584, 34190, 57593, 29570, 45273, 224, 58656, 14969, 52591, 22317, 34726, 50945, 898, 60709, 32980, 24526, 13523, 57244, 35304, 27711, 40915, 18586, 61913, 47083, 17087, 56773, 6729, 22209, 62289, 28022, 52170, 11931, 61539, 1865, 40020, 60840, 5733, 51761, 3125, 54968, 40538, 24964, 44720, 28132, 56057, 41163, 12300, 63280, 37103, 26808, 49587, 23375, 43001, 29904, 3426, 50281, 14523, 44087, 22351, 6509, 40491, 12817, 56807, 20559, 34162, 39261, 22733, 41706, 5694, 45658, 35485, 7750, 26089, 64459, 34381, 47135, 58255, 888, 63405, 12435, 53697, 29266, 9354, 52621, 117, 64974, 20844, 54879, 41791, 62689, 13941, 43212, 4173, 15572, 65401, 9244, 37193, 47245, 6474, 29507, 49899, 1276, 32157, 10894, 64898, 39448, 54177, 36608, 4723, 12649, 49774, 3075, 41594, 18056, 48641, 32784, 60100, 23862, 4565, 55356, 8793, 22047, 58307, 30194, 41824, 5822, 46027, 57108, 33464, 65298, 40875, 9000, 32349, 39962, 20271, 29080, 38633, 22469, 42547, 4740, 63193, 28249, 6830, 24432, 42892, 55012, 38717, 4450, 41372, 57196, 27407, 36705, 55577, 2976, 59958, 15609, 4525, 57393, ++ 23897, 63141, 26679, 58069, 2791, 65356, 33572, 42546, 62073, 6170, 35913, 26502, 49840, 21982, 34431, 28140, 51098, 20350, 65254, 14519, 30732, 57079, 12659, 48773, 59505, 36981, 501, 58658, 10223, 56346, 21050, 53112, 35767, 15529, 64244, 39419, 18881, 57899, 43246, 3260, 44858, 27576, 49717, 31676, 7396, 63275, 18559, 53867, 32426, 11300, 58466, 26478, 47618, 1298, 24486, 15275, 36702, 65145, 20058, 55254, 13723, 61388, 18932, 36960, 1037, 45908, 17561, 30054, 54603, 19972, 59740, 27039, 62184, 13322, 56396, 33334, 40554, 19298, 38091, 10100, 34773, 6901, 39278, 30385, 49829, 11600, 61964, 29265, 37168, 48522, 1299, 52862, 43831, 23611, 46055, 12933, 59249, 21367, 52201, 3381, 22599, 50090, 11650, 26481, 48414, 29029, 
37912, 3883, 19524, 31215, 40053, 11222, 56142, 5015, 52296, 20437, 8680, 59885, 133, 54203, 8044, 32172, 41749, 20270, 38378, 48821, 3217, 39606, 17520, 44403, 26866, 21210, 55590, 17917, 38287, 29818, 46300, 27233, 7611, 63879, 11803, 36006, 504, 20153, 52516, 6066, 21260, 59753, 13770, 37891, 9794, 60860, 34603, 19574, 57669, 26957, 35634, 60077, 19077, 49433, 27520, 47460, 4112, 50764, 9210, 31148, 49647, 25127, 21301, 59902, 44155, 12775, 22314, 6621, 30360, 24973, 39592, 33633, 22044, 61662, 38778, 25367, 43683, 31562, 5613, 37755, 8922, 27922, 51332, 23718, 56753, 34243, 18235, 28653, 53848, 16959, 63849, 35270, 23541, 62422, 17414, 48046, 29089, 7205, 15766, 25248, 59568, 43194, 30391, 14480, 36209, 63317, 5656, 39001, 9918, 53954, 34160, 28056, 43508, 49001, 3330, 15348, 63663, 35416, 26095, 10515, 22788, 4642, 19397, 49420, 14009, 56006, 7452, 64029, 2379, 50169, 30672, 48312, 13328, 53996, 33599, 60726, 1858, 22058, 15120, 52858, 19642, 1241, 65159, 13867, 19015, 52478, 26974, 38455, 44746, 19851, ++ 46877, 9766, 43299, 18611, 45879, 10572, 54706, 16354, 27450, 47548, 10071, 64482, 14071, 43196, 9369, 62505, 11186, 47762, 776, 38945, 44980, 5292, 36093, 31898, 8663, 15748, 52402, 19902, 47692, 31302, 42856, 1851, 45355, 23113, 4713, 49032, 34114, 6331, 22115, 61380, 20343, 35529, 1612, 59460, 41809, 13889, 36645, 5771, 46059, 16581, 35015, 55795, 14482, 51908, 42007, 59042, 6011, 31758, 46790, 7558, 34075, 4599, 29694, 48732, 64798, 25227, 9937, 58760, 5029, 35738, 15411, 37232, 21708, 46398, 6357, 59201, 11414, 53193, 2465, 47465, 57179, 26610, 62563, 554, 54019, 35263, 19932, 3936, 58103, 12415, 63502, 13890, 26960, 56872, 7718, 33291, 43180, 27949, 9211, 60479, 41587, 16134, 39260, 61576, 18532, 9443, 65020, 46760, 56926, 8249, 64133, 21705, 36902, 45620, 29677, 38701, 49037, 23252, 44492, 29232, 15846, 58439, 4739, 65368, 12858, 34634, 24611, 56097, 35834, 4906, 53838, 33851, 7949, 50361, 11420, 65089, 14465, 57619, 17209, 31966, 49871, 22866, 58953, 
45633, 33055, 29658, 46884, 4747, 56432, 28286, 51204, 15863, 53443, 10856, 41290, 624, 51775, 11847, 32314, 2002, 61493, 14152, 63872, 28802, 55677, 16736, 65212, 11132, 54002, 29884, 1738, 53221, 41889, 62202, 51944, 15235, 56829, 5993, 46367, 4283, 17013, 60051, 13638, 50858, 26782, 58130, 22616, 48121, 1045, 39465, 11001, 45978, 50526, 2308, 41560, 26246, 4353, 44776, 12884, 38038, 53230, 3357, 45592, 57815, 32950, 50718, 19549, 9506, 62743, 52641, 22547, 11123, 26223, 56234, 21165, 42048, 17133, 64645, 12323, 36511, 33032, 51293, 7560, 17640, 43222, 50450, 29609, 54348, 44912, 28144, 58193, 23640, 47568, 33270, 15521, 59749, 8363, 34890, 21221, 40131, 10467, 45538, 28934, 64294, 48064, 25856, 44327, 34562, 23396, 47603, 31703, 10351, 48598, 8014, 63856, 14207, ++ 54522, 36130, 6592, 51892, 31881, 24896, 37511, 7875, 50986, 38541, 19730, 30595, 46198, 5456, 53338, 17445, 31471, 37109, 23246, 58510, 16481, 50147, 63318, 20738, 55627, 41733, 29167, 38622, 5018, 17165, 28128, 62861, 11800, 51754, 31841, 9060, 60720, 24892, 55485, 30915, 10107, 54195, 39591, 15185, 23827, 52354, 27895, 61825, 23205, 50440, 2756, 21145, 38165, 32663, 8668, 22403, 49066, 12923, 37805, 26801, 59525, 44916, 38591, 6876, 14749, 34483, 51241, 22201, 41295, 49371, 7370, 52274, 2794, 43195, 30503, 21116, 48938, 26069, 61510, 22890, 14517, 45760, 18450, 41819, 25129, 8072, 56121, 47366, 22500, 42886, 31249, 40478, 5222, 38264, 20121, 64653, 4457, 36776, 47653, 30438, 7113, 55920, 31592, 2556, 53507, 43270, 24360, 13111, 27634, 43907, 15568, 49740, 25470, 2876, 17700, 65132, 5662, 31715, 13065, 55901, 37424, 22642, 43607, 25755, 52892, 10681, 60057, 7496, 19769, 64235, 15589, 41367, 62534, 25510, 36148, 20391, 43896, 1387, 37570, 53517, 5383, 42154, 15102, 8196, 61772, 16124, 54363, 34274, 17935, 44445, 1799, 39367, 25326, 47118, 30812, 65410, 16871, 46336, 55156, 41985, 21834, 36309, 10550, 38262, 52, 43520, 33061, 3303, 40352, 15745, 48600, 31896, 18199, 27748, 10820, 44898, 30988, 18746, 
55420, 36077, 49061, 7458, 40092, 18524, 45832, 12641, 35440, 60372, 16540, 29640, 63125, 8189, 21909, 61257, 13547, 33442, 59085, 20032, 57009, 8645, 24800, 35918, 20867, 10221, 41108, 2429, 47491, 38534, 26929, 1012, 46147, 61171, 43840, 13695, 51058, 612, 47743, 6385, 25472, 56913, 18742, 40104, 24345, 55041, 23, 62843, 12534, 38274, 6766, 35110, 1291, 43591, 11520, 36633, 53174, 19163, 41137, 26212, 56884, 3389, 51371, 17294, 37286, 12647, 31997, 7748, 61323, 9284, 51522, 5172, 43068, 62217, 21003, 40371, 26020, 32630, ++ 2258, 22168, 61731, 14865, 60456, 4907, 44356, 58715, 3725, 32411, 56531, 342, 59182, 36559, 25153, 40886, 60849, 6476, 52749, 28663, 9748, 24932, 2245, 27495, 46134, 4058, 60517, 13060, 64551, 49812, 7661, 33446, 40661, 19256, 56792, 38224, 17425, 42153, 2223, 37217, 47823, 17772, 64931, 26699, 56543, 3617, 48147, 9738, 40334, 30520, 65517, 44196, 5194, 61137, 17896, 57070, 27684, 63341, 98, 54145, 15680, 21493, 62667, 23487, 42804, 53586, 3588, 62002, 12090, 25805, 63975, 32634, 55528, 16062, 64686, 4081, 36114, 31749, 7869, 42659, 32992, 5140, 52546, 10828, 58994, 32246, 39952, 17229, 28191, 6517, 60744, 15990, 51933, 29951, 49391, 24781, 55079, 16840, 62771, 19623, 51247, 13591, 45849, 22982, 33559, 6705, 36453, 59949, 1763, 55025, 35587, 6151, 61478, 53998, 41634, 10839, 51193, 34875, 62409, 3771, 47818, 9996, 50553, 1230, 31142, 44814, 28747, 40785, 49727, 30325, 46590, 842, 23048, 47787, 4143, 59374, 30922, 49006, 19488, 10645, 60225, 28692, 38398, 57208, 26265, 2250, 39790, 11213, 64996, 22503, 58336, 33207, 5463, 62252, 7548, 21392, 38661, 24557, 8886, 26356, 5924, 45235, 58417, 24090, 60312, 19438, 47686, 27228, 57385, 36164, 9062, 60986, 4016, 38423, 49855, 2292, 64266, 41256, 11806, 28151, 23870, 57261, 30197, 63543, 2048, 53402, 32317, 6659, 41084, 54329, 19228, 31839, 43863, 36477, 56122, 48749, 9928, 51147, 27534, 42401, 60029, 14248, 49295, 63528, 23070, 60909, 28767, 16886, 57165, 34646, 19930, 32241, 3965, 36976, 29899, 
62486, 31122, 38011, 60731, 1624, 52334, 11629, 59529, 32121, 47131, 20661, 30799, 59253, 17232, 63401, 51901, 21428, 62271, 5682, 27570, 45932, 269, 64687, 14455, 43795, 23816, 62542, 5358, 56227, 40616, 53749, 18029, 38123, 29841, 58290, 16850, 35543, 306, 53387, 12176, 59070, ++ 50748, 41577, 28205, 39659, 21211, 49423, 29179, 17606, 23796, 53771, 15412, 42031, 22715, 12022, 55105, 2643, 26951, 48349, 13546, 42740, 34686, 54548, 40575, 61852, 11575, 34070, 22743, 43371, 24599, 36336, 55155, 13986, 59757, 26311, 872, 47274, 11090, 53343, 14699, 50174, 8532, 33338, 5409, 45637, 10936, 33944, 19610, 37661, 57574, 6929, 13303, 25762, 54473, 29145, 45862, 3772, 40688, 19317, 43347, 24787, 11135, 51514, 2351, 55632, 31930, 9016, 28378, 39351, 30931, 44802, 18789, 10884, 24092, 40148, 27632, 53748, 13632, 44394, 55187, 16937, 65266, 49440, 28867, 36737, 15527, 44784, 1961, 64928, 53345, 36017, 21608, 46608, 10690, 61448, 2208, 15058, 41002, 11081, 32666, 1597, 37381, 25143, 63764, 10504, 58217, 50321, 16538, 41865, 32032, 18250, 47288, 28566, 12530, 32635, 19798, 57734, 26280, 16719, 42652, 21153, 28328, 63357, 33511, 19194, 57389, 16443, 54331, 2176, 13935, 37766, 9736, 58072, 32352, 13167, 53055, 41842, 9390, 24504, 62140, 33705, 21450, 47413, 3538, 18220, 50525, 42696, 24687, 48448, 7108, 31891, 13026, 49117, 18713, 43311, 27796, 56100, 3916, 58832, 33735, 63022, 48808, 30067, 18032, 7842, 34817, 52608, 13142, 61893, 6957, 22927, 55993, 42813, 24289, 58689, 20933, 35237, 25754, 8477, 51206, 62518, 1326, 44716, 10201, 34107, 17384, 42517, 20314, 64435, 24859, 3414, 47040, 59770, 4923, 14821, 24130, 705, 30166, 39172, 5688, 32646, 1888, 55243, 30895, 6313, 37304, 13375, 53779, 5190, 42800, 7985, 52081, 15458, 48270, 57442, 18433, 8381, 45728, 14284, 20402, 41287, 28578, 45132, 5239, 15954, 39082, 8498, 48733, 3846, 41622, 25660, 14673, 39342, 31441, 16618, 60876, 10787, 49156, 20033, 35711, 58717, 8868, 30483, 46975, 20719, 598, 24948, 59287, 3693, 45349, 12416, 
28461, 55977, 24223, 33907, 46029, 17794, ++ 30174, 11504, 48127, 675, 34130, 55329, 12554, 63481, 46506, 35122, 8325, 62788, 29605, 50311, 33075, 15819, 45719, 19436, 63787, 3548, 59908, 18017, 6983, 29977, 51985, 18701, 57611, 9312, 53907, 2594, 21378, 44364, 5940, 50845, 29294, 65297, 34919, 28323, 63120, 25697, 58878, 22691, 51478, 28888, 60440, 42878, 55138, 2030, 18168, 49345, 35433, 47145, 15984, 39443, 11776, 34702, 52696, 9604, 57893, 35930, 48124, 28857, 41593, 18283, 13145, 60865, 47828, 16789, 56815, 523, 37927, 47025, 58155, 8486, 50000, 18194, 60672, 23533, 1018, 37599, 12294, 21778, 3424, 60450, 23779, 50706, 13377, 33822, 9450, 44161, 300, 57325, 33048, 23188, 44875, 34843, 53199, 26064, 56451, 44031, 58924, 5369, 35054, 40390, 20292, 29807, 62974, 4547, 51890, 10305, 23451, 58969, 40560, 7517, 44927, 37659, 1485, 59443, 8981, 52634, 36421, 14526, 40146, 60789, 5975, 46909, 23727, 63578, 21869, 51375, 27572, 18377, 54823, 39257, 28362, 6495, 35022, 52016, 2560, 40201, 55211, 12710, 64377, 35234, 31162, 13452, 62723, 19711, 52097, 41519, 61368, 8618, 54649, 36955, 14936, 48194, 31367, 13290, 44329, 15657, 2932, 53647, 39719, 51337, 25822, 4665, 41339, 20221, 37305, 46093, 17141, 5550, 33394, 13363, 54249, 14763, 59693, 37989, 19767, 32725, 15805, 37224, 52430, 21641, 59140, 5219, 49206, 9812, 51805, 38125, 12273, 34793, 25659, 52774, 40563, 62588, 18786, 43535, 64337, 21459, 46895, 17759, 40282, 25960, 56488, 44885, 19028, 31646, 65259, 23721, 39684, 59351, 10430, 24894, 40793, 22916, 58611, 26793, 49688, 9744, 64238, 22267, 35685, 62071, 27206, 57564, 23908, 52729, 34236, 9318, 56625, 2891, 42771, 55208, 29864, 37494, 25219, 54582, 6405, 32507, 26854, 50533, 15691, 36280, 49348, 14086, 34151, 42058, 22358, 63417, 39274, 8473, 49188, 6068, 64861, 4039, ++ 38007, 56134, 16523, 64116, 8955, 20213, 40552, 1786, 10988, 25933, 48969, 39823, 18438, 7196, 64786, 35644, 56274, 8910, 30290, 38171, 21776, 43684, 49435, 15281, 37564, 46826, 1187, 39560, 27052, 
48289, 30464, 58128, 37317, 16104, 43796, 20561, 7232, 45999, 4418, 40800, 12276, 44018, 566, 38374, 16208, 8217, 24677, 63913, 32993, 27227, 59836, 792, 62913, 7880, 50734, 24025, 60616, 16966, 30641, 4123, 63847, 8178, 32862, 58570, 46186, 35563, 21043, 6185, 50297, 14406, 61244, 4717, 28962, 35350, 1827, 38498, 9809, 46258, 34011, 58517, 27994, 56674, 41080, 30943, 6026, 62947, 26796, 49002, 19474, 55641, 25443, 37666, 18172, 50951, 8233, 60096, 5818, 47196, 21097, 8776, 27411, 48919, 14418, 54302, 1035, 47857, 11979, 26705, 38981, 62220, 33953, 442, 48757, 22429, 63641, 14869, 46481, 30778, 48382, 24802, 2383, 54917, 7267, 26750, 38941, 35084, 9091, 32793, 42263, 5530, 60404, 45664, 2945, 16299, 64575, 22382, 56793, 14108, 45913, 26593, 7337, 43377, 23458, 51536, 9163, 55753, 4247, 38111, 29011, 220, 26005, 35934, 23195, 2476, 59662, 10400, 40044, 52952, 20024, 37811, 57188, 22548, 11311, 43094, 15347, 62681, 31627, 50054, 1079, 29359, 51707, 39825, 64920, 47832, 343, 45491, 9614, 43459, 4991, 48370, 54730, 27201, 3709, 47452, 25906, 56307, 29144, 36246, 15344, 27312, 57662, 20993, 45540, 10644, 31267, 7028, 54695, 15885, 12426, 52321, 26687, 61710, 11297, 51714, 517, 33496, 8863, 48922, 12107, 55128, 27983, 6015, 33790, 63587, 2570, 53252, 35013, 3621, 55778, 31828, 7034, 48076, 13181, 53404, 2654, 42404, 11315, 36948, 18502, 60207, 26728, 49843, 23185, 9798, 50988, 4358, 44435, 13045, 41786, 52573, 2530, 39498, 59837, 11163, 62092, 27788, 56667, 10103, 50262, 2080, 16057, 60289, 20144, 44015, 31224, 23233, ++ 57843, 7394, 35333, 25244, 42915, 52667, 30963, 56938, 43725, 61500, 20649, 4107, 58182, 44660, 21354, 1358, 41557, 24149, 51417, 11880, 57299, 32694, 10738, 64986, 5734, 23610, 61060, 32449, 14851, 63933, 17853, 9687, 24208, 61997, 3425, 32729, 57382, 23424, 36464, 19808, 56061, 30005, 62552, 21225, 53510, 31268, 44949, 14176, 51041, 10447, 20599, 42455, 23011, 56107, 31580, 1813, 42689, 33720, 49846, 22028, 39907, 16302, 53035, 5668, 26252, 1586, 
40803, 65351, 27468, 34870, 22761, 52828, 20326, 60054, 42092, 51649, 29834, 62794, 19755, 6664, 51105, 9239, 18875, 46972, 38422, 20886, 42377, 4343, 59683, 31829, 12089, 64323, 3769, 39653, 29081, 19093, 31403, 13239, 38842, 65360, 17681, 42675, 22015, 28281, 57282, 18826, 44285, 55687, 21214, 8547, 52934, 17207, 56518, 30190, 4127, 27370, 53585, 19009, 11778, 41130, 61256, 18034, 45510, 12449, 51906, 20630, 59147, 15139, 56337, 26248, 12201, 34231, 43011, 50827, 11134, 44881, 18793, 30244, 62997, 16685, 32587, 58426, 983, 27415, 44986, 36382, 22126, 59206, 15292, 45995, 57052, 11655, 64529, 44834, 29610, 50979, 23973, 997, 61206, 7223, 28486, 32877, 64677, 1647, 56452, 28082, 9746, 58949, 14297, 63648, 11485, 26279, 19059, 28610, 36719, 21915, 52801, 29520, 57882, 23020, 13018, 40625, 65309, 14348, 38459, 11524, 44391, 469, 61025, 42057, 7377, 55347, 1694, 65108, 47922, 22785, 36863, 50053, 34457, 3884, 44296, 7520, 35070, 16163, 46573, 22404, 62061, 35559, 41355, 1735, 21345, 44649, 54406, 16632, 46960, 28993, 12747, 44039, 16129, 39568, 54716, 25262, 44471, 19726, 33635, 15005, 65078, 29238, 46663, 6037, 45297, 13474, 64472, 33082, 17785, 57957, 22224, 61552, 29299, 18355, 65324, 21812, 7236, 43329, 31366, 5918, 46394, 18661, 32923, 54740, 41248, 29070, 37095, 14972, 53893, 9447, ++ 42274, 27587, 51599, 13525, 59319, 5393, 36850, 14411, 28541, 6374, 33436, 52266, 13156, 37347, 27751, 54026, 14604, 59385, 4845, 47183, 25703, 184, 53258, 26474, 35290, 55384, 12423, 50465, 45499, 6730, 35598, 52579, 41856, 11449, 54675, 39215, 12930, 49546, 15386, 52059, 6500, 35228, 13590, 41366, 4105, 48896, 21929, 59134, 6074, 41007, 53158, 30180, 12434, 37245, 19875, 64524, 14381, 6574, 55102, 12205, 60112, 27149, 38308, 19590, 44259, 57273, 24326, 11360, 54782, 43533, 9423, 31504, 45470, 12523, 17471, 24803, 5543, 15196, 48660, 39677, 25329, 43441, 63833, 1375, 57469, 10413, 54652, 14946, 39173, 7247, 48053, 27795, 53860, 10156, 63289, 41685, 57840, 49983, 519, 35903, 52677, 
3619, 60924, 34293, 7330, 38184, 30951, 3212, 49353, 37086, 25820, 42410, 13714, 35952, 60371, 39537, 6872, 34403, 64850, 5279, 32431, 23073, 49436, 29539, 64462, 377, 44144, 48568, 3863, 37248, 62864, 24240, 7086, 29470, 61085, 12, 38565, 49535, 4962, 37001, 48342, 14773, 39104, 60732, 6399, 17009, 47591, 9678, 53914, 32434, 20513, 42088, 16584, 34054, 4976, 18319, 63680, 35107, 49739, 42571, 17242, 47249, 12654, 35804, 46441, 20811, 38722, 44669, 23798, 34207, 55015, 3034, 57637, 7515, 61536, 32210, 16637, 63263, 2766, 34558, 60852, 6926, 31378, 43107, 8335, 62116, 23307, 53890, 30650, 17711, 49611, 33076, 28519, 38748, 17179, 58474, 2710, 28241, 60755, 24303, 37669, 58827, 20389, 64614, 29258, 58264, 3521, 25138, 15053, 60296, 50120, 29632, 11377, 36070, 7700, 38679, 65010, 21810, 59765, 953, 18191, 37525, 4215, 50197, 60589, 22590, 43697, 722, 54142, 21010, 32275, 38081, 1712, 47735, 40358, 8052, 34640, 1088, 48581, 11704, 45012, 33701, 55405, 17501, 52257, 23038, 37644, 64592, 26447, 6995, 21402, 52019, 2839, 62641, 25713, 49783, ++ 19330, 61088, 3297, 47155, 18158, 26785, 65133, 22954, 51194, 39012, 16894, 46920, 24453, 60611, 10172, 48704, 32013, 38746, 17223, 33723, 63060, 39991, 20519, 44759, 16689, 41399, 28817, 4314, 20115, 40345, 60141, 333, 22175, 47597, 27702, 18233, 61554, 1378, 59571, 31744, 46862, 18651, 58200, 25420, 61664, 11687, 38920, 28495, 17343, 36228, 3326, 61928, 45318, 4743, 48644, 28189, 46473, 25546, 36508, 43747, 939, 47423, 10252, 64232, 30267, 15031, 52017, 33538, 19105, 2649, 48451, 63407, 6836, 56587, 33141, 64419, 43915, 55848, 31185, 2866, 54313, 13084, 34629, 16656, 29344, 35688, 24462, 51762, 30150, 61875, 17098, 42079, 14336, 46174, 22373, 2897, 15610, 24142, 62009, 28732, 12494, 46423, 9771, 51476, 24683, 64799, 13949, 61293, 15291, 45776, 5780, 64581, 20579, 51651, 9854, 24135, 55465, 43968, 21972, 51007, 38001, 57090, 3442, 42840, 10909, 25353, 36567, 18660, 30590, 53306, 17340, 47195, 58720, 19911, 36315, 25985, 55824, 
8713, 59577, 25058, 10283, 54478, 30016, 18987, 52636, 30684, 63953, 25557, 40130, 3253, 49474, 6157, 52728, 21710, 55617, 45481, 8128, 26618, 10022, 21048, 54861, 5160, 59277, 30494, 6433, 54495, 16338, 3761, 48914, 8268, 43850, 30662, 41649, 50722, 12190, 46726, 6202, 26648, 42185, 18053, 46231, 21168, 55880, 16290, 50310, 33543, 18845, 39634, 4518, 63945, 22486, 13163, 61506, 5932, 42846, 11669, 46284, 19466, 9291, 53640, 14579, 29966, 42167, 4801, 39061, 12993, 43397, 51263, 30694, 37883, 4477, 17499, 42302, 61582, 50667, 19500, 4985, 51577, 27681, 34479, 63121, 30304, 57995, 10173, 31021, 7768, 56179, 16401, 39808, 61898, 7339, 58870, 28397, 15203, 55854, 24643, 63525, 38914, 16169, 57498, 28076, 4010, 25483, 40934, 1522, 58014, 13265, 4580, 48345, 35893, 61474, 11309, 46708, 12974, 39933, 1307, ++ 33263, 44593, 21767, 32045, 38390, 48538, 2459, 45478, 9612, 59760, 943, 55580, 31312, 3157, 43442, 19950, 6030, 62133, 23018, 45284, 8458, 14171, 50740, 3065, 58981, 7913, 62702, 31602, 56427, 25339, 13497, 29712, 63414, 33849, 5173, 53665, 30774, 42634, 26186, 10357, 64343, 2457, 44530, 9349, 34235, 54399, 1240, 64759, 43613, 56834, 25045, 33413, 18496, 58676, 38802, 8947, 53687, 17660, 61533, 20707, 56459, 23643, 50554, 3350, 37368, 48890, 7629, 41774, 59625, 38905, 25513, 15909, 36810, 26399, 40653, 3680, 22040, 11034, 27011, 61660, 36282, 22607, 47956, 52725, 7576, 61160, 45916, 2601, 21307, 34256, 50414, 1137, 59087, 26538, 35362, 55456, 45428, 33452, 7921, 54703, 23323, 32211, 39470, 15929, 45143, 4930, 41402, 33132, 23749, 57568, 29122, 11424, 43394, 2644, 47022, 33389, 16148, 736, 58555, 14015, 9497, 27765, 15767, 55658, 31573, 61668, 13351, 57824, 8572, 39913, 1601, 32008, 10095, 40974, 52436, 13514, 32939, 42501, 17753, 40686, 65504, 21070, 3784, 46366, 11987, 41089, 1701, 34716, 14254, 63153, 27075, 60147, 30332, 38988, 13829, 28746, 40977, 57554, 31981, 60593, 37456, 24808, 40604, 18583, 50546, 25435, 61378, 32582, 53048, 21615, 60418, 17699, 35009, 
15559, 24777, 56554, 38843, 50198, 10632, 53631, 35901, 773, 28881, 60224, 24501, 2463, 57073, 13758, 47672, 9445, 44958, 37499, 19918, 50699, 26857, 56417, 32430, 63257, 40990, 48319, 1127, 57309, 10548, 49671, 27195, 53100, 18328, 9844, 56026, 20090, 64023, 47609, 26433, 215, 24091, 56782, 33301, 11770, 43025, 9087, 48878, 23334, 14121, 40466, 52054, 47369, 36203, 27864, 10587, 25025, 48229, 19003, 53075, 35843, 5203, 46308, 19682, 30987, 53596, 9180, 36547, 47311, 62906, 14799, 34787, 44633, 30116, 53280, 24375, 14365, 43570, 27154, 32427, 55189, 17235, 59659, ++ 36497, 10749, 62934, 6848, 54347, 11760, 57517, 19137, 30412, 34625, 22087, 41261, 11457, 64004, 34911, 57777, 28434, 51916, 2112, 55799, 29459, 61299, 36178, 31090, 18988, 48625, 21549, 42523, 10294, 51602, 46349, 38503, 8247, 49145, 24738, 45043, 8939, 37742, 20862, 48520, 39705, 27478, 36929, 50365, 29481, 19188, 46584, 23551, 7321, 14847, 47783, 9856, 52158, 26627, 15351, 57455, 2606, 40487, 29873, 5327, 35097, 14061, 33218, 59097, 12662, 22334, 62472, 4275, 29375, 10135, 57621, 50882, 851, 53911, 14011, 47637, 34368, 52168, 41469, 16239, 8602, 59808, 4680, 32500, 40809, 11466, 17928, 43236, 58236, 5513, 24908, 36607, 20484, 52286, 6216, 29686, 11316, 38025, 48308, 17280, 42261, 64262, 2114, 60211, 26322, 53885, 20019, 9153, 50780, 1919, 39877, 54605, 31444, 58137, 18478, 62680, 50248, 25156, 41948, 30056, 46268, 59845, 35679, 19691, 47489, 4636, 41539, 50460, 28063, 65270, 22799, 55288, 49233, 4292, 21607, 63839, 5917, 23248, 53191, 2042, 27931, 50230, 33583, 61952, 24361, 57788, 20228, 56538, 44220, 8407, 37311, 11077, 46788, 1355, 62495, 50393, 16176, 3493, 47808, 14390, 430, 52197, 9370, 64175, 34424, 2423, 42290, 11960, 27057, 37000, 10241, 47362, 4829, 62977, 40232, 1552, 20272, 33193, 64784, 7851, 25251, 49445, 41444, 5703, 36778, 45384, 27689, 51535, 35167, 26106, 58044, 3055, 53069, 8503, 35615, 15110, 25011, 5294, 31052, 18139, 45166, 23405, 33674, 54845, 21739, 6751, 62962, 36641, 45642, 
7395, 32869, 13803, 58987, 31423, 45321, 14788, 37178, 62280, 20884, 46292, 5623, 41510, 61359, 1835, 25888, 17950, 4786, 63766, 34022, 51464, 3471, 42094, 12217, 22839, 62615, 11050, 51784, 3015, 37919, 23587, 60436, 20205, 8319, 51238, 21132, 9839, 63825, 19486, 38644, 57302, 823, 58535, 5566, 22747, 45629, 29542, ++ 4817, 56588, 15875, 40917, 23940, 35752, 15198, 42160, 53133, 7684, 63163, 27243, 50075, 16313, 26068, 8147, 42627, 12603, 36679, 16026, 41945, 22276, 9990, 54831, 27920, 38077, 1575, 58346, 15916, 34384, 3704, 19416, 55689, 16500, 57845, 14255, 64812, 3127, 54974, 16879, 7578, 52829, 15768, 5493, 57144, 12824, 35774, 51825, 32268, 55411, 21669, 63667, 365, 34568, 44437, 22738, 32140, 65085, 10766, 51684, 42148, 62971, 7006, 27829, 45059, 54332, 18024, 36155, 46857, 23360, 17253, 30687, 44588, 21400, 58918, 7800, 62301, 18371, 3, 57818, 44933, 28418, 20155, 55998, 23254, 64172, 27455, 37226, 13668, 44553, 65097, 8714, 40578, 12725, 62470, 43657, 59526, 19761, 4218, 57014, 6578, 20718, 49186, 30317, 10734, 36699, 59355, 46662, 35256, 63430, 21838, 7883, 26856, 13269, 38351, 4858, 28846, 36756, 7654, 63119, 17580, 1829, 40614, 8164, 53738, 23905, 33734, 6668, 20894, 45113, 11652, 35420, 15915, 62052, 28856, 47910, 34660, 44566, 12282, 56957, 35713, 7719, 43803, 10802, 37686, 5318, 48825, 28385, 17639, 51396, 23700, 58108, 19182, 35496, 25136, 6671, 36530, 65183, 23316, 33593, 45795, 27396, 43616, 22853, 13461, 39335, 55842, 19336, 65488, 649, 56997, 23210, 53908, 27607, 9295, 59477, 23657, 44476, 14540, 31067, 58511, 11143, 19508, 52251, 12547, 62899, 20634, 6411, 59607, 15715, 40437, 31670, 23821, 44096, 59275, 43, 51395, 39372, 55626, 7832, 36354, 59797, 16800, 2522, 47266, 40415, 28493, 1369, 25742, 57761, 23190, 51875, 9405, 39924, 6490, 49372, 26077, 2158, 53855, 13544, 56362, 17046, 35284, 28925, 55100, 38490, 58499, 12912, 44681, 20174, 57354, 30432, 61049, 44079, 31847, 41053, 27225, 59146, 13964, 43039, 5006, 49522, 29590, 39048, 60937, 42344, 
3201, 47517, 7886, 30873, 50840, 18396, 35069, 65443, 8746, 52898, ++ 24639, 47873, 27976, 50454, 31, 60368, 29290, 62313, 4564, 37778, 13904, 46064, 5255, 39486, 53497, 48017, 18824, 65502, 25029, 49231, 4465, 56916, 47403, 6351, 64326, 13241, 53583, 26108, 36802, 65173, 23202, 59190, 32131, 1985, 40962, 34658, 21794, 43446, 28716, 33209, 60066, 24124, 63475, 22443, 40525, 61004, 2884, 16748, 41909, 5002, 39815, 29069, 38084, 11428, 59698, 6249, 49554, 13459, 45625, 24579, 16686, 31088, 53308, 20114, 39596, 274, 32292, 56169, 13588, 64980, 42467, 5899, 61357, 11753, 28668, 37307, 23951, 49731, 39017, 31632, 12178, 50246, 37996, 14174, 46656, 672, 49532, 9094, 54990, 30749, 16478, 56373, 32371, 48627, 18411, 1731, 27164, 51136, 40185, 25673, 36365, 58396, 14695, 43549, 56089, 18076, 54, 27676, 12835, 16994, 44665, 34677, 60837, 48218, 22925, 56224, 45242, 12246, 54123, 21031, 33181, 52755, 25929, 64030, 14996, 38434, 62625, 16876, 52146, 2669, 38839, 57458, 25189, 43572, 14648, 1209, 58160, 19282, 31326, 47013, 15468, 59910, 22587, 53788, 16257, 42970, 31678, 6981, 61017, 32979, 42430, 4407, 53571, 12496, 43934, 56266, 19662, 41820, 11787, 54053, 17848, 62027, 4214, 58171, 49363, 29822, 7136, 48070, 15046, 40921, 31472, 45136, 13856, 37732, 52058, 30098, 48667, 4181, 55597, 39299, 22080, 47148, 64079, 32091, 39986, 8890, 33998, 43364, 29779, 1202, 54561, 11040, 63491, 16698, 29391, 41914, 62243, 13495, 22117, 65000, 27636, 11899, 38578, 63721, 31867, 14161, 60548, 48482, 15707, 41746, 2931, 34746, 55432, 21983, 61019, 17823, 58226, 29751, 38894, 32482, 24692, 64706, 10990, 45845, 6634, 21578, 49300, 31318, 2233, 40729, 14377, 8660, 26312, 434, 15858, 50048, 6934, 18785, 45570, 56733, 32803, 12848, 54874, 139, 17029, 27480, 33381, 59548, 16330, 40412, 10556, 42676, 48762, 15298, 37838, 18899, ++ 41799, 12624, 64356, 20477, 32749, 45002, 8600, 25417, 49362, 23496, 54944, 17995, 57172, 20947, 32603, 516, 40675, 30534, 7364, 60003, 27381, 34200, 17542, 39314, 24373, 
44192, 33003, 8830, 47869, 5574, 43147, 12249, 39948, 26971, 51307, 10868, 60583, 6172, 56499, 11943, 45769, 39, 42334, 31949, 8034, 26336, 48309, 57977, 27082, 62739, 13168, 50117, 16368, 54701, 20283, 41508, 26162, 37005, 55779, 1422, 58267, 9173, 46653, 15615, 61747, 24969, 50071, 8405, 40403, 2194, 34666, 55337, 19376, 32706, 53098, 4472, 45848, 10265, 20645, 65493, 25595, 2277, 63101, 6377, 33426, 58845, 25871, 52943, 22199, 3177, 47481, 38310, 4630, 23663, 54081, 34000, 64013, 9956, 16214, 61628, 31722, 11878, 29271, 5548, 22697, 40829, 49909, 53333, 30636, 59746, 6455, 52394, 15494, 1143, 40348, 10189, 19898, 61550, 3268, 39280, 48690, 5868, 44708, 21561, 50007, 1033, 29107, 43240, 26833, 60930, 31097, 6195, 46138, 8066, 37896, 54615, 27165, 9542, 61604, 4552, 39383, 29280, 416, 51060, 26739, 64882, 12631, 55111, 22821, 720, 15540, 48042, 27647, 61740, 31242, 8995, 48687, 26138, 59437, 5880, 38529, 29209, 14838, 36063, 8640, 63161, 24449, 38017, 28433, 51405, 6054, 19963, 58347, 2597, 18625, 42454, 12872, 36532, 17441, 61850, 3279, 28299, 15253, 1812, 57565, 24064, 55151, 17944, 64633, 46457, 21782, 48795, 33383, 4119, 47397, 20771, 9986, 34093, 49053, 3318, 46115, 52553, 6088, 24710, 50883, 8609, 34219, 21071, 54051, 38265, 65412, 12262, 27840, 43752, 3904, 41059, 10722, 47915, 7916, 52406, 3216, 27451, 50751, 19177, 60087, 42908, 9462, 24286, 53651, 65428, 33358, 50476, 55504, 37163, 59675, 21494, 34321, 64841, 28838, 1963, 25903, 64162, 35439, 23956, 46194, 56069, 12085, 26084, 54250, 23312, 63244, 28315, 3605, 55731, 31565, 61939, ++ 2028, 34365, 39111, 5942, 51850, 17064, 55838, 40241, 10458, 43982, 28796, 36080, 2729, 61678, 14878, 58682, 11110, 54384, 37657, 43835, 13671, 51225, 911, 45933, 11356, 52075, 2821, 60787, 18382, 49931, 28188, 53072, 7457, 62317, 17591, 47038, 30093, 23638, 49764, 36067, 20178, 51096, 13702, 55893, 45264, 33684, 11120, 37453, 8772, 19763, 46197, 2288, 61232, 31376, 47313, 3564, 62333, 8283, 21254, 39170, 28786, 49107, 
35858, 3014, 43074, 11670, 28271, 60577, 20812, 48028, 26731, 8926, 49343, 39856, 14873, 42839, 60366, 30291, 55060, 7127, 41936, 53598, 29717, 43851, 19264, 39480, 15733, 31934, 41362, 62873, 28331, 11820, 60948, 42984, 7669, 21736, 37486, 31118, 46893, 869, 53067, 44519, 52050, 34553, 63070, 8214, 32757, 4368, 39191, 19204, 42126, 25424, 32326, 63922, 49811, 28095, 42936, 31668, 51278, 26578, 12767, 57996, 30867, 10485, 34719, 56592, 12053, 59263, 9176, 48139, 13772, 53950, 20299, 60005, 33242, 17137, 40374, 51739, 24014, 49747, 20606, 63243, 41717, 13978, 35321, 2993, 45559, 36739, 50017, 39527, 64261, 20791, 38358, 2740, 51922, 22390, 39688, 1910, 30733, 51226, 10974, 56768, 47032, 21986, 45317, 16845, 52505, 2062, 59888, 10795, 62410, 35606, 43330, 29084, 64503, 6738, 60542, 25683, 51128, 9845, 34896, 44006, 53146, 37925, 16873, 48078, 3571, 50839, 11949, 25433, 38947, 7672, 60656, 14129, 36134, 56975, 26342, 54233, 16317, 40057, 30451, 19794, 61312, 42581, 18687, 56726, 44128, 10964, 30135, 5764, 19286, 46453, 52830, 16285, 35798, 30758, 64350, 22756, 44219, 15654, 57486, 42215, 36678, 313, 32889, 15294, 62815, 37691, 16791, 22096, 5568, 39199, 17579, 10050, 47152, 4094, 54457, 11913, 39637, 52659, 16670, 41589, 10387, 58677, 31667, 6567, 50154, 36816, 1811, 45138, 34008, 13708, 51330, 25072, 7504, 44232, ++ 22215, 54801, 9976, 58384, 26565, 3483, 33795, 19756, 58863, 1613, 65032, 12129, 47443, 29896, 44498, 23980, 50415, 21270, 3643, 18219, 63544, 31802, 57980, 19831, 62438, 35138, 22557, 41199, 30620, 13921, 61484, 21101, 44678, 25514, 36538, 1003, 52481, 39009, 15076, 3937, 62889, 30403, 38192, 18078, 4341, 65271, 20968, 53951, 30849, 58856, 34864, 42784, 23744, 7698, 27626, 51183, 17975, 34193, 43926, 60828, 4831, 18732, 63565, 25930, 33914, 57759, 37727, 5098, 52556, 15461, 63159, 38112, 24616, 1646, 63937, 27391, 16731, 3105, 33746, 47351, 17712, 35174, 14746, 51335, 9908, 56890, 4114, 60279, 7045, 35065, 18919, 51532, 26278, 15307, 58603, 49617, 13484, 
55815, 24380, 41198, 9019, 21476, 3078, 47590, 25266, 56783, 16407, 62393, 24050, 55227, 11104, 47713, 3920, 36281, 17888, 6245, 59022, 14781, 8429, 65107, 37372, 16382, 42362, 60282, 4055, 46636, 22417, 36895, 18924, 34149, 23540, 41757, 29832, 3512, 50683, 11268, 64631, 2460, 36651, 13015, 45186, 8332, 32377, 55922, 19967, 58649, 25411, 17265, 10167, 29125, 7579, 54560, 13607, 34513, 10638, 58536, 32796, 63411, 17062, 44543, 34945, 20352, 1207, 53382, 31792, 5291, 42991, 33973, 21360, 46040, 25991, 16430, 8093, 49566, 22439, 33750, 54299, 146, 45555, 26934, 54902, 7284, 22764, 30299, 59982, 10367, 41753, 30892, 37099, 5461, 52391, 19167, 41244, 28709, 50146, 6579, 38182, 1627, 43716, 9056, 57955, 12684, 33130, 403, 35985, 23960, 4033, 62395, 49937, 27056, 59641, 32296, 807, 62778, 8742, 54502, 18842, 1296, 59411, 34271, 20286, 12463, 63496, 23601, 52781, 30000, 45467, 3760, 56894, 48480, 28697, 46073, 23474, 63955, 29913, 42536, 24832, 49056, 7825, 22559, 48051, 5798, 43975, 15377, 53040, 21935, 40744, 9253, 61686, 19901, 6212, 60168, 20826, 39694, 58957, 11980, ++ 49956, 29990, 46415, 14644, 43190, 63923, 47656, 13047, 31659, 50772, 21603, 34038, 52471, 9191, 38292, 5696, 35464, 60687, 33395, 46689, 23450, 9404, 26587, 42929, 5129, 29042, 54176, 15535, 57327, 602, 37607, 33524, 4573, 56994, 14590, 64153, 9834, 18909, 58390, 41457, 25154, 9487, 59325, 28108, 49412, 24460, 43122, 902, 50791, 6628, 25356, 10534, 52888, 40161, 64383, 13959, 29586, 54074, 11903, 23286, 52414, 41103, 9981, 54994, 6444, 17059, 45945, 22674, 30136, 43327, 10654, 31765, 58368, 46512, 13272, 35620, 50689, 57232, 22979, 12827, 59373, 4958, 61932, 24282, 36953, 27879, 45505, 20776, 48860, 10771, 57633, 172, 46340, 30082, 35784, 2518, 45045, 6870, 17778, 63485, 28026, 60533, 38706, 18690, 12326, 37318, 45966, 9676, 48646, 2784, 37731, 65465, 21330, 57335, 29898, 53135, 24458, 35479, 47156, 23481, 199, 54453, 19294, 24944, 40110, 14364, 55061, 5054, 51465, 64913, 544, 55979, 18096, 63097, 
25570, 42791, 31820, 21941, 56447, 27697, 58921, 18362, 38257, 6056, 47734, 9248, 40790, 62627, 33982, 59545, 46565, 24752, 41401, 60783, 18182, 43206, 4869, 15143, 55273, 7948, 25610, 40842, 63967, 12931, 61082, 26760, 57746, 17979, 55209, 3949, 39546, 60905, 32671, 56188, 11612, 41121, 16008, 31856, 19203, 59237, 14029, 40512, 62712, 5108, 45887, 26522, 20011, 61586, 14795, 58383, 27997, 55768, 2136, 65509, 18418, 23591, 60146, 29679, 61841, 22850, 49422, 25515, 44589, 55266, 14965, 58917, 40887, 17117, 37341, 13208, 42715, 20713, 39331, 24401, 50428, 28376, 47055, 37988, 25623, 6883, 49779, 39663, 8848, 46828, 6128, 59196, 11664, 27105, 34775, 8192, 61458, 1481, 52299, 35204, 6310, 14948, 58363, 18179, 36166, 62154, 30258, 61233, 20595, 37417, 3778, 65195, 17693, 48964, 29400, 56905, 38193, 30501, 47094, 454, 35650, 27001, ++ 4347, 60833, 17855, 36752, 31095, 22569, 7123, 61211, 37259, 16138, 42841, 6503, 24779, 56448, 19233, 64469, 27089, 15625, 10361, 53013, 1921, 40816, 55244, 15159, 59288, 12046, 38786, 7090, 46540, 24049, 54613, 10514, 48245, 19718, 42008, 31186, 46143, 34952, 27321, 6999, 54232, 47356, 2077, 53279, 12128, 39256, 15462, 60697, 17733, 29750, 44790, 56568, 18857, 32559, 4497, 36414, 57018, 628, 46949, 32811, 14782, 26937, 44705, 21611, 51006, 31336, 64616, 14221, 56907, 3479, 51597, 18517, 6599, 22160, 54099, 5402, 38690, 9397, 45219, 36540, 26645, 40459, 21447, 48369, 1563, 64638, 13174, 54243, 39965, 24031, 33163, 42520, 9543, 53268, 20100, 65300, 28488, 39384, 50749, 3799, 35456, 14057, 54925, 26993, 64963, 1519, 22208, 43022, 29651, 58645, 13864, 27342, 8799, 45547, 11747, 41075, 2133, 60546, 17245, 56880, 32557, 45882, 7169, 62185, 30348, 49590, 28295, 44308, 32153, 15622, 39658, 10330, 48967, 37327, 5620, 46743, 16081, 48616, 7214, 34432, 3304, 54213, 26062, 63717, 29934, 52911, 21755, 4702, 51677, 2186, 18764, 57132, 5715, 29547, 46107, 24098, 53137, 27919, 37169, 49834, 60084, 6880, 33111, 23728, 39960, 9501, 36397, 12293, 48396, 
30964, 13581, 50331, 1290, 24928, 46578, 4557, 47906, 63786, 38540, 5865, 42911, 20872, 32978, 49009, 13444, 35726, 56485, 382, 42707, 32601, 9592, 44750, 34970, 12810, 53349, 45763, 11403, 51853, 17399, 35411, 4384, 64187, 10324, 28931, 46921, 8091, 31137, 53210, 1928, 57367, 7073, 49017, 10116, 56611, 14515, 5385, 33077, 11993, 61870, 55643, 29355, 17467, 54273, 26531, 36023, 20629, 41154, 51285, 13656, 43498, 19807, 36757, 12580, 27523, 56409, 44855, 32067, 893, 53819, 10914, 33764, 2473, 51054, 28007, 45412, 25264, 34922, 12746, 43424, 5056, 15776, 52314, 9679, 63564, 16758, 53530, ++ 42418, 21289, 8119, 56762, 1073, 52789, 41338, 26154, 4213, 54112, 27818, 59855, 40100, 13448, 45590, 1278, 48898, 56008, 38978, 28699, 61886, 34577, 7724, 37131, 29849, 49341, 20667, 63860, 31362, 43639, 16984, 63241, 28565, 50566, 5804, 22958, 55182, 2631, 61282, 43886, 17063, 32778, 22214, 35616, 62165, 5972, 46729, 23080, 36689, 63968, 14532, 37824, 1618, 59995, 48781, 15872, 22471, 38520, 59258, 7194, 65420, 35455, 1993, 60189, 11115, 38754, 1148, 53500, 36589, 23713, 41266, 59921, 34234, 44054, 26030, 62688, 19800, 29457, 64802, 927, 56308, 8371, 52445, 11282, 42756, 17338, 34420, 29182, 2145, 63703, 14450, 61365, 22804, 38619, 5051, 25112, 59859, 14924, 32967, 56578, 23043, 48177, 5905, 41907, 50458, 33872, 57883, 15097, 35754, 19560, 50954, 43792, 54286, 33577, 62078, 20427, 49125, 34061, 5432, 43499, 11332, 27210, 52271, 36065, 2254, 17783, 63652, 7731, 24528, 45598, 58304, 35179, 26413, 12812, 57184, 8958, 60509, 38928, 24673, 61322, 42285, 11697, 46451, 16613, 1491, 33494, 43550, 14872, 31034, 44679, 26997, 36112, 15900, 50623, 104, 64778, 12222, 47521, 21590, 2883, 18896, 42674, 48945, 3405, 50806, 19614, 59158, 41565, 7368, 65037, 20696, 35175, 53240, 17202, 37544, 57448, 21475, 8727, 29604, 50457, 11346, 56887, 1087, 24602, 65190, 7963, 49885, 22307, 53557, 17236, 62377, 21205, 24742, 39814, 4946, 27448, 42361, 32763, 7184, 56154, 41488, 21369, 37792, 52116, 5148, 
26545, 64760, 22228, 45079, 25061, 33813, 63797, 18102, 36467, 43213, 60685, 51633, 41835, 21247, 4342, 44919, 32026, 2616, 60949, 14202, 56024, 1134, 31585, 62357, 25421, 54685, 16247, 60272, 41818, 20853, 9368, 63232, 40286, 26685, 19383, 57245, 38499, 13585, 59947, 8641, 56371, 1017, 62389, 22461, 53952, 26385, 41513, 23675, 32984, 45958, 13323, ++ 28875, 64892, 38790, 24484, 49166, 18721, 10931, 57631, 39390, 9742, 46774, 17675, 3275, 62558, 32282, 25750, 41586, 6806, 20337, 44945, 12943, 49999, 21925, 64878, 3857, 45344, 25922, 2218, 50957, 9126, 35489, 3299, 39428, 12897, 59779, 38008, 8605, 48669, 20765, 11419, 40009, 63799, 14198, 44302, 19358, 31493, 57345, 3670, 52535, 9203, 48083, 21993, 54921, 12505, 24904, 43258, 63124, 9699, 19455, 30460, 49665, 17483, 55988, 29309, 41897, 19699, 48546, 27281, 8010, 62013, 12234, 28543, 465, 56056, 15694, 32160, 48193, 11551, 42211, 18711, 49141, 32916, 25158, 60686, 31415, 55667, 5729, 50826, 18267, 44793, 36402, 6109, 55267, 16679, 57115, 41496, 10944, 47296, 19420, 43351, 10386, 59236, 32491, 16874, 7491, 28581, 53556, 5096, 61788, 7053, 31970, 583, 23302, 16000, 4683, 38100, 14205, 55317, 22677, 29436, 64281, 38640, 20663, 9878, 58752, 41322, 10973, 38061, 61402, 3103, 21160, 6954, 52845, 43910, 30547, 19782, 28494, 797, 52205, 15016, 30839, 50808, 23353, 39990, 57504, 13251, 60388, 24257, 56350, 11500, 49379, 8511, 62132, 37562, 21103, 40163, 30083, 9589, 62705, 32309, 57365, 28557, 16064, 56044, 30407, 44272, 263, 27849, 53793, 24029, 44774, 9150, 27275, 63369, 12633, 28664, 36002, 52740, 23516, 61124, 34127, 27865, 52004, 37404, 18476, 44289, 29118, 34373, 6793, 38344, 48445, 3776, 47020, 59000, 15978, 63137, 746, 20499, 47859, 15210, 31343, 2355, 59499, 17874, 39624, 48101, 11550, 35229, 15483, 51217, 29555, 4549, 45938, 26721, 2284, 23278, 16929, 8148, 48556, 35519, 14888, 64916, 42512, 22487, 48009, 38289, 18444, 57719, 7230, 39954, 4527, 47670, 32619, 2679, 51565, 37854, 23089, 13196, 50598, 42845, 7360, 
46918, 24209, 31070, 41196, 19054, 51650, 31936, 39999, 11065, 64722, 2636, 58116, 19433, 6460, 57426, ++ 37484, 2966, 48079, 14044, 34949, 62224, 45378, 30573, 15327, 63672, 23292, 36519, 51341, 29350, 8414, 54646, 16777, 59521, 30999, 4775, 56709, 17349, 47710, 24667, 52796, 16552, 35974, 60262, 19092, 56204, 26686, 58725, 22019, 53776, 32299, 16303, 65464, 29181, 34021, 56715, 26532, 4787, 51448, 28947, 55498, 10121, 34329, 41044, 25896, 33081, 5262, 41690, 27248, 35135, 45894, 5812, 28370, 53417, 40754, 45417, 3982, 37581, 23937, 47855, 5604, 58840, 13130, 34956, 44388, 20308, 47173, 49915, 17622, 40118, 7506, 52045, 2543, 61090, 23846, 54549, 15115, 44218, 3315, 37737, 7847, 47057, 21940, 38923, 59173, 8520, 30409, 50020, 26754, 47899, 12853, 30907, 51835, 7984, 62800, 417, 29549, 37953, 20400, 61108, 44103, 11476, 24603, 40666, 46818, 26232, 60116, 38921, 56654, 41708, 51594, 25748, 63319, 9536, 39804, 50609, 2998, 13622, 56071, 48478, 33476, 21790, 53469, 27441, 13135, 50925, 29581, 62521, 22657, 1863, 64082, 41182, 53643, 35859, 10539, 44396, 21363, 5141, 62225, 9712, 28719, 49092, 36331, 6800, 39147, 65367, 22270, 42806, 31543, 12869, 55533, 6515, 57924, 35394, 45028, 13730, 38789, 10332, 64548, 36771, 14105, 22219, 62878, 33434, 15454, 38297, 3140, 58801, 40392, 5511, 43698, 61955, 1991, 14880, 44882, 2916, 17632, 46799, 15624, 58748, 4257, 54443, 12256, 40839, 63610, 27028, 10873, 57201, 31977, 8767, 30586, 37459, 50580, 57617, 25983, 36888, 62527, 45423, 27755, 14005, 54623, 20246, 61120, 3180, 55845, 9483, 40537, 60061, 13613, 54920, 31552, 57826, 40095, 63226, 24801, 53420, 27932, 9868, 50918, 5158, 33210, 10292, 44408, 24525, 49908, 21386, 64561, 29536, 10635, 57929, 25998, 5249, 60756, 46443, 3402, 28423, 55556, 17263, 63647, 4697, 49673, 14197, 35978, 7636, 47624, 28657, 18207, 48459, 36413, 50659, 43715, 25559, ++ 11254, 54291, 32085, 59978, 5631, 28158, 2399, 20786, 49828, 32951, 304, 58059, 14307, 43534, 22155, 48164, 12365, 36217, 51828, 25268, 
42236, 32477, 103, 39731, 9781, 57557, 41709, 10989, 32901, 40474, 6550, 47175, 11742, 45140, 1608, 25825, 42817, 13425, 52173, 531, 37189, 58636, 8326, 42080, 1435, 48988, 17315, 59491, 13294, 49961, 61978, 16621, 65015, 8490, 58411, 20500, 50271, 2515, 13742, 25687, 58006, 12103, 61491, 8749, 33369, 25410, 63759, 16391, 55652, 4405, 32850, 9775, 64284, 37143, 57906, 21060, 40989, 30982, 34891, 6064, 28837, 63495, 20005, 57494, 16093, 27264, 62585, 12447, 25705, 52544, 19671, 40635, 3469, 32274, 61821, 1447, 36894, 23512, 34742, 54412, 45608, 13285, 52655, 2393, 49424, 31304, 63745, 20821, 12661, 52157, 17462, 10819, 28691, 7597, 30563, 44427, 1401, 46468, 28224, 18407, 59542, 44078, 26083, 4453, 16112, 47560, 1316, 57023, 32852, 42144, 15247, 36434, 47829, 17477, 33871, 4741, 14239, 59357, 19099, 65135, 32205, 55557, 37087, 17908, 45738, 3912, 19551, 53503, 28137, 1060, 17708, 59089, 3659, 52423, 26467, 48323, 19809, 4091, 24887, 54155, 894, 46842, 26228, 4692, 54712, 46236, 11107, 51548, 5832, 47182, 56471, 22626, 30263, 49223, 10448, 20158, 41858, 58139, 26203, 39746, 64303, 8309, 41541, 10096, 32374, 25325, 60360, 2687, 16232, 51727, 23259, 36488, 14639, 43463, 54734, 19663, 10205, 33918, 5671, 53928, 12188, 8445, 50320, 34411, 1042, 43340, 32130, 38936, 23660, 62122, 18975, 37031, 21667, 47565, 10895, 34882, 3466, 30239, 13100, 580, 58623, 36919, 19401, 59928, 28525, 64126, 3328, 53129, 11330, 34139, 15472, 38610, 45217, 18930, 48750, 31227, 16467, 34617, 65016, 14637, 32344, 36974, 10011, 44207, 21661, 57835, 27079, 60621, 3978, 55342, 34533, 8255, 14429, 27758, 1531, 62700, ++ 33602, 20093, 8892, 23024, 44336, 52109, 35876, 55289, 7893, 41964, 53200, 19999, 34521, 61401, 3984, 33252, 64103, 22881, 2516, 63183, 10657, 58553, 14403, 61115, 33820, 6136, 27845, 48830, 14828, 23313, 64682, 29625, 17907, 62029, 35874, 57688, 5105, 50192, 18533, 45614, 23808, 15959, 47711, 24778, 64629, 38697, 27729, 7160, 43719, 21129, 28659, 39404, 3084, 51779, 30249, 11006, 
42276, 31653, 62211, 33999, 52108, 20955, 42918, 15969, 54382, 40279, 2725, 50496, 24352, 39430, 59215, 21829, 29992, 3934, 25301, 45611, 8791, 53331, 13799, 58579, 39245, 10526, 46212, 30578, 51780, 41652, 3864, 35331, 43696, 1043, 64341, 12023, 57997, 20994, 44294, 17999, 53659, 42387, 15511, 25566, 4715, 64215, 26517, 36204, 18300, 39598, 996, 55741, 37026, 3558, 34374, 64476, 47430, 18867, 61023, 12995, 58237, 16580, 53784, 6761, 34947, 8902, 40722, 31771, 64774, 24180, 43063, 19646, 8610, 54741, 5360, 59707, 11809, 55682, 27015, 50058, 46018, 23789, 39745, 2304, 47377, 7990, 26558, 52558, 34905, 64391, 42045, 10432, 47966, 32571, 54357, 23596, 34324, 44093, 16415, 33314, 60453, 41937, 51353, 17490, 61283, 22789, 52262, 42146, 8219, 29655, 39175, 25358, 61472, 18979, 32265, 14267, 52144, 18114, 34536, 55406, 31560, 7014, 49720, 13213, 30756, 55723, 23061, 62152, 39252, 51185, 20422, 43062, 31295, 46145, 1319, 64451, 49510, 25615, 2964, 60982, 44992, 13819, 64871, 24383, 42971, 23103, 58556, 19187, 63373, 28287, 7632, 49652, 12764, 44315, 110, 53601, 29046, 6341, 65131, 20020, 52619, 45276, 56952, 39099, 16519, 46160, 23978, 40408, 12802, 48974, 17858, 30807, 41462, 59026, 73, 53973, 6799, 62991, 12162, 55149, 43748, 8884, 22207, 52485, 534, 59405, 25725, 54602, 33199, 2100, 45793, 20372, 39182, 15173, 44687, 59264, 30803, 61156, 40309, 17484, ++ 46854, 51455, 40860, 56311, 16409, 12706, 24974, 62984, 17274, 27207, 11895, 45976, 6091, 38611, 26437, 49614, 9018, 40367, 46297, 19394, 37908, 27526, 44397, 18657, 52203, 21658, 62870, 1226, 55516, 42446, 4133, 34272, 51639, 7647, 24202, 41150, 19921, 30927, 60757, 10686, 32462, 62663, 21333, 35272, 12599, 20078, 60297, 30531, 54484, 251, 57586, 11607, 47449, 19138, 38243, 60545, 22988, 55302, 16843, 7865, 39710, 111, 32233, 51398, 22284, 10438, 45192, 30877, 6974, 35976, 12558, 52863, 43497, 14475, 51137, 27944, 60158, 17008, 47733, 4253, 22463, 50574, 35722, 336, 23423, 14000, 60027, 16917, 56431, 24536, 33678, 
45899, 28711, 37651, 9188, 64779, 27481, 6321, 58261, 48959, 21196, 40298, 8441, 57219, 10044, 58865, 15857, 44752, 27847, 48835, 22441, 42663, 2547, 52970, 21694, 36406, 32997, 24829, 37157, 62792, 22138, 51868, 61226, 18749, 50105, 6463, 35656, 60683, 39107, 46976, 23181, 31274, 40551, 9594, 61859, 20537, 32737, 6581, 54854, 29416, 13641, 58018, 41347, 267, 20875, 12417, 25716, 61065, 37782, 14163, 45382, 7834, 40608, 10002, 63765, 1581, 14543, 28917, 8667, 37859, 31416, 11918, 34680, 18506, 59680, 16706, 57195, 2471, 35866, 9909, 64062, 42621, 817, 59788, 45788, 3827, 25100, 53409, 18772, 36812, 48207, 4872, 35319, 19441, 1913, 14327, 47774, 9217, 56210, 13011, 29980, 19002, 6234, 59723, 40337, 22651, 29199, 39087, 18229, 48868, 3488, 40170, 29912, 6272, 36383, 52721, 16569, 58050, 33392, 27258, 48403, 8519, 58791, 40973, 15960, 43829, 25864, 9107, 18599, 31044, 62639, 7446, 52100, 1779, 55301, 35133, 26270, 61968, 8375, 22703, 43168, 27782, 35638, 24076, 40698, 1671, 28940, 58554, 39400, 47889, 20106, 41990, 6688, 38850, 16078, 64454, 11638, 53431, 24788, 63057, 23137, 5413, 52680, 21554, 10125, 55968, ++ 4867, 15019, 29682, 652, 39789, 59138, 43345, 4514, 38139, 48659, 64662, 30195, 55639, 11329, 59031, 18361, 57243, 28363, 14747, 51025, 5594, 54059, 8003, 31542, 43066, 12559, 30336, 46013, 20200, 36964, 52933, 13756, 44106, 28031, 48001, 10198, 63085, 38560, 6802, 54743, 40294, 3487, 52746, 6286, 56277, 46049, 2752, 50614, 14927, 36253, 45176, 24243, 33481, 56129, 15320, 1095, 36086, 6327, 47095, 24490, 56676, 28004, 64828, 4688, 36766, 62880, 27521, 57370, 19166, 61198, 46362, 1926, 33642, 62235, 10869, 36233, 1248, 38208, 26921, 42945, 61546, 17973, 7332, 64994, 40074, 53757, 32637, 47557, 9817, 49234, 6804, 15837, 54609, 4410, 50369, 22132, 34150, 46638, 11583, 31877, 62161, 14516, 51406, 30692, 46269, 23852, 33102, 6614, 54539, 9386, 62594, 14676, 31140, 40174, 5727, 50282, 10353, 48109, 3668, 41998, 14912, 29972, 718, 37696, 11518, 57762, 14077, 28812, 
3849, 18183, 52373, 125, 45153, 24804, 38419, 3622, 58545, 43408, 16444, 50441, 37966, 22506, 15790, 59821, 49692, 30217, 55050, 16887, 5417, 58414, 27443, 63007, 19121, 50298, 25279, 39003, 56608, 45901, 20639, 58729, 5539, 43495, 62260, 1788, 33905, 49494, 20507, 43887, 28247, 48763, 7593, 26671, 37262, 24267, 15708, 65293, 38922, 11884, 60717, 491, 21890, 54032, 28952, 45141, 57698, 36186, 27361, 61349, 24118, 38688, 58651, 41967, 34727, 16766, 52219, 7874, 47320, 1765, 52991, 31591, 60498, 13274, 46544, 55595, 10579, 22424, 45683, 25305, 5449, 64487, 17289, 35625, 22946, 32721, 50289, 1572, 37631, 61303, 49401, 5962, 41635, 26889, 33852, 15773, 21742, 43893, 5773, 46686, 37358, 55801, 17130, 51097, 13938, 59822, 19588, 52930, 36510, 6169, 24925, 11421, 62659, 30024, 51881, 12509, 48610, 29230, 37601, 9332, 42780, 213, 32642, 42155, 12399, 49406, 35373, 24157, ++ 44914, 60464, 26697, 65382, 34131, 6909, 21749, 50885, 31512, 9393, 23891, 2150, 41012, 21421, 44177, 809, 37320, 7257, 62176, 30045, 35277, 23658, 46983, 65359, 3194, 50621, 38475, 16011, 61768, 8433, 24945, 60083, 278, 56031, 15576, 53419, 2899, 22800, 44512, 27430, 15140, 49813, 29663, 40855, 25620, 10943, 37634, 18589, 63273, 31235, 9544, 61428, 4288, 40434, 26827, 50874, 44209, 18247, 64090, 37236, 12852, 48683, 18667, 43682, 14638, 47564, 17353, 781, 42500, 28975, 15277, 26450, 54946, 18304, 48723, 22907, 64495, 19485, 56513, 11910, 33325, 55145, 28257, 45066, 9064, 26211, 5132, 29828, 19185, 38360, 62110, 22644, 35909, 60816, 13818, 40906, 2814, 56034, 17394, 38811, 1839, 28943, 42820, 21978, 4068, 50652, 61567, 41527, 19835, 35130, 25077, 55976, 8626, 59278, 26915, 65250, 20080, 54814, 31374, 57424, 12167, 45371, 55521, 25390, 46192, 27862, 53106, 44558, 63468, 34607, 26269, 65436, 16773, 49150, 13339, 51951, 27776, 11028, 62710, 25200, 8744, 63323, 43986, 33281, 6371, 40318, 2860, 31917, 50975, 21277, 35794, 2354, 47319, 30582, 5071, 53018, 21898, 7056, 65253, 35684, 49978, 15582, 27618, 47882, 
24628, 6407, 64831, 13173, 55107, 21675, 39852, 54427, 11274, 50989, 29837, 8872, 33567, 47270, 27638, 43379, 63237, 10737, 50655, 7425, 16585, 64957, 6006, 33787, 4466, 53766, 8396, 22026, 55451, 12029, 33014, 63929, 26797, 56679, 21598, 9560, 34906, 51496, 26177, 15855, 38466, 59892, 2588, 41292, 56496, 11225, 42456, 51976, 3824, 61780, 11822, 56282, 28670, 14046, 34380, 22166, 54592, 11579, 58282, 47352, 63424, 9272, 57189, 14724, 29126, 2887, 65102, 9669, 47003, 3928, 26978, 49430, 17549, 61742, 32751, 44756, 4372, 18006, 35275, 61339, 23752, 5710, 58894, 18649, 50489, 29780, 57049, 16616, 38365, 3262, 63853, 31389, ++ 7407, 37720, 11682, 48332, 18454, 47324, 29026, 60996, 13759, 56959, 35504, 60097, 15952, 52295, 31915, 24422, 53572, 42681, 20897, 55108, 1548, 60411, 13356, 25644, 36562, 22653, 54358, 5311, 27145, 49505, 32024, 18419, 39833, 21492, 37462, 33313, 29982, 49098, 12232, 59671, 36585, 8966, 58141, 16490, 61739, 33829, 53654, 23526, 42490, 7563, 48494, 21762, 53123, 13071, 63520, 32076, 10347, 53802, 28902, 3368, 45733, 9459, 58684, 26276, 54806, 7588, 34310, 49212, 53851, 5845, 65329, 41483, 8197, 38626, 5041, 44743, 32394, 6722, 49429, 24961, 2007, 37385, 13043, 58169, 20296, 49791, 63189, 42303, 58775, 2311, 27734, 52916, 10278, 43115, 26135, 59673, 18790, 29979, 60363, 24430, 45237, 55425, 7181, 65204, 35544, 13089, 25925, 5273, 59945, 45808, 1700, 49556, 37555, 17036, 45121, 314, 43201, 15562, 8079, 23799, 48998, 34441, 17524, 6001, 62321, 2727, 22322, 15738, 9273, 49493, 12334, 37520, 7898, 57638, 35034, 18491, 42451, 36688, 1173, 34073, 48408, 4259, 28010, 11400, 56707, 23121, 61936, 46911, 9089, 42557, 12099, 56083, 15225, 36970, 61620, 11331, 33773, 41187, 26939, 2623, 23478, 55712, 40433, 9044, 52685, 38072, 31201, 41345, 4162, 60374, 16240, 32761, 62632, 5074, 44509, 57057, 20925, 59015, 6453, 32162, 15156, 38141, 25773, 42121, 30212, 46557, 21429, 52688, 44415, 18037, 36967, 48642, 178, 28334, 43964, 5220, 35941, 15516, 41692, 61673, 
17564, 498, 42229, 65347, 4629, 30882, 50824, 19634, 37433, 30371, 21115, 47125, 14707, 27652, 38755, 20551, 46586, 4880, 59550, 43374, 2467, 38051, 29773, 4245, 25501, 32224, 39363, 20312, 49596, 33480, 25107, 39718, 30564, 56697, 33936, 8015, 45651, 13493, 56113, 21238, 50022, 57394, 27313, 1331, 43307, 55875, 33847, 47180, 13147, 36680, 6834, 61760, 46559, 27397, 18854, 43015, ++ 54715, 20670, 53358, 3753, 58223, 10493, 36945, 1661, 45149, 19538, 7565, 28616, 47104, 5150, 65220, 10108, 34079, 16335, 47933, 11764, 41283, 17747, 39518, 56540, 8779, 59627, 11469, 34856, 58256, 2451, 43488, 64425, 9433, 59092, 4507, 56925, 17386, 64989, 34591, 1766, 46612, 22421, 31831, 43019, 804, 13836, 44727, 4893, 26670, 58965, 34757, 17147, 29909, 43421, 6920, 20785, 59643, 34476, 14330, 41377, 60928, 31135, 35243, 1778, 39007, 29820, 60409, 11505, 23163, 37796, 31486, 20620, 56985, 24734, 59605, 13317, 54267, 41778, 15998, 46732, 62451, 20919, 52620, 31662, 2919, 36594, 15391, 11666, 34027, 51370, 14573, 44965, 31338, 548, 48350, 7552, 52132, 39760, 5354, 49863, 9628, 33514, 16501, 54008, 19040, 38457, 57775, 30200, 17719, 13720, 39353, 29224, 11867, 57107, 24119, 35408, 28862, 59812, 39494, 63720, 2018, 26675, 58940, 38818, 30934, 42525, 33643, 56329, 40002, 20951, 59164, 32020, 53811, 29180, 2535, 64291, 7467, 56184, 30337, 60725, 18815, 53993, 35546, 51432, 19923, 44918, 15459, 24979, 38594, 64051, 28520, 51993, 22917, 59498, 17189, 29401, 49058, 13365, 46622, 53887, 10568, 30790, 19382, 63584, 14739, 57821, 11560, 23265, 45636, 34782, 1523, 47675, 19561, 41033, 22948, 2339, 36427, 16794, 51858, 23917, 56622, 3346, 59872, 12579, 55184, 860, 40596, 11707, 28750, 63003, 13971, 26285, 61938, 50976, 20694, 58088, 11097, 46260, 4188, 37995, 28036, 57396, 20841, 32914, 47754, 24183, 14426, 62612, 7352, 54293, 1183, 60398, 31986, 55414, 6732, 64243, 23837, 51377, 15389, 31809, 65483, 19734, 50112, 17350, 42633, 53685, 780, 60845, 12009, 52376, 5063, 60480, 20690, 42382, 15710, 
64296, 23420, 41269, 2364, 38024, 15113, 8308, 46259, 40165, 22011, 16961, 3056, 26188, 64089, 21051, 55043, 25395, 8684, 51769, 58504, 1139, ++ 15664, 33348, 26022, 41725, 32229, 23549, 54960, 42289, 25813, 53815, 39270, 50469, 17873, 36821, 14019, 44821, 60742, 3041, 30870, 58360, 26734, 50155, 4411, 32712, 19581, 44646, 28478, 41036, 17206, 47535, 13056, 30571, 50387, 26235, 46324, 14066, 41644, 7944, 25255, 55353, 19199, 64107, 5405, 51914, 28326, 57178, 20388, 65223, 39868, 12077, 55559, 1691, 49241, 25089, 57915, 46452, 2184, 26042, 49487, 22085, 5165, 19980, 52720, 23434, 62501, 13429, 21478, 40649, 58149, 9357, 50731, 3027, 47387, 16470, 51519, 30454, 9563, 29284, 58946, 34527, 5571, 39714, 10058, 43409, 60973, 23092, 41068, 55540, 21672, 8037, 39515, 57351, 20487, 63331, 33034, 23899, 35042, 11037, 63822, 36527, 20146, 59033, 27622, 41025, 153, 47539, 10490, 51710, 32417, 53388, 22896, 63478, 33675, 6342, 52509, 12583, 47290, 4563, 32714, 21009, 52799, 41450, 10066, 20363, 51529, 13431, 60219, 7125, 30160, 1670, 43768, 5669, 20001, 41623, 22041, 47652, 25791, 44767, 12596, 21620, 46595, 14661, 39502, 1801, 64981, 7641, 34711, 57800, 18387, 665, 32915, 7240, 41588, 3443, 44488, 54877, 372, 58286, 18142, 32182, 39497, 60679, 3601, 44857, 36322, 574, 29241, 50494, 17769, 52937, 25665, 58232, 12798, 28530, 61218, 50135, 13766, 42420, 9767, 40208, 45920, 17863, 34639, 48824, 23390, 33239, 59324, 19779, 49765, 3087, 45588, 32235, 7228, 37588, 13534, 47996, 30261, 62820, 25083, 54947, 11662, 44531, 6987, 53708, 9311, 35842, 44016, 12337, 49183, 26360, 39759, 10164, 24488, 44864, 17645, 42108, 9726, 36117, 25010, 48188, 7852, 56079, 10744, 61590, 35424, 13385, 45872, 23706, 36621, 18737, 44535, 10505, 37670, 1044, 57584, 31454, 5446, 53536, 27951, 63366, 34399, 54373, 30924, 10672, 65528, 50987, 31746, 53103, 10277, 45206, 2215, 34302, 41004, 13892, 22770, 39539, ++ 64308, 45726, 9149, 51111, 16718, 63302, 5860, 14843, 61949, 3513, 32854, 12442, 62630, 25380, 
56340, 27924, 19891, 49067, 39050, 6613, 15425, 64257, 29162, 53297, 48424, 730, 63589, 7207, 57151, 23865, 36339, 5997, 38913, 10885, 28774, 61523, 20914, 51255, 43284, 12833, 39308, 26917, 48387, 17816, 38152, 8121, 47251, 32541, 3236, 51342, 22643, 37115, 64302, 5505, 38896, 16248, 35743, 62728, 8252, 57248, 38328, 50187, 11286, 44628, 6182, 42157, 51923, 3756, 25761, 45335, 14093, 63026, 28127, 35375, 192, 40231, 63569, 3441, 21896, 14679, 50215, 27554, 57056, 17444, 6527, 48509, 29009, 1306, 46800, 65445, 25057, 5629, 17040, 41735, 12387, 55093, 16143, 47226, 26870, 14173, 44440, 3347, 48503, 12117, 62426, 23435, 43430, 2888, 64867, 7773, 42327, 3879, 46354, 21288, 40558, 61908, 19123, 51138, 11047, 44275, 16716, 7439, 64193, 46844, 4799, 19329, 48036, 25036, 50592, 63054, 23669, 55186, 45792, 14388, 61098, 31128, 4956, 17367, 53009, 40704, 5784, 57336, 23902, 43016, 31310, 27279, 48854, 4646, 53233, 43710, 60233, 20319, 48515, 26358, 34547, 8914, 38163, 24322, 64166, 6121, 44157, 12716, 51135, 25966, 21172, 47079, 59298, 7168, 62044, 10288, 43175, 6257, 37767, 53655, 8016, 34229, 56068, 26880, 64473, 1616, 28208, 62338, 20589, 5277, 63981, 15802, 9437, 56811, 24965, 39870, 60101, 17325, 42559, 24471, 54396, 2263, 19350, 39506, 8238, 31906, 49891, 23777, 63693, 18747, 29440, 60893, 1872, 58370, 33721, 16259, 63859, 46042, 34205, 2910, 53293, 29240, 57910, 367, 53999, 12390, 39863, 27472, 44779, 23042, 6486, 58862, 28135, 7643, 63748, 54832, 30091, 48589, 22414, 50384, 12699, 35938, 47395, 11079, 43586, 18525, 25333, 3685, 60026, 14084, 35778, 7299, 41605, 19694, 39316, 28219, 49796, 17898, 59731, 29109, 48780, 6305, ++ 30110, 13247, 61481, 2537, 38972, 34701, 20413, 49484, 30668, 22675, 57838, 43090, 1171, 40141, 8227, 51438, 4248, 63409, 23182, 45818, 34642, 9508, 43372, 21212, 14150, 38050, 24567, 51323, 33168, 20450, 62601, 54863, 22170, 52542, 45031, 1034, 35734, 5707, 31082, 61026, 3976, 53234, 9991, 35463, 62492, 24598, 54212, 15754, 29236, 46218, 18332, 
41946, 14101, 28156, 52310, 9015, 54647, 23759, 42700, 17679, 29561, 15094, 55835, 33226, 59361, 18061, 31950, 64168, 35820, 19632, 55430, 32662, 11796, 43172, 60903, 18573, 24156, 44170, 52184, 31143, 65200, 756, 45983, 25526, 34795, 54131, 13569, 59557, 30779, 17875, 36996, 50929, 29360, 58475, 38161, 3941, 43538, 56939, 961, 53179, 23156, 60904, 21456, 34325, 29691, 56301, 15696, 36793, 27076, 47951, 18653, 58615, 15218, 55474, 26180, 2682, 38375, 27595, 60919, 36732, 56508, 29724, 35788, 23275, 54610, 32318, 37057, 10630, 41061, 16231, 36142, 11289, 27350, 56917, 9970, 51160, 38751, 59952, 32493, 9209, 63882, 29768, 17079, 59254, 10193, 54594, 13787, 36807, 29097, 12950, 24060, 39358, 10891, 57082, 14035, 62504, 19989, 42960, 15868, 28399, 56847, 22450, 35198, 7873, 56273, 33248, 15290, 27161, 36927, 31826, 20236, 63843, 30553, 17337, 46395, 22189, 3676, 48471, 19154, 32925, 54604, 7803, 39398, 51491, 26575, 37687, 43655, 31450, 6658, 35123, 10618, 55806, 3918, 64712, 33439, 41045, 59185, 51270, 16951, 56325, 1450, 36769, 12847, 40497, 52272, 15322, 39301, 28545, 22081, 43155, 4773, 18510, 59100, 37111, 12985, 48810, 19302, 41197, 30596, 60139, 21300, 36789, 1353, 52876, 31183, 50687, 16200, 41880, 32805, 2180, 14371, 62428, 7151, 54166, 25775, 61144, 19853, 29432, 59238, 9121, 52151, 40908, 22874, 49073, 26642, 58057, 15418, 60327, 4969, 62129, 11950, 54017, 4084, 36050, 10871, 56440, ++ 50253, 37225, 22062, 28411, 52848, 12140, 59471, 40627, 9884, 47850, 15550, 26964, 54518, 33717, 22326, 41654, 29477, 35785, 11061, 52698, 25051, 57710, 2697, 61465, 31238, 55886, 16626, 10056, 45558, 1868, 14689, 42135, 3560, 33892, 16152, 63739, 24320, 54047, 18197, 47428, 33043, 14623, 59228, 21682, 2329, 41193, 10567, 60664, 36730, 6644, 56757, 9724, 61094, 32940, 19833, 44989, 30766, 12321, 47740, 883, 61935, 40015, 24847, 2864, 27779, 48383, 8591, 15727, 46889, 1433, 39588, 7206, 49820, 21259, 33869, 10295, 55788, 36450, 7645, 40847, 12348, 23574, 37968, 11201, 64184, 
18993, 38725, 4667, 43967, 10806, 56209, 2658, 46113, 8885, 25838, 64552, 19301, 28524, 32346, 42102, 8588, 37445, 52367, 5809, 40041, 8168, 50182, 20333, 60565, 11398, 38004, 28309, 49954, 9711, 31999, 64405, 14040, 53971, 5600, 22585, 1092, 49660, 14582, 43375, 12687, 64947, 466, 58597, 28453, 4146, 61736, 48710, 39410, 913, 35423, 15158, 22989, 2950, 48086, 26828, 37231, 11944, 49518, 3802, 38233, 21971, 45828, 62823, 9488, 55797, 49810, 5614, 64704, 31655, 45519, 25441, 50668, 4403, 51832, 37352, 1266, 48231, 17376, 63066, 41808, 4918, 54252, 39649, 2940, 45282, 55293, 14511, 49154, 18, 39106, 62951, 31273, 12180, 52603, 37160, 14138, 44179, 30640, 11463, 47506, 2435, 53218, 14546, 63470, 52072, 18886, 29577, 50218, 22470, 16049, 9968, 27229, 5803, 34252, 43558, 21879, 59442, 46736, 3656, 25461, 48521, 5995, 54698, 8698, 56908, 31218, 52506, 8051, 22620, 62239, 5639, 26630, 63550, 16833, 4024, 45619, 13900, 62829, 40629, 20783, 3633, 38429, 57691, 18094, 46380, 26435, 40484, 34917, 16919, 42956, 3294, 51418, 38953, 325, 48321, 31962, 16547, 62300, 6024, 44495, 760, 47946, 30361, 37122, 22368, 43673, 32439, 23998, 62767, 44387, 19351, ++ 24724, 4581, 57226, 46352, 8163, 44073, 27562, 132, 36135, 64489, 6708, 46577, 10823, 18950, 58891, 13078, 55468, 17123, 59744, 363, 37530, 18099, 41881, 12275, 46755, 4902, 40411, 60875, 29550, 53642, 48701, 26819, 60448, 19335, 55746, 40623, 11598, 37774, 57937, 9228, 23394, 42216, 28619, 45537, 50480, 13360, 44141, 25967, 19411, 62985, 31521, 23303, 47989, 457, 40618, 56380, 3713, 64571, 33744, 27177, 53514, 7129, 46158, 65182, 16577, 37361, 56615, 22595, 53264, 27092, 61692, 23650, 58506, 4445, 53699, 26794, 48264, 16783, 57701, 19825, 60302, 29729, 54791, 48971, 3273, 42561, 26982, 61680, 23745, 48023, 32150, 62294, 20818, 35637, 53795, 15036, 49083, 6725, 61505, 12919, 57601, 31033, 17298, 46545, 14786, 63122, 25286, 44244, 4897, 31422, 54905, 778, 34777, 44835, 17994, 42870, 8962, 48278, 30456, 58173, 33922, 24642, 61488, 
3285, 53554, 26977, 45000, 17286, 51809, 34170, 18986, 8323, 32955, 64654, 24393, 54335, 43256, 62414, 19415, 55422, 499, 45320, 33053, 52672, 25912, 60526, 1366, 19308, 40907, 30993, 17537, 36079, 22154, 1892, 52813, 7541, 30155, 60039, 12034, 32730, 61806, 9865, 53393, 29897, 13814, 24425, 42861, 18703, 65417, 23755, 4520, 40651, 26428, 57410, 24676, 9344, 43775, 59547, 6093, 21533, 61619, 4097, 58326, 17085, 35678, 60866, 22141, 41479, 27557, 1394, 46867, 38979, 8841, 43189, 61061, 35609, 45835, 23351, 65105, 14803, 28913, 6566, 31065, 57728, 34676, 17751, 63249, 20480, 50234, 35149, 13738, 25684, 40705, 51082, 33137, 38379, 47621, 8916, 34765, 51719, 32519, 56790, 18903, 10021, 47902, 64791, 24775, 8706, 51833, 11392, 59428, 4576, 28609, 65275, 9554, 33245, 14842, 23942, 63909, 36351, 11859, 56617, 20443, 38285, 28891, 55409, 24450, 8952, 52046, 14696, 57531, 7782, 47690, 16194, 479, 35111, ++ 60677, 42584, 31136, 17042, 64973, 19148, 55585, 25092, 51896, 18140, 29796, 35016, 63784, 49777, 2267, 43895, 6248, 47454, 32096, 21803, 62287, 49365, 27408, 36072, 22394, 52339, 34344, 19039, 6727, 37150, 12113, 31731, 8864, 46907, 7402, 29367, 49571, 2544, 26060, 43861, 63368, 119, 55969, 7307, 33620, 30664, 58328, 1233, 49675, 39490, 4093, 52928, 11785, 59285, 26379, 15552, 37758, 18996, 51582, 10751, 32422, 21192, 36298, 9922, 50980, 391, 30703, 42829, 5472, 34663, 12989, 44020, 37098, 15505, 45669, 6251, 38523, 2160, 32011, 46464, 4840, 44526, 8331, 32523, 15884, 51973, 8752, 33541, 53304, 16349, 7133, 27986, 13348, 50064, 1653, 30529, 40531, 22339, 37867, 50506, 24758, 2123, 64142, 28084, 53603, 35886, 1352, 33230, 56818, 16373, 41661, 22032, 62831, 6863, 57630, 36100, 25497, 19744, 40951, 15946, 46022, 8258, 40356, 28162, 37840, 9475, 21523, 55841, 6639, 46314, 57886, 26121, 52483, 17815, 47111, 6227, 28953, 10701, 34393, 41903, 24584, 61779, 20752, 6994, 41424, 14419, 33842, 51071, 26700, 2722, 61392, 46265, 54278, 39938, 16664, 42083, 36584, 21467, 47212, 18833, 
40945, 26543, 38823, 2175, 49613, 61052, 9192, 51680, 12328, 48063, 35415, 60179, 10942, 15938, 51340, 35770, 18216, 29050, 41374, 47011, 25437, 50041, 24298, 53936, 29341, 8516, 49076, 5592, 34108, 57764, 23677, 15048, 58432, 30826, 659, 53326, 12450, 49022, 2861, 52819, 38605, 62285, 14062, 41421, 10385, 45151, 27532, 37713, 3237, 41854, 64647, 837, 60653, 10667, 15161, 57313, 2124, 42963, 23447, 61067, 6980, 26136, 49291, 33673, 29491, 14956, 55739, 34445, 27368, 44035, 21541, 56320, 47796, 23212, 45502, 58240, 40067, 55004, 6952, 21624, 45859, 1980, 33500, 53743, 9936, 18912, 42666, 63500, 33057, 1630, 37926, 20229, 65207, 30752, 52460, 27053, ++ 9539, 14347, 53710, 1857, 33886, 48513, 5382, 38514, 13386, 45446, 60203, 4758, 21156, 37791, 31327, 61239, 26389, 38686, 8557, 45009, 13640, 5517, 54585, 7870, 65103, 26097, 10746, 58661, 42777, 64001, 25172, 57485, 40034, 23098, 34980, 58808, 20112, 32330, 50904, 15273, 36195, 16834, 38825, 20660, 64852, 5890, 17518, 55029, 35031, 14767, 27591, 43188, 34282, 21355, 50717, 7709, 43945, 58537, 4951, 41840, 60281, 13862, 57792, 41155, 20479, 44304, 63616, 10592, 59906, 18898, 52285, 30234, 8911, 56229, 29119, 63989, 25097, 61990, 13675, 53159, 35590, 18205, 62955, 22182, 57856, 36147, 21146, 45444, 68, 40194, 54892, 34492, 60417, 22999, 44704, 59322, 10543, 62905, 4593, 18195, 34865, 45388, 11695, 41268, 6960, 21069, 59462, 13522, 51208, 24482, 61296, 10712, 29368, 52123, 23594, 1585, 60161, 50789, 4002, 63219, 11615, 55130, 18512, 50305, 15296, 62561, 42064, 31506, 38546, 22717, 13828, 42853, 3410, 40299, 12838, 37676, 58084, 50520, 15989, 8045, 51730, 13411, 36232, 56469, 18206, 63510, 47272, 8360, 58152, 42754, 24772, 14710, 6692, 28184, 63398, 10249, 57604, 3019, 27401, 55484, 5225, 59408, 20407, 45851, 34414, 16465, 28636, 32577, 58607, 8374, 29544, 21036, 52374, 33489, 5450, 45018, 57874, 1182, 65082, 15003, 34888, 10145, 40889, 324, 42734, 13401, 65450, 20274, 44637, 11253, 62538, 36555, 4913, 48306, 25862, 
20127, 57047, 32406, 18305, 44815, 9117, 26591, 49449, 19953, 55698, 257, 61228, 11366, 47371, 16726, 29608, 46326, 19857, 28064, 44480, 21775, 30167, 55260, 18276, 11192, 44153, 38967, 2797, 54424, 5482, 43542, 36305, 227, 61822, 6266, 38745, 16002, 1497, 37153, 19096, 5686, 26840, 13083, 30514, 58771, 42084, 27581, 46903, 13398, 64657, 34877, 4459, 17138, 46459, 56144, 27662, 42016, 12667, 39977, 5925, 47078, ++ 57971, 38105, 20988, 41189, 23730, 11379, 58776, 31694, 56745, 2849, 24268, 42473, 52600, 9071, 16564, 23457, 53994, 15065, 64816, 33563, 56866, 28910, 39669, 15879, 44023, 1285, 48102, 28173, 3895, 17582, 49850, 471, 15469, 65202, 4677, 44775, 13505, 62347, 6589, 56597, 30233, 52409, 11911, 60336, 27203, 37399, 51695, 23958, 8729, 59944, 45806, 16710, 65447, 6134, 36503, 61711, 28739, 16970, 39267, 22849, 48819, 25635, 4366, 29267, 55190, 12602, 24212, 38160, 28511, 47260, 2502, 64880, 17836, 41925, 20373, 11019, 50345, 22735, 42712, 9230, 25935, 51169, 1689, 41568, 28644, 6022, 59091, 14344, 63878, 24806, 18490, 42931, 9580, 39221, 6210, 19887, 51920, 32818, 46869, 55796, 9245, 54697, 19631, 58377, 31691, 49289, 26707, 45686, 8780, 39628, 3392, 48729, 34085, 14495, 46695, 38901, 13041, 32518, 43726, 21851, 35245, 28986, 59603, 2389, 47197, 25637, 5247, 12072, 60391, 2082, 53347, 30485, 63613, 21344, 59467, 31811, 1927, 20472, 65284, 39816, 28569, 58745, 3226, 31528, 44240, 5921, 29478, 16261, 35355, 11214, 51574, 33158, 59139, 19607, 34961, 49196, 15089, 33677, 65090, 8530, 43393, 23110, 11679, 64382, 6624, 57021, 46750, 974, 22556, 38315, 55787, 2024, 42272, 27735, 61827, 13604, 23145, 38609, 8691, 31994, 55906, 18641, 60304, 22732, 57310, 32689, 26066, 55039, 17608, 50771, 28615, 21313, 54181, 13131, 64279, 37906, 7444, 41802, 59975, 21189, 36441, 54018, 4395, 33554, 24646, 50926, 22816, 32245, 59656, 23555, 55970, 6868, 36271, 49632, 4305, 65044, 13249, 50524, 35911, 58961, 28788, 16426, 64012, 24166, 59702, 12922, 22312, 47217, 19469, 53378, 31319, 
50892, 60343, 10327, 52764, 32386, 62783, 44264, 50265, 4180, 15843, 61578, 7668, 39586, 25953, 49643, 59514, 40378, 23398, 11212, 51218, 3428, 58999, 21878, 55142, 17624, ++ 25653, 63679, 7027, 49948, 62349, 28815, 43265, 15864, 21688, 39876, 53464, 14575, 28290, 48367, 57392, 40731, 3790, 50851, 20226, 1936, 24067, 51735, 18595, 56223, 32831, 62755, 20643, 39160, 55300, 35396, 21427, 43665, 52106, 27958, 38359, 17113, 47785, 27507, 41348, 22348, 3380, 46462, 25382, 48801, 4383, 42868, 11232, 47626, 40760, 22039, 2692, 54010, 10254, 30147, 46757, 24111, 2424, 49903, 12807, 63340, 1515, 34912, 46637, 61330, 6792, 33933, 49699, 3991, 57503, 14731, 40911, 26059, 49091, 1022, 59124, 39381, 33145, 3803, 59815, 28344, 56680, 15014, 34188, 61184, 12794, 47160, 26362, 50438, 11445, 37340, 56836, 3572, 64935, 27214, 47759, 36359, 25423, 3036, 16573, 29489, 42646, 26286, 39057, 4144, 61873, 10185, 38313, 18357, 64689, 30298, 43881, 19366, 59136, 5180, 54433, 17126, 65516, 27378, 10142, 57206, 52411, 6204, 39129, 33258, 20614, 56855, 34825, 44444, 29346, 49262, 36545, 15547, 9795, 45579, 7210, 49841, 25348, 44598, 30796, 4546, 46393, 22576, 48607, 10482, 53445, 23567, 39222, 55164, 21083, 65402, 4134, 44029, 12564, 47812, 853, 23665, 53808, 45188, 13087, 50164, 30517, 35983, 52465, 31478, 24909, 37529, 19700, 41463, 63346, 13227, 44385, 16924, 64610, 7487, 47561, 30177, 54943, 49344, 20002, 52880, 2661, 45394, 6829, 38259, 16381, 46184, 3499, 39604, 7601, 37303, 2131, 45288, 31666, 40781, 16550, 27885, 47580, 10905, 29998, 1010, 61527, 15892, 42782, 64181, 7734, 38897, 13437, 53394, 5348, 43434, 12076, 32819, 54126, 16031, 58446, 31697, 40181, 6197, 25324, 671, 52449, 46471, 8353, 41327, 17594, 49984, 32104, 63245, 9231, 42206, 24311, 13623, 29052, 41566, 20893, 49147, 2548, 18193, 34685, 24648, 52998, 36897, 17830, 50786, 2787, 21336, 12278, 29890, 6633, 61101, 25133, 49185, 34094, 8493, 43476, 36495, ++ 2742, 45065, 32699, 12929, 35624, 3960, 50753, 7961, 63099, 32521, 
7152, 36602, 61636, 633, 33297, 12001, 43585, 30260, 37016, 59280, 42397, 9943, 46264, 3281, 30001, 12690, 50730, 14941, 30788, 7747, 61927, 11301, 32613, 54475, 9825, 59839, 1448, 53096, 10461, 39543, 64483, 8537, 34516, 13972, 57433, 19665, 62144, 29442, 12675, 63689, 33196, 38550, 25521, 57602, 13465, 55394, 35431, 31840, 54327, 19523, 30523, 52852, 14942, 18376, 26595, 40359, 17078, 54114, 21572, 32986, 60649, 12132, 36102, 31016, 7826, 52678, 14154, 47667, 17181, 37543, 40546, 6853, 49586, 20210, 39067, 55909, 2440, 31796, 42022, 5246, 30208, 51571, 21770, 15593, 54456, 12226, 57894, 43903, 60015, 13646, 65379, 369, 51495, 15420, 23647, 43141, 2418, 54221, 22730, 6122, 53114, 11963, 37401, 25980, 41176, 30894, 7259, 49088, 36971, 34, 17862, 45530, 13678, 63896, 7762, 51319, 16884, 65196, 8888, 19850, 61285, 23485, 54975, 27612, 35812, 18577, 56051, 11442, 60806, 14805, 57138, 16734, 27868, 64519, 37425, 12253, 59790, 55, 48149, 30436, 38715, 22721, 56370, 29720, 60989, 40543, 6293, 25729, 38436, 16993, 58450, 198, 15401, 48612, 3956, 54643, 14211, 51014, 5922, 30919, 49860, 25193, 36548, 19453, 39958, 3234, 25909, 10799, 43020, 28090, 36358, 64144, 26991, 31094, 51905, 10478, 59150, 29891, 61479, 24813, 56418, 9747, 60462, 6383, 52371, 3210, 63112, 24069, 51628, 34977, 12134, 47045, 28372, 17308, 58205, 30311, 44254, 18960, 36856, 27068, 62973, 39672, 1604, 24356, 37829, 9374, 46954, 20428, 62529, 42283, 12206, 21998, 37274, 28243, 58070, 1909, 39772, 11724, 30297, 55437, 3126, 35832, 64508, 7480, 57218, 12431, 37834, 55962, 8617, 64954, 29726, 10798, 60704, 23068, 56968, 44007, 32250, 54244, 45325, 36254, 15788, 31291, 18386, 61862, 29518, 14959, ++ 51662, 10533, 27955, 59907, 18612, 54768, 37427, 26092, 47388, 17374, 56039, 10622, 20555, 46044, 24610, 64140, 19222, 7429, 48798, 11605, 25854, 35560, 60311, 22994, 41395, 58000, 6055, 45373, 59484, 23518, 40833, 51161, 3093, 18760, 25677, 42542, 33548, 21150, 61317, 29037, 18936, 44357, 54569, 23036, 40084, 
31932, 1852, 35984, 55807, 5236, 49039, 18109, 51960, 992, 41530, 20279, 9129, 45445, 5782, 43004, 56920, 7896, 38491, 44924, 58755, 2055, 51766, 35520, 8221, 45856, 5955, 54657, 22393, 62602, 43581, 27310, 34992, 21353, 55018, 453, 63290, 24349, 30357, 43828, 9727, 19387, 52811, 22531, 62745, 17252, 48788, 13104, 45825, 33404, 734, 41404, 31491, 7740, 24190, 48678, 21562, 37124, 28766, 33740, 48095, 56018, 35061, 12557, 47014, 36449, 27956, 63783, 20871, 56197, 2227, 45189, 23012, 55650, 20199, 60768, 31734, 26366, 48537, 23841, 42660, 30747, 1214, 24331, 47571, 41275, 4414, 33772, 50935, 242, 62890, 40770, 5445, 42393, 26485, 34998, 38487, 8794, 43370, 2155, 19814, 50354, 27129, 42178, 15558, 7890, 61911, 18541, 5313, 37762, 9398, 17957, 31961, 62296, 3734, 54991, 21731, 44644, 60553, 27979, 40285, 9637, 57954, 33825, 27322, 61337, 10195, 54088, 4243, 59243, 12618, 56743, 34031, 63192, 5146, 58702, 11810, 21111, 48374, 4720, 62117, 36060, 15390, 47365, 12283, 49604, 18534, 42351, 23126, 35338, 19569, 44159, 33164, 5499, 39935, 58712, 22268, 56118, 2533, 40288, 9866, 48230, 2059, 65269, 9007, 51437, 20863, 14250, 48025, 61957, 17882, 52977, 27240, 56545, 14613, 34104, 30986, 61495, 4742, 53840, 19971, 45342, 25604, 60598, 37736, 15284, 58465, 45026, 17371, 34025, 26049, 46187, 28359, 43223, 22086, 41173, 993, 47573, 31508, 5368, 35677, 14483, 58567, 47, 19459, 63149, 5182, 53414, 45883, 1261, 39033, 64578, ++ 23114, 55934, 40494, 788, 46155, 22475, 9764, 60826, 1411, 41333, 26766, 44448, 30990, 58092, 5725, 34468, 55233, 27172, 63000, 17512, 53115, 4495, 14513, 50040, 8685, 33996, 24786, 36764, 2181, 47363, 13263, 29810, 37491, 63056, 48517, 6330, 57058, 12452, 36862, 5087, 50306, 31346, 742, 59631, 7806, 53606, 16117, 45239, 21015, 27857, 42259, 8304, 31032, 62385, 22323, 48252, 64018, 26897, 59794, 11570, 23479, 28357, 64724, 9528, 22123, 62124, 11221, 29630, 63173, 25340, 38974, 16364, 51284, 3202, 19278, 57320, 5286, 64597, 10463, 31539, 48131, 11616, 53889, 
4135, 64800, 32847, 37800, 7379, 45124, 28866, 35112, 61408, 8131, 58721, 28185, 63617, 17705, 53025, 34173, 5076, 40327, 56689, 10898, 63296, 5557, 16998, 27273, 60432, 15890, 57938, 503, 42237, 9314, 33366, 62172, 15101, 51021, 4378, 39729, 12310, 41782, 61785, 3509, 53060, 10876, 58325, 39936, 54137, 13101, 26736, 58887, 11671, 43629, 16483, 32390, 13982, 53726, 21779, 49030, 1078, 52304, 24216, 54796, 32079, 61185, 34265, 5014, 57511, 32450, 52513, 26112, 49991, 44828, 27682, 58851, 46510, 52047, 20110, 42390, 29136, 7666, 39246, 11023, 18428, 62607, 43960, 22068, 2816, 46291, 17838, 35124, 23425, 43527, 32333, 50825, 21759, 45981, 17522, 37839, 16122, 50442, 40400, 13051, 54763, 19269, 23866, 43323, 1093, 33878, 40176, 4512, 63857, 14338, 55509, 29249, 57520, 13863, 50403, 17833, 8122, 45452, 25718, 32704, 51961, 21650, 34540, 55140, 24970, 41164, 3582, 57182, 45243, 26016, 10285, 42619, 2653, 35235, 7808, 48984, 3518, 55062, 15636, 48269, 10559, 34963, 7244, 51615, 3901, 21095, 48696, 27138, 5021, 49766, 54661, 582, 62004, 16291, 6364, 51554, 13981, 57892, 19750, 44966, 26551, 52523, 8183, 38767, 28089, 48254, 9819, 41310, 26749, 13661, 56794, 11626, 48040, ++ 5581, 33589, 16512, 49315, 30296, 57674, 34269, 19843, 52278, 12525, 65312, 3573, 50301, 13848, 39340, 51559, 9616, 45272, 1064, 40177, 31814, 61787, 38405, 27777, 64642, 19657, 53873, 16930, 61215, 27112, 56503, 5434, 46076, 14299, 35818, 23740, 30876, 45672, 49253, 24544, 58572, 14504, 41696, 17991, 47154, 34105, 24950, 51110, 7068, 61451, 15432, 58790, 44253, 11076, 37040, 15162, 3457, 39941, 17461, 37576, 47492, 51209, 3099, 36606, 43404, 31669, 47893, 19126, 42070, 616, 48582, 32183, 9803, 45080, 36923, 12720, 46582, 39899, 23361, 44842, 18740, 36639, 58122, 16641, 27434, 46347, 15308, 59628, 10661, 57519, 1487, 24037, 39845, 19074, 49501, 5741, 38662, 14722, 61076, 46221, 18755, 8273, 52538, 20671, 41855, 30730, 50700, 7532, 40603, 24862, 31902, 49713, 17615, 47348, 8009, 35700, 29104, 58774, 
24988, 47723, 8531, 16167, 37542, 22122, 36225, 19080, 32661, 6887, 56218, 38000, 17669, 48325, 29709, 56750, 22410, 59734, 36882, 7713, 64110, 33322, 19035, 62120, 13075, 40190, 6774, 17437, 45926, 9946, 23207, 40685, 1695, 35768, 11545, 54525, 2561, 25015, 13498, 37076, 10646, 47638, 63711, 23994, 56457, 33108, 6903, 26191, 36744, 59846, 20794, 39420, 48869, 500, 62457, 15575, 8117, 41957, 1711, 53759, 24516, 61033, 33087, 726, 44248, 34471, 8202, 51038, 28275, 64807, 20837, 53505, 26810, 32137, 46691, 76, 38771, 8659, 61799, 26388, 54884, 31282, 38280, 5106, 62741, 14660, 60266, 6423, 29155, 15605, 59226, 35537, 30712, 7286, 33899, 54785, 29876, 60324, 19214, 64440, 40871, 23239, 44656, 26690, 38200, 65337, 29702, 56979, 14196, 33410, 64198, 8043, 42857, 31836, 12079, 22803, 39323, 20145, 48399, 60059, 32943, 25214, 38387, 54483, 9479, 62526, 40667, 24063, 64362, 16452, 55703, 34499, 21119, 60434, 37063, 24580, 32124, 19637, ++ 54170, 26361, 61351, 8827, 13970, 42213, 4940, 44789, 29389, 38254, 22938, 35325, 18464, 62510, 27875, 15679, 22577, 36291, 58457, 20966, 47635, 6847, 16379, 44263, 9, 49020, 10391, 42959, 32144, 9125, 19956, 64368, 22008, 55082, 8277, 60575, 17670, 2051, 15820, 55593, 9396, 38216, 62869, 28279, 10732, 64206, 3816, 56860, 37970, 32186, 50057, 24408, 4539, 29017, 56303, 32746, 50512, 24744, 58035, 35, 15814, 33345, 20122, 54929, 14375, 6287, 24508, 56465, 13541, 58320, 17589, 65533, 23814, 55695, 26494, 61007, 29370, 15758, 50912, 2824, 61835, 25650, 5816, 41159, 60528, 925, 49873, 25166, 40771, 20430, 43485, 31239, 52294, 10040, 36854, 23218, 55378, 30074, 1932, 25800, 62256, 32064, 43659, 14242, 57461, 1228, 44672, 32938, 61644, 4647, 51843, 13899, 59786, 24207, 53717, 18948, 42986, 11142, 34371, 64804, 27898, 54848, 30080, 59340, 4855, 62268, 14919, 50127, 28731, 2808, 64448, 5990, 39306, 8584, 46497, 3567, 28231, 47433, 17904, 9665, 45054, 4064, 49636, 28882, 46955, 25484, 54143, 36983, 64192, 13630, 56013, 16529, 63130, 20869, 34106, 
43162, 64639, 1334, 57340, 32380, 14572, 35510, 2335, 50363, 45430, 16080, 51872, 11454, 53223, 7268, 14774, 56233, 26716, 37042, 60654, 28784, 48138, 31716, 13915, 7059, 27459, 57131, 21989, 62863, 30293, 56164, 6144, 38485, 9179, 58860, 15751, 51328, 11411, 59604, 25170, 42085, 20564, 36193, 1722, 64479, 18841, 49700, 11590, 37395, 42412, 19707, 45885, 49063, 11075, 18136, 61361, 22477, 50124, 5004, 39224, 12692, 45789, 28935, 17175, 51177, 9632, 59916, 1246, 22711, 43247, 16690, 46702, 40291, 23599, 52327, 18325, 60969, 40823, 63133, 9048, 30956, 4396, 35514, 11508, 63539, 3553, 15475, 33873, 17430, 1819, 48925, 11016, 43036, 3994, 28737, 44607, 2340, 50900, 7471, 59615, 41640, ++ 3213, 43977, 20760, 38631, 51352, 24917, 64229, 15189, 54419, 6205, 59050, 47904, 8328, 41956, 2426, 46963, 60612, 5038, 29619, 13300, 25216, 57229, 30603, 54919, 21532, 35099, 63628, 4028, 38718, 52715, 44522, 34182, 865, 41931, 26666, 39097, 51817, 65375, 43101, 27003, 35296, 21576, 3018, 52644, 30462, 20048, 43568, 26503, 13750, 338, 19229, 64979, 39176, 53715, 18777, 6690, 61038, 10024, 34550, 52503, 42394, 62776, 12062, 27891, 49293, 60445, 39676, 4785, 50624, 27567, 37689, 4194, 41422, 14508, 34018, 1538, 53590, 7450, 58847, 33339, 42176, 13468, 51446, 35255, 22959, 12490, 55134, 33805, 4534, 54040, 14600, 62407, 3986, 56335, 26458, 44575, 11173, 42338, 51005, 35687, 12013, 49641, 3445, 36286, 25336, 64304, 22266, 9875, 19905, 45890, 21673, 39205, 29668, 3170, 38090, 63441, 1020, 52006, 16806, 5851, 44722, 1824, 49455, 14231, 44119, 27230, 46668, 21125, 43103, 35172, 25142, 52643, 20289, 61635, 25841, 51646, 12443, 38911, 55720, 29978, 59046, 35648, 21299, 58300, 11062, 62660, 20367, 3069, 43599, 21847, 48450, 28776, 41531, 8614, 50589, 15913, 31026, 52980, 26763, 49398, 4886, 41699, 53602, 19210, 30227, 64865, 1467, 42606, 31600, 63942, 29365, 40763, 11981, 52527, 5716, 20446, 10001, 63695, 41209, 52055, 46600, 9489, 39217, 14718, 2877, 41711, 18125, 47962, 25569, 33326, 44002, 
6915, 36914, 17201, 49292, 4220, 46066, 12610, 47767, 15137, 43080, 23723, 56983, 27724, 515, 63537, 31530, 4586, 38132, 52612, 1056, 43635, 16343, 63910, 21507, 52025, 6, 33257, 58722, 5712, 36482, 32269, 18647, 53276, 6040, 50407, 27815, 854, 59078, 11038, 35271, 2226, 25372, 14571, 53801, 43870, 57559, 23683, 46613, 39893, 29174, 50042, 42560, 59767, 30674, 37360, 20615, 51865, 33216, 58165, 13023, 65022, 22238, 39698, 15507, 52748, ++ 29210, 63404, 6562, 58285, 31534, 2053, 36728, 48850, 11089, 33080, 16223, 25511, 53760, 29047, 56562, 32244, 12313, 41046, 54204, 43216, 52028, 2750, 37656, 11158, 46645, 26331, 14026, 57591, 22848, 15590, 28696, 11512, 53471, 16268, 58074, 4196, 20539, 11036, 32770, 6910, 61754, 48143, 45052, 13054, 37205, 60899, 9904, 48544, 58229, 40277, 46301, 12175, 33840, 14235, 47067, 40949, 27406, 44747, 21094, 29937, 5351, 25241, 55994, 46061, 1155, 32570, 22791, 44446, 34456, 8786, 53351, 30125, 59668, 6612, 49465, 21793, 42967, 38276, 28078, 9085, 20665, 56183, 30890, 8245, 47459, 29573, 38875, 18115, 65296, 26853, 36015, 11851, 47037, 16802, 60693, 2737, 64420, 20119, 7038, 58534, 22578, 27749, 59714, 17493, 47494, 13139, 52901, 37673, 55537, 11319, 65024, 6673, 56971, 44342, 12875, 27099, 46373, 21269, 56391, 32976, 22519, 57539, 19698, 40620, 9101, 52224, 674, 63156, 10246, 57399, 13605, 37165, 1591, 41984, 15410, 34506, 63297, 23720, 2510, 41316, 24934, 14172, 42645, 711, 38087, 14982, 50870, 31219, 60138, 7291, 33532, 3930, 57858, 24125, 60458, 5837, 39871, 22247, 8141, 17698, 61603, 28442, 12213, 58750, 5584, 34796, 25415, 55417, 16711, 24206, 4759, 47261, 19005, 44748, 30725, 58126, 38847, 22633, 2394, 34985, 17899, 59796, 26257, 45081, 53130, 22965, 60594, 13716, 56946, 1903, 21811, 63325, 28826, 54591, 22415, 65357, 30671, 53695, 27348, 59069, 3737, 52884, 9290, 35742, 54227, 12967, 50698, 23187, 56374, 26446, 40533, 28209, 57814, 8589, 37191, 23918, 56178, 11495, 38783, 25101, 62911, 13357, 57514, 41525, 33001, 12469, 62137, 
36826, 19289, 44370, 29366, 56585, 46970, 36613, 28039, 17096, 10174, 52177, 1382, 18471, 55335, 7376, 22513, 5952, 56333, 13553, 61414, 25456, 6886, 18059, 49526, 9111, 35040, 47297, 27510, 10234, ++ 35827, 17283, 46669, 12033, 21564, 61987, 17967, 27387, 56902, 42785, 63535, 255, 38806, 10336, 19591, 49621, 23974, 64950, 17024, 9204, 33191, 63308, 18888, 59987, 5275, 40534, 51469, 31170, 48020, 7016, 59162, 39949, 25068, 49653, 31672, 34717, 46979, 29321, 50674, 39758, 18631, 4762, 26103, 55339, 6097, 42004, 17218, 34932, 22742, 8887, 29670, 54956, 2936, 59500, 23102, 1714, 52978, 8115, 65143, 13669, 60108, 40590, 7366, 16883, 37265, 64378, 10408, 15983, 61541, 20981, 46764, 11486, 19992, 52073, 24835, 62374, 11967, 17949, 48853, 64119, 45909, 1855, 52489, 17505, 63457, 3324, 57202, 9391, 44309, 6360, 50821, 21235, 38232, 32584, 22043, 48288, 29147, 45602, 16230, 39544, 53884, 9482, 38505, 55110, 6309, 29960, 39988, 2844, 28239, 35302, 47924, 26238, 16461, 34612, 61013, 9641, 31260, 40222, 13449, 50413, 38469, 10419, 35014, 64049, 24715, 33613, 38785, 18244, 31955, 45424, 22856, 60647, 30981, 54508, 10954, 43932, 5195, 50072, 16192, 52908, 6260, 64889, 30654, 51987, 26634, 44477, 34807, 9151, 18792, 39646, 53281, 30131, 14291, 47048, 36386, 12439, 45751, 59619, 34583, 43751, 20528, 46821, 22998, 37889, 47978, 13677, 41172, 8715, 49550, 38524, 58945, 34298, 65216, 9031, 25613, 49957, 16492, 55336, 29753, 65001, 25053, 4394, 48737, 11541, 31356, 36791, 4014, 35589, 45758, 30535, 52517, 12916, 40964, 2713, 34740, 10345, 39450, 7215, 34077, 16743, 40607, 29801, 21019, 46552, 25363, 41514, 17101, 34223, 7595, 64725, 11968, 20015, 46849, 31350, 13957, 48389, 27692, 43974, 20631, 49869, 2455, 47400, 21384, 8665, 25990, 55291, 24499, 9779, 48068, 5298, 53049, 20543, 6691, 50762, 2905, 59384, 37513, 26301, 61248, 32066, 20973, 65404, 45438, 27065, 35206, 47018, 2979, 41798, 53247, 38211, 30459, 54842, 18964, 4620, 62671, 50188, ++ 348, 56372, 24219, 34668, 53073, 40319, 
7678, 45689, 3020, 20327, 30711, 51807, 21902, 59596, 43953, 6501, 37884, 1626, 34886, 48573, 22140, 7699, 42193, 23792, 56126, 29254, 17774, 1703, 60814, 36390, 45174, 2872, 62543, 18400, 5927, 63511, 12917, 59440, 245, 54235, 24085, 57817, 33400, 16413, 49511, 23548, 53288, 2362, 63562, 36459, 52199, 20623, 43232, 25886, 36146, 63227, 16454, 31604, 38801, 48442, 18459, 35663, 31259, 53768, 20525, 51982, 26855, 57127, 30818, 1896, 42337, 63864, 35926, 40417, 8419, 31381, 56763, 34683, 4649, 24101, 14829, 36218, 26298, 43242, 21638, 37181, 27883, 48460, 23536, 60154, 29880, 42789, 260, 53574, 8996, 35381, 13427, 57061, 33587, 950, 30975, 46615, 4448, 23924, 34264, 62547, 19111, 50037, 58974, 14950, 1632, 42721, 54764, 5310, 48840, 23525, 58004, 3807, 62731, 25870, 6602, 60461, 28546, 3033, 53821, 12546, 59881, 7571, 55331, 3933, 50707, 9361, 44880, 19284, 29168, 57738, 20926, 31385, 60361, 36476, 19680, 47901, 8465, 55387, 17037, 4735, 56801, 28356, 59281, 24620, 10752, 65146, 38326, 427, 19490, 55259, 27517, 3394, 51145, 10131, 55860, 690, 62896, 9296, 28896, 60890, 21413, 57571, 3074, 32783, 12902, 22316, 1115, 54379, 36029, 3591, 45597, 11113, 43688, 6464, 37380, 54161, 20621, 58497, 16675, 64342, 49838, 20052, 9945, 62247, 5829, 48577, 19134, 43471, 56595, 16242, 60369, 24431, 44613, 62461, 10758, 48702, 60870, 6811, 57959, 3072, 62118, 44362, 14914, 47498, 32592, 53782, 2854, 42006, 59540, 4111, 63399, 6753, 54281, 15070, 42765, 29988, 39613, 63685, 45969, 3039, 38445, 57860, 31546, 62692, 15561, 39063, 64681, 13440, 32739, 45777, 7819, 42254, 14893, 49277, 4669, 36446, 12740, 53607, 19155, 10389, 63971, 22834, 15149, 46028, 934, 26217, 57507, 43383, 23534, 14178, ++ 42516, 37852, 8234, 47668, 4217, 28590, 55544, 23295, 60348, 35932, 12892, 46418, 4601, 34069, 14423, 61097, 26534, 55869, 18299, 39564, 58735, 28534, 50264, 12822, 33749, 8416, 53282, 41599, 12203, 19296, 24239, 55793, 13785, 37609, 43788, 9701, 41267, 22682, 36550, 15002, 44036, 11725, 40852, 
64792, 1135, 32367, 60008, 28468, 44592, 5610, 15991, 60737, 7479, 49646, 10518, 45681, 28630, 57203, 3899, 22583, 50224, 2262, 58988, 9308, 41250, 3609, 45303, 6988, 38660, 55286, 15056, 25869, 2641, 57901, 16183, 47090, 144, 44204, 29947, 54542, 39629, 60790, 11157, 58331, 7023, 53254, 13811, 41471, 2199, 15920, 55657, 7924, 64033, 27584, 40476, 59413, 4798, 25016, 51362, 21016, 65120, 18416, 41207, 60271, 15259, 44957, 26614, 8556, 41572, 23168, 62929, 30591, 20434, 40820, 32273, 17971, 52782, 36648, 15652, 43487, 54344, 18596, 48137, 42465, 17306, 45935, 23305, 48787, 26209, 40892, 16647, 34065, 6494, 65061, 39709, 541, 32878, 45678, 10169, 26070, 57213, 12010, 33692, 22134, 40937, 63020, 12793, 47523, 1193, 51752, 45135, 18164, 49703, 26292, 62157, 32838, 17134, 64309, 23504, 37266, 30618, 40157, 15704, 54446, 43335, 4115, 36242, 45704, 19973, 61728, 27771, 48440, 42826, 18348, 60216, 14187, 33576, 61972, 19771, 57649, 15207, 42235, 32880, 1420, 40562, 7721, 27665, 55385, 42487, 24108, 38048, 27197, 61308, 8953, 29597, 51848, 833, 50162, 5695, 19487, 37120, 1478, 33641, 14443, 32006, 38973, 10097, 28736, 60034, 21917, 5521, 36601, 61732, 10703, 25807, 39996, 16862, 35720, 31884, 60719, 10949, 55876, 4471, 15901, 35558, 17918, 51294, 13846, 21900, 41845, 26900, 8505, 34261, 24819, 54967, 21499, 63756, 30270, 56868, 11889, 41369, 28473, 58872, 493, 50585, 43815, 32600, 29377, 59247, 11434, 62255, 40768, 6324, 31396, 33810, 58807, ++ 20128, 30047, 65466, 16056, 60042, 13442, 39209, 31888, 9429, 50626, 40695, 17648, 62163, 24851, 52931, 30369, 45541, 11292, 51075, 4148, 15007, 44920, 803, 37174, 65514, 46166, 20831, 62030, 27672, 50933, 33019, 7988, 30157, 48826, 21066, 52348, 28114, 56422, 8611, 63957, 31455, 51592, 7656, 27573, 46713, 14737, 38976, 11312, 21719, 56474, 27125, 41335, 33119, 17866, 55629, 5067, 35108, 12903, 42648, 54710, 10846, 43879, 23900, 28939, 62272, 33687, 18137, 49880, 12503, 28450, 48310, 32932, 45542, 18592, 54219, 27687, 65031, 19512, 
59990, 10067, 22446, 3700, 49208, 32272, 40140, 19852, 61646, 31629, 52139, 34751, 24718, 46035, 19572, 14114, 50251, 18036, 41707, 62661, 8646, 44017, 12742, 49133, 28602, 10474, 52065, 122, 56557, 32540, 12345, 53257, 37243, 7854, 51391, 10830, 64165, 2450, 28387, 8403, 46942, 30273, 384, 35965, 11594, 31549, 56701, 5049, 29835, 36724, 15716, 62001, 28025, 58127, 49542, 24167, 12981, 48106, 62477, 7254, 53520, 43133, 1736, 39371, 46217, 60874, 2764, 36255, 23460, 32629, 41823, 15376, 35205, 5489, 56631, 9733, 42819, 6958, 48842, 39092, 14852, 58206, 6355, 52172, 33646, 24695, 17269, 50634, 26491, 11255, 51570, 39667, 6540, 56991, 9802, 37630, 28227, 51382, 24021, 39865, 160, 49427, 28559, 8565, 61433, 51596, 29409, 46939, 12473, 34318, 451, 17449, 57405, 14581, 33525, 46491, 23437, 37639, 21396, 32214, 57665, 28577, 55836, 22923, 64873, 44984, 18383, 55330, 24097, 49821, 1853, 39521, 51020, 24728, 15382, 34667, 53117, 18781, 57379, 45071, 1468, 27008, 19645, 34465, 52543, 28451, 61284, 43733, 7061, 30611, 54189, 1697, 50104, 59868, 17687, 48886, 141, 40173, 18946, 5561, 34528, 51200, 24292, 47995, 16704, 39455, 27720, 7909, 57152, 4994, 40033, 24392, 35966, 16859, 48680, 12413, 53875, 3520, ++ 45248, 10923, 50497, 32842, 43573, 19285, 49729, 1015, 64763, 25988, 5533, 55163, 37096, 8900, 41620, 3173, 20683, 63936, 36116, 27341, 62337, 31484, 54367, 25962, 16097, 3501, 39279, 10086, 35483, 4795, 64518, 42693, 54102, 1371, 61504, 25837, 3614, 46343, 19542, 38478, 2626, 22114, 59030, 35861, 20425, 50876, 6536, 57333, 47877, 31222, 13207, 51297, 1271, 62562, 38337, 21496, 47721, 61818, 19337, 26348, 37862, 63725, 14861, 47331, 13255, 25561, 59465, 36323, 63375, 23176, 5154, 61207, 10210, 35193, 5704, 39250, 13009, 37297, 6409, 50142, 32767, 44695, 28753, 16103, 64504, 596, 45401, 10342, 18632, 63110, 12643, 39405, 58042, 33150, 5939, 54868, 30431, 15675, 37612, 61209, 34818, 2550, 57684, 36700, 21382, 43321, 16875, 61399, 48322, 4098, 18287, 58416, 25446, 45365, 
14435, 49927, 41901, 59178, 20735, 64567, 23913, 51681, 61444, 7305, 21800, 41490, 65391, 1293, 53307, 10769, 38244, 2334, 18426, 35853, 55895, 26911, 17240, 37719, 14478, 22934, 63862, 29030, 5744, 15957, 27318, 49378, 53856, 6523, 63634, 22526, 61129, 28030, 31738, 41065, 21201, 54075, 11719, 29612, 2701, 44563, 25955, 12651, 65531, 2033, 59966, 32072, 7958, 63508, 29994, 15160, 52951, 17407, 31287, 62757, 5288, 41618, 7816, 53332, 32123, 12674, 47479, 35269, 23523, 10688, 18916, 63484, 24769, 50666, 59919, 31854, 44853, 4935, 53968, 3473, 58634, 12035, 48199, 41604, 13567, 43866, 8476, 51140, 26967, 11734, 52230, 5920, 42880, 35295, 19089, 58586, 9202, 55659, 46206, 29475, 6102, 49516, 8265, 22972, 51657, 37941, 64601, 7410, 23406, 49247, 408, 20339, 33706, 65130, 11651, 45250, 22625, 4104, 41101, 29826, 58588, 10849, 52815, 44692, 22991, 2056, 61926, 8677, 33531, 62804, 21695, 37963, 14641, 47820, 20262, 51593, 8356, 60846, 21849, 64264, 17770, 39361, ++ 27803, 61174, 1685, 25318, 6053, 57439, 26940, 44281, 14901, 33542, 58412, 11829, 28198, 49167, 16672, 57102, 32691, 7195, 23143, 47342, 9774, 20089, 6118, 60439, 32385, 49306, 58299, 25449, 56921, 47223, 17194, 22297, 10842, 36033, 15260, 33668, 60197, 16692, 50080, 25304, 45437, 55923, 17469, 9182, 61943, 30077, 40457, 18282, 34582, 3508, 64651, 37466, 23667, 30262, 11849, 51783, 25080, 658, 34001, 57765, 6421, 30593, 51035, 4499, 56327, 42923, 330, 19722, 7929, 44059, 51562, 22057, 58619, 29083, 50466, 21204, 52788, 25312, 41741, 16985, 62858, 14052, 55316, 5484, 50660, 34090, 25948, 59157, 36718, 5058, 49396, 28470, 2974, 47576, 37089, 24334, 1546, 52717, 27081, 6482, 23325, 54555, 26039, 7417, 63949, 31316, 5679, 24376, 36092, 28919, 44095, 33870, 696, 60088, 38618, 21928, 32753, 12197, 37819, 5611, 45020, 15038, 25563, 39561, 58642, 14545, 51079, 20403, 33038, 44261, 21509, 63692, 46871, 8199, 41747, 4596, 51456, 27738, 59218, 34675, 18054, 51095, 54693, 31978, 58716, 7995, 17775, 38835, 11842, 44181, 
2253, 20051, 52609, 4344, 46257, 25176, 57249, 35923, 61420, 21602, 48334, 40825, 19754, 45075, 38173, 14112, 56302, 42075, 316, 35608, 23817, 44483, 2581, 47026, 23026, 55957, 15830, 26411, 63083, 21489, 56558, 5094, 44580, 54709, 38723, 3261, 43249, 7119, 16012, 40012, 11162, 65038, 26018, 39249, 15342, 62900, 7531, 25763, 63790, 17550, 39783, 4456, 36068, 59705, 20522, 30137, 64054, 13264, 26787, 45391, 30906, 17673, 693, 65449, 43143, 21233, 33612, 62368, 14407, 9947, 46433, 31064, 58406, 12906, 40479, 59297, 47536, 4842, 37334, 28656, 61658, 35081, 56092, 12607, 43500, 27357, 36042, 16024, 60185, 38664, 19966, 55589, 13878, 44862, 3385, 54042, 31688, 63344, 1472, 55252, 33027, 44198, 2195, 25874, 46327, 7315, ++ 52418, 22637, 36473, 54988, 41027, 10134, 37477, 53346, 22337, 48069, 18798, 43053, 1462, 62914, 39765, 13599, 52222, 43490, 59404, 15833, 55579, 42550, 52500, 38595, 13472, 22639, 6604, 43306, 14819, 412, 37991, 58925, 28994, 47813, 57447, 7318, 40246, 30678, 5476, 62262, 13526, 28897, 37729, 43369, 4101, 54758, 24290, 63037, 9643, 42743, 19813, 48743, 8435, 44105, 59142, 6045, 41606, 15566, 53238, 8931, 46382, 17633, 39346, 21908, 34821, 11177, 54482, 39819, 53031, 32338, 16510, 37948, 1071, 43289, 10955, 59330, 3011, 47607, 26949, 56406, 1302, 38421, 24550, 42526, 21067, 12270, 47213, 7497, 22273, 56536, 30795, 9814, 61925, 22878, 11493, 64720, 43164, 10792, 47881, 32166, 45338, 14324, 40207, 50586, 11738, 47085, 39093, 54269, 13657, 65322, 9156, 56125, 16123, 29519, 7004, 62035, 4480, 53448, 26952, 55956, 9500, 33424, 49159, 2140, 34467, 47176, 8828, 27346, 60236, 5523, 52035, 14058, 30225, 54020, 22291, 61343, 11251, 40523, 3128, 48882, 9029, 24467, 13383, 42252, 20602, 45390, 29830, 55952, 25774, 50684, 36761, 58404, 13187, 37640, 63845, 1568, 16180, 42321, 9021, 53439, 4617, 31351, 56862, 7345, 27139, 49116, 22662, 18594, 46644, 54903, 10376, 64490, 33234, 13280, 58581, 19362, 35801, 46319, 3908, 38220, 17190, 64701, 26850, 14414, 32399, 58179, 
21263, 52975, 28475, 49160, 22647, 35997, 18044, 50864, 32660, 20240, 55065, 35118, 2216, 53181, 31054, 47227, 15650, 40862, 188, 48108, 7908, 37975, 56713, 3980, 62604, 41270, 23528, 37498, 13471, 55024, 2275, 41024, 29194, 48609, 3339, 42110, 17287, 53878, 26520, 10230, 24078, 55545, 18575, 42706, 9451, 16359, 47738, 6406, 23501, 64905, 3654, 54290, 9255, 28997, 43112, 6218, 52027, 25679, 60578, 18317, 9564, 42725, 26862, 36785, 15739, 28549, 51085, 37615, 9703, 57027, ++ 30819, 14714, 48208, 12619, 21314, 63814, 16945, 3839, 61424, 6939, 29924, 54078, 34780, 23554, 8019, 46823, 25699, 477, 29417, 40351, 2563, 34596, 10675, 24465, 44152, 64076, 18538, 30904, 45740, 27031, 52070, 5742, 40995, 17930, 23387, 44422, 12333, 53723, 34422, 42106, 10429, 52886, 1742, 48962, 26307, 12560, 45917, 542, 53549, 25479, 58369, 15086, 54292, 26712, 17086, 32599, 64418, 28157, 49218, 20843, 36051, 58564, 1470, 65335, 29469, 48877, 24727, 14259, 29786, 3364, 64832, 13420, 55898, 23618, 40817, 31953, 15394, 63605, 8579, 34263, 46261, 19030, 61265, 9218, 57589, 30095, 37959, 54409, 43624, 14937, 41986, 51818, 17204, 44460, 56799, 20642, 34357, 58897, 16702, 56093, 4020, 63383, 19799, 33000, 59230, 22074, 3311, 27476, 45784, 19616, 39794, 22790, 49303, 35807, 55037, 24947, 43272, 19828, 47660, 17004, 41220, 62436, 19417, 54681, 13344, 29228, 37480, 55582, 11922, 42156, 24834, 40028, 3363, 35251, 15109, 32546, 47278, 20108, 56387, 30421, 44670, 61756, 37943, 207, 65441, 10064, 48578, 3608, 62343, 14038, 29196, 8393, 48015, 16857, 30331, 34327, 51513, 19045, 27856, 33198, 60285, 17893, 10605, 36620, 53925, 3539, 34078, 62266, 5945, 25331, 40370, 21226, 50267, 27267, 39018, 1579, 50909, 11715, 59118, 30968, 42661, 2087, 36312, 50065, 6317, 41417, 13387, 37060, 62656, 2377, 55760, 8311, 60701, 1312, 42968, 28097, 5213, 45710, 61508, 24697, 10992, 58428, 21768, 53901, 32867, 61167, 22694, 51520, 15953, 33402, 12143, 48900, 7015, 59036, 30538, 26329, 61087, 16254, 56839, 24875, 60110, 
21631, 37056, 5904, 44837, 32473, 38891, 14630, 51911, 25780, 57324, 31975, 21184, 62419, 38088, 14382, 46141, 32319, 47341, 17500, 64132, 31208, 37136, 11174, 34933, 46721, 23348, 58522, 12147, 49792, 4267, 56200, 13732, 59816, 34367, 18666, ++ 41472, 63074, 5109, 59362, 32424, 29142, 51445, 35442, 24415, 38915, 45122, 14048, 21042, 57887, 31743, 19156, 65232, 38146, 12545, 49790, 21712, 57682, 30032, 48259, 1952, 33947, 50477, 54718, 11627, 62724, 20399, 32512, 65066, 2481, 54979, 26549, 63282, 918, 20139, 58458, 24659, 32652, 65162, 19069, 34037, 61102, 29408, 36994, 14030, 44931, 4714, 31772, 39482, 2072, 56067, 45376, 9824, 40160, 3228, 61383, 11675, 27184, 52153, 8364, 16674, 60626, 5855, 46938, 57634, 41853, 26638, 46105, 33888, 17723, 62080, 7296, 43786, 35805, 22759, 53744, 11726, 31072, 48221, 35526, 2572, 51511, 16545, 3875, 25399, 59883, 1047, 26724, 38821, 31838, 3559, 49849, 7705, 25609, 38302, 18954, 42188, 29039, 48576, 1746, 17317, 38043, 60588, 50914, 6790, 57362, 2101, 52433, 11416, 18865, 46149, 10059, 34885, 60824, 1457, 57123, 27686, 3649, 30700, 43670, 22594, 63408, 4050, 18752, 46185, 31805, 58937, 9778, 57065, 42951, 59635, 25269, 1117, 64348, 12363, 36622, 4192, 18843, 33088, 52767, 25108, 35578, 21553, 40440, 33926, 19189, 43065, 60576, 23885, 55052, 10513, 59426, 7621, 63180, 47333, 13434, 50018, 24386, 64046, 46165, 20892, 59055, 11898, 43859, 29151, 57916, 4492, 59736, 7178, 43183, 10811, 61172, 29669, 44104, 24451, 8862, 52584, 22156, 59563, 18486, 56075, 30117, 60985, 9408, 19418, 45512, 26631, 41252, 30806, 47445, 10048, 52352, 36670, 13019, 18655, 38401, 49563, 3322, 42603, 26105, 9706, 16895, 43728, 2555, 36379, 54477, 20934, 28314, 52415, 19511, 47834, 9569, 38494, 44230, 5232, 35992, 11758, 50915, 28107, 56481, 15430, 64113, 2142, 60490, 7529, 46549, 3159, 39510, 53476, 1204, 31029, 51699, 25151, 7003, 58034, 26147, 897, 48786, 15252, 57596, 2504, 30199, 52691, 5864, 45567, 29602, 65184, 19787, 42038, 5466, 23907, 49216, ++ 647, 
35159, 26461, 44734, 2664, 42170, 8603, 46986, 11477, 58874, 2376, 64547, 48439, 4391, 50790, 35692, 5884, 53496, 18025, 61570, 28013, 5134, 63201, 16866, 56609, 9277, 28392, 3242, 34957, 39837, 8188, 49719, 13183, 37006, 51233, 8992, 29737, 38772, 48410, 15749, 47100, 7110, 39638, 14282, 51998, 5338, 16852, 50328, 22511, 60414, 35494, 62170, 18671, 47256, 22192, 36578, 14454, 60295, 24478, 43681, 32117, 41963, 23318, 45165, 33463, 40983, 21391, 36759, 9883, 19088, 60165, 8977, 51150, 4110, 48552, 30288, 57291, 20067, 51755, 4828, 28269, 58468, 6704, 23694, 62480, 27380, 40631, 65076, 33451, 20248, 35889, 53386, 6822, 63743, 15360, 29645, 40797, 54116, 528, 60885, 9689, 35766, 13291, 53475, 42889, 29856, 10243, 15787, 33379, 42089, 26822, 31965, 63639, 40428, 2927, 51152, 15438, 30997, 39024, 13033, 36411, 52548, 10641, 57703, 6328, 49732, 39268, 61074, 23499, 109, 17506, 48499, 28589, 19589, 6119, 52383, 38665, 28291, 43740, 21737, 50249, 58001, 6897, 46749, 14818, 57580, 5083, 59826, 10965, 54418, 777, 32234, 5960, 40079, 49243, 26956, 43921, 22914, 107, 39548, 5325, 35104, 15029, 1156, 41434, 28316, 38761, 16372, 51949, 13830, 37197, 30387, 47800, 18110, 34379, 53824, 20728, 6650, 64025, 33843, 13625, 48088, 28028, 10224, 46007, 967, 25525, 47874, 31526, 34940, 5989, 53423, 20787, 14145, 58017, 22244, 64545, 30384, 56217, 8847, 27816, 34575, 65134, 13896, 57250, 38711, 29096, 63024, 25056, 8406, 60405, 44702, 11165, 34901, 3642, 50573, 12803, 32354, 53577, 18415, 45612, 1083, 61979, 9001, 34760, 22411, 48963, 29318, 36285, 20054, 63428, 13258, 27663, 49709, 10451, 19578, 60921, 39748, 13064, 35428, 41683, 22007, 50274, 24746, 40504, 64611, 16197, 34098, 21223, 40953, 10557, 25078, 47246, 31984, 62103, 11654, ++ 55832, 20832, 51918, 18151, 54490, 15332, 62466, 19994, 56015, 31317, 27117, 9905, 36891, 28685, 10837, 60716, 24681, 44540, 33046, 8480, 37414, 45974, 12045, 40792, 19559, 36839, 60975, 23652, 59327, 16324, 55670, 24920, 42414, 19018, 31607, 45577, 
14601, 60524, 22913, 4515, 35195, 56242, 21276, 59380, 31001, 42444, 57668, 7837, 41105, 10767, 27936, 6783, 52370, 12455, 63891, 4232, 50773, 30941, 7689, 56600, 15948, 4983, 54037, 12377, 61677, 2774, 55426, 25786, 62574, 35434, 6215, 38912, 24369, 54761, 27276, 12592, 2096, 40006, 13577, 65369, 41336, 17416, 39134, 45050, 10694, 18345, 49008, 13048, 46456, 8763, 61462, 13628, 48699, 19225, 45195, 60115, 12172, 22369, 46698, 31115, 51723, 5386, 58176, 25188, 8073, 64648, 35091, 55282, 20834, 62353, 12823, 47314, 5931, 24099, 57784, 28092, 64874, 7642, 50342, 23116, 63098, 17887, 45756, 24569, 35509, 16082, 26623, 7983, 50436, 36295, 62635, 33722, 12712, 64814, 45271, 16878, 10503, 54907, 7659, 60176, 15628, 26771, 39793, 11488, 43428, 31120, 49865, 16507, 38199, 47146, 26514, 45644, 64922, 21919, 3203, 38510, 14446, 35661, 55730, 29349, 58551, 42953, 52747, 30707, 51245, 9531, 60979, 1908, 32651, 48754, 20151, 55563, 15477, 65318, 25817, 3373, 40924, 49062, 16848, 55199, 40112, 5558, 61774, 34689, 23293, 39580, 52098, 4609, 56826, 15549, 63631, 11890, 59347, 33783, 40355, 6408, 16385, 42034, 603, 45100, 60095, 19746, 6649, 31785, 46640, 5333, 50358, 12638, 47630, 40226, 31581, 1363, 64207, 42385, 57640, 24224, 62855, 22264, 7755, 64934, 31398, 39243, 19367, 43279, 52225, 12191, 41637, 16786, 50710, 24625, 33180, 44171, 18154, 59630, 34062, 42432, 2417, 49416, 20800, 55983, 10027, 61456, 5226, 43713, 19259, 7604, 39169, 56532, 190, 60106, 35548, 54349, 2855, 38562, 16357, 43237, ++ 29785, 39881, 9201, 38042, 24762, 46124, 33925, 89, 40108, 16126, 52550, 41830, 17182, 56507, 45838, 15522, 41168, 3600, 55070, 14629, 59825, 21140, 51397, 26706, 53745, 4476, 44751, 13984, 41516, 30335, 1540, 46870, 9972, 59871, 3859, 56764, 35703, 6290, 50599, 64241, 28221, 9796, 44816, 2990, 23300, 11948, 47600, 32921, 55505, 21046, 46592, 38569, 23986, 33651, 42238, 29202, 20324, 54881, 35367, 18893, 48125, 63079, 37121, 19907, 27831, 50397, 15200, 47842, 1755, 30989, 49738, 20626, 
63132, 15703, 37052, 59771, 45724, 24938, 49322, 33049, 381, 60488, 14471, 54913, 32046, 52869, 1681, 29407, 55965, 21818, 39959, 32502, 24137, 55440, 2073, 26404, 36403, 50402, 14655, 62070, 20340, 39691, 27889, 46071, 22618, 48997, 877, 44378, 25686, 4924, 37740, 17661, 59565, 34141, 14095, 44640, 21142, 42306, 26354, 4805, 40124, 8330, 59804, 838, 42630, 64634, 31300, 53092, 14717, 44585, 6824, 25744, 50971, 1905, 37254, 30872, 49294, 33970, 23962, 41243, 2162, 63432, 29464, 55566, 23317, 1507, 27467, 64080, 7155, 22751, 56483, 9249, 17485, 33465, 57780, 20190, 62053, 6617, 45327, 11369, 18369, 22196, 8270, 62551, 24953, 19292, 47198, 23391, 63776, 8936, 41873, 812, 35386, 8210, 45227, 57300, 14534, 36993, 28923, 621, 25147, 44946, 16228, 53616, 12235, 65484, 17617, 37760, 21887, 43920, 24527, 38596, 3793, 46361, 25323, 50419, 28966, 48770, 23804, 33227, 14815, 51745, 37364, 54962, 18084, 59282, 27390, 20096, 4650, 53502, 17259, 39022, 25657, 18188, 14212, 36878, 276, 47136, 40718, 27454, 14785, 57919, 25548, 3866, 30339, 62759, 5618, 58751, 632, 54496, 8754, 58213, 4574, 37561, 8062, 55198, 17014, 29525, 63204, 4307, 33367, 28267, 54677, 12498, 59403, 50837, 26564, 13413, 48529, 17593, 28067, 8771, 63696, 22289, 53024, 7067, ++ 58641, 3727, 64836, 13237, 57736, 6380, 27941, 60408, 22031, 49332, 5266, 33267, 61801, 22493, 945, 30526, 59074, 19836, 47876, 31153, 41981, 1177, 35299, 7540, 64907, 31938, 22084, 48717, 6879, 52661, 21328, 63838, 34239, 27872, 53219, 17001, 21746, 42972, 27217, 11553, 41414, 54459, 36230, 49581, 63480, 38236, 26869, 1416, 17534, 65477, 2734, 56889, 9259, 59857, 203, 58086, 13038, 45986, 2342, 38946, 10248, 29892, 826, 57363, 44379, 7422, 31869, 40283, 17344, 53522, 12028, 44808, 648, 42433, 9456, 31596, 18226, 55512, 8071, 21488, 50796, 26182, 36574, 4244, 42742, 23039, 58829, 37468, 6133, 43981, 4578, 58274, 10423, 35142, 41376, 9185, 65491, 4333, 33528, 7174, 43777, 16179, 62976, 11041, 56382, 18066, 40967, 14531, 58657, 50130, 
30507, 53959, 8859, 48422, 37108, 223, 55668, 10961, 58850, 33789, 48694, 28836, 32474, 55256, 12430, 19961, 47477, 2637, 29954, 56624, 21104, 54390, 39103, 23063, 56085, 20754, 4919, 61966, 13233, 53709, 31613, 47648, 5575, 17389, 62581, 41702, 53380, 20336, 35005, 51314, 15135, 37061, 52364, 42009, 12108, 53176, 31541, 50812, 25468, 64549, 32542, 54838, 37393, 13013, 39932, 56104, 6224, 34861, 17041, 54274, 27642, 61633, 24275, 52302, 21689, 31910, 60489, 9630, 51733, 62373, 19955, 57783, 30577, 3052, 43382, 33100, 7926, 58796, 10593, 51259, 229, 49642, 29855, 16954, 62198, 2656, 57046, 11385, 54173, 63362, 4132, 44185, 23105, 1753, 41795, 10523, 35067, 63741, 45753, 22076, 58289, 6334, 55945, 32982, 51786, 27930, 54664, 20188, 59423, 4345, 50144, 10649, 48175, 54841, 40069, 20998, 33884, 27232, 45731, 11325, 40984, 30100, 48347, 22169, 64414, 26373, 45536, 11543, 36694, 44468, 53168, 18033, 38278, 30792, 22713, 3786, 32467, 42299, 61844, 6674, 52112, 44597, 31245, 14287, 49965, 27297, ++ 17321, 45331, 28791, 47577, 22892, 36328, 50913, 7897, 44056, 13001, 63487, 24122, 7349, 42855, 54595, 39414, 9086, 27541, 63734, 6348, 23008, 53053, 25276, 47440, 15413, 39184, 58561, 10470, 61746, 26283, 38303, 5047, 43712, 11891, 24011, 39460, 61415, 2081, 58030, 32161, 18494, 78, 16175, 25992, 8223, 19434, 61530, 34788, 51015, 28596, 15674, 30765, 49059, 25702, 18002, 39740, 49853, 22884, 64942, 26043, 59626, 51404, 33025, 13805, 37743, 23820, 58386, 10647, 63964, 22947, 56927, 28877, 33280, 52592, 22303, 64203, 5300, 37625, 61613, 29275, 42227, 9773, 56854, 20913, 63917, 7831, 34512, 15635, 63316, 28145, 50982, 17925, 47329, 28855, 52583, 21308, 42544, 27410, 57421, 23784, 55570, 2374, 33930, 37389, 3746, 31741, 61558, 28531, 9537, 23477, 1379, 42759, 29120, 16728, 61807, 22332, 31601, 46524, 18484, 53667, 13768, 16414, 51426, 21651, 37692, 40572, 9403, 58344, 34851, 11458, 41152, 4312, 15865, 46650, 8704, 58498, 42474, 26268, 45853, 9574, 36044, 19365, 51898, 37329, 13619, 
33271, 9792, 44097, 30006, 2487, 61675, 28649, 4541, 24652, 46631, 1715, 28166, 15845, 41257, 3823, 47864, 2356, 26831, 49417, 4233, 31790, 44810, 57479, 37967, 12372, 45802, 5480, 39499, 12867, 49685, 2224, 26730, 38575, 12051, 32717, 42047, 7468, 36644, 60246, 26115, 20358, 48980, 29302, 41891, 27604, 64217, 18808, 54552, 37240, 9615, 43576, 19183, 35631, 7781, 21327, 40051, 12257, 62348, 29649, 56403, 24362, 52673, 799, 33920, 30017, 13645, 41579, 10266, 46808, 5035, 43809, 8782, 34305, 16575, 52627, 32781, 23141, 35365, 17472, 8216, 55789, 47242, 14023, 36755, 65500, 23829, 15829, 56758, 12371, 40602, 5443, 32825, 52293, 15666, 24052, 8397, 65252, 1652, 46425, 36128, 62940, 47633, 9335, 23794, 37272, 19498, 39673, 12272, 58005, 1165, 36946, ++ 60754, 33645, 10656, 53624, 1790, 64066, 14477, 30643, 34913, 55295, 18518, 38356, 50335, 12208, 26129, 17789, 51747, 36553, 13209, 43793, 57304, 11418, 61996, 18262, 43081, 167, 28643, 36174, 17578, 46513, 14468, 57789, 19873, 62405, 49029, 7594, 33167, 47524, 13834, 52440, 44226, 62516, 30301, 57030, 40636, 53084, 10297, 43948, 6143, 40061, 53984, 43504, 11467, 36885, 53407, 27442, 5527, 34237, 14727, 43092, 6514, 21175, 46696, 17822, 64584, 3781, 52426, 28317, 43448, 4568, 38183, 13924, 58913, 6869, 47191, 27970, 44328, 14995, 34774, 3208, 53215, 16319, 46772, 30558, 11569, 50227, 19423, 48034, 25115, 12054, 38128, 16, 62811, 15993, 5520, 59486, 12881, 51274, 17527, 38693, 30043, 49534, 21089, 47533, 24560, 52023, 6183, 45116, 52968, 39359, 64398, 20237, 56593, 4249, 41400, 12096, 52194, 6415, 38207, 1969, 65211, 44052, 3233, 61601, 5825, 54172, 23834, 43178, 18022, 64130, 27585, 60549, 32197, 62942, 29592, 13905, 35075, 468, 65117, 22438, 57293, 25564, 8260, 58905, 49102, 3416, 61253, 12754, 57020, 40874, 18453, 44850, 30910, 58968, 13879, 39152, 60764, 8846, 56771, 36149, 16782, 44339, 59231, 15565, 65058, 20610, 10899, 26050, 2911, 50513, 31220, 58278, 28624, 63152, 17556, 43653, 54616, 22581, 47150, 4421, 48608, 
15311, 50795, 9236, 46787, 55497, 1869, 61951, 14908, 35507, 12698, 44409, 7300, 23228, 59633, 27122, 39143, 60941, 31393, 46181, 26581, 50937, 16627, 48483, 7176, 36787, 15441, 43368, 9382, 61808, 49923, 27091, 64768, 22842, 60642, 15272, 39707, 63467, 29044, 6583, 42243, 60939, 1656, 63876, 37655, 25208, 2624, 61489, 19775, 6178, 43650, 52751, 1573, 35654, 51055, 23170, 62081, 331, 57101, 42649, 27035, 48112, 14549, 58334, 10817, 16830, 20454, 53737, 29970, 57258, 2091, 61241, 25589, 32956, 46521, 23222, ++ 8116, 48936, 20405, 32255, 40716, 19412, 42668, 59646, 4015, 26652, 47269, 2019, 29654, 58270, 34515, 64410, 2850, 49004, 23874, 34124, 16601, 38738, 4040, 33467, 50835, 56177, 12972, 54500, 32824, 2780, 50219, 31232, 40443, 625, 29451, 55229, 10986, 25425, 38055, 20758, 6833, 24184, 46191, 12880, 31882, 3706, 48291, 24831, 58837, 13384, 23112, 4441, 63294, 19705, 7144, 62665, 44533, 10893, 52687, 28760, 55706, 40692, 9083, 49477, 26802, 42596, 18626, 34501, 8156, 46445, 26320, 48981, 16922, 35959, 19546, 53975, 11055, 58102, 47898, 18925, 38606, 59567, 5724, 35279, 44537, 27137, 60907, 2904, 54188, 31437, 57113, 42909, 23409, 34032, 46204, 30609, 36969, 1255, 44806, 9957, 63559, 13795, 54644, 8487, 59687, 12598, 36616, 19295, 34389, 15227, 46802, 11564, 32822, 51517, 25915, 63451, 35372, 24689, 60194, 29798, 20637, 36751, 25350, 45518, 28263, 15182, 59474, 33129, 5136, 48276, 10272, 43844, 19133, 7167, 52835, 40745, 18504, 50641, 16315, 39505, 3853, 45008, 34453, 21155, 28083, 46137, 24147, 38916, 6316, 25680, 54196, 11172, 62877, 7829, 50152, 18904, 34487, 48685, 20999, 10110, 61562, 24035, 7020, 33986, 42161, 53633, 29869, 60599, 43078, 22052, 18971, 10036, 36525, 4063, 41507, 33431, 6071, 63661, 19139, 57077, 27409, 64409, 18289, 23964, 38931, 13931, 34058, 22860, 52814, 4049, 57577, 31656, 55279, 33004, 13697, 47784, 4806, 15221, 52939, 1131, 58849, 9110, 30854, 39423, 20827, 61297, 28600, 49224, 19021, 37823, 3252, 16404, 35882, 1930, 30418, 49019, 
24543, 2717, 45111, 56148, 12020, 19051, 46178, 29635, 13509, 44338, 53262, 31705, 50024, 28885, 38342, 18806, 28154, 60670, 14973, 47061, 20165, 39004, 31518, 6452, 60000, 34450, 21405, 41202, 28945, 55655, 38812, 4745, 45736, 14953, 51175, 34627, 6033, 55481, 18883, 42380, ++ 54729, 15487, 62746, 26027, 5745, 56436, 25021, 9466, 52843, 37159, 11072, 61063, 44661, 20203, 4933, 46609, 32348, 10273, 62812, 1764, 49945, 26920, 60137, 29747, 9606, 24165, 45280, 5959, 42142, 57158, 23245, 8813, 51912, 15891, 37447, 18155, 45074, 64705, 3289, 55782, 36709, 59725, 5131, 51423, 22050, 64477, 15268, 37349, 20460, 45752, 60869, 35730, 29987, 47920, 41271, 16199, 31325, 61267, 38144, 17017, 3122, 24913, 62210, 35096, 5651, 56167, 13121, 59940, 51715, 20234, 61915, 2514, 39645, 60822, 3546, 40725, 23464, 1436, 25676, 62730, 12402, 28536, 23947, 52233, 839, 41054, 9541, 39585, 14796, 45781, 19977, 8411, 60544, 11184, 53845, 18763, 64147, 24840, 58524, 28330, 4768, 40664, 18474, 32554, 41920, 27167, 57238, 3026, 62690, 7732, 26569, 60712, 38663, 7147, 43904, 19144, 3504, 54557, 9828, 41013, 47948, 7441, 57411, 11677, 34354, 49382, 1557, 26041, 52111, 22077, 38145, 971, 49944, 36899, 27027, 2980, 59775, 32935, 48151, 28687, 60955, 11991, 55201, 932, 40268, 15777, 50934, 19727, 64679, 47488, 378, 42563, 21436, 36463, 26244, 65260, 5622, 23538, 54493, 43318, 29054, 38341, 51002, 27502, 17731, 473, 48226, 14701, 7681, 64361, 40509, 53360, 23627, 59461, 11204, 56489, 15054, 35971, 8533, 29911, 40644, 1279, 35232, 54103, 4989, 63008, 45301, 6736, 39754, 47361, 21180, 8723, 40895, 1561, 51928, 20147, 65283, 34266, 24150, 41371, 17767, 35940, 64321, 2959, 45269, 11561, 54732, 5598, 59750, 23678, 56778, 42562, 53065, 21086, 55431, 11422, 38109, 58129, 20532, 36215, 26462, 51441, 38647, 10004, 59096, 6827, 16186, 22757, 9345, 56216, 3463, 62318, 34647, 7264, 42012, 30475, 9624, 59051, 13612, 50129, 19000, 3188, 51482, 7209, 45109, 772, 33739, 64455, 10341, 27529, 43357, 21555, 41300, 
11263, 65062, 4489, ++ 30162, 39045, 3086, 43734, 50089, 11978, 38654, 48547, 21461, 16497, 57364, 32002, 14209, 40508, 53812, 15172, 21816, 56297, 29152, 40187, 55373, 8730, 46182, 19370, 64615, 37707, 20576, 63082, 27282, 12403, 35450, 65406, 25708, 47984, 60853, 4321, 40935, 22558, 49897, 16579, 28792, 9471, 39145, 33759, 17847, 41911, 29278, 55028, 785, 32384, 8768, 51655, 16721, 1931, 55362, 8524, 50243, 1103, 21976, 48531, 58742, 32497, 11882, 53861, 22533, 39225, 30458, 90, 36501, 11596, 32209, 55097, 10286, 25180, 50059, 30769, 64782, 33832, 43170, 8651, 32455, 45553, 64471, 13753, 57947, 24630, 55630, 33189, 64883, 6638, 51915, 36172, 26017, 40336, 3348, 49190, 8006, 39286, 15108, 53118, 35532, 60410, 45679, 292, 65003, 16551, 48167, 30850, 21594, 54954, 36136, 2301, 22886, 58091, 13499, 49054, 27824, 45212, 17254, 56874, 14384, 32127, 50836, 18828, 63886, 8568, 38881, 62174, 16609, 55715, 28968, 57848, 24348, 12216, 55101, 44489, 10878, 23645, 6381, 52554, 14602, 43013, 18228, 64285, 32485, 59546, 4836, 35875, 31309, 14680, 33806, 49359, 16139, 55397, 2871, 40573, 45956, 30446, 856, 52104, 13600, 4807, 57216, 11601, 59876, 36381, 23130, 52454, 32989, 46532, 1441, 16023, 47666, 29393, 51062, 21039, 46056, 24886, 53090, 44459, 13143, 59021, 31089, 11430, 42503, 28748, 19552, 58252, 26928, 17305, 36471, 60503, 24954, 63184, 28233, 42678, 10328, 49407, 6155, 55565, 25934, 47157, 14041, 33688, 57434, 25181, 32276, 40769, 14589, 46461, 7609, 31862, 12349, 44559, 6942, 62246, 31229, 9261, 49624, 13882, 65218, 5355, 23741, 57260, 33391, 49324, 40332, 60070, 42805, 25880, 47947, 10967, 44939, 21657, 57725, 3958, 53965, 25004, 46016, 27962, 40145, 62578, 25827, 52941, 23401, 60792, 15878, 24888, 49368, 58893, 3478, 63255, 16521, 48324, 35715, 26791, ++ 59274, 13578, 57116, 34294, 17726, 61946, 29460, 750, 65384, 41561, 28332, 3365, 51249, 8288, 25419, 59732, 37934, 6817, 44993, 13821, 20888, 35783, 5367, 52410, 14862, 2312, 49418, 31692, 7918, 52903, 18645, 44049, 
5627, 33605, 12721, 53591, 30739, 8367, 34539, 63127, 42555, 54135, 14691, 58251, 2246, 60157, 7509, 34328, 63767, 27158, 42792, 21704, 57875, 33370, 25234, 59335, 34920, 24340, 57098, 9602, 36352, 19357, 45039, 1614, 60938, 16305, 47693, 25404, 65241, 41313, 21882, 43929, 30027, 14647, 56527, 7674, 13266, 52022, 16744, 56029, 49798, 2415, 20180, 37818, 17763, 47421, 4931, 16948, 22225, 41862, 27785, 2260, 62203, 16659, 55150, 22748, 32922, 47958, 6284, 43362, 21744, 11295, 26141, 51120, 23082, 37945, 5234, 39982, 49784, 10325, 43528, 29711, 50606, 15735, 31226, 61300, 39622, 8675, 33310, 23983, 62596, 513, 26903, 41959, 22389, 46838, 30601, 13275, 45056, 3563, 35351, 14247, 47737, 61494, 15362, 31206, 63817, 38350, 56544, 8964, 37611, 30347, 51530, 7435, 48427, 10689, 27148, 55780, 8514, 58161, 23019, 60149, 9476, 32368, 53479, 12935, 17186, 58373, 33067, 19795, 63492, 40248, 21647, 32181, 45539, 8648, 62933, 39062, 5026, 26599, 34510, 55894, 25533, 6850, 37856, 38, 31430, 59856, 2776, 23078, 50313, 20558, 38117, 56158, 16651, 51640, 9961, 32147, 61381, 2466, 50560, 15869, 46563, 11681, 37622, 3367, 56573, 29420, 38203, 13072, 61877, 5085, 53772, 22292, 8146, 50213, 2116, 65403, 26676, 35391, 19827, 51286, 58943, 34115, 25847, 40429, 17151, 53949, 28537, 891, 43506, 32080, 47708, 3305, 17992, 26969, 88, 35833, 12683, 64185, 17625, 32534, 55057, 14188, 26814, 48741, 37177, 17243, 2580, 56287, 11783, 35769, 8987, 37455, 13179, 43880, 31000, 40316, 7754, 18427, 36528, 29260, 53426, 24341, 415, 50688, ++ 9861, 44416, 24032, 7493, 51619, 35891, 15854, 45998, 32874, 6696, 54257, 23624, 62354, 35518, 43402, 335, 27852, 52164, 18770, 61321, 30838, 58806, 42508, 25918, 39616, 58077, 16386, 41082, 60421, 37195, 1277, 59555, 21542, 56405, 28350, 19514, 58882, 46444, 10610, 26495, 1068, 20076, 48731, 27664, 47033, 23724, 52171, 11168, 49240, 14316, 56528, 3388, 39296, 12038, 45298, 20148, 13699, 42389, 29613, 46267, 4841, 63637, 27590, 41775, 31490, 50743, 8740, 45472, 6645, 
15785, 57836, 5040, 63421, 35047, 41678, 20816, 48467, 26885, 39318, 6181, 22118, 40518, 54622, 7397, 29739, 61828, 35829, 49482, 30175, 59250, 13172, 48559, 31048, 44165, 13951, 29212, 56490, 19537, 61714, 31494, 2779, 56848, 34551, 6973, 44066, 11815, 53777, 14193, 24200, 59936, 17861, 46234, 5657, 55981, 36876, 1121, 20102, 64237, 50068, 5271, 43384, 36007, 60778, 4184, 56222, 11106, 37075, 51627, 20511, 41645, 65495, 7746, 32414, 20162, 42796, 5374, 21339, 46489, 17115, 26694, 63065, 2577, 24780, 36656, 22037, 41474, 52985, 17662, 45287, 37904, 4131, 39679, 27631, 44432, 24280, 37514, 62445, 10453, 42296, 7549, 34967, 47420, 14319, 55483, 3307, 41122, 18681, 12582, 58810, 20267, 61995, 13419, 44176, 61287, 18060, 65127, 42907, 10410, 39208, 62699, 7084, 46262, 3722, 26357, 64956, 426, 41087, 48344, 13310, 43061, 30039, 5425, 34463, 53683, 18437, 44930, 22611, 16142, 57850, 32415, 20548, 43215, 27760, 60454, 42093, 18567, 38363, 12744, 47930, 4278, 63203, 28077, 461, 18336, 48407, 3813, 45989, 21937, 61367, 41083, 15660, 59685, 21444, 54356, 41811, 63276, 20767, 52044, 30775, 4972, 51334, 24384, 1002, 39931, 63593, 10536, 31927, 64962, 44074, 33114, 22437, 54404, 17828, 64010, 48631, 5719, 57678, 20716, 54898, 32194, 46847, 8464, 12755, 60287, 31788, 40078, ++ 21906, 52663, 31124, 61366, 20986, 4700, 55614, 10420, 58970, 19052, 39266, 12708, 45714, 20666, 17456, 65142, 47681, 11814, 41663, 3130, 48672, 10970, 22406, 54159, 7288, 30435, 47117, 3754, 21964, 28876, 45910, 15212, 39053, 9317, 43253, 1773, 36431, 15518, 51191, 32556, 60657, 43627, 6492, 35929, 12131, 39787, 18733, 44629, 5800, 37961, 19124, 50523, 28368, 65070, 5208, 37483, 64053, 3961, 54642, 12719, 39979, 15450, 53165, 7226, 14378, 20965, 59220, 29335, 54568, 34128, 24036, 50534, 18091, 45970, 921, 60263, 36909, 4052, 57552, 31191, 61138, 15480, 33471, 43601, 51212, 11876, 26626, 1342, 53542, 10162, 37269, 20801, 57752, 5893, 35001, 45466, 748, 37754, 12476, 52279, 26931, 46579, 15520, 61263, 
19935, 57888, 28900, 63778, 41695, 643, 33657, 65380, 25215, 40843, 10589, 53416, 47147, 12886, 30327, 21245, 52491, 15893, 28647, 39951, 17473, 53286, 23347, 6161, 60365, 9710, 25027, 53844, 40317, 2230, 59059, 36227, 53468, 1303, 34152, 42061, 19937, 44310, 58330, 13800, 60629, 28984, 1793, 63310, 12543, 29397, 52252, 19265, 63916, 1338, 57423, 6508, 28527, 46902, 22541, 60353, 26403, 2050, 64255, 25052, 50238, 28365, 54972, 35465, 30560, 51458, 9338, 37123, 3579, 32340, 49450, 14045, 27023, 55133, 17148, 33912, 15712, 61050, 32504, 49296, 21741, 36886, 25451, 5937, 54298, 24242, 64755, 55955, 22060, 6923, 59082, 30635, 62556, 8442, 45942, 2264, 51201, 10877, 37137, 61, 15743, 52125, 29283, 58067, 22540, 54083, 39910, 10776, 37604, 60239, 23975, 64496, 12956, 36966, 6044, 34477, 52820, 7977, 30171, 11215, 34925, 14493, 45457, 7624, 58358, 39154, 15512, 46357, 59510, 35305, 5841, 50622, 20937, 7811, 16321, 61077, 4905, 47190, 30145, 1978, 27204, 35021, 12057, 50381, 1386, 62328, 13993, 52220, 38370, 44998, 17159, 6544, ++ 63894, 14824, 1551, 37817, 47065, 27685, 40992, 23154, 43109, 25777, 49471, 31438, 5554, 56805, 9745, 33775, 22699, 57829, 32618, 36794, 17043, 64182, 834, 34749, 62609, 13611, 25073, 55826, 50537, 11307, 63594, 24623, 51499, 30119, 63988, 49325, 23059, 61854, 4834, 40284, 13245, 24958, 65278, 31299, 57323, 4032, 62128, 25794, 55946, 30874, 61611, 10084, 46809, 15041, 54180, 31704, 47487, 27030, 18543, 60496, 33174, 49104, 23583, 37241, 64351, 43254, 2985, 37853, 19011, 48213, 1889, 38710, 9136, 27432, 54305, 11386, 17583, 45120, 22908, 9998, 47071, 26109, 10765, 63523, 3683, 23237, 60037, 39020, 18637, 43111, 63191, 4163, 24367, 50339, 65179, 10611, 59052, 25544, 49678, 16991, 64532, 8743, 40239, 30372, 48649, 2008, 35315, 9093, 32236, 20727, 52595, 8213, 17052, 59354, 22024, 27224, 34619, 2735, 38476, 55381, 10018, 58980, 7057, 48614, 31866, 2408, 58052, 39459, 29334, 34004, 46293, 16933, 28113, 51267, 25412, 9319, 29748, 49485, 61717, 4624, 
54751, 11267, 31776, 46940, 5802, 43604, 20585, 35273, 25003, 47932, 7302, 42917, 14218, 50461, 18198, 48303, 35516, 3628, 56170, 15319, 49775, 31073, 37678, 9879, 43529, 16309, 6432, 48508, 1818, 42357, 16928, 46984, 57828, 22372, 7790, 52706, 34995, 5642, 48172, 28439, 52166, 24391, 10845, 43745, 8050, 60007, 14640, 57228, 33469, 17963, 38488, 10708, 40540, 48663, 26249, 532, 41670, 52560, 35028, 23558, 40206, 58565, 19288, 44725, 63925, 33095, 6450, 44033, 9530, 17369, 30710, 50724, 15936, 42982, 8333, 33318, 51981, 27186, 56603, 18696, 25031, 46684, 39587, 61907, 4529, 55499, 28435, 37881, 19243, 33685, 54167, 8455, 29483, 18384, 43407, 25523, 53401, 41486, 29183, 49538, 38126, 14313, 43055, 56886, 19866, 59594, 41779, 26198, 37722, 23022, 42611, 28482, 5065, 20255, 56738, 47952, ++ 28118, 41867, 58374, 18707, 12456, 64336, 2731, 52315, 8696, 63027, 1294, 60178, 37650, 28762, 50199, 39820, 4364, 26421, 8047, 54810, 24518, 43943, 28258, 47927, 17915, 44508, 33288, 9930, 34429, 40090, 6641, 53945, 3030, 35148, 14244, 7173, 41584, 27524, 56690, 21220, 53169, 2823, 50156, 17119, 22428, 46005, 33092, 16084, 41495, 1639, 24230, 36131, 26355, 40898, 22628, 6874, 17295, 52218, 38658, 8046, 21548, 569, 57523, 10979, 33792, 26201, 56755, 10042, 62876, 12285, 52976, 28586, 61400, 21591, 32941, 50868, 29588, 62306, 34372, 53354, 226, 58973, 36376, 20566, 41617, 31619, 46317, 14389, 56207, 7592, 32355, 52694, 38551, 9017, 18121, 41151, 22001, 42241, 7342, 32106, 36513, 23585, 54228, 4124, 25022, 51866, 18901, 44754, 56289, 13092, 47819, 28407, 35674, 43051, 4686, 62315, 15062, 58402, 42496, 18170, 35019, 45919, 24799, 12524, 63196, 44292, 26519, 15482, 50229, 266, 56992, 5680, 63378, 11713, 45414, 57468, 22714, 13014, 18677, 27878, 50293, 23260, 39168, 16634, 53805, 10155, 57644, 49930, 3089, 60863, 22283, 56397, 30259, 34169, 11906, 25344, 51794, 20475, 39292, 12296, 44635, 5910, 52936, 19449, 61217, 29551, 56638, 22689, 64856, 24612, 54199, 27846, 11401, 41288, 30318, 
40188, 19844, 58167, 12267, 37475, 1959, 41727, 56699, 18627, 53398, 34665, 45778, 29134, 8979, 44638, 1065, 51430, 31248, 14241, 36238, 50044, 12407, 17458, 27274, 64579, 7099, 30302, 49110, 25518, 9967, 21537, 56102, 36121, 62498, 24767, 45548, 5795, 57146, 29758, 55104, 14315, 41500, 1490, 49976, 10591, 62666, 2357, 16890, 26166, 42521, 22049, 50360, 2068, 64709, 10161, 23563, 41305, 56682, 13315, 62872, 1874, 34203, 11357, 57986, 588, 23961, 61976, 9987, 32561, 51814, 15403, 4199, 65327, 11094, 56128, 2992, 61516, 25426, 54275, 33874, 2519, ++ 37079, 9028, 33204, 49684, 26334, 44830, 35079, 30396, 14619, 36438, 16877, 22119, 48275, 13508, 63390, 15722, 53101, 45366, 62076, 14340, 51104, 6076, 38217, 12174, 51829, 4615, 61010, 19986, 58697, 17282, 31080, 42292, 20470, 55457, 26181, 57956, 16910, 47663, 11487, 34889, 45340, 29945, 38620, 9870, 54559, 37021, 8940, 53486, 12636, 59126, 43410, 7777, 52932, 311, 59901, 44183, 62316, 2453, 30320, 51121, 43774, 61705, 29071, 46886, 4276, 51543, 17224, 40219, 24594, 32654, 42194, 14200, 46739, 6393, 59382, 15233, 42832, 5459, 12887, 38357, 19339, 44235, 6893, 52519, 16227, 57239, 5257, 34281, 21565, 44985, 25745, 15769, 46944, 28452, 33617, 54393, 3808, 62520, 20469, 55861, 1471, 43008, 13595, 59208, 38431, 15951, 62003, 27543, 4501, 58747, 38985, 3199, 54792, 12220, 45639, 29257, 51756, 23622, 6569, 26243, 65068, 1361, 54043, 33508, 19708, 36363, 7931, 64375, 18339, 42345, 21873, 35830, 48705, 19257, 38628, 6669, 64770, 40060, 47355, 35564, 8078, 56822, 0, 65341, 33664, 26378, 38543, 15524, 32049, 40764, 11390, 36777, 5147, 61894, 41736, 59692, 8182, 63230, 29783, 54027, 17911, 58058, 23824, 33754, 998, 38601, 10763, 44961, 15147, 39701, 5366, 34118, 63462, 18820, 55642, 1096, 62146, 25958, 45145, 21240, 63943, 30863, 5205, 39903, 12539, 27673, 3001, 63505, 49716, 23403, 61672, 20759, 59709, 4536, 63798, 19904, 32896, 60798, 3658, 48157, 14490, 53312, 4381, 34172, 55194, 47302, 3154, 15032, 41212, 1188, 60892, 34814, 
21380, 2802, 47028, 23081, 58550, 19664, 29405, 43989, 37350, 31611, 57036, 48812, 8928, 60413, 12250, 46991, 27431, 44485, 61251, 2900, 22509, 47567, 38557, 19529, 60237, 45315, 21763, 36364, 54798, 18635, 40787, 24546, 6851, 39357, 46077, 33521, 22137, 47531, 17649, 35253, 40845, 10184, 15631, 59766, ++ 46200, 22755, 53770, 5328, 56081, 10901, 18080, 58037, 47470, 55085, 33441, 53365, 7630, 42060, 2436, 31003, 35230, 19701, 1501, 40690, 30188, 59414, 19110, 56943, 23426, 36293, 41439, 27009, 452, 48511, 62216, 13316, 47289, 10219, 44718, 38810, 31751, 264, 54837, 18057, 7955, 62740, 19291, 58549, 28988, 520, 64188, 21128, 47370, 29731, 17697, 62980, 20802, 48799, 33616, 11374, 23873, 35221, 58313, 10436, 25580, 16445, 41134, 13487, 55971, 20026, 35584, 49659, 5831, 58582, 3641, 64964, 19810, 36160, 39796, 2273, 63788, 24266, 49033, 27876, 65467, 14117, 55379, 28809, 48362, 9268, 27579, 64333, 50924, 487, 61405, 11479, 56966, 1864, 60259, 14703, 27298, 47376, 39736, 11970, 50696, 27973, 63265, 33343, 10065, 42403, 6653, 47051, 37059, 25720, 14804, 63055, 31681, 49438, 19440, 37637, 9216, 48327, 32675, 44818, 13643, 40445, 21521, 49670, 4826, 59289, 47210, 31150, 54821, 10479, 60988, 12801, 33235, 3797, 30480, 52176, 15977, 32710, 3295, 59452, 15119, 30743, 40970, 21518, 48744, 3919, 62392, 8804, 54545, 17019, 64837, 26847, 52857, 21063, 2416, 38107, 16393, 43787, 175, 27238, 35964, 40929, 9124, 62273, 46430, 20866, 58532, 31574, 8026, 60181, 21550, 50747, 2591, 45600, 14809, 48879, 9786, 42593, 4306, 54472, 9076, 35666, 58737, 22508, 62327, 47602, 20224, 37921, 15489, 35797, 11916, 28401, 39501, 45569, 25289, 9342, 54397, 38684, 21703, 43814, 36614, 16987, 62850, 39623, 11962, 28268, 59410, 31495, 51528, 26306, 13559, 48709, 39122, 63552, 11843, 32452, 4860, 38482, 64057, 7344, 14930, 53742, 13105, 20010, 32949, 36526, 24826, 57509, 16530, 4238, 36090, 49760, 30576, 6908, 52657, 27706, 15060, 5337, 51106, 28353, 8580, 44757, 3316, 63653, 49105, 28814, 19090, 
53862, 9159, 31446, 58571, 7013, 49866, 64726, 30561, 19719, ++ 11792, 63437, 25177, 16301, 32488, 60868, 39906, 6175, 24402, 4085, 44099, 11502, 27077, 58463, 24907, 55938, 10755, 60491, 27461, 49630, 21105, 9131, 43002, 31879, 2034, 65055, 8412, 54961, 45198, 22740, 5241, 28020, 36937, 61372, 18922, 4165, 64975, 24279, 37653, 59473, 25945, 42049, 5353, 44111, 15144, 49498, 26720, 40109, 4695, 34722, 50787, 13563, 38867, 9353, 28701, 56808, 40468, 14597, 45868, 20488, 55204, 36005, 6282, 63256, 31172, 44478, 1147, 28124, 53680, 22436, 44878, 31650, 9752, 55664, 24879, 52335, 18698, 35387, 56384, 3309, 31980, 40093, 25237, 1972, 37030, 62626, 40940, 12678, 19695, 35497, 29514, 42458, 23875, 36779, 19190, 51360, 34657, 5138, 29938, 60952, 17787, 45866, 5815, 20992, 50008, 29500, 55200, 22560, 10875, 51045, 40524, 21397, 7488, 24468, 56784, 59, 63683, 16253, 60036, 3728, 55897, 29564, 61676, 10851, 42864, 14642, 22918, 3161, 37916, 27549, 43687, 23793, 54261, 41381, 60098, 26152, 43962, 9950, 55447, 24254, 44940, 62126, 6950, 51988, 13202, 29906, 45473, 23910, 34762, 46704, 629, 43354, 9823, 46281, 31686, 55027, 23285, 33405, 51076, 11012, 65407, 4435, 48915, 25939, 13149, 52032, 4009, 36798, 49629, 26799, 43995, 13649, 38070, 57175, 28958, 36176, 23215, 32902, 60555, 17704, 50110, 14376, 29461, 44251, 1510, 16418, 52366, 7398, 58478, 4140, 54867, 47890, 7881, 16788, 56255, 42258, 27985, 5886, 57281, 11155, 59946, 29027, 8893, 23157, 46219, 17952, 38788, 7820, 20129, 43457, 54620, 8700, 27488, 16720, 53337, 45017, 59929, 9790, 47867, 24186, 51144, 27881, 44753, 65087, 745, 52499, 6469, 40772, 32167, 53547, 20563, 11721, 61798, 33284, 9503, 55940, 37624, 31332, 64317, 12307, 59221, 34385, 52492, 30675, 12820, 55808, 24, 60628, 44009, 14726, 45426, 29406, 23646, 952, 42954, 51436, ++ 36237, 3815, 43517, 48817, 38180, 209, 21643, 50990, 29227, 64924, 19943, 61214, 37257, 17672, 46667, 6457, 44395, 16234, 38982, 4988, 63760, 33980, 53632, 15445, 49191, 29051, 16043, 
38572, 11717, 32459, 57223, 49998, 2207, 23801, 33814, 52768, 12314, 50715, 6229, 33254, 13746, 52309, 35523, 23393, 60270, 32293, 7256, 51801, 60972, 23172, 2650, 58041, 32088, 54813, 18394, 3583, 49929, 26588, 5448, 65432, 2012, 27766, 47994, 23032, 8386, 60198, 15095, 62442, 12773, 37008, 16668, 57088, 26528, 41076, 7201, 29100, 47526, 8883, 42068, 21110, 50307, 8224, 58343, 45372, 23739, 18000, 3064, 49943, 39445, 55076, 6454, 48081, 13392, 63997, 43808, 8080, 58157, 16342, 53638, 9425, 35721, 22349, 57167, 37490, 928, 64295, 17375, 34306, 60217, 18436, 1570, 53587, 44243, 61152, 11244, 41777, 30967, 22704, 39118, 51162, 19004, 8456, 37451, 27304, 52313, 30108, 40901, 56649, 13865, 52739, 1757, 62415, 8376, 20967, 14484, 702, 62797, 34593, 17557, 50750, 12151, 25717, 36391, 19082, 42388, 59006, 18011, 56007, 6217, 25907, 51276, 19603, 58668, 28763, 13399, 49130, 5404, 59291, 14586, 21987, 45722, 31960, 17290, 54682, 34801, 28106, 41962, 18221, 62791, 349, 33265, 59400, 8822, 24196, 6259, 64446, 51789, 13053, 27959, 39444, 25226, 46727, 6497, 51022, 33160, 55754, 30500, 41437, 26604, 42728, 31842, 19075, 57942, 33853, 1806, 51799, 15146, 46845, 31993, 24817, 905, 50613, 42866, 56892, 1668, 61471, 24210, 52772, 64887, 3990, 32809, 19142, 61643, 36557, 6616, 20457, 26033, 35668, 17827, 57778, 3503, 35161, 5500, 39311, 23338, 43209, 18271, 62995, 13777, 28973, 58934, 39697, 17031, 45892, 24635, 42938, 3702, 48336, 17452, 42311, 25949, 15961, 6125, 21305, 47836, 36963, 16973, 27627, 5436, 38770, 21090, 62828, 13273, 55248, 39120, 7585, ++ 27384, 57636, 20535, 8377, 62515, 28429, 42268, 13861, 46345, 9337, 40592, 30073, 668, 50491, 32163, 64671, 36138, 23762, 55492, 46994, 13099, 26110, 3313, 59996, 10601, 46263, 53000, 25396, 62946, 18424, 35669, 14935, 42902, 59225, 8175, 39896, 29191, 45770, 22165, 63435, 48580, 1979, 56124, 11058, 47825, 14189, 42887, 19747, 10665, 37735, 44987, 25452, 5923, 42145, 64525, 36735, 12404, 53342, 42514, 31827, 38339, 50350, 18152, 
52795, 39124, 25075, 42688, 30647, 47280, 7911, 51902, 421, 49286, 13598, 44116, 58192, 16053, 60722, 12087, 30484, 62007, 17094, 34842, 11270, 59712, 32832, 53924, 30945, 9842, 22536, 58708, 17433, 53042, 2693, 30521, 38889, 22897, 45086, 24764, 41592, 3129, 47742, 14569, 31188, 53273, 12380, 41043, 3574, 45383, 30649, 64744, 33176, 16479, 35117, 26483, 52902, 5861, 47513, 11761, 34095, 25552, 46599, 57655, 17131, 775, 62718, 9051, 20214, 34741, 49219, 17715, 31652, 46097, 37239, 56296, 29095, 49069, 22158, 41838, 2015, 37821, 54362, 5037, 61100, 28396, 1492, 37423, 11766, 40195, 59960, 15013, 32954, 38757, 6991, 64374, 18788, 45128, 27804, 40099, 61015, 6797, 56954, 42795, 2247, 15500, 63821, 7107, 53868, 11988, 25365, 53141, 19334, 47916, 31894, 43472, 16610, 3169, 47248, 59177, 584, 53754, 19653, 65416, 11571, 23772, 38860, 10140, 21519, 64252, 12920, 53054, 2718, 43595, 23911, 65227, 13205, 35324, 61986, 18177, 55396, 41102, 20429, 31164, 13827, 35736, 49593, 5470, 34609, 12882, 37437, 58825, 41769, 1995, 49334, 31006, 42672, 55734, 225, 40652, 30328, 61029, 22242, 54766, 15898, 59368, 10352, 31047, 50901, 1396, 43763, 6086, 55344, 26416, 287, 65385, 12996, 58265, 23077, 38849, 1317, 46807, 57283, 39849, 59941, 26691, 8207, 63178, 35460, 50897, 57870, 2187, 48240, 34594, 26456, 18301, 60443, ++ 14125, 40376, 52972, 30946, 13049, 54640, 7252, 59606, 34771, 56570, 15318, 51795, 23050, 42816, 12594, 21291, 3665, 52359, 9570, 29510, 41792, 57582, 37073, 24309, 39526, 21407, 6275, 43561, 3535, 51019, 8980, 64566, 29882, 16439, 48163, 20709, 60377, 9670, 43339, 15940, 30488, 20235, 38206, 28302, 3344, 64833, 34082, 55602, 27940, 57163, 16838, 63364, 48201, 15645, 22227, 30552, 58670, 19649, 7631, 60763, 15834, 9695, 59449, 2721, 34347, 11291, 54851, 4643, 21217, 61166, 38251, 23370, 34662, 64174, 4359, 22734, 37439, 1212, 45829, 54158, 4701, 43429, 21949, 51623, 5976, 46629, 15059, 60635, 26824, 44570, 4440, 40384, 33136, 26231, 49349, 10929, 62949, 162, 55401, 
32402, 65430, 26369, 58496, 7861, 43642, 23920, 48371, 26838, 57535, 6265, 23380, 9677, 48804, 2554, 39851, 14260, 57927, 28195, 62079, 1848, 54376, 14961, 5481, 35266, 50782, 24109, 47849, 59870, 25768, 6818, 39763, 58185, 4948, 24561, 47608, 11138, 39320, 6032, 58601, 27398, 64466, 20445, 32223, 46011, 10585, 49676, 22859, 63600, 31010, 21706, 47593, 4324, 53669, 23707, 41296, 34542, 9392, 55686, 2943, 30697, 37291, 24749, 11694, 60623, 48037, 23476, 38962, 30231, 46119, 35814, 15853, 40795, 4577, 60951, 20698, 54747, 30092, 38450, 21861, 34576, 8336, 42113, 36444, 27188, 57506, 4760, 60829, 46328, 145, 37047, 25706, 62750, 29805, 9869, 37374, 48966, 22752, 3059, 44453, 8195, 33565, 65035, 6242, 54185, 26820, 16267, 44894, 29578, 48012, 22863, 10101, 28715, 52461, 23623, 10945, 65310, 15424, 50536, 12529, 46255, 8180, 42223, 11557, 33917, 48432, 26647, 56420, 37515, 21525, 47991, 34721, 15349, 49177, 36813, 51679, 20285, 33861, 7512, 53707, 62638, 19984, 10919, 32817, 2370, 43612, 53255, 22609, 42158, 9818, 24983, 32977, 11516, 51986, 4346, 44346, 29697, ++ 49244, 22279, 1821, 45661, 34227, 23828, 47862, 19361, 2243, 26052, 38749, 5155, 61871, 8492, 54437, 28098, 59113, 34382, 18553, 62441, 49, 16824, 45627, 7554, 61670, 31265, 56582, 33062, 58246, 27320, 40883, 21848, 54381, 5850, 55992, 26658, 1442, 36069, 57011, 3777, 40657, 62331, 8606, 57757, 41060, 25190, 16333, 1180, 46620, 6619, 31130, 11756, 35443, 52067, 8475, 45453, 896, 33881, 47612, 23446, 55769, 43477, 32787, 22029, 45216, 64669, 17791, 40658, 27058, 33419, 10602, 59072, 29842, 17482, 51380, 32281, 54923, 27261, 33654, 18421, 38666, 26472, 64643, 13711, 29312, 41910, 24684, 1541, 37600, 65063, 31822, 8630, 61909, 14090, 56119, 20277, 28741, 37861, 12960, 18588, 49041, 10444, 39487, 19738, 60674, 32789, 8831, 52384, 13962, 36317, 42793, 55704, 27658, 59558, 20301, 50205, 36088, 17619, 43773, 21024, 41166, 31426, 44456, 64553, 38826, 11956, 32515, 4328, 45161, 65263, 28477, 13332, 51067, 33594, 16533, 
64148, 19796, 53044, 31410, 14070, 43443, 9072, 48220, 16222, 57173, 33357, 52422, 2769, 44610, 8287, 62893, 36169, 16757, 56725, 1125, 61463, 25626, 47792, 15939, 52605, 18511, 50368, 39508, 21200, 32872, 8438, 57551, 20059, 3028, 56020, 10008, 65210, 27327, 51114, 11083, 36958, 7274, 62960, 11870, 56901, 15963, 61517, 3456, 48597, 13721, 43108, 18378, 32401, 50254, 17603, 47142, 6694, 50760, 40835, 19577, 5344, 59205, 39324, 28560, 52261, 12241, 25818, 47484, 18837, 40581, 63095, 11324, 57914, 559, 55542, 17495, 45854, 14427, 57404, 39439, 4168, 29133, 33509, 62074, 25560, 20917, 49525, 63431, 28637, 3047, 45646, 7186, 14622, 60847, 9233, 23894, 63815, 4844, 29991, 10637, 40583, 60536, 27292, 14063, 29598, 35896, 50521, 23535, 64811, 13589, 31973, 3922, 16141, 61882, 19470, 52868, 41441, 16774, 57005, 10709, 62149, ++ 5887, 35571, 65506, 17392, 59040, 3526, 36852, 63969, 31944, 53896, 18265, 45459, 33153, 57235, 38003, 14909, 41117, 7043, 48594, 22893, 32765, 50680, 20159, 34982, 52005, 1229, 14425, 19407, 37346, 12862, 46778, 878, 38089, 32010, 13930, 41720, 51969, 17417, 27705, 54193, 24540, 12766, 46228, 31461, 18234, 51301, 36603, 61763, 21813, 39380, 53857, 43735, 1807, 24664, 61332, 28165, 39530, 62595, 13278, 37326, 3870, 29413, 12127, 56661, 6979, 28858, 51040, 1690, 57657, 48505, 15530, 43067, 2896, 46571, 11151, 39388, 7519, 63150, 14605, 52877, 10369, 57375, 686, 47912, 38096, 7841, 56632, 51994, 16608, 12182, 48811, 21224, 45578, 36263, 5666, 46747, 51803, 6917, 60052, 43186, 4819, 29156, 34946, 50903, 2462, 16723, 62744, 38727, 20562, 61608, 4982, 38062, 13284, 46106, 7030, 32325, 4047, 64936, 8124, 56415, 10404, 60812, 22425, 7618, 19369, 43218, 58688, 16174, 36509, 10136, 55262, 18902, 61245, 7559, 44699, 2690, 36721, 9558, 55905, 4225, 35049, 60264, 24675, 39569, 7637, 14751, 41549, 19985, 55188, 27938, 13718, 50100, 30049, 10824, 51670, 12829, 36901, 20347, 42439, 8621, 64056, 1419, 29112, 59047, 4908, 51524, 13820, 43227, 61696, 30959, 49146, 
22871, 35155, 1372, 41681, 56313, 17387, 26533, 49844, 44668, 32051, 18991, 40589, 29696, 63138, 34226, 53919, 14840, 8615, 60136, 34725, 22198, 15617, 61084, 30974, 53553, 26984, 10421, 63420, 16039, 37858, 58705, 2441, 60673, 7453, 32189, 42179, 21177, 50873, 25306, 40172, 63751, 6985, 34006, 21690, 47515, 54046, 18914, 5909, 56246, 36845, 1686, 17075, 38828, 53207, 19452, 64390, 41575, 25654, 32758, 52231, 18859, 42082, 57419, 46589, 22185, 2736, 49935, 44365, 4511, 56377, 8825, 45591, 17938, 38317, 49486, 58778, 29821, 46539, 36046, 6261, 64387, 37203, 24129, 46706, 32341, ++ 19140, 55736, 9660, 39434, 26846, 52083, 16025, 11322, 43854, 10088, 49785, 24774, 11917, 20423, 2893, 47367, 25338, 60773, 13715, 43708, 56213, 5494, 65364, 12445, 26805, 48061, 40481, 64256, 6905, 53488, 24694, 60801, 17796, 44212, 61983, 11224, 34215, 64042, 47084, 7085, 32849, 50260, 21331, 53657, 5643, 44366, 9243, 29492, 49728, 4219, 26894, 18941, 60086, 41294, 14109, 49314, 4975, 21315, 54256, 26100, 46502, 63570, 19405, 39716, 49187, 14032, 36292, 45922, 18802, 6117, 62769, 20526, 53834, 25493, 60123, 19218, 49527, 21687, 40869, 5753, 48679, 30048, 35989, 22964, 61073, 18891, 33915, 20685, 43267, 54750, 28011, 57846, 1029, 25155, 59513, 34100, 16009, 40761, 30885, 21513, 52753, 62138, 15175, 25320, 56025, 44630, 28554, 397, 46856, 26009, 50471, 19172, 30140, 63907, 22235, 53888, 42224, 24984, 37173, 30439, 48895, 3009, 52988, 27981, 55046, 2146, 28920, 50304, 20823, 42138, 1104, 38116, 27072, 42698, 30269, 59717, 50013, 23455, 45774, 28206, 51213, 18447, 915, 53579, 29524, 64982, 26981, 35671, 5585, 42673, 57501, 2070, 39798, 24936, 44121, 31390, 57898, 3830, 62658, 26953, 35033, 46647, 10320, 44450, 36340, 26304, 47122, 28561, 17546, 5866, 39818, 12885, 45265, 57952, 14971, 24827, 46405, 40019, 2331, 23454, 52779, 5761, 55249, 9486, 21008, 2135, 25030, 56509, 40156, 28879, 11290, 55604, 38377, 1245, 45416, 14001, 56643, 20929, 34852, 49759, 4638, 45191, 30127, 36944, 22344, 53109, 
3444, 38016, 15263, 59611, 4750, 31901, 27046, 44284, 60428, 9171, 24588, 43620, 38153, 14107, 51820, 31475, 58413, 22942, 8775, 35541, 29799, 3943, 57903, 38420, 2448, 44997, 28202, 8110, 16206, 32301, 55033, 18215, 34964, 61574, 15261, 41104, 28526, 55399, 7080, 25366, 11908, 40462, 1199, 56478, 14487, 28001, 7891, 59422, 482, 38532, ++ 25692, 42479, 15100, 50561, 6838, 29964, 41389, 57472, 27809, 60030, 1349, 62673, 31217, 49038, 64224, 29113, 51507, 1749, 36599, 30693, 11092, 38364, 28501, 42182, 58576, 9833, 23320, 44805, 29614, 48899, 10831, 34656, 51590, 3176, 25123, 49577, 5003, 22794, 14646, 39084, 61197, 689, 35200, 11925, 63729, 23853, 58797, 13116, 55180, 15310, 65249, 34460, 10006, 32900, 56344, 17463, 51399, 29914, 41844, 9096, 16741, 34854, 52000, 185, 24502, 61945, 8961, 25872, 52530, 31277, 38932, 8598, 35852, 5373, 41503, 30862, 1820, 61509, 25978, 58822, 20072, 44428, 15824, 55177, 4073, 49697, 10818, 63343, 3467, 34926, 7190, 18320, 50514, 42103, 9623, 23298, 64703, 3631, 56737, 11371, 36635, 1663, 46417, 40171, 7242, 35823, 11648, 58298, 33987, 15380, 54530, 10238, 57025, 1170, 40689, 15563, 60316, 12432, 51962, 16586, 23553, 34446, 38273, 13055, 46919, 24854, 63829, 6209, 53727, 30930, 23060, 51894, 14807, 57109, 21437, 11818, 26476, 40410, 15591, 61435, 12465, 38427, 57839, 21213, 44211, 3578, 48973, 12665, 61681, 17702, 32659, 22370, 47070, 65125, 19049, 7432, 49508, 33590, 22726, 40700, 14034, 56330, 19674, 53518, 16572, 64572, 733, 55169, 37627, 59785, 21422, 52475, 7615, 29198, 33702, 64021, 4957, 31156, 60323, 10308, 33522, 43633, 25860, 59488, 37693, 51512, 44866, 6063, 22939, 58871, 44081, 4934, 27465, 51310, 24657, 7567, 41816, 46627, 328, 23699, 54952, 19996, 14775, 50372, 10584, 44137, 27921, 64399, 8392, 34314, 46860, 12325, 50114, 1061, 16395, 36016, 62924, 2513, 59167, 27736, 9896, 45181, 5157, 40389, 62413, 46752, 12844, 50256, 17760, 11459, 54425, 13521, 62160, 24949, 53003, 63394, 6561, 39402, 10109, 31157, 24226, 52347, 664, 
20877, 61205, 34496, 54514, 20216, 44886, 23209, 50122, 30871, 43449, 21500, 51216, 12691, ++ 54044, 2954, 47335, 23349, 61595, 45085, 21026, 4781, 35927, 22532, 39704, 16630, 41919, 5801, 35334, 10346, 18942, 40197, 63211, 17429, 46475, 22203, 54668, 3999, 18173, 36004, 55303, 16528, 2341, 20984, 58985, 7676, 28186, 57542, 36512, 19860, 31041, 44893, 58372, 10362, 19078, 42685, 59962, 27083, 39666, 2758, 32471, 41646, 20618, 37188, 45730, 7409, 52662, 22953, 3215, 37947, 11093, 64141, 1477, 57237, 48665, 6019, 27294, 57985, 37705, 20714, 41722, 59976, 11919, 23751, 55270, 44731, 28447, 65299, 13094, 55941, 43780, 34157, 12468, 36704, 2788, 63698, 9066, 32159, 42605, 25574, 39664, 28611, 47558, 22245, 41096, 63867, 30222, 15336, 54023, 39230, 17637, 49858, 24437, 44300, 19465, 54692, 27437, 13199, 64086, 21722, 49519, 17974, 41540, 3842, 32003, 43317, 24333, 47258, 33752, 9409, 29139, 45063, 557, 58884, 6426, 63426, 18049, 57271, 32837, 15288, 40252, 33877, 12616, 46339, 61874, 8764, 48159, 4007, 34290, 54060, 385, 63488, 7211, 33030, 43029, 25481, 6545, 47283, 34101, 17277, 58441, 24058, 39013, 50859, 9934, 60475, 14472, 4863, 37940, 58857, 28264, 12044, 54179, 5663, 60081, 24330, 32264, 6352, 41357, 12485, 34074, 9621, 23987, 42502, 1977, 32696, 62511, 18578, 50404, 9232, 53581, 14116, 47696, 20329, 64729, 15345, 984, 45884, 17018, 10985, 28139, 64493, 35487, 3222, 48411, 31624, 62228, 18717, 64695, 36369, 16568, 29396, 60350, 32718, 9572, 42492, 57569, 24481, 62344, 13166, 56136, 19354, 30496, 52156, 20648, 61940, 18403, 56529, 40993, 51260, 13305, 32593, 17586, 41870, 64830, 20304, 55013, 15078, 25195, 433, 55771, 27150, 44205, 60171, 22685, 35388, 40151, 982, 36457, 12153, 42714, 25740, 59083, 48880, 5785, 63982, 37396, 47211, 10507, 43161, 4813, 65079, 31555, 9421, 60897, 3393, 54949, 15713, 34026, 63809, ++ 28851, 58523, 31680, 12039, 33632, 811, 63117, 48365, 14522, 55358, 7421, 52741, 26735, 58762, 21833, 44897, 54183, 24179, 8208, 52547, 2657, 60290, 
15165, 49698, 30362, 61240, 4891, 33650, 63372, 39226, 30823, 42488, 15597, 46389, 9401, 65494, 52917, 2503, 37472, 26146, 55502, 29761, 4517, 45514, 17563, 52495, 49043, 8092, 62806, 20, 30191, 57627, 19988, 46924, 59172, 27477, 44690, 23936, 35704, 19061, 30903, 61473, 14674, 44271, 5169, 54031, 32129, 3464, 35157, 63883, 849, 14826, 50481, 22202, 47790, 24175, 9372, 16549, 46209, 51069, 23598, 40250, 27703, 52457, 13334, 62382, 6553, 59315, 14505, 53369, 10302, 37306, 4655, 61190, 27170, 2172, 46002, 32679, 8957, 63483, 33727, 6392, 59011, 23150, 53494, 31586, 5498, 59818, 29373, 65273, 12733, 52138, 7778, 62922, 18290, 49701, 55463, 25936, 19909, 39411, 45871, 26625, 41872, 4668, 49492, 9254, 60399, 22069, 56160, 3468, 17347, 35708, 25233, 64890, 41038, 17983, 46780, 35990, 22499, 52628, 3053, 63959, 30666, 10337, 62622, 36992, 8664, 54110, 273, 28903, 44888, 26490, 35168, 54767, 21398, 45888, 514, 43166, 17481, 48238, 38263, 2505, 50948, 62201, 23052, 58335, 27574, 49754, 63332, 14652, 54346, 25670, 44333, 3890, 39086, 22550, 42831, 35395, 58391, 6826, 28634, 49504, 36027, 31476, 62063, 49075, 39216, 13372, 19869, 52602, 17237, 12145, 41273, 9155, 33289, 3779, 58297, 48758, 12613, 40073, 64083, 6808, 35422, 1340, 33745, 39756, 5185, 48911, 36250, 2237, 43089, 9673, 38584, 29946, 22479, 6338, 28369, 46516, 52875, 3741, 34447, 11998, 30448, 47636, 33147, 59586, 21161, 37229, 7670, 31810, 5563, 48621, 19802, 58652, 30801, 50693, 21423, 47478, 1720, 19368, 41748, 26568, 12578, 33185, 51587, 17354, 28983, 13838, 48721, 37869, 18745, 39630, 27108, 48021, 5016, 41072, ++ 17755, 8590, 37547, 56299, 20056, 53167, 25494, 8927, 43228, 28718, 34076, 46128, 12795, 37402, 14190, 56770, 4454, 33562, 57943, 27598, 44274, 32337, 8845, 25571, 47457, 13517, 43203, 50340, 26397, 12055, 56324, 4357, 62695, 23560, 39582, 13367, 28638, 41328, 16640, 48806, 7827, 51072, 15756, 36321, 56716, 13665, 22129, 28780, 48011, 24943, 42987, 12497, 36040, 5503, 40335, 14958, 54977, 6760, 
59754, 43192, 10253, 40836, 33546, 22663, 65157, 16280, 13346, 48114, 42464, 17145, 33144, 56922, 10150, 37930, 3770, 58021, 35536, 53437, 28972, 6771, 56824, 11549, 59897, 2067, 47167, 19651, 35327, 44782, 319, 30761, 57104, 25919, 44086, 20107, 35890, 52216, 12438, 58803, 28218, 14412, 41744, 48533, 17273, 38134, 2870, 42354, 51610, 24702, 8479, 45285, 39579, 22932, 36716, 27902, 3241, 37892, 5315, 35406, 62453, 31713, 14087, 51300, 11018, 61547, 29725, 37010, 164, 48455, 26293, 43540, 39047, 57766, 10770, 31845, 5883, 59235, 13475, 29683, 56688, 16950, 48485, 19532, 40601, 55605, 14305, 26099, 46152, 31941, 20646, 64233, 6084, 53213, 8978, 41933, 30504, 15682, 63676, 36022, 55494, 29358, 11148, 43912, 15093, 29973, 38781, 3492, 45450, 20459, 6675, 36592, 47505, 10633, 16243, 58935, 28033, 61800, 222, 18085, 24509, 38234, 52122, 12652, 57751, 22016, 4254, 24075, 7150, 55020, 41958, 26375, 36697, 57380, 21351, 54577, 47758, 43197, 25578, 5820, 21968, 51664, 17316, 26546, 53869, 48299, 17857, 46385, 25989, 16677, 60206, 24059, 54807, 26430, 53612, 3264, 64565, 35094, 58142, 7901, 23395, 49093, 26250, 43905, 60579, 6764, 51492, 10720, 42794, 16437, 49682, 65027, 28771, 56055, 9754, 45791, 17149, 4290, 64531, 14379, 54090, 33671, 56776, 16593, 59638, 22302, 3024, 57603, 40029, 53610, 1916, 26127, 58190, 12401, 63333, 10371, 45830, 20790, ++ 61302, 24669, 44632, 5606, 40674, 13479, 38064, 51290, 17248, 65166, 3892, 19764, 60977, 405, 42436, 29755, 49373, 16113, 38646, 11678, 20904, 63951, 39755, 53836, 597, 37764, 20302, 8103, 52651, 22354, 45483, 18748, 32707, 50583, 161, 54936, 21556, 59360, 5992, 63093, 33499, 23021, 64732, 25600, 6844, 33959, 59576, 38720, 10542, 60708, 16942, 50890, 25968, 62258, 31593, 18311, 50448, 32594, 12979, 25273, 53513, 3023, 50125, 10836, 30105, 46289, 58772, 27915, 7118, 49787, 19768, 40414, 27388, 61769, 32489, 18082, 42278, 58, 64307, 20995, 33213, 45205, 17829, 37182, 31163, 57632, 16090, 24220, 51429, 33523, 17051, 60326, 13082, 
48237, 7653, 62576, 31284, 5242, 38530, 51281, 620, 29875, 61340, 9826, 47467, 20046, 10998, 37282, 55063, 21096, 1897, 57409, 16899, 61014, 44184, 21840, 58224, 11576, 48236, 8619, 54719, 1610, 35846, 21692, 44645, 18615, 52379, 11355, 63151, 19629, 29362, 2309, 45623, 53182, 20570, 37549, 49733, 8472, 42013, 32480, 10999, 51836, 4732, 23171, 50351, 1894, 59887, 11617, 41005, 15350, 48084, 37250, 18133, 59580, 3345, 52208, 13488, 25986, 7711, 21771, 64757, 34292, 52966, 8102, 48761, 17010, 56535, 31424, 51977, 18951, 29664, 57264, 34440, 48372, 12287, 37148, 51265, 30457, 55978, 3640, 41337, 27066, 7780, 42391, 53350, 33798, 60630, 30728, 630, 63777, 8084, 46167, 2001, 30030, 14986, 11586, 52825, 38840, 61791, 2856, 31337, 43699, 13703, 29085, 58495, 9033, 61320, 39014, 7261, 45613, 14031, 37210, 15626, 44799, 11745, 48123, 19876, 39842, 61454, 15995, 54265, 1238, 24104, 36313, 19016, 39654, 4488, 53866, 23731, 1823, 41210, 14748, 34083, 26007, 60960, 38092, 29313, 23304, 37091, 11145, 46225, 3637, 30207, 44102, 63046, 36329, 8055, 24423, 60163, 35154, 6595, 42836, 22833, 32700, 57400, 36535, ++ 1508, 49971, 16390, 64461, 27492, 60660, 31364, 2549, 58221, 24338, 39041, 55892, 32438, 48038, 9285, 64839, 25863, 6646, 59781, 41528, 50940, 6023, 19212, 34178, 59311, 23981, 62279, 28936, 41238, 1562, 35418, 59844, 6701, 25834, 43456, 35063, 8693, 47887, 31639, 19454, 43755, 1259, 47332, 10950, 53322, 42383, 1946, 18607, 54049, 4790, 33340, 56043, 2428, 48426, 9419, 64955, 622, 39054, 47412, 20320, 36985, 60338, 17691, 55421, 39961, 2044, 36588, 21820, 53186, 29618, 59636, 2528, 45589, 15692, 51791, 7744, 60561, 26635, 14153, 50025, 38780, 4965, 54242, 24856, 8321, 41639, 5330, 55526, 39060, 8748, 46448, 2573, 29467, 55849, 23690, 42424, 18972, 55245, 22071, 57482, 25836, 15726, 45495, 34759, 56352, 28070, 63231, 30565, 16041, 61834, 34356, 48035, 6679, 52818, 13712, 33073, 50591, 18794, 23952, 40375, 28712, 65167, 15793, 55811, 6993, 58471, 24487, 41265, 34923, 7341, 
50542, 62528, 15941, 27732, 44136, 1682, 24189, 62182, 5159, 58235, 27215, 36334, 60587, 43848, 29231, 38175, 18757, 52722, 33743, 56521, 22960, 2380, 62257, 24563, 48695, 20101, 33148, 61615, 39969, 49934, 4291, 18359, 25295, 61118, 22211, 41871, 11484, 4510, 40179, 60779, 1142, 41145, 22091, 5538, 55535, 19716, 8176, 45533, 11342, 61192, 16761, 47058, 63360, 19220, 14591, 47975, 10524, 45234, 16135, 49968, 24344, 34120, 39651, 59552, 23191, 63286, 32300, 18295, 27616, 45900, 55891, 10261, 65505, 4221, 41349, 21420, 31711, 155, 56964, 28807, 63209, 5615, 58998, 32289, 24905, 55174, 634, 31288, 11014, 37595, 21587, 41123, 57256, 13684, 63914, 27542, 61858, 34604, 30120, 47101, 18460, 63214, 51381, 3205, 43636, 8709, 57186, 45123, 7737, 62488, 27863, 51151, 39178, 9266, 14859, 26933, 47722, 18143, 45380, 15366, 50755, 33570, 53368, 2442, 28225, 13987, ++ 52593, 30341, 35366, 3717, 48657, 9964, 22012, 46953, 34696, 11197, 50168, 7866, 15603, 23510, 53300, 17947, 36327, 46767, 22652, 1072, 26571, 35714, 57084, 10211, 15811, 45961, 5692, 55705, 17032, 64943, 14016, 38538, 53730, 14770, 57338, 18016, 61525, 11740, 36879, 54529, 12951, 58081, 30716, 40169, 20344, 27359, 64340, 31236, 44783, 23419, 40796, 11480, 38319, 20903, 43931, 29248, 22333, 58123, 8022, 63073, 4494, 26851, 45381, 7538, 24227, 56259, 9827, 62292, 4823, 43959, 11492, 25146, 54387, 6252, 30295, 39847, 20300, 47000, 31633, 9650, 62881, 15204, 29646, 64827, 50269, 21805, 61607, 27424, 11999, 65285, 22773, 50830, 40103, 11085, 34428, 1310, 47301, 13680, 44917, 8269, 36907, 65096, 4338, 23830, 7573, 40422, 1008, 44012, 48955, 4726, 14369, 25477, 41303, 31129, 10666, 64268, 2193, 42537, 59468, 4175, 46496, 20744, 42988, 27525, 38699, 32097, 5233, 16776, 54337, 14194, 33201, 23365, 39947, 12064, 60886, 28777, 54639, 19024, 38857, 13893, 45238, 1151, 21808, 16017, 7855, 57008, 42298, 3974, 25259, 7148, 43658, 29594, 34392, 10671, 39186, 57286, 9465, 45209, 1741, 31212, 57735, 46068, 37365, 72, 32983, 59214, 
35430, 65507, 13350, 24428, 46776, 8683, 63704, 26898, 43832, 32550, 64960, 26132, 40347, 21623, 34812, 1676, 29919, 37275, 57022, 2666, 29317, 20741, 58178, 32585, 13802, 61376, 19327, 6435, 49472, 37527, 870, 57130, 8529, 35993, 20257, 38279, 23356, 51080, 33133, 11813, 53376, 49951, 23041, 11111, 33355, 41585, 21911, 49392, 8657, 42409, 26930, 56791, 45465, 4911, 62777, 9061, 29423, 50447, 2316, 46163, 15585, 8292, 56899, 11206, 38956, 6294, 22023, 32524, 13347, 52136, 20393, 166, 31720, 40872, 18680, 6775, 55678, 21198, 49823, 32423, 61702, 5546, 30536, 56041, 21864, 8878, 17109, 62359, 49082, 39244, ++ 11446, 59564, 22456, 54717, 18895, 42686, 56936, 6437, 53703, 18513, 30220, 44426, 63614, 40522, 28177, 3113, 61718, 12346, 55237, 31760, 62803, 14550, 42731, 27913, 51369, 31561, 40024, 11396, 48419, 32136, 47012, 9753, 27489, 49242, 5282, 30057, 40314, 24089, 45996, 3430, 27977, 38393, 15092, 62167, 3915, 46707, 14448, 51629, 6362, 58484, 15925, 63911, 27793, 54408, 13825, 35136, 53036, 16466, 42099, 28562, 51687, 15343, 31456, 64367, 34538, 19499, 50767, 26344, 38528, 18246, 63396, 37102, 21266, 64717, 43310, 12198, 56400, 4281, 37763, 55319, 23108, 48209, 43087, 1114, 12714, 32965, 45875, 3277, 49143, 18758, 35586, 6190, 58447, 21082, 64436, 37654, 24992, 61741, 33084, 18161, 49304, 11807, 52464, 32071, 60442, 18650, 58049, 12238, 27006, 38958, 51235, 59189, 19352, 56076, 45701, 26783, 38547, 30018, 14833, 51720, 33586, 9719, 53232, 2834, 60012, 13533, 45398, 64101, 26908, 47640, 59390, 4538, 55515, 35311, 6759, 48813, 15080, 33812, 47141, 24982, 63225, 31083, 54890, 34869, 65473, 27624, 13282, 61280, 49560, 31524, 63445, 12354, 55833, 46332, 27727, 6267, 41455, 23782, 53820, 16127, 27168, 10196, 55910, 14243, 50678, 9037, 26185, 44903, 28895, 54939, 17879, 31742, 38566, 13950, 52918, 2546, 15663, 54063, 5144, 59128, 13603, 54707, 44208, 9299, 25462, 40776, 65325, 38533, 5516, 42636, 51734, 4054, 54029, 43909, 10730, 28216, 15888, 44685, 50654, 14302, 59970, 
1750, 47247, 18579, 62575, 27102, 42810, 15818, 35641, 47700, 19563, 51726, 1834, 63978, 17976, 59903, 14690, 36596, 18530, 51968, 33660, 48570, 17379, 38614, 31958, 22386, 58302, 43266, 20733, 52915, 26729, 48087, 59730, 41980, 55222, 35114, 27389, 49131, 60089, 15526, 58403, 35617, 25150, 65440, 1084, 42422, 11667, 52760, 40734, 277, 64216, 46901, 41632, 25283, 7347, 19684, ++ 43596, 5998, 46243, 8237, 29329, 36123, 15244, 41277, 26262, 61965, 1680, 21520, 33378, 5290, 50456, 31127, 43630, 19927, 34892, 45241, 7788, 48752, 3590, 64540, 21629, 2475, 58048, 25012, 36681, 3856, 23083, 60622, 19557, 41875, 33913, 63660, 1867, 56516, 17157, 52030, 61030, 8294, 24366, 50077, 35335, 56972, 9598, 37563, 26440, 34781, 46331, 8365, 49902, 4086, 60997, 6162, 45954, 25756, 2285, 56461, 11365, 43605, 36107, 1053, 47074, 12572, 41387, 14439, 57428, 33971, 8290, 47507, 13674, 34730, 1431, 49052, 24541, 59186, 28160, 16960, 3031, 34567, 19151, 58266, 26305, 53628, 17405, 38283, 28390, 42808, 52924, 32295, 15017, 43660, 4212, 54279, 10129, 39802, 3097, 59651, 26714, 41017, 20798, 43519, 13490, 50051, 35336, 22371, 53789, 32628, 9138, 28961, 232, 36010, 5797, 16229, 60566, 7951, 56881, 25611, 17216, 62833, 23277, 47298, 30592, 49975, 20281, 37724, 1346, 30210, 9509, 41710, 18255, 51414, 22243, 42498, 9912, 64588, 2493, 50955, 9166, 40082, 11368, 46569, 5418, 47870, 20982, 35815, 10143, 39901, 19280, 51929, 16438, 1225, 50474, 17196, 35580, 60562, 12994, 42607, 63272, 20840, 47679, 28164, 43338, 19200, 53433, 2110, 37957, 7241, 58027, 50202, 3320, 59933, 20540, 36234, 48937, 23597, 43158, 33072, 50038, 6541, 22812, 60044, 50853, 17936, 12379, 26832, 55816, 22457, 10005, 28783, 35139, 24839, 56305, 40385, 60871, 33954, 4999, 30408, 42042, 25145, 54323, 6554, 36864, 3079, 55696, 8064, 64793, 3838, 57502, 13439, 40455, 27666, 34745, 4446, 50642, 7121, 65424, 25430, 13016, 44430, 5958, 55550, 10179, 52437, 3598, 32913, 12410, 61353, 731, 36646, 9442, 24469, 15947, 4949, 64777, 11857, 
24056, 43057, 2578, 48210, 12946, 34277, 53902, 19912, 38148, 23719, 13296, 29205, 36684, 20411, 4427, 58960, 32084, 55496, ++ 34319, 65265, 26521, 40127, 60230, 113, 62860, 31998, 13007, 45560, 36949, 54938, 10788, 59454, 13910, 57628, 9715, 52245, 2170, 16747, 56385, 30009, 37223, 13190, 53088, 44618, 17616, 61837, 8512, 54308, 43885, 30527, 2819, 52289, 12373, 21950, 50815, 26845, 10092, 34545, 21068, 41752, 55856, 5762, 17926, 29131, 43248, 21613, 61593, 861, 19633, 30606, 36551, 24046, 41211, 31952, 12193, 61744, 34198, 38638, 23280, 49555, 21084, 52864, 29045, 60931, 4011, 48792, 23000, 481, 52240, 30744, 58456, 26910, 51219, 18604, 36440, 8089, 44330, 60981, 41363, 52094, 10502, 40586, 36244, 6943, 59067, 9965, 63175, 13873, 825, 60666, 26963, 48714, 16525, 28684, 50434, 19917, 53205, 30358, 5889, 56924, 33461, 2079, 54469, 26263, 6206, 45950, 3336, 64818, 17776, 43130, 63465, 23095, 54296, 47793, 20530, 43666, 34855, 890, 48739, 36671, 12112, 39813, 17872, 3744, 57594, 10322, 56334, 44349, 21197, 65389, 32372, 673, 58777, 31372, 56429, 26709, 37113, 20357, 59032, 18324, 53482, 23695, 33251, 16814, 51601, 735, 45508, 59193, 4640, 26740, 38330, 58552, 30883, 65279, 22535, 5080, 51393, 32055, 6843, 36270, 3145, 62760, 5824, 39437, 61496, 16042, 49220, 21306, 35089, 11952, 42248, 25063, 46006, 10394, 31039, 62853, 9737, 17659, 27444, 64205, 39542, 31873, 4540, 35633, 52389, 46527, 1444, 36931, 62943, 47450, 16662, 65091, 2527, 20605, 8920, 46879, 22534, 63705, 7795, 57695, 34638, 15096, 59447, 30750, 39992, 20813, 44579, 25661, 37809, 30985, 46997, 10343, 53161, 39235, 21249, 42995, 30623, 2815, 58602, 28540, 61048, 20154, 35792, 41701, 24745, 49246, 37897, 28058, 44836, 17912, 57760, 30415, 39768, 46715, 19178, 53087, 38677, 9949, 54760, 22905, 44648, 5181, 28341, 59203, 7470, 62798, 44290, 56875, 10200, 51708, 38384, 15889, 45236, 976, ++ 23970, 14811, 51051, 12302, 17885, 52288, 24866, 49463, 4322, 58888, 16854, 51690, 27045, 42943, 20745, 37610, 25182, 
39295, 61109, 23274, 40757, 18307, 47230, 24604, 33249, 7101, 38267, 29447, 50111, 20822, 15373, 64154, 37671, 25294, 58654, 45119, 7327, 38794, 65122, 29527, 48470, 441, 32199, 45261, 22621, 63471, 2908, 55078, 12686, 39776, 57407, 52328, 14820, 59349, 17835, 55735, 21480, 48996, 8844, 18574, 63752, 5555, 57754, 8562, 17035, 24841, 42965, 31874, 55055, 28352, 44863, 16823, 5240, 41143, 10659, 62508, 22385, 52802, 13242, 33574, 6003, 30192, 62087, 14665, 55964, 21388, 43837, 31369, 20433, 56548, 25355, 41251, 8137, 36424, 57976, 31908, 6642, 63782, 14779, 42695, 22841, 47719, 10604, 64334, 17100, 39339, 62431, 15126, 55762, 36541, 21441, 47004, 11965, 40047, 24927, 9984, 30868, 53609, 12901, 64481, 28424, 5661, 55292, 8279, 61195, 34166, 42379, 25109, 36191, 15659, 52001, 7789, 46463, 25866, 38249, 16638, 4282, 44915, 12894, 41391, 6116, 43280, 29870, 2935, 62008, 37649, 58046, 28428, 14577, 22150, 54435, 42828, 8065, 21114, 44234, 11252, 47174, 28725, 37801, 19502, 59409, 40890, 24685, 54636, 30610, 12595, 23189, 32429, 56885, 9535, 64280, 30172, 17161, 62018, 6333, 39305, 51849, 926, 37890, 56781, 46216, 11685, 15513, 48523, 20045, 62439, 8468, 23725, 59271, 15232, 31225, 7281, 41055, 13107, 51186, 31774, 55183, 25911, 52983, 12713, 40746, 28618, 16897, 45289, 23930, 49271, 12958, 29535, 52578, 17112, 61663, 6081, 23712, 60810, 16526, 29262, 62177, 12231, 54455, 47164, 22827, 40273, 45, 26535, 64311, 7449, 59309, 16669, 5304, 65272, 13983, 50799, 7224, 62691, 2121, 56293, 28629, 5979, 33403, 61123, 30720, 17567, 37665, 51930, 16173, 46490, 31850, 18973, 4051, 25842, 48298, 27756, 61064, 10994, 53977, 29603, ++ 41883, 57215, 3152, 47752, 32945, 38581, 10454, 43916, 23030, 35116, 29008, 6192, 39885, 867, 62579, 7222, 47620, 15020, 29185, 49989, 5114, 58415, 9049, 60427, 19678, 55008, 11938, 42358, 386, 34802, 56108, 6246, 47654, 10620, 32928, 16262, 42589, 20136, 4725, 44084, 13881, 53938, 11566, 60446, 36746, 8880, 49273, 32538, 47493, 17238, 28463, 6953, 44546, 
1544, 47844, 37865, 3630, 29690, 58972, 44971, 33044, 14008, 39601, 30470, 62669, 37603, 6678, 59472, 15867, 9262, 39288, 61250, 23651, 55719, 32762, 45799, 2324, 39135, 25734, 54720, 19896, 49408, 24042, 3944, 28853, 47822, 1709, 51584, 37474, 5549, 45449, 18040, 62709, 23977, 11563, 46285, 21753, 44454, 35166, 85, 60963, 16145, 36220, 28797, 46623, 8917, 24278, 42037, 29674, 10425, 50373, 7145, 34584, 57833, 2698, 62254, 40927, 4401, 37632, 22109, 58083, 41653, 32463, 26026, 50735, 22588, 12724, 53466, 5967, 60469, 23836, 39284, 13224, 53871, 10545, 62945, 50116, 21630, 52861, 28535, 61712, 15423, 55391, 49193, 12194, 24461, 8528, 41781, 64530, 30251, 36491, 13658, 62540, 32804, 3618, 55090, 14926, 56803, 847, 48540, 17720, 11786, 44698, 15446, 34736, 51741, 46450, 5013, 40483, 27285, 42992, 543, 50997, 33761, 56165, 27810, 18825, 58629, 29140, 21163, 3102, 36533, 53718, 392, 56410, 28080, 43378, 34491, 18620, 53239, 44482, 21665, 49128, 27252, 38178, 17803, 42966, 3557, 37164, 19635, 48476, 578, 51955, 9750, 63025, 5365, 43855, 58727, 963, 34423, 9469, 56001, 43291, 32991, 7626, 45928, 1356, 38082, 19315, 9587, 35361, 15361, 53470, 43509, 14357, 30905, 19553, 45582, 54910, 34815, 22968, 40672, 31537, 21021, 43927, 12749, 36028, 50185, 21696, 3836, 46096, 63870, 8461, 26324, 60639, 2261, 41324, 54621, 34794, 58082, 14610, 2796, 23164, 35689, 18249, 63557, ++ 9144, 37053, 31043, 21374, 58424, 7058, 64011, 20242, 56123, 9006, 64570, 46410, 18999, 48846, 24114, 33928, 54509, 4048, 65099, 10940, 34507, 26200, 43319, 1370, 49171, 28427, 63128, 22507, 59508, 26970, 13034, 40956, 18492, 62447, 966, 55360, 28849, 60064, 49788, 15677, 58962, 35933, 18850, 26214, 51459, 16187, 40988, 24554, 10309, 65354, 42234, 22784, 62917, 33183, 9888, 24771, 64660, 42628, 15999, 318, 54755, 26580, 47970, 2826, 53912, 11811, 51366, 20031, 40568, 65065, 35432, 3257, 48354, 15077, 7255, 29269, 63803, 16220, 46666, 606, 65419, 8821, 44994, 60055, 39595, 11741, 64158, 23546, 9340, 49918, 
34033, 54650, 3497, 51827, 40647, 1840, 56173, 27728, 9565, 49789, 38844, 25293, 58690, 4027, 51760, 31402, 59438, 19784, 1414, 61145, 31834, 52551, 27752, 17410, 49278, 32855, 19605, 50981, 14592, 44942, 7392, 19070, 61954, 15205, 43874, 436, 64715, 31209, 48048, 28217, 2659, 49299, 29115, 61389, 19872, 34451, 30437, 8212, 57528, 97, 35445, 32086, 7518, 38650, 26364, 44552, 56153, 17932, 5914, 47322, 2683, 48848, 24138, 51047, 40613, 25591, 34981, 8724, 64093, 26526, 52549, 33653, 58231, 7822, 64884, 1284, 19957, 60191, 14553, 47937, 23832, 54231, 22445, 15050, 4653, 47279, 12779, 44523, 7899, 41913, 61572, 30833, 24578, 41537, 33423, 6132, 14054, 49601, 3343, 39113, 11453, 60156, 13, 57877, 5964, 62303, 14510, 59060, 11007, 61113, 32840, 64433, 21726, 39355, 35231, 19213, 27843, 10778, 54629, 22654, 48228, 18851, 2619, 49809, 57941, 24555, 55389, 27220, 59489, 32122, 63642, 47891, 11436, 33432, 50094, 56479, 39453, 1537, 29124, 10368, 51792, 2941, 54157, 15039, 47683, 26163, 62088, 16941, 57451, 40289, 14265, 29496, 56511, 11277, 36173, 22401, 50482, 9600, 16737, 39942, 31334, 64952, 43779, 49327, 5259, 27002, ++ 50280, 19403, 62219, 4832, 44547, 28364, 16066, 37777, 2322, 42202, 14442, 32657, 60073, 12006, 56534, 17333, 30585, 41675, 21772, 45861, 53534, 12697, 63823, 35887, 16170, 40365, 4778, 46435, 9316, 51557, 31394, 57809, 27746, 35607, 53271, 23472, 37093, 9184, 31784, 40576, 24911, 6575, 62737, 39354, 1654, 29921, 59733, 5042, 52824, 34335, 3298, 50719, 11862, 27113, 53785, 35568, 13198, 51897, 27656, 40210, 10575, 60505, 18965, 41950, 22455, 45662, 33435, 1803, 46817, 21633, 12999, 53533, 26074, 42578, 60213, 20577, 34938, 57277, 11285, 31942, 42090, 27606, 35870, 15718, 32553, 46796, 16770, 35044, 61121, 29811, 15417, 22357, 38608, 30655, 13374, 59969, 17228, 37960, 62121, 12790, 55112, 7074, 19208, 43044, 13909, 37578, 5183, 53086, 44826, 38290, 15860, 3923, 59895, 13311, 44406, 6368, 56449, 27086, 60839, 24172, 49758, 35506, 2420, 52761, 38156, 27205, 
45803, 8906, 18858, 40682, 63347, 14860, 36869, 5485, 43720, 3223, 47480, 17771, 39660, 24765, 48311, 22409, 59620, 19659, 64007, 1566, 34669, 32304, 53030, 19933, 60234, 33958, 15844, 7008, 61175, 18469, 53329, 43521, 30035, 9789, 38604, 2754, 21684, 49598, 25878, 36950, 43971, 28597, 52758, 3724, 63038, 11046, 37577, 59336, 41242, 25769, 65175, 34210, 23268, 50572, 16348, 47556, 8819, 63638, 17335, 45615, 58436, 30282, 61896, 25296, 54909, 29466, 19058, 33598, 45046, 24033, 35749, 29887, 50050, 26790, 4705, 42371, 12003, 56567, 2338, 46670, 60663, 38397, 14823, 41887, 30196, 63516, 37105, 15195, 20483, 35844, 13798, 48764, 4092, 42033, 6512, 24231, 57671, 3372, 21668, 8889, 25617, 63109, 42359, 18613, 60267, 25060, 37478, 64096, 8153, 34185, 1269, 42631, 10680, 25492, 48823, 556, 42175, 32844, 49553, 13748, 64403, 29881, 47359, 6468, 52338, 20710, 8101, 33058, 59738, 38852, ++ 12868, 42980, 25974, 53472, 13670, 55206, 47130, 30680, 49851, 25722, 54057, 3505, 27884, 36546, 44680, 8285, 51416, 13454, 59223, 193, 27254, 38815, 20489, 30937, 55879, 10483, 52113, 25904, 36265, 44870, 17365, 4207, 49015, 7927, 14335, 46581, 3699, 61708, 22387, 2299, 53008, 47746, 33219, 13259, 57702, 44323, 19924, 38138, 14210, 25505, 56627, 19158, 39150, 60209, 5293, 46657, 20140, 7295, 57066, 23686, 50932, 32383, 6303, 35216, 58699, 9634, 25466, 63035, 29894, 5808, 57911, 31384, 19098, 37410, 9892, 49744, 4689, 40342, 22861, 50962, 17948, 57803, 2609, 53229, 7494, 25537, 57015, 4452, 41796, 12320, 59356, 48120, 7216, 64632, 26019, 33808, 52653, 4772, 32503, 20571, 30040, 45599, 53898, 27560, 62987, 23244, 48431, 33636, 12550, 25185, 57129, 41489, 22527, 36980, 65526, 29530, 11402, 39179, 1493, 54631, 9444, 29177, 46271, 20936, 10710, 58580, 16996, 35173, 57000, 33547, 11544, 52570, 25383, 58016, 22798, 55747, 35935, 60164, 14273, 65073, 9707, 45699, 3795, 50639, 14789, 41181, 10922, 50043, 23373, 39491, 9325, 27469, 57371, 37088, 12475, 46685, 2182, 20379, 60020, 45734, 23509, 62355, 
42141, 29623, 10588, 57397, 16543, 33290, 8372, 36105, 18248, 45306, 7035, 31969, 19484, 53198, 2274, 14351, 55323, 5655, 57639, 13157, 38719, 26197, 54377, 10888, 37487, 20414, 9625, 42124, 5070, 36326, 64040, 12211, 52300, 1923, 48071, 21139, 7406, 45991, 16209, 53752, 31151, 24942, 50459, 32148, 7010, 26122, 65237, 4332, 50969, 12535, 26611, 45005, 62475, 5447, 40810, 22153, 52332, 16997, 29782, 44715, 18134, 37382, 62034, 46534, 36208, 13215, 48492, 33811, 6716, 45951, 11584, 29685, 49335, 59120, 23381, 53562, 32094, 63524, 18420, 52624, 20820, 61942, 4729, 27231, 39350, 1362, 24809, 60344, 12247, 37421, 55932, 22135, 14216, 46442, ++ 1837, 56580, 9762, 39668, 34574, 1186, 23615, 61442, 11636, 58010, 21077, 38955, 52660, 22378, 1950, 63385, 25527, 38102, 18664, 32963, 57368, 6496, 51013, 3283, 44223, 33841, 19025, 65301, 14850, 2090, 61335, 39035, 24421, 43587, 64620, 19175, 51168, 12609, 45552, 57121, 28335, 20676, 4277, 50390, 23639, 7443, 54665, 31511, 61277, 43704, 7907, 48567, 30268, 15555, 41482, 22193, 62113, 31301, 37316, 4718, 46232, 20782, 65523, 15169, 28055, 55594, 36805, 14288, 52482, 38223, 44140, 10915, 62028, 964, 56523, 27822, 53974, 13994, 59691, 6367, 38086, 12861, 48423, 21969, 62817, 38924, 19555, 54106, 26572, 44634, 2269, 28992, 55711, 19347, 46925, 9009, 41966, 24583, 48863, 57708, 2743, 34254, 11273, 40794, 793, 56640, 8108, 20980, 64139, 49610, 8671, 26470, 47544, 661, 18425, 52167, 34050, 46712, 16624, 31588, 42751, 63801, 13932, 59551, 29967, 41111, 6573, 62326, 4100, 21907, 43083, 1740, 45968, 16307, 40393, 12318, 26274, 6395, 42770, 33407, 52282, 17211, 36711, 27146, 54194, 30814, 60710, 4210, 62827, 11942, 55554, 44829, 206, 29022, 64694, 22873, 32476, 39728, 5603, 16738, 56205, 4468, 13920, 53963, 38987, 6025, 48340, 63534, 20963, 58492, 39780, 25322, 49691, 60984, 9139, 35841, 48658, 29767, 40037, 20640, 35196, 28689, 59649, 3921, 22176, 51403, 2047, 64810, 32998, 50411, 16982, 47107, 8242, 39953, 26273, 56761, 9887, 64923, 
38898, 55560, 22974, 37690, 6223, 58228, 13601, 17694, 52239, 45551, 20055, 36398, 8359, 41090, 53957, 383, 31558, 51424, 10634, 64655, 34240, 56195, 8509, 51094, 60056, 27854, 5720, 16125, 53775, 23552, 4230, 55903, 27085, 58459, 20037, 41378, 4614, 16308, 39066, 9167, 45429, 6253, 35410, 7864, 38440, 15224, 45766, 58816, 19325, 55064, 33639, 44937, 17489, 42575, 619, 51269, 30287, 61648, ++ 16931, 33823, 51831, 22661, 17590, 60396, 40805, 7979, 35789, 5039, 47942, 16419, 9453, 60881, 31418, 35488, 46859, 5478, 62206, 42091, 15643, 47797, 23586, 61630, 14102, 58743, 6803, 39473, 23877, 54004, 32509, 21348, 56830, 11258, 30279, 39843, 33536, 26493, 37982, 16999, 10821, 64071, 42833, 34117, 17638, 64851, 11183, 47113, 210, 21265, 35787, 58308, 2188, 55386, 33713, 49714, 1132, 43343, 17339, 63307, 12815, 36278, 44466, 1343, 49155, 17742, 3720, 49888, 18487, 7956, 27131, 50602, 22079, 47207, 33815, 17166, 43584, 30847, 45405, 25066, 63258, 34276, 27041, 43466, 10977, 50379, 202, 33331, 14318, 65019, 21005, 39872, 10841, 43245, 572, 50928, 15596, 63354, 7783, 17919, 39655, 65365, 22163, 50558, 16818, 35636, 43917, 30492, 2536, 18753, 35003, 61641, 10860, 55404, 42402, 23670, 4875, 63076, 21517, 56760, 3541, 24714, 37194, 5045, 54201, 23492, 51222, 20123, 48560, 54987, 27460, 59772, 32720, 8604, 64324, 51093, 29468, 54476, 20864, 2200, 27947, 57183, 12737, 43949, 6781, 21408, 46388, 16517, 29386, 43040, 25087, 17552, 38885, 49822, 10051, 41703, 58902, 14409, 50232, 31370, 36571, 27954, 47360, 18725, 60648, 24236, 13433, 41629, 2456, 30789, 12370, 55693, 1569, 28366, 43730, 16903, 62537, 10691, 60424, 45099, 1345, 52679, 18461, 48258, 32216, 39831, 27364, 43674, 6936, 23322, 57436, 27888, 53641, 22310, 14894, 32027, 41686, 17451, 34331, 1194, 29188, 60403, 18481, 43541, 40198, 62048, 33908, 1600, 55287, 28471, 59841, 17299, 32649, 58936, 11692, 25117, 43998, 28178, 1999, 38736, 14156, 32788, 940, 39946, 49441, 31690, 10833, 60765, 38464, 17180, 43120, 341, 32611, 61545, 
21936, 55501, 27674, 51343, 19636, 59912, 28927, 57855, 24596, 55749, 31174, 7139, 43350, 10808, 63271, 3682, 28136, 57348, 25616, 40516, 6090, 23839, ++ 48129, 28679, 3913, 64772, 45929, 6564, 27660, 50872, 19571, 43417, 29435, 65469, 41037, 15427, 50567, 11373, 20085, 54850, 28533, 10035, 52841, 8592, 32252, 41289, 28005, 21913, 48280, 29847, 60167, 12219, 45698, 5900, 34323, 52451, 1451, 58273, 5559, 63013, 7052, 54395, 41228, 30820, 9696, 56247, 46041, 27877, 37206, 26664, 16735, 63702, 28928, 13554, 45213, 18368, 8488, 26297, 58591, 11228, 54504, 28752, 52654, 8999, 56576, 25944, 38917, 61809, 43129, 24077, 57134, 32238, 64528, 4377, 41467, 24682, 12400, 64248, 3088, 36640, 9146, 51884, 1564, 20264, 54996, 5115, 58886, 17541, 30370, 52283, 46132, 8508, 36745, 53107, 18401, 35735, 58547, 31146, 23369, 37007, 45058, 29263, 47923, 5608, 59145, 9910, 27060, 61884, 14979, 54855, 40223, 58434, 46121, 14251, 39538, 28646, 7733, 59061, 15541, 36287, 48479, 12344, 39980, 51882, 18181, 47382, 32974, 1160, 39101, 31725, 13685, 37454, 7442, 17538, 49628, 30735, 19408, 957, 44658, 10803, 48751, 61024, 38965, 5075, 63670, 23017, 40228, 58678, 8932, 37972, 57793, 1971, 52066, 6560, 63103, 19179, 54315, 3951, 27241, 52260, 21457, 63795, 11441, 55284, 7387, 34350, 352, 50820, 31621, 54864, 26621, 45839, 52003, 15931, 34595, 21821, 57220, 4200, 24335, 32814, 7218, 26717, 64513, 12048, 42671, 7543, 61247, 14694, 55722, 16181, 60560, 35024, 12904, 45785, 2971, 61426, 43264, 54510, 4370, 58788, 13277, 44256, 51575, 9229, 47803, 3429, 24355, 8686, 22100, 57322, 11236, 39085, 23542, 49049, 6414, 21435, 39590, 56655, 16311, 60452, 46299, 19081, 62842, 25972, 52987, 12711, 22539, 57341, 43843, 2210, 30271, 64445, 13652, 52754, 9853, 50377, 36389, 3259, 44442, 12116, 37044, 1887, 41603, 13127, 44043, 2917, 17996, 51090, 24110, 36858, 21795, 48745, 35226, 8695, 62733, 11540, 54413, 36415, ++ 7622, 59151, 41133, 11901, 31815, 53006, 15067, 33437, 62975, 13124, 55536, 546, 26632, 45713, 
6873, 57155, 40262, 2907, 44021, 24489, 36819, 60650, 17910, 4412, 54371, 37528, 766, 42218, 8229, 50462, 20209, 63582, 16406, 41538, 25736, 15052, 43171, 19739, 46891, 34729, 669, 59430, 25221, 15279, 2613, 52237, 6007, 41908, 49993, 53461, 4460, 40549, 24132, 51339, 64392, 38355, 14609, 48185, 25178, 2568, 41023, 30800, 16562, 59937, 7191, 29505, 12115, 40751, 134, 45230, 13545, 35998, 59017, 6825, 48887, 38696, 21312, 61540, 18350, 29583, 56159, 42399, 9717, 37074, 23204, 40975, 61689, 6534, 24399, 57544, 31723, 4920, 62404, 27336, 14118, 61385, 3749, 53656, 11873, 56421, 14598, 25704, 37354, 32716, 46841, 3650, 51531, 23880, 6784, 28125, 5427, 32396, 53253, 20355, 49113, 30659, 44060, 26158, 8431, 60065, 27969, 6898, 57435, 11216, 65247, 16067, 56050, 9872, 45040, 24891, 63902, 41595, 4627, 56671, 38442, 62456, 24073, 41915, 31563, 13760, 19127, 46969, 32882, 51291, 550, 35144, 25776, 48146, 20514, 31824, 36076, 45858, 12849, 34497, 26079, 47570, 35719, 8282, 42507, 1466, 44955, 25230, 61639, 41097, 22124, 44376, 17519, 35477, 9952, 61833, 5441, 42407, 64668, 11532, 46633, 38336, 54550, 40903, 52180, 15193, 37051, 46820, 24003, 36161, 19617, 44813, 4754, 25020, 48777, 1036, 41204, 20883, 37772, 30630, 6585, 19919, 28394, 48954, 25516, 62773, 15449, 33308, 27413, 64240, 35551, 49561, 29985, 42585, 16054, 47340, 3022, 35037, 64073, 28978, 46737, 3635, 36143, 7935, 23296, 42767, 10054, 47295, 20376, 65123, 41466, 6967, 26383, 51567, 21188, 40419, 28149, 35156, 46840, 24194, 14570, 56958, 31311, 64868, 26506, 54330, 22767, 48456, 33255, 65231, 40063, 12470, 61423, 5617, 53149, 14984, 58584, 20099, 45543, 32492, 18452, 63899, ++ 44124, 15387, 25119, 48976, 19114, 36877, 57454, 2738, 45296, 24649, 37356, 32195, 18105, 58667, 34109, 23138, 29676, 64338, 16567, 49515, 1605, 30250, 45463, 64785, 11096, 49817, 16882, 56564, 26710, 36634, 3142, 28744, 47520, 9941, 59806, 49663, 23197, 55666, 29374, 13639, 48942, 21468, 36396, 62010, 39974, 22146, 58826, 12464, 32958, 
9384, 31086, 62482, 10472, 36663, 5722, 32145, 44227, 19731, 34805, 61386, 21921, 50195, 4170, 34052, 47353, 22830, 53302, 34473, 60831, 20940, 54606, 17479, 28946, 55482, 15509, 26688, 52640, 5636, 47665, 39777, 14757, 28329, 60780, 31548, 48946, 13430, 35451, 27905, 42723, 16488, 47502, 22906, 49364, 6139, 44344, 19724, 41298, 34732, 21522, 1242, 42494, 52353, 19007, 55350, 13028, 29086, 38681, 17581, 60415, 42885, 21979, 63709, 1795, 37855, 9782, 64602, 2963, 53813, 33384, 19931, 45474, 34779, 22383, 42066, 26402, 44293, 28355, 61632, 3060, 53563, 12016, 35612, 23118, 46864, 9388, 34064, 15828, 58493, 7219, 36423, 56249, 30002, 10188, 18380, 62158, 15601, 53700, 4838, 64945, 13980, 59497, 21980, 30554, 56652, 5269, 16331, 62056, 13298, 58101, 33349, 19626, 49014, 15620, 30340, 58730, 8875, 65350, 4071, 56696, 22777, 37289, 29347, 6622, 51240, 20184, 30255, 128, 17764, 22563, 49265, 31184, 2882, 62801, 10111, 57100, 29558, 52949, 31671, 11194, 51887, 63197, 9330, 58144, 15796, 50727, 63536, 36054, 10509, 40498, 2761, 36760, 56243, 20524, 11526, 54236, 14192, 63329, 5111, 53032, 25400, 61486, 14460, 42201, 9304, 53411, 18297, 50352, 30855, 54105, 5046, 58388, 36889, 3144, 34072, 17621, 54699, 35627, 12279, 47525, 8388, 57912, 5231, 18840, 63242, 7415, 40851, 17777, 5697, 47239, 15671, 62424, 4313, 21283, 10007, 53690, 34655, 28580, 47550, 30836, 41231, 2025, 38033, 13558, 49694, 29041, 2614, ++ 21117, 34996, 57796, 320, 63326, 8818, 21981, 39409, 10629, 52051, 5928, 61924, 49225, 4150, 53605, 14227, 48535, 7546, 35240, 56185, 22229, 57884, 13281, 25319, 34960, 20651, 62768, 33388, 13807, 57651, 42896, 53450, 22593, 38222, 32004, 8718, 35828, 2984, 61093, 27140, 51717, 5223, 44956, 8555, 32059, 48142, 18724, 60809, 22961, 38809, 45819, 20598, 56137, 28152, 16345, 60659, 3502, 57487, 7638, 39264, 10115, 42307, 54093, 20440, 11022, 63958, 5380, 16065, 27503, 8741, 39472, 51668, 1989, 33013, 46323, 8437, 35309, 58036, 23859, 11907, 65164, 3638, 45959, 16105, 7810, 
63944, 2879, 55398, 10291, 60364, 1440, 40406, 13109, 56795, 28529, 54805, 10176, 60208, 26751, 63993, 30830, 8457, 60894, 2000, 41631, 64897, 9376, 48134, 31272, 12152, 50692, 16499, 45154, 56251, 25427, 17844, 40826, 13441, 61249, 29, 50353, 14350, 62786, 2129, 36399, 8069, 50074, 18646, 34549, 29823, 51598, 20726, 60668, 14556, 27812, 50423, 3657, 53120, 25575, 63170, 1423, 23608, 55072, 42552, 28320, 45304, 11529, 33568, 42162, 27745, 7915, 49316, 3013, 43309, 60819, 38415, 29499, 24498, 40125, 53630, 6462, 37668, 3452, 51539, 12024, 38238, 27561, 47077, 19320, 49119, 12984, 59549, 26010, 39148, 58132, 13637, 63966, 43432, 56514, 5305, 58754, 21232, 33978, 49926, 656, 40701, 13555, 59421, 38950, 17990, 26570, 34140, 47685, 24492, 32769, 508, 46183, 22702, 57087, 19364, 50239, 5638, 45159, 41493, 241, 28087, 37409, 21017, 44518, 7756, 31263, 51652, 19677, 57615, 26294, 33762, 13132, 61216, 20852, 40513, 30066, 15639, 50758, 28603, 45274, 4672, 58993, 19345, 62284, 33172, 15452, 54439, 38179, 43590, 30481, 60472, 24898, 52076, 34396, 8800, 37848, 50633, 30189, 46594, 25764, 69, 56765, 9289, 16508, 23494, 54130, 26681, 60834, 4582, 52555, 42363, ++ 55669, 10119, 38205, 27262, 44796, 29986, 55034, 26330, 59634, 20572, 41724, 23781, 12588, 38465, 21310, 42718, 27425, 59917, 19431, 12106, 43151, 6070, 39730, 52380, 2605, 44497, 9499, 47042, 5162, 31138, 18274, 7514, 62293, 77, 55036, 17735, 65418, 44539, 11614, 18391, 37545, 57982, 16094, 28674, 55236, 1311, 35292, 43493, 3731, 57308, 15028, 49181, 755, 42927, 52926, 23144, 46978, 30084, 52117, 18074, 64756, 24585, 13887, 58227, 38045, 46008, 31101, 43676, 59562, 48056, 10229, 23444, 42818, 63532, 19692, 60468, 30537, 394, 44286, 32420, 49541, 19306, 33732, 53576, 25927, 44820, 21652, 38458, 51271, 20084, 34481, 63551, 25098, 38242, 2447, 33205, 47151, 6894, 51159, 16334, 43806, 35393, 22707, 45796, 24791, 20023, 57314, 35864, 346, 54079, 36736, 24454, 34189, 4485, 51182, 31753, 58630, 23199, 38484, 30102, 17322, 
55575, 30980, 49011, 21162, 60335, 12983, 40267, 58997, 15486, 295, 43606, 6281, 39867, 55338, 18129, 37879, 21774, 45150, 11877, 40814, 49048, 8138, 37336, 3477, 56878, 24622, 49707, 18656, 54827, 40420, 15245, 52851, 23681, 11056, 48487, 641, 51122, 10466, 17332, 59695, 28483, 64310, 20481, 43235, 24737, 53082, 14301, 40300, 801, 32363, 44742, 18370, 3195, 33380, 47826, 8159, 34839, 9805, 28170, 42021, 12670, 54995, 25721, 18943, 65458, 8380, 21932, 46465, 30118, 5806, 55241, 3759, 44120, 11729, 59703, 13945, 53124, 8140, 31545, 23860, 61183, 30497, 16767, 59560, 48630, 9985, 58418, 33470, 56082, 12781, 37920, 813, 43478, 5838, 64910, 38256, 1475, 48190, 12012, 63456, 23969, 55825, 9481, 61438, 31134, 24614, 39196, 1205, 44562, 49959, 25795, 2492, 22321, 11109, 48165, 824, 39700, 20464, 59342, 27937, 11630, 58163, 16085, 60215, 41954, 19811, 38776, 50235, 64606, 6931, 46947, 10532, 34212, 24406, 15997, ++ 31479, 47605, 5751, 53842, 13894, 42040, 4496, 48384, 16654, 34871, 8417, 56660, 44266, 30469, 10226, 63541, 990, 39218, 33217, 51770, 26080, 63031, 32640, 16015, 59154, 29333, 55210, 24713, 38704, 64406, 51266, 35213, 15523, 40740, 27833, 48082, 12846, 31352, 39603, 53692, 7720, 24187, 42461, 64501, 12068, 50539, 14494, 27427, 54171, 7092, 34592, 19305, 59614, 25812, 6714, 40128, 11628, 35954, 13438, 45361, 539, 32711, 48643, 28235, 2104, 19275, 56221, 2961, 24940, 35071, 62411, 31754, 14507, 4060, 40017, 11397, 50811, 16531, 54403, 22345, 7051, 58709, 37886, 1014, 50093, 18679, 58216, 29332, 4147, 44061, 32256, 15815, 50630, 9256, 48474, 17628, 24154, 39251, 32132, 4532, 49499, 13616, 58315, 6440, 50085, 33344, 4943, 44501, 26048, 63199, 7311, 57949, 11118, 62553, 14885, 41926, 6156, 47977, 10199, 52646, 43360, 5621, 38862, 9205, 53034, 25709, 46377, 5283, 23923, 47684, 57772, 26616, 64789, 32135, 2381, 59299, 47939, 5840, 34790, 51917, 16787, 31126, 59722, 21147, 64394, 13409, 39605, 7047, 61480, 1102, 33083, 63561, 20152, 37428, 31202, 18213, 65111, 32678, 
22615, 46518, 35207, 9557, 45621, 31912, 1825, 57666, 6807, 33024, 62683, 23892, 53837, 8636, 61168, 50389, 14822, 27044, 53370, 24863, 60245, 37815, 16713, 50919, 6304, 44311, 38155, 28926, 35345, 56367, 2387, 62107, 36875, 14257, 64608, 18412, 52145, 39593, 27128, 34860, 42720, 65363, 12482, 38597, 7539, 46555, 32565, 18759, 39764, 2168, 26882, 17554, 48055, 22852, 62687, 29714, 15825, 45392, 24397, 57044, 27583, 35192, 7183, 44151, 495, 37768, 14489, 48677, 7715, 52280, 29540, 11757, 20636, 65521, 31943, 58691, 53260, 16749, 33682, 55129, 13536, 44791, 1553, 42922, 32441, 5401, 36289, 13995, 62570, 27463, 3409, 32200, 43116, 21458, 57064, 40304, 65400, 8347, ++ 60033, 20335, 62394, 23227, 50325, 17970, 60784, 31258, 1388, 64222, 47284, 28755, 2265, 59081, 36448, 17403, 54251, 9115, 47474, 4659, 37918, 10374, 46674, 21144, 40877, 7243, 18874, 48724, 1699, 22973, 11471, 26295, 58469, 45889, 20900, 4887, 52800, 24647, 1816, 63406, 33115, 47345, 3384, 20097, 38472, 25594, 63142, 8984, 41286, 24836, 65160, 29765, 37694, 12891, 54965, 29201, 63850, 21335, 59321, 26946, 56021, 37062, 6548, 62221, 33299, 51489, 14968, 37506, 50329, 6047, 18700, 52971, 44719, 28490, 56820, 25790, 41876, 37227, 10370, 62566, 40775, 24784, 13796, 56634, 31011, 11140, 41543, 15001, 64517, 12040, 54217, 7440, 29901, 59502, 36082, 65208, 12570, 58003, 20737, 62236, 27836, 40008, 18259, 38005, 11464, 59979, 15994, 52757, 21229, 13999, 29736, 40496, 19504, 46596, 26955, 54757, 20887, 35565, 62051, 4062, 25070, 64044, 19324, 58322, 32290, 3435, 37750, 63533, 10398, 42204, 19579, 36767, 11101, 46170, 24436, 13114, 28974, 65352, 9039, 27007, 61864, 14934, 44421, 5591, 34343, 29204, 52522, 22472, 36840, 26500, 47060, 9422, 44525, 5787, 56016, 41435, 7580, 44014, 57230, 2602, 26767, 55615, 14683, 40696, 60908, 17039, 36205, 50140, 10783, 42849, 28061, 16388, 36609, 23408, 41498, 62240, 4417, 19742, 45912, 1708, 63459, 32606, 22902, 61730, 3585, 53970, 16460, 43086, 23581, 51024, 19791, 42262, 28594, 
38295, 21777, 7060, 62484, 4555, 17846, 47425, 1455, 55048, 25965, 53558, 4079, 61914, 23429, 50695, 65084, 41307, 4598, 60148, 10271, 36729, 49914, 21885, 8866, 41656, 15012, 55121, 49584, 19858, 59893, 26841, 42493, 21788, 63900, 16604, 56721, 37184, 41807, 6336, 48951, 9351, 36720, 26914, 61830, 6640, 29404, 64260, 23886, 56256, 19125, 49100, 22528, 51797, 7557, 45308, 59262, 18783, 12792, 35931, 885, 17292, 30411, 50720, ++ 1629, 39115, 12504, 33262, 3269, 36152, 11248, 51450, 43239, 14634, 32841, 18799, 52318, 13781, 49892, 25090, 45069, 31595, 22813, 61157, 15191, 55618, 423, 50778, 27110, 62068, 36078, 14614, 60990, 40135, 56022, 44143, 3623, 10660, 60604, 37050, 29734, 57470, 42174, 22035, 10270, 60342, 30209, 52588, 6410, 55948, 30691, 46530, 17889, 51924, 10927, 47614, 3035, 61842, 17188, 48398, 1713, 42553, 5119, 50507, 9292, 23328, 44089, 16864, 41714, 8313, 26351, 64999, 11723, 30206, 58367, 1218, 22610, 49235, 7706, 64674, 2720, 20865, 47052, 29791, 4513, 51525, 43173, 8635, 36274, 62145, 5834, 48260, 26256, 37551, 22490, 57297, 42128, 21143, 5361, 26380, 52140, 271, 45263, 10764, 53453, 3042, 62892, 30386, 54368, 28395, 41032, 10267, 48893, 32960, 43623, 2349, 49895, 8837, 37394, 1116, 44673, 12500, 27428, 33924, 46962, 11498, 28929, 41428, 14998, 55122, 16891, 28667, 33295, 54117, 7062, 30350, 52477, 17118, 61354, 33628, 41314, 20269, 42925, 53989, 2792, 39247, 26178, 50866, 17741, 47741, 2318, 58812, 10695, 51477, 16980, 57518, 28822, 59927, 13797, 25528, 54554, 20770, 15106, 62525, 39399, 4870, 52412, 23037, 8040, 47753, 29060, 21341, 56260, 5084, 63746, 46313, 2005, 55132, 10282, 31397, 39689, 51671, 15689, 30604, 48169, 8728, 40216, 14470, 47338, 11850, 45515, 6749, 32358, 10303, 58941, 7933, 48435, 1775, 56052, 31050, 49374, 25174, 57776, 28944, 40943, 15037, 59137, 21580, 36303, 13008, 43928, 6697, 31701, 11841, 46074, 34819, 27344, 54888, 2655, 59344, 32871, 52709, 4003, 31801, 10535, 39393, 17341, 34573, 53662, 2707, 32356, 46139, 3927, 
23693, 60901, 18058, 28742, 45624, 3629, 42297, 12693, 46370, 35761, 17373, 52540, 8264, 39236, 63388, 2681, 43753, 33493, 11354, 37553, 55450, 29343, 62040, 44481, 51622, 14337, 37199, ++ 46033, 26040, 43516, 56237, 28436, 64991, 40168, 21495, 25469, 54673, 5335, 62685, 39919, 26950, 3686, 58068, 6366, 65219, 18430, 42332, 28645, 30889, 37012, 12461, 53828, 3865, 31900, 43762, 28284, 6445, 30575, 13426, 49298, 34003, 16660, 50850, 6752, 14713, 18969, 50178, 35143, 13947, 40442, 17276, 44019, 21107, 4712, 36153, 61151, 1957, 32608, 44431, 21728, 41666, 33495, 9735, 51167, 34358, 19067, 31451, 39744, 63186, 12514, 54816, 22256, 57641, 40452, 20060, 47572, 42220, 13330, 38300, 55084, 17003, 36362, 24325, 33259, 55818, 15250, 60143, 34615, 17714, 63425, 27528, 20652, 46591, 23955, 33516, 52859, 695, 45684, 17034, 3240, 46315, 55261, 40916, 14866, 29565, 36636, 56014, 23505, 33667, 47622, 15271, 991, 21872, 64500, 38989, 3399, 61470, 17152, 56833, 34837, 59637, 22851, 65138, 29369, 55878, 18810, 57676, 16274, 50938, 35908, 816, 45225, 22723, 48203, 57078, 1631, 21597, 62025, 39373, 3979, 44206, 7982, 54655, 708, 57290, 14018, 32579, 22749, 58197, 11191, 36211, 60914, 9768, 41115, 19797, 43705, 31460, 4369, 24215, 39080, 1728, 50568, 34908, 3740, 47883, 36986, 30097, 18602, 49386, 25806, 34204, 63171, 13221, 54659, 2906, 31108, 39519, 18907, 34059, 59053, 20756, 44134, 6107, 55953, 11245, 35910, 57727, 18147, 26315, 59838, 27629, 33475, 58281, 24291, 64118, 49702, 39290, 17260, 26037, 60776, 33228, 10957, 45406, 15943, 37348, 9122, 20749, 51313, 33997, 10405, 43202, 29600, 51984, 16376, 56940, 24815, 54485, 20310, 52446, 7382, 17988, 40149, 13573, 46933, 18885, 61878, 25019, 58075, 45867, 6072, 64573, 11343, 40762, 13764, 59660, 27473, 51254, 10312, 34862, 56135, 14223, 62927, 19968, 57500, 23129, 49837, 4019, 40979, 31608, 14643, 27106, 54566, 16986, 25342, 52958, 23016, 4898, 48555, 24966, 6398, 27808, 58939, 22293, ++ 10959, 58097, 18682, 7427, 46745, 16196, 6770, 
58527, 9275, 37849, 46290, 22564, 7702, 60523, 35600, 15905, 40587, 11661, 53105, 3057, 56949, 19804, 63772, 23938, 41612, 17146, 50097, 9806, 52995, 19643, 59655, 37438, 21697, 64886, 25392, 32458, 61922, 38873, 45312, 4153, 56426, 26117, 62560, 340, 58518, 33778, 49402, 15648, 39452, 23826, 57869, 14042, 53751, 8133, 26484, 58906, 15388, 55473, 25375, 60940, 3218, 27353, 49095, 4437, 30505, 35474, 798, 53725, 7412, 21792, 63104, 33910, 9571, 61187, 12621, 45824, 59293, 6471, 43799, 26537, 1683, 47879, 12367, 39071, 54746, 2178, 59752, 9496, 18231, 61055, 28194, 35233, 63047, 11510, 31427, 18834, 61518, 43359, 7878, 17311, 42265, 5948, 25522, 56981, 35092, 46245, 7550, 26668, 55611, 23602, 47262, 5769, 27866, 11835, 39467, 15414, 51686, 7605, 40101, 2586, 42811, 6756, 60596, 24522, 64978, 7813, 34317, 11793, 43204, 50617, 14144, 25853, 56463, 22449, 49905, 28155, 18778, 35826, 49283, 4761, 47281, 19290, 55697, 248, 24977, 53440, 30379, 65240, 14387, 55315, 35635, 62722, 12299, 46123, 21644, 64149, 28226, 11668, 60306, 6207, 42690, 12644, 56577, 1024, 43846, 19861, 37510, 45170, 60153, 15234, 52226, 7671, 29732, 12512, 65244, 35127, 22223, 28833, 45006, 2647, 42913, 53611, 4937, 52432, 20293, 1260, 36479, 15556, 4171, 28000, 54402, 43836, 13074, 52703, 23166, 61617, 3107, 54144, 44694, 60321, 6043, 26689, 49745, 2500, 62569, 8400, 40630, 34394, 1107, 38929, 14627, 30374, 58765, 44795, 25829, 63831, 28825, 6786, 36451, 43035, 2088, 21296, 51899, 29298, 25378, 47961, 35937, 8595, 43324, 14966, 54866, 174, 39883, 25122, 32690, 50958, 2058, 38335, 10598, 61276, 20962, 47798, 60694, 9688, 35035, 57624, 40662, 1215, 64017, 15807, 39468, 9874, 54969, 41570, 3743, 32801, ++ 53438, 5092, 35482, 61391, 23929, 52893, 30785, 34430, 49661, 27971, 12212, 56064, 33658, 20298, 50628, 29863, 48024, 24234, 34355, 44602, 13586, 49111, 5794, 45656, 8650, 58146, 22121, 64060, 25608, 46305, 903, 54611, 8346, 42658, 2373, 46741, 9439, 23791, 59213, 27628, 48553, 8185, 31630, 46162, 
23386, 10075, 59847, 29014, 11828, 50921, 5649, 27993, 62758, 35528, 45677, 22523, 38707, 6241, 46295, 14262, 53184, 36196, 15789, 60094, 46514, 10725, 61485, 29166, 36605, 45576, 4764, 26864, 41218, 29431, 51983, 5228, 18956, 39304, 13155, 53080, 35690, 23339, 57613, 4997, 32740, 16795, 40230, 55931, 30624, 39496, 6296, 51706, 23128, 38792, 50193, 1883, 34209, 24899, 59813, 49042, 12290, 64189, 51909, 9030, 43167, 18600, 59202, 32237, 12658, 37155, 20195, 52042, 30936, 60771, 3809, 45720, 32029, 24012, 48378, 59352, 22065, 54410, 20502, 38266, 13832, 52232, 26883, 62619, 18004, 35339, 31069, 48676, 9675, 37566, 12537, 45549, 62904, 10296, 29628, 64081, 38306, 7628, 45897, 31979, 43149, 16255, 37789, 3312, 46606, 8368, 27096, 17961, 48935, 32323, 16106, 9073, 40861, 52984, 23333, 51698, 31002, 64596, 10175, 38687, 27185, 58930, 5843, 24952, 11326, 42076, 26436, 49856, 38121, 47464, 25554, 965, 60721, 49431, 7812, 64427, 21539, 13301, 37362, 10662, 41790, 48633, 61043, 30809, 40978, 57359, 186, 21381, 35697, 5336, 41555, 29692, 40086, 18836, 32251, 14361, 35417, 63927, 17176, 38065, 19981, 47194, 27699, 64467, 22258, 60934, 49280, 3482, 32466, 10903, 51029, 116, 38554, 48876, 12462, 22688, 62433, 33458, 16075, 57236, 4881, 55403, 18548, 62645, 22596, 38630, 30797, 47149, 21412, 59478, 7999, 28251, 44166, 15324, 55603, 30075, 26102, 518, 37083, 45967, 6059, 21640, 13353, 49996, 29829, 46279, 20588, 61191, 33974, 17788, 63504, 47188, ++}; ++ ++#endif /* AVFILTER_DITHER_MATRIX_H */ +Index: FFmpeg/libavfilter/vf_scale_cuda.c +=================================================================== +--- libavfilter/vf_scale_cuda.c ++++ libavfilter/vf_scale_cuda.c +@@ -1,5 +1,8 @@ + /* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. ++* Copyright (c) 2019 rcombs ++* ++* This file is part of FFmpeg. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -20,9 +23,9 @@ + * DEALINGS IN THE SOFTWARE. + */ + +-#include + #include + ++#include "libavutil/avassert.h" + #include "libavutil/common.h" + #include "libavutil/hwcontext.h" + #include "libavutil/hwcontext_cuda_internal.h" +@@ -32,12 +35,12 @@ + #include "libavutil/pixdesc.h" + + #include "avfilter.h" ++#include "dither_matrix.h" + #include "internal.h" + #include "scale_eval.h" + #include "video.h" + + #include "cuda/load_helper.h" +-#include "vf_scale_cuda.h" + + static const enum AVPixelFormat supported_formats[] = { + AV_PIX_FMT_YUV420P, +@@ -46,10 +49,6 @@ static const enum AVPixelFormat supporte + AV_PIX_FMT_P010, + AV_PIX_FMT_P016, + AV_PIX_FMT_YUV444P16, +- AV_PIX_FMT_0RGB32, +- AV_PIX_FMT_0BGR32, +- AV_PIX_FMT_RGB32, +- AV_PIX_FMT_BGR32, + }; + + #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) +@@ -58,27 +57,13 @@ static const enum AVPixelFormat supporte + + #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) + +-enum { +- INTERP_ALGO_DEFAULT, +- +- INTERP_ALGO_NEAREST, +- INTERP_ALGO_BILINEAR, +- INTERP_ALGO_BICUBIC, +- INTERP_ALGO_LANCZOS, +- +- INTERP_ALGO_COUNT +-}; +- + typedef struct CUDAScaleContext { + const AVClass *class; + + AVCUDADeviceContext *hwctx; + +- enum AVPixelFormat in_fmt, out_fmt; +- const AVPixFmtDescriptor *in_desc, *out_desc; +- int in_planes, out_planes; +- int in_plane_depths[4]; +- int in_plane_channels[4]; ++ enum AVPixelFormat in_fmt; ++ enum AVPixelFormat out_fmt; + + AVBufferRef *frames_ctx; + AVFrame *frame; +@@ -90,6 +75,7 @@ typedef struct CUDAScaleContext { + * Output sw format. AV_PIX_FMT_NONE for no conversion. 
+ */ + enum AVPixelFormat format; ++ char *format_str; + + char *w_expr; ///< width expression string + char *h_expr; ///< height expression string +@@ -99,21 +85,56 @@ typedef struct CUDAScaleContext { + + CUcontext cu_ctx; + CUmodule cu_module; +- CUfunction cu_func; +- CUfunction cu_func_uv; ++ ++#define VARIANT(NAME) \ ++ CUfunction cu_func_ ## NAME; ++#define VARIANTSET(NAME) \ ++ VARIANT(NAME) \ ++ VARIANT(NAME ## _c) \ ++ VARIANT(NAME ## _p2) \ ++ VARIANT(NAME ## _2) \ ++ VARIANT(NAME ## _2_u) \ ++ VARIANT(NAME ## _2_v) \ ++ VARIANT(NAME ## _4) ++ ++ VARIANTSET(8_8) ++ VARIANTSET(16_16) ++ VARIANTSET(8_16) ++ VARIANTSET(16_8) ++#undef VARIANTSET ++#undef VARIANT ++ ++ CUfunction cu_func_luma; ++ CUfunction cu_func_chroma_u; ++ CUfunction cu_func_chroma_v; ++ + CUstream cu_stream; + +- int interp_algo; +- int interp_use_linear; +- int interp_as_integer; ++ CUdeviceptr srcBuffer; ++ CUdeviceptr dstBuffer; ++ int tex_alignment; + +- float param; ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++ int in_planes, out_planes; ++ ++ CUdeviceptr ditherBuffer; ++ CUtexObject ditherTex; + } CUDAScaleContext; + + static av_cold int cudascale_init(AVFilterContext *ctx) + { + CUDAScaleContext *s = ctx->priv; + ++ if (!strcmp(s->format_str, "same")) { ++ s->format = AV_PIX_FMT_NONE; ++ } else { ++ s->format = av_get_pix_fmt(s->format_str); ++ if (s->format == AV_PIX_FMT_NONE) { ++ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); ++ return AVERROR(EINVAL); ++ } ++ } ++ + s->frame = av_frame_alloc(); + if (!s->frame) + return AVERROR(ENOMEM); +@@ -129,13 +150,22 @@ static av_cold void cudascale_uninit(AVF + { + CUDAScaleContext *s = ctx->priv; + +- if (s->hwctx && s->cu_module) { ++ if (s->hwctx) { + CudaFunctions *cu = s->hwctx->internal->cuda_dl; +- CUcontext dummy; ++ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; ++ ++ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); ++ ++ if (s->ditherTex) { ++ CHECK_CU(cu->cuTexObjectDestroy(s->ditherTex)); ++ 
s->ditherTex = 0; ++ } ++ ++ if (s->ditherBuffer) { ++ CHECK_CU(cu->cuMemFree(s->ditherBuffer)); ++ s->ditherBuffer = 0; ++ } + +- CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); +- CHECK_CU(cu->cuModuleUnload(s->cu_module)); +- s->cu_module = NULL; + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + } + +@@ -191,32 +221,6 @@ static int format_is_supported(enum AVPi + return 0; + } + +-static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format) +-{ +- CUDAScaleContext *s = ctx->priv; +- int i, p, d; +- +- s->in_fmt = in_format; +- s->out_fmt = out_format; +- +- s->in_desc = av_pix_fmt_desc_get(s->in_fmt); +- s->out_desc = av_pix_fmt_desc_get(s->out_fmt); +- s->in_planes = av_pix_fmt_count_planes(s->in_fmt); +- s->out_planes = av_pix_fmt_count_planes(s->out_fmt); +- +- // find maximum step of each component of each plane +- // For our subset of formats, this should accurately tell us how many channels CUDA needs +- // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats +- +- for (i = 0; i < s->in_desc->nb_components; i++) { +- d = (s->in_desc->comp[i].depth + 7) / 8; +- p = s->in_desc->comp[i].plane; +- s->in_plane_channels[p] = FFMAX(s->in_plane_channels[p], s->in_desc->comp[i].step / d); +- +- s->in_plane_depths[p] = s->in_desc->comp[i].depth; +- } +-} +- + static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height, + int out_width, int out_height) + { +@@ -248,7 +252,8 @@ static av_cold int init_processing_chain + return AVERROR(ENOSYS); + } + +- set_format_info(ctx, in_format, out_format); ++ s->in_fmt = in_format; ++ s->out_fmt = out_format; + + if (s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) { + s->frames_ctx = av_buffer_ref(ctx->inputs[0]->hw_frames_ctx); +@@ -260,10 +265,6 @@ static av_cold int init_processing_chain + ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, out_width, out_height); + 
if (ret < 0) + return ret; +- +- if (in_width == out_width && in_height == out_height && +- in_format == out_format && s->interp_algo == INTERP_ALGO_DEFAULT) +- s->interp_algo = INTERP_ALGO_NEAREST; + } + + ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); +@@ -273,74 +274,60 @@ static av_cold int init_processing_chain + return 0; + } + +-static av_cold int cudascale_load_functions(AVFilterContext *ctx) ++static av_cold int cudascale_setup_dither(AVFilterContext *ctx) + { +- CUDAScaleContext *s = ctx->priv; +- CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; +- CudaFunctions *cu = s->hwctx->internal->cuda_dl; +- char buf[128]; +- int ret; ++ CUDAScaleContext *s = ctx->priv; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; ++ CudaFunctions *cu = device_hwctx->internal->cuda_dl; ++ CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; ++ int ret = 0; + +- const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt); +- const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt); ++ CUDA_MEMCPY2D cpy = { ++ .srcMemoryType = CU_MEMORYTYPE_HOST, ++ .dstMemoryType = CU_MEMORYTYPE_DEVICE, ++ .srcHost = ff_fruit_dither_matrix, ++ .dstDevice = 0, ++ .srcPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .dstPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .WidthInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .Height = ff_fruit_dither_size, ++ }; + +- const char *function_infix = ""; ++ CUDA_TEXTURE_DESC tex_desc = { ++ .filterMode = CU_TR_FILTER_MODE_POINT, ++ .flags = CU_TRSF_READ_AS_INTEGER, ++ }; + +- extern const unsigned char ff_vf_scale_cuda_ptx_data[]; +- extern const unsigned int ff_vf_scale_cuda_ptx_len; ++ CUDA_RESOURCE_DESC res_desc = { ++ .resType = CU_RESOURCE_TYPE_PITCH2D, ++ .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16, ++ 
.res.pitch2D.numChannels = 1, ++ .res.pitch2D.width = ff_fruit_dither_size, ++ .res.pitch2D.height = ff_fruit_dither_size, ++ .res.pitch2D.pitchInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .res.pitch2D.devPtr = 0, ++ }; + +- switch(s->interp_algo) { +- case INTERP_ALGO_NEAREST: +- function_infix = "Nearest"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 1; +- break; +- case INTERP_ALGO_BILINEAR: +- function_infix = "Bilinear"; +- s->interp_use_linear = 1; +- s->interp_as_integer = 1; +- break; +- case INTERP_ALGO_DEFAULT: +- case INTERP_ALGO_BICUBIC: +- function_infix = "Bicubic"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 0; +- break; +- case INTERP_ALGO_LANCZOS: +- function_infix = "Lanczos"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 0; +- break; +- default: +- av_log(ctx, AV_LOG_ERROR, "Unknown interpolation algorithm\n"); +- return AVERROR_BUG; +- } ++ av_assert0(sizeof(ff_fruit_dither_matrix) == sizeof(ff_fruit_dither_matrix[0]) * ff_fruit_dither_size * ff_fruit_dither_size); + +- ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); +- if (ret < 0) ++ if ((ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx))) < 0) + return ret; + +- ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module, +- ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len); +- if (ret < 0) ++ if ((ret = CHECK_CU(cu->cuMemAlloc(&s->ditherBuffer, sizeof(ff_fruit_dither_matrix)))) < 0) + goto fail; + +- snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name); +- ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, buf)); +- if (ret < 0) { +- av_log(ctx, AV_LOG_FATAL, "Unsupported conversion: %s -> %s\n", in_fmt_name, out_fmt_name); +- ret = AVERROR(ENOSYS); ++ res_desc.res.pitch2D.devPtr = cpy.dstDevice = s->ditherBuffer; ++ ++ if ((ret = CHECK_CU(cu->cuMemcpy2D(&cpy))) < 0) + goto fail; +- } + +- snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name); +- 
ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf)); +- if (ret < 0) ++ if ((ret = CHECK_CU(cu->cuTexObjectCreate(&s->ditherTex, &res_desc, &tex_desc, NULL))) < 0) + goto fail; + + fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); +- + return ret; + } + +@@ -351,12 +338,50 @@ static av_cold int cudascale_config_prop + CUDAScaleContext *s = ctx->priv; + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; + AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; ++ CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; ++ CudaFunctions *cu = device_hwctx->internal->cuda_dl; + int w, h; ++ int i; + int ret; + ++ extern const unsigned char ff_vf_scale_cuda_ptx_data[]; ++ extern const unsigned int ff_vf_scale_cuda_ptx_len; ++ + s->hwctx = device_hwctx; + s->cu_stream = s->hwctx->stream; + ++ ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); ++ if (ret < 0) ++ goto fail; ++ ++ ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module, ++ ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len); ++ if (ret < 0) ++ goto fail; ++ ++#define VARIANT(NAME) \ ++ CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ ## NAME, s->cu_module, "Subsample_Bilinear_" #NAME)); \ ++ if (ret < 0) \ ++ goto fail; ++ ++#define VARIANTSET(NAME) \ ++ VARIANT(NAME) \ ++ VARIANT(NAME ## _c) \ ++ VARIANT(NAME ## _2) \ ++ VARIANT(NAME ## _p2) \ ++ VARIANT(NAME ## _2_u) \ ++ VARIANT(NAME ## _2_v) \ ++ VARIANT(NAME ## _4) ++ ++ VARIANTSET(8_8) ++ VARIANTSET(16_16) ++ VARIANTSET(8_16) ++ VARIANTSET(16_8) ++#undef VARIANTSET ++#undef VARIANT ++ ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ + if ((ret = ff_scale_eval_dimensions(s, + s->w_expr, s->h_expr, + inlink, outlink, +@@ -377,6 +402,56 @@ static av_cold int cudascale_config_prop + if (ret < 0) + return ret; + ++ s->in_desc = av_pix_fmt_desc_get(s->in_fmt); ++ s->out_desc = av_pix_fmt_desc_get(s->out_fmt); ++ ++ for (i = 0; i < s->in_desc->nb_components; i++) ++ s->in_planes = FFMAX(s->in_planes, s->in_desc 
->comp[i].plane + 1); ++ ++ for (i = 0; i < s->in_desc->nb_components; i++) ++ s->out_planes = FFMAX(s->out_planes, s->out_desc->comp[i].plane + 1); ++ ++#define VARIANT(INDEPTH, OUTDEPTH, SUFFIX) s->cu_func_ ## INDEPTH ## _ ## OUTDEPTH ## SUFFIX ++#define BITS(n) ((n + 7) & ~7) ++#define VARIANTSET(INDEPTH, OUTDEPTH) \ ++ else if (BITS(s->in_desc->comp[0].depth) == INDEPTH && \ ++ BITS(s->out_desc->comp[0].depth) == OUTDEPTH) { \ ++ s->cu_func_luma = VARIANT(INDEPTH, OUTDEPTH,); \ ++ if (s->in_planes == 3 && s->out_planes == 3) { \ ++ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _c); \ ++ } else if (s->in_planes == 3 && s->out_planes == 2) { \ ++ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _p2); \ ++ } else if (s->in_planes == 2 && s->out_planes == 2) { \ ++ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2); \ ++ } else if (s->in_planes == 2 && s->out_planes == 3) { \ ++ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2_u); \ ++ s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _2_v); \ ++ } else { \ ++ ret = AVERROR_BUG; \ ++ goto fail; \ ++ } \ ++ } ++ ++ if (0) {} ++ VARIANTSET(8, 8) ++ VARIANTSET(16, 16) ++ VARIANTSET(8, 16) ++ VARIANTSET(16, 8) ++ else { ++ ret = AVERROR_BUG; ++ goto fail; ++ } ++#undef VARIANTSET ++#undef VARIANT ++ ++ if (s->in_desc->comp[0].depth > s->out_desc->comp[0].depth) { ++ if ((ret = cudascale_setup_dither(ctx)) < 0) ++ goto fail; ++ } ++ ++ av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d%s\n", ++ inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? 
" (passthrough)" : ""); ++ + if (inlink->sample_aspect_ratio.num) { + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w, + outlink->w*inlink->h}, +@@ -385,118 +460,93 @@ static av_cold int cudascale_config_prop + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + } + +- av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d fmt:%s -> w:%d h:%d fmt:%s%s\n", +- inlink->w, inlink->h, av_get_pix_fmt_name(s->in_fmt), +- outlink->w, outlink->h, av_get_pix_fmt_name(s->out_fmt), +- s->passthrough ? " (passthrough)" : ""); +- +- ret = cudascale_load_functions(ctx); +- if (ret < 0) +- return ret; +- + return 0; + + fail: + return ret; + } + +-static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, +- CUtexObject src_tex[4], int src_width, int src_height, +- AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch) ++static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, ++ uint8_t *src_dptr, int src_width, int src_height, int src_pitch, ++ uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, ++ int pixel_size) + { + CUDAScaleContext *s = ctx->priv; + CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; ++ CUtexObject tex = 0; ++ void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height, &s->ditherTex }; ++ int ret; + +- CUdeviceptr dst_devptr[4] = { +- (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1], +- (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3] ++ CUDA_TEXTURE_DESC tex_desc = { ++ .filterMode = CU_TR_FILTER_MODE_LINEAR, ++ .flags = CU_TRSF_READ_AS_INTEGER, + }; + +- void *args_uchar[] = { +- &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3], +- &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3], +- &dst_width, &dst_height, &dst_pitch, +- &src_width, &src_height, &s->param ++ CUDA_RESOURCE_DESC res_desc = { ++ .resType = CU_RESOURCE_TYPE_PITCH2D, ++ 
.res.pitch2D.format = pixel_size == 1 ? ++ CU_AD_FORMAT_UNSIGNED_INT8 : ++ CU_AD_FORMAT_UNSIGNED_INT16, ++ .res.pitch2D.numChannels = channels, ++ .res.pitch2D.width = src_width, ++ .res.pitch2D.height = src_height, ++ .res.pitch2D.pitchInBytes = src_pitch, ++ .res.pitch2D.devPtr = (CUdeviceptr)src_dptr, + }; + +- return CHECK_CU(cu->cuLaunchKernel(func, +- DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, +- BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL)); ++ ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL)); ++ if (ret < 0) ++ goto exit; ++ ++ ret = CHECK_CU(cu->cuLaunchKernel(func, ++ DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, ++ BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL)); ++ ++exit: ++ if (tex) ++ CHECK_CU(cu->cuTexObjectDestroy(tex)); ++ ++ return ret; + } + + static int scalecuda_resize(AVFilterContext *ctx, + AVFrame *out, AVFrame *in) + { + CUDAScaleContext *s = ctx->priv; +- CudaFunctions *cu = s->hwctx->internal->cuda_dl; +- CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; +- int i, ret; + +- CUtexObject tex[4] = { 0, 0, 0, 0 }; ++#define DEPTH_BYTES(depth) (((depth) + 7) / 8) + +- ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); +- if (ret < 0) +- return ret; +- +- for (i = 0; i < s->in_planes; i++) { +- CUDA_TEXTURE_DESC tex_desc = { +- .filterMode = s->interp_use_linear ? +- CU_TR_FILTER_MODE_LINEAR : +- CU_TR_FILTER_MODE_POINT, +- .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0, +- }; +- +- CUDA_RESOURCE_DESC res_desc = { +- .resType = CU_RESOURCE_TYPE_PITCH2D, +- .res.pitch2D.format = s->in_plane_depths[i] <= 8 ? 
+- CU_AD_FORMAT_UNSIGNED_INT8 : +- CU_AD_FORMAT_UNSIGNED_INT16, +- .res.pitch2D.numChannels = s->in_plane_channels[i], +- .res.pitch2D.pitchInBytes = in->linesize[i], +- .res.pitch2D.devPtr = (CUdeviceptr)in->data[i], +- }; +- +- if (i == 1 || i == 2) { +- res_desc.res.pitch2D.width = AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w); +- res_desc.res.pitch2D.height = AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h); +- } else { +- res_desc.res.pitch2D.width = in->width; +- res_desc.res.pitch2D.height = in->height; +- } +- +- ret = CHECK_CU(cu->cuTexObjectCreate(&tex[i], &res_desc, &tex_desc, NULL)); +- if (ret < 0) +- goto exit; +- } +- +- // scale primary plane(s). Usually Y (and A), or single plane of RGB frames. +- ret = call_resize_kernel(ctx, s->cu_func, +- tex, in->width, in->height, +- out, out->width, out->height, out->linesize[0]); +- if (ret < 0) +- goto exit; +- +- if (s->out_planes > 1) { +- // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane. +- ret = call_resize_kernel(ctx, s->cu_func_uv, tex, +- AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), +- AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), +- out, +- AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), +- AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), +- out->linesize[1]); +- if (ret < 0) +- goto exit; ++ call_resize_kernel(ctx, s->cu_func_luma, 1, ++ in->data[0], in->width, in->height, in->linesize[0], ++ out->data[0], out->width, out->height, out->linesize[0], ++ DEPTH_BYTES(s->in_desc->comp[0].depth)); ++ ++ call_resize_kernel(ctx, s->cu_func_chroma_u, s->in_planes == 2 ? 
2 : 1, ++ in->data[1], ++ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), ++ in->linesize[1], ++ out->data[1], ++ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), ++ out->linesize[1], ++ DEPTH_BYTES(s->in_desc->comp[1].depth)); ++ ++ if (s->cu_func_chroma_v) { ++ call_resize_kernel(ctx, s->cu_func_chroma_v, s->in_planes == 2 ? 2 : 1, ++ in->data[s->in_desc->comp[2].plane], ++ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), ++ in->linesize[s->in_desc->comp[2].plane], ++ out->data[s->out_desc->comp[2].plane] + s->out_desc->comp[2].offset, ++ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), ++ out->linesize[s->out_desc->comp[2].plane], ++ DEPTH_BYTES(s->in_desc->comp[2].depth)); + } + +-exit: +- for (i = 0; i < s->in_planes; i++) +- if (tex[i]) +- CHECK_CU(cu->cuTexObjectDestroy(tex[i])); +- +- CHECK_CU(cu->cuCtxPopCurrent(&dummy)); +- +- return ret; ++ return 0; + } + + static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in) +@@ -583,21 +633,15 @@ static AVFrame *cudascale_get_video_buff + #define OFFSET(x) offsetof(CUDAScaleContext, x) + #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) + static const AVOption options[] = { +- { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, +- { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, +- { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, .unit = "interp_algo" }, +- { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, .unit = "interp_algo" }, +- { "bilinear", "bilinear", 
0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, .unit = "interp_algo" }, +- { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, .unit = "interp_algo" }, +- { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, .unit = "interp_algo" }, +- { "format", "Output video pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, INT_MIN, INT_MAX, .flags=FLAGS }, ++ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, + { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, +- { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS }, +- { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, .unit = "force_oar" }, +- { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, .unit = "force_oar" }, +- { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, .unit = "force_oar" }, +- { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, .unit = "force_oar" }, +- { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, ++ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, .unit = "force_oar" }, ++ { "disable", 
NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, .unit = "force_oar" }, ++ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, .unit = "force_oar" }, ++ { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, .unit = "force_oar" }, ++ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, + { NULL }, + }; + +@@ -610,9 +654,9 @@ static const AVClass cudascale_class = { + + static const AVFilterPad cudascale_inputs[] = { + { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .filter_frame = cudascale_filter_frame, ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = cudascale_filter_frame, + .get_buffer.video = cudascale_get_video_buffer, + }, + }; +@@ -626,14 +670,14 @@ static const AVFilterPad cudascale_outpu + }; + + const AVFilter ff_vf_scale_cuda = { +- .name = "scale_cuda", +- .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"), ++ .name = "scale_cuda", ++ .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"), + + .init = cudascale_init, + .uninit = cudascale_uninit, + +- .priv_size = sizeof(CUDAScaleContext), +- .priv_class = &cudascale_class, ++ .priv_size = sizeof(CUDAScaleContext), ++ .priv_class = &cudascale_class, + + FILTER_INPUTS(cudascale_inputs), + FILTER_OUTPUTS(cudascale_outputs), +Index: FFmpeg/libavfilter/vf_scale_cuda.cu +=================================================================== +--- libavfilter/vf_scale_cuda.cu ++++ libavfilter/vf_scale_cuda.cu +@@ -1,5 +1,5 @@ + /* +- * This file is part of FFmpeg. ++ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -20,1306 +20,174 @@ + * DEALINGS IN THE SOFTWARE. 
+ */ + +-#include "cuda/vector_helpers.cuh" +-#include "vf_scale_cuda.h" ++typedef unsigned char uchar; ++typedef unsigned short ushort; + +-template +-using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo, +- int dst_width, int dst_height, +- int src_width, int src_height, +- int bit_depth, float param); +- +-// --- CONVERSION LOGIC --- +- +-static const ushort mask_10bit = 0xFFC0; +-static const ushort mask_16bit = 0xFFFF; +- +-static inline __device__ ushort conv_8to16(uchar in, ushort mask) +-{ +- return ((ushort)in | ((ushort)in << 8)) & mask; +-} +- +-static inline __device__ uchar conv_16to8(ushort in) +-{ +- return in >> 8; +-} +- +-static inline __device__ uchar conv_10to8(ushort in) +-{ +- return in >> 8; +-} +- +-static inline __device__ ushort conv_10to16(ushort in) +-{ +- return in | (in >> 10); +-} +- +-static inline __device__ ushort conv_16to10(ushort in) +-{ +- return in & mask_10bit; +-} +- +-#define DEF_F(N, T) \ +- template subsample_func_y, \ +- subsample_function_t subsample_func_uv> \ +- __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, float param) +- +-#define SUB_F(m, plane) \ +- subsample_func_##m(src_tex[plane], xo, yo, \ +- dst_width, dst_height, \ +- src_width, src_height, \ +- in_bit_depth, param) +- +-// FFmpeg passes pitch in bytes, CUDA uses potentially larger types +-#define FIXED_PITCH \ +- (dst_pitch/sizeof(*dst[0])) +- +-#define DEFAULT_DST(n) \ +- dst[n][yo*FIXED_PITCH+xo] +- +-// yuv420p->X +- +-struct Convert_yuv420p_yuv420p +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = SUB_F(uv, 1); +- DEFAULT_DST(2) = SUB_F(uv, 2); +- } +-}; +- +-struct 
Convert_yuv420p_nv12 +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef uchar out_T; +- typedef uchar2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_uchar2( +- SUB_F(uv, 1), +- SUB_F(uv, 2) +- ); +- } +-}; +- +-struct Convert_yuv420p_yuv444p +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = SUB_F(uv, 1); +- DEFAULT_DST(2) = SUB_F(uv, 2); +- } +-}; ++#define SHIFTDOWN(val) (dstbase)(val >> abs(2 + shift)) ++#define SHIFTUP(val) (dstbase)(val << abs(-shift - 2)) + +-struct Convert_yuv420p_p010le ++template struct add_conv_shift1_d + { +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; ++ typedef DST dstbase; + +- DEF_F(Convert, out_T) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); +- } ++ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + ((1 + d) >> (sizeof(SRC) * 8 - dither + 3)); + +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_ushort2( +- conv_8to16(SUB_F(uv, 1), mask_10bit), +- conv_8to16(SUB_F(uv, 2), mask_10bit) +- ); ++ if (shift > -2) ++ return SHIFTDOWN(ret); ++ else ++ return SHIFTUP(ret); + } + }; + +-struct Convert_yuv420p_p016le ++template struct add_conv_shift1 + { +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; ++ typedef DST dstbase; + +- DEF_F(Convert, out_T) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- DEFAULT_DST(0) = 
conv_8to16(SUB_F(y, 0), mask_16bit); +- } ++ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + 2; + +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_ushort2( +- conv_8to16(SUB_F(uv, 1), mask_16bit), +- conv_8to16(SUB_F(uv, 2), mask_16bit) +- ); ++ if (shift > -2) ++ return SHIFTDOWN(ret); ++ else ++ return SHIFTUP(ret); + } + }; + +-struct Convert_yuv420p_yuv444p16le ++template struct add_conv_shift2 + { +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef ushort out_T; +- typedef ushort out_T_uv; ++ typedef decltype(DST::x) dstbase; + +- DEF_F(Convert, out_T) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); +- } ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; + +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit); +- DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit); ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety) }; + } + }; + +-// nv12->X +- +-struct Convert_nv12_yuv420p ++template struct add_conv_shift2_x + { +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar2 in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = res.x; +- DEFAULT_DST(2) = res.y; ++ return add_conv_shift1()(i1.x, i2.x, i3.x, i4.x, d); + } + }; + +-struct Convert_nv12_nv12 ++template struct add_conv_shift2_y + { +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar2 in_T_uv; +- typedef 
uchar out_T; +- typedef uchar2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- DEFAULT_DST(1) = SUB_F(uv, 1); ++ return add_conv_shift1()(i1.y, i2.y, i3.y, i4.y, d); + } + }; + +-struct Convert_nv12_yuv444p ++template struct add_conv_shift3 + { +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar2 in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; ++ typedef decltype(DST::x) dstbase; + +- DEF_F(Convert, out_T) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; ++ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; + +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = res.x; +- DEFAULT_DST(2) = res.y; ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz) }; + } + }; + +-struct Convert_nv12_p010le ++template struct add_conv_shift4 + { +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar2 in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; ++ typedef decltype(DST::x) dstbase; + +- DEF_F(Convert, out_T) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); +- } ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; ++ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; ++ unsigned retw = 
(unsigned)i1.w + (unsigned)i2.w + (unsigned)i3.w + (unsigned)i4.w + 2; + +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = make_ushort2( +- conv_8to16(res.x, mask_10bit), +- conv_8to16(res.y, mask_10bit) +- ); ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz), SHIFTDOWN(retw) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz), SHIFTUP(retw) }; + } + }; + +-struct Convert_nv12_p016le +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar2 in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = make_ushort2( +- conv_8to16(res.x, mask_16bit), +- conv_8to16(res.y, mask_16bit) +- ); +- } +-}; +- +-struct Convert_nv12_yuv444p16le +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar2 in_T_uv; +- typedef ushort out_T; +- typedef ushort out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit); +- DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit); +- } +-}; +- +-// yuv444p->X +- +-struct Convert_yuv444p_yuv420p +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = SUB_F(uv, 1); +- DEFAULT_DST(2) = SUB_F(uv, 2); +- } +-}; +- +-struct Convert_yuv444p_nv12 +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef uchar out_T; +- typedef uchar2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- 
DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_uchar2( +- SUB_F(uv, 1), +- SUB_F(uv, 2) +- ); +- } +-}; +- +-struct Convert_yuv444p_yuv444p +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = SUB_F(uv, 1); +- DEFAULT_DST(2) = SUB_F(uv, 2); +- } +-}; +- +-struct Convert_yuv444p_p010le +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_ushort2( +- conv_8to16(SUB_F(uv, 1), mask_10bit), +- conv_8to16(SUB_F(uv, 2), mask_10bit) +- ); +- } +-}; +- +-struct Convert_yuv444p_p016le +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_ushort2( +- conv_8to16(SUB_F(uv, 1), mask_16bit), +- conv_8to16(SUB_F(uv, 2), mask_16bit) +- ); +- } +-}; +- +-struct Convert_yuv444p_yuv444p16le +-{ +- static const int in_bit_depth = 8; +- typedef uchar in_T; +- typedef uchar in_T_uv; +- typedef ushort out_T; +- typedef ushort out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit); +- DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit); +- } +-}; +- +-// p010le->X +- +-struct Convert_p010le_yuv420p +-{ +- static const int in_bit_depth = 10; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef uchar out_T; +- typedef 
uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = conv_10to8(res.x); +- DEFAULT_DST(2) = conv_10to8(res.y); +- } +-}; +- +-struct Convert_p010le_nv12 +-{ +- static const int in_bit_depth = 10; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef uchar out_T; +- typedef uchar2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = make_uchar2( +- conv_10to8(res.x), +- conv_10to8(res.y) +- ); +- } +-}; +- +-struct Convert_p010le_yuv444p +-{ +- static const int in_bit_depth = 10; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = conv_10to8(res.x); +- DEFAULT_DST(2) = conv_10to8(res.y); +- } +-}; +- +-struct Convert_p010le_p010le +-{ +- static const int in_bit_depth = 10; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = SUB_F(uv, 1); +- } +-}; +- +-struct Convert_p010le_p016le +-{ +- static const int in_bit_depth = 10; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = make_ushort2( +- conv_10to16(res.x), +- conv_10to16(res.y) +- ); +- } +-}; +- +-struct Convert_p010le_yuv444p16le +-{ +- static const int in_bit_depth = 10; +- typedef ushort in_T; +- typedef ushort2 
in_T_uv; +- typedef ushort out_T; +- typedef ushort out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = conv_10to16(res.x); +- DEFAULT_DST(2) = conv_10to16(res.y); +- } +-}; +- +-// p016le->X +- +-struct Convert_p016le_yuv420p +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = conv_16to8(res.x); +- DEFAULT_DST(2) = conv_16to8(res.y); +- } +-}; +- +-struct Convert_p016le_nv12 +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef uchar out_T; +- typedef uchar2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = make_uchar2( +- conv_16to8(res.x), +- conv_16to8(res.y) +- ); +- } +-}; +- +-struct Convert_p016le_yuv444p +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = conv_16to8(res.x); +- DEFAULT_DST(2) = conv_16to8(res.y); +- } +-}; +- +-struct Convert_p016le_p010le +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = make_ushort2( +- conv_16to10(res.x), +- 
conv_16to10(res.y) +- ); +- } +-}; +- +-struct Convert_p016le_p016le +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = SUB_F(uv, 1); +- } +-}; +- +-struct Convert_p016le_yuv444p16le +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort2 in_T_uv; +- typedef ushort out_T; +- typedef ushort out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- in_T_uv res = SUB_F(uv, 1); +- DEFAULT_DST(1) = res.x; +- DEFAULT_DST(2) = res.y; +- } +-}; +- +-// yuv444p16le->X +- +-struct Convert_yuv444p16le_yuv420p +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1)); +- DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2)); +- } +-}; +- +-struct Convert_yuv444p16le_nv12 +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort in_T_uv; +- typedef uchar out_T; +- typedef uchar2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_uchar2( +- conv_16to8(SUB_F(uv, 1)), +- conv_16to8(SUB_F(uv, 2)) +- ); +- } +-}; +- +-struct Convert_yuv444p16le_yuv444p +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort in_T_uv; +- typedef uchar out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1)); +- DEFAULT_DST(2) = 
conv_16to8(SUB_F(uv, 2)); +- } +-}; +- +-struct Convert_yuv444p16le_p010le +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0)); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_ushort2( +- conv_16to10(SUB_F(uv, 1)), +- conv_16to10(SUB_F(uv, 2)) +- ); +- } +-}; +- +-struct Convert_yuv444p16le_p016le +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort in_T_uv; +- typedef ushort out_T; +- typedef ushort2 out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = make_ushort2( +- SUB_F(uv, 1), +- SUB_F(uv, 2) +- ); +- } +-}; +- +-struct Convert_yuv444p16le_yuv444p16le +-{ +- static const int in_bit_depth = 16; +- typedef ushort in_T; +- typedef ushort in_T_uv; +- typedef ushort out_T; +- typedef ushort out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- DEFAULT_DST(0) = SUB_F(y, 0); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- DEFAULT_DST(1) = SUB_F(uv, 1); +- DEFAULT_DST(2) = SUB_F(uv, 2); +- } +-}; +- +-#define DEF_CONVERT_IDENTITY(fmt1, fmt2)\ +- \ +-struct Convert_##fmt1##_##fmt2 \ +-{ \ +- static const int in_bit_depth = 8; \ +- typedef uchar4 in_T; \ +- typedef uchar in_T_uv; \ +- typedef uchar4 out_T; \ +- typedef uchar out_T_uv; \ +- \ +- DEF_F(Convert, out_T) \ +- { \ +- DEFAULT_DST(0) = SUB_F(y, 0); \ +- } \ +- \ +- DEF_F(Convert_uv, out_T_uv) \ +- { \ +- } \ +-}; \ +- +-#define DEF_CONVERT_REORDER(fmt1, fmt2) \ +- \ +-struct Convert_##fmt1##_##fmt2 \ +-{ \ +- static const int in_bit_depth = 8; \ +- typedef uchar4 in_T; \ +- typedef uchar in_T_uv; \ +- typedef uchar4 out_T; \ +- typedef uchar out_T_uv; \ +- \ +- DEF_F(Convert, out_T) \ +- { \ +- uchar4 res = SUB_F(y, 0); \ +- DEFAULT_DST(0) = make_uchar4( \ +- res.z, \ +- res.y, \ +- res.x, \ +- res.w \ +- ); \ 
+- } \ +- \ +- DEF_F(Convert_uv, out_T_uv) \ +- { \ +- } \ +-}; \ +- +-#define DEF_CONVERT_RGB(fmt1, fmt2) \ +- \ +-DEF_CONVERT_IDENTITY(fmt1, fmt1) \ +-DEF_CONVERT_REORDER (fmt1, fmt2) \ +-DEF_CONVERT_REORDER (fmt2, fmt1) \ +-DEF_CONVERT_IDENTITY(fmt2, fmt2) +- +-DEF_CONVERT_RGB(rgb0, bgr0) +-DEF_CONVERT_RGB(rgba, bgra) +-DEF_CONVERT_IDENTITY(rgba, rgb0) +-DEF_CONVERT_IDENTITY(bgra, bgr0) +-DEF_CONVERT_REORDER(rgba, bgr0) +-DEF_CONVERT_REORDER(bgra, rgb0) +- +-struct Convert_bgr0_bgra ++template class conv, int pitch, int shift, int dither> ++__inline__ __device__ void Subsample_Bilinear(cudaTextureObject_t tex, ++ DST *dst, ++ int dst_width, int dst_height, int dst_pitch, ++ int src_width, int src_height, ++ cudaTextureObject_t ditherTex) + { +- static const int in_bit_depth = 8; +- typedef uchar4 in_T; +- typedef uchar in_T_uv; +- typedef uchar4 out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- uchar4 res = SUB_F(y, 0); +- DEFAULT_DST(0) = make_uchar4( +- res.x, +- res.y, +- res.z, +- 1 +- ); +- } ++ int xo = blockIdx.x * blockDim.x + threadIdx.x; ++ int yo = blockIdx.y * blockDim.y + threadIdx.y; + +- DEF_F(Convert_uv, out_T_uv) ++ if (yo < dst_height && xo < dst_width) + { +- } +-}; ++ float hscale = (float)src_width / (float)dst_width; ++ float vscale = (float)src_height / (float)dst_height; ++ float xi = (xo + 0.5f) * hscale; ++ float yi = (yo + 0.5f) * vscale; ++ // 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv} ++ float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f); ++ float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f); ++ // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh} ++ float dx = wh / (0.5f + wh); ++ float dy = wv / (0.5f + wv); ++ ++ SRC i0 = tex2D(tex, xi-dx, yi-dy); ++ SRC i1 = tex2D(tex, xi+dx, yi-dy); ++ SRC i2 = tex2D(tex, xi-dx, yi+dy); ++ SRC i3 = tex2D(tex, xi+dx, yi+dy); + +-struct Convert_bgr0_rgba +-{ +- static const int in_bit_depth = 8; +- typedef uchar4 in_T; +- 
typedef uchar in_T_uv; +- typedef uchar4 out_T; +- typedef uchar out_T_uv; ++ ushort ditherVal = dither ? tex2D(ditherTex, xo, yo) : 0; + +- DEF_F(Convert, out_T) +- { +- uchar4 res = SUB_F(y, 0); +- DEFAULT_DST(0) = make_uchar4( +- res.z, +- res.y, +- res.x, +- 1 +- ); ++ dst[yo*(dst_pitch / sizeof(DST))+xo*pitch] = conv()(i0, i1, i2, i3, ditherVal); + } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- } +-}; +- +-struct Convert_rgb0_bgra +-{ +- static const int in_bit_depth = 8; +- typedef uchar4 in_T; +- typedef uchar in_T_uv; +- typedef uchar4 out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- uchar4 res = SUB_F(y, 0); +- DEFAULT_DST(0) = make_uchar4( +- res.z, +- res.y, +- res.x, +- 1 +- ); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- } +-}; +- +-struct Convert_rgb0_rgba +-{ +- static const int in_bit_depth = 8; +- typedef uchar4 in_T; +- typedef uchar in_T_uv; +- typedef uchar4 out_T; +- typedef uchar out_T_uv; +- +- DEF_F(Convert, out_T) +- { +- uchar4 res = SUB_F(y, 0); +- DEFAULT_DST(0) = make_uchar4( +- res.x, +- res.y, +- res.z, +- 1 +- ); +- } +- +- DEF_F(Convert_uv, out_T_uv) +- { +- } +-}; +- +-// --- SCALING LOGIC --- +- +-typedef float4 (*coeffs_function_t)(float, float); +- +-__device__ static inline float4 lanczos_coeffs(float x, float param) +-{ +- const float pi = 3.141592654f; +- +- float4 res = make_float4( +- pi * (x + 1), +- pi * x, +- pi * (x - 1), +- pi * (x - 2)); +- +- res.x = res.x == 0.0f ? 1.0f : +- __sinf(res.x) * __sinf(res.x / 2.0f) / (res.x * res.x / 2.0f); +- res.y = res.y == 0.0f ? 1.0f : +- __sinf(res.y) * __sinf(res.y / 2.0f) / (res.y * res.y / 2.0f); +- res.z = res.z == 0.0f ? 1.0f : +- __sinf(res.z) * __sinf(res.z / 2.0f) / (res.z * res.z / 2.0f); +- res.w = res.w == 0.0f ? 
1.0f : +- __sinf(res.w) * __sinf(res.w / 2.0f) / (res.w * res.w / 2.0f); +- +- return res / (res.x + res.y + res.z + res.w); +-} +- +-__device__ static inline float4 bicubic_coeffs(float x, float param) +-{ +- const float A = param == SCALE_CUDA_PARAM_DEFAULT ? 0.0f : -param; +- +- float4 res; +- res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A; +- res.y = ((A + 2) * x - (A + 3)) * x * x + 1; +- res.z = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1; +- res.w = 1.0f - res.x - res.y - res.z; +- +- return res; +-} +- +-template +-__device__ static inline V apply_coeffs(float4 coeffs, V c0, V c1, V c2, V c3) +-{ +- V res = c0 * coeffs.x; +- res += c1 * coeffs.y; +- res += c2 * coeffs.z; +- res += c3 * coeffs.w; +- +- return res; +-} +- +-template +-__device__ static inline T Subsample_Nearest(cudaTextureObject_t tex, +- int xo, int yo, +- int dst_width, int dst_height, +- int src_width, int src_height, +- int bit_depth, float param) +-{ +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale; +- float yi = (yo + 0.5f) * vscale; +- +- return tex2D(tex, xi, yi); +-} +- +-template +-__device__ static inline T Subsample_Bilinear(cudaTextureObject_t tex, +- int xo, int yo, +- int dst_width, int dst_height, +- int src_width, int src_height, +- int bit_depth, float param) +-{ +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale; +- float yi = (yo + 0.5f) * vscale; +- // 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv} +- float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f); +- float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f); +- // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh} +- float dx = wh / (0.5f + wh); +- float dy = wv / (0.5f + wv); +- +- intT r; +- vec_set_scalar(r, 2); +- r += tex2D(tex, xi - dx, yi - dy); +- 
r += tex2D(tex, xi + dx, yi - dy); +- r += tex2D(tex, xi - dx, yi + dy); +- r += tex2D(tex, xi + dx, yi + dy); +- +- T res; +- vec_set(res, r >> 2); +- +- return res; + } + +-template +-__device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, +- int xo, int yo, +- int dst_width, int dst_height, +- int src_width, int src_height, +- int bit_depth, float param) +-{ +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale - 0.5f; +- float yi = (yo + 0.5f) * vscale - 0.5f; +- float px = floor(xi); +- float py = floor(yi); +- float fx = xi - px; +- float fy = yi - py; +- +- float factor = bit_depth > 8 ? 0xFFFF : 0xFF; +- +- float4 coeffsX = coeffs_function(fx, param); +- float4 coeffsY = coeffs_function(fy, param); +- +-#define PIX(x, y) tex2D(tex, (x), (y)) +- +- return from_floatN( +- apply_coeffs(coeffsY, +- apply_coeffs(coeffsX, PIX(px - 1, py - 1), PIX(px, py - 1), PIX(px + 1, py - 1), PIX(px + 2, py - 1)), +- apply_coeffs(coeffsX, PIX(px - 1, py ), PIX(px, py ), PIX(px + 1, py ), PIX(px + 2, py )), +- apply_coeffs(coeffsX, PIX(px - 1, py + 1), PIX(px, py + 1), PIX(px + 1, py + 1), PIX(px + 2, py + 1)), +- apply_coeffs(coeffsX, PIX(px - 1, py + 2), PIX(px, py + 2), PIX(px + 1, py + 2), PIX(px + 2, py + 2)) +- ) * factor +- ); +- +-#undef PIX +-} +- +-/// --- FUNCTION EXPORTS --- +- +-#define KERNEL_ARGS(T) \ +- cudaTextureObject_t src_tex_0, cudaTextureObject_t src_tex_1, \ +- cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \ +- T *dst_0, T *dst_1, T *dst_2, T *dst_3, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, float param +- +-#define SUBSAMPLE(Convert, T) \ +- cudaTextureObject_t src_tex[4] = \ +- { src_tex_0, src_tex_1, src_tex_2, src_tex_3 }; \ +- T *dst[4] = { dst_0, dst_1, dst_2, dst_3 }; \ +- int xo = blockIdx.x * blockDim.x + threadIdx.x; \ +- int yo = blockIdx.y * blockDim.y + threadIdx.y; \ +- if 
(yo >= dst_height || xo >= dst_width) return; \ +- Convert( \ +- src_tex, dst, xo, yo, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, param); +- + extern "C" { + +-#define NEAREST_KERNEL(C, S) \ +- __global__ void Subsample_Nearest_##C##S( \ +- KERNEL_ARGS(Convert_##C::out_T##S)) \ +- { \ +- SUBSAMPLE((Convert_##C::Convert##S< \ +- Subsample_Nearest, \ +- Subsample_Nearest >), \ +- Convert_##C::out_T##S) \ +- } +- +-#define NEAREST_KERNEL_RAW(C) \ +- NEAREST_KERNEL(C,) \ +- NEAREST_KERNEL(C,_uv) +- +-#define NEAREST_KERNELS(C) \ +- NEAREST_KERNEL_RAW(yuv420p_ ## C) \ +- NEAREST_KERNEL_RAW(nv12_ ## C) \ +- NEAREST_KERNEL_RAW(yuv444p_ ## C) \ +- NEAREST_KERNEL_RAW(p010le_ ## C) \ +- NEAREST_KERNEL_RAW(p016le_ ## C) \ +- NEAREST_KERNEL_RAW(yuv444p16le_ ## C) +- +-#define NEAREST_KERNELS_RGB(C) \ +- NEAREST_KERNEL_RAW(rgb0_ ## C) \ +- NEAREST_KERNEL_RAW(bgr0_ ## C) \ +- NEAREST_KERNEL_RAW(rgba_ ## C) \ +- NEAREST_KERNEL_RAW(bgra_ ## C) \ +- +-NEAREST_KERNELS(yuv420p) +-NEAREST_KERNELS(nv12) +-NEAREST_KERNELS(yuv444p) +-NEAREST_KERNELS(p010le) +-NEAREST_KERNELS(p016le) +-NEAREST_KERNELS(yuv444p16le) +- +-NEAREST_KERNELS_RGB(rgb0) +-NEAREST_KERNELS_RGB(bgr0) +-NEAREST_KERNELS_RGB(rgba) +-NEAREST_KERNELS_RGB(bgra) +- +-#define BILINEAR_KERNEL(C, S) \ +- __global__ void Subsample_Bilinear_##C##S( \ +- KERNEL_ARGS(Convert_##C::out_T##S)) \ +- { \ +- SUBSAMPLE((Convert_##C::Convert##S< \ +- Subsample_Bilinear, \ +- Subsample_Bilinear >), \ +- Convert_##C::out_T##S) \ +- } +- +-#define BILINEAR_KERNEL_RAW(C) \ +- BILINEAR_KERNEL(C,) \ +- BILINEAR_KERNEL(C,_uv) +- +-#define BILINEAR_KERNELS(C) \ +- BILINEAR_KERNEL_RAW(yuv420p_ ## C) \ +- BILINEAR_KERNEL_RAW(nv12_ ## C) \ +- BILINEAR_KERNEL_RAW(yuv444p_ ## C) \ +- BILINEAR_KERNEL_RAW(p010le_ ## C) \ +- BILINEAR_KERNEL_RAW(p016le_ ## C) \ +- BILINEAR_KERNEL_RAW(yuv444p16le_ ## C) +- +-#define BILINEAR_KERNELS_RGB(C) \ +- BILINEAR_KERNEL_RAW(rgb0_ ## C) \ +- BILINEAR_KERNEL_RAW(bgr0_ ## C) \ +- 
BILINEAR_KERNEL_RAW(rgba_ ## C) \ +- BILINEAR_KERNEL_RAW(bgra_ ## C) +- +-BILINEAR_KERNELS(yuv420p) +-BILINEAR_KERNELS(nv12) +-BILINEAR_KERNELS(yuv444p) +-BILINEAR_KERNELS(p010le) +-BILINEAR_KERNELS(p016le) +-BILINEAR_KERNELS(yuv444p16le) +- +-BILINEAR_KERNELS_RGB(rgb0) +-BILINEAR_KERNELS_RGB(bgr0) +-BILINEAR_KERNELS_RGB(rgba) +-BILINEAR_KERNELS_RGB(bgra) +- +-#define BICUBIC_KERNEL(C, S) \ +- __global__ void Subsample_Bicubic_##C##S( \ +- KERNEL_ARGS(Convert_##C::out_T##S)) \ +- { \ +- SUBSAMPLE((Convert_##C::Convert##S< \ +- Subsample_Bicubic, \ +- Subsample_Bicubic >), \ +- Convert_##C::out_T##S) \ +- } +- +-#define BICUBIC_KERNEL_RAW(C) \ +- BICUBIC_KERNEL(C,) \ +- BICUBIC_KERNEL(C,_uv) +- +-#define BICUBIC_KERNELS(C) \ +- BICUBIC_KERNEL_RAW(yuv420p_ ## C) \ +- BICUBIC_KERNEL_RAW(nv12_ ## C) \ +- BICUBIC_KERNEL_RAW(yuv444p_ ## C) \ +- BICUBIC_KERNEL_RAW(p010le_ ## C) \ +- BICUBIC_KERNEL_RAW(p016le_ ## C) \ +- BICUBIC_KERNEL_RAW(yuv444p16le_ ## C) +- +-#define BICUBIC_KERNELS_RGB(C) \ +- BICUBIC_KERNEL_RAW(rgb0_ ## C) \ +- BICUBIC_KERNEL_RAW(bgr0_ ## C) \ +- BICUBIC_KERNEL_RAW(rgba_ ## C) \ +- BICUBIC_KERNEL_RAW(bgra_ ## C) +- +-BICUBIC_KERNELS(yuv420p) +-BICUBIC_KERNELS(nv12) +-BICUBIC_KERNELS(yuv444p) +-BICUBIC_KERNELS(p010le) +-BICUBIC_KERNELS(p016le) +-BICUBIC_KERNELS(yuv444p16le) +- +-BICUBIC_KERNELS_RGB(rgb0) +-BICUBIC_KERNELS_RGB(bgr0) +-BICUBIC_KERNELS_RGB(rgba) +-BICUBIC_KERNELS_RGB(bgra) +- +-#define LANCZOS_KERNEL(C, S) \ +- __global__ void Subsample_Lanczos_##C##S( \ +- KERNEL_ARGS(Convert_##C::out_T##S)) \ +- { \ +- SUBSAMPLE((Convert_##C::Convert##S< \ +- Subsample_Bicubic, \ +- Subsample_Bicubic >), \ +- Convert_##C::out_T##S) \ +- } ++#define VARIANT(SRC, DST, CONV, SHIFT, PITCH, DITHER, NAME) \ ++__global__ void Subsample_Bilinear_ ## NAME(cudaTextureObject_t tex, \ ++ DST *dst, \ ++ int dst_width, int dst_height, int dst_pitch, \ ++ int src_width, int src_height, \ ++ cudaTextureObject_t ditherTex) \ ++{ \ ++ Subsample_Bilinear(tex, dst, 
dst_width, dst_height, dst_pitch, \ ++ src_width, src_height, ditherTex); \ ++} ++ ++#define VARIANTSET2(SRC, DST, SHIFT, NAME) \ ++ VARIANT(SRC, DST, add_conv_shift1_d, SHIFT, 1, (sizeof(DST) < sizeof(SRC)) ? sizeof(DST) : 0, NAME) \ ++ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 1, 0, NAME ## _c) \ ++ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 2, 0, NAME ## _p2) \ ++ VARIANT(SRC ## 2, DST ## 2, add_conv_shift2, SHIFT, 1, 0, NAME ## _2) \ ++ VARIANT(SRC ## 2, DST, add_conv_shift2_x, SHIFT, 1, 0, NAME ## _2_u) \ ++ VARIANT(SRC ## 2, DST, add_conv_shift2_y, SHIFT, 1, 0, NAME ## _2_v) \ ++ VARIANT(SRC ## 4, DST ## 4, add_conv_shift4, SHIFT, 1, 0, NAME ## _4) ++ ++#define VARIANTSET(SRC, DST, SRCSIZE, DSTSIZE) \ ++ VARIANTSET2(SRC, DST, (SRCSIZE - DSTSIZE), SRCSIZE ## _ ## DSTSIZE) ++ ++// Straight no-conversion ++VARIANTSET(uchar, uchar, 8, 8) ++VARIANTSET(ushort, ushort, 16, 16) ++ ++// Conversion between 8- and 16-bit ++VARIANTSET(uchar, ushort, 8, 16) ++VARIANTSET(ushort, uchar, 16, 8) + +-#define LANCZOS_KERNEL_RAW(C) \ +- LANCZOS_KERNEL(C,) \ +- LANCZOS_KERNEL(C,_uv) +- +-#define LANCZOS_KERNELS(C) \ +- LANCZOS_KERNEL_RAW(yuv420p_ ## C) \ +- LANCZOS_KERNEL_RAW(nv12_ ## C) \ +- LANCZOS_KERNEL_RAW(yuv444p_ ## C) \ +- LANCZOS_KERNEL_RAW(p010le_ ## C) \ +- LANCZOS_KERNEL_RAW(p016le_ ## C) \ +- LANCZOS_KERNEL_RAW(yuv444p16le_ ## C) +- +-#define LANCZOS_KERNELS_RGB(C) \ +- LANCZOS_KERNEL_RAW(rgb0_ ## C) \ +- LANCZOS_KERNEL_RAW(bgr0_ ## C) \ +- LANCZOS_KERNEL_RAW(rgba_ ## C) \ +- LANCZOS_KERNEL_RAW(bgra_ ## C) +- +-LANCZOS_KERNELS(yuv420p) +-LANCZOS_KERNELS(nv12) +-LANCZOS_KERNELS(yuv444p) +-LANCZOS_KERNELS(p010le) +-LANCZOS_KERNELS(p016le) +-LANCZOS_KERNELS(yuv444p16le) +- +-LANCZOS_KERNELS_RGB(rgb0) +-LANCZOS_KERNELS_RGB(bgr0) +-LANCZOS_KERNELS_RGB(rgba) +-LANCZOS_KERNELS_RGB(bgra) + } +Index: FFmpeg/libavfilter/vf_scale_cuda.h +=================================================================== +--- libavfilter/vf_scale_cuda.h ++++ /dev/null +@@ -1,28 +0,0 @@ +-/* +- * 
This file is part of FFmpeg. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +- * DEALINGS IN THE SOFTWARE. 
+- */ +- +-#ifndef AVFILTER_SCALE_CUDA_H +-#define AVFILTER_SCALE_CUDA_H +- +-#define SCALE_CUDA_PARAM_DEFAULT 999999.0f +- +-#endif diff --git a/cross/ffmpeg7/patches/1004-jellyfin-0004-add-cuda-tonemap-impl.patch b/cross/ffmpeg7/patches/1004-jellyfin-0004-add-cuda-tonemap-impl.patch new file mode 100644 index 00000000000..e170e65200c --- /dev/null +++ b/cross/ffmpeg7/patches/1004-jellyfin-0004-add-cuda-tonemap-impl.patch @@ -0,0 +1,2897 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3291,6 +3291,8 @@ scale_cuda_filter_deps="ffnvcodec" + scale_cuda_filter_deps_any="cuda_nvcc cuda_llvm" + thumbnail_cuda_filter_deps="ffnvcodec" + thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" ++tonemap_cuda_filter_deps="ffnvcodec const_nan" ++tonemap_cuda_filter_deps_any="cuda_nvcc cuda_llvm" + transpose_npp_filter_deps="ffnvcodec libnpp" + overlay_cuda_filter_deps="ffnvcodec" + overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +@@ -4082,7 +4084,7 @@ enable doc + enable faan faandct faanidct + enable large_tests + enable optimizations +-enable ptx_compression ++disable ptx_compression + enable runtime_cpudetect + enable safe_bitstream_reader + enable static +@@ -4630,7 +4632,7 @@ if enabled cuda_nvcc; then + nvccflags_default="-gencode arch=compute_30,code=sm_30 -O2" + else + nvcc_default="clang" +- nvccflags_default="--cuda-gpu-arch=sm_30 -O2" ++ nvccflags_default="--cuda-gpu-arch=sm_30 -O2 -ffast-math" + NVCC_C="" + fi + +@@ -6711,7 +6713,7 @@ fi + if enabled cuda_nvcc; then + nvccflags="$nvccflags -ptx" + else +- nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -include ${source_link}/compat/cuda/cuda_runtime.h" ++ nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -std=c++14 -include ${source_link}/compat/cuda/cuda_runtime.h" + check_nvcc cuda_llvm + fi + +Index: FFmpeg/ffbuild/common.mak 
+=================================================================== +--- ffbuild/common.mak ++++ ffbuild/common.mak +@@ -44,6 +44,7 @@ ASFLAGS := $(CPPFLAGS) $(ASFLAGS) + # end up in CXXFLAGS. + $(call PREPEND,CXXFLAGS, CPPFLAGS CFLAGS) + X86ASMFLAGS += $(IFLAGS:%=%/) -I$(max_luminance = av_d2q(peak * REFERENCE_WHITE, 10000); + } + } ++ ++double ff_determine_dovi_signal_peak(const AVDOVIMetadata *data) ++{ ++ float peak; ++ const AVDOVIColorMetadata *color; ++ ++ // Fallback to the peak of 10000 if SMPTE ST.2084 ++ if (!data) ++ return 100.0f; ++ ++ color = av_dovi_get_color(data); ++ peak = color->source_max_pq / 4095.0f; ++ if (!peak) ++ return peak; ++ ++ peak = powf(peak, 1.0f / ST2084_M2); ++ peak = fmaxf(peak - ST2084_C1, 0.0f) / (ST2084_C2 - ST2084_C3 * peak); ++ peak = powf(peak, 1.0f / ST2084_M1); ++ peak *= 100.0f; ++ ++ return peak; ++} ++ ++void ff_map_dovi_metadata(struct DoviMetadata *out, const AVDOVIMetadata *data) ++{ ++ int c, i, j, k; ++ const AVDOVIRpuDataHeader *header; ++ const AVDOVIDataMapping *mapping; ++ const AVDOVIColorMetadata *color; ++ ++ if (!data) ++ return; ++ ++ header = av_dovi_get_header(data); ++ mapping = av_dovi_get_mapping(data); ++ color = av_dovi_get_color(data); ++ ++ for (i = 0; i < 3; i++) ++ out->nonlinear_offset[i] = av_q2d(color->ycc_to_rgb_offset[i]); ++ for (i = 0; i < 9; i++) { ++ double *nonlinear = &out->nonlinear[0][0]; ++ double *linear = &out->linear[0][0]; ++ nonlinear[i] = av_q2d(color->ycc_to_rgb_matrix[i]); ++ linear[i] = av_q2d(color->rgb_to_lms_matrix[i]); ++ } ++ for (c = 0; c < 3; c++) { ++ const AVDOVIReshapingCurve *csrc = &mapping->curves[c]; ++ struct ReshapeData *cdst = &out->comp[c]; ++ cdst->num_pivots = csrc->num_pivots; ++ for (i = 0; i < csrc->num_pivots; i++) { ++ const float scale = 1.0f / ((1 << header->bl_bit_depth) - 1); ++ cdst->pivots[i] = scale * csrc->pivots[i]; ++ } ++ for (i = 0; i < csrc->num_pivots - 1; i++) { ++ const float scale = 1.0f / (1 << header->coef_log2_denom); ++ 
cdst->method[i] = csrc->mapping_idc[i]; ++ switch (csrc->mapping_idc[i]) { ++ case AV_DOVI_MAPPING_POLYNOMIAL: ++ for (k = 0; k < 3; k++) { ++ cdst->poly_coeffs[i][k] = (k <= csrc->poly_order[i]) ++ ? scale * csrc->poly_coef[i][k] ++ : 0.0f; ++ } ++ break; ++ case AV_DOVI_MAPPING_MMR: ++ cdst->mmr_order[i] = csrc->mmr_order[i]; ++ cdst->mmr_constant[i] = scale * csrc->mmr_constant[i]; ++ for (j = 0; j < csrc->mmr_order[i]; j++) { ++ for (k = 0; k < 7; k++) ++ cdst->mmr_coeffs[i][j][k] = scale * csrc->mmr_coef[i][j][k]; ++ } ++ break; ++ } ++ } ++ } ++} ++ ++// linearizer for PQ/ST2084 ++float eotf_st2084_common(float x) ++{ ++ float xpow = powf(FFMAX(x, 0.0f), 1.0f / ST2084_M2); ++ float num = FFMAX(xpow - ST2084_C1, 0.0f); ++ float den = FFMAX(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); ++ x = powf(num / den, 1.0f / ST2084_M1); ++ return x; ++} ++ ++float eotf_st2084(float x, float ref_white) ++{ ++ return eotf_st2084_common(x) * ST2084_MAX_LUMINANCE / ref_white; ++} ++ ++// delinearizer for PQ/ST2084 ++float inverse_eotf_st2084_common(float x) ++{ ++ float xpow = powf(FFMAX(x, 0.0f), ST2084_M1); ++#if 0 ++ // Original formulation from SMPTE ST 2084:2014 publication. ++ float num = ST2084_C1 + ST2084_C2 * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return powf(num / den, ST2084_M2); ++#else ++ // More stable arrangement that avoids some cancellation error. ++ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return powf(1.0f + num / den, ST2084_M2); ++#endif ++} ++ ++float inverse_eotf_st2084(float x, float ref_white) ++{ ++ x *= ref_white / ST2084_MAX_LUMINANCE; ++ return inverse_eotf_st2084_common(x); ++} ++ ++float ootf_1_2(float x) { ++ return x > 0.0f ? powf(x, 1.2f) : x; ++} ++ ++float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? powf(x, 1.0f / 1.2f) : x; ++} ++ ++float oetf_arib_b67(float x) { ++ x = FFMAX(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? 
sqrtf(3.0f * x) ++ : (ARIB_B67_A * logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++float inverse_oetf_arib_b67(float x) { ++ x = FFMAX(x, 0.0f); ++ return x <= 0.5f ++ ? (x * x) * (1.0f / 3.0f) ++ : (expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)) * 5.0f; ++} ++ ++// delinearizer for HLG/ARIB-B67 ++float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x / 5.0f)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? powf(x, 1.0f / 2.4f) : 0.0f; ++} +Index: FFmpeg/libavfilter/colorspace.h +=================================================================== +--- libavfilter/colorspace.h ++++ libavfilter/colorspace.h +@@ -23,10 +23,42 @@ + #include "libavutil/csp.h" + #include "libavutil/frame.h" + #include "libavutil/pixfmt.h" ++#include "libavutil/dovi_meta.h" + + #define REFERENCE_WHITE 100.0f ++#define REFERENCE_WHITE_ALT 203.0f ++#define ST2084_MAX_LUMINANCE 10000.0f ++#define ST2084_M1 0.1593017578125f ++#define ST2084_M2 78.84375f ++#define ST2084_C1 0.8359375f ++#define ST2084_C2 18.8515625f ++#define ST2084_C3 18.6875f ++#define ARIB_B67_A 0.17883277f ++#define ARIB_B67_B 0.28466892f ++#define ARIB_B67_C 0.55991073f ++#define FLOAT_EPS 1e-6f ++ ++// Parsed metadata from the Dolby Vision RPU ++struct DoviMetadata { ++ float nonlinear_offset[3]; // input offset ("ycc_to_rgb_offset") ++ double nonlinear[3][3]; // before PQ, also called "ycc_to_rgb" ++ double linear[3][3]; // after PQ, also called "rgb_to_lms" ++ ++ // Reshape data, grouped by component ++ struct ReshapeData { ++ uint8_t num_pivots; ++ float pivots[9]; // normalized to [0.0, 1.0] based on BL bit depth ++ uint8_t method[8]; // 0 = polynomial, 1 = MMR ++ // Note: these must be normalized (divide by coefficient_log2_denom) ++ float poly_coeffs[8][3]; // x^0, x^1, x^2, unused must be 
0 ++ uint8_t mmr_order[8]; // 1, 2 or 3 ++ float mmr_constant[8]; ++ float mmr_coeffs[8][3 /* order */][7]; ++ } comp[3]; ++}; + + void ff_matrix_invert_3x3(const double in[3][3], double out[3][3]); ++void ff_matrix_transpose_3x3(const double in[3][3], double out[3][3]); + void ff_matrix_mul_3x3(double dst[3][3], + const double src1[3][3], const double src2[3][3]); + void ff_matrix_mul_3x3_vec(double dst[3], const double vec[3], const double mat[3][3]); +@@ -38,4 +70,19 @@ void ff_fill_rgb2yuv_table(const AVLumaC + double ff_determine_signal_peak(AVFrame *in); + void ff_update_hdr_metadata(AVFrame *in, double peak); + ++double ff_determine_dovi_signal_peak(const AVDOVIMetadata *data); ++void ff_map_dovi_metadata(struct DoviMetadata *out, const AVDOVIMetadata *data); ++ ++float eotf_st2084_common(float x); ++float eotf_st2084(float x, float ref_white); ++float inverse_eotf_st2084_common(float x); ++float inverse_eotf_st2084(float x, float ref_white); ++float ootf_1_2(float x); ++float inverse_ootf_1_2(float x); ++float oetf_arib_b67(float x); ++float inverse_oetf_arib_b67(float x); ++float eotf_arib_b67(float x); ++float inverse_eotf_arib_b67(float x); ++float inverse_eotf_bt1886(float x); ++ + #endif +Index: FFmpeg/libavfilter/cuda/colorspace_common.h +=================================================================== +--- /dev/null ++++ libavfilter/cuda/colorspace_common.h +@@ -0,0 +1,330 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_COLORSPACE_COMMON_H ++#define AVFILTER_CUDA_COLORSPACE_COMMON_H ++ ++#include "util.h" ++#include "libavutil/pixfmt.h" ++ ++#define ST2084_MAX_LUMINANCE 10000.0f ++ ++#define ST2084_M1 0.1593017578125f ++#define ST2084_M2 78.84375f ++#define ST2084_C1 0.8359375f ++#define ST2084_C2 18.8515625f ++#define ST2084_C3 18.6875f ++ ++#define ARIB_B67_A 0.17883277f ++#define ARIB_B67_B 0.28466892f ++#define ARIB_B67_C 0.55991073f ++ ++#define FLOAT_EPS 1e-6f ++ ++extern __constant__ const float ref_white; ++extern __constant__ const float3 luma_dst; ++extern __constant__ const float3 ycc2rgb_offset; ++extern __constant__ const enum AVColorTransferCharacteristic trc_src, trc_dst; ++extern __constant__ const enum AVColorRange range_src, range_dst; ++extern __constant__ const enum AVChromaLocation chroma_loc_src, chroma_loc_dst; ++extern __constant__ const bool rgb2rgb_passthrough; ++extern __constant__ const float rgb2rgb_matrix[9]; ++extern __constant__ const float lms2rgb_matrix[9]; ++extern __constant__ const float yuv_matrix[9], rgb_matrix[9]; ++extern __constant__ const float pq_max_lum_div_ref_white; ++extern __constant__ const float ref_white_div_pq_max_lum; ++ ++static __inline__ __device__ float get_luma_dst(float3 c, const float3& luma_dst) { ++ return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; ++} ++ ++/* ++static __inline__ __device__ float get_luma_src(float3 c, const float3& luma_src) { ++ return luma_src.x * c.x + luma_src.y * c.y + luma_src.z * c.z; ++} ++*/ ++ ++static __inline__ __device__ float3 get_chroma_sample(float3 a, float3 b, float3 c, float3 d) { ++ switch (chroma_loc_dst) { ++ case AVCHROMA_LOC_LEFT: ++ return ((a) + (c)) * 0.5f; ++ case AVCHROMA_LOC_CENTER: ++ case 
AVCHROMA_LOC_UNSPECIFIED: ++ default: ++ return ((a) + (b) + (c) + (d)) * 0.25f; ++ case AVCHROMA_LOC_TOPLEFT: ++ return a; ++ case AVCHROMA_LOC_TOP: ++ return ((a) + (b)) * 0.5f; ++ case AVCHROMA_LOC_BOTTOMLEFT: ++ return c; ++ case AVCHROMA_LOC_BOTTOM: ++ return ((c) + (d)) * 0.5f; ++ } ++} ++ ++// linearizer for PQ/ST2084 ++static __inline__ __device__ float eotf_st2084_common(float x) { ++ x = max(x, 0.0f); ++ float xpow = __powf(x, 1.0f / ST2084_M2); ++ float num = max(xpow - ST2084_C1, 0.0f); ++ float den = max(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); ++ x = __powf(num / den, 1.0f / ST2084_M1); ++ return x; ++} ++ ++static __inline__ __device__ float eotf_st2084(float x) { ++ return eotf_st2084_common(x) * pq_max_lum_div_ref_white; ++} ++ ++// delinearizer for PQ/ST2084 ++static __inline__ __device__ float inverse_eotf_st2084_common(float x) { ++ x = max(x, 0.0f); ++ float xpow = __powf(x, ST2084_M1); ++#if 0 ++ // Original formulation from SMPTE ST 2084:2014 publication. ++ float num = ST2084_C1 + ST2084_C2 * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return __powf(num / den, ST2084_M2); ++#else ++ // More stable arrangement that avoids some cancellation error. ++ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return __powf(1.0f + num / den, ST2084_M2); ++#endif ++} ++ ++static __inline__ __device__ float inverse_eotf_st2084(float x) { ++ x *= ref_white_div_pq_max_lum; ++ return inverse_eotf_st2084_common(x); ++} ++ ++static __inline__ __device__ float ootf_1_2(float x) { ++ return x > 0.0f ? __powf(x, 1.2f) : x; ++} ++ ++static __inline__ __device__ float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? __powf(x, 1.0f / 1.2f) : x; ++} ++ ++static __inline__ __device__ float oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? 
sqrtf(3.0f * x) ++ : (ARIB_B67_A * __logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++static __inline__ __device__ float inverse_oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= 0.5f ++ ? (x * x) * (1.0f / 3.0f) ++ : (__expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++static __inline__ __device__ float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)) * 5.0f; ++} ++ ++// delinearizer for HLG/ARIB-B67 ++static __inline__ __device__ float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x / 5.0f)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++static __inline__ __device__ float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? __powf(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++static __inline__ __device__ float linearize(float x) ++{ ++ if (trc_src == AVCOL_TRC_SMPTE2084 && trc_dst != AVCOL_TRC_SMPTE2084) ++ return eotf_st2084(x); ++ else if (trc_src == AVCOL_TRC_ARIB_STD_B67) ++ return eotf_arib_b67(x); ++ else ++ return x; ++} ++ ++static __inline__ __device__ float delinearize(float x) ++{ ++ if (trc_dst == AVCOL_TRC_BT709 || trc_dst == AVCOL_TRC_BT2020_10) ++ return inverse_eotf_bt1886(x); ++ else ++ return x; ++} ++ ++static __inline__ __device__ float3 yuv2rgb(float y, float u, float v) { ++ if (range_src == AVCOL_RANGE_JPEG) { ++ u -= 0.5f; v -= 0.5f; ++ } else { ++ y = (y * 255.0f - 16.0f) / 219.0f; ++ u = (u * 255.0f - 128.0f) / 224.0f; ++ v = (v * 255.0f - 128.0f) / 224.0f; ++ } ++ float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; ++ float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; ++ float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; ++ return make_float3(r, g, b); ++} ++ ++static __inline__ __device__ float3 yuv2lrgb(float3 yuv) { ++ float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z); ++ return make_float3(linearize(rgb.x), ++ linearize(rgb.y), ++ linearize(rgb.z)); ++} ++ ++static __inline__ 
__device__ float3 rgb2yuv(float r, float g, float b) { ++ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++ float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; ++ float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; ++ if (range_dst == AVCOL_RANGE_JPEG) { ++ u += 0.5f; v += 0.5f; ++ } else { ++ y = (219.0f * y + 16.0f) / 255.0f; ++ u = (224.0f * u + 128.0f) / 255.0f; ++ v = (224.0f * v + 128.0f) / 255.0f; ++ } ++ return make_float3(y, u, v); ++} ++ ++static __inline__ __device__ float rgb2y(float r, float g, float b) { ++ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++ if (range_dst != AVCOL_RANGE_JPEG) ++ y = (219.0f * y + 16.0f) / 255.0f; ++ return y; ++} ++ ++static __inline__ __device__ float3 lrgb2yuv(float3 c) { ++ float r = delinearize(c.x); ++ float g = delinearize(c.y); ++ float b = delinearize(c.z); ++ return rgb2yuv(r, g, b); ++} ++ ++static __inline__ __device__ float3 lrgb2lrgb(float3 c) { ++ if (rgb2rgb_passthrough) { ++ return c; ++ } else { ++ float r = c.x, g = c.y, b = c.z; ++ float rr = rgb2rgb_matrix[0] * r + rgb2rgb_matrix[1] * g + rgb2rgb_matrix[2] * b; ++ float gg = rgb2rgb_matrix[3] * r + rgb2rgb_matrix[4] * g + rgb2rgb_matrix[5] * b; ++ float bb = rgb2rgb_matrix[6] * r + rgb2rgb_matrix[7] * g + rgb2rgb_matrix[8] * b; ++ return make_float3(rr, gg, bb); ++ } ++} ++ ++static __inline__ __device__ float3 rgb2lrgb(float3 c) { ++ float r = linearize(c.x); ++ float g = linearize(c.y); ++ float b = linearize(c.z); ++ return make_float3(r, g, b); ++} ++ ++static __inline__ __device__ float3 ycc2rgb(float y, float cb, float cr) { ++ float r = y * rgb_matrix[0] + cb * rgb_matrix[1] + cr * rgb_matrix[2]; ++ float g = y * rgb_matrix[3] + cb * rgb_matrix[4] + cr * rgb_matrix[5]; ++ float b = y * rgb_matrix[6] + cb * rgb_matrix[7] + cr * rgb_matrix[8]; ++ return make_float3(r, g, b) + ycc2rgb_offset; ++} ++ ++static __inline__ __device__ float3 lms2rgb(float r, float g, float b) { ++ r = 
eotf_st2084_common(r); ++ g = eotf_st2084_common(g); ++ b = eotf_st2084_common(b); ++ float rr = r * lms2rgb_matrix[0] + g * lms2rgb_matrix[1] + b * lms2rgb_matrix[2]; ++ float gg = r * lms2rgb_matrix[3] + g * lms2rgb_matrix[4] + b * lms2rgb_matrix[5]; ++ float bb = r * lms2rgb_matrix[6] + g * lms2rgb_matrix[7] + b * lms2rgb_matrix[8]; ++ rr = inverse_eotf_st2084_common(rr); ++ gg = inverse_eotf_st2084_common(gg); ++ bb = inverse_eotf_st2084_common(bb); ++ return rgb2lrgb(make_float3(rr, gg, bb)); ++} ++ ++static __inline__ __device__ float3 lms2rgb_fast(float r, float g, float b) { ++ float rr = r * lms2rgb_matrix[0] + g * lms2rgb_matrix[1] + b * lms2rgb_matrix[2]; ++ float gg = r * lms2rgb_matrix[3] + g * lms2rgb_matrix[4] + b * lms2rgb_matrix[5]; ++ float bb = r * lms2rgb_matrix[6] + g * lms2rgb_matrix[7] + b * lms2rgb_matrix[8]; ++ return rgb2lrgb(make_float3(rr, gg, bb)); ++} ++ ++static __inline__ __device__ float3 lrgb2ictcp(float r, float g, float b) { ++ float l = 0.412109375000000f * r + 0.523925781250000f * g + 0.063964843750000f * b; ++ float m = 0.166748046875000f * r + 0.720458984375000f * g + 0.112792968750000f * b; ++ float s = 0.024169921875000f * r + 0.075439453125000f * g + 0.900390625000000f * b; ++ l = inverse_eotf_st2084(l); ++ m = inverse_eotf_st2084(m); ++ s = inverse_eotf_st2084(s); ++ float i = 0.5f * l + 0.5f * m; ++ float ct = 1.613769531250000f * l - 3.323486328125000f * m + 1.709716796875000f * s; ++ float cp = 4.378173828125000f * l - 4.245605468750000f * m - 0.132568359375000f * s; ++ return make_float3(i, ct, cp); ++} ++ ++static __inline__ __device__ float3 ictcp2lrgb(float i, float ct, float cp) { ++ float ll = i + 0.008609037037933f * ct + 0.111029625003026f * cp; ++ float mm = i - 0.008609037037933f * ct - 0.111029625003026f * cp; ++ float ss = i + 0.560031335710679f * ct - 0.320627174987319f * cp; ++ ll = eotf_st2084(ll); ++ mm = eotf_st2084(mm); ++ ss = eotf_st2084(ss); ++ float r = 3.436606694333079f * ll - 2.506452118656270f 
* mm + 0.069845424323191f * ss; ++ float g = -0.791329555598929f * ll + 1.983600451792291f * mm - 0.192270896193362f * ss; ++ float b = -0.025949899690593f * ll - 0.098913714711726f * mm + 1.124863614402319f * ss; ++ return make_float3(r, g, b); ++} ++ ++static __inline__ __device__ float parabolic(float x, float t0, float x0, float y0) { ++ float s = (y0 - t0) / sqrtf(x0 - y0); ++ float ox = t0 - s * s * 0.25f; ++ float oy = t0 - s * sqrtf(s * s * 0.25f); ++ return (x < t0 ? x : s * sqrtf(x - ox) + oy); ++} ++ ++static __inline __device__ float3 gamut_compress(float3 rgb) { ++ #define cyan_limit 1.5187050250638159f ++ #define magenta_limit 1.0750082769546088f ++ #define yellow_limit 1.0887800403483898f ++ #define cyan_threshold 1.050508660266247f ++ #define magenta_threshold 0.940509816042432f ++ #define yellow_threshold 0.9771607996420639f ++ ++ // Achromatic axis ++ float ac = max(max(rgb.x, rgb.y), rgb.z); ++ float ac_abs = fabsf(ac); ++ float3 ac3 = make_float3(ac, ac, ac); ++ float3 ac_abs3 = make_float3(ac_abs, ac_abs, ac_abs); ++ ++ // Inverse RGB Ratios: distance from achromatic axis ++ float3 d = ac == 0.0f ? make_float3(0.0f, 0.0f, 0.0f) : (ac3 - rgb) / ac_abs3; ++ ++ // Compressed distance ++ float3 cd = make_float3( ++ parabolic(d.x, cyan_threshold, cyan_limit, 1.0f), ++ parabolic(d.y, magenta_threshold, magenta_limit, 1.0f), ++ parabolic(d.z, yellow_threshold, yellow_limit, 1.0f) ++ ); ++ ++ // Inverse RGB Ratios to RGB ++ float3 crgb = ac3 - cd * ac_abs3; ++ ++ return crgb; ++} ++ ++#endif /* AVFILTER_CUDA_COLORSPACE_COMMON_H */ +Index: FFmpeg/libavfilter/cuda/host_util.c +=================================================================== +--- /dev/null ++++ libavfilter/cuda/host_util.c +@@ -0,0 +1,77 @@ ++/* ++ * This file is part of FFmpeg. 
++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/hwcontext_cuda_internal.h" ++#include "libavutil/cuda_check.h" ++#include "libavfilter/colorspace.h" ++#include "host_util.h" ++ ++#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, cu, x) ++#define DEPTH_BYTES(depth) (((depth) + 7) / 8) ++ ++int ff_make_cuda_frame(AVFilterContext *ctx, CudaFunctions *cu, int make_cuTex, ++ FFCUDAFrame *dst, const AVFrame *src, const AVPixFmtDescriptor *src_desc) ++{ ++ int i, ret = 0; ++ for (i = 0, dst->planes = 0; i < src_desc->nb_components; i++) ++ dst->planes = FFMAX(dst->planes, src_desc->comp[i].plane + 1); ++ ++ for (i = 0; i < dst->planes; i++) { ++ dst->data[i] = src->data[i]; ++ dst->linesize[i] = src->linesize[i]; ++ dst->tex[i] = 0; ++ } ++ ++ for (i = 0; make_cuTex && (i < dst->planes); i++) { ++#ifndef CU_TRSF_NORMALIZED_COORDINATES ++ #define CU_TRSF_NORMALIZED_COORDINATES 2 ++#endif ++ CUDA_TEXTURE_DESC tex_desc = { ++ .addressMode = { CU_TR_ADDRESS_MODE_CLAMP }, ++ .filterMode = i == 0 ? CU_TR_FILTER_MODE_POINT : CU_TR_FILTER_MODE_LINEAR, ++ .flags = i == 0 ? 0 : CU_TRSF_NORMALIZED_COORDINATES, ++ }; ++ ++ CUDA_RESOURCE_DESC res_desc = { ++ .resType = CU_RESOURCE_TYPE_PITCH2D, ++ .res.pitch2D.format = DEPTH_BYTES(src_desc->comp[i].depth) == 1 ? 
++ CU_AD_FORMAT_UNSIGNED_INT8 : ++ CU_AD_FORMAT_UNSIGNED_INT16, ++ .res.pitch2D.numChannels = i == 0 ? 1 : (dst->planes == 2 ? 2 : 1), ++ .res.pitch2D.width = i == 0 ? src->width : AV_CEIL_RSHIFT(src->width, src_desc->log2_chroma_w), ++ .res.pitch2D.height = i == 0 ? src->height : AV_CEIL_RSHIFT(src->height, src_desc->log2_chroma_h), ++ .res.pitch2D.pitchInBytes = src->linesize[i], ++ .res.pitch2D.devPtr = (CUdeviceptr)src->data[i], ++ }; ++ ++ if ((ret = CHECK_CU(cu->cuTexObjectCreate(&dst->tex[i], &res_desc, &tex_desc, NULL))) < 0) ++ goto fail; ++ } ++ ++ dst->width = src->width; ++ dst->height = src->height; ++ ++ return ret; ++ ++fail: ++ for (i = 0; i < dst->planes; i++) ++ if (dst->tex[i]) ++ CHECK_CU(cu->cuTexObjectDestroy(dst->tex[i])); ++ ++ return ret; ++} +Index: FFmpeg/libavfilter/cuda/host_util.h +=================================================================== +--- /dev/null ++++ libavfilter/cuda/host_util.h +@@ -0,0 +1,30 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_HOST_UTIL_H ++#define AVFILTER_CUDA_HOST_UTIL_H ++ ++#include "libavutil/frame.h" ++#include "libavutil/pixdesc.h" ++#include "libavfilter/avfilter.h" ++#include "shared.h" ++ ++int ff_make_cuda_frame(AVFilterContext *ctx, CudaFunctions *cu, int make_cuTex, ++ FFCUDAFrame *dst, const AVFrame *src, const AVPixFmtDescriptor *src_desc); ++ ++#endif /* AVFILTER_CUDA_HOST_UTIL_H */ +Index: FFmpeg/libavfilter/cuda/pixfmt.h +=================================================================== +--- /dev/null ++++ libavfilter/cuda/pixfmt.h +@@ -0,0 +1,225 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_PIXFMT_H ++#define AVFILTER_CUDA_PIXFMT_H ++ ++#include "shared.h" ++ ++extern __constant__ const enum AVPixelFormat fmt_src, fmt_dst; ++extern __constant__ const int depth_src, depth_dst; ++ ++// Single-sample read function ++template ++static __inline__ __device__ T read_sample(const FFCUDAFrame& frame, int x, int y) ++{ ++ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); ++ return ptr[x]; ++} ++ ++// Per-format read functions ++static __inline__ __device__ ushort3 read_p016(const FFCUDAFrame& frame, int x, int y) ++{ ++ return make_ushort3(read_sample(frame, x, y), ++ read_sample(frame, (x & ~1), y / 2), ++ read_sample(frame, (x & ~1) + 1, y / 2)); ++} ++ ++static __inline__ __device__ ushort3 read_p010(const FFCUDAFrame& frame, int x, int y) ++{ ++ ushort3 val = read_p016(frame, x, y); ++ return make_ushort3(val.x >> 6, ++ val.y >> 6, ++ val.z >> 6); ++} ++ ++static __inline__ __device__ ushort3 read_yuv420p16(const FFCUDAFrame& frame, int x, int y) ++{ ++ return make_ushort3(read_sample(frame, x, y), ++ read_sample(frame, x / 2, y / 2), ++ read_sample(frame, x / 2, y / 2)); ++} ++ ++static __inline__ __device__ ushort3 read_yuv420p10(const FFCUDAFrame& frame, int x, int y) ++{ ++ ushort3 val = read_yuv420p16(frame, x, y); ++ return make_ushort3(val.x >> 6, ++ val.y >> 6, ++ val.z >> 6); ++} ++ ++// Generic read functions ++static __inline__ __device__ ushort3 read_px(const FFCUDAFrame& frame, int x, int y) ++{ ++ if (fmt_src == AV_PIX_FMT_P010) ++ return read_p010(frame, x, y); ++ else if (fmt_src == AV_PIX_FMT_P016) ++ return read_p016(frame, x, y); ++ else ++ return make_ushort3(0, 0, 0); ++} ++ ++static __inline__ __device__ float sample_to_float(unsigned short i) ++{ ++ return (float)i / ((1 << 
depth_src) - 1); ++} ++ ++static __inline__ __device__ float3 pixel_to_float3(ushort3 flt) ++{ ++ return make_float3(sample_to_float(flt.x), ++ sample_to_float(flt.y), ++ sample_to_float(flt.z)); ++} ++ ++static __inline__ __device__ float3 read_px_flt(const FFCUDAFrame& frame, int x, int y) ++{ ++ return pixel_to_float3(read_px(frame, x, y)); ++} ++ ++// Single-sample write function ++template ++static __inline__ __device__ void write_sample(const FFCUDAFrame& frame, int x, int y, T sample) ++{ ++ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); ++ ptr[x] = sample; ++} ++ ++// Per-format write functions ++static __inline__ __device__ void write_nv12_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned char)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned char)chroma.y); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned char)chroma.z); ++} ++ ++static __inline__ __device__ void write_yuv420p_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned char)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); ++ ++ write_sample<1>(frame, x / 2, y / 2, (unsigned char)chroma.y); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned char)chroma.z); ++} ++ ++static __inline__ __device__ void write_p016_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned 
short)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)chroma.y); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)chroma.z); ++} ++ ++static __inline__ __device__ void write_p010_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); ++ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); ++ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)(chroma.y << 6)); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)(chroma.z << 6)); ++} ++ ++static __inline__ __device__ void write_yuv420p16_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); ++ ++ write_sample<1>(frame, x / 2, y / 2, (unsigned short)chroma.y); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned short)chroma.z); ++} ++ ++static __inline__ __device__ void write_yuv420p10_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); ++ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); ++ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); ++ ++ write_sample<1>(frame, x / 2, y / 2, (unsigned short)(chroma.y << 6)); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned short)(chroma.z << 6)); ++} ++ ++// Generic write functions ++static __inline__ __device__ void 
write_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ if (fmt_dst == AV_PIX_FMT_YUV420P) ++ write_yuv420p_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_NV12) ++ write_nv12_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_P010) ++ write_p010_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_P016) ++ write_p016_2x2(frame, x, y, a, b, c, d, chroma); ++} ++ ++static __inline__ __device__ unsigned short sample_to_ushort(float flt) ++{ ++ return (unsigned short)(flt * ((1 << depth_dst) - 1)); ++} ++ ++static __inline__ __device__ ushort3 pixel_to_ushort3(float3 flt) ++{ ++ return make_ushort3(sample_to_ushort(flt.x), ++ sample_to_ushort(flt.y), ++ sample_to_ushort(flt.z)); ++} ++ ++static __inline__ __device__ void write_2x2_flt(const FFCUDAFrame& frame, int x, int y, float3 a, float3 b, float3 c, float3 d) ++{ ++ float3 chroma = get_chroma_sample(a, b, c, d); ++ ++ ushort3 ia = pixel_to_ushort3(a); ++ ushort3 ib = pixel_to_ushort3(b); ++ ushort3 ic = pixel_to_ushort3(c); ++ ushort3 id = pixel_to_ushort3(d); ++ ++ ushort3 ichroma = pixel_to_ushort3(chroma); ++ ++ write_2x2(frame, x, y, ia, ib, ic, id, ichroma); ++} ++ ++static __inline__ __device__ float read_dither(cudaTextureObject_t ditherTex, float dither_size, int x, int y) ++{ ++ float dither_size_recip = 1.0f / dither_size; ++ return tex2D(ditherTex, (float)x * dither_size_recip, (float)y * dither_size_recip); ++} ++ ++static __inline__ __device__ float3 read_tex_px_flt(const FFCUDAFrame& frame, int x, int y) ++{ ++ float ncoord_x = (float)(x + 1) * (1.0f / (frame.width + 1)); ++ float ncoord_y = (float)(y + 1) * (1.0f / (frame.height + 1)); ++ ++ float px_y = tex2D(frame.tex[0], x, y); ++ float2 px_uv = tex2D(frame.tex[1], ncoord_x, ncoord_y); ++ ++ return make_float3(px_y, px_uv.x, px_uv.y); ++} ++ ++#endif /* AVFILTER_CUDA_PIXFMT_H */ +Index: FFmpeg/libavfilter/cuda/shared.h 
+=================================================================== +--- /dev/null ++++ libavfilter/cuda/shared.h +@@ -0,0 +1,33 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_SHARED_H ++#define AVFILTER_CUDA_SHARED_H ++ ++typedef struct FFCUDAFrame { ++ unsigned char *data[4]; ++ int linesize[4]; ++ int width, height; ++ int planes; ++ ++ float peak; ++ ++ unsigned long long tex[4]; ++} FFCUDAFrame; ++ ++#endif /* AVFILTER_CUDA_SHARED_H */ +Index: FFmpeg/libavfilter/cuda/tonemap.cu +=================================================================== +--- /dev/null ++++ libavfilter/cuda/tonemap.cu +@@ -0,0 +1,579 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "colorspace_common.h"
++#include "pixfmt.h"
++#include "tonemap.h"
++#include "util.h"
++
++extern __constant__ const enum TonemapAlgorithm tonemap_func; // curve selector switched on in map() / map_itp()
++extern __constant__ const float tone_param; // curve parameter read by linear/gamma/clip/reinhard/mobius
++extern __constant__ const float desat_param; // desaturation strength; the desaturation step is skipped unless > 0
++extern __constant__ const int enable_dither; // declared here but not read anywhere else in this file
++extern __constant__ const float dither_size; // dither matrix edge length used by get_dithered_y() and _DITHER
++extern __constant__ const float dither_quantization; // number of quantization steps used by get_dithered_y()
++
++#define clamp(a, b, c) min(max((a), (b)), (c)) /* limit a to the range [b, c] */
++#define mix(x, y, a) ((x) + ((y) - (x)) * (a)) /* linear blend: x when a == 0, y when a == 1 */
++#define dot3(a, b) ((a).z * (b).z + ((a).y * (b).y + (a).x * (b).x)) /* dot product over .x/.y/.z */
++#define dot4(a, b) ((a).w * (b).w + ((a).z * (b).z + ((a).y * (b).y + (a).x * (b).x))) /* dot product over .x/.y/.z/.w */
++
++static __inline__ __device__ // Clamp each component of a to [min_val, max_val].
++float3 clamp3(const float3 a, const float min_val, const float max_val) {
++    float3 result;
++    result.x = clamp(a.x, min_val, max_val);
++    result.y = clamp(a.y, min_val, max_val);
++    result.z = clamp(a.z, min_val, max_val);
++    return result;
++}
++
++static __inline__ __device__ // Add dither offset d to y and quantize to multiples of 1/dither_quantization.
++float get_dithered_y(float y, float d) {
++    return floor(y * dither_quantization + d + 0.5f / (dither_size * dither_size)) * 1.0f / dither_quantization;
++}
++
++static __inline__ __device__ // Rational curve with fixed constants a..f; hable() evaluates it at s and at peak.
++float hable_f(float in) {
++    float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
++    return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
++}
++
++static __inline__ __device__ // Identity curve: returns s unchanged (peak is unused).
++float direct(float s, float peak) {
++    return s;
++}
++
++static __inline__ __device__ // Scale s by tone_param / peak.
++float linear(float s, float peak) {
++    return s * tone_param / peak;
++}
++
++static __inline__ __device__ // Power curve with exponent 1/tone_param above 0.05; below it, a straight line through 0 that meets the curve at s == 0.05.
++float gamma(float s, float peak) {
++    float p = s > 0.05f ? s / peak : 0.05f / peak;
++    float v = __powf(p, 1.0f / tone_param);
++    return s > 0.05f ? v : (s * v / 0.05f);
++}
++
++static __inline__ __device__ // Multiply by tone_param and clamp to [0, 1] (peak is unused).
++float clip(float s, float peak) {
++    return clamp(s * tone_param, 0.0f, 1.0f);
++}
++
++static __inline__ __device__ // s / (s + tone_param), rescaled by (peak + tone_param) / peak so that s == peak maps to 1.
++float reinhard(float s, float peak) {
++    return s / (s + tone_param) * (peak + tone_param) / peak;
++}
++
++static __inline__ __device__ // hable_f(s) normalized by hable_f(peak), so s == peak maps to 1.
++float hable(float s, float peak) {
++    return hable_f(s) / hable_f(peak);
++}
++
++static __inline__ __device__ // Identity for s <= tone_param; above that a rational segment (s + a) / (s + b) scaled by a constant.
++float mobius(float s, float peak) {
++    float j = tone_param;
++    float a, b;
++
++    if (s <= j)
++        return s;
++
++    a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
++    b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS); // FLOAT_EPS guards the division when peak is close to 1
++
++    return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b);
++}
++
++static __inline__ __device__ // Knee curve on a signal that bt2390() has already passed through inverse_eotf_st2084(); the result is in that same domain.
++float bt2390_common(float s, float peak, float dst_peak) {
++    float peak_pq = inverse_eotf_st2084(peak);
++    float scale = peak_pq > 0.0f ? (1.0f / peak_pq) : 1.0f; // normalize so the source peak becomes 1
++
++    float s_pq = s * scale;
++    float max_lum = inverse_eotf_st2084(dst_peak) * scale;
++
++    float ks = 1.5f * max_lum - 0.5f; // knee position
++    float tb = (s_pq - ks) / (1.0f - ks);
++    float tb2 = tb * tb;
++    float tb3 = tb2 * tb;
++    float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks +
++               (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) +
++               (-2.0f * tb3 + 3.0f * tb2) * max_lum;
++    float sig = mix(pb, s_pq, s_pq < ks); // below the knee the input passes through, otherwise the cubic pb is used
++
++    return sig * peak_pq;
++}
++
++static __inline__ __device__ // Wrap bt2390_common() between inverse_eotf_st2084() and eotf_st2084().
++float bt2390(float s, float peak, float dst_peak) {
++    float s_pq = inverse_eotf_st2084(s);
++    return eotf_st2084(bt2390_common(s_pq, peak, dst_peak));
++}
++
++static __inline__ __device__ // Apply the curve selected by tonemap_func to one signal value.
++float map(float s, float peak, float dst_peak)
++{
++    switch (tonemap_func) {
++    case TONEMAP_NONE:
++    default: // unknown selector values also fall back to the identity curve
++        return direct(s, peak);
++    case TONEMAP_LINEAR:
++        return linear(s, peak);
++    case TONEMAP_GAMMA:
++        return gamma(s, peak);
++    case TONEMAP_CLIP:
++        return clip(s, peak);
++    case TONEMAP_REINHARD:
++        return reinhard(s, peak);
++    case TONEMAP_HABLE:
++        return hable(s, peak);
++    case TONEMAP_MOBIUS:
++        return mobius(s, peak);
++    case TONEMAP_BT2390:
++        return bt2390(s, peak, dst_peak);
++    }
++}
++
++static __inline__ __device__ // Variant of map() for the ITP path: input and output stay in the inverse_eotf_st2084() domain.
++float map_itp(float s, float peak, float dst_peak)
++{
++    switch (tonemap_func) {
++    default: // every curve except BT2390 is wrapped between eotf_st2084() and inverse_eotf_st2084()
++        return inverse_eotf_st2084(map(eotf_st2084(s), peak, dst_peak));
++    case TONEMAP_BT2390:
++        return bt2390_common(s, peak, dst_peak);
++    }
++}
++
++static __inline__ __device__ // "max" mode: the signal is the largest RGB component; rgb is scaled by mapped/original signal (dst is unused).
++float3 map_one_pixel_rgb_mode_max(float3 rgb, const FFCUDAFrame& src, const FFCUDAFrame& dst) {
++    float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS);
++    float sig_old = sig;
++    float peak = src.peak;
++    float dst_peak = 1.0f;
++
++    // Desaturate the color using a coefficient dependent on the signal level
++    if (desat_param > 0.0f) {
++        float luma = get_luma_dst(rgb, luma_dst);
++        float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS);
++        coeff = __powf(coeff, 10.0f / desat_param);
++        rgb = mix(rgb, make_float3(luma, luma, luma), make_float3(coeff, coeff, coeff));
++    }
++
++    sig = map(sig, peak, dst_peak);
++    sig = min(sig, 1.0f);
++    rgb = rgb * (sig / sig_old);
++
++    return rgb;
++}
++
++static __inline__ __device__ // "rgb" mode: each channel is mapped on its own and scaled by its own mapped/original ratio (dst is unused).
++float3 map_one_pixel_rgb_mode_rgb(float3 rgb, const FFCUDAFrame& src, const FFCUDAFrame& dst) {
++    float3 sig;
++    sig.x = max(rgb.x, FLOAT_EPS);
++    sig.y = max(rgb.y, FLOAT_EPS);
++    sig.z = max(rgb.z, FLOAT_EPS);
++    float3 sig_old = sig; // captured before desaturation; the ratio below uses these pre-desaturation values
++    float peak = src.peak;
++    float dst_peak = 1.0f;
++
++    // Desaturate the color using a coefficient dependent on the signal level
++    if (desat_param > 0.0f) {
++        float sig_max = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS);
++        float luma = get_luma_dst(rgb, luma_dst);
++        float coeff = max(sig_max - 0.18f, FLOAT_EPS) / max(sig_max, FLOAT_EPS);
++        coeff = __powf(coeff, 10.0f / desat_param);
++        rgb = mix(rgb, make_float3(luma, luma, luma), make_float3(coeff, coeff, coeff));
++    }
++
++    sig.x = map(sig.x, peak, dst_peak);
++    sig.y = map(sig.y, peak, dst_peak);
++    sig.z = map(sig.z, peak, dst_peak);
++    sig.x = min(sig.x, 1.0f);
++    sig.y = min(sig.y, 1.0f);
++    sig.z = min(sig.z, 1.0f);
++    rgb = rgb * (sig / sig_old);
++
++    return rgb;
++}
++
++static __inline__ __device__ // "lum" mode: the signal is a fixed weighted sum of R/G/B limited to peak; the result is not clamped to 1 here (dst is unused).
++float3 map_one_pixel_rgb_mode_lum(float3 rgb, const FFCUDAFrame& src, const FFCUDAFrame& dst) {
++    float sig = max((rgb.x * 0.2627f + rgb.y * 0.678f + rgb.z * 0.0593f), FLOAT_EPS);
++    float peak = src.peak;
++    sig = min(sig, peak);
++    float sig_old = sig;
++    float dst_peak = 1.0f;
++
++    // Desaturate the color using a coefficient dependent on the signal level
++    if (desat_param > 0.0f) {
++        float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS);
++        coeff = __powf(coeff, 10.0f / desat_param);
++        rgb = mix(rgb, make_float3(sig, sig, sig), make_float3(coeff, coeff, coeff));
++    }
++
++    sig = map(sig, peak, dst_peak);
++    rgb = rgb * (sig / sig_old);
++
++    return rgb;
++}
++
++static __inline__ __device__ // "itp" mode: convert with lrgb2ictcp(), map the .x component with map_itp(), scale .y/.z, convert back (dst is unused).
++float3 map_one_pixel_itp_mode(float3 rgb, const FFCUDAFrame& src, const FFCUDAFrame& dst) {
++    float3 ictcp = lrgb2ictcp(rgb.x, rgb.y, rgb.z);
++    float peak = src.peak;
++    float dst_peak = 1.0f;
++    ictcp.x = max(ictcp.x, FLOAT_EPS);
++    float i_o = ictcp.x; // .x before mapping, used for the .y/.z scale factor below
++
++    if (desat_param > 0.0f) {
++        float p = eotf_st2084(ictcp.x) - (dst_peak - desat_param) * 0.5f;
++        float coeff = __expf(-(p * p) / (2.0f * peak));
++        ictcp.y *= coeff;
++        ictcp.z *= coeff;
++    }
++
++    ictcp.x = map_itp(ictcp.x, peak, dst_peak);
++    ictcp.x = min(ictcp.x, 1.0f);
++    float factor = min(ictcp.x / i_o, i_o / ictcp.x); // always <= 1, whichever direction .x moved
++    ictcp.y *= factor;
++    ictcp.z *= factor;
++
++    return ictcp2lrgb(ictcp.x, ictcp.y, ictcp.z);
++}
++
++// Map from source space YUV to destination space RGB
++static __inline__ __device__
++float3 map_to_dst_space_from_yuv(float3 yuv) {
++    float3 c = yuv2lrgb(yuv);
++    return lrgb2lrgb(c);
++}
++
++static __inline__ __device__ // Same as map_to_dst_space_from_yuv() but without the lrgb2lrgb() step.
++float3 map_to_src_space_from_yuv(float3 yuv) {
++    float3 c = yuv2lrgb(yuv);
++    return c;
++}
++
++static __inline__ __device__ // Dolby Vision input: ycc2rgb(), then lms2rgb(), then lrgb2lrgb().
++float3 map_to_dst_space_from_yuv_dovi(float3 yuv) {
++    float3 c = ycc2rgb(yuv.x, yuv.y, yuv.z);
++    c = lms2rgb(c.x, c.y, c.z);
++    c = lrgb2lrgb(c);
++    return c;
++}
++
++static __inline__ __device__ // As map_to_dst_space_from_yuv_dovi() but using lms2rgb_fast().
++float3 map_to_dst_space_from_yuv_dovi_fast(float3 yuv) {
++    float3 c = ycc2rgb(yuv.x, yuv.y, yuv.z);
++    c = lms2rgb_fast(c.x, c.y, c.z);
++    c = lrgb2lrgb(c);
++    return c;
++}
++
++static __inline__ __device__ // Dolby Vision input: ycc2rgb() then lms2rgb(), without the lrgb2lrgb() step.
++float3 map_to_src_space_from_yuv_dovi(float3 yuv) {
++    float3 c = ycc2rgb(yuv.x, yuv.y, yuv.z);
++    c = lms2rgb(c.x, c.y, c.z);
++    return c;
++}
++
++static __inline__ __device__ // As map_to_src_space_from_yuv_dovi() but using lms2rgb_fast().
++float3 map_to_src_space_from_yuv_dovi_fast(float3 yuv) {
++    float3 c = ycc2rgb(yuv.x, yuv.y, yuv.z);
++    c = lms2rgb_fast(c.x, c.y, c.z);
++    return c;
++}
++
++static __inline__ __device__ // Second-order polynomial: coeffs.x + coeffs.y * s + coeffs.z * s^2 (coeffs.w is not read).
++float reshape_poly(float s, float4 coeffs) {
++    return (coeffs.z * s + coeffs.y) * s + coeffs.x;
++}
++
++static __inline__ __device__ // Sum coeffs.x plus dot products of consecutive dovi_mmr rows with sig, its cross terms, and their 2nd/3rd powers.
++float reshape_mmr(float3 sig, float4 coeffs, float4 *dovi_mmr,
++                  int dovi_mmr_single, int dovi_min_order, int dovi_max_order)
++{
++    int mmr_idx = dovi_mmr_single ? 0 : (int)coeffs.y; // coeffs.y carries the first row index when more than one MMR set exists
++    int order = (int)coeffs.w; // coeffs.w carries the order of this piece
++    float3 sigXxyz = make_float3(sig.x, sig.x, sig.y) * make_float3(sig.y, sig.z, sig.z); // pairwise products xy, xz, yz
++    float4 sigX = make_float4(sigXxyz.x, sigXxyz.y, sigXxyz.z, sigXxyz.x * sig.z); // xy, xz, yz, xyz
++    float4 mmr;
++
++    float s = coeffs.x;
++    mmr = dovi_mmr[mmr_idx + 0];
++    s += dot3(make_float3(mmr.x, mmr.y, mmr.z), sig);
++    mmr = dovi_mmr[mmr_idx + 1];
++    s += dot4(mmr, sigX);
++
++    int t = dovi_max_order >= 2 && (dovi_min_order >= 2 || order >= 2);
++    if (t) {
++        float3 sig2 = sig * sig;
++        float4 sigX2 = sigX * sigX;
++        mmr = dovi_mmr[mmr_idx + 2];
++        s += dot3(make_float3(mmr.x, mmr.y, mmr.z), sig2);
++        mmr = dovi_mmr[mmr_idx + 3];
++        s += dot4(mmr, sigX2);
++        t = dovi_max_order == 3 && (dovi_min_order == 3 || order >= 3);
++        if (t) {
++            mmr = dovi_mmr[mmr_idx + 4];
++            s += dot3(make_float3(mmr.x, mmr.y, mmr.z), sig2 * sig);
++            mmr = dovi_mmr[mmr_idx + 5];
++            s += dot4(mmr, sigX2 * sigX);
++        }
++    }
++
++    return s;
++}
++
++static __inline__ __device__ // Reshape the three components of one pixel using the per-component parameter tables.
++float3 reshape_dovi_yuv(float3 yuv,
++                        float *src_dovi_params, float *src_dovi_pivots,
++                        float4 *src_dovi_coeffs, float4 *src_dovi_mmr)
++{
++    int i;
++    float s;
++    float3 sig = make_float3(clamp(yuv.x, 0.0f, 1.0f),
++                             clamp(yuv.y, 0.0f, 1.0f),
++                             clamp(yuv.z, 0.0f, 1.0f));
++    float sig_arr[3] = {sig.x, sig.y, sig.z};
++    float4 coeffs;
++    int dovi_num_pivots, dovi_has_mmr, dovi_has_poly;
++    int dovi_mmr_single, dovi_min_order, dovi_max_order;
++    float dovi_lo, dovi_hi;
++    float *dovi_params;
++    float *dovi_pivots;
++    float4 *dovi_coeffs, *dovi_mmr;
++
++#pragma unroll
++    for (i = 0; i < 3; i++) {
++        dovi_params = src_dovi_params + i*8; // 8 floats of parameters per component
++        dovi_pivots = src_dovi_pivots + i*8; // 8 floats of pivots per component
++        dovi_coeffs = src_dovi_coeffs + i*8; // 8 float4 coefficient sets per component
++        dovi_mmr = src_dovi_mmr + i*48; // 48 float4 MMR rows per component
++        dovi_num_pivots = dovi_params[0];
++        dovi_has_mmr = dovi_params[1];
++        dovi_has_poly = dovi_params[2];
++        dovi_mmr_single = dovi_params[3];
++        dovi_min_order = dovi_params[4];
++        dovi_max_order = dovi_params[5];
++        dovi_lo = dovi_params[6];
++        dovi_hi = dovi_params[7];
++
++        s = sig_arr[i];
++        coeffs = dovi_coeffs[0];
++
++        if (i == 0 && dovi_num_pivots > 2) { // only component 0 selects among several pieces
++            float t0 = s >= dovi_pivots[0], t1 = s >= dovi_pivots[1];
++            float t2 = s >= dovi_pivots[2], t3 = s >= dovi_pivots[3];
++            float t4 = s >= dovi_pivots[4], t5 = s >= dovi_pivots[5], t6 = s >= dovi_pivots[6];
++
++            coeffs = mix(mix(mix(dovi_coeffs[0], dovi_coeffs[1], make_float4(t0, t0, t0, t0)), // tN are 0 or 1, so the nested mix() calls act as a branch-free select
++                             mix(dovi_coeffs[2], dovi_coeffs[3], make_float4(t2, t2, t2, t2)),
++                             make_float4(t1, t1, t1, t1)),
++                         mix(mix(dovi_coeffs[4], dovi_coeffs[5], make_float4(t4, t4, t4, t4)),
++                             mix(dovi_coeffs[6], dovi_coeffs[7], make_float4(t6, t6, t6, t6)),
++                             make_float4(t5, t5, t5, t5)),
++                         make_float4(t3, t3, t3, t3));
++        }
++
++        int has_mmr_poly = dovi_has_mmr && dovi_has_poly;
++
++        if ((has_mmr_poly && coeffs.w == 0.0f) || (!has_mmr_poly && dovi_has_poly)) // with both kinds present, coeffs.w == 0 marks a polynomial piece
++            s = reshape_poly(s, coeffs);
++        else
++            s = reshape_mmr(sig, coeffs, dovi_mmr,
++                            dovi_mmr_single, dovi_min_order, dovi_max_order);
++
++        sig_arr[i] = clamp(s, dovi_lo, dovi_hi);
++    }
++
++    return make_float3(sig_arr[0], sig_arr[1], sig_arr[2]);
++}
++
++extern "C" {
++
++#define _READER /* each thread handles the 2x2 pixel block at (2*xi, 2*yi) and returns early when the block is not fully inside src */ \
++    int xi = blockIdx.x * blockDim.x + threadIdx.x; \
++    int yi = blockIdx.y * blockDim.y + threadIdx.y; \
++    int x = 2 * xi; \
++    int y = 2 * yi; \
++    if (y + 1 >= src.height || x + 1 >= src.width) \
++        return; \
++    float3 yuv0 = read_tex_px_flt(src, x, y); \
++    float3 yuv1 = read_tex_px_flt(src, x + 1, y); \
++    float3 yuv2 = read_tex_px_flt(src, x, y + 1); \
++    float3 yuv3 = read_tex_px_flt(src, x + 1, y + 1);
++
++#define _RESHAPE /* slice doviBuf at float offsets 0 / 24 / 48 / 144 into params, pivots, coeffs and mmr, then reshape the four pixels */ \
++    float *dovi_params = doviBuf; \
++    float *dovi_pivots = doviBuf + 24; \
++    float4 *dovi_coeffs = (float4 *)(doviBuf + 48); \
++    float4 *dovi_mmr = (float4 *)(doviBuf + 144); \
++    yuv0 = reshape_dovi_yuv(yuv0, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); \
++    yuv1 = reshape_dovi_yuv(yuv1, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); \
++    yuv2 = reshape_dovi_yuv(yuv2, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); \
++    yuv3 = reshape_dovi_yuv(yuv3, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr);
++
++#define _YUV2RGB /* declares c0..c3 via map_to_dst_space_from_yuv() */ \
++    float3 c0 = map_to_dst_space_from_yuv(yuv0); \
++    float3 c1 = map_to_dst_space_from_yuv(yuv1); \
++    float3 c2 = map_to_dst_space_from_yuv(yuv2); \
++    float3 c3 = map_to_dst_space_from_yuv(yuv3);
++
++#define _YUV2RGB_S /* declares c0..c3 via map_to_src_space_from_yuv() */ \
++    float3 c0 = map_to_src_space_from_yuv(yuv0); \
++    float3 c1 = map_to_src_space_from_yuv(yuv1); \
++    float3 c2 = map_to_src_space_from_yuv(yuv2); \
++    float3 c3 = map_to_src_space_from_yuv(yuv3);
++
++#define _YCC2RGB /* declares c0..c3 via map_to_dst_space_from_yuv_dovi() */ \
++    float3 c0 = map_to_dst_space_from_yuv_dovi(yuv0); \
++    float3 c1 = map_to_dst_space_from_yuv_dovi(yuv1); \
++    float3 c2 = map_to_dst_space_from_yuv_dovi(yuv2); \
++    float3 c3 = map_to_dst_space_from_yuv_dovi(yuv3);
++
++#define _YCC2RGB_F /* declares c0..c3 via map_to_dst_space_from_yuv_dovi_fast() */ \
++    float3 c0 = map_to_dst_space_from_yuv_dovi_fast(yuv0); \
++    float3 c1 = map_to_dst_space_from_yuv_dovi_fast(yuv1); \
++    float3 c2 = map_to_dst_space_from_yuv_dovi_fast(yuv2); \
++    float3 c3 = map_to_dst_space_from_yuv_dovi_fast(yuv3);
++
++#define _YCC2RGB_S /* declares c0..c3 via map_to_src_space_from_yuv_dovi() */ \
++    float3 c0 = map_to_src_space_from_yuv_dovi(yuv0); \
++    float3 c1 = map_to_src_space_from_yuv_dovi(yuv1); \
++    float3 c2 = map_to_src_space_from_yuv_dovi(yuv2); \
++    float3 c3 = map_to_src_space_from_yuv_dovi(yuv3);
++
++#define _YCC2RGB_FS /* declares c0..c3 via map_to_src_space_from_yuv_dovi_fast() */ \
++    float3 c0 = map_to_src_space_from_yuv_dovi_fast(yuv0); \
++    float3 c1 = map_to_src_space_from_yuv_dovi_fast(yuv1); \
++    float3 c2 = map_to_src_space_from_yuv_dovi_fast(yuv2); \
++    float3 c3 = map_to_src_space_from_yuv_dovi_fast(yuv3);
++
++#define _TONEMAP_MAX /* tone-map c0..c3 with map_one_pixel_rgb_mode_max() */ \
++    c0 = map_one_pixel_rgb_mode_max(c0, src, dst); \
++    c1 = map_one_pixel_rgb_mode_max(c1, src, dst); \
++    c2 = map_one_pixel_rgb_mode_max(c2, src, dst); \
++    c3 = map_one_pixel_rgb_mode_max(c3, src, dst);
++
++#define _TONEMAP_RGB /* tone-map c0..c3 with map_one_pixel_rgb_mode_rgb() */ \
++    c0 = map_one_pixel_rgb_mode_rgb(c0, src, dst); \
++    c1 = map_one_pixel_rgb_mode_rgb(c1, src, dst); \
++    c2 = map_one_pixel_rgb_mode_rgb(c2, src, dst); \
++    c3 = map_one_pixel_rgb_mode_rgb(c3, src, dst);
++
++#define _TONEMAP_LUM /* tone-map c0..c3 with map_one_pixel_rgb_mode_lum() */ \
++    c0 = map_one_pixel_rgb_mode_lum(c0, src, dst); \
++    c1 = map_one_pixel_rgb_mode_lum(c1, src, dst); \
++    c2 = map_one_pixel_rgb_mode_lum(c2, src, dst); \
++    c3 = map_one_pixel_rgb_mode_lum(c3, src, dst);
++
++#define _TONEMAP_ITP /* tone-map c0..c3 with map_one_pixel_itp_mode() */ \
++    c0 = map_one_pixel_itp_mode(c0, src, dst); \
++    c1 = map_one_pixel_itp_mode(c1, src, dst); \
++    c2 = map_one_pixel_itp_mode(c2, src, dst); \
++    c3 = map_one_pixel_itp_mode(c3, src, dst);
++
++#define _RGB2YUV /* convert c0..c3 back with lrgb2yuv() */ \
++    yuv0 = lrgb2yuv(c0); \
++    yuv1 = lrgb2yuv(c1); \
++    yuv2 = lrgb2yuv(c2); \
++    yuv3 = lrgb2yuv(c3);
++
++#define _RGB2YUV_S /* lrgb2lrgb(), gamut_compress() unless rgb2rgb_passthrough, clamp to [0, 1], then lrgb2yuv() */ \
++    c0 = lrgb2lrgb(c0); \
++    c1 = lrgb2lrgb(c1); \
++    c2 = lrgb2lrgb(c2); \
++    c3 = lrgb2lrgb(c3); \
++    if (!rgb2rgb_passthrough) { \
++        c0 = gamut_compress(c0); \
++        c1 = gamut_compress(c1); \
++        c2 = gamut_compress(c2); \
++        c3 = gamut_compress(c3); \
++    } \
++    yuv0 = lrgb2yuv(clamp3(c0, 0.0f, 1.0f)); \
++    yuv1 = lrgb2yuv(clamp3(c1, 0.0f, 1.0f)); \
++    yuv2 = lrgb2yuv(clamp3(c2, 0.0f, 1.0f)); \
++    yuv3 = lrgb2yuv(clamp3(c3, 0.0f, 1.0f));
++
++#define _RGB2YUV_FS /* as _RGB2YUV_S but without the gamut_compress() step */ \
++    c0 = clamp3(lrgb2lrgb(c0), 0.0f, 1.0f); \
++    c1 = clamp3(lrgb2lrgb(c1), 0.0f, 1.0f); \
++    c2 = clamp3(lrgb2lrgb(c2), 0.0f, 1.0f); \
++    c3 = clamp3(lrgb2lrgb(c3), 0.0f, 1.0f); \
++    yuv0 = lrgb2yuv(c0); \
++    yuv1 = lrgb2yuv(c1); \
++    yuv2 = lrgb2yuv(c2); \
++    yuv3 = lrgb2yuv(c3);
++
++#define _DITHER /* one dither value per thread is applied to the .x component of all four pixels */ \
++    float d = read_dither(ditherTex, dither_size, xi, yi); \
++    yuv0.x = get_dithered_y(yuv0.x, d); \
++    yuv1.x = get_dithered_y(yuv1.x, d); \
++    yuv2.x = get_dithered_y(yuv2.x, d); \
++    yuv3.x = get_dithered_y(yuv3.x, d);
++
++#define _WRITER /* store the 2x2 block with write_2x2_flt() */ \
++    write_2x2_flt(dst, x, y, yuv0, yuv1, yuv2, yuv3);
++
++#define TONEMAP_VARIANT(NAME, READER, RESHAPE, YUV2RGB, TONEMAP, RGB2YUV, DITHER, WRITER) /* defines kernel tonemap<NAME> by pasting the stage macros in order; an empty argument drops that stage */ \
++__global__ void tonemap ## NAME( \
++    FFCUDAFrame src, FFCUDAFrame dst, \
++    cudaTextureObject_t ditherTex, float *doviBuf) \
++{ \
++    READER \
++    RESHAPE \
++    YUV2RGB \
++    TONEMAP \
++    RGB2YUV \
++    DITHER \
++    WRITER \
++}
++
++TONEMAP_VARIANT(_max, _READER, , _YUV2RGB, _TONEMAP_MAX, _RGB2YUV, , _WRITER) // group without the reshape stage; names ending in "_d" add the _DITHER stage
++TONEMAP_VARIANT(_max_d, _READER, , _YUV2RGB, _TONEMAP_MAX, _RGB2YUV, _DITHER, _WRITER)
++TONEMAP_VARIANT(_rgb, _READER, , _YUV2RGB, _TONEMAP_RGB, _RGB2YUV, , _WRITER)
++TONEMAP_VARIANT(_rgb_d, _READER, , _YUV2RGB, _TONEMAP_RGB, _RGB2YUV, _DITHER, _WRITER)
++TONEMAP_VARIANT(_lum, _READER, , _YUV2RGB_S, _TONEMAP_LUM, _RGB2YUV_S, , _WRITER)
++TONEMAP_VARIANT(_lum_d, _READER, , _YUV2RGB_S, _TONEMAP_LUM, _RGB2YUV_S, _DITHER, _WRITER)
++TONEMAP_VARIANT(_itp, _READER, , _YUV2RGB_S, _TONEMAP_ITP, _RGB2YUV_S, , _WRITER)
++TONEMAP_VARIANT(_itp_d, _READER, , _YUV2RGB_S, _TONEMAP_ITP, _RGB2YUV_S, _DITHER, _WRITER)
++
++TONEMAP_VARIANT(_dovi_max, _READER, _RESHAPE, _YCC2RGB, _TONEMAP_MAX, _RGB2YUV, , _WRITER) // group with the _RESHAPE stage and the lms2rgb() based conversions
++TONEMAP_VARIANT(_dovi_max_d, _READER, _RESHAPE, _YCC2RGB, _TONEMAP_MAX, _RGB2YUV, _DITHER, _WRITER)
++TONEMAP_VARIANT(_dovi_rgb, _READER, _RESHAPE, _YCC2RGB, _TONEMAP_RGB, _RGB2YUV, , _WRITER)
++TONEMAP_VARIANT(_dovi_rgb_d, _READER, _RESHAPE, _YCC2RGB, _TONEMAP_RGB, _RGB2YUV, _DITHER, _WRITER)
++TONEMAP_VARIANT(_dovi_lum, _READER, _RESHAPE, _YCC2RGB_S, _TONEMAP_LUM, _RGB2YUV_S, , _WRITER)
++TONEMAP_VARIANT(_dovi_lum_d, _READER, _RESHAPE, _YCC2RGB_S, _TONEMAP_LUM, _RGB2YUV_S, _DITHER, _WRITER)
++TONEMAP_VARIANT(_dovi_itp, _READER, _RESHAPE, _YCC2RGB_S, _TONEMAP_ITP, _RGB2YUV_S, , _WRITER)
++TONEMAP_VARIANT(_dovi_itp_d, _READER, _RESHAPE, _YCC2RGB_S, _TONEMAP_ITP, _RGB2YUV_S, _DITHER, _WRITER)
++
++TONEMAP_VARIANT(_dovi_max_f, _READER, _RESHAPE, _YCC2RGB_F, _TONEMAP_MAX, _RGB2YUV, , _WRITER) // "_f" group: lms2rgb_fast() based conversions; lum/itp use _RGB2YUV_FS
++TONEMAP_VARIANT(_dovi_max_d_f, _READER, _RESHAPE, _YCC2RGB_F, _TONEMAP_MAX, _RGB2YUV, _DITHER, _WRITER)
++TONEMAP_VARIANT(_dovi_rgb_f, _READER, _RESHAPE, _YCC2RGB_F, _TONEMAP_RGB, _RGB2YUV, , _WRITER)
++TONEMAP_VARIANT(_dovi_rgb_d_f, _READER, _RESHAPE, _YCC2RGB_F, _TONEMAP_RGB, _RGB2YUV, _DITHER, _WRITER)
++TONEMAP_VARIANT(_dovi_lum_f, _READER, _RESHAPE, _YCC2RGB_FS, _TONEMAP_LUM, _RGB2YUV_FS, , _WRITER)
++TONEMAP_VARIANT(_dovi_lum_d_f, _READER, _RESHAPE, _YCC2RGB_FS, _TONEMAP_LUM, _RGB2YUV_FS, _DITHER, _WRITER)
++TONEMAP_VARIANT(_dovi_itp_f, _READER, _RESHAPE, _YCC2RGB_FS, _TONEMAP_ITP, _RGB2YUV_FS, , _WRITER)
++TONEMAP_VARIANT(_dovi_itp_d_f, _READER, _RESHAPE, _YCC2RGB_FS, _TONEMAP_ITP, _RGB2YUV_FS, _DITHER, _WRITER)
++
++TONEMAP_VARIANT(_dovi_pq, _READER, _RESHAPE, _YCC2RGB, , _RGB2YUV, , _WRITER) // "_pq" pair: reshape and color conversion only, no tone-map stage and no dither
++TONEMAP_VARIANT(_dovi_pq_f, _READER, _RESHAPE, _YCC2RGB_F, , _RGB2YUV, , _WRITER)
++
++}
+Index: FFmpeg/libavfilter/cuda/tonemap.h
+===================================================================
+--- /dev/null
++++ libavfilter/cuda/tonemap.h
+@@ -0,0 +1,43 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVFILTER_CUDA_TONEMAP_H
++#define AVFILTER_CUDA_TONEMAP_H
++
++enum TonemapAlgorithm { /* Curve selector; map() in cuda/tonemap.cu switches on it. */
++    TONEMAP_NONE, /* map() calls direct(): the value is returned unchanged */
++    TONEMAP_LINEAR, /* map() calls linear() */
++    TONEMAP_GAMMA, /* map() calls gamma() */
++    TONEMAP_CLIP, /* map() calls clip() */
++    TONEMAP_REINHARD, /* map() calls reinhard() */
++    TONEMAP_HABLE, /* map() calls hable() */
++    TONEMAP_MOBIUS, /* map() calls mobius() */
++    TONEMAP_BT2390, /* map() calls bt2390(); map_itp() calls bt2390_common() directly */
++    TONEMAP_COUNT, /* last enumerator: its value equals the number of algorithms above */
++};
++
++enum TonemapMode { /* NOTE(review): presumably selects among the _max/_rgb/_lum/_itp kernel families in cuda/tonemap.cu -- confirm in the host-side kernel lookup */
++    TONEMAP_MODE_MAX,
++    TONEMAP_MODE_RGB,
++    TONEMAP_MODE_LUM,
++    TONEMAP_MODE_ITP,
++    TONEMAP_MODE_AUTO, /* NOTE(review): no kernel family carries this name; presumably resolved to another mode on the host -- confirm */
++    TONEMAP_MODE_COUNT, /* last enumerator: its value equals the number of modes above */
++};
++
++#endif /* AVFILTER_CUDA_TONEMAP_H */
+Index: FFmpeg/libavfilter/cuda/util.h
+===================================================================
+--- /dev/null
++++ libavfilter/cuda/util.h
+@@ -0,0 +1,86 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVFILTER_CUDA_UTIL_H
++#define AVFILTER_CUDA_UTIL_H /* Component-wise +, -, *, / operators for float3 and float4, each with a vector and a scalar right-hand side. */
++
++static inline __device__ float3 operator+(const float3 &a, const float3 &b) { // component-wise a + b
++    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
++}
++
++static inline __device__ float3 operator+(const float3 &a, float b) { // adds scalar b to every component
++    return make_float3(a.x + b, a.y + b, a.z + b);
++}
++
++static inline __device__ float3 operator-(const float3 &a, const float3 &b) { // component-wise a - b
++    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
++}
++
++static inline __device__ float3 operator-(const float3 &a, float b) { // subtracts scalar b from every component
++    return make_float3(a.x - b, a.y - b, a.z - b);
++}
++
++static inline __device__ float3 operator*(const float3 &a, const float3 &b) { // component-wise product (not a dot or cross product)
++    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
++}
++
++static inline __device__ float3 operator*(const float3 &a, float b) { // scales every component by b
++    return make_float3(a.x * b, a.y * b, a.z * b);
++}
++
++static inline __device__ float3 operator/(const float3 &a, const float3 &b) { // component-wise quotient; no guard against zero components in b
++    return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
++}
++
++static inline __device__ float3 operator/(const float3 &a, float b) { // divides every component by b; no guard against b == 0
++    return make_float3(a.x / b, a.y / b, a.z / b);
++}
++
++static inline __device__ float4 operator+(const float4 &a, const float4 &b) { // component-wise a + b
++    return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
++}
++
++static inline __device__ float4 operator+(const float4 &a, float b) { // adds scalar b to every component
++    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
++}
++
++static inline __device__ float4 operator-(const float4 &a, const float4 &b) { // component-wise a - b
++    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
++}
++
++static inline __device__ float4 operator-(const float4 &a, float b) { // subtracts scalar b from every component
++    return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
++}
++
++static inline __device__ float4 operator*(const float4 &a, const float4 &b) { // component-wise product
++    return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
++}
++
++static inline __device__ float4 operator*(const float4 &a, float b) { // scales every component by b
++    return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
++}
++
++static inline __device__ float4 operator/(const float4 &a, const float4 &b) { // component-wise quotient; no guard against zero components in b
++    return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
++}
++
++static inline __device__ float4 operator/(const float4 &a, float b) { // divides every component by b; no guard against b == 0
++    return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
++}
++
++#endif /* AVFILTER_CUDA_UTIL_H */
+Index: FFmpeg/libavfilter/vf_tonemap_cuda.c
+===================================================================
+--- /dev/null
++++ libavfilter/vf_tonemap_cuda.c
+@@ -0,0 +1,1127 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include /* NOTE(review): the header name is missing on this and the next two lines in this copy, probably stripped together with its angle brackets; presumably float.h, stdio.h, string.h -- restore from upstream */
++#include
++#include
++
++#include "libavutil/avassert.h"
++#include "libavutil/avstring.h"
++#include "libavutil/bprint.h"
++#include "libavutil/common.h"
++#include "libavutil/hwcontext.h"
++#include "libavutil/hwcontext_cuda_internal.h"
++#include "libavutil/cuda_check.h"
++#include "libavutil/internal.h"
++#include "libavutil/opt.h"
++#include "libavutil/pixdesc.h"
++
++#include "avfilter.h"
++#include "colorspace.h"
++#include "cuda/host_util.h"
++#include "cuda/shared.h"
++#include "cuda/tonemap.h"
++#include "internal.h"
++#include "scale_eval.h"
++#include "video.h"
++#include "dither_matrix.h"
++
++static const enum AVPixelFormat supported_formats[] = { /* software formats accepted by format_is_supported() */
++    AV_PIX_FMT_YUV420P,
++    AV_PIX_FMT_NV12,
++    AV_PIX_FMT_P010,
++    AV_PIX_FMT_P016
++};
++
++#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) /* integer division rounding up */
++#define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1)) /* round a up to a multiple of b; the mask form requires b to be a power of two */
++#define NUM_BUFFERS 2
++#define BLOCKX 32
++#define BLOCKY 16
++
++#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) /* expects `ctx` and `s` (with s->hwctx set) to be in scope at the call site */
++
++typedef struct TonemapCUDAContext { /* Private state of the filter (ctx->priv). */
++    const AVClass *class;
++
++    AVCUDADeviceContext *hwctx; /* the uninit helpers skip all CUDA cleanup while this is NULL */
++
++    enum AVPixelFormat in_fmt, out_fmt; /* set by init_processing_chain() */
++
++    enum AVColorTransferCharacteristic trc, in_trc, out_trc;
++    enum AVColorSpace spc, in_spc, out_spc;
++    enum AVColorPrimaries pri, in_pri, out_pri;
++    enum AVColorRange range, in_range, out_range;
++    enum AVChromaLocation in_chroma_loc, out_chroma_loc;
++
++    AVBufferRef *frames_ctx; /* output hw frames context created by init_stage() */
++    AVFrame *frame; /* allocated in init(); init_stage() attaches a buffer from frames_ctx */
++
++    AVFrame *tmp_frame; /* allocated in init(), freed in uninit() */
++
++    /**
++     * Output sw format. AV_PIX_FMT_NONE for no conversion.
++     */
++    enum AVPixelFormat format;
++    char *format_str; /* option string; "same" selects AV_PIX_FMT_NONE in init() */
++
++    CUcontext cu_ctx;
++    CUmodule cu_module; /* unloaded in uninit_common() */
++
++    CUfunction cu_func_tm;
++    CUfunction cu_func_dovi;
++    CUfunction cu_func_dovi_pq;
++
++    CUdeviceptr ditherBuffer; /* device copy of ff_fruit_dither_matrix, allocated in setup_dither() */
++    CUtexObject ditherTex; /* texture object over ditherBuffer, created in setup_dither() */
++
++#define params_cnt 8 /* floats per component in the params region of dovi_pbuf */
++#define pivots_cnt (7+1) /* floats per component in the pivots region */
++#define coeffs_cnt 8*4 /* floats per component in the coeffs region. NOTE(review): unparenthesized body; every visible use multiplies it or sits inside parentheses -- verify before using it in other expressions */
++#define mmr_cnt 8*6*4 /* floats per component in the mmr region (same unparenthesized caveat) */
++#define params_sz params_cnt*sizeof(float) /* byte sizes of one component's region, used as memcpy lengths */
++#define pivots_sz pivots_cnt*sizeof(float)
++#define coeffs_sz coeffs_cnt*sizeof(float)
++#define mmr_sz mmr_cnt*sizeof(float)
++    CUdeviceptr doviBuffer; /* device buffer, freed in uninit_dovi() */
++    struct DoviMetadata *dovi; /* read by update_dovi_buf(), freed in uninit_dovi() */
++    float *dovi_pbuf; /* host staging buffer filled by update_dovi_buf(), freed in uninit_dovi() */
++
++    enum TonemapAlgorithm tonemap;
++    enum TonemapMode tonemap_mode;
++    int apply_dovi;
++    int tradeoff;
++    int init_with_dovi; /* reset to 0 by uninit_dovi() */
++    double ref_white;
++    double param;
++    double desat_param;
++    double peak;
++    double dst_peak;
++    double scene_threshold;
++
++    const AVPixFmtDescriptor *in_desc, *out_desc; /* set by init_processing_chain() */
++} TonemapCUDAContext;
++
++static av_cold int init(AVFilterContext *ctx) /* Parse the format option and allocate the two work frames; returns 0 or a negative AVERROR. */
++{
++    TonemapCUDAContext *s = ctx->priv;
++
++    if (!strcmp(s->format_str, "same")) {
++        s->format = AV_PIX_FMT_NONE;
++    } else {
++        s->format = av_get_pix_fmt(s->format_str);
++        if (s->format == AV_PIX_FMT_NONE) {
++            av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
++            return AVERROR(EINVAL);
++        }
++    }
++
++    s->frame = av_frame_alloc();
++    if (!s->frame)
++        return AVERROR(ENOMEM);
++
++    s->tmp_frame = av_frame_alloc();
++    if (!s->tmp_frame)
++        return AVERROR(ENOMEM);
++
++    s->dovi = NULL;
++    s->doviBuffer = 0;
++
++    return 0;
++}
++
++static av_cold void uninit_dovi(AVFilterContext *ctx) /* Release the Dolby Vision device buffer and host-side state. */
++{
++    TonemapCUDAContext *s = ctx->priv;
++
++    if (s->hwctx) {
++        CudaFunctions *cu = s->hwctx->internal->cuda_dl;
++        CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
++
++        CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
++
++        if (s->doviBuffer) {
++            CHECK_CU(cu->cuMemFree(s->doviBuffer));
++            s->doviBuffer = 0;
++        }
++
++        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
++    }
++
++    if (s->dovi)
++        av_freep(&s->dovi);
++    if (s->dovi_pbuf)
++        av_freep(&s->dovi_pbuf);
++
++    s->init_with_dovi = 0;
++}
++
++static av_cold void uninit_common(AVFilterContext *ctx) /* Destroy the dither texture and buffer and unload the CUDA module. */
++{
++    TonemapCUDAContext *s = ctx->priv;
++
++    if (s->hwctx) {
++        CudaFunctions *cu = s->hwctx->internal->cuda_dl;
++        CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
++
++        CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
++
++        if (s->ditherTex) { /* the texture is destroyed before the buffer it refers to */
++            CHECK_CU(cu->cuTexObjectDestroy(s->ditherTex));
++            s->ditherTex = 0;
++        }
++        if (s->ditherBuffer) {
++            CHECK_CU(cu->cuMemFree(s->ditherBuffer));
++            s->ditherBuffer = 0;
++        }
++        if (s->cu_module) {
++            CHECK_CU(cu->cuModuleUnload(s->cu_module));
++            s->cu_func_tm = NULL;
++            s->cu_func_dovi = NULL;
++            s->cu_func_dovi_pq = NULL;
++            s->cu_module = NULL;
++        }
++
++        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
++    }
++}
++
++static av_cold void uninit(AVFilterContext *ctx) /* Full teardown: CUDA resources first, then frames and the frames context. */
++{
++    TonemapCUDAContext *s = ctx->priv;
++
++    uninit_common(ctx);
++    uninit_dovi(ctx);
++
++    av_frame_free(&s->frame);
++    av_buffer_unref(&s->frames_ctx);
++    av_frame_free(&s->tmp_frame);
++}
++
++static av_cold int setup_dither(AVFilterContext *ctx) /* Upload ff_fruit_dither_matrix to the device and create a texture object over it. */
++{
++    TonemapCUDAContext *s = ctx->priv;
++    AVFilterLink *inlink = ctx->inputs[0];
++    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
++    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
++    CudaFunctions *cu = device_hwctx->internal->cuda_dl;
++    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
++    int ret = 0;
++
++    CUDA_MEMCPY2D cpy = {
++        .srcMemoryType = CU_MEMORYTYPE_HOST,
++        .dstMemoryType = CU_MEMORYTYPE_DEVICE,
++        .srcHost = ff_fruit_dither_matrix,
++        .dstDevice = 0, /* filled in once the device buffer is allocated */
++        .srcPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]),
++        .dstPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]),
++        .WidthInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]),
++        .Height = ff_fruit_dither_size,
++    };
++
++#ifndef CU_TRSF_NORMALIZED_COORDINATES
++    #define CU_TRSF_NORMALIZED_COORDINATES 2 /* fallback definition for headers that do not provide this flag */
++#endif
++    CUDA_TEXTURE_DESC tex_desc = {
++        .addressMode = { CU_TR_ADDRESS_MODE_WRAP },
++        .filterMode = CU_TR_FILTER_MODE_POINT,
++        .flags = CU_TRSF_NORMALIZED_COORDINATES,
++    };
++
++    CUDA_RESOURCE_DESC res_desc = {
++        .resType = CU_RESOURCE_TYPE_PITCH2D,
++        .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16,
++        .res.pitch2D.numChannels = 1,
++        .res.pitch2D.width = ff_fruit_dither_size,
++        .res.pitch2D.height = ff_fruit_dither_size,
++        .res.pitch2D.pitchInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]),
++        .res.pitch2D.devPtr = 0, /* filled in once the device buffer is allocated */
++    };
++
++    av_assert0(sizeof(ff_fruit_dither_matrix) == sizeof(ff_fruit_dither_matrix[0]) * ff_fruit_dither_size * ff_fruit_dither_size);
++
++    if ((ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx))) < 0)
++        return ret;
++
++    if ((ret = CHECK_CU(cu->cuMemAlloc(&s->ditherBuffer, sizeof(ff_fruit_dither_matrix)))) < 0)
++        goto fail;
++
++    res_desc.res.pitch2D.devPtr = cpy.dstDevice = s->ditherBuffer;
++
++    if ((ret = CHECK_CU(cu->cuMemcpy2D(&cpy))) < 0)
++        goto fail;
++
++    if ((ret = CHECK_CU(cu->cuTexObjectCreate(&s->ditherTex, &res_desc, &tex_desc, NULL))) < 0)
++        goto fail;
++
++fail: /* also reached on success; only pops the CUDA context. Resources allocated so far are left for uninit_common() */
++    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
++    return ret;
++}
++
++static av_cold int init_stage(TonemapCUDAContext *s, AVBufferRef *device_ctx, /* Create the output hw frames context and attach a buffer to s->frame. */
++                              AVFilterLink *outlink)
++{
++    AVBufferRef *out_ref = NULL;
++    AVHWFramesContext *out_ctx;
++    int ret;
++
++    out_ref = av_hwframe_ctx_alloc(device_ctx);
++    if (!out_ref)
++        return AVERROR(ENOMEM);
++    out_ctx = (AVHWFramesContext*)out_ref->data;
++
++    out_ctx->format = AV_PIX_FMT_CUDA;
++    out_ctx->sw_format = s->out_fmt;
++    out_ctx->width = FFALIGN(outlink->w, 32); /* pool dimensions are padded up to a multiple of 32 */
++    out_ctx->height = FFALIGN(outlink->h, 32);
++
++    ret = av_hwframe_ctx_init(out_ref);
++    if (ret < 0)
++        goto fail;
++
++    av_frame_unref(s->frame);
++    ret = av_hwframe_get_buffer(out_ref, s->frame, 0);
++    if (ret < 0)
++        goto fail;
++
++    s->frame->width = outlink->w; /* the frame itself reports the unpadded output size */
++    s->frame->height = outlink->h;
++
++    av_buffer_unref(&s->frames_ctx); /* drop any previous frames context before storing the new one */
++    s->frames_ctx = out_ref;
++
++    return 0;
++fail:
++    av_buffer_unref(&out_ref);
++    return ret;
++}
++
++static int format_is_supported(enum AVPixelFormat fmt) /* Return 1 if fmt is listed in supported_formats, otherwise 0. */
++{
++    int i;
++
++    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
++        if (supported_formats[i] == fmt)
++            return 1;
++    return 0;
++}
++
++static av_cold int init_processing_chain(AVFilterContext *ctx, AVFilterLink *outlink) /* Validate input/output formats, record them in the context and set up the output frames context. */
++{
++    TonemapCUDAContext *s = ctx->priv;
++
++    AVHWFramesContext *in_frames_ctx;
++
++    enum AVPixelFormat in_format;
++    enum AVPixelFormat out_format;
++    const AVPixFmtDescriptor *in_desc;
++    const AVPixFmtDescriptor *out_desc;
++    int ret;
++
++    /* check that we have a hw context */
++    if (!ctx->inputs[0]->hw_frames_ctx) {
++        av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
++        return AVERROR(EINVAL);
++    }
++    in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
++    in_format = in_frames_ctx->sw_format;
++    out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format; /* no explicit format means the output keeps the input sw format */
++    in_desc = av_pix_fmt_desc_get(in_format);
++    out_desc = av_pix_fmt_desc_get(out_format);
++
++    if (!format_is_supported(in_format)) {
++        av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
++               av_get_pix_fmt_name(in_format));
++        return AVERROR(ENOSYS);
++    }
++    if (!format_is_supported(out_format)) {
++        av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
++               av_get_pix_fmt_name(out_format));
++        return AVERROR(ENOSYS);
++    }
++    if (!(in_desc->comp[0].depth == 10 || /* only inputs whose first component has depth 10 or 16 pass */
++          in_desc->comp[0].depth == 16)) {
++        av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n",
++               in_desc->comp[0].depth);
++        return AVERROR(ENOSYS);
++    }
++
++    s->in_fmt = in_format;
++    s->out_fmt = out_format;
++    s->in_desc = in_desc;
++    s->out_desc = out_desc;
++
++    ret = init_stage(s, in_frames_ctx->device_ref, outlink);
++    if (ret < 0)
++        return ret;
++
++    ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
++    if (!ctx->outputs[0]->hw_frames_ctx)
++        return AVERROR(ENOMEM);
++
++    return 0;
++}
++
++static const double dovi_lms2rgb_matrix[3][3] = /* NOTE(review): not referenced in the part of the file visible here; presumably consumed by compile() -- confirm */
++{
++    { 3.06441879, -2.16597676, 0.10155818},
++    {-0.65612108, 1.78554118, -0.12943749},
++    { 0.01736321, -0.04725154, 1.03004253},
++};
++
++static int get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, /* Build the 3x3 matrix converting RGB in `in` primaries to RGB in `out` primaries; AVERROR(EINVAL) if either is unknown. */
++                              double rgb2rgb[3][3]) {
++    double rgb2xyz[3][3], xyz2rgb[3][3];
++
++    const AVColorPrimariesDesc *in_primaries = av_csp_primaries_desc_from_id(in);
++    const AVColorPrimariesDesc *out_primaries = av_csp_primaries_desc_from_id(out);
++
++    if (!in_primaries || !out_primaries)
++        return AVERROR(EINVAL);
++
++    ff_fill_rgb2xyz_table(&out_primaries->prim, &out_primaries->wp, rgb2xyz);
++    ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); /* xyz2rgb now holds the XYZ -> RGB matrix for the output primaries */
++    ff_fill_rgb2xyz_table(&in_primaries->prim, &in_primaries->wp, rgb2xyz); /* rgb2xyz is reused for the input primaries */
++    ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb);
++
++    return 0;
++}
++
++static void update_dovi_buf(AVFilterContext *ctx) /* Pack s->dovi reshaping data for the 3 components into s->dovi_pbuf as four consecutive float regions: params, pivots, coeffs, mmr. */
++{
++    TonemapCUDAContext *s = ctx->priv;
++    float coeffs_data[8][4] = {0};
++    float mmr_packed_data[8*6][4] = {0};
++    int c, i, j, k;
++
++    for (c = 0; c < 3; c++) {
++        int has_poly = 0, has_mmr = 0, mmr_single = 1;
++        int mmr_idx = 0, min_order = 3, max_order = 1;
++        const struct ReshapeData *comp = &s->dovi->comp[c];
++        if (!comp->num_pivots) /* components without pivots leave their slice of dovi_pbuf untouched */
++            continue;
++        av_assert0(comp->num_pivots >= 2 && comp->num_pivots <= 9);
++
++        memset(coeffs_data, 0, sizeof(coeffs_data));
++        for (i = 0; i < comp->num_pivots - 1; i++) {
++            switch (comp->method[i]) {
++            case 0: // polynomial
++                has_poly = 1;
++                coeffs_data[i][3] = 0.0f; // order=0 signals polynomial
++                for (k = 0; k < 3; k++)
++                    coeffs_data[i][k] = comp->poly_coeffs[i][k];
++                break;
++            case 1: /* MMR piece: slot 0 = constant, slot 1 = first row index in mmr_packed_data, slot 3 = order */
++                min_order = FFMIN(min_order, comp->mmr_order[i]);
++                max_order = FFMAX(max_order, comp->mmr_order[i]);
++                mmr_single = !has_mmr; /* stays 1 only while this is the first MMR piece seen */
++                has_mmr = 1;
++                coeffs_data[i][3] = (float)comp->mmr_order[i];
++                coeffs_data[i][0] = comp->mmr_constant[i];
++                coeffs_data[i][1] = (float)mmr_idx;
++                for (j = 0; j < comp->mmr_order[i]; j++) {
++                    // store weights per order as two packed vec4s
++                    float *mmr = &mmr_packed_data[mmr_idx][0]; /* mmr[4..7] below land in the following row, [mmr_idx + 1] */
++                    mmr[0] = comp->mmr_coeffs[i][j][0];
++                    mmr[1] = comp->mmr_coeffs[i][j][1];
++                    mmr[2] = comp->mmr_coeffs[i][j][2];
++                    mmr[3] = 0.0f; // unused
++                    mmr[4] = comp->mmr_coeffs[i][j][3];
++                    mmr[5] = comp->mmr_coeffs[i][j][4];
++                    mmr[6] = comp->mmr_coeffs[i][j][5];
++                    mmr[7] = comp->mmr_coeffs[i][j][6];
++                    mmr_idx += 2;
++                }
++                break;
++            default:
++                av_assert0(0);
++            }
++        }
++
++        av_assert0(has_poly || has_mmr);
++
++        if (has_mmr)
++            av_assert0(min_order <= max_order);
++
++        // dovi_params
++        {
++            float params[8] = { /* same order as the dovi_params[0..7] reads in cuda/tonemap.cu */
++                comp->num_pivots, !!has_mmr, !!has_poly,
++                mmr_single, min_order, max_order,
++                comp->pivots[0], comp->pivots[comp->num_pivots - 1]
++            };
++            memcpy(s->dovi_pbuf + c*params_cnt, params, params_sz);
++        }
++
++        // dovi_pivots
++        if (c == 0 && comp->num_pivots > 2) { /* pivots are written for component 0 only */
++            // Skip the (irrelevant) lower and upper bounds
++            float pivots_data[7+1] = {0};
++            memcpy(pivots_data, comp->pivots + 1,
++                   (comp->num_pivots - 2) * sizeof(pivots_data[0]));
++            // Fill the remainder with a quasi-infinite sentinel pivot
++            for (i = comp->num_pivots - 2; i < FF_ARRAY_ELEMS(pivots_data); i++)
++                pivots_data[i] = 1e9f;
++            memcpy(s->dovi_pbuf + 3*params_cnt + c*pivots_cnt, pivots_data, pivots_sz);
++        }
++
++        // dovi_coeffs
++        memcpy(s->dovi_pbuf + 3*(params_cnt+pivots_cnt) + c*coeffs_cnt, &coeffs_data[0], coeffs_sz);
++
++        // dovi_mmr
++        if (has_mmr)
++            memcpy(s->dovi_pbuf + 3*(params_cnt+pivots_cnt+coeffs_cnt) + c*mmr_cnt, &mmr_packed_data[0], mmr_sz);
++    }
++}
++
++static av_cold int compile(AVFilterLink *inlink)
++{
++    AVFilterContext *ctx = inlink->dst;
++    TonemapCUDAContext *s = ctx->priv;
++    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
++    CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
++    AVBPrint constants;
++    CUlinkState link_state;
++    int i, j, ret = 0;
++    void *cubin;
++    size_t cubin_size;
++    double ycc2rgb_offset[3] = {0};
++    double lms2rgb_matrix[3][3] = {0};
++    double rgb_matrix[3][3], yuv_matrix[3][3], rgb2rgb_matrix[3][3];
++    const AVLumaCoefficients *in_coeffs, *out_coeffs;
++    enum AVColorTransferCharacteristic in_trc = s->in_trc, out_trc = s->out_trc;
++    enum AVColorSpace in_spc = s->in_spc, out_spc = s->out_spc;
++    enum AVColorPrimaries in_pri = s->in_pri, out_pri = s->out_pri;
++    enum AVColorRange in_range = s->in_range, out_range = s->out_range;
++    int d = s->in_desc->comp[0].depth > s->out_desc->comp[0].depth && s->ditherTex;
++    char info_log[4096], error_log[4096];
++    CUjit_option options[] = { CU_JIT_INFO_LOG_BUFFER,
++                               CU_JIT_ERROR_LOG_BUFFER,
++                               CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
++                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES };
++    void *option_values[] = { &info_log,
++                              &error_log,
++                              (void*)(intptr_t)sizeof(info_log),
++                              (void*)(intptr_t)sizeof(error_log) };
++
++    extern const unsigned char ff_tonemap_ptx_data[];
++    extern const unsigned int ff_tonemap_ptx_len;
++
++    switch(s->tonemap) {
++    case TONEMAP_GAMMA:
++
if (isnan(s->param)) ++ s->param = 1.8f; ++ break; ++ case TONEMAP_REINHARD: ++ if (!isnan(s->param)) ++ s->param = (1.0f - s->param) / s->param; ++ break; ++ case TONEMAP_MOBIUS: ++ if (isnan(s->param)) ++ s->param = 0.3f; ++ break; ++ } ++ ++ if (isnan(s->param)) ++ s->param = 1.0f; ++ ++ s->ref_white = s->tonemap == TONEMAP_BT2390 ? REFERENCE_WHITE_ALT ++ : REFERENCE_WHITE; ++ ++ if (s->tonemap == TONEMAP_BT2390 && s->peak) ++ s->peak = FFMAX(s->peak / 10.0f, 1.1f); ++ ++ s->dst_peak = 1.0f; ++ ++ if (in_trc == AVCOL_TRC_UNSPECIFIED) ++ in_trc = AVCOL_TRC_SMPTE2084; ++ if (out_trc == AVCOL_TRC_UNSPECIFIED) ++ out_trc = AVCOL_TRC_BT709; ++ ++ if (!s->dovi && in_spc == AVCOL_SPC_UNSPECIFIED) ++ in_spc = AVCOL_SPC_BT2020_NCL; ++ if (out_spc == AVCOL_SPC_UNSPECIFIED) ++ out_spc = AVCOL_SPC_BT709; ++ ++ if (in_pri == AVCOL_PRI_UNSPECIFIED) ++ in_pri = AVCOL_PRI_BT2020; ++ if (out_pri == AVCOL_PRI_UNSPECIFIED) ++ out_pri = AVCOL_PRI_BT709; ++ ++ if (in_range == AVCOL_RANGE_UNSPECIFIED) ++ in_range = AVCOL_RANGE_MPEG; ++ if (out_range == AVCOL_RANGE_UNSPECIFIED) ++ out_range = AVCOL_RANGE_MPEG; ++ ++ if (out_trc == AVCOL_TRC_SMPTE2084) { ++ int is_10_or_16b_out = s->out_desc->comp[0].depth == 10 || ++ s->out_desc->comp[0].depth == 16; ++ if (!(is_10_or_16b_out && ++ out_pri == AVCOL_PRI_BT2020 && ++ out_spc == AVCOL_SPC_BT2020_NCL)) { ++ av_log(ctx, AV_LOG_ERROR, "HDR passthrough requires BT.2020 " ++ "colorspace and 10/16 bit output format depth.\n"); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n", ++ av_color_transfer_name(in_trc), ++ av_color_transfer_name(out_trc)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n", ++ s->dovi ? 
"dolby_vision" : av_color_space_name(in_spc), ++ av_color_space_name(out_spc)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n", ++ av_color_primaries_name(in_pri), ++ av_color_primaries_name(out_pri)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n", ++ av_color_range_name(in_range), ++ av_color_range_name(out_range)); ++ ++ if (s->dovi) { ++ for (i = 0; i < 3; i++) { ++ for (j = 0; j < 3; j++) ++ ycc2rgb_offset[i] -= s->dovi->nonlinear[i][j] * s->dovi->nonlinear_offset[j]; ++ } ++ ff_matrix_mul_3x3(lms2rgb_matrix, dovi_lms2rgb_matrix, s->dovi->linear); ++ } else { ++ if (!(in_coeffs = av_csp_luma_coeffs_from_avcsp(in_spc))) ++ return AVERROR(EINVAL); ++ ++ ff_fill_rgb2yuv_table(in_coeffs, yuv_matrix); ++ ff_matrix_invert_3x3(yuv_matrix, rgb_matrix); ++ } ++ ++ if (!(out_coeffs = av_csp_luma_coeffs_from_avcsp(out_spc))) ++ return AVERROR(EINVAL); ++ ++ ff_fill_rgb2yuv_table(out_coeffs, yuv_matrix); ++ ++ if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0) ++ return ret; ++ ++ av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED); ++ ++ av_bprintf(&constants, ".version 3.2\n"); ++ av_bprintf(&constants, ".target sm_30\n"); ++ av_bprintf(&constants, ".address_size %zu\n", sizeof(void*) * 8); ++ ++#define CONSTANT_A(decl, align, ...) \ ++ av_bprintf(&constants, ".visible .const .align " #align " " decl ";\n", __VA_ARGS__) ++#define CONSTANT(decl, ...) 
CONSTANT_A(decl, 4, __VA_ARGS__) ++#define CONSTANT_M(a, b) \ ++ CONSTANT(".f32 " a "[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f}", \ ++ b[0][0], b[0][1], b[0][2], \ ++ b[1][0], b[1][1], b[1][2], \ ++ b[2][0], b[2][1], b[2][2]) ++#define CONSTANT_C(a, b, c, d) \ ++ CONSTANT(".f32 " a "[] = {%f, %f, %f}", \ ++ b, c, d) ++ ++ CONSTANT(".u32 depth_src = %i", (int)s->in_desc->comp[0].depth); ++ CONSTANT(".u32 depth_dst = %i", (int)s->out_desc->comp[0].depth); ++ CONSTANT(".u32 fmt_src = %i", (int)s->in_fmt); ++ CONSTANT(".u32 fmt_dst = %i", (int)s->out_fmt); ++ CONSTANT(".u32 range_src = %i", (int)in_range); ++ CONSTANT(".u32 range_dst = %i", (int)out_range); ++ CONSTANT(".u32 trc_src = %i", (int)in_trc); ++ CONSTANT(".u32 trc_dst = %i", (int)out_trc); ++ CONSTANT(".u32 chroma_loc_src = %i", (int)s->in_chroma_loc); ++ CONSTANT(".u32 chroma_loc_dst = %i", (int)s->out_chroma_loc); ++ CONSTANT(".u32 tonemap_func = %i", (int)s->tonemap); ++ CONSTANT(".u32 enable_dither = %i", (int)(s->in_desc->comp[0].depth > s->out_desc->comp[0].depth)); ++ CONSTANT(".f32 dither_size = %f", (float)ff_fruit_dither_size); ++ CONSTANT(".f32 dither_quantization = %f", (float)((1 << s->out_desc->comp[0].depth) - 1)); ++ CONSTANT(".f32 ref_white = %f", s->ref_white); ++ CONSTANT(".f32 tone_param = %f", s->param); ++ CONSTANT(".f32 desat_param = %f", s->desat_param); ++ CONSTANT(".f32 pq_max_lum_div_ref_white = %f", (float)(ST2084_MAX_LUMINANCE / s->ref_white)); ++ CONSTANT(".f32 ref_white_div_pq_max_lum = %f", (float)(s->ref_white / ST2084_MAX_LUMINANCE)); ++ CONSTANT_M("rgb_matrix", (s->dovi ? 
s->dovi->nonlinear : rgb_matrix));
++    CONSTANT_M("yuv_matrix", yuv_matrix);
++    CONSTANT_A(".u8 rgb2rgb_passthrough = %i", 1, in_pri == out_pri);
++    CONSTANT_M("rgb2rgb_matrix", rgb2rgb_matrix);
++    CONSTANT_M("lms2rgb_matrix", lms2rgb_matrix);
++    CONSTANT_C("luma_dst", av_q2d(out_coeffs->cr), av_q2d(out_coeffs->cg), av_q2d(out_coeffs->cb));
++    CONSTANT_C("ycc2rgb_offset", ycc2rgb_offset[0], ycc2rgb_offset[1], ycc2rgb_offset[2]);
++    info_log[0] = error_log[0] = '\0'; /* the exit path logs both buffers, also when the linker never ran */
++    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
++    if (ret < 0)
++        return ret;
++
++    if (s->dovi) {
++        s->dovi_pbuf = av_mallocz(3*(params_sz+pivots_sz+coeffs_sz+mmr_sz)); /* host staging buffer filled by update_dovi_buf() */
++        ret = s->dovi_pbuf ? CHECK_CU(cu->cuMemAlloc(&s->doviBuffer, 3*(params_sz+pivots_sz+coeffs_sz+mmr_sz))) : AVERROR(ENOMEM);
++        if (ret < 0)
++            goto fail;
++    }
++
++    if (s->dovi && s->tradeoff == -1) {
++        int major, minor, mp;
++        s->tradeoff = 0;
++
++        ret = CHECK_CU(cu->cuDeviceComputeCapability(&major, &minor, s->hwctx->internal->cuda_device));
++        if (ret < 0)
++            goto fail; /* context is pushed: leave through the common cleanup, not a bare return */
++
++        ret = CHECK_CU(cu->cuDeviceGetAttribute(&mp,
++                                                CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
++                                                s->hwctx->internal->cuda_device));
++        if (ret < 0)
++            goto fail; /* same: pop the context and finalize 'constants' */
++
++        switch (major) {
++        case 1:
++        case 2:
++            s->tradeoff = 1; break;
++        case 3:
++            s->tradeoff = mp * 192 < 1024; break;
++        case 5:
++            s->tradeoff = mp * 128 < 1024; break;
++        case 6:
++            if (minor == 0) s->tradeoff = mp * 64 < 1024;
++            if (minor == 1 || minor == 2) s->tradeoff = mp * 128 < 1024;
++            break;
++        case 7:
++            s->tradeoff = mp * 64 < 512; break;
++        }
++
++        if (!s->tradeoff)
++            av_log(ctx, AV_LOG_DEBUG, "Disabled dovi tradeoff on high perf GPU.\n");
++    }
++
++    if (s->tonemap_mode == TONEMAP_MODE_AUTO) {
++        if (s->tradeoff) {
++            s->tonemap_mode = TONEMAP_MODE_LUM;
++        } else {
++            s->tonemap_mode = TONEMAP_MODE_ITP;
++        }
++    }
++
++    if (s->cu_module) {
++        ret = CHECK_CU(cu->cuModuleUnload(s->cu_module));
++        if (ret < 0)
++            goto fail;
++
++        s->cu_func_tm = NULL;
++        s->cu_func_dovi = NULL;
++        s->cu_func_dovi_pq = NULL;
++        s->cu_module = NULL;
++    }
++
++    ret =
CHECK_CU(cu->cuLinkCreate(sizeof(options) / sizeof(options[0]), options, option_values, &link_state)); ++ if (ret < 0) ++ goto fail; ++ ++ ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, (void *)constants.str, ++ (size_t)constants.len, "constants", 0, NULL, NULL)); ++ if (ret < 0) ++ goto fail2; ++ ++ ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, (void *)ff_tonemap_ptx_data, ++ (size_t)ff_tonemap_ptx_len, "ff_tonemap_ptx_data", 0, NULL, NULL)); ++ if (ret < 0) ++ goto fail2; ++ ++ ret = CHECK_CU(cu->cuLinkComplete(link_state, &cubin, &cubin_size)); ++ if (ret < 0) ++ goto fail2; ++ ++ ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, cubin)); ++ if (ret < 0) ++ goto fail2; ++ ++ switch (s->tonemap_mode) { ++ default: ++ case TONEMAP_MODE_MAX: ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_tm, s->cu_module, d ? "tonemap_max_d" : "tonemap_max")); ++ if (ret < 0) goto fail2; ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_dovi, s->cu_module, ++ s->tradeoff == 1 ? (d ? "tonemap_dovi_max_d_f" : "tonemap_dovi_max_f") ++ : (d ? "tonemap_dovi_max_d" : "tonemap_dovi_max"))); ++ if (ret < 0) goto fail2; ++ break; ++ case TONEMAP_MODE_RGB: ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_tm, s->cu_module, d ? "tonemap_rgb_d" : "tonemap_rgb")); ++ if (ret < 0) goto fail2; ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_dovi, s->cu_module, ++ s->tradeoff == 1 ? (d ? "tonemap_dovi_rgb_d_f" : "tonemap_dovi_rgb_f") ++ : (d ? "tonemap_dovi_rgb_d" : "tonemap_dovi_rgb"))); ++ if (ret < 0) goto fail2; ++ break; ++ case TONEMAP_MODE_LUM: ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_tm, s->cu_module, d ? "tonemap_lum_d" : "tonemap_lum")); ++ if (ret < 0) goto fail2; ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_dovi, s->cu_module, ++ s->tradeoff == 1 ? (d ? "tonemap_dovi_lum_d_f" : "tonemap_dovi_lum_f") ++ : (d ? 
"tonemap_dovi_lum_d" : "tonemap_dovi_lum"))); ++ if (ret < 0) goto fail2; ++ break; ++ case TONEMAP_MODE_ITP: ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_tm, s->cu_module, d ? "tonemap_itp_d" : "tonemap_itp")); ++ if (ret < 0) goto fail2; ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_dovi, s->cu_module, ++ s->tradeoff == 1 ? (d ? "tonemap_dovi_itp_d_f" : "tonemap_dovi_itp_f") ++ : (d ? "tonemap_dovi_itp_d" : "tonemap_dovi_itp"))); ++ if (ret < 0) goto fail2; ++ break; ++ } ++ ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_dovi_pq, s->cu_module, ++ s->tradeoff == 1 ? "tonemap_dovi_pq_f" ++ : "tonemap_dovi_pq")); ++ if (ret < 0) ++ goto fail2; ++ ++fail2: ++ CHECK_CU(cu->cuLinkDestroy(link_state)); ++ ++fail: ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ ++ av_bprint_finalize(&constants, NULL); ++ ++ if ((intptr_t)option_values[2] > 0) ++ av_log(ctx, AV_LOG_INFO, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[2], info_log); ++ ++ if ((intptr_t)option_values[3] > 0) ++ av_log(ctx, AV_LOG_ERROR, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[3], error_log); ++ ++ return ret; ++} ++ ++static av_cold int config_props(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AVFilterLink *inlink = outlink->src->inputs[0]; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; ++ TonemapCUDAContext *s = ctx->priv; ++ int ret; ++ ++ s->hwctx = device_hwctx; ++ ++ outlink->w = inlink->w; ++ outlink->h = inlink->h; ++ ++ ret = init_processing_chain(ctx, outlink); ++ if (ret < 0) ++ return ret; ++ ++ if (s->in_desc->comp[0].depth > s->out_desc->comp[0].depth) { ++ if ((ret = setup_dither(ctx)) < 0) ++ return ret; ++ } ++ ++ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; ++ ++ return 0; ++} ++ ++static int run_kernel(AVFilterContext *ctx, ++ AVFrame *out, AVFrame *in) ++{ ++ TonemapCUDAContext *s = ctx->priv; 
++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ FFCUDAFrame src, dst; ++ void *args[] = { &src, &dst, &s->ditherTex, &s->doviBuffer }; ++ int ret, pq_out = s->out_trc == AVCOL_TRC_SMPTE2084; ++ ++ ret = ff_make_cuda_frame(ctx, cu, 1, ++ &src, in, s->in_desc); ++ if (ret < 0) ++ goto fail; ++ ++ ret = ff_make_cuda_frame(ctx, cu, 0, ++ &dst, out, s->out_desc); ++ if (ret < 0) ++ goto fail; ++ ++ src.peak = s->peak; ++ dst.peak = s->dst_peak; ++ ++ ret = CHECK_CU(cu->cuLaunchKernel(s->dovi ? (pq_out ? s->cu_func_dovi_pq : s->cu_func_dovi) : s->cu_func_tm, ++ DIV_UP(src.width / 2, BLOCKX), DIV_UP(src.height / 2, BLOCKY), 1, ++ BLOCKX, BLOCKY, 1, 0, s->hwctx->stream, args, NULL)); ++ ++fail: ++ return ret; ++} ++ ++static int do_tonemap(AVFilterContext *ctx, AVFrame *out, AVFrame *in) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ AVFrame *src = in; ++ int ret; ++ ++ ret = run_kernel(ctx, s->frame, src); ++ if (ret < 0) ++ return ret; ++ ++ src = s->frame; ++ ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); ++ if (ret < 0) ++ return ret; ++ ++ av_frame_move_ref(out, s->frame); ++ av_frame_move_ref(s->frame, s->tmp_frame); ++ ++ s->frame->width = in->width; ++ s->frame->height = in->height; ++ ++ ret = av_frame_copy_props(out, in); ++ if (ret < 0) ++ return ret; ++ ++ if (s->out_trc != out->color_trc || ++ s->out_spc != out->colorspace || ++ s->out_pri != out->color_primaries || ++ s->out_range != out->color_range || ++ s->out_chroma_loc != out->chroma_location) { ++ out->color_trc = s->out_trc; ++ out->colorspace = s->out_spc; ++ out->color_primaries = s->out_pri; ++ out->color_range = s->out_range; ++ out->chroma_location = s->out_chroma_loc; ++ } ++ ++ return 0; ++} ++ ++static int filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *ctx = link->dst; ++ TonemapCUDAContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ ++ AVFrame *out = NULL; ++ AVFrameSideData 
*dovi_sd = NULL;
++    CUcontext dummy;
++    int ret = 0, pop_ret;
++
++    out = av_frame_alloc();
++    if (!out) {
++        ret = AVERROR(ENOMEM);
++        goto fail;
++    }
++
++    if (s->apply_dovi)
++        dovi_sd = av_frame_get_side_data(in, AV_FRAME_DATA_DOVI_METADATA);
++
++    // check DOVI->HDR10/HLG
++    if (!dovi_sd) {
++        if (in->color_trc != AVCOL_TRC_SMPTE2084 &&
++            in->color_trc != AVCOL_TRC_ARIB_STD_B67) {
++            av_log(ctx, AV_LOG_ERROR, "No DOVI metadata and "
++                   "unsupported input transfer characteristic: %s\n",
++                   av_color_transfer_name(in->color_trc));
++            ret = AVERROR(EINVAL);
++            goto fail;
++        }
++    }
++
++    if (!s->peak) {
++        if (dovi_sd) {
++            const AVDOVIMetadata *metadata = (AVDOVIMetadata *) dovi_sd->data;
++            s->peak = ff_determine_dovi_signal_peak(metadata);
++        } else {
++            s->peak = ff_determine_signal_peak(in);
++        }
++        av_log(ctx, AV_LOG_DEBUG, "Computed signal peak: %f\n", s->peak);
++    }
++
++    if (dovi_sd) {
++        const AVDOVIMetadata *metadata = (AVDOVIMetadata *) dovi_sd->data;
++        const AVDOVIRpuDataHeader *rpu = av_dovi_get_header(metadata);
++        // only map dovi rpus that don't require an EL
++        if (rpu->disable_residual_flag) {
++            struct DoviMetadata *dovi = av_malloc(sizeof(*dovi));
++            if (!(s->dovi = dovi)) {
++                ret = AVERROR(ENOMEM); /* ret was still 0 here, so the failure was reported as success */
++                goto fail;
++            }
++            ff_map_dovi_metadata(s->dovi, metadata);
++            in->color_trc = AVCOL_TRC_SMPTE2084;
++            in->colorspace = AVCOL_SPC_UNSPECIFIED;
++            in->color_primaries = AVCOL_PRI_BT2020;
++        }
++    }
++
++    if (!s->init_with_dovi && s->dovi && s->cu_func_tm)
++        uninit_common(ctx);
++
++    if (!s->cu_func_tm ||
++        !s->cu_func_dovi ||
++        s->in_trc != in->color_trc ||
++        s->in_spc != in->colorspace ||
++        s->in_pri != in->color_primaries ||
++        s->in_range != in->color_range ||
++        s->in_chroma_loc != in->chroma_location) {
++        s->in_trc = in->color_trc;
++        s->in_spc = in->colorspace;
++        s->in_pri = in->color_primaries;
++        s->in_range = in->color_range;
++        s->in_chroma_loc = in->chroma_location;
++
++        s->out_trc = s->trc == -1 ? AVCOL_TRC_UNSPECIFIED : s->trc;
++        s->out_spc = s->spc == -1 ? AVCOL_SPC_UNSPECIFIED : s->spc;
++        s->out_pri = s->pri == -1 ? AVCOL_PRI_UNSPECIFIED : s->pri;
++        s->out_range = s->range == -1 ? s->in_range : s->range;
++        s->out_chroma_loc = s->in_chroma_loc;
++
++        if ((ret = compile(link)) < 0)
++            goto fail;
++
++        s->init_with_dovi = !!s->dovi;
++    }
++
++    ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
++    if (ret < 0)
++        goto fail;
++
++    if (s->dovi) {
++        update_dovi_buf(ctx);
++
++        ret = CHECK_CU(cu->cuMemcpyHtoDAsync(s->doviBuffer, s->dovi_pbuf,
++                                             3*(params_sz+pivots_sz+coeffs_sz+mmr_sz), s->hwctx->stream));
++        if (ret < 0)
++            av_log(ctx, AV_LOG_ERROR, "Failed to update dovi buf.\n");
++    }
++
++    /* Skip the kernel after a failed upload, but still pop the context below. */
++    if (ret >= 0)
++        ret = do_tonemap(ctx, out, in);
++
++    av_freep(&s->dovi);
++
++    /* Pop unconditionally; an earlier error takes precedence over the pop status. */
++    pop_ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
++    if (ret < 0 || (ret = pop_ret) < 0)
++        goto fail;
++
++    av_frame_free(&in);
++
++    if (s->out_trc != AVCOL_TRC_SMPTE2084) {
++        av_frame_remove_side_data(out, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
++        av_frame_remove_side_data(out, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
++    }
++
++    av_frame_remove_side_data(out, AV_FRAME_DATA_DOVI_RPU_BUFFER);
++    av_frame_remove_side_data(out, AV_FRAME_DATA_DOVI_METADATA);
++
++    return ff_filter_frame(outlink, out);
++fail:
++    if (s->dovi)
++        av_freep(&s->dovi);
++    av_frame_free(&in);
++    av_frame_free(&out);
++    return ret;
++}
++
++#define OFFSET(x) offsetof(TonemapCUDAContext, x)
++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
++static const AVOption options[] = {
++    { "tonemap", "Tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_COUNT - 1, FLAGS, .unit = "tonemap" },
++    { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, .unit = "tonemap" },
++    { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, .unit = "tonemap" },
++    { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0,
FLAGS, .unit = "tonemap" }, ++ { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "bt2390", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_BT2390}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "tonemap_mode", "Tonemap mode selection", OFFSET(tonemap_mode), AV_OPT_TYPE_INT, {.i64 = TONEMAP_MODE_AUTO}, TONEMAP_MODE_MAX, TONEMAP_MODE_COUNT - 1, FLAGS, .unit = "tonemap_mode" }, ++ { "max", "Brightest channel based tonemap", 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MODE_MAX}, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "rgb", "Per-channel based tonemap", 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MODE_RGB}, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "lum", "Relative luminance based tonemap", 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MODE_LUM}, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "itp", "ICtCp intensity based tonemap", 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MODE_ITP}, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "auto", "Select based on GPU spec", 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MODE_AUTO}, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "transfer", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, .unit = "transfer" }, ++ { "t", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, .unit = "transfer" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, .unit = "transfer" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, .unit = "transfer" }, ++ { "smpte2084", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE2084}, 0, 0, FLAGS, .unit = "transfer" }, ++ { "matrix", "Set 
colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, .unit = "matrix" }, ++ { "m", "Set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, .unit = "matrix" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, .unit = "matrix" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, .unit = "matrix" }, ++ { "primaries", "Set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, .unit = "primaries" }, ++ { "p", "Set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, .unit = "primaries" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, .unit = "primaries" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, .unit = "primaries" }, ++ { "range", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "range" }, ++ { "r", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "range" }, ++ { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, .unit = "range" }, ++ { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, .unit = "range" }, ++ { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, .unit = "range" }, ++ { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, .unit = "range" }, ++ { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, ++ { "apply_dovi", "Apply Dolby Vision metadata if possible", OFFSET(apply_dovi), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, ++ { "tradeoff", "Apply tradeoffs to offload computing", OFFSET(tradeoff), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, FLAGS, .unit = "tradeoff" }, ++ { "auto", 0, 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, FLAGS, .unit = 
"tradeoff" }, ++ { "disabled", 0, 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, FLAGS, .unit = "tradeoff" }, ++ { "enabled", 0, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, FLAGS, .unit = "tradeoff" }, ++ { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, ++ { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, ++ { NULL }, ++}; ++ ++static const AVClass tonemap_cuda_class = { ++ .class_name = "tonemap_cuda", ++ .item_name = av_default_item_name, ++ .option = options, ++ .version = LIBAVUTIL_VERSION_INT, ++}; ++ ++static const AVFilterPad tonemap_cuda_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = filter_frame, ++ }, ++}; ++ ++static const AVFilterPad tonemap_cuda_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = config_props, ++ }, ++}; ++ ++const AVFilter ff_vf_tonemap_cuda = { ++ .name = "tonemap_cuda", ++ .description = NULL_IF_CONFIG_SMALL("GPU accelerated HDR to SDR tonemapping"), ++ ++ .init = init, ++ .uninit = uninit, ++ ++ .priv_size = sizeof(TonemapCUDAContext), ++ .priv_class = &tonemap_cuda_class, ++ ++ FILTER_INPUTS(tonemap_cuda_inputs), ++ FILTER_OUTPUTS(tonemap_cuda_outputs), ++ ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA), ++ ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/cross/ffmpeg7/patches/1005-jellyfin-0005-add-amf-refactor-and-10bit-encoding.patch b/cross/ffmpeg7/patches/1005-jellyfin-0005-add-amf-refactor-and-10bit-encoding.patch new file mode 100644 index 00000000000..1eeb2e7a572 --- /dev/null +++ b/cross/ffmpeg7/patches/1005-jellyfin-0005-add-amf-refactor-and-10bit-encoding.patch @@ -0,0 +1,2584 @@ +Index: 
FFmpeg/libavcodec/Makefile +=================================================================== +--- libavcodec/Makefile ++++ libavcodec/Makefile +@@ -68,7 +68,7 @@ include $(SRC_PATH)/libavcodec/vvc/Makef + OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o + OBJS-$(CONFIG_AC3DSP) += ac3dsp.o ac3.o ac3tab.o + OBJS-$(CONFIG_ADTS_HEADER) += adts_header.o mpeg4audio_sample_rates.o +-OBJS-$(CONFIG_AMF) += amfenc.o ++OBJS-$(CONFIG_AMF) += amfenc.o amf.o + OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o + OBJS-$(CONFIG_ATSC_A53) += atsc_a53.o + OBJS-$(CONFIG_AUDIODSP) += audiodsp.o +@@ -1261,7 +1261,7 @@ SKIPHEADERS + + bitstream_template.h \ + $(ARCH)/vpx_arith.h \ + +-SKIPHEADERS-$(CONFIG_AMF) += amfenc.h ++SKIPHEADERS-$(CONFIG_AMF) += amfenc.h amf.h + SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h + SKIPHEADERS-$(CONFIG_D3D12VA) += d3d12va_decode.h + SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h +Index: FFmpeg/libavcodec/amf.c +=================================================================== +--- /dev/null ++++ libavcodec/amf.c +@@ -0,0 +1,345 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "amf.h"
++
++#define FFMPEG_AMF_WRITER_ID L"ffmpeg_amf"
++
++const FormatMap format_map[] =
++{
++    { AV_PIX_FMT_NONE, AMF_SURFACE_UNKNOWN },
++    { AV_PIX_FMT_NV12, AMF_SURFACE_NV12 },
++    { AV_PIX_FMT_P010, AMF_SURFACE_P010 },
++    { AV_PIX_FMT_BGRA, AMF_SURFACE_BGRA },
++    { AV_PIX_FMT_BGR0, AMF_SURFACE_BGRA },
++    { AV_PIX_FMT_RGBA, AMF_SURFACE_RGBA },
++    { AV_PIX_FMT_RGB0, AMF_SURFACE_RGBA },
++    { AV_PIX_FMT_GRAY8, AMF_SURFACE_GRAY8 },
++    { AV_PIX_FMT_YUV420P, AMF_SURFACE_YUV420P },
++    { AV_PIX_FMT_YUYV422, AMF_SURFACE_YUY2 },
++};
++
++enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt)
++{
++    int i;
++    for (i = 0; i < amf_countof(format_map); i++) {
++        if (format_map[i].av_format == fmt) {
++            return format_map[i].amf_format;
++        }
++    }
++    return AMF_SURFACE_UNKNOWN;
++}
++
++enum AVPixelFormat amf_to_av_format(enum AMF_SURFACE_FORMAT fmt)
++{
++    int i;
++    for (i = 0; i < amf_countof(format_map); i++) {
++        if (format_map[i].amf_format == fmt) {
++            return format_map[i].av_format; /* first table match wins (e.g. BGRA before BGR0) */
++        }
++    }
++    return AV_PIX_FMT_NONE; /* was AMF_SURFACE_UNKNOWN, which is not an AVPixelFormat value */
++}
++
++amf_int64 amf_av_to_amf_color_profile(AVCodecContext *avctx)
++{
++    amf_int64 color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_UNKNOWN;
++    if (avctx->color_range == AVCOL_RANGE_JPEG) {
++        // Color Space for Full (JPEG) Range
++        switch (avctx->colorspace) {
++        case
AVCOL_SPC_SMPTE170M: ++ color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_601; ++ break; ++ case AVCOL_SPC_BT709: ++ color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_709; ++ break; ++ case AVCOL_SPC_BT2020_NCL: ++ case AVCOL_SPC_BT2020_CL: ++ color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_2020; ++ break; ++ } ++ } ++ return color_profile; ++} ++ ++static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter *pThis, ++ const wchar_t *scope, const wchar_t *message) ++{ ++ AVAMFLogger *logger = (AVAMFLogger*)pThis; ++ av_log(logger->avcl, AV_LOG_DEBUG, "%ls: %ls", scope, message); ++} ++ ++static void AMF_CDECL_CALL AMFTraceWriter_Flush(AMFTraceWriter *pThis) {} ++ ++static AMFTraceWriterVtbl tracer_vtbl = ++{ ++ .Write = AMFTraceWriter_Write, ++ .Flush = AMFTraceWriter_Flush, ++}; ++ ++int amf_load_library(AVAMFContext *ctx) ++{ ++ AMFInit_Fn init_fun; ++ AMFQueryVersion_Fn version_fun; ++ AMF_RESULT res; ++ ++ ctx->library = dlopen(AMF_DLL_NAMEA, RTLD_NOW | RTLD_LOCAL); ++ AMF_RETURN_IF_FALSE(ctx->avclass, ctx->library != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to open\n", AMF_DLL_NAMEA); ++ ++ init_fun = (AMFInit_Fn)dlsym(ctx->library, AMF_INIT_FUNCTION_NAME); ++ AMF_RETURN_IF_FALSE(ctx->avclass, init_fun != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_INIT_FUNCTION_NAME); ++ ++ version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library, AMF_QUERY_VERSION_FUNCTION_NAME); ++ AMF_RETURN_IF_FALSE(ctx->avclass, version_fun != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_QUERY_VERSION_FUNCTION_NAME); ++ ++ res = version_fun(&ctx->version); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_QUERY_VERSION_FUNCTION_NAME, res); ++ ++ res = init_fun(AMF_FULL_VERSION, &ctx->factory); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_INIT_FUNCTION_NAME, res); ++ ++ res = 
ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res); ++ ++ res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res); ++ ++ return 0; ++} ++ ++int amf_create_context(AVAMFContext *ctx) ++{ ++ AMF_RESULT res; ++ ++ // configure AMF logger ++ ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, !!ctx->log_to_dbg); ++ if (ctx->log_to_dbg) ++ ctx->trace->pVtbl->SetWriterLevel(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE); ++ ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_CONSOLE, 0); ++ ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE); ++ ++ // connect AMF logger to av_log ++ ctx->logger.vtbl = &tracer_vtbl; ++ ctx->logger.avcl = ctx->avclass; ++ ctx->trace->pVtbl->RegisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID, (AMFTraceWriter*)&ctx->logger, 1); ++ ctx->trace->pVtbl->SetWriterLevel(ctx->trace, FFMPEG_AMF_WRITER_ID, AMF_TRACE_TRACE); ++ ++ res = ctx->factory->pVtbl->CreateContext(ctx->factory, &ctx->context); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "CreateContext() failed with error %d\n", res); ++ ++ return 0; ++} ++ ++void amf_unload_library(AVAMFContext *ctx) ++{ ++ if (ctx->context) { ++ ctx->context->pVtbl->Terminate(ctx->context); ++ ctx->context->pVtbl->Release(ctx->context); ++ ctx->context = NULL; ++ } ++ if (ctx->trace) { ++ ctx->trace->pVtbl->UnregisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID); ++ } ++ if (ctx->library) { ++ dlclose(ctx->library); ++ ctx->library = NULL; ++ } ++ ctx->trace = NULL; ++ ctx->debug = NULL; ++ ctx->factory = NULL; ++ ctx->version = 0; ++} ++ ++int amf_context_init_dx11(AVAMFContext *ctx) ++{ ++ AMF_RESULT res; ++ ++ res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_1); ++ if (res != 
AMF_OK) { ++ res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_0); ++ } ++ ++ if (res == AMF_OK) { ++ av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via DX11\n"); ++ } else { ++ if (res == AMF_NOT_SUPPORTED) ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX11 is not supported on the given device\n"); ++ else ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default DX11 device: %d\n", res); ++ } ++ return res; ++} ++ ++int amf_context_init_dx9(AVAMFContext *ctx) ++{ ++ AMF_RESULT res; ++ ++ res = ctx->context->pVtbl->InitDX9(ctx->context, NULL); ++ if (res == AMF_OK) { ++ av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via DX9\n"); ++ } else { ++ if (res == AMF_NOT_SUPPORTED) ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX9 is not supported on the given device\n"); ++ else ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default DX9 device: %d\n", res); ++ } ++ return res; ++} ++ ++int amf_context_init_vulkan(AVAMFContext *ctx) ++{ ++ AMF_RESULT res; ++ AMFContext1* context1 = NULL; ++ AMFGuid guid = IID_AMFContext1(); ++ ++ res = ctx->context->pVtbl->QueryInterface(ctx->context, &guid, (void**)&context1); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext1() failed with error %d\n", res); ++ ++ res = context1->pVtbl->InitVulkan(context1, NULL); ++ context1->pVtbl->Release(context1); ++ if (res == AMF_OK) { ++ av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via Vulkan\n"); ++ } else { ++ if (res == AMF_NOT_SUPPORTED) ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF via Vulkan is not supported on the given device\n"); ++ else ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default Vulkan device: %d\n", res); ++ } ++ return res; ++} ++ ++int amf_context_init_opencl(AVAMFContext *ctx) ++{ ++ AMF_RESULT res; ++ ++ res = ctx->context->pVtbl->InitOpenCL(ctx->context, NULL); ++ if (res == AMF_OK) { ++ 
av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via OpenCL\n"); ++ } else { ++ if (res == AMF_NOT_SUPPORTED) ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF via OpenCL is not supported on the given device\n"); ++ else ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default OpenCL device: %d\n", res); ++ } ++ return res; ++} ++ ++#if CONFIG_D3D11VA ++int amf_context_derive_dx11(AVAMFContext *ctx, AVD3D11VADeviceContext *hwctx) ++{ ++ AMF_RESULT res; ++ ++ res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_1); ++ if (res != AMF_OK) { ++ res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_0); ++ } ++ ++ if (res == AMF_OK) { ++ av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derived succeeded via DX11\n"); ++ } else { ++ if (res == AMF_NOT_SUPPORTED) ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX11 is not supported on the given device\n"); ++ else ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given DX11 device: %d\n", res); ++ return AVERROR(ENODEV); ++ } ++ return res; ++} ++#endif ++ ++#if CONFIG_DXVA2 ++int amf_context_derive_dx9(AVAMFContext *ctx, AVDXVA2DeviceContext *hwctx) ++{ ++ AMF_RESULT res; ++ HRESULT hr; ++ HANDLE device_handle; ++ IDirect3DDevice9* device; ++ ++ hr = IDirect3DDeviceManager9_OpenDeviceHandle(hwctx->devmgr, &device_handle); ++ if (FAILED(hr)) { ++ av_log(ctx->avclass, AV_LOG_ERROR, "Failed to open device handle for DX9 device: %lx\n", (unsigned long)hr); ++ return AVERROR_EXTERNAL; ++ } ++ ++ hr = IDirect3DDeviceManager9_LockDevice(hwctx->devmgr, device_handle, &device, FALSE); ++ if (SUCCEEDED(hr)) ++ IDirect3DDeviceManager9_UnlockDevice(hwctx->devmgr, device_handle, FALSE); ++ IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle); /* close on both paths: the failure return below used to leak the handle */ ++ ++ if (FAILED(hr)) { ++ av_log(ctx->avclass, AV_LOG_ERROR, "Failed to lock device handle for DX9 device: %lx\n", (unsigned long)hr); ++ return AVERROR_EXTERNAL; ++ } ++ ++ res =
ctx->context->pVtbl->InitDX9(ctx->context, device); ++ ++ IDirect3DDevice9_Release(device); ++ ++ if (res == AMF_OK) { ++ av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derived succeeded via DX9\n"); ++ } else { ++ if (res == AMF_NOT_SUPPORTED) ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX9 is not supported on the given device\n"); ++ else ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given DX9 device: %d\n", res); ++ return AVERROR(ENODEV); ++ } ++ return res; ++} ++#endif ++ ++#if CONFIG_OPENCL ++int amf_context_derive_opencl(AVAMFContext *ctx, AVOpenCLDeviceContext *hwctx) ++{ ++ AMF_RESULT res; ++ ++ res = ctx->context->pVtbl->InitOpenCL(ctx->context, hwctx->command_queue); ++ if (res == AMF_OK) { ++ av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derived succeeded via OpenCL\n"); ++ } else { ++ if (res == AMF_NOT_SUPPORTED) ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF via OpenCL is not supported on the given device\n"); ++ else ++ av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given OpenCL device: %d\n", res); ++ return AVERROR(ENODEV); ++ } ++ return res; ++} ++#endif +Index: FFmpeg/libavcodec/amf.h +=================================================================== +--- /dev/null ++++ libavcodec/amf.h +@@ -0,0 +1,133 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVCODEC_AMF_H ++#define AVCODEC_AMF_H ++ ++#include ++#include ++#include ++ ++#include "config.h" ++#include "avcodec.h" ++ ++#include "libavutil/pixdesc.h" ++ ++#if CONFIG_D3D11VA ++#include "libavutil/hwcontext_d3d11va.h" ++#endif ++ ++#if CONFIG_DXVA2 ++#define COBJMACROS ++#include "libavutil/hwcontext_dxva2.h" ++#endif ++ ++#if CONFIG_OPENCL ++#include "libavutil/hwcontext_opencl.h" ++#endif ++ ++#ifdef _WIN32 ++#include "compat/w32dlfcn.h" ++#else ++#include ++#endif ++ ++/** ++* Error handling helper ++*/ ++#define AMF_RETURN_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) \ ++ if (!(exp)) { \ ++ av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ ++ return ret_value; \ ++ } ++ ++#define AMF_GOTO_FAIL_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) 
\ ++ if (!(exp)) { \ ++ av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ ++ ret = ret_value; \ ++ goto fail; \ ++ } ++ ++/** ++* AMF trace writer callback class ++* Used to capture all AMF logging ++*/ ++typedef struct AVAMFLogger { ++ AMFTraceWriterVtbl *vtbl; ++ void *avcl; ++} AVAMFLogger; ++ ++typedef struct AVAMFContext { ++ void *avclass; ++ int log_to_dbg; ++ ++ // access to AMF runtime ++ amf_handle library; ///< handle to DLL library ++ AMFFactory *factory; ///< pointer to AMF factory ++ AMFDebug *debug; ///< pointer to AMF debug interface ++ AMFTrace *trace; ///< pointer to AMF trace interface ++ ++ amf_uint64 version; ///< version of AMF runtime ++ AVAMFLogger logger; ///< AMF writer registered with AMF ++ AMFContext *context; ///< AMF context ++} AVAMFContext; ++ ++/** ++* Surface/Pixel format ++*/ ++typedef struct FormatMap { ++ enum AVPixelFormat av_format; ++ enum AMF_SURFACE_FORMAT amf_format; ++} FormatMap; ++ ++extern const FormatMap format_map[]; ++enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt); ++enum AVPixelFormat amf_to_av_format(enum AMF_SURFACE_FORMAT fmt); ++amf_int64 amf_av_to_amf_color_profile(AVCodecContext *avctx); ++ ++/** ++* Load AMFContext ++*/ ++int amf_load_library(AVAMFContext *ctx); ++int amf_create_context(AVAMFContext *ctx); ++void amf_unload_library(AVAMFContext *ctx); ++ ++/** ++* Init AMFContext standalone ++*/ ++int amf_context_init_dx11(AVAMFContext *ctx); ++int amf_context_init_dx9(AVAMFContext *ctx); ++int amf_context_init_vulkan(AVAMFContext *ctx); ++int amf_context_init_opencl(AVAMFContext *ctx); ++ ++/** ++* Derive AMFContext from builtin hwcontext ++*/ ++#if CONFIG_D3D11VA ++int amf_context_derive_dx11(AVAMFContext *ctx, AVD3D11VADeviceContext *hwctx); ++#endif ++ ++#if CONFIG_DXVA2 ++int amf_context_derive_dx9(AVAMFContext *ctx, AVDXVA2DeviceContext *hwctx); ++#endif ++ ++#if CONFIG_OPENCL ++int amf_context_derive_opencl(AVAMFContext *ctx, AVOpenCLDeviceContext *hwctx); ++#endif ++ ++#endif /* 
AVCODEC_AMF_H */ +Index: FFmpeg/libavcodec/amfenc.c +=================================================================== +--- libavcodec/amfenc.c ++++ libavcodec/amfenc.c +@@ -16,229 +16,56 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#include "config.h" +-#include "config_components.h" +- + #include "libavutil/avassert.h" +-#include "libavutil/imgutils.h" + #include "libavutil/hwcontext.h" +-#if CONFIG_D3D11VA +-#include "libavutil/hwcontext_d3d11va.h" +-#endif +-#if CONFIG_DXVA2 +-#define COBJMACROS +-#include "libavutil/hwcontext_dxva2.h" +-#endif ++#include "libavutil/imgutils.h" ++#include "libavutil/mastering_display_metadata.h" ++ + #include "libavutil/mem.h" +-#include "libavutil/pixdesc.h" + #include "libavutil/time.h" + + #include "amfenc.h" + #include "encode.h" + #include "internal.h" + +-#if CONFIG_D3D11VA +-#include +-#endif +- +-#ifdef _WIN32 +-#include "compat/w32dlfcn.h" +-#else +-#include +-#endif +- +-#define FFMPEG_AMF_WRITER_ID L"ffmpeg_amf" +- + #define PTS_PROP L"PtsProp" + +-const enum AVPixelFormat ff_amf_pix_fmts[] = { +- AV_PIX_FMT_NV12, +- AV_PIX_FMT_YUV420P, +-#if CONFIG_D3D11VA +- AV_PIX_FMT_D3D11, +-#endif +-#if CONFIG_DXVA2 +- AV_PIX_FMT_DXVA2_VLD, +-#endif +- AV_PIX_FMT_NONE +-}; +- +-typedef struct FormatMap { +- enum AVPixelFormat av_format; +- enum AMF_SURFACE_FORMAT amf_format; +-} FormatMap; +- +-static const FormatMap format_map[] = +-{ +- { AV_PIX_FMT_NONE, AMF_SURFACE_UNKNOWN }, +- { AV_PIX_FMT_NV12, AMF_SURFACE_NV12 }, +- { AV_PIX_FMT_BGR0, AMF_SURFACE_BGRA }, +- { AV_PIX_FMT_RGB0, AMF_SURFACE_RGBA }, +- { AV_PIX_FMT_GRAY8, AMF_SURFACE_GRAY8 }, +- { AV_PIX_FMT_YUV420P, AMF_SURFACE_YUV420P }, +- { AV_PIX_FMT_YUYV422, AMF_SURFACE_YUY2 }, +-}; +- +-static enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt) +-{ +- int i; +- for (i = 0; i < amf_countof(format_map); i++) { +- if (format_map[i].av_format == fmt) { +- return format_map[i].amf_format; +- } +- } +- 
return AMF_SURFACE_UNKNOWN; +-} +- +-static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter *pThis, +- const wchar_t *scope, const wchar_t *message) +-{ +- AmfTraceWriter *tracer = (AmfTraceWriter*)pThis; +- av_log(tracer->avctx, AV_LOG_DEBUG, "%ls: %ls", scope, message); // \n is provided from AMF +-} +- +-static void AMF_CDECL_CALL AMFTraceWriter_Flush(AMFTraceWriter *pThis) +-{ +-} +- +-static AMFTraceWriterVtbl tracer_vtbl = ++static int amf_init_context(AVCodecContext *avctx) + { +- .Write = AMFTraceWriter_Write, +- .Flush = AMFTraceWriter_Flush, +-}; ++ AMFEncContext *ctx = avctx->priv_data; ++ AVAMFContext *amfctx = NULL; ++ AMF_RESULT res; ++ int ret; + +-static int amf_load_library(AVCodecContext *avctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMFInit_Fn init_fun; +- AMFQueryVersion_Fn version_fun; +- AMF_RESULT res; ++ ctx->dts_delay = 0; ++ ctx->hwsurfaces_in_queue = 0; ++ ctx->hwsurfaces_in_queue_max = 16; + + ctx->delayed_frame = av_frame_alloc(); +- if (!ctx->delayed_frame) { ++ if (!ctx->delayed_frame) + return AVERROR(ENOMEM); +- } ++ + // hardcoded to current HW queue size - will auto-realloc if too small + ctx->timestamp_list = av_fifo_alloc2(avctx->max_b_frames + 16, sizeof(int64_t), + AV_FIFO_FLAG_AUTO_GROW); +- if (!ctx->timestamp_list) { ++ if (!ctx->timestamp_list) + return AVERROR(ENOMEM); +- } +- ctx->dts_delay = 0; +- +- +- ctx->library = dlopen(AMF_DLL_NAMEA, RTLD_NOW | RTLD_LOCAL); +- AMF_RETURN_IF_FALSE(ctx, ctx->library != NULL, +- AVERROR_UNKNOWN, "DLL %s failed to open\n", AMF_DLL_NAMEA); +- +- init_fun = (AMFInit_Fn)dlsym(ctx->library, AMF_INIT_FUNCTION_NAME); +- AMF_RETURN_IF_FALSE(ctx, init_fun != NULL, AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_INIT_FUNCTION_NAME); +- +- version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library, AMF_QUERY_VERSION_FUNCTION_NAME); +- AMF_RETURN_IF_FALSE(ctx, version_fun != NULL, AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, 
AMF_QUERY_VERSION_FUNCTION_NAME); +- +- res = version_fun(&ctx->version); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_QUERY_VERSION_FUNCTION_NAME, res); +- res = init_fun(AMF_FULL_VERSION, &ctx->factory); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_INIT_FUNCTION_NAME, res); +- res = ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res); +- res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res); +- return 0; +-} +- +-#if CONFIG_D3D11VA +-static int amf_init_from_d3d11_device(AVCodecContext *avctx, AVD3D11VADeviceContext *hwctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMF_RESULT res; +- +- res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_1); +- if (res != AMF_OK) { +- if (res == AMF_NOT_SUPPORTED) +- av_log(avctx, AV_LOG_ERROR, "AMF via D3D11 is not supported on the given device.\n"); +- else +- av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on the given D3D11 device: %d.\n", res); +- return AVERROR(ENODEV); +- } +- +- return 0; +-} +-#endif +- +-#if CONFIG_DXVA2 +-static int amf_init_from_dxva2_device(AVCodecContext *avctx, AVDXVA2DeviceContext *hwctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- HANDLE device_handle; +- IDirect3DDevice9 *device; +- HRESULT hr; +- AMF_RESULT res; +- int ret; +- +- hr = IDirect3DDeviceManager9_OpenDeviceHandle(hwctx->devmgr, &device_handle); +- if (FAILED(hr)) { +- av_log(avctx, AV_LOG_ERROR, "Failed to open device handle for Direct3D9 device: %lx.\n", (unsigned long)hr); +- return AVERROR_EXTERNAL; +- } + +- hr = IDirect3DDeviceManager9_LockDevice(hwctx->devmgr, device_handle, &device, FALSE); +- if (SUCCEEDED(hr)) { +- IDirect3DDeviceManager9_UnlockDevice(hwctx->devmgr, device_handle, 
FALSE); +- ret = 0; +- } else { +- av_log(avctx, AV_LOG_ERROR, "Failed to lock device handle for Direct3D9 device: %lx.\n", (unsigned long)hr); +- ret = AVERROR_EXTERNAL; +- } ++ amfctx = av_mallocz(sizeof(AVAMFContext)); ++ if (!amfctx) ++ return AVERROR(ENOMEM); + +- IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle); ++ ctx->amfctx = amfctx; ++ amfctx->avclass = avctx; ++ amfctx->log_to_dbg = ctx->log_to_dbg; + ++ ret = amf_load_library(amfctx); + if (ret < 0) + return ret; + +- res = ctx->context->pVtbl->InitDX9(ctx->context, device); +- +- IDirect3DDevice9_Release(device); +- +- if (res != AMF_OK) { +- if (res == AMF_NOT_SUPPORTED) +- av_log(avctx, AV_LOG_ERROR, "AMF via D3D9 is not supported on the given device.\n"); +- else +- av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on given D3D9 device: %d.\n", res); +- return AVERROR(ENODEV); +- } +- +- return 0; +-} +-#endif +- +-static int amf_init_context(AVCodecContext *avctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMFContext1 *context1 = NULL; +- AMF_RESULT res; +- av_unused int ret; +- +- ctx->hwsurfaces_in_queue = 0; +- ctx->hwsurfaces_in_queue_max = 16; +- +- // configure AMF logger +- // the return of these functions indicates old state and do not affect behaviour +- ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, ctx->log_to_dbg != 0 ); +- if (ctx->log_to_dbg) +- ctx->trace->pVtbl->SetWriterLevel(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE); +- ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_CONSOLE, 0); +- ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE); +- +- // connect AMF logger to av_log +- ctx->tracer.vtbl = &tracer_vtbl; +- ctx->tracer.avctx = avctx; +- ctx->trace->pVtbl->RegisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID,(AMFTraceWriter*)&ctx->tracer, 1); +- ctx->trace->pVtbl->SetWriterLevel(ctx->trace, FFMPEG_AMF_WRITER_ID, AMF_TRACE_TRACE); +- +- res = 
ctx->factory->pVtbl->CreateContext(ctx->factory, &ctx->context); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext() failed with error %d\n", res); ++ ret = amf_create_context(amfctx); ++ if (ret < 0) ++ return ret; + + // If a device was passed to the encoder, try to initialise from that. + if (avctx->hw_frames_ctx) { +@@ -253,16 +80,16 @@ static int amf_init_context(AVCodecConte + switch (frames_ctx->device_ctx->type) { + #if CONFIG_D3D11VA + case AV_HWDEVICE_TYPE_D3D11VA: +- ret = amf_init_from_d3d11_device(avctx, frames_ctx->device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx11(amfctx, frames_ctx->device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: +- ret = amf_init_from_dxva2_device(avctx, frames_ctx->device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx9(amfctx, frames_ctx->device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + default: +@@ -284,16 +111,16 @@ static int amf_init_context(AVCodecConte + switch (device_ctx->type) { + #if CONFIG_D3D11VA + case AV_HWDEVICE_TYPE_D3D11VA: +- ret = amf_init_from_d3d11_device(avctx, device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx11(amfctx, device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: +- ret = amf_init_from_dxva2_device(avctx, device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx9(amfctx, device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + default: +@@ -307,40 +134,33 @@ static int amf_init_context(AVCodecConte + return AVERROR(ENOMEM); + + } else { +- res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_1); +- if (res == AMF_OK) { +- av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via D3D11.\n"); +- } else { +- res = 
ctx->context->pVtbl->InitDX9(ctx->context, NULL); +- if (res == AMF_OK) { +- av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via D3D9.\n"); +- } else { +- AMFGuid guid = IID_AMFContext1(); +- res = ctx->context->pVtbl->QueryInterface(ctx->context, &guid, (void**)&context1); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext1() failed with error %d\n", res); +- +- res = context1->pVtbl->InitVulkan(context1, NULL); +- context1->pVtbl->Release(context1); ++#ifdef _WIN32 ++ res = amf_context_init_dx11(amfctx); ++ if (res != AMF_OK) { ++ res = amf_context_init_dx9(amfctx); ++ if (res != AMF_OK) { ++#endif ++ res = amf_context_init_vulkan(amfctx); + if (res != AMF_OK) { +- if (res == AMF_NOT_SUPPORTED) +- av_log(avctx, AV_LOG_ERROR, "AMF via Vulkan is not supported on the given device.\n"); +- else +- av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on the given Vulkan device: %d.\n", res); ++ av_log(avctx, AV_LOG_ERROR, "AMF initialisation is not supported.\n"); + return AVERROR(ENOSYS); + } +- av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via Vulkan.\n"); ++#ifdef _WIN32 + } + } ++#endif + } ++ + return 0; + } + + static int amf_init_encoder(AVCodecContext *avctx) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; ++ AVAMFContext *amfctx = ctx->amfctx; + const wchar_t *codec_id = NULL; +- AMF_RESULT res; + enum AVPixelFormat pix_fmt; ++ AMF_RESULT res; + + switch (avctx->codec->id) { + case AV_CODEC_ID_H264: +@@ -355,26 +175,26 @@ static int amf_init_encoder(AVCodecConte + default: + break; + } +- AMF_RETURN_IF_FALSE(ctx, codec_id != NULL, AVERROR(EINVAL), "Codec %d is not supported\n", avctx->codec->id); ++ AMF_RETURN_IF_FALSE(avctx, codec_id != NULL, ++ AVERROR(EINVAL), "Codec %d is not supported\n", avctx->codec->id); + +- if (ctx->hw_frames_ctx) +- pix_fmt = ((AVHWFramesContext*)ctx->hw_frames_ctx->data)->sw_format; +- else +- pix_fmt = avctx->pix_fmt; ++ pix_fmt = 
avctx->hw_frames_ctx ? ((AVHWFramesContext*)avctx->hw_frames_ctx->data)->sw_format : avctx->pix_fmt; + + ctx->format = amf_av_to_amf_format(pix_fmt); +- AMF_RETURN_IF_FALSE(ctx, ctx->format != AMF_SURFACE_UNKNOWN, AVERROR(EINVAL), +- "Format %s is not supported\n", av_get_pix_fmt_name(pix_fmt)); ++ AMF_RETURN_IF_FALSE(avctx, ctx->format != AMF_SURFACE_UNKNOWN, ++ AVERROR(EINVAL), "Format %s is not supported\n", av_get_pix_fmt_name(pix_fmt)); + +- res = ctx->factory->pVtbl->CreateComponent(ctx->factory, ctx->context, codec_id, &ctx->encoder); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res); ++ res = amfctx->factory->pVtbl->CreateComponent(amfctx->factory, amfctx->context, codec_id, &ctx->encoder); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, ++ AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res); + + return 0; + } + +-int av_cold ff_amf_encode_close(AVCodecContext *avctx) ++av_cold int ff_amf_encode_close(AVCodecContext *avctx) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; ++ AVAMFContext *amfctx = ctx->amfctx; + + if (ctx->delayed_surface) { + ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface); +@@ -387,34 +207,34 @@ int av_cold ff_amf_encode_close(AVCodecC + ctx->encoder = NULL; + } + +- if (ctx->context) { +- ctx->context->pVtbl->Terminate(ctx->context); +- ctx->context->pVtbl->Release(ctx->context); +- ctx->context = NULL; +- } ++ if (amfctx) /* NULL when amf_init_context() failed before allocating the AMF context */ ++ amf_unload_library(amfctx); ++ av_freep(&ctx->amfctx); /* NULL-safe; also clears the stored pointer instead of only the local copy */ ++ ++ ctx->delayed_drain = 0; + av_buffer_unref(&ctx->hw_device_ctx); + av_buffer_unref(&ctx->hw_frames_ctx); + +- if (ctx->trace) { +- ctx->trace->pVtbl->UnregisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID); +- } +- if (ctx->library) { +- dlclose(ctx->library); +- ctx->library = NULL; +- } +- ctx->trace = NULL; +- ctx->debug = NULL; +- ctx->factory = NULL; +- ctx->version = 0; +- ctx->delayed_drain = 0; +
av_frame_free(&ctx->delayed_frame); + av_fifo_freep2(&ctx->timestamp_list); +- + return 0; + } + +-static int amf_copy_surface(AVCodecContext *avctx, const AVFrame *frame, +- AMFSurface* surface) ++av_cold int ff_amf_encode_init(AVCodecContext *avctx) ++{ ++ int ret; ++ ++ if ((ret = amf_init_context(avctx)) == 0) ++ if ((ret = amf_init_encoder(avctx)) == 0) ++ return 0; ++ ++ ff_amf_encode_close(avctx); ++ return ret; ++} ++ ++static int amf_copy_surface(AVCodecContext *avctx, ++ const AVFrame *frame, ++ AMFSurface* surface) + { + AMFPlane *plane; + uint8_t *dst_data[4]; +@@ -430,38 +250,37 @@ static int amf_copy_surface(AVCodecConte + dst_data[i] = plane->pVtbl->GetNative(plane); + dst_linesize[i] = plane->pVtbl->GetHPitch(plane); + } ++ + av_image_copy2(dst_data, dst_linesize, + frame->data, frame->linesize, frame->format, + avctx->width, avctx->height); +- + return 0; + } + +-static int amf_copy_buffer(AVCodecContext *avctx, AVPacket *pkt, AMFBuffer *buffer) ++static int amf_copy_buffer(AVCodecContext *avctx, ++ AVPacket *pkt, ++ AMFBuffer *buffer) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + int ret; +- AMFVariantStruct var = {0}; ++ AMFVariantStruct var = { 0 }; + int64_t timestamp = AV_NOPTS_VALUE; + int64_t size = buffer->pVtbl->GetSize(buffer); + +- if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) { ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; +- } + memcpy(pkt->data, buffer->pVtbl->GetNative(buffer), size); + + switch (avctx->codec->id) { + case AV_CODEC_ID_H264: + buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE, &var); +- if(var.int64Value == AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE_IDR) { ++ if (var.int64Value == AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE_IDR) + pkt->flags = AV_PKT_FLAG_KEY; +- } + break; + case AV_CODEC_ID_HEVC: + buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE, &var); +- if (var.int64Value == 
AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE_IDR) { ++ if (var.int64Value == AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE_IDR) + pkt->flags = AV_PKT_FLAG_KEY; +- } + break; + case AV_CODEC_ID_AV1: + buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_AV1_OUTPUT_FRAME_TYPE, &var); +@@ -476,7 +295,6 @@ static int amf_copy_buffer(AVCodecContex + + pkt->pts = var.int64Value; // original pts + +- + AMF_RETURN_IF_FALSE(ctx, av_fifo_read(ctx->timestamp_list, &timestamp, 1) >= 0, + AVERROR_UNKNOWN, "timestamp_list is empty\n"); + +@@ -486,34 +304,20 @@ static int amf_copy_buffer(AVCodecContex + size_t can_read = av_fifo_can_read(ctx->timestamp_list); + + AMF_RETURN_IF_FALSE(ctx, can_read > 0, AVERROR_UNKNOWN, +- "timestamp_list is empty while max_b_frames = %d\n", avctx->max_b_frames); ++ "timestamp_list is empty while max_b_frames = %d\n", avctx->max_b_frames); ++ + av_fifo_peek(ctx->timestamp_list, &timestamp_last, 1, can_read - 1); +- if (timestamp < 0 || timestamp_last < AV_NOPTS_VALUE) { ++ if (timestamp < 0 || timestamp_last < AV_NOPTS_VALUE) + return AVERROR(ERANGE); +- } + ctx->dts_delay = timestamp_last - timestamp; + } + pkt->dts = timestamp - ctx->dts_delay; + return 0; + } + +-// amfenc API implementation +-int ff_amf_encode_init(AVCodecContext *avctx) +-{ +- int ret; +- +- if ((ret = amf_load_library(avctx)) == 0) { +- if ((ret = amf_init_context(avctx)) == 0) { +- if ((ret = amf_init_encoder(avctx)) == 0) { +- return 0; +- } +- } +- } +- ff_amf_encode_close(avctx); +- return ret; +-} +- +-static AMF_RESULT amf_set_property_buffer(AMFSurface *object, const wchar_t *name, AMFBuffer *val) ++static AMF_RESULT amf_set_property_buffer(AMFSurface *object, ++ const wchar_t *name, ++ AMFBuffer *val) + { + AMF_RESULT res; + AMFVariantStruct var; +@@ -527,15 +331,16 @@ static AMF_RESULT amf_set_property_buffe + res = AMFVariantAssignInterface(&var, amf_interface); + amf_interface->pVtbl->Release(amf_interface); + } +- if (res == AMF_OK) { ++ if (res == AMF_OK) + res =
object->pVtbl->SetProperty(object, name, var); +- } + AMFVariantClear(&var); + } + return res; + } + +-static AMF_RESULT amf_get_property_buffer(AMFData *object, const wchar_t *name, AMFBuffer **val) ++static AMF_RESULT amf_get_property_buffer(AMFData *object, ++ const wchar_t *name, ++ AMFBuffer **val) + { + AMF_RESULT res; + AMFVariantStruct var; +@@ -583,9 +388,60 @@ static void amf_release_buffer_with_fram + frame_ref_storage_buffer->pVtbl->Release(frame_ref_storage_buffer); + } + ++static int amf_save_hdr_metadata(AVCodecContext *avctx, const AVFrame *frame, AMFHDRMetadata *hdrmeta) ++{ ++ AVFrameSideData *sd_display; ++ AVFrameSideData *sd_light; ++ AVMasteringDisplayMetadata *display_meta; ++ AVContentLightMetadata *light_meta; ++ ++ sd_display = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ if (sd_display) { ++ display_meta = (AVMasteringDisplayMetadata *)sd_display->data; ++ if (display_meta->has_luminance) { ++ const unsigned int luma_den = 10000; ++ hdrmeta->maxMasteringLuminance = ++ (amf_uint32)(luma_den * av_q2d(display_meta->max_luminance)); ++ hdrmeta->minMasteringLuminance = ++ FFMIN((amf_uint32)(luma_den * av_q2d(display_meta->min_luminance)), hdrmeta->maxMasteringLuminance); ++ } ++ if (display_meta->has_primaries) { ++ const unsigned int chroma_den = 50000; ++ hdrmeta->redPrimary[0] = ++ FFMIN((amf_uint16)(chroma_den * av_q2d(display_meta->display_primaries[0][0])), chroma_den); ++ hdrmeta->redPrimary[1] = ++ FFMIN((amf_uint16)(chroma_den * av_q2d(display_meta->display_primaries[0][1])), chroma_den); ++ hdrmeta->greenPrimary[0] = ++ FFMIN((amf_uint16)(chroma_den * av_q2d(display_meta->display_primaries[1][0])), chroma_den); ++ hdrmeta->greenPrimary[1] = ++ FFMIN((amf_uint16)(chroma_den * av_q2d(display_meta->display_primaries[1][1])), chroma_den); ++ hdrmeta->bluePrimary[0] = ++ FFMIN((amf_uint16)(chroma_den * av_q2d(display_meta->display_primaries[2][0])), chroma_den); ++ hdrmeta->bluePrimary[1] = ++ 
FFMIN((amf_uint16)(chroma_den * av_q2d(display_meta->display_primaries[2][1])), chroma_den); ++ hdrmeta->whitePoint[0] = ++ FFMIN((amf_uint16)(chroma_den * av_q2d(display_meta->white_point[0])), chroma_den); ++ hdrmeta->whitePoint[1] = ++ FFMIN((amf_uint16)(chroma_den * av_q2d(display_meta->white_point[1])), chroma_den); ++ } ++ ++ sd_light = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ if (sd_light) { ++ light_meta = (AVContentLightMetadata *)sd_light->data; ++ if (light_meta) { ++ hdrmeta->maxContentLightLevel = (amf_uint16)light_meta->MaxCLL; ++ hdrmeta->maxFrameAverageLightLevel = (amf_uint16)light_meta->MaxFALL; ++ } ++ } ++ return 0; ++ } ++ return 1; ++} ++ + int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; ++ AVAMFContext *amfctx = ctx->amfctx; + AMFSurface *surface; + AMF_RESULT res; + int ret; +@@ -614,10 +470,9 @@ int ff_amf_receive_packet(AVCodecContext + if (res == AMF_INPUT_FULL) { + ctx->delayed_drain = 1; // input queue is full: resubmit Drain() in ff_amf_receive_packet + } else { +- if (res == AMF_OK) { ++ if (res == AMF_OK) + ctx->eof = 1; // drain started +- } +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Drain() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "Drain() failed with error %d\n", res); + } + } + } +@@ -633,13 +488,10 @@ int ff_amf_receive_packet(AVCodecContext + ID3D11Texture2D *texture = (ID3D11Texture2D*)frame->data[0]; // actual texture + int index = (intptr_t)frame->data[1]; // index is a slice in texture array is - set to tell AMF which slice to use + +- av_assert0(frame->hw_frames_ctx && ctx->hw_frames_ctx && +- frame->hw_frames_ctx->data == ctx->hw_frames_ctx->data); +- + texture->lpVtbl->SetPrivateData(texture, &AMFTextureArrayIndexGUID, sizeof(index), &index); + +- res = ctx->context->pVtbl->CreateSurfaceFromDX11Native(ctx->context, texture, 
&surface, NULL); // wrap to AMF surface +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX11Native() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->CreateSurfaceFromDX11Native(amfctx->context, texture, &surface, NULL); // wrap to AMF surface ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX11Native() failed with error %d\n", res); + + hw_surface = 1; + } +@@ -650,8 +502,8 @@ int ff_amf_receive_packet(AVCodecContext + { + IDirect3DSurface9 *texture = (IDirect3DSurface9 *)frame->data[3]; // actual texture + +- res = ctx->context->pVtbl->CreateSurfaceFromDX9Native(ctx->context, texture, &surface, NULL); // wrap to AMF surface +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX9Native() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->CreateSurfaceFromDX9Native(amfctx->context, texture, &surface, NULL); // wrap to AMF surface ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX9Native() failed with error %d\n", res); + + hw_surface = 1; + } +@@ -659,8 +511,8 @@ int ff_amf_receive_packet(AVCodecContext + #endif + default: + { +- res = ctx->context->pVtbl->AllocSurface(ctx->context, AMF_MEMORY_HOST, ctx->format, avctx->width, avctx->height, &surface); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "AllocSurface() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->AllocSurface(amfctx->context, AMF_MEMORY_HOST, ctx->format, avctx->width, avctx->height, &surface); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), "AllocSurface() failed with error %d\n", res); + amf_copy_surface(avctx, frame, surface); + } + break; +@@ -672,28 +524,49 @@ int ff_amf_receive_packet(AVCodecContext + // input HW surfaces can be vertically aligned by 16; tell AMF the real size + surface->pVtbl->SetCrop(surface, 0, 0, frame->width, frame->height); + +- frame_ref_storage_buffer = 
amf_create_buffer_with_frame_ref(frame, ctx->context); +- AMF_RETURN_IF_FALSE(ctx, frame_ref_storage_buffer != NULL, AVERROR(ENOMEM), "create_buffer_with_frame_ref() returned NULL\n"); ++ frame_ref_storage_buffer = amf_create_buffer_with_frame_ref(frame, amfctx->context); ++ AMF_RETURN_IF_FALSE(avctx, frame_ref_storage_buffer != NULL, AVERROR(ENOMEM), "create_buffer_with_frame_ref() returned NULL\n"); + + res = amf_set_property_buffer(surface, L"av_frame_ref", frame_ref_storage_buffer); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "SetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ + ctx->hwsurfaces_in_queue++; + frame_ref_storage_buffer->pVtbl->Release(frame_ref_storage_buffer); + } + ++ // HDR10 metadata ++ if (frame->color_trc == AVCOL_TRC_SMPTE2084) { ++ AMFBuffer *hdrmeta_buffer = NULL; ++ res = amfctx->context->pVtbl->AllocBuffer(amfctx->context, AMF_MEMORY_HOST, sizeof(AMFHDRMetadata), &hdrmeta_buffer); ++ if (res == AMF_OK) { ++ AMFHDRMetadata *hdrmeta = (AMFHDRMetadata *)hdrmeta_buffer->pVtbl->GetNative(hdrmeta_buffer); ++ if (amf_save_hdr_metadata(avctx, frame, hdrmeta) == 0) { ++ switch (avctx->codec->id) { ++ case AV_CODEC_ID_H264: ++ AMF_ASSIGN_PROPERTY_INTERFACE(res, ctx->encoder, AMF_VIDEO_ENCODER_INPUT_HDR_METADATA, hdrmeta_buffer); break; ++ case AV_CODEC_ID_HEVC: ++ AMF_ASSIGN_PROPERTY_INTERFACE(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_INPUT_HDR_METADATA, hdrmeta_buffer); break; ++ case AV_CODEC_ID_AV1: ++ AMF_ASSIGN_PROPERTY_INTERFACE(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_INPUT_HDR_METADATA, hdrmeta_buffer); break; ++ } ++ res = amf_set_property_buffer(surface, L"av_frame_hdrmeta", hdrmeta_buffer); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "SetProperty failed for \"av_frame_hdrmeta\" with error %d\n", res); ++ } ++ hdrmeta_buffer->pVtbl->Release(hdrmeta_buffer); ++ } 
++ } ++ + surface->pVtbl->SetPts(surface, frame->pts); + AMF_ASSIGN_PROPERTY_INT64(res, surface, PTS_PROP, frame->pts); + + switch (avctx->codec->id) { + case AV_CODEC_ID_H264: +- AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_AUD, !!ctx->aud); +- break; ++ AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_SPS, 1); ++ AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_PPS, 1); ++ AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_AUD, !!ctx->aud); break; + case AV_CODEC_ID_HEVC: +- AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_HEVC_INSERT_AUD, !!ctx->aud); +- break; ++ AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_HEVC_INSERT_AUD, !!ctx->aud); break; + //case AV_CODEC_ID_AV1 not supported +- default: +- break; + } + + // submit surface +@@ -704,7 +577,7 @@ int ff_amf_receive_packet(AVCodecContext + } else { + int64_t pts = frame->pts; + surface->pVtbl->Release(surface); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SubmitInput() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "SubmitInput() failed with error %d\n", res); + + av_frame_unref(frame); + ret = av_fifo_write(ctx->timestamp_list, &pts, 1); +@@ -713,7 +586,6 @@ int ff_amf_receive_packet(AVCodecContext + } + } + +- + do { + block_and_wait = 0; + // poll data +@@ -732,25 +604,39 @@ int ff_amf_receive_packet(AVCodecContext + if (data->pVtbl->HasProperty(data, L"av_frame_ref")) { + AMFBuffer* frame_ref_storage_buffer; + res = amf_get_property_buffer(data, L"av_frame_ref", &frame_ref_storage_buffer); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "GetProperty failed for \"av_frame_ref\" with error %d\n", res); + amf_release_buffer_with_frame_ref(frame_ref_storage_buffer); + ctx->hwsurfaces_in_queue--; + } + + 
data->pVtbl->Release(data); + +- AMF_RETURN_IF_FALSE(ctx, ret >= 0, ret, "amf_copy_buffer() failed with error %d\n", ret); ++ AMF_RETURN_IF_FALSE(avctx, ret >= 0, ret, "amf_copy_buffer() failed with error %d\n", ret); + } + } + res_resubmit = AMF_OK; + if (ctx->delayed_surface != NULL) { // try to resubmit frame ++ if (ctx->delayed_surface->pVtbl->HasProperty(ctx->delayed_surface, L"av_frame_hdrmeta")) { ++ AMFBuffer * hdrmeta_buffer = NULL; ++ res = amf_get_property_buffer((AMFData *)ctx->delayed_surface, L"av_frame_hdrmeta", &hdrmeta_buffer); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "GetProperty failed for \"av_frame_hdrmeta\" with error %d\n", res); ++ switch (avctx->codec->id) { ++ case AV_CODEC_ID_H264: ++ AMF_ASSIGN_PROPERTY_INTERFACE(res, ctx->encoder, AMF_VIDEO_ENCODER_INPUT_HDR_METADATA, hdrmeta_buffer); break; ++ case AV_CODEC_ID_HEVC: ++ AMF_ASSIGN_PROPERTY_INTERFACE(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_INPUT_HDR_METADATA, hdrmeta_buffer); break; ++ case AV_CODEC_ID_AV1: ++ AMF_ASSIGN_PROPERTY_INTERFACE(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_INPUT_HDR_METADATA, hdrmeta_buffer); break; ++ } ++ hdrmeta_buffer->pVtbl->Release(hdrmeta_buffer); ++ } + res_resubmit = ctx->encoder->pVtbl->SubmitInput(ctx->encoder, (AMFData*)ctx->delayed_surface); + if (res_resubmit != AMF_INPUT_FULL) { + int64_t pts = ctx->delayed_surface->pVtbl->GetPts(ctx->delayed_surface); + ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface); + ctx->delayed_surface = NULL; + av_frame_unref(ctx->delayed_frame); +- AMF_RETURN_IF_FALSE(ctx, res_resubmit == AMF_OK, AVERROR_UNKNOWN, "Repeated SubmitInput() failed with error %d\n", res_resubmit); ++ AMF_RETURN_IF_FALSE(avctx, res_resubmit == AMF_OK, AVERROR_UNKNOWN, "Repeated SubmitInput() failed with error %d\n", res_resubmit); + + ret = av_fifo_write(ctx->timestamp_list, &pts, 1); + if (ret < 0) +@@ -761,7 +647,7 @@ int ff_amf_receive_packet(AVCodecContext + if (res != AMF_INPUT_FULL) { + ctx->delayed_drain = 
0; + ctx->eof = 1; // drain started +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated Drain() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated Drain() failed with error %d\n", res); + } else { + av_log(avctx, AV_LOG_WARNING, "Data acquired but delayed drain submission got AMF_INPUT_FULL- should not happen\n"); + } +@@ -775,13 +661,12 @@ int ff_amf_receive_packet(AVCodecContext + } + } while (block_and_wait); + +- if (res_query == AMF_EOF) { ++ if (res_query == AMF_EOF) + ret = AVERROR_EOF; +- } else if (data == NULL) { ++ else if (data == NULL) + ret = AVERROR(EAGAIN); +- } else { ++ else + ret = 0; +- } + return ret; + } + +Index: FFmpeg/libavcodec/amfenc.h +=================================================================== +--- libavcodec/amfenc.h ++++ libavcodec/amfenc.h +@@ -1,63 +1,44 @@ + /* +-* This file is part of FFmpeg. +-* +-* FFmpeg is free software; you can redistribute it and/or +-* modify it under the terms of the GNU Lesser General Public +-* License as published by the Free Software Foundation; either +-* version 2.1 of the License, or (at your option) any later version. +-* +-* FFmpeg is distributed in the hope that it will be useful, +-* but WITHOUT ANY WARRANTY; without even the implied warranty of +-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-* Lesser General Public License for more details. +-* +-* You should have received a copy of the GNU Lesser General Public +-* License along with FFmpeg; if not, write to the Free Software +-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +-*/ ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. 
++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + + #ifndef AVCODEC_AMFENC_H + #define AVCODEC_AMFENC_H + +-#include +- + #include + #include + #include ++#include + + #include "libavutil/fifo.h" + +-#include "avcodec.h" ++#include "amf.h" + #include "hwconfig.h" + +-#define MAX_LOOKAHEAD_DEPTH 41 +- +-/** +-* AMF trace writer callback class +-* Used to capture all AMF logging +-*/ +- +-typedef struct AmfTraceWriter { +- AMFTraceWriterVtbl *vtbl; +- AVCodecContext *avctx; +-} AmfTraceWriter; ++#define MAX_LOOKAHEAD_DEPTH 41 + + /** + * AMF encoder context + */ ++typedef struct AMFEncContext { ++ void *avclass; ++ void *amfctx; + +-typedef struct AmfContext { +- AVClass *avclass; +- // access to AMF runtime +- amf_handle library; ///< handle to DLL library +- AMFFactory *factory; ///< pointer to AMF factory +- AMFDebug *debug; ///< pointer to AMF debug interface +- AMFTrace *trace; ///< pointer to AMF trace interface +- +- amf_uint64 version; ///< version of AMF runtime +- AmfTraceWriter tracer; ///< AMF writer registered with AMF +- AMFContext *context; ///< AMF context +- //encoder ++ // encoder + AMFComponent *encoder; ///< AMF encoder object + amf_bool eof; ///< flag indicating EOF happened + AMF_SURFACE_FORMAT format; ///< AMF surface format +@@ -78,7 +59,6 @@ typedef struct AmfContext { + int64_t dts_delay; + + // common encoder option options +- + int log_to_dbg; + + // Static options, have to be set before Init() call +@@ -91,7 +71,6 @@ typedef struct AmfContext { + int ref_b_frame_delta_qp; + + // Dynamic options, can be set 
after Init() call +- + int rate_control_mode; + int enforce_hrd; + int filler_data; +@@ -114,7 +93,6 @@ typedef struct AmfContext { + int hw_high_motion_quality_boost; + + // HEVC - specific options +- + int gops_per_idr; + int header_insertion_mode; + int min_qp_i; +@@ -124,11 +102,9 @@ typedef struct AmfContext { + int tier; + + // AV1 - specific options +- + enum AMF_VIDEO_ENCODER_AV1_ALIGNMENT_MODE_ENUM align; + + // Preanalysis - specific options +- + int preanalysis; + int pa_activity_type; + int pa_scene_change_detection; +@@ -145,9 +121,7 @@ typedef struct AmfContext { + int pa_taq_mode; + int pa_high_motion_quality_boost_mode; + int pa_adaptive_mini_gop; +- +- +-} AmfContext; ++} AMFEncContext; + + extern const AVCodecHWConfigInternal *const ff_amfenc_hw_configs[]; + +@@ -165,18 +139,4 @@ int ff_amf_encode_close(AVCodecContext * + */ + int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt); + +-/** +-* Supported formats +-*/ +-extern const enum AVPixelFormat ff_amf_pix_fmts[]; +- +-/** +-* Error handling helper +-*/ +-#define AMF_RETURN_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) 
\ +- if (!(exp)) { \ +- av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ +- return ret_value; \ +- } +- +-#endif //AVCODEC_AMFENC_H ++#endif /* AVCODEC_AMFENC_H */ +Index: FFmpeg/libavcodec/amfenc_av1.c +=================================================================== +--- libavcodec/amfenc_av1.c ++++ libavcodec/amfenc_av1.c +@@ -22,8 +22,24 @@ + #include "codec_internal.h" + #include "internal.h" + +-#define OFFSET(x) offsetof(AmfContext, x) ++#define OFFSET(x) offsetof(AMFEncContext, x) + #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM ++ ++static const enum AVPixelFormat ff_amfenc_av1_pix_fmts[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_BGR0, ++#if CONFIG_D3D11VA ++ AV_PIX_FMT_D3D11, ++#endif ++#if CONFIG_DXVA2 ++ AV_PIX_FMT_DXVA2_VLD, ++#endif ++ AV_PIX_FMT_NONE ++}; ++ + static const AVOption options[] = { + { "usage", "Set the encoding usage", OFFSET(usage), AV_OPT_TYPE_INT, {.i64 = AMF_VIDEO_ENCODER_AV1_USAGE_TRANSCODING }, AMF_VIDEO_ENCODER_AV1_USAGE_TRANSCODING, AMF_VIDEO_ENCODER_AV1_USAGE_LOW_LATENCY, VE, .unit = "usage" }, + { "transcoding", "", 0, AV_OPT_TYPE_CONST, {.i64 = AMF_VIDEO_ENCODER_AV1_USAGE_TRANSCODING }, 0, 0, VE, .unit = "usage" }, +@@ -157,20 +173,19 @@ static av_cold int amf_encode_init_av1(A + { + int ret = 0; + AMF_RESULT res = AMF_OK; +- AmfContext* ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + AMFVariantStruct var = { 0 }; + amf_int64 profile = 0; + amf_int64 profile_level = 0; +- AMFBuffer* buffer; ++ AMFBuffer *buffer; + AMFGuid guid; + AMFRate framerate; + AMFSize framesize = AMFConstructSize(avctx->width, avctx->height); ++ amf_int64 color_profile; ++ enum AVPixelFormat pix_fmt; + +- +- +- if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { ++ if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den); +- } + else { + FF_DISABLE_DEPRECATION_WARNINGS + 
framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num +@@ -184,7 +199,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + if ((ret = ff_amf_encode_init(avctx)) < 0) + return ret; + +- // init static parameters ++ // Init static parameters + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_USAGE, ctx->usage); + + AMF_ASSIGN_PROPERTY_SIZE(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_FRAMESIZE, framesize); +@@ -198,18 +213,17 @@ FF_ENABLE_DEPRECATION_WARNINGS + default: + break; + } +- if (profile == 0) { ++ if (profile == 0) + profile = ctx->profile; +- } + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_PROFILE, profile); + + profile_level = avctx->level; +- if (profile_level == AV_LEVEL_UNKNOWN) { ++ if (profile_level == FF_LEVEL_UNKNOWN) + profile_level = ctx->level; +- } + if (profile_level != 0) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_LEVEL, profile_level); + } ++ + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_QUALITY_PRESET, ctx->quality); + + // Maximum Reference Frames +@@ -225,7 +239,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_HEADER_INSERTION_MODE, ctx->header_insertion_mode); + + // Rate control +- // autodetect rate control method ++ // Autodetect rate control method + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_AV1_RATE_CONTROL_METHOD_UNKNOWN) { + if (ctx->min_qp_i != -1 || ctx->max_qp_i != -1 || + ctx->min_qp_p != -1 || ctx->max_qp_p != -1 || +@@ -248,8 +262,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_RATE_CONTROL_PREENCODE, 0); + if (ctx->preencode) + av_log(ctx, AV_LOG_WARNING, "Preencode is not supported by cqp Rate Control Method, automatically disabled\n"); +- } +- else { ++ } else { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_RATE_CONTROL_PREENCODE, ctx->preencode); + } + +@@ -260,7 +273,7 @@ 
FF_ENABLE_DEPRECATION_WARNINGS + } + + if (ctx->hw_high_motion_quality_boost != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_HIGH_MOTION_QUALITY_BOOST, ((ctx->hw_high_motion_quality_boost == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_HIGH_MOTION_QUALITY_BOOST, !!ctx->hw_high_motion_quality_boost); + } + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_RATE_CONTROL_METHOD, ctx->rate_control_mode); +@@ -275,7 +288,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + } + +- // init dynamic rate control params ++ // Dynamic rate control params + if (ctx->max_au_size) + ctx->enforce_hrd = 1; + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_ENFORCE_HRD, ctx->enforce_hrd); +@@ -288,32 +301,27 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + if (avctx->rc_max_rate) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_PEAK_BITRATE, avctx->rc_max_rate); +- } +- else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_AV1_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) { ++ } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_AV1_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) { + av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n"); + } ++ + if (avctx->bit_rate > 0) { + ctx->rate_control_mode = AMF_VIDEO_ENCODER_AV1_RATE_CONTROL_METHOD_CBR; + av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CBR\n"); + } + +- switch (ctx->align) +- { ++ switch (ctx->align) { + case AMF_VIDEO_ENCODER_AV1_ALIGNMENT_MODE_64X16_ONLY: +- if (avctx->width / 64 * 64 != avctx->width || avctx->height / 16 * 16 != avctx->height) +- { ++ if (avctx->width / 64 * 64 != avctx->width || avctx->height / 16 * 16 != avctx->height) { + res = AMF_NOT_SUPPORTED; + av_log(ctx, AV_LOG_ERROR, "Resolution incorrect for alignment mode\n"); + return AVERROR_EXIT; + } + break; + case AMF_VIDEO_ENCODER_AV1_ALIGNMENT_MODE_64X16_1080P_CODED_1082: +- if ((avctx->width / 64 * 
64 == avctx->width && avctx->height / 16 * 16 == avctx->height) || (avctx->width == 1920 && avctx->height == 1080)) +- { ++ if ((avctx->width / 64 * 64 == avctx->width && avctx->height / 16 * 16 == avctx->height) || (avctx->width == 1920 && avctx->height == 1080)) { + res = AMF_OK; +- } +- else +- { ++ } else { + res = AMF_NOT_SUPPORTED; + av_log(ctx, AV_LOG_ERROR, "Resolution incorrect for alignment mode\n"); + return AVERROR_EXIT; +@@ -329,24 +337,35 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_ALIGNMENT_MODE, ctx->align); + ++ // Output color depth, profile, transfer and primaries ++ pix_fmt = avctx->hw_frames_ctx ? ((AVHWFramesContext*)avctx->hw_frames_ctx->data)->sw_format : avctx->pix_fmt; ++ if (pix_fmt == AV_PIX_FMT_P010) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_10); ++ } else { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_8); ++ } ++ color_profile = amf_av_to_amf_color_profile(avctx); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_OUTPUT_COLOR_PROFILE, color_profile); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_OUTPUT_TRANSFER_CHARACTERISTIC, (amf_int64)avctx->color_trc); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_OUTPUT_COLOR_PRIMARIES, (amf_int64)avctx->color_primaries); ++ + if (ctx->preanalysis != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_PRE_ANALYSIS_ENABLE, !!((ctx->preanalysis == 0) ? 
false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_PRE_ANALYSIS_ENABLE, !!ctx->preanalysis); + } + + res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_AV1_PRE_ANALYSIS_ENABLE, &var); +- if ((int)var.int64Value) +- { ++ if ((int)var.int64Value) { + if (ctx->pa_activity_type != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_ACTIVITY_TYPE, ctx->pa_activity_type); + } + if (ctx->pa_scene_change_detection != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_ENABLE, ((ctx->pa_scene_change_detection == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_ENABLE, !!ctx->pa_scene_change_detection); + } + if (ctx->pa_scene_change_detection_sensitivity != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_SENSITIVITY, ctx->pa_scene_change_detection_sensitivity); + } + if (ctx->pa_static_scene_detection != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_ENABLE, ((ctx->pa_static_scene_detection == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_ENABLE, !!ctx->pa_static_scene_detection); + } + if (ctx->pa_static_scene_detection_sensitivity != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_SENSITIVITY, ctx->pa_static_scene_detection_sensitivity); +@@ -361,7 +380,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_CAQ_STRENGTH, ctx->pa_caq_strength); + } + if (ctx->pa_frame_sad != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_FRAME_SAD_ENABLE, ((ctx->pa_frame_sad == 0) ? 
false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_FRAME_SAD_ENABLE, !!ctx->pa_frame_sad); + } + if (ctx->pa_paq_mode != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_PAQ_MODE, ctx->pa_paq_mode); +@@ -370,7 +389,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_TAQ_MODE, ctx->pa_taq_mode); + } + if (ctx->pa_ltr != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_LTR_ENABLE, ((ctx->pa_ltr == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_LTR_ENABLE, !!ctx->pa_ltr); + } + if (ctx->pa_lookahead_buffer_depth != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_LOOKAHEAD_BUFFER_DEPTH, ctx->pa_lookahead_buffer_depth); +@@ -380,40 +399,35 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + } + +- // init encoder ++ // Init encoder + res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res); + +- // init dynamic picture control params ++ // Init dynamic picture control params + if (ctx->min_qp_i != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_MIN_Q_INDEX_INTRA, ctx->min_qp_i); +- } +- else if (avctx->qmin != -1) { ++ } else if (avctx->qmin != -1) { + int qval = avctx->qmin > 255 ? 255 : avctx->qmin; + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_MIN_Q_INDEX_INTRA, qval); + } + if (ctx->max_qp_i != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_MAX_Q_INDEX_INTRA, ctx->max_qp_i); +- } +- else if (avctx->qmax != -1) { ++ } else if (avctx->qmax != -1) { + int qval = avctx->qmax > 255 ? 
255 : avctx->qmax; + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_MAX_Q_INDEX_INTRA, qval); + } + if (ctx->min_qp_p != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_MIN_Q_INDEX_INTER, ctx->min_qp_p); +- } +- else if (avctx->qmin != -1) { ++ } else if (avctx->qmin != -1) { + int qval = avctx->qmin > 255 ? 255 : avctx->qmin; + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_MIN_Q_INDEX_INTER, qval); + } + if (ctx->max_qp_p != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_MAX_Q_INDEX_INTER, ctx->max_qp_p); +- } +- else if (avctx->qmax != -1) { ++ } else if (avctx->qmax != -1) { + int qval = avctx->qmax > 255 ? 255 : avctx->qmax; + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_MAX_Q_INDEX_INTER, qval); + } +- + if (ctx->qp_p != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_Q_INDEX_INTER, ctx->qp_p); + } +@@ -422,7 +436,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_AV1_RATE_CONTROL_SKIP_FRAME, ctx->skip_frame); + +- // fill extradata ++ // Fill extradata + res = AMFVariantInit(&var); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "AMFVariantInit() failed with error %d\n", res); + +@@ -433,9 +447,8 @@ FF_ENABLE_DEPRECATION_WARNINGS + guid = IID_AMFBuffer(); + + res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface +- if (res != AMF_OK) { ++ if (res != AMF_OK) + var.pInterface->pVtbl->Release(var.pInterface); +- } + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", res); + + avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer); +@@ -471,20 +484,21 @@ static const AVClass av1_amf_class = { + }; + + const FFCodec ff_av1_amf_encoder = { +- .p.name = "av1_amf", ++ .p.name = "av1_amf", + CODEC_LONG_NAME("AMD AMF AV1 encoder"), +- .p.type = 
AVMEDIA_TYPE_VIDEO, +- .p.id = AV_CODEC_ID_AV1, ++ .p.type = AVMEDIA_TYPE_VIDEO, ++ .p.id = AV_CODEC_ID_AV1, + .init = amf_encode_init_av1, + FF_CODEC_RECEIVE_PACKET_CB(ff_amf_receive_packet), + .close = ff_amf_encode_close, +- .priv_data_size = sizeof(AmfContext), +- .p.priv_class = &av1_amf_class, ++ .priv_data_size = sizeof(AMFEncContext), ++ .p.priv_class = &av1_amf_class, + .defaults = defaults, +- .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | ++ .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1, +- .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, +- .p.pix_fmts = ff_amf_pix_fmts, +- .p.wrapper_name = "amf", ++ .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | ++ FF_CODEC_CAP_INIT_CLEANUP, ++ .p.pix_fmts = ff_amfenc_av1_pix_fmts, ++ .p.wrapper_name = "amf", + .hw_configs = ff_amfenc_hw_configs, + }; +Index: FFmpeg/libavcodec/amfenc_h264.c +=================================================================== +--- libavcodec/amfenc_h264.c ++++ libavcodec/amfenc_h264.c +@@ -16,17 +16,29 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +- + #include "libavutil/internal.h" + #include "libavutil/opt.h" + #include "amfenc.h" + #include "codec_internal.h" + #include "internal.h" +-#include + +-#define OFFSET(x) offsetof(AmfContext, x) ++#define OFFSET(x) offsetof(AMFEncContext, x) + #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM + ++static const enum AVPixelFormat ff_amfenc_h264_pix_fmts[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_BGR0, ++#if CONFIG_D3D11VA ++ AV_PIX_FMT_D3D11, ++#endif ++#if CONFIG_DXVA2 ++ AV_PIX_FMT_DXVA2_VLD, ++#endif ++ AV_PIX_FMT_NONE ++}; ++ + static const AVOption options[] = { + // Static + /// Usage +@@ -190,7 +202,7 @@ static av_cold int amf_encode_init_h264( + { + int ret = 0; + AMF_RESULT res = AMF_OK; +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + 
AMFVariantStruct var = { 0 }; + amf_int64 profile = 0; + amf_int64 profile_level = 0; +@@ -199,10 +211,12 @@ static av_cold int amf_encode_init_h264( + AMFRate framerate; + AMFSize framesize = AMFConstructSize(avctx->width, avctx->height); + int deblocking_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0; ++ amf_int64 color_profile; ++ enum AVPixelFormat pix_fmt; + +- if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { ++ if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den); +- } else { ++ else { + FF_DISABLE_DEPRECATION_WARNINGS + framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num + #if FF_API_TICKS_PER_FRAME +@@ -239,40 +253,32 @@ FF_ENABLE_DEPRECATION_WARNINGS + profile = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH; + break; + } +- if (profile == 0) { ++ if (profile == 0) + profile = ctx->profile; +- } + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PROFILE, profile); + + profile_level = avctx->level; +- if (profile_level == AV_LEVEL_UNKNOWN) { ++ if (profile_level == FF_LEVEL_UNKNOWN) + profile_level = ctx->level; +- } + if (profile_level != 0) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PROFILE_LEVEL, profile_level); + } + + // Maximum Reference Frames +- if (avctx->refs != -1) { ++ if (avctx->refs != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_NUM_REFRAMES, avctx->refs); +- } + if (avctx->sample_aspect_ratio.den && avctx->sample_aspect_ratio.num) { + AMFRatio ratio = AMFConstructRatio(avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den); + AMF_ASSIGN_PROPERTY_RATIO(res, ctx->encoder, AMF_VIDEO_ENCODER_ASPECT_RATIO, ratio); + } + +- /// Color Range (Partial/TV/MPEG or Full/PC/JPEG) +- if (avctx->color_range == AVCOL_RANGE_JPEG) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, 1); +- } +- +- // autodetect rate control method ++ 
// Autodetect rate control method + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN) { + if (ctx->qp_i != -1 || ctx->qp_p != -1 || ctx->qp_b != -1) { + ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP; + av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CQP\n"); +- } else if (avctx->rc_max_rate > 0 ) { ++ } else if (avctx->rc_max_rate > 0) { + ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; + av_log(ctx, AV_LOG_DEBUG, "Rate control turned to Peak VBR\n"); + } else { +@@ -281,10 +287,11 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + } + ++ // Pre-Pass, Pre-Analysis, Two-Pass + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PREENCODE_ENABLE, AMF_VIDEO_ENCODER_PREENCODE_DISABLED); + if (ctx->preencode) +- av_log(ctx, AV_LOG_WARNING, "Preencode is not supported by cqp Rate Control Method, automatically disabled\n"); ++ av_log(ctx, AV_LOG_WARNING, "Pre-Encode is not supported by CQP rate control method, automatically disabled\n"); + } else { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PREENCODE_ENABLE, ctx->preencode); + } +@@ -296,15 +303,16 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + + if (ctx->hw_high_motion_quality_boost != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HIGH_MOTION_QUALITY_BOOST_ENABLE, ((ctx->hw_high_motion_quality_boost == 0) ? 
false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HIGH_MOTION_QUALITY_BOOST_ENABLE, !!ctx->hw_high_motion_quality_boost); + } + ++ // Quality preset + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QUALITY_PRESET, ctx->quality); + + // Dynamic parmaters + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD, ctx->rate_control_mode); + +- /// VBV Buffer ++ // VBV Buffer + if (avctx->rc_buffer_size != 0) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_VBV_BUFFER_SIZE, avctx->rc_buffer_size); + if (avctx->rc_initial_buffer_occupancy != 0) { +@@ -314,7 +322,8 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_INITIAL_VBV_BUFFER_FULLNESS, amf_buffer_fullness); + } + } +- /// Maximum Access Unit Size ++ ++ // Maximum Access Unit Size + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_AU_SIZE, ctx->max_au_size); + + if (ctx->max_au_size) +@@ -324,7 +333,21 @@ FF_ENABLE_DEPRECATION_WARNINGS + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 0); + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 51); +- } else { ++ } else if (ctx->rate_control_mode != AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR) { ++ if (avctx->qmin == -1 && avctx->qmax == -1) { ++ switch (ctx->usage) { ++ case AMF_VIDEO_ENCODER_USAGE_TRANSCONDING: ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 18); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 46); ++ break; ++ case AMF_VIDEO_ENCODER_USAGE_ULTRA_LOW_LATENCY: ++ case AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY: ++ case AMF_VIDEO_ENCODER_USAGE_WEBCAM: ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 22); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 48); ++ break; ++ } 
++ } + if (avctx->qmin != -1) { + int qval = avctx->qmin > 51 ? 51 : avctx->qmin; + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, qval); +@@ -335,42 +358,56 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + } + // QP Values +- if (ctx->qp_i != -1) ++ if (ctx->qp_i != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QP_I, ctx->qp_i); +- if (ctx->qp_p != -1) ++ } ++ if (ctx->qp_p != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QP_P, ctx->qp_p); +- if (ctx->qp_b != -1) ++ } ++ if (ctx->qp_b != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QP_B, ctx->qp_b); ++ } + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_TARGET_BITRATE, avctx->bit_rate); + + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PEAK_BITRATE, avctx->bit_rate); + } ++ + if (avctx->rc_max_rate) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PEAK_BITRATE, avctx->rc_max_rate); + } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) { +- av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n"); ++ av_log(ctx, AV_LOG_WARNING, "Rate control method is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n"); + } + ++ pix_fmt = avctx->hw_frames_ctx ? 
((AVHWFramesContext*)avctx->hw_frames_ctx->data)->sw_format : avctx->pix_fmt; ++ AMF_RETURN_IF_FALSE(ctx, pix_fmt != AV_PIX_FMT_P010, AVERROR_INVALIDDATA, "10-bit input video is not supported by AMF H264 encoder\n"); ++ ++ // Output color depth, profile, transfer and primaries ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_8); ++ color_profile = amf_av_to_amf_color_profile(avctx); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_COLOR_PROFILE, color_profile); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, !!(avctx->color_range == AVCOL_RANGE_JPEG)); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_TRANSFER_CHARACTERISTIC, (amf_int64)avctx->color_trc); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_COLOR_PRIMARIES, (amf_int64)avctx->color_primaries); ++ + if (ctx->preanalysis != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_PRE_ANALYSIS_ENABLE, !!((ctx->preanalysis == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_PRE_ANALYSIS_ENABLE, !!ctx->preanalysis); + } + + res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_PRE_ANALYSIS_ENABLE, &var); +- if ((int)var.int64Value) +- { ++ if ((int)var.int64Value) { + if (ctx->pa_activity_type != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_ACTIVITY_TYPE, ctx->pa_activity_type); + } + if (ctx->pa_scene_change_detection != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_ENABLE, ((ctx->pa_scene_change_detection == 0) ? 
false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_ENABLE, !!ctx->pa_scene_change_detection); + } + if (ctx->pa_scene_change_detection_sensitivity != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_SENSITIVITY, ctx->pa_scene_change_detection_sensitivity); + } + if (ctx->pa_static_scene_detection != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_ENABLE, ((ctx->pa_static_scene_detection == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_ENABLE, !!ctx->pa_static_scene_detection); + } + if (ctx->pa_static_scene_detection_sensitivity != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_SENSITIVITY, ctx->pa_static_scene_detection_sensitivity); +@@ -385,7 +422,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_CAQ_STRENGTH, ctx->pa_caq_strength); + } + if (ctx->pa_frame_sad != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_FRAME_SAD_ENABLE, ((ctx->pa_frame_sad == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_FRAME_SAD_ENABLE, !!ctx->pa_frame_sad); + } + if (ctx->pa_paq_mode != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_PAQ_MODE, ctx->pa_paq_mode); +@@ -394,10 +431,10 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_TAQ_MODE, ctx->pa_taq_mode); + } + if (ctx->pa_adaptive_mini_gop != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ADAPTIVE_MINIGOP, ((ctx->pa_adaptive_mini_gop == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ADAPTIVE_MINIGOP, !!ctx->pa_adaptive_mini_gop); + } + if (ctx->pa_ltr != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_LTR_ENABLE, ((ctx->pa_ltr == 0) ? 
false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_LTR_ENABLE, !!ctx->pa_ltr); + } + if (ctx->pa_lookahead_buffer_depth != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_LOOKAHEAD_BUFFER_DEPTH, ctx->pa_lookahead_buffer_depth); +@@ -422,8 +459,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + av_log(ctx, AVERROR_BUG, "Maxium B frames needs to be greater than the specified B frame count.\n"); + } + } +- } +- else { ++ } else { + if (ctx->max_b_frames != -1) { + av_log(ctx, AVERROR_BUG, "Maxium number of B frames needs to be specified.\n"); + } +@@ -439,41 +475,49 @@ FF_ENABLE_DEPRECATION_WARNINGS + res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res); + +- // Enforce HRD, Filler Data, VBAQ, Frame Skipping, Deblocking Filter ++ // Enforce HRD, Filler Data, Frame Skipping, Deblocking Filter + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENFORCE_HRD, !!ctx->enforce_hrd); + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FILLER_DATA_ENABLE, !!ctx->filler_data); + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_SKIP_FRAME_ENABLE, !!ctx->skip_frame); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_DE_BLOCKING_FILTER, !!deblocking_filter); ++ ++ // VBAQ + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENABLE_VBAQ, 0); +- if (ctx->enable_vbaq) ++ if (ctx->enable_vbaq) { ++ ctx->enable_vbaq = 0; + av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by cqp Rate Control Method, automatically disabled\n"); ++ } + } else { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENABLE_VBAQ, !!ctx->enable_vbaq); + } +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_DE_BLOCKING_FILTER, !!deblocking_filter); + + // Keyframe Interval + 
AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_IDR_PERIOD, avctx->gop_size); + + // Header Insertion Spacing +- if (ctx->header_spacing >= 0) ++ if (ctx->header_spacing >= 0) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEADER_INSERTION_SPACING, ctx->header_spacing); ++ } + + // Intra-Refresh, Slicing +- if (ctx->intra_refresh_mb > 0) ++ if (ctx->intra_refresh_mb > 0) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_INTRA_REFRESH_NUM_MBS_PER_SLOT, ctx->intra_refresh_mb); +- if (avctx->slices > 1) ++ } ++ if (avctx->slices > 1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_SLICES_PER_FRAME, avctx->slices); ++ } + + // Coding +- if (ctx->coding_mode != 0) ++ if (ctx->coding_mode != 0) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_CABAC_ENABLE, ctx->coding_mode); ++ } + + // Motion Estimation + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_MOTION_HALF_PIXEL, !!ctx->me_half_pel); + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_MOTION_QUARTERPIXEL, !!ctx->me_quarter_pel); + +- // fill extradata ++ // Fill extradata + res = AMFVariantInit(&var); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "AMFVariantInit() failed with error %d\n", res); + +@@ -484,9 +528,8 @@ FF_ENABLE_DEPRECATION_WARNINGS + guid = IID_AMFBuffer(); + + res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface +- if (res != AMF_OK) { ++ if (res != AMF_OK) + var.pInterface->pVtbl->Release(var.pInterface); +- } + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", res); + + avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer); +@@ -505,15 +548,15 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + + static const FFCodecDefault defaults[] = { +- { "refs", "-1" }, +- { "aspect", "0" }, +- { "qmin", "-1" }, +- { "qmax", "-1" }, +- { "b", "2M" }, +- { "g", "250" }, 
+- { "slices", "1" }, +- { "flags", "+loop"}, +- { NULL }, ++ { "refs", "-1" }, ++ { "aspect", "0" }, ++ { "qmin", "-1" }, ++ { "qmax", "-1" }, ++ { "b", "2M" }, ++ { "g", "250" }, ++ { "slices", "1" }, ++ { "flags", "+loop" }, ++ { NULL }, + }; + + static const AVClass h264_amf_class = { +@@ -531,14 +574,14 @@ const FFCodec ff_h264_amf_encoder = { + .init = amf_encode_init_h264, + FF_CODEC_RECEIVE_PACKET_CB(ff_amf_receive_packet), + .close = ff_amf_encode_close, +- .priv_data_size = sizeof(AmfContext), ++ .priv_data_size = sizeof(AMFEncContext), + .p.priv_class = &h264_amf_class, + .defaults = defaults, + .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1, + .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | + FF_CODEC_CAP_INIT_CLEANUP, +- .p.pix_fmts = ff_amf_pix_fmts, ++ .p.pix_fmts = ff_amfenc_h264_pix_fmts, + .p.wrapper_name = "amf", + .hw_configs = ff_amfenc_hw_configs, + }; +Index: FFmpeg/libavcodec/amfenc_hevc.c +=================================================================== +--- libavcodec/amfenc_hevc.c ++++ libavcodec/amfenc_hevc.c +@@ -21,10 +21,25 @@ + #include "amfenc.h" + #include "codec_internal.h" + #include "internal.h" +-#include + +-#define OFFSET(x) offsetof(AmfContext, x) ++#define OFFSET(x) offsetof(AMFEncContext, x) + #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM ++ ++static const enum AVPixelFormat ff_amfenc_hevc_pix_fmts[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_BGR0, ++#if CONFIG_D3D11VA ++ AV_PIX_FMT_D3D11, ++#endif ++#if CONFIG_DXVA2 ++ AV_PIX_FMT_DXVA2_VLD, ++#endif ++ AV_PIX_FMT_NONE ++}; ++ + static const AVOption options[] = { + { "usage", "Set the encoding usage", OFFSET(usage), AV_OPT_TYPE_INT, {.i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING }, AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING, AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY_HIGH_QUALITY, VE, .unit = "usage" }, + { "transcoding", "Generic Transcoding", 0, 
AV_OPT_TYPE_CONST, {.i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING }, 0, 0, VE, .unit = "usage" }, +@@ -151,8 +166,8 @@ static av_cold int amf_encode_init_hevc( + { + int ret = 0; + AMF_RESULT res = AMF_OK; +- AmfContext *ctx = avctx->priv_data; +- AMFVariantStruct var = {0}; ++ AMFEncContext *ctx = avctx->priv_data; ++ AMFVariantStruct var = { 0 }; + amf_int64 profile = 0; + amf_int64 profile_level = 0; + AMFBuffer *buffer; +@@ -160,10 +175,12 @@ static av_cold int amf_encode_init_hevc( + AMFRate framerate; + AMFSize framesize = AMFConstructSize(avctx->width, avctx->height); + int deblocking_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0; ++ amf_int64 color_profile; ++ enum AVPixelFormat pix_fmt; + +- if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { ++ if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den); +- } else { ++ else { + FF_DISABLE_DEPRECATION_WARNINGS + framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num + #if FF_API_TICKS_PER_FRAME +@@ -176,7 +193,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + if ((ret = ff_amf_encode_init(avctx)) < 0) + return ret; + +- // init static parameters ++ // Static parameters + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_USAGE, ctx->usage); + + AMF_ASSIGN_PROPERTY_SIZE(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_FRAMESIZE, framesize); +@@ -187,23 +204,25 @@ FF_ENABLE_DEPRECATION_WARNINGS + case AV_PROFILE_HEVC_MAIN: + profile = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN; + break; ++ case AV_PROFILE_HEVC_MAIN_10: ++ profile = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN_10; ++ break; + default: + break; + } +- if (profile == 0) { ++ if (profile == 0) + profile = ctx->profile; +- } + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PROFILE, profile); + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_TIER, ctx->tier); + + profile_level = avctx->level; +- if 
(profile_level == AV_LEVEL_UNKNOWN) { ++ if (profile_level == FF_LEVEL_UNKNOWN) + profile_level = ctx->level; +- } + if (profile_level != 0) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PROFILE_LEVEL, profile_level); + } ++ + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET, ctx->quality); + // Maximum Reference Frames + if (avctx->refs != -1) { +@@ -224,30 +243,12 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_DE_BLOCKING_FILTER_DISABLE, deblocking_filter); + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE, ctx->header_insertion_mode); + +- // Rate control +- // autodetect rate control method +- if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN) { +- if (ctx->min_qp_i != -1 || ctx->max_qp_i != -1 || +- ctx->min_qp_p != -1 || ctx->max_qp_p != -1 || +- ctx->qp_i !=-1 || ctx->qp_p != -1) { +- ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CQP\n"); +- } else if (avctx->rc_max_rate > 0) { +- ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to Peak VBR\n"); +- } else { +- ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CBR\n"); +- } +- } +- + // Pre-Pass, Pre-Analysis, Two-Pass + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PREENCODE_ENABLE, 0); + if (ctx->preencode) + av_log(ctx, AV_LOG_WARNING, "Preencode is not supported by cqp Rate Control Method, automatically disabled\n"); +- } +- else { ++ } else { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PREENCODE_ENABLE, ctx->preencode); + } + +@@ 
-258,7 +259,24 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + + if (ctx->hw_high_motion_quality_boost != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_HIGH_MOTION_QUALITY_BOOST_ENABLE, ((ctx->hw_high_motion_quality_boost == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_HIGH_MOTION_QUALITY_BOOST_ENABLE, !!ctx->hw_high_motion_quality_boost); ++ } ++ ++ // Rate control properties ++ // Auto detect rate control method ++ if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN) { ++ if (ctx->min_qp_i != -1 || ctx->max_qp_i != -1 || ++ ctx->min_qp_p != -1 || ctx->max_qp_p != -1 || ++ ctx->qp_i !=-1 || ctx->qp_p != -1) { ++ ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP; ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CQP\n"); ++ } else if (avctx->rc_max_rate > 0) { ++ ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to Peak VBR\n"); ++ } else { ++ ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR; ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CBR\n"); ++ } + } + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD, ctx->rate_control_mode); +@@ -273,17 +291,22 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + } + ++ // VBAQ + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, false); +- if (ctx->enable_vbaq) +- av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by cqp Rate Control Method, automatically disabled\n"); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, 0); ++ if (ctx->enable_vbaq) { ++ ctx->enable_vbaq = 0; ++ av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by CQP rate control method, automatically disabled\n"); 
++ } + } else { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, !!ctx->enable_vbaq); + } ++ ++ // Motion estimation + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MOTION_HALF_PIXEL, ctx->me_half_pel); + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MOTION_QUARTERPIXEL, ctx->me_quarter_pel); + +- // init dynamic rate control params ++ // Dynamic rate control params + if (ctx->max_au_size) + ctx->enforce_hrd = 1; + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENFORCE_HRD, ctx->enforce_hrd); +@@ -297,27 +320,39 @@ FF_ENABLE_DEPRECATION_WARNINGS + if (avctx->rc_max_rate) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PEAK_BITRATE, avctx->rc_max_rate); + } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) { +- av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n"); ++ av_log(ctx, AV_LOG_WARNING, "Rate control method is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n"); ++ } ++ ++ // Output color depth, profile, transfer and primaries ++ pix_fmt = avctx->hw_frames_ctx ? 
((AVHWFramesContext*)avctx->hw_frames_ctx->data)->sw_format : avctx->pix_fmt; ++ if (pix_fmt == AV_PIX_FMT_P010) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_10); ++ } else { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_8); + } ++ color_profile = amf_av_to_amf_color_profile(avctx); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_COLOR_PROFILE, color_profile); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE, !!(avctx->color_range == AVCOL_RANGE_JPEG)); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_TRANSFER_CHARACTERISTIC, (amf_int64)avctx->color_trc); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_COLOR_PRIMARIES, (amf_int64)avctx->color_primaries); + + if (ctx->preanalysis != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PRE_ANALYSIS_ENABLE, !!((ctx->preanalysis == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PRE_ANALYSIS_ENABLE, !!ctx->preanalysis); + } + + res = ctx->encoder->pVtbl->GetProperty(ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PRE_ANALYSIS_ENABLE, &var); +- if ((int)var.int64Value) +- { ++ if ((int)var.int64Value) { + if (ctx->pa_activity_type != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_ACTIVITY_TYPE, ctx->pa_activity_type); + } + if (ctx->pa_scene_change_detection != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_ENABLE, ((ctx->pa_scene_change_detection == 0) ? 
false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_ENABLE, !!ctx->pa_scene_change_detection); + } + if (ctx->pa_scene_change_detection_sensitivity != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_SCENE_CHANGE_DETECTION_SENSITIVITY, ctx->pa_scene_change_detection_sensitivity); + } + if (ctx->pa_static_scene_detection != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_ENABLE, ((ctx->pa_static_scene_detection == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_ENABLE, !!ctx->pa_static_scene_detection); + } + if (ctx->pa_static_scene_detection_sensitivity != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_STATIC_SCENE_DETECTION_SENSITIVITY, ctx->pa_static_scene_detection_sensitivity); +@@ -332,7 +367,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_CAQ_STRENGTH, ctx->pa_caq_strength); + } + if (ctx->pa_frame_sad != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_FRAME_SAD_ENABLE, ((ctx->pa_frame_sad == 0) ? false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_FRAME_SAD_ENABLE, !!ctx->pa_frame_sad); + } + if (ctx->pa_paq_mode != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_PAQ_MODE, ctx->pa_paq_mode); +@@ -341,7 +376,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_TAQ_MODE, ctx->pa_taq_mode); + } + if (ctx->pa_ltr != -1) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_LTR_ENABLE, ((ctx->pa_ltr == 0) ? 
false : true)); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_PA_LTR_ENABLE, !!ctx->pa_ltr); + } + if (ctx->pa_lookahead_buffer_depth != -1) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_PA_LOOKAHEAD_BUFFER_DEPTH, ctx->pa_lookahead_buffer_depth); +@@ -351,36 +386,63 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + } + +- // init encoder ++ // Init encoder + res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res); + +- // init dynamic picture control params ++ // Dynamic picture control params + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_AU_SIZE, ctx->max_au_size); + +- if (ctx->min_qp_i != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, ctx->min_qp_i); +- } else if (avctx->qmin != -1) { +- int qval = avctx->qmin > 51 ? 51 : avctx->qmin; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, qval); +- } +- if (ctx->max_qp_i != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, ctx->max_qp_i); +- } else if (avctx->qmax != -1) { +- int qval = avctx->qmax > 51 ? 51 : avctx->qmax; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, qval); +- } +- if (ctx->min_qp_p != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, ctx->min_qp_p); +- } else if (avctx->qmin != -1) { +- int qval = avctx->qmin > 51 ? 51 : avctx->qmin; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, qval); +- } +- if (ctx->max_qp_p != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, ctx->max_qp_p); +- } else if (avctx->qmax != -1) { +- int qval = avctx->qmax > 51 ? 
51 : avctx->qmax; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, qval); ++ // QP Minimum / Maximum ++ if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 0); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 51); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 0); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 51); ++ } else { ++ if (ctx->min_qp_i != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, ctx->min_qp_i); ++ } else if (avctx->qmin != -1) { ++ int qval = avctx->qmin > 51 ? 51 : avctx->qmin; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, qval); ++ } ++ if (ctx->max_qp_i != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, ctx->max_qp_i); ++ } else if (avctx->qmax != -1) { ++ int qval = avctx->qmax > 51 ? 51 : avctx->qmax; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, qval); ++ } ++ if (ctx->min_qp_p != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, ctx->min_qp_p); ++ } else if (avctx->qmin != -1) { ++ int qval = avctx->qmin > 51 ? 51 : avctx->qmin; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, qval); ++ } ++ if (ctx->max_qp_p != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, ctx->max_qp_p); ++ } else if (avctx->qmax != -1) { ++ int qval = avctx->qmax > 51 ? 
51 : avctx->qmax; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, qval); ++ } ++ if (ctx->min_qp_i == -1 && ctx->max_qp_i == -1 && ctx->min_qp_p == -1 && ctx->max_qp_p == -1 && ++ avctx->qmin == -1 && avctx->qmax == -1) { ++ switch (ctx->usage) { ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING: ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 18); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 46); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 18); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 46); ++ break; ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_ULTRA_LOW_LATENCY: ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY: ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM: ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 22); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 48); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 22); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 48); ++ break; ++ } ++ } + } + + if (ctx->qp_p != -1) { +@@ -391,7 +453,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_SKIP_FRAME_ENABLE, ctx->skip_frame); + +- // fill extradata ++ // Fill extradata + res = AMFVariantInit(&var); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "AMFVariantInit() failed with error %d\n", res); + +@@ -402,9 +464,8 @@ FF_ENABLE_DEPRECATION_WARNINGS + guid = IID_AMFBuffer(); + + res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface +- if (res != AMF_OK) { ++ if (res != AMF_OK) + var.pInterface->pVtbl->Release(var.pInterface); +- } + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", 
res); + + avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer); +@@ -421,6 +482,7 @@ FF_ENABLE_DEPRECATION_WARNINGS + + return 0; + } ++ + static const FFCodecDefault defaults[] = { + { "refs", "-1" }, + { "aspect", "0" }, +@@ -431,6 +493,7 @@ static const FFCodecDefault defaults[] = + { "qmax", "-1" }, + { NULL }, + }; ++ + static const AVClass hevc_amf_class = { + .class_name = "hevc_amf", + .item_name = av_default_item_name, +@@ -446,14 +509,14 @@ const FFCodec ff_hevc_amf_encoder = { + .init = amf_encode_init_hevc, + FF_CODEC_RECEIVE_PACKET_CB(ff_amf_receive_packet), + .close = ff_amf_encode_close, +- .priv_data_size = sizeof(AmfContext), ++ .priv_data_size = sizeof(AMFEncContext), + .p.priv_class = &hevc_amf_class, + .defaults = defaults, + .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1, + .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | + FF_CODEC_CAP_INIT_CLEANUP, +- .p.pix_fmts = ff_amf_pix_fmts, ++ .p.pix_fmts = ff_amfenc_hevc_pix_fmts, + .p.wrapper_name = "amf", + .hw_configs = ff_amfenc_hw_configs, + }; diff --git a/cross/ffmpeg7/patches/1006-jellyfin-0006-add-opencl-scaler-and-pixfmt-converter-impl.patch b/cross/ffmpeg7/patches/1006-jellyfin-0006-add-opencl-scaler-and-pixfmt-converter-impl.patch new file mode 100644 index 00000000000..dbcd54d6300 --- /dev/null +++ b/cross/ffmpeg7/patches/1006-jellyfin-0006-add-opencl-scaler-and-pixfmt-converter-impl.patch @@ -0,0 +1,1111 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3889,6 +3889,7 @@ rubberband_filter_deps="librubberband" + sab_filter_deps="gpl swscale" + scale2ref_filter_deps="swscale" + scale_filter_deps="swscale" ++scale_opencl_filter_deps="opencl" + scale_qsv_filter_deps="libmfx" + scale_qsv_filter_select="qsvvpp" + scdet_filter_select="scene_sad" +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- 
libavfilter/Makefile ++++ libavfilter/Makefile +@@ -459,6 +459,7 @@ OBJS-$(CONFIG_SCALE_FILTER) + OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \ + vf_scale_cuda.ptx.o cuda/load_helper.o + OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o ++OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o opencl.o opencl/scale.o scale_eval.o + OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_vpp_qsv.o + OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o + OBJS-$(CONFIG_SCALE_VT_FILTER) += vf_scale_vt.o scale_eval.o +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== +--- libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -432,6 +432,7 @@ extern const AVFilter ff_vf_sab; + extern const AVFilter ff_vf_scale; + extern const AVFilter ff_vf_scale_cuda; + extern const AVFilter ff_vf_scale_npp; ++extern const AVFilter ff_vf_scale_opencl; + extern const AVFilter ff_vf_scale_qsv; + extern const AVFilter ff_vf_scale_vaapi; + extern const AVFilter ff_vf_scale_vt; +Index: FFmpeg/libavfilter/opencl/scale.cl +=================================================================== +--- /dev/null ++++ libavfilter/opencl/scale.cl +@@ -0,0 +1,276 @@ ++/* ++ * Copyright (c) 2018 Gabriel Machado ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_CLAMP_TO_EDGE | ++ CLK_FILTER_NEAREST); ++ ++__constant sampler_t sampler2 = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_NONE | ++ CLK_FILTER_NEAREST); ++ ++__constant sampler_t d_sampler = (CLK_NORMALIZED_COORDS_TRUE | ++ CLK_ADDRESS_REPEAT | ++ CLK_FILTER_NEAREST); ++ ++#ifdef ENABLE_DITHER ++float get_dithered_y(float y, float d) { ++ return floor(y * dither_quantization + d + 0.5f / dither_size2) * 1.0f / dither_quantization; ++} ++#endif ++ ++#ifdef CONV ++__kernel void conv_yuv(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++ __write_only image2d_t dst2, ++ __read_only image2d_t src2 ++#ifdef NON_SEMI_PLANAR_OUT ++ ,__write_only image2d_t dst3 ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ ,__read_only image2d_t src3 ++#endif ++#ifdef ENABLE_DITHER ++ ,__read_only image2d_t dither ++#endif ++ ) ++{ ++ int xi = get_global_id(0); ++ int yi = get_global_id(1); ++ // each work item process four pixels ++ int x = 2 * xi; ++ int y = 2 * yi; ++ ++#ifdef ENABLE_DITHER ++ float2 ncoords = convert_float2((int2)(xi, yi)) * ++ native_recip((float2)(get_image_width(dither), get_image_height(dither))); ++#endif ++ ++ if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) { ++ float y0 = read_imagef(src1, sampler, (int2)(x, y)).x; ++ float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x; ++ float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x; ++ float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x; ++#ifdef NON_SEMI_PLANAR_IN ++ float u = read_imagef(src2, sampler, (int2)(xi, yi)).x; ++ float v = read_imagef(src3, sampler, (int2)(xi, yi)).x; ++#else ++ float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy; ++ float u = uv.x; ++ 
float v = uv.y; ++#endif ++#ifdef ENABLE_DITHER ++ float d = read_imagef(dither, d_sampler, ncoords).x; ++ y0 = get_dithered_y(y0, d); ++ y1 = get_dithered_y(y1, d); ++ y2 = get_dithered_y(y2, d); ++ y3 = get_dithered_y(y3, d); ++#endif ++ write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x + 1, y), (float4)(y1, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x, y + 1), (float4)(y2, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(y3, 0.0f, 0.0f, 1.0f)); ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, (int2)(xi, yi), (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, (int2)(xi, yi), (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, (int2)(xi, yi), (float4)(u, v, 0.0f, 1.0f)); ++#endif ++ } ++} ++#endif ++ ++#ifdef NEIGHBOR ++__kernel void neighbor(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++#ifdef ENABLE_DITHER ++ __read_only image2d_t dither, ++#endif ++ int2 src_size) ++{ ++ int xi = get_global_id(0); ++ int yi = get_global_id(1); ++ ++ int2 dst_pos = { xi, yi }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++#ifdef ENABLE_DITHER ++ float2 ncoords = convert_float2((int2)(xi, yi)) * ++ native_recip((float2)(get_image_width(dither), get_image_height(dither))); ++#endif ++ ++ int2 read_pos = clamp(src_pos, 0, src_size - 1); ++ float y = read_imagef(src1, sampler2, read_pos).x; ++ ++#ifdef ENABLE_DITHER ++ float d = read_imagef(dither, d_sampler, ncoords).x; ++ y = get_dithered_y(y, d); ++#endif ++ ++ write_imagef(dst1, dst_pos, (float4)(y, 0.0f, 0.0f, 1.0f)); ++} ++ ++__kernel void neighbor_uv(__write_only image2d_t dst2, ++ __read_only image2d_t src2, ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t 
src3, ++#endif ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int2 read_pos = clamp(src_pos, 0, src_size - 1); ++#ifdef NON_SEMI_PLANAR_IN ++ float u = read_imagef(src2, sampler2, read_pos).x; ++ float v = read_imagef(src3, sampler2, read_pos).x; ++#else ++ float2 uv = read_imagef(src2, sampler2, read_pos).xy; ++ float u = uv.x; ++ float v = uv.y; ++#endif ++ ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, dst_pos, (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, dst_pos, (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, dst_pos, (float4)(u, v, 0.0f, 1.0f)); ++#endif ++} ++#endif ++ ++#ifdef SCALE ++__kernel void scale(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++#ifdef ENABLE_DITHER ++ __read_only image2d_t dither, ++#endif ++ __constant float *cx, ++ __constant float *cy, ++ int2 src_size) ++{ ++ int xi = get_global_id(0); ++ int yi = get_global_id(1); ++ ++ int2 dst_pos = { xi, yi }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++#ifdef ENABLE_DITHER ++ float2 ncoords = convert_float2((int2)(xi, yi)) * ++ native_recip((float2)(get_image_width(dither), get_image_height(dither))); ++#endif ++ ++ int i, j; ++ int filterw2 = filterw >> 1; ++ int filterh2 = filterh >> 1; ++ int2 src_size_edge = src_size - 1; ++ float4 col1 = 0.0f, s1 = 0.0f; ++ ++#pragma unroll ++ for (i = 0; i < filterh; ++i, s1 = 0.0f) { ++ #pragma unroll ++ for (j = 0; j < filterw; ++j) { ++ int2 read_pos = clamp(src_pos + (int2)(filterw2 - j, filterh2 - i), 0, src_size_edge); ++ float4 c1 = 
read_imagef(src1, sampler2, read_pos); ++ s1 += c1 * cx[dst_pos.x * filterw + j]; ++ } ++ col1 += s1 * cy[dst_pos.y * filterh + i]; ++ } ++ ++ float y = col1.x; ++#ifdef ENABLE_DITHER ++ float d = read_imagef(dither, d_sampler, ncoords).x; ++ y = get_dithered_y(y, d); ++#endif ++ ++ write_imagef(dst1, dst_pos, (float4)(y, 0.0f, 0.0f, 1.0f)); ++} ++ ++__kernel void scale_uv(__write_only image2d_t dst2, ++ __read_only image2d_t src2, ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t src3, ++#endif ++ __constant float *cx, ++ __constant float *cy, ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int i, j; ++ int filterw2 = filterw >> 1; ++ int filterh2 = filterh >> 1; ++ int2 src_size_edge = src_size - 1; ++ float4 col2 = 0.0f, col3 = 0.0f, s2 = 0.0f, s3 = 0.0f; ++ ++#pragma unroll ++ for (i = 0; i < filterh; ++i, s2 = s3 = 0.0f) { ++ #pragma unroll ++ for (j = 0; j < filterw; ++j) { ++ int2 read_pos = clamp(src_pos + (int2)(filterw2 - j, filterh2 - i), 0, src_size_edge); ++ float4 c2 = read_imagef(src2, sampler2, read_pos); ++ s2 += c2 * cx[dst_pos.x * filterw + j]; ++#ifdef NON_SEMI_PLANAR_IN ++ float4 c3 = read_imagef(src3, sampler2, read_pos); ++ s3 += c3 * cx[dst_pos.x * filterw + j]; ++#endif ++ } ++ col2 += s2 * cy[dst_pos.y * filterh + i]; ++#ifdef NON_SEMI_PLANAR_IN ++ col3 += s3 * cy[dst_pos.y * filterh + i]; ++#endif ++ } ++ ++#ifdef NON_SEMI_PLANAR_IN ++ float u = col2.x; ++ float v = col3.x; ++#else ++ float u = col2.x; ++ float v = col2.y; ++#endif ++ ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, dst_pos, (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, dst_pos, (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ 
write_imagef(dst2, dst_pos, (float4)(u, v, 0.0f, 1.0f)); ++#endif ++} ++#endif +Index: FFmpeg/libavfilter/opencl_source.h +=================================================================== +--- libavfilter/opencl_source.h ++++ libavfilter/opencl_source.h +@@ -29,6 +29,7 @@ extern const char *ff_source_nlmeans_cl; + extern const char *ff_source_overlay_cl; + extern const char *ff_source_pad_cl; + extern const char *ff_source_remap_cl; ++extern const char *ff_source_scale_cl; + extern const char *ff_source_tonemap_cl; + extern const char *ff_source_transpose_cl; + extern const char *ff_source_unsharp_cl; +Index: FFmpeg/libavfilter/vf_scale_opencl.c +=================================================================== +--- /dev/null ++++ libavfilter/vf_scale_opencl.c +@@ -0,0 +1,777 @@ ++/* ++ * Copyright (c) 2018 Gabriel Machado ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/avassert.h" ++#include "libavutil/common.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#include "avfilter.h" ++#include "internal.h" ++#include "opencl.h" ++#include "opencl_source.h" ++#include "scale_eval.h" ++#include "video.h" ++#include "dither_matrix.h" ++ ++#define OPENCL_SOURCE_NB 2 ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUV420P16, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++}; ++ ++enum filters { ++ F_AREA, ++ F_BICUBIC, ++ F_BILINEAR, ++ F_GAUSSIAN, ++ F_LANCZOS, ++ F_NEIGHBOR, ++ F_SINC, ++ F_SPLINE, ++ F_EXPERIMENTAL ++}; ++ ++static const int filter_radius[] = { ++ [F_AREA] = 1, ++ [F_BICUBIC] = 2, ++ [F_BILINEAR] = 1, ++ [F_GAUSSIAN] = 4, ++ [F_LANCZOS] = 3, ++ [F_NEIGHBOR] = -1, ++ [F_SINC] = 10, ++ [F_SPLINE] = 10, ++ [F_EXPERIMENTAL] = 4 ++}; ++ ++typedef struct ScaleOpenCLContext { ++ OpenCLFilterContext ocf; ++ ++ cl_command_queue command_queue; ++ cl_mem cx, cy; ++ cl_mem dither_image; ++ cl_kernel kernel; ++ cl_kernel kernel_uv; ++ const char *kernel_name; ++ const char *kernel_name_uv; ++ ++ char *w_expr, *h_expr; ++ int dst_w, dst_h; ++ int src_w, src_h; ++ int passthrough; ++ int algorithm; ++ int force_original_aspect_ratio; ++ int force_divisible_by; ++ enum AVPixelFormat format; ++ ++ enum AVPixelFormat in_fmt, out_fmt; ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++ int in_planes, out_planes; ++ ++ int filterw, filterh; ++ int initialised; ++} ScaleOpenCLContext; ++ ++static float netravali(float t, float B, float C) ++{ ++ if (t > 2) { ++ return 0; ++ } else { ++ float tt = t * t; ++ float ttt = t * tt; ++ if (t < 1) { ++ return ((12 - 9 * B - 6 * C) 
* ttt + ++ (-18 + 12 * B + 6 * C) * tt + ++ (6 - 2 * B)) / 6; ++ } else { ++ return ((-B - 6 * C) * ttt + ++ (6 * B + 30 * C) * tt + ++ (-12 * B - 48 * C) * t + ++ (8 * B + 24 * C)) / 6; ++ } ++ } ++} ++ ++static float sinc(float t) ++{ ++ return (t == 0) ? 1.0 : sin(t * M_PI) / (t * M_PI); ++} ++ ++static float lanczos(float t, float a) ++{ ++ return (t < a) ? sinc(t) * sinc(t / a) : 0; ++} ++ ++static double spline(double a, double b, double c, double d, double dist) ++{ ++ if (dist <= 1.0) ++ return ((d * dist + c) * dist + b) * dist + a; ++ else ++ return spline(0.0, ++ b + 2.0 * c + 3.0 * d, ++ c + 3.0 * d, ++ -b - 3.0 * c - 6.0 * d, ++ dist - 1.0); ++} ++ ++static float calc_weight(int algorithm, float ratio, float t) ++{ ++ t = fabs(t); ++ ++ switch (algorithm) { ++ case F_AREA: { ++ float t2 = t - 0.5; ++ if (t2 * ratio < -0.5) ++ return 1; ++ else if (t2 * ratio < 0.5) ++ return -t2 * ratio + 0.5; ++ else ++ return 0; ++ } ++ ++ case F_BICUBIC: { ++ const float B = 0, C = 0.6; ++ return netravali(t, B, C); ++ } ++ ++ case F_BILINEAR: ++ return t < 1 ? 
(1 - t) : 0; ++ ++ case F_EXPERIMENTAL: { ++ double A = 1.0; ++ double c; ++ ++ if (t < 1.0) ++ c = cos(t * M_PI); ++ else ++ c = -1.0; ++ if (c < 0.0) ++ c = -pow(-c, A); ++ else ++ c = pow(c, A); ++ return c * 0.5 + 0.5; ++ } ++ ++ case F_GAUSSIAN: { ++ const float p = 3.0; ++ return exp2(-p * t * t); ++ } ++ ++ case F_LANCZOS: { ++ return lanczos(t, filter_radius[algorithm]); ++ } ++ ++ case F_NEIGHBOR: ++ return 1; ++ ++ case F_SINC: ++ return sinc(t); ++ ++ case F_SPLINE: { ++ const double p = -2.196152422706632; ++ return spline(1.0, 0.0, p, -p - 1.0, t); ++ } ++ } ++ ++ return 0; ++} ++ ++static int scale_opencl_init(AVFilterContext *avctx) ++{ ++ ScaleOpenCLContext *ctx = avctx->priv; ++ AVBPrint header; ++ const char *opencl_sources[OPENCL_SOURCE_NB]; ++ size_t m_origin[3] = {0}; ++ size_t m_region[3] = {ff_fruit_dither_size, ff_fruit_dither_size, 1}; ++ size_t m_row_pitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]); ++ cl_event event; ++ cl_int cle; ++ int i, j, err; ++ float scalex, scaley; ++ float *cx = NULL, *cy = NULL; ++ ++ av_bprint_init(&header, 512, AV_BPRINT_SIZE_UNLIMITED); ++ ++ if (ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h) { ++ if (ctx->passthrough && ctx->in_fmt == ctx->out_fmt) { ++ ctx->initialised = 1; ++ return 0; ++ } else { ++ av_bprintf(&header, "#define CONV\n"); ++ ctx->kernel_name = "conv_yuv"; ++ } ++ } else if (ctx->algorithm == F_NEIGHBOR) { ++ av_bprintf(&header, "#define NEIGHBOR\n"); ++ ctx->kernel_name = "neighbor"; ++ ctx->kernel_name_uv = "neighbor_uv"; ++ } else { ++ av_bprintf(&header, "#define SCALE\n"); ++ ctx->kernel_name = "scale"; ++ ctx->kernel_name_uv = "scale_uv"; ++ ++ scalex = FFMAX((float)(ctx->src_w / ctx->dst_w), 1); ++ scaley = FFMAX((float)(ctx->src_h / ctx->dst_h), 1); ++ ctx->filterw = ceil(2 * filter_radius[ctx->algorithm] * scalex); ++ ctx->filterh = ceil(2 * filter_radius[ctx->algorithm] * scaley); ++ ++ ctx->filterw = FFMIN(ctx->filterw, ctx->src_w - 2); ++ ctx->filterw = 
FFMAX(ctx->filterw, 1); ++ ctx->filterh = FFMIN(ctx->filterh, ctx->src_h - 2); ++ ctx->filterh = FFMAX(ctx->filterh, 1); ++ ++ av_bprintf(&header, "#define filterw %d\n", ctx->filterw); ++ av_bprintf(&header, "#define filterh %d\n", ctx->filterh); ++ ++ av_log(avctx, AV_LOG_DEBUG, "Filter size: %dx%d.\n", ctx->filterw, ctx->filterh); ++ ++ cx = av_malloc_array(ctx->dst_w * ctx->filterw, sizeof(cl_float)); ++ cy = av_malloc_array(ctx->dst_h * ctx->filterh, sizeof(cl_float)); ++ ++ if (!cx || !cy) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ for (i = 0; i < ctx->dst_w; ++i) { ++ float s_x = (i + 0.5) * ctx->src_w / ctx->dst_w - 0.5; ++ float t = s_x - floor(s_x); // fract ++ ++ float sum = 0; ++ for (j = 0; j < ctx->filterw; ++j) { ++ int x = ctx->filterw / 2 - j; ++ sum += cx[i * ctx->filterw + j] = calc_weight(ctx->algorithm, ++ scalex, ++ (x - t) / scalex); ++ } ++ ++ for (j = 0; j < ctx->filterw; ++j) ++ cx[i * ctx->filterw + j] /= sum; ++ } ++ ++ for (i = 0; i < ctx->dst_h; ++i) { ++ float s_y = (i + 0.5) * ctx->src_h / ctx->dst_h - 0.5; ++ float t = s_y - floor(s_y); // fract ++ ++ float sum = 0; ++ for (j = 0; j < ctx->filterh; ++j) { ++ int y = ctx->filterh / 2 - j; ++ sum += cy[i * ctx->filterh + j] = calc_weight(ctx->algorithm, ++ scaley, ++ (y - t) / scaley); ++ } ++ ++ for (j = 0; j < ctx->filterh; ++j) ++ cy[i * ctx->filterh + j] /= sum; ++ } ++ ++ ctx->cx = clCreateBuffer(ctx->ocf.hwctx->context, ++ CL_MEM_READ_ONLY | ++ CL_MEM_COPY_HOST_PTR | ++ CL_MEM_HOST_NO_ACCESS, ++ ctx->dst_w * ctx->filterw * sizeof(cl_float), ++ cx, ++ &cle); ++ ++ ctx->cy = clCreateBuffer(ctx->ocf.hwctx->context, ++ CL_MEM_READ_ONLY | ++ CL_MEM_COPY_HOST_PTR | ++ CL_MEM_HOST_NO_ACCESS, ++ ctx->dst_h * ctx->filterh * sizeof(cl_float), ++ cy, ++ &cle); ++ av_free(cx); ++ av_free(cy); ++ if (!ctx->cx || !ctx->cy) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create weights buffer: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ ++ if (ctx->in_planes > 2) ++ 
av_bprintf(&header, "#define NON_SEMI_PLANAR_IN\n"); ++ ++ if (ctx->out_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_OUT\n"); ++ ++ if (ctx->in_desc->comp[0].depth > ctx->out_desc->comp[0].depth) { ++ av_bprintf(&header, "#define ENABLE_DITHER\n"); ++ av_bprintf(&header, "__constant float dither_size2 = %.4ff;\n", (float)(ff_fruit_dither_size * ff_fruit_dither_size)); ++ av_bprintf(&header, "__constant float dither_quantization = %.4ff;\n", (float)((1 << ctx->out_desc->comp[0].depth) - 1)); ++ } ++ ++ av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); ++ opencl_sources[0] = header.str; ++ opencl_sources[1] = ff_source_scale_cl; ++ err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB); ++ ++ av_bprint_finalize(&header, NULL); ++ if (err < 0) ++ goto fail; ++ ++ ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, ++ ctx->ocf.hwctx->device_id, ++ 0, &cle); ++ if (!ctx->command_queue) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL command queue: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->in_desc->comp[0].depth > ctx->out_desc->comp[0].depth) { ++ cl_image_format image_format = { ++ .image_channel_data_type = CL_UNORM_INT16, ++ .image_channel_order = CL_R, ++ }; ++ cl_image_desc image_desc = { ++ .image_type = CL_MEM_OBJECT_IMAGE2D, ++ .image_width = ff_fruit_dither_size, ++ .image_height = ff_fruit_dither_size, ++ .image_row_pitch = 0, ++ }; ++ ++ av_assert0(sizeof(ff_fruit_dither_matrix) == sizeof(ff_fruit_dither_matrix[0]) * ff_fruit_dither_size * ff_fruit_dither_size); ++ ++ ctx->dither_image = clCreateImage(ctx->ocf.hwctx->context, CL_MEM_READ_ONLY, ++ &image_format, &image_desc, NULL, &cle); ++ if (!ctx->dither_image) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create image for " ++ "dither matrix: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ cle = clEnqueueWriteImage(ctx->command_queue, ++ ctx->dither_image, ++ CL_FALSE, m_origin, 
m_region, ++ m_row_pitch, 0, ++ ff_fruit_dither_matrix, ++ 0, NULL, &event); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue write of dither matrix image: %d.\n", cle); ++ ++ cle = clWaitForEvents(1, &event); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to wait for event completion: %d.\n", cle); ++ } ++ ++ ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name, &cle); ++ if (!ctx->kernel) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->kernel_name_uv) { ++ ctx->kernel_uv = clCreateKernel(ctx->ocf.program, ctx->kernel_name_uv, &cle); ++ if (!ctx->kernel_uv) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create kernel_uv: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ ++ ctx->initialised = 1; ++ return 0; ++ ++fail: ++ av_bprint_finalize(&header, NULL); ++ if (ctx->command_queue) ++ clReleaseCommandQueue(ctx->command_queue); ++ if (ctx->kernel) ++ clReleaseKernel(ctx->kernel); ++ if (ctx->kernel_uv) ++ clReleaseKernel(ctx->kernel_uv); ++ if (event) ++ clReleaseEvent(event); ++ if (ctx->dither_image) ++ clReleaseMemObject(ctx->dither_image); ++ if (ctx->cx) ++ clReleaseMemObject(ctx->cx); ++ if (ctx->cy) ++ clReleaseMemObject(ctx->cy); ++ if (cx) ++ av_free(cx); ++ if (cy) ++ av_free(cy); ++ return err; ++} ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static int scale_opencl_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ ScaleOpenCLContext *ctx = avctx->priv; ++ AVHWFramesContext *in_frames_ctx; ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; ++ int ret; ++ ++ if (!inlink->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ in_frames_ctx = 
(AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ in_format = in_frames_ctx->sw_format; ++ out_format = (ctx->format == AV_PIX_FMT_NONE) ? in_format : ctx->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ ++ ctx->in_fmt = in_format; ++ ctx->out_fmt = out_format; ++ ctx->in_desc = in_desc; ++ ctx->out_desc = out_desc; ++ ctx->in_planes = av_pix_fmt_count_planes(ctx->in_fmt); ++ ctx->out_planes = av_pix_fmt_count_planes(ctx->out_fmt); ++ ctx->ocf.output_format = out_format; ++ ++ if ((ret = ff_scale_eval_dimensions(ctx, ++ ctx->w_expr, ctx->h_expr, ++ inlink, outlink, ++ &ctx->dst_w, &ctx->dst_h)) < 0) ++ return ret; ++ ++ ff_scale_adjust_dimensions(inlink, &ctx->dst_w, &ctx->dst_h, ++ ctx->force_original_aspect_ratio, ctx->force_divisible_by); ++ ++ if (((int64_t)ctx->dst_h * inlink->w) > INT_MAX || ++ ((int64_t)ctx->dst_w * inlink->h) > INT_MAX) ++ av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); ++ ++ ctx->src_w = inlink->w; ++ ctx->src_h = inlink->h; ++ ctx->ocf.output_width = ctx->dst_w; ++ ctx->ocf.output_height = ctx->dst_h; ++ ++ if (ctx->passthrough && ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h && ctx->in_fmt == ctx->out_fmt) { ++ av_buffer_unref(&outlink->hw_frames_ctx); ++ outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); ++ if (!outlink->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ return 0; ++ } else { ++ ctx->passthrough = 0; ++ } ++ ++ ret = ff_opencl_filter_config_output(outlink); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static AVFrame *scale_opencl_get_video_buffer(AVFilterLink 
*inlink, int w, int h) ++{ ++ ScaleOpenCLContext *ctx = inlink->dst->priv; ++ ++ return ctx->passthrough ? ff_null_get_video_buffer(inlink, w, h) : ++ ff_default_get_video_buffer(inlink, w, h); ++} ++ ++static int scale_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) ++{ ++ AVFilterContext *avctx = inlink->dst; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ ScaleOpenCLContext *ctx = avctx->priv; ++ int x_subsample = 1 << ctx->in_desc->log2_chroma_w; ++ int y_subsample = 1 << ctx->in_desc->log2_chroma_h; ++ AVFrame *output = NULL; ++ size_t global_work[2]; ++ cl_int cle; ++ cl_int2 src_size, uv_size; ++ int err, idx_arg1, idx_arg2; ++ ++ av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(input->format), ++ input->width, input->height, input->pts); ++ ++ if (!input->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ ++ if (!ctx->initialised) { ++ err = scale_opencl_init(avctx); ++ if (err < 0) ++ goto fail; ++ } ++ ++ if (ctx->passthrough && ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h && ctx->in_fmt == ctx->out_fmt) ++ return ff_filter_frame(outlink, input); ++ ++ output = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!output) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ err = av_frame_copy_props(output, input); ++ if (err < 0) ++ goto fail; ++ output->width = outlink->w; ++ output->height = outlink->h; ++ ++ if (!output->data[0] || !input->data[0] || !output->data[1] || !input->data[1]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->out_planes > 2 && !output->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->in_planes > 2 && !input->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &output->data[0]); ++ CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &input->data[0]); ++ ++ if (ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h) { ++ CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_mem, &output->data[1]); ++ 
CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem, &input->data[1]); ++ ++ idx_arg1 = 4; ++ if (ctx->out_planes > 2) { ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg1++, cl_mem, &output->data[2]); ++ } ++ if (ctx->in_planes > 2) { ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg1++, cl_mem, &input->data[2]); ++ } ++ if (ctx->dither_image) { ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg1++, cl_mem, &ctx->dither_image); ++ } ++ ++ // conv_yuv ++ global_work[0] = output->width / x_subsample; ++ global_work[1] = output->height / y_subsample; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ } else { ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, 0, cl_mem, &output->data[1]); ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, 1, cl_mem, &input->data[1]); ++ ++ idx_arg1 = 2; ++ if (ctx->out_planes > 2) { ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &output->data[2]); ++ } ++ if (ctx->in_planes > 2) { ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &input->data[2]); ++ } ++ ++ idx_arg2 = 2; ++ if (ctx->dither_image) { ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_mem, &ctx->dither_image); ++ } ++ if (ctx->algorithm != F_NEIGHBOR) { ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_mem, &ctx->cx); ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_mem, &ctx->cy); ++ ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &ctx->cx); ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &ctx->cy); ++ } ++ ++ src_size.s[0] = ctx->src_w; ++ src_size.s[1] = ctx->src_h; ++ uv_size.s[0] = src_size.s[0] / x_subsample; ++ uv_size.s[1] = src_size.s[1] / y_subsample; ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_int2, &src_size); ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_int2, &uv_size); ++ ++ // scale, 
neighbor ++ global_work[0] = output->width; ++ global_work[1] = output->height; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ ++ // scale_uv, neighbor_uv ++ global_work[0] = output->width / x_subsample; ++ global_work[1] = output->height / y_subsample; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name_uv, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_uv, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ } ++ ++ cle = clFinish(ctx->command_queue); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); ++ ++ av_frame_free(&input); ++ ++ av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(output->format), ++ output->width, output->height, output->pts); ++ ++ return ff_filter_frame(outlink, output); ++ ++fail: ++ clFinish(ctx->command_queue); ++ av_frame_free(&input); ++ av_frame_free(&output); ++ return err; ++} ++ ++static av_cold void scale_opencl_uninit(AVFilterContext *avctx) ++{ ++ ScaleOpenCLContext *ctx = avctx->priv; ++ cl_int cle; ++ ++ if (ctx->kernel) { ++ cle = clReleaseKernel(ctx->kernel); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel: %d.\n", cle); ++ } ++ ++ if (ctx->kernel_uv) { ++ cle = clReleaseKernel(ctx->kernel_uv); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel_uv: %d.\n", cle); ++ } ++ ++ if (ctx->command_queue) { ++ cle = clReleaseCommandQueue(ctx->command_queue); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, 
AV_LOG_ERROR, "Failed to release " ++ "command queue: %d.\n", cle); ++ } ++ ++ if (ctx->cx) { ++ cle = clReleaseMemObject(ctx->cx); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "weights buffer: %d.\n", cle); ++ } ++ ++ if (ctx->cy) { ++ cle = clReleaseMemObject(ctx->cy); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "weights buffer: %d.\n", cle); ++ } ++ ++ if (ctx->dither_image) { ++ cle = clReleaseMemObject(ctx->dither_image); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "dither image: %d.\n", cle); ++ } ++ ++ ff_opencl_filter_uninit(avctx); ++} ++ ++#define OFFSET(x) offsetof(ScaleOpenCLContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++static const AVOption scale_opencl_options[] = { ++ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "format", "Output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, AV_PIX_FMT_NONE, INT_MAX, FLAGS, .unit = "fmt" }, ++ { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "algo", "Scaling algorithm", OFFSET(algorithm), AV_OPT_TYPE_INT, { .i64 = F_BILINEAR }, INT_MIN, INT_MAX, FLAGS, .unit = "algo" }, ++ { "area", "Area averaging", 0, AV_OPT_TYPE_CONST, { .i64 = F_AREA }, 0, 0, FLAGS, .unit = "algo" }, ++ { "bicubic", "Bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = F_BICUBIC }, 0, 0, FLAGS, .unit = "algo" }, ++ { "bilinear", "Bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = F_BILINEAR }, 0, 0, FLAGS, .unit = "algo" }, ++ { "gauss", "Gaussian", 0, AV_OPT_TYPE_CONST, { .i64 = F_GAUSSIAN }, 0, 0, FLAGS, .unit = "algo" }, ++ { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = F_LANCZOS }, 0, 0, FLAGS, .unit = "algo" }, ++ { 
"neighbor", "Nearest Neighbor", 0, AV_OPT_TYPE_CONST, { .i64 = F_NEIGHBOR }, 0, 0, FLAGS, .unit = "algo" }, ++ { "sinc", "Sinc", 0, AV_OPT_TYPE_CONST, { .i64 = F_SINC }, 0, 0, FLAGS, .unit = "algo" }, ++ { "spline", "Bicubic Spline", 0, AV_OPT_TYPE_CONST, { .i64 = F_SPLINE }, 0, 0, FLAGS, .unit = "algo" }, ++ { "experimental", "Experimental", 0, AV_OPT_TYPE_CONST, { .i64 = F_EXPERIMENTAL }, 0, 0, FLAGS, .unit = "algo" }, ++ { "force_original_aspect_ratio", "Decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, .unit = "force_oar" }, ++ { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, .unit = "force_oar" }, ++ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, .unit = "force_oar" }, ++ { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, .unit = "force_oar" }, ++ { "force_divisible_by", "Enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, ++ { NULL } ++}; ++ ++AVFILTER_DEFINE_CLASS(scale_opencl); ++ ++static const AVFilterPad scale_opencl_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = &scale_opencl_filter_frame, ++ .get_buffer.video = &scale_opencl_get_video_buffer, ++ .config_props = &ff_opencl_filter_config_input, ++ }, ++}; ++ ++static const AVFilterPad scale_opencl_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = &scale_opencl_config_output, ++ }, ++}; ++ ++const AVFilter ff_vf_scale_opencl = { ++ .name = "scale_opencl", ++ .description = NULL_IF_CONFIG_SMALL("Scale the input video size through OpenCL."), ++ .priv_size = sizeof(ScaleOpenCLContext), ++ .priv_class = &scale_opencl_class, ++ .init = &ff_opencl_filter_init, ++ .uninit = &scale_opencl_uninit, ++ FILTER_INPUTS(scale_opencl_inputs), ++ 
FILTER_OUTPUTS(scale_opencl_outputs), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL), ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++ .flags = AVFILTER_FLAG_HWDEVICE, ++}; diff --git a/cross/ffmpeg7/patches/1007-jellyfin-0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/cross/ffmpeg7/patches/1007-jellyfin-0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch new file mode 100644 index 00000000000..d93c3df845b --- /dev/null +++ b/cross/ffmpeg7/patches/1007-jellyfin-0007-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -0,0 +1,2191 @@ +Index: FFmpeg/libavfilter/opencl.c +=================================================================== +--- libavfilter/opencl.c ++++ libavfilter/opencl.c +@@ -169,7 +169,7 @@ int ff_opencl_filter_load_program(AVFilt + } + + cle = clBuildProgram(ctx->program, 1, &ctx->hwctx->device_id, +- NULL, NULL, NULL); ++ "-cl-finite-math-only -cl-unsafe-math-optimizations", NULL, NULL); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to build program: %d.\n", cle); + +@@ -330,7 +330,7 @@ void ff_opencl_print_const_matrix_3x3(AV + av_bprintf(buf, "__constant float %s[9] = {\n", name_str); + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) +- av_bprintf(buf, " %.5ff,", mat[i][j]); ++ av_bprintf(buf, " %ff,", mat[i][j]); + av_bprintf(buf, "\n"); + } + av_bprintf(buf, "};\n"); +Index: FFmpeg/libavfilter/opencl.h +=================================================================== +--- libavfilter/opencl.h ++++ libavfilter/opencl.h +@@ -206,17 +206,17 @@ do { + } while(0) + + /** +- * Perform a blocking write to a buffer. ++ * Perform a blocking write to a buffer with offset. + * + * Requires the presence of a local cl_int variable named cle and a fail label for error + * handling. 
+ */ +-#define CL_BLOCKING_WRITE_BUFFER(queue, buffer, size, host_ptr, event) do { \ ++#define CL_BLOCKING_WRITE_BUFFER_OFFSET(queue, buffer, offset, size, host_ptr, event) do { \ + cle = clEnqueueWriteBuffer( \ + queue, \ + buffer, \ + CL_TRUE, \ +- 0, \ ++ offset, \ + size, \ + host_ptr, \ + 0, \ +@@ -227,6 +227,15 @@ do { + } while(0) + + /** ++ * Perform a blocking write to a buffer. ++ * ++ * Requires the presence of a local cl_int variable named cle and a fail label for error ++ * handling. ++ */ ++#define CL_BLOCKING_WRITE_BUFFER(queue, buffer, size, host_ptr, event) \ ++ CL_BLOCKING_WRITE_BUFFER_OFFSET(queue, buffer, 0, size, host_ptr, event) ++ ++/** + * Create a buffer with the given information. + * + * The buffer variable in the context structure must be named . +Index: FFmpeg/libavfilter/opencl/colorspace_common.cl +=================================================================== +--- libavfilter/opencl/colorspace_common.cl ++++ libavfilter/opencl/colorspace_common.cl +@@ -17,7 +17,17 @@ + */ + + #define ST2084_MAX_LUMINANCE 10000.0f +-#define REFERENCE_WHITE 100.0f ++#define ST2084_M1 0.1593017578125f ++#define ST2084_M2 78.84375f ++#define ST2084_C1 0.8359375f ++#define ST2084_C2 18.8515625f ++#define ST2084_C3 18.6875f ++ ++#define ARIB_B67_A 0.17883277f ++#define ARIB_B67_B 0.28466892f ++#define ARIB_B67_C 0.55991073f ++ ++#define FLOAT_EPS 1e-6f + + #if chroma_loc == 1 + #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f) +@@ -33,81 +43,124 @@ + #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f) + #endif + +-constant const float ST2084_M1 = 0.1593017578125f; +-constant const float ST2084_M2 = 78.84375f; +-constant const float ST2084_C1 = 0.8359375f; +-constant const float ST2084_C2 = 18.8515625f; +-constant const float ST2084_C3 = 18.6875f; +- + float get_luma_dst(float3 c) { + return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; + } + ++float4 get_luma_dst4(float4 r4, float4 g4, float4 b4) { ++ return luma_dst.x * r4 + 
luma_dst.y * g4 + luma_dst.z * b4; ++} ++ ++/* + float get_luma_src(float3 c) { + return luma_src.x * c.x + luma_src.y * c.y + luma_src.z * c.z; + } + ++float4 get_luma_src4(float4 r4, float4 g4, float4 b4) { ++ return luma_src.x * r4 + luma_src.y * g4 + luma_src.z * b4; ++} ++*/ ++ + float3 get_chroma_sample(float3 a, float3 b, float3 c, float3 d) { + return chroma_sample(a, b, c, d); + } + ++// linearizer for PQ/ST2084 ++float eotf_st2084_common(float x) { ++ x = fmax(x, 0.0f); ++ float xpow = native_powr(x, 1.0f / ST2084_M2); ++ float num = fmax(xpow - ST2084_C1, 0.0f); ++ float den = fmax(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); ++ x = native_powr(num / den, 1.0f / ST2084_M1); ++ return x; ++} ++ + float eotf_st2084(float x) { +- float p = powr(x, 1.0f / ST2084_M2); +- float a = max(p -ST2084_C1, 0.0f); +- float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f); +- float c = powr(a / b, 1.0f / ST2084_M1); +- return x > 0.0f ? c * ST2084_MAX_LUMINANCE / REFERENCE_WHITE : 0.0f; +-} +- +-__constant const float HLG_A = 0.17883277f; +-__constant const float HLG_B = 0.28466892f; +-__constant const float HLG_C = 0.55991073f; +- +-// linearizer for HLG +-float inverse_oetf_hlg(float x) { +- float a = 4.0f * x * x; +- float b = exp((x - HLG_C) / HLG_A) + HLG_B; +- return x < 0.5f ? a : b; +-} +- +-// delinearizer for HLG +-float oetf_hlg(float x) { +- float a = 0.5f * sqrt(x); +- float b = HLG_A * log(x - HLG_B) + HLG_C; +- return x <= 1.0f ? 
a : b; +-} +- +-float3 ootf_hlg(float3 c, float peak) { +- float luma = get_luma_src(c); +- float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f); +- gamma = max(1.0f, gamma); +- float factor = peak * powr(luma, gamma - 1.0f) / powr(12.0f, gamma); +- return c * factor; +-} +- +-float3 inverse_ootf_hlg(float3 c, float peak) { +- float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f); +- c *= powr(12.0f, gamma) / peak; +- c /= powr(get_luma_dst(c), (gamma - 1.0f) / gamma); +- return c; ++ return eotf_st2084_common(x) * pq_max_lum_div_ref_white; ++} ++ ++// delinearizer for PQ/ST2084 ++float inverse_eotf_st2084_common(float x) { ++ x = fmax(x, 0.0f); ++ float xpow = native_powr(x, ST2084_M1); ++#if 0 ++ // Original formulation from SMPTE ST 2084:2014 publication. ++ float num = ST2084_C1 + ST2084_C2 * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return native_powr(num / den, ST2084_M2); ++#else ++ // More stable arrangement that avoids some cancellation error. ++ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return native_powr(1.0f + num / den, ST2084_M2); ++#endif ++} ++ ++float inverse_eotf_st2084(float x) { ++ x *= ref_white_div_pq_max_lum; ++ return inverse_eotf_st2084_common(x); ++} ++ ++float4 eotf_st2084x4(float4 x) { ++ x.x = eotf_st2084_common(x.x); ++ x.y = eotf_st2084_common(x.y); ++ x.z = eotf_st2084_common(x.z); ++ x.w = eotf_st2084_common(x.w); ++ return x * pq_max_lum_div_ref_white; ++} ++ ++float4 inverse_eotf_st2084x4(float4 x) { ++ x *= ref_white_div_pq_max_lum; ++ x.x = inverse_eotf_st2084_common(x.x); ++ x.y = inverse_eotf_st2084_common(x.y); ++ x.z = inverse_eotf_st2084_common(x.z); ++ x.w = inverse_eotf_st2084_common(x.w); ++ return x; ++} ++ ++float ootf_1_2(float x) { ++ return x > 0.0f ? native_powr(x, 1.2f) : x; ++} ++ ++float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? 
native_powr(x, 1.0f / 1.2f) : x; + } + +-float inverse_eotf_bt1886(float c) { +- return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f); ++float oetf_arib_b67(float x) { ++ x = fmax(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? native_sqrt(3.0f * x) ++ : (ARIB_B67_A * native_log(12.0f * x - ARIB_B67_B) + ARIB_B67_C); + } + +-float oetf_bt709(float c) { +- c = c < 0.0f ? 0.0f : c; +- float r1 = 4.5f * c; +- float r2 = 1.099f * powr(c, 0.45f) - 0.099f; +- return c < 0.018f ? r1 : r2; +-} +-float inverse_oetf_bt709(float c) { +- float r1 = c / 4.5f; +- float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f); +- return c < 0.081f ? r1 : r2; ++float inverse_oetf_arib_b67(float x) { ++ x = fmax(x, 0.0f); ++ return x <= 0.5f ++ ? (x * x) * (1.0f / 3.0f) ++ : (native_exp((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); + } + ++// linearizer for HLG/ARIB-B67 ++float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)) * 5.0f; ++} ++ ++// delinearizer for HLG/ARIB-B67 ++float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x / 5.0f)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? 
native_powr(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++#ifdef LUT_TRC ++float linearize_lut(float x) { ++ return lin_lut[clamp((int)(x * LUT_TRC), 0, LUT_TRC)]; ++} ++#endif ++ + float3 yuv2rgb(float y, float u, float v) { + #ifdef FULL_RANGE_IN + u -= 0.5f; v -= 0.5f; +@@ -150,7 +203,9 @@ float3 rgb2yuv(float r, float g, float b + + float rgb2y(float r, float g, float b) { + float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++#ifndef FULL_RANGE_OUT + y = (219.0f * y + 16.0f) / 255.0f; ++#endif + return y; + } + +@@ -188,18 +243,101 @@ float3 lrgb2lrgb(float3 c) { + #endif + } + +-float3 ootf(float3 c, float peak) { +-#ifdef ootf_impl +- return ootf_impl(c, peak); ++float3 rgb2lrgb(float3 c) { ++#ifdef linearize ++ float r = linearize(c.x); ++ float g = linearize(c.y); ++ float b = linearize(c.z); ++ return (float3)(r, g, b); + #else + return c; + #endif + } + +-float3 inverse_ootf(float3 c, float peak) { +-#ifdef inverse_ootf_impl +- return inverse_ootf_impl(c, peak); +-#else +- return c; ++#ifdef DOVI_RESHAPE ++float3 ycc2rgb(float y, float cb, float cr) { ++ float r = y * rgb_matrix[0] + cb * rgb_matrix[1] + cr * rgb_matrix[2]; ++ float g = y * rgb_matrix[3] + cb * rgb_matrix[4] + cr * rgb_matrix[5]; ++ float b = y * rgb_matrix[6] + cb * rgb_matrix[7] + cr * rgb_matrix[8]; ++ return (float3)(r, g, b) + ycc2rgb_offset; ++} ++ ++float3 lms2rgb(float r, float g, float b) { ++ #ifndef DOVI_PERF_TRADEOFF ++ r = eotf_st2084_common(r); ++ g = eotf_st2084_common(g); ++ b = eotf_st2084_common(b); ++ #endif ++ float rr = r * lms2rgb_matrix[0] + g * lms2rgb_matrix[1] + b * lms2rgb_matrix[2]; ++ float gg = r * lms2rgb_matrix[3] + g * lms2rgb_matrix[4] + b * lms2rgb_matrix[5]; ++ float bb = r * lms2rgb_matrix[6] + g * lms2rgb_matrix[7] + b * lms2rgb_matrix[8]; ++ #ifndef DOVI_PERF_TRADEOFF ++ rr = inverse_eotf_st2084_common(rr); ++ gg = inverse_eotf_st2084_common(gg); ++ bb = inverse_eotf_st2084_common(bb); ++ #endif ++ return (float3)(rr, gg, bb); ++} + #endif ++ 
++#ifdef TONE_MODE_ITP ++// The following assumes bt2020 ++void lrgb2ictcp(float4 r4, float4 g4, float4 b4, float4* i4, float4* ct4, float4* cp4) { ++ float4 l4 = 0.412109375000000f * r4 + 0.523925781250000f * g4 + 0.063964843750000f * b4; ++ float4 m4 = 0.166748046875000f * r4 + 0.720458984375000f * g4 + 0.112792968750000f * b4; ++ float4 s4 = 0.024169921875000f * r4 + 0.075439453125000f * g4 + 0.900390625000000f * b4; ++ l4 = inverse_eotf_st2084x4(l4); ++ m4 = inverse_eotf_st2084x4(m4); ++ s4 = inverse_eotf_st2084x4(s4); ++ *i4 = 0.5f * l4 + 0.5f * m4; ++ *ct4 = 1.613769531250000f * l4 - 3.323486328125000f * m4 + 1.709716796875000f * s4; ++ *cp4 = 4.378173828125000f * l4 - 4.245605468750000f * m4 - 0.132568359375000f * s4; ++} ++ ++void ictcp2lrgb(float4 i4, float4 ct4, float4 cp4, float4* r4, float4* g4, float4* b4) { ++ float4 ll4 = i4 + 0.008609037037933f * ct4 + 0.111029625003026f * cp4; ++ float4 mm4 = i4 - 0.008609037037933f * ct4 - 0.111029625003026f * cp4; ++ float4 ss4 = i4 + 0.560031335710679f * ct4 - 0.320627174987319f * cp4; ++ ll4 = eotf_st2084x4(ll4); ++ mm4 = eotf_st2084x4(mm4); ++ ss4 = eotf_st2084x4(ss4); ++ *r4 = 3.436606694333079f * ll4 - 2.506452118656270f * mm4 + 0.069845424323191f * ss4; ++ *g4 = -0.791329555598929f * ll4 + 1.983600451792291f * mm4 - 0.192270896193362f * ss4; ++ *b4 = -0.025949899690593f * ll4 - 0.098913714711726f * mm4 + 1.124863614402319f * ss4; ++} ++#endif ++ ++float parabolic(float x, float t0, float x0, float y0) { ++ float s = (y0 - t0) / native_sqrt(x0 - y0); ++ float ox = t0 - s * s * 0.25f; ++ float oy = t0 - s * native_sqrt(s * s * 0.25f); ++ return (x < t0 ? 
x : s * native_sqrt(x - ox) + oy); ++} ++ ++float3 gamut_compress(float3 rgb) { ++ // BT.709 boundary info ++ #define cyan_limit 1.5187050250638159f ++ #define magenta_limit 1.0750082769546088f ++ #define yellow_limit 1.0887800403483898f ++ #define cyan_threshold 1.050508660266247f ++ #define magenta_threshold 0.940509816042432f ++ #define yellow_threshold 0.9771607996420639f ++ ++ // Achromatic axis ++ float ac = fmax(fmax(rgb.x, rgb.y), rgb.z); ++ ++ // Inverse RGB Ratios: distance from achromatic axis ++ float3 d = ac == 0.0f ? (float3)(0.0f, 0.0f, 0.0f) : (ac - rgb) / fabs(ac); ++ ++ // Compressed distance ++ float3 cd = (float3)( ++ parabolic(d.x, cyan_threshold, cyan_limit, 1.0f), ++ parabolic(d.y, magenta_threshold, magenta_limit, 1.0f), ++ parabolic(d.z, yellow_threshold, yellow_limit, 1.0f) ++ ); ++ ++ // Inverse RGB Ratios to RGB ++ float3 crgb = ac - cd * fabs(ac); ++ ++ return crgb; + } +Index: FFmpeg/libavfilter/opencl/tonemap.cl +=================================================================== +--- libavfilter/opencl/tonemap.cl ++++ libavfilter/opencl/tonemap.cl +@@ -16,54 +16,67 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#define REFERENCE_WHITE 100.0f ++#define FLOAT_EPS 1e-6f ++ + extern float3 lrgb2yuv(float3); + extern float lrgb2y(float3); + extern float3 yuv2lrgb(float3); + extern float3 lrgb2lrgb(float3); +-extern float get_luma_src(float3); +-extern float get_luma_dst(float3); +-extern float3 ootf(float3 c, float peak); +-extern float3 inverse_ootf(float3 c, float peak); ++extern float eotf_st2084(float); ++extern float inverse_eotf_st2084(float); ++extern float4 get_luma_dst4(float4, float4, float4); + extern float3 get_chroma_sample(float3, float3, float3, float3); +- +-struct detection_result { +- float peak; +- float average; +-}; ++#ifdef DOVI_RESHAPE ++extern float3 rgb2lrgb(float3); ++extern float3 ycc2rgb(float, float, float); ++extern float3 lms2rgb(float, float, float); ++#endif 
++extern float4 eotf_st2084x4(float4 x); ++extern float4 inverse_eotf_st2084x4(float4 x); ++#ifdef TONE_MODE_ITP ++extern void lrgb2ictcp(float4 r4, float4 g4, float4 b4, float4* i4, float4* ct4, float4* cp4); ++extern void ictcp2lrgb(float4 i4, float4 ct4, float4 cp4, float4* r4, float4* g4, float4* b4); ++#endif ++extern float3 gamut_compress(float3 rgb); ++ ++#ifdef ENABLE_DITHER ++float get_dithered_y(float y, float d) { ++ return floor(y * dither_quantization + d + 0.5f / dither_size2) * 1.0f / dither_quantization; ++} ++#endif + + float hable_f(float in) { + float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; + return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; + } + +-float direct(float s, float peak) { ++float direct(float s, float peak, float target_peak) { + return s; + } + +-float linear(float s, float peak) { ++float linear(float s, float peak, float target_peak) { + return s * tone_param / peak; + } + +-float gamma(float s, float peak) { +- float p = s > 0.05f ? s /peak : 0.05f / peak; +- float v = powr(p, 1.0f / tone_param); +- return s > 0.05f ? v : (s * v /0.05f); ++float gamma(float s, float peak, float target_peak) { ++ float p = s > 0.05f ? s / peak : 0.05f / peak; ++ float v = native_powr(p, 1.0f / tone_param); ++ return s > 0.05f ? 
v : (s * v / 0.05f); + } + +-float clip(float s, float peak) { ++float clip(float s, float peak, float target_peak) { + return clamp(s * tone_param, 0.0f, 1.0f); + } + +-float reinhard(float s, float peak) { ++float reinhard(float s, float peak, float target_peak) { + return s / (s + tone_param) * (peak + tone_param) / peak; + } + +-float hable(float s, float peak) { +- return hable_f(s)/hable_f(peak); ++float hable(float s, float peak, float target_peak) { ++ return hable_f(s) / hable_f(peak); + } + +-float mobius(float s, float peak) { ++float mobius(float s, float peak, float target_peak) { + float j = tone_param; + float a, b; + +@@ -71,202 +84,423 @@ float mobius(float s, float peak) { + return s; + + a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); +- b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f); ++ b = (j * j - 2.0f * j * peak + peak) / fmax(peak - 1.0f, FLOAT_EPS); + + return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); + } + +-// detect peak/average signal of a frame, the algorithm was ported from: +-// libplacebo (https://github.com/haasn/libplacebo) +-struct detection_result +-detect_peak_avg(global uint *util_buf, __local uint *sum_wg, +- float signal, float peak) { +-// layout of the util buffer +-// +-// Name: : Size (units of 4-bytes) +-// average buffer : detection_frames + 1 +-// peak buffer : detection_frames + 1 +-// workgroup counter : 1 +-// total of peak : 1 +-// total of average : 1 +-// frame index : 1 +-// frame number : 1 +- global uint *avg_buf = util_buf; +- global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1; +- global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1; +- global uint *max_total_p = counter_wg_p + 1; +- global uint *avg_total_p = max_total_p + 1; +- global uint *frame_idx_p = avg_total_p + 1; +- global uint *scene_frame_num_p = frame_idx_p + 1; +- +- uint frame_idx = *frame_idx_p; +- uint scene_frame_num = *scene_frame_num_p; +- +- size_t lidx = get_local_id(0); +- size_t 
lidy = get_local_id(1); +- size_t lsizex = get_local_size(0); +- size_t lsizey = get_local_size(1); +- uint num_wg = get_num_groups(0) * get_num_groups(1); +- size_t group_idx = get_group_id(0); +- size_t group_idy = get_group_id(1); +- struct detection_result r = {peak, sdr_avg}; +- if (lidx == 0 && lidy == 0) +- *sum_wg = 0; +- barrier(CLK_LOCAL_MEM_FENCE); +- +- // update workgroup sum +- atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE)); +- barrier(CLK_LOCAL_MEM_FENCE); +- +- // update frame peak/avg using work-group-average. +- if (lidx == 0 && lidy == 0) { +- uint avg_wg = *sum_wg / (lsizex * lsizey); +- atomic_max(&peak_buf[frame_idx], avg_wg); +- atomic_add(&avg_buf[frame_idx], avg_wg); +- } ++float bt2390(float s, float peak_inv_pq, float target_peak_inv_pq) { ++ float peak_pq = peak_inv_pq; ++ float scale = peak_pq > 0.0f ? (1.0f / peak_pq) : 1.0f; ++ ++ float s_pq = s * scale; ++ float max_lum = target_peak_inv_pq * scale; ++ ++ float ks = 1.5f * max_lum - 0.5f; ++ float tb = (s_pq - ks) / (1.0f - ks); ++ float tb2 = tb * tb; ++ float tb3 = tb2 * tb; ++ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + ++ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + ++ (-2.0f * tb3 + 3.0f * tb2) * max_lum; ++ float sig = mix(pb, s_pq, s_pq < ks); ++ ++ return sig * peak_pq; ++} ++ ++#define MAP_FOUR_PIXELS(sig, peak, target_peak) \ ++{ \ ++ sig.x = TONE_FUNC(sig.x, peak, target_peak); \ ++ sig.y = TONE_FUNC(sig.y, peak, target_peak); \ ++ sig.z = TONE_FUNC(sig.z, peak, target_peak); \ ++ sig.w = TONE_FUNC(sig.w, peak, target_peak); \ ++} ++ ++#ifndef TONE_MODE_ITP ++void map_four_pixels_rgb(float4 *r4, float4 *g4, float4 *b4, float peak) { ++#ifdef TONE_MODE_RGB ++ float4 sig_r = fmax(*r4, FLOAT_EPS), sig_ro = sig_r; ++ float4 sig_g = fmax(*g4, FLOAT_EPS), sig_go = sig_g; ++ float4 sig_b = fmax(*b4, FLOAT_EPS), sig_bo = sig_b; ++#else ++ #ifdef TONE_MODE_MAX ++ float4 sig = fmax(fmax(*r4, fmax(*g4, *b4)), FLOAT_EPS); ++ #else ++ float4 sig = fmax((*r4 * 0.2627f + *g4 * 
0.678f + *b4 * 0.0593f), FLOAT_EPS); ++ #endif ++ float4 sig_o = sig; ++#endif + +- if (scene_frame_num > 0) { +- float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num); +- float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num); +- r.peak = max(1.0f, peak); +- r.average = max(0.25f, avg); ++ // Desaturate the color using a coefficient dependent on the signal level ++ if (desat_param > 0.0f) { ++#ifdef TONE_MODE_RGB ++ float4 sig = fmax(fmax(*r4, fmax(*g4, *b4)), FLOAT_EPS); ++#endif ++#ifdef MAP_IN_DST_SPACE ++ float4 luma = get_luma_dst4(*r4, *g4, *b4); ++#else // only LUM mode currently ++ float4 luma = sig; ++#endif ++ float4 coeff = fmax(sig - 0.18f, FLOAT_EPS) / fmax(sig, FLOAT_EPS); ++ coeff = native_powr(coeff, 10.0f / desat_param); ++ *r4 = mix(*r4, luma, coeff); ++ *g4 = mix(*g4, luma, coeff); ++ *b4 = mix(*b4, luma, coeff); + } + +- if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) { +- *counter_wg_p = 0; +- avg_buf[frame_idx] /= num_wg; +- +- if (scene_threshold > 0.0f) { +- uint cur_max = peak_buf[frame_idx]; +- uint cur_avg = avg_buf[frame_idx]; +- int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p; +- +- if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) { +- for (uint i = 0; i < DETECTION_FRAMES + 1; i++) +- avg_buf[i] = 0; +- for (uint i = 0; i < DETECTION_FRAMES + 1; i++) +- peak_buf[i] = 0; +- *avg_total_p = *max_total_p = 0; +- *scene_frame_num_p = 0; +- avg_buf[frame_idx] = cur_avg; +- peak_buf[frame_idx] = cur_max; +- } +- } +- uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1); +- // add current frame, subtract next frame +- *max_total_p += peak_buf[frame_idx] - peak_buf[next]; +- *avg_total_p += avg_buf[frame_idx] - avg_buf[next]; +- // reset next frame +- peak_buf[next] = avg_buf[next] = 0; +- *frame_idx_p = next; +- *scene_frame_num_p = min(*scene_frame_num_p + 1, +- (uint)DETECTION_FRAMES); ++#ifdef TONE_FUNC_BT2390 ++ float src_peak_delin_pq = 
inverse_eotf_st2084(peak); ++ float dst_peak_delin_pq = inverse_eotf_st2084(1.0f); ++ #ifdef TONE_MODE_RGB ++ sig_r = inverse_eotf_st2084x4(sig_r); ++ sig_g = inverse_eotf_st2084x4(sig_g); ++ sig_b = inverse_eotf_st2084x4(sig_b); ++ MAP_FOUR_PIXELS(sig_r, src_peak_delin_pq, dst_peak_delin_pq) ++ MAP_FOUR_PIXELS(sig_g, src_peak_delin_pq, dst_peak_delin_pq) ++ MAP_FOUR_PIXELS(sig_b, src_peak_delin_pq, dst_peak_delin_pq) ++ sig_r = eotf_st2084x4(sig_r); ++ sig_g = eotf_st2084x4(sig_g); ++ sig_b = eotf_st2084x4(sig_b); ++ #else ++ sig = inverse_eotf_st2084x4(sig); ++ MAP_FOUR_PIXELS(sig, src_peak_delin_pq, dst_peak_delin_pq) ++ sig = eotf_st2084x4(sig); ++ #endif ++#else ++ #ifdef TONE_MODE_RGB ++ MAP_FOUR_PIXELS(sig_r, peak, 1.0f) ++ MAP_FOUR_PIXELS(sig_g, peak, 1.0f) ++ MAP_FOUR_PIXELS(sig_b, peak, 1.0f) ++ #else ++ MAP_FOUR_PIXELS(sig, peak, 1.0f) ++ #endif ++#endif ++ ++#ifdef TONE_MODE_RGB ++ sig_r = fmin(sig_r, 1.0f); ++ sig_g = fmin(sig_g, 1.0f); ++ sig_b = fmin(sig_b, 1.0f); ++ float4 factor_r = sig_r / sig_ro; ++ float4 factor_g = sig_g / sig_go; ++ float4 factor_b = sig_b / sig_bo; ++ *r4 *= factor_r; ++ *g4 *= factor_g; ++ *b4 *= factor_b; ++#else ++ sig = fmin(sig, 1.0f); ++ float4 factor = sig / sig_o; ++ *r4 *= factor; ++ *g4 *= factor; ++ *b4 *= factor; ++#endif ++} ++#endif ++ ++#ifdef TONE_MODE_ITP ++void map_four_pixels_itp(float4 *r4, float4 *g4, float4 *b4, float peak) { ++ float4 i4_o, i4, ct4 , cp4; ++ lrgb2ictcp(*r4, *g4, *b4, &i4, &ct4, &cp4); ++ i4 = fmax(i4, FLOAT_EPS); ++ i4_o = i4; ++ if (desat_param > 0.0f) { ++ float4 coeff = native_exp(-pow(eotf_st2084x4(i4) - (target_peak - desat_param) * 0.5f, 2) / (2.0f * peak)); ++ ct4 *= coeff; ++ cp4 *= coeff; + } +- return r; ++#ifdef TONE_FUNC_BT2390 ++ float src_peak_delin_pq = inverse_eotf_st2084(peak); ++ float dst_peak_delin_pq = inverse_eotf_st2084(1.0f); ++ MAP_FOUR_PIXELS(i4, src_peak_delin_pq, dst_peak_delin_pq) ++#else ++ i4 = eotf_st2084x4(i4); ++ MAP_FOUR_PIXELS(i4, peak, 1.0f) ++ i4 = 
inverse_eotf_st2084x4(i4); ++#endif ++ i4 = fmin(i4, 1.0f); ++ float4 factor = min(i4/i4_o, i4_o/i4); ++ ct4 *= factor; ++ cp4 *= factor; ++ ictcp2lrgb(i4, ct4, cp4, r4, g4, b4); ++} ++#endif ++ ++// Map from source space YUV to source space RGB ++float3 map_to_src_space_from_yuv(float3 yuv) { ++#ifdef DOVI_RESHAPE ++ float3 c = ycc2rgb(yuv.x, yuv.y, yuv.z); ++ c = lms2rgb(c.x, c.y, c.z); ++ c = rgb2lrgb(c); ++#else ++ float3 c = yuv2lrgb(yuv); ++#endif ++ return c; + } + +-float3 map_one_pixel_rgb(float3 rgb, float peak, float average) { +- float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f); ++// Map from source space YUV to destination space RGB ++float3 map_to_dst_space_from_yuv(float3 yuv) { ++#ifdef DOVI_RESHAPE ++ float3 c = ycc2rgb(yuv.x, yuv.y, yuv.z); ++ c = lms2rgb(c.x, c.y, c.z); ++ c = rgb2lrgb(c); ++ c = lrgb2lrgb(c); ++#else ++ float3 c = yuv2lrgb(yuv); ++ c = lrgb2lrgb(c); ++#endif ++ return c; ++} + +- // Rescale the variables in order to bring it into a representation where +- // 1.0 represents the dst_peak. This is because all of the tone mapping +- // algorithms are defined in such a way that they map to the range [0.0, 1.0]. +- if (target_peak > 1.0f) { +- sig *= 1.0f / target_peak; +- peak *= 1.0f / target_peak; ++#ifdef DOVI_RESHAPE ++float reshape_poly(float s, float4 coeffs) { ++ return (coeffs.z * s + coeffs.y) * s + coeffs.x; ++} ++ ++float reshape_mmr(float3 sig, ++ float4 coeffs, ++ __global float4 *dovi_mmr, ++ int dovi_mmr_single, ++ int dovi_min_order, ++ int dovi_max_order) ++{ ++ int mmr_idx = dovi_mmr_single ? 
0 : (int)coeffs.y; ++ int order = (int)coeffs.w; ++ float4 sigX; ++ ++ float s = coeffs.x; ++ sigX.xyz = sig.xxy * sig.yzz; ++ sigX.w = sigX.x * sig.z; ++ s += dot(dovi_mmr[mmr_idx + 0].xyz, sig); ++ s += dot(dovi_mmr[mmr_idx + 1], sigX); ++ ++ int t = dovi_max_order >= 2 && (dovi_min_order >= 2 || order >= 2); ++ if (t) { ++ float3 sig2 = sig * sig; ++ float4 sigX2 = sigX * sigX; ++ s += dot(dovi_mmr[mmr_idx + 2].xyz, sig2); ++ s += dot(dovi_mmr[mmr_idx + 3], sigX2); ++ t = dovi_max_order == 3 && (dovi_min_order == 3 || order >= 3); ++ if (t) { ++ s += dot(dovi_mmr[mmr_idx + 4].xyz, sig2 * sig); ++ s += dot(dovi_mmr[mmr_idx + 5], sigX2 * sigX); ++ } + } + +- float sig_old = sig; ++ return s; ++} + +- // Scale the signal to compensate for differences in the average brightness +- float slope = min(1.0f, sdr_avg / average); +- sig *= slope; +- peak *= slope; ++float3 reshape_dovi_yuv(float3 yuv, ++ __global float *src_dovi_params, ++ __global float *src_dovi_pivots, ++ __global float4 *src_dovi_coeffs, ++ __global float4 *src_dovi_mmr) ++{ ++ int i; ++ float s; ++ float3 sig = clamp(yuv.xyz, 0.0f, 1.0f); ++ float sig_arr[3] = {sig.x, sig.y, sig.z}; ++ float4 coeffs; ++ int dovi_num_pivots, dovi_has_mmr, dovi_has_poly; ++ int dovi_mmr_single, dovi_min_order, dovi_max_order; ++ float dovi_lo, dovi_hi; ++ __global float *dovi_params; ++ __global float *dovi_pivots; ++ __global float4 *dovi_coeffs, *dovi_mmr; ++ ++#pragma unroll ++ for (i = 0; i < 3; i++) { ++ dovi_params = src_dovi_params + i*8; ++ dovi_pivots = src_dovi_pivots + i*8; ++ dovi_coeffs = src_dovi_coeffs + i*8; ++ dovi_mmr = src_dovi_mmr + i*48; ++ dovi_num_pivots = dovi_params[0]; ++ dovi_has_mmr = dovi_params[1]; ++ dovi_has_poly = dovi_params[2]; ++ dovi_mmr_single = dovi_params[3]; ++ dovi_min_order = dovi_params[4]; ++ dovi_max_order = dovi_params[5]; ++ dovi_lo = dovi_params[6]; ++ dovi_hi = dovi_params[7]; ++ ++ s = sig_arr[i]; ++ coeffs = dovi_coeffs[0]; ++ ++ if (i == 0 && dovi_num_pivots > 2) { ++ 
coeffs = mix(mix(mix(dovi_coeffs[0], dovi_coeffs[1], (float4)(s >= dovi_pivots[0])), ++ mix(dovi_coeffs[2], dovi_coeffs[3], (float4)(s >= dovi_pivots[2])), ++ (float4)(s >= dovi_pivots[1])), ++ mix(mix(dovi_coeffs[4], dovi_coeffs[5], (float4)(s >= dovi_pivots[4])), ++ mix(dovi_coeffs[6], dovi_coeffs[7], (float4)(s >= dovi_pivots[6])), ++ (float4)(s >= dovi_pivots[5])), ++ (float4)(s >= dovi_pivots[3])); ++ } + +- // Desaturate the color using a coefficient dependent on the signal level +- if (desat_param > 0.0f) { +- float luma = get_luma_dst(rgb); +- float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f); +- coeff = native_powr(coeff, 10.0f / desat_param); +- rgb = mix(rgb, (float3)luma, (float3)coeff); +- sig = mix(sig, luma * slope, coeff); +- } ++ int has_mmr_poly = dovi_has_mmr && dovi_has_poly; + +- sig = TONE_FUNC(sig, peak); ++ if ((has_mmr_poly && coeffs.w == 0.0f) || (!has_mmr_poly && dovi_has_poly)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(sig, coeffs, dovi_mmr, ++ dovi_mmr_single, dovi_min_order, dovi_max_order); + +- sig = min(sig, 1.0f); +- rgb *= (sig/sig_old); +- return rgb; +-} +-// map from source space YUV to destination space RGB +-float3 map_to_dst_space_from_yuv(float3 yuv, float peak) { +- float3 c = yuv2lrgb(yuv); +- c = ootf(c, peak); +- c = lrgb2lrgb(c); +- return c; ++ sig_arr[i] = clamp(s, dovi_lo, dovi_hi); ++ } ++ ++ return (float3)(sig_arr[0], sig_arr[1], sig_arr[2]); + } ++#endif ++ ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_CLAMP_TO_EDGE | ++ CLK_FILTER_NEAREST); ++ ++__constant sampler_t l_sampler = (CLK_NORMALIZED_COORDS_TRUE | ++ CLK_ADDRESS_CLAMP_TO_EDGE | ++ CLK_FILTER_LINEAR); ++ ++__constant sampler_t d_sampler = (CLK_NORMALIZED_COORDS_TRUE | ++ CLK_ADDRESS_REPEAT | ++ CLK_FILTER_NEAREST); + + __kernel void tonemap(__write_only image2d_t dst1, + __read_only image2d_t src1, + __write_only image2d_t dst2, + __read_only image2d_t src2, +- global uint *util_buf, +- float peak 
+- ) ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t src3, ++#endif ++#ifdef ENABLE_DITHER ++ __read_only image2d_t dither, ++#endif ++#ifdef DOVI_RESHAPE ++ __global float *dovi_buf, ++#endif ++ float peak) + { +- __local uint sum_wg; +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_ADDRESS_CLAMP_TO_EDGE | +- CLK_FILTER_NEAREST); + int xi = get_global_id(0); + int yi = get_global_id(1); + // each work item process four pixels + int x = 2 * xi; + int y = 2 * yi; + +- float y0 = read_imagef(src1, sampler, (int2)(x, y)).x; +- float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x; +- float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x; +- float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x; +- float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy; +- +- float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak); +- float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak); +- float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak); +- float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak); +- +- float sig0 = max(c0.x, max(c0.y, c0.z)); +- float sig1 = max(c1.x, max(c1.y, c1.z)); +- float sig2 = max(c2.x, max(c2.y, c2.z)); +- float sig3 = max(c3.x, max(c3.y, c3.z)); +- float sig = max(sig0, max(sig1, max(sig2, sig3))); +- +- struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak); +- +- float3 c0_old = c0, c1_old = c1, c2_old = c2; +- c0 = map_one_pixel_rgb(c0, r.peak, r.average); +- c1 = map_one_pixel_rgb(c1, r.peak, r.average); +- c2 = map_one_pixel_rgb(c2, r.peak, r.average); +- c3 = map_one_pixel_rgb(c3, r.peak, r.average); +- +- c0 = inverse_ootf(c0, target_peak); +- c1 = inverse_ootf(c1, target_peak); +- c2 = inverse_ootf(c2, target_peak); +- c3 = inverse_ootf(c3, target_peak); +- +- y0 = lrgb2y(c0); +- y1 = lrgb2y(c1); +- y2 = lrgb2y(c2); +- y3 = lrgb2y(c3); ++ int2 src1_sz = 
get_image_dim(src1); ++ int2 dst2_sz = get_image_dim(dst2); ++ ++ if (xi >= dst2_sz.x || yi >= dst2_sz.y) ++ return; ++ ++ float2 src1_sz_recip = native_recip(convert_float2(src1_sz)); ++ float2 ncoords_yuv0 = convert_float2((int2)(x, y)) * src1_sz_recip; ++ float2 ncoords_yuv1 = convert_float2((int2)(x + 1, y)) * src1_sz_recip; ++ float2 ncoords_yuv2 = convert_float2((int2)(x, y + 1)) * src1_sz_recip; ++ float2 ncoords_yuv3 = convert_float2((int2)(x + 1, y + 1)) * src1_sz_recip; ++ ++ float3 yuv0, yuv1, yuv2, yuv3; ++ ++ yuv0.x = read_imagef(src1, sampler, (int2)(x, y)).x; ++ yuv1.x = read_imagef(src1, sampler, (int2)(x + 1, y)).x; ++ yuv2.x = read_imagef(src1, sampler, (int2)(x, y + 1)).x; ++ yuv3.x = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x; ++ ++#ifdef NON_SEMI_PLANAR_IN ++ yuv0.yz = (float2)(read_imagef(src2, l_sampler, ncoords_yuv0).x, ++ read_imagef(src3, l_sampler, ncoords_yuv0).x); ++ yuv1.yz = (float2)(read_imagef(src2, l_sampler, ncoords_yuv1).x, ++ read_imagef(src3, l_sampler, ncoords_yuv1).x); ++ yuv2.yz = (float2)(read_imagef(src2, l_sampler, ncoords_yuv2).x, ++ read_imagef(src3, l_sampler, ncoords_yuv2).x); ++ yuv3.yz = (float2)(read_imagef(src2, l_sampler, ncoords_yuv3).x, ++ read_imagef(src3, l_sampler, ncoords_yuv3).x); ++#else ++ yuv0.yz = read_imagef(src2, l_sampler, ncoords_yuv0).xy; ++ yuv1.yz = read_imagef(src2, l_sampler, ncoords_yuv1).xy; ++ yuv2.yz = read_imagef(src2, l_sampler, ncoords_yuv2).xy; ++ yuv3.yz = read_imagef(src2, l_sampler, ncoords_yuv3).xy; ++#endif ++ ++#ifdef DOVI_RESHAPE ++ __global float *dovi_params = dovi_buf; ++ __global float *dovi_pivots = dovi_buf + 24; ++ __global float4 *dovi_coeffs = (__global float4 *)(dovi_buf + 48); ++ __global float4 *dovi_mmr = (__global float4 *)(dovi_buf + 144); ++ yuv0 = reshape_dovi_yuv(yuv0, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); ++ yuv1 = reshape_dovi_yuv(yuv1, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); ++ yuv2 = reshape_dovi_yuv(yuv2, dovi_params, 
dovi_pivots, dovi_coeffs, dovi_mmr); ++ yuv3 = reshape_dovi_yuv(yuv3, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); ++#endif ++ ++ float3 c0, c1, c2, c3; ++#ifndef MAP_IN_DST_SPACE ++ c0 = map_to_src_space_from_yuv(yuv0); ++ c1 = map_to_src_space_from_yuv(yuv1); ++ c2 = map_to_src_space_from_yuv(yuv2); ++ c3 = map_to_src_space_from_yuv(yuv3); ++#else ++ c0 = map_to_dst_space_from_yuv(yuv0); ++ c1 = map_to_dst_space_from_yuv(yuv1); ++ c2 = map_to_dst_space_from_yuv(yuv2); ++ c3 = map_to_dst_space_from_yuv(yuv3); ++#endif ++ ++#ifndef SKIP_TONEMAP ++ float4 r4 = (float4)(c0.x, c1.x, c2.x, c3.x); ++ float4 g4 = (float4)(c0.y, c1.y, c2.y, c3.y); ++ float4 b4 = (float4)(c0.z, c1.z, c2.z, c3.z); ++ #ifdef TONE_MODE_ITP ++ map_four_pixels_itp(&r4, &g4, &b4, peak); ++ #else ++ map_four_pixels_rgb(&r4, &g4, &b4, peak); ++ #endif ++ c0 = (float3)(r4.x, g4.x, b4.x); ++ c1 = (float3)(r4.y, g4.y, b4.y); ++ c2 = (float3)(r4.z, g4.z, b4.z); ++ c3 = (float3)(r4.w, g4.w, b4.w); ++#endif ++ ++#ifndef MAP_IN_DST_SPACE ++ c0 = lrgb2lrgb(c0); ++ c1 = lrgb2lrgb(c1); ++ c2 = lrgb2lrgb(c2); ++ c3 = lrgb2lrgb(c3); ++ #if !defined(RGB2RGB_PASSTHROUGH) && !defined(DOVI_PERF_TRADEOFF) ++ c0 = gamut_compress(c0); ++ c1 = gamut_compress(c1); ++ c2 = gamut_compress(c2); ++ c3 = gamut_compress(c3); ++ #endif ++ c0 = clamp(c0, 0.0f, 1.0f); ++ c1 = clamp(c1, 0.0f, 1.0f); ++ c2 = clamp(c2, 0.0f, 1.0f); ++ c3 = clamp(c3, 0.0f, 1.0f); ++#endif ++ ++ float y0 = lrgb2y(c0); ++ float y1 = lrgb2y(c1); ++ float y2 = lrgb2y(c2); ++ float y3 = lrgb2y(c3); ++ ++#if defined(ENABLE_DITHER) && !defined(SKIP_TONEMAP) ++ int2 dither_sz = get_image_dim(dither); ++ float2 dither_sz_recip = native_recip(convert_float2(dither_sz)); ++ float2 ncoords_d = convert_float2((int2)(xi, yi)) * dither_sz_recip; ++ float d = read_imagef(dither, d_sampler, ncoords_d).x; ++ y0 = get_dithered_y(y0, d), y1 = get_dithered_y(y1, d); ++ y2 = get_dithered_y(y2, d), y3 = get_dithered_y(y3, d); ++#endif ++ + float3 chroma_c = 
get_chroma_sample(c0, c1, c2, c3); + float3 chroma = lrgb2yuv(chroma_c); + +- if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) { +- write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f)); +- write_imagef(dst1, (int2)(x+1, y), (float4)(y1, 0.0f, 0.0f, 1.0f)); +- write_imagef(dst1, (int2)(x, y+1), (float4)(y2, 0.0f, 0.0f, 1.0f)); +- write_imagef(dst1, (int2)(x+1, y+1), (float4)(y3, 0.0f, 0.0f, 1.0f)); +- write_imagef(dst2, (int2)(xi, yi), +- (float4)(chroma.y, chroma.z, 0.0f, 1.0f)); +- } ++ write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x + 1, y), (float4)(y1, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x, y + 1), (float4)(y2, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(y3, 0.0f, 0.0f, 1.0f)); ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, (int2)(xi, yi), (float4)(chroma.z, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, chroma.z, 0.0f, 1.0f)); ++#endif + } +Index: FFmpeg/libavfilter/vf_tonemap_opencl.c +=================================================================== +--- libavfilter/vf_tonemap_opencl.c ++++ libavfilter/vf_tonemap_opencl.c +@@ -1,4 +1,4 @@ +-/* ++ /* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or +@@ -15,8 +15,15 @@ + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ ++ + #include + ++#ifdef __APPLE__ ++#include ++#else ++#include ++#endif ++ + #include "libavutil/avassert.h" + #include "libavutil/common.h" + #include "libavutil/imgutils.h" +@@ -29,13 +36,17 @@ + #include "opencl_source.h" + #include "video.h" + #include "colorspace.h" ++#include "dither_matrix.h" + +-// TODO: +-// - separate peak-detection from tone-mapping kernel to solve +-// one-frame-delay issue. 
+-// - more format support ++#define OPENCL_SOURCE_NB 3 + +-#define DETECTION_FRAMES 63 ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUV420P16, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++}; + + enum TonemapAlgorithm { + TONEMAP_NONE, +@@ -45,7 +56,17 @@ enum TonemapAlgorithm { + TONEMAP_REINHARD, + TONEMAP_HABLE, + TONEMAP_MOBIUS, +- TONEMAP_MAX, ++ TONEMAP_BT2390, ++ TONEMAP_COUNT, ++}; ++ ++enum TonemapMode { ++ TONEMAP_MODE_MAX, ++ TONEMAP_MODE_RGB, ++ TONEMAP_MODE_LUM, ++ TONEMAP_MODE_ITP, ++ TONEMAP_MODE_AUTO, ++ TONEMAP_MODE_COUNT, + }; + + typedef struct TonemapOpenCLContext { +@@ -56,23 +77,44 @@ typedef struct TonemapOpenCLContext { + enum AVColorPrimaries primaries, primaries_in, primaries_out; + enum AVColorRange range, range_in, range_out; + enum AVChromaLocation chroma_loc; ++ enum AVPixelFormat in_fmt, out_fmt; ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++ int in_planes, out_planes; ++ ++ float *lin_lut; ++ ++#define params_cnt 8 ++#define pivots_cnt (7+1) ++#define coeffs_cnt 8*4 ++#define mmr_cnt 8*6*4 ++#define params_sz params_cnt*sizeof(float) ++#define pivots_sz pivots_cnt*sizeof(float) ++#define coeffs_sz coeffs_cnt*sizeof(float) ++#define mmr_sz mmr_cnt*sizeof(float) ++ struct DoviMetadata *dovi; ++ cl_mem dovi_buf; + + enum TonemapAlgorithm tonemap; ++ enum TonemapMode tonemap_mode; + enum AVPixelFormat format; ++ int apply_dovi; ++ double ref_white; + double peak; ++ double target_peak; + double param; + double desat_param; +- double target_peak; + double scene_threshold; ++ int tradeoff; + int initialised; ++ int init_with_dovi; + cl_kernel kernel; ++ cl_mem dither_image; + cl_command_queue command_queue; +- cl_mem util_mem; + } TonemapOpenCLContext; + + static const char *const linearize_funcs[AVCOL_TRC_NB] = { +- [AVCOL_TRC_SMPTE2084] = "eotf_st2084", +- [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg", ++ [AVCOL_TRC_SMPTE2084] = "eotf_st2084", ++ 
[AVCOL_TRC_ARIB_STD_B67] = "eotf_arib_b67", + }; + + static const char *const delinearize_funcs[AVCOL_TRC_NB] = { +@@ -80,7 +122,7 @@ static const char *const delinearize_fun + [AVCOL_TRC_BT2020_10] = "inverse_eotf_bt1886", + }; + +-static const char *const tonemap_func[TONEMAP_MAX] = { ++static const char *const tonemap_func[TONEMAP_COUNT] = { + [TONEMAP_NONE] = "direct", + [TONEMAP_LINEAR] = "linear", + [TONEMAP_GAMMA] = "gamma", +@@ -88,8 +130,54 @@ static const char *const tonemap_func[TO + [TONEMAP_REINHARD] = "reinhard", + [TONEMAP_HABLE] = "hable", + [TONEMAP_MOBIUS] = "mobius", ++ [TONEMAP_BT2390] = "bt2390", ++}; ++ ++static const double dovi_lms2rgb_matrix[3][3] = ++{ ++ { 3.06441879, -2.16597676, 0.10155818}, ++ {-0.65612108, 1.78554118, -0.12943749}, ++ { 0.01736321, -0.04725154, 1.03004253}, + }; + ++static float linearize(float x, float ref_white, enum AVColorTransferCharacteristic trc_in) ++{ ++ if (trc_in == AVCOL_TRC_SMPTE2084) ++ return eotf_st2084(x, ref_white); ++ if (trc_in == AVCOL_TRC_ARIB_STD_B67) ++ return eotf_arib_b67(x); ++ return x; ++} ++ ++#define LUT_SIZE (1 << 10) ++static int compute_trc_luts(AVFilterContext *avctx) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ int i; ++ ++ if (!ctx->lin_lut && !(ctx->lin_lut = av_calloc(LUT_SIZE, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ for (i = 0; i < LUT_SIZE; i++) { ++ float x = (float)i / (LUT_SIZE - 1); ++ ctx->lin_lut[i] = FFMAX(linearize(x, ctx->ref_white, ctx->trc_in), 0.0f); ++ } ++ ++ return 0; ++} ++ ++static void print_opencl_const_trc_luts(AVFilterContext *avctx, AVBPrint *buf) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ int i; ++ ++ if (ctx->lin_lut) { ++ av_bprintf(buf, "__constant float lin_lut[%d] = {\n", LUT_SIZE); ++ for (i = 0; i < LUT_SIZE; i++) ++ av_bprintf(buf, " %ff,", ctx->lin_lut[i]); ++ av_bprintf(buf, "};\n"); ++ } ++} ++ + static int get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, + double rgb2rgb[3][3]) { + double 
rgb2xyz[3][3], xyz2rgb[3][3]; +@@ -108,23 +196,150 @@ static int get_rgb2rgb_matrix(enum AVCol + return 0; + } + +-#define OPENCL_SOURCE_NB 3 +-// Average light level for SDR signals. This is equal to a signal level of 0.5 +-// under a typical presentation gamma of about 2.0. +-static const float sdr_avg = 0.25f; ++static int tonemap_opencl_update_dovi_buf(AVFilterContext *avctx) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ float *pbuf = NULL; ++ float coeffs_data[8][4] = {0}; ++ float mmr_packed_data[8*6][4] = {0}; ++ int c, i, j, k, err; ++ cl_int cle; ++ ++ pbuf = (float *)clEnqueueMapBuffer(ctx->command_queue, ctx->dovi_buf, ++ CL_TRUE, CL_MAP_WRITE, 0, ++ 3*(params_sz+pivots_sz+coeffs_sz+mmr_sz), ++ 0, NULL, NULL, &cle); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to map dovi buf: %d.\n", cle); ++ ++ av_assert0(pbuf); ++ ++ for (c = 0; c < 3; c++) { ++ int has_poly = 0, has_mmr = 0, mmr_single = 1; ++ int mmr_idx = 0, min_order = 3, max_order = 1; ++ const struct ReshapeData *comp = &ctx->dovi->comp[c]; ++ if (!comp->num_pivots) ++ continue; ++ av_assert0(comp->num_pivots >= 2 && comp->num_pivots <= 9); ++ ++ memset(coeffs_data, 0, sizeof(coeffs_data)); ++ for (i = 0; i < comp->num_pivots - 1; i++) { ++ switch (comp->method[i]) { ++ case 0: // polynomial ++ has_poly = 1; ++ coeffs_data[i][3] = 0.0f; // order=0 signals polynomial ++ for (k = 0; k < 3; k++) ++ coeffs_data[i][k] = comp->poly_coeffs[i][k]; ++ break; ++ case 1: ++ min_order = FFMIN(min_order, comp->mmr_order[i]); ++ max_order = FFMAX(max_order, comp->mmr_order[i]); ++ mmr_single = !has_mmr; ++ has_mmr = 1; ++ coeffs_data[i][3] = (float)comp->mmr_order[i]; ++ coeffs_data[i][0] = comp->mmr_constant[i]; ++ coeffs_data[i][1] = (float)mmr_idx; ++ for (j = 0; j < comp->mmr_order[i]; j++) { ++ // store weights per order as two packed vec4s ++ float *mmr = &mmr_packed_data[mmr_idx][0]; ++ mmr[0] = comp->mmr_coeffs[i][j][0]; ++ mmr[1] = comp->mmr_coeffs[i][j][1]; ++ mmr[2] = comp->mmr_coeffs[i][j][2]; 
++ mmr[3] = 0.0f; // unused ++ mmr[4] = comp->mmr_coeffs[i][j][3]; ++ mmr[5] = comp->mmr_coeffs[i][j][4]; ++ mmr[6] = comp->mmr_coeffs[i][j][5]; ++ mmr[7] = comp->mmr_coeffs[i][j][6]; ++ mmr_idx += 2; ++ } ++ break; ++ default: ++ av_assert0(0); ++ } ++ } ++ ++ av_assert0(has_poly || has_mmr); ++ ++ if (has_mmr) ++ av_assert0(min_order <= max_order); ++ ++ // dovi_params ++ { ++ float params[8] = { ++ comp->num_pivots, !!has_mmr, !!has_poly, ++ mmr_single, min_order, max_order, ++ comp->pivots[0], comp->pivots[comp->num_pivots - 1] ++ }; ++ memcpy(pbuf + c*params_cnt, params, params_sz); ++ } ++ ++ // dovi_pivots ++ if (c == 0 && comp->num_pivots > 2) { ++ // Skip the (irrelevant) lower and upper bounds ++ float pivots_data[7+1] = {0}; ++ memcpy(pivots_data, comp->pivots + 1, ++ (comp->num_pivots - 2) * sizeof(pivots_data[0])); ++ // Fill the remainder with a quasi-infinite sentinel pivot ++ for (i = comp->num_pivots - 2; i < FF_ARRAY_ELEMS(pivots_data); i++) ++ pivots_data[i] = 1e9f; ++ memcpy(pbuf + 3*params_cnt + c*pivots_cnt, pivots_data, pivots_sz); ++ } ++ ++ // dovi_coeffs ++ memcpy(pbuf + 3*(params_cnt+pivots_cnt) + c*coeffs_cnt, &coeffs_data[0], coeffs_sz); ++ ++ // dovi_mmr ++ if (has_mmr) ++ memcpy(pbuf + 3*(params_cnt+pivots_cnt+coeffs_cnt) + c*mmr_cnt, &mmr_packed_data[0], mmr_sz); ++ } ++ ++ cle = clEnqueueUnmapMemObject(ctx->command_queue, ctx->dovi_buf, pbuf, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to unmap dovi buf: %d.\n", cle); ++ ++fail: ++ return cle; ++} ++ ++static char *check_opencl_device_str(cl_device_id device_id, ++ cl_device_info key) ++{ ++ char *str; ++ size_t size; ++ cl_int cle; ++ cle = clGetDeviceInfo(device_id, key, 0, NULL, &size); ++ if (cle != CL_SUCCESS) ++ return NULL; ++ str = av_malloc(size); ++ if (!str) ++ return NULL; ++ cle = clGetDeviceInfo(device_id, key, size, str, &size); ++ if (cle != CL_SUCCESS) { ++ av_free(str); ++ return NULL; ++ } ++ av_assert0(strlen(str) + 1== size); ++ return str; ++} + + 
static int tonemap_opencl_init(AVFilterContext *avctx) + { + TonemapOpenCLContext *ctx = avctx->priv; ++ AVBPrint header; ++ const char *opencl_sources[OPENCL_SOURCE_NB]; ++ size_t m_origin[3] = {0}; ++ size_t m_region[3] = {ff_fruit_dither_size, ff_fruit_dither_size, 1}; ++ size_t m_row_pitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]); + int rgb2rgb_passthrough = 1; + double rgb2rgb[3][3], rgb2yuv[3][3], yuv2rgb[3][3]; + const AVLumaCoefficients *luma_src, *luma_dst; ++ cl_event event = NULL; ++ cl_bool device_is_integrated; ++ cl_uint max_compute_units, device_vendor_id; + cl_int cle; +- int err; +- AVBPrint header; +- const char *opencl_sources[OPENCL_SOURCE_NB]; +- +- av_bprint_init(&header, 1024, AV_BPRINT_SIZE_AUTOMATIC); ++ cl_mem_flags dovi_buf_flags = CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR; ++ char *device_vendor = NULL; ++ char *device_name = NULL; ++ char *device_exts = NULL; ++ int i, j, err; + + switch(ctx->tonemap) { + case TONEMAP_GAMMA: +@@ -144,48 +359,170 @@ static int tonemap_opencl_init(AVFilterC + if (isnan(ctx->param)) + ctx->param = 1.0f; + ++ ctx->ref_white = ctx->tonemap == TONEMAP_BT2390 ? 
REFERENCE_WHITE_ALT ++ : REFERENCE_WHITE; ++ ++ if (ctx->tonemap == TONEMAP_BT2390 && ctx->peak) ++ ctx->peak = FFMAX(ctx->peak / 10.0f, 1.1f); ++ + // SDR peak is 1.0f + ctx->target_peak = 1.0f; +- av_log(ctx, AV_LOG_DEBUG, "tone mapping transfer from %s to %s\n", ++ ++ cle = clGetDeviceInfo(ctx->ocf.hwctx->device_id, CL_DEVICE_VENDOR_ID, ++ sizeof(cl_uint), &device_vendor_id, ++ NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to check OpenCL " ++ "device vendor id %d.\n", cle); ++ ++#ifndef CL_MEM_FORCE_HOST_MEMORY_INTEL ++ #define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20) ++#endif ++ // zero-copy buffer requires this extension on Intel dGPUs ++ if (device_vendor_id == 0x8086) { ++ device_exts = check_opencl_device_str(ctx->ocf.hwctx->device_id, CL_DEVICE_EXTENSIONS); ++ if (device_exts && strstr(device_exts, "cl_intel_mem_force_host_memory")) ++ dovi_buf_flags |= CL_MEM_FORCE_HOST_MEMORY_INTEL; ++ if (device_exts) ++ av_free(device_exts); ++ } ++ ++ if (ctx->tradeoff == -1) { ++ ctx->tradeoff = 1; ++ cle = clGetDeviceInfo(ctx->ocf.hwctx->device_id, CL_DEVICE_HOST_UNIFIED_MEMORY, ++ sizeof(cl_bool), &device_is_integrated, ++ NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to check if OpenCL " ++ "device is integrated %d.\n", cle); ++ cle = clGetDeviceInfo(ctx->ocf.hwctx->device_id, CL_DEVICE_MAX_COMPUTE_UNITS, ++ sizeof(cl_uint), &max_compute_units, ++ NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to check OpenCL " ++ "device max compute units %d.\n", cle); ++ if (device_vendor_id == 0x8086 && device_is_integrated == CL_TRUE) { ++ if (max_compute_units >= 40) ++ ctx->tradeoff = 0; ++ if (device_name = check_opencl_device_str(ctx->ocf.hwctx->device_id, CL_DEVICE_NAME)) { ++ const char *excluded_devices[5] = { "Arc", "Iris", "Xe", "770", "750" }; ++ for (i = 0; i < FF_ARRAY_ELEMS(excluded_devices); i++) { ++ if (strstr(device_name, excluded_devices[i])) { ++ ctx->tradeoff = 0; ++ break; ++ } ++ } ++ av_free(device_name); ++ } ++ } else if (device_is_integrated 
== CL_TRUE) { ++ device_vendor = check_opencl_device_str(ctx->ocf.hwctx->device_id, CL_DEVICE_VENDOR); ++ device_name = check_opencl_device_str(ctx->ocf.hwctx->device_id, CL_DEVICE_NAME); ++ if (!strstr(device_vendor, "ARM") && ++ !strstr(device_name, "Mali")) ++ ctx->tradeoff = 0; ++ av_free(device_vendor); ++ av_free(device_name); ++ } else { ++ ctx->tradeoff = 0; ++ } ++ ++ if (!ctx->tradeoff) ++ av_log(avctx, AV_LOG_DEBUG, "Disabled tradeoffs on high performance device.\n"); ++ } ++ ++ if (ctx->tonemap_mode == TONEMAP_MODE_AUTO) { ++ if (ctx->tradeoff) { ++ ctx->tonemap_mode = TONEMAP_MODE_LUM; ++ } else { ++ ctx->tonemap_mode = TONEMAP_MODE_ITP; ++ } ++ } ++ ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n", + av_color_transfer_name(ctx->trc_in), + av_color_transfer_name(ctx->trc_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping colorspace from %s to %s\n", +- av_color_space_name(ctx->colorspace_in), ++ av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n", ++ ctx->dovi ? 
"dolby_vision" : av_color_space_name(ctx->colorspace_in), + av_color_space_name(ctx->colorspace_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping primaries from %s to %s\n", ++ av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n", + av_color_primaries_name(ctx->primaries_in), + av_color_primaries_name(ctx->primaries_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping range from %s to %s\n", ++ av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n", + av_color_range_name(ctx->range_in), + av_color_range_name(ctx->range_out)); +- // checking valid value just because of limited implementaion +- // please remove when more functionalities are implemented ++ + av_assert0(ctx->trc_out == AVCOL_TRC_BT709 || +- ctx->trc_out == AVCOL_TRC_BT2020_10); ++ ctx->trc_out == AVCOL_TRC_BT2020_10 || ++ ctx->trc_out == AVCOL_TRC_SMPTE2084); ++ + av_assert0(ctx->trc_in == AVCOL_TRC_SMPTE2084|| + ctx->trc_in == AVCOL_TRC_ARIB_STD_B67); +- av_assert0(ctx->colorspace_in == AVCOL_SPC_BT2020_NCL || ++ av_assert0(ctx->dovi || ++ ctx->colorspace_in == AVCOL_SPC_BT2020_NCL || + ctx->colorspace_in == AVCOL_SPC_BT709); + av_assert0(ctx->primaries_in == AVCOL_PRI_BT2020 || + ctx->primaries_in == AVCOL_PRI_BT709); + +- av_bprintf(&header, "__constant const float tone_param = %.4ff;\n", ++ if (ctx->trc_out == AVCOL_TRC_SMPTE2084) { ++ int is_10_or_16b_out = ctx->out_desc->comp[0].depth == 10 || ++ ctx->out_desc->comp[0].depth == 16; ++ if (!(is_10_or_16b_out && ++ ctx->primaries_out == AVCOL_PRI_BT2020 && ++ ctx->colorspace_out == AVCOL_SPC_BT2020_NCL)) { ++ av_log(avctx, AV_LOG_ERROR, "HDR passthrough requires BT.2020 " ++ "colorspace and 10/16 bit output format depth.\n"); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ av_bprint_init(&header, 2048, AV_BPRINT_SIZE_UNLIMITED); ++ ++ av_bprintf(&header, "__constant float ref_white = %.4ff;\n", ++ ctx->ref_white); ++ av_bprintf(&header, "__constant float tone_param = %.4ff;\n", + ctx->param); +- av_bprintf(&header, "__constant const float desat_param = 
%.4ff;\n", ++ av_bprintf(&header, "__constant float desat_param = %.4ff;\n", + ctx->desat_param); +- av_bprintf(&header, "__constant const float target_peak = %.4ff;\n", ++ av_bprintf(&header, "__constant float target_peak = %.4ff;\n", + ctx->target_peak); +- av_bprintf(&header, "__constant const float sdr_avg = %.4ff;\n", sdr_avg); +- av_bprintf(&header, "__constant const float scene_threshold = %.4ff;\n", ++ av_bprintf(&header, "__constant float scene_threshold = %.4ff;\n", + ctx->scene_threshold); ++ ++ av_bprintf(&header, "__constant float pq_max_lum_div_ref_white = %ff;\n", ++ (ST2084_MAX_LUMINANCE / ctx->ref_white)); ++ av_bprintf(&header, "__constant float ref_white_div_pq_max_lum = %ff;\n", ++ (ctx->ref_white / ST2084_MAX_LUMINANCE)); ++ + av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]); +- av_bprintf(&header, "#define DETECTION_FRAMES %d\n", DETECTION_FRAMES); ++ if (ctx->tonemap == TONEMAP_BT2390) ++ av_bprintf(&header, "#define TONE_FUNC_BT2390\n"); ++ ++ if (ctx->tonemap_mode == TONEMAP_MODE_RGB) { ++ av_bprintf(&header, "#define TONE_MODE_RGB\n"); ++ av_bprintf(&header, "#define MAP_IN_DST_SPACE\n"); ++ } ++ else if (ctx->tonemap_mode == TONEMAP_MODE_MAX) { ++ av_bprintf(&header, "#define TONE_MODE_MAX\n"); ++ av_bprintf(&header, "#define MAP_IN_DST_SPACE\n"); ++ } ++ else if (ctx->tonemap_mode == TONEMAP_MODE_ITP) ++ av_bprintf(&header, "#define TONE_MODE_ITP\n"); ++ ++ if (ctx->in_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_IN\n"); ++ ++ if (ctx->out_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_OUT\n"); ++ ++ if (ctx->in_desc->comp[0].depth > ctx->out_desc->comp[0].depth) { ++ av_bprintf(&header, "#define ENABLE_DITHER\n"); ++ av_bprintf(&header, "__constant float dither_size2 = %.1ff;\n", (float)(ff_fruit_dither_size * ff_fruit_dither_size)); ++ av_bprintf(&header, "__constant float dither_quantization = %.1ff;\n", (float)((1 << ctx->out_desc->comp[0].depth) - 1)); ++ } + + if 
(ctx->primaries_out != ctx->primaries_in) { + if ((err = get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb)) < 0) + goto fail; + rgb2rgb_passthrough = 0; + } ++ + if (ctx->range_in == AVCOL_RANGE_JPEG) + av_bprintf(&header, "#define FULL_RANGE_IN\n"); + +@@ -199,19 +536,41 @@ static int tonemap_opencl_init(AVFilterC + else + ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); + ++ if (ctx->trc_out == AVCOL_TRC_SMPTE2084) ++ av_bprintf(&header, "#define SKIP_TONEMAP\n"); + +- luma_src = av_csp_luma_coeffs_from_avcsp(ctx->colorspace_in); +- if (!luma_src) { +- err = AVERROR(EINVAL); +- av_log(avctx, AV_LOG_ERROR, "unsupported input colorspace %d (%s)\n", +- ctx->colorspace_in, av_color_space_name(ctx->colorspace_in)); +- goto fail; ++ if (ctx->dovi) { ++ double ycc2rgb_offset[3] = {0}; ++ double lms2rgb[3][3]; ++ av_bprintf(&header, "#define DOVI_RESHAPE\n"); ++ if (ctx->tradeoff) ++ av_bprintf(&header, "#define DOVI_PERF_TRADEOFF\n"); ++ for (i = 0; i < 3; i++) { ++ for (j = 0; j < 3; j++) ++ ycc2rgb_offset[i] -= ctx->dovi->nonlinear[i][j] * ctx->dovi->nonlinear_offset[j]; ++ } ++ av_bprintf(&header, "__constant float3 ycc2rgb_offset = {%ff, %ff, %ff};\n", ++ ycc2rgb_offset[0], ycc2rgb_offset[1], ycc2rgb_offset[2]); ++ ff_matrix_mul_3x3(lms2rgb, dovi_lms2rgb_matrix, ctx->dovi->linear); ++ ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", ctx->dovi->nonlinear); //ycc2rgb ++ ff_opencl_print_const_matrix_3x3(&header, "lms2rgb_matrix", lms2rgb); //lms2rgb ++ } else { ++ luma_src = av_csp_luma_coeffs_from_avcsp(ctx->colorspace_in); ++ if (!luma_src) { ++ err = AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_ERROR, "Unsupported input colorspace %d (%s)\n", ++ ctx->colorspace_in, av_color_space_name(ctx->colorspace_in)); ++ goto fail; ++ } ++ ff_fill_rgb2yuv_table(luma_src, rgb2yuv); ++ ff_matrix_invert_3x3(rgb2yuv, yuv2rgb); ++ ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", yuv2rgb); + } + + luma_dst = 
av_csp_luma_coeffs_from_avcsp(ctx->colorspace_out); + if (!luma_dst) { + err = AVERROR(EINVAL); +- av_log(avctx, AV_LOG_ERROR, "unsupported output colorspace %d (%s)\n", ++ av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace %d (%s)\n", + ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); + goto fail; + } +@@ -219,24 +578,23 @@ static int tonemap_opencl_init(AVFilterC + ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); + ff_opencl_print_const_matrix_3x3(&header, "yuv_matrix", rgb2yuv); + +- ff_fill_rgb2yuv_table(luma_src, rgb2yuv); +- ff_matrix_invert_3x3(rgb2yuv, yuv2rgb); +- ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", yuv2rgb); +- +- av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n", +- av_q2d(luma_src->cr), av_q2d(luma_src->cg), av_q2d(luma_src->cb)); +- av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n", ++ av_bprintf(&header, "__constant float3 luma_dst = {%ff, %ff, %ff};\n", + av_q2d(luma_dst->cr), av_q2d(luma_dst->cg), av_q2d(luma_dst->cb)); + +- av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); +- av_bprintf(&header, "#define delinearize %s\n", +- delinearize_funcs[ctx->trc_out]); +- +- if (ctx->trc_in == AVCOL_TRC_ARIB_STD_B67) +- av_bprintf(&header, "#define ootf_impl ootf_hlg\n"); +- +- if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67) +- av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n"); ++ if (ctx->tradeoff) { ++ av_bprintf(&header, "#define LUT_TRC %d\n", LUT_SIZE - 1); ++ if (ctx->trc_out != AVCOL_TRC_SMPTE2084) { ++ av_bprintf(&header, "#define linearize %s\n", "linearize_lut"); ++ av_bprintf(&header, "#define delinearize %s\n", delinearize_funcs[ctx->trc_out]); ++ } ++ if (!ctx->lin_lut) ++ if ((err = compute_trc_luts(avctx)) < 0) ++ goto fail; ++ print_opencl_const_trc_luts(avctx, &header); ++ } else if (ctx->trc_out != AVCOL_TRC_SMPTE2084) { ++ av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); ++ 
av_bprintf(&header, "#define delinearize %s\n", delinearize_funcs[ctx->trc_out]); ++ } + + av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); + opencl_sources[0] = header.str; +@@ -254,46 +612,171 @@ static int tonemap_opencl_init(AVFilterC + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " + "command queue %d.\n", cle); + ++ if (ctx->in_desc->comp[0].depth > ctx->out_desc->comp[0].depth) { ++ cl_image_format image_format = { ++ .image_channel_data_type = CL_UNORM_INT16, ++ .image_channel_order = CL_R, ++ }; ++ cl_image_desc image_desc = { ++ .image_type = CL_MEM_OBJECT_IMAGE2D, ++ .image_width = ff_fruit_dither_size, ++ .image_height = ff_fruit_dither_size, ++ .image_row_pitch = 0, ++ }; ++ ++ av_assert0(sizeof(ff_fruit_dither_matrix) == sizeof(ff_fruit_dither_matrix[0]) * ff_fruit_dither_size * ff_fruit_dither_size); ++ ++ ctx->dither_image = clCreateImage(ctx->ocf.hwctx->context, CL_MEM_READ_ONLY, ++ &image_format, &image_desc, NULL, &cle); ++ if (!ctx->dither_image) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create image for " ++ "dither matrix: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ cle = clEnqueueWriteImage(ctx->command_queue, ++ ctx->dither_image, ++ CL_FALSE, m_origin, m_region, ++ m_row_pitch, 0, ++ ff_fruit_dither_matrix, ++ 0, NULL, &event); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue write of dither matrix image: %d.\n", cle); ++ ++ cle = clWaitForEvents(1, &event); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to wait for event completion: %d.\n", cle); ++ } ++ + ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle); + +- ctx->util_mem = +- clCreateBuffer(ctx->ocf.hwctx->context, 0, +- (2 * DETECTION_FRAMES + 7) * sizeof(unsigned), +- NULL, &cle); +- CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create util buffer: %d.\n", cle); ++ if (ctx->dovi) { ++ CL_CREATE_BUFFER_FLAGS(ctx, dovi_buf, dovi_buf_flags, ++ 
3*(params_sz+pivots_sz+coeffs_sz+mmr_sz), NULL); ++ } + + ctx->initialised = 1; + return 0; + + fail: + av_bprint_finalize(&header, NULL); +- if (ctx->util_mem) +- clReleaseMemObject(ctx->util_mem); ++ if (ctx->dovi_buf) ++ clReleaseMemObject(ctx->dovi_buf); + if (ctx->command_queue) + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); ++ if (event) ++ clReleaseEvent(event); ++ if (ctx->dither_image) ++ clReleaseMemObject(ctx->dither_image); ++ if (ctx->lin_lut) ++ av_freep(&ctx->lin_lut); + return err; + } + ++static av_cold void tonemap_opencl_uninit_dovi(AVFilterContext *avctx) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ cl_int cle; ++ ++ if (ctx->dovi) ++ av_freep(&ctx->dovi); ++ ++ if (ctx->dovi_buf) { ++ cle = clReleaseMemObject(ctx->dovi_buf); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "dovi buf: %d.\n", cle); ++ } ++ ++ ctx->init_with_dovi = 0; ++} ++ ++static av_cold void tonemap_opencl_uninit_common(AVFilterContext *avctx) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ cl_int cle; ++ ++ if (ctx->lin_lut) ++ av_freep(&ctx->lin_lut); ++ ++ if (ctx->kernel) { ++ cle = clReleaseKernel(ctx->kernel); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel: %d.\n", cle); ++ } ++ ++ if (ctx->dither_image) { ++ cle = clReleaseMemObject(ctx->dither_image); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "dither image: %d.\n", cle); ++ } ++ ++ if (ctx->command_queue) { ++ cle = clReleaseCommandQueue(ctx->command_queue); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "command queue: %d.\n", cle); ++ } ++ ++ ctx->initialised = 0; ++} ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ + static int tonemap_opencl_config_output(AVFilterLink 
*outlink) + { +- AVFilterContext *avctx = outlink->src; +- TonemapOpenCLContext *s = avctx->priv; ++ AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ TonemapOpenCLContext *ctx = avctx->priv; ++ AVHWFramesContext *in_frames_ctx; ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; + int ret; +- if (s->format == AV_PIX_FMT_NONE) +- av_log(avctx, AV_LOG_WARNING, "format not set, use default format NV12\n"); +- else { +- if (s->format != AV_PIX_FMT_P010 && +- s->format != AV_PIX_FMT_NV12) { +- av_log(avctx, AV_LOG_ERROR, "unsupported output format," +- "only p010/nv12 supported now\n"); ++ ++ if (!inlink->hw_frames_ctx) + return AVERROR(EINVAL); +- } ++ in_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ in_format = in_frames_ctx->sw_format; ++ out_format = (ctx->format == AV_PIX_FMT_NONE) ? in_format : ctx->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); + } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (in_desc->comp[0].depth != 10 && in_desc->comp[0].depth != 16) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", ++ in_desc->comp[0].depth); ++ return AVERROR(ENOSYS); ++ } ++ ++ ctx->in_fmt = in_format; ++ ctx->out_fmt = out_format; ++ ctx->in_desc = in_desc; ++ ctx->out_desc = out_desc; ++ ctx->in_planes = av_pix_fmt_count_planes(in_format); ++ ctx->out_planes = av_pix_fmt_count_planes(out_format); ++ ctx->ocf.output_format = out_format; + +- s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? 
AV_PIX_FMT_NV12 : s->format; + ret = ff_opencl_filter_config_output(outlink); + if (ret < 0) + return ret; +@@ -308,13 +791,46 @@ static int launch_kernel(AVFilterContext + size_t global_work[2]; + size_t local_work[2]; + cl_int cle; ++ int idx_arg; ++ ++ if (!output->data[0] || !input->data[0] || !output->data[1] || !input->data[1]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->out_planes > 2 && !output->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->in_planes > 2 && !input->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } + + CL_SET_KERNEL_ARG(kernel, 0, cl_mem, &output->data[0]); + CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]); + CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]); + CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]); +- CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem); +- CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak); ++ ++ idx_arg = 4; ++ if (ctx->out_planes > 2) { ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &output->data[2]); ++ } ++ ++ if (ctx->in_planes > 2) { ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &input->data[2]); ++ } ++ ++ if (ctx->dither_image) { ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &ctx->dither_image); ++ } ++ ++ if (ctx->dovi_buf) { ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &ctx->dovi_buf); ++ } ++ ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_float, &peak); + + local_work[0] = 16; + local_work[1] = 16; +@@ -338,13 +854,10 @@ static int tonemap_opencl_filter_frame(A + AVFilterContext *avctx = inlink->dst; + AVFilterLink *outlink = avctx->outputs[0]; + TonemapOpenCLContext *ctx = avctx->priv; ++ AVFrameSideData *dovi_sd = NULL; + AVFrame *output = NULL; + cl_int cle; + int err; +- double peak = ctx->peak; +- +- AVHWFramesContext *input_frames_ctx = +- (AVHWFramesContext*)input->hw_frames_ctx->data; + + av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input->format), +@@ -363,9 +876,6 @@ static int 
tonemap_opencl_filter_frame(A + if (err < 0) + goto fail; + +- if (!peak) +- peak = ff_determine_signal_peak(input); +- + if (ctx->trc != -1) + output->color_trc = ctx->trc; + if (ctx->primaries != -1) +@@ -385,72 +895,92 @@ static int tonemap_opencl_filter_frame(A + ctx->range_out = output->color_range; + ctx->chroma_loc = output->chroma_location; + +- if (!ctx->initialised) { +- if (!(input->color_trc == AVCOL_TRC_SMPTE2084 || +- input->color_trc == AVCOL_TRC_ARIB_STD_B67)) { +- av_log(ctx, AV_LOG_ERROR, "unsupported transfer function characteristic.\n"); ++ if (ctx->apply_dovi) ++ dovi_sd = av_frame_get_side_data(input, AV_FRAME_DATA_DOVI_METADATA); ++ ++ // check DOVI->HDR10/HLG ++ if (!dovi_sd) { ++ if (input->color_trc != AVCOL_TRC_SMPTE2084 && ++ input->color_trc != AVCOL_TRC_ARIB_STD_B67) { ++ av_log(ctx, AV_LOG_ERROR, "No DOVI metadata and " ++ "unsupported transfer function characteristic: %s\n", ++ av_color_transfer_name(input->color_trc)); + err = AVERROR(ENOSYS); + goto fail; + } ++ } + +- if (input_frames_ctx->sw_format != AV_PIX_FMT_P010) { +- av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n"); +- err = AVERROR(ENOSYS); +- goto fail; ++ if (!ctx->peak) { ++ if (dovi_sd) { ++ const AVDOVIMetadata *metadata = (AVDOVIMetadata *) dovi_sd->data; ++ ctx->peak = ff_determine_dovi_signal_peak(metadata); ++ } else { ++ ctx->peak = ff_determine_signal_peak(input); + } ++ av_log(ctx, AV_LOG_DEBUG, "Computed signal peak: %f\n", ctx->peak); ++ } + ++ if (dovi_sd) { ++ const AVDOVIMetadata *metadata = (AVDOVIMetadata *) dovi_sd->data; ++ const AVDOVIRpuDataHeader *rpu = av_dovi_get_header(metadata); ++ // only map dovi rpus that don't require an EL ++ if (rpu->disable_residual_flag) { ++ struct DoviMetadata *dovi = av_malloc(sizeof(*dovi)); ++ ctx->dovi = dovi; ++ if (!ctx->dovi) ++ goto fail; ++ ++ ff_map_dovi_metadata(ctx->dovi, metadata); ++ ctx->trc_in = AVCOL_TRC_SMPTE2084; ++ ctx->colorspace_in = AVCOL_SPC_UNSPECIFIED; ++ ctx->primaries_in 
= AVCOL_PRI_BT2020; ++ } ++ } ++ ++ if (!ctx->init_with_dovi && ctx->dovi && ctx->initialised) ++ tonemap_opencl_uninit_common(avctx); ++ ++ if (!ctx->initialised) { + err = tonemap_opencl_init(avctx); + if (err < 0) + goto fail; ++ ++ ctx->init_with_dovi = !!ctx->dovi; + } + +- switch(input_frames_ctx->sw_format) { +- case AV_PIX_FMT_P010: +- err = launch_kernel(avctx, ctx->kernel, output, input, peak); +- if (err < 0) goto fail; +- break; +- default: +- err = AVERROR(ENOSYS); +- goto fail; ++ if (ctx->dovi) { ++ cle = tonemap_opencl_update_dovi_buf(avctx); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to update dovi buf: %d.\n", cle); ++ av_freep(&ctx->dovi); + } + ++ err = launch_kernel(avctx, ctx->kernel, output, input, ctx->peak); ++ if (err < 0) ++ goto fail; ++ + cle = clFinish(ctx->command_queue); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); + + av_frame_free(&input); + +- ff_update_hdr_metadata(output, ctx->target_peak); ++ if (ctx->trc_out != AVCOL_TRC_SMPTE2084) { ++ av_frame_remove_side_data(output, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ av_frame_remove_side_data(output, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ } ++ ++ av_frame_remove_side_data(output, AV_FRAME_DATA_DOVI_RPU_BUFFER); ++ av_frame_remove_side_data(output, AV_FRAME_DATA_DOVI_METADATA); + +- av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n", ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping output: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(output->format), + output->width, output->height, output->pts); +-#ifndef NDEBUG +- { +- uint32_t *ptr, *max_total_p, *avg_total_p, *frame_number_p; +- float peak_detected, avg_detected; +- unsigned map_size = (2 * DETECTION_FRAMES + 7) * sizeof(unsigned); +- ptr = (void *)clEnqueueMapBuffer(ctx->command_queue, ctx->util_mem, +- CL_TRUE, CL_MAP_READ, 0, map_size, +- 0, NULL, NULL, &cle); +- // For the layout of the util buffer, refer tonemap.cl +- if (ptr) { +- max_total_p = ptr + 2 * 
(DETECTION_FRAMES + 1) + 1; +- avg_total_p = max_total_p + 1; +- frame_number_p = avg_total_p + 2; +- peak_detected = (float)*max_total_p / (REFERENCE_WHITE * (*frame_number_p)); +- avg_detected = (float)*avg_total_p / (REFERENCE_WHITE * (*frame_number_p)); +- av_log(ctx, AV_LOG_DEBUG, "peak %f, avg %f will be used for next frame\n", +- peak_detected, avg_detected); +- clEnqueueUnmapMemObject(ctx->command_queue, ctx->util_mem, ptr, 0, +- NULL, NULL); +- } +- } +-#endif + + return ff_filter_frame(outlink, output); + + fail: + clFinish(ctx->command_queue); ++ if (ctx->dovi) ++ av_freep(&ctx->dovi); + av_frame_free(&input); + av_frame_free(&output); + return err; +@@ -458,24 +988,9 @@ fail: + + static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx) + { +- TonemapOpenCLContext *ctx = avctx->priv; +- cl_int cle; ++ tonemap_opencl_uninit_common(avctx); + +- if (ctx->util_mem) +- clReleaseMemObject(ctx->util_mem); +- if (ctx->kernel) { +- cle = clReleaseKernel(ctx->kernel); +- if (cle != CL_SUCCESS) +- av_log(avctx, AV_LOG_ERROR, "Failed to release " +- "kernel: %d.\n", cle); +- } +- +- if (ctx->command_queue) { +- cle = clReleaseCommandQueue(ctx->command_queue); +- if (cle != CL_SUCCESS) +- av_log(avctx, AV_LOG_ERROR, "Failed to release " +- "command queue: %d.\n", cle); +- } ++ tonemap_opencl_uninit_dovi(avctx); + + ff_opencl_filter_uninit(avctx); + } +@@ -483,37 +998,50 @@ static av_cold void tonemap_opencl_unini + #define OFFSET(x) offsetof(TonemapOpenCLContext, x) + #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + static const AVOption tonemap_opencl_options[] = { +- { "tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, .unit = "tonemap" }, +- { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, .unit = "tonemap" }, +- { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, .unit = "tonemap" }, +- { "gamma", 0, 0, 
AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, .unit = "tonemap" }, +- { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, .unit = "tonemap" }, +- { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, .unit = "tonemap" }, +- { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, .unit = "tonemap" }, +- { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, .unit = "tonemap" }, +- { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, .unit = "transfer" }, +- { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, .unit = "transfer" }, +- { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, .unit = "transfer" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, .unit = "transfer" }, +- { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "matrix" }, +- { "m", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "matrix" }, +- { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, .unit = "matrix" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, .unit = "matrix" }, +- { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "primaries" }, +- { "p", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "primaries" }, +- { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, .unit = "primaries" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, .unit = "primaries" }, +- { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "range" }, +- 
{ "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "range" }, +- { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, .unit = "range" }, +- { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, .unit = "range" }, +- { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, .unit = "range" }, +- { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, .unit = "range" }, +- { "format", "output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, INT_MAX, FLAGS, .unit = "fmt" }, +- { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, +- { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, +- { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, +- { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, ++ { "tonemap", "Tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, { .i64 = TONEMAP_NONE }, TONEMAP_NONE, TONEMAP_COUNT - 1, FLAGS, "tonemap" }, ++ { "none", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_NONE }, 0, 0, FLAGS, "tonemap" }, ++ { "linear", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_LINEAR }, 0, 0, FLAGS, "tonemap" }, ++ { "gamma", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_GAMMA }, 0, 0, FLAGS, "tonemap" }, ++ { "clip", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_CLIP }, 0, 0, FLAGS, "tonemap" }, ++ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_REINHARD }, 0, 0, FLAGS, "tonemap" }, ++ { "hable", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_HABLE }, 0, 0, FLAGS, "tonemap" }, ++ { "mobius", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MOBIUS }, 0, 0, FLAGS, "tonemap" }, ++ { "bt2390", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_BT2390 }, 0, 0, FLAGS, "tonemap" 
}, ++ { "tonemap_mode", "Tonemap mode selection", OFFSET(tonemap_mode), AV_OPT_TYPE_INT, { .i64 = TONEMAP_MODE_AUTO }, TONEMAP_MODE_MAX, TONEMAP_MODE_COUNT - 1, FLAGS, "tonemap_mode" }, ++ { "max", "Brightest channel based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_MAX }, 0, 0, FLAGS, "tonemap_mode" }, ++ { "rgb", "Per-channel based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_RGB }, 0, 0, FLAGS, "tonemap_mode" }, ++ { "lum", "Relative luminance based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_LUM }, 0, 0, FLAGS, "tonemap_mode" }, ++ { "itp", "ICtCp intensity based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_ITP }, 0, 0, FLAGS, "tonemap_mode" }, ++ { "auto", "Select based on GPU spec", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_AUTO }, 0, 0, FLAGS, "tonemap_mode" }, ++ { "transfer", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, "transfer" }, ++ { "t", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, "transfer" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT709 }, 0, 0, FLAGS, "transfer" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT2020_10 }, 0, 0, FLAGS, "transfer" }, ++ { "smpte2084", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_SMPTE2084 }, 0, 0, FLAGS, "transfer" }, ++ { "matrix", "Set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_BT709 }, -1, INT_MAX, FLAGS, "matrix" }, ++ { "m", "Set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_BT709 }, -1, INT_MAX, FLAGS, "matrix" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_SPC_BT709 }, 0, 0, FLAGS, "matrix" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_SPC_BT2020_NCL }, 0, 0, FLAGS, "matrix" }, ++ { "primaries", "Set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, { .i64 = AVCOL_PRI_BT709 }, -1, INT_MAX, FLAGS, "primaries" }, ++ { "p", 
"Set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, { .i64 = AVCOL_PRI_BT709 }, -1, INT_MAX, FLAGS, "primaries" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_PRI_BT709 }, 0, 0, FLAGS, "primaries" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_PRI_BT2020 }, 0, 0, FLAGS, "primaries" }, ++ { "range", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS, "range" }, ++ { "r", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS, "range" }, ++ { "tv", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "pc", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "limited", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "full", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "format", "Output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, ++ { "apply_dovi", "Apply Dolby Vision metadata if possible", OFFSET(apply_dovi), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, ++ { "tradeoff", "Apply tradeoffs to offload computing", OFFSET(tradeoff), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, FLAGS, "tradeoff" }, ++ { "auto", 0, 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, 0, 0, FLAGS, "tradeoff" }, ++ { "disabled", 0, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "tradeoff" }, ++ { "enabled", 0, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "tradeoff" }, ++ { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, ++ { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, DBL_MIN, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 0.2 }, 0, 
DBL_MAX, FLAGS }, + { NULL } + }; + diff --git a/cross/ffmpeg7/patches/1008-jellyfin-0008-add-pgs-support-to-opencl-overlay.patch b/cross/ffmpeg7/patches/1008-jellyfin-0008-add-pgs-support-to-opencl-overlay.patch new file mode 100644 index 00000000000..72907bbe525 --- /dev/null +++ b/cross/ffmpeg7/patches/1008-jellyfin-0008-add-pgs-support-to-opencl-overlay.patch @@ -0,0 +1,638 @@ +Index: FFmpeg/libavfilter/opencl/overlay.cl +=================================================================== +--- libavfilter/opencl/overlay.cl ++++ libavfilter/opencl/overlay.cl +@@ -16,15 +16,24 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-__kernel void overlay_no_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- int x_position, +- int y_position) ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_FILTER_NEAREST); ++ ++__kernel void overlay_pass(__write_only image2d_t dst, ++ __read_only image2d_t main) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); ++ int2 loc = (int2)(get_global_id(0), get_global_id(1)); + ++ float4 val = read_imagef(main, sampler, loc); ++ write_imagef(dst, loc, val); ++} ++ ++__kernel void overlay_noalpha(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay, ++ int x_position, ++ int y_position) ++{ + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +@@ -41,15 +50,15 @@ __kernel void overlay_no_alpha(__write_o + } + } + +-__kernel void overlay_internal_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- int x_position, +- int y_position) ++__kernel void overlay_alpha(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay, ++ __read_only image2d_t alpha, ++ int x_position, ++ int y_position, ++ int alpha_adj_x, ++ int 
alpha_adj_y) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); +- + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +@@ -63,24 +72,50 @@ __kernel void overlay_internal_alpha(__w + int2 loc_overlay = (int2)(x_position, y_position); + float4 in_main = read_imagef(main, sampler, loc); + float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay); +- float4 val = in_overlay * in_overlay.w + in_main * (1.0f - in_overlay.w); ++ ++ int2 loc_alpha = (int2)(loc.x * alpha_adj_x, loc.y * alpha_adj_y) - loc_overlay; ++ float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); ++ ++ float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x); + write_imagef(dst, loc, val); + } + } + +-__kernel void overlay_external_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- __read_only image2d_t alpha, +- int x_position, +- int y_position, +- int alpha_adj_x, +- int alpha_adj_y) ++__kernel void overlay_noalpha_uv(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay_u, ++ __read_only image2d_t overlay_v, ++ int x_position, ++ int y_position) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); ++ int2 overlay_size = get_image_dim(overlay_u); ++ int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +- int2 overlay_size = get_image_dim(overlay); ++ if (loc.x < x_position || ++ loc.y < y_position || ++ loc.x >= overlay_size.x + x_position || ++ loc.y >= overlay_size.y + y_position) { ++ float4 val = read_imagef(main, sampler, loc); ++ write_imagef(dst, loc, val); ++ } else { ++ int2 loc_overlay = (int2)(x_position, y_position); ++ float4 val_u = read_imagef(overlay_u, sampler, loc - loc_overlay); ++ float4 val_v = read_imagef(overlay_v, sampler, loc - loc_overlay); ++ write_imagef(dst, loc, (float4)(val_u.x, val_v.x, 0.0f, 1.0f)); ++ } ++} ++ ++__kernel 
void overlay_alpha_uv(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay_u, ++ __read_only image2d_t overlay_v, ++ __read_only image2d_t alpha, ++ int x_position, ++ int y_position, ++ int alpha_adj_x, ++ int alpha_adj_y) ++{ ++ int2 overlay_size = get_image_dim(overlay_u); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + if (loc.x < x_position || +@@ -90,13 +125,14 @@ __kernel void overlay_external_alpha(__w + float4 val = read_imagef(main, sampler, loc); + write_imagef(dst, loc, val); + } else { +- int2 loc_overlay = (int2)(x_position, y_position); +- float4 in_main = read_imagef(main, sampler, loc); +- float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay); ++ int2 loc_overlay = (int2)(x_position, y_position); ++ float4 in_main = read_imagef(main, sampler, loc); ++ float4 in_overlay_u = read_imagef(overlay_u, sampler, loc - loc_overlay); ++ float4 in_overlay_v = read_imagef(overlay_v, sampler, loc - loc_overlay); ++ float4 in_overlay = (float4)(in_overlay_u.x, in_overlay_v.x, 0.0f, 1.0f); + +- int2 loc_alpha = (int2)(loc.x * alpha_adj_x, +- loc.y * alpha_adj_y) - loc_overlay; +- float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); ++ int2 loc_alpha = (int2)(loc.x * alpha_adj_x, loc.y * alpha_adj_y) - loc_overlay; ++ float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); + + float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x); + write_imagef(dst, loc, val); +Index: FFmpeg/libavfilter/vf_overlay_opencl.c +=================================================================== +--- libavfilter/vf_overlay_opencl.c ++++ libavfilter/vf_overlay_opencl.c +@@ -27,72 +27,117 @@ + #include "opencl_source.h" + #include "video.h" + ++static const enum AVPixelFormat supported_main_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++ AV_PIX_FMT_NONE, ++}; ++ ++static const enum AVPixelFormat supported_overlay_formats[] = { ++ AV_PIX_FMT_NV12, 
++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUVA420P, ++ AV_PIX_FMT_NONE, ++}; ++ + typedef struct OverlayOpenCLContext { + OpenCLFilterContext ocf; + ++ enum AVPixelFormat in_fmt_main, in_fmt_overlay; ++ const AVPixFmtDescriptor *in_desc_main, *in_desc_overlay; ++ int in_planes_main, in_planes_overlay; ++ + int initialised; + cl_kernel kernel; ++ cl_kernel kernel_pass; ++ cl_kernel kernel_uv; ++ const char *kernel_name; ++ const char *kernel_name_pass; ++ const char *kernel_name_uv; + cl_command_queue command_queue; + + FFFrameSync fs; + +- int nb_planes; + int x_subsample; + int y_subsample; +- int alpha_separate; ++ int alpha; + + int x_position; + int y_position; ++ ++ int opt_repeatlast; ++ int opt_shortest; ++ int opt_eof_action; + } OverlayOpenCLContext; + +-static int overlay_opencl_load(AVFilterContext *avctx, +- enum AVPixelFormat main_format, +- enum AVPixelFormat overlay_format) ++static int format_is_supported(const enum AVPixelFormat fmts[], enum AVPixelFormat fmt) ++{ ++ for (int i = 0; fmts[i] != AV_PIX_FMT_NONE; i++) ++ if (fmts[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static int formats_match(const enum AVPixelFormat fmt_main, const enum AVPixelFormat fmt_overlay) { ++ switch(fmt_main) { ++ case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_P010: ++ case AV_PIX_FMT_P016: ++ return fmt_overlay == AV_PIX_FMT_NV12 || ++ fmt_overlay == AV_PIX_FMT_YUV420P || ++ fmt_overlay == AV_PIX_FMT_YUVA420P; ++ case AV_PIX_FMT_YUV420P: ++ return fmt_overlay == AV_PIX_FMT_YUV420P || ++ fmt_overlay == AV_PIX_FMT_YUVA420P; ++ default: ++ return 0; ++ } ++} ++ ++static int overlay_opencl_load(AVFilterContext *avctx) + { + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; +- const char *source = ff_source_overlay_cl; +- const char *kernel; +- const AVPixFmtDescriptor *main_desc, *overlay_desc; +- int err, i, main_planes, overlay_planes; +- +- main_desc = av_pix_fmt_desc_get(main_format); +- overlay_desc = av_pix_fmt_desc_get(overlay_format); +- +- main_planes = overlay_planes 
= 0; +- for (i = 0; i < main_desc->nb_components; i++) +- main_planes = FFMAX(main_planes, +- main_desc->comp[i].plane + 1); +- for (i = 0; i < overlay_desc->nb_components; i++) +- overlay_planes = FFMAX(overlay_planes, +- overlay_desc->comp[i].plane + 1); +- +- ctx->nb_planes = main_planes; +- ctx->x_subsample = 1 << main_desc->log2_chroma_w; +- ctx->y_subsample = 1 << main_desc->log2_chroma_h; ++ int err; ++ ++ ctx->x_subsample = 1 << ctx->in_desc_main->log2_chroma_w; ++ ctx->y_subsample = 1 << ctx->in_desc_main->log2_chroma_h; + + if (ctx->x_position % ctx->x_subsample || + ctx->y_position % ctx->y_subsample) { +- av_log(avctx, AV_LOG_WARNING, "Warning: overlay position (%d, %d) " ++ av_log(avctx, AV_LOG_WARNING, "Overlay position (%d, %d) " + "does not match subsampling (%d, %d).\n", + ctx->x_position, ctx->y_position, + ctx->x_subsample, ctx->y_subsample); + } + +- if (main_planes == overlay_planes) { +- if (main_desc->nb_components == overlay_desc->nb_components) +- kernel = "overlay_no_alpha"; ++ switch(ctx->in_fmt_overlay) { ++ case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_YUV420P: ++ ctx->alpha = 0; ++ ctx->kernel_name = "overlay_noalpha"; ++ break; ++ case AV_PIX_FMT_YUVA420P: ++ ctx->alpha = 1; ++ ctx->kernel_name = "overlay_alpha"; ++ break; ++ default: ++ err = AVERROR_BUG; ++ goto fail; ++ } ++ ++ if (ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) { ++ if (ctx->alpha) ++ ctx->kernel_name_uv = "overlay_alpha_uv"; + else +- kernel = "overlay_internal_alpha"; +- ctx->alpha_separate = 0; +- } else { +- kernel = "overlay_external_alpha"; +- ctx->alpha_separate = 1; ++ ctx->kernel_name_uv = "overlay_noalpha_uv"; + } + +- av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel); ++ av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", ctx->kernel_name); + +- err = ff_opencl_filter_load_program(avctx, &source, 1); ++ err = ff_opencl_filter_load_program(avctx, &ff_source_overlay_cl, 1); + if (err < 0) + goto fail; + +@@ -102,10 +147,20 @@ static int 
overlay_opencl_load(AVFilterC + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " + "command queue %d.\n", cle); + +- ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle); ++ ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name, &cle); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle); + ++ ctx->kernel_name_pass = "overlay_pass"; ++ ctx->kernel_pass = clCreateKernel(ctx->ocf.program, ctx->kernel_name_pass, &cle); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel_pass %d.\n", cle); ++ ++ if (ctx->kernel_name_uv) { ++ ctx->kernel_uv = clCreateKernel(ctx->ocf.program, ctx->kernel_name_uv, &cle); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel_uv %d.\n", cle); ++ } ++ + ctx->initialised = 1; ++ + return 0; + + fail: +@@ -113,21 +168,113 @@ fail: + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); ++ if (ctx->kernel_pass) ++ clReleaseKernel(ctx->kernel_pass); ++ if (ctx->kernel_uv) ++ clReleaseKernel(ctx->kernel_uv); ++ return err; ++} ++ ++static int launch_kernel(AVFilterContext *avctx, AVFrame *output, AVFrame *input_main, ++ AVFrame *input_overlay, int plane, int passthrough) { ++ OverlayOpenCLContext *ctx = avctx->priv; ++ cl_mem mem; ++ cl_int cle, x, y; ++ cl_kernel kernel; ++ size_t global_work[2]; ++ int idx_arg = 0; ++ int err; ++ ++ if (passthrough) ++ kernel = ctx->kernel_pass; ++ else if (plane == 1 && ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) ++ kernel = ctx->kernel_uv; ++ else ++ kernel = ctx->kernel; ++ ++ // dst ++ mem = (cl_mem)output->data[plane]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ // main ++ mem = (cl_mem)input_main->data[plane]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ if (!passthrough) { ++ // overlay ++ mem = (cl_mem)input_overlay->data[plane]; ++ if (!mem) { ++ err 
= AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ // non-semi planar on top of the semi planar ++ if (plane == 1 && ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) { ++ mem = (cl_mem)input_overlay->data[plane + 1]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ } ++ ++ // alpha ++ if (ctx->alpha) { ++ mem = (cl_mem)input_overlay->data[ctx->in_planes_overlay - 1]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ } ++ ++ x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample); ++ y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &x); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &y); ++ ++ if (ctx->alpha) { ++ cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample; ++ cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample; ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &alpha_adj_x); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &alpha_adj_y); ++ } ++ } ++ ++ err = ff_opencl_filter_work_size_from_image(avctx, global_work, ++ input_main, plane, 0); ++ if (err < 0) ++ goto fail; ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel " ++ "for plane %d: %d.\n", plane, cle); ++ return 0; ++ ++fail: + return err; + } + + static int overlay_opencl_blend(FFFrameSync *fs) + { +- AVFilterContext *avctx = fs->parent; +- AVFilterLink *outlink = avctx->outputs[0]; ++ AVFilterContext *avctx = fs->parent; ++ AVFilterLink *outlink = avctx->outputs[0]; + OverlayOpenCLContext *ctx = avctx->priv; + AVFrame *input_main, *input_overlay; + AVFrame *output; +- cl_mem mem; +- cl_int cle, x, y; +- size_t global_work[2]; +- int kernel_arg = 0; +- int err, plane; ++ cl_int cle; ++ int passthrough = 0; ++ int err, p; + + err 
= ff_framesync_get_frame(fs, 0, &input_main, 0); + if (err < 0) +@@ -136,14 +283,14 @@ static int overlay_opencl_blend(FFFrameS + if (err < 0) + return err; + +- if (!ctx->initialised) { +- AVHWFramesContext *main_fc = +- (AVHWFramesContext*)input_main->hw_frames_ctx->data; +- AVHWFramesContext *overlay_fc = +- (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; ++ if (!input_main) ++ return AVERROR_BUG; ++ ++ if (!input_overlay) ++ passthrough = 1; + +- err = overlay_opencl_load(avctx, main_fc->sw_format, +- overlay_fc->sw_format); ++ if (!ctx->initialised) { ++ err = overlay_opencl_load(avctx); + if (err < 0) + return err; + } +@@ -154,54 +301,10 @@ static int overlay_opencl_blend(FFFrameS + goto fail; + } + +- for (plane = 0; plane < ctx->nb_planes; plane++) { +- kernel_arg = 0; +- +- mem = (cl_mem)output->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- mem = (cl_mem)input_main->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- mem = (cl_mem)input_overlay->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- if (ctx->alpha_separate) { +- mem = (cl_mem)input_overlay->data[ctx->nb_planes]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- } +- +- x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample); +- y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample); +- +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &x); +- kernel_arg++; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &y); +- kernel_arg++; +- +- if (ctx->alpha_separate) { +- cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample; +- cl_int alpha_adj_y = plane == 0 ? 
1 : ctx->y_subsample; +- +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_x); +- kernel_arg++; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_y); +- kernel_arg++; +- } +- +- err = ff_opencl_filter_work_size_from_image(avctx, global_work, +- output, plane, 0); ++ for (p = 0; p < ctx->in_planes_main; p++) { ++ err = launch_kernel(avctx, output, input_main, input_overlay, p, passthrough); + if (err < 0) +- goto fail; +- +- cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, +- global_work, NULL, 0, NULL, NULL); +- CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel " +- "for plane %d: %d.\n", plane, cle); ++ return err; + } + + cle = clFinish(ctx->command_queue); +@@ -216,6 +319,9 @@ static int overlay_opencl_blend(FFFrameS + return ff_filter_frame(outlink, output); + + fail: ++ clFinish(ctx->command_queue); ++ av_frame_free(&input_main); ++ av_frame_free(&input_overlay); + av_frame_free(&output); + return err; + } +@@ -224,8 +330,49 @@ static int overlay_opencl_config_output( + { + AVFilterContext *avctx = outlink->src; + OverlayOpenCLContext *ctx = avctx->priv; ++ ++ AVFilterLink *inlink = avctx->inputs[0]; ++ AVHWFramesContext *frames_ctx_main = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ ++ AVFilterLink *inlink_overlay = avctx->inputs[1]; ++ AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data; ++ + int err; + ++ if (!frames_ctx_main) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on main input\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ ctx->in_fmt_main = frames_ctx_main->sw_format; ++ ctx->in_desc_main = av_pix_fmt_desc_get(frames_ctx_main->sw_format); ++ ctx->in_planes_main = av_pix_fmt_count_planes(frames_ctx_main->sw_format); ++ if (!format_is_supported(supported_main_formats, ctx->in_fmt_main)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported main input format: %s\n", ++ av_get_pix_fmt_name(ctx->in_fmt_main)); ++ return AVERROR(ENOSYS); ++ } 
++ ++ if (!frames_ctx_overlay) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on overlay input\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ ctx->in_fmt_overlay = frames_ctx_overlay->sw_format; ++ ctx->in_desc_overlay = av_pix_fmt_desc_get(frames_ctx_overlay->sw_format); ++ ctx->in_planes_overlay = av_pix_fmt_count_planes(frames_ctx_overlay->sw_format); ++ if (!format_is_supported(supported_overlay_formats, ctx->in_fmt_overlay)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported overlay input format: %s\n", ++ av_get_pix_fmt_name(ctx->in_fmt_overlay)); ++ return AVERROR(ENOSYS); ++ } ++ ++ if (!formats_match(ctx->in_fmt_main, ctx->in_fmt_overlay)) { ++ av_log(ctx, AV_LOG_ERROR, "Can't overlay %s on %s \n", ++ av_get_pix_fmt_name(ctx->in_fmt_overlay), av_get_pix_fmt_name(ctx->in_fmt_main)); ++ return AVERROR(EINVAL); ++ } ++ + err = ff_opencl_filter_config_output(outlink); + if (err < 0) + return err; +@@ -234,6 +381,11 @@ static int overlay_opencl_config_output( + if (err < 0) + return err; + ++ ctx->fs.opt_repeatlast = ctx->opt_repeatlast; ++ ctx->fs.opt_shortest = ctx->opt_shortest; ++ ctx->fs.opt_eof_action = ctx->opt_eof_action; ++ ctx->fs.time_base = outlink->time_base = inlink->time_base; ++ + return ff_framesync_configure(&ctx->fs); + } + +@@ -265,6 +417,20 @@ static av_cold void overlay_opencl_unini + "kernel: %d.\n", cle); + } + ++ if (ctx->kernel_pass) { ++ cle = clReleaseKernel(ctx->kernel_pass); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel_pass: %d.\n", cle); ++ } ++ ++ if (ctx->kernel_uv) { ++ cle = clReleaseKernel(ctx->kernel_uv); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel_uv: %d.\n", cle); ++ } ++ + if (ctx->command_queue) { + cle = clReleaseCommandQueue(ctx->command_queue); + if (cle != CL_SUCCESS) +@@ -279,11 +445,20 @@ static av_cold void overlay_opencl_unini + + #define OFFSET(x) offsetof(OverlayOpenCLContext, x) + #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | 
AV_OPT_FLAG_VIDEO_PARAM) ++ + static const AVOption overlay_opencl_options[] = { + { "x", "Overlay x position", + OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, + { "y", "Overlay y position", + OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "eof_action", "Action to take when encountering EOF from secondary input ", ++ OFFSET(opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, ++ EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, .unit = "eof_action" }, ++ { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, .unit = "eof_action" }, ++ { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, .unit = "eof_action" }, ++ { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, .unit = "eof_action" }, ++ { "shortest", "force termination when the shortest input terminates", OFFSET(opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, + { NULL }, + }; + diff --git a/cross/ffmpeg7/patches/1009-jellyfin-0009-add-d3d11-opencl-interop-for-amd.patch b/cross/ffmpeg7/patches/1009-jellyfin-0009-add-d3d11-opencl-interop-for-amd.patch new file mode 100644 index 00000000000..adf56b8ceac --- /dev/null +++ b/cross/ffmpeg7/patches/1009-jellyfin-0009-add-d3d11-opencl-interop-for-amd.patch @@ -0,0 +1,393 @@ +Index: FFmpeg/libavutil/hwcontext_opencl.c +=================================================================== +--- libavutil/hwcontext_opencl.c ++++ libavutil/hwcontext_opencl.c +@@ -64,6 +64,16 @@ + #if HAVE_OPENCL_D3D11 + #include + #include "hwcontext_d3d11va.h" ++ ++// From cl_amd_planar_yuv; unfortunately no header is provided. 
++typedef CL_API_ENTRY cl_mem(CL_API_CALL *clGetPlaneFromImageAMD_fn)( ++ cl_context context, cl_mem mem, cl_uint plane, ++ cl_int *errcode_ret); ++ ++typedef CL_API_ENTRY cl_mem(CL_API_CALL *clConvertImageAMD_fn)( ++ cl_context context, cl_mem image, const cl_image_format *image_format, ++ cl_int *errcode_ret); ++ + #endif + + #if HAVE_OPENCL_DRM_ARM +@@ -129,12 +139,18 @@ typedef struct OpenCLDeviceContext { + + #if HAVE_OPENCL_D3D11 + int d3d11_mapping_usable; ++ int d3d11_map_amd; ++ int d3d11_map_intel; + clCreateFromD3D11Texture2DKHR_fn + clCreateFromD3D11Texture2DKHR; + clEnqueueAcquireD3D11ObjectsKHR_fn + clEnqueueAcquireD3D11ObjectsKHR; + clEnqueueReleaseD3D11ObjectsKHR_fn + clEnqueueReleaseD3D11ObjectsKHR; ++ clGetPlaneFromImageAMD_fn ++ clGetPlaneFromImageAMD; ++ clConvertImageAMD_fn ++ clConvertImageAMD; + #endif + + #if HAVE_OPENCL_DRM_ARM +@@ -512,8 +528,10 @@ static int opencl_device_create_internal + cl_uint nb_platforms; + cl_platform_id *platforms = NULL; + cl_platform_id platform_id; ++ cl_platform_id platform_id_default; + cl_uint nb_devices; + cl_device_id *devices = NULL; ++ cl_device_id device_id_default; + AVOpenCLDeviceContext *hwctx = hwdev->hwctx; + cl_int cle; + cl_context_properties default_props[3]; +@@ -592,6 +610,11 @@ static int opencl_device_create_internal + ++found; + platform_id = platforms[p]; + hwctx->device_id = devices[d]; ++ ++ if (p == 0 && d == 0) { ++ platform_id_default = platforms[0]; ++ device_id_default = devices[0]; ++ } + } + + av_freep(&devices); +@@ -603,9 +626,10 @@ static int opencl_device_create_internal + goto fail; + } + if (found > 1) { +- av_log(hwdev, AV_LOG_ERROR, "More than one matching device found.\n"); +- err = AVERROR(ENODEV); +- goto fail; ++ av_log(hwdev, AV_LOG_WARNING, "More than one matching device found. 
" ++ "Default to the first one.\n"); ++ platform_id = platform_id_default; ++ hwctx->device_id = device_id_default; + } + + if (!props) { +@@ -841,17 +865,25 @@ static int opencl_device_init(AVHWDevice + #if HAVE_OPENCL_D3D11 + { + const char *d3d11_ext = "cl_khr_d3d11_sharing"; +- const char *nv12_ext = "cl_intel_d3d11_nv12_media_sharing"; ++ const char *amd_ext = "cl_amd_planar_yuv"; ++ const char *intel_ext = "cl_intel_d3d11_nv12_media_sharing"; + int fail = 0; + + if (!opencl_check_extension(hwdev, d3d11_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for D3D11 to OpenCL mapping.\n", d3d11_ext); + fail = 1; +- } else if (!opencl_check_extension(hwdev, nv12_ext)) { +- av_log(hwdev, AV_LOG_VERBOSE, "The %s extension may be " +- "required for D3D11 to OpenCL mapping.\n", nv12_ext); +- // Not fatal. ++ } else { ++ if (opencl_check_extension(hwdev, amd_ext)) { ++ priv->d3d11_map_amd = 1; ++ } else if (opencl_check_extension(hwdev, intel_ext)) { ++ priv->d3d11_map_intel = 1; ++ } else { ++ av_log(hwdev, AV_LOG_VERBOSE, "One of the %s or %s " ++ "extensions are required for D3D11 to OpenCL " ++ "mapping.\n", amd_ext, intel_ext); ++ fail = 1; ++ } + } + + CL_FUNC(clCreateFromD3D11Texture2DKHR, +@@ -861,6 +893,13 @@ static int opencl_device_init(AVHWDevice + CL_FUNC(clEnqueueReleaseD3D11ObjectsKHR, + "D3D11 in OpenCL release"); + ++ if (priv->d3d11_map_amd) { ++ CL_FUNC(clGetPlaneFromImageAMD, ++ "D3D11 to OpenCL image planar mapping on AMD"); ++ CL_FUNC(clConvertImageAMD, ++ "D3D11 to OpenCL image data type converting on AMD"); ++ } ++ + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "D3D11 to OpenCL mapping " + "not usable.\n"); +@@ -1263,7 +1302,7 @@ static int opencl_device_derive(AVHWDevi + CL_CONTEXT_VA_API_DISPLAY_INTEL, + (intptr_t)src_hwctx->display, + CL_CONTEXT_INTEROP_USER_SYNC, +- CL_FALSE, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -1302,11 +1341,13 @@ static int opencl_device_derive(AVHWDevi + device_handle, + 
&device, FALSE); + if (SUCCEEDED(hr)) { +- cl_context_properties props[5] = { ++ cl_context_properties props[7] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_ADAPTER_D3D9EX_KHR, + (intptr_t)device, ++ CL_CONTEXT_INTEROP_USER_SYNC, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -1339,11 +1380,13 @@ static int opencl_device_derive(AVHWDevi + case AV_HWDEVICE_TYPE_D3D11VA: + { + AVD3D11VADeviceContext *src_hwctx = src_ctx->hwctx; +- cl_context_properties props[5] = { ++ cl_context_properties props[7] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_D3D11_DEVICE_KHR, + (intptr_t)src_hwctx->device, ++ CL_CONTEXT_INTEROP_USER_SYNC, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -2461,8 +2504,9 @@ static int opencl_frames_derive_from_dxv + cl_int cle; + int err, i, p, nb_planes; + +- if (src_fc->sw_format != AV_PIX_FMT_NV12) { +- av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported " ++ if (src_fc->sw_format != AV_PIX_FMT_NV12 && ++ src_fc->sw_format != AV_PIX_FMT_P010) { ++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are supported " + "for DXVA2 to OpenCL mapping.\n"); + return AVERROR(EINVAL); + } +@@ -2536,12 +2580,22 @@ static void opencl_unmap_from_d3d11(AVHW + OpenCLFramesContext *frames_priv = dst_fc->hwctx; + cl_event event; + cl_int cle; ++ const cl_mem *mem_objs; ++ cl_uint num_objs; ++ ++ if (!(device_priv->d3d11_map_amd || ++ device_priv->d3d11_map_intel)) ++ return; ++ ++ num_objs = device_priv->d3d11_map_amd ? 1 : desc->nb_planes; ++ mem_objs = device_priv->d3d11_map_amd ? 
&desc->planes[desc->nb_planes - 1] ++ : desc->planes; + + cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR( +- frames_priv->command_queue, desc->nb_planes, desc->planes, ++ frames_priv->command_queue, num_objs, mem_objs, + 0, NULL, &event); + if (cle != CL_SUCCESS) { +- av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface " ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release texture " + "handle: %d.\n", cle); + } + +@@ -2556,7 +2610,9 @@ static int opencl_map_from_d3d11(AVHWFra + AVOpenCLFrameDescriptor *desc; + cl_event event; + cl_int cle; +- int err, index, i; ++ const cl_mem *mem_objs; ++ cl_uint num_objs; ++ int err, index, i, nb_planes; + + index = (intptr_t)src->data[1]; + if (index >= frames_priv->nb_mapped_frames) { +@@ -2565,16 +2621,25 @@ static int opencl_map_from_d3d11(AVHWFra + return AVERROR(EINVAL); + } + ++ if (!(device_priv->d3d11_map_amd || ++ device_priv->d3d11_map_intel)) ++ return AVERROR(ENOSYS); ++ + av_log(dst_fc, AV_LOG_DEBUG, "Map D3D11 texture %d to OpenCL.\n", + index); + + desc = &frames_priv->mapped_frames[index]; ++ nb_planes = device_priv->d3d11_map_amd ? (desc->nb_planes - 1) ++ : desc->nb_planes; ++ num_objs = device_priv->d3d11_map_amd ? 1 : desc->nb_planes; ++ mem_objs = device_priv->d3d11_map_amd ? 
&desc->planes[nb_planes] ++ : desc->planes; + + cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR( +- frames_priv->command_queue, desc->nb_planes, desc->planes, ++ frames_priv->command_queue, num_objs, mem_objs, + 0, NULL, &event); + if (cle != CL_SUCCESS) { +- av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface " ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire texture " + "handle: %d.\n", cle); + return AVERROR(EIO); + } +@@ -2583,7 +2648,7 @@ static int opencl_map_from_d3d11(AVHWFra + if (err < 0) + goto fail; + +- for (i = 0; i < desc->nb_planes; i++) ++ for (i = 0; i < nb_planes; i++) + dst->data[i] = (uint8_t*)desc->planes[i]; + + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, +@@ -2598,7 +2663,7 @@ static int opencl_map_from_d3d11(AVHWFra + + fail: + cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR( +- frames_priv->command_queue, desc->nb_planes, desc->planes, ++ frames_priv->command_queue, num_objs, mem_objs, + 0, NULL, &event); + if (cle == CL_SUCCESS) + opencl_wait_events(dst_fc, &event, 1); +@@ -2613,16 +2678,25 @@ static int opencl_frames_derive_from_d3d + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->hwctx; + AVOpenCLDeviceContext *dst_dev = &device_priv->p; + OpenCLFramesContext *frames_priv = dst_fc->hwctx; ++ cl_mem plane_uint; + cl_mem_flags cl_flags; + cl_int cle; + int err, i, p, nb_planes; + +- if (src_fc->sw_format != AV_PIX_FMT_NV12) { +- av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported " +- "for D3D11 to OpenCL mapping.\n"); +- return AVERROR(EINVAL); ++ // both AMD and Intel supports NV12 and P010, ++ // but Intel requires D3D11_RESOURCE_MISC_SHARED. 
++ if (device_priv->d3d11_map_amd || ++ device_priv->d3d11_map_intel) { ++ if (src_fc->sw_format != AV_PIX_FMT_NV12 && ++ src_fc->sw_format != AV_PIX_FMT_P010) { ++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are " ++ "supported with AMD and Intel for D3D11 to OpenCL mapping.\n"); ++ return AVERROR(EINVAL); ++ } ++ } else { ++ return AVERROR(ENOSYS); + } +- nb_planes = 2; ++ nb_planes = device_priv->d3d11_map_amd ? 3 : 2; + + if (src_fc->initial_pool_size == 0) { + av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported " +@@ -2645,27 +2719,94 @@ static int opencl_frames_derive_from_d3d + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + desc->nb_planes = nb_planes; +- for (p = 0; p < nb_planes; p++) { +- UINT subresource = 2 * i + p; +- +- desc->planes[p] = +- device_priv->clCreateFromD3D11Texture2DKHR( +- dst_dev->context, cl_flags, src_hwctx->texture, +- subresource, &cle); +- if (!desc->planes[p]) { +- av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " +- "image from plane %d of D3D texture " +- "index %d (subresource %u): %d.\n", +- p, i, (unsigned int)subresource, cle); ++ if (device_priv->d3d11_map_amd) { ++ // put the multiple-plane AMD shared image at the end. ++ desc->planes[nb_planes - 1] = device_priv->clCreateFromD3D11Texture2DKHR( ++ dst_dev->context, cl_flags, src_hwctx->texture, i, &cle); ++ if (!desc->planes[nb_planes - 1]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL image " ++ "from D3D11 texture index %d: %d.\n", i, cle); + err = AVERROR(EIO); + goto fail; + } ++ ++ for (p = 0; p < nb_planes - 1; p++) { ++ cl_image_format image_fmt; ++ ++ // get plane from AMD in CL_UNSIGNED_INT8|16 type. 
++ plane_uint = device_priv->clGetPlaneFromImageAMD( ++ dst_dev->context, desc->planes[nb_planes - 1], p, &cle); ++ if (!plane_uint) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL image " ++ "from plane %d of image created from D3D11 " ++ "texture index %d: %d.\n", p, i, cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ cle = clGetImageInfo( ++ plane_uint, CL_IMAGE_FORMAT, sizeof(cl_image_format), &image_fmt, NULL); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to query image format of CL image " ++ "from plane %d of image created from D3D11 " ++ "texture index %d: %d.\n", p, i, cle); ++ err = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ ++ switch (image_fmt.image_channel_data_type) { ++ case CL_UNSIGNED_INT8: ++ image_fmt.image_channel_data_type = CL_UNORM_INT8; break; ++ case CL_UNSIGNED_INT16: ++ image_fmt.image_channel_data_type = CL_UNORM_INT16; break; ++ default: ++ av_log(dst_fc, AV_LOG_ERROR, "The data type of CL image " ++ "from plane %d of image created from D3D11 texture index %d " ++ "isn't a CL_UNSIGNED_INT8|16 type.\n", p, i); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ // convert plane from CL_UNSIGNED_INT8|16 to CL_UNORM_INT8|16. 
++ desc->planes[p] = device_priv->clConvertImageAMD( ++ dst_dev->context, plane_uint, &image_fmt, &cle); ++ if (!desc->planes[p]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to convert data type of CL image " ++ "from plane %d of image created from D3D11 texture index %d " ++ "to CL_UNORM_INT8|16 type: %d.\n", p, i, cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ clReleaseMemObject(plane_uint); ++ } ++ } else if (device_priv->d3d11_map_intel) { ++ for (p = 0; p < nb_planes; p++) { ++ UINT subresource = 2 * i + p; ++ ++ desc->planes[p] = ++ device_priv->clCreateFromD3D11Texture2DKHR( ++ dst_dev->context, cl_flags, src_hwctx->texture, ++ subresource, &cle); ++ if (!desc->planes[p]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " ++ "image from plane %d of D3D11 texture " ++ "index %d (subresource %u): %d.\n", ++ p, i, (unsigned int)subresource, cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ } else { ++ return AVERROR(ENOSYS); + } + } + + return 0; + + fail: ++ if (plane_uint) ++ clReleaseMemObject(plane_uint); + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + for (p = 0; p < desc->nb_planes; p++) { diff --git a/cross/ffmpeg7/patches/1010-jellyfin-0010-add-a-hack-for-opencl-reverse-mapping.patch b/cross/ffmpeg7/patches/1010-jellyfin-0010-add-a-hack-for-opencl-reverse-mapping.patch new file mode 100644 index 00000000000..3e18d382949 --- /dev/null +++ b/cross/ffmpeg7/patches/1010-jellyfin-0010-add-a-hack-for-opencl-reverse-mapping.patch @@ -0,0 +1,166 @@ +Index: FFmpeg/libavfilter/avfilter.h +=================================================================== +--- libavfilter/avfilter.h ++++ libavfilter/avfilter.h +@@ -564,6 +564,8 @@ struct AVFilterLink { + enum AVColorSpace colorspace; ///< agreed upon YUV color space + enum AVColorRange color_range; ///< agreed upon YUV color range + ++ int fixed_pool_size; ///< fixed size of the frame pool for reverse hw mapping ++ + /* 
These parameters apply only to audio */ + int sample_rate; ///< samples per second + AVChannelLayout ch_layout; ///< channel layout of current buffer (see libavutil/channel_layout.h) +Index: FFmpeg/libavfilter/opencl.c +=================================================================== +--- libavfilter/opencl.c ++++ libavfilter/opencl.c +@@ -75,6 +75,9 @@ int ff_opencl_filter_config_input(AVFilt + if (!ctx->output_height) + ctx->output_height = inlink->h; + ++ if (avctx->nb_outputs > 0) ++ avctx->outputs[0]->fixed_pool_size = inlink->fixed_pool_size; ++ + return 0; + } + +@@ -123,6 +126,9 @@ int ff_opencl_filter_config_output(AVFil + outlink->w = ctx->output_width; + outlink->h = ctx->output_height; + ++ if (avctx->nb_inputs > 0) ++ outlink->fixed_pool_size = avctx->inputs[0]->fixed_pool_size; ++ + return 0; + fail: + av_buffer_unref(&output_frames_ref); +Index: FFmpeg/libavfilter/vf_hwmap.c +=================================================================== +--- libavfilter/vf_hwmap.c ++++ libavfilter/vf_hwmap.c +@@ -22,6 +22,10 @@ + #include "libavutil/opt.h" + #include "libavutil/pixdesc.h" + ++#if HAVE_OPENCL_D3D11 ++#include "libavutil/hwcontext_d3d11va.h" ++#endif ++ + #include "avfilter.h" + #include "formats.h" + #include "internal.h" +@@ -122,6 +126,12 @@ static int hwmap_config_output(AVFilterL + goto fail; + } + ++ if (hwfc->initial_pool_size) { ++ outlink->fixed_pool_size = hwfc->initial_pool_size; ++ av_log(avctx, AV_LOG_DEBUG, "Saved the fixed_pool_size from " ++ "initial_pool_size: %d\n", outlink->fixed_pool_size); ++ } ++ + } else if (inlink->format == hwfc->format && + (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) && + ctx->reverse) { +@@ -131,6 +141,9 @@ static int hwmap_config_output(AVFilterL + // mapped from that back to the source type. 
+ AVBufferRef *source; + AVHWFramesContext *frames; ++#if HAVE_OPENCL_D3D11 ++ D3D11_TEXTURE2D_DESC texDesc = { .BindFlags = D3D11_BIND_DECODER, }; ++#endif + + ctx->hwframes_ref = av_hwframe_ctx_alloc(device); + if (!ctx->hwframes_ref) { +@@ -144,8 +157,19 @@ static int hwmap_config_output(AVFilterL + frames->width = hwfc->width; + frames->height = hwfc->height; + +- if (avctx->extra_hw_frames >= 0) +- frames->initial_pool_size = 2 + avctx->extra_hw_frames; ++ if (inlink->fixed_pool_size) ++ frames->initial_pool_size = inlink->fixed_pool_size; ++ ++ if (frames->initial_pool_size == 0) { ++ // Dynamic allocation. ++ } else if (avctx->extra_hw_frames) { ++ frames->initial_pool_size += avctx->extra_hw_frames; ++ } ++ ++#if HAVE_OPENCL_D3D11 ++ if (frames->format == AV_PIX_FMT_D3D11) ++ frames->user_opaque = &texDesc; ++#endif + + err = av_hwframe_ctx_init(ctx->hwframes_ref); + if (err < 0) { +Index: FFmpeg/libavfilter/vf_hwupload.c +=================================================================== +--- libavfilter/vf_hwupload.c ++++ libavfilter/vf_hwupload.c +@@ -23,6 +23,10 @@ + #include "libavutil/pixdesc.h" + #include "libavutil/opt.h" + ++#if HAVE_OPENCL_D3D11 ++#include "libavutil/hwcontext_d3d11va.h" ++#endif ++ + #include "avfilter.h" + #include "formats.h" + #include "internal.h" +@@ -110,6 +114,9 @@ static int hwupload_config_output(AVFilt + AVFilterLink *inlink = avctx->inputs[0]; + HWUploadContext *ctx = avctx->priv; + int err; ++#if HAVE_OPENCL_D3D11 ++ D3D11_TEXTURE2D_DESC texDesc = { .BindFlags = D3D11_BIND_DECODER, }; ++#endif + + av_buffer_unref(&ctx->hwframes_ref); + +@@ -151,6 +158,11 @@ static int hwupload_config_output(AVFilt + if (avctx->extra_hw_frames >= 0) + ctx->hwframes->initial_pool_size = 2 + avctx->extra_hw_frames; + ++#if HAVE_OPENCL_D3D11 ++ if (ctx->hwframes->format == AV_PIX_FMT_D3D11) ++ ctx->hwframes->user_opaque = &texDesc; ++#endif ++ + err = av_hwframe_ctx_init(ctx->hwframes_ref); + if (err < 0) + goto fail; +Index: 
FFmpeg/libavutil/hwcontext_d3d11va.c +=================================================================== +--- libavutil/hwcontext_d3d11va.c ++++ libavutil/hwcontext_d3d11va.c +@@ -227,7 +227,7 @@ static AVBufferRef *d3d11va_alloc_single + .ArraySize = 1, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = hwctx->BindFlags, +- .MiscFlags = hwctx->MiscFlags, ++ .MiscFlags = hwctx->MiscFlags | D3D11_RESOURCE_MISC_SHARED, + }; + + hr = ID3D11Device_CreateTexture2D(device_hwctx->device, &texDesc, NULL, &tex); +@@ -291,9 +291,17 @@ static int d3d11va_frames_init(AVHWFrame + .ArraySize = ctx->initial_pool_size, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = hwctx->BindFlags, +- .MiscFlags = hwctx->MiscFlags, ++ .MiscFlags = hwctx->MiscFlags | D3D11_RESOURCE_MISC_SHARED, + }; + ++#if HAVE_OPENCL_D3D11 ++ if (ctx->user_opaque) { ++ D3D11_TEXTURE2D_DESC *desc = ctx->user_opaque; ++ if (desc->BindFlags & D3D11_BIND_DECODER) ++ texDesc.BindFlags = D3D11_BIND_DECODER; ++ } ++#endif ++ + if (hwctx->texture) { + D3D11_TEXTURE2D_DESC texDesc2; + ID3D11Texture2D_GetDesc(hwctx->texture, &texDesc2); diff --git a/cross/ffmpeg7/patches/1011-jellyfin-0011-add-fixes-for-mapping-from-qsv-source-device.patch b/cross/ffmpeg7/patches/1011-jellyfin-0011-add-fixes-for-mapping-from-qsv-source-device.patch new file mode 100644 index 00000000000..f93fe26dfb0 --- /dev/null +++ b/cross/ffmpeg7/patches/1011-jellyfin-0011-add-fixes-for-mapping-from-qsv-source-device.patch @@ -0,0 +1,184 @@ +Index: FFmpeg/libavutil/hwcontext.c +=================================================================== +--- libavutil/hwcontext.c ++++ libavutil/hwcontext.c +@@ -84,21 +84,6 @@ static const char *const hw_type_names[] + [AV_HWDEVICE_TYPE_VULKAN] = "vulkan", + }; + +-typedef struct FFHWDeviceContext { +- /** +- * The public AVHWDeviceContext. See hwcontext.h for it. 
+- */ +- AVHWDeviceContext p; +- +- const HWContextType *hw_type; +- +- /** +- * For a derived device, a reference to the original device +- * context it was derived from. +- */ +- AVBufferRef *source_device; +-} FFHWDeviceContext; +- + enum AVHWDeviceType av_hwdevice_find_type_by_name(const char *name) + { + int type; +@@ -143,6 +128,7 @@ static void hwdevice_ctx_free(void *opaq + { + FFHWDeviceContext *ctxi = (FFHWDeviceContext*)data; + AVHWDeviceContext *ctx = &ctxi->p; ++ int i; + + /* uninit might still want access the hw context and the user + * free() callback might destroy it, so uninit has to be called first */ +@@ -153,6 +139,8 @@ static void hwdevice_ctx_free(void *opaq + ctx->free(ctx); + + av_buffer_unref(&ctxi->source_device); ++ for (i = 0; i < AV_HWDEVICE_TYPE_NB; i++) ++ av_buffer_unref(&ctxi->derived_devices[i]); + + av_freep(&ctx->hwctx); + av_freep(&ctx); +@@ -633,6 +621,28 @@ fail: + return ret; + } + ++static AVBufferRef* find_derived_hwdevice_ctx(AVBufferRef *src_ref, enum AVHWDeviceType type) ++{ ++ AVBufferRef *tmp_ref; ++ FFHWDeviceContext *src_ctxi; ++ AVHWDeviceContext *src_ctx; ++ int i; ++ ++ src_ctxi = (FFHWDeviceContext *)src_ref->data; ++ src_ctx = &src_ctxi->p; ++ if (src_ctx->type == type) ++ return src_ref; ++ ++ for (i = 0; i < AV_HWDEVICE_TYPE_NB; i++) ++ if (src_ctxi->derived_devices[i]) { ++ tmp_ref = find_derived_hwdevice_ctx(src_ctxi->derived_devices[i], type); ++ if (tmp_ref) ++ return tmp_ref; ++ } ++ ++ return NULL; ++} ++ + int av_hwdevice_ctx_create_derived_opts(AVBufferRef **dst_ref_ptr, + enum AVHWDeviceType type, + AVBufferRef *src_ref, +@@ -656,6 +666,16 @@ int av_hwdevice_ctx_create_derived_opts( + tmp_ref = tmp_ctx->source_device; + } + ++ tmp_ref = find_derived_hwdevice_ctx(src_ref, type); ++ if (tmp_ref) { ++ dst_ref = av_buffer_ref(tmp_ref); ++ if (!dst_ref) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ goto done; ++ } ++ + dst_ref = av_hwdevice_ctx_alloc(type); + if (!dst_ref) { + ret = AVERROR(ENOMEM); 
+@@ -676,6 +696,11 @@ int av_hwdevice_ctx_create_derived_opts( + ret = AVERROR(ENOMEM); + goto fail; + } ++ tmp_ctx->derived_devices[type] = av_buffer_ref(dst_ref); ++ if (!tmp_ctx->derived_devices[type]) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } + ret = av_hwdevice_ctx_init(dst_ref); + if (ret < 0) + goto fail; +Index: FFmpeg/libavutil/hwcontext.h +=================================================================== +--- libavutil/hwcontext.h ++++ libavutil/hwcontext.h +@@ -38,6 +38,7 @@ enum AVHWDeviceType { + AV_HWDEVICE_TYPE_MEDIACODEC, + AV_HWDEVICE_TYPE_VULKAN, + AV_HWDEVICE_TYPE_D3D12VA, ++ AV_HWDEVICE_TYPE_NB, ///< number of hw device types, not part of API/ABI. + }; + + /** +Index: FFmpeg/libavutil/hwcontext_internal.h +=================================================================== +--- libavutil/hwcontext_internal.h ++++ libavutil/hwcontext_internal.h +@@ -164,4 +164,25 @@ extern const HWContextType ff_hwcontext_ + extern const HWContextType ff_hwcontext_type_mediacodec; + extern const HWContextType ff_hwcontext_type_vulkan; + ++typedef struct FFHWDeviceContext { ++ /** ++ * The public AVHWDeviceContext. See hwcontext.h for it. ++ */ ++ AVHWDeviceContext p; ++ ++ const HWContextType *hw_type; ++ ++ /** ++ * For a derived device, a reference to the original device ++ * context it was derived from. ++ */ ++ AVBufferRef *source_device; ++ ++ /** ++ * An array of reference to device contexts which ++ * were derived from this device. 
++ */ ++ AVBufferRef *derived_devices[AV_HWDEVICE_TYPE_NB]; ++} FFHWDeviceContext; ++ + #endif /* AVUTIL_HWCONTEXT_INTERNAL_H */ +Index: FFmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- libavutil/hwcontext_qsv.c ++++ libavutil/hwcontext_qsv.c +@@ -369,7 +369,7 @@ static void qsv_frames_uninit(AVHWFrames + av_buffer_unref(&s->child_frames_ref); + } + +-static void qsv_pool_release_dummy(void *opaque, uint8_t *data) ++static void qsv_release_dummy(void *opaque, uint8_t *data) + { + } + +@@ -382,7 +382,7 @@ static AVBufferRef *qsv_pool_alloc(void + if (s->nb_surfaces_used < hwctx->nb_surfaces) { + s->nb_surfaces_used++; + return av_buffer_create((uint8_t*)(s->surfaces_internal + s->nb_surfaces_used - 1), +- sizeof(*hwctx->surfaces), qsv_pool_release_dummy, NULL, 0); ++ sizeof(*hwctx->surfaces), qsv_release_dummy, NULL, 0); + } + + return NULL; +@@ -2272,8 +2272,17 @@ static int qsv_device_create(AVHWDeviceC + child_device = (AVHWDeviceContext*)priv->child_device_ctx->data; + + impl = choose_implementation(device, child_device_type); ++ ret = qsv_device_derive_from_child(ctx, impl, child_device, 0); ++ if (ret >= 0) { ++ FFHWDeviceContext *fctx = (FFHWDeviceContext*)ctx; ++ FFHWDeviceContext *fchild_device = (FFHWDeviceContext*)child_device; ++ fctx->source_device = av_buffer_ref(priv->child_device_ctx); ++ fchild_device->derived_devices[ctx->type] = av_buffer_create((uint8_t*)fctx, sizeof(*fctx), qsv_release_dummy, fctx, 0); ++ if (!fchild_device->derived_devices[ctx->type]) ++ return AVERROR(ENOMEM); ++ } + +- return qsv_device_derive_from_child(ctx, impl, child_device, 0); ++ return ret; + } + + const HWContextType ff_hwcontext_type_qsv = { diff --git a/cross/ffmpeg7/patches/1012-jellyfin-0012-add-d3d11-opencl-interop-for-qsv.patch b/cross/ffmpeg7/patches/1012-jellyfin-0012-add-d3d11-opencl-interop-for-qsv.patch new file mode 100644 index 00000000000..aa2048ecc61 --- /dev/null +++ 
b/cross/ffmpeg7/patches/1012-jellyfin-0012-add-d3d11-opencl-interop-for-qsv.patch @@ -0,0 +1,329 @@ +Index: FFmpeg/libavutil/hwcontext_opencl.c +=================================================================== +--- libavutil/hwcontext_opencl.c ++++ libavutil/hwcontext_opencl.c +@@ -62,6 +62,9 @@ + #endif + + #if HAVE_OPENCL_D3D11 ++#if CONFIG_LIBMFX ++#include "hwcontext_qsv.h" ++#endif + #include + #include "hwcontext_d3d11va.h" + +@@ -139,6 +142,7 @@ typedef struct OpenCLDeviceContext { + + #if HAVE_OPENCL_D3D11 + int d3d11_mapping_usable; ++ int d3d11_qsv_mapping_usable; + int d3d11_map_amd; + int d3d11_map_intel; + clCreateFromD3D11Texture2DKHR_fn +@@ -906,6 +910,11 @@ static int opencl_device_init(AVHWDevice + priv->d3d11_mapping_usable = 0; + } else { + priv->d3d11_mapping_usable = 1; ++ ++ if (priv->d3d11_map_intel) ++ priv->d3d11_qsv_mapping_usable = 1; ++ else ++ priv->d3d11_qsv_mapping_usable = 0; + } + } + #endif +@@ -1785,18 +1794,20 @@ static void opencl_frames_uninit(AVHWFra + + #if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11 + int i, p; +- for (i = 0; i < priv->nb_mapped_frames; i++) { +- AVOpenCLFrameDescriptor *desc = &priv->mapped_frames[i]; +- for (p = 0; p < desc->nb_planes; p++) { +- cle = clReleaseMemObject(desc->planes[p]); +- if (cle != CL_SUCCESS) { +- av_log(hwfc, AV_LOG_ERROR, "Failed to release mapped " +- "frame object (frame %d plane %d): %d.\n", +- i, p, cle); ++ if (priv->nb_mapped_frames && priv->mapped_frames) { ++ for (i = 0; i < priv->nb_mapped_frames; i++) { ++ AVOpenCLFrameDescriptor *desc = &priv->mapped_frames[i]; ++ for (p = 0; p < desc->nb_planes; p++) { ++ cle = clReleaseMemObject(desc->planes[p]); ++ if (cle != CL_SUCCESS) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to release mapped " ++ "frame object (frame %d plane %d): %d.\n", ++ i, p, cle); ++ } + } + } ++ av_freep(&priv->mapped_frames); + } +- av_freep(&priv->mapped_frames); + #endif + + if (priv->command_queue) { +@@ -2572,6 +2583,233 @@ fail: + + #if HAVE_OPENCL_D3D11 + 
++#if CONFIG_LIBMFX ++ ++static void opencl_unmap_from_d3d11_qsv(AVHWFramesContext *dst_fc, ++ HWMapDescriptor *hwmap) ++{ ++ AVOpenCLFrameDescriptor *desc = hwmap->priv; ++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->hwctx; ++ OpenCLFramesContext *frames_priv = dst_fc->hwctx; ++ cl_event event; ++ cl_int cle; ++ int p; ++ ++ av_log(dst_fc, AV_LOG_DEBUG, "Unmap QSV surface from OpenCL.\n"); ++ ++ cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR( ++ frames_priv->command_queue, desc->nb_planes, desc->planes, ++ 0, NULL, &event); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release texture " ++ "handle: %d.\n", cle); ++ } ++ ++ opencl_wait_events(dst_fc, &event, 1); ++ ++ if (!frames_priv->nb_mapped_frames && !frames_priv->mapped_frames) { ++ for (p = 0; p < desc->nb_planes; p++) { ++ cle = clReleaseMemObject(desc->planes[p]); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release CL " ++ "image of plane %d of D3D11 texture: %d\n", ++ p, cle); ++ } ++ } ++ av_freep(&desc); ++ } ++} ++ ++static int opencl_map_from_d3d11_qsv(AVHWFramesContext *dst_fc, AVFrame *dst, ++ const AVFrame *src, int flags) ++{ ++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->hwctx; ++ OpenCLFramesContext *frames_priv = dst_fc->hwctx; ++ AVOpenCLDeviceContext *dst_dev = &device_priv->p; ++ mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3]; ++ mfxHDLPair *pair = (mfxHDLPair*)mfx_surface->Data.MemId; ++ ID3D11Texture2D *tex = (ID3D11Texture2D*)pair->first; ++ AVOpenCLFrameDescriptor *desc; ++ cl_mem_flags cl_flags; ++ cl_event event; ++ cl_int cle; ++ int err, p, index, derived_frames; ++ ++ cl_flags = opencl_mem_flags_for_mapping(flags); ++ if (!cl_flags) ++ return AVERROR(EINVAL); ++ ++ av_log(dst_fc, AV_LOG_DEBUG, "Map QSV surface %#llx to OpenCL.\n", (uintptr_t)pair); ++ ++ index = (intptr_t)pair->second; ++ derived_frames = frames_priv->nb_mapped_frames > 0; ++ if (derived_frames) { ++ av_assert0(index >= 0 
&& index != MFX_INFINITE); ++ if (index >= frames_priv->nb_mapped_frames) { ++ av_log(dst_fc, AV_LOG_ERROR, "Texture array index out of range for " ++ "mapping: %d >= %d.\n", index, frames_priv->nb_mapped_frames); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ if (derived_frames) { ++ desc = &frames_priv->mapped_frames[index]; ++ } else { ++ desc = av_mallocz(sizeof(*desc)); ++ if (!desc) ++ return AVERROR(ENOMEM); ++ ++ desc->nb_planes = 2; ++ for (p = 0; p < desc->nb_planes; p++) { ++ desc->planes[p] = ++ device_priv->clCreateFromD3D11Texture2DKHR( ++ dst_dev->context, cl_flags, tex, ++ p, &cle); ++ if (!desc->planes[p]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " ++ "image from plane %d of D3D11 texture: %d.\n", ++ p, cle); ++ err = AVERROR(EIO); ++ goto fail2; ++ } ++ } ++ } ++ ++ cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR( ++ frames_priv->command_queue, desc->nb_planes, desc->planes, ++ 0, NULL, &event); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire texture " ++ "handle: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ err = opencl_wait_events(dst_fc, &event, 1); ++ if (err < 0) ++ goto fail; ++ ++ for (p = 0; p < desc->nb_planes; p++) ++ dst->data[p] = (uint8_t*)desc->planes[p]; ++ ++ err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, ++ &opencl_unmap_from_d3d11_qsv, desc); ++ if (err < 0) ++ goto fail; ++ ++ dst->width = src->width; ++ dst->height = src->height; ++ ++ return 0; ++ ++fail: ++ cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR( ++ frames_priv->command_queue, desc->nb_planes, desc->planes, ++ 0, NULL, &event); ++ if (cle == CL_SUCCESS) ++ opencl_wait_events(dst_fc, &event, 1); ++fail2: ++ if (!derived_frames) { ++ for (p = 0; p < desc->nb_planes; p++) { ++ if (desc->planes[p]) ++ clReleaseMemObject(desc->planes[p]); ++ } ++ av_freep(&desc); ++ } ++ memset(dst->data, 0, sizeof(dst->data)); ++ return err; ++} ++ ++static int opencl_frames_derive_from_d3d11_qsv(AVHWFramesContext 
*dst_fc, ++ AVHWFramesContext *src_fc, int flags) ++{ ++ AVQSVFramesContext *src_hwctx = src_fc->hwctx; ++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->hwctx; ++ AVOpenCLDeviceContext *dst_dev = &device_priv->p; ++ OpenCLFramesContext *frames_priv = dst_fc->hwctx; ++ cl_mem_flags cl_flags; ++ cl_int cle; ++ int err, i, p, nb_planes = 2; ++ mfxHDLPair *pair = NULL; ++ ID3D11Texture2D *tex = NULL; ++ ++ if (src_fc->sw_format != AV_PIX_FMT_NV12 && ++ src_fc->sw_format != AV_PIX_FMT_P010) { ++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are " ++ "supported for QSV with D3D11 to OpenCL mapping.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (src_fc->initial_pool_size == 0) { ++ av_log(dst_fc, AV_LOG_DEBUG, "Non fixed-size pools input for QSV " ++ "with D3D11 to OpenCL mapping.\n"); ++ return 0; ++ } ++ ++ if ((src_hwctx->frame_type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) || ++ (src_hwctx->frame_type & MFX_MEMTYPE_FROM_VPPOUT)) { ++ av_log(dst_fc, AV_LOG_DEBUG, "MFX memtype VPP input for QSV " ++ "with D3D11 to OpenCL mapping.\n"); ++ return 0; ++ } ++ ++ if (!src_hwctx->surfaces) ++ return AVERROR(ENOMEM); ++ pair = (mfxHDLPair*)src_hwctx->surfaces[0].Data.MemId; ++ if (!pair) ++ return AVERROR(ENOMEM); ++ tex = (ID3D11Texture2D*)pair->first; ++ ++ cl_flags = opencl_mem_flags_for_mapping(flags); ++ if (!cl_flags) ++ return AVERROR(EINVAL); ++ ++ frames_priv->nb_mapped_frames = src_fc->initial_pool_size; ++ ++ frames_priv->mapped_frames = ++ av_calloc(frames_priv->nb_mapped_frames, ++ sizeof(*frames_priv->mapped_frames)); ++ if (!frames_priv->mapped_frames) ++ return AVERROR(ENOMEM); ++ ++ for (i = 0; i < frames_priv->nb_mapped_frames; i++) { ++ AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; ++ desc->nb_planes = nb_planes; ++ ++ for (p = 0; p < nb_planes; p++) { ++ UINT subresource = 2 * i + p; ++ desc->planes[p] = ++ device_priv->clCreateFromD3D11Texture2DKHR( ++ dst_dev->context, cl_flags, tex, ++ subresource, &cle); ++ if 
(!desc->planes[p]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " ++ "image from plane %d of D3D11 texture " ++ "index %d (subresource %u): %d.\n", ++ p, i, (unsigned int)subresource, cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ } ++ ++ return 0; ++ ++fail: ++ for (i = 0; i < frames_priv->nb_mapped_frames; i++) { ++ AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; ++ for (p = 0; p < desc->nb_planes; p++) { ++ if (desc->planes[p]) ++ clReleaseMemObject(desc->planes[p]); ++ } ++ } ++ av_freep(&frames_priv->mapped_frames); ++ frames_priv->nb_mapped_frames = 0; ++ return err; ++} ++ ++#endif ++ + static void opencl_unmap_from_d3d11(AVHWFramesContext *dst_fc, + HWMapDescriptor *hwmap) + { +@@ -3096,6 +3334,11 @@ static int opencl_map_to(AVHWFramesConte + return opencl_map_from_dxva2(hwfc, dst, src, flags); + #endif + #if HAVE_OPENCL_D3D11 ++#if CONFIG_LIBMFX ++ case AV_PIX_FMT_QSV: ++ if (priv->d3d11_qsv_mapping_usable) ++ return opencl_map_from_d3d11_qsv(hwfc, dst, src, flags); ++#endif + case AV_PIX_FMT_D3D11: + if (priv->d3d11_mapping_usable) + return opencl_map_from_d3d11(hwfc, dst, src, flags); +@@ -3150,6 +3393,18 @@ static int opencl_frames_derive_to(AVHWF + break; + #endif + #if HAVE_OPENCL_D3D11 ++#if CONFIG_LIBMFX ++ case AV_HWDEVICE_TYPE_QSV: ++ if (!priv->d3d11_qsv_mapping_usable) ++ return AVERROR(ENOSYS); ++ { ++ int err; ++ err = opencl_frames_derive_from_d3d11_qsv(dst_fc, src_fc, flags); ++ if (err < 0) ++ return err; ++ } ++ break; ++#endif + case AV_HWDEVICE_TYPE_D3D11VA: + if (!priv->d3d11_mapping_usable) + return AVERROR(ENOSYS); diff --git a/cross/ffmpeg7/patches/1013-jellyfin-0013-add-vendor-id-option-for-d3d11.patch b/cross/ffmpeg7/patches/1013-jellyfin-0013-add-vendor-id-option-for-d3d11.patch new file mode 100644 index 00000000000..24b3284c56d --- /dev/null +++ b/cross/ffmpeg7/patches/1013-jellyfin-0013-add-vendor-id-option-for-d3d11.patch @@ -0,0 +1,13 @@ +Index: FFmpeg/libavutil/hwcontext_d3d11va.c 
+=================================================================== +--- libavutil/hwcontext_d3d11va.c ++++ libavutil/hwcontext_d3d11va.c +@@ -638,6 +638,8 @@ static int d3d11va_device_create(AVHWDev + adapter = atoi(device); + } else { + AVDictionaryEntry *e = av_dict_get(opts, "vendor_id", NULL, 0); ++ if (!e || !e->value) ++ e = av_dict_get(opts, "vendor", NULL, 0); // for backward compatibility + if (e && e->value) { + adapter = d3d11va_device_find_adapter_by_vendor_id(ctx, creationFlags, e->value); + if (adapter < 0) { diff --git a/cross/ffmpeg7/patches/1014-jellyfin-0014-add-vaapi-hwupload-filter.patch b/cross/ffmpeg7/patches/1014-jellyfin-0014-add-vaapi-hwupload-filter.patch new file mode 100644 index 00000000000..ddabd762667 --- /dev/null +++ b/cross/ffmpeg7/patches/1014-jellyfin-0014-add-vaapi-hwupload-filter.patch @@ -0,0 +1,234 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3840,6 +3840,7 @@ gblur_vulkan_filter_deps="vulkan spirv_c + hflip_vulkan_filter_deps="vulkan spirv_compiler" + histeq_filter_deps="gpl" + hqdn3d_filter_deps="gpl" ++hwupload_vaapi_filter_deps="vaapi" + iccdetect_filter_deps="lcms2" + iccgen_filter_deps="lcms2" + interlace_filter_deps="gpl" +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- libavfilter/Makefile ++++ libavfilter/Makefile +@@ -346,6 +346,7 @@ OBJS-$(CONFIG_HUESATURATION_FILTER) + OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o + OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o + OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o ++OBJS-$(CONFIG_HWUPLOAD_VAAPI_FILTER) += vf_hwupload_vaapi.o + OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o + OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o + OBJS-$(CONFIG_ICCDETECT_FILTER) += vf_iccdetect.o fflcms2.o +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== 
+--- libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -324,6 +324,7 @@ extern const AVFilter ff_vf_hwdownload; + extern const AVFilter ff_vf_hwmap; + extern const AVFilter ff_vf_hwupload; + extern const AVFilter ff_vf_hwupload_cuda; ++extern const AVFilter ff_vf_hwupload_vaapi; + extern const AVFilter ff_vf_hysteresis; + extern const AVFilter ff_vf_iccdetect; + extern const AVFilter ff_vf_iccgen; +Index: FFmpeg/libavfilter/vf_hwupload_vaapi.c +=================================================================== +--- /dev/null ++++ libavfilter/vf_hwupload_vaapi.c +@@ -0,0 +1,193 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/buffer.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/log.h" ++#include "libavutil/opt.h" ++ ++#include "avfilter.h" ++#include "formats.h" ++#include "internal.h" ++#include "video.h" ++ ++typedef struct VaapiUploadContext { ++ const AVClass *class; ++ int device_idx; ++ ++ AVBufferRef *hwdevice; ++ AVBufferRef *hwframe; ++} VaapiUploadContext; ++ ++static av_cold int vaapiupload_init(AVFilterContext *ctx) ++{ ++ VaapiUploadContext *s = ctx->priv; ++ return av_hwdevice_ctx_create(&s->hwdevice, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0); ++} ++ ++static av_cold void vaapiupload_uninit(AVFilterContext *ctx) ++{ ++ VaapiUploadContext *s = ctx->priv; ++ ++ av_buffer_unref(&s->hwframe); ++ av_buffer_unref(&s->hwdevice); ++} ++ ++static int vaapiupload_query_formats(AVFilterContext *ctx) ++{ ++ int ret; ++ ++ static const enum AVPixelFormat input_pix_fmts[] = { ++ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, ++ AV_PIX_FMT_UYVY422, AV_PIX_FMT_YUYV422, AV_PIX_FMT_Y210, ++ AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, ++ AV_PIX_FMT_GRAY8, AV_PIX_FMT_P010, AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_BGR0, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB0, ++ AV_PIX_FMT_ABGR, AV_PIX_FMT_0BGR, AV_PIX_FMT_ARGB, ++ AV_PIX_FMT_0RGB, AV_PIX_FMT_NONE, ++ }; ++ static const enum AVPixelFormat output_pix_fmts[] = { ++ AV_PIX_FMT_VAAPI, AV_PIX_FMT_NONE, ++ }; ++ AVFilterFormats *in_fmts = ff_make_format_list(input_pix_fmts); ++ AVFilterFormats *out_fmts; ++ ++ ret = ff_formats_ref(in_fmts, &ctx->inputs[0]->outcfg.formats); ++ if (ret < 0) ++ return ret; ++ ++ out_fmts = ff_make_format_list(output_pix_fmts); ++ ++ ret = ff_formats_ref(out_fmts, &ctx->outputs[0]->incfg.formats); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} 
++ ++static int vaapiupload_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ VaapiUploadContext *s = ctx->priv; ++ ++ AVHWFramesContext *hwframe_ctx; ++ int ret; ++ ++ av_buffer_unref(&s->hwframe); ++ s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); ++ if (!s->hwframe) ++ return AVERROR(ENOMEM); ++ ++ hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; ++ hwframe_ctx->format = AV_PIX_FMT_VAAPI; ++ if (inlink->hw_frames_ctx) { ++ AVHWFramesContext *in_hwframe_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ hwframe_ctx->sw_format = in_hwframe_ctx->sw_format; ++ } else { ++ hwframe_ctx->sw_format = inlink->format; ++ } ++ hwframe_ctx->width = inlink->w; ++ hwframe_ctx->height = inlink->h; ++ ++ ret = av_hwframe_ctx_init(s->hwframe); ++ if (ret < 0) ++ return ret; ++ ++ outlink->hw_frames_ctx = av_buffer_ref(s->hwframe); ++ if (!outlink->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static int vaapiupload_filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *ctx = link->dst; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ ++ AVFrame *out = NULL; ++ int ret; ++ ++ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!out) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ out->width = in->width; ++ out->height = in->height; ++ ++ ret = av_hwframe_transfer_data(out, in, 0); ++ if (ret < 0) { ++ av_log(ctx, AV_LOG_ERROR, "Error transferring data to the VAAPI device\n"); ++ goto fail; ++ } ++ ++ ret = av_frame_copy_props(out, in); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_free(&in); ++ ++ return ff_filter_frame(ctx->outputs[0], out); ++fail: ++ av_frame_free(&in); ++ av_frame_free(&out); ++ return ret; ++} ++ ++static const AVClass vaapiupload_class = { ++ .class_name = "vaapiupload", ++ .item_name = av_default_item_name, ++ .option = NULL, ++ .version = LIBAVUTIL_VERSION_INT, ++}; ++ ++static const AVFilterPad vaapiupload_inputs[] = 
{ ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = vaapiupload_filter_frame, ++ }, ++}; ++ ++static const AVFilterPad vaapiupload_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = vaapiupload_config_output, ++ }, ++}; ++ ++const AVFilter ff_vf_hwupload_vaapi = { ++ .name = "hwupload_vaapi", ++ .description = NULL_IF_CONFIG_SMALL("Upload a system memory frame to a VAAPI device."), ++ ++ .init = vaapiupload_init, ++ .uninit = vaapiupload_uninit, ++ ++ .priv_size = sizeof(VaapiUploadContext), ++ .priv_class = &vaapiupload_class, ++ ++ FILTER_INPUTS(vaapiupload_inputs), ++ FILTER_OUTPUTS(vaapiupload_outputs), ++ FILTER_QUERY_FUNC(vaapiupload_query_formats), ++ ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/cross/ffmpeg7/patches/1015-jellyfin-0015-disable-the-premultiplied-alpha-in-vaapi-overlay.patch b/cross/ffmpeg7/patches/1015-jellyfin-0015-disable-the-premultiplied-alpha-in-vaapi-overlay.patch new file mode 100644 index 00000000000..33a7d250c95 --- /dev/null +++ b/cross/ffmpeg7/patches/1015-jellyfin-0015-disable-the-premultiplied-alpha-in-vaapi-overlay.patch @@ -0,0 +1,17 @@ +Index: FFmpeg/libavfilter/vf_overlay_vaapi.c +=================================================================== +--- libavfilter/vf_overlay_vaapi.c ++++ libavfilter/vf_overlay_vaapi.c +@@ -311,8 +311,12 @@ static int overlay_vaapi_config_input_ov + ctx->blend_alpha = ctx->alpha; + } + ++ // VA_BLEND_PREMULTIPLIED_ALPHA may cause issues in ++ // per-pixel alpha case, disable it to align with MSDK. 
++#if 0 + if (have_alpha_planar(inlink)) + ctx->blend_flags |= VA_BLEND_PREMULTIPLIED_ALPHA; ++#endif + + return 0; + } diff --git a/cross/ffmpeg7/patches/1016-jellyfin-0016-add-fixes-and-hdr2hdr-for-vaapi-tonemap.patch b/cross/ffmpeg7/patches/1016-jellyfin-0016-add-fixes-and-hdr2hdr-for-vaapi-tonemap.patch new file mode 100644 index 00000000000..1df1026508f --- /dev/null +++ b/cross/ffmpeg7/patches/1016-jellyfin-0016-add-fixes-and-hdr2hdr-for-vaapi-tonemap.patch @@ -0,0 +1,311 @@ +Index: FFmpeg/libavfilter/vf_tonemap_vaapi.c +=================================================================== +--- libavfilter/vf_tonemap_vaapi.c ++++ libavfilter/vf_tonemap_vaapi.c +@@ -39,7 +39,11 @@ typedef struct HDRVAAPIContext { + enum AVColorTransferCharacteristic color_transfer; + enum AVColorSpace color_matrix; + ++ char *mastering_display; ++ char *content_light; ++ + VAHdrMetaDataHDR10 in_metadata; ++ VAHdrMetaDataHDR10 out_metadata; + + AVFrameSideData *src_display; + AVFrameSideData *src_light; +@@ -52,7 +56,7 @@ static int tonemap_vaapi_save_metadata(A + AVContentLightMetadata *light_meta; + + if (input_frame->color_trc != AVCOL_TRC_SMPTE2084) { +- av_log(avctx, AV_LOG_WARNING, "Only support HDR10 as input for vaapi tone-mapping\n"); ++ av_log(avctx, AV_LOG_DEBUG, "Only support HDR10 as input for vaapi tone-mapping\n"); + } + + ctx->src_display = av_frame_get_side_data(input_frame, +@@ -60,8 +64,7 @@ static int tonemap_vaapi_save_metadata(A + if (ctx->src_display) { + hdr_meta = (AVMasteringDisplayMetadata *)ctx->src_display->data; + if (!hdr_meta) { +- av_log(avctx, AV_LOG_ERROR, "No mastering display data\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_DEBUG, "No mastering display data\n"); + } + + if (hdr_meta->has_luminance) { +@@ -118,8 +121,7 @@ static int tonemap_vaapi_save_metadata(A + ctx->in_metadata.white_point_y); + } + } else { +- av_log(avctx, AV_LOG_ERROR, "No mastering display data from input\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, 
AV_LOG_DEBUG, "No mastering display data from input\n"); + } + + ctx->src_light = av_frame_get_side_data(input_frame, +@@ -127,8 +129,7 @@ static int tonemap_vaapi_save_metadata(A + if (ctx->src_light) { + light_meta = (AVContentLightMetadata *)ctx->src_light->data; + if (!light_meta) { +- av_log(avctx, AV_LOG_ERROR, "No light metadata\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_DEBUG, "No light metadata\n"); + } + + ctx->in_metadata.max_content_light_level = light_meta->MaxCLL; +@@ -146,6 +147,87 @@ static int tonemap_vaapi_save_metadata(A + return 0; + } + ++static int tonemap_vaapi_update_sidedata(AVFilterContext *avctx, AVFrame *output_frame) ++{ ++ HDRVAAPIContext *ctx = avctx->priv; ++ AVFrameSideData *metadata; ++ AVMasteringDisplayMetadata *hdr_meta; ++ AVFrameSideData *metadata_lt; ++ AVContentLightMetadata *hdr_meta_lt; ++ int i; ++ const int mapping[3] = {1, 2, 0}; //green, blue, red ++ const int chroma_den = 50000; ++ const int luma_den = 10000; ++ ++ metadata = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, ++ sizeof(AVMasteringDisplayMetadata)); ++ if (!metadata) ++ return AVERROR(ENOMEM); ++ ++ hdr_meta = (AVMasteringDisplayMetadata *)metadata->data; ++ ++ for (i = 0; i < 3; i++) { ++ const int j = mapping[i]; ++ hdr_meta->display_primaries[j][0].num = ctx->out_metadata.display_primaries_x[i]; ++ hdr_meta->display_primaries[j][0].den = chroma_den; ++ ++ hdr_meta->display_primaries[j][1].num = ctx->out_metadata.display_primaries_y[i]; ++ hdr_meta->display_primaries[j][1].den = chroma_den; ++ } ++ ++ hdr_meta->white_point[0].num = ctx->out_metadata.white_point_x; ++ hdr_meta->white_point[0].den = chroma_den; ++ ++ hdr_meta->white_point[1].num = ctx->out_metadata.white_point_y; ++ hdr_meta->white_point[1].den = chroma_den; ++ hdr_meta->has_primaries = 1; ++ ++ hdr_meta->max_luminance.num = ctx->out_metadata.max_display_mastering_luminance; ++ hdr_meta->max_luminance.den = luma_den; ++ ++ 
hdr_meta->min_luminance.num = ctx->out_metadata.min_display_mastering_luminance; ++ hdr_meta->min_luminance.den = luma_den; ++ hdr_meta->has_luminance = 1; ++ ++ av_log(avctx, AV_LOG_DEBUG, ++ "Mastering display colour volume(out):\n"); ++ av_log(avctx, AV_LOG_DEBUG, ++ "G(%u,%u) B(%u,%u) R(%u,%u) WP(%u,%u)\n", ++ ctx->out_metadata.display_primaries_x[0], ++ ctx->out_metadata.display_primaries_y[0], ++ ctx->out_metadata.display_primaries_x[1], ++ ctx->out_metadata.display_primaries_y[1], ++ ctx->out_metadata.display_primaries_x[2], ++ ctx->out_metadata.display_primaries_y[2], ++ ctx->out_metadata.white_point_x, ++ ctx->out_metadata.white_point_y); ++ av_log(avctx, AV_LOG_DEBUG, ++ "max_display_mastering_luminance=%u, min_display_mastering_luminance=%u\n", ++ ctx->out_metadata.max_display_mastering_luminance, ++ ctx->out_metadata.min_display_mastering_luminance); ++ ++ metadata_lt = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, ++ sizeof(AVContentLightMetadata)); ++ if (!metadata_lt) ++ return AVERROR(ENOMEM); ++ ++ hdr_meta_lt = (AVContentLightMetadata *)metadata_lt->data; ++ ++ hdr_meta_lt->MaxCLL = FFMIN(ctx->out_metadata.max_content_light_level, 65535); ++ hdr_meta_lt->MaxFALL = FFMIN(ctx->out_metadata.max_pic_average_light_level, 65535); ++ ++ av_log(avctx, AV_LOG_DEBUG, ++ "Content light level information(out):\n"); ++ av_log(avctx, AV_LOG_DEBUG, ++ "MaxCLL(%u) MaxFALL(%u)\n", ++ ctx->out_metadata.max_content_light_level, ++ ctx->out_metadata.max_pic_average_light_level); ++ ++ return 0; ++} ++ + static int tonemap_vaapi_set_filter_params(AVFilterContext *avctx, AVFrame *input_frame) + { + VAAPIVPPContext *vpp_ctx = avctx->priv; +@@ -208,15 +290,26 @@ static int tonemap_vaapi_build_filter_pa + return AVERROR(EINVAL); + } + +- for (i = 0; i < num_query_caps; i++) { +- if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) +- break; +- } +- +- if (i >= num_query_caps) { +- av_log(avctx, AV_LOG_ERROR, +- "VAAPI driver doesn't support 
HDR to SDR\n"); +- return AVERROR(EINVAL); ++ if (ctx->mastering_display) { ++ for (i = 0; i < num_query_caps; i++) { ++ if (VA_TONE_MAPPING_HDR_TO_HDR & hdr_cap[i].caps_flag) ++ break; ++ } ++ if (i >= num_query_caps) { ++ av_log(avctx, AV_LOG_ERROR, ++ "VAAPI driver doesn't support HDR to HDR\n"); ++ return AVERROR(EINVAL); ++ } ++ } else { ++ for (i = 0; i < num_query_caps; i++) { ++ if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) ++ break; ++ } ++ if (i >= num_query_caps) { ++ av_log(avctx, AV_LOG_ERROR, ++ "VAAPI driver doesn't support HDR to SDR\n"); ++ return AVERROR(EINVAL); ++ } + } + + hdrtm_param.type = VAProcFilterHighDynamicRangeToneMapping; +@@ -241,6 +334,8 @@ static int tonemap_vaapi_filter_frame(AV + VAProcPipelineParameterBuffer params; + int err; + ++ VAHdrMetaData out_hdr_metadata; ++ + av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input_frame->format), + input_frame->width, input_frame->height, input_frame->pts); +@@ -278,22 +373,43 @@ static int tonemap_vaapi_filter_frame(AV + if (err < 0) + goto fail; + ++ av_frame_remove_side_data(output_frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ av_frame_remove_side_data(output_frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ ++ if (!ctx->mastering_display) { ++ /* Use BT709 by default for HDR to SDR output frame */ ++ output_frame->color_primaries = AVCOL_PRI_BT709; ++ output_frame->color_trc = AVCOL_TRC_BT709; ++ output_frame->colorspace = AVCOL_SPC_BT709; ++ } ++ + if (ctx->color_primaries != AVCOL_PRI_UNSPECIFIED) + output_frame->color_primaries = ctx->color_primaries; + + if (ctx->color_transfer != AVCOL_TRC_UNSPECIFIED) + output_frame->color_trc = ctx->color_transfer; +- else +- output_frame->color_trc = AVCOL_TRC_BT709; + + if (ctx->color_matrix != AVCOL_SPC_UNSPECIFIED) + output_frame->colorspace = ctx->color_matrix; + ++ if (ctx->mastering_display) { ++ err = tonemap_vaapi_update_sidedata(avctx, output_frame); ++ if (err < 0) ++ goto fail; 
++ } ++ + err = ff_vaapi_vpp_init_params(avctx, ¶ms, + input_frame, output_frame); + if (err < 0) + goto fail; + ++ if (ctx->mastering_display) { ++ out_hdr_metadata.metadata_type = VAProcHighDynamicRangeMetadataHDR10; ++ out_hdr_metadata.metadata = &ctx->out_metadata; ++ out_hdr_metadata.metadata_size = sizeof(VAHdrMetaDataHDR10); ++ params.output_hdr_metadata = &out_hdr_metadata; ++ } ++ + if (vpp_ctx->nb_filter_buffers) { + params.filters = &vpp_ctx->filter_buffers[0]; + params.num_filters = vpp_ctx->nb_filter_buffers; +@@ -309,9 +425,6 @@ static int tonemap_vaapi_filter_frame(AV + av_get_pix_fmt_name(output_frame->format), + output_frame->width, output_frame->height, output_frame->pts); + +- av_frame_remove_side_data(output_frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); +- av_frame_remove_side_data(output_frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); +- + return ff_filter_frame(outlink, output_frame); + + fail: +@@ -332,8 +445,13 @@ static av_cold int tonemap_vaapi_init(AV + if (ctx->output_format_string) { + vpp_ctx->output_format = av_get_pix_fmt(ctx->output_format_string); + } else { +- vpp_ctx->output_format = AV_PIX_FMT_NV12; +- av_log(avctx, AV_LOG_WARNING, "Output format not set, use default format NV12\n"); ++ if (ctx->mastering_display) { ++ vpp_ctx->output_format = AV_PIX_FMT_P010; ++ av_log(avctx, AV_LOG_VERBOSE, "Output format not set, use default format P010 for HDR to HDR tone mapping.\n"); ++ } else { ++ vpp_ctx->output_format = AV_PIX_FMT_NV12; ++ av_log(avctx, AV_LOG_VERBOSE, "Output format not set, use default format NV12 for HDR to SDR tone mapping.\n"); ++ } + } + + #define STRING_OPTION(var_name, func_name, default_value) do { \ +@@ -353,6 +471,37 @@ static av_cold int tonemap_vaapi_init(AV + STRING_OPTION(color_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); + STRING_OPTION(color_matrix, color_space, AVCOL_SPC_UNSPECIFIED); + ++ if (ctx->mastering_display) { ++ if (10 != sscanf(ctx->mastering_display, ++ "%hu %hu|%hu %hu|%hu %hu|%hu 
%hu|%u %u", ++ &ctx->out_metadata.display_primaries_x[0], ++ &ctx->out_metadata.display_primaries_y[0], ++ &ctx->out_metadata.display_primaries_x[1], ++ &ctx->out_metadata.display_primaries_y[1], ++ &ctx->out_metadata.display_primaries_x[2], ++ &ctx->out_metadata.display_primaries_y[2], ++ &ctx->out_metadata.white_point_x, ++ &ctx->out_metadata.white_point_y, ++ &ctx->out_metadata.min_display_mastering_luminance, ++ &ctx->out_metadata.max_display_mastering_luminance)) { ++ av_log(avctx, AV_LOG_ERROR, ++ "Option mastering-display input invalid\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (!ctx->content_light) { ++ ctx->out_metadata.max_content_light_level = 0; ++ ctx->out_metadata.max_pic_average_light_level = 0; ++ } else if (2 != sscanf(ctx->content_light, ++ "%hu %hu", ++ &ctx->out_metadata.max_content_light_level, ++ &ctx->out_metadata.max_pic_average_light_level)) { ++ av_log(avctx, AV_LOG_ERROR, ++ "Option content-light input invalid\n"); ++ return AVERROR(EINVAL); ++ } ++ } ++ + return 0; + } + +@@ -378,6 +527,12 @@ static const AVOption tonemap_vaapi_opti + { "t", "Output color transfer characteristics set", + OFFSET(color_transfer_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS, .unit = "transfer" }, ++ { "display", "set mastering display colour volume", ++ OFFSET(mastering_display), AV_OPT_TYPE_STRING, ++ { .str = NULL }, .flags = FLAGS }, ++ { "light", "set content light level information", ++ OFFSET(content_light), AV_OPT_TYPE_STRING, ++ { .str = NULL }, .flags = FLAGS }, + { NULL } + }; + diff --git a/cross/ffmpeg7/patches/1017-jellyfin-0017-add-fixes-for-nvdec-exceed-32-surfaces-error.patch b/cross/ffmpeg7/patches/1017-jellyfin-0017-add-fixes-for-nvdec-exceed-32-surfaces-error.patch new file mode 100644 index 00000000000..d8d69bc1705 --- /dev/null +++ b/cross/ffmpeg7/patches/1017-jellyfin-0017-add-fixes-for-nvdec-exceed-32-surfaces-error.patch @@ -0,0 +1,17 @@ +Index: FFmpeg/libavcodec/nvdec.c 
+=================================================================== +--- libavcodec/nvdec.c ++++ libavcodec/nvdec.c +@@ -299,8 +299,10 @@ static int nvdec_init_hwframes(AVCodecCo + frames_ctx = (AVHWFramesContext*)(*out_frames_ref)->data; + + if (dummy) { +- // Copied from ff_decode_get_hw_frames_ctx for compatibility +- frames_ctx->initial_pool_size += 3; ++ // The function above guarantees 1 work surface, We must guarantee 4 work surfaces. ++ // (the absolute minimum), so add the missing count without exceeding the maximum ++ // recommended for nvdec. ++ frames_ctx->initial_pool_size = FFMIN(frames_ctx->initial_pool_size + 3, 32); + + frames_ctx->free = nvdec_free_dummy; + frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy); diff --git a/cross/ffmpeg7/patches/1018-jellyfin-0018-backport-fixes-for-vaapi-from-upstream.patch b/cross/ffmpeg7/patches/1018-jellyfin-0018-backport-fixes-for-vaapi-from-upstream.patch new file mode 100644 index 00000000000..587970f6c9a --- /dev/null +++ b/cross/ffmpeg7/patches/1018-jellyfin-0018-backport-fixes-for-vaapi-from-upstream.patch @@ -0,0 +1,607 @@ +Index: FFmpeg/libavcodec/vaapi_av1.c +=================================================================== +--- libavcodec/vaapi_av1.c ++++ libavcodec/vaapi_av1.c +@@ -19,6 +19,7 @@ + */ + + #include "libavutil/frame.h" ++#include "libavutil/mem.h" + #include "hwaccel_internal.h" + #include "vaapi_decode.h" + #include "internal.h" +@@ -42,6 +43,9 @@ typedef struct VAAPIAV1DecContext { + */ + VAAPIAV1FrameRef ref_tab[AV1_NUM_REF_FRAMES]; + AVFrame *tmp_frame; ++ ++ int nb_slice_params; ++ VASliceParameterBufferAV1 *slice_params; + } VAAPIAV1DecContext; + + static VASurfaceID vaapi_av1_surface_id(AV1Frame *vf) +@@ -97,6 +101,8 @@ static int vaapi_av1_decode_uninit(AVCod + for (int i = 0; i < FF_ARRAY_ELEMS(ctx->ref_tab); i++) + av_frame_free(&ctx->ref_tab[i].frame); + ++ av_freep(&ctx->slice_params); ++ + return ff_vaapi_decode_uninit(avctx); + } + +@@ -393,13 +399,25 @@ static 
int vaapi_av1_decode_slice(AVCode + { + const AV1DecContext *s = avctx->priv_data; + VAAPIDecodePicture *pic = s->cur_frame.hwaccel_picture_private; +- VASliceParameterBufferAV1 slice_param; +- int err = 0; ++ VAAPIAV1DecContext *ctx = avctx->internal->hwaccel_priv_data; ++ int err, nb_params; + +- for (int i = s->tg_start; i <= s->tg_end; i++) { +- memset(&slice_param, 0, sizeof(VASliceParameterBufferAV1)); ++ nb_params = s->tg_end - s->tg_start + 1; ++ if (ctx->nb_slice_params < nb_params) { ++ VASliceParameterBufferAV1 *tmp = av_realloc_array(ctx->slice_params, ++ nb_params, ++ sizeof(*ctx->slice_params)); ++ if (!tmp) { ++ ctx->nb_slice_params = 0; ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ctx->slice_params = tmp; ++ ctx->nb_slice_params = nb_params; ++ } + +- slice_param = (VASliceParameterBufferAV1) { ++ for (int i = s->tg_start; i <= s->tg_end; i++) { ++ ctx->slice_params[i - s->tg_start] = (VASliceParameterBufferAV1) { + .slice_data_size = s->tile_group_info[i].tile_size, + .slice_data_offset = s->tile_group_info[i].tile_offset, + .slice_data_flag = VA_SLICE_DATA_FLAG_ALL, +@@ -408,18 +426,20 @@ static int vaapi_av1_decode_slice(AVCode + .tg_start = s->tg_start, + .tg_end = s->tg_end, + }; +- +- err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &slice_param, +- sizeof(VASliceParameterBufferAV1), +- buffer, +- size); +- if (err) { +- ff_vaapi_decode_cancel(avctx, pic); +- return err; +- } + } + ++ err = ff_vaapi_decode_make_slice_buffer(avctx, pic, ctx->slice_params, nb_params, ++ sizeof(VASliceParameterBufferAV1), ++ buffer, ++ size); ++ if (err) ++ goto fail; ++ + return 0; ++ ++fail: ++ ff_vaapi_decode_cancel(avctx, pic); ++ return err; + } + + const FFHWAccel ff_av1_vaapi_hwaccel = { +Index: FFmpeg/libavcodec/vaapi_decode.c +=================================================================== +--- libavcodec/vaapi_decode.c ++++ libavcodec/vaapi_decode.c +@@ -62,6 +62,7 @@ int ff_vaapi_decode_make_param_buffer(AV + int 
ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx, + VAAPIDecodePicture *pic, + const void *params_data, ++ int nb_params, + size_t params_size, + const void *slice_data, + size_t slice_size) +@@ -72,13 +73,14 @@ int ff_vaapi_decode_make_slice_buffer(AV + + av_assert0(pic->nb_slices <= pic->slices_allocated); + if (pic->nb_slices == pic->slices_allocated) { +- pic->slice_buffers = ++ VABufferID *tmp = + av_realloc_array(pic->slice_buffers, + pic->slices_allocated ? pic->slices_allocated * 2 : 64, + 2 * sizeof(*pic->slice_buffers)); +- if (!pic->slice_buffers) ++ if (!tmp) + return AVERROR(ENOMEM); + ++ pic->slice_buffers = tmp; + pic->slices_allocated = pic->slices_allocated ? pic->slices_allocated * 2 : 64; + } + av_assert0(pic->nb_slices + 1 <= pic->slices_allocated); +@@ -87,7 +89,7 @@ int ff_vaapi_decode_make_slice_buffer(AV + + vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context, + VASliceParameterBufferType, +- params_size, 1, (void*)params_data, ++ params_size, nb_params, (void*)params_data, + &pic->slice_buffers[index]); + if (vas != VA_STATUS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to create slice " +@@ -155,6 +157,11 @@ int ff_vaapi_decode_issue(AVCodecContext + VAStatus vas; + int err; + ++ if (pic->nb_slices <= 0) { ++ err = AVERROR(EINVAL); ++ goto fail; ++ } ++ + av_log(avctx, AV_LOG_DEBUG, "Decode to surface %#x.\n", + pic->output_surface); + +@@ -598,22 +605,26 @@ static int vaapi_decode_make_config(AVCo + if (err < 0) + goto fail; + +- frames->initial_pool_size = 1; +- // Add per-codec number of surfaces used for storing reference frames. 
+- switch (avctx->codec_id) { +- case AV_CODEC_ID_H264: +- case AV_CODEC_ID_HEVC: +- case AV_CODEC_ID_AV1: +- frames->initial_pool_size += 16; +- break; +- case AV_CODEC_ID_VP9: +- frames->initial_pool_size += 8; +- break; +- case AV_CODEC_ID_VP8: +- frames->initial_pool_size += 3; +- break; +- default: +- frames->initial_pool_size += 2; ++ if (CONFIG_VAAPI_1) ++ frames->initial_pool_size = 0; ++ else { ++ frames->initial_pool_size = 1; ++ // Add per-codec number of surfaces used for storing reference frames. ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_HEVC: ++ case AV_CODEC_ID_AV1: ++ frames->initial_pool_size += 16; ++ break; ++ case AV_CODEC_ID_VP9: ++ frames->initial_pool_size += 8; ++ break; ++ case AV_CODEC_ID_VP8: ++ frames->initial_pool_size += 3; ++ break; ++ default: ++ frames->initial_pool_size += 2; ++ } + } + } + +Index: FFmpeg/libavcodec/vaapi_decode.h +=================================================================== +--- libavcodec/vaapi_decode.h ++++ libavcodec/vaapi_decode.h +@@ -73,6 +73,7 @@ int ff_vaapi_decode_make_param_buffer(AV + int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx, + VAAPIDecodePicture *pic, + const void *params_data, ++ int nb_params, + size_t params_size, + const void *slice_data, + size_t slice_size); +Index: FFmpeg/libavcodec/vaapi_encode_av1.c +=================================================================== +--- libavcodec/vaapi_encode_av1.c ++++ libavcodec/vaapi_encode_av1.c +@@ -23,6 +23,7 @@ + + #include "libavutil/pixdesc.h" + #include "libavutil/opt.h" ++#include "libavutil/mastering_display_metadata.h" + + #include "cbs_av1.h" + #include "put_bits.h" +@@ -41,6 +42,8 @@ typedef struct VAAPIEncodeAV1Context { + VAAPIEncodeContext common; + AV1RawOBU sh; /**< sequence header.*/ + AV1RawOBU fh; /**< frame header.*/ ++ AV1RawOBU mh[4]; /**< metadata header.*/ ++ int nb_mh; + CodedBitstreamContext *cbc; + CodedBitstreamFragment current_obu; + VAConfigAttribValEncAV1 attr; 
+@@ -155,6 +158,8 @@ static av_cold int vaapi_encode_av1_conf + priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 128; + } + ++ ctx->roi_quant_range = AV1_MAX_QUANT; ++ + return 0; + } + +@@ -657,6 +662,68 @@ static int vaapi_encode_av1_init_picture + 2 : 1)); + } + ++ priv->nb_mh = 0; ++ ++ if (pic->type == PICTURE_TYPE_IDR) { ++ AVFrameSideData *sd = ++ av_frame_get_side_data(pic->input_image, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ if (sd) { ++ AVMasteringDisplayMetadata *mdm = ++ (AVMasteringDisplayMetadata *)sd->data; ++ if (mdm->has_primaries && mdm->has_luminance) { ++ AV1RawOBU *obu = &priv->mh[priv->nb_mh++]; ++ AV1RawMetadata *md = &obu->obu.metadata; ++ AV1RawMetadataHDRMDCV *mdcv = &md->metadata.hdr_mdcv; ++ const int chroma_den = 1 << 16; ++ const int max_luma_den = 1 << 8; ++ const int min_luma_den = 1 << 14; ++ ++ memset(obu, 0, sizeof(*obu)); ++ obu->header.obu_type = AV1_OBU_METADATA; ++ md->metadata_type = AV1_METADATA_TYPE_HDR_MDCV; ++ ++ for (i = 0; i < 3; i++) { ++ mdcv->primary_chromaticity_x[i] = ++ av_rescale(mdm->display_primaries[i][0].num, chroma_den, ++ mdm->display_primaries[i][0].den); ++ mdcv->primary_chromaticity_y[i] = ++ av_rescale(mdm->display_primaries[i][1].num, chroma_den, ++ mdm->display_primaries[i][1].den); ++ } ++ ++ mdcv->white_point_chromaticity_x = ++ av_rescale(mdm->white_point[0].num, chroma_den, ++ mdm->white_point[0].den); ++ mdcv->white_point_chromaticity_y = ++ av_rescale(mdm->white_point[1].num, chroma_den, ++ mdm->white_point[1].den); ++ ++ mdcv->luminance_max = ++ av_rescale(mdm->max_luminance.num, max_luma_den, ++ mdm->max_luminance.den); ++ mdcv->luminance_min = ++ av_rescale(mdm->min_luminance.num, min_luma_den, ++ mdm->min_luminance.den); ++ } ++ } ++ ++ sd = av_frame_get_side_data(pic->input_image, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ if (sd) { ++ AVContentLightMetadata *cllm = (AVContentLightMetadata *)sd->data; ++ AV1RawOBU *obu = &priv->mh[priv->nb_mh++]; ++ AV1RawMetadata *md = 
&obu->obu.metadata; ++ AV1RawMetadataHDRCLL *cll = &md->metadata.hdr_cll; ++ ++ memset(obu, 0, sizeof(*obu)); ++ obu->header.obu_type = AV1_OBU_METADATA; ++ md->metadata_type = AV1_METADATA_TYPE_HDR_CLL; ++ cll->max_cll = cllm->MaxCLL; ++ cll->max_fall = cllm->MaxFALL; ++ } ++ } ++ + end: + ff_cbs_fragment_reset(obu); + return ret; +@@ -733,6 +800,39 @@ end: + return ret; + } + ++static int vaapi_encode_av1_write_extra_header(AVCodecContext *avctx, ++ VAAPIEncodePicture *pic, ++ int index, int *type, ++ char *data, size_t *data_len) ++{ ++ VAAPIEncodeAV1Context *priv = avctx->priv_data; ++ CodedBitstreamFragment *obu = &priv->current_obu; ++ AV1RawOBU *mh_obu; ++ char mh_data[MAX_PARAM_BUFFER_SIZE]; ++ size_t mh_data_len; ++ int ret = 0; ++ ++ if (index >= priv->nb_mh) ++ return AVERROR_EOF; ++ ++ mh_obu = &priv->mh[index]; ++ ret = vaapi_encode_av1_add_obu(avctx, obu, AV1_OBU_METADATA, mh_obu); ++ if (ret < 0) ++ goto end; ++ ++ ret = vaapi_encode_av1_write_obu(avctx, mh_data, &mh_data_len, obu); ++ if (ret < 0) ++ goto end; ++ ++ memcpy(data, mh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char)); ++ *data_len = mh_data_len; ++ *type = VAEncPackedHeaderRawData; ++ ++end: ++ ff_cbs_fragment_reset(obu); ++ return ret; ++} ++ + static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = { + { AV_PROFILE_AV1_MAIN, 8, 3, 1, 1, VAProfileAV1Profile0 }, + { AV_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 }, +@@ -760,6 +860,8 @@ static const VAAPIEncodeType vaapi_encod + + .slice_params_size = sizeof(VAEncTileGroupBufferAV1), + .init_slice_params = &vaapi_encode_av1_init_slice_params, ++ ++ .write_extra_header = &vaapi_encode_av1_write_extra_header, + }; + + static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx) +@@ -774,7 +876,8 @@ static av_cold int vaapi_encode_av1_init + + ctx->desired_packed_headers = + VA_ENC_PACKED_HEADER_SEQUENCE | +- VA_ENC_PACKED_HEADER_PICTURE; ++ VA_ENC_PACKED_HEADER_PICTURE | ++ VA_ENC_PACKED_HEADER_MISC; // Metadata + + if 
(avctx->profile == AV_PROFILE_UNKNOWN) + avctx->profile = priv->profile; +Index: FFmpeg/libavcodec/vaapi_encode_h264.c +=================================================================== +--- libavcodec/vaapi_encode_h264.c ++++ libavcodec/vaapi_encode_h264.c +@@ -759,7 +759,7 @@ static int vaapi_encode_h264_init_pictur + vpic->frame_num = hpic->frame_num; + + vpic->pic_fields.bits.idr_pic_flag = (pic->type == PICTURE_TYPE_IDR); +- vpic->pic_fields.bits.reference_pic_flag = (pic->type != PICTURE_TYPE_B); ++ vpic->pic_fields.bits.reference_pic_flag = pic->is_reference; + + return 0; + } +Index: FFmpeg/libavcodec/vaapi_encode_h265.c +=================================================================== +--- libavcodec/vaapi_encode_h265.c ++++ libavcodec/vaapi_encode_h265.c +@@ -945,26 +945,23 @@ static int vaapi_encode_h265_init_pictur + + vpic->nal_unit_type = hpic->slice_nal_unit; + ++ vpic->pic_fields.bits.reference_pic_flag = pic->is_reference; + switch (pic->type) { + case PICTURE_TYPE_IDR: + vpic->pic_fields.bits.idr_pic_flag = 1; + vpic->pic_fields.bits.coding_type = 1; +- vpic->pic_fields.bits.reference_pic_flag = 1; + break; + case PICTURE_TYPE_I: + vpic->pic_fields.bits.idr_pic_flag = 0; + vpic->pic_fields.bits.coding_type = 1; +- vpic->pic_fields.bits.reference_pic_flag = 1; + break; + case PICTURE_TYPE_P: + vpic->pic_fields.bits.idr_pic_flag = 0; + vpic->pic_fields.bits.coding_type = 2; +- vpic->pic_fields.bits.reference_pic_flag = 1; + break; + case PICTURE_TYPE_B: + vpic->pic_fields.bits.idr_pic_flag = 0; + vpic->pic_fields.bits.coding_type = 3; +- vpic->pic_fields.bits.reference_pic_flag = 0; + break; + default: + av_assert0(0 && "invalid picture type"); +Index: FFmpeg/libavcodec/vaapi_h264.c +=================================================================== +--- libavcodec/vaapi_h264.c ++++ libavcodec/vaapi_h264.c +@@ -93,14 +93,19 @@ typedef struct DPB { + */ + static int dpb_add(DPB *dpb, const H264Picture *pic) + { +- int i; ++ int i, 
pic_frame_idx, merged = 0; + + if (dpb->size >= dpb->max_size) + return -1; + ++ pic_frame_idx = pic->long_ref ? pic->pic_id : pic->frame_num; ++ + for (i = 0; i < dpb->size; i++) { + VAPictureH264 * const va_pic = &dpb->va_pics[i]; +- if (va_pic->picture_id == ff_vaapi_get_surface_id(pic->f)) { ++ int va_pic_long_ref = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); ++ if (va_pic->picture_id == ff_vaapi_get_surface_id(pic->f) && ++ va_pic_long_ref == pic->long_ref && ++ va_pic->frame_idx == pic_frame_idx) { + VAPictureH264 temp_va_pic; + fill_vaapi_pic(&temp_va_pic, pic, 0); + +@@ -112,11 +117,14 @@ static int dpb_add(DPB *dpb, const H264P + } else { + va_pic->BottomFieldOrderCnt = temp_va_pic.BottomFieldOrderCnt; + } ++ merged = 1; + } +- return 0; + } + } + ++ if (merged) ++ return 0; ++ + fill_vaapi_pic(&dpb->va_pics[dpb->size++], pic, 0); + return 0; + } +@@ -375,7 +383,7 @@ static int vaapi_h264_decode_slice(AVCod + slice_param.chroma_offset_l1); + + err = ff_vaapi_decode_make_slice_buffer(avctx, pic, +- &slice_param, sizeof(slice_param), ++ &slice_param, 1, sizeof(slice_param), + buffer, size); + if (err) { + ff_vaapi_decode_cancel(avctx, pic); +Index: FFmpeg/libavcodec/vaapi_hevc.c +=================================================================== +--- libavcodec/vaapi_hevc.c ++++ libavcodec/vaapi_hevc.c +@@ -353,7 +353,7 @@ static int vaapi_hevc_end_frame(AVCodecC + if (pic->last_size) { + last_slice_param->LongSliceFlags.fields.LastSliceOfPic = 1; + ret = ff_vaapi_decode_make_slice_buffer(avctx, &pic->pic, +- &pic->last_slice_param, slice_param_size, ++ &pic->last_slice_param, 1, slice_param_size, + pic->last_buffer, pic->last_size); + if (ret < 0) + goto fail; +@@ -471,7 +471,7 @@ static int vaapi_hevc_decode_slice(AVCod + + if (!sh->first_slice_in_pic_flag) { + err = ff_vaapi_decode_make_slice_buffer(avctx, &pic->pic, +- &pic->last_slice_param, slice_param_size, ++ &pic->last_slice_param, 1, slice_param_size, + pic->last_buffer, 
pic->last_size); + pic->last_buffer = NULL; + pic->last_size = 0; +Index: FFmpeg/libavcodec/vaapi_mjpeg.c +=================================================================== +--- libavcodec/vaapi_mjpeg.c ++++ libavcodec/vaapi_mjpeg.c +@@ -131,7 +131,7 @@ static int vaapi_mjpeg_decode_slice(AVCo + sp.components[i].ac_table_selector = s->ac_index[i]; + } + +- err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &sp, sizeof(sp), buffer, size); ++ err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &sp, 1, sizeof(sp), buffer, size); + if (err) + goto fail; + +Index: FFmpeg/libavcodec/vaapi_mpeg2.c +=================================================================== +--- libavcodec/vaapi_mpeg2.c ++++ libavcodec/vaapi_mpeg2.c +@@ -162,7 +162,7 @@ static int vaapi_mpeg2_decode_slice(AVCo + }; + + err = ff_vaapi_decode_make_slice_buffer(avctx, pic, +- &slice_param, sizeof(slice_param), ++ &slice_param, 1, sizeof(slice_param), + buffer, size); + if (err < 0) { + ff_vaapi_decode_cancel(avctx, pic); +Index: FFmpeg/libavcodec/vaapi_mpeg4.c +=================================================================== +--- libavcodec/vaapi_mpeg4.c ++++ libavcodec/vaapi_mpeg4.c +@@ -169,7 +169,7 @@ static int vaapi_mpeg4_decode_slice(AVCo + }; + + err = ff_vaapi_decode_make_slice_buffer(avctx, pic, +- &slice_param, sizeof(slice_param), ++ &slice_param, 1, sizeof(slice_param), + buffer, size); + if (err < 0) { + ff_vaapi_decode_cancel(avctx, pic); +Index: FFmpeg/libavcodec/vaapi_vc1.c +=================================================================== +--- libavcodec/vaapi_vc1.c ++++ libavcodec/vaapi_vc1.c +@@ -489,7 +489,7 @@ static int vaapi_vc1_decode_slice(AVCode + }; + + err = ff_vaapi_decode_make_slice_buffer(avctx, pic, +- &slice_param, sizeof(slice_param), ++ &slice_param, 1, sizeof(slice_param), + buffer, size); + if (err < 0) { + ff_vaapi_decode_cancel(avctx, pic); +Index: FFmpeg/libavcodec/vaapi_vp8.c +=================================================================== +--- 
libavcodec/vaapi_vp8.c ++++ libavcodec/vaapi_vp8.c +@@ -209,7 +209,7 @@ static int vaapi_vp8_decode_slice(AVCode + for (i = 0; i < 8; i++) + sp.partition_size[i+1] = s->coeff_partition_size[i]; + +- err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &sp, sizeof(sp), data, data_size); ++ err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &sp, 1, sizeof(sp), data, data_size); + if (err) + goto fail; + +Index: FFmpeg/libavcodec/vaapi_vp9.c +=================================================================== +--- libavcodec/vaapi_vp9.c ++++ libavcodec/vaapi_vp9.c +@@ -158,7 +158,7 @@ static int vaapi_vp9_decode_slice(AVCode + } + + err = ff_vaapi_decode_make_slice_buffer(avctx, pic, +- &slice_param, sizeof(slice_param), ++ &slice_param, 1, sizeof(slice_param), + buffer, size); + if (err) { + ff_vaapi_decode_cancel(avctx, pic); +Index: FFmpeg/libavfilter/vaapi_vpp.c +=================================================================== +--- libavfilter/vaapi_vpp.c ++++ libavfilter/vaapi_vpp.c +@@ -203,7 +203,10 @@ int ff_vaapi_vpp_config_output(AVFilterL + output_frames->width = ctx->output_width; + output_frames->height = ctx->output_height; + +- output_frames->initial_pool_size = 4; ++ if (CONFIG_VAAPI_1) ++ output_frames->initial_pool_size = 0; ++ else ++ output_frames->initial_pool_size = 4; + + err = ff_filter_init_hw_frames(avctx, outlink, 10); + if (err < 0) +@@ -219,6 +222,8 @@ int ff_vaapi_vpp_config_output(AVFilterL + va_frames = output_frames->hwctx; + + av_assert0(ctx->va_context == VA_INVALID_ID); ++ av_assert0(output_frames->initial_pool_size || ++ (va_frames->surface_ids == NULL && va_frames->nb_surfaces == 0)); + vas = vaCreateContext(ctx->hwctx->display, ctx->va_config, + ctx->output_width, ctx->output_height, + VA_PROGRESSIVE, +Index: FFmpeg/libavutil/hwcontext_vaapi.c +=================================================================== +--- libavutil/hwcontext_vaapi.c ++++ libavutil/hwcontext_vaapi.c +@@ -809,6 +809,9 @@ static int 
vaapi_map_frame(AVHWFramesCon + VAStatus vas; + void *address = NULL; + int err, i; ++#if VA_CHECK_VERSION(1, 21, 0) ++ uint32_t vaflags = 0; ++#endif + + surface_id = (VASurfaceID)(uintptr_t)src->data[3]; + av_log(hwfc, AV_LOG_DEBUG, "Map surface %#x.\n", surface_id); +@@ -892,7 +895,16 @@ static int vaapi_map_frame(AVHWFramesCon + } + } + ++#if VA_CHECK_VERSION(1, 21, 0) ++ if (flags & AV_HWFRAME_MAP_READ) ++ vaflags |= VA_MAPBUFFER_FLAG_READ; ++ if (flags & AV_HWFRAME_MAP_WRITE) ++ vaflags |= VA_MAPBUFFER_FLAG_WRITE; ++ // On drivers not implementing vaMapBuffer2 libva calls vaMapBuffer instead. ++ vas = vaMapBuffer2(hwctx->display, map->image.buf, &address, vaflags); ++#else + vas = vaMapBuffer(hwctx->display, map->image.buf, &address); ++#endif + if (vas != VA_STATUS_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to map image from surface " + "%#x: %d (%s).\n", surface_id, vas, vaErrorStr(vas)); diff --git a/cross/ffmpeg7/patches/1019-jellyfin-0019-backport-fixes-for-qsv-from-upstream.patch b/cross/ffmpeg7/patches/1019-jellyfin-0019-backport-fixes-for-qsv-from-upstream.patch new file mode 100644 index 00000000000..1583958ac36 --- /dev/null +++ b/cross/ffmpeg7/patches/1019-jellyfin-0019-backport-fixes-for-qsv-from-upstream.patch @@ -0,0 +1,1662 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -2485,6 +2485,7 @@ TYPES_LIST=" + struct_sockaddr_storage + struct_stat_st_mtim_tv_nsec + struct_v4l2_frmivalenum_discrete ++ struct_mfxConfigInterface + " + + HAVE_LIST=" +@@ -6902,6 +6903,7 @@ elif enabled libvpl; then + check_pkg_config libmfx "vpl >= 2.6" "mfxvideo.h mfxdispatcher.h" MFXLoad || \ + die "ERROR: libvpl >= 2.6 not found" + add_cflags -DMFX_DEPRECATED_OFF ++ check_type "vpl/mfxdefs.h vpl/mfxvideo.h" "struct mfxConfigInterface" + fi + + if enabled libmfx; then +Index: FFmpeg/libavcodec/qsv.c +=================================================================== +--- 
libavcodec/qsv.c ++++ libavcodec/qsv.c +@@ -34,6 +34,7 @@ + + #include "avcodec.h" + #include "qsv_internal.h" ++#include "refstruct.h" + + #define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) + #define QSV_HAVE_USER_PLUGIN !QSV_ONEVPL +@@ -741,20 +742,19 @@ int ff_qsv_init_internal_session(AVCodec + return 0; + } + +-static void mids_buf_free(void *opaque, uint8_t *data) ++static void mids_buf_free(FFRefStructOpaque opaque, void *obj) + { +- AVBufferRef *hw_frames_ref = opaque; ++ AVBufferRef *hw_frames_ref = opaque.nc; + av_buffer_unref(&hw_frames_ref); +- av_freep(&data); + } + +-static AVBufferRef *qsv_create_mids(AVBufferRef *hw_frames_ref) ++static QSVMid *qsv_create_mids(AVBufferRef *hw_frames_ref) + { + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ref->data; + AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; + int nb_surfaces = frames_hwctx->nb_surfaces; + +- AVBufferRef *mids_buf, *hw_frames_ref1; ++ AVBufferRef *hw_frames_ref1; + QSVMid *mids; + int i; + +@@ -762,35 +762,27 @@ static AVBufferRef *qsv_create_mids(AVBu + if (!hw_frames_ref1) + return NULL; + +- mids = av_calloc(nb_surfaces, sizeof(*mids)); ++ mids = ff_refstruct_alloc_ext(nb_surfaces * sizeof(*mids), 0, ++ hw_frames_ref1, mids_buf_free); + if (!mids) { + av_buffer_unref(&hw_frames_ref1); + return NULL; + } + +- mids_buf = av_buffer_create((uint8_t*)mids, nb_surfaces * sizeof(*mids), +- mids_buf_free, hw_frames_ref1, 0); +- if (!mids_buf) { +- av_buffer_unref(&hw_frames_ref1); +- av_freep(&mids); +- return NULL; +- } +- + for (i = 0; i < nb_surfaces; i++) { + QSVMid *mid = &mids[i]; + mid->handle_pair = (mfxHDLPair*)frames_hwctx->surfaces[i].Data.MemId; + mid->hw_frames_ref = hw_frames_ref1; + } + +- return mids_buf; ++ return mids; + } + + static int qsv_setup_mids(mfxFrameAllocResponse *resp, AVBufferRef *hw_frames_ref, +- AVBufferRef *mids_buf) ++ QSVMid *mids) + { + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ref->data; + AVQSVFramesContext 
*frames_hwctx = frames_ctx->hwctx; +- QSVMid *mids = (QSVMid*)mids_buf->data; + int nb_surfaces = frames_hwctx->nb_surfaces; + int i; + +@@ -811,12 +803,7 @@ static int qsv_setup_mids(mfxFrameAllocR + return AVERROR(ENOMEM); + } + +- resp->mids[resp->NumFrameActual + 1] = av_buffer_ref(mids_buf); +- if (!resp->mids[resp->NumFrameActual + 1]) { +- av_buffer_unref((AVBufferRef**)&resp->mids[resp->NumFrameActual]); +- av_freep(&resp->mids); +- return AVERROR(ENOMEM); +- } ++ resp->mids[resp->NumFrameActual + 1] = ff_refstruct_ref(mids); + + return 0; + } +@@ -839,8 +826,16 @@ static mfxStatus qsv_frame_alloc(mfxHDL + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data; + AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; + mfxFrameInfo *i = &req->Info; +- mfxFrameInfo *i1 = &frames_hwctx->surfaces[0].Info; ++ mfxFrameInfo *i1; + ++ if (!frames_hwctx->nb_surfaces) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, ++ "Dynamic frame pools, no frame is pre-allocated\n"); ++ ++ return MFX_ERR_NONE; ++ } ++ ++ i1 = &frames_hwctx->surfaces[0].Info; + if (i->Width > i1->Width || i->Height > i1->Height || + i->FourCC != i1->FourCC || i->ChromaFormat != i1->ChromaFormat) { + av_log(ctx->logctx, AV_LOG_ERROR, "Mismatching surface properties in an " +@@ -850,7 +845,7 @@ static mfxStatus qsv_frame_alloc(mfxHDL + return MFX_ERR_UNSUPPORTED; + } + +- ret = qsv_setup_mids(resp, ctx->hw_frames_ctx, ctx->mids_buf); ++ ret = qsv_setup_mids(resp, ctx->hw_frames_ctx, ctx->mids); + if (ret < 0) { + av_log(ctx->logctx, AV_LOG_ERROR, + "Error filling an external frame allocation request\n"); +@@ -859,12 +854,17 @@ static mfxStatus qsv_frame_alloc(mfxHDL + } else if (req->Type & MFX_MEMTYPE_INTERNAL_FRAME) { + /* internal frames -- allocate a new hw frames context */ + AVHWFramesContext *ext_frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data; ++ AVQSVFramesContext *ext_frames_hwctx = ext_frames_ctx->hwctx; + mfxFrameInfo *i = &req->Info; + +- AVBufferRef *frames_ref, 
*mids_buf; ++ AVBufferRef *frames_ref; ++ QSVMid *mids; + AVHWFramesContext *frames_ctx; + AVQSVFramesContext *frames_hwctx; + ++ if (!ext_frames_hwctx->nb_surfaces) ++ return MFX_ERR_UNSUPPORTED; ++ + frames_ref = av_hwframe_ctx_alloc(ext_frames_ctx->device_ref); + if (!frames_ref) + return MFX_ERR_MEMORY_ALLOC; +@@ -889,14 +889,14 @@ static mfxStatus qsv_frame_alloc(mfxHDL + return MFX_ERR_MEMORY_ALLOC; + } + +- mids_buf = qsv_create_mids(frames_ref); +- if (!mids_buf) { ++ mids = qsv_create_mids(frames_ref); ++ if (!mids) { + av_buffer_unref(&frames_ref); + return MFX_ERR_MEMORY_ALLOC; + } + +- ret = qsv_setup_mids(resp, frames_ref, mids_buf); +- av_buffer_unref(&mids_buf); ++ ret = qsv_setup_mids(resp, frames_ref, mids); ++ ff_refstruct_unref(&mids); + av_buffer_unref(&frames_ref); + if (ret < 0) { + av_log(ctx->logctx, AV_LOG_ERROR, +@@ -912,19 +912,31 @@ static mfxStatus qsv_frame_alloc(mfxHDL + + static mfxStatus qsv_frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp) + { ++ if (!resp->mids) ++ return MFX_ERR_NONE; ++ + av_buffer_unref((AVBufferRef**)&resp->mids[resp->NumFrameActual]); +- av_buffer_unref((AVBufferRef**)&resp->mids[resp->NumFrameActual + 1]); ++ ff_refstruct_unref(&resp->mids[resp->NumFrameActual + 1]); + av_freep(&resp->mids); + return MFX_ERR_NONE; + } + + static mfxStatus qsv_frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) + { +- QSVMid *qsv_mid = mid; +- AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)qsv_mid->hw_frames_ref->data; +- AVQSVFramesContext *hw_frames_hwctx = hw_frames_ctx->hwctx; ++ QSVFramesContext *ctx = (QSVFramesContext *)pthis; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data; ++ AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; ++ QSVMid *qsv_mid; ++ AVHWFramesContext *hw_frames_ctx; ++ AVQSVFramesContext *hw_frames_hwctx; + int ret; + ++ if (!frames_hwctx->nb_surfaces) ++ return MFX_ERR_UNSUPPORTED; ++ ++ qsv_mid = mid; ++ hw_frames_ctx = 
(AVHWFramesContext*)qsv_mid->hw_frames_ref->data; ++ hw_frames_hwctx = hw_frames_ctx->hwctx; + if (qsv_mid->locked_frame) + return MFX_ERR_UNDEFINED_BEHAVIOR; + +@@ -977,8 +989,15 @@ fail: + + static mfxStatus qsv_frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) + { +- QSVMid *qsv_mid = mid; ++ QSVFramesContext *ctx = (QSVFramesContext *)pthis; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data; ++ AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; ++ QSVMid *qsv_mid; ++ ++ if (!frames_hwctx->nb_surfaces) ++ return MFX_ERR_UNSUPPORTED; + ++ qsv_mid = mid; + av_frame_free(&qsv_mid->locked_frame); + av_frame_free(&qsv_mid->hw_frame); + +@@ -987,9 +1006,18 @@ static mfxStatus qsv_frame_unlock(mfxHDL + + static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- QSVMid *qsv_mid = (QSVMid*)mid; ++ QSVFramesContext *ctx = (QSVFramesContext *)pthis; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data; ++ AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; + mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; +- mfxHDLPair *pair_src = (mfxHDLPair*)qsv_mid->handle_pair; ++ mfxHDLPair *pair_src; ++ ++ if (frames_hwctx->nb_surfaces) { ++ QSVMid *qsv_mid = (QSVMid*)mid; ++ pair_src = (mfxHDLPair*)qsv_mid->handle_pair; ++ } else { ++ pair_src = (mfxHDLPair*)mid; ++ } + + pair_dst->first = pair_src->first; + +@@ -1103,14 +1131,17 @@ int ff_qsv_init_session_frames(AVCodecCo + + if (!opaque) { + qsv_frames_ctx->logctx = avctx; ++ qsv_frames_ctx->mids = NULL; ++ qsv_frames_ctx->nb_mids = 0; + + /* allocate the memory ids for the external frames */ +- av_buffer_unref(&qsv_frames_ctx->mids_buf); +- qsv_frames_ctx->mids_buf = qsv_create_mids(qsv_frames_ctx->hw_frames_ctx); +- if (!qsv_frames_ctx->mids_buf) +- return AVERROR(ENOMEM); +- qsv_frames_ctx->mids = (QSVMid*)qsv_frames_ctx->mids_buf->data; +- qsv_frames_ctx->nb_mids = frames_hwctx->nb_surfaces; ++ if (frames_hwctx->nb_surfaces) { ++ 
ff_refstruct_unref(&qsv_frames_ctx->mids); ++ qsv_frames_ctx->mids = qsv_create_mids(qsv_frames_ctx->hw_frames_ctx); ++ if (!qsv_frames_ctx->mids) ++ return AVERROR(ENOMEM); ++ qsv_frames_ctx->nb_mids = frames_hwctx->nb_surfaces; ++ } + + err = MFXVideoCORE_SetFrameAllocator(session, &frame_allocator); + if (err != MFX_ERR_NONE) +Index: FFmpeg/libavcodec/qsv_internal.h +=================================================================== +--- libavcodec/qsv_internal.h ++++ libavcodec/qsv_internal.h +@@ -115,11 +115,12 @@ typedef struct QSVFramesContext { + AVBufferRef *hw_frames_ctx; + void *logctx; + +- /* The memory ids for the external frames. +- * Refcounted, since we need one reference owned by the QSVFramesContext +- * (i.e. by the encoder/decoder) and another one given to the MFX session +- * from the frame allocator. */ +- AVBufferRef *mids_buf; ++ /** ++ * The memory ids for the external frames. ++ * Refcounted (via the RefStruct API), since we need one reference ++ * owned by the QSVFramesContext (i.e. by the encoder/decoder) and ++ * another one given to the MFX session from the frame allocator. ++ */ + QSVMid *mids; + int nb_mids; + } QSVFramesContext; +Index: FFmpeg/libavcodec/qsvdec.c +=================================================================== +--- libavcodec/qsvdec.c ++++ libavcodec/qsvdec.c +@@ -42,6 +42,7 @@ + #include "libavutil/imgutils.h" + #include "libavutil/film_grain_params.h" + #include "libavutil/mastering_display_metadata.h" ++#include "libavutil/avassert.h" + + #include "avcodec.h" + #include "codec_internal.h" +@@ -50,6 +51,7 @@ + #include "hwconfig.h" + #include "qsv.h" + #include "qsv_internal.h" ++#include "refstruct.h" + + #if QSV_ONEVPL + #include +@@ -67,6 +69,8 @@ static const AVRational mfx_tb = { 1, 90 + AV_NOPTS_VALUE : pts_tb.num ? 
\ + av_rescale_q(mfx_pts, mfx_tb, pts_tb) : mfx_pts) + ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) ++ + typedef struct QSVAsyncFrame { + mfxSyncPoint *sync; + QSVFrame *frame; +@@ -76,6 +80,7 @@ typedef struct QSVContext { + // the session used for decoding + mfxSession session; + mfxVersion ver; ++ mfxHandleType handle_type; + + // the session we allocated internally, in case the caller did not provide + // one +@@ -132,26 +137,26 @@ static int qsv_get_continuous_buffer(AVC + if (ret < 0) + return ret; + +- frame->width = avctx->width; +- frame->height = avctx->height; ++ frame->width = avctx->coded_width; ++ frame->height = avctx->coded_height; + + switch (avctx->pix_fmt) { + case AV_PIX_FMT_NV12: +- frame->linesize[0] = FFALIGN(avctx->width, 128); ++ frame->linesize[0] = FFALIGN(avctx->coded_width, 128); + break; + case AV_PIX_FMT_P010: + case AV_PIX_FMT_P012: + case AV_PIX_FMT_YUYV422: +- frame->linesize[0] = 2 * FFALIGN(avctx->width, 128); ++ frame->linesize[0] = 2 * FFALIGN(avctx->coded_width, 128); + break; + case AV_PIX_FMT_Y210: + case AV_PIX_FMT_VUYX: + case AV_PIX_FMT_XV30: + case AV_PIX_FMT_Y212: +- frame->linesize[0] = 4 * FFALIGN(avctx->width, 128); ++ frame->linesize[0] = 4 * FFALIGN(avctx->coded_width, 128); + break; + case AV_PIX_FMT_XV36: +- frame->linesize[0] = 8 * FFALIGN(avctx->width, 128); ++ frame->linesize[0] = 8 * FFALIGN(avctx->coded_width, 128); + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format.\n"); +@@ -168,7 +173,7 @@ static int qsv_get_continuous_buffer(AVC + avctx->pix_fmt == AV_PIX_FMT_P012) { + frame->linesize[1] = frame->linesize[0]; + frame->data[1] = frame->data[0] + +- frame->linesize[0] * FFALIGN(avctx->height, 64); ++ frame->linesize[0] * FFALIGN(avctx->coded_height, 64); + } + + ret = ff_attach_decode_data(frame); +@@ -182,6 +187,7 @@ static int qsv_init_session(AVCodecConte + AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref) + { + int ret; ++ mfxIMPL impl; + + if (q->gpu_copy == 
MFX_GPUCOPY_ON && + !(q->iopattern & MFX_IOPATTERN_OUT_SYSTEM_MEMORY)) { +@@ -239,27 +245,52 @@ static int qsv_init_session(AVCodecConte + q->session = q->internal_qs.session; + } + +- if (MFXQueryVersion(q->session, &q->ver) != MFX_ERR_NONE) { +- av_log(avctx, AV_LOG_ERROR, "Error querying the session version. \n"); +- q->session = NULL; ++ if (MFXQueryIMPL(q->session, &impl) == MFX_ERR_NONE) { ++ switch (MFX_IMPL_VIA_MASK(impl)) { ++ case MFX_IMPL_VIA_VAAPI: ++ q->handle_type = MFX_HANDLE_VA_DISPLAY; ++ break; ++ ++ case MFX_IMPL_VIA_D3D11: ++ q->handle_type = MFX_HANDLE_D3D11_DEVICE; ++ break; ++ ++ case MFX_IMPL_VIA_D3D9: ++ q->handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ break; + +- if (q->internal_qs.session) { +- MFXClose(q->internal_qs.session); +- q->internal_qs.session = NULL; +- } +- +- if (q->internal_qs.loader) { +- MFXUnload(q->internal_qs.loader); +- q->internal_qs.loader = NULL; ++ default: ++ av_assert0(!"should not reach here"); + } ++ } else { ++ av_log(avctx, AV_LOG_ERROR, "Error querying the implementation. \n"); ++ goto fail; ++ } + +- return AVERROR_EXTERNAL; ++ if (MFXQueryVersion(q->session, &q->ver) != MFX_ERR_NONE) { ++ av_log(avctx, AV_LOG_ERROR, "Error querying the session version. 
\n"); ++ goto fail; + } + + /* make sure the decoder is uninitialized */ + MFXVideoDECODE_Close(q->session); + + return 0; ++ ++fail: ++ q->session = NULL; ++ ++ if (q->internal_qs.session) { ++ MFXClose(q->internal_qs.session); ++ q->internal_qs.session = NULL; ++ } ++ ++ if (q->internal_qs.loader) { ++ MFXUnload(q->internal_qs.loader); ++ q->internal_qs.loader = NULL; ++ } ++ ++ return AVERROR_EXTERNAL; + } + + static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q, enum AVPixelFormat pix_fmt, mfxVideoParam *param) +@@ -309,7 +340,10 @@ static int qsv_decode_preinit(AVCodecCon + hwframes_ctx->height = FFALIGN(avctx->coded_height, 32); + hwframes_ctx->format = AV_PIX_FMT_QSV; + hwframes_ctx->sw_format = avctx->sw_pix_fmt; +- hwframes_ctx->initial_pool_size = q->suggest_pool_size + 16 + avctx->extra_hw_frames; ++ if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 2, 9) && q->handle_type != MFX_HANDLE_D3D9_DEVICE_MANAGER) ++ hwframes_ctx->initial_pool_size = 0; ++ else ++ hwframes_ctx->initial_pool_size = q->suggest_pool_size + 16 + avctx->extra_hw_frames; + frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + + ret = av_hwframe_ctx_init(avctx->hw_frames_ctx); +@@ -379,7 +413,7 @@ static int qsv_decode_init_context(AVCod + q->frame_info = param->mfx.FrameInfo; + + if (!avctx->hw_frames_ctx) { +- ret = av_image_get_buffer_size(avctx->pix_fmt, FFALIGN(avctx->width, 128), FFALIGN(avctx->height, 64), 1); ++ ret = av_image_get_buffer_size(avctx->pix_fmt, FFALIGN(avctx->coded_width, 128), FFALIGN(avctx->coded_height, 64), 1); + if (ret < 0) + return ret; + q->pool = av_buffer_pool_init(ret, av_buffer_allocz); +@@ -443,6 +477,11 @@ static int qsv_decode_header(AVCodecCont + param->ExtParam = q->ext_buffers; + param->NumExtParam = q->nb_ext_buffers; + ++ if (param->mfx.FrameInfo.FrameRateExtN == 0 || param->mfx.FrameInfo.FrameRateExtD == 0) { ++ param->mfx.FrameInfo.FrameRateExtN = 25; ++ param->mfx.FrameInfo.FrameRateExtD = 1; ++ } ++ + #if 
QSV_VERSION_ATLEAST(1, 34) + if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 1, 34) && avctx->codec_id == AV_CODEC_ID_AV1) + param->mfx.FilmGrain = (avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) ? 0 : param->mfx.FilmGrain; +@@ -499,7 +538,8 @@ static int alloc_frame(AVCodecContext *a + #endif + + #if QSV_VERSION_ATLEAST(1, 35) +- if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 1, 35) && avctx->codec_id == AV_CODEC_ID_HEVC) { ++ if ((QSV_RUNTIME_VERSION_ATLEAST(q->ver, 1, 35) && avctx->codec_id == AV_CODEC_ID_HEVC) || ++ (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 2, 9) && avctx->codec_id == AV_CODEC_ID_AV1)) { + frame->mdcv.Header.BufferId = MFX_EXTBUFF_MASTERING_DISPLAY_COLOUR_VOLUME; + frame->mdcv.Header.BufferSz = sizeof(frame->mdcv); + // The data in mdcv is valid when this flag is 1 +@@ -703,6 +743,45 @@ static int qsv_export_hdr_side_data(AVCo + return 0; + } + ++static int qsv_export_hdr_side_data_av1(AVCodecContext *avctx, mfxExtMasteringDisplayColourVolume *mdcv, ++ mfxExtContentLightLevelInfo *clli, AVFrame *frame) ++{ ++ if (mdcv->InsertPayloadToggle) { ++ AVMasteringDisplayMetadata *mastering = av_mastering_display_metadata_create_side_data(frame); ++ const int chroma_den = 1 << 16; ++ const int max_luma_den = 1 << 8; ++ const int min_luma_den = 1 << 14; ++ ++ if (!mastering) ++ return AVERROR(ENOMEM); ++ ++ for (int i = 0; i < 3; i++) { ++ mastering->display_primaries[i][0] = av_make_q(mdcv->DisplayPrimariesX[i], chroma_den); ++ mastering->display_primaries[i][1] = av_make_q(mdcv->DisplayPrimariesY[i], chroma_den); ++ } ++ ++ mastering->white_point[0] = av_make_q(mdcv->WhitePointX, chroma_den); ++ mastering->white_point[1] = av_make_q(mdcv->WhitePointY, chroma_den); ++ ++ mastering->max_luminance = av_make_q(mdcv->MaxDisplayMasteringLuminance, max_luma_den); ++ mastering->min_luminance = av_make_q(mdcv->MinDisplayMasteringLuminance, min_luma_den); ++ ++ mastering->has_luminance = 1; ++ mastering->has_primaries = 1; ++ } ++ ++ if (clli->InsertPayloadToggle) { ++ 
AVContentLightMetadata *light = av_content_light_metadata_create_side_data(frame); ++ if (!light) ++ return AVERROR(ENOMEM); ++ ++ light->MaxCLL = clli->MaxContentLightLevel; ++ light->MaxFALL = clli->MaxPicAverageLightLevel; ++ } ++ ++ return 0; ++} ++ + #endif + + static int qsv_decode(AVCodecContext *avctx, QSVContext *q, +@@ -835,6 +914,12 @@ static int qsv_decode(AVCodecContext *av + if (ret < 0) + return ret; + } ++ ++ if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 2, 9) && avctx->codec_id == AV_CODEC_ID_AV1) { ++ ret = qsv_export_hdr_side_data_av1(avctx, &aframe.frame->mdcv, &aframe.frame->clli, frame); ++ if (ret < 0) ++ return ret; ++ } + #endif + + frame->repeat_pict = +@@ -846,13 +931,23 @@ static int qsv_decode(AVCodecContext *av + frame->flags |= AV_FRAME_FLAG_INTERLACED * + !(outsurf->Info.PicStruct & MFX_PICSTRUCT_PROGRESSIVE); + frame->pict_type = ff_qsv_map_pictype(aframe.frame->dec_info.FrameType); +- //Key frame is IDR frame is only suitable for H264. For HEVC, IRAPs are key frames. 
+- if (avctx->codec_id == AV_CODEC_ID_H264) { ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC) { + if (aframe.frame->dec_info.FrameType & MFX_FRAMETYPE_IDR) + frame->flags |= AV_FRAME_FLAG_KEY; + else + frame->flags &= ~AV_FRAME_FLAG_KEY; ++ } else { ++ if (aframe.frame->dec_info.FrameType & MFX_FRAMETYPE_I) ++ frame->flags |= AV_FRAME_FLAG_KEY; ++ else ++ frame->flags &= ~AV_FRAME_FLAG_KEY; + } ++ frame->crop_left = outsurf->Info.CropX; ++ frame->crop_top = outsurf->Info.CropY; ++ frame->crop_right = outsurf->Info.Width - (outsurf->Info.CropX + outsurf->Info.CropW); ++ frame->crop_bottom = outsurf->Info.Height - (outsurf->Info.CropY + outsurf->Info.CropH); + + /* update the surface properties */ + if (avctx->pix_fmt == AV_PIX_FMT_QSV) +@@ -888,7 +983,7 @@ static void qsv_decode_close_qsvcontext( + ff_qsv_close_internal_session(&q->internal_qs); + + av_buffer_unref(&q->frames_ctx.hw_frames_ctx); +- av_buffer_unref(&q->frames_ctx.mids_buf); ++ ff_refstruct_unref(&q->frames_ctx.mids); + av_buffer_pool_uninit(&q->pool); + } + +@@ -1142,7 +1237,7 @@ const FFCodec ff_##x##_qsv_decoder = { \ + .p.priv_class = &x##_qsv_class, \ + .hw_configs = qsv_hw_configs, \ + .p.wrapper_name = "qsv", \ +- .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE, \ ++ .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING, \ + }; \ + + #define DEFINE_QSV_DECODER(x, X, bsf_name) DEFINE_QSV_DECODER_WITH_OPTION(x, X, bsf_name, options) +Index: FFmpeg/libavcodec/qsvenc.c +=================================================================== +--- libavcodec/qsvenc.c ++++ libavcodec/qsvenc.c +@@ -31,6 +31,7 @@ + #include "libavutil/hwcontext_qsv.h" + #include "libavutil/mem.h" + #include "libavutil/log.h" ++#include "libavutil/dict.h" + #include "libavutil/time.h" + #include "libavutil/imgutils.h" + +@@ -41,6 +42,7 @@ + #include "qsv.h" + #include "qsv_internal.h" + #include "qsvenc.h" ++#include "refstruct.h" + + struct profile_names 
{ + mfxU16 profile; +@@ -743,8 +745,9 @@ static int init_video_param_jpeg(AVCodec + if (avctx->hw_frames_ctx) { + AVHWFramesContext *frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; + AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; +- q->param.mfx.FrameInfo.Width = frames_hwctx->surfaces[0].Info.Width; +- q->param.mfx.FrameInfo.Height = frames_hwctx->surfaces[0].Info.Height; ++ mfxFrameInfo *info = frames_hwctx->nb_surfaces ? &frames_hwctx->surfaces[0].Info : frames_hwctx->info; ++ q->param.mfx.FrameInfo.Width = info->Width; ++ q->param.mfx.FrameInfo.Height = info->Height; + } + + if (avctx->framerate.den > 0 && avctx->framerate.num > 0) { +@@ -867,8 +870,9 @@ static int init_video_param(AVCodecConte + if (avctx->hw_frames_ctx) { + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; + AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx; +- q->param.mfx.FrameInfo.Width = frames_hwctx->surfaces[0].Info.Width; +- q->param.mfx.FrameInfo.Height = frames_hwctx->surfaces[0].Info.Height; ++ mfxFrameInfo *info = frames_hwctx->nb_surfaces ? 
&frames_hwctx->surfaces[0].Info : frames_hwctx->info; ++ q->param.mfx.FrameInfo.Width = info->Width; ++ q->param.mfx.FrameInfo.Height = info->Height; + } + + if (avctx->framerate.den > 0 && avctx->framerate.num > 0) { +@@ -1633,6 +1637,12 @@ int ff_qsv_enc_init(AVCodecContext *avct + int iopattern = 0; + int opaque_alloc = 0; + int ret; ++ void *tmp; ++#if HAVE_STRUCT_MFXCONFIGINTERFACE ++ mfxExtBuffer ext_buf; ++ mfxConfigInterface *iface = NULL; ++ const AVDictionaryEntry *param = NULL; ++#endif + + q->param.AsyncDepth = q->async_depth; + +@@ -1693,35 +1703,92 @@ int ff_qsv_enc_init(AVCodecContext *avct + if (ret < 0) + return ret; + ++ tmp = av_realloc_array(q->extparam, q->nb_extparam_internal, sizeof(*q->extparam)); ++ if (!tmp) ++ return AVERROR(ENOMEM); ++ ++ q->extparam = tmp; ++ q->nb_extparam = q->nb_extparam_internal; ++ memcpy(q->extparam, q->extparam_internal, q->nb_extparam * sizeof(*q->extparam)); ++ + if (avctx->hwaccel_context) { + AVQSVContext *qsv = avctx->hwaccel_context; + int i, j; + +- q->extparam = av_calloc(qsv->nb_ext_buffers + q->nb_extparam_internal, +- sizeof(*q->extparam)); +- if (!q->extparam) +- return AVERROR(ENOMEM); +- +- q->param.ExtParam = q->extparam; +- for (i = 0; i < qsv->nb_ext_buffers; i++) +- q->param.ExtParam[i] = qsv->ext_buffers[i]; +- q->param.NumExtParam = qsv->nb_ext_buffers; +- +- for (i = 0; i < q->nb_extparam_internal; i++) { +- for (j = 0; j < qsv->nb_ext_buffers; j++) { +- if (qsv->ext_buffers[j]->BufferId == q->extparam_internal[i]->BufferId) ++ for (i = 0; i < qsv->nb_ext_buffers; i++) { ++ for (j = 0; j < q->nb_extparam_internal; j++) { ++ if (qsv->ext_buffers[i]->BufferId == q->extparam_internal[j]->BufferId) { ++ q->extparam[j] = qsv->ext_buffers[i]; + break; ++ } + } +- if (j < qsv->nb_ext_buffers) +- continue; + +- q->param.ExtParam[q->param.NumExtParam++] = q->extparam_internal[i]; ++ if (j == q->nb_extparam_internal) { ++ tmp = av_realloc_array(q->extparam, q->nb_extparam + 1, sizeof(*q->extparam)); ++ 
if (!tmp) ++ return AVERROR(ENOMEM); ++ ++ q->extparam = tmp; ++ q->extparam[q->nb_extparam++] = qsv->ext_buffers[i]; ++ } + } +- } else { +- q->param.ExtParam = q->extparam_internal; +- q->param.NumExtParam = q->nb_extparam_internal; + } + ++ q->param.ExtParam = q->extparam; ++ q->param.NumExtParam = q->nb_extparam; ++ ++#if HAVE_STRUCT_MFXCONFIGINTERFACE ++ ret = MFXVideoCORE_GetHandle(q->session, MFX_HANDLE_CONFIG_INTERFACE, (mfxHDL *)(&iface)); ++ if (ret < 0) ++ return ff_qsv_print_error(avctx, ret, ++ "Error getting mfx config interface handle"); ++ ++ while ((param = av_dict_get(q->qsv_params, "", param, AV_DICT_IGNORE_SUFFIX))) { ++ const char *param_key = param->key; ++ const char *param_value = param->value; ++ mfxExtBuffer *new_ext_buf; ++ void *tmp; ++ ++ av_log(avctx, AV_LOG_VERBOSE, "Parameter key: %s, value: %s\n", param_key, param_value); ++ ++ // Set encoding parameters using MFXSetParameter ++ for (int i = 0; i < 2; i++) { ++ ret = iface->SetParameter(iface, (mfxU8*)param_key, (mfxU8*)param_value, MFX_STRUCTURE_TYPE_VIDEO_PARAM, &q->param, &ext_buf); ++ if (ret == MFX_ERR_NONE) { ++ break; ++ } else if (i == 0 && ret == MFX_ERR_MORE_EXTBUFFER) { ++ tmp = av_realloc_array(q->extparam_str, q->nb_extparam_str + 1, sizeof(*q->extparam_str)); ++ if (!tmp) ++ return AVERROR(ENOMEM); ++ q->extparam_str = tmp; ++ ++ tmp = av_realloc_array(q->extparam, q->nb_extparam + 1, sizeof(*q->extparam)); ++ if (!tmp) ++ return AVERROR(ENOMEM); ++ q->extparam = tmp; ++ ++ new_ext_buf = (mfxExtBuffer*)av_mallocz(ext_buf.BufferSz); ++ if (!new_ext_buf) ++ return AVERROR(ENOMEM); ++ ++ new_ext_buf->BufferId = ext_buf.BufferId; ++ new_ext_buf->BufferSz = ext_buf.BufferSz; ++ q->extparam_str[q->nb_extparam_str++] = new_ext_buf; ++ q->extparam[q->nb_extparam++] = new_ext_buf; ++ q->param.ExtParam = q->extparam; ++ q->param.NumExtParam = q->nb_extparam; ++ } else { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set parameter: %s\n", param_key); ++ return AVERROR_UNKNOWN; ++ } ++ 
} ++ } ++#else ++ if (q->qsv_params) { ++ av_log(avctx, AV_LOG_WARNING, "MFX string API is not supported, ignore qsv_params option\n"); ++ } ++#endif ++ + ret = MFXVideoENCODE_Query(q->session, &q->param, &q->param); + if (ret == MFX_WRN_PARTIAL_ACCELERATION) { + av_log(avctx, AV_LOG_WARNING, "Encoder will work with partial HW acceleration\n"); +@@ -2415,7 +2482,7 @@ static int encode_frame(AVCodecContext * + + if (frame->pict_type == AV_PICTURE_TYPE_I) { + enc_ctrl->FrameType = MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF; +- if (q->forced_idr) ++ if ((frame->flags & AV_FRAME_FLAG_KEY) || q->forced_idr) + enc_ctrl->FrameType |= MFX_FRAMETYPE_IDR; + } + } +@@ -2649,7 +2716,7 @@ int ff_qsv_enc_close(AVCodecContext *avc + ff_qsv_close_internal_session(&q->internal_qs); + + av_buffer_unref(&q->frames_ctx.hw_frames_ctx); +- av_buffer_unref(&q->frames_ctx.mids_buf); ++ ff_refstruct_unref(&q->frames_ctx.mids); + + cur = q->work_frames; + while (cur) { +@@ -2681,6 +2748,10 @@ int ff_qsv_enc_close(AVCodecContext *avc + av_buffer_unref(&q->opaque_alloc_buf); + #endif + ++ for (int i = 0; i < q->nb_extparam_str; i++) ++ av_free(q->extparam_str[i]); ++ ++ av_freep(&q->extparam_str); + av_freep(&q->extparam); + + return 0; +Index: FFmpeg/libavcodec/qsvenc.h +=================================================================== +--- libavcodec/qsvenc.h ++++ libavcodec/qsvenc.h +@@ -64,7 +64,8 @@ + { "slower", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_2 }, INT_MIN, INT_MAX, VE, .unit = "preset" }, \ + { "veryslow", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BEST_QUALITY }, INT_MIN, INT_MAX, VE, .unit = "preset" }, \ + { "forced_idr", "Forcing I frames as IDR frames", OFFSET(qsv.forced_idr), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, \ +-{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE}, ++{ "low_power", "enable low power mode(experimental: many 
limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE},\ ++{ "qsv_params", "Set QSV encoder parameters as key1=value1:key2=value2:...", OFFSET(qsv.qsv_params), AV_OPT_TYPE_DICT, { 0 }, 0, 0, VE }, + + #if QSV_HAVE_HE + #define QSV_HE_OPTIONS \ +@@ -195,7 +196,11 @@ typedef struct QSVEncContext { + mfxExtBuffer *extparam_internal[5 + (QSV_HAVE_MF * 2) + (QSV_HAVE_EXT_AV1_PARAM * 2) + QSV_HAVE_HE]; + int nb_extparam_internal; + ++ mfxExtBuffer **extparam_str; ++ int nb_extparam_str; ++ + mfxExtBuffer **extparam; ++ int nb_extparam; + + AVFifo *async_fifo; + +@@ -314,6 +319,8 @@ typedef struct QSVEncContext { + int skip_frame; + // This is used for Hyper Encode + int dual_gfx; ++ ++ AVDictionary *qsv_params; + } QSVEncContext; + + int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q); +Index: FFmpeg/libavcodec/qsvenc_av1.c +=================================================================== +--- libavcodec/qsvenc_av1.c ++++ libavcodec/qsvenc_av1.c +@@ -25,6 +25,8 @@ + #include + + #include "libavutil/common.h" ++#include "libavutil/mastering_display_metadata.h" ++#include "libavutil/mem.h" + #include "libavutil/opt.h" + + #include "avcodec.h" +@@ -39,6 +41,75 @@ typedef struct QSVAV1EncContext { + QSVEncContext qsv; + } QSVAV1EncContext; + ++static int qsv_av1_set_encode_ctrl(AVCodecContext *avctx, ++ const AVFrame *frame, mfxEncodeCtrl *enc_ctrl) ++{ ++ QSVAV1EncContext *q = avctx->priv_data; ++ AVFrameSideData *sd; ++ ++ if (!frame || !QSV_RUNTIME_VERSION_ATLEAST(q->qsv.ver, 2, 11)) ++ return 0; ++ ++ sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ if (sd) { ++ AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *)sd->data; ++ if (mdm->has_primaries && mdm->has_luminance) { ++ const int chroma_den = 1 << 16; ++ const int max_luma_den = 1 << 8; ++ const int min_luma_den = 1 << 14; ++ mfxExtMasteringDisplayColourVolume *mdcv = av_mallocz(sizeof(*mdcv)); ++ if 
(!mdcv) ++ return AVERROR(ENOMEM); ++ ++ mdcv->Header.BufferId = MFX_EXTBUFF_MASTERING_DISPLAY_COLOUR_VOLUME; ++ mdcv->Header.BufferSz = sizeof(*mdcv); ++ ++ for (int i = 0; i < 3; i++) { ++ mdcv->DisplayPrimariesX[i] = ++ av_rescale(mdm->display_primaries[i][0].num, chroma_den, ++ mdm->display_primaries[i][0].den); ++ mdcv->DisplayPrimariesY[i] = ++ av_rescale(mdm->display_primaries[i][1].num, chroma_den, ++ mdm->display_primaries[i][1].den); ++ } ++ ++ mdcv->WhitePointX = ++ av_rescale(mdm->white_point[0].num, chroma_den, ++ mdm->white_point[0].den); ++ mdcv->WhitePointY = ++ av_rescale(mdm->white_point[1].num, chroma_den, ++ mdm->white_point[1].den); ++ ++ mdcv->MaxDisplayMasteringLuminance = ++ av_rescale(mdm->max_luminance.num, max_luma_den, ++ mdm->max_luminance.den); ++ mdcv->MinDisplayMasteringLuminance = ++ av_rescale(mdm->min_luminance.num, min_luma_den, ++ mdm->min_luminance.den); ++ ++ enc_ctrl->ExtParam[enc_ctrl->NumExtParam++] = (mfxExtBuffer *)mdcv; ++ } ++ } ++ ++ sd = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ if (sd) { ++ AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data; ++ mfxExtContentLightLevelInfo *clli = av_mallocz(sizeof(*clli)); ++ if (!clli) ++ return AVERROR(ENOMEM); ++ ++ clli->Header.BufferId = MFX_EXTBUFF_CONTENT_LIGHT_LEVEL_INFO; ++ clli->Header.BufferSz = sizeof(*clli); ++ ++ clli->MaxContentLightLevel = clm->MaxCLL; ++ clli->MaxPicAverageLightLevel = clm->MaxFALL; ++ ++ enc_ctrl->ExtParam[enc_ctrl->NumExtParam++] = (mfxExtBuffer *)clli; ++ } ++ ++ return 0; ++} ++ + static av_cold int qsv_enc_init(AVCodecContext *avctx) + { + QSVAV1EncContext *q = avctx->priv_data; +@@ -61,6 +132,8 @@ static av_cold int qsv_enc_init(AVCodecC + return ret; + } + ++ q->qsv.set_encode_ctrl_cb = qsv_av1_set_encode_ctrl; ++ + return ff_qsv_enc_init(avctx, &q->qsv); + } + +Index: FFmpeg/libavfilter/qsvvpp.c +=================================================================== +--- libavfilter/qsvvpp.c ++++ 
libavfilter/qsvvpp.c +@@ -307,7 +307,7 @@ static int fill_frameinfo_by_link(mfxFra + + frames_ctx = (AVHWFramesContext *)link->hw_frames_ctx->data; + frames_hwctx = frames_ctx->hwctx; +- *frameinfo = frames_hwctx->surfaces[0].Info; ++ *frameinfo = frames_hwctx->nb_surfaces ? frames_hwctx->surfaces[0].Info : *frames_hwctx->info; + } else { + pix_fmt = link->format; + desc = av_pix_fmt_desc_get(pix_fmt); +@@ -440,11 +440,6 @@ static QSVFrame *submit_frame(QSVVPPCont + av_frame_free(&qsv_frame->frame); + return NULL; + } +- +- if (av_frame_copy_props(qsv_frame->frame, picref) < 0) { +- av_frame_free(&qsv_frame->frame); +- return NULL; +- } + } else + qsv_frame->frame = av_frame_clone(picref); + +@@ -493,12 +488,6 @@ static QSVFrame *query_frame(QSVVPPConte + if (!out_frame->frame) + return NULL; + +- ret = av_frame_copy_props(out_frame->frame, in); +- if (ret < 0) { +- av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from src to dst.\n"); +- return NULL; +- } +- + ret = av_hwframe_get_buffer(outlink->hw_frames_ctx, out_frame->frame, 0); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Can't allocate a surface.\n"); +@@ -515,12 +504,6 @@ static QSVFrame *query_frame(QSVVPPConte + if (!out_frame->frame) + return NULL; + +- ret = av_frame_copy_props(out_frame->frame, in); +- if (ret < 0) { +- av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from src to dst.\n"); +- return NULL; +- } +- + ret = map_frame_to_surface(out_frame->frame, + &out_frame->surface); + if (ret < 0) +@@ -603,6 +586,26 @@ static int init_vpp_session(AVFilterCont + device_ctx = (AVHWDeviceContext *)device_ref->data; + device_hwctx = device_ctx->hwctx; + ++ /* extract the properties of the "master" session given to us */ ++ ret = MFXQueryIMPL(device_hwctx->session, &impl); ++ if (ret == MFX_ERR_NONE) ++ ret = MFXQueryVersion(device_hwctx->session, &ver); ++ if (ret != MFX_ERR_NONE) { ++ av_log(avctx, AV_LOG_ERROR, "Error querying the session attributes\n"); ++ return AVERROR_UNKNOWN; ++ } 
++ ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ } else { ++ av_log(avctx, AV_LOG_ERROR, "Error unsupported handle type\n"); ++ return AVERROR_UNKNOWN; ++ } ++ + if (outlink->format == AV_PIX_FMT_QSV) { + AVHWFramesContext *out_frames_ctx; + AVBufferRef *out_frames_ref = av_hwframe_ctx_alloc(device_ref); +@@ -624,9 +627,15 @@ static int init_vpp_session(AVFilterCont + out_frames_ctx->width = FFALIGN(outlink->w, 32); + out_frames_ctx->height = FFALIGN(outlink->h, 32); + out_frames_ctx->sw_format = s->out_sw_format; +- out_frames_ctx->initial_pool_size = 64; +- if (avctx->extra_hw_frames > 0) +- out_frames_ctx->initial_pool_size += avctx->extra_hw_frames; ++ ++ if (QSV_RUNTIME_VERSION_ATLEAST(ver, 2, 9) && handle_type != MFX_HANDLE_D3D9_DEVICE_MANAGER) ++ out_frames_ctx->initial_pool_size = 0; ++ else { ++ out_frames_ctx->initial_pool_size = 64; ++ if (avctx->extra_hw_frames > 0) ++ out_frames_ctx->initial_pool_size += avctx->extra_hw_frames; ++ } ++ + out_frames_hwctx->frame_type = s->out_mem_mode; + + ret = av_hwframe_ctx_init(out_frames_ref); +@@ -652,26 +661,6 @@ static int init_vpp_session(AVFilterCont + } else + s->out_mem_mode = MFX_MEMTYPE_SYSTEM_MEMORY; + +- /* extract the properties of the "master" session given to us */ +- ret = MFXQueryIMPL(device_hwctx->session, &impl); +- if (ret == MFX_ERR_NONE) +- ret = MFXQueryVersion(device_hwctx->session, &ver); +- if (ret != MFX_ERR_NONE) { +- av_log(avctx, AV_LOG_ERROR, "Error querying the session attributes\n"); +- return AVERROR_UNKNOWN; +- } +- +- if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_VA_DISPLAY; +- } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D11_DEVICE; +- } 
else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; +- } else { +- av_log(avctx, AV_LOG_ERROR, "Error unsupported handle type\n"); +- return AVERROR_UNKNOWN; +- } +- + ret = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); + if (ret < 0) + return ff_qsvvpp_print_error(avctx, ret, "Error getting the session handle"); +@@ -957,7 +946,7 @@ int ff_qsvvpp_close(AVFilterContext *avc + return 0; + } + +-int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picref) ++int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picref, AVFrame *propref) + { + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = ctx->outputs[0]; +@@ -1014,6 +1003,16 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + return AVERROR(EAGAIN); + break; + } ++ ++ if (propref) { ++ ret1 = av_frame_copy_props(out_frame->frame, propref); ++ if (ret1 < 0) { ++ av_frame_free(&out_frame->frame); ++ av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from src to dst.\n"); ++ return ret1; ++ } ++ } ++ + out_frame->frame->pts = av_rescale_q(out_frame->surface.Data.TimeStamp, + default_tb, outlink->time_base); + +@@ -1099,11 +1098,6 @@ int ff_qsvvpp_create_mfx_session(void *c + if (sts < 0) + return ff_qsvvpp_print_error(ctx, sts, + "Error creating a MFX session"); +- else if (sts > 0) { +- ff_qsvvpp_print_warning(ctx, sts, +- "Warning in MFX session creation"); +- return AVERROR_UNKNOWN; +- } + + *psession = session; + +Index: FFmpeg/libavfilter/qsvvpp.h +=================================================================== +--- libavfilter/qsvvpp.h ++++ libavfilter/qsvvpp.h +@@ -131,7 +131,7 @@ int ff_qsvvpp_init(AVFilterContext *avct + int ff_qsvvpp_close(AVFilterContext *avctx); + + /* vpp filter frame and call the cb if needed */ +-int ff_qsvvpp_filter_frame(QSVVPPContext *vpp, AVFilterLink *inlink, AVFrame *frame); ++int ff_qsvvpp_filter_frame(QSVVPPContext *vpp, AVFilterLink 
*inlink, AVFrame *frame, AVFrame *propref); + + int ff_qsvvpp_print_iopattern(void *log_ctx, int mfx_iopattern, + const char *extra_string); +Index: FFmpeg/libavfilter/vf_overlay_qsv.c +=================================================================== +--- libavfilter/vf_overlay_qsv.c ++++ libavfilter/vf_overlay_qsv.c +@@ -230,13 +230,16 @@ static int process_frame(FFFrameSync *fs + { + AVFilterContext *ctx = fs->parent; + QSVVPPContext *qsv = fs->opaque; +- AVFrame *frame = NULL; ++ AVFrame *frame = NULL, *propref = NULL; + int ret = 0, i; + + for (i = 0; i < ctx->nb_inputs; i++) { + ret = ff_framesync_get_frame(fs, i, &frame, 0); +- if (ret == 0) +- ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame); ++ if (ret == 0) { ++ if (i == 0) ++ propref = frame; ++ ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame, propref); ++ } + if (ret < 0 && ret != AVERROR(EAGAIN)) + break; + } +Index: FFmpeg/libavfilter/vf_stack_qsv.c +=================================================================== +--- libavfilter/vf_stack_qsv.c ++++ libavfilter/vf_stack_qsv.c +@@ -70,13 +70,16 @@ static int process_frame(FFFrameSync *fs + { + AVFilterContext *ctx = fs->parent; + QSVVPPContext *qsv = fs->opaque; +- AVFrame *frame = NULL; ++ AVFrame *frame = NULL, *propref = NULL; + int ret = 0; + + for (int i = 0; i < ctx->nb_inputs; i++) { + ret = ff_framesync_get_frame(fs, i, &frame, 0); +- if (ret == 0) +- ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame); ++ if (ret == 0) { ++ if (i == 0) ++ propref = frame; ++ ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame, propref); ++ } + if (ret < 0 && ret != AVERROR(EAGAIN)) + break; + } +Index: FFmpeg/libavfilter/vf_vpp_qsv.c +=================================================================== +--- libavfilter/vf_vpp_qsv.c ++++ libavfilter/vf_vpp_qsv.c +@@ -748,7 +748,7 @@ static int activate(AVFilterContext *ctx + + if (qsv->session) { + if (in || qsv->eof) { +- ret = ff_qsvvpp_filter_frame(qsv, inlink, in); ++ ret = 
ff_qsvvpp_filter_frame(qsv, inlink, in, in); + av_frame_free(&in); + if (ret == AVERROR(EAGAIN)) + goto not_ready; +Index: FFmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- libavutil/hwcontext_qsv.c ++++ libavutil/hwcontext_qsv.c +@@ -119,8 +119,15 @@ typedef struct QSVFramesContext { + #endif + AVFrame realigned_upload_frame; + AVFrame realigned_download_frame; ++ ++ mfxFrameInfo frame_info; + } QSVFramesContext; + ++typedef struct QSVSurface { ++ mfxFrameSurface1 mfx_surface; ++ AVFrame *child_frame; ++} QSVSurface; ++ + static const struct { + enum AVPixelFormat pix_fmt; + uint32_t fourcc; +@@ -166,6 +173,8 @@ extern int ff_qsv_get_surface_base_handl + enum AVHWDeviceType base_dev_type, + void **base_handle); + ++static int qsv_init_surface(AVHWFramesContext *ctx, mfxFrameSurface1 *surf); ++ + /** + * Caller needs to allocate enough space for base_handle pointer. + **/ +@@ -374,7 +383,32 @@ static void qsv_release_dummy(void *opaq + { + } + +-static AVBufferRef *qsv_pool_alloc(void *opaque, size_t size) ++static void qsv_pool_release(void *opaque, uint8_t *data) ++{ ++ AVHWFramesContext *ctx = (AVHWFramesContext*)opaque; ++ QSVFramesContext *s = ctx->hwctx; ++ QSVSurface *qsv_surface = (QSVSurface *)data; ++ mfxHDLPair *hdl_pair = (mfxHDLPair *)qsv_surface->mfx_surface.Data.MemId; ++ AVHWFramesContext *child_frames_ctx; ++ ++ if (!s->child_frames_ref) ++ return; ++ ++ child_frames_ctx = (AVHWFramesContext*)s->child_frames_ref->data; ++ if (!child_frames_ctx->device_ctx) ++ return; ++ ++#if CONFIG_VAAPI ++ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_VAAPI) ++ av_freep(&hdl_pair->first); ++#endif ++ ++ av_freep(&hdl_pair); ++ av_frame_free(&qsv_surface->child_frame); ++ av_freep(&qsv_surface); ++} ++ ++static AVBufferRef *qsv_fixed_pool_alloc(void *opaque, size_t size) + { + AVHWFramesContext *ctx = (AVHWFramesContext*)opaque; + QSVFramesContext *s = ctx->hwctx; +@@ -389,6 +423,104 @@ static 
AVBufferRef *qsv_pool_alloc(void + return NULL; + } + ++static AVBufferRef *qsv_dynamic_pool_alloc(void *opaque, size_t size) ++{ ++ AVHWFramesContext *ctx = (AVHWFramesContext*)opaque; ++ QSVFramesContext *s = ctx->hwctx; ++ AVHWFramesContext *child_frames_ctx; ++ QSVSurface *qsv_surface = NULL; ++ mfxHDLPair *handle_pairs_internal = NULL; ++ int ret; ++ ++ if (!s->child_frames_ref) ++ goto fail; ++ ++ child_frames_ctx = (AVHWFramesContext*)s->child_frames_ref->data; ++ if (!child_frames_ctx->device_ctx) ++ goto fail; ++ ++#if CONFIG_DXVA2 ++ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) { ++ av_log(ctx, AV_LOG_ERROR, ++ "QSV on dxva2 requires a fixed frame pool size\n"); ++ goto fail; ++ } ++#endif ++ ++ qsv_surface = av_calloc(1, sizeof(*qsv_surface)); ++ if (!qsv_surface) ++ goto fail; ++ ++ qsv_surface->child_frame = av_frame_alloc(); ++ if (!qsv_surface->child_frame) ++ goto fail; ++ ++ ret = av_hwframe_get_buffer(s->child_frames_ref, qsv_surface->child_frame, 0); ++ if (ret < 0) ++ goto fail; ++ ++ handle_pairs_internal = av_calloc(1, sizeof(*handle_pairs_internal)); ++ if (!handle_pairs_internal) ++ goto fail; ++ ++ ret = qsv_init_surface(ctx, &qsv_surface->mfx_surface); ++ if (ret < 0) ++ goto fail; ++ ++#if CONFIG_VAAPI ++ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_VAAPI) { ++ VASurfaceID *surface_id_internal; ++ ++ surface_id_internal = av_calloc(1, sizeof(*surface_id_internal)); ++ if (!surface_id_internal) ++ goto fail; ++ ++ *surface_id_internal = (VASurfaceID)(uintptr_t)qsv_surface->child_frame->data[3]; ++ handle_pairs_internal->first = (mfxHDL)surface_id_internal; ++ handle_pairs_internal->second = (mfxMemId)MFX_INFINITE; ++ } ++#endif ++ ++#if CONFIG_D3D11VA ++ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ AVD3D11VAFramesContext *child_frames_hwctx = child_frames_ctx->hwctx; ++ handle_pairs_internal->first = (mfxMemId)qsv_surface->child_frame->data[0]; ++ ++ if 
(child_frames_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) ++ handle_pairs_internal->second = (mfxMemId)MFX_INFINITE; ++ else ++ handle_pairs_internal->second = (mfxMemId)qsv_surface->child_frame->data[1]; ++ ++ } ++#endif ++ ++ qsv_surface->mfx_surface.Data.MemId = (mfxMemId)handle_pairs_internal; ++ return av_buffer_create((uint8_t *)qsv_surface, sizeof(*qsv_surface), ++ qsv_pool_release, ctx, 0); ++ ++fail: ++ if (qsv_surface) { ++ av_frame_free(&qsv_surface->child_frame); ++ } ++ ++ av_freep(&qsv_surface); ++ av_freep(&handle_pairs_internal); ++ ++ return NULL; ++} ++ ++static AVBufferRef *qsv_pool_alloc(void *opaque, size_t size) ++{ ++ AVHWFramesContext *ctx = (AVHWFramesContext*)opaque; ++ AVQSVFramesContext *hwctx = ctx->hwctx; ++ ++ if (hwctx->nb_surfaces == 0) { ++ return qsv_dynamic_pool_alloc(opaque, size); ++ } else { ++ return qsv_fixed_pool_alloc(opaque, size); ++ } ++} ++ + static int qsv_init_child_ctx(AVHWFramesContext *ctx) + { + QSVDeviceContext *device_priv = ctx->device_ctx->hwctx; +@@ -577,9 +709,28 @@ static int qsv_init_pool(AVHWFramesConte + + int i, ret = 0; + +- if (ctx->initial_pool_size <= 0) { +- av_log(ctx, AV_LOG_ERROR, "QSV requires a fixed frame pool size\n"); ++ if (ctx->initial_pool_size < 0) { ++ av_log(ctx, AV_LOG_ERROR, "Invalid frame pool size\n"); + return AVERROR(EINVAL); ++ } else if (ctx->initial_pool_size == 0) { ++ mfxFrameSurface1 mfx_surf1; ++ ++ ret = qsv_init_child_ctx(ctx); ++ if (ret < 0) ++ return ret; ++ ++ ffhwframesctx(ctx)->pool_internal = av_buffer_pool_init2(sizeof(mfxFrameSurface1), ++ ctx, qsv_pool_alloc, NULL); ++ if (!ffhwframesctx(ctx)->pool_internal) ++ return AVERROR(ENOMEM); ++ ++ memset(&mfx_surf1, 0, sizeof(mfx_surf1)); ++ qsv_init_surface(ctx, &mfx_surf1); ++ s->frame_info = mfx_surf1.Info; ++ frames_hwctx->info = &s->frame_info; ++ frames_hwctx->nb_surfaces = 0; ++ ++ return 0; + } + + s->handle_pairs_internal = av_calloc(ctx->initial_pool_size, +@@ -628,7 +779,7 @@ static mfxStatus 
frame_alloc(mfxHDL pthi + QSVFramesContext *s = ctx->hwctx; + AVQSVFramesContext *hwctx = &s->p; + mfxFrameInfo *i = &req->Info; +- mfxFrameInfo *i1 = &hwctx->surfaces[0].Info; ++ mfxFrameInfo *i1 = hwctx->nb_surfaces ? &hwctx->surfaces[0].Info : hwctx->info; + + if (!(req->Type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) || + !(req->Type & (MFX_MEMTYPE_FROM_VPPIN | MFX_MEMTYPE_FROM_VPPOUT)) || +@@ -1223,7 +1374,7 @@ static int qsv_init_internal_session(AVH + MFX_IOPATTERN_OUT_SYSTEM_MEMORY; + par.AsyncDepth = 1; + +- par.vpp.In = frames_hwctx->surfaces[0].Info; ++ par.vpp.In = frames_hwctx->nb_surfaces ? frames_hwctx->surfaces[0].Info : *frames_hwctx->info; + + /* Apparently VPP requires the frame rate to be set to some value, otherwise + * init will fail (probably for the framerate conversion filter). Since we +@@ -1398,8 +1549,11 @@ static int qsv_frames_derive_from(AVHWFr + dst_hwctx->texture_infos[i].texture = (ID3D11Texture2D*)pair->first; + dst_hwctx->texture_infos[i].index = pair->second == (mfxMemId)MFX_INFINITE ? 
(intptr_t)0 : (intptr_t)pair->second; + } +- ID3D11Texture2D_GetDesc(dst_hwctx->texture_infos[0].texture, &texDesc); +- dst_hwctx->BindFlags = texDesc.BindFlags; ++ if (src_hwctx->nb_surfaces) { ++ ID3D11Texture2D_GetDesc(dst_hwctx->texture_infos[0].texture, &texDesc); ++ dst_hwctx->BindFlags = texDesc.BindFlags; ++ } else ++ dst_hwctx->BindFlags = qsv_get_d3d11va_bind_flags(src_hwctx->frame_type); + } + break; + #endif +@@ -1838,18 +1992,52 @@ static int qsv_transfer_data_to(AVHWFram + return 0; + } + +-static int qsv_frames_derive_to(AVHWFramesContext *dst_ctx, +- AVHWFramesContext *src_ctx, int flags) ++static int qsv_dynamic_frames_derive_to(AVHWFramesContext *dst_ctx, ++ AVHWFramesContext *src_ctx, int flags) + { + QSVFramesContext *s = dst_ctx->hwctx; + AVQSVFramesContext *dst_hwctx = &s->p; +- int i; ++ mfxFrameSurface1 mfx_surf1; + +- if (src_ctx->initial_pool_size == 0) { +- av_log(dst_ctx, AV_LOG_ERROR, "Only fixed-size pools can be " +- "mapped to QSV frames.\n"); +- return AVERROR(EINVAL); ++ switch (src_ctx->device_ctx->type) { ++#if CONFIG_VAAPI ++ case AV_HWDEVICE_TYPE_VAAPI: ++ dst_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; ++ break; ++#endif ++ ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ AVD3D11VAFramesContext *src_hwctx = src_ctx->hwctx; ++ ++ if (src_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; ++ } else { ++ dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; ++ } + } ++ break; ++#endif ++ ++ default: ++ return AVERROR(ENOSYS); ++ } ++ ++ memset(&mfx_surf1, 0, sizeof(mfx_surf1)); ++ qsv_init_surface(dst_ctx, &mfx_surf1); ++ s->frame_info = mfx_surf1.Info; ++ dst_hwctx->info = &s->frame_info; ++ dst_hwctx->nb_surfaces = 0; ++ return 0; ++} ++ ++static int qsv_fixed_frames_derive_to(AVHWFramesContext *dst_ctx, ++ AVHWFramesContext *src_ctx, int flags) ++{ ++ QSVFramesContext *s = dst_ctx->hwctx; ++ AVQSVFramesContext 
*dst_hwctx = &s->p; ++ int i; + + switch (src_ctx->device_ctx->type) { + #if CONFIG_VAAPI +@@ -1941,8 +2129,21 @@ static int qsv_frames_derive_to(AVHWFram + return 0; + } + +-static int qsv_map_to(AVHWFramesContext *dst_ctx, +- AVFrame *dst, const AVFrame *src, int flags) ++static int qsv_frames_derive_to(AVHWFramesContext *dst_ctx, ++ AVHWFramesContext *src_ctx, int flags) ++{ ++ if (src_ctx->initial_pool_size < 0) { ++ av_log(dst_ctx, AV_LOG_ERROR, "Invalid src frame pool. \n"); ++ return AVERROR(EINVAL); ++ } else if (src_ctx->initial_pool_size == 0) { ++ return qsv_dynamic_frames_derive_to(dst_ctx, src_ctx, flags); ++ } else { ++ return qsv_fixed_frames_derive_to(dst_ctx, src_ctx, flags); ++ } ++} ++ ++static int qsv_fixed_pool_map_to(AVHWFramesContext *dst_ctx, ++ AVFrame *dst, const AVFrame *src, int flags) + { + AVQSVFramesContext *hwctx = dst_ctx->hwctx; + int i, err, index = -1; +@@ -2001,6 +2202,133 @@ static int qsv_map_to(AVHWFramesContext + return 0; + } + ++static void qsv_dynamic_pool_unmap(AVHWFramesContext *ctx, HWMapDescriptor *hwmap) ++{ ++ mfxFrameSurface1 *surfaces_internal = (mfxFrameSurface1 *)hwmap->priv; ++ mfxHDLPair *handle_pairs_internal = (mfxHDLPair *)surfaces_internal->Data.MemId; ++ AVHWFramesContext *src_ctx = (AVHWFramesContext *)ffhwframesctx(ctx)->source_frames->data; ++ ++ switch (src_ctx->format) { ++#if CONFIG_VAAPI ++ case AV_PIX_FMT_VAAPI: ++ { ++ av_freep(&handle_pairs_internal->first); ++ ++ break; ++ } ++#endif ++ ++#if CONFIG_D3D11VA ++ case AV_PIX_FMT_D3D11: ++ { ++ /* Do nothing */ ++ break; ++ } ++#endif ++ default: ++ av_log(ctx, AV_LOG_ERROR, "Should not reach here. 
\n"); ++ break; ++ } ++ ++ av_freep(&handle_pairs_internal); ++ av_freep(&surfaces_internal); ++} ++ ++static int qsv_dynamic_pool_map_to(AVHWFramesContext *dst_ctx, ++ AVFrame *dst, const AVFrame *src, int flags) ++{ ++ mfxFrameSurface1 *surfaces_internal = NULL; ++ mfxHDLPair *handle_pairs_internal = NULL; ++ int ret = 0; ++ ++ surfaces_internal = av_calloc(1, sizeof(*surfaces_internal)); ++ if (!surfaces_internal) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ handle_pairs_internal = av_calloc(1, sizeof(*handle_pairs_internal)); ++ if (!handle_pairs_internal) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ ret = qsv_init_surface(dst_ctx, surfaces_internal); ++ if (ret < 0) ++ goto fail; ++ ++ switch (src->format) { ++#if CONFIG_VAAPI ++ case AV_PIX_FMT_VAAPI: ++ { ++ VASurfaceID *surface_id_internal; ++ ++ surface_id_internal = av_calloc(1, sizeof(*surface_id_internal)); ++ if (!surface_id_internal) { ++ ret =AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ *surface_id_internal = (VASurfaceID)(uintptr_t)src->data[3]; ++ handle_pairs_internal->first = (mfxHDL)surface_id_internal; ++ handle_pairs_internal->second = (mfxMemId)MFX_INFINITE; ++ ++ break; ++ } ++#endif ++ ++#if CONFIG_D3D11VA ++ case AV_PIX_FMT_D3D11: ++ { ++ AVHWFramesContext *src_ctx = (AVHWFramesContext*)src->hw_frames_ctx->data; ++ AVD3D11VAFramesContext *src_hwctx = src_ctx->hwctx; ++ ++ handle_pairs_internal->first = (mfxMemId)src->data[0]; ++ ++ if (src_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ handle_pairs_internal->second = (mfxMemId)MFX_INFINITE; ++ } else { ++ handle_pairs_internal->second = (mfxMemId)src->data[1]; ++ } ++ ++ break; ++ } ++#endif ++ default: ++ ret = AVERROR(ENOSYS); ++ goto fail; ++ } ++ ++ surfaces_internal->Data.MemId = (mfxMemId)handle_pairs_internal; ++ ++ ret = ff_hwframe_map_create(dst->hw_frames_ctx, ++ dst, src, qsv_dynamic_pool_unmap, surfaces_internal); ++ if (ret) ++ goto fail; ++ ++ dst->width = src->width; ++ dst->height = src->height; ++ 
dst->data[3] = (uint8_t*)surfaces_internal; ++ ++ return 0; ++ ++fail: ++ av_freep(&handle_pairs_internal); ++ av_freep(&surfaces_internal); ++ return ret; ++} ++ ++static int qsv_map_to(AVHWFramesContext *dst_ctx, ++ AVFrame *dst, const AVFrame *src, int flags) ++{ ++ AVQSVFramesContext *hwctx = dst_ctx->hwctx; ++ ++ if (hwctx->nb_surfaces) ++ return qsv_fixed_pool_map_to(dst_ctx, dst, src, flags); ++ else ++ return qsv_dynamic_pool_map_to(dst_ctx, dst, src, flags); ++} ++ + static int qsv_frames_get_constraints(AVHWDeviceContext *ctx, + const void *hwconfig, + AVHWFramesConstraints *constraints) +Index: FFmpeg/libavutil/hwcontext_qsv.h +=================================================================== +--- libavutil/hwcontext_qsv.h ++++ libavutil/hwcontext_qsv.h +@@ -25,8 +25,8 @@ + * @file + * An API-specific header for AV_HWDEVICE_TYPE_QSV. + * +- * This API does not support dynamic frame pools. AVHWFramesContext.pool must +- * contain AVBufferRefs whose data pointer points to an mfxFrameSurface1 struct. ++ * AVHWFramesContext.pool must contain AVBufferRefs whose data pointer points ++ * to a mfxFrameSurface1 struct. + */ + + /** +@@ -51,13 +51,36 @@ typedef struct AVQSVDeviceContext { + * This struct is allocated as AVHWFramesContext.hwctx + */ + typedef struct AVQSVFramesContext { ++ /** ++ * A pointer to a mfxFrameSurface1 struct ++ * ++ * It is available when nb_surfaces is non-zero. ++ */ + mfxFrameSurface1 *surfaces; ++ ++ /** ++ * Number of frames in the pool ++ * ++ * It is 0 for dynamic frame pools or AVHWFramesContext.initial_pool_size ++ * for fixed frame pools. ++ * ++ * Note only oneVPL GPU runtime 2.9+ can support dynamic frame pools ++ * on d3d11va or vaapi ++ */ + int nb_surfaces; + + /** + * A combination of MFX_MEMTYPE_* describing the frame pool. + */ + int frame_type; ++ ++ /** ++ * A pointer to a mfxFrameInfo struct ++ * ++ * It is available when nb_surfaces is 0, all buffers allocated from the ++ * pool have the same mfxFrameInfo. 
++ */ ++ mfxFrameInfo *info; + } AVQSVFramesContext; + + #endif /* AVUTIL_HWCONTEXT_QSV_H */ diff --git a/cross/ffmpeg7/patches/1020-jellyfin-0020-backport-fixes-for-qsv-from-cartwheel.patch b/cross/ffmpeg7/patches/1020-jellyfin-0020-backport-fixes-for-qsv-from-cartwheel.patch new file mode 100644 index 00000000000..e1c3e353e19 --- /dev/null +++ b/cross/ffmpeg7/patches/1020-jellyfin-0020-backport-fixes-for-qsv-from-cartwheel.patch @@ -0,0 +1,343 @@ +Index: FFmpeg/libavcodec/qsv_internal.h +=================================================================== +--- libavcodec/qsv_internal.h ++++ libavcodec/qsv_internal.h +@@ -56,6 +56,8 @@ + + #define QSV_MAX_FRAME_EXT_PARAMS 4 + ++#define QSV_PAYLOAD_SIZE 1024 ++ + #define QSV_VERSION_ATLEAST(MAJOR, MINOR) \ + (MFX_VERSION_MAJOR > (MAJOR) || \ + MFX_VERSION_MAJOR == (MAJOR) && MFX_VERSION_MINOR >= (MINOR)) +Index: FFmpeg/libavcodec/qsvdec.c +=================================================================== +--- libavcodec/qsvdec.c ++++ libavcodec/qsvdec.c +@@ -43,13 +43,16 @@ + #include "libavutil/film_grain_params.h" + #include "libavutil/mastering_display_metadata.h" + #include "libavutil/avassert.h" ++#include "libavutil/stereo3d.h" + + #include "avcodec.h" + #include "codec_internal.h" + #include "internal.h" + #include "decode.h" + #include "hwconfig.h" ++#include "get_bits.h" + #include "qsv.h" ++#include "h264_sei.h" + #include "qsv_internal.h" + #include "refstruct.h" + +@@ -111,8 +114,13 @@ typedef struct QSVContext { + + char *load_plugins; + ++ mfxPayload payload; ++ + mfxExtBuffer **ext_buffers; + int nb_ext_buffers; ++ ++ H264SEIContext sei; ++ H264ParamSets ps; + } QSVContext; + + static const AVCodecHWConfigInternal *const qsv_hw_configs[] = { +@@ -784,6 +792,147 @@ static int qsv_export_hdr_side_data_av1( + + #endif + ++static int h264_decode_fpa(H2645SEIFramePacking *fpa, AVFrame *frame) ++{ ++ if (!fpa || !frame) { ++ return AVERROR(EINVAL); ++ } ++ ++ if (!fpa->arrangement_cancel_flag && ++ 
fpa->arrangement_type <= 6 && ++ fpa->content_interpretation_type > 0 && ++ fpa->content_interpretation_type < 3) { ++ AVStereo3D *stereo = av_stereo3d_create_side_data(frame); ++ if (stereo) { ++ switch (fpa->arrangement_type) { ++ case 0: ++ stereo->type = AV_STEREO3D_CHECKERBOARD; ++ break; ++ case 1: ++ stereo->type = AV_STEREO3D_COLUMNS; ++ break; ++ case 2: ++ stereo->type = AV_STEREO3D_LINES; ++ break; ++ case 3: ++ if (fpa->quincunx_sampling_flag) ++ stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX; ++ else ++ stereo->type = AV_STEREO3D_SIDEBYSIDE; ++ break; ++ case 4: ++ stereo->type = AV_STEREO3D_TOPBOTTOM; ++ break; ++ case 5: ++ stereo->type = AV_STEREO3D_FRAMESEQUENCE; ++ if (fpa->current_frame_is_frame0_flag) ++ stereo->view = AV_STEREO3D_VIEW_LEFT; ++ else ++ stereo->view = AV_STEREO3D_VIEW_RIGHT; ++ break; ++ case 6: ++ stereo->type = AV_STEREO3D_2D; ++ break; ++ } ++ ++ if (fpa->content_interpretation_type == 2) ++ stereo->flags = AV_STEREO3D_FLAG_INVERT; ++ } ++ } ++ return 0; ++} ++ ++static int h264_parse_side_data(AVCodecContext *avctx, QSVContext *q, AVFrame *frame) ++{ ++ GetBitContext gb_payload; ++ uint8_t *sei_buffer; ++ int sei_buffer_index; ++ int ret; ++ ++ /* remove emulation prevention bytes */ ++ sei_buffer = (uint8_t *)av_mallocz(q->payload.NumBit / 8); ++ if (!sei_buffer) { ++ av_freep(&sei_buffer); ++ return AVERROR(ENOMEM); ++ } ++ sei_buffer_index = 0; ++ for (int i = 0; i < q->payload.NumBit / 8; i++) { ++ if (q->payload.Data[i] == 3) ++ i++; ++ sei_buffer[sei_buffer_index] = q->payload.Data[i]; ++ sei_buffer_index += 1; ++ } ++ ++ ret = init_get_bits8(&gb_payload, sei_buffer, sei_buffer_index+1); ++ if (ret < 0) { ++ av_freep(&sei_buffer); ++ return ret; ++ } ++ ++ ret = ff_h264_sei_decode(&q->sei, &gb_payload, &q->ps, avctx); ++ if (ret < 0) { ++ av_freep(&sei_buffer); ++ return ret; ++ } ++ ++ switch (q->payload.Type) { ++ case SEI_TYPE_FRAME_PACKING_ARRANGEMENT: ++ ret = h264_decode_fpa(&q->sei.common.frame_packing, frame); 
++ break; ++ default: ++ break; ++ } ++ ++ av_freep(&sei_buffer); ++ return ret; ++} ++ ++static int extract_frame_side_data(AVCodecContext *avctx, QSVContext *q, AVFrame *frame) ++{ ++ mfxU64 ts; ++ mfxStatus sts; ++ int ret = 0; ++ ++ if (q->payload.BufSize == 0) { ++ q->payload.Data = av_mallocz(QSV_PAYLOAD_SIZE); ++ if (!q->payload.Data) { ++ av_freep(&q->payload.Data); ++ return AVERROR(ENOMEM); ++ } ++ q->payload.BufSize = QSV_PAYLOAD_SIZE; ++ } ++ ++ sts = MFX_ERR_NONE; ++ while (sts == MFX_ERR_NONE) { ++ ++ sts = MFXVideoDECODE_GetPayload(q->session, &ts, &q->payload); ++ ++ if (sts == MFX_ERR_NOT_ENOUGH_BUFFER) { ++ av_log(avctx, AV_LOG_VERBOSE, "Space for SEI is not enough. One SEI will be skipped\n"); ++ continue; ++ } else if (sts != MFX_ERR_NONE || q->payload.NumBit == 0) { ++ break; ++ } ++ ++ if (q->payload.Type != SEI_TYPE_FRAME_PACKING_ARRANGEMENT) ++ continue; ++ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ ret = h264_parse_side_data(avctx, q, frame); ++ break; ++ default: ++ break; ++ } ++ ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_VERBOSE, "parse side data failed\n"); ++ break; ++ } ++ } ++ return ret; ++} ++ + static int qsv_decode(AVCodecContext *avctx, QSVContext *q, + AVFrame *frame, int *got_frame, + const AVPacket *avpkt) +@@ -895,6 +1044,10 @@ static int qsv_decode(AVCodecContext *av + + outsurf = &aframe.frame->surface; + ++ ret = extract_frame_side_data(avctx, q, frame); ++ if (ret < 0) ++ av_log(avctx, AV_LOG_WARNING, "Extracting side from packet failed\n"); ++ + frame->pts = MFX_PTS_TO_PTS(outsurf->Data.TimeStamp, avctx->pkt_timebase); + #if QSV_VERSION_ATLEAST(1, 34) + if ((avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) && +@@ -985,6 +1138,8 @@ static void qsv_decode_close_qsvcontext( + av_buffer_unref(&q->frames_ctx.hw_frames_ctx); + ff_refstruct_unref(&q->frames_ctx.mids); + av_buffer_pool_uninit(&q->pool); ++ ++ av_freep(&q->payload.Data); + } + + static int qsv_process_data(AVCodecContext *avctx, 
QSVContext *q, +Index: FFmpeg/libavcodec/qsvenc.c +=================================================================== +--- libavcodec/qsvenc.c ++++ libavcodec/qsvenc.c +@@ -205,6 +205,7 @@ static void dump_video_param(AVCodecCont + #endif + + const char *tmp_str = NULL; ++ mfxExtHEVCParam *exthevcparam = NULL; + + if (q->co2_idx > 0) + co2 = (mfxExtCodingOption2*)coding_opts[q->co2_idx]; +@@ -220,6 +221,8 @@ static void dump_video_param(AVCodecCont + exthypermodeparam = (mfxExtHyperModeParam *)coding_opts[q->exthypermodeparam_idx]; + #endif + ++ if (q->exthevcparam_idx > 0) ++ exthevcparam = (mfxExtHEVCParam *)coding_opts[q->exthevcparam_idx]; + av_log(avctx, AV_LOG_VERBOSE, "profile: %s; level: %"PRIu16"\n", + print_profile(avctx->codec_id, info->CodecProfile), info->CodecLevel); + +@@ -400,6 +403,11 @@ static void dump_video_param(AVCodecCont + av_log(avctx, AV_LOG_VERBOSE, "\n"); + } + #endif ++ if (exthevcparam && ++ exthevcparam->GeneralConstraintFlags == MFX_HEVC_CONSTR_REXT_ONE_PICTURE_ONLY && ++ avctx->codec_id == AV_CODEC_ID_HEVC && ++ info->CodecProfile == MFX_PROFILE_HEVC_MAIN10) ++ av_log(avctx, AV_LOG_VERBOSE, "Main10sp (Main10 profile and one_pic_only flag): enable\n"); + } + + static void dump_video_vp9_param(AVCodecContext *avctx, QSVEncContext *q, +@@ -1211,6 +1219,18 @@ static int init_video_param(AVCodecConte + q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->exthevctiles; + } + ++ if (avctx->codec_id == AV_CODEC_ID_HEVC && q->main10sp) { ++ if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 2, 0)) { ++ q->param.mfx.CodecProfile = MFX_PROFILE_HEVC_MAIN10; ++ q->exthevcparam.Header.BufferId = MFX_EXTBUFF_HEVC_PARAM; ++ q->exthevcparam.Header.BufferSz = sizeof(q->exthevcparam); ++ q->exthevcparam.GeneralConstraintFlags = MFX_HEVC_CONSTR_REXT_ONE_PICTURE_ONLY; ++ q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->exthevcparam; ++ } else ++ av_log(avctx, AV_LOG_WARNING, ++ "This version of runtime doesn't support 10bit 
single still picture\n"); ++ } ++ + q->extvsi.VideoFullRange = (avctx->color_range == AVCOL_RANGE_JPEG); + q->extvsi.ColourDescriptionPresent = 0; + +@@ -1463,12 +1483,17 @@ static int qsv_retrieve_enc_params(AVCod + }; + #endif + +- mfxExtBuffer *ext_buffers[6 + QSV_HAVE_HE]; ++ mfxExtHEVCParam hevc_param_buf = { ++ .Header.BufferId = MFX_EXTBUFF_HEVC_PARAM, ++ .Header.BufferSz = sizeof(hevc_param_buf), ++ }; + ++ mfxExtBuffer *ext_buffers[7 + QSV_HAVE_HE]; + int need_pps = avctx->codec_id != AV_CODEC_ID_MPEG2VIDEO; + int ret, ext_buf_num = 0, extradata_offset = 0; + + q->co2_idx = q->co3_idx = q->exthevctiles_idx = q->exthypermodeparam_idx = -1; ++ q->exthevcparam_idx = -1; + ext_buffers[ext_buf_num++] = (mfxExtBuffer*)&extradata; + ext_buffers[ext_buf_num++] = (mfxExtBuffer*)&co; + +@@ -1496,6 +1521,10 @@ static int qsv_retrieve_enc_params(AVCod + ext_buffers[ext_buf_num++] = (mfxExtBuffer*)&hyper_mode_param_buf; + } + #endif ++ if (avctx->codec_id == AV_CODEC_ID_HEVC && QSV_RUNTIME_VERSION_ATLEAST(q->ver, 2, 0)) { ++ q->exthevcparam_idx = ext_buf_num; ++ ext_buffers[ext_buf_num++] = (mfxExtBuffer*)&hevc_param_buf; ++ } + + q->param.ExtParam = ext_buffers; + q->param.NumExtParam = ext_buf_num; +Index: FFmpeg/libavcodec/qsvenc.h +=================================================================== +--- libavcodec/qsvenc.h ++++ libavcodec/qsvenc.h +@@ -177,6 +177,7 @@ typedef struct QSVEncContext { + mfxExtMultiFrameControl extmfc; + #endif + mfxExtHEVCTiles exthevctiles; ++ mfxExtHEVCParam exthevcparam; + mfxExtVP9Param extvp9param; + #if QSV_HAVE_EXT_AV1_PARAM + mfxExtAV1TileParam extav1tileparam; +@@ -193,7 +194,7 @@ typedef struct QSVEncContext { + + mfxExtVideoSignalInfo extvsi; + +- mfxExtBuffer *extparam_internal[5 + (QSV_HAVE_MF * 2) + (QSV_HAVE_EXT_AV1_PARAM * 2) + QSV_HAVE_HE]; ++ mfxExtBuffer *extparam_internal[6 + (QSV_HAVE_MF * 2) + (QSV_HAVE_EXT_AV1_PARAM * 2) + QSV_HAVE_HE]; + int nb_extparam_internal; + + mfxExtBuffer **extparam_str; +@@ -321,6 
+322,9 @@ typedef struct QSVEncContext { + int dual_gfx; + + AVDictionary *qsv_params; ++ int exthevcparam_idx; ++ int main10sp; ++ + } QSVEncContext; + + int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q); +Index: FFmpeg/libavcodec/qsvenc_hevc.c +=================================================================== +--- libavcodec/qsvenc_hevc.c ++++ libavcodec/qsvenc_hevc.c +@@ -363,6 +363,9 @@ static const AVOption options[] = { + { "int_ref_qp_delta", "QP difference for the refresh MBs", OFFSET(qsv.int_ref_qp_delta), AV_OPT_TYPE_INT, { .i64 = INT16_MIN }, INT16_MIN, INT16_MAX, VE }, + { "int_ref_cycle_dist", "Distance between the beginnings of the intra-refresh cycles in frames", OFFSET(qsv.int_ref_cycle_dist), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT16_MAX, VE }, + ++#if QSV_ONEVPL ++ { "main10sp", "This profile allow to encode 10 bit single still picture", OFFSET(qsv.main10sp), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, ++#endif + { NULL }, + }; + diff --git a/cross/ffmpeg7/patches/1021-jellyfin-0021-add-fixes-for-qsv-vpp-filters.patch b/cross/ffmpeg7/patches/1021-jellyfin-0021-add-fixes-for-qsv-vpp-filters.patch new file mode 100644 index 00000000000..c857c7e500e --- /dev/null +++ b/cross/ffmpeg7/patches/1021-jellyfin-0021-add-fixes-for-qsv-vpp-filters.patch @@ -0,0 +1,342 @@ +Index: FFmpeg/libavfilter/qsvvpp.c +=================================================================== +--- libavfilter/qsvvpp.c ++++ libavfilter/qsvvpp.c +@@ -167,7 +167,7 @@ int ff_qsvvpp_print_warning(void *log_ct + const char *desc; + int ret; + ret = qsv_map_error(err, &desc); +- av_log(log_ctx, AV_LOG_WARNING, "%s: %s (%d)\n", warning_string, desc, err); ++ av_log(log_ctx, AV_LOG_VERBOSE, "%s: %s (%d)\n", warning_string, desc, err); + return ret; + } + +@@ -458,8 +458,12 @@ static QSVFrame *submit_frame(QSVVPPCont + !(qsv_frame->frame->flags & AV_FRAME_FLAG_INTERLACED) ? MFX_PICSTRUCT_PROGRESSIVE : + ((qsv_frame->frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 
MFX_PICSTRUCT_FIELD_TFF : + MFX_PICSTRUCT_FIELD_BFF); +- if (qsv_frame->frame->repeat_pict == 1) ++ if (qsv_frame->frame->repeat_pict == 1) { + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; ++ qsv_frame->surface.Info.PicStruct |= ++ (qsv_frame->frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? MFX_PICSTRUCT_FIELD_TFF : ++ MFX_PICSTRUCT_FIELD_BFF; ++ } + else if (qsv_frame->frame->repeat_pict == 2) + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; + else if (qsv_frame->frame->repeat_pict == 4) +@@ -986,6 +990,23 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + return AVERROR(ENOMEM); + } + ++ /* Copy metadata before initializing vpp session, ++ * which contains callback to drop the HDR metadata */ ++ if (propref) { ++ int flags = out_frame->frame->flags; ++ int64_t duration = out_frame->frame->duration; ++ ++ ret1 = av_frame_copy_props(out_frame->frame, propref); ++ if (ret1 < 0) { ++ av_frame_free(&out_frame->frame); ++ av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from src to dst.\n"); ++ return ret1; ++ } ++ ++ out_frame->frame->flags = flags; ++ out_frame->frame->duration = duration; ++ } ++ + ret = qsvvpp_init_vpp_session(ctx, s, in_frame, out_frame); + if (ret) + return ret; +@@ -1004,15 +1025,6 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + break; + } + +- if (propref) { +- ret1 = av_frame_copy_props(out_frame->frame, propref); +- if (ret1 < 0) { +- av_frame_free(&out_frame->frame); +- av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from src to dst.\n"); +- return ret1; +- } +- } +- + out_frame->frame->pts = av_rescale_q(out_frame->surface.Data.TimeStamp, + default_tb, outlink->time_base); + +Index: FFmpeg/libavfilter/vf_overlay_qsv.c +=================================================================== +--- libavfilter/vf_overlay_qsv.c ++++ libavfilter/vf_overlay_qsv.c +@@ -228,43 +228,47 @@ static int config_overlay_input(AVFilter + + static int process_frame(FFFrameSync *fs) + { +- AVFilterContext *ctx 
= fs->parent; +- QSVVPPContext *qsv = fs->opaque; +- AVFrame *frame = NULL, *propref = NULL; +- int ret = 0, i; +- +- for (i = 0; i < ctx->nb_inputs; i++) { +- ret = ff_framesync_get_frame(fs, i, &frame, 0); +- if (ret == 0) { +- if (i == 0) +- propref = frame; +- ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame, propref); +- } +- if (ret < 0 && ret != AVERROR(EAGAIN)) +- break; +- } ++ AVFilterContext *ctx = fs->parent; ++ QSVVPPContext *qsv = fs->opaque; ++ AVFilterLink *in0 = ctx->inputs[0]; ++ AVFilterLink *in1 = ctx->inputs[1]; ++ AVFrame *main = NULL; ++ AVFrame *overlay = NULL; ++ int ret = 0; ++ ++ ret = ff_framesync_get_frame(fs, 0, &main, 0); ++ if (ret < 0) ++ return ret; ++ ret = ff_framesync_get_frame(fs, 1, &overlay, 0); ++ if (ret < 0) ++ return ret; ++ ++ if (!main) ++ return AVERROR_BUG; + +- return ret; ++ /* composite main frame */ ++ ret = ff_qsvvpp_filter_frame(qsv, in0, main, main); ++ if (ret < 0 && ret != AVERROR(EAGAIN)) ++ return ret; ++ ++ /* composite overlay frame */ ++ /* or overwrite main frame again if the overlay frame isn't ready yet */ ++ return ff_qsvvpp_filter_frame(qsv, overlay ? in1 : in0, overlay ? overlay : main, main); + } + + static int init_framesync(AVFilterContext *ctx) + { +- QSVOverlayContext *s = ctx->priv; +- int ret, i; ++ QSVOverlayContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ int ret; + +- s->fs.on_event = process_frame; +- s->fs.opaque = s; +- ret = ff_framesync_init(&s->fs, ctx, ctx->nb_inputs); ++ ret = ff_framesync_init_dualinput(&s->fs, ctx); + if (ret < 0) + return ret; + +- for (i = 0; i < ctx->nb_inputs; i++) { +- FFFrameSyncIn *in = &s->fs.in[i]; +- in->before = EXT_STOP; +- in->after = EXT_INFINITY; +- in->sync = i ? 
1 : 2; +- in->time_base = ctx->inputs[i]->time_base; +- } ++ s->fs.time_base = outlink->time_base; ++ s->fs.on_event = process_frame; ++ s->fs.opaque = s; + + return ff_framesync_configure(&s->fs); + } +@@ -285,12 +289,6 @@ static int config_output(AVFilterLink *o + return AVERROR(EINVAL); + } else if (in0->format == AV_PIX_FMT_QSV) { + AVHWFramesContext *hw_frame0 = (AVHWFramesContext *)in0->hw_frames_ctx->data; +- AVHWFramesContext *hw_frame1 = (AVHWFramesContext *)in1->hw_frames_ctx->data; +- +- if (hw_frame0->device_ctx != hw_frame1->device_ctx) { +- av_log(ctx, AV_LOG_ERROR, "Inputs with different underlying QSV devices are forbidden.\n"); +- return AVERROR(EINVAL); +- } + vpp->qsv_param.out_sw_format = hw_frame0->sw_format; + } + +@@ -372,6 +370,7 @@ static int overlay_qsv_query_formats(AVF + static const enum AVPixelFormat main_in_fmts[] = { + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, + AV_PIX_FMT_YUYV422, + AV_PIX_FMT_RGB32, + AV_PIX_FMT_QSV, +@@ -379,6 +378,7 @@ static int overlay_qsv_query_formats(AVF + }; + static const enum AVPixelFormat out_pix_fmts[] = { + AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, + AV_PIX_FMT_QSV, + AV_PIX_FMT_NONE + }; +Index: FFmpeg/libavfilter/vf_vpp_qsv.c +=================================================================== +--- libavfilter/vf_vpp_qsv.c ++++ libavfilter/vf_vpp_qsv.c +@@ -388,6 +388,30 @@ static mfxStatus get_mfx_version(const A + return MFXQueryVersion(device_hwctx->session, mfx_version); + } + ++static mfxStatus get_mfx_platform(const AVFilterContext *ctx, mfxPlatform *mfx_platform) ++{ ++ const AVFilterLink *inlink = ctx->inputs[0]; ++ AVBufferRef *device_ref; ++ AVHWDeviceContext *device_ctx; ++ AVQSVDeviceContext *device_hwctx; ++ ++ if (inlink->hw_frames_ctx) { ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext *)inlink->hw_frames_ctx->data; ++ device_ref = frames_ctx->device_ref; ++ } else if (ctx->hw_device_ctx) { ++ device_ref = ctx->hw_device_ctx; ++ } else { ++ mfx_platform->CodeName = 0; 
++ mfx_platform->DeviceId = 0; ++ return MFX_ERR_NONE; ++ } ++ ++ device_ctx = (AVHWDeviceContext *)device_ref->data; ++ device_hwctx = device_ctx->hwctx; ++ ++ return MFXVideoCORE_QueryPlatform(device_hwctx->session, mfx_platform); ++} ++ + static int vpp_set_frame_ext_params(AVFilterContext *ctx, const AVFrame *in, AVFrame *out, QSVVPPFrameParam *fp) + { + #if QSV_ONEVPL +@@ -461,14 +485,19 @@ static int vpp_set_frame_ext_params(AVFi + + memset(&clli_conf, 0, sizeof(mfxExtContentLightLevelInfo)); + sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); +- if (vpp->tonemap && sd) { +- AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data; ++ if (vpp->tonemap) { ++ AVContentLightMetadata *clm = sd ? (AVContentLightMetadata *)sd->data : NULL; + +- clli_conf.Header.BufferId = MFX_EXTBUFF_CONTENT_LIGHT_LEVEL_INFO; +- clli_conf.Header.BufferSz = sizeof(mfxExtContentLightLevelInfo); +- clli_conf.MaxContentLightLevel = FFMIN(clm->MaxCLL, 65535); +- clli_conf.MaxPicAverageLightLevel = FFMIN(clm->MaxFALL, 65535); +- tm = 1; ++ // Dumped from VP HAL, VPL requires at least one type of the metadata to trigger tone-mapping ++ #define HAL_HDR_DEFAULT_MAXCLL 4000 ++ #define HAL_HDR_DEFAULT_MAXFALL 400 ++ if (clm || !tm) { ++ clli_conf.Header.BufferId = MFX_EXTBUFF_CONTENT_LIGHT_LEVEL_INFO; ++ clli_conf.Header.BufferSz = sizeof(mfxExtContentLightLevelInfo); ++ clli_conf.MaxContentLightLevel = FFMIN(clm ? clm->MaxCLL : HAL_HDR_DEFAULT_MAXCLL, 65535); ++ clli_conf.MaxPicAverageLightLevel = FFMIN(clm ? clm->MaxFALL : HAL_HDR_DEFAULT_MAXFALL, 65535); ++ tm = 1; ++ } + } + + if (tm) { +@@ -494,9 +523,9 @@ static int vpp_set_frame_ext_params(AVFi + outvsi_conf.Header.BufferId = MFX_EXTBUFF_VIDEO_SIGNAL_INFO_OUT; + outvsi_conf.Header.BufferSz = sizeof(mfxExtVideoSignalInfo); + outvsi_conf.VideoFullRange = (out->color_range == AVCOL_RANGE_JPEG); +- outvsi_conf.ColourPrimaries = (out->color_primaries == AVCOL_PRI_UNSPECIFIED) ? 
AVCOL_PRI_BT709 : out->color_primaries; +- outvsi_conf.TransferCharacteristics = (out->color_trc == AVCOL_TRC_UNSPECIFIED) ? AVCOL_TRC_BT709 : out->color_trc; +- outvsi_conf.MatrixCoefficients = (out->colorspace == AVCOL_SPC_UNSPECIFIED) ? AVCOL_SPC_BT709 : out->colorspace; ++ outvsi_conf.ColourPrimaries = (out->color_primaries == AVCOL_PRI_UNSPECIFIED) ? invsi_conf.ColourPrimaries : out->color_primaries; ++ outvsi_conf.TransferCharacteristics = (out->color_trc == AVCOL_TRC_UNSPECIFIED) ? invsi_conf.TransferCharacteristics : out->color_trc; ++ outvsi_conf.MatrixCoefficients = (out->colorspace == AVCOL_SPC_UNSPECIFIED) ? invsi_conf.MatrixCoefficients : out->colorspace; + outvsi_conf.ColourDescriptionPresent = 1; + + if (memcmp(&vpp->invsi_conf, &invsi_conf, sizeof(mfxExtVideoSignalInfo)) || +@@ -686,12 +715,24 @@ static int config_output(AVFilterLink *o + + if (inlink->w != outlink->w || inlink->h != outlink->h || in_format != vpp->out_format) { + if (QSV_RUNTIME_VERSION_ATLEAST(mfx_version, 1, 19)) { ++ mfxPlatform mfx_platform; ++ int compute = 0; + int mode = vpp->scale_mode; ++ int vpl = QSV_RUNTIME_VERSION_ATLEAST(mfx_version, 1, 255); + +-#if QSV_ONEVPL +- if (mode > 2) +- mode = MFX_SCALING_MODE_VENDOR + mode - 2; +-#endif ++ /* Compute mode is only available on DG2+ platforms */ ++ if (vpl && get_mfx_platform(ctx, &mfx_platform) == MFX_ERR_NONE) { ++ int code_name = mfx_platform.CodeName; ++ compute = code_name >= 45 && ++ code_name <= 54 && ++ code_name != 55 && ++ code_name != 50; ++ } ++ ++ if (mode == -1) ++ mode = (vpl && compute) ? 1001 : MFX_SCALING_MODE_DEFAULT; ++ else if (mode > 2) ++ mode = vpl ? 
(1000 + mode - 2) : MFX_SCALING_MODE_DEFAULT; + + INIT_MFX_EXTBUF(scale_conf, MFX_EXTBUFF_VPP_SCALING); + SET_MFX_PARAM_FIELD(scale_conf, ScalingMode, mode); +@@ -880,19 +921,13 @@ static const AVOption vpp_options[] = { + { "height", "Output video height(0=input video height, -1=keep input video aspect)", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, + { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, + { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, +-#if QSV_ONEVPL +- { "scale_mode", "scaling & format conversion mode (mode compute(3), vd(4) and ve(5) are only available on some platforms)", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 5, .flags = FLAGS, .unit = "scale mode" }, +-#else +- { "scale_mode", "scaling & format conversion mode", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = MFX_SCALING_MODE_DEFAULT }, MFX_SCALING_MODE_DEFAULT, MFX_SCALING_MODE_QUALITY, .flags = FLAGS, .unit = "scale mode" }, +-#endif ++ { "scale_mode", "scaling & format conversion mode (mode compute(3), vd(4) and ve(5) are only available on some platforms)", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 5, .flags = FLAGS, .unit = "scale mode" }, + { "auto", "auto mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_DEFAULT}, INT_MIN, INT_MAX, FLAGS, .unit = "scale mode"}, + { "low_power", "low power mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_LOWPOWER}, INT_MIN, INT_MAX, FLAGS, .unit = "scale mode"}, + { "hq", "high quality mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_QUALITY}, INT_MIN, INT_MAX, FLAGS, .unit = "scale mode"}, +-#if QSV_ONEVPL + { "compute", "compute", 0, AV_OPT_TYPE_CONST, { .i64 = 3}, INT_MIN, INT_MAX, FLAGS, .unit = "scale mode"}, + { "vd", "vd", 0, AV_OPT_TYPE_CONST, { .i64 = 4}, INT_MIN, 
INT_MAX, FLAGS, .unit = "scale mode"}, + { "ve", "ve", 0, AV_OPT_TYPE_CONST, { .i64 = 5}, INT_MIN, INT_MAX, FLAGS, .unit = "scale mode"}, +-#endif + + { "rate", "Generate output at frame rate or field rate, available only for deinterlace mode", + OFFSET(field_rate), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, .unit = "rate" }, +@@ -923,8 +958,9 @@ static const AVOption vpp_options[] = { + { "out_color_transfer", "Output color transfer characteristics", + OFFSET(color_transfer_str), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, + +- {"tonemap", "Perform tonemapping (0=disable tonemapping, 1=perform tonemapping if the input has HDR metadata)", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, .flags = FLAGS}, ++ { "tonemap", "Perform tonemapping (0=disable tonemapping, 1=perform tonemapping if the input has HDR metadata)", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, .flags = FLAGS }, + ++ { "passthrough", "Apply pass through mode if possible.", OFFSET(has_passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, .flags = FLAGS }, + { NULL } + }; + +@@ -978,19 +1014,14 @@ static const AVOption qsvscale_options[] + { "h", "Output video height(0=input video height, -1=keep input video aspect)", OFFSET(oh), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, + { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, + +-#if QSV_ONEVPL +- { "mode", "scaling & format conversion mode (mode compute(3), vd(4) and ve(5) are only available on some platforms)", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 5, FLAGS, .unit = "mode"}, +-#else +- { "mode", "scaling & format conversion mode", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = MFX_SCALING_MODE_DEFAULT}, MFX_SCALING_MODE_DEFAULT, MFX_SCALING_MODE_QUALITY, FLAGS, .unit = "mode"}, +-#endif ++ { "mode", "scaling & format conversion mode (mode compute(3), vd(4) and ve(5) are only available on some platforms)", OFFSET(scale_mode), 
AV_OPT_TYPE_INT, { .i64 = -1}, -1, 5, FLAGS, .unit = "mode"}, + { "low_power", "low power mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_LOWPOWER}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, + { "hq", "high quality mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_QUALITY}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, +-#if QSV_ONEVPL + { "compute", "compute", 0, AV_OPT_TYPE_CONST, { .i64 = 3}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, + { "vd", "vd", 0, AV_OPT_TYPE_CONST, { .i64 = 4}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, + { "ve", "ve", 0, AV_OPT_TYPE_CONST, { .i64 = 5}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, +-#endif + ++ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, + { NULL }, + }; + +@@ -1015,6 +1046,7 @@ static const AVOption qsvdeint_options[] + { "bob", "bob algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_BOB}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, .unit = "mode"}, + { "advanced", "Motion adaptive algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, .unit = "mode"}, + ++ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, + { NULL }, + }; + diff --git a/cross/ffmpeg7/patches/1022-jellyfin-0022-add-external-frame-support-for-qsv-enc.patch b/cross/ffmpeg7/patches/1022-jellyfin-0022-add-external-frame-support-for-qsv-enc.patch new file mode 100644 index 00000000000..df65084addf --- /dev/null +++ b/cross/ffmpeg7/patches/1022-jellyfin-0022-add-external-frame-support-for-qsv-enc.patch @@ -0,0 +1,184 @@ +Index: FFmpeg/libavcodec/qsv_internal.h +=================================================================== +--- libavcodec/qsv_internal.h ++++ libavcodec/qsv_internal.h +@@ 
-100,6 +100,7 @@ typedef struct QSVFrame { + + int queued; + int used; ++ int external_frame; + + struct QSVFrame *next; + } QSVFrame; +Index: FFmpeg/libavcodec/qsvenc.c +=================================================================== +--- libavcodec/qsvenc.c ++++ libavcodec/qsvenc.c +@@ -1900,6 +1900,10 @@ static void clear_unused_frames(QSVEncCo + memset(&cur->enc_ctrl, 0, sizeof(cur->enc_ctrl)); + cur->enc_ctrl.Payload = cur->payloads; + cur->enc_ctrl.ExtParam = cur->extparam; ++ if (cur->external_frame) { ++ av_freep(&cur->surface.Data.MemId); ++ cur->external_frame = 0; ++ } + if (cur->frame->format == AV_PIX_FMT_QSV) { + av_frame_unref(cur->frame); + } +@@ -2071,6 +2075,16 @@ static int submit_frame(QSVEncContext *q + return ret; + + if (frame->format == AV_PIX_FMT_QSV) { ++ AVHWFramesContext *frames_ctx = NULL; ++ AVQSVFramesContext *frames_hwctx = NULL; ++ int is_fixed_pool = 0; ++ ++ if (q->avctx->hw_frames_ctx) { ++ frames_ctx = (AVHWFramesContext *)q->avctx->hw_frames_ctx->data; ++ frames_hwctx = frames_ctx->hwctx; ++ is_fixed_pool = frames_hwctx->nb_surfaces > 0; ++ } ++ + ret = av_frame_ref(qf->frame, frame); + if (ret < 0) + return ret; +@@ -2079,10 +2093,19 @@ static int submit_frame(QSVEncContext *q + + if (q->frames_ctx.mids) { + ret = ff_qsv_find_surface_idx(&q->frames_ctx, qf); +- if (ret < 0) ++ if (ret < 0 && !is_fixed_pool) + return ret; +- +- qf->surface.Data.MemId = &q->frames_ctx.mids[ret]; ++ if (ret >= 0) ++ qf->surface.Data.MemId = &q->frames_ctx.mids[ret]; ++ } ++ if (is_fixed_pool && (!q->frames_ctx.mids || ret < 0)) { ++ QSVMid *mid = NULL; ++ mid = (QSVMid *)av_mallocz(sizeof(*mid)); ++ if (!mid) ++ return AVERROR(ENOMEM); ++ mid->handle_pair = (mfxHDLPair *)qf->surface.Data.MemId; ++ qf->surface.Data.MemId = mid; ++ qf->external_frame = 1; + } + } else { + /* make a copy if the input is not padded as libmfx requires */ +Index: FFmpeg/libavutil/hwcontext_qsv.c +=================================================================== 
+--- libavutil/hwcontext_qsv.c ++++ libavutil/hwcontext_qsv.c +@@ -2142,11 +2142,25 @@ static int qsv_frames_derive_to(AVHWFram + } + } + ++#if CONFIG_VAAPI ++static void qsv_fixed_pool_unmap_from_vaapi(AVHWFramesContext *dst_fc, ++ HWMapDescriptor *hwmap) ++{ ++ mfxFrameSurface1 *new_sur = (mfxFrameSurface1 *)hwmap->priv; ++ mfxHDLPair *hdlpair = (mfxHDLPair *)new_sur->Data.MemId; ++ av_freep(&hdlpair->first); ++ av_freep(&new_sur->Data.MemId); ++ av_freep(&new_sur); ++} ++#endif ++ + static int qsv_fixed_pool_map_to(AVHWFramesContext *dst_ctx, + AVFrame *dst, const AVFrame *src, int flags) + { + AVQSVFramesContext *hwctx = dst_ctx->hwctx; + int i, err, index = -1; ++ mfxFrameSurface1 *new_sur = NULL; ++ mfxHDLPair *new_hdlpair = NULL; + + for (i = 0; i < hwctx->nb_surfaces && index < 0; i++) { + switch(src->format) { +@@ -2185,21 +2199,77 @@ static int qsv_fixed_pool_map_to(AVHWFra + } + } + if (index < 0) { +- av_log(dst_ctx, AV_LOG_ERROR, "Trying to map from a surface which " +- "is not in the mapped frames context.\n"); +- return AVERROR(EINVAL); +- } ++ switch (src->format) { ++#if CONFIG_VAAPI ++ case AV_PIX_FMT_VAAPI: ++ { ++ new_sur = (mfxFrameSurface1 *)av_mallocz(sizeof(*new_sur)); ++ if (!new_sur) { ++ err = AVERROR(ENOMEM); ++ goto qsv_map_to_err; ++ } ++ err = qsv_init_surface(dst_ctx, new_sur); ++ if (err < 0) ++ goto qsv_map_to_err; ++ ++ new_hdlpair = (mfxHDLPair *)av_mallocz(sizeof(*new_hdlpair)); ++ if (!new_hdlpair) { ++ err = AVERROR(ENOMEM); ++ goto qsv_map_to_err; ++ } ++ new_hdlpair->first = (VASurfaceID *)av_mallocz(sizeof(VASurfaceID)); ++ if (!new_hdlpair->first) { ++ err = AVERROR(ENOMEM); ++ goto qsv_map_to_err; ++ } ++ *(VASurfaceID*)(new_hdlpair->first) = (VASurfaceID)(uintptr_t)src->data[3]; ++ new_sur->Data.MemId = new_hdlpair; + +- err = ff_hwframe_map_create(dst->hw_frames_ctx, +- dst, src, NULL, NULL); +- if (err) +- return err; ++ err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, ++ &qsv_fixed_pool_unmap_from_vaapi, ++ 
(void*)new_sur); ++ if (err) ++ goto qsv_map_to_err; ++ ++ av_log(dst_ctx, AV_LOG_DEBUG, "Trying to map from a surface which " ++ "is not in the mapped frames context, so create a new surface\n"); ++ } ++ break; ++#endif ++#if CONFIG_DXVA2 ++ case AV_PIX_FMT_DXVA2_VLD: ++ { ++ av_log(dst_ctx, AV_LOG_ERROR, "Trying to map from a surface which " ++ "is not in the mapped frames context.\n"); ++ return AVERROR(EINVAL); ++ } ++ break; ++#endif ++ default: ++ return AVERROR(ENOSYS); ++ } ++ } else { ++ err = ff_hwframe_map_create(dst->hw_frames_ctx, ++ dst, src, NULL, NULL); ++ if (err) ++ goto qsv_map_to_err; ++ } + + dst->width = src->width; + dst->height = src->height; +- dst->data[3] = (uint8_t*)&hwctx->surfaces[index]; ++ dst->data[3] = (uint8_t*)((index == -1) ? new_sur : &hwctx->surfaces[index]); + + return 0; ++ ++qsv_map_to_err: ++ if (new_sur) ++ av_freep(&new_sur); ++ if (new_hdlpair) { ++ if (new_hdlpair->first) ++ av_freep(&new_hdlpair->first); ++ av_freep(&new_hdlpair); ++ } ++ return err; + } + + static void qsv_dynamic_pool_unmap(AVHWFramesContext *ctx, HWMapDescriptor *hwmap) diff --git a/cross/ffmpeg7/patches/1023-jellyfin-0023-suppress-overlay-warning-logs.patch b/cross/ffmpeg7/patches/1023-jellyfin-0023-suppress-overlay-warning-logs.patch new file mode 100644 index 00000000000..cfe5c253fc6 --- /dev/null +++ b/cross/ffmpeg7/patches/1023-jellyfin-0023-suppress-overlay-warning-logs.patch @@ -0,0 +1,13 @@ +Index: FFmpeg/libavfilter/buffersrc.c +=================================================================== +--- libavfilter/buffersrc.c ++++ libavfilter/buffersrc.c +@@ -75,7 +75,7 @@ typedef struct BufferSourceContext { + c->prev_delta = c->prev_w != width || c->prev_h != height || c->prev_pix_fmt != format ||\ + c->prev_color_space != csp || c->prev_color_range != range;\ + if (c->link_delta) {\ +- int loglevel = c->prev_delta ? AV_LOG_WARNING : AV_LOG_DEBUG;\ ++ int loglevel = c->prev_delta ? 
AV_LOG_VERBOSE : AV_LOG_DEBUG;\ + av_log(s, loglevel, "Changing video frame properties on the fly is not supported by all filters.\n");\ + av_log(s, loglevel, "filter context - w: %d h: %d fmt: %d csp: %s range: %s, incoming frame - w: %d h: %d fmt: %d csp: %s range: %s pts_time: %s\n",\ + c->w, c->h, c->pix_fmt, av_color_space_name(c->color_space), av_color_range_name(c->color_range),\ diff --git a/cross/ffmpeg7/patches/1024-jellyfin-0024-add-fixes-for-hevc-hdr-decoding-in-bsf.patch b/cross/ffmpeg7/patches/1024-jellyfin-0024-add-fixes-for-hevc-hdr-decoding-in-bsf.patch new file mode 100644 index 00000000000..ec36e64b702 --- /dev/null +++ b/cross/ffmpeg7/patches/1024-jellyfin-0024-add-fixes-for-hevc-hdr-decoding-in-bsf.patch @@ -0,0 +1,41 @@ +Index: FFmpeg/libavcodec/bsf/hevc_mp4toannexb.c +=================================================================== +--- libavcodec/bsf/hevc_mp4toannexb.c ++++ libavcodec/bsf/hevc_mp4toannexb.c +@@ -126,6 +126,7 @@ static int hevc_mp4toannexb_filter(AVBSF + + int got_irap = 0; + int i, ret = 0; ++ int has_sps = 0, has_pps = 0; + + ret = ff_bsf_get_packet(ctx, &in); + if (ret < 0) +@@ -157,11 +158,14 @@ static int hevc_mp4toannexb_filter(AVBSF + } + + nalu_type = (bytestream2_peek_byte(&gb) >> 1) & 0x3f; ++ has_sps = (has_sps || nalu_type == HEVC_NAL_SPS); ++ has_pps = (has_pps || nalu_type == HEVC_NAL_PPS); + + /* prepend extradata to IRAP frames */ + is_irap = nalu_type >= HEVC_NAL_BLA_W_LP && + nalu_type <= HEVC_NAL_RSV_IRAP_VCL23; +- add_extradata = is_irap && !got_irap; ++ /* ignore the extradata if IRAP frame has sps and pps */ ++ add_extradata = is_irap && !got_irap && !(has_sps && has_pps); + extra_size = add_extradata * ctx->par_out->extradata_size; + got_irap |= is_irap; + +Index: FFmpeg/tests/fate/hevc.mak +=================================================================== +--- tests/fate/hevc.mak ++++ tests/fate/hevc.mak +@@ -222,7 +222,7 @@ FATE_HEVC-$(call ALLYES, HEVC_DEMUXER MO + fate-hevc-bsf-mp4toannexb: 
tests/data/hevc-mp4.mov + fate-hevc-bsf-mp4toannexb: CMD = md5 -i $(TARGET_PATH)/tests/data/hevc-mp4.mov -c:v copy -fflags +bitexact -f hevc + fate-hevc-bsf-mp4toannexb: CMP = oneline +-fate-hevc-bsf-mp4toannexb: REF = 73019329ed7f81c24f9af67c34c640c0 ++fate-hevc-bsf-mp4toannexb: REF = 7d05a79c7a6665ae22c0043a4d83a811 + + fate-hevc-skiploopfilter: CMD = framemd5 -skip_loop_filter nokey -i $(TARGET_SAMPLES)/hevc-conformance/SAO_D_Samsung_5.bit -sws_flags bitexact + FATE_HEVC-$(call FRAMEMD5, HEVC, HEVC, HEVC_PARSER) += fate-hevc-skiploopfilter diff --git a/cross/ffmpeg7/patches/1025-jellyfin-0025-add-sub2video-option-to-subtitles-filter.patch b/cross/ffmpeg7/patches/1025-jellyfin-0025-add-sub2video-option-to-subtitles-filter.patch new file mode 100644 index 00000000000..598960659f5 --- /dev/null +++ b/cross/ffmpeg7/patches/1025-jellyfin-0025-add-sub2video-option-to-subtitles-filter.patch @@ -0,0 +1,122 @@ +Index: FFmpeg/libavfilter/vf_subtitles.c +=================================================================== +--- libavfilter/vf_subtitles.c ++++ libavfilter/vf_subtitles.c +@@ -65,6 +65,9 @@ typedef struct AssContext { + int shaping; + FFDrawContext draw; + int wrap_unicode; ++ int sub2video; ++ int last_image; ++ int64_t max_pts, max_ts_ms; + } AssContext; + + #define OFFSET(x) offsetof(AssContext, x) +@@ -75,7 +78,12 @@ typedef struct AssContext { + {"f", "set the filename of file to read", OFFSET(filename), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, \ + {"original_size", "set the size of the original video (used to scale fonts)", OFFSET(original_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS }, \ + {"fontsdir", "set the directory containing the fonts to read", OFFSET(fontsdir), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, \ +- {"alpha", "enable processing of alpha channel", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \ ++ {"alpha", "enable processing of alpha channel", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, 
FLAGS }, \ ++ {"sub2video", "enable textual subtitle to video mode", OFFSET(sub2video), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \ ++ {"shaping", "set shaping engine", OFFSET(shaping), AV_OPT_TYPE_INT, {.i64 = ASS_SHAPING_COMPLEX }, -1, 1, FLAGS, .unit = "shaping_mode"}, \ ++ {"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = -1}, INT_MIN, INT_MAX, FLAGS, .unit = "shaping_mode"}, \ ++ {"simple", "simple shaping", 0, AV_OPT_TYPE_CONST, {.i64 = ASS_SHAPING_SIMPLE}, INT_MIN, INT_MAX, FLAGS, .unit = "shaping_mode"}, \ ++ {"complex", "complex shaping", 0, AV_OPT_TYPE_CONST, {.i64 = ASS_SHAPING_COMPLEX}, INT_MIN, INT_MAX, FLAGS, .unit = "shaping_mode"}, \ + + /* libass supports a log level ranging from 0 to 7 */ + static const int ass_libavfilter_log_level_map[] = { +@@ -162,6 +170,8 @@ static int config_input(AVFilterLink *in + if (ass->shaping != -1) + ass_set_shaper(ass->renderer, ass->shaping); + ++ ass->max_pts = ass->max_ts_ms / (av_q2d(inlink->time_base) * 1000); ++ + return 0; + } + +@@ -192,18 +202,41 @@ static int filter_frame(AVFilterLink *in + AVFilterLink *outlink = ctx->outputs[0]; + AssContext *ass = ctx->priv; + int detect_change = 0; +- double time_ms = picref->pts * av_q2d(inlink->time_base) * 1000; ++ int64_t time_ms = picref->pts * av_q2d(inlink->time_base) * 1000; + ASS_Image *image = ass_render_frame(ass->renderer, ass->track, + time_ms, &detect_change); + ++ if (ass->sub2video) { ++ if (!image && !ass->last_image && picref->pts <= ass->max_pts && outlink->current_pts != AV_NOPTS_VALUE) { ++ av_log(ctx, AV_LOG_DEBUG, "sub2video skip pts:%"PRId64"\n", picref->pts); ++ av_frame_free(&picref); ++ return 0; ++ } ++ ass->last_image = image != NULL; ++ } ++ + if (detect_change) +- av_log(ctx, AV_LOG_DEBUG, "Change happened at time ms:%f\n", time_ms); ++ av_log(ctx, AV_LOG_DEBUG, "Change happened at time ms:%"PRId64"\n", time_ms); + + overlay_ass_image(ass, picref, image); + + return ff_filter_frame(outlink, picref); + } + ++static void 
get_max_timestamp(AVFilterContext *ctx) ++{ ++ AssContext *ass = ctx->priv; ++ int i; ++ ++ ass->max_ts_ms = 0; ++ if (ass->track) { ++ for (i = 0; i < ass->track->n_events; i++) { ++ ASS_Event *event = ass->track->events + i; ++ ass->max_ts_ms = FFMAX(event->Start + event->Duration, ass->max_ts_ms); ++ } ++ } ++} ++ + static const AVFilterPad ass_inputs[] = { + { + .name = "default", +@@ -218,10 +251,6 @@ static const AVFilterPad ass_inputs[] = + + static const AVOption ass_options[] = { + COMMON_OPTIONS +- {"shaping", "set shaping engine", OFFSET(shaping), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, FLAGS, .unit = "shaping_mode"}, +- {"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = -1}, INT_MIN, INT_MAX, FLAGS, .unit = "shaping_mode"}, +- {"simple", "simple shaping", 0, AV_OPT_TYPE_CONST, {.i64 = ASS_SHAPING_SIMPLE}, INT_MIN, INT_MAX, FLAGS, .unit = "shaping_mode"}, +- {"complex", "complex shaping", 0, AV_OPT_TYPE_CONST, {.i64 = ASS_SHAPING_COMPLEX}, INT_MIN, INT_MAX, FLAGS, .unit = "shaping_mode"}, + {NULL}, + }; + +@@ -245,6 +274,9 @@ static av_cold int init_ass(AVFilterCont + ass->filename); + return AVERROR(EINVAL); + } ++ ++ get_max_timestamp(ctx); ++ + return 0; + } + +@@ -266,8 +298,8 @@ const AVFilter ff_vf_ass = { + static const AVOption subtitles_options[] = { + COMMON_OPTIONS + {"charenc", "set input character encoding", OFFSET(charenc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, +- {"stream_index", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, +- {"si", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, ++ {"stream_index", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, ++ {"si", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, + {"force_style", "force subtitle style", OFFSET(force_style), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, + #if FF_ASS_FEATURE_WRAP_UNICODE 
+ {"wrap_unicode", "break lines according to the Unicode Line Breaking Algorithm", OFFSET(wrap_unicode), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, FLAGS }, +@@ -496,6 +528,8 @@ static av_cold int init_subtitles(AVFilt + avsubtitle_free(&sub); + } + ++ get_max_timestamp(ctx); ++ + end: + av_dict_free(&codec_opts); + avcodec_free_context(&dec_ctx); diff --git a/cross/ffmpeg7/patches/1026-jellyfin-0026-add-alphasrc-source-video-filter.patch b/cross/ffmpeg7/patches/1026-jellyfin-0026-add-alphasrc-source-video-filter.patch new file mode 100644 index 00000000000..9008a0b7e2b --- /dev/null +++ b/cross/ffmpeg7/patches/1026-jellyfin-0026-add-alphasrc-source-video-filter.patch @@ -0,0 +1,193 @@ +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- libavfilter/Makefile ++++ libavfilter/Makefile +@@ -588,6 +588,7 @@ OBJS-$(CONFIG_XSTACK_QSV_FILTER) + + OBJS-$(CONFIG_ALLRGB_FILTER) += vsrc_testsrc.o + OBJS-$(CONFIG_ALLYUV_FILTER) += vsrc_testsrc.o ++OBJS-$(CONFIG_ALPHASRC_FILTER) += vsrc_alphasrc.o + OBJS-$(CONFIG_CELLAUTO_FILTER) += vsrc_cellauto.o + OBJS-$(CONFIG_COLOR_FILTER) += vsrc_testsrc.o + OBJS-$(CONFIG_COLORCHART_FILTER) += vsrc_testsrc.o +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== +--- libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -552,6 +552,7 @@ extern const AVFilter ff_vf_xstack_qsv; + + extern const AVFilter ff_vsrc_allrgb; + extern const AVFilter ff_vsrc_allyuv; ++extern const AVFilter ff_vsrc_alphasrc; + extern const AVFilter ff_vsrc_cellauto; + extern const AVFilter ff_vsrc_color; + extern const AVFilter ff_vsrc_color_vulkan; +Index: FFmpeg/libavfilter/vsrc_alphasrc.c +=================================================================== +--- /dev/null ++++ libavfilter/vsrc_alphasrc.c +@@ -0,0 +1,164 @@ ++/* ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. 
++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Provide a blank video input with alpha channel. ++ */ ++ ++#include "libavutil/avstring.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/opt.h" ++#include "libavutil/parseutils.h" ++#include "filters.h" ++#include "avfilter.h" ++#include "internal.h" ++#include "formats.h" ++#include "video.h" ++ ++typedef struct AlphaSrc { ++ const AVClass *class; ++ AVRational time_base, frame_rate; ++ int64_t pts; ++ int64_t duration, start; ++ int out_w, out_h; ++ int rgb, planar; ++} AlphaSrc; ++ ++static av_cold int alphasrc_init(AVFilterContext *ctx) ++{ ++ AlphaSrc *s = ctx->priv; ++ ++ s->time_base = av_inv_q(s->frame_rate); ++ s->pts = 0; ++ ++ if (s->start > 0) ++ s->pts += av_rescale_q(s->start, AV_TIME_BASE_Q, s->time_base); ++ ++ return 0; ++} ++ ++static int alphasrc_query_formats(AVFilterContext *ctx) ++{ ++ AVFilterLink *outlink = ctx->outputs[0]; ++ AVFilterFormats *formats = ff_formats_pixdesc_filter(AV_PIX_FMT_FLAG_ALPHA, 0); ++ int ret; ++ ++ ret = ff_formats_ref(formats, &outlink->incfg.formats); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static int alphasrc_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AlphaSrc *s = ctx->priv; ++ 
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format); ++ ++ s->rgb = desc->flags & AV_PIX_FMT_FLAG_RGB; ++ s->planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR; ++ ++ if (!s->rgb && !s->planar) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (s->out_w <= 0 || s->out_h <= 0) { ++ av_log(ctx, AV_LOG_ERROR, "Invalid output video size.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ outlink->w = s->out_w; ++ outlink->h = s->out_h; ++ outlink->frame_rate = s->frame_rate; ++ outlink->time_base = s->time_base; ++ outlink->sample_aspect_ratio = (AVRational){1, 1}; ++ ++ return 0; ++} ++ ++static int alphasrc_request_frame(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AlphaSrc *s = ctx->priv; ++ AVFrame *out; ++ int i; ++ ++ if (s->duration > 0 && ++ av_rescale_q(s->pts, s->time_base, AV_TIME_BASE_Q) >= s->duration) { ++ ff_outlink_set_status(outlink, AVERROR_EOF, s->pts); ++ return 0; ++ } ++ ++ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!out) ++ return AVERROR(ENOMEM); ++ ++ for (i = 0; i < AV_NUM_DATA_POINTERS; i++) { ++ if (out->buf[i]) { ++ if (s->rgb) ++ memset(out->buf[i]->data, 0, out->buf[i]->size); ++ else if (s->planar) ++ memset(out->buf[i]->data, (i == 0) ? 16 : ((i == 1 || i == 2) ? 
128 : 0), out->buf[i]->size); ++ } ++ } ++ ++ out->pts = s->pts++; ++ ++ return ff_filter_frame(outlink, out); ++} ++ ++#define OFFSET(x) offsetof(AlphaSrc, x) ++#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM ++static const AVOption alphasrc_options[] = { ++ {"duration", "set the duration of the video", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"d", "set the duration of the video", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"start", "set the start timestamp of the video", OFFSET(start), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"rate", "set the frame rate of the video", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "15"}, 1, INT_MAX, FLAGS}, ++ {"r", "set the frame rate of the video", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "15"}, 1, INT_MAX, FLAGS}, ++ {"size", "set the size of the video", OFFSET(out_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS}, ++ {"s", "set the size of the video", OFFSET(out_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS}, ++ {NULL} ++}; ++ ++AVFILTER_DEFINE_CLASS(alphasrc); ++ ++static const AVFilterPad alphasrc_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = alphasrc_config_output, ++ .request_frame = alphasrc_request_frame, ++ }, ++}; ++ ++const AVFilter ff_vsrc_alphasrc = { ++ .name = "alphasrc", ++ .description = NULL_IF_CONFIG_SMALL("Provide a blank video input with alpha channel."), ++ .priv_size = sizeof(AlphaSrc), ++ .priv_class = &alphasrc_class, ++ .init = alphasrc_init, ++ .uninit = NULL, ++ .inputs = NULL, ++ FILTER_OUTPUTS(alphasrc_outputs), ++ FILTER_QUERY_FUNC(alphasrc_query_formats), ++}; diff --git a/cross/ffmpeg7/patches/1027-jellyfin-0027-increase-the-default-async-depth-for-vaapi-encode.patch b/cross/ffmpeg7/patches/1027-jellyfin-0027-increase-the-default-async-depth-for-vaapi-encode.patch new file mode 100644 index 
00000000000..bc29c442dd6 --- /dev/null +++ b/cross/ffmpeg7/patches/1027-jellyfin-0027-increase-the-default-async-depth-for-vaapi-encode.patch @@ -0,0 +1,13 @@ +Index: FFmpeg/libavcodec/vaapi_encode.h +=================================================================== +--- libavcodec/vaapi_encode.h ++++ libavcodec/vaapi_encode.h +@@ -520,7 +520,7 @@ int ff_vaapi_encode_close(AVCodecContext + "Increase this to improve single channel performance. This option " \ + "doesn't work if driver doesn't implement vaSyncBuffer function.", \ + OFFSET(common.async_depth), AV_OPT_TYPE_INT, \ +- { .i64 = 2 }, 1, MAX_ASYNC_DEPTH, FLAGS }, \ ++ { .i64 = 4 }, 1, MAX_ASYNC_DEPTH, FLAGS }, \ + { "max_frame_size", \ + "Maximum frame size (in bytes)",\ + OFFSET(common.max_frame_size), AV_OPT_TYPE_INT, \ diff --git a/cross/ffmpeg7/patches/1028-jellyfin-0028-add-fixes-for-hevc-vaapi-encoding-on-tgl.patch b/cross/ffmpeg7/patches/1028-jellyfin-0028-add-fixes-for-hevc-vaapi-encoding-on-tgl.patch new file mode 100644 index 00000000000..768d33a736d --- /dev/null +++ b/cross/ffmpeg7/patches/1028-jellyfin-0028-add-fixes-for-hevc-vaapi-encoding-on-tgl.patch @@ -0,0 +1,16 @@ +Index: FFmpeg/libavcodec/vaapi_encode_h265.c +=================================================================== +--- libavcodec/vaapi_encode_h265.c ++++ libavcodec/vaapi_encode_h265.c +@@ -452,8 +452,9 @@ static int vaapi_encode_h265_init_sequen + sps->log2_min_luma_transform_block_size_minus2 = 0; + sps->log2_diff_max_min_luma_transform_block_size = 3; + // Full transform hierarchy allowed (2-5). +- sps->max_transform_hierarchy_depth_inter = 3; +- sps->max_transform_hierarchy_depth_intra = 3; ++ // Default to 2 based on Programmer's Reference Manuals of Intel graphics. ++ sps->max_transform_hierarchy_depth_inter = 2; ++ sps->max_transform_hierarchy_depth_intra = 2; + // AMP works. + sps->amp_enabled_flag = 1; + // SAO and temporal MVP do not work. 
diff --git a/cross/ffmpeg7/patches/1029-jellyfin-0029-add-multiple-values-tags-and-webp-support-for-id3v2.patch b/cross/ffmpeg7/patches/1029-jellyfin-0029-add-multiple-values-tags-and-webp-support-for-id3v2.patch new file mode 100644 index 00000000000..f134c3cf21f --- /dev/null +++ b/cross/ffmpeg7/patches/1029-jellyfin-0029-add-multiple-values-tags-and-webp-support-for-id3v2.patch @@ -0,0 +1,86 @@ +Index: FFmpeg/libavformat/id3v2.c +=================================================================== +--- libavformat/id3v2.c ++++ libavformat/id3v2.c +@@ -136,6 +136,7 @@ const CodecMime ff_id3v2_mime_tags[] = { + { "image/png", AV_CODEC_ID_PNG }, + { "image/tiff", AV_CODEC_ID_TIFF }, + { "image/bmp", AV_CODEC_ID_BMP }, ++ { "image/webp", AV_CODEC_ID_WEBP }, + { "JPG", AV_CODEC_ID_MJPEG }, /* ID3v2.2 */ + { "PNG", AV_CODEC_ID_PNG }, /* ID3v2.2 */ + { "", AV_CODEC_ID_NONE }, +@@ -325,39 +326,54 @@ static void read_ttag(AVFormatContext *s + AVDictionary **metadata, const char *key) + { + uint8_t *dst; +- int encoding, dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_VAL; ++ int encoding, nb_values = 0; + unsigned genre; ++ AVDictionaryEntry *tag = NULL; + + if (taglen < 1) + return; + ++ tag = av_dict_get(*metadata, key, NULL, 0); ++ if (tag) ++ return; ++ + encoding = avio_r8(pb); + taglen--; /* account for encoding type byte */ + +- if (decode_str(s, pb, encoding, &dst, &taglen) < 0) { +- av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key); +- return; +- } ++ /* loop through all available tags */ ++ while (taglen) { ++ int dict_flags = AV_DICT_APPEND | AV_DICT_DONT_STRDUP_VAL; + +- if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) && +- (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) && +- genre <= ID3v1_GENRE_MAX) { +- av_freep(&dst); +- dst = av_strdup(ff_id3v1_genre_str[genre]); +- } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) { +- /* dst now contains the key, need to get value */ +- key = dst; + if 
(decode_str(s, pb, encoding, &dst, &taglen) < 0) { + av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key); +- av_freep(&key); + return; + } +- dict_flags |= AV_DICT_DONT_STRDUP_KEY; +- } else if (!*dst) +- av_freep(&dst); + +- if (dst) +- av_dict_set(metadata, key, dst, dict_flags); ++ if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) && ++ (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) && ++ genre <= ID3v1_GENRE_MAX) { ++ av_freep(&dst); ++ dst = av_strdup(ff_id3v1_genre_str[genre]); ++ } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) { ++ /* dst now contains the key, need to get value */ ++ key = dst; ++ if (decode_str(s, pb, encoding, &dst, &taglen) < 0) { ++ av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key); ++ av_freep(&key); ++ return; ++ } ++ dict_flags |= AV_DICT_DONT_STRDUP_KEY; ++ } else if (!*dst) { ++ av_freep(&dst); ++ return; ++ } ++ ++ if (dst) { ++ if (nb_values++) ++ av_dict_set(metadata, key, ";", dict_flags & ~AV_DICT_DONT_STRDUP_VAL); ++ av_dict_set(metadata, key, dst, dict_flags); ++ } ++ } + } + + static void read_uslt(AVFormatContext *s, AVIOContext *pb, int taglen, diff --git a/cross/ffmpeg7/patches/1030-jellyfin-0030-remove-fdk-aac-from-nonfree.patch b/cross/ffmpeg7/patches/1030-jellyfin-0030-remove-fdk-aac-from-nonfree.patch new file mode 100644 index 00000000000..a866fff1486 --- /dev/null +++ b/cross/ffmpeg7/patches/1030-jellyfin-0030-remove-fdk-aac-from-nonfree.patch @@ -0,0 +1,20 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -1872,7 +1872,6 @@ EXTERNAL_LIBRARY_GPL_LIST=" + + EXTERNAL_LIBRARY_NONFREE_LIST=" + decklink +- libfdk_aac + libtls + " + +@@ -1912,6 +1911,7 @@ EXTERNAL_LIBRARY_LIST=" + libcodec2 + libdav1d + libdc1394 ++ libfdk_aac + libflite + libfontconfig + libfreetype diff --git a/cross/ffmpeg7/patches/1031-jellyfin-0031-pass-dovi-sidedata-to-hlsenc-and-mpegtsenc.patch 
b/cross/ffmpeg7/patches/1031-jellyfin-0031-pass-dovi-sidedata-to-hlsenc-and-mpegtsenc.patch new file mode 100644 index 00000000000..8e31650e28c --- /dev/null +++ b/cross/ffmpeg7/patches/1031-jellyfin-0031-pass-dovi-sidedata-to-hlsenc-and-mpegtsenc.patch @@ -0,0 +1,137 @@ +Index: FFmpeg/libavformat/hlsenc.c +=================================================================== +--- libavformat/hlsenc.c ++++ libavformat/hlsenc.c +@@ -850,7 +850,7 @@ static int hls_mux_init(AVFormatContext + AVFormatContext *vtt_oc = NULL; + int byterange_mode = (hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size > 0); + int remaining_options; +- int i, ret; ++ int i, j, ret; + + ret = avformat_alloc_output_context2(&vs->avf, vs->oformat, NULL, NULL); + if (ret < 0) +@@ -896,6 +896,20 @@ static int hls_mux_init(AVFormatContext + st->codecpar->codec_tag = 0; + } + ++ // copy side data ++ for (j = 0; j < vs->streams[i]->codecpar->nb_coded_side_data; j++) { ++ const AVPacketSideData *sd_src = &vs->streams[i]->codecpar->coded_side_data[j]; ++ AVPacketSideData *sd_dst; ++ ++ sd_dst = av_packet_side_data_new(&st->codecpar->coded_side_data, ++ &st->codecpar->nb_coded_side_data, ++ sd_src->type, sd_src->size, 0); ++ if (!sd_dst) ++ return AVERROR(ENOMEM); ++ ++ memcpy(sd_dst->data, sd_src->data, sd_src->size); ++ } ++ + st->sample_aspect_ratio = vs->streams[i]->sample_aspect_ratio; + st->time_base = vs->streams[i]->time_base; + av_dict_copy(&st->metadata, vs->streams[i]->metadata, 0); +Index: FFmpeg/libavformat/movenc.c +=================================================================== +--- libavformat/movenc.c ++++ libavformat/movenc.c +@@ -8124,6 +8124,7 @@ static const AVCodecTag codec_mp4_tags[] + { AV_CODEC_ID_HEVC, MKTAG('h', 'e', 'v', '1') }, + { AV_CODEC_ID_HEVC, MKTAG('h', 'v', 'c', '1') }, + { AV_CODEC_ID_HEVC, MKTAG('d', 'v', 'h', '1') }, ++ { AV_CODEC_ID_HEVC, MKTAG('d', 'v', 'h', 'e') }, + { AV_CODEC_ID_VVC, MKTAG('v', 'v', 'c', '1') }, + { AV_CODEC_ID_VVC, MKTAG('v', 'v', 'i', 
'1') }, + { AV_CODEC_ID_EVC, MKTAG('e', 'v', 'c', '1') }, +@@ -8137,6 +8138,7 @@ static const AVCodecTag codec_mp4_tags[] + { AV_CODEC_ID_TSCC2, MKTAG('m', 'p', '4', 'v') }, + { AV_CODEC_ID_VP9, MKTAG('v', 'p', '0', '9') }, + { AV_CODEC_ID_AV1, MKTAG('a', 'v', '0', '1') }, ++ { AV_CODEC_ID_AV1, MKTAG('d', 'a', 'v', '1') }, + { AV_CODEC_ID_AAC, MKTAG('m', 'p', '4', 'a') }, + { AV_CODEC_ID_ALAC, MKTAG('a', 'l', 'a', 'c') }, + { AV_CODEC_ID_MP4ALS, MKTAG('m', 'p', '4', 'a') }, +Index: FFmpeg/libavformat/mpegtsenc.c +=================================================================== +--- libavformat/mpegtsenc.c ++++ libavformat/mpegtsenc.c +@@ -23,6 +23,7 @@ + #include "libavutil/bswap.h" + #include "libavutil/crc.h" + #include "libavutil/dict.h" ++#include "libavutil/dovi_meta.h" + #include "libavutil/intreadwrite.h" + #include "libavutil/mathematics.h" + #include "libavutil/opt.h" +@@ -350,6 +351,52 @@ static void put_registration_descriptor( + *q_ptr = q; + } + ++static int put_dovi_descriptor(AVFormatContext *s, uint8_t **q_ptr, ++ const AVDOVIDecoderConfigurationRecord *dovi) ++{ ++ uint16_t val16; ++ uint8_t *q = *q_ptr; ++ ++ if (!dovi) ++ return AVERROR(ENOMEM); ++ ++ if (!dovi->bl_present_flag) { ++ av_log(s, AV_LOG_ERROR, ++ "EL only DOVI stream is not supported!\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ put_registration_descriptor(&q, MKTAG('D', 'O', 'V', 'I')); // format_identifier ++ ++ /* DOVI Video Stream Descriptor Syntax */ ++ *q++ = 0xb0; // descriptor_tag ++ *q++ = 0x05; // descriptor_length ++ *q++ = dovi->dv_version_major; ++ *q++ = dovi->dv_version_minor; ++ ++ val16 = (dovi->dv_profile & 0x7f) << 9 | // 7 bits ++ (dovi->dv_level & 0x3f) << 3 | // 6 bits ++ (dovi->rpu_present_flag & 0x01) << 2 | // 1 bits ++ (dovi->el_present_flag & 0x01) << 1 | // 1 bits ++ (dovi->bl_present_flag & 0x01); // 1 bits ++ put16(&q, val16); ++ ++#if 0 ++ // TODO: support dependency_pid (EL only stream) ++ // descriptor_length: 0x05->0x07 ++ if (!bl_present_flag) 
{ ++ val16 = (dependency_pid & 0x1fff) << 3; // 13+3 bits ++ put16(&q, val16); ++ } ++#endif ++ ++ *q++ = (dovi->dv_bl_signal_compatibility_id & 0x0f) << 4; // 4+4 bits ++ ++ *q_ptr = q; ++ ++ return 0; ++} ++ + static int get_dvb_stream_type(AVFormatContext *s, AVStream *st) + { + MpegTSWrite *ts = s->priv_data; +@@ -803,7 +850,16 @@ static int mpegts_write_pmt(AVFormatCont + } else if (stream_type == STREAM_TYPE_VIDEO_VC1) { + put_registration_descriptor(&q, MKTAG('V', 'C', '-', '1')); + } else if (stream_type == STREAM_TYPE_VIDEO_HEVC && s->strict_std_compliance <= FF_COMPLIANCE_NORMAL) { +- put_registration_descriptor(&q, MKTAG('H', 'E', 'V', 'C')); ++ const AVPacketSideData *sd = av_packet_side_data_get(st->codecpar->coded_side_data, ++ st->codecpar->nb_coded_side_data, AV_PKT_DATA_DOVI_CONF); ++ const AVDOVIDecoderConfigurationRecord *dovi = sd ? (const AVDOVIDecoderConfigurationRecord *)sd->data : NULL; ++ ++ if (dovi && dovi->bl_present_flag && s->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL) { ++ if (put_dovi_descriptor(s, &q, dovi) < 0) ++ break; ++ } else { ++ put_registration_descriptor(&q, MKTAG('H', 'E', 'V', 'C')); ++ } + } else if (stream_type == STREAM_TYPE_VIDEO_CAVS || stream_type == STREAM_TYPE_VIDEO_AVS2 || + stream_type == STREAM_TYPE_VIDEO_AVS3) { + put_registration_descriptor(&q, MKTAG('A', 'V', 'S', 'V')); diff --git a/cross/ffmpeg7/patches/1032-jellyfin-0032-add-pause-support-for-ffmpeg-cli.patch b/cross/ffmpeg7/patches/1032-jellyfin-0032-add-pause-support-for-ffmpeg-cli.patch new file mode 100644 index 00000000000..741c961fcbc --- /dev/null +++ b/cross/ffmpeg7/patches/1032-jellyfin-0032-add-pause-support-for-ffmpeg-cli.patch @@ -0,0 +1,144 @@ +Index: FFmpeg/fftools/ffmpeg.c +=================================================================== +--- fftools/ffmpeg.c ++++ fftools/ffmpeg.c +@@ -116,6 +116,9 @@ typedef struct BenchmarkTimeStamps { + + static BenchmarkTimeStamps get_benchmark_time_stamps(void); + static int64_t 
getmaxrss(void); ++static int64_t gettime_relative_minus_pause(void); ++static void pause_transcoding(void); ++static void unpause_transcoding(void); + + atomic_uint nb_output_dumped = 0; + +@@ -134,6 +137,9 @@ int nb_filtergraphs; + Decoder **decoders; + int nb_decoders; + ++int64_t paused_start = 0; ++int64_t paused_time = 0; ++ + #if HAVE_TERMIOS_H + + /* init terminal so that we can grab keys */ +@@ -773,12 +779,28 @@ static void set_tty_echo(int on) + #endif + } + ++static void pause_transcoding(void) ++{ ++ if (!paused_start) ++ paused_start = av_gettime_relative(); ++} ++ ++static void unpause_transcoding(void) ++{ ++ if (paused_start) { ++ paused_time += av_gettime_relative() - paused_start; ++ paused_start = 0; ++ } ++} ++ + static int check_keyboard_interaction(int64_t cur_time) + { + int i, key; + static int64_t last_time; +- if (received_nb_signals) ++ if (received_nb_signals) { ++ unpause_transcoding(); + return AVERROR_EXIT; ++ } + /* read_key() returns 0 on EOF */ + if (cur_time - last_time >= 100000) { + key = read_key(); +@@ -791,6 +813,11 @@ static int check_keyboard_interaction(in + } + if (key == '+') av_log_set_level(av_log_get_level()+10); + if (key == '-') av_log_set_level(av_log_get_level()-10); ++ if (key == 'u' || key != -1) unpause_transcoding(); ++ if (key == 'p'){ ++ pause_transcoding(); ++ fprintf(stderr, "\nTranscoding is paused. 
Press [u] to resume.\n"); ++ } + if (key == 'c' || key == 'C'){ + char buf[4096], target[64], command[256], arg[256] = {0}; + double time; +@@ -825,7 +852,9 @@ static int check_keyboard_interaction(in + "c Send command to first matching filter supporting it\n" + "C Send/Queue command to all matching filters\n" + "h dump packets/hex press to cycle through the 3 states\n" ++ "p pause transcoding\n" + "q quit\n" ++ "u unpause transcoding\n" + "s Show QP histogram\n" + ); + } +@@ -855,12 +884,15 @@ static int transcode(Scheduler *sch) + timer_start = av_gettime_relative(); + + while (!sch_wait(sch, stats_period, &transcode_ts)) { +- int64_t cur_time= av_gettime_relative(); ++ int64_t cur_time= gettime_relative_minus_pause(); + + /* if 'q' pressed, exits */ +- if (stdin_interaction) +- if (check_keyboard_interaction(cur_time) < 0) ++ if (stdin_interaction) { ++ if (check_keyboard_interaction(av_gettime_relative()) < 0) { ++ paused_start = 0; // unpausing the input thread on exit + break; ++ } ++ } + + /* dump report by using the output first video and audio streams */ + print_report(0, timer_start, cur_time, transcode_ts); +@@ -877,11 +909,17 @@ static int transcode(Scheduler *sch) + term_exit(); + + /* dump report by using the first video and audio streams */ +- print_report(1, timer_start, av_gettime_relative(), transcode_ts); ++ print_report(1, timer_start, gettime_relative_minus_pause(), transcode_ts); + + return ret; + } + ++static int64_t gettime_relative_minus_pause(void) ++{ ++ return av_gettime_relative() - paused_time - ++ (paused_start ? 
av_gettime_relative() - paused_start : 0); ++} ++ + static BenchmarkTimeStamps get_benchmark_time_stamps(void) + { + BenchmarkTimeStamps time_stamps = { av_gettime_relative() }; +Index: FFmpeg/fftools/ffmpeg.h +=================================================================== +--- fftools/ffmpeg.h ++++ fftools/ffmpeg.h +@@ -667,6 +667,9 @@ extern int recast_media; + + extern FILE *vstats_file; + ++extern int64_t paused_start; ++extern int64_t paused_time; ++ + void term_init(void); + void term_exit(void); + +Index: FFmpeg/fftools/ffmpeg_demux.c +=================================================================== +--- fftools/ffmpeg_demux.c ++++ fftools/ffmpeg_demux.c +@@ -699,6 +699,11 @@ static int input_thread(void *arg) + DemuxStream *ds; + unsigned send_flags = 0; + ++ if (paused_start) { ++ av_usleep(1000); // pausing the input thread ++ continue; ++ } ++ + ret = av_read_frame(f->ctx, dt.pkt_demux); + + if (ret == AVERROR(EAGAIN)) { diff --git a/cross/ffmpeg7/patches/1033-jellyfin-0033-enable-gcc-vectorization-and-fix-cpuflags.patch b/cross/ffmpeg7/patches/1033-jellyfin-0033-enable-gcc-vectorization-and-fix-cpuflags.patch new file mode 100644 index 00000000000..87a458633c3 --- /dev/null +++ b/cross/ffmpeg7/patches/1033-jellyfin-0033-enable-gcc-vectorization-and-fix-cpuflags.patch @@ -0,0 +1,38 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -5558,7 +5558,7 @@ elif enabled x86; then + ;; + # everything else should support nopl and conditional mov (cmov) + *) +- cpuflags="-march=$cpu" ++ # there is no -march=generic option + enable i686 + enable fast_cmov + ;; +@@ -7539,7 +7539,9 @@ if enabled icc; then + disable aligned_stack + fi + elif enabled gcc; then +- check_optflags -fno-tree-vectorize ++ case $gcc_basever in ++ 2|2.*|3.*|4.*|5.*|6.*) check_optflags -fno-tree-vectorize ;; ++ esac + check_cflags -Werror=format-security + check_cflags -Werror=implicit-function-declaration 
+ check_cflags -Werror=missing-prototypes +Index: FFmpeg/libavcodec/x86/cabac.h +=================================================================== +--- libavcodec/x86/cabac.h ++++ libavcodec/x86/cabac.h +@@ -183,6 +183,9 @@ av_noinline + #else + av_always_inline + #endif ++#ifdef __GNUC__ ++__attribute__((optimize("-fno-tree-vectorize"))) ++#endif + int get_cabac_inline_x86(CABACContext *c, uint8_t *const state) + { + int bit, tmp; diff --git a/cross/ffmpeg7/patches/1034-jellyfin-0034-tune-dxva-align-for-intel-to-avoid-copy-on-qsv.patch b/cross/ffmpeg7/patches/1034-jellyfin-0034-tune-dxva-align-for-intel-to-avoid-copy-on-qsv.patch new file mode 100644 index 00000000000..bc9c047949e --- /dev/null +++ b/cross/ffmpeg7/patches/1034-jellyfin-0034-tune-dxva-align-for-intel-to-avoid-copy-on-qsv.patch @@ -0,0 +1,92 @@ +Index: FFmpeg/libavcodec/dxva2.c +=================================================================== +--- libavcodec/dxva2.c ++++ libavcodec/dxva2.c +@@ -615,6 +615,16 @@ int ff_dxva2_common_frame_params(AVCodec + else + surface_alignment = 16; + ++ /* align surfaces to 32 on Intel to keep in line with the MSDK impl, ++ which avoids the unnecessary resizing when mapping to QSV */ ++ if (device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ AVD3D11VADeviceContext *device_hwctx = device_ctx->hwctx; ++ if (device_hwctx->device_desc.VendorId == 0x8086) { ++ av_log(avctx, AV_LOG_DEBUG, "Intel DX11 device found, alignment changed!\n"); ++ surface_alignment = 32; ++ } ++ } ++ + /* 1 base work surface */ + num_surfaces = 1; + +Index: FFmpeg/libavutil/hwcontext_d3d11va.c +=================================================================== +--- libavutil/hwcontext_d3d11va.c ++++ libavutil/hwcontext_d3d11va.c +@@ -609,6 +609,35 @@ static int d3d11va_device_find_adapter_b + return -1; + } + ++static int d3d11va_check_uma_support(AVHWDeviceContext *ctx) ++{ ++ AVD3D11VADeviceContext *device_hwctx = ctx->hwctx; ++ D3D11_FEATURE_DATA_D3D11_OPTIONS2 data = {}; ++ HRESULT hr 
= ID3D11Device_CheckFeatureSupport(device_hwctx->device, ++ D3D11_FEATURE_D3D11_OPTIONS2, ++ &data, sizeof(data)); ++ return SUCCEEDED(hr) && data.UnifiedMemoryArchitecture; ++} ++ ++static void d3d11va_query_device_desc(AVHWDeviceContext *ctx, ++ DXGI_ADAPTER_DESC *desc) ++{ ++ AVD3D11VADeviceContext *device_hwctx = ctx->hwctx; ++ IDXGIDevice *pDXGIDevice = NULL; ++ IDXGIAdapter *pDXGIAdapter = NULL; ++ HRESULT hr = ID3D11Device_QueryInterface(device_hwctx->device, &IID_IDXGIDevice, ++ (void **)&pDXGIDevice); ++ if (SUCCEEDED(hr) && pDXGIDevice) { ++ hr = IDXGIDevice_GetParent(pDXGIDevice, &IID_IDXGIAdapter, ++ (void **)&pDXGIAdapter); ++ if (SUCCEEDED(hr) && pDXGIAdapter) { ++ IDXGIAdapter_GetDesc(pDXGIAdapter, desc); ++ IDXGIAdapter_Release(pDXGIAdapter); ++ } ++ IDXGIDevice_Release(pDXGIDevice); ++ } ++} ++ + static int d3d11va_device_create(AVHWDeviceContext *ctx, const char *device, + AVDictionary *opts, int flags) + { +@@ -686,6 +715,9 @@ static int d3d11va_device_create(AVHWDev + ID3D10Multithread_Release(pMultithread); + } + ++ device_hwctx->is_uma = d3d11va_check_uma_support(ctx); ++ d3d11va_query_device_desc(ctx, &device_hwctx->device_desc); ++ + #if !HAVE_UWP && HAVE_DXGIDEBUG_H + if (is_debug) { + HANDLE dxgidebug_dll = LoadLibrary("dxgidebug.dll"); +Index: FFmpeg/libavutil/hwcontext_d3d11va.h +=================================================================== +--- libavutil/hwcontext_d3d11va.h ++++ libavutil/hwcontext_d3d11va.h +@@ -94,6 +94,16 @@ typedef struct AVD3D11VADeviceContext { + void (*lock)(void *lock_ctx); + void (*unlock)(void *lock_ctx); + void *lock_ctx; ++ ++ /** ++ * DXGI adapter description of the device. ++ */ ++ DXGI_ADAPTER_DESC device_desc; ++ ++ /** ++ * Whether the device is an UMA device. 
++ */ ++ int is_uma; + } AVD3D11VADeviceContext; + + /** diff --git a/cross/ffmpeg7/patches/1035-jellyfin-0035-add-10bit-support-for-cuda-overlay.patch b/cross/ffmpeg7/patches/1035-jellyfin-0035-add-10bit-support-for-cuda-overlay.patch new file mode 100644 index 00000000000..c66863f5a0e --- /dev/null +++ b/cross/ffmpeg7/patches/1035-jellyfin-0035-add-10bit-support-for-cuda-overlay.patch @@ -0,0 +1,253 @@ +Index: FFmpeg/libavfilter/vf_overlay_cuda.c +=================================================================== +--- libavfilter/vf_overlay_cuda.c ++++ libavfilter/vf_overlay_cuda.c +@@ -50,6 +50,8 @@ + static const enum AVPixelFormat supported_main_formats[] = { + AV_PIX_FMT_NV12, + AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, + AV_PIX_FMT_NONE, + }; + +@@ -105,12 +107,15 @@ typedef struct OverlayCUDAContext { + enum AVPixelFormat in_format_overlay; + enum AVPixelFormat in_format_main; + ++ const AVPixFmtDescriptor *in_desc_main; ++ + AVBufferRef *hw_device_ctx; + AVCUDADeviceContext *hwctx; + + CUcontext cu_ctx; + CUmodule cu_module; +- CUfunction cu_func; ++ CUfunction cu_func_uchar; ++ CUfunction cu_func_ushort; + CUstream cu_stream; + + FFFrameSync fs; +@@ -185,7 +190,10 @@ static int set_expr(AVExpr **pexpr, cons + static int formats_match(const enum AVPixelFormat format_main, const enum AVPixelFormat format_overlay) { + switch(format_main) { + case AV_PIX_FMT_NV12: +- return format_overlay == AV_PIX_FMT_NV12; ++ case AV_PIX_FMT_P010: ++ case AV_PIX_FMT_P016: ++ return format_overlay == AV_PIX_FMT_NV12 || ++ format_overlay == AV_PIX_FMT_YUVA420P; + case AV_PIX_FMT_YUV420P: + return format_overlay == AV_PIX_FMT_YUV420P || + format_overlay == AV_PIX_FMT_YUVA420P; +@@ -200,11 +208,13 @@ static int formats_match(const enum AVPi + static int overlay_cuda_call_kernel( + OverlayCUDAContext *ctx, + int x_position, int y_position, +- uint8_t* main_data, int main_linesize, ++ CUdeviceptr main_data, int main_linesize, + int main_width, int main_height, 
+- uint8_t* overlay_data, int overlay_linesize, ++ int main_adj_x, int main_offset, ++ int main_depth, int main_shift, ++ CUdeviceptr overlay_data, int overlay_linesize, + int overlay_width, int overlay_height, +- uint8_t* alpha_data, int alpha_linesize, ++ CUdeviceptr alpha_data, int alpha_linesize, + int alpha_adj_x, int alpha_adj_y) { + + CudaFunctions *cu = ctx->hwctx->internal->cuda_dl; +@@ -212,14 +222,18 @@ static int overlay_cuda_call_kernel( + void* kernel_args[] = { + &x_position, &y_position, + &main_data, &main_linesize, ++ &main_adj_x, &main_offset, ++ &main_depth, &main_shift, + &overlay_data, &overlay_linesize, + &overlay_width, &overlay_height, + &alpha_data, &alpha_linesize, + &alpha_adj_x, &alpha_adj_y, + }; + ++#define DEPTH_BYTES(depth) (((depth) + 7) / 8) ++ + return CHECK_CU(cu->cuLaunchKernel( +- ctx->cu_func, ++ DEPTH_BYTES(main_depth) == 1 ? ctx->cu_func_uchar : ctx->cu_func_ushort, + DIV_UP(main_width, BLOCK_X), DIV_UP(main_height, BLOCK_Y), 1, + BLOCK_X, BLOCK_Y, 1, + 0, ctx->cu_stream, kernel_args, NULL)); +@@ -300,11 +314,13 @@ FF_ENABLE_DEPRECATION_WARNINGS + + overlay_cuda_call_kernel(ctx, + ctx->x_position, ctx->y_position, +- input_main->data[0], input_main->linesize[0], ++ (CUdeviceptr)input_main->data[0], input_main->linesize[0], + input_main->width, input_main->height, +- input_overlay->data[0], input_overlay->linesize[0], ++ 1, 0, ++ ctx->in_desc_main->comp[0].depth, ctx->in_desc_main->comp[0].shift, ++ (CUdeviceptr)input_overlay->data[0], input_overlay->linesize[0], + input_overlay->width, input_overlay->height, +- input_overlay->data[3], input_overlay->linesize[3], 1, 1); ++ (CUdeviceptr)input_overlay->data[3], input_overlay->linesize[3], 1, 1); + + // overlay rest planes depending on pixel format + +@@ -312,29 +328,42 @@ FF_ENABLE_DEPRECATION_WARNINGS + case AV_PIX_FMT_NV12: + overlay_cuda_call_kernel(ctx, + ctx->x_position, ctx->y_position / 2, +- input_main->data[1], input_main->linesize[1], ++ 
(CUdeviceptr)input_main->data[1], input_main->linesize[1], + input_main->width, input_main->height / 2, +- input_overlay->data[1], input_overlay->linesize[1], ++ 1, 0, ++ ctx->in_desc_main->comp[1].depth, ctx->in_desc_main->comp[1].shift, ++ (CUdeviceptr)input_overlay->data[1], input_overlay->linesize[1], + input_overlay->width, input_overlay->height / 2, + 0, 0, 0, 0); + break; + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUVA420P: +- overlay_cuda_call_kernel(ctx, +- ctx->x_position / 2 , ctx->y_position / 2, +- input_main->data[1], input_main->linesize[1], +- input_main->width / 2, input_main->height / 2, +- input_overlay->data[1], input_overlay->linesize[1], +- input_overlay->width / 2, input_overlay->height / 2, +- input_overlay->data[3], input_overlay->linesize[3], 2, 2); +- +- overlay_cuda_call_kernel(ctx, +- ctx->x_position / 2 , ctx->y_position / 2, +- input_main->data[2], input_main->linesize[2], +- input_main->width / 2, input_main->height / 2, +- input_overlay->data[2], input_overlay->linesize[2], +- input_overlay->width / 2, input_overlay->height / 2, +- input_overlay->data[3], input_overlay->linesize[3], 2, 2); ++ { ++ int is_main_semi = ctx->in_format_main == AV_PIX_FMT_NV12 || ++ ctx->in_format_main == AV_PIX_FMT_P010 || ++ ctx->in_format_main == AV_PIX_FMT_P016; ++ int main_adj_x = is_main_semi ? 2 : 1; ++ int plane_v = is_main_semi ? 
1 : 2; ++ overlay_cuda_call_kernel(ctx, ++ ctx->x_position / 2, ctx->y_position / 2, ++ (CUdeviceptr)input_main->data[1], input_main->linesize[1], ++ input_main->width / 2, input_main->height / 2, ++ main_adj_x, 0, ++ ctx->in_desc_main->comp[1].depth, ctx->in_desc_main->comp[1].shift, ++ (CUdeviceptr)input_overlay->data[1], input_overlay->linesize[1], ++ input_overlay->width / 2, input_overlay->height / 2, ++ (CUdeviceptr)input_overlay->data[3], input_overlay->linesize[3], 2, 2); ++ ++ overlay_cuda_call_kernel(ctx, ++ ctx->x_position / 2 , ctx->y_position / 2, ++ (CUdeviceptr)input_main->data[plane_v], input_main->linesize[plane_v], ++ input_main->width / 2, input_main->height / 2, ++ main_adj_x, 1, ++ ctx->in_desc_main->comp[plane_v].depth, ctx->in_desc_main->comp[plane_v].shift, ++ (CUdeviceptr)input_overlay->data[2], input_overlay->linesize[2], ++ input_overlay->width / 2, input_overlay->height / 2, ++ (CUdeviceptr)input_overlay->data[3], input_overlay->linesize[3], 2, 2); ++ } + break; + default: + av_log(ctx, AV_LOG_ERROR, "Passed unsupported overlay pixel format\n"); +@@ -462,6 +491,8 @@ static int overlay_cuda_config_output(AV + return AVERROR(ENOSYS); + } + ++ ctx->in_desc_main = av_pix_fmt_desc_get(ctx->in_format_main); ++ + // check overlay input formats + + if (!frames_ctx_overlay) { +@@ -515,7 +546,13 @@ static int overlay_cuda_config_output(AV + return err; + } + +- err = CHECK_CU(cu->cuModuleGetFunction(&ctx->cu_func, ctx->cu_module, "Overlay_Cuda")); ++ err = CHECK_CU(cu->cuModuleGetFunction(&ctx->cu_func_uchar, ctx->cu_module, "Overlay_Cuda_uchar")); ++ if (err < 0) { ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ return err; ++ } ++ ++ err = CHECK_CU(cu->cuModuleGetFunction(&ctx->cu_func_ushort, ctx->cu_module, "Overlay_Cuda_ushort")); + if (err < 0) { + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return err; +Index: FFmpeg/libavfilter/vf_overlay_cuda.cu +=================================================================== +--- 
libavfilter/vf_overlay_cuda.cu ++++ libavfilter/vf_overlay_cuda.cu +@@ -18,14 +18,15 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-extern "C" { +- +-__global__ void Overlay_Cuda( ++template<typename T0, typename T1> ++__inline__ __device__ void overlay_func( + int x_position, int y_position, +- unsigned char* main, int main_linesize, +- unsigned char* overlay, int overlay_linesize, ++ T0* main, int main_linesize, ++ int main_adj_x, int main_offset, ++ int main_depth, int main_shift, ++ T1* overlay, int overlay_linesize, + int overlay_w, int overlay_h, +- unsigned char* overlay_alpha, int alpha_linesize, ++ T1* overlay_alpha, int alpha_linesize, + int alpha_adj_x, int alpha_adj_y) + { + int x = blockIdx.x * blockDim.x + threadIdx.x; +@@ -44,11 +45,44 @@ __global__ void Overlay_Cuda( + + float alpha = 1.0; + if (alpha_linesize) { +- alpha = overlay_alpha[alpha_adj_x * overlay_x + alpha_adj_y * overlay_y * alpha_linesize] / 255.0f; ++ alpha = overlay_alpha[alpha_adj_x * overlay_x + alpha_adj_y * overlay_y * alpha_linesize] / 255.0f; + } + +- main[x + y*main_linesize] = alpha * overlay[overlay_x + overlay_y * overlay_linesize] + (1.0f - alpha) * main[x + y*main_linesize]; ++ int main_pos = main_adj_x * x + y * (main_linesize / sizeof(*main)) + (main_adj_x > 1 ? 
main_offset : 0); ++ if (main_depth > 8) { ++ T0 overlay_res = (T0)(alpha * overlay[overlay_x + overlay_y * overlay_linesize]) << (main_depth - 8); ++ T0 main_res = (T0)((1.0f - alpha) * (main[main_pos] >> main_shift)); ++ main[main_pos] = (T0)(overlay_res + main_res) << main_shift; ++ } else { ++ main[main_pos] = alpha * overlay[overlay_x + overlay_y * overlay_linesize] + (1.0f - alpha) * main[main_pos]; ++ } + } + ++extern "C" { ++ ++#define OVERLAY_VARIANT(NAME, TYPE0) \ ++__global__ void Overlay_Cuda_ ## NAME( \ ++ int x_position, int y_position, \ ++ TYPE0* main, int main_linesize, \ ++ int main_adj_x, int main_offset, \ ++ int main_depth, int main_shift, \ ++ unsigned char* overlay, int overlay_linesize, \ ++ int overlay_w, int overlay_h, \ ++ unsigned char* overlay_alpha, int alpha_linesize, \ ++ int alpha_adj_x, int alpha_adj_y) \ ++{ \ ++ overlay_func( \ ++ x_position, y_position, \ ++ main, main_linesize, \ ++ main_adj_x, main_offset, \ ++ main_depth, main_shift, \ ++ overlay, overlay_linesize, \ ++ overlay_w, overlay_h, \ ++ overlay_alpha, alpha_linesize, \ ++ alpha_adj_x, alpha_adj_y); \ + } + ++OVERLAY_VARIANT(uchar, unsigned char) ++OVERLAY_VARIANT(ushort, unsigned short) ++ ++} diff --git a/cross/ffmpeg7/patches/1036-jellyfin-0036-add-hdr-metadata-for-nvenc-hevc-encoder.patch b/cross/ffmpeg7/patches/1036-jellyfin-0036-add-hdr-metadata-for-nvenc-hevc-encoder.patch new file mode 100644 index 00000000000..d65e5db8810 --- /dev/null +++ b/cross/ffmpeg7/patches/1036-jellyfin-0036-add-hdr-metadata-for-nvenc-hevc-encoder.patch @@ -0,0 +1,101 @@ +Index: FFmpeg/libavcodec/nvenc.c +=================================================================== +--- libavcodec/nvenc.c ++++ libavcodec/nvenc.c +@@ -24,6 +24,7 @@ + + #include "nvenc.h" + #include "hevc_sei.h" ++#include "put_bits.h" + #if CONFIG_AV1_NVENC_ENCODER + #include "av1.h" + #endif +@@ -32,6 +33,7 @@ + #include "libavutil/hwcontext.h" + #include "libavutil/cuda_check.h" + #include 
"libavutil/imgutils.h" ++#include "libavutil/mastering_display_metadata.h" + #include "libavutil/mem.h" + #include "libavutil/pixdesc.h" + #include "libavutil/mathematics.h" +@@ -2506,6 +2508,80 @@ static int prepare_sei_data_array(AVCode + } + } + } ++ ++ if (avctx->codec->id == AV_CODEC_ID_HEVC) { ++ AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ ++ if (sd) { ++ AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *)sd->data; ++ // HEVC uses a g,b,r ordering, which we convert from a more natural r,g,b ++ const int mapping[3] = {2, 0, 1}; ++ const int chroma_den = 50000; ++ const int luma_den = 10000; ++ ++ if (mdm->has_primaries && mdm->has_luminance) { ++ void *tmp = av_fast_realloc(ctx->sei_data, ++ &ctx->sei_data_size, ++ (sei_count + 1) * sizeof(*ctx->sei_data)); ++ if (!tmp) { ++ res = AVERROR(ENOMEM); ++ goto error; ++ } else { ++ ctx->sei_data = tmp; ++ ctx->sei_data[sei_count].payloadSize = 24; ++ ctx->sei_data[sei_count].payloadType = SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME; ++ ctx->sei_data[sei_count].payload = av_mallocz(ctx->sei_data[sei_count].payloadSize); ++ if (ctx->sei_data[sei_count].payload) { ++ PutBitContext pb; ++ ++ init_put_bits(&pb, ctx->sei_data[sei_count].payload, ctx->sei_data[sei_count].payloadSize); ++ for (i = 0; i < 3; i++) { ++ const int j = mapping[i]; ++ put_bits(&pb, 16, (uint16_t)(chroma_den * av_q2d(mdm->display_primaries[j][0]))); ++ put_bits(&pb, 16, (uint16_t)(chroma_den * av_q2d(mdm->display_primaries[j][1]))); ++ } ++ put_bits(&pb, 16, (uint16_t)(chroma_den * av_q2d(mdm->white_point[0]))); ++ put_bits(&pb, 16, (uint16_t)(chroma_den * av_q2d(mdm->white_point[1]))); ++ put_bits32(&pb, (uint32_t)(luma_den * av_q2d(mdm->max_luminance))); ++ put_bits32(&pb, (uint32_t)(luma_den * av_q2d(mdm->min_luminance))); ++ flush_put_bits(&pb); ++ ++ sei_count++; ++ } ++ } ++ } ++ } ++ } ++ ++ if (avctx->codec->id == AV_CODEC_ID_HEVC) { ++ AVFrameSideData *sd = 
av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ ++ if (sd) { ++ AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data; ++ void *tmp = av_fast_realloc(ctx->sei_data, ++ &ctx->sei_data_size, ++ (sei_count + 1) * sizeof(*ctx->sei_data)); ++ if (!tmp) { ++ res = AVERROR(ENOMEM); ++ goto error; ++ } else { ++ ctx->sei_data = tmp; ++ ctx->sei_data[sei_count].payloadSize = 4; ++ ctx->sei_data[sei_count].payloadType = SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO; ++ ctx->sei_data[sei_count].payload = av_mallocz(ctx->sei_data[sei_count].payloadSize); ++ if (ctx->sei_data[sei_count].payload) { ++ PutBitContext pb; ++ ++ init_put_bits(&pb, ctx->sei_data[sei_count].payload, ctx->sei_data[sei_count].payloadSize); ++ put_bits(&pb, 16, (uint16_t)(FFMIN(clm->MaxCLL, 65535))); ++ put_bits(&pb, 16, (uint16_t)(FFMIN(clm->MaxFALL, 65535))); ++ flush_put_bits(&pb); ++ ++ sei_count++; ++ } ++ } ++ } ++ } + + if (!ctx->udu_sei) + return sei_count; diff --git a/cross/ffmpeg7/patches/1037-jellyfin-0037-add-d3d11va-dxva2-hevc-422-444-decoding-on-intel.patch b/cross/ffmpeg7/patches/1037-jellyfin-0037-add-d3d11va-dxva2-hevc-422-444-decoding-on-intel.patch new file mode 100644 index 00000000000..f0f52c1267d --- /dev/null +++ b/cross/ffmpeg7/patches/1037-jellyfin-0037-add-d3d11va-dxva2-hevc-422-444-decoding-on-intel.patch @@ -0,0 +1,396 @@ +Index: FFmpeg/libavcodec/d3d12va_hevc.c +=================================================================== +--- libavcodec/d3d12va_hevc.c ++++ libavcodec/d3d12va_hevc.c +@@ -33,7 +33,7 @@ + #define MAX_SLICES 256 + + typedef struct HEVCDecodePictureContext { +- DXVA_PicParams_HEVC pp; ++ ff_DXVA_PicParams_HEVC_Rext pp; + DXVA_Qmatrix_HEVC qm; + unsigned slice_count; + DXVA_Slice_HEVC_Short slice_short[MAX_SLICES]; +@@ -151,12 +151,12 @@ static int d3d12va_hevc_end_frame(AVCode + HEVCContext *h = avctx->priv_data; + HEVCDecodePictureContext *ctx_pic = h->ref->hwaccel_picture_private; + +- int scale = ctx_pic->pp.dwCodingParamToolFlags & 
1; ++ int scale = ctx_pic->pp.main.dwCodingParamToolFlags & 1; + + if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) + return -1; + +- return ff_d3d12va_common_end_frame(avctx, h->ref->frame, &ctx_pic->pp, sizeof(ctx_pic->pp), ++ return ff_d3d12va_common_end_frame(avctx, h->ref->frame, &ctx_pic->pp.main, sizeof(ctx_pic->pp.main), + scale ? &ctx_pic->qm : NULL, scale ? sizeof(ctx_pic->qm) : 0, update_input_arguments); + } + +Index: FFmpeg/libavcodec/dxva2.c +=================================================================== +--- libavcodec/dxva2.c ++++ libavcodec/dxva2.c +@@ -43,6 +43,12 @@ DEFINE_GUID(ff_DXVA2_ModeVC1_D, + DEFINE_GUID(ff_DXVA2_ModeVC1_D2010, 0x1b81beA4, 0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5); + DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main, 0x5b11d51b, 0x2f4c,0x4452,0xbc,0xc3,0x09,0xf2,0xa1,0x16,0x0c,0xc0); + DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main10,0x107af0e0, 0xef1a,0x4d19,0xab,0xa8,0x67,0xa1,0x63,0x07,0x3d,0x13); ++DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main12_Intel, 0x8ff8a3aa, 0xc456,0x4132,0xb6,0xef,0x69,0xd9,0xdd,0x72,0x57,0x1d); ++DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main422_10_Intel, 0xe484dcb8, 0xcac9,0x4859,0x99,0xf5,0x5c,0x0d,0x45,0x06,0x90,0x89); ++DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main422_12_Intel, 0xc23dd857, 0x874b,0x423c,0xb6,0xe0,0x82,0xce,0xaa,0x9b,0x11,0x8a); ++DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main444_Intel, 0x41a5af96, 0xe415,0x4b0c,0x9d,0x03,0x90,0x78,0x58,0xe2,0x3e,0x78); ++DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main444_10_Intel, 0x6a6a81ba, 0x912a,0x485d,0xb5,0x7f,0xcc,0xd2,0xd3,0x7b,0x8d,0x94); ++DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main444_12_Intel, 0x5b08e35d, 0x0c66,0x4c51,0xa6,0xf1,0x89,0xd0,0x0c,0xb2,0xc1,0x97); + DEFINE_GUID(ff_DXVA2_ModeVP9_VLD_Profile0,0x463707f8,0xa1d0,0x4585,0x87,0x6d,0x83,0xaa,0x6d,0x60,0xb8,0x9e); + DEFINE_GUID(ff_DXVA2_ModeVP9_VLD_10bit_Profile2,0xa4c749ef,0x6ecf,0x48aa,0x84,0x48,0x50,0xa7,0xa1,0x16,0x5f,0xf7); + 
DEFINE_GUID(ff_DXVA2_ModeAV1_VLD_Profile0,0xb8be4ccb,0xcf53,0x46ba,0x8d,0x59,0xd6,0xb8,0xa6,0xda,0x5d,0x2a); +@@ -69,6 +75,8 @@ static const int prof_hevc_main[] = { + AV_PROFILE_UNKNOWN}; + static const int prof_hevc_main10[] = {AV_PROFILE_HEVC_MAIN_10, + AV_PROFILE_UNKNOWN}; ++static const int prof_hevc_main_rext[] = {AV_PROFILE_HEVC_REXT, ++ AV_PROFILE_UNKNOWN}; + static const int prof_vp9_profile0[] = {AV_PROFILE_VP9_0, + AV_PROFILE_UNKNOWN}; + static const int prof_vp9_profile2[] = {AV_PROFILE_VP9_2, +@@ -97,6 +105,14 @@ static const dxva_mode dxva_modes[] = { + { &ff_DXVA2_ModeHEVC_VLD_Main10, AV_CODEC_ID_HEVC, prof_hevc_main10 }, + { &ff_DXVA2_ModeHEVC_VLD_Main, AV_CODEC_ID_HEVC, prof_hevc_main }, + ++ /* Intel specific HEVC/H.265 Main Rext mode */ ++ { &ff_DXVA2_ModeHEVC_VLD_Main12_Intel, AV_CODEC_ID_HEVC, prof_hevc_main_rext }, ++ { &ff_DXVA2_ModeHEVC_VLD_Main422_10_Intel, AV_CODEC_ID_HEVC, prof_hevc_main_rext }, ++ { &ff_DXVA2_ModeHEVC_VLD_Main422_12_Intel, AV_CODEC_ID_HEVC, prof_hevc_main_rext }, ++ { &ff_DXVA2_ModeHEVC_VLD_Main444_Intel, AV_CODEC_ID_HEVC, prof_hevc_main_rext }, ++ { &ff_DXVA2_ModeHEVC_VLD_Main444_10_Intel, AV_CODEC_ID_HEVC, prof_hevc_main_rext }, ++ { &ff_DXVA2_ModeHEVC_VLD_Main444_12_Intel, AV_CODEC_ID_HEVC, prof_hevc_main_rext }, ++ + /* VP8/9 */ + { &ff_DXVA2_ModeVP9_VLD_Profile0, AV_CODEC_ID_VP9, prof_vp9_profile0 }, + { &ff_DXVA2_ModeVP9_VLD_10bit_Profile2, AV_CODEC_ID_VP9, prof_vp9_profile2 }, +@@ -107,6 +123,22 @@ static const dxva_mode dxva_modes[] = { + { NULL, 0 }, + }; + ++static enum AVPixelFormat dxva_map_sw_to_sw_format(enum AVPixelFormat pix_fmt) ++{ ++ switch (pix_fmt) { ++ case AV_PIX_FMT_YUV420P: return AV_PIX_FMT_NV12; ++ case AV_PIX_FMT_YUV420P10: return AV_PIX_FMT_P010; ++ case AV_PIX_FMT_YUV420P12: return AV_PIX_FMT_P012; ++ case AV_PIX_FMT_YUV422P: return AV_PIX_FMT_YUYV422; ++ case AV_PIX_FMT_YUV422P10: return AV_PIX_FMT_Y210; ++ case AV_PIX_FMT_YUV422P12: return AV_PIX_FMT_Y212; ++ case AV_PIX_FMT_YUV444P: 
return AV_PIX_FMT_VUYX; ++ case AV_PIX_FMT_YUV444P10: return AV_PIX_FMT_XV30; ++ case AV_PIX_FMT_YUV444P12: return AV_PIX_FMT_XV36; ++ default: return AV_PIX_FMT_NV12; ++ } ++} ++ + static int dxva_get_decoder_configuration(AVCodecContext *avctx, + const void *cfg_list, + unsigned cfg_count) +@@ -245,7 +277,14 @@ static void dxva_list_guids_debug(AVCode + #if CONFIG_DXVA2 + if (sctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) { + const D3DFORMAT formats[] = {MKTAG('N', 'V', '1', '2'), +- MKTAG('P', '0', '1', '0')}; ++ MKTAG('P', '0', '1', '0'), ++ MKTAG('P', '0', '1', '6'), ++ MKTAG('Y', 'U', 'Y', '2'), ++ MKTAG('Y', '2', '1', '0'), ++ MKTAG('Y', '2', '1', '6'), ++ MKTAG('A', 'Y', 'U', 'V'), ++ MKTAG('Y', '4', '1', '0'), ++ MKTAG('Y', '4', '1', '6')}; + int i; + for (i = 0; i < FF_ARRAY_ELEMS(formats); i++) { + if (dxva2_validate_output(service, *guid, &formats[i])) +@@ -339,14 +378,28 @@ static int dxva2_get_decoder_configurati + return ret; + } + ++static D3DFORMAT dxva2_map_sw_to_hw_format(enum AVPixelFormat pix_fmt) ++{ ++ switch (pix_fmt) { ++ case AV_PIX_FMT_NV12: return MKTAG('N', 'V', '1', '2'); ++ case AV_PIX_FMT_P010: return MKTAG('P', '0', '1', '0'); ++ case AV_PIX_FMT_P012: return MKTAG('P', '0', '1', '6'); ++ case AV_PIX_FMT_YUYV422: return MKTAG('Y', 'U', 'Y', '2'); ++ case AV_PIX_FMT_Y210: return MKTAG('Y', '2', '1', '0'); ++ case AV_PIX_FMT_Y212: return MKTAG('Y', '2', '1', '6'); ++ case AV_PIX_FMT_VUYX: return MKTAG('A', 'Y', 'U', 'V'); ++ case AV_PIX_FMT_XV30: return MKTAG('Y', '4', '1', '0'); ++ case AV_PIX_FMT_XV36: return MKTAG('Y', '4', '1', '6'); ++ default: return D3DFMT_UNKNOWN; ++ } ++} ++ + static int dxva2_create_decoder(AVCodecContext *avctx) + { + FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx); + GUID *guid_list; + unsigned guid_count; + GUID device_guid; +- D3DFORMAT surface_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ? 
+- MKTAG('P', '0', '1', '0') : MKTAG('N', 'V', '1', '2'); + DXVA2_VideoDesc desc = { 0 }; + DXVA2_ConfigPictureDecode config; + HRESULT hr; +@@ -355,6 +408,7 @@ static int dxva2_create_decoder(AVCodecC + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; + AVDXVA2FramesContext *frames_hwctx = frames_ctx->hwctx; + AVDXVA2DeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; ++ D3DFORMAT surface_format = dxva2_map_sw_to_hw_format(frames_ctx->sw_format); + + hr = IDirect3DDeviceManager9_OpenDeviceHandle(device_hwctx->devmgr, + &device_handle); +@@ -455,10 +509,17 @@ static int d3d11va_get_decoder_configura + static DXGI_FORMAT d3d11va_map_sw_to_hw_format(enum AVPixelFormat pix_fmt) + { + switch (pix_fmt) { +- case AV_PIX_FMT_NV12: return DXGI_FORMAT_NV12; +- case AV_PIX_FMT_P010: return DXGI_FORMAT_P010; +- case AV_PIX_FMT_YUV420P: return DXGI_FORMAT_420_OPAQUE; +- default: return DXGI_FORMAT_UNKNOWN; ++ case AV_PIX_FMT_NV12: return DXGI_FORMAT_NV12; ++ case AV_PIX_FMT_P010: return DXGI_FORMAT_P010; ++ case AV_PIX_FMT_P012: return DXGI_FORMAT_P016; ++ case AV_PIX_FMT_YUYV422: return DXGI_FORMAT_YUY2; ++ case AV_PIX_FMT_Y210: return DXGI_FORMAT_Y210; ++ case AV_PIX_FMT_Y212: return DXGI_FORMAT_Y216; ++ case AV_PIX_FMT_VUYX: return DXGI_FORMAT_AYUV; ++ case AV_PIX_FMT_XV30: return DXGI_FORMAT_Y410; ++ case AV_PIX_FMT_XV36: return DXGI_FORMAT_Y416; ++ case AV_PIX_FMT_YUV420P: return DXGI_FORMAT_420_OPAQUE; ++ default: return DXGI_FORMAT_UNKNOWN; + } + } + +@@ -636,8 +697,7 @@ int ff_dxva2_common_frame_params(AVCodec + else + num_surfaces += 2; + +- frames_ctx->sw_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ? 
+- AV_PIX_FMT_P010 : AV_PIX_FMT_NV12; ++ frames_ctx->sw_format = dxva_map_sw_to_sw_format(avctx->sw_pix_fmt); + frames_ctx->width = FFALIGN(avctx->coded_width, surface_alignment); + frames_ctx->height = FFALIGN(avctx->coded_height, surface_alignment); + frames_ctx->initial_pool_size = num_surfaces; +Index: FFmpeg/libavcodec/dxva2_hevc.c +=================================================================== +--- libavcodec/dxva2_hevc.c ++++ libavcodec/dxva2_hevc.c +@@ -32,7 +32,7 @@ + #define MAX_SLICES 256 + + struct hevc_dxva2_picture_context { +- DXVA_PicParams_HEVC pp; ++ ff_DXVA_PicParams_HEVC_Rext pp; + DXVA_Qmatrix_HEVC qm; + unsigned slice_count; + DXVA_Slice_HEVC_Short slice_short[MAX_SLICES]; +@@ -58,19 +58,49 @@ static int get_refpic_index(const DXVA_P + } + + void ff_dxva2_hevc_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, +- DXVA_PicParams_HEVC *pp) ++ ff_DXVA_PicParams_HEVC_Rext *ppext) + { + const HEVCContext *h = avctx->priv_data; + const HEVCFrame *current_picture = h->ref; + const HEVCSPS *sps = h->ps.sps; + const HEVCPPS *pps = h->ps.pps; + int i, j; ++ DXVA_PicParams_HEVC *pp = &ppext->main; + +- memset(pp, 0, sizeof(*pp)); ++ memset(ppext, 0, sizeof(*ppext)); + + pp->PicWidthInMinCbsY = sps->min_cb_width; + pp->PicHeightInMinCbsY = sps->min_cb_height; + ++ if (sps->sps_range_extension_flag) { ++ ppext->dwRangeExtensionFlags |= (sps->transform_skip_rotation_enabled_flag << 0) | ++ (sps->transform_skip_context_enabled_flag << 1) | ++ (sps->implicit_rdpcm_enabled_flag << 2) | ++ (sps->explicit_rdpcm_enabled_flag << 3) | ++ (sps->extended_precision_processing_flag << 4) | ++ (sps->intra_smoothing_disabled_flag << 5) | ++ (sps->high_precision_offsets_enabled_flag << 5) | ++ (sps->persistent_rice_adaptation_enabled_flag << 7) | ++ (sps->cabac_bypass_alignment_enabled_flag << 8); ++ } ++ if (pps->pps_range_extensions_flag) { ++ ppext->dwRangeExtensionFlags |= (pps->cross_component_prediction_enabled_flag << 9) | ++ 
(pps->chroma_qp_offset_list_enabled_flag << 10); ++ if (pps->chroma_qp_offset_list_enabled_flag) { ++ ppext->diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth; ++ ppext->chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1; ++ for (i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { ++ ppext->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i]; ++ ppext->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i]; ++ } ++ } ++ ppext->log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma; ++ ppext->log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma; ++ if (pps->transform_skip_enabled_flag) { ++ ppext->log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2; ++ } ++ } ++ + pp->wFormatAndSequenceInfoFlags = (sps->chroma_format_idc << 0) | + (sps->separate_colour_plane_flag << 2) | + ((sps->bit_depth - 8) << 3) | +@@ -409,14 +439,15 @@ static int dxva2_hevc_end_frame(AVCodecC + { + HEVCContext *h = avctx->priv_data; + struct hevc_dxva2_picture_context *ctx_pic = h->ref->hwaccel_picture_private; +- int scale = ctx_pic->pp.dwCodingParamToolFlags & 1; ++ int scale = ctx_pic->pp.main.dwCodingParamToolFlags & 1; ++ int rext = avctx->profile == AV_PROFILE_HEVC_REXT; + int ret; + + if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) + return -1; + + ret = ff_dxva2_common_end_frame(avctx, h->ref->frame, +- &ctx_pic->pp, sizeof(ctx_pic->pp), ++ &ctx_pic->pp, rext ? sizeof(ctx_pic->pp) : sizeof(ctx_pic->pp.main), + scale ? &ctx_pic->qm : NULL, scale ? 
sizeof(ctx_pic->qm) : 0, + commit_bitstream_and_slice_buffer); + return ret; +Index: FFmpeg/libavcodec/dxva2_internal.h +=================================================================== +--- libavcodec/dxva2_internal.h ++++ libavcodec/dxva2_internal.h +@@ -134,6 +134,58 @@ typedef struct FFDXVASharedContext { + DXVA_CONTEXT_CFG(avctx, ctx) && \ + (ff_dxva2_is_d3d11(avctx) || DXVA2_VAR(ctx, surface_count))) + ++#if CONFIG_HEVC_D3D12VA_HWACCEL || CONFIG_HEVC_D3D11VA_HWACCEL || CONFIG_HEVC_D3D11VA2_HWACCEL || CONFIG_HEVC_DXVA2_HWACCEL ++/** +++ * Picture Parameters DXVA buffer struct for Rext is not specified in DXVA +++ * spec. The below structures come from Intel platform DDI definition, so they +++ * are currently Intel specific. +++ * +++ * For Nvidia and AMD platforms supporting HEVC Rext, it is expected +++ * the picture param information included in below structures is sufficient +++ * for underlying drivers supporting range extension. +++ */ ++#pragma pack(push, 1) ++typedef struct ++{ ++ DXVA_PicParams_HEVC main; ++ ++ // HEVC Range Extension. Fields are named the same as in HEVC spec. ++ __C89_NAMELESS union { ++ __C89_NAMELESS struct { ++ UINT32 transform_skip_rotation_enabled_flag : 1; ++ UINT32 transform_skip_context_enabled_flag : 1; ++ UINT32 implicit_rdpcm_enabled_flag : 1; ++ UINT32 explicit_rdpcm_enabled_flag : 1; ++ UINT32 extended_precision_processing_flag : 1; ++ UINT32 intra_smoothing_disabled_flag : 1; ++ UINT32 high_precision_offsets_enabled_flag : 1; ++ UINT32 persistent_rice_adaptation_enabled_flag : 1; ++ UINT32 cabac_bypass_alignment_enabled_flag : 1; ++ UINT32 cross_component_prediction_enabled_flag : 1; ++ UINT32 chroma_qp_offset_list_enabled_flag : 1; ++ // Indicates if luma bit depth equals to 16. If its value is 1, the ++ // corresponding bit_depth_luma_minus8 must be set to 0. ++ UINT32 BitDepthLuma16 : 1; ++ // Indicates if chroma bit depth equals to 16. 
If its value is 1, the ++ // corresponding bit_depth_chroma_minus8 must be set to 0. ++ UINT32 BitDepthChroma16 : 1; ++ UINT32 ReservedBits8 : 19; ++ }; ++ UINT32 dwRangeExtensionFlags; ++ }; ++ ++ UCHAR diff_cu_chroma_qp_offset_depth; // [0..3] ++ UCHAR chroma_qp_offset_list_len_minus1; // [0..5] ++ UCHAR log2_sao_offset_scale_luma; // [0..6] ++ UCHAR log2_sao_offset_scale_chroma; // [0..6] ++ UCHAR log2_max_transform_skip_block_size_minus2; ++ CHAR cb_qp_offset_list[6]; // [-12..12] ++ CHAR cr_qp_offset_list[6]; // [-12..12] ++ ++} ff_DXVA_PicParams_HEVC_Rext; ++#pragma pack(pop) ++#endif ++ + #if CONFIG_D3D12VA + unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx, + D3D12VADecodeContext *ctx, const AVFrame *frame, +@@ -171,7 +223,9 @@ void ff_dxva2_h264_fill_picture_paramete + + void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm); + +-void ff_dxva2_hevc_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_PicParams_HEVC *pp); ++#if CONFIG_HEVC_D3D12VA_HWACCEL || CONFIG_HEVC_D3D11VA_HWACCEL || CONFIG_HEVC_D3D11VA2_HWACCEL || CONFIG_HEVC_DXVA2_HWACCEL ++void ff_dxva2_hevc_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, ff_DXVA_PicParams_HEVC_Rext *ppext); ++#endif + + void ff_dxva2_hevc_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_HEVC *qm); + +Index: FFmpeg/libavcodec/hevcdec.c +=================================================================== +--- libavcodec/hevcdec.c ++++ libavcodec/hevcdec.c +@@ -465,6 +465,13 @@ static enum AVPixelFormat get_format(HEV + #endif + break; + case AV_PIX_FMT_YUV444P: ++#if CONFIG_HEVC_DXVA2_HWACCEL ++ *fmt++ = AV_PIX_FMT_DXVA2_VLD; ++#endif ++#if CONFIG_HEVC_D3D11VA_HWACCEL ++ *fmt++ = AV_PIX_FMT_D3D11VA_VLD; ++ *fmt++ = AV_PIX_FMT_D3D11; ++#endif + #if CONFIG_HEVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; + #endif +@@ -483,6 +490,13 @@ static enum AVPixelFormat 
get_format(HEV + break; + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV422P10LE: ++#if CONFIG_HEVC_DXVA2_HWACCEL ++ *fmt++ = AV_PIX_FMT_DXVA2_VLD; ++#endif ++#if CONFIG_HEVC_D3D11VA_HWACCEL ++ *fmt++ = AV_PIX_FMT_D3D11VA_VLD; ++ *fmt++ = AV_PIX_FMT_D3D11; ++#endif + #if CONFIG_HEVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; + #endif +@@ -500,6 +514,13 @@ static enum AVPixelFormat get_format(HEV + /* NOTE: fallthrough */ + case AV_PIX_FMT_YUV420P12: + case AV_PIX_FMT_YUV444P12: ++#if CONFIG_HEVC_DXVA2_HWACCEL ++ *fmt++ = AV_PIX_FMT_DXVA2_VLD; ++#endif ++#if CONFIG_HEVC_D3D11VA_HWACCEL ++ *fmt++ = AV_PIX_FMT_D3D11VA_VLD; ++ *fmt++ = AV_PIX_FMT_D3D11; ++#endif + #if CONFIG_HEVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; + #endif +@@ -514,6 +535,13 @@ static enum AVPixelFormat get_format(HEV + #endif + break; + case AV_PIX_FMT_YUV422P12: ++#if CONFIG_HEVC_DXVA2_HWACCEL ++ *fmt++ = AV_PIX_FMT_DXVA2_VLD; ++#endif ++#if CONFIG_HEVC_D3D11VA_HWACCEL ++ *fmt++ = AV_PIX_FMT_D3D11VA_VLD; ++ *fmt++ = AV_PIX_FMT_D3D11; ++#endif + #if CONFIG_HEVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; + #endif diff --git a/cross/ffmpeg7/patches/1038-jellyfin-0038-default-vaapi-scaler-algorithm-to-fast.patch b/cross/ffmpeg7/patches/1038-jellyfin-0038-default-vaapi-scaler-algorithm-to-fast.patch new file mode 100644 index 00000000000..f8b060cecae --- /dev/null +++ b/cross/ffmpeg7/patches/1038-jellyfin-0038-default-vaapi-scaler-algorithm-to-fast.patch @@ -0,0 +1,13 @@ +Index: FFmpeg/libavfilter/vf_scale_vaapi.c +=================================================================== +--- libavfilter/vf_scale_vaapi.c ++++ libavfilter/vf_scale_vaapi.c +@@ -224,7 +224,7 @@ static const AVOption scale_vaapi_option + { "format", "Output video format (software format of hardware frames)", + OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, + { "mode", "Scaling mode", +- OFFSET(mode), AV_OPT_TYPE_INT, { .i64 = VA_FILTER_SCALING_HQ }, ++ OFFSET(mode), AV_OPT_TYPE_INT, { .i64 = 
VA_FILTER_SCALING_FAST }, + 0, VA_FILTER_SCALING_NL_ANAMORPHIC, FLAGS, .unit = "mode" }, + { "default", "Use the default (depend on the driver) scaling algorithm", + 0, AV_OPT_TYPE_CONST, { .i64 = VA_FILTER_SCALING_DEFAULT }, 0, 0, FLAGS, .unit = "mode" }, diff --git a/cross/ffmpeg7/patches/1039-jellyfin-0039-dont-fail-on-vaapi-to-drm-export-vasync.patch b/cross/ffmpeg7/patches/1039-jellyfin-0039-dont-fail-on-vaapi-to-drm-export-vasync.patch new file mode 100644 index 00000000000..3cb4416b3b3 --- /dev/null +++ b/cross/ffmpeg7/patches/1039-jellyfin-0039-dont-fail-on-vaapi-to-drm-export-vasync.patch @@ -0,0 +1,15 @@ +Index: FFmpeg/libavutil/hwcontext_vaapi.c +=================================================================== +--- libavutil/hwcontext_vaapi.c ++++ libavutil/hwcontext_vaapi.c +@@ -1358,9 +1358,8 @@ static int vaapi_map_to_drm_esh(AVHWFram + + vas = vaSyncSurface(hwctx->display, surface_id); + if (vas != VA_STATUS_SUCCESS) { +- av_log(hwfc, AV_LOG_ERROR, "Failed to sync surface " ++ av_log(hwfc, AV_LOG_WARNING, "Failed to sync surface " + "%#x: %d (%s).\n", surface_id, vas, vaErrorStr(vas)); +- return AVERROR(EIO); + } + } + diff --git a/cross/ffmpeg7/patches/1040-jellyfin-0040-skip-loading-plugins-on-vpl-runtime.patch b/cross/ffmpeg7/patches/1040-jellyfin-0040-skip-loading-plugins-on-vpl-runtime.patch new file mode 100644 index 00000000000..ba4b101f75a --- /dev/null +++ b/cross/ffmpeg7/patches/1040-jellyfin-0040-skip-loading-plugins-on-vpl-runtime.patch @@ -0,0 +1,22 @@ +Index: FFmpeg/libavcodec/qsv.c +=================================================================== +--- libavcodec/qsv.c ++++ libavcodec/qsv.c +@@ -406,9 +406,17 @@ static int qsv_load_plugins(mfxSession s + void *logctx) + { + #if QSV_HAVE_USER_PLUGIN ++ mfxVersion ver; + if (!load_plugins || !*load_plugins) + return 0; + ++ // Plugins have been removed in VPL runtime, don't load them ++ // if using the VPL runtime with MSDK loader to avoid failure. 
++ if (MFXQueryVersion(session, &ver) == MFX_ERR_NONE && ++ QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 255)) { ++ return 0; ++ } ++ + while (*load_plugins) { + mfxPluginUID uid; + mfxStatus ret; diff --git a/cross/ffmpeg7/patches/1041-jellyfin-0041-tune-the-default-bf-for-qsv-enc.patch b/cross/ffmpeg7/patches/1041-jellyfin-0041-tune-the-default-bf-for-qsv-enc.patch new file mode 100644 index 00000000000..baa4f5fd0e3 --- /dev/null +++ b/cross/ffmpeg7/patches/1041-jellyfin-0041-tune-the-default-bf-for-qsv-enc.patch @@ -0,0 +1,45 @@ +Index: FFmpeg/libavcodec/qsvenc_av1.c +=================================================================== +--- libavcodec/qsvenc_av1.c ++++ libavcodec/qsvenc_av1.c +@@ -203,8 +203,8 @@ static const AVClass class = { + + static const FFCodecDefault qsv_enc_defaults[] = { + { "b", "0" }, +- { "g", "-1" }, +- { "bf", "-1" }, ++ { "g", "250" }, ++ { "bf", "3" }, + { "refs", "0" }, + { NULL }, + }; +Index: FFmpeg/libavcodec/qsvenc_h264.c +=================================================================== +--- libavcodec/qsvenc_h264.c ++++ libavcodec/qsvenc_h264.c +@@ -180,8 +180,8 @@ static const AVClass class = { + static const FFCodecDefault qsv_enc_defaults[] = { + { "b", "0" }, + { "refs", "0" }, +- { "g", "-1" }, +- { "bf", "-1" }, ++ { "g", "250" }, ++ { "bf", "3" }, + { "qmin", "-1" }, + { "qmax", "-1" }, + { "trellis", "-1" }, +Index: FFmpeg/libavcodec/qsvenc_hevc.c +=================================================================== +--- libavcodec/qsvenc_hevc.c ++++ libavcodec/qsvenc_hevc.c +@@ -379,8 +379,8 @@ static const AVClass class = { + static const FFCodecDefault qsv_enc_defaults[] = { + { "b", "0" }, + { "refs", "0" }, +- { "g", "248" }, +- { "bf", "-1" }, ++ { "g", "250" }, ++ { "bf", "4" }, + { "qmin", "-1" }, + { "qmax", "-1" }, + { "trellis", "-1" }, diff --git a/cross/ffmpeg7/patches/1042-jellyfin-0042-allow-vaapi-import-drm-prime2-planar-formats.patch 
b/cross/ffmpeg7/patches/1042-jellyfin-0042-allow-vaapi-import-drm-prime2-planar-formats.patch new file mode 100644 index 00000000000..1c192a01020 --- /dev/null +++ b/cross/ffmpeg7/patches/1042-jellyfin-0042-allow-vaapi-import-drm-prime2-planar-formats.patch @@ -0,0 +1,62 @@ +Index: FFmpeg/libavutil/hwcontext_vaapi.c +=================================================================== +--- libavutil/hwcontext_vaapi.c ++++ libavutil/hwcontext_vaapi.c +@@ -170,6 +170,9 @@ static const VAAPIFormatDescriptor vaapi + #ifdef VA_FOURCC_X2R10G10B10 + MAP(X2R10G10B10, RGB32_10, X2RGB10, 0), + #endif ++#ifdef VA_FOURCC_X2B10G10R10 ++ MAP(X2B10G10R10, RGB32_10, X2BGR10, 0), ++#endif + #ifdef VA_FOURCC_Y410 + // libva doesn't include a fourcc for XV30 and the driver only declares + // support for Y410, so we must fudge the mapping here. +@@ -1047,9 +1050,11 @@ static const struct { + DRM_MAP(NV12, 1, DRM_FORMAT_NV12), + #if defined(VA_FOURCC_P010) && defined(DRM_FORMAT_R16) + DRM_MAP(P010, 2, DRM_FORMAT_R16, DRM_FORMAT_RG1616), ++ DRM_MAP(P010, 2, DRM_FORMAT_R16, DRM_FORMAT_GR1616), + #endif + #if defined(VA_FOURCC_P012) && defined(DRM_FORMAT_R16) + DRM_MAP(P012, 2, DRM_FORMAT_R16, DRM_FORMAT_RG1616), ++ DRM_MAP(P012, 2, DRM_FORMAT_R16, DRM_FORMAT_GR1616), + #endif + DRM_MAP(BGRA, 1, DRM_FORMAT_ARGB8888), + DRM_MAP(BGRX, 1, DRM_FORMAT_XRGB8888), +@@ -1073,6 +1078,9 @@ static const struct { + #if defined(VA_FOURCC_X2R10G10B10) && defined(DRM_FORMAT_XRGB2101010) + DRM_MAP(X2R10G10B10, 1, DRM_FORMAT_XRGB2101010), + #endif ++#if defined(VA_FOURCC_X2B10G10R10) && defined(DRM_FORMAT_XBGR2101010) ++ DRM_MAP(X2B10G10R10, 1, DRM_FORMAT_XBGR2101010), ++#endif + }; + #undef DRM_MAP + +@@ -1128,12 +1136,6 @@ static int vaapi_map_from_drm(AVHWFrames + + desc = (AVDRMFrameDescriptor*)src->data[0]; + +- if (desc->nb_objects != 1) { +- av_log(dst_fc, AV_LOG_ERROR, "VAAPI can only map frames " +- "made from a single DRM object.\n"); +- return AVERROR(EINVAL); +- } +- + va_fourcc = 0; + for (i = 
0; i < FF_ARRAY_ELEMS(vaapi_drm_format_map); i++) { + if (desc->nb_layers != vaapi_drm_format_map[i].nb_layer_formats) +@@ -1273,6 +1275,12 @@ static int vaapi_map_from_drm(AVHWFrames + buffer_attrs, FF_ARRAY_ELEMS(buffer_attrs)); + } + #else ++ if (desc->nb_objects != 1) { ++ av_log(dst_fc, AV_LOG_ERROR, "VAAPI can only map frames " ++ "made from a single DRM object.\n"); ++ return AVERROR(EINVAL); ++ } ++ + buffer_handle = desc->objects[0].fd; + buffer_desc.pixel_format = va_fourcc; + buffer_desc.width = src_fc->width; diff --git a/cross/ffmpeg7/patches/1043-jellyfin-0043-add-vui-info-to-the-seq-header-of-hevc-vaapi-encoder.patch b/cross/ffmpeg7/patches/1043-jellyfin-0043-add-vui-info-to-the-seq-header-of-hevc-vaapi-encoder.patch new file mode 100644 index 00000000000..7599e7383b8 --- /dev/null +++ b/cross/ffmpeg7/patches/1043-jellyfin-0043-add-vui-info-to-the-seq-header-of-hevc-vaapi-encoder.patch @@ -0,0 +1,31 @@ +Index: FFmpeg/libavcodec/vaapi_encode_h265.c +=================================================================== +--- libavcodec/vaapi_encode_h265.c ++++ libavcodec/vaapi_encode_h265.c +@@ -691,7 +691,25 @@ static int vaapi_encode_h265_init_sequen + sps->log2_min_pcm_luma_coding_block_size_minus3 + + sps->log2_diff_max_min_pcm_luma_coding_block_size, + +- .vui_parameters_present_flag = 0, ++ .vui_parameters_present_flag = sps->vui_parameters_present_flag, ++ .vui_fields.bits = { ++ .aspect_ratio_info_present_flag = vui->aspect_ratio_info_present_flag, ++ .vui_timing_info_present_flag = vui->vui_timing_info_present_flag, ++ .bitstream_restriction_flag = vui->bitstream_restriction_flag, ++ .motion_vectors_over_pic_boundaries_flag = ++ vui->motion_vectors_over_pic_boundaries_flag, ++ .restricted_ref_pic_lists_flag = vui->restricted_ref_pic_lists_flag, ++ .log2_max_mv_length_horizontal = vui->log2_max_mv_length_horizontal, ++ .log2_max_mv_length_vertical = vui->log2_max_mv_length_vertical, ++ }, ++ ++ .aspect_ratio_idc = vui->aspect_ratio_idc, ++ 
.sar_width = vui->sar_width, ++ .sar_height = vui->sar_height, ++ .vui_num_units_in_tick = vui->vui_num_units_in_tick, ++ .vui_time_scale = vui->vui_time_scale, ++ .max_bytes_per_pic_denom = vui->max_bytes_per_pic_denom, ++ .max_bits_per_min_cu_denom = vui->max_bits_per_min_cu_denom, + }; + + *vpic = (VAEncPictureParameterBufferHEVC) { diff --git a/cross/ffmpeg7/patches/1044-jellyfin-0044-sync-intel-d3d11va-textures-before-mapping-to-opencl.patch b/cross/ffmpeg7/patches/1044-jellyfin-0044-sync-intel-d3d11va-textures-before-mapping-to-opencl.patch new file mode 100644 index 00000000000..27c33776211 --- /dev/null +++ b/cross/ffmpeg7/patches/1044-jellyfin-0044-sync-intel-d3d11va-textures-before-mapping-to-opencl.patch @@ -0,0 +1,299 @@ +Index: FFmpeg/libavcodec/dxva2.c +=================================================================== +--- libavcodec/dxva2.c ++++ libavcodec/dxva2.c +@@ -714,8 +714,10 @@ int ff_dxva2_common_frame_params(AVCodec + #if CONFIG_D3D11VA + if (frames_ctx->format == AV_PIX_FMT_D3D11) { + AVD3D11VAFramesContext *frames_hwctx = frames_ctx->hwctx; ++ AVD3D11VADeviceContext *device_hwctx = device_ctx->hwctx; + + frames_hwctx->BindFlags |= D3D11_BIND_DECODER; ++ frames_hwctx->require_sync = device_hwctx->device_desc.VendorId == 0x8086; + } + #endif + +Index: FFmpeg/libavfilter/qsvvpp.c +=================================================================== +--- libavfilter/qsvvpp.c ++++ libavfilter/qsvvpp.c +@@ -641,6 +641,9 @@ static int init_vpp_session(AVFilterCont + + out_frames_hwctx->frame_type = s->out_mem_mode; + ++ if (in_frames_hwctx) ++ out_frames_hwctx->require_sync = in_frames_hwctx->require_sync; ++ + ret = av_hwframe_ctx_init(out_frames_ref); + if (ret < 0) { + av_buffer_unref(&out_frames_ref); +Index: FFmpeg/libavutil/hwcontext_d3d11va.h +=================================================================== +--- libavutil/hwcontext_d3d11va.h ++++ libavutil/hwcontext_d3d11va.h +@@ -183,6 +183,11 @@ typedef struct AVD3D11VAFramesContext 
{ + * This field is ignored/invalid if a user-allocated texture is provided. + */ + AVD3D11FrameDescriptor *texture_infos; ++ ++ /** ++ * Whether the frames require extra sync when exporting as external memory. ++ */ ++ int require_sync; + } AVD3D11VAFramesContext; + + #endif /* AVUTIL_HWCONTEXT_D3D11VA_H */ +Index: FFmpeg/libavutil/hwcontext_opencl.c +=================================================================== +--- libavutil/hwcontext_opencl.c ++++ libavutil/hwcontext_opencl.c +@@ -181,6 +181,10 @@ typedef struct OpenCLFramesContext { + int nb_mapped_frames; + AVOpenCLFrameDescriptor *mapped_frames; + #endif ++#if HAVE_OPENCL_D3D11 ++ ID3D11Texture2D *sync_tex_2x2; ++ ID3D11Asynchronous *sync_point; ++#endif + } OpenCLFramesContext; + + +@@ -1809,7 +1813,16 @@ static void opencl_frames_uninit(AVHWFra + av_freep(&priv->mapped_frames); + } + #endif +- ++#if HAVE_OPENCL_D3D11 ++ if (priv->sync_tex_2x2) { ++ ID3D11Texture2D_Release(priv->sync_tex_2x2); ++ priv->sync_tex_2x2 = NULL; ++ } ++ if (priv->sync_point) { ++ ID3D11Asynchronous_Release(priv->sync_point); ++ priv->sync_point = NULL; ++ } ++#endif + if (priv->command_queue) { + cle = clReleaseCommandQueue(priv->command_queue); + if (cle != CL_SUCCESS) { +@@ -2583,6 +2596,98 @@ fail: + + #if HAVE_OPENCL_D3D11 + ++static int opencl_init_d3d11_sync_point(OpenCLFramesContext *priv, ++ AVD3D11VADeviceContext *device_hwctx, ++ ID3D11Texture2D *src_texture, ++ void *logctx) ++{ ++ HRESULT hr; ++ D3D11_QUERY_DESC query = { D3D11_QUERY_EVENT, 0 }; ++ D3D11_TEXTURE2D_DESC cur_desc = { 0 }; ++ D3D11_TEXTURE2D_DESC src_desc = { 0 }; ++ D3D11_TEXTURE2D_DESC dst_desc = { ++ .Width = 2, ++ .Height = 2, ++ .MipLevels = 1, ++ .SampleDesc = { .Count = 1 }, ++ .ArraySize = 1, ++ .Usage = D3D11_USAGE_DEFAULT, ++ }; ++ ++ if (!priv || !device_hwctx || !src_texture) ++ return AVERROR(EINVAL); ++ ++ ID3D11Texture2D_GetDesc(src_texture, &src_desc); ++ if (priv->sync_tex_2x2) { ++ ID3D11Texture2D_GetDesc(priv->sync_tex_2x2, 
&cur_desc); ++ if (src_desc.Format != cur_desc.Format) { ++ ID3D11Texture2D_Release(priv->sync_tex_2x2); ++ priv->sync_tex_2x2 = NULL; ++ } ++ } ++ if (!priv->sync_tex_2x2) { ++ dst_desc.Format = src_desc.Format; ++ hr = ID3D11Device_CreateTexture2D(device_hwctx->device, ++ &dst_desc, NULL, &priv->sync_tex_2x2); ++ if (FAILED(hr)) { ++ av_log(logctx, AV_LOG_ERROR, "Could not create the sync texture (%lx)\n", (long)hr); ++ goto fail; ++ } ++ } ++ ++ if (!priv->sync_point) { ++ hr = ID3D11Device_CreateQuery(device_hwctx->device, &query, ++ (ID3D11Query **)&priv->sync_point); ++ if (FAILED(hr)) { ++ av_log(logctx, AV_LOG_ERROR, "Could not create the sync point (%lx)\n", (long)hr); ++ goto fail; ++ } ++ } ++ ++ return 0; ++fail: ++ if (priv->sync_tex_2x2) { ++ ID3D11Texture2D_Release(priv->sync_tex_2x2); ++ priv->sync_tex_2x2 = NULL; ++ } ++ if (priv->sync_point) { ++ ID3D11Asynchronous_Release(priv->sync_point); ++ priv->sync_point = NULL; ++ } ++ return AVERROR_UNKNOWN; ++} ++ ++static void opencl_sync_d3d11_texture(OpenCLFramesContext *priv, ++ AVD3D11VADeviceContext *device_hwctx, ++ ID3D11Texture2D *texture, ++ unsigned subresource, ++ void *logctx) ++{ ++ const D3D11_BOX box_2x2 = { 0, 0, 0, 2, 2, 1 }; ++ BOOL data = FALSE; ++ ++ if (!priv || !device_hwctx || !texture) ++ return; ++ ++ av_log(logctx, AV_LOG_DEBUG, "Sync D3D11 texture %d\n", subresource); ++ ++ device_hwctx->lock(device_hwctx->lock_ctx); ++ ID3D11DeviceContext_Begin(device_hwctx->device_context, priv->sync_point); ++ ++ /* Force DX to wait for DXVA DEC/VP by copying 2x2 pixels, which can act as a sync point */ ++ ID3D11DeviceContext_CopySubresourceRegion(device_hwctx->device_context, ++ (ID3D11Resource *)priv->sync_tex_2x2, 0, 0, 0, 0, ++ (ID3D11Resource *)texture, subresource, &box_2x2); ++ ID3D11DeviceContext_Flush(device_hwctx->device_context); ++ ID3D11DeviceContext_End(device_hwctx->device_context, priv->sync_point); ++ ++ while ((S_OK != 
ID3D11DeviceContext_GetData(device_hwctx->device_context, ++ priv->sync_point, ++ &data, ++ sizeof(data), 0)) || (data != TRUE)) { /* do nothing */ } ++ device_hwctx->unlock(device_hwctx->lock_ctx); ++} ++ + #if CONFIG_LIBMFX + + static void opencl_unmap_from_d3d11_qsv(AVHWFramesContext *dst_fc, +@@ -2623,6 +2728,14 @@ static void opencl_unmap_from_d3d11_qsv( + static int opencl_map_from_d3d11_qsv(AVHWFramesContext *dst_fc, AVFrame *dst, + const AVFrame *src, int flags) + { ++ AVHWFramesContext *src_fc = ++ (AVHWFramesContext*)src->hw_frames_ctx->data; ++ AVHWDeviceContext *src_dev = src_fc->device_ctx; ++ FFHWDeviceContext *fsrc_dev = (FFHWDeviceContext*)src_dev; ++ AVHWDeviceContext *src_subdev = ++ (AVHWDeviceContext*)fsrc_dev->source_device->data; ++ AVD3D11VADeviceContext *device_hwctx = src_subdev->hwctx; ++ AVQSVFramesContext *src_hwctx = src_fc->hwctx; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->hwctx; + OpenCLFramesContext *frames_priv = dst_fc->hwctx; + AVOpenCLDeviceContext *dst_dev = &device_priv->p; +@@ -2652,6 +2765,21 @@ static int opencl_map_from_d3d11_qsv(AVH + } + } + ++ if (src_hwctx->require_sync) { ++ err = opencl_init_d3d11_sync_point(frames_priv, ++ device_hwctx, ++ tex, dst_fc); ++ if (err < 0) ++ return err; ++ ++ if (frames_priv->sync_point || frames_priv->sync_tex_2x2) { ++ opencl_sync_d3d11_texture(frames_priv, ++ device_hwctx, ++ tex, (derived_frames ? 
index : 0), ++ dst_fc); ++ } ++ } ++ + if (derived_frames) { + desc = &frames_priv->mapped_frames[index]; + } else { +@@ -2843,6 +2971,10 @@ static void opencl_unmap_from_d3d11(AVHW + static int opencl_map_from_d3d11(AVHWFramesContext *dst_fc, AVFrame *dst, + const AVFrame *src, int flags) + { ++ AVHWFramesContext *src_fc = ++ (AVHWFramesContext*)src->hw_frames_ctx->data; ++ AVD3D11VAFramesContext *src_hwctx = src_fc->hwctx; ++ AVD3D11VADeviceContext *device_hwctx = src_fc->device_ctx->hwctx; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->hwctx; + OpenCLFramesContext *frames_priv = dst_fc->hwctx; + AVOpenCLFrameDescriptor *desc; +@@ -2873,6 +3005,14 @@ static int opencl_map_from_d3d11(AVHWFra + mem_objs = device_priv->d3d11_map_amd ? &desc->planes[nb_planes] + : desc->planes; + ++ if (src_hwctx->require_sync && ++ frames_priv->sync_point && frames_priv->sync_tex_2x2) { ++ opencl_sync_d3d11_texture(frames_priv, ++ device_hwctx, ++ (ID3D11Texture2D*)src->data[0], index, ++ dst_fc); ++ } ++ + cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR( + frames_priv->command_queue, num_objs, mem_objs, + 0, NULL, &event); +@@ -2912,6 +3052,7 @@ fail: + static int opencl_frames_derive_from_d3d11(AVHWFramesContext *dst_fc, + AVHWFramesContext *src_fc, int flags) + { ++ AVD3D11VADeviceContext *device_hwctx = src_fc->device_ctx->hwctx; + AVD3D11VAFramesContext *src_hwctx = src_fc->hwctx; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->hwctx; + AVOpenCLDeviceContext *dst_dev = &device_priv->p; +@@ -2954,6 +3095,14 @@ static int opencl_frames_derive_from_d3d + if (!frames_priv->mapped_frames) + return AVERROR(ENOMEM); + ++ if (src_hwctx->require_sync) { ++ err = opencl_init_d3d11_sync_point(frames_priv, ++ device_hwctx, ++ src_hwctx->texture, dst_fc); ++ if (err < 0) ++ return err; ++ } ++ + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + desc->nb_planes = nb_planes; +Index: 
FFmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- libavutil/hwcontext_qsv.c ++++ libavutil/hwcontext_qsv.c +@@ -2016,6 +2016,7 @@ static int qsv_dynamic_frames_derive_to( + } else { + dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + } ++ dst_hwctx->require_sync = src_hwctx->require_sync; + } + break; + #endif +@@ -2091,6 +2092,7 @@ static int qsv_fixed_frames_derive_to(AV + } else { + dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + } ++ dst_hwctx->require_sync = src_hwctx->require_sync; + } + break; + #endif +Index: FFmpeg/libavutil/hwcontext_qsv.h +=================================================================== +--- libavutil/hwcontext_qsv.h ++++ libavutil/hwcontext_qsv.h +@@ -81,6 +81,11 @@ typedef struct AVQSVFramesContext { + * pool have the same mfxFrameInfo. + */ + mfxFrameInfo *info; ++ ++ /** ++ * Whether the frames require extra sync when exporting as external memory. ++ */ ++ int require_sync; + } AVQSVFramesContext; + + #endif /* AVUTIL_HWCONTEXT_QSV_H */ diff --git a/cross/ffmpeg7/patches/1045-jellyfin-0045-add-icon-for-windows-version-ffmpeg.patch b/cross/ffmpeg7/patches/1045-jellyfin-0045-add-icon-for-windows-version-ffmpeg.patch new file mode 100644 index 00000000000..da4e97ec520 --- /dev/null +++ b/cross/ffmpeg7/patches/1045-jellyfin-0045-add-icon-for-windows-version-ffmpeg.patch @@ -0,0 +1,56 @@ +Index: FFmpeg/fftools/Makefile +=================================================================== +--- fftools/Makefile ++++ fftools/Makefile +@@ -40,6 +40,12 @@ endef + + $(foreach P,$(AVPROGS-yes),$(eval $(call DOFFTOOL,$(P)))) + ++ifdef HAVE_GNU_WINDRES ++ifneq (,$(wildcard fftools/fftools.ico)) ++CC_DEPFLAGS += -DHAVE_FFTOOLS_ICO ++endif ++endif ++ + all: $(AVPROGS) + + fftools/ffprobe.o fftools/cmdutils.o: libavutil/ffversion.h | fftools +Index: FFmpeg/fftools/fftoolsres.rc +=================================================================== +--- 
fftools/fftoolsres.rc ++++ fftools/fftoolsres.rc +@@ -1,2 +1,34 @@ + #include ++#include "libavutil/version.h" ++#include "libavutil/ffversion.h" ++#include "config.h" ++ + 1 RT_MANIFEST fftools.manifest ++ ++#ifdef HAVE_FFTOOLS_ICO ++1 ICON fftools.ico ++#endif ++ ++1 VERSIONINFO ++FILEFLAGSMASK VS_FFI_FILEFLAGSMASK ++FILEOS VOS_NT_WINDOWS32 ++FILETYPE VFT_APP ++{ ++ BLOCK "StringFileInfo" ++ { ++ BLOCK "040904B0" ++ { ++ VALUE "CompanyName", "FFmpeg Project" ++ VALUE "FileDescription", "FFmpeg command-line tools" ++ VALUE "FileVersion", FFMPEG_VERSION ++ VALUE "LegalCopyright", "Copyright (C) " AV_STRINGIFY(CONFIG_THIS_YEAR) " FFmpeg Project" ++ VALUE "ProductName", "FFmpeg" ++ VALUE "ProductVersion", FFMPEG_VERSION ++ } ++ } ++ ++ BLOCK "VarFileInfo" ++ { ++ VALUE "Translation", 0x0409, 0x04B0 ++ } ++} diff --git a/cross/ffmpeg7/patches/1046-jellyfin-0046-fix-libx265-encoded-fmp4-hls-playback-on-safari.patch b/cross/ffmpeg7/patches/1046-jellyfin-0046-fix-libx265-encoded-fmp4-hls-playback-on-safari.patch new file mode 100644 index 00000000000..45549133603 --- /dev/null +++ b/cross/ffmpeg7/patches/1046-jellyfin-0046-fix-libx265-encoded-fmp4-hls-playback-on-safari.patch @@ -0,0 +1,13 @@ +Index: FFmpeg/libavformat/movenc.c +=================================================================== +--- libavformat/movenc.c ++++ libavformat/movenc.c +@@ -2908,7 +2908,7 @@ static int mov_write_stbl_tag(AVFormatCo + track->par->codec_tag == MKTAG('r','t','p',' ')) && + track->has_keyframes && track->has_keyframes < track->entry) + mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE); +- if (track->par->codec_type == AVMEDIA_TYPE_VIDEO && track->has_disposable) ++ if (track->par->codec_type == AVMEDIA_TYPE_VIDEO && track->has_disposable && track->entry) + mov_write_sdtp_tag(pb, track); + if (track->mode == MODE_MOV && track->flags & MOV_TRACK_STPS) + mov_write_stss_tag(pb, track, MOV_PARTIAL_SYNC_SAMPLE); diff --git 
a/cross/ffmpeg7/patches/1047-jellyfin-0047-add-full-hwa-pipeline-for-rockchip-rk3588-platform.patch b/cross/ffmpeg7/patches/1047-jellyfin-0047-add-full-hwa-pipeline-for-rockchip-rk3588-platform.patch new file mode 100644 index 00000000000..1c4ef73d78e --- /dev/null +++ b/cross/ffmpeg7/patches/1047-jellyfin-0047-add-full-hwa-pipeline-for-rockchip-rk3588-platform.patch @@ -0,0 +1,6810 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -354,6 +354,7 @@ External library support: + --enable-omx enable OpenMAX IL code [no] + --enable-omx-rpi enable OpenMAX IL code for Raspberry Pi [no] + --enable-rkmpp enable Rockchip Media Process Platform code [no] ++ --enable-rkrga enable Rockchip 2D Raster Graphic Acceleration code [no] + --disable-v4l2-m2m disable V4L2 mem2mem code [autodetect] + --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] + --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] +@@ -1884,6 +1885,7 @@ EXTERNAL_LIBRARY_VERSION3_LIST=" + libvo_amrwbenc + mbedtls + rkmpp ++ rkrga + " + + EXTERNAL_LIBRARY_GPLV3_LIST=" +@@ -2018,6 +2020,7 @@ HWACCEL_LIBRARY_LIST=" + mmal + omx + opencl ++ rkmpp + " + + DOCUMENT_LIST=" +@@ -3312,8 +3315,10 @@ av1_mediacodec_decoder_deps="mediacodec" + av1_mediacodec_encoder_deps="mediacodec" + av1_nvenc_encoder_deps="nvenc NV_ENC_PIC_PARAMS_AV1" + av1_nvenc_encoder_select="atsc_a53" ++av1_rkmpp_decoder_deps="rkmpp" + h263_v4l2m2m_decoder_deps="v4l2_m2m h263_v4l2_m2m" + h263_v4l2m2m_encoder_deps="v4l2_m2m h263_v4l2_m2m" ++h263_rkmpp_decoder_deps="rkmpp" + h264_amf_encoder_deps="amf" + h264_cuvid_decoder_deps="cuvid" + h264_cuvid_decoder_select="h264_mp4toannexb_bsf" +@@ -3329,7 +3334,8 @@ h264_omx_encoder_deps="omx" + h264_qsv_decoder_select="h264_mp4toannexb_bsf qsvdec" + h264_qsv_encoder_select="atsc_a53 qsvenc" + h264_rkmpp_decoder_deps="rkmpp" 
+-h264_rkmpp_decoder_select="h264_mp4toannexb_bsf" ++h264_rkmpp_decoder_select="h264_mp4toannexb_bsf dump_extradata_bsf" ++h264_rkmpp_encoder_deps="rkmpp" + h264_vaapi_encoder_select="atsc_a53 cbs_h264 vaapi_encode" + h264_v4l2m2m_decoder_deps="v4l2_m2m h264_v4l2_m2m" + h264_v4l2m2m_decoder_select="h264_mp4toannexb_bsf" +@@ -3347,7 +3353,8 @@ hevc_nvenc_encoder_select="atsc_a53" + hevc_qsv_decoder_select="hevc_mp4toannexb_bsf qsvdec" + hevc_qsv_encoder_select="hevcparse qsvenc" + hevc_rkmpp_decoder_deps="rkmpp" +-hevc_rkmpp_decoder_select="hevc_mp4toannexb_bsf" ++hevc_rkmpp_decoder_select="hevc_mp4toannexb_bsf dump_extradata_bsf" ++hevc_rkmpp_encoder_deps="rkmpp" + hevc_vaapi_encoder_deps="VAEncPictureParameterBufferHEVC" + hevc_vaapi_encoder_select="atsc_a53 cbs_h265 vaapi_encode" + hevc_v4l2m2m_decoder_deps="v4l2_m2m hevc_v4l2_m2m" +@@ -3357,11 +3364,13 @@ mjpeg_cuvid_decoder_deps="cuvid" + mjpeg_qsv_decoder_select="qsvdec" + mjpeg_qsv_encoder_deps="libmfx" + mjpeg_qsv_encoder_select="qsvenc" ++mjpeg_rkmpp_encoder_deps="rkmpp" + mjpeg_vaapi_encoder_deps="VAEncPictureParameterBufferJPEG" + mjpeg_vaapi_encoder_select="cbs_jpeg jpegtables vaapi_encode" + mp3_mf_encoder_deps="mediafoundation" + mpeg1_cuvid_decoder_deps="cuvid" + mpeg1_v4l2m2m_decoder_deps="v4l2_m2m mpeg1_v4l2_m2m" ++mpeg1_rkmpp_decoder_deps="rkmpp" + mpeg2_cuvid_decoder_deps="cuvid" + mpeg2_mmal_decoder_deps="mmal" + mpeg2_mediacodec_decoder_deps="mediacodec" +@@ -3369,6 +3378,7 @@ mpeg2_qsv_decoder_select="qsvdec" + mpeg2_qsv_encoder_select="qsvenc" + mpeg2_vaapi_encoder_select="cbs_mpeg2 vaapi_encode" + mpeg2_v4l2m2m_decoder_deps="v4l2_m2m mpeg2_v4l2_m2m" ++mpeg2_rkmpp_decoder_deps="rkmpp" + mpeg4_cuvid_decoder_deps="cuvid" + mpeg4_mediacodec_decoder_deps="mediacodec" + mpeg4_mediacodec_encoder_deps="mediacodec" +@@ -3376,6 +3386,8 @@ mpeg4_mmal_decoder_deps="mmal" + mpeg4_omx_encoder_deps="omx" + mpeg4_v4l2m2m_decoder_deps="v4l2_m2m mpeg4_v4l2_m2m" + mpeg4_v4l2m2m_encoder_deps="v4l2_m2m 
mpeg4_v4l2_m2m" ++mpeg4_rkmpp_decoder_deps="rkmpp" ++mpeg4_rkmpp_decoder_select="mpeg4_unpack_bframes_bsf dump_extradata_bsf" + vc1_cuvid_decoder_deps="cuvid" + vc1_mmal_decoder_deps="mmal" + vc1_qsv_decoder_select="qsvdec" +@@ -3872,6 +3884,7 @@ overlay_qsv_filter_deps="libmfx" + overlay_qsv_filter_select="qsvvpp" + overlay_vaapi_filter_deps="vaapi VAProcPipelineCaps_blend_flags" + overlay_vulkan_filter_deps="vulkan spirv_compiler" ++overlay_rkrga_filter_deps="rkrga" + owdenoise_filter_deps="gpl" + pad_opencl_filter_deps="opencl" + pan_filter_deps="swresample" +@@ -3894,6 +3907,7 @@ scale_filter_deps="swscale" + scale_opencl_filter_deps="opencl" + scale_qsv_filter_deps="libmfx" + scale_qsv_filter_select="qsvvpp" ++scale_rkrga_filter_deps="rkrga" + scdet_filter_select="scene_sad" + select_filter_select="scene_sad" + sharpness_vaapi_filter_deps="vaapi" +@@ -3937,6 +3951,7 @@ scale_vt_filter_deps="videotoolbox VTPix + scale_vulkan_filter_deps="vulkan spirv_compiler" + vpp_qsv_filter_deps="libmfx" + vpp_qsv_filter_select="qsvvpp" ++vpp_rkrga_filter_deps="rkrga" + xfade_opencl_filter_deps="opencl" + xfade_vulkan_filter_deps="vulkan spirv_compiler" + yadif_cuda_filter_deps="ffnvcodec" +@@ -3984,14 +3999,14 @@ cws2fws_extralibs="zlib_extralibs" + + # libraries, in any order + avcodec_deps="avutil" +-avcodec_suggest="libm stdatomic" ++avcodec_suggest="libm stdatomic rkrga" + avdevice_deps="avformat avcodec avutil" + avdevice_suggest="libm stdatomic" + avfilter_deps="avutil" + avfilter_suggest="libm stdatomic" + avformat_deps="avcodec avutil" + avformat_suggest="libm network zlib stdatomic" +-avutil_suggest="clock_gettime ffnvcodec gcrypt libm libdrm libmfx opencl openssl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt stdatomic" ++avutil_suggest="clock_gettime ffnvcodec gcrypt libm libdrm libmfx opencl openssl rkmpp user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt stdatomic" + postproc_deps="avutil gpl" + 
postproc_suggest="libm stdatomic" + swresample_deps="avutil" +@@ -7068,11 +7083,16 @@ enabled openssl && { { check_p + check_lib openssl openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 || + die "ERROR: openssl not found"; } + enabled pocketsphinx && require_pkg_config pocketsphinx pocketsphinx pocketsphinx/pocketsphinx.h ps_init +-enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/rk_mpi.h mpp_create && +- require_pkg_config rockchip_mpp "rockchip_mpp >= 1.3.7" rockchip/rk_mpi.h mpp_create && ++enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/rk_mpi.h mpp_create && ++ require_pkg_config rockchip_mpp "rockchip_mpp >= 1.3.8" rockchip/rk_mpi.h mpp_create && + { enabled libdrm || + die "ERROR: rkmpp requires --enable-libdrm"; } + } ++enabled rkrga && { require_pkg_config rkrga librga rga/RgaApi.h c_RkRgaBlit && ++ require_pkg_config rkrga librga rga/im2d.h querystring && ++ { enabled rkmpp || ++ die "ERROR: rkrga requires --enable-rkmpp"; } ++ } + enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init + + +@@ -7272,7 +7292,7 @@ fi + if enabled_all opencl libdrm ; then + check_type "CL/cl_intel.h" "clCreateImageFromFdINTEL_fn" && + enable opencl_drm_beignet +- check_func_headers "CL/cl_ext.h" clImportMemoryARM && ++ enabled_any arm aarch64 && + enable opencl_drm_arm + fi + +Index: FFmpeg/libavcodec/Makefile +=================================================================== +--- libavcodec/Makefile ++++ libavcodec/Makefile +@@ -259,6 +259,7 @@ OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER) + + OBJS-$(CONFIG_AV1_NVENC_ENCODER) += nvenc_av1.o nvenc.o + OBJS-$(CONFIG_AV1_QSV_ENCODER) += qsvenc_av1.o + OBJS-$(CONFIG_AV1_VAAPI_ENCODER) += vaapi_encode_av1.o av1_levels.o ++OBJS-$(CONFIG_AV1_RKMPP_DECODER) += rkmppdec.o + OBJS-$(CONFIG_AVRN_DECODER) += avrndec.o + OBJS-$(CONFIG_AVRP_DECODER) += r210dec.o + OBJS-$(CONFIG_AVRP_ENCODER) += r210enc.o +@@ -402,6 +403,7 @@ 
OBJS-$(CONFIG_H263_ENCODER) + + h263.o ituh263enc.o h263data.o + OBJS-$(CONFIG_H263_V4L2M2M_DECODER) += v4l2_m2m_dec.o + OBJS-$(CONFIG_H263_V4L2M2M_ENCODER) += v4l2_m2m_enc.o ++OBJS-$(CONFIG_H263_RKMPP_DECODER) += rkmppdec.o + OBJS-$(CONFIG_H264_DECODER) += h264dec.o h264_cabac.o h264_cavlc.o \ + h264_direct.o h264_loopfilter.o \ + h264_mb.o h264_picture.o \ +@@ -418,6 +420,7 @@ OBJS-$(CONFIG_H264_OMX_ENCODER) + + OBJS-$(CONFIG_H264_QSV_DECODER) += qsvdec.o + OBJS-$(CONFIG_H264_QSV_ENCODER) += qsvenc_h264.o + OBJS-$(CONFIG_H264_RKMPP_DECODER) += rkmppdec.o ++OBJS-$(CONFIG_H264_RKMPP_ENCODER) += rkmppenc.o + OBJS-$(CONFIG_H264_VAAPI_ENCODER) += vaapi_encode_h264.o h264_levels.o \ + h2645data.o + OBJS-$(CONFIG_H264_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o +@@ -443,6 +446,7 @@ OBJS-$(CONFIG_HEVC_QSV_DECODER) + + OBJS-$(CONFIG_HEVC_QSV_ENCODER) += qsvenc_hevc.o hevc_ps_enc.o \ + hevc_data.o + OBJS-$(CONFIG_HEVC_RKMPP_DECODER) += rkmppdec.o ++OBJS-$(CONFIG_HEVC_RKMPP_ENCODER) += rkmppenc.o + OBJS-$(CONFIG_HEVC_VAAPI_ENCODER) += vaapi_encode_h265.o h265_profile_level.o \ + h2645data.o + OBJS-$(CONFIG_HEVC_V4L2M2M_DECODER) += v4l2_m2m_dec.o +@@ -502,6 +506,7 @@ OBJS-$(CONFIG_MJPEG_ENCODER) + + OBJS-$(CONFIG_MJPEGB_DECODER) += mjpegbdec.o + OBJS-$(CONFIG_MJPEG_CUVID_DECODER) += cuviddec.o + OBJS-$(CONFIG_MJPEG_QSV_ENCODER) += qsvenc_jpeg.o ++OBJS-$(CONFIG_MJPEG_RKMPP_ENCODER) += rkmppenc.o + OBJS-$(CONFIG_MJPEG_VAAPI_ENCODER) += vaapi_encode_mjpeg.o + OBJS-$(CONFIG_MLP_DECODER) += mlpdec.o mlpdsp.o + OBJS-$(CONFIG_MLP_ENCODER) += mlpenc.o mlp.o +@@ -534,6 +539,7 @@ OBJS-$(CONFIG_MPEG1VIDEO_DECODER) + + OBJS-$(CONFIG_MPEG1VIDEO_ENCODER) += mpeg12enc.o mpeg12.o + OBJS-$(CONFIG_MPEG1_CUVID_DECODER) += cuviddec.o + OBJS-$(CONFIG_MPEG1_V4L2M2M_DECODER) += v4l2_m2m_dec.o ++OBJS-$(CONFIG_MPEG1_RKMPP_DECODER) += rkmppdec.o + OBJS-$(CONFIG_MPEG2_MMAL_DECODER) += mmaldec.o + OBJS-$(CONFIG_MPEG2_QSV_DECODER) += qsvdec.o + OBJS-$(CONFIG_MPEG2_QSV_ENCODER) += qsvenc_mpeg2.o +@@ 
-543,6 +549,7 @@ OBJS-$(CONFIG_MPEG2_CUVID_DECODER) + + OBJS-$(CONFIG_MPEG2_MEDIACODEC_DECODER) += mediacodecdec.o + OBJS-$(CONFIG_MPEG2_VAAPI_ENCODER) += vaapi_encode_mpeg2.o + OBJS-$(CONFIG_MPEG2_V4L2M2M_DECODER) += v4l2_m2m_dec.o ++OBJS-$(CONFIG_MPEG2_RKMPP_DECODER) += rkmppdec.o + OBJS-$(CONFIG_MPEG4_DECODER) += mpeg4videodsp.o xvididct.o + OBJS-$(CONFIG_MPEG4_ENCODER) += mpeg4videoenc.o + OBJS-$(CONFIG_MPEG4_CUVID_DECODER) += cuviddec.o +@@ -551,6 +558,7 @@ OBJS-$(CONFIG_MPEG4_MEDIACODEC_ENCODER) + OBJS-$(CONFIG_MPEG4_OMX_ENCODER) += omx.o + OBJS-$(CONFIG_MPEG4_V4L2M2M_DECODER) += v4l2_m2m_dec.o + OBJS-$(CONFIG_MPEG4_V4L2M2M_ENCODER) += v4l2_m2m_enc.o ++OBJS-$(CONFIG_MPEG4_RKMPP_DECODER) += rkmppdec.o + OBJS-$(CONFIG_MPL2_DECODER) += mpl2dec.o ass.o + OBJS-$(CONFIG_MSA1_DECODER) += mss3.o + OBJS-$(CONFIG_MSCC_DECODER) += mscc.o +Index: FFmpeg/libavcodec/allcodecs.c +=================================================================== +--- libavcodec/allcodecs.c ++++ libavcodec/allcodecs.c +@@ -147,6 +147,7 @@ extern const FFCodec ff_h263i_decoder; + extern const FFCodec ff_h263p_encoder; + extern const FFCodec ff_h263p_decoder; + extern const FFCodec ff_h263_v4l2m2m_decoder; ++extern const FFCodec ff_h263_rkmpp_decoder; + extern const FFCodec ff_h264_decoder; + extern const FFCodec ff_h264_v4l2m2m_decoder; + extern const FFCodec ff_h264_mediacodec_decoder; +@@ -208,12 +209,15 @@ extern const FFCodec ff_mpeg4_encoder; + extern const FFCodec ff_mpeg4_decoder; + extern const FFCodec ff_mpeg4_v4l2m2m_decoder; + extern const FFCodec ff_mpeg4_mmal_decoder; ++extern const FFCodec ff_mpeg4_rkmpp_decoder; + extern const FFCodec ff_mpegvideo_decoder; + extern const FFCodec ff_mpeg1_v4l2m2m_decoder; ++extern const FFCodec ff_mpeg1_rkmpp_decoder; + extern const FFCodec ff_mpeg2_mmal_decoder; + extern const FFCodec ff_mpeg2_v4l2m2m_decoder; + extern const FFCodec ff_mpeg2_qsv_decoder; + extern const FFCodec ff_mpeg2_mediacodec_decoder; ++extern const FFCodec 
ff_mpeg2_rkmpp_decoder; + extern const FFCodec ff_msa1_decoder; + extern const FFCodec ff_mscc_decoder; + extern const FFCodec ff_msmpeg4v1_decoder; +@@ -842,6 +846,7 @@ extern const FFCodec ff_av1_qsv_decoder; + extern const FFCodec ff_av1_qsv_encoder; + extern const FFCodec ff_av1_amf_encoder; + extern const FFCodec ff_av1_vaapi_encoder; ++extern const FFCodec ff_av1_rkmpp_decoder; + extern const FFCodec ff_libopenh264_encoder; + extern const FFCodec ff_libopenh264_decoder; + extern const FFCodec ff_h264_amf_encoder; +@@ -853,6 +858,7 @@ extern const FFCodec ff_h264_qsv_encoder + extern const FFCodec ff_h264_v4l2m2m_encoder; + extern const FFCodec ff_h264_vaapi_encoder; + extern const FFCodec ff_h264_videotoolbox_encoder; ++extern const FFCodec ff_h264_rkmpp_encoder; + extern const FFCodec ff_hevc_amf_encoder; + extern const FFCodec ff_hevc_cuvid_decoder; + extern const FFCodec ff_hevc_mediacodec_decoder; +@@ -863,10 +869,12 @@ extern const FFCodec ff_hevc_qsv_encoder + extern const FFCodec ff_hevc_v4l2m2m_encoder; + extern const FFCodec ff_hevc_vaapi_encoder; + extern const FFCodec ff_hevc_videotoolbox_encoder; ++extern const FFCodec ff_hevc_rkmpp_encoder; + extern const FFCodec ff_libkvazaar_encoder; + extern const FFCodec ff_mjpeg_cuvid_decoder; + extern const FFCodec ff_mjpeg_qsv_encoder; + extern const FFCodec ff_mjpeg_qsv_decoder; ++extern const FFCodec ff_mjpeg_rkmpp_encoder; + extern const FFCodec ff_mjpeg_vaapi_encoder; + extern const FFCodec ff_mp3_mf_encoder; + extern const FFCodec ff_mpeg1_cuvid_decoder; +Index: FFmpeg/libavcodec/rkmppdec.c +=================================================================== +--- libavcodec/rkmppdec.c ++++ libavcodec/rkmppdec.c +@@ -1,6 +1,7 @@ + /* +- * RockChip MPP Video Decoder + * Copyright (c) 2017 Lionel CHAZALLON ++ * Copyright (c) 2023 Huseyin BIYIK ++ * Copyright (c) 2023 NyanMisaka + * + * This file is part of FFmpeg. 
+ * +@@ -19,569 +20,952 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "avcodec.h" +-#include "codec_internal.h" +-#include "decode.h" +-#include "hwconfig.h" +-#include "libavutil/buffer.h" +-#include "libavutil/common.h" +-#include "libavutil/frame.h" +-#include "libavutil/hwcontext.h" +-#include "libavutil/hwcontext_drm.h" +-#include "libavutil/imgutils.h" +-#include "libavutil/log.h" +- +-#define RECEIVE_FRAME_TIMEOUT 100 +-#define FRAMEGROUP_MAX_FRAMES 16 +-#define INPUT_MAX_PACKETS 4 +- +-typedef struct { +- MppCtx ctx; +- MppApi *mpi; +- MppBufferGroup frame_group; +- +- char first_packet; +- char eos_reached; +- +- AVBufferRef *frames_ref; +- AVBufferRef *device_ref; +-} RKMPPDecoder; +- +-typedef struct { +- AVClass *av_class; +- AVBufferRef *decoder_ref; +-} RKMPPDecodeContext; +- +-typedef struct { +- MppFrame frame; +- AVBufferRef *decoder_ref; +-} RKMPPFrameContext; ++/** ++ * @file ++ * Rockchip MPP (Media Process Platform) video decoder ++ */ ++ ++#include "config.h" ++#include "config_components.h" + +-static MppCodingType rkmpp_get_codingtype(AVCodecContext *avctx) ++#include "rkmppdec.h" ++ ++#if CONFIG_RKRGA ++#include ++#endif ++ ++static MppCodingType rkmpp_get_coding_type(AVCodecContext *avctx) + { + switch (avctx->codec_id) { ++ case AV_CODEC_ID_H263: return MPP_VIDEO_CodingH263; + case AV_CODEC_ID_H264: return MPP_VIDEO_CodingAVC; + case AV_CODEC_ID_HEVC: return MPP_VIDEO_CodingHEVC; ++ case AV_CODEC_ID_AV1: return MPP_VIDEO_CodingAV1; + case AV_CODEC_ID_VP8: return MPP_VIDEO_CodingVP8; + case AV_CODEC_ID_VP9: return MPP_VIDEO_CodingVP9; ++ case AV_CODEC_ID_MPEG1VIDEO: /* fallthrough */ ++ case AV_CODEC_ID_MPEG2VIDEO: return MPP_VIDEO_CodingMPEG2; ++ case AV_CODEC_ID_MPEG4: return MPP_VIDEO_CodingMPEG4; + default: return MPP_VIDEO_CodingUnused; + } + } + +-static uint32_t rkmpp_get_frameformat(MppFrameFormat mppformat) 
++static uint32_t rkmpp_get_drm_format(MppFrameFormat mpp_fmt) + { +- switch (mppformat) { ++ switch (mpp_fmt & MPP_FRAME_FMT_MASK) { + case MPP_FMT_YUV420SP: return DRM_FORMAT_NV12; +-#ifdef DRM_FORMAT_NV12_10 +- case MPP_FMT_YUV420SP_10BIT: return DRM_FORMAT_NV12_10; +-#endif +- default: return 0; ++ case MPP_FMT_YUV420SP_10BIT: return DRM_FORMAT_NV15; ++ case MPP_FMT_YUV422SP: return DRM_FORMAT_NV16; ++ case MPP_FMT_YUV422SP_10BIT: return DRM_FORMAT_NV20; ++ default: return DRM_FORMAT_INVALID; + } + } + +-static int rkmpp_write_data(AVCodecContext *avctx, uint8_t *buffer, int size, int64_t pts) ++static uint32_t rkmpp_get_drm_afbc_format(MppFrameFormat mpp_fmt) + { +- RKMPPDecodeContext *rk_context = avctx->priv_data; +- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data; +- int ret; +- MppPacket packet; +- +- // create the MPP packet +- ret = mpp_packet_init(&packet, buffer, size); +- if (ret != MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "Failed to init MPP packet (code = %d)\n", ret); +- return AVERROR_UNKNOWN; +- } +- +- mpp_packet_set_pts(packet, pts); +- +- if (!buffer) +- mpp_packet_set_eos(packet); +- +- ret = decoder->mpi->decode_put_packet(decoder->ctx, packet); +- if (ret != MPP_OK) { +- if (ret == MPP_ERR_BUFFER_FULL) { +- av_log(avctx, AV_LOG_DEBUG, "Buffer full writing %d bytes to decoder\n", size); +- ret = AVERROR(EAGAIN); +- } else +- ret = AVERROR_UNKNOWN; ++ switch (mpp_fmt & MPP_FRAME_FMT_MASK) { ++ case MPP_FMT_YUV420SP: return DRM_FORMAT_YUV420_8BIT; ++ case MPP_FMT_YUV420SP_10BIT: return DRM_FORMAT_YUV420_10BIT; ++ case MPP_FMT_YUV422SP: return DRM_FORMAT_YUYV; ++ case MPP_FMT_YUV422SP_10BIT: return DRM_FORMAT_Y210; ++ default: return DRM_FORMAT_INVALID; + } +- else +- av_log(avctx, AV_LOG_DEBUG, "Wrote %d bytes to decoder\n", size); +- +- mpp_packet_deinit(&packet); ++} + +- return ret; ++static uint32_t rkmpp_get_av_format(MppFrameFormat mpp_fmt) ++{ ++ switch (mpp_fmt & MPP_FRAME_FMT_MASK) { ++ case MPP_FMT_YUV420SP: 
return AV_PIX_FMT_NV12; ++ case MPP_FMT_YUV420SP_10BIT: return AV_PIX_FMT_NV15; ++ case MPP_FMT_YUV422SP: return AV_PIX_FMT_NV16; ++ case MPP_FMT_YUV422SP_10BIT: return AV_PIX_FMT_NV20; ++ default: return AV_PIX_FMT_NONE; ++ } + } + +-static int rkmpp_close_decoder(AVCodecContext *avctx) ++static int get_afbc_byte_stride(const AVPixFmtDescriptor *desc, ++ int *stride, int reverse) + { +- RKMPPDecodeContext *rk_context = avctx->priv_data; +- av_buffer_unref(&rk_context->decoder_ref); +- return 0; ++ if (!desc || !stride || *stride <= 0) ++ return AVERROR(EINVAL); ++ ++ if (desc->nb_components == 1 || ++ (desc->flags & AV_PIX_FMT_FLAG_RGB) || ++ (!(desc->flags & AV_PIX_FMT_FLAG_RGB) && ++ !(desc->flags & AV_PIX_FMT_FLAG_PLANAR))) ++ return 0; ++ ++ if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) ++ *stride = reverse ? (*stride * 2 / 3) : (*stride * 3 / 2); ++ else if (desc->log2_chroma_w == 1 && !desc->log2_chroma_h) ++ *stride = reverse ? (*stride / 2) : (*stride * 2); ++ else if (!desc->log2_chroma_w && !desc->log2_chroma_h) ++ *stride = reverse ? (*stride / 3) : (*stride * 3); ++ else ++ return AVERROR(EINVAL); ++ ++ return (*stride > 0) ? 
0 : AVERROR(EINVAL); + } + +-static void rkmpp_release_decoder(void *opaque, uint8_t *data) ++static av_cold int rkmpp_decode_close(AVCodecContext *avctx) + { +- RKMPPDecoder *decoder = (RKMPPDecoder *)data; ++ RKMPPDecContext *r = avctx->priv_data; + +- if (decoder->mpi) { +- decoder->mpi->reset(decoder->ctx); +- mpp_destroy(decoder->ctx); +- decoder->ctx = NULL; +- } ++ r->eof = 0; ++ r->draining = 0; ++ r->info_change = 0; ++ r->errinfo_cnt = 0; ++ r->got_frame = 0; + +- if (decoder->frame_group) { +- mpp_buffer_group_put(decoder->frame_group); +- decoder->frame_group = NULL; ++ if (r->mapi) { ++ r->mapi->reset(r->mctx); ++ mpp_destroy(r->mctx); ++ r->mctx = NULL; ++ } ++ if (r->buf_group && ++ r->buf_mode == RKMPP_DEC_PURE_EXTERNAL) { ++ mpp_buffer_group_put(r->buf_group); ++ r->buf_group = NULL; + } + +- av_buffer_unref(&decoder->frames_ref); +- av_buffer_unref(&decoder->device_ref); ++ if (r->hwframe) ++ av_buffer_unref(&r->hwframe); ++ if (r->hwdevice) ++ av_buffer_unref(&r->hwdevice); + +- av_free(decoder); ++ return 0; + } + +-static int rkmpp_init_decoder(AVCodecContext *avctx) ++static av_cold int rkmpp_decode_init(AVCodecContext *avctx) + { +- RKMPPDecodeContext *rk_context = avctx->priv_data; +- RKMPPDecoder *decoder = NULL; +- MppCodingType codectype = MPP_VIDEO_CodingUnused; +- int ret; +- RK_S64 paramS64; +- RK_S32 paramS32; +- +- avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; ++ RKMPPDecContext *r = avctx->priv_data; ++ MppCodingType coding_type = MPP_VIDEO_CodingUnused; ++ int ret, is_fmt_supported = 0; ++ enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_DRM_PRIME, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_NONE }; ++ ++ switch (avctx->pix_fmt) { ++ case AV_PIX_FMT_YUV420P: ++ case AV_PIX_FMT_YUVJ420P: ++ is_fmt_supported = 1; ++ break; ++ case AV_PIX_FMT_YUV420P10: ++ is_fmt_supported = ++ avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC || ++ avctx->codec_id == AV_CODEC_ID_VP9 || ++ avctx->codec_id == AV_CODEC_ID_AV1; ++ break; ++ case 
AV_PIX_FMT_YUV422P: ++ case AV_PIX_FMT_YUV422P10: ++ is_fmt_supported = ++ avctx->codec_id == AV_CODEC_ID_H264; ++ break; ++ case AV_PIX_FMT_NONE: /* fallback to drm_prime */ ++ is_fmt_supported = 1; ++ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; ++ break; ++ default: ++ is_fmt_supported = 0; ++ break; ++ } ++ ++ if (avctx->pix_fmt != AV_PIX_FMT_DRM_PRIME) { ++ if (!is_fmt_supported) { ++ av_log(avctx, AV_LOG_ERROR, "MPP doesn't support codec '%s' with pix_fmt '%s'\n", ++ avcodec_get_name(avctx->codec_id), av_get_pix_fmt_name(avctx->pix_fmt)); ++ return AVERROR(ENOSYS); ++ } + +- // create a decoder and a ref to it +- decoder = av_mallocz(sizeof(RKMPPDecoder)); +- if (!decoder) { +- ret = AVERROR(ENOMEM); +- goto fail; ++ if ((ret = ff_get_format(avctx, pix_fmts)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret); ++ return ret; ++ } ++ avctx->pix_fmt = ret; + } + +- rk_context->decoder_ref = av_buffer_create((uint8_t *)decoder, sizeof(*decoder), rkmpp_release_decoder, +- NULL, AV_BUFFER_FLAG_READONLY); +- if (!rk_context->decoder_ref) { +- av_free(decoder); +- ret = AVERROR(ENOMEM); +- goto fail; ++ if ((coding_type = rkmpp_get_coding_type(avctx)) == MPP_VIDEO_CodingUnused) { ++ av_log(avctx, AV_LOG_ERROR, "Unknown codec id: %d\n", avctx->codec_id); ++ return AVERROR(ENOSYS); + } + +- av_log(avctx, AV_LOG_DEBUG, "Initializing RKMPP decoder.\n"); +- +- codectype = rkmpp_get_codingtype(avctx); +- if (codectype == MPP_VIDEO_CodingUnused) { +- av_log(avctx, AV_LOG_ERROR, "Unknown codec type (%d).\n", avctx->codec_id); +- ret = AVERROR_UNKNOWN; +- goto fail; ++ if ((ret = mpp_check_support_format(MPP_CTX_DEC, coding_type)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "MPP doesn't support codec '%s' (%d)\n", ++ avcodec_get_name(avctx->codec_id), avctx->codec_id); ++ return AVERROR(ENOSYS); + } + +- ret = mpp_check_support_format(MPP_CTX_DEC, codectype); +- if (ret != MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "Codec type (%d) unsupported by MPP\n", 
avctx->codec_id); +- ret = AVERROR_UNKNOWN; ++ if ((ret = mpp_create(&r->mctx, &r->mapi)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create MPP context and api: %d\n", ret); ++ ret = AVERROR_EXTERNAL; + goto fail; + } + +- // Create the MPP context +- ret = mpp_create(&decoder->ctx, &decoder->mpi); +- if (ret != MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "Failed to create MPP context (code = %d).\n", ret); +- ret = AVERROR_UNKNOWN; ++ if ((ret = mpp_init(r->mctx, MPP_CTX_DEC, coding_type)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init MPP context: %d\n", ret); ++ ret = AVERROR_EXTERNAL; + goto fail; + } + +- // initialize mpp +- ret = mpp_init(decoder->ctx, MPP_CTX_DEC, codectype); +- if (ret != MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "Failed to initialize MPP context (code = %d).\n", ret); +- ret = AVERROR_UNKNOWN; ++ if (avctx->skip_frame == AVDISCARD_NONKEY) ++ r->deint = 0; ++ ++ if ((ret = r->mapi->control(r->mctx, MPP_DEC_SET_ENABLE_DEINTERLACE, &r->deint)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set enable deinterlace: %d\n", ret); ++ ret = AVERROR_EXTERNAL; + goto fail; + } + +- // make decode calls blocking with a timeout +- paramS32 = MPP_POLL_BLOCK; +- ret = decoder->mpi->control(decoder->ctx, MPP_SET_OUTPUT_BLOCK, ¶mS32); +- if (ret != MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "Failed to set blocking mode on MPI (code = %d).\n", ret); +- ret = AVERROR_UNKNOWN; +- goto fail; ++ if (avctx->pix_fmt != AV_PIX_FMT_DRM_PRIME) ++ r->afbc = 0; ++ ++ if (r->afbc == RKMPP_DEC_AFBC_ON_RGA) { ++#if CONFIG_RKRGA ++ const char *rga_ver = querystring(RGA_VERSION); ++ int has_rga3 = !!strstr(rga_ver, "RGA_3"); ++ int is_rga3_compat = avctx->width >= 68 && ++ avctx->width <= 8176 && ++ avctx->height >= 2 && ++ avctx->height <= 8176; ++ ++ if (!has_rga3 || !is_rga3_compat) { ++#endif ++ av_log(avctx, AV_LOG_VERBOSE, "AFBC is requested without capable RGA, ignoring\n"); ++ r->afbc = RKMPP_DEC_AFBC_OFF; ++#if CONFIG_RKRGA ++ } 
++#endif + } + +- paramS64 = RECEIVE_FRAME_TIMEOUT; +- ret = decoder->mpi->control(decoder->ctx, MPP_SET_OUTPUT_BLOCK_TIMEOUT, ¶mS64); +- if (ret != MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "Failed to set block timeout on MPI (code = %d).\n", ret); +- ret = AVERROR_UNKNOWN; +- goto fail; ++ if (r->afbc) { ++ MppFrameFormat afbc_fmt = MPP_FRAME_FBC_AFBC_V2; ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC || ++ avctx->codec_id == AV_CODEC_ID_VP9 || ++ avctx->codec_id == AV_CODEC_ID_AV1) { ++ if ((ret = r->mapi->control(r->mctx, MPP_DEC_SET_OUTPUT_FORMAT, &afbc_fmt)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set AFBC mode: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ } else { ++ av_log(avctx, AV_LOG_VERBOSE, "AFBC is not supported in codec '%s', ignoring\n", ++ avcodec_get_name(avctx->codec_id)); ++ r->afbc = 0; ++ } + } + +- ret = mpp_buffer_group_get_internal(&decoder->frame_group, MPP_BUFFER_TYPE_ION); +- if (ret) { +- av_log(avctx, AV_LOG_ERROR, "Failed to retrieve buffer group (code = %d)\n", ret); +- ret = AVERROR_UNKNOWN; +- goto fail; ++ if (avctx->hw_device_ctx) { ++ r->hwdevice = av_buffer_ref(avctx->hw_device_ctx); ++ if (!r->hwdevice) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ av_log(avctx, AV_LOG_VERBOSE, "Picked up an existing RKMPP hardware device\n"); ++ } else { ++ if ((ret = av_hwdevice_ctx_create(&r->hwdevice, AV_HWDEVICE_TYPE_RKMPP, NULL, NULL, 0)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create a RKMPP hardware device: %d\n", ret); ++ goto fail; ++ } ++ av_log(avctx, AV_LOG_VERBOSE, "Created a RKMPP hardware device\n"); + } + +- ret = decoder->mpi->control(decoder->ctx, MPP_DEC_SET_EXT_BUF_GROUP, decoder->frame_group); +- if (ret) { +- av_log(avctx, AV_LOG_ERROR, "Failed to assign buffer group (code = %d)\n", ret); +- ret = AVERROR_UNKNOWN; ++ return 0; ++ ++fail: ++ rkmpp_decode_close(avctx); ++ return ret; ++} ++ ++static int 
rkmpp_set_buffer_group(AVCodecContext *avctx, ++ enum AVPixelFormat pix_fmt, ++ int width, int height) ++{ ++ RKMPPDecContext *r = avctx->priv_data; ++ AVHWFramesContext *hwfc = NULL; ++ int i, ret, decoder_pool_size; ++ ++ if (!r->hwdevice) ++ return AVERROR(ENOMEM); ++ ++ av_buffer_unref(&r->hwframe); ++ ++ r->hwframe = av_hwframe_ctx_alloc(r->hwdevice); ++ if (!r->hwframe) ++ return AVERROR(ENOMEM); ++ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_HEVC: ++ decoder_pool_size = 20; ++ break; ++ default: ++ decoder_pool_size = 10; ++ break; ++ } ++ ++ hwfc = (AVHWFramesContext *)r->hwframe->data; ++ hwfc->format = AV_PIX_FMT_DRM_PRIME; ++ hwfc->sw_format = pix_fmt; ++ hwfc->width = FFALIGN(width, 16); ++ hwfc->height = FFALIGN(height, 16); ++ ++ if (r->buf_mode == RKMPP_DEC_HALF_INTERNAL) { ++ AVRKMPPFramesContext *rkmpp_fc = NULL; ++ ++ if ((ret = av_hwframe_ctx_init(r->hwframe)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init RKMPP frame pool\n"); ++ goto fail; ++ } ++ ++ rkmpp_fc = hwfc->hwctx; ++ r->buf_group = rkmpp_fc->buf_group; ++ goto attach; ++ } else if (r->buf_mode != RKMPP_DEC_PURE_EXTERNAL) { ++ ret = AVERROR(EINVAL); + goto fail; + } + +- ret = mpp_buffer_group_limit_config(decoder->frame_group, 0, FRAMEGROUP_MAX_FRAMES); +- if (ret) { +- av_log(avctx, AV_LOG_ERROR, "Failed to set buffer group limit (code = %d)\n", ret); +- ret = AVERROR_UNKNOWN; ++ hwfc->initial_pool_size = decoder_pool_size + 10; ++ if (avctx->extra_hw_frames > 0) ++ hwfc->initial_pool_size += avctx->extra_hw_frames; ++ ++ if ((ret = av_hwframe_ctx_init(r->hwframe)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init RKMPP frame pool\n"); + goto fail; + } + +- decoder->first_packet = 1; ++ if (r->buf_group) { ++ if ((ret = mpp_buffer_group_clear(r->buf_group)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to clear external buffer group: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ } else { ++ if ((ret = 
mpp_buffer_group_get_external(&r->buf_group, MPP_BUFFER_TYPE_DRM)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get external buffer group: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ } + +- av_log(avctx, AV_LOG_DEBUG, "RKMPP decoder initialized successfully.\n"); ++ for (i = 0; i < hwfc->initial_pool_size; i++) { ++ AVRKMPPFramesContext *rkmpp_fc = hwfc->hwctx; ++ MppBufferInfo buf_info = { ++ .index = i, ++ .type = MPP_BUFFER_TYPE_DRM, ++ .ptr = mpp_buffer_get_ptr(rkmpp_fc->frames[i].buffers[0]), ++ .fd = rkmpp_fc->frames[i].drm_desc.objects[0].fd, ++ .size = rkmpp_fc->frames[i].drm_desc.objects[0].size, ++ }; ++ ++ if ((ret = mpp_buffer_commit(r->buf_group, &buf_info)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to commit external buffer group: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ } + +- decoder->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); +- if (!decoder->device_ref) { +- ret = AVERROR(ENOMEM); ++attach: ++ if ((ret = r->mapi->control(r->mctx, MPP_DEC_SET_EXT_BUF_GROUP, r->buf_group)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to attach external buffer group: %d\n", ret); ++ ret = AVERROR_EXTERNAL; + goto fail; + } +- ret = av_hwdevice_ctx_init(decoder->device_ref); +- if (ret < 0) +- goto fail; ++ ++ if (r->buf_mode == RKMPP_DEC_HALF_INTERNAL) { ++ int group_limit = decoder_pool_size + ((width * height > (3840 * 2160 * 3)) ? 
2 : 10); ++ if (avctx->extra_hw_frames > 0) ++ group_limit += avctx->extra_hw_frames; ++ if ((ret = mpp_buffer_group_limit_config(r->buf_group, 0, group_limit)) != MPP_OK) ++ av_log(avctx, AV_LOG_WARNING, "Failed to set buffer group limit: %d\n", ret); ++ } + + return 0; + + fail: +- av_log(avctx, AV_LOG_ERROR, "Failed to initialize RKMPP decoder.\n"); +- rkmpp_close_decoder(avctx); ++ if (r->buf_group && ++ r->buf_mode == RKMPP_DEC_HALF_INTERNAL) { ++ mpp_buffer_group_put(r->buf_group); ++ r->buf_group = NULL; ++ } ++ av_buffer_unref(&r->hwframe); + return ret; + } + +-static int rkmpp_send_packet(AVCodecContext *avctx, const AVPacket *avpkt) ++static int rkmpp_export_mastering_display(AVCodecContext *avctx, AVFrame *frame, ++ MppFrameMasteringDisplayMetadata mpp_mastering) + { +- RKMPPDecodeContext *rk_context = avctx->priv_data; +- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data; +- int ret; ++ AVMasteringDisplayMetadata *mastering = NULL; ++ AVFrameSideData *sd = NULL; ++ int mapping[3] = { 0, 1, 2 }; ++ int chroma_den = 0; ++ int max_luma_den = 0; ++ int min_luma_den = 0; ++ int i; + +- // handle EOF +- if (!avpkt->size) { +- av_log(avctx, AV_LOG_DEBUG, "End of stream.\n"); +- decoder->eos_reached = 1; +- ret = rkmpp_write_data(avctx, NULL, 0, 0); +- if (ret) +- av_log(avctx, AV_LOG_ERROR, "Failed to send EOS to decoder (code = %d)\n", ret); +- return ret; ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_HEVC: ++ // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b ++ mapping[0] = 2; ++ mapping[1] = 0; ++ mapping[2] = 1; ++ chroma_den = 50000; ++ max_luma_den = 10000; ++ min_luma_den = 10000; ++ break; ++ case AV_CODEC_ID_AV1: ++ chroma_den = 1 << 16; ++ max_luma_den = 1 << 8; ++ min_luma_den = 1 << 14; ++ break; ++ default: ++ return 0; + } + +- // on first packet, send extradata +- if (decoder->first_packet) { +- if (avctx->extradata_size) { +- ret = rkmpp_write_data(avctx, avctx->extradata, +- avctx->extradata_size, 
+- avpkt->pts); +- if (ret) { +- av_log(avctx, AV_LOG_ERROR, "Failed to write extradata to decoder (code = %d)\n", ret); +- return ret; +- } +- } +- decoder->first_packet = 0; ++ sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ if (sd) ++ mastering = (AVMasteringDisplayMetadata *)sd->data; ++ else ++ mastering = av_mastering_display_metadata_create_side_data(frame); ++ if (!mastering) ++ return AVERROR(ENOMEM); ++ ++ for (i = 0; i < 3; i++) { ++ const int j = mapping[i]; ++ mastering->display_primaries[i][0] = av_make_q(mpp_mastering.display_primaries[j][0], chroma_den); ++ mastering->display_primaries[i][1] = av_make_q(mpp_mastering.display_primaries[j][1], chroma_den); + } ++ mastering->white_point[0] = av_make_q(mpp_mastering.white_point[0], chroma_den); ++ mastering->white_point[1] = av_make_q(mpp_mastering.white_point[1], chroma_den); + +- // now send packet +- ret = rkmpp_write_data(avctx, avpkt->data, avpkt->size, avpkt->pts); +- if (ret && ret!=AVERROR(EAGAIN)) +- av_log(avctx, AV_LOG_ERROR, "Failed to write data to decoder (code = %d)\n", ret); ++ mastering->max_luminance = av_make_q(mpp_mastering.max_luminance, max_luma_den); ++ mastering->min_luminance = av_make_q(mpp_mastering.min_luminance, min_luma_den); + +- return ret; ++ mastering->has_luminance = 1; ++ mastering->has_primaries = 1; ++ ++ return 0; + } + +-static void rkmpp_release_frame(void *opaque, uint8_t *data) ++static int rkmpp_export_content_light(AVFrame *frame, ++ MppFrameContentLightMetadata mpp_light) + { +- AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)data; +- AVBufferRef *framecontextref = (AVBufferRef *)opaque; +- RKMPPFrameContext *framecontext = (RKMPPFrameContext *)framecontextref->data; ++ AVContentLightMetadata *light = NULL; ++ ++ AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ if (sd) ++ light = (AVContentLightMetadata *)sd->data; ++ else ++ light = 
av_content_light_metadata_create_side_data(frame); ++ if (!light) ++ return AVERROR(ENOMEM); ++ ++ light->MaxCLL = mpp_light.MaxCLL; ++ light->MaxFALL = mpp_light.MaxFALL; + +- mpp_frame_deinit(&framecontext->frame); +- av_buffer_unref(&framecontext->decoder_ref); +- av_buffer_unref(&framecontextref); ++ return 0; ++} + +- av_free(desc); ++static void rkmpp_free_mpp_frame(void *opaque, uint8_t *data) ++{ ++ MppFrame mpp_frame = (MppFrame)opaque; ++ mpp_frame_deinit(&mpp_frame); + } + +-static int rkmpp_retrieve_frame(AVCodecContext *avctx, AVFrame *frame) ++static void rkmpp_free_drm_desc(void *opaque, uint8_t *data) + { +- RKMPPDecodeContext *rk_context = avctx->priv_data; +- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data; +- RKMPPFrameContext *framecontext = NULL; +- AVBufferRef *framecontextref = NULL; +- int ret; +- MppFrame mppframe = NULL; +- MppBuffer buffer = NULL; +- AVDRMFrameDescriptor *desc = NULL; ++ AVRKMPPDRMFrameDescriptor *drm_desc = (AVRKMPPDRMFrameDescriptor *)opaque; ++ av_free(drm_desc); ++} ++ ++static int frame_create_buf(AVFrame *frame, ++ uint8_t* data, int size, ++ void (*free)(void *opaque, uint8_t *data), ++ void *opaque, int flags) ++{ ++ int i; ++ ++ for (i = 0; i < AV_NUM_DATA_POINTERS; i++) { ++ if (!frame->buf[i]) { ++ frame->buf[i] = av_buffer_create(data, size, free, opaque, flags); ++ return frame->buf[i] ? 
0 : AVERROR(ENOMEM); ++ } ++ } ++ return AVERROR(EINVAL); ++} ++ ++static int rkmpp_export_frame(AVCodecContext *avctx, AVFrame *frame, MppFrame mpp_frame) ++{ ++ RKMPPDecContext *r = avctx->priv_data; ++ AVRKMPPDRMFrameDescriptor *desc = NULL; + AVDRMLayerDescriptor *layer = NULL; +- int mode; +- MppFrameFormat mppformat; +- uint32_t drmformat; ++ const AVPixFmtDescriptor *pix_desc; ++ MppBuffer mpp_buf = NULL; ++ MppFrameFormat mpp_fmt = MPP_FMT_BUTT; ++ int mpp_frame_mode = 0; ++ int ret, is_afbc = 0; ++ ++ if (!frame || !mpp_frame) ++ return AVERROR(ENOMEM); ++ ++ mpp_buf = mpp_frame_get_buffer(mpp_frame); ++ if (!mpp_buf) ++ return AVERROR(EAGAIN); ++ ++ desc = av_mallocz(sizeof(*desc)); ++ if (!desc) ++ return AVERROR(ENOMEM); ++ ++ desc->drm_desc.nb_objects = 1; ++ desc->buffers[0] = mpp_buf; ++ ++ desc->drm_desc.objects[0].fd = mpp_buffer_get_fd(mpp_buf); ++ desc->drm_desc.objects[0].size = mpp_buffer_get_size(mpp_buf); ++ ++ mpp_fmt = mpp_frame_get_fmt(mpp_frame); ++ is_afbc = mpp_fmt & MPP_FRAME_FBC_MASK; ++ ++ desc->drm_desc.nb_layers = 1; ++ layer = &desc->drm_desc.layers[0]; ++ layer->planes[0].object_index = 0; ++ ++ if (is_afbc) { ++ desc->drm_desc.objects[0].format_modifier = ++ DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_BLOCK_SIZE_16x16); ++ ++ layer->format = rkmpp_get_drm_afbc_format(mpp_fmt); ++ layer->nb_planes = 1; ++ layer->planes[0].offset = 0; ++ layer->planes[0].pitch = mpp_frame_get_hor_stride(mpp_frame); + +- ret = decoder->mpi->decode_get_frame(decoder->ctx, &mppframe); +- if (ret != MPP_OK && ret != MPP_ERR_TIMEOUT) { +- av_log(avctx, AV_LOG_ERROR, "Failed to get a frame from MPP (code = %d)\n", ret); +- goto fail; ++ pix_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); ++ if ((ret = get_afbc_byte_stride(pix_desc, (int *)&layer->planes[0].pitch, 0)) < 0) ++ return ret; ++ ++ /* MPP specific AFBC src_y offset, not memory address offset */ ++ frame->crop_top = mpp_frame_get_offset_y(mpp_frame); ++ } else { ++ 
layer->format = rkmpp_get_drm_format(mpp_fmt); ++ layer->nb_planes = 2; ++ layer->planes[0].offset = 0; ++ layer->planes[0].pitch = mpp_frame_get_hor_stride(mpp_frame); ++ ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = layer->planes[0].pitch * mpp_frame_get_ver_stride(mpp_frame); ++ layer->planes[1].pitch = layer->planes[0].pitch; + } + +- if (mppframe) { +- // Check whether we have a special frame or not +- if (mpp_frame_get_info_change(mppframe)) { +- AVHWFramesContext *hwframes; ++ if ((ret = frame_create_buf(frame, mpp_frame, mpp_frame_get_buf_size(mpp_frame), ++ rkmpp_free_mpp_frame, mpp_frame, AV_BUFFER_FLAG_READONLY)) < 0) ++ return ret; + +- av_log(avctx, AV_LOG_INFO, "Decoder noticed an info change (%dx%d), format=%d\n", +- (int)mpp_frame_get_width(mppframe), (int)mpp_frame_get_height(mppframe), +- (int)mpp_frame_get_fmt(mppframe)); ++ if ((ret = frame_create_buf(frame, (uint8_t *)desc, sizeof(*desc), ++ rkmpp_free_drm_desc, desc, AV_BUFFER_FLAG_READONLY)) < 0) ++ return ret; + +- avctx->width = mpp_frame_get_width(mppframe); +- avctx->height = mpp_frame_get_height(mppframe); ++ frame->data[0] = (uint8_t *)desc; + +- decoder->mpi->control(decoder->ctx, MPP_DEC_SET_INFO_CHANGE_READY, NULL); ++ frame->hw_frames_ctx = av_buffer_ref(r->hwframe); ++ if (!frame->hw_frames_ctx) ++ return AVERROR(ENOMEM); + +- av_buffer_unref(&decoder->frames_ref); ++ if ((ret = ff_decode_frame_props(avctx, frame)) < 0) ++ return ret; + +- decoder->frames_ref = av_hwframe_ctx_alloc(decoder->device_ref); +- if (!decoder->frames_ref) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } ++ frame->width = avctx->width; ++ frame->height = avctx->height; ++ frame->pts = MPP_PTS_TO_PTS(mpp_frame_get_pts(mpp_frame), avctx->pkt_timebase); ++ ++ mpp_frame_mode = mpp_frame_get_mode(mpp_frame); ++ if ((mpp_frame_mode & MPP_FRAME_FLAG_FIELD_ORDER_MASK) == MPP_FRAME_FLAG_DEINTERLACED) ++ frame->flags |= AV_FRAME_FLAG_INTERLACED; ++ if ((mpp_frame_mode & 
MPP_FRAME_FLAG_FIELD_ORDER_MASK) == MPP_FRAME_FLAG_TOP_FIRST) ++ frame->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; ++ ++ if (avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO || ++ avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO) { ++ MppFrameRational sar = mpp_frame_get_sar(mpp_frame); ++ frame->sample_aspect_ratio = av_div_q((AVRational) { sar.num, sar.den }, ++ (AVRational) { frame->width, frame->height }); ++ } ++ ++ if (avctx->codec_id == AV_CODEC_ID_HEVC && ++ (frame->color_trc == AVCOL_TRC_SMPTE2084 || ++ frame->color_trc == AVCOL_TRC_ARIB_STD_B67)) { ++ ret = rkmpp_export_mastering_display(avctx, frame, mpp_frame_get_mastering_display(mpp_frame)); ++ if (ret < 0) ++ return ret; ++ ret = rkmpp_export_content_light(frame, mpp_frame_get_content_light(mpp_frame)); ++ if (ret < 0) ++ return ret; ++ } + +- mppformat = mpp_frame_get_fmt(mppframe); +- drmformat = rkmpp_get_frameformat(mppformat); ++ return 0; ++} + +- hwframes = (AVHWFramesContext*)decoder->frames_ref->data; +- hwframes->format = AV_PIX_FMT_DRM_PRIME; +- hwframes->sw_format = drmformat == DRM_FORMAT_NV12 ? 
AV_PIX_FMT_NV12 : AV_PIX_FMT_NONE; +- hwframes->width = avctx->width; +- hwframes->height = avctx->height; +- ret = av_hwframe_ctx_init(decoder->frames_ref); +- if (ret < 0) +- goto fail; ++static void rkmpp_export_avctx_color_props(AVCodecContext *avctx, MppFrame mpp_frame) ++{ ++ int val; + +- // here decoder is fully initialized, we need to feed it again with data +- ret = AVERROR(EAGAIN); +- goto fail; +- } else if (mpp_frame_get_eos(mppframe)) { +- av_log(avctx, AV_LOG_DEBUG, "Received a EOS frame.\n"); +- decoder->eos_reached = 1; +- ret = AVERROR_EOF; +- goto fail; +- } else if (mpp_frame_get_discard(mppframe)) { +- av_log(avctx, AV_LOG_DEBUG, "Received a discard frame.\n"); +- ret = AVERROR(EAGAIN); +- goto fail; +- } else if (mpp_frame_get_errinfo(mppframe)) { +- av_log(avctx, AV_LOG_ERROR, "Received a errinfo frame.\n"); +- ret = AVERROR_UNKNOWN; +- goto fail; +- } ++ if (!avctx || !mpp_frame) ++ return; + +- // here we should have a valid frame +- av_log(avctx, AV_LOG_DEBUG, "Received a frame.\n"); ++ if (avctx->color_primaries == AVCOL_PRI_RESERVED0) ++ avctx->color_primaries = AVCOL_PRI_UNSPECIFIED; ++ if ((val = mpp_frame_get_color_primaries(mpp_frame)) && ++ val != MPP_FRAME_PRI_RESERVED0 && ++ val != MPP_FRAME_PRI_UNSPECIFIED) ++ avctx->color_primaries = val; + +- // setup general frame fields +- frame->format = AV_PIX_FMT_DRM_PRIME; +- frame->width = mpp_frame_get_width(mppframe); +- frame->height = mpp_frame_get_height(mppframe); +- frame->pts = mpp_frame_get_pts(mppframe); +- frame->color_range = mpp_frame_get_color_range(mppframe); +- frame->color_primaries = mpp_frame_get_color_primaries(mppframe); +- frame->color_trc = mpp_frame_get_color_trc(mppframe); +- frame->colorspace = mpp_frame_get_colorspace(mppframe); +- +- mode = mpp_frame_get_mode(mppframe); +- if ((mode & MPP_FRAME_FLAG_FIELD_ORDER_MASK) == MPP_FRAME_FLAG_DEINTERLACED) +- frame->flags |= AV_FRAME_FLAG_INTERLACED; +- if ((mode & MPP_FRAME_FLAG_FIELD_ORDER_MASK) == 
MPP_FRAME_FLAG_TOP_FIRST) +- frame->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; +- +- mppformat = mpp_frame_get_fmt(mppframe); +- drmformat = rkmpp_get_frameformat(mppformat); +- +- // now setup the frame buffer info +- buffer = mpp_frame_get_buffer(mppframe); +- if (buffer) { +- desc = av_mallocz(sizeof(AVDRMFrameDescriptor)); +- if (!desc) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } ++ if (avctx->color_trc == AVCOL_TRC_RESERVED0) ++ avctx->color_trc = AVCOL_TRC_UNSPECIFIED; ++ if ((val = mpp_frame_get_color_trc(mpp_frame)) && ++ val != MPP_FRAME_TRC_RESERVED0 && ++ val != MPP_FRAME_TRC_UNSPECIFIED) ++ avctx->color_trc = val; + +- desc->nb_objects = 1; +- desc->objects[0].fd = mpp_buffer_get_fd(buffer); +- desc->objects[0].size = mpp_buffer_get_size(buffer); +- +- desc->nb_layers = 1; +- layer = &desc->layers[0]; +- layer->format = drmformat; +- layer->nb_planes = 2; +- +- layer->planes[0].object_index = 0; +- layer->planes[0].offset = 0; +- layer->planes[0].pitch = mpp_frame_get_hor_stride(mppframe); +- +- layer->planes[1].object_index = 0; +- layer->planes[1].offset = layer->planes[0].pitch * mpp_frame_get_ver_stride(mppframe); +- layer->planes[1].pitch = layer->planes[0].pitch; +- +- // we also allocate a struct in buf[0] that will allow to hold additionnal information +- // for releasing properly MPP frames and decoder +- framecontextref = av_buffer_allocz(sizeof(*framecontext)); +- if (!framecontextref) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } ++ if (avctx->colorspace == AVCOL_SPC_RESERVED) ++ avctx->colorspace = AVCOL_SPC_UNSPECIFIED; ++ if ((val = mpp_frame_get_colorspace(mpp_frame)) && ++ val != MPP_FRAME_SPC_RESERVED && ++ val != MPP_FRAME_SPC_UNSPECIFIED) ++ avctx->colorspace = val; + +- // MPP decoder needs to be closed only when all frames have been released. 
+- framecontext = (RKMPPFrameContext *)framecontextref->data; +- framecontext->decoder_ref = av_buffer_ref(rk_context->decoder_ref); +- framecontext->frame = mppframe; +- +- frame->data[0] = (uint8_t *)desc; +- frame->buf[0] = av_buffer_create((uint8_t *)desc, sizeof(*desc), rkmpp_release_frame, +- framecontextref, AV_BUFFER_FLAG_READONLY); ++ if ((val = mpp_frame_get_color_range(mpp_frame)) > MPP_FRAME_RANGE_UNSPECIFIED) ++ avctx->color_range = val; + +- if (!frame->buf[0]) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } ++ if ((val = mpp_frame_get_chroma_location(mpp_frame)) > MPP_CHROMA_LOC_UNSPECIFIED) ++ avctx->chroma_sample_location = val; ++} + +- frame->hw_frames_ctx = av_buffer_ref(decoder->frames_ref); +- if (!frame->hw_frames_ctx) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } ++static int rkmpp_get_frame(AVCodecContext *avctx, AVFrame *frame, int timeout) ++{ ++ RKMPPDecContext *r = avctx->priv_data; ++ MppFrame mpp_frame = NULL; ++ int ret; + +- return 0; +- } else { +- av_log(avctx, AV_LOG_ERROR, "Failed to retrieve the frame buffer, frame is dropped (code = %d)\n", ret); +- mpp_frame_deinit(&mppframe); +- } +- } else if (decoder->eos_reached) { ++ /* should not provide any frame after EOS */ ++ if (r->eof) + return AVERROR_EOF; +- } else if (ret == MPP_ERR_TIMEOUT) { +- av_log(avctx, AV_LOG_DEBUG, "Timeout when trying to get a frame from MPP\n"); ++ ++ if ((ret = r->mapi->control(r->mctx, MPP_SET_OUTPUT_TIMEOUT, (MppParam)&timeout)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set output timeout: %d\n", ret); ++ return AVERROR_EXTERNAL; + } + +- return AVERROR(EAGAIN); ++ ret = r->mapi->decode_get_frame(r->mctx, &mpp_frame); ++ if (ret != MPP_OK && ret != MPP_ERR_TIMEOUT) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get frame: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ if (!mpp_frame) { ++ if (timeout != MPP_TIMEOUT_NON_BLOCK) ++ av_log(avctx, AV_LOG_DEBUG, "Timeout getting decoded frame\n"); ++ return AVERROR(EAGAIN); ++ } ++ if 
(mpp_frame_get_eos(mpp_frame)) { ++ av_log(avctx, AV_LOG_DEBUG, "Received a 'EOS' frame\n"); ++ /* EOS frame may contain valid data */ ++ if (!mpp_frame_get_buffer(mpp_frame)) { ++ r->eof = 1; ++ ret = AVERROR_EOF; ++ goto exit; ++ } ++ } ++ if (mpp_frame_get_discard(mpp_frame)) { ++ av_log(avctx, AV_LOG_DEBUG, "Received a 'discard' frame\n"); ++ ret = AVERROR(EAGAIN); ++ goto exit; ++ } ++ if (mpp_frame_get_errinfo(mpp_frame)) { ++ av_log(avctx, AV_LOG_DEBUG, "Received a 'errinfo' frame\n"); ++ ret = (r->errinfo_cnt++ > MAX_ERRINFO_COUNT) ? AVERROR_EXTERNAL : AVERROR(EAGAIN); ++ goto exit; ++ } ++ ++ if (r->info_change = mpp_frame_get_info_change(mpp_frame)) { ++ int fast_parse = r->fast_parse; ++ int mpp_frame_mode = mpp_frame_get_mode(mpp_frame); ++ const MppFrameFormat mpp_fmt = mpp_frame_get_fmt(mpp_frame); ++ enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_DRM_PRIME, ++ AV_PIX_FMT_NONE, ++ AV_PIX_FMT_NONE }; ++ ++ av_log(avctx, AV_LOG_VERBOSE, "Noticed an info change\n"); ++ ++ if (r->afbc && !(mpp_fmt & MPP_FRAME_FBC_MASK)) { ++ av_log(avctx, AV_LOG_VERBOSE, "AFBC is requested but not supported\n"); ++ r->afbc = 0; ++ } + +-fail: +- if (mppframe) +- mpp_frame_deinit(&mppframe); ++ pix_fmts[1] = rkmpp_get_av_format(mpp_fmt & MPP_FRAME_FMT_MASK); + +- if (framecontext) +- av_buffer_unref(&framecontext->decoder_ref); ++ if (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) ++ avctx->sw_pix_fmt = pix_fmts[1]; ++ else { ++ if ((ret = ff_get_format(avctx, pix_fmts)) < 0) ++ goto exit; ++ avctx->pix_fmt = ret; ++ } + +- if (framecontextref) +- av_buffer_unref(&framecontextref); ++ avctx->width = mpp_frame_get_width(mpp_frame); ++ avctx->height = mpp_frame_get_height(mpp_frame); ++ avctx->coded_width = FFALIGN(avctx->width, 64); ++ avctx->coded_height = FFALIGN(avctx->height, 64); ++ rkmpp_export_avctx_color_props(avctx, mpp_frame); ++ ++ av_log(avctx, AV_LOG_VERBOSE, "Configured with size: %dx%d | pix_fmt: %s | sw_pix_fmt: %s\n", ++ avctx->width, avctx->height, ++ 
av_get_pix_fmt_name(avctx->pix_fmt), ++ av_get_pix_fmt_name(avctx->sw_pix_fmt)); ++ ++ if ((ret = rkmpp_set_buffer_group(avctx, pix_fmts[1], avctx->width, avctx->height)) < 0) ++ goto exit; ++ ++ /* Disable fast parsing for the interlaced video */ ++ if (((mpp_frame_mode & MPP_FRAME_FLAG_FIELD_ORDER_MASK) == MPP_FRAME_FLAG_DEINTERLACED || ++ (mpp_frame_mode & MPP_FRAME_FLAG_FIELD_ORDER_MASK) == MPP_FRAME_FLAG_TOP_FIRST) && fast_parse) { ++ av_log(avctx, AV_LOG_VERBOSE, "Fast parsing is disabled for the interlaced video\n"); ++ fast_parse = 0; ++ } ++ if ((ret = r->mapi->control(r->mctx, MPP_DEC_SET_PARSER_FAST_MODE, &fast_parse)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set parser fast mode: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto exit; ++ } + +- if (desc) +- av_free(desc); ++ if ((ret = r->mapi->control(r->mctx, MPP_DEC_SET_INFO_CHANGE_READY, NULL)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set info change ready: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto exit; ++ } ++ goto exit; ++ } else { ++ av_log(avctx, AV_LOG_DEBUG, "Received a frame\n"); ++ r->errinfo_cnt = 0; ++ r->got_frame = 1; ++ ++ switch (avctx->pix_fmt) { ++ case AV_PIX_FMT_DRM_PRIME: ++ { ++ if ((ret = rkmpp_export_frame(avctx, frame, mpp_frame)) < 0) ++ goto exit; ++ return 0; ++ } ++ break; ++ case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_NV16: ++ case AV_PIX_FMT_NV15: ++ case AV_PIX_FMT_NV20: ++ { ++ AVFrame *tmp_frame = av_frame_alloc(); ++ if (!tmp_frame) { ++ ret = AVERROR(ENOMEM); ++ goto exit; ++ } ++ if ((ret = rkmpp_export_frame(avctx, tmp_frame, mpp_frame)) < 0) ++ goto exit; ++ ++ if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed: %d\n", ret); ++ av_frame_free(&tmp_frame); ++ goto exit; ++ } ++ if ((ret = av_hwframe_transfer_data(frame, tmp_frame, 0)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed: %d\n", ret); ++ av_frame_free(&tmp_frame); ++ goto exit; ++ } ++ if ((ret = 
av_frame_copy_props(frame, tmp_frame)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "av_frame_copy_props failed: %d\n", ret); ++ av_frame_free(&tmp_frame); ++ goto exit; ++ } ++ av_frame_free(&tmp_frame); ++ return 0; ++ } ++ break; ++ default: ++ { ++ ret = AVERROR_BUG; ++ goto exit; ++ } ++ break; ++ } ++ } + ++exit: ++ if (mpp_frame) ++ mpp_frame_deinit(&mpp_frame); + return ret; + } + +-static int rkmpp_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++static int rkmpp_send_eos(AVCodecContext *avctx) + { +- RKMPPDecodeContext *rk_context = avctx->priv_data; +- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data; +- int ret = MPP_NOK; +- AVPacket pkt = {0}; +- RK_S32 usedslots, freeslots; +- +- if (!decoder->eos_reached) { +- // we get the available slots in decoder +- ret = decoder->mpi->control(decoder->ctx, MPP_DEC_GET_STREAM_COUNT, &usedslots); +- if (ret != MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "Failed to get decoder used slots (code = %d).\n", ret); +- return ret; +- } ++ RKMPPDecContext *r = avctx->priv_data; ++ MppPacket mpp_pkt = NULL; ++ int ret; + +- freeslots = INPUT_MAX_PACKETS - usedslots; +- if (freeslots > 0) { +- ret = ff_decode_get_packet(avctx, &pkt); +- if (ret < 0 && ret != AVERROR_EOF) { +- return ret; +- } ++ if ((ret = mpp_packet_init(&mpp_pkt, NULL, 0)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init 'EOS' packet: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ mpp_packet_set_eos(mpp_pkt); + +- ret = rkmpp_send_packet(avctx, &pkt); +- av_packet_unref(&pkt); ++ do { ++ ret = r->mapi->decode_put_packet(r->mctx, mpp_pkt); ++ } while (ret != MPP_OK); + +- if (ret < 0) { +- av_log(avctx, AV_LOG_ERROR, "Failed to send packet to decoder (code = %d)\n", ret); +- return ret; ++ r->draining = 1; ++ ++ mpp_packet_deinit(&mpp_pkt); ++ return 0; ++} ++ ++static int rkmpp_send_packet(AVCodecContext *avctx, AVPacket *pkt) ++{ ++ RKMPPDecContext *r = avctx->priv_data; ++ MppPacket mpp_pkt = NULL; ++ int64_t pts = 
PTS_TO_MPP_PTS(pkt->pts, avctx->pkt_timebase); ++ int ret; ++ ++ /* avoid sending new data after EOS */ ++ if (r->draining) ++ return AVERROR(EOF); ++ ++ /* do not skip non-key pkt until got any frame */ ++ if (r->got_frame && ++ avctx->skip_frame == AVDISCARD_NONKEY && ++ !(pkt->flags & AV_PKT_FLAG_KEY)) { ++ av_log(avctx, AV_LOG_TRACE, "Skip packet without key flag " ++ "at pts %"PRId64"\n", pkt->pts); ++ return 0; ++ } ++ ++ if ((ret = mpp_packet_init(&mpp_pkt, pkt->data, pkt->size)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init packet: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ mpp_packet_set_pts(mpp_pkt, pts); ++ ++ if ((ret = r->mapi->decode_put_packet(r->mctx, mpp_pkt)) != MPP_OK) { ++ av_log(avctx, AV_LOG_TRACE, "Decoder buffer is full\n"); ++ mpp_packet_deinit(&mpp_pkt); ++ return AVERROR(EAGAIN); ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Wrote %d bytes to decoder\n", pkt->size); ++ ++ mpp_packet_deinit(&mpp_pkt); ++ return 0; ++} ++ ++static int rkmpp_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++{ ++ RKMPPDecContext *r = avctx->priv_data; ++ AVPacket *pkt = &r->last_pkt; ++ int ret; ++ ++ if (r->info_change && !r->buf_group) ++ return AVERROR_EOF; ++ ++ /* no more frames after EOS */ ++ if (r->eof) ++ return AVERROR_EOF; ++ ++ /* drain remain frames */ ++ if (r->draining) { ++ ret = rkmpp_get_frame(avctx, frame, MPP_TIMEOUT_BLOCK); ++ goto exit; ++ } ++ ++ while (1) { ++ if (!pkt->size) { ++ ret = ff_decode_get_packet(avctx, pkt); ++ if (ret == AVERROR_EOF) { ++ av_log(avctx, AV_LOG_DEBUG, "Decoder is at EOF\n"); ++ /* send EOS and start draining */ ++ rkmpp_send_eos(avctx); ++ ret = rkmpp_get_frame(avctx, frame, MPP_TIMEOUT_BLOCK); ++ goto exit; ++ } else if (ret == AVERROR(EAGAIN)) { ++ /* not blocking so that we can feed data ASAP */ ++ ret = rkmpp_get_frame(avctx, frame, MPP_TIMEOUT_NON_BLOCK); ++ goto exit; ++ } else if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Decoder failed to get packet: %d\n", ret); ++ goto exit; 
++ } ++ } else { ++ /* send pending data to decoder */ ++ ret = rkmpp_send_packet(avctx, pkt); ++ if (ret == AVERROR(EAGAIN)) { ++ /* some streams might need more packets to start returning frames */ ++ ret = rkmpp_get_frame(avctx, frame, 100); ++ if (ret != AVERROR(EAGAIN)) ++ goto exit; ++ } else if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Decoder failed to send packet: %d\n", ret); ++ goto exit; ++ } else { ++ av_packet_unref(pkt); ++ pkt->size = 0; + } + } +- +- // make sure we keep decoder full +- if (freeslots > 1) +- return AVERROR(EAGAIN); + } + +- return rkmpp_retrieve_frame(avctx, frame); ++exit: ++ if (r->draining && ++ ret == AVERROR(EAGAIN)) ++ ret = AVERROR_EOF; ++ return ret; + } + +-static void rkmpp_flush(AVCodecContext *avctx) ++static void rkmpp_decode_flush(AVCodecContext *avctx) + { +- RKMPPDecodeContext *rk_context = avctx->priv_data; +- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data; +- int ret = MPP_NOK; ++ RKMPPDecContext *r = avctx->priv_data; ++ int ret; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Decoder flushing\n"); + +- av_log(avctx, AV_LOG_DEBUG, "Flush.\n"); ++ if ((ret = r->mapi->reset(r->mctx)) == MPP_OK) { ++ r->eof = 0; ++ r->draining = 0; ++ r->info_change = 0; ++ r->errinfo_cnt = 0; ++ r->got_frame = 0; + +- ret = decoder->mpi->reset(decoder->ctx); +- if (ret == MPP_OK) { +- decoder->first_packet = 1; ++ av_packet_unref(&r->last_pkt); + } else +- av_log(avctx, AV_LOG_ERROR, "Failed to reset MPI (code = %d)\n", ret); ++ av_log(avctx, AV_LOG_ERROR, "Failed to reset MPP context: %d\n", ret); + } + +-static const AVCodecHWConfigInternal *const rkmpp_hw_configs[] = { +- HW_CONFIG_INTERNAL(DRM_PRIME), +- NULL +-}; +- +-#define RKMPP_DEC_CLASS(NAME) \ +- static const AVClass rkmpp_##NAME##_dec_class = { \ +- .class_name = "rkmpp_" #NAME "_dec", \ +- .version = LIBAVUTIL_VERSION_INT, \ +- }; +- +-#define RKMPP_DEC(NAME, ID, BSFS) \ +- RKMPP_DEC_CLASS(NAME) \ +- const FFCodec ff_##NAME##_rkmpp_decoder = { \ +- .p.name 
= #NAME "_rkmpp", \ +- CODEC_LONG_NAME(#NAME " (rkmpp)"), \ +- .p.type = AVMEDIA_TYPE_VIDEO, \ +- .p.id = ID, \ +- .priv_data_size = sizeof(RKMPPDecodeContext), \ +- .init = rkmpp_init_decoder, \ +- .close = rkmpp_close_decoder, \ +- FF_CODEC_RECEIVE_FRAME_CB(rkmpp_receive_frame), \ +- .flush = rkmpp_flush, \ +- .p.priv_class = &rkmpp_##NAME##_dec_class, \ +- .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \ +- .hw_configs = rkmpp_hw_configs, \ +- .bsfs = BSFS, \ +- .p.wrapper_name = "rkmpp", \ +- .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE, \ +- }; +- +-RKMPP_DEC(h264, AV_CODEC_ID_H264, "h264_mp4toannexb") +-RKMPP_DEC(hevc, AV_CODEC_ID_HEVC, "hevc_mp4toannexb") +-RKMPP_DEC(vp8, AV_CODEC_ID_VP8, NULL) +-RKMPP_DEC(vp9, AV_CODEC_ID_VP9, NULL) ++#if CONFIG_H263_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(h263, H263, NULL) ++#endif ++#if CONFIG_H264_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(h264, H264, "h264_mp4toannexb,dump_extra") ++#endif ++#if CONFIG_HEVC_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(hevc, HEVC, "hevc_mp4toannexb,dump_extra") ++#endif ++#if CONFIG_VP8_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(vp8, VP8, NULL) ++#endif ++#if CONFIG_VP9_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(vp9, VP9, NULL) ++#endif ++#if CONFIG_AV1_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(av1, AV1, NULL) ++#endif ++#if CONFIG_MPEG1_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(mpeg1, MPEG1VIDEO, NULL) ++#endif ++#if CONFIG_MPEG2_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(mpeg2, MPEG2VIDEO, NULL) ++#endif ++#if CONFIG_MPEG4_RKMPP_DECODER ++DEFINE_RKMPP_DECODER(mpeg4, MPEG4, "dump_extra,mpeg4_unpack_bframes") ++#endif +Index: FFmpeg/libavcodec/rkmppdec.h +=================================================================== +--- /dev/null ++++ libavcodec/rkmppdec.h +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 2017 Lionel CHAZALLON ++ * Copyright (c) 2023 Huseyin BIYIK ++ * Copyright (c) 2023 NyanMisaka ++ * ++ * This file is part of FFmpeg. 
++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Rockchip MPP (Media Process Platform) video decoder ++ */ ++ ++#ifndef AVCODEC_RKMPPDEC_H ++#define AVCODEC_RKMPPDEC_H ++ ++#include ++ ++#include "codec_internal.h" ++#include "decode.h" ++#include "hwconfig.h" ++#include "internal.h" ++ ++#include "libavutil/hwcontext_rkmpp.h" ++#include "libavutil/mastering_display_metadata.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#define MAX_ERRINFO_COUNT 100 ++ ++typedef struct RKMPPDecContext { ++ AVClass *class; ++ ++ MppApi *mapi; ++ MppCtx mctx; ++ MppBufferGroup buf_group; ++ ++ AVBufferRef *hwdevice; ++ AVBufferRef *hwframe; ++ ++ AVPacket last_pkt; ++ int eof; ++ int draining; ++ int info_change; ++ int errinfo_cnt; ++ int got_frame; ++ ++ int deint; ++ int afbc; ++ int fast_parse; ++ int buf_mode; ++} RKMPPDecContext; ++ ++enum { ++ RKMPP_DEC_AFBC_OFF = 0, ++ RKMPP_DEC_AFBC_ON = 1, ++ RKMPP_DEC_AFBC_ON_RGA = 2, ++}; ++ ++enum { ++ RKMPP_DEC_HALF_INTERNAL = 0, ++ RKMPP_DEC_PURE_EXTERNAL = 1, ++}; ++ ++static const AVRational mpp_tb = { 1, 1000000 }; ++ ++#define PTS_TO_MPP_PTS(pts, pts_tb) ((pts_tb.num && pts_tb.den) ? 
\ ++ av_rescale_q(pts, pts_tb, mpp_tb) : pts) ++ ++#define MPP_PTS_TO_PTS(mpp_pts, pts_tb) ((pts_tb.num && pts_tb.den) ? \ ++ av_rescale_q(mpp_pts, mpp_tb, pts_tb) : mpp_pts) ++ ++#define OFFSET(x) offsetof(RKMPPDecContext, x) ++#define VD (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++ ++static const AVOption options[] = { ++ { "deint", "Enable IEP (Image Enhancement Processor) for de-interlacing", OFFSET(deint), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VD }, ++ { "afbc", "Enable AFBC (Arm Frame Buffer Compression) to save bandwidth", OFFSET(afbc), AV_OPT_TYPE_INT, { .i64 = RKMPP_DEC_AFBC_OFF }, 0, 2, VD, .unit = "afbc" }, ++ { "off", "Disable AFBC support", 0, AV_OPT_TYPE_CONST, { .i64 = RKMPP_DEC_AFBC_OFF }, 0, 0, VD, .unit = "afbc" }, ++ { "on", "Enable AFBC support", 0, AV_OPT_TYPE_CONST, { .i64 = RKMPP_DEC_AFBC_ON }, 0, 0, VD, .unit = "afbc" }, ++ { "rga", "Enable AFBC if capable RGA is available", 0, AV_OPT_TYPE_CONST, { .i64 = RKMPP_DEC_AFBC_ON_RGA }, 0, 0, VD, .unit = "afbc" }, ++ { "fast_parse", "Enable fast parsing to improve decoding parallelism", OFFSET(fast_parse), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VD }, ++ { "buf_mode", "Set the buffer mode for MPP decoder", OFFSET(buf_mode), AV_OPT_TYPE_INT, { .i64 = RKMPP_DEC_HALF_INTERNAL }, 0, 1, VD, .unit = "buf_mode" }, ++ { "half", "Half internal mode", 0, AV_OPT_TYPE_CONST, { .i64 = RKMPP_DEC_HALF_INTERNAL }, 0, 0, VD, .unit = "buf_mode" }, ++ { "ext", "Pure external mode", 0, AV_OPT_TYPE_CONST, { .i64 = RKMPP_DEC_PURE_EXTERNAL }, 0, 0, VD, .unit = "buf_mode" }, ++ { NULL } ++}; ++ ++static const AVCodecHWConfigInternal *const rkmpp_dec_hw_configs[] = { ++ &(const AVCodecHWConfigInternal) { ++ .public = { ++ .pix_fmt = AV_PIX_FMT_DRM_PRIME, ++ .methods = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX | ++ AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX | ++ AV_CODEC_HW_CONFIG_METHOD_INTERNAL, ++ .device_type = AV_HWDEVICE_TYPE_RKMPP, ++ }, ++ .hwaccel = NULL, ++ }, ++ NULL ++}; ++ ++#define 
DEFINE_RKMPP_DECODER(x, X, bsf_name) \ ++static const AVClass x##_rkmpp_decoder_class = { \ ++ .class_name = #x "_rkmpp_decoder", \ ++ .item_name = av_default_item_name, \ ++ .option = options, \ ++ .version = LIBAVUTIL_VERSION_INT, \ ++}; \ ++const FFCodec ff_##x##_rkmpp_decoder = { \ ++ .p.name = #x "_rkmpp", \ ++ CODEC_LONG_NAME("Rockchip MPP (Media Process Platform) " #X " decoder"), \ ++ .p.type = AVMEDIA_TYPE_VIDEO, \ ++ .p.id = AV_CODEC_ID_##X, \ ++ .priv_data_size = sizeof(RKMPPDecContext), \ ++ .p.priv_class = &x##_rkmpp_decoder_class, \ ++ .init = rkmpp_decode_init, \ ++ .close = rkmpp_decode_close, \ ++ FF_CODEC_RECEIVE_FRAME_CB(rkmpp_decode_receive_frame), \ ++ .flush = rkmpp_decode_flush, \ ++ .bsfs = bsf_name, \ ++ .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | \ ++ AV_CODEC_CAP_HARDWARE, \ ++ .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \ ++ FF_CODEC_CAP_SETS_FRAME_PROPS, \ ++ .hw_configs = rkmpp_dec_hw_configs, \ ++ .p.wrapper_name = "rkmpp", \ ++}; ++ ++#endif /* AVCODEC_RKMPPDEC_H */ +Index: FFmpeg/libavcodec/rkmppenc.c +=================================================================== +--- /dev/null ++++ libavcodec/rkmppenc.c +@@ -0,0 +1,1103 @@ ++/* ++ * Copyright (c) 2023 Huseyin BIYIK ++ * Copyright (c) 2023 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Rockchip MPP (Media Process Platform) video encoder ++ */ ++ ++#include "config_components.h" ++#include "rkmppenc.h" ++ ++static MppCodingType rkmpp_get_coding_type(AVCodecContext *avctx) ++{ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: return MPP_VIDEO_CodingAVC; ++ case AV_CODEC_ID_HEVC: return MPP_VIDEO_CodingHEVC; ++ case AV_CODEC_ID_MJPEG: return MPP_VIDEO_CodingMJPEG; ++ default: return MPP_VIDEO_CodingUnused; ++ } ++} ++ ++static MppFrameFormat rkmpp_get_mpp_fmt_h26x(enum AVPixelFormat pix_fmt) ++{ ++ switch (pix_fmt) { ++ case AV_PIX_FMT_GRAY8: return MPP_FMT_YUV400; ++ case AV_PIX_FMT_YUV420P: return MPP_FMT_YUV420P; ++ case AV_PIX_FMT_YUV422P: return MPP_FMT_YUV422P; ++ case AV_PIX_FMT_YUV444P: return MPP_FMT_YUV444P; ++ case AV_PIX_FMT_NV12: return MPP_FMT_YUV420SP; ++ case AV_PIX_FMT_NV21: return MPP_FMT_YUV420SP_VU; ++ case AV_PIX_FMT_NV16: return MPP_FMT_YUV422SP; ++ case AV_PIX_FMT_NV24: return MPP_FMT_YUV444SP; ++ case AV_PIX_FMT_YUYV422: return MPP_FMT_YUV422_YUYV; ++ case AV_PIX_FMT_YVYU422: return MPP_FMT_YUV422_YVYU; ++ case AV_PIX_FMT_UYVY422: return MPP_FMT_YUV422_UYVY; ++ case AV_PIX_FMT_RGB24: return MPP_FMT_RGB888; ++ case AV_PIX_FMT_BGR24: return MPP_FMT_BGR888; ++ case AV_PIX_FMT_RGBA: ++ case AV_PIX_FMT_RGB0: return MPP_FMT_RGBA8888; ++ case AV_PIX_FMT_BGRA: ++ case AV_PIX_FMT_BGR0: return MPP_FMT_BGRA8888; ++ case AV_PIX_FMT_ARGB: ++ case AV_PIX_FMT_0RGB: return MPP_FMT_ARGB8888; ++ case AV_PIX_FMT_ABGR: ++ case AV_PIX_FMT_0BGR: return MPP_FMT_ABGR8888; ++ default: return MPP_FMT_BUTT; ++ } ++} ++ ++static MppFrameFormat rkmpp_get_mpp_fmt_mjpeg(enum AVPixelFormat pix_fmt) ++{ ++ switch (pix_fmt) { ++ case AV_PIX_FMT_YUV420P: return MPP_FMT_YUV420P; ++ case AV_PIX_FMT_NV12: 
return MPP_FMT_YUV420SP; ++ case AV_PIX_FMT_YUYV422: return MPP_FMT_YUV422_YUYV; ++ case AV_PIX_FMT_UYVY422: return MPP_FMT_YUV422_UYVY; ++ case AV_PIX_FMT_RGB444BE: return MPP_FMT_RGB444; ++ case AV_PIX_FMT_BGR444BE: return MPP_FMT_BGR444; ++ case AV_PIX_FMT_RGB555BE: return MPP_FMT_RGB555; ++ case AV_PIX_FMT_BGR555BE: return MPP_FMT_BGR555; ++ case AV_PIX_FMT_RGB565BE: return MPP_FMT_RGB565; ++ case AV_PIX_FMT_BGR565BE: return MPP_FMT_BGR565; ++ case AV_PIX_FMT_RGBA: ++ case AV_PIX_FMT_RGB0: return MPP_FMT_RGBA8888; ++ case AV_PIX_FMT_BGRA: ++ case AV_PIX_FMT_BGR0: return MPP_FMT_BGRA8888; ++ case AV_PIX_FMT_ARGB: ++ case AV_PIX_FMT_0RGB: return MPP_FMT_ARGB8888; ++ case AV_PIX_FMT_ABGR: ++ case AV_PIX_FMT_0BGR: return MPP_FMT_ABGR8888; ++ case AV_PIX_FMT_X2RGB10BE: return MPP_FMT_RGB101010; ++ case AV_PIX_FMT_X2BGR10BE: return MPP_FMT_BGR101010; ++ default: return MPP_FMT_BUTT; ++ } ++} ++ ++static uint32_t rkmpp_get_drm_afbc_format(MppFrameFormat mpp_fmt) ++{ ++ switch (mpp_fmt & MPP_FRAME_FMT_MASK) { ++ case MPP_FMT_YUV420SP: return DRM_FORMAT_YUV420_8BIT; ++ case MPP_FMT_YUV422SP: return DRM_FORMAT_YUYV; ++ default: return DRM_FORMAT_INVALID; ++ } ++} ++ ++static int get_byte_stride(const AVDRMObjectDescriptor *object, ++ const AVDRMLayerDescriptor *layer, ++ int is_rgb, int is_planar, ++ int *hs, int *vs) ++{ ++ const AVDRMPlaneDescriptor *plane0, *plane1; ++ const int is_packed_fmt = is_rgb || (!is_rgb && !is_planar); ++ ++ if (!object || !layer || !hs || !vs) ++ return AVERROR(EINVAL); ++ ++ plane0 = &layer->planes[0]; ++ plane1 = &layer->planes[1]; ++ ++ *hs = plane0->pitch; ++ *vs = is_packed_fmt ? ++ ALIGN_DOWN(object->size / plane0->pitch, is_rgb ? 1 : 2) : ++ (plane1->offset / plane0->pitch); ++ ++ return (*hs > 0 && *vs > 0) ? 
0 : AVERROR(EINVAL); ++} ++ ++static int get_afbc_byte_stride(const AVPixFmtDescriptor *desc, ++ int *stride, int reverse) ++{ ++ if (!desc || !stride || *stride <= 0) ++ return AVERROR(EINVAL); ++ ++ if (desc->nb_components == 1 || ++ (desc->flags & AV_PIX_FMT_FLAG_RGB) || ++ (!(desc->flags & AV_PIX_FMT_FLAG_RGB) && ++ !(desc->flags & AV_PIX_FMT_FLAG_PLANAR))) ++ return 0; ++ ++ if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) ++ *stride = reverse ? (*stride * 2 / 3) : (*stride * 3 / 2); ++ else if (desc->log2_chroma_w == 1 && !desc->log2_chroma_h) ++ *stride = reverse ? (*stride / 2) : (*stride * 2); ++ else if (!desc->log2_chroma_w && !desc->log2_chroma_h) ++ *stride = reverse ? (*stride / 3) : (*stride * 3); ++ else ++ return AVERROR(EINVAL); ++ ++ return (*stride > 0) ? 0 : AVERROR(EINVAL); ++} ++ ++static unsigned get_used_frame_count(MPPEncFrame *list) ++{ ++ unsigned count = 0; ++ ++ while (list) { ++ if (list->queued == 1 && ++ (list->frame || list->mpp_frame)) ++ ++count; ++ list = list->next; ++ } ++ ++ return count; ++} ++ ++static void clear_unused_frames(MPPEncFrame *list) ++{ ++ while (list) { ++ if (list->queued == 1) { ++ MppFrame mpp_frame = list->mpp_frame; ++ MppBuffer mpp_buf = NULL; ++ ++ if (mpp_frame) ++ mpp_buf = mpp_frame_get_buffer(mpp_frame); ++ ++ if (mpp_buf && ++ mpp_buffer_get_index(mpp_buf) < 0) { ++ mpp_buffer_put(mpp_buf); ++ ++ mpp_frame_deinit(&list->mpp_frame); ++ list->mpp_frame = NULL; ++ ++ av_frame_free(&list->frame); ++ list->queued = 0; ++ } ++ } ++ list = list->next; ++ } ++} ++ ++static void clear_frame_list(MPPEncFrame **list) ++{ ++ while (*list) { ++ MPPEncFrame *frame = NULL; ++ MppFrame mpp_frame = NULL; ++ MppBuffer mpp_buf = NULL; ++ ++ frame = *list; ++ *list = (*list)->next; ++ ++ mpp_frame = frame->mpp_frame; ++ if (mpp_frame) { ++ mpp_buf = mpp_frame_get_buffer(mpp_frame); ++ if (mpp_buf && ++ mpp_buffer_get_index(mpp_buf) >= 0) ++ mpp_buffer_put(mpp_buf); ++ ++ mpp_frame_deinit(&frame->mpp_frame); 
++ frame->mpp_frame = NULL; ++ } ++ ++ av_frame_free(&frame->frame); ++ av_freep(&frame); ++ } ++} ++ ++static MPPEncFrame *get_free_frame(MPPEncFrame **list) ++{ ++ MPPEncFrame *out = *list; ++ ++ for (; out; out = out->next) { ++ if (!out->queued) { ++ out->queued = 1; ++ break; ++ } ++ } ++ ++ if (!out) { ++ out = av_mallocz(sizeof(*out)); ++ if (!out) { ++ av_log(NULL, AV_LOG_ERROR, "Cannot alloc new output frame\n"); ++ return NULL; ++ } ++ out->queued = 1; ++ out->next = *list; ++ *list = out; ++ } ++ ++ return out; ++} ++ ++static int rkmpp_set_enc_cfg_prep(AVCodecContext *avctx, AVFrame *frame) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ MppEncCfg cfg = r->mcfg; ++ MppFrameFormat mpp_fmt = r->mpp_fmt; ++ int ret, is_afbc = 0; ++ int hor_stride = 0, ver_stride = 0; ++ const AVPixFmtDescriptor *pix_desc; ++ const AVDRMFrameDescriptor *drm_desc; ++ ++ if (r->cfg_init) ++ return 0; ++ ++ if (!frame) ++ return AVERROR(EINVAL); ++ ++ drm_desc = (AVDRMFrameDescriptor *)frame->data[0]; ++ if (drm_desc->objects[0].fd < 0) ++ return AVERROR(ENOMEM); ++ ++ pix_desc = av_pix_fmt_desc_get(r->pix_fmt); ++ is_afbc = drm_is_afbc(drm_desc->objects[0].format_modifier); ++ if (is_afbc && ++ !(avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC)) { ++ av_log(avctx, AV_LOG_ERROR, "AFBC is not supported in codec '%s'\n", ++ avcodec_get_name(avctx->codec_id)); ++ return AVERROR(ENOSYS); ++ } ++ if (!is_afbc) { ++ ret = get_byte_stride(&drm_desc->objects[0], ++ &drm_desc->layers[0], ++ (pix_desc->flags & AV_PIX_FMT_FLAG_RGB), ++ (pix_desc->flags & AV_PIX_FMT_FLAG_PLANAR), ++ &hor_stride, &ver_stride); ++ if (ret < 0 || !hor_stride || !ver_stride) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get frame strides\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ mpp_enc_cfg_set_s32(cfg, "prep:hor_stride", hor_stride); ++ mpp_enc_cfg_set_s32(cfg, "prep:ver_stride", ver_stride); ++ } ++ ++ mpp_enc_cfg_set_s32(cfg, "prep:width", avctx->width); ++ 
mpp_enc_cfg_set_s32(cfg, "prep:height", avctx->height); ++ ++ mpp_enc_cfg_set_s32(cfg, "prep:colorspace", avctx->colorspace); ++ mpp_enc_cfg_set_s32(cfg, "prep:colorprim", avctx->color_primaries); ++ mpp_enc_cfg_set_s32(cfg, "prep:colortrc", avctx->color_trc); ++ mpp_enc_cfg_set_s32(cfg, "prep:colorrange", avctx->color_range); ++ ++ if (is_afbc) { ++ const AVDRMLayerDescriptor *layer = &drm_desc->layers[0]; ++ uint32_t drm_afbc_fmt = rkmpp_get_drm_afbc_format(mpp_fmt); ++ ++ if (drm_afbc_fmt != layer->format) { ++ av_log(avctx, AV_LOG_ERROR, "Input format '%s' with AFBC modifier is not supported\n", ++ av_get_pix_fmt_name(r->pix_fmt)); ++ return AVERROR(ENOSYS); ++ } ++ mpp_fmt |= MPP_FRAME_FBC_AFBC_V2; ++ } ++ mpp_enc_cfg_set_s32(cfg, "prep:format", mpp_fmt); ++ ++ if ((ret = r->mapi->control(r->mctx, MPP_ENC_SET_CFG, cfg)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set config with frame: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ ++ r->cfg_init = 1; ++ av_log(avctx, AV_LOG_VERBOSE, "Configured with size: %dx%d | pix_fmt: %s | sw_pix_fmt: %s\n", ++ avctx->width, avctx->height, ++ av_get_pix_fmt_name(avctx->pix_fmt), av_get_pix_fmt_name(r->pix_fmt)); ++ ++ return 0; ++} ++ ++static int rkmpp_set_enc_cfg(AVCodecContext *avctx) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ MppEncCfg cfg = r->mcfg; ++ ++ RK_U32 rc_mode, fps_num, fps_den; ++ MppEncHeaderMode header_mode; ++ MppEncSeiMode sei_mode; ++ int max_bps, min_bps; ++ int qp_init, qp_max, qp_min, qp_max_i, qp_min_i; ++ int ret; ++ ++ mpp_enc_cfg_set_s32(cfg, "prep:width", avctx->width); ++ mpp_enc_cfg_set_s32(cfg, "prep:height", avctx->height); ++ mpp_enc_cfg_set_s32(cfg, "prep:hor_stride", FFALIGN(avctx->width, 64)); ++ mpp_enc_cfg_set_s32(cfg, "prep:ver_stride", FFALIGN(avctx->height, 64)); ++ mpp_enc_cfg_set_s32(cfg, "prep:format", MPP_FMT_YUV420SP); ++ mpp_enc_cfg_set_s32(cfg, "prep:mirroring", 0); ++ mpp_enc_cfg_set_s32(cfg, "prep:rotation", 0); ++ mpp_enc_cfg_set_s32(cfg, "prep:flip", 
0); ++ ++ if (avctx->framerate.den > 0 && avctx->framerate.num > 0) ++ av_reduce(&fps_num, &fps_den, avctx->framerate.num, avctx->framerate.den, 65535); ++ else ++ av_reduce(&fps_num, &fps_den, avctx->time_base.den, avctx->time_base.num, 65535); ++ ++ mpp_enc_cfg_set_s32(cfg, "rc:fps_in_flex", 0); ++ mpp_enc_cfg_set_s32(cfg, "rc:fps_in_num", fps_num); ++ mpp_enc_cfg_set_s32(cfg, "rc:fps_in_denom", fps_den); ++ mpp_enc_cfg_set_s32(cfg, "rc:fps_in_denorm", fps_den); ++ mpp_enc_cfg_set_s32(cfg, "rc:fps_out_flex", 0); ++ mpp_enc_cfg_set_s32(cfg, "rc:fps_out_num",fps_num); ++ mpp_enc_cfg_set_s32(cfg, "rc:fps_out_denom", fps_den); ++ mpp_enc_cfg_set_s32(cfg, "rc:fps_out_denorm", fps_den); ++ ++ mpp_enc_cfg_set_s32(cfg, "rc:gop", FFMAX(avctx->gop_size, 1)); ++ ++ rc_mode = avctx->codec_id == AV_CODEC_ID_MJPEG ? MPP_ENC_RC_MODE_FIXQP : r->rc_mode; ++ if (rc_mode == MPP_ENC_RC_MODE_BUTT) { ++ if (r->qp_init >= 0) ++ rc_mode = MPP_ENC_RC_MODE_FIXQP; ++ else if (avctx->rc_max_rate > 0) ++ rc_mode = MPP_ENC_RC_MODE_VBR; ++ else ++ rc_mode = MPP_ENC_RC_MODE_CBR; ++ } ++ ++ switch (rc_mode) { ++ case MPP_ENC_RC_MODE_VBR: ++ av_log(avctx, AV_LOG_VERBOSE, "Rate Control mode is set to VBR\n"); break; ++ case MPP_ENC_RC_MODE_CBR: ++ av_log(avctx, AV_LOG_VERBOSE, "Rate Control mode is set to CBR\n"); break; ++ case MPP_ENC_RC_MODE_FIXQP: ++ av_log(avctx, AV_LOG_VERBOSE, "Rate Control mode is set to CQP\n"); break; ++ case MPP_ENC_RC_MODE_AVBR: ++ av_log(avctx, AV_LOG_VERBOSE, "Rate Control mode is set to AVBR\n"); break; ++ } ++ mpp_enc_cfg_set_u32(cfg, "rc:mode", rc_mode); ++ ++ switch (rc_mode) { ++ case MPP_ENC_RC_MODE_FIXQP: ++ /* do not setup bitrate on FIXQP mode */ ++ break; ++ case MPP_ENC_RC_MODE_VBR: ++ case MPP_ENC_RC_MODE_AVBR: ++ /* VBR mode has wide bound */ ++ max_bps = (avctx->rc_max_rate > 0 && avctx->rc_max_rate >= avctx->bit_rate) ++ ? 
avctx->rc_max_rate : (avctx->bit_rate * 17 / 16); ++ min_bps = (avctx->rc_min_rate > 0 && avctx->rc_min_rate <= avctx->bit_rate) ++ ? avctx->rc_min_rate : (avctx->bit_rate * 1 / 16); ++ break; ++ case MPP_ENC_RC_MODE_CBR: ++ default: ++ /* CBR mode has narrow bound */ ++ max_bps = avctx->bit_rate * 17 / 16; ++ min_bps = avctx->bit_rate * 15 / 16; ++ break; ++ } ++ if (rc_mode == MPP_ENC_RC_MODE_CBR || ++ rc_mode == MPP_ENC_RC_MODE_VBR || ++ rc_mode == MPP_ENC_RC_MODE_AVBR) { ++ mpp_enc_cfg_set_u32(cfg, "rc:bps_target", avctx->bit_rate); ++ mpp_enc_cfg_set_s32(cfg, "rc:bps_max", max_bps); ++ mpp_enc_cfg_set_s32(cfg, "rc:bps_min", min_bps); ++ av_log(avctx, AV_LOG_VERBOSE, "Bitrate Target/Min/Max is set to %ld/%d/%d\n", ++ avctx->bit_rate, min_bps, max_bps); ++ } ++ ++ if (avctx->rc_buffer_size > 0 && ++ (rc_mode == MPP_ENC_RC_MODE_CBR || ++ rc_mode == MPP_ENC_RC_MODE_VBR || ++ rc_mode == MPP_ENC_RC_MODE_AVBR)) { ++ int stats_time_in_sec = avctx->rc_buffer_size / max_bps; ++ if (stats_time_in_sec > 0) { ++ mpp_enc_cfg_set_u32(cfg, "rc:stats_time", stats_time_in_sec); ++ av_log(avctx, AV_LOG_VERBOSE, "Stats time is set to %d\n", stats_time_in_sec); ++ } ++ } ++ ++ mpp_enc_cfg_set_u32(cfg, "rc:drop_mode", MPP_ENC_RC_DROP_FRM_DISABLED); ++ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_HEVC: ++ { ++ switch (rc_mode) { ++ case MPP_ENC_RC_MODE_FIXQP: ++ qp_init = r->qp_init >= 0 ? r->qp_init : 26; ++ qp_max = qp_min = qp_max_i = qp_min_i = qp_init; ++ mpp_enc_cfg_set_s32(cfg, "rc:qp_ip", 0); ++ break; ++ case MPP_ENC_RC_MODE_CBR: ++ case MPP_ENC_RC_MODE_VBR: ++ case MPP_ENC_RC_MODE_AVBR: ++ qp_max = r->qp_max >= 0 ? r->qp_max : 48; ++ qp_min = FFMIN(r->qp_min >= 0 ? r->qp_min : 0, qp_max); ++ qp_max_i = r->qp_max_i >= 0 ? r->qp_max_i : 48; ++ qp_min_i = FFMIN(r->qp_min_i >= 0 ? r->qp_min_i : 0, qp_max_i); ++ qp_init = FFMIN3(r->qp_init >= 0 ? 
r->qp_init : 26, qp_max, qp_max_i); ++ mpp_enc_cfg_set_s32(cfg, "rc:qp_ip", 2); ++ break; ++ default: ++ return AVERROR(EINVAL); ++ } ++ mpp_enc_cfg_set_s32(cfg, "rc:qp_init", qp_init); ++ mpp_enc_cfg_set_s32(cfg, "rc:qp_max", qp_max); ++ mpp_enc_cfg_set_s32(cfg, "rc:qp_min", qp_min); ++ mpp_enc_cfg_set_s32(cfg, "rc:qp_max_i",qp_max_i); ++ mpp_enc_cfg_set_s32(cfg, "rc:qp_min_i", qp_min_i); ++ } ++ break; ++ case AV_CODEC_ID_MJPEG: ++ { ++ qp_init = r->qp_init >= 1 ? r->qp_init : 80; ++ qp_max = r->qp_max >= 1 ? r->qp_max : 99; ++ qp_min = r->qp_min >= 1 ? r->qp_min : 1; ++ qp_max_i = qp_min_i = 0; ++ /* jpeg use special codec config to control qtable */ ++ mpp_enc_cfg_set_s32(cfg, "jpeg:q_factor", qp_init); ++ mpp_enc_cfg_set_s32(cfg, "jpeg:qf_max", qp_max); ++ mpp_enc_cfg_set_s32(cfg, "jpeg:qf_min", qp_min); ++ } ++ break; ++ default: ++ return AVERROR(EINVAL); ++ } ++ ++ av_log(avctx, AV_LOG_VERBOSE, "QP Init/Max/Min/Max_I/Min_I is set to %d/%d/%d/%d/%d\n", ++ qp_init, qp_max, qp_min, qp_max_i, qp_min_i); ++ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ { ++ avctx->profile = r->profile; ++ avctx->level = r->level; ++ mpp_enc_cfg_set_s32(cfg, "h264:profile", avctx->profile); ++ mpp_enc_cfg_set_s32(cfg, "h264:level", avctx->level); ++ mpp_enc_cfg_set_s32(cfg, "h264:cabac_en", r->coder); ++ mpp_enc_cfg_set_s32(cfg, "h264:cabac_idc", 0); ++ mpp_enc_cfg_set_s32(cfg, "h264:trans8x8", ++ (r->dct8x8 && avctx->profile == AV_PROFILE_H264_HIGH)); ++ ++ switch (avctx->profile) { ++ case AV_PROFILE_H264_BASELINE: ++ av_log(avctx, AV_LOG_VERBOSE, "Profile is set to BASELINE\n"); break; ++ case AV_PROFILE_H264_MAIN: ++ av_log(avctx, AV_LOG_VERBOSE, "Profile is set to MAIN\n"); break; ++ case AV_PROFILE_H264_HIGH: ++ av_log(avctx, AV_LOG_VERBOSE, "Profile is set to HIGH\n"); ++ if (r->dct8x8) ++ av_log(avctx, AV_LOG_VERBOSE, "8x8 Transform is enabled\n"); ++ break; ++ } ++ av_log(avctx, AV_LOG_VERBOSE, "Level is set to %d\n", avctx->level); ++ av_log(avctx, 
AV_LOG_VERBOSE, "Coder is set to %s\n", r->coder ? "CABAC" : "CAVLC"); ++ } ++ break; ++ case AV_CODEC_ID_HEVC: ++ { ++ avctx->profile = r->pix_fmt == AV_PIX_FMT_GRAY8 ++ ? AV_PROFILE_HEVC_REXT : AV_PROFILE_HEVC_MAIN; ++ avctx->level = r->level; ++ mpp_enc_cfg_set_s32(cfg, "h265:profile", avctx->profile); ++ mpp_enc_cfg_set_s32(cfg, "h265:level", avctx->level); ++ if (avctx->level >= 120) { ++ mpp_enc_cfg_set_s32(cfg, "h265:tier", r->tier); ++ av_log(avctx, AV_LOG_VERBOSE, "Tier is set to %d\n", r->tier); ++ } ++ ++ switch (avctx->profile) { ++ case AV_PROFILE_HEVC_MAIN: ++ av_log(avctx, AV_LOG_VERBOSE, "Profile is set to MAIN\n"); break; ++ case AV_PROFILE_HEVC_REXT: ++ av_log(avctx, AV_LOG_VERBOSE, "Profile is set to REXT\n"); break; ++ } ++ av_log(avctx, AV_LOG_VERBOSE, "Level is set to %d\n", avctx->level / 3); ++ } ++ break; ++ case AV_CODEC_ID_MJPEG: ++ break; ++ default: ++ return AVERROR(EINVAL); ++ } ++ ++ if ((ret = r->mapi->control(r->mctx, MPP_ENC_SET_CFG, cfg)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set config: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC) { ++ sei_mode = MPP_ENC_SEI_MODE_DISABLE; ++ if ((ret = r->mapi->control(r->mctx, MPP_ENC_SET_SEI_CFG, &sei_mode)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set SEI config: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ ++ header_mode = MPP_ENC_HEADER_MODE_EACH_IDR; ++ if ((ret = r->mapi->control(r->mctx, MPP_ENC_SET_HEADER_MODE, &header_mode)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set header mode: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ } ++ ++ return 0; ++} ++ ++static MPPEncFrame *rkmpp_submit_frame(AVCodecContext *avctx, AVFrame *frame) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ MppFrame mpp_frame = NULL; ++ MppBuffer mpp_buf = NULL; ++ AVFrame *drm_frame = NULL; ++ const AVDRMFrameDescriptor *drm_desc; ++ const AVDRMLayerDescriptor *layer; ++ 
const AVDRMPlaneDescriptor *plane0; ++ const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(r->pix_fmt); ++ const int is_planar = pix_desc->flags & AV_PIX_FMT_FLAG_PLANAR; ++ const int is_rgb = pix_desc->flags & AV_PIX_FMT_FLAG_RGB; ++ const int is_yuv = !is_rgb && pix_desc->nb_components >= 2; ++ int hor_stride = 0, ver_stride = 0; ++ MppBufferInfo buf_info = { 0 }; ++ MppFrameFormat mpp_fmt = r->mpp_fmt; ++ int ret, is_afbc = 0; ++ ++ MPPEncFrame *mpp_enc_frame = NULL; ++ ++ clear_unused_frames(r->frame_list); ++ ++ mpp_enc_frame = get_free_frame(&r->frame_list); ++ if (!mpp_enc_frame) ++ return NULL; ++ ++ if ((ret = mpp_frame_init(&mpp_frame)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init MPP frame: %d\n", ret); ++ goto exit; ++ } ++ mpp_enc_frame->mpp_frame = mpp_frame; ++ ++ if (!frame) { ++ av_log(avctx, AV_LOG_DEBUG, "End of stream\n"); ++ mpp_frame_set_eos(mpp_frame, 1); ++ return mpp_enc_frame; ++ } ++ ++ if (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { ++ drm_frame = frame; ++ mpp_enc_frame->frame = av_frame_clone(drm_frame); ++ } else { ++ drm_frame = av_frame_alloc(); ++ if (!drm_frame) { ++ goto exit; ++ } ++ if ((ret = av_hwframe_get_buffer(r->hwframe, drm_frame, 0)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Cannot allocate an internal frame: %d\n", ret); ++ goto exit; ++ } ++ if ((ret = av_hwframe_transfer_data(drm_frame, frame, 0)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed: %d\n", ret); ++ goto exit; ++ } ++ if ((ret = av_frame_copy_props(drm_frame, frame)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "av_frame_copy_props failed: %d\n", ret); ++ goto exit; ++ } ++ mpp_enc_frame->frame = drm_frame; ++ } ++ ++ drm_desc = (AVDRMFrameDescriptor *)drm_frame->data[0]; ++ if (drm_desc->objects[0].fd < 0) ++ goto exit; ++ ++ /* planar YUV quirks */ ++ if ((r->pix_fmt == AV_PIX_FMT_YUV420P || ++ r->pix_fmt == AV_PIX_FMT_YUV422P || ++ r->pix_fmt == AV_PIX_FMT_NV24) && (drm_frame->width % 2)) { ++ av_log(avctx, AV_LOG_ERROR, 
"Unsupported width '%d', not 2-aligned\n", ++ drm_frame->width); ++ goto exit; ++ } ++ /* packed RGB/YUV quirks */ ++ if ((is_rgb || (is_yuv && !is_planar)) && ++ (drm_frame->width % 2 || drm_frame->height % 2)) { ++ av_log(avctx, AV_LOG_ERROR, "Unsupported size '%dx%d', not 2-aligned\n", ++ drm_frame->width, drm_frame->height); ++ goto exit; ++ } ++ ++ mpp_frame_set_pts(mpp_frame, PTS_TO_MPP_PTS(drm_frame->pts, avctx->time_base)); ++ mpp_frame_set_width(mpp_frame, drm_frame->width); ++ mpp_frame_set_height(mpp_frame, drm_frame->height); ++ ++ mpp_frame_set_colorspace(mpp_frame, avctx->colorspace); ++ mpp_frame_set_color_primaries(mpp_frame, avctx->color_primaries); ++ mpp_frame_set_color_trc(mpp_frame, avctx->color_trc); ++ mpp_frame_set_color_range(mpp_frame, avctx->color_range); ++ ++ layer = &drm_desc->layers[0]; ++ plane0 = &layer->planes[0]; ++ ++ is_afbc = drm_is_afbc(drm_desc->objects[0].format_modifier); ++ if (is_afbc && ++ !(avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC)) { ++ av_log(avctx, AV_LOG_ERROR, "AFBC is not supported in codec '%s'\n", ++ avcodec_get_name(avctx->codec_id)); ++ goto exit; ++ } ++ if (is_afbc) { ++ uint32_t drm_afbc_fmt = rkmpp_get_drm_afbc_format(mpp_fmt); ++ int afbc_offset_y = 0; ++ ++ if (drm_afbc_fmt != layer->format) { ++ av_log(avctx, AV_LOG_ERROR, "Input format '%s' with AFBC modifier is not supported\n", ++ av_get_pix_fmt_name(r->pix_fmt)); ++ goto exit; ++ } ++ mpp_fmt |= MPP_FRAME_FBC_AFBC_V2; ++ ++ if (drm_frame->crop_top > 0) { ++ afbc_offset_y = drm_frame->crop_top; ++ mpp_frame_set_offset_y(mpp_frame, afbc_offset_y); ++ } ++ } ++ mpp_frame_set_fmt(mpp_frame, mpp_fmt); ++ ++ if (is_afbc) { ++ hor_stride = plane0->pitch; ++ if ((ret = get_afbc_byte_stride(pix_desc, &hor_stride, 1)) < 0) ++ goto exit; ++ ++ if (hor_stride % 16) ++ hor_stride = FFALIGN(avctx->width, 16); ++ ++ mpp_frame_set_fbc_hdr_stride(mpp_frame, hor_stride); ++ } else { ++ ret = get_byte_stride(&drm_desc->objects[0], 
++ &drm_desc->layers[0], ++ (pix_desc->flags & AV_PIX_FMT_FLAG_RGB), ++ (pix_desc->flags & AV_PIX_FMT_FLAG_PLANAR), ++ &hor_stride, &ver_stride); ++ if (ret < 0 || !hor_stride || !ver_stride) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get frame strides\n"); ++ goto exit; ++ } ++ ++ mpp_frame_set_hor_stride(mpp_frame, hor_stride); ++ mpp_frame_set_ver_stride(mpp_frame, ver_stride); ++ } ++ ++ buf_info.type = MPP_BUFFER_TYPE_DRM; ++ buf_info.fd = drm_desc->objects[0].fd; ++ buf_info.size = drm_desc->objects[0].size; ++ ++ /* mark buffer as used (idx >= 0) */ ++ buf_info.index = buf_info.fd; ++ ++ if ((ret = mpp_buffer_import(&mpp_buf, &buf_info)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to import MPP buffer: %d\n", ret); ++ goto exit; ++ } ++ mpp_frame_set_buffer(mpp_frame, mpp_buf); ++ mpp_frame_set_buf_size(mpp_frame, drm_desc->objects[0].size); ++ ++ return mpp_enc_frame; ++ ++exit: ++ if (drm_frame && ++ avctx->pix_fmt != AV_PIX_FMT_DRM_PRIME) ++ av_frame_free(&drm_frame); ++ ++ return NULL; ++} ++ ++static int rkmpp_send_frame(AVCodecContext *avctx, MPPEncFrame *mpp_enc_frame) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ AVFrame *frame = NULL; ++ MppFrame mpp_frame = NULL; ++ int ret; ++ ++ if (mpp_enc_frame) { ++ frame = mpp_enc_frame->frame; ++ mpp_frame = mpp_enc_frame->mpp_frame; ++ } ++ ++ if (frame && (ret = rkmpp_set_enc_cfg_prep(avctx, frame)) < 0) ++ goto exit; ++ ++ if ((avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC) && ++ frame && frame->pict_type == AV_PICTURE_TYPE_I) { ++ if ((ret = r->mapi->control(r->mctx, MPP_ENC_SET_IDR_FRAME, NULL)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set IDR frame: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto exit; ++ } ++ } ++ ++ if ((ret = r->mapi->encode_put_frame(r->mctx, mpp_frame)) != MPP_OK) { ++ int log_level = (ret == MPP_NOK) ? AV_LOG_DEBUG : AV_LOG_ERROR; ++ ret = (ret == MPP_NOK) ? 
AVERROR(EAGAIN) : AVERROR_EXTERNAL; ++ av_log(avctx, log_level, "Failed to put frame to encoder input queue: %d\n", ret); ++ goto exit; ++ } else ++ av_log(avctx, AV_LOG_DEBUG, "Wrote %ld bytes to encoder\n", ++ mpp_frame_get_buf_size(mpp_frame)); ++ ++exit: ++ return ret; ++} ++ ++static void rkmpp_free_packet_buf(void *opaque, uint8_t *data) ++{ ++ MppPacket mpp_pkt = opaque; ++ mpp_packet_deinit(&mpp_pkt); ++} ++ ++static int rkmpp_get_packet(AVCodecContext *avctx, AVPacket *packet, int timeout) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ MppPacket mpp_pkt = NULL; ++ MppMeta mpp_meta = NULL; ++ MppFrame mpp_frame = NULL; ++ MppBuffer mpp_buf = NULL; ++ int ret, key_frame = 0; ++ ++ if ((ret = r->mapi->control(r->mctx, MPP_SET_OUTPUT_TIMEOUT, (MppParam)&timeout)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set output timeout: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ ++ if ((ret = r->mapi->encode_get_packet(r->mctx, &mpp_pkt)) != MPP_OK) { ++ int log_level = (ret == MPP_NOK) ? AV_LOG_DEBUG : AV_LOG_ERROR; ++ ret = (ret == MPP_NOK) ? 
AVERROR(EAGAIN) : AVERROR_EXTERNAL; ++ av_log(avctx, log_level, "Failed to get packet from encoder output queue: %d\n", ret); ++ return ret; ++ } ++ if (!mpp_pkt) ++ return AVERROR(ENOMEM); ++ ++ if (mpp_packet_get_eos(mpp_pkt)) { ++ av_log(avctx, AV_LOG_DEBUG, "Received an EOS packet\n"); ++ ret = AVERROR_EOF; ++ goto exit; ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Received a packet\n"); ++ ++ packet->data = mpp_packet_get_data(mpp_pkt); ++ packet->size = mpp_packet_get_length(mpp_pkt); ++ packet->buf = av_buffer_create(packet->data, packet->size, rkmpp_free_packet_buf, ++ mpp_pkt, AV_BUFFER_FLAG_READONLY); ++ if (!packet->buf) { ++ ret = AVERROR(ENOMEM); ++ goto exit; ++ } ++ ++ packet->time_base.num = avctx->time_base.num; ++ packet->time_base.den = avctx->time_base.den; ++ packet->pts = MPP_PTS_TO_PTS(mpp_packet_get_pts(mpp_pkt), avctx->time_base); ++ packet->dts = packet->pts; ++ ++ mpp_meta = mpp_packet_get_meta(mpp_pkt); ++ if (!mpp_meta || !mpp_packet_has_meta(mpp_pkt)) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get packet meta\n"); ++ ret = AVERROR_EXTERNAL; ++ goto exit; ++ } ++ ++ mpp_meta_get_s32(mpp_meta, KEY_OUTPUT_INTRA, &key_frame); ++ if (key_frame) ++ packet->flags |= AV_PKT_FLAG_KEY; ++ ++ if ((ret = mpp_meta_get_frame(mpp_meta, KEY_INPUT_FRAME, &mpp_frame)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get key input frame from packet meta: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto exit; ++ } ++ ++ mpp_buf = mpp_frame_get_buffer(mpp_frame); ++ if (!mpp_buf) ++ return AVERROR(ENOMEM); ++ ++ /* mark buffer as unused (idx < 0) */ ++ mpp_buffer_set_index(mpp_buf, -1); ++ clear_unused_frames(r->frame_list); ++ ++ return 0; ++ ++exit: ++ if (mpp_pkt) ++ mpp_packet_deinit(&mpp_pkt); ++ ++ return ret; ++} ++ ++static int rkmpp_encode_frame(AVCodecContext *avctx, AVPacket *packet, ++ const AVFrame *frame, int *got_packet) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ MPPEncFrame *mpp_enc_frame = NULL; ++ int ret; ++ int timeout = 
(avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC || ++ avctx->codec_id == AV_CODEC_ID_MJPEG) && ++ !(avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ++ ? MPP_TIMEOUT_NON_BLOCK : MPP_TIMEOUT_BLOCK; ++ ++ if (get_used_frame_count(r->frame_list) > r->async_frames) ++ goto get; ++ ++ mpp_enc_frame = rkmpp_submit_frame(avctx, (AVFrame *)frame); ++ if (!mpp_enc_frame) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to submit frame on input\n"); ++ return AVERROR(ENOMEM); ++ } ++ ++send: ++ ret = rkmpp_send_frame(avctx, mpp_enc_frame); ++ if (ret == AVERROR(EAGAIN)) ++ goto send; ++ else if (ret) ++ return ret; ++ ++get: ++ ret = rkmpp_get_packet(avctx, packet, timeout); ++ if (!frame && ret == AVERROR(EAGAIN)) ++ goto send; ++ if (ret == AVERROR_EOF || ++ ret == AVERROR(EAGAIN)) ++ *got_packet = 0; ++ else if (ret) ++ return ret; ++ else ++ *got_packet = 1; ++ ++ return 0; ++} ++ ++static int rkmpp_encode_close(AVCodecContext *avctx) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ ++ r->cfg_init = 0; ++ r->async_frames = 0; ++ ++ if (r->mapi) { ++ r->mapi->reset(r->mctx); ++ mpp_destroy(r->mctx); ++ r->mctx = NULL; ++ } ++ ++ clear_frame_list(&r->frame_list); ++ ++ if (r->hwframe) ++ av_buffer_unref(&r->hwframe); ++ if (r->hwdevice) ++ av_buffer_unref(&r->hwdevice); ++ ++ return 0; ++} ++ ++static av_cold int init_hwframes_ctx(AVCodecContext *avctx) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ AVHWFramesContext *hwfc; ++ int ret; ++ ++ av_buffer_unref(&r->hwframe); ++ r->hwframe = av_hwframe_ctx_alloc(r->hwdevice); ++ if (!r->hwframe) ++ return AVERROR(ENOMEM); ++ ++ hwfc = (AVHWFramesContext *)r->hwframe->data; ++ hwfc->format = AV_PIX_FMT_DRM_PRIME; ++ hwfc->sw_format = avctx->pix_fmt; ++ hwfc->width = avctx->width; ++ hwfc->height = avctx->height; ++ ++ ret = av_hwframe_ctx_init(r->hwframe); ++ if (ret < 0) { ++ av_buffer_unref(&r->hwframe); ++ av_log(avctx, AV_LOG_ERROR, "Error creating internal frames_ctx: %d\n", ret); ++ return ret; ++ } ++ ++ 
return 0; ++} ++ ++static int rkmpp_encode_init(AVCodecContext *avctx) ++{ ++ RKMPPEncContext *r = avctx->priv_data; ++ enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; ++ MppFrameFormat mpp_fmt = MPP_FMT_BUTT; ++ MppCodingType coding_type = MPP_VIDEO_CodingUnused; ++ MppPacket mpp_pkt = NULL; ++ int input_timeout = MPP_TIMEOUT_NON_BLOCK; ++ int output_timeout = MPP_TIMEOUT_NON_BLOCK; ++ int ret; ++ ++ r->cfg_init = 0; ++ r->async_frames = 0; ++ ++ if ((coding_type = rkmpp_get_coding_type(avctx)) == MPP_VIDEO_CodingUnused) { ++ av_log(avctx, AV_LOG_ERROR, "Unknown codec id: %d\n", avctx->codec_id); ++ return AVERROR(ENOSYS); ++ } ++ ++ pix_fmt = avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME ? avctx->sw_pix_fmt : avctx->pix_fmt; ++ mpp_fmt = avctx->codec_id == AV_CODEC_ID_MJPEG ++ ? rkmpp_get_mpp_fmt_mjpeg(pix_fmt) : rkmpp_get_mpp_fmt_h26x(pix_fmt); ++ mpp_fmt &= MPP_FRAME_FMT_MASK; ++ ++ if (mpp_fmt == MPP_FMT_BUTT) { ++ av_log(avctx, AV_LOG_ERROR, "Unsupported input pixel format '%s'\n", ++ av_get_pix_fmt_name(pix_fmt)); ++ return AVERROR(ENOSYS); ++ } ++ r->pix_fmt = pix_fmt; ++ r->mpp_fmt = mpp_fmt; ++ ++ if ((ret = mpp_check_support_format(MPP_CTX_ENC, coding_type)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "MPP doesn't support encoding codec '%s' (%d)\n", ++ avcodec_get_name(avctx->codec_id), avctx->codec_id); ++ return AVERROR(ENOSYS); ++ } ++ ++ if ((ret = mpp_create(&r->mctx, &r->mapi)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create MPP context and api: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ if ((ret = r->mapi->control(r->mctx, MPP_SET_INPUT_TIMEOUT, ++ (MppParam)&input_timeout)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set input timeout: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ ++ if ((ret = r->mapi->control(r->mctx, MPP_SET_OUTPUT_TIMEOUT, ++ (MppParam)&output_timeout)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set output timeout: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ ++ if 
((ret = mpp_init(r->mctx, MPP_CTX_ENC, coding_type)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init MPP context: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ if ((ret = mpp_enc_cfg_init(&r->mcfg)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init encoder config: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ if ((ret = r->mapi->control(r->mctx, MPP_ENC_GET_CFG, r->mcfg)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get encoder config: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ if ((ret = rkmpp_set_enc_cfg(avctx)) < 0) ++ goto fail; ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC) ++ r->async_frames = H26X_ASYNC_FRAMES; ++ else if (avctx->codec_id == AV_CODEC_ID_MJPEG) ++ r->async_frames = MJPEG_ASYNC_FRAMES; ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC) { ++ RK_U8 enc_hdr_buf[H26X_HEADER_SIZE]; ++ size_t pkt_len = 0; ++ void *pkt_pos = NULL; ++ ++ memset(enc_hdr_buf, 0, H26X_HEADER_SIZE); ++ ++ if ((ret = mpp_packet_init(&mpp_pkt, ++ (void *)enc_hdr_buf, ++ H26X_HEADER_SIZE)) != MPP_OK || !mpp_pkt) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to init extra info packet: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ mpp_packet_set_length(mpp_pkt, 0); ++ if ((ret = r->mapi->control(r->mctx, MPP_ENC_GET_HDR_SYNC, mpp_pkt)) != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get header sync: %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ pkt_pos = mpp_packet_get_pos(mpp_pkt); ++ pkt_len = mpp_packet_get_length(mpp_pkt); ++ ++ if (avctx->extradata) { ++ av_free(avctx->extradata); ++ avctx->extradata = NULL; ++ } ++ avctx->extradata = av_malloc(pkt_len + AV_INPUT_BUFFER_PADDING_SIZE); ++ if (!avctx->extradata) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ avctx->extradata_size = pkt_len + AV_INPUT_BUFFER_PADDING_SIZE; ++ memcpy(avctx->extradata, pkt_pos, 
pkt_len); ++ memset(avctx->extradata + pkt_len, 0, AV_INPUT_BUFFER_PADDING_SIZE); ++ mpp_packet_deinit(&mpp_pkt); ++ } ++ ++ if (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) ++ return 0; ++ ++ if (avctx->hw_frames_ctx || avctx->hw_device_ctx) { ++ AVBufferRef *device_ref = avctx->hw_device_ctx; ++ AVHWDeviceContext *device_ctx = NULL; ++ AVHWFramesContext *hwfc = NULL; ++ ++ if (avctx->hw_frames_ctx) { ++ hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data; ++ device_ref = hwfc->device_ref; ++ } ++ device_ctx = (AVHWDeviceContext *)device_ref->data; ++ ++ if (device_ctx && device_ctx->type == AV_HWDEVICE_TYPE_RKMPP) { ++ r->hwdevice = av_buffer_ref(device_ref); ++ if (r->hwdevice) ++ av_log(avctx, AV_LOG_VERBOSE, "Picked up an existing RKMPP hardware device\n"); ++ } ++ } ++ if (!r->hwdevice) { ++ if ((ret = av_hwdevice_ctx_create(&r->hwdevice, ++ AV_HWDEVICE_TYPE_RKMPP, ++ NULL, NULL, 0)) < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create a RKMPP hardware device: %d\n", ret); ++ goto fail; ++ } ++ av_log(avctx, AV_LOG_VERBOSE, "Created a RKMPP hardware device\n"); ++ } ++ ++ ret = init_hwframes_ctx(avctx); ++ if (ret < 0) ++ goto fail; ++ ++ return 0; ++ ++fail: ++ if (mpp_pkt) ++ mpp_packet_deinit(&mpp_pkt); ++ ++ rkmpp_encode_close(avctx); ++ return ret; ++} ++ ++#if CONFIG_H264_RKMPP_ENCODER ++DEFINE_RKMPP_ENCODER(h264, H264, h26x) ++#endif ++#if CONFIG_HEVC_RKMPP_ENCODER ++DEFINE_RKMPP_ENCODER(hevc, HEVC, h26x) ++#endif ++#if CONFIG_MJPEG_RKMPP_ENCODER ++DEFINE_RKMPP_ENCODER(mjpeg, MJPEG, mjpeg) ++#endif +Index: FFmpeg/libavcodec/rkmppenc.h +=================================================================== +--- /dev/null ++++ libavcodec/rkmppenc.h +@@ -0,0 +1,276 @@ ++/* ++ * Copyright (c) 2023 Huseyin BIYIK ++ * Copyright (c) 2023 NyanMisaka ++ * ++ * This file is part of FFmpeg. 
++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Rockchip MPP (Media Process Platform) video encoder ++ */ ++ ++#ifndef AVCODEC_RKMPPENC_H ++#define AVCODEC_RKMPPENC_H ++ ++#include ++ ++#include "codec_internal.h" ++#include "encode.h" ++#include "hwconfig.h" ++#include "internal.h" ++ ++#include "libavutil/hwcontext_rkmpp.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#define H26X_HEADER_SIZE 1024 ++#define H26X_ASYNC_FRAMES 4 ++#define MJPEG_ASYNC_FRAMES 8 ++#define ALIGN_DOWN(a, b) ((a) & ~((b)-1)) ++ ++typedef struct MPPEncFrame { ++ AVFrame *frame; ++ MppFrame mpp_frame; ++ struct MPPEncFrame *next; ++ int queued; ++} MPPEncFrame; ++ ++typedef struct RKMPPEncContext { ++ AVClass *class; ++ ++ MppApi *mapi; ++ MppCtx mctx; ++ ++ AVBufferRef *hwdevice; ++ AVBufferRef *hwframe; ++ ++ MppEncCfg mcfg; ++ int cfg_init; ++ MppFrameFormat mpp_fmt; ++ enum AVPixelFormat pix_fmt; ++ ++ MPPEncFrame *frame_list; ++ int async_frames; ++ ++ int rc_mode; ++ int qp_init; ++ int qp_max; ++ int qp_min; ++ int qp_max_i; ++ int qp_min_i; ++ int profile; ++ int tier; ++ int level; ++ int coder; ++ int dct8x8; ++} RKMPPEncContext; ++ ++static const AVRational mpp_tb = { 1, 1000000 }; ++ ++#define PTS_TO_MPP_PTS(pts, pts_tb) ((pts_tb.num && 
pts_tb.den) ? \ ++ av_rescale_q(pts, pts_tb, mpp_tb) : pts) ++ ++#define MPP_PTS_TO_PTS(mpp_pts, pts_tb) ((pts_tb.num && pts_tb.den) ? \ ++ av_rescale_q(mpp_pts, mpp_tb, pts_tb) : mpp_pts) ++ ++#define OFFSET(x) offsetof(RKMPPEncContext, x) ++#define VE (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++ ++#define RKMPP_ENC_COMMON_OPTS \ ++ { "rc_mode", "Set the encoding rate control mode", OFFSET(rc_mode), AV_OPT_TYPE_INT, \ ++ { .i64 = MPP_ENC_RC_MODE_BUTT }, MPP_ENC_RC_MODE_VBR, MPP_ENC_RC_MODE_BUTT, VE, .unit = "rc_mode"}, \ ++ { "VBR", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MPP_ENC_RC_MODE_VBR }, 0, 0, VE, .unit = "rc_mode" }, \ ++ { "CBR", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MPP_ENC_RC_MODE_CBR }, 0, 0, VE, .unit = "rc_mode" }, \ ++ { "CQP", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MPP_ENC_RC_MODE_FIXQP }, 0, 0, VE, .unit = "rc_mode" }, \ ++ { "AVBR", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MPP_ENC_RC_MODE_AVBR }, 0, 0, VE, .unit = "rc_mode" }, \ ++ { "qp_init", "Set the initial QP value", OFFSET(qp_init), AV_OPT_TYPE_INT, \ ++ { .i64 = -1 }, -1, 51, VE, "qmin" }, \ ++ { "qp_max", "Set the max QP value for P and B frame", OFFSET(qp_max), AV_OPT_TYPE_INT, \ ++ { .i64 = -1 }, -1, 51, VE, "qp_max" }, \ ++ { "qp_min", "Set the min QP value for P and B frame", OFFSET(qp_min), AV_OPT_TYPE_INT, \ ++ { .i64 = -1 }, -1, 51, VE, "qp_min" }, \ ++ { "qp_max_i", "Set the max QP value for I frame", OFFSET(qp_max_i), AV_OPT_TYPE_INT, \ ++ { .i64 = -1 }, -1, 51, VE, "qp_max_i" }, \ ++ { "qp_min_i", "Set the min QP value for I frame", OFFSET(qp_min_i), AV_OPT_TYPE_INT, \ ++ { .i64 = -1 }, -1, 51, VE, "qp_min_i" }, \ ++ ++static const AVOption h264_options[] = { ++ RKMPP_ENC_COMMON_OPTS ++ { "profile", "Set the encoding profile restriction", OFFSET(profile), AV_OPT_TYPE_INT, ++ { .i64 = AV_PROFILE_H264_HIGH }, -1, AV_PROFILE_H264_HIGH, VE, .unit = "profile" }, ++ { "baseline", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_H264_BASELINE }, INT_MIN, INT_MAX, VE, .unit = 
"profile" }, ++ { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_H264_MAIN }, INT_MIN, INT_MAX, VE, .unit = "profile" }, ++ { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_H264_HIGH }, INT_MIN, INT_MAX, VE, .unit = "profile" }, ++ { "level", "Set the encoding level restriction", OFFSET(level), AV_OPT_TYPE_INT, ++ { .i64 = 0 }, FF_LEVEL_UNKNOWN, 62, VE, .unit = "level" }, ++ { "1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 10 }, 0, 0, VE, .unit = "level" }, ++ { "1.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 11 }, 0, 0, VE, .unit = "level" }, ++ { "1.2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 12 }, 0, 0, VE, .unit = "level" }, ++ { "1.3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, 0, 0, VE, .unit = "level" }, ++ { "2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 20 }, 0, 0, VE, .unit = "level" }, ++ { "2.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 21 }, 0, 0, VE, .unit = "level" }, ++ { "2.2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 22 }, 0, 0, VE, .unit = "level" }, ++ { "3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, 0, 0, VE, .unit = "level" }, ++ { "3.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, 0, 0, VE, .unit = "level" }, ++ { "3.2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, 0, 0, VE, .unit = "level" }, ++ { "4", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, 0, 0, VE, .unit = "level" }, ++ { "4.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, 0, 0, VE, .unit = "level" }, ++ { "4.2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, 0, 0, VE, .unit = "level" }, ++ { "5", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, 0, 0, VE, .unit = "level" }, ++ { "5.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, 0, 0, VE, .unit = "level" }, ++ { "5.2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, 0, 0, VE, .unit = "level" }, ++ { "6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 60 }, 0, 0, VE, .unit = "level" }, ++ { "6.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 61 }, 0, 0, VE, .unit = "level" }, ++ { "6.2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 62 }, 0, 0, VE, .unit = "level" }, ++ { "coder", "Set the 
entropy coder type (from 0 to 1) (default cabac)", OFFSET(coder), AV_OPT_TYPE_INT, ++ { .i64 = 1 }, 0, 1, VE, .unit = "coder" }, ++ { "cavlc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, VE, .unit = "coder" }, ++ { "cabac", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, .unit = "coder" }, ++ { "8x8dct", "Set the high profile 8x8 transform", OFFSET(dct8x8), AV_OPT_TYPE_BOOL, ++ { .i64 = 1 }, 0, 1, VE, "8x8dct" }, ++ { NULL } ++}; ++ ++static const AVOption hevc_options[] = { ++ RKMPP_ENC_COMMON_OPTS ++ { "profile", "Set the encoding profile restriction", OFFSET(profile), AV_OPT_TYPE_INT, ++ { .i64 = AV_PROFILE_HEVC_MAIN }, -1, AV_PROFILE_HEVC_MAIN, VE, .unit = "profile" }, ++ { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_HEVC_MAIN }, INT_MIN, INT_MAX, VE, .unit = "profile" }, ++ { "tier", "Set the encoding profile tier restriction", OFFSET(tier), AV_OPT_TYPE_INT, ++ { .i64 = 1 }, 0, 1, VE, .unit = "tier" }, ++ { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, VE, .unit = "tier" }, ++ { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, .unit = "tier" }, ++ { "level", "Set the encoding level restriction", OFFSET(level), AV_OPT_TYPE_INT, ++ { .i64 = 0 }, FF_LEVEL_UNKNOWN, 186, VE, .unit = "level" }, ++ { "1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, 0, 0, VE, .unit = "level" }, ++ { "2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 60 }, 0, 0, VE, .unit = "level" }, ++ { "2.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 63 }, 0, 0, VE, .unit = "level" }, ++ { "3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 90 }, 0, 0, VE, .unit = "level" }, ++ { "3.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 93 }, 0, 0, VE, .unit = "level" }, ++ { "4", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 120 }, 0, 0, VE, .unit = "level" }, ++ { "4.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 123 }, 0, 0, VE, .unit = "level" }, ++ { "5", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 150 }, 0, 0, VE, .unit = "level" }, ++ { "5.1", NULL, 0, 
AV_OPT_TYPE_CONST, { .i64 = 153 }, 0, 0, VE, .unit = "level" }, ++ { "5.2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 156 }, 0, 0, VE, .unit = "level" }, ++ { "6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 180 }, 0, 0, VE, .unit = "level" }, ++ { "6.1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 183 }, 0, 0, VE, .unit = "level" }, ++ { "6.2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 186 }, 0, 0, VE, .unit = "level" }, ++ { NULL } ++}; ++ ++static const AVOption mjpeg_options[] = { ++ { "qp_init", "Set the initial QP/Q_Factor value", OFFSET(qp_init), AV_OPT_TYPE_INT, \ ++ { .i64 = -1 }, -1, 99, VE, "qmin" }, \ ++ { "qp_max", "Set the max QP/Q_Factor value", OFFSET(qp_max), AV_OPT_TYPE_INT, \ ++ { .i64 = -1 }, -1, 99, VE, "qp_max" }, \ ++ { "qp_min", "Set the min QP/Q_Factor value", OFFSET(qp_min), AV_OPT_TYPE_INT, \ ++ { .i64 = -1 }, -1, 99, VE, "qp_min" }, \ ++ { NULL } ++}; ++ ++static const enum AVPixelFormat rkmpp_enc_pix_fmts_h26x[] = { ++ AV_PIX_FMT_GRAY8, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUV422P, ++ AV_PIX_FMT_YUV444P, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_NV21, ++ AV_PIX_FMT_NV16, ++ AV_PIX_FMT_NV24, ++ AV_PIX_FMT_YUYV422, ++ AV_PIX_FMT_YVYU422, ++ AV_PIX_FMT_UYVY422, ++ AV_PIX_FMT_RGB24, ++ AV_PIX_FMT_BGR24, ++ AV_PIX_FMT_RGBA, ++ AV_PIX_FMT_RGB0, ++ AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_BGR0, ++ AV_PIX_FMT_ARGB, ++ AV_PIX_FMT_0RGB, ++ AV_PIX_FMT_ABGR, ++ AV_PIX_FMT_0BGR, ++ AV_PIX_FMT_DRM_PRIME, ++ AV_PIX_FMT_NONE, ++}; ++ ++static const enum AVPixelFormat rkmpp_enc_pix_fmts_mjpeg[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUYV422, ++ AV_PIX_FMT_UYVY422, ++ AV_PIX_FMT_RGB444BE, ++ AV_PIX_FMT_BGR444BE, ++ AV_PIX_FMT_RGB555BE, ++ AV_PIX_FMT_BGR555BE, ++ AV_PIX_FMT_RGB565BE, ++ AV_PIX_FMT_BGR565BE, ++ AV_PIX_FMT_RGBA, ++ AV_PIX_FMT_RGB0, ++ AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_BGR0, ++ AV_PIX_FMT_ARGB, ++ AV_PIX_FMT_0RGB, ++ AV_PIX_FMT_ABGR, ++ AV_PIX_FMT_0BGR, ++ AV_PIX_FMT_X2RGB10BE, ++ AV_PIX_FMT_X2BGR10BE, ++ AV_PIX_FMT_DRM_PRIME, ++ AV_PIX_FMT_NONE, ++}; ++ ++static 
const AVCodecHWConfigInternal *const rkmpp_enc_hw_configs[] = { ++ HW_CONFIG_ENCODER_DEVICE(NONE, RKMPP), ++ HW_CONFIG_ENCODER_FRAMES(DRM_PRIME, RKMPP), ++ HW_CONFIG_ENCODER_FRAMES(DRM_PRIME, DRM), ++ NULL, ++}; ++ ++static const FFCodecDefault rkmpp_enc_defaults[] = { ++ { "b", "2M" }, ++ { "g", "250" }, ++ { NULL } ++}; ++ ++#define DEFINE_RKMPP_ENCODER(x, X, xx) \ ++static const AVClass x##_rkmpp_encoder_class = { \ ++ .class_name = #x "_rkmpp_encoder", \ ++ .item_name = av_default_item_name, \ ++ .option = x##_options, \ ++ .version = LIBAVUTIL_VERSION_INT, \ ++}; \ ++const FFCodec ff_##x##_rkmpp_encoder = { \ ++ .p.name = #x "_rkmpp", \ ++ CODEC_LONG_NAME("Rockchip MPP (Media Process Platform) " #X " encoder"), \ ++ .p.type = AVMEDIA_TYPE_VIDEO, \ ++ .p.id = AV_CODEC_ID_##X, \ ++ .priv_data_size = sizeof(RKMPPEncContext), \ ++ .p.priv_class = &x##_rkmpp_encoder_class, \ ++ .init = rkmpp_encode_init, \ ++ .close = rkmpp_encode_close, \ ++ FF_CODEC_ENCODE_CB(rkmpp_encode_frame), \ ++ .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE, \ ++ .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \ ++ FF_CODEC_CAP_INIT_CLEANUP, \ ++ .p.pix_fmts = rkmpp_enc_pix_fmts_##xx, \ ++ .hw_configs = rkmpp_enc_hw_configs, \ ++ .defaults = rkmpp_enc_defaults, \ ++ .p.wrapper_name = "rkmpp", \ ++}; ++ ++#endif /* AVCODEC_RKMPPENC_H */ +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- libavfilter/Makefile ++++ libavfilter/Makefile +@@ -29,6 +29,7 @@ OBJS-$(HAVE_THREADS) + + # subsystems + OBJS-$(CONFIG_QSVVPP) += qsvvpp.o ++OBJS-$(CONFIG_RKRGA) += rkrga_common.o + OBJS-$(CONFIG_SCENE_SAD) += scene_sad.o + OBJS-$(CONFIG_DNN) += dnn_filter_common.o + include $(SRC_PATH)/libavfilter/dnn/Makefile +@@ -414,6 +415,7 @@ OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) + OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o + OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER) += vf_overlay_vaapi.o framesync.o vaapi_vpp.o + 
OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o vulkan_filter.o ++OBJS-$(CONFIG_OVERLAY_RKRGA_FILTER) += vf_overlay_rkrga.o framesync.o + OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o + OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o + OBJS-$(CONFIG_PAD_OPENCL_FILTER) += vf_pad_opencl.o opencl.o opencl/pad.o +@@ -465,6 +467,7 @@ OBJS-$(CONFIG_SCALE_QSV_FILTER) + OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o + OBJS-$(CONFIG_SCALE_VT_FILTER) += vf_scale_vt.o scale_eval.o + OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o vulkan_filter.o ++OBJS-$(CONFIG_SCALE_RKRGA_FILTER) += vf_vpp_rkrga.o scale_eval.o + OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o + OBJS-$(CONFIG_SCALE2REF_NPP_FILTER) += vf_scale_npp.o scale_eval.o + OBJS-$(CONFIG_SCDET_FILTER) += vf_scdet.o +@@ -557,6 +560,7 @@ OBJS-$(CONFIG_VIF_FILTER) + OBJS-$(CONFIG_VIGNETTE_FILTER) += vf_vignette.o + OBJS-$(CONFIG_VMAFMOTION_FILTER) += vf_vmafmotion.o framesync.o + OBJS-$(CONFIG_VPP_QSV_FILTER) += vf_vpp_qsv.o ++OBJS-$(CONFIG_VPP_RKRGA_FILTER) += vf_vpp_rkrga.o scale_eval.o + OBJS-$(CONFIG_VSTACK_FILTER) += vf_stack.o framesync.o + OBJS-$(CONFIG_W3FDIF_FILTER) += vf_w3fdif.o + OBJS-$(CONFIG_WAVEFORM_FILTER) += vf_waveform.o +@@ -658,6 +662,7 @@ SKIPHEADERS-$(CONFIG_LCMS2) + SKIPHEADERS-$(CONFIG_LIBVIDSTAB) += vidstabutils.h + + SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h ++SKIPHEADERS-$(CONFIG_RKRGA) += rkrga_common.h + SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h + SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h + SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_filter.h +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== +--- libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -390,6 +390,7 @@ extern const AVFilter ff_vf_overlay_qsv; + extern const AVFilter ff_vf_overlay_vaapi; + extern const AVFilter ff_vf_overlay_vulkan; + extern const AVFilter 
ff_vf_overlay_cuda; ++extern const AVFilter ff_vf_overlay_rkrga; + extern const AVFilter ff_vf_owdenoise; + extern const AVFilter ff_vf_pad; + extern const AVFilter ff_vf_pad_opencl; +@@ -438,6 +439,7 @@ extern const AVFilter ff_vf_scale_qsv; + extern const AVFilter ff_vf_scale_vaapi; + extern const AVFilter ff_vf_scale_vt; + extern const AVFilter ff_vf_scale_vulkan; ++extern const AVFilter ff_vf_scale_rkrga; + extern const AVFilter ff_vf_scale2ref; + extern const AVFilter ff_vf_scale2ref_npp; + extern const AVFilter ff_vf_scdet; +@@ -525,6 +527,7 @@ extern const AVFilter ff_vf_vif; + extern const AVFilter ff_vf_vignette; + extern const AVFilter ff_vf_vmafmotion; + extern const AVFilter ff_vf_vpp_qsv; ++extern const AVFilter ff_vf_vpp_rkrga; + extern const AVFilter ff_vf_vstack; + extern const AVFilter ff_vf_w3fdif; + extern const AVFilter ff_vf_waveform; +Index: FFmpeg/libavfilter/rkrga_common.c +=================================================================== +--- /dev/null ++++ libavfilter/rkrga_common.c +@@ -0,0 +1,1249 @@ ++/* ++ * Copyright (c) 2023 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Rockchip RGA (2D Raster Graphic Acceleration) base function ++ */ ++ ++#include "libavutil/common.h" ++#include "libavutil/pixdesc.h" ++ ++#include "internal.h" ++#include "video.h" ++ ++#include "rkrga_common.h" ++ ++typedef struct RGAAsyncFrame { ++ RGAFrame *src; ++ RGAFrame *dst; ++ RGAFrame *pat; ++} RGAAsyncFrame; ++ ++typedef struct RGAFormatMap { ++ enum AVPixelFormat pix_fmt; ++ enum _Rga_SURF_FORMAT rga_fmt; ++} RGAFormatMap; ++ ++#define YUV_FORMATS \ ++ { AV_PIX_FMT_GRAY8, RK_FORMAT_YCbCr_400 }, /* RGA2 only */ \ ++ { AV_PIX_FMT_YUV420P, RK_FORMAT_YCbCr_420_P }, /* RGA2 only */ \ ++ { AV_PIX_FMT_YUV422P, RK_FORMAT_YCbCr_422_P }, /* RGA2 only */ \ ++ { AV_PIX_FMT_NV12, RK_FORMAT_YCbCr_420_SP }, \ ++ { AV_PIX_FMT_NV21, RK_FORMAT_YCrCb_420_SP }, \ ++ { AV_PIX_FMT_NV16, RK_FORMAT_YCbCr_422_SP }, \ ++ { AV_PIX_FMT_P010, RK_FORMAT_YCbCr_420_SP_10B }, /* RGA3 only */ \ ++ { AV_PIX_FMT_P210, RK_FORMAT_YCbCr_422_SP_10B }, /* RGA3 only */ \ ++ { AV_PIX_FMT_NV15, RK_FORMAT_YCbCr_420_SP_10B }, /* RGA2 only input, aka P010 compact */ \ ++ { AV_PIX_FMT_NV20, RK_FORMAT_YCbCr_422_SP_10B }, /* RGA2 only input, aka P210 compact */ \ ++ { AV_PIX_FMT_YUYV422, RK_FORMAT_YUYV_422 }, \ ++ { AV_PIX_FMT_YVYU422, RK_FORMAT_YVYU_422 }, \ ++ { AV_PIX_FMT_UYVY422, RK_FORMAT_UYVY_422 }, ++ ++#define RGB_FORMATS \ ++ { AV_PIX_FMT_RGB555LE, RK_FORMAT_BGRA_5551 }, /* RGA2 only */ \ ++ { AV_PIX_FMT_BGR555LE, RK_FORMAT_RGBA_5551 }, /* RGA2 only */ \ ++ { AV_PIX_FMT_RGB565LE, RK_FORMAT_BGR_565 }, \ ++ { AV_PIX_FMT_BGR565LE, RK_FORMAT_RGB_565 }, \ ++ { AV_PIX_FMT_RGB24, RK_FORMAT_RGB_888 }, \ ++ { AV_PIX_FMT_BGR24, RK_FORMAT_BGR_888 }, \ ++ { AV_PIX_FMT_RGBA, RK_FORMAT_RGBA_8888 }, \ ++ { AV_PIX_FMT_RGB0, RK_FORMAT_RGBA_8888 }, /* 
RK_FORMAT_RGBX_8888 triggers RGA2 on multicore RGA */ \ ++ { AV_PIX_FMT_BGRA, RK_FORMAT_BGRA_8888 }, \ ++ { AV_PIX_FMT_BGR0, RK_FORMAT_BGRA_8888 }, /* RK_FORMAT_BGRX_8888 triggers RGA2 on multicore RGA */ \ ++ { AV_PIX_FMT_ARGB, RK_FORMAT_ARGB_8888 }, /* RGA3 only input */ \ ++ { AV_PIX_FMT_0RGB, RK_FORMAT_ARGB_8888 }, /* RGA3 only input */ \ ++ { AV_PIX_FMT_ABGR, RK_FORMAT_ABGR_8888 }, /* RGA3 only input */ \ ++ { AV_PIX_FMT_0BGR, RK_FORMAT_ABGR_8888 }, /* RGA3 only input */ ++ ++static const RGAFormatMap supported_formats_main[] = { ++ YUV_FORMATS ++ RGB_FORMATS ++}; ++ ++static const RGAFormatMap supported_formats_overlay[] = { ++ RGB_FORMATS ++}; ++#undef YUV_FORMATS ++#undef RGB_FORMATS ++ ++static int map_av_to_rga_format(enum AVPixelFormat in_format, ++ enum _Rga_SURF_FORMAT *out_format, int is_overlay) ++{ ++ int i; ++ ++ if (is_overlay) ++ goto overlay; ++ ++ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats_main); i++) { ++ if (supported_formats_main[i].pix_fmt == in_format) { ++ if (out_format) ++ *out_format = supported_formats_main[i].rga_fmt; ++ return 1; ++ } ++ } ++ return 0; ++ ++overlay: ++ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats_overlay); i++) { ++ if (supported_formats_overlay[i].pix_fmt == in_format) { ++ if (out_format) ++ *out_format = supported_formats_overlay[i].rga_fmt; ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++static int get_pixel_stride(const AVDRMObjectDescriptor *object, ++ const AVDRMLayerDescriptor *layer, ++ int is_rgb, int is_planar, ++ float bytes_pp, int *ws, int *hs) ++{ ++ const AVDRMPlaneDescriptor *plane0, *plane1; ++ const int is_packed_fmt = is_rgb || (!is_rgb && !is_planar); ++ ++ if (!object || !layer || !ws || !hs || bytes_pp <= 0) ++ return AVERROR(EINVAL); ++ ++ plane0 = &layer->planes[0]; ++ plane1 = &layer->planes[1]; ++ ++ *ws = is_packed_fmt ? ++ (plane0->pitch / bytes_pp) : ++ plane0->pitch; ++ *hs = is_packed_fmt ? ++ ALIGN_DOWN(object->size / plane0->pitch, is_rgb ? 
1 : 2) : ++ (plane1->offset / plane0->pitch); ++ ++ return (*ws > 0 && *hs > 0) ? 0 : AVERROR(EINVAL); ++} ++ ++static int get_afbc_pixel_stride(float bytes_pp, int *stride, int reverse) ++{ ++ if (!stride || *stride <= 0 || bytes_pp <= 0) ++ return AVERROR(EINVAL); ++ ++ *stride = reverse ? (*stride / bytes_pp) : (*stride * bytes_pp); ++ ++ return (*stride > 0) ? 0 : AVERROR(EINVAL); ++} ++ ++/* Canonical formats: https://dri.freedesktop.org/docs/drm/gpu/afbc.html */ ++static uint32_t get_drm_afbc_format(enum AVPixelFormat pix_fmt) ++{ ++ switch (pix_fmt) { ++ case AV_PIX_FMT_NV12: return DRM_FORMAT_YUV420_8BIT; ++ case AV_PIX_FMT_NV15: return DRM_FORMAT_YUV420_10BIT; ++ case AV_PIX_FMT_NV16: return DRM_FORMAT_YUYV; ++ case AV_PIX_FMT_NV20: return DRM_FORMAT_Y210; ++ case AV_PIX_FMT_RGB565LE: return DRM_FORMAT_RGB565; ++ case AV_PIX_FMT_BGR565LE: return DRM_FORMAT_BGR565; ++ case AV_PIX_FMT_RGB24: return DRM_FORMAT_RGB888; ++ case AV_PIX_FMT_BGR24: return DRM_FORMAT_BGR888; ++ case AV_PIX_FMT_RGBA: return DRM_FORMAT_ABGR8888; ++ case AV_PIX_FMT_RGB0: return DRM_FORMAT_XBGR8888; ++ case AV_PIX_FMT_BGRA: return DRM_FORMAT_ARGB8888; ++ case AV_PIX_FMT_BGR0: return DRM_FORMAT_XRGB8888; ++ default: return DRM_FORMAT_INVALID; ++ } ++} ++ ++static int is_pixel_stride_rga3_compat(int ws, int hs, ++ enum _Rga_SURF_FORMAT fmt) ++{ ++ switch (fmt) { ++ case RK_FORMAT_YCbCr_420_SP: ++ case RK_FORMAT_YCrCb_420_SP: ++ case RK_FORMAT_YCbCr_422_SP: return !(ws % 16) && !(hs % 2); ++ case RK_FORMAT_YCbCr_420_SP_10B: ++ case RK_FORMAT_YCbCr_422_SP_10B: return !(ws % 64) && !(hs % 2); ++ case RK_FORMAT_YUYV_422: ++ case RK_FORMAT_YVYU_422: ++ case RK_FORMAT_UYVY_422: return !(ws % 8) && !(hs % 2); ++ case RK_FORMAT_RGB_565: ++ case RK_FORMAT_BGR_565: return !(ws % 8); ++ case RK_FORMAT_RGB_888: ++ case RK_FORMAT_BGR_888: return !(ws % 16); ++ case RK_FORMAT_RGBA_8888: ++ case RK_FORMAT_BGRA_8888: ++ case RK_FORMAT_ARGB_8888: ++ case RK_FORMAT_ABGR_8888: return !(ws % 4); ++ default: 
return 0; ++ } ++} ++ ++static void clear_unused_frames(RGAFrame *list) ++{ ++ while (list) { ++ if (list->queued == 1 && !list->locked) { ++ av_frame_free(&list->frame); ++ list->queued = 0; ++ } ++ list = list->next; ++ } ++} ++ ++static void clear_frame_list(RGAFrame **list) ++{ ++ while (*list) { ++ RGAFrame *frame = NULL; ++ ++ frame = *list; ++ *list = (*list)->next; ++ av_frame_free(&frame->frame); ++ av_freep(&frame); ++ } ++} ++ ++static RGAFrame *get_free_frame(RGAFrame **list) ++{ ++ RGAFrame *out = *list; ++ ++ for (; out; out = out->next) { ++ if (!out->queued) { ++ out->queued = 1; ++ break; ++ } ++ } ++ ++ if (!out) { ++ out = av_mallocz(sizeof(*out)); ++ if (!out) { ++ av_log(NULL, AV_LOG_ERROR, "Cannot alloc new output frame\n"); ++ return NULL; ++ } ++ out->queued = 1; ++ out->next = *list; ++ *list = out; ++ } ++ ++ return out; ++} ++ ++static void set_colorspace_info(RGAFrameInfo *in_info, const AVFrame *in, ++ RGAFrameInfo *out_info, AVFrame *out, ++ int *color_space_mode) ++{ ++ if (!in_info || !out_info || !in || !out || !color_space_mode) ++ return; ++ ++ *color_space_mode = 0; ++ ++ /* rgb2yuv */ ++ if ((in_info->pix_desc->flags & AV_PIX_FMT_FLAG_RGB) && ++ !(out_info->pix_desc->flags & AV_PIX_FMT_FLAG_RGB)) { ++ /* rgb full -> yuv full/limit */ ++ if (in->color_range == AVCOL_RANGE_JPEG) { ++ switch (in->colorspace) { ++ case AVCOL_SPC_BT709: ++ out->colorspace = AVCOL_SPC_BT709; ++ *color_space_mode = 0xb << 8; /* rgb2yuv_709_limit */ ++ break; ++ case AVCOL_SPC_BT470BG: ++ out->colorspace = AVCOL_SPC_BT470BG; ++ *color_space_mode = 2 << 2; /* IM_RGB_TO_YUV_BT601_LIMIT */ ++ break; ++ } ++ } ++ if (*color_space_mode) { ++ out->color_trc = AVCOL_TRC_UNSPECIFIED; ++ out->color_primaries = AVCOL_PRI_UNSPECIFIED; ++ out->color_range = AVCOL_RANGE_MPEG; ++ } ++ } ++ ++ /* yuv2rgb */ ++ if (!(in_info->pix_desc->flags & AV_PIX_FMT_FLAG_RGB) && ++ (out_info->pix_desc->flags & AV_PIX_FMT_FLAG_RGB)) { ++ /* yuv full/limit -> rgb full */ ++ switch 
(in->color_range) { ++ case AVCOL_RANGE_MPEG: ++ if (in->colorspace == AVCOL_SPC_BT709) { ++ out->colorspace = AVCOL_SPC_BT709; ++ *color_space_mode = 3 << 0; /* IM_YUV_TO_RGB_BT709_LIMIT */ ++ } ++ if (in->colorspace == AVCOL_SPC_BT470BG) { ++ out->colorspace = AVCOL_SPC_BT470BG; ++ *color_space_mode = 1 << 0; /* IM_YUV_TO_RGB_BT601_LIMIT */ ++ } ++ break; ++ case AVCOL_RANGE_JPEG: ++#if 0 ++ if (in->colorspace == AVCOL_SPC_BT709) { ++ out->colorspace = AVCOL_SPC_BT709; ++ *color_space_mode = 0xc << 8; /* yuv2rgb_709_full */ ++ } ++#endif ++ if (in->colorspace == AVCOL_SPC_BT470BG) { ++ out->colorspace = AVCOL_SPC_BT470BG; ++ *color_space_mode = 2 << 0; /* IM_YUV_TO_RGB_BT601_FULL */ ++ } ++ break; ++ } ++ if (*color_space_mode) { ++ out->color_trc = AVCOL_TRC_UNSPECIFIED; ++ out->color_primaries = AVCOL_PRI_UNSPECIFIED; ++ out->color_range = AVCOL_RANGE_JPEG; ++ } ++ } ++} ++ ++static int verify_rga_frame_info_io_dynamic(AVFilterContext *avctx, ++ RGAFrameInfo *in, RGAFrameInfo *out) ++{ ++ RKRGAContext *r = avctx->priv; ++ ++ if (!in || !out) ++ return AVERROR(EINVAL); ++ ++ if (r->is_rga2_used && !r->has_rga2) { ++ av_log(avctx, AV_LOG_ERROR, "RGA2 is requested but not available\n"); ++ return AVERROR(ENOSYS); ++ } ++ if (r->is_rga2_used && ++ (in->pix_fmt == AV_PIX_FMT_P010 || ++ out->pix_fmt == AV_PIX_FMT_P010)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' is not supported if RGA2 is requested\n", ++ av_get_pix_fmt_name(AV_PIX_FMT_P010)); ++ return AVERROR(ENOSYS); ++ } ++ if (r->is_rga2_used && ++ (in->pix_fmt == AV_PIX_FMT_P210 || ++ out->pix_fmt == AV_PIX_FMT_P210)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' is not supported if RGA2 is requested\n", ++ av_get_pix_fmt_name(AV_PIX_FMT_P210)); ++ return AVERROR(ENOSYS); ++ } ++ if (r->is_rga2_used && ++ (out->pix_fmt == AV_PIX_FMT_NV15 || ++ out->pix_fmt == AV_PIX_FMT_NV20)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' as output is not supported if RGA2 is requested\n", ++ av_get_pix_fmt_name(out->pix_fmt)); ++ return 
AVERROR(ENOSYS); ++ } ++ if (r->is_rga2_used && in->crop && in->pix_desc->comp[0].depth >= 10) { ++ av_log(avctx, AV_LOG_ERROR, "Cropping 10-bit '%s' input is not supported if RGA2 is requested\n", ++ av_get_pix_fmt_name(in->pix_fmt)); ++ return AVERROR(ENOSYS); ++ } ++ if (r->is_rga2_used && ++ (out->act_w > 4096 || out->act_h > 4096)) { ++ av_log(avctx, AV_LOG_ERROR, "Max supported output size of RGA2 is 4096x4096\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++static RGAFrame *submit_frame(RKRGAContext *r, AVFilterLink *inlink, ++ AVFrame *picref, int do_overlay, int pat_preproc) ++{ ++ RGAFrame *rga_frame; ++ AVFilterContext *ctx = inlink->dst; ++ rga_info_t info = { .mmuFlag = 1, }; ++ int nb_link = FF_INLINK_IDX(inlink); ++ RGAFrameInfo *in_info = &r->in_rga_frame_infos[nb_link]; ++ RGAFrameInfo *out_info = &r->out_rga_frame_info; ++ int w_stride = 0, h_stride = 0; ++ const AVDRMFrameDescriptor *desc; ++ const AVDRMLayerDescriptor *layer; ++ const AVDRMPlaneDescriptor *plane0; ++ RGAFrame **frame_list = NULL; ++ int ret, is_afbc = 0; ++ ++ if (pat_preproc && !nb_link) ++ return NULL; ++ ++ frame_list = nb_link ? ++ (pat_preproc ? 
&r->pat_preproc_frame_list : &r->pat_frame_list) : &r->src_frame_list; ++ ++ clear_unused_frames(*frame_list); ++ ++ rga_frame = get_free_frame(frame_list); ++ if (!rga_frame) ++ return NULL; ++ ++ if (picref->format != AV_PIX_FMT_DRM_PRIME) { ++ av_log(ctx, AV_LOG_ERROR, "RGA gets a wrong frame\n"); ++ return NULL; ++ } ++ rga_frame->frame = av_frame_clone(picref); ++ ++ desc = (AVDRMFrameDescriptor *)rga_frame->frame->data[0]; ++ if (desc->objects[0].fd < 0) ++ return NULL; ++ ++ is_afbc = drm_is_afbc(desc->objects[0].format_modifier); ++ if (!is_afbc) { ++ ret = get_pixel_stride(&desc->objects[0], ++ &desc->layers[0], ++ (in_info->pix_desc->flags & AV_PIX_FMT_FLAG_RGB), ++ (in_info->pix_desc->flags & AV_PIX_FMT_FLAG_PLANAR), ++ in_info->bytes_pp, &w_stride, &h_stride); ++ if (ret < 0 || !w_stride || !h_stride) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to get frame strides\n"); ++ return NULL; ++ } ++ } ++ ++ info.fd = desc->objects[0].fd; ++ info.format = in_info->rga_fmt; ++ info.in_fence_fd = -1; ++ info.out_fence_fd = -1; ++ ++ if (in_info->uncompact_10b_msb) ++ info.is_10b_compact = info.is_10b_endian = 1; ++ ++ if (!nb_link) { ++ info.rotation = in_info->rotate_mode; ++ info.blend = (do_overlay && !pat_preproc) ? 
in_info->blend_mode : 0; ++ } ++ ++ if (is_afbc && (r->is_rga2_used || out_info->scheduler_core == 0x4)) { ++ av_log(ctx, AV_LOG_ERROR, "Input format '%s' with AFBC modifier is not supported by RGA2\n", ++ av_get_pix_fmt_name(in_info->pix_fmt)); ++ return NULL; ++ } ++ ++ /* verify inputs pixel stride */ ++ if (out_info->scheduler_core > 0 && ++ out_info->scheduler_core == (out_info->scheduler_core & 0x3)) { ++ if (!is_afbc && !is_pixel_stride_rga3_compat(w_stride, h_stride, in_info->rga_fmt)) { ++ r->is_rga2_used = 1; ++ av_log(ctx, AV_LOG_WARNING, "Input pixel stride (%dx%d) format '%s' is not supported by RGA3\n", ++ w_stride, h_stride, av_get_pix_fmt_name(in_info->pix_fmt)); ++ } ++ ++ if ((ret = verify_rga_frame_info_io_dynamic(ctx, in_info, out_info)) < 0) ++ return NULL; ++ ++ if (r->is_rga2_used) ++ out_info->scheduler_core = 0x4; ++ } ++ ++ if (pat_preproc) { ++ RGAFrameInfo *in0_info = &r->in_rga_frame_infos[0]; ++ rga_set_rect(&info.rect, 0, 0, ++ FFMIN((in0_info->act_w - in_info->overlay_x), in_info->act_w), ++ FFMIN((in0_info->act_h - in_info->overlay_y), in_info->act_h), ++ w_stride, h_stride, in_info->rga_fmt); ++ } else ++ rga_set_rect(&info.rect, in_info->act_x, in_info->act_y, ++ in_info->act_w, in_info->act_h, ++ w_stride, h_stride, in_info->rga_fmt); ++ ++ if (is_afbc) { ++ int afbc_offset_y = 0; ++ uint32_t drm_afbc_fmt = get_drm_afbc_format(in_info->pix_fmt); ++ ++ if (rga_frame->frame->crop_top > 0) { ++ afbc_offset_y = rga_frame->frame->crop_top; ++ info.rect.yoffset += afbc_offset_y; ++ } ++ ++ layer = &desc->layers[0]; ++ plane0 = &layer->planes[0]; ++ if (drm_afbc_fmt == layer->format) { ++ info.rect.wstride = plane0->pitch; ++ if ((ret = get_afbc_pixel_stride(in_info->bytes_pp, &info.rect.wstride, 1)) < 0) ++ return NULL; ++ ++ if (info.rect.wstride % RK_RGA_AFBC_STRIDE_ALIGN) ++ info.rect.wstride = FFALIGN(inlink->w, RK_RGA_AFBC_STRIDE_ALIGN); ++ ++ info.rect.hstride = FFALIGN(inlink->h + afbc_offset_y, RK_RGA_AFBC_STRIDE_ALIGN); ++ } 
else { ++ av_log(ctx, AV_LOG_ERROR, "Input format '%s' with AFBC modifier is not supported\n", ++ av_get_pix_fmt_name(in_info->pix_fmt)); ++ return NULL; ++ } ++ ++ info.rd_mode = 1 << 1; /* IM_FBC_MODE */ ++ } ++ ++ rga_frame->info = info; ++ ++ return rga_frame; ++} ++ ++static RGAFrame *query_frame(RKRGAContext *r, AVFilterLink *outlink, ++ const AVFrame *in, int pat_preproc) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ RGAFrame *out_frame; ++ rga_info_t info = { .mmuFlag = 1, }; ++ RGAFrameInfo *in0_info = &r->in_rga_frame_infos[0]; ++ RGAFrameInfo *in1_info = ctx->nb_inputs > 1 ? &r->in_rga_frame_infos[1] : NULL; ++ RGAFrameInfo *out_info = pat_preproc ? in1_info : &r->out_rga_frame_info; ++ AVBufferRef *hw_frame_ctx = pat_preproc ? r->pat_preproc_hwframes_ctx : outlink->hw_frames_ctx; ++ int w_stride = 0, h_stride = 0; ++ AVDRMFrameDescriptor *desc; ++ AVDRMLayerDescriptor *layer; ++ RGAFrame **frame_list = NULL; ++ int ret, is_afbc = 0; ++ ++ if (!out_info || !hw_frame_ctx) ++ return NULL; ++ ++ frame_list = pat_preproc ? 
&r->pat_frame_list : &r->dst_frame_list; ++ ++ clear_unused_frames(*frame_list); ++ ++ out_frame = get_free_frame(frame_list); ++ if (!out_frame) ++ return NULL; ++ ++ out_frame->frame = av_frame_alloc(); ++ if (!out_frame->frame) ++ return NULL; ++ ++ if (in && (ret = av_frame_copy_props(out_frame->frame, in)) < 0) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from in to out: %d\n", ret); ++ goto fail; ++ } ++ out_frame->frame->crop_top = 0; ++ ++ if ((ret = av_hwframe_get_buffer(hw_frame_ctx, out_frame->frame, 0)) < 0) { ++ av_log(ctx, AV_LOG_ERROR, "Cannot allocate an internal frame: %d\n", ret); ++ goto fail; ++ } ++ ++ desc = (AVDRMFrameDescriptor *)out_frame->frame->data[0]; ++ if (desc->objects[0].fd < 0) ++ goto fail; ++ ++ if (r->is_rga2_used || out_info->scheduler_core == 0x4) { ++ if (pat_preproc && (info.rect.width > 4096 || info.rect.height > 4096)) { ++ av_log(ctx, AV_LOG_ERROR, "Max supported output size of RGA2 is 4096x4096\n"); ++ goto fail; ++ } ++ if (r->afbc_out && !pat_preproc) { ++ av_log(ctx, AV_LOG_WARNING, "Output format '%s' with AFBC modifier is not supported by RGA2\n", ++ av_get_pix_fmt_name(out_info->pix_fmt)); ++ r->afbc_out = 0; ++ } ++ } ++ ++ is_afbc = r->afbc_out && !pat_preproc; ++ ret = get_pixel_stride(&desc->objects[0], ++ &desc->layers[0], ++ (out_info->pix_desc->flags & AV_PIX_FMT_FLAG_RGB), ++ (out_info->pix_desc->flags & AV_PIX_FMT_FLAG_PLANAR), ++ out_info->bytes_pp, &w_stride, &h_stride); ++ if (!is_afbc && (ret < 0 || !w_stride || !h_stride)) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to get frame strides\n"); ++ goto fail; ++ } ++ ++ info.fd = desc->objects[0].fd; ++ info.format = out_info->rga_fmt; ++ info.core = out_info->scheduler_core; ++ info.in_fence_fd = -1; ++ info.out_fence_fd = -1; ++ info.sync_mode = RGA_BLIT_ASYNC; ++ ++ if (out_info->uncompact_10b_msb) ++ info.is_10b_compact = info.is_10b_endian = 1; ++ ++ if (!pat_preproc) ++ set_colorspace_info(in0_info, in, out_info, out_frame->frame, 
&info.color_space_mode); ++ ++ if (pat_preproc) ++ rga_set_rect(&info.rect, in1_info->overlay_x, in1_info->overlay_y, ++ FFMIN((in0_info->act_w - in1_info->overlay_x), in1_info->act_w), ++ FFMIN((in0_info->act_h - in1_info->overlay_y), in1_info->act_h), ++ w_stride, h_stride, in1_info->rga_fmt); ++ else ++ rga_set_rect(&info.rect, out_info->act_x, out_info->act_y, ++ out_info->act_w, out_info->act_h, ++ w_stride, h_stride, out_info->rga_fmt); ++ ++ if (is_afbc) { ++ uint32_t drm_afbc_fmt = get_drm_afbc_format(out_info->pix_fmt); ++ ++ if (drm_afbc_fmt == DRM_FORMAT_INVALID) { ++ av_log(ctx, AV_LOG_WARNING, "Output format '%s' with AFBC modifier is not supported\n", ++ av_get_pix_fmt_name(out_info->pix_fmt)); ++ r->afbc_out = 0; ++ goto exit; ++ } ++ ++ w_stride = FFALIGN(pat_preproc ? inlink->w : outlink->w, RK_RGA_AFBC_STRIDE_ALIGN); ++ h_stride = FFALIGN(pat_preproc ? inlink->h : outlink->h, RK_RGA_AFBC_STRIDE_ALIGN); ++ ++ if ((info.rect.format == RK_FORMAT_YCbCr_420_SP_10B || ++ info.rect.format == RK_FORMAT_YCbCr_422_SP_10B) && (w_stride % 64)) { ++ av_log(ctx, AV_LOG_WARNING, "Output pixel wstride '%d' format '%s' is not supported by RGA3 AFBC\n", ++ w_stride, av_get_pix_fmt_name(out_info->pix_fmt)); ++ r->afbc_out = 0; ++ goto exit; ++ } ++ ++ /* Inverted RGB/BGR order in FBCE */ ++ switch (info.rect.format) { ++ case RK_FORMAT_RGBA_8888: ++ info.rect.format = RK_FORMAT_BGRA_8888; ++ break; ++ case RK_FORMAT_BGRA_8888: ++ info.rect.format = RK_FORMAT_RGBA_8888; ++ break; ++ } ++ ++ info.rect.wstride = w_stride; ++ info.rect.hstride = h_stride; ++ info.rd_mode = 1 << 1; /* IM_FBC_MODE */ ++ ++ desc->objects[0].format_modifier = ++ DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_BLOCK_SIZE_16x16); ++ ++ layer = &desc->layers[0]; ++ layer->format = drm_afbc_fmt; ++ layer->nb_planes = 1; ++ ++ layer->planes[0].offset = 0; ++ layer->planes[0].pitch = info.rect.wstride; ++ ++ if ((ret = get_afbc_pixel_stride(out_info->bytes_pp, (int 
*)&layer->planes[0].pitch, 0)) < 0) ++ goto fail; ++ } ++ ++exit: ++ out_frame->info = info; ++ ++ return out_frame; ++ ++fail: ++ if (out_frame && out_frame->frame) ++ av_frame_free(&out_frame->frame); ++ ++ return NULL; ++} ++ ++static av_cold int init_hwframes_ctx(AVFilterContext *avctx) ++{ /* Create a DRM_PRIME frames context for the output link on the device of input 0; returns 0 or a negative AVERROR */ ++ RKRGAContext *r = avctx->priv; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ AVHWFramesContext *hwfc_in; ++ AVHWFramesContext *hwfc_out; ++ AVBufferRef *hwfc_out_ref; ++ int ret; ++ ++ if (!inlink->hw_frames_ctx) /* input 0 must carry hw frames */ ++ return AVERROR(EINVAL); ++ ++ hwfc_in = (AVHWFramesContext *)inlink->hw_frames_ctx->data; ++ hwfc_out_ref = av_hwframe_ctx_alloc(hwfc_in->device_ref); ++ if (!hwfc_out_ref) ++ return AVERROR(ENOMEM); ++ ++ hwfc_out = (AVHWFramesContext *)hwfc_out_ref->data; ++ hwfc_out->format = AV_PIX_FMT_DRM_PRIME; ++ hwfc_out->sw_format = r->out_sw_format; /* sw format chosen by the caller of ff_rkrga_init() */ ++ hwfc_out->width = outlink->w; ++ hwfc_out->height = outlink->h; ++ ++ ret = av_hwframe_ctx_init(hwfc_out_ref); ++ if (ret < 0) { ++ av_buffer_unref(&hwfc_out_ref); ++ av_log(avctx, AV_LOG_ERROR, "Error creating frames_ctx for output pad: %d\n", ret); ++ return ret; ++ } ++ ++ av_buffer_unref(&outlink->hw_frames_ctx); /* drop any previous context before installing the new one */ ++ outlink->hw_frames_ctx = hwfc_out_ref; /* the link now owns the reference */ ++ ++ return 0; ++} ++ ++static av_cold int init_pat_preproc_hwframes_ctx(AVFilterContext *avctx) ++{ ++ RKRGAContext *r = avctx->priv; ++ AVFilterLink *inlink0 = avctx->inputs[0]; ++ AVFilterLink *inlink1 = avctx->inputs[1]; ++ AVHWFramesContext *hwfc_in0, *hwfc_in1; ++ AVHWFramesContext *hwfc_pat; ++ AVBufferRef *hwfc_pat_ref; ++ int ret; ++ ++ if (!inlink0->hw_frames_ctx || !inlink1->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ ++ hwfc_in0 = (AVHWFramesContext *)inlink0->hw_frames_ctx->data; ++ hwfc_in1 = (AVHWFramesContext *)inlink1->hw_frames_ctx->data; ++ hwfc_pat_ref = av_hwframe_ctx_alloc(hwfc_in0->device_ref); ++ if (!hwfc_pat_ref) ++ return AVERROR(ENOMEM); ++ ++ hwfc_pat = (AVHWFramesContext 
*)hwfc_pat_ref->data; ++ hwfc_pat->format = AV_PIX_FMT_DRM_PRIME; ++ hwfc_pat->sw_format = hwfc_in1->sw_format; ++ hwfc_pat->width = inlink0->w; ++ hwfc_pat->height = inlink0->h; ++ ++ ret = av_hwframe_ctx_init(hwfc_pat_ref); ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Error creating frames_ctx for pat preproc: %d\n", ret); ++ av_buffer_unref(&hwfc_pat_ref); ++ return ret; ++ } ++ ++ av_buffer_unref(&r->pat_preproc_hwframes_ctx); ++ r->pat_preproc_hwframes_ctx = hwfc_pat_ref; ++ ++ return 0; ++} ++ ++static av_cold int verify_rga_frame_info(AVFilterContext *avctx, ++ RGAFrameInfo *src, RGAFrameInfo *dst, RGAFrameInfo *pat) ++{ ++ RKRGAContext *r = avctx->priv; ++ float scale_ratio_min, scale_ratio_max; ++ float scale_ratio_w, scale_ratio_h; ++ int ret; ++ ++ if (!src || !dst) ++ return AVERROR(EINVAL); ++ ++ scale_ratio_w = (float)dst->act_w / (float)src->act_w; ++ scale_ratio_h = (float)dst->act_h / (float)src->act_h; ++ ++ /* P010 requires RGA3 */ ++ if (!r->has_rga3 && ++ (src->pix_fmt == AV_PIX_FMT_P010 || ++ dst->pix_fmt == AV_PIX_FMT_P010)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' is only supported by RGA3\n", ++ av_get_pix_fmt_name(AV_PIX_FMT_P010)); ++ return AVERROR(ENOSYS); ++ } ++ /* P210 requires RGA3 */ ++ if (!r->has_rga3 && ++ (src->pix_fmt == AV_PIX_FMT_P210 || ++ dst->pix_fmt == AV_PIX_FMT_P210)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' is only supported by RGA3\n", ++ av_get_pix_fmt_name(AV_PIX_FMT_P210)); ++ return AVERROR(ENOSYS); ++ } ++ /* Input formats that requires RGA2 */ ++ if (!r->has_rga2 && ++ (src->pix_fmt == AV_PIX_FMT_GRAY8 || ++ src->pix_fmt == AV_PIX_FMT_YUV420P || ++ src->pix_fmt == AV_PIX_FMT_YUV422P || ++ src->pix_fmt == AV_PIX_FMT_RGB555LE || ++ src->pix_fmt == AV_PIX_FMT_BGR555LE)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' as input is only supported by RGA2\n", ++ av_get_pix_fmt_name(src->pix_fmt)); ++ return AVERROR(ENOSYS); ++ } ++ /* Output formats that requires RGA2 */ ++ if (!r->has_rga2 && ++ (dst->pix_fmt == 
AV_PIX_FMT_GRAY8 || ++ dst->pix_fmt == AV_PIX_FMT_YUV420P || ++ dst->pix_fmt == AV_PIX_FMT_YUV422P || ++ dst->pix_fmt == AV_PIX_FMT_RGB555LE || ++ dst->pix_fmt == AV_PIX_FMT_BGR555LE || ++ dst->pix_fmt == AV_PIX_FMT_ARGB || ++ dst->pix_fmt == AV_PIX_FMT_0RGB || ++ dst->pix_fmt == AV_PIX_FMT_ABGR || ++ dst->pix_fmt == AV_PIX_FMT_0BGR)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' as output is only supported by RGA2\n", ++ av_get_pix_fmt_name(dst->pix_fmt)); ++ return AVERROR(ENOSYS); ++ } ++ /* P010/P210 requires RGA3 but it can't handle certain formats */ ++ if ((src->pix_fmt == AV_PIX_FMT_P010 || ++ src->pix_fmt == AV_PIX_FMT_P210) && ++ (dst->pix_fmt == AV_PIX_FMT_GRAY8 || ++ dst->pix_fmt == AV_PIX_FMT_YUV420P || ++ dst->pix_fmt == AV_PIX_FMT_YUV422P || ++ dst->pix_fmt == AV_PIX_FMT_RGB555LE || ++ dst->pix_fmt == AV_PIX_FMT_BGR555LE || ++ dst->pix_fmt == AV_PIX_FMT_ARGB || ++ dst->pix_fmt == AV_PIX_FMT_0RGB || ++ dst->pix_fmt == AV_PIX_FMT_ABGR || ++ dst->pix_fmt == AV_PIX_FMT_0BGR)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' to '%s' is not supported\n", ++ av_get_pix_fmt_name(src->pix_fmt), ++ av_get_pix_fmt_name(dst->pix_fmt)); ++ return AVERROR(ENOSYS); ++ } ++ /* RGA3 only format to RGA2 only format is not supported */ ++ if ((dst->pix_fmt == AV_PIX_FMT_P010 || ++ dst->pix_fmt == AV_PIX_FMT_P210) && ++ (src->pix_fmt == AV_PIX_FMT_GRAY8 || ++ src->pix_fmt == AV_PIX_FMT_YUV420P || ++ src->pix_fmt == AV_PIX_FMT_YUV422P || ++ src->pix_fmt == AV_PIX_FMT_RGB555LE || ++ src->pix_fmt == AV_PIX_FMT_BGR555LE)) { ++ av_log(avctx, AV_LOG_ERROR, "'%s' to '%s' is not supported\n", ++ av_get_pix_fmt_name(src->pix_fmt), ++ av_get_pix_fmt_name(dst->pix_fmt)); ++ return AVERROR(ENOSYS); ++ } ++ ++ if (src->pix_fmt == AV_PIX_FMT_GRAY8 || ++ src->pix_fmt == AV_PIX_FMT_YUV420P || ++ src->pix_fmt == AV_PIX_FMT_YUV422P || ++ src->pix_fmt == AV_PIX_FMT_RGB555LE || ++ src->pix_fmt == AV_PIX_FMT_BGR555LE || ++ dst->pix_fmt == AV_PIX_FMT_GRAY8 || ++ dst->pix_fmt == AV_PIX_FMT_YUV420P || ++ 
dst->pix_fmt == AV_PIX_FMT_YUV422P || ++ dst->pix_fmt == AV_PIX_FMT_RGB555LE || ++ dst->pix_fmt == AV_PIX_FMT_BGR555LE || ++ dst->pix_fmt == AV_PIX_FMT_ARGB || ++ dst->pix_fmt == AV_PIX_FMT_0RGB || ++ dst->pix_fmt == AV_PIX_FMT_ABGR || ++ dst->pix_fmt == AV_PIX_FMT_0BGR) { ++ r->is_rga2_used = 1; ++ } ++ ++ r->is_rga2_used = r->is_rga2_used || !r->has_rga3; ++ if (r->has_rga3) { ++ if (scale_ratio_w < 0.125f || ++ scale_ratio_w > 8.0f || ++ scale_ratio_h < 0.125f || ++ scale_ratio_h > 8.0f) { ++ r->is_rga2_used = 1; ++ } ++ if (src->act_w < 68 || ++ src->act_w > 8176 || ++ src->act_h > 8176 || ++ dst->act_w < 68) { ++ r->is_rga2_used = 1; ++ } ++ if (pat && (pat->act_w < 68 || ++ pat->act_w > 8176 || ++ pat->act_h > 8176)) { ++ r->is_rga2_used = 1; ++ } ++ } ++ ++ if ((ret = verify_rga_frame_info_io_dynamic(avctx, src, dst)) < 0) ++ return ret; ++ ++ if (r->is_rga2_used) ++ r->scheduler_core = 0x4; ++ ++ /* Prioritize RGA3 on multicore RGA hw to avoid dma32 & algorithm quirks as much as possible */ ++ if (r->has_rga3 && r->has_rga2e && !r->is_rga2_used && ++ (r->scheduler_core == 0 || avctx->nb_inputs > 1 || ++ scale_ratio_w != 1.0f || scale_ratio_h != 1.0f || ++ src->crop || src->uncompact_10b_msb || dst->uncompact_10b_msb)) { ++ r->scheduler_core = 0x3; ++ } ++ ++ scale_ratio_max = 16.0f; ++ if ((r->is_rga2_used && r->has_rga2l) || ++ (!r->is_rga2_used && r->has_rga3 && !r->has_rga2) || ++ (r->scheduler_core > 0 && r->scheduler_core == (r->scheduler_core & 0x3))) { ++ scale_ratio_max = 8.0f; ++ } ++ scale_ratio_min = 1.0f / scale_ratio_max; ++ ++ if (scale_ratio_w < scale_ratio_min || scale_ratio_w > scale_ratio_max || ++ scale_ratio_h < scale_ratio_min || scale_ratio_h > scale_ratio_max) { ++ av_log(avctx, AV_LOG_ERROR, "RGA scale ratio (%.04fx%.04f) exceeds %.04f ~ %.04f.\n", ++ scale_ratio_w, scale_ratio_h, scale_ratio_min, scale_ratio_max); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++static av_cold int fill_rga_frame_info_by_link(AVFilterContext 
*avctx, ++ RGAFrameInfo *info, ++ AVFilterLink *link, ++ int nb_link, int is_inlink) ++{ ++ AVHWFramesContext *hwfc; ++ RKRGAContext *r = avctx->priv; ++ ++ if (!link->hw_frames_ctx || link->format != AV_PIX_FMT_DRM_PRIME) ++ return AVERROR(EINVAL); ++ ++ hwfc = (AVHWFramesContext *)link->hw_frames_ctx->data; ++ ++ if (!map_av_to_rga_format(hwfc->sw_format, &info->rga_fmt, (is_inlink && nb_link > 0))) { ++ av_log(avctx, AV_LOG_ERROR, "Unsupported '%s' pad %d format: '%s'\n", ++ (is_inlink ? "input" : "output"), nb_link, ++ av_get_pix_fmt_name(hwfc->sw_format)); ++ return AVERROR(ENOSYS); ++ } ++ ++ info->pix_fmt = hwfc->sw_format; ++ info->pix_desc = av_pix_fmt_desc_get(info->pix_fmt); ++ info->bytes_pp = av_get_padded_bits_per_pixel(info->pix_desc) / 8.0f; ++ ++ info->act_x = 0; ++ info->act_y = 0; ++ info->act_w = link->w; ++ info->act_h = link->h; ++ ++ /* The w/h of RGA YUV image needs to be 2 aligned */ ++ if (!(info->pix_desc->flags & AV_PIX_FMT_FLAG_RGB)) { ++ info->act_w = ALIGN_DOWN(info->act_w, RK_RGA_YUV_ALIGN); ++ info->act_h = ALIGN_DOWN(info->act_h, RK_RGA_YUV_ALIGN); ++ } ++ ++ info->uncompact_10b_msb = info->pix_fmt == AV_PIX_FMT_P010 || ++ info->pix_fmt == AV_PIX_FMT_P210; ++ ++ if (link->w * link->h > (3840 * 2160 * 3)) ++ r->async_depth = FFMIN(r->async_depth, 1); ++ ++ return 0; ++} ++ ++av_cold int ff_rkrga_init(AVFilterContext *avctx, RKRGAParam *param) ++{ ++ RKRGAContext *r = avctx->priv; ++ int i, ret; ++ const char *rga_ver = querystring(RGA_VERSION); ++ ++ r->got_frame = 0; ++ ++ r->has_rga2 = !!strstr(rga_ver, "RGA_2"); ++ r->has_rga2l = !!strstr(rga_ver, "RGA_2_lite"); ++ r->has_rga2e = !!strstr(rga_ver, "RGA_2_Enhance"); ++ r->has_rga3 = !!strstr(rga_ver, "RGA_3"); ++ ++ if (!(r->has_rga2 || r->has_rga3)) { ++ av_log(avctx, AV_LOG_ERROR, "No RGA2/RGA3 hw available\n"); ++ return AVERROR(ENOSYS); ++ } ++ ++ /* RGA core */ ++ if (r->scheduler_core && !(r->has_rga2 && r->has_rga3)) { ++ av_log(avctx, AV_LOG_WARNING, "Scheduler core cannot 
be set on non-multicore RGA hw, ignoring\n"); ++ r->scheduler_core = 0; ++ } ++ if (r->scheduler_core && r->scheduler_core != (r->scheduler_core & 0x7)) { ++ av_log(avctx, AV_LOG_WARNING, "Invalid scheduler core set, ignoring\n"); ++ r->scheduler_core = 0; ++ } ++ if (r->scheduler_core && r->scheduler_core == (r->scheduler_core & 0x3)) ++ r->has_rga2 = r->has_rga2l = r->has_rga2e = 0; ++ if (r->scheduler_core == 0x4) ++ r->has_rga3 = 0; ++ ++ r->filter_frame = param->filter_frame; ++ if (!r->filter_frame) ++ r->filter_frame = ff_filter_frame; ++ r->out_sw_format = param->out_sw_format; ++ ++ /* OUT hwfc */ ++ ret = init_hwframes_ctx(avctx); ++ if (ret < 0) ++ goto fail; ++ ++ /* IN RGAFrameInfo */ ++ r->in_rga_frame_infos = av_calloc(avctx->nb_inputs, sizeof(*r->in_rga_frame_infos)); ++ if (!r->in_rga_frame_infos) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ for (i = 0; i < avctx->nb_inputs; i++) { ++ ret = fill_rga_frame_info_by_link(avctx, &r->in_rga_frame_infos[i], avctx->inputs[i], i, 1); ++ if (ret < 0) ++ goto fail; ++ } ++ if (avctx->nb_inputs == 1) { ++ r->in_rga_frame_infos[0].rotate_mode = param->in_rotate_mode; ++ ++ if (param->in_crop) { ++ /* The x/y/w/h of RGA YUV image needs to be 2 aligned */ ++ if (!(r->in_rga_frame_infos[0].pix_desc->flags & AV_PIX_FMT_FLAG_RGB)) { ++ param->in_crop_x = ALIGN_DOWN(param->in_crop_x, RK_RGA_YUV_ALIGN); ++ param->in_crop_y = ALIGN_DOWN(param->in_crop_y, RK_RGA_YUV_ALIGN); ++ param->in_crop_w = ALIGN_DOWN(param->in_crop_w, RK_RGA_YUV_ALIGN); ++ param->in_crop_h = ALIGN_DOWN(param->in_crop_h, RK_RGA_YUV_ALIGN); ++ } ++ r->in_rga_frame_infos[0].crop = 1; ++ r->in_rga_frame_infos[0].act_x = param->in_crop_x; ++ r->in_rga_frame_infos[0].act_y = param->in_crop_y; ++ r->in_rga_frame_infos[0].act_w = param->in_crop_w; ++ r->in_rga_frame_infos[0].act_h = param->in_crop_h; ++ } ++ } ++ if (avctx->nb_inputs > 1) { ++ const int premultiplied_alpha = r->in_rga_frame_infos[1].pix_desc->flags & AV_PIX_FMT_FLAG_ALPHA; ++ ++ /* 
IM_ALPHA_BLEND_DST_OVER */ ++ if (param->in_global_alpha > 0 && param->in_global_alpha < 0xff) { ++ r->in_rga_frame_infos[0].blend_mode = premultiplied_alpha ? (0x4 | (1 << 12)) : 0x4; ++ r->in_rga_frame_infos[0].blend_mode |= (param->in_global_alpha & 0xff) << 16; /* fg_global_alpha */ ++ r->in_rga_frame_infos[0].blend_mode |= 0xffU << 24; /* bg_global_alpha; unsigned literal avoids shifting into the sign bit */ ++ } else ++ r->in_rga_frame_infos[0].blend_mode = premultiplied_alpha ? 0x504 : 0x501; ++ ++ r->in_rga_frame_infos[1].overlay_x = FFMAX(param->overlay_x, 0); ++ r->in_rga_frame_infos[1].overlay_y = FFMAX(param->overlay_y, 0); ++ ++ r->is_overlay_offset_valid = (param->overlay_x < r->in_rga_frame_infos[0].act_w - 2) && ++ (param->overlay_y < r->in_rga_frame_infos[0].act_h - 2); ++ if (r->is_overlay_offset_valid && (ret = init_pat_preproc_hwframes_ctx(avctx)) < 0) ++ goto fail; /* propagate the failure instead of continuing without the pat pre-processing frames context */ ++ } ++ ++ /* OUT RGAFrameInfo */ ++ ret = fill_rga_frame_info_by_link(avctx, &r->out_rga_frame_info, avctx->outputs[0], 0, 0); ++ if (ret < 0) ++ goto fail; ++ ++ /* Pre-check RGAFrameInfo */ ++ ret = verify_rga_frame_info(avctx, &r->in_rga_frame_infos[0], ++ &r->out_rga_frame_info, ++ (avctx->nb_inputs > 1 ? &r->in_rga_frame_infos[1] : NULL)); ++ if (ret < 0) ++ goto fail; ++ ++ r->out_rga_frame_info.scheduler_core = r->scheduler_core; ++ ++ /* keep fifo size at least 1. Even when async_depth is 0, fifo is used. 
*/ ++ r->async_fifo = av_fifo_alloc2(r->async_depth + 1, sizeof(RGAAsyncFrame), 0); ++ if (!r->async_fifo) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ return 0; ++ ++fail: ++ ff_rkrga_close(avctx); ++ return ret; ++} ++ ++static void set_rga_async_frame_lock_status(RGAAsyncFrame *frame, int lock) ++{ ++ int status = !!lock; ++ ++ if (!frame) ++ return; ++ ++ if (frame->src) ++ frame->src->locked = status; ++ if (frame->dst) ++ frame->dst->locked = status; ++ if (frame->pat) ++ frame->pat->locked = status; ++} ++ ++static void rga_drain_fifo(RKRGAContext *r) ++{ ++ RGAAsyncFrame aframe; ++ ++ while (r->async_fifo && av_fifo_read(r->async_fifo, &aframe, 1) >= 0) { ++ if (imsync(aframe.dst->info.out_fence_fd) != IM_STATUS_SUCCESS) ++ av_log(NULL, AV_LOG_WARNING, "RGA sync failed\n"); ++ ++ set_rga_async_frame_lock_status(&aframe, 0); ++ } ++} ++ ++av_cold int ff_rkrga_close(AVFilterContext *avctx) ++{ /* Release everything ff_rkrga_init() set up; also used as its failure path, so it must tolerate partially initialised state */ ++ RKRGAContext *r = avctx->priv; ++ ++ /* Drain the fifo during filter reset */ ++ rga_drain_fifo(r); ++ ++ clear_frame_list(&r->src_frame_list); ++ clear_frame_list(&r->dst_frame_list); ++ clear_frame_list(&r->pat_frame_list); ++ ++ clear_frame_list(&r->pat_preproc_frame_list); ++ ++ av_fifo_freep2(&r->async_fifo); ++ av_freep(&r->in_rga_frame_infos); /* av_calloc'd in ff_rkrga_init(); was leaked on close and on init failure */ ++ av_buffer_unref(&r->pat_preproc_hwframes_ctx); ++ ++ return 0; ++} ++ ++static int call_rkrga_blit(AVFilterContext *avctx, ++ rga_info_t *src_info, ++ rga_info_t *dst_info, ++ rga_info_t *pat_info) ++{ ++ int ret; ++ ++ if (!src_info || !dst_info) ++ return AVERROR(EINVAL); ++ ++#define PRINT_RGA_INFO(ctx, info, name) do { \ ++ if (info && name) \ ++ av_log(ctx, AV_LOG_DEBUG, "RGA %s | fd:%d mmu:%d rd_mode:%d | x:%d y:%d w:%d h:%d ws:%d hs:%d fmt:0x%x\n", \ ++ name, info->fd, info->mmuFlag, (info->rd_mode >> 1), info->rect.xoffset, info->rect.yoffset, \ ++ info->rect.width, info->rect.height, info->rect.wstride, info->rect.hstride, (info->rect.format >> 8)); \ ++} while (0) ++ ++ PRINT_RGA_INFO(avctx, src_info, "src"); ++ PRINT_RGA_INFO(avctx, 
dst_info, "dst"); ++ PRINT_RGA_INFO(avctx, pat_info, "pat"); ++#undef PRINT_RGA_INFO ++ ++ if ((ret = c_RkRgaBlit(src_info, dst_info, pat_info)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "RGA blit failed: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ if (dst_info->sync_mode == RGA_BLIT_ASYNC && ++ dst_info->out_fence_fd <= 0) { ++ av_log(avctx, AV_LOG_ERROR, "RGA async blit returned invalid fence_fd: %d\n", ++ dst_info->out_fence_fd); ++ return AVERROR_EXTERNAL; ++ } ++ ++ return 0; ++} ++ ++int ff_rkrga_filter_frame(RKRGAContext *r, ++ AVFilterLink *inlink_src, AVFrame *picref_src, ++ AVFilterLink *inlink_pat, AVFrame *picref_pat) ++{ ++ AVFilterContext *ctx = inlink_src->dst; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ RGAAsyncFrame aframe; ++ RGAFrame *src_frame = NULL; ++ RGAFrame *dst_frame = NULL; ++ RGAFrame *pat_frame = NULL; ++ int ret, filter_ret; ++ int do_overlay = ctx->nb_inputs > 1 && ++ r->is_overlay_offset_valid && ++ inlink_pat && picref_pat; ++ ++ /* Sync & Drain */ ++ while (r->eof && av_fifo_read(r->async_fifo, &aframe, 1) >= 0) { ++ if (imsync(aframe.dst->info.out_fence_fd) != IM_STATUS_SUCCESS) ++ av_log(ctx, AV_LOG_WARNING, "RGA sync failed\n"); ++ ++ set_rga_async_frame_lock_status(&aframe, 0); ++ ++ filter_ret = r->filter_frame(outlink, aframe.dst->frame); ++ if (filter_ret < 0) { ++ av_frame_free(&aframe.dst->frame); ++ return filter_ret; ++ } ++ aframe.dst->queued--; ++ r->got_frame = 1; ++ aframe.dst->frame = NULL; ++ } ++ ++ if (!picref_src) ++ return 0; ++ ++ /* SRC */ ++ if (!(src_frame = submit_frame(r, inlink_src, picref_src, do_overlay, 0))) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input: %d\n", ++ FF_INLINK_IDX(inlink_src)); ++ return AVERROR(ENOMEM); ++ } ++ ++ /* DST */ ++ if (!(dst_frame = query_frame(r, outlink, src_frame->frame, 0))) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to query an output frame\n"); ++ return AVERROR(ENOMEM); ++ } ++ ++ /* PAT */ ++ if (do_overlay) { ++ RGAFrameInfo *in0_info = 
&r->in_rga_frame_infos[0]; ++ RGAFrameInfo *in1_info = &r->in_rga_frame_infos[1]; ++ RGAFrameInfo *out_info = &r->out_rga_frame_info; ++ RGAFrame *pat_in = NULL; ++ RGAFrame *pat_out = NULL; ++ ++ /* translate PAT from top-left to (x,y) on a new image with the same size of SRC */ ++ if (in1_info->act_w != in0_info->act_w || ++ in1_info->act_h != in0_info->act_h || ++ in1_info->overlay_x > 0 || ++ in1_info->overlay_y > 0) { ++ if (!(pat_in = submit_frame(r, inlink_pat, picref_pat, 0, 1))) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input: %d\n", ++ FF_INLINK_IDX(inlink_pat)); ++ return AVERROR(ENOMEM); ++ } ++ if (!(pat_out = query_frame(r, outlink, picref_pat, 1))) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to query an output frame\n"); ++ return AVERROR(ENOMEM); ++ } ++ dst_frame->info.core = out_info->scheduler_core; ++ ++ pat_out->info.priority = 1; ++ pat_out->info.core = dst_frame->info.core; ++ pat_out->info.sync_mode = RGA_BLIT_SYNC; ++ ++ /* Sync Blit Pre-Proc */ ++ ret = call_rkrga_blit(ctx, &pat_in->info, &pat_out->info, NULL); ++ if (ret < 0) ++ return ret; ++ ++ pat_out->info.rect.xoffset = 0; ++ pat_out->info.rect.yoffset = 0; ++ pat_out->info.rect.width = in0_info->act_w; ++ pat_out->info.rect.height = in0_info->act_h; ++ ++ pat_frame = pat_out; ++ } ++ ++ if (!pat_frame && !(pat_frame = submit_frame(r, inlink_pat, picref_pat, 0, 0))) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input: %d\n", ++ FF_INLINK_IDX(inlink_pat)); ++ return AVERROR(ENOMEM); ++ } ++ dst_frame->info.core = out_info->scheduler_core; ++ } ++ ++ /* Async Blit */ ++ ret = call_rkrga_blit(ctx, ++ &src_frame->info, ++ &dst_frame->info, ++ pat_frame ? 
&pat_frame->info : NULL); ++ if (ret < 0) ++ return ret; ++ ++ dst_frame->queued++; ++ aframe = (RGAAsyncFrame){ src_frame, dst_frame, pat_frame }; ++ set_rga_async_frame_lock_status(&aframe, 1); ++ av_fifo_write(r->async_fifo, &aframe, 1); ++ ++ /* Sync & Retrieve */ ++ if (av_fifo_can_read(r->async_fifo) > r->async_depth) { ++ av_fifo_read(r->async_fifo, &aframe, 1); ++ if (imsync(aframe.dst->info.out_fence_fd) != IM_STATUS_SUCCESS) { ++ av_log(ctx, AV_LOG_ERROR, "RGA sync failed\n"); ++ return AVERROR_EXTERNAL; ++ } ++ set_rga_async_frame_lock_status(&aframe, 0); ++ ++ filter_ret = r->filter_frame(outlink, aframe.dst->frame); ++ if (filter_ret < 0) { ++ av_frame_free(&aframe.dst->frame); ++ return filter_ret; ++ } ++ aframe.dst->queued--; ++ r->got_frame = 1; ++ aframe.dst->frame = NULL; ++ } ++ ++ return 0; ++} +Index: FFmpeg/libavfilter/rkrga_common.h +=================================================================== +--- /dev/null ++++ libavfilter/rkrga_common.h +@@ -0,0 +1,127 @@ ++/* ++ * Copyright (c) 2023 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Rockchip RGA (2D Raster Graphic Acceleration) base function ++ */ ++ ++#ifndef AVFILTER_RKRGA_COMMON_H ++#define AVFILTER_RKRGA_COMMON_H ++ ++#include <rga/RgaApi.h> ++#include <rga/im2d.h> ++ ++#include "avfilter.h" ++#include "libavutil/fifo.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_rkmpp.h" ++ ++#define ALIGN_DOWN(a, b) ((a) & ~((b)-1)) ++#define RK_RGA_YUV_ALIGN 2 ++#define RK_RGA_AFBC_STRIDE_ALIGN 16 ++ ++#define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst->input_pads)) ++#define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads)) ++ ++typedef struct RGAFrame { ++ AVFrame *frame; ++ rga_info_t info; ++ struct RGAFrame *next; ++ int queued; ++ int locked; ++} RGAFrame; ++ ++typedef struct RGAFrameInfo { ++ enum _Rga_SURF_FORMAT rga_fmt; ++ enum AVPixelFormat pix_fmt; ++ const AVPixFmtDescriptor *pix_desc; ++ float bytes_pp; ++ int act_x; ++ int act_y; ++ int act_w; ++ int act_h; ++ int uncompact_10b_msb; ++ int rotate_mode; ++ int blend_mode; ++ int crop; ++ int scheduler_core; ++ int overlay_x; ++ int overlay_y; ++} RGAFrameInfo; ++ ++typedef struct RKRGAContext { ++ const AVClass *class; ++ ++ int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); ++ enum AVPixelFormat out_sw_format; ++ ++ RGAFrame *src_frame_list; ++ RGAFrame *dst_frame_list; ++ RGAFrame *pat_frame_list; ++ ++ AVBufferRef *pat_preproc_hwframes_ctx; ++ RGAFrame *pat_preproc_frame_list; ++ ++ RGAFrameInfo *in_rga_frame_infos; /* one entry per input pad, allocated in ff_rkrga_init() */ ++ RGAFrameInfo out_rga_frame_info; ++ ++ int scheduler_core; ++ int async_depth; ++ int afbc_out; ++ ++ int has_rga2; ++ int has_rga2l; ++ int has_rga2e; ++ int has_rga3; ++ int is_rga2_used; ++ int is_overlay_offset_valid; ++ ++ int eof; ++ int got_frame; ++ ++ AVFifo *async_fifo; 
++} RKRGAContext; ++ ++typedef struct RKRGAParam { ++ int (*filter_frame)(AVFilterLink *outlink, AVFrame *frame); ++ ++ enum AVPixelFormat out_sw_format; ++ ++ int in_rotate_mode; ++ int in_global_alpha; ++ ++ int in_crop; ++ int in_crop_x; ++ int in_crop_y; ++ int in_crop_w; ++ int in_crop_h; ++ ++ int overlay_x; ++ int overlay_y; ++} RKRGAParam; ++ ++int ff_rkrga_init(AVFilterContext *avctx, RKRGAParam *param); ++int ff_rkrga_close(AVFilterContext *avctx); ++int ff_rkrga_filter_frame(RKRGAContext *r, ++ AVFilterLink *inlink_src, AVFrame *picref_src, ++ AVFilterLink *inlink_pat, AVFrame *picref_pat); ++ ++#endif /* AVFILTER_RKRGA_COMMON_H */ +Index: FFmpeg/libavfilter/vf_overlay_rkrga.c +=================================================================== +--- /dev/null ++++ libavfilter/vf_overlay_rkrga.c +@@ -0,0 +1,368 @@ ++/* ++ * Copyright (c) 2023 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Rockchip RGA (2D Raster Graphic Acceleration) video compositor ++ */ ++ ++#include "libavutil/common.h" ++#include "libavutil/eval.h" ++#include "libavutil/internal.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#include "filters.h" ++#include "framesync.h" ++ ++#include "rkrga_common.h" ++ ++enum var_name { ++ VAR_MAIN_W, VAR_MW, ++ VAR_MAIN_H, VAR_MH, ++ VAR_OVERLAY_W, VAR_OW, ++ VAR_OVERLAY_H, VAR_OH, ++ VAR_OVERLAY_X, VAR_OX, ++ VAR_OVERLAY_Y, VAR_OY, ++ VAR_VARS_NB ++}; ++ ++typedef struct RGAOverlayContext { ++ RKRGAContext rga; ++ ++ FFFrameSync fs; ++ ++ double var_values[VAR_VARS_NB]; ++ char *overlay_ox, *overlay_oy; ++ int global_alpha; ++ enum AVPixelFormat format; ++} RGAOverlayContext; ++ ++static const char *const var_names[] = { ++ "main_w", "W", /* input width of the main layer */ ++ "main_h", "H", /* input height of the main layer */ ++ "overlay_w", "w", /* input width of the overlay layer */ ++ "overlay_h", "h", /* input height of the overlay layer */ ++ "overlay_x", "x", /* x position of the overlay layer inside of main */ ++ "overlay_y", "y", /* y position of the overlay layer inside of main */ ++ NULL ++}; ++ ++static int eval_expr(AVFilterContext *ctx) ++{ ++ RGAOverlayContext *r = ctx->priv; ++ double *var_values = r->var_values; ++ int ret = 0; ++ AVExpr *ox_expr = NULL, *oy_expr = NULL; ++ AVExpr *ow_expr = NULL, *oh_expr = NULL; ++ ++#define PASS_EXPR(e, s) {\ ++ ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \ ++ if (ret < 0) {\ ++ av_log(ctx, AV_LOG_ERROR, "Error when passing '%s'.\n", s);\ ++ goto release;\ ++ }\ ++} ++ PASS_EXPR(ox_expr, r->overlay_ox); ++ PASS_EXPR(oy_expr, r->overlay_oy); ++ PASS_EXPR(ow_expr, "overlay_w"); ++ 
PASS_EXPR(oh_expr, "overlay_h"); ++#undef PASS_EXPR ++ ++ var_values[VAR_OVERLAY_W] = ++ var_values[VAR_OW] = av_expr_eval(ow_expr, var_values, NULL); ++ var_values[VAR_OVERLAY_H] = ++ var_values[VAR_OH] = av_expr_eval(oh_expr, var_values, NULL); ++ ++ /* calc again in case ow is relative to oh */ ++ var_values[VAR_OVERLAY_W] = ++ var_values[VAR_OW] = av_expr_eval(ow_expr, var_values, NULL); ++ ++ var_values[VAR_OVERLAY_X] = ++ var_values[VAR_OX] = av_expr_eval(ox_expr, var_values, NULL); ++ var_values[VAR_OVERLAY_Y] = ++ var_values[VAR_OY] = av_expr_eval(oy_expr, var_values, NULL); ++ ++ /* calc again in case ox is relative to oy */ ++ var_values[VAR_OVERLAY_X] = ++ var_values[VAR_OX] = av_expr_eval(ox_expr, var_values, NULL); ++ ++release: ++ av_expr_free(ox_expr); ++ av_expr_free(oy_expr); ++ av_expr_free(ow_expr); ++ av_expr_free(oh_expr); ++ ++ return ret; ++} ++ ++static av_cold int set_size_info(AVFilterContext *ctx, ++ AVFilterLink *inlink_main, ++ AVFilterLink *inlink_overlay, ++ AVFilterLink *outlink) ++{ ++ RGAOverlayContext *r = ctx->priv; ++ int ret; ++ ++ if (inlink_main->w < 2 || inlink_main->w > 8192 || ++ inlink_main->h < 2 || inlink_main->h > 8192 || ++ inlink_overlay->w < 2 || inlink_overlay->w > 8192 || ++ inlink_overlay->h < 2 || inlink_overlay->h > 8192) { ++ av_log(ctx, AV_LOG_ERROR, "Supported input size is range from 2x2 ~ 8192x8192\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ r->var_values[VAR_MAIN_W] = ++ r->var_values[VAR_MW] = inlink_main->w; ++ r->var_values[VAR_MAIN_H] = ++ r->var_values[VAR_MH] = inlink_main->h; ++ ++ r->var_values[VAR_OVERLAY_W] = inlink_overlay->w; ++ r->var_values[VAR_OVERLAY_H] = inlink_overlay->h; ++ ++ if ((ret = eval_expr(ctx)) < 0) ++ return ret; ++ ++ outlink->w = r->var_values[VAR_MW]; ++ outlink->h = r->var_values[VAR_MH]; ++ if (outlink->w < 2 || outlink->w > 8128 || ++ outlink->h < 2 || outlink->h > 8128) { ++ av_log(ctx, AV_LOG_ERROR, "Supported output size is range from 2x2 ~ 8128x8128\n"); ++ return 
AVERROR(EINVAL); ++ } ++ ++ if (inlink_main->sample_aspect_ratio.num) ++ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink_main->w, ++ outlink->w * inlink_main->h}, ++ inlink_main->sample_aspect_ratio); ++ else ++ outlink->sample_aspect_ratio = inlink_main->sample_aspect_ratio; ++ ++ return 0; ++} ++ ++static av_cold int rgaoverlay_config_props(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ RGAOverlayContext *r = ctx->priv; ++ AVFilterLink *inlink_main = ctx->inputs[0]; ++ AVFilterLink *inlink_overlay = ctx->inputs[1]; ++ AVHWFramesContext *frames_ctx_main; ++ AVHWFramesContext *frames_ctx_overlay; ++ enum AVPixelFormat in_format_main; ++ enum AVPixelFormat in_format_overlay; ++ enum AVPixelFormat out_format; ++ int ret; ++ ++ RKRGAParam param = { NULL }; ++ ++ if (!inlink_main->hw_frames_ctx) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on main input\n"); ++ return AVERROR(EINVAL); ++ } ++ frames_ctx_main = (AVHWFramesContext *)inlink_main->hw_frames_ctx->data; ++ in_format_main = frames_ctx_main->sw_format; ++ out_format = (r->format == AV_PIX_FMT_NONE) ? 
in_format_main : r->format; ++ ++ if (!inlink_overlay->hw_frames_ctx) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on overlay input\n"); ++ return AVERROR(EINVAL); ++ } ++ frames_ctx_overlay = (AVHWFramesContext *)inlink_overlay->hw_frames_ctx->data; ++ in_format_overlay = frames_ctx_overlay->sw_format; ++ ++ ret = set_size_info(ctx, inlink_main, inlink_overlay, outlink); ++ if (ret < 0) ++ return ret; ++ ++ param.filter_frame = NULL; /* NULL makes ff_rkrga_init() fall back to ff_filter_frame */ ++ param.out_sw_format = out_format; ++ param.in_global_alpha = r->global_alpha; ++ param.overlay_x = r->var_values[VAR_OX]; ++ param.overlay_y = r->var_values[VAR_OY]; ++ ++ ret = ff_rkrga_init(ctx, &param); ++ if (ret < 0) ++ return ret; ++ ++ av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d fmt:%s + w:%d h:%d fmt:%s (x:%d y:%d) -> w:%d h:%d fmt:%s\n", ++ inlink_main->w, inlink_main->h, av_get_pix_fmt_name(in_format_main), ++ inlink_overlay->w, inlink_overlay->h, av_get_pix_fmt_name(in_format_overlay), ++ param.overlay_x, param.overlay_y, outlink->w, outlink->h, av_get_pix_fmt_name(out_format)); ++ ++ ret = ff_framesync_init_dualinput(&r->fs, ctx); ++ if (ret < 0) ++ return ret; ++ ++ r->fs.time_base = outlink->time_base = inlink_main->time_base; ++ ++ ret = ff_framesync_configure(&r->fs); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static int rgaoverlay_on_event(FFFrameSync *fs) ++{ ++ AVFilterContext *ctx = fs->parent; ++ AVFilterLink *inlink_main = ctx->inputs[0]; ++ AVFilterLink *inlink_overlay = ctx->inputs[1]; ++ AVFrame *in_main = NULL, *in_overlay = NULL; ++ int ret; ++ ++ RGAOverlayContext *r = ctx->priv; ++ ++ ret = ff_framesync_get_frame(fs, 0, &in_main, 0); ++ if (ret < 0) ++ return ret; ++ ret = ff_framesync_get_frame(fs, 1, &in_overlay, 0); ++ if (ret < 0) ++ return ret; ++ ++ if (!in_main) ++ return AVERROR_BUG; ++ ++ return ff_rkrga_filter_frame(&r->rga, ++ inlink_main, in_main, ++ inlink_overlay, in_overlay); ++} ++ ++static av_cold int rgaoverlay_init(AVFilterContext *ctx) ++{ ++ RGAOverlayContext *r = 
ctx->priv; ++ ++ r->fs.on_event = &rgaoverlay_on_event; ++ ++ return 0; ++} ++ ++static av_cold void rgaoverlay_uninit(AVFilterContext *ctx) ++{ ++ RGAOverlayContext *r = ctx->priv; ++ ++ ff_framesync_uninit(&r->fs); ++ ++ ff_rkrga_close(ctx); ++} ++ ++static int rgaoverlay_activate(AVFilterContext *ctx) ++{ ++ RGAOverlayContext *r = ctx->priv; ++ AVFilterLink *inlink_main = ctx->inputs[0]; ++ AVFilterLink *inlink_overlay = ctx->inputs[1]; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ int i, ret; ++ int64_t pts = AV_NOPTS_VALUE; ++ ++ ret = ff_framesync_activate(&r->fs); ++ if (ret < 0) ++ return ret; ++ ++ if (r->fs.eof) { ++ r->rga.eof = 1; ++ pts = r->fs.pts; ++ goto eof; ++ } ++ ++ if (r->rga.got_frame) ++ r->rga.got_frame = 0; ++ else { ++ for (i = 0; i < ctx->nb_inputs; i++) { ++ if (!ff_inlink_check_available_frame(ctx->inputs[i])) { ++ FF_FILTER_FORWARD_WANTED(outlink, ctx->inputs[i]); ++ } ++ } ++ return FFERROR_NOT_READY; ++ } ++ ++ return 0; ++ ++eof: ++ ff_rkrga_filter_frame(&r->rga, ++ inlink_main, NULL, ++ inlink_overlay, NULL); ++ ++ pts = av_rescale_q(pts, inlink_main->time_base, outlink->time_base); ++ ff_outlink_set_status(outlink, AVERROR_EOF, pts); ++ return 0; ++} ++ ++#define OFFSET(x) offsetof(RGAOverlayContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++ ++static const AVOption rgaoverlay_options[] = { ++ { "x", "Overlay x position", OFFSET(overlay_ox), AV_OPT_TYPE_STRING, { .str = "0" }, 0, 0, .flags = FLAGS }, ++ { "y", "Overlay y position", OFFSET(overlay_oy), AV_OPT_TYPE_STRING, { .str = "0" }, 0, 0, .flags = FLAGS }, ++ { "alpha", "Overlay global alpha", OFFSET(global_alpha), AV_OPT_TYPE_INT, { .i64 = 255 }, 0, 255, .flags = FLAGS }, ++ { "format", "Output video pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, INT_MIN, INT_MAX, .flags = FLAGS }, ++ { "eof_action", "Action to take when encountering EOF from secondary input ", ++ OFFSET(fs.opt_eof_action), 
AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, ++ EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, .unit = "eof_action" }, ++ { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, .unit = "eof_action" }, ++ { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, .unit = "eof_action" }, ++ { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, .unit = "eof_action" }, ++ { "shortest", "Force termination when the shortest input terminates", OFFSET(fs.opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "repeatlast", "Repeat overlay of the last overlay frame", OFFSET(fs.opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, ++ { "core", "Set multicore RGA scheduler core [use with caution]", OFFSET(rga.scheduler_core), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, INT_MAX, FLAGS, .unit = "core" }, ++ { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, .unit = "core" }, ++ { "rga3_core0", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "core" }, /* RGA3_SCHEDULER_CORE0 */ ++ { "rga3_core1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, .unit = "core" }, /* RGA3_SCHEDULER_CORE1 */ ++ { "rga2_core0", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 4 }, 0, 0, FLAGS, .unit = "core" }, /* RGA2_SCHEDULER_CORE0 */ ++ { "async_depth", "Set the internal parallelization depth", OFFSET(rga.async_depth), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, 4, .flags = FLAGS }, ++ { "afbc", "Enable AFBC (Arm Frame Buffer Compression) to save bandwidth", OFFSET(rga.afbc_out), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS }, ++ { NULL }, ++}; ++ ++FRAMESYNC_DEFINE_CLASS(rgaoverlay, RGAOverlayContext, fs); ++ ++static const AVFilterPad rgaoverlay_inputs[] = { ++ { ++ .name = "main", ++ .type = AVMEDIA_TYPE_VIDEO, ++ }, ++ { ++ .name = "overlay", ++ .type = AVMEDIA_TYPE_VIDEO, ++ }, ++}; ++ ++static 
const AVFilterPad rgaoverlay_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = rgaoverlay_config_props, ++ }, ++}; ++ ++const AVFilter ff_vf_overlay_rkrga = { ++ .name = "overlay_rkrga", ++ .description = NULL_IF_CONFIG_SMALL("Rockchip RGA (2D Raster Graphic Acceleration) video compositor"), ++ .priv_size = sizeof(RGAOverlayContext), ++ .priv_class = &rgaoverlay_class, ++ .init = rgaoverlay_init, ++ .uninit = rgaoverlay_uninit, ++ .activate = rgaoverlay_activate, ++ FILTER_INPUTS(rgaoverlay_inputs), ++ FILTER_OUTPUTS(rgaoverlay_outputs), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), ++ .preinit = rgaoverlay_framesync_preinit, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; +Index: FFmpeg/libavfilter/vf_vpp_rkrga.c +=================================================================== +--- /dev/null ++++ libavfilter/vf_vpp_rkrga.c +@@ -0,0 +1,578 @@ ++/* ++ * Copyright (c) 2023 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Rockchip RGA (2D Raster Graphic Acceleration) video post-process (scale/crop/transpose) ++ */ ++ ++#include "config_components.h" ++ ++#include "libavutil/common.h" ++#include "libavutil/eval.h" ++#include "libavutil/internal.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#include "filters.h" ++#include "scale_eval.h" ++#include "transpose.h" ++ ++#include "rkrga_common.h" ++ ++typedef struct RGAVppContext { ++ RKRGAContext rga; ++ ++ enum AVPixelFormat format; ++ int transpose; ++ int force_original_aspect_ratio; ++ int force_divisible_by; ++ int force_yuv; ++ int force_chroma; ++ int scheduler_core; ++ ++ int in_rotate_mode; ++ ++ char *ow, *oh; ++ char *cx, *cy, *cw, *ch; ++ int crop; ++ ++ int act_x, act_y; ++ int act_w, act_h; ++} RGAVppContext; ++ ++enum { ++ FORCE_YUV_DISABLE, ++ FORCE_YUV_AUTO, ++ FORCE_YUV_8BIT, ++ FORCE_YUV_10BIT, ++ FORCE_YUV_NB ++}; ++ ++enum { ++ FORCE_CHROMA_AUTO, ++ FORCE_CHROMA_420SP, ++ FORCE_CHROMA_420P, ++ FORCE_CHROMA_422SP, ++ FORCE_CHROMA_422P, ++ FORCE_CHROMA_NB ++}; ++ ++static const char *const var_names[] = { ++ "iw", "in_w", ++ "ih", "in_h", ++ "ow", "out_w", "w", ++ "oh", "out_h", "h", ++ "cw", ++ "ch", ++ "cx", ++ "cy", ++ "a", "dar", ++ "sar", ++ NULL ++}; ++ ++enum var_name { ++ VAR_IW, VAR_IN_W, ++ VAR_IH, VAR_IN_H, ++ VAR_OW, VAR_OUT_W, VAR_W, ++ VAR_OH, VAR_OUT_H, VAR_H, ++ VAR_CW, ++ VAR_CH, ++ VAR_CX, ++ VAR_CY, ++ VAR_A, VAR_DAR, ++ VAR_SAR, ++ VAR_VARS_NB ++}; ++ ++static av_cold int eval_expr(AVFilterContext *ctx, ++ int *ret_w, int *ret_h, ++ int *ret_cx, int *ret_cy, ++ int *ret_cw, int *ret_ch) ++{ ++#define PASS_EXPR(e, s) {\ ++ if (s) {\ ++ ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \ ++ if (ret < 0) { \ 
++ av_log(ctx, AV_LOG_ERROR, "Error when passing '%s'.\n", s); \ ++ goto release; \ ++ } \ ++ }\ ++} ++#define CALC_EXPR(e, v, i, d) {\ ++ if (e)\ ++ i = v = av_expr_eval(e, var_values, NULL); \ ++ else\ ++ i = v = d;\ ++} ++ RGAVppContext *r = ctx->priv; ++ double var_values[VAR_VARS_NB] = { NAN }; ++ AVExpr *w_expr = NULL, *h_expr = NULL; ++ AVExpr *cw_expr = NULL, *ch_expr = NULL; ++ AVExpr *cx_expr = NULL, *cy_expr = NULL; ++ int ret = 0; ++ ++ PASS_EXPR(cw_expr, r->cw); ++ PASS_EXPR(ch_expr, r->ch); ++ ++ PASS_EXPR(w_expr, r->ow); ++ PASS_EXPR(h_expr, r->oh); ++ ++ PASS_EXPR(cx_expr, r->cx); ++ PASS_EXPR(cy_expr, r->cy); ++ ++ var_values[VAR_IW] = ++ var_values[VAR_IN_W] = ctx->inputs[0]->w; ++ ++ var_values[VAR_IH] = ++ var_values[VAR_IN_H] = ctx->inputs[0]->h; ++ ++ var_values[VAR_A] = (double)var_values[VAR_IN_W] / var_values[VAR_IN_H]; ++ var_values[VAR_SAR] = ctx->inputs[0]->sample_aspect_ratio.num ? ++ (double)ctx->inputs[0]->sample_aspect_ratio.num / ctx->inputs[0]->sample_aspect_ratio.den : 1; ++ var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR]; ++ ++ /* crop params */ ++ CALC_EXPR(cw_expr, var_values[VAR_CW], *ret_cw, var_values[VAR_IW]); ++ CALC_EXPR(ch_expr, var_values[VAR_CH], *ret_ch, var_values[VAR_IH]); ++ ++ /* calc again in case cw is relative to ch */ ++ CALC_EXPR(cw_expr, var_values[VAR_CW], *ret_cw, var_values[VAR_IW]); ++ ++ CALC_EXPR(w_expr, ++ var_values[VAR_OUT_W] = var_values[VAR_OW] = var_values[VAR_W], ++ *ret_w, var_values[VAR_CW]); ++ CALC_EXPR(h_expr, ++ var_values[VAR_OUT_H] = var_values[VAR_OH] = var_values[VAR_H], ++ *ret_h, var_values[VAR_CH]); ++ ++ /* calc again in case ow is relative to oh */ ++ CALC_EXPR(w_expr, ++ var_values[VAR_OUT_W] = var_values[VAR_OW] = var_values[VAR_W], ++ *ret_w, var_values[VAR_CW]); ++ ++ CALC_EXPR(cx_expr, var_values[VAR_CX], *ret_cx, (var_values[VAR_IW] - var_values[VAR_OW]) / 2); ++ CALC_EXPR(cy_expr, var_values[VAR_CY], *ret_cy, (var_values[VAR_IH] - var_values[VAR_OH]) / 2); ++ 
++ /* calc again in case cx is relative to cy */ ++ CALC_EXPR(cx_expr, var_values[VAR_CX], *ret_cx, (var_values[VAR_IW] - var_values[VAR_OW]) / 2); ++ ++ r->crop = (*ret_cw != var_values[VAR_IW]) || (*ret_ch != var_values[VAR_IH]); ++ ++release: ++ av_expr_free(w_expr); ++ av_expr_free(h_expr); ++ av_expr_free(cw_expr); ++ av_expr_free(ch_expr); ++ av_expr_free(cx_expr); ++ av_expr_free(cy_expr); ++#undef PASS_EXPR ++#undef CALC_EXPR ++ ++ return ret; ++} ++ ++static av_cold int set_size_info(AVFilterContext *ctx, ++ AVFilterLink *inlink, ++ AVFilterLink *outlink) ++{ ++ RGAVppContext *r = ctx->priv; ++ int w, h, ret; ++ ++ if (inlink->w < 2 || inlink->w > 8192 || ++ inlink->h < 2 || inlink->h > 8192) { ++ av_log(ctx, AV_LOG_ERROR, "Supported input size is range from 2x2 ~ 8192x8192\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if ((ret = eval_expr(ctx, &w, &h, &r->act_x, &r->act_y, &r->act_w, &r->act_h)) < 0) ++ return ret; ++ ++ r->act_x = FFMAX(FFMIN(r->act_x, inlink->w), 0); ++ r->act_y = FFMAX(FFMIN(r->act_y, inlink->h), 0); ++ r->act_w = FFMAX(FFMIN(r->act_w, inlink->w), 0); ++ r->act_h = FFMAX(FFMIN(r->act_h, inlink->h), 0); ++ ++ r->act_x = FFMIN(r->act_x, inlink->w - r->act_w); ++ r->act_y = FFMIN(r->act_y, inlink->h - r->act_h); ++ r->act_w = FFMIN(r->act_w, inlink->w - r->act_x); ++ r->act_h = FFMIN(r->act_h, inlink->h - r->act_y); ++ ++ ff_scale_adjust_dimensions(inlink, &w, &h, ++ r->force_original_aspect_ratio, r->force_divisible_by); ++ ++ if (((int64_t)h * inlink->w) > INT_MAX || ++ ((int64_t)w * inlink->h) > INT_MAX) { ++ av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ outlink->w = w; ++ outlink->h = h; ++ if (outlink->w < 2 || outlink->w > 8128 || ++ outlink->h < 2 || outlink->h > 8128) { ++ av_log(ctx, AV_LOG_ERROR, "Supported output size is range from 2x2 ~ 8128x8128\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (inlink->sample_aspect_ratio.num) ++ outlink->sample_aspect_ratio = 
av_mul_q((AVRational){outlink->h * inlink->w, ++ outlink->w * inlink->h}, ++ inlink->sample_aspect_ratio); ++ else ++ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; ++ ++ if (r->transpose >= 0) { ++ switch (r->transpose) { ++ case TRANSPOSE_CCLOCK_FLIP: ++ r->in_rotate_mode = 0x07 | (0x01 << 4); /* HAL_TRANSFORM_ROT_270 | (HAL_TRANSFORM_FLIP_H << 4) */ ++ FFSWAP(int, outlink->w, outlink->h); ++ FFSWAP(int, outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den); ++ break; ++ case TRANSPOSE_CLOCK: ++ r->in_rotate_mode = 0x04; /* HAL_TRANSFORM_ROT_90 */ ++ FFSWAP(int, outlink->w, outlink->h); ++ FFSWAP(int, outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den); ++ break; ++ case TRANSPOSE_CCLOCK: ++ r->in_rotate_mode = 0x07; /* HAL_TRANSFORM_ROT_270 */ ++ FFSWAP(int, outlink->w, outlink->h); ++ FFSWAP(int, outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den); ++ break; ++ case TRANSPOSE_CLOCK_FLIP: ++ r->in_rotate_mode = 0x04 | (0x01 << 4); /* HAL_TRANSFORM_ROT_90 | (HAL_TRANSFORM_FLIP_H << 4) */ ++ FFSWAP(int, outlink->w, outlink->h); ++ FFSWAP(int, outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den); ++ break; ++ case TRANSPOSE_REVERSAL: ++ r->in_rotate_mode = 0x03; /* HAL_TRANSFORM_ROT_180 */ ++ break; ++ case TRANSPOSE_HFLIP: ++ r->in_rotate_mode = 0x01; /* HAL_TRANSFORM_FLIP_H */ ++ break; ++ case TRANSPOSE_VFLIP: ++ r->in_rotate_mode = 0x02; /* HAL_TRANSFORM_FLIP_V */ ++ break; ++ default: ++ av_log(ctx, AV_LOG_ERROR, "Failed to set transpose mode to %d\n", r->transpose); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ return 0; ++} ++ ++static av_cold void config_force_format(AVFilterContext *ctx, ++ enum AVPixelFormat in_format, ++ enum AVPixelFormat *out_format) ++{ ++ RGAVppContext *r = ctx->priv; ++ const AVPixFmtDescriptor *desc; ++ const char *rga_ver = NULL; ++ int has_rga3 = 0; ++ int out_depth, force_chroma; ++ int is_yuv, is_fully_planar; ++ ++ if (!out_format) ++ return; ++ ++ if 
(r->force_yuv == FORCE_YUV_AUTO) ++ out_depth = (in_format == AV_PIX_FMT_NV15 || ++ in_format == AV_PIX_FMT_NV20) ? 10 : 0; ++ else ++ out_depth = (r->force_yuv == FORCE_YUV_8BIT) ? 8 : ++ (r->force_yuv == FORCE_YUV_10BIT) ? 10 : 0; ++ ++ if (!out_depth) ++ return; ++ ++ /* Auto fallback to 8-bit fmts on RGA2 */ ++ rga_ver = querystring(RGA_VERSION); ++ has_rga3 = !!strstr(rga_ver, "RGA_3"); ++ if (out_depth >= 10 && !has_rga3) ++ out_depth = 8; ++ ++ desc = av_pix_fmt_desc_get(in_format); ++ is_yuv = !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2; ++ ++ force_chroma = r->force_chroma; ++ if (is_yuv && force_chroma == FORCE_CHROMA_AUTO) { ++ is_fully_planar = (desc->flags & AV_PIX_FMT_FLAG_PLANAR) && ++ desc->comp[1].plane != desc->comp[2].plane; ++ if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) ++ force_chroma = is_fully_planar ? FORCE_CHROMA_420P : FORCE_CHROMA_420SP; ++ else if (desc->log2_chroma_w == 1 && !desc->log2_chroma_h) ++ force_chroma = is_fully_planar ? FORCE_CHROMA_422P : FORCE_CHROMA_422SP; ++ } ++ ++ switch (force_chroma) { ++ case FORCE_CHROMA_422P: ++ *out_format = AV_PIX_FMT_YUV422P; ++ break; ++ case FORCE_CHROMA_422SP: ++ *out_format = out_depth == 10 ? ++ AV_PIX_FMT_P210 : AV_PIX_FMT_NV16; ++ break; ++ case FORCE_CHROMA_420P: ++ *out_format = AV_PIX_FMT_YUV420P; ++ break; ++ case FORCE_CHROMA_420SP: ++ default: ++ *out_format = out_depth == 10 ? 
++ AV_PIX_FMT_P010 : AV_PIX_FMT_NV12; ++ } ++} ++ ++static av_cold int rgavpp_config_props(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ RGAVppContext *r = ctx->priv; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ AVHWFramesContext *in_frames_ctx; ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ RKRGAParam param = { NULL }; ++ int ret; ++ ++ if (!inlink->hw_frames_ctx) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); ++ return AVERROR(EINVAL); ++ } ++ in_frames_ctx = (AVHWFramesContext *)inlink->hw_frames_ctx->data; ++ in_format = in_frames_ctx->sw_format; ++ out_format = (r->format == AV_PIX_FMT_NONE) ? in_format : r->format; /* no explicit output format requested: keep the input sw format */ ++ ++ config_force_format(ctx, in_format, &out_format); /* may override out_format depending on force_yuv / force_chroma */ ++ ++ ret = set_size_info(ctx, inlink, outlink); ++ if (ret < 0) ++ return ret; ++ ++ param.filter_frame = NULL; ++ param.out_sw_format = out_format; ++ param.in_rotate_mode = r->in_rotate_mode; ++ param.in_crop = r->crop; ++ param.in_crop_x = r->act_x; ++ param.in_crop_y = r->act_y; ++ param.in_crop_w = r->act_w; ++ param.in_crop_h = r->act_h; ++ ++ ret = ff_rkrga_init(ctx, &param); /* pass the filled-in parameter struct by address */ ++ if (ret < 0) ++ return ret; ++ ++ av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d fmt:%s -> w:%d h:%d fmt:%s\n", ++ inlink->w, inlink->h, av_get_pix_fmt_name(in_format), ++ outlink->w, outlink->h, av_get_pix_fmt_name(out_format)); ++ ++ return 0; ++} ++ ++static int rgavpp_activate(AVFilterContext *ctx) ++{ ++ AVFilterLink *inlink = ctx->inputs[0]; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ RGAVppContext *r = ctx->priv; ++ AVFrame *in = NULL; ++ int ret, at_eof = 0, status = 0; ++ int64_t pts = AV_NOPTS_VALUE; ++ ++ FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); ++ ++ if (r->rga.eof) ++ at_eof = 1; ++ else { ++ ret = ff_inlink_consume_frame(inlink, &in); ++ if (ret < 0) ++ return ret; ++ ++ if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { ++ if (status == AVERROR_EOF) { ++ at_eof = 1; ++ } ++ } ++ } ++ ++ if (in) { ++ ret =
ff_rkrga_filter_frame(&r->rga, inlink, in, NULL, NULL); ++ av_frame_free(&in); ++ if (ret < 0) ++ return ret; ++ else if (!r->rga.got_frame) ++ goto not_ready; ++ ++ if (at_eof) { ++ r->rga.eof = 1; ++ goto eof; ++ } ++ ++ if (r->rga.got_frame) { ++ r->rga.got_frame = 0; ++ return 0; ++ } ++ } ++ ++not_ready: ++ if (at_eof) { ++ r->rga.eof = 1; ++ goto eof; ++ } ++ ++ FF_FILTER_FORWARD_WANTED(outlink, inlink); ++ return FFERROR_NOT_READY; ++ ++eof: ++ ff_rkrga_filter_frame(&r->rga, inlink, NULL, NULL, NULL); ++ ++ pts = av_rescale_q(pts, inlink->time_base, outlink->time_base); ++ ff_outlink_set_status(outlink, AVERROR_EOF, pts); ++ return 0; ++} ++ ++static av_cold int rgavpp_init(AVFilterContext *ctx) ++{ ++ return 0; ++} ++ ++static av_cold void rgavpp_uninit(AVFilterContext *ctx) ++{ ++ ff_rkrga_close(ctx); ++} ++ ++#define OFFSET(x) offsetof(RGAVppContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++ ++#define RKRGA_VPP_COMMON_OPTS \ ++ { "force_yuv", "Enforce planar YUV format output", OFFSET(force_yuv), AV_OPT_TYPE_INT, { .i64 = FORCE_YUV_DISABLE }, 0, FORCE_YUV_NB - 1, FLAGS, .unit = "force_yuv" }, \ ++ { "disable", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FORCE_YUV_DISABLE }, 0, 0, FLAGS, .unit = "force_yuv" }, \ ++ { "auto", "Match in/out bit depth", 0, AV_OPT_TYPE_CONST, { .i64 = FORCE_YUV_AUTO }, 0, 0, FLAGS, .unit = "force_yuv" }, \ ++ { "8bit", "8-bit", 0, AV_OPT_TYPE_CONST, { .i64 = FORCE_YUV_8BIT }, 0, 0, FLAGS, .unit = "force_yuv" }, \ ++ { "10bit", "10-bit uncompact/8-bit", 0, AV_OPT_TYPE_CONST, { .i64 = FORCE_YUV_10BIT }, 0, 0, FLAGS, .unit = "force_yuv" }, \ ++ { "force_chroma", "Enforce chroma of planar YUV format output", OFFSET(force_chroma), AV_OPT_TYPE_INT, { .i64 = FORCE_CHROMA_AUTO }, 0, FORCE_CHROMA_NB - 1, FLAGS, .unit = "force_chroma" }, \ ++ { "auto", "Match in/out chroma", 0, AV_OPT_TYPE_CONST, { .i64 = FORCE_CHROMA_AUTO }, 0, 0, FLAGS, .unit = "force_chroma" }, \ ++ { "420sp", "4:2:0 semi-planar", 0, 
AV_OPT_TYPE_CONST, { .i64 = FORCE_CHROMA_420SP }, 0, 0, FLAGS, .unit = "force_chroma" }, \ ++ { "420p", "4:2:0 fully-planar", 0, AV_OPT_TYPE_CONST, { .i64 = FORCE_CHROMA_420P }, 0, 0, FLAGS, .unit = "force_chroma" }, \ ++ { "422sp", "4:2:2 semi-planar", 0, AV_OPT_TYPE_CONST, { .i64 = FORCE_CHROMA_422SP }, 0, 0, FLAGS, .unit = "force_chroma" }, \ ++ { "422p", "4:2:2 fully-planar", 0, AV_OPT_TYPE_CONST, { .i64 = FORCE_CHROMA_422P }, 0, 0, FLAGS, .unit = "force_chroma" }, \ ++ { "core", "Set multicore RGA scheduler core [use with caution]", OFFSET(rga.scheduler_core), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, INT_MAX, FLAGS, .unit = "core" }, \ ++ { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, .unit = "core" }, \ ++ { "rga3_core0", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "core" }, /* RGA3_SCHEDULER_CORE0 */ \ ++ { "rga3_core1", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, .unit = "core" }, /* RGA3_SCHEDULER_CORE1 */ \ ++ { "rga2_core0", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 4 }, 0, 0, FLAGS, .unit = "core" }, /* RGA2_SCHEDULER_CORE0 */ \ ++ { "async_depth", "Set the internal parallelization depth", OFFSET(rga.async_depth), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, 4, .flags = FLAGS }, \ ++ { "afbc", "Enable AFBC (Arm Frame Buffer Compression) to save bandwidth", OFFSET(rga.afbc_out), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS }, ++ ++static const AVFilterPad rgavpp_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ }, ++}; ++ ++static const AVFilterPad rgavpp_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = rgavpp_config_props, ++ }, ++}; ++ ++#if CONFIG_SCALE_RKRGA_FILTER ++ ++static const AVOption rgascale_options[] = { ++ { "w", "Output video width", OFFSET(ow), AV_OPT_TYPE_STRING, { .str = "iw" }, 0, 0, FLAGS }, ++ { "h", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str = "ih" }, 0, 0, FLAGS }, ++ { "format", "Output video pixel format", 
OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, INT_MIN, INT_MAX, .flags = FLAGS }, ++ { "force_original_aspect_ratio", "Decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 2, FLAGS, .unit = "force_oar" }, \ ++ { "disable", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, .unit = "force_oar" }, \ ++ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "force_oar" }, \ ++ { "increase", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, .unit = "force_oar" }, \ ++ { "force_divisible_by", "Enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 256, FLAGS }, \ ++ RKRGA_VPP_COMMON_OPTS ++ { NULL }, ++}; ++ ++static av_cold int rgascale_preinit(AVFilterContext *ctx) ++{ ++ RGAVppContext *r = ctx->priv; ++ ++ r->transpose = -1; ++ return 0; ++} ++ ++AVFILTER_DEFINE_CLASS(rgascale); ++ ++const AVFilter ff_vf_scale_rkrga = { ++ .name = "scale_rkrga", ++ .description = NULL_IF_CONFIG_SMALL("Rockchip RGA (2D Raster Graphic Acceleration) video resizer and format converter"), ++ .priv_size = sizeof(RGAVppContext), ++ .priv_class = &rgascale_class, ++ .preinit = rgascale_preinit, ++ .init = rgavpp_init, ++ .uninit = rgavpp_uninit, ++ FILTER_INPUTS(rgavpp_inputs), ++ FILTER_OUTPUTS(rgavpp_outputs), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), ++ .activate = rgavpp_activate, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; ++ ++#endif ++ ++#if CONFIG_VPP_RKRGA_FILTER ++ ++static const AVOption rgavpp_options[] = { ++ { "w", "Output video width", OFFSET(ow), AV_OPT_TYPE_STRING, { .str = "cw" }, 0, 0, FLAGS }, ++ { "h", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str = "w*ch/cw" }, 0, 0, FLAGS }, ++ { "cw", "Set the width crop area expression", OFFSET(cw), AV_OPT_TYPE_STRING, { .str = "iw" }, 0, 0, FLAGS }, ++ 
{ "ch", "Set the height crop area expression", OFFSET(ch), AV_OPT_TYPE_STRING, { .str = "ih" }, 0, 0, FLAGS }, ++ { "cx", "Set the x crop area expression", OFFSET(cx), AV_OPT_TYPE_STRING, { .str = "(in_w-out_w)/2" }, 0, 0, FLAGS }, ++ { "cy", "Set the y crop area expression", OFFSET(cy), AV_OPT_TYPE_STRING, { .str = "(in_h-out_h)/2" }, 0, 0, FLAGS }, ++ { "format", "Output video pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, INT_MIN, INT_MAX, .flags = FLAGS }, ++ { "transpose", "Set transpose direction", OFFSET(transpose), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 6, FLAGS, .unit = "transpose" }, ++ { "cclock_hflip", "Rotate counter-clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 0, FLAGS, .unit = "transpose" }, ++ { "clock", "Rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, 0, 0, FLAGS, .unit = "transpose" }, ++ { "cclock", "Rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, 0, 0, FLAGS, .unit = "transpose" }, ++ { "clock_hflip", "Rotate clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, 0, 0, FLAGS, .unit = "transpose" }, ++ { "reversal", "Rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, 0, 0, FLAGS, .unit = "transpose" }, ++ { "hflip", "Flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, 0, 0, FLAGS, .unit = "transpose" }, ++ { "vflip", "Flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, 0, 0, FLAGS, .unit = "transpose" }, ++ RKRGA_VPP_COMMON_OPTS ++ { NULL }, ++}; ++ ++AVFILTER_DEFINE_CLASS(rgavpp); ++ ++const AVFilter ff_vf_vpp_rkrga = { ++ .name = "vpp_rkrga", ++ .description = NULL_IF_CONFIG_SMALL("Rockchip RGA (2D Raster Graphic Acceleration) video post-process (scale/crop/transpose)"), ++ .priv_size = sizeof(RGAVppContext), ++ .priv_class = &rgavpp_class, ++ .init = rgavpp_init, ++ .uninit = rgavpp_uninit, ++ FILTER_INPUTS(rgavpp_inputs), ++ 
FILTER_OUTPUTS(rgavpp_outputs), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), ++ .activate = rgavpp_activate, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; ++ ++#endif +Index: FFmpeg/libavutil/Makefile +=================================================================== +--- libavutil/Makefile ++++ libavutil/Makefile +@@ -52,6 +52,7 @@ HEADERS = adler32.h + hwcontext_videotoolbox.h \ + hwcontext_vdpau.h \ + hwcontext_vulkan.h \ ++ hwcontext_rkmpp.h \ + iamf.h \ + imgutils.h \ + intfloat.h \ +@@ -205,6 +206,7 @@ OBJS-$(CONFIG_VAAPI) + OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o + OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o + OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o vulkan.o ++OBJS-$(CONFIG_RKMPP) += hwcontext_rkmpp.o + + OBJS-$(!CONFIG_VULKAN) += hwcontext_stub.o + +@@ -228,6 +230,7 @@ SKIPHEADERS-$(CONFIG_VDPAU) + + SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h vulkan.h \ + vulkan_functions.h \ + vulkan_loader.h ++SKIPHEADERS-$(CONFIG_RKMPP) += hwcontext_rkmpp.h + + TESTPROGS = adler32 \ + aes \ +Index: FFmpeg/libavutil/hwcontext.c +=================================================================== +--- libavutil/hwcontext.c ++++ libavutil/hwcontext.c +@@ -66,6 +66,9 @@ static const HWContextType * const hw_ta + #if CONFIG_VULKAN + &ff_hwcontext_type_vulkan, + #endif ++#if CONFIG_RKMPP ++ &ff_hwcontext_type_rkmpp, ++#endif + NULL, + }; + +@@ -82,6 +85,7 @@ static const char *const hw_type_names[] + [AV_HWDEVICE_TYPE_VIDEOTOOLBOX] = "videotoolbox", + [AV_HWDEVICE_TYPE_MEDIACODEC] = "mediacodec", + [AV_HWDEVICE_TYPE_VULKAN] = "vulkan", ++ [AV_HWDEVICE_TYPE_RKMPP] = "rkmpp", + }; + + enum AVHWDeviceType av_hwdevice_find_type_by_name(const char *name) +Index: FFmpeg/libavutil/hwcontext.h +=================================================================== +--- libavutil/hwcontext.h ++++ libavutil/hwcontext.h +@@ -38,6 +38,7 @@ enum AVHWDeviceType { + AV_HWDEVICE_TYPE_MEDIACODEC, + AV_HWDEVICE_TYPE_VULKAN, + AV_HWDEVICE_TYPE_D3D12VA, ++ 
AV_HWDEVICE_TYPE_RKMPP, + AV_HWDEVICE_TYPE_NB, ///< number of hw device types, not part of API/ABI. + }; + +Index: FFmpeg/libavutil/hwcontext_internal.h +=================================================================== +--- libavutil/hwcontext_internal.h ++++ libavutil/hwcontext_internal.h +@@ -163,6 +163,7 @@ extern const HWContextType ff_hwcontext_ + extern const HWContextType ff_hwcontext_type_videotoolbox; + extern const HWContextType ff_hwcontext_type_mediacodec; + extern const HWContextType ff_hwcontext_type_vulkan; ++extern const HWContextType ff_hwcontext_type_rkmpp; + + typedef struct FFHWDeviceContext { + /** +Index: FFmpeg/libavutil/hwcontext_opencl.c +=================================================================== +--- libavutil/hwcontext_opencl.c ++++ libavutil/hwcontext_opencl.c +@@ -83,6 +83,16 @@ typedef CL_API_ENTRY cl_mem(CL_API_CALL + #include <CL/cl_ext.h> + #include <drm_fourcc.h> + #include "hwcontext_drm.h" ++ ++typedef intptr_t cl_import_properties_arm; ++typedef CL_API_ENTRY cl_mem(CL_API_CALL *clImportMemoryARM_fn)( ++ cl_context context, ++ cl_mem_flags flags, ++ const cl_import_properties_arm *properties, ++ void *memory, ++ size_t size, ++ cl_int *errcode_ret); ++ + #endif + + #if HAVE_OPENCL_VIDEOTOOLBOX +@@ -159,6 +169,8 @@ typedef struct OpenCLDeviceContext { + + #if HAVE_OPENCL_DRM_ARM + int drm_arm_mapping_usable; ++ clImportMemoryARM_fn ++ clImportMemoryARM; + #endif + } OpenCLDeviceContext; + +@@ -942,7 +954,8 @@ static int opencl_device_init(AVHWDevice + fail = 1; + } + +- // clImportMemoryARM() is linked statically.
++ CL_FUNC(clImportMemoryARM, ++ "DRM to OpenCL mapping on ARM"); + + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "DRM to OpenCL mapping on ARM " +@@ -1419,6 +1432,7 @@ static int opencl_device_derive(AVHWDevi + + #if HAVE_OPENCL_DRM_ARM + case AV_HWDEVICE_TYPE_DRM: ++ case AV_HWDEVICE_TYPE_RKMPP: + { + OpenCLDeviceSelector selector = { + .platform_index = -1, +@@ -3237,7 +3251,8 @@ static int opencl_map_from_drm_arm(AVHWF + { + AVHWFramesContext *src_fc = + (AVHWFramesContext*)src->hw_frames_ctx->data; +- AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; ++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->hwctx; ++ AVOpenCLDeviceContext *dst_dev = &device_priv->p; + const AVDRMFrameDescriptor *desc; + DRMARMtoOpenCLMapping *mapping = NULL; + cl_mem_flags cl_flags; +@@ -3271,8 +3286,8 @@ static int opencl_map_from_drm_arm(AVHWF + } + + mapping->object_buffers[i] = +- clImportMemoryARM(dst_dev->context, cl_flags, props, +- &fd, desc->objects[i].size, &cle); ++ device_priv->clImportMemoryARM(dst_dev->context, cl_flags, props, ++ &fd, desc->objects[i].size, &cle); + if (!mapping->object_buffers[i]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL buffer " + "from object %d (fd %d, size %"SIZE_SPECIFIER") of DRM frame: %d.\n", +@@ -3303,6 +3318,8 @@ static int opencl_map_from_drm_arm(AVHWF + goto fail; + } + ++ image_desc.image_row_pitch = plane->pitch; ++ + region.origin = plane->offset; + region.size = image_desc.image_row_pitch * + image_desc.image_height; +Index: FFmpeg/libavutil/hwcontext_rkmpp.c +=================================================================== +--- /dev/null ++++ libavutil/hwcontext_rkmpp.c +@@ -0,0 +1,598 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. 
++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "config.h" ++ ++#define _GNU_SOURCE ++#include <fcntl.h> ++#include <sys/mman.h> ++#include <unistd.h> ++ ++/* This was introduced in version 4.6. And may not exist all without an ++ * optional package. So to prevent a hard dependency on needing the Linux ++ * kernel headers to compile, make this optional. */ ++#if HAVE_LINUX_DMA_BUF_H ++#include <linux/dma-buf.h> ++#include <sys/ioctl.h> ++#endif ++ ++#include "avassert.h" ++#include "hwcontext.h" ++#include "hwcontext_rkmpp.h" ++#include "hwcontext_internal.h" ++#include "imgutils.h" ++ ++static const struct { ++ enum AVPixelFormat pixfmt; ++ uint32_t drm_format; ++} supported_formats[] = { ++ /* grayscale */ ++ { AV_PIX_FMT_GRAY8, DRM_FORMAT_R8 }, ++ /* fully-planar YUV */ ++ { AV_PIX_FMT_YUV420P, DRM_FORMAT_YUV420, }, ++ { AV_PIX_FMT_YUV422P, DRM_FORMAT_YUV422, }, ++ { AV_PIX_FMT_YUV444P, DRM_FORMAT_YUV444, }, ++ /* semi-planar YUV */ ++ { AV_PIX_FMT_NV12, DRM_FORMAT_NV12, }, ++ { AV_PIX_FMT_NV21, DRM_FORMAT_NV21, }, ++ { AV_PIX_FMT_NV16, DRM_FORMAT_NV16, }, ++ { AV_PIX_FMT_NV24, DRM_FORMAT_NV24, }, ++ /* semi-planar YUV 10-bit */ ++ { AV_PIX_FMT_P010, DRM_FORMAT_P010, }, ++ { AV_PIX_FMT_P210, DRM_FORMAT_P210, }, ++ { AV_PIX_FMT_NV15, DRM_FORMAT_NV15, }, ++ { AV_PIX_FMT_NV20, DRM_FORMAT_NV20, }, ++ /* packed YUV */ ++ { AV_PIX_FMT_YUYV422, DRM_FORMAT_YUYV, }, ++ { AV_PIX_FMT_YVYU422, DRM_FORMAT_YVYU, }, ++ { AV_PIX_FMT_UYVY422, DRM_FORMAT_UYVY, }, ++ /* packed RGB */ ++ { AV_PIX_FMT_RGB444LE, DRM_FORMAT_XRGB4444, }, ++ { AV_PIX_FMT_RGB444BE, DRM_FORMAT_XRGB4444 |
DRM_FORMAT_BIG_ENDIAN, }, ++ { AV_PIX_FMT_BGR444LE, DRM_FORMAT_XBGR4444, }, ++ { AV_PIX_FMT_BGR444BE, DRM_FORMAT_XBGR4444 | DRM_FORMAT_BIG_ENDIAN, }, ++ { AV_PIX_FMT_RGB555LE, DRM_FORMAT_XRGB1555, }, ++ { AV_PIX_FMT_RGB555BE, DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN, }, ++ { AV_PIX_FMT_BGR555LE, DRM_FORMAT_XBGR1555, }, ++ { AV_PIX_FMT_BGR555BE, DRM_FORMAT_XBGR1555 | DRM_FORMAT_BIG_ENDIAN, }, ++ { AV_PIX_FMT_RGB565LE, DRM_FORMAT_RGB565, }, ++ { AV_PIX_FMT_RGB565BE, DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN, }, ++ { AV_PIX_FMT_BGR565LE, DRM_FORMAT_BGR565, }, ++ { AV_PIX_FMT_BGR565BE, DRM_FORMAT_BGR565 | DRM_FORMAT_BIG_ENDIAN, }, ++ { AV_PIX_FMT_RGB24, DRM_FORMAT_RGB888, }, ++ { AV_PIX_FMT_BGR24, DRM_FORMAT_BGR888, }, ++ { AV_PIX_FMT_RGBA, DRM_FORMAT_ABGR8888, }, ++ { AV_PIX_FMT_RGB0, DRM_FORMAT_XBGR8888, }, ++ { AV_PIX_FMT_BGRA, DRM_FORMAT_ARGB8888, }, ++ { AV_PIX_FMT_BGR0, DRM_FORMAT_XRGB8888, }, ++ { AV_PIX_FMT_ARGB, DRM_FORMAT_BGRA8888, }, ++ { AV_PIX_FMT_0RGB, DRM_FORMAT_BGRX8888, }, ++ { AV_PIX_FMT_ABGR, DRM_FORMAT_RGBA8888, }, ++ { AV_PIX_FMT_0BGR, DRM_FORMAT_RGBX8888, }, ++ { AV_PIX_FMT_X2RGB10LE, DRM_FORMAT_XRGB2101010, }, ++ { AV_PIX_FMT_X2RGB10BE, DRM_FORMAT_XRGB2101010 | DRM_FORMAT_BIG_ENDIAN, }, ++ { AV_PIX_FMT_X2BGR10LE, DRM_FORMAT_XBGR2101010, }, ++ { AV_PIX_FMT_X2BGR10BE, DRM_FORMAT_XBGR2101010 | DRM_FORMAT_BIG_ENDIAN, }, ++}; ++ ++static int rkmpp_device_create(AVHWDeviceContext *hwdev, const char *device, ++ AVDictionary *opts, int flags) ++{ ++ AVRKMPPDeviceContext *hwctx = hwdev->hwctx; ++ AVDictionaryEntry *opt_d = NULL; ++ ++ hwctx->flags = MPP_BUFFER_FLAGS_DMA32 | MPP_BUFFER_FLAGS_CACHABLE; ++ ++ opt_d = av_dict_get(opts, "dma32", NULL, 0); ++ if (opt_d && !strtol(opt_d->value, NULL, 10)) ++ hwctx->flags &= ~MPP_BUFFER_FLAGS_DMA32; ++ ++ opt_d = av_dict_get(opts, "cacheable", NULL, 0); ++ if (opt_d && !strtol(opt_d->value, NULL, 10)) ++ hwctx->flags &= ~MPP_BUFFER_FLAGS_CACHABLE; ++ ++ return 0; ++} ++ ++static int 
rkmpp_frames_get_constraints(AVHWDeviceContext *hwdev, ++ const void *hwconfig, ++ AVHWFramesConstraints *constraints) ++{ ++ int i; ++ ++ constraints->min_width = 16; ++ constraints->min_height = 16; ++ ++ constraints->valid_hw_formats = ++ av_malloc_array(2, sizeof(enum AVPixelFormat)); ++ if (!constraints->valid_hw_formats) ++ return AVERROR(ENOMEM); ++ constraints->valid_hw_formats[0] = AV_PIX_FMT_DRM_PRIME; ++ constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; ++ ++ constraints->valid_sw_formats = ++ av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1, ++ sizeof(enum AVPixelFormat)); ++ if (!constraints->valid_sw_formats) ++ return AVERROR(ENOMEM); ++ for(i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ constraints->valid_sw_formats[i] = supported_formats[i].pixfmt; ++ constraints->valid_sw_formats[i] = AV_PIX_FMT_NONE; ++ ++ return 0; ++} ++ ++static void rkmpp_free_drm_frame_descriptor(void *opaque, uint8_t *data) ++{ ++ ++ MppBuffer mpp_buf = opaque; ++ AVRKMPPDRMFrameDescriptor *desc = (AVRKMPPDRMFrameDescriptor *)data; ++ int ret; ++ ++ if (!desc) ++ return; ++ ++ if (mpp_buf) { ++ ret = mpp_buffer_put(mpp_buf); ++ if (ret != MPP_OK) ++ av_log(NULL, AV_LOG_WARNING, ++ "Failed to put MPP buffer: %d\n", ret); ++ } ++ ++ memset(desc, 0, sizeof(*desc)); ++ av_free(desc); ++} ++ ++static int rkmpp_get_aligned_linesize(enum AVPixelFormat pix_fmt, int width, int plane) ++{ ++ const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(pix_fmt); ++ const int is_rgb = pixdesc->flags & AV_PIX_FMT_FLAG_RGB; ++ const int is_yuv = !is_rgb && pixdesc->nb_components >= 2; ++ const int is_planar = pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR; ++ const int is_packed_fmt = is_rgb || (!is_rgb && !is_planar); ++ const int is_fully_planar = is_planar && ++ pixdesc->comp[1].plane != pixdesc->comp[2].plane; ++ int linesize; ++ ++ if (pix_fmt == AV_PIX_FMT_NV15 || ++ pix_fmt == AV_PIX_FMT_NV20) { ++ const int log2_chroma_w = plane == 1 ? 
1 : 0; ++ const int width_align_256_odds = FFALIGN(width << log2_chroma_w, 256) | 256; ++ return FFALIGN(width_align_256_odds * 10 / 8, 64); ++ } ++ ++ linesize = av_image_get_linesize(pix_fmt, width, plane); ++ ++ if (is_packed_fmt) { ++ const int pixel_width = av_get_padded_bits_per_pixel(pixdesc) / 8; ++ linesize = FFALIGN(linesize / pixel_width, 8) * pixel_width; ++ } else if (is_yuv && is_fully_planar) { ++ linesize = FFALIGN(linesize, 8); ++ } else ++ linesize = FFALIGN(linesize, 64); ++ ++ return linesize; ++} ++ ++static AVBufferRef *rkmpp_drm_pool_alloc(void *opaque, size_t size) ++{ ++ int ret; ++ AVHWFramesContext *hwfc = opaque; ++ AVRKMPPFramesContext *avfc = hwfc->hwctx; ++ AVRKMPPDRMFrameDescriptor *desc; ++ AVDRMLayerDescriptor *layer; ++ AVBufferRef *ref; ++ ++ int i; ++ const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(hwfc->sw_format); ++ const int bits_pp = av_get_padded_bits_per_pixel(pixdesc); ++ const int aligned_w = FFALIGN(hwfc->width * 5 / 4, 64); ++ const int aligned_h = FFALIGN(hwfc->height * 5 / 4, 64); ++ ++ MppBuffer mpp_buf = NULL; ++ size_t mpp_buf_size = aligned_w * aligned_h * bits_pp / 8; ++ ++ if (hwfc->initial_pool_size > 0 && ++ avfc->nb_frames >= hwfc->initial_pool_size) ++ return NULL; ++ ++ desc = av_mallocz(sizeof(*desc)); ++ if (!desc) ++ return NULL; ++ ++ desc->drm_desc.nb_objects = 1; ++ desc->drm_desc.nb_layers = 1; ++ ++ ret = mpp_buffer_get(avfc->buf_group, &mpp_buf, mpp_buf_size); ++ if (ret != MPP_OK || !mpp_buf) { ++ /* Log through hwfc: AVRKMPPDeviceContext holds only flags and has no leading AVClass pointer for av_log(). */ ++ av_log(hwfc, AV_LOG_ERROR, "Failed to get MPP buffer: %d\n", ret); ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ desc->buffers[0] = mpp_buf; ++ ++ desc->drm_desc.objects[0].fd = mpp_buffer_get_fd(mpp_buf); ++ desc->drm_desc.objects[0].size = mpp_buffer_get_size(mpp_buf); ++ ++ layer = &desc->drm_desc.layers[0]; ++ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { ++ if (supported_formats[i].pixfmt == hwfc->sw_format) { ++
layer->format = supported_formats[i].drm_format; ++ break; ++ } ++ } ++ layer->nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); ++ layer->planes[0].object_index = 0; ++ layer->planes[0].offset = 0; ++ layer->planes[0].pitch = ++ rkmpp_get_aligned_linesize(hwfc->sw_format, hwfc->width, 0); ++ ++ for (i = 1; i < layer->nb_planes; i++) { ++ layer->planes[i].object_index = 0; ++ layer->planes[i].offset = ++ layer->planes[i-1].offset + ++ layer->planes[i-1].pitch * (FFALIGN(hwfc->height, 2) >> (i > 1 ? pixdesc->log2_chroma_h : 0)); ++ layer->planes[i].pitch = ++ rkmpp_get_aligned_linesize(hwfc->sw_format, hwfc->width, i); ++ } ++ ++ ref = av_buffer_create((uint8_t*)desc, sizeof(*desc), rkmpp_free_drm_frame_descriptor, ++ mpp_buf, 0); ++ if (!ref) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to create RKMPP buffer.\n"); ++ goto fail; ++ } ++ ++ if (hwfc->initial_pool_size > 0) { ++ av_assert0(avfc->nb_frames < hwfc->initial_pool_size); ++ memcpy(&avfc->frames[avfc->nb_frames], desc, sizeof(*desc)); ++ ++avfc->nb_frames; ++ } ++ ++ return ref; ++ ++fail: ++ rkmpp_free_drm_frame_descriptor(mpp_buf, (uint8_t *)desc); ++ return NULL; ++} ++ ++static int rkmpp_frames_init(AVHWFramesContext *hwfc) ++{ ++ AVRKMPPFramesContext *avfc = hwfc->hwctx; ++ AVRKMPPDeviceContext *hwctx = hwfc->device_ctx->hwctx; ++ int i, ret; ++ ++ if (hwfc->pool) ++ return 0; ++ ++ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i].pixfmt == hwfc->sw_format) ++ break; ++ if (i >= FF_ARRAY_ELEMS(supported_formats)) { ++ av_log(hwfc, AV_LOG_ERROR, "Unsupported format: %s.\n", ++ av_get_pix_fmt_name(hwfc->sw_format)); ++ return AVERROR(EINVAL); ++ } ++ ++ avfc->nb_frames = 0; ++ avfc->frames = NULL; ++ if (hwfc->initial_pool_size > 0) { ++ avfc->frames = av_malloc(hwfc->initial_pool_size * ++ sizeof(*avfc->frames)); ++ if (!avfc->frames) ++ return AVERROR(ENOMEM); ++ } ++ ++ ret = mpp_buffer_group_get_internal(&avfc->buf_group, MPP_BUFFER_TYPE_DRM | hwctx->flags); ++ if 
(ret != MPP_OK) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to get MPP internal buffer group: %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } ++ ++ ffhwframesctx(hwfc)->pool_internal = ++ av_buffer_pool_init2(sizeof(AVRKMPPDRMFrameDescriptor), hwfc, ++ rkmpp_drm_pool_alloc, NULL); ++ if (!ffhwframesctx(hwfc)->pool_internal) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to create RKMPP buffer pool.\n"); ++ return AVERROR(ENOMEM); ++ } ++ ++ return 0; ++} ++ ++static void rkmpp_frames_uninit(AVHWFramesContext *hwfc) ++{ ++ AVRKMPPFramesContext *avfc = hwfc->hwctx; ++ ++ av_freep(&avfc->frames); ++ ++ if (avfc->buf_group) { ++ mpp_buffer_group_put(avfc->buf_group); ++ avfc->buf_group = NULL; ++ } ++} ++ ++static int rkmpp_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame) ++{ ++ frame->buf[0] = av_buffer_pool_get(hwfc->pool); ++ if (!frame->buf[0]) ++ return AVERROR(ENOMEM); ++ ++ frame->data[0] = (uint8_t*)frame->buf[0]->data; ++ ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ frame->width = hwfc->width; ++ frame->height = hwfc->height; ++ ++ return 0; ++} ++ ++typedef struct RKMPPDRMMapping { ++ // Address and length of each mmap()ed region. 
++ int nb_regions; ++ int sync_flags; ++ int object[AV_DRM_MAX_PLANES]; ++ void *address[AV_DRM_MAX_PLANES]; ++ size_t length[AV_DRM_MAX_PLANES]; ++ int unmap[AV_DRM_MAX_PLANES]; ++} RKMPPDRMMapping; ++ ++static void rkmpp_unmap_frame(AVHWFramesContext *hwfc, ++ HWMapDescriptor *hwmap) ++{ ++ AVRKMPPDeviceContext *hwctx = hwfc->device_ctx->hwctx; ++ RKMPPDRMMapping *map = hwmap->priv; ++ ++ for (int i = 0; i < map->nb_regions; i++) { ++#if HAVE_LINUX_DMA_BUF_H ++ struct dma_buf_sync sync = { .flags = DMA_BUF_SYNC_END | map->sync_flags }; ++ if (hwctx->flags & MPP_BUFFER_FLAGS_CACHABLE) ++ ioctl(map->object[i], DMA_BUF_IOCTL_SYNC, &sync); ++#endif ++ if (map->address[i] && map->unmap[i]) ++ munmap(map->address[i], map->length[i]); ++ } ++ ++ av_free(map); ++} ++ ++static int rkmpp_map_frame(AVHWFramesContext *hwfc, ++ AVFrame *dst, const AVFrame *src, int flags) ++{ ++ AVRKMPPDeviceContext *hwctx = hwfc->device_ctx->hwctx; ++ const AVRKMPPDRMFrameDescriptor *desc = (AVRKMPPDRMFrameDescriptor *)src->data[0]; ++#if HAVE_LINUX_DMA_BUF_H ++ struct dma_buf_sync sync_start = { 0 }; ++#endif ++ RKMPPDRMMapping *map; ++ int err, i, p, plane; ++ int mmap_prot; ++ void *addr; ++ ++ if (desc->drm_desc.objects[0].format_modifier != DRM_FORMAT_MOD_LINEAR) { ++ av_log(hwfc, AV_LOG_ERROR, "Transfer non-linear DRM_PRIME frame is not supported!\n"); ++ return AVERROR(ENOSYS); /* map is not allocated yet, so this early return leaks nothing */ ++ } ++ ++ map = av_mallocz(sizeof(*map)); ++ if (!map) ++ return AVERROR(ENOMEM); ++ ++ mmap_prot = 0; ++ if (flags & AV_HWFRAME_MAP_READ) ++ mmap_prot |= PROT_READ; ++ if (flags & AV_HWFRAME_MAP_WRITE) ++ mmap_prot |= PROT_WRITE; ++ ++#if HAVE_LINUX_DMA_BUF_H ++ if (flags & AV_HWFRAME_MAP_READ) ++ map->sync_flags |= DMA_BUF_SYNC_READ; ++ if (flags & AV_HWFRAME_MAP_WRITE) ++ map->sync_flags |= DMA_BUF_SYNC_WRITE; ++ sync_start.flags = DMA_BUF_SYNC_START | map->sync_flags; ++#endif ++ ++ av_assert0(desc->drm_desc.nb_objects <= AV_DRM_MAX_PLANES); ++ for (i = 0; i < desc->drm_desc.nb_objects; i++) { ++ addr =
NULL; ++ if (desc->buffers[i]) ++ addr = mpp_buffer_get_ptr(desc->buffers[i]); ++ ++ if (addr) { ++ map->unmap[i] = 0; ++ } else { ++ addr = mmap(NULL, desc->drm_desc.objects[i].size, mmap_prot, MAP_SHARED, ++ desc->drm_desc.objects[i].fd, 0); ++ if (addr == MAP_FAILED) { ++ err = AVERROR(errno); ++ av_log(hwfc, AV_LOG_ERROR, "Failed to map RKMPP object %d to " ++ "memory: %d.\n", desc->drm_desc.objects[i].fd, errno); ++ goto fail; ++ } ++ map->unmap[i] = 1; ++ } ++ ++ map->address[i] = addr; ++ map->length[i] = desc->drm_desc.objects[i].size; ++ map->object[i] = desc->drm_desc.objects[i].fd; ++ ++#if HAVE_LINUX_DMA_BUF_H ++ /* We're not checking for errors here because the kernel may not ++ * support the ioctl, in which case its okay to carry on */ ++ if (hwctx->flags & MPP_BUFFER_FLAGS_CACHABLE) ++ ioctl(desc->drm_desc.objects[i].fd, DMA_BUF_IOCTL_SYNC, &sync_start); ++#endif ++ } ++ map->nb_regions = i; ++ ++ plane = 0; ++ for (i = 0; i < desc->drm_desc.nb_layers; i++) { ++ const AVDRMLayerDescriptor *layer = &desc->drm_desc.layers[i]; ++ for (p = 0; p < layer->nb_planes; p++) { ++ dst->data[plane] = ++ (uint8_t*)map->address[layer->planes[p].object_index] + ++ layer->planes[p].offset; ++ dst->linesize[plane] = layer->planes[p].pitch; ++ ++plane; ++ } ++ } ++ av_assert0(plane <= AV_DRM_MAX_PLANES); ++ ++ dst->width = src->width; ++ dst->height = src->height; ++ ++ err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, ++ &rkmpp_unmap_frame, map); ++ if (err < 0) ++ goto fail; ++ ++ return 0; ++ ++fail: ++ for (i = 0; i < desc->drm_desc.nb_objects; i++) { ++ if (map->address[i] && map->unmap[i]) ++ munmap(map->address[i], map->length[i]); ++ } ++ av_free(map); ++ return err; ++} ++ ++static int rkmpp_transfer_get_formats(AVHWFramesContext *ctx, ++ enum AVHWFrameTransferDirection dir, ++ enum AVPixelFormat **formats) ++{ ++ enum AVPixelFormat *pix_fmts; ++ ++ pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); ++ if (!pix_fmts) ++ return AVERROR(ENOMEM); ++ ++ 
pix_fmts[0] = ctx->sw_format; ++ pix_fmts[1] = AV_PIX_FMT_NONE; ++ ++ *formats = pix_fmts; ++ return 0; ++} ++ ++static int rkmpp_transfer_data_from(AVHWFramesContext *hwfc, ++ AVFrame *dst, const AVFrame *src) ++{ ++ AVFrame *map; ++ int err; ++ ++ if (dst->width > hwfc->width || dst->height > hwfc->height) ++ return AVERROR(EINVAL); ++ ++ map = av_frame_alloc(); ++ if (!map) ++ return AVERROR(ENOMEM); ++ map->format = dst->format; ++ ++ err = rkmpp_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ); ++ if (err) ++ goto fail; ++ ++ map->width = dst->width; ++ map->height = dst->height; ++ ++ err = av_frame_copy(dst, map); ++ if (err) ++ goto fail; ++ ++ err = 0; ++fail: ++ av_frame_free(&map); ++ return err; ++} ++ ++static int rkmpp_transfer_data_to(AVHWFramesContext *hwfc, ++ AVFrame *dst, const AVFrame *src) ++{ ++ AVFrame *map; ++ int err; ++ ++ if (src->width > hwfc->width || src->height > hwfc->height) ++ return AVERROR(EINVAL); ++ ++ map = av_frame_alloc(); ++ if (!map) ++ return AVERROR(ENOMEM); ++ map->format = src->format; ++ ++ err = rkmpp_map_frame(hwfc, map, dst, AV_HWFRAME_MAP_WRITE | ++ AV_HWFRAME_MAP_OVERWRITE); ++ if (err) ++ goto fail; ++ ++ map->width = src->width; ++ map->height = src->height; ++ ++ err = av_frame_copy(map, src); ++ if (err) ++ goto fail; ++ ++ err = 0; ++fail: ++ av_frame_free(&map); ++ return err; ++} ++ ++static int rkmpp_map_from(AVHWFramesContext *hwfc, AVFrame *dst, ++ const AVFrame *src, int flags) ++{ ++ int err; ++ ++ if (hwfc->sw_format != dst->format) ++ return AVERROR(ENOSYS); ++ ++ err = rkmpp_map_frame(hwfc, dst, src, flags); ++ if (err) ++ return err; ++ ++ err = av_frame_copy_props(dst, src); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++const HWContextType ff_hwcontext_type_rkmpp = { ++ .type = AV_HWDEVICE_TYPE_RKMPP, ++ .name = "RKMPP", ++ ++ .device_hwctx_size = sizeof(AVRKMPPDeviceContext), ++ .frames_hwctx_size = sizeof(AVRKMPPFramesContext), ++ ++ .device_create = &rkmpp_device_create, ++ ++ 
.frames_get_constraints = &rkmpp_frames_get_constraints, ++ ++ .frames_get_buffer = &rkmpp_get_buffer, ++ .frames_init = &rkmpp_frames_init, ++ .frames_uninit = &rkmpp_frames_uninit, ++ .transfer_get_formats = &rkmpp_transfer_get_formats, ++ .transfer_data_to = &rkmpp_transfer_data_to, ++ .transfer_data_from = &rkmpp_transfer_data_from, ++ .map_from = &rkmpp_map_from, ++ ++ .pix_fmts = (const enum AVPixelFormat[]) { ++ AV_PIX_FMT_DRM_PRIME, ++ AV_PIX_FMT_NONE ++ }, ++}; +Index: FFmpeg/libavutil/hwcontext_rkmpp.h +=================================================================== +--- /dev/null ++++ libavutil/hwcontext_rkmpp.h +@@ -0,0 +1,110 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVUTIL_HWCONTEXT_RKMPP_H ++#define AVUTIL_HWCONTEXT_RKMPP_H ++ ++#include ++#include ++#include ++#include ++ ++#include "hwcontext_drm.h" ++ ++#ifndef DRM_FORMAT_P010 ++#define DRM_FORMAT_P010 fourcc_code('P', '0', '1', '0') ++#endif ++#ifndef DRM_FORMAT_P210 ++#define DRM_FORMAT_P210 fourcc_code('P', '2', '1', '0') ++#endif ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++#ifndef DRM_FORMAT_YUV420_8BIT ++#define DRM_FORMAT_YUV420_8BIT fourcc_code('Y', 'U', '0', '8') ++#endif ++#ifndef DRM_FORMAT_YUV420_10BIT ++#define DRM_FORMAT_YUV420_10BIT fourcc_code('Y', 'U', '1', '0') ++#endif ++#ifndef DRM_FORMAT_Y210 ++#define DRM_FORMAT_Y210 fourcc_code('Y', '2', '1', '0') ++#endif ++ ++#ifndef DRM_FORMAT_MOD_VENDOR_ARM ++#define DRM_FORMAT_MOD_VENDOR_ARM 0x08 ++#endif ++#ifndef DRM_FORMAT_MOD_ARM_TYPE_AFBC ++#define DRM_FORMAT_MOD_ARM_TYPE_AFBC 0x00 ++#endif ++ ++#define drm_is_afbc(mod) \ ++ ((mod >> 52) == (DRM_FORMAT_MOD_ARM_TYPE_AFBC | \ ++ (DRM_FORMAT_MOD_VENDOR_ARM << 4))) ++ ++/** ++ * DRM Prime Frame descriptor for RKMPP HWDevice. ++ */ ++typedef struct AVRKMPPDRMFrameDescriptor { ++ /** ++ * Backwards compatibility with AVDRMFrameDescriptor. ++ */ ++ AVDRMFrameDescriptor drm_desc; ++ ++ /** ++ * References to MppBuffer instances which are used ++ * on each drm frame index. ++ */ ++ MppBuffer buffers[AV_DRM_MAX_PLANES]; ++} AVRKMPPDRMFrameDescriptor; ++ ++/** ++ * RKMPP-specific data associated with a frame pool. ++ * ++ * Allocated as AVHWFramesContext.hwctx. ++ */ ++typedef struct AVRKMPPFramesContext { ++ /** ++ * MPP buffer group. 
++ */ ++ MppBufferGroup buf_group; ++ ++ /** ++ * The descriptors of all frames in the pool after creation. ++ * Only valid if AVHWFramesContext.initial_pool_size was positive. ++ * These are intended to be used as the buffer of RKMPP decoder. ++ */ ++ AVRKMPPDRMFrameDescriptor *frames; ++ int nb_frames; ++} AVRKMPPFramesContext; ++ ++/** ++ * RKMPP device details. ++ * ++ * Allocated as AVHWDeviceContext.hwctx ++ */ ++typedef struct AVRKMPPDeviceContext { ++ /** ++ * MPP buffer allocation flags. ++ */ ++ int flags; ++} AVRKMPPDeviceContext; ++ ++#endif /* AVUTIL_HWCONTEXT_RKMPP_H */ +Index: FFmpeg/libavutil/pixdesc.c +=================================================================== +--- libavutil/pixdesc.c ++++ libavutil/pixdesc.c +@@ -2791,6 +2791,30 @@ static const AVPixFmtDescriptor av_pix_f + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, ++ [AV_PIX_FMT_NV15] = { ++ .name = "nv15", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 1, ++ .comp = { ++ { 0, 10, 0, 0, 10 }, /* Y */ ++ { 1, 20, 0, 0, 10 }, /* U */ ++ { 1, 20, 10, 0, 10 }, /* V */ ++ }, ++ .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BITSTREAM, ++ }, ++ [AV_PIX_FMT_NV20] = { ++ .name = "nv20", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 0, ++ .comp = { ++ { 0, 10, 0, 0, 10 }, /* Y */ ++ { 1, 20, 0, 0, 10 }, /* U */ ++ { 1, 20, 10, 0, 10 }, /* V */ ++ }, ++ .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BITSTREAM, ++ }, + }; + + static const char * const color_range_names[] = { +Index: FFmpeg/libavutil/pixfmt.h +=================================================================== +--- libavutil/pixfmt.h ++++ libavutil/pixfmt.h +@@ -196,8 +196,8 @@ enum AVPixelFormat { + AV_PIX_FMT_XYZ12LE, ///< packed XYZ 4:4:4, 36 bpp, (msb) 12X, 12Y, 12Z (lsb), the 2-byte value for each X/Y/Z is stored as little-endian, the 4 lower bits are set to 0 + AV_PIX_FMT_XYZ12BE, ///< packed XYZ 4:4:4, 36 bpp, (msb) 12X, 12Y, 12Z (lsb), the 2-byte value for each X/Y/Z is stored as 
big-endian, the 4 lower bits are set to 0 + AV_PIX_FMT_NV16, ///< interleaved chroma YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples) +- AV_PIX_FMT_NV20LE, ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian +- AV_PIX_FMT_NV20BE, ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian ++ AV_PIX_FMT_NV20LE, ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian, deprecated in favor of AV_PIX_FMT_NV20 ++ AV_PIX_FMT_NV20BE, ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian, deprecated in favor of AV_PIX_FMT_NV20 + + AV_PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian + AV_PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian +@@ -439,6 +439,9 @@ enum AVPixelFormat { + */ + AV_PIX_FMT_D3D12, + ++ AV_PIX_FMT_NV15, ///< like P010, but has no zero padding bits, 15bpp, bitstream ++ AV_PIX_FMT_NV20, ///< like P210, but has no zero padding bits, 20bpp, bitstream ++ + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions + }; + +@@ -523,7 +526,6 @@ enum AVPixelFormat { + #define AV_PIX_FMT_YUVA444P16 AV_PIX_FMT_NE(YUVA444P16BE, YUVA444P16LE) + + #define AV_PIX_FMT_XYZ12 AV_PIX_FMT_NE(XYZ12BE, XYZ12LE) +-#define AV_PIX_FMT_NV20 AV_PIX_FMT_NE(NV20BE, NV20LE) + #define AV_PIX_FMT_AYUV64 AV_PIX_FMT_NE(AYUV64BE, AYUV64LE) + #define AV_PIX_FMT_P010 AV_PIX_FMT_NE(P010BE, P010LE) + #define AV_PIX_FMT_P012 AV_PIX_FMT_NE(P012BE, P012LE) +Index: FFmpeg/libswscale/input.c +=================================================================== +--- libswscale/input.c ++++ libswscale/input.c +@@ -793,6 +793,39 @@ static void nv21ToUV_c(uint8_t 
*dstU, ui + nvXXtoUV_c(dstV, dstU, src1, width); + } + ++static av_always_inline void nv15_20ToYUV_c(uint16_t *dst, const uint8_t *src, ++ int dst_pos, int src_pos) ++{ ++ int shift = (src_pos << 1) & 7; ++ src_pos = (src_pos * 10) >> 3; ++ AV_WN16(dst + dst_pos, ++ ((AV_RL16(src + src_pos) >> shift) | ++ (AV_RL16(src + src_pos + 1) << (8 - shift))) & 0x3FF); ++} ++ ++static void nv15_20ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, ++ const uint8_t *unused2, int width, uint32_t *unused, void *opq) ++{ ++ int i; ++ const uint8_t *src = (const uint8_t *)_src; ++ uint16_t *dst = (uint16_t *)_dst; ++ for (i = 0; i < width; i++) ++ nv15_20ToYUV_c(dst, src, i, i); ++} ++ ++static void nv15_20ToUV_c(uint8_t *_dstU, uint8_t *_dstV, ++ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, ++ int width, uint32_t *unused, void *opq) ++{ ++ int i; ++ const uint8_t *src1 = (const uint8_t *)_src1; ++ uint16_t *dstU = (uint16_t *)_dstU, *dstV = (uint16_t *)_dstV; ++ for (i = 0; i < width; i++) { ++ nv15_20ToYUV_c(dstU, src1, i, 2 * i); ++ nv15_20ToYUV_c(dstV, src1, i, 2 * i + 1); ++ } ++} ++ + #define p01x_uv_wrapper(bits, shift) \ + static void p0 ## bits ## LEToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *unused0, \ +@@ -1444,6 +1477,10 @@ av_cold void ff_sws_init_input_funcs(Sws + case AV_PIX_FMT_XV36LE: + c->chrToYV12 = read_xv36le_UV_c; + break; ++ case AV_PIX_FMT_NV15: ++ case AV_PIX_FMT_NV20: ++ c->chrToYV12 = nv15_20ToUV_c; ++ break; + case AV_PIX_FMT_P010LE: + case AV_PIX_FMT_P210LE: + case AV_PIX_FMT_P410LE: +@@ -1944,6 +1981,10 @@ av_cold void ff_sws_init_input_funcs(Sws + case AV_PIX_FMT_BGRA64LE: + c->lumToYV12 = bgr64LEToY_c; + break; ++ case AV_PIX_FMT_NV15: ++ case AV_PIX_FMT_NV20: ++ c->lumToYV12 = nv15_20ToY_c; ++ break; + case AV_PIX_FMT_P010LE: + case AV_PIX_FMT_P210LE: + case AV_PIX_FMT_P410LE: +Index: FFmpeg/libswscale/swscale_unscaled.c +=================================================================== +--- 
libswscale/swscale_unscaled.c ++++ libswscale/swscale_unscaled.c +@@ -221,6 +221,61 @@ static int nv24ToPlanarWrapper(SwsContex + return srcSliceH; + } + ++static int nv15_20ToPlanarWrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, ++ int srcSliceH, uint8_t *dstParam[], ++ int dstStride[]) ++{ ++ const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat); ++ const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat); ++ int vsub = 1 << dst_format->log2_chroma_h; ++ uint16_t *dstY = (uint16_t*)(dstParam[0] + dstStride[0] * srcSliceY); ++ uint16_t *dstU = (uint16_t*)(dstParam[1] + dstStride[1] * srcSliceY / vsub); ++ uint16_t *dstV = (uint16_t*)(dstParam[2] + dstStride[2] * srcSliceY / vsub); ++ int x, y; ++ ++ /* Calculate net shift required for values. */ ++ const int shift[3] = { ++ dst_format->comp[0].depth + dst_format->comp[0].shift - ++ src_format->comp[0].depth - src_format->comp[0].shift, ++ dst_format->comp[1].depth + dst_format->comp[1].shift - ++ src_format->comp[1].depth - src_format->comp[1].shift, ++ dst_format->comp[2].depth + dst_format->comp[2].shift - ++ src_format->comp[2].depth - src_format->comp[2].shift, ++ }; ++ ++ for (y = srcSliceH; y > 0; y--) { ++ const uint8_t *tsrcY = src[0]; ++ uint16_t *tdstY = dstY; ++ for (x = c->srcW / 4; x > 0; x--) { ++ *tdstY++ = (((tsrcY[1] & 0x3 ) << 8) | (tsrcY[0] & 0xFF)) << shift[0]; ++ *tdstY++ = (((tsrcY[2] & 0xF ) << 6) | ((tsrcY[1] >> 2) & 0x3F)) << shift[0]; ++ *tdstY++ = (((tsrcY[3] & 0x3F) << 4) | ((tsrcY[2] >> 4) & 0xF )) << shift[0]; ++ *tdstY++ = (((tsrcY[4] & 0xFF) << 2) | ((tsrcY[3] >> 6) & 0x3 )) << shift[0]; ++ tsrcY += 5; ++ } ++ src[0] += srcStride[0]; ++ dstY += dstStride[0] / sizeof(uint16_t); ++ } ++ ++ for (y = srcSliceH / vsub; y > 0; y--) { ++ const uint8_t *tsrcUV = src[1]; ++ uint16_t *tdstU = dstU, *tdstV = dstV; ++ for (x = c->chrSrcW / 2; x > 0; x--) { ++ *tdstU++ = (((tsrcUV[1] & 0x3 ) << 8) | (tsrcUV[0] & 0xFF)) << 
shift[1]; ++ *tdstV++ = (((tsrcUV[2] & 0xF ) << 6) | ((tsrcUV[1] >> 2) & 0x3F)) << shift[2]; ++ *tdstU++ = (((tsrcUV[3] & 0x3F) << 4) | ((tsrcUV[2] >> 4) & 0xF )) << shift[1]; ++ *tdstV++ = (((tsrcUV[4] & 0xFF) << 2) | ((tsrcUV[3] >> 6) & 0x3 )) << shift[2]; ++ tsrcUV += 5; ++ } ++ src[1] += srcStride[1]; ++ dstU += dstStride[1] / sizeof(uint16_t); ++ dstV += dstStride[2] / sizeof(uint16_t); ++ } ++ ++ return srcSliceH; ++} ++ + static int planarToP01xWrapper(SwsContext *c, const uint8_t *src8[], + int srcStride[], int srcSliceY, + int srcSliceH, uint8_t *dstParam8[], +@@ -2004,6 +2059,19 @@ void ff_get_unscaled_swscale(SwsContext + (srcFormat == AV_PIX_FMT_NV24 || srcFormat == AV_PIX_FMT_NV42)) { + c->convert_unscaled = nv24ToPlanarWrapper; + } ++ /* nv15_to_yuv420p1x & nv20_to_yuv422p1x */ ++ if ((srcFormat == AV_PIX_FMT_NV15 && ++ (dstFormat == AV_PIX_FMT_YUV420P10 || ++ dstFormat == AV_PIX_FMT_YUV420P12 || ++ dstFormat == AV_PIX_FMT_YUV420P14 || ++ dstFormat == AV_PIX_FMT_YUV420P16)) || ++ (srcFormat == AV_PIX_FMT_NV20 && ++ (dstFormat == AV_PIX_FMT_YUV422P10 || ++ dstFormat == AV_PIX_FMT_YUV422P12 || ++ dstFormat == AV_PIX_FMT_YUV422P14 || ++ dstFormat == AV_PIX_FMT_YUV422P16))) { ++ c->convert_unscaled = nv15_20ToPlanarWrapper; ++ } + /* yuv2bgr */ + if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P || + srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) && +Index: FFmpeg/libswscale/utils.c +=================================================================== +--- libswscale/utils.c ++++ libswscale/utils.c +@@ -230,6 +230,8 @@ static const FormatEntry format_entries[ + [AV_PIX_FMT_XYZ12BE] = { 1, 1, 1 }, + [AV_PIX_FMT_XYZ12LE] = { 1, 1, 1 }, + [AV_PIX_FMT_AYUV64LE] = { 1, 1}, ++ [AV_PIX_FMT_NV15] = { 1, 0 }, ++ [AV_PIX_FMT_NV20] = { 1, 0 }, + [AV_PIX_FMT_P010LE] = { 1, 1 }, + [AV_PIX_FMT_P010BE] = { 1, 1 }, + [AV_PIX_FMT_P012LE] = { 1, 1 }, +Index: FFmpeg/tests/ref/fate/imgutils 
+=================================================================== +--- tests/ref/fate/imgutils ++++ tests/ref/fate/imgutils +@@ -269,6 +269,8 @@ p412be planes: 2, linesizes: 12 + p412le planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432 + gbrap14be planes: 4, linesizes: 128 128 128 128, plane_sizes: 6144 6144 6144 6144, plane_offsets: 6144 6144 6144, total_size: 24576 + gbrap14le planes: 4, linesizes: 128 128 128 128, plane_sizes: 6144 6144 6144 6144, plane_offsets: 6144 6144 6144, total_size: 24576 ++nv15 planes: 2, linesizes: 80 80 0 0, plane_sizes: 3840 1920 0 0, plane_offsets: 3840 0 0, total_size: 5760 ++nv20 planes: 2, linesizes: 80 80 0 0, plane_sizes: 3840 3840 0 0, plane_offsets: 3840 0 0, total_size: 7680 + + image_fill_black tests + yuv420p total_size: 4608, black_unknown_crc: 0xd00f6cc6, black_tv_crc: 0xd00f6cc6, black_pc_crc: 0x234969af +@@ -485,3 +487,5 @@ p412be total_size: 18432, bla + p412le total_size: 18432, black_unknown_crc: 0x4028ac30, black_tv_crc: 0x4028ac30, black_pc_crc: 0xab7c7698 + gbrap14be total_size: 24576, black_unknown_crc: 0x4ec0d987, black_tv_crc: 0x4ec0d987, black_pc_crc: 0x4ec0d987 + gbrap14le total_size: 24576, black_unknown_crc: 0x13bde353, black_tv_crc: 0x13bde353, black_pc_crc: 0x13bde353 ++nv15 total_size: 5760, black_unknown_crc: 0x6b5fdb58, black_tv_crc: 0x6b5fdb58, black_pc_crc: 0x660a512c ++nv20 total_size: 7680, black_unknown_crc: 0x171f53da, black_tv_crc: 0x171f53da, black_pc_crc: 0xfcf5cda3 +Index: FFmpeg/tests/ref/fate/sws-pixdesc-query +=================================================================== +--- tests/ref/fate/sws-pixdesc-query ++++ tests/ref/fate/sws-pixdesc-query +@@ -61,6 +61,8 @@ isNBPS: + gray14le + gray9be + gray9le ++ nv15 ++ nv20 + nv20be + nv20le + p010be +@@ -221,7 +223,9 @@ isYUV: + ayuv64be + ayuv64le + nv12 ++ nv15 + nv16 ++ nv20 + nv20be + nv20le + nv21 +@@ -337,7 +341,9 @@ isYUV: + + isPlanarYUV: + nv12 ++ nv15 + nv16 ++ nv20 + 
nv20be + nv20le + nv21 +@@ -434,7 +440,9 @@ isPlanarYUV: + + isSemiPlanarYUV: + nv12 ++ nv15 + nv16 ++ nv20 + nv20be + nv20le + nv21 +@@ -868,7 +876,9 @@ Planar: + gbrpf32be + gbrpf32le + nv12 ++ nv15 + nv16 ++ nv20 + nv20be + nv20le + nv21 diff --git a/cross/ffmpeg7/patches/1048-jellyfin-0048-add-format-option-to-vt-scale-filter.patch b/cross/ffmpeg7/patches/1048-jellyfin-0048-add-format-option-to-vt-scale-filter.patch new file mode 100644 index 00000000000..050fda1aa9a --- /dev/null +++ b/cross/ffmpeg7/patches/1048-jellyfin-0048-add-format-option-to-vt-scale-filter.patch @@ -0,0 +1,71 @@ +Index: FFmpeg/libavfilter/vf_scale_vt.c +=================================================================== +--- libavfilter/vf_scale_vt.c ++++ libavfilter/vf_scale_vt.c +@@ -40,11 +40,26 @@ typedef struct ScaleVtContext { + enum AVColorPrimaries colour_primaries; + enum AVColorTransferCharacteristic colour_transfer; + enum AVColorSpace colour_matrix; ++ enum AVPixelFormat format; + char *colour_primaries_string; + char *colour_transfer_string; + char *colour_matrix_string; + } ScaleVtContext; + ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_NONE, ++}; ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ + static av_cold int scale_vt_init(AVFilterContext *avctx) + { + ScaleVtContext *s = avctx->priv; +@@ -179,6 +194,7 @@ static int scale_vt_config_output(AVFilt + AVFilterLink *inlink = outlink->src->inputs[0]; + AVHWFramesContext *hw_frame_ctx_in; + AVHWFramesContext *hw_frame_ctx_out; ++ enum AVPixelFormat out_format; + + err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, + &s->output_width, +@@ -198,13 +214,21 @@ static int scale_vt_config_output(AVFilt + + hw_frame_ctx_in = (AVHWFramesContext *)inlink->hw_frames_ctx->data; + ++ out_format = 
(s->format == AV_PIX_FMT_NONE) ? hw_frame_ctx_in->sw_format : s->format; ++ if (!format_is_supported(s->format)) { ++ av_log(s, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ + av_buffer_unref(&outlink->hw_frames_ctx); + outlink->hw_frames_ctx = av_hwframe_ctx_alloc(hw_frame_ctx_in->device_ref); + hw_frame_ctx_out = (AVHWFramesContext *)outlink->hw_frames_ctx->data; + hw_frame_ctx_out->format = AV_PIX_FMT_VIDEOTOOLBOX; +- hw_frame_ctx_out->sw_format = hw_frame_ctx_in->sw_format; ++ hw_frame_ctx_out->sw_format = out_format; + hw_frame_ctx_out->width = outlink->w; + hw_frame_ctx_out->height = outlink->h; ++ ((AVVTFramesContext *)hw_frame_ctx_out->hwctx)->color_range = ((AVVTFramesContext *)hw_frame_ctx_in->hwctx)->color_range; + + err = ff_filter_init_hw_frames(avctx, outlink, 1); + if (err < 0) +@@ -234,6 +258,8 @@ static const AVOption scale_vt_options[] + OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, + { "color_transfer", "Output colour transfer characteristics", + OFFSET(colour_transfer_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, ++ { "format", "Output pixel format", ++ OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, AV_PIX_FMT_NONE, INT_MAX, .flags = FLAGS }, + { NULL }, + }; + diff --git a/cross/ffmpeg7/patches/1049-jellyfin-0049-backport-fixes-for-videotoolbox-from-upstream.patch b/cross/ffmpeg7/patches/1049-jellyfin-0049-backport-fixes-for-videotoolbox-from-upstream.patch new file mode 100644 index 00000000000..9f71cdb0517 --- /dev/null +++ b/cross/ffmpeg7/patches/1049-jellyfin-0049-backport-fixes-for-videotoolbox-from-upstream.patch @@ -0,0 +1,568 @@ +Index: FFmpeg/libavcodec/videotoolboxenc.c +=================================================================== +--- libavcodec/videotoolboxenc.c ++++ libavcodec/videotoolboxenc.c +@@ -226,9 +226,9 @@ typedef struct ExtraSEI { + + typedef struct BufNode { + 
CMSampleBufferRef cm_buffer; +- ExtraSEI *sei; ++ ExtraSEI sei; ++ AVBufferRef *frame_buf; + struct BufNode* next; +- int error; + } BufNode; + + typedef struct VTEncContext { +@@ -261,7 +261,7 @@ typedef struct VTEncContext { + int realtime; + int frames_before; + int frames_after; +- bool constant_bit_rate; ++ int constant_bit_rate; + + int allow_sw; + int require_sw; +@@ -280,6 +280,18 @@ typedef struct VTEncContext { + int max_ref_frames; + } VTEncContext; + ++static void vtenc_free_buf_node(BufNode *info) ++{ ++ if (!info) ++ return; ++ ++ av_free(info->sei.data); ++ if (info->cm_buffer) ++ CFRelease(info->cm_buffer); ++ av_buffer_unref(&info->frame_buf); ++ av_free(info); ++} ++ + static int vt_dump_encoder(AVCodecContext *avctx) + { + VTEncContext *vtctx = avctx->priv_data; +@@ -347,8 +359,7 @@ static void set_async_error(VTEncContext + + while (info) { + BufNode *next = info->next; +- CFRelease(info->cm_buffer); +- av_free(info); ++ vtenc_free_buf_node(info); + info = next; + } + +@@ -388,7 +399,7 @@ static void vtenc_reset(VTEncContext *vt + } + } + +-static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf, ExtraSEI **sei) ++static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf, ExtraSEI *sei) + { + BufNode *info; + +@@ -426,31 +437,18 @@ static int vtenc_q_pop(VTEncContext *vtc + pthread_mutex_unlock(&vtctx->lock); + + *buf = info->cm_buffer; ++ info->cm_buffer = NULL; + if (sei && *buf) { + *sei = info->sei; +- } else if (info->sei) { +- if (info->sei->data) av_free(info->sei->data); +- av_free(info->sei); ++ info->sei = (ExtraSEI) {0}; + } +- av_free(info); +- ++ vtenc_free_buf_node(info); + + return 0; + } + +-static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer, ExtraSEI *sei) ++static void vtenc_q_push(VTEncContext *vtctx, BufNode *info) + { +- BufNode *info = av_malloc(sizeof(BufNode)); +- if (!info) { +- set_async_error(vtctx, AVERROR(ENOMEM)); +- return; +- } +- +- CFRetain(buffer); 
+- info->cm_buffer = buffer; +- info->sei = sei; +- info->next = NULL; +- + pthread_mutex_lock(&vtctx->lock); + + if (!vtctx->q_head) { +@@ -735,13 +733,16 @@ static void vtenc_output_callback( + { + AVCodecContext *avctx = ctx; + VTEncContext *vtctx = avctx->priv_data; +- ExtraSEI *sei = sourceFrameCtx; ++ BufNode *info = sourceFrameCtx; + ++ av_buffer_unref(&info->frame_buf); + if (vtctx->async_error) { ++ vtenc_free_buf_node(info); + return; + } + + if (status) { ++ vtenc_free_buf_node(info); + av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status); + set_async_error(vtctx, AVERROR_EXTERNAL); + return; +@@ -751,15 +752,19 @@ static void vtenc_output_callback( + return; + } + ++ CFRetain(sample_buffer); ++ info->cm_buffer = sample_buffer; ++ + if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) { + int set_status = set_extradata(avctx, sample_buffer); + if (set_status) { ++ vtenc_free_buf_node(info); + set_async_error(vtctx, set_status); + return; + } + } + +- vtenc_q_push(vtctx, sample_buffer, sei); ++ vtenc_q_push(vtctx, info); + } + + static int get_length_code_size( +@@ -2449,7 +2454,8 @@ static int copy_avframe_to_pixel_buffer( + + static int create_cv_pixel_buffer(AVCodecContext *avctx, + const AVFrame *frame, +- CVPixelBufferRef *cv_img) ++ CVPixelBufferRef *cv_img, ++ BufNode *node) + { + int plane_count; + int color; +@@ -2468,6 +2474,12 @@ static int create_cv_pixel_buffer(AVCode + av_assert0(*cv_img); + + CFRetain(*cv_img); ++ if (frame->buf[0]) { ++ node->frame_buf = av_buffer_ref(frame->buf[0]); ++ if (!node->frame_buf) ++ return AVERROR(ENOMEM); ++ } ++ + return 0; + } + +@@ -2565,33 +2577,29 @@ static int vtenc_send_frame(AVCodecConte + const AVFrame *frame) + { + CMTime time; +- CFDictionaryRef frame_dict; ++ CFDictionaryRef frame_dict = NULL; + CVPixelBufferRef cv_img = NULL; + AVFrameSideData *side_data = NULL; +- ExtraSEI *sei = NULL; +- int status = create_cv_pixel_buffer(avctx, frame, &cv_img); ++ BufNode 
*node = av_mallocz(sizeof(*node)); ++ int status; + +- if (status) return status; ++ if (!node) ++ return AVERROR(ENOMEM); ++ ++ status = create_cv_pixel_buffer(avctx, frame, &cv_img, node); ++ if (status) ++ goto out; + + status = create_encoder_dict_h264(frame, &frame_dict); +- if (status) { +- CFRelease(cv_img); +- return status; +- } ++ if (status) ++ goto out; + + #if CONFIG_ATSC_A53 + side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC); + if (vtctx->a53_cc && side_data && side_data->size) { +- sei = av_mallocz(sizeof(*sei)); +- if (!sei) { +- av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n"); +- } else { +- int ret = ff_alloc_a53_sei(frame, 0, &sei->data, &sei->size); +- if (ret < 0) { +- av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n"); +- av_free(sei); +- sei = NULL; +- } ++ status = ff_alloc_a53_sei(frame, 0, &node->sei.data, &node->sei.size); ++ if (status < 0) { ++ goto out; + } + } + #endif +@@ -2603,19 +2611,26 @@ static int vtenc_send_frame(AVCodecConte + time, + kCMTimeInvalid, + frame_dict, +- sei, ++ node, + NULL + ); + +- if (frame_dict) CFRelease(frame_dict); +- CFRelease(cv_img); +- + if (status) { + av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status); +- return AVERROR_EXTERNAL; ++ status = AVERROR_EXTERNAL; ++ // Not necessary, just in case new code put after here ++ goto out; + } + +- return 0; ++out: ++ if (frame_dict) ++ CFRelease(frame_dict); ++ if (cv_img) ++ CFRelease(cv_img); ++ if (status) ++ vtenc_free_buf_node(node); ++ ++ return status; + } + + static av_cold int vtenc_frame( +@@ -2628,7 +2643,7 @@ static av_cold int vtenc_frame( + bool get_frame; + int status; + CMSampleBufferRef buf = NULL; +- ExtraSEI *sei = NULL; ++ ExtraSEI sei = {0}; + + if (frame) { + status = vtenc_send_frame(avctx, vtctx, frame); +@@ -2669,11 +2684,8 @@ static av_cold int vtenc_frame( + if (status) goto end_nopkt; + if (!buf) goto end_nopkt; + +- status = 
vtenc_cm_to_avpacket(avctx, buf, pkt, sei); +- if (sei) { +- if (sei->data) av_free(sei->data); +- av_free(sei); +- } ++ status = vtenc_cm_to_avpacket(avctx, buf, pkt, sei.data ? &sei : NULL); ++ av_free(sei.data); + CFRelease(buf); + if (status) goto end_nopkt; + +@@ -2698,6 +2710,10 @@ static int vtenc_populate_extradata(AVCo + CVPixelBufferRef pix_buf = NULL; + CMTime time; + CMSampleBufferRef buf = NULL; ++ BufNode *node = av_mallocz(sizeof(*node)); ++ ++ if (!node) ++ return AVERROR(ENOMEM); + + status = vtenc_create_encoder(avctx, + codec_type, +@@ -2733,7 +2749,7 @@ static int vtenc_populate_extradata(AVCo + time, + kCMTimeInvalid, + NULL, +- NULL, ++ node, + NULL); + + if (status) { +@@ -2744,6 +2760,7 @@ static int vtenc_populate_extradata(AVCo + status = AVERROR_EXTERNAL; + goto pe_cleanup; + } ++ node = NULL; + + //Populates extradata - output frames are flushed and param sets are available. + status = VTCompressionSessionCompleteFrames(vtctx->session, +@@ -2766,10 +2783,26 @@ static int vtenc_populate_extradata(AVCo + + pe_cleanup: + CVPixelBufferRelease(pix_buf); +- vtenc_reset(vtctx); ++ ++ if (status) { ++ vtenc_reset(vtctx); ++ } else { ++ if (vtctx->session) { ++ CFRelease(vtctx->session); ++ vtctx->session = NULL; ++ } ++ ++ if (vtctx->supported_props) { ++ CFRelease(vtctx->supported_props); ++ vtctx->supported_props = NULL; ++ } ++ } ++ + vtctx->frame_ct_out = 0; + + av_assert0(status != 0 || (avctx->extradata && avctx->extradata_size > 0)); ++ if (!status) ++ vtenc_free_buf_node(node); + + return status; + } +Index: FFmpeg/libavutil/hwcontext_videotoolbox.c +=================================================================== +--- libavutil/hwcontext_videotoolbox.c ++++ libavutil/hwcontext_videotoolbox.c +@@ -342,8 +342,10 @@ static int vt_pixbuf_set_par(void *log_c + CFNumberRef num = NULL, den = NULL; + AVRational avpar = src->sample_aspect_ratio; + +- if (avpar.num == 0) ++ if (avpar.num == 0) { ++ CVBufferRemoveAttachment(pixbuf, 
kCVImageBufferPixelAspectRatioKey); + return 0; ++ } + + av_reduce(&avpar.num, &avpar.den, + avpar.num, avpar.den, +@@ -423,7 +425,10 @@ static int vt_pixbuf_set_chromaloc(void + kCVImageBufferChromaLocationTopFieldKey, + loc, + kCVAttachmentMode_ShouldPropagate); +- } ++ } else ++ CVBufferRemoveAttachment( ++ pixbuf, ++ kCVImageBufferChromaLocationTopFieldKey); + + return 0; + } +@@ -527,59 +532,116 @@ CFStringRef av_map_videotoolbox_color_tr + } + } + ++/** ++ * Copy all attachments for the specified mode from the given buffer. ++ */ ++static CFDictionaryRef vt_cv_buffer_copy_attachments(CVBufferRef buffer, ++ CVAttachmentMode attachment_mode) ++{ ++ CFDictionaryRef dict; ++ ++ // Check that our SDK is at least macOS 12 / iOS 15 / tvOS 15 ++ #if (TARGET_OS_OSX && defined(__MAC_12_0) && __MAC_OS_X_VERSION_MAX_ALLOWED >= __MAC_12_0) || \ ++ (TARGET_OS_IOS && defined(__IPHONE_15_0) && __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_15_0) || \ ++ (TARGET_OS_TV && defined(__TVOS_15_0) && __TV_OS_VERSION_MAX_ALLOWED >= __TVOS_15_0) ++ // On recent enough versions, just use the respective API ++ if (__builtin_available(macOS 12.0, iOS 15.0, tvOS 15.0, *)) ++ return CVBufferCopyAttachments(buffer, attachment_mode); ++ #endif ++ ++ // Check that the target is lower than macOS 12 / iOS 15 / tvOS 15 ++ // else this would generate a deprecation warning and anyway never run because ++ // the runtime availability check above would be always true. ++ #if (TARGET_OS_OSX && (!defined(__MAC_12_0) || __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_12_0)) || \ ++ (TARGET_OS_IOS && (!defined(__IPHONE_15_0) || __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_15_0)) || \ ++ (TARGET_OS_TV && (!defined(__TVOS_15_0) || __TV_OS_VERSION_MIN_REQUIRED < __TVOS_15_0)) ++ // Fallback on SDKs or runtime versions < macOS 12 / iOS 15 / tvOS 15 ++ dict = CVBufferGetAttachments(buffer, attachment_mode); ++ return (dict) ? 
CFDictionaryCreateCopy(NULL, dict) : NULL; ++ #else ++ return NULL; // Impossible, just make the compiler happy ++ #endif ++} ++ + static int vt_pixbuf_set_colorspace(void *log_ctx, + CVPixelBufferRef pixbuf, const AVFrame *src) + { ++ CGColorSpaceRef colorspace = NULL; + CFStringRef colormatrix = NULL, colorpri = NULL, colortrc = NULL; + Float32 gamma = 0; + + colormatrix = av_map_videotoolbox_color_matrix_from_av(src->colorspace); +- if (!colormatrix && src->colorspace != AVCOL_SPC_UNSPECIFIED) +- av_log(log_ctx, AV_LOG_WARNING, "Color space %s is not supported.\n", av_color_space_name(src->colorspace)); ++ if (colormatrix) ++ CVBufferSetAttachment(pixbuf, kCVImageBufferYCbCrMatrixKey, ++ colormatrix, kCVAttachmentMode_ShouldPropagate); ++ else { ++ CVBufferRemoveAttachment(pixbuf, kCVImageBufferYCbCrMatrixKey); ++ if (src->colorspace != AVCOL_SPC_UNSPECIFIED && src->colorspace != AVCOL_SPC_RGB) ++ av_log(log_ctx, AV_LOG_WARNING, ++ "Color space %s is not supported.\n", ++ av_color_space_name(src->colorspace)); ++ } + + colorpri = av_map_videotoolbox_color_primaries_from_av(src->color_primaries); +- if (!colorpri && src->color_primaries != AVCOL_PRI_UNSPECIFIED) +- av_log(log_ctx, AV_LOG_WARNING, "Color primaries %s is not supported.\n", av_color_primaries_name(src->color_primaries)); ++ if (colorpri) ++ CVBufferSetAttachment(pixbuf, kCVImageBufferColorPrimariesKey, ++ colorpri, kCVAttachmentMode_ShouldPropagate); ++ else { ++ CVBufferRemoveAttachment(pixbuf, kCVImageBufferColorPrimariesKey); ++ if (src->color_primaries != AVCOL_SPC_UNSPECIFIED) ++ av_log(log_ctx, AV_LOG_WARNING, ++ "Color primaries %s is not supported.\n", ++ av_color_primaries_name(src->color_primaries)); ++ } + + colortrc = av_map_videotoolbox_color_trc_from_av(src->color_trc); +- if (!colortrc && src->color_trc != AVCOL_TRC_UNSPECIFIED) +- av_log(log_ctx, AV_LOG_WARNING, "Color transfer function %s is not supported.\n", av_color_transfer_name(src->color_trc)); ++ if (colortrc) ++ 
CVBufferSetAttachment(pixbuf, kCVImageBufferTransferFunctionKey, ++ colortrc, kCVAttachmentMode_ShouldPropagate); ++ else { ++ CVBufferRemoveAttachment(pixbuf, kCVImageBufferTransferFunctionKey); ++ if (src->color_trc != AVCOL_TRC_UNSPECIFIED) ++ av_log(log_ctx, AV_LOG_WARNING, ++ "Color transfer function %s is not supported.\n", ++ av_color_transfer_name(src->color_trc)); ++ } + + if (src->color_trc == AVCOL_TRC_GAMMA22) + gamma = 2.2; + else if (src->color_trc == AVCOL_TRC_GAMMA28) + gamma = 2.8; + +- if (colormatrix) { +- CVBufferSetAttachment( +- pixbuf, +- kCVImageBufferYCbCrMatrixKey, +- colormatrix, +- kCVAttachmentMode_ShouldPropagate); +- } +- if (colorpri) { +- CVBufferSetAttachment( +- pixbuf, +- kCVImageBufferColorPrimariesKey, +- colorpri, +- kCVAttachmentMode_ShouldPropagate); +- } +- if (colortrc) { +- CVBufferSetAttachment( +- pixbuf, +- kCVImageBufferTransferFunctionKey, +- colortrc, +- kCVAttachmentMode_ShouldPropagate); +- } + if (gamma != 0) { + CFNumberRef gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma); +- CVBufferSetAttachment( +- pixbuf, +- kCVImageBufferGammaLevelKey, +- gamma_level, +- kCVAttachmentMode_ShouldPropagate); ++ CVBufferSetAttachment(pixbuf, kCVImageBufferGammaLevelKey, ++ gamma_level, kCVAttachmentMode_ShouldPropagate); + CFRelease(gamma_level); ++ } else ++ CVBufferRemoveAttachment(pixbuf, kCVImageBufferGammaLevelKey); ++ ++#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 100800) || \ ++ (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED >= 100000) ++ if (__builtin_available(macOS 10.8, iOS 10, *)) { ++ CFDictionaryRef attachments = ++ vt_cv_buffer_copy_attachments(pixbuf, kCVAttachmentMode_ShouldPropagate); ++ ++ if (attachments) { ++ colorspace = ++ CVImageBufferCreateColorSpaceFromAttachments(attachments); ++ CFRelease(attachments); ++ } + } ++#endif ++ ++ // Done outside the above preprocessor code and if's so that ++ // in any case a wrong kCVImageBufferCGColorSpaceKey is removed ++ // if the above 
code is not used or fails. ++ if (colorspace) { ++ CVBufferSetAttachment(pixbuf, kCVImageBufferCGColorSpaceKey, ++ colorspace, kCVAttachmentMode_ShouldPropagate); ++ CFRelease(colorspace); ++ } else ++ CVBufferRemoveAttachment(pixbuf, kCVImageBufferCGColorSpaceKey); + + return 0; + } +Index: FFmpeg/libavutil/hwcontext_videotoolbox.h +=================================================================== +--- libavutil/hwcontext_videotoolbox.h ++++ libavutil/hwcontext_videotoolbox.h +@@ -90,8 +90,15 @@ CFStringRef av_map_videotoolbox_color_pr + CFStringRef av_map_videotoolbox_color_trc_from_av(enum AVColorTransferCharacteristic trc); + + /** +- * Update a CVPixelBufferRef's metadata to based on an AVFrame. +- * Returns 0 if no known equivalent was found. ++ * Set CVPixelBufferRef's metadata based on an AVFrame. ++ * ++ * Sets/unsets the CVPixelBuffer attachments to match as closely as possible the ++ * AVFrame metadata. To prevent inconsistent attachments, the attachments for properties ++ * that could not be matched or are unspecified in the given AVFrame are unset. So if ++ * any attachments already covered by AVFrame metadata need to be set to a specific ++ * value, this should happen after calling this function. ++ * ++ * Returns < 0 in case of an error. 
+ */ + int av_vt_pixbuf_set_attachments(void *log_ctx, + CVPixelBufferRef pixbuf, const struct AVFrame *src); +Index: FFmpeg/libavfilter/vf_yadif_videotoolbox.m +=================================================================== +--- libavfilter/vf_yadif_videotoolbox.m ++++ libavfilter/vf_yadif_videotoolbox.m +@@ -24,6 +24,7 @@ + #include "yadif.h" + #include "libavutil/avassert.h" + #include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_videotoolbox.h" + #include "libavutil/objc.h" + + #include +@@ -94,9 +95,6 @@ static void call_kernel(AVFilterContext + + [buffer commit]; + [buffer waitUntilCompleted]; +- +- ff_objc_release(&encoder); +- ff_objc_release(&buffer); + } + + static void filter(AVFilterContext *ctx, AVFrame *dst, +@@ -321,8 +319,9 @@ static int config_input(AVFilterLink *in + + static int do_config_output(AVFilterLink *link) API_AVAILABLE(macos(10.11), ios(8.0)) + { +- AVHWFramesContext *output_frames; ++ AVHWFramesContext *output_frames, *input_frames; + AVFilterContext *ctx = link->src; ++ AVFilterLink *inlink = link->src->inputs[0]; + YADIFVTContext *s = ctx->priv; + YADIFContext *y = &s->yadif; + int ret = 0; +@@ -343,12 +342,14 @@ static int do_config_output(AVFilterLink + goto exit; + } + ++ input_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data; + output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data; + + output_frames->format = AV_PIX_FMT_VIDEOTOOLBOX; + output_frames->sw_format = s->input_frames->sw_format; + output_frames->width = ctx->inputs[0]->w; + output_frames->height = ctx->inputs[0]->h; ++ ((AVVTFramesContext *)output_frames->hwctx)->color_range = ((AVVTFramesContext *)input_frames->hwctx)->color_range; + + ret = ff_filter_init_hw_frames(ctx, link, 10); + if (ret < 0) diff --git a/cross/ffmpeg7/patches/1050-jellyfin-0050-add-vf-overlay-videotoolbox-filter.patch b/cross/ffmpeg7/patches/1050-jellyfin-0050-add-vf-overlay-videotoolbox-filter.patch new file mode 100644 index 00000000000..086fb5b8e9a --- /dev/null 
+++ b/cross/ffmpeg7/patches/1050-jellyfin-0050-add-vf-overlay-videotoolbox-filter.patch @@ -0,0 +1,986 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3883,6 +3883,7 @@ overlay_opencl_filter_deps="opencl" + overlay_qsv_filter_deps="libmfx" + overlay_qsv_filter_select="qsvvpp" + overlay_vaapi_filter_deps="vaapi VAProcPipelineCaps_blend_flags" ++overlay_videotoolbox_filter_deps="metal corevideo coreimage videotoolbox" + overlay_vulkan_filter_deps="vulkan spirv_compiler" + overlay_rkrga_filter_deps="rkrga" + owdenoise_filter_deps="gpl" +Index: FFmpeg/doc/filters.texi +=================================================================== +--- doc/filters.texi ++++ doc/filters.texi +@@ -19047,6 +19047,58 @@ See @ref{framesync}. + + This filter also supports the @ref{framesync} options. + ++@section overlay_videotoolbox ++ ++Overlay one video on top of another. ++ ++This is the VideoToolbox variant of the @ref{overlay} filter. ++It takes two inputs and has one output. The first input is the "main" video on which the second input is overlaid. ++It only accepts VideoToolbox frames. The underlying input pixel formats do not have to match. ++Different input pixel formats and color spaces will be automatically converted using hardware accelerated methods. ++The final output will have the same pixel format and color space as the "main" input. ++ ++The filter accepts the following options: ++ ++@table @option ++ ++@item x ++Set the x coordinate of the overlaid video on the main video. ++Default value is @code{0}. ++ ++@item y ++Set the y coordinate of the overlaid video on the main video. ++Default value is @code{0}. ++ ++@item eof_action ++See @ref{framesync}. ++ ++@item shortest ++See @ref{framesync}. ++ ++@item repeatlast ++See @ref{framesync}. ++ ++@end table ++ ++@subsection Examples ++ ++@itemize ++@item ++Overlay an image LOGO at the top-left corner of the INPUT video. 
++The INPUT video is in nv12 format and the LOGO image is in rgba format. ++@example ++-hwaccel videotoolbox -i INPUT -i LOGO -codec:v:0 h264_videotoolbox -filter_complex "[0:v]format=nv12,hwupload[a], [1:v]format=rgba,hwupload[b], [a][b]overlay_videotoolbox" OUTPUT ++@end example ++@item ++Overlay an SDR video OVERLAY at the top-left corner of the HDR video MAIN. ++The INPUT video is in p010 format and the LOGO image is in nv12 format. ++The OUTPUT video will also be an HDR video with OVERLAY mapped to HDR. ++@example ++-hwaccel videotoolbox -i MAIN -i OVERLAY -codec:v:0 hevc_videotoolbox -tag:v hvc1 -filter_complex "[0:v]format=p010,hwupload[a], [1:v]format=nv12,hwupload[b], [a][b]overlay_videotoolbox" OUTPUT ++@end example ++ ++@end itemize ++ + @section owdenoise + + Apply Overcomplete Wavelet denoiser. +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- libavfilter/Makefile ++++ libavfilter/Makefile +@@ -414,6 +414,9 @@ OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) + opencl/overlay.o framesync.o + OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o + OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER) += vf_overlay_vaapi.o framesync.o vaapi_vpp.o ++OBJS-$(CONFIG_OVERLAY_VIDEOTOOLBOX_FILTER) += vf_overlay_videotoolbox.o framesync.o \ ++ metal/vf_overlay_videotoolbox.metallib.o \ ++ metal/utils.o + OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o vulkan_filter.o + OBJS-$(CONFIG_OVERLAY_RKRGA_FILTER) += vf_overlay_rkrga.o framesync.o + OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== +--- libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -388,6 +388,7 @@ extern const AVFilter ff_vf_overlay; + extern const AVFilter ff_vf_overlay_opencl; + extern const AVFilter ff_vf_overlay_qsv; + extern const AVFilter ff_vf_overlay_vaapi; ++extern const AVFilter ff_vf_overlay_videotoolbox; + 
extern const AVFilter ff_vf_overlay_vulkan; + extern const AVFilter ff_vf_overlay_cuda; + extern const AVFilter ff_vf_overlay_rkrga; +Index: FFmpeg/libavfilter/metal/utils.m +=================================================================== +--- libavfilter/metal/utils.m ++++ libavfilter/metal/utils.m +@@ -55,6 +55,9 @@ CVMetalTextureRef ff_metal_texture_from_ + { + CVMetalTextureRef tex = NULL; + CVReturn ret; ++ bool is_planer = CVPixelBufferIsPlanar(pixbuf); ++ size_t width = is_planer ? CVPixelBufferGetWidthOfPlane(pixbuf, plane) : CVPixelBufferGetWidth(pixbuf); ++ size_t height = is_planer ? CVPixelBufferGetHeightOfPlane(pixbuf, plane) : CVPixelBufferGetHeight(pixbuf); + + ret = CVMetalTextureCacheCreateTextureFromImage( + NULL, +@@ -62,8 +65,8 @@ CVMetalTextureRef ff_metal_texture_from_ + pixbuf, + NULL, + format, +- CVPixelBufferGetWidthOfPlane(pixbuf, plane), +- CVPixelBufferGetHeightOfPlane(pixbuf, plane), ++ width, ++ height, + plane, + &tex + ); +Index: FFmpeg/libavfilter/metal/vf_overlay_videotoolbox.metal +=================================================================== +--- /dev/null ++++ libavfilter/metal/vf_overlay_videotoolbox.metal +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (C) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++#include ++#include ++ ++using namespace metal; ++ ++struct mtlBlendParams { ++ uint x_position; ++ uint y_position; ++}; ++ ++/* ++ * Blend shader for premultiplied alpha textures ++ */ ++kernel void blend_shader( ++ texture2d source [[ texture(0) ]], ++ texture2d mask [[ texture(1) ]], ++ texture2d dest [[ texture(2) ]], ++ constant mtlBlendParams& params [[ buffer(3) ]], ++ uint2 gid [[ thread_position_in_grid ]]) ++{ ++ const auto mask_size = uint2(mask.get_width(), ++ mask.get_height()); ++ const auto loc_overlay = uint2(params.x_position, params.y_position); ++ if (gid.x < loc_overlay.x || ++ gid.y < loc_overlay.y || ++ gid.x >= mask_size.x + loc_overlay.x || ++ gid.y >= mask_size.y + loc_overlay.y) ++ { ++ float4 source_color = source.read(gid); ++ dest.write(source_color, gid); ++ } else { ++ float4 source_color = source.read(gid); ++ float4 mask_color = mask.read((gid - loc_overlay)); ++ float4 result_color = source_color * (1.0f - mask_color.w) + (mask_color * mask_color.w); ++ dest.write(result_color, gid); ++ } ++} ++ ++/* ++ * Blend shader for sperated yuv main and bgra mask ++ */ ++kernel void blend_shader_bgra_overlay( ++ texture2d source_y [[ texture(0) ]], ++ texture2d source_uv [[ texture(1) ]], ++ texture2d mask [[ texture(2) ]], ++ texture2d dest_y [[ texture(3) ]], ++ texture2d dest_uv [[ texture(4) ]], ++ constant mtlBlendParams& params [[ buffer(5) ]], ++ uint2 gid [[ thread_position_in_grid ]]) ++{ ++ const auto mask_size = uint2(mask.get_width(), ++ mask.get_height()); ++ const auto loc_overlay = uint2(params.x_position, params.y_position); ++ const auto loc_uv = gid >> 1; ++ if (gid.x < loc_overlay.x || ++ gid.y < loc_overlay.y || ++ gid.x >= mask_size.x + loc_overlay.x || ++ gid.y >= mask_size.y + 
loc_overlay.y) ++ { ++ float4 source_color_y = source_y.read(gid); ++ float4 source_color_uv = source_uv.read(loc_uv); ++ dest_y.write(source_color_y, gid); ++ dest_uv.write(source_color_uv, loc_uv); ++ } else { ++ float4 source_color_y = source_y.read(gid); ++ float4 source_color_uv = source_uv.read(loc_uv); ++ float4 mask_color = mask.read(gid - loc_overlay); ++ float y_overlay = 0.183 * mask_color.r + 0.614 * mask_color.g + 0.062 * mask_color.b + 0.0625f; ++ float u_overlay = -0.101 * mask_color.r - 0.339 * mask_color.g + 0.439 * mask_color.b + 0.5f; ++ float v_overlay = 0.439 * mask_color.r - 0.399 * mask_color.g - 0.040 * mask_color.b + 0.5f; ++ float alpha_color = mask_color.a; ++ float3 main_color = float3(source_color_y.x, source_color_uv.x, source_color_uv.y); ++ float3 overlay_color = float3(y_overlay, u_overlay, v_overlay); ++ float3 result_color = main_color * (1.0f - alpha_color) + (overlay_color * alpha_color); ++ dest_y.write(float4(result_color.x, 0.0f, 0.0f, 1.0f), gid); ++ dest_uv.write(float4(result_color.y, result_color.z, 0.0f, 1.0f), loc_uv); ++ } ++} +Index: FFmpeg/libavfilter/vf_overlay_videotoolbox.m +=================================================================== +--- /dev/null ++++ libavfilter/vf_overlay_videotoolbox.m +@@ -0,0 +1,751 @@ ++/* ++ * Copyright (C) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++#include ++#include "internal.h" ++#include "metal/utils.h" ++#include "framesync.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_videotoolbox.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/opt.h" ++#include "libavutil/objc.h" ++#include "video.h" ++ ++#include ++ ++extern char ff_vf_overlay_videotoolbox_metallib_data[]; ++extern unsigned int ff_vf_overlay_videotoolbox_metallib_len; ++ ++// Although iOS 8.0 introduced basic Metal support, its feature set is not complete and does not have CoreImage compatability. ++// We have to set the minimum iOS version to 9.0. ++typedef struct API_AVAILABLE(macos(10.11), ios(9.0)) OverlayVideoToolboxContext { ++ AVBufferRef *device_ref; ++ FFFrameSync fs; ++ ++ CVMetalTextureCacheRef texture_cache; ++ CVPixelBufferRef input_main_pixel_buffer_cache; ++ CVPixelBufferRef input_overlay_pixel_buffer_cache; ++ CVPixelBufferRef output_pixel_buffer_cache; ++ CIContext *ci_ctx; ++ VTPixelTransferSessionRef vt_session; ++ ++ id mtl_device; ++ id mtl_library; ++ id mtl_queue; ++ id mtl_pipeline; ++ id mtl_function; ++ id mtl_params_buffer; ++ ++ uint x_position; ++ uint y_position; ++ uint hwframe_ctx_allocated; ++} OverlayVideoToolboxContext API_AVAILABLE(macos(10.11), ios(9.0)); ++ ++typedef struct MtlBlendParams { ++ uint x_position; ++ uint y_position; ++} MtlBlendParams; ++ ++// Using sizeof(OverlayVideoToolboxContext) without an availability check will error ++// if we're targeting an older OS version, so we need to calculate the size ourselves ++// (we'll statically verify it's correct in overlay_videotoolbox_init behind a check) ++#define OVERLAY_VT_CTX_SIZE (sizeof(FFFrameSync) + sizeof(uint) * 3 + sizeof(void*) * 13 + 4) ++ ++// Neither VideoToolbox nor 
CoreImage can convert YUV420P frames into 16-bit depth color formats. ++// Additionally, the only hardware formats that support an Alpha channel are AYUV64 and BGRA. ++// However, neither can be directly manipulated with YUV420P frames. ++// In such cases, the user will have to use NV12 instead. ++static const enum AVPixelFormat supported_main_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_NONE, ++}; ++ ++static const enum AVPixelFormat supported_overlay_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_AYUV64, ++ AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_NONE, ++}; ++ ++/** ++ * Helper to find out if provided format is supported by filter ++ */ ++static int format_is_supported(const enum AVPixelFormat formats[], enum AVPixelFormat fmt) ++{ ++ for (int i = 0; formats[i] != AV_PIX_FMT_NONE; i++) ++ if (formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static void call_kernel(AVFilterContext *avctx, ++ id dst, ++ id main, ++ id overlay, ++ uint x_position, ++ uint y_position) API_AVAILABLE(macos(10.11), ios(9.0)) ++{ ++ OverlayVideoToolboxContext *ctx = avctx->priv; ++ // Both the command buffer and encoder are auto-released by objc on default. ++ // Use CFBridgingRetain to get a more C-like behavior. 
++ id buffer = CFBridgingRetain(ctx->mtl_queue.commandBuffer); ++ id encoder = CFBridgingRetain((__bridge id)buffer.computeCommandEncoder); ++ ++ MtlBlendParams *params = (MtlBlendParams *)ctx->mtl_params_buffer.contents; ++ *params = (MtlBlendParams){ ++ .x_position = x_position, ++ .y_position = y_position, ++ }; ++ ++ [(__bridge id)encoder setTexture: main atIndex: 0]; ++ [(__bridge id)encoder setTexture: overlay atIndex: 1]; ++ [(__bridge id)encoder setTexture: dst atIndex: 2]; ++ [(__bridge id)encoder setBuffer: ctx->mtl_params_buffer offset: 0 atIndex: 3]; ++ ff_metal_compute_encoder_dispatch(ctx->mtl_device, ctx->mtl_pipeline, (__bridge id)encoder, dst.width, dst.height); ++ [(__bridge id)encoder endEncoding]; ++ ++ [(__bridge id)buffer commit]; ++ [(__bridge id)buffer waitUntilCompleted]; ++ ++ ff_objc_release(&encoder); ++ ff_objc_release(&buffer); ++} ++ ++static void call_kernel_bgra_overlay(AVFilterContext *avctx, ++ id dst_y, ++ id dst_uv, ++ id main_y, ++ id main_uv, ++ id overlay, ++ uint x_position, ++ uint y_position) API_AVAILABLE(macos(10.11), ios(9.0)) ++{ ++ OverlayVideoToolboxContext *ctx = avctx->priv; ++ // Both the command buffer and encoder are auto-released by objc on default. ++ // Use CFBridgingRetain to get a more C-like behavior. 
++ id buffer = CFBridgingRetain(ctx->mtl_queue.commandBuffer); ++ id encoder = CFBridgingRetain((__bridge id)buffer.computeCommandEncoder); ++ ++ MtlBlendParams *params = (MtlBlendParams *)ctx->mtl_params_buffer.contents; ++ *params = (MtlBlendParams) { ++ .x_position = x_position, ++ .y_position = y_position, ++ }; ++ ++ [(__bridge id)encoder setTexture: main_y atIndex: 0]; ++ [(__bridge id)encoder setTexture: main_uv atIndex: 1]; ++ [(__bridge id)encoder setTexture: overlay atIndex: 2]; ++ [(__bridge id)encoder setTexture: dst_y atIndex: 3]; ++ [(__bridge id)encoder setTexture: dst_uv atIndex: 4]; ++ [(__bridge id)encoder setBuffer: ctx->mtl_params_buffer offset: 0 atIndex: 5]; ++ ff_metal_compute_encoder_dispatch(ctx->mtl_device, ctx->mtl_pipeline, (__bridge id)encoder, dst_y.width, dst_y.height); ++ [(__bridge id)encoder endEncoding]; ++ ++ [(__bridge id)buffer commit]; ++ [(__bridge id)buffer waitUntilCompleted]; ++ ++ ff_objc_release(&encoder); ++ ff_objc_release(&buffer); ++} ++ ++// Copies and/or converts one pixel buffer to another. ++// This transparently handles pixel format and color spaces, and will do a conversion if needed. 
++static int transfer_pixel_buffer(OverlayVideoToolboxContext *ctx, CVPixelBufferRef source, CVPixelBufferRef destination) ++{ ++ if (@available(macOS 10.8, iOS 16.0, *)) { ++ int ret = 0; ++ ret = VTPixelTransferSessionTransferImage(ctx->vt_session, source, destination); ++ if (ret < 0) ++ return ret; ++ } else { ++ CIImage *temp_image = NULL; ++ temp_image = CFBridgingRetain([CIImage imageWithCVPixelBuffer: source]); ++ [(__bridge CIContext*)ctx->ci_ctx render: (__bridge CIImage*)temp_image toCVPixelBuffer: destination]; ++ CFRelease(temp_image); ++ CVBufferPropagateAttachments(source, destination); ++ } ++ return 0; ++} ++ ++static int overlay_vt_blend(FFFrameSync *fs) API_AVAILABLE(macos(10.11), ios(9.0)) ++{ ++ AVFilterContext *avctx = fs->parent; ++ OverlayVideoToolboxContext *ctx = avctx->priv; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ AVFilterLink *inlink_main = avctx->inputs[0]; ++ AVFilterLink *inlink_overlay = avctx->inputs[1]; ++ AVFrame *input_main, *input_overlay; ++ AVFrame *output; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink_main->hw_frames_ctx->data; ++ AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data; ++ const AVPixFmtDescriptor *in_overlay_desc; ++ ++ CVMetalTextureRef main, dst, overlay; ++ id tex_main, tex_overlay, tex_dst; ++ ++ MTLPixelFormat mtl_format; ++ OSType cv_format; ++ int ret; ++ int i, overlay_planes = 0; ++ ++ in_overlay_desc = av_pix_fmt_desc_get(frames_ctx_overlay->sw_format); ++ if (@available(macOS 11.3, iOS 14.2, *)) { ++ mtl_format = MTLPixelFormatRGBA16Unorm; ++ cv_format = kCVPixelFormatType_64RGBALE; ++ } else { ++ // On older OS versions, 64-bit RGBA with 16-bit little-endian full-range samples is not supported. ++ // To handle inputs with color depth greater than 8, convert colors to float type during filtering on these versions. 
++ mtl_format = MTLPixelFormatRGBA16Float; ++ cv_format = kCVPixelFormatType_64RGBAHalf; ++ } ++ ++ // read main and overlay frames from inputs ++ ret = ff_framesync_get_frame(fs, 0, &input_main, 0); ++ if (ret < 0) ++ return ret; ++ ret = ff_framesync_get_frame(fs, 1, &input_overlay, 0); ++ if (ret < 0) ++ return ret; ++ ++ if (!input_main) ++ return AVERROR_BUG; ++ ++ output = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!output) ++ return AVERROR(ENOMEM); ++ ++ ret = av_frame_copy_props(output, input_main); ++ if (ret < 0) ++ return ret; ++ ++ if (!input_overlay) { ++ ret = transfer_pixel_buffer(ctx, (CVPixelBufferRef)input_main->data[3], (CVPixelBufferRef)output->data[3]); ++ if (ret < 0) ++ return ret; ++ return ff_filter_frame(outlink, output); ++ } ++ ++ for (i = 0; i < in_overlay_desc->nb_components; i++) ++ overlay_planes = FFMAX(overlay_planes, ++ in_overlay_desc->comp[i].plane + 1); ++ ++ if (!ctx->input_overlay_pixel_buffer_cache) { ++ ret = CVPixelBufferCreate(kCFAllocatorDefault, ++ CVPixelBufferGetWidthOfPlane((CVPixelBufferRef)input_overlay->data[3], 0), ++ CVPixelBufferGetHeightOfPlane((CVPixelBufferRef)input_overlay->data[3], 0), ++ cv_format, ++ (__bridge CFDictionaryRef)@{ ++ (NSString *)kCVPixelBufferCGImageCompatibilityKey: @(YES), ++ (NSString *)kCVPixelBufferMetalCompatibilityKey: @(YES) ++ }, ++ &ctx->input_overlay_pixel_buffer_cache); ++ if (ret < 0) ++ return ret; ++ } ++ ++ if (!ctx->input_main_pixel_buffer_cache) { ++ ret = CVPixelBufferCreate(kCFAllocatorDefault, ++ CVPixelBufferGetWidthOfPlane((CVPixelBufferRef)input_main->data[3], 0), ++ CVPixelBufferGetHeightOfPlane((CVPixelBufferRef)input_main->data[3], 0), ++ cv_format, ++ (__bridge CFDictionaryRef)@{ ++ (NSString *)kCVPixelBufferCGImageCompatibilityKey: @(YES), ++ (NSString *)kCVPixelBufferMetalCompatibilityKey: @(YES) ++ }, ++ &ctx->input_main_pixel_buffer_cache); ++ if (ret < 0) ++ return ret; ++ } ++ ++ if (!ctx->output_pixel_buffer_cache) { ++ ret = 
CVPixelBufferCreate(kCFAllocatorDefault, ++ CVPixelBufferGetWidthOfPlane((CVPixelBufferRef)input_main->data[3], 0), ++ CVPixelBufferGetHeightOfPlane((CVPixelBufferRef)input_main->data[3], 0), ++ cv_format, ++ (__bridge CFDictionaryRef)@{ ++ (NSString *)kCVPixelBufferCGImageCompatibilityKey: @(YES), ++ (NSString *)kCVPixelBufferMetalCompatibilityKey: @(YES) ++ }, ++ &ctx->output_pixel_buffer_cache); ++ if (ret < 0) ++ return ret; ++ } ++ ++ ret = transfer_pixel_buffer(ctx, (CVPixelBufferRef)input_main->data[3], ctx->input_main_pixel_buffer_cache); ++ if (ret < 0) ++ return ret; ++ ++ ret = transfer_pixel_buffer(ctx, (CVPixelBufferRef)input_overlay->data[3], ctx->input_overlay_pixel_buffer_cache); ++ if (ret < 0) ++ return ret; ++ ++ overlay = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, ctx->input_overlay_pixel_buffer_cache, 0, mtl_format); ++ main = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, ctx->input_main_pixel_buffer_cache, 0, mtl_format); ++ dst = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, ctx->output_pixel_buffer_cache, 0, mtl_format); ++ ++ if (!overlay || !main || !dst) { ++ return AVERROR(ENOSYS); ++ } ++ ++ tex_main = CVMetalTextureGetTexture(main); ++ tex_overlay = CVMetalTextureGetTexture(overlay); ++ tex_dst = CVMetalTextureGetTexture(dst); ++ ++ call_kernel(avctx, tex_dst, tex_main, tex_overlay, ctx->x_position, ctx->y_position); ++ ++ ret = transfer_pixel_buffer(ctx, ctx->output_pixel_buffer_cache, (CVPixelBufferRef)output->data[3]); ++ if (ret < 0) { ++ CFRelease(main); ++ CFRelease(overlay); ++ CFRelease(dst); ++ return ret; ++ } ++ ++ CFRelease(main); ++ CFRelease(overlay); ++ CFRelease(dst); ++ ++ return ff_filter_frame(outlink, output); ++} ++ ++static int overlay_vt_blend_bgra_overlay(FFFrameSync *fs) API_AVAILABLE(macos(10.11), ios(9.0)) ++{ ++ AVFilterContext *avctx = fs->parent; ++ OverlayVideoToolboxContext *ctx = avctx->priv; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ AVFilterLink *inlink_main = 
avctx->inputs[0]; ++ AVFilterLink *inlink_overlay = avctx->inputs[1]; ++ AVFrame *input_main, *input_overlay; ++ AVFrame *output; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink_main->hw_frames_ctx->data; ++ AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data; ++ const AVPixFmtDescriptor *in_main_desc; ++ ++ CVMetalTextureRef main_y, main_uv, dst_y, dst_uv, overlay; ++ id tex_main_y, tex_main_uv, tex_overlay, tex_dst_y, tex_dst_uv; ++ ++ MTLPixelFormat mtl_format_y, mtl_format_uv; ++ OSType cv_format; ++ int ret; ++ int i, overlay_planes = 0; ++ ++ in_main_desc = av_pix_fmt_desc_get(frames_ctx->sw_format); ++ ++ // read main and overlay frames from inputs ++ ret = ff_framesync_get_frame(fs, 0, &input_main, 0); ++ if (ret < 0) ++ return ret; ++ ret = ff_framesync_get_frame(fs, 1, &input_overlay, 0); ++ if (ret < 0) ++ return ret; ++ ++ if (!input_main) ++ return AVERROR_BUG; ++ ++ output = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!output) ++ return AVERROR(ENOMEM); ++ ++ ret = av_frame_copy_props(output, input_main); ++ if (ret < 0) ++ return ret; ++ ++ if (!input_overlay) { ++ ret = transfer_pixel_buffer(ctx, (CVPixelBufferRef)input_main->data[3], (CVPixelBufferRef)output->data[3]); ++ if (ret < 0) ++ return ret; ++ return ff_filter_frame(outlink, output); ++ } ++ ++ mtl_format_y = (in_main_desc->comp[0].depth + in_main_desc->comp[0].shift) > 8 ? MTLPixelFormatR16Unorm : MTLPixelFormatR8Unorm; ++ mtl_format_uv = (in_main_desc->comp[0].depth + in_main_desc->comp[0].shift) > 8 ? 
MTLPixelFormatRG16Unorm : MTLPixelFormatRG8Unorm; ++ main_y = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_main->data[3], 0, mtl_format_y); ++ main_uv = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_main->data[3], 1, mtl_format_uv); ++ overlay = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)input_overlay->data[3], 0, MTLPixelFormatBGRA8Unorm); ++ dst_y = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 0, mtl_format_y); ++ dst_uv = ff_metal_texture_from_pixbuf(avctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 1, mtl_format_uv); ++ ++ if (!overlay || !main_y || !main_uv || !dst_y || !dst_uv) { ++ return AVERROR(ENOSYS); ++ } ++ ++ tex_main_y = CVMetalTextureGetTexture(main_y); ++ tex_main_uv = CVMetalTextureGetTexture(main_uv); ++ tex_overlay = CVMetalTextureGetTexture(overlay); ++ tex_dst_y = CVMetalTextureGetTexture(dst_y); ++ tex_dst_uv = CVMetalTextureGetTexture(dst_uv); ++ ++ call_kernel_bgra_overlay(avctx, ++ tex_dst_y, tex_dst_uv, ++ tex_main_y, tex_main_uv, ++ tex_overlay, ++ ctx->x_position, ctx->y_position); ++ CFRelease(main_y); ++ CFRelease(main_uv); ++ CFRelease(overlay); ++ CFRelease(dst_y); ++ CFRelease(dst_uv); ++ ++ return ff_filter_frame(outlink, output); ++} ++ ++static av_cold void do_uninit(AVFilterContext *avctx) API_AVAILABLE(macos(10.11), ios(9.0)) ++{ ++ OverlayVideoToolboxContext *ctx = avctx->priv; ++ ++ if (ctx->hwframe_ctx_allocated) { ++ av_buffer_unref(&ctx->device_ref); ++ ctx->hwframe_ctx_allocated = 0; ++ } ++ ff_framesync_uninit(&ctx->fs); ++ ++ if (ctx->ci_ctx) { ++ CFRelease(ctx->ci_ctx); ++ ctx->ci_ctx = NULL; ++ } ++ ++ ff_objc_release(&ctx->mtl_params_buffer); ++ ff_objc_release(&ctx->mtl_function); ++ ff_objc_release(&ctx->mtl_pipeline); ++ ff_objc_release(&ctx->mtl_queue); ++ ff_objc_release(&ctx->mtl_library); ++ ff_objc_release(&ctx->mtl_device); ++ ++ if 
(ctx->texture_cache) { ++ CFRelease(ctx->texture_cache); ++ ctx->texture_cache = NULL; ++ } ++ if (ctx->input_main_pixel_buffer_cache) { ++ CFRelease(ctx->input_main_pixel_buffer_cache); ++ ctx->input_main_pixel_buffer_cache = NULL; ++ } ++ if (ctx->input_overlay_pixel_buffer_cache) { ++ CFRelease(ctx->input_overlay_pixel_buffer_cache); ++ ctx->input_overlay_pixel_buffer_cache = NULL; ++ } ++ if (ctx->output_pixel_buffer_cache) { ++ CFRelease(ctx->output_pixel_buffer_cache); ++ ctx->output_pixel_buffer_cache = NULL; ++ } ++ if (ctx->vt_session) { ++ VTPixelTransferSessionInvalidate(ctx->vt_session); ++ CFRelease(ctx->vt_session); ++ ctx->vt_session = NULL; ++ } ++} ++ ++static av_cold void overlay_videotoolbox_uninit(AVFilterContext *ctx) ++{ ++ if (@available(macOS 10.11, iOS 9.0, *)) { ++ do_uninit(ctx); ++ } ++} ++ ++static av_cold int do_init(AVFilterContext *avctx) API_AVAILABLE(macos(10.11), ios(9.0)) ++{ ++ OverlayVideoToolboxContext *ctx = avctx->priv; ++ NSError *err = nil; ++ CVReturn ret; ++ dispatch_data_t libData; ++ ++ ctx->mtl_device = MTLCreateSystemDefaultDevice(); ++ if (!ctx->mtl_device) { ++ av_log(avctx, AV_LOG_ERROR, "Unable to find Metal device\n"); ++ goto fail; ++ } ++ ++ av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", ctx->mtl_device.name.UTF8String); ++ ++ libData = dispatch_data_create( ++ ff_vf_overlay_videotoolbox_metallib_data, ++ ff_vf_overlay_videotoolbox_metallib_len, ++ nil, ++ nil); ++ ++ ctx->mtl_library = [ctx->mtl_device newLibraryWithData: libData error: &err]; ++ dispatch_release(libData); ++ libData = nil; ++ ctx->mtl_function = [ctx->mtl_library newFunctionWithName: @"blend_shader"]; ++ if (!ctx->mtl_function) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create Metal function!\n"); ++ goto fail; ++ } ++ ++ ctx->mtl_queue = ctx->mtl_device.newCommandQueue; ++ if (!ctx->mtl_queue) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create Metal command queue!\n"); ++ goto fail; ++ } ++ ++ ctx->mtl_pipeline = [ctx->mtl_device ++ 
newComputePipelineStateWithFunction: ctx->mtl_function ++ error: &err]; ++ if (err) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String); ++ goto fail; ++ } ++ ++ ctx->mtl_params_buffer = [ctx->mtl_device ++ newBufferWithLength: sizeof(MtlBlendParams) ++ options: MTLResourceStorageModeShared]; ++ if (!ctx->mtl_params_buffer) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create Metal buffer for parameters\n"); ++ goto fail; ++ } ++ ++ ret = CVMetalTextureCacheCreate( ++ NULL, ++ NULL, ++ ctx->mtl_device, ++ NULL, ++ &ctx->texture_cache ++ ); ++ if (ret != kCVReturnSuccess) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create CVMetalTextureCache: %d\n", ret); ++ goto fail; ++ } ++ ++ if (@available(macOS 10.8, iOS 16.0, *)) { ++ ret = VTPixelTransferSessionCreate(NULL, &ctx->vt_session); ++ if (ret != kCVReturnSuccess) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create VTPixelTransferSession: %d\n", ret); ++ goto fail; ++ } ++ } else { ++ // Use CoreImage as fallback for old OS. ++ // CoreImage has comparable performance to VTPixelTransferSession, but it supports less pixel formats than VTPixelTransferSession. ++ // Warn user about possible incorrect results. 
++ av_log(avctx, AV_LOG_WARNING, "VTPixelTransferSessionTransferImage is not available on this OS version, fallback using CoreImage\n"); ++ av_log(avctx, AV_LOG_WARNING, "Try an overlay with BGRA format if you see no overlay\n"); ++ if (@available(macOS 10.15, iOS 13.0, *)) { ++ ctx->ci_ctx = CFBridgingRetain([CIContext contextWithMTLCommandQueue: ctx->mtl_queue]); ++ } else { ++ ctx->ci_ctx = CFBridgingRetain([CIContext contextWithMTLDevice: ctx->mtl_device]); ++ } ++ } ++ ++ ctx->fs.on_event = &overlay_vt_blend; ++ ++ return 0; ++fail: ++ overlay_videotoolbox_uninit(avctx); ++ return AVERROR_EXTERNAL; ++} ++ ++static av_cold int overlay_videotoolbox_init(AVFilterContext *ctx) ++{ ++ if (@available(macOS 10.11, iOS 9.0, *)) { ++ // Ensure we calculated OVERLAY_VT_CTX_SIZE correctly ++ static_assert(OVERLAY_VT_CTX_SIZE == sizeof(OverlayVideoToolboxContext), "Incorrect OVERLAY_VT_CTX_SIZE value!"); ++ return do_init(ctx); ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Metal is not available on this OS version\n"); ++ return AVERROR(ENOSYS); ++ } ++} ++ ++static int do_config_output(AVFilterLink *link) API_AVAILABLE(macos(10.11), ios(9.0)) ++{ ++ AVFilterContext *avctx = link->src; ++ AVFilterLink *inlink_main = avctx->inputs[0]; ++ AVFilterLink *inlink_overlay = avctx->inputs[1]; ++ OverlayVideoToolboxContext *ctx = avctx->priv; ++ AVHWFramesContext *main_frames, *output_frames, *overlay_frames; ++ AVBufferRef *input_ref, *overlay_ref; ++ int ret = 0; ++ ++ if (!inlink_main->hw_frames_ctx || ++ !inlink_overlay->hw_frames_ctx) { ++ av_log(avctx, AV_LOG_ERROR, "An input HW frames reference is " ++ "required to associate the processing device.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ input_ref = inlink_main->hw_frames_ctx; ++ overlay_ref = inlink_overlay->hw_frames_ctx; ++ main_frames = (AVHWFramesContext*)input_ref->data; ++ overlay_frames = (AVHWFramesContext*)overlay_ref->data; ++ av_assert0(main_frames); ++ av_assert0(overlay_frames); ++ ++ if 
(!format_is_supported(supported_main_formats, main_frames->sw_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported main input format: %s.\n", ++ av_get_pix_fmt_name(main_frames->sw_format)); ++ if (main_frames->sw_format == AV_PIX_FMT_YUV420P) { ++ av_log(ctx, AV_LOG_WARNING, "Hint: Use %s instead of %s.\n", ++ av_get_pix_fmt_name(AV_PIX_FMT_NV12), ++ av_get_pix_fmt_name(AV_PIX_FMT_YUV420P)); ++ } ++ return AVERROR(ENOSYS); ++ } ++ ++ if (!format_is_supported(supported_overlay_formats, overlay_frames->sw_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported overlay input format: %s.\n", ++ av_get_pix_fmt_name(overlay_frames->sw_format)); ++ if (overlay_frames->sw_format == AV_PIX_FMT_YUV420P) { ++ av_log(ctx, AV_LOG_WARNING, "Hint: Use %s instead of %s.\n", ++ av_get_pix_fmt_name(AV_PIX_FMT_NV12), ++ av_get_pix_fmt_name(AV_PIX_FMT_YUV420P)); ++ } ++ return AVERROR(ENOSYS); ++ } ++ ++ // Use fast code path for BGRA overlay ++ if (overlay_frames->sw_format == AV_PIX_FMT_BGRA) { ++ NSError *err = nil; ++ ff_objc_release(&ctx->mtl_pipeline); ++ ff_objc_release(&ctx->mtl_function); ++ ctx->mtl_function = [ctx->mtl_library newFunctionWithName: @"blend_shader_bgra_overlay"]; ++ if (!ctx->mtl_function) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create Metal function!\n"); ++ overlay_videotoolbox_uninit(avctx); ++ return AVERROR_EXTERNAL; ++ } ++ ctx->mtl_pipeline = [ctx->mtl_device ++ newComputePipelineStateWithFunction: ctx->mtl_function ++ error: &err]; ++ if (err) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String); ++ overlay_videotoolbox_uninit(avctx); ++ return AVERROR_EXTERNAL; ++ } ++ ctx->fs.on_event = &overlay_vt_blend_bgra_overlay; ++ } ++ ++ ctx->device_ref = av_buffer_ref(main_frames->device_ref); ++ if (!ctx->device_ref) { ++ av_log(ctx, AV_LOG_ERROR, "A device reference create failed.\n"); ++ return AVERROR(ENOMEM); ++ } ++ ++ link->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->device_ref); ++ if 
(!link->hw_frames_ctx) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create HW frame context " ++ "for output.\n"); ++ ret = AVERROR(ENOMEM); ++ return ret; ++ } ++ ctx->hwframe_ctx_allocated = 1; ++ ++ output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data; ++ ++ output_frames->format = AV_PIX_FMT_VIDEOTOOLBOX; ++ output_frames->sw_format = main_frames->sw_format; ++ output_frames->width = inlink_main->w; ++ output_frames->height = inlink_main->h; ++ ((AVVTFramesContext *)output_frames->hwctx)->color_range = ((AVVTFramesContext *)main_frames->hwctx)->color_range; ++ ++ ret = ff_filter_init_hw_frames(avctx, link, 1); ++ if (ret < 0) ++ return ret; ++ ++ ret = av_hwframe_ctx_init(link->hw_frames_ctx); ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox frame " ++ "context for output: %d\n", ret); ++ return ret; ++ } ++ ++ link->time_base = inlink_main->time_base; ++ ctx->fs.time_base = link->time_base; ++ ++ ret = ff_framesync_init_dualinput(&ctx->fs, avctx); ++ if (ret < 0) ++ return ret; ++ ++ ret = ff_framesync_configure(&ctx->fs); ++ return ret; ++} ++ ++static int config_output(AVFilterLink *link) ++{ ++ AVFilterContext *ctx = link->src; ++ if (@available(macOS 10.11, iOS 9.0, *)) { ++ return do_config_output(link); ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Metal is not available on this OS version\n"); ++ return AVERROR(ENOSYS); ++ } ++} ++ ++static int overlay_videotoolbox_activate(AVFilterContext *avctx) ++{ ++ OverlayVideoToolboxContext *ctx = avctx->priv; ++ return ff_framesync_activate(&ctx->fs); ++} ++ ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++#define OFFSET(x) offsetof(OverlayVideoToolboxContext, x) ++ ++static const AVOption overlay_videotoolbox_options[] = { ++ { "x", "Overlay x position", ++ OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "y", "Overlay y position", ++ OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS 
}, ++ { "eof_action", "Action to take when encountering EOF from secondary input ", ++ OFFSET(fs.opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, ++ EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, .unit = "eof_action" }, ++ { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, .unit = "eof_action" }, ++ { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, .unit = "eof_action" }, ++ { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, .unit = "eof_action" }, ++ { "shortest", "force termination when the shortest input terminates", OFFSET(fs.opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(fs.opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, ++ { NULL }, ++}; ++ ++FRAMESYNC_DEFINE_CLASS(overlay_videotoolbox, OverlayVideoToolboxContext, fs); ++ ++static const AVFilterPad overlay_videotoolbox_inputs[] = { ++ { ++ .name = "main", ++ .type = AVMEDIA_TYPE_VIDEO, ++ }, ++ { ++ .name = "overlay", ++ .type = AVMEDIA_TYPE_VIDEO, ++ }, ++}; ++ ++static const AVFilterPad overlay_videotoolbox_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = config_output, ++ }, ++}; ++ ++const AVFilter ff_vf_overlay_videotoolbox = { ++ .name = "overlay_videotoolbox", ++ .description = NULL_IF_CONFIG_SMALL("Overlay filter for VideoToolbox frames using Metal compute"), ++ .priv_size = OVERLAY_VT_CTX_SIZE, ++ .priv_class = &overlay_videotoolbox_class, ++ .init = overlay_videotoolbox_init, ++ .uninit = overlay_videotoolbox_uninit, ++ .activate = overlay_videotoolbox_activate, ++ .preinit = overlay_videotoolbox_framesync_preinit, ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX), ++ FILTER_INPUTS(overlay_videotoolbox_inputs), ++ FILTER_OUTPUTS(overlay_videotoolbox_outputs), ++ 
.flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/cross/ffmpeg7/patches/1051-jellyfin-0051-add-coreimage-based-vf-transpose-vt-filter.patch b/cross/ffmpeg7/patches/1051-jellyfin-0051-add-coreimage-based-vf-transpose-vt-filter.patch new file mode 100644 index 00000000000..bba8b5ea5da --- /dev/null +++ b/cross/ffmpeg7/patches/1051-jellyfin-0051-add-coreimage-based-vf-transpose-vt-filter.patch @@ -0,0 +1,691 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3934,7 +3934,7 @@ tonemap_vaapi_filter_deps="vaapi VAProcF + tonemap_opencl_filter_deps="opencl const_nan" + transpose_opencl_filter_deps="opencl" + transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" +-transpose_vt_filter_deps="videotoolbox VTPixelRotationSessionCreate" ++transpose_vt_filter_deps="coreimage videotoolbox" + transpose_vulkan_filter_deps="vulkan spirv_compiler" + unsharp_opencl_filter_deps="opencl" + uspp_filter_deps="gpl avcodec" +Index: FFmpeg/libavfilter/vf_transpose_vt.c +=================================================================== +--- libavfilter/vf_transpose_vt.c ++++ /dev/null +@@ -1,285 +0,0 @@ +-/* +- * Copyright (c) 2023 Zhao Zhili +- * +- * This file is part of FFmpeg. +- * +- * FFmpeg is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. +- * +- * FFmpeg is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. 
+- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with FFmpeg; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- */ +- +-#include +- +-#include "libavutil/hwcontext.h" +-#include "libavutil/hwcontext_videotoolbox.h" +-#include "libavutil/opt.h" +-#include "libavutil/pixdesc.h" +-#include "internal.h" +-#include "transpose.h" +-#include "video.h" +- +-typedef struct TransposeVtContext { +- AVClass *class; +- +- VTPixelRotationSessionRef session; +- int dir; +- int passthrough; +-} TransposeVtContext; +- +-static av_cold int transpose_vt_init(AVFilterContext *avctx) +-{ +- TransposeVtContext *s = avctx->priv; +- int ret; +- +- ret = VTPixelRotationSessionCreate(kCFAllocatorDefault, &s->session); +- if (ret != noErr) { +- av_log(avctx, AV_LOG_ERROR, "Rotation session create failed, %d\n", ret); +- return AVERROR_EXTERNAL; +- } +- +- return 0; +-} +- +-static av_cold void transpose_vt_uninit(AVFilterContext *avctx) +-{ +- TransposeVtContext *s = avctx->priv; +- +- if (s->session) { +- VTPixelRotationSessionInvalidate(s->session); +- CFRelease(s->session); +- s->session = NULL; +- } +-} +- +-static int transpose_vt_filter_frame(AVFilterLink *link, AVFrame *in) +-{ +- int ret; +- AVFilterContext *ctx = link->dst; +- TransposeVtContext *s = ctx->priv; +- AVFilterLink *outlink = ctx->outputs[0]; +- CVPixelBufferRef src; +- CVPixelBufferRef dst; +- AVFrame *out; +- +- if (s->passthrough) +- return ff_filter_frame(outlink, in); +- +- out = ff_get_video_buffer(outlink, outlink->w, outlink->h); +- if (!out) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } +- +- ret = av_frame_copy_props(out, in); +- if (ret < 0) +- goto fail; +- +- src = (CVPixelBufferRef)in->data[3]; +- dst = (CVPixelBufferRef)out->data[3]; +- ret = VTPixelRotationSessionRotateImage(s->session, src, dst); +- if (ret != noErr) { +- av_log(ctx, AV_LOG_ERROR, "transfer image failed, %d\n", ret); +- ret 
= AVERROR_EXTERNAL; +- goto fail; +- } +- +- av_frame_free(&in); +- +- return ff_filter_frame(outlink, out); +- +-fail: +- av_frame_free(&in); +- av_frame_free(&out); +- return ret; +-} +- +-static int transpose_vt_recreate_hw_ctx(AVFilterLink *outlink) +-{ +- AVFilterContext *avctx = outlink->src; +- AVFilterLink *inlink = outlink->src->inputs[0]; +- AVHWFramesContext *hw_frame_ctx_in; +- AVHWFramesContext *hw_frame_ctx_out; +- int err; +- +- av_buffer_unref(&outlink->hw_frames_ctx); +- +- hw_frame_ctx_in = (AVHWFramesContext *)inlink->hw_frames_ctx->data; +- outlink->hw_frames_ctx = av_hwframe_ctx_alloc(hw_frame_ctx_in->device_ref); +- hw_frame_ctx_out = (AVHWFramesContext *)outlink->hw_frames_ctx->data; +- hw_frame_ctx_out->format = AV_PIX_FMT_VIDEOTOOLBOX; +- hw_frame_ctx_out->sw_format = hw_frame_ctx_in->sw_format; +- hw_frame_ctx_out->width = outlink->w; +- hw_frame_ctx_out->height = outlink->h; +- +- err = ff_filter_init_hw_frames(avctx, outlink, 1); +- if (err < 0) +- return err; +- +- err = av_hwframe_ctx_init(outlink->hw_frames_ctx); +- if (err < 0) { +- av_log(avctx, AV_LOG_ERROR, +- "Failed to init videotoolbox frame context, %s\n", +- av_err2str(err)); +- return err; +- } +- +- return 0; +-} +- +-static int transpose_vt_config_output(AVFilterLink *outlink) +-{ +- int err; +- AVFilterContext *avctx = outlink->src; +- TransposeVtContext *s = avctx->priv; +- AVFilterLink *inlink = outlink->src->inputs[0]; +- CFStringRef rotation = kVTRotation_0; +- CFBooleanRef vflip = kCFBooleanFalse; +- CFBooleanRef hflip = kCFBooleanFalse; +- int swap_w_h = 0; +- +- av_buffer_unref(&outlink->hw_frames_ctx); +- outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); +- +- if ((inlink->w >= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) || +- (inlink->w <= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) { +- av_log(avctx, AV_LOG_VERBOSE, +- "w:%d h:%d -> w:%d h:%d (passthrough mode)\n", +- inlink->w, inlink->h, inlink->w, inlink->h); +- 
return 0; +- } +- +- s->passthrough = TRANSPOSE_PT_TYPE_NONE; +- +- switch (s->dir) { +- case TRANSPOSE_CCLOCK_FLIP: +- rotation = kVTRotation_CCW90; +- vflip = kCFBooleanTrue; +- swap_w_h = 1; +- break; +- case TRANSPOSE_CCLOCK: +- rotation = kVTRotation_CCW90; +- swap_w_h = 1; +- break; +- case TRANSPOSE_CLOCK: +- rotation = kVTRotation_CW90; +- swap_w_h = 1; +- break; +- case TRANSPOSE_CLOCK_FLIP: +- rotation = kVTRotation_CW90; +- vflip = kCFBooleanTrue; +- swap_w_h = 1; +- break; +- case TRANSPOSE_REVERSAL: +- rotation = kVTRotation_180; +- break; +- case TRANSPOSE_HFLIP: +- hflip = kCFBooleanTrue; +- break; +- case TRANSPOSE_VFLIP: +- vflip = kCFBooleanTrue; +- break; +- default: +- av_log(avctx, AV_LOG_ERROR, "Failed to set direction to %d\n", s->dir); +- return AVERROR(EINVAL); +- } +- +- err = VTSessionSetProperty(s->session, kVTPixelRotationPropertyKey_Rotation, +- rotation); +- if (err != noErr) { +- av_log(avctx, AV_LOG_ERROR, "Set rotation property failed, %d\n", err); +- return AVERROR_EXTERNAL; +- } +- err = VTSessionSetProperty(s->session, kVTPixelRotationPropertyKey_FlipVerticalOrientation, +- vflip); +- if (err != noErr) { +- av_log(avctx, AV_LOG_ERROR, "Set vertical flip property failed, %d\n", err); +- return AVERROR_EXTERNAL; +- } +- err = VTSessionSetProperty(s->session, kVTPixelRotationPropertyKey_FlipHorizontalOrientation, +- hflip); +- if (err != noErr) { +- av_log(avctx, AV_LOG_ERROR, "Set horizontal flip property failed, %d\n", err); +- return AVERROR_EXTERNAL; +- } +- +- if (!swap_w_h) +- return 0; +- +- outlink->w = inlink->h; +- outlink->h = inlink->w; +- return transpose_vt_recreate_hw_ctx(outlink); +-} +- +-#define OFFSET(x) offsetof(TransposeVtContext, x) +-#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +-static const AVOption transpose_vt_options[] = { +- { "dir", "set transpose direction", +- OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 6, FLAGS, .unit = "dir" }, +- { "cclock_flip", 
"rotate counter-clockwise with vertical flip", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, +- { "clock", "rotate clockwise", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "dir" }, +- { "cclock", "rotate counter-clockwise", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "dir" }, +- { "clock_flip", "rotate clockwise with vertical flip", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, +- { "reversal", "rotate by half-turn", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, .flags=FLAGS, .unit = "dir" }, +- { "hflip", "flip horizontally", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, .flags=FLAGS, .unit = "dir" }, +- { "vflip", "flip vertically", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, .flags=FLAGS, .unit = "dir" }, +- +- { "passthrough", "do not apply transposition if the input matches the specified geometry", +- OFFSET(passthrough), AV_OPT_TYPE_INT, { .i64=TRANSPOSE_PT_TYPE_NONE }, 0, INT_MAX, FLAGS, .unit = "passthrough" }, +- { "none", "always apply transposition", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_NONE }, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, +- { "portrait", "preserve portrait geometry", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_PORTRAIT }, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, +- { "landscape", "preserve landscape geometry", +- 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_LANDSCAPE }, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, +- +- { NULL } +-}; +- +-AVFILTER_DEFINE_CLASS(transpose_vt); +- +-static const AVFilterPad transpose_vt_inputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .filter_frame = &transpose_vt_filter_frame, +- }, +-}; +- +-static const AVFilterPad transpose_vt_outputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .config_props = &transpose_vt_config_output, +- }, +-}; +- 
+-const AVFilter ff_vf_transpose_vt = { +- .name = "transpose_vt", +- .description = NULL_IF_CONFIG_SMALL("Transpose Videotoolbox frames"), +- .priv_size = sizeof(TransposeVtContext), +- .init = transpose_vt_init, +- .uninit = transpose_vt_uninit, +- FILTER_INPUTS(transpose_vt_inputs), +- FILTER_OUTPUTS(transpose_vt_outputs), +- FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX), +- .priv_class = &transpose_vt_class, +- .flags = AVFILTER_FLAG_HWDEVICE, +- .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +-}; +Index: FFmpeg/libavfilter/vf_transpose_vt.m +=================================================================== +--- /dev/null ++++ libavfilter/vf_transpose_vt.m +@@ -0,0 +1,383 @@ ++/* ++ * Copyright (c) 2023 Zhao Zhili ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++#include ++ ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_videotoolbox.h" ++#include "libavutil/opt.h" ++#include "libavutil/objc.h" ++#include "internal.h" ++#include "transpose.h" ++#include "video.h" ++ ++// Declaration for SDKs not having VTPixelRotationSession ++#if (TARGET_OS_OSX && (!defined(__MAC_13_0))) || \ ++ (TARGET_OS_IOS && (!defined(__IPHONE_16_0))) || \ ++ (TARGET_OS_TV && (!defined(__TVOS_16_0))) ++ #define LEGACY_VT_SDK ++#endif ++ ++#ifdef LEGACY_VT_SDK ++typedef void* VTPixelRotationSessionRef; ++#endif ++ ++typedef struct TransposeVtContext { ++ AVClass *class; ++ CIContext *ci_ctx; ++ CGImagePropertyOrientation orientation; ++ VTPixelRotationSessionRef session; ++ ++ int dir; ++ int passthrough; ++} TransposeVtContext; ++ ++static av_cold int transpose_vt_init(AVFilterContext *avctx) ++{ ++ TransposeVtContext *s = avctx->priv; ++#ifndef LEGACY_VT_SDK ++ if (@available(macOS 13.0, iOS 16, *)) { ++ int ret; ++ ++ ret = VTPixelRotationSessionCreate(kCFAllocatorDefault, &s->session); ++ if (ret != noErr) { ++ av_log(avctx, AV_LOG_ERROR, "Rotation session create failed, %d\n", ret); ++ return AVERROR_EXTERNAL; ++ } else { ++ av_log(avctx, AV_LOG_DEBUG, "VT Rotation session created\n"); ++ } ++ } ++ else ++#endif ++ { ++ s->ci_ctx = CFBridgingRetain([CIContext context]); ++ if (!s->ci_ctx) { ++ av_log(avctx, AV_LOG_ERROR, "CoreImage Context create failed\n"); ++ return AVERROR_EXTERNAL; ++ } else { ++ av_log(avctx, AV_LOG_DEBUG, "CoreImage Context created\n"); ++ } ++ } ++ ++ return 0; ++} ++ ++static av_cold void transpose_vt_uninit(AVFilterContext *avctx) ++{ ++ TransposeVtContext *s = avctx->priv; ++ if (s->ci_ctx) { ++ CFRelease(s->ci_ctx); ++ s->ci_ctx = NULL; ++ } ++#ifndef 
LEGACY_VT_SDK ++ if (@available(macOS 13.0, iOS 16, *)) { ++ if (s->session) { ++ VTPixelRotationSessionInvalidate(s->session); ++ CFRelease(s->session); ++ s->session = NULL; ++ } ++ } ++#endif ++} ++ ++static int transpose_vt_filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ int ret; ++ AVFilterContext *ctx = link->dst; ++ TransposeVtContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ CVPixelBufferRef src; ++ CVPixelBufferRef dst; ++ AVFrame *out; ++ ++ if (s->passthrough) ++ return ff_filter_frame(outlink, in); ++ ++ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!out) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ ret = av_frame_copy_props(out, in); ++ if (ret < 0) ++ goto fail; ++ ++ src = (CVPixelBufferRef)in->data[3]; ++ dst = (CVPixelBufferRef)out->data[3]; ++#ifndef LEGACY_VT_SDK ++ if (@available(macOS 13.0, iOS 16, *)) { ++ ret = VTPixelRotationSessionRotateImage(s->session, src, dst); ++ if (ret != noErr) { ++ av_log(ctx, AV_LOG_ERROR, "transfer image failed, %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ } ++ else ++#endif ++ { ++ @autoreleasepool { ++ CIImage *source_image = NULL; ++ CIImage *transposed_image = NULL; ++ source_image = [CIImage imageWithCVPixelBuffer:src]; ++ transposed_image = [source_image imageByApplyingCGOrientation:s->orientation]; ++ if (!transposed_image) { ++ ff_objc_release(&source_image); ++ av_log(ctx, AV_LOG_ERROR, "transpose image failed, %d\n", ret); ++ ret = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ CVBufferPropagateAttachments(src, dst); ++ [(__bridge CIContext *) s->ci_ctx render:transposed_image toCVPixelBuffer:dst]; ++ } ++ } ++ ++ av_frame_free(&in); ++ ++ return ff_filter_frame(outlink, out); ++ ++ fail: ++ av_frame_free(&in); ++ av_frame_free(&out); ++ return ret; ++} ++ ++static int transpose_vt_recreate_hw_ctx(AVFilterLink *outlink) ++{ ++ AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = outlink->src->inputs[0]; ++ AVHWFramesContext 
*hw_frame_ctx_in; ++ AVHWFramesContext *hw_frame_ctx_out; ++ int err; ++ ++ av_buffer_unref(&outlink->hw_frames_ctx); ++ ++ hw_frame_ctx_in = (AVHWFramesContext *)inlink->hw_frames_ctx->data; ++ outlink->hw_frames_ctx = av_hwframe_ctx_alloc(hw_frame_ctx_in->device_ref); ++ hw_frame_ctx_out = (AVHWFramesContext *)outlink->hw_frames_ctx->data; ++ hw_frame_ctx_out->format = AV_PIX_FMT_VIDEOTOOLBOX; ++ hw_frame_ctx_out->sw_format = hw_frame_ctx_in->sw_format; ++ hw_frame_ctx_out->width = outlink->w; ++ hw_frame_ctx_out->height = outlink->h; ++ ((AVVTFramesContext *)hw_frame_ctx_out->hwctx)->color_range = ((AVVTFramesContext *)hw_frame_ctx_in->hwctx)->color_range; ++ ++ err = ff_filter_init_hw_frames(avctx, outlink, 1); ++ if (err < 0) ++ return err; ++ ++ err = av_hwframe_ctx_init(outlink->hw_frames_ctx); ++ if (err < 0) { ++ av_log(avctx, AV_LOG_ERROR, ++ "Failed to init videotoolbox frame context, %s\n", ++ av_err2str(err)); ++ return err; ++ } ++ ++ return 0; ++} ++ ++static int transpose_vt_config_output(AVFilterLink *outlink) ++{ ++ int err; ++ AVFilterContext *avctx = outlink->src; ++ TransposeVtContext *s = avctx->priv; ++ AVFilterLink *inlink = outlink->src->inputs[0]; ++ int swap_w_h = 0; ++ ++ av_buffer_unref(&outlink->hw_frames_ctx); ++ outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); ++ ++ if ((inlink->w >= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) || ++ (inlink->w <= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) { ++ av_log(avctx, AV_LOG_VERBOSE, ++ "w:%d h:%d -> w:%d h:%d (passthrough mode)\n", ++ inlink->w, inlink->h, inlink->w, inlink->h); ++ s->orientation = kCGImagePropertyOrientationUp; ++ return 0; ++ } ++ ++ s->passthrough = TRANSPOSE_PT_TYPE_NONE; ++ ++#ifndef LEGACY_VT_SDK ++ if (@available(macOS 13.0, iOS 16, *)) { ++ CFStringRef rotation = kVTRotation_0; ++ CFBooleanRef vflip = kCFBooleanFalse; ++ CFBooleanRef hflip = kCFBooleanFalse; ++ ++ switch (s->dir) { ++ case TRANSPOSE_CCLOCK_FLIP: ++ 
rotation = kVTRotation_CCW90; ++ vflip = kCFBooleanTrue; ++ swap_w_h = 1; ++ break; ++ case TRANSPOSE_CCLOCK: ++ rotation = kVTRotation_CCW90; ++ swap_w_h = 1; ++ break; ++ case TRANSPOSE_CLOCK: ++ rotation = kVTRotation_CW90; ++ swap_w_h = 1; ++ break; ++ case TRANSPOSE_CLOCK_FLIP: ++ rotation = kVTRotation_CW90; ++ vflip = kCFBooleanTrue; ++ swap_w_h = 1; ++ break; ++ case TRANSPOSE_REVERSAL: ++ rotation = kVTRotation_180; ++ break; ++ case TRANSPOSE_HFLIP: ++ hflip = kCFBooleanTrue; ++ break; ++ case TRANSPOSE_VFLIP: ++ vflip = kCFBooleanTrue; ++ break; ++ default: ++ av_log(avctx, AV_LOG_ERROR, "Failed to set direction to %d\n", s->dir); ++ return AVERROR(EINVAL); ++ } ++ ++ err = VTSessionSetProperty(s->session, kVTPixelRotationPropertyKey_Rotation, ++ rotation); ++ if (err != noErr) { ++ av_log(avctx, AV_LOG_ERROR, "Set rotation property failed, %d\n", err); ++ return AVERROR_EXTERNAL; ++ } ++ err = VTSessionSetProperty(s->session, kVTPixelRotationPropertyKey_FlipVerticalOrientation, ++ vflip); ++ if (err != noErr) { ++ av_log(avctx, AV_LOG_ERROR, "Set vertical flip property failed, %d\n", err); ++ return AVERROR_EXTERNAL; ++ } ++ err = VTSessionSetProperty(s->session, kVTPixelRotationPropertyKey_FlipHorizontalOrientation, ++ hflip); ++ if (err != noErr) { ++ av_log(avctx, AV_LOG_ERROR, "Set horizontal flip property failed, %d\n", err); ++ return AVERROR_EXTERNAL; ++ } ++ } ++ else ++#endif ++ { ++ switch (s->dir) { ++ case TRANSPOSE_CCLOCK_FLIP: ++ s->orientation = kCGImagePropertyOrientationLeftMirrored; ++ swap_w_h = 1; ++ break; ++ case TRANSPOSE_CCLOCK: ++ s->orientation = kCGImagePropertyOrientationLeft; ++ swap_w_h = 1; ++ break; ++ case TRANSPOSE_CLOCK: ++ s->orientation = kCGImagePropertyOrientationRight; ++ swap_w_h = 1; ++ break; ++ case TRANSPOSE_CLOCK_FLIP: ++ s->orientation = kCGImagePropertyOrientationRightMirrored; ++ swap_w_h = 1; ++ break; ++ case TRANSPOSE_REVERSAL: ++ s->orientation = kCGImagePropertyOrientationDown; ++ break; ++ case 
TRANSPOSE_HFLIP: ++ s->orientation = kCGImagePropertyOrientationUpMirrored; ++ break; ++ case TRANSPOSE_VFLIP: ++ s->orientation = kCGImagePropertyOrientationDownMirrored; ++ break; ++ default: ++ av_log(avctx, AV_LOG_ERROR, "Failed to set direction to %d\n", s->dir); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ if (!swap_w_h) ++ return 0; ++ ++ outlink->w = inlink->h; ++ outlink->h = inlink->w; ++ return transpose_vt_recreate_hw_ctx(outlink); ++} ++ ++#define OFFSET(x) offsetof(TransposeVtContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++static const AVOption transpose_vt_options[] = { ++ { "dir", "set transpose direction", ++ OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 6, FLAGS, .unit = "dir" }, ++ { "cclock_flip", "rotate counter-clockwise with vertical flip", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, ++ { "clock", "rotate clockwise", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "dir" }, ++ { "cclock", "rotate counter-clockwise", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "dir" }, ++ { "clock_flip", "rotate clockwise with vertical flip", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, ++ { "reversal", "rotate by half-turn", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, .flags=FLAGS, .unit = "dir" }, ++ { "hflip", "flip horizontally", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, .flags=FLAGS, .unit = "dir" }, ++ { "vflip", "flip vertically", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, .flags=FLAGS, .unit = "dir" }, ++ ++ { "passthrough", "do not apply transposition if the input matches the specified geometry", ++ OFFSET(passthrough), AV_OPT_TYPE_INT, { .i64=TRANSPOSE_PT_TYPE_NONE }, 0, INT_MAX, FLAGS, .unit = "passthrough" }, ++ { "none", "always apply transposition", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_NONE }, 
INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, ++ { "portrait", "preserve portrait geometry", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_PORTRAIT }, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, ++ { "landscape", "preserve landscape geometry", ++ 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_LANDSCAPE }, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, ++ ++ { NULL } ++}; ++ ++AVFILTER_DEFINE_CLASS(transpose_vt); ++ ++static const AVFilterPad transpose_vt_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = &transpose_vt_filter_frame, ++ }, ++}; ++ ++static const AVFilterPad transpose_vt_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = &transpose_vt_config_output, ++ }, ++}; ++ ++const AVFilter ff_vf_transpose_vt = { ++ .name = "transpose_vt", ++ .description = NULL_IF_CONFIG_SMALL("Transpose Videotoolbox frames"), ++ .priv_size = sizeof(TransposeVtContext), ++ .init = transpose_vt_init, ++ .uninit = transpose_vt_uninit, ++ FILTER_INPUTS(transpose_vt_inputs), ++ FILTER_OUTPUTS(transpose_vt_outputs), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX), ++ .priv_class = &transpose_vt_class, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/cross/ffmpeg7/patches/1052-jellyfin-0052-add-vf-tonemap-videotoolbox-filter.patch b/cross/ffmpeg7/patches/1052-jellyfin-0052-add-vf-tonemap-videotoolbox-filter.patch new file mode 100644 index 00000000000..5dd19deaf48 --- /dev/null +++ b/cross/ffmpeg7/patches/1052-jellyfin-0052-add-vf-tonemap-videotoolbox-filter.patch @@ -0,0 +1,2093 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3931,6 +3931,7 @@ tinterlace_merge_test_deps="tinterlace_f + tinterlace_pad_test_deps="tinterlace_filter" + tonemap_filter_deps="const_nan" + tonemap_vaapi_filter_deps="vaapi VAProcFilterParameterBufferHDRToneMapping" ++tonemap_videotoolbox_filter_deps="metal 
corevideo videotoolbox const_nan" + tonemap_opencl_filter_deps="opencl const_nan" + transpose_opencl_filter_deps="opencl" + transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- libavfilter/Makefile ++++ libavfilter/Makefile +@@ -535,6 +535,9 @@ OBJS-$(CONFIG_TONEMAP_CUDA_FILTER) + OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o opencl.o \ + opencl/tonemap.o opencl/colorspace_common.o + OBJS-$(CONFIG_TONEMAP_VAAPI_FILTER) += vf_tonemap_vaapi.o vaapi_vpp.o ++OBJS-$(CONFIG_TONEMAP_VIDEOTOOLBOX_FILTER) += vf_tonemap_videotoolbox.o \ ++ metal/vf_tonemap_videotoolbox.metallib.o \ ++ metal/utils.o + OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o + OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o + OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== +--- libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -501,6 +501,7 @@ extern const AVFilter ff_vf_tonemap; + extern const AVFilter ff_vf_tonemap_cuda; + extern const AVFilter ff_vf_tonemap_opencl; + extern const AVFilter ff_vf_tonemap_vaapi; ++extern const AVFilter ff_vf_tonemap_videotoolbox; + extern const AVFilter ff_vf_tpad; + extern const AVFilter ff_vf_transpose; + extern const AVFilter ff_vf_transpose_npp; +Index: FFmpeg/libavfilter/metal/vf_tonemap_videotoolbox.metal +=================================================================== +--- /dev/null ++++ libavfilter/metal/vf_tonemap_videotoolbox.metal +@@ -0,0 +1,891 @@ ++/* ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. 
++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++#include ++#include ++ ++using namespace metal; ++ ++//------------ ++// Metal Tonemapping ++ ++#define ST2084_MAX_LUMINANCE 10000.0f ++#define ST2084_M1 0.1593017578125f ++#define ST2084_M2 78.84375f ++#define ST2084_C1 0.8359375f ++#define ST2084_C2 18.8515625f ++#define ST2084_C3 18.6875f ++ ++#define ARIB_B67_A 0.17883277f ++#define ARIB_B67_B 0.28466892f ++#define ARIB_B67_C 0.55991073f ++ ++#define FLOAT_EPS 1e-6f ++ ++constant float ref_white [[function_constant(0)]]; ++constant float tone_param [[function_constant(1)]]; ++constant float desat_param [[function_constant(2)]]; ++constant float target_peak [[function_constant(3)]]; ++constant float scene_threshold [[function_constant(4)]]; ++constant float pq_max_lum_div_ref_white [[function_constant(5)]]; ++constant float ref_white_div_pq_max_lum [[function_constant(6)]]; ++constant short tonemap_func_type [[function_constant(7)]]; ++constant bool is_tone_func_bt2390 [[function_constant(8)]]; ++constant bool is_tone_mode_rgb [[function_constant(9)]]; ++constant bool is_tone_mode_max [[function_constant(10)]]; ++constant bool is_non_semi_planar_in [[function_constant(11)]]; ++constant bool is_non_semi_planar_out [[function_constant(12)]]; ++constant bool enable_dither [[function_constant(13)]]; ++constant float dither_size2 [[function_constant(14)]]; ++constant float dither_quantization [[function_constant(15)]]; ++constant bool is_full_range_in [[function_constant(16)]]; ++constant bool is_full_range_out 
[[function_constant(17)]]; ++constant int chroma_loc [[function_constant(18)]]; ++constant bool is_rgb2rgb_passthrough [[function_constant(19)]]; ++constant float3 rgb2rgb_matrix_1 [[function_constant(20)]]; ++constant float3 rgb2rgb_matrix_2 [[function_constant(21)]]; ++constant float3 rgb2rgb_matrix_3 [[function_constant(22)]]; ++constant bool skip_tonemap [[function_constant(23)]]; ++constant bool dovi_reshape [[function_constant(24)]]; ++constant float3 ycc2rgb_offset [[function_constant(25)]]; ++constant float3 rgb_matrix_1 [[function_constant(26)]]; ++constant float3 rgb_matrix_2 [[function_constant(27)]]; ++constant float3 rgb_matrix_3 [[function_constant(28)]]; ++constant float3 lms2rgb_matrix_1 [[function_constant(29)]]; ++constant float3 lms2rgb_matrix_2 [[function_constant(30)]]; ++constant float3 lms2rgb_matrix_3 [[function_constant(31)]]; ++constant float3 yuv_matrix_1 [[function_constant(32)]]; ++constant float3 yuv_matrix_2 [[function_constant(33)]]; ++constant float3 yuv_matrix_3 [[function_constant(34)]]; ++constant float3 luma_dst [[function_constant(35)]]; ++constant short linearize_type [[function_constant(36)]]; ++constant short delinearize_type [[function_constant(37)]]; ++constant bool map_in_src_space [[function_constant(38)]]; ++constant bool is_tone_mode_itp [[function_constant(39)]]; ++ ++enum AVChromaLocation { ++ AVCHROMA_LOC_UNSPECIFIED, ++ AVCHROMA_LOC_LEFT, ++ AVCHROMA_LOC_CENTER, ++ AVCHROMA_LOC_TOPLEFT, ++ AVCHROMA_LOC_TOP, ++ AVCHROMA_LOC_BOTTOMLEFT, ++ AVCHROMA_LOC_BOTTOM, ++ AVCHROMA_LOC_NB ++}; ++ ++float3 get_chroma_sample(float3 a, float3 b, float3 c,float3 d) { ++ if (chroma_loc == AVCHROMA_LOC_LEFT) return (((a) + (c)) * 0.5f); ++ if (chroma_loc == AVCHROMA_LOC_TOPLEFT) return a; ++ if (chroma_loc == AVCHROMA_LOC_TOP) return (((a) + (b)) * 0.5f); ++ if (chroma_loc == AVCHROMA_LOC_BOTTOMLEFT) return c; ++ if (chroma_loc == AVCHROMA_LOC_BOTTOM) return (((c) + (d)) * 0.5f); ++ return (((a) + (b) + (c) + (d)) * 0.25f); ++} ++ 
++float get_luma_dst(float3 c) { ++ return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; ++} ++ ++float4 get_luma_dst4(float4 r4, float4 g4, float4 b4) { ++ return luma_dst.x * r4 + luma_dst.y * g4 + luma_dst.z * b4; ++} ++ ++//------------ ++// linearizers / delinearizers ++ ++// linearizer for PQ/ST2084 ++float eotf_st2084_common(float x) { ++ x = fmax(x, 0.0f); ++ float xpow = powr(x, 1.0f / ST2084_M2); ++ float num = fmax(xpow - ST2084_C1, 0.0f); ++ float den = fmax(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); ++ x = powr(num / den, 1.0f / ST2084_M1); ++ return x; ++} ++ ++float eotf_st2084(float x) { ++ return eotf_st2084_common(x) * pq_max_lum_div_ref_white; ++} ++ ++float4 eotf_st2084x4(float4 x) { ++ x.x = eotf_st2084_common(x.x); ++ x.y = eotf_st2084_common(x.y); ++ x.z = eotf_st2084_common(x.z); ++ x.w = eotf_st2084_common(x.w); ++ return x * pq_max_lum_div_ref_white; ++} ++ ++// delinearizer for PQ/ST2084 ++float inverse_eotf_st2084_common(float x) { ++ x = fmax(x, 0.0f); ++ float xpow = powr(x, ST2084_M1); ++ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return powr(1.0f + num / den, ST2084_M2); ++} ++ ++float inverse_eotf_st2084(float x) { ++ x *= ref_white_div_pq_max_lum; ++ return inverse_eotf_st2084_common(x); ++} ++ ++float4 inverse_eotf_st2084x4(float4 x) { ++ x *= ref_white_div_pq_max_lum; ++ x.x = inverse_eotf_st2084_common(x.x); ++ x.y = inverse_eotf_st2084_common(x.y); ++ x.z = inverse_eotf_st2084_common(x.z); ++ x.w = inverse_eotf_st2084_common(x.w); ++ return x; ++} ++ ++float ootf_1_2(float x) { ++ return x > 0.0f ? powr(x, 1.2f) : x; ++} ++ ++float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? powr(x, 1.0f / 1.2f) : x; ++} ++ ++float oetf_arib_b67(float x) { ++ x = fmax(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? 
sqrt(3.0f * x) ++ : (ARIB_B67_A * log(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++float inverse_oetf_arib_b67(float x) { ++ x = fmax(x, 0.0f); ++ return x <= 0.5f ++ ? (x * x) * (1.0f / 3.0f) ++ : (exp((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)) * 5.0f; ++} ++ ++// delinearizer for HLG/ARIB-B67 ++float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x / 5.0f)); ++} ++ ++float4 oetf_arib_b67x4(float4 x) { ++ x.x = oetf_arib_b67(x.x); ++ x.y = oetf_arib_b67(x.y); ++ x.z = oetf_arib_b67(x.z); ++ x.w = oetf_arib_b67(x.w); ++ return x; ++} ++ ++float4 inverse_oetf_arib_b67x4(float4 x) { ++ x.x = inverse_oetf_arib_b67(x.x); ++ x.y = inverse_oetf_arib_b67(x.y); ++ x.z = inverse_oetf_arib_b67(x.z); ++ x.w = inverse_oetf_arib_b67(x.w); ++ return x; ++} ++ ++// delinearizer for BT709, BT2020-10 ++float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? 
powr(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++float linearize(float x) { ++ if (linearize_type == 1) { ++ return eotf_st2084(x); ++ } ++ if (linearize_type == 2) { ++ return eotf_arib_b67(x); ++ } ++ return eotf_st2084(x); ++} ++ ++float delinearize(float x) { ++ return inverse_eotf_bt1886(x); ++} ++ ++// ------------ ++// Color conversion ++float3 yuv2rgb(float y, float u, float v) { ++ if (is_full_range_in) { ++ u -= 0.5f; ++ v -= 0.5f; ++ } else { ++ y = (y * 255.0f - 16.0f) / 219.0f; ++ u = (u * 255.0f - 128.0f) / 224.0f; ++ v = (v * 255.0f - 128.0f) / 224.0f; ++ } ++ float r = (y * rgb_matrix_1[0]) + (u * rgb_matrix_1[1]) + (v * rgb_matrix_1[2]); ++ float g = (y * rgb_matrix_2[0]) + (u * rgb_matrix_2[1]) + (v * rgb_matrix_2[2]); ++ float b = (y * rgb_matrix_3[0]) + (u * rgb_matrix_3[1]) + (v * rgb_matrix_3[2]); ++ return float3(r, g, b); ++} ++ ++float3 yuv2lrgb(float3 yuv) { ++ float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z); ++ if (skip_tonemap) { ++ return rgb; ++ } ++ float r = linearize(rgb.x); ++ float g = linearize(rgb.y); ++ float b = linearize(rgb.z); ++ return float3(r, g, b); ++} ++ ++float3 rgb2yuv(float r, float g, float b) { ++ float y = (r*yuv_matrix_1[0]) + (g*yuv_matrix_1[1]) + (b*yuv_matrix_1[2]); ++ float u = (r*yuv_matrix_2[0]) + (g*yuv_matrix_2[1]) + (b*yuv_matrix_2[2]); ++ float v = (r*yuv_matrix_3[0]) + (g*yuv_matrix_3[1]) + (b*yuv_matrix_3[2]); ++ if (is_full_range_out) { ++ u += 0.5f; ++ v += 0.5f; ++ } else { ++ y = (219.0f * y + 16.0f) / 255.0f; ++ u = (224.0f * u + 128.0f) / 255.0f; ++ v = (224.0f * v + 128.0f) / 255.0f; ++ } ++ return float3(y, u, v); ++} ++ ++float rgb2y(float r, float g, float b) { ++ float y = (r*yuv_matrix_1[0]) + (g*yuv_matrix_1[1]) + (b*yuv_matrix_1[2]); ++ if (!is_full_range_out) { ++ y = (219.0f * y + 16.0f) / 255.0f; ++ } ++ return y; ++} ++ ++float3 lrgb2yuv(float3 c) { ++ if (skip_tonemap) { ++ return rgb2yuv(c.x, c.y, c.z); ++ } ++ float r = delinearize(c.x); ++ float g = delinearize(c.y); ++ float b = 
delinearize(c.z); ++ return rgb2yuv(r, g, b); ++} ++ ++float lrgb2y(float3 c) { ++ if (skip_tonemap) { ++ return rgb2y(c.x, c.y, c.z); ++ } ++ float r = delinearize(c.x); ++ float g = delinearize(c.y); ++ float b = delinearize(c.z); ++ return rgb2y(r, g, b); ++} ++ ++float3 lrgb2lrgb(float3 c) { ++ if (is_rgb2rgb_passthrough) { ++ return c; ++ } ++ float r = c.x, g = c.y, b = c.z; ++ float rr = (rgb2rgb_matrix_1[0] * r) + (rgb2rgb_matrix_1[1] * g) + (rgb2rgb_matrix_1[2] * b); ++ float gg = (rgb2rgb_matrix_2[0] * r) + (rgb2rgb_matrix_2[1] * g) + (rgb2rgb_matrix_2[2] * b); ++ float bb = (rgb2rgb_matrix_3[0] * r) + (rgb2rgb_matrix_3[1] * g) + (rgb2rgb_matrix_3[2] * b); ++ return float3(rr, gg, bb); ++} ++ ++float3 rgb2lrgb(float3 c) { ++ if (skip_tonemap) { ++ return lrgb2lrgb(float3(c.x, c.y, c.z)); ++ } ++ float r = linearize(c.x); ++ float g = linearize(c.y); ++ float b = linearize(c.z); ++ return float3(r, g, b); ++} ++ ++float3 ycc2rgb(float y, float cb, float cr) { ++ float r = y * rgb_matrix_1[0] + cb * rgb_matrix_1[1] + cr * rgb_matrix_1[2]; ++ float g = y * rgb_matrix_2[0] + cb * rgb_matrix_2[1] + cr * rgb_matrix_2[2]; ++ float b = y * rgb_matrix_3[0] + cb * rgb_matrix_3[1] + cr * rgb_matrix_3[2]; ++ return float3(r, g, b) + ycc2rgb_offset; ++} ++ ++float3 lms2rgb(float r, float g, float b) { ++ r = eotf_st2084_common(r); ++ g = eotf_st2084_common(g); ++ b = eotf_st2084_common(b); ++ float rr = r * lms2rgb_matrix_1[0] + g * lms2rgb_matrix_1[1] + b * lms2rgb_matrix_1[2]; ++ float gg = r * lms2rgb_matrix_2[0] + g * lms2rgb_matrix_2[1] + b * lms2rgb_matrix_2[2]; ++ float bb = r * lms2rgb_matrix_3[0] + g * lms2rgb_matrix_3[1] + b * lms2rgb_matrix_3[2]; ++ rr = inverse_eotf_st2084_common(rr); ++ gg = inverse_eotf_st2084_common(gg); ++ bb = inverse_eotf_st2084_common(bb); ++ return float3(rr, gg, bb); ++} ++ ++// The following assumes bt2020 ++void lrgb2ictcp(float4 r4, float4 g4, float4 b4, thread float4* i4, thread float4* ct4, thread float4* cp4) { ++ float4 l4 
= 0.412109375000000f * r4 + 0.523925781250000f * g4 + 0.063964843750000f * b4; ++ float4 m4 = 0.166748046875000f * r4 + 0.720458984375000f * g4 + 0.112792968750000f * b4; ++ float4 s4 = 0.024169921875000f * r4 + 0.075439453125000f * g4 + 0.900390625000000f * b4; ++ l4 = inverse_eotf_st2084x4(l4); ++ m4 = inverse_eotf_st2084x4(m4); ++ s4 = inverse_eotf_st2084x4(s4); ++ *i4 = 0.5f * l4 + 0.5f * m4; ++ *ct4 = 1.613769531250000f * l4 - 3.323486328125000f * m4 + 1.709716796875000f * s4; ++ *cp4 = 4.378173828125000f * l4 - 4.245605468750000f * m4 - 0.132568359375000f * s4; ++} ++ ++void ictcp2lrgb(float4 i4, float4 ct4, float4 cp4, thread float4* r4, thread float4* g4, thread float4* b4) { ++ float4 ll4 = i4 + 0.008609037037933f * ct4 + 0.111029625003026f * cp4; ++ float4 mm4 = i4 - 0.008609037037933f * ct4 - 0.111029625003026f * cp4; ++ float4 ss4 = i4 + 0.560031335710679f * ct4 - 0.320627174987319f * cp4; ++ ll4 = eotf_st2084x4(ll4); ++ mm4 = eotf_st2084x4(mm4); ++ ss4 = eotf_st2084x4(ss4); ++ *r4 = 3.436606694333079f * ll4 - 2.506452118656270f * mm4 + 0.069845424323191f * ss4; ++ *g4 = -0.791329555598929f * ll4 + 1.983600451792291f * mm4 - 0.192270896193362f * ss4; ++ *b4 = -0.025949899690593f * ll4 - 0.098913714711726f * mm4 + 1.124863614402319f * ss4; ++} ++ ++float parabolic(float x, float t0, float x0, float y0) { ++ float s = (y0 - t0) / sqrt(x0 - y0); ++ float ox = t0 - s * s * 0.25f; ++ float oy = t0 - s * sqrt(s * s * 0.25f); ++ return (x < t0 ? x : s * sqrt(x - ox) + oy); ++} ++ ++float3 gamut_compress(float3 rgb) { ++ #define cyan_limit 1.5187050250638159f ++ #define magenta_limit 1.0750082769546088f ++ #define yellow_limit 1.0887800403483898f ++ #define cyan_threshold 1.050508660266247f ++ #define magenta_threshold 0.940509816042432f ++ #define yellow_threshold 0.9771607996420639f ++ ++ // Achromatic axis ++ float ac = max3(rgb.r, rgb.g, rgb.b); ++ ++ // Inverse RGB Ratios: distance from achromatic axis ++ float3 d = ac == 0.0f ? 
float3(0.0f) : (ac - rgb) / abs(ac); ++ ++ // Compressed distance ++ float3 cd = float3( ++ parabolic(d.x, cyan_threshold, cyan_limit, 1.0f), ++ parabolic(d.y, magenta_threshold, magenta_limit, 1.0f), ++ parabolic(d.z, yellow_threshold, yellow_limit, 1.0f) ++ ); ++ ++ // Inverse RGB Ratios to RGB ++ float3 crgb = ac - cd * abs(ac); ++ ++ return crgb; ++} ++ ++ ++//------------ ++// Tonemapping methods ++enum TonemapAlgorithm { ++ TONEMAP_NONE, ++ TONEMAP_LINEAR, ++ TONEMAP_GAMMA, ++ TONEMAP_CLIP, ++ TONEMAP_REINHARD, ++ TONEMAP_HABLE, ++ TONEMAP_MOBIUS, ++ TONEMAP_BT2390, ++ TONEMAP_COUNT, ++}; ++ ++float hable_f(float in) { ++ float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; ++ return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; ++} ++ ++float direct(float s, float peak, float target_peak) { ++ return s; ++} ++ ++float linear(float s, float peak, float target_peak) { ++ return s * tone_param / peak; ++} ++ ++float gamma(float s, float peak, float target_peak) { ++ float p = s > 0.05f ? s / peak : 0.05f / peak; ++ float v = powr(p, 1.0f / tone_param); ++ return s > 0.05f ? v : (s * v / 0.05f); ++} ++ ++float clip(float s, float peak, float target_peak) { ++ return clamp(s * tone_param, 0.0f, 1.0f); ++} ++ ++float reinhard(float s, float peak, float target_peak) { ++ return s / (s + tone_param) * (peak + tone_param) / peak; ++} ++ ++float hable(float s, float peak, float target_peak) { ++ return hable_f(s) / hable_f(peak); ++} ++ ++float mobius(float s, float peak, float target_peak) { ++ float j = tone_param; ++ float a, b; ++ ++ if (s <= j) ++ return s; ++ ++ a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); ++ b = (j * j - 2.0f * j * peak + peak) / fmax(peak - 1.0f, FLOAT_EPS); ++ ++ return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); ++} ++ ++float bt2390(float s, float peak_inv_pq, float target_peak_inv_pq) { ++ float peak_pq = peak_inv_pq; ++ float scale = peak_pq > 0.0f ? 
(1.0f / peak_pq) : 1.0f; ++ ++ float s_pq = s * scale; ++ float max_lum = target_peak_inv_pq * scale; ++ ++ float ks = 1.5f * max_lum - 0.5f; ++ float tb = (s_pq - ks) / (1.0f - ks); ++ float tb2 = tb * tb; ++ float tb3 = tb2 * tb; ++ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + ++ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + ++ (-2.0f * tb3 + 3.0f * tb2) * max_lum; ++ float sig = mix(pb, s_pq, s_pq < ks); ++ ++ return sig * peak_pq; ++} ++ ++float tonemap(float s, float peak, float target_peak) { ++ if (tonemap_func_type == TONEMAP_NONE) { ++ return direct(s, peak, target_peak); ++ } ++ if (tonemap_func_type == TONEMAP_LINEAR) { ++ return linear(s, peak, target_peak); ++ } ++ if (tonemap_func_type == TONEMAP_GAMMA) { ++ return gamma(s, peak, target_peak); ++ } ++ if (tonemap_func_type == TONEMAP_CLIP) { ++ return clip(s, peak, target_peak); ++ } ++ if (tonemap_func_type == TONEMAP_REINHARD) { ++ return reinhard(s, peak, target_peak); ++ } ++ if (tonemap_func_type == TONEMAP_HABLE) { ++ return hable(s, peak, target_peak); ++ } ++ if (tonemap_func_type == TONEMAP_MOBIUS) { ++ return mobius(s, peak, target_peak); ++ } ++ if (tonemap_func_type == TONEMAP_BT2390) { ++ return bt2390(s, peak, target_peak); ++ } ++ return direct(s, peak, target_peak); ++} ++ ++float get_dithered_y(float y, float d) { ++ return floor(y * dither_quantization + d + 0.5f / dither_size2) * 1.0f / dither_quantization; ++} ++ ++void map_four_pixels(thread float4 *r4, thread float4 *g4, thread float4 *b4, float peak) { ++#define MAP_FOUR_PIXELS(sig, peak, target_peak) \ ++{ \ ++ sig.x = tonemap(sig.x, peak, target_peak); \ ++ sig.y = tonemap(sig.y, peak, target_peak); \ ++ sig.z = tonemap(sig.z, peak, target_peak); \ ++ sig.w = tonemap(sig.w, peak, target_peak); \ ++} ++ if (is_tone_mode_rgb) { ++ float4 sig_r = fmax(*r4, FLOAT_EPS); ++ float4 sig_g = fmax(*g4, FLOAT_EPS); ++ float4 sig_b = fmax(*b4, FLOAT_EPS); ++ float4 sig_ro = sig_r; ++ float4 sig_go = sig_g; ++ float4 sig_bo = sig_b; ++ if 
(is_tone_func_bt2390) { ++ sig_r = inverse_eotf_st2084x4(fmin(sig_r, peak)); ++ sig_g = inverse_eotf_st2084x4(fmin(sig_g, peak)); ++ sig_b = inverse_eotf_st2084x4(fmin(sig_b, peak)); ++ } ++ // Desaturate the color using a coefficient dependent on the signal level ++ if (desat_param > 0.0f) { ++ float4 sig = fmax(fmax(*r4, fmax(*g4, *b4)), FLOAT_EPS); ++ float4 luma = get_luma_dst4(*r4, *g4, *b4); ++ float4 coeff = fmax(sig - 0.18f, FLOAT_EPS) / fmax(sig, FLOAT_EPS); ++ coeff = powr(coeff, 10.0f / desat_param); ++ *r4 = mix(*r4, luma, coeff); ++ *g4 = mix(*g4, luma, coeff); ++ *b4 = mix(*b4, luma, coeff); ++ } ++ if (is_tone_func_bt2390) { ++ float src_peak_delin_pq = inverse_eotf_st2084(peak); ++ float dst_peak_delin_pq = inverse_eotf_st2084(1.0f); ++ MAP_FOUR_PIXELS(sig_r, src_peak_delin_pq, dst_peak_delin_pq) ++ MAP_FOUR_PIXELS(sig_g, src_peak_delin_pq, dst_peak_delin_pq) ++ MAP_FOUR_PIXELS(sig_b, src_peak_delin_pq, dst_peak_delin_pq) ++ sig_r = fmin(eotf_st2084x4(sig_r), peak); ++ sig_g = fmin(eotf_st2084x4(sig_g), peak); ++ sig_b = fmin(eotf_st2084x4(sig_b), peak); ++ } else { ++ MAP_FOUR_PIXELS(sig_r, peak, 1.0f) ++ MAP_FOUR_PIXELS(sig_g, peak, 1.0f) ++ MAP_FOUR_PIXELS(sig_b, peak, 1.0f) ++ sig_r = fmin(sig_r, 1.0f); ++ sig_g = fmin(sig_g, 1.0f); ++ sig_b = fmin(sig_b, 1.0f); ++ } ++ float4 factor_r = sig_r / sig_ro; ++ float4 factor_g = sig_g / sig_go; ++ float4 factor_b = sig_b / sig_bo; ++ *r4 *= factor_r; ++ *g4 *= factor_g; ++ *b4 *= factor_b; ++ } else if (is_tone_mode_itp) { ++ float4 i4_o, i4, ct4 , cp4; ++ lrgb2ictcp(*r4, *g4, *b4, &i4, &ct4, &cp4); ++ i4 = fmax(i4, FLOAT_EPS); ++ i4_o = i4; ++ if (desat_param > 0.0f) { ++ float4 coeff = exp(-pow(eotf_st2084x4(i4) - (target_peak - desat_param) * 0.5f, 2) / (2.0f * peak)); ++ ct4 *= coeff; ++ cp4 *= coeff; ++ } ++ if (is_tone_func_bt2390) { ++ float src_peak_delin_pq = inverse_eotf_st2084(peak); ++ float dst_peak_delin_pq = inverse_eotf_st2084(1.0f); ++ MAP_FOUR_PIXELS(i4, src_peak_delin_pq, 
dst_peak_delin_pq) ++ } else { ++ i4 = eotf_st2084x4(i4); ++ MAP_FOUR_PIXELS(i4, peak, 1.0f) ++ i4 = inverse_eotf_st2084x4(i4); ++ } ++ i4 = fmin(i4, 1.0f); ++ float4 factor = min(i4/i4_o, i4_o/i4); ++ ct4 *= factor; ++ cp4 *= factor; ++ ictcp2lrgb(i4, ct4, cp4, r4, g4, b4); ++ } else { ++ float4 sig; ++ if (is_tone_mode_max) { ++ sig = fmax(fmax3(*r4, *g4, *b4), FLOAT_EPS); ++ } else { ++ sig = fmax((*r4 * 0.2627f + *g4 * 0.678f + *b4 * 0.0593f), FLOAT_EPS); ++ } ++ if (is_tone_func_bt2390) { ++ sig = fmin(sig, peak); ++ } ++ float4 sig_o = sig; ++ if (desat_param > 0.0f) { ++ float4 luma; ++ if (is_tone_mode_max) { ++ luma = get_luma_dst4(*r4, *g4, *b4); ++ } else { ++ luma = sig; ++ } ++ float4 coeff = fmax(sig - 0.18f, FLOAT_EPS) / fmax(sig, FLOAT_EPS); ++ coeff = powr(coeff, 10.0f / desat_param); ++ *r4 = mix(*r4, luma, coeff); ++ *g4 = mix(*g4, luma, coeff); ++ *b4 = mix(*b4, luma, coeff); ++ } ++ if (is_tone_func_bt2390) { ++ float src_peak_delin_pq = inverse_eotf_st2084(peak); ++ float dst_peak_delin_pq = inverse_eotf_st2084(1.0f); ++ sig = inverse_eotf_st2084x4(sig); ++ MAP_FOUR_PIXELS(sig, src_peak_delin_pq, dst_peak_delin_pq) ++ sig = fmin(eotf_st2084x4(sig), peak); ++ } else { ++ MAP_FOUR_PIXELS(sig, peak, 1.0f) ++ sig = fmin(sig, 1.0f); ++ } ++ float4 factor = sig / sig_o; ++ *r4 *= factor; ++ *g4 *= factor; ++ *b4 *= factor; ++ } ++} ++ ++// Map from source space YUV to source space RGB ++float3 map_to_src_space_from_yuv(float3 yuv) { ++ if (dovi_reshape) { ++ float3 c = ycc2rgb(yuv.x, yuv.y, yuv.z); ++ c = lms2rgb(c.x, c.y, c.z); ++ c = rgb2lrgb(c); ++ return c; ++ } else { ++ float3 c = yuv2lrgb(yuv); ++ return c; ++ } ++} ++ ++// Map from source space YUV to destination space RGB ++float3 map_to_dst_space_from_yuv(float3 yuv) { ++ if (dovi_reshape) { ++ float3 c = ycc2rgb(yuv.x, yuv.y, yuv.z); ++ c = lms2rgb(c.x, c.y, c.z); ++ c = rgb2lrgb(c); ++ return lrgb2lrgb(c); ++ } else { ++ float3 c = yuv2lrgb(yuv); ++ c = lrgb2lrgb(c); ++ return c; ++ } 
++} ++ ++//------------ ++// DOVI helpers ++ ++float reshape_poly(float s, float4 coeffs) { ++ return (coeffs.z * s + coeffs.y) * s + coeffs.x; ++} ++ ++float reshape_mmr(float3 sig, ++ float4 coeffs, ++ constant float4 *dovi_mmr, ++ int dovi_mmr_single, ++ int dovi_min_order, ++ int dovi_max_order) ++{ ++ int mmr_idx = dovi_mmr_single ? 0 : (int)coeffs.y; ++ int order = (int)coeffs.w; ++ float4 sigX; ++ ++ float s = coeffs.x; ++ sigX.xyz = sig.xxy * sig.yzz; ++ sigX.w = sigX.x * sig.z; ++ s += dot(dovi_mmr[mmr_idx + 0].xyz, sig); ++ s += dot(dovi_mmr[mmr_idx + 1], sigX); ++ ++ int t = dovi_max_order >= 2 && (dovi_min_order >= 2 || order >= 2); ++ if (t) { ++ float3 sig2 = sig * sig; ++ float4 sigX2 = sigX * sigX; ++ s += dot(dovi_mmr[mmr_idx + 2].xyz, sig2); ++ s += dot(dovi_mmr[mmr_idx + 3], sigX2); ++ t = dovi_max_order == 3 && (dovi_min_order == 3 || order >= 3); ++ if (t) { ++ s += dot(dovi_mmr[mmr_idx + 4].xyz, sig2 * sig); ++ s += dot(dovi_mmr[mmr_idx + 5], sigX2 * sigX); ++ } ++ } ++ ++ return s; ++} ++ ++float3 reshape_dovi_yuv(float3 yuv, ++ constant float *src_dovi_params, ++ constant float *src_dovi_pivots, ++ constant float4 *src_dovi_coeffs, ++ constant float4 *src_dovi_mmr) ++{ ++ int i; ++ float s; ++ float3 sig = clamp(yuv.xyz, 0.0f, 1.0f); ++ float sig_arr[3] = {sig.x, sig.y, sig.z}; ++ float4 coeffs; ++ int dovi_num_pivots, dovi_has_mmr, dovi_has_poly; ++ int dovi_mmr_single, dovi_min_order, dovi_max_order; ++ float dovi_lo, dovi_hi; ++ constant float *dovi_params; ++ constant float *dovi_pivots; ++ constant float4 *dovi_coeffs, *dovi_mmr; ++ ++ #pragma clang loop unroll(full) ++ for (i = 0; i < 3; i++) { ++ dovi_params = src_dovi_params + i*8; ++ dovi_pivots = src_dovi_pivots + i*8; ++ dovi_coeffs = src_dovi_coeffs + i*8; ++ dovi_mmr = src_dovi_mmr + i*48; ++ dovi_num_pivots = dovi_params[0]; ++ dovi_has_mmr = dovi_params[1]; ++ dovi_has_poly = dovi_params[2]; ++ dovi_mmr_single = dovi_params[3]; ++ dovi_min_order = dovi_params[4]; ++ 
dovi_max_order = dovi_params[5]; ++ dovi_lo = dovi_params[6]; ++ dovi_hi = dovi_params[7]; ++ ++ s = sig_arr[i]; ++ coeffs = dovi_coeffs[0]; ++ ++ if (i == 0 && dovi_num_pivots > 2) { ++ coeffs = mix(mix(mix(dovi_coeffs[0], dovi_coeffs[1], (float4)(s >= dovi_pivots[0])), ++ mix(dovi_coeffs[2], dovi_coeffs[3], (float4)(s >= dovi_pivots[2])), ++ (float4)(s >= dovi_pivots[1])), ++ mix(mix(dovi_coeffs[4], dovi_coeffs[5], (float4)(s >= dovi_pivots[4])), ++ mix(dovi_coeffs[6], dovi_coeffs[7], (float4)(s >= dovi_pivots[6])), ++ (float4)(s >= dovi_pivots[5])), ++ (float4)(s >= dovi_pivots[3])); ++ } ++ ++ int has_mmr_poly = dovi_has_mmr && dovi_has_poly; ++ ++ if ((has_mmr_poly && coeffs.w == 0.0f) || (!has_mmr_poly && dovi_has_poly)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(sig, coeffs, dovi_mmr, ++ dovi_mmr_single, dovi_min_order, dovi_max_order); ++ ++ sig_arr[i] = clamp(s, dovi_lo, dovi_hi); ++ } ++ ++ return float3(sig_arr[0], sig_arr[1], sig_arr[2]); ++} ++ ++ ++//------------ ++// Samplers ++constexpr sampler n_sampler(coord::pixel, address::clamp_to_edge, filter::nearest); ++constexpr sampler l_sampler(coord::normalized, address::clamp_to_edge, filter::linear); ++constexpr sampler d_sampler(coord::normalized, address::repeat, filter::nearest); ++ ++//------------ ++// kernel ++kernel void tonemap(texture2d dst1 [[texture(0)]], ++ texture2d src1 [[texture(1)]], ++ texture2d dst2 [[texture(2)]], ++ texture2d src2 [[texture(3)]], ++ texture2d dst3 [[texture(4), function_constant(is_non_semi_planar_out)]], ++ texture2d src3 [[texture(5), function_constant(is_non_semi_planar_in)]], ++ texture2d dither [[texture(6), function_constant(enable_dither)]], ++ constant float* dovi_buf [[buffer(7), function_constant(dovi_reshape)]], ++ constant float* peak [[buffer(8)]], ++ uint2 index [[thread_position_in_grid]]) ++{ ++ int xi = index.x; ++ int yi = index.y; ++ // each thread process four pixels ++ int x = 2 * xi; ++ int y = 2 * yi; ++ ++ int2 src1_sz = 
int2(src1.get_width(), ++ src1.get_height()); ++ int2 dst2_sz = int2(dst2.get_width(), ++ dst2.get_height()); ++ ++ if (xi >= dst2_sz.x || yi >= dst2_sz.y) ++ return; ++ ++ float2 ncoords_yuv0 = float2(int2(x, y)) / float2(src1_sz); ++ float2 ncoords_yuv1 = float2(int2(x + 1, y)) / float2(src1_sz); ++ float2 ncoords_yuv2 = float2(int2(x, y + 1)) / float2(src1_sz); ++ float2 ncoords_yuv3 = float2(int2(x + 1, y + 1)) / float2(src1_sz); ++ ++ float3 yuv0, yuv1, yuv2, yuv3; ++ ++ yuv0.x = src1.sample(n_sampler, float2(x, y)).x; ++ yuv1.x = src1.sample(n_sampler, float2(x + 1, y)).x; ++ yuv2.x = src1.sample(n_sampler, float2(x, y + 1)).x; ++ yuv3.x = src1.sample(n_sampler, float2(x + 1,y + 1)).x; ++ ++ if (is_non_semi_planar_in) { ++ yuv0.yz = float2(src2.sample(l_sampler, ncoords_yuv0).x, src3.sample(l_sampler, ncoords_yuv0).x); ++ yuv1.yz = float2(src2.sample(l_sampler, ncoords_yuv1).x, src3.sample(l_sampler, ncoords_yuv1).x); ++ yuv2.yz = float2(src2.sample(l_sampler, ncoords_yuv2).x, src3.sample(l_sampler, ncoords_yuv2).x); ++ yuv3.yz = float2(src2.sample(l_sampler, ncoords_yuv3).x, src3.sample(l_sampler, ncoords_yuv3).x); ++ } else { ++ yuv0.yz = float2(src2.sample(l_sampler, ncoords_yuv0).xy); ++ yuv1.yz = float2(src2.sample(l_sampler, ncoords_yuv1).xy); ++ yuv2.yz = float2(src2.sample(l_sampler, ncoords_yuv2).xy); ++ yuv3.yz = float2(src2.sample(l_sampler, ncoords_yuv3).xy); ++ } ++ ++ if (dovi_reshape) { ++ constant float *dovi_params = dovi_buf; ++ constant float *dovi_pivots = dovi_buf + 24; ++ constant float4 *dovi_coeffs = (constant float4 *)(dovi_buf + 48); ++ constant float4 *dovi_mmr = (constant float4 *)(dovi_buf + 144); ++ yuv0 = reshape_dovi_yuv(yuv0, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); ++ yuv1 = reshape_dovi_yuv(yuv1, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); ++ yuv2 = reshape_dovi_yuv(yuv2, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); ++ yuv3 = reshape_dovi_yuv(yuv3, dovi_params, dovi_pivots, dovi_coeffs, dovi_mmr); ++ } ++ 
++ float3 c0, c1, c2, c3; ++ ++ if (map_in_src_space) { ++ c0 = map_to_src_space_from_yuv(yuv0); ++ c1 = map_to_src_space_from_yuv(yuv1); ++ c2 = map_to_src_space_from_yuv(yuv2); ++ c3 = map_to_src_space_from_yuv(yuv3); ++ } else { ++ c0 = map_to_dst_space_from_yuv(yuv0); ++ c1 = map_to_dst_space_from_yuv(yuv1); ++ c2 = map_to_dst_space_from_yuv(yuv2); ++ c3 = map_to_dst_space_from_yuv(yuv3); ++ } ++ ++ if(!skip_tonemap) { ++ float4 r4 = float4(c0.x, c1.x, c2.x, c3.x); ++ float4 g4 = float4(c0.y, c1.y, c2.y, c3.y); ++ float4 b4 = float4(c0.z, c1.z, c2.z, c3.z); ++ map_four_pixels(&r4, &g4, &b4, *peak); ++ c0 = float3(r4.x, g4.x, b4.x); ++ c1 = float3(r4.y, g4.y, b4.y); ++ c2 = float3(r4.z, g4.z, b4.z); ++ c3 = float3(r4.w, g4.w, b4.w); ++ } ++ ++ if (map_in_src_space) { ++ c0 = lrgb2lrgb(c0); ++ c1 = lrgb2lrgb(c1); ++ c2 = lrgb2lrgb(c2); ++ c3 = lrgb2lrgb(c3); ++ if (!is_rgb2rgb_passthrough) { ++ c0 = gamut_compress(c0); ++ c1 = gamut_compress(c1); ++ c2 = gamut_compress(c2); ++ c3 = gamut_compress(c3); ++ } ++ c0 = clamp(c0, 0.0f, 1.0f); ++ c1 = clamp(c1, 0.0f, 1.0f); ++ c2 = clamp(c2, 0.0f, 1.0f); ++ c3 = clamp(c3, 0.0f, 1.0f); ++ } ++ ++ float y0 = lrgb2y(c0); ++ float y1 = lrgb2y(c1); ++ float y2 = lrgb2y(c2); ++ float y3 = lrgb2y(c3); ++ ++ if (enable_dither && !skip_tonemap) { ++ int2 dither_sz = int2(dither.get_width(), ++ dither.get_height());; ++ float2 ncoords_d = float2(int2(xi, yi)) / float2(dither_sz); ++ float d = dither.sample(d_sampler, ncoords_d).x; ++ y0 = get_dithered_y(y0, d), y1 = get_dithered_y(y1, d); ++ y2 = get_dithered_y(y2, d), y3 = get_dithered_y(y3, d); ++ } ++ ++ float3 chroma_c = get_chroma_sample(c0, c1, c2, c3); ++ float3 chroma = lrgb2yuv(chroma_c); ++ ++ dst1.write(float4(y0, 0.0f, 0.0f, 1.0f), uint2(x, y)); ++ dst1.write(float4(y1, 0.0f, 0.0f, 1.0f), uint2(x + 1, y)); ++ dst1.write(float4(y2, 0.0f, 0.0f, 1.0f), uint2(x, y + 1)); ++ dst1.write(float4(y3, 0.0f, 0.0f, 1.0f), uint2(x + 1, y + 1)); ++ if (is_non_semi_planar_out) { ++ 
dst2.write(float4(chroma.y, 0.0f, 0.0f, 1.0f), uint2(xi, yi)); ++ dst3.write(float4(chroma.z, 0.0f, 0.0f, 1.0f), uint2(xi, yi)); ++ } else { ++ dst2.write(float4(chroma.y, chroma.z, 0.0f, 1.0f), uint2(xi, yi)); ++ } ++} +Index: FFmpeg/libavfilter/vf_tonemap_videotoolbox.m +=================================================================== +--- /dev/null ++++ libavfilter/vf_tonemap_videotoolbox.m +@@ -0,0 +1,1154 @@ ++/* ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++ ++#include "libavutil/avassert.h" ++#include "libavutil/common.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/opt.h" ++#include "libavutil/objc.h" ++#include "libavutil/hwcontext.h" ++ ++#include "avfilter.h" ++#include "internal.h" ++#include "video.h" ++#include "colorspace.h" ++#include "dither_matrix.h" ++#include "metal/utils.h" ++#include "libavutil/hwcontext_videotoolbox.h" ++ ++#define params_cnt 8 ++#define pivots_cnt (7+1) ++#define coeffs_cnt (8*4) ++#define mmr_cnt (8*6*4) ++#define params_sz (params_cnt*sizeof(float)) // byte sizes; parenthesised so they expand safely inside larger expressions ++#define pivots_sz (pivots_cnt*sizeof(float)) ++#define coeffs_sz (coeffs_cnt*sizeof(float)) ++#define mmr_sz (mmr_cnt*sizeof(float)) ++ ++extern char ff_vf_tonemap_videotoolbox_metallib_data[]; ++extern unsigned int ff_vf_tonemap_videotoolbox_metallib_len; ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++}; ++ ++enum TonemapAlgorithm { ++ TONEMAP_NONE, ++ TONEMAP_LINEAR, ++ TONEMAP_GAMMA, ++ TONEMAP_CLIP, ++ TONEMAP_REINHARD, ++ TONEMAP_HABLE, ++ TONEMAP_MOBIUS, ++ TONEMAP_BT2390, ++ TONEMAP_COUNT, ++}; ++ ++enum TonemapMode { ++ TONEMAP_MODE_MAX, ++ TONEMAP_MODE_RGB, ++ TONEMAP_MODE_LUM, ++ TONEMAP_MODE_ITP, ++ TONEMAP_MODE_COUNT, ++}; ++ ++typedef struct TonemapVideoToolboxContext { ++ const AVClass *class; ++ enum AVColorSpace colorspace, colorspace_in, colorspace_out; ++ enum AVColorTransferCharacteristic trc, trc_in, trc_out; ++ enum AVColorPrimaries primaries, primaries_in, primaries_out; ++ enum AVColorRange range, range_in, range_out; ++ enum AVChromaLocation chroma_loc; ++ enum AVPixelFormat in_fmt, out_fmt; ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++ int in_planes, out_planes; ++ struct DoviMetadata *dovi; ++ enum
TonemapAlgorithm tonemap; ++ enum TonemapMode tonemap_mode; ++ enum AVPixelFormat format; ++ int apply_dovi; ++ double ref_white; ++ double peak; ++ double target_peak; ++ double param; ++ double desat_param; ++ double scene_threshold; ++ int initialised; ++ int init_with_dovi; ++ ++ id dither_texture; ++ id mtl_device; ++ id mtl_library; ++ id mtl_queue; ++ id mtl_pipeline; ++ id mtl_function; ++ id mtl_dovi_buffer; ++ id mtl_peak_buffer; ++ CVMetalTextureCacheRef texture_cache; ++} TonemapVideoToolboxContext; ++ ++static const short linearize_funcs[AVCOL_TRC_NB] = { ++ [AVCOL_TRC_SMPTE2084] = 1, //"eotf_st2084", ++ [AVCOL_TRC_ARIB_STD_B67] = 2, //"eotf_arib_b67", ++}; ++ ++static const short delinearize_funcs[AVCOL_TRC_NB] = { ++ [AVCOL_TRC_BT709] = 1, //"inverse_eotf_bt1886", ++ [AVCOL_TRC_BT2020_10] = 1, //"inverse_eotf_bt1886", ++}; ++ ++static const double dovi_lms2rgb_matrix[3][3] = ++ { ++ { 3.06441879, -2.16597676, 0.10155818}, ++ {-0.65612108, 1.78554118, -0.12943749}, ++ { 0.01736321, -0.04725154, 1.03004253}, ++ }; ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static int get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, ++ double rgb2rgb[3][3]) ++{ ++ double rgb2xyz[3][3], xyz2rgb[3][3]; ++ ++ const AVColorPrimariesDesc *in_primaries = av_csp_primaries_desc_from_id(in); ++ const AVColorPrimariesDesc *out_primaries = av_csp_primaries_desc_from_id(out); ++ ++ if (!in_primaries || !out_primaries) ++ return AVERROR(EINVAL); ++ ++ ff_fill_rgb2xyz_table(&out_primaries->prim, &out_primaries->wp, rgb2xyz); ++ ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); ++ ff_fill_rgb2xyz_table(&in_primaries->prim, &in_primaries->wp, rgb2xyz); ++ ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); ++ ++ return 0; ++} ++ ++static MTLPixelFormat get_plane_texture_format(TonemapVideoToolboxContext* ctx, int plane, bool 
is_output) ++{ ++ int pixel_size, channels; ++ const AVComponentDescriptor *comp; ++ MTLPixelFormat format; ++ ++ comp = is_output ? &ctx->out_desc->comp[plane] : &ctx->in_desc->comp[plane]; ++ pixel_size = (comp->depth + comp->shift) / 8; // stored bits per component, in bytes ++ channels = comp->step / pixel_size; ++ if (pixel_size > 2 || channels > 2) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", (is_output ? ctx->out_desc : ctx->in_desc)->name); // name the side actually being mapped ++ return MTLPixelFormatInvalid; ++ } ++ switch (pixel_size) { ++ case 1: ++ format = channels == 1 ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm; ++ break; ++ case 2: ++ format = channels == 1 ? MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm; ++ break; ++ default: ++ av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", (is_output ? ctx->out_desc : ctx->in_desc)->name); ++ return MTLPixelFormatInvalid; ++ } ++ return format; ++} ++ ++static void tonemap_videotoolbox_update_dovi_buf(AVFilterContext *avctx) ++{ ++ TonemapVideoToolboxContext *ctx = avctx->priv; ++ float *dovi_buf = ctx->mtl_dovi_buffer.contents; ++ float coeffs_data[8][4] = {0}; ++ float mmr_packed_data[8*6][4] = {0}; ++ int c, i, j, k; ++ ++ av_assert0(dovi_buf); ++ ++ for (c = 0; c < 3; c++) { ++ int has_poly = 0, has_mmr = 0, mmr_single = 1; ++ int mmr_idx = 0, min_order = 3, max_order = 1; ++ const struct ReshapeData *comp = &ctx->dovi->comp[c]; ++ if (!comp->num_pivots) ++ continue; ++ av_assert0(comp->num_pivots >= 2 && comp->num_pivots <= 9); ++ ++ memset(coeffs_data, 0, sizeof(coeffs_data)); ++ for (i = 0; i < comp->num_pivots - 1; i++) { ++ switch (comp->method[i]) { ++ case 0: // polynomial ++ has_poly = 1; ++ coeffs_data[i][3] = 0.0f; // order=0 signals polynomial ++ for (k = 0; k < 3; k++) ++ coeffs_data[i][k] = comp->poly_coeffs[i][k]; ++ break; ++ case 1: ++ min_order = FFMIN(min_order, comp->mmr_order[i]); ++ max_order = FFMAX(max_order, comp->mmr_order[i]); ++ mmr_single = !has_mmr; ++ has_mmr = 1; ++ coeffs_data[i][3] = (float)comp->mmr_order[i]; ++ coeffs_data[i][0] = comp->mmr_constant[i]; ++
coeffs_data[i][1] = (float)mmr_idx; ++ for (j = 0; j < comp->mmr_order[i]; j++) { ++ // store weights per order as two packed vec4s ++ float *mmr = &mmr_packed_data[mmr_idx][0]; ++ mmr[0] = comp->mmr_coeffs[i][j][0]; ++ mmr[1] = comp->mmr_coeffs[i][j][1]; ++ mmr[2] = comp->mmr_coeffs[i][j][2]; ++ mmr[3] = 0.0f; // unused ++ mmr[4] = comp->mmr_coeffs[i][j][3]; ++ mmr[5] = comp->mmr_coeffs[i][j][4]; ++ mmr[6] = comp->mmr_coeffs[i][j][5]; ++ mmr[7] = comp->mmr_coeffs[i][j][6]; ++ mmr_idx += 2; ++ } ++ break; ++ default: ++ av_assert0(0); ++ } ++ } ++ ++ av_assert0(has_poly || has_mmr); ++ ++ if (has_mmr) ++ av_assert0(min_order <= max_order); ++ ++ // dovi_params ++ { ++ float params[8] = { ++ comp->num_pivots, !!has_mmr, !!has_poly, ++ mmr_single, min_order, max_order, ++ comp->pivots[0], comp->pivots[comp->num_pivots - 1] ++ }; ++ memcpy(dovi_buf + c * params_cnt, params, params_sz); ++ } ++ ++ // dovi_pivots ++ if (c == 0 && comp->num_pivots > 2) { ++ // Skip the (irrelevant) lower and upper bounds ++ float pivots_data[7+1] = {0}; ++ memcpy(pivots_data, comp->pivots + 1, ++ (comp->num_pivots - 2) * sizeof(pivots_data[0])); ++ // Fill the remainder with a quasi-infinite sentinel pivot ++ for (i = comp->num_pivots - 2; i < FF_ARRAY_ELEMS(pivots_data); i++) ++ pivots_data[i] = 1e9f; ++ memcpy(dovi_buf + 3 * params_cnt + c * pivots_cnt, pivots_data, pivots_sz); ++ } ++ ++ // dovi_coeffs ++ memcpy(dovi_buf + 3 * (params_cnt + pivots_cnt) + c * coeffs_cnt, &coeffs_data[0], coeffs_sz); ++ ++ // dovi_mmr ++ if (has_mmr) ++ memcpy(dovi_buf + 3 * (params_cnt + pivots_cnt + coeffs_cnt) + c * mmr_cnt, &mmr_packed_data[0], mmr_sz); ++ } ++} ++ ++static av_cold int tonemap_videotoolbox_pre_init(AVFilterContext *avctx) ++{ ++ //TonemapVideoToolboxContext *ctx = avctx->priv; ++ return 0; ++} ++ ++static av_cold void tonemap_videotoolbox_uninit_common(AVFilterContext *avctx) ++{ ++ TonemapVideoToolboxContext *ctx = avctx->priv; ++ ++ ff_objc_release(&ctx->dither_texture); ++ 
ff_objc_release(&ctx->mtl_peak_buffer); ++ ff_objc_release(&ctx->mtl_function); ++ ff_objc_release(&ctx->mtl_pipeline); ++ ff_objc_release(&ctx->mtl_queue); ++ ff_objc_release(&ctx->mtl_library); ++ ff_objc_release(&ctx->mtl_device); ++ if (ctx->texture_cache) { ++ CFRelease(ctx->texture_cache); ++ ctx->texture_cache = NULL; ++ } ++ ctx->initialised = 0; ++} ++ ++static av_cold void tonemap_videotoolbox_uninit_dovi(AVFilterContext *avctx) ++{ ++ TonemapVideoToolboxContext *ctx = avctx->priv; ++ ff_objc_release(&ctx->mtl_dovi_buffer); ++ if (ctx->dovi) { ++ av_freep(&ctx->dovi); ++ } ++ ctx->init_with_dovi = 0; ++} ++ ++static av_cold void tonemap_videotoolbox_uninit(AVFilterContext *avctx) ++{ ++ tonemap_videotoolbox_uninit_common(avctx); ++ tonemap_videotoolbox_uninit_dovi(avctx); ++} ++ ++static int tonemap_videotoolbox_init(AVFilterContext *avctx) ++{ ++ TonemapVideoToolboxContext *ctx = avctx->priv; ++ int rgb2rgb_passthrough = 1; ++ double rgb2rgb[3][3], rgb2yuv[3][3], yuv2rgb[3][3]; ++ double lms2rgb[3][3]; ++ float ycc2rgb_offset[3] = {0}; ++ float rgb2rgb_matrix_1[3], rgb2rgb_matrix_2[3], rgb2rgb_matrix_3[3]; ++ float rgb_matrix_1[3], rgb_matrix_2[3], rgb_matrix_3[3]; ++ float yuv_matrix_1[3], yuv_matrix_2[3], yuv_matrix_3[3]; ++ float lms2rgb_matrix_1[3], lms2rgb_matrix_2[3], lms2rgb_matrix_3[3]; ++ float mtl_luma_dst[3]; ++ const AVLumaCoefficients *luma_src, *luma_dst; ++ ++ MTLFunctionConstantValues* constant_values = [MTLFunctionConstantValues new]; ++ dispatch_data_t lib_data; ++ float ref_white; ++ float tone_param; ++ float desat_param; ++ float target_peak; ++ float scene_threshold; ++ float pq_max_lum_div_ref_white; ++ float ref_white_div_pq_max_lum; ++ short tonemap_func_type; ++ bool is_tone_func_bt2390; ++ bool is_tone_mode_rgb; ++ bool is_tone_mode_max; ++ bool is_tone_mode_itp; ++ bool is_non_semi_planar_in; ++ bool is_non_semi_planar_out; ++ bool enable_dither; ++ float dither_size2; ++ float dither_quantization; ++ bool is_full_range_in; ++ 
bool is_full_range_out; ++ int chroma_loc; ++ bool skip_tonemap; ++ bool dovi_reshape; ++ bool map_in_src_space; ++ ++ int i, j, err; ++ NSError* ns_error = nil; ++ CVReturn ret; ++ ++ if (ctx->primaries_out != ctx->primaries_in) { ++ if ((err = get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb)) < 0) ++ goto fail; ++ rgb2rgb_passthrough = 0; ++ } ++ ++ switch(ctx->tonemap) { ++ case TONEMAP_GAMMA: ++ if (isnan(ctx->param)) ++ ctx->param = 1.8f; ++ break; ++ case TONEMAP_REINHARD: ++ if (!isnan(ctx->param)) ++ ctx->param = (1.0f - ctx->param) / ctx->param; ++ break; ++ case TONEMAP_MOBIUS: ++ if (isnan(ctx->param)) ++ ctx->param = 0.3f; ++ break; ++ } ++ ++ if (isnan(ctx->param)) ++ ctx->param = 1.0f; ++ ++ ctx->ref_white = ctx->tonemap == TONEMAP_BT2390 ? REFERENCE_WHITE_ALT ++ : REFERENCE_WHITE; ++ ++ if (ctx->tonemap == TONEMAP_BT2390 && ctx->peak) ++ ctx->peak = FFMAX(ctx->peak / 10.0f, 1.1f); ++ ++ // SDR peak is 1.0f ++ ctx->target_peak = 1.0f; ++ ++ av_log(ctx, AV_LOG_DEBUG, "Tone-mapping transfer from %s to %s\n", ++ av_color_transfer_name(ctx->trc_in), ++ av_color_transfer_name(ctx->trc_out)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n", ++ ctx->dovi ? 
"dolby_vision" : av_color_space_name(ctx->colorspace_in), ++ av_color_space_name(ctx->colorspace_out)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n", ++ av_color_primaries_name(ctx->primaries_in), ++ av_color_primaries_name(ctx->primaries_out)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n", ++ av_color_range_name(ctx->range_in), ++ av_color_range_name(ctx->range_out)); ++ ++ av_assert0(ctx->trc_out == AVCOL_TRC_BT709 || ++ ctx->trc_out == AVCOL_TRC_BT2020_10 || ++ ctx->trc_out == AVCOL_TRC_SMPTE2084); ++ ++ av_assert0(ctx->trc_in == AVCOL_TRC_SMPTE2084|| ++ ctx->trc_in == AVCOL_TRC_ARIB_STD_B67); ++ av_assert0(ctx->dovi || ++ ctx->colorspace_in == AVCOL_SPC_BT2020_NCL || ++ ctx->colorspace_in == AVCOL_SPC_BT709); ++ av_assert0(ctx->primaries_in == AVCOL_PRI_BT2020 || ++ ctx->primaries_in == AVCOL_PRI_BT709); ++ ++ if (ctx->trc_out == AVCOL_TRC_SMPTE2084) { ++ int is_10b_out = ctx->out_desc->comp[0].depth == 10; ++ if (!(is_10b_out && ++ ctx->primaries_out == AVCOL_PRI_BT2020 && ++ ctx->colorspace_out == AVCOL_SPC_BT2020_NCL)) { ++ av_log(avctx, AV_LOG_ERROR, "HDR passthrough requires BT.2020 " ++ "colorspace and 10 bit output format depth.\n"); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ ctx->mtl_device = MTLCreateSystemDefaultDevice(); ++ if (!ctx->mtl_device) { ++ av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n"); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", ctx->mtl_device.name.UTF8String); ++ ++ lib_data = dispatch_data_create( ++ ff_vf_tonemap_videotoolbox_metallib_data, ++ ff_vf_tonemap_videotoolbox_metallib_len, ++ nil, ++ nil); ++ ctx->mtl_library = [ctx->mtl_device newLibraryWithData:lib_data error:&ns_error]; ++ dispatch_release(lib_data); ++ lib_data = nil; ++ if (ns_error) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library: %s\n", ns_error.description.UTF8String); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ ctx->mtl_queue = 
ctx->mtl_device.newCommandQueue; ++ if (!ctx->mtl_queue) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command queue!\n"); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ ret = CVMetalTextureCacheCreate( ++ NULL, ++ NULL, ++ ctx->mtl_device, ++ NULL, ++ &ctx->texture_cache ++ ); ++ if (ret != kCVReturnSuccess) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTextureCache: %d\n", ret); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ tone_param = (float)ctx->param; ++ ref_white = (float)ctx->ref_white; ++ desat_param = (float)ctx->desat_param; ++ target_peak = (float)ctx->target_peak; ++ scene_threshold = (float)ctx->scene_threshold; ++ pq_max_lum_div_ref_white = (float)(ST2084_MAX_LUMINANCE / ctx->ref_white); ++ ref_white_div_pq_max_lum = (float)(ctx->ref_white / ST2084_MAX_LUMINANCE); ++ tonemap_func_type = (short)ctx->tonemap; ++ is_tone_func_bt2390 = ctx->tonemap == TONEMAP_BT2390; ++ is_tone_mode_rgb = ctx->tonemap_mode == TONEMAP_MODE_RGB; ++ is_tone_mode_max = ctx->tonemap_mode == TONEMAP_MODE_MAX; ++ is_tone_mode_itp = ctx->tonemap_mode == TONEMAP_MODE_ITP; ++ is_non_semi_planar_in = ctx->in_planes > 2; ++ is_non_semi_planar_out = ctx->out_planes > 2; ++ enable_dither = ctx->in_desc->comp[0].depth > ctx->out_desc->comp[0].depth; ++ dither_size2 = (float)(ff_fruit_dither_size * ff_fruit_dither_size); ++ dither_quantization = (float)((1 << ctx->out_desc->comp[0].depth) - 1); ++ is_full_range_in = ctx->range_in == AVCOL_RANGE_JPEG; ++ is_full_range_out = ctx->range_out == AVCOL_RANGE_JPEG; ++ chroma_loc = (int)ctx->chroma_loc; ++ skip_tonemap = ctx->trc_out == AVCOL_TRC_SMPTE2084; ++ dovi_reshape = !!ctx->dovi; ++ map_in_src_space = !is_tone_mode_rgb && !is_tone_mode_max; ++ ++ [constant_values setConstantValue:&ref_white type:MTLDataTypeFloat withName:@"ref_white"]; ++ [constant_values setConstantValue:&tone_param type:MTLDataTypeFloat withName:@"tone_param"]; ++ [constant_values setConstantValue:&desat_param type:MTLDataTypeFloat 
withName:@"desat_param"]; ++ [constant_values setConstantValue:&target_peak type:MTLDataTypeFloat withName:@"target_peak"]; ++ [constant_values setConstantValue:&scene_threshold type:MTLDataTypeFloat withName:@"scene_threshold"]; ++ [constant_values setConstantValue:&pq_max_lum_div_ref_white type:MTLDataTypeFloat withName:@"pq_max_lum_div_ref_white"]; ++ [constant_values setConstantValue:&ref_white_div_pq_max_lum type:MTLDataTypeFloat withName:@"ref_white_div_pq_max_lum"]; ++ ++ [constant_values setConstantValue:&tonemap_func_type type:MTLDataTypeShort withName:@"tonemap_func_type"]; ++ [constant_values setConstantValue:&is_tone_func_bt2390 type:MTLDataTypeBool withName:@"is_tone_func_bt2390"]; ++ [constant_values setConstantValue:&is_tone_mode_rgb type:MTLDataTypeBool withName:@"is_tone_mode_rgb"]; ++ [constant_values setConstantValue:&is_tone_mode_max type:MTLDataTypeBool withName:@"is_tone_mode_max"]; ++ [constant_values setConstantValue:&is_tone_mode_itp type:MTLDataTypeBool withName:@"is_tone_mode_itp"]; ++ ++ [constant_values setConstantValue:&is_non_semi_planar_in type:MTLDataTypeBool withName:@"is_non_semi_planar_in"]; ++ [constant_values setConstantValue:&is_non_semi_planar_out type:MTLDataTypeBool withName:@"is_non_semi_planar_out"]; ++ ++ [constant_values setConstantValue:&enable_dither type:MTLDataTypeBool withName:@"enable_dither"]; ++ [constant_values setConstantValue:&dither_size2 type:MTLDataTypeFloat withName:@"dither_size2"]; ++ [constant_values setConstantValue:&dither_quantization type:MTLDataTypeFloat withName:@"dither_quantization"]; ++ ++ [constant_values setConstantValue:&is_full_range_in type:MTLDataTypeBool withName:@"is_full_range_in"]; ++ [constant_values setConstantValue:&is_full_range_out type:MTLDataTypeBool withName:@"is_full_range_out"]; ++ [constant_values setConstantValue:&chroma_loc type:MTLDataTypeInt withName:@"chroma_loc"]; ++ ++ [constant_values setConstantValue:&rgb2rgb_passthrough type:MTLDataTypeBool 
withName:@"is_rgb2rgb_passthrough"]; ++ if (!rgb2rgb_passthrough) { ++ rgb2rgb_matrix_1[0] = (float)rgb2rgb[0][0]; ++ rgb2rgb_matrix_1[1] = (float)rgb2rgb[0][1]; ++ rgb2rgb_matrix_1[2] = (float)rgb2rgb[0][2]; ++ ++ rgb2rgb_matrix_2[0] = (float)rgb2rgb[1][0]; ++ rgb2rgb_matrix_2[1] = (float)rgb2rgb[1][1]; ++ rgb2rgb_matrix_2[2] = (float)rgb2rgb[1][2]; ++ ++ rgb2rgb_matrix_3[0] = (float)rgb2rgb[2][0]; ++ rgb2rgb_matrix_3[1] = (float)rgb2rgb[2][1]; ++ rgb2rgb_matrix_3[2] = (float)rgb2rgb[2][2]; ++ ++ [constant_values setConstantValue:&rgb2rgb_matrix_1 type:MTLDataTypeFloat3 withName:@"rgb2rgb_matrix_1"]; ++ [constant_values setConstantValue:&rgb2rgb_matrix_2 type:MTLDataTypeFloat3 withName:@"rgb2rgb_matrix_2"]; ++ [constant_values setConstantValue:&rgb2rgb_matrix_3 type:MTLDataTypeFloat3 withName:@"rgb2rgb_matrix_3"]; ++ } ++ ++ [constant_values setConstantValue:&skip_tonemap type:MTLDataTypeBool withName:@"skip_tonemap"]; ++ [constant_values setConstantValue:&dovi_reshape type:MTLDataTypeBool withName:@"dovi_reshape"]; ++ if (dovi_reshape) { ++ for (i = 0; i < 3; i++) { ++ for (j = 0; j < 3; j++) ++ ycc2rgb_offset[i] -= (float)(ctx->dovi->nonlinear[i][j] * ctx->dovi->nonlinear_offset[j]); ++ } ++ [constant_values setConstantValue:&ycc2rgb_offset type:MTLDataTypeFloat3 withName:@"ycc2rgb_offset"]; ++ ff_matrix_mul_3x3(lms2rgb, dovi_lms2rgb_matrix, ctx->dovi->linear); ++ // ycc2rgb ++ rgb_matrix_1[0] = (float)ctx->dovi->nonlinear[0][0]; ++ rgb_matrix_1[1] = (float)ctx->dovi->nonlinear[0][1]; ++ rgb_matrix_1[2] = (float)ctx->dovi->nonlinear[0][2]; ++ ++ rgb_matrix_2[0] = (float)ctx->dovi->nonlinear[1][0]; ++ rgb_matrix_2[1] = (float)ctx->dovi->nonlinear[1][1]; ++ rgb_matrix_2[2] = (float)ctx->dovi->nonlinear[1][2]; ++ ++ rgb_matrix_3[0] = (float)ctx->dovi->nonlinear[2][0]; ++ rgb_matrix_3[1] = (float)ctx->dovi->nonlinear[2][1]; ++ rgb_matrix_3[2] = (float)ctx->dovi->nonlinear[2][2]; ++ ++ [constant_values setConstantValue:&rgb_matrix_1 type:MTLDataTypeFloat3 
withName:@"rgb_matrix_1"]; ++ [constant_values setConstantValue:&rgb_matrix_2 type:MTLDataTypeFloat3 withName:@"rgb_matrix_2"]; ++ [constant_values setConstantValue:&rgb_matrix_3 type:MTLDataTypeFloat3 withName:@"rgb_matrix_3"]; ++ //lms2rgb ++ lms2rgb_matrix_1[0] = (float)lms2rgb[0][0]; ++ lms2rgb_matrix_1[1] = (float)lms2rgb[0][1]; ++ lms2rgb_matrix_1[2] = (float)lms2rgb[0][2]; ++ ++ lms2rgb_matrix_2[0] = (float)lms2rgb[1][0]; ++ lms2rgb_matrix_2[1] = (float)lms2rgb[1][1]; ++ lms2rgb_matrix_2[2] = (float)lms2rgb[1][2]; ++ ++ lms2rgb_matrix_3[0] = (float)lms2rgb[2][0]; ++ lms2rgb_matrix_3[1] = (float)lms2rgb[2][1]; ++ lms2rgb_matrix_3[2] = (float)lms2rgb[2][2]; ++ ++ [constant_values setConstantValue:&lms2rgb_matrix_1 type:MTLDataTypeFloat3 withName:@"lms2rgb_matrix_1"]; ++ [constant_values setConstantValue:&lms2rgb_matrix_2 type:MTLDataTypeFloat3 withName:@"lms2rgb_matrix_2"]; ++ [constant_values setConstantValue:&lms2rgb_matrix_3 type:MTLDataTypeFloat3 withName:@"lms2rgb_matrix_3"]; ++ } else { ++ luma_src = av_csp_luma_coeffs_from_avcsp(ctx->colorspace_in); ++ if (!luma_src) { ++ err = AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_ERROR, "Unsupported input colorspace %d (%s)\n", ++ ctx->colorspace_in, av_color_space_name(ctx->colorspace_in)); ++ goto fail; ++ } ++ ++ ff_fill_rgb2yuv_table(luma_src, rgb2yuv); ++ ff_matrix_invert_3x3(rgb2yuv, yuv2rgb); ++ ++ rgb_matrix_1[0] = (float)yuv2rgb[0][0]; ++ rgb_matrix_1[1] = (float)yuv2rgb[0][1]; ++ rgb_matrix_1[2] = (float)yuv2rgb[0][2]; ++ ++ rgb_matrix_2[0] = (float)yuv2rgb[1][0]; ++ rgb_matrix_2[1] = (float)yuv2rgb[1][1]; ++ rgb_matrix_2[2] = (float)yuv2rgb[1][2]; ++ ++ rgb_matrix_3[0] = (float)yuv2rgb[2][0]; ++ rgb_matrix_3[1] = (float)yuv2rgb[2][1]; ++ rgb_matrix_3[2] = (float)yuv2rgb[2][2]; ++ ++ [constant_values setConstantValue:&rgb_matrix_1 type:MTLDataTypeFloat3 withName:@"rgb_matrix_1"]; ++ [constant_values setConstantValue:&rgb_matrix_2 type:MTLDataTypeFloat3 withName:@"rgb_matrix_2"]; ++ [constant_values 
setConstantValue:&rgb_matrix_3 type:MTLDataTypeFloat3 withName:@"rgb_matrix_3"]; ++ } ++ ++ luma_dst = av_csp_luma_coeffs_from_avcsp(ctx->colorspace_out); ++ if (!luma_dst) { ++ err = AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace %d (%s)\n", ++ ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); ++ goto fail; ++ } ++ ++ ff_fill_rgb2yuv_table(luma_dst, rgb2yuv); ++ yuv_matrix_1[0] = (float)rgb2yuv[0][0]; ++ yuv_matrix_1[1] = (float)rgb2yuv[0][1]; ++ yuv_matrix_1[2] = (float)rgb2yuv[0][2]; ++ ++ yuv_matrix_2[0] = (float)rgb2yuv[1][0]; ++ yuv_matrix_2[1] = (float)rgb2yuv[1][1]; ++ yuv_matrix_2[2] = (float)rgb2yuv[1][2]; ++ ++ yuv_matrix_3[0] = (float)rgb2yuv[2][0]; ++ yuv_matrix_3[1] = (float)rgb2yuv[2][1]; ++ yuv_matrix_3[2] = (float)rgb2yuv[2][2]; ++ ++ [constant_values setConstantValue:&yuv_matrix_1 type:MTLDataTypeFloat3 withName:@"yuv_matrix_1"]; ++ [constant_values setConstantValue:&yuv_matrix_2 type:MTLDataTypeFloat3 withName:@"yuv_matrix_2"]; ++ [constant_values setConstantValue:&yuv_matrix_3 type:MTLDataTypeFloat3 withName:@"yuv_matrix_3"]; ++ ++ mtl_luma_dst[0] = (float)av_q2d(luma_dst->cr); ++ mtl_luma_dst[1] = (float)av_q2d(luma_dst->cg); ++ mtl_luma_dst[2] = (float)av_q2d(luma_dst->cb); ++ [constant_values setConstantValue:&mtl_luma_dst type:MTLDataTypeFloat3 withName:@"luma_dst"]; ++ ++ if (ctx->trc_out != AVCOL_TRC_SMPTE2084) { ++ [constant_values setConstantValue:&linearize_funcs[ctx->trc_in] type:MTLDataTypeShort withName:@"linearize_type"]; ++ [constant_values setConstantValue:&delinearize_funcs[ctx->trc_out] type:MTLDataTypeShort withName:@"delinearize_type"]; // delinearize table is keyed by the output transfer ++ } ++ ++ if (enable_dither) { ++ uint bytes_per_row = 2 * ff_fruit_dither_size; ++ uint bytes_per_image = 2 * ff_fruit_dither_size2; ++ MTLTextureDescriptor *texture_descriptor = [[MTLTextureDescriptor alloc] init]; ++ MTLRegion region = { ++ { 0, 0, 0 }, // MTLOrigin ++ {ff_fruit_dither_size, ff_fruit_dither_size, 1} // MTLSize ++ }; ++ id
source_buffer; ++ id command_buffer; ++ id blit_command_encoder; ++ ++ source_buffer = [ctx->mtl_device newBufferWithBytes: ff_fruit_dither_matrix ++ length: bytes_per_image ++ options: MTLResourceStorageModeShared]; ++ ++ texture_descriptor.pixelFormat = MTLPixelFormatR16Unorm; ++ texture_descriptor.width = ff_fruit_dither_size; ++ texture_descriptor.height = ff_fruit_dither_size; ++ texture_descriptor.storageMode = MTLStorageModePrivate; ++ ++ ctx->dither_texture = [ctx->mtl_device newTextureWithDescriptor:texture_descriptor]; ++ ++ command_buffer = [ctx->mtl_queue commandBuffer]; ++ ++ blit_command_encoder = [command_buffer blitCommandEncoder]; ++ [blit_command_encoder copyFromBuffer: source_buffer ++ sourceOffset: 0 ++ sourceBytesPerRow: bytes_per_row ++ sourceBytesPerImage: bytes_per_image ++ sourceSize: region.size ++ toTexture: ctx->dither_texture ++ destinationSlice: 0 ++ destinationLevel: 0 ++ destinationOrigin: region.origin]; ++ [blit_command_encoder endEncoding]; ++ ++ [command_buffer commit]; ++ [command_buffer waitUntilCompleted]; ++ } ++ ++ [constant_values setConstantValue:&map_in_src_space type:MTLDataTypeBool withName:@"map_in_src_space"]; ++ ++ ctx->mtl_function = [ctx->mtl_library newFunctionWithName:@"tonemap" constantValues:constant_values error:&ns_error]; ++ if (ns_error) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal function: %s\n", ns_error.description.UTF8String); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ ctx->mtl_pipeline = [ctx->mtl_device newComputePipelineStateWithFunction:ctx->mtl_function error:&ns_error]; ++ if (ns_error) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", ns_error.description.UTF8String); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ if (dovi_reshape) { ++ ctx->mtl_dovi_buffer = [ctx->mtl_device newBufferWithLength: 3*(params_sz+pivots_sz+coeffs_sz+mmr_sz) ++ options: MTLResourceStorageModeShared]; ++ if (!ctx->mtl_dovi_buffer) { ++ av_log(ctx, AV_LOG_ERROR, 
"Failed to create Metal buffer for Dolby Vision data\n"); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ } ++ ++ ctx->mtl_peak_buffer = [ctx->mtl_device newBufferWithLength: sizeof(float) ++ options: MTLResourceStorageModeShared]; ++ if (!ctx->mtl_peak_buffer) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal buffer for Peak data\n"); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ ++ ctx->initialised = 1; ++ return 0; ++ ++fail: ++ tonemap_videotoolbox_uninit(avctx); ++ return err; ++} ++ ++static int tonemap_videotoolbox_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ TonemapVideoToolboxContext *ctx = avctx->priv; ++ AVHWFramesContext *in_frames_ctx, *out_frames_ctx; ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; ++ int ret; ++ ++ if (!inlink->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ in_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ in_format = in_frames_ctx->sw_format; ++ out_format = (ctx->format == AV_PIX_FMT_NONE) ? 
in_format : ctx->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (in_desc->comp[0].depth != 10 && in_desc->comp[0].depth != 16) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", ++ in_desc->comp[0].depth); ++ return AVERROR(ENOSYS); ++ } ++ ++ ctx->in_fmt = in_format; ++ ctx->out_fmt = out_format; ++ ctx->in_desc = in_desc; ++ ctx->out_desc = out_desc; ++ ctx->in_planes = av_pix_fmt_count_planes(in_format); ++ ctx->out_planes = av_pix_fmt_count_planes(out_format); ++ ++ av_buffer_unref(&outlink->hw_frames_ctx); ++ outlink->hw_frames_ctx = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); ++ outlink->w = inlink->w; ++ outlink->h = inlink->h; ++ out_frames_ctx = (AVHWFramesContext *)outlink->hw_frames_ctx->data; ++ out_frames_ctx->format = AV_PIX_FMT_VIDEOTOOLBOX; ++ out_frames_ctx->sw_format = out_format; ++ out_frames_ctx->width = outlink->w; ++ out_frames_ctx->height = outlink->h; ++ ++ if (ctx->range != -1) { ++ ((AVVTFramesContext *)out_frames_ctx->hwctx)->color_range = ctx->range; ++ } else { ++ ((AVVTFramesContext *)out_frames_ctx->hwctx)->color_range = ((AVVTFramesContext *)in_frames_ctx->hwctx)->color_range; ++ } ++ ++ ret = ff_filter_init_hw_frames(avctx, outlink, 1); ++ if (ret < 0) ++ return ret; ++ ++ ret = av_hwframe_ctx_init(outlink->hw_frames_ctx); ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, ++ "Failed to init videotoolbox frame context, %s\n", ++ av_err2str(ret)); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void call_kernel(AVFilterContext *avctx, ++ id in_y_tex, ++ id in_u_uv_tex, ++ id out_y_tex, ++ id 
out_u_uv_tex, ++ id in_v_tex, ++ id out_v_tex, ++ float peak) ++{ ++ TonemapVideoToolboxContext *ctx = avctx->priv; ++ id buffer = ctx->mtl_queue.commandBuffer; ++ id encoder = buffer.computeCommandEncoder; ++ float* peak_ptr = ctx->mtl_peak_buffer.contents; ++ *peak_ptr = peak; ++ ++ [encoder setTexture:out_y_tex atIndex:0]; ++ [encoder setTexture:in_y_tex atIndex:1]; ++ [encoder setTexture:out_u_uv_tex atIndex:2]; ++ [encoder setTexture:in_u_uv_tex atIndex:3]; ++ if (ctx->out_planes > 2) { ++ [encoder setTexture:out_v_tex atIndex:4]; ++ } ++ if (ctx->in_planes > 2) { ++ [encoder setTexture:in_v_tex atIndex:5]; ++ } ++ if (ctx->dither_texture) { ++ [encoder setTexture:ctx->dither_texture atIndex:6]; ++ } ++ if (ctx->mtl_dovi_buffer) { ++ [encoder setBuffer:ctx->mtl_dovi_buffer offset:0 atIndex:7]; ++ } ++ [encoder setBuffer:ctx->mtl_peak_buffer offset:0 atIndex:8]; ++ ++ ff_metal_compute_encoder_dispatch(ctx->mtl_device, ctx->mtl_pipeline, encoder, out_u_uv_tex.width, out_u_uv_tex.height); ++ ++ [encoder endEncoding]; ++ ++ [buffer commit]; ++ [buffer waitUntilCompleted]; ++} ++ ++static int tonemap_videotoolbox_filter_frame(AVFilterLink *inlink, AVFrame *input) ++{ ++ AVFilterContext *avctx = inlink->dst; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ TonemapVideoToolboxContext *ctx = avctx->priv; ++ AVFrameSideData *dovi_sd = NULL; ++ AVFrame *output = NULL; ++ ++ CVMetalTextureRef in_y, in_u_uv, in_v; ++ id in_y_tex, in_u_uv_tex, in_v_tex = NULL; ++ ++ CVMetalTextureRef out_y, out_u_uv, out_v; ++ id out_y_tex, out_u_uv_tex, out_v_tex = NULL; ++ ++ MTLPixelFormat format; ++ ++ int err; ++ ++ av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(input->format), ++ input->width, input->height, input->pts); ++ ++ if (!input->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ ++ output = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!output) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ err = 
av_frame_copy_props(output, input); ++ if (err < 0) ++ goto fail; ++ ++ if (ctx->trc != -1) ++ output->color_trc = ctx->trc; ++ if (ctx->primaries != -1) ++ output->color_primaries = ctx->primaries; ++ if (ctx->colorspace != -1) ++ output->colorspace = ctx->colorspace; ++ if (ctx->range != -1) ++ output->color_range = ctx->range; ++ ++ ctx->trc_in = input->color_trc; ++ ctx->trc_out = output->color_trc; ++ ctx->colorspace_in = input->colorspace; ++ ctx->colorspace_out = output->colorspace; ++ ctx->primaries_in = input->color_primaries; ++ ctx->primaries_out = output->color_primaries; ++ ctx->range_in = input->color_range; ++ ctx->range_out = output->color_range; ++ ctx->chroma_loc = output->chroma_location; ++ ++ if (ctx->apply_dovi) ++ dovi_sd = av_frame_get_side_data(input, AV_FRAME_DATA_DOVI_METADATA); ++ ++ // check DOVI->HDR10/HLG ++ if (!dovi_sd) { ++ if (input->color_trc != AVCOL_TRC_SMPTE2084 && ++ input->color_trc != AVCOL_TRC_ARIB_STD_B67) { ++ av_log(ctx, AV_LOG_ERROR, "No DOVI metadata and " ++ "unsupported transfer function characteristic: %s\n", ++ av_color_transfer_name(input->color_trc)); ++ err = AVERROR(ENOSYS); ++ goto fail; ++ } ++ } ++ ++ if (!ctx->peak) { ++ if (dovi_sd) { ++ const AVDOVIMetadata *metadata = (AVDOVIMetadata *) dovi_sd->data; ++ ctx->peak = ff_determine_dovi_signal_peak(metadata); ++ } else { ++ ctx->peak = ff_determine_signal_peak(input); ++ } ++ av_log(ctx, AV_LOG_DEBUG, "Computed signal peak: %f\n", ctx->peak); ++ } ++ ++ if (dovi_sd) { ++ const AVDOVIMetadata *metadata = (AVDOVIMetadata *) dovi_sd->data; ++ const AVDOVIRpuDataHeader *rpu = av_dovi_get_header(metadata); ++ // only map dovi rpus that don't require an EL ++ if (rpu->disable_residual_flag) { ++ struct DoviMetadata *dovi = av_malloc(sizeof(*dovi)); ++ ctx->dovi = dovi; ++ if (!ctx->dovi) ++ goto fail; ++ ++ ff_map_dovi_metadata(ctx->dovi, metadata); ++ ctx->trc_in = AVCOL_TRC_SMPTE2084; ++ ctx->colorspace_in = AVCOL_SPC_UNSPECIFIED; ++ ctx->primaries_in = 
AVCOL_PRI_BT2020; ++ } ++ } ++ ++ // Some DOVI video does not carry metadata in the first few frames, and we have to reset the pipeline. ++ if (!ctx->init_with_dovi && ctx->dovi && ctx->initialised) { ++ tonemap_videotoolbox_uninit_common(avctx); ++ } ++ ++ if (!ctx->initialised) { ++ err = tonemap_videotoolbox_init(avctx); ++ if (err < 0) ++ goto fail; ++ ++ ctx->init_with_dovi = ctx->dovi != NULL; ++ } ++ ++ if (ctx->dovi) { ++ tonemap_videotoolbox_update_dovi_buf(avctx); ++ av_freep(&ctx->dovi); ++ } ++ ++ // First Input Plane ++ format = get_plane_texture_format(ctx, 0, false); ++ if (format == MTLPixelFormatInvalid) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ in_y = ff_metal_texture_from_pixbuf(ctx, ctx->texture_cache, (CVPixelBufferRef)input->data[3], 0, format); ++ in_y_tex = CVMetalTextureGetTexture(in_y); ++ ++ // Second Input Plane ++ format = get_plane_texture_format(ctx, 1, false); ++ if (format == MTLPixelFormatInvalid) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ in_u_uv = ff_metal_texture_from_pixbuf(ctx, ctx->texture_cache, (CVPixelBufferRef)input->data[3], 1, format); ++ in_u_uv_tex = CVMetalTextureGetTexture(in_u_uv); ++ ++ // First Output Plane ++ format = get_plane_texture_format(ctx, 0, true); ++ if (format == MTLPixelFormatInvalid) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ out_y = ff_metal_texture_from_pixbuf(ctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 0, format); ++ out_y_tex = CVMetalTextureGetTexture(out_y); ++ ++ // Second Output Plane ++ format = get_plane_texture_format(ctx, 1, true); ++ if (format == MTLPixelFormatInvalid) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ out_u_uv = ff_metal_texture_from_pixbuf(ctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 1, format); ++ out_u_uv_tex = CVMetalTextureGetTexture(out_u_uv); ++ ++ if (ctx->in_planes > 2) { ++ // Third Input Plane ++ format = get_plane_texture_format(ctx, 2, false); ++ if (format == MTLPixelFormatInvalid) { ++ err = AVERROR(EIO); ++ goto 
fail; ++ } ++ in_v = ff_metal_texture_from_pixbuf(ctx, ctx->texture_cache, (CVPixelBufferRef)input->data[3], 2, format); ++ in_v_tex = CVMetalTextureGetTexture(in_v); ++ } ++ ++ if (ctx->out_planes > 2) { ++ // Third Output Plane ++ format = get_plane_texture_format(ctx, 2, true); ++ if (format == MTLPixelFormatInvalid) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ out_v = ff_metal_texture_from_pixbuf(ctx, ctx->texture_cache, (CVPixelBufferRef)output->data[3], 2, format); ++ out_v_tex = CVMetalTextureGetTexture(out_v); ++ } ++ ++ call_kernel(avctx, ++ in_y_tex, ++ in_u_uv_tex, ++ out_y_tex, ++ out_u_uv_tex, ++ in_v_tex, ++ out_v_tex, ++ (float)ctx->peak); ++ ++ CFRelease(in_y); ++ CFRelease(in_u_uv); ++ CFRelease(out_y); ++ CFRelease(out_u_uv); ++ if(in_v_tex) { ++ CFRelease(in_v); ++ } ++ if(out_v_tex) { ++ CFRelease(out_v); ++ } ++ ++ CVBufferPropagateAttachments((CVPixelBufferRef)input->data[3], (CVPixelBufferRef)output->data[3]); ++ av_frame_free(&input); ++ ++ { ++ CGColorSpaceRef colorspace = NULL; ++ CFStringRef colormatrix = av_map_videotoolbox_color_matrix_from_av(ctx->colorspace_out); ++ CFStringRef colorpri = av_map_videotoolbox_color_primaries_from_av(ctx->primaries_out); ++ CFStringRef colortrc = av_map_videotoolbox_color_trc_from_av(ctx->trc_out); ++ CFMutableDictionaryRef attachments = CFDictionaryCreateMutable(NULL, 4, ++ &kCFTypeDictionaryKeyCallBacks, ++ &kCFTypeDictionaryValueCallBacks); ++ if (!attachments) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ CFDictionarySetValue(attachments, kCVImageBufferYCbCrMatrixKey, colormatrix); ++ CFDictionarySetValue(attachments, kCVImageBufferColorPrimariesKey, colorpri); ++ CFDictionarySetValue(attachments, kCVImageBufferTransferFunctionKey, colortrc); ++ colorspace = CVImageBufferCreateColorSpaceFromAttachments(attachments); ++ if (colorspace) { ++ CFDictionarySetValue(attachments, kCVImageBufferCGColorSpaceKey, colorspace); ++ CFRelease(colorspace); ++ } else { ++ av_log(avctx, AV_LOG_WARNING, "Unable 
to set proper colorspace for the CVImageBuffer.\n"); ++ } ++ CVBufferSetAttachments( ++ (CVPixelBufferRef)output->data[3], ++ attachments, ++ kCVAttachmentMode_ShouldPropagate); ++ CFRelease(attachments); ++ if (ctx->trc_out != AVCOL_TRC_SMPTE2084) { ++ av_frame_remove_side_data(output, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ av_frame_remove_side_data(output, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ } else { ++ ff_update_hdr_metadata(output, 100.0f); ++ } ++ } ++ ++ av_frame_remove_side_data(output, AV_FRAME_DATA_DOVI_RPU_BUFFER); ++ av_frame_remove_side_data(output, AV_FRAME_DATA_DOVI_METADATA); ++ return ff_filter_frame(outlink, output); ++ ++fail: ++ if (ctx->dovi) ++ av_freep(&ctx->dovi); ++ av_frame_free(&input); ++ av_frame_free(&output); ++ return err; ++} ++ ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++#define OFFSET(x) offsetof(TonemapVideoToolboxContext, x) ++ ++static const AVOption tonemap_videotoolbox_options[] = { ++ { "tonemap", "Tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, { .i64 = TONEMAP_NONE }, TONEMAP_NONE, TONEMAP_COUNT - 1, FLAGS, .unit = "tonemap" }, ++ { "none", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_NONE }, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "linear", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_LINEAR }, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "gamma", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_GAMMA }, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "clip", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_CLIP }, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_REINHARD }, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "hable", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_HABLE }, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "mobius", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MOBIUS }, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "bt2390", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_BT2390 }, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "tonemap_mode", "Tonemap mode selection", 
OFFSET(tonemap_mode), AV_OPT_TYPE_INT, { .i64 = TONEMAP_MODE_ITP }, TONEMAP_MODE_MAX, TONEMAP_MODE_COUNT - 1, FLAGS, .unit = "tonemap_mode" }, ++ { "max", "Brightest channel based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_MAX }, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "rgb", "Per-channel based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_RGB }, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "lum", "Relative luminance based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_LUM }, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "itp", "ICtCp intensity based tonemap", 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MODE_ITP }, 0, 0, FLAGS, .unit = "tonemap_mode" }, ++ { "transfer", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, .unit = "transfer" }, ++ { "t", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, .unit = "transfer" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT709 }, 0, 0, FLAGS, .unit = "transfer" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT2020_10 }, 0, 0, FLAGS, .unit = "transfer" }, ++ { "smpte2084", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_SMPTE2084 }, 0, 0, FLAGS, .unit = "transfer" }, ++ { "matrix", "Set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_BT709 }, -1, INT_MAX, FLAGS, .unit = "matrix" }, ++ { "m", "Set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_BT709 }, -1, INT_MAX, FLAGS, .unit = "matrix" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_SPC_BT709 }, 0, 0, FLAGS, .unit = "matrix" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_SPC_BT2020_NCL }, 0, 0, FLAGS, .unit = "matrix" }, ++ { "primaries", "Set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, { .i64 = AVCOL_PRI_BT709 }, -1, INT_MAX, FLAGS, .unit = "primaries" }, ++ { "p", "Set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, 
{ .i64 = AVCOL_PRI_BT709 }, -1, INT_MAX, FLAGS, .unit = "primaries" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_PRI_BT709 }, 0, 0, FLAGS, .unit = "primaries" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_PRI_BT2020 }, 0, 0, FLAGS, .unit = "primaries" }, ++ { "range", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS, .unit = "range" }, ++ { "r", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS, .unit = "range" }, ++ { "tv", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, ++ { "pc", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, ++ { "limited", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, ++ { "full", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, ++ { "format", "Output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, AV_PIX_FMT_NONE, INT_MAX, FLAGS }, ++ { "apply_dovi", "Apply Dolby Vision metadata if possible", OFFSET(apply_dovi), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, ++ { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, ++ { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, DBL_MIN, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 0.2 }, 0, DBL_MAX, FLAGS }, ++ { NULL } ++}; ++ ++AVFILTER_DEFINE_CLASS(tonemap_videotoolbox); ++ ++static const AVFilterPad tonemap_videotoolbox_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = tonemap_videotoolbox_filter_frame, ++ }, ++}; ++ ++static const AVFilterPad tonemap_videotoolbox_outputs[] = { ++ { ++ .name = "default", ++ .type = 
AVMEDIA_TYPE_VIDEO, ++ .config_props = tonemap_videotoolbox_config_output, ++ }, ++}; ++ ++const AVFilter ff_vf_tonemap_videotoolbox = { ++ .name = "tonemap_videotoolbox", ++ .description = NULL_IF_CONFIG_SMALL("Perform HDR to SDR conversion with Metal."), ++ .priv_size = sizeof(TonemapVideoToolboxContext), ++ .priv_class = &tonemap_videotoolbox_class, ++ .init = tonemap_videotoolbox_pre_init, ++ .uninit = tonemap_videotoolbox_uninit, ++ FILTER_INPUTS(tonemap_videotoolbox_inputs), ++ FILTER_OUTPUTS(tonemap_videotoolbox_outputs), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX), ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/cross/ffmpeg7/patches/1053-jellyfin-0053-add-mjpeg-videotoolbox-encoder.patch b/cross/ffmpeg7/patches/1053-jellyfin-0053-add-mjpeg-videotoolbox-encoder.patch new file mode 100644 index 00000000000..e075cac5d57 --- /dev/null +++ b/cross/ffmpeg7/patches/1053-jellyfin-0053-add-mjpeg-videotoolbox-encoder.patch @@ -0,0 +1,126 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3488,6 +3488,8 @@ h264_videotoolbox_encoder_deps="pthreads + h264_videotoolbox_encoder_select="atsc_a53 videotoolbox_encoder" + hevc_videotoolbox_encoder_deps="pthreads" + hevc_videotoolbox_encoder_select="atsc_a53 videotoolbox_encoder" ++mjpeg_videotoolbox_encoder_deps="pthreads" ++mjpeg_videotoolbox_encoder_select="videotoolbox_encoder" + prores_videotoolbox_encoder_deps="pthreads" + prores_videotoolbox_encoder_select="videotoolbox_encoder" + libaom_av1_decoder_deps="libaom" +Index: FFmpeg/libavcodec/Makefile +=================================================================== +--- libavcodec/Makefile ++++ libavcodec/Makefile +@@ -508,6 +508,7 @@ OBJS-$(CONFIG_MJPEG_CUVID_DECODER) + + OBJS-$(CONFIG_MJPEG_QSV_ENCODER) += qsvenc_jpeg.o + OBJS-$(CONFIG_MJPEG_RKMPP_ENCODER) += rkmppenc.o + OBJS-$(CONFIG_MJPEG_VAAPI_ENCODER) += vaapi_encode_mjpeg.o 
++OBJS-$(CONFIG_MJPEG_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o + OBJS-$(CONFIG_MLP_DECODER) += mlpdec.o mlpdsp.o + OBJS-$(CONFIG_MLP_ENCODER) += mlpenc.o mlp.o + OBJS-$(CONFIG_MMVIDEO_DECODER) += mmvideo.o +Index: FFmpeg/libavcodec/allcodecs.c +=================================================================== +--- libavcodec/allcodecs.c ++++ libavcodec/allcodecs.c +@@ -876,6 +876,7 @@ extern const FFCodec ff_mjpeg_qsv_encode + extern const FFCodec ff_mjpeg_qsv_decoder; + extern const FFCodec ff_mjpeg_rkmpp_encoder; + extern const FFCodec ff_mjpeg_vaapi_encoder; ++extern const FFCodec ff_mjpeg_videotoolbox_encoder; + extern const FFCodec ff_mp3_mf_encoder; + extern const FFCodec ff_mpeg1_cuvid_decoder; + extern const FFCodec ff_mpeg2_cuvid_decoder; +Index: FFmpeg/libavcodec/videotoolboxenc.c +=================================================================== +--- libavcodec/videotoolboxenc.c ++++ libavcodec/videotoolboxenc.c +@@ -545,6 +545,7 @@ static CMVideoCodecType get_cm_codec_typ + else + return MKBETAG('a','p','c','n'); // kCMVideoCodecType_AppleProRes422 + } ++ case AV_CODEC_ID_MJPEG: return kCMVideoCodecType_JPEG; + default: return 0; + } + } +@@ -1238,7 +1239,7 @@ static int vtenc_create_encoder(AVCodecC + kVTCompressionPropertyKey_Quality, + quality_num); + CFRelease(quality_num); +- } else if (avctx->codec_id != AV_CODEC_ID_PRORES) { ++ } else if (avctx->codec_id != AV_CODEC_ID_PRORES && avctx->codec_id != AV_CODEC_ID_MJPEG) { + bit_rate_num = CFNumberCreate(kCFAllocatorDefault, + kCFNumberSInt32Type, + &bit_rate); +@@ -1352,7 +1353,7 @@ static int vtenc_create_encoder(AVCodecC + } + } + +- if (avctx->gop_size > 0 && avctx->codec_id != AV_CODEC_ID_PRORES) { ++ if (avctx->gop_size > 0 && avctx->codec_id != AV_CODEC_ID_PRORES && avctx->codec_id != AV_CODEC_ID_MJPEG) { + CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault, + kCFNumberIntType, + &avctx->gop_size); +@@ -1501,7 +1502,7 @@ static int vtenc_create_encoder(AVCodecC + } + } + +- if 
(!vtctx->has_b_frames && avctx->codec_id != AV_CODEC_ID_PRORES) { ++ if (!vtctx->has_b_frames && avctx->codec_id != AV_CODEC_ID_PRORES && avctx->codec_id != AV_CODEC_ID_MJPEG) { + status = VTSessionSetProperty(vtctx->session, + kVTCompressionPropertyKey_AllowFrameReordering, + kCFBooleanFalse); +@@ -2870,6 +2871,13 @@ static const enum AVPixelFormat prores_p + AV_PIX_FMT_NONE + }; + ++static const enum AVPixelFormat mjpeg_pix_fmts[] = { ++ AV_PIX_FMT_VIDEOTOOLBOX, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_NONE ++}; ++ + #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM + #define COMMON_OPTIONS \ + { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL, \ +@@ -3039,4 +3047,35 @@ const FFCodec ff_prores_videotoolbox_enc + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .p.wrapper_name = "videotoolbox", + .hw_configs = vt_encode_hw_configs, ++}; ++ ++static const AVOption mjpeg_options[] = { ++ { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, ++ { NULL }, ++}; ++ ++static const AVClass mjpeg_videotoolbox_class = { ++ .class_name = "mjpeg_videotoolbox", ++ .item_name = av_default_item_name, ++ .option = mjpeg_options, ++ .version = LIBAVUTIL_VERSION_INT, ++}; ++ ++const FFCodec ff_mjpeg_videotoolbox_encoder = { ++ .p.name = "mjpeg_videotoolbox", ++ CODEC_LONG_NAME("VideoToolbox MJPEG Encoder"), ++ .p.type = AVMEDIA_TYPE_VIDEO, ++ .p.id = AV_CODEC_ID_MJPEG, ++ .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | ++ AV_CODEC_CAP_HARDWARE, ++ .priv_data_size = sizeof(VTEncContext), ++ .p.pix_fmts = mjpeg_pix_fmts, ++ .defaults = vt_defaults, ++ .init = vtenc_init, ++ FF_CODEC_ENCODE_CB(vtenc_frame), ++ .close = vtenc_close, ++ .p.priv_class = &mjpeg_videotoolbox_class, ++ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, ++ .p.wrapper_name = "videotoolbox", ++ .hw_configs = vt_encode_hw_configs, + }; diff --git 
a/cross/ffmpeg7/patches/1054-jellyfin-0054-add-vt-low-priority-keyframe-decoding.patch b/cross/ffmpeg7/patches/1054-jellyfin-0054-add-vt-low-priority-keyframe-decoding.patch new file mode 100644 index 00000000000..fddcba07974 --- /dev/null +++ b/cross/ffmpeg7/patches/1054-jellyfin-0054-add-vt-low-priority-keyframe-decoding.patch @@ -0,0 +1,58 @@ +Index: FFmpeg/libavcodec/avcodec.h +=================================================================== +--- libavcodec/avcodec.h ++++ libavcodec/avcodec.h +@@ -2174,6 +2174,13 @@ typedef struct AVHWAccel { + #define AV_HWACCEL_FLAG_UNSAFE_OUTPUT (1 << 3) + + /** ++ * Some hardware decoders (like VideoToolbox) supports decode session priority ++ * that run decode pipeline at a lower priority than is used for realtime decoding. ++ * This will be useful for background processing without interrupting normal playback. ++ */ ++#define AV_HWACCEL_FLAG_LOW_PRIORITY (1 << 4) ++ ++/** + * @} + */ + +Index: FFmpeg/libavcodec/options_table.h +=================================================================== +--- libavcodec/options_table.h ++++ libavcodec/options_table.h +@@ -407,6 +407,7 @@ static const AVOption avcodec_options[] + {"mastering_display_metadata", .default_val.i64 = AV_PKT_DATA_MASTERING_DISPLAY_METADATA, .type = AV_OPT_TYPE_CONST, .flags = A|D, .unit = "side_data_pkt" }, + {"content_light_level", .default_val.i64 = AV_PKT_DATA_CONTENT_LIGHT_LEVEL, .type = AV_OPT_TYPE_CONST, .flags = A|D, .unit = "side_data_pkt" }, + {"icc_profile", .default_val.i64 = AV_PKT_DATA_ICC_PROFILE, .type = AV_OPT_TYPE_CONST, .flags = A|D, .unit = "side_data_pkt" }, ++{"low_priority", "attempt to run decode pipeline at a lower priority than is used for realtime decoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_LOW_PRIORITY }, INT_MIN, INT_MAX, V | D, .unit = "hwaccel_flags"}, + {NULL}, + }; + +Index: FFmpeg/libavcodec/videotoolbox.c +=================================================================== +--- libavcodec/videotoolbox.c 
++++ libavcodec/videotoolbox.c +@@ -984,6 +984,23 @@ static int videotoolbox_start(AVCodecCon + av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox reported invalid data.\n"); + return AVERROR_INVALIDDATA; + case 0: ++ if (avctx->skip_frame >= AVDISCARD_NONKEY) { ++ status = VTSessionSetProperty(videotoolbox->session, ++ kVTDecompressionPropertyKey_OnlyTheseFrames, ++ kVTDecompressionProperty_OnlyTheseFrames_KeyFrames); ++ if (status) { ++ av_log(avctx, AV_LOG_WARNING, "kVTDecompressionProperty_OnlyTheseFrames_KeyFrames is not supported on this device. Ignoring.\n"); ++ } ++ } ++ if (avctx->hwaccel_flags & AV_HWACCEL_FLAG_LOW_PRIORITY) { ++ status = VTSessionSetProperty(videotoolbox->session, ++ kVTDecompressionPropertyKey_RealTime, ++ kCFBooleanFalse); ++ av_log(avctx, AV_LOG_INFO, "Decoder running at lower priority.\n"); ++ if (status) { ++ av_log(avctx, AV_LOG_WARNING, "kVTDecompressionPropertyKey_RealTime is not supported on this device. Ignoring.\n"); ++ } ++ } + return 0; + default: + av_log(avctx, AV_LOG_VERBOSE, "Unknown VideoToolbox session creation error %d\n", (int)status); diff --git a/cross/ffmpeg7/patches/1055-jellyfin-0055-add-dummy-device-derive-to-videotoolbox-hwcontext.patch b/cross/ffmpeg7/patches/1055-jellyfin-0055-add-dummy-device-derive-to-videotoolbox-hwcontext.patch new file mode 100644 index 00000000000..6e52129387d --- /dev/null +++ b/cross/ffmpeg7/patches/1055-jellyfin-0055-add-dummy-device-derive-to-videotoolbox-hwcontext.patch @@ -0,0 +1,27 @@ +Index: FFmpeg/libavutil/hwcontext_videotoolbox.c +=================================================================== +--- libavutil/hwcontext_videotoolbox.c ++++ libavutil/hwcontext_videotoolbox.c +@@ -825,6 +825,14 @@ static int vt_device_create(AVHWDeviceCo + return 0; + } + ++static int vt_device_derive(AVHWDeviceContext *device_ctx, ++ AVHWDeviceContext *src_ctx, AVDictionary *opts, ++ int flags) ++{ ++ // There is no context to be setup with VT, just return. 
++ return 0; ++} ++ + const HWContextType ff_hwcontext_type_videotoolbox = { + .type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX, + .name = "videotoolbox", +@@ -832,6 +840,7 @@ const HWContextType ff_hwcontext_type_vi + .frames_hwctx_size = sizeof(VTFramesContext), + + .device_create = vt_device_create, ++ .device_derive = vt_device_derive, + .frames_init = vt_frames_init, + .frames_get_buffer = vt_get_buffer, + .frames_get_constraints = vt_frames_get_constraints, diff --git a/cross/ffmpeg7/patches/1056-jellyfin-0056-add-cuda-transpose-filter-impl.patch b/cross/ffmpeg7/patches/1056-jellyfin-0056-add-cuda-transpose-filter-impl.patch new file mode 100644 index 00000000000..fedcc930ea8 --- /dev/null +++ b/cross/ffmpeg7/patches/1056-jellyfin-0056-add-cuda-transpose-filter-impl.patch @@ -0,0 +1,586 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3297,6 +3297,8 @@ thumbnail_cuda_filter_deps="ffnvcodec" + thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" + tonemap_cuda_filter_deps="ffnvcodec const_nan" + tonemap_cuda_filter_deps_any="cuda_nvcc cuda_llvm" ++transpose_cuda_filter_deps="ffnvcodec" ++transpose_cuda_filter_deps_any="cuda_nvcc cuda_llvm" + transpose_npp_filter_deps="ffnvcodec libnpp" + overlay_cuda_filter_deps="ffnvcodec" + overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- libavfilter/Makefile ++++ libavfilter/Makefile +@@ -540,6 +540,8 @@ OBJS-$(CONFIG_TONEMAP_VIDEOTOOLBOX_FILTE + metal/utils.o + OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o + OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o ++OBJS-$(CONFIG_TRANSPOSE_CUDA_FILTER) += vf_transpose_cuda.o vf_transpose_cuda.ptx.o \ ++ cuda/load_helper.o + OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o + OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER) += vf_transpose_opencl.o opencl.o opencl/transpose.o + 
OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER) += vf_transpose_vaapi.o vaapi_vpp.o +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== +--- libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -504,6 +504,7 @@ extern const AVFilter ff_vf_tonemap_vaap + extern const AVFilter ff_vf_tonemap_videotoolbox; + extern const AVFilter ff_vf_tpad; + extern const AVFilter ff_vf_transpose; ++extern const AVFilter ff_vf_transpose_cuda; + extern const AVFilter ff_vf_transpose_npp; + extern const AVFilter ff_vf_transpose_opencl; + extern const AVFilter ff_vf_transpose_vaapi; +Index: FFmpeg/libavfilter/vf_transpose_cuda.c +=================================================================== +--- /dev/null ++++ libavfilter/vf_transpose_cuda.c +@@ -0,0 +1,477 @@ ++/* ++ * Copyright (C) 2024 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/common.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_cuda_internal.h" ++#include "libavutil/cuda_check.h" ++#include "libavutil/internal.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#include "avfilter.h" ++#include "internal.h" ++#include "video.h" ++#include "transpose.h" ++ ++#include "cuda/load_helper.h" ++ ++#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) ++#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) ++ ++#define BLOCK_X 32 ++#define BLOCK_Y 16 ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV444P, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++ AV_PIX_FMT_YUV444P16, ++ AV_PIX_FMT_0RGB32, ++ AV_PIX_FMT_0BGR32, ++ AV_PIX_FMT_RGB32, ++ AV_PIX_FMT_BGR32, ++}; ++ ++typedef struct TransposeCUDAContext { ++ const AVClass *class; ++ ++ AVCUDADeviceContext *hwctx; ++ AVBufferRef *frames_ctx; ++ AVFrame *frame; ++ AVFrame *tmp_frame; ++ ++ const AVPixFmtDescriptor *pix_desc; ++ ++ CUcontext cu_ctx; ++ CUmodule cu_module; ++ CUfunction cu_func_uchar; ++ CUfunction cu_func_ushort; ++ CUstream cu_stream; ++ ++ int passthrough; ///< PassthroughType, landscape passthrough mode enabled ++ int dir; ///< TransposeDir ++} TransposeCUDAContext; ++ ++static av_cold int cudatranspose_init(AVFilterContext *ctx) ++{ ++ TransposeCUDAContext *s = ctx->priv; ++ ++ s->frame = av_frame_alloc(); ++ if (!s->frame) ++ return AVERROR(ENOMEM); ++ ++ s->tmp_frame = av_frame_alloc(); ++ if (!s->tmp_frame) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static av_cold void cudatranspose_uninit(AVFilterContext *ctx) ++{ ++ TransposeCUDAContext *s = ctx->priv; ++ ++ if (s->hwctx && s->cu_module) { ++ 
CUcontext dummy; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx)); ++ CHECK_CU(cu->cuModuleUnload(s->cu_module)); ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ } ++ ++ av_frame_free(&s->frame); ++ av_buffer_unref(&s->frames_ctx); ++ av_frame_free(&s->tmp_frame); ++} ++ ++static av_cold int init_hwframe_ctx(TransposeCUDAContext *s, ++ AVBufferRef *device_ctx, ++ int width, int height, ++ enum AVPixelFormat sw_format) ++{ ++ AVBufferRef *out_ref = NULL; ++ AVHWFramesContext *out_ctx; ++ int ret; ++ ++ out_ref = av_hwframe_ctx_alloc(device_ctx); ++ if (!out_ref) ++ return AVERROR(ENOMEM); ++ out_ctx = (AVHWFramesContext*)out_ref->data; ++ ++ out_ctx->format = AV_PIX_FMT_CUDA; ++ out_ctx->sw_format = sw_format; ++ out_ctx->width = FFALIGN(width, 32); ++ out_ctx->height = FFALIGN(height, 32); ++ ++ ret = av_hwframe_ctx_init(out_ref); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_unref(s->frame); ++ ret = av_hwframe_get_buffer(out_ref, s->frame, 0); ++ if (ret < 0) ++ goto fail; ++ ++ s->frame->width = width; ++ s->frame->height = height; ++ ++ av_buffer_unref(&s->frames_ctx); ++ s->frames_ctx = out_ref; ++ ++ return 0; ++fail: ++ av_buffer_unref(&out_ref); ++ return ret; ++} ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ int i; ++ ++ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ ++ return 0; ++} ++ ++static int init_processing_chain(AVFilterContext *ctx, ++ int out_width, int out_height) ++{ ++ TransposeCUDAContext *s = ctx->priv; ++ AVHWFramesContext *in_frames_ctx; ++ enum AVPixelFormat format; ++ int ret; ++ ++ /* check that we have a hw context */ ++ if (!ctx->inputs[0]->hw_frames_ctx) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; ++ format = in_frames_ctx->sw_format; ++ s->pix_desc = 
av_pix_fmt_desc_get(format); ++ ++ if (!format_is_supported(format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(format)); ++ return AVERROR(ENOSYS); ++ } ++ ++ ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, ++ out_width, out_height, format); ++ if (ret < 0) ++ return ret; ++ ++ s->hwctx = in_frames_ctx->device_ctx->hwctx; ++ s->cu_stream = s->hwctx->stream; ++ ++ ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); ++ if (!ctx->outputs[0]->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static int cudatranspose_config_props(AVFilterLink *outlink) ++{ ++ extern const unsigned char ff_vf_transpose_cuda_ptx_data[]; ++ extern const unsigned int ff_vf_transpose_cuda_ptx_len; ++ AVFilterContext *ctx = outlink->src; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ TransposeCUDAContext *s = ctx->priv; ++ CUcontext dummy, cuda_ctx; ++ CudaFunctions *cu; ++ int ret; ++ ++ if ((inlink->w >= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) || ++ (inlink->w <= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) { ++ if (inlink->hw_frames_ctx) { ++ outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); ++ if (!outlink->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ } ++ ++ av_log(ctx, AV_LOG_VERBOSE, ++ "w:%d h:%d -> w:%d h:%d (passthrough mode)\n", ++ inlink->w, inlink->h, inlink->w, inlink->h); ++ return 0; ++ } else { ++ s->passthrough = TRANSPOSE_PT_TYPE_NONE; ++ } ++ ++ switch (s->dir) { ++ case TRANSPOSE_CCLOCK_FLIP: ++ case TRANSPOSE_CCLOCK: ++ case TRANSPOSE_CLOCK: ++ case TRANSPOSE_CLOCK_FLIP: ++ outlink->w = inlink->h; ++ outlink->h = inlink->w; ++ break; ++ default: ++ outlink->w = inlink->w; ++ outlink->h = inlink->h; ++ break; ++ } ++ ++ if (inlink->sample_aspect_ratio.num) ++ outlink->sample_aspect_ratio = av_div_q((AVRational) { 1, 1 }, ++ inlink->sample_aspect_ratio); ++ else ++ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; ++ ++ ret = 
init_processing_chain(ctx, outlink->w, outlink->h); ++ if (ret < 0) ++ return ret; ++ ++ cuda_ctx = s->cu_ctx = s->hwctx->cuda_ctx; ++ cu = s->hwctx->internal->cuda_dl; ++ ++ ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); ++ if (ret < 0) ++ return ret; ++ ++ ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module, ++ ff_vf_transpose_cuda_ptx_data, ff_vf_transpose_cuda_ptx_len); ++ if (ret < 0) { ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ return ret; ++ } ++ ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, ++ s->cu_module, "Transpose_Cuda_uchar")); ++ if (ret < 0) { ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ return ret; ++ } ++ ++ ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, ++ s->cu_module, "Transpose_Cuda_ushort")); ++ if (ret < 0) { ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ return ret; ++ } ++ ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ ++ av_log(ctx, AV_LOG_VERBOSE, ++ "w:%d h:%d dir:%d -> w:%d h:%d\n", ++ inlink->w, inlink->h, s->dir, outlink->w, outlink->h); ++ return 0; ++} ++ ++static int call_transpose_kernel(AVFilterContext *ctx, ++ int pix_size, ++ int pix_step, ++ int pix_offset, ++ CUdeviceptr dst_data, ++ int dst_width, ++ int dst_height, ++ int dst_pitch, ++ CUdeviceptr src_data, ++ int src_width, ++ int src_height, ++ int src_pitch) ++{ ++ TransposeCUDAContext *s = ctx->priv; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ ++ void* kernel_args[] = { ++ &dst_data, &dst_width, &dst_height, &dst_pitch, ++ &src_data, &src_width, &src_height, &src_pitch, ++ &pix_step, &pix_offset, &s->dir, ++ }; ++ ++ return CHECK_CU(cu->cuLaunchKernel(pix_size == 1 ? 
s->cu_func_uchar : s->cu_func_ushort, ++ DIV_UP(dst_width, BLOCK_X), DIV_UP(dst_height, BLOCK_Y), 1, ++ BLOCK_X, BLOCK_Y, 1, 0, s->cu_stream, kernel_args, NULL)); ++} ++ ++static int cudatranspose_rotate(AVFilterContext *ctx, ++ AVFrame *out, AVFrame *in) ++{ ++ TransposeCUDAContext *s = ctx->priv; ++ int pix_size, pix_step, pix_offset; ++ int c, p, ret; ++ ++ for (c = 0; c < s->pix_desc->nb_components; c++) { ++ p = s->pix_desc->comp[c].plane; ++ if (!out->data[p] || !in->data[p]) ++ return AVERROR(ENOMEM); ++ ++ pix_size = (s->pix_desc->comp[c].depth + s->pix_desc->comp[c].shift) / 8; ++ if (!(pix_size == 1 || pix_size == 2)) ++ return AVERROR(ENOSYS); ++ ++ pix_step = s->pix_desc->comp[c].step / pix_size; ++ pix_offset = s->pix_desc->comp[c].offset / pix_size; ++ ++ ret = call_transpose_kernel(ctx, pix_size, pix_step, pix_offset, ++ (CUdeviceptr)out->data[p], ++ AV_CEIL_RSHIFT(out->width, p ? s->pix_desc->log2_chroma_w : 0), ++ AV_CEIL_RSHIFT(out->height, p ? s->pix_desc->log2_chroma_h : 0), ++ out->linesize[p] / pix_size, ++ (CUdeviceptr)in->data[p], ++ AV_CEIL_RSHIFT(in->width, p ? s->pix_desc->log2_chroma_w : 0), ++ AV_CEIL_RSHIFT(in->height, p ? 
s->pix_desc->log2_chroma_h : 0), ++ in->linesize[p] / pix_size); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int cudatranspose_filter_frame_internal(AVFilterContext *ctx, ++ AVFrame *out, AVFrame *in) ++{ ++ TransposeCUDAContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ AVFrame *src = in; ++ int ret; ++ ++ ret = cudatranspose_rotate(ctx, s->frame, src); ++ if (ret < 0) ++ return ret; ++ ++ src = s->frame; ++ ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); ++ if (ret < 0) ++ return ret; ++ ++ av_frame_move_ref(out, s->frame); ++ av_frame_move_ref(s->frame, s->tmp_frame); ++ ++ s->frame->width = outlink->w; ++ s->frame->height = outlink->h; ++ ++ ret = av_frame_copy_props(out, in); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static int cudatranspose_filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *ctx = link->dst; ++ TransposeCUDAContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ CudaFunctions *cu; ++ AVFrame *out = NULL; ++ CUcontext dummy; ++ int ret = 0; ++ ++ if (s->passthrough) ++ return ff_filter_frame(outlink, in); ++ ++ out = av_frame_alloc(); ++ if (!out) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ cu = s->hwctx->internal->cuda_dl; ++ ++ ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx)); ++ if (ret < 0) ++ goto fail; ++ ++ ret = cudatranspose_filter_frame_internal(ctx, out, in); ++ ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_free(&in); ++ ++ return ff_filter_frame(outlink, out); ++ ++fail: ++ av_frame_free(&in); ++ av_frame_free(&out); ++ return ret; ++} ++ ++static AVFrame *cudatranspose_get_video_buffer(AVFilterLink *inlink, int w, int h) ++{ ++ TransposeCUDAContext *s = inlink->dst->priv; ++ ++ return s->passthrough ? 
++ ff_null_get_video_buffer (inlink, w, h) : ++ ff_default_get_video_buffer(inlink, w, h); ++} ++ ++#define OFFSET(x) offsetof(TransposeCUDAContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++ ++static const AVOption cudatranspose_options[] = { ++ { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 6, FLAGS, .unit = "dir" }, ++ { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 0, FLAGS, .unit = "dir" }, ++ { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, 0, 0, FLAGS, .unit = "dir" }, ++ { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, 0, 0, FLAGS, .unit = "dir" }, ++ { "clock_flip", "rotate clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, 0, 0, FLAGS, .unit = "dir" }, ++ { "reversal", "rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, 0, 0, FLAGS, .unit = "dir" }, ++ { "hflip", "flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, 0, 0, FLAGS, .unit = "dir" }, ++ { "vflip", "flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, 0, 0, FLAGS, .unit = "dir" }, ++ ++ { "passthrough", "do not apply transposition if the input matches the specified geometry", OFFSET(passthrough), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_PT_TYPE_NONE }, 0, 2, FLAGS, .unit = "passthrough" }, ++ { "none", "always apply transposition", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_NONE }, 0, 0, FLAGS, .unit = "passthrough" }, ++ { "landscape", "preserve landscape geometry", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_LANDSCAPE }, 0, 0, FLAGS, .unit = "passthrough" }, ++ { "portrait", "preserve portrait geometry", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_PORTRAIT }, 0, 0, FLAGS, .unit = "passthrough" }, ++ ++ { NULL }, ++}; ++ 
++AVFILTER_DEFINE_CLASS(cudatranspose); ++ ++static const AVFilterPad cudatranspose_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = cudatranspose_filter_frame, ++ .get_buffer.video = cudatranspose_get_video_buffer, ++ }, ++}; ++ ++static const AVFilterPad cudatranspose_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = cudatranspose_config_props, ++ }, ++}; ++ ++const AVFilter ff_vf_transpose_cuda = { ++ .name = "transpose_cuda", ++ .description = NULL_IF_CONFIG_SMALL("Transpose input video using CUDA"), ++ .init = cudatranspose_init, ++ .uninit = cudatranspose_uninit, ++ .priv_size = sizeof(TransposeCUDAContext), ++ .priv_class = &cudatranspose_class, ++ FILTER_INPUTS(cudatranspose_inputs), ++ FILTER_OUTPUTS(cudatranspose_outputs), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA), ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; +Index: FFmpeg/libavfilter/vf_transpose_cuda.cu +=================================================================== +--- /dev/null ++++ libavfilter/vf_transpose_cuda.cu +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (C) 2024 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++template ++__inline__ __device__ void transpose_func( ++ T* dst, int dst_width, int dst_height, int dst_pitch, ++ T* src, int src_width, int src_height, int src_pitch, ++ int pix_step, int pix_offset, int dir) ++{ ++ int xo = blockIdx.x * blockDim.x + threadIdx.x; ++ int yo = blockIdx.y * blockDim.y + threadIdx.y; ++ if (xo >= dst_width || yo >= dst_height) ++ return; ++ ++ int xi = (dir < 4) ? ((dir & 2) ? (dst_height - 1 - yo) : yo) ++ : ((dir == 6) ? xo : (dst_width - 1 - xo)); ++ int yi = (dir < 4) ? ((dir & 1) ? (dst_width - 1 - xo) : xo) ++ : ((dir == 5) ? yo : (dst_height - 1 - yo)); ++ if (xi >= src_width || yi >= src_height) ++ return; ++ ++ int dst_pos = xo*pix_step + yo*dst_pitch + pix_offset; ++ int src_pos = xi*pix_step + yi*src_pitch + pix_offset; ++ dst[dst_pos] = src[src_pos]; ++} ++ ++extern "C" { ++ ++#define TRANSPOSE_VARIANT(NAME, TYPE) \ ++__global__ void Transpose_Cuda_ ## NAME( \ ++ TYPE* dst, int dst_width, int dst_height, int dst_pitch, \ ++ TYPE* src, int src_width, int src_height, int src_pitch, \ ++ int pix_step, int pix_offset, int dir) \ ++{ \ ++ transpose_func( \ ++ dst, dst_width, dst_height, dst_pitch, \ ++ src, src_width, src_height, src_pitch, \ ++ pix_step, pix_offset, dir); \ ++} ++ ++TRANSPOSE_VARIANT(uchar, unsigned char) ++TRANSPOSE_VARIANT(ushort, unsigned short) ++ ++} /* extern "C" */ diff --git a/cross/ffmpeg7/patches/1057-jellyfin-0057-add-flip-feat-to-opencl-transpose-filter.patch b/cross/ffmpeg7/patches/1057-jellyfin-0057-add-flip-feat-to-opencl-transpose-filter.patch new file mode 100644 index 00000000000..58d0f0616cc --- /dev/null +++ b/cross/ffmpeg7/patches/1057-jellyfin-0057-add-flip-feat-to-opencl-transpose-filter.patch @@ -0,0 +1,73 @@ +Index: FFmpeg/libavfilter/opencl/transpose.cl 
+=================================================================== +--- libavfilter/opencl/transpose.cl ++++ libavfilter/opencl/transpose.cl +@@ -26,8 +26,10 @@ kernel void transpose(__write_only image + int x = get_global_id(0); + int y = get_global_id(1); + +- int xin = (dir & 2) ? (size.y - 1 - y) : y; +- int yin = (dir & 1) ? (size.x - 1 - x) : x; ++ int xin = (dir < 4) ? ((dir & 2) ? (size.y - 1 - y) : y) ++ : ((dir == 6) ? x : (size.x - 1 - x)); ++ int yin = (dir < 4) ? ((dir & 1) ? (size.x - 1 - x) : x) ++ : ((dir == 5) ? y : (size.y - 1 - y)); + float4 data = read_imagef(src, sampler, (int2)(xin, yin)); + + if (x < size.x && y < size.y) +Index: FFmpeg/libavfilter/vf_transpose_opencl.c +=================================================================== +--- libavfilter/vf_transpose_opencl.c ++++ libavfilter/vf_transpose_opencl.c +@@ -101,8 +101,20 @@ static int transpose_opencl_config_outpu + return AVERROR(EINVAL); + } + +- s->ocf.output_width = inlink->h; +- s->ocf.output_height = inlink->w; ++ switch (s->dir) { ++ case TRANSPOSE_CCLOCK_FLIP: ++ case TRANSPOSE_CCLOCK: ++ case TRANSPOSE_CLOCK: ++ case TRANSPOSE_CLOCK_FLIP: ++ s->ocf.output_width = inlink->h; ++ s->ocf.output_height = inlink->w; ++ break; ++ default: ++ s->ocf.output_width = inlink->w; ++ s->ocf.output_height = inlink->h; ++ break; ++ } ++ + ret = ff_opencl_filter_config_output(outlink); + if (ret < 0) + return ret; +@@ -114,10 +126,8 @@ static int transpose_opencl_config_outpu + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + + av_log(avctx, AV_LOG_VERBOSE, +- "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n", +- inlink->w, inlink->h, s->dir, outlink->w, outlink->h, +- s->dir == 1 || s->dir == 3 ? 
"clockwise" : "counterclockwise", +- s->dir == 0 || s->dir == 3); ++ "w:%d h:%d dir:%d -> w:%d h:%d\n", ++ inlink->w, inlink->h, s->dir, outlink->w, outlink->h); + return 0; + } + +@@ -235,11 +245,14 @@ static av_cold void transpose_opencl_uni + #define OFFSET(x) offsetof(TransposeOpenCLContext, x) + #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + static const AVOption transpose_opencl_options[] = { +- { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 3, FLAGS, .unit = "dir" }, ++ { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 6, FLAGS, .unit = "dir" }, + { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, + { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "dir" }, + { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "dir" }, + { "clock_flip", "rotate clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, ++ { "reversal", "rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, .flags=FLAGS, .unit = "dir" }, ++ { "hflip", "flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, .flags=FLAGS, .unit = "dir" }, ++ { "vflip", "flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, .flags=FLAGS, .unit = "dir" }, + + { "passthrough", "do not apply transposition if the input matches the specified geometry", + OFFSET(passthrough), AV_OPT_TYPE_INT, {.i64=TRANSPOSE_PT_TYPE_NONE}, 0, INT_MAX, FLAGS, .unit = "passthrough" }, diff --git a/cross/ffmpeg7/patches/1058-jellyfin-0058-add-customized-surf-align-for-vaapi-encoder.patch b/cross/ffmpeg7/patches/1058-jellyfin-0058-add-customized-surf-align-for-vaapi-encoder.patch new file 
mode 100644 index 00000000000..03a8da493f7 --- /dev/null +++ b/cross/ffmpeg7/patches/1058-jellyfin-0058-add-customized-surf-align-for-vaapi-encoder.patch @@ -0,0 +1,59 @@ +Index: FFmpeg/libavcodec/vaapi_encode.c +=================================================================== +--- libavcodec/vaapi_encode.c ++++ libavcodec/vaapi_encode.c +@@ -2728,6 +2728,17 @@ static av_cold int vaapi_encode_create_r + av_log(avctx, AV_LOG_DEBUG, "Using %s as format of " + "reconstructed frames.\n", av_get_pix_fmt_name(recon_format)); + ++ if (constraints->width_align || constraints->height_align) { ++ if (constraints->width_align) { ++ ctx->surface_width = FFALIGN(avctx->width, constraints->width_align); ++ } ++ if (constraints->height_align) { ++ ctx->surface_height = FFALIGN(avctx->height, constraints->height_align); ++ } ++ av_log(avctx, AV_LOG_VERBOSE, "Using customized alignment size " ++ "[%dx%d].\n", constraints->width_align, constraints->height_align); ++ } ++ + if (ctx->surface_width < constraints->min_width || + ctx->surface_height < constraints->min_height || + ctx->surface_width > constraints->max_width || +Index: FFmpeg/libavutil/hwcontext.h +=================================================================== +--- libavutil/hwcontext.h ++++ libavutil/hwcontext.h +@@ -467,6 +467,13 @@ typedef struct AVHWFramesConstraints { + */ + int max_width; + int max_height; ++ ++ /** ++ * The frame width/height alignment when available ++ * (Zero is not applied, use the default value.) 
++ */ ++ int width_align; ++ int height_align; + } AVHWFramesConstraints; + + /** +Index: FFmpeg/libavutil/hwcontext_vaapi.c +=================================================================== +--- libavutil/hwcontext_vaapi.c ++++ libavutil/hwcontext_vaapi.c +@@ -297,6 +297,14 @@ static int vaapi_frames_get_constraints( + case VASurfaceAttribMaxHeight: + constraints->max_height = attr_list[i].value.value.i; + break; ++#if VA_CHECK_VERSION(1, 21, 0) ++ case VASurfaceAttribAlignmentSize: ++ if (attr_list[i].value.value.i) { ++ constraints->width_align = 1 << (attr_list[i].value.value.i & 0xf); ++ constraints->height_align = 1 << ((attr_list[i].value.value.i & 0xf0) >> 4); ++ } ++ break; ++#endif + } + } + if (pix_fmt_count == 0) { diff --git a/cross/ffmpeg7/patches/1059-jellyfin-0059-opus-allow-5point1-side-inputs.patch b/cross/ffmpeg7/patches/1059-jellyfin-0059-opus-allow-5point1-side-inputs.patch new file mode 100644 index 00000000000..6bdd4c4afba --- /dev/null +++ b/cross/ffmpeg7/patches/1059-jellyfin-0059-opus-allow-5point1-side-inputs.patch @@ -0,0 +1,15 @@ +Index: FFmpeg/libavcodec/libopusenc.c +=================================================================== +--- libavcodec/libopusenc.c ++++ libavcodec/libopusenc.c +@@ -196,6 +196,10 @@ static int libopus_check_vorbis_layout(A + av_log(avctx, AV_LOG_WARNING, + "No channel layout specified. 
Opus encoder will use Vorbis " + "channel layout for %d channels.\n", avctx->ch_layout.nb_channels); ++ } else if (av_channel_layout_compare(&avctx->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT1) == 0) { ++ av_log(avctx, AV_LOG_WARNING, ++ "Input channel layout 5.1(side) detected, side channels will be mapped to back channels.\n"); ++ return 0; + } else if (av_channel_layout_compare(&avctx->ch_layout, &ff_vorbis_ch_layouts[avctx->ch_layout.nb_channels - 1])) { + char name[32]; + diff --git a/cross/ffmpeg7/patches/1060-jellyfin-0060-add-simd-optimized-tonemapx-filter.patch b/cross/ffmpeg7/patches/1060-jellyfin-0060-add-simd-optimized-tonemapx-filter.patch new file mode 100644 index 00000000000..c5a63638927 --- /dev/null +++ b/cross/ffmpeg7/patches/1060-jellyfin-0060-add-simd-optimized-tonemapx-filter.patch @@ -0,0 +1,9121 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -2314,6 +2314,9 @@ HEADERS_LIST=" + + INTRINSICS_LIST=" + intrinsics_neon ++ intrinsics_sse42 ++ intrinsics_fma3 ++ intrinsics_avx2 + " + + MATH_FUNCS=" +@@ -2797,6 +2800,10 @@ avx2_deps="avx" + avx512_deps="avx2" + avx512icl_deps="avx512" + ++intrinsics_sse42_deps="sse42" ++intrinsics_fma3_deps="fma3" ++intrinsics_avx2_deps="avx2" ++ + mmx_external_deps="x86asm" + mmx_inline_deps="inline_asm x86" + mmx_suggest="mmx_external mmx_inline" +@@ -3934,6 +3941,7 @@ tinterlace_filter_deps="gpl" + tinterlace_merge_test_deps="tinterlace_filter" + tinterlace_pad_test_deps="tinterlace_filter" + tonemap_filter_deps="const_nan" ++tonemapx_filter_deps="const_nan" + tonemap_vaapi_filter_deps="vaapi VAProcFilterParameterBufferHDRToneMapping" + tonemap_videotoolbox_filter_deps="metal corevideo videotoolbox const_nan" + tonemap_opencl_filter_deps="opencl const_nan" +@@ -6464,6 +6472,19 @@ fi + + check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)" + ++disable intrinsics_sse42 && test_cc -msse4.2 < ++int 
main(void) { __m128i t = _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_setzero_si128()); return 0; } ++EOF ++disable intrinsics_fma3 && test_cc -mfma < ++int main(void) { __m256 t = _mm256_fmadd_ps(_mm256_setzero_ps(), _mm256_setzero_ps(), _mm256_setzero_ps()); return 0; } ++EOF ++disable intrinsics_avx2 && test_cc -mavx2 < ++int main(void) { __m256i t = _mm256_abs_epi32(_mm256_setzero_si256()); return 0; } ++EOF ++ + check_ldflags -Wl,--as-needed + check_ldflags -Wl,-z,noexecstack + +@@ -7586,6 +7607,16 @@ elif enabled gcc; then + check_cflags -mpreferred-stack-boundary=4 + ;; + esac ++ elif enabled x86_64; then ++ case $target_os in ++ mingw*|win*|cygwin*) ++ # GCC on Windows cannot guarantee a 32-byte aligned stack ++ # Such alignment is required by certain AVX instructions ++ # Force GCC to use the unaligned equivalents instead ++ check_cflags -Wa,-muse-unaligned-vector-move ++ check_cxxflags -Wa,-muse-unaligned-vector-move ++ ;; ++ esac + fi + elif enabled llvm_gcc; then + check_cflags -mllvm -stack-alignment=16 +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- libavfilter/Makefile ++++ libavfilter/Makefile +@@ -530,6 +530,7 @@ OBJS-$(CONFIG_TMEDIAN_FILTER) + OBJS-$(CONFIG_TMIDEQUALIZER_FILTER) += vf_tmidequalizer.o + OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o framesync.o + OBJS-$(CONFIG_TONEMAP_FILTER) += vf_tonemap.o ++OBJS-$(CONFIG_TONEMAPX_FILTER) += vf_tonemapx.o + OBJS-$(CONFIG_TONEMAP_CUDA_FILTER) += vf_tonemap_cuda.o cuda/tonemap.ptx.o \ + cuda/host_util.o + OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o opencl.o \ +Index: FFmpeg/libavfilter/aarch64/Makefile +=================================================================== +--- libavfilter/aarch64/Makefile ++++ libavfilter/aarch64/Makefile +@@ -1,5 +1,6 @@ + OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_init_aarch64.o + OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o ++OBJS-$(CONFIG_TONEMAPX_FILTER) += 
aarch64/vf_tonemapx_intrin_neon.o + + NEON-OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_neon.o + NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o +Index: FFmpeg/libavfilter/aarch64/vf_tonemapx_intrin_neon.c +=================================================================== +--- /dev/null ++++ libavfilter/aarch64/vf_tonemapx_intrin_neon.c +@@ -0,0 +1,2149 @@ ++/* ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "vf_tonemapx_intrin_neon.h" ++ ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++# include ++#endif // ENABLE_TONEMAPX_NEON_INTRINSICS ++ ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++inline static float32x4_t mix_float32x4(float32x4_t x, float32x4_t y, float32x4_t a) ++{ ++ float32x4_t n = vsubq_f32(y, x); ++ n = vfmaq_f32(x, n, a); ++ return n; ++} ++ ++static inline float reshape_poly(float s, float32x4_t coeffs) ++{ ++ float32x4_t ps = vdupq_n_f32(0.0f); ++ ps = vsetq_lane_f32(1.0f, ps, 0); ++ ps = vsetq_lane_f32(s, ps, 1); ++ ps = vsetq_lane_f32(s * s, ps, 2); ++ ps = vmulq_f32(ps, coeffs); ++ return vaddvq_f32(ps); ++} ++ ++inline static float reshape_mmr(float32x4_t sig, float32x4_t coeffs, const float* mmr, ++ int mmr_single, int min_order, int max_order) ++{ ++ 
int mmr_idx = mmr_single ? 0 : (int)vgetq_lane_f32(coeffs, 1); ++ int order = (int)vgetq_lane_f32(coeffs, 3); ++ float s = vgetq_lane_f32(coeffs, 0); ++ float32x4_t mmr_coeffs, ps; ++ float32x4_t sigX01 = vmulq_laneq_f32(sig, sig, 1); // {sig[0]*sig[1], sig[1]*sig[1], sig[2]*sig[1], sig[3]*sig[1]} ++ float32x4_t sigX02 = vmulq_laneq_f32(sig, sig, 2); // {sig[0]*sig[2], sig[1]*sig[2], sig[2]*sig[2], sig[3]*sig[2]} ++ float32x4_t sigX12 = vmulq_laneq_f32(sigX01, sig, 2); // {sig[0]*sig[1]*sig[2], sig[1]*sig[1]*sig[2], sig[2]*sig[1]*sig[2], sig[3]*sig[1]*sig[2]} ++ float32x4_t sigX = sigX01; // sig[0]*sig[1] now positioned at 0 ++ sigX = vsetq_lane_f32(vgetq_lane_f32(sigX02, 0), sigX, 1); // sig[0]*sig[2] at 1 ++ sigX = vsetq_lane_f32(vgetq_lane_f32(sigX02, 1), sigX, 2); // sig[1]*sig[2] at 2 ++ sigX = vsetq_lane_f32(vgetq_lane_f32(sigX12, 0), sigX, 3); // sig[0]*sig[1]*sig[2] at 3 ++ ++ // dot first order ++ mmr_coeffs = vld1q_f32(&mmr[mmr_idx + 0*4]); ++ ps = vmulq_f32(sig, mmr_coeffs); ++ s += vaddvq_f32(ps); ++ mmr_coeffs = vld1q_f32(&mmr[mmr_idx + 1*4]); ++ ps = vmulq_f32(sigX, mmr_coeffs); ++ s += vaddvq_f32(ps); ++ ++ if (max_order >= 2 && (min_order >= 2 || order >= 2)) { ++ float32x4_t sig2 = vmulq_f32(sig, sig); ++ float32x4_t sigX2 = vmulq_f32(sigX, sigX); ++ ++ mmr_coeffs = vld1q_f32(&mmr[mmr_idx + 2*4]); ++ ps = vmulq_f32(sig2, mmr_coeffs); ++ s += vaddvq_f32(ps); ++ mmr_coeffs = vld1q_f32(&mmr[mmr_idx + 3*4]); ++ ps = vmulq_f32(sigX2, mmr_coeffs); ++ s += vaddvq_f32(ps); ++ ++ if (max_order == 3 && (min_order == 3 || order >= 3)) { ++ float32x4_t sig3 = vmulq_f32(sig2, sig); ++ float32x4_t sigX3 = vmulq_f32(sigX2, sigX); ++ ++ mmr_coeffs = vld1q_f32(&mmr[mmr_idx + 4*4]); ++ ps = vmulq_f32(sig3, mmr_coeffs); ++ s += vaddvq_f32(ps); ++ mmr_coeffs = vld1q_f32(&mmr[mmr_idx + 5*4]); ++ ps = vmulq_f32(sigX3, mmr_coeffs); ++ s += vaddvq_f32(ps); ++ } ++ } ++ ++ return s; ++} ++ ++#define CLAMP(a, b, c) (FFMIN(FFMAX((a), (b)), (c))) ++inline static float32x4_t 
reshape_dovi_iptpqc2(float32x4_t sig, const TonemapIntParams *ctx) ++{ ++ int has_mmr_poly; ++ float s; ++ ++ float *src_dovi_params = ctx->dovi_pbuf; ++ float *src_dovi_pivots = ctx->dovi_pbuf + 24; ++ float *src_dovi_coeffs = ctx->dovi_pbuf + 48; //float4* ++ float *src_dovi_mmr = ctx->dovi_pbuf + 144; //float4* ++ ++ float* dovi_params_i = src_dovi_params + 0*8; ++ float* dovi_pivots_i = src_dovi_pivots + 0*8; ++ float* dovi_coeffs_i = src_dovi_coeffs + 0 * 8 * 4; //float4* ++ float* dovi_mmr_i = src_dovi_mmr + 0 * 48 * 4; //float4* ++ int dovi_num_pivots_i = dovi_params_i[0]; ++ int dovi_has_mmr_i = dovi_params_i[1]; ++ int dovi_has_poly_i = dovi_params_i[2]; ++ int dovi_mmr_single_i = dovi_params_i[3]; ++ int dovi_min_order_i = dovi_params_i[4]; ++ int dovi_max_order_i = dovi_params_i[5]; ++ float dovi_lo_i = dovi_params_i[6]; ++ float dovi_hi_i = dovi_params_i[7]; ++ ++ float* dovi_params_p = src_dovi_params + 1*8; ++ float* dovi_coeffs_p = src_dovi_coeffs + 1*8 * 4; //float4* ++ float* dovi_mmr_p = src_dovi_mmr + 1*48 * 4; //float4* ++ int dovi_has_mmr_p = dovi_params_p[1]; ++ int dovi_has_poly_p = dovi_params_p[2]; ++ int dovi_mmr_single_p = dovi_params_p[3]; ++ int dovi_min_order_p = dovi_params_p[4]; ++ int dovi_max_order_p = dovi_params_p[5]; ++ float dovi_lo_p = dovi_params_p[6]; ++ float dovi_hi_p = dovi_params_p[7]; ++ ++ float* dovi_params_t = src_dovi_params + 2*8; ++ float* dovi_coeffs_t = src_dovi_coeffs + 2*8 * 4; //float4* ++ float* dovi_mmr_t = src_dovi_mmr + 2*48 * 4; //float4* ++ int dovi_has_mmr_t = dovi_params_t[1]; ++ int dovi_has_poly_t = dovi_params_t[2]; ++ int dovi_mmr_single_t = dovi_params_t[3]; ++ int dovi_min_order_t = dovi_params_t[4]; ++ int dovi_max_order_t = dovi_params_t[5]; ++ float dovi_lo_t = dovi_params_t[6]; ++ float dovi_hi_t = dovi_params_t[7]; ++ ++ float32x4_t coeffs, result; ++ ++ // reshape I ++ s = vgetq_lane_f32(sig, 0); ++ result = sig; ++ if (dovi_num_pivots_i > 2) { ++ float32x4_t m01 = 
mix_float32x4(vld1q_f32(dovi_coeffs_i), vld1q_f32(dovi_coeffs_i + 4), vdupq_n_f32(s >= dovi_pivots_i[0])); ++ float32x4_t m23 = mix_float32x4(vld1q_f32(dovi_coeffs_i + 2*4), vld1q_f32(dovi_coeffs_i + 3*4), vdupq_n_f32(s >= dovi_pivots_i[2])); ++ float32x4_t m0123 = mix_float32x4(m01, m23, vdupq_n_f32(s >= dovi_pivots_i[1])); ++ float32x4_t m45 = mix_float32x4(vld1q_f32(dovi_coeffs_i + 4*4), vld1q_f32(dovi_coeffs_i + 5*4), vdupq_n_f32(s >= dovi_pivots_i[4])); ++ float32x4_t m67 = mix_float32x4(vld1q_f32(dovi_coeffs_i + 6*4), vld1q_f32(dovi_coeffs_i + 7*4), vdupq_n_f32(s >= dovi_pivots_i[6])); ++ float32x4_t m4567 = mix_float32x4(m45, m67, vdupq_n_f32(s >= dovi_pivots_i[5])); ++ coeffs = mix_float32x4(m0123, m4567, vdupq_n_f32(s >= dovi_pivots_i[3])); ++ } else { ++ coeffs = vld1q_f32(dovi_coeffs_i); ++ } ++ ++ has_mmr_poly = dovi_has_mmr_i && dovi_has_poly_i; ++ ++ if ((has_mmr_poly && vgetq_lane_f32(coeffs, 3) == 0.0f) || (!has_mmr_poly && dovi_has_poly_i)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(result, coeffs, dovi_mmr_i, ++ dovi_mmr_single_i, dovi_min_order_i, dovi_max_order_i); ++ ++ result = vsetq_lane_f32(CLAMP(s, dovi_lo_i, dovi_hi_i), result, 0); ++ ++ // reshape P ++ s = vgetq_lane_f32(sig, 1); ++ coeffs = vld1q_f32(dovi_coeffs_p); ++ has_mmr_poly = dovi_has_mmr_p && dovi_has_poly_p; ++ ++ if ((has_mmr_poly && vgetq_lane_f32(coeffs, 3) == 0.0f) || (!has_mmr_poly && dovi_has_poly_p)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(result, coeffs, dovi_mmr_p, ++ dovi_mmr_single_p, dovi_min_order_p, dovi_max_order_p); ++ ++ result = vsetq_lane_f32(CLAMP(s, dovi_lo_p, dovi_hi_p), result, 1); ++ ++ // reshape T ++ s = vgetq_lane_f32(sig, 2); ++ coeffs = vld1q_f32(dovi_coeffs_t); ++ has_mmr_poly = dovi_has_mmr_t && dovi_has_poly_t; ++ ++ if ((has_mmr_poly && vgetq_lane_f32(coeffs, 3) == 0.0f) || (!has_mmr_poly && dovi_has_poly_t)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(result, coeffs, dovi_mmr_t, ++ 
dovi_mmr_single_t, dovi_min_order_t, dovi_max_order_t); ++ ++ result = vsetq_lane_f32(CLAMP(s, dovi_lo_t, dovi_hi_t), result, 2); ++ ++ return result; ++} ++ ++inline static void ycc2rgbx4(float32x4_t* dy, float32x4_t* dcb, float32x4_t* dcr, ++ float32x4_t y, float32x4_t cb, float32x4_t cr, ++ const double nonlinear[3][3], const float ycc_offset[3]) ++{ ++ *dy = vmulq_n_f32(y, (float)nonlinear[0][0]); ++ *dy = vfmaq_n_f32(*dy, cb, (float)nonlinear[0][1]); ++ *dy = vfmaq_n_f32(*dy, cr, (float)nonlinear[0][2]); ++ *dy = vsubq_f32(*dy, vdupq_n_f32(ycc_offset[0])); ++ ++ *dcb = vmulq_n_f32(y, (float)nonlinear[1][0]); ++ *dcb = vfmaq_n_f32(*dcb, cb, (float)nonlinear[1][1]); ++ *dcb = vfmaq_n_f32(*dcb, cr, (float)nonlinear[1][2]); ++ *dcb = vsubq_f32(*dcb, vdupq_n_f32(ycc_offset[1])); ++ ++ *dcr = vmulq_n_f32(y, (float)nonlinear[2][0]); ++ *dcr = vfmaq_n_f32(*dcr, cb, (float)nonlinear[2][1]); ++ *dcr = vfmaq_n_f32(*dcr, cr, (float)nonlinear[2][2]); ++ *dcr = vsubq_f32(*dcr, vdupq_n_f32(ycc_offset[2])); ++} ++ ++inline static void lms2rgbx4(float32x4_t* dl, float32x4_t* dm, float32x4_t* ds, ++ float32x4_t l, float32x4_t m, float32x4_t s, ++ const double lms2rgb_matrix[3][3]) ++{ ++ *dl = vmulq_n_f32(l, (float)lms2rgb_matrix[0][0]); ++ *dl = vfmaq_n_f32(*dl, m, (float)lms2rgb_matrix[0][1]); ++ *dl = vfmaq_n_f32(*dl, s, (float)lms2rgb_matrix[0][2]); ++ ++ *dm = vmulq_n_f32(l, (float)lms2rgb_matrix[1][0]); ++ *dm = vfmaq_n_f32(*dm, m, (float)lms2rgb_matrix[1][1]); ++ *dm = vfmaq_n_f32(*dm, s, (float)lms2rgb_matrix[1][2]); ++ ++ *ds = vmulq_n_f32(l, (float)lms2rgb_matrix[2][0]); ++ *ds = vfmaq_n_f32(*ds, m, (float)lms2rgb_matrix[2][1]); ++ *ds = vfmaq_n_f32(*ds, s, (float)lms2rgb_matrix[2][2]); ++} ++ ++static inline void tonemap_int16x8_neon(uint16x8_t r_in, uint16x8_t g_in, uint16x8_t b_in, ++ int16_t *r_out, int16_t *g_out, int16_t *b_out, ++ float *lin_lut, float *tonemap_lut, uint16_t *delin_lut, ++ const AVLumaCoefficients *coeffs, ++ const AVLumaCoefficients *ocoeffs, 
double desat, ++ double (*rgb2rgb)[3][3], ++ int rgb2rgb_passthrough) ++{ ++ int16x8_t sig8; ++ float32x4_t mapvalx4a; ++ float32x4_t mapvalx4b; ++ float32x4_t r_linx4a; ++ float32x4_t r_linx4b; ++ float32x4_t g_linx4a; ++ float32x4_t g_linx4b; ++ float32x4_t b_linx4a; ++ float32x4_t b_linx4b; ++ float32x4_t offset = vdupq_n_f32(0.5f); ++ int32x4_t output_upper_bound = vdupq_n_s32(32767); ++ int32x4_t zerox4 = vdupq_n_s32(0); ++ int16x8_t input_lut_offset = vdupq_n_s16(2048); ++ int16x8_t input_upper_bound = vdupq_n_s16(32767); ++ int16x8_t r, g, b; ++ int32x4_t rx4a, gx4a, bx4a, rx4b, gx4b, bx4b; ++ ++ float mapval4a[4], mapval4b[4], r_lin4a[4], r_lin4b[4], g_lin4a[4], g_lin4b[4], b_lin4a[4], b_lin4b[4]; ++ ++ r = vreinterpretq_s16_u16(r_in); ++ g = vreinterpretq_s16_u16(g_in); ++ b = vreinterpretq_s16_u16(b_in); ++ ++ sig8 = vmaxq_s16(r, vmaxq_s16(g, b)); ++ sig8 = vaddq_s16(sig8, input_lut_offset); ++ sig8 = vminq_s16(sig8, input_upper_bound); ++ sig8 = vmaxq_s16(sig8, vreinterpretq_s16_s32(zerox4)); ++ ++ r = vaddq_s16(r, input_lut_offset); ++ r = vminq_s16(r, input_upper_bound); ++ r = vmaxq_s16(r, vreinterpretq_s16_s32(zerox4)); ++ g = vaddq_s16(g, input_lut_offset); ++ g = vminq_s16(g, input_upper_bound); ++ g = vmaxq_s16(g, vreinterpretq_s16_s32(zerox4)); ++ b = vaddq_s16(b, input_lut_offset); ++ b = vminq_s16(b, input_upper_bound); ++ b = vmaxq_s16(b, vreinterpretq_s16_s32(zerox4)); ++ ++ // Cannot use loop here as the lane has to be compile-time constant ++#define LOAD_LUT(i) mapval4a[i] = tonemap_lut[vget_lane_s16(vget_low_s16(sig8), i)]; \ ++mapval4b[i] = tonemap_lut[vget_lane_s16(vget_high_s16(sig8), i)]; \ ++r_lin4a[i] = lin_lut[vget_lane_s16(vget_low_s16(r), i)]; \ ++r_lin4b[i] = lin_lut[vget_lane_s16(vget_high_s16(r), i)]; \ ++g_lin4a[i] = lin_lut[vget_lane_s16(vget_low_s16(g), i)]; \ ++g_lin4b[i] = lin_lut[vget_lane_s16(vget_high_s16(g), i)]; \ ++b_lin4a[i] = lin_lut[vget_lane_s16(vget_low_s16(b), i)]; \ ++b_lin4b[i] = 
lin_lut[vget_lane_s16(vget_high_s16(b), i)]; ++ ++ LOAD_LUT(0) ++ LOAD_LUT(1) ++ LOAD_LUT(2) ++ LOAD_LUT(3) ++ ++#undef LOAD_LUT ++ ++ mapvalx4a = vld1q_f32(mapval4a); ++ mapvalx4b = vld1q_f32(mapval4b); ++ r_linx4a = vld1q_f32(r_lin4a); ++ r_linx4b = vld1q_f32(r_lin4b); ++ g_linx4a = vld1q_f32(g_lin4a); ++ g_linx4b = vld1q_f32(g_lin4b); ++ b_linx4a = vld1q_f32(b_lin4a); ++ b_linx4b = vld1q_f32(b_lin4b); ++ ++ if (!rgb2rgb_passthrough) { ++ r_linx4a = vmulq_n_f32(r_linx4a, (float)(*rgb2rgb)[0][0]); ++ r_linx4a = vfmaq_n_f32(r_linx4a, g_linx4a, (float)(*rgb2rgb)[0][1]); ++ r_linx4a = vfmaq_n_f32(r_linx4a, b_linx4a, (float)(*rgb2rgb)[0][2]); ++ r_linx4b = vmulq_n_f32(r_linx4b, (float)(*rgb2rgb)[0][0]); ++ r_linx4b = vfmaq_n_f32(r_linx4b, g_linx4b, (float)(*rgb2rgb)[0][1]); ++ r_linx4b = vfmaq_n_f32(r_linx4b, b_linx4b, (float)(*rgb2rgb)[0][2]); ++ ++ g_linx4a = vmulq_n_f32(g_linx4a, (float)(*rgb2rgb)[1][1]); ++ g_linx4a = vfmaq_n_f32(g_linx4a, r_linx4a, (float)(*rgb2rgb)[1][0]); ++ g_linx4a = vfmaq_n_f32(g_linx4a, b_linx4a, (float)(*rgb2rgb)[1][2]); ++ g_linx4b = vmulq_n_f32(g_linx4b, (float)(*rgb2rgb)[1][1]); ++ g_linx4b = vfmaq_n_f32(g_linx4b, r_linx4b, (float)(*rgb2rgb)[1][0]); ++ g_linx4b = vfmaq_n_f32(g_linx4b, b_linx4b, (float)(*rgb2rgb)[1][2]); ++ ++ b_linx4a = vmulq_n_f32(b_linx4a, (float)(*rgb2rgb)[2][2]); ++ b_linx4a = vfmaq_n_f32(b_linx4a, r_linx4a, (float)(*rgb2rgb)[2][0]); ++ b_linx4a = vfmaq_n_f32(b_linx4a, g_linx4a, (float)(*rgb2rgb)[2][1]); ++ b_linx4b = vmulq_n_f32(b_linx4b, (float)(*rgb2rgb)[2][2]); ++ b_linx4b = vfmaq_n_f32(b_linx4b, r_linx4b, (float)(*rgb2rgb)[2][0]); ++ b_linx4b = vfmaq_n_f32(b_linx4b, g_linx4b, (float)(*rgb2rgb)[2][1]); ++ } ++ ++ if (desat > 0) { ++ float32x4_t eps_x4 = vdupq_n_f32(FLOAT_EPS); ++ float32x4_t desat4 = vdupq_n_f32((float)desat); ++ float32x4_t luma4 = vdupq_n_f32(0); ++ float32x4_t overbright4; ++ // Group A ++ luma4 = vmlaq_n_f32(luma4, r_linx4a, (float)av_q2d(coeffs->cr)); ++ luma4 = vmlaq_n_f32(luma4, g_linx4a, 
(float)av_q2d(coeffs->cg)); ++ luma4 = vmlaq_n_f32(luma4, b_linx4a, (float)av_q2d(coeffs->cb)); ++ overbright4 = vdivq_f32(vmaxq_f32(vsubq_f32(luma4, desat4), eps_x4), vmaxq_f32(luma4, eps_x4)); ++ r_linx4a = vmlsq_f32(r_linx4a, r_linx4a, overbright4); ++ r_linx4a = vmlaq_f32(r_linx4a, luma4, overbright4); ++ g_linx4a = vmlsq_f32(g_linx4a, g_linx4a, overbright4); ++ g_linx4a = vmlaq_f32(g_linx4a, luma4, overbright4); ++ b_linx4a = vmlsq_f32(b_linx4a, b_linx4a, overbright4); ++ b_linx4a = vmlaq_f32(b_linx4a, luma4, overbright4); ++ // Group B ++ luma4 = vdupq_n_f32(0); ++ luma4 = vmlaq_n_f32(luma4, r_linx4b, (float)av_q2d(coeffs->cr)); ++ luma4 = vmlaq_n_f32(luma4, g_linx4b, (float)av_q2d(coeffs->cg)); ++ luma4 = vmlaq_n_f32(luma4, b_linx4b, (float)av_q2d(coeffs->cb)); ++ overbright4 = vdivq_f32(vmaxq_f32(vsubq_f32(luma4, desat4), eps_x4), vmaxq_f32(luma4, eps_x4)); ++ r_linx4b = vmlsq_f32(r_linx4b, r_linx4b, overbright4); ++ r_linx4b = vmlaq_f32(r_linx4b, luma4, overbright4); ++ g_linx4b = vmlsq_f32(g_linx4b, g_linx4b, overbright4); ++ g_linx4b = vmlaq_f32(g_linx4b, luma4, overbright4); ++ b_linx4b = vmlsq_f32(b_linx4b, b_linx4b, overbright4); ++ b_linx4b = vmlaq_f32(b_linx4b, luma4, overbright4); ++ } ++ ++ r_linx4a = vmulq_f32(r_linx4a, mapvalx4a); ++ g_linx4a = vmulq_f32(g_linx4a, mapvalx4a); ++ b_linx4a = vmulq_f32(b_linx4a, mapvalx4a); ++ ++ r_linx4b = vmulq_f32(r_linx4b, mapvalx4b); ++ g_linx4b = vmulq_f32(g_linx4b, mapvalx4b); ++ b_linx4b = vmulq_f32(b_linx4b, mapvalx4b); ++ ++ r_linx4a = vmlaq_n_f32(offset, r_linx4a, 32767); ++ r_linx4b = vmlaq_n_f32(offset, r_linx4b, 32767); ++ g_linx4a = vmlaq_n_f32(offset, g_linx4a, 32767); ++ g_linx4b = vmlaq_n_f32(offset, g_linx4b, 32767); ++ b_linx4a = vmlaq_n_f32(offset, b_linx4a, 32767); ++ b_linx4b = vmlaq_n_f32(offset, b_linx4b, 32767); ++ ++ rx4a = vcvtq_s32_f32(r_linx4a); ++ rx4a = vminq_s32(rx4a, output_upper_bound); ++ rx4a = vmaxq_s32(rx4a, zerox4); ++ gx4a = vcvtq_s32_f32(g_linx4a); ++ gx4a = vminq_s32(gx4a, 
output_upper_bound); ++ gx4a = vmaxq_s32(gx4a, zerox4); ++ bx4a = vcvtq_s32_f32(b_linx4a); ++ bx4a = vminq_s32(bx4a, output_upper_bound); ++ bx4a = vmaxq_s32(bx4a, zerox4); ++ rx4b = vcvtq_s32_f32(r_linx4b); ++ rx4b = vminq_s32(rx4b, output_upper_bound); ++ rx4b = vmaxq_s32(rx4b, zerox4); ++ gx4b = vcvtq_s32_f32(g_linx4b); ++ gx4b = vminq_s32(gx4b, output_upper_bound); ++ gx4b = vmaxq_s32(gx4b, zerox4); ++ bx4b = vcvtq_s32_f32(b_linx4b); ++ bx4b = vminq_s32(bx4b, output_upper_bound); ++ bx4b = vmaxq_s32(bx4b, zerox4); ++ ++ r_out[0] = delin_lut[vget_lane_s32(vget_low_s32(rx4a), 0)]; ++ r_out[1] = delin_lut[vget_lane_s32(vget_low_s32(rx4a), 1)]; ++ r_out[2] = delin_lut[vget_lane_s32(vget_high_s32(rx4a), 0)]; ++ r_out[3] = delin_lut[vget_lane_s32(vget_high_s32(rx4a), 1)]; ++ r_out[4] = delin_lut[vget_lane_s32(vget_low_s32(rx4b), 0)]; ++ r_out[5] = delin_lut[vget_lane_s32(vget_low_s32(rx4b), 1)]; ++ r_out[6] = delin_lut[vget_lane_s32(vget_high_s32(rx4b), 0)]; ++ r_out[7] = delin_lut[vget_lane_s32(vget_high_s32(rx4b), 1)]; ++ ++ g_out[0] = delin_lut[vget_lane_s32(vget_low_s32(gx4a), 0)]; ++ g_out[1] = delin_lut[vget_lane_s32(vget_low_s32(gx4a), 1)]; ++ g_out[2] = delin_lut[vget_lane_s32(vget_high_s32(gx4a), 0)]; ++ g_out[3] = delin_lut[vget_lane_s32(vget_high_s32(gx4a), 1)]; ++ g_out[4] = delin_lut[vget_lane_s32(vget_low_s32(gx4b), 0)]; ++ g_out[5] = delin_lut[vget_lane_s32(vget_low_s32(gx4b), 1)]; ++ g_out[6] = delin_lut[vget_lane_s32(vget_high_s32(gx4b), 0)]; ++ g_out[7] = delin_lut[vget_lane_s32(vget_high_s32(gx4b), 1)]; ++ ++ b_out[0] = delin_lut[vget_lane_s32(vget_low_s32(bx4a), 0)]; ++ b_out[1] = delin_lut[vget_lane_s32(vget_low_s32(bx4a), 1)]; ++ b_out[2] = delin_lut[vget_lane_s32(vget_high_s32(bx4a), 0)]; ++ b_out[3] = delin_lut[vget_lane_s32(vget_high_s32(bx4a), 1)]; ++ b_out[4] = delin_lut[vget_lane_s32(vget_low_s32(bx4b), 0)]; ++ b_out[5] = delin_lut[vget_lane_s32(vget_low_s32(bx4b), 1)]; ++ b_out[6] = delin_lut[vget_lane_s32(vget_high_s32(bx4b), 0)]; ++ 
b_out[7] = delin_lut[vget_lane_s32(vget_high_s32(bx4b), 1)]; ++} ++#endif // ENABLE_TONEMAPX_NEON_INTRINSICS ++ ++void tonemap_frame_dovi_2_420p_neon(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++ uint8_t *rdsty = dsty; ++ uint8_t *rdstu = dstu; ++ uint8_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // not zero when not divisible by 8 ++ // intentionally leave last pixel emtpy when input is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ const float in_rng = (float)((1 << in_depth) - 1); ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ uint16x8_t y0x8, y1x8, ux8, vx8; ++ uint16x8_t r0x8, g0x8, b0x8; ++ uint16x8_t r1x8, g1x8, b1x8; ++ uint16x4_t ux4, vx4; ++ ++ int16x8_t r0ox8, g0ox8, b0ox8; ++ int16x8_t y0ox8; ++ int32x4_t r0oax4, r0obx4, g0oax4, g0obx4, b0oax4, b0obx4; ++ int32x4_t y0oax4, y0obx4; ++ ++ int16x8_t r1ox8, g1ox8, b1ox8; ++ int16x8_t y1ox8; ++ int32x4_t r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ int32x4_t y1oax4, y1obx4; ++ int32x2_t ravgax2, gavgax2, bavgax2, ravgbx2, gavgbx2, bavgbx2; ++ 
int32x4_t ravgx4, gavgx4, bavgx4, uox4, vox4; ++ int32x4_t out_yuv_offx4 = vdupq_n_s32(params->out_yuv_off); ++ int32x4_t out_rndx4 = vdupq_n_s32(out_rnd); ++ int32x4_t out_uv_offsetx4 = vdupq_n_s32(out_uv_offset); ++ int32x4_t rgb_avg_rndx4 = vdupq_n_s32(2); ++ float32x4_t ipt0, ipt1, ipt2, ipt3; ++ float32x4_t ia1, ib1, ia2, ib2; ++ float32x4_t ix4, px4, tx4; ++ float32x4_t lx4, mx4, sx4; ++ float32x4_t rx4a, gx4a, bx4a, rx4b, gx4b, bx4b; ++ float32x4_t y0x4a, y0x4b, y1x4a, y1x4b, ux4a, ux4b, vx4a, vx4b; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstu += dstlinesize[1], dstv += dstlinesize[2], ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = vld1q_u16(srcy + x); ++ y1x8 = vld1q_u16(srcy + (srclinesize[0] / 2 + x)); ++ ux4 = vld1_u16(srcu + (x >> 1)); ++ vx4 = vld1_u16(srcv + (x >> 1)); ++ ++ ux8 = vcombine_u16(vzip1_u16(ux4, ux4), vzip2_u16(ux4, ux4)); ++ vx8 = vcombine_u16(vzip1_u16(vx4, vx4), vzip2_u16(vx4, vx4)); ++ ++ y0x4a = vcvtq_f32_u32(vmovl_u16(vget_low_u16(y0x8))); ++ y0x4b = vcvtq_f32_u32(vmovl_u16(vget_high_u16(y0x8))); ++ y1x4a = vcvtq_f32_u32(vmovl_u16(vget_low_u16(y1x8))); ++ y1x4b = vcvtq_f32_u32(vmovl_u16(vget_high_u16(y1x8))); ++ ++ ux4a = vcvtq_f32_u32(vmovl_u16(vget_low_u16(ux8))); ++ ux4b = vcvtq_f32_u32(vmovl_u16(vget_high_u16(ux8))); ++ vx4a = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vx8))); ++ vx4b = vcvtq_f32_u32(vmovl_u16(vget_high_u16(vx8))); ++ ++ y0x4a = vdivq_f32(y0x4a, vdupq_n_f32(in_rng)); ++ y0x4b = vdivq_f32(y0x4b, vdupq_n_f32(in_rng)); ++ y1x4a = vdivq_f32(y1x4a, vdupq_n_f32(in_rng)); ++ y1x4b = vdivq_f32(y1x4b, vdupq_n_f32(in_rng)); ++ ux4a = vdivq_f32(ux4a, vdupq_n_f32(in_rng)); ++ ux4b = vdivq_f32(ux4b, vdupq_n_f32(in_rng)); ++ vx4a = vdivq_f32(vx4a, vdupq_n_f32(in_rng)); ++ vx4b = vdivq_f32(vx4b, vdupq_n_f32(in_rng)); ++ ++ // Reshape y0x4a ++ ia1 = vzip1q_f32(y0x4a, ux4a); ++ ia2 = 
vzip2q_f32(y0x4a, ux4a); ++ ib1 = vzip1q_f32(vx4a, vdupq_n_f32(0.0f)); ++ ib2 = vzip2q_f32(vx4a, vdupq_n_f32(0.0f)); ++ ipt0 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ib1)); ++ ipt1 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ib1)); ++ ipt2 = vcombine_f32(vget_low_f32(ia2), vget_low_f32(ib2)); ++ ipt3 = vcombine_f32(vget_high_f32(ia2), vget_high_f32(ib2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ia1 = vtrn1q_f32(ipt0, ipt1); ++ ia2 = vtrn1q_f32(ipt2, ipt3); ++ ib1 = vtrn2q_f32(ipt0, ipt1); ++ ib2 = vtrn2q_f32(ipt2, ipt3); ++ ++ ix4 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ia2)); ++ px4 = vcombine_f32(vget_low_f32(ib1), vget_low_f32(ib2)); ++ tx4 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ia2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4a, &gx4a, &bx4a, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4a = vmulq_n_f32(rx4a, 28672.0f); ++ gx4a = vmulq_n_f32(gx4a, 28672.0f); ++ bx4a = vmulq_n_f32(bx4a, 28672.0f); ++ ++ // Reshape y0x4b ++ ia1 = vzip1q_f32(y0x4b, ux4b); ++ ia2 = vzip2q_f32(y0x4b, ux4b); ++ ib1 = vzip1q_f32(vx4b, vdupq_n_f32(0.0f)); ++ ib2 = vzip2q_f32(vx4b, vdupq_n_f32(0.0f)); ++ ipt0 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ib1)); ++ ipt1 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ib1)); ++ ipt2 = vcombine_f32(vget_low_f32(ia2), vget_low_f32(ib2)); ++ ipt3 = vcombine_f32(vget_high_f32(ia2), vget_high_f32(ib2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ia1 = vtrn1q_f32(ipt0, ipt1); ++ ia2 = vtrn1q_f32(ipt2, ipt3); ++ ib1 = vtrn2q_f32(ipt0, ipt1); ++ ib2 = vtrn2q_f32(ipt2, ipt3); ++ ++ ix4 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ia2)); ++ px4 = 
vcombine_f32(vget_low_f32(ib1), vget_low_f32(ib2)); ++ tx4 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ia2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4b, &gx4b, &bx4b, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4b = vmulq_n_f32(rx4b, 28672.0f); ++ gx4b = vmulq_n_f32(gx4b, 28672.0f); ++ bx4b = vmulq_n_f32(bx4b, 28672.0f); ++ ++ r0x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(rx4a)), vqmovn_u32(vcvtq_u32_f32(rx4b))); ++ g0x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(gx4a)), vqmovn_u32(vcvtq_u32_f32(gx4b))); ++ b0x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(bx4a)), vqmovn_u32(vcvtq_u32_f32(bx4b))); ++ ++ // Reshape y1x4a ++ ia1 = vzip1q_f32(y1x4a, ux4a); ++ ia2 = vzip2q_f32(y1x4a, ux4a); ++ ib1 = vzip1q_f32(vx4a, vdupq_n_f32(0.0f)); ++ ib2 = vzip2q_f32(vx4a, vdupq_n_f32(0.0f)); ++ ipt0 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ib1)); ++ ipt1 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ib1)); ++ ipt2 = vcombine_f32(vget_low_f32(ia2), vget_low_f32(ib2)); ++ ipt3 = vcombine_f32(vget_high_f32(ia2), vget_high_f32(ib2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ia1 = vtrn1q_f32(ipt0, ipt1); ++ ia2 = vtrn1q_f32(ipt2, ipt3); ++ ib1 = vtrn2q_f32(ipt0, ipt1); ++ ib2 = vtrn2q_f32(ipt2, ipt3); ++ ++ ix4 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ia2)); ++ px4 = vcombine_f32(vget_low_f32(ib1), vget_low_f32(ib2)); ++ tx4 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ia2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4a, &gx4a, &bx4a, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4a = vmulq_n_f32(rx4a, 28672.0f); ++ gx4a = vmulq_n_f32(gx4a, 28672.0f); ++ bx4a = vmulq_n_f32(bx4a, 28672.0f); ++ ++ // Reshape y1x4b ++ ia1 = vzip1q_f32(y1x4b, ux4b); ++ ia2 = vzip2q_f32(y1x4b, 
ux4b); ++ ib1 = vzip1q_f32(vx4b, vdupq_n_f32(0.0f)); ++ ib2 = vzip2q_f32(vx4b, vdupq_n_f32(0.0f)); ++ ipt0 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ib1)); ++ ipt1 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ib1)); ++ ipt2 = vcombine_f32(vget_low_f32(ia2), vget_low_f32(ib2)); ++ ipt3 = vcombine_f32(vget_high_f32(ia2), vget_high_f32(ib2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ia1 = vtrn1q_f32(ipt0, ipt1); ++ ia2 = vtrn1q_f32(ipt2, ipt3); ++ ib1 = vtrn2q_f32(ipt0, ipt1); ++ ib2 = vtrn2q_f32(ipt2, ipt3); ++ ++ ix4 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ia2)); ++ px4 = vcombine_f32(vget_low_f32(ib1), vget_low_f32(ib2)); ++ tx4 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ia2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4b, &gx4b, &bx4b, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4b = vmulq_n_f32(rx4b, 28672.0f); ++ gx4b = vmulq_n_f32(gx4b, 28672.0f); ++ bx4b = vmulq_n_f32(bx4b, 28672.0f); ++ ++ r1x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(rx4a)), vqmovn_u32(vcvtq_u32_f32(rx4b))); ++ g1x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(gx4a)), vqmovn_u32(vcvtq_u32_f32(gx4b))); ++ b1x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(bx4a)), vqmovn_u32(vcvtq_u32_f32(bx4b))); ++ ++ tonemap_int16x8_neon(r0x8, g0x8, b0x8, (int16_t *) &r, (int16_t *) &g, (int16_t *) &b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int16x8_neon(r1x8, g1x8, b1x8, (int16_t *) &r1, (int16_t *) &g1, (int16_t *) &b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = vld1q_s16(r); ++ g0ox8 = vld1q_s16(g); 
++ b0ox8 = vld1q_s16(b); ++ ++ r0oax4 = vmovl_s16(vget_low_s16(r0ox8)); ++ g0oax4 = vmovl_s16(vget_low_s16(g0ox8)); ++ b0oax4 = vmovl_s16(vget_low_s16(b0ox8)); ++ ++ r0obx4 = vmovl_s16(vget_high_s16(r0ox8)); ++ g0obx4 = vmovl_s16(vget_high_s16(g0ox8)); ++ b0obx4 = vmovl_s16(vget_high_s16(b0ox8)); ++ ++ y0oax4 = vmulq_n_s32(r0oax4, cry); ++ y0oax4 = vmlaq_n_s32(y0oax4, g0oax4, cgy); ++ y0oax4 = vmlaq_n_s32(y0oax4, b0oax4, cby); ++ y0oax4 = vaddq_s32(y0oax4, out_rndx4); ++ // output shift bits for 8bit outputs is 29 - 8 = 21 ++ y0oax4 = vshrq_n_s32(y0oax4, 21); ++ y0oax4 = vaddq_s32(y0oax4, out_yuv_offx4); ++ ++ y0obx4 = vmulq_n_s32(r0obx4, cry); ++ y0obx4 = vmlaq_n_s32(y0obx4, g0obx4, cgy); ++ y0obx4 = vmlaq_n_s32(y0obx4, b0obx4, cby); ++ y0obx4 = vaddq_s32(y0obx4, out_rndx4); ++ y0obx4 = vshrq_n_s32(y0obx4, 21); ++ y0obx4 = vaddq_s32(y0obx4, out_yuv_offx4); ++ ++ y0ox8 = vcombine_s16(vqmovn_s32(y0oax4), vqmovn_s32(y0obx4)); ++ vst1_u8(&dsty[x], vqmovun_s16(y0ox8)); ++ ++ r1ox8 = vld1q_s16(r1); ++ g1ox8 = vld1q_s16(g1); ++ b1ox8 = vld1q_s16(b1); ++ ++ r1oax4 = vmovl_s16(vget_low_s16(r1ox8)); ++ g1oax4 = vmovl_s16(vget_low_s16(g1ox8)); ++ b1oax4 = vmovl_s16(vget_low_s16(b1ox8)); ++ ++ r1obx4 = vmovl_s16(vget_high_s16(r1ox8)); ++ g1obx4 = vmovl_s16(vget_high_s16(g1ox8)); ++ b1obx4 = vmovl_s16(vget_high_s16(b1ox8)); ++ ++ y1oax4 = vmulq_n_s32(r1oax4, cry); ++ y1oax4 = vmlaq_n_s32(y1oax4, g1oax4, cgy); ++ y1oax4 = vmlaq_n_s32(y1oax4, b1oax4, cby); ++ y1oax4 = vaddq_s32(y1oax4, out_rndx4); ++ y1oax4 = vshrq_n_s32(y1oax4, 21); ++ y1oax4 = vaddq_s32(y1oax4, out_yuv_offx4); ++ ++ y1obx4 = vmulq_n_s32(r1obx4, cry); ++ y1obx4 = vmlaq_n_s32(y1obx4, g1obx4, cgy); ++ y1obx4 = vmlaq_n_s32(y1obx4, b1obx4, cby); ++ y1obx4 = vaddq_s32(y1obx4, out_rndx4); ++ y1obx4 = vshrq_n_s32(y1obx4, 21); ++ y1obx4 = vaddq_s32(y1obx4, out_yuv_offx4); ++ ++ y1ox8 = vcombine_s16(vqmovn_s32(y1oax4), vqmovn_s32(y1obx4)); ++ vst1_u8(&dsty[x + dstlinesize[0]], vqmovun_s16(y1ox8)); ++ ++ ravgax2 = 
vpadd_s32(vget_low_s32(r0oax4), vget_high_s32(r0oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r0obx4), vget_high_s32(r0obx4)); ++ ravgx4 = vcombine_s32(ravgax2, ravgbx2); ++ ravgax2 = vpadd_s32(vget_low_s32(r1oax4), vget_high_s32(r1oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r1obx4), vget_high_s32(r1obx4)); ++ ravgx4 = vaddq_s32(ravgx4, vcombine_s32(ravgax2, ravgbx2)); ++ ravgx4 = vaddq_s32(ravgx4, rgb_avg_rndx4); ++ ravgx4 = vshrq_n_s32(ravgx4, 2); ++ ++ gavgax2 = vpadd_s32(vget_low_s32(g0oax4), vget_high_s32(g0oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g0obx4), vget_high_s32(g0obx4)); ++ gavgx4 = vcombine_s32(gavgax2, gavgbx2); ++ gavgax2 = vpadd_s32(vget_low_s32(g1oax4), vget_high_s32(g1oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g1obx4), vget_high_s32(g1obx4)); ++ gavgx4 = vaddq_s32(gavgx4, vcombine_s32(gavgax2, gavgbx2)); ++ gavgx4 = vaddq_s32(gavgx4, rgb_avg_rndx4); ++ gavgx4 = vshrq_n_s32(gavgx4, 2); ++ ++ bavgax2 = vpadd_s32(vget_low_s32(b0oax4), vget_high_s32(b0oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b0obx4), vget_high_s32(b0obx4)); ++ bavgx4 = vcombine_s32(bavgax2, bavgbx2); ++ bavgax2 = vpadd_s32(vget_low_s32(b1oax4), vget_high_s32(b1oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b1obx4), vget_high_s32(b1obx4)); ++ bavgx4 = vaddq_s32(bavgx4, vcombine_s32(bavgax2, bavgbx2)); ++ bavgx4 = vaddq_s32(bavgx4, rgb_avg_rndx4); ++ bavgx4 = vshrq_n_s32(bavgx4, 2); ++ ++ uox4 = vmlaq_n_s32(out_rndx4, ravgx4, cru); ++ uox4 = vmlaq_n_s32(uox4, gavgx4, ocgu); ++ uox4 = vmlaq_n_s32(uox4, bavgx4, cburv); ++ uox4 = vshrq_n_s32(uox4, 21); ++ uox4 = vaddq_s32(uox4, out_uv_offsetx4); ++ vst1_lane_u32((uint32_t *) &dstu[x >> 1], vreinterpret_u32_u8(vqmovun_s16(vcombine_s16(vmovn_s32(uox4), vdup_n_s16(0)))), 0); ++ ++ vox4 = vmlaq_n_s32(out_rndx4, ravgx4, cburv); ++ vox4 = vmlaq_n_s32(vox4, gavgx4, ocgv); ++ vox4 = vmlaq_n_s32(vox4, bavgx4, cbv); ++ vox4 = vshrq_n_s32(vox4, 21); ++ vox4 = vaddq_s32(vox4, out_uv_offsetx4); ++ vst1_lane_u32((uint32_t *) &dstv[x >> 1], 
vreinterpret_u32_u8(vqmovun_s16(vcombine_s16(vmovn_s32(vox4), vdup_n_s16(0)))), 0); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ tonemap_frame_dovi_2_420p(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_NEON_INTRINSICS ++} ++ ++void tonemap_frame_420p10_2_420p_neon(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++ uint8_t *rdsty = dsty; ++ uint8_t *rdstu = dstu; ++ uint8_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // not zero when not divisible by 8 ++ // intentionally leave last pixel emtpy when input is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = 
(*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ uint16_t cy_shifted = av_clip_int16(cy >> in_sh); ++ uint16_t rnd_shifted = av_clip_int16(in_rnd >> in_sh); ++ uint16_t crv_shifted = av_clip_int16(crv >> in_sh); ++ uint16_t cgu_shifted = av_clip_int16(cgu >> in_sh); ++ uint16_t cgv_shifted = av_clip_int16(cgv >> in_sh); ++ uint16_t cbu_shifted = av_clip_int16(cbu >> in_sh); ++ uint16x8_t rndx8 = vdupq_n_u16(rnd_shifted); ++ uint16x8_t in_yuv_offx8 = vdupq_n_u16(av_clip_int16(params->in_yuv_off)); ++ uint16x8_t in_uv_offx8 = vdupq_n_u16(av_clip_int16(in_uv_offset)); ++ uint16x8_t y0x8, y1x8, ux8, vx8; ++ uint16x8_t r0x8, g0x8, b0x8; ++ uint16x8_t r1x8, g1x8, b1x8; ++ uint16x4_t ux4, vx4; ++ ++ int16x8_t r0ox8, g0ox8, b0ox8; ++ int16x8_t y0ox8; ++ int32x4_t r0oax4, r0obx4, g0oax4, g0obx4, b0oax4, b0obx4; ++ int32x4_t y0oax4, y0obx4; ++ ++ int16x8_t r1ox8, g1ox8, b1ox8; ++ int16x8_t y1ox8; ++ int32x4_t r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ int32x4_t y1oax4, y1obx4; ++ int32x2_t ravgax2, gavgax2, bavgax2, ravgbx2, gavgbx2, bavgbx2; ++ int32x4_t ravgx4, gavgx4, bavgx4, uox4, vox4; ++ int32x4_t out_yuv_offx4 = vdupq_n_s32(params->out_yuv_off); ++ int32x4_t out_rndx4 = vdupq_n_s32(out_rnd); ++ int32x4_t out_uv_offsetx4 = vdupq_n_s32(out_uv_offset); ++ int32x4_t rgb_avg_rndx4 = vdupq_n_s32(2); ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstu += dstlinesize[1], dstv += dstlinesize[2], ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = vld1q_u16(srcy + x); ++ y1x8 = vld1q_u16(srcy + (srclinesize[0] / 2 + x)); ++ ux4 = vld1_u16(srcu + 
(x >> 1)); ++ vx4 = vld1_u16(srcv + (x >> 1)); ++ ++ y0x8 = vsubq_u16(y0x8, in_yuv_offx8); ++ y1x8 = vsubq_u16(y1x8, in_yuv_offx8); ++ ux8 = vcombine_u16(vzip1_u16(ux4, ux4), vzip2_u16(ux4, ux4)); ++ ux8 = vsubq_u16(ux8, in_uv_offx8); ++ vx8 = vcombine_u16(vzip1_u16(vx4, vx4), vzip2_u16(vx4, vx4)); ++ vx8 = vsubq_u16(vx8, in_uv_offx8); ++ ++ r0x8 = g0x8 = b0x8 = vmulq_n_u16(y0x8, cy_shifted); ++ r0x8 = vmlaq_n_u16(r0x8, vx8, crv_shifted); ++ r0x8 = vaddq_u16(r0x8, rndx8); ++ ++ g0x8 = vmlaq_n_u16(g0x8, ux8, cgu_shifted); ++ g0x8 = vmlaq_n_u16(g0x8, vx8, cgv_shifted); ++ g0x8 = vaddq_u16(g0x8, rndx8); ++ ++ b0x8 = vmlaq_n_u16(b0x8, ux8, cbu_shifted); ++ b0x8 = vaddq_u16(b0x8, rndx8); ++ ++ r1x8 = g1x8 = b1x8 = vmulq_n_u16(y1x8, cy_shifted); ++ r1x8 = vmlaq_n_u16(r1x8, vx8, crv_shifted); ++ r1x8 = vaddq_u16(r1x8, rndx8); ++ ++ g1x8 = vmlaq_n_u16(g1x8, ux8, cgu_shifted); ++ g1x8 = vmlaq_n_u16(g1x8, vx8, cgv_shifted); ++ g1x8 = vaddq_u16(g1x8, rndx8); ++ ++ b1x8 = vmlaq_n_u16(b1x8, ux8, cbu_shifted); ++ b1x8 = vaddq_u16(b1x8, rndx8); ++ ++ tonemap_int16x8_neon(r0x8, g0x8, b0x8, (int16_t *) &r, (int16_t *) &g, (int16_t *) &b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int16x8_neon(r1x8, g1x8, b1x8, (int16_t *) &r1, (int16_t *) &g1, (int16_t *) &b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = vld1q_s16(r); ++ g0ox8 = vld1q_s16(g); ++ b0ox8 = vld1q_s16(b); ++ ++ r0oax4 = vmovl_s16(vget_low_s16(r0ox8)); ++ g0oax4 = vmovl_s16(vget_low_s16(g0ox8)); ++ b0oax4 = vmovl_s16(vget_low_s16(b0ox8)); ++ ++ r0obx4 = vmovl_s16(vget_high_s16(r0ox8)); ++ g0obx4 = vmovl_s16(vget_high_s16(g0ox8)); ++ b0obx4 = vmovl_s16(vget_high_s16(b0ox8)); ++ ++ y0oax4 = vmulq_n_s32(r0oax4, cry); ++ y0oax4 = vmlaq_n_s32(y0oax4, g0oax4, 
cgy); ++ y0oax4 = vmlaq_n_s32(y0oax4, b0oax4, cby); ++ y0oax4 = vaddq_s32(y0oax4, out_rndx4); ++ // output shift bits for 8bit outputs is 29 - 8 = 21 ++ y0oax4 = vshrq_n_s32(y0oax4, 21); ++ y0oax4 = vaddq_s32(y0oax4, out_yuv_offx4); ++ ++ y0obx4 = vmulq_n_s32(r0obx4, cry); ++ y0obx4 = vmlaq_n_s32(y0obx4, g0obx4, cgy); ++ y0obx4 = vmlaq_n_s32(y0obx4, b0obx4, cby); ++ y0obx4 = vaddq_s32(y0obx4, out_rndx4); ++ y0obx4 = vshrq_n_s32(y0obx4, 21); ++ y0obx4 = vaddq_s32(y0obx4, out_yuv_offx4); ++ ++ y0ox8 = vcombine_s16(vqmovn_s32(y0oax4), vqmovn_s32(y0obx4)); ++ vst1_u8(&dsty[x], vqmovun_s16(y0ox8)); ++ ++ r1ox8 = vld1q_s16(r1); ++ g1ox8 = vld1q_s16(g1); ++ b1ox8 = vld1q_s16(b1); ++ ++ r1oax4 = vmovl_s16(vget_low_s16(r1ox8)); ++ g1oax4 = vmovl_s16(vget_low_s16(g1ox8)); ++ b1oax4 = vmovl_s16(vget_low_s16(b1ox8)); ++ ++ r1obx4 = vmovl_s16(vget_high_s16(r1ox8)); ++ g1obx4 = vmovl_s16(vget_high_s16(g1ox8)); ++ b1obx4 = vmovl_s16(vget_high_s16(b1ox8)); ++ ++ y1oax4 = vmulq_n_s32(r1oax4, cry); ++ y1oax4 = vmlaq_n_s32(y1oax4, g1oax4, cgy); ++ y1oax4 = vmlaq_n_s32(y1oax4, b1oax4, cby); ++ y1oax4 = vaddq_s32(y1oax4, out_rndx4); ++ y1oax4 = vshrq_n_s32(y1oax4, 21); ++ y1oax4 = vaddq_s32(y1oax4, out_yuv_offx4); ++ ++ y1obx4 = vmulq_n_s32(r1obx4, cry); ++ y1obx4 = vmlaq_n_s32(y1obx4, g1obx4, cgy); ++ y1obx4 = vmlaq_n_s32(y1obx4, b1obx4, cby); ++ y1obx4 = vaddq_s32(y1obx4, out_rndx4); ++ y1obx4 = vshrq_n_s32(y1obx4, 21); ++ y1obx4 = vaddq_s32(y1obx4, out_yuv_offx4); ++ ++ y1ox8 = vcombine_s16(vqmovn_s32(y1oax4), vqmovn_s32(y1obx4)); ++ vst1_u8(&dsty[x + dstlinesize[0]], vqmovun_s16(y1ox8)); ++ ++ ravgax2 = vpadd_s32(vget_low_s32(r0oax4), vget_high_s32(r0oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r0obx4), vget_high_s32(r0obx4)); ++ ravgx4 = vcombine_s32(ravgax2, ravgbx2); ++ ravgax2 = vpadd_s32(vget_low_s32(r1oax4), vget_high_s32(r1oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r1obx4), vget_high_s32(r1obx4)); ++ ravgx4 = vaddq_s32(ravgx4, vcombine_s32(ravgax2, ravgbx2)); ++ ravgx4 = 
vaddq_s32(ravgx4, rgb_avg_rndx4); ++ ravgx4 = vshrq_n_s32(ravgx4, 2); ++ ++ gavgax2 = vpadd_s32(vget_low_s32(g0oax4), vget_high_s32(g0oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g0obx4), vget_high_s32(g0obx4)); ++ gavgx4 = vcombine_s32(gavgax2, gavgbx2); ++ gavgax2 = vpadd_s32(vget_low_s32(g1oax4), vget_high_s32(g1oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g1obx4), vget_high_s32(g1obx4)); ++ gavgx4 = vaddq_s32(gavgx4, vcombine_s32(gavgax2, gavgbx2)); ++ gavgx4 = vaddq_s32(gavgx4, rgb_avg_rndx4); ++ gavgx4 = vshrq_n_s32(gavgx4, 2); ++ ++ bavgax2 = vpadd_s32(vget_low_s32(b0oax4), vget_high_s32(b0oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b0obx4), vget_high_s32(b0obx4)); ++ bavgx4 = vcombine_s32(bavgax2, bavgbx2); ++ bavgax2 = vpadd_s32(vget_low_s32(b1oax4), vget_high_s32(b1oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b1obx4), vget_high_s32(b1obx4)); ++ bavgx4 = vaddq_s32(bavgx4, vcombine_s32(bavgax2, bavgbx2)); ++ bavgx4 = vaddq_s32(bavgx4, rgb_avg_rndx4); ++ bavgx4 = vshrq_n_s32(bavgx4, 2); ++ ++ uox4 = vmlaq_n_s32(out_rndx4, ravgx4, cru); ++ uox4 = vmlaq_n_s32(uox4, gavgx4, ocgu); ++ uox4 = vmlaq_n_s32(uox4, bavgx4, cburv); ++ uox4 = vshrq_n_s32(uox4, 21); ++ uox4 = vaddq_s32(uox4, out_uv_offsetx4); ++ vst1_lane_u32((uint32_t *) &dstu[x >> 1], vreinterpret_u32_u8(vqmovun_s16(vcombine_s16(vmovn_s32(uox4), vdup_n_s16(0)))), 0); ++ ++ vox4 = vmlaq_n_s32(out_rndx4, ravgx4, cburv); ++ vox4 = vmlaq_n_s32(vox4, gavgx4, ocgv); ++ vox4 = vmlaq_n_s32(vox4, bavgx4, cbv); ++ vox4 = vshrq_n_s32(vox4, 21); ++ vox4 = vaddq_s32(vox4, out_uv_offsetx4); ++ vst1_lane_u32((uint32_t *) &dstv[x >> 1], vreinterpret_u32_u8(vqmovun_s16(vcombine_s16(vmovn_s32(vox4), vdup_n_s16(0)))), 0); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ 
tonemap_frame_420p10_2_420p(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_NEON_INTRINSICS ++} ++ ++void tonemap_frame_p016_p010_2_nv12_neon(uint8_t *dsty, uint8_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++ uint8_t *rdsty = dsty; ++ uint8_t *rdstuv = dstuv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcuv = srcuv; ++ int rheight = height; ++ // leftover columns after the 8-wide SIMD loop: the even part of width % 8 (0, 2, 4 or 6) ++ // the mask drops bit 0, so the last pixel is intentionally left empty when the width is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ uint16_t cy_shifted = av_clip_int16(cy >> in_sh); ++ uint16_t rnd_shifted = av_clip_int16(in_rnd >> in_sh); ++ uint16_t crv_shifted =
av_clip_int16(crv >> in_sh); ++ uint16_t cgu_shifted = av_clip_int16(cgu >> in_sh); ++ uint16_t cgv_shifted = av_clip_int16(cgv >> in_sh); ++ uint16_t cbu_shifted = av_clip_int16(cbu >> in_sh); ++ uint16x8_t rndx8 = vdupq_n_u16(rnd_shifted); ++ uint16x8_t in_yuv_offx8 = vdupq_n_u16(av_clip_int16(params->in_yuv_off)); ++ uint16x8_t in_uv_offx8 = vdupq_n_u16(av_clip_int16(in_uv_offset)); ++ uint16x8_t uvx8; ++ uint16x4_t ux2a, vx2a, ux2b, vx2b; ++ uint16x8_t y0x8, y1x8, ux8, vx8; ++ uint16x8_t r0x8, g0x8, b0x8; ++ uint16x8_t r1x8, g1x8, b1x8; ++ ++ int16x8_t r0ox8, g0ox8, b0ox8; ++ int16x8_t y0ox8; ++ int32x4_t r0oax4, r0obx4, g0oax4, g0obx4, b0oax4, b0obx4; ++ int32x4_t y0oax4, y0obx4; ++ ++ int16x8_t r1ox8, g1ox8, b1ox8; ++ int16x8_t y1ox8; ++ int32x4_t r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ int32x4_t y1oax4, y1obx4; ++ int32x4_t uvoax4, uvobx4; ++ int32x2_t ravgax2, gavgax2, bavgax2, ravgbx2, gavgbx2, bavgbx2; ++ int32x4_t ravgx4, gavgx4, bavgx4, uox4, vox4; ++ int32x4_t out_yuv_offx4 = vdupq_n_s32(params->out_yuv_off); ++ int32x4_t out_rndx4 = vdupq_n_s32(out_rnd); ++ int32x4_t out_uv_offsetx4 = vdupq_n_s32(out_uv_offset); ++ int32x4_t rgb_avg_rndx4 = vdupq_n_s32(2); ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstuv += dstlinesize[1], ++ srcy += srclinesize[0], srcuv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = vld1q_u16(srcy + x); ++ y1x8 = vld1q_u16(srcy + (srclinesize[0] / 2 + x)); ++ uvx8 = vld1q_u16(srcuv + x); ++ if (in_depth == 10) { ++ // shift the samples down to the low 10 bits for 10-bit input ++ // the shift amount has to be a compile-time constant ++ y0x8 = vshrq_n_u16(y0x8, 6); ++ y1x8 = vshrq_n_u16(y1x8, 6); ++ uvx8 = vshrq_n_u16(uvx8, 6); ++ } ++ y0x8 = vsubq_u16(y0x8, in_yuv_offx8); ++ y1x8 = vsubq_u16(y1x8, in_yuv_offx8); ++ uvx8 = vsubq_u16(uvx8, in_uv_offx8); ++ ++ ux2a = vext_u16(vdup_lane_u16(vget_low_u16(uvx8), 0), vdup_lane_u16(vget_low_u16(uvx8), 2), 2); ++ vx2a =
vext_u16(vdup_lane_u16(vget_low_u16(uvx8), 1), vdup_lane_u16(vget_low_u16(uvx8), 3), 2); ++ ux2b = vext_u16(vdup_lane_u16(vget_high_u16(uvx8), 0), vdup_lane_u16(vget_high_u16(uvx8), 2), 2); ++ vx2b = vext_u16(vdup_lane_u16(vget_high_u16(uvx8), 1), vdup_lane_u16(vget_high_u16(uvx8), 3), 2); ++ ++ ux8 = vcombine_u16(ux2a, ux2b); ++ vx8 = vcombine_u16(vx2a, vx2b); ++ ++ r0x8 = g0x8 = b0x8 = vmulq_n_u16(y0x8, cy_shifted); ++ r0x8 = vmlaq_n_u16(r0x8, vx8, crv_shifted); ++ r0x8 = vaddq_u16(r0x8, rndx8); ++ ++ g0x8 = vmlaq_n_u16(g0x8, ux8, cgu_shifted); ++ g0x8 = vmlaq_n_u16(g0x8, vx8, cgv_shifted); ++ g0x8 = vaddq_u16(g0x8, rndx8); ++ ++ b0x8 = vmlaq_n_u16(b0x8, ux8, cbu_shifted); ++ b0x8 = vaddq_u16(b0x8, rndx8); ++ ++ r1x8 = g1x8 = b1x8 = vmulq_n_u16(y1x8, cy_shifted); ++ r1x8 = vmlaq_n_u16(r1x8, vx8, crv_shifted); ++ r1x8 = vaddq_u16(r1x8, rndx8); ++ ++ g1x8 = vmlaq_n_u16(g1x8, ux8, cgu_shifted); ++ g1x8 = vmlaq_n_u16(g1x8, vx8, cgv_shifted); ++ g1x8 = vaddq_u16(g1x8, rndx8); ++ ++ b1x8 = vmlaq_n_u16(b1x8, ux8, cbu_shifted); ++ b1x8 = vaddq_u16(b1x8, rndx8); ++ ++ tonemap_int16x8_neon(r0x8, g0x8, b0x8, (int16_t *) &r, (int16_t *) &g, (int16_t *) &b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int16x8_neon(r1x8, g1x8, b1x8, (int16_t *) &r1, (int16_t *) &g1, (int16_t *) &b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = vld1q_s16(r); ++ g0ox8 = vld1q_s16(g); ++ b0ox8 = vld1q_s16(b); ++ ++ r0oax4 = vmovl_s16(vget_low_s16(r0ox8)); ++ g0oax4 = vmovl_s16(vget_low_s16(g0ox8)); ++ b0oax4 = vmovl_s16(vget_low_s16(b0ox8)); ++ ++ r0obx4 = vmovl_s16(vget_high_s16(r0ox8)); ++ g0obx4 = vmovl_s16(vget_high_s16(g0ox8)); ++ b0obx4 = vmovl_s16(vget_high_s16(b0ox8)); ++ ++ y0oax4 = vmulq_n_s32(r0oax4, cry); ++ y0oax4 =
vmlaq_n_s32(y0oax4, g0oax4, cgy); ++ y0oax4 = vmlaq_n_s32(y0oax4, b0oax4, cby); ++ y0oax4 = vaddq_s32(y0oax4, out_rndx4); ++ // output shift for 8-bit output is 29 - 8 = 21 ++ y0oax4 = vshrq_n_s32(y0oax4, 21); ++ y0oax4 = vaddq_s32(y0oax4, out_yuv_offx4); ++ ++ y0obx4 = vmulq_n_s32(r0obx4, cry); ++ y0obx4 = vmlaq_n_s32(y0obx4, g0obx4, cgy); ++ y0obx4 = vmlaq_n_s32(y0obx4, b0obx4, cby); ++ y0obx4 = vaddq_s32(y0obx4, out_rndx4); ++ y0obx4 = vshrq_n_s32(y0obx4, 21); ++ y0obx4 = vaddq_s32(y0obx4, out_yuv_offx4); ++ ++ y0ox8 = vcombine_s16(vqmovn_s32(y0oax4), vqmovn_s32(y0obx4)); ++ vst1_u8(&dsty[x], vqmovun_s16(y0ox8)); ++ ++ r1ox8 = vld1q_s16(r1); ++ g1ox8 = vld1q_s16(g1); ++ b1ox8 = vld1q_s16(b1); ++ ++ r1oax4 = vmovl_s16(vget_low_s16(r1ox8)); ++ g1oax4 = vmovl_s16(vget_low_s16(g1ox8)); ++ b1oax4 = vmovl_s16(vget_low_s16(b1ox8)); ++ ++ r1obx4 = vmovl_s16(vget_high_s16(r1ox8)); ++ g1obx4 = vmovl_s16(vget_high_s16(g1ox8)); ++ b1obx4 = vmovl_s16(vget_high_s16(b1ox8)); ++ ++ y1oax4 = vmulq_n_s32(r1oax4, cry); ++ y1oax4 = vmlaq_n_s32(y1oax4, g1oax4, cgy); ++ y1oax4 = vmlaq_n_s32(y1oax4, b1oax4, cby); ++ y1oax4 = vaddq_s32(y1oax4, out_rndx4); ++ y1oax4 = vshrq_n_s32(y1oax4, 21); ++ y1oax4 = vaddq_s32(y1oax4, out_yuv_offx4); ++ ++ y1obx4 = vmulq_n_s32(r1obx4, cry); ++ y1obx4 = vmlaq_n_s32(y1obx4, g1obx4, cgy); ++ y1obx4 = vmlaq_n_s32(y1obx4, b1obx4, cby); ++ y1obx4 = vaddq_s32(y1obx4, out_rndx4); ++ y1obx4 = vshrq_n_s32(y1obx4, 21); ++ y1obx4 = vaddq_s32(y1obx4, out_yuv_offx4); ++ ++ y1ox8 = vcombine_s16(vqmovn_s32(y1oax4), vqmovn_s32(y1obx4)); ++ vst1_u8(&dsty[x + dstlinesize[0]], vqmovun_s16(y1ox8)); ++ ++ ravgax2 = vpadd_s32(vget_low_s32(r0oax4), vget_high_s32(r0oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r0obx4), vget_high_s32(r0obx4)); ++ ravgx4 = vcombine_s32(ravgax2, ravgbx2); ++ ravgax2 = vpadd_s32(vget_low_s32(r1oax4), vget_high_s32(r1oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r1obx4), vget_high_s32(r1obx4)); ++ ravgx4 = vaddq_s32(ravgx4,
vcombine_s32(ravgax2, ravgbx2)); ++ ravgx4 = vaddq_s32(ravgx4, rgb_avg_rndx4); ++ ravgx4 = vshrq_n_s32(ravgx4, 2); ++ ++ gavgax2 = vpadd_s32(vget_low_s32(g0oax4), vget_high_s32(g0oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g0obx4), vget_high_s32(g0obx4)); ++ gavgx4 = vcombine_s32(gavgax2, gavgbx2); ++ gavgax2 = vpadd_s32(vget_low_s32(g1oax4), vget_high_s32(g1oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g1obx4), vget_high_s32(g1obx4)); ++ gavgx4 = vaddq_s32(gavgx4, vcombine_s32(gavgax2, gavgbx2)); ++ gavgx4 = vaddq_s32(gavgx4, rgb_avg_rndx4); ++ gavgx4 = vshrq_n_s32(gavgx4, 2); ++ ++ bavgax2 = vpadd_s32(vget_low_s32(b0oax4), vget_high_s32(b0oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b0obx4), vget_high_s32(b0obx4)); ++ bavgx4 = vcombine_s32(bavgax2, bavgbx2); ++ bavgax2 = vpadd_s32(vget_low_s32(b1oax4), vget_high_s32(b1oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b1obx4), vget_high_s32(b1obx4)); ++ bavgx4 = vaddq_s32(bavgx4, vcombine_s32(bavgax2, bavgbx2)); ++ bavgx4 = vaddq_s32(bavgx4, rgb_avg_rndx4); ++ bavgx4 = vshrq_n_s32(bavgx4, 2); ++ ++ uox4 = vmlaq_n_s32(out_rndx4, ravgx4, cru); ++ uox4 = vmlaq_n_s32(uox4, gavgx4, ocgu); ++ uox4 = vmlaq_n_s32(uox4, bavgx4, cburv); ++ uox4 = vshrq_n_s32(uox4, 21); ++ uox4 = vaddq_s32(uox4, out_uv_offsetx4); ++ ++ vox4 = vmlaq_n_s32(out_rndx4, ravgx4, cburv); ++ vox4 = vmlaq_n_s32(vox4, gavgx4, ocgv); ++ vox4 = vmlaq_n_s32(vox4, bavgx4, cbv); ++ vox4 = vshrq_n_s32(vox4, 21); ++ vox4 = vaddq_s32(vox4, out_uv_offsetx4); ++ ++ uvoax4 = vzip1q_s32(uox4, vox4); ++ uvobx4 = vzip2q_s32(uox4, vox4); ++ ++ vst1_u8(&dstuv[x], vqmovun_s16(vcombine_s16(vmovn_s32(uvoax4), vmovn_s32(uvobx4)))); ++ } ++ } ++ ++ // Process the remaining pixels that cannot fill a full SIMD register with the scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstuv += offset; ++ rsrcy += offset; ++ rsrcuv += offset; ++ tonemap_frame_p016_p010_2_nv12(rdsty, rdstuv, ++ rsrcy, rsrcuv, ++ dstlinesize, srclinesize, ++ dstdepth,
srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_NEON_INTRINSICS ++} ++ ++void tonemap_frame_dovi_2_420p10_neon(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstu = dstu; ++ uint16_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // leftover columns after the 8-wide SIMD loop: the even part of width % 8 (0, 2, 4 or 6) ++ // the mask drops bit 0, so the last pixel is intentionally left empty when the width is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const float in_rng = (float)((1 << in_depth) - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ uint16x4_t ux4, vx4; ++ uint16x8_t y0x8, y1x8, ux8, vx8; ++ uint16x8_t r0x8, g0x8, b0x8; ++ uint16x8_t r1x8, g1x8, b1x8; ++ ++ int16x8_t r0ox8, g0ox8, b0ox8; ++ uint16x8_t y0ox8; ++ int32x4_t r0oax4, r0obx4, g0oax4, g0obx4, b0oax4, b0obx4; ++ int32x4_t y0oax4, y0obx4; ++ ++ int16x8_t r1ox8, g1ox8, b1ox8; ++ uint16x8_t y1ox8; ++ int32x4_t r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ int32x4_t y1oax4, y1obx4; ++ int32x2_t ravgax2, gavgax2, bavgax2, ravgbx2, gavgbx2, bavgbx2; ++
int32x4_t ravgx4, gavgx4, bavgx4, uox4, vox4; ++ int32x4_t out_yuv_offx4 = vdupq_n_s32(params->out_yuv_off); ++ int32x4_t out_rndx4 = vdupq_n_s32(out_rnd); ++ int32x4_t out_uv_offsetx4 = vdupq_n_s32(out_uv_offset); ++ int32x4_t rgb_avg_rndx4 = vdupq_n_s32(2); ++ float32x4_t ipt0, ipt1, ipt2, ipt3; ++ float32x4_t ia1, ib1, ia2, ib2; ++ float32x4_t ix4, px4, tx4; ++ float32x4_t lx4, mx4, sx4; ++ float32x4_t rx4a, gx4a, bx4a, rx4b, gx4b, bx4b; ++ float32x4_t y0x4a, y0x4b, y1x4a, y1x4b, ux4a, ux4b, vx4a, vx4b; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstu += dstlinesize[1] / 2, dstv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = vld1q_u16(srcy + x); ++ y1x8 = vld1q_u16(srcy + (srclinesize[0] / 2 + x)); ++ ux4 = vld1_u16(srcu + (x >> 1)); ++ vx4 = vld1_u16(srcv + (x >> 1)); ++ ++ ux8 = vcombine_u16(vzip1_u16(ux4, ux4), vzip2_u16(ux4, ux4)); ++ vx8 = vcombine_u16(vzip1_u16(vx4, vx4), vzip2_u16(vx4, vx4)); ++ ++ y0x4a = vcvtq_f32_u32(vmovl_u16(vget_low_u16(y0x8))); ++ y0x4b = vcvtq_f32_u32(vmovl_u16(vget_high_u16(y0x8))); ++ y1x4a = vcvtq_f32_u32(vmovl_u16(vget_low_u16(y1x8))); ++ y1x4b = vcvtq_f32_u32(vmovl_u16(vget_high_u16(y1x8))); ++ ++ ux4a = vcvtq_f32_u32(vmovl_u16(vget_low_u16(ux8))); ++ ux4b = vcvtq_f32_u32(vmovl_u16(vget_high_u16(ux8))); ++ vx4a = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vx8))); ++ vx4b = vcvtq_f32_u32(vmovl_u16(vget_high_u16(vx8))); ++ ++ y0x4a = vdivq_f32(y0x4a, vdupq_n_f32(in_rng)); ++ y0x4b = vdivq_f32(y0x4b, vdupq_n_f32(in_rng)); ++ y1x4a = vdivq_f32(y1x4a, vdupq_n_f32(in_rng)); ++ y1x4b = vdivq_f32(y1x4b, vdupq_n_f32(in_rng)); ++ ux4a = vdivq_f32(ux4a, vdupq_n_f32(in_rng)); ++ ux4b = vdivq_f32(ux4b, vdupq_n_f32(in_rng)); ++ vx4a = vdivq_f32(vx4a, vdupq_n_f32(in_rng)); ++ vx4b = vdivq_f32(vx4b, vdupq_n_f32(in_rng)); ++ ++ // Reshape y0x4a (row 0, pixels 0-3) ++ ia1 = vzip1q_f32(y0x4a, ux4a); ++ ia2 =
vzip2q_f32(y0x4a, ux4a); ++ ib1 = vzip1q_f32(vx4a, vdupq_n_f32(0.0f)); ++ ib2 = vzip2q_f32(vx4a, vdupq_n_f32(0.0f)); ++ ipt0 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ib1)); ++ ipt1 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ib1)); ++ ipt2 = vcombine_f32(vget_low_f32(ia2), vget_low_f32(ib2)); ++ ipt3 = vcombine_f32(vget_high_f32(ia2), vget_high_f32(ib2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ia1 = vtrn1q_f32(ipt0, ipt1); ++ ia2 = vtrn1q_f32(ipt2, ipt3); ++ ib1 = vtrn2q_f32(ipt0, ipt1); ++ ib2 = vtrn2q_f32(ipt2, ipt3); ++ ++ ix4 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ia2)); ++ px4 = vcombine_f32(vget_low_f32(ib1), vget_low_f32(ib2)); ++ tx4 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ia2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4a, &gx4a, &bx4a, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4a = vmulq_n_f32(rx4a, 28672.0f); ++ gx4a = vmulq_n_f32(gx4a, 28672.0f); ++ bx4a = vmulq_n_f32(bx4a, 28672.0f); ++ ++ // Reshape y0x4b (row 0, pixels 4-7) ++ ia1 = vzip1q_f32(y0x4b, ux4b); ++ ia2 = vzip2q_f32(y0x4b, ux4b); ++ ib1 = vzip1q_f32(vx4b, vdupq_n_f32(0.0f)); ++ ib2 = vzip2q_f32(vx4b, vdupq_n_f32(0.0f)); ++ ipt0 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ib1)); ++ ipt1 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ib1)); ++ ipt2 = vcombine_f32(vget_low_f32(ia2), vget_low_f32(ib2)); ++ ipt3 = vcombine_f32(vget_high_f32(ia2), vget_high_f32(ib2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ia1 = vtrn1q_f32(ipt0, ipt1); ++ ia2 = vtrn1q_f32(ipt2, ipt3); ++ ib1 = vtrn2q_f32(ipt0, ipt1); ++ ib2 = vtrn2q_f32(ipt2, ipt3); ++ ++ ix4 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ia2)); ++ px4 =
vcombine_f32(vget_low_f32(ib1), vget_low_f32(ib2)); ++ tx4 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ia2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4b, &gx4b, &bx4b, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4b = vmulq_n_f32(rx4b, 28672.0f); ++ gx4b = vmulq_n_f32(gx4b, 28672.0f); ++ bx4b = vmulq_n_f32(bx4b, 28672.0f); ++ ++ r0x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(rx4a)), vqmovn_u32(vcvtq_u32_f32(rx4b))); ++ g0x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(gx4a)), vqmovn_u32(vcvtq_u32_f32(gx4b))); ++ b0x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(bx4a)), vqmovn_u32(vcvtq_u32_f32(bx4b))); ++ ++ // Reshape y1x4a (row 1, pixels 0-3) ++ ia1 = vzip1q_f32(y1x4a, ux4a); ++ ia2 = vzip2q_f32(y1x4a, ux4a); ++ ib1 = vzip1q_f32(vx4a, vdupq_n_f32(0.0f)); ++ ib2 = vzip2q_f32(vx4a, vdupq_n_f32(0.0f)); ++ ipt0 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ib1)); ++ ipt1 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ib1)); ++ ipt2 = vcombine_f32(vget_low_f32(ia2), vget_low_f32(ib2)); ++ ipt3 = vcombine_f32(vget_high_f32(ia2), vget_high_f32(ib2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ia1 = vtrn1q_f32(ipt0, ipt1); ++ ia2 = vtrn1q_f32(ipt2, ipt3); ++ ib1 = vtrn2q_f32(ipt0, ipt1); ++ ib2 = vtrn2q_f32(ipt2, ipt3); ++ ++ ix4 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ia2)); ++ px4 = vcombine_f32(vget_low_f32(ib1), vget_low_f32(ib2)); ++ tx4 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ia2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4a, &gx4a, &bx4a, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4a = vmulq_n_f32(rx4a, 28672.0f); ++ gx4a = vmulq_n_f32(gx4a, 28672.0f); ++ bx4a = vmulq_n_f32(bx4a, 28672.0f); ++ ++ // Reshape y1x4b (row 1, pixels 4-7) ++ ia1 = vzip1q_f32(y1x4b, ux4b); ++ ia2 = vzip2q_f32(y1x4b,
ux4b); ++ ib1 = vzip1q_f32(vx4b, vdupq_n_f32(0.0f)); ++ ib2 = vzip2q_f32(vx4b, vdupq_n_f32(0.0f)); ++ ipt0 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ib1)); ++ ipt1 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ib1)); ++ ipt2 = vcombine_f32(vget_low_f32(ia2), vget_low_f32(ib2)); ++ ipt3 = vcombine_f32(vget_high_f32(ia2), vget_high_f32(ib2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ia1 = vtrn1q_f32(ipt0, ipt1); ++ ia2 = vtrn1q_f32(ipt2, ipt3); ++ ib1 = vtrn2q_f32(ipt0, ipt1); ++ ib2 = vtrn2q_f32(ipt2, ipt3); ++ ++ ix4 = vcombine_f32(vget_low_f32(ia1), vget_low_f32(ia2)); ++ px4 = vcombine_f32(vget_low_f32(ib1), vget_low_f32(ib2)); ++ tx4 = vcombine_f32(vget_high_f32(ia1), vget_high_f32(ia2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4b, &gx4b, &bx4b, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4b = vmulq_n_f32(rx4b, 28672.0f); ++ gx4b = vmulq_n_f32(gx4b, 28672.0f); ++ bx4b = vmulq_n_f32(bx4b, 28672.0f); ++ ++ r1x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(rx4a)), vqmovn_u32(vcvtq_u32_f32(rx4b))); ++ g1x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(gx4a)), vqmovn_u32(vcvtq_u32_f32(gx4b))); ++ b1x8 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(bx4a)), vqmovn_u32(vcvtq_u32_f32(bx4b))); ++ ++ tonemap_int16x8_neon(r0x8, g0x8, b0x8, (int16_t *) &r, (int16_t *) &g, (int16_t *) &b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int16x8_neon(r1x8, g1x8, b1x8, (int16_t *) &r1, (int16_t *) &g1, (int16_t *) &b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = vld1q_s16(r); ++ g0ox8 = vld1q_s16(g);
++ b0ox8 = vld1q_s16(b); ++ ++ r0oax4 = vmovl_s16(vget_low_s16(r0ox8)); ++ g0oax4 = vmovl_s16(vget_low_s16(g0ox8)); ++ b0oax4 = vmovl_s16(vget_low_s16(b0ox8)); ++ ++ r0obx4 = vmovl_s16(vget_high_s16(r0ox8)); ++ g0obx4 = vmovl_s16(vget_high_s16(g0ox8)); ++ b0obx4 = vmovl_s16(vget_high_s16(b0ox8)); ++ ++ y0oax4 = vmulq_n_s32(r0oax4, cry); ++ y0oax4 = vmlaq_n_s32(y0oax4, g0oax4, cgy); ++ y0oax4 = vmlaq_n_s32(y0oax4, b0oax4, cby); ++ y0oax4 = vaddq_s32(y0oax4, out_rndx4); ++ y0oax4 = vshrq_n_s32(y0oax4, 19); ++ y0oax4 = vaddq_s32(y0oax4, out_yuv_offx4); ++ ++ y0obx4 = vmulq_n_s32(r0obx4, cry); ++ y0obx4 = vmlaq_n_s32(y0obx4, g0obx4, cgy); ++ y0obx4 = vmlaq_n_s32(y0obx4, b0obx4, cby); ++ y0obx4 = vaddq_s32(y0obx4, out_rndx4); ++ y0obx4 = vshrq_n_s32(y0obx4, 19); ++ y0obx4 = vaddq_s32(y0obx4, out_yuv_offx4); ++ ++ y0ox8 = vcombine_u16(vqmovun_s32(y0oax4), vqmovun_s32(y0obx4)); ++ vst1q_u16(&dsty[x], y0ox8); ++ ++ r1ox8 = vld1q_s16(r1); ++ g1ox8 = vld1q_s16(g1); ++ b1ox8 = vld1q_s16(b1); ++ ++ r1oax4 = vmovl_s16(vget_low_s16(r1ox8)); ++ g1oax4 = vmovl_s16(vget_low_s16(g1ox8)); ++ b1oax4 = vmovl_s16(vget_low_s16(b1ox8)); ++ ++ r1obx4 = vmovl_s16(vget_high_s16(r1ox8)); ++ g1obx4 = vmovl_s16(vget_high_s16(g1ox8)); ++ b1obx4 = vmovl_s16(vget_high_s16(b1ox8)); ++ ++ y1oax4 = vmulq_n_s32(r1oax4, cry); ++ y1oax4 = vmlaq_n_s32(y1oax4, g1oax4, cgy); ++ y1oax4 = vmlaq_n_s32(y1oax4, b1oax4, cby); ++ y1oax4 = vaddq_s32(y1oax4, out_rndx4); ++ y1oax4 = vshrq_n_s32(y1oax4, 19); ++ y1oax4 = vaddq_s32(y1oax4, out_yuv_offx4); ++ ++ y1obx4 = vmulq_n_s32(r1obx4, cry); ++ y1obx4 = vmlaq_n_s32(y1obx4, g1obx4, cgy); ++ y1obx4 = vmlaq_n_s32(y1obx4, b1obx4, cby); ++ y1obx4 = vaddq_s32(y1obx4, out_rndx4); ++ y1obx4 = vshrq_n_s32(y1obx4, 19); ++ y1obx4 = vaddq_s32(y1obx4, out_yuv_offx4); ++ ++ y1ox8 = vcombine_u16(vqmovun_s32(y1oax4), vqmovun_s32(y1obx4)); ++ vst1q_u16(&dsty[x + dstlinesize[0] / 2], y1ox8); ++ ++ ravgax2 = vpadd_s32(vget_low_s32(r0oax4), vget_high_s32(r0oax4)); ++ ravgbx2 =
vpadd_s32(vget_low_s32(r0obx4), vget_high_s32(r0obx4)); ++ ravgx4 = vcombine_s32(ravgax2, ravgbx2); ++ ravgax2 = vpadd_s32(vget_low_s32(r1oax4), vget_high_s32(r1oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r1obx4), vget_high_s32(r1obx4)); ++ ravgx4 = vaddq_s32(ravgx4, vcombine_s32(ravgax2, ravgbx2)); ++ ravgx4 = vaddq_s32(ravgx4, rgb_avg_rndx4); ++ ravgx4 = vshrq_n_s32(ravgx4, 2); ++ ++ gavgax2 = vpadd_s32(vget_low_s32(g0oax4), vget_high_s32(g0oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g0obx4), vget_high_s32(g0obx4)); ++ gavgx4 = vcombine_s32(gavgax2, gavgbx2); ++ gavgax2 = vpadd_s32(vget_low_s32(g1oax4), vget_high_s32(g1oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g1obx4), vget_high_s32(g1obx4)); ++ gavgx4 = vaddq_s32(gavgx4, vcombine_s32(gavgax2, gavgbx2)); ++ gavgx4 = vaddq_s32(gavgx4, rgb_avg_rndx4); ++ gavgx4 = vshrq_n_s32(gavgx4, 2); ++ ++ bavgax2 = vpadd_s32(vget_low_s32(b0oax4), vget_high_s32(b0oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b0obx4), vget_high_s32(b0obx4)); ++ bavgx4 = vcombine_s32(bavgax2, bavgbx2); ++ bavgax2 = vpadd_s32(vget_low_s32(b1oax4), vget_high_s32(b1oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b1obx4), vget_high_s32(b1obx4)); ++ bavgx4 = vaddq_s32(bavgx4, vcombine_s32(bavgax2, bavgbx2)); ++ bavgx4 = vaddq_s32(bavgx4, rgb_avg_rndx4); ++ bavgx4 = vshrq_n_s32(bavgx4, 2); ++ ++ uox4 = vmlaq_n_s32(out_rndx4, ravgx4, cru); ++ uox4 = vmlaq_n_s32(uox4, gavgx4, ocgu); ++ uox4 = vmlaq_n_s32(uox4, bavgx4, cburv); ++ uox4 = vshrq_n_s32(uox4, 19); ++ uox4 = vaddq_s32(uox4, out_uv_offsetx4); ++ vst1_u16(&dstu[x >> 1], vqmovun_s32(uox4)); ++ ++ vox4 = vmlaq_n_s32(out_rndx4, ravgx4, cburv); ++ vox4 = vmlaq_n_s32(vox4, gavgx4, ocgv); ++ vox4 = vmlaq_n_s32(vox4, bavgx4, cbv); ++ vox4 = vshrq_n_s32(vox4, 19); ++ vox4 = vaddq_s32(vox4, out_uv_offsetx4); ++ vst1_u16(&dstv[x >> 1], vqmovun_s32(vox4)); ++ } ++ } ++ ++ // Process the remaining pixels that cannot fill a full SIMD register with the scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8;
++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ tonemap_frame_dovi_2_420p10(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_NEON_INTRINSICS ++} ++ ++void tonemap_frame_420p10_2_420p10_neon(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstu = dstu; ++ uint16_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // leftover columns after the 8-wide SIMD loop: the even part of width % 8 (0, 2, 4 or 6) ++ // the mask drops bit 0, so the last pixel is intentionally left empty when the width is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv =
(*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ uint16_t cy_shifted = av_clip_int16(cy >> in_sh); ++ uint16_t rnd_shifted = av_clip_int16(in_rnd >> in_sh); ++ uint16_t crv_shifted = av_clip_int16(crv >> in_sh); ++ uint16_t cgu_shifted = av_clip_int16(cgu >> in_sh); ++ uint16_t cgv_shifted = av_clip_int16(cgv >> in_sh); ++ uint16_t cbu_shifted = av_clip_int16(cbu >> in_sh); ++ uint16x8_t rndx8 = vdupq_n_u16(rnd_shifted); ++ uint16x8_t in_yuv_offx8 = vdupq_n_u16(av_clip_int16(params->in_yuv_off)); ++ uint16x8_t in_uv_offx8 = vdupq_n_u16(av_clip_int16(in_uv_offset)); ++ uint16x4_t ux4, vx4; ++ uint16x8_t y0x8, y1x8, ux8, vx8; ++ uint16x8_t r0x8, g0x8, b0x8; ++ uint16x8_t r1x8, g1x8, b1x8; ++ ++ int16x8_t r0ox8, g0ox8, b0ox8; ++ uint16x8_t y0ox8; ++ int32x4_t r0oax4, r0obx4, g0oax4, g0obx4, b0oax4, b0obx4; ++ int32x4_t y0oax4, y0obx4; ++ ++ int16x8_t r1ox8, g1ox8, b1ox8; ++ uint16x8_t y1ox8; ++ int32x4_t r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ int32x4_t y1oax4, y1obx4; ++ int32x2_t ravgax2, gavgax2, bavgax2, ravgbx2, gavgbx2, bavgbx2; ++ int32x4_t ravgx4, gavgx4, bavgx4, uox4, vox4; ++ int32x4_t out_yuv_offx4 = vdupq_n_s32(params->out_yuv_off); ++ int32x4_t out_rndx4 = vdupq_n_s32(out_rnd); ++ int32x4_t out_uv_offsetx4 = vdupq_n_s32(out_uv_offset); ++ int32x4_t rgb_avg_rndx4 = vdupq_n_s32(2); ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstu += dstlinesize[1] / 2, dstv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = vld1q_u16(srcy + x); ++ y1x8 = vld1q_u16(srcy + (srclinesize[0] / 2 + x)); ++ ux4 = vld1_u16(srcu + (x >> 1)); ++ vx4 = vld1_u16(srcv + (x >> 1)); ++ y0x8 = vsubq_u16(y0x8, in_yuv_offx8); ++ y1x8 = vsubq_u16(y1x8, in_yuv_offx8); ++ ++ ux8 = vcombine_u16(vzip1_u16(ux4, ux4), vzip2_u16(ux4, ux4)); ++ ux8 = vsubq_u16(ux8, in_uv_offx8); ++ vx8
= vcombine_u16(vzip1_u16(vx4, vx4), vzip2_u16(vx4, vx4)); ++ vx8 = vsubq_u16(vx8, in_uv_offx8); ++ ++ r0x8 = g0x8 = b0x8 = vmulq_n_u16(y0x8, cy_shifted); ++ r0x8 = vmlaq_n_u16(r0x8, vx8, crv_shifted); ++ r0x8 = vaddq_u16(r0x8, rndx8); ++ ++ g0x8 = vmlaq_n_u16(g0x8, ux8, cgu_shifted); ++ g0x8 = vmlaq_n_u16(g0x8, vx8, cgv_shifted); ++ g0x8 = vaddq_u16(g0x8, rndx8); ++ ++ b0x8 = vmlaq_n_u16(b0x8, ux8, cbu_shifted); ++ b0x8 = vaddq_u16(b0x8, rndx8); ++ ++ r1x8 = g1x8 = b1x8 = vmulq_n_u16(y1x8, cy_shifted); ++ r1x8 = vmlaq_n_u16(r1x8, vx8, crv_shifted); ++ r1x8 = vaddq_u16(r1x8, rndx8); ++ ++ g1x8 = vmlaq_n_u16(g1x8, ux8, cgu_shifted); ++ g1x8 = vmlaq_n_u16(g1x8, vx8, cgv_shifted); ++ g1x8 = vaddq_u16(g1x8, rndx8); ++ ++ b1x8 = vmlaq_n_u16(b1x8, ux8, cbu_shifted); ++ b1x8 = vaddq_u16(b1x8, rndx8); ++ ++ tonemap_int16x8_neon(r0x8, g0x8, b0x8, (int16_t *) &r, (int16_t *) &g, (int16_t *) &b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int16x8_neon(r1x8, g1x8, b1x8, (int16_t *) &r1, (int16_t *) &g1, (int16_t *) &b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = vld1q_s16(r); ++ g0ox8 = vld1q_s16(g); ++ b0ox8 = vld1q_s16(b); ++ ++ r0oax4 = vmovl_s16(vget_low_s16(r0ox8)); ++ g0oax4 = vmovl_s16(vget_low_s16(g0ox8)); ++ b0oax4 = vmovl_s16(vget_low_s16(b0ox8)); ++ ++ r0obx4 = vmovl_s16(vget_high_s16(r0ox8)); ++ g0obx4 = vmovl_s16(vget_high_s16(g0ox8)); ++ b0obx4 = vmovl_s16(vget_high_s16(b0ox8)); ++ ++ y0oax4 = vmulq_n_s32(r0oax4, cry); ++ y0oax4 = vmlaq_n_s32(y0oax4, g0oax4, cgy); ++ y0oax4 = vmlaq_n_s32(y0oax4, b0oax4, cby); ++ y0oax4 = vaddq_s32(y0oax4, out_rndx4); ++ y0oax4 = vshrq_n_s32(y0oax4, 19); ++ y0oax4 = vaddq_s32(y0oax4, out_yuv_offx4); ++ ++ y0obx4 = vmulq_n_s32(r0obx4, cry); ++ y0obx4 =
vmlaq_n_s32(y0obx4, g0obx4, cgy); ++ y0obx4 = vmlaq_n_s32(y0obx4, b0obx4, cby); ++ y0obx4 = vaddq_s32(y0obx4, out_rndx4); ++ y0obx4 = vshrq_n_s32(y0obx4, 19); ++ y0obx4 = vaddq_s32(y0obx4, out_yuv_offx4); ++ ++ y0ox8 = vcombine_u16(vqmovun_s32(y0oax4), vqmovun_s32(y0obx4)); ++ vst1q_u16(&dsty[x], y0ox8); ++ ++ r1ox8 = vld1q_s16(r1); ++ g1ox8 = vld1q_s16(g1); ++ b1ox8 = vld1q_s16(b1); ++ ++ r1oax4 = vmovl_s16(vget_low_s16(r1ox8)); ++ g1oax4 = vmovl_s16(vget_low_s16(g1ox8)); ++ b1oax4 = vmovl_s16(vget_low_s16(b1ox8)); ++ ++ r1obx4 = vmovl_s16(vget_high_s16(r1ox8)); ++ g1obx4 = vmovl_s16(vget_high_s16(g1ox8)); ++ b1obx4 = vmovl_s16(vget_high_s16(b1ox8)); ++ ++ y1oax4 = vmulq_n_s32(r1oax4, cry); ++ y1oax4 = vmlaq_n_s32(y1oax4, g1oax4, cgy); ++ y1oax4 = vmlaq_n_s32(y1oax4, b1oax4, cby); ++ y1oax4 = vaddq_s32(y1oax4, out_rndx4); ++ y1oax4 = vshrq_n_s32(y1oax4, 19); ++ y1oax4 = vaddq_s32(y1oax4, out_yuv_offx4); ++ ++ y1obx4 = vmulq_n_s32(r1obx4, cry); ++ y1obx4 = vmlaq_n_s32(y1obx4, g1obx4, cgy); ++ y1obx4 = vmlaq_n_s32(y1obx4, b1obx4, cby); ++ y1obx4 = vaddq_s32(y1obx4, out_rndx4); ++ y1obx4 = vshrq_n_s32(y1obx4, 19); ++ y1obx4 = vaddq_s32(y1obx4, out_yuv_offx4); ++ ++ y1ox8 = vcombine_u16(vqmovun_s32(y1oax4), vqmovun_s32(y1obx4)); ++ vst1q_u16(&dsty[x + dstlinesize[0] / 2], y1ox8); ++ ++ ravgax2 = vpadd_s32(vget_low_s32(r0oax4), vget_high_s32(r0oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r0obx4), vget_high_s32(r0obx4)); ++ ravgx4 = vcombine_s32(ravgax2, ravgbx2); ++ ravgax2 = vpadd_s32(vget_low_s32(r1oax4), vget_high_s32(r1oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r1obx4), vget_high_s32(r1obx4)); ++ ravgx4 = vaddq_s32(ravgx4, vcombine_s32(ravgax2, ravgbx2)); ++ ravgx4 = vaddq_s32(ravgx4, rgb_avg_rndx4); ++ ravgx4 = vshrq_n_s32(ravgx4, 2); ++ ++ gavgax2 = vpadd_s32(vget_low_s32(g0oax4), vget_high_s32(g0oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g0obx4), vget_high_s32(g0obx4)); ++ gavgx4 = vcombine_s32(gavgax2, gavgbx2); ++ gavgax2 = vpadd_s32(vget_low_s32(g1oax4),
vget_high_s32(g1oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g1obx4), vget_high_s32(g1obx4)); ++ gavgx4 = vaddq_s32(gavgx4, vcombine_s32(gavgax2, gavgbx2)); ++ gavgx4 = vaddq_s32(gavgx4, rgb_avg_rndx4); ++ gavgx4 = vshrq_n_s32(gavgx4, 2); ++ ++ bavgax2 = vpadd_s32(vget_low_s32(b0oax4), vget_high_s32(b0oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b0obx4), vget_high_s32(b0obx4)); ++ bavgx4 = vcombine_s32(bavgax2, bavgbx2); ++ bavgax2 = vpadd_s32(vget_low_s32(b1oax4), vget_high_s32(b1oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b1obx4), vget_high_s32(b1obx4)); ++ bavgx4 = vaddq_s32(bavgx4, vcombine_s32(bavgax2, bavgbx2)); ++ bavgx4 = vaddq_s32(bavgx4, rgb_avg_rndx4); ++ bavgx4 = vshrq_n_s32(bavgx4, 2); ++ ++ uox4 = vmlaq_n_s32(out_rndx4, ravgx4, cru); ++ uox4 = vmlaq_n_s32(uox4, gavgx4, ocgu); ++ uox4 = vmlaq_n_s32(uox4, bavgx4, cburv); ++ uox4 = vshrq_n_s32(uox4, 19); ++ uox4 = vaddq_s32(uox4, out_uv_offsetx4); ++ vst1_u16(&dstu[x >> 1], vqmovun_s32(uox4)); ++ ++ vox4 = vmlaq_n_s32(out_rndx4, ravgx4, cburv); ++ vox4 = vmlaq_n_s32(vox4, gavgx4, ocgv); ++ vox4 = vmlaq_n_s32(vox4, bavgx4, cbv); ++ vox4 = vshrq_n_s32(vox4, 19); ++ vox4 = vaddq_s32(vox4, out_uv_offsetx4); ++ vst1_u16(&dstv[x >> 1], vqmovun_s32(vox4)); ++ } ++ } ++ ++ // Process the remaining pixels that cannot fill a full SIMD register with the scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ tonemap_frame_420p10_2_420p10(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_NEON_INTRINSICS ++} ++ ++void tonemap_frame_p016_p010_2_p016_p010_neon(uint16_t *dsty, uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct
TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstuv = dstuv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcuv = srcuv; ++ int rheight = height; ++ // not zero when not divisible by 8 ++ // intentionally leave last pixel emtpy when input is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ const int out_sh2 = 16 - out_depth; ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ uint16_t cy_shifted = av_clip_int16(cy >> in_sh); ++ uint16_t rnd_shifted = av_clip_int16(in_rnd >> in_sh); ++ uint16_t crv_shifted = av_clip_int16(crv >> in_sh); ++ uint16_t cgu_shifted = av_clip_int16(cgu >> in_sh); ++ uint16_t cgv_shifted = av_clip_int16(cgv >> in_sh); ++ uint16_t cbu_shifted = av_clip_int16(cbu >> in_sh); ++ uint16x8_t rndx8 = vdupq_n_u16(rnd_shifted); ++ uint16x8_t in_yuv_offx8 = vdupq_n_u16(av_clip_int16(params->in_yuv_off)); ++ uint16x8_t in_uv_offx8 = vdupq_n_u16(av_clip_int16(in_uv_offset)); ++ uint16x8_t uvx8; ++ uint16x4_t ux2a, 
vx2a, ux2b, vx2b; ++ uint16x8_t y0x8, y1x8, ux8, vx8; ++ uint16x8_t r0x8, g0x8, b0x8; ++ uint16x8_t r1x8, g1x8, b1x8; ++ ++ int16x8_t r0ox8, g0ox8, b0ox8; ++ uint16x8_t y0ox8; ++ int32x4_t r0oax4, r0obx4, g0oax4, g0obx4, b0oax4, b0obx4; ++ int32x4_t y0oax4, y0obx4; ++ ++ int16x8_t r1ox8, g1ox8, b1ox8; ++ uint16x8_t y1ox8; ++ int32x4_t r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ int32x4_t y1oax4, y1obx4; ++ int32x4_t uvoax4, uvobx4; ++ int32x2_t ravgax2, gavgax2, bavgax2, ravgbx2, gavgbx2, bavgbx2; ++ int32x4_t ravgx4, gavgx4, bavgx4, uox4, vox4; ++ int32x4_t out_yuv_offx4 = vdupq_n_s32(params->out_yuv_off); ++ int32x4_t out_rndx4 = vdupq_n_s32(out_rnd); ++ int16x8_t out_sh2x8 = vdupq_n_s16(out_sh2); ++ int32x4_t out_uv_offsetx4 = vdupq_n_s32(out_uv_offset); ++ int32x4_t rgb_avg_rndx4 = vdupq_n_s32(2); ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstuv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcuv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = vld1q_u16(srcy + x); ++ y1x8 = vld1q_u16(srcy + (srclinesize[0] / 2 + x)); ++ uvx8 = vld1q_u16(srcuv + x); ++ if (in_depth == 10) { ++ // shift to low10bits for 10bit input ++ // shift bit has to be compile-time constant ++ y0x8 = vshrq_n_u16(y0x8, 6); ++ y1x8 = vshrq_n_u16(y1x8, 6); ++ uvx8 = vshrq_n_u16(uvx8, 6); ++ } ++ y0x8 = vsubq_u16(y0x8, in_yuv_offx8); ++ y1x8 = vsubq_u16(y1x8, in_yuv_offx8); ++ uvx8 = vsubq_u16(uvx8, in_uv_offx8); ++ ++ ux2a = vext_u16(vdup_lane_u16(vget_low_u16(uvx8), 0), vdup_lane_u16(vget_low_u16(uvx8), 2), 2); ++ vx2a = vext_u16(vdup_lane_u16(vget_low_u16(uvx8), 1), vdup_lane_u16(vget_low_u16(uvx8), 3), 2); ++ ux2b = vext_u16(vdup_lane_u16(vget_high_u16(uvx8), 0), vdup_lane_u16(vget_high_u16(uvx8), 2), 2); ++ vx2b = vext_u16(vdup_lane_u16(vget_high_u16(uvx8), 1), vdup_lane_u16(vget_high_u16(uvx8), 3), 2); ++ ++ ux8 = vcombine_u16(ux2a, ux2b); ++ vx8 = vcombine_u16(vx2a, vx2b); ++ ++ r0x8 = g0x8 = b0x8 = 
vmulq_n_u16(y0x8, cy_shifted); ++ r0x8 = vmlaq_n_u16(r0x8, vx8, crv_shifted); ++ r0x8 = vaddq_u16(r0x8, rndx8); ++ ++ g0x8 = vmlaq_n_u16(g0x8, ux8, cgu_shifted); ++ g0x8 = vmlaq_n_u16(g0x8, vx8, cgv_shifted); ++ g0x8 = vaddq_u16(g0x8, rndx8); ++ ++ b0x8 = vmlaq_n_u16(b0x8, ux8, cbu_shifted); ++ b0x8 = vaddq_u16(b0x8, rndx8); ++ ++ r1x8 = g1x8 = b1x8 = vmulq_n_u16(y1x8, cy_shifted); ++ r1x8 = vmlaq_n_u16(r1x8, vx8, crv_shifted); ++ r1x8 = vaddq_u16(r1x8, rndx8); ++ ++ g1x8 = vmlaq_n_u16(g1x8, ux8, cgu_shifted); ++ g1x8 = vmlaq_n_u16(g1x8, vx8, cgv_shifted); ++ g1x8 = vaddq_u16(g1x8, rndx8); ++ ++ b1x8 = vmlaq_n_u16(b1x8, ux8, cbu_shifted); ++ b1x8 = vaddq_u16(b1x8, rndx8); ++ ++ tonemap_int16x8_neon(r0x8, g0x8, b0x8, (int16_t *) &r, (int16_t *) &g, (int16_t *) &b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int16x8_neon(r1x8, g1x8, b1x8, (int16_t *) &r1, (int16_t *) &g1, (int16_t *) &b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = vld1q_s16(r); ++ g0ox8 = vld1q_s16(g); ++ b0ox8 = vld1q_s16(b); ++ ++ r0oax4 = vmovl_s16(vget_low_s16(r0ox8)); ++ g0oax4 = vmovl_s16(vget_low_s16(g0ox8)); ++ b0oax4 = vmovl_s16(vget_low_s16(b0ox8)); ++ ++ r0obx4 = vmovl_s16(vget_high_s16(r0ox8)); ++ g0obx4 = vmovl_s16(vget_high_s16(g0ox8)); ++ b0obx4 = vmovl_s16(vget_high_s16(b0ox8)); ++ ++ y0oax4 = vmulq_n_s32(r0oax4, cry); ++ y0oax4 = vmlaq_n_s32(y0oax4, g0oax4, cgy); ++ y0oax4 = vmlaq_n_s32(y0oax4, b0oax4, cby); ++ y0oax4 = vaddq_s32(y0oax4, out_rndx4); ++ ++ y0obx4 = vmulq_n_s32(r0obx4, cry); ++ y0obx4 = vmlaq_n_s32(y0obx4, g0obx4, cgy); ++ y0obx4 = vmlaq_n_s32(y0obx4, b0obx4, cby); ++ y0obx4 = vaddq_s32(y0obx4, out_rndx4); ++ ++ r1ox8 = vld1q_s16(r1); ++ g1ox8 = vld1q_s16(g1); ++ b1ox8 = vld1q_s16(b1); ++ ++ r1oax4 = 
vmovl_s16(vget_low_s16(r1ox8)); ++ g1oax4 = vmovl_s16(vget_low_s16(g1ox8)); ++ b1oax4 = vmovl_s16(vget_low_s16(b1ox8)); ++ ++ r1obx4 = vmovl_s16(vget_high_s16(r1ox8)); ++ g1obx4 = vmovl_s16(vget_high_s16(g1ox8)); ++ b1obx4 = vmovl_s16(vget_high_s16(b1ox8)); ++ ++ y1oax4 = vmulq_n_s32(r1oax4, cry); ++ y1oax4 = vmlaq_n_s32(y1oax4, g1oax4, cgy); ++ y1oax4 = vmlaq_n_s32(y1oax4, b1oax4, cby); ++ y1oax4 = vaddq_s32(y1oax4, out_rndx4); ++ ++ y1obx4 = vmulq_n_s32(r1obx4, cry); ++ y1obx4 = vmlaq_n_s32(y1obx4, g1obx4, cgy); ++ y1obx4 = vmlaq_n_s32(y1obx4, b1obx4, cby); ++ y1obx4 = vaddq_s32(y1obx4, out_rndx4); ++ ++ ravgax2 = vpadd_s32(vget_low_s32(r0oax4), vget_high_s32(r0oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r0obx4), vget_high_s32(r0obx4)); ++ ravgx4 = vcombine_s32(ravgax2, ravgbx2); ++ ravgax2 = vpadd_s32(vget_low_s32(r1oax4), vget_high_s32(r1oax4)); ++ ravgbx2 = vpadd_s32(vget_low_s32(r1obx4), vget_high_s32(r1obx4)); ++ ravgx4 = vaddq_s32(ravgx4, vcombine_s32(ravgax2, ravgbx2)); ++ ravgx4 = vaddq_s32(ravgx4, rgb_avg_rndx4); ++ ravgx4 = vshrq_n_s32(ravgx4, 2); ++ ++ gavgax2 = vpadd_s32(vget_low_s32(g0oax4), vget_high_s32(g0oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g0obx4), vget_high_s32(g0obx4)); ++ gavgx4 = vcombine_s32(gavgax2, gavgbx2); ++ gavgax2 = vpadd_s32(vget_low_s32(g1oax4), vget_high_s32(g1oax4)); ++ gavgbx2 = vpadd_s32(vget_low_s32(g1obx4), vget_high_s32(g1obx4)); ++ gavgx4 = vaddq_s32(gavgx4, vcombine_s32(gavgax2, gavgbx2)); ++ gavgx4 = vaddq_s32(gavgx4, rgb_avg_rndx4); ++ gavgx4 = vshrq_n_s32(gavgx4, 2); ++ ++ bavgax2 = vpadd_s32(vget_low_s32(b0oax4), vget_high_s32(b0oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b0obx4), vget_high_s32(b0obx4)); ++ bavgx4 = vcombine_s32(bavgax2, bavgbx2); ++ bavgax2 = vpadd_s32(vget_low_s32(b1oax4), vget_high_s32(b1oax4)); ++ bavgbx2 = vpadd_s32(vget_low_s32(b1obx4), vget_high_s32(b1obx4)); ++ bavgx4 = vaddq_s32(bavgx4, vcombine_s32(bavgax2, bavgbx2)); ++ bavgx4 = vaddq_s32(bavgx4, rgb_avg_rndx4); ++ bavgx4 = 
vshrq_n_s32(bavgx4, 2); ++ ++ uox4 = vmlaq_n_s32(out_rndx4, ravgx4, cru); ++ uox4 = vmlaq_n_s32(uox4, gavgx4, ocgu); ++ uox4 = vmlaq_n_s32(uox4, bavgx4, cburv); ++ ++ vox4 = vmlaq_n_s32(out_rndx4, ravgx4, cburv); ++ vox4 = vmlaq_n_s32(vox4, gavgx4, ocgv); ++ vox4 = vmlaq_n_s32(vox4, bavgx4, cbv); ++ ++ switch(out_depth) { ++ default: ++ case 10: ++ y0oax4 = vshrq_n_s32(y0oax4, 19); ++ y0obx4 = vshrq_n_s32(y0obx4, 19); ++ y1oax4 = vshrq_n_s32(y1oax4, 19); ++ y1obx4 = vshrq_n_s32(y1obx4, 19); ++ uox4 = vshrq_n_s32(uox4, 19); ++ vox4 = vshrq_n_s32(vox4, 19); ++ break; ++ case 16: ++ y0oax4 = vshrq_n_s32(y0oax4, 13); ++ y0obx4 = vshrq_n_s32(y0obx4, 13); ++ y1oax4 = vshrq_n_s32(y1oax4, 13); ++ y1obx4 = vshrq_n_s32(y1obx4, 13); ++ uox4 = vshrq_n_s32(uox4, 13); ++ vox4 = vshrq_n_s32(vox4, 13); ++ break; ++ } ++ ++ y0oax4 = vaddq_s32(y0oax4, out_yuv_offx4); ++ y0obx4 = vaddq_s32(y0obx4, out_yuv_offx4); ++ y1oax4 = vaddq_s32(y1oax4, out_yuv_offx4); ++ y1obx4 = vaddq_s32(y1obx4, out_yuv_offx4); ++ uox4 = vaddq_s32(uox4, out_uv_offsetx4); ++ vox4 = vaddq_s32(vox4, out_uv_offsetx4); ++ ++ y0ox8 = vcombine_u16(vqmovun_s32(y0oax4), vqmovun_s32(y0obx4)); ++ y0ox8 = vshlq_u16(y0ox8, out_sh2x8); ++ vst1q_u16(&dsty[x], y0ox8); ++ ++ y1ox8 = vcombine_u16(vqmovun_s32(y1oax4), vqmovun_s32(y1obx4)); ++ y1ox8 = vshlq_u16(y1ox8, out_sh2x8); ++ vst1q_u16(&dsty[x + dstlinesize[0] / 2], y1ox8); ++ ++ uvoax4 = vzip1q_s32(uox4, vox4); ++ uvobx4 = vzip2q_s32(uox4, vox4); ++ ++ vst1q_u16(&dstuv[x], vshlq_u16(vcombine_u16(vqmovun_s32(uvoax4), vqmovun_s32(uvobx4)), out_sh2x8)); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstuv += offset; ++ rsrcy += offset; ++ rsrcuv += offset; ++ tonemap_frame_p016_p010_2_p016_p010(rdsty, rdstuv, ++ rsrcy, rsrcuv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // 
ENABLE_TONEMAPX_NEON_INTRINSICS ++} +Index: FFmpeg/libavfilter/aarch64/vf_tonemapx_intrin_neon.h +=================================================================== +--- /dev/null ++++ libavfilter/aarch64/vf_tonemapx_intrin_neon.h +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_AARCH64_TONEMAPX_INTRIN_NEON_H ++#define AVFILTER_AARCH64_TONEMAPX_INTRIN_NEON_H ++ ++#include "libavfilter/vf_tonemapx.h" ++ ++void tonemap_frame_dovi_2_420p_neon(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_420p10_2_420p_neon(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_p016_p010_2_nv12_neon(uint8_t *dsty, uint8_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, 
const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_dovi_2_420p10_neon(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_420p10_2_420p10_neon(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_p016_p010_2_p016_p010_neon(uint16_t *dsty, uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++#endif // AVFILTER_AARCH64_TONEMAPX_INTRIN_NEON_H +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== +--- libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -498,6 +498,7 @@ extern const AVFilter ff_vf_tmedian; + extern const AVFilter ff_vf_tmidequalizer; + extern const AVFilter ff_vf_tmix; + extern const AVFilter ff_vf_tonemap; ++extern const AVFilter ff_vf_tonemapx; + extern const AVFilter ff_vf_tonemap_cuda; + extern const AVFilter ff_vf_tonemap_opencl; + extern const AVFilter ff_vf_tonemap_vaapi; +Index: FFmpeg/libavfilter/colorspace.c +=================================================================== +--- libavfilter/colorspace.c ++++ libavfilter/colorspace.c +@@ -17,6 +17,7 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include "libavutil/avassert.h" + #include "libavutil/frame.h" + #include 
"libavutil/mastering_display_metadata.h" + #include "libavutil/pixdesc.h" +@@ -354,3 +355,51 @@ float inverse_eotf_arib_b67(float x) { + float inverse_eotf_bt1886(float x) { + return x > 0.0f ? powf(x, 1.0f / 2.4f) : 0.0f; + } ++ ++int ff_get_range_off(int *off, int *y_rng, int *uv_rng, ++ enum AVColorRange rng, int depth) ++{ ++ switch (rng) { ++ case AVCOL_RANGE_UNSPECIFIED: ++ case AVCOL_RANGE_MPEG: ++ *off = 16 << (depth - 8); ++ *y_rng = 219 << (depth - 8); ++ *uv_rng = 224 << (depth - 8); ++ break; ++ case AVCOL_RANGE_JPEG: ++ *off = 0; ++ *y_rng = *uv_rng = (256 << (depth - 8)) - 1; ++ break; ++ default: ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++void ff_get_yuv_coeffs(int16_t out[3][3][8], double (*table)[3], ++ int depth, int y_rng, int uv_rng, int yuv2rgb) ++{ ++#define N (yuv2rgb ? m : n) ++#define M (yuv2rgb ? n : m) ++ int rng, n, m, o; ++ int bits = 1 << (yuv2rgb ? (depth - 1) : (29 - depth)); ++ for (rng = y_rng, n = 0; n < 3; n++, rng = uv_rng) { ++ for (m = 0; m < 3; m++) { ++ out[N][M][0] = lrint(bits * (yuv2rgb ? 28672 : rng) * table[N][M] / (yuv2rgb ? 
rng : 28672)); ++ for (o = 1; o < 8; o++) ++ out[N][M][o] = out[N][M][0]; ++ } ++ } ++#undef N ++#undef M ++ ++ if (yuv2rgb) { ++ av_assert2(out[0][1][0] == 0); ++ av_assert2(out[2][2][0] == 0); ++ av_assert2(out[0][0][0] == out[1][0][0]); ++ av_assert2(out[0][0][0] == out[2][0][0]); ++ } else { ++ av_assert2(out[1][2][0] == out[2][0][0]); ++ } ++} +Index: FFmpeg/libavfilter/colorspace.h +=================================================================== +--- libavfilter/colorspace.h ++++ libavfilter/colorspace.h +@@ -85,4 +85,8 @@ float eotf_arib_b67(float x); + float inverse_eotf_arib_b67(float x); + float inverse_eotf_bt1886(float x); + ++int ff_get_range_off(int *off, int *y_rng, int *uv_rng, ++ enum AVColorRange rng, int depth); ++void ff_get_yuv_coeffs(int16_t out[3][3][8], double (*table)[3], ++ int depth, int y_rng, int uv_rng, int yuv2rgb); + #endif +Index: FFmpeg/libavfilter/vf_tonemapx.c +=================================================================== +--- /dev/null ++++ libavfilter/vf_tonemapx.c +@@ -0,0 +1,1778 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * tonemap algorithms ++ */ ++ ++#include ++#include ++ ++#include "libavutil/avassert.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/internal.h" ++#include "libavutil/mem_internal.h" ++#include "libavutil/opt.h" ++#include "libavutil/cpu.h" ++ ++#include "vf_tonemapx.h" ++ ++#ifdef CC_SUPPORTS_TONEMAPX_INTRINSICS ++# if ARCH_AARCH64 ++# if HAVE_INTRINSICS_NEON ++# include "libavutil/aarch64/cpu.h" ++# include "aarch64/vf_tonemapx_intrin_neon.h" ++# endif ++# endif // ARCH_AARCH64 ++# if ARCH_X86 ++# include "libavutil/x86/cpu.h" ++# if HAVE_INTRINSICS_SSE42 ++# include "x86/vf_tonemapx_intrin_sse.h" ++# endif ++# if HAVE_INTRINSICS_AVX2 && HAVE_INTRINSICS_FMA3 ++# include "x86/vf_tonemapx_intrin_avx.h" ++# endif ++# endif // ARCH_X86 ++#endif // CC_SUPPORTS_TONEMAPX_INTRINSICS ++ ++#include "avfilter.h" ++#include "formats.h" ++#include "internal.h" ++#include "video.h" ++ ++enum TonemapAlgorithm { ++ TONEMAP_NONE, ++ TONEMAP_LINEAR, ++ TONEMAP_GAMMA, ++ TONEMAP_CLIP, ++ TONEMAP_REINHARD, ++ TONEMAP_HABLE, ++ TONEMAP_MOBIUS, ++ TONEMAP_BT2390, ++ TONEMAP_MAX, ++}; ++ ++typedef struct TonemapxContext { ++ const AVClass *class; ++ ++ enum TonemapAlgorithm tonemap; ++ enum AVColorTransferCharacteristic trc; ++ enum AVColorSpace spc; ++ enum AVColorPrimaries pri; ++ enum AVColorRange range; ++ enum AVPixelFormat format; ++ char *format_str; ++ double param; ++ double desat; ++ double peak; ++ int apply_dovi; ++ ++ const AVLumaCoefficients *coeffs, *ocoeffs; ++ ++ double lut_peak; ++ float *lin_lut; ++ float *tonemap_lut; ++ uint16_t *delin_lut; ++ int in_yuv_off, out_yuv_off; ++ ++ struct DoviMetadata *dovi; ++ ++ DECLARE_ALIGNED(16, float, dovi_pbuf)[3*(params_sz+pivots_sz+coeffs_sz+mmr_sz)]; ++ 
DECLARE_ALIGNED(16, int16_t, yuv2rgb_coeffs)[3][3][8]; ++ DECLARE_ALIGNED(16, int16_t, rgb2yuv_coeffs)[3][3][8]; ++ DECLARE_ALIGNED(16, double, rgb2rgb_coeffs)[3][3]; ++ DECLARE_ALIGNED(16, double, lms2rgb_matrix)[3][3]; ++ DECLARE_ALIGNED(16, float, ycc_offset)[3]; ++ ++ int (*filter_slice) (AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); ++ ++ void (*tonemap_func_biplanar8) (uint8_t *dsty, uint8_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++ void (*tonemap_func_planar8) (uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++ void (*tonemap_func_biplanar10) (uint16_t *dsty, uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++ void (*tonemap_func_planar10) (uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++ void (*tonemap_func_dovi8) (uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++ void (*tonemap_func_dovi10) (uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, 
++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++} TonemapxContext; ++ ++typedef struct ThreadData { ++ AVFrame *in, *out; ++ const AVPixFmtDescriptor *desc, *odesc; ++ double peak; ++} ThreadData; ++ ++static const enum AVPixelFormat in_pix_fmts[] = { ++ AV_PIX_FMT_YUV420P10, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++ AV_PIX_FMT_NONE, ++}; ++ ++static const enum AVPixelFormat out_pix_fmts[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUV420P10, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++}; ++ ++const double dovi_lms2rgb_matrix[3][3] = ++ { ++ { 3.06441879, -2.16597676, 0.10155818}, ++ {-0.65612108, 1.78554118, -0.12943749}, ++ { 0.01736321, -0.04725154, 1.03004253}, ++ }; ++ ++static void update_dovi_buf(AVFilterContext *ctx) ++{ ++ TonemapxContext *s = ctx->priv; ++ float coeffs_data[8][4] = {0}; ++ float mmr_packed_data[8*6][4] = {0}; ++ int c, i, j, k; ++ ++ for (c = 0; c < 3; c++) { ++ int has_poly = 0, has_mmr = 0, mmr_single = 1; ++ int mmr_idx = 0, min_order = 3, max_order = 1; ++ const struct ReshapeData *comp = &s->dovi->comp[c]; ++ if (!comp->num_pivots) ++ continue; ++ av_assert0(comp->num_pivots >= 2 && comp->num_pivots <= 9); ++ ++ memset(coeffs_data, 0, sizeof(coeffs_data)); ++ for (i = 0; i < comp->num_pivots - 1; i++) { ++ switch (comp->method[i]) { ++ case 0: // polynomial ++ has_poly = 1; ++ coeffs_data[i][3] = 0.0f; // order=0 signals polynomial ++ for (k = 0; k < 3; k++) ++ coeffs_data[i][k] = comp->poly_coeffs[i][k]; ++ break; ++ case 1: ++ min_order = FFMIN(min_order, comp->mmr_order[i]); ++ max_order = FFMAX(max_order, comp->mmr_order[i]); ++ mmr_single = !has_mmr; ++ has_mmr = 1; ++ coeffs_data[i][3] = (float)comp->mmr_order[i]; ++ coeffs_data[i][0] = comp->mmr_constant[i]; ++ coeffs_data[i][1] = (float)mmr_idx; ++ for (j = 0; j < comp->mmr_order[i]; j++) { ++ // store weights per order as two packed vec4s ++ float *mmr = &mmr_packed_data[mmr_idx][0]; ++ mmr[0] = 
comp->mmr_coeffs[i][j][0]; ++ mmr[1] = comp->mmr_coeffs[i][j][1]; ++ mmr[2] = comp->mmr_coeffs[i][j][2]; ++ mmr[3] = 0.0f; // unused ++ mmr[4] = comp->mmr_coeffs[i][j][3]; ++ mmr[5] = comp->mmr_coeffs[i][j][4]; ++ mmr[6] = comp->mmr_coeffs[i][j][5]; ++ mmr[7] = comp->mmr_coeffs[i][j][6]; ++ mmr_idx += 2; /* each order occupies two float[4] rows of mmr_packed_data */ ++ } ++ break; ++ default: ++ av_assert0(0); ++ } ++ } ++ ++ av_assert0(has_poly || has_mmr); ++ ++ if (has_mmr) ++ av_assert0(min_order <= max_order); ++ ++ // dovi_params ++ { ++ float params[8] = { ++ comp->num_pivots, !!has_mmr, !!has_poly, ++ mmr_single, min_order, max_order, ++ comp->pivots[0], comp->pivots[comp->num_pivots - 1] ++ }; ++ memcpy(s->dovi_pbuf + c*params_cnt, params, params_sz); ++ } ++ ++ // dovi_pivots ++ if (c == 0 && comp->num_pivots > 2) { ++ // Skip the (irrelevant) lower and upper bounds ++ float pivots_data[7+1] = {0}; ++ memcpy(pivots_data, comp->pivots + 1, ++ (comp->num_pivots - 2) * sizeof(pivots_data[0])); ++ // Fill the remainder with a quasi-infinite sentinel pivot ++ for (i = comp->num_pivots - 2; i < FF_ARRAY_ELEMS(pivots_data); i++) ++ pivots_data[i] = 1e9f; ++ memcpy(s->dovi_pbuf + 3*params_cnt + c*pivots_cnt, pivots_data, pivots_sz); ++ } ++ ++ // dovi_coeffs ++ memcpy(s->dovi_pbuf + 3*(params_cnt+pivots_cnt) + c*coeffs_cnt, &coeffs_data[0], coeffs_sz); ++ ++ // dovi_mmr ++ if (has_mmr) ++ memcpy(s->dovi_pbuf + 3*(params_cnt+pivots_cnt+coeffs_cnt) + c*mmr_cnt, &mmr_packed_data[0], mmr_sz); ++ } ++} ++ ++inline static float dot(const float* x, const float* y, int len) /* returns the sum of x[i] * y[i] for i in [0, len) */ ++{ ++ int i; ++ float result = 0; ++ for (i = 0; i < len; i++) { ++ result += x[i] * y[i]; ++ } ++ return result; ++} ++ ++inline static float reshape_poly(float s, float* coeffs) { /* evaluates coeffs[0] + coeffs[1]*s + coeffs[2]*s*s */ ++ return (coeffs[2] * s + coeffs[1]) * s + coeffs[0]; ++} ++ ++inline static float reshape_mmr(const float* sig, const float* coeffs, const float* mmr, /* coeffs: {constant, first mmr row, unused here, order}; mmr: flat float array of packed weights */ ++ int mmr_single, int min_order, int max_order) ++{ ++ int mmr_idx = mmr_single ? 
0 : 4 * (int)coeffs[1]; /* coeffs[1] is a float[4]-row index written by update_dovi_buf; mmr is a flat float*, so convert rows to floats */ ++ int order = (int)coeffs[3]; ++ float s = coeffs[0]; ++ float sigX[7+1] = {sig[0], sig[1], sig[2], 0, ++ sig[0]*sig[1], sig[0]*sig[2], sig[1]*sig[2], sig[0]*sig[1]*sig[2]}; ++ ++ s += dot(&mmr[mmr_idx + 0*4], sigX, 7+1); /* first-order terms: 8 floats starting at the piece's first row */ ++ if (max_order >= 2 && (min_order >= 2 || order >= 2)) { ++ float sigX2[7+1] = {sig[0]*sig[0], sig[1]*sig[1], sig[2]*sig[2], 0, ++ sigX[4]*sigX[4], sigX[5]*sigX[5], sigX[6]*sigX[6], sigX[7]*sigX[7]}; ++ s += dot(&mmr[mmr_idx + 2*4], sigX2, 7+1); /* second-order terms: next two rows */ ++ ++ if (max_order == 3 && (min_order == 3 || order >= 3)) { ++ float sigX3[7+1] = {sig[0]*sig[0]*sig[0], sig[1]*sig[1]*sig[1], sig[2]*sig[2]*sig[2], 0, ++ sigX2[4]*sigX[4], sigX2[5]*sigX[5], sigX2[6]*sigX[6], sigX2[7]*sigX[7]}; ++ s += dot(&mmr[mmr_idx + 4*4], sigX3, 7+1); /* third-order terms: following two rows */ ++ } ++ } ++ ++ return s; ++} ++ ++inline static void ycc2rgb(float* dest, float y, float cb, float cr, const double nonlinear[3][3], const float ycc_offset[3]) /* dest = nonlinear * (y, cb, cr) - ycc_offset, computed in float */ ++{ ++ dest[0] = (y * (float)nonlinear[0][0] + cb * (float)nonlinear[0][1] + cr * (float)nonlinear[0][2]) - ycc_offset[0]; ++ dest[1] = (y * (float)nonlinear[1][0] + cb * (float)nonlinear[1][1] + cr * (float)nonlinear[1][2]) - ycc_offset[1]; ++ dest[2] = (y * (float)nonlinear[2][0] + cb * (float)nonlinear[2][1] + cr * (float)nonlinear[2][2]) - ycc_offset[2]; ++} ++ ++// This implementation does not do the costly linearization and de-linearization for performance reasons ++// The output color accuracy will be affected due to this ++inline static void lms2rgb(float* dest, float l, float m, float s, const double linear[3][3], const double lms2rgb_matrix[3][3]) /* the 'linear' argument is not read by this body */ ++{ ++ dest[0] = l * (float)lms2rgb_matrix[0][0] + m * (float)lms2rgb_matrix[0][1] + s * (float)lms2rgb_matrix[0][2]; ++ dest[1] = l * (float)lms2rgb_matrix[1][0] + m * (float)lms2rgb_matrix[1][1] + s * (float)lms2rgb_matrix[1][2]; ++ dest[2] = l * (float)lms2rgb_matrix[2][0] + m * (float)lms2rgb_matrix[2][1] + s * (float)lms2rgb_matrix[2][2]; ++} ++ ++#define CLAMP(a, b, c) (FFMIN(FFMAX((a), (b)), 
(c))) ++inline static void reshape_dovi_yuv(float* dest, float* src, const TonemapIntParams *ctx) ++{ ++ int i; ++ float s; ++ float coeffs[4] = {0, 0, 0, 0}; ++ float sig_arr[3] = {src[0],src[1],src[2]}; ++ ++ int dovi_num_pivots, dovi_has_mmr, dovi_has_poly; ++ int dovi_mmr_single, dovi_min_order, dovi_max_order; ++ int has_mmr_poly; ++ float dovi_lo, dovi_hi; ++ float *dovi_params; ++ float *dovi_pivots; ++ float *dovi_coeffs, *dovi_mmr; //float4* ++ ++ float *src_dovi_params = ctx->dovi_pbuf; ++ float *src_dovi_pivots = ctx->dovi_pbuf + 24; ++ float *src_dovi_coeffs = ctx->dovi_pbuf + 48; //float4* ++ float *src_dovi_mmr = ctx->dovi_pbuf + 144; //float4* ++ ++ for (i = 0; i < 3; i++) { ++ dovi_params = src_dovi_params + i*8; ++ dovi_pivots = src_dovi_pivots + i*8; ++ dovi_coeffs = src_dovi_coeffs + i*8*4; //float4* ++ dovi_mmr = src_dovi_mmr + i*48*4; //float4* ++ dovi_num_pivots = dovi_params[0]; ++ dovi_has_mmr = dovi_params[1]; ++ dovi_has_poly = dovi_params[2]; ++ dovi_mmr_single = dovi_params[3]; ++ dovi_min_order = dovi_params[4]; ++ dovi_max_order = dovi_params[5]; ++ dovi_lo = dovi_params[6]; ++ dovi_hi = dovi_params[7]; ++ ++ s = sig_arr[i]; ++ coeffs[0] = dovi_coeffs[0*4+0]; ++ coeffs[1] = dovi_coeffs[0*4+1]; ++ coeffs[2] = dovi_coeffs[0*4+2]; ++ coeffs[3] = dovi_coeffs[0*4+3]; ++ ++#define mix(x, y, a) ((x) + ((y) - (x)) * (a)) ++ if (i == 0 && dovi_num_pivots > 2) { ++ int t0 = s >= dovi_pivots[0], t1 = s >= dovi_pivots[1]; ++ int t2 = s >= dovi_pivots[2], t3 = s >= dovi_pivots[3]; ++ int t4 = s >= dovi_pivots[4], t5 = s >= dovi_pivots[5], t6 = s >= dovi_pivots[6]; ++ ++ float m01[4] = { mix(dovi_coeffs[0*4+0], dovi_coeffs[1*4+0], t0), ++ mix(dovi_coeffs[0*4+1], dovi_coeffs[1*4+1], t0), ++ mix(dovi_coeffs[0*4+2], dovi_coeffs[1*4+2], t0), ++ mix(dovi_coeffs[0*4+3], dovi_coeffs[1*4+3], t0) }; ++ float m23[4] = { mix(dovi_coeffs[2*4+0], dovi_coeffs[3*4+0], t2), ++ mix(dovi_coeffs[2*4+1], dovi_coeffs[3*4+1], t2), ++ mix(dovi_coeffs[2*4+2], 
dovi_coeffs[3*4+2], t2), ++ mix(dovi_coeffs[2*4+3], dovi_coeffs[3*4+3], t2) }; ++ float m0123[4] = { mix(m01[0], m23[0], t1), ++ mix(m01[1], m23[1], t1), ++ mix(m01[2], m23[2], t1), ++ mix(m01[3], m23[3], t1) }; ++ float m45[4] = { mix(dovi_coeffs[4*4+0], dovi_coeffs[5*4+0], t4), ++ mix(dovi_coeffs[4*4+1], dovi_coeffs[5*4+1], t4), ++ mix(dovi_coeffs[4*4+2], dovi_coeffs[5*4+2], t4), ++ mix(dovi_coeffs[4*4+3], dovi_coeffs[5*4+3], t4) }; ++ float m67[4] = { mix(dovi_coeffs[6*4+0], dovi_coeffs[7*4+0], t6), ++ mix(dovi_coeffs[6*4+1], dovi_coeffs[7*4+1], t6), ++ mix(dovi_coeffs[6*4+2], dovi_coeffs[7*4+2], t6), ++ mix(dovi_coeffs[6*4+3], dovi_coeffs[7*4+3], t6) }; ++ float m4567[4] = { mix(m45[0], m67[0], t5), ++ mix(m45[1], m67[1], t5), ++ mix(m45[2], m67[2], t5), ++ mix(m45[3], m67[3], t5) }; ++ ++ coeffs[0] = mix(m0123[0], m4567[0], t3); ++ coeffs[1] = mix(m0123[1], m4567[1], t3); ++ coeffs[2] = mix(m0123[2], m4567[2], t3); ++ coeffs[3] = mix(m0123[3], m4567[3], t3); ++ } ++ ++ has_mmr_poly = dovi_has_mmr && dovi_has_poly; ++ ++ if ((has_mmr_poly && coeffs[3] == 0.0f) || (!has_mmr_poly && dovi_has_poly)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(sig_arr, coeffs, dovi_mmr, ++ dovi_mmr_single, dovi_min_order, dovi_max_order); ++ ++ sig_arr[i] = CLAMP(s, dovi_lo, dovi_hi); ++ } ++ ++ dest[0] = sig_arr[0]; ++ dest[1] = sig_arr[1]; ++ dest[2] = sig_arr[2]; ++} ++ ++static int out_format_is_supported(enum AVPixelFormat fmt) ++{ ++ int i; ++ ++ for (i = 0; i < FF_ARRAY_ELEMS(out_pix_fmts); i++) ++ if (out_pix_fmts[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static float hable(float in) ++{ ++ float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; ++ return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; ++} ++ ++static float mobius(float in, float j, double peak) ++{ ++ float a, b; ++ ++ if (in <= j) ++ return in; ++ ++ a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); ++ b = (j * j - 2.0f * j * peak + peak) / 
FFMAX(peak - 1.0f, FLOAT_EPS); ++ ++ return (b * b + 2.0f * b * j + j * j) / (b - a) * (in + a) / (in + b); ++} ++ ++static float bt2390(float s, float peak) /* maps s through a knee + cubic roll-off computed on ST 2084 encoded values */ ++{ ++ float peak_pq = inverse_eotf_st2084(peak, REFERENCE_WHITE_ALT); ++ float scale = 1.0f / peak_pq; /* after scaling, the encoded source peak equals 1.0 */ ++ ++ // SDR peak ++ float dst_peak = 1.0f; ++ float s_pq = inverse_eotf_st2084(s, REFERENCE_WHITE_ALT) * scale; ++ float maxLum = inverse_eotf_st2084(dst_peak, REFERENCE_WHITE_ALT) * scale; ++ ++ float ks = 1.5f * maxLum - 0.5f; /* knee start: inputs below ks are passed through unchanged (see sig below) */ ++ float tb = (s_pq - ks) / (1.0f - ks); ++ float tb2 = tb * tb; ++ float tb3 = tb2 * tb; ++ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + ++ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + ++ (-2.0f * tb3 + 3.0f * tb2) * maxLum; ++ float sig = (s_pq < ks) ? s_pq : pb; /* the cubic pb is used only at or above the knee */ ++ ++ return eotf_st2084(sig * peak_pq, REFERENCE_WHITE_ALT); /* undo the scaling, then decode */ ++} ++ ++static float mapsig(enum TonemapAlgorithm alg, float sig, double peak, double param) /* applies the selected tone curve to one signal value and returns the result */ ++{ ++ switch(alg) { ++ default: /* any unlisted value shares the TONEMAP_NONE path */ ++ case TONEMAP_NONE: ++ // do nothing ++ break; ++ case TONEMAP_LINEAR: ++ sig = sig * param / peak; ++ break; ++ case TONEMAP_GAMMA: ++ sig = sig > 0.05f ++ ? 
pow(sig / peak, 1.0f / param) ++ : sig * pow(0.05f / peak, 1.0f / param) / 0.05f; /* at or below 0.05: straight line through the origin meeting the power curve at 0.05 */ ++ break; ++ case TONEMAP_CLIP: ++ sig = av_clipf(sig * param, 0, 1.0f); ++ break; ++ case TONEMAP_HABLE: ++ sig = hable(sig) / hable(peak); ++ break; ++ case TONEMAP_REINHARD: ++ sig = sig / (sig + param) * (peak + param) / peak; ++ break; ++ case TONEMAP_MOBIUS: ++ sig = mobius(sig, param, peak); ++ break; ++ case TONEMAP_BT2390: ++ sig = bt2390(sig, peak); ++ break; ++ } ++ ++ return sig; ++} ++ ++static float linearize(float x, enum AVColorTransferCharacteristic trc_src) /* decodes x for SMPTE2084 and ARIB STD-B67; any other transfer returns x unchanged */ ++{ ++ if (trc_src == AVCOL_TRC_SMPTE2084) ++ return eotf_st2084(x, REFERENCE_WHITE_ALT); ++ else if (trc_src == AVCOL_TRC_ARIB_STD_B67) ++ return eotf_arib_b67(x); ++ else ++ return x; ++} ++ ++static float delinearize(float x, enum AVColorTransferCharacteristic trc_dst) /* encodes x for BT709 and BT2020_10; any other transfer returns x unchanged */ ++{ ++ if (trc_dst == AVCOL_TRC_BT709 || trc_dst == AVCOL_TRC_BT2020_10) ++ return inverse_eotf_bt1886(x); ++ else ++ return x; ++} ++ ++static int compute_trc_luts(TonemapxContext *s, enum AVColorTransferCharacteristic trc_src, /* fills s->lin_lut and s->delin_lut (32768 entries each); returns 0 or AVERROR(ENOMEM) */ ++ enum AVColorTransferCharacteristic trc_dst) ++{ ++ int i; ++ ++ if (!s->lin_lut && !(s->lin_lut = av_calloc(32768, sizeof(float)))) /* allocate only when no buffer exists yet */ ++ return AVERROR(ENOMEM); ++ if (!s->delin_lut && !(s->delin_lut = av_calloc(32768, sizeof(uint16_t)))) ++ return AVERROR(ENOMEM); ++ ++ for (i = 0; i < 32768; i++) { ++ double v1 = (i - 2048.0f) / 28672.0f; /* index 2048 -> 0.0, index 30720 -> 1.0 */ ++ double v2 = i / 32767.0f; /* index 0 -> 0.0, index 32767 -> 1.0 */ ++ s->lin_lut[i] = FFMAX(linearize(v1, trc_src), 0); /* negative results are stored as 0 */ ++ s->delin_lut[i] = av_clip_int16(lrint(delinearize(v2, trc_dst) * 28672.0f)); ++ } ++ ++ return 0; ++} ++ ++static int compute_tonemap_lut(TonemapxContext *s, enum AVColorTransferCharacteristic trc_src) ++{ ++ int i; ++ double peak = s->lut_peak; ++ ++ if (!s->tonemap_lut && !(s->tonemap_lut = av_calloc(32768, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ ++ for (i = 0; i < 32768; i++) { ++ double v = (i - 2048.0f) / 28672.0f; ++ double sig = linearize(v, trc_src); ++ float mapped = mapsig(s->tonemap, 
sig, peak, s->param); ++ s->tonemap_lut[i] = (sig > 0.0f && mapped > 0.0f) ? mapped / sig : 0.0f; ++ } ++ ++ return 0; ++} ++ ++static int compute_yuv_coeffs(TonemapxContext *s, ++ const AVLumaCoefficients *coeffs, ++ const AVLumaCoefficients *ocoeffs, ++ const AVPixFmtDescriptor *idesc, ++ const AVPixFmtDescriptor *odesc, ++ enum AVColorRange irng, ++ enum AVColorRange orng) ++{ ++ double rgb2yuv[3][3], yuv2rgb[3][3]; ++ int res; ++ int y_rng, uv_rng; ++ ++ res = ff_get_range_off(&s->in_yuv_off, &y_rng, &uv_rng, ++ irng, idesc->comp[0].depth); ++ if (res < 0) { ++ av_log(s, AV_LOG_ERROR, ++ "Unsupported input color range %d (%s)\n", ++ irng, av_color_range_name(irng)); ++ return res; ++ } ++ ++ ff_fill_rgb2yuv_table(coeffs, rgb2yuv); ++ ff_matrix_invert_3x3(rgb2yuv, yuv2rgb); ++ ff_fill_rgb2yuv_table(ocoeffs, rgb2yuv); ++ ++ ff_get_yuv_coeffs(s->yuv2rgb_coeffs, yuv2rgb, idesc->comp[0].depth, ++ y_rng, uv_rng, 1); ++ ++ res = ff_get_range_off(&s->out_yuv_off, &y_rng, &uv_rng, ++ orng, odesc->comp[0].depth); ++ if (res < 0) { ++ av_log(s, AV_LOG_ERROR, ++ "Unsupported output color range %d (%s)\n", ++ orng, av_color_range_name(orng)); ++ return res; ++ } ++ ++ ff_get_yuv_coeffs(s->rgb2yuv_coeffs, rgb2yuv, odesc->comp[0].depth, ++ y_rng, uv_rng, 0); ++ ++ return 0; ++} ++ ++static int compute_rgb_coeffs(TonemapxContext *s, ++ enum AVColorPrimaries iprm, ++ enum AVColorPrimaries oprm) ++{ ++ double rgb2xyz[3][3], xyz2rgb[3][3]; ++ const AVColorPrimariesDesc *iprm_desc = av_csp_primaries_desc_from_id(iprm); ++ const AVColorPrimariesDesc *oprm_desc = av_csp_primaries_desc_from_id(oprm); ++ ++ if (!iprm_desc) { ++ av_log(s, AV_LOG_ERROR, ++ "Unsupported input color primaries %d (%s)\n", ++ iprm, av_color_primaries_name(iprm)); ++ return AVERROR(EINVAL); ++ } ++ if (!oprm_desc) { ++ av_log(s, AV_LOG_ERROR, ++ "Unsupported output color primaries %d (%s)\n", ++ oprm, av_color_primaries_name(oprm)); ++ return AVERROR(EINVAL); ++ } ++ ++ 
ff_fill_rgb2xyz_table(&oprm_desc->prim, &oprm_desc->wp, rgb2xyz); ++ ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); ++ ff_fill_rgb2xyz_table(&iprm_desc->prim, &iprm_desc->wp, rgb2xyz); ++ ff_matrix_mul_3x3(s->rgb2rgb_coeffs, rgb2xyz, xyz2rgb); ++ ++ return 0; ++} ++ ++__attribute__((always_inline)) ++static inline void dovi2rgb(int y00, int y01, int y10, int y11, int u, int v, ++ const struct TonemapIntParams *params, ++ const float in_rng, ++ int16_t r[4], int16_t g[4], int16_t b[4]) ++{ ++ float yuv1[3], yuv2[3], yuv3[3], yuv4[3]; ++ float c1[3], c2[3], c3[3], c4[3]; ++ ++ yuv1[0] = CLAMP(y00 / in_rng, 0.0f, 1.0f); ++ yuv2[0] = CLAMP(y01 / in_rng, 0.0f, 1.0f); ++ yuv3[0] = CLAMP(y10 / in_rng, 0.0f, 1.0f); ++ yuv4[0] = CLAMP(y11 / in_rng, 0.0f, 1.0f); ++ yuv1[1] = yuv2[1] = yuv3[1] = yuv4[1] = CLAMP(u / in_rng, 0.0f, 1.0f); ++ yuv1[2] = yuv2[2] = yuv3[2] = yuv4[2] = CLAMP(v / in_rng, 0.0f, 1.0f); ++ ++ reshape_dovi_yuv(yuv1, yuv1, params); ++ reshape_dovi_yuv(yuv2, yuv2, params); ++ reshape_dovi_yuv(yuv3, yuv3, params); ++ reshape_dovi_yuv(yuv4, yuv4, params); ++ ++ ycc2rgb(c1, yuv1[0], yuv1[1], yuv1[2], params->dovi->nonlinear, *params->ycc_offset); ++ ycc2rgb(c2, yuv2[0], yuv2[1], yuv2[2], params->dovi->nonlinear, *params->ycc_offset); ++ ycc2rgb(c3, yuv3[0], yuv3[1], yuv3[2], params->dovi->nonlinear, *params->ycc_offset); ++ ycc2rgb(c4, yuv4[0], yuv4[1], yuv4[2], params->dovi->nonlinear, *params->ycc_offset); ++ ++ lms2rgb(c1, c1[0], c1[1], c1[2], params->dovi->linear, *params->lms2rgb_matrix); ++ lms2rgb(c2, c2[0], c2[1], c2[2], params->dovi->linear, *params->lms2rgb_matrix); ++ lms2rgb(c3, c3[0], c3[1], c3[2], params->dovi->linear, *params->lms2rgb_matrix); ++ lms2rgb(c4, c4[0], c4[1], c4[2], params->dovi->linear, *params->lms2rgb_matrix); ++ ++ r[0] = av_clip_int16(c1[0] * 28672); ++ r[1] = av_clip_int16(c2[0] * 28672); ++ r[2] = av_clip_int16(c3[0] * 28672); ++ r[3] = av_clip_int16(c4[0] * 28672); ++ ++ g[0] = av_clip_int16(c1[1] * 28672); ++ g[1] = 
av_clip_int16(c2[1] * 28672);
++    g[2] = av_clip_int16(c3[1] * 28672);
++    g[3] = av_clip_int16(c4[1] * 28672);
++
++    b[0] = av_clip_int16(c1[2] * 28672);
++    b[1] = av_clip_int16(c2[2] * 28672);
++    b[2] = av_clip_int16(c3[2] * 28672);
++    b[3] = av_clip_int16(c4[2] * 28672);
++}
++
++inline static void tonemap_int16(int16_t r_in, int16_t g_in, int16_t b_in,
++                                 int16_t *r_out, int16_t *g_out, int16_t *b_out,
++                                 float *lin_lut, float *tonemap_lut, uint16_t *delin_lut,
++                                 const AVLumaCoefficients *coeffs,
++                                 const AVLumaCoefficients *ocoeffs, double desat,
++                                 double (*rgb2rgb)[3][3],
++                                 int rgb2rgb_passthrough)
++{
++    int16_t sig;
++    float mapval, r_lin, g_lin, b_lin, r_src, g_src, b_src; /* *_src: linear RGB before the gamut matrix */
++
++    /* load values */
++    *r_out = r_in;
++    *g_out = g_in;
++    *b_out = b_in;
++
++    /* pick the brightest component, reducing the value range as necessary
++     * to keep the entire signal in range and preventing discoloration due to
++     * out-of-bounds clipping */
++    sig = FFMAX3(r_in, g_in, b_in);
++
++    mapval = tonemap_lut[av_clip_uintp2(sig + 2048, 15)]; /* LUT index: value + 2048, clipped to 15 bits */
++
++    r_lin = r_src = lin_lut[av_clip_uintp2(r_in + 2048, 15)];
++    g_lin = g_src = lin_lut[av_clip_uintp2(g_in + 2048, 15)];
++    b_lin = b_src = lin_lut[av_clip_uintp2(b_in + 2048, 15)];
++
++    if (!rgb2rgb_passthrough) { /* each row must read the unconverted triple, not a row written just above */
++        r_lin = (*rgb2rgb)[0][0] * r_src + (*rgb2rgb)[0][1] * g_src + (*rgb2rgb)[0][2] * b_src;
++        g_lin = (*rgb2rgb)[1][0] * r_src + (*rgb2rgb)[1][1] * g_src + (*rgb2rgb)[1][2] * b_src;
++        b_lin = (*rgb2rgb)[2][0] * r_src + (*rgb2rgb)[2][1] * g_src + (*rgb2rgb)[2][2] * b_src;
++    }
++
++#define MIX(x,y,a) ((x) * (1 - (a)) + (y) * (a))
++    /* desaturate to prevent unnatural colors */
++    if (desat > 0) {
++        float luma = av_q2d(coeffs->cr) * r_lin + av_q2d(coeffs->cg) * g_lin + av_q2d(coeffs->cb) * b_lin;
++        float overbright = FFMAX(luma - desat, FLOAT_EPS) / FFMAX(luma, FLOAT_EPS);
++        r_lin = MIX(r_lin, luma, overbright);
++        g_lin = MIX(g_lin, luma, overbright);
++        b_lin = MIX(b_lin, luma, overbright);
++    }
++
++    r_lin *= mapval; /* apply the tone-map gain looked up for the brightest channel */
++    g_lin *= mapval;
++    b_lin *= mapval;
++#undef MIX ++ ++ *r_out = delin_lut[av_clip_uintp2(r_lin * 32767 + 0.5, 15)]; ++ *g_out = delin_lut[av_clip_uintp2(g_lin * 32767 + 0.5, 15)]; ++ *b_out = delin_lut[av_clip_uintp2(b_lin * 32767 + 0.5, 15)]; ++} ++ ++// See also libavfilter/colorspacedsp_template.c ++void tonemap_frame_p016_p010_2_nv12(uint8_t *dsty, uint8_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ const int in_sh2 = 16 - in_depth; ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int r00, g00, b00; ++ int r01, g01, b01; ++ int r10, g10, b10; ++ int r11, g11, b11; ++ ++ int16_t r[4], g[4], b[4]; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstuv += dstlinesize[1], ++ srcy += srclinesize[0], srcuv += srclinesize[1] / 2) { ++ for (int x = 0; x < width; x += 2) { ++ int y00 = (srcy[x] >> in_sh2) - params->in_yuv_off; ++ int y01 = (srcy[x + 1] >> in_sh2) - params->in_yuv_off; ++ int y10 = 
(srcy[srclinesize[0] / 2 + x] >> in_sh2) - params->in_yuv_off; ++ int y11 = (srcy[srclinesize[0] / 2 + x + 1] >> in_sh2) - params->in_yuv_off; ++ int u = (srcuv[x] >> in_sh2) - in_uv_offset; ++ int v = (srcuv[x + 1] >> in_sh2) - in_uv_offset; ++ ++ r[0] = av_clip_int16((y00 * cy + crv * v + in_rnd) >> in_sh); ++ r[1] = av_clip_int16((y01 * cy + crv * v + in_rnd) >> in_sh); ++ r[2] = av_clip_int16((y10 * cy + crv * v + in_rnd) >> in_sh); ++ r[3] = av_clip_int16((y11 * cy + crv * v + in_rnd) >> in_sh); ++ ++ g[0] = av_clip_int16((y00 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[1] = av_clip_int16((y01 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[2] = av_clip_int16((y10 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[3] = av_clip_int16((y11 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ ++ b[0] = av_clip_int16((y00 * cy + cbu * u + in_rnd) >> in_sh); ++ b[1] = av_clip_int16((y01 * cy + cbu * u + in_rnd) >> in_sh); ++ b[2] = av_clip_int16((y10 * cy + cbu * u + in_rnd) >> in_sh); ++ b[3] = av_clip_int16((y11 * cy + cbu * u + in_rnd) >> in_sh); ++ ++ tonemap_int16(r[0], g[0], b[0], &r[0], &g[0], &b[0], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[1], g[1], b[1], &r[1], &g[1], &b[1], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[2], g[2], b[2], &r[2], &g[2], &b[2], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[3], g[3], b[3], &r[3], &g[3], &b[3], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ ++ r00 = r[0], g00 = g[0], b00 = 
b[0]; ++ r01 = r[1], g01 = g[1], b01 = b[1]; ++ r10 = r[2], g10 = g[2], b10 = b[2]; ++ r11 = r[3], g11 = g[3], b11 = b[3]; ++ ++ dsty[x] = av_clip_uint8(params->out_yuv_off + ((r00 * cry + g00 * cgy + b00 * cby + out_rnd) >> out_sh)); ++ dsty[x + 1] = av_clip_uint8(params->out_yuv_off + ((r01 * cry + g01 * cgy + b01 * cby + out_rnd) >> out_sh)); ++ dsty[dstlinesize[0] + x] = av_clip_uint8(params->out_yuv_off + ((r10 * cry + g10 * cgy + b10 * cby + out_rnd) >> out_sh)); ++ dsty[dstlinesize[0] + x + 1] = av_clip_uint8(params->out_yuv_off + ((r11 * cry + g11 * cgy + b11 * cby + out_rnd) >> out_sh)); ++ ++#define AVG(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2) ++ dstuv[x] = av_clip_uint8(out_uv_offset + ((AVG(r00, r01, r10, r11) * cru + AVG(g00, g01, g10, g11) * ocgu + AVG(b00, b01, b10, b11) * cburv + out_rnd) >> out_sh)); ++ dstuv[x + 1] = av_clip_uint8(out_uv_offset + ((AVG(r00, r01, r10, r11) * cburv + AVG(g00, g01, g10, g11) * ocgv + AVG(b00, b01, b10, b11) * cbv + out_rnd) >> out_sh)); ++#undef AVG ++ } ++ } ++} ++ ++void tonemap_frame_dovi_2_420p(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++ const int in_depth = srcdepth; ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int r00, g00, b00; ++ int r01, g01, b01; ++ int r10, g10, 
b10; ++ int r11, g11, b11; ++ ++ const float in_rng = (float)((1 << in_depth) - 1); ++ ++ int16_t r[4], g[4], b[4]; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstu += dstlinesize[1], dstv += dstlinesize[2], ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) { ++ for (int x = 0; x < width; x += 2) { ++ int y00 = (srcy[x] ); ++ int y01 = (srcy[x + 1] ); ++ int y10 = (srcy[srclinesize[0] / 2 + x] ); ++ int y11 = (srcy[srclinesize[0] / 2 + x + 1]); ++ int u = (srcu[x >> 1]); ++ int v = (srcv[x >> 1]); ++ ++ dovi2rgb(y00, y01, y10, y11, u, v, params, in_rng, r, g, b); ++ ++ tonemap_int16(r[0], g[0], b[0], &r[0], &g[0], &b[0], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[1], g[1], b[1], &r[1], &g[1], &b[1], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[2], g[2], b[2], &r[2], &g[2], &b[2], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[3], g[3], b[3], &r[3], &g[3], &b[3], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ ++ r00 = r[0], g00 = g[0], b00 = b[0]; ++ r01 = r[1], g01 = g[1], b01 = b[1]; ++ r10 = r[2], g10 = g[2], b10 = b[2]; ++ r11 = r[3], g11 = g[3], b11 = b[3]; ++ ++ dsty[x] = av_clip_uint8(params->out_yuv_off + ((r00 * cry + g00 * cgy + b00 * cby + out_rnd) >> out_sh)); ++ dsty[x + 1] = av_clip_uint8(params->out_yuv_off + ((r01 * cry + g01 * cgy + b01 * cby + out_rnd) >> out_sh)); ++ dsty[dstlinesize[0] + x] = av_clip_uint8(params->out_yuv_off + ((r10 * cry + g10 * cgy + b10 * cby + 
out_rnd) >> out_sh));
++            dsty[dstlinesize[0] + x + 1] = av_clip_uint8(params->out_yuv_off + ((r11 * cry + g11 * cgy + b11 * cby + out_rnd) >> out_sh));
++
++#define AVG(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)
++            dstu[x >> 1] = av_clip_uint8(out_uv_offset + ((AVG(r00, r01, r10, r11) * cru + AVG(g00, g01, g10, g11) * ocgu + AVG(b00, b01, b10, b11) * cburv + out_rnd) >> out_sh));
++            dstv[x >> 1] = av_clip_uint8(out_uv_offset + ((AVG(r00, r01, r10, r11) * cburv + AVG(g00, g01, g10, g11) * ocgv + AVG(b00, b01, b10, b11) * cbv + out_rnd) >> out_sh));
++#undef AVG
++        }
++    }
++}
++
++void tonemap_frame_dovi_2_420p10(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv,
++                                 const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv,
++                                 const int *dstlinesize, const int *srclinesize,
++                                 int dstdepth, int srcdepth,
++                                 int width, int height,
++                                 const struct TonemapIntParams *params)
++{
++    const int in_depth = srcdepth;
++    const int out_depth = dstdepth;
++    const int out_uv_offset = 128 << (out_depth - 8);
++    const int out_sh = 29 - out_depth;
++    const int out_rnd = 1 << (out_sh - 1);
++
++    int cry = (*params->rgb2yuv_coeffs)[0][0][0];
++    int cgy = (*params->rgb2yuv_coeffs)[0][1][0];
++    int cby = (*params->rgb2yuv_coeffs)[0][2][0];
++    int cru = (*params->rgb2yuv_coeffs)[1][0][0];
++    int ocgu = (*params->rgb2yuv_coeffs)[1][1][0];
++    int cburv = (*params->rgb2yuv_coeffs)[1][2][0];
++    int ocgv = (*params->rgb2yuv_coeffs)[2][1][0];
++    int cbv = (*params->rgb2yuv_coeffs)[2][2][0];
++
++    int r00, g00, b00;
++    int r01, g01, b01;
++    int r10, g10, b10;
++    int r11, g11, b11;
++
++    const float in_rng = (float)((1 << in_depth) - 1);
++
++    int16_t r[4], g[4], b[4];
++    for (; height > 1; height -= 2, /* two luma rows per pass; linesize is in bytes, pointers are uint16_t */
++                       dsty += dstlinesize[0], dstu += dstlinesize[1] / 2, dstv += dstlinesize[2] / 2,
++                       srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) {
++        for (int x = 0; x < width; x += 2) {
++            int y00 = (srcy[x]                        );
++            int y01 = (srcy[x + 1]                    );
++            int y10 =
(srcy[srclinesize[0] / 2 + x] ); ++ int y11 = (srcy[srclinesize[0] / 2 + x + 1]); ++ int u = (srcu[x >> 1]); ++ int v = (srcv[x >> 1]); ++ ++ dovi2rgb(y00, y01, y10, y11, u, v, params, in_rng, r, g, b); ++ ++ tonemap_int16(r[0], g[0], b[0], &r[0], &g[0], &b[0], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[1], g[1], b[1], &r[1], &g[1], &b[1], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[2], g[2], b[2], &r[2], &g[2], &b[2], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[3], g[3], b[3], &r[3], &g[3], &b[3], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ ++ r00 = r[0], g00 = g[0], b00 = b[0]; ++ r01 = r[1], g01 = g[1], b01 = b[1]; ++ r10 = r[2], g10 = g[2], b10 = b[2]; ++ r11 = r[3], g11 = g[3], b11 = b[3]; ++ ++ dsty[x] = av_clip_uintp2((params->out_yuv_off + ((r00 * cry + g00 * cgy + b00 * cby + out_rnd) >> out_sh)), 16); ++ dsty[x + 1] = av_clip_uintp2((params->out_yuv_off + ((r01 * cry + g01 * cgy + b01 * cby + out_rnd) >> out_sh)), 16); ++ dsty[dstlinesize[0] / 2 + x] = av_clip_uintp2((params->out_yuv_off + ((r10 * cry + g10 * cgy + b10 * cby + out_rnd) >> out_sh)), 16); ++ dsty[dstlinesize[0] / 2 + x + 1] = av_clip_uintp2((params->out_yuv_off + ((r11 * cry + g11 * cgy + b11 * cby + out_rnd) >> out_sh)), 16); ++ ++#define AVG(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2) ++ dstu[x >> 1] = av_clip_uintp2((out_uv_offset + ((AVG(r00, r01, r10, r11) * cru + AVG(g00, g01, g10, g11) * ocgu + AVG(b00, b01, b10, b11) * cburv + out_rnd) >> out_sh)), 
16); ++ dstv[x >> 1] = av_clip_uintp2((out_uv_offset + ((AVG(r00, r01, r10, r11) * cburv + AVG(g00, g01, g10, g11) * ocgv + AVG(b00, b01, b10, b11) * cbv + out_rnd) >> out_sh)), 16); ++#undef AVG ++ } ++ } ++} ++ ++void tonemap_frame_420p10_2_420p(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int r00, g00, b00; ++ int r01, g01, b01; ++ int r10, g10, b10; ++ int r11, g11, b11; ++ ++ int16_t r[4], g[4], b[4]; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstu += dstlinesize[1], dstv += dstlinesize[2], ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) { ++ for (int x = 0; x < width; x += 2) { ++ int y00 = (srcy[x] ) - params->in_yuv_off; ++ int y01 = (srcy[x + 1] ) - params->in_yuv_off; ++ int y10 = (srcy[srclinesize[0] / 2 + x] ) - 
params->in_yuv_off; ++ int y11 = (srcy[srclinesize[0] / 2 + x + 1]) - params->in_yuv_off; ++ int u = (srcu[x >> 1]) - in_uv_offset; ++ int v = (srcv[x >> 1]) - in_uv_offset; ++ ++ r[0] = av_clip_int16((y00 * cy + crv * v + in_rnd) >> in_sh); ++ r[1] = av_clip_int16((y01 * cy + crv * v + in_rnd) >> in_sh); ++ r[2] = av_clip_int16((y10 * cy + crv * v + in_rnd) >> in_sh); ++ r[3] = av_clip_int16((y11 * cy + crv * v + in_rnd) >> in_sh); ++ ++ g[0] = av_clip_int16((y00 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[1] = av_clip_int16((y01 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[2] = av_clip_int16((y10 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[3] = av_clip_int16((y11 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ ++ b[0] = av_clip_int16((y00 * cy + cbu * u + in_rnd) >> in_sh); ++ b[1] = av_clip_int16((y01 * cy + cbu * u + in_rnd) >> in_sh); ++ b[2] = av_clip_int16((y10 * cy + cbu * u + in_rnd) >> in_sh); ++ b[3] = av_clip_int16((y11 * cy + cbu * u + in_rnd) >> in_sh); ++ ++ tonemap_int16(r[0], g[0], b[0], &r[0], &g[0], &b[0], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[1], g[1], b[1], &r[1], &g[1], &b[1], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[2], g[2], b[2], &r[2], &g[2], &b[2], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[3], g[3], b[3], &r[3], &g[3], &b[3], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ ++ r00 = r[0], g00 = g[0], b00 = b[0]; ++ r01 = r[1], g01 = g[1], b01 = b[1]; ++ r10 = r[2], g10 = g[2], 
b10 = b[2]; ++ r11 = r[3], g11 = g[3], b11 = b[3]; ++ ++ dsty[x] = av_clip_uint8(params->out_yuv_off + ((r00 * cry + g00 * cgy + b00 * cby + out_rnd) >> out_sh)); ++ dsty[x + 1] = av_clip_uint8(params->out_yuv_off + ((r01 * cry + g01 * cgy + b01 * cby + out_rnd) >> out_sh)); ++ dsty[dstlinesize[0] + x] = av_clip_uint8(params->out_yuv_off + ((r10 * cry + g10 * cgy + b10 * cby + out_rnd) >> out_sh)); ++ dsty[dstlinesize[0] + x + 1] = av_clip_uint8(params->out_yuv_off + ((r11 * cry + g11 * cgy + b11 * cby + out_rnd) >> out_sh)); ++ ++#define AVG(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2) ++ dstu[x >> 1] = av_clip_uint8(out_uv_offset + ((AVG(r00, r01, r10, r11) * cru + AVG(g00, g01, g10, g11) * ocgu + AVG(b00, b01, b10, b11) * cburv + out_rnd) >> out_sh)); ++ dstv[x >> 1] = av_clip_uint8(out_uv_offset + ((AVG(r00, r01, r10, r11) * cburv + AVG(g00, g01, g10, g11) * ocgv + AVG(b00, b01, b10, b11) * cbv + out_rnd) >> out_sh)); ++#undef AVG ++ } ++ } ++} ++ ++void tonemap_frame_420p10_2_420p10(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = 
(*params->rgb2yuv_coeffs)[0][2][0];
++    int cru = (*params->rgb2yuv_coeffs)[1][0][0];
++    int ocgu = (*params->rgb2yuv_coeffs)[1][1][0];
++    int cburv = (*params->rgb2yuv_coeffs)[1][2][0];
++    int ocgv = (*params->rgb2yuv_coeffs)[2][1][0];
++    int cbv = (*params->rgb2yuv_coeffs)[2][2][0];
++
++    int r00, g00, b00;
++    int r01, g01, b01;
++    int r10, g10, b10;
++    int r11, g11, b11;
++
++    int16_t r[4], g[4], b[4];
++    for (; height > 1; height -= 2, /* two luma rows per pass; linesize is in bytes, pointers are uint16_t */
++                       dsty += dstlinesize[0], dstu += dstlinesize[1] / 2, dstv += dstlinesize[2] / 2,
++                       srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) {
++        for (int x = 0; x < width; x += 2) {
++            int y00 = (srcy[x]                        ) - params->in_yuv_off;
++            int y01 = (srcy[x + 1]                    ) - params->in_yuv_off;
++            int y10 = (srcy[srclinesize[0] / 2 + x]   ) - params->in_yuv_off;
++            int y11 = (srcy[srclinesize[0] / 2 + x + 1]) - params->in_yuv_off;
++            int u = (srcu[x >> 1]) - in_uv_offset;
++            int v = (srcv[x >> 1]) - in_uv_offset;
++
++            r[0] = av_clip_int16((y00 * cy + crv * v + in_rnd) >> in_sh);
++            r[1] = av_clip_int16((y01 * cy + crv * v + in_rnd) >> in_sh);
++            r[2] = av_clip_int16((y10 * cy + crv * v + in_rnd) >> in_sh);
++            r[3] = av_clip_int16((y11 * cy + crv * v + in_rnd) >> in_sh);
++
++            g[0] = av_clip_int16((y00 * cy + cgu * u + cgv * v + in_rnd) >> in_sh);
++            g[1] = av_clip_int16((y01 * cy + cgu * u + cgv * v + in_rnd) >> in_sh);
++            g[2] = av_clip_int16((y10 * cy + cgu * u + cgv * v + in_rnd) >> in_sh);
++            g[3] = av_clip_int16((y11 * cy + cgu * u + cgv * v + in_rnd) >> in_sh);
++
++            b[0] = av_clip_int16((y00 * cy + cbu * u + in_rnd) >> in_sh);
++            b[1] = av_clip_int16((y01 * cy + cbu * u + in_rnd) >> in_sh);
++            b[2] = av_clip_int16((y10 * cy + cbu * u + in_rnd) >> in_sh);
++            b[3] = av_clip_int16((y11 * cy + cbu * u + in_rnd) >> in_sh);
++
++            tonemap_int16(r[0], g[0], b[0], &r[0], &g[0], &b[0],
++                          params->lin_lut, params->tonemap_lut, params->delin_lut,
++                          params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs,
params->rgb2rgb_passthrough); ++ tonemap_int16(r[1], g[1], b[1], &r[1], &g[1], &b[1], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[2], g[2], b[2], &r[2], &g[2], &b[2], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[3], g[3], b[3], &r[3], &g[3], &b[3], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ ++ r00 = r[0], g00 = g[0], b00 = b[0]; ++ r01 = r[1], g01 = g[1], b01 = b[1]; ++ r10 = r[2], g10 = g[2], b10 = b[2]; ++ r11 = r[3], g11 = g[3], b11 = b[3]; ++ ++ dsty[x] = av_clip_uintp2((params->out_yuv_off + ((r00 * cry + g00 * cgy + b00 * cby + out_rnd) >> out_sh)), 16); ++ dsty[x + 1] = av_clip_uintp2((params->out_yuv_off + ((r01 * cry + g01 * cgy + b01 * cby + out_rnd) >> out_sh)), 16); ++ dsty[dstlinesize[0] / 2 + x] = av_clip_uintp2((params->out_yuv_off + ((r10 * cry + g10 * cgy + b10 * cby + out_rnd) >> out_sh)), 16); ++ dsty[dstlinesize[0] / 2 + x + 1] = av_clip_uintp2((params->out_yuv_off + ((r11 * cry + g11 * cgy + b11 * cby + out_rnd) >> out_sh)), 16); ++ ++#define AVG(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2) ++ dstu[x >> 1] = av_clip_uintp2((out_uv_offset + ((AVG(r00, r01, r10, r11) * cru + AVG(g00, g01, g10, g11) * ocgu + AVG(b00, b01, b10, b11) * cburv + out_rnd) >> out_sh)), 16); ++ dstv[x >> 1] = av_clip_uintp2((out_uv_offset + ((AVG(r00, r01, r10, r11) * cburv + AVG(g00, g01, g10, g11) * ocgv + AVG(b00, b01, b10, b11) * cbv + out_rnd) >> out_sh)), 16); ++#undef AVG ++ } ++ } ++} ++ ++void tonemap_frame_p016_p010_2_p016_p010(uint16_t *dsty, uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int 
dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ const int in_sh2 = 16 - in_depth; ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ const int out_sh2 = 16 - out_depth; ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int r00, g00, b00; ++ int r01, g01, b01; ++ int r10, g10, b10; ++ int r11, g11, b11; ++ ++ int16_t r[4], g[4], b[4]; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstuv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcuv += srclinesize[1] / 2) { ++ for (int x = 0; x < width; x += 2) { ++ int y00 = (srcy[x] >> in_sh2) - params->in_yuv_off; ++ int y01 = (srcy[x + 1] >> in_sh2) - params->in_yuv_off; ++ int y10 = (srcy[srclinesize[0] / 2 + x] >> in_sh2) - params->in_yuv_off; ++ int y11 = (srcy[srclinesize[0] / 2 + x + 1] >> in_sh2) - params->in_yuv_off; ++ int u = (srcuv[x] >> in_sh2) - in_uv_offset; ++ int v = (srcuv[x + 1] >> in_sh2) - in_uv_offset; ++ ++ r[0] = av_clip_int16((y00 * cy + crv * v + in_rnd) >> in_sh); ++ r[1] = av_clip_int16((y01 * cy + crv * v + in_rnd) >> in_sh); ++ r[2] = av_clip_int16((y10 * cy + 
crv * v + in_rnd) >> in_sh); ++ r[3] = av_clip_int16((y11 * cy + crv * v + in_rnd) >> in_sh); ++ ++ g[0] = av_clip_int16((y00 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[1] = av_clip_int16((y01 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[2] = av_clip_int16((y10 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g[3] = av_clip_int16((y11 * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ ++ b[0] = av_clip_int16((y00 * cy + cbu * u + in_rnd) >> in_sh); ++ b[1] = av_clip_int16((y01 * cy + cbu * u + in_rnd) >> in_sh); ++ b[2] = av_clip_int16((y10 * cy + cbu * u + in_rnd) >> in_sh); ++ b[3] = av_clip_int16((y11 * cy + cbu * u + in_rnd) >> in_sh); ++ ++ tonemap_int16(r[0], g[0], b[0], &r[0], &g[0], &b[0], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[1], g[1], b[1], &r[1], &g[1], &b[1], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[2], g[2], b[2], &r[2], &g[2], &b[2], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ tonemap_int16(r[3], g[3], b[3], &r[3], &g[3], &b[3], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, params->rgb2rgb_passthrough); ++ ++ r00 = r[0], g00 = g[0], b00 = b[0]; ++ r01 = r[1], g01 = g[1], b01 = b[1]; ++ r10 = r[2], g10 = g[2], b10 = b[2]; ++ r11 = r[3], g11 = g[3], b11 = b[3]; ++ ++ dsty[x] = av_clip_uintp2((params->out_yuv_off + ((r00 * cry + g00 * cgy + b00 * cby + out_rnd) >> out_sh)) << out_sh2, 16); ++ dsty[x + 1] = av_clip_uintp2((params->out_yuv_off + ((r01 * cry + g01 * cgy + b01 * cby + out_rnd) >> out_sh)) << out_sh2, 16); ++ dsty[dstlinesize[0] / 2 + x] 
= av_clip_uintp2((params->out_yuv_off + ((r10 * cry + g10 * cgy + b10 * cby + out_rnd) >> out_sh)) << out_sh2, 16); ++ dsty[dstlinesize[0] / 2 + x + 1] = av_clip_uintp2((params->out_yuv_off + ((r11 * cry + g11 * cgy + b11 * cby + out_rnd) >> out_sh)) << out_sh2, 16); ++ ++#define AVG(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2) ++ dstuv[x] = av_clip_uintp2((out_uv_offset + ((AVG(r00, r01, r10, r11) * cru + AVG(g00, g01, g10, g11) * ocgu + AVG(b00, b01, b10, b11) * cburv + out_rnd) >> out_sh)) << out_sh2, 16); ++ dstuv[x + 1] = av_clip_uintp2((out_uv_offset + ((AVG(r00, r01, r10, r11) * cburv + AVG(g00, g01, g10, g11) * ocgv + AVG(b00, b01, b10, b11) * cbv + out_rnd) >> out_sh)) << out_sh2, 16); ++#undef AVG ++ } ++ } ++} ++ ++#define LOAD_TONEMAP_PARAMS TonemapxContext *s = ctx->priv; \ ++ThreadData *td = arg; \ ++AVFrame *in = td->in; \ ++AVFrame *out = td->out; \ ++const AVPixFmtDescriptor *desc = td->desc; \ ++const AVPixFmtDescriptor *odesc = td->odesc; \ ++const int ss = 1 << FFMAX(desc->log2_chroma_h, odesc->log2_chroma_h); \ ++const int slice_start = (in->height / ss * jobnr ) / nb_jobs * ss; \ ++const int slice_end = (in->height / ss * (jobnr + 1)) / nb_jobs * ss; \ ++TonemapIntParams params = { \ ++.lut_peak = s->lut_peak, \ ++.lin_lut = s->lin_lut, \ ++.tonemap_lut = s->tonemap_lut, \ ++.delin_lut = s->delin_lut, \ ++.in_yuv_off = s->in_yuv_off, \ ++.out_yuv_off = s->out_yuv_off, \ ++.yuv2rgb_coeffs = &s->yuv2rgb_coeffs, \ ++.rgb2yuv_coeffs = &s->rgb2yuv_coeffs, \ ++.rgb2rgb_coeffs = &s->rgb2rgb_coeffs, \ ++.rgb2rgb_passthrough = in->color_primaries == out->color_primaries, \ ++.coeffs = s->coeffs, \ ++.ocoeffs = s->ocoeffs, \ ++.desat = s->desat, \ ++.dovi = s->dovi, \ ++.dovi_pbuf = s->dovi_pbuf, \ ++.lms2rgb_matrix = &s->lms2rgb_matrix, \ ++.ycc_offset = &s->ycc_offset \ ++}; ++ ++static int filter_slice_planar8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) ++{ ++ LOAD_TONEMAP_PARAMS ++ av_log(s, AV_LOG_DEBUG, "planar dst depth: %d, src 
depth: %d\n", odesc->comp[0].depth, desc->comp[0].depth); ++ ++ s->tonemap_func_planar8(out->data[0] + out->linesize[0] * slice_start, ++ out->data[1] + out->linesize[1] * AV_CEIL_RSHIFT(slice_start, desc->log2_chroma_h), ++ out->data[2] + out->linesize[2] * AV_CEIL_RSHIFT(slice_start, desc->log2_chroma_h), ++ (void*)(in->data[0] + in->linesize[0] * slice_start), ++ (void*)(in->data[1] + in->linesize[1] * AV_CEIL_RSHIFT(slice_start, odesc->log2_chroma_h)), ++ (void*)(in->data[2] + in->linesize[2] * AV_CEIL_RSHIFT(slice_start, odesc->log2_chroma_h)), ++ out->linesize, in->linesize, ++ odesc->comp[0].depth, desc->comp[0].depth, ++ out->width, slice_end - slice_start, ++ ¶ms); ++ ++ return 0; ++} ++ ++static int filter_slice_biplanar8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) ++{ ++ LOAD_TONEMAP_PARAMS ++ av_log(s, AV_LOG_DEBUG, "biplanar dst depth: %d, src depth: %d\n", odesc->comp[0].depth, desc->comp[0].depth); ++ ++ s->tonemap_func_biplanar8(out->data[0] + out->linesize[0] * slice_start, ++ out->data[1] + out->linesize[1] * AV_CEIL_RSHIFT(slice_start, desc->log2_chroma_h), ++ (void*)(in->data[0] + in->linesize[0] * slice_start), ++ (void*)(in->data[1] + in->linesize[1] * AV_CEIL_RSHIFT(slice_start, odesc->log2_chroma_h)), ++ out->linesize, in->linesize, ++ odesc->comp[0].depth, desc->comp[0].depth, ++ out->width, slice_end - slice_start, ++ ¶ms); ++ ++ return 0; ++} ++ ++static int filter_slice_planar10(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) ++{ ++ LOAD_TONEMAP_PARAMS ++ av_log(s, AV_LOG_DEBUG, "planar dst depth: %d, src depth: %d\n", odesc->comp[0].depth, desc->comp[0].depth); ++ ++ s->tonemap_func_planar10((uint16_t *) (out->data[0] + out->linesize[0] * slice_start), ++ (uint16_t *) (out->data[1] + ++ out->linesize[1] * AV_CEIL_RSHIFT(slice_start, desc->log2_chroma_h)), ++ (uint16_t *) (out->data[2] + ++ out->linesize[2] * AV_CEIL_RSHIFT(slice_start, desc->log2_chroma_h)), ++ (void*)(in->data[0] + in->linesize[0] * slice_start), 
++                             (void*)(in->data[1] + in->linesize[1] * AV_CEIL_RSHIFT(slice_start, odesc->log2_chroma_h)),
++                             (void*)(in->data[2] + in->linesize[2] * AV_CEIL_RSHIFT(slice_start, odesc->log2_chroma_h)),
++                             out->linesize, in->linesize,
++                             odesc->comp[0].depth, desc->comp[0].depth,
++                             out->width, slice_end - slice_start,
++                             &params);
++
++    return 0;
++}
++
++static int filter_slice_biplanar10(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
++{
++    LOAD_TONEMAP_PARAMS
++    av_log(s, AV_LOG_DEBUG, "biplanar dst depth: %d, src depth: %d\n", odesc->comp[0].depth, desc->comp[0].depth);
++
++    s->tonemap_func_biplanar10((uint16_t *) (out->data[0] + out->linesize[0] * slice_start),
++                               (uint16_t *) (out->data[1] +
++                                             out->linesize[1] * AV_CEIL_RSHIFT(slice_start, desc->log2_chroma_h)),
++                               (void*)(in->data[0] + in->linesize[0] * slice_start),
++                               (void*)(in->data[1] + in->linesize[1] * AV_CEIL_RSHIFT(slice_start, odesc->log2_chroma_h)),
++                               out->linesize, in->linesize,
++                               odesc->comp[0].depth, desc->comp[0].depth,
++                               out->width, slice_end - slice_start,
++                               &params);
++
++    return 0;
++}
++
++static int filter_frame(AVFilterLink *link, AVFrame *in)
++{
++    AVFilterContext *ctx = link->dst;
++    TonemapxContext *s = ctx->priv;
++    AVFilterLink *outlink = ctx->outputs[0];
++    AVFrame *out;
++    const AVPixFmtDescriptor *desc;
++    const AVPixFmtDescriptor *odesc;
++    int ret;
++    double peak = s->peak;
++    const AVLumaCoefficients *coeffs;
++    ThreadData td;
++
++    desc = av_pix_fmt_desc_get(link->format);
++    odesc = av_pix_fmt_desc_get(outlink->format);
++    if (!desc || !odesc) {
++        av_frame_free(&in);
++        return AVERROR_BUG;
++    }
++
++    switch (odesc->comp[2].plane) {
++    case 1: // biplanar
++        if (odesc->comp[0].depth == 8) {
++            s->filter_slice = filter_slice_biplanar8;
++        } else {
++            s->filter_slice = filter_slice_biplanar10;
++        }
++        break;
++    default:
++    case 2: // planar
++        if (odesc->comp[0].depth == 8) {
++            s->filter_slice = filter_slice_planar8;
++        } else {
++            s->filter_slice = filter_slice_planar10;
++        }
++ break; ++ } ++ ++ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!out) { ++ av_frame_free(&in); ++ return AVERROR(ENOMEM); ++ } ++ ++ if ((ret = av_frame_copy_props(out, in)) < 0) ++ goto fail; ++ ++ /* read peak from side data if not passed in */ ++ if (!peak) { ++ peak = ff_determine_signal_peak(in); ++ av_log(s, AV_LOG_DEBUG, "Computed signal peak: %f\n", peak); ++ } ++ ++ out->color_trc = s->trc == -1 ? AVCOL_TRC_UNSPECIFIED : s->trc; ++ out->colorspace = s->spc == -1 ? AVCOL_SPC_UNSPECIFIED : s->spc; ++ out->color_primaries = s->pri == -1 ? AVCOL_PRI_UNSPECIFIED : s->pri; ++ out->color_range = s->range == -1 ? in->color_range : s->range; ++ ++ if (in->color_trc == AVCOL_TRC_UNSPECIFIED) ++ in->color_trc = AVCOL_TRC_SMPTE2084; ++ if (out->color_trc == AVCOL_TRC_UNSPECIFIED) ++ out->color_trc = AVCOL_TRC_BT709; ++ ++ if (in->colorspace == AVCOL_SPC_UNSPECIFIED) ++ in->colorspace = AVCOL_SPC_BT2020_NCL; ++ if (out->colorspace == AVCOL_SPC_UNSPECIFIED) ++ out->colorspace = AVCOL_SPC_BT709; ++ ++ if (in->color_primaries == AVCOL_PRI_UNSPECIFIED) ++ in->color_primaries = AVCOL_PRI_BT2020; ++ if (out->color_primaries == AVCOL_PRI_UNSPECIFIED) ++ out->color_primaries = AVCOL_PRI_BT709; ++ ++ if (in->color_range == AVCOL_RANGE_UNSPECIFIED) ++ in->color_range = AVCOL_RANGE_MPEG; ++ if (out->color_range == AVCOL_RANGE_UNSPECIFIED) ++ out->color_range = AVCOL_RANGE_MPEG; ++ ++ if (!s->lin_lut || !s->delin_lut) { ++ if ((ret = compute_trc_luts(s, in->color_trc, out->color_trc)) < 0) ++ goto fail; ++ } ++ ++ if (!s->tonemap_lut || s->lut_peak != peak) { ++ s->lut_peak = peak; ++ if ((ret = compute_tonemap_lut(s, out->color_trc)) < 0) ++ goto fail; ++ } ++ ++ coeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace); ++ if (s->coeffs != coeffs) { ++ s->coeffs = coeffs; ++ s->ocoeffs = av_csp_luma_coeffs_from_avcsp(out->colorspace); ++ if ((ret = compute_yuv_coeffs(s, coeffs, s->ocoeffs, desc, odesc, ++ in->color_range, out->color_range)) < 0) ++ goto 
fail;
++        if ((ret = compute_rgb_coeffs(s, in->color_primaries, out->color_primaries)) < 0)
++            goto fail;
++    }
++
++    if (s->apply_dovi) {
++        AVFrameSideData *dovi_sd = av_frame_get_side_data(in, AV_FRAME_DATA_DOVI_METADATA);
++        if (dovi_sd) {
++            const AVDOVIMetadata *metadata = (AVDOVIMetadata *) dovi_sd->data;
++            const AVDOVIRpuDataHeader *rpu = av_dovi_get_header(metadata);
++            // only map dovi rpus that don't require an EL and has rpu profile == 0
++            // for performance reason we only want to do reshaping when absolutely needed
++            // such videos usually have vdr_rpu_profile == 0, for example profile 5 videos
++            // this could be wrong as there is no public documentation on this field
++            if (rpu->disable_residual_flag && rpu->vdr_rpu_profile == 0) {
++                av_freep(&s->dovi); // release the previous frame's mapping instead of leaking it
++                s->dovi = av_malloc(sizeof(*s->dovi));
++                ret = AVERROR(ENOMEM); // consumed only by the goto below; ret is not read on the success path
++                if (!s->dovi)
++                    goto fail;
++                ff_map_dovi_metadata(s->dovi, metadata);
++            }
++        }
++
++        if (s->dovi) {
++            if (desc->comp[2].plane == 1) {
++                av_log(s, AV_LOG_ERROR, "Input pixel format has to be yuv420p10 for Dolby Vision reshaping\n");
++                av_assert0(0);
++            }
++            update_dovi_buf(ctx);
++            ff_matrix_mul_3x3(s->lms2rgb_matrix, dovi_lms2rgb_matrix, s->dovi->linear);
++            s->ycc_offset[0] = s->dovi->nonlinear_offset[0] * (float)s->dovi->nonlinear[0][0] + s->dovi->nonlinear_offset[1] * (float)s->dovi->nonlinear[0][1] + s->dovi->nonlinear_offset[2] * (float)s->dovi->nonlinear[0][2];
++            s->ycc_offset[1] = s->dovi->nonlinear_offset[0] * (float)s->dovi->nonlinear[1][0] + s->dovi->nonlinear_offset[1] * (float)s->dovi->nonlinear[1][1] + s->dovi->nonlinear_offset[2] * (float)s->dovi->nonlinear[1][2];
++            s->ycc_offset[2] = s->dovi->nonlinear_offset[0] * (float)s->dovi->nonlinear[2][0] + s->dovi->nonlinear_offset[1] * (float)s->dovi->nonlinear[2][1] + s->dovi->nonlinear_offset[2] * (float)s->dovi->nonlinear[2][2];
++            s->tonemap_func_planar8 = s->tonemap_func_dovi8;
++            s->tonemap_func_planar10 = s->tonemap_func_dovi10;
++        }
++    }
++
++    /* do the tonemap */
++
td.in = in; ++ td.out = out; ++ td.desc = desc; ++ td.odesc = odesc; ++ td.peak = peak; ++ ff_filter_execute(ctx, s->filter_slice, &td, NULL, ++ FFMIN(outlink->h >> FFMAX(desc->log2_chroma_h, odesc->log2_chroma_h), ff_filter_get_nb_threads(ctx))); ++ ++ av_frame_free(&in); ++ ++ av_frame_remove_side_data(out, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ av_frame_remove_side_data(out, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ av_frame_remove_side_data(out, AV_FRAME_DATA_DOVI_RPU_BUFFER); ++ av_frame_remove_side_data(out, AV_FRAME_DATA_DOVI_METADATA); ++ ++ return ff_filter_frame(outlink, out); ++fail: ++ av_frame_free(&in); ++ av_frame_free(&out); ++ return ret; ++} ++ ++static void uninit(AVFilterContext *ctx) ++{ ++ TonemapxContext *s = ctx->priv; ++ ++ av_freep(&s->lin_lut); ++ av_freep(&s->delin_lut); ++ av_freep(&s->tonemap_lut); ++ ++ if (s->dovi) ++ av_freep(&s->dovi); ++} ++ ++static int query_formats(AVFilterContext *ctx) ++{ ++ enum AVPixelFormat valid_in_pix_fmts[4]; ++ AVFilterFormats *formats; ++ const AVPixFmtDescriptor *desc; ++ TonemapxContext *s = ctx->priv; ++ ++ if (!strcmp(s->format_str, "same")) { ++ int res; ++ formats = ff_make_format_list(in_pix_fmts); ++ res = ff_formats_ref(formats, &ctx->inputs[0]->outcfg.formats); ++ if (res < 0) ++ return res; ++ s->format = AV_PIX_FMT_NONE; ++ } else { ++ int i, j = 0; ++ int res; ++ formats = ff_make_format_list(in_pix_fmts); ++ res = ff_formats_ref(formats, &ctx->inputs[0]->outcfg.formats); ++ if (res < 0) ++ return res; ++ if (s->format == AV_PIX_FMT_NONE) { ++ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); ++ return AVERROR(EINVAL); ++ } ++ s->format = av_get_pix_fmt(s->format_str); ++ // Check again in case of the string is invalid ++ if (s->format == AV_PIX_FMT_NONE) { ++ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); ++ return AVERROR(EINVAL); ++ } ++ desc = av_pix_fmt_desc_get(s->format); ++ // Filter out the input formats for requested 
output formats ++ // The input and output must have the same planar format, either planar or bi-planar packed ++ for (i = 0; in_pix_fmts[i] != AV_PIX_FMT_NONE; i++) { ++ const AVPixFmtDescriptor *tdesc = av_pix_fmt_desc_get(in_pix_fmts[i]); ++ if (tdesc->comp[2].plane == desc->comp[2].plane) { ++ valid_in_pix_fmts[j] = in_pix_fmts[i]; ++ j++; ++ } ++ } ++ valid_in_pix_fmts[j] = AV_PIX_FMT_NONE; ++ formats = ff_make_format_list(valid_in_pix_fmts); ++ res = ff_formats_ref(formats, &ctx->inputs[0]->outcfg.formats); ++ if (res < 0) ++ return res; ++ if (out_format_is_supported(s->format)) { ++ formats = NULL; ++ res = ff_add_format(&formats, s->format); ++ if (res < 0) ++ return res; ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(s->format)); ++ return AVERROR(ENOSYS); ++ } ++ } ++ ++ return ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats); ++} ++ ++static av_cold int init(AVFilterContext *ctx) ++{ ++ TonemapxContext *s = ctx->priv; ++ enum SIMDVariant active_simd = SIMD_NONE; ++ av_log(s, AV_LOG_DEBUG, "Requested output format: %s\n", ++ s->format_str); ++ ++#if ARCH_AARCH64 ++#ifdef ENABLE_TONEMAPX_NEON_INTRINSICS ++ { ++ int cpu_flags = av_get_cpu_flags(); ++ if (have_neon(cpu_flags)) { ++ s->tonemap_func_biplanar8 = tonemap_frame_p016_p010_2_nv12_neon; ++ s->tonemap_func_biplanar10 = tonemap_frame_p016_p010_2_p016_p010_neon; ++ s->tonemap_func_planar8 = tonemap_frame_420p10_2_420p_neon; ++ s->tonemap_func_planar10 = tonemap_frame_420p10_2_420p10_neon; ++ s->tonemap_func_dovi8 = tonemap_frame_dovi_2_420p_neon; ++ s->tonemap_func_dovi10 = tonemap_frame_dovi_2_420p10_neon; ++ active_simd = SIMD_NEON; ++ } ++ } ++#else ++ av_log(s, AV_LOG_WARNING, "NEON optimization disabled at compile time\n"); ++#endif // ENABLE_TONEMAPX_NEON_INTRINSICS ++#elif ARCH_X86 ++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS ++ { ++ int cpu_flags = av_get_cpu_flags(); ++ if (X86_SSE42(cpu_flags)) { ++ s->tonemap_func_biplanar8 = 
tonemap_frame_p016_p010_2_nv12_sse; ++ s->tonemap_func_biplanar10 = tonemap_frame_p016_p010_2_p016_p010_sse; ++ s->tonemap_func_planar8 = tonemap_frame_420p10_2_420p_sse; ++ s->tonemap_func_planar10 = tonemap_frame_420p10_2_420p10_sse; ++ s->tonemap_func_dovi8 = tonemap_frame_dovi_2_420p_sse; ++ s->tonemap_func_dovi10 = tonemap_frame_dovi_2_420p10_sse; ++ active_simd = SIMD_SSE; ++ } ++ } ++#else ++ av_log(s, AV_LOG_WARNING, "SSE optimization disabled at compile time\n"); ++#endif // ENABLE_TONEMAPX_SSE_INTRINSICS ++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS ++ { ++ int cpu_flags = av_get_cpu_flags(); ++ if (X86_AVX2(cpu_flags) && X86_FMA3(cpu_flags)) { ++ s->tonemap_func_biplanar8 = tonemap_frame_p016_p010_2_nv12_avx; ++ s->tonemap_func_biplanar10 = tonemap_frame_p016_p010_2_p016_p010_avx; ++ s->tonemap_func_planar8 = tonemap_frame_420p10_2_420p_avx; ++ s->tonemap_func_planar10 = tonemap_frame_420p10_2_420p10_avx; ++ s->tonemap_func_dovi8 = tonemap_frame_dovi_2_420p_avx; ++ s->tonemap_func_dovi10 = tonemap_frame_dovi_2_420p10_avx; ++ active_simd = SIMD_AVX; ++ } ++ } ++#else ++ av_log(s, AV_LOG_WARNING, "AVX optimization disabled at compile time\n"); ++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS ++#endif // ARCH_X86/ARCH_AARCH64 ++ ++#if !defined(ENABLE_TONEMAPX_NEON_INTRINSICS) && \ ++ !defined(ENABLE_TONEMAPX_SSE_INTRINSICS) && \ ++ !defined(ENABLE_TONEMAPX_AVX_INTRINSICS) ++ av_log(s, AV_LOG_WARNING, "SIMD optimization disabled at compile time\n"); ++#endif ++ ++ if (!s->tonemap_func_biplanar8) { ++ s->tonemap_func_biplanar8 = tonemap_frame_p016_p010_2_nv12; ++ } ++ ++ if (!s->tonemap_func_biplanar10) { ++ s->tonemap_func_biplanar10 = tonemap_frame_p016_p010_2_p016_p010; ++ } ++ ++ if (!s->tonemap_func_planar8) { ++ s->tonemap_func_planar8 = tonemap_frame_420p10_2_420p; ++ } ++ ++ if (!s->tonemap_func_planar10) { ++ s->tonemap_func_planar10 = tonemap_frame_420p10_2_420p10; ++ } ++ ++ if (!s->tonemap_func_dovi8) { ++ s->tonemap_func_dovi8 = tonemap_frame_dovi_2_420p; ++ 
} ++ ++ if (!s->tonemap_func_dovi10) { ++ s->tonemap_func_dovi10 = tonemap_frame_dovi_2_420p10; ++ } ++ ++ switch (active_simd) { ++ case SIMD_NEON: ++ av_log(s, AV_LOG_INFO, "Using CPU capability: NEON\n"); ++ break; ++ case SIMD_SSE: ++ av_log(s, AV_LOG_INFO, "Using CPU capability: SSE4.2\n"); ++ break; ++ case SIMD_AVX: ++ av_log(s, AV_LOG_INFO, "Using CPU capabilities: AVX2 FMA3\n"); ++ break; ++ default: ++ case SIMD_NONE: ++ av_log(s, AV_LOG_INFO, "No CPU SIMD extension available\n"); ++ break; ++ } ++ ++ switch (s->tonemap) { ++ case TONEMAP_GAMMA: ++ if (isnan(s->param)) ++ s->param = 1.8f; ++ break; ++ case TONEMAP_REINHARD: ++ if (!isnan(s->param)) ++ s->param = (1.0f - s->param) / s->param; ++ break; ++ case TONEMAP_MOBIUS: ++ if (isnan(s->param)) ++ s->param = 0.3f; ++ break; ++ } ++ ++ if (isnan(s->param)) ++ s->param = 1.0f; ++ ++ return 0; ++} ++ ++#define OFFSET(x) offsetof(TonemapxContext, x) ++#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM ++static const AVOption tonemapx_options[] = { ++ { "tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_BT2390}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, .unit = "tonemap" }, ++ { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { "bt2390", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_BT2390}, 0, 0, FLAGS, .unit = "tonemap" }, ++ { 
"transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, .unit = "transfer" }, ++ { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, .unit = "transfer" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, .unit = "transfer" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, .unit = "transfer" }, ++ { "matrix", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, .unit = "matrix" }, ++ { "m", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, .unit = "matrix" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, .unit = "matrix" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, .unit = "matrix" }, ++ { "primaries", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, .unit = "primaries" }, ++ { "p", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, .unit = "primaries" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, .unit = "primaries" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, .unit = "primaries" }, ++ { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "range" }, ++ { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, .unit = "range" }, ++ { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, .unit = "range" }, ++ { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, .unit = "range" }, ++ { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, .unit = "range" }, ++ { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = 
AVCOL_RANGE_JPEG}, 0, 0, FLAGS, .unit = "range" }, ++ { "format", "output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, ++ { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, ++ { "desat", "desaturation strength", OFFSET(desat), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, ++ { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, ++ { "apply_dovi", "Apply Dolby Vision metadata if possible", OFFSET(apply_dovi), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, ++ { NULL } ++}; ++ ++AVFILTER_DEFINE_CLASS(tonemapx); ++ ++static const AVFilterPad tonemapx_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = filter_frame, ++ }, ++}; ++ ++AVFilter ff_vf_tonemapx = { ++ .name = "tonemapx", ++ .description = NULL_IF_CONFIG_SMALL("SIMD optimized HDR to SDR tonemapping"), ++ .init = init, ++ .uninit = uninit, ++ .priv_size = sizeof(TonemapxContext), ++ .priv_class = &tonemapx_class, ++ FILTER_INPUTS(tonemapx_inputs), ++ FILTER_OUTPUTS(ff_video_default_filterpad), ++ FILTER_QUERY_FUNC(query_formats), ++ .flags = AVFILTER_FLAG_SLICE_THREADS, ++}; +Index: FFmpeg/libavfilter/vf_tonemapx.h +=================================================================== +--- /dev/null ++++ libavfilter/vf_tonemapx.h +@@ -0,0 +1,126 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVFILTER_TONEMAPX_H
++#define AVFILTER_TONEMAPX_H
++
++#include "config.h"
++#include "colorspace.h"
++
++#define X86_64_V2 __attribute__((target("sse4.2")))
++#define X86_64_V3 __attribute__((target("avx2,fma")))
++
++#if defined(__GNUC__) || defined(__clang__)
++#    if (__GNUC__ >= 9) || (__clang_major__ >= 11)
++#        define CC_SUPPORTS_TONEMAPX_INTRINSICS
++#    endif // (__GNUC__ >= 9) || (__clang_major__ >= 11)
++#endif // defined(__GNUC__) || defined(__clang__)
++
++#ifdef CC_SUPPORTS_TONEMAPX_INTRINSICS
++#    if ARCH_AARCH64
++#        if HAVE_INTRINSICS_NEON
++#            define ENABLE_TONEMAPX_NEON_INTRINSICS
++#        endif
++#    endif // ARCH_AARCH64
++#    if ARCH_X86
++#        if HAVE_INTRINSICS_SSE42
++#            define ENABLE_TONEMAPX_SSE_INTRINSICS
++#        endif
++#        if HAVE_INTRINSICS_AVX2 && HAVE_INTRINSICS_FMA3
++#            define ENABLE_TONEMAPX_AVX_INTRINSICS
++#        endif
++#    endif // ARCH_X86
++#endif // CC_SUPPORTS_TONEMAPX_INTRINSICS
++
++#define params_cnt 8
++#define pivots_cnt (7+1)
++#define coeffs_cnt (8*4)
++#define mmr_cnt (8*6*4)
++#define params_sz (params_cnt*sizeof(float))
++#define pivots_sz (pivots_cnt*sizeof(float))
++#define coeffs_sz (coeffs_cnt*sizeof(float))
++#define mmr_sz (mmr_cnt*sizeof(float))
++
++typedef struct TonemapIntParams {
++    double lut_peak;
++    float *lin_lut;
++    float *tonemap_lut;
++    uint16_t *delin_lut;
++    int in_yuv_off, out_yuv_off;
++    int16_t (*yuv2rgb_coeffs)[3][3][8];
++    int16_t (*rgb2yuv_coeffs)[3][3][8];
++    double (*rgb2rgb_coeffs)[3][3];
++    int rgb2rgb_passthrough;
++    const AVLumaCoefficients *coeffs, *ocoeffs;
++    double desat;
++    struct DoviMetadata *dovi;
++    float *dovi_pbuf;
++    double (*lms2rgb_matrix)[3][3];
++    float (*ycc_offset)[3];
++} TonemapIntParams;
++
++enum SIMDVariant {
++    SIMD_NONE = -1,
++    SIMD_NEON,
++    SIMD_SSE,
++
SIMD_AVX ++}; ++ ++void tonemap_frame_dovi_2_420p(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_420p10_2_420p(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_p016_p010_2_nv12(uint8_t *dsty, uint8_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_dovi_2_420p10(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_420p10_2_420p10(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++void tonemap_frame_p016_p010_2_p016_p010(uint16_t *dsty, uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++#endif // AVFILTER_TONEMAPX_H +Index: FFmpeg/libavfilter/x86/Makefile +=================================================================== +--- libavfilter/x86/Makefile ++++ libavfilter/x86/Makefile +@@ -34,6 
+34,8 @@ OBJS-$(CONFIG_STEREO3D_FILTER) + OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend_init.o + OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold_init.o + OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o ++OBJS-$(CONFIG_TONEMAPX_FILTER) += x86/vf_tonemapx_intrin_sse.o \ ++ x86/vf_tonemapx_intrin_avx.o + OBJS-$(CONFIG_TRANSPOSE_FILTER) += x86/vf_transpose_init.o + OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o + OBJS-$(CONFIG_V360_FILTER) += x86/vf_v360_init.o +Index: FFmpeg/libavfilter/x86/vf_tonemapx_intrin_avx.c +=================================================================== +--- /dev/null ++++ libavfilter/x86/vf_tonemapx_intrin_avx.c +@@ -0,0 +1,2276 @@ ++/* ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "vf_tonemapx_intrin_avx.h"
++
++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS
++#    include <immintrin.h>
++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS
++
++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS
++X86_64_V3 static inline __m256i av_clip_int16_avx(__m256i a)
++{
++    __m256i add_result = _mm256_add_epi32(a, _mm256_set1_epi32(0x8000U));
++    __m256i mask = _mm256_set1_epi32(~0xFFFF);
++    __m256i condition = _mm256_and_si256(add_result, mask);
++    __m256i cmp = _mm256_cmpeq_epi32(condition, _mm256_setzero_si256());
++
++    __m256i shifted = _mm256_srai_epi32(a, 31);
++    __m256i xor_result = _mm256_xor_si256(shifted, _mm256_set1_epi32(0x7FFF));
++
++    return _mm256_or_si256(_mm256_and_si256(cmp, a), _mm256_andnot_si256(cmp, xor_result));
++}
++
++X86_64_V3 inline static __m128 mix_float32x4(__m128 x, __m128 y, __m128 a)
++{
++    __m128 n = _mm_sub_ps(y, x);
++    n = _mm_fmadd_ps(a, n, x);
++    return n;
++}
++
++X86_64_V3 inline static float reduce_floatx4(__m128 x) {
++    x = _mm_hadd_ps(x, x);
++    x = _mm_hadd_ps(x, x);
++    return _mm_cvtss_f32(x);
++}
++
++X86_64_V3 inline static float reduce_floatx8(__m256 x) {
++    __m256 x2 = _mm256_permute2f128_ps(x , x , 1);
++    x = _mm256_add_ps(x, x2);
++    x = _mm256_hadd_ps(x, x);
++    x = _mm256_hadd_ps(x, x);
++    return _mm256_cvtss_f32(x);
++}
++
++X86_64_V3 static inline float reshape_poly(float s, __m128 coeffs)
++{
++    __m128 ps = _mm_set_ps(0.0f, s * s, s, 1.0f);
++    ps = _mm_mul_ps(ps, coeffs);
++    return reduce_floatx4(ps);
++}
++
++X86_64_V3 inline static float reshape_mmr(__m128 sig, __m128 coeffs, const float* mmr,
++                                          int mmr_single, int min_order, int max_order)
++{
++    float s = _mm_cvtss_f32(coeffs);
++    int mmr_idx = 0;
++    int order = 0;
++
++    __m256 sigX, mmr_coeffs, ps;
++    __m128 sigX01 = _mm_mul_ps(sig, _mm_shuffle_ps(sig,
sig, _MM_SHUFFLE(1, 1, 1, 1))); // {sig[0]*sig[1], sig[1]*sig[1], sig[2]*sig[1], sig[3]*sig[1]} ++ __m128 sigX02 = _mm_mul_ps(sig, _mm_shuffle_ps(sig, sig, _MM_SHUFFLE(2, 2, 2, 2))); // {sig[0]*sig[2], sig[1]*sig[2], sig[2]*sig[2], sig[3]*sig[2]} ++ __m128 sigX12 = _mm_mul_ps(sigX01, _mm_shuffle_ps(sig, sig, _MM_SHUFFLE(2, 2, 2, 2))); // {sig[0]*sig[1]*sig[2], sig[1]*sig[1]*sig[2], sig[2]*sig[1]*sig[2], sig[3]*sig[1]*sig[2]} ++ __m128 sigX0 = sigX01; // sig[0]*sig[1] now positioned at 0 ++ ++ sigX0 = _mm_insert_ps(sigX0, sigX02, _MM_MK_INSERTPS_NDX(0, 1, 0)); // sig[0]*sig[2] at 1 ++ sigX0 = _mm_insert_ps(sigX0, sigX02, _MM_MK_INSERTPS_NDX(1, 2, 0)); // sig[1]*sig[2] at 2 ++ sigX0 = _mm_insert_ps(sigX0, sigX12, _MM_MK_INSERTPS_NDX(0, 3, 0)); // sig[0]*sig[1]*sig[2] at 3 ++ ++ sigX = _mm256_set_m128(sigX0, sig); ++ ++ mmr_idx = mmr_single ? 0 : (int)_mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(3, 2, 0, 1))); ++ order = (int)_mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(1, 2, 0, 3))); ++ ++ // dot first order ++ mmr_coeffs = _mm256_loadu_ps(&mmr[mmr_idx + 0*4]); ++ ps = _mm256_mul_ps(sigX, mmr_coeffs); ++ s += reduce_floatx8(ps); ++ ++ if (max_order >= 2 && (min_order >= 2 || order >= 2)) { ++ __m256 sigX2 = _mm256_mul_ps(sigX, sigX); ++ mmr_coeffs = _mm256_loadu_ps(&mmr[mmr_idx + 2*4]); ++ ps = _mm256_mul_ps(sigX2, mmr_coeffs); ++ s += reduce_floatx8(ps); ++ ++ if (max_order == 3 && (min_order == 3 || order >= 3)) { ++ __m256 sigX3 = _mm256_mul_ps(sigX2, sigX); ++ mmr_coeffs = _mm256_loadu_ps(&mmr[mmr_idx + 4*4]); ++ ps = _mm256_mul_ps(sigX3, mmr_coeffs); ++ s += reduce_floatx8(ps); ++ } ++ } ++ ++ return s; ++} ++ ++#define CLAMP(a, b, c) (FFMIN(FFMAX((a), (b)), (c))) ++X86_64_V3 inline static __m128 reshape_dovi_iptpqc2(__m128 sig, const TonemapIntParams *ctx) ++{ ++ int has_mmr_poly; ++ float s; ++ ++ float *src_dovi_params = ctx->dovi_pbuf; ++ float *src_dovi_pivots = ctx->dovi_pbuf + 24; ++ float *src_dovi_coeffs = ctx->dovi_pbuf + 48; 
//float4* ++ float *src_dovi_mmr = ctx->dovi_pbuf + 144; //float4* ++ ++ float* dovi_params_i = src_dovi_params + 0*8; ++ float* dovi_pivots_i = src_dovi_pivots + 0*8; ++ float* dovi_coeffs_i = src_dovi_coeffs + 0 * 8 * 4; //float4* ++ float* dovi_mmr_i = src_dovi_mmr + 0 * 48 * 4; //float4* ++ int dovi_num_pivots_i = dovi_params_i[0]; ++ int dovi_has_mmr_i = dovi_params_i[1]; ++ int dovi_has_poly_i = dovi_params_i[2]; ++ int dovi_mmr_single_i = dovi_params_i[3]; ++ int dovi_min_order_i = dovi_params_i[4]; ++ int dovi_max_order_i = dovi_params_i[5]; ++ float dovi_lo_i = dovi_params_i[6]; ++ float dovi_hi_i = dovi_params_i[7]; ++ ++ float* dovi_params_p = src_dovi_params + 1*8; ++ float* dovi_coeffs_p = src_dovi_coeffs + 1*8 * 4; //float4* ++ float* dovi_mmr_p = src_dovi_mmr + 1*48 * 4; //float4* ++ int dovi_has_mmr_p = dovi_params_p[1]; ++ int dovi_has_poly_p = dovi_params_p[2]; ++ int dovi_mmr_single_p = dovi_params_p[3]; ++ int dovi_min_order_p = dovi_params_p[4]; ++ int dovi_max_order_p = dovi_params_p[5]; ++ float dovi_lo_p = dovi_params_p[6]; ++ float dovi_hi_p = dovi_params_p[7]; ++ ++ float* dovi_params_t = src_dovi_params + 2*8; ++ float* dovi_coeffs_t = src_dovi_coeffs + 2*8 * 4; //float4* ++ float* dovi_mmr_t = src_dovi_mmr + 2*48 * 4; //float4* ++ int dovi_has_mmr_t = dovi_params_t[1]; ++ int dovi_has_poly_t = dovi_params_t[2]; ++ int dovi_mmr_single_t = dovi_params_t[3]; ++ int dovi_min_order_t = dovi_params_t[4]; ++ int dovi_max_order_t = dovi_params_t[5]; ++ float dovi_lo_t = dovi_params_t[6]; ++ float dovi_hi_t = dovi_params_t[7]; ++ ++ __m128 coeffs, result; ++ ++ // reshape I ++ s = _mm_cvtss_f32(sig); ++ result = sig; ++ if (dovi_num_pivots_i > 2) { ++ __m128 m01 = mix_float32x4(_mm_loadu_ps(dovi_coeffs_i), _mm_loadu_ps(dovi_coeffs_i + 4), _mm_set1_ps(s >= dovi_pivots_i[0])); ++ __m128 m23 = mix_float32x4(_mm_loadu_ps(dovi_coeffs_i + 2*4), _mm_loadu_ps(dovi_coeffs_i + 3*4), _mm_set1_ps(s >= dovi_pivots_i[2])); ++ __m128 m0123 = mix_float32x4(m01, 
m23, _mm_set1_ps(s >= dovi_pivots_i[1])); ++ __m128 m45 = mix_float32x4(_mm_loadu_ps(dovi_coeffs_i + 4*4), _mm_loadu_ps(dovi_coeffs_i + 5*4), _mm_set1_ps(s >= dovi_pivots_i[4])); ++ __m128 m67 = mix_float32x4(_mm_loadu_ps(dovi_coeffs_i + 6*4), _mm_loadu_ps(dovi_coeffs_i + 7*4), _mm_set1_ps(s >= dovi_pivots_i[6])); ++ __m128 m4567 = mix_float32x4(m45, m67, _mm_set1_ps(s >= dovi_pivots_i[5])); ++ coeffs = mix_float32x4(m0123, m4567, _mm_set1_ps(s >= dovi_pivots_i[3])); ++ } else { ++ coeffs = _mm_loadu_ps(dovi_coeffs_i); ++ } ++ ++ has_mmr_poly = dovi_has_mmr_i && dovi_has_poly_i; ++ ++ if ((has_mmr_poly && _mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(3, 3, 3, 3))) == 0.0f) || (!has_mmr_poly && dovi_has_poly_i)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(result, coeffs, dovi_mmr_i, ++ dovi_mmr_single_i, dovi_min_order_i, dovi_max_order_i); ++ ++ result = _mm_insert_ps(result, _mm_set1_ps(CLAMP(s, dovi_lo_i, dovi_hi_i)), _MM_MK_INSERTPS_NDX(0, 0, 0)); ++ ++ // reshape P ++ s = _mm_cvtss_f32(_mm_shuffle_ps(sig, sig, _MM_SHUFFLE(1, 1, 1, 1))); ++ coeffs = _mm_loadu_ps(dovi_coeffs_p); ++ has_mmr_poly = dovi_has_mmr_p && dovi_has_poly_p; ++ ++ if ((has_mmr_poly && _mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(3, 3, 3, 3))) == 0.0f) || (!has_mmr_poly && dovi_has_poly_p)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(result, coeffs, dovi_mmr_p, ++ dovi_mmr_single_p, dovi_min_order_p, dovi_max_order_p); ++ ++ result = _mm_insert_ps(result, _mm_set1_ps(CLAMP(s, dovi_lo_p, dovi_hi_p)), _MM_MK_INSERTPS_NDX(0, 1, 0)); ++ ++ // reshape T ++ s = _mm_cvtss_f32(_mm_shuffle_ps(sig, sig, _MM_SHUFFLE(2, 2, 2, 2))); ++ coeffs = _mm_loadu_ps(dovi_coeffs_t); ++ has_mmr_poly = dovi_has_mmr_t && dovi_has_poly_t; ++ ++ if ((has_mmr_poly && _mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(3, 3, 3, 3))) == 0.0f) || (!has_mmr_poly && dovi_has_poly_t)) ++ s = reshape_poly(s, coeffs); ++ else ++ s = reshape_mmr(result, coeffs, 
dovi_mmr_t, ++ dovi_mmr_single_t, dovi_min_order_t, dovi_max_order_t); ++ ++ result = _mm_insert_ps(result, _mm_set1_ps(CLAMP(s, dovi_lo_t, dovi_hi_t)), _MM_MK_INSERTPS_NDX(0, 2, 0)); ++ ++ return result; ++} ++ ++X86_64_V3 inline static void ycc2rgbx8(__m256* dy, __m256* dcb, __m256* dcr, ++ __m256 y, __m256 cb, __m256 cr, ++ const double nonlinear[3][3], const float ycc_offset[3]) ++{ ++ *dy = _mm256_mul_ps(y, _mm256_set1_ps((float)nonlinear[0][0])); ++ *dy = _mm256_fmadd_ps(cb, _mm256_set1_ps((float)nonlinear[0][1]), *dy); ++ *dy = _mm256_fmadd_ps(cr, _mm256_set1_ps((float)nonlinear[0][2]), *dy); ++ *dy = _mm256_sub_ps(*dy, _mm256_set1_ps(ycc_offset[0])); ++ ++ *dcb = _mm256_mul_ps(y, _mm256_set1_ps((float)nonlinear[1][0])); ++ *dcb = _mm256_fmadd_ps(cb, _mm256_set1_ps((float)nonlinear[1][1]), *dcb); ++ *dcb = _mm256_fmadd_ps(cr, _mm256_set1_ps((float)nonlinear[1][2]), *dcb); ++ *dcb = _mm256_sub_ps(*dcb, _mm256_set1_ps(ycc_offset[1])); ++ ++ *dcr = _mm256_mul_ps(y, _mm256_set1_ps((float)nonlinear[2][0])); ++ *dcr = _mm256_fmadd_ps(cb, _mm256_set1_ps((float)nonlinear[2][1]), *dcr); ++ *dcr = _mm256_fmadd_ps(cr, _mm256_set1_ps((float)nonlinear[2][2]), *dcr); ++ *dcr = _mm256_sub_ps(*dcr, _mm256_set1_ps(ycc_offset[2])); ++} ++ ++X86_64_V3 inline static void lms2rgbx8(__m256* dl, __m256* dm, __m256* ds, ++ __m256 l, __m256 m, __m256 s, ++ const double lms2rgb_matrix[3][3]) ++{ ++ *dl = _mm256_mul_ps(l, _mm256_set1_ps((float)lms2rgb_matrix[0][0])); ++ *dl = _mm256_fmadd_ps(m, _mm256_set1_ps((float)lms2rgb_matrix[0][1]), *dl); ++ *dl = _mm256_fmadd_ps(s, _mm256_set1_ps((float)lms2rgb_matrix[0][2]), *dl); ++ ++ *dm = _mm256_mul_ps(l, _mm256_set1_ps((float)lms2rgb_matrix[1][0])); ++ *dm = _mm256_fmadd_ps(m, _mm256_set1_ps((float)lms2rgb_matrix[1][1]), *dm); ++ *dm = _mm256_fmadd_ps(s, _mm256_set1_ps((float)lms2rgb_matrix[1][2]), *dm); ++ ++ *ds = _mm256_mul_ps(l, _mm256_set1_ps((float)lms2rgb_matrix[2][0])); ++ *ds = _mm256_fmadd_ps(m, 
_mm256_set1_ps((float)lms2rgb_matrix[2][1]), *ds); ++ *ds = _mm256_fmadd_ps(s, _mm256_set1_ps((float)lms2rgb_matrix[2][2]), *ds); ++} ++ ++X86_64_V3 inline static void reshapeiptx8(__m128* ipt0, __m128* ipt1, __m128* ipt2, __m128* ipt3, ++ __m128* ipt4, __m128* ipt5, __m128* ipt6, __m128* ipt7, ++ __m256 yx8, __m256 ux8, __m256 vx8, ++ const struct TonemapIntParams *params) ++{ ++ __m128 yx4a = _mm256_extractf128_ps(yx8, 0); ++ __m128 yx4b = _mm256_extractf128_ps(yx8, 1); ++ __m128 ux4a = _mm256_extractf128_ps(ux8, 0); ++ __m128 ux4b = _mm256_extractf128_ps(ux8, 1); ++ __m128 vx4a = _mm256_extractf128_ps(vx8, 0); ++ __m128 vx4b = _mm256_extractf128_ps(vx8, 1); ++ ++ __m128 ia1 = _mm_unpacklo_ps(yx4a, ux4a); ++ __m128 ia2 = _mm_unpackhi_ps(yx4a, ux4a); ++ __m128 ib1 = _mm_unpacklo_ps(vx4a, _mm_setzero_ps()); ++ __m128 ib2 = _mm_unpackhi_ps(vx4a, _mm_setzero_ps()); ++ ++ *ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ *ipt1 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ *ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ *ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ *ipt0 = reshape_dovi_iptpqc2(*ipt0, params); ++ *ipt1 = reshape_dovi_iptpqc2(*ipt1, params); ++ *ipt2 = reshape_dovi_iptpqc2(*ipt2, params); ++ *ipt3 = reshape_dovi_iptpqc2(*ipt3, params); ++ ++ ia1 = _mm_unpacklo_ps(yx4b, ux4b); ++ ia2 = _mm_unpackhi_ps(yx4b, ux4b); ++ ib1 = _mm_unpacklo_ps(vx4b, _mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4b, _mm_setzero_ps()); ++ ++ *ipt4 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ *ipt5 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ *ipt6 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ *ipt7 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ *ipt4 = reshape_dovi_iptpqc2(*ipt4, params); ++ *ipt5 = reshape_dovi_iptpqc2(*ipt5, params); ++ *ipt6 = reshape_dovi_iptpqc2(*ipt6, params); ++ *ipt7 = reshape_dovi_iptpqc2(*ipt7, params); ++} ++ ++X86_64_V3 inline static void 
transpose_ipt8x4(__m128 ipt0, __m128 ipt1, __m128 ipt2, __m128 ipt3, ++ __m128 ipt4, __m128 ipt5, __m128 ipt6, __m128 ipt7, ++ __m256* ix8, __m256* px8, __m256* tx8) ++{ ++ __m256 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; ++ tmp0 = _mm256_castps128_ps256(ipt0); ++ tmp0 = _mm256_insertf128_ps(tmp0, ipt4, 1); ++ ++ tmp1 = _mm256_castps128_ps256(ipt1); ++ tmp1 = _mm256_insertf128_ps(tmp1, ipt5, 1); ++ ++ tmp2 = _mm256_castps128_ps256(ipt2); ++ tmp2 = _mm256_insertf128_ps(tmp2, ipt6, 1); ++ ++ tmp3 = _mm256_castps128_ps256(ipt3); ++ tmp3 = _mm256_insertf128_ps(tmp3, ipt7, 1); ++ ++ tmp4 = _mm256_unpacklo_ps(tmp0, tmp1); ++ tmp5 = _mm256_unpackhi_ps(tmp0, tmp1); ++ tmp6 = _mm256_unpacklo_ps(tmp2, tmp3); ++ tmp7 = _mm256_unpackhi_ps(tmp2, tmp3); ++ ++ *ix8 = _mm256_shuffle_ps(tmp4, tmp6, _MM_SHUFFLE(1, 0, 1, 0)); ++ *px8 = _mm256_shuffle_ps(tmp4, tmp6, _MM_SHUFFLE(3, 2, 3, 2)); ++ *tx8 = _mm256_shuffle_ps(tmp5, tmp7, _MM_SHUFFLE(1, 0, 1, 0)); ++} ++ ++X86_64_V3 static inline void tonemap_int32x8_avx(__m256i r_in, __m256i g_in, __m256i b_in, ++ int16_t *r_out, int16_t *g_out, int16_t *b_out, ++ float *lin_lut, float *tonemap_lut, uint16_t *delin_lut, ++ const AVLumaCoefficients *coeffs, ++ const AVLumaCoefficients *ocoeffs, double desat, ++ double (*rgb2rgb)[3][3], ++ int rgb2rgb_passthrough) ++{ ++ __m256i sig8; ++ __m256 mapvalx8, r_linx8, g_linx8, b_linx8; ++ __m256 offset = _mm256_set1_ps(0.5f); ++ __m256i zerox8 = _mm256_setzero_si256(); ++ __m256i input_lut_offset = _mm256_set1_epi32(2048); ++ __m256i upper_bound = _mm256_set1_epi32(32767); ++ __m256 intermediate_upper_bound = _mm256_set1_ps(32767.0f); ++ __m256i r, g, b, rx8, gx8, bx8; ++ ++ float mapval8[8], r_lin8[8], g_lin8[8], b_lin8[8]; ++ ++ sig8 = _mm256_max_epi32(r_in, _mm256_max_epi32(g_in, b_in)); ++ sig8 = _mm256_add_epi32(sig8, input_lut_offset); ++ sig8 = _mm256_min_epi32(sig8, upper_bound); ++ sig8 = _mm256_max_epi32(sig8, zerox8); ++ ++ r = _mm256_add_epi32(r_in, input_lut_offset); ++ r = 
_mm256_min_epi32(r, upper_bound); ++ r = _mm256_max_epi32(r, zerox8); ++ g = _mm256_add_epi32(g_in, input_lut_offset); ++ g = _mm256_min_epi32(g, upper_bound); ++ g = _mm256_max_epi32(g, zerox8); ++ b = _mm256_add_epi32(b_in, input_lut_offset); ++ b = _mm256_min_epi32(b, upper_bound); ++ b = _mm256_max_epi32(b, zerox8); ++ ++#define LOAD_LUT(i) mapval8[i] = tonemap_lut[_mm256_extract_epi32(sig8, i)]; \ ++r_lin8[i] = lin_lut[_mm256_extract_epi32(r, i)]; \ ++g_lin8[i] = lin_lut[_mm256_extract_epi32(g, i)]; \ ++b_lin8[i] = lin_lut[_mm256_extract_epi32(b, i)]; ++ ++ LOAD_LUT(0) ++ LOAD_LUT(1) ++ LOAD_LUT(2) ++ LOAD_LUT(3) ++ LOAD_LUT(4) ++ LOAD_LUT(5) ++ LOAD_LUT(6) ++ LOAD_LUT(7) ++ ++#undef LOAD_LUT ++ ++ mapvalx8 = _mm256_loadu_ps(mapval8); ++ r_linx8 = _mm256_loadu_ps(r_lin8); ++ g_linx8 = _mm256_loadu_ps(g_lin8); ++ b_linx8 = _mm256_loadu_ps(b_lin8); ++ ++ if (!rgb2rgb_passthrough) { ++ r_linx8 = _mm256_mul_ps(r_linx8, _mm256_set1_ps((float)(*rgb2rgb)[0][0])); ++ r_linx8 = _mm256_fmadd_ps(g_linx8, _mm256_set1_ps((float)(*rgb2rgb)[0][1]), r_linx8); ++ r_linx8 = _mm256_fmadd_ps(b_linx8, _mm256_set1_ps((float)(*rgb2rgb)[0][2]), r_linx8); ++ ++ g_linx8 = _mm256_mul_ps(g_linx8, _mm256_set1_ps((float)(*rgb2rgb)[1][1])); ++ g_linx8 = _mm256_fmadd_ps(r_linx8, _mm256_set1_ps((float)(*rgb2rgb)[1][0]), g_linx8); ++ g_linx8 = _mm256_fmadd_ps(b_linx8, _mm256_set1_ps((float)(*rgb2rgb)[1][2]), g_linx8); ++ ++ b_linx8 = _mm256_mul_ps(b_linx8, _mm256_set1_ps((float)(*rgb2rgb)[2][2])); ++ b_linx8 = _mm256_fmadd_ps(r_linx8, _mm256_set1_ps((float)(*rgb2rgb)[2][0]), b_linx8); ++ b_linx8 = _mm256_fmadd_ps(g_linx8, _mm256_set1_ps((float)(*rgb2rgb)[2][1]), b_linx8); ++ } ++ ++ if (desat > 0) { ++ __m256 eps_x8 = _mm256_set1_ps(FLOAT_EPS); ++ __m256 desat8 = _mm256_set1_ps((float)desat); ++ __m256 luma8 = _mm256_set1_ps(0); ++ __m256 overbright8; ++ ++ luma8 = _mm256_fmadd_ps(r_linx8, _mm256_set1_ps((float)av_q2d(coeffs->cr)), luma8); ++ luma8 = _mm256_fmadd_ps(g_linx8, 
_mm256_set1_ps((float)av_q2d(coeffs->cg)), luma8); ++ luma8 = _mm256_fmadd_ps(b_linx8, _mm256_set1_ps((float)av_q2d(coeffs->cb)), luma8); ++ overbright8 = _mm256_div_ps(_mm256_max_ps(_mm256_sub_ps(luma8, desat8), eps_x8), _mm256_max_ps(luma8, eps_x8)); ++ r_linx8 = _mm256_fnmadd_ps(r_linx8, overbright8, r_linx8); ++ r_linx8 = _mm256_fmadd_ps(luma8, overbright8, r_linx8); ++ g_linx8 = _mm256_fnmadd_ps(g_linx8, overbright8, g_linx8); ++ g_linx8 = _mm256_fmadd_ps(luma8, overbright8, g_linx8); ++ b_linx8 = _mm256_fnmadd_ps(b_linx8, overbright8, b_linx8); ++ b_linx8 = _mm256_fmadd_ps(luma8, overbright8, b_linx8); ++ } ++ ++ r_linx8 = _mm256_mul_ps(r_linx8, mapvalx8); ++ g_linx8 = _mm256_mul_ps(g_linx8, mapvalx8); ++ b_linx8 = _mm256_mul_ps(b_linx8, mapvalx8); ++ ++ r_linx8 = _mm256_fmadd_ps(r_linx8, intermediate_upper_bound, offset); ++ g_linx8 = _mm256_fmadd_ps(g_linx8, intermediate_upper_bound, offset); ++ b_linx8 = _mm256_fmadd_ps(b_linx8, intermediate_upper_bound, offset); ++ ++ rx8 = _mm256_cvttps_epi32(r_linx8); ++ rx8 = _mm256_min_epi32(rx8, upper_bound); ++ rx8 = _mm256_max_epi32(rx8, zerox8); ++ ++ gx8 = _mm256_cvttps_epi32(g_linx8); ++ gx8 = _mm256_min_epi32(gx8, upper_bound); ++ gx8 = _mm256_max_epi32(gx8, zerox8); ++ ++ bx8 = _mm256_cvttps_epi32(b_linx8); ++ bx8 = _mm256_min_epi32(bx8, upper_bound); ++ bx8 = _mm256_max_epi32(bx8, zerox8); ++ ++#define SAVE_COLOR(i) r_out[i] = delin_lut[_mm256_extract_epi32(rx8, i)]; \ ++g_out[i] = delin_lut[_mm256_extract_epi32(gx8, i)]; \ ++b_out[i] = delin_lut[_mm256_extract_epi32(bx8, i)]; ++ ++ SAVE_COLOR(0) ++ SAVE_COLOR(1) ++ SAVE_COLOR(2) ++ SAVE_COLOR(3) ++ SAVE_COLOR(4) ++ SAVE_COLOR(5) ++ SAVE_COLOR(6) ++ SAVE_COLOR(7) ++ ++#undef SAVE_COLOR ++} ++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS ++ ++X86_64_V3 void tonemap_frame_dovi_2_420p_avx(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, 
int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS ++ uint8_t *rdsty = dsty; /* the r* copies keep the entry values: the SIMD loop advances the originals, the tail pass at the end starts again from these */ ++ uint8_t *rdstu = dstu; ++ uint8_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // non-zero when the width is not a multiple of 16 (columns left over by the 16-wide SIMD loop) ++ // bit 0 is masked off: the last pixel is intentionally left empty when the input width is odd ++ int remainw = width & 14; ++ ++ const int in_depth = srcdepth; ++ const float in_rng = (float)((1 << in_depth) - 1); /* largest code value at the source bit depth */ ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); /* half of 1 << out_sh, added before the arithmetic right shift by out_sh */ ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[16], g[16], b[16]; /* tone-mapped RGB of the 16 pixels on the first row of the current 16x2 tile */ ++ int16_t r1[16], g1[16], b1[16]; /* same for the second row of the tile */ ++ ++ __m256i ux8, vx8; ++ __m256i y0x16, y1x16; ++ __m256i y0x8a, y0x8b, y1x8a, y1x8b, ux8a, ux8b, vx8a, vx8b; ++ __m256i r0x8a, g0x8a, b0x8a, r0x8b, g0x8b, b0x8b; ++ __m256i r1x8a, g1x8a, b1x8a, r1x8b, g1x8b, b1x8b; ++ ++ __m256i r0ox16, g0ox16, b0ox16; ++ __m256i y0ox16; ++ __m256i roax8, robx8, goax8, gobx8, boax8, bobx8; ++ __m256i yoax8, yobx8; ++ ++ __m256i r1ox16, g1ox16, b1ox16; ++ __m256i y1ox16; ++ __m256i r1oax8, r1obx8, g1oax8, g1obx8, b1oax8, b1obx8; ++ __m256i y1oax8, y1obx8; ++ __m256i uox8, vox8, ravgx8, gavgx8, bavgx8; ++ ++ __m128 ipt0, ipt1, ipt2, ipt3, ipt4, ipt5, ipt6, ipt7; ++ __m256 ix8, px8, tx8; ++ __m256 lx8, mx8, sx8; ++ __m256 rx8a, gx8a, bx8a, rx8b, gx8b, bx8b; ++ __m256 y0x8af, y0x8bf, y1x8af, y1x8bf, ux8af, ux8bf, vx8af, vx8bf; ++ for (; height > 1; height -= 2, ++ 
dsty += dstlinesize[0] * 2, dstu += dstlinesize[1], dstv += dstlinesize[2], ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) { ++ for (int xx = 0; xx < width >> 4; xx++) { ++ int x = xx << 4; ++ ++ y0x16 = _mm256_lddqu_si256((__m256i*)(srcy + x)); ++ y1x16 = _mm256_lddqu_si256((__m256i*)(srcy + (srclinesize[0] / 2 + x))); ++ ux8 = _mm256_cvtepi16_epi32(_mm_lddqu_si128((__m128i_u *)(srcu + (x >> 1)))); ++ vx8 = _mm256_cvtepi16_epi32(_mm_lddqu_si128((__m128i_u *)(srcv + (x >> 1)))); ++ ++ y0x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 0)); ++ y0x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 1)); ++ y1x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 0)); ++ y1x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 1)); ++ ++ ux8a = _mm256_permutevar8x32_epi32(ux8, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); ++ ux8b = _mm256_permutevar8x32_epi32(ux8, _mm256_set_epi32(7, 7, 6, 6, 5, 5, 4, 4)); ++ vx8a = _mm256_permutevar8x32_epi32(vx8, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); ++ vx8b = _mm256_permutevar8x32_epi32(vx8, _mm256_set_epi32(7, 7, 6, 6, 5, 5, 4, 4)); ++ ++ y0x8af = _mm256_cvtepi32_ps(y0x8a); ++ y0x8bf = _mm256_cvtepi32_ps(y0x8b); ++ y1x8af = _mm256_cvtepi32_ps(y1x8a); ++ y1x8bf = _mm256_cvtepi32_ps(y1x8b); ++ ux8af = _mm256_cvtepi32_ps(ux8a); ++ ux8bf = _mm256_cvtepi32_ps(ux8b); ++ vx8af = _mm256_cvtepi32_ps(vx8a); ++ vx8bf = _mm256_cvtepi32_ps(vx8b); ++ ++ y0x8af = _mm256_div_ps(y0x8af, _mm256_set1_ps(in_rng)); ++ y0x8bf = _mm256_div_ps(y0x8bf, _mm256_set1_ps(in_rng)); ++ y1x8af = _mm256_div_ps(y1x8af, _mm256_set1_ps(in_rng)); ++ y1x8bf = _mm256_div_ps(y1x8bf, _mm256_set1_ps(in_rng)); ++ ux8af = _mm256_div_ps(ux8af, _mm256_set1_ps(in_rng)); ++ ux8bf = _mm256_div_ps(ux8bf, _mm256_set1_ps(in_rng)); ++ vx8af = _mm256_div_ps(vx8af, _mm256_set1_ps(in_rng)); ++ vx8bf = _mm256_div_ps(vx8bf, _mm256_set1_ps(in_rng)); ++ ++ // Reshape y0x8a ++ reshapeiptx8(&ipt0, &ipt1, &ipt2, &ipt3, ++ 
&ipt4, &ipt5, &ipt6, &ipt7, ++ y0x8af, ux8af, vx8af, params); ++ ++ transpose_ipt8x4(ipt0, ipt1, ipt2, ipt3, ++ ipt4, ipt5, ipt6, ipt7, ++ &ix8, &px8, &tx8); ++ ++ ycc2rgbx8(&lx8, &mx8, &sx8, ix8, px8, tx8, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx8(&rx8a, &gx8a, &bx8a, lx8, mx8, sx8, *params->lms2rgb_matrix); ++ ++ rx8a = _mm256_mul_ps(rx8a, _mm256_set1_ps(28672.0f)); ++ gx8a = _mm256_mul_ps(gx8a, _mm256_set1_ps(28672.0f)); ++ bx8a = _mm256_mul_ps(bx8a, _mm256_set1_ps(28672.0f)); ++ ++ r0x8a = _mm256_cvtps_epi32(rx8a); ++ g0x8a = _mm256_cvtps_epi32(gx8a); ++ b0x8a = _mm256_cvtps_epi32(bx8a); ++ ++ // Reshape y1x8a ++ reshapeiptx8(&ipt0, &ipt1, &ipt2, &ipt3, ++ &ipt4, &ipt5, &ipt6, &ipt7, ++ y1x8af, ux8af, vx8af, params); ++ ++ transpose_ipt8x4(ipt0, ipt1, ipt2, ipt3, ++ ipt4, ipt5, ipt6, ipt7, ++ &ix8, &px8, &tx8); ++ ++ ycc2rgbx8(&lx8, &mx8, &sx8, ix8, px8, tx8, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx8(&rx8a, &gx8a, &bx8a, lx8, mx8, sx8, *params->lms2rgb_matrix); ++ ++ rx8a = _mm256_mul_ps(rx8a, _mm256_set1_ps(28672.0f)); ++ gx8a = _mm256_mul_ps(gx8a, _mm256_set1_ps(28672.0f)); ++ bx8a = _mm256_mul_ps(bx8a, _mm256_set1_ps(28672.0f)); ++ ++ r1x8a = _mm256_cvtps_epi32(rx8a); ++ g1x8a = _mm256_cvtps_epi32(gx8a); ++ b1x8a = _mm256_cvtps_epi32(bx8a); ++ ++ // Reshape y0x8b ++ reshapeiptx8(&ipt0, &ipt1, &ipt2, &ipt3, ++ &ipt4, &ipt5, &ipt6, &ipt7, ++ y0x8bf, ux8bf, vx8bf, params); ++ ++ transpose_ipt8x4(ipt0, ipt1, ipt2, ipt3, ++ ipt4, ipt5, ipt6, ipt7, ++ &ix8, &px8, &tx8); ++ ++ ycc2rgbx8(&lx8, &mx8, &sx8, ix8, px8, tx8, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx8(&rx8b, &gx8b, &bx8b, lx8, mx8, sx8, *params->lms2rgb_matrix); ++ ++ rx8b = _mm256_mul_ps(rx8b, _mm256_set1_ps(28672.0f)); ++ gx8b = _mm256_mul_ps(gx8b, _mm256_set1_ps(28672.0f)); ++ bx8b = _mm256_mul_ps(bx8b, _mm256_set1_ps(28672.0f)); ++ ++ r0x8b = _mm256_cvtps_epi32(rx8b); ++ g0x8b = _mm256_cvtps_epi32(gx8b); ++ b0x8b = _mm256_cvtps_epi32(bx8b); ++ ++ // 
Reshape y1x8b ++ reshapeiptx8(&ipt0, &ipt1, &ipt2, &ipt3, ++ &ipt4, &ipt5, &ipt6, &ipt7, ++ y1x8bf, ux8bf, vx8bf, params); ++ ++ transpose_ipt8x4(ipt0, ipt1, ipt2, ipt3, ++ ipt4, ipt5, ipt6, ipt7, ++ &ix8, &px8, &tx8); ++ ++ ycc2rgbx8(&lx8, &mx8, &sx8, ix8, px8, tx8, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx8(&rx8b, &gx8b, &bx8b, lx8, mx8, sx8, *params->lms2rgb_matrix); ++ ++ rx8b = _mm256_mul_ps(rx8b, _mm256_set1_ps(28672.0f)); ++ gx8b = _mm256_mul_ps(gx8b, _mm256_set1_ps(28672.0f)); ++ bx8b = _mm256_mul_ps(bx8b, _mm256_set1_ps(28672.0f)); ++ ++ r1x8b = _mm256_cvtps_epi32(rx8b); ++ g1x8b = _mm256_cvtps_epi32(gx8b); ++ b1x8b = _mm256_cvtps_epi32(bx8b); ++ ++ tonemap_int32x8_avx(r0x8a, g0x8a, b0x8a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8a, g1x8a, b1x8a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r0x8b, g0x8b, b0x8b, &r[8], &g[8], &b[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8b, g1x8b, b1x8b, &r1[8], &g1[8], &b1[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox16 = _mm256_lddqu_si256((const __m256i_u *)r); ++ g0ox16 = _mm256_lddqu_si256((const __m256i_u *)g); ++ b0ox16 = _mm256_lddqu_si256((const __m256i_u *)b); ++ ++ roax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 0)); ++ goax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 0)); ++ boax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 0)); ++ ++ robx8 = 
_mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 1)); ++ gobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 1)); ++ bobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 1)); ++ ++ yoax8 = _mm256_mullo_epi32(roax8, _mm256_set1_epi32(cry)); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(goax8, _mm256_set1_epi32(cgy))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(boax8, _mm256_set1_epi32(cby))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(out_rnd)); ++ yoax8 = _mm256_srai_epi32(yoax8, out_sh); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ yobx8 = _mm256_mullo_epi32(robx8, _mm256_set1_epi32(cry)); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(gobx8, _mm256_set1_epi32(cgy))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(bobx8, _mm256_set1_epi32(cby))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(out_rnd)); ++ yobx8 = _mm256_srai_epi32(yobx8, out_sh); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y0ox16 = _mm256_packs_epi32(yoax8, yobx8); ++ y0ox16 = _mm256_permute4x64_epi64(y0ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dsty[x], _mm256_castsi256_si128(_mm256_permute4x64_epi64(_mm256_packus_epi16(y0ox16, _mm256_setzero_si256()), _MM_SHUFFLE(3, 1, 2, 0)))); ++ ++ r1ox16 = _mm256_lddqu_si256((const __m256i_u *)r1); ++ g1ox16 = _mm256_lddqu_si256((const __m256i_u *)g1); ++ b1ox16 = _mm256_lddqu_si256((const __m256i_u *)b1); ++ ++ r1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 0)); ++ g1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 0)); ++ b1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 0)); ++ ++ r1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 1)); ++ g1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 1)); ++ b1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 1)); ++ ++ y1oax8 = _mm256_mullo_epi32(r1oax8, 
_mm256_set1_epi32(cry)); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(g1oax8, _mm256_set1_epi32(cgy))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(b1oax8, _mm256_set1_epi32(cby))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(out_rnd)); ++ y1oax8 = _mm256_srai_epi32(y1oax8, out_sh); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1obx8 = _mm256_mullo_epi32(r1obx8, _mm256_set1_epi32(cry)); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(g1obx8, _mm256_set1_epi32(cgy))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(b1obx8, _mm256_set1_epi32(cby))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(out_rnd)); ++ y1obx8 = _mm256_srai_epi32(y1obx8, out_sh); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1ox16 = _mm256_packs_epi32(y1oax8, y1obx8); ++ y1ox16 = _mm256_permute4x64_epi64(y1ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dsty[x + dstlinesize[0]], _mm256_castsi256_si128(_mm256_permute4x64_epi64(_mm256_packus_epi16(y1ox16, _mm256_setzero_si256()), _MM_SHUFFLE(3, 1, 2, 0)))); ++ ++ ravgx8 = _mm256_hadd_epi32(roax8, robx8); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_hadd_epi32(r1oax8, r1obx8)); ++ ravgx8 = _mm256_permute4x64_epi64(ravgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_set1_epi32(2)); ++ ravgx8 = _mm256_srai_epi32(ravgx8, 2); ++ ++ gavgx8 = _mm256_hadd_epi32(goax8, gobx8); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_hadd_epi32(g1oax8, g1obx8)); ++ gavgx8 = _mm256_permute4x64_epi64(gavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_set1_epi32(2)); ++ gavgx8 = _mm256_srai_epi32(gavgx8, 2); ++ ++ bavgx8 = _mm256_hadd_epi32(boax8, bobx8); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_hadd_epi32(b1oax8, b1obx8)); ++ bavgx8 = _mm256_permute4x64_epi64(bavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_set1_epi32(2)); ++ bavgx8 = 
_mm256_srai_epi32(bavgx8, 2); ++ ++ uox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cru))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgu))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cburv))); ++ uox8 = _mm256_srai_epi32(uox8, out_sh); ++ uox8 = _mm256_add_epi32(uox8, _mm256_set1_epi32(out_uv_offset)); ++ uox8 = _mm256_packs_epi32(uox8, _mm256_setzero_si256()); ++ uox8 = _mm256_permute4x64_epi64(uox8, _MM_SHUFFLE(3, 1, 2, 0)); ++ uox8 = _mm256_packus_epi16(uox8, _mm256_setzero_si256()); ++ _mm_storeu_si64(&dstu[x >> 1], _mm256_castsi256_si128(uox8)); /* writes the low 64 bits: 8 U bytes for this 16-pixel-wide tile */ ++ ++ vox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cburv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cbv))); ++ vox8 = _mm256_srai_epi32(vox8, out_sh); ++ vox8 = _mm256_add_epi32(vox8, _mm256_set1_epi32(out_uv_offset)); ++ vox8 = _mm256_packs_epi32(vox8, _mm256_setzero_si256()); ++ vox8 = _mm256_permute4x64_epi64(vox8, _MM_SHUFFLE(3, 1, 2, 0)); ++ vox8 = _mm256_packus_epi16(vox8, _mm256_setzero_si256()); ++ _mm_storeu_si64(&dstv[x >> 1], _mm256_castsi256_si128(vox8)); /* writes the low 64 bits: 8 V bytes for this 16-pixel-wide tile */ ++ } ++ } ++ ++ // Process the remaining columns, which cannot fill a full SIMD register, with the scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff0; /* width rounded down to a multiple of 16: first column the SIMD loop did not cover */ ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ tonemap_frame_dovi_2_420p(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS ++} ++ ++X86_64_V3 void tonemap_frame_dovi_2_420p10_avx(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t 
*srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstu = dstu; ++ uint16_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // non-zero when the width is not a multiple of 16 (the SIMD loop in this function consumes 16 pixels per iteration) ++ // bit 0 is masked off: the last pixel is intentionally left empty when the input width is odd ++ int remainw = width & 14; ++ ++ const int in_depth = srcdepth; ++ const float in_rng = (float)((1 << in_depth) - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[16], g[16], b[16]; ++ int16_t r1[16], g1[16], b1[16]; ++ ++ __m256i ux8, vx8; ++ __m256i y0x16, y1x16; ++ __m256i y0x8a, y0x8b, y1x8a, y1x8b, ux8a, ux8b, vx8a, vx8b; ++ __m256i r0x8a, g0x8a, b0x8a, r0x8b, g0x8b, b0x8b; ++ __m256i r1x8a, g1x8a, b1x8a, r1x8b, g1x8b, b1x8b; ++ ++ __m256i r0ox16, g0ox16, b0ox16; ++ __m256i y0ox16; ++ __m256i roax8, robx8, goax8, gobx8, boax8, bobx8; ++ __m256i yoax8, yobx8; ++ ++ __m256i r1ox16, g1ox16, b1ox16; ++ __m256i y1ox16; ++ __m256i r1oax8, r1obx8, g1oax8, g1obx8, b1oax8, b1obx8; ++ __m256i y1oax8, y1obx8; ++ __m256i uox8, vox8, ravgx8, gavgx8, bavgx8; ++ ++ __m128 ipt0, ipt1, ipt2, ipt3, ipt4, ipt5, ipt6, ipt7; ++ __m256 ix8, px8, tx8; ++ __m256 lx8, mx8, sx8; ++ __m256 rx8a, gx8a, bx8a, rx8b, gx8b, bx8b; ++ __m256 y0x8af, y0x8bf, y1x8af, 
y1x8bf, ux8af, ux8bf, vx8af, vx8bf; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstu += dstlinesize[1] / 2, dstv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 4; xx++) { ++ int x = xx << 4; ++ ++ y0x16 = _mm256_lddqu_si256((__m256i*)(srcy + x)); ++ y1x16 = _mm256_lddqu_si256((__m256i*)(srcy + (srclinesize[0] / 2 + x))); ++ ux8 = _mm256_cvtepi16_epi32(_mm_lddqu_si128((__m128i_u *)(srcu + (x >> 1)))); ++ vx8 = _mm256_cvtepi16_epi32(_mm_lddqu_si128((__m128i_u *)(srcv + (x >> 1)))); ++ ++ y0x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 0)); ++ y0x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 1)); ++ y1x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 0)); ++ y1x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 1)); ++ ++ ux8a = _mm256_permutevar8x32_epi32(ux8, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); ++ ux8b = _mm256_permutevar8x32_epi32(ux8, _mm256_set_epi32(7, 7, 6, 6, 5, 5, 4, 4)); ++ vx8a = _mm256_permutevar8x32_epi32(vx8, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); ++ vx8b = _mm256_permutevar8x32_epi32(vx8, _mm256_set_epi32(7, 7, 6, 6, 5, 5, 4, 4)); ++ ++ y0x8af = _mm256_cvtepi32_ps(y0x8a); ++ y0x8bf = _mm256_cvtepi32_ps(y0x8b); ++ y1x8af = _mm256_cvtepi32_ps(y1x8a); ++ y1x8bf = _mm256_cvtepi32_ps(y1x8b); ++ ux8af = _mm256_cvtepi32_ps(ux8a); ++ ux8bf = _mm256_cvtepi32_ps(ux8b); ++ vx8af = _mm256_cvtepi32_ps(vx8a); ++ vx8bf = _mm256_cvtepi32_ps(vx8b); ++ ++ y0x8af = _mm256_div_ps(y0x8af, _mm256_set1_ps(in_rng)); ++ y0x8bf = _mm256_div_ps(y0x8bf, _mm256_set1_ps(in_rng)); ++ y1x8af = _mm256_div_ps(y1x8af, _mm256_set1_ps(in_rng)); ++ y1x8bf = _mm256_div_ps(y1x8bf, _mm256_set1_ps(in_rng)); ++ ux8af = _mm256_div_ps(ux8af, _mm256_set1_ps(in_rng)); ++ ux8bf = _mm256_div_ps(ux8bf, _mm256_set1_ps(in_rng)); ++ vx8af = _mm256_div_ps(vx8af, _mm256_set1_ps(in_rng)); ++ vx8bf = _mm256_div_ps(vx8bf, 
_mm256_set1_ps(in_rng)); ++ ++ // Reshape y0x8a ++ reshapeiptx8(&ipt0, &ipt1, &ipt2, &ipt3, ++ &ipt4, &ipt5, &ipt6, &ipt7, ++ y0x8af, ux8af, vx8af, params); ++ ++ transpose_ipt8x4(ipt0, ipt1, ipt2, ipt3, ++ ipt4, ipt5, ipt6, ipt7, ++ &ix8, &px8, &tx8); ++ ++ ycc2rgbx8(&lx8, &mx8, &sx8, ix8, px8, tx8, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx8(&rx8a, &gx8a, &bx8a, lx8, mx8, sx8, *params->lms2rgb_matrix); ++ ++ rx8a = _mm256_mul_ps(rx8a, _mm256_set1_ps(28672.0f)); ++ gx8a = _mm256_mul_ps(gx8a, _mm256_set1_ps(28672.0f)); ++ bx8a = _mm256_mul_ps(bx8a, _mm256_set1_ps(28672.0f)); ++ ++ r0x8a = _mm256_cvtps_epi32(rx8a); ++ g0x8a = _mm256_cvtps_epi32(gx8a); ++ b0x8a = _mm256_cvtps_epi32(bx8a); ++ ++ // Reshape y1x8a ++ reshapeiptx8(&ipt0, &ipt1, &ipt2, &ipt3, ++ &ipt4, &ipt5, &ipt6, &ipt7, ++ y1x8af, ux8af, vx8af, params); ++ ++ transpose_ipt8x4(ipt0, ipt1, ipt2, ipt3, ++ ipt4, ipt5, ipt6, ipt7, ++ &ix8, &px8, &tx8); ++ ++ ycc2rgbx8(&lx8, &mx8, &sx8, ix8, px8, tx8, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx8(&rx8a, &gx8a, &bx8a, lx8, mx8, sx8, *params->lms2rgb_matrix); ++ ++ rx8a = _mm256_mul_ps(rx8a, _mm256_set1_ps(28672.0f)); ++ gx8a = _mm256_mul_ps(gx8a, _mm256_set1_ps(28672.0f)); ++ bx8a = _mm256_mul_ps(bx8a, _mm256_set1_ps(28672.0f)); ++ ++ r1x8a = _mm256_cvtps_epi32(rx8a); ++ g1x8a = _mm256_cvtps_epi32(gx8a); ++ b1x8a = _mm256_cvtps_epi32(bx8a); ++ ++ // Reshape y0x8b ++ reshapeiptx8(&ipt0, &ipt1, &ipt2, &ipt3, ++ &ipt4, &ipt5, &ipt6, &ipt7, ++ y0x8bf, ux8bf, vx8bf, params); ++ ++ transpose_ipt8x4(ipt0, ipt1, ipt2, ipt3, ++ ipt4, ipt5, ipt6, ipt7, ++ &ix8, &px8, &tx8); ++ ++ ycc2rgbx8(&lx8, &mx8, &sx8, ix8, px8, tx8, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx8(&rx8b, &gx8b, &bx8b, lx8, mx8, sx8, *params->lms2rgb_matrix); ++ ++ rx8b = _mm256_mul_ps(rx8b, _mm256_set1_ps(28672.0f)); ++ gx8b = _mm256_mul_ps(gx8b, _mm256_set1_ps(28672.0f)); ++ bx8b = _mm256_mul_ps(bx8b, _mm256_set1_ps(28672.0f)); ++ ++ r0x8b = 
_mm256_cvtps_epi32(rx8b); ++ g0x8b = _mm256_cvtps_epi32(gx8b); ++ b0x8b = _mm256_cvtps_epi32(bx8b); ++ ++ // Reshape y1x8b ++ reshapeiptx8(&ipt0, &ipt1, &ipt2, &ipt3, ++ &ipt4, &ipt5, &ipt6, &ipt7, ++ y1x8bf, ux8bf, vx8bf, params); ++ ++ transpose_ipt8x4(ipt0, ipt1, ipt2, ipt3, ++ ipt4, ipt5, ipt6, ipt7, ++ &ix8, &px8, &tx8); ++ ++ ycc2rgbx8(&lx8, &mx8, &sx8, ix8, px8, tx8, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx8(&rx8b, &gx8b, &bx8b, lx8, mx8, sx8, *params->lms2rgb_matrix); ++ ++ rx8b = _mm256_mul_ps(rx8b, _mm256_set1_ps(28672.0f)); ++ gx8b = _mm256_mul_ps(gx8b, _mm256_set1_ps(28672.0f)); ++ bx8b = _mm256_mul_ps(bx8b, _mm256_set1_ps(28672.0f)); ++ ++ r1x8b = _mm256_cvtps_epi32(rx8b); ++ g1x8b = _mm256_cvtps_epi32(gx8b); ++ b1x8b = _mm256_cvtps_epi32(bx8b); ++ ++ tonemap_int32x8_avx(r0x8a, g0x8a, b0x8a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8a, g1x8a, b1x8a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r0x8b, g0x8b, b0x8b, &r[8], &g[8], &b[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8b, g1x8b, b1x8b, &r1[8], &g1[8], &b1[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox16 = _mm256_lddqu_si256((const __m256i_u *)r); ++ g0ox16 = _mm256_lddqu_si256((const __m256i_u *)g); ++ b0ox16 = _mm256_lddqu_si256((const __m256i_u *)b); ++ ++ roax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 0)); ++ goax8 = 
_mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 0)); ++ boax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 0)); ++ ++ robx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 1)); ++ gobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 1)); ++ bobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 1)); ++ ++ yoax8 = _mm256_mullo_epi32(roax8, _mm256_set1_epi32(cry)); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(goax8, _mm256_set1_epi32(cgy))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(boax8, _mm256_set1_epi32(cby))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(out_rnd)); ++ yoax8 = _mm256_srai_epi32(yoax8, out_sh); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ yobx8 = _mm256_mullo_epi32(robx8, _mm256_set1_epi32(cry)); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(gobx8, _mm256_set1_epi32(cgy))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(bobx8, _mm256_set1_epi32(cby))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(out_rnd)); ++ yobx8 = _mm256_srai_epi32(yobx8, out_sh); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y0ox16 = _mm256_packus_epi32(yoax8, yobx8); ++ y0ox16 = _mm256_permute4x64_epi64(y0ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm256_storeu_si256((__m256i_u *) &dsty[x], y0ox16); ++ ++ r1ox16 = _mm256_lddqu_si256((const __m256i_u *)r1); ++ g1ox16 = _mm256_lddqu_si256((const __m256i_u *)g1); ++ b1ox16 = _mm256_lddqu_si256((const __m256i_u *)b1); ++ ++ r1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 0)); ++ g1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 0)); ++ b1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 0)); ++ ++ r1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 1)); ++ g1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 1)); ++ b1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 1)); ++ ++ y1oax8 = 
_mm256_mullo_epi32(r1oax8, _mm256_set1_epi32(cry)); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(g1oax8, _mm256_set1_epi32(cgy))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(b1oax8, _mm256_set1_epi32(cby))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(out_rnd)); ++ y1oax8 = _mm256_srai_epi32(y1oax8, out_sh); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1obx8 = _mm256_mullo_epi32(r1obx8, _mm256_set1_epi32(cry)); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(g1obx8, _mm256_set1_epi32(cgy))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(b1obx8, _mm256_set1_epi32(cby))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(out_rnd)); ++ y1obx8 = _mm256_srai_epi32(y1obx8, out_sh); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1ox16 = _mm256_packus_epi32(y1oax8, y1obx8); ++ y1ox16 = _mm256_permute4x64_epi64(y1ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm256_storeu_si256((__m256i_u *) &dsty[x + dstlinesize[0] / 2], y1ox16); ++ ++ ravgx8 = _mm256_hadd_epi32(roax8, robx8); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_hadd_epi32(r1oax8, r1obx8)); ++ ravgx8 = _mm256_permute4x64_epi64(ravgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_set1_epi32(2)); ++ ravgx8 = _mm256_srai_epi32(ravgx8, 2); ++ ++ gavgx8 = _mm256_hadd_epi32(goax8, gobx8); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_hadd_epi32(g1oax8, g1obx8)); ++ gavgx8 = _mm256_permute4x64_epi64(gavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_set1_epi32(2)); ++ gavgx8 = _mm256_srai_epi32(gavgx8, 2); ++ ++ bavgx8 = _mm256_hadd_epi32(boax8, bobx8); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_hadd_epi32(b1oax8, b1obx8)); ++ bavgx8 = _mm256_permute4x64_epi64(bavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_set1_epi32(2)); ++ bavgx8 = _mm256_srai_epi32(bavgx8, 2); ++ ++ uox8 = 
_mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cru))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgu))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cburv))); ++ uox8 = _mm256_srai_epi32(uox8, out_sh); ++ uox8 = _mm256_add_epi32(uox8, _mm256_set1_epi32(out_uv_offset)); ++ uox8 = _mm256_packus_epi32(uox8, _mm256_setzero_si256()); ++ uox8 = _mm256_permute4x64_epi64(uox8, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dstu[x >> 1], _mm256_castsi256_si128(uox8)); ++ ++ vox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cburv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cbv))); ++ vox8 = _mm256_srai_epi32(vox8, out_sh); ++ vox8 = _mm256_add_epi32(vox8, _mm256_set1_epi32(out_uv_offset)); ++ vox8 = _mm256_packus_epi32(vox8, _mm256_setzero_si256()); ++ vox8 = _mm256_permute4x64_epi64(vox8, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dstv[x >> 1], _mm256_castsi256_si128(vox8)); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff0; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ tonemap_frame_dovi_2_420p10(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS ++} ++ ++X86_64_V3 void tonemap_frame_420p10_2_420p_avx(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct 
TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS ++ uint8_t *rdsty = dsty; ++ uint8_t *rdstu = dstu; ++ uint8_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // not zero when not divisible by 16 ++ // intentionally leave last pixel empty when input is odd ++ int remainw = width & 14; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[16], g[16], b[16]; ++ int16_t r1[16], g1[16], b1[16]; ++ __m256i in_yuv_offx8 = _mm256_set1_epi32(params->in_yuv_off); ++ __m256i in_uv_offx8 = _mm256_set1_epi32(in_uv_offset); ++ __m256i cyx8 = _mm256_set1_epi32(cy); ++ __m256i rndx8 = _mm256_set1_epi32(in_rnd); ++ ++ __m256i ux8, vx8; ++ __m256i y0x16, y1x16; ++ __m256i y0x8a, y0x8b, y1x8a, y1x8b, ux8a, ux8b, vx8a, vx8b; ++ __m256i r0x8a, g0x8a, b0x8a, r0x8b, g0x8b, b0x8b; ++ __m256i r1x8a, g1x8a, b1x8a, r1x8b, g1x8b, b1x8b; ++ ++ __m256i r0ox16, g0ox16, b0ox16; ++ __m256i y0ox16; ++ __m256i roax8, robx8, goax8, gobx8, boax8, bobx8; ++ 
__m256i yoax8, yobx8; ++ ++ __m256i r1ox16, g1ox16, b1ox16; ++ __m256i y1ox16; ++ __m256i r1oax8, r1obx8, g1oax8, g1obx8, b1oax8, b1obx8; ++ __m256i y1oax8, y1obx8; ++ __m256i uox8, vox8, ravgx8, gavgx8, bavgx8; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstu += dstlinesize[1], dstv += dstlinesize[2], ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) { ++ for (int xx = 0; xx < width >> 4; xx++) { ++ int x = xx << 4; ++ ++ y0x16 = _mm256_lddqu_si256((__m256i*)(srcy + x)); ++ y1x16 = _mm256_lddqu_si256((__m256i*)(srcy + (srclinesize[0] / 2 + x))); ++ ux8 = _mm256_cvtepi16_epi32(_mm_lddqu_si128((__m128i_u *)(srcu + (x >> 1)))); ++ vx8 = _mm256_cvtepi16_epi32(_mm_lddqu_si128((__m128i_u *)(srcv + (x >> 1)))); ++ ++ y0x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 0)); ++ y0x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 1)); ++ y1x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 0)); ++ y1x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 1)); ++ ++ y0x8a = _mm256_sub_epi32(y0x8a, in_yuv_offx8); ++ y1x8a = _mm256_sub_epi32(y1x8a, in_yuv_offx8); ++ y0x8b = _mm256_sub_epi32(y0x8b, in_yuv_offx8); ++ y1x8b = _mm256_sub_epi32(y1x8b, in_yuv_offx8); ++ ux8 = _mm256_sub_epi32(ux8, in_uv_offx8); ++ vx8 = _mm256_sub_epi32(vx8, in_uv_offx8); ++ ++ ux8a = _mm256_permutevar8x32_epi32(ux8, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); ++ ux8b = _mm256_permutevar8x32_epi32(ux8, _mm256_set_epi32(7, 7, 6, 6, 5, 5, 4, 4)); ++ vx8a = _mm256_permutevar8x32_epi32(vx8, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); ++ vx8b = _mm256_permutevar8x32_epi32(vx8, _mm256_set_epi32(7, 7, 6, 6, 5, 5, 4, 4)); ++ ++ // r = av_clip_int16((y * cy + crv * v + in_rnd) >> in_sh); ++ r0x8a = g0x8a = b0x8a = _mm256_mullo_epi32(y0x8a, cyx8); ++ r0x8a = _mm256_add_epi32(r0x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(crv))); ++ r0x8a = _mm256_add_epi32(r0x8a, rndx8); ++ r0x8a = _mm256_srai_epi32(r0x8a, in_sh); ++ 
r0x8a = av_clip_int16_avx(r0x8a); ++ ++ r1x8a = g1x8a = b1x8a = _mm256_mullo_epi32(y1x8a, cyx8); ++ r1x8a = _mm256_add_epi32(r1x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(crv))); ++ r1x8a = _mm256_add_epi32(r1x8a, rndx8); ++ r1x8a = _mm256_srai_epi32(r1x8a, in_sh); ++ r1x8a = av_clip_int16_avx(r1x8a); ++ ++ // g = av_clip_int16((y * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g0x8a = _mm256_add_epi32(g0x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cgu))); ++ g0x8a = _mm256_add_epi32(g0x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(cgv))); ++ g0x8a = _mm256_add_epi32(g0x8a, rndx8); ++ g0x8a = _mm256_srai_epi32(g0x8a, in_sh); ++ g0x8a = av_clip_int16_avx(g0x8a); ++ ++ g1x8a = _mm256_add_epi32(g1x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cgu))); ++ g1x8a = _mm256_add_epi32(g1x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(cgv))); ++ g1x8a = _mm256_add_epi32(g1x8a, rndx8); ++ g1x8a = _mm256_srai_epi32(g1x8a, in_sh); ++ g1x8a = av_clip_int16_avx(g1x8a); ++ ++ // b = av_clip_int16((y * cy + cbu * u + in_rnd) >> in_sh); ++ b0x8a = _mm256_add_epi32(b0x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cbu))); ++ b0x8a = _mm256_add_epi32(b0x8a, rndx8); ++ b0x8a = _mm256_srai_epi32(b0x8a, in_sh); ++ b0x8a = av_clip_int16_avx(b0x8a); ++ ++ b1x8a = _mm256_add_epi32(b1x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cbu))); ++ b1x8a = _mm256_add_epi32(b1x8a, rndx8); ++ b1x8a = _mm256_srai_epi32(b1x8a, in_sh); ++ b1x8a = av_clip_int16_avx(b1x8a); ++ ++ r0x8b = g0x8b = b0x8b = _mm256_mullo_epi32(y0x8b, cyx8); ++ r0x8b = _mm256_add_epi32(r0x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(crv))); ++ r0x8b = _mm256_add_epi32(r0x8b, rndx8); ++ r0x8b = _mm256_srai_epi32(r0x8b, in_sh); ++ r0x8b = av_clip_int16_avx(r0x8b); ++ ++ r1x8b = g1x8b = b1x8b = _mm256_mullo_epi32(y1x8b, cyx8); ++ r1x8b = _mm256_add_epi32(r1x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(crv))); ++ r1x8b = _mm256_add_epi32(r1x8b, rndx8); ++ r1x8b = _mm256_srai_epi32(r1x8b, in_sh); ++ r1x8b = 
av_clip_int16_avx(r1x8b); ++ ++ g0x8b = _mm256_add_epi32(g0x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cgu))); ++ g0x8b = _mm256_add_epi32(g0x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(cgv))); ++ g0x8b = _mm256_add_epi32(g0x8b, rndx8); ++ g0x8b = _mm256_srai_epi32(g0x8b, in_sh); ++ g0x8b = av_clip_int16_avx(g0x8b); ++ ++ g1x8b = _mm256_add_epi32(g1x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cgu))); ++ g1x8b = _mm256_add_epi32(g1x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(cgv))); ++ g1x8b = _mm256_add_epi32(g1x8b, rndx8); ++ g1x8b = _mm256_srai_epi32(g1x8b, in_sh); ++ g1x8b = av_clip_int16_avx(g1x8b); ++ ++ b0x8b = _mm256_add_epi32(b0x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cbu))); ++ b0x8b = _mm256_add_epi32(b0x8b, rndx8); ++ b0x8b = _mm256_srai_epi32(b0x8b, in_sh); ++ b0x8b = av_clip_int16_avx(b0x8b); ++ ++ b1x8b = _mm256_add_epi32(b1x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cbu))); ++ b1x8b = _mm256_add_epi32(b1x8b, rndx8); ++ b1x8b = _mm256_srai_epi32(b1x8b, in_sh); ++ b1x8b = av_clip_int16_avx(b1x8b); ++ ++ tonemap_int32x8_avx(r0x8a, g0x8a, b0x8a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8a, g1x8a, b1x8a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r0x8b, g0x8b, b0x8b, &r[8], &g[8], &b[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8b, g1x8b, b1x8b, &r1[8], &g1[8], &b1[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox16 = 
_mm256_lddqu_si256((const __m256i_u *)r); ++ g0ox16 = _mm256_lddqu_si256((const __m256i_u *)g); ++ b0ox16 = _mm256_lddqu_si256((const __m256i_u *)b); ++ ++ roax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 0)); ++ goax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 0)); ++ boax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 0)); ++ ++ robx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 1)); ++ gobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 1)); ++ bobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 1)); ++ ++ yoax8 = _mm256_mullo_epi32(roax8, _mm256_set1_epi32(cry)); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(goax8, _mm256_set1_epi32(cgy))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(boax8, _mm256_set1_epi32(cby))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(out_rnd)); ++ yoax8 = _mm256_srai_epi32(yoax8, out_sh); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ yobx8 = _mm256_mullo_epi32(robx8, _mm256_set1_epi32(cry)); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(gobx8, _mm256_set1_epi32(cgy))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(bobx8, _mm256_set1_epi32(cby))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(out_rnd)); ++ yobx8 = _mm256_srai_epi32(yobx8, out_sh); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y0ox16 = _mm256_packs_epi32(yoax8, yobx8); ++ y0ox16 = _mm256_permute4x64_epi64(y0ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dsty[x], _mm256_castsi256_si128(_mm256_permute4x64_epi64(_mm256_packus_epi16(y0ox16, _mm256_setzero_si256()), _MM_SHUFFLE(3, 1, 2, 0)))); ++ ++ r1ox16 = _mm256_lddqu_si256((const __m256i_u *)r1); ++ g1ox16 = _mm256_lddqu_si256((const __m256i_u *)g1); ++ b1ox16 = _mm256_lddqu_si256((const __m256i_u *)b1); ++ ++ r1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 0)); ++ g1oax8 = 
_mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 0)); ++ b1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 0)); ++ ++ r1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 1)); ++ g1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 1)); ++ b1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 1)); ++ ++ y1oax8 = _mm256_mullo_epi32(r1oax8, _mm256_set1_epi32(cry)); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(g1oax8, _mm256_set1_epi32(cgy))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(b1oax8, _mm256_set1_epi32(cby))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(out_rnd)); ++ y1oax8 = _mm256_srai_epi32(y1oax8, out_sh); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1obx8 = _mm256_mullo_epi32(r1obx8, _mm256_set1_epi32(cry)); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(g1obx8, _mm256_set1_epi32(cgy))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(b1obx8, _mm256_set1_epi32(cby))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(out_rnd)); ++ y1obx8 = _mm256_srai_epi32(y1obx8, out_sh); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1ox16 = _mm256_packs_epi32(y1oax8, y1obx8); ++ y1ox16 = _mm256_permute4x64_epi64(y1ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dsty[x + dstlinesize[0]], _mm256_castsi256_si128(_mm256_permute4x64_epi64(_mm256_packus_epi16(y1ox16, _mm256_setzero_si256()), _MM_SHUFFLE(3, 1, 2, 0)))); ++ ++ ravgx8 = _mm256_hadd_epi32(roax8, robx8); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_hadd_epi32(r1oax8, r1obx8)); ++ ravgx8 = _mm256_permute4x64_epi64(ravgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_set1_epi32(2)); ++ ravgx8 = _mm256_srai_epi32(ravgx8, 2); ++ ++ gavgx8 = _mm256_hadd_epi32(goax8, gobx8); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_hadd_epi32(g1oax8, g1obx8)); ++ gavgx8 = _mm256_permute4x64_epi64(gavgx8, 
_MM_SHUFFLE(3, 1, 2, 0)); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_set1_epi32(2)); ++ gavgx8 = _mm256_srai_epi32(gavgx8, 2); ++ ++ bavgx8 = _mm256_hadd_epi32(boax8, bobx8); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_hadd_epi32(b1oax8, b1obx8)); ++ bavgx8 = _mm256_permute4x64_epi64(bavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_set1_epi32(2)); ++ bavgx8 = _mm256_srai_epi32(bavgx8, 2); ++ ++ uox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cru))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgu))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cburv))); ++ uox8 = _mm256_srai_epi32(uox8, out_sh); ++ uox8 = _mm256_add_epi32(uox8, _mm256_set1_epi32(out_uv_offset)); ++ uox8 = _mm256_packs_epi32(uox8, _mm256_setzero_si256()); ++ uox8 = _mm256_permute4x64_epi64(uox8, _MM_SHUFFLE(3, 1, 2, 0)); ++ uox8 = _mm256_packus_epi16(uox8, _mm256_setzero_si256()); ++ _mm_storeu_si64(&dstu[x >> 1], _mm256_castsi256_si128(uox8)); ++ ++ vox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cburv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cbv))); ++ vox8 = _mm256_srai_epi32(vox8, out_sh); ++ vox8 = _mm256_add_epi32(vox8, _mm256_set1_epi32(out_uv_offset)); ++ vox8 = _mm256_packs_epi32(vox8, _mm256_setzero_si256()); ++ vox8 = _mm256_permute4x64_epi64(vox8, _MM_SHUFFLE(3, 1, 2, 0)); ++ vox8 = _mm256_packus_epi16(vox8, _mm256_setzero_si256()); ++ _mm_storeu_si64(&dstv[x >> 1], _mm256_castsi256_si128(vox8)); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff0; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ 
rsrcv += offset >> 1; ++ tonemap_frame_420p10_2_420p(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS ++} ++ ++X86_64_V3 void tonemap_frame_420p10_2_420p10_avx(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstu = dstu; ++ uint16_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // not zero when not divisible by 16 ++ // intentionally leave last pixel empty when input is odd ++ int remainw = width & 14; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[16], g[16], b[16]; ++ int16_t r1[16], g1[16], b1[16]; ++ __m256i 
in_yuv_offx8 = _mm256_set1_epi32(params->in_yuv_off); ++ __m256i in_uv_offx8 = _mm256_set1_epi32(in_uv_offset); ++ __m256i cyx8 = _mm256_set1_epi32(cy); ++ __m256i rndx8 = _mm256_set1_epi32(in_rnd); ++ ++ __m256i r0ox16, g0ox16, b0ox16; ++ __m256i y0ox16; ++ __m256i roax8, robx8, goax8, gobx8, boax8, bobx8; ++ __m256i yoax8, yobx8; ++ __m256i ux8, vx8; ++ __m256i y0x16, y1x16; ++ __m256i y0x8a, y0x8b, y1x8a, y1x8b, ux8a, ux8b, vx8a, vx8b; ++ __m256i r0x8a, g0x8a, b0x8a, r0x8b, g0x8b, b0x8b; ++ __m256i r1x8a, g1x8a, b1x8a, r1x8b, g1x8b, b1x8b; ++ ++ __m256i r1ox16, g1ox16, b1ox16; ++ __m256i y1ox16; ++ __m256i r1oax8, r1obx8, g1oax8, g1obx8, b1oax8, b1obx8; ++ __m256i y1oax8, y1obx8; ++ __m256i uox8, vox8, ravgx8, gavgx8, bavgx8; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstu += dstlinesize[1] / 2, dstv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 4; xx++) { ++ int x = xx << 4; ++ ++ y0x16 = _mm256_lddqu_si256((__m256i*)(srcy + x)); ++ y1x16 = _mm256_lddqu_si256((__m256i*)(srcy + (srclinesize[0] / 2 + x))); ++ ux8 = _mm256_cvtepi16_epi32(_mm_lddqu_si128((__m128i_u *)(srcu + (x >> 1)))); ++ vx8 = _mm256_cvtepi16_epi32(_mm_lddqu_si128((__m128i_u *)(srcv + (x >> 1)))); ++ ++ y0x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 0)); ++ y0x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 1)); ++ y1x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 0)); ++ y1x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 1)); ++ ++ y0x8a = _mm256_sub_epi32(y0x8a, in_yuv_offx8); ++ y1x8a = _mm256_sub_epi32(y1x8a, in_yuv_offx8); ++ y0x8b = _mm256_sub_epi32(y0x8b, in_yuv_offx8); ++ y1x8b = _mm256_sub_epi32(y1x8b, in_yuv_offx8); ++ ux8 = _mm256_sub_epi32(ux8, in_uv_offx8); ++ vx8 = _mm256_sub_epi32(vx8, in_uv_offx8); ++ ++ ux8a = _mm256_permutevar8x32_epi32(ux8, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); ++ ux8b = 
_mm256_permutevar8x32_epi32(ux8, _mm256_set_epi32(7, 7, 6, 6, 5, 5, 4, 4)); ++ vx8a = _mm256_permutevar8x32_epi32(vx8, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); ++ vx8b = _mm256_permutevar8x32_epi32(vx8, _mm256_set_epi32(7, 7, 6, 6, 5, 5, 4, 4)); ++ ++ // r = av_clip_int16((y * cy + crv * v + in_rnd) >> in_sh); ++ r0x8a = g0x8a = b0x8a = _mm256_mullo_epi32(y0x8a, cyx8); ++ r0x8a = _mm256_add_epi32(r0x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(crv))); ++ r0x8a = _mm256_add_epi32(r0x8a, rndx8); ++ r0x8a = _mm256_srai_epi32(r0x8a, in_sh); ++ r0x8a = av_clip_int16_avx(r0x8a); ++ ++ r1x8a = g1x8a = b1x8a = _mm256_mullo_epi32(y1x8a, cyx8); ++ r1x8a = _mm256_add_epi32(r1x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(crv))); ++ r1x8a = _mm256_add_epi32(r1x8a, rndx8); ++ r1x8a = _mm256_srai_epi32(r1x8a, in_sh); ++ r1x8a = av_clip_int16_avx(r1x8a); ++ ++ // g = av_clip_int16((y * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g0x8a = _mm256_add_epi32(g0x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cgu))); ++ g0x8a = _mm256_add_epi32(g0x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(cgv))); ++ g0x8a = _mm256_add_epi32(g0x8a, rndx8); ++ g0x8a = _mm256_srai_epi32(g0x8a, in_sh); ++ g0x8a = av_clip_int16_avx(g0x8a); ++ ++ g1x8a = _mm256_add_epi32(g1x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cgu))); ++ g1x8a = _mm256_add_epi32(g1x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(cgv))); ++ g1x8a = _mm256_add_epi32(g1x8a, rndx8); ++ g1x8a = _mm256_srai_epi32(g1x8a, in_sh); ++ g1x8a = av_clip_int16_avx(g1x8a); ++ ++ // b = av_clip_int16((y * cy + cbu * u + in_rnd) >> in_sh); ++ b0x8a = _mm256_add_epi32(b0x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cbu))); ++ b0x8a = _mm256_add_epi32(b0x8a, rndx8); ++ b0x8a = _mm256_srai_epi32(b0x8a, in_sh); ++ b0x8a = av_clip_int16_avx(b0x8a); ++ ++ b1x8a = _mm256_add_epi32(b1x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cbu))); ++ b1x8a = _mm256_add_epi32(b1x8a, rndx8); ++ b1x8a = _mm256_srai_epi32(b1x8a, in_sh); ++ b1x8a = 
av_clip_int16_avx(b1x8a); ++ ++ r0x8b = g0x8b = b0x8b = _mm256_mullo_epi32(y0x8b, cyx8); ++ r0x8b = _mm256_add_epi32(r0x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(crv))); ++ r0x8b = _mm256_add_epi32(r0x8b, rndx8); ++ r0x8b = _mm256_srai_epi32(r0x8b, in_sh); ++ r0x8b = av_clip_int16_avx(r0x8b); ++ ++ r1x8b = g1x8b = b1x8b = _mm256_mullo_epi32(y1x8b, cyx8); ++ r1x8b = _mm256_add_epi32(r1x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(crv))); ++ r1x8b = _mm256_add_epi32(r1x8b, rndx8); ++ r1x8b = _mm256_srai_epi32(r1x8b, in_sh); ++ r1x8b = av_clip_int16_avx(r1x8b); ++ ++ g0x8b = _mm256_add_epi32(g0x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cgu))); ++ g0x8b = _mm256_add_epi32(g0x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(cgv))); ++ g0x8b = _mm256_add_epi32(g0x8b, rndx8); ++ g0x8b = _mm256_srai_epi32(g0x8b, in_sh); ++ g0x8b = av_clip_int16_avx(g0x8b); ++ ++ g1x8b = _mm256_add_epi32(g1x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cgu))); ++ g1x8b = _mm256_add_epi32(g1x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(cgv))); ++ g1x8b = _mm256_add_epi32(g1x8b, rndx8); ++ g1x8b = _mm256_srai_epi32(g1x8b, in_sh); ++ g1x8b = av_clip_int16_avx(g1x8b); ++ ++ b0x8b = _mm256_add_epi32(b0x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cbu))); ++ b0x8b = _mm256_add_epi32(b0x8b, rndx8); ++ b0x8b = _mm256_srai_epi32(b0x8b, in_sh); ++ b0x8b = av_clip_int16_avx(b0x8b); ++ ++ b1x8b = _mm256_add_epi32(b1x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cbu))); ++ b1x8b = _mm256_add_epi32(b1x8b, rndx8); ++ b1x8b = _mm256_srai_epi32(b1x8b, in_sh); ++ b1x8b = av_clip_int16_avx(b1x8b); ++ ++ tonemap_int32x8_avx(r0x8a, g0x8a, b0x8a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8a, g1x8a, b1x8a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, 
params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r0x8b, g0x8b, b0x8b, &r[8], &g[8], &b[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8b, g1x8b, b1x8b, &r1[8], &g1[8], &b1[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox16 = _mm256_lddqu_si256((const __m256i_u *)r); ++ g0ox16 = _mm256_lddqu_si256((const __m256i_u *)g); ++ b0ox16 = _mm256_lddqu_si256((const __m256i_u *)b); ++ ++ roax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 0)); ++ goax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 0)); ++ boax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 0)); ++ ++ robx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 1)); ++ gobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 1)); ++ bobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 1)); ++ ++ yoax8 = _mm256_mullo_epi32(roax8, _mm256_set1_epi32(cry)); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(goax8, _mm256_set1_epi32(cgy))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(boax8, _mm256_set1_epi32(cby))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(out_rnd)); ++ yoax8 = _mm256_srai_epi32(yoax8, out_sh); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ yobx8 = _mm256_mullo_epi32(robx8, _mm256_set1_epi32(cry)); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(gobx8, _mm256_set1_epi32(cgy))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(bobx8, _mm256_set1_epi32(cby))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(out_rnd)); ++ yobx8 = _mm256_srai_epi32(yobx8, out_sh); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y0ox16 = 
_mm256_packus_epi32(yoax8, yobx8); ++ y0ox16 = _mm256_permute4x64_epi64(y0ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm256_storeu_si256((__m256i_u *) &dsty[x], y0ox16); ++ ++ r1ox16 = _mm256_lddqu_si256((const __m256i_u *)r1); ++ g1ox16 = _mm256_lddqu_si256((const __m256i_u *)g1); ++ b1ox16 = _mm256_lddqu_si256((const __m256i_u *)b1); ++ ++ r1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 0)); ++ g1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 0)); ++ b1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 0)); ++ ++ r1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 1)); ++ g1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 1)); ++ b1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 1)); ++ ++ y1oax8 = _mm256_mullo_epi32(r1oax8, _mm256_set1_epi32(cry)); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(g1oax8, _mm256_set1_epi32(cgy))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(b1oax8, _mm256_set1_epi32(cby))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(out_rnd)); ++ y1oax8 = _mm256_srai_epi32(y1oax8, out_sh); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1obx8 = _mm256_mullo_epi32(r1obx8, _mm256_set1_epi32(cry)); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(g1obx8, _mm256_set1_epi32(cgy))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(b1obx8, _mm256_set1_epi32(cby))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(out_rnd)); ++ y1obx8 = _mm256_srai_epi32(y1obx8, out_sh); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1ox16 = _mm256_packus_epi32(y1oax8, y1obx8); ++ y1ox16 = _mm256_permute4x64_epi64(y1ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm256_storeu_si256((__m256i_u *) &dsty[x + dstlinesize[0] / 2], y1ox16); ++ ++ ravgx8 = _mm256_hadd_epi32(roax8, robx8); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_hadd_epi32(r1oax8, r1obx8)); ++ ravgx8 = 
_mm256_permute4x64_epi64(ravgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_set1_epi32(2)); ++ ravgx8 = _mm256_srai_epi32(ravgx8, 2); ++ ++ gavgx8 = _mm256_hadd_epi32(goax8, gobx8); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_hadd_epi32(g1oax8, g1obx8)); ++ gavgx8 = _mm256_permute4x64_epi64(gavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_set1_epi32(2)); ++ gavgx8 = _mm256_srai_epi32(gavgx8, 2); ++ ++ bavgx8 = _mm256_hadd_epi32(boax8, bobx8); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_hadd_epi32(b1oax8, b1obx8)); ++ bavgx8 = _mm256_permute4x64_epi64(bavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_set1_epi32(2)); ++ bavgx8 = _mm256_srai_epi32(bavgx8, 2); ++ ++ uox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cru))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgu))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cburv))); ++ uox8 = _mm256_srai_epi32(uox8, out_sh); ++ uox8 = _mm256_add_epi32(uox8, _mm256_set1_epi32(out_uv_offset)); ++ uox8 = _mm256_packus_epi32(uox8, _mm256_setzero_si256()); ++ uox8 = _mm256_permute4x64_epi64(uox8, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dstu[x >> 1], _mm256_castsi256_si128(uox8)); ++ ++ vox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cburv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cbv))); ++ vox8 = _mm256_srai_epi32(vox8, out_sh); ++ vox8 = _mm256_add_epi32(vox8, _mm256_set1_epi32(out_uv_offset)); ++ vox8 = _mm256_packus_epi32(vox8, _mm256_setzero_si256()); ++ vox8 = _mm256_permute4x64_epi64(vox8, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dstv[x >> 1], _mm256_castsi256_si128(vox8)); ++ } ++ } ++ ++ // Process remaining 
pixels cannot fill the full simd register with scalar version
++    if (remainw) {
++        int offset = width & (int)0xfffffff0;
++        rdsty += offset;
++        rdstu += offset >> 1;
++        rdstv += offset >> 1;
++        rsrcy += offset;
++        rsrcu += offset >> 1;
++        rsrcv += offset >> 1;
++        tonemap_frame_420p10_2_420p10(rdsty, rdstu, rdstv,
++                                      rsrcy, rsrcu, rsrcv,
++                                      dstlinesize, srclinesize,
++                                      dstdepth, srcdepth,
++                                      remainw, rheight, params);
++    }
++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS
++}
++// AVX2 path: converts 16-pixel x 2-row tiles of 16-bit semi-planar input to 8-bit Y + interleaved UV; leftover columns go to the scalar routine.
++X86_64_V3 void tonemap_frame_p016_p010_2_nv12_avx(uint8_t *dsty, uint8_t *dstuv,
++                                                  const uint16_t *srcy, const uint16_t *srcuv,
++                                                  const int *dstlinesize, const int *srclinesize,
++                                                  int dstdepth, int srcdepth,
++                                                  int width, int height,
++                                                  const struct TonemapIntParams *params)
++{
++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS
++    uint8_t *rdsty = dsty; // untouched copies of the plane pointers for the scalar tail call
++    uint8_t *rdstuv = dstuv;
++    const uint16_t *rsrcy = srcy;
++    const uint16_t *rsrcuv = srcuv;
++    int rheight = height; // the row loop below decrements height
++    // not zero when not divisible by 16
++    // intentionally leave last pixel empty when input is odd
++    int remainw = width & 14;
++
++    const int in_depth = srcdepth;
++    const int in_uv_offset = 128 << (in_depth - 8);
++    const int in_sh = in_depth - 1;
++    const int in_rnd = 1 << (in_sh - 1);
++
++    const int out_depth = dstdepth;
++    const int out_uv_offset = 128 << (out_depth - 8);
++    const int out_sh = 29 - out_depth;
++    const int out_rnd = 1 << (out_sh - 1);
++
++    int cy = (*params->yuv2rgb_coeffs)[0][0][0];
++    int crv = (*params->yuv2rgb_coeffs)[0][2][0];
++    int cgu = (*params->yuv2rgb_coeffs)[1][1][0];
++    int cgv = (*params->yuv2rgb_coeffs)[1][2][0];
++    int cbu = (*params->yuv2rgb_coeffs)[2][1][0];
++
++    int cry = (*params->rgb2yuv_coeffs)[0][0][0];
++    int cgy = (*params->rgb2yuv_coeffs)[0][1][0];
++    int cby = (*params->rgb2yuv_coeffs)[0][2][0];
++    int cru = (*params->rgb2yuv_coeffs)[1][0][0];
++    int ocgu = (*params->rgb2yuv_coeffs)[1][1][0];
++    int cburv = (*params->rgb2yuv_coeffs)[1][2][0];
++    int ocgv = (*params->rgb2yuv_coeffs)[2][1][0];
++    int cbv = (*params->rgb2yuv_coeffs)[2][2][0];
++
++    int16_t r[16], g[16], b[16];
++    int16_t r1[16], g1[16], b1[16];
++    __m256i in_yuv_offx8 = _mm256_set1_epi32(params->in_yuv_off);
++    __m256i in_uv_offx8 = _mm256_set1_epi32(in_uv_offset);
++    __m256i cyx8 = _mm256_set1_epi32(cy);
++    __m256i rndx8 = _mm256_set1_epi32(in_rnd);
++
++    __m256i uvx16, uvx8a, uvx8b;
++    __m256i y0x16, y1x16;
++    __m256i y0x8a, y0x8b, y1x8a, y1x8b, ux8a, ux8b, vx8a, vx8b;
++    __m256i r0x8a, g0x8a, b0x8a, r0x8b, g0x8b, b0x8b;
++    __m256i r1x8a, g1x8a, b1x8a, r1x8b, g1x8b, b1x8b;
++
++    __m256i r0ox16, g0ox16, b0ox16;
++    __m256i y0ox16;
++    __m256i roax8, robx8, goax8, gobx8, boax8, bobx8;
++    __m256i yoax8, yobx8;
++
++    __m256i r1ox16, g1ox16, b1ox16;
++    __m256i y1ox16;
++    __m256i r1oax8, r1obx8, g1oax8, g1obx8, b1oax8, b1obx8;
++    __m256i y1oax8, y1obx8, uvoax8, uvobx8, uvox16;
++    __m256i uox8, vox8, ravgx8, gavgx8, bavgx8;
++    for (; height > 1; height -= 2,
++                       dsty += dstlinesize[0] * 2, dstuv += dstlinesize[1],
++                       srcy += srclinesize[0], srcuv += srclinesize[1] / 2) {
++        for (int xx = 0; xx < width >> 4; xx++) {
++            int x = xx << 4;
++
++            y0x16 = _mm256_lddqu_si256((__m256i*)(srcy + x));
++            y1x16 = _mm256_lddqu_si256((__m256i*)(srcy + (srclinesize[0] / 2 + x)));
++            uvx16 = _mm256_lddqu_si256((__m256i*)(srcuv + x));
++
++            if (in_depth == 10) {
++                // shift to low10bits for 10bit input
++                y0x16 = _mm256_srli_epi16(y0x16, 6);
++                y1x16 = _mm256_srli_epi16(y1x16, 6);
++                uvx16 = _mm256_srli_epi16(uvx16, 6);
++            }
++            // samples are unsigned 16-bit: zero-extend so unshifted values >= 0x8000 stay positive
++            y0x8a = _mm256_cvtepu16_epi32(_mm256_extracti128_si256(y0x16, 0));
++            y0x8b = _mm256_cvtepu16_epi32(_mm256_extracti128_si256(y0x16, 1));
++            y1x8a = _mm256_cvtepu16_epi32(_mm256_extracti128_si256(y1x16, 0));
++            y1x8b = _mm256_cvtepu16_epi32(_mm256_extracti128_si256(y1x16, 1));
++            uvx8a = _mm256_cvtepu16_epi32(_mm256_extracti128_si256(uvx16, 0));
++            uvx8b = _mm256_cvtepu16_epi32(_mm256_extracti128_si256(uvx16, 1));
++            y0x8a = _mm256_sub_epi32(y0x8a,
in_yuv_offx8); ++ y1x8a = _mm256_sub_epi32(y1x8a, in_yuv_offx8); ++ y0x8b = _mm256_sub_epi32(y0x8b, in_yuv_offx8); ++ y1x8b = _mm256_sub_epi32(y1x8b, in_yuv_offx8); ++ uvx8a = _mm256_sub_epi32(uvx8a, in_uv_offx8); ++ uvx8b = _mm256_sub_epi32(uvx8b, in_uv_offx8); ++ ++ ux8a = _mm256_shuffle_epi32(uvx8a, _MM_SHUFFLE(2, 2, 0, 0)); ++ ux8b = _mm256_shuffle_epi32(uvx8b, _MM_SHUFFLE(2, 2, 0, 0)); ++ vx8a = _mm256_shuffle_epi32(uvx8a, _MM_SHUFFLE(3, 3, 1, 1)); ++ vx8b = _mm256_shuffle_epi32(uvx8b, _MM_SHUFFLE(3, 3, 1, 1)); ++ ++ // r = av_clip_int16((y * cy + crv * v + in_rnd) >> in_sh); ++ r0x8a = g0x8a = b0x8a = _mm256_mullo_epi32(y0x8a, cyx8); ++ r0x8a = _mm256_add_epi32(r0x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(crv))); ++ r0x8a = _mm256_add_epi32(r0x8a, rndx8); ++ r0x8a = _mm256_srai_epi32(r0x8a, in_sh); ++ r0x8a = av_clip_int16_avx(r0x8a); ++ ++ r1x8a = g1x8a = b1x8a = _mm256_mullo_epi32(y1x8a, cyx8); ++ r1x8a = _mm256_add_epi32(r1x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(crv))); ++ r1x8a = _mm256_add_epi32(r1x8a, rndx8); ++ r1x8a = _mm256_srai_epi32(r1x8a, in_sh); ++ r1x8a = av_clip_int16_avx(r1x8a); ++ ++ // g = av_clip_int16((y * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g0x8a = _mm256_add_epi32(g0x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cgu))); ++ g0x8a = _mm256_add_epi32(g0x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(cgv))); ++ g0x8a = _mm256_add_epi32(g0x8a, rndx8); ++ g0x8a = _mm256_srai_epi32(g0x8a, in_sh); ++ g0x8a = av_clip_int16_avx(g0x8a); ++ ++ g1x8a = _mm256_add_epi32(g1x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cgu))); ++ g1x8a = _mm256_add_epi32(g1x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(cgv))); ++ g1x8a = _mm256_add_epi32(g1x8a, rndx8); ++ g1x8a = _mm256_srai_epi32(g1x8a, in_sh); ++ g1x8a = av_clip_int16_avx(g1x8a); ++ ++ // b = av_clip_int16((y * cy + cbu * u + in_rnd) >> in_sh); ++ b0x8a = _mm256_add_epi32(b0x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cbu))); ++ b0x8a = _mm256_add_epi32(b0x8a, rndx8); ++ 
b0x8a = _mm256_srai_epi32(b0x8a, in_sh); ++ b0x8a = av_clip_int16_avx(b0x8a); ++ ++ b1x8a = _mm256_add_epi32(b1x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cbu))); ++ b1x8a = _mm256_add_epi32(b1x8a, rndx8); ++ b1x8a = _mm256_srai_epi32(b1x8a, in_sh); ++ b1x8a = av_clip_int16_avx(b1x8a); ++ ++ r0x8b = g0x8b = b0x8b = _mm256_mullo_epi32(y0x8b, cyx8); ++ r0x8b = _mm256_add_epi32(r0x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(crv))); ++ r0x8b = _mm256_add_epi32(r0x8b, rndx8); ++ r0x8b = _mm256_srai_epi32(r0x8b, in_sh); ++ r0x8b = av_clip_int16_avx(r0x8b); ++ ++ r1x8b = g1x8b = b1x8b = _mm256_mullo_epi32(y1x8b, cyx8); ++ r1x8b = _mm256_add_epi32(r1x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(crv))); ++ r1x8b = _mm256_add_epi32(r1x8b, rndx8); ++ r1x8b = _mm256_srai_epi32(r1x8b, in_sh); ++ r1x8b = av_clip_int16_avx(r1x8b); ++ ++ g0x8b = _mm256_add_epi32(g0x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cgu))); ++ g0x8b = _mm256_add_epi32(g0x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(cgv))); ++ g0x8b = _mm256_add_epi32(g0x8b, rndx8); ++ g0x8b = _mm256_srai_epi32(g0x8b, in_sh); ++ g0x8b = av_clip_int16_avx(g0x8b); ++ ++ g1x8b = _mm256_add_epi32(g1x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cgu))); ++ g1x8b = _mm256_add_epi32(g1x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(cgv))); ++ g1x8b = _mm256_add_epi32(g1x8b, rndx8); ++ g1x8b = _mm256_srai_epi32(g1x8b, in_sh); ++ g1x8b = av_clip_int16_avx(g1x8b); ++ ++ b0x8b = _mm256_add_epi32(b0x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cbu))); ++ b0x8b = _mm256_add_epi32(b0x8b, rndx8); ++ b0x8b = _mm256_srai_epi32(b0x8b, in_sh); ++ b0x8b = av_clip_int16_avx(b0x8b); ++ ++ b1x8b = _mm256_add_epi32(b1x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cbu))); ++ b1x8b = _mm256_add_epi32(b1x8b, rndx8); ++ b1x8b = _mm256_srai_epi32(b1x8b, in_sh); ++ b1x8b = av_clip_int16_avx(b1x8b); ++ ++ tonemap_int32x8_avx(r0x8a, g0x8a, b0x8a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, 
params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8a, g1x8a, b1x8a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r0x8b, g0x8b, b0x8b, &r[8], &g[8], &b[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8b, g1x8b, b1x8b, &r1[8], &g1[8], &b1[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox16 = _mm256_lddqu_si256((const __m256i_u *)r); ++ g0ox16 = _mm256_lddqu_si256((const __m256i_u *)g); ++ b0ox16 = _mm256_lddqu_si256((const __m256i_u *)b); ++ ++ roax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 0)); ++ goax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 0)); ++ boax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 0)); ++ ++ robx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 1)); ++ gobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 1)); ++ bobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 1)); ++ ++ yoax8 = _mm256_mullo_epi32(roax8, _mm256_set1_epi32(cry)); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(goax8, _mm256_set1_epi32(cgy))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(boax8, _mm256_set1_epi32(cby))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(out_rnd)); ++ yoax8 = _mm256_srai_epi32(yoax8, out_sh); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ yobx8 = _mm256_mullo_epi32(robx8, _mm256_set1_epi32(cry)); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(gobx8, _mm256_set1_epi32(cgy))); ++ yobx8 = _mm256_add_epi32(yobx8, 
_mm256_mullo_epi32(bobx8, _mm256_set1_epi32(cby))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(out_rnd)); ++ yobx8 = _mm256_srai_epi32(yobx8, out_sh); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y0ox16 = _mm256_packs_epi32(yoax8, yobx8); ++ y0ox16 = _mm256_permute4x64_epi64(y0ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dsty[x], _mm256_castsi256_si128(_mm256_permute4x64_epi64(_mm256_packus_epi16(y0ox16, _mm256_setzero_si256()), _MM_SHUFFLE(3, 1, 2, 0)))); ++ ++ r1ox16 = _mm256_lddqu_si256((const __m256i_u *)r1); ++ g1ox16 = _mm256_lddqu_si256((const __m256i_u *)g1); ++ b1ox16 = _mm256_lddqu_si256((const __m256i_u *)b1); ++ ++ r1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 0)); ++ g1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 0)); ++ b1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 0)); ++ ++ r1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 1)); ++ g1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 1)); ++ b1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 1)); ++ ++ y1oax8 = _mm256_mullo_epi32(r1oax8, _mm256_set1_epi32(cry)); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(g1oax8, _mm256_set1_epi32(cgy))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(b1oax8, _mm256_set1_epi32(cby))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(out_rnd)); ++ y1oax8 = _mm256_srai_epi32(y1oax8, out_sh); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1obx8 = _mm256_mullo_epi32(r1obx8, _mm256_set1_epi32(cry)); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(g1obx8, _mm256_set1_epi32(cgy))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(b1obx8, _mm256_set1_epi32(cby))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(out_rnd)); ++ y1obx8 = _mm256_srai_epi32(y1obx8, out_sh); ++ y1obx8 = _mm256_add_epi32(y1obx8, 
_mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1ox16 = _mm256_packs_epi32(y1oax8, y1obx8); ++ y1ox16 = _mm256_permute4x64_epi64(y1ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ _mm_storeu_si128((__m128i_u *) &dsty[x + dstlinesize[0]], _mm256_castsi256_si128(_mm256_permute4x64_epi64(_mm256_packus_epi16(y1ox16, _mm256_setzero_si256()), _MM_SHUFFLE(3, 1, 2, 0)))); ++ ++ ravgx8 = _mm256_hadd_epi32(roax8, robx8); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_hadd_epi32(r1oax8, r1obx8)); ++ ravgx8 = _mm256_permute4x64_epi64(ravgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_set1_epi32(2)); ++ ravgx8 = _mm256_srai_epi32(ravgx8, 2); ++ ++ gavgx8 = _mm256_hadd_epi32(goax8, gobx8); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_hadd_epi32(g1oax8, g1obx8)); ++ gavgx8 = _mm256_permute4x64_epi64(gavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_set1_epi32(2)); ++ gavgx8 = _mm256_srai_epi32(gavgx8, 2); ++ ++ bavgx8 = _mm256_hadd_epi32(boax8, bobx8); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_hadd_epi32(b1oax8, b1obx8)); ++ bavgx8 = _mm256_permute4x64_epi64(bavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_set1_epi32(2)); ++ bavgx8 = _mm256_srai_epi32(bavgx8, 2); ++ ++ uox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cru))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgu))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cburv))); ++ uox8 = _mm256_srai_epi32(uox8, out_sh); ++ uox8 = _mm256_add_epi32(uox8, _mm256_set1_epi32(out_uv_offset)); ++ ++ vox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cburv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cbv))); ++ vox8 = _mm256_srai_epi32(vox8, out_sh); ++ vox8 = _mm256_add_epi32(vox8, 
_mm256_set1_epi32(out_uv_offset)); ++ ++ uvoax8 = _mm256_unpacklo_epi32(uox8, vox8); ++ uvobx8 = _mm256_unpackhi_epi32(uox8, vox8); ++ uvox16 = _mm256_packs_epi32(uvoax8, uvobx8); ++ _mm_storeu_si128((__m128i_u *) &dstuv[x], _mm256_castsi256_si128(_mm256_permute4x64_epi64(_mm256_packus_epi16(uvox16, _mm256_setzero_si256()), _MM_SHUFFLE(3, 1, 2, 0)))); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff0; ++ rdsty += offset; ++ rdstuv += offset; ++ rsrcy += offset; ++ rsrcuv += offset; ++ tonemap_frame_p016_p010_2_nv12(rdsty, rdstuv, ++ rsrcy, rsrcuv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS ++} ++ ++X86_64_V3 void tonemap_frame_p016_p010_2_p016_p010_avx(uint16_t *dsty, uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_AVX_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstuv = dstuv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcuv = srcuv; ++ int rheight = height; ++ // not zero when not divisible by 8 ++ // intentionally leave last pixel emtpy when input is odd ++ int remainw = width & 14; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ const int out_sh2 = 16 - out_depth; ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = 
(*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[16], g[16], b[16]; ++ int16_t r1[16], g1[16], b1[16]; ++ __m256i in_yuv_offx8 = _mm256_set1_epi32(params->in_yuv_off); ++ __m256i in_uv_offx8 = _mm256_set1_epi32(in_uv_offset); ++ __m256i cyx8 = _mm256_set1_epi32(cy); ++ __m256i rndx8 = _mm256_set1_epi32(in_rnd); ++ ++ __m256i r0ox16, g0ox16, b0ox16; ++ __m256i y0ox16; ++ __m256i roax8, robx8, goax8, gobx8, boax8, bobx8; ++ __m256i yoax8, yobx8; ++ __m256i uvx16, uvx8a, uvx8b; ++ __m256i y0x16, y1x16; ++ __m256i y0x8a, y0x8b, y1x8a, y1x8b, ux8a, ux8b, vx8a, vx8b; ++ __m256i r0x8a, g0x8a, b0x8a, r0x8b, g0x8b, b0x8b; ++ __m256i r1x8a, g1x8a, b1x8a, r1x8b, g1x8b, b1x8b; ++ ++ __m256i r1ox16, g1ox16, b1ox16; ++ __m256i y1ox16; ++ __m256i r1oax8, r1obx8, g1oax8, g1obx8, b1oax8, b1obx8; ++ __m256i y1oax8, y1obx8, uvoax8, uvobx8, uvox16; ++ __m256i uox8, vox8, ravgx8, gavgx8, bavgx8; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstuv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcuv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 4; xx++) { ++ int x = xx << 4; ++ ++ y0x16 = _mm256_lddqu_si256((__m256i*)(srcy + x)); ++ y1x16 = _mm256_lddqu_si256((__m256i*)(srcy + (srclinesize[0] / 2 + x))); ++ uvx16 = _mm256_lddqu_si256((__m256i*)(srcuv + x)); ++ ++ if (in_depth == 10) { ++ // shift to low10bits for 10bit input ++ y0x16 = _mm256_srli_epi16(y0x16, 6); ++ y1x16 = _mm256_srli_epi16(y1x16, 6); ++ uvx16 = _mm256_srli_epi16(uvx16, 6); ++ } ++ ++ y0x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 0)); ++ y0x8b = 
_mm256_cvtepi16_epi32(_mm256_extracti128_si256(y0x16, 1)); ++ y1x8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 0)); ++ y1x8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(y1x16, 1)); ++ uvx8a = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(uvx16, 0)); ++ uvx8b = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(uvx16, 1)); ++ y0x8a = _mm256_sub_epi32(y0x8a, in_yuv_offx8); ++ y1x8a = _mm256_sub_epi32(y1x8a, in_yuv_offx8); ++ y0x8b = _mm256_sub_epi32(y0x8b, in_yuv_offx8); ++ y1x8b = _mm256_sub_epi32(y1x8b, in_yuv_offx8); ++ uvx8a = _mm256_sub_epi32(uvx8a, in_uv_offx8); ++ uvx8b = _mm256_sub_epi32(uvx8b, in_uv_offx8); ++ ++ ux8a = _mm256_shuffle_epi32(uvx8a, _MM_SHUFFLE(2, 2, 0, 0)); ++ ux8b = _mm256_shuffle_epi32(uvx8b, _MM_SHUFFLE(2, 2, 0, 0)); ++ vx8a = _mm256_shuffle_epi32(uvx8a, _MM_SHUFFLE(3, 3, 1, 1)); ++ vx8b = _mm256_shuffle_epi32(uvx8b, _MM_SHUFFLE(3, 3, 1, 1)); ++ ++ // r = av_clip_int16((y * cy + crv * v + in_rnd) >> in_sh); ++ r0x8a = g0x8a = b0x8a = _mm256_mullo_epi32(y0x8a, cyx8); ++ r0x8a = _mm256_add_epi32(r0x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(crv))); ++ r0x8a = _mm256_add_epi32(r0x8a, rndx8); ++ r0x8a = _mm256_srai_epi32(r0x8a, in_sh); ++ r0x8a = av_clip_int16_avx(r0x8a); ++ ++ r1x8a = g1x8a = b1x8a = _mm256_mullo_epi32(y1x8a, cyx8); ++ r1x8a = _mm256_add_epi32(r1x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(crv))); ++ r1x8a = _mm256_add_epi32(r1x8a, rndx8); ++ r1x8a = _mm256_srai_epi32(r1x8a, in_sh); ++ r1x8a = av_clip_int16_avx(r1x8a); ++ ++ // g = av_clip_int16((y * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g0x8a = _mm256_add_epi32(g0x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cgu))); ++ g0x8a = _mm256_add_epi32(g0x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(cgv))); ++ g0x8a = _mm256_add_epi32(g0x8a, rndx8); ++ g0x8a = _mm256_srai_epi32(g0x8a, in_sh); ++ g0x8a = av_clip_int16_avx(g0x8a); ++ ++ g1x8a = _mm256_add_epi32(g1x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cgu))); ++ g1x8a = 
_mm256_add_epi32(g1x8a, _mm256_mullo_epi32(vx8a, _mm256_set1_epi32(cgv))); ++ g1x8a = _mm256_add_epi32(g1x8a, rndx8); ++ g1x8a = _mm256_srai_epi32(g1x8a, in_sh); ++ g1x8a = av_clip_int16_avx(g1x8a); ++ ++ // b = av_clip_int16((y * cy + cbu * u + in_rnd) >> in_sh); ++ b0x8a = _mm256_add_epi32(b0x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cbu))); ++ b0x8a = _mm256_add_epi32(b0x8a, rndx8); ++ b0x8a = _mm256_srai_epi32(b0x8a, in_sh); ++ b0x8a = av_clip_int16_avx(b0x8a); ++ ++ b1x8a = _mm256_add_epi32(b1x8a, _mm256_mullo_epi32(ux8a, _mm256_set1_epi32(cbu))); ++ b1x8a = _mm256_add_epi32(b1x8a, rndx8); ++ b1x8a = _mm256_srai_epi32(b1x8a, in_sh); ++ b1x8a = av_clip_int16_avx(b1x8a); ++ ++ r0x8b = g0x8b = b0x8b = _mm256_mullo_epi32(y0x8b, cyx8); ++ r0x8b = _mm256_add_epi32(r0x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(crv))); ++ r0x8b = _mm256_add_epi32(r0x8b, rndx8); ++ r0x8b = _mm256_srai_epi32(r0x8b, in_sh); ++ r0x8b = av_clip_int16_avx(r0x8b); ++ ++ r1x8b = g1x8b = b1x8b = _mm256_mullo_epi32(y1x8b, cyx8); ++ r1x8b = _mm256_add_epi32(r1x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(crv))); ++ r1x8b = _mm256_add_epi32(r1x8b, rndx8); ++ r1x8b = _mm256_srai_epi32(r1x8b, in_sh); ++ r1x8b = av_clip_int16_avx(r1x8b); ++ ++ g0x8b = _mm256_add_epi32(g0x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cgu))); ++ g0x8b = _mm256_add_epi32(g0x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(cgv))); ++ g0x8b = _mm256_add_epi32(g0x8b, rndx8); ++ g0x8b = _mm256_srai_epi32(g0x8b, in_sh); ++ g0x8b = av_clip_int16_avx(g0x8b); ++ ++ g1x8b = _mm256_add_epi32(g1x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cgu))); ++ g1x8b = _mm256_add_epi32(g1x8b, _mm256_mullo_epi32(vx8b, _mm256_set1_epi32(cgv))); ++ g1x8b = _mm256_add_epi32(g1x8b, rndx8); ++ g1x8b = _mm256_srai_epi32(g1x8b, in_sh); ++ g1x8b = av_clip_int16_avx(g1x8b); ++ ++ b0x8b = _mm256_add_epi32(b0x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cbu))); ++ b0x8b = _mm256_add_epi32(b0x8b, rndx8); ++ b0x8b = _mm256_srai_epi32(b0x8b, 
in_sh); ++ b0x8b = av_clip_int16_avx(b0x8b); ++ ++ b1x8b = _mm256_add_epi32(b1x8b, _mm256_mullo_epi32(ux8b, _mm256_set1_epi32(cbu))); ++ b1x8b = _mm256_add_epi32(b1x8b, rndx8); ++ b1x8b = _mm256_srai_epi32(b1x8b, in_sh); ++ b1x8b = av_clip_int16_avx(b1x8b); ++ ++ tonemap_int32x8_avx(r0x8a, g0x8a, b0x8a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8a, g1x8a, b1x8a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r0x8b, g0x8b, b0x8b, &r[8], &g[8], &b[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x8_avx(r1x8b, g1x8b, b1x8b, &r1[8], &g1[8], &b1[8], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox16 = _mm256_lddqu_si256((const __m256i_u *)r); ++ g0ox16 = _mm256_lddqu_si256((const __m256i_u *)g); ++ b0ox16 = _mm256_lddqu_si256((const __m256i_u *)b); ++ ++ roax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 0)); ++ goax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 0)); ++ boax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 0)); ++ ++ robx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r0ox16, 1)); ++ gobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g0ox16, 1)); ++ bobx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b0ox16, 1)); ++ ++ yoax8 = _mm256_mullo_epi32(roax8, _mm256_set1_epi32(cry)); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(goax8, _mm256_set1_epi32(cgy))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_mullo_epi32(boax8, 
_mm256_set1_epi32(cby))); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(out_rnd)); ++ yoax8 = _mm256_srai_epi32(yoax8, out_sh); ++ yoax8 = _mm256_add_epi32(yoax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ yobx8 = _mm256_mullo_epi32(robx8, _mm256_set1_epi32(cry)); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(gobx8, _mm256_set1_epi32(cgy))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_mullo_epi32(bobx8, _mm256_set1_epi32(cby))); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(out_rnd)); ++ yobx8 = _mm256_srai_epi32(yobx8, out_sh); ++ yobx8 = _mm256_add_epi32(yobx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y0ox16 = _mm256_packus_epi32(yoax8, yobx8); ++ y0ox16 = _mm256_permute4x64_epi64(y0ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ y0ox16 = _mm256_slli_epi16(y0ox16, out_sh2); ++ _mm256_storeu_si256((__m256i_u *) &dsty[x], y0ox16); ++ ++ r1ox16 = _mm256_lddqu_si256((const __m256i_u *)r1); ++ g1ox16 = _mm256_lddqu_si256((const __m256i_u *)g1); ++ b1ox16 = _mm256_lddqu_si256((const __m256i_u *)b1); ++ ++ r1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 0)); ++ g1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 0)); ++ b1oax8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 0)); ++ ++ r1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(r1ox16, 1)); ++ g1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(g1ox16, 1)); ++ b1obx8 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(b1ox16, 1)); ++ ++ y1oax8 = _mm256_mullo_epi32(r1oax8, _mm256_set1_epi32(cry)); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(g1oax8, _mm256_set1_epi32(cgy))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_mullo_epi32(b1oax8, _mm256_set1_epi32(cby))); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(out_rnd)); ++ y1oax8 = _mm256_srai_epi32(y1oax8, out_sh); ++ y1oax8 = _mm256_add_epi32(y1oax8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1obx8 = _mm256_mullo_epi32(r1obx8, _mm256_set1_epi32(cry)); ++ y1obx8 = 
_mm256_add_epi32(y1obx8, _mm256_mullo_epi32(g1obx8, _mm256_set1_epi32(cgy))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_mullo_epi32(b1obx8, _mm256_set1_epi32(cby))); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(out_rnd)); ++ y1obx8 = _mm256_srai_epi32(y1obx8, out_sh); ++ y1obx8 = _mm256_add_epi32(y1obx8, _mm256_set1_epi32(params->out_yuv_off)); ++ ++ y1ox16 = _mm256_packus_epi32(y1oax8, y1obx8); ++ y1ox16 = _mm256_permute4x64_epi64(y1ox16, _MM_SHUFFLE(3, 1, 2, 0)); ++ y1ox16 = _mm256_slli_epi16(y1ox16, out_sh2); ++ _mm256_storeu_si256((__m256i_u *) &dsty[x + dstlinesize[0] / 2], y1ox16); ++ ++ ravgx8 = _mm256_hadd_epi32(roax8, robx8); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_hadd_epi32(r1oax8, r1obx8)); ++ ravgx8 = _mm256_permute4x64_epi64(ravgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ ravgx8 = _mm256_add_epi32(ravgx8, _mm256_set1_epi32(2)); ++ ravgx8 = _mm256_srai_epi32(ravgx8, 2); ++ ++ gavgx8 = _mm256_hadd_epi32(goax8, gobx8); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_hadd_epi32(g1oax8, g1obx8)); ++ gavgx8 = _mm256_permute4x64_epi64(gavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ gavgx8 = _mm256_add_epi32(gavgx8, _mm256_set1_epi32(2)); ++ gavgx8 = _mm256_srai_epi32(gavgx8, 2); ++ ++ bavgx8 = _mm256_hadd_epi32(boax8, bobx8); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_hadd_epi32(b1oax8, b1obx8)); ++ bavgx8 = _mm256_permute4x64_epi64(bavgx8, _MM_SHUFFLE(3, 1, 2, 0)); ++ bavgx8 = _mm256_add_epi32(bavgx8, _mm256_set1_epi32(2)); ++ bavgx8 = _mm256_srai_epi32(bavgx8, 2); ++ ++ uox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cru))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgu))); ++ uox8 = _mm256_add_epi32(uox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cburv))); ++ uox8 = _mm256_srai_epi32(uox8, out_sh); ++ uox8 = _mm256_add_epi32(uox8, _mm256_set1_epi32(out_uv_offset)); ++ ++ vox8 = _mm256_add_epi32(_mm256_set1_epi32(out_rnd), _mm256_mullo_epi32(ravgx8, _mm256_set1_epi32(cburv))); 
++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(gavgx8, _mm256_set1_epi32(ocgv))); ++ vox8 = _mm256_add_epi32(vox8, _mm256_mullo_epi32(bavgx8, _mm256_set1_epi32(cbv))); ++ vox8 = _mm256_srai_epi32(vox8, out_sh); ++ vox8 = _mm256_add_epi32(vox8, _mm256_set1_epi32(out_uv_offset)); ++ ++ uvoax8 = _mm256_unpacklo_epi32(uox8, vox8); ++ uvobx8 = _mm256_unpackhi_epi32(uox8, vox8); ++ uvox16 = _mm256_packus_epi32(uvoax8, uvobx8); ++ uvox16 = _mm256_slli_epi16(uvox16, out_sh2); ++ _mm256_storeu_si256((__m256i_u *) &dstuv[x], uvox16); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff0; ++ rdsty += offset; ++ rdstuv += offset; ++ rsrcy += offset; ++ rsrcuv += offset; ++ tonemap_frame_p016_p010_2_p016_p010(rdsty, rdstuv, ++ rsrcy, rsrcuv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_AVX_INTRINSICS ++} +Index: FFmpeg/libavfilter/x86/vf_tonemapx_intrin_avx.h +=================================================================== +--- /dev/null ++++ libavfilter/x86/vf_tonemapx_intrin_avx.h +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_X86_TONEMAPX_INTRIN_AVX_H ++#define AVFILTER_X86_TONEMAPX_INTRIN_AVX_H ++ ++#include "libavfilter/vf_tonemapx.h" ++ ++X86_64_V3 void tonemap_frame_dovi_2_420p_avx(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V3 void tonemap_frame_dovi_2_420p10_avx(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V3 void tonemap_frame_420p10_2_420p_avx(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V3 void tonemap_frame_420p10_2_420p10_avx(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V3 void tonemap_frame_p016_p010_2_nv12_avx(uint8_t *dsty, uint8_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V3 void tonemap_frame_p016_p010_2_p016_p010_avx(uint16_t *dsty, 
uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++#endif // AVFILTER_X86_TONEMAPX_INTRIN_AVX_H +Index: FFmpeg/libavfilter/x86/vf_tonemapx_intrin_sse.c +=================================================================== +--- /dev/null ++++ libavfilter/x86/vf_tonemapx_intrin_sse.c +@@ -0,0 +1,2353 @@ ++/* ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "vf_tonemapx_intrin_sse.h"
++
++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS
++# include <immintrin.h>
++#endif // ENABLE_TONEMAPX_SSE_INTRINSICS
++
++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS
++// GCC 10 and below does not implement _mm_storeu_si32 with movd instruction
++// cast the register into float register and store with movss as a workaround
++#if (defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ <= 10)
++__attribute__((always_inline))
++X86_64_V2 static inline void _mm_storeu_si32(void* mem_addr, __m128i a) {
++    _mm_store_ss((float*)mem_addr, _mm_castsi128_ps(a));
++    return;
++}
++#endif
++/* Clamp each signed 32-bit lane to [0, 0x7FFF] (15 bits, despite the name): negative lanes become 0, larger lanes 0x7FFF. */
++X86_64_V2 static inline __m128i av_clip_uint16_sse(__m128i a)
++{
++    __m128i mask = _mm_set1_epi32(0x7FFF);
++    __m128i condition = _mm_and_si128(a, _mm_set1_epi32(~0x7FFF));
++
++    __m128i zero = _mm_setzero_si128();
++    __m128i cmp = _mm_cmpeq_epi32(condition, zero);
++
++    __m128i neg_a = _mm_and_si128(_mm_srai_epi32(_mm_xor_si128(a, _mm_set1_epi32(-1)), 31), mask);
++    __m128i result = _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, neg_a));
++
++    return result;
++}
++/* Clamp each signed 32-bit lane to the int16 range [-0x8000, 0x7FFF]. */
++X86_64_V2 static inline __m128i av_clip_int16_sse(__m128i a)
++{
++    __m128i add_result = _mm_add_epi32(a, _mm_set1_epi32(0x8000U));
++    __m128i mask = _mm_set1_epi32(~0xFFFF);
++    __m128i condition = _mm_and_si128(add_result, mask);
++    __m128i cmp = _mm_cmpeq_epi32(condition, _mm_setzero_si128());
++
++    __m128i shifted = _mm_srai_epi32(a, 31);
++    __m128i xor_result = _mm_xor_si128(shifted, _mm_set1_epi32(0x7FFF));
++
++    return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, xor_result));
++}
++/* Per-lane linear blend x + (y - x) * a; with a in {0, 1} this selects x or y. */
++X86_64_V2 inline static __m128 mix_float32x4(__m128 x, __m128 y, __m128 a)
++{
++    __m128 n = _mm_sub_ps(y, x);
++    n = _mm_mul_ps(n, a);
++    n = _mm_add_ps(n, x);
++    return n;
++}
++/* Horizontal sum of the four float lanes of x. */
++X86_64_V2 inline static float reduce_floatx4(__m128 x) {
++    x = _mm_hadd_ps(x, x);
++    x = _mm_hadd_ps(x, x);
++    return _mm_cvtss_f32(x);
++}
++/* Evaluate coeffs[0] + coeffs[1]*s + coeffs[2]*s*s; lane 3 of coeffs is multiplied by 0 and ignored. */
++X86_64_V2 static inline float reshape_poly(float s, __m128 coeffs)
++{
++    __m128 ps = _mm_set_ps(0.0f, s * s, s, 1.0f);
++    ps = _mm_mul_ps(ps, coeffs);
++    return reduce_floatx4(ps);
++}
++/* MMR reshaping: start from coeffs[0], add dot products of mmr rows with sig and its cross terms, plus squares/cubes when the order gates allow. */
++X86_64_V2 inline static float reshape_mmr(__m128 sig, __m128 coeffs, const float* mmr,
++                                          int mmr_single, int min_order, int max_order)
++{
++    float s = _mm_cvtss_f32(coeffs);
++    int mmr_idx = 0;
++    int order = 0;
++
++    __m128 mmr_coeffs, ps;
++    __m128 sigX01 = _mm_mul_ps(sig, _mm_shuffle_ps(sig, sig, _MM_SHUFFLE(1, 1, 1, 1))); // {sig[0]*sig[1], sig[1]*sig[1], sig[2]*sig[1], sig[3]*sig[1]}
++    __m128 sigX02 = _mm_mul_ps(sig, _mm_shuffle_ps(sig, sig, _MM_SHUFFLE(2, 2, 2, 2))); // {sig[0]*sig[2], sig[1]*sig[2], sig[2]*sig[2], sig[3]*sig[2]}
++    __m128 sigX12 = _mm_mul_ps(sigX01, _mm_shuffle_ps(sig, sig, _MM_SHUFFLE(2, 2, 2, 2))); // {sig[0]*sig[1]*sig[2], sig[1]*sig[1]*sig[2], sig[2]*sig[1]*sig[2], sig[3]*sig[1]*sig[2]}
++    __m128 sigX = sigX01; // sig[0]*sig[1] now positioned at 0
++
++    sigX = _mm_insert_ps(sigX, sigX02, _MM_MK_INSERTPS_NDX(0, 1, 0)); // sig[0]*sig[2] at 1
++    sigX = _mm_insert_ps(sigX, sigX02, _MM_MK_INSERTPS_NDX(1, 2, 0)); // sig[1]*sig[2] at 2
++    sigX = _mm_insert_ps(sigX, sigX12, _MM_MK_INSERTPS_NDX(0, 3, 0)); // sig[0]*sig[1]*sig[2] at 3
++
++    mmr_idx = mmr_single ? 0 : (int)_mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(3, 2, 0, 1)));
++    order = (int)_mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(1, 2, 0, 3)));
++
++    // dot first order
++    mmr_coeffs = _mm_loadu_ps(&mmr[mmr_idx + 0*4]);
++    ps = _mm_mul_ps(sig, mmr_coeffs);
++    s += reduce_floatx4(ps);
++    mmr_coeffs = _mm_loadu_ps(&mmr[mmr_idx + 1*4]);
++    ps = _mm_mul_ps(sigX, mmr_coeffs);
++    s += reduce_floatx4(ps);
++
++    if (max_order >= 2 && (min_order >= 2 || order >= 2)) {
++        __m128 sig2 = _mm_mul_ps(sig, sig);
++        __m128 sigX2 = _mm_mul_ps(sigX, sigX);
++
++        mmr_coeffs = _mm_loadu_ps(&mmr[mmr_idx + 2*4]);
++        ps = _mm_mul_ps(sig2, mmr_coeffs);
++        s += reduce_floatx4(ps);
++        mmr_coeffs = _mm_loadu_ps(&mmr[mmr_idx + 3*4]);
++        ps = _mm_mul_ps(sigX2, mmr_coeffs);
++        s += reduce_floatx4(ps);
++
++        if (max_order == 3 && (min_order == 3 || order >= 3)) {
++            __m128 sig3 = _mm_mul_ps(sig2, sig);
++            __m128 sigX3 = _mm_mul_ps(sigX2, sigX);
++
++            mmr_coeffs = _mm_loadu_ps(&mmr[mmr_idx + 4*4]);
++            ps = _mm_mul_ps(sig3, mmr_coeffs);
++            s += reduce_floatx4(ps);
++            mmr_coeffs = _mm_loadu_ps(&mmr[mmr_idx + 5*4]);
++            ps = _mm_mul_ps(sigX3, mmr_coeffs);
++            s += reduce_floatx4(ps);
++        }
++    }
++
++    return s;
++}
++/* reshape_dovi_iptpqc2: reshape lanes 0..2 (I, P, T) of sig from the tables in ctx->dovi_pbuf, clamping each with CLAMP(); lane 3 passes through. */
++#define CLAMP(a, b, c) (FFMIN(FFMAX((a), (b)), (c)))
++X86_64_V2 inline static __m128 reshape_dovi_iptpqc2(__m128 sig, const TonemapIntParams *ctx)
++{
++    int has_mmr_poly;
++    float s;
++
++    float *src_dovi_params = ctx->dovi_pbuf;
++    float *src_dovi_pivots = ctx->dovi_pbuf + 24;
++    float *src_dovi_coeffs = ctx->dovi_pbuf + 48; //float4*
++    float *src_dovi_mmr = ctx->dovi_pbuf + 144; //float4*
++
++    float* dovi_params_i = src_dovi_params + 0*8;
++    float* dovi_pivots_i = src_dovi_pivots + 0*8;
++    float* dovi_coeffs_i = src_dovi_coeffs + 0 * 8 * 4; //float4*
++    float* dovi_mmr_i = src_dovi_mmr + 0 * 48 * 4; //float4*
++    int dovi_num_pivots_i = dovi_params_i[0];
++    int dovi_has_mmr_i = dovi_params_i[1];
++    int dovi_has_poly_i = dovi_params_i[2];
++    int dovi_mmr_single_i = dovi_params_i[3];
++    int dovi_min_order_i = dovi_params_i[4];
++    int dovi_max_order_i = dovi_params_i[5];
++    float dovi_lo_i = dovi_params_i[6];
++    float dovi_hi_i = dovi_params_i[7];
++
++    float* dovi_params_p = src_dovi_params + 1*8;
++    float* dovi_coeffs_p = src_dovi_coeffs + 1*8 * 4; //float4*
++    float* dovi_mmr_p = src_dovi_mmr + 1*48 * 4; //float4*
++    int dovi_has_mmr_p = dovi_params_p[1];
++    int dovi_has_poly_p = dovi_params_p[2];
++    int dovi_mmr_single_p = dovi_params_p[3];
++    int dovi_min_order_p = dovi_params_p[4];
++    int dovi_max_order_p = dovi_params_p[5];
++    float dovi_lo_p = dovi_params_p[6];
++    float dovi_hi_p = dovi_params_p[7];
++
++    float* dovi_params_t = src_dovi_params + 2*8;
++    float* dovi_coeffs_t = src_dovi_coeffs + 2*8 * 4; //float4*
++    float* dovi_mmr_t = src_dovi_mmr + 2*48 * 4; //float4*
++    int dovi_has_mmr_t = dovi_params_t[1];
++    int dovi_has_poly_t = dovi_params_t[2];
++    int dovi_mmr_single_t = dovi_params_t[3];
++    int dovi_min_order_t = dovi_params_t[4];
++    int dovi_max_order_t = dovi_params_t[5];
++    float dovi_lo_t = dovi_params_t[6];
++    float dovi_hi_t = dovi_params_t[7];
++
++    __m128 coeffs, result;
++
++    // reshape I
++    s = _mm_cvtss_f32(sig);
++    result = sig;
++    if (dovi_num_pivots_i > 2) {
++        __m128 m01 = mix_float32x4(_mm_loadu_ps(dovi_coeffs_i), _mm_loadu_ps(dovi_coeffs_i + 4), _mm_set1_ps(s >= dovi_pivots_i[0]));
++        __m128 m23 = mix_float32x4(_mm_loadu_ps(dovi_coeffs_i + 2*4), _mm_loadu_ps(dovi_coeffs_i + 3*4), _mm_set1_ps(s >= dovi_pivots_i[2]));
++        __m128 m0123 = mix_float32x4(m01, m23, _mm_set1_ps(s >= dovi_pivots_i[1]));
++        __m128 m45 = mix_float32x4(_mm_loadu_ps(dovi_coeffs_i + 4*4), _mm_loadu_ps(dovi_coeffs_i + 5*4), _mm_set1_ps(s >= dovi_pivots_i[4]));
++        __m128 m67 = mix_float32x4(_mm_loadu_ps(dovi_coeffs_i + 6*4), _mm_loadu_ps(dovi_coeffs_i + 7*4), _mm_set1_ps(s >= dovi_pivots_i[6]));
++        __m128 m4567 = mix_float32x4(m45, m67, _mm_set1_ps(s >= dovi_pivots_i[5]));
++        coeffs = mix_float32x4(m0123, m4567, _mm_set1_ps(s >= dovi_pivots_i[3]));
++    } else {
++        coeffs = _mm_loadu_ps(dovi_coeffs_i);
++    }
++
++    has_mmr_poly = dovi_has_mmr_i && dovi_has_poly_i;
++
++    if ((has_mmr_poly && _mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(3, 3, 3, 3))) == 0.0f) || (!has_mmr_poly && dovi_has_poly_i))
++        s = reshape_poly(s, coeffs);
++    else
++        s = reshape_mmr(result, coeffs, dovi_mmr_i,
++                        dovi_mmr_single_i, dovi_min_order_i, dovi_max_order_i);
++
++    result = _mm_insert_ps(result, _mm_set1_ps(CLAMP(s, dovi_lo_i, dovi_hi_i)), _MM_MK_INSERTPS_NDX(0, 0, 0));
++
++    // reshape P
++    s = _mm_cvtss_f32(_mm_shuffle_ps(sig, sig, _MM_SHUFFLE(1, 1, 1, 1)));
++    coeffs = _mm_loadu_ps(dovi_coeffs_p);
++    has_mmr_poly = dovi_has_mmr_p && dovi_has_poly_p;
++
++    if ((has_mmr_poly && _mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(3, 3, 3, 3))) == 0.0f) || (!has_mmr_poly && dovi_has_poly_p))
++        s = reshape_poly(s, coeffs);
++    else
++        s = reshape_mmr(result, coeffs, dovi_mmr_p,
++                        dovi_mmr_single_p, dovi_min_order_p, dovi_max_order_p);
++
++    result = _mm_insert_ps(result, _mm_set1_ps(CLAMP(s, dovi_lo_p, dovi_hi_p)), _MM_MK_INSERTPS_NDX(0, 1, 0));
++
++    // reshape T
++    s = _mm_cvtss_f32(_mm_shuffle_ps(sig, sig, _MM_SHUFFLE(2, 2, 2, 2)));
++    coeffs = _mm_loadu_ps(dovi_coeffs_t);
++    has_mmr_poly = dovi_has_mmr_t && dovi_has_poly_t;
++
++    if ((has_mmr_poly && _mm_cvtss_f32(_mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(3, 3, 3, 3))) == 0.0f) || (!has_mmr_poly && dovi_has_poly_t))
++        s = reshape_poly(s, coeffs);
++    else
++        s = reshape_mmr(result, coeffs, dovi_mmr_t,
++                        dovi_mmr_single_t, dovi_min_order_t, dovi_max_order_t);
++
++    result = _mm_insert_ps(result, _mm_set1_ps(CLAMP(s, dovi_lo_t, dovi_hi_t)), _MM_MK_INSERTPS_NDX(0, 2, 0));
++
++    return result;
++}
++/* Per-lane 3x3 matrix multiply of (y, cb, cr) by nonlinear, then subtract ycc_offset[row] from each output. */
++X86_64_V2 inline static void ycc2rgbx4(__m128* dy, __m128* dcb, __m128* dcr,
++                                       __m128 y, __m128 cb, __m128 cr,
++                                       const double nonlinear[3][3], const float ycc_offset[3])
++{
++    *dy = _mm_mul_ps(y, _mm_set1_ps((float)nonlinear[0][0]));
++    *dy = _mm_add_ps(*dy, _mm_mul_ps(cb, _mm_set1_ps((float)nonlinear[0][1])));
++    *dy = _mm_add_ps(*dy, _mm_mul_ps(cr, _mm_set1_ps((float)nonlinear[0][2])));
++    *dy = _mm_sub_ps(*dy, _mm_set1_ps(ycc_offset[0]));
++
++    *dcb = _mm_mul_ps(y, _mm_set1_ps((float)nonlinear[1][0]));
++    *dcb = _mm_add_ps(*dcb, _mm_mul_ps(cb, _mm_set1_ps((float)nonlinear[1][1])));
++    *dcb = _mm_add_ps(*dcb, _mm_mul_ps(cr, _mm_set1_ps((float)nonlinear[1][2])));
++    *dcb = _mm_sub_ps(*dcb, _mm_set1_ps(ycc_offset[1]));
++
++    *dcr = _mm_mul_ps(y, _mm_set1_ps((float)nonlinear[2][0]));
++    *dcr = _mm_add_ps(*dcr, _mm_mul_ps(cb, _mm_set1_ps((float)nonlinear[2][1])));
++    *dcr = _mm_add_ps(*dcr, _mm_mul_ps(cr, _mm_set1_ps((float)nonlinear[2][2])));
++    *dcr = _mm_sub_ps(*dcr, _mm_set1_ps(ycc_offset[2]));
++}
++/* Per-lane 3x3 matrix multiply of (l, m, s) by lms2rgb_matrix. */
++X86_64_V2 inline static void lms2rgbx4(__m128* dl, __m128* dm, __m128* ds,
++                                       __m128 l, __m128 m, __m128 s,
++                                       const double lms2rgb_matrix[3][3])
++{
++    *dl = _mm_mul_ps(l, _mm_set1_ps((float)lms2rgb_matrix[0][0]));
++    *dl = _mm_add_ps(*dl, _mm_mul_ps(m, _mm_set1_ps((float)lms2rgb_matrix[0][1])));
++    *dl = _mm_add_ps(*dl, _mm_mul_ps(s, _mm_set1_ps((float)lms2rgb_matrix[0][2])));
++
++    *dm = _mm_mul_ps(l, _mm_set1_ps((float)lms2rgb_matrix[1][0]));
++    *dm = _mm_add_ps(*dm, _mm_mul_ps(m, _mm_set1_ps((float)lms2rgb_matrix[1][1])));
++    *dm = _mm_add_ps(*dm, _mm_mul_ps(s, _mm_set1_ps((float)lms2rgb_matrix[1][2])));
++
++    *ds = _mm_mul_ps(l, _mm_set1_ps((float)lms2rgb_matrix[2][0]));
++    *ds = _mm_add_ps(*ds, _mm_mul_ps(m, _mm_set1_ps((float)lms2rgb_matrix[2][1])));
++    *ds = _mm_add_ps(*ds, _mm_mul_ps(s, _mm_set1_ps((float)lms2rgb_matrix[2][2])));
++}
++/* Tonemap 4 RGB pixels: lin_lut lookup (input + 2048, clipped), optional rgb2rgb and desaturation, scale by tonemap_lut[max(r,g,b)], then delin_lut; writes 4 values to each *_out. ocoeffs is not read here. */
++X86_64_V2 static inline void tonemap_int32x4_sse(__m128i r_in, __m128i g_in, __m128i b_in,
++                                                 int16_t *r_out, int16_t *g_out, int16_t *b_out,
++                                                 float *lin_lut, float *tonemap_lut, uint16_t *delin_lut,
++                                                 const AVLumaCoefficients *coeffs,
++                                                 const AVLumaCoefficients *ocoeffs, double desat,
++                                                 double (*rgb2rgb)[3][3],
++                                                 int rgb2rgb_passthrough)
++{
++    __m128i sig4;
++    __m128 mapvalx4, r_linx4, g_linx4, b_linx4;
++    __m128 offset = _mm_set1_ps(0.5f);
++    __m128i input_lut_offset = _mm_set1_epi32(2048);
++    __m128 intermediate_upper_bound = _mm_set1_ps(32767.0f);
++    __m128i r, g, b, rx4, gx4, bx4;
++
++    float mapval4[4], r_lin4[4], g_lin4[4], b_lin4[4];
++
++    sig4 = _mm_max_epi32(r_in, _mm_max_epi32(g_in, b_in));
++    sig4 = _mm_add_epi32(sig4, input_lut_offset);
++    sig4 = av_clip_uint16_sse(sig4);
++
++    r = _mm_add_epi32(r_in, input_lut_offset);
++    r = av_clip_uint16_sse(r);
++    g = _mm_add_epi32(g_in, input_lut_offset);
++    g = av_clip_uint16_sse(g);
++    b = _mm_add_epi32(b_in, input_lut_offset);
++    b = av_clip_uint16_sse(b);
++
++    // Cannot use loop here as the lane has to be compile-time constant
++#define LOAD_LUT(i) mapval4[i] = tonemap_lut[_mm_extract_epi32(sig4, i)]; \
++r_lin4[i] = lin_lut[_mm_extract_epi32(r, i)]; \
++g_lin4[i] = lin_lut[_mm_extract_epi32(g, i)]; \
++b_lin4[i] = lin_lut[_mm_extract_epi32(b, i)];
++
++    LOAD_LUT(0)
++    LOAD_LUT(1)
++    LOAD_LUT(2)
++    LOAD_LUT(3)
++
++#undef LOAD_LUT
++
++    mapvalx4 = _mm_loadu_ps(mapval4);
++    r_linx4 = _mm_loadu_ps(r_lin4);
++    g_linx4 = _mm_loadu_ps(g_lin4);
++    b_linx4 = _mm_loadu_ps(b_lin4);
++    /* NOTE(review): the g and b rows below read the already-updated r_linx4/g_linx4, not the values loaded above -- confirm this matches the scalar path before changing it. */
++    if (!rgb2rgb_passthrough) {
++        r_linx4 = _mm_mul_ps(r_linx4, _mm_set1_ps((float)(*rgb2rgb)[0][0]));
++        r_linx4 = _mm_add_ps(r_linx4, _mm_mul_ps(g_linx4, _mm_set1_ps((float)(*rgb2rgb)[0][1])));
++        r_linx4 = _mm_add_ps(r_linx4, _mm_mul_ps(b_linx4, _mm_set1_ps((float)(*rgb2rgb)[0][2])));
++
++        g_linx4 = _mm_mul_ps(g_linx4, _mm_set1_ps((float)(*rgb2rgb)[1][1]));
++        g_linx4 = _mm_add_ps(g_linx4, _mm_mul_ps(r_linx4, _mm_set1_ps((float)(*rgb2rgb)[1][0])));
++        g_linx4 = _mm_add_ps(g_linx4, _mm_mul_ps(b_linx4, _mm_set1_ps((float)(*rgb2rgb)[1][2])));
++
++        b_linx4 = _mm_mul_ps(b_linx4, _mm_set1_ps((float)(*rgb2rgb)[2][2]));
++        b_linx4 = _mm_add_ps(b_linx4, _mm_mul_ps(r_linx4, _mm_set1_ps((float)(*rgb2rgb)[2][0])));
++        b_linx4 = _mm_add_ps(b_linx4, _mm_mul_ps(g_linx4, _mm_set1_ps((float)(*rgb2rgb)[2][1])));
++    }
++
++    if (desat > 0) {
++        __m128 eps_x4 = _mm_set1_ps(FLOAT_EPS);
++        __m128 desat4 = _mm_set1_ps((float)desat);
++        __m128 luma4 = _mm_set1_ps(0);
++        __m128 overbright4;
++
++        luma4 = _mm_add_ps(luma4, _mm_mul_ps(r_linx4, _mm_set1_ps((float)av_q2d(coeffs->cr))));
++        luma4 = _mm_add_ps(luma4, _mm_mul_ps(g_linx4, _mm_set1_ps((float)av_q2d(coeffs->cg))));
++        luma4 = _mm_add_ps(luma4, _mm_mul_ps(b_linx4, _mm_set1_ps((float)av_q2d(coeffs->cb))));
++        overbright4 = _mm_div_ps(_mm_max_ps(_mm_sub_ps(luma4, desat4), eps_x4), _mm_max_ps(luma4, eps_x4));
++        r_linx4 = _mm_sub_ps(r_linx4, _mm_mul_ps(r_linx4, overbright4));
++        r_linx4 = _mm_add_ps(r_linx4, _mm_mul_ps(luma4, overbright4));
++        g_linx4 = _mm_sub_ps(g_linx4, _mm_mul_ps(g_linx4, overbright4));
++        g_linx4 = _mm_add_ps(g_linx4, _mm_mul_ps(luma4, overbright4));
++        b_linx4 = _mm_sub_ps(b_linx4, _mm_mul_ps(b_linx4, overbright4));
++        b_linx4 = _mm_add_ps(b_linx4, _mm_mul_ps(luma4, overbright4));
++    }
++
++    r_linx4 = _mm_mul_ps(r_linx4, mapvalx4);
++    g_linx4 = _mm_mul_ps(g_linx4, mapvalx4);
++    b_linx4 = _mm_mul_ps(b_linx4, mapvalx4);
++
++    r_linx4 = _mm_mul_ps(r_linx4, intermediate_upper_bound);
++    r_linx4 = _mm_add_ps(r_linx4, offset);
++
++    g_linx4 = _mm_mul_ps(g_linx4, intermediate_upper_bound);
++    g_linx4 = _mm_add_ps(g_linx4, offset);
++
++    b_linx4 = _mm_mul_ps(b_linx4, intermediate_upper_bound);
++    b_linx4 = _mm_add_ps(b_linx4, offset);
++
++    rx4 = _mm_cvttps_epi32(r_linx4);
++    rx4 = av_clip_uint16_sse(rx4);
++    gx4 = _mm_cvttps_epi32(g_linx4);
++    gx4 = av_clip_uint16_sse(gx4);
++    bx4 = _mm_cvttps_epi32(b_linx4);
++    bx4 = av_clip_uint16_sse(bx4);
++
++#define SAVE_COLOR(i) r_out[i] = delin_lut[_mm_extract_epi32(rx4, i)]; \
++g_out[i] = delin_lut[_mm_extract_epi32(gx4, i)]; \
++b_out[i] = delin_lut[_mm_extract_epi32(bx4, i)];
++
++    SAVE_COLOR(0)
++    SAVE_COLOR(1)
++    SAVE_COLOR(2)
++    SAVE_COLOR(3)
++
++#undef SAVE_COLOR
++}
++#endif //
ENABLE_TONEMAPX_SSE_INTRINSICS ++ ++X86_64_V2 void tonemap_frame_dovi_2_420p_sse(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS ++ uint8_t *rdsty = dsty; ++ uint8_t *rdstu = dstu; ++ uint8_t *rdstv = dstv; ++ ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ ++ int rheight = height; ++ // not zero when not divisible by 8 ++ // intentionally leave last pixel emtpy when input is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const float in_rng = (float)((1 << in_depth) - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ ++ __m128i zero128 = _mm_setzero_si128(); ++ __m128i ux4, vx4; ++ __m128i y0x8, y1x8; ++ __m128i y0x4a, y0x4b, y1x4a, y1x4b, ux4a, ux4b, vx4a, vx4b; ++ __m128i r0x4a, g0x4a, b0x4a, r0x4b, g0x4b, b0x4b; ++ __m128i r1x4a, g1x4a, b1x4a, r1x4b, g1x4b, b1x4b; ++ ++ __m128i r0ox8, g0ox8, b0ox8; ++ __m128i y0ox8; ++ __m128i roax4, robx4, goax4, gobx4, boax4, bobx4; ++ __m128i yoax4, yobx4; ++ ++ __m128i r1ox8, g1ox8, b1ox8; ++ __m128i y1ox8; ++ __m128i r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ __m128i y1oax4, y1obx4; ++ __m128i uox4, 
vox4, ravgx4, gavgx4, bavgx4; ++ ++ __m128 ipt0, ipt1, ipt2, ipt3; ++ __m128 ia1, ib1, ia2, ib2; ++ __m128 ix4, px4, tx4; ++ __m128 lx4, mx4, sx4; ++ __m128 rx4a, gx4a, bx4a, rx4b, gx4b, bx4b; ++ __m128 y0x4af, y0x4bf, y1x4af, y1x4bf, ux4af, ux4bf, vx4af, vx4bf; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstu += dstlinesize[1], dstv += dstlinesize[2], ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = _mm_lddqu_si128((__m128i*)(srcy + x)); ++ y1x8 = _mm_lddqu_si128((__m128i*)(srcy + (srclinesize[0] / 2 + x))); ++ ux4 = _mm_loadu_si64((__m128i*)(srcu + (x >> 1))); ++ vx4 = _mm_loadu_si64((__m128i*)(srcv + (x >> 1))); ++ ++ y0x4a = _mm_cvtepu16_epi32(y0x8); ++ y0x4b = _mm_unpackhi_epi16(y0x8, zero128); ++ y1x4a = _mm_cvtepu16_epi32(y1x8); ++ y1x4b = _mm_unpackhi_epi16(y1x8, zero128); ++ ux4 = _mm_cvtepu16_epi32(ux4); ++ vx4 = _mm_cvtepu16_epi32(vx4); ++ ++ ux4a = _mm_unpacklo_epi32(ux4, ux4); ++ ux4b = _mm_unpackhi_epi32(ux4, ux4); ++ vx4a = _mm_unpacklo_epi32(vx4, vx4); ++ vx4b = _mm_unpackhi_epi32(vx4, vx4); ++ ++ y0x4af = _mm_cvtepi32_ps(y0x4a); ++ y0x4bf = _mm_cvtepi32_ps(y0x4b); ++ y1x4af = _mm_cvtepi32_ps(y1x4a); ++ y1x4bf = _mm_cvtepi32_ps(y1x4b); ++ ux4af = _mm_cvtepi32_ps(ux4a); ++ ux4bf = _mm_cvtepi32_ps(ux4b); ++ vx4af = _mm_cvtepi32_ps(vx4a); ++ vx4bf = _mm_cvtepi32_ps(vx4b); ++ ++ y0x4af = _mm_div_ps(y0x4af, _mm_set1_ps(in_rng)); ++ y0x4bf = _mm_div_ps(y0x4bf, _mm_set1_ps(in_rng)); ++ y1x4af = _mm_div_ps(y1x4af, _mm_set1_ps(in_rng)); ++ y1x4bf = _mm_div_ps(y1x4bf, _mm_set1_ps(in_rng)); ++ ux4af = _mm_div_ps(ux4af, _mm_set1_ps(in_rng)); ++ ux4bf = _mm_div_ps(ux4bf, _mm_set1_ps(in_rng)); ++ vx4af = _mm_div_ps(vx4af, _mm_set1_ps(in_rng)); ++ vx4bf = _mm_div_ps(vx4bf, _mm_set1_ps(in_rng)); ++ ++ // Reshape y0x4a ++ ia1 = _mm_unpacklo_ps(y0x4af, ux4af); ++ ia2 = _mm_unpackhi_ps(y0x4af, ux4af); ++ ib1 = _mm_unpacklo_ps(vx4af, 
_mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4af, _mm_setzero_ps()); ++ ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt1 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ipt0 = _mm_shuffle_ps(ipt0, ipt0, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt1 = _mm_shuffle_ps(ipt1, ipt1, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt2 = _mm_shuffle_ps(ipt2, ipt2, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt3 = _mm_shuffle_ps(ipt3, ipt3, _MM_SHUFFLE(3, 1, 2, 0)); ++ ++ ia1 = _mm_unpacklo_ps(ipt0, ipt1); ++ ia2 = _mm_unpacklo_ps(ipt2, ipt3); ++ ib1 = _mm_unpackhi_ps(ipt0, ipt1); ++ ib2 = _mm_unpackhi_ps(ipt2, ipt3); ++ ++ ix4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(1, 0, 1, 0)); ++ px4 = _mm_shuffle_ps(ib1, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ tx4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4a, &gx4a, &bx4a, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4a = _mm_mul_ps(rx4a, _mm_set1_ps(28672.0f)); ++ gx4a = _mm_mul_ps(gx4a, _mm_set1_ps(28672.0f)); ++ bx4a = _mm_mul_ps(bx4a, _mm_set1_ps(28672.0f)); ++ ++ r0x4a = _mm_cvtps_epi32(rx4a); ++ g0x4a = _mm_cvtps_epi32(gx4a); ++ b0x4a = _mm_cvtps_epi32(bx4a); ++ ++ // Reshape y1x4a ++ ia1 = _mm_unpacklo_ps(y1x4af, ux4af); ++ ia2 = _mm_unpackhi_ps(y1x4af, ux4af); ++ ib1 = _mm_unpacklo_ps(vx4af, _mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4af, _mm_setzero_ps()); ++ ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt1 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ipt0 = 
reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ipt0 = _mm_shuffle_ps(ipt0, ipt0, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt1 = _mm_shuffle_ps(ipt1, ipt1, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt2 = _mm_shuffle_ps(ipt2, ipt2, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt3 = _mm_shuffle_ps(ipt3, ipt3, _MM_SHUFFLE(3, 1, 2, 0)); ++ ++ ia1 = _mm_unpacklo_ps(ipt0, ipt1); ++ ia2 = _mm_unpacklo_ps(ipt2, ipt3); ++ ib1 = _mm_unpackhi_ps(ipt0, ipt1); ++ ib2 = _mm_unpackhi_ps(ipt2, ipt3); ++ ++ ix4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(1, 0, 1, 0)); ++ px4 = _mm_shuffle_ps(ib1, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ tx4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4a, &gx4a, &bx4a, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4a = _mm_mul_ps(rx4a, _mm_set1_ps(28672.0f)); ++ gx4a = _mm_mul_ps(gx4a, _mm_set1_ps(28672.0f)); ++ bx4a = _mm_mul_ps(bx4a, _mm_set1_ps(28672.0f)); ++ ++ r1x4a = _mm_cvtps_epi32(rx4a); ++ g1x4a = _mm_cvtps_epi32(gx4a); ++ b1x4a = _mm_cvtps_epi32(bx4a); ++ ++ // Reshape y0x4b ++ ia1 = _mm_unpacklo_ps(y0x4bf, ux4bf); ++ ia2 = _mm_unpackhi_ps(y0x4bf, ux4bf); ++ ib1 = _mm_unpacklo_ps(vx4bf, _mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4bf, _mm_setzero_ps()); ++ ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt1 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ipt0 = _mm_shuffle_ps(ipt0, ipt0, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt1 = _mm_shuffle_ps(ipt1, ipt1, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt2 = _mm_shuffle_ps(ipt2, 
ipt2, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt3 = _mm_shuffle_ps(ipt3, ipt3, _MM_SHUFFLE(3, 1, 2, 0)); ++ ++ ia1 = _mm_unpacklo_ps(ipt0, ipt1); ++ ia2 = _mm_unpacklo_ps(ipt2, ipt3); ++ ib1 = _mm_unpackhi_ps(ipt0, ipt1); ++ ib2 = _mm_unpackhi_ps(ipt2, ipt3); ++ ++ ix4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(1, 0, 1, 0)); ++ px4 = _mm_shuffle_ps(ib1, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ tx4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4b, &gx4b, &bx4b, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4b = _mm_mul_ps(rx4b, _mm_set1_ps(28672.0f)); ++ gx4b = _mm_mul_ps(gx4b, _mm_set1_ps(28672.0f)); ++ bx4b = _mm_mul_ps(bx4b, _mm_set1_ps(28672.0f)); ++ ++ r0x4b = _mm_cvtps_epi32(rx4b); ++ g0x4b = _mm_cvtps_epi32(gx4b); ++ b0x4b = _mm_cvtps_epi32(bx4b); ++ ++ // Reshape y1x4b ++ ia1 = _mm_unpacklo_ps(y1x4bf, ux4bf); ++ ia2 = _mm_unpackhi_ps(y1x4bf, ux4bf); ++ ib1 = _mm_unpacklo_ps(vx4bf, _mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4bf, _mm_setzero_ps()); ++ ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt1 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ipt0 = _mm_shuffle_ps(ipt0, ipt0, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt1 = _mm_shuffle_ps(ipt1, ipt1, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt2 = _mm_shuffle_ps(ipt2, ipt2, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt3 = _mm_shuffle_ps(ipt3, ipt3, _MM_SHUFFLE(3, 1, 2, 0)); ++ ++ ia1 = _mm_unpacklo_ps(ipt0, ipt1); ++ ia2 = _mm_unpacklo_ps(ipt2, ipt3); ++ ib1 = _mm_unpackhi_ps(ipt0, ipt1); ++ ib2 = _mm_unpackhi_ps(ipt2, ipt3); ++ ++ ix4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(1, 0, 1, 0)); ++ px4 = 
_mm_shuffle_ps(ib1, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ tx4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4b, &gx4b, &bx4b, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4b = _mm_mul_ps(rx4b, _mm_set1_ps(28672.0f)); ++ gx4b = _mm_mul_ps(gx4b, _mm_set1_ps(28672.0f)); ++ bx4b = _mm_mul_ps(bx4b, _mm_set1_ps(28672.0f)); ++ ++ r1x4b = _mm_cvtps_epi32(rx4b); ++ g1x4b = _mm_cvtps_epi32(gx4b); ++ b1x4b = _mm_cvtps_epi32(bx4b); ++ ++ tonemap_int32x4_sse(r0x4a, g0x4a, b0x4a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4a, g1x4a, b1x4a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r0x4b, g0x4b, b0x4b, &r[4], &g[4], &b[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4b, g1x4b, b1x4b, &r1[4], &g1[4], &b1[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = _mm_lddqu_si128((const __m128i_u *)r); ++ g0ox8 = _mm_lddqu_si128((const __m128i_u *)g); ++ b0ox8 = _mm_lddqu_si128((const __m128i_u *)b); ++ ++ roax4 = _mm_cvtepi16_epi32(r0ox8); ++ goax4 = _mm_cvtepi16_epi32(g0ox8); ++ boax4 = _mm_cvtepi16_epi32(b0ox8); ++ ++ robx4 = _mm_unpackhi_epi16(r0ox8, zero128); ++ gobx4 = _mm_unpackhi_epi16(g0ox8, zero128); ++ bobx4 = _mm_unpackhi_epi16(b0ox8, zero128); ++ ++ yoax4 = _mm_mullo_epi32(roax4, _mm_set1_epi32(cry)); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(goax4, 
_mm_set1_epi32(cgy))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(boax4, _mm_set1_epi32(cby))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(out_rnd)); ++ // output shift bits for 8bit outputs is 29 - 8 = 21 ++ yoax4 = _mm_srai_epi32(yoax4, 21); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ yobx4 = _mm_mullo_epi32(robx4, _mm_set1_epi32(cry)); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(gobx4, _mm_set1_epi32(cgy))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(bobx4, _mm_set1_epi32(cby))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(out_rnd)); ++ yobx4 = _mm_srai_epi32(yobx4, 21); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y0ox8 = _mm_packs_epi32(yoax4, yobx4); ++ _mm_storeu_si64(&dsty[x], _mm_packus_epi16(y0ox8, zero128)); ++ ++ r1ox8 = _mm_lddqu_si128((const __m128i_u *)r1); ++ g1ox8 = _mm_lddqu_si128((const __m128i_u *)g1); ++ b1ox8 = _mm_lddqu_si128((const __m128i_u *)b1); ++ ++ r1oax4 = _mm_cvtepi16_epi32(r1ox8); ++ g1oax4 = _mm_cvtepi16_epi32(g1ox8); ++ b1oax4 = _mm_cvtepi16_epi32(b1ox8); ++ ++ r1obx4 = _mm_unpackhi_epi16(r1ox8, zero128); ++ g1obx4 = _mm_unpackhi_epi16(g1ox8, zero128); ++ b1obx4 = _mm_unpackhi_epi16(b1ox8, zero128); ++ ++ y1oax4 = _mm_mullo_epi32(r1oax4, _mm_set1_epi32(cry)); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(g1oax4, _mm_set1_epi32(cgy))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(b1oax4, _mm_set1_epi32(cby))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(out_rnd)); ++ y1oax4 = _mm_srai_epi32(y1oax4, 21); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1obx4 = _mm_mullo_epi32(r1obx4, _mm_set1_epi32(cry)); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(g1obx4, _mm_set1_epi32(cgy))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(b1obx4, _mm_set1_epi32(cby))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(out_rnd)); ++ y1obx4 = _mm_srai_epi32(y1obx4, 21); ++ y1obx4 = _mm_add_epi32(y1obx4, 
_mm_set1_epi32(params->out_yuv_off)); ++ ++ y1ox8 = _mm_packs_epi32(y1oax4, y1obx4); ++ _mm_storeu_si64(&dsty[x + dstlinesize[0]], _mm_packus_epi16(y1ox8, zero128)); ++ ++ ravgx4 = _mm_hadd_epi32(roax4, robx4); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_hadd_epi32(r1oax4, r1obx4)); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_set1_epi32(2)); ++ ravgx4 = _mm_srai_epi32(ravgx4, 2); ++ ++ gavgx4 = _mm_hadd_epi32(goax4, gobx4); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_hadd_epi32(g1oax4, g1obx4)); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_set1_epi32(2)); ++ gavgx4 = _mm_srai_epi32(gavgx4, 2); ++ ++ bavgx4 = _mm_hadd_epi32(boax4, bobx4); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_hadd_epi32(b1oax4, b1obx4)); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_set1_epi32(2)); ++ bavgx4 = _mm_srai_epi32(bavgx4, 2); ++ ++ uox4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cru))); ++ uox4 = _mm_add_epi32(uox4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgu))); ++ uox4 = _mm_add_epi32(uox4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cburv))); ++ uox4 = _mm_srai_epi32(uox4, 21); ++ uox4 = _mm_add_epi32(uox4, _mm_set1_epi32(out_uv_offset)); ++ _mm_storeu_si32(&dstu[x >> 1], _mm_packus_epi16(_mm_packs_epi32(uox4, zero128), zero128)); ++ ++ vox4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cburv))); ++ vox4 = _mm_add_epi32(vox4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgv))); ++ vox4 = _mm_add_epi32(vox4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cbv))); ++ vox4 = _mm_srai_epi32(vox4, 21); ++ vox4 = _mm_add_epi32(vox4, _mm_set1_epi32(out_uv_offset)); ++ _mm_storeu_si32(&dstv[x >> 1], _mm_packus_epi16(_mm_packs_epi32(vox4, zero128), zero128)); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ 
tonemap_frame_dovi_2_420p(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_SSE_INTRINSICS ++} ++ ++X86_64_V2 void tonemap_frame_dovi_2_420p10_sse(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstu = dstu; ++ uint16_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // not zero when not divisible by 8 ++ // intentionally leave last pixel emtpy when input is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const float in_rng = (float)((1 << in_depth) - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ ++ __m128i zero128 = _mm_setzero_si128(); ++ __m128i ux4, vx4; ++ __m128i y0x8, y1x8; ++ __m128i y0x4a, y0x4b, y1x4a, y1x4b, ux4a, ux4b, vx4a, vx4b; ++ __m128i r0x4a, g0x4a, b0x4a, r0x4b, g0x4b, b0x4b; ++ __m128i r1x4a, g1x4a, b1x4a, r1x4b, g1x4b, b1x4b; ++ ++ __m128i r0ox8, g0ox8, b0ox8; ++ __m128i y0ox8; ++ __m128i roax4, robx4, goax4, gobx4, boax4, bobx4; ++ 
__m128i yoax4, yobx4; ++ ++ __m128i r1ox8, g1ox8, b1ox8; ++ __m128i y1ox8; ++ __m128i r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ __m128i y1oax4, y1obx4; ++ __m128i uox4, vox4, ravgx4, gavgx4, bavgx4; ++ ++ __m128 ipt0, ipt1, ipt2, ipt3; ++ __m128 ia1, ib1, ia2, ib2; ++ __m128 ix4, px4, tx4; ++ __m128 lx4, mx4, sx4; ++ __m128 rx4a, gx4a, bx4a, rx4b, gx4b, bx4b; ++ __m128 y0x4af, y0x4bf, y1x4af, y1x4bf, ux4af, ux4bf, vx4af, vx4bf; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstu += dstlinesize[1] / 2, dstv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = _mm_lddqu_si128((__m128i*)(srcy + x)); ++ y1x8 = _mm_lddqu_si128((__m128i*)(srcy + (srclinesize[0] / 2 + x))); ++ ux4 = _mm_loadu_si64((__m128i*)(srcu + (x >> 1))); ++ vx4 = _mm_loadu_si64((__m128i*)(srcv + (x >> 1))); ++ ++ y0x4a = _mm_cvtepu16_epi32(y0x8); ++ y0x4b = _mm_unpackhi_epi16(y0x8, zero128); ++ y1x4a = _mm_cvtepu16_epi32(y1x8); ++ y1x4b = _mm_unpackhi_epi16(y1x8, zero128); ++ ux4 = _mm_cvtepu16_epi32(ux4); ++ vx4 = _mm_cvtepu16_epi32(vx4); ++ ++ ux4a = _mm_unpacklo_epi32(ux4, ux4); ++ ux4b = _mm_unpackhi_epi32(ux4, ux4); ++ vx4a = _mm_unpacklo_epi32(vx4, vx4); ++ vx4b = _mm_unpackhi_epi32(vx4, vx4); ++ ++ y0x4af = _mm_cvtepi32_ps(y0x4a); ++ y0x4bf = _mm_cvtepi32_ps(y0x4b); ++ y1x4af = _mm_cvtepi32_ps(y1x4a); ++ y1x4bf = _mm_cvtepi32_ps(y1x4b); ++ ux4af = _mm_cvtepi32_ps(ux4a); ++ ux4bf = _mm_cvtepi32_ps(ux4b); ++ vx4af = _mm_cvtepi32_ps(vx4a); ++ vx4bf = _mm_cvtepi32_ps(vx4b); ++ ++ y0x4af = _mm_div_ps(y0x4af, _mm_set1_ps(in_rng)); ++ y0x4bf = _mm_div_ps(y0x4bf, _mm_set1_ps(in_rng)); ++ y1x4af = _mm_div_ps(y1x4af, _mm_set1_ps(in_rng)); ++ y1x4bf = _mm_div_ps(y1x4bf, _mm_set1_ps(in_rng)); ++ ux4af = _mm_div_ps(ux4af, _mm_set1_ps(in_rng)); ++ ux4bf = _mm_div_ps(ux4bf, _mm_set1_ps(in_rng)); ++ vx4af = _mm_div_ps(vx4af, _mm_set1_ps(in_rng)); ++ 
vx4bf = _mm_div_ps(vx4bf, _mm_set1_ps(in_rng)); ++ ++ // Reshape y0x4a ++ ia1 = _mm_unpacklo_ps(y0x4af, ux4af); ++ ia2 = _mm_unpackhi_ps(y0x4af, ux4af); ++ ib1 = _mm_unpacklo_ps(vx4af, _mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4af, _mm_setzero_ps()); ++ ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt1 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ipt0 = _mm_shuffle_ps(ipt0, ipt0, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt1 = _mm_shuffle_ps(ipt1, ipt1, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt2 = _mm_shuffle_ps(ipt2, ipt2, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt3 = _mm_shuffle_ps(ipt3, ipt3, _MM_SHUFFLE(3, 1, 2, 0)); ++ ++ ia1 = _mm_unpacklo_ps(ipt0, ipt1); ++ ia2 = _mm_unpacklo_ps(ipt2, ipt3); ++ ib1 = _mm_unpackhi_ps(ipt0, ipt1); ++ ib2 = _mm_unpackhi_ps(ipt2, ipt3); ++ ++ ix4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(1, 0, 1, 0)); ++ px4 = _mm_shuffle_ps(ib1, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ tx4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4a, &gx4a, &bx4a, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4a = _mm_mul_ps(rx4a, _mm_set1_ps(28672.0f)); ++ gx4a = _mm_mul_ps(gx4a, _mm_set1_ps(28672.0f)); ++ bx4a = _mm_mul_ps(bx4a, _mm_set1_ps(28672.0f)); ++ ++ r0x4a = _mm_cvtps_epi32(rx4a); ++ g0x4a = _mm_cvtps_epi32(gx4a); ++ b0x4a = _mm_cvtps_epi32(bx4a); ++ ++ // Reshape y1x4a ++ ia1 = _mm_unpacklo_ps(y1x4af, ux4af); ++ ia2 = _mm_unpackhi_ps(y1x4af, ux4af); ++ ib1 = _mm_unpacklo_ps(vx4af, _mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4af, _mm_setzero_ps()); ++ ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt1 = 
_mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ipt0 = _mm_shuffle_ps(ipt0, ipt0, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt1 = _mm_shuffle_ps(ipt1, ipt1, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt2 = _mm_shuffle_ps(ipt2, ipt2, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt3 = _mm_shuffle_ps(ipt3, ipt3, _MM_SHUFFLE(3, 1, 2, 0)); ++ ++ ia1 = _mm_unpacklo_ps(ipt0, ipt1); ++ ia2 = _mm_unpacklo_ps(ipt2, ipt3); ++ ib1 = _mm_unpackhi_ps(ipt0, ipt1); ++ ib2 = _mm_unpackhi_ps(ipt2, ipt3); ++ ++ ix4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(1, 0, 1, 0)); ++ px4 = _mm_shuffle_ps(ib1, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ tx4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4a, &gx4a, &bx4a, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4a = _mm_mul_ps(rx4a, _mm_set1_ps(28672.0f)); ++ gx4a = _mm_mul_ps(gx4a, _mm_set1_ps(28672.0f)); ++ bx4a = _mm_mul_ps(bx4a, _mm_set1_ps(28672.0f)); ++ ++ r1x4a = _mm_cvtps_epi32(rx4a); ++ g1x4a = _mm_cvtps_epi32(gx4a); ++ b1x4a = _mm_cvtps_epi32(bx4a); ++ ++ // Reshape y0x4b ++ ia1 = _mm_unpacklo_ps(y0x4bf, ux4bf); ++ ia2 = _mm_unpackhi_ps(y0x4bf, ux4bf); ++ ib1 = _mm_unpacklo_ps(vx4bf, _mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4bf, _mm_setzero_ps()); ++ ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt1 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = 
reshape_dovi_iptpqc2(ipt3, params); ++ ++ ipt0 = _mm_shuffle_ps(ipt0, ipt0, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt1 = _mm_shuffle_ps(ipt1, ipt1, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt2 = _mm_shuffle_ps(ipt2, ipt2, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt3 = _mm_shuffle_ps(ipt3, ipt3, _MM_SHUFFLE(3, 1, 2, 0)); ++ ++ ia1 = _mm_unpacklo_ps(ipt0, ipt1); ++ ia2 = _mm_unpacklo_ps(ipt2, ipt3); ++ ib1 = _mm_unpackhi_ps(ipt0, ipt1); ++ ib2 = _mm_unpackhi_ps(ipt2, ipt3); ++ ++ ix4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(1, 0, 1, 0)); ++ px4 = _mm_shuffle_ps(ib1, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ tx4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4b, &gx4b, &bx4b, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4b = _mm_mul_ps(rx4b, _mm_set1_ps(28672.0f)); ++ gx4b = _mm_mul_ps(gx4b, _mm_set1_ps(28672.0f)); ++ bx4b = _mm_mul_ps(bx4b, _mm_set1_ps(28672.0f)); ++ ++ r0x4b = _mm_cvtps_epi32(rx4b); ++ g0x4b = _mm_cvtps_epi32(gx4b); ++ b0x4b = _mm_cvtps_epi32(bx4b); ++ ++ // Reshape y1x4b ++ ia1 = _mm_unpacklo_ps(y1x4bf, ux4bf); ++ ia2 = _mm_unpackhi_ps(y1x4bf, ux4bf); ++ ib1 = _mm_unpacklo_ps(vx4bf, _mm_setzero_ps()); ++ ib2 = _mm_unpackhi_ps(vx4bf, _mm_setzero_ps()); ++ ipt0 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt1 = _mm_shuffle_ps(ia1, ib1, _MM_SHUFFLE(3, 2, 3, 2)); ++ ipt2 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ ipt3 = _mm_shuffle_ps(ia2, ib2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ipt0 = reshape_dovi_iptpqc2(ipt0, params); ++ ipt1 = reshape_dovi_iptpqc2(ipt1, params); ++ ipt2 = reshape_dovi_iptpqc2(ipt2, params); ++ ipt3 = reshape_dovi_iptpqc2(ipt3, params); ++ ++ ipt0 = _mm_shuffle_ps(ipt0, ipt0, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt1 = _mm_shuffle_ps(ipt1, ipt1, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt2 = _mm_shuffle_ps(ipt2, ipt2, _MM_SHUFFLE(3, 1, 2, 0)); ++ ipt3 = _mm_shuffle_ps(ipt3, ipt3, _MM_SHUFFLE(3, 1, 2, 0)); ++ ++ ia1 = _mm_unpacklo_ps(ipt0, ipt1); ++ 
ia2 = _mm_unpacklo_ps(ipt2, ipt3); ++ ib1 = _mm_unpackhi_ps(ipt0, ipt1); ++ ib2 = _mm_unpackhi_ps(ipt2, ipt3); ++ ++ ix4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(1, 0, 1, 0)); ++ px4 = _mm_shuffle_ps(ib1, ib2, _MM_SHUFFLE(1, 0, 1, 0)); ++ tx4 = _mm_shuffle_ps(ia1, ia2, _MM_SHUFFLE(3, 2, 3, 2)); ++ ++ ycc2rgbx4(&lx4, &mx4, &sx4, ix4, px4, tx4, params->dovi->nonlinear, *params->ycc_offset); ++ lms2rgbx4(&rx4b, &gx4b, &bx4b, lx4, mx4, sx4, *params->lms2rgb_matrix); ++ ++ rx4b = _mm_mul_ps(rx4b, _mm_set1_ps(28672.0f)); ++ gx4b = _mm_mul_ps(gx4b, _mm_set1_ps(28672.0f)); ++ bx4b = _mm_mul_ps(bx4b, _mm_set1_ps(28672.0f)); ++ ++ r1x4b = _mm_cvtps_epi32(rx4b); ++ g1x4b = _mm_cvtps_epi32(gx4b); ++ b1x4b = _mm_cvtps_epi32(bx4b); ++ ++ tonemap_int32x4_sse(r0x4a, g0x4a, b0x4a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4a, g1x4a, b1x4a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r0x4b, g0x4b, b0x4b, &r[4], &g[4], &b[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4b, g1x4b, b1x4b, &r1[4], &g1[4], &b1[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = _mm_lddqu_si128((const __m128i_u *)r); ++ g0ox8 = _mm_lddqu_si128((const __m128i_u *)g); ++ b0ox8 = _mm_lddqu_si128((const __m128i_u *)b); ++ ++ roax4 = _mm_cvtepi16_epi32(r0ox8); ++ goax4 = _mm_cvtepi16_epi32(g0ox8); ++ boax4 = _mm_cvtepi16_epi32(b0ox8); ++ ++ robx4 = _mm_unpackhi_epi16(r0ox8, zero128); ++ gobx4 = _mm_unpackhi_epi16(g0ox8, 
zero128); ++ bobx4 = _mm_unpackhi_epi16(b0ox8, zero128); ++ ++ yoax4 = _mm_mullo_epi32(roax4, _mm_set1_epi32(cry)); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(goax4, _mm_set1_epi32(cgy))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(boax4, _mm_set1_epi32(cby))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(out_rnd)); ++ yoax4 = _mm_srai_epi32(yoax4, out_sh); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ yobx4 = _mm_mullo_epi32(robx4, _mm_set1_epi32(cry)); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(gobx4, _mm_set1_epi32(cgy))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(bobx4, _mm_set1_epi32(cby))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(out_rnd)); ++ yobx4 = _mm_srai_epi32(yobx4, out_sh); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y0ox8 = _mm_packus_epi32(yoax4, yobx4); ++ _mm_storeu_si128((__m128i_u *) &dsty[x], y0ox8); ++ ++ r1ox8 = _mm_lddqu_si128((const __m128i_u *)r1); ++ g1ox8 = _mm_lddqu_si128((const __m128i_u *)g1); ++ b1ox8 = _mm_lddqu_si128((const __m128i_u *)b1); ++ ++ r1oax4 = _mm_cvtepi16_epi32(r1ox8); ++ g1oax4 = _mm_cvtepi16_epi32(g1ox8); ++ b1oax4 = _mm_cvtepi16_epi32(b1ox8); ++ ++ r1obx4 = _mm_unpackhi_epi16(r1ox8, zero128); ++ g1obx4 = _mm_unpackhi_epi16(g1ox8, zero128); ++ b1obx4 = _mm_unpackhi_epi16(b1ox8, zero128); ++ ++ y1oax4 = _mm_mullo_epi32(r1oax4, _mm_set1_epi32(cry)); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(g1oax4, _mm_set1_epi32(cgy))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(b1oax4, _mm_set1_epi32(cby))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(out_rnd)); ++ y1oax4 = _mm_srai_epi32(y1oax4, out_sh); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1obx4 = _mm_mullo_epi32(r1obx4, _mm_set1_epi32(cry)); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(g1obx4, _mm_set1_epi32(cgy))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(b1obx4, _mm_set1_epi32(cby))); ++ y1obx4 = 
_mm_add_epi32(y1obx4, _mm_set1_epi32(out_rnd)); ++ y1obx4 = _mm_srai_epi32(y1obx4, out_sh); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1ox8 = _mm_packus_epi32(y1oax4, y1obx4); ++ _mm_storeu_si128((__m128i_u *) &dsty[x + dstlinesize[0] / 2], y1ox8); ++ ++ ravgx4 = _mm_hadd_epi32(roax4, robx4); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_hadd_epi32(r1oax4, r1obx4)); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_set1_epi32(2)); ++ ravgx4 = _mm_srai_epi32(ravgx4, 2); ++ ++ gavgx4 = _mm_hadd_epi32(goax4, gobx4); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_hadd_epi32(g1oax4, g1obx4)); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_set1_epi32(2)); ++ gavgx4 = _mm_srai_epi32(gavgx4, 2); ++ ++ bavgx4 = _mm_hadd_epi32(boax4, bobx4); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_hadd_epi32(b1oax4, b1obx4)); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_set1_epi32(2)); ++ bavgx4 = _mm_srai_epi32(bavgx4, 2); ++ ++ uox4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cru))); ++ uox4 = _mm_add_epi32(uox4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgu))); ++ uox4 = _mm_add_epi32(uox4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cburv))); ++ uox4 = _mm_srai_epi32(uox4, out_sh); ++ uox4 = _mm_add_epi32(uox4, _mm_set1_epi32(out_uv_offset)); ++ _mm_storeu_si64((__m128i_u *) &dstu[x >> 1], _mm_packus_epi32(uox4, zero128)); ++ ++ vox4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cburv))); ++ vox4 = _mm_add_epi32(vox4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgv))); ++ vox4 = _mm_add_epi32(vox4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cbv))); ++ vox4 = _mm_srai_epi32(vox4, out_sh); ++ vox4 = _mm_add_epi32(vox4, _mm_set1_epi32(out_uv_offset)); ++ _mm_storeu_si64((__m128i_u *) &dstv[x >> 1], _mm_packus_epi32(vox4, zero128)); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += 
offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ tonemap_frame_dovi_2_420p10(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_SSE_INTRINSICS ++} ++ ++X86_64_V2 void tonemap_frame_420p10_2_420p_sse(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS ++ uint8_t *rdsty = dsty; ++ uint8_t *rdstu = dstu; ++ uint8_t *rdstv = dstv; ++ ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ ++ int rheight = height; ++ // not zero when not divisible by 8 ++ // intentionally leave last pixel emtpy when input is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], 
b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ ++ __m128i in_yuv_offx4 = _mm_set1_epi32(params->in_yuv_off); ++ __m128i in_uv_offx4= _mm_set1_epi32(in_uv_offset); ++ __m128i cyx4 = _mm_set1_epi32(cy); ++ __m128i rndx4 = _mm_set1_epi32(in_rnd); ++ __m128i zero128 = _mm_setzero_si128(); ++ __m128i ux4, vx4; ++ __m128i y0x8, y1x8; ++ __m128i y0x4a, y0x4b, y1x4a, y1x4b, ux4a, ux4b, vx4a, vx4b; ++ __m128i r0x4a, g0x4a, b0x4a, r0x4b, g0x4b, b0x4b; ++ __m128i r1x4a, g1x4a, b1x4a, r1x4b, g1x4b, b1x4b; ++ ++ __m128i r0ox8, g0ox8, b0ox8; ++ __m128i y0ox8; ++ __m128i roax4, robx4, goax4, gobx4, boax4, bobx4; ++ __m128i yoax4, yobx4; ++ ++ __m128i r1ox8, g1ox8, b1ox8; ++ __m128i y1ox8; ++ __m128i r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ __m128i y1oax4, y1obx4; ++ __m128i uox4, vox4, ravgx4, gavgx4, bavgx4; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstu += dstlinesize[1], dstv += dstlinesize[2], ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[2] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = _mm_lddqu_si128((__m128i*)(srcy + x)); ++ y1x8 = _mm_lddqu_si128((__m128i*)(srcy + (srclinesize[0] / 2 + x))); ++ ux4 = _mm_loadu_si64((__m128i*)(srcu + (x >> 1))); ++ vx4 = _mm_loadu_si64((__m128i*)(srcv + (x >> 1))); ++ ++ y0x4a = _mm_cvtepu16_epi32(y0x8); ++ y0x4b = _mm_unpackhi_epi16(y0x8, zero128); ++ y1x4a = _mm_cvtepu16_epi32(y1x8); ++ y1x4b = _mm_unpackhi_epi16(y1x8, zero128); ++ ux4 = _mm_cvtepu16_epi32(ux4); ++ vx4 = _mm_cvtepu16_epi32(vx4); ++ y0x4a = _mm_sub_epi32(y0x4a, in_yuv_offx4); ++ y1x4a = _mm_sub_epi32(y1x4a, in_yuv_offx4); ++ y0x4b = _mm_sub_epi32(y0x4b, in_yuv_offx4); ++ y1x4b = _mm_sub_epi32(y1x4b, in_yuv_offx4); ++ ux4 = _mm_sub_epi32(ux4, in_uv_offx4); ++ vx4 = _mm_sub_epi32(vx4, in_uv_offx4); ++ ++ ux4a = _mm_unpacklo_epi32(ux4, ux4); ++ ux4b = _mm_unpackhi_epi32(ux4, ux4); ++ vx4a = _mm_unpacklo_epi32(vx4, vx4); ++ vx4b = _mm_unpackhi_epi32(vx4, vx4); ++ ++ // r = 
av_clip_int16((y * cy + crv * v + in_rnd) >> in_sh); ++ r0x4a = g0x4a = b0x4a = _mm_mullo_epi32(y0x4a, cyx4); ++ r0x4a = _mm_add_epi32(r0x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(crv))); ++ r0x4a = _mm_add_epi32(r0x4a, rndx4); ++ r0x4a = _mm_srai_epi32(r0x4a, in_sh); ++ r0x4a = av_clip_int16_sse(r0x4a); ++ ++ r1x4a = g1x4a = b1x4a = _mm_mullo_epi32(y1x4a, cyx4); ++ r1x4a = _mm_add_epi32(r1x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(crv))); ++ r1x4a = _mm_add_epi32(r1x4a, rndx4); ++ r1x4a = _mm_srai_epi32(r1x4a, in_sh); ++ r1x4a = av_clip_int16_sse(r1x4a); ++ ++ // g = av_clip_int16((y * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g0x4a = _mm_add_epi32(g0x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cgu))); ++ g0x4a = _mm_add_epi32(g0x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(cgv))); ++ g0x4a = _mm_add_epi32(g0x4a, rndx4); ++ g0x4a = _mm_srai_epi32(g0x4a, in_sh); ++ g0x4a = av_clip_int16_sse(g0x4a); ++ ++ g1x4a = _mm_add_epi32(g1x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cgu))); ++ g1x4a = _mm_add_epi32(g1x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(cgv))); ++ g1x4a = _mm_add_epi32(g1x4a, rndx4); ++ g1x4a = _mm_srai_epi32(g1x4a, in_sh); ++ g1x4a = av_clip_int16_sse(g1x4a); ++ ++ // b = av_clip_int16((y * cy + cbu * u + in_rnd) >> in_sh); ++ b0x4a = _mm_add_epi32(b0x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cbu))); ++ b0x4a = _mm_add_epi32(b0x4a, rndx4); ++ b0x4a = _mm_srai_epi32(b0x4a, in_sh); ++ b0x4a = av_clip_int16_sse(b0x4a); ++ ++ b1x4a = _mm_add_epi32(b1x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cbu))); ++ b1x4a = _mm_add_epi32(b1x4a, rndx4); ++ b1x4a = _mm_srai_epi32(b1x4a, in_sh); ++ b1x4a = av_clip_int16_sse(b1x4a); ++ ++ r0x4b = g0x4b = b0x4b = _mm_mullo_epi32(y0x4b, cyx4); ++ r0x4b = _mm_add_epi32(r0x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(crv))); ++ r0x4b = _mm_add_epi32(r0x4b, rndx4); ++ r0x4b = _mm_srai_epi32(r0x4b, in_sh); ++ r0x4b = av_clip_int16_sse(r0x4b); ++ ++ r1x4b = g1x4b = b1x4b = _mm_mullo_epi32(y1x4b, cyx4); ++ r1x4b = _mm_add_epi32(r1x4b, 
_mm_mullo_epi32(vx4b, _mm_set1_epi32(crv))); ++ r1x4b = _mm_add_epi32(r1x4b, rndx4); ++ r1x4b = _mm_srai_epi32(r1x4b, in_sh); ++ r1x4b = av_clip_int16_sse(r1x4b); ++ ++ g0x4b = _mm_add_epi32(g0x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cgu))); ++ g0x4b = _mm_add_epi32(g0x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(cgv))); ++ g0x4b = _mm_add_epi32(g0x4b, rndx4); ++ g0x4b = _mm_srai_epi32(g0x4b, in_sh); ++ g0x4b = av_clip_int16_sse(g0x4b); ++ ++ g1x4b = _mm_add_epi32(g1x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cgu))); ++ g1x4b = _mm_add_epi32(g1x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(cgv))); ++ g1x4b = _mm_add_epi32(g1x4b, rndx4); ++ g1x4b = _mm_srai_epi32(g1x4b, in_sh); ++ g1x4b = av_clip_int16_sse(g1x4b); ++ ++ b0x4b = _mm_add_epi32(b0x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cbu))); ++ b0x4b = _mm_add_epi32(b0x4b, rndx4); ++ b0x4b = _mm_srai_epi32(b0x4b, in_sh); ++ b0x4b = av_clip_int16_sse(b0x4b); ++ ++ b1x4b = _mm_add_epi32(b1x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cbu))); ++ b1x4b = _mm_add_epi32(b1x4b, rndx4); ++ b1x4b = _mm_srai_epi32(b1x4b, in_sh); ++ b1x4b = av_clip_int16_sse(b1x4b); ++ ++ tonemap_int32x4_sse(r0x4a, g0x4a, b0x4a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4a, g1x4a, b1x4a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r0x4b, g0x4b, b0x4b, &r[4], &g[4], &b[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4b, g1x4b, b1x4b, &r1[4], &g1[4], &b1[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ 
params->rgb2rgb_passthrough); ++ ++ r0ox8 = _mm_lddqu_si128((const __m128i_u *)r); ++ g0ox8 = _mm_lddqu_si128((const __m128i_u *)g); ++ b0ox8 = _mm_lddqu_si128((const __m128i_u *)b); ++ ++ roax4 = _mm_cvtepi16_epi32(r0ox8); ++ goax4 = _mm_cvtepi16_epi32(g0ox8); ++ boax4 = _mm_cvtepi16_epi32(b0ox8); ++ ++ robx4 = _mm_unpackhi_epi16(r0ox8, zero128); ++ gobx4 = _mm_unpackhi_epi16(g0ox8, zero128); ++ bobx4 = _mm_unpackhi_epi16(b0ox8, zero128); ++ ++ yoax4 = _mm_mullo_epi32(roax4, _mm_set1_epi32(cry)); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(goax4, _mm_set1_epi32(cgy))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(boax4, _mm_set1_epi32(cby))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(out_rnd)); ++ // output shift bits for 8bit outputs is 29 - 8 = 21 ++ yoax4 = _mm_srai_epi32(yoax4, 21); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ yobx4 = _mm_mullo_epi32(robx4, _mm_set1_epi32(cry)); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(gobx4, _mm_set1_epi32(cgy))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(bobx4, _mm_set1_epi32(cby))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(out_rnd)); ++ yobx4 = _mm_srai_epi32(yobx4, 21); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y0ox8 = _mm_packs_epi32(yoax4, yobx4); ++ _mm_storeu_si64(&dsty[x], _mm_packus_epi16(y0ox8, zero128)); ++ ++ r1ox8 = _mm_lddqu_si128((const __m128i_u *)r1); ++ g1ox8 = _mm_lddqu_si128((const __m128i_u *)g1); ++ b1ox8 = _mm_lddqu_si128((const __m128i_u *)b1); ++ ++ r1oax4 = _mm_cvtepi16_epi32(r1ox8); ++ g1oax4 = _mm_cvtepi16_epi32(g1ox8); ++ b1oax4 = _mm_cvtepi16_epi32(b1ox8); ++ ++ r1obx4 = _mm_unpackhi_epi16(r1ox8, zero128); ++ g1obx4 = _mm_unpackhi_epi16(g1ox8, zero128); ++ b1obx4 = _mm_unpackhi_epi16(b1ox8, zero128); ++ ++ y1oax4 = _mm_mullo_epi32(r1oax4, _mm_set1_epi32(cry)); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(g1oax4, _mm_set1_epi32(cgy))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(b1oax4, 
_mm_set1_epi32(cby))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(out_rnd)); ++ y1oax4 = _mm_srai_epi32(y1oax4, 21); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1obx4 = _mm_mullo_epi32(r1obx4, _mm_set1_epi32(cry)); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(g1obx4, _mm_set1_epi32(cgy))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(b1obx4, _mm_set1_epi32(cby))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(out_rnd)); ++ y1obx4 = _mm_srai_epi32(y1obx4, 21); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1ox8 = _mm_packs_epi32(y1oax4, y1obx4); ++ _mm_storeu_si64(&dsty[x + dstlinesize[0]], _mm_packus_epi16(y1ox8, zero128)); ++ ++ ravgx4 = _mm_hadd_epi32(roax4, robx4); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_hadd_epi32(r1oax4, r1obx4)); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_set1_epi32(2)); ++ ravgx4 = _mm_srai_epi32(ravgx4, 2); ++ ++ gavgx4 = _mm_hadd_epi32(goax4, gobx4); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_hadd_epi32(g1oax4, g1obx4)); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_set1_epi32(2)); ++ gavgx4 = _mm_srai_epi32(gavgx4, 2); ++ ++ bavgx4 = _mm_hadd_epi32(boax4, bobx4); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_hadd_epi32(b1oax4, b1obx4)); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_set1_epi32(2)); ++ bavgx4 = _mm_srai_epi32(bavgx4, 2); ++ ++ uox4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cru))); ++ uox4 = _mm_add_epi32(uox4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgu))); ++ uox4 = _mm_add_epi32(uox4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cburv))); ++ uox4 = _mm_srai_epi32(uox4, 21); ++ uox4 = _mm_add_epi32(uox4, _mm_set1_epi32(out_uv_offset)); ++ _mm_storeu_si32(&dstu[x >> 1], _mm_packus_epi16(_mm_packs_epi32(uox4, zero128), zero128)); ++ ++ vox4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cburv))); ++ vox4 = _mm_add_epi32(vox4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgv))); ++ vox4 = _mm_add_epi32(vox4, 
_mm_mullo_epi32(bavgx4, _mm_set1_epi32(cbv))); ++ vox4 = _mm_srai_epi32(vox4, 21); ++ vox4 = _mm_add_epi32(vox4, _mm_set1_epi32(out_uv_offset)); ++ _mm_storeu_si32(&dstv[x >> 1], _mm_packus_epi16(_mm_packs_epi32(vox4, zero128), zero128)); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ tonemap_frame_420p10_2_420p(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_SSE_INTRINSICS ++} ++ ++X86_64_V2 void tonemap_frame_420p10_2_420p10_sse(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstu = dstu; ++ uint16_t *rdstv = dstv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcu = srcu; ++ const uint16_t *rsrcv = srcv; ++ int rheight = height; ++ // not zero when not divisible by 8 ++ // intentionally leave last pixel emtpy when input is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = 
(*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ ++ __m128i in_yuv_offx4 = _mm_set1_epi32(params->in_yuv_off); ++ __m128i in_uv_offx4= _mm_set1_epi32(in_uv_offset); ++ __m128i cyx4 = _mm_set1_epi32(cy); ++ __m128i rndx4 = _mm_set1_epi32(in_rnd); ++ __m128i zero128 = _mm_setzero_si128(); ++ __m128i ux4, vx4; ++ __m128i y0x8, y1x8; ++ __m128i y0x4a, y0x4b, y1x4a, y1x4b, ux4a, ux4b, vx4a, vx4b; ++ __m128i r0x4a, g0x4a, b0x4a, r0x4b, g0x4b, b0x4b; ++ __m128i r1x4a, g1x4a, b1x4a, r1x4b, g1x4b, b1x4b; ++ ++ __m128i r0ox8, g0ox8, b0ox8; ++ __m128i y0ox8; ++ __m128i roax4, robx4, goax4, gobx4, boax4, bobx4; ++ __m128i yoax4, yobx4; ++ ++ __m128i r1ox8, g1ox8, b1ox8; ++ __m128i y1ox8; ++ __m128i r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ __m128i y1oax4, y1obx4; ++ __m128i uox4, vox4, ravgx4, gavgx4, bavgx4; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstu += dstlinesize[1] / 2, dstv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcu += srclinesize[1] / 2, srcv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = _mm_lddqu_si128((__m128i*)(srcy + x)); ++ y1x8 = _mm_lddqu_si128((__m128i*)(srcy + (srclinesize[0] / 2 + x))); ++ ux4 = _mm_loadu_si64((__m128i*)(srcu + (x >> 1))); ++ vx4 = _mm_loadu_si64((__m128i*)(srcv + (x >> 1))); ++ ++ y0x4a = _mm_cvtepu16_epi32(y0x8); ++ y0x4b = _mm_unpackhi_epi16(y0x8, zero128); ++ y1x4a = _mm_cvtepu16_epi32(y1x8); ++ y1x4b = _mm_unpackhi_epi16(y1x8, zero128); ++ ux4 = _mm_cvtepu16_epi32(ux4); ++ vx4 = 
_mm_cvtepu16_epi32(vx4); ++ y0x4a = _mm_sub_epi32(y0x4a, in_yuv_offx4); ++ y1x4a = _mm_sub_epi32(y1x4a, in_yuv_offx4); ++ y0x4b = _mm_sub_epi32(y0x4b, in_yuv_offx4); ++ y1x4b = _mm_sub_epi32(y1x4b, in_yuv_offx4); ++ ux4 = _mm_sub_epi32(ux4, in_uv_offx4); ++ vx4 = _mm_sub_epi32(vx4, in_uv_offx4); ++ ++ ux4a = _mm_unpacklo_epi32(ux4, ux4); ++ ux4b = _mm_unpackhi_epi32(ux4, ux4); ++ vx4a = _mm_unpacklo_epi32(vx4, vx4); ++ vx4b = _mm_unpackhi_epi32(vx4, vx4); ++ ++ // r = av_clip_int16((y * cy + crv * v + in_rnd) >> in_sh); ++ r0x4a = g0x4a = b0x4a = _mm_mullo_epi32(y0x4a, cyx4); ++ r0x4a = _mm_add_epi32(r0x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(crv))); ++ r0x4a = _mm_add_epi32(r0x4a, rndx4); ++ r0x4a = _mm_srai_epi32(r0x4a, in_sh); ++ r0x4a = av_clip_int16_sse(r0x4a); ++ ++ r1x4a = g1x4a = b1x4a = _mm_mullo_epi32(y1x4a, cyx4); ++ r1x4a = _mm_add_epi32(r1x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(crv))); ++ r1x4a = _mm_add_epi32(r1x4a, rndx4); ++ r1x4a = _mm_srai_epi32(r1x4a, in_sh); ++ r1x4a = av_clip_int16_sse(r1x4a); ++ ++ // g = av_clip_int16((y * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g0x4a = _mm_add_epi32(g0x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cgu))); ++ g0x4a = _mm_add_epi32(g0x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(cgv))); ++ g0x4a = _mm_add_epi32(g0x4a, rndx4); ++ g0x4a = _mm_srai_epi32(g0x4a, in_sh); ++ g0x4a = av_clip_int16_sse(g0x4a); ++ ++ g1x4a = _mm_add_epi32(g1x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cgu))); ++ g1x4a = _mm_add_epi32(g1x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(cgv))); ++ g1x4a = _mm_add_epi32(g1x4a, rndx4); ++ g1x4a = _mm_srai_epi32(g1x4a, in_sh); ++ g1x4a = av_clip_int16_sse(g1x4a); ++ ++ // b = av_clip_int16((y * cy + cbu * u + in_rnd) >> in_sh); ++ b0x4a = _mm_add_epi32(b0x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cbu))); ++ b0x4a = _mm_add_epi32(b0x4a, rndx4); ++ b0x4a = _mm_srai_epi32(b0x4a, in_sh); ++ b0x4a = av_clip_int16_sse(b0x4a); ++ ++ b1x4a = _mm_add_epi32(b1x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cbu))); ++ 
b1x4a = _mm_add_epi32(b1x4a, rndx4); ++ b1x4a = _mm_srai_epi32(b1x4a, in_sh); ++ b1x4a = av_clip_int16_sse(b1x4a); ++ ++ r0x4b = g0x4b = b0x4b = _mm_mullo_epi32(y0x4b, cyx4); ++ r0x4b = _mm_add_epi32(r0x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(crv))); ++ r0x4b = _mm_add_epi32(r0x4b, rndx4); ++ r0x4b = _mm_srai_epi32(r0x4b, in_sh); ++ r0x4b = av_clip_int16_sse(r0x4b); ++ ++ r1x4b = g1x4b = b1x4b = _mm_mullo_epi32(y1x4b, cyx4); ++ r1x4b = _mm_add_epi32(r1x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(crv))); ++ r1x4b = _mm_add_epi32(r1x4b, rndx4); ++ r1x4b = _mm_srai_epi32(r1x4b, in_sh); ++ r1x4b = av_clip_int16_sse(r1x4b); ++ ++ g0x4b = _mm_add_epi32(g0x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cgu))); ++ g0x4b = _mm_add_epi32(g0x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(cgv))); ++ g0x4b = _mm_add_epi32(g0x4b, rndx4); ++ g0x4b = _mm_srai_epi32(g0x4b, in_sh); ++ g0x4b = av_clip_int16_sse(g0x4b); ++ ++ g1x4b = _mm_add_epi32(g1x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cgu))); ++ g1x4b = _mm_add_epi32(g1x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(cgv))); ++ g1x4b = _mm_add_epi32(g1x4b, rndx4); ++ g1x4b = _mm_srai_epi32(g1x4b, in_sh); ++ g1x4b = av_clip_int16_sse(g1x4b); ++ ++ b0x4b = _mm_add_epi32(b0x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cbu))); ++ b0x4b = _mm_add_epi32(b0x4b, rndx4); ++ b0x4b = _mm_srai_epi32(b0x4b, in_sh); ++ b0x4b = av_clip_int16_sse(b0x4b); ++ ++ b1x4b = _mm_add_epi32(b1x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cbu))); ++ b1x4b = _mm_add_epi32(b1x4b, rndx4); ++ b1x4b = _mm_srai_epi32(b1x4b, in_sh); ++ b1x4b = av_clip_int16_sse(b1x4b); ++ ++ tonemap_int32x4_sse(r0x4a, g0x4a, b0x4a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4a, g1x4a, b1x4a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ 
params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r0x4b, g0x4b, b0x4b, &r[4], &g[4], &b[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4b, g1x4b, b1x4b, &r1[4], &g1[4], &b1[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = _mm_lddqu_si128((const __m128i_u *)r); ++ g0ox8 = _mm_lddqu_si128((const __m128i_u *)g); ++ b0ox8 = _mm_lddqu_si128((const __m128i_u *)b); ++ ++ roax4 = _mm_cvtepi16_epi32(r0ox8); ++ goax4 = _mm_cvtepi16_epi32(g0ox8); ++ boax4 = _mm_cvtepi16_epi32(b0ox8); ++ ++ robx4 = _mm_unpackhi_epi16(r0ox8, zero128); ++ gobx4 = _mm_unpackhi_epi16(g0ox8, zero128); ++ bobx4 = _mm_unpackhi_epi16(b0ox8, zero128); ++ ++ yoax4 = _mm_mullo_epi32(roax4, _mm_set1_epi32(cry)); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(goax4, _mm_set1_epi32(cgy))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(boax4, _mm_set1_epi32(cby))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(out_rnd)); ++ yoax4 = _mm_srai_epi32(yoax4, out_sh); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ yobx4 = _mm_mullo_epi32(robx4, _mm_set1_epi32(cry)); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(gobx4, _mm_set1_epi32(cgy))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(bobx4, _mm_set1_epi32(cby))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(out_rnd)); ++ yobx4 = _mm_srai_epi32(yobx4, out_sh); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y0ox8 = _mm_packus_epi32(yoax4, yobx4); ++ _mm_storeu_si128((__m128i_u *) &dsty[x], y0ox8); ++ ++ r1ox8 = _mm_lddqu_si128((const __m128i_u *)r1); ++ g1ox8 = _mm_lddqu_si128((const __m128i_u *)g1); ++ b1ox8 = _mm_lddqu_si128((const __m128i_u *)b1); ++ ++ r1oax4 = _mm_cvtepi16_epi32(r1ox8); ++ g1oax4 = 
_mm_cvtepi16_epi32(g1ox8); ++ b1oax4 = _mm_cvtepi16_epi32(b1ox8); ++ ++ r1obx4 = _mm_unpackhi_epi16(r1ox8, zero128); ++ g1obx4 = _mm_unpackhi_epi16(g1ox8, zero128); ++ b1obx4 = _mm_unpackhi_epi16(b1ox8, zero128); ++ ++ y1oax4 = _mm_mullo_epi32(r1oax4, _mm_set1_epi32(cry)); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(g1oax4, _mm_set1_epi32(cgy))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(b1oax4, _mm_set1_epi32(cby))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(out_rnd)); ++ y1oax4 = _mm_srai_epi32(y1oax4, out_sh); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1obx4 = _mm_mullo_epi32(r1obx4, _mm_set1_epi32(cry)); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(g1obx4, _mm_set1_epi32(cgy))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(b1obx4, _mm_set1_epi32(cby))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(out_rnd)); ++ y1obx4 = _mm_srai_epi32(y1obx4, out_sh); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1ox8 = _mm_packus_epi32(y1oax4, y1obx4); ++ _mm_storeu_si128((__m128i_u *) &dsty[x + dstlinesize[0] / 2], y1ox8); ++ ++ ravgx4 = _mm_hadd_epi32(roax4, robx4); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_hadd_epi32(r1oax4, r1obx4)); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_set1_epi32(2)); ++ ravgx4 = _mm_srai_epi32(ravgx4, 2); ++ ++ gavgx4 = _mm_hadd_epi32(goax4, gobx4); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_hadd_epi32(g1oax4, g1obx4)); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_set1_epi32(2)); ++ gavgx4 = _mm_srai_epi32(gavgx4, 2); ++ ++ bavgx4 = _mm_hadd_epi32(boax4, bobx4); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_hadd_epi32(b1oax4, b1obx4)); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_set1_epi32(2)); ++ bavgx4 = _mm_srai_epi32(bavgx4, 2); ++ ++ uox4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cru))); ++ uox4 = _mm_add_epi32(uox4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgu))); ++ uox4 = _mm_add_epi32(uox4, _mm_mullo_epi32(bavgx4, 
_mm_set1_epi32(cburv))); ++ uox4 = _mm_srai_epi32(uox4, out_sh); ++ uox4 = _mm_add_epi32(uox4, _mm_set1_epi32(out_uv_offset)); ++ _mm_storeu_si64((__m128i_u *) &dstu[x >> 1], _mm_packus_epi32(uox4, zero128)); ++ ++ vox4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cburv))); ++ vox4 = _mm_add_epi32(vox4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgv))); ++ vox4 = _mm_add_epi32(vox4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cbv))); ++ vox4 = _mm_srai_epi32(vox4, out_sh); ++ vox4 = _mm_add_epi32(vox4, _mm_set1_epi32(out_uv_offset)); ++ _mm_storeu_si64((__m128i_u *) &dstv[x >> 1], _mm_packus_epi32(vox4, zero128)); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstu += offset >> 1; ++ rdstv += offset >> 1; ++ rsrcy += offset; ++ rsrcu += offset >> 1; ++ rsrcv += offset >> 1; ++ tonemap_frame_420p10_2_420p10(rdsty, rdstu, rdstv, ++ rsrcy, rsrcu, rsrcv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_SSE_INTRINSICS ++} ++ ++X86_64_V2 void tonemap_frame_p016_p010_2_nv12_sse(uint8_t *dsty, uint8_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) ++{ ++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS ++ uint8_t *rdsty = dsty; ++ uint8_t *rdstuv = dstuv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcuv = srcuv; ++ int rheight = height; ++ // non-zero when at least 2 columns remain past the last full group of 8 ++ // bit 0 is masked off, intentionally leaving the last pixel empty when the width is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << 
(out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ ++ __m128i in_yuv_offx4 = _mm_set1_epi32(params->in_yuv_off); ++ __m128i in_uv_offx4= _mm_set1_epi32(in_uv_offset); ++ __m128i cyx4 = _mm_set1_epi32(cy); ++ __m128i rndx4 = _mm_set1_epi32(in_rnd); ++ __m128i zero128 = _mm_setzero_si128(); ++ __m128i uvx8, uvx4a, uvx4b; ++ __m128i y0x8, y1x8; ++ __m128i y0x4a, y0x4b, y1x4a, y1x4b, ux4a, ux4b, vx4a, vx4b; ++ __m128i r0x4a, g0x4a, b0x4a, r0x4b, g0x4b, b0x4b; ++ __m128i r1x4a, g1x4a, b1x4a, r1x4b, g1x4b, b1x4b; ++ ++ __m128i r0ox8, g0ox8, b0ox8; ++ __m128i y0ox8; ++ __m128i roax4, robx4, goax4, gobx4, boax4, bobx4; ++ __m128i yoax4, yobx4; ++ ++ __m128i r1ox8, g1ox8, b1ox8; ++ __m128i y1ox8; ++ __m128i r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ __m128i y1oax4, y1obx4, uvoax4, uvobx4; ++ __m128i uoax4, voax4, ravgx4, gavgx4, bavgx4; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0] * 2, dstuv += dstlinesize[1], ++ srcy += srclinesize[0], srcuv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = _mm_lddqu_si128((__m128i*)(srcy + x)); ++ y1x8 = _mm_lddqu_si128((__m128i*)(srcy + (srclinesize[0] / 2 + x))); ++ uvx8 = _mm_lddqu_si128((__m128i*)(srcuv + x)); ++ 
++ if (in_depth == 10) { ++ // shift to low10bits for 10bit input ++ // shift bit has to be compile-time constant ++ y0x8 = _mm_srli_epi16(y0x8, 6); ++ y1x8 = _mm_srli_epi16(y1x8, 6); ++ uvx8 = _mm_srli_epi16(uvx8, 6); ++ } ++ y0x4a = _mm_cvtepu16_epi32(y0x8); ++ y0x4b = _mm_unpackhi_epi16(y0x8, zero128); ++ y1x4a = _mm_cvtepu16_epi32(y1x8); ++ y1x4b = _mm_unpackhi_epi16(y1x8, zero128); ++ uvx4a = _mm_cvtepu16_epi32(uvx8); ++ uvx4b = _mm_unpackhi_epi16(uvx8, zero128); ++ y0x4a = _mm_sub_epi32(y0x4a, in_yuv_offx4); ++ y1x4a = _mm_sub_epi32(y1x4a, in_yuv_offx4); ++ y0x4b = _mm_sub_epi32(y0x4b, in_yuv_offx4); ++ y1x4b = _mm_sub_epi32(y1x4b, in_yuv_offx4); ++ uvx4a = _mm_sub_epi32(uvx4a, in_uv_offx4); ++ uvx4b = _mm_sub_epi32(uvx4b, in_uv_offx4); ++ ++ ux4a = _mm_shuffle_epi32(uvx4a, _MM_SHUFFLE(2, 2, 0, 0)); ++ ux4b = _mm_shuffle_epi32(uvx4b, _MM_SHUFFLE(2, 2, 0, 0)); ++ vx4a = _mm_shuffle_epi32(uvx4a, _MM_SHUFFLE(3, 3, 1, 1)); ++ vx4b = _mm_shuffle_epi32(uvx4b, _MM_SHUFFLE(3, 3, 1, 1)); ++ ++ // r = av_clip_int16((y * cy + crv * v + in_rnd) >> in_sh); ++ r0x4a = g0x4a = b0x4a = _mm_mullo_epi32(y0x4a, cyx4); ++ r0x4a = _mm_add_epi32(r0x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(crv))); ++ r0x4a = _mm_add_epi32(r0x4a, rndx4); ++ r0x4a = _mm_srai_epi32(r0x4a, in_sh); ++ r0x4a = av_clip_int16_sse(r0x4a); ++ ++ r1x4a = g1x4a = b1x4a = _mm_mullo_epi32(y1x4a, cyx4); ++ r1x4a = _mm_add_epi32(r1x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(crv))); ++ r1x4a = _mm_add_epi32(r1x4a, rndx4); ++ r1x4a = _mm_srai_epi32(r1x4a, in_sh); ++ r1x4a = av_clip_int16_sse(r1x4a); ++ ++ // g = av_clip_int16((y * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g0x4a = _mm_add_epi32(g0x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cgu))); ++ g0x4a = _mm_add_epi32(g0x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(cgv))); ++ g0x4a = _mm_add_epi32(g0x4a, rndx4); ++ g0x4a = _mm_srai_epi32(g0x4a, in_sh); ++ g0x4a = av_clip_int16_sse(g0x4a); ++ ++ g1x4a = _mm_add_epi32(g1x4a, _mm_mullo_epi32(ux4a, 
_mm_set1_epi32(cgu))); ++ g1x4a = _mm_add_epi32(g1x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(cgv))); ++ g1x4a = _mm_add_epi32(g1x4a, rndx4); ++ g1x4a = _mm_srai_epi32(g1x4a, in_sh); ++ g1x4a = av_clip_int16_sse(g1x4a); ++ ++ // b = av_clip_int16((y * cy + cbu * u + in_rnd) >> in_sh); ++ b0x4a = _mm_add_epi32(b0x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cbu))); ++ b0x4a = _mm_add_epi32(b0x4a, rndx4); ++ b0x4a = _mm_srai_epi32(b0x4a, in_sh); ++ b0x4a = av_clip_int16_sse(b0x4a); ++ ++ b1x4a = _mm_add_epi32(b1x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cbu))); ++ b1x4a = _mm_add_epi32(b1x4a, rndx4); ++ b1x4a = _mm_srai_epi32(b1x4a, in_sh); ++ b1x4a = av_clip_int16_sse(b1x4a); ++ ++ r0x4b = g0x4b = b0x4b = _mm_mullo_epi32(y0x4b, cyx4); ++ r0x4b = _mm_add_epi32(r0x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(crv))); ++ r0x4b = _mm_add_epi32(r0x4b, rndx4); ++ r0x4b = _mm_srai_epi32(r0x4b, in_sh); ++ r0x4b = av_clip_int16_sse(r0x4b); ++ ++ r1x4b = g1x4b = b1x4b = _mm_mullo_epi32(y1x4b, cyx4); ++ r1x4b = _mm_add_epi32(r1x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(crv))); ++ r1x4b = _mm_add_epi32(r1x4b, rndx4); ++ r1x4b = _mm_srai_epi32(r1x4b, in_sh); ++ r1x4b = av_clip_int16_sse(r1x4b); ++ ++ g0x4b = _mm_add_epi32(g0x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cgu))); ++ g0x4b = _mm_add_epi32(g0x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(cgv))); ++ g0x4b = _mm_add_epi32(g0x4b, rndx4); ++ g0x4b = _mm_srai_epi32(g0x4b, in_sh); ++ g0x4b = av_clip_int16_sse(g0x4b); ++ ++ g1x4b = _mm_add_epi32(g1x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cgu))); ++ g1x4b = _mm_add_epi32(g1x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(cgv))); ++ g1x4b = _mm_add_epi32(g1x4b, rndx4); ++ g1x4b = _mm_srai_epi32(g1x4b, in_sh); ++ g1x4b = av_clip_int16_sse(g1x4b); ++ ++ b0x4b = _mm_add_epi32(b0x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cbu))); ++ b0x4b = _mm_add_epi32(b0x4b, rndx4); ++ b0x4b = _mm_srai_epi32(b0x4b, in_sh); ++ b0x4b = av_clip_int16_sse(b0x4b); ++ ++ b1x4b = _mm_add_epi32(b1x4b, _mm_mullo_epi32(ux4b, 
_mm_set1_epi32(cbu))); ++ b1x4b = _mm_add_epi32(b1x4b, rndx4); ++ b1x4b = _mm_srai_epi32(b1x4b, in_sh); ++ b1x4b = av_clip_int16_sse(b1x4b); ++ ++ tonemap_int32x4_sse(r0x4a, g0x4a, b0x4a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4a, g1x4a, b1x4a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r0x4b, g0x4b, b0x4b, &r[4], &g[4], &b[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4b, g1x4b, b1x4b, &r1[4], &g1[4], &b1[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = _mm_lddqu_si128((const __m128i_u *)r); ++ g0ox8 = _mm_lddqu_si128((const __m128i_u *)g); ++ b0ox8 = _mm_lddqu_si128((const __m128i_u *)b); ++ ++ roax4 = _mm_cvtepi16_epi32(r0ox8); ++ goax4 = _mm_cvtepi16_epi32(g0ox8); ++ boax4 = _mm_cvtepi16_epi32(b0ox8); ++ ++ robx4 = _mm_unpackhi_epi16(r0ox8, zero128); ++ gobx4 = _mm_unpackhi_epi16(g0ox8, zero128); ++ bobx4 = _mm_unpackhi_epi16(b0ox8, zero128); ++ ++ yoax4 = _mm_mullo_epi32(roax4, _mm_set1_epi32(cry)); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(goax4, _mm_set1_epi32(cgy))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(boax4, _mm_set1_epi32(cby))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(out_rnd)); ++ // output shift bits for 8bit outputs is 29 - 8 = 21 ++ yoax4 = _mm_srai_epi32(yoax4, 21); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ yobx4 = _mm_mullo_epi32(robx4, _mm_set1_epi32(cry)); ++ yobx4 = _mm_add_epi32(yobx4, 
_mm_mullo_epi32(gobx4, _mm_set1_epi32(cgy))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(bobx4, _mm_set1_epi32(cby))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(out_rnd)); ++ yobx4 = _mm_srai_epi32(yobx4, 21); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y0ox8 = _mm_packs_epi32(yoax4, yobx4); ++ _mm_storeu_si64(&dsty[x], _mm_packus_epi16(y0ox8, zero128)); ++ ++ r1ox8 = _mm_lddqu_si128((const __m128i_u *)r1); ++ g1ox8 = _mm_lddqu_si128((const __m128i_u *)g1); ++ b1ox8 = _mm_lddqu_si128((const __m128i_u *)b1); ++ ++ r1oax4 = _mm_cvtepi16_epi32(r1ox8); ++ g1oax4 = _mm_cvtepi16_epi32(g1ox8); ++ b1oax4 = _mm_cvtepi16_epi32(b1ox8); ++ ++ r1obx4 = _mm_unpackhi_epi16(r1ox8, zero128); ++ g1obx4 = _mm_unpackhi_epi16(g1ox8, zero128); ++ b1obx4 = _mm_unpackhi_epi16(b1ox8, zero128); ++ ++ y1oax4 = _mm_mullo_epi32(r1oax4, _mm_set1_epi32(cry)); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(g1oax4, _mm_set1_epi32(cgy))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(b1oax4, _mm_set1_epi32(cby))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(out_rnd)); ++ y1oax4 = _mm_srai_epi32(y1oax4, 21); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1obx4 = _mm_mullo_epi32(r1obx4, _mm_set1_epi32(cry)); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(g1obx4, _mm_set1_epi32(cgy))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(b1obx4, _mm_set1_epi32(cby))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(out_rnd)); ++ y1obx4 = _mm_srai_epi32(y1obx4, 21); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1ox8 = _mm_packs_epi32(y1oax4, y1obx4); ++ _mm_storeu_si64(&dsty[x + dstlinesize[0]], _mm_packus_epi16(y1ox8, zero128)); ++ ++ ravgx4 = _mm_hadd_epi32(roax4, robx4); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_hadd_epi32(r1oax4, r1obx4)); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_set1_epi32(2)); ++ ravgx4 = _mm_srai_epi32(ravgx4, 2); ++ ++ gavgx4 = _mm_hadd_epi32(goax4, gobx4); ++ gavgx4 
= _mm_add_epi32(gavgx4, _mm_hadd_epi32(g1oax4, g1obx4)); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_set1_epi32(2)); ++ gavgx4 = _mm_srai_epi32(gavgx4, 2); ++ ++ bavgx4 = _mm_hadd_epi32(boax4, bobx4); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_hadd_epi32(b1oax4, b1obx4)); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_set1_epi32(2)); ++ bavgx4 = _mm_srai_epi32(bavgx4, 2); ++ ++ uoax4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cru))); ++ uoax4 = _mm_add_epi32(uoax4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgu))); ++ uoax4 = _mm_add_epi32(uoax4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cburv))); ++ uoax4 = _mm_srai_epi32(uoax4, 21); ++ uoax4 = _mm_add_epi32(uoax4, _mm_set1_epi32(out_uv_offset)); ++ ++ voax4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cburv))); ++ voax4 = _mm_add_epi32(voax4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgv))); ++ voax4 = _mm_add_epi32(voax4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cbv))); ++ voax4 = _mm_srai_epi32(voax4, 21); ++ voax4 = _mm_add_epi32(voax4, _mm_set1_epi32(out_uv_offset)); ++ ++ uvoax4 = _mm_unpacklo_epi32(uoax4, voax4); ++ uvobx4 = _mm_unpackhi_epi32(uoax4, voax4); ++ _mm_storeu_si64(&dstuv[x], _mm_packus_epi16(_mm_packs_epi32(uvoax4, uvobx4), zero128)); ++ } ++ } ++ ++ // Process remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstuv += offset; ++ rsrcy += offset; ++ rsrcuv += offset; ++ tonemap_frame_p016_p010_2_nv12(rdsty, rdstuv, ++ rsrcy, rsrcuv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_SSE_INTRINSICS ++} ++ ++X86_64_V2 void tonemap_frame_p016_p010_2_p016_p010_sse(uint16_t *dsty, uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params) 
++{ ++#ifdef ENABLE_TONEMAPX_SSE_INTRINSICS ++ uint16_t *rdsty = dsty; ++ uint16_t *rdstuv = dstuv; ++ const uint16_t *rsrcy = srcy; ++ const uint16_t *rsrcuv = srcuv; ++ int rheight = height; ++ // non-zero when at least 2 columns remain past the last full group of 8 ++ // bit 0 is masked off, intentionally leaving the last pixel empty when the width is odd ++ int remainw = width & 6; ++ ++ const int in_depth = srcdepth; ++ const int in_uv_offset = 128 << (in_depth - 8); ++ const int in_sh = in_depth - 1; ++ const int in_rnd = 1 << (in_sh - 1); ++ ++ const int out_depth = dstdepth; ++ const int out_uv_offset = 128 << (out_depth - 8); ++ const int out_sh = 29 - out_depth; ++ const int out_rnd = 1 << (out_sh - 1); ++ const int out_sh2 = 16 - out_depth; ++ ++ int cy = (*params->yuv2rgb_coeffs)[0][0][0]; ++ int crv = (*params->yuv2rgb_coeffs)[0][2][0]; ++ int cgu = (*params->yuv2rgb_coeffs)[1][1][0]; ++ int cgv = (*params->yuv2rgb_coeffs)[1][2][0]; ++ int cbu = (*params->yuv2rgb_coeffs)[2][1][0]; ++ ++ int cry = (*params->rgb2yuv_coeffs)[0][0][0]; ++ int cgy = (*params->rgb2yuv_coeffs)[0][1][0]; ++ int cby = (*params->rgb2yuv_coeffs)[0][2][0]; ++ int cru = (*params->rgb2yuv_coeffs)[1][0][0]; ++ int ocgu = (*params->rgb2yuv_coeffs)[1][1][0]; ++ int cburv = (*params->rgb2yuv_coeffs)[1][2][0]; ++ int ocgv = (*params->rgb2yuv_coeffs)[2][1][0]; ++ int cbv = (*params->rgb2yuv_coeffs)[2][2][0]; ++ ++ int16_t r[8], g[8], b[8]; ++ int16_t r1[8], g1[8], b1[8]; ++ ++ __m128i in_yuv_offx4 = _mm_set1_epi32(params->in_yuv_off); ++ __m128i in_uv_offx4= _mm_set1_epi32(in_uv_offset); ++ __m128i cyx4 = _mm_set1_epi32(cy); ++ __m128i rndx4 = _mm_set1_epi32(in_rnd); ++ __m128i zero128 = _mm_setzero_si128(); ++ __m128i uvx8, uvx4a, uvx4b; ++ __m128i y0x8, y1x8; ++ __m128i y0x4a, y0x4b, y1x4a, y1x4b, ux4a, ux4b, vx4a, vx4b; ++ __m128i r0x4a, g0x4a, b0x4a, r0x4b, g0x4b, b0x4b; ++ __m128i r1x4a, g1x4a, b1x4a, r1x4b, g1x4b, b1x4b; ++ ++ __m128i r0ox8, g0ox8, b0ox8; ++ __m128i y0ox8; ++ __m128i roax4, robx4, goax4, gobx4, boax4, bobx4; ++ __m128i yoax4, yobx4; 
++ ++ __m128i r1ox8, g1ox8, b1ox8; ++ __m128i y1ox8; ++ __m128i r1oax4, r1obx4, g1oax4, g1obx4, b1oax4, b1obx4; ++ __m128i y1oax4, y1obx4, uvoax4, uvobx4; ++ __m128i uoax4, voax4, ravgx4, gavgx4, bavgx4, uvox8; ++ for (; height > 1; height -= 2, ++ dsty += dstlinesize[0], dstuv += dstlinesize[1] / 2, ++ srcy += srclinesize[0], srcuv += srclinesize[1] / 2) { ++ for (int xx = 0; xx < width >> 3; xx++) { ++ int x = xx << 3; ++ ++ y0x8 = _mm_lddqu_si128((__m128i*)(srcy + x)); ++ y1x8 = _mm_lddqu_si128((__m128i*)(srcy + (srclinesize[0] / 2 + x))); ++ uvx8 = _mm_lddqu_si128((__m128i*)(srcuv + x)); ++ ++ if (in_depth == 10) { ++ // shift to low10bits for 10bit input ++ // shift bit has to be compile-time constant ++ y0x8 = _mm_srli_epi16(y0x8, 6); ++ y1x8 = _mm_srli_epi16(y1x8, 6); ++ uvx8 = _mm_srli_epi16(uvx8, 6); ++ } ++ y0x4a = _mm_cvtepu16_epi32(y0x8); ++ y0x4b = _mm_unpackhi_epi16(y0x8, zero128); ++ y1x4a = _mm_cvtepu16_epi32(y1x8); ++ y1x4b = _mm_unpackhi_epi16(y1x8, zero128); ++ uvx4a = _mm_cvtepu16_epi32(uvx8); ++ uvx4b = _mm_unpackhi_epi16(uvx8, zero128); ++ y0x4a = _mm_sub_epi32(y0x4a, in_yuv_offx4); ++ y1x4a = _mm_sub_epi32(y1x4a, in_yuv_offx4); ++ y0x4b = _mm_sub_epi32(y0x4b, in_yuv_offx4); ++ y1x4b = _mm_sub_epi32(y1x4b, in_yuv_offx4); ++ uvx4a = _mm_sub_epi32(uvx4a, in_uv_offx4); ++ uvx4b = _mm_sub_epi32(uvx4b, in_uv_offx4); ++ ++ ux4a = _mm_shuffle_epi32(uvx4a, _MM_SHUFFLE(2, 2, 0, 0)); ++ ux4b = _mm_shuffle_epi32(uvx4b, _MM_SHUFFLE(2, 2, 0, 0)); ++ vx4a = _mm_shuffle_epi32(uvx4a, _MM_SHUFFLE(3, 3, 1, 1)); ++ vx4b = _mm_shuffle_epi32(uvx4b, _MM_SHUFFLE(3, 3, 1, 1)); ++ ++ // r = av_clip_int16((y * cy + crv * v + in_rnd) >> in_sh); ++ r0x4a = g0x4a = b0x4a = _mm_mullo_epi32(y0x4a, cyx4); ++ r0x4a = _mm_add_epi32(r0x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(crv))); ++ r0x4a = _mm_add_epi32(r0x4a, rndx4); ++ r0x4a = _mm_srai_epi32(r0x4a, in_sh); ++ r0x4a = av_clip_int16_sse(r0x4a); ++ ++ r1x4a = g1x4a = b1x4a = _mm_mullo_epi32(y1x4a, cyx4); ++ r1x4a = 
_mm_add_epi32(r1x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(crv))); ++ r1x4a = _mm_add_epi32(r1x4a, rndx4); ++ r1x4a = _mm_srai_epi32(r1x4a, in_sh); ++ r1x4a = av_clip_int16_sse(r1x4a); ++ ++ // g = av_clip_int16((y * cy + cgu * u + cgv * v + in_rnd) >> in_sh); ++ g0x4a = _mm_add_epi32(g0x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cgu))); ++ g0x4a = _mm_add_epi32(g0x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(cgv))); ++ g0x4a = _mm_add_epi32(g0x4a, rndx4); ++ g0x4a = _mm_srai_epi32(g0x4a, in_sh); ++ g0x4a = av_clip_int16_sse(g0x4a); ++ ++ g1x4a = _mm_add_epi32(g1x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cgu))); ++ g1x4a = _mm_add_epi32(g1x4a, _mm_mullo_epi32(vx4a, _mm_set1_epi32(cgv))); ++ g1x4a = _mm_add_epi32(g1x4a, rndx4); ++ g1x4a = _mm_srai_epi32(g1x4a, in_sh); ++ g1x4a = av_clip_int16_sse(g1x4a); ++ ++ // b = av_clip_int16((y * cy + cbu * u + in_rnd) >> in_sh); ++ b0x4a = _mm_add_epi32(b0x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cbu))); ++ b0x4a = _mm_add_epi32(b0x4a, rndx4); ++ b0x4a = _mm_srai_epi32(b0x4a, in_sh); ++ b0x4a = av_clip_int16_sse(b0x4a); ++ ++ b1x4a = _mm_add_epi32(b1x4a, _mm_mullo_epi32(ux4a, _mm_set1_epi32(cbu))); ++ b1x4a = _mm_add_epi32(b1x4a, rndx4); ++ b1x4a = _mm_srai_epi32(b1x4a, in_sh); ++ b1x4a = av_clip_int16_sse(b1x4a); ++ ++ r0x4b = g0x4b = b0x4b = _mm_mullo_epi32(y0x4b, cyx4); ++ r0x4b = _mm_add_epi32(r0x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(crv))); ++ r0x4b = _mm_add_epi32(r0x4b, rndx4); ++ r0x4b = _mm_srai_epi32(r0x4b, in_sh); ++ r0x4b = av_clip_int16_sse(r0x4b); ++ ++ r1x4b = g1x4b = b1x4b = _mm_mullo_epi32(y1x4b, cyx4); ++ r1x4b = _mm_add_epi32(r1x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(crv))); ++ r1x4b = _mm_add_epi32(r1x4b, rndx4); ++ r1x4b = _mm_srai_epi32(r1x4b, in_sh); ++ r1x4b = av_clip_int16_sse(r1x4b); ++ ++ g0x4b = _mm_add_epi32(g0x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cgu))); ++ g0x4b = _mm_add_epi32(g0x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(cgv))); ++ g0x4b = _mm_add_epi32(g0x4b, rndx4); ++ g0x4b = 
_mm_srai_epi32(g0x4b, in_sh); ++ g0x4b = av_clip_int16_sse(g0x4b); ++ ++ g1x4b = _mm_add_epi32(g1x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cgu))); ++ g1x4b = _mm_add_epi32(g1x4b, _mm_mullo_epi32(vx4b, _mm_set1_epi32(cgv))); ++ g1x4b = _mm_add_epi32(g1x4b, rndx4); ++ g1x4b = _mm_srai_epi32(g1x4b, in_sh); ++ g1x4b = av_clip_int16_sse(g1x4b); ++ ++ b0x4b = _mm_add_epi32(b0x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cbu))); ++ b0x4b = _mm_add_epi32(b0x4b, rndx4); ++ b0x4b = _mm_srai_epi32(b0x4b, in_sh); ++ b0x4b = av_clip_int16_sse(b0x4b); ++ ++ b1x4b = _mm_add_epi32(b1x4b, _mm_mullo_epi32(ux4b, _mm_set1_epi32(cbu))); ++ b1x4b = _mm_add_epi32(b1x4b, rndx4); ++ b1x4b = _mm_srai_epi32(b1x4b, in_sh); ++ b1x4b = av_clip_int16_sse(b1x4b); ++ ++ tonemap_int32x4_sse(r0x4a, g0x4a, b0x4a, r, g, b, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4a, g1x4a, b1x4a, r1, g1, b1, ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r0x4b, g0x4b, b0x4b, &r[4], &g[4], &b[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ tonemap_int32x4_sse(r1x4b, g1x4b, b1x4b, &r1[4], &g1[4], &b1[4], ++ params->lin_lut, params->tonemap_lut, params->delin_lut, ++ params->coeffs, params->ocoeffs, params->desat, params->rgb2rgb_coeffs, ++ params->rgb2rgb_passthrough); ++ ++ r0ox8 = _mm_lddqu_si128((const __m128i_u *)r); ++ g0ox8 = _mm_lddqu_si128((const __m128i_u *)g); ++ b0ox8 = _mm_lddqu_si128((const __m128i_u *)b); ++ ++ roax4 = _mm_cvtepi16_epi32(r0ox8); ++ goax4 = _mm_cvtepi16_epi32(g0ox8); ++ boax4 = _mm_cvtepi16_epi32(b0ox8); ++ ++ robx4 = _mm_unpackhi_epi16(r0ox8, zero128); ++ gobx4 = 
_mm_unpackhi_epi16(g0ox8, zero128); ++ bobx4 = _mm_unpackhi_epi16(b0ox8, zero128); ++ ++ yoax4 = _mm_mullo_epi32(roax4, _mm_set1_epi32(cry)); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(goax4, _mm_set1_epi32(cgy))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_mullo_epi32(boax4, _mm_set1_epi32(cby))); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(out_rnd)); ++ yoax4 = _mm_srai_epi32(yoax4, out_sh); ++ yoax4 = _mm_add_epi32(yoax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ yobx4 = _mm_mullo_epi32(robx4, _mm_set1_epi32(cry)); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(gobx4, _mm_set1_epi32(cgy))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_mullo_epi32(bobx4, _mm_set1_epi32(cby))); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(out_rnd)); ++ yobx4 = _mm_srai_epi32(yobx4, out_sh); ++ yobx4 = _mm_add_epi32(yobx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y0ox8 = _mm_packus_epi32(yoax4, yobx4); ++ y0ox8 = _mm_slli_epi16(y0ox8, out_sh2); ++ _mm_storeu_si128((__m128i_u *) &dsty[x], y0ox8); ++ ++ r1ox8 = _mm_lddqu_si128((const __m128i_u *)r1); ++ g1ox8 = _mm_lddqu_si128((const __m128i_u *)g1); ++ b1ox8 = _mm_lddqu_si128((const __m128i_u *)b1); ++ ++ r1oax4 = _mm_cvtepi16_epi32(r1ox8); ++ g1oax4 = _mm_cvtepi16_epi32(g1ox8); ++ b1oax4 = _mm_cvtepi16_epi32(b1ox8); ++ ++ r1obx4 = _mm_unpackhi_epi16(r1ox8, zero128); ++ g1obx4 = _mm_unpackhi_epi16(g1ox8, zero128); ++ b1obx4 = _mm_unpackhi_epi16(b1ox8, zero128); ++ ++ y1oax4 = _mm_mullo_epi32(r1oax4, _mm_set1_epi32(cry)); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(g1oax4, _mm_set1_epi32(cgy))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_mullo_epi32(b1oax4, _mm_set1_epi32(cby))); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(out_rnd)); ++ y1oax4 = _mm_srai_epi32(y1oax4, out_sh); ++ y1oax4 = _mm_add_epi32(y1oax4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1obx4 = _mm_mullo_epi32(r1obx4, _mm_set1_epi32(cry)); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_mullo_epi32(g1obx4, _mm_set1_epi32(cgy))); ++ y1obx4 = _mm_add_epi32(y1obx4, 
_mm_mullo_epi32(b1obx4, _mm_set1_epi32(cby))); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(out_rnd)); ++ y1obx4 = _mm_srai_epi32(y1obx4, out_sh); ++ y1obx4 = _mm_add_epi32(y1obx4, _mm_set1_epi32(params->out_yuv_off)); ++ ++ y1ox8 = _mm_packus_epi32(y1oax4, y1obx4); ++ y1ox8 = _mm_slli_epi16(y1ox8, out_sh2); ++ _mm_storeu_si128((__m128i_u *) &dsty[x + dstlinesize[0] / 2], y1ox8); ++ ++ ravgx4 = _mm_hadd_epi32(roax4, robx4); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_hadd_epi32(r1oax4, r1obx4)); ++ ravgx4 = _mm_add_epi32(ravgx4, _mm_set1_epi32(2)); ++ ravgx4 = _mm_srai_epi32(ravgx4, 2); ++ ++ gavgx4 = _mm_hadd_epi32(goax4, gobx4); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_hadd_epi32(g1oax4, g1obx4)); ++ gavgx4 = _mm_add_epi32(gavgx4, _mm_set1_epi32(2)); ++ gavgx4 = _mm_srai_epi32(gavgx4, 2); ++ ++ bavgx4 = _mm_hadd_epi32(boax4, bobx4); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_hadd_epi32(b1oax4, b1obx4)); ++ bavgx4 = _mm_add_epi32(bavgx4, _mm_set1_epi32(2)); ++ bavgx4 = _mm_srai_epi32(bavgx4, 2); ++ ++ uoax4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cru))); ++ uoax4 = _mm_add_epi32(uoax4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgu))); ++ uoax4 = _mm_add_epi32(uoax4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cburv))); ++ uoax4 = _mm_srai_epi32(uoax4, out_sh); ++ uoax4 = _mm_add_epi32(uoax4, _mm_set1_epi32(out_uv_offset)); ++ ++ voax4 = _mm_add_epi32(_mm_set1_epi32(out_rnd), _mm_mullo_epi32(ravgx4, _mm_set1_epi32(cburv))); ++ voax4 = _mm_add_epi32(voax4, _mm_mullo_epi32(gavgx4, _mm_set1_epi32(ocgv))); ++ voax4 = _mm_add_epi32(voax4, _mm_mullo_epi32(bavgx4, _mm_set1_epi32(cbv))); ++ voax4 = _mm_srai_epi32(voax4, out_sh); ++ voax4 = _mm_add_epi32(voax4, _mm_set1_epi32(out_uv_offset)); ++ ++ uvoax4 = _mm_unpacklo_epi32(uoax4, voax4); ++ uvobx4 = _mm_unpackhi_epi32(uoax4, voax4); ++ uvox8 = _mm_packus_epi32(uvoax4, uvobx4); ++ uvox8 = _mm_slli_epi16(uvox8, out_sh2); ++ _mm_storeu_si128((__m128i_u *) &dstuv[x], uvox8); ++ } ++ } ++ ++ // Process 
remaining pixels cannot fill the full simd register with scalar version ++ if (remainw) { ++ int offset = width & (int)0xfffffff8; ++ rdsty += offset; ++ rdstuv += offset; ++ rsrcy += offset; ++ rsrcuv += offset; ++ tonemap_frame_p016_p010_2_p016_p010(rdsty, rdstuv, ++ rsrcy, rsrcuv, ++ dstlinesize, srclinesize, ++ dstdepth, srcdepth, ++ remainw, rheight, params); ++ } ++#endif // ENABLE_TONEMAPX_SSE_INTRINSICS ++} +Index: FFmpeg/libavfilter/x86/vf_tonemapx_intrin_sse.h +=================================================================== +--- /dev/null ++++ libavfilter/x86/vf_tonemapx_intrin_sse.h +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_X86_TONEMAPX_INTRIN_SSE_H ++#define AVFILTER_X86_TONEMAPX_INTRIN_SSE_H ++ ++#include "libavfilter/vf_tonemapx.h" ++ ++X86_64_V2 void tonemap_frame_dovi_2_420p_sse(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V2 void tonemap_frame_dovi_2_420p10_sse(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V2 void tonemap_frame_420p10_2_420p_sse(uint8_t *dsty, uint8_t *dstu, uint8_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V2 void tonemap_frame_420p10_2_420p10_sse(uint16_t *dsty, uint16_t *dstu, uint16_t *dstv, ++ const uint16_t *srcy, const uint16_t *srcu, const uint16_t *srcv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V2 void tonemap_frame_p016_p010_2_nv12_sse(uint8_t *dsty, uint8_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++X86_64_V2 void tonemap_frame_p016_p010_2_p016_p010_sse(uint16_t *dsty, 
uint16_t *dstuv, ++ const uint16_t *srcy, const uint16_t *srcuv, ++ const int *dstlinesize, const int *srclinesize, ++ int dstdepth, int srcdepth, ++ int width, int height, ++ const struct TonemapIntParams *params); ++ ++#endif // AVFILTER_X86_TONEMAPX_INTRIN_SSE_H diff --git a/cross/ffmpeg7/patches/1061-jellyfin-0061-add-ac4-decoder-for-atsc-3-0.patch b/cross/ffmpeg7/patches/1061-jellyfin-0061-add-ac4-decoder-for-atsc-3-0.patch new file mode 100644 index 00000000000..9efa3ec2771 --- /dev/null +++ b/cross/ffmpeg7/patches/1061-jellyfin-0061-add-ac4-decoder-for-atsc-3-0.patch @@ -0,0 +1,7661 @@ +Index: FFmpeg/libavcodec/Makefile +=================================================================== +--- libavcodec/Makefile ++++ libavcodec/Makefile +@@ -203,6 +203,7 @@ OBJS-$(CONFIG_AC3_ENCODER) + + ac3.o kbdwin.o + OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o kbdwin.o + OBJS-$(CONFIG_AC3_MF_ENCODER) += mfenc.o mf_utils.o ++OBJS-$(CONFIG_AC4_DECODER) += ac4dec.o kbdwin.o + OBJS-$(CONFIG_ACELP_KELVIN_DECODER) += g729dec.o lsp.o celp_math.o celp_filters.o acelp_filters.o acelp_pitch_delay.o acelp_vectors.o g729postfilter.o + OBJS-$(CONFIG_AGM_DECODER) += agm.o jpegquanttables.o + OBJS-$(CONFIG_AIC_DECODER) += aic.o +Index: FFmpeg/libavcodec/ac4dec.c +=================================================================== +--- /dev/null ++++ libavcodec/ac4dec.c +@@ -0,0 +1,5924 @@ ++/* ++ * AC-4 Audio Decoder ++ * ++ * Copyright (c) 2019 Paul B Mahol ++ * Copyright (c) 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. 
++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#define ASSERT_LEVEL 5 ++#include "libavutil/avassert.h" ++#include "libavutil/tx.h" ++#include "libavutil/channel_layout.h" ++#include "libavutil/float_dsp.h" ++#include "libavutil/mem_internal.h" ++#include "libavutil/qsort.h" ++#include "libavutil/opt.h" ++ ++#include "ac4dec_data.h" ++#include "avcodec.h" ++#include "codec_internal.h" ++#include "decode.h" ++#include "get_bits.h" ++#include "kbdwin.h" ++#include "unary.h" ++ ++/* Number of model bits */ ++#define SSF_MODEL_BITS 15 ++ ++/* Model unit for the CDF specification */ ++#define SSF_MODEL_UNIT (1U<<(SSF_MODEL_BITS)) ++ ++/* Number of range bits */ ++#define SSF_RANGE_BITS 30 ++ ++/* Half of the range unit */ ++#define SSF_THRESHOLD_LARGE (1U<<((SSF_RANGE_BITS)-1)) ++ ++/* Quarter of the range unit */ ++#define SSF_THRESHOLD_SMALL (1U<<((SSF_RANGE_BITS)-2)) ++ ++/* Offset bits */ ++#define SSF_OFFSET_BITS 14 ++ ++typedef struct ACState { ++ uint32_t ui_low; ++ uint32_t ui_range; ++ uint32_t ui_offset; ++ uint32_t ui_offset2; ++ ++ uint32_t ui_threshold_small; ++ uint32_t ui_threshold_large; ++ uint32_t ui_model_unit; ++ ++ uint32_t ui_range_bits; ++ uint32_t ui_model_bits; ++} ACState; ++ ++typedef struct EMDFInfo { ++ int version; ++ int key_id; ++ int substream_index; ++} EMDFInfo; ++ ++typedef struct SubstreamChannelParameters { ++ uint8_t long_frame; ++ uint8_t transf_length_idx[2]; ++ int transf_length[2]; ++ ++ uint8_t different_framing; ++ uint8_t max_sfb_side[2]; ++ uint8_t max_sfb[2]; ++ uint8_t 
scale_factor_grouping[15]; ++ ++ uint8_t num_windows; ++ uint8_t num_window_groups; ++ uint8_t window_to_group[16]; ++ uint8_t num_win_in_group[16]; ++ ++ uint8_t dual_maxsfb; ++ uint8_t side_limited; ++ uint8_t side_channel; ++} SubstreamChannelParameters; ++ ++typedef struct SubstreamChannel { ++ SubstreamChannelParameters scp; ++ ++ int master_reset; ++ int num_sbg_master; ++ int num_sb_aspx; ++ int num_sbg_noise; ++ int num_sbg_sig_highres; ++ int num_sbg_sig_lowres; ++ int sba; ++ int sbx; ++ int sbz; ++ ++ int sap_mode; ++ ++ int N_prev; ++ ++ uint8_t ms_used[16][128]; ++ uint8_t sap_coeff_used[16][128]; ++ int dpcm_alpha_q[16][128]; ++ ++ int delta_code_time; ++ ++ int num_sec_lsf[16]; ++ int num_sec[16]; ++ uint8_t sfb_cb[16][128]; ++ uint8_t sect_cb[16][128]; ++ int sect_start[16][128]; ++ int sect_end[16][128]; ++ int sect_sfb_offset[16][128]; ++ ++ int16_t quant_spec[2048]; ++ float scaled_spec[2048]; ++ float spec_reord[2048]; ++ int16_t offset2sfb[2048]; ++ uint8_t offset2g[2048]; ++ int win_offset[16]; ++ DECLARE_ALIGNED(32, float, overlap)[4096]; ++ ++ int max_quant_idx[16][128]; ++ int dpcm_sf[16][128]; ++ int dpcm_snf[16][128]; ++ int snf_data_exists; ++ ++ float sf_gain[16][128]; ++ ++ int aspx_int_class; ++ int aspx_num_noise; ++ int aspx_num_noise_prev; ++ int aspx_num_rel_left; ++ int aspx_num_rel_right; ++ int aspx_num_env; ++ int aspx_num_env_prev; ++ int aspx_freq_res[5]; ++ int aspx_var_bord_left; ++ int aspx_var_bord_right; ++ int aspx_rel_bord_left[4]; ++ int aspx_rel_bord_right[4]; ++ int aspx_tsg_ptr; ++ int aspx_tsg_ptr_prev; ++ ++ int aspx_qmode_env; ++ int aspx_sig_delta_dir[8]; ++ int aspx_noise_delta_dir[2]; ++ int aspx_tna_mode[16]; ++ int aspx_tna_mode_prev[16]; ++ int aspx_add_harmonic[16]; ++ int aspx_fic_used_in_sfb[16]; ++ int aspx_tic_used_in_slot[16]; ++ int aspx_xover_subband_offset; ++ int aspx_balance; ++ ++ uint8_t atsg_freqres[5]; ++ uint8_t atsg_freqres_prev[5]; ++ int atsg_sig[6]; ++ int atsg_noise[3]; ++ int 
previous_stop_pos; ++ ++ int sbg_noise[6]; ++ int sbg_sig_lowres[24]; ++ int sbg_sig_highres[24]; ++ int sbg_lim[32]; ++ int sbg_patches[6]; ++ int sbg_patch_num_sb[6]; ++ int sbg_patch_start_sb[6]; ++ int sbg_master[24]; ++ ++ int num_sbg_sig[5]; ++ int sbg_sig[5][24]; ++ int num_sbg_patches; ++ int num_sbg_lim; ++ ++ int aspx_data[2][5][64]; ++ ++ int qscf_prev[5][64]; ++ int qscf_noise_prev[2][64]; ++ int qscf_sig_sbg[5][64]; ++ int qscf_sig_sbg_prev[5][64]; ++ int qscf_noise_sbg[2][64]; ++ float scf_noise_sbg[2][64]; ++ float scf_sig_sbg[5][64]; ++ float scf_sig_sb[5][64]; ++ float scf_noise_sb[5][64]; ++ ++ float gain_vec[32]; ++ float chirp_arr[6]; ++ float chirp_arr_prev[6]; ++ float est_sig_sb[5][64]; ++ float sine_idx_sb[5][64]; ++ float sine_idx_sb_prev[5][64]; ++ float sine_area_sb[5][64]; ++ float sine_lev_sb[5][64]; ++ float noise_lev_sb[5][64]; ++ float sig_gain_sb[5][64]; ++ float max_sig_gain_sbg[5][64]; ++ float max_sig_gain_sb[5][64]; ++ float noise_lev_sb_lim[5][64]; ++ float sig_gain_sb_lim[5][64]; ++ float boost_fact_sbg[5][64]; ++ float boost_fact_sb[5][64]; ++ float sig_gain_sb_adj[5][64]; ++ float noise_lev_sb_adj[5][64]; ++ float sine_lev_sb_adj[5][64]; ++ ++ int8_t sine_idx_prev[42][64]; ++ int16_t noise_idx_prev[42][64]; ++ ++ int acpl_interpolation_type; ++ int acpl_num_param_sets_cod; ++ int acpl_param_timeslot[2]; ++ int acpl_data[11][16]; ++ ++ int start_block, end_block; ++ int stride_flag; ++ int num_bands; ++ int predictor_presence[4]; ++ int predictor_lag_delta[4]; ++ int predictor_lag[4]; ++ int variance_preserving[4]; ++ int alloc_offset[4]; ++ int delta[4]; ++ int gain_bits[4]; ++ int env_idx[19]; ++ ACState acs; ++ ++ DECLARE_ALIGNED(32, float, pcm)[2048]; ++ ++ DECLARE_ALIGNED(32, float, qmf_filt)[640]; ++ DECLARE_ALIGNED(32, float, qsyn_filt)[1280]; ++ DECLARE_ALIGNED(32, float, Q)[2][42][64]; ++ DECLARE_ALIGNED(32, float, Q_prev)[2][42][64]; ++ DECLARE_ALIGNED(32, float, Q_low)[2][42][64]; ++ DECLARE_ALIGNED(32, float, 
Q_low_prev)[2][42][64]; ++ DECLARE_ALIGNED(32, float, Q_low_ext)[2][42][64]; ++ DECLARE_ALIGNED(32, float, Q_high)[2][42][64]; ++ DECLARE_ALIGNED(32, float, cov)[64][3][3][2]; ++ DECLARE_ALIGNED(32, float, alpha0)[64][2]; ++ DECLARE_ALIGNED(32, float, alpha1)[64][2]; ++ DECLARE_ALIGNED(32, float, Y)[2][42][64]; ++ DECLARE_ALIGNED(32, float, Y_prev)[2][42][64]; ++ DECLARE_ALIGNED(32, float, qmf_sine)[2][42][64]; ++ DECLARE_ALIGNED(32, float, qmf_noise)[2][42][64]; ++} SubstreamChannel; ++ ++typedef struct Substream { ++ int codec_mode; ++ ++ int aspx_quant_mode_env; ++ int aspx_start_freq; ++ int prev_aspx_start_freq; ++ int aspx_stop_freq; ++ int prev_aspx_stop_freq; ++ int aspx_master_freq_scale; ++ int prev_aspx_master_freq_scale; ++ int aspx_interpolation; ++ int aspx_preflat; ++ int aspx_limiter; ++ int aspx_noise_sbg; ++ int aspx_num_env_bits_fixfix; ++ int aspx_freq_res_mode; ++ ++ int acpl_qmf_band; ++ int acpl_param_band; ++ int acpl_num_param_bands_id; ++ int acpl_quant_mode[2]; ++ ++ uint8_t mode_2ch; ++ uint8_t chel_matsel; ++ ++ uint8_t compand_on[5]; ++ int compand_avg; ++ ++ int max_sfb_master; ++ ++ uint8_t coding_config; ++ uint8_t mdct_stereo_proc[2]; ++ float matrix_stereo[16][128][2][2]; ++ float alpha_q[16][128]; ++ ++ int spec_frontend_l; ++ int spec_frontend_r; ++ int spec_frontend_m; ++ int spec_frontend_s; ++ ++ SubstreamChannel ssch[9]; ++} Substream; ++ ++typedef struct PresentationSubstreamInfo { ++ int alternative; ++ int pres_ndot; ++ int substream_index; ++} PresentationSubstreamInfo; ++ ++typedef struct Metadata { ++ int dialnorm_bits; ++ int pre_dmixtyp_2ch; ++ int phase90_info_2ch; ++ int loro_center_mixgain; ++ int loro_surround_mixgain; ++ int loro_dmx_loud_corr; ++ int ltrt_center_mixgain; ++ int ltrt_surround_mixgain; ++ int ltrt_dmx_loud_corr; ++ int lfe_mixgain; ++ int preferred_dmx_method; ++ int pre_dmixtyp_5ch; ++ int pre_upmixtyp_5ch; ++ int pre_upmixtyp_3_4; ++ int pre_upmixtyp_3_2_2; ++ int phase90_info_mc; ++ int 
surround_attenuation_known; ++ int lfe_attenuation_known; ++ int dc_block_on; ++ ++ int loudness_version; ++ int loud_prac_type; ++ int dialgate_prac_type; ++ int loudcorr_type; ++ int loudrelgat; ++ int loudspchgat; ++ int loudstrm3s; ++ int max_loudstrm3s; ++ int truepk; ++ int max_truepk; ++ int prgmbndy; ++ int end_or_start; ++ int prgmbndy_offset; ++ int lra; ++ int lra_prac_type; ++ int loudmntry; ++ int max_loudmntry; ++ ++ int drc_decoder_nr_modes; ++ int drc_eac3_profile; ++} Metadata; ++ ++typedef struct SubstreamInfo { ++ int sus_ver; ++ int channel_mode; ++ int substream_index; ++ int hsf_ext_substream_index; ++ int sf_multiplier; ++ int bitrate_indicator; ++ int add_ch_base; ++ int iframe[4]; ++ int back_channels_present; ++ int centre_present; ++ int top_channels_present; ++ Metadata meta; ++} SubstreamInfo; ++ ++typedef struct SubstreamGroupInfo { ++ int channel_coded; ++ int group_index; ++ SubstreamInfo ssinfo; ++} SubstreamGroupInfo; ++ ++typedef struct PresentationInfo { ++ int single_substream; ++ int enable_presentation; ++ int presentation_config; ++ int presentation_version; ++ int add_emdf_substreams; ++ int n_add_emdf_substreams; ++ int n_substream_groups; ++ int mdcompat; ++ int presentation_id; ++ int multiplier; ++ int multiplier_bit; ++ int pre_virtualized; ++ int frame_rate_factor; ++ int frame_rate_fraction; ++ int multi_pid; ++ int hsf_ext; ++ EMDFInfo emdf[32]; ++ PresentationSubstreamInfo psinfo; ++ SubstreamInfo ssinfo; ++} PresentationInfo; ++ ++typedef struct AC4DecodeContext { ++ AVClass *class; ///< class for AVOptions ++ AVCodecContext *avctx; ///< parent context ++ AVFloatDSPContext *fdsp; ++ GetBitContext gbc; ///< bitstream reader ++ ++ int target_presentation; ++ ++ int version; ++ int sequence_counter; ++ int sequence_counter_prev; ++ int wait_frames; ++ int nb_wait_frames; ++ int fs_index; ++ int frame_rate_index; ++ int frame_len_base; ++ int frame_len_base_idx; ++ AVRational resampling_ratio; ++ int num_qmf_timeslots; 
++ int num_aspx_timeslots; ++ int num_ts_in_ats; ++ int ts_offset_hfgen; ++ int transform_length; ++ int iframe_global; ++ int first_frame; ++ int have_iframe; ++ int nb_presentations; ++ int payload_base; ++ int short_program_id; ++ int nb_substreams; ++ int total_groups; ++ int substream_size[32]; ++ int substream_type[32]; ++ ++ DECLARE_ALIGNED(32, float, winl)[2048]; ++ DECLARE_ALIGNED(32, float, winr)[2048]; ++ ++ av_tx_fn tx_fn[8][5]; ++ AVTXContext *tx_ctx[8][5]; ++ ++ DECLARE_ALIGNED(32, float, kbd_window)[8][5][2048]; ++ ++ float quant_lut[8192]; ++ ++ DECLARE_ALIGNED(32, float, cos_atab)[64][128]; ++ DECLARE_ALIGNED(32, float, sin_atab)[64][128]; ++ DECLARE_ALIGNED(32, float, cos_stab)[128][64]; ++ DECLARE_ALIGNED(32, float, sin_stab)[128][64]; ++ ++ PresentationInfo pinfo[8]; ++ SubstreamGroupInfo ssgroup[8]; ++ Substream substream; ++} AC4DecodeContext; ++ ++enum StrideFlag { ++ LONG_STRIDE, ++ SHORT_STRIDE, ++}; ++ ++enum ACPLMode { ++ ACPL_FULL, ++ ACPL_PARTIAL, ++}; ++ ++enum SubstreamType { ++ ST_SUBSTREAM, ++ ST_PRESENTATION, ++}; ++ ++enum StereoMode { ++ SM_LEVEL, ++ SM_BALANCE, ++}; ++ ++enum DataType { ++ DT_SIGNAL, ++ DT_NOISE, ++}; ++ ++enum SpectralFrontend { ++ SF_ASF, ++ SF_SSF, ++}; ++ ++enum HCBType { ++ F0, ++ DF, ++ DT, ++}; ++ ++enum CodecMode { ++ CM_SIMPLE, ++ CM_ASPX, ++ CM_ASPX_ACPL_1, ++ CM_ASPX_ACPL_2, ++ CM_ASPX_ACPL_3, ++}; ++ ++enum IntervalClass { ++ FIXFIX, ++ FIXVAR, ++ VARFIX, ++ VARVAR, ++}; ++ ++enum ACPLDataType { ++ ALPHA1, ++ ALPHA2, ++ BETA1, ++ BETA2, ++ BETA3, ++ GAMMA1, ++ GAMMA2, ++ GAMMA3, ++ GAMMA4, ++ GAMMA5, ++ GAMMA6, ++}; ++ ++static const AVRational resampling_ratios[] = { ++ {25025, 24000}, ++ {25, 24}, ++ {15, 16}, ++ {25025, 24000}, ++ {25, 24}, ++ {25025, 24000}, ++ {25, 24}, ++ {15, 16}, ++ {25025, 24000}, ++ {25, 24}, ++ {15, 16}, ++ {25025, 24000}, ++ {25, 24}, ++ {1, 1}, ++ {1, 1}, ++ {1, 1}, ++}; ++ ++static const uint8_t channel_mode_nb_channels[] = { ++ 1, 2, 3, 5, 6, 7, 8, 7, 8, 7, 8, 11, 12, 
13, 14, 24, 0
++};
++
++/*
++ * Channel layouts; the channel counts of the populated entries line up, in
++ * order, with channel_mode_nb_channels (presumably both are looked up by
++ * channel mode -- confirm against the users of this table).
++ * Zero-initialised entries have no layout assigned.
++ */
++static const AVChannelLayout ff_ac4_ch_layouts[] = {
++    AV_CHANNEL_LAYOUT_MONO,
++    AV_CHANNEL_LAYOUT_STEREO,
++    AV_CHANNEL_LAYOUT_SURROUND,
++    AV_CHANNEL_LAYOUT_5POINT0,
++    AV_CHANNEL_LAYOUT_5POINT1,
++    AV_CHANNEL_LAYOUT_7POINT0,
++    AV_CHANNEL_LAYOUT_7POINT1,
++    AV_CHANNEL_LAYOUT_7POINT0_FRONT,
++    {
++        /*
++         * For a native-order layout nb_channels has to equal the number of
++         * bits set in the mask.  7.0 plus LFE is eight channels, which also
++         * agrees with channel_mode_nb_channels[8].
++         * NOTE(review): the previous entry is 7POINT0_FRONT; if this entry is
++         * meant to be its "+LFE" counterpart the mask should be built from
++         * AV_CH_LAYOUT_7POINT0_FRONT instead -- confirm against the AC-4
++         * channel mode table.
++         */
++        .nb_channels = 8,
++        .order = AV_CHANNEL_ORDER_NATIVE,
++        .u.mask = AV_CH_LAYOUT_7POINT0 | AV_CH_LOW_FREQUENCY,
++    },
++    { 0 },
++    { 0 },
++    { 0 },
++    { 0 },
++    { 0 },
++    { 0 },
++    { 0 },
++    { 0 },
++    { 0 },
++};
++
++/*
++ * Build a VLC into a function-local static table of static_size elements.
++ * Each expansion owns its own table, so static_size must be large enough for
++ * the code set passed in.
++ */
++#define VLC_INIT_CUSTOM_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g,      \
++                                      h, i, j, flags, static_size)         \
++    do {                                                                   \
++        static VLCElem table[static_size];                                 \
++        (vlc)->table = table;                                              \
++        (vlc)->table_allocated = static_size;                              \
++        ff_vlc_init_sparse(vlc, bits, a, b, c, d, e, f, g, h, i, j,        \
++                           flags | VLC_INIT_USE_STATIC);                   \
++    } while (0)
++
++/* Same as VLC_INIT_CUSTOM_SPARSE_STATIC with no extra flags. */
++#define VLC_INIT_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, h, i, j, static_size) \
++    VLC_INIT_CUSTOM_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g,          \
++                                  h, i, j, 0, static_size)
++
++/* Same as VLC_INIT_SPARSE_STATIC without a symbols table. */
++#define VLC_INIT_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size)       \
++    VLC_INIT_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, NULL, 0, 0, static_size)
++
++/* Process-wide VLC tables, filled in by ac4_decode_init(). */
++static VLC channel_mode_vlc;
++static VLC bitrate_indicator_vlc;
++static VLC scale_factors_vlc;
++static VLC snf_vlc;
++static VLC asf_codebook_vlc[11];
++static VLC acpl_codebook_vlc[4][2][3];
++static VLC aspx_int_class_vlc;
++static VLC aspx_codebook_signal_vlc[2][2][3];
++static VLC aspx_codebook_noise_vlc[2][3];
++
++/*
++ * Decoder init: fills the static VLC tables and sets up the per-context
++ * MDCT contexts, KBD windows, quantiser LUT, QMF trig tables and float DSP.
++ * NOTE(review): the static VLC tables are rebuilt on every call; consider
++ * guarding the VLC_INIT_STATIC block with ff_thread_once() -- confirm.
++ */
++static av_cold int ac4_decode_init(AVCodecContext *avctx)
++{
++    AC4DecodeContext *s = avctx->priv_data;
++    int ret;
++
++    //feenableexcept(FE_INVALID | FE_OVERFLOW);
++
++    s->avctx = avctx;
++    s->first_frame = 1;
++
++    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
++
++    VLC_INIT_STATIC(&channel_mode_vlc, 9, sizeof(channel_mode_bits),
++                    channel_mode_bits, 1, 1, channel_mode_codes, 2, 2, 512);
++    VLC_INIT_STATIC(&bitrate_indicator_vlc, 5,
sizeof(bitrate_indicator_bits), ++ bitrate_indicator_bits, 1, 1, bitrate_indicator_codes, 1, 1, 32); ++ VLC_INIT_STATIC(&scale_factors_vlc, 9, sizeof(scale_factors_bits), ++ scale_factors_bits, 1, 1, scale_factors_codes, 1, 1, 850); ++ VLC_INIT_STATIC(&snf_vlc, 6, sizeof(snf_bits), ++ snf_bits, 1, 1, snf_codes, 1, 1, 70); ++ ++ VLC_INIT_STATIC(&asf_codebook_vlc[0], 9, sizeof(asf_codebook_1_bits), ++ asf_codebook_1_bits, 1, 1, asf_codebook_1_codes, 1, 1, 542); ++ VLC_INIT_STATIC(&asf_codebook_vlc[1], 9, sizeof(asf_codebook_2_bits), ++ asf_codebook_2_bits, 1, 1, asf_codebook_2_codes, 1, 1, 512); ++ VLC_INIT_STATIC(&asf_codebook_vlc[2], 9, sizeof(asf_codebook_3_bits), ++ asf_codebook_3_bits, 1, 1, asf_codebook_3_codes, 1, 1, 612); ++ VLC_INIT_STATIC(&asf_codebook_vlc[3], 9, sizeof(asf_codebook_4_bits), ++ asf_codebook_4_bits, 1, 1, asf_codebook_4_codes, 1, 1, 544); ++ VLC_INIT_STATIC(&asf_codebook_vlc[4], 9, sizeof(asf_codebook_5_bits), ++ asf_codebook_5_bits, 1, 1, asf_codebook_5_codes, 1, 1, 576); ++ VLC_INIT_STATIC(&asf_codebook_vlc[5], 9, sizeof(asf_codebook_6_bits), ++ asf_codebook_6_bits, 1, 1, asf_codebook_6_codes, 1, 1, 546); ++ VLC_INIT_STATIC(&asf_codebook_vlc[6], 9, sizeof(asf_codebook_7_bits), ++ asf_codebook_7_bits, 1, 1, asf_codebook_7_codes, 1, 1, 542); ++ VLC_INIT_STATIC(&asf_codebook_vlc[7], 9, sizeof(asf_codebook_8_bits), ++ asf_codebook_8_bits, 1, 1, asf_codebook_8_codes, 1, 1, 522); ++ VLC_INIT_STATIC(&asf_codebook_vlc[8], 9, sizeof(asf_codebook_9_bits), ++ asf_codebook_9_bits, 1, 1, asf_codebook_9_codes, 1, 1, 670); ++ VLC_INIT_STATIC(&asf_codebook_vlc[9], 9, sizeof(asf_codebook_10_bits), ++ asf_codebook_10_bits, 1, 1, asf_codebook_10_codes, 1, 1, 604); ++ VLC_INIT_STATIC(&asf_codebook_vlc[10], 9, sizeof(asf_codebook_11_bits), ++ asf_codebook_11_bits, 1, 1, asf_codebook_11_codes, 1, 1, 674); ++ ++ VLC_INIT_STATIC(&aspx_int_class_vlc, 5, sizeof(aspx_int_class_bits), ++ aspx_int_class_bits, 1, 1, aspx_int_class_codes, 1, 1, 32); ++ ++ 
VLC_INIT_STATIC(&aspx_codebook_signal_vlc[0][0][0], 9, sizeof(aspx_hcb_env_level_15_f0_bits), ++ aspx_hcb_env_level_15_f0_bits, 1, 1, aspx_hcb_env_level_15_f0_codes, 4, 4, 1024); ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[0][0][1], 9, sizeof(aspx_hcb_env_level_15_df_bits), ++ aspx_hcb_env_level_15_df_bits, 1, 1, aspx_hcb_env_level_15_df_codes, 4, 4, 1888); ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[0][0][2], 9, sizeof(aspx_hcb_env_level_15_dt_bits), ++ aspx_hcb_env_level_15_dt_bits, 1, 1, aspx_hcb_env_level_15_dt_codes, 4, 4, 1368); ++ ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[0][1][0], 9, sizeof(aspx_hcb_env_level_30_f0_bits), ++ aspx_hcb_env_level_30_f0_bits, 1, 1, aspx_hcb_env_level_30_f0_codes, 4, 4, 772); ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[0][1][1], 9, sizeof(aspx_hcb_env_level_30_df_bits), ++ aspx_hcb_env_level_30_df_bits, 1, 1, aspx_hcb_env_level_30_df_codes, 4, 4, 1624); ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[0][1][2], 9, sizeof(aspx_hcb_env_level_30_dt_bits), ++ aspx_hcb_env_level_30_dt_bits, 1, 1, aspx_hcb_env_level_30_dt_codes, 4, 4, 1598); ++ ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[1][0][0], 9, sizeof(aspx_hcb_env_balance_15_f0_bits), ++ aspx_hcb_env_balance_15_f0_bits, 1, 1, aspx_hcb_env_balance_15_f0_codes, 4, 4, 644); ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[1][0][1], 9, sizeof(aspx_hcb_env_balance_15_df_bits), ++ aspx_hcb_env_balance_15_df_bits, 1, 1, aspx_hcb_env_balance_15_df_codes, 4, 4, 1056); ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[1][0][2], 9, sizeof(aspx_hcb_env_balance_15_dt_bits), ++ aspx_hcb_env_balance_15_dt_bits, 1, 1, aspx_hcb_env_balance_15_dt_codes, 4, 4, 616); ++ ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[1][1][0], 9, sizeof(aspx_hcb_env_balance_30_f0_bits), ++ aspx_hcb_env_balance_30_f0_bits, 1, 1, aspx_hcb_env_balance_30_f0_codes, 2, 2, 520); ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[1][1][1], 9, sizeof(aspx_hcb_env_balance_30_df_bits), ++ aspx_hcb_env_balance_30_df_bits, 1, 1, 
aspx_hcb_env_balance_30_df_codes, 4, 4, 768); ++ VLC_INIT_STATIC(&aspx_codebook_signal_vlc[1][1][2], 9, sizeof(aspx_hcb_env_balance_30_dt_bits), ++ aspx_hcb_env_balance_30_dt_bits, 1, 1, aspx_hcb_env_balance_30_dt_codes, 2, 2, 576); ++ ++ VLC_INIT_STATIC(&aspx_codebook_noise_vlc[0][0], 9, sizeof(aspx_hcb_noise_level_f0_bits), ++ aspx_hcb_noise_level_f0_bits, 1, 1, aspx_hcb_noise_level_f0_codes, 2, 2, 672); ++ VLC_INIT_STATIC(&aspx_codebook_noise_vlc[0][1], 9, sizeof(aspx_hcb_noise_level_df_bits), ++ aspx_hcb_noise_level_df_bits, 1, 1, aspx_hcb_noise_level_df_codes, 4, 4, 1024); ++ VLC_INIT_STATIC(&aspx_codebook_noise_vlc[0][2], 9, sizeof(aspx_hcb_noise_level_dt_bits), ++ aspx_hcb_noise_level_dt_bits, 1, 1, aspx_hcb_noise_level_dt_codes, 2, 2, 768); ++ ++ VLC_INIT_STATIC(&aspx_codebook_noise_vlc[1][0], 9, sizeof(aspx_hcb_noise_balance_f0_bits), ++ aspx_hcb_noise_balance_f0_bits, 1, 1, aspx_hcb_noise_balance_f0_codes, 2, 2, 516); ++ VLC_INIT_STATIC(&aspx_codebook_noise_vlc[1][1], 9, sizeof(aspx_hcb_noise_balance_df_bits), ++ aspx_hcb_noise_balance_df_bits, 1, 1, aspx_hcb_noise_balance_df_codes, 2, 2, 536); ++ VLC_INIT_STATIC(&aspx_codebook_noise_vlc[1][2], 9, sizeof(aspx_hcb_noise_balance_dt_bits), ++ aspx_hcb_noise_balance_dt_bits, 1, 1, aspx_hcb_noise_balance_dt_codes, 2, 2, 530); ++ ++ VLC_INIT_STATIC(&acpl_codebook_vlc[0][1][0], 9, sizeof(acpl_hcb_alpha_coarse_f0_bits), ++ acpl_hcb_alpha_coarse_f0_bits, 1, 1, acpl_hcb_alpha_coarse_f0_codes, 2, 2, 516); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[0][1][1], 9, sizeof(acpl_hcb_alpha_coarse_df_bits), ++ acpl_hcb_alpha_coarse_df_bits, 1, 1, acpl_hcb_alpha_coarse_df_codes, 4, 4, 1032); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[0][1][2], 9, sizeof(acpl_hcb_alpha_coarse_dt_bits), ++ acpl_hcb_alpha_coarse_dt_bits, 1, 1, acpl_hcb_alpha_coarse_dt_codes, 4, 4, 642); ++ ++ VLC_INIT_STATIC(&acpl_codebook_vlc[0][0][0], 9, sizeof(acpl_hcb_alpha_fine_f0_bits), ++ acpl_hcb_alpha_fine_f0_bits, 1, 1, acpl_hcb_alpha_fine_f0_codes, 2, 2, 530); 
++ VLC_INIT_STATIC(&acpl_codebook_vlc[0][0][1], 9, sizeof(acpl_hcb_alpha_fine_df_bits), ++ acpl_hcb_alpha_fine_df_bits, 1, 1, acpl_hcb_alpha_fine_df_codes, 4, 4, 1176); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[0][0][2], 9, sizeof(acpl_hcb_alpha_fine_dt_bits), ++ acpl_hcb_alpha_fine_dt_bits, 1, 1, acpl_hcb_alpha_fine_dt_codes, 4, 4, 1158); ++ ++ VLC_INIT_STATIC(&acpl_codebook_vlc[1][1][0], 9, sizeof(acpl_hcb_beta_coarse_f0_bits), ++ acpl_hcb_beta_coarse_f0_bits, 1, 1, acpl_hcb_beta_coarse_f0_codes, 1, 1, 512); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[1][1][1], 9, sizeof(acpl_hcb_beta_coarse_df_bits), ++ acpl_hcb_beta_coarse_df_bits, 1, 1, acpl_hcb_beta_coarse_df_codes, 1, 1, 512); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[1][1][2], 9, sizeof(acpl_hcb_beta_coarse_dt_bits), ++ acpl_hcb_beta_coarse_dt_bits, 1, 1, acpl_hcb_beta_coarse_dt_codes, 1, 1, 512); ++ ++ VLC_INIT_STATIC(&acpl_codebook_vlc[1][0][0], 9, sizeof(acpl_hcb_beta_fine_f0_bits), ++ acpl_hcb_beta_fine_f0_bits, 1, 1, acpl_hcb_beta_fine_f0_codes, 1, 1, 512); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[1][0][1], 9, sizeof(acpl_hcb_beta_fine_df_bits), ++ acpl_hcb_beta_fine_df_bits, 1, 1, acpl_hcb_beta_fine_df_codes, 4, 4, 528); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[1][0][2], 9, sizeof(acpl_hcb_beta_fine_dt_bits), ++ acpl_hcb_beta_fine_dt_bits, 1, 1, acpl_hcb_beta_fine_dt_codes, 4, 4, 576); ++ ++ VLC_INIT_STATIC(&acpl_codebook_vlc[2][1][0], 9, sizeof(acpl_hcb_beta3_coarse_f0_bits), ++ acpl_hcb_beta3_coarse_f0_bits, 1, 1, acpl_hcb_beta3_coarse_f0_codes, 1, 1, 512); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[2][1][1], 9, sizeof(acpl_hcb_beta3_coarse_df_bits), ++ acpl_hcb_beta3_coarse_df_bits, 1, 1, acpl_hcb_beta3_coarse_df_codes, 4, 4, 528); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[2][1][2], 9, sizeof(acpl_hcb_beta3_coarse_dt_bits), ++ acpl_hcb_beta3_coarse_dt_bits, 1, 1, acpl_hcb_beta3_coarse_dt_codes, 2, 2, 576); ++ ++ VLC_INIT_STATIC(&acpl_codebook_vlc[2][0][0], 9, sizeof(acpl_hcb_beta3_fine_f0_bits), ++ acpl_hcb_beta3_fine_f0_bits, 
1, 1, acpl_hcb_beta3_fine_f0_codes, 1, 1, 512); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[2][0][1], 9, sizeof(acpl_hcb_beta3_fine_df_bits), ++ acpl_hcb_beta3_fine_df_bits, 1, 1, acpl_hcb_beta3_fine_df_codes, 4, 4, 580); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[2][0][2], 9, sizeof(acpl_hcb_beta3_fine_dt_bits), ++ acpl_hcb_beta3_fine_dt_bits, 1, 1, acpl_hcb_beta3_fine_dt_codes, 4, 4, 768); ++ ++ VLC_INIT_STATIC(&acpl_codebook_vlc[3][1][0], 9, sizeof(acpl_hcb_gamma_coarse_f0_bits), ++ acpl_hcb_gamma_coarse_f0_bits, 1, 1, acpl_hcb_gamma_coarse_f0_codes, 2, 2, 528); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[3][1][1], 9, sizeof(acpl_hcb_gamma_coarse_df_bits), ++ acpl_hcb_gamma_coarse_df_bits, 1, 1, acpl_hcb_gamma_coarse_df_codes, 4, 4, 644); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[3][1][2], 9, sizeof(acpl_hcb_gamma_coarse_dt_bits), ++ acpl_hcb_gamma_coarse_dt_bits, 1, 1, acpl_hcb_gamma_coarse_dt_codes, 4, 4, 896); ++ ++ VLC_INIT_STATIC(&acpl_codebook_vlc[3][0][0], 9, sizeof(acpl_hcb_gamma_fine_f0_bits), ++ acpl_hcb_gamma_fine_f0_bits, 1, 1, acpl_hcb_gamma_fine_f0_codes, 4, 4, 544); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[3][0][1], 9, sizeof(acpl_hcb_gamma_fine_df_bits), ++ acpl_hcb_gamma_fine_df_bits, 1, 1, acpl_hcb_gamma_fine_df_codes, 4, 4, 1026); ++ VLC_INIT_STATIC(&acpl_codebook_vlc[3][0][2], 9, sizeof(acpl_hcb_gamma_fine_dt_bits), ++ acpl_hcb_gamma_fine_dt_bits, 1, 1, acpl_hcb_gamma_fine_dt_codes, 4, 4, 1792); ++ ++ for (int j = 0; j < 8; j++) { ++ const uint16_t *transf_lengths = transf_length_48khz[j]; ++ ++ for (int i = 0; i < 5; i++) { ++ int N_w = transf_lengths[i]; ++ float alpha = kbd_window_alpha[j][i]; ++ float scale = 1.f / (float)N_w; ++ ++ if ((ret = av_tx_init(&s->tx_ctx[j][i], &s->tx_fn[j][i], AV_TX_FLOAT_MDCT, 1, N_w, &scale, 0))) ++ return ret; ++ ++ ff_kbd_window_init(s->kbd_window[j][i], alpha, N_w); ++ } ++ } ++ ++ for (int i = 0; i < 8192; i++) ++ s->quant_lut[i] = powf((float)i, 4.f / 3.f); ++ ++ for (int i = 0; i < 64; i++) { ++ for (int n = 0; n < 128; n++) 
{
++            s->cos_atab[i][n] = cosf(M_PI/128*(i+0.5)*(2*n-1));
++            s->sin_atab[i][n] = sinf(M_PI/128*(i+0.5)*(2*n-1));
++            s->cos_stab[n][i] = cosf(M_PI/128*(i+0.5)*(2*n-255)) / 64.f;
++            s->sin_stab[n][i] = sinf(M_PI/128*(i+0.5)*(2*n-255)) / 64.f;
++        }
++    }
++
++    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
++    if (!s->fdsp)
++        return AVERROR(ENOMEM);
++
++    return 0;
++}
++
++/**
++ * Read an escape-coded unsigned value: groups of `bits` bits, each followed
++ * by a one-bit "more" flag.  While the flag is set the running value is
++ * shifted left by `bits` and biased by (1 << bits) before the next group is
++ * added.
++ *
++ * The accumulation is done in unsigned arithmetic so that an over-long chain
++ * of "more" flags in a damaged or hostile stream cannot trigger signed
++ * overflow, and the loop stops once the bitstream is exhausted.
++ *
++ * @param gb   bitstream reader
++ * @param bits width of each group (callers in this file pass 2 or 3)
++ * @return decoded value; results that do not fit in an int wrap around, so
++ *         callers using it as an index or count must range-check it
++ */
++static int variable_bits(GetBitContext *gb, int bits)
++{
++    unsigned value = 0;
++    int read_more;
++
++    do {
++        value += get_bits(gb, bits);
++        read_more = (int)get_bits1(gb);
++        if (read_more) {
++            value <<= bits;
++            value += 1U << bits;
++        }
++    } while (read_more && get_bits_left(gb) > 0);
++
++    return (int)value;
++}
++
++/**
++ * Validate s->sequence_counter against s->sequence_counter_prev.
++ *
++ * Accepted transitions: prev + 1, any non-zero value after a previous 0,
++ * 1 after 1020 (wrap-around), and 0 after 0.  Counters above 1020 are
++ * always rejected.
++ *
++ * @return 0 if the transition is accepted, AVERROR_INVALIDDATA otherwise
++ */
++static int check_sequence(AC4DecodeContext *s)
++{
++    if (s->sequence_counter > 1020) {
++        av_log(s->avctx, AV_LOG_ERROR, "invalid sequence counter: %d\n", s->sequence_counter);
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (s->sequence_counter == s->sequence_counter_prev + 1)
++        return 0;
++
++    if (s->sequence_counter != 0 && s->sequence_counter_prev == 0)
++        return 0;
++
++    if (s->sequence_counter == 1 && s->sequence_counter_prev == 1020)
++        return 0;
++
++    if (s->sequence_counter == 0 && s->sequence_counter_prev == 0)
++        return 0;
++
++    av_log(s->avctx, AV_LOG_ERROR, "unexpected sequence counter: %d vs %d\n", s->sequence_counter, s->sequence_counter_prev);
++    return AVERROR_INVALIDDATA;
++}
++
++/**
++ * Parse the frame rate multiplier bits for a presentation.
++ *
++ * Depending on s->frame_rate_index this reads zero, one or two bits and sets
++ * p->frame_rate_factor to 1, 2 or 4.  p->multiplier_bit is always reset;
++ * p->multiplier is only written when a multiplier bit is present.
++ *
++ * @return always 0
++ */
++static int frame_rate_multiply_info(AC4DecodeContext *s, PresentationInfo *p)
++{
++    GetBitContext *gb = &s->gbc;
++
++    p->multiplier_bit = 0;
++
++    switch (s->frame_rate_index) {
++    case 2:
++    case 3:
++    case 4:
++        p->multiplier = (int)get_bits1(gb);
++        if (p->multiplier)
++            p->multiplier_bit = (int)get_bits1(gb);
++        p->frame_rate_factor = p->multiplier ? (p->multiplier_bit ? 4 : 2) : 1;
++        break;
++    case 0:
++    case 1:
++    case 7:
++    case 8:
++    case 9:
++        p->multiplier = (int)get_bits1(gb);
++        p->frame_rate_factor = p->multiplier ? 2 : 1;
++        break;
++    default:
++        p->frame_rate_factor = 1;
++        break;
++    }
++
++    return 0;
++}
++
++/**
++ * Parse the EMDF payload substream index (2 bits, escape-extended when the
++ * field reads 3) into e->substream_index.
++ *
++ * @return always 0
++ */
++static int emdf_payloads_substream_info(AC4DecodeContext *s, EMDFInfo *e)
++{
++    GetBitContext *gb = &s->gbc;
++
++    e->substream_index = (int)get_bits(gb, 2);
++    if (e->substream_index == 3)
++        e->substream_index += variable_bits(gb, 2);
++
++    return 0;
++}
++
++/**
++ * Skip the two EMDF protection fields.
++ *
++ * Two 2-bit length codes are read first; each selects a field of 0, 8, 32 or
++ * 128 bits.  The field contents are not used, only skipped.
++ *
++ * @return always 0
++ */
++static int emdf_protection(AC4DecodeContext *s, EMDFInfo *e)
++{
++    /* Width in bits of a protection field, indexed by its 2-bit length code. */
++    static const uint8_t field_bits[4] = { 0, 8, 32, 128 };
++    GetBitContext *gb = &s->gbc;
++    int first, second;
++
++    first  = (int)get_bits(gb, 2);
++    second = (int)get_bits(gb, 2);
++
++    if (field_bits[first])
++        skip_bits_long(gb, field_bits[first]);
++    if (field_bits[second])
++        skip_bits_long(gb, field_bits[second]);
++
++    return 0;
++}
++
++/**
++ * Parse one EMDF info element: version (2 bits, escape-extended), key id
++ * (3 bits, escape-extended), an optional payload substream index, and the
++ * protection fields (skipped).
++ *
++ * @return always 0
++ */
++static int emdf_info(AC4DecodeContext *s, EMDFInfo *e)
++{
++    GetBitContext *gb = &s->gbc;
++
++    e->version = (int)get_bits(gb, 2);
++    if (e->version == 3)
++        e->version += variable_bits(gb, 2);
++    e->key_id = (int)get_bits(gb, 3);
++    if (e->key_id == 7)
++        e->key_id += variable_bits(gb, 3);
++
++    if (get_bits1(gb))
++        emdf_payloads_substream_info(s, e);
++
++    emdf_protection(s, e);
++
++    return 0;
++}
++
++/**
++ * Skip a content type element.  Nothing is stored: three bits are skipped,
++ * then, if the following flag is set, either a fixed 1 + 16 bit field or a
++ * language tag of up to 63 bytes whose length is given by a 6-bit count.
++ *
++ * @return always 0
++ */
++static int content_type(AC4DecodeContext *s, PresentationInfo *p)
++{
++    GetBitContext *gb = &s->gbc;
++
++    skip_bits(gb, 3);
++    if (get_bits1(gb)) {
++        if (get_bits1(gb)) {
++            skip_bits(gb, 1);
++            skip_bits(gb, 16);
++        } else {
++            int language_tag_bytes = (int)get_bits(gb, 6);
++
++            skip_bits_long(gb, 8 * language_tag_bytes);
++        }
++    }
++
++    return 0;
++}
++
++static int ac4_hsf_ext_substream_info(AC4DecodeContext *s, SubstreamInfo *ssi,
++                                      int substream_present)
++{
++    GetBitContext *gb = &s->gbc;
++
++    if (substream_present) {
++        ssi->hsf_ext_substream_index = (int)get_bits(gb, 2);
++        if (ssi->hsf_ext_substream_index == 3) ++
ssi->hsf_ext_substream_index += variable_bits(gb, 2);
++    }
++
++    return 0;
++}
++
++/**
++ * Parse a substream info element into ssi and mark the referenced substream
++ * as ST_SUBSTREAM in s->substream_type.
++ *
++ * The substream index comes straight from the bitstream (2 bits plus an
++ * unbounded escape), so it is range-checked against s->substream_type before
++ * it is used as an array index.
++ *
++ * @param p presentation being parsed; p->frame_rate_factor gives the number
++ *          of iframe flags to read
++ * @return 0 on success, AVERROR_INVALIDDATA on an invalid channel mode or an
++ *         out-of-range substream index
++ */
++static int ac4_substream_info(AC4DecodeContext *s, PresentationInfo *p,
++                              SubstreamInfo *ssi)
++{
++    GetBitContext *gb = &s->gbc;
++
++    ssi->sus_ver = 0;
++    ssi->channel_mode = get_vlc2(gb, channel_mode_vlc.table, channel_mode_vlc.bits, 1);
++    if (ssi->channel_mode < 0) {
++        av_log(s->avctx, AV_LOG_ERROR, "invalid channel mode: %d\n", ssi->channel_mode);
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (ssi->channel_mode == 16)
++        ssi->channel_mode += variable_bits(gb, 2);
++
++    if (s->fs_index == 1 && (int)get_bits1(gb))
++        ssi->sf_multiplier = 1 + (int)get_bits1(gb);
++    av_log(s->avctx, AV_LOG_DEBUG, "sf_multiplier: %d\n", ssi->sf_multiplier);
++
++    if (get_bits1(gb))
++        ssi->bitrate_indicator = get_vlc2(gb, bitrate_indicator_vlc.table, bitrate_indicator_vlc.bits, 1);
++
++    if (ssi->channel_mode == 7 ||
++        ssi->channel_mode == 8 ||
++        ssi->channel_mode == 9 ||
++        ssi->channel_mode == 10) {
++        ssi->add_ch_base = (int)get_bits1(gb);
++    }
++
++    if (get_bits1(gb))
++        content_type(s, p);
++
++    for (int i = 0; i < p->frame_rate_factor; i++)
++        ssi->iframe[i] = (int)get_bits1(gb);
++
++    ssi->substream_index = (int)get_bits(gb, 2);
++    if (ssi->substream_index == 3)
++        ssi->substream_index += variable_bits(gb, 2);
++    /* The index is attacker-controlled: never write outside substream_type[]. */
++    if (ssi->substream_index < 0 ||
++        ssi->substream_index >= (int)FF_ARRAY_ELEMS(s->substream_type)) {
++        av_log(s->avctx, AV_LOG_ERROR, "invalid substream index: %d\n", ssi->substream_index);
++        return AVERROR_INVALIDDATA;
++    }
++    s->substream_type[ssi->substream_index] = ST_SUBSTREAM;
++    av_log(s->avctx, AV_LOG_DEBUG, "substream index: %d\n", ssi->substream_index);
++
++    return 0;
++}
++
++/**
++ * Skip a presentation config extension: a byte count (5 bits, optionally
++ * extended by an escape-coded value shifted left by 5) followed by that many
++ * bytes of payload.
++ *
++ * @return always 0
++ */
++static int presentation_config_ext_info(AC4DecodeContext *s)
++{
++    GetBitContext *gb = &s->gbc;
++    int n_skip_bytes;
++
++    n_skip_bytes = (int)get_bits(gb, 5);
++    if (get_bits1(gb))
++        n_skip_bytes += variable_bits(gb, 2) << 5;
++
++    skip_bits_long(gb, 8 * n_skip_bytes);
++
++    return 0;
++}
++
++/**
++ * Parse one presentation info element into p.
++ *
++ * Only the single-substream case and presentation_config 0 are parsed in
++ * detail; every other configuration is skipped through
++ * presentation_config_ext_info().
++ *
++ * @return 0 on success, a negative AVERROR code if a substream info element
++ *         is invalid or more additional EMDF substreams are signalled than
++ *         p->emdf can hold
++ */
++static int ac4_presentation_info(AC4DecodeContext *s, PresentationInfo *p)
++{
++    GetBitContext *gb = &s->gbc;
++    int ret;
++
++    p->single_substream = (int)get_bits1(gb);
++    if (p->single_substream != 1) {
++        p->presentation_config = (int)get_bits(gb, 3);
++        if (p->presentation_config == 0x7) {
++            p->presentation_config += variable_bits(gb, 2);
++        }
++    }
++
++    p->presentation_version = get_unary(gb, 0, 31);
++
++    p->add_emdf_substreams = 0;
++    if (p->single_substream != 1 && p->presentation_config == 6) {
++        p->add_emdf_substreams = 1;
++    } else {
++        p->mdcompat = (int)get_bits(gb, 3);
++
++        if (get_bits1(gb))
++            p->presentation_id = variable_bits(gb, 2);
++
++        frame_rate_multiply_info(s, p);
++        emdf_info(s, &p->emdf[0]);
++
++        if (p->single_substream == 1) {
++            ret = ac4_substream_info(s, p, &p->ssinfo);
++            if (ret < 0)
++                return ret;
++        } else {
++            p->hsf_ext = (int)get_bits1(gb);
++            switch (p->presentation_config) {
++            case 0:
++                ret = ac4_substream_info(s, p, &p->ssinfo);
++                if (ret < 0)
++                    return ret;
++                /* NOTE(review): p->hsf_ext is read above but the extension is
++                 * parsed unconditionally here -- confirm whether this call
++                 * should depend on p->hsf_ext. */
++                ret = ac4_hsf_ext_substream_info(s, &p->ssinfo, 1);
++                if (ret < 0)
++                    return ret;
++                ret = ac4_substream_info(s, p, &p->ssinfo);
++                if (ret < 0)
++                    return ret;
++                break;
++            default:
++                presentation_config_ext_info(s);
++            }
++        }
++
++        p->pre_virtualized = (int)get_bits1(gb);
++        p->add_emdf_substreams = (int)get_bits1(gb);
++    }
++
++    if (p->add_emdf_substreams) {
++        p->n_add_emdf_substreams = (int)get_bits(gb, 2);
++        if (p->n_add_emdf_substreams == 0)
++            p->n_add_emdf_substreams = variable_bits(gb, 2) + 4;
++
++        /* The count is escape-coded and unbounded; p->emdf[] is not. */
++        if (p->n_add_emdf_substreams < 0 ||
++            p->n_add_emdf_substreams > (int)FF_ARRAY_ELEMS(p->emdf)) {
++            av_log(s->avctx, AV_LOG_ERROR, "too many EMDF substreams: %d\n", p->n_add_emdf_substreams);
++            p->n_add_emdf_substreams = 0;
++            return AVERROR_INVALIDDATA;
++        }
++
++        for (int i = 0; i < p->n_add_emdf_substreams; i++)
++            emdf_info(s, &p->emdf[i]);
++    }
++
++    return 0;
++}
++
++/**
++ * Parse the substream index table: the number of substreams and, when
++ * present, the size of each one into s->substream_size.
++ *
++ * @return 0 on success, AVERROR_INVALIDDATA if more substreams are signalled
++ *         than s->substream_size can hold (s->nb_substreams is then reset
++ *         to 0)
++ */
++static int substream_index_table(AC4DecodeContext *s)
++{
++    GetBitContext *gb = &s->gbc;
++    int size_present;
++
++    s->nb_substreams = (int)get_bits(gb, 2);
++    if (s->nb_substreams == 0)
++        s->nb_substreams = variable_bits(gb, 2) + 4;
++
++    av_log(s->avctx, AV_LOG_DEBUG, "nb_substreams: %d\n", s->nb_substreams);
++
++    /* The count is escape-coded and unbounded; substream_size[] is not. */
++    if (s->nb_substreams < 0 ||
++        s->nb_substreams > (int)FF_ARRAY_ELEMS(s->substream_size)) {
++        av_log(s->avctx, AV_LOG_ERROR, "too many substreams: %d\n", s->nb_substreams);
++        s->nb_substreams = 0;
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (s->nb_substreams == 1) {
++        size_present = (int)get_bits1(gb);
++    } else {
++        size_present = 1;
++    }
++
++    if (size_present) {
++        for (int i = 0; i < s->nb_substreams; i++) {
++            int more_bits = (int)get_bits1(gb);
++
++            s->substream_size[i] = (int)get_bits(gb, 10);
++            if (more_bits)
++                s->substream_size[i] += variable_bits(gb, 2) << 10;
++            av_log(s->avctx, AV_LOG_DEBUG, "substream[%d] size: %d\n", i, s->substream_size[i]);
++        }
++    }
++
++    return 0;
++}
++
++/**
++ * Parse a presentation substream info element into psi and mark the
++ * referenced substream as ST_PRESENTATION in s->substream_type.
++ *
++ * @return 0 on success, AVERROR_INVALIDDATA if the substream index does not
++ *         fit s->substream_type
++ */
++static int presentation_substream_info(AC4DecodeContext *s, PresentationSubstreamInfo *psi)
++{
++    GetBitContext *gb = &s->gbc;
++
++    psi->alternative = (int)get_bits1(gb);
++    psi->pres_ndot = (int)get_bits1(gb);
++    psi->substream_index = (int)get_bits(gb, 2);
++    if (psi->substream_index == 3)
++        psi->substream_index += variable_bits(gb, 2);
++    /* The index is attacker-controlled: never write outside substream_type[]. */
++    if (psi->substream_index < 0 ||
++        psi->substream_index >= (int)FF_ARRAY_ELEMS(s->substream_type)) {
++        av_log(s->avctx, AV_LOG_ERROR, "invalid presentation substream index: %d\n", psi->substream_index);
++        return AVERROR_INVALIDDATA;
++    }
++    s->substream_type[psi->substream_index] = ST_PRESENTATION;
++    av_log(s->avctx, AV_LOG_DEBUG, "presentation substream index: %d\n", psi->substream_index);
++
++    return 0;
++}
++
++/**
++ * Parse the frame rate fraction bits and set p->frame_rate_fraction to
++ * 1, 2 or 4.
++ *
++ * For frame_rate_index 5..9 a single bit is read, and only when
++ * p->frame_rate_factor is 1; for 10..12 up to two bits are read.
++ *
++ * @return always 0
++ */
++static int frame_rate_fractions_info(AC4DecodeContext *s, PresentationInfo *p)
++{
++    GetBitContext *gb = &s->gbc;
++
++    p->frame_rate_fraction = 1;
++    if (s->frame_rate_index >= 5 && s->frame_rate_index <= 9) {
++        if (p->frame_rate_factor == 1) {
++            if (get_bits1(gb))
++                p->frame_rate_fraction = 2;
++        }
++    }
++
++    if (s->frame_rate_index >= 10 && s->frame_rate_index <= 12) {
++        if (get_bits1(gb)) {
++            if (get_bits1(gb))
++                p->frame_rate_fraction = 4;
++            else
++                p->frame_rate_fraction = 2;
++        }
++    }
++
++    return 0;
++}
++
++/**
++ * Skip an OAMD substream info element.  Nothing is stored in ssi; the
++ * fields are only read to keep the bitstream reader in sync.
++ *
++ * @return always 0
++ */
++static int oamd_substream_info(AC4DecodeContext *s, SubstreamGroupInfo *ssi,
++                               int substreams_present)
++{
++    GetBitContext *gb = &s->gbc;
++
++    skip_bits1(gb);
++    if (substreams_present) {
++        /* substream index: 2 bits, escape-extended when the field reads 3 */
++        if (get_bits(gb, 2) == 3)
++            variable_bits(gb, 2);
++    }
++
++    return 0;
++}
++
++static int ac4_substream_info_chan(AC4DecodeContext *s, SubstreamGroupInfo *g,
++                                   int substreams_present,
++                                   int sus_ver)
++{
++    GetBitContext *gb = &s->gbc;
++    SubstreamInfo *ssi = &g->ssinfo;
++
++    ssi->sus_ver = sus_ver;
++    ssi->channel_mode = get_vlc2(gb, channel_mode_vlc.table, channel_mode_vlc.bits, 3);
++    if (ssi->channel_mode < 0) { ++
av_log(s->avctx, AV_LOG_ERROR, "invalid chan channel mode: %d\n", ssi->channel_mode); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ if (ssi->channel_mode == 16) ++ ssi->channel_mode += variable_bits(gb, 2); ++ ++ if (ssi->channel_mode == 11 || ++ ssi->channel_mode == 12 || ++ ssi->channel_mode == 13 || ++ ssi->channel_mode == 14) { ++ ssi->back_channels_present = (int)get_bits1(gb); ++ ssi->centre_present = (int)get_bits1(gb); ++ ssi->top_channels_present = (int)get_bits(gb, 2); ++ } ++ ++ if (s->fs_index && get_bits1(gb)) ++ ssi->sf_multiplier = 1 + (int)get_bits1(gb); ++ av_log(s->avctx, AV_LOG_DEBUG, "sf_multiplier: %d\n", ssi->sf_multiplier); ++ ++ if (get_bits1(gb)) ++ ssi->bitrate_indicator = get_vlc2(gb, bitrate_indicator_vlc.table, bitrate_indicator_vlc.bits, 1); ++ ++ if (ssi->channel_mode == 7 || ++ ssi->channel_mode == 8 || ++ ssi->channel_mode == 9 || ++ ssi->channel_mode == 10) ++ ssi->add_ch_base = (int)get_bits1(gb); ++ ++ for (int i = 0; i < s->pinfo[0].frame_rate_factor; i++) ++ ssi->iframe[i] = (int)get_bits1(gb); ++ ++ if (substreams_present) { ++ ssi->substream_index = (int)get_bits(gb, 2); ++ if (ssi->substream_index == 3) ++ ssi->substream_index += variable_bits(gb, 2); ++ av_log(s->avctx, AV_LOG_DEBUG, "substream index: %d\n", ssi->substream_index); ++ } ++ ++ return 0; ++} ++ ++static int ac4_substream_group_info(AC4DecodeContext *s, ++ SubstreamGroupInfo *g) ++{ ++ GetBitContext *gb = &s->gbc; ++ int substreams_present; ++ int n_lf_substreams; ++ int hsf_ext; ++ int sus_ver; ++ int ret; ++ ++ substreams_present = (int)get_bits1(gb); ++ hsf_ext = (int)get_bits1(gb); ++ if (get_bits1(gb)) { ++ n_lf_substreams = 1; ++ } else { ++ n_lf_substreams = (int)get_bits(gb, 2) + 2; ++ if (n_lf_substreams == 5) ++ n_lf_substreams += variable_bits(gb, 2); ++ } ++ g->channel_coded = (int)get_bits1(gb); ++ if (g->channel_coded) { ++ for (int sus = 0; sus < n_lf_substreams; sus++) { ++ if (s->version == 1) { ++ sus_ver = (int)get_bits1(gb); ++ } else { ++ 
sus_ver = 1; ++ } ++ ++ ret = ac4_substream_info_chan(s, g, substreams_present, sus_ver); ++ if (ret < 0) ++ return ret; ++ if (hsf_ext) ++ ac4_hsf_ext_substream_info(s, &g->ssinfo, substreams_present); ++ } ++ } else { ++ if (get_bits1(gb)) ++ oamd_substream_info(s, g, substreams_present); ++ av_assert0(0); ++ /*for (int sus = 0; sus < n_lf_substreams; sus++) { ++ if (get_bits1(gb)) { ++ ac4_substream_info_ajoc(substreams_present); ++ if (hsf_ext) ++ ac4_hsf_ext_substream_info(substreams_present); ++ } else { ++ ac4_substream_info_obj(substreams_present); ++ if (hsf_ext) ++ ac4_hsf_ext_substream_info(substreams_present); ++ } ++ }*/ ++ } ++ ++ if (get_bits1(gb)) ++ content_type(s, NULL); ++ ++ return 0; ++} ++ ++static int ac4_sgi_specifier(AC4DecodeContext *s, SubstreamGroupInfo *g) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ if (s->version == 1) { ++ av_assert0(0); ++ //ac4_substream_group_info(s); ++ } else { ++ g->group_index = (int)get_bits(gb, 3); ++ if (g->group_index == 7) ++ g->group_index += variable_bits(gb, 2); ++ } ++ ++ s->total_groups = FFMAX(s->total_groups, g->group_index); ++ ++ return 0; ++} ++ ++static int ac4_presentation_v1_info(AC4DecodeContext *s, PresentationInfo *p) ++{ ++ GetBitContext *gb = &s->gbc; ++ int single_substream_group; ++ ++ single_substream_group = (int)get_bits1(gb); ++ if (single_substream_group != 1) { ++ p->presentation_config = (int)get_bits(gb, 3); ++ if (p->presentation_config == 7) ++ p->presentation_config += variable_bits(gb, 2); ++ } ++ if (s->version != 1) ++ p->presentation_version = get_unary(gb, 0, 31); ++ ++ if (single_substream_group != 1 && p->presentation_config == 6) { ++ p->add_emdf_substreams = 1; ++ } else { ++ if (s->version != 1) ++ p->mdcompat = (int)get_bits(gb, 3); ++ ++ if (get_bits1(gb)) ++ p->presentation_id = variable_bits(gb, 2); ++ ++ frame_rate_multiply_info(s, p); ++ frame_rate_fractions_info(s, p); ++ emdf_info(s, &p->emdf[0]); ++ ++ if (get_bits1(gb)) ++ p->enable_presentation = 
(int)get_bits1(gb); ++ ++ if (single_substream_group == 1) { ++ ac4_sgi_specifier(s, &s->ssgroup[0]); ++ p->n_substream_groups = 1; ++ } else { ++ p->multi_pid = (int)get_bits1(gb); ++ switch (p->presentation_config) { ++ case 0: ++ /* Music and Effects + Dialogue */ ++ ac4_sgi_specifier(s, &s->ssgroup[0]); ++ ac4_sgi_specifier(s, &s->ssgroup[1]); ++ p->n_substream_groups = 2; ++ break; ++ case 1: ++ /* Main + DE */ ++ ac4_sgi_specifier(s, &s->ssgroup[0]); ++ ac4_sgi_specifier(s, &s->ssgroup[1]); ++ p->n_substream_groups = 1; ++ break; ++ case 2: ++ /* Main + Associated Audio */ ++ ac4_sgi_specifier(s, &s->ssgroup[0]); ++ ac4_sgi_specifier(s, &s->ssgroup[1]); ++ p->n_substream_groups = 2; ++ break; ++ case 3: ++ /* Music and Effects + Dialogue + Associated Audio */ ++ ac4_sgi_specifier(s, &s->ssgroup[0]); ++ ac4_sgi_specifier(s, &s->ssgroup[1]); ++ ac4_sgi_specifier(s, &s->ssgroup[2]); ++ p->n_substream_groups = 3; ++ break; ++ case 4: ++ /* Main + DE + Associated Audio */ ++ ac4_sgi_specifier(s, &s->ssgroup[0]); ++ ac4_sgi_specifier(s, &s->ssgroup[1]); ++ ac4_sgi_specifier(s, &s->ssgroup[2]); ++ p->n_substream_groups = 2; ++ break; ++ case 5: ++ /* Arbitrary number of roles and substream groups */ ++ p->n_substream_groups = (int)get_bits(gb, 2) + 2; ++ if (p->n_substream_groups == 5) ++ p->n_substream_groups += variable_bits(gb, 2); ++ ++ for (int sg = 0; sg < p->n_substream_groups; sg++) ++ ac4_sgi_specifier(s, &s->ssgroup[sg]); ++ break; ++ default: ++ /* EMDF and other data */ ++ presentation_config_ext_info(s); ++ break; ++ } ++ } ++ p->pre_virtualized = (int)get_bits1(gb); ++ p->add_emdf_substreams = (int)get_bits1(gb); ++ presentation_substream_info(s, &p->psinfo); ++ } ++ ++ if (p->add_emdf_substreams) { ++ p->n_add_emdf_substreams = (int)get_bits(gb, 2); ++ if (p->n_add_emdf_substreams == 0) ++ p->n_add_emdf_substreams = variable_bits(gb, 2) + 4; ++ for (int i = 0; i < p->n_add_emdf_substreams; i++) ++ emdf_info(s, &p->emdf[i]); ++ } ++ ++ return 0; ++} ++ 
++/**
++ * Return 2 when frame_length lies in [1536, 2048], otherwise 1.
++ *
++ * ac4_toc() stores the result in s->num_ts_in_ats and divides the number of
++ * QMF timeslots by it.
++ */
++static int get_num_ts_in_ats(int frame_length)
++{
++    if (frame_length <= 2048 && frame_length >= 1536)
++        return 2;
++
++    return 1;
++}
++
++/**
++ * Parse the table of contents at the current position of s->gbc.
++ *
++ * In bitstream order this reads: the bitstream version, the 10-bit sequence
++ * counter (the previous value is kept in s->sequence_counter_prev), the
++ * optional wait-frames field, fs_index, the 4-bit frame_rate_index (from
++ * which the frame length and the derived timeslot counts are looked up),
++ * the global I-frame flag, the number of presentations and the payload base.
++ * It then parses one presentation info per presentation and, for versions
++ * above 1, one substream group info per group, followed by the substream
++ * index table. The reader is byte-aligned before returning.
++ *
++ * NOTE(review): frame_rate_index can be 0..15 and indexes
++ * frame_len_base_48khz, frame_len_base_idx_48khz and resampling_ratios -
++ * presumably all three have 16 entries; confirm.
++ * NOTE(review): nb_presentations and total_groups are derived from
++ * variable_bits() and are used to index s->pinfo / s->ssgroup without a
++ * range check in this function - confirm the bounds are enforced elsewhere.
++ *
++ * @param s decoder context
++ * @return 0 on success, a negative value propagated from check_sequence(),
++ *         ac4_presentation_info(), ac4_presentation_v1_info() or
++ *         ac4_substream_group_info() on failure
++ */
++static int ac4_toc(AC4DecodeContext *s)
++{
++    GetBitContext *gb = &s->gbc;
++    int ret;
++
++    s->version = (int)get_bits(gb, 2);
++    if (s->version == 3)
++        s->version += variable_bits(gb, 2);
++    av_log(s->avctx, AV_LOG_DEBUG, "bitstream version: %d\n", s->version);
++    s->sequence_counter_prev = s->sequence_counter;
++    s->sequence_counter = (int)get_bits(gb, 10);
++    av_log(s->avctx, AV_LOG_DEBUG, "sequence counter: %d\n", s->sequence_counter);
++
++    s->wait_frames = (int)get_bits1(gb);
++    if (s->wait_frames) {
++        s->nb_wait_frames = (int)get_bits(gb, 3);
++        if (s->nb_wait_frames > 0)
++            skip_bits(gb, 2);
++    }
++
++    s->fs_index = (int)get_bits1(gb);
++    s->frame_rate_index = (int)get_bits(gb, 4);
++    av_log(s->avctx, AV_LOG_DEBUG, "frame_rate_index: %d\n", s->frame_rate_index);
++    /* everything below up to ts_offset_hfgen is derived from frame_rate_index */
++    s->frame_len_base = frame_len_base_48khz[s->frame_rate_index];
++    s->num_ts_in_ats = get_num_ts_in_ats(s->frame_len_base);
++    s->frame_len_base_idx = frame_len_base_idx_48khz[s->frame_rate_index];
++    av_log(s->avctx, AV_LOG_DEBUG, "frame_len_base: %d\n", s->frame_len_base);
++    s->resampling_ratio = resampling_ratios[s->frame_rate_index];
++    s->num_qmf_timeslots = s->frame_len_base / 64;
++    s->num_aspx_timeslots = s->num_qmf_timeslots / s->num_ts_in_ats;
++    s->ts_offset_hfgen = 3 * s->num_ts_in_ats;
++    s->iframe_global = (int)get_bits1(gb);
++    if (s->iframe_global) {
++        s->have_iframe = 1;
++    } else {
++        /* without a global I-frame the sequence counter is validated */
++        ret = check_sequence(s);
++        if (ret < 0)
++            return ret;
++    }
++    /* presentation count: flag for exactly one, else flag for 2 + variable_bits, else none */
++    if (get_bits1(gb)) {
++        s->nb_presentations = 1;
++    } else {
++        if (get_bits1(gb)) {
++            s->nb_presentations = 2 + variable_bits(gb, 2);
++        } else {
++            s->nb_presentations = 0;
++        }
++    }
++
++    s->payload_base = 0;
++    if (get_bits1(gb)) {
++        s->payload_base = (int)get_bits(gb, 5) + 1;
++        /* 0x20 is the largest directly coded value and acts as the escape */
++        if (s->payload_base == 0x20) {
++            s->payload_base += variable_bits(gb, 3);
++        }
++    }
++
++    av_log(s->avctx, AV_LOG_DEBUG, "presentations: %d\n", s->nb_presentations);
++
++    if (s->version <= 1) {
++        for (int i = 0; i < s->nb_presentations; i++) {
++            ret = ac4_presentation_info(s, &s->pinfo[i]);
++            if (ret < 0)
++                return ret;
++        }
++    } else {
++        if (get_bits1(gb)) {
++            s->short_program_id = (int)get_bits(gb, 16);
++            /* optional 16-byte field, skipped */
++            if (get_bits1(gb)) {
++                skip_bits_long(gb, 16 * 8);
++            }
++        }
++
++        for (int i = 0; i < s->nb_presentations; i++) {
++            ret = ac4_presentation_v1_info(s, &s->pinfo[i]);
++            if (ret < 0)
++                return ret;
++        }
++
++        /* total_groups holds the highest group index, hence the inclusive bound */
++        av_log(s->avctx, AV_LOG_DEBUG, "total_groups: %d\n", s->total_groups + 1);
++        for (int i = 0; i <= s->total_groups; i++) {
++            ret = ac4_substream_group_info(s, &s->ssgroup[i]);
++            if (ret < 0)
++                return ret;
++        }
++    }
++
++    /* the return value is not checked here */
++    substream_index_table(s);
++
++    align_get_bits(gb);
++
++    av_log(s->avctx, AV_LOG_DEBUG, "TOC size: %d\n", get_bits_count(gb) >> 3);
++
++    return 0;
++}
++
++/**
++ * Look up a value in qmf_subbands for a QMF band.
++ *
++ * Bands 0..8 select rows 0..8 directly; the ranges 9-10, 11-13, 14-17,
++ * 18-22, 23-34 and 35-63 select rows 9..14. The column is
++ * acpl_num_param_bands_id. Bands outside 0..63 yield 0.
++ */
++static int sb_to_pb(int acpl_num_param_bands_id, int acpl_qmf_band)
++{
++    if (acpl_qmf_band >= 0 &&
++        acpl_qmf_band <= 8)
++        return qmf_subbands[acpl_qmf_band][acpl_num_param_bands_id];
++    if (acpl_qmf_band >= 9 &&
++        acpl_qmf_band <= 10)
++        return qmf_subbands[9][acpl_num_param_bands_id];
++    if (acpl_qmf_band >= 11 &&
++        acpl_qmf_band <= 13)
++        return qmf_subbands[10][acpl_num_param_bands_id];
++    if (acpl_qmf_band >= 14 &&
++        acpl_qmf_band <= 17)
++        return qmf_subbands[11][acpl_num_param_bands_id];
++    if (acpl_qmf_band >= 18 &&
++        acpl_qmf_band <= 22)
++        return qmf_subbands[12][acpl_num_param_bands_id];
++    if (acpl_qmf_band >= 23 &&
++        acpl_qmf_band <= 34)
++        return qmf_subbands[13][acpl_num_param_bands_id];
++    if (acpl_qmf_band >= 35 &&
++        acpl_qmf_band <= 63)
++        return qmf_subbands[14][acpl_num_param_bands_id];
++    return 0;
++}
++
++static int acpl_config_1ch(AC4DecodeContext *s, Substream *ss, int mode)
++{
++    GetBitContext *gb = &s->gbc;
++
++    ss->acpl_qmf_band = 0;
++    ss->acpl_param_band = 0;
++    ss->acpl_num_param_bands_id = (int)get_bits(gb, 2); ++
ss->acpl_quant_mode[0] = (int)get_bits1(gb); ++ if (mode == ACPL_PARTIAL) { ++ ss->acpl_qmf_band = (int)get_bits(gb, 3) + 1; ++ ss->acpl_param_band = sb_to_pb(ss->acpl_num_param_bands_id, ss->acpl_qmf_band); ++ } ++ ++ return 0; ++} ++ ++static int acpl_config_2ch(AC4DecodeContext *s, Substream *ss) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ ss->acpl_qmf_band = 0; ++ ss->acpl_param_band = 0; ++ ss->acpl_num_param_bands_id = (int)get_bits(gb, 2); ++ ss->acpl_quant_mode[0] = (int)get_bits1(gb); ++ ss->acpl_quant_mode[1] = (int)get_bits1(gb); ++ ++ return 0; ++} ++ ++static void aspx_config(AC4DecodeContext *s, Substream *ss) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ ss->aspx_quant_mode_env = (int)get_bits1(gb); ++ ss->prev_aspx_start_freq = ss->aspx_start_freq; ++ ss->aspx_start_freq = (int)get_bits(gb, 3); ++ ss->prev_aspx_stop_freq = ss->aspx_stop_freq; ++ ss->aspx_stop_freq = (int)get_bits(gb, 2); ++ ss->prev_aspx_master_freq_scale = ss->aspx_master_freq_scale; ++ ss->aspx_master_freq_scale = (int)get_bits1(gb); ++ ss->aspx_interpolation = (int)get_bits1(gb); ++ ss->aspx_preflat = (int)get_bits1(gb); ++ ss->aspx_limiter = (int)get_bits1(gb); ++ ss->aspx_noise_sbg = (int)get_bits(gb, 2); ++ ss->aspx_num_env_bits_fixfix = (int)get_bits1(gb); ++ ss->aspx_freq_res_mode = (int)get_bits(gb, 2); ++} ++ ++static int get_transfer_length_from_idx(AC4DecodeContext *s, int idx) ++{ ++ const uint16_t *transf_length_tab; ++ ++ switch (s->frame_len_base) { ++ case 2048: ++ transf_length_tab = transf_length_48khz_2048; ++ break; ++ case 1920: ++ transf_length_tab = transf_length_48khz_1920; ++ break; ++ case 1536: ++ transf_length_tab = transf_length_48khz_1536; ++ break; ++ case 1024: ++ transf_length_tab = transf_length_48khz_1024; ++ break; ++ case 960: ++ transf_length_tab = transf_length_48khz_960; ++ break; ++ case 768: ++ transf_length_tab = transf_length_48khz_768; ++ break; ++ case 512: ++ transf_length_tab = transf_length_48khz_512; ++ break; ++ case 384: ++ 
transf_length_tab = transf_length_48khz_384;
++        break;
++    default:
++        av_assert0(0);
++    }
++
++    return transf_length_tab[idx];
++}
++
++/**
++ * Parse the transform lengths of one channel.
++ *
++ * For frame lengths of 1536 and above a long_frame flag is read first: a
++ * long frame uses the frame length itself as transf_length[0] (and 0 as
++ * transf_length[1]), otherwise two 2-bit length indices are read and
++ * translated. For shorter frame lengths long_frame is forced to 0 and a
++ * single 2-bit index is read.
++ *
++ * @param s    decoder context; bits are consumed from s->gbc
++ * @param ss   unused
++ * @param ssch channel whose scp.long_frame, scp.transf_length_idx[] and
++ *             scp.transf_length[] are written
++ * @return 0
++ */
++static int asf_transform_info(AC4DecodeContext *s, Substream *ss,
++                              SubstreamChannel *ssch)
++{
++    GetBitContext *bits = &s->gbc;
++
++    if (s->frame_len_base < 1536) {
++        ssch->scp.long_frame = 0;
++        ssch->scp.transf_length_idx[0] = get_bits(bits, 2);
++        ssch->scp.transf_length[0] = get_transfer_length_from_idx(s, ssch->scp.transf_length_idx[0]);
++        return 0;
++    }
++
++    ssch->scp.long_frame = get_bits1(bits);
++    if (ssch->scp.long_frame != 0) {
++        ssch->scp.transf_length[0] = s->frame_len_base;
++        ssch->scp.transf_length[1] = 0;
++        return 0;
++    }
++
++    /* both indices are read before either of them is translated */
++    for (int half = 0; half < 2; half++)
++        ssch->scp.transf_length_idx[half] = get_bits(bits, 2);
++    for (int half = 0; half < 2; half++)
++        ssch->scp.transf_length[half] = get_transfer_length_from_idx(s, ssch->scp.transf_length_idx[half]);
++
++    return 0;
++}
++
++/** Return 3 for transform lengths in [1536, 2048], otherwise 2. */
++static int get_msfbl_bits(int transf_length)
++{
++    return (transf_length >= 1536 && transf_length <= 2048) ? 3 : 2;
++}
++
++/**
++ * Return the number of scale factor grouping bits for a channel.
++ *
++ * Frame lengths of 1536 and above use n_grp_bits_a (indexed by both
++ * transform length indices) unless long_frame is set, in which case the
++ * result is 0. Frame lengths between 513 and 1535 use n_grp_bits_b, and
++ * 512 or below use n_grp_bits_c, both indexed by the first transform
++ * length index.
++ */
++static int get_grp_bits(AC4DecodeContext *s, SubstreamChannel *ssch)
++{
++    if (s->frame_len_base >= 1536) {
++        if (ssch->scp.long_frame == 0)
++            return n_grp_bits_a[ssch->scp.transf_length_idx[0]][ssch->scp.transf_length_idx[1]];
++        return 0;
++    }
++
++    if (s->frame_len_base > 512)
++        return n_grp_bits_b[ssch->scp.transf_length_idx[0]];
++
++    return n_grp_bits_c[ssch->scp.transf_length_idx[0]];
++}
++
++/**
++ * Return 6 for transform lengths in [384, 2048], 5 for [192, 256] and 4 for
++ * everything else.
++ */
++static int get_msfb_bits(int transf_length)
++{
++    if (transf_length >= 384 && transf_length <= 2048)
++        return 6;
++
++    return (transf_length >= 192 && transf_length <= 256) ? 5 : 4;
++}
++
++static int get_side_bits(int transf_length)
++{
++    if (transf_length <= 2048 && transf_length >= 480) ++
return 5; ++ ++ if (transf_length <= 384 && transf_length >= 240) ++ return 4; ++ ++ return 3; ++} ++ ++static int get_max_sfb(AC4DecodeContext *s, SubstreamChannel *ssch, ++ int g) ++{ ++ int idx = 0; ++ ++ if (s->frame_len_base >= 1536 && (ssch->scp.long_frame == 0) && ++ (ssch->scp.transf_length_idx[0] != ssch->scp.transf_length_idx[1])) { ++ int num_windows_0 = 1 << (3 - ssch->scp.transf_length_idx[0]); ++ ++ if (g >= ssch->scp.window_to_group[num_windows_0]) ++ idx = 1; ++ } ++ ++ if ((ssch->scp.side_limited == 1) || ++ (ssch->scp.dual_maxsfb == 1 && ssch->scp.side_channel == 1)) { ++ return ssch->scp.max_sfb_side[idx]; ++ } else { ++ return ssch->scp.max_sfb[idx]; ++ } ++} ++ ++static int get_transf_length(AC4DecodeContext *s, SubstreamChannel *ssch, int g, int *idx) ++{ ++ const uint16_t *transf_length_tab; ++ ++ switch (s->frame_len_base) { ++ case 2048: ++ transf_length_tab = transf_length_48khz_2048; ++ break; ++ case 1920: ++ transf_length_tab = transf_length_48khz_1920; ++ break; ++ case 1536: ++ transf_length_tab = transf_length_48khz_1536; ++ break; ++ case 1024: ++ transf_length_tab = transf_length_48khz_1024; ++ break; ++ case 960: ++ transf_length_tab = transf_length_48khz_960; ++ break; ++ case 768: ++ transf_length_tab = transf_length_48khz_768; ++ break; ++ case 512: ++ transf_length_tab = transf_length_48khz_512; ++ break; ++ case 384: ++ transf_length_tab = transf_length_48khz_384; ++ break; ++ default: ++ av_assert0(0); ++ } ++ ++ if (s->frame_len_base >= 1536) { ++ if (ssch->scp.long_frame == 0) { ++ int num_windows_0 = 1 << (3 - ssch->scp.transf_length_idx[0]); ++ ++ if (g < ssch->scp.window_to_group[num_windows_0]) { ++ if (idx) ++ *idx = ssch->scp.transf_length_idx[0]; ++ return transf_length_tab[ssch->scp.transf_length_idx[0]]; ++ } else { ++ if (idx) ++ *idx = ssch->scp.transf_length_idx[1]; ++ return transf_length_tab[ssch->scp.transf_length_idx[1]]; ++ } ++ } else { ++ if (idx) ++ *idx = 4; ++ return s->frame_len_base; // long frame, 
the transform length equals to frame_length ++ } ++ } else { ++ if (idx) ++ *idx = ssch->scp.transf_length_idx[0]; ++ return transf_length_tab[ssch->scp.transf_length_idx[0]]; ++ } ++} ++ ++static const int get_sfb_size(int transf_length) ++{ ++ switch (transf_length) { ++ case 2048: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_2048); ++ break; ++ case 1920: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_1920); ++ break; ++ case 1536: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_1536); ++ break; ++ case 1024: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_1024); ++ break; ++ case 960: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_960); ++ break; ++ case 768: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_768); ++ break; ++ case 512: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_512); ++ break; ++ case 480: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_480); ++ break; ++ case 384: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_384); ++ break; ++ case 256: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_256); ++ break; ++ case 240: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_240); ++ break; ++ case 192: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_192); ++ break; ++ case 128: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_128); ++ break; ++ case 120: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_120); ++ break; ++ case 96: ++ return FF_ARRAY_ELEMS(sfb_offset_48khz_96); ++ break; ++ default: ++ av_assert0(0); ++ } ++ return 0; ++} ++ ++static const uint16_t *get_sfb_offset(int transf_length) ++{ ++ switch (transf_length) { ++ case 2048: ++ return sfb_offset_48khz_2048; ++ break; ++ case 1920: ++ return sfb_offset_48khz_1920; ++ break; ++ case 1536: ++ return sfb_offset_48khz_1536; ++ break; ++ case 1024: ++ return sfb_offset_48khz_1024; ++ break; ++ case 960: ++ return sfb_offset_48khz_960; ++ break; ++ case 768: ++ return sfb_offset_48khz_768; ++ break; ++ case 512: ++ return sfb_offset_48khz_512; ++ break; ++ case 480: ++ return sfb_offset_48khz_480; ++ break; ++ case 384: ++ return sfb_offset_48khz_384; ++ break; ++ case 
256: ++ return sfb_offset_48khz_256; ++ break; ++ case 240: ++ return sfb_offset_48khz_240; ++ break; ++ case 192: ++ return sfb_offset_48khz_192; ++ break; ++ case 128: ++ return sfb_offset_48khz_128; ++ break; ++ case 120: ++ return sfb_offset_48khz_120; ++ break; ++ case 96: ++ return sfb_offset_48khz_96; ++ break; ++ default: ++ av_assert0(0); ++ } ++ ++ return 0; ++} ++ ++#if 0 ++static int num_sfb_96(int transf_length) ++{ ++ if (transf_length >= 4096) ++ return 79; ++ else if (transf_length >= 3840) ++ return 76; ++ else if (transf_length >= 3072) ++ return 67; ++ else if (transf_length >= 2048) ++ return 57; ++ else if (transf_length >= 1920) ++ return 57; ++ else if (transf_length >= 1536) ++ return 49; ++ else if (transf_length >= 1024) ++ return 44; ++ else if (transf_length >= 920) ++ return 44; ++ else if (transf_length >= 768) ++ return 39; ++ else if (transf_length >= 512) ++ return 28; ++ else if (transf_length >= 480) ++ return 28; ++ else if (transf_length >= 384) ++ return 24; ++ else if (transf_length >= 256) ++ return 22; ++ else if (transf_length >= 240) ++ return 22; ++ else ++ return 18; ++} ++#endif ++ ++static int num_sfb_48(int transf_length) ++{ ++ switch (transf_length) { ++ case 2048: ++ return 63; break; ++ case 1920: ++ return 61; break; ++ case 1536: ++ return 55; break; ++ case 1024: ++ case 960: ++ return 49; break; ++ case 768: ++ return 43; break; ++ case 512: ++ case 480: ++ return 36; break; ++ case 384: ++ return 33; break; ++ case 256: ++ case 240: ++ return 20; break; ++ case 192: ++ return 18; break; ++ case 128: ++ case 120: ++ return 14; break; ++ case 96: ++ return 12; break; ++ default: ++ av_assert0(0); ++ } ++ ++ return 0; ++} ++ ++static int asf_psy_elements(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch, int n_grp_bits) ++{ ++ int group_offset, win_offset, win; ++ ++ ssch->scp.num_windows = 1; ++ ssch->scp.num_window_groups = 1; ++ ssch->scp.window_to_group[0] = 0; ++ ++ if (ssch->scp.long_frame == 
0) { ++ ssch->scp.num_windows = n_grp_bits + 1; ++ if (ssch->scp.different_framing) { ++ int num_windows_0 = 1 << (3 - ssch->scp.transf_length_idx[0]); ++ ++ for (int i = n_grp_bits; i >= num_windows_0; i--) { ++ ssch->scp.scale_factor_grouping[i] = ssch->scp.scale_factor_grouping[i - 1]; ++ } ++ ++ ssch->scp.scale_factor_grouping[num_windows_0 - 1] = 0; ++ ssch->scp.num_windows++; ++ } ++ ++ for (int i = 0; i < ssch->scp.num_windows - 1; i++) { ++ if (ssch->scp.scale_factor_grouping[i] == 0) { ++ ssch->scp.num_window_groups++; ++ } ++ ++ ssch->scp.window_to_group[i + 1] = ssch->scp.num_window_groups - 1; ++ } ++ } ++ ++ group_offset = 0; ++ win_offset = 0; ++ win = 0; ++ memset(ssch->offset2sfb, 0, sizeof(ssch->offset2sfb)); ++ memset(ssch->offset2g, 0, sizeof(ssch->offset2g)); ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int transf_length_g = get_transf_length(s, ssch, g, NULL); ++ const uint16_t *sfb_offset = get_sfb_offset(transf_length_g); ++ const int sfb_max_size = get_sfb_size(transf_length_g); ++ int max_sfb; ++ ++ ssch->scp.num_win_in_group[g] = 0; ++ for (int w = 0; w < ssch->scp.num_windows; w++) { ++ if (ssch->scp.window_to_group[w] == g) ++ ssch->scp.num_win_in_group[g]++; ++ } ++ ++ max_sfb = get_max_sfb(s, ssch, g); ++ if (max_sfb > sfb_max_size) { ++ av_log(s->avctx, AV_LOG_ERROR, "max_sfb=%d > sfb_max_size=%d\n", max_sfb, sfb_max_size); ++ return AVERROR_INVALIDDATA; ++ } ++ for (int sfb = 0; sfb < max_sfb; sfb++) ++ ssch->sect_sfb_offset[g][sfb] = group_offset + sfb_offset[sfb] * ssch->scp.num_win_in_group[g]; ++ group_offset += sfb_offset[max_sfb] * ssch->scp.num_win_in_group[g]; ++ ssch->sect_sfb_offset[g][max_sfb] = group_offset; ++ for (int sfb = 0; sfb < max_sfb; sfb++) { ++ for (int j = ssch->sect_sfb_offset[g][sfb]; j < ssch->sect_sfb_offset[g][sfb+1]; j++) { ++ ssch->offset2sfb[j] = (short)sfb; ++ ssch->offset2g[j] = g; ++ } ++ } ++ ++ for (int w = 0; w < ssch->scp.num_win_in_group[g]; w++) { ++ ssch->win_offset[win + w] 
= win_offset; ++ win_offset += transf_length_g; ++ } ++ win += ssch->scp.num_win_in_group[g]; ++ } ++ ++ av_log(s->avctx, AV_LOG_DEBUG, "long_frame: %d\n", ssch->scp.long_frame); ++ av_log(s->avctx, AV_LOG_DEBUG, "different_framing: %d\n", ssch->scp.different_framing); ++ av_log(s->avctx, AV_LOG_DEBUG, "num_windows: %d\n", ssch->scp.num_windows); ++ av_log(s->avctx, AV_LOG_DEBUG, "num_window_groups: %d\n", ssch->scp.num_window_groups); ++ av_log(s->avctx, AV_LOG_DEBUG, "transf_lengths:"); ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ av_log(s->avctx, AV_LOG_DEBUG, " %d", get_transf_length(s, ssch, g, NULL)); ++ } ++ av_log(s->avctx, AV_LOG_DEBUG, "\n"); ++ av_log(s->avctx, AV_LOG_DEBUG, "num_win_in_group:"); ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ av_log(s->avctx, AV_LOG_DEBUG, " %d", ssch->scp.num_win_in_group[g]); ++ } ++ av_log(s->avctx, AV_LOG_DEBUG, "\n"); ++ ++ return 0; ++} ++ ++static int asf_psy_info(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch, ++ int dual_maxsfb, int side_limited) ++{ ++ GetBitContext *gb = &s->gbc; ++ int n_side_bits = get_side_bits(ssch->scp.transf_length[0]); ++ int n_msfb_bits = get_msfb_bits(ssch->scp.transf_length[0]); ++ int n_grp_bits = get_grp_bits(s, ssch); ++ ++ ssch->scp.different_framing = 0; ++ if ((s->frame_len_base >= 1536) && (ssch->scp.long_frame == 0) && ++ (ssch->scp.transf_length_idx[0] != ssch->scp.transf_length_idx[1])) { ++ ssch->scp.different_framing = 1; ++ } ++ ++ if (side_limited) { ++ ssch->scp.max_sfb_side[0] = get_bits(gb, n_side_bits); ++ } else { ++ ssch->scp.max_sfb[0] = get_bits(gb, n_msfb_bits); ++ if (dual_maxsfb) ++ ssch->scp.max_sfb_side[0] = get_bits(gb, n_msfb_bits); ++ } ++ ++ if (ssch->scp.different_framing) { ++ n_side_bits = get_side_bits(ssch->scp.transf_length[1]); ++ n_msfb_bits = get_msfb_bits(ssch->scp.transf_length[1]); ++ ++ if (side_limited) { ++ ssch->scp.max_sfb_side[1] = get_bits(gb, n_side_bits); ++ } else { ++ 
ssch->scp.max_sfb[1] = get_bits(gb, n_msfb_bits); ++ if (dual_maxsfb) ++ ssch->scp.max_sfb_side[1] = get_bits(gb, n_msfb_bits); ++ } ++ } ++ ++ memset(ssch->scp.scale_factor_grouping, 0, sizeof(ssch->scp.scale_factor_grouping)); ++ for (int i = 0; i < n_grp_bits; i++) ++ ssch->scp.scale_factor_grouping[i] = get_bits1(gb); ++ ++ return asf_psy_elements(s, ss, ssch, n_grp_bits); ++} ++ ++static int sf_info(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch, ++ int spec_frontend, int dual_maxsfb, ++ int side_limited) ++{ ++ int ret = 0; ++ ++ ssch->scp.dual_maxsfb = dual_maxsfb; ++ ssch->scp.side_limited = side_limited; ++ ++ if (spec_frontend == SF_ASF) { ++ asf_transform_info(s, ss, ssch); ++ ret = asf_psy_info(s, ss, ssch, dual_maxsfb, side_limited); ++ } ++ ++ return ret; ++} ++ ++static int sap_data(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ if (!get_bits1(gb)) { ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int max_sfb_g = get_max_sfb(s, ssch, g); ++ ++ for (int sfb = 0; sfb < max_sfb_g; sfb += 2) { ++ ssch->sap_coeff_used[g][sfb] = get_bits1(gb); ++ if (sfb + 1 < max_sfb_g) ++ ssch->sap_coeff_used[g][sfb + 1] = ssch->sap_coeff_used[g][sfb]; ++ } ++ } ++ } else { ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int max_sfb_g = get_max_sfb(s, ssch, g); ++ ++ for (int sfb = 0; sfb < max_sfb_g; sfb++) ++ ssch->sap_coeff_used[g][sfb] = 1; ++ } ++ } ++ ++ ssch->delta_code_time = 0; ++ if (ssch->scp.num_window_groups != 1) ++ ssch->delta_code_time = (int)get_bits1(gb); ++ ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int max_sfb_g = get_max_sfb(s, ssch, g); ++ ++ for (int sfb = 0; sfb < max_sfb_g; sfb += 2) { ++ if (ssch->sap_coeff_used[g][sfb]) { ++ ssch->dpcm_alpha_q[g][sfb] = get_vlc2(gb, scale_factors_vlc.table, scale_factors_vlc.bits, 3); ++ if (ssch->dpcm_alpha_q[g][sfb] < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "sap data\n"); ++ return 
AVERROR_INVALIDDATA; ++ } ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static int ssf_st_data(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ int num_blocks; ++ ++ ssch->env_idx[0] = (int)get_bits(gb, 5); ++ if (iframe == 1 && ssch->stride_flag == SHORT_STRIDE) ++ get_bits(gb, 5); ++ ++ if (ssch->stride_flag == SHORT_STRIDE) { ++ for (int block = 0; block < 4; block++) ++ ssch->gain_bits[block] = (int)get_bits(gb, 4); ++ } ++ ++ num_blocks = (ssch->stride_flag == SHORT_STRIDE) ? 4 : 1; ++ ++ for (int block = 0; block < num_blocks; block++) { ++ if (block >= ssch->start_block && block < ssch->end_block) { ++ if (ssch->predictor_presence[block]) { ++ if (ssch->delta[block]) ++ ssch->predictor_lag_delta[block] = (int)get_bits(gb, 4); ++ else ++ ssch->predictor_lag[block] = (int)get_bits(gb, 9); ++ } ++ } ++ ssch->variance_preserving[block] = (int)get_bits1(gb); ++ ssch->alloc_offset[block] = (int)get_bits(gb, 5); ++ } ++ ++ return 0; ++} ++ ++static int ac_init(AC4DecodeContext *s, ACState *acs) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ acs->ui_model_bits = SSF_MODEL_BITS; ++ acs->ui_model_unit = SSF_MODEL_UNIT; ++ acs->ui_range_bits = SSF_RANGE_BITS; ++ acs->ui_threshold_large = SSF_THRESHOLD_LARGE; ++ acs->ui_threshold_small = SSF_THRESHOLD_SMALL; ++ ++ acs->ui_low = 0; ++ acs->ui_range = SSF_THRESHOLD_LARGE; ++ ++ acs->ui_offset = get_bits1(gb); ++ for (int index = 1; index < acs->ui_range_bits; index++) { ++ uint32_t ui_tmp = get_bits1(gb); ++ ++ acs->ui_offset <<= 1; ++ acs->ui_offset += ui_tmp; ++ } ++ ++ acs->ui_offset2 = acs->ui_offset; ++ ++ return 0; ++} ++ ++#if 0 ++static int32_t ac_decode(AC4DecodeContext *s, uint32_t cdf_low, ++ uint32_t cdf_high, ++ ACState *acs) ++{ ++ GetBitContext *gb = &s->gbc; ++ uint32_t ui_tmp1, ui_tmp2; ++ uint32_t ui_range; ++ ++ ui_range = acs->ui_range >> acs->ui_model_bits; ++ ui_tmp1 = ui_range * cdf_low; ++ acs->ui_offset = acs->ui_offset - ui_tmp1; ++ ++ if 
(cdf_high < acs->ui_model_unit) { ++ ui_tmp2 = cdf_high - cdf_low; ++ acs->ui_range = ui_range * ui_tmp2; ++ } else { ++ acs->ui_range = acs->ui_range - ui_tmp1; ++ } ++ ++ // denormalize ++ while (acs->ui_range <= acs->ui_threshold_small) { ++ /* Read a single bit from the bitstream */ ++ uint32_t ui_tmp1 = get_bits1(gb); ++ ++ acs->ui_range <<= 1; ++ acs->ui_offset <<= 1; ++ acs->ui_offset += ui_tmp1; ++ acs->ui_offset2 <<= 1; ++ if (acs->ui_offset & 1) ++ acs->ui_offset2++; ++ } ++ ++ return 0; ++} ++#endif ++ ++static int32_t ac_decode_finish(ACState *acs) ++ ++{ ++ uint32_t fact, ui_bits, ui_val; ++ uint32_t ui_tmp1, ui_tmp2, ui_rev_idx; ++ ++ acs->ui_low = acs->ui_offset & (acs->ui_threshold_large-1); ++ ++ ui_tmp1 = acs->ui_threshold_large - acs->ui_offset; ++ ++ acs->ui_low +=ui_tmp1; ++ ++ for (int bit_idx = 1; bit_idx <= acs->ui_range_bits; bit_idx++) { ++ ui_rev_idx = acs->ui_range_bits - bit_idx; ++ fact = 1U << ui_rev_idx; ++ fact = fact - 1U; ++ ui_tmp1 = acs->ui_low + fact; ++ ui_bits = ui_tmp1 >> ui_rev_idx; ++ ui_val = ui_bits << ui_rev_idx; ++ ui_tmp1 = ui_val + fact; ++ ui_tmp2 = acs->ui_range - 1U; ++ ui_tmp2 += acs->ui_low; ++ ++ if ((acs->ui_low <= ui_val) && (ui_tmp1 <= ui_tmp2)) ++ break; ++ } ++ ++ return 0; ++} ++ ++static int ssf_ac_data(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch) ++{ ++ ac_init(s, &ssch->acs); ++ ac_decode_finish(&ssch->acs); ++ ++ return 0; ++} ++ ++static int ssf_granule(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ret; ++ ++ ssch->stride_flag = (int)get_bits1(gb); ++ if (iframe) ++ ssch->num_bands = (int)get_bits(gb, 3) + 12; ++ ++ ssch->start_block = 0; ++ ssch->end_block = 0; ++ if (ssch->stride_flag == LONG_STRIDE && !iframe) ++ ssch->end_block = 1; ++ ++ if (ssch->stride_flag == SHORT_STRIDE) { ++ ssch->end_block = 4; ++ if (iframe) ++ ssch->start_block = 1; ++ } ++ ++ for (int block = ssch->start_block; block < 
ssch->end_block; block++) { ++ ssch->predictor_presence[block] = (int)get_bits1(gb); ++ if (ssch->predictor_presence[block]) { ++ if (ssch->start_block == 1 && block == 1) { ++ ssch->delta[block] = 0; ++ } else { ++ ssch->delta[block] = (int)get_bits1(gb); ++ } ++ } ++ } ++ ++ ret = ssf_st_data(s, ss, ssch, iframe); ++ if (ret < 0) ++ return ret; ++ ++ return ssf_ac_data(s, ss, ssch); ++} ++ ++static int ssf_data(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ssf_iframe, ret; ++ ++ if (iframe) ++ ssf_iframe = 1; ++ else ++ ssf_iframe = (int)get_bits1(gb); ++ ++ ret = ssf_granule(s, ss, ssch, ssf_iframe); ++ if (ret < 0) ++ return ret; ++ if (s->frame_len_base >= 1536) ++ ret = ssf_granule(s, ss, ssch, 0); ++ ++ return ret; ++} ++ ++static int asf_section_data(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ memset(&ssch->sect_cb, 0, sizeof(ssch->sect_cb)); ++ memset(&ssch->sfb_cb, 0, sizeof(ssch->sfb_cb)); ++ ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int gidx; ++ int transf_length_g = get_transf_length(s, ssch, g, &gidx); ++ int sect_esc_val; ++ int n_sect_bits; ++ int k, i, max_sfb; ++ ++ if (gidx <= 2) { ++ sect_esc_val = (1 << 3) - 1; ++ n_sect_bits = 3; ++ } else { ++ sect_esc_val = (1 << 5) - 1; ++ n_sect_bits = 5; ++ } ++ k = 0; ++ i = 0; ++ ssch->num_sec_lsf[g] = 0; ++ max_sfb = get_max_sfb(s, ssch, g); ++ while (k < max_sfb) { ++ int sect_len_incr; ++ int sect_len; ++ ++ ssch->sect_cb[g][i] = get_bits(gb, 4); ++ if (ssch->sect_cb[g][i] > 11) { ++ av_log(s->avctx, AV_LOG_ERROR, "sect_cb[%d][%d] > 11\n", g, i); ++ return AVERROR_INVALIDDATA; ++ } ++ sect_len = 1; ++ sect_len_incr = (int)get_bits(gb, n_sect_bits); ++ while (sect_len_incr == sect_esc_val) { ++ sect_len += sect_esc_val; ++ sect_len_incr = (int)get_bits(gb, n_sect_bits); ++ } ++ ++ sect_len += sect_len_incr; ++ ssch->sect_start[g][i] = k; ++ 
ssch->sect_end[g][i] = k + sect_len; ++ ++ if (ssch->sect_start[g][i] < num_sfb_48(transf_length_g) && ++ ssch->sect_end[g][i] >= num_sfb_48(transf_length_g)) { ++ ssch->num_sec_lsf[g] = i + 1; ++ if (ssch->sect_end[g][i] > num_sfb_48(transf_length_g)) { ++ ssch->sect_end[g][i] = num_sfb_48(transf_length_g); ++ i++; ++ ssch->sect_start[g][i] = num_sfb_48(transf_length_g); ++ ssch->sect_end[g][i] = k + sect_len; ++ ssch->sect_cb[g][i] = ssch->sect_cb[g][i-1]; ++ } ++ } ++ ++ for (int sfb = k; sfb < k + sect_len; sfb++) ++ ssch->sfb_cb[g][sfb] = ssch->sect_cb[g][i]; ++ k += sect_len; ++ i++; ++ } ++ ++ ssch->num_sec[g] = i; ++ if (ssch->num_sec_lsf[g] == 0) ++ ssch->num_sec_lsf[g] = ssch->num_sec[g]; ++ } ++ ++ return 0; ++} ++ ++static int ext_decode(AC4DecodeContext *s) ++{ ++ GetBitContext *gb = &s->gbc; ++ int b, ext_val, N_ext = 0; ++ ++ b = (int)get_bits1(gb); ++ while (b) { ++ N_ext++; ++ b = (int)get_bits1(gb); ++ } ++ ++ ext_val = (int)get_bits(gb, N_ext + 4); ++ ++ return (1 << (N_ext + 4)) + ext_val; ++} ++ ++static int asf_spectral_data(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ memset(&ssch->max_quant_idx, 0, sizeof(ssch->max_quant_idx)); ++ memset(&ssch->quant_spec, 0, sizeof(ssch->quant_spec)); ++ ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ for (int i = 0; i < ssch->num_sec_lsf[g]; i++) { ++ int sect_start_line, sect_end_line, cb; ++ ++ if (ssch->sect_cb[g][i] == 0 || ssch->sect_cb[g][i] > 11) ++ continue; ++ ++ sect_start_line = ssch->sect_sfb_offset[g][ssch->sect_start[g][i]]; ++ sect_end_line = ssch->sect_sfb_offset[g][ssch->sect_end[g][i]]; ++ cb = ssch->sect_cb[g][i] - 1; ++ ++ for (int k = sect_start_line; k < sect_end_line;) { ++ int cb_off = asf_codebook_off[cb]; ++ int cb_mod = asf_codebook_mod[cb]; ++ int x; ++ ++ if (asf_codebook_dim[cb] == 4) { ++ int cb_idx = get_vlc2(gb, asf_codebook_vlc[cb].table, asf_codebook_vlc[cb].bits, 3); ++ int cb_mod2 = 9; ++ int cb_mod3 
= 27; ++ ++ if (cb_idx < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "codebook_dim 4\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ ssch->quant_spec[k] = (short)((cb_idx / cb_mod3) - cb_off); ++ cb_idx -= (ssch->quant_spec[k] + cb_off) * cb_mod3; ++ ssch->quant_spec[k+1] = (short)((cb_idx / cb_mod2) - cb_off); ++ cb_idx -= (ssch->quant_spec[k+1] + cb_off) * cb_mod2; ++ ssch->quant_spec[k+2] = (short)((cb_idx / cb_mod) - cb_off); ++ cb_idx -= (ssch->quant_spec[k+2] + cb_off) * cb_mod; ++ ssch->quant_spec[k+3] = (short)(cb_idx - cb_off); ++ ++ if (asf_codebook_unsigned[cb]) { ++ if (ssch->quant_spec[k] && get_bits1(gb)) ++ ssch->quant_spec[k] = (short)-ssch->quant_spec[k]; ++ if (ssch->quant_spec[k+1] && get_bits1(gb)) ++ ssch->quant_spec[k+1] = (short)-ssch->quant_spec[k+1]; ++ if (ssch->quant_spec[k+2] && get_bits1(gb)) ++ ssch->quant_spec[k+2] = (short)-ssch->quant_spec[k+2]; ++ if (ssch->quant_spec[k+3] && get_bits1(gb)) ++ ssch->quant_spec[k+3] = (short)-ssch->quant_spec[k+3]; ++ } ++ x = ssch->offset2sfb[k]; ++ ssch->max_quant_idx[g][x] = FFMAX(ssch->max_quant_idx[g][x], FFABS(ssch->quant_spec[k])); ++ x = ssch->offset2sfb[k+1]; ++ ssch->max_quant_idx[g][x] = FFMAX(ssch->max_quant_idx[g][x], FFABS(ssch->quant_spec[k+1])); ++ x = ssch->offset2sfb[k+2]; ++ ssch->max_quant_idx[g][x] = FFMAX(ssch->max_quant_idx[g][x], FFABS(ssch->quant_spec[k+2])); ++ x = ssch->offset2sfb[k+3]; ++ ssch->max_quant_idx[g][x] = FFMAX(ssch->max_quant_idx[g][x], FFABS(ssch->quant_spec[k+3])); ++ k += 4; ++ } else { /* (asf_codebook_dim[ssch->sect_cb[g][i]] == 2) */ ++ int cb_idx = get_vlc2(gb, asf_codebook_vlc[cb].table, asf_codebook_vlc[cb].bits, 3); ++ int sign0 = 0, sign1 = 0; ++ ++ if (cb_idx < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "codebook_dim 2\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ ssch->quant_spec[k] = (short)((cb_idx / cb_mod) - cb_off); ++ cb_idx -= (ssch->quant_spec[k] + cb_off) * cb_mod; ++ ssch->quant_spec[k+1] = (short)(cb_idx - cb_off); ++ ++ if 
(asf_codebook_unsigned[cb]) { ++ if (ssch->quant_spec[k] && get_bits1(gb)) ++ sign0 = 1; ++ if (ssch->quant_spec[k+1] && get_bits1(gb)) ++ sign1 = 1; ++ } ++ if (ssch->sect_cb[g][i] == 11) { ++ if (ssch->quant_spec[k] == 16) ++ ssch->quant_spec[k] = (short)ext_decode(s); ++ if (ssch->quant_spec[k+1] == 16) ++ ssch->quant_spec[k+1] = (short)ext_decode(s); ++ } ++ ++ if (sign0) ++ ssch->quant_spec[k] = (short)-ssch->quant_spec[k]; ++ if (sign1) ++ ssch->quant_spec[k+1] = (short)-ssch->quant_spec[k+1]; ++ ++ x = ssch->offset2sfb[k]; ++ ssch->max_quant_idx[g][x] = FFMAX(ssch->max_quant_idx[g][x], FFABS(ssch->quant_spec[k])); ++ x = ssch->offset2sfb[k+1]; ++ ssch->max_quant_idx[g][x] = FFMAX(ssch->max_quant_idx[g][x], FFABS(ssch->quant_spec[k+1])); ++ k += 2; ++ } ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static int asf_scalefac_data(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ int first_scf_found = 0; ++ int scale_factor; ++ ++ scale_factor = (int)get_bits(gb, 8); ++ memset(ssch->sf_gain, 0, sizeof(ssch->sf_gain)); ++ ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int max_sfb = FFMIN(get_max_sfb(s, ssch, g), num_sfb_48(get_transf_length(s, ssch, g, NULL))); ++ ++ for (int sfb = 0; sfb < max_sfb; sfb++) { ++ if ((ssch->sfb_cb[g][sfb]) != 0 && (ssch->max_quant_idx[g][sfb] > 0)) { ++ if (first_scf_found == 1) { ++ ssch->dpcm_sf[g][sfb] = get_vlc2(gb, scale_factors_vlc.table, scale_factors_vlc.bits, 3); ++ if (ssch->dpcm_sf[g][sfb] < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "scalefac data\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ scale_factor += ssch->dpcm_sf[g][sfb] - 60; ++ } else { ++ first_scf_found = 1; ++ } ++ ++ ssch->sf_gain[g][sfb] = powf(2.f, 0.25f * (float)(scale_factor - 100)); ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static int asf_snf_data(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ ssch->snf_data_exists = (int)get_bits1(gb); ++ if 
(ssch->snf_data_exists) { ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int transf_length_g = get_transf_length(s, ssch, g, NULL); ++ int max_sfb = FFMIN(get_max_sfb(s, ssch, g), num_sfb_48(transf_length_g)); ++ ++ for (int sfb = 0; sfb < max_sfb; sfb++) { ++ if ((ssch->sfb_cb[g][sfb] == 0) || (ssch->max_quant_idx[g][sfb] == 0)) { ++ ssch->dpcm_snf[g][sfb] = get_vlc2(gb, snf_vlc.table, snf_vlc.bits, 3); ++ if (ssch->dpcm_snf[g][sfb] < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "snf data\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ } ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static int sf_data(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch, ++ int iframe, int spec_frontend) ++{ ++ int ret; ++ ++ if (spec_frontend == SF_ASF) { ++ ret = asf_section_data(s, ss, ssch); ++ if (ret < 0) ++ return ret; ++ ret = asf_spectral_data(s, ss, ssch); ++ if (ret < 0) ++ return ret; ++ ret = asf_scalefac_data(s, ss, ssch); ++ if (ret < 0) ++ return ret; ++ ret = asf_snf_data(s, ss, ssch); ++ if (ret < 0) ++ return ret; ++ } else { ++ ret = ssf_data(s, ss, ssch, iframe); ++ } ++ ++ return ret; ++} ++ ++static int chparam_info(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ret; ++ ++ ssch->sap_mode = (int)get_bits(gb, 2); ++ av_log(s->avctx, AV_LOG_DEBUG, "sap_mode: %d\n", ssch->sap_mode); ++ ++ if (ssch->sap_mode == 1) { ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int max_sfb_g = get_max_sfb(s, ssch, g); ++ ++ for (int sfb = 0; sfb < max_sfb_g; sfb++) { ++ ssch->ms_used[g][sfb] = get_bits1(gb); ++ } ++ } ++ } ++ ++ if (ssch->sap_mode == 3) { ++ ret = sap_data(s, ss, ssch); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int stereo_data(AC4DecodeContext *s, Substream *ss, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ret; ++ ++ ss->mdct_stereo_proc[0] = get_bits1(gb); ++ if (ss->mdct_stereo_proc[0]) { ++ ss->spec_frontend_l = SF_ASF; ++ ss->spec_frontend_r = 
SF_ASF; ++ ret = sf_info(s, ss, &ss->ssch[0], SF_ASF, 0, 0); ++ if (ret < 0) ++ return ret; ++ ++ memcpy(&ss->ssch[1].scp, &ss->ssch[0].scp, sizeof(ss->ssch[0].scp)); ++ memcpy(&ss->ssch[1].sect_sfb_offset, &ss->ssch[0].sect_sfb_offset, sizeof(ss->ssch[0].sect_sfb_offset)); ++ memcpy(&ss->ssch[1].offset2sfb, &ss->ssch[0].offset2sfb, sizeof(ss->ssch[0].offset2sfb)); ++ memcpy(&ss->ssch[1].offset2g, &ss->ssch[0].offset2g, sizeof(ss->ssch[0].offset2g)); ++ memcpy(&ss->ssch[1].win_offset, &ss->ssch[0].win_offset, sizeof(ss->ssch[0].win_offset)); ++ ++ ret = chparam_info(s, ss, &ss->ssch[0]); ++ if (ret < 0) ++ return ret; ++ } else { ++ ss->spec_frontend_l = (int)get_bits1(gb); ++ sf_info(s, ss, &ss->ssch[0], ss->spec_frontend_l, 0, 0); ++ ss->spec_frontend_r = (int)get_bits1(gb); ++ sf_info(s, ss, &ss->ssch[1], ss->spec_frontend_r, 0, 0); ++ } ++ ++ ret = sf_data(s, ss, &ss->ssch[0], iframe, ss->spec_frontend_l); ++ if (ret < 0) ++ return ret; ++ ret = sf_data(s, ss, &ss->ssch[1], iframe, ss->spec_frontend_r); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static int companding_control(AC4DecodeContext *s, Substream *ss, int num_chan) ++{ ++ GetBitContext *gb = &s->gbc; ++ int sync_flag = 0; ++ int need_avg = 0; ++ int nc; ++ ++ if (num_chan > 1) ++ sync_flag = (int)get_bits1(gb); ++ ++ nc = sync_flag ? 
1 : num_chan; ++ ++ for (int i = 0; i < nc; i++) { ++ ss->compand_on[i] = get_bits1(gb); ++ if (!ss->compand_on[i]) ++ need_avg = 1; ++ } ++ ++ if (need_avg == 1) ++ ss->compand_avg = (int)get_bits1(gb); ++ ++ return 0; ++} ++ ++static int noise_mid_border(int aspx_tsg_ptr, int aspx_int_class, int num_atsg_sig) ++{ ++ if (aspx_tsg_ptr == -1) { ++ if (aspx_int_class == VARFIX) ++ return 1; ++ else ++ return num_atsg_sig - 1; ++ } else if (aspx_tsg_ptr >= 0) { ++ if (aspx_int_class == VARFIX) ++ return num_atsg_sig - 1; ++ else ++ return FFMAX(1, FFMIN(num_atsg_sig - 1, aspx_tsg_ptr)); ++ } else { ++ av_assert0(0); ++ } ++ ++ return 0; ++} ++ ++static int freq_res(int *atsg_sig, int atsg, int aspx_tsg_ptr, ++ int num_aspx_timeslots, int aspx_freq_res_mode, ++ int *aspx_freq_res) ++{ ++ int freq_res; ++ ++ switch (aspx_freq_res_mode) { ++ case 0: ++ freq_res = aspx_freq_res[atsg]; ++ break; ++ case 1: ++ freq_res = 0; ++ break; ++ case 2: ++ if ((atsg < aspx_tsg_ptr && num_aspx_timeslots > 8) || ++ (atsg_sig[atsg+1]-atsg_sig[atsg]) > (num_aspx_timeslots/6.0+3.25)) ++ freq_res = 1; ++ else ++ freq_res = 0; ++ break; ++ case 3: ++ freq_res = 1; ++ break; ++ default: ++ av_assert0(0); ++ } ++ ++ return freq_res; ++} ++ ++static void get_tab_border(int *atsg_sig, int num_aspx_timeslots, int num_atsg) ++{ ++ int size = (int)((num_atsg + 1) * sizeof(int)); ++ ++ switch (num_aspx_timeslots) { ++ case 6: ++ memcpy(atsg_sig, tab_border[0][num_atsg >> 1], size); ++ break; ++ case 8: ++ memcpy(atsg_sig, tab_border[1][num_atsg >> 1], size); ++ break; ++ case 12: ++ memcpy(atsg_sig, tab_border[2][num_atsg >> 1], size); ++ break; ++ case 15: ++ memcpy(atsg_sig, tab_border[3][num_atsg >> 1], size); ++ break; ++ case 16: ++ memcpy(atsg_sig, tab_border[4][num_atsg >> 1], size); ++ break; ++ default: ++ av_assert0(0); ++ } ++} ++ ++static int aspx_atsg(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch, int iframe) ++{ ++ int num_atsg_sig = ssch->aspx_num_env; ++ int 
num_atsg_noise = ssch->aspx_num_noise; ++ ++ if (ssch->aspx_int_class == FIXFIX) { ++ get_tab_border(ssch->atsg_sig, s->num_aspx_timeslots, num_atsg_sig); ++ get_tab_border(ssch->atsg_noise, s->num_aspx_timeslots, num_atsg_noise); ++ ssch->atsg_freqres[0] = freq_res(ssch->atsg_sig, 0, 0, s->num_aspx_timeslots, ++ ss->aspx_freq_res_mode, ssch->aspx_freq_res); ++ for (int atsg = 1; atsg < num_atsg_sig; atsg++) ++ ssch->atsg_freqres[atsg] = ssch->atsg_freqres[0]; ++ } else { ++ switch (ssch->aspx_int_class) { ++ case FIXVAR: ++ ssch->atsg_sig[0] = 0; ++ ssch->atsg_sig[num_atsg_sig] = ssch->aspx_var_bord_right + s->num_aspx_timeslots; ++ for (int tsg = 0; tsg < ssch->aspx_num_rel_right; tsg++) ++ ssch->atsg_sig[num_atsg_sig-tsg-1] = ssch->atsg_sig[num_atsg_sig-tsg] - ssch->aspx_rel_bord_right[tsg]; ++ break; ++ case VARFIX: ++ if (iframe) ++ ssch->atsg_sig[0] = ssch->aspx_var_bord_left; ++ else ++ ssch->atsg_sig[0] = ssch->previous_stop_pos - s->num_aspx_timeslots; ++ ssch->atsg_sig[num_atsg_sig] = s->num_aspx_timeslots; ++ for (int tsg = 0; tsg < ssch->aspx_num_rel_left; tsg++) ++ ssch->atsg_sig[tsg+1] = ssch->atsg_sig[tsg] + ssch->aspx_rel_bord_left[tsg]; ++ break; ++ case VARVAR: ++ if (iframe) ++ ssch->atsg_sig[0] = ssch->aspx_var_bord_left; ++ else ++ ssch->atsg_sig[0] = ssch->previous_stop_pos - s->num_aspx_timeslots; ++ ssch->atsg_sig[num_atsg_sig] = ssch->aspx_var_bord_right + s->num_aspx_timeslots; ++ for (int tsg = 0; tsg < ssch->aspx_num_rel_left; tsg++) ++ ssch->atsg_sig[tsg+1] = ssch->atsg_sig[tsg] + ssch->aspx_rel_bord_left[tsg]; ++ for (int tsg = 0; tsg < ssch->aspx_num_rel_right; tsg++) ++ ssch->atsg_sig[num_atsg_sig-tsg-1] = ssch->atsg_sig[num_atsg_sig-tsg] - ssch->aspx_rel_bord_right[tsg]; ++ break; ++ } ++ ++ ssch->atsg_noise[0] = ssch->atsg_sig[0]; ++ ssch->atsg_noise[num_atsg_noise] = ssch->atsg_sig[num_atsg_sig]; ++ if (num_atsg_noise > 1) ++ ssch->atsg_noise[1] = ssch->atsg_sig[noise_mid_border(ssch->aspx_tsg_ptr, ++ ssch->aspx_int_class, ++ 
num_atsg_sig)]; ++ for (int atsg = 0; atsg < num_atsg_sig; atsg++) ++ ssch->atsg_freqres[atsg] = freq_res(ssch->atsg_sig, atsg, ssch->aspx_tsg_ptr, ++ s->num_aspx_timeslots, ss->aspx_freq_res_mode, ++ ssch->aspx_freq_res); ++ } ++ ++ ssch->previous_stop_pos = ssch->atsg_sig[num_atsg_sig]; ++ ++ for (int atsg = 0; atsg < num_atsg_sig; atsg++) { ++ if (ssch->atsg_freqres[atsg]) { ++ ssch->num_sbg_sig[atsg] = ssch->num_sbg_sig_highres; ++ memcpy(ssch->sbg_sig[atsg], ssch->sbg_sig_highres, 24 * 4); ++ } else { ++ ssch->num_sbg_sig[atsg] = ssch->num_sbg_sig_lowres; ++ memcpy(ssch->sbg_sig[atsg], ssch->sbg_sig_lowres, 24 * 4); ++ } ++ } ++ ++ return 0; ++} ++ ++static int aspx_framing(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ ssch->aspx_num_rel_left = 0; ++ ssch->aspx_num_rel_right = 0; ++ ++ ssch->aspx_int_class = get_vlc2(gb, aspx_int_class_vlc.table, aspx_int_class_vlc.bits, 1); ++ if (ssch->aspx_int_class < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "invalid aspx int class: %d\n", ssch->aspx_int_class); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ ssch->aspx_num_env_prev = ssch->aspx_num_env; ++ ++ switch (ssch->aspx_int_class) { ++ case FIXFIX: ++ ssch->aspx_num_env = 1 + (int)get_bits(gb, 1 + ss->aspx_num_env_bits_fixfix); ++ if (ssch->aspx_num_env > 4) { ++ av_log(s->avctx, AV_LOG_ERROR, "invalid aspx num env in FIXFIX: %d\n", ssch->aspx_num_env); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ if (ss->aspx_freq_res_mode == 0) ++ ssch->aspx_freq_res[0] = (int)get_bits1(gb); ++ break; ++ case FIXVAR: ++ ssch->aspx_var_bord_right = (int)get_bits(gb, 2); ++ ssch->aspx_num_rel_right = (int)get_bits(gb, 1 + (s->num_aspx_timeslots > 8)); ++ for (int i = 0; i < ssch->aspx_num_rel_right; i++) ++ ssch->aspx_rel_bord_right[i] = 2 * (int)get_bits(gb, 1 + (s->num_aspx_timeslots > 8)) + 2; ++ break; ++ case VARFIX: ++ if (iframe) ++ ssch->aspx_var_bord_left = (int)get_bits(gb, 2); ++ ssch->aspx_num_rel_left = 
(int)get_bits(gb, 1 + (s->num_aspx_timeslots > 8)); ++ for (int i = 0; i < ssch->aspx_num_rel_left; i++) ++ ssch->aspx_rel_bord_left[i] = 2 * (int)get_bits(gb, 1 + (s->num_aspx_timeslots > 8)) + 2; ++ break; ++ case VARVAR: ++ if (iframe) ++ ssch->aspx_var_bord_left = (int)get_bits(gb, 2); ++ ssch->aspx_num_rel_left = (int)get_bits(gb, 1 + (s->num_aspx_timeslots > 8)); ++ for (int i = 0; i < ssch->aspx_num_rel_left; i++) ++ ssch->aspx_rel_bord_left[i] = 2 * (int)get_bits(gb, 1 + (s->num_aspx_timeslots > 8)) + 2; ++ ssch->aspx_var_bord_right = (int)get_bits(gb, 2); ++ ssch->aspx_num_rel_right = (int)get_bits(gb, 1 + (s->num_aspx_timeslots > 8)); ++ for (int i = 0; i < ssch->aspx_num_rel_right; i++) ++ ssch->aspx_rel_bord_right[i] = 2 * (int)get_bits(gb, 1 + (s->num_aspx_timeslots > 8)) + 2; ++ break; ++ } ++ ++ if (ssch->aspx_int_class != FIXFIX) { ++ int ptr_bits; ++ ++ ssch->aspx_num_env = ssch->aspx_num_rel_left + ssch->aspx_num_rel_right + 1; ++ if (ssch->aspx_num_env > 5) { ++ av_log(s->avctx, AV_LOG_ERROR, "invalid aspx num env: %d (class %d)\n", ssch->aspx_num_env, ssch->aspx_int_class); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ ptr_bits = ceilf(logf(ssch->aspx_num_env + 2) / logf(2)); ++ ssch->aspx_tsg_ptr_prev = ssch->aspx_tsg_ptr; ++ ssch->aspx_tsg_ptr = (int)get_bits(gb, ptr_bits) - 1; ++ if (ss->aspx_freq_res_mode == 0) ++ for (int env = 0; env < ssch->aspx_num_env; env++) ++ ssch->aspx_freq_res[env] = (int)get_bits1(gb); ++ } ++ ++ ssch->aspx_num_noise_prev = ssch->aspx_num_noise; ++ ++ if (ssch->aspx_num_env > 1) ++ ssch->aspx_num_noise = 2; ++ else ++ ssch->aspx_num_noise = 1; ++ ++ if (!ssch->aspx_num_env_prev) ++ ssch->aspx_num_env_prev = ssch->aspx_num_env; ++ if (!ssch->aspx_num_noise_prev) ++ ssch->aspx_num_noise_prev = ssch->aspx_num_noise; ++ ++ return aspx_atsg(s, ss, ssch, iframe); ++} ++ ++static void aspx_delta_dir(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ for (int env = 0; env < 
ssch->aspx_num_env; env++) ++ ssch->aspx_sig_delta_dir[env] = (int)get_bits1(gb); ++ ++ for (int env = 0; env < ssch->aspx_num_noise; env++) ++ ssch->aspx_noise_delta_dir[env] = (int)get_bits1(gb); ++} ++ ++static int aspx_hfgen_iwc_2ch(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch0, ++ SubstreamChannel *ssch1, ++ int aspx_balance) ++{ ++ GetBitContext *gb = &s->gbc; ++ int aspx_tic_left = 0, aspx_tic_right = 0; ++ ++ memcpy(ssch0->aspx_tna_mode_prev, ssch0->aspx_tna_mode, sizeof(ssch0->aspx_tna_mode)); ++ memcpy(ssch1->aspx_tna_mode_prev, ssch1->aspx_tna_mode, sizeof(ssch1->aspx_tna_mode)); ++ ++ for (int n = 0; n < ssch0->num_sbg_noise; n++) ++ ssch0->aspx_tna_mode[n] = (int)get_bits(gb, 2); ++ if (aspx_balance == 0) { ++ for (int n = 0; n < ssch0->num_sbg_noise; n++) ++ ssch1->aspx_tna_mode[n] = (int)get_bits(gb, 2); ++ } else { ++ for (int n = 0; n < ssch0->num_sbg_noise; n++) ++ ssch1->aspx_tna_mode[n] = ssch0->aspx_tna_mode[n]; ++ } ++ if (get_bits1(gb)) { ++ for (int n = 0; n < ssch0->num_sbg_sig_highres; n++) ++ ssch0->aspx_add_harmonic[n] = (int)get_bits1(gb); ++ } ++ if (get_bits1(gb)) { ++ for (int n = 0; n < ssch0->num_sbg_sig_highres; n++) ++ ssch1->aspx_add_harmonic[n] = (int)get_bits1(gb); ++ } ++ ++ for (int n = 0; n < ssch0->num_sbg_sig_highres; n++) ++ ssch0->aspx_fic_used_in_sfb[n] = ssch1->aspx_fic_used_in_sfb[n] = 0; ++ ++ if (get_bits1(gb)) { ++ if (get_bits1(gb)) { ++ for (int n = 0; n < ssch0->num_sbg_sig_highres; n++) ++ ssch0->aspx_fic_used_in_sfb[n] = (int)get_bits1(gb); ++ } ++ ++ if (get_bits1(gb)) { ++ for (int n = 0; n < ssch0->num_sbg_sig_highres; n++) ++ ssch1->aspx_fic_used_in_sfb[n] = (int)get_bits1(gb); ++ } ++ } ++ ++ for (int n = 0; n < s->num_aspx_timeslots; n++) ++ ssch0->aspx_tic_used_in_slot[n] = ssch1->aspx_tic_used_in_slot[n] = 0; ++ ++ if (get_bits1(gb)) { ++ int aspx_tic_copy = (int)get_bits1(gb); ++ ++ if (aspx_tic_copy == 0) { ++ aspx_tic_left = (int)get_bits1(gb); ++ aspx_tic_right = 
(int)get_bits1(gb); ++ } ++ ++ if (aspx_tic_copy || aspx_tic_left) { ++ for (int n = 0; n < s->num_aspx_timeslots; n++) ++ ssch0->aspx_tic_used_in_slot[n] = (int)get_bits1(gb); ++ } ++ ++ if (aspx_tic_right) { ++ for (int n = 0; n < s->num_aspx_timeslots; n++) ++ ssch1->aspx_tic_used_in_slot[n] = (int)get_bits1(gb); ++ } ++ ++ if (aspx_tic_copy) { ++ for (int n = 0; n < s->num_aspx_timeslots; n++) ++ ssch1->aspx_tic_used_in_slot[n] = ssch0->aspx_tic_used_in_slot[n]; ++ } ++ } ++ ++ return 0; ++} ++ ++static VLC *get_aspx_hcb(int data_type, int quant_mode, int stereo_mode, int hcb_type) ++{ ++ VLC *aspx_cb; ++ ++ if (data_type == DT_SIGNAL) ++ aspx_cb = &aspx_codebook_signal_vlc[stereo_mode][quant_mode][hcb_type]; ++ else // NOISE ++ aspx_cb = &aspx_codebook_noise_vlc[stereo_mode][hcb_type]; ++ ++ return aspx_cb; ++} ++ ++static int get_aspx_off(int data_type, int quant_mode, int stereo_mode, int hcb_type) ++{ ++ int off; ++ ++ if (data_type == DT_SIGNAL) ++ off = aspx_codebook_signal_off[stereo_mode][quant_mode][hcb_type]; ++ else // NOISE ++ off = aspx_codebook_noise_off[stereo_mode][hcb_type]; ++ ++ return off; ++} ++ ++static int aspx_huff_data(AC4DecodeContext *s, ++ int data_type, int num_sbg, ++ int quant_mode, int stereo_mode, ++ int direction, int *data) ++{ ++ GetBitContext *gb = &s->gbc; ++ VLC *aspx_cb; ++ int aspx_off; ++ ++ if (direction == 0) { // FREQ ++ aspx_cb = get_aspx_hcb(data_type, quant_mode, stereo_mode, F0); ++ aspx_off = get_aspx_off(data_type, quant_mode, stereo_mode, F0); ++ data[0] = get_vlc2(gb, aspx_cb->table, aspx_cb->bits, 3); ++ if (data[0] < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "FREQ 1\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ data[0] -= aspx_off; ++ aspx_cb = get_aspx_hcb(data_type, quant_mode, stereo_mode, DF); ++ aspx_off = get_aspx_off(data_type, quant_mode, stereo_mode, DF); ++ for (int i = 1; i < num_sbg; i++) { ++ data[i] = get_vlc2(gb, aspx_cb->table, aspx_cb->bits, 3); ++ if (data[i] < 0) { ++ av_log(s->avctx, 
AV_LOG_ERROR, "FREQ 2\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ data[i] -= aspx_off; ++ } ++ } else { // TIME ++ aspx_cb = get_aspx_hcb(data_type, quant_mode, stereo_mode, DT); ++ aspx_off = get_aspx_off(data_type, quant_mode, stereo_mode, DT); ++ for (int i = 0; i < num_sbg; i++) { ++ data[i] = get_vlc2(gb, aspx_cb->table, aspx_cb->bits, 3); ++ if (data[i] < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "TIME\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ data[i] -= aspx_off; ++ } ++ } ++ ++ return 0; ++} ++ ++static int aspx_ec_data(AC4DecodeContext *s, ++ Substream *ss, ++ SubstreamChannel *ssch, ++ int data_type, int num_env, ++ uint8_t *freq_res, int quant_mode, ++ int stereo_mode, int *direction) ++{ ++ int dir, num_sbg, ret; ++ ++ for (int env = 0; env < num_env; env++) { ++ if (data_type == DT_SIGNAL) { ++ if (freq_res[env]) ++ num_sbg = ssch->num_sbg_sig_highres; ++ else ++ num_sbg = ssch->num_sbg_sig_lowres; ++ } else { ++ num_sbg = ssch->num_sbg_noise; ++ } ++ dir = direction[env]; ++ ret = aspx_huff_data(s, data_type, num_sbg, quant_mode, stereo_mode, dir, ++ ssch->aspx_data[data_type][env]); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int is_element_of_sbg_patches(int sbg_lim_sbg, int *sbg_patches, ++ int num_sbg_patches) ++{ ++ for (int i = 0; i <= num_sbg_patches; i++) { ++ if (sbg_patches[i] == sbg_lim_sbg) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static void remove_element(int *sbg_lim, int num_sbg_lim, int sbg) ++{ ++ for (int i = sbg; i < num_sbg_lim; i++) ++ sbg_lim[i] = sbg_lim[i + 1]; ++} ++ ++static int cmpints(const void *p1, const void *p2) ++{ ++ int left = *(const int *)p1; ++ int right = *(const int *)p2; ++ return FFDIFFSIGN(left, right); ++} ++ ++static int aspx_elements(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch, ++ int iframe) ++{ ++ int sb, j, sbg = 0, goal_sb, msb, usb; ++ int source_band_low; ++ int idx[6]; ++ ++ ssch->master_reset = ((ss->prev_aspx_start_freq != ss->aspx_start_freq) + ++ 
(ss->prev_aspx_stop_freq != ss->aspx_stop_freq) + ++ (ss->prev_aspx_master_freq_scale != ss->aspx_master_freq_scale)) * iframe; ++ if (ssch->master_reset) { ++ if (ss->aspx_master_freq_scale == 1) { ++ ssch->num_sbg_master = 22 - 2 * ss->aspx_start_freq - 2 * ss->aspx_stop_freq; ++ for (int sbg = 0; sbg <= ssch->num_sbg_master; sbg++) { ++ ssch->sbg_master[sbg] = sbg_template_highres[2 * ss->aspx_start_freq + sbg]; ++ } ++ } else { ++ ssch->num_sbg_master = 20 - 2 * ss->aspx_start_freq - 2 * ss->aspx_stop_freq; ++ for (int sbg = 0; sbg <= ssch->num_sbg_master; sbg++) { ++ ssch->sbg_master[sbg] = sbg_template_lowres[2 * ss->aspx_start_freq + sbg]; ++ } ++ } ++ } ++ ++ ssch->sba = ssch->sbg_master[0]; ++ ssch->sbz = ssch->sbg_master[ssch->num_sbg_master]; ++ ++ ssch->num_sbg_sig_highres = ssch->num_sbg_master - ssch->aspx_xover_subband_offset; ++ for (int sbg = 0; sbg <= ssch->num_sbg_sig_highres; sbg++) ++ ssch->sbg_sig_highres[sbg] = ssch->sbg_master[sbg + ssch->aspx_xover_subband_offset]; ++ ++ ssch->sbx = ssch->sbg_sig_highres[0]; ++ if (ssch->sbx <= 0) ++ return AVERROR_INVALIDDATA; ++ ssch->num_sb_aspx = ssch->sbg_sig_highres[ssch->num_sbg_sig_highres] - ssch->sbx; ++ ++ ssch->num_sbg_sig_lowres = ssch->num_sbg_sig_highres - (int)floorf((float)(ssch->num_sbg_sig_highres / 2.)); ++ ssch->sbg_sig_lowres[0] = ssch->sbg_sig_highres[0]; ++ if ((ssch->num_sbg_sig_highres & 1) == 0) { ++ for (int sbg = 1; sbg <= ssch->num_sbg_sig_lowres; sbg++) ++ ssch->sbg_sig_lowres[sbg] = ssch->sbg_sig_highres[2*sbg]; ++ } else { ++ for (int sbg = 1; sbg <= ssch->num_sbg_sig_lowres; sbg++) ++ ssch->sbg_sig_lowres[sbg] = ssch->sbg_sig_highres[2*sbg-1]; ++ } ++ ++ ssch->num_sbg_sig[0] = ssch->num_sbg_sig_lowres; ++ ssch->num_sbg_sig[1] = ssch->num_sbg_sig_highres; ++ ++ if (ssch->sbx) { ++ ssch->num_sbg_noise = FFMAX(1, floorf(ss->aspx_noise_sbg * log2f(ssch->sbz / (float)ssch->sbx) + 0.5)); ++ } ++ else { ++ ssch->num_sbg_noise = 0; ++ } ++ if (ssch->num_sbg_noise > 5) { ++ 
av_log(s->avctx, AV_LOG_ERROR, "invalid num sbg noise: %d\n", ssch->num_sbg_noise); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ idx[0] = 0; ++ ssch->sbg_noise[0] = ssch->sbg_sig_lowres[0]; ++ for (int sbg = 1; sbg <= ssch->num_sbg_noise; sbg++) { ++ idx[sbg] = idx[sbg-1]; ++ idx[sbg] += (int)floorf((float)(ssch->num_sbg_sig_lowres - idx[sbg - 1]) / (float)(ssch->num_sbg_noise + 1 - sbg)); ++ ssch->sbg_noise[sbg] = ssch->sbg_sig_lowres[idx[sbg]]; ++ } ++ ++ msb = ssch->sba; ++ usb = ssch->sbx; ++ ssch->num_sbg_patches = 0; ++ if (s->fs_index) ++ goal_sb = 43; ++ else ++ goal_sb = 46; ++ if (ss->aspx_master_freq_scale == 1) ++ source_band_low = 4; ++ else ++ source_band_low = 2; ++ ++ if (goal_sb < ssch->sbx + ssch->num_sb_aspx) { ++ for (int i = 0; ssch->sbg_master[i] < goal_sb; i++) ++ sbg = i + 1; ++ } else { ++ sbg = ssch->num_sbg_master; ++ } ++ ++ do { ++ int odd; ++ ++ j = sbg; ++ sb = ssch->sbg_master[j]; ++ odd = (sb - 2 + ssch->sba) % 2; ++ ++ while (sb > (ssch->sba - source_band_low + msb - odd) && j >= 1) { ++ j--; ++ sb = ssch->sbg_master[j]; ++ odd = (sb - 2 + ssch->sba) % 2; ++ } ++ ++ ssch->sbg_patch_num_sb[ssch->num_sbg_patches] = FFMAX(sb - usb, 0); ++ ssch->sbg_patch_start_sb[ssch->num_sbg_patches] = ssch->sba - odd - FFMAX(sb - usb, 0); ++ if (ssch->sbg_patch_num_sb[ssch->num_sbg_patches] > 0) { ++ usb = sb; ++ msb = sb; ++ ssch->num_sbg_patches++; ++ } else { ++ msb = ssch->sbx; ++ } ++ ++ if (ssch->sbg_master[sbg] - sb < 3) ++ sbg = ssch->num_sbg_master; ++ } while (sb != (ssch->sbx + ssch->num_sb_aspx) && j > 0); ++ ++ if ((ssch->num_sbg_patches > 1) && (ssch->sbg_patch_num_sb[ssch->num_sbg_patches - 1] < 3)) ++ ssch->num_sbg_patches--; ++ ++ if (ssch->num_sbg_patches > 6) ++ return AVERROR_INVALIDDATA; ++ ++ ssch->sbg_patches[0] = ssch->sbx; ++ for (int i = 1; i <= ssch->num_sbg_patches; i++) ++ ssch->sbg_patches[i] = ssch->sbg_patches[i-1] + ssch->sbg_patch_num_sb[i-1]; ++ ++ /* Copy sbg_sig_lowres into lower part of limiter table */ ++ for 
(int sbg = 0; sbg <= ssch->num_sbg_sig_lowres; sbg++) ++ ssch->sbg_lim[sbg] = ssch->sbg_sig_lowres[sbg]; ++ ++ /* Copy patch borders into higher part of limiter table */ ++ for (int sbg = 1; sbg < ssch->num_sbg_patches; sbg++) ++ ssch->sbg_lim[sbg + ssch->num_sbg_sig_lowres] = ssch->sbg_patches[sbg]; ++ ++ /* Sort patch borders + low res sbg into temporary limiter table */ ++ ssch->num_sbg_lim = ssch->num_sbg_sig_lowres + ssch->num_sbg_patches - 1; ++ AV_QSORT(ssch->sbg_lim, ssch->num_sbg_lim, int, cmpints); ++ sbg = 1; ++ ++ while (sbg <= ssch->num_sbg_lim) { ++ float num_octaves = log2f((float)ssch->sbg_lim[sbg] / (float)ssch->sbg_lim[sbg - 1]); ++ ++ if (num_octaves < 0.245) { ++ if (ssch->sbg_lim[sbg] == ssch->sbg_lim[sbg-1]) { ++ remove_element(ssch->sbg_lim, ssch->num_sbg_lim, sbg); ++ ssch->num_sbg_lim--; ++ continue; ++ } else { ++ if (is_element_of_sbg_patches(ssch->sbg_lim[sbg], ++ ssch->sbg_patches, ++ ssch->num_sbg_patches)) { ++ if (is_element_of_sbg_patches(ssch->sbg_lim[sbg - 1], ++ ssch->sbg_patches, ++ ssch->num_sbg_patches)) { ++ sbg++; ++ continue; ++ } else { ++ remove_element(ssch->sbg_lim, ssch->num_sbg_lim, sbg - 1); ++ ssch->num_sbg_lim--; ++ continue; ++ } ++ } else { ++ remove_element(ssch->sbg_lim, ssch->num_sbg_lim, sbg); ++ ssch->num_sbg_lim--; ++ continue; ++ } ++ } ++ } else { ++ sbg++; ++ continue; ++ } ++ } ++ ++ return 0; ++} ++ ++static int aspx_data_2ch(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch0, SubstreamChannel *ssch1, ++ int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ret; ++ ++ if (iframe) { ++ ssch0->aspx_xover_subband_offset = (int)get_bits(gb, 3); ++ ssch1->aspx_xover_subband_offset = ssch0->aspx_xover_subband_offset; ++ } ++ ++ ret = aspx_elements(s, ss, ssch0, iframe); ++ if (ret < 0) ++ return ret; ++ ret = aspx_elements(s, ss, ssch1, iframe); ++ if (ret < 0) ++ return ret; ++ ++ ret = aspx_framing(s, ss, ssch0, iframe); ++ if (ret < 0) ++ return ret; ++ ++ ssch0->aspx_qmode_env = 
ssch1->aspx_qmode_env = ss->aspx_quant_mode_env; ++ if (ssch0->aspx_int_class == FIXFIX && ssch0->aspx_num_env == 1) ++ ssch0->aspx_qmode_env = ssch1->aspx_qmode_env = 0; ++ ++ ssch0->aspx_balance = ssch1->aspx_balance = (int)get_bits1(gb); ++ ++ if (ssch0->aspx_balance == 0) { ++ ret = aspx_framing(s, ss, ssch1, iframe); ++ if (ret < 0) ++ return ret; ++ ssch1->aspx_qmode_env = ss->aspx_quant_mode_env; ++ if (ssch1->aspx_int_class == FIXFIX && ssch1->aspx_num_env == 1) ++ ssch1->aspx_qmode_env = 0; ++ } else { ++ ssch1->aspx_num_env = ssch0->aspx_num_env; ++ ssch1->aspx_num_noise = ssch0->aspx_num_noise; ++ memcpy(ssch1->atsg_freqres, ssch0->atsg_freqres, sizeof(ssch0->atsg_freqres)); ++ } ++ ++ aspx_delta_dir(s, ssch0); ++ aspx_delta_dir(s, ssch1); ++ aspx_hfgen_iwc_2ch(s, ss, ssch0, ssch1, ssch0->aspx_balance); ++ ++ ret = aspx_ec_data(s, ss, ssch0, DT_SIGNAL, ++ ssch0->aspx_num_env, ++ ssch0->atsg_freqres, ++ ssch0->aspx_qmode_env, ++ SM_LEVEL, ++ ssch0->aspx_sig_delta_dir); ++ if (ret < 0) ++ return ret; ++ ret = aspx_ec_data(s, ss, ssch1, DT_SIGNAL, ++ ssch1->aspx_num_env, ++ ssch1->atsg_freqres, ++ ssch1->aspx_qmode_env, ++ ssch0->aspx_balance ? SM_BALANCE : SM_LEVEL, ++ ssch1->aspx_sig_delta_dir); ++ if (ret < 0) ++ return ret; ++ ret = aspx_ec_data(s, ss, ssch0, DT_NOISE, ++ ssch0->aspx_num_noise, ++ 0, ++ 0, ++ SM_LEVEL, ++ ssch0->aspx_noise_delta_dir); ++ if (ret < 0) ++ return ret; ++ ret = aspx_ec_data(s, ss, ssch1, DT_NOISE, ++ ssch1->aspx_num_noise, ++ 0, ++ 0, ++ ssch0->aspx_balance ? 
SM_BALANCE : SM_LEVEL, ++ ssch1->aspx_noise_delta_dir); ++ ++ return ret; ++} ++ ++static int aspx_hfgen_iwc_1ch(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ memcpy(ssch->aspx_tna_mode_prev, ssch->aspx_tna_mode, sizeof(ssch->aspx_tna_mode)); ++ ++ for (int n = 0; n < ssch->num_sbg_noise; n++) ++ ssch->aspx_tna_mode[n] = (int)get_bits(gb, 2); ++ if (get_bits1(gb)) { ++ for (int n = 0; n < ssch->num_sbg_sig_highres; n++) ++ ssch->aspx_add_harmonic[n] = (int)get_bits1(gb); ++ } ++ ++ for (int n = 0; n < ssch->num_sbg_sig_highres; n++) ++ ssch->aspx_fic_used_in_sfb[n] = 0; ++ ++ if (get_bits1(gb)) { ++ for (int n = 0; n < ssch->num_sbg_sig_highres; n++) ++ ssch->aspx_fic_used_in_sfb[n] = (int)get_bits1(gb); ++ } ++ ++ for (int n = 0; n < s->num_aspx_timeslots; n++) ++ ssch->aspx_tic_used_in_slot[n] = 0; ++ ++ if (get_bits1(gb)) { ++ for (int n = 0; n < s->num_aspx_timeslots; n++) ++ ssch->aspx_tic_used_in_slot[n] = (int)get_bits1(gb); ++ } ++ ++ return 0; ++} ++ ++static int aspx_data_1ch(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ret; ++ ++ if (iframe) ++ ssch->aspx_xover_subband_offset = (int)get_bits(gb, 3); ++ ++ ssch->aspx_balance = 0; ++ ++ ret = aspx_elements(s, ss, ssch, iframe); ++ if (ret < 0) ++ return ret; ++ ++ ret = aspx_framing(s, ss, ssch, iframe); ++ if (ret < 0) ++ return ret; ++ ++ ssch->aspx_qmode_env = ss->aspx_quant_mode_env; ++ if (ssch->aspx_int_class == FIXFIX && ssch->aspx_num_env == 1) ++ ssch->aspx_qmode_env = 0; ++ ++ aspx_delta_dir(s, ssch); ++ aspx_hfgen_iwc_1ch(s, ss, ssch); ++ ++ ret = aspx_ec_data(s, ss, ssch, DT_SIGNAL, ++ ssch->aspx_num_env, ++ ssch->atsg_freqres, ++ ssch->aspx_qmode_env, ++ 0, ++ ssch->aspx_sig_delta_dir); ++ if (ret < 0) ++ return ret; ++ ret = aspx_ec_data(s, ss, ssch, DT_NOISE, ++ ssch->aspx_num_noise, ++ 0, ++ 0, ++ 0, ++ ssch->aspx_noise_delta_dir); ++ return ret; ++} ++ 
++/*
++ * Read the A-CPL framing fields into ssch: a 1-bit interpolation type, a
++ * 1-bit parameter-set count code and, only when the interpolation type bit
++ * is set, one 5-bit timeslot for each of the (count code + 1) parameter
++ * sets. ss is not used. Always returns 0.
++ */
++static int acpl_framing_data(AC4DecodeContext *s, Substream *ss,
++                             SubstreamChannel *ssch)
++{
++    GetBitContext *bits = &s->gbc;
++
++    ssch->acpl_interpolation_type = (int)get_bits1(bits);
++    ssch->acpl_num_param_sets_cod = (int)get_bits1(bits);
++    if (!ssch->acpl_interpolation_type)
++        return 0;
++
++    for (int set = 0; set <= ssch->acpl_num_param_sets_cod; set++)
++        ssch->acpl_param_timeslot[set] = (int)get_bits(bits, 5);
++
++    return 0;
++}
++
++/*
++ * Return the entry of acpl_codebook_vlc selected by data type index,
++ * quantization mode and codebook type (callers pass F0, DF or DT).
++ */
++static VLC *get_acpl_hcb(int data_type, int quant_mode, int hcb_type)
++{
++    return &acpl_codebook_vlc[data_type][quant_mode][hcb_type];
++}
++
++/*
++ * Decode one run of Huffman-coded A-CPL values into data[].
++ *
++ * data_type is first folded to a codebook index: ALPHA1/ALPHA2 -> 0,
++ * BETA1/BETA2 -> 1, BETA3 -> 2, anything else -> 3. One bit then selects
++ * the coding direction:
++ *   0 (DIFF_FREQ): data[start_band] is read with the F0 codebook and the
++ *                  bands start_band + 1 .. data_bands - 1 with DF;
++ *   1 (DIFF_TIME): bands start_band .. data_bands - 1 are read with DT.
++ * The get_vlc2() results are stored unmodified.
++ *
++ * Returns 0 on success, or AVERROR_INVALIDDATA (after logging) as soon as a
++ * VLC lookup yields a negative value.
++ */
++static int acpl_huff_data(AC4DecodeContext *s,
++                          int data_type, int data_bands,
++                          int start_band, int quant_mode,
++                          int *data)
++{
++    GetBitContext *bits = &s->gbc;
++    VLC *cb;
++    int family, band;
++
++    if (data_type == ALPHA1 || data_type == ALPHA2)
++        family = 0;
++    else if (data_type == BETA1 || data_type == BETA2)
++        family = 1;
++    else if (data_type == BETA3)
++        family = 2;
++    else
++        family = 3;
++
++    if (get_bits1(bits)) { // DIFF_TIME
++        cb = get_acpl_hcb(family, quant_mode, DT);
++        for (band = start_band; band < data_bands; band++) {
++            data[band] = get_vlc2(bits, cb->table, cb->bits, 3);
++            if (data[band] < 0) {
++                av_log(s->avctx, AV_LOG_ERROR, "DIFF_TIME\n");
++                return AVERROR_INVALIDDATA;
++            }
++        }
++        return 0;
++    }
++
++    // DIFF_FREQ: first band from the F0 codebook, remaining bands from DF
++    cb = get_acpl_hcb(family, quant_mode, F0);
++    data[start_band] = get_vlc2(bits, cb->table, cb->bits, 3);
++    if (data[start_band] < 0) {
++        av_log(s->avctx, AV_LOG_ERROR, "DIFF_FREQ 1\n");
++        return AVERROR_INVALIDDATA;
++    }
++
++    cb = get_acpl_hcb(family, quant_mode, DF);
++    for (band = start_band + 1; band < data_bands; band++) {
++        data[band] = get_vlc2(bits, cb->table, cb->bits, 3);
++        if (data[band] < 0) {
++            av_log(s->avctx, AV_LOG_ERROR, "DIFF_FREQ 2\n");
++            return AVERROR_INVALIDDATA;
++        }
++    }
++
++    return 0;
++}
++
++/*
++ * Decode data_type for every coded A-CPL parameter set of ssch
++ * (acpl_num_param_sets_cod + 1 sets). Each set is decoded by
++ * acpl_huff_data() into the same ssch->acpl_data[data_type] row.
++ * ss is not used. Returns 0, or the first negative error from
++ * acpl_huff_data().
++ */
++static int acpl_ec_data(AC4DecodeContext *s, Substream *ss,
++                        SubstreamChannel *ssch,
++                        int data_type, int data_bands,
++                        int start_band, int quant_mode)
++{
++    int set = 0;
++
++    while (set < ssch->acpl_num_param_sets_cod + 1) {
++        int err = acpl_huff_data(s, data_type, data_bands,
++                                 start_band, quant_mode,
++                                 ssch->acpl_data[data_type]);
++        if (err < 0)
++            return err;
++        set++;
++    }
++
++    return 0;
++}
++
++/*
++ * Parse the two-channel A-CPL payload: framing data (read into ssch0), then
++ * ALPHA1, ALPHA2, BETA1, BETA2, BETA3 for ssch0 using ss->acpl_quant_mode[0]
++ * and GAMMA1 .. GAMMA6 for ssch1 using ss->acpl_quant_mode[1], in that
++ * order. All sets cover bands ss->acpl_param_band up to the count looked up
++ * through ss->acpl_num_param_bands_id.
++ * Returns 0, or the first negative error from acpl_ec_data().
++ */
++static int acpl_data_2ch(AC4DecodeContext *s, Substream *ss,
++                         SubstreamChannel *ssch0,
++                         SubstreamChannel *ssch1)
++{
++    static const int ch0_types[] = { ALPHA1, ALPHA2, BETA1, BETA2, BETA3 };
++    static const int ch1_types[] = { GAMMA1, GAMMA2, GAMMA3,
++                                     GAMMA4, GAMMA5, GAMMA6 };
++    const int ch0_count = (int)(sizeof(ch0_types) / sizeof(ch0_types[0]));
++    const int ch1_count = (int)(sizeof(ch1_types) / sizeof(ch1_types[0]));
++    int band_count, first_band, err;
++
++    acpl_framing_data(s, ss, ssch0);
++
++    band_count = acpl_num_param_bands[ss->acpl_num_param_bands_id];
++    first_band = ss->acpl_param_band;
++
++    for (int i = 0; i < ch0_count; i++) {
++        err = acpl_ec_data(s, ss, ssch0, ch0_types[i], band_count,
++                           first_band, ss->acpl_quant_mode[0]);
++        if (err < 0)
++            return err;
++    }
++
++    for (int i = 0; i < ch1_count; i++) {
++        err = acpl_ec_data(s, ss, ssch1, ch1_types[i], band_count,
++                           first_band, ss->acpl_quant_mode[1]);
++        if (err < 0)
++            return err;
++    }
++
++    return 0;
++}
++
++static
int acpl_data_1ch(AC4DecodeContext *s, Substream *ss,
++                         SubstreamChannel *ssch)
++{
++    /*
++     * Parse the A-CPL payload of a single channel: framing data first, then
++     * the ALPHA1 and BETA1 parameter sets, both decoded with
++     * ss->acpl_quant_mode[0] over bands ss->acpl_param_band up to the count
++     * looked up through ss->acpl_num_param_bands_id.
++     * Returns 0, or the first negative error from acpl_ec_data().
++     */
++    int ret, num_bands, start;
++
++    acpl_framing_data(s, ss, ssch);
++
++    num_bands = acpl_num_param_bands[ss->acpl_num_param_bands_id];
++    start = ss->acpl_param_band;
++
++    ret = acpl_ec_data(s, ss, ssch, ALPHA1, num_bands, start, ss->acpl_quant_mode[0]);
++    if (ret < 0)
++        return ret;
++
++    ret = acpl_ec_data(s, ss, ssch, BETA1, num_bands, start, ss->acpl_quant_mode[0]);
++    if (ret < 0)
++        return ret;
++
++    return 0;
++}
++
++/*
++ * Parse a two-channel element.
++ *
++ * Reads the 2-bit codec_mode and, on I-frames, the ASPX / A-CPL
++ * configuration the mode calls for; the mode-specific payload follows:
++ *   CM_SIMPLE:      stereo_data
++ *   CM_ASPX:        companding, stereo_data, aspx_data_2ch
++ *   CM_ASPX_ACPL_1: companding, per-channel spectral frontend info and data
++ *                   for ssch[0] and ssch[1], then aspx_data_1ch and
++ *                   acpl_data_1ch on ssch[0]
++ *   CM_ASPX_ACPL_2: companding, spectral frontend info and data for
++ *                   ssch[0] only, then aspx_data_1ch and acpl_data_1ch
++ *
++ * Results of aspx_config(), acpl_config_1ch() and companding_control() are
++ * not checked. Returns 0, or the first negative error from a sub-parser.
++ */
++static int channel_pair_element(AC4DecodeContext *s, int iframe)
++{
++    GetBitContext *gb = &s->gbc;
++    Substream *ss = &s->substream;
++    int spec_frontend;
++    int ret;
++
++    ss->codec_mode = (int)get_bits(gb, 2);
++    av_log(s->avctx, AV_LOG_DEBUG, "codec_mode: %d\n", ss->codec_mode);
++    if (iframe) {
++        if (ss->codec_mode != CM_SIMPLE)
++            aspx_config(s, ss);
++        if (ss->codec_mode == CM_ASPX_ACPL_1)
++            acpl_config_1ch(s, ss, ACPL_PARTIAL);
++        if (ss->codec_mode == CM_ASPX_ACPL_2)
++            acpl_config_1ch(s, ss, ACPL_FULL);
++    }
++
++    switch (ss->codec_mode) {
++    case CM_SIMPLE:
++        ret = stereo_data(s, ss, iframe);
++        if (ret < 0)
++            return ret;
++        break;
++    case CM_ASPX:
++        companding_control(s, ss, 2);
++        ret = stereo_data(s, ss, iframe);
++        if (ret < 0)
++            return ret;
++        ret = aspx_data_2ch(s, ss, &ss->ssch[0], &ss->ssch[1], iframe);
++        if (ret < 0)
++            return ret;
++        break;
++    case CM_ASPX_ACPL_1:
++        companding_control(s, ss, 1);
++        ss->mdct_stereo_proc[0] = get_bits1(gb);
++        if (ss->mdct_stereo_proc[0]) {
++            /* joint MDCT stereo: both channels use SF_ASF and share one layout */
++            ss->spec_frontend_m = SF_ASF;
++            ss->spec_frontend_s = SF_ASF;
++            ret = sf_info(s, ss, &ss->ssch[0], SF_ASF, 1, 0);
++            if (ret < 0)
++                return ret;
++
++            memcpy(&ss->ssch[1].scp, &ss->ssch[0].scp, sizeof(ss->ssch[0].scp));
++            memcpy(&ss->ssch[1].sect_sfb_offset, &ss->ssch[0].sect_sfb_offset, sizeof(ss->ssch[0].sect_sfb_offset));
++            memcpy(&ss->ssch[1].offset2sfb, &ss->ssch[0].offset2sfb, sizeof(ss->ssch[0].offset2sfb));
++            memcpy(&ss->ssch[1].offset2g, &ss->ssch[0].offset2g, sizeof(ss->ssch[0].offset2g));
++            memcpy(&ss->ssch[1].win_offset, &ss->ssch[0].win_offset, sizeof(ss->ssch[0].win_offset));
++
++            ret = chparam_info(s, ss, &ss->ssch[0]);
++            if (ret < 0)
++                return ret;
++        } else {
++            ss->spec_frontend_m = (int)get_bits1(gb);
++            ret = sf_info(s, ss, &ss->ssch[0], ss->spec_frontend_m, 0, 0);
++            if (ret < 0)
++                return ret;
++            ss->spec_frontend_s = (int)get_bits1(gb);
++            ret = sf_info(s, ss, &ss->ssch[1], ss->spec_frontend_s, 0, 1);
++            if (ret < 0)
++                return ret;
++        }
++        ret = sf_data(s, ss, &ss->ssch[0], iframe, ss->spec_frontend_m);
++        if (ret < 0)
++            return ret;
++        /*
++         * The second channel must be decoded with its own frontend: its
++         * sf_info() above was parsed with spec_frontend_s, which can differ
++         * from spec_frontend_m when MDCT stereo processing is off.
++         */
++        ret = sf_data(s, ss, &ss->ssch[1], iframe, ss->spec_frontend_s);
++        if (ret < 0)
++            return ret;
++        ret = aspx_data_1ch(s, ss, &ss->ssch[0], iframe);
++        if (ret < 0)
++            return ret;
++        ret = acpl_data_1ch(s, ss, &ss->ssch[0]);
++        if (ret < 0)
++            return ret;
++        break;
++    case CM_ASPX_ACPL_2:
++        companding_control(s, ss, 1);
++        spec_frontend = (int)get_bits1(gb);
++        ret = sf_info(s, ss, &ss->ssch[0], spec_frontend, 0, 0);
++        if (ret < 0)
++            return ret;
++        ret = sf_data(s, ss, &ss->ssch[0], iframe, spec_frontend);
++        if (ret < 0)
++            return ret;
++        ret = aspx_data_1ch(s, ss, &ss->ssch[0], iframe);
++        if (ret < 0)
++            return ret;
++        ret = acpl_data_1ch(s, ss, &ss->ssch[0]);
++        if (ret < 0)
++            return ret;
++        break;
++    }
++
++    return 0;
++}
++
++/*
++ * Parse four jointly coded channels (ssch[0..3]): one SF_ASF sf_info() read
++ * into ssch[0] whose layout is shared by the other three channels, then
++ * chparam_info() and sf_data() for each channel.
++ * Returns 0, or the first negative error from a sub-parser.
++ */
++static int four_channel_data(AC4DecodeContext *s, Substream *ss, int iframe)
++{
++    int ret;
++
++    ret = sf_info(s, ss, &ss->ssch[0], SF_ASF, 0, 0);
++    if (ret < 0)
++        return ret;
++
++    /*
++     * Share only the layout fields filled in for ssch[0], exactly as the
++     * two-, three- and five-channel parsers do. Copying the whole
++     * SubstreamChannel would also overwrite the other channels' own state
++     * (filter history, previous-frame ASPX values, sample buffers).
++     */
++    for (int i = 1; i < 4; i++) {
++        memcpy(&ss->ssch[i].scp, &ss->ssch[0].scp, sizeof(ss->ssch[0].scp));
++        memcpy(&ss->ssch[i].sect_sfb_offset, &ss->ssch[0].sect_sfb_offset, sizeof(ss->ssch[0].sect_sfb_offset));
++        memcpy(&ss->ssch[i].offset2sfb, &ss->ssch[0].offset2sfb, sizeof(ss->ssch[0].offset2sfb));
++        memcpy(&ss->ssch[i].offset2g, &ss->ssch[0].offset2g, sizeof(ss->ssch[0].offset2g));
++        memcpy(&ss->ssch[i].win_offset, &ss->ssch[0].win_offset, sizeof(ss->ssch[0].win_offset));
++    }
++
++    for (int i = 0; i < 4; i++) {
++        ret = chparam_info(s, ss, &ss->ssch[i]);
++        if (ret < 0)
++            return ret;
++    }
++
++    for (int i = 0; i < 4; i++) {
++        av_log(s->avctx, AV_LOG_DEBUG, "channel: %d/4\n", i);
++        ret = sf_data(s, ss, &ss->ssch[i], iframe, SF_ASF);
++        if (ret < 0)
++            return ret;
++    }
++
++    return 0;
++}
++
++static int
five_channel_info(AC4DecodeContext *s, Substream *ss) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ret; ++ ++ ss->chel_matsel = get_bits(gb, 4); ++ ++ for (int i = 0; i < 5; i++) { ++ ret = chparam_info(s, ss, &ss->ssch[i]); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int five_channel_data(AC4DecodeContext *s, Substream *ss, int iframe) ++{ ++ int ret; ++ ++ ret = sf_info(s, ss, &ss->ssch[0], SF_ASF, 0, 0); ++ if (ret < 0) ++ return ret; ++ ++ for (int i = 1; i < 5; i++) { ++ memcpy(&ss->ssch[i].scp, &ss->ssch[0].scp, sizeof(ss->ssch[0].scp)); ++ memcpy(&ss->ssch[i].sect_sfb_offset, &ss->ssch[0].sect_sfb_offset, sizeof(ss->ssch[0].sect_sfb_offset)); ++ memcpy(&ss->ssch[i].offset2sfb, &ss->ssch[0].offset2sfb, sizeof(ss->ssch[0].offset2sfb)); ++ memcpy(&ss->ssch[i].offset2g, &ss->ssch[0].offset2g, sizeof(ss->ssch[0].offset2g)); ++ memcpy(&ss->ssch[i].win_offset, &ss->ssch[0].win_offset, sizeof(ss->ssch[0].win_offset)); ++ } ++ ++ ret = five_channel_info(s, ss); ++ if (ret < 0) ++ return ret; ++ ++ for (int i = 0; i < 5; i++) { ++ av_log(s->avctx, AV_LOG_DEBUG, "channel: %d/5\n", i); ++ ret = sf_data(s, ss, &ss->ssch[i], iframe, SF_ASF); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int sf_info_lfe(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch) ++{ ++ GetBitContext *gb = &s->gbc; ++ int n_msfbl_bits = get_msfbl_bits(s->frame_len_base); ++ int n_grp_bits = get_grp_bits(s, ssch); ++ ++ ssch->scp.long_frame = 1; ++ ssch->scp.max_sfb[0] = get_bits(gb, n_msfbl_bits); ++ ssch->scp.num_window_groups = 1; ++ ssch->scp.transf_length_idx[0] = 4; ++ ++ return asf_psy_elements(s, ss, ssch, n_grp_bits); ++} ++ ++static int mono_data(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch, int lfe, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ int spec_frontend; ++ int ret; ++ ++ if (lfe) { ++ spec_frontend = SF_ASF; ++ ret = sf_info_lfe(s, ss, ssch); ++ } else { ++ spec_frontend = (int)get_bits1(gb); ++ ret = 
sf_info(s, ss, ssch, spec_frontend, 0, 0); ++ } ++ if (ret < 0) ++ return ret; ++ av_log(s->avctx, AV_LOG_DEBUG, "channel: %d/1\n", 0); ++ return sf_data(s, ss, ssch, iframe, spec_frontend); ++} ++ ++static int channel_element_7x(AC4DecodeContext *s, int channel_mode, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ Substream *ss = &s->substream; ++ int ret = 0; ++ ++ ss->codec_mode = (int)get_bits(gb, 2); ++ av_log(s->avctx, AV_LOG_DEBUG, "codec_mode: %d\n", ss->codec_mode); ++ if (iframe) { ++ if (ss->codec_mode != CM_SIMPLE) ++ aspx_config(s, ss); ++ if (ss->codec_mode == CM_ASPX_ACPL_1) ++ acpl_config_1ch(s, ss, ACPL_PARTIAL); ++ if (ss->codec_mode == CM_ASPX_ACPL_2) ++ acpl_config_1ch(s, ss, ACPL_FULL); ++ } ++ ++ if (channel_mode == 6) { ++ ret = mono_data(s, ss, &ss->ssch[7], 1, iframe); ++ if (ret < 0) ++ return ret; ++ } ++ ++ if (ss->codec_mode == CM_ASPX_ACPL_1 || ++ ss->codec_mode == CM_ASPX_ACPL_2) ++ companding_control(s, ss, 5); ++ ++ ss->coding_config = get_bits(gb, 2); ++ switch (ss->coding_config) { ++ case 0: ++ break; ++ case 1: ++ break; ++ case 2: ++ ret = four_channel_data(s, ss, iframe); ++ break; ++ case 3: ++ ret = five_channel_data(s, ss, iframe); ++ break; ++ default: ++ av_assert0(0); ++ } ++ ++ return ret; ++} ++ ++static int three_channel_info(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch0, ++ SubstreamChannel *ssch1, ++ SubstreamChannel *ssch2) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ret; ++ ++ ss->chel_matsel = get_bits(gb, 4); ++ ret = chparam_info(s, ss, ssch0); ++ if (ret < 0) ++ return ret; ++ return chparam_info(s, ss, ssch1); ++} ++ ++static int three_channel_data(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch0, ++ SubstreamChannel *ssch1, ++ SubstreamChannel *ssch2) ++{ ++ int ret; ++ ++ ret = sf_info(s, ss, ssch0, SF_ASF, 0, 0); ++ if (ret < 0) ++ return ret; ++ ++ memcpy(&ssch1->scp, &ssch0->scp, sizeof(ss->ssch[0].scp)); ++ memcpy(&ssch1->sect_sfb_offset, &ssch0->sect_sfb_offset, 
sizeof(ss->ssch[0].sect_sfb_offset)); ++ memcpy(&ssch1->offset2sfb, &ssch0->offset2sfb, sizeof(ss->ssch[0].offset2sfb)); ++ memcpy(&ssch1->offset2g, &ssch0->offset2g, sizeof(ss->ssch[0].offset2g)); ++ memcpy(&ssch1->win_offset, &ssch0->win_offset, sizeof(ss->ssch[0].win_offset)); ++ ++ memcpy(&ssch2->scp, &ssch0->scp, sizeof(ss->ssch[0].scp)); ++ memcpy(&ssch2->sect_sfb_offset, &ssch0->sect_sfb_offset, sizeof(ss->ssch[0].sect_sfb_offset)); ++ memcpy(&ssch2->offset2sfb, &ssch0->offset2sfb, sizeof(ss->ssch[0].offset2sfb)); ++ memcpy(&ssch2->offset2g, &ssch0->offset2g, sizeof(ss->ssch[0].offset2g)); ++ memcpy(&ssch2->win_offset, &ssch0->win_offset, sizeof(ss->ssch[0].win_offset)); ++ ++ ret = three_channel_info(s, ss, ssch0, ssch1, ssch2); ++ if (ret < 0) ++ return ret; ++ av_log(s->avctx, AV_LOG_DEBUG, "channel: %d/3\n", 0); ++ ret = sf_data(s, ss, ssch0, 0, SF_ASF); ++ if (ret < 0) ++ return ret; ++ av_log(s->avctx, AV_LOG_DEBUG, "channel: %d/3\n", 1); ++ ret = sf_data(s, ss, ssch1, 0, SF_ASF); ++ if (ret < 0) ++ return ret; ++ av_log(s->avctx, AV_LOG_DEBUG, "channel: %d/3\n", 2); ++ ret = sf_data(s, ss, ssch2, 0, SF_ASF); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static int two_channel_data(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch0, ++ SubstreamChannel *ssch1, ++ int x) ++{ ++ GetBitContext *gb = &s->gbc; ++ int ret; ++ ++ if (get_bits_left(gb) <= 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "two_channel_data underflow\n"); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ ss->mdct_stereo_proc[x] = get_bits1(gb); ++ if (ss->mdct_stereo_proc[x]) { ++ ret = sf_info(s, ss, ssch0, SF_ASF, 0, 0); ++ if (ret < 0) ++ return ret; ++ ++ memcpy(&ssch1->scp, &ssch0->scp, sizeof(ss->ssch[0].scp)); ++ memcpy(&ssch1->sect_sfb_offset, &ssch0->sect_sfb_offset, sizeof(ss->ssch[0].sect_sfb_offset)); ++ memcpy(&ssch1->offset2sfb, &ssch0->offset2sfb, sizeof(ss->ssch[0].offset2sfb)); ++ memcpy(&ssch1->offset2g, &ssch0->offset2g, sizeof(ss->ssch[0].offset2g)); ++ 
memcpy(&ssch1->win_offset, &ssch0->win_offset, sizeof(ss->ssch[0].win_offset)); ++ ++ ret = chparam_info(s, ss, ssch0); ++ if (ret < 0) ++ return ret; ++ } else { ++ ret = sf_info(s, ss, ssch0, SF_ASF, 0, 0); ++ if (ret < 0) ++ return ret; ++ ret = sf_info(s, ss, ssch1, SF_ASF, 0, 0); ++ if (ret < 0) ++ return ret; ++ } ++ av_log(s->avctx, AV_LOG_DEBUG, "channel: %d/2\n", 0); ++ ret = sf_data(s, ss, ssch0, 0, SF_ASF); ++ if (ret < 0) ++ return ret; ++ av_log(s->avctx, AV_LOG_DEBUG, "channel: %d/2\n", 1); ++ ret = sf_data(s, ss, ssch1, 0, SF_ASF); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static int channel_element_3x(AC4DecodeContext *s, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ Substream *ss = &s->substream; ++ int ret; ++ ++ ss->codec_mode = (int)get_bits1(gb); ++ av_log(s->avctx, AV_LOG_DEBUG, "codec_mode: %d\n", ss->codec_mode); ++ if (ss->codec_mode == CM_ASPX) { ++ if (iframe) ++ aspx_config(s, ss); ++ companding_control(s, ss, 3); ++ } ++ ++ ss->coding_config = get_bits1(gb); ++ switch (ss->coding_config) { ++ case 0: ++ ret = stereo_data(s, ss, iframe); ++ if (ret < 0) ++ return ret; ++ ret = mono_data(s, ss, &ss->ssch[2], 0, iframe); ++ if (ret < 0) ++ return ret; ++ break; ++ case 1: ++ ret = three_channel_data(s, ss, ++ &ss->ssch[0], ++ &ss->ssch[1], ++ &ss->ssch[2]); ++ if (ret < 0) ++ return ret; ++ break; ++ } ++ ++ if (ss->codec_mode == CM_ASPX) { ++ ret = aspx_data_2ch(s, ss, &ss->ssch[0], &ss->ssch[1], iframe); ++ if (ret < 0) ++ return ret; ++ ret = aspx_data_1ch(s, ss, &ss->ssch[2], iframe); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int channel_element_5x(AC4DecodeContext *s, int lfe, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ Substream *ss = &s->substream; ++ int ret = 0; ++ ++ ss->codec_mode = (int)get_bits(gb, 3); ++ av_log(s->avctx, AV_LOG_DEBUG, "codec_mode: %d\n", ss->codec_mode); ++ if (iframe) { ++ if (ss->codec_mode != CM_SIMPLE) ++ aspx_config(s, ss); ++ if (ss->codec_mode == 
CM_ASPX_ACPL_1) ++ acpl_config_1ch(s, ss, ACPL_PARTIAL); ++ if (ss->codec_mode == CM_ASPX_ACPL_2) ++ acpl_config_1ch(s, ss, ACPL_FULL); ++ if (ss->codec_mode == CM_ASPX_ACPL_3) ++ acpl_config_2ch(s, ss); ++ } ++ ++ if (lfe) { ++ ret = mono_data(s, ss, &ss->ssch[5], 1, iframe); ++ if (ret < 0) ++ return ret; ++ } ++ ++ switch (ss->codec_mode) { ++ case CM_SIMPLE: ++ case CM_ASPX: ++ if (ss->codec_mode == CM_ASPX) ++ companding_control(s, ss, 5); ++ ++ ss->coding_config = get_bits(gb, 2); ++ av_log(s->avctx, AV_LOG_DEBUG, "coding_config: %d\n", ss->coding_config); ++ switch (ss->coding_config) { ++ case 0: { ++ int fl = 0, fr = 1, fc = 2, sl = 3, sr = 4; ++ ss->mode_2ch = get_bits1(gb); ++ av_log(s->avctx, AV_LOG_DEBUG, "2ch_mode: %d\n", ss->mode_2ch); ++ ret = two_channel_data(s, ss, &ss->ssch[fl], &ss->ssch[ss->mode_2ch ? sl : fr], 0); ++ if (ret < 0) ++ return ret; ++ ret = two_channel_data(s, ss, &ss->ssch[ss->mode_2ch ? fr : sl], &ss->ssch[sr], 1); ++ if (ret < 0) ++ return ret; ++ ret = mono_data(s, ss, &ss->ssch[fc], 0, iframe); ++ if (ret < 0) ++ return ret; ++ break; ++ } ++ case 1: ++ ret = three_channel_data(s, ss, &ss->ssch[0], &ss->ssch[1], &ss->ssch[2]); ++ if (ret < 0) ++ return ret; ++ ret = two_channel_data(s, ss, &ss->ssch[3], &ss->ssch[4], 0); ++ if (ret < 0) ++ return ret; ++ break; ++ case 2: ++ ret = four_channel_data(s, ss, iframe); ++ if (ret < 0) ++ return ret; ++ ret = mono_data(s, ss, &ss->ssch[4], 0, iframe); ++ if (ret < 0) ++ return ret; ++ break; ++ case 3: ++ ret = five_channel_data(s, ss, iframe); ++ if (ret < 0) ++ return ret; ++ break; ++ } ++ ++ if (ss->codec_mode == CM_ASPX) { ++ ret = aspx_data_2ch(s, ss, &ss->ssch[0], &ss->ssch[1], iframe); ++ if (ret < 0) ++ return ret; ++ ret = aspx_data_2ch(s, ss, &ss->ssch[2], &ss->ssch[3], iframe); ++ if (ret < 0) ++ return ret; ++ ret = aspx_data_1ch(s, ss, &ss->ssch[4], iframe); ++ if (ret < 0) ++ return ret; ++ } ++ break; ++ case CM_ASPX_ACPL_1: ++ case CM_ASPX_ACPL_2: ++ 
companding_control(s, ss, 3); ++ ss->coding_config = get_bits1(gb); ++ if (ss->coding_config) ++ ret = three_channel_data(s, ss, &ss->ssch[0], &ss->ssch[1], &ss->ssch[2]); ++ else ++ ret = two_channel_data(s, ss, &ss->ssch[0], &ss->ssch[1], 0); ++ if (ret < 0) ++ return ret; ++ ++ if (ss->codec_mode == CM_ASPX_ACPL_1) { ++ ss->max_sfb_master = (int)get_bits(gb, 5); // XXX ++ ret = chparam_info(s, ss, &ss->ssch[3]); ++ if (ret < 0) ++ return ret; ++ ret = chparam_info(s, ss, &ss->ssch[4]); ++ if (ret < 0) ++ return ret; ++ ret = sf_data(s, ss, &ss->ssch[3], iframe, SF_ASF); ++ if (ret < 0) ++ return ret; ++ ret = sf_data(s, ss, &ss->ssch[4], iframe, SF_ASF); ++ if (ret < 0) ++ return ret; ++ } ++ if (ss->coding_config == 0) { ++ ret = mono_data(s, ss, &ss->ssch[2], 0, iframe); ++ if (ret < 0) ++ return ret; ++ } ++ ++ ret = aspx_data_2ch(s, ss, &ss->ssch[0], &ss->ssch[1], iframe); ++ if (ret < 0) ++ return ret; ++ ret = aspx_data_1ch(s, ss, &ss->ssch[2], iframe); ++ if (ret < 0) ++ return ret; ++ ret = acpl_data_1ch(s, ss, &ss->ssch[0]); ++ if (ret < 0) ++ return ret; ++ ret = acpl_data_1ch(s, ss, &ss->ssch[1]); ++ if (ret < 0) ++ return ret; ++ break; ++ case CM_ASPX_ACPL_3: ++ companding_control(s, ss, 2); ++ ret = stereo_data(s, ss, iframe); ++ if (ret < 0) ++ return ret; ++ ret = aspx_data_2ch(s, ss, &ss->ssch[0], &ss->ssch[1], iframe); ++ if (ret < 0) ++ return ret; ++ ret = acpl_data_2ch(s, ss, &ss->ssch[0], &ss->ssch[1]); ++ if (ret < 0) ++ return ret; ++ break; ++ default: ++ av_log(s->avctx, AV_LOG_ERROR, "invalid codec mode: %d\n", ss->codec_mode); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ return ret; ++} ++ ++static int single_channel_element(AC4DecodeContext *s, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ Substream *ss = &s->substream; ++ int ret = 0; ++ ++ ss->codec_mode = (int)get_bits1(gb); ++ av_log(s->avctx, AV_LOG_DEBUG, "codec_mode: %d\n", ss->codec_mode); ++ if (iframe) { ++ if (ss->codec_mode == CM_ASPX) ++ aspx_config(s, ss); ++ } ++ if 
(ss->codec_mode == CM_SIMPLE) {
++        ret = mono_data(s, ss, &ss->ssch[0], 0, iframe);
++    } else {
++        companding_control(s, ss, 1);
++        ret = mono_data(s, ss, &ss->ssch[0], 0, iframe);
++        if (ret < 0)
++            return ret;
++        ret = aspx_data_1ch(s, ss, &ss->ssch[0], iframe);
++    }
++
++    return ret;
++}
++
++/*
++ * Dispatch the audio payload parser for channel_mode:
++ *   0 -> single_channel_element, 1 -> channel_pair_element,
++ *   2 -> channel_element_3x, 3/4 -> channel_element_5x (without/with LFE),
++ *   5/6 -> channel_element_7x.
++ * Any other mode is reported and rejected with AVERROR_PATCHWELCOME rather
++ * than aborting the process, since channel_mode is stream-derived and the
++ * metadata parser handles modes up to 10.
++ * Returns the element parser's result (0 or a negative error).
++ */
++static int audio_data(AC4DecodeContext *s, int channel_mode, int iframe)
++{
++    int ret = 0;
++
++    av_log(s->avctx, AV_LOG_DEBUG, "channel_mode: %d\n", channel_mode);
++    switch (channel_mode) {
++    case 0:
++        ret = single_channel_element(s, iframe);
++        break;
++    case 1:
++        ret = channel_pair_element(s, iframe);
++        break;
++    case 2:
++        ret = channel_element_3x(s, iframe);
++        break;
++    case 3:
++        ret = channel_element_5x(s, 0, iframe);
++        break;
++    case 4:
++        ret = channel_element_5x(s, 1, iframe);
++        break;
++    case 5:
++    case 6:
++        ret = channel_element_7x(s, channel_mode, iframe);
++        break;
++    default:
++        av_log(s->avctx, AV_LOG_ERROR, "unsupported channel_mode: %d\n", channel_mode);
++        ret = AVERROR_PATCHWELCOME;
++        break;
++    }
++
++    return ret;
++}
++
++/*
++ * Parse the optional extended loudness fields into ssi->meta.
++ *
++ * Most fields are guarded by a 1-bit presence flag and are left untouched
++ * when the flag is clear. The trailing extension block is skipped, not
++ * interpreted.
++ *
++ * Returns 0 on success, or AVERROR_INVALIDDATA when the unary-coded
++ * prgmbndy run does not terminate within the available input.
++ */
++static int further_loudness_info(AC4DecodeContext *s, SubstreamInfo *ssi)
++{
++    GetBitContext *gb = &s->gbc;
++    Metadata *m = &ssi->meta;
++
++    /* 2-bit version, extended by 4 more bits when it reads as 3 */
++    m->loudness_version = (int)get_bits(gb, 2);
++    if (m->loudness_version == 3)
++        m->loudness_version += (int)get_bits(gb, 4);
++
++    m->loud_prac_type = (int)get_bits(gb, 4);
++    if (m->loud_prac_type != 0) {
++        if (get_bits1(gb))
++            m->dialgate_prac_type = (int)get_bits(gb, 3);
++        m->loudcorr_type = (int)get_bits1(gb);
++    }
++
++    if (get_bits1(gb))
++        m->loudrelgat = (int)get_bits(gb, 11);
++
++    if (get_bits1(gb)) {
++        m->loudspchgat = (int)get_bits(gb, 11);
++        m->dialgate_prac_type = (int)get_bits(gb, 3);
++    }
++
++    if (get_bits1(gb))
++        m->loudstrm3s = (int)get_bits(gb, 11);
++
++    if (get_bits1(gb))
++        m->max_loudstrm3s = (int)get_bits(gb, 11);
++
++    if (get_bits1(gb))
++        m->truepk = (int)get_bits(gb, 11);
++
++    if (get_bits1(gb))
++        m->max_truepk = (int)get_bits(gb, 11);
++
++    if (get_bits1(gb)) {
++        int prgmbndy_bit = 0;
++        int shifts = 0;
++
++        /* prgmbndy doubles for every bit read until a set bit ends the run */
++        m->prgmbndy = 1;
++        while (prgmbndy_bit == 0) {
++            /*
++             * The run has no length limit of its own. Stop when the input
++             * is exhausted or the value has reached 2^30, so truncated or
++             * corrupt data can neither spin here forever nor overflow the
++             * shift.
++             */
++            if (get_bits_left(gb) <= 0 || shifts >= 30) {
++                av_log(s->avctx, AV_LOG_ERROR, "invalid prgmbndy\n");
++                return AVERROR_INVALIDDATA;
++            }
++            m->prgmbndy <<= 1;
++            prgmbndy_bit = (int)get_bits1(gb);
++            shifts++;
++        }
++
++        m->end_or_start = (int)get_bits1(gb);
++        if (get_bits1(gb))
++            m->prgmbndy_offset = (int)get_bits(gb, 11);
++    }
++
++    if (get_bits1(gb)) {
++        m->lra = (int)get_bits(gb, 10);
++        m->lra_prac_type = (int)get_bits(gb, 3);
++    }
++
++    if (get_bits1(gb))
++        m->loudmntry = (int)get_bits(gb, 11);
++
++    if (get_bits1(gb))
++        m->max_loudmntry = (int)get_bits(gb, 11);
++
++    if (get_bits1(gb)) {
++        /* extension block: 5-bit size, extended via variable_bits() when 31 */
++        int e_bits_size = (int)get_bits(gb, 5);
++        if (e_bits_size == 31)
++            e_bits_size += variable_bits(gb, 4);
++        skip_bits_long(gb, e_bits_size);
++    }
++
++    return 0;
++}
++
++/* Return 1 for the channel modes 4, 6, 8 and 10, otherwise 0. */
++static int channel_mode_contains_lfe(int channel_mode)
++{
++    return channel_mode == 4 ||
++           channel_mode == 6 ||
++           channel_mode == 8 ||
++           channel_mode == 10;
++}
++
++static int basic_metadata(AC4DecodeContext *s, SubstreamInfo *ssi)
++{
++    GetBitContext *gb = &s->gbc;
++    Metadata *m = &ssi->meta;
++
++    if (ssi->sus_ver == 0)
++        m->dialnorm_bits = (int)get_bits(gb, 7);
++
++    if (get_bits1(gb)) {
++        if (get_bits1(gb))
++            further_loudness_info(s, ssi);
++        if (ssi->channel_mode == 1) {
++            if (get_bits1(gb)) {
++                m->pre_dmixtyp_2ch = (int)get_bits(gb, 3);
++                m->phase90_info_2ch = (int)get_bits(gb, 2);
++            }
++        }
++
++        if (ssi->channel_mode > 1) {
++            if (get_bits1(gb)) {
++                m->loro_center_mixgain = (int)get_bits(gb, 3);
++                m->loro_surround_mixgain = (int)get_bits(gb, 3);
++                if (get_bits1(gb))
++                    m->loro_dmx_loud_corr = (int)get_bits(gb, 5);
++                if (get_bits1(gb)) {
++                    m->ltrt_center_mixgain = (int)get_bits(gb, 3);
++                    m->ltrt_surround_mixgain = (int)get_bits(gb, 3);
++                }
++                if (get_bits1(gb))
++                    m->ltrt_dmx_loud_corr = (int)get_bits(gb, 5);
++                if (channel_mode_contains_lfe(ssi->channel_mode)) {
++                    if (get_bits1(gb))
++                        m->lfe_mixgain = (int)get_bits(gb, 5);
++                }
++                m->preferred_dmx_method = (int)get_bits(gb, 2);
++            }
++            if (ssi->channel_mode == 3 ||
ssi->channel_mode == 4) { ++ if (get_bits1(gb)) ++ m->pre_dmixtyp_5ch = (int)get_bits(gb, 3); ++ if (get_bits1(gb)) ++ m->pre_upmixtyp_5ch = (int)get_bits(gb, 4); ++ } ++ ++ if (ssi->channel_mode >= 5 && ssi->channel_mode <= 10) { ++ if (get_bits1(gb)) { ++ if (ssi->channel_mode >= 5 && ssi->channel_mode <= 6) { ++ m->pre_upmixtyp_3_4 = (int)get_bits(gb, 2); ++ } else if (ssi->channel_mode >= 9 && ssi->channel_mode <= 10) { ++ m->pre_upmixtyp_3_2_2 = (int)get_bits(gb, 1); ++ } ++ } ++ } ++ m->phase90_info_mc = (int)get_bits(gb, 2); ++ m->surround_attenuation_known = (int)get_bits1(gb); ++ m->lfe_attenuation_known = (int)get_bits1(gb); ++ } ++ ++ if (get_bits1(gb)) ++ m->dc_block_on = (int)get_bits1(gb); ++ } ++ return 0; ++} ++ ++static int extended_metadata(AC4DecodeContext *s) ++{ ++ return 0; ++} ++ ++static int drc_decoder_mode_config(AC4DecodeContext *s, SubstreamInfo *ssi) ++{ ++ return 0; ++} ++ ++static int drc_config(AC4DecodeContext *s, SubstreamInfo *ssi) ++{ ++ GetBitContext *gb = &s->gbc; ++ Metadata *m = &ssi->meta; ++ ++ m->drc_decoder_nr_modes = (int)get_bits(gb, 3); ++ for (int i = 0; i <= m->drc_decoder_nr_modes; i++) ++ drc_decoder_mode_config(s, ssi); ++ m->drc_eac3_profile = (int)get_bits(gb, 3); ++ ++ return 0; ++} ++ ++static int drc_data(AC4DecodeContext *s, SubstreamInfo *ssi) ++{ ++ return 0; ++} ++ ++static int drc_frame(AC4DecodeContext *s, SubstreamInfo *ssi, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ ++ if (get_bits1(gb)) { ++ if (iframe) ++ drc_config(s, ssi); ++ ++ drc_data(s, ssi); ++ } ++ ++ return 0; ++} ++ ++static int dialog_enhancement(AC4DecodeContext *s, int iframe) ++{ ++ return 0; ++} ++ ++static int emdf_payloads_substream(AC4DecodeContext *s) ++{ ++ return 0; ++} ++ ++static int metadata(AC4DecodeContext *s, SubstreamInfo *ssi, int iframe) ++{ ++ GetBitContext *gb = &s->gbc; ++ int tools_metadata_size; ++ ++ basic_metadata(s, ssi); ++ extended_metadata(s); ++ tools_metadata_size = (int)get_bits(gb, 7); ++ if 
(get_bits1(gb)) ++ tools_metadata_size += variable_bits(gb, 3) << 7; ++ ++ drc_frame(s, ssi, iframe); ++ ++ dialog_enhancement(s, iframe); ++ ++ if (get_bits1(gb)) ++ emdf_payloads_substream(s); ++ ++ return 0; ++} ++ ++static int ac4_substream(AC4DecodeContext *s, SubstreamInfo *ssinfo) ++{ ++ GetBitContext *gb = &s->gbc; ++ int audio_size, offset, consumed; ++ int ret; ++ ++ audio_size = (int)get_bits(gb, 15); ++ if (get_bits1(gb)) ++ audio_size += variable_bits(gb, 7) << 15; ++ if (audio_size > 131072) { ++ av_log(s->avctx, AV_LOG_ERROR, "invalid audio_size: %d\n", audio_size); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ av_log(s->avctx, AV_LOG_DEBUG, "audio_size: %d\n", audio_size); ++ ++ align_get_bits(gb); ++ ++ offset = get_bits_count(gb) >> 3; ++ ret = audio_data(s, ssinfo->channel_mode, ssinfo->iframe[0]); ++ if (ret < 0) ++ return ret; ++ ++ align_get_bits(gb); ++ consumed = (get_bits_count(gb) >> 3) - offset; ++ if (consumed > audio_size) { ++ av_log(s->avctx, AV_LOG_ERROR, "substream audio data overread: %d\n", consumed - audio_size); ++ return AVERROR_INVALIDDATA; ++ } ++ if (consumed < audio_size) { ++ int non_zero = 0; ++ ++ for (int i = consumed; i < audio_size; i++) ++ non_zero += get_bits(gb, 8) != 0; ++ if (non_zero) ++ av_log(s->avctx, AV_LOG_WARNING, "substream audio data underread: %d\n", non_zero); ++ } ++ ++ metadata(s, ssinfo, s->iframe_global); ++ ++ align_get_bits(gb); ++ ++ return 0; ++} ++ ++static void spectral_reordering(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ float *scaled_spec = ssch->scaled_spec; ++ float *spec_reord = ssch->spec_reord; ++ int *win_offset = ssch->win_offset; ++ int k, win; ++ ++ k = 0; ++ win = 0; ++ memset(ssch->spec_reord, 0, sizeof(ssch->spec_reord)); ++ ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int transf_length_g = get_transf_length(s, ssch, g, NULL); ++ const uint16_t *sfb_offset = get_sfb_offset(transf_length_g); ++ int max_sfb = get_max_sfb(s, ssch, g); ++ ++ for (int sfb = 0; 
sfb < max_sfb; sfb++) { ++ for (int w = 0; w < ssch->scp.num_win_in_group[g]; w++) { ++ for (int l = sfb_offset[sfb]; l < sfb_offset[sfb+1]; l++) ++ spec_reord[win_offset[win+w] + l] = scaled_spec[k++]; ++ } ++ } ++ win += ssch->scp.num_win_in_group[g]; ++ } ++} ++ ++static int compute_window(AC4DecodeContext *s, float *w, int N, ++ int N_prev, int Nfull, int dir) ++{ ++ const uint16_t *transf_lengths = transf_length_48khz[s->frame_len_base_idx]; ++ float *kernel; ++ int i, idx, N_w, N_skip; ++ ++ if (N <= N_prev) ++ N_w = N; ++ if (N > N_prev) ++ N_w = N_prev; ++ ++ for (i = 0; i < 5; i++) { ++ if (transf_lengths[i] == N_w) { ++ idx = i; ++ break; ++ } ++ } ++ ++ av_assert0(i < 5); ++ ++ N_skip = (N - N_w) / 2; ++ kernel = s->kbd_window[s->frame_len_base_idx][idx]; ++ ++ for (int n = 0; n < N; n++) { ++ if (n >= 0 && n < N_skip) ++ w[n] = (float)dir; ++ else if (n >= N_skip && n < N_w + N_skip) ++ w[n] = !dir ? kernel[n - N_skip] : kernel[N_w - n + N_skip - 1]; ++ else if (n >= N_w + N_skip && n < N_w + 2 * N_skip) ++ w[n] = (float)!dir; ++ else ++ av_assert0(0); ++ } ++ ++ return 0; ++} ++ ++static void scale_spec(AC4DecodeContext *s, int ch) ++{ ++ Substream *ss = &s->substream; ++ SubstreamChannel *ssch = &ss->ssch[ch]; ++ const float *quant_lut = s->quant_lut; ++ ++ memset(ssch->scaled_spec, 0, sizeof(ssch->scaled_spec)); ++ ++ for (int k = 0; k < s->frame_len_base; k++) { ++ int x = ssch->quant_spec[k]; ++ int sfb = ssch->offset2sfb[k]; ++ int g = ssch->offset2g[k]; ++ ++ ssch->scaled_spec[k] = ssch->sf_gain[g][sfb] * copysignf(quant_lut[FFABS(x)], (float)x); ++ } ++} ++ ++static int two_channel_processing(AC4DecodeContext *s, Substream *ss, ++ SubstreamChannel *ssch0, ++ SubstreamChannel *ssch1) ++{ ++ int max_sfb_prev; ++ float sap_gain; ++ ++ memset(&ss->alpha_q, 0, sizeof(ss->alpha_q)); ++ ++ max_sfb_prev = get_max_sfb(s, ssch0, 0); ++ for (int g = 0; g < ssch0->scp.num_window_groups; g++) { ++ int max_sfb_g = get_max_sfb(s, ssch0, g); ++ ++ for (int sfb 
= 0; sfb < max_sfb_g; sfb++) { ++ float m[2][2]; ++ ++ if (ssch0->sap_mode == 0 || ++ (ssch0->sap_mode == 1 && ssch0->ms_used[g][sfb] == 0)) { ++ m[0][0] = m[1][1] = 1; ++ m[0][1] = m[1][0] = 0; ++ } else if (ssch0->sap_mode == 2 || ++ ((ssch0->sap_mode == 1 && ssch0->ms_used[g][sfb] == 1))) { ++ m[0][0] = ++ m[0][1] = ++ m[1][0] = 1; ++ m[1][1] = -1; ++ } else { // sap_mode == 3 ++ if (ssch0->sap_coeff_used[g][sfb]) { // setup alpha_q[g][sfb] ++ if (sfb & 1) { ++ ss->alpha_q[g][sfb] = ss->alpha_q[g][sfb-1]; ++ } else { ++ float delta = (float)(ssch0->dpcm_alpha_q[g][sfb] - 60); ++ int code_delta; ++ ++ if ((g == 0) || (max_sfb_g != max_sfb_prev)) { ++ code_delta = 0; ++ } else { ++ code_delta = ssch0->delta_code_time; ++ } ++ ++ if (code_delta) { ++ ss->alpha_q[g][sfb] = ss->alpha_q[g-1][sfb] + delta; ++ } else if (sfb == 0) { ++ ss->alpha_q[g][sfb] = delta; ++ } else { ++ ss->alpha_q[g][sfb] = ss->alpha_q[g][sfb-2] + delta; ++ } ++ } ++ // inverse quantize alpha_q[g][sfb] ++ sap_gain = ss->alpha_q[g][sfb] * 0.1f; ++ m[0][0] = 1 + sap_gain; ++ m[0][1] = 1; ++ m[1][0] = 1 - sap_gain; ++ m[1][1] = -1; ++ } else { ++ m[0][0] = 1; ++ m[0][1] = 0; ++ m[1][0] = 0; ++ m[1][1] = 1; ++ } ++ } ++ ++ memcpy(&ss->matrix_stereo[g][sfb], m, sizeof(m)); ++ } ++ ++ max_sfb_prev = max_sfb_g; ++ } ++ ++ for (int k = 0; k < s->frame_len_base; k++) { ++ int sfb = ssch0->offset2sfb[k]; ++ int g = ssch0->offset2g[k]; ++ float a = ss->matrix_stereo[g][sfb][0][0]; ++ float b = ss->matrix_stereo[g][sfb][0][1]; ++ float c = ss->matrix_stereo[g][sfb][1][0]; ++ float d = ss->matrix_stereo[g][sfb][1][1]; ++ float i0 = ssch0->scaled_spec[k]; ++ float i1 = ssch1->scaled_spec[k]; ++ float o0, o1; ++ ++ o0 = i0 * a + i1 * b; ++ o1 = i0 * c + i1 * d; ++ ++ ssch0->scaled_spec[k] = o0; ++ ssch1->scaled_spec[k] = o1; ++ } ++ ++ return 0; ++} ++ ++static int stereo_processing(AC4DecodeContext *s, Substream *ss) ++{ ++ if (ss->mdct_stereo_proc[0]) ++ two_channel_processing(s, ss, &ss->ssch[0], 
&ss->ssch[1]); ++ ++ return 0; ++} ++ ++static int m5channel_processing(AC4DecodeContext *s, Substream *ss) ++{ ++ switch (ss->codec_mode) { ++ case CM_SIMPLE: ++ case CM_ASPX: ++ switch (ss->coding_config) { ++ case 0: ++ if (ss->mdct_stereo_proc[0]) ++ two_channel_processing(s, ss, &ss->ssch[0], &ss->ssch[1]); ++ if (ss->mdct_stereo_proc[1]) ++ two_channel_processing(s, ss, &ss->ssch[2], &ss->ssch[3]); ++ break; ++ } ++ break; ++ case CM_ASPX_ACPL_1: ++ case CM_ASPX_ACPL_2: ++ case CM_ASPX_ACPL_3: ++ switch (ss->coding_config) { ++ case 0: ++ if (ss->mdct_stereo_proc[0]) ++ two_channel_processing(s, ss, &ss->ssch[0], &ss->ssch[1]); ++ break; ++ } ++ break; ++ } ++ ++ return 0; ++} ++ ++static void qmf_analysis(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ float *qmf_filt = ssch->qmf_filt; ++ float *pcm = ssch->pcm; ++ LOCAL_ALIGNED_32(float, u, [128]); ++ LOCAL_ALIGNED_32(float, z, [640]); ++ ++ for (int ts = 0; ts < s->num_qmf_timeslots; ts++) { ++ /* shift time-domain input samples by 64 */ ++ memmove(qmf_filt + 64, qmf_filt, sizeof(*qmf_filt) * (640 - 64)); ++ ++ /* feed new audio samples */ ++ for (int sb = 63; sb >= 0; sb--) ++ qmf_filt[sb] = pcm[ts * 64 + 63 - sb] / 32768.f; ++ ++ /* multiply input samples by window coefficients */ ++ s->fdsp->vector_fmul(z, qmf_filt, qwin, 640); ++ ++ /* sum the samples to create vector u */ ++ for (int n = 0; n < 128; n++) { ++ u[n] = z[n]; ++ for (int k = 1; k < 5; k++) ++ u[n] += z[n + k * 128]; ++ } ++ ++ /* compute 64 new subband samples */ ++ for (int sb = 0; sb < 64; sb++) { ++ float *cos_atab = s->cos_atab[sb]; ++ float *sin_atab = s->sin_atab[sb]; ++ ++ ssch->Q[0][ts][sb] = s->fdsp->scalarproduct_float(u, cos_atab, 128); ++ ssch->Q[1][ts][sb] = s->fdsp->scalarproduct_float(u, sin_atab, 128); ++ } ++ } ++} ++ ++static void qmf_synthesis(AC4DecodeContext *s, SubstreamChannel *ssch, float *pcm) ++{ ++ float *qsyn_filt = ssch->qsyn_filt; ++ LOCAL_ALIGNED_32(float, g, [640]); ++ LOCAL_ALIGNED_32(float, w, [640]); 
++ ++ for (int ts = 0; ts < s->num_qmf_timeslots; ts++) { ++ /* shift samples by 128 */ ++ memmove(qsyn_filt + 128, qsyn_filt, sizeof(*qsyn_filt) * (1280 - 128)); ++ ++ for (int n = 0; n < 128; n++) { ++ float *cos_stab = s->cos_stab[n]; ++ float *sin_stab = s->sin_stab[n]; ++ ++ qsyn_filt[n] = s->fdsp->scalarproduct_float(ssch->Q[0][ts], cos_stab, 64) - ++ s->fdsp->scalarproduct_float(ssch->Q[1][ts], sin_stab, 64); ++ } ++ ++ for (int n = 0; n < 5; n++) { ++ memcpy(g + 128 * n, qsyn_filt + 256 * n, 64 * sizeof(float)); ++ memcpy(g + 128 * n + 64, qsyn_filt + 256 * n + 192, 64 * sizeof(float)); ++ } ++ /* multiply by window coefficients */ ++ s->fdsp->vector_fmul(w, g, qwin, 640); ++ ++ /* compute 64 new time-domain output samples */ ++ for (int sb = 0; sb < 64; sb++) { ++ float temp = 0; ++ ++ for (int n = 0; n < 10; n++) ++ temp += w[64*n + sb]; ++ pcm[ts*64 + sb] = temp; ++ } ++ } ++} ++ ++static void spectral_synthesis(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ LOCAL_ALIGNED_32(float, in, [2048]); ++ LOCAL_ALIGNED_32(float, x, [4096]); ++ const int *win_offset = ssch->win_offset; ++ float *overlap = ssch->overlap; ++ float *winl = s->winl; ++ float *winr = s->winr; ++ float *pcm = ssch->pcm; ++ int Nfull = s->frame_len_base; ++ int nskip, nskip_prev; ++ int win = 0; ++ ++ for (int g = 0; g < ssch->scp.num_window_groups; g++) { ++ int midx = s->frame_len_base_idx; ++ int idx; ++ int N = get_transf_length(s, ssch, g, &idx); ++ ++ if (!ssch->N_prev) ++ ssch->N_prev = Nfull; ++ ++ compute_window(s, winl, N, ssch->N_prev, Nfull, 0); ++ compute_window(s, winr, ssch->N_prev, N, Nfull, 1); ++ ++ for (int w = 0; w < ssch->scp.num_win_in_group[g]; w++) { ++ nskip = (Nfull - N) / 2; ++ nskip_prev = (Nfull - ssch->N_prev) / 2; ++ ++ memcpy(in, ssch->spec_reord + win_offset[win + w], N * 4); ++ ++#if 0 ++ s->tx_fn[midx][idx](s->tx_ctx[midx][idx], x, in, sizeof(float)); ++ ++ s->fdsp->vector_fmul_window(pcm + win_offset[win + w], overlap + nskip, x, ++ 
/**
 * Least-squares polynomial fit solved through the normal equations.
 *
 * Finds coefficients c[0..order] minimising
 *   sum_e (c[0] + c[1]*x_e + ... + c[order]*x_e^order - y_e)^2
 * by building the (order+1)x(order+1) matrix of power sums, inverting it
 * with Gauss-Jordan elimination (no pivoting) and multiplying the inverse
 * with the moment vector.
 *
 * NOTE: the parameter names are kept for interface compatibility, but the
 * code uses dependentValues[] as the abscissae (x, the values that are
 * raised to powers) and independentValues[] as the ordinates (y).
 *
 * @param order             polynomial order, 0..5
 * @param countOfElements   number of samples, must be greater than order
 * @param dependentValues   x values, countOfElements entries
 * @param independentValues y values, countOfElements entries
 * @param coefficients      output, order+1 entries, lowest power first;
 *                          left untouched on failure
 * @return 0 on success, -1 when the order is out of range, there are too
 *         few samples, or a zero pivot is met (singular system)
 */
static int polyfit(int order,
                   int countOfElements,
                   const float *const dependentValues,
                   const float *const independentValues,
                   float *coefficients)
{
    enum { maxOrder = 5 };
    float B[maxOrder + 1] = { 0.0f };                        /* sum y * x^k       */
    float P[2 * maxOrder + 1] = { 0.0f };                    /* sum x^k, k<=2*ord */
    float A[(maxOrder + 1) * 2 * (maxOrder + 1)] = { 0.0f }; /* [ M | I ]         */
    int terms, stride;

    /* The local arrays are sized for maxOrder; increase it if necessary. */
    if (order < 0 || order > maxOrder)
        return -1;

    /* At least order+1 samples are needed to determine order+1 unknowns. */
    if (countOfElements <= order)
        return -1;

    terms  = order + 1;
    stride = 2 * terms;

    /* Accumulate the moment vector and the power sums in a single pass. */
    for (int e = 0; e < countOfElements; e++) {
        const float x = dependentValues[e];
        const float y = independentValues[e];
        float powx = 1.0f;

        for (int k = 0; k <= 2 * order; k++) {
            if (k < terms)
                B[k] += y * powx;
            P[k] += powx;
            powx *= x;
        }
    }

    /* Augmented matrix: normal matrix on the left, identity on the right. */
    for (int row = 0; row < terms; row++) {
        for (int col = 0; col < terms; col++)
            A[row * stride + col] = P[row + col];
        A[row * stride + terms + row] = 1.0f;
    }

    /* Gauss-Jordan: reduce the left half to identity, leaving the inverse
     * of the normal matrix in the right half. */
    for (int piv = 0; piv < terms; piv++) {
        const float diag = A[piv * stride + piv];

        if (diag == 0.0f) /* cannot work with singular matrices */
            return -1;

        for (int col = 0; col < stride; col++)
            A[piv * stride + col] /= diag;

        for (int row = 0; row < terms; row++) {
            float factor;

            if (row == piv)
                continue;

            factor = A[row * stride + piv];
            for (int col = 0; col < stride; col++)
                A[row * stride + col] -= factor * A[piv * stride + col];
        }
    }

    /* coefficients = inverse * B */
    for (int row = 0; row < terms; row++) {
        float acc = 0.0f;

        for (int k = 0; k < terms; k++)
            acc += A[row * stride + terms + k] * B[k];
        coefficients[row] = acc;
    }

    return 0;
}
*ssch, int ch) ++{ ++ int sbg_idx_high2low[24] = { 0 }; ++ int sbg_idx_low2high[24] = { 0 }; ++ int sbg_low = 0; ++ int delta; ++ ++ for (int sbg = 0; sbg < ssch->num_sbg_sig_highres; sbg++) { ++ if (ssch->sbg_sig_lowres[sbg_low+1] == ssch->sbg_sig_highres[sbg]) { ++ sbg_low++; ++ sbg_idx_low2high[sbg_low] = sbg; ++ } ++ sbg_idx_high2low[sbg] = sbg_low; ++ } ++ ++ delta = ((ch == 1) && (ssch->aspx_balance == 1)) + 1; ++ ++ memcpy(ssch->qscf_sig_sbg_prev, ssch->qscf_sig_sbg, sizeof(ssch->qscf_sig_sbg)); ++ memset(ssch->qscf_sig_sbg, 0, sizeof(ssch->qscf_sig_sbg)); ++ ++ /* Loop over Envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over scale factor subband groups */ ++ for (int sbg = 0; sbg < ssch->num_sbg_sig[atsg]; sbg++) { ++ if (atsg == 0) { ++ ssch->atsg_freqres_prev[atsg] = ssch->atsg_freqres[ssch->aspx_num_env_prev - 1]; ++ ssch->qscf_prev[atsg][sbg] = ssch->qscf_sig_sbg_prev[ssch->aspx_num_env_prev - 1][sbg]; ++ } else { ++ ssch->atsg_freqres_prev[atsg] = ssch->atsg_freqres[atsg-1]; ++ ssch->qscf_prev[atsg][sbg] = ssch->qscf_sig_sbg[atsg-1][sbg]; ++ } ++ if (ssch->aspx_sig_delta_dir[atsg] == 0) { /* FREQ */ ++ ssch->qscf_sig_sbg[atsg][sbg] = 0; ++ for (int i = 0; i <= sbg; i++) { ++ ssch->qscf_sig_sbg[atsg][sbg] += delta * ssch->aspx_data[0][atsg][i]; ++ } ++ } else { /* TIME */ ++ if (ssch->atsg_freqres[atsg] == ssch->atsg_freqres_prev[atsg]) { ++ ssch->qscf_sig_sbg[atsg][sbg] = ssch->qscf_prev[atsg][sbg]; ++ ssch->qscf_sig_sbg[atsg][sbg] += delta * ssch->aspx_data[0][atsg][sbg]; ++ } else if ((ssch->atsg_freqres[atsg] == 0) && (ssch->atsg_freqres_prev[atsg] == 1)) { ++ ssch->qscf_sig_sbg[atsg][sbg] = ssch->qscf_prev[atsg][sbg_idx_low2high[sbg]]; ++ ssch->qscf_sig_sbg[atsg][sbg] += delta * ssch->aspx_data[0][atsg][sbg]; ++ } else if ((ssch->atsg_freqres[atsg] == 1) && (ssch->atsg_freqres_prev[atsg] == 0)) { ++ ssch->qscf_sig_sbg[atsg][sbg] = ssch->qscf_prev[atsg][sbg_idx_high2low[sbg]]; ++ ssch->qscf_sig_sbg[atsg][sbg] += 
delta * ssch->aspx_data[0][atsg][sbg]; ++ } ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static int get_qnoise_scale_factors(AC4DecodeContext *s, SubstreamChannel *ssch, int ch) ++{ ++ int delta; ++ ++ delta = ((ch == 1) && (ssch->aspx_balance == 1)) + 1; ++ ++ memcpy(ssch->qscf_noise_prev, ssch->qscf_noise_sbg, sizeof(ssch->qscf_noise_sbg)); ++ memset(ssch->qscf_noise_sbg, 0, sizeof(ssch->qscf_noise_sbg)); ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_noise; atsg++) { ++ /* Loop over noise subband groups */ ++ for (int sbg = 0; sbg < ssch->num_sbg_noise; sbg++) { ++ if (ssch->aspx_noise_delta_dir[atsg] == 0) { /* FREQ */ ++ for (int i = 0; i <= sbg; i++) { ++ ssch->qscf_noise_sbg[atsg][sbg] += delta * ssch->aspx_data[1][atsg][sbg]; ++ } ++ } else { /* TIME */ ++ if (atsg == 0) { ++ ssch->qscf_noise_sbg[atsg][sbg] = ssch->qscf_noise_prev[ssch->aspx_num_noise_prev-1][sbg]; ++ ssch->qscf_noise_sbg[atsg][sbg] += delta * ssch->aspx_data[1][atsg][sbg]; ++ } else { ++ ssch->qscf_noise_sbg[atsg][sbg] = ssch->qscf_noise_sbg[atsg-1][sbg]; ++ ssch->qscf_noise_sbg[atsg][sbg] += delta * ssch->aspx_data[1][atsg][sbg]; ++ } ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static void prepare_channel(AC4DecodeContext *s, int ch) ++{ ++ Substream *ss = &s->substream; ++ SubstreamChannel *ssch = &ss->ssch[ch]; ++ ++ spectral_reordering(s, ssch); ++ spectral_synthesis(s, ssch); ++ ++ qmf_analysis(s, ssch); ++} ++ ++static void aspx_processing(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ memcpy(ssch->Q_low_prev, ssch->Q_low, sizeof(ssch->Q_low)); ++ ++ for (int ts = 0; ts < s->ts_offset_hfgen; ts++) { ++ for (int sb = 0; sb < 64; sb++) { ++ if (sb < ssch->sbx) { ++ ssch->Q_low[0][ts][sb] = ssch->Q_prev[0][ts + s->num_qmf_timeslots - s->ts_offset_hfgen][sb]; ++ ssch->Q_low[1][ts][sb] = ssch->Q_prev[1][ts + s->num_qmf_timeslots - s->ts_offset_hfgen][sb]; ++ } ++ } ++ } ++ ++ for (int ts = s->ts_offset_hfgen; ts < s->num_qmf_timeslots + s->ts_offset_hfgen; ts++) { 
++ for (int sb = 0; sb < 64; sb++) { ++ if (sb < ssch->sbx) { ++ ssch->Q_low[0][ts][sb] = ssch->Q[0][ts - s->ts_offset_hfgen][sb]; ++ ssch->Q_low[1][ts][sb] = ssch->Q[1][ts - s->ts_offset_hfgen][sb]; ++ } ++ } ++ } ++} ++ ++static void mono_deq_signal_factors(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ float a = (ssch->aspx_qmode_env == 0) + 1; ++ ++ memset(ssch->scf_sig_sbg, 0, sizeof(ssch->scf_sig_sbg)); ++ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ for (int sbg = 0; sbg < ssch->num_sbg_sig[atsg]; sbg++) ++ ssch->scf_sig_sbg[atsg][sbg] = 64.f * powf(2, ssch->qscf_sig_sbg[atsg][sbg] / a); ++ ++ if (ssch->aspx_sig_delta_dir[atsg] == 0 && ++ ssch->qscf_sig_sbg[atsg][0] == 0 && ++ ssch->scf_sig_sbg[atsg][1] < 0) { ++ ssch->scf_sig_sbg[atsg][0] = ssch->scf_sig_sbg[atsg][1]; ++ } ++ } ++} ++ ++static void mono_deq_noise_factors(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++#define NOISE_FLOOR_OFFSET 6 ++ ++ memset(ssch->scf_noise_sbg, 0, sizeof(ssch->scf_noise_sbg)); ++ ++ for (int atsg = 0; atsg < ssch->aspx_num_noise; atsg++) { ++ for (int sbg = 0; sbg < ssch->num_sbg_noise; sbg++) ++ ssch->scf_noise_sbg[atsg][sbg] = powf(2, NOISE_FLOOR_OFFSET - ssch->qscf_noise_sbg[atsg][sbg]); ++ } ++} ++ ++static void stereo_deq_signoise_factors(AC4DecodeContext *s, ++ SubstreamChannel *ssch0, ++ SubstreamChannel *ssch1) ++{ ++#define PAN_OFFSET 12 ++ ++ float a = 1 + (ssch0->aspx_qmode_env == 0); ++ ++ for (int atsg = 0; atsg < ssch0->aspx_num_env; atsg++) { ++ for (int sbg = 0; sbg < ssch0->num_sbg_sig[atsg]; sbg++) { ++ float nom = 64.f * powf(2, ssch0->qscf_sig_sbg[atsg][sbg] / a + 1); ++ float denom_a = 1 + powf(2, PAN_OFFSET - ssch1->qscf_sig_sbg[atsg][sbg] / a); ++ float denom_b = 1 + powf(2, ssch1->qscf_sig_sbg[atsg][sbg] / a - PAN_OFFSET); ++ ++ ssch0->scf_sig_sbg[atsg][sbg] = nom / denom_a; ++ ssch1->scf_sig_sbg[atsg][sbg] = nom / denom_b; ++ } ++ } ++ ++ for (int atsg = 0; atsg < ssch0->aspx_num_noise; atsg++) { ++ for (int sbg = 0; sbg < 
ssch0->num_sbg_noise; sbg++) { ++ float nom = powf(2, NOISE_FLOOR_OFFSET - ssch0->qscf_noise_sbg[atsg][sbg] + 1); ++ float denom_a = 1 + powf(2, PAN_OFFSET - ssch1->qscf_noise_sbg[atsg][sbg]); ++ float denom_b = 1 + powf(2, ssch1->qscf_noise_sbg[atsg][sbg] - PAN_OFFSET); ++ ++ ssch0->scf_noise_sbg[atsg][sbg] = nom / denom_a; ++ ssch1->scf_noise_sbg[atsg][sbg] = nom / denom_b; ++ } ++ } ++} ++ ++static void preflattening(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ float mean_energy = 0; ++ int polynomial_order = 3; ++ int num_qmf_subbands = ssch->sbx; ++ float poly_array[64]; ++ float pow_env[64]; ++ float slope[64]; ++ float x[64]; ++ ++ for (int i = 0; i < num_qmf_subbands; i++) { ++ x[i] = i; ++ slope[i] = 0; ++ } ++ /* Calculate the spectral signal envelope in dB over the current interval. */ ++ for (int sb = 0; sb < num_qmf_subbands; sb++) { ++ pow_env[sb] = 0; ++ for (int ts = ssch->atsg_sig[0] * s->num_ts_in_ats; ts < ssch->atsg_sig[ssch->aspx_num_env] * s->num_ts_in_ats; ts++) { ++ pow_env[sb] += powf(ssch->Q_low[0][ts][sb], 2); ++ pow_env[sb] += powf(ssch->Q_low[1][ts][sb], 2); ++ } ++ pow_env[sb] /= (ssch->atsg_sig[ssch->aspx_num_env] - ssch->atsg_sig[0]) * s->num_ts_in_ats; ++ pow_env[sb] = 10 * log10f(pow_env[sb] + 1); ++ mean_energy += pow_env[sb]; ++ } ++ ++ mean_energy /= num_qmf_subbands; ++ polyfit(polynomial_order, num_qmf_subbands, x, pow_env, poly_array); ++ ++ /* Transform polynomial into slope */ ++ for (int k = polynomial_order; k >= 0; k--) { ++ for (int sb = 0; sb < num_qmf_subbands; sb++) ++ slope[sb] += powf(x[sb], k) * poly_array[k]; ++ } ++ ++ /* Derive a gain vector from the slope */ ++ for (int sb = 0; sb < num_qmf_subbands; sb++) { ++ ssch->gain_vec[sb] = powf(10, (mean_energy - slope[sb]) / 20.f); ++ } ++} ++ ++static void get_chirps(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ memcpy(ssch->chirp_arr_prev, ssch->chirp_arr, sizeof(ssch->chirp_arr)); ++ ++ for (int sbg = 0; sbg < ssch->num_sbg_noise; sbg++) { ++ float 
/**
 * Complex multiplication: (*r + j * *i) = (x + j*yi) * (u + j*vi).
 *
 * The operands are taken by value, so the outputs may point at the
 * variables that supplied the inputs.
 */
static void fcomplex_mul(float *r, float *i, float x, float yi, float u, float vi)
{
    const float re = x * u - yi * vi;
    const float im = x * vi + u * yi;

    *r = re;
    *i = im;
}
/**
 * Complex division: (*r + j * *i) = (x + j*yi) / (u + j*vi).
 *
 * Computed as numerator * conj(denominator) / |denominator|^2, which gives
 *   real = (x*u  + yi*vi) / (u^2 + vi^2)
 *   imag = (yi*u - x*vi ) / (u^2 + vi^2)
 * The imaginary part previously had the opposite sign, i.e. the function
 * returned the complex conjugate of the quotient.
 *
 * There is no guard against a zero divisor; get_alphas() checks the
 * squared magnitude of the divisor before calling. The operands are
 * taken by value, so the outputs may point at the variables that
 * supplied the inputs.
 */
static void fcomplex_div(float *r, float *i, float x, float yi, float u, float vi)
{
    const float norm = u * u + vi * vi;

    *r = (x * u + yi * vi) / norm;
    *i = (yi * u - x * vi) / norm;
}
*s, Substream *ss, SubstreamChannel *ssch) ++{ ++ int ts_offset_hfadj = 4; ++ ++ /* Loop over QMF time slots */ ++ for (int ts = ssch->atsg_sig[0] * s->num_ts_in_ats; ++ ts < ssch->atsg_sig[ssch->aspx_num_env] * s->num_ts_in_ats; ts++) { ++ int sum_sb_patches = 0; ++ int g = 0; ++ /* Loop over number of patches */ ++ for (int i = 0; i < ssch->num_sbg_patches; i++) { ++ /* Loop over number of subbands per patch */ ++ for (int sb = 0; sb < ssch->sbg_patch_num_sb[i]; sb++) { ++ float cplx[2] = { 0 }; ++ /* Map to High QMF Subband */ ++ int n, p; ++ int sb_high = ssch->sbx + sum_sb_patches + sb; ++ ++ /* Map to current noise envelope */ ++ if (ssch->sbg_noise[g+1] == sb_high) ++ g++; ++ ++ n = ts + ts_offset_hfadj; ++ /* Current low QMF Subband */ ++ p = ssch->sbg_patch_start_sb[i] + sb; ++ ssch->Q_high[0][ts][sb_high] = ssch->Q_low_ext[0][n][p]; ++ ssch->Q_high[1][ts][sb_high] = ssch->Q_low_ext[1][n][p]; ++ ++ fcomplex_mul(&cplx[0], &cplx[1], ssch->alpha0[p][0], ssch->alpha0[p][1], ssch->Q_low_ext[0][n-2][p], ssch->Q_low_ext[1][n-2][p]); ++ fcomplex_mul(&cplx[0], &cplx[1], cplx[0], cplx[1], ssch->chirp_arr[g], 0); ++ ssch->Q_high[0][ts][sb_high] += cplx[0]; ++ ssch->Q_high[1][ts][sb_high] += cplx[1]; ++ fcomplex_mul(&cplx[0], &cplx[1], ssch->alpha1[p][0], ssch->alpha1[p][1], ssch->Q_low_ext[0][n-4][p], ssch->Q_low_ext[1][n-4][p]); ++ fcomplex_mul(&cplx[0], &cplx[1], cplx[0], cplx[1], powf(ssch->chirp_arr[g], 2), 0); ++ ssch->Q_high[0][ts][sb_high] += cplx[0]; ++ ssch->Q_high[1][ts][sb_high] += cplx[1]; ++ if (ss->aspx_preflat) ++ fcomplex_mul(&ssch->Q_high[0][ts][sb_high], &ssch->Q_high[1][ts][sb_high], ssch->Q_high[0][ts][sb_high], ssch->Q_high[1][ts][sb_high], 1.f / ssch->gain_vec[p], 0); ++ } ++ sum_sb_patches += ssch->sbg_patch_num_sb[i]; ++ } ++ } ++} ++ ++static void estimate_spectral_envelopes(AC4DecodeContext *s, Substream *ss, SubstreamChannel *ssch) ++{ ++ int ts_offset_hfadj = 4; ++ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ int sbg = 0; 
++ /* Loop over QMF subbands in A-SPX range */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ int tsa, tsz; ++ float est_sig = 0; ++ ++ /* Update current subband group */ ++ if (sb == ssch->sbg_sig[atsg][sbg+1]) ++ sbg++; ++ ++ tsa = ssch->atsg_sig[atsg]*s->num_ts_in_ats + ts_offset_hfadj; ++ tsz = ssch->atsg_sig[atsg+1]*s->num_ts_in_ats + ts_offset_hfadj; ++ for (int ts = tsa; ts < tsz; ts++) { ++ if (ss->aspx_interpolation == 0) { ++ for (int j = ssch->sbg_sig[atsg][sbg]; j < ssch->sbg_sig[atsg][sbg+1]; j++) { ++ est_sig += hypotf(ssch->Q_high[0][ts][j], ssch->Q_high[1][ts][j]); ++ } ++ } else { ++ est_sig += hypotf(ssch->Q_high[0][ts][sb+ssch->sbx], ssch->Q_high[1][ts][sb+ssch->sbx]); ++ } ++ } ++ ++ if (ss->aspx_interpolation == 0) { ++ est_sig /= ssch->sbg_sig[atsg][sbg+1] - ssch->sbg_sig[atsg][sbg]; ++ est_sig /= ssch->atsg_sig[atsg+1] - ssch->atsg_sig[atsg]; ++ } else { ++ est_sig /= ssch->atsg_sig[atsg+1] - ssch->atsg_sig[atsg]; ++ } ++ ssch->est_sig_sb[atsg][sb] = est_sig; ++ } ++ } ++} ++ ++static void map_signoise(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ int atsg_noise = 0; ++ ++ memset(ssch->scf_noise_sb, 0, sizeof(ssch->scf_noise_sb)); ++ memset(ssch->scf_sig_sb, 0, sizeof(ssch->scf_sig_sb)); ++ ++ /* Loop over Signal Envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Map Signal Envelopes from subband groups to QMF subbands */ ++ ++ for (int sbg = 0; sbg < ssch->num_sbg_sig[atsg]; sbg++) { ++ for (int sb = ssch->sbg_sig[atsg][sbg]-ssch->sbx; sb < ssch->sbg_sig[atsg][sbg+1]-ssch->sbx; sb++) ++ ssch->scf_sig_sb[atsg][sb] = ssch->scf_sig_sbg[atsg][sbg]; ++ } ++ ++ if (ssch->atsg_sig[atsg] == ssch->atsg_noise[atsg_noise + 1]) ++ atsg_noise++; ++ ++ /* Map Noise Floors from subband groups to QMF subbands, and to signal envelopes */ ++ for (int sbg = 0; sbg < ssch->num_sbg_noise; sbg++) { ++ for (int sb = ssch->sbg_noise[sbg] - ssch->sbx; sb < ssch->sbg_noise[sbg + 1] - ssch->sbx; sb++) ++ 
ssch->scf_noise_sb[atsg][sb] = ssch->scf_noise_sbg[atsg_noise][sbg]; ++ } ++ } ++} ++ ++static void add_sinusoids(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ float EPSILON = 1.0f; ++ float LIM_GAIN = 1.41254f; ++ float EPSILON0 = powf(10.f, -12.f); ++ float MAX_SIG_GAIN = powf(10.f, 5.f); ++ float MAX_BOOST_FACT = 1.584893192f; ++ int p_sine_at_end; ++ ++ if (ssch->aspx_tsg_ptr_prev == ssch->aspx_num_env_prev) ++ p_sine_at_end = 0; ++ else ++ p_sine_at_end = -1; ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over high resolution signal envelope subband groups */ ++ for (int sbg = 0; sbg < ssch->num_sbg_sig_highres; sbg++) { ++ int sba = ssch->sbg_sig_highres[sbg] - ssch->sbx; ++ int sbz = ssch->sbg_sig_highres[sbg+1] - ssch->sbx; ++ int sb_mid = (int)(0.5f*(sbz + sba) + 0.5f); ++ /* Map sinusoid markers to QMF subbands */ ++ for (int sb = ssch->sbg_sig_highres[sbg]-ssch->sbx; sb < ssch->sbg_sig_highres[sbg+1]-ssch->sbx; sb++) { ++ if ((sb == sb_mid) && ((atsg >= ssch->aspx_tsg_ptr) || (p_sine_at_end == 0) ++ || ssch->sine_idx_sb_prev[ssch->aspx_num_env_prev-1][sb])) { ++ ssch->sine_idx_sb[atsg][sb] = ssch->aspx_add_harmonic[sbg]; ++ } else { ++ ssch->sine_idx_sb[atsg][sb] = 0; ++ } ++ } ++ } ++ } ++ ++ memcpy(ssch->sine_idx_sb_prev, ssch->sine_idx_sb, sizeof(ssch->sine_idx_sb)); ++ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over subband groups */ ++ for (int sbg = 0; sbg < ssch->num_sbg_sig[atsg]; sbg++) { ++ int sine_present = 0; ++ /* Additional sinusoid present in SF band? 
*/ ++ for (int sb = ssch->sbg_sig[atsg][sbg]-ssch->sbx; sb < ssch->sbg_sig[atsg][sbg+1]-ssch->sbx; sb++) { ++ if (ssch->sine_idx_sb[atsg][sb] == 1) ++ sine_present = 1; ++ } ++ ++ /* Mark all subbands in current subband group accordingly */ ++ for (int sb = ssch->sbg_sig[atsg][sbg]-ssch->sbx; sb < ssch->sbg_sig[atsg][sbg+1]-ssch->sbx; sb++) { ++ ssch->sine_area_sb[atsg][sb] = sine_present; ++ } ++ } ++ } ++ ++ memset(ssch->noise_lev_sb, 0, sizeof(ssch->noise_lev_sb)); ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over QMF subbands in A-SPX range */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ float sig_noise_fact = ssch->scf_sig_sb[atsg][sb] / (1+ssch->scf_noise_sb[atsg][sb]); ++ ++ ssch->sine_lev_sb[atsg][sb] = sqrtf(sig_noise_fact * ssch->sine_idx_sb[atsg][sb]); ++ ssch->noise_lev_sb[atsg][sb] = sqrtf(sig_noise_fact * ssch->scf_noise_sb[atsg][sb]); ++ } ++ } ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over QMF subbands in A-SPX range */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ if (ssch->sine_area_sb[atsg][sb] == 0) { ++ float denom = EPSILON + ssch->est_sig_sb[atsg][sb]; ++ if (!(atsg == ssch->aspx_tsg_ptr || atsg == p_sine_at_end)) ++ denom *= (1 + ssch->scf_noise_sb[atsg][sb]); ++ ssch->sig_gain_sb[atsg][sb] = sqrtf(ssch->scf_sig_sb[atsg][sb] / denom); ++ } else { ++ float denom = EPSILON + ssch->est_sig_sb[atsg][sb]; ++ denom *= 1 + ssch->scf_noise_sb[atsg][sb]; ++ ssch->sig_gain_sb[atsg][sb] = sqrtf(ssch->scf_sig_sb[atsg][sb] * ssch->scf_noise_sb[atsg][sb] / denom); ++ } ++ } ++ } ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over limiter subband groups */ ++ for (int sbg = 0; sbg < ssch->num_sbg_lim; sbg++) { ++ float nom = 0; ++ float denom = EPSILON0; ++ for (int sb = ssch->sbg_lim[sbg]-ssch->sbx; sb < ssch->sbg_lim[sbg+1]-1-ssch->sbx; sb++) { ++ nom += 
ssch->scf_sig_sb[atsg][sb]; ++ denom += ssch->est_sig_sb[atsg][sb]; ++ } ++ ++ ssch->max_sig_gain_sbg[atsg][sbg] = sqrtf(nom/denom) * LIM_GAIN; ++ } ++ ++ /* Map to QMF subbands */ ++ for (int sb = 0, sbg = 0; sb < ssch->num_sb_aspx; sb++) { ++ if (sb == ssch->sbg_lim[sbg+1]-ssch->sbx) ++ sbg++; ++ ssch->max_sig_gain_sb[atsg][sb] = FFMIN(ssch->max_sig_gain_sbg[atsg][sbg], MAX_SIG_GAIN); ++ } ++ } ++ ++ memset(ssch->noise_lev_sb_lim, 0, sizeof(ssch->noise_lev_sb_lim)); ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over QMF subbands */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ float tmp = ssch->noise_lev_sb[atsg][sb]; ++ ++ tmp *= ssch->max_sig_gain_sb[atsg][sb] / ssch->sig_gain_sb[atsg][sb]; ++ ssch->noise_lev_sb_lim[atsg][sb] = FFMIN(ssch->noise_lev_sb[atsg][sb], tmp); ++ } ++ } ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over QMF subbands */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ ssch->sig_gain_sb_lim[atsg][sb] = FFMIN(ssch->sig_gain_sb[atsg][sb], ++ ssch->max_sig_gain_sb[atsg][sb]); ++ } ++ } ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over limiter subband groups */ ++ for (int sbg = 0; sbg < ssch->num_sbg_lim; sbg++) { ++ float nom, denom; ++ ++ nom = denom = EPSILON0; ++ /* Loop over subbands */ ++ for (int sb = ssch->sbg_lim[sbg]-ssch->sbx; sb < ssch->sbg_lim[sbg+1]-1-ssch->sbx; sb++) { ++ nom += ssch->scf_sig_sb[atsg][sb]; ++ denom += ssch->est_sig_sb[atsg][sb] * powf(ssch->sig_gain_sb_lim[atsg][sb], 2); ++ denom += powf(ssch->sine_lev_sb[atsg][sb], 2); ++ if (!((ssch->sine_lev_sb[atsg][sb] != 0) ++ || (atsg == ssch->aspx_tsg_ptr) || (atsg == p_sine_at_end))) ++ denom += powf(ssch->noise_lev_sb_lim[atsg][sb], 2); ++ } ++ ssch->boost_fact_sbg[atsg][sbg] = sqrtf(nom/denom); ++ } ++ } ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; 
atsg++) { ++ int sbg = 0; ++ /* Loop over QMF subbands */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ if (sb == ssch->sbg_lim[sbg+1]-ssch->sbx) ++ sbg++; ++ ssch->boost_fact_sb[atsg][sb] = FFMIN(ssch->boost_fact_sbg[atsg][sbg], MAX_BOOST_FACT); ++ } ++ } ++ ++ memset(ssch->noise_lev_sb_adj, 0, sizeof(ssch->noise_lev_sb_adj)); ++ ++ /* Loop over envelopes */ ++ for (int atsg = 0; atsg < ssch->aspx_num_env; atsg++) { ++ /* Loop over QMF subbands */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ float boost_fact = ssch->boost_fact_sb[atsg][sb]; ++ ssch->sig_gain_sb_adj[atsg][sb] = ssch->sig_gain_sb_lim[atsg][sb] * boost_fact; ++ ssch->noise_lev_sb_adj[atsg][sb] = ssch->noise_lev_sb_lim[atsg][sb] * boost_fact; ++ ssch->sine_lev_sb_adj[atsg][sb] = ssch->sine_lev_sb[atsg][sb] * boost_fact; ++ } ++ } ++} ++ ++static int sine_idx(int sb, int ts, AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ int index; ++ ++ if (s->first_frame) { ++ index = 1; ++ s->first_frame = 0; ++ } else { ++ index = (ssch->sine_idx_prev[ts][sb] + 1) % 4; ++ } ++ index += ts - ssch->atsg_sig[0]; ++ ++ return index % 4; ++} ++ ++static int noise_idx(int sb, int ts, AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ int index; ++ ++ if (ssch->master_reset) { ++ index = 0; ++ } else { ++ index = ssch->noise_idx_prev[ts][sb]; ++ } ++ index += ssch->num_sb_aspx * (ts - ssch->atsg_sig[0]); ++ index += sb + 1; ++ ++ return index % 512; ++} ++ ++static void generate_noise(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ int atsg = 0; ++ ++ memset(ssch->qmf_noise, 0, sizeof(ssch->qmf_noise)); ++ ++ /* Loop over QMF time slots */ ++ for (int ts = ssch->atsg_sig[0] * s->num_ts_in_ats; ++ ts < ssch->atsg_sig[ssch->aspx_num_env] * s->num_ts_in_ats; ts++) { ++ if (ts == ssch->atsg_sig[atsg+1] * s->num_ts_in_ats) ++ atsg++; ++ /* Loop over QMF subbands in A-SPX */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ short idx; ++ ++ ssch->noise_idx_prev[ts][sb] = idx = 
(short)noise_idx(sb, ts, s, ssch); ++ ssch->qmf_noise[0][ts][sb] = ssch->noise_lev_sb_adj[atsg][sb] * aspx_noise[idx][0]; ++ ssch->qmf_noise[1][ts][sb] = ssch->noise_lev_sb_adj[atsg][sb] * aspx_noise[idx][1]; ++ } ++ } ++} ++ ++static void generate_tones(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ int atsg = 0; ++ ++ /* Loop over QMF time slots */ ++ for (int ts = ssch->atsg_sig[0] * s->num_ts_in_ats; ++ ts < ssch->atsg_sig[ssch->aspx_num_env] * s->num_ts_in_ats; ts++) { ++ if (ts == ssch->atsg_sig[atsg+1] * s->num_ts_in_ats) ++ atsg++; ++ /* Loop over QMF subbands in A-SPX */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ int8_t idx; ++ ++ ssch->sine_idx_prev[ts][sb] = idx = (int8_t)sine_idx(sb, ts, s, ssch); ++ ssch->qmf_sine[0][ts][sb] = ssch->sine_lev_sb_adj[atsg][sb]; ++ ssch->qmf_sine[0][ts][sb] *= aspx_sine[0][idx]; ++ ssch->qmf_sine[1][ts][sb] = ssch->sine_lev_sb_adj[atsg][sb] * powf(-1, sb + ssch->sbx); ++ ssch->qmf_sine[1][ts][sb] *= aspx_sine[1][idx]; ++ } ++ } ++} ++ ++static void assemble_hf_signal(AC4DecodeContext *s, SubstreamChannel *ssch) ++{ ++ int ts_offset_hfadj = 4; ++ int atsg = 0; ++ ++ memcpy(ssch->Y_prev, ssch->Y, sizeof(ssch->Y)); ++ memset(ssch->Y, 0, sizeof(ssch->Y)); ++ ++ /* Get delayed QMF subsamples from delay buffer */ ++ for (int ts = 0; ts < ssch->atsg_sig[0] * s->num_ts_in_ats; ts++) { ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ ssch->Y[0][ts][sb] = ssch->Y_prev[0][s->num_qmf_timeslots + ts][sb]; ++ ssch->Y[1][ts][sb] = ssch->Y_prev[1][s->num_qmf_timeslots + ts][sb]; ++ } ++ } ++ ++ /* Loop over QMF time slots */ ++ for (int ts = ssch->atsg_sig[0] * s->num_ts_in_ats; ++ ts < ssch->atsg_sig[ssch->aspx_num_env] * s->num_ts_in_ats; ts++) { ++ if (ts == ssch->atsg_sig[atsg+1] * s->num_ts_in_ats) ++ atsg++; ++ /* Loop over QMF subbands */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ ssch->Y[0][ts][sb] = ssch->sig_gain_sb_adj[atsg][sb]; ++ ssch->Y[1][ts][sb] = 0; ++ fcomplex_mul(&ssch->Y[0][ts][sb], 
&ssch->Y[1][ts][sb], ++ ssch->Y[0][ts][sb], ssch->Y[1][ts][sb], ++ ssch->Q_high[0][ts + ts_offset_hfadj][sb + ssch->sbx], ++ ssch->Q_high[1][ts + ts_offset_hfadj][sb + ssch->sbx]); ++ } ++ } ++ ++ /* Loop over time slots */ ++ for (int ts = ssch->atsg_sig[0] * s->num_ts_in_ats; ++ ts < ssch->atsg_sig[ssch->aspx_num_env] * s->num_ts_in_ats; ts++) { ++ /* Loop over QMF subbands */ ++ for (int sb = 0; sb < ssch->num_sb_aspx; sb++) { ++ ssch->Y[0][ts][sb] += ssch->qmf_sine[0][ts][sb]; ++ ssch->Y[1][ts][sb] += ssch->qmf_sine[1][ts][sb]; ++ ssch->Y[0][ts][sb] += ssch->qmf_noise[0][ts][sb]; ++ ssch->Y[1][ts][sb] += ssch->qmf_noise[1][ts][sb]; ++ } ++ } ++ ++ for (int ts = ssch->atsg_sig[0] * s->num_ts_in_ats; ++ ts < ssch->atsg_sig[ssch->aspx_num_env] * s->num_ts_in_ats; ts++) { ++ /* Loop over QMF subbands */ ++ for (int sb = ssch->sbx; sb < 64; sb++) { ++ ssch->Q[0][ts][sb] += ssch->Y[0][ts][sb-ssch->sbx] / 32768.f; ++ ssch->Q[1][ts][sb] += ssch->Y[1][ts][sb-ssch->sbx] / 32768.f; ++ } ++ } ++ ++ memcpy(ssch->Q_prev, ssch->Q, sizeof(ssch->Q)); ++} ++ ++static int mono_aspx_processing(AC4DecodeContext *s, Substream *ss) ++{ ++ if (ss->codec_mode == CM_ASPX) { ++ aspx_processing(s, &ss->ssch[0]); ++ get_qsignal_scale_factors(s, &ss->ssch[0], 0); ++ get_qnoise_scale_factors(s, &ss->ssch[0], 0); ++ mono_deq_signal_factors(s, &ss->ssch[0]); ++ mono_deq_noise_factors(s, &ss->ssch[0]); ++ preflattening(s, &ss->ssch[0]); ++ get_covariance(s, &ss->ssch[0]); ++ get_alphas(s, &ss->ssch[0]); ++ get_chirps(s, &ss->ssch[0]); ++ create_high_signal(s, ss, &ss->ssch[0]); ++ estimate_spectral_envelopes(s, ss, &ss->ssch[0]); ++ map_signoise(s, &ss->ssch[0]); ++ add_sinusoids(s, &ss->ssch[0]); ++ generate_tones(s, &ss->ssch[0]); ++ generate_noise(s, &ss->ssch[0]); ++ assemble_hf_signal(s, &ss->ssch[0]); ++ } ++ ++ return 0; ++} ++ ++static int stereo_aspx_processing(AC4DecodeContext *s, Substream *ss) ++{ ++ if (ss->codec_mode == CM_ASPX) { ++ aspx_processing(s, &ss->ssch[0]); ++ 
aspx_processing(s, &ss->ssch[1]); ++ get_qsignal_scale_factors(s, &ss->ssch[0], 0); ++ get_qsignal_scale_factors(s, &ss->ssch[1], 1); ++ get_qnoise_scale_factors(s, &ss->ssch[0], 0); ++ get_qnoise_scale_factors(s, &ss->ssch[1], 1); ++ if (ss->ssch[0].aspx_balance == 0) { ++ mono_deq_signal_factors(s, &ss->ssch[0]); ++ mono_deq_signal_factors(s, &ss->ssch[1]); ++ mono_deq_noise_factors(s, &ss->ssch[0]); ++ mono_deq_noise_factors(s, &ss->ssch[1]); ++ } else { ++ stereo_deq_signoise_factors(s, &ss->ssch[0], &ss->ssch[1]); ++ } ++ preflattening(s, &ss->ssch[0]); ++ preflattening(s, &ss->ssch[1]); ++ get_covariance(s, &ss->ssch[0]); ++ get_covariance(s, &ss->ssch[1]); ++ get_alphas(s, &ss->ssch[0]); ++ get_alphas(s, &ss->ssch[1]); ++ get_chirps(s, &ss->ssch[0]); ++ get_chirps(s, &ss->ssch[1]); ++ create_high_signal(s, ss, &ss->ssch[0]); ++ create_high_signal(s, ss, &ss->ssch[1]); ++ estimate_spectral_envelopes(s, ss, &ss->ssch[0]); ++ estimate_spectral_envelopes(s, ss, &ss->ssch[1]); ++ map_signoise(s, &ss->ssch[0]); ++ map_signoise(s, &ss->ssch[1]); ++ add_sinusoids(s, &ss->ssch[0]); ++ add_sinusoids(s, &ss->ssch[1]); ++ generate_tones(s, &ss->ssch[0]); ++ generate_tones(s, &ss->ssch[1]); ++ generate_noise(s, &ss->ssch[0]); ++ generate_noise(s, &ss->ssch[1]); ++ assemble_hf_signal(s, &ss->ssch[0]); ++ assemble_hf_signal(s, &ss->ssch[1]); ++ } ++ ++ return 0; ++} ++ ++static void decode_channel(AC4DecodeContext *s, int ch, float *pcm) ++{ ++ Substream *ss = &s->substream; ++ SubstreamChannel *ssch = &ss->ssch[ch]; ++ ++ qmf_synthesis(s, ssch, pcm); ++} ++ ++static int ac4_decode_frame(AVCodecContext *avctx, AVFrame *frame, ++ int *got_frame_ptr, AVPacket *avpkt) ++{ ++ AC4DecodeContext *s = avctx->priv_data; ++ GetBitContext *gb = &s->gbc; ++ int ret, start_offset = 0; ++ SubstreamInfo *ssinfo; ++ int presentation; ++ uint32_t header; ++ ++ if (avpkt->size < 8) { ++ av_log(s->avctx, AV_LOG_ERROR, "invalid packet size: %d\n", avpkt->size); ++ return AVERROR_INVALIDDATA; 
++ } ++ ++ header = AV_RB16(avpkt->data); ++ if (header == 0xAC40 || header == 0xAC41) { ++ int size = AV_RB16(avpkt->data + 2); ++ ++ start_offset = 4; ++ if (size == 0xFFFF) { ++ start_offset += 3; ++ size = AV_RB24(avpkt->data + 4); ++ } ++ } ++ ++ if ((ret = init_get_bits8(gb, avpkt->data, avpkt->size)) < 0) ++ return ret; ++ av_log(s->avctx, AV_LOG_DEBUG, "packet_size: %d\n", avpkt->size); ++ skip_bits_long(gb, start_offset * 8); ++ ++ ret = ac4_toc(s); ++ if (ret < 0) ++ return ret; ++ ++ if (!s->have_iframe) ++ return avpkt->size; ++ ++ presentation = FFMIN(s->target_presentation, FFMAX(0, s->nb_presentations - 1)); ++ ssinfo = s->version == 2 ? &s->ssgroup[0].ssinfo : &s->pinfo[presentation].ssinfo; ++ avctx->ch_layout.nb_channels = channel_mode_nb_channels[ssinfo->channel_mode]; ++ avctx->ch_layout = ff_ac4_ch_layouts[ssinfo->channel_mode]; ++ avctx->sample_rate = s->fs_index ? 48000 : 44100; ++ avctx->sample_rate = (int)av_rescale(avctx->sample_rate, ++ s->resampling_ratio.den, ++ s->resampling_ratio.num); ++ frame->nb_samples = s->frame_len_base; ++ if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) ++ return ret; ++ ++ skip_bits_long(gb, s->payload_base * 8); ++ ++ for (int i = 0; i < s->nb_substreams; i++) { ++ int substream_type = s->substream_type[i]; ++ ++ switch (substream_type) { ++ case ST_SUBSTREAM: ++ ret = ac4_substream(s, ssinfo); ++ break; ++ case ST_PRESENTATION: ++ skip_bits_long(gb, s->substream_size[i] * 8); ++ break; ++ default: ++ av_assert0(0); ++ } ++ ++ if (ret < 0) ++ return ret; ++ if (substream_type == ST_SUBSTREAM) ++ break; ++ } ++ ++ if (get_bits_left(gb) < 0) ++ av_log(s->avctx, AV_LOG_WARNING, "overread\n"); ++ ++ for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) ++ scale_spec(s, ch); ++ ++ switch (ssinfo->channel_mode) { ++ case 0: ++ /* nothing to do */ ++ break; ++ case 1: ++ stereo_processing(s, &s->substream); ++ break; ++ case 3: ++ case 4: ++ m5channel_processing(s, &s->substream); ++ break; ++ } ++ ++ for (int 
ch = 0; ch < avctx->ch_layout.nb_channels; ch++) ++ prepare_channel(s, ch); ++ ++ switch (ssinfo->channel_mode) { ++ case 0: ++ mono_aspx_processing(s, &s->substream); ++ break; ++ case 1: ++ stereo_aspx_processing(s, &s->substream); ++ break; ++ case 3: ++ case 4: ++ break; ++ } ++ ++ for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) ++ decode_channel(s, ch, (float *)frame->extended_data[ch]); ++ ++ if (s->iframe_global) ++ frame->flags |= AV_FRAME_FLAG_KEY; ++ else ++ frame->flags &= ~AV_FRAME_FLAG_KEY; ++ ++ *got_frame_ptr = 1; ++ ++ return avpkt->size; ++} ++ ++static av_cold void ac4_flush(AVCodecContext *avctx) ++{ ++ AC4DecodeContext *s = avctx->priv_data; ++ ++ s->have_iframe = 0; ++ s->sequence_counter_prev = 0; ++} ++ ++static av_cold int ac4_decode_end(AVCodecContext *avctx) ++{ ++ AC4DecodeContext *s = avctx->priv_data; ++ ++ av_freep(&s->fdsp); ++ ++ for (int j = 0; j < 8; j++) ++ for (int i = 0; i < 5; i++) ++ av_tx_uninit(&s->tx_ctx[j][i]); ++ ++ return 0; ++} ++ ++#define OFFSET(param) offsetof(AC4DecodeContext, param) ++#define FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM ++ ++static const AVOption options[] = { ++ { "presentation", "select presentation", OFFSET(target_presentation), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, FLAGS }, ++ { NULL }, ++}; ++ ++static const AVClass ac4_decoder_class = { ++ .class_name = "AC4 decoder", ++ .item_name = av_default_item_name, ++ .option = options, ++ .version = LIBAVUTIL_VERSION_INT, ++}; ++ ++FFCodec ff_ac4_decoder = { ++ .p.name = "ac4", ++ CODEC_LONG_NAME("Dolby AC-4"), ++ .p.type = AVMEDIA_TYPE_AUDIO, ++ .p.id = AV_CODEC_ID_AC4, ++ .p.priv_class = &ac4_decoder_class, ++ .priv_data_size = sizeof (AC4DecodeContext), ++ .init = ac4_decode_init, ++ .close = ac4_decode_end, ++ FF_CODEC_DECODE_CB(ac4_decode_frame), ++ .flush = ac4_flush, ++ .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF, ++ .p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, ++ 
AV_SAMPLE_FMT_NONE }, ++}; +Index: FFmpeg/libavcodec/ac4dec_data.h +=================================================================== +--- /dev/null ++++ libavcodec/ac4dec_data.h +@@ -0,0 +1,1664 @@ ++/* ++ * AC-4 Audio Decoder ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVCODEC_AC4DEC_DATA_H ++#define AVCODEC_AC4DEC_DATA_H ++ ++#include "libavutil/mem_internal.h" ++ ++static const uint8_t aspx_hcb_env_level_15_f0_bits[71] = { ++ 7, 9, 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, ++ 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 6, 6, 6, 7, 8, ++ 9, 10, 11, 12, 14, 14, 16, 16, 17, 17, 18, 18, 18, 18, 18, 18, ++ 18, 18, 17, 17, 17, 17, 16, ++}; ++ ++static const uint32_t aspx_hcb_env_level_15_f0_codes[71] = { ++ 0x0003e, 0x0007e, 0x0007f, 0x000be, 0x000bf, 0x0003e, 0x0005e, 0x00016, ++ 0x00017, 0x0001e, 0x0002e, 0x0005e, 0x00012, 0x0000a, 0x0000c, 0x0000e, ++ 0x00013, 0x00016, 0x0001a, 0x0001b, 0x00022, 0x00023, 0x00004, 0x00008, ++ 0x0000c, 0x0000e, 0x00012, 0x00014, 0x00015, 0x0001a, 0x00000, 0x00016, ++ 0x00018, 0x0001e, 0x0001c, 0x00019, 0x0001d, 0x0001f, 0x00001, 0x0001b, ++ 0x00013, 0x00010, 0x0000a, 0x0002e, 0x0001e, 0x0000d, 0x0003f, 0x000be, ++ 0x0017e, 0x002fe, 0x005fe, 
0x00bfe, 0x02ffe, 0x02ffc, 0x0bffc, 0x0bff6, ++ 0x17ffa, 0x17ffb, 0x2fff8, 0x2fff9, 0x2fffa, 0x2fffb, 0x2fffc, 0x2fffd, ++ 0x2fffe, 0x2ffff, 0x17fe8, 0x17fe9, 0x17fea, 0x17feb, 0x0bff7, ++}; ++ ++static const uint8_t aspx_hcb_env_level_15_df_bits[141] = { ++ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 17, 18, 19, 19, 21, 19, 19, 19, 21, 20, 20, 20, 16, 17, ++ 19, 19, 18, 18, 17, 15, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, ++ 13, 13, 13, 13, 12, 12, 12, 12, 12, 11, 10, 10, 9, 8, 8, 7, ++ 6, 6, 5, 4, 3, 2, 2, 3, 4, 6, 7, 7, 8, 9, 10, 11, ++ 11, 12, 13, 13, 14, 14, 15, 15, 15, 16, 16, 16, 17, 17, 17, 17, ++ 17, 18, 17, 17, 16, 17, 18, 19, 20, 21, 19, 19, 18, 16, 19, 21, ++ 16, 20, 20, 21, 21, 20, 20, 19, 20, 19, 17, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, ++}; ++ ++static const uint32_t aspx_hcb_env_level_15_df_codes[141] = { ++ 0xfeff2, 0xfeff3, 0xfeff4, 0xfeff5, 0xfeff6, 0xfeff7, 0xfeff8, 0xfeff9, ++ 0xfeffa, 0xfeffb, 0xfeffc, 0xfeffd, 0xfeffe, 0xfefff, 0xff5e4, 0xff5e5, ++ 0xff5e6, 0xff5e7, 0x0fef4, 0x1fefe, 0x3fdea, 0x3ffde, 0xff7b6, 0x3fdec, ++ 0x3fdee, 0x3fd78, 0xff5ee, 0x7f798, 0x7faf6, 0x7f799, 0x07ffa, 0x0fef6, ++ 0x3fdef, 0x3ffdf, 0x1fdee, 0x1ffee, 0x0ff7e, 0x03d7e, 0x03fba, 0x03fbe, ++ 0x03ffe, 0x01ebe, 0x01fdc, 0x01fee, 0x01ff6, 0x00f5c, 0x00f5d, 0x00f7e, ++ 0x00f7f, 0x00ff4, 0x00ff6, 0x00ffe, 0x007bc, 0x007bd, 0x007f6, 0x007fc, ++ 0x007fe, 0x003fa, 0x001ea, 0x001fc, 0x000f6, 0x00076, 0x0007e, 0x0003c, ++ 0x0001c, 0x0003e, 0x0001e, 0x0000e, 0x00006, 0x00002, 0x00000, 0x00002, ++ 0x00006, 0x0003f, 0x0003e, 0x0003a, 0x00077, 0x000f4, 0x001ee, 0x003fc, ++ 0x003d6, 0x007be, 0x00ffa, 0x00f5e, 0x01ff7, 0x01fea, 0x03ffc, 0x03fd6, ++ 0x03fbb, 0x07fbe, 0x07fae, 0x07f7e, 0x0fff6, 0x0fffe, 0x0fef5, 0x0fef2, ++ 0x0f5fe, 0x1fffe, 0x0ff78, 0x0fefe, 0x07afe, 0x0ff79, 0x1fef4, 0x3fd7a, ++ 0x7f79a, 0xff5ef, 0x3fdfe, 0x3fffe, 0x1fdef, 0x07ffe, 0x3fdff, 0xff5f2, ++ 0x07f78, 0x7f79b, 0x7f79e, 0xff5f3, 0xff5f4, 
0x7f79f, 0x7faf8, 0x3ffff, ++ 0x7f7f8, 0x3fbce, 0x0f5ff, 0xff5f5, 0xff5f6, 0xff5f7, 0xff5f8, 0xff5f9, ++ 0xff5fa, 0xff5fb, 0xff5fc, 0xff5fd, 0xff5fe, 0xff5ff, 0xff7ac, 0xff7ad, ++ 0xff7ae, 0xff7af, 0xff7b4, 0xff7b5, 0xff7b7, ++}; ++ ++static const uint8_t aspx_hcb_env_level_15_dt_bits[141] = { ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, ++ 17, 19, 17, 16, 17, 17, 17, 16, 15, 16, 15, 15, 14, 14, 14, 13, ++ 12, 10, 8, 6, 4, 2, 1, 3, 5, 7, 10, 11, 12, 14, 14, 15, ++ 15, 16, 16, 17, 16, 17, 17, 18, 16, 17, 18, 18, 19, 18, 19, 19, ++ 17, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++}; ++ ++static const uint32_t aspx_hcb_env_level_15_dt_codes[141] = { ++ 0x7ffa0, 0x7ffa1, 0x7ffa2, 0x7ffa3, 0x7ffa4, 0x7ffa5, 0x7ffa6, 0x7ffa7, ++ 0x7ffa8, 0x7ffa9, 0x7ffaa, 0x7ffab, 0x7ffac, 0x7ffad, 0x7ffae, 0x7ffaf, ++ 0x7ffb0, 0x7ffb1, 0x7ffb2, 0x7ffb3, 0x7ffb4, 0x7ffb5, 0x7ffb6, 0x7ffb7, ++ 0x7ffb8, 0x7ffb9, 0x7ffba, 0x7ffbb, 0x7ffbc, 0x7ffbd, 0x7ffbe, 0x7ffbf, ++ 0x7ffc0, 0x7ffc1, 0x7ffc2, 0x7ffc3, 0x7ffc4, 0x7ffc5, 0x7ffc6, 0x7ffc7, ++ 0x7ffcc, 0x7ffcd, 0x7ffce, 0x3ff16, 0x3ff17, 0x3ff1e, 0x3ff1f, 0x3ff3c, ++ 0x1fefa, 0x7ffcf, 0x1ffb6, 0x0ff7c, 0x1ff8e, 0x1ffbe, 0x1fefb, 0x0ffc4, ++ 0x07fba, 0x0ffc6, 0x07fbb, 0x07fee, 0x03fdc, 0x03ff0, 0x03ff2, 0x01ffa, ++ 0x00ffe, 0x003fc, 0x000fe, 0x0003e, 0x0000e, 0x00002, 0x00000, 0x00006, ++ 0x0001e, 0x0007e, 0x003fe, 0x007fa, 0x00ff6, 0x03ffc, 0x03fde, 0x07fe6, ++ 0x07fec, 0x0ffda, 0x0ffde, 0x1fefe, 0x0ffce, 0x1fff2, 0x1ffbf, 0x3ff3d, ++ 0x0ff7e, 0x1feff, 0x3ff3e, 0x3ff3f, 0x7ffd2, 0x3ffe8, 0x7ffd3, 0x7ffd4, ++ 0x1ff8a, 0x3ff6e, 0x3ff6f, 0x7ffd5, 0x7ffd6, 0x7ffd7, 0x7ffd8, 0x7ffd9, ++ 0x7ffda, 0x7ffdb, 0x7ffde, 0x7ffdf, 0x3ffee, 0x7ffe0, 0x7ffe1, 0x7ffe2, 
++ 0x7ffe3, 0x7ffe4, 0x7ffe5, 0x7ffe6, 0x7ffe7, 0x7ffe8, 0x7ffe9, 0x7ffea, ++ 0x7ffeb, 0x7ffec, 0x7ffed, 0x7ffee, 0x7ffef, 0x7fff0, 0x7fff1, 0x7fff2, ++ 0x7fff3, 0x7fff4, 0x7fff5, 0x7fff6, 0x7fff7, 0x7fff8, 0x7fff9, 0x7fffa, ++ 0x7fffb, 0x7fffc, 0x7fffd, 0x7fffe, 0x7ffff, ++}; ++ ++static const uint8_t aspx_hcb_env_balance_15_f0_bits[25] = { ++ 13, 16, 15, 15, 13, 12, 11, 9, 8, 7, 5, 3, 1, 2, 4, 7, ++ 8, 9, 10, 10, 11, 12, 13, 15, 16, ++}; ++ ++static const uint32_t aspx_hcb_env_balance_15_f0_codes[25] = { ++ 0x01ffe, 0x0ffea, 0x07ff6, 0x07ff7, 0x01fff, 0x00ffc, 0x007de, 0x001f6, ++ 0x000fa, 0x0007c, 0x0001e, 0x00006, 0x00000, 0x00002, 0x0000e, 0x0007e, ++ 0x000fe, 0x001fe, 0x003fe, 0x003ee, 0x007df, 0x00ffd, 0x01ffc, 0x07ff4, ++ 0x0ffeb, ++}; ++ ++static const uint8_t aspx_hcb_env_balance_15_df_bits[49] = { ++ 19, 19, 19, 19, 19, 18, 18, 17, 16, 16, 16, 15, 13, 13, 12, 12, ++ 12, 11, 10, 9, 8, 7, 5, 3, 1, 2, 4, 7, 8, 10, 10, 11, ++ 12, 12, 13, 13, 14, 15, 16, 18, 18, 17, 18, 18, 18, 19, 19, 18, ++ 19, ++}; ++ ++static const uint32_t aspx_hcb_env_balance_15_df_codes[49] = { ++ 0x7def6, 0x7def7, 0x7defa, 0x7defb, 0x7dffe, 0x3ef7c, 0x3ef7e, 0x1f7b2, ++ 0x0fbd8, 0x0fbda, 0x0fbfe, 0x07dfe, 0x01f6e, 0x01f7a, 0x00f9e, 0x00fb6, ++ 0x00fbc, 0x007da, 0x003ec, 0x001f2, 0x000fa, 0x0007e, 0x0001e, 0x00006, ++ 0x00000, 0x00002, 0x0000e, 0x0007f, 0x000f8, 0x003ee, 0x003e6, 0x007ce, ++ 0x00fbe, 0x00f9f, 0x01f7e, 0x01f6f, 0x03efe, 0x07dee, 0x0fbdb, 0x3ef78, ++ 0x3ef7f, 0x1f7b3, 0x3effc, 0x3effd, 0x3ef79, 0x7dffc, 0x7dffd, 0x3ef7a, ++ 0x7dfff, ++}; ++ ++static const uint8_t aspx_hcb_env_balance_15_dt_bits[49] = { ++ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 13, 13, 12, 12, ++ 11, 12, 11, 9, 8, 7, 4, 2, 1, 3, 5, 7, 9, 9, 10, 11, ++ 11, 12, 12, 12, 12, 13, 15, 14, 15, 14, 15, 15, 15, 14, 14, 15, ++ 15, ++}; ++ ++static const uint32_t aspx_hcb_env_balance_15_dt_codes[49] = { ++ 0x07dce, 0x07dcf, 0x07ddc, 0x07ddd, 0x07dde, 0x07ddf, 0x07dec, 0x07ded, ++ 0x07dee, 0x07def, 0x07df8, 
0x03ebe, 0x01f5e, 0x01f7a, 0x00fb8, 0x00fba, ++ 0x007cc, 0x00fbe, 0x007d6, 0x001f2, 0x000f8, 0x0007e, 0x0000e, 0x00002, ++ 0x00000, 0x00006, 0x0001e, 0x0007f, 0x001f6, 0x001f4, 0x003ea, 0x007cd, ++ 0x007ce, 0x00f9e, 0x00fae, 0x00fbc, 0x00f9f, 0x01f76, 0x07df9, 0x03ebf, ++ 0x07dfa, 0x03ee4, 0x07dfb, 0x07dfc, 0x07dfd, 0x03ee5, 0x03ee6, 0x07dfe, ++ 0x07dff, ++}; ++ ++static const uint8_t aspx_hcb_env_level_30_f0_bits[36] = { ++ 11, 13, 11, 11, 10, 10, 9, 8, 8, 7, 7, 7, 6, 6, 5, 5, ++ 4, 4, 4, 4, 3, 3, 3, 3, 4, 5, 6, 7, 9, 12, 14, 16, ++ 17, 16, 17, 16, ++}; ++ ++static const uint32_t aspx_hcb_env_level_30_f0_codes[36] = { ++ 0x007fe, 0x01ffe, 0x001de, 0x001df, 0x000ee, 0x003fe, 0x001fe, 0x0003a, ++ 0x000fe, 0x0001c, 0x0001e, 0x0007e, 0x0002e, 0x0003e, 0x00006, 0x00016, ++ 0x00002, 0x0000a, 0x0000c, 0x0000e, 0x00002, 0x00003, 0x00004, 0x00000, ++ 0x0000d, 0x0001e, 0x0002f, 0x0001f, 0x00076, 0x00ffe, 0x03ffe, 0x0fffc, ++ 0x1fffe, 0x0fffe, 0x1ffff, 0x0fffd, ++}; ++ ++static const uint8_t aspx_hcb_env_level_30_df_bits[71] = { ++ 23, 23, 23, 23, 22, 22, 22, 22, 20, 19, 19, 20, 19, 18, 17, 16, ++ 16, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 10, 9, 8, ++ 5, 4, 2, 1, 3, 6, 8, 9, 11, 12, 14, 14, 15, 16, 17, 17, ++ 17, 18, 18, 19, 21, 19, 17, 20, 19, 20, 20, 20, 19, 19, 19, 22, ++ 22, 22, 22, 22, 22, 22, 22, ++}; ++ ++static const uint32_t aspx_hcb_env_level_30_df_codes[71] = { ++ 0x7ffbfc, 0x7ffbfd, 0x7ffbfe, 0x7ffbff, 0x3f7f6c, 0x3f7f6d, 0x3f7f6e, ++ 0x3f7f6f, 0x0fdfee, 0x07effe, 0x07efec, 0x0fff7e, 0x07ffbe, 0x03f7fe, ++ 0x01fbfe, 0x00fdfc, 0x00fff6, 0x007ffa, 0x003f7e, 0x003ffc, 0x003ffe, ++ 0x001fba, 0x001fbe, 0x001ffa, 0x001ffb, 0x000fdc, 0x000ffc, 0x000ffe, ++ 0x0007fc, 0x0003f6, 0x0001fe, 0x0000fe, 0x00001e, 0x00000e, 0x000002, ++ 0x000000, 0x000006, 0x00003e, 0x0000fc, 0x0001fa, 0x0007fd, 0x000fde, ++ 0x003fff, 0x003f76, 0x007eee, 0x00fdde, 0x01ffee, 0x01fbfc, 0x01fbfa, ++ 0x03ffde, 0x03f77e, 0x07efee, 0x1ffefe, 0x07eefe, 0x01fbbe, 0x0fdfea, ++ 0x07eff6, 
0x0fdfda, 0x0fdfde, 0x0fdfef, 0x07eff4, 0x07efff, 0x07eeff, ++ 0x3f7fae, 0x3f7f7c, 0x3f7f7d, 0x3f7f7e, 0x3f7f7f, 0x3f7fac, 0x3f7fad, ++ 0x3f7faf, ++}; ++ ++static const uint8_t aspx_hcb_env_level_30_dt_bits[71] = { ++ 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 20, 20, 18, 16, 16, ++ 15, 16, 15, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10, 8, ++ 6, 4, 2, 1, 3, 5, 8, 9, 11, 11, 12, 13, 13, 14, 14, 14, ++ 15, 15, 16, 17, 18, 19, 18, 18, 18, 18, 20, 19, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 19, ++}; ++ ++static const uint32_t aspx_hcb_env_level_30_dt_codes[71] = { ++ 0x0ff7d6, 0x0ff7d7, 0x0ff7d8, 0x0ff7d9, 0x0ff7da, 0x0ff7db, 0x0ff7dc, ++ 0x0ff7dd, 0x0ff7de, 0x07fbfe, 0x07ed7a, 0x0ff7df, 0x0ff7f2, 0x03fdf4, ++ 0x00ff7c, 0x00fdae, 0x007ede, 0x00ff7e, 0x007ed2, 0x003f68, 0x003f6e, ++ 0x003ffa, 0x001fb6, 0x001ffe, 0x001fff, 0x000fde, 0x000ff6, 0x0007ec, ++ 0x0007fa, 0x0003fc, 0x0003fe, 0x0000fc, 0x00003e, 0x00000e, 0x000002, ++ 0x000000, 0x000006, 0x00001e, 0x0000fe, 0x0001fa, 0x0007fe, 0x0007ee, ++ 0x000fdf, 0x001ffc, 0x001fee, 0x003ffb, 0x003fde, 0x003f6a, 0x007ed6, ++ 0x007ed3, 0x00fdbe, 0x01fb7e, 0x03f6bc, 0x07fbf8, 0x03f6be, 0x03f6bf, ++ 0x03f6fe, 0x03f6ff, 0x0ff7f3, 0x07fbea, 0x0ff7f4, 0x0ff7f5, 0x0ff7f6, ++ 0x0ff7f7, 0x0ff7f8, 0x0ff7f9, 0x0ff7fa, 0x0ff7fb, 0x0ff7fe, 0x0ff7ff, ++ 0x07ed7b, ++}; ++ ++static const uint8_t aspx_hcb_env_balance_30_f0_bits[13] = { ++ 12, 11, 9, 7, 4, 3, 1, 2, 5, 6, 8, 10, 12, ++}; ++ ++static const uint16_t aspx_hcb_env_balance_30_f0_codes[13] = { ++ 0x000ffe, 0x0007fe, 0x0001fe, 0x00007e, 0x00000e, 0x000006, 0x000000, ++ 0x000002, 0x00001e, 0x00003e, 0x0000fe, 0x0003fe, 0x000fff, ++}; ++ ++static const uint8_t aspx_hcb_env_balance_30_df_bits[25] = { ++ 17, 17, 17, 15, 14, 12, 9, 9, 8, 7, 5, 2, 1, 3, 4, 7, ++ 8, 9, 10, 11, 14, 15, 16, 16, 17, ++}; ++ ++static const uint32_t aspx_hcb_env_balance_30_df_codes[25] = { ++ 0x01fffc, 0x01fffd, 0x01fffe, 0x007ffa, 0x003ffc, 0x000ffe, 0x0001fc, ++ 0x0001fe, 0x0000fa, 0x00007e, 0x00001e, 
0x000002, 0x000000, 0x000006, ++ 0x00000e, 0x00007c, 0x0000fb, 0x0001fd, 0x0003fe, 0x0007fe, 0x003ffe, ++ 0x007ffb, 0x00fffc, 0x00fffd, 0x01ffff, ++}; ++ ++static const uint8_t aspx_hcb_env_balance_30_dt_bits[25] = { ++ 15, 15, 15, 14, 13, 13, 10, 8, 8, 7, 4, 2, 1, 3, 5, 7, ++ 9, 9, 9, 12, 14, 13, 14, 15, 14, ++}; ++ ++static const uint16_t aspx_hcb_env_balance_30_dt_codes[25] = { ++ 0x007ff2, 0x007ff3, 0x007ff6, 0x003ff6, 0x001ffe, 0x001fff, 0x0003fe, ++ 0x0000fa, 0x0000fe, 0x00007e, 0x00000e, 0x000002, 0x000000, 0x000006, ++ 0x00001e, 0x00007c, 0x0001fe, 0x0001f6, 0x0001f7, 0x000ffc, 0x003ffa, ++ 0x001ffa, 0x003ff7, 0x007ff7, 0x003ff8, ++}; ++ ++static const uint8_t aspx_hcb_noise_level_f0_bits[30] = { ++ 12, 12, 9, 7, 6, 6, 5, 1, 2, 3, 5, 7, 8, 8, 9, 10, ++ 11, 12, 12, 12, 13, 13, 14, 12, 14, 14, 11, 16, 16, 15, ++}; ++ ++static const uint16_t aspx_hcb_noise_level_f0_codes[30] = { ++ 0x000efe, 0x000eff, 0x0001fe, 0x000076, 0x00003a, 0x00003e, 0x00001e, ++ 0x000000, 0x000002, 0x000006, 0x00001c, 0x00007e, 0x0000fe, 0x0000ee, ++ 0x0001ff, 0x0003bc, 0x00077c, 0x000efa, 0x000ef4, 0x000ef6, 0x001df6, ++ 0x001dee, 0x003bee, 0x000ef5, 0x003bde, 0x003bef, 0x00077e, 0x00ef7e, ++ 0x00ef7f, 0x0077be, ++}; ++ ++static const uint8_t aspx_hcb_noise_level_df_bits[59] = { ++ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 13, 14, 16, 16, 15, 15, ++ 16, 15, 14, 14, 14, 14, 13, 11, 10, 9, 8, 7, 3, 1, 2, 4, ++ 5, 7, 8, 9, 10, 12, 13, 13, 14, 16, 17, 17, 17, 17, 17, 17, ++ 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, ++}; ++ ++static const uint32_t aspx_hcb_noise_level_df_codes[59] = { ++ 0x01f19e, 0x01f19f, 0x01f1dc, 0x01f1dd, 0x01f1de, 0x01f1df, 0x01f1e8, ++ 0x01f1e9, 0x01f1ea, 0x01f1eb, 0x001f1a, 0x003e7e, 0x00f9fe, 0x00f8f6, ++ 0x007c76, 0x007c66, 0x00f8f7, 0x007c7c, 0x003e3c, 0x003e36, 0x003e37, ++ 0x003e3a, 0x001f3e, 0x0007ce, 0x0003e6, 0x0001f0, 0x0000fa, 0x00007e, ++ 0x000006, 0x000000, 0x000002, 0x00000e, 0x00001e, 0x00007f, 0x0000fb, ++ 0x0001f2, 0x0003e2, 0x000f9e, 0x001f1c, 
0x001f18, 0x003e32, 0x00f8fa, ++ 0x01f3fe, 0x01f1f6, 0x01f1f7, 0x01f1f8, 0x01f1f9, 0x01f1fa, 0x00f8ce, ++ 0x01f1fb, 0x01f1fc, 0x01f1fd, 0x01f1fe, 0x01f1ff, 0x01f3f8, 0x01f3f9, ++ 0x01f3fa, 0x01f3fb, 0x01f3ff, ++}; ++ ++static const uint8_t aspx_hcb_noise_level_dt_bits[59] = { ++ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 14, 16, 13, 11, 8, 6, 4, 3, 1, 2, 5, ++ 7, 10, 10, 12, 13, 13, 14, 15, 14, 15, 15, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, ++}; ++ ++static const uint16_t aspx_hcb_noise_level_dt_codes[59] = { ++ 0x00ff70, 0x00ff71, 0x00ff72, 0x00ff73, 0x00ff74, 0x00ff75, 0x00ff76, ++ 0x00ff77, 0x00ff78, 0x00ff79, 0x00ff7a, 0x00ff7b, 0x00ff7c, 0x00ff7d, ++ 0x00ff7e, 0x00ff7f, 0x00ffe0, 0x00ffe1, 0x00ffe2, 0x00ffe3, 0x00ffe6, ++ 0x003fd6, 0x00ffe7, 0x001ffe, 0x0007fe, 0x0000fe, 0x00003e, 0x00000e, ++ 0x000006, 0x000000, 0x000002, 0x00001e, 0x00007e, 0x0003fe, 0x0003fc, ++ 0x000ff6, 0x001fe8, 0x001fea, 0x003fd2, 0x007fae, 0x003fd3, 0x007ff2, ++ 0x007faf, 0x00ffe8, 0x00ffe9, 0x00ffea, 0x00ffeb, 0x00ffec, 0x00ffed, ++ 0x00ffee, 0x00ffef, 0x00fff8, 0x00fff9, 0x00fffa, 0x00fffb, 0x00fffc, ++ 0x00fffd, 0x00fffe, 0x00ffff, ++}; ++ ++static const uint8_t aspx_hcb_noise_balance_f0_bits[13] = { ++ 10, 10, 9, 7, 5, 3, 2, 1, 4, 6, 9, 10, 10, ++}; ++ ++static const uint16_t aspx_hcb_noise_balance_f0_codes[13] = { ++ 0x0003fa, 0x0003fe, 0x0001fc, 0x00007e, 0x00001e, 0x000006, ++ 0x000002, 0x000000, 0x00000e, 0x00003e, 0x0001fe, 0x0003ff, ++ 0x0003fb, ++}; ++ ++static const uint8_t aspx_hcb_noise_balance_df_bits[25] = { ++ 13, 13, 13, 13, 13, 12, 13, 10, 9, 7, 4, 2, 1, 3, 5, 6, ++ 9, 11, 13, 13, 12, 12, 12, 12, 12, ++}; ++ ++static const uint16_t aspx_hcb_noise_balance_df_codes[25] = { ++ 0x001fd8, 0x001fd9, 0x001fda, 0x001fdb, 0x001fdc, 0x000fea, 0x001fdd, ++ 0x0003f8, 0x0001fe, 0x00007e, 0x00000e, 0x000002, 0x000000, 0x000006, ++ 0x00001e, 0x00003e, 0x0001ff, 0x0007f4, 0x001fde, 0x001fdf, 0x000fe4, ++ 
0x000fe5, 0x000fe6, 0x000fe7, 0x000feb, ++}; ++ ++static const uint8_t aspx_hcb_noise_balance_dt_bits[25] = { ++ 11, 11, 11, 11, 11, 11, 11, 10, 10, 6, 4, 3, 1, 2, 5, 8, ++ 9, 11, 10, 11, 11, 11, 11, 11, 11, ++}; ++ ++static const uint16_t aspx_hcb_noise_balance_dt_codes[25] = { ++ 0x0007e6, 0x0007e7, 0x0007e8, 0x0007e9, 0x0007ea, 0x0007eb, 0x0007ec, ++ 0x0003fe, 0x0003ff, 0x00003e, 0x00000e, 0x000006, 0x000000, 0x000002, ++ 0x00001e, 0x0000fe, 0x0001f8, 0x0007ed, 0x0003f2, 0x0007ee, 0x0007ef, ++ 0x0007f8, 0x0007f9, 0x0007fa, 0x0007fb, ++}; ++ ++static uint8_t aspx_codebook_signal_off[2][2][3] = { ++ { ++ { 0, 70, 70, }, ++ { 36, 35, 35, }, ++ }, ++ { ++ { 25, 24, 24, }, ++ { 13, 12, 12, }, ++ }, ++}; ++ ++static uint8_t aspx_codebook_noise_off[2][3] = { ++ { 30, 29, 29, }, ++ { 13, 12, 12, }, ++}; ++ ++static const uint8_t scale_factors_bits[121] = { ++ 17, 14, 14, 14, 14, 14, 14, 15, 14, 14, 14, 14, 14, 14, 14, 14, ++ 13, 13, 14, 14, 14, 14, 14, 13, 14, 13, 13, 14, 14, 13, 14, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 11, ++ 11, 11, 10, 9, 9, 8, 8, 6, 6, 4, 4, 3, 1, 4, 4, 5, ++ 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 11, 11, 12, 11, 12, 12, ++ 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, ++ 14, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 16, 14, 14, 17, ++}; ++ ++static const uint8_t scale_factors_codes[121] = { ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x01, ++ 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, ++ 0x19, 0x1a, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x1b, ++ 0x14, 0x1c, 0x1d, 0x15, 0x16, 0x1e, 0x17, 0x1f, ++ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, ++ 0x28, 0x29, 0x2a, 0x1b, 0x1c, 0x1d, 0x1e, 0x12, ++ 0x13, 0x14, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x03, ++ 0x04, 0x02, 0x03, 0x03, 0x01, 0x04, 0x05, 0x03, ++ 0x05, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0e, 0x0f, ++ 0x15, 0x16, 0x17, 0x18, 0x1f, 0x19, 0x20, 0x21, ++ 0x22, 0x23, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, ++ 0x31, 0x32, 0x33, 
0x34, 0x18, 0x19, 0x1a, 0x1b, ++ 0x1c, 0x35, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, ++ 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, ++ 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x01, 0x30, 0x31, ++ 0x01, ++}; ++ ++static const uint16_t frame_len_base_48khz[] = { ++ 1920, 1920, 2048, 1536, 1536, 960, 960, 1024, ++ 768, 768, 512, 384, 384, 2048, 0, 0, ++}; ++ ++static const uint8_t frame_len_base_idx_48khz[] = { ++ 1, 1, 0, 2, 2, 4, 4, 3, 5, 5, 6, 7, 7, 0, 0, 0, ++}; ++ ++static const uint16_t transf_length_48khz_2048[] = { ++ 128, 256, 512, 1024, 2048, ++}; ++ ++static const uint16_t transf_length_48khz_1920[] = { ++ 120, 240, 480, 960, 1920, ++}; ++ ++static const uint16_t transf_length_48khz_1536[] = { ++ 96, 192, 384, 768, 1536, ++}; ++ ++static const uint16_t transf_length_48khz_1024[] = { ++ 128, 256, 512, 1024, 1024, ++}; ++ ++static const uint16_t transf_length_48khz_960[] = { ++ 120, 240, 480, 960, 960, ++}; ++ ++static const uint16_t transf_length_48khz_768[] = { ++ 96, 192, 384, 768, 768, ++}; ++ ++static const uint16_t transf_length_48khz_512[] = { ++ 128, 256, 512, 512, 512, ++}; ++ ++static const uint16_t transf_length_48khz_384[] = { ++ 96, 192, 384, 384, 384, ++}; ++ ++static const uint16_t *transf_length_48khz[8] = { ++ transf_length_48khz_2048, ++ transf_length_48khz_1920, ++ transf_length_48khz_1536, ++ transf_length_48khz_1024, ++ transf_length_48khz_960, ++ transf_length_48khz_768, ++ transf_length_48khz_512, ++ transf_length_48khz_384, ++}; ++ ++static const float kbd_window_alpha[8][5] = { ++ { 6.f, 5.f, 4.5f, 4.f, 3.0f }, ++ { 6.f, 5.f, 4.5f, 4.f, 3.0f }, ++ { 6.f, 5.f, 4.5f, 4.f, 3.0f }, ++ { 6.f, 5.f, 4.5f, 4.f, 4.0f }, ++ { 6.f, 5.f, 4.5f, 4.f, 4.0f }, ++ { 6.f, 5.f, 4.5f, 4.f, 4.0f }, ++ { 6.f, 5.f, 4.5f, 0.f, 4.5f }, ++ { 6.f, 5.f, 4.5f, 0.f, 4.5f }, ++}; ++ ++static const uint8_t n_grp_bits_a[4][4] = { ++ { 15, 10, 8, 7 }, ++ { 10, 7, 4, 3 }, ++ { 8, 4, 3, 1 }, ++ { 7, 3, 1, 1 }, ++}; ++ ++static const uint8_t n_grp_bits_b[4] = { ++ 7, 3, 1, 0, ++}; 
++ ++static const uint8_t n_grp_bits_c[4] = { ++ 3, 1, 0, 0, ++}; ++ ++static const uint8_t channel_mode_bits[] = { ++ 1, 2, 4, 4, 4, 7, 7, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, ++}; ++ ++static const uint16_t channel_mode_codes[] = { ++ 0, 2, 12, 13, 14, 120, 121, 122, 123, 124, 125, 252, 253, 508, 509, 510, 511, ++}; ++ ++static const uint8_t bitrate_indicator_bits[] = { ++ 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ++}; ++ ++static const uint8_t bitrate_indicator_codes[] = { ++ 0, 2, 4, 6, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, ++}; ++ ++static const uint8_t asf_codebook_dim[] = { ++ 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, ++}; ++ ++static const uint8_t asf_codebook_unsigned[] = { ++ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, ++}; ++ ++static const uint8_t asf_codebook_off[] = { ++ 1, 1, 0, 0, 4, 4, 0, 0, 0, 0, 0, ++}; ++ ++static const uint8_t asf_codebook_mod[] = { ++ 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17, ++}; ++ ++static const uint8_t asf_codebook_1_bits[81] = { ++ 9, 9, 12, 10, 7, 10, 12, 10, 11, 10, 7, 10, 7, 5, 7, 9, ++ 7, 10, 11, 8, 9, 9, 7, 9, 11, 10, 12, 9, 7, 10, 7, 5, ++ 7, 9, 7, 10, 7, 5, 7, 5, 1, 5, 7, 5, 7, 10, 7, 8, ++ 7, 5, 7, 10, 7, 9, 12, 10, 11, 9, 7, 10, 9, 8, 12, 10, ++ 7, 9, 7, 5, 7, 9, 7, 10, 12, 10, 12, 10, 7, 10, 12, 9, ++ 9, ++}; ++ ++static const uint8_t asf_codebook_1_codes[81] = { ++ 0x0b, 0x0c, 0x00, 0x04, 0x08, 0x05, 0x01, 0x06, 0x04, 0x07, 0x09, 0x08, 0x0a, 0x08, 0x0b, 0x0d, ++ 0x0c, 0x09, 0x05, 0x0d, 0x0e, 0x0f, 0x0d, 0x10, 0x06, 0x0a, 0x02, 0x11, 0x0e, 0x0b, 0x0f, 0x09, ++ 0x10, 0x12, 0x11, 0x0c, 0x12, 0x0a, 0x13, 0x0b, 0x01, 0x0c, 0x14, 0x0d, 0x15, 0x0d, 0x16, 0x0e, ++ 0x17, 0x0e, 0x18, 0x0e, 0x19, 0x13, 0x03, 0x0f, 0x07, 0x14, 0x1a, 0x10, 0x15, 0x0f, 0x04, 0x11, ++ 0x1b, 0x16, 0x1c, 0x0f, 0x1d, 0x17, 0x1e, 0x12, 0x05, 0x13, 0x06, 0x14, 0x1f, 0x15, 0x07, 0x18, ++ 0x19, ++}; ++ ++static const uint8_t asf_codebook_2_bits[81] = { ++ 8, 7, 9, 8, 6, 8, 9, 8, 9, 8, 6, 7, 6, 5, 6, 7, ++ 6, 8, 9, 7, 7, 7, 6, 7, 9, 8, 9, 8, 
6, 8, 6, 5, ++ 6, 7, 6, 8, 6, 5, 6, 5, 3, 5, 6, 5, 6, 8, 6, 7, ++ 6, 5, 6, 8, 6, 8, 9, 8, 9, 8, 6, 7, 7, 7, 9, 8, ++ 6, 7, 6, 4, 6, 8, 6, 8, 9, 8, 9, 8, 6, 8, 9, 7, ++ 8, ++}; ++ ++static const uint8_t asf_codebook_2_codes[81] = { ++ 0x06, 0x0e, 0x00, 0x07, 0x0e, 0x08, 0x01, 0x09, 0x02, 0x0a, 0x0f, 0x0f, 0x10, 0x13, 0x11, 0x10, ++ 0x12, 0x0b, 0x03, 0x11, 0x12, 0x13, 0x13, 0x14, 0x04, 0x0c, 0x05, 0x0d, 0x14, 0x0e, 0x15, 0x14, ++ 0x16, 0x15, 0x17, 0x0f, 0x18, 0x15, 0x19, 0x16, 0x07, 0x17, 0x1a, 0x18, 0x1b, 0x10, 0x1c, 0x16, ++ 0x1d, 0x19, 0x1e, 0x11, 0x1f, 0x12, 0x06, 0x13, 0x07, 0x14, 0x20, 0x17, 0x18, 0x19, 0x08, 0x15, ++ 0x21, 0x1a, 0x22, 0x0d, 0x23, 0x16, 0x24, 0x17, 0x09, 0x18, 0x0a, 0x19, 0x25, 0x1a, 0x0b, 0x1b, ++ 0x1b, ++}; ++ ++static const uint8_t asf_codebook_3_bits[81] = { ++ 1, 4, 9, 4, 5, 9, 10, 10, 12, 4, 6, 10, 5, 6, 9, 10, ++ 9, 11, 10, 11, 14, 10, 10, 12, 12, 11, 13, 4, 6, 11, 6, 7, ++ 10, 11, 10, 12, 5, 7, 11, 6, 6, 10, 10, 9, 11, 9, 10, 13, ++ 9, 9, 12, 11, 11, 12, 9, 10, 15, 10, 11, 15, 14, 13, 15, 9, ++ 10, 14, 9, 10, 13, 13, 12, 14, 11, 12, 15, 11, 11, 14, 13, 12, ++ 14, ++}; ++ ++static const uint8_t asf_codebook_3_codes[81] = { ++ 0x01, 0x04, 0x0d, 0x05, 0x05, 0x0e, 0x0a, 0x0b, 0x05, 0x06, 0x04, 0x0c, 0x06, 0x05, 0x0f, 0x0d, ++ 0x10, 0x07, 0x0e, 0x08, 0x02, 0x0f, 0x10, 0x06, 0x07, 0x09, 0x04, 0x07, 0x06, 0x0a, 0x07, 0x06, ++ 0x11, 0x0b, 0x12, 0x08, 0x07, 0x07, 0x0c, 0x08, 0x09, 0x13, 0x14, 0x11, 0x0d, 0x12, 0x15, 0x05, ++ 0x13, 0x14, 0x09, 0x0e, 0x0f, 0x0a, 0x15, 0x16, 0x00, 0x17, 0x10, 0x01, 0x03, 0x06, 0x02, 0x16, ++ 0x18, 0x04, 0x17, 0x19, 0x07, 0x08, 0x0b, 0x05, 0x11, 0x0c, 0x03, 0x12, 0x13, 0x06, 0x09, 0x0d, ++ 0x07, ++}; ++ ++static const uint8_t asf_codebook_4_bits[81] = { ++ 4, 4, 9, 5, 4, 8, 9, 8, 11, 4, 5, 8, 5, 4, 8, 8, ++ 8, 10, 9, 9, 11, 8, 8, 10, 11, 10, 12, 4, 5, 8, 5, 4, ++ 8, 9, 8, 10, 4, 4, 8, 4, 4, 7, 8, 7, 9, 8, 8, 11, ++ 8, 7, 10, 10, 9, 10, 9, 8, 11, 8, 8, 11, 11, 10, 12, 8, ++ 8, 11, 8, 7, 10, 10, 9, 11, 11, 10, 12, 10, 
9, 11, 12, 10, ++ 11, ++}; ++ ++static const uint8_t asf_codebook_4_codes[81] = { ++ 0x05, 0x06, 0x0a, 0x05, 0x07, 0x0a, 0x0b, 0x0b, 0x02, 0x08, 0x06, 0x0c, 0x07, 0x09, 0x0d, 0x0e, ++ 0x0f, 0x07, 0x0c, 0x0d, 0x03, 0x10, 0x11, 0x08, 0x04, 0x09, 0x00, 0x0a, 0x08, 0x12, 0x09, 0x0b, ++ 0x13, 0x0e, 0x14, 0x0a, 0x0c, 0x0d, 0x15, 0x0e, 0x0f, 0x10, 0x16, 0x11, 0x0f, 0x17, 0x18, 0x05, ++ 0x19, 0x12, 0x0b, 0x0c, 0x10, 0x0d, 0x11, 0x1a, 0x06, 0x1b, 0x1c, 0x07, 0x08, 0x0e, 0x01, 0x1d, ++ 0x1e, 0x09, 0x1f, 0x13, 0x0f, 0x10, 0x12, 0x0a, 0x0b, 0x11, 0x02, 0x12, 0x13, 0x0c, 0x03, 0x13, ++ 0x0d, ++}; ++ ++static const uint8_t asf_codebook_5_bits[81] = { ++ 14, 13, 12, 12, 11, 11, 12, 13, 14, 13, 12, 10, 9, 9, 9, 10, ++ 11, 13, 13, 10, 9, 8, 7, 7, 9, 10, 12, 12, 9, 8, 5, 4, ++ 5, 7, 9, 12, 12, 9, 7, 4, 1, 4, 7, 9, 12, 11, 9, 7, ++ 5, 4, 5, 8, 9, 12, 12, 10, 9, 7, 7, 8, 9, 10, 12, 13, ++ 11, 10, 9, 9, 10, 10, 12, 13, 14, 12, 12, 12, 11, 11, 12, 13, ++ 14, ++}; ++ ++static const uint8_t asf_codebook_5_codes[81] = { ++ 0x00, 0x02, 0x05, 0x06, 0x0b, 0x0c, 0x07, 0x03, 0x01, 0x04, 0x08, 0x09, 0x09, 0x0a, 0x0b, 0x0a, ++ 0x0d, 0x05, 0x06, 0x0b, 0x0c, 0x0c, 0x08, 0x09, 0x0d, 0x0c, 0x09, 0x0a, 0x0e, 0x0d, 0x04, 0x04, ++ 0x05, 0x0a, 0x0f, 0x0b, 0x0c, 0x10, 0x0b, 0x05, 0x01, 0x06, 0x0c, 0x11, 0x0d, 0x0e, 0x12, 0x0d, ++ 0x06, 0x07, 0x07, 0x0e, 0x13, 0x0e, 0x0f, 0x0d, 0x14, 0x0e, 0x0f, 0x0f, 0x15, 0x0e, 0x10, 0x07, ++ 0x0f, 0x0f, 0x16, 0x17, 0x10, 0x11, 0x11, 0x08, 0x02, 0x12, 0x13, 0x14, 0x10, 0x11, 0x15, 0x09, ++ 0x03, ++}; ++ ++static const uint8_t asf_codebook_6_bits[81] = { ++ 12, 11, 10, 10, 10, 10, 10, 11, 12, 11, 9, 8, 8, 8, 8, 8, ++ 9, 11, 10, 8, 6, 6, 6, 5, 6, 8, 10, 10, 8, 6, 4, 4, ++ 4, 5, 8, 10, 10, 8, 6, 4, 4, 4, 6, 8, 10, 10, 8, 5, ++ 4, 4, 4, 6, 8, 10, 10, 8, 6, 5, 6, 5, 6, 8, 11, 11, ++ 9, 8, 8, 8, 8, 8, 9, 11, 12, 11, 10, 10, 11, 10, 10, 11, ++ 12, ++}; ++ ++static const uint8_t asf_codebook_6_codes[81] = { ++ 0x00, 0x02, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x03, 0x01, 0x04, 0x0c, 
0x08, 0x09, 0x0a, 0x0b, 0x0c, ++ 0x0d, 0x05, 0x0b, 0x0d, 0x07, 0x08, 0x09, 0x09, 0x0a, 0x0e, 0x0c, 0x0d, 0x0f, 0x0b, 0x07, 0x08, ++ 0x09, 0x0a, 0x10, 0x0e, 0x0f, 0x11, 0x0c, 0x0a, 0x0b, 0x0c, 0x0d, 0x12, 0x10, 0x11, 0x13, 0x0b, ++ 0x0d, 0x0e, 0x0f, 0x0e, 0x14, 0x12, 0x13, 0x15, 0x0f, 0x0c, 0x10, 0x0d, 0x11, 0x16, 0x06, 0x07, ++ 0x0e, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x0f, 0x08, 0x02, 0x09, 0x14, 0x15, 0x0a, 0x16, 0x17, 0x0b, ++ 0x03, ++}; ++ ++static const uint8_t asf_codebook_7_bits[64] = { ++ 1, 3, 6, 8, 9, 9, 10, 11, 3, 4, 6, 7, 8, 8, 9, 10, ++ 6, 5, 7, 8, 8, 9, 10, 11, 8, 7, 8, 8, 9, 9, 10, 11, ++ 9, 8, 8, 9, 10, 10, 10, 11, 9, 8, 9, 9, 10, 10, 11, 12, ++ 10, 9, 10, 10, 10, 11, 12, 12, 12, 10, 11, 11, 12, 12, 12, 12, ++}; ++ ++static const uint8_t asf_codebook_7_codes[64] = { ++ 0x01, 0x02, 0x07, 0x0b, 0x0a, 0x0b, 0x06, 0x04, 0x03, 0x03, 0x08, 0x0b, 0x0c, 0x0d, 0x0c, 0x07, ++ 0x09, 0x05, 0x0c, 0x0e, 0x0f, 0x0d, 0x08, 0x05, 0x10, 0x0d, 0x11, 0x12, 0x0e, 0x0f, 0x09, 0x06, ++ 0x10, 0x13, 0x14, 0x11, 0x0a, 0x0b, 0x0c, 0x07, 0x12, 0x15, 0x13, 0x14, 0x0d, 0x0e, 0x08, 0x00, ++ 0x0f, 0x15, 0x10, 0x11, 0x12, 0x09, 0x01, 0x02, 0x03, 0x13, 0x0a, 0x0b, 0x04, 0x05, 0x06, 0x07, ++}; ++ ++static const uint8_t asf_codebook_8_bits[64] = { ++ 4, 4, 5, 6, 7, 8, 9, 10, 4, 3, 4, 5, 6, 7, 8, 9, ++ 5, 4, 4, 5, 6, 7, 8, 9, 6, 5, 5, 5, 6, 7, 8, 9, ++ 7, 6, 6, 6, 7, 7, 9, 10, 8, 7, 7, 7, 7, 8, 9, 10, ++ 9, 8, 8, 8, 8, 9, 9, 10, 11, 9, 9, 9, 10, 10, 10, 11, ++}; ++ ++static const uint8_t asf_codebook_8_codes[64] = { ++ 0x08, 0x09, 0x09, 0x0a, 0x09, 0x08, 0x04, 0x01, 0x0a, 0x07, 0x0b, 0x0a, 0x0b, 0x0a, 0x09, 0x05, ++ 0x0b, 0x0c, 0x0d, 0x0c, 0x0c, 0x0b, 0x0a, 0x06, 0x0d, 0x0d, 0x0e, 0x0f, 0x0e, 0x0c, 0x0b, 0x07, ++ 0x0d, 0x0f, 0x10, 0x11, 0x0e, 0x0f, 0x08, 0x02, 0x0c, 0x10, 0x11, 0x12, 0x13, 0x0d, 0x09, 0x03, ++ 0x0a, 0x0e, 0x0f, 0x10, 0x11, 0x0b, 0x0c, 0x04, 0x00, 0x0d, 0x0e, 0x0f, 0x05, 0x06, 0x07, 0x01, ++}; ++ ++static const uint8_t asf_codebook_9_bits[169] = { ++ 1, 3, 6, 8, 9, 10, 10, 
11, 11, 12, 12, 12, 13, 3, 4, 6, ++ 7, 8, 9, 9, 10, 10, 11, 11, 11, 12, 6, 6, 7, 8, 9, 9, ++ 10, 10, 10, 11, 11, 12, 12, 8, 7, 8, 9, 9, 10, 10, 11, 11, ++ 11, 12, 12, 12, 9, 8, 9, 9, 10, 10, 11, 11, 11, 12, 12, 12, ++ 13, 10, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 10, 9, ++ 10, 10, 11, 11, 11, 12, 12, 12, 12, 13, 13, 11, 10, 10, 11, 11, ++ 12, 12, 12, 12, 12, 13, 13, 13, 11, 10, 10, 11, 11, 11, 12, 12, ++ 12, 13, 13, 13, 14, 12, 11, 11, 11, 11, 12, 12, 12, 13, 13, 13, ++ 14, 14, 12, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 12, ++ 11, 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 13, 12, 12, 12, ++ 13, 13, 13, 13, 14, 14, 14, 14, 14, ++}; ++ ++static const uint8_t asf_codebook_9_codes[169] = { ++ 0x01, 0x02, 0x08, 0x14, 0x1b, 0x20, 0x21, 0x1e, 0x1f, 0x11, 0x12, 0x13, 0x06, 0x03, 0x03, 0x09, ++ 0x0d, 0x15, 0x1c, 0x1d, 0x22, 0x23, 0x20, 0x21, 0x22, 0x14, 0x0a, 0x0b, 0x0e, 0x16, 0x1e, 0x1f, ++ 0x24, 0x25, 0x26, 0x23, 0x24, 0x15, 0x16, 0x17, 0x0f, 0x18, 0x20, 0x21, 0x27, 0x28, 0x25, 0x26, ++ 0x27, 0x17, 0x18, 0x19, 0x22, 0x19, 0x23, 0x24, 0x29, 0x2a, 0x28, 0x29, 0x2a, 0x1a, 0x1b, 0x1c, ++ 0x07, 0x2b, 0x25, 0x26, 0x2c, 0x2d, 0x2e, 0x2b, 0x2c, 0x2d, 0x1d, 0x1e, 0x1f, 0x08, 0x2f, 0x27, ++ 0x30, 0x31, 0x2e, 0x2f, 0x30, 0x20, 0x21, 0x22, 0x23, 0x09, 0x0a, 0x31, 0x32, 0x33, 0x32, 0x33, ++ 0x24, 0x25, 0x26, 0x27, 0x28, 0x0b, 0x0c, 0x0d, 0x34, 0x34, 0x35, 0x35, 0x36, 0x37, 0x29, 0x2a, ++ 0x2b, 0x0e, 0x0f, 0x10, 0x00, 0x2c, 0x38, 0x39, 0x3a, 0x3b, 0x2d, 0x2e, 0x2f, 0x11, 0x12, 0x13, ++ 0x01, 0x02, 0x30, 0x3c, 0x3d, 0x31, 0x32, 0x33, 0x34, 0x14, 0x15, 0x16, 0x17, 0x03, 0x04, 0x35, ++ 0x3e, 0x3f, 0x36, 0x37, 0x38, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x05, 0x06, 0x1d, 0x39, 0x3a, 0x3b, ++ 0x1e, 0x1f, 0x20, 0x21, 0x07, 0x08, 0x09, 0x0a, 0x0b, ++}; ++ ++static const uint8_t asf_codebook_10_bits[169] = { ++ 6, 5, 6, 6, 7, 8, 9, 10, 10, 11, 11, 12, 12, 5, 4, 4, ++ 5, 6, 7, 7, 8, 9, 10, 10, 10, 11, 6, 4, 4, 5, 6, 6, ++ 7, 8, 9, 9, 10, 10, 11, 6, 5, 5, 5, 6, 7, 7, 8, 8, ++ 9, 10, 
10, 11, 7, 6, 6, 6, 6, 7, 7, 8, 9, 9, 10, 10, ++ 11, 8, 7, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 9, 7, ++ 7, 7, 7, 8, 8, 9, 9, 10, 10, 10, 12, 10, 8, 8, 8, 8, ++ 8, 9, 9, 9, 10, 10, 11, 12, 10, 8, 8, 8, 8, 9, 9, 9, ++ 10, 10, 10, 11, 12, 11, 9, 9, 9, 9, 9, 10, 10, 10, 10, 11, ++ 11, 13, 11, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 13, 12, ++ 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 11, 11, 11, ++ 11, 11, 11, 11, 12, 12, 13, 12, 13, ++}; ++ ++static const uint8_t asf_codebook_10_codes[169] = { ++ 0x14, 0x11, 0x15, 0x16, 0x17, 0x1a, 0x1d, 0x11, 0x12, 0x07, 0x08, 0x02, 0x03, 0x12, 0x0c, 0x0d, ++ 0x13, 0x17, 0x18, 0x19, 0x1b, 0x1e, 0x13, 0x14, 0x15, 0x09, 0x18, 0x0e, 0x0f, 0x14, 0x19, 0x1a, ++ 0x1a, 0x1c, 0x1f, 0x20, 0x16, 0x17, 0x0a, 0x1b, 0x15, 0x16, 0x17, 0x1c, 0x1b, 0x1c, 0x1d, 0x1e, ++ 0x21, 0x18, 0x19, 0x0b, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x1e, 0x1f, 0x1f, 0x22, 0x23, 0x1a, 0x1b, ++ 0x0c, 0x20, 0x20, 0x21, 0x21, 0x22, 0x23, 0x21, 0x22, 0x24, 0x25, 0x1c, 0x1d, 0x0d, 0x26, 0x24, ++ 0x25, 0x26, 0x27, 0x23, 0x24, 0x27, 0x28, 0x1e, 0x1f, 0x20, 0x04, 0x21, 0x25, 0x26, 0x27, 0x28, ++ 0x29, 0x29, 0x2a, 0x2b, 0x22, 0x23, 0x0e, 0x05, 0x24, 0x2a, 0x2b, 0x2c, 0x2d, 0x2c, 0x2d, 0x2e, ++ 0x25, 0x26, 0x27, 0x0f, 0x06, 0x10, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x28, 0x29, 0x2a, 0x2b, 0x11, ++ 0x12, 0x00, 0x13, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x14, 0x15, 0x16, 0x17, 0x01, 0x07, ++ 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x18, 0x19, 0x1a, 0x08, 0x09, 0x0a, 0x1b, 0x1c, 0x1d, ++ 0x1e, 0x1f, 0x20, 0x21, 0x0b, 0x0c, 0x02, 0x0d, 0x03, ++}; ++ ++static const uint8_t asf_codebook_11_bits[289] = { ++ 3, 4, 6, 7, 8, 9, 9, 10, 10, 11, 11, 11, 12, 12, 12, 12, ++ 10, 4, 4, 5, 6, 7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 11, ++ 11, 9, 6, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9, 9, 10, 10, ++ 10, 11, 8, 7, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, ++ 10, 10, 11, 8, 8, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, ++ 10, 10, 10, 11, 8, 9, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, ++ 10, 10, 10, 11, 11, 8, 9, 8, 8, 8, 8, 8, 9, 
9, 9, 10, ++ 10, 10, 10, 10, 11, 11, 8, 10, 9, 8, 8, 8, 9, 9, 9, 9, ++ 10, 10, 10, 11, 11, 11, 11, 9, 10, 9, 9, 9, 9, 9, 9, 9, ++ 10, 10, 10, 10, 11, 11, 11, 11, 9, 11, 9, 9, 9, 9, 9, 10, ++ 10, 10, 10, 10, 10, 11, 11, 11, 12, 9, 11, 9, 9, 9, 9, 10, ++ 10, 10, 10, 10, 10, 11, 11, 11, 11, 12, 9, 11, 10, 9, 9, 9, ++ 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 9, 12, 10, 10, 10, ++ 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 9, 11, 10, 10, ++ 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 12, 9, 12, 11, ++ 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 9, 12, ++ 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 9, ++ 10, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, ++ 5, ++}; ++ ++static const uint8_t asf_codebook_11_codes[289] = { ++ 0x07, 0x0b, 0x1f, 0x2f, 0x3d, 0x36, 0x37, 0x26, 0x27, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, ++ 0x28, 0x0c, 0x0d, 0x13, 0x20, 0x30, 0x3e, 0x3f, 0x38, 0x39, 0x3a, 0x3b, 0x29, 0x2a, 0x2b, 0x10, ++ 0x11, 0x3c, 0x21, 0x14, 0x22, 0x23, 0x31, 0x32, 0x40, 0x41, 0x3d, 0x3e, 0x3f, 0x40, 0x2c, 0x2d, ++ 0x2e, 0x12, 0x42, 0x33, 0x24, 0x25, 0x34, 0x35, 0x36, 0x43, 0x44, 0x41, 0x42, 0x43, 0x44, 0x2f, ++ 0x30, 0x31, 0x13, 0x45, 0x46, 0x37, 0x38, 0x39, 0x3a, 0x47, 0x48, 0x45, 0x46, 0x47, 0x48, 0x32, ++ 0x33, 0x34, 0x35, 0x14, 0x49, 0x49, 0x3b, 0x3c, 0x3d, 0x4a, 0x4b, 0x4c, 0x4a, 0x4b, 0x4c, 0x36, ++ 0x37, 0x38, 0x39, 0x15, 0x16, 0x4d, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x4e, 0x4f, 0x50, 0x3a, ++ 0x3b, 0x3c, 0x3d, 0x3e, 0x17, 0x18, 0x53, 0x3f, 0x51, 0x54, 0x55, 0x56, 0x52, 0x53, 0x54, 0x55, ++ 0x40, 0x41, 0x42, 0x19, 0x1a, 0x1b, 0x1c, 0x56, 0x43, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, ++ 0x44, 0x45, 0x46, 0x47, 0x1d, 0x1e, 0x1f, 0x20, 0x5e, 0x21, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x48, ++ 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x22, 0x23, 0x24, 0x04, 0x64, 0x25, 0x65, 0x66, 0x67, 0x68, 0x4e, ++ 0x4f, 0x50, 0x51, 0x52, 0x53, 0x26, 0x27, 0x28, 0x29, 0x05, 0x69, 0x2a, 0x54, 0x6a, 0x6b, 0x6c, ++ 0x55, 0x56, 0x57, 0x58, 0x59, 0x2b, 0x2c, 
0x2d, 0x2e, 0x2f, 0x06, 0x6d, 0x07, 0x5a, 0x5b, 0x5c, ++ 0x5d, 0x5e, 0x5f, 0x60, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x08, 0x09, 0x6e, 0x36, 0x61, 0x62, ++ 0x63, 0x64, 0x65, 0x66, 0x67, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x0a, 0x0b, 0x0c, 0x6f, 0x0d, 0x3c, ++ 0x68, 0x69, 0x6a, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x0e, 0x0f, 0x10, 0x11, 0x70, 0x12, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x71, ++ 0x6b, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x15, ++}; ++ ++static const uint16_t sfb_offset_48khz_2048[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, ++ 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, ++ 124, 136, 148, 160, 172, 188, 204, 220, 240, ++ 260, 284, 308, 336, 364, 396, 432, 468, 508, ++ 552, 600, 652, 704, 768, 832, 896, 960, 1024, ++ 1088, 1152, 1216, 1280, 1344, 1408, 1472, 1536, ++ 1600, 1664, 1728, 1792, 1856, 1920, 1984, 2048, ++ 2176, 2304, 2432, 2560, 2688, 2816, 2944, 3072, ++ 3200, 3328, 3456, 3584, 3712, 3840, 3968, 4096, ++ 4224, 4352, 4480, 4608, 4736, 4864, 4992, 5120, ++ 5248, 5376, 5504, 5632, 5760, 5888, 6016, 6144, ++ 6272, 6400, 6528, 6656, 6784, 6912, 7040, 7168, ++ 7296, 7424, 7552, 7680, 7808, 7936, 8064, 8192, ++}; ++ ++static const uint16_t sfb_offset_48khz_1920[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 52, 60, 68, ++ 76, 84, 92, 100, 108, 116, 124, 136, 148, 160, 172, 188, ++ 204, 220, 240, 260, 284, 308, 336, 364, 396, 432, 468, ++ 508, 552, 600, 652, 704, 768, 832, 896, 960, 1024, 1088, ++ 1152, 1216, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, ++ 1792, 1856, 1920, 2048, 2176, 2304, 2432, 2560, 2688, 2816, ++ 2944, 3072, 3200, 3328, 3456, 3584, 3712, 3840, 3968, 4096, ++ 4224, 4352, 4480, 4608, 4736, 4864, 4992, 5120, 5248, 5376, ++ 5504, 5632, 5760, 5888, 6016, 6144, 6272, 6400, 6528, 6656, ++ 6784, 6912, 7040, 7168, 7296, 7424, 7552, 7680, ++}; ++ ++static const uint16_t sfb_offset_48khz_1536[] = { ++ 0, 4, 8, 
12, 16, 20, 24, 28, 32, 36, 40, ++ 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, ++ 124, 136, 148, 160, 172, 188, 204, 220, 240, ++ 260, 284, 308, 336, 364, 396, 432, 468, 508, ++ 552, 600, 652, 704, 768, 832, 896, 960, 1024, ++ 1088, 1152, 1216, 1280, 1344, 1408, 1472, 1536, ++ 1664, 1792, 1920, 2048, 2176, 2304, 2432, 2560, ++ 2688, 2816, 2944, 3072, 3200, 3328, 3456, 3584, ++ 3712, 3840, 3968, 4096, 4224, 4352, 4480, 4608, ++ 4736, 4864, 4992, 5120, 5248, 5376, 5504, 5632, ++ 5760, 5888, 6016, 6144, ++}; ++ ++static const uint16_t sfb_offset_48khz_1024[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 48, ++ 56, 64, 72, 80, 88, 96, 108, 120, 132, 144, ++ 160, 176, 196, 216, 240, 264, 292, 320, 352, ++ 384, 416, 448, 480, 512, 544, 576, 608, 640, ++ 672, 704, 736, 768, 800, 832, 864, 896, 928, ++ 1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, ++ 2048, 2176, 2304, 2432, 2560, 2688, 2816, 2944, ++ 3072, 3200, 3328, 3456, 3584, 3712, 3840, 3968, ++ 4096, ++}; ++ ++static const uint16_t sfb_offset_48khz_960[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 48, 56, 64, 72, ++ 80, 88, 96, 108, 120, 132, 144, 160, 176, 196, 216, 240, ++ 264, 292, 320, 352, 384, 416, 448, 480, 512, 544, ++ 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, ++ 928, 960, 1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, ++ 2048, 2176, 2304, 2432, 2560, 2688, 2816, 2944, 3072, ++ 3200, 3328, 3456, 3584, 3712, 3840, ++}; ++ ++static const uint16_t sfb_offset_48khz_768[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 48, ++ 56, 64, 72, 80, 88, 96, 108, 120, 132, 144, ++ 160, 176, 196, 216, 240, 264, 292, 320, 352, ++ 384, 416, 448, 480, 512, 544, 576, 608, 640, ++ 672, 704, 736, 768, 896, 1024, 1152, 1280, 1408, ++ 1536, 1664, 1792, 1920, 2048, 2176, 2304, 2432, ++ 2560, 2688, 2816, 2944, 3072, ++}; ++ ++static const uint16_t sfb_offset_48khz_512[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, ++ 56, 60, 68, 76, 84, 92, 100, 112, 124, 136, 148, 164, ++ 184, 208, 236, 268, 300, 
332, 364, 396, 428, 460, 512, ++ 576, 640, 704, 768, 832, 896, 960, 1024, 1088, 1152, ++ 1216, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, ++ 1792, 1856, 1920, 1984, 2048, ++}; ++ ++static const uint16_t sfb_offset_48khz_480[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, ++ 60, 68, 76, 84, 92, 100, 112, 124, 136, 148, 164, 184, ++ 208, 236, 268, 300, 332, 364, 396, 428, 460, 480, 512, ++ 576, 640, 704, 768, 832, 896, 960, 1024, 1088, 1152, ++ 1216, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, ++ 1792, 1856, 1920, ++}; ++ ++static const uint16_t sfb_offset_48khz_384[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, ++ 56, 60, 68, 76, 84, 92, 100, 112, 124, 136, 148, 164, ++ 184, 208, 236, 268, 300, 332, 364, 384, 448, 512, 576, ++ 640, 704, 768, 832, 896, 960, 1024, 1088, 1152, ++ 1216, 1280, 1344, 1408, 1472, 1536, ++}; ++ ++static const uint16_t sfb_offset_48khz_256[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 64, 76, ++ 92, 108, 128, 148, 172, 196, 224, 256, 288, 320, ++ 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, ++ 832, 896, 960, 1024, ++}; ++ ++static const uint16_t sfb_offset_48khz_240[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 64, 76, 92, 108, ++ 128, 148, 172, 196, 224, 240, 256, 288, 320, 352, 384, 416, ++ 448, 480, 512, 576, 640, 704, 768, 832, 896, 960, ++}; ++ ++static const uint16_t sfb_offset_48khz_192[] = { ++ 0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 64, 76, ++ 92, 108, 128, 148, 172, 192, 224, 256, 288, 320, ++ 352, 384, 448, 512, 576, 640, 704, 768, ++}; ++ ++static const uint16_t sfb_offset_48khz_128[] = { ++ 0, 4, 8, 12, 16, 20, 28, 36, 44, 56, 68, 80, 96, 112, ++ 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, ++ 352, 384, 416, 448, 480, 512, ++}; ++ ++static const uint16_t sfb_offset_48khz_120[] = { ++ 0, 4, 8, 12, 16, 20, 28, 36, 44, 56, 68, 80, 96, 112, ++ 120, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, ++ 320, 352, 384, 416, 448, 480, ++}; ++ ++static const uint16_t 
sfb_offset_48khz_96[] = { ++ 0, 4, 8, 12, 16, 20, 28, 36, 44, 56, 68, 80, 96, 112, ++ 128, 144, 160, 176, 192, 224, 256, 288, 320, 352, 384, ++}; ++ ++static const uint8_t snf_bits[22] = { ++ 4, 7, 8, 8, 7, 6, 6, 6, 6, 5, 5, 4, 4, 3, 3, 3, 3, 4, 4, 6, 7, 6, ++}; ++ ++static const uint8_t snf_codes[22] = { ++ 0x03, 0x01, 0x00, 0x01, 0x02, 0x02, 0x03, 0x04, ++ 0x05, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x06, ++ 0x07, 0x06, 0x07, 0x06, 0x03, 0x07, ++}; ++ ++static const uint8_t aspx_int_class_bits[4] = { ++ 1, 2, 3, 3, ++}; ++ ++static const uint8_t aspx_int_class_codes[4] = { ++ 0, 2, 6, 7, ++}; ++ ++static const uint8_t sbg_template_lowres[] = { ++ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ++ 22, 24, 26, 28, 30, 32, 35, 38, 42, 46, ++}; ++ ++static const uint8_t sbg_template_highres[] = { ++ 18, 19, 20, 21, 22, 23, 24, 26, 28, 30, 32, 34, ++ 36, 38, 40, 42, 44, 47, 50, 53, 56, 59, 62, ++}; ++ ++static const int tab_border[5][3][5] = { ++ { ++ { 0, 6, 0, 0, 0, }, ++ { 0, 3, 6, 0, 0, }, ++ { 0, 2, 3, 4, 6, }, ++ }, ++ { ++ { 0, 8, 0, 0, 0, }, ++ { 0, 4, 8, 0, 0, }, ++ { 0, 2, 4, 6, 8, }, ++ }, ++ { ++ { 0,12, 0, 0, 0, }, ++ { 0, 6,12, 0, 0, }, ++ { 0, 3, 6, 9,12, }, ++ }, ++ { ++ { 0,15, 0, 0, 0, }, ++ { 0, 8,15, 0, 0, }, ++ { 0, 4, 8,12,15, }, ++ }, ++ { ++ { 0,16, 0, 0, 0, }, ++ { 0, 8,16, 0, 0, }, ++ { 0, 4, 8,12,16, }, ++ }, ++}; ++ ++static const uint8_t qmf_subbands[][4] = { ++ { 0, 0, 0, 0 }, ++ { 1, 1, 1, 1 }, ++ { 2, 2, 2, 2 }, ++ { 3, 3, 3, 2 }, ++ { 4, 4, 3, 3 }, ++ { 5, 4, 4, 3 }, ++ { 6, 5, 4, 3 }, ++ { 7, 5, 5, 3 }, ++ { 8, 6, 5, 4 }, ++ { 9, 6, 6, 4 }, ++ {10, 7, 6, 4 }, ++ {11, 8, 7, 5 }, ++ {12, 9, 7, 5 }, ++ {13,10, 8, 6 }, ++ {14,11, 8, 6 }, ++}; ++ ++static const uint8_t acpl_num_param_bands[] = { ++ 15, 12, 9, 7, ++}; ++ ++static const uint8_t acpl_hcb_alpha_coarse_f0_bits[17] = { ++ 10, 10, 9, 8, 6, 6, 5, 2, 1, 3, 5, 7, 7, 8, 9, 10, ++ 10, ++}; ++ ++static const uint16_t acpl_hcb_alpha_coarse_f0_codes[17] = { ++ 0x0003be, 0x0003fe, 0x0001fe, 
0x0000fe, 0x00003e, 0x00003a, ++ 0x00001e, 0x000002, 0x000000, 0x000006, 0x00001c, 0x00007e, ++ 0x000076, 0x0000ee, 0x0001de, 0x0003ff, 0x0003bf, ++}; ++ ++static const uint8_t acpl_hcb_alpha_fine_f0_bits[33] = { ++ 10, 12, 11, 11, 10, 10, 9, 8, 7, 7, 8, 7, 6, 6, 4, 3, ++ 1, 3, 4, 6, 6, 7, 8, 8, 9, 9, 10, 10, 10, 10, 11, 12, ++ 10, ++}; ++ ++static const uint16_t acpl_hcb_alpha_fine_f0_codes[33] = { ++ 0x0002ce, 0x000b5e, 0x0004fe, 0x0005ae, 0x00027e, 0x0002de, ++ 0x00016a, 0x0000b2, 0x00004a, 0x00004b, 0x0000b6, 0x00004e, ++ 0x000024, 0x00002e, 0x00000a, 0x000006, 0x000000, 0x000007, ++ 0x000008, 0x00002f, 0x000026, 0x000058, 0x0000b4, 0x00009e, ++ 0x00016e, 0x000166, 0x0002df, 0x0002cf, 0x00027c, 0x00027d, ++ 0x0004ff, 0x000b5f, 0x0002d6, ++}; ++ ++static const uint8_t acpl_hcb_alpha_coarse_df_bits[33] = { ++ 15, 18, 17, 17, 16, 15, 15, 13, 12, 11, 10, 9, 8, 7, 4, 3, ++ 1, 2, 5, 7, 8, 9, 10, 11, 12, 13, 15, 16, 16, 17, 16, 18, ++ 15, ++}; ++ ++static const uint32_t acpl_hcb_alpha_coarse_df_codes[33] = { ++ 0x007c76, 0x03e3fe, 0x01f1f6, 0x01f1f7, 0x00f8ea, 0x007c74, 0x007c7c, ++ 0x001f1c, 0x000f9e, 0x0007ce, 0x0003e2, 0x0001f0, 0x0000fa, 0x00007e, ++ 0x00000e, 0x000006, 0x000000, 0x000002, 0x00001e, 0x00007f, 0x0000fb, ++ 0x0001f2, 0x0003e6, 0x0007c6, 0x000f9f, 0x001f1e, 0x007c7e, 0x00f8fe, ++ 0x00f8fa, 0x01f1fe, 0x00f8eb, 0x03e3ff, 0x007c77, ++}; ++ ++static const uint8_t acpl_hcb_alpha_fine_df_bits[65] = { ++ 13, 17, 17, 17, 16, 17, 17, 17, 17, 16, 16, 16, 15, 15, 14, 13, ++ 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 8, 7, 7, 5, 4, 3, ++ 1, 3, 4, 5, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, ++ 13, 13, 14, 15, 15, 16, 16, 17, 16, 16, 17, 16, 16, 17, 17, 17, ++ 13, ++}; ++ ++static const uint32_t acpl_hcb_alpha_fine_df_codes[65] = { ++ 0x0011de, 0x011ffe, 0x013dea, 0x013df6, 0x008eea, 0x013df7, 0x013dee, ++ 0x013deb, 0x013dec, 0x008eee, 0x008ffe, 0x009efe, 0x0047fe, 0x004f7c, ++ 0x0023fe, 0x0011fe, 0x0013fe, 0x0008f6, 0x0009ee, 0x000476, 0x00047a, ++ 0x0004f6, 
0x00023a, 0x00027a, 0x00027e, 0x00013e, 0x00009a, 0x00004c, ++ 0x00004e, 0x000012, 0x00000a, 0x000006, 0x000000, 0x000007, 0x00000b, ++ 0x000010, 0x000022, 0x000046, 0x00009b, 0x00013c, 0x00011c, 0x00023e, ++ 0x00023c, 0x0004fe, 0x00047e, 0x0009fe, 0x0008fe, 0x0008f7, 0x0013ff, ++ 0x0011df, 0x0027bc, 0x004f7e, 0x004776, 0x009efa, 0x009ef4, 0x013dfe, ++ 0x008eeb, 0x008ee8, 0x013dff, 0x008ee9, 0x008eef, 0x011fff, 0x013ded, ++ 0x013def, 0x0011dc, ++}; ++ ++static const uint8_t acpl_hcb_alpha_coarse_dt_bits[33] = { ++ 14, 16, 15, 16, 15, 15, 14, 13, 12, 12, 10, 9, 8, 7, 5, 3, ++ 1, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 16, 15, 16, ++ 14, ++}; ++ ++static const uint32_t acpl_hcb_alpha_coarse_dt_codes[33] = { ++ 0x003efc, 0x00fbfa, 0x007ddc, 0x00fbfe, 0x007dde, 0x007dfc, 0x003ef6, ++ 0x001f76, 0x000fba, 0x000fbe, 0x0003ec, 0x0001f2, 0x0000f8, 0x00007e, ++ 0x00001e, 0x000006, 0x000000, 0x000002, 0x00000e, 0x00007f, 0x0000fa, ++ 0x0001f3, 0x0003ed, 0x0007dc, 0x000fbc, 0x001f7a, 0x003ef7, 0x007dfe, ++ 0x007ddf, 0x00fbff, 0x007ddd, 0x00fbfb, 0x003efd, ++}; ++ ++static const uint8_t acpl_hcb_alpha_fine_dt_bits[65] = { ++ 16, 18, 18, 18, 17, 17, 17, 18, 17, 17, 17, 16, 16, 16, 15, 15, ++ 14, 14, 13, 13, 13, 12, 11, 11, 10, 10, 9, 9, 7, 6, 5, 3, ++ 1, 2, 5, 6, 7, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, ++ 14, 15, 15, 16, 16, 16, 17, 17, 17, 17, 17, 18, 17, 18, 18, 18, ++ 16, ++}; ++ ++static const uint32_t acpl_hcb_alpha_fine_dt_codes[65] = { ++ 0x00eeee, 0x03b3ee, 0x03b3f6, 0x03b3fc, 0x01d9bc, 0x01d9bd, 0x01d9b2, ++ 0x03b3fe, 0x01d9be, 0x01d9f6, 0x01d9fc, 0x00ecda, 0x00ecfa, 0x00eeef, ++ 0x00766e, 0x007776, 0x003b3a, 0x003bba, 0x001d9a, 0x001ddc, 0x001dde, ++ 0x000eec, 0x000764, 0x000772, 0x0003b0, 0x0003b8, 0x0001da, 0x0001de, ++ 0x000072, 0x000038, 0x00001e, 0x000006, 0x000000, 0x000002, 0x00001f, ++ 0x00003a, 0x000073, 0x0001df, 0x0001db, 0x0003ba, 0x0003b1, 0x000773, ++ 0x000765, 0x000eed, 0x000ecc, 0x001d9e, 0x001d9c, 0x003bbe, 0x003b3b, ++ 0x00777e, 0x00767c, 
0x00eefe, 0x00ecfc, 0x00ecd8, 0x01d9fd, 0x01d9fa, ++ 0x01d9bf, 0x01d9b6, 0x01d9b3, 0x03b3fd, 0x01d9b7, 0x03b3ff, 0x03b3ef, ++ 0x03b3f7, 0x00eeff, ++}; ++ ++static const uint8_t acpl_hcb_beta_coarse_f0_bits[5] = { ++ 1, 2, 3, 4, 4, ++}; ++ ++static const uint8_t acpl_hcb_beta_coarse_f0_codes[5] = { ++ 0x000000, 0x000002, 0x000006, 0x00000e, 0x00000f, ++}; ++ ++static const uint8_t acpl_hcb_beta_fine_f0_bits[9] = { ++ 1, 2, 3, 4, 5, 6, 7, 8, 8, ++}; ++ ++static const uint8_t acpl_hcb_beta_fine_f0_codes[9] = { ++ 0x000000, 0x000002, 0x000006, 0x00000e, 0x00001e, 0x00003e, 0x00007e, 0x0000fe, 0x0000ff, ++}; ++ ++static const uint8_t acpl_hcb_beta_coarse_df_bits[9] = { ++ 8, 6, 4, 3, 1, 2, 5, 7, 8, ++}; ++ ++static const uint8_t acpl_hcb_beta_coarse_df_codes[9] = { ++ 0x0000fe, 0x00003e, 0x00000e, 0x000006, 0x000000, 0x000002, 0x00001e, 0x00007e, 0x0000ff, ++}; ++ ++static const uint8_t acpl_hcb_beta_fine_df_bits[17] = { ++ 13, 12, 10, 9, 8, 7, 5, 3, 1, 2, 4, 7, 8, 9, 9, 11, ++ 13, ++}; ++ ++static const uint32_t acpl_hcb_beta_fine_df_codes[17] = { ++ 0x001f1e, 0x000f8e, 0x0003e2, 0x0001f2, 0x0000fa, 0x00007e, ++ 0x00001e, 0x000006, 0x000000, 0x000002, 0x00000e, 0x00007f, ++ 0x0000fb, 0x0001f3, 0x0001f0, 0x0007c6, 0x001f1f, ++}; ++ ++static const uint8_t acpl_hcb_beta_coarse_dt_bits[9] = { ++ 8, 7, 5, 3, 1, 2, 4, 6, 8, ++}; ++ ++static const uint8_t acpl_hcb_beta_coarse_dt_codes[9] = { ++ 0x0000fe, 0x00007e, 0x00001e, 0x000006, 0x000000, 0x000002, 0x00000e, 0x00003e, 0x0000ff, ++}; ++ ++static const uint8_t acpl_hcb_beta_fine_dt_bits[17] = { ++ 15, 14, 12, 10, 8, 7, 5, 3, 1, 2, 4, 7, 7, 9, 11, 13, ++ 15, ++}; ++ ++static const uint32_t acpl_hcb_beta_fine_dt_codes[17] = { ++ 0x007dfe, 0x003efe, 0x000fbe, 0x0003ee, 0x0000fa, ++ 0x00007e, 0x00001e, 0x000006, 0x000000, 0x000002, ++ 0x00000e, 0x00007f, 0x00007c, 0x0001f6, 0x0007de, ++ 0x001f7e, 0x007dff, ++}; ++ ++static const uint8_t acpl_hcb_beta3_coarse_f0_bits[9] = { ++ 5, 3, 3, 2, 2, 3, 4, 6, 6, ++}; ++ ++static const 
uint8_t acpl_hcb_beta3_coarse_f0_codes[9] = { ++ 0x000001, 0x000006, 0x000007, 0x000001, 0x000002, 0x000001, 0x000001, 0x000001, 0x000000, ++}; ++ ++static const uint8_t acpl_hcb_beta3_fine_f0_bits[17] = { ++ 7, 5, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 6, 6, 7, 7, ++ 7, ++}; ++ ++static const uint8_t acpl_hcb_beta3_fine_f0_codes[17] = { ++ 0x00000d, 0x000002, 0x000000, 0x00000c, 0x00000e, 0x000001, 0x000003, ++ 0x000005, 0x000004, 0x000002, 0x00000d, 0x00001f, 0x00003d, 0x000007, ++ 0x000078, 0x00000c, 0x000079, ++}; ++ ++static const uint8_t acpl_hcb_beta3_coarse_df_bits[17] = { ++ 13, 12, 12, 11, 9, 6, 4, 2, 1, 3, 5, 7, 9, 11, 12, 13, ++ 9, ++}; ++ ++static const uint32_t acpl_hcb_beta3_coarse_df_codes[17] = { ++ 0x000a93, 0x000548, 0x00054b, 0x0002a7, 0x0000ab, 0x000014, ++ 0x000004, 0x000000, 0x000001, 0x000003, 0x00000b, 0x00002b, ++ 0x0000aa, 0x0002a6, 0x00054a, 0x000a92, 0x0000a8, ++}; ++ ++static const uint8_t acpl_hcb_beta3_fine_df_bits[33] = { ++ 14, 15, 14, 13, 13, 12, 11, 11, 9, 8, 7, 6, 5, 4, 3, 2, ++ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 14, ++ 15, ++}; ++ ++static const uint32_t acpl_hcb_beta3_fine_df_codes[33] = { ++ 0x0019e9, 0x0033f7, 0x0019f3, 0x000cf5, 0x000cfc, 0x00067d, 0x00033c, ++ 0x0007ff, 0x0000ce, 0x000066, 0x000032, 0x000018, 0x00000d, 0x000007, ++ 0x000002, 0x000000, 0x000002, 0x000006, 0x00000e, 0x00001e, 0x00003e, ++ 0x00007e, 0x0000fe, 0x0001fe, 0x0003fe, 0x0007fe, 0x00067f, 0x00067b, ++ 0x000cf8, 0x0019fa, 0x0019f2, 0x0019e8, 0x0033f6, ++}; ++ ++static const uint8_t acpl_hcb_beta3_coarse_dt_bits[17] = { ++ 15, 15, 14, 12, 10, 7, 5, 3, 1, 2, 4, 6, 8, 11, 14, 14, ++ 9, ++}; ++ ++static const uint16_t acpl_hcb_beta3_coarse_dt_codes[17] = { ++ 0x000adc, 0x000add, 0x00056c, 0x00015a, 0x000057, 0x00000b, 0x000003, ++ 0x000001, 0x000001, 0x000001, 0x000000, 0x000004, 0x000014, 0x0000ac, ++ 0x00056f, 0x00056d, 0x00002a, ++}; ++ ++static const uint8_t acpl_hcb_beta3_fine_dt_bits[33] = { ++ 16, 16, 16, 16, 16, 16, 15, 14, 12, 11, 10, 9, 
8, 7, 5, 3, ++ 1, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 16, 16, 16, ++ 16, ++}; ++ ++static const uint32_t acpl_hcb_beta3_fine_dt_codes[33] = { ++ 0x00501e, 0x00501d, 0x00501c, 0x00501b, 0x00510e, 0x00510d, 0x002809, ++ 0x001442, 0x000500, 0x000281, 0x000141, 0x0000a1, 0x000052, 0x00002a, ++ 0x00000b, 0x000003, 0x000001, 0x000000, 0x000004, 0x00002b, 0x000053, ++ 0x0000a3, 0x000145, 0x000289, 0x000511, 0x000a20, 0x001405, 0x00280c, ++ 0x002808, 0x00510f, 0x00510c, 0x00501f, 0x00501a, ++}; ++ ++static const uint8_t acpl_hcb_gamma_coarse_f0_bits[21] = { ++ 13, 13, 13, 13, 11, 9, 7, 6, 5, 3, 2, 3, 3, 4, 3, 3, ++ 8, 11, 12, 13, 13, ++}; ++ ++static const uint16_t acpl_hcb_gamma_coarse_f0_codes[21] = { ++ 0x000af4, 0x000af8, 0x000af9, 0x000afb, 0x0002bc, 0x0000ae, 0x00002a, 0x000014, ++ 0x00000b, 0x000001, 0x000003, 0x000005, 0x000000, 0x000004, 0x000004, 0x000003, ++ 0x000056, 0x0002bf, 0x00057b, 0x000af5, 0x000afa, ++}; ++ ++static const uint8_t acpl_hcb_gamma_fine_f0_bits[41] = { ++ 12, 13, 13, 12, 12, 12, 12, 11, 9, 10, 9, 8, 8, 7, 7, 6, ++ 5, 5, 4, 4, 3, 3, 4, 4, 5, 5, 5, 5, 4, 3, 4, 7, ++ 8, 9, 10, 11, 11, 12, 12, 12, 12, ++}; ++ ++static const uint32_t acpl_hcb_gamma_fine_f0_codes[41] = { ++ 0x0004b6, 0x001c6d, 0x001c6c, 0x00049b, 0x0004b5, 0x0004b7, 0x000e35, 0x00024e, ++ 0x0001c7, 0x00038c, 0x000097, 0x000048, 0x0000e2, 0x000070, 0x000073, 0x000013, ++ 0x000008, 0x000017, 0x000005, 0x00000c, 0x000004, 0x000001, 0x00000d, 0x00000a, ++ 0x00001f, 0x00001e, 0x000016, 0x00001d, 0x000006, 0x000000, 0x000007, 0x000072, ++ 0x00004a, 0x000092, 0x00012c, 0x00024f, 0x00024c, 0x000e34, 0x0004b4, 0x00049a, ++ 0x000e37, ++}; ++ ++static const uint8_t acpl_hcb_gamma_coarse_df_bits[41] = { ++ 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 14, 13, 13, 11, 10, ++ 8, 7, 4, 2, 1, 3, 5, 7, 8, 10, 11, 13, 13, 14, 15, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, 8, ++}; ++ ++static const uint32_t acpl_hcb_gamma_coarse_df_codes[41] = { ++ 0x0053e1, 0x0053e0, 0x0053db, 0x0053da, 0x0053d9, 
0x0053e2, 0x0053e4, 0x0053ea, ++ 0x0053eb, 0x0029ea, 0x0029f4, 0x0014f4, 0x000a78, 0x000a7f, 0x000299, 0x00014d, ++ 0x000051, 0x00002a, 0x000004, 0x000000, 0x000001, 0x000003, 0x00000b, 0x00002b, ++ 0x000052, 0x00014e, 0x000298, 0x000a7e, 0x000a79, 0x0014f7, 0x0029f6, 0x0053ef, ++ 0x0053ee, 0x0053e7, 0x0053e6, 0x0053e3, 0x0053e5, 0x0053d8, 0x0053d7, 0x0053d6, ++ 0x000050, ++}; ++ ++static const uint8_t acpl_hcb_gamma_fine_df_bits[81] = { ++ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 16, 16, 16, 15, 15, 15, 14, 14, 13, 13, 13, 12, 11, 11, ++ 10, 9, 8, 7, 6, 5, 4, 3, 1, 3, 4, 5, 6, 7, 9, 9, ++ 10, 11, 11, 12, 13, 13, 14, 14, 14, 15, 15, 15, 16, 16, 16, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, ++}; ++ ++static const uint32_t acpl_hcb_gamma_fine_df_codes[81] = { ++ 0x013e1f, 0x013e35, 0x013e1e, 0x013e1d, 0x013e1c, 0x013e1b, 0x013e1a, 0x013e19, ++ 0x013e34, 0x013e33, 0x013e18, 0x013ec2, 0x013ec1, 0x013ece, 0x013edf, 0x013e17, ++ 0x013ede, 0x013edd, 0x009d52, 0x009f18, 0x009f1b, 0x004eaa, 0x004ea8, 0x004fb1, ++ 0x002753, 0x002757, 0x0013a8, 0x0013e0, 0x0013ee, 0x0009d6, 0x0004e9, 0x0004fa, ++ 0x00027b, 0x00013c, 0x00009c, 0x00004d, 0x000021, 0x000012, 0x00000b, 0x000007, ++ 0x000000, 0x000006, 0x00000a, 0x000011, 0x000020, 0x00004c, 0x00013f, 0x00013b, ++ 0x00027a, 0x0004f9, 0x0004e8, 0x0009d7, 0x0013ef, 0x0013e2, 0x0027da, 0x0027c7, ++ 0x002752, 0x004fb6, 0x004eac, 0x004eab, 0x009f65, 0x009d5a, 0x009d53, 0x013ecd, ++ 0x013edc, 0x013ecc, 0x013ecf, 0x013ec9, 0x013e32, 0x013ec3, 0x013e16, 0x013ec0, ++ 0x013ec8, 0x013e15, 0x013e14, 0x013e13, 0x013e12, 0x013e11, 0x013e10, 0x013ab7, ++ 0x013ab6, ++}; ++ ++static const uint8_t acpl_hcb_gamma_coarse_dt_bits[41] = { ++ 17, 17, 17, 17, 16, 17, 16, 16, 16, 15, 14, 13, 12, 12, 10, 9, ++ 8, 7, 5, 3, 1, 2, 4, 7, 8, 10, 11, 12, 13, 13, 14, 15, ++ 16, 16, 16, 17, 17, 17, 17, 17, 9, ++}; ++ ++static const uint32_t acpl_hcb_gamma_coarse_dt_codes[41] = { ++ 0x00a7f3, 
0x00a7f1, 0x00a7f9, 0x00a7f8, 0x0050e1, 0x00a7fe, 0x0050e8, 0x0050eb, ++ 0x0053fe, 0x0029fd, 0x00143b, 0x000a1b, 0x00050c, 0x00053e, 0x000142, 0x0000a0, ++ 0x000052, 0x00002b, 0x00000b, 0x000003, 0x000001, 0x000000, 0x000004, 0x00002a, ++ 0x000051, 0x00014e, 0x00029e, 0x00050f, 0x000a7e, 0x000a1a, 0x001439, 0x002871, ++ 0x0050ea, 0x0050e9, 0x0050e0, 0x00a7ff, 0x00a7fb, 0x00a7fa, 0x00a7f2, 0x00a7f0, ++ 0x0000a6, ++}; ++ ++static const uint8_t acpl_hcb_gamma_fine_dt_bits[81] = { ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, ++ 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 14, 14, 13, 13, 12, 12, ++ 11, 10, 9, 8, 7, 6, 5, 2, 1, 3, 5, 6, 7, 8, 10, 10, ++ 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 17, 17, 17, 17, ++ 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, ++}; ++ ++static const uint32_t acpl_hcb_gamma_fine_dt_codes[81] = { ++ 0x031e44, 0x031d1d, 0x031e42, 0x031e16, 0x031e41, 0x031e47, 0x031d1c, 0x031e43, ++ 0x031e73, 0x031e72, 0x031e15, 0x031e70, 0x031e75, 0x031e7f, 0x031e7e, 0x018e88, ++ 0x018d8b, 0x018e8f, 0x018f0e, 0x018f3e, 0x00c746, 0x00c796, 0x00c79e, 0x006361, ++ 0x0063c9, 0x0063d8, 0x0031d0, 0x0031e6, 0x0018d9, 0x0018f1, 0x000c6d, 0x000c7a, ++ 0x00063b, 0x00031c, 0x00018c, 0x0000c1, 0x000062, 0x000033, 0x00001b, 0x000002, ++ 0x000000, 0x000007, 0x00001a, 0x000032, 0x000061, 0x0000c0, 0x00031f, 0x00031a, ++ 0x000637, 0x000c75, 0x0018f7, 0x0018e9, 0x0031ed, 0x0031e0, 0x0063d9, 0x0063ca, ++ 0x006363, 0x006360, 0x00c786, 0x00c745, 0x018f3b, 0x018f2e, 0x018e89, 0x018d88, ++ 0x018d8a, 0x018d89, 0x031e5f, 0x031e74, 0x031e40, 0x031e71, 0x031e46, 0x031e5e, ++ 0x031e1f, 0x031e45, 0x031e1e, 0x031e14, 0x031e17, 0x031e13, 0x031e12, 0x031e11, ++ 0x031e10, ++}; ++ ++DECLARE_ASM_CONST(16, float, qwin)[640] = { ++ 0, ++ 1.990318758627504e-004, 2.494762615491542e-004, 3.021769445225078e-004, ++ 3.548460080857985e-004, 4.058915811480806e-004, 4.546408052001889e-004, ++ 5.012680176678405e-004, 5.464958142195282e-004, 
5.912073950641334e-004, ++ 6.361178026937039e-004, 6.816060488244358e-004, 7.277257095064290e-004, ++ 7.743418255606097e-004, 8.212990636826637e-004, 8.685363488152327e-004, ++ 9.161071539925993e-004, 9.641168291303352e-004, 1.012630507392736e-003, ++ 1.061605258108620e-003, 1.110882587090581e-003, 1.160236901298543e-003, ++ 1.209448942573337e-003, 1.258362795150757e-003, 1.306902381715039e-003, ++ 1.355046337751365e-003, 1.402784629568410e-003, 1.450086694843816e-003, ++ 1.496898951224534e-003, 1.543170821958483e-003, 1.588889089195869e-003, ++ 1.634098242730728e-003, 1.678892372493930e-003, 1.723381173920660e-003, ++ 1.767651163797991e-003, 1.811741998614740e-003, 1.855650606587200e-003, ++ 1.899360915083620e-003, 1.942876625831283e-003, 1.986241654706626e-003, ++ 2.029534125962055e-003, 2.072840712410525e-003, 2.116229103721749e-003, ++ 2.159738034390673e-003, 2.203392976200947e-003, 2.247239773881968e-003, ++ 2.291373966775394e-003, 2.335946110021889e-003, 2.381132815654862e-003, ++ 2.427086732976290e-003, 2.473891839822582e-003, 2.521550367974952e-003, ++ 2.570013995199655e-003, 2.619244058999978e-003, 2.669265893796866e-003, ++ 2.720177146231281e-003, 2.772088849679780e-003, 2.825009494162980e-003, ++ 2.878716544061140e-003, 2.932677076291194e-003, 2.986067366389476e-003, ++ 3.037905983043366e-003, 3.087269477594307e-003, 3.133519274378684e-003, ++ 3.176460810085721e-003, 3.216374095471449e-003, 3.253902493849856e-003, ++ 3.289837867273167e-003, 3.324873276103132e-003, 3.359407689115599e-003, ++ 3.393454084675361e-003, 3.426668323773391e-003, 3.458465815999750e-003, ++ 3.488171121469781e-003, 3.515141351338780e-003, 3.538827383683883e-003, ++ 3.558767785536742e-003, 3.574539247363964e-003, 3.585697968628984e-003, ++ 3.591743339500398e-003, 3.592116764752254e-003, 3.586228204993297e-003, ++ 3.573492966885132e-003, 3.553356715665694e-003, 3.525300399274114e-003, ++ 3.488824092931520e-003, 3.443423145747434e-003, 3.388568319085867e-003, ++ 
3.323699442173841e-003, 3.248231770523395e-003, 3.161568930730635e-003, ++ 3.063113666967670e-003, 2.952270973359112e-003, 2.828441943181057e-003, ++ 2.691016173288500e-003, 2.539366102140493e-003, 2.372848583221744e-003, ++ 2.190814088754598e-003, 1.992618085548526e-003, 1.777631090142623e-003, ++ 1.545242163079598e-003, 1.294855985911958e-003, 1.025885587325796e-003, ++ 7.377456851538827e-004, 4.298496740962311e-004, 1.016113723823784e-004, ++ -2.475493814535340e-004, -6.181972580227641e-004, -1.010876063031582e-003, ++ -1.426108207321696e-003, -1.864392667409557e-003, -2.326207721179968e-003, ++ -2.812013688448634e-003, -3.322252633537029e-003, -3.857344314546718e-003, ++ -4.417678415707104e-003, -5.003604409245843e-003, -5.615422427540850e-003, ++ -6.253382198869787e-003, -6.917691380307223e-003, -7.608536937561301e-003, ++ -8.326113472848559e-003, -9.070651572928327e-003, -9.842433610911637e-003, ++ -1.064178450184536e-002, -1.146903570409307e-002, -1.232446526717138e-002, ++ -1.320822893615923e-002, 1.412030102138547e-002, 1.506045143737221e-002, ++ 1.602824700934038e-002, 1.702310507234504e-002, 1.804435938034114e-002, ++ 1.909132707403387e-002, 2.016335321815832e-002, 2.125982139139435e-002, ++ 2.238013015948307e-002, 2.352365148441367e-002, 2.468968228813486e-002, ++ 2.587741357605385e-002, 2.708591966384863e-002, 2.831416731612567e-002, ++ 2.956103453432552e-002, 3.082532788511644e-002, 3.210578787607558e-002, ++ 3.340108247607704e-002, 3.470979250147262e-002, 3.603039785904666e-002, ++ 3.736126987823528e-002, 3.870067428980750e-002, 4.004677994303860e-002, ++ 4.139766786359423e-002, 4.275134353925827e-002, 4.410572893128047e-002, ++ 4.545866171224587e-002, 4.680788921400311e-002, 4.815106534667384e-002, ++ 4.948575188369231e-002, 5.080942296260306e-002, 5.211947012173918e-002, ++ 5.341320372603929e-002, 5.468785186395163e-002, 5.594055607104873e-002, ++ 5.716836923188953e-002, 5.836825629443718e-002, 5.953709945765930e-002, ++ 6.067170625396996e-002, 
6.176881705202805e-002, 6.282510999827461e-002, ++ 6.383720245755561e-002, 6.480165083585107e-002, 6.571495100350305e-002, ++ 6.657354346196487e-002, 6.737381445564891e-002, 6.811211000439976e-002, ++ 6.878473991370719e-002, 6.938797895654626e-002, 6.991806618580000e-002, ++ 7.037120381110623e-002, 7.074355866301176e-002, 7.103126866531538e-002, ++ 7.123045563399449e-002, 7.133723888151840e-002, 7.134774334517399e-002, ++ 7.125810128129656e-002, 7.106444395777428e-002, 7.076288963679085e-002, ++ 7.034953453342756e-002, 6.982045490146145e-002, 6.917172452383333e-002, ++ 6.839944399575645e-002, 6.749977716975542e-002, 6.646898181809889e-002, ++ 6.530342654389224e-002, 6.399958984339946e-002, 6.255404354954748e-002, ++ 6.096342863203985e-002, 5.922443337469448e-002, 5.733378365410422e-002, ++ 5.528824660015738e-002, 5.308464739461209e-002, 5.071989148277166e-002, ++ 4.819098634672628e-002, 4.549505579582869e-002, 4.262934676625042e-002, ++ 3.959122947020497e-002, 3.637819581239452e-002, 3.298786054608736e-002, ++ 2.941796954479800e-002, 2.566640058060906e-002, 2.173117939155709e-002, ++ 1.761048656968719e-002, 1.330266415707108e-002, 8.806217289921706e-003, ++ 4.119815918461287e-003, -7.577038291607129e-004, -5.827337082489678e-003, ++ -1.108990619665782e-002, -1.654605559674886e-002, -2.219624707735291e-002, ++ -2.804075556277473e-002, -3.407966641908426e-002, -4.031287253355741e-002, ++ -4.674007190475649e-002, -5.336076390182971e-002, -6.017424526940620e-002, ++ -6.717960594283154e-002, -7.437572538762392e-002, -8.176127022450692e-002, ++ -8.933469320120192e-002, -9.709423309043450e-002, -1.050379143754414e-001, ++ -1.131635475471188e-001, -1.214687284677367e-001, -1.299508386078101e-001, ++ -1.386070430802319e-001, -1.474342913196958e-001, -1.564293167898782e-001, ++ -1.655886374953163e-001, -1.749085568711785e-001, -1.843851642116290e-001, ++ -1.940143360850268e-001, -2.037917371113644e-001, -2.137128217101543e-001, ++ -2.237728356363325e-001, 
-2.339668182208061e-001, -2.442896055908444e-001, ++ -2.547358344658102e-001, -2.652999476893712e-001, -2.759762003673840e-001, ++ -2.867586659726799e-001, -2.976412485679301e-001, -3.086176827721830e-001, ++ -3.196815399704708e-001, -3.308262316588501e-001, -3.420450091826495e-001, ++ 3.533309414505971e-001, 3.646770149404552e-001, 3.760759747758828e-001, ++ 3.875204555118187e-001, 3.990029533969267e-001, 4.105158411581483e-001, ++ 4.220513789540003e-001, 4.336017251305980e-001, 4.451589452332786e-001, ++ 4.567150149423557e-001, 4.682618290579831e-001, 4.797912086537587e-001, ++ 4.912949058677955e-001, 5.027646134968753e-001, 5.141919746376279e-001, ++ 5.255685924518015e-001, 5.368860394090674e-001, 5.481358656081351e-001, ++ 5.593096071830315e-001, 5.703987947306394e-001, 5.813949615434598e-001, ++ 5.922896536434017e-001, 6.030744392774144e-001, 6.137409201916185e-001, ++ 6.242807411441345e-001, 6.346855991963545e-001, 6.449472531836600e-001, ++ 6.550575323798634e-001, 6.650083455855346e-001, 6.747916901830467e-001, ++ 6.843996616799759e-001, 6.938244627003839e-001, 7.030584122393319e-001, ++ 7.120939537241190e-001, 7.209236637533725e-001, 7.295402599029810e-001, ++ 7.379366091028713e-001, 7.461057359576386e-001, 7.540408314942230e-001, ++ 7.617352611504460e-001, 7.691825714586890e-001, 7.763765020733762e-001, ++ 7.833109874824341e-001, 7.899801646390305e-001, 7.963783815797485e-001, ++ 8.025002033685581e-001, 8.083404191294724e-001, 8.138940486031526e-001, ++ 8.191563476989879e-001, 8.241228138607196e-001, 8.287891904413357e-001, ++ 8.331514714928793e-001, 8.372059062705359e-001, 8.409490040631689e-001, ++ 8.443775395556067e-001, 8.474885573145614e-001, 8.502793750759253e-001, ++ 8.527475863595390e-001, 8.548910606594570e-001, 8.567079441260879e-001, ++ 8.581966597760032e-001, 8.593559096378087e-001, 8.601846769933608e-001, ++ 8.606822313166693e-001, 8.608481078185764e-001, 8.606822313166693e-001, ++ 8.601846769933608e-001, 8.593559096378087e-001, 
8.581966597760032e-001, ++ 8.567079441260879e-001, 8.548910606594570e-001, 8.527475863595390e-001, ++ 8.502793750759253e-001, 8.474885573145614e-001, 8.443775395556067e-001, ++ 8.409490040631689e-001, 8.372059062705359e-001, 8.331514714928793e-001, ++ 8.287891904413357e-001, 8.241228138607196e-001, 8.191563476989879e-001, ++ 8.138940486031526e-001, 8.083404191294724e-001, 8.025002033685581e-001, ++ 7.963783815797485e-001, 7.899801646390305e-001, 7.833109874824341e-001, ++ 7.763765020733762e-001, 7.691825714586890e-001, 7.617352611504460e-001, ++ 7.540408314942230e-001, 7.461057359576386e-001, 7.379366091028713e-001, ++ 7.295402599029810e-001, 7.209236637533725e-001, 7.120939537241190e-001, ++ 7.030584122393319e-001, 6.938244627003839e-001, 6.843996616799759e-001, ++ 6.747916901830467e-001, 6.650083455855346e-001, 6.550575323798634e-001, ++ 6.449472531836600e-001, 6.346855991963545e-001, 6.242807411441345e-001, ++ 6.137409201916185e-001, 6.030744392774144e-001, 5.922896536434017e-001, ++ 5.813949615434598e-001, 5.703987947306394e-001, 5.593096071830315e-001, ++ 5.481358656081351e-001, 5.368860394090674e-001, 5.255685924518015e-001, ++ 5.141919746376279e-001, 5.027646134968753e-001, 4.912949058677955e-001, ++ 4.797912086537587e-001, 4.682618290579831e-001, 4.567150149423557e-001, ++ 4.451589452332786e-001, 4.336017251305980e-001, 4.220513789540003e-001, ++ 4.105158411581483e-001, 3.990029533969267e-001, 3.875204555118187e-001, ++ 3.760759747758828e-001, 3.646770149404552e-001, -3.533309414505971e-001, ++ -3.420450091826495e-001, -3.308262316588501e-001, -3.196815399704708e-001, ++ -3.086176827721830e-001, -2.976412485679301e-001, -2.867586659726799e-001, ++ -2.759762003673840e-001, -2.652999476893712e-001, -2.547358344658102e-001, ++ -2.442896055908444e-001, -2.339668182208061e-001, -2.237728356363325e-001, ++ -2.137128217101543e-001, -2.037917371113644e-001, -1.940143360850268e-001, ++ -1.843851642116290e-001, -1.749085568711785e-001, -1.655886374953163e-001, ++ 
-1.564293167898782e-001, -1.474342913196958e-001, -1.386070430802319e-001, ++ -1.299508386078101e-001, -1.214687284677367e-001, -1.131635475471188e-001, ++ -1.050379143754414e-001, -9.709423309043450e-002, -8.933469320120192e-002, ++ -8.176127022450692e-002, -7.437572538762392e-002, -6.717960594283154e-002, ++ -6.017424526940620e-002, -5.336076390182971e-002, -4.674007190475649e-002, ++ -4.031287253355741e-002, -3.407966641908426e-002, -2.804075556277473e-002, ++ -2.219624707735291e-002, -1.654605559674886e-002, -1.108990619665782e-002, ++ -5.827337082489678e-003, -7.577038291607129e-004, 4.119815918461287e-003, ++ 8.806217289921706e-003, 1.330266415707108e-002, 1.761048656968719e-002, ++ 2.173117939155709e-002, 2.566640058060906e-002, 2.941796954479800e-002, ++ 3.298786054608736e-002, 3.637819581239452e-002, 3.959122947020497e-002, ++ 4.262934676625042e-002, 4.549505579582869e-002, 4.819098634672628e-002, ++ 5.071989148277166e-002, 5.308464739461209e-002, 5.528824660015738e-002, ++ 5.733378365410422e-002, 5.922443337469448e-002, 6.096342863203985e-002, ++ 6.255404354954748e-002, 6.399958984339946e-002, 6.530342654389224e-002, ++ 6.646898181809889e-002, 6.749977716975542e-002, 6.839944399575645e-002, ++ 6.917172452383333e-002, 6.982045490146145e-002, 7.034953453342756e-002, ++ 7.076288963679085e-002, 7.106444395777428e-002, 7.125810128129656e-002, ++ 7.134774334517399e-002, 7.133723888151840e-002, 7.123045563399449e-002, ++ 7.103126866531538e-002, 7.074355866301176e-002, 7.037120381110623e-002, ++ 6.991806618580000e-002, 6.938797895654626e-002, 6.878473991370719e-002, ++ 6.811211000439976e-002, 6.737381445564891e-002, 6.657354346196487e-002, ++ 6.571495100350305e-002, 6.480165083585107e-002, 6.383720245755561e-002, ++ 6.282510999827461e-002, 6.176881705202805e-002, 6.067170625396996e-002, ++ 5.953709945765930e-002, 5.836825629443718e-002, 5.716836923188953e-002, ++ 5.594055607104873e-002, 5.468785186395163e-002, 5.341320372603929e-002, ++ 5.211947012173918e-002, 
5.080942296260306e-002, 4.948575188369231e-002, ++ 4.815106534667384e-002, 4.680788921400311e-002, 4.545866171224587e-002, ++ 4.410572893128047e-002, 4.275134353925827e-002, 4.139766786359423e-002, ++ 4.004677994303860e-002, 3.870067428980750e-002, 3.736126987823528e-002, ++ 3.603039785904666e-002, 3.470979250147262e-002, 3.340108247607704e-002, ++ 3.210578787607558e-002, 3.082532788511644e-002, 2.956103453432552e-002, ++ 2.831416731612567e-002, 2.708591966384863e-002, 2.587741357605385e-002, ++ 2.468968228813486e-002, 2.352365148441367e-002, 2.238013015948307e-002, ++ 2.125982139139435e-002, 2.016335321815832e-002, 1.909132707403387e-002, ++ 1.804435938034114e-002, 1.702310507234504e-002, 1.602824700934038e-002, ++ 1.506045143737221e-002, -1.412030102138547e-002, -1.320822893615923e-002, ++ -1.232446526717138e-002, -1.146903570409307e-002, -1.064178450184536e-002, ++ -9.842433610911637e-003, -9.070651572928327e-003, -8.326113472848559e-003, ++ -7.608536937561301e-003, -6.917691380307223e-003, -6.253382198869787e-003, ++ -5.615422427540850e-003, -5.003604409245843e-003, -4.417678415707104e-003, ++ -3.857344314546718e-003, -3.322252633537029e-003, -2.812013688448634e-003, ++ -2.326207721179968e-003, -1.864392667409557e-003, -1.426108207321696e-003, ++ -1.010876063031582e-003, -6.181972580227641e-004, -2.475493814535340e-004, ++ 1.016113723823784e-004, 4.298496740962311e-004, 7.377456851538827e-004, ++ 1.025885587325796e-003, 1.294855985911958e-003, 1.545242163079598e-003, ++ 1.777631090142623e-003, 1.992618085548526e-003, 2.190814088754598e-003, ++ 2.372848583221744e-003, 2.539366102140493e-003, 2.691016173288500e-003, ++ 2.828441943181057e-003, 2.952270973359112e-003, 3.063113666967670e-003, ++ 3.161568930730635e-003, 3.248231770523395e-003, 3.323699442173841e-003, ++ 3.388568319085867e-003, 3.443423145747434e-003, 3.488824092931520e-003, ++ 3.525300399274114e-003, 3.553356715665694e-003, 3.573492966885132e-003, ++ 3.586228204993297e-003, 3.592116764752254e-003, 
3.591743339500398e-003, ++ 3.585697968628984e-003, 3.574539247363964e-003, 3.558767785536742e-003, ++ 3.538827383683883e-003, 3.515141351338780e-003, 3.488171121469781e-003, ++ 3.458465815999750e-003, 3.426668323773391e-003, 3.393454084675361e-003, ++ 3.359407689115599e-003, 3.324873276103132e-003, 3.289837867273167e-003, ++ 3.253902493849856e-003, 3.216374095471449e-003, 3.176460810085721e-003, ++ 3.133519274378684e-003, 3.087269477594307e-003, 3.037905983043366e-003, ++ 2.986067366389476e-003, 2.932677076291194e-003, 2.878716544061140e-003, ++ 2.825009494162980e-003, 2.772088849679780e-003, 2.720177146231281e-003, ++ 2.669265893796866e-003, 2.619244058999978e-003, 2.570013995199655e-003, ++ 2.521550367974952e-003, 2.473891839822582e-003, 2.427086732976290e-003, ++ 2.381132815654862e-003, 2.335946110021889e-003, 2.291373966775394e-003, ++ 2.247239773881968e-003, 2.203392976200947e-003, 2.159738034390673e-003, ++ 2.116229103721749e-003, 2.072840712410525e-003, 2.029534125962055e-003, ++ 1.986241654706626e-003, 1.942876625831283e-003, 1.899360915083620e-003, ++ 1.855650606587200e-003, 1.811741998614740e-003, 1.767651163797991e-003, ++ 1.723381173920660e-003, 1.678892372493930e-003, 1.634098242730728e-003, ++ 1.588889089195869e-003, 1.543170821958483e-003, 1.496898951224534e-003, ++ 1.450086694843816e-003, 1.402784629568410e-003, 1.355046337751365e-003, ++ 1.306902381715039e-003, 1.258362795150757e-003, 1.209448942573337e-003, ++ 1.160236901298543e-003, 1.110882587090581e-003, 1.061605258108620e-003, ++ 1.012630507392736e-003, 9.641168291303352e-004, 9.161071539925993e-004, ++ 8.685363488152327e-004, 8.212990636826637e-004, 7.743418255606097e-004, ++ 7.277257095064290e-004, 6.816060488244358e-004, 6.361178026937039e-004, ++ 5.912073950641334e-004, 5.464958142195282e-004, 5.012680176678405e-004, ++ 4.546408052001889e-004, 4.058915811480806e-004, 3.548460080857985e-004, ++ 3.021769445225078e-004, 2.494762615491542e-004, 1.990318758627504e-004, ++}; ++ ++static const 
float new_chirp_tab[4][4] = { ++ { 0.0, 0.6, 0.0, 0.0, }, ++ { 0.6, 0.75, 0.75, 0.75, }, ++ { 0.9, 0.9, 0.9, 0.9, }, ++ { 0.98, 0.98, 0.98, 0.98, }, ++}; ++ ++static const int8_t aspx_sine[2][4] = { ++ { 1, 0, -1, 0 }, { 0, 1, 0, -1 }, ++}; ++ ++static const float aspx_noise[512][2] = { ++ {-0.782083, -0.623174}, { 0.705088, -0.70912}, { 0.268786, -0.9632}, ++ { 0.689305, -0.724471}, {-0.0430946, -0.999071}, {-0.998077, -0.0619922}, ++ { 0.867875, -0.496783}, { 0.814907, -0.579591}, { 0.880168, 0.474663}, ++ {-0.39062, -0.920552}, { 0.0434465, 0.999056}, { 0.610173, -0.792268}, ++ {-0.942195, 0.335065}, {-0.911161, 0.412052}, {-0.999084, -0.0427925}, ++ {-0.811795, 0.583942}, { 0.836524, 0.54793}, { 0.475443, 0.879747}, ++ { 0.961597, -0.274466}, { 0.820146, 0.572155}, {-0.498318, -0.866994}, ++ { 0.825586, -0.564277}, { 0.999973, 0.00730176}, { 0.895923, 0.44421}, ++ {-0.80517, -0.593044}, { 0.277753, 0.960652}, {-0.999688, 0.0249582}, ++ {-0.802608, -0.596507}, { 0.936737, -0.350035}, {-0.997477, -0.0709933}, ++ {-0.674713, -0.73808}, {-0.957686, -0.287817}, { 0.983532, 0.180732}, ++ { 0.634759, -0.77271}, {-0.150723, -0.988576}, {-0.113979, 0.993483}, ++ {-0.993827, -0.110942}, { 0.781536, 0.62386}, {-0.615104, -0.788446}, ++ {-0.586834, 0.809707}, {-0.226253, 0.974069}, { 0.949896, -0.312565}, ++ { 0.830084, 0.557638}, { 0.60415, 0.796871}, { 0.655412, 0.755272}, ++ {-0.916524, -0.399979}, { 0.763757, -0.645503}, { 0.822298, 0.569058}, ++ {-0.0283442, -0.999598}, {-0.644058, -0.764977}, {-0.390833, -0.920462}, ++ {-0.984217, -0.176967}, { 0.719987, 0.693987}, { 0.999088, -0.042707}, ++ { 0.754487, 0.656315}, { 0.498312, 0.866998}, { 0.999389, 0.0349481}, ++ {-0.811845, -0.583873}, { 0.532015, -0.846735}, {-0.463781, 0.88595}, ++ {-0.819481, -0.573106}, { 0.637789, 0.770211}, { 0.814253, -0.58051}, ++ { 0.180489, -0.983577}, { 0.988691, -0.149968}, { 0.606847, -0.794819}, ++ {-0.061914, 0.998081}, { 0.627066, 0.778966}, {-0.544097, -0.839023}, ++ {-0.859249, 
-0.511558}, {-0.91818, 0.396163}, {-0.165942, 0.986136}, ++ { 0.992322, 0.123685}, { 0.758555, 0.651609}, { 0.985543, -0.169427}, ++ {-0.154971, -0.987919}, { 0.245336, 0.969438}, {-0.522038, -0.852922}, ++ {-0.33027, -0.943886}, { 0.998067, -0.0621421}, { 0.516758, 0.856132}, ++ { 0.843123, -0.537721}, { 0.44306, 0.896492}, {-0.814913, 0.579584}, ++ {-0.336464, -0.941696}, { 0.732896, 0.68034}, { 0.201774, -0.979432}, ++ { 0.741954, 0.670451}, {-0.469083, 0.883154}, { 0.867784, 0.496941}, ++ { 0.494202, -0.869347}, { 0.9367, -0.350134}, { 0.906328, 0.422575}, ++ { 0.764111, 0.645085}, { 0.631052, -0.77574}, {-0.0498248, -0.998758}, ++ { 0.974691, -0.223555}, { 0.361405, -0.932409}, {-0.748625, 0.662994}, ++ { 0.811839, -0.583881}, { 0.303931, 0.952694}, {-0.992668, -0.120876}, ++ {-0.996672, -0.0815161}, {-0.324622, 0.945844}, {-0.0246385, -0.999696}, ++ {-0.588361, 0.808598}, {-0.49898, 0.866613}, { 0.924578, -0.380994}, ++ {-0.755619, 0.655011}, { 0.92214, -0.386857}, { 0.818638, 0.57431}, ++ {-0.920451, 0.390857}, { 0.0380205, -0.999277}, { 0.446606, -0.894731}, ++ { 0.722557, -0.691312}, { 0.762113, -0.647444}, {-0.256731, -0.966483}, ++ { 0.471301, -0.881972}, {-0.530869, -0.847454}, {-0.749876, 0.661579}, ++ {-0.593767, -0.804637}, {-0.834805, 0.550545}, { 0.748843, 0.662747}, ++ {-0.70794, 0.706273}, {-0.0503228, -0.998733}, { 0.402884, -0.915251}, ++ { 0.0945791, -0.995517}, {-0.390889, -0.920438}, {-0.0994705, -0.995041}, ++ {-0.767207, -0.641399}, {-0.563485, -0.826126}, {-0.521859, 0.853032}, ++ { 0.503637, 0.863915}, {-0.739851, -0.672771}, { 0.442624, -0.896707}, ++ {-0.997671, 0.0682121}, { 0.776117, -0.630589}, {-0.964601, -0.263713}, ++ {-0.656053, 0.754715}, {-0.865578, 0.500775}, {-0.586255, -0.810127}, ++ { 0.0425286, -0.999095}, { 0.656339, 0.754466}, {-0.341906, 0.939734}, ++ {-0.605904, 0.795538}, {-0.658238, -0.752809}, {-0.652856, 0.757482}, ++ {-0.994554, 0.104219}, { 0.176725, -0.98426}, {-0.231945, 0.972729}, ++ {-0.997717, -0.0675303}, 
{-0.997805, 0.0662198}, { 0.661155, 0.75025}, ++ { 0.999669, -0.02573}, {-0.946982, -0.321287}, {-0.587897, 0.808936}, ++ { 0.957862, -0.28723}, { 0.613392, -0.789779}, {-0.956489, 0.291768}, ++ { 0.169829, -0.985474}, {-0.943551, -0.331226}, { 0.416834, 0.908983}, ++ { 0.684727, -0.7288}, { 0.952329, 0.305074}, {-0.328392, 0.944541}, ++ { 0.943344, 0.331816}, { 0.650872, 0.759187}, {-0.59941, -0.800442}, ++ {-0.768448, -0.639912}, { 0.539894, 0.841733}, { 0.606048, -0.795428}, ++ { 0.403588, 0.914941}, {-0.838111, 0.545499}, { 0.976157, -0.217066}, ++ {-0.995495, -0.0948107}, {-0.943792, 0.330539}, {-0.990415, 0.138123}, ++ { 0.281355, 0.959604}, { 0.371208, 0.92855}, {-0.4711, 0.88208}, ++ {-0.999459, -0.0328787}, {-0.988179, 0.153305}, {-0.843124, -0.537718}, ++ { 0.997108, 0.0759971}, {-0.268201, -0.963363}, { 0.0457651, 0.998952}, ++ {-0.983762, 0.179479}, {-0.439728, -0.898131}, { 0.162945, -0.986635}, ++ {-0.055868, 0.998438}, {-0.384381, 0.923174}, { 0.744138, -0.668026}, ++ {-0.0706482, -0.997501}, { 0.831219, 0.555945}, { 0.711624, 0.70256}, ++ { 0.161772, -0.986828}, { 0.8387, 0.544593}, {-0.418108, 0.908397}, ++ { 0.412208, 0.91109}, {-0.986564, 0.163375}, { 0.925222, 0.379427}, ++ { 0.411092, -0.911594}, {-0.103885, -0.994589}, {-0.923358, -0.383941}, ++ {-0.761339, 0.648355}, {-0.887774, -0.46028}, { 0.755699, -0.654919}, ++ { 0.597832, -0.801622}, { 0.542946, -0.839768}, { 0.999842, -0.0177525}, ++ {-0.954864, 0.297044}, {-0.999961, -0.00886517}, { 0.929688, -0.368347}, ++ { 0.699205, 0.714921}, {-0.894118, 0.447831}, {-0.903496, 0.428597}, ++ { 0.362774, -0.931877}, { 0.850158, -0.526527}, { 0.523993, -0.851722}, ++ {-0.735767, 0.677235}, {-0.999673, -0.02558}, {-0.954685, 0.297619}, ++ {-0.195193, -0.980765}, { 0.0672577, -0.997736}, {-0.659842, -0.751405}, ++ { 0.779366, 0.626569}, { 0.751561, -0.659663}, {-0.34458, -0.938757}, ++ { 0.316872, 0.948468}, { 0.953601, 0.301074}, { 0.363243, 0.931694}, ++ {-0.537299, -0.843392}, { 0.996839, 
0.0794518}, { 0.950462, -0.310842}, ++ { 0.224973, 0.974365}, {-0.772894, -0.634535}, {-0.94373, -0.330718}, ++ { 0.930603, 0.36603}, { 0.994059, 0.10884}, { 0.845518, -0.533947}, ++ { 0.988122, -0.15367}, { 0.880641, -0.473785}, { 0.783488, -0.621407}, ++ { 0.72854, 0.685004}, {-0.772294, 0.635265}, { 0.662444, 0.749111}, ++ {-0.0649291, 0.99789}, {-0.285125, 0.95849}, {-0.673637, -0.739062}, ++ {-0.394791, -0.918771}, {-0.938677, -0.344798}, {-0.708925, -0.705284}, ++ { 0.31416, 0.94937}, {-0.113645, 0.993521}, {-0.296446, 0.95505}, ++ { 0.670712, -0.741718}, {-0.605825, 0.795598}, { 0.996229, 0.0867643}, ++ { 0.686613, -0.727023}, {-0.740136, 0.672457}, { 0.876977, 0.480532}, ++ {-0.561046, 0.827785}, { 0.414562, -0.910021}, {-0.645953, -0.763377}, ++ { 0.802263, 0.596971}, {-0.854981, 0.51866}, {-0.769916, 0.638145}, { 0.648047, 0.7616}, ++ {-0.773406, 0.633911}, {-0.252579, -0.967576}, { 0.962561, -0.271064}, ++ { 0.959193, -0.282751}, { 0.727508, 0.686099}, {-0.667916, 0.744236}, ++ {-0.599333, 0.8005}, { 0.622504, 0.782616}, { 0.375433, -0.92685}, ++ {-0.9972, 0.0747742}, {-0.879355, 0.476166}, { 0.409574, 0.912277}, ++ { 0.747405, -0.664369}, {-0.940177, -0.340687}, { 0.562532, 0.826776}, ++ {-0.929015, 0.370041}, { 0.0978642, 0.9952}, { 0.916896, -0.399125}, ++ {-0.608038, -0.793908}, {-0.845653, -0.533732}, {-0.455945, -0.890008}, ++ { 0.923484, -0.383637}, { 0.354901, 0.934904}, { 0.319134, -0.94771}, ++ { 0.769603, -0.638523}, {-0.899207, 0.437524}, { 0.666669, -0.745354}, ++ { 0.142655, -0.989772}, {-0.892, -0.452035}, {-0.999915, -0.0130018}, ++ {-0.87033, 0.492469}, { 0.156511, 0.987676}, {-0.146752, 0.989173}, ++ {-0.809057, 0.58773}, {-0.497325, -0.867565}, {-0.258455, -0.966023}, ++ {-0.863292, -0.504705}, {-0.976343, -0.216225}, {-0.257626, -0.966245}, ++ { 0.809568, -0.587027}, { 0.582491, 0.812837}, {-0.997088, -0.0762565}, ++ {-0.878262, 0.47818}, {-0.165343, -0.986236}, { 0.0455161, 0.998964}, ++ {-0.570664, 0.821184}, { 0.658564, 0.752525}, 
{ 0.319839, -0.947472}, ++ {-0.643905, -0.765106}, {-0.590256, 0.807216}, { 0.512137, 0.858904}, ++ {-0.998558, 0.0536785}, { 0.373964, -0.927443}, { 0.633108, -0.774063}, ++ {-0.968108, 0.250531}, { 0.787337, 0.616523}, { 0.698247, -0.715857}, ++ { 0.98937, 0.145419}, { 0.582241, -0.813016}, { 0.359617, 0.9331}, ++ {-0.758164, -0.652064}, { 0.635102, 0.772429}, {-0.0254028, -0.999677}, ++ { 0.266382, -0.963868}, { 0.660974, -0.750409}, { 0.585176, -0.810906}, ++ {-0.98243, 0.186631}, { 0.777252, -0.629189}, {-0.0267382, 0.999642}, ++ {-0.95591, 0.293661}, { 0.70368, -0.710517}, { 0.732467, -0.680803}, ++ { 0.854099, -0.52011}, { 0.536151, 0.844122}, {-0.00781503, 0.999969}, ++ {-0.534447, 0.845202}, { 0.297782, 0.954634}, { 0.905724, 0.423868}, ++ { 0.115617, -0.993294}, {-0.993408, -0.114636}, { 0.156977, 0.987602}, ++ { 0.639408, -0.768868}, {-0.995898, 0.0904832}, {-0.956372, 0.29215}, ++ { 0.990545, 0.137189}, { 0.659118, 0.75204}, {-0.0398563, -0.999205}, ++ {-0.679682, -0.733507}, {-0.540035, 0.841643}, {-0.0501135, -0.998744}, ++ {-0.196305, -0.980543}, { 0.56964, 0.821894}, {-0.703653, 0.710544}, ++ { 0.162676, -0.98668}, {-0.919545, 0.392984}, { 0.805179, 0.593031}, ++ { 0.998757, 0.0498406}, { 0.358168, 0.933657}, {-0.611152, -0.791513}, ++ {-0.440479, 0.897763}, { 0.292587, 0.956239}, {-0.217415, -0.976079}, ++ {-0.252622, -0.967565}, {-0.679998, -0.733214}, { 0.402652, -0.915353}, ++ {-0.993189, 0.116516}, { 0.0634956, 0.997982}, { 0.432309, -0.901725}, ++ { 0.923434, 0.383756}, {-0.502058, 0.864834}, { 0.935584, -0.353105}, ++ { 0.912111, -0.409944}, {-0.298643, 0.954365}, {-0.796165, 0.60508}, ++ {-0.741295, -0.671179}, { 0.856386, 0.516336}, {-0.515876, -0.856663}, ++ { 0.994745, -0.102384}, { 0.648698, -0.761046}, {-0.999675, -0.0254761}, ++ {-0.130115, 0.991499}, {-0.998787, 0.0492413}, { 0.27449, -0.96159}, ++ {-0.996501, -0.0835749}, {-0.387182, 0.922003}, { 0.701006, -0.713156}, ++ { 0.98733, -0.158679}, {-0.713847, 0.700302}, {-0.329606, 
0.944118}, ++ { 0.279362, 0.960186}, {-0.968574, -0.248726}, {-0.68131, 0.731995}, ++ { 0.220789, -0.975322}, {-0.985566, -0.169292}, { 0.0132834, 0.999912}, ++ {-0.422317, 0.906448}, {-0.772023, -0.635594}, { 0.842036, -0.539421}, ++ {-0.803312, 0.595558}, { 0.725035, 0.688712}, { 0.328206, 0.944606}, ++ { 0.711898, 0.702283}, {-0.691674, 0.72221}, {-0.871274, 0.490797}, ++ { 0.213736, 0.976891}, { 0.255845, 0.966718}, { 0.883381, 0.468656}, {-0.596736, 0.802437}, ++ { 0.779861, 0.625953}, {-0.607233, 0.794524}, {-0.944679, -0.327996}, ++ { 0.851219, -0.52481}, {-0.859337, -0.51141}, { 0.953486, -0.301437}, ++ { 0.512244, -0.85884}, { 0.160393, 0.987053}, { 0.752002, 0.659161}, ++ { 0.999882, 0.0153624}, { 0.778011, -0.628251}, { 0.9293, -0.369326}, ++ {-0.605896, 0.795544}, { 0.633164, 0.774017}, {-0.923382, -0.383883}, ++ {-0.790911, 0.611931}, { 0.673492, 0.739195}, {-0.784902, 0.61962}, ++ { 0.289472, 0.957187}, { 0.605387, -0.795931}, {-0.459844, -0.888}, ++ {-0.990035, 0.140822}, {-0.686367, -0.727256}, {-0.549857, 0.835259}, ++ { 0.90982, 0.415003}, {-0.42105, 0.907037}, {-0.07295, 0.997336}, ++ {-0.24021, 0.970721}, { 0.993154, -0.116813}, {-0.59563, -0.803259}, ++ { 0.526545, -0.850147}, { 0.9987, -0.0509667}, {-0.85017, -0.526508}, ++ {-0.818838, -0.574025}, { 0.982094, 0.188394}, { 0.577634, -0.816296}, ++ {-0.418394, -0.908265}, { 0.62868, -0.777664}, {-0.118173, -0.992993}, ++ { 0.896113, 0.443827}, {-0.159857, -0.98714}, { 0.750036, 0.661397}, ++ { 0.745659, 0.666328}, {-0.938865, -0.344285}, {-0.583143, 0.812369}, ++ { 0.479122, 0.877748}, {-0.869898, -0.493231}, {-0.791797, 0.610784}, { 0.35785, 0.933779}, ++ { 0.25248, -0.967602}, {-0.573942, -0.818896}, {-0.930426, -0.366479}, ++ {-0.378158, 0.925741}, {-0.942114, 0.335293}, { 0.647836, -0.76178}, ++ { 0.814174, 0.580622}, { 0.023769, 0.999717}, { 0.112026, -0.993705}, ++ { 0.659378, -0.751811}, {-0.615064, -0.788477}, {-0.00328067, 0.999995}, ++ { 0.902263, -0.431186}, { 0.201174, -0.979556}, { 
0.541589, 0.840644}, ++ {-0.996013, 0.0892081}, { 0.987237, 0.159261}, {-0.692353, 0.721559}, ++ { 0.940855, -0.338809}, { 0.164224, -0.986423}, { 0.0618662, 0.998084}, ++ { 0.784694, 0.619883}, { 0.156281, -0.987713}, {-0.424548, -0.905405}, ++ { 0.927622, 0.37352}, {-0.923711, 0.383089}, { 0.708767, -0.705442}, ++ {-0.941076, 0.338195}, {-0.268226, 0.963356}, { 0.653964, 0.756526}, ++ { 0.983767, -0.179452}, {-0.480029, 0.877253}, {-0.845565, -0.533873}, ++ {-0.768586, -0.639746}, { 0.208936, 0.977929}, { 0.512539, -0.858664}, ++ { 0.988163, -0.153409}, { 0.780816, 0.624761}, {-0.232748, 0.972537}, ++ { 0.988528, -0.151035}, {-0.106602, -0.994302}, {-0.633295, 0.773911}, ++ { 0.322068, -0.946717}, { 0.989632, -0.143627}, { 0.973492, -0.228721}, ++ { 0.998266, 0.0588631}, {-0.145619, 0.989341} ++}; ++ ++#endif /* AVCODEC_AC4DEC_DATA_H */ +Index: FFmpeg/libavcodec/allcodecs.c +=================================================================== +--- libavcodec/allcodecs.c ++++ libavcodec/allcodecs.c +@@ -431,6 +431,7 @@ extern const FFCodec ff_ac3_encoder; + extern const FFCodec ff_ac3_decoder; + extern const FFCodec ff_ac3_fixed_encoder; + extern const FFCodec ff_ac3_fixed_decoder; ++extern const FFCodec ff_ac4_decoder; + extern const FFCodec ff_acelp_kelvin_decoder; + extern const FFCodec ff_alac_encoder; + extern const FFCodec ff_alac_decoder; +Index: FFmpeg/libavcodec/kbdwin.h +=================================================================== +--- libavcodec/kbdwin.h ++++ libavcodec/kbdwin.h +@@ -24,7 +24,7 @@ + /** + * Maximum window size for ff_kbd_window_init. + */ +-#define FF_KBD_WINDOW_MAX 1024 ++#define FF_KBD_WINDOW_MAX 2048 + + /** + * Generate a Kaiser-Bessel Derived Window. 
+Index: FFmpeg/libavcodec/utils.c +=================================================================== +--- libavcodec/utils.c ++++ libavcodec/utils.c +@@ -598,7 +598,8 @@ static int get_audio_frame_duration(enum + case AV_CODEC_ID_ATRAC3P: return 2048; + case AV_CODEC_ID_MP2: + case AV_CODEC_ID_MUSEPACK7: return 1152; +- case AV_CODEC_ID_AC3: return 1536; ++ case AV_CODEC_ID_AC3: ++ case AV_CODEC_ID_AC4: return 1536; + case AV_CODEC_ID_FTR: return 1024; + } + +Index: FFmpeg/libavformat/isom_tags.c +=================================================================== +--- libavformat/isom_tags.c ++++ libavformat/isom_tags.c +@@ -308,6 +308,7 @@ const AVCodecTag ff_codec_movaudio_tags[ + { AV_CODEC_ID_DTS, MKTAG('d', 't', 's', 'e') }, /* DTS Express */ + { AV_CODEC_ID_DTS, MKTAG('D', 'T', 'S', ' ') }, /* non-standard */ + { AV_CODEC_ID_EAC3, MKTAG('e', 'c', '-', '3') }, /* ETSI TS 102 366 Annex F (only valid in ISOBMFF) */ ++ { AV_CODEC_ID_AC4, MKTAG('a', 'c', '-', '4') }, + { AV_CODEC_ID_DVAUDIO, MKTAG('v', 'd', 'v', 'a') }, + { AV_CODEC_ID_DVAUDIO, MKTAG('d', 'v', 'c', 'a') }, + { AV_CODEC_ID_GSM, MKTAG('a', 'g', 's', 'm') }, diff --git a/cross/ffmpeg7/patches/1062-jellyfin-0062-tune-the-default-inter-thread-queue-sizes.patch b/cross/ffmpeg7/patches/1062-jellyfin-0062-tune-the-default-inter-thread-queue-sizes.patch new file mode 100644 index 00000000000..92b25f89625 --- /dev/null +++ b/cross/ffmpeg7/patches/1062-jellyfin-0062-tune-the-default-inter-thread-queue-sizes.patch @@ -0,0 +1,29 @@ +Index: FFmpeg/fftools/ffmpeg_sched.h +=================================================================== +--- fftools/ffmpeg_sched.h ++++ fftools/ffmpeg_sched.h +@@ -243,7 +243,10 @@ int sch_add_mux(Scheduler *sch, SchThrea + /** + * Default size of a frame thread queue. 
+ */ +-#define DEFAULT_FRAME_THREAD_QUEUE_SIZE 8 ++#define DEFAULT_FRAME_THREAD_QUEUE_SIZE 1 ++ ++// The new default value of 8 does little to help hwaccel, but instead increases ++// extra_hw_frames, which causes video memory on dGPU to be exhausted more easily. + + /** + * Add a muxed stream for a previously added muxer. +Index: FFmpeg/tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat +=================================================================== +--- tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat ++++ tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat +@@ -33,3 +33,8 @@ + {\an7}( inaudible radio chatter ) + >> Safety remains our numb + ++9 ++00:00:03,704 --> 00:00:04,004 ++{\an7}( inaudible radio chatter ) ++>> Safety remains our number one ++ diff --git a/cross/ffmpeg7/patches/1063-jellyfin-0063-add-vendor-id-option-to-vaapi-hwcontext.patch b/cross/ffmpeg7/patches/1063-jellyfin-0063-add-vendor-id-option-to-vaapi-hwcontext.patch new file mode 100644 index 00000000000..86fc58b1cfb --- /dev/null +++ b/cross/ffmpeg7/patches/1063-jellyfin-0063-add-vendor-id-option-to-vaapi-hwcontext.patch @@ -0,0 +1,78 @@ +Index: FFmpeg/libavcodec/qsv.c +=================================================================== +--- libavcodec/qsv.c ++++ libavcodec/qsv.c +@@ -473,8 +473,8 @@ static int ff_qsv_set_display_handle(AVC + AVVAAPIDeviceContext *hwctx; + int ret; + +- av_dict_set(&child_device_opts, "kernel_driver", "i915", 0); +- av_dict_set(&child_device_opts, "driver", "iHD", 0); ++ av_dict_set(&child_device_opts, "vendor_id", "0x8086", 0); ++ av_dict_set(&child_device_opts, "driver", "iHD", 0); + + ret = av_hwdevice_ctx_create(&qs->va_device_ref, AV_HWDEVICE_TYPE_VAAPI, NULL, child_device_opts, 0); + av_dict_free(&child_device_opts); +Index: FFmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- libavutil/hwcontext_qsv.c ++++ libavutil/hwcontext_qsv.c +@@ -2644,8 +2644,8 @@ static int qsv_device_create(AVHWDeviceC + // 
used on recent Intel hardware. Set options to the VAAPI device + // creation so that we should pick a usable setup by default if + // possible, even when multiple devices and drivers are available. +- av_dict_set(&child_device_opts, "kernel_driver", "i915", 0); +- av_dict_set(&child_device_opts, "driver", "iHD", 0); ++ av_dict_set(&child_device_opts, "vendor_id", "0x8086", 0); ++ av_dict_set(&child_device_opts, "driver", "iHD", 0); + } + break; + #endif +Index: FFmpeg/libavutil/hwcontext_vaapi.c +=================================================================== +--- libavutil/hwcontext_vaapi.c ++++ libavutil/hwcontext_vaapi.c +@@ -1763,7 +1763,9 @@ static int vaapi_device_create(AVHWDevic + #if CONFIG_LIBDRM + drmVersion *info; + const AVDictionaryEntry *kernel_driver; ++ const AVDictionaryEntry *vendor_id; + kernel_driver = av_dict_get(opts, "kernel_driver", NULL, 0); ++ vendor_id = av_dict_get(opts, "vendor_id", NULL, 0); + #endif + for (n = 0; n < max_devices; n++) { + snprintf(path, sizeof(path), +@@ -1818,6 +1820,33 @@ static int vaapi_device_create(AVHWDevic + close(priv->drm_fd); + priv->drm_fd = -1; + continue; ++ } else if (vendor_id) { ++ drmDevicePtr device; ++ char drm_vendor[8]; ++ if (drmGetDevice(priv->drm_fd, &device)) { ++ av_log(ctx, AV_LOG_VERBOSE, ++ "Failed to get DRM device info for device %d.\n", n); ++ close(priv->drm_fd); ++ priv->drm_fd = -1; ++ continue; ++ } ++ ++ snprintf(drm_vendor, sizeof(drm_vendor), "0x%x", device->deviceinfo.pci->vendor_id); ++ if (strcmp(vendor_id->value, drm_vendor)) { ++ av_log(ctx, AV_LOG_VERBOSE, "Ignoring device %d " ++ "with non-matching vendor id (%s).\n", ++ n, vendor_id->value); ++ drmFreeDevice(&device); ++ close(priv->drm_fd); ++ priv->drm_fd = -1; ++ continue; ++ } ++ av_log(ctx, AV_LOG_VERBOSE, "Trying to use " ++ "DRM render node for device %d, " ++ "with matching vendor id (%s).\n", ++ n, vendor_id->value); ++ drmFreeDevice(&device); ++ break; + } + drmFreeVersion(info); + #endif diff --git 
a/cross/ffmpeg7/patches/1064-jellyfin-0064-backport-fixes-for-vulkan-from-upstream.patch b/cross/ffmpeg7/patches/1064-jellyfin-0064-backport-fixes-for-vulkan-from-upstream.patch new file mode 100644 index 00000000000..b6d8bbcb522 --- /dev/null +++ b/cross/ffmpeg7/patches/1064-jellyfin-0064-backport-fixes-for-vulkan-from-upstream.patch @@ -0,0 +1,2615 @@ +Index: FFmpeg/libavcodec/vulkan_decode.c +=================================================================== +--- libavcodec/vulkan_decode.c ++++ libavcodec/vulkan_decode.c +@@ -87,7 +87,7 @@ int ff_vk_update_thread_context(AVCodecC + + const VkVideoProfileInfoKHR *profile = get_video_profile(ctx, dst->codec_id); + if (!profile) { +- av_log(dst, AV_LOG_ERROR, "Video profile missing from frames context!"); ++ av_log(dst, AV_LOG_ERROR, "Video profile missing from frames context!\n"); + return AVERROR(EINVAL); + } + +@@ -259,7 +259,7 @@ int ff_vk_decode_add_slice(AVCodecContex + const int nb = *nb_slices; + uint8_t *slices; + uint32_t *slice_off; +- FFVkVideoBuffer *vkbuf; ++ FFVkBuffer *vkbuf; + + size_t new_size = vp->slices_size + startcode_len + size + + ctx->caps.minBitstreamBufferSizeAlignment; +@@ -273,29 +273,38 @@ int ff_vk_decode_add_slice(AVCodecContex + *offsets = dec->slice_off = slice_off; + slice_off[nb] = vp->slices_size; + +- vkbuf = vp->slices_buf ? (FFVkVideoBuffer *)vp->slices_buf->data : NULL; +- if (!vkbuf || vkbuf->buf.size < new_size) { ++ vkbuf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL; ++ if (!vkbuf || vkbuf->size < new_size) { + int err; + AVBufferRef *new_ref; +- FFVkVideoBuffer *new_buf; +- err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &new_ref, +- VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, +- ctx->s.hwfc->create_pnext, new_size); ++ FFVkBuffer *new_buf; ++ ++ /* No point in requesting anything smaller. */ ++ size_t buf_size = FFMAX(new_size, 1024*1024); ++ ++ /* Align buffer to nearest power of two. 
Makes fragmentation management ++ * easier, and gives us ample headroom. */ ++ buf_size = 2 << av_log2(buf_size); ++ ++ err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &new_ref, ++ VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, ++ ctx->s.hwfc->create_pnext, buf_size, ++ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) + return err; + +- new_buf = (FFVkVideoBuffer *)new_ref->data; ++ new_buf = (FFVkBuffer *)new_ref->data; + + /* Copy data from the old buffer */ + if (vkbuf) { +- memcpy(new_buf->mem, vkbuf->mem, vp->slices_size); ++ memcpy(new_buf->mapped_mem, vkbuf->mapped_mem, vp->slices_size); + av_buffer_unref(&vp->slices_buf); + } + + vp->slices_buf = new_ref; + vkbuf = new_buf; + } +- slices = vkbuf->mem; ++ slices = vkbuf->mapped_mem; + + /* Startcode */ + memcpy(slices + vp->slices_size, startcode_prefix, startcode_len); +@@ -346,7 +355,7 @@ int ff_vk_decode_frame(AVCodecContext *a + int err; + VkResult ret; + VkCommandBuffer cmd_buf; +- FFVkVideoBuffer *sd_buf; ++ FFVkBuffer *sd_buf; + + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; +@@ -399,13 +408,13 @@ int ff_vk_decode_frame(AVCodecContext *a + "Result of previous frame decoding: %"PRId64"\n", prev_sub_res); + } + +- sd_buf = (FFVkVideoBuffer *)vp->slices_buf->data; ++ sd_buf = (FFVkBuffer *)vp->slices_buf->data; + + /* Flush if needed */ +- if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { ++ if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, +- .memory = sd_buf->buf.mem, ++ .memory = sd_buf->mem, + .offset = 0, + .size = FFALIGN(vp->slices_size, + ctx->s.props.properties.limits.nonCoherentAtomSize), +@@ -419,7 +428,7 @@ int ff_vk_decode_frame(AVCodecContext *a + } + } + +- vp->decode_info.srcBuffer = sd_buf->buf.buf; ++ vp->decode_info.srcBuffer = sd_buf->buf; + vp->decode_info.srcBufferOffset = 0; + 
vp->decode_info.srcBufferRange = data_size; + +@@ -620,6 +629,8 @@ static void free_common(FFRefStructOpaqu + ctx->empty_session_params, + s->hwctx->alloc); + ++ av_buffer_pool_uninit(&ctx->buf_pool); ++ + ff_vk_video_common_uninit(s, &ctx->common); + + if (ctx->yuv_sampler) +@@ -911,9 +922,9 @@ static int vulkan_decode_get_profile(AVC + return AVERROR_EXTERNAL; + } + +- /* TODO: make dedicated_dpb tunable */ + dec->dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR); +- dec->layered_dpb = !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); ++ dec->layered_dpb = !dec->dedicated_dpb ? 0 : ++ !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); + + if (dec->dedicated_dpb) { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; +@@ -922,6 +933,10 @@ static int vulkan_decode_get_profile(AVC + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; ++ ++ if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | ++ FF_VK_EXT_VIDEO_MAINTENANCE_1)) ++ fmt_info.imageUsage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + } + + /* Get the format of the images necessary */ +@@ -1011,6 +1026,7 @@ int ff_vk_frame_params(AVCodecContext *a + AVVulkanFramesContext *hwfc = frames_ctx->hwctx; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeProfileData *prof; ++ FFVulkanDecodeShared *ctx; + + frames_ctx->sw_format = AV_PIX_FMT_NONE; + +@@ -1047,6 +1063,11 @@ int ff_vk_frame_params(AVCodecContext *a + if (!dec->dedicated_dpb) + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + ++ ctx = dec->shared_ctx; ++ if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | ++ FF_VK_EXT_VIDEO_MAINTENANCE_1)) ++ hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; ++ + return err; + } + +@@ -1106,7 +1127,7 @@ int ff_vk_decode_uninit(AVCodecContext * + + int ff_vk_decode_init(AVCodecContext *avctx) + { +- int err, qf, 
cxpos = 0, cypos = 0, nb_q = 0; ++ int err, cxpos = 0, cypos = 0, nb_q = 0; + VkResult ret; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx; +@@ -1171,22 +1192,22 @@ int ff_vk_decode_init(AVCodecContext *av + goto fail; + + /* Create queue context */ +- qf = ff_vk_qf_init(s, &ctx->qf, VK_QUEUE_VIDEO_DECODE_BIT_KHR); +- + vk_desc = get_codecdesc(avctx->codec_id); +- /* Check for support */ +- if (!(s->video_props[qf].videoCodecOperations & vk_desc->decode_op)) { +- av_log(avctx, AV_LOG_ERROR, "Decoding %s not supported on the given " +- "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf); +- return AVERROR(EINVAL); ++ err = ff_vk_video_qf_init(s, &ctx->qf, ++ VK_QUEUE_VIDEO_DECODE_BIT_KHR, ++ vk_desc->decode_op); ++ if (err < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Decoding of %s is not supported by this device\n", ++ avcodec_get_name(avctx->codec_id)); ++ return err; + } + + /* Enable queries if supported */ +- if (s->query_props[qf].queryResultStatusSupport) ++ if (s->query_props[ctx->qf.queue_family].queryResultStatusSupport) + nb_q = 1; + + session_create.flags = 0x0; +- session_create.queueFamilyIndex = s->hwctx->queue_family_decode_index; ++ session_create.queueFamilyIndex = ctx->qf.queue_family; + session_create.maxCodedExtent = ctx->caps.maxCodedExtent; + session_create.maxDpbSlots = ctx->caps.maxDpbSlots; + session_create.maxActiveReferencePictures = ctx->caps.maxActiveReferencePictures; +Index: FFmpeg/libavcodec/vulkan_decode.h +=================================================================== +--- libavcodec/vulkan_decode.h ++++ libavcodec/vulkan_decode.h +@@ -48,6 +48,8 @@ typedef struct FFVulkanDecodeShared { + FFVkVideoCommon common; + FFVkQueueFamilyCtx qf; + ++ AVBufferPool *buf_pool; ++ + VkVideoCapabilitiesKHR caps; + VkVideoDecodeCapabilitiesKHR dec_caps; + +Index: FFmpeg/libavcodec/vulkan_video.c +=================================================================== +--- 
libavcodec/vulkan_video.c ++++ libavcodec/vulkan_video.c +@@ -176,84 +176,18 @@ int ff_vk_h265_level_to_av(StdVideoH265L + } + } + +-static void free_data_buf(void *opaque, uint8_t *data) ++int ff_vk_video_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, ++ VkQueueFlagBits family, VkVideoCodecOperationFlagBitsKHR caps) + { +- FFVulkanContext *ctx = opaque; +- FFVkVideoBuffer *buf = (FFVkVideoBuffer *)data; +- ff_vk_unmap_buffer(ctx, &buf->buf, 0); +- ff_vk_free_buf(ctx, &buf->buf); +- av_free(data); +-} +- +-static AVBufferRef *alloc_data_buf(void *opaque, size_t size) +-{ +- AVBufferRef *ref; +- uint8_t *buf = av_mallocz(size); +- if (!buf) +- return NULL; +- +- ref = av_buffer_create(buf, size, free_data_buf, opaque, 0); +- if (!ref) +- av_free(buf); +- return ref; +-} +- +-int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s, +- AVBufferRef **buf, VkBufferUsageFlags usage, +- void *create_pNext, size_t size) +-{ +- int err; +- AVBufferRef *ref; +- FFVkVideoBuffer *data; +- +- if (!s->buf_pool) { +- s->buf_pool = av_buffer_pool_init2(sizeof(FFVkVideoBuffer), ctx, +- alloc_data_buf, NULL); +- if (!s->buf_pool) +- return AVERROR(ENOMEM); ++ for (int i = 0; i < s->hwctx->nb_qf; i++) { ++ if ((s->hwctx->qf[i].flags & family) && ++ (s->hwctx->qf[i].video_caps & caps)) { ++ qf->queue_family = s->hwctx->qf[i].idx; ++ qf->nb_queues = s->hwctx->qf[i].num; ++ return 0; ++ } + } +- +- *buf = ref = av_buffer_pool_get(s->buf_pool); +- if (!ref) +- return AVERROR(ENOMEM); +- +- data = (FFVkVideoBuffer *)ref->data; +- +- if (data->buf.size >= size) +- return 0; +- +- /* No point in requesting anything smaller. */ +- size = FFMAX(size, 1024*1024); +- +- /* Align buffer to nearest power of two. Makes fragmentation management +- * easier, and gives us ample headroom. 
*/ +- size--; +- size |= size >> 1; +- size |= size >> 2; +- size |= size >> 4; +- size |= size >> 8; +- size |= size >> 16; +- size++; +- +- ff_vk_free_buf(ctx, &data->buf); +- memset(data, 0, sizeof(FFVkVideoBuffer)); +- +- err = ff_vk_create_buf(ctx, &data->buf, size, +- create_pNext, NULL, usage, +- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); +- if (err < 0) { +- av_buffer_unref(&ref); +- return err; +- } +- +- /* Map the buffer */ +- err = ff_vk_map_buffer(ctx, &data->buf, &data->mem, 0); +- if (err < 0) { +- av_buffer_unref(&ref); +- return err; +- } +- +- return 0; ++ return AVERROR(ENOTSUP); + } + + av_cold void ff_vk_video_common_uninit(FFVulkanContext *s, +@@ -272,8 +206,6 @@ av_cold void ff_vk_video_common_uninit(F + vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc); + + av_freep(&common->mem); +- +- av_buffer_pool_uninit(&common->buf_pool); + } + + av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s, +Index: FFmpeg/libavcodec/vulkan_video.h +=================================================================== +--- libavcodec/vulkan_video.h ++++ libavcodec/vulkan_video.h +@@ -32,8 +32,6 @@ typedef struct FFVkVideoSession { + VkVideoSessionKHR session; + VkDeviceMemory *mem; + uint32_t nb_mem; +- +- AVBufferPool *buf_pool; + } FFVkVideoCommon; + + /** +@@ -56,6 +54,11 @@ VkVideoChromaSubsamplingFlagBitsKHR ff_v + */ + VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth); + ++/** ++ * Chooses a QF and loads it into a context. ++ */ ++int ff_vk_video_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, ++ VkQueueFlagBits family, VkVideoCodecOperationFlagBitsKHR caps); + + /** + * Convert level from Vulkan to AV. 
+@@ -63,19 +66,6 @@ VkVideoComponentBitDepthFlagBitsKHR ff_v + int ff_vk_h264_level_to_av(StdVideoH264LevelIdc level); + int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level); + +-typedef struct FFVkVideoBuffer { +- FFVkBuffer buf; +- uint8_t *mem; +-} FFVkVideoBuffer; +- +-/** +- * Get a mapped FFVkPooledBuffer with a specific guaranteed minimum size +- * from a pool. +- */ +-int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s, +- AVBufferRef **buf, VkBufferUsageFlags usage, +- void *create_pNext, size_t size); +- + /** + * Initialize video session, allocating and binding necessary memory. + */ +Index: FFmpeg/libavfilter/vulkan_filter.c +=================================================================== +--- libavfilter/vulkan_filter.c ++++ libavfilter/vulkan_filter.c +@@ -36,6 +36,7 @@ int ff_vk_filter_init_context(AVFilterCo + if (frames_ref) { + int no_storage = 0; + FFVulkanFunctions *vk; ++ VkImageUsageFlagBits usage_req; + const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format); + + frames_ctx = (AVHWFramesContext *)frames_ref->data; +@@ -52,22 +53,44 @@ int ff_vk_filter_init_context(AVFilterCo + if (sw_format != frames_ctx->sw_format) + goto skip; + +- /* Unusual tiling mismatch. Don't let linear through either. */ +- if (vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL) +- goto skip; +- +- /* Usage mismatch */ +- if ((vk_frames->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)) != +- (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)) ++ /* Don't let linear through. 
*/ ++ if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) + goto skip; + + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); ++ ++ /* More advanced format checks */ + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) + return err; + vk = &s->vkfn; + ++ /* Usage mismatch */ ++ usage_req = VK_IMAGE_USAGE_SAMPLED_BIT; ++ ++ /* If format supports hardware encoding, make sure ++ * the context includes it. */ ++ if (vk_frames->format[1] == VK_FORMAT_UNDEFINED && ++ (s->extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | ++ FF_VK_EXT_VIDEO_MAINTENANCE_1))) { ++ VkFormatProperties3 fprops = { ++ .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3, ++ }; ++ VkFormatProperties2 prop = { ++ .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, ++ .pNext = &fprops, ++ }; ++ vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, ++ vk_frames->format[0], ++ &prop); ++ if (fprops.optimalTilingFeatures & VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR) ++ usage_req |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; ++ } ++ ++ if ((vk_frames->usage & usage_req) != usage_req) ++ goto skip; ++ + /* Check if the subformats can do storage */ + for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) { + VkFormatProperties2 prop = { +@@ -75,14 +98,8 @@ int ff_vk_filter_init_context(AVFilterCo + }; + vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i], + &prop); +- +- if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) { +- no_storage |= !(prop.formatProperties.linearTilingFeatures & +- VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); +- } else { +- no_storage |= !(prop.formatProperties.optimalTilingFeatures & +- VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); +- } ++ no_storage |= !(prop.formatProperties.optimalTilingFeatures & ++ VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } + + /* Check if it's usable */ +@@ -112,13 +129,6 @@ skip: + frames_ctx->width = width; + frames_ctx->height = height; + +- vk_frames = frames_ctx->hwctx; +- 
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; +- vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT | +- VK_IMAGE_USAGE_STORAGE_BIT | +- VK_IMAGE_USAGE_TRANSFER_SRC_BIT | +- VK_IMAGE_USAGE_TRANSFER_DST_BIT; +- + err = av_hwframe_ctx_init(frames_ref); + if (err < 0) { + av_buffer_unref(&frames_ref); +Index: FFmpeg/libavfilter/vulkan_shaderc.c +=================================================================== +--- libavfilter/vulkan_shaderc.c ++++ libavfilter/vulkan_shaderc.c +@@ -51,6 +51,7 @@ static int shdc_shader_compile(FFVkSPIRV + shaderc_compile_options_set_target_env(opts, shaderc_target_env_vulkan, + shaderc_env_version_vulkan_1_2); + shaderc_compile_options_set_target_spirv(opts, shaderc_spirv_version_1_5); ++ shaderc_compile_options_set_generate_debug_info(opts); + shaderc_compile_options_set_optimization_level(opts, + shaderc_optimization_level_performance); + +@@ -65,6 +66,9 @@ static int shdc_shader_compile(FFVkSPIRV + warn = shaderc_result_get_num_warnings(res); + message = shaderc_result_get_error_message(res); + ++ if (ret != shaderc_compilation_status_success && !err) ++ err = 1; ++ + loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE; + + ff_vk_shader_print(avctx, shd, loglevel); +Index: FFmpeg/libavutil/hwcontext_vulkan.c +=================================================================== +--- libavutil/hwcontext_vulkan.c ++++ libavutil/hwcontext_vulkan.c +@@ -67,19 +67,6 @@ + #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) + #endif + +-typedef struct VulkanQueueCtx { +- VkFence fence; +- VkQueue queue; +- int was_synchronous; +- int qf; +- int qidx; +- +- /* Buffer dependencies */ +- AVBufferRef **buf_deps; +- int nb_buf_deps; +- unsigned int buf_deps_alloc_size; +-} VulkanQueueCtx; +- + typedef struct VulkanDevicePriv { + /** + * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it. 
+@@ -105,6 +92,9 @@ typedef struct VulkanDevicePriv { + VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features; + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features; + VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features; ++ VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow_features; ++ VkPhysicalDeviceShaderObjectFeaturesEXT shader_object_features; ++ VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maint_1_features; + + /* Queues */ + pthread_mutex_t **qf_mutex; +@@ -141,6 +131,9 @@ typedef struct VulkanFramesPriv { + FFVkExecPool upload_exec; + FFVkExecPool download_exec; + ++ /* Temporary buffer pools */ ++ AVBufferPool *tmp; ++ + /* Modifier info list to free at uninit */ + VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; + } VulkanFramesPriv; +@@ -424,11 +417,13 @@ static const VulkanOptExtension optional + /* Misc or required by other extensions */ + { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, + { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, +- { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, + { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, + { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, + { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX }, ++ { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW }, ++ { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, ++ { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 }, + + /* Imports/exports */ + { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, +@@ -443,8 +438,11 @@ static const VulkanOptExtension optional + + /* Video encoding/decoding */ + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, ++ { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, + { 
VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, ++ { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, ++ { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, + { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 }, + }; +@@ -457,6 +455,14 @@ static VkBool32 VKAPI_CALL vk_dbg_callba + int l; + AVHWDeviceContext *ctx = priv; + ++ /* Ignore false positives */ ++ switch (data->messageIdNumber) { ++ case 0x30f4ac70: /* VUID-VkImageCreateInfo-pNext-06811 */ ++ return VK_FALSE; ++ default: ++ break; ++ } ++ + switch (severity) { + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break; +@@ -469,7 +475,7 @@ static VkBool32 VKAPI_CALL vk_dbg_callba + for (int i = 0; i < data->cmdBufLabelCount; i++) + av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName); + +- return 0; ++ return VK_FALSE; + } + + #define ADD_VAL_TO_LIST(list, count, val) \ +@@ -493,8 +499,19 @@ static VkBool32 VKAPI_CALL vk_dbg_callba + av_free((void *)props); \ + } + ++enum FFVulkanDebugMode { ++ FF_VULKAN_DEBUG_NONE = 0, ++ /* Standard GPU-assisted validation */ ++ FF_VULKAN_DEBUG_VALIDATE = 1, ++ /* Passes printfs in shaders to the debug callback */ ++ FF_VULKAN_DEBUG_PRINTF = 2, ++ /* Enables extra printouts */ ++ FF_VULKAN_DEBUG_PRACTICES = 3, ++}; ++ + static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, +- const char * const **dst, uint32_t *num, int debug) ++ const char * const **dst, uint32_t *num, ++ enum FFVulkanDebugMode debug_mode) + { + const char *tstr; + const char **extension_names = NULL; +@@ -566,7 +583,10 @@ static int check_extensions(AVHWDeviceCo + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + } + 
+- if (debug && !dev) { ++ if (!dev && ++ ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) || ++ (debug_mode == FF_VULKAN_DEBUG_PRINTF) || ++ (debug_mode == FF_VULKAN_DEBUG_PRACTICES))) { + tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; + found = 0; + for (int j = 0; j < sup_ext_count; j++) { +@@ -622,20 +642,21 @@ fail: + return err; + } + +-static int check_validation_layers(AVHWDeviceContext *ctx, AVDictionary *opts, +- const char * const **dst, uint32_t *num, +- int *debug_mode) ++static int check_layers(AVHWDeviceContext *ctx, AVDictionary *opts, ++ const char * const **dst, uint32_t *num, ++ enum FFVulkanDebugMode *debug_mode) + { +- static const char default_layer[] = { "VK_LAYER_KHRONOS_validation" }; +- +- int found = 0, err = 0; ++ int err = 0; + VulkanDevicePriv *priv = ctx->hwctx; + FFVulkanFunctions *vk = &priv->vkctx.vkfn; + ++ static const char layer_standard_validation[] = { "VK_LAYER_KHRONOS_validation" }; ++ int layer_standard_validation_found = 0; ++ + uint32_t sup_layer_count; + VkLayerProperties *sup_layers; + +- AVDictionaryEntry *user_layers; ++ AVDictionaryEntry *user_layers = av_dict_get(opts, "layers", NULL, 0); + char *user_layers_str = NULL; + char *save, *token; + +@@ -643,99 +664,136 @@ static int check_validation_layers(AVHWD + uint32_t enabled_layers_count = 0; + + AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0); +- int debug = debug_opt && strtol(debug_opt->value, NULL, 10); ++ enum FFVulkanDebugMode mode; + +- /* If `debug=0`, enable no layers at all. 
*/ +- if (debug_opt && !debug) +- return 0; ++ *debug_mode = mode = FF_VULKAN_DEBUG_NONE; + ++ /* Get a list of all layers */ + vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL); + sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties)); + if (!sup_layers) + return AVERROR(ENOMEM); + vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers); + +- av_log(ctx, AV_LOG_VERBOSE, "Supported validation layers:\n"); ++ av_log(ctx, AV_LOG_VERBOSE, "Supported layers:\n"); + for (int i = 0; i < sup_layer_count; i++) + av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName); + +- /* If `debug=1` is specified, enable the standard validation layer extension */ +- if (debug) { +- *debug_mode = debug; ++ /* If no user layers or debug layers are given, return */ ++ if (!debug_opt && !user_layers) ++ goto end; ++ ++ /* Check for any properly supported validation layer */ ++ if (debug_opt) { ++ if (!strcmp(debug_opt->value, "printf")) { ++ mode = FF_VULKAN_DEBUG_PRINTF; ++ } else if (!strcmp(debug_opt->value, "validate")) { ++ mode = FF_VULKAN_DEBUG_VALIDATE; ++ } else if (!strcmp(debug_opt->value, "practices")) { ++ mode = FF_VULKAN_DEBUG_PRACTICES; ++ } else { ++ char *end_ptr = NULL; ++ int idx = strtol(debug_opt->value, &end_ptr, 10); ++ if (end_ptr == debug_opt->value || end_ptr[0] != '\0' || ++ idx < 0 || idx > FF_VULKAN_DEBUG_PRACTICES) { ++ av_log(ctx, AV_LOG_ERROR, "Invalid debugging mode \"%s\"\n", ++ debug_opt->value); ++ err = AVERROR(EINVAL); ++ goto end; ++ } ++ mode = idx; ++ } ++ } ++ ++ /* If mode is VALIDATE or PRINTF, try to find the standard validation layer extension */ ++ if ((mode == FF_VULKAN_DEBUG_VALIDATE) || ++ (mode == FF_VULKAN_DEBUG_PRINTF) || ++ (mode == FF_VULKAN_DEBUG_PRACTICES)) { + for (int i = 0; i < sup_layer_count; i++) { +- if (!strcmp(default_layer, sup_layers[i].layerName)) { +- found = 1; +- av_log(ctx, AV_LOG_VERBOSE, "Default validation layer %s is enabled\n", +- default_layer); +- 
ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, default_layer); ++ if (!strcmp(layer_standard_validation, sup_layers[i].layerName)) { ++ av_log(ctx, AV_LOG_VERBOSE, "Standard validation layer %s is enabled\n", ++ layer_standard_validation); ++ ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, layer_standard_validation); ++ *debug_mode = mode; ++ layer_standard_validation_found = 1; + break; + } + } ++ if (!layer_standard_validation_found) { ++ av_log(ctx, AV_LOG_ERROR, ++ "Validation Layer \"%s\" not supported\n", layer_standard_validation); ++ err = AVERROR(ENOTSUP); ++ goto end; ++ } + } + +- user_layers = av_dict_get(opts, "validation_layers", NULL, 0); +- if (!user_layers) +- goto end; ++ /* Process any custom layers enabled */ ++ if (user_layers) { ++ int found; + +- user_layers_str = av_strdup(user_layers->value); +- if (!user_layers_str) { +- err = AVERROR(ENOMEM); +- goto fail; +- } ++ user_layers_str = av_strdup(user_layers->value); ++ if (!user_layers_str) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } + +- token = av_strtok(user_layers_str, "+", &save); +- while (token) { +- found = 0; +- if (!strcmp(default_layer, token)) { +- if (debug) { +- /* if the `debug=1`, default_layer is enabled, skip here */ ++ token = av_strtok(user_layers_str, "+", &save); ++ while (token) { ++ found = 0; ++ ++ /* If debug=1/2 was specified as an option, skip this layer */ ++ if (!strcmp(layer_standard_validation, token) && layer_standard_validation_found) { + token = av_strtok(NULL, "+", &save); +- continue; +- } else { +- /* if the `debug=0`, enable debug mode to load its callback properly */ +- *debug_mode = debug; +- } +- } +- for (int j = 0; j < sup_layer_count; j++) { +- if (!strcmp(token, sup_layers[j].layerName)) { +- found = 1; + break; + } +- } +- if (found) { +- av_log(ctx, AV_LOG_VERBOSE, "Requested Validation Layer: %s\n", token); +- ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token); +- } else { +- av_log(ctx, AV_LOG_ERROR, +- "Validation 
Layer \"%s\" not support.\n", token); +- err = AVERROR(EINVAL); +- goto fail; +- } +- token = av_strtok(NULL, "+", &save); +- } + +- av_free(user_layers_str); ++ /* Try to find the layer in the list of supported layers */ ++ for (int j = 0; j < sup_layer_count; j++) { ++ if (!strcmp(token, sup_layers[j].layerName)) { ++ found = 1; ++ break; ++ } ++ } + +-end: +- av_free(sup_layers); ++ if (found) { ++ av_log(ctx, AV_LOG_VERBOSE, "Using layer: %s\n", token); ++ ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token); + +- *dst = enabled_layers; +- *num = enabled_layers_count; ++ /* If debug was not set as an option, force it */ ++ if (!strcmp(layer_standard_validation, token)) ++ *debug_mode = FF_VULKAN_DEBUG_VALIDATE; ++ } else { ++ av_log(ctx, AV_LOG_ERROR, ++ "Layer \"%s\" not supported\n", token); ++ err = AVERROR(EINVAL); ++ goto end; ++ } + +- return 0; ++ token = av_strtok(NULL, "+", &save); ++ } ++ } + + fail: +- RELEASE_PROPS(enabled_layers, enabled_layers_count); ++end: + av_free(sup_layers); + av_free(user_layers_str); ++ ++ if (err < 0) { ++ RELEASE_PROPS(enabled_layers, enabled_layers_count); ++ } else { ++ *dst = enabled_layers; ++ *num = enabled_layers_count; ++ } ++ + return err; + } + + /* Creates a VkInstance */ + static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) + { +- int err = 0, debug_mode = 0; ++ int err = 0; + VkResult ret; ++ enum FFVulkanDebugMode debug_mode; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; +@@ -771,8 +829,8 @@ static int create_instance(AVHWDeviceCon + return err; + } + +- err = check_validation_layers(ctx, opts, &inst_props.ppEnabledLayerNames, +- &inst_props.enabledLayerCount, &debug_mode); ++ err = check_layers(ctx, opts, &inst_props.ppEnabledLayerNames, ++ &inst_props.enabledLayerCount, &debug_mode); + if (err) + goto fail; + +@@ -784,14 +842,32 @@ static int create_instance(AVHWDeviceCon + if (err < 0) + goto fail; + +- if 
(debug_mode) { +- VkValidationFeatureEnableEXT feat_list[] = { ++ /* Enable debug features if needed */ ++ if (debug_mode == FF_VULKAN_DEBUG_VALIDATE) { ++ static const VkValidationFeatureEnableEXT feat_list_validate[] = { ++ VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, ++ VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, ++ }; ++ validation_features.pEnabledValidationFeatures = feat_list_validate; ++ validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_validate); ++ inst_props.pNext = &validation_features; ++ } else if (debug_mode == FF_VULKAN_DEBUG_PRINTF) { ++ static const VkValidationFeatureEnableEXT feat_list_debug[] = { ++ VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, ++ VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT, ++ }; ++ validation_features.pEnabledValidationFeatures = feat_list_debug; ++ validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_debug); ++ inst_props.pNext = &validation_features; ++ } else if (debug_mode == FF_VULKAN_DEBUG_PRACTICES) { ++ static const VkValidationFeatureEnableEXT feat_list_practices[] = { + VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, ++ VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, + }; +- validation_features.pEnabledValidationFeatures = feat_list; +- validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list); ++ validation_features.pEnabledValidationFeatures = feat_list_practices; ++ validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_practices); + inst_props.pNext = &validation_features; + } + +@@ -822,7 +898,10 @@ static int create_instance(AVHWDeviceCon + goto fail; + } + +- if (debug_mode) { ++ /* Setup debugging callback if needed */ ++ if ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) || ++ (debug_mode == FF_VULKAN_DEBUG_PRINTF) || ++ (debug_mode == 
FF_VULKAN_DEBUG_PRACTICES)) { + VkDebugUtilsMessengerCreateInfoEXT dbg = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | +@@ -1028,16 +1107,51 @@ end: + } + + /* Picks the least used qf with the fewest unneeded flags, or -1 if none found */ +-static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf, ++static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf, + VkQueueFlagBits flags) + { + int index = -1; + uint32_t min_score = UINT32_MAX; + + for (int i = 0; i < num_qf; i++) { +- const VkQueueFlagBits qflags = qf[i].queueFlags; ++ VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags; ++ ++ /* Per the spec, reporting transfer caps is optional for these 2 types */ ++ if ((flags & VK_QUEUE_TRANSFER_BIT) && ++ (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))) ++ qflags |= VK_QUEUE_TRANSFER_BIT; ++ + if (qflags & flags) { +- uint32_t score = av_popcount(qflags) + qf[i].timestampValidBits; ++ uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits; ++ if (score < min_score) { ++ index = i; ++ min_score = score; ++ } ++ } ++ } ++ ++ if (index > -1) ++ qf[index].queueFamilyProperties.timestampValidBits++; ++ ++ return index; ++} ++ ++static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf, ++ VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf, ++ VkVideoCodecOperationFlagBitsKHR flags) ++{ ++ int index = -1; ++ uint32_t min_score = UINT32_MAX; ++ ++ for (int i = 0; i < num_qf; i++) { ++ const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags; ++ const VkQueueFlagBits vflags = qf_vid[i].videoCodecOperations; ++ ++ if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR))) ++ continue; ++ ++ if (vflags & flags) { ++ uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits; + if (score < min_score) { + 
index = i; + min_score = score; +@@ -1046,7 +1160,7 @@ static inline int pick_queue_family(VkQu + } + + if (index > -1) +- qf[index].timestampValidBits++; ++ qf[index].queueFamilyProperties.timestampValidBits++; + + return index; + } +@@ -1054,12 +1168,12 @@ static inline int pick_queue_family(VkQu + static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) + { + uint32_t num; +- float *weights; +- VkQueueFamilyProperties *qf = NULL; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; +- int graph_index, comp_index, tx_index, enc_index, dec_index; ++ ++ VkQueueFamilyProperties2 *qf = NULL; ++ VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL; + + /* First get the number of queue families */ + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); +@@ -1069,118 +1183,161 @@ static int setup_queue_families(AVHWDevi + } + + /* Then allocate memory */ +- qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties)); ++ qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2)); + if (!qf) + return AVERROR(ENOMEM); + ++ qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR)); ++ if (!qf_vid) ++ return AVERROR(ENOMEM); ++ ++ for (uint32_t i = 0; i < num; i++) { ++ qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) { ++ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, ++ }; ++ qf[i] = (VkQueueFamilyProperties2) { ++ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, ++ .pNext = &qf_vid[i], ++ }; ++ } ++ + /* Finally retrieve the queue families */ +- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qf); ++ vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf); + + av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n"); + for (int i = 0; i < num; i++) { +- av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s (queues: %i)\n", i, +- ((qf[i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? 
" graphics" : "", +- ((qf[i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "", +- ((qf[i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "", +- ((qf[i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "", +- ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "", +- ((qf[i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "", +- ((qf[i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "", +- qf[i].queueCount); ++ av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s%s (queues: %i)\n", i, ++ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "", ++ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "", ++ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "", ++ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "", ++ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "", ++ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "", ++ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ? " optical_flow" : "", ++ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "", ++ qf[i].queueFamilyProperties.queueCount); + + /* We use this field to keep a score of how many times we've used that + * queue family in order to make better choices. 
*/ +- qf[i].timestampValidBits = 0; ++ qf[i].queueFamilyProperties.timestampValidBits = 0; + } + ++ hwctx->nb_qf = 0; ++ + /* Pick each queue family to use */ +- graph_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT); +- comp_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT); +- tx_index = pick_queue_family(qf, num, VK_QUEUE_TRANSFER_BIT); +- enc_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); +- dec_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_DECODE_BIT_KHR); +- +- /* Signalling the transfer capabilities on a queue family is optional */ +- if (tx_index < 0) { +- tx_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT); +- if (tx_index < 0) +- tx_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT); ++#define PICK_QF(type, vid_op) \ ++ do { \ ++ uint32_t i; \ ++ uint32_t idx; \ ++ \ ++ if (vid_op) \ ++ idx = pick_video_queue_family(qf, qf_vid, num, vid_op); \ ++ else \ ++ idx = pick_queue_family(qf, num, type); \ ++ \ ++ if (idx == -1) \ ++ continue; \ ++ \ ++ for (i = 0; i < hwctx->nb_qf; i++) { \ ++ if (hwctx->qf[i].idx == idx) { \ ++ hwctx->qf[i].flags |= type; \ ++ hwctx->qf[i].video_caps |= vid_op; \ ++ break; \ ++ } \ ++ } \ ++ if (i == hwctx->nb_qf) { \ ++ hwctx->qf[i].idx = idx; \ ++ hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \ ++ hwctx->qf[i].flags = type; \ ++ hwctx->qf[i].video_caps = vid_op; \ ++ hwctx->nb_qf++; \ ++ } \ ++ } while (0) ++ ++ PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); ++ PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); ++ PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); ++ PICK_QF(VK_QUEUE_OPTICAL_FLOW_BIT_NV, VK_VIDEO_CODEC_OPERATION_NONE_KHR); ++ ++ PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR); ++ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR); ++ ++ PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR); 
++ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR); ++ ++ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); ++ ++ av_free(qf); ++ av_free(qf_vid); ++ ++#undef PICK_QF ++ ++ cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf, ++ sizeof(VkDeviceQueueCreateInfo)); ++ if (!cd->pQueueCreateInfos) ++ return AVERROR(ENOMEM); ++ ++ for (uint32_t i = 0; i < hwctx->nb_qf; i++) { ++ int dup = 0; ++ float *weights = NULL; ++ VkDeviceQueueCreateInfo *pc; ++ for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) { ++ if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) { ++ dup = 1; ++ break; ++ } ++ } ++ if (dup) ++ continue; ++ ++ weights = av_malloc_array(hwctx->qf[i].num, sizeof(float)); ++ if (!weights) { ++ for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) ++ av_free((void *)cd->pQueueCreateInfos[i].pQueuePriorities); ++ av_free((void *)cd->pQueueCreateInfos); ++ return AVERROR(ENOMEM); ++ } ++ ++ for (uint32_t j = 0; j < hwctx->qf[i].num; j++) ++ weights[j] = 1.0; ++ ++ pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos; ++ pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) { ++ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, ++ .queueFamilyIndex = hwctx->qf[i].idx, ++ .queueCount = hwctx->qf[i].num, ++ .pQueuePriorities = weights, ++ }; + } + ++#if FF_API_VULKAN_FIXED_QUEUES ++FF_DISABLE_DEPRECATION_WARNINGS ++ /* Setup deprecated fields */ + hwctx->queue_family_index = -1; + hwctx->queue_family_comp_index = -1; + hwctx->queue_family_tx_index = -1; + hwctx->queue_family_encode_index = -1; + hwctx->queue_family_decode_index = -1; + +-#define SETUP_QUEUE(qf_idx) \ +- if (qf_idx > -1) { \ +- int fidx = qf_idx; \ +- int qc = qf[fidx].queueCount; \ +- VkDeviceQueueCreateInfo *pc; \ +- \ +- if (fidx == graph_index) { \ +- hwctx->queue_family_index = fidx; \ +- hwctx->nb_graphics_queues = qc; \ +- graph_index = -1; \ +- } \ +- if (fidx == comp_index) { \ +- 
hwctx->queue_family_comp_index = fidx; \ +- hwctx->nb_comp_queues = qc; \ +- comp_index = -1; \ +- } \ +- if (fidx == tx_index) { \ +- hwctx->queue_family_tx_index = fidx; \ +- hwctx->nb_tx_queues = qc; \ +- tx_index = -1; \ +- } \ +- if (fidx == enc_index) { \ +- hwctx->queue_family_encode_index = fidx; \ +- hwctx->nb_encode_queues = qc; \ +- enc_index = -1; \ +- } \ +- if (fidx == dec_index) { \ +- hwctx->queue_family_decode_index = fidx; \ +- hwctx->nb_decode_queues = qc; \ +- dec_index = -1; \ +- } \ +- \ +- pc = av_realloc((void *)cd->pQueueCreateInfos, \ +- sizeof(*pc) * (cd->queueCreateInfoCount + 1)); \ +- if (!pc) { \ +- av_free(qf); \ +- return AVERROR(ENOMEM); \ +- } \ +- cd->pQueueCreateInfos = pc; \ +- pc = &pc[cd->queueCreateInfoCount]; \ +- \ +- weights = av_malloc(qc * sizeof(float)); \ +- if (!weights) { \ +- av_free(qf); \ +- return AVERROR(ENOMEM); \ +- } \ +- \ +- memset(pc, 0, sizeof(*pc)); \ +- pc->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; \ +- pc->queueFamilyIndex = fidx; \ +- pc->queueCount = qc; \ +- pc->pQueuePriorities = weights; \ +- \ +- for (int i = 0; i < qc; i++) \ +- weights[i] = 1.0f / qc; \ +- \ +- cd->queueCreateInfoCount++; \ +- } +- +- SETUP_QUEUE(graph_index) +- SETUP_QUEUE(comp_index) +- SETUP_QUEUE(tx_index) +- SETUP_QUEUE(enc_index) +- SETUP_QUEUE(dec_index) ++#define SET_OLD_QF(field, nb_field, type) \ ++ do { \ ++ if (field < 0 && hwctx->qf[i].flags & type) { \ ++ field = hwctx->qf[i].idx; \ ++ nb_field = hwctx->qf[i].num; \ ++ } \ ++ } while (0) + +-#undef SETUP_QUEUE ++ for (uint32_t i = 0; i < hwctx->nb_qf; i++) { ++ SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT); ++ SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT); ++ SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT); ++ SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); ++ 
SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR); ++ } + +- av_free(qf); ++#undef SET_OLD_QF ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif + + return 0; + } +@@ -1246,9 +1403,21 @@ static int vulkan_device_create_internal + VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + }; ++ VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maint_1_features = { ++ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR, ++ .pNext = &timeline_features, ++ }; ++ VkPhysicalDeviceShaderObjectFeaturesEXT shader_object_features = { ++ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT, ++ .pNext = &video_maint_1_features, ++ }; ++ VkPhysicalDeviceOpticalFlowFeaturesNV optical_flow_features = { ++ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV, ++ .pNext = &shader_object_features, ++ }; + VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR, +- .pNext = &timeline_features, ++ .pNext = &optical_flow_features, + }; + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT, +@@ -1279,21 +1448,6 @@ static int vulkan_device_create_internal + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + }; + +- hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; +- hwctx->device_features.pNext = &p->device_features_1_1; +- p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; +- p->device_features_1_1.pNext = &p->device_features_1_2; +- p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; +- p->device_features_1_2.pNext = &p->device_features_1_3; +- p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; +- 
p->device_features_1_3.pNext = &p->desc_buf_features; +- p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT; +- p->desc_buf_features.pNext = &p->atomic_float_features; +- p->atomic_float_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT; +- p->atomic_float_features.pNext = &p->coop_matrix_features; +- p->coop_matrix_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR; +- p->coop_matrix_features.pNext = NULL; +- + ctx->free = vulkan_device_free; + + /* Create an instance if not given one */ +@@ -1327,6 +1481,8 @@ static int vulkan_device_create_internal + + p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; + p->device_features_1_1.storagePushConstant16 = dev_features_1_1.storagePushConstant16; ++ p->device_features_1_1.storageBuffer16BitAccess = dev_features_1_1.storageBuffer16BitAccess; ++ p->device_features_1_1.uniformAndStorageBuffer16BitAccess = dev_features_1_1.uniformAndStorageBuffer16BitAccess; + + p->device_features_1_2.timelineSemaphore = 1; + p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress; +@@ -1348,6 +1504,8 @@ static int vulkan_device_create_internal + p->device_features_1_3.shaderZeroInitializeWorkgroupMemory = dev_features_1_3.shaderZeroInitializeWorkgroupMemory; + p->device_features_1_3.dynamicRendering = dev_features_1_3.dynamicRendering; + ++ p->video_maint_1_features.videoMaintenance1 = video_maint_1_features.videoMaintenance1; ++ + p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer; + p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors; + +@@ -1356,12 +1514,11 @@ static int vulkan_device_create_internal + + p->coop_matrix_features.cooperativeMatrix = coop_matrix_features.cooperativeMatrix; + +- dev_info.pNext = &hwctx->device_features; ++ p->optical_flow_features.opticalFlow = 
optical_flow_features.opticalFlow; + +- /* Setup queue family */ +- if ((err = setup_queue_families(ctx, &dev_info))) +- goto end; ++ p->shader_object_features.shaderObject = shader_object_features.shaderObject; + ++ /* Find and enable extensions */ + if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames, + &dev_info.enabledExtensionCount, 0))) { + for (int i = 0; i < dev_info.queueCreateInfoCount; i++) +@@ -1370,6 +1527,45 @@ static int vulkan_device_create_internal + goto end; + } + ++ /* Setup enabled device features */ ++ hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; ++ hwctx->device_features.pNext = &p->device_features_1_1; ++ p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; ++ p->device_features_1_1.pNext = &p->device_features_1_2; ++ p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; ++ p->device_features_1_2.pNext = &p->device_features_1_3; ++ p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; ++ p->device_features_1_3.pNext = NULL; ++ ++#define OPT_CHAIN(EXT_FLAG, STRUCT_P, TYPE) \ ++ do { \ ++ if (p->vkctx.extensions & EXT_FLAG) { \ ++ (STRUCT_P)->sType = TYPE; \ ++ ff_vk_link_struct(hwctx->device_features.pNext, STRUCT_P); \ ++ } \ ++ } while (0) ++ ++ OPT_CHAIN(FF_VK_EXT_DESCRIPTOR_BUFFER, &p->desc_buf_features, ++ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT); ++ OPT_CHAIN(FF_VK_EXT_ATOMIC_FLOAT, &p->atomic_float_features, ++ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT); ++ OPT_CHAIN(FF_VK_EXT_COOP_MATRIX, &p->coop_matrix_features, ++ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR); ++ OPT_CHAIN(FF_VK_EXT_SHADER_OBJECT, &p->shader_object_features, ++ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT); ++ OPT_CHAIN(FF_VK_EXT_OPTICAL_FLOW, &p->optical_flow_features, ++ 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV); ++ OPT_CHAIN(FF_VK_EXT_VIDEO_MAINTENANCE_1, &p->video_maint_1_features, ++ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR); ++#undef OPT_CHAIN ++ ++ /* Add the enabled features into the pnext chain of device creation */ ++ dev_info.pNext = &hwctx->device_features; ++ ++ /* Setup enabled queue families */ ++ if ((err = setup_queue_families(ctx, &dev_info))) ++ goto end; ++ + ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc, + &hwctx->act_dev); + +@@ -1423,12 +1619,13 @@ static void unlock_queue(AVHWDeviceConte + + static int vulkan_device_init(AVHWDeviceContext *ctx) + { +- int err; ++ int err = 0; + uint32_t qf_num; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; +- VkQueueFamilyProperties *qf; ++ VkQueueFamilyProperties2 *qf; ++ VkQueueFamilyVideoPropertiesKHR *qf_vid; + int graph_index, comp_index, tx_index, enc_index, dec_index; + + /* Set device extension flags */ +@@ -1474,38 +1671,55 @@ static int vulkan_device_init(AVHWDevice + return AVERROR_EXTERNAL; + } + +- qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties)); ++ qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2)); + if (!qf) + return AVERROR(ENOMEM); + +- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf); ++ qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR)); ++ if (!qf_vid) { ++ av_free(qf); ++ return AVERROR(ENOMEM); ++ } ++ ++ for (uint32_t i = 0; i < qf_num; i++) { ++ qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) { ++ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, ++ }; ++ qf[i] = (VkQueueFamilyProperties2) { ++ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, ++ .pNext = &qf_vid[i], ++ }; ++ } ++ ++ vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf); + + p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex)); + if (!p->qf_mutex) 
{ +- av_free(qf); +- return AVERROR(ENOMEM); ++ err = AVERROR(ENOMEM); ++ goto end; + } + p->nb_tot_qfs = qf_num; + + for (uint32_t i = 0; i < qf_num; i++) { +- p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex)); ++ p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount, ++ sizeof(**p->qf_mutex)); + if (!p->qf_mutex[i]) { +- av_free(qf); +- return AVERROR(ENOMEM); ++ err = AVERROR(ENOMEM); ++ goto end; + } +- for (uint32_t j = 0; j < qf[i].queueCount; j++) { ++ for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) { + err = pthread_mutex_init(&p->qf_mutex[i][j], NULL); + if (err != 0) { + av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n", + av_err2str(err)); +- av_free(qf); +- return AVERROR(err); ++ err = AVERROR(err); ++ goto end; + } + } + } + +- av_free(qf); +- ++#if FF_API_VULKAN_FIXED_QUEUES ++FF_DISABLE_DEPRECATION_WARNINGS + graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1; + comp_index = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1; + tx_index = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1; +@@ -1517,13 +1731,15 @@ static int vulkan_device_init(AVHWDevice + if (ctx_qf < 0 && required) { \ + av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \ + " in the context!\n", type); \ +- return AVERROR(EINVAL); \ ++ err = AVERROR(EINVAL); \ ++ goto end; \ + } else if (fidx < 0 || ctx_qf < 0) { \ + break; \ + } else if (ctx_qf >= qf_num) { \ + av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \ + type, ctx_qf, qf_num); \ +- return AVERROR(EINVAL); \ ++ err = AVERROR(EINVAL); \ ++ goto end; \ + } \ + \ + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \ +@@ -1550,6 +1766,38 @@ static int vulkan_device_init(AVHWDevice + + #undef CHECK_QUEUE + ++ /* Update the new queue family fields. If non-zero already, ++ * it means API users have set it. 
*/ ++ if (!hwctx->nb_qf) { ++#define ADD_QUEUE(ctx_qf, qc, flag) \ ++ do { \ ++ if (ctx_qf != -1) { \ ++ hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \ ++ .idx = ctx_qf, \ ++ .num = qc, \ ++ .flags = flag, \ ++ }; \ ++ } \ ++ } while (0) ++ ++ ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT); ++ ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT); ++ ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT); ++ ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR); ++ ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); ++#undef ADD_QUEUE ++ } ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ ++ for (int i = 0; i < hwctx->nb_qf; i++) { ++ if (!hwctx->qf[i].video_caps && ++ hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR | ++ VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) { ++ hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations; ++ } ++ } ++ + if (!hwctx->lock_queue) + hwctx->lock_queue = lock_queue; + if (!hwctx->unlock_queue) +@@ -1565,7 +1813,10 @@ static int vulkan_device_init(AVHWDevice + ff_vk_qf_init(&p->vkctx, &p->compute_qf, VK_QUEUE_COMPUTE_BIT); + ff_vk_qf_init(&p->vkctx, &p->transfer_qf, VK_QUEUE_TRANSFER_BIT); + +- return 0; ++end: ++ av_free(qf_vid); ++ av_free(qf); ++ return err; + } + + static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device, +@@ -1941,6 +2192,7 @@ enum PrepMode { + PREP_MODE_EXTERNAL_IMPORT, + PREP_MODE_DECODING_DST, + PREP_MODE_DECODING_DPB, ++ PREP_MODE_ENCODING_DPB, + }; + + static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx, +@@ -2002,6 +2254,10 @@ static int prepare_frame(AVHWFramesConte + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; + new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + break; ++ case PREP_MODE_ENCODING_DPB: ++ new_layout = 
VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR; ++ new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; ++ break; + } + + ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar, +@@ -2262,6 +2518,8 @@ static AVBufferRef *vulkan_pool_alloc(vo + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB); + else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST); ++ else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) ++ err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB); + else + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE); + if (err) +@@ -2303,6 +2561,8 @@ static void vulkan_frames_uninit(AVHWFra + ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec); ++ ++ av_buffer_pool_uninit(&fp->tmp); + } + + static int vulkan_frames_init(AVHWFramesContext *hwfc) +@@ -2372,6 +2632,12 @@ static int vulkan_frames_init(AVHWFrames + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); ++ ++ /* Enables encoding of images, if supported by format and extensions */ ++ if ((supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) && ++ (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | ++ FF_VK_EXT_VIDEO_MAINTENANCE_1))) ++ hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + } + + /* Image creation flags. +@@ -2390,6 +2656,28 @@ static int vulkan_frames_init(AVHWFrames + } + } + ++ /* If the image has an ENCODE_SRC usage, and the maintenance1 ++ * extension is supported, check if it has a profile list. ++ * If there's no profile list, or it has no encode operations, ++ * then allow creating the image with no specific profile. 
*/ ++ if ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) && ++ p->video_maint_1_features.videoMaintenance1) { ++ const VkVideoProfileListInfoKHR *pl; ++ pl = ff_vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); ++ if (!pl) { ++ hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR; ++ } else { ++ uint32_t i; ++ for (i = 0; i < pl->profileCount; i++) { ++ /* Video ops start at exactly 0x00010000 */ ++ if (pl->pProfiles[i].videoCodecOperation & 0xFFFF0000) ++ break; ++ } ++ if (i == pl->profileCount) ++ hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR; ++ } ++ } ++ + if (!hwctx->lock_frame) + hwctx->lock_frame = lock_frame; + +@@ -3323,128 +3611,290 @@ static int vulkan_map_from(AVHWFramesCon + return AVERROR(ENOSYS); + } + +-static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) ++static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf, ++ AVFrame *swf, VkBufferImageCopy *region, ++ int planes, int upload) + { +- size_t size; +- *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment); +- size = height*(*stride); +- size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment); +- return size; ++ VkResult ret; ++ VulkanDevicePriv *p = hwfc->device_ctx->hwctx; ++ FFVulkanFunctions *vk = &p->vkctx.vkfn; ++ AVVulkanDeviceContext *hwctx = &p->p; ++ ++ FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data; ++ ++ const VkMappedMemoryRange flush_info = { ++ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, ++ .memory = vkbuf->mem, ++ .size = VK_WHOLE_SIZE, ++ }; ++ ++ if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && !upload) { ++ ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1, ++ &flush_info); ++ if (ret != VK_SUCCESS) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n", ++ ff_vk_ret2str(ret)); ++ return AVERROR_EXTERNAL; ++ } ++ } ++ ++ for (int i = 0; i < planes; i++) ++ 
av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset, ++ region[i].bufferRowLength, ++ swf->data[i], ++ swf->linesize[i], ++ swf->linesize[i], ++ region[i].imageExtent.height); ++ ++ if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && upload) { ++ ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1, ++ &flush_info); ++ if (ret != VK_SUCCESS) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n", ++ ff_vk_ret2str(ret)); ++ return AVERROR_EXTERNAL; ++ } ++ } ++ ++ return 0; + } + +-static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, +- AVBufferRef **bufs, size_t *buf_offsets, +- const int *buf_stride, int w, +- int h, enum AVPixelFormat pix_fmt, int to_buf) ++static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst, ++ AVFrame *swf, VkBufferImageCopy *region, int upload) + { + int err; +- AVVkFrame *frame = (AVVkFrame *)f->data[0]; + VulkanFramesPriv *fp = hwfc->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; +- FFVulkanFunctions *vk = &p->vkctx.vkfn; +- VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; +- int nb_img_bar = 0; ++ const int planes = av_pix_fmt_count_planes(swf->format); + +- const int nb_images = ff_vk_count_images(frame); +- int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt); +- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); ++ size_t buf_offset = 0; ++ for (int i = 0; i < planes; i++) { ++ size_t size; ++ ptrdiff_t linesize = swf->linesize[i]; + +- VkCommandBuffer cmd_buf; +- FFVkExecContext *exec = ff_vk_exec_get(to_buf ? 
&fp->download_exec : +- &fp->upload_exec); +- cmd_buf = exec->buf; +- ff_vk_exec_start(&p->vkctx, exec); ++ uint32_t p_w, p_h; ++ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); ++ ++ linesize = FFALIGN(linesize, ++ p->props.properties.limits.optimalBufferCopyRowPitchAlignment); ++ size = p_h*linesize; ++ ++ region[i] = (VkBufferImageCopy) { ++ .bufferOffset = buf_offset, ++ .bufferRowLength = linesize, ++ .bufferImageHeight = p_h, ++ .imageSubresource.layerCount = 1, ++ .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, ++ /* Rest of the fields adjusted/filled in later */ ++ }; ++ ++ buf_offset = FFALIGN(buf_offset + size, ++ p->props.properties.limits.optimalBufferCopyOffsetAlignment); ++ } + +- err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, pixfmt_planes, 1); ++ err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst, ++ VK_BUFFER_USAGE_TRANSFER_SRC_BIT | ++ VK_BUFFER_USAGE_TRANSFER_DST_BIT, ++ NULL, buf_offset, ++ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | ++ VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (err < 0) + return err; + +- err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, f, +- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, +- VK_PIPELINE_STAGE_2_TRANSFER_BIT); ++ return 0; ++} ++ ++static int create_mapped_buffer(AVHWFramesContext *hwfc, ++ FFVkBuffer *vkb, VkBufferUsageFlags usage, ++ size_t size, ++ VkExternalMemoryBufferCreateInfo *create_desc, ++ VkImportMemoryHostPointerInfoEXT *import_desc, ++ VkMemoryHostPointerPropertiesEXT props) ++{ ++ int err; ++ VkResult ret; ++ VulkanDevicePriv *p = hwfc->device_ctx->hwctx; ++ FFVulkanFunctions *vk = &p->vkctx.vkfn; ++ AVVulkanDeviceContext *hwctx = &p->p; ++ ++ VkBufferCreateInfo buf_spawn = { ++ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, ++ .pNext = create_desc, ++ .usage = usage, ++ .sharingMode = VK_SHARING_MODE_EXCLUSIVE, ++ .size = size, ++ }; ++ VkMemoryRequirements req = { ++ .size = size, ++ .alignment = p->hprops.minImportedHostPointerAlignment, ++ .memoryTypeBits = props.memoryTypeBits, ++ }; 
++ ++ err = ff_vk_alloc_mem(&p->vkctx, &req, ++ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, ++ import_desc, &vkb->flags, &vkb->mem); + if (err < 0) + return err; + +- ff_vk_frame_barrier(&p->vkctx, exec, f, img_bar, &nb_img_bar, +- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, +- VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, +- to_buf ? VK_ACCESS_TRANSFER_READ_BIT : +- VK_ACCESS_TRANSFER_WRITE_BIT, +- to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : +- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +- VK_QUEUE_FAMILY_IGNORED); ++ ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkb->buf); ++ if (ret != VK_SUCCESS) { ++ vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc); ++ return AVERROR_EXTERNAL; ++ } + +- vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { +- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, +- .pImageMemoryBarriers = img_bar, +- .imageMemoryBarrierCount = nb_img_bar, +- }); ++ ret = vk->BindBufferMemory(hwctx->act_dev, vkb->buf, vkb->mem, 0); ++ if (ret != VK_SUCCESS) { ++ vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc); ++ vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc); ++ return AVERROR_EXTERNAL; ++ } + +- /* Schedule a copy for each plane */ +- for (int i = 0; i < pixfmt_planes; i++) { +- int idx = FFMIN(i, nb_images - 1); +- VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, +- VK_IMAGE_ASPECT_PLANE_0_BIT, +- VK_IMAGE_ASPECT_PLANE_1_BIT, +- VK_IMAGE_ASPECT_PLANE_2_BIT, }; +- +- FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[i]->data; +- VkBufferImageCopy buf_reg = { +- .bufferOffset = buf_offsets[i], +- .bufferRowLength = buf_stride[i] / desc->comp[i].step, +- .imageSubresource.layerCount = 1, +- .imageSubresource.aspectMask = plane_aspect[(pixfmt_planes != nb_images) + +- i*(pixfmt_planes != nb_images)], +- .imageOffset = { 0, 0, 0, }, +- }; ++ return 0; ++} ++ ++static void destroy_avvkbuf(void *opaque, uint8_t *data) ++{ ++ FFVulkanContext *s = opaque; ++ FFVkBuffer *buf = (FFVkBuffer *)data; ++ ff_vk_free_buf(s, buf); ++ 
av_free(buf); ++} ++ ++static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs, ++ AVFrame *swf, VkBufferImageCopy *region, int upload) ++{ ++ int err; ++ VkResult ret; ++ VulkanDevicePriv *p = hwfc->device_ctx->hwctx; ++ FFVulkanFunctions *vk = &p->vkctx.vkfn; ++ AVVulkanDeviceContext *hwctx = &p->p; ++ ++ const int planes = av_pix_fmt_count_planes(swf->format); + ++ VkExternalMemoryBufferCreateInfo create_desc = { ++ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, ++ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, ++ }; ++ VkImportMemoryHostPointerInfoEXT import_desc = { ++ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, ++ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, ++ }; ++ VkMemoryHostPointerPropertiesEXT props; ++ ++ for (int i = 0; i < planes; i++) { ++ FFVkBuffer *vkb; + uint32_t p_w, p_h; +- get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i); ++ size_t offs; ++ size_t buffer_size; + +- buf_reg.bufferImageHeight = p_h; +- buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; ++ /* We can't host map images with negative strides */ ++ if (swf->linesize[i] < 0) { ++ err = AVERROR(EINVAL); ++ goto fail; ++ } + +- if (to_buf) +- vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], +- img_bar[0].newLayout, +- vkbuf->buf, +- 1, &buf_reg); +- else +- vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx], +- img_bar[0].newLayout, +- 1, &buf_reg); +- } ++ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + +- err = ff_vk_exec_submit(&p->vkctx, exec); +- if (err < 0) +- return err; ++ /* Get the previous point at which mapping was possible and use it */ ++ offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment; ++ import_desc.pHostPointer = swf->data[i] - offs; ++ ++ props = (VkMemoryHostPointerPropertiesEXT) { ++ VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, ++ }; ++ ret = 
vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev, ++ import_desc.handleType, ++ import_desc.pHostPointer, ++ &props); ++ if (!(ret == VK_SUCCESS && props.memoryTypeBits)) { ++ err = AVERROR(EINVAL); ++ goto fail; ++ } ++ ++ /* Buffer region for this plane */ ++ region[i] = (VkBufferImageCopy) { ++ .bufferOffset = offs, ++ .bufferRowLength = swf->linesize[i], ++ .bufferImageHeight = p_h, ++ .imageSubresource.layerCount = 1, ++ .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, ++ /* Rest of the fields adjusted/filled in later */ ++ }; ++ ++ /* Add the offset at the start, which gets ignored */ ++ buffer_size = offs + swf->linesize[i]*p_h; ++ buffer_size = FFALIGN(buffer_size, p->props.properties.limits.minMemoryMapAlignment); ++ buffer_size = FFALIGN(buffer_size, p->hprops.minImportedHostPointerAlignment); ++ ++ /* Create a buffer */ ++ vkb = av_mallocz(sizeof(*vkb)); ++ if (!vkb) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ err = create_mapped_buffer(hwfc, vkb, ++ upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT : ++ VK_BUFFER_USAGE_TRANSFER_DST_BIT, ++ buffer_size, &create_desc, &import_desc, ++ props); ++ if (err < 0) { ++ av_free(vkb); ++ goto fail; ++ } + +- ff_vk_exec_wait(&p->vkctx, exec); ++ /* Create a ref */ ++ dst[*nb_bufs] = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), ++ destroy_avvkbuf, &p->vkctx, 0); ++ if (!dst[*nb_bufs]) { ++ destroy_avvkbuf(&p->vkctx, (uint8_t *)vkb); ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ (*nb_bufs)++; ++ } + + return 0; ++ ++fail: ++ for (int i = 0; i < (*nb_bufs); i++) ++ av_buffer_unref(&dst[i]); ++ return err; + } + +-static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, +- const AVFrame *swf, int from) ++static int vulkan_transfer_frame(AVHWFramesContext *hwfc, ++ AVFrame *swf, AVFrame *hwf, ++ int upload) + { +- int err = 0; +- VkResult ret; +- AVHWDeviceContext *dev_ctx = hwfc->device_ctx; +- VulkanDevicePriv *p = dev_ctx->hwctx; +- AVVulkanDeviceContext *hwctx = &p->p; ++ int err; ++ 
VulkanFramesPriv *fp = hwfc->hwctx; ++ VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + +- AVFrame tmp; +- FFVkBuffer *vkbufs[AV_NUM_DATA_POINTERS]; +- AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; +- size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 }; ++ int host_mapped = 0; ++ ++ AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0]; ++ VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane + +- uint32_t p_w, p_h; + const int planes = av_pix_fmt_count_planes(swf->format); ++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format); ++ const int nb_images = ff_vk_count_images(hwf_vk); ++ static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, ++ VK_IMAGE_ASPECT_PLANE_0_BIT, ++ VK_IMAGE_ASPECT_PLANE_1_BIT, ++ VK_IMAGE_ASPECT_PLANE_2_BIT, }; ++ ++ VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; ++ int nb_img_bar = 0; + +- int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; +- const int map_host = !!(p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); ++ AVBufferRef *bufs[AV_NUM_DATA_POINTERS]; ++ int nb_bufs = 0; + ++ VkCommandBuffer cmd_buf; ++ FFVkExecContext *exec; ++ ++ /* Sanity checking */ + if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); + return AVERROR(EINVAL); +@@ -3453,115 +3903,97 @@ static int vulkan_transfer_data(AVHWFram + if (swf->width > hwfc->width || swf->height > hwfc->height) + return AVERROR(EINVAL); + +- /* Create buffers */ +- for (int i = 0; i < planes; i++) { +- size_t req_size; +- +- VkExternalMemoryBufferCreateInfo create_desc = { +- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, +- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, +- }; +- +- VkImportMemoryHostPointerInfoEXT import_desc = { +- .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, +- .handleType = 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, +- }; +- +- VkMemoryHostPointerPropertiesEXT p_props = { +- .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, +- }; +- +- get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); +- +- tmp.linesize[i] = FFABS(swf->linesize[i]); +- +- /* Do not map images with a negative stride */ +- if (map_host && swf->linesize[i] > 0) { +- size_t offs; +- offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment; +- import_desc.pHostPointer = swf->data[i] - offs; +- +- /* We have to compensate for the few extra bytes of padding we +- * completely ignore at the start */ +- req_size = FFALIGN(offs + tmp.linesize[i] * p_h, +- p->hprops.minImportedHostPointerAlignment); +- +- ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev, +- import_desc.handleType, +- import_desc.pHostPointer, +- &p_props); +- if (ret == VK_SUCCESS && p_props.memoryTypeBits) { +- host_mapped[i] = 1; +- buf_offsets[i] = offs; +- } +- } ++ /* Setup buffers first */ ++ if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) { ++ err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload); ++ if (err >= 0) ++ host_mapped = 1; ++ } + +- if (!host_mapped[i]) +- req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h); +- +- err = ff_vk_create_avbuf(&p->vkctx, &bufs[i], req_size, +- host_mapped[i] ? &create_desc : NULL, +- host_mapped[i] ? &import_desc : NULL, +- from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : +- VK_BUFFER_USAGE_TRANSFER_SRC_BIT, +- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +- (host_mapped[i] ? 
+- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : 0x0)); ++ if (!host_mapped) { ++ err = get_plane_buf(hwfc, &bufs[0], swf, region, upload); + if (err < 0) + goto end; ++ nb_bufs = 1; + +- vkbufs[i] = (FFVkBuffer *)bufs[i]->data; ++ if (upload) { ++ err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1); ++ if (err < 0) ++ goto end; ++ } + } + +- if (!from) { +- /* Map, copy image TO buffer (which then goes to the VkImage), unmap */ +- if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) +- goto end; +- +- for (int i = 0; i < planes; i++) { +- if (host_mapped[i]) +- continue; ++ exec = ff_vk_exec_get(&fp->upload_exec); ++ cmd_buf = exec->buf; + +- get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); ++ ff_vk_exec_start(&p->vkctx, exec); + +- av_image_copy_plane(tmp.data[i], tmp.linesize[i], +- (const uint8_t *)swf->data[i], swf->linesize[i], +- FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), +- p_h); +- } ++ /* Prep destination Vulkan frame */ ++ err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf, ++ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, ++ VK_PIPELINE_STAGE_2_TRANSFER_BIT); ++ if (err < 0) ++ goto end; + +- if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) ++ /* No need to declare buf deps for synchronous transfers */ ++ if (upload) { ++ err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1); ++ if (err < 0) { ++ ff_vk_exec_discard_deps(&p->vkctx, exec); + goto end; ++ } + } + +- /* Copy buffers into/from image */ +- err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets, +- tmp.linesize, swf->width, swf->height, swf->format, +- from); +- +- if (from) { +- /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */ +- if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) +- goto end; ++ ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar, ++ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, ++ VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, ++ upload ? 
VK_ACCESS_TRANSFER_WRITE_BIT : ++ VK_ACCESS_TRANSFER_READ_BIT, ++ upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : ++ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, ++ VK_QUEUE_FAMILY_IGNORED); + +- for (int i = 0; i < planes; i++) { +- if (host_mapped[i]) +- continue; ++ vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { ++ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, ++ .pImageMemoryBarriers = img_bar, ++ .imageMemoryBarrierCount = nb_img_bar, ++ }); + +- get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); ++ for (int i = 0; i < planes; i++) { ++ int buf_idx = FFMIN(i, (nb_bufs - 1)); ++ int img_idx = FFMIN(i, (nb_images - 1)); ++ FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data; ++ ++ uint32_t orig_stride = region[i].bufferRowLength; ++ region[i].bufferRowLength /= desc->comp[i].step; ++ region[i].imageSubresource.aspectMask = plane_aspect[(planes != nb_images) + ++ i*(planes != nb_images)]; ++ ++ if (upload) ++ vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, ++ hwf_vk->img[img_idx], ++ img_bar[img_idx].newLayout, ++ 1, ®ion[i]); ++ else ++ vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx], ++ img_bar[img_idx].newLayout, ++ vkbuf->buf, ++ 1, ®ion[i]); + +- av_image_copy_plane_uc_from(swf->data[i], swf->linesize[i], +- (const uint8_t *)tmp.data[i], tmp.linesize[i], +- FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), +- p_h); +- } ++ region[i].bufferRowLength = orig_stride; ++ } + +- if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) +- goto end; ++ err = ff_vk_exec_submit(&p->vkctx, exec); ++ if (err < 0) { ++ ff_vk_exec_discard_deps(&p->vkctx, exec); ++ } else if (!upload) { ++ ff_vk_exec_wait(&p->vkctx, exec); ++ if (!host_mapped) ++ err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0); + } + + end: +- for (int i = 0; i < planes; i++) ++ for (int i = 0; i < nb_bufs; i++) + av_buffer_unref(&bufs[i]); + + return err; +@@ -3588,7 +4020,7 @@ static int vulkan_transfer_data_to(AVHWF + if (src->hw_frames_ctx) + return 
AVERROR(ENOSYS); + else +- return vulkan_transfer_data(hwfc, dst, src, 0); ++ return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1); + } + } + +@@ -3705,7 +4137,7 @@ static int vulkan_transfer_data_from(AVH + if (dst->hw_frames_ctx) + return AVERROR(ENOSYS); + else +- return vulkan_transfer_data(hwfc, src, dst, 1); ++ return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0); + } + } + +Index: FFmpeg/libavutil/hwcontext_vulkan.h +=================================================================== +--- libavutil/hwcontext_vulkan.h ++++ libavutil/hwcontext_vulkan.h +@@ -29,6 +29,20 @@ + + typedef struct AVVkFrame AVVkFrame; + ++typedef struct AVVulkanDeviceQueueFamily { ++ /* Queue family index */ ++ int idx; ++ /* Number of queues in the queue family in use */ ++ int num; ++ /* Queue family capabilities. Must be non-zero. ++ * Flags may be removed to indicate the queue family may not be used ++ * for a given purpose. */ ++ VkQueueFlagBits flags; ++ /* Vulkan implementations are allowed to list multiple video queues ++ * which differ in what they can encode or decode. */ ++ VkVideoCodecOperationFlagBitsKHR video_caps; ++} AVVulkanDeviceQueueFamily; ++ + /** + * @file + * API-specific header for AV_HWDEVICE_TYPE_VULKAN. +@@ -48,9 +62,8 @@ typedef struct AVVulkanDeviceContext { + const VkAllocationCallbacks *alloc; + + /** +- * Pointer to the instance-provided vkGetInstanceProcAddr loading function. +- * If NULL, will pick either libvulkan or libvolk, depending on libavutil's +- * compilation settings, and set this field. ++ * Pointer to a vkGetInstanceProcAddr loading function. ++ * If unset, will dynamically load and use libvulkan. + */ + PFN_vkGetInstanceProcAddr get_proc_addr; + +@@ -98,6 +111,7 @@ typedef struct AVVulkanDeviceContext { + const char * const *enabled_dev_extensions; + int nb_enabled_dev_extensions; + ++#if FF_API_VULKAN_FIXED_QUEUES + /** + * Queue family index for graphics operations, and the number of queues + * enabled for it. 
If unavaiable, will be set to -1. Not required. +@@ -105,21 +119,27 @@ typedef struct AVVulkanDeviceContext { + * queue family, or pick the one with the least unrelated flags set. + * Queue indices here may overlap if a queue has to share capabilities. + */ ++ attribute_deprecated + int queue_family_index; ++ attribute_deprecated + int nb_graphics_queues; + + /** + * Queue family index for transfer operations and the number of queues + * enabled. Required. + */ ++ attribute_deprecated + int queue_family_tx_index; ++ attribute_deprecated + int nb_tx_queues; + + /** + * Queue family index for compute operations and the number of queues + * enabled. Required. + */ ++ attribute_deprecated + int queue_family_comp_index; ++ attribute_deprecated + int nb_comp_queues; + + /** +@@ -127,7 +147,9 @@ typedef struct AVVulkanDeviceContext { + * If the device doesn't support such, queue_family_encode_index will be -1. + * Not required. + */ ++ attribute_deprecated + int queue_family_encode_index; ++ attribute_deprecated + int nb_encode_queues; + + /** +@@ -135,8 +157,11 @@ typedef struct AVVulkanDeviceContext { + * If the device doesn't support such, queue_family_decode_index will be -1. + * Not required. + */ ++ attribute_deprecated + int queue_family_decode_index; ++ attribute_deprecated + int nb_decode_queues; ++#endif + + /** + * Locks a queue, preventing other threads from submitting any command +@@ -150,6 +175,17 @@ typedef struct AVVulkanDeviceContext { + * Similar to lock_queue(), unlocks a queue. Must only be called after locking. + */ + void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); ++ ++ /** ++ * Queue families used. Must be preferentially ordered. List may contain ++ * duplicates. ++ * ++ * For compatibility reasons, all the enabled queue families listed above ++ * (queue_family_(tx/comp/encode/decode)_index) must also be included in ++ * this list until they're removed after deprecation. 
++ */ ++ AVVulkanDeviceQueueFamily qf[64]; ++ int nb_qf; + } AVVulkanDeviceContext; + + /** +Index: FFmpeg/libavutil/version.h +=================================================================== +--- libavutil/version.h ++++ libavutil/version.h +@@ -112,6 +112,7 @@ + #define FF_API_PALETTE_HAS_CHANGED (LIBAVUTIL_VERSION_MAJOR < 60) + #define FF_API_VULKAN_CONTIGUOUS_MEMORY (LIBAVUTIL_VERSION_MAJOR < 60) + #define FF_API_H274_FILM_GRAIN_VCS (LIBAVUTIL_VERSION_MAJOR < 60) ++#define FF_API_VULKAN_FIXED_QUEUES (LIBAVUTIL_VERSION_MAJOR < 60) + + /** + * @} +Index: FFmpeg/libavutil/vulkan.c +=================================================================== +--- libavutil/vulkan.c ++++ libavutil/vulkan.c +@@ -82,6 +82,25 @@ const char *ff_vk_ret2str(VkResult res) + #undef CASE + } + ++static void load_enabled_qfs(FFVulkanContext *s) ++{ ++ s->nb_qfs = 0; ++ for (int i = 0; i < s->hwctx->nb_qf; i++) { ++ /* Skip duplicates */ ++ int skip = 0; ++ for (int j = 0; j < s->nb_qfs; j++) { ++ if (s->qfs[j] == s->hwctx->qf[i].idx) { ++ skip = 1; ++ break; ++ } ++ } ++ if (skip) ++ continue; ++ ++ s->qfs[s->nb_qfs++] = s->hwctx->qf[i].idx; ++ } ++} ++ + int ff_vk_load_props(FFVulkanContext *s) + { + FFVulkanFunctions *vk = &s->vkfn; +@@ -89,9 +108,13 @@ int ff_vk_load_props(FFVulkanContext *s) + s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, + }; ++ s->optical_flow_props = (VkPhysicalDeviceOpticalFlowPropertiesNV) { ++ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV, ++ .pNext = &s->hprops, ++ }; + s->coop_matrix_props = (VkPhysicalDeviceCooperativeMatrixPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR, +- .pNext = &s->hprops, ++ .pNext = &s->optical_flow_props, + }; + s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) { + .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES, +@@ -126,6 +149,8 @@ int ff_vk_load_props(FFVulkanContext *s) + vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats); + ++ load_enabled_qfs(s); ++ + if (s->qf_props) + return 0; + +@@ -188,66 +213,22 @@ int ff_vk_load_props(FFVulkanContext *s) + + static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) + { +- int ret, num; +- +- switch (dev_family) { +- case VK_QUEUE_GRAPHICS_BIT: +- ret = s->hwctx->queue_family_index; +- num = s->hwctx->nb_graphics_queues; +- break; +- case VK_QUEUE_COMPUTE_BIT: +- ret = s->hwctx->queue_family_comp_index; +- num = s->hwctx->nb_comp_queues; +- break; +- case VK_QUEUE_TRANSFER_BIT: +- ret = s->hwctx->queue_family_tx_index; +- num = s->hwctx->nb_tx_queues; +- break; +- case VK_QUEUE_VIDEO_ENCODE_BIT_KHR: +- ret = s->hwctx->queue_family_encode_index; +- num = s->hwctx->nb_encode_queues; +- break; +- case VK_QUEUE_VIDEO_DECODE_BIT_KHR: +- ret = s->hwctx->queue_family_decode_index; +- num = s->hwctx->nb_decode_queues; +- break; +- default: +- av_assert0(0); /* Should never happen */ ++ for (int i = 0; i < s->hwctx->nb_qf; i++) { ++ if (s->hwctx->qf[i].flags & dev_family) { ++ *nb = s->hwctx->qf[i].num; ++ return s->hwctx->qf[i].idx; ++ } + } + +- if (nb) +- *nb = num; +- +- return ret; ++ av_assert0(0); /* Should never happen */ + } + + int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family) + { + /* Fill in queue families from context if not done yet */ +- if (!s->nb_qfs) { +- s->nb_qfs = 0; +- +- /* Simply fills in all unique queues into s->qfs */ +- if (s->hwctx->queue_family_index >= 0) +- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; +- if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) +- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; +- if (!s->nb_qfs || (s->qfs[0] != 
s->hwctx->queue_family_comp_index && +- s->qfs[1] != s->hwctx->queue_family_comp_index)) +- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; +- if (s->hwctx->queue_family_decode_index >= 0 && +- (s->qfs[0] != s->hwctx->queue_family_decode_index && +- s->qfs[1] != s->hwctx->queue_family_decode_index && +- s->qfs[2] != s->hwctx->queue_family_decode_index)) +- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; +- if (s->hwctx->queue_family_encode_index >= 0 && +- (s->qfs[0] != s->hwctx->queue_family_encode_index && +- s->qfs[1] != s->hwctx->queue_family_encode_index && +- s->qfs[2] != s->hwctx->queue_family_encode_index && +- s->qfs[3] != s->hwctx->queue_family_encode_index)) +- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; +- } ++ if (!s->nb_qfs) ++ load_enabled_qfs(s); + + return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues)); + } +@@ -304,6 +285,15 @@ int ff_vk_exec_pool_init(FFVulkanContext + VkCommandPoolCreateInfo cqueue_create; + VkCommandBufferAllocateInfo cbuf_create; + ++ const VkQueryPoolVideoEncodeFeedbackCreateInfoKHR *ef = NULL; ++ ++ if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { ++ ef = ff_vk_find_struct(query_create_pnext, ++ VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR); ++ if (!ef) ++ return AVERROR(EINVAL); ++ } ++ + /* Create command pool */ + cqueue_create = (VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, +@@ -361,21 +351,18 @@ int ff_vk_exec_pool_init(FFVulkanContext + } + + pool->nb_queries = nb_queries; +- pool->query_status_stride = 2; ++ pool->query_status_stride = 1 + 1; /* One result, one status by default */ + pool->query_results = nb_queries; +- pool->query_statuses = 0; /* if radv supports it, nb_queries; */ ++ pool->query_statuses = nb_queries; + +-#if 0 /* CONFIG_VULKAN_ENCODE */ + /* Video encode quieries produce two results per query */ + if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { +- 
pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */ +- pool->query_results *= 2; +- } else +-#endif +- if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { ++ int nb_results = av_popcount(ef->encodeFeedbackFlags); ++ pool->query_status_stride = nb_results + 1; ++ pool->query_results *= nb_results; ++ } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { + pool->query_status_stride = 1; + pool->query_results = 0; +- pool->query_statuses = nb_queries; + } + + pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4); +@@ -467,7 +454,7 @@ VkResult ff_vk_exec_get_query(FFVulkanCo + e->query_idx, + pool->nb_queries, + pool->qd_size, e->query_data, +- pool->query_64bit ? 8 : 4, qf); ++ pool->qd_size, qf); + if (ret != VK_SUCCESS) + return ret; + +@@ -832,11 +819,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, + + ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info, + s->hwctx->alloc, mem); +- if (ret != VK_SUCCESS) { +- av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n", +- ff_vk_ret2str(ret)); ++ if (ret != VK_SUCCESS) + return AVERROR(ENOMEM); +- } + + if (mem_flags) + *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; +@@ -881,7 +865,7 @@ int ff_vk_create_buf(FFVulkanContext *s, + .pNext = &ded_req, + }; + +- ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf); ++ ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n", + ff_vk_ret2str(ret)); +@@ -1595,6 +1579,7 @@ int ff_vk_exec_pipeline_register(FFVulka + + err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb, + NULL, NULL, set->usage, ++ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) +Index: FFmpeg/libavutil/vulkan.h +=================================================================== +--- libavutil/vulkan.h ++++ 
libavutil/vulkan.h +@@ -237,6 +237,7 @@ typedef struct FFVulkanContext { + VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; + VkPhysicalDeviceSubgroupSizeControlProperties subgroup_props; + VkPhysicalDeviceCooperativeMatrixPropertiesKHR coop_matrix_props; ++ VkPhysicalDeviceOpticalFlowPropertiesNV optical_flow_props; + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; + VkQueueFamilyVideoPropertiesKHR *video_props; + VkQueueFamilyProperties2 *qf_props; +@@ -257,7 +258,7 @@ typedef struct FFVulkanContext { + AVHWFramesContext *frames; + AVVulkanFramesContext *hwfc; + +- uint32_t qfs[5]; ++ uint32_t qfs[64]; + int nb_qfs; + + /* Properties */ +@@ -289,6 +290,15 @@ static inline const void *ff_vk_find_str + return NULL; + } + ++static inline void ff_vk_link_struct(void *chain, const void *in) ++{ ++ VkBaseOutStructure *out = chain; ++ while (out->pNext) ++ out = out->pNext; ++ ++ out->pNext = (void *)in; ++} ++ + /* Identity mapping - r = r, b = b, g = g, a = a */ + extern const VkComponentMapping ff_comp_identity_map; + +Index: FFmpeg/libavutil/vulkan_functions.h +=================================================================== +--- libavutil/vulkan_functions.h ++++ libavutil/vulkan_functions.h +@@ -46,6 +46,13 @@ typedef enum FFVulkanExtensions { + FF_VK_EXT_VIDEO_DECODE_AV1 = 1ULL << 14, /* VK_KHR_video_decode_av1 */ + FF_VK_EXT_ATOMIC_FLOAT = 1ULL << 15, /* VK_EXT_shader_atomic_float */ + FF_VK_EXT_COOP_MATRIX = 1ULL << 16, /* VK_KHR_cooperative_matrix */ ++ FF_VK_EXT_OPTICAL_FLOW = 1ULL << 17, /* VK_NV_optical_flow */ ++ FF_VK_EXT_SHADER_OBJECT = 1ULL << 18, /* VK_EXT_shader_object */ ++ ++ FF_VK_EXT_VIDEO_MAINTENANCE_1 = 1ULL << 27, /* VK_KHR_video_maintenance1 */ ++ FF_VK_EXT_VIDEO_ENCODE_QUEUE = 1ULL << 28, /* VK_KHR_video_encode_queue */ ++ FF_VK_EXT_VIDEO_ENCODE_H264 = 1ULL << 29, /* VK_KHR_video_encode_h264 */ ++ FF_VK_EXT_VIDEO_ENCODE_H265 = 1ULL << 30, /* VK_KHR_video_encode_h265 */ + + FF_VK_EXT_NO_FLAG = 1ULL << 31, + } 
FFVulkanExtensions; +@@ -194,6 +201,11 @@ typedef enum FFVulkanExtensions { + \ + /* Video decoding */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \ ++ \ ++ /* Video encoding */ \ ++ MACRO(1, 1, FF_VK_EXT_VIDEO_ENCODE_QUEUE, CmdEncodeVideoKHR) \ ++ MACRO(1, 1, FF_VK_EXT_VIDEO_ENCODE_QUEUE, GetEncodedVideoSessionParametersKHR) \ ++ MACRO(1, 0, FF_VK_EXT_VIDEO_ENCODE_QUEUE, GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR) \ + \ + /* Pipeline */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ +@@ -208,10 +220,21 @@ typedef enum FFVulkanExtensions { + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \ ++ \ ++ /* Optical flow */ \ ++ MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, BindOpticalFlowSessionImageNV) \ ++ MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, CmdOpticalFlowExecuteNV) \ ++ MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, CreateOpticalFlowSessionNV) \ ++ MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, DestroyOpticalFlowSessionNV) \ ++ MACRO(1, 0, FF_VK_EXT_OPTICAL_FLOW, GetPhysicalDeviceOpticalFlowImageFormatsNV)\ + \ + /* Shaders */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateShaderModule) \ +- MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyShaderModule) ++ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyShaderModule) \ ++ MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, CmdBindShadersEXT) \ ++ MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, CreateShadersEXT) \ ++ MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, DestroyShaderEXT) \ ++ MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, GetShaderBinaryDataEXT) + + /* Macro containing every win32 specific function that we utilize in our codebase */ + #define FN_LIST_WIN32(MACRO) \ +Index: FFmpeg/libavutil/vulkan_loader.h +=================================================================== +--- libavutil/vulkan_loader.h ++++ libavutil/vulkan_loader.h +@@ -49,14 +49,20 @@ static inline uint64_t ff_vk_extensions_ + { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, 
FF_VK_EXT_DEVICE_DRM }, + { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, + { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX }, ++ { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW }, ++ { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, ++ { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 }, + #ifdef _WIN32 + { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, + { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, + #endif + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, ++ { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, ++ { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, ++ { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, + { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 }, + }; diff --git a/cross/ffmpeg7/patches/1065-jellyfin-0065-revert-the-broken-vulkan-hwupload-rewrite.patch b/cross/ffmpeg7/patches/1065-jellyfin-0065-revert-the-broken-vulkan-hwupload-rewrite.patch new file mode 100644 index 00000000000..f9b7996ba9f --- /dev/null +++ b/cross/ffmpeg7/patches/1065-jellyfin-0065-revert-the-broken-vulkan-hwupload-rewrite.patch @@ -0,0 +1,624 @@ +Index: FFmpeg/libavutil/hwcontext_vulkan.c +=================================================================== +--- libavutil/hwcontext_vulkan.c ++++ libavutil/hwcontext_vulkan.c +@@ -131,9 +131,6 @@ typedef struct VulkanFramesPriv { + FFVkExecPool upload_exec; + FFVkExecPool download_exec; + +- /* Temporary buffer pools */ +- AVBufferPool *tmp; +- + /* Modifier 
info list to free at uninit */ + VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; + } VulkanFramesPriv; +@@ -2561,8 +2558,6 @@ static void vulkan_frames_uninit(AVHWFra + ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec); +- +- av_buffer_pool_uninit(&fp->tmp); + } + + static int vulkan_frames_init(AVHWFramesContext *hwfc) +@@ -3611,290 +3606,128 @@ static int vulkan_map_from(AVHWFramesCon + return AVERROR(ENOSYS); + } + +-static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf, +- AVFrame *swf, VkBufferImageCopy *region, +- int planes, int upload) ++static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) + { +- VkResult ret; +- VulkanDevicePriv *p = hwfc->device_ctx->hwctx; +- FFVulkanFunctions *vk = &p->vkctx.vkfn; +- AVVulkanDeviceContext *hwctx = &p->p; +- +- FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data; +- +- const VkMappedMemoryRange flush_info = { +- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, +- .memory = vkbuf->mem, +- .size = VK_WHOLE_SIZE, +- }; +- +- if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && !upload) { +- ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1, +- &flush_info); +- if (ret != VK_SUCCESS) { +- av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n", +- ff_vk_ret2str(ret)); +- return AVERROR_EXTERNAL; +- } +- } +- +- for (int i = 0; i < planes; i++) +- av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset, +- region[i].bufferRowLength, +- swf->data[i], +- swf->linesize[i], +- swf->linesize[i], +- region[i].imageExtent.height); +- +- if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && upload) { +- ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1, +- &flush_info); +- if (ret != VK_SUCCESS) { +- av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n", +- ff_vk_ret2str(ret)); +- return AVERROR_EXTERNAL; +- } +- } +- +- return 0; 
++ size_t size; ++ *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment); ++ size = height*(*stride); ++ size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment); ++ return size; + } + +-static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst, +- AVFrame *swf, VkBufferImageCopy *region, int upload) ++static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, ++ AVBufferRef **bufs, size_t *buf_offsets, ++ const int *buf_stride, int w, ++ int h, enum AVPixelFormat pix_fmt, int to_buf) + { + int err; ++ AVVkFrame *frame = (AVVkFrame *)f->data[0]; + VulkanFramesPriv *fp = hwfc->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; +- const int planes = av_pix_fmt_count_planes(swf->format); +- +- size_t buf_offset = 0; +- for (int i = 0; i < planes; i++) { +- size_t size; +- ptrdiff_t linesize = swf->linesize[i]; +- +- uint32_t p_w, p_h; +- get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); ++ FFVulkanFunctions *vk = &p->vkctx.vkfn; ++ VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; ++ int nb_img_bar = 0; + +- linesize = FFALIGN(linesize, +- p->props.properties.limits.optimalBufferCopyRowPitchAlignment); +- size = p_h*linesize; +- +- region[i] = (VkBufferImageCopy) { +- .bufferOffset = buf_offset, +- .bufferRowLength = linesize, +- .bufferImageHeight = p_h, +- .imageSubresource.layerCount = 1, +- .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, +- /* Rest of the fields adjusted/filled in later */ +- }; ++ const int nb_images = ff_vk_count_images(frame); ++ int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt); ++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + +- buf_offset = FFALIGN(buf_offset + size, +- p->props.properties.limits.optimalBufferCopyOffsetAlignment); +- } ++ VkCommandBuffer cmd_buf; ++ FFVkExecContext *exec = ff_vk_exec_get(to_buf ? 
&fp->download_exec : ++ &fp->upload_exec); ++ cmd_buf = exec->buf; ++ ff_vk_exec_start(&p->vkctx, exec); + +- err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst, +- VK_BUFFER_USAGE_TRANSFER_SRC_BIT | +- VK_BUFFER_USAGE_TRANSFER_DST_BIT, +- NULL, buf_offset, +- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +- VK_MEMORY_PROPERTY_HOST_CACHED_BIT); ++ err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, pixfmt_planes, 1); + if (err < 0) + return err; + +- return 0; +-} +- +-static int create_mapped_buffer(AVHWFramesContext *hwfc, +- FFVkBuffer *vkb, VkBufferUsageFlags usage, +- size_t size, +- VkExternalMemoryBufferCreateInfo *create_desc, +- VkImportMemoryHostPointerInfoEXT *import_desc, +- VkMemoryHostPointerPropertiesEXT props) +-{ +- int err; +- VkResult ret; +- VulkanDevicePriv *p = hwfc->device_ctx->hwctx; +- FFVulkanFunctions *vk = &p->vkctx.vkfn; +- AVVulkanDeviceContext *hwctx = &p->p; +- +- VkBufferCreateInfo buf_spawn = { +- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, +- .pNext = create_desc, +- .usage = usage, +- .sharingMode = VK_SHARING_MODE_EXCLUSIVE, +- .size = size, +- }; +- VkMemoryRequirements req = { +- .size = size, +- .alignment = p->hprops.minImportedHostPointerAlignment, +- .memoryTypeBits = props.memoryTypeBits, +- }; +- +- err = ff_vk_alloc_mem(&p->vkctx, &req, +- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, +- import_desc, &vkb->flags, &vkb->mem); ++ err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, f, ++ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, ++ VK_PIPELINE_STAGE_2_TRANSFER_BIT); + if (err < 0) + return err; + +- ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkb->buf); +- if (ret != VK_SUCCESS) { +- vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc); +- return AVERROR_EXTERNAL; +- } +- +- ret = vk->BindBufferMemory(hwctx->act_dev, vkb->buf, vkb->mem, 0); +- if (ret != VK_SUCCESS) { +- vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc); +- vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc); +- return AVERROR_EXTERNAL; +- 
} +- +- return 0; +-} +- +-static void destroy_avvkbuf(void *opaque, uint8_t *data) +-{ +- FFVulkanContext *s = opaque; +- FFVkBuffer *buf = (FFVkBuffer *)data; +- ff_vk_free_buf(s, buf); +- av_free(buf); +-} +- +-static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs, +- AVFrame *swf, VkBufferImageCopy *region, int upload) +-{ +- int err; +- VkResult ret; +- VulkanDevicePriv *p = hwfc->device_ctx->hwctx; +- FFVulkanFunctions *vk = &p->vkctx.vkfn; +- AVVulkanDeviceContext *hwctx = &p->p; +- +- const int planes = av_pix_fmt_count_planes(swf->format); +- +- VkExternalMemoryBufferCreateInfo create_desc = { +- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, +- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, +- }; +- VkImportMemoryHostPointerInfoEXT import_desc = { +- .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, +- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, +- }; +- VkMemoryHostPointerPropertiesEXT props; +- +- for (int i = 0; i < planes; i++) { +- FFVkBuffer *vkb; +- uint32_t p_w, p_h; +- size_t offs; +- size_t buffer_size; +- +- /* We can't host map images with negative strides */ +- if (swf->linesize[i] < 0) { +- err = AVERROR(EINVAL); +- goto fail; +- } +- +- get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); ++ ff_vk_frame_barrier(&p->vkctx, exec, f, img_bar, &nb_img_bar, ++ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, ++ VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, ++ to_buf ? VK_ACCESS_TRANSFER_READ_BIT : ++ VK_ACCESS_TRANSFER_WRITE_BIT, ++ to_buf ? 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : ++ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, ++ VK_QUEUE_FAMILY_IGNORED); + +- /* Get the previous point at which mapping was possible and use it */ +- offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment; +- import_desc.pHostPointer = swf->data[i] - offs; +- +- props = (VkMemoryHostPointerPropertiesEXT) { +- VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, +- }; +- ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev, +- import_desc.handleType, +- import_desc.pHostPointer, +- &props); +- if (!(ret == VK_SUCCESS && props.memoryTypeBits)) { +- err = AVERROR(EINVAL); +- goto fail; +- } ++ vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { ++ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, ++ .pImageMemoryBarriers = img_bar, ++ .imageMemoryBarrierCount = nb_img_bar, ++ }); + +- /* Buffer region for this plane */ +- region[i] = (VkBufferImageCopy) { +- .bufferOffset = offs, +- .bufferRowLength = swf->linesize[i], +- .bufferImageHeight = p_h, ++ /* Schedule a copy for each plane */ ++ for (int i = 0; i < pixfmt_planes; i++) { ++ int idx = FFMIN(i, nb_images - 1); ++ VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, ++ VK_IMAGE_ASPECT_PLANE_0_BIT, ++ VK_IMAGE_ASPECT_PLANE_1_BIT, ++ VK_IMAGE_ASPECT_PLANE_2_BIT, }; ++ ++ FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[i]->data; ++ VkBufferImageCopy buf_reg = { ++ .bufferOffset = buf_offsets[i], ++ .bufferRowLength = buf_stride[i] / desc->comp[i].step, + .imageSubresource.layerCount = 1, +- .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, +- /* Rest of the fields adjusted/filled in later */ ++ .imageSubresource.aspectMask = plane_aspect[(pixfmt_planes != nb_images) + ++ i*(pixfmt_planes != nb_images)], ++ .imageOffset = { 0, 0, 0, }, + }; + +- /* Add the offset at the start, which gets ignored */ +- buffer_size = offs + swf->linesize[i]*p_h; +- buffer_size = FFALIGN(buffer_size, p->props.properties.limits.minMemoryMapAlignment); +- buffer_size = 
FFALIGN(buffer_size, p->hprops.minImportedHostPointerAlignment); +- +- /* Create a buffer */ +- vkb = av_mallocz(sizeof(*vkb)); +- if (!vkb) { +- err = AVERROR(ENOMEM); +- goto fail; +- } +- +- err = create_mapped_buffer(hwfc, vkb, +- upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT : +- VK_BUFFER_USAGE_TRANSFER_DST_BIT, +- buffer_size, &create_desc, &import_desc, +- props); +- if (err < 0) { +- av_free(vkb); +- goto fail; +- } ++ uint32_t p_w, p_h; ++ get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i); + +- /* Create a ref */ +- dst[*nb_bufs] = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), +- destroy_avvkbuf, &p->vkctx, 0); +- if (!dst[*nb_bufs]) { +- destroy_avvkbuf(&p->vkctx, (uint8_t *)vkb); +- err = AVERROR(ENOMEM); +- goto fail; +- } ++ buf_reg.bufferImageHeight = p_h; ++ buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; + +- (*nb_bufs)++; ++ if (to_buf) ++ vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], ++ img_bar[0].newLayout, ++ vkbuf->buf, ++ 1, &buf_reg); ++ else ++ vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx], ++ img_bar[0].newLayout, ++ 1, &buf_reg); + } + +- return 0; ++ err = ff_vk_exec_submit(&p->vkctx, exec); ++ if (err < 0) ++ return err; + +-fail: +- for (int i = 0; i < (*nb_bufs); i++) +- av_buffer_unref(&dst[i]); +- return err; ++ ff_vk_exec_wait(&p->vkctx, exec); ++ ++ return 0; + } + +-static int vulkan_transfer_frame(AVHWFramesContext *hwfc, +- AVFrame *swf, AVFrame *hwf, +- int upload) ++static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, ++ const AVFrame *swf, int from) + { +- int err; +- VulkanFramesPriv *fp = hwfc->hwctx; +- VulkanDevicePriv *p = hwfc->device_ctx->hwctx; ++ int err = 0; ++ VkResult ret; ++ AVHWDeviceContext *dev_ctx = hwfc->device_ctx; ++ VulkanDevicePriv *p = dev_ctx->hwctx; ++ AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + +- int host_mapped = 0; +- +- AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0]; +- VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // 
always one per plane ++ AVFrame tmp; ++ FFVkBuffer *vkbufs[AV_NUM_DATA_POINTERS]; ++ AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; ++ size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 }; + ++ uint32_t p_w, p_h; + const int planes = av_pix_fmt_count_planes(swf->format); +- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format); +- const int nb_images = ff_vk_count_images(hwf_vk); +- static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, +- VK_IMAGE_ASPECT_PLANE_0_BIT, +- VK_IMAGE_ASPECT_PLANE_1_BIT, +- VK_IMAGE_ASPECT_PLANE_2_BIT, }; +- +- VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; +- int nb_img_bar = 0; + +- AVBufferRef *bufs[AV_NUM_DATA_POINTERS]; +- int nb_bufs = 0; ++ int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; ++ const int map_host = !!(p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); + +- VkCommandBuffer cmd_buf; +- FFVkExecContext *exec; +- +- /* Sanity checking */ + if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); + return AVERROR(EINVAL); +@@ -3903,97 +3736,115 @@ static int vulkan_transfer_frame(AVHWFra + if (swf->width > hwfc->width || swf->height > hwfc->height) + return AVERROR(EINVAL); + +- /* Setup buffers first */ +- if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) { +- err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload); +- if (err >= 0) +- host_mapped = 1; +- } ++ /* Create buffers */ ++ for (int i = 0; i < planes; i++) { ++ size_t req_size; + +- if (!host_mapped) { +- err = get_plane_buf(hwfc, &bufs[0], swf, region, upload); ++ VkExternalMemoryBufferCreateInfo create_desc = { ++ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, ++ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, ++ }; ++ ++ VkImportMemoryHostPointerInfoEXT import_desc = { ++ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, ++ .handleType = 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, ++ }; ++ ++ VkMemoryHostPointerPropertiesEXT p_props = { ++ .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, ++ }; ++ ++ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); ++ ++ tmp.linesize[i] = FFABS(swf->linesize[i]); ++ ++ /* Do not map images with a negative stride */ ++ if (map_host && swf->linesize[i] > 0) { ++ size_t offs; ++ offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment; ++ import_desc.pHostPointer = swf->data[i] - offs; ++ ++ /* We have to compensate for the few extra bytes of padding we ++ * completely ignore at the start */ ++ req_size = FFALIGN(offs + tmp.linesize[i] * p_h, ++ p->hprops.minImportedHostPointerAlignment); ++ ++ ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev, ++ import_desc.handleType, ++ import_desc.pHostPointer, ++ &p_props); ++ if (ret == VK_SUCCESS && p_props.memoryTypeBits) { ++ host_mapped[i] = 1; ++ buf_offsets[i] = offs; ++ } ++ } ++ ++ if (!host_mapped[i]) ++ req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h); ++ ++ err = ff_vk_create_avbuf(&p->vkctx, &bufs[i], req_size, ++ host_mapped[i] ? &create_desc : NULL, ++ host_mapped[i] ? &import_desc : NULL, ++ from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : ++ VK_BUFFER_USAGE_TRANSFER_SRC_BIT, ++ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | ++ (host_mapped[i] ? 
++ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : 0x0)); + if (err < 0) + goto end; +- nb_bufs = 1; + +- if (upload) { +- err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1); +- if (err < 0) +- goto end; +- } ++ vkbufs[i] = (FFVkBuffer *)bufs[i]->data; + } + +- exec = ff_vk_exec_get(&fp->upload_exec); +- cmd_buf = exec->buf; ++ if (!from) { ++ /* Map, copy image TO buffer (which then goes to the VkImage), unmap */ ++ if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) ++ goto end; + +- ff_vk_exec_start(&p->vkctx, exec); ++ for (int i = 0; i < planes; i++) { ++ if (host_mapped[i]) ++ continue; + +- /* Prep destination Vulkan frame */ +- err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf, +- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, +- VK_PIPELINE_STAGE_2_TRANSFER_BIT); +- if (err < 0) +- goto end; ++ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + +- /* No need to declare buf deps for synchronous transfers */ +- if (upload) { +- err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1); +- if (err < 0) { +- ff_vk_exec_discard_deps(&p->vkctx, exec); +- goto end; ++ av_image_copy_plane(tmp.data[i], tmp.linesize[i], ++ (const uint8_t *)swf->data[i], swf->linesize[i], ++ FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), ++ p_h); + } ++ ++ if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) ++ goto end; + } + +- ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar, +- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, +- VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, +- upload ? VK_ACCESS_TRANSFER_WRITE_BIT : +- VK_ACCESS_TRANSFER_READ_BIT, +- upload ? 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : +- VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, +- VK_QUEUE_FAMILY_IGNORED); ++ /* Copy buffers into/from image */ ++ err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets, ++ tmp.linesize, swf->width, swf->height, swf->format, ++ from); ++ ++ if (from) { ++ /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */ ++ if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) ++ goto end; + +- vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { +- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, +- .pImageMemoryBarriers = img_bar, +- .imageMemoryBarrierCount = nb_img_bar, +- }); ++ for (int i = 0; i < planes; i++) { ++ if (host_mapped[i]) ++ continue; + +- for (int i = 0; i < planes; i++) { +- int buf_idx = FFMIN(i, (nb_bufs - 1)); +- int img_idx = FFMIN(i, (nb_images - 1)); +- FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data; +- +- uint32_t orig_stride = region[i].bufferRowLength; +- region[i].bufferRowLength /= desc->comp[i].step; +- region[i].imageSubresource.aspectMask = plane_aspect[(planes != nb_images) + +- i*(planes != nb_images)]; +- +- if (upload) +- vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, +- hwf_vk->img[img_idx], +- img_bar[img_idx].newLayout, +- 1, ®ion[i]); +- else +- vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx], +- img_bar[img_idx].newLayout, +- vkbuf->buf, +- 1, ®ion[i]); ++ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + +- region[i].bufferRowLength = orig_stride; +- } ++ av_image_copy_plane_uc_from(swf->data[i], swf->linesize[i], ++ (const uint8_t *)tmp.data[i], tmp.linesize[i], ++ FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), ++ p_h); ++ } + +- err = ff_vk_exec_submit(&p->vkctx, exec); +- if (err < 0) { +- ff_vk_exec_discard_deps(&p->vkctx, exec); +- } else if (!upload) { +- ff_vk_exec_wait(&p->vkctx, exec); +- if (!host_mapped) +- err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0); ++ if ((err = 
ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) ++ goto end; + } + + end: +- for (int i = 0; i < nb_bufs; i++) ++ for (int i = 0; i < planes; i++) + av_buffer_unref(&bufs[i]); + + return err; +@@ -4020,7 +3871,7 @@ static int vulkan_transfer_data_to(AVHWF + if (src->hw_frames_ctx) + return AVERROR(ENOSYS); + else +- return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1); ++ return vulkan_transfer_data(hwfc, dst, src, 0); + } + } + +@@ -4137,7 +3988,7 @@ static int vulkan_transfer_data_from(AVH + if (dst->hw_frames_ctx) + return AVERROR(ENOSYS); + else +- return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0); ++ return vulkan_transfer_data(hwfc, src, dst, 1); + } + } + +Index: FFmpeg/libavutil/vulkan.c +=================================================================== +--- libavutil/vulkan.c ++++ libavutil/vulkan.c +@@ -819,8 +819,11 @@ int ff_vk_alloc_mem(FFVulkanContext *s, + + ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info, + s->hwctx->alloc, mem); +- if (ret != VK_SUCCESS) ++ if (ret != VK_SUCCESS) { ++ av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n", ++ ff_vk_ret2str(ret)); + return AVERROR(ENOMEM); ++ } + + if (mem_flags) + *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; diff --git a/cross/ffmpeg7/patches/1066-jellyfin-0066-use-linear-contiguous-vulkan-images-for-amd-encoder.patch b/cross/ffmpeg7/patches/1066-jellyfin-0066-use-linear-contiguous-vulkan-images-for-amd-encoder.patch new file mode 100644 index 00000000000..2ab0eb98880 --- /dev/null +++ b/cross/ffmpeg7/patches/1066-jellyfin-0066-use-linear-contiguous-vulkan-images-for-amd-encoder.patch @@ -0,0 +1,279 @@ +Index: FFmpeg/libavutil/hwcontext_vulkan.c +=================================================================== +--- libavutil/hwcontext_vulkan.c ++++ libavutil/hwcontext_vulkan.c +@@ -1382,6 +1382,7 @@ static void vulkan_device_uninit(AVHWDev + + static int vulkan_device_create_internal(AVHWDeviceContext *ctx, + VulkanDeviceSelection *dev_select, ++ int 
use_linear_images, + int disable_multiplane, + AVDictionary *opts, int flags) + { +@@ -1580,10 +1581,14 @@ static int vulkan_device_create_internal + goto end; + } + +- /* Tiled images setting, use them by default */ +- opt_d = av_dict_get(opts, "linear_images", NULL, 0); +- if (opt_d) +- p->use_linear_images = strtol(opt_d->value, NULL, 10); ++ /* Tiled images setting, use them by default. ++ * The use_linear_images argument takes precedent over the option */ ++ p->use_linear_images = use_linear_images; ++ if (!p->use_linear_images) { ++ opt_d = av_dict_get(opts, "linear_images", NULL, 0); ++ if (opt_d) ++ p->use_linear_images = strtol(opt_d->value, NULL, 10); ++ } + + /* + * The disable_multiplane argument takes precedent over the option. +@@ -1829,7 +1834,7 @@ static int vulkan_device_create(AVHWDevi + } + } + +- return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); ++ return vulkan_device_create_internal(ctx, &dev_select, 0, 0, opts, flags); + } + + static int vulkan_device_derive(AVHWDeviceContext *ctx, +@@ -1853,6 +1858,7 @@ static int vulkan_device_derive(AVHWDevi + }; + #endif + const char *vendor; ++ int use_linear_images = 0; + + #if VA_CHECK_VERSION(1, 15, 0) + vas = vaGetDisplayAttributes(dpy, &attr, 1); +@@ -1867,11 +1873,14 @@ static int vulkan_device_derive(AVHWDevi + return AVERROR_EXTERNAL; + } + +- if (strstr(vendor, "AMD")) ++ if (strstr(vendor, "AMD")) { + dev_select.vendor_id = 0x1002; ++ use_linear_images = 1; ++ } + } + +- return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); ++ /* AMD VA-API encoders require linear contiguous (multiplane) surface */ ++ return vulkan_device_create_internal(ctx, &dev_select, use_linear_images, 0, opts, flags); + } + #endif + #if CONFIG_LIBDRM +@@ -1880,6 +1889,7 @@ static int vulkan_device_derive(AVHWDevi + struct stat drm_node_info; + drmDevice *drm_dev_info; + AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; ++ int use_linear_images = 0; + + err = fstat(src_hwctx->fd, 
&drm_node_info); + if (err) { +@@ -1899,12 +1909,15 @@ static int vulkan_device_derive(AVHWDevi + return AVERROR_EXTERNAL; + } + +- if (drm_dev_info->bustype == DRM_BUS_PCI) ++ if (drm_dev_info->bustype == DRM_BUS_PCI) { + dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id; ++ use_linear_images = drm_dev_info->deviceinfo.pci->vendor_id == 0x1002; ++ } + + drmFreeDevice(&drm_dev_info); + +- return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); ++ /* AMD VA-API encoders require linear contiguous (multiplane) surface */ ++ return vulkan_device_create_internal(ctx, &dev_select, use_linear_images, 0, opts, flags); + } + #endif + #if CONFIG_CUDA +@@ -1927,7 +1940,7 @@ static int vulkan_device_derive(AVHWDevi + * CUDA is not able to import multiplane images, so always derive a + * Vulkan device with multiplane disabled. + */ +- return vulkan_device_create_internal(ctx, &dev_select, 1, opts, flags); ++ return vulkan_device_create_internal(ctx, &dev_select, 0, 1, opts, flags); + } + #endif + default: +@@ -2774,6 +2787,10 @@ static const struct { + { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM }, + { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM }, + { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM }, ++ { DRM_FORMAT_ARGB2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 }, ++ { DRM_FORMAT_XRGB2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 }, ++ { DRM_FORMAT_ABGR2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 }, ++ { DRM_FORMAT_XBGR2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 }, + + // All these DRM_FORMATs were added in the same libdrm commit. 
+ #ifdef DRM_FORMAT_XYUV8888 +@@ -2808,6 +2825,7 @@ static int vulkan_map_from_drm_frame_des + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; + VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES]; + VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES]; ++ const int has_modifiers = !!(p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS); + + for (int i = 0; i < desc->nb_layers; i++) { + if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) { +@@ -2817,13 +2835,21 @@ static int vulkan_map_from_drm_frame_des + } + } + ++ if (!has_modifiers && ++ desc->objects[0].format_modifier != DRM_FORMAT_MOD_INVALID && ++ desc->objects[0].format_modifier != DRM_FORMAT_MOD_LINEAR) { ++ av_log(ctx, AV_LOG_ERROR, "The driver can only import DRM frame with invalid/linear modifier!\n"); ++ err = AVERROR_EXTERNAL; ++ goto fail; ++ } ++ + if (!(f = av_vk_frame_alloc())) { + av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); + err = AVERROR(ENOMEM); + goto fail; + } + +- f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; ++ f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : VK_IMAGE_TILING_LINEAR; + + for (int i = 0; i < desc->nb_layers; i++) { + const int planes = desc->layers[i].nb_planes; +@@ -2861,7 +2887,7 @@ static int vulkan_map_from_drm_frame_des + .mipLevels = 1, + .arrayLayers = 1, + .flags = 0x0, +- .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, ++ .tiling = f->tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, +@@ -2894,7 +2920,7 @@ static int vulkan_map_from_drm_frame_des + }; + VkPhysicalDeviceImageFormatInfo2 fmt_props = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, +- .pNext = &props_ext, ++ .pNext = has_modifiers ? 
&props_ext : NULL, + .format = create_info.format, + .type = create_info.imageType, + .tiling = create_info.tiling, +@@ -3396,22 +3422,14 @@ fail: + static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) + { +- av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; +- + switch (src->format) { + #if CONFIG_LIBDRM + #if CONFIG_VAAPI + case AV_PIX_FMT_VAAPI: +- if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) +- return vulkan_map_from_vaapi(hwfc, dst, src, flags); +- else +- return AVERROR(ENOSYS); ++ return vulkan_map_from_vaapi(hwfc, dst, src, flags); + #endif + case AV_PIX_FMT_DRM_PRIME: +- if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) +- return vulkan_map_from_drm(hwfc, dst, src, flags); +- else +- return AVERROR(ENOSYS); ++ return vulkan_map_from_drm(hwfc, dst, src, flags); + #endif + default: + return AVERROR(ENOSYS); +@@ -3454,13 +3472,14 @@ static int vulkan_map_to_drm(AVHWFramesC + VulkanFramesPriv *fp = hwfc->hwctx; + AVVulkanFramesContext *hwfctx = &fp->p; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); ++ const int has_modifiers = !!(p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS); + VkImageDrmFormatModifierPropertiesEXT drm_mod = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, + }; + VkSemaphoreWaitInfo wait_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .flags = 0x0, +- .semaphoreCount = planes, ++ .semaphoreCount = p->disable_multiplane ? 
planes : 1, + }; + + AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc)); +@@ -3481,10 +3500,16 @@ static int vulkan_map_to_drm(AVHWFramesC + if (err < 0) + goto end; + +- ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], +- &drm_mod); +- if (ret != VK_SUCCESS) { +- av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n"); ++ if (has_modifiers) { ++ ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], ++ &drm_mod); ++ if (ret != VK_SUCCESS) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n"); ++ err = AVERROR_EXTERNAL; ++ goto end; ++ } ++ } else if (f->tiling != VK_IMAGE_TILING_LINEAR) { ++ av_log(hwfc, AV_LOG_ERROR, "The driver can only export linear images to DRM frame!\n"); + err = AVERROR_EXTERNAL; + goto end; + } +@@ -3506,7 +3531,7 @@ static int vulkan_map_to_drm(AVHWFramesC + + drm_desc->nb_objects++; + drm_desc->objects[i].size = f->size[i]; +- drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier; ++ drm_desc->objects[i].format_modifier = has_modifiers ? drm_mod.drmFormatModifier : 0x0; + } + + drm_desc->nb_layers = planes; +@@ -3517,6 +3542,14 @@ static int vulkan_map_to_drm(AVHWFramesC + }; + VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i]; + ++ if (drm_desc->nb_layers > 1 && !p->disable_multiplane) { ++ switch (i) { ++ case 0: sub.aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT; break; ++ case 1: sub.aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT; break; ++ case 2: sub.aspectMask = VK_IMAGE_ASPECT_PLANE_2_BIT; break; ++ } ++ } ++ + drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt); + drm_desc->layers[i].nb_planes = 1; + +@@ -3531,7 +3564,7 @@ static int vulkan_map_to_drm(AVHWFramesC + if (f->tiling == VK_IMAGE_TILING_OPTIMAL) + continue; + +- vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); ++ vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[p->disable_multiplane ? 
i : 0], &sub, &layout); + drm_desc->layers[i].planes[0].offset = layout.offset; + drm_desc->layers[i].planes[0].pitch = layout.rowPitch; + +@@ -3583,21 +3616,13 @@ fail: + static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) + { +- av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; +- + switch (dst->format) { + #if CONFIG_LIBDRM + case AV_PIX_FMT_DRM_PRIME: +- if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) +- return vulkan_map_to_drm(hwfc, dst, src, flags); +- else +- return AVERROR(ENOSYS); ++ return vulkan_map_to_drm(hwfc, dst, src, flags); + #if CONFIG_VAAPI + case AV_PIX_FMT_VAAPI: +- if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) +- return vulkan_map_to_vaapi(hwfc, dst, src, flags); +- else +- return AVERROR(ENOSYS); ++ return vulkan_map_to_vaapi(hwfc, dst, src, flags); + #endif + #endif + default: diff --git a/cross/ffmpeg7/patches/1067-jellyfin-0067-prefer-vulkan-device-with-higher-api-version.patch b/cross/ffmpeg7/patches/1067-jellyfin-0067-prefer-vulkan-device-with-higher-api-version.patch new file mode 100644 index 00000000000..6ee4533bd39 --- /dev/null +++ b/cross/ffmpeg7/patches/1067-jellyfin-0067-prefer-vulkan-device-with-higher-api-version.patch @@ -0,0 +1,118 @@ +Index: FFmpeg/libavutil/hwcontext_vulkan.c +=================================================================== +--- libavutil/hwcontext_vulkan.c ++++ libavutil/hwcontext_vulkan.c +@@ -950,7 +950,7 @@ static const char *vk_dev_type(enum VkPh + static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) + { + int err = 0, choice = -1; +- uint32_t num; ++ uint32_t num, api = 0; + VkResult ret; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; +@@ -1017,63 +1017,78 @@ static int find_device(AVHWDeviceContext + + if (select->has_uuid) { + for (int i = 0; i < num; i++) { +- if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) { ++ if (!strncmp(idp[i].deviceUUID, select->uuid, 
VK_UUID_SIZE) ++ && prop[i].properties.apiVersion > api) { + choice = i; +- goto end; +- } ++ api = prop[i].properties.apiVersion; ++ } ++ } ++ if (choice == -1) { ++ av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); ++ err = AVERROR(ENODEV); + } +- av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); +- err = AVERROR(ENODEV); + goto end; + } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) { + for (int i = 0; i < num; i++) { + if ((select->drm_major == drm_prop[i].primaryMajor && + select->drm_minor == drm_prop[i].primaryMinor) || + (select->drm_major == drm_prop[i].renderMajor && +- select->drm_minor == drm_prop[i].renderMinor)) { ++ select->drm_minor == drm_prop[i].renderMinor) ++ && prop[i].properties.apiVersion > api) { + choice = i; +- goto end; +- } ++ api = prop[i].properties.apiVersion; ++ } ++ } ++ if (choice == -1) { ++ av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n", ++ select->drm_major, select->drm_minor); ++ err = AVERROR(ENODEV); + } +- av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n", +- select->drm_major, select->drm_minor); +- err = AVERROR(ENODEV); + goto end; + } else if (select->name) { + av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name); + for (int i = 0; i < num; i++) { +- if (strstr(prop[i].properties.deviceName, select->name)) { ++ if (strstr(prop[i].properties.deviceName, select->name) ++ && prop[i].properties.apiVersion > api) { + choice = i; +- goto end; ++ api = prop[i].properties.apiVersion; + } + } +- av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n", +- select->name); +- err = AVERROR(ENODEV); ++ if (choice == -1) { ++ av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n", ++ select->name); ++ err = AVERROR(ENODEV); ++ } + goto end; + } else if (select->pci_device) { + av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device); + for (int i = 0; i < 
num; i++) { +- if (select->pci_device == prop[i].properties.deviceID) { ++ if (select->pci_device == prop[i].properties.deviceID ++ && prop[i].properties.apiVersion > api) { + choice = i; +- goto end; ++ api = prop[i].properties.apiVersion; + } + } +- av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n", +- select->pci_device); +- err = AVERROR(EINVAL); ++ if (choice == -1) { ++ av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n", ++ select->pci_device); ++ err = AVERROR(EINVAL); ++ } + goto end; + } else if (select->vendor_id) { + av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id); + for (int i = 0; i < num; i++) { +- if (select->vendor_id == prop[i].properties.vendorID) { ++ if (select->vendor_id == prop[i].properties.vendorID ++ && prop[i].properties.apiVersion > api) { + choice = i; +- goto end; ++ api = prop[i].properties.apiVersion; + } + } +- av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n", +- select->vendor_id); +- err = AVERROR(ENODEV); ++ if (choice == -1) { ++ av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n", ++ select->vendor_id); ++ err = AVERROR(ENODEV); ++ } + goto end; + } else { + if (select->index < num) { diff --git a/cross/ffmpeg7/patches/1068-jellyfin-0068-add-pgs-support-to-vulkan-overlay.patch b/cross/ffmpeg7/patches/1068-jellyfin-0068-add-pgs-support-to-vulkan-overlay.patch new file mode 100644 index 00000000000..11991e6cf5f --- /dev/null +++ b/cross/ffmpeg7/patches/1068-jellyfin-0068-add-pgs-support-to-vulkan-overlay.patch @@ -0,0 +1,363 @@ +Index: FFmpeg/libavfilter/vf_overlay_vulkan.c +=================================================================== +--- libavfilter/vf_overlay_vulkan.c ++++ libavfilter/vf_overlay_vulkan.c +@@ -32,9 +32,11 @@ typedef struct OverlayVulkanContext { + + int initialized; + FFVulkanPipeline pl; ++ FFVulkanPipeline pl_pass; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; ++ 
FFVkSPIRVShader shd_pass; + VkSampler sampler; + + /* Push constants / options */ +@@ -47,6 +49,10 @@ typedef struct OverlayVulkanContext { + int overlay_y; + int overlay_w; + int overlay_h; ++ ++ int opt_repeatlast; ++ int opt_shortest; ++ int opt_eof_action; + } OverlayVulkanContext; + + static const char overlay_noalpha[] = { +@@ -83,15 +89,16 @@ static const char overlay_alpha[] = { + static av_cold int init_filter(AVFilterContext *ctx) + { + int err; +- uint8_t *spv_data; ++ uint8_t *spv_data, *spv_data_pass; + size_t spv_len; + void *spv_opaque = NULL; ++ void *spv_opaque_pass = NULL; + OverlayVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format); +- FFVkSPIRVShader *shd = &s->shd; ++ FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + +@@ -104,90 +111,143 @@ static av_cold int init_filter(AVFilterC + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); ++ ++ /* overlay_compute */ + RET(ff_vk_shader_init(&s->pl, &s->shd, "overlay_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); ++ { ++ shd = &s->shd; ++ ff_vk_shader_set_compute_sizes(shd, 32, 32, 1); ++ ++ GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); ++ GLSLC(1, ivec2 o_offset[3]; ); ++ GLSLC(1, ivec2 o_size[3]; ); ++ GLSLC(0, }; ); ++ GLSLC(0, ); ++ ++ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), ++ VK_SHADER_STAGE_COMPUTE_BIT); ++ ++ desc = (FFVulkanDescriptorSetBinding []) { ++ { ++ .name = "main_img", ++ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, ++ .dimensions = 2, ++ .elems = planes, ++ .stages = VK_SHADER_STAGE_COMPUTE_BIT, ++ 
.samplers = DUP_SAMPLER(s->sampler), ++ }, ++ { ++ .name = "overlay_img", ++ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, ++ .dimensions = 2, ++ .elems = planes, ++ .stages = VK_SHADER_STAGE_COMPUTE_BIT, ++ .samplers = DUP_SAMPLER(s->sampler), ++ }, ++ { ++ .name = "output_img", ++ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ++ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), ++ .mem_quali = "writeonly", ++ .dimensions = 2, ++ .elems = planes, ++ .stages = VK_SHADER_STAGE_COMPUTE_BIT, ++ }, ++ }; ++ ++ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0)); ++ ++ GLSLD( overlay_noalpha ); ++ GLSLD( overlay_alpha ); ++ GLSLC(0, void main() ); ++ GLSLC(0, { ); ++ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); ++ GLSLF(1, int planes = %i; ,planes); ++ GLSLC(1, for (int i = 0; i < planes; i++) { ); ++ if (ialpha) ++ GLSLC(2, overlay_alpha_opaque(i, pos); ); ++ else ++ GLSLC(2, overlay_noalpha(i, pos); ); ++ GLSLC(1, } ); ++ GLSLC(0, } ); ++ ++ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", ++ &spv_opaque)); ++ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); ++ ++ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); ++ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); ++ ++ s->opts.o_offset[0] = s->overlay_x; ++ s->opts.o_offset[1] = s->overlay_y; ++ s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; ++ s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; ++ s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; ++ s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; ++ ++ s->opts.o_size[0] = s->overlay_w; ++ s->opts.o_size[1] = s->overlay_h; ++ s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; ++ s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; ++ s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; ++ s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; 
++ } + +- ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); ++ /* overlay_compute_pass */ ++ RET(ff_vk_shader_init(&s->pl_pass, &s->shd_pass, "overlay_compute_pass", ++ VK_SHADER_STAGE_COMPUTE_BIT, 0)); ++ { ++ shd = &s->shd_pass; ++ ff_vk_shader_set_compute_sizes(shd, 32, 32, 1); ++ ++ desc = (FFVulkanDescriptorSetBinding []) { ++ { ++ .name = "main_img", ++ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, ++ .dimensions = 2, ++ .elems = planes, ++ .stages = VK_SHADER_STAGE_COMPUTE_BIT, ++ .samplers = DUP_SAMPLER(s->sampler), ++ }, ++ { ++ .name = "output_img", ++ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ++ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), ++ .mem_quali = "writeonly", ++ .dimensions = 2, ++ .elems = planes, ++ .stages = VK_SHADER_STAGE_COMPUTE_BIT, ++ }, ++ }; ++ ++ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_pass, shd, desc, 2, 0, 0)); ++ ++ GLSLC(0, void main() ); ++ GLSLC(0, { ); ++ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); ++ GLSLF(1, int planes = %i; ,planes); ++ GLSLC(1, for (int i = 0; i < planes; i++) { ); ++ GLSLC(2, vec4 res = texture(main_img[i], pos); ); ++ GLSLC(2, imageStore(output_img[i], pos, res); ); ++ GLSLC(1, } ); ++ GLSLC(0, } ); ++ ++ RET(spv->compile_shader(spv, ctx, shd, &spv_data_pass, &spv_len, "main", ++ &spv_opaque)); ++ RET(ff_vk_shader_create(vkctx, shd, spv_data_pass, spv_len, "main")); + +- GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); +- GLSLC(1, ivec2 o_offset[3]; ); +- GLSLC(1, ivec2 o_size[3]; ); +- GLSLC(0, }; ); +- GLSLC(0, ); +- +- ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), +- VK_SHADER_STAGE_COMPUTE_BIT); +- +- desc = (FFVulkanDescriptorSetBinding []) { +- { +- .name = "main_img", +- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, +- .dimensions = 2, +- .elems = planes, +- .stages = VK_SHADER_STAGE_COMPUTE_BIT, +- .samplers = DUP_SAMPLER(s->sampler), +- }, +- { +- .name = "overlay_img", +- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 
+- .dimensions = 2, +- .elems = planes, +- .stages = VK_SHADER_STAGE_COMPUTE_BIT, +- .samplers = DUP_SAMPLER(s->sampler), +- }, +- { +- .name = "output_img", +- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, +- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), +- .mem_quali = "writeonly", +- .dimensions = 2, +- .elems = planes, +- .stages = VK_SHADER_STAGE_COMPUTE_BIT, +- }, +- }; +- +- RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0)); +- +- GLSLD( overlay_noalpha ); +- GLSLD( overlay_alpha ); +- GLSLC(0, void main() ); +- GLSLC(0, { ); +- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); +- GLSLF(1, int planes = %i; ,planes); +- GLSLC(1, for (int i = 0; i < planes; i++) { ); +- if (ialpha) +- GLSLC(2, overlay_alpha_opaque(i, pos); ); +- else +- GLSLC(2, overlay_noalpha(i, pos); ); +- GLSLC(1, } ); +- GLSLC(0, } ); +- +- RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", +- &spv_opaque)); +- RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); +- +- RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); +- RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); +- +- s->opts.o_offset[0] = s->overlay_x; +- s->opts.o_offset[1] = s->overlay_y; +- s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; +- s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; +- s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; +- s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; +- +- s->opts.o_size[0] = s->overlay_w; +- s->opts.o_size[1] = s->overlay_h; +- s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; +- s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; +- s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; +- s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; ++ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_pass, shd)); ++ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_pass)); ++ 
} + + s->initialized = 1; + + fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); ++ if (spv_opaque_pass) ++ spv->free_shader(spv, &spv_opaque_pass); + if (spv) + spv->uninit(&spv); + +@@ -209,22 +269,11 @@ static int overlay_vulkan_blend(FFFrameS + if (err < 0) + goto fail; + +- if (!input_main || !input_overlay) +- return 0; +- +- if (!s->initialized) { +- AVHWFramesContext *main_fc = (AVHWFramesContext*)input_main->hw_frames_ctx->data; +- AVHWFramesContext *overlay_fc = (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; +- if (main_fc->sw_format != overlay_fc->sw_format) { +- av_log(ctx, AV_LOG_ERROR, "Mismatching sw formats!\n"); +- return AVERROR(EINVAL); +- } +- +- s->overlay_w = input_overlay->width; +- s->overlay_h = input_overlay->height; ++ if (!input_main) ++ return AVERROR_BUG; + ++ if (!s->initialized) + RET(init_filter(ctx)); +- } + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { +@@ -232,9 +281,13 @@ static int overlay_vulkan_blend(FFFrameS + goto fail; + } + +- RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, +- out, (AVFrame *[]){ input_main, input_overlay }, 2, +- s->sampler, &s->opts, sizeof(s->opts))); ++ if (input_overlay) ++ RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, ++ out, (AVFrame *[]){ input_main, input_overlay }, 2, ++ s->sampler, &s->opts, sizeof(s->opts))); ++ else /* passthrough */ ++ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl_pass, out, input_main, ++ s->sampler, NULL, 0)); + + err = av_frame_copy_props(out, input_main); + if (err < 0) +@@ -252,6 +305,18 @@ static int overlay_vulkan_config_output( + int err; + AVFilterContext *avctx = outlink->src; + OverlayVulkanContext *s = avctx->priv; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ AVFilterLink *inlink_overlay = avctx->inputs[1]; ++ AVHWFramesContext *main_fc = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ AVHWFramesContext *overlay_fc = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data; ++ ++ if 
(main_fc->sw_format != overlay_fc->sw_format) { ++ av_log(avctx, AV_LOG_ERROR, "Mismatching sw formats!\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ s->overlay_w = inlink_overlay->w; ++ s->overlay_h = inlink_overlay->h; + + err = ff_vk_filter_config_output(outlink); + if (err < 0) +@@ -261,6 +326,11 @@ static int overlay_vulkan_config_output( + if (err < 0) + return err; + ++ s->fs.opt_repeatlast = s->opt_repeatlast; ++ s->fs.opt_shortest = s->opt_shortest; ++ s->fs.opt_eof_action = s->opt_eof_action; ++ s->fs.time_base = outlink->time_base = inlink->time_base; ++ + return ff_framesync_configure(&s->fs); + } + +@@ -288,7 +358,9 @@ static void overlay_vulkan_uninit(AVFilt + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); ++ ff_vk_pipeline_free(vkctx, &s->pl_pass); + ff_vk_shader_free(vkctx, &s->shd); ++ ff_vk_shader_free(vkctx, &s->shd_pass); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, +@@ -305,6 +377,14 @@ static void overlay_vulkan_uninit(AVFilt + static const AVOption overlay_vulkan_options[] = { + { "x", "Set horizontal offset", OFFSET(overlay_x), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, + { "y", "Set vertical offset", OFFSET(overlay_y), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, ++ { "eof_action", "Action to take when encountering EOF from secondary input ", ++ OFFSET(opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, ++ EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, .unit = "eof_action" }, ++ { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, .unit = "eof_action" }, ++ { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, .unit = "eof_action" }, ++ { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, .unit = "eof_action" }, ++ { "shortest", "force termination when the shortest input 
terminates", OFFSET(opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, + { NULL }, + }; + diff --git a/cross/ffmpeg7/patches/1069-jellyfin-0069-add-fixes-x265-build-from-upstream.patch b/cross/ffmpeg7/patches/1069-jellyfin-0069-add-fixes-x265-build-from-upstream.patch new file mode 100644 index 00000000000..9839bde29e6 --- /dev/null +++ b/cross/ffmpeg7/patches/1069-jellyfin-0069-add-fixes-x265-build-from-upstream.patch @@ -0,0 +1,79 @@ +Index: FFmpeg/libavcodec/libx265.c +=================================================================== +--- libavcodec/libx265.c ++++ libavcodec/libx265.c +@@ -646,7 +646,13 @@ static int libx265_encode_frame(AVCodecC + { + libx265Context *ctx = avctx->priv_data; + x265_picture x265pic; +- x265_picture x265pic_out = { 0 }; ++#if X265_BUILD >= 210 ++ x265_picture x265pic_layers_out[MAX_SCALABLE_LAYERS]; ++ x265_picture* x265pic_lyrptr_out[MAX_SCALABLE_LAYERS]; ++#else ++ x265_picture x265pic_solo_out = { 0 }; ++#endif ++ x265_picture* x265pic_out; + x265_nal *nal; + x265_sei *sei; + uint8_t *dst; +@@ -764,8 +770,16 @@ static int libx265_encode_frame(AVCodecC + } + } + ++#if X265_BUILD >= 210 ++ for (i = 0; i < MAX_SCALABLE_LAYERS; i++) ++ x265pic_lyrptr_out[i] = &x265pic_layers_out[i]; ++ ++ ret = ctx->api->encoder_encode(ctx->encoder, &nal, &nnal, ++ pic ? &x265pic : NULL, x265pic_lyrptr_out); ++#else + ret = ctx->api->encoder_encode(ctx->encoder, &nal, &nnal, +- pic ? &x265pic : NULL, &x265pic_out); ++ pic ? 
&x265pic : NULL, &x265pic_solo_out); ++#endif + + for (i = 0; i < sei->numPayloads; i++) + av_free(sei->payloads[i].payload); +@@ -795,10 +809,16 @@ static int libx265_encode_frame(AVCodecC + pkt->flags |= AV_PKT_FLAG_KEY; + } + +- pkt->pts = x265pic_out.pts; +- pkt->dts = x265pic_out.dts; ++#if X265_BUILD >= 210 ++ x265pic_out = x265pic_lyrptr_out[0]; ++#else ++ x265pic_out = &x265pic_solo_out; ++#endif ++ ++ pkt->pts = x265pic_out->pts; ++ pkt->dts = x265pic_out->dts; + +- switch (x265pic_out.sliceType) { ++ switch (x265pic_out->sliceType) { + case X265_TYPE_IDR: + case X265_TYPE_I: + pict_type = AV_PICTURE_TYPE_I; +@@ -816,16 +836,16 @@ static int libx265_encode_frame(AVCodecC + } + + #if X265_BUILD >= 130 +- if (x265pic_out.sliceType == X265_TYPE_B) ++ if (x265pic_out->sliceType == X265_TYPE_B) + #else +- if (x265pic_out.frameData.sliceType == 'b') ++ if (x265pic_out->frameData.sliceType == 'b') + #endif + pkt->flags |= AV_PKT_FLAG_DISPOSABLE; + +- ff_side_data_set_encoder_stats(pkt, x265pic_out.frameData.qp * FF_QP2LAMBDA, NULL, 0, pict_type); ++ ff_side_data_set_encoder_stats(pkt, x265pic_out->frameData.qp * FF_QP2LAMBDA, NULL, 0, pict_type); + +- if (x265pic_out.userData) { +- int idx = (int)(intptr_t)x265pic_out.userData - 1; ++ if (x265pic_out->userData) { ++ int idx = (int)(intptr_t)x265pic_out->userData - 1; + ReorderedData *rd = &ctx->rd[idx]; + + pkt->duration = rd->duration; diff --git a/cross/ffmpeg7/patches/1070-jellyfin-0070-fix-yuv420p-to-p01x-unscaled-conversion.patch b/cross/ffmpeg7/patches/1070-jellyfin-0070-fix-yuv420p-to-p01x-unscaled-conversion.patch new file mode 100644 index 00000000000..3983e764ea2 --- /dev/null +++ b/cross/ffmpeg7/patches/1070-jellyfin-0070-fix-yuv420p-to-p01x-unscaled-conversion.patch @@ -0,0 +1,217 @@ +Index: FFmpeg/libswscale/swscale_unscaled.c +=================================================================== +--- libswscale/swscale_unscaled.c ++++ libswscale/swscale_unscaled.c +@@ -352,7 +352,7 @@ static int 
planar8ToP01xleWrapper(SwsCon + const uint8_t *tsrc0 = src[0]; + for (x = c->srcW; x > 0; x--) { + t = *tsrc0++; +- output_pixel(tdstY++, t | (t << 8)); ++ output_pixel(tdstY++, (t << 8)); + } + src[0] += srcStride[0]; + dstY += dstStride[0] / 2; +@@ -363,9 +363,9 @@ static int planar8ToP01xleWrapper(SwsCon + const uint8_t *tsrc2 = src[2]; + for (x = c->srcW / 2; x > 0; x--) { + t = *tsrc1++; +- output_pixel(tdstUV++, t | (t << 8)); ++ output_pixel(tdstUV++, (t << 8)); + t = *tsrc2++; +- output_pixel(tdstUV++, t | (t << 8)); ++ output_pixel(tdstUV++, (t << 8)); + } + src[1] += srcStride[1]; + src[2] += srcStride[2]; +Index: FFmpeg/tests/ref/fate/filter-pixdesc-p010le +=================================================================== +--- tests/ref/fate/filter-pixdesc-p010le ++++ tests/ref/fate/filter-pixdesc-p010le +@@ -1 +1 @@ +-pixdesc-p010le 7b4a503997eb4e14cba80ee52db85e39 ++pixdesc-p010le 0268fd44f63022e21ada69704534fc85 +Index: FFmpeg/tests/ref/fate/filter-pixdesc-p016le +=================================================================== +--- tests/ref/fate/filter-pixdesc-p016le ++++ tests/ref/fate/filter-pixdesc-p016le +@@ -1 +1 @@ +-pixdesc-p016le ed04897de0a6788bb3458e7365f10d36 ++pixdesc-p016le 0268fd44f63022e21ada69704534fc85 +Index: FFmpeg/tests/ref/fate/filter-pixfmts-copy +=================================================================== +--- tests/ref/fate/filter-pixfmts-copy ++++ tests/ref/fate/filter-pixfmts-copy +@@ -63,11 +63,11 @@ nv21 335d85c9af6110f26ae9 + nv24 f30fc8d0ac40af69e119ea919a314572 + nv42 29a212f70f8780fe0eb99abcae81894d + p010be 7f9842d6015026136bad60d03c035cc3 +-p010le c453421b9f726bdaf2bacf59a492c43b ++p010le 1929db89609c4b8c6d9c9030a9e7843d + p012be 7f9842d6015026136bad60d03c035cc3 + p012le 1929db89609c4b8c6d9c9030a9e7843d + p016be 7f9842d6015026136bad60d03c035cc3 +-p016le c453421b9f726bdaf2bacf59a492c43b ++p016le 1929db89609c4b8c6d9c9030a9e7843d + p210be 847e9c6e292b17349e69570829252b3e + p210le 
c06e4b76cf504e908128081f92b60ce2 + p212be 4df641ed058718ad27a01889f923b04f +Index: FFmpeg/tests/ref/fate/filter-pixfmts-crop +=================================================================== +--- tests/ref/fate/filter-pixfmts-crop ++++ tests/ref/fate/filter-pixfmts-crop +@@ -61,11 +61,11 @@ nv21 1bcfc197f4fb95de85ba + nv24 514c8f12082f0737e558778cbe7de258 + nv42 ece9baae1c5de579dac2c66a89e08ef3 + p010be 8b2de2eb6b099bbf355bfc55a0694ddc +-p010le 373b50c766dfd0a8e79c9a73246d803a ++p010le a1e4f713e145dfc465bfe0cc77096a03 + p012be 8b2de2eb6b099bbf355bfc55a0694ddc + p012le a1e4f713e145dfc465bfe0cc77096a03 + p016be 8b2de2eb6b099bbf355bfc55a0694ddc +-p016le 373b50c766dfd0a8e79c9a73246d803a ++p016le a1e4f713e145dfc465bfe0cc77096a03 + p210be 2947f43774352ef61f9e83777548c7c5 + p210le 74fcd5a32eee687eebe002c884103963 + p212be c983aa869bae2c70e7b01810902ffc05 +Index: FFmpeg/tests/ref/fate/filter-pixfmts-field +=================================================================== +--- tests/ref/fate/filter-pixfmts-field ++++ tests/ref/fate/filter-pixfmts-field +@@ -63,11 +63,11 @@ nv21 7294574037cc7f9373ef + nv24 3b100fb527b64ee2b2d7120da573faf5 + nv42 1841ce853152d86b27c130f319ea0db2 + p010be a0311a09bba7383553267d2b3b9c075e +-p010le ee09a18aefa3ebe97715b3a7312cb8ff ++p010le f1cc90d292046109a626db2da9f0f9b6 + p012be a0311a09bba7383553267d2b3b9c075e + p012le f1cc90d292046109a626db2da9f0f9b6 + p016be a0311a09bba7383553267d2b3b9c075e +-p016le ee09a18aefa3ebe97715b3a7312cb8ff ++p016le f1cc90d292046109a626db2da9f0f9b6 + p210be 58d46f566ab28e3bcfb715c7aa53cf58 + p210le 8d68f7655a3d76f2f8436bd25beb3973 + p212be a8901966c5bc111e9e62d3989b0b666b +Index: FFmpeg/tests/ref/fate/filter-pixfmts-hflip +=================================================================== +--- tests/ref/fate/filter-pixfmts-hflip ++++ tests/ref/fate/filter-pixfmts-hflip +@@ -61,11 +61,11 @@ nv21 9f10dfff8963dc327d33 + nv24 f0c5b2f42970f8d4003621d8857a872f + nv42 4dcf9aec82b110712b396a8b365dcb13 + p010be 
744b13e44d39e1ff7588983fa03e0101 +-p010le a50b160346ab94f55a425065b57006f0 ++p010le aeb31f50c66f376b0530c7bb6287212b + p012be 744b13e44d39e1ff7588983fa03e0101 + p012le aeb31f50c66f376b0530c7bb6287212b + p016be 744b13e44d39e1ff7588983fa03e0101 +-p016le a50b160346ab94f55a425065b57006f0 ++p016le aeb31f50c66f376b0530c7bb6287212b + p210be 6f5a76d6467b86d55fe5589d3af8a7ea + p210le b6982912b2376371edea4fccf99fe40c + p212be 9ffa4664543233ec7c9b99a627cb7003 +Index: FFmpeg/tests/ref/fate/filter-pixfmts-il +=================================================================== +--- tests/ref/fate/filter-pixfmts-il ++++ tests/ref/fate/filter-pixfmts-il +@@ -63,11 +63,11 @@ nv21 ab586d8781246b5a32d8 + nv24 554153c71d142e3fd8e40b7dcaaec229 + nv42 d699724c8deaeb4f87faf2766512eec3 + p010be 3df51286ef66b53e3e283dbbab582263 +-p010le eadcd8241e97e35b2b47d5eb2eaea6cd ++p010le 38945445b360fa737e9e37257393e823 + p012be 3df51286ef66b53e3e283dbbab582263 + p012le 38945445b360fa737e9e37257393e823 + p016be 3df51286ef66b53e3e283dbbab582263 +-p016le eadcd8241e97e35b2b47d5eb2eaea6cd ++p016le 38945445b360fa737e9e37257393e823 + p210be 29ec4e8912d456cd15203a96487c42e8 + p210le c695064fb9f2cc4e35957d4d649cc281 + p212be ee6f88801823da3d617fb9e073e88068 +Index: FFmpeg/tests/ref/fate/filter-pixfmts-null +=================================================================== +--- tests/ref/fate/filter-pixfmts-null ++++ tests/ref/fate/filter-pixfmts-null +@@ -63,11 +63,11 @@ nv21 335d85c9af6110f26ae9 + nv24 f30fc8d0ac40af69e119ea919a314572 + nv42 29a212f70f8780fe0eb99abcae81894d + p010be 7f9842d6015026136bad60d03c035cc3 +-p010le c453421b9f726bdaf2bacf59a492c43b ++p010le 1929db89609c4b8c6d9c9030a9e7843d + p012be 7f9842d6015026136bad60d03c035cc3 + p012le 1929db89609c4b8c6d9c9030a9e7843d + p016be 7f9842d6015026136bad60d03c035cc3 +-p016le c453421b9f726bdaf2bacf59a492c43b ++p016le 1929db89609c4b8c6d9c9030a9e7843d + p210be 847e9c6e292b17349e69570829252b3e + p210le c06e4b76cf504e908128081f92b60ce2 + p212be 
4df641ed058718ad27a01889f923b04f +Index: FFmpeg/tests/ref/fate/filter-pixfmts-pad +=================================================================== +--- tests/ref/fate/filter-pixfmts-pad ++++ tests/ref/fate/filter-pixfmts-pad +@@ -28,9 +28,9 @@ nv16 d3a50501d2ea8535489f + nv21 0fdeb2cdd56cf5a7147dc273456fa217 + nv24 193b9eadcc06ad5081609f76249b3e47 + nv42 1738ad3c31c6c16e17679f5b09ce4677 +-p010le fbbc23cc1d764a5e6fb71883d985f3ed ++p010le 3a92c1bd3e9de050bf6abcc3fd911ab7 + p012le 3a92c1bd3e9de050bf6abcc3fd911ab7 +-p016le fbbc23cc1d764a5e6fb71883d985f3ed ++p016le 3a92c1bd3e9de050bf6abcc3fd911ab7 + p210le 680912c059de39c3401cac856bd1b0c1 + p212le a2f88017bcce2383ba60bc4872e639ba + p216le 8718662e226a4581561e7bb532af2d83 +Index: FFmpeg/tests/ref/fate/filter-pixfmts-scale +=================================================================== +--- tests/ref/fate/filter-pixfmts-scale ++++ tests/ref/fate/filter-pixfmts-scale +@@ -63,11 +63,11 @@ nv21 c74bb1c10dbbdee8a1f6 + nv24 2aa6e805bf6d4179ed8d7dea37d75db3 + nv42 80714d1eb2d8bcaeab3abc3124df1abd + p010be 1d6726d94bf1385996a9a9840dd0e878 +-p010le 4b316f2b9e18972299beb73511278fa8 ++p010le 5d436e6b35292a0e356d81f37f989b66 + p012be e4dc7ccd654c2d74fde9c7b2711d960b + p012le cd4b6bdcd8967fc0e869ce3b8a014133 + p016be 31e204018cbb53f8988c4e1174ea8ce9 +-p016le d5afe557f492a09317e525d7cb782f5b ++p016le 6832661b5fe5f9a7a882f482a881b679 + p210be 2cc6dfcf5e006c8ed5238988a06fd45e + p210le 04efb8f14a9d98417af40954a06aa187 + p212be 611c6e267e7a694ce89467779e44060b +Index: FFmpeg/tests/ref/fate/filter-pixfmts-transpose +=================================================================== +--- tests/ref/fate/filter-pixfmts-transpose ++++ tests/ref/fate/filter-pixfmts-transpose +@@ -60,11 +60,11 @@ nv21 292adaf5271c5c8516b7 + nv24 ea9de8b47faed722ee40182f89489beb + nv42 636af6cd6a4f3ac5edc0fc3ce3c56d63 + p010be ad0de2cc9bff81688b182a870fcf7000 +-p010le e7ff5143595021246733ce6bd0a769e8 ++p010le 024ef1cf56a4872f202b96a6a4bbf10a + p012be 
ad0de2cc9bff81688b182a870fcf7000 + p012le 024ef1cf56a4872f202b96a6a4bbf10a + p016be ad0de2cc9bff81688b182a870fcf7000 +-p016le e7ff5143595021246733ce6bd0a769e8 ++p016le 024ef1cf56a4872f202b96a6a4bbf10a + p410be 8b3e0ccb31b6a20ff00a29253fb2dec3 + p410le 4e5f78dfccda9a6387e81354a56a033a + p412be 88e4578d2c6d99399a6cf1db9e4c0553 +Index: FFmpeg/tests/ref/fate/filter-pixfmts-vflip +=================================================================== +--- tests/ref/fate/filter-pixfmts-vflip ++++ tests/ref/fate/filter-pixfmts-vflip +@@ -63,11 +63,11 @@ nv21 2909feacd27bebb080c8 + nv24 334420b9d3df84499d2ca16bb66eed2b + nv42 ba4063e2795c17fea3c8a646b01fd1f5 + p010be 06e9354b6e0e38ba41736352cedc0bd5 +-p010le fd18d322bffbf5816902c13102872e22 ++p010le cdf6a3c38d9d4e3f079fa369e1dda662 + p012be 06e9354b6e0e38ba41736352cedc0bd5 + p012le cdf6a3c38d9d4e3f079fa369e1dda662 + p016be 06e9354b6e0e38ba41736352cedc0bd5 +-p016le fd18d322bffbf5816902c13102872e22 ++p016le cdf6a3c38d9d4e3f079fa369e1dda662 + p210be ca886ab2b3ea5c153f1954b3709f7249 + p210le d71c2d4e483030ffd87fa6a68c83fce0 + p212be 1734e5840d4e75defe7a28683c3f8856 diff --git a/cross/ffmpeg7/patches/1071-jellyfin-0071-allow-vt-sw-decoder-for-every-codec.patch b/cross/ffmpeg7/patches/1071-jellyfin-0071-allow-vt-sw-decoder-for-every-codec.patch new file mode 100644 index 00000000000..67a3fd7341a --- /dev/null +++ b/cross/ffmpeg7/patches/1071-jellyfin-0071-allow-vt-sw-decoder-for-every-codec.patch @@ -0,0 +1,15 @@ +Index: FFmpeg/libavcodec/videotoolbox.c +=================================================================== +--- libavcodec/videotoolbox.c ++++ libavcodec/videotoolbox.c +@@ -812,9 +812,7 @@ static CFDictionaryRef videotoolbox_deco + &kCFTypeDictionaryValueCallBacks); + + CFDictionarySetValue(config_info, +- codec_type == kCMVideoCodecType_HEVC ? 
+- kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder : +- kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder, ++ kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder, + kCFBooleanTrue); + + avc_info = CFDictionaryCreateMutable(kCFAllocatorDefault, diff --git a/cross/ffmpeg7/patches/1072-jellyfin-0072-add-bwdif-videotoolbox-filter.patch b/cross/ffmpeg7/patches/1072-jellyfin-0072-add-bwdif-videotoolbox-filter.patch new file mode 100644 index 00000000000..609be1c4adc --- /dev/null +++ b/cross/ffmpeg7/patches/1072-jellyfin-0072-add-bwdif-videotoolbox-filter.patch @@ -0,0 +1,765 @@ +Index: FFmpeg/configure +=================================================================== +--- configure ++++ configure +@@ -3821,6 +3821,7 @@ boxblur_opencl_filter_deps="opencl gpl" + bs2b_filter_deps="libbs2b" + bwdif_cuda_filter_deps="ffnvcodec" + bwdif_cuda_filter_deps_any="cuda_nvcc cuda_llvm" ++bwdif_videotoolbox_filter_deps="metal corevideo videotoolbox" + bwdif_vulkan_filter_deps="vulkan spirv_compiler" + chromaber_vulkan_filter_deps="vulkan spirv_compiler" + color_vulkan_filter_deps="vulkan spirv_compiler" +Index: FFmpeg/libavfilter/Makefile +=================================================================== +--- libavfilter/Makefile ++++ libavfilter/Makefile +@@ -218,6 +218,10 @@ OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) + OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o bwdifdsp.o yadif_common.o + OBJS-$(CONFIG_BWDIF_CUDA_FILTER) += vf_bwdif_cuda.o vf_bwdif_cuda.ptx.o \ + yadif_common.o ++OBJS-$(CONFIG_BWDIF_VIDEOTOOLBOX_FILTER) += vf_bwdif_videotoolbox.o \ ++ metal/vf_bwdif_videotoolbox.metallib.o \ ++ metal/utils.o \ ++ yadif_common.o + OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vf_bwdif_vulkan.o yadif_common.o vulkan.o vulkan_filter.o + OBJS-$(CONFIG_CAS_FILTER) += vf_cas.o + OBJS-$(CONFIG_CCREPACK_FILTER) += vf_ccrepack.o +Index: FFmpeg/libavfilter/allfilters.c +=================================================================== +--- 
libavfilter/allfilters.c ++++ libavfilter/allfilters.c +@@ -201,6 +201,7 @@ extern const AVFilter ff_vf_boxblur; + extern const AVFilter ff_vf_boxblur_opencl; + extern const AVFilter ff_vf_bwdif; + extern const AVFilter ff_vf_bwdif_cuda; ++extern const AVFilter ff_vf_bwdif_videotoolbox; + extern const AVFilter ff_vf_bwdif_vulkan; + extern const AVFilter ff_vf_cas; + extern const AVFilter ff_vf_ccrepack; +Index: FFmpeg/libavfilter/metal/vf_bwdif_videotoolbox.metal +=================================================================== +--- /dev/null ++++ libavfilter/metal/vf_bwdif_videotoolbox.metal +@@ -0,0 +1,271 @@ ++/* bwdif.metal ++ ++ Copyright (c) 2003-2024 HandBrake Team ++ Copyright (c) 2019 Philip Langdale ++ ++ port of FFmpeg vf_bwdif_cuda. ++ ++ This file is part of the HandBrake source code ++ Homepage: . ++ It may be used under the terms of the GNU General Public License v2. ++ For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html ++ */ ++ ++#include ++#include ++#include ++ ++using namespace metal; ++ ++/* ++ * Parameters ++ */ ++ ++ ++struct params { ++ uint channels; ++ uint parity; ++ uint tff; ++ bool is_second_field; ++ bool skip_spatial_check; ++ bool is_field_end; ++}; ++ ++/* ++ * Texture access helpers ++ */ ++ ++#define accesstype access::sample ++constexpr sampler s(coord::pixel); ++ ++template ++T tex2D(texture2d tex, int x, int y) ++{ ++ return tex.sample(s, float2(x, y)).x; ++} ++ ++template <> ++float2 tex2D(texture2d tex, int x, int y) ++{ ++ return tex.sample(s, float2(x, y)).xy; ++} ++ ++template ++T tex2D(texture2d tex, uint x, uint y) ++{ ++ return tex.read(uint2(x, y)).x; ++} ++ ++template <> ++float2 tex2D(texture2d tex, uint x, uint y) ++{ ++ return tex.read(uint2(x, y)).xy; ++} ++ ++/* ++ * Bwdiff helpers ++ */ ++ ++constant static const float coef_lf[2] = { 4309, 213 }; ++constant static const float coef_hf[3] = { 5570, 3801, 1016 }; ++constant static const float coef_sp[2] = { 5077, 981 }; ++ 
++template ++T filter_intra(T cur_prefs3, T cur_prefs, ++ T cur_mrefs, T cur_mrefs3) ++{ ++ T final = (coef_sp[0] * (cur_mrefs + cur_prefs) - ++ coef_sp[1] * (cur_mrefs3 + cur_prefs3)) / (1 << 13); ++ return saturate(final); ++} ++ ++template ++T filter_temp(T cur_prefs3, T cur_prefs, T cur_mrefs, T cur_mrefs3, ++ T prev2_prefs4, T prev2_prefs2, T prev2_0, T prev2_mrefs2, T prev2_mrefs4, ++ T prev_prefs, T prev_mrefs, T next_prefs, T next_mrefs, ++ T next2_prefs4, T next2_prefs2, T next2_0, T next2_mrefs2, T next2_mrefs4) ++{ ++ T final; ++ ++ T c = cur_mrefs; ++ T d = (prev2_0 + next2_0) / 2; ++ T e = cur_prefs; ++ ++ T temporal_diff0 = abs(prev2_0 - next2_0); ++ T temporal_diff1 = (abs(prev_mrefs - c) + abs(prev_prefs - e)) / 2; ++ T temporal_diff2 = (abs(next_mrefs - c) + abs(next_prefs - e)) / 2; ++ T diff = max3(temporal_diff0 / 2, temporal_diff1, temporal_diff2); ++ ++ if (!diff) { ++ final = d; ++ } else { ++ T b = ((prev2_mrefs2 + next2_mrefs2) / 2) - c; ++ T f = ((prev2_prefs2 + next2_prefs2) / 2) - e; ++ T dc = d - c; ++ T de = d - e; ++ T mmax = max3(de, dc, min(b, f)); ++ T mmin = min3(de, dc, max(b, f)); ++ diff = max3(diff, mmin, -mmax); ++ ++ float interpol; ++ if (abs(c - e) > temporal_diff0) { ++ interpol = (((coef_hf[0] * (prev2_0 + next2_0) ++ - coef_hf[1] * (prev2_mrefs2 + next2_mrefs2 + prev2_prefs2 + next2_prefs2) ++ + coef_hf[2] * (prev2_mrefs4 + next2_mrefs4 + prev2_prefs4 + next2_mrefs4)) / 4) ++ + coef_lf[0] * (c + e) - coef_lf[1] * (cur_mrefs3 + cur_prefs3)) / (1 << 13); ++ } else { ++ interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur_mrefs3 + cur_prefs3)) / (1 << 13); ++ } ++ ++ if (interpol > d + diff) { ++ interpol = d + diff; ++ } else if (interpol < d - diff) { ++ interpol = d - diff; ++ } ++ final = saturate(interpol); ++ } ++ ++ return final; ++} ++ ++template ++T bwdif_single(texture2d dst, ++ texture2d prev, ++ texture2d cur, ++ texture2d next, ++ int parity, int tff, ++ bool is_field_end, bool is_second_field, ++ ushort2 pos) 
++{ ++ // Don't modify the primary field ++ if (pos.y % 2 == parity) { ++ return tex2D(cur, pos.x, pos.y); ++ } ++ ++ T cur_prefs3 = tex2D(cur, pos.x, pos.y + 3); ++ T cur_prefs = tex2D(cur, pos.x, pos.y + 1); ++ T cur_mrefs = tex2D(cur, pos.x, pos.y - 1); ++ T cur_mrefs3 = tex2D(cur, pos.x, pos.y - 3); ++ ++ if (is_field_end) { ++ return filter_intra(cur_prefs3, cur_prefs, cur_mrefs, cur_mrefs3); ++ } ++ ++ // Calculate temporal prediction ++ texture2d prev2 = prev; ++ texture2d prev1 = is_second_field ? cur : prev; ++ texture2d next1 = is_second_field ? next : cur; ++ texture2d next2 = next; ++ ++ T prev2_prefs4 = tex2D(prev2, pos.x, pos.y+ 4); ++ T prev2_prefs2 = tex2D(prev2, pos.x, pos.y + 2); ++ T prev2_0 = tex2D(prev2, pos.x, pos.y + 0); ++ T prev2_mrefs2 = tex2D(prev2, pos.x, pos.y - 2); ++ T prev2_mrefs4 = tex2D(prev2, pos.x, pos.y - 4); ++ T prev_prefs = tex2D(prev1, pos.x, pos.y + 1); ++ T prev_mrefs = tex2D(prev1, pos.x, pos.y - 1); ++ T next_prefs = tex2D(next1, pos.x, pos.y + 1); ++ T next_mrefs = tex2D(next1, pos.x, pos.y - 1); ++ T next2_prefs4 = tex2D(next2, pos.x, pos.y + 4); ++ T next2_prefs2 = tex2D(next2, pos.x, pos.y + 2); ++ T next2_0 = tex2D(next2, pos.x, pos.y + 0); ++ T next2_mrefs2 = tex2D(next2, pos.x, pos.y - 2); ++ T next2_mrefs4 = tex2D(next2, pos.x, pos.y - 4); ++ ++ return filter_temp(cur_prefs3, cur_prefs, cur_mrefs, cur_mrefs3, ++ prev2_prefs4, prev2_prefs2, prev2_0, prev2_mrefs2, prev2_mrefs4, ++ prev_prefs, prev_mrefs, next_prefs, next_mrefs, ++ next2_prefs4, next2_prefs2, next2_0, next2_mrefs2, next2_mrefs4); ++} ++ ++template ++T bwdif_double(texture2d dst, ++ texture2d prev, ++ texture2d cur, ++ texture2d next, ++ int parity, int tff, ++ bool is_field_end, bool is_second_field, ++ ushort2 pos) ++{ ++ // Don't modify the primary field ++ if (pos.y % 2 == parity) { ++ return tex2D(cur, pos.x, pos.y); ++ } ++ ++ T cur_prefs3 = tex2D(cur, pos.x, pos.y + 3); ++ T cur_prefs = tex2D(cur, pos.x, pos.y + 1); ++ T cur_mrefs = tex2D(cur, 
pos.x, pos.y - 1); ++ T cur_mrefs3 = tex2D(cur, pos.x, pos.y - 3); ++ ++ if (is_field_end) { ++ T final; ++ final.x = filter_intra(cur_prefs3.x, cur_prefs.x, cur_mrefs.x, cur_mrefs3.x); ++ final.y = filter_intra(cur_prefs3.y, cur_prefs.y, cur_mrefs.y, cur_mrefs3.y); ++ return final; ++ } ++ ++ // Calculate temporal prediction ++ texture2d prev2 = prev; ++ texture2d prev1 = is_second_field ? cur : prev; ++ texture2d next1 = is_second_field ? next : cur; ++ texture2d next2 = next; ++ ++ T prev2_prefs4 = tex2D(prev2, pos.x, pos.y+ 4); ++ T prev2_prefs2 = tex2D(prev2, pos.x, pos.y + 2); ++ T prev2_0 = tex2D(prev2, pos.x, pos.y + 0); ++ T prev2_mrefs2 = tex2D(prev2, pos.x, pos.y - 2); ++ T prev2_mrefs4 = tex2D(prev2, pos.x, pos.y - 4); ++ T prev_prefs = tex2D(prev1, pos.x, pos.y + 1); ++ T prev_mrefs = tex2D(prev1, pos.x, pos.y - 1); ++ T next_prefs = tex2D(next1, pos.x, pos.y + 1); ++ T next_mrefs = tex2D(next1, pos.x, pos.y - 1); ++ T next2_prefs4 = tex2D(next2, pos.x, pos.y + 4); ++ T next2_prefs2 = tex2D(next2, pos.x, pos.y + 2); ++ T next2_0 = tex2D(next2, pos.x, pos.y + 0); ++ T next2_mrefs2 = tex2D(next2, pos.x, pos.y - 2); ++ T next2_mrefs4 = tex2D(next2, pos.x, pos.y - 4); ++ ++ T final; ++ final.x = filter_temp(cur_prefs3.x, cur_prefs.x, cur_mrefs.x, cur_mrefs3.x, ++ prev2_prefs4.x, prev2_prefs2.x, prev2_0.x, prev2_mrefs2.x, prev2_mrefs4.x, ++ prev_prefs.x, prev_mrefs.x, next_prefs.x, next_mrefs.x, ++ next2_prefs4.x, next2_prefs2.x, next2_0.x, next2_mrefs2.x, next2_mrefs4.x); ++ final.y = filter_temp(cur_prefs3.y, cur_prefs.y, cur_mrefs.y, cur_mrefs3.y, ++ prev2_prefs4.y, prev2_prefs2.y, prev2_0.y, prev2_mrefs2.y, prev2_mrefs4.y, ++ prev_prefs.y, prev_mrefs.y, next_prefs.y, next_mrefs.y, ++ next2_prefs4.y, next2_prefs2.y, next2_0.y, next2_mrefs2.y, next2_mrefs4.y); ++ return final; ++} ++ ++ ++/* ++ * Kernel dispatch ++ */ ++ ++kernel void deint( ++ texture2d dst [[texture(0)]], ++ texture2d prev [[texture(1)]], ++ texture2d cur [[texture(2)]], ++ texture2d 
next [[texture(3)]], ++ constant params& p [[buffer(0)]], ++ ushort2 pos [[thread_position_in_grid]]) ++{ ++ if ((pos.x >= dst.get_width()) || (pos.y >= dst.get_height())) { ++ return; ++ } ++ ++ float2 pred; ++ if (p.channels == 1) { ++ pred = float2(bwdif_single(dst, prev, cur, next, ++ p.parity, p.tff, ++ p.is_field_end, p.is_second_field, ++ pos)); ++ } else { ++ pred = bwdif_double(dst, prev, cur, next, ++ p.parity, p.tff, ++ p.is_field_end, p.is_second_field, ++ pos); ++ } ++ dst.write(pred.xyyy, pos); ++} ++ +Index: FFmpeg/libavfilter/vf_bwdif_videotoolbox.m +=================================================================== +--- /dev/null ++++ libavfilter/vf_bwdif_videotoolbox.m +@@ -0,0 +1,445 @@ ++/* ++ * Copyright (C) 2018 Philip Langdale ++ * 2020 Aman Karmani ++ * 2024 Gnattu OC ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "internal.h" ++#include "metal/utils.h" ++#include "yadif.h" ++#include "libavutil/avassert.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_videotoolbox.h" ++#include "libavutil/objc.h" ++ ++#include ++ ++extern char ff_vf_bwdif_videotoolbox_metallib_data[]; ++extern unsigned int ff_vf_bwdif_videotoolbox_metallib_len; ++ ++typedef struct API_AVAILABLE(macos(10.11), ios(8.0)) BWDIFVTContext { ++ YADIFContext yadif; ++ ++ AVBufferRef *device_ref; ++ AVBufferRef *input_frames_ref; ++ AVHWFramesContext *input_frames; ++ ++ id mtlDevice; ++ id mtlLibrary; ++ id mtlQueue; ++ id mtlPipeline; ++ id mtlFunction; ++ id mtlParamsBuffer; ++ ++ CVMetalTextureCacheRef textureCache; ++} BWDIFVTContext API_AVAILABLE(macos(10.11), ios(8.0)); ++ ++// Using sizeof(BWDIFVTContext) outside of an availability check will error ++// if we're targeting an older OS version, so we need to calculate the size ourselves ++// (we'll statically verify it's correct in bwdif_videotoolbox_init behind a check) ++#define BWDIF_VT_CTX_SIZE (sizeof(YADIFContext) + sizeof(void*) * 10) ++ ++struct mtlBwdifParams { ++ uint channels; ++ uint parity; ++ uint tff; ++ bool is_second_field; ++ bool skip_spatial_check; ++ bool is_field_end; ++}; ++ ++static void call_kernel(AVFilterContext *ctx, ++ id dst, ++ id prev, ++ id cur, ++ id next, ++ int channels, ++ int parity, ++ int tff) API_AVAILABLE(macos(10.11), ios(8.0)) ++{ ++ BWDIFVTContext *s = ctx->priv; ++ YADIFContext *y = &s->yadif; ++ bool is_field_end = y->current_field == YADIF_FIELD_END; ++ id buffer = s->mtlQueue.commandBuffer; ++ id encoder = buffer.computeCommandEncoder; ++ struct mtlBwdifParams *params = (struct mtlBwdifParams *)s->mtlParamsBuffer.contents; ++ *params = (struct 
mtlBwdifParams){ ++ .channels = channels, ++ .parity = parity, ++ .tff = tff, ++ .is_second_field = !(parity ^ tff), ++ .skip_spatial_check = s->yadif.mode&2, ++ .is_field_end = is_field_end ++ }; ++ ++ [encoder setTexture:dst atIndex:0]; ++ [encoder setTexture:prev atIndex:1]; ++ [encoder setTexture:cur atIndex:2]; ++ [encoder setTexture:next atIndex:3]; ++ [encoder setBuffer:s->mtlParamsBuffer offset:0 atIndex:4]; ++ ff_metal_compute_encoder_dispatch(s->mtlDevice, s->mtlPipeline, encoder, dst.width, dst.height); ++ [encoder endEncoding]; ++ ++ [buffer commit]; ++ [buffer waitUntilCompleted]; ++} ++ ++static void filter(AVFilterContext *ctx, AVFrame *dst, ++ int parity, int tff) API_AVAILABLE(macos(10.11), ios(8.0)) ++{ ++ BWDIFVTContext *s = ctx->priv; ++ YADIFContext *y = &s->yadif; ++ int i; ++ ++ for (i = 0; i < y->csp->nb_components; i++) { ++ int pixel_size, channels; ++ const AVComponentDescriptor *comp = &y->csp->comp[i]; ++ CVMetalTextureRef prev, cur, next, dest; ++ id tex_prev, tex_cur, tex_next, tex_dest; ++ MTLPixelFormat format; ++ ++ if (comp->plane < i) { ++ // We process planes as a whole, so don't reprocess ++ // them for additional components ++ continue; ++ } ++ ++ pixel_size = (comp->depth + comp->shift) / 8; ++ channels = comp->step / pixel_size; ++ if (pixel_size > 2 || channels > 2) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name); ++ goto exit; ++ } ++ switch (pixel_size) { ++ case 1: ++ format = channels == 1 ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm; ++ break; ++ case 2: ++ format = channels == 1 ? 
MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm; ++ break; ++ default: ++ av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name); ++ goto exit; ++ } ++ ++ av_log(ctx, AV_LOG_TRACE, ++ "Deinterlacing plane %d: pixel_size: %d channels: %d\n", ++ comp->plane, pixel_size, channels); ++ ++ prev = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->prev->data[3], i, format); ++ cur = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->cur->data[3], i, format); ++ next = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->next->data[3], i, format); ++ dest = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)dst->data[3], i, format); ++ ++ tex_prev = CVMetalTextureGetTexture(prev); ++ tex_cur = CVMetalTextureGetTexture(cur); ++ tex_next = CVMetalTextureGetTexture(next); ++ tex_dest = CVMetalTextureGetTexture(dest); ++ ++ call_kernel(ctx, tex_dest, tex_prev, tex_cur, tex_next, ++ channels, parity, tff); ++ ++ CFRelease(prev); ++ CFRelease(cur); ++ CFRelease(next); ++ CFRelease(dest); ++ } ++ ++ CVBufferPropagateAttachments((CVPixelBufferRef)y->cur->data[3], (CVPixelBufferRef)dst->data[3]); ++ ++ if (y->current_field == YADIF_FIELD_END) { ++ y->current_field = YADIF_FIELD_NORMAL; ++ } ++ ++exit: ++ return; ++} ++ ++static av_cold void do_uninit(AVFilterContext *ctx) API_AVAILABLE(macos(10.11), ios(8.0)) ++{ ++ BWDIFVTContext *s = ctx->priv; ++ ++ ff_yadif_uninit(ctx); ++ ++ av_buffer_unref(&s->device_ref); ++ av_buffer_unref(&s->input_frames_ref); ++ s->input_frames = NULL; ++ ++ ff_objc_release(&s->mtlParamsBuffer); ++ ff_objc_release(&s->mtlFunction); ++ ff_objc_release(&s->mtlPipeline); ++ ff_objc_release(&s->mtlQueue); ++ ff_objc_release(&s->mtlLibrary); ++ ff_objc_release(&s->mtlDevice); ++ ++ if (s->textureCache) { ++ CFRelease(s->textureCache); ++ s->textureCache = NULL; ++ } ++} ++ ++ ++static av_cold void bwdif_videotoolbox_uninit(AVFilterContext *ctx) ++{ ++ if 
(@available(macOS 10.11, iOS 8.0, *)) { ++ do_uninit(ctx); ++ } ++} ++ ++static av_cold int do_init(AVFilterContext *ctx) API_AVAILABLE(macos(10.11), ios(8.0)) ++{ ++ BWDIFVTContext *s = ctx->priv; ++ NSError *err = nil; ++ CVReturn ret; ++ dispatch_data_t libData; ++ ++ s->mtlDevice = MTLCreateSystemDefaultDevice(); ++ if (!s->mtlDevice) { ++ av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n"); ++ goto fail; ++ } ++ ++ av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", s->mtlDevice.name.UTF8String); ++ ++ libData = dispatch_data_create( ++ ff_vf_bwdif_videotoolbox_metallib_data, ++ ff_vf_bwdif_videotoolbox_metallib_len, ++ nil, ++ nil); ++ s->mtlLibrary = [s->mtlDevice newLibraryWithData:libData error:&err]; ++ dispatch_release(libData); ++ libData = nil; ++ if (err) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library: %s\n", err.description.UTF8String); ++ goto fail; ++ } ++ ++ s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"deint"]; ++ if (!s->mtlFunction) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal function!\n"); ++ goto fail; ++ } ++ ++ s->mtlQueue = s->mtlDevice.newCommandQueue; ++ if (!s->mtlQueue) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command queue!\n"); ++ goto fail; ++ } ++ ++ s->mtlPipeline = [s->mtlDevice ++ newComputePipelineStateWithFunction:s->mtlFunction ++ error:&err]; ++ if (err) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String); ++ goto fail; ++ } ++ ++ s->mtlParamsBuffer = [s->mtlDevice ++ newBufferWithLength:sizeof(struct mtlBwdifParams) ++ options:MTLResourceStorageModeShared]; ++ if (!s->mtlParamsBuffer) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal buffer for parameters\n"); ++ goto fail; ++ } ++ ++ ret = CVMetalTextureCacheCreate( ++ NULL, ++ NULL, ++ s->mtlDevice, ++ NULL, ++ &s->textureCache ++ ); ++ if (ret != kCVReturnSuccess) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTextureCache: %d\n", ret); ++ goto 
fail; ++ } ++ ++ return 0; ++fail: ++ bwdif_videotoolbox_uninit(ctx); ++ return AVERROR_EXTERNAL; ++} ++ ++static av_cold int bwdif_videotoolbox_init(AVFilterContext *ctx) ++{ ++ if (@available(macOS 10.11, iOS 8.0, *)) { ++ // Ensure we calculated BWDIF_VT_CTX_SIZE correctly ++ static_assert(BWDIF_VT_CTX_SIZE == sizeof(BWDIFVTContext), "Incorrect BWDIF_VT_CTX_SIZE value!"); ++ return do_init(ctx); ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Metal is not available on this OS version\n"); ++ return AVERROR(ENOSYS); ++ } ++} ++ ++static int do_config_input(AVFilterLink *inlink) API_AVAILABLE(macos(10.11), ios(8.0)) ++{ ++ AVFilterContext *ctx = inlink->dst; ++ BWDIFVTContext *s = ctx->priv; ++ ++ if (!inlink->hw_frames_ctx) { ++ av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is " ++ "required to associate the processing device.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx); ++ if (!s->input_frames_ref) { ++ av_log(ctx, AV_LOG_ERROR, "A input frames reference create " ++ "failed.\n"); ++ return AVERROR(ENOMEM); ++ } ++ s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data; ++ ++ return 0; ++} ++ ++static int config_input(AVFilterLink *inlink) ++{ ++ AVFilterContext *ctx = inlink->dst; ++ if (@available(macOS 10.11, iOS 8.0, *)) { ++ return do_config_input(inlink); ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Metal is not available on this OS version\n"); ++ return AVERROR(ENOSYS); ++ } ++} ++ ++static int do_config_output(AVFilterLink *link) API_AVAILABLE(macos(10.11), ios(8.0)) ++{ ++ AVHWFramesContext *output_frames, *input_frames; ++ AVFilterContext *ctx = link->src; ++ AVFilterLink *inlink = link->src->inputs[0]; ++ BWDIFVTContext *s = ctx->priv; ++ YADIFContext *y = &s->yadif; ++ int ret = 0; ++ ++ av_assert0(s->input_frames); ++ s->device_ref = av_buffer_ref(s->input_frames->device_ref); ++ if (!s->device_ref) { ++ av_log(ctx, AV_LOG_ERROR, "A device reference create " ++ "failed.\n"); ++ 
return AVERROR(ENOMEM); ++ } ++ ++ link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref); ++ if (!link->hw_frames_ctx) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context " ++ "for output.\n"); ++ ret = AVERROR(ENOMEM); ++ goto exit; ++ } ++ ++ input_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data; ++ ++ output_frames->format = AV_PIX_FMT_VIDEOTOOLBOX; ++ output_frames->sw_format = s->input_frames->sw_format; ++ output_frames->width = ctx->inputs[0]->w; ++ output_frames->height = ctx->inputs[0]->h; ++ ((AVVTFramesContext *)output_frames->hwctx)->color_range = ((AVVTFramesContext *)input_frames->hwctx)->color_range; ++ ++ ret = ff_filter_init_hw_frames(ctx, link, 10); ++ if (ret < 0) ++ goto exit; ++ ++ ret = av_hwframe_ctx_init(link->hw_frames_ctx); ++ if (ret < 0) { ++ av_log(ctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox frame " ++ "context for output: %d\n", ret); ++ goto exit; ++ } ++ ++ ret = ff_yadif_config_output_common(link); ++ if (ret < 0) ++ goto exit; ++ ++ y->csp = av_pix_fmt_desc_get(output_frames->sw_format); ++ y->filter = filter; ++ ++exit: ++ return ret; ++} ++ ++static int config_output(AVFilterLink *link) ++{ ++ AVFilterContext *ctx = link->src; ++ if (@available(macOS 10.11, iOS 8.0, *)) { ++ return do_config_output(link); ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Metal is not available on this OS version\n"); ++ return AVERROR(ENOSYS); ++ } ++} ++ ++#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM ++#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit } ++ ++static const AVOption bwdif_videotoolbox_options[] = { ++ #define OFFSET(x) offsetof(YADIFContext, x) ++ { "mode", "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 1, FLAGS, .unit = "mode"}, ++ CONST("send_frame", "send one frame for each frame", 
YADIF_MODE_SEND_FRAME, "mode"), ++ CONST("send_field", "send one frame for each field", YADIF_MODE_SEND_FIELD, "mode"), ++ ++ { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1, FLAGS, .unit = "parity" }, ++ CONST("tff", "assume top field first", YADIF_PARITY_TFF, "parity"), ++ CONST("bff", "assume bottom field first", YADIF_PARITY_BFF, "parity"), ++ CONST("auto", "auto detect parity", YADIF_PARITY_AUTO, "parity"), ++ ++ { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, .unit = "deint" }, ++ CONST("all", "deinterlace all frames", YADIF_DEINT_ALL, "deint"), ++ CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED, "deint"), ++ #undef OFFSET ++ ++ { NULL } ++}; ++ ++AVFILTER_DEFINE_CLASS(bwdif_videotoolbox); ++ ++static const AVFilterPad bwdif_videotoolbox_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = ff_yadif_filter_frame, ++ .config_props = config_input, ++ }, ++}; ++ ++static const AVFilterPad bwdif_videotoolbox_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .request_frame = ff_yadif_request_frame, ++ .config_props = config_output, ++ }, ++}; ++ ++const AVFilter ff_vf_bwdif_videotoolbox = { ++ .name = "bwdif_videotoolbox", ++ .description = NULL_IF_CONFIG_SMALL("BWDIF for VideoToolbox frames using Metal compute"), ++ .priv_size = BWDIF_VT_CTX_SIZE, ++ .priv_class = &bwdif_videotoolbox_class, ++ .init = bwdif_videotoolbox_init, ++ .uninit = bwdif_videotoolbox_uninit, ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX), ++ FILTER_INPUTS(bwdif_videotoolbox_inputs), ++ FILTER_OUTPUTS(bwdif_videotoolbox_outputs), ++ .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/cross/ffmpeg7/patches/1073-jellyfin-0073-add-12bit-decoding-on-videotoolbox.patch 
b/cross/ffmpeg7/patches/1073-jellyfin-0073-add-12bit-decoding-on-videotoolbox.patch new file mode 100644 index 00000000000..bd295d5324d --- /dev/null +++ b/cross/ffmpeg7/patches/1073-jellyfin-0073-add-12bit-decoding-on-videotoolbox.patch @@ -0,0 +1,24 @@ +Index: FFmpeg/libavcodec/hevcdec.c +=================================================================== +--- libavcodec/hevcdec.c ++++ libavcodec/hevcdec.c +@@ -533,6 +533,9 @@ static enum AVPixelFormat get_format(HEV + #if CONFIG_HEVC_NVDEC_HWACCEL + *fmt++ = AV_PIX_FMT_CUDA; + #endif ++#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL ++ *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; ++#endif + break; + case AV_PIX_FMT_YUV422P12: + #if CONFIG_HEVC_DXVA2_HWACCEL +@@ -548,6 +551,9 @@ static enum AVPixelFormat get_format(HEV + #if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; + #endif ++#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL ++ *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; ++#endif + break; + } + diff --git a/cross/ffmpeg7/patches/1074-jellyfin-0074-fix-the-sub2video-perf-regressions.patch b/cross/ffmpeg7/patches/1074-jellyfin-0074-fix-the-sub2video-perf-regressions.patch new file mode 100644 index 00000000000..92911c5f08c --- /dev/null +++ b/cross/ffmpeg7/patches/1074-jellyfin-0074-fix-the-sub2video-perf-regressions.patch @@ -0,0 +1,47 @@ +Index: FFmpeg/fftools/ffmpeg_filter.c +=================================================================== +--- fftools/ffmpeg_filter.c ++++ fftools/ffmpeg_filter.c +@@ -2458,7 +2458,8 @@ static void sub2video_heartbeat(InputFil + or if we need to initialize the system, update the + overlayed subpicture and its start/end times */ + sub2video_update(ifp, pts2 + 1, NULL); +- else ++ ++ if (av_buffersrc_get_nb_failed_requests(ifp->filter)) + sub2video_push_ref(ifp, pts2); + } + +Index: FFmpeg/tests/ref/fate/filter-overlay-dvdsub-2397 +=================================================================== +--- tests/ref/fate/filter-overlay-dvdsub-2397 ++++ tests/ref/fate/filter-overlay-dvdsub-2397 +@@ -489,12 
+489,12 @@ + 1, 3877, 3877, 10, 2013, 0x95a39f9c + 1, 3887, 3887, 10, 2013, 0x4f7ea123 + 1, 3897, 3897, 10, 2013, 0x9efb9ba1 +-0, 117, 117, 1, 518400, 0x61e0f688 ++0, 117, 117, 1, 518400, 0xbf8523da + 1, 3907, 3907, 10, 2013, 0xf395b2cd + 1, 3917, 3917, 10, 2013, 0x261a881e + 1, 3927, 3927, 10, 2013, 0x7f2d9f72 + 1, 3937, 3937, 10, 2013, 0x0105b38d +-0, 118, 118, 1, 518400, 0xa47de755 ++0, 118, 118, 1, 518400, 0x41890ed6 + 1, 3952, 3952, 10, 2013, 0x0e5db67e + 1, 3962, 3962, 10, 2013, 0xfc9baf97 + 0, 119, 119, 1, 518400, 0x588534fc +Index: FFmpeg/tests/ref/fate/sub2video +=================================================================== +--- tests/ref/fate/sub2video ++++ tests/ref/fate/sub2video +@@ -68,8 +68,7 @@ + 0, 258, 258, 1, 518400, 0x34cdddee + 0, 269, 269, 1, 518400, 0xbab197ea + 1, 53910000, 53910000, 2696000, 2095, 0x61bb15ed +-0, 270, 270, 1, 518400, 0xbab197ea +-0, 271, 271, 1, 518400, 0x4db4ce51 ++0, 270, 270, 1, 518400, 0x4db4ce51 + 0, 283, 283, 1, 518400, 0xbab197ea + 1, 56663000, 56663000, 1262000, 1013, 0xc9ae89b7 + 0, 284, 284, 1, 518400, 0xe6bc0ea9 diff --git a/cross/intel-gpu-tools/Makefile b/cross/intel-gpu-tools/Makefile new file mode 100644 index 00000000000..0bd9c2c257a --- /dev/null +++ b/cross/intel-gpu-tools/Makefile @@ -0,0 +1,27 @@ +PKG_NAME = igt-gpu-tools +PKG_VERS = 1.29 +PKG_EXT = tar.bz2 +PKG_DIST_NAME = $(PKG_NAME)-v$(PKG_VERS).$(PKG_EXT) +PKG_DIST_SITE = https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/archive/v$(PKG_VERS) +PKG_DIR = $(PKG_NAME)-v$(PKG_VERS) + +DEPENDS = cross/libdrm cross/libkmod cross/procps-ng cross/elfutils +DEPENDS += cross/pixman cross/cairo +DEPENDS += cross/libudev_219 + +HOMEPAGE = https://gitlab.freedesktop.org/drm/igt-gpu-tools +COMMENT = IGT GPU Tools is a collection of tools for development and testing of the DRM drivers. 
+LICENSE = MIT License + +## NOTE: +## intel_gpu_top and most of the tools require a kernel >= 4.16/4.17 +## as such, only a subset is being packaged as part of synocli-videodriver + +UNSUPPORTED_ARCHS = $(ARM_ARCHS) $(PPC_ARCHS) + +CONFIGURE_ARGS = -Ddocs=disabled +CONFIGURE_ARGS += -Dman=disabled +CONFIGURE_ARGS += -Dtestplan=disabled +CONFIGURE_ARGS += -Dtests=disabled + +include ../../mk/spksrc.cross-meson.mk diff --git a/cross/intel-gpu-tools/PLIST b/cross/intel-gpu-tools/PLIST new file mode 100644 index 00000000000..a74841ba51f --- /dev/null +++ b/cross/intel-gpu-tools/PLIST @@ -0,0 +1,7 @@ +bin:bin/intel_gem_info +bin:bin/intel_gpu_top +bin:bin/intel_opregion_decode +bin:bin/intel_stepping +bin:bin/lsgpu +lnk:lib/libigt.so +lib:lib/libigt.so.0 diff --git a/cross/intel-gpu-tools/digests b/cross/intel-gpu-tools/digests new file mode 100644 index 00000000000..82a53d60b05 --- /dev/null +++ b/cross/intel-gpu-tools/digests @@ -0,0 +1,3 @@ +igt-gpu-tools-v1.29.tar.bz2 SHA1 ee83f23e6fb44355ea6e0b8e206b3a5fd9b21c48 +igt-gpu-tools-v1.29.tar.bz2 SHA256 19a9a3955be509e24ee9fdf5b604b0f51941e746619d79e1c18a61b346471891 +igt-gpu-tools-v1.29.tar.bz2 MD5 0515e054dc5237f6763d0db1cbf98fa1 diff --git a/cross/libdrm/Makefile b/cross/libdrm/Makefile index a3ab52af9ce..dd6b85595e2 100644 --- a/cross/libdrm/Makefile +++ b/cross/libdrm/Makefile @@ -1,5 +1,5 @@ PKG_NAME = libdrm -PKG_VERS = 2.4.122 +PKG_VERS = 2.4.123 PKG_EXT = tar.xz PKG_DIST_NAME = $(PKG_NAME)-$(PKG_VERS).$(PKG_EXT) PKG_DIST_SITE = https://dri.freedesktop.org/libdrm diff --git a/cross/libdrm/PLIST b/cross/libdrm/PLIST index 5884d662122..627a0a41763 100644 --- a/cross/libdrm/PLIST +++ b/cross/libdrm/PLIST @@ -1,12 +1,12 @@ lnk:lib/libdrm.so lnk:lib/libdrm.so.2 -lib:lib/libdrm.so.2.4.0 +lib:lib/libdrm.so.2.123.0 lnk:lib/libdrm_amdgpu.so lnk:lib/libdrm_amdgpu.so.1 -lib:lib/libdrm_amdgpu.so.1.0.0 +lib:lib/libdrm_amdgpu.so.1.123.0 lnk:lib/libdrm_intel.so lnk:lib/libdrm_intel.so.1 -lib:lib/libdrm_intel.so.1.0.0 
+lib:lib/libdrm_intel.so.1.123.0 lnk:lib/libdrm_radeon.so lnk:lib/libdrm_radeon.so.1 -lib:lib/libdrm_radeon.so.1.0.1 +lib:lib/libdrm_radeon.so.1.123.0 diff --git a/cross/libdrm/digests b/cross/libdrm/digests index 5ad962074db..ce7336c42d3 100644 --- a/cross/libdrm/digests +++ b/cross/libdrm/digests @@ -1,3 +1,3 @@ -libdrm-2.4.122.tar.xz SHA1 cab543bdcdab935854ea65a2da4e303693611ce4 -libdrm-2.4.122.tar.xz SHA256 d9f5079b777dffca9300ccc56b10a93588cdfbc9dde2fae111940dfb6292f251 -libdrm-2.4.122.tar.xz MD5 143c8df50e09cd1eeb1fb53f05ecb64a +libdrm-2.4.123.tar.xz SHA1 d61f31495f931d53971fa61686239483c25c2a86 +libdrm-2.4.123.tar.xz SHA256 a2b98567a149a74b0f50e91e825f9c0315d86e7be9b74394dae8b298caadb79e +libdrm-2.4.123.tar.xz MD5 322f5ffe2d3adc2f6ecaab52ae64ba4a diff --git a/cross/libkmod/Makefile b/cross/libkmod/Makefile new file mode 100644 index 00000000000..3d49b3bf493 --- /dev/null +++ b/cross/libkmod/Makefile @@ -0,0 +1,22 @@ +PKG_NAME = kmod +PKG_VERS = 33 +PKG_EXT = tar.gz +PKG_DIST_NAME = $(PKG_NAME)-$(PKG_VERS).$(PKG_EXT) +PKG_DIST_SITE = https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git/snapshot +PKG_DIR = $(PKG_NAME)-$(PKG_VERS) + +DEPENDS = cross/zlib cross/zstd + +HOMEPAGE = https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git/about/ +COMMENT = kmod is a set of tools to handle common tasks with Linux kernel modules like insert, remove, list, check properties, resolve dependencies and aliases. 
+LICENSE = LGPL 2.1 + +GNU_CONFIGURE = 1 +PRE_CONFIGURE_TARGET = kmod_pre_configure +CONFIGURE_ARGS = --disable-manpages + +include ../../mk/spksrc.cross-cc.mk + +.PHONY: kmod_pre_configure +kmod_pre_configure: + $(RUN) ./autogen.sh diff --git a/cross/libkmod/PLIST b/cross/libkmod/PLIST new file mode 100644 index 00000000000..f321c72c281 --- /dev/null +++ b/cross/libkmod/PLIST @@ -0,0 +1,3 @@ +lnk:lib/libkmod.so +lnk:lib/libkmod.so.2 +lib:lib/libkmod.so.2.5.0 diff --git a/cross/libkmod/PLIST.all b/cross/libkmod/PLIST.all new file mode 100644 index 00000000000..f2088b623bf --- /dev/null +++ b/cross/libkmod/PLIST.all @@ -0,0 +1,10 @@ +lnk:bin/depmod +lnk:bin/insmod +lnk:bin/lsmod +lnk:bin/rmmod +lnk:bin/modinfo +lnk:bin/modprobe +bin:bin/kmod +lnk:lib/libkmod.so +lnk:lib/libkmod.so.2 +lib:lib/libkmod.so.2.5.0 diff --git a/cross/libkmod/digests b/cross/libkmod/digests new file mode 100644 index 00000000000..d671200527e --- /dev/null +++ b/cross/libkmod/digests @@ -0,0 +1,3 @@ +kmod-33.tar.gz SHA1 429f7deff998aa122aa3dbf19efe674573a4c76f +kmod-33.tar.gz SHA256 c72120a2582ae240221671ddc1aa53ee522764806f50f8bf1522bbf055679985 +kmod-33.tar.gz MD5 99381d87e3cf2cd427428ece33bd4aaf diff --git a/cross/libpciaccess/Makefile b/cross/libpciaccess/Makefile index 7c46cfeffc7..5a4011737b1 100644 --- a/cross/libpciaccess/Makefile +++ b/cross/libpciaccess/Makefile @@ -1,5 +1,5 @@ PKG_NAME = libpciaccess -PKG_VERS = 0.18 +PKG_VERS = 0.18.1 PKG_EXT = tar.xz PKG_DIST_NAME = $(PKG_NAME)-$(PKG_VERS).$(PKG_EXT) PKG_DIST_SITE = https://xorg.freedesktop.org/archive/individual/lib diff --git a/cross/libpciaccess/digests b/cross/libpciaccess/digests index df089050619..05031b0b6be 100644 --- a/cross/libpciaccess/digests +++ b/cross/libpciaccess/digests @@ -1,3 +1,3 @@ -libpciaccess-0.18.tar.xz SHA1 c1ff34f8ab9d22124949cc8712c18eb6b84755b7 -libpciaccess-0.18.tar.xz SHA256 5461b0257d495254346f52a9c329b44b346262663675d3fecdb204a7e7c262a9 -libpciaccess-0.18.tar.xz MD5 
54f48367e37666f9e0f12571d1ee3620 +libpciaccess-0.18.1.tar.xz SHA1 0f06bb9579544e6b18cb28514a5f77cb7fdd10a7 +libpciaccess-0.18.1.tar.xz SHA256 4af43444b38adb5545d0ed1c2ce46d9608cc47b31c2387fc5181656765a6fa76 +libpciaccess-0.18.1.tar.xz MD5 57c7efbeceedefde006123a77a7bc825 diff --git a/cross/libudev_219/Makefile b/cross/libudev_219/Makefile index d27aadf4dd2..11090d91a43 100644 --- a/cross/libudev_219/Makefile +++ b/cross/libudev_219/Makefile @@ -76,7 +76,15 @@ CONFIGURE_ARGS += --with-dbuspolicydir=/usr/share/dbus-1/system.d CONFIGURE_ARGS += --with-dbussessionservicedir=/usr/share/dbus-1/services CONFIGURE_ARGS += --with-dbussystemservicedir=/usr/share/dbus-1/system-services -ADDITIONAL_CFLAGS = -DSG_FLAG_LUN_INHIBIT=2 +ADDITIONAL_CFLAGS = -DSG_FLAG_LUN_INHIBIT=2 + +include ../../mk/spksrc.common.mk + +# Build fix for aarch64 and x64 for DSM-7.2: +# error: static declaration of 'renameat2' follows non-static declaration +ifeq ($(call version_ge, $(TCVERSION), 7.2),1) +CONFIGURE_ARGS += ac_cv_have_decl_renameat2=yes +endif include ../../mk/spksrc.cross-cc.mk diff --git a/cross/libudev_219/patches/003-missing-minor-major-x64-dsm72.patch b/cross/libudev_219/patches/003-missing-minor-major-x64-dsm72.patch new file mode 100644 index 00000000000..239fd422e31 --- /dev/null +++ b/cross/libudev_219/patches/003-missing-minor-major-x64-dsm72.patch @@ -0,0 +1,17 @@ +Build fix for x64 (and presumably others) for DSM-7.2. +Has no impact on DSM-7.1 builds. 
+ + undefined reference to `minor' + undefined reference to `major' + undefined reference to `makedev' + +--- src/shared/macro.h.orig 2015-02-16 20:55:07.000000000 +0000 ++++ src/shared/macro.h 2024-10-01 23:29:07.479848724 +0000 +@@ -23,6 +23,7 @@ + + #include + #include ++#include + #include + #include + #include diff --git a/cross/procps-ng/Makefile b/cross/procps-ng/Makefile new file mode 100644 index 00000000000..1b694f4889d --- /dev/null +++ b/cross/procps-ng/Makefile @@ -0,0 +1,30 @@ +PKG_NAME = procps +PKG_VERS = 4.0.4 +PKG_EXT = tar.bz2 +PKG_DIST_NAME = $(PKG_NAME)-v$(PKG_VERS).$(PKG_EXT) +PKG_DIST_SITE = https://gitlab.com/procps-ng/procps/-/archive/v$(PKG_VERS) +PKG_DIR = $(PKG_NAME)-v$(PKG_VERS) + +DEPENDS = cross/ncursesw cross/libiconv + +HOMEPAGE = https://gitlab.com/procps-ng/procps +COMMENT = procps is a set of command line and full-screen utilities that provide information out of the pseudo-filesystem most commonly located at /proc. +LICENSE = GPLv2 + +GNU_CONFIGURE = 1 +PRE_CONFIGURE_TARGET = procps_pre_configure + +# Only the library is needed, disable tools +# DSM-7.2: Fails to build ‘__NR_pidfd_open’ undeclared +# ref: https://gitlab.com/procps-ng/procps/-/issues/352 +CONFIGURE_ARGS = --disable-static +CONFIGURE_ARGS += --disable-pidof +CONFIGURE_ARGS += --disable-kill +CONFIGURE_ARGS += --disable-w +CONFIGURE_ARGS += --disable-modern-top + +include ../../mk/spksrc.cross-cc.mk + +.PHONY: procps_pre_configure +procps_pre_configure: + $(RUN) ./autogen.sh diff --git a/cross/procps-ng/PLIST b/cross/procps-ng/PLIST new file mode 100644 index 00000000000..8b8d8d3e8cd --- /dev/null +++ b/cross/procps-ng/PLIST @@ -0,0 +1,3 @@ +lib:lib/libproc2.so +lib:lib/libproc2.so.0 +lib:lib/libproc2.so.0.0.2 diff --git a/cross/procps-ng/PLIST.all b/cross/procps-ng/PLIST.all new file mode 100644 index 00000000000..24fe6f82603 --- /dev/null +++ b/cross/procps-ng/PLIST.all @@ -0,0 +1,18 @@ +bin:bin/free +bin:bin/kill +bin:bin/pgrep +bin:bin/pidof +bin:bin/pkill 
+bin:bin/pmap +bin:bin/ps +bin:bin/pwdx +bin:bin/slabtop +bin:bin/tload +bin:bin/top +bin:bin/uptime +bin:bin/vmstat +bin:bin/w +bin:bin/watch +lib:lib/libproc2.so +lib:lib/libproc2.so.0 +lib:lib/libproc2.so.0.0.2 diff --git a/cross/procps-ng/digests b/cross/procps-ng/digests new file mode 100644 index 00000000000..4fe306f43bf --- /dev/null +++ b/cross/procps-ng/digests @@ -0,0 +1,3 @@ +procps-v4.0.4.tar.bz2 SHA1 a8bc3ed0a4b046ee214c8f0ab8e7898d7b54f52f +procps-v4.0.4.tar.bz2 SHA256 08dbaaaae6afe8d5fbeee8aa3f8b460b01c5e09ce4706b161846f067103a2cf2 +procps-v4.0.4.tar.bz2 MD5 83252d291a20bcc160b00d3163bf7378 diff --git a/cross/procps-ng/patches/0001-fix-pifd_open-check.patch b/cross/procps-ng/patches/0001-fix-pifd_open-check.patch new file mode 100644 index 00000000000..83a1d0269b0 --- /dev/null +++ b/cross/procps-ng/patches/0001-fix-pifd_open-check.patch @@ -0,0 +1,11 @@ +--- configure.ac.orig 2023-08-31 09:54:51.000000000 +0000 ++++ configure.ac 2024-10-01 02:03:23.968808525 +0000 +@@ -170,7 +170,7 @@ AC_TRY_COMPILE([#include ], + AC_MSG_RESULT(yes), + AC_MSG_RESULT(no)) + +-AC_CHECK_FUNC([pidfd_open], [enable_pidwait=yes], [ ++AC_CHECK_FUNCS([pidfd_open], [enable_pidwait=yes], [ + AC_MSG_CHECKING([for __NR_pidfd_open]) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #include diff --git a/cross/snappy/Makefile b/cross/snappy/Makefile new file mode 100644 index 00000000000..26d008c24d1 --- /dev/null +++ b/cross/snappy/Makefile @@ -0,0 +1,28 @@ +PKG_NAME = snappy +PKG_VERS = 1.2.1 +PKG_EXT = tar.gz +PKG_DIST_NAME = $(PKG_VERS).$(PKG_EXT) +PKG_DIST_FILE = $(PKG_NAME)-$(PKG_VERS).$(PKG_EXT) +PKG_DIST_SITE = https://github.com/google/snappy/archive/refs/tags +PKG_DIR = $(PKG_NAME)-$(PKG_VERS) + +DEPENDS = + +# c++11 compiler required +UNSUPPORTED_ARCHS = $(OLD_PPC_ARCHS) $(ARMv5_ARCHS) + +HOMEPAGE = https://google.github.io/snappy/ +COMMENT = Snappy is a compression/decompression library. 
It does not aim for maximum compression, or compatibility with any other compression library; instead, it aims for very high speeds and reasonable compression. +LICENSE = Copyright 2011, Google Inc. + +CMAKE_ARGS += -DBUILD_SHARED_LIBS=ON +CMAKE_ARGS += -DSNAPPY_BUILD_TESTS=OFF +CMAKE_ARGS += -DSNAPPY_BUILD_BENCHMARKS=OFF + +include ../../mk/spksrc.archs.mk + +ifeq ($(findstring $(ARCH),comcerto2k),$(ARCH)) +CMAKE_ARGS += -DSNAPPY_HAVE_NEON=OFF +endif + +include ../../mk/spksrc.cross-cmake.mk diff --git a/cross/snappy/PLIST b/cross/snappy/PLIST new file mode 100644 index 00000000000..0f6e67311c7 --- /dev/null +++ b/cross/snappy/PLIST @@ -0,0 +1,3 @@ +lnk:lib/libsnappy.so +lnk:lib/libsnappy.so.1 +lib:lib/libsnappy.so.1.2.1 diff --git a/cross/snappy/digests b/cross/snappy/digests new file mode 100644 index 00000000000..3e8f0292cd9 --- /dev/null +++ b/cross/snappy/digests @@ -0,0 +1,3 @@ +snappy-1.2.1.tar.gz SHA1 8271f3a62130ebaa997994a0c5647af8fdc8e46b +snappy-1.2.1.tar.gz SHA256 736aeb64d86566d2236ddffa2865ee5d7a82d26c9016b36218fcc27ea4f09f86 +snappy-1.2.1.tar.gz MD5 dd6f9b667e69491e1dbf7419bdf68823 diff --git a/cross/tvheadend/Makefile b/cross/tvheadend/Makefile index a6665dcb1da..a085e78f012 100644 --- a/cross/tvheadend/Makefile +++ b/cross/tvheadend/Makefile @@ -2,8 +2,8 @@ PKG_NAME = tvheadend PKG_VERS = 4.3 PKG_EXT = tar.gz # no official release since v4.2.8 (Jan 12, 2019) -# git hash from Sept 4th 2024 -PKG_GIT_HASH = 9ac57a0c1a4551012260008cfca6bfc2386f6dcf +# git hash from Sept 28th 2024 +PKG_GIT_HASH = 28de5c092c657ffbbffa422c2ca3c07ba513c567 PKG_DIST_NAME = $(PKG_GIT_HASH).$(PKG_EXT) PKG_DIST_SITE = https://github.com/tvheadend/tvheadend/archive PKG_DIST_FILE = $(PKG_NAME)-git$(PKG_GIT_HASH).$(PKG_EXT) @@ -16,8 +16,10 @@ LICENSE = GPLv3 # PPC archs except qoriq are not supported # https://tvheadend.org/issues/5060 UNSUPPORTED_ARCHS = $(OLD_PPC_ARCHS) +# ffmpeg7 requires c11 support +UNSUPPORTED_ARCHS += $(ARMv5_ARCHS) -OPTIONAL_DEPENDS += cross/ffmpeg6 
+OPTIONAL_DEPENDS += cross/ffmpeg7 OPTIONAL_DEPENDS += cross/fdk-aac OPTIONAL_DEPENDS += cross/fontconfig OPTIONAL_DEPENDS += cross/opus @@ -88,7 +90,7 @@ MEDIA_LIBS += x264.pc MEDIA_LIBS += x265.pc else -DEPENDS += cross/ffmpeg6 +DEPENDS += cross/ffmpeg7 DEPENDS += cross/fdk-aac DEPENDS += cross/fontconfig DEPENDS += cross/opus diff --git a/cross/tvheadend/digests b/cross/tvheadend/digests index 40bb8e71cb6..53a86694124 100644 --- a/cross/tvheadend/digests +++ b/cross/tvheadend/digests @@ -1,3 +1,3 @@ -tvheadend-git9ac57a0c1a4551012260008cfca6bfc2386f6dcf.tar.gz SHA1 cb665528384577cec48d1b835ab04deb7a12f186 -tvheadend-git9ac57a0c1a4551012260008cfca6bfc2386f6dcf.tar.gz SHA256 67d56eecbea9cc769bc189709528f90815218fea9e9970141890ad3e4222c1d0 -tvheadend-git9ac57a0c1a4551012260008cfca6bfc2386f6dcf.tar.gz MD5 5b5a8a0b937041c8f212928dab496dba +tvheadend-git28de5c092c657ffbbffa422c2ca3c07ba513c567.tar.gz SHA1 613005d7a090f47195681c1e7832d88ab24157c4 +tvheadend-git28de5c092c657ffbbffa422c2ca3c07ba513c567.tar.gz SHA256 169fad177ad3b81580c6865330e3ccb1b9fa37442d6a79e2fd4ba3415af6fde2 +tvheadend-git28de5c092c657ffbbffa422c2ca3c07ba513c567.tar.gz MD5 ebf507be95fb771e07c5a2ba40120b08 diff --git a/mk/spksrc.videodriver.mk b/mk/spksrc.videodriver.mk index 46f2cfb5c8c..e3650de3ae6 100644 --- a/mk/spksrc.videodriver.mk +++ b/mk/spksrc.videodriver.mk @@ -29,15 +29,35 @@ export ADDITIONAL_CXXFLAGS += -I$(VIDEODRV_STAGING_PREFIX)/include export ADDITIONAL_LDFLAGS += -L$(VIDEODRV_STAGING_PREFIX)/lib export ADDITIONAL_LDFLAGS += -Wl,--rpath-link,$(VIDEODRV_STAGING_PREFIX)/lib -Wl,--rpath,$(VIDEODRV_PREFIX)/lib -# Re-use all default videodrv mandatory libraries -VIDEODRV_LIBS := $(wildcard $(VIDEODRV_STAGING_PREFIX)/lib/pkgconfig/*.pc) -endif +# videodrv library to share with other packages +VIDEODRV_PKGCFG = igc-opencl.pc +VIDEODRV_PKGCFG += igdgmm.pc +VIDEODRV_PKGCFG += igfxcmrt.pc +VIDEODRV_PKGCFG += level-zero.pc +VIDEODRV_PKGCFG += libdrm_amdgpu.pc +VIDEODRV_PKGCFG += 
libdrm_intel.pc +VIDEODRV_PKGCFG += libdrm.pc +VIDEODRV_PKGCFG += libdrm_radeon.pc +VIDEODRV_PKGCFG += libmfxhw64.pc +VIDEODRV_PKGCFG += libmfx.pc +VIDEODRV_PKGCFG += libva-drm.pc +VIDEODRV_PKGCFG += libva.pc +VIDEODRV_PKGCFG += libze_loader.pc +VIDEODRV_PKGCFG += mfx.pc +VIDEODRV_PKGCFG += ocl-icd.pc +VIDEODRV_PKGCFG += OpenCL.pc +VIDEODRV_PKGCFG += pciaccess.pc +VIDEODRV_PKGCFG += SPIRV-Tools.pc +VIDEODRV_PKGCFG += SPIRV-Tools-shared.pc +VIDEODRV_PKGCFG += vpl.pc -# Re-use all videodrv dependencies and mark as already done -VIDEODRV_DEPENDS := $(foreach cross,$(foreach pkg_name,$(shell $(MAKE) dependency-list -C $(realpath $(VIDEODRV_PACKAGE_ROOT)/../) 2>/dev/null | grep ^$(VIDEODRV_PACKAGE) | cut -f2 -d:),$(shell sed -n 's/^PKG_NAME = \(.*\)/\1/p' $(realpath $(CURDIR)/../../$(pkg_name)/Makefile))),$(wildcard $(VIDEODRV_PACKAGE_ROOT)/.$(cross)-*_done)) +# Re-use a default subset of videodrv mandatory libraries +# This avoids sharing other built-in libraries such as zlib et al. +# To share everything: $(wildcard $(VIDEODRV_STAGING_PREFIX)/lib/pkgconfig/*.pc) +VIDEODRV_LIBS := $(wildcard $(patsubst %.pc,$(VIDEODRV_STAGING_PREFIX)/lib/pkgconfig/%.pc, $(VIDEODRV_PKGCFG))) +endif # call-up pre-depend to prepare the shared videodrv build environment - ifeq ($(strip $(PRE_DEPEND_TARGET)),) PRE_DEPEND_TARGET = videodrv_pre_depend endif @@ -54,5 +74,5 @@ videodrv_pre_depend: @$(MSG) "*** PATH: $(VIDEODRV_PACKAGE_ROOT)" @$(MSG) "*****************************************************" @mkdir -p $(STAGING_INSTALL_PREFIX)/lib/pkgconfig/ + $(MSG) VIDEODRV_LIBS: $(VIDEODRV_LIBS) @$(foreach lib,$(VIDEODRV_LIBS),ln -sf $(lib) $(STAGING_INSTALL_PREFIX)/lib/pkgconfig/ ;) - @$(foreach _done,$(VIDEODRV_DEPENDS), ln -sf $(_done) $(WORK_DIR) ;) diff --git a/spk/chromaprint/Makefile b/spk/chromaprint/Makefile index 46b83541ccc..59a494e570b 100644 --- a/spk/chromaprint/Makefile +++ b/spk/chromaprint/Makefile @@ -1,18 +1,21 @@ SPK_NAME = chromaprint SPK_VERS = 1.5.2 -SPK_REV = 21 +SPK_REV = 22 
SPK_ICON = src/chromaprint.png -FFMPEG_PACKAGE = ffmpeg6 +FFMPEG_PACKAGE = ffmpeg7 DEPENDS = cross/$(SPK_NAME) SPK_DEPENDS = $(FFMPEG_PACKAGE) +# ffmpeg7 requires c11 support +UNSUPPORTED_ARCHS = $(ARMv5_ARCHS) $(OLD_PPC_ARCHS) + STARTABLE = no MAINTAINER = ymartin59 DESCRIPTION = Chromaprint is the core component of the AcoustID project. It\'s a client-side library that implements a custom algorithm for extracting fingerprints from any audio source. DISPLAY_NAME = Chromaprint -CHANGELOG = "1. Version 1.5.1 + Arch Linux compatibility patches
2. 2. Update to FFMPEG 6.0.1 using new synocli-videodriver package" +CHANGELOG = "1. Version 1.5.1 + Arch Linux compatibility patches
2. Update to FFMPEG 7.0.2" HOMEPAGE = https://acoustid.org/chromaprint LICENSE = LGPL2.1+ diff --git a/spk/ffmpeg5/Makefile b/spk/ffmpeg5/Makefile index 3fb504f9432..b4bf92f0e25 100644 --- a/spk/ffmpeg5/Makefile +++ b/spk/ffmpeg5/Makefile @@ -1,8 +1,8 @@ SPK_NAME = ffmpeg5 -SPK_VERS = 5.1.5 -SPK_REV = 5 +SPK_VERS = 5.1.6 +SPK_REV = 6 SPK_ICON = src/ffmpeg.png -CHANGELOG = "1. Update to version 5.1.5
2. Update Jellyfin upstream patches
3. Update Intel Media Driver 2024Q2 Release (DSM7 only)
4. Enable OpenCL on Intel platforms (DSM7 only)
5. Update to latest version of x264 (fix for \#6176)
6. Now using new synocli-videodriver package" +CHANGELOG = "1. Update to version 5.1.6" DEPENDS = cross/$(SPK_NAME) @@ -20,7 +20,7 @@ POST_STRIP_TARGET = $(SPK_NAME)_extra_install SPK_COMMANDS = bin/ffmpeg SPK_COMMANDS += bin/$(SPK_NAME) -include ../../mk/spksrc.archs.mk +include ../../mk/spksrc.common.mk ifeq ($(call version_ge, $(TCVERSION), 7.0),1) SERVICE_USER = auto @@ -29,7 +29,6 @@ endif ifeq ($(findstring $(ARCH),$(x64_ARCHS)),$(ARCH)) SPK_DEPENDS = "synocli-videodriver" -SERVICE_SETUP = src/service-setup.sh endif include ../../mk/spksrc.videodriver.mk diff --git a/spk/ffmpeg5/src/service-setup.sh b/spk/ffmpeg5/src/service-setup.sh deleted file mode 100644 index b2b5c73cb24..00000000000 --- a/spk/ffmpeg5/src/service-setup.sh +++ /dev/null @@ -1,42 +0,0 @@ -# Package specific behaviors -# Sourced script by generic installer and start-stop-status scripts - -KERNEL_MIN="4.4" -KERNEL_RUNNING=$(uname -r) -STATUS=$(printf '%s\n%s' "${KERNEL_MIN}" "${KERNEL_RUNNING}" | sort -VCr && echo $?) 
-FFMPEG_VER=$(printf %.1s "$SYNOPKG_PKGVER") -FFMPEG_DIR=/var/packages/ffmpeg${FFMPEG_VER}/target -iHD=${FFMPEG_DIR}/lib/iHD_drv_video.so - -### -### Disable Intel iHD driver on older kernels -### $(uname -r) <= ${KERNEL} -### -disable_iHD () -{ - if [ "${STATUS}" = "0" ]; then - [ -s ${iHD} ] && mv ${iHD} ${iHD}-DISABLED 2>/dev/null - fi -} - -service_postinst () -{ - if [ $SYNOPKG_DSM_VERSION_MAJOR -lt 7 ];then - # setuid for proper vaapi access - chmod u+s ${FFMPEG_DIR}/bin/ffmpeg - chmod u+s ${FFMPEG_DIR}/bin/vainfo - fi - - disable_iHD -} - -service_postupgrade () -{ - if [ $SYNOPKG_DSM_VERSION_MAJOR -lt 7 ];then - # setuid for proper vaapi access - chmod u+s ${FFMPEG_DIR}/bin/ffmpeg - chmod u+s ${FFMPEG_DIR}/bin/vainfo - fi - - disable_iHD -} diff --git a/spk/ffmpeg6/BROKEN b/spk/ffmpeg6/BROKEN new file mode 100644 index 00000000000..0a820f5e575 --- /dev/null +++ b/spk/ffmpeg6/BROKEN @@ -0,0 +1 @@ +Package no longer maintained - superseded by ffmpeg7 diff --git a/spk/ffmpeg6/Makefile b/spk/ffmpeg6/Makefile index 13d16c5d04b..23e7b391801 100644 --- a/spk/ffmpeg6/Makefile +++ b/spk/ffmpeg6/Makefile @@ -20,7 +20,7 @@ POST_STRIP_TARGET = $(SPK_NAME)_extra_install SPK_COMMANDS = bin/ffprobe SPK_COMMANDS += bin/$(SPK_NAME) -include ../../mk/spksrc.archs.mk +include ../../mk/spksrc.common.mk ifeq ($(call version_ge, $(TCVERSION), 7.0),1) SERVICE_USER = auto diff --git a/spk/ffmpeg7/Makefile b/spk/ffmpeg7/Makefile new file mode 100644 index 00000000000..8db2dfa9b69 --- /dev/null +++ b/spk/ffmpeg7/Makefile @@ -0,0 +1,41 @@ +SPK_NAME = ffmpeg7 +SPK_VERS = 7.0.2 +SPK_REV = 1 +SPK_ICON = src/ffmpeg.png +CHANGELOG = "1. Initial release of version 7 release" + +DEPENDS = cross/$(SPK_NAME) + +# requires c11 support +UNSUPPORTED_ARCHS = $(ARMv5_ARCHS) $(OLD_PPC_ARCHS) + +MAINTAINER = th0ma7 +DESCRIPTION = FFmpeg is a complete, cross-platform solution to record, convert and stream audio and video. It includes libavcodec - the leading audio/video codec library. 
More information from SynoCommunity FFmpeg package available at https://github.com/SynoCommunity/spksrc/wiki/FAQ-FFmpeg +DESCRIPTION_FRE = FFmpeg est une solution complète multiplateforme pour enregistrer, convertir et diffuser du contenu audio et vidéo. Il comprend libavcodec - la principale bibliothèque de codecs audio/vidéo. Informations additionnelles concernant le paquet FFmpeg de SynoCommunity disponibles à https://github.com/SynoCommunity/spksrc/wiki/FAQ-FFmpeg +DISPLAY_NAME = FFmpeg $(firstword $(subst ., ,$(SPK_VERS))) +STARTABLE = no + +HOMEPAGE = https://www.ffmpeg.org/ +LICENSE = GPLv3 + +POST_STRIP_TARGET = $(SPK_NAME)_extra_install + +SPK_COMMANDS = bin/ffprobe +SPK_COMMANDS += bin/$(SPK_NAME) + +include ../../mk/spksrc.common.mk + +ifeq ($(call version_ge, $(TCVERSION), 7.0),1) +SERVICE_USER = auto +VIDEODRIVER = on +endif + +ifeq ($(findstring $(ARCH),$(x64_ARCHS)),$(ARCH)) +SPK_DEPENDS = "synocli-videodriver" +endif + +include ../../mk/spksrc.videodriver.mk + +.PHONY: $(SPK_NAME)_extra_install +$(SPK_NAME)_extra_install: + cd $(STAGING_DIR)/bin && ln -sf ffmpeg $(SPK_NAME) diff --git a/spk/ffmpeg7/TODO b/spk/ffmpeg7/TODO new file mode 100644 index 00000000000..48f1802bff6 --- /dev/null +++ b/spk/ffmpeg7/TODO @@ -0,0 +1,34 @@ +To enhance if other requirements: + --enable-frei0r ==> Could benefit from adding cross/cairo + Generates the following extras libs: + lib:lib/frei0r-1/cairoaffineblend.so + lib:lib/frei0r-1/cairoblend.so + lib:lib/frei0r-1/cairogradient.so + lib:lib/frei0r-1/cairoimagegrid.so + +Potential candidates for enablement: + --enable-ladspa + --enable-libbs2b + --enable-libcdio + --enable-libflite + --enable-libgme + --enable-libgsm + --enable-libssh + --enable-libmysofa + --enable-libopenmpt + --enable-librubberband + --enable-libsrt + --enable-libvidstab + --enable-lv2 + --enable-pocketsphinx + --enable-libiec61883 + +Not to be enabled: + --enable-librsvg ==> Requires gobject-introspection + python + many other dependencies 
increasing spk size + --enable-libjack + --enable-libpulse + --enable-libxvid + --enable-omx + --enable-openal + --enable-opengl + --enable-sdl2 diff --git a/spk/ffmpeg7/src/ffmpeg.png b/spk/ffmpeg7/src/ffmpeg.png new file mode 100644 index 00000000000..de39e6bf300 Binary files /dev/null and b/spk/ffmpeg7/src/ffmpeg.png differ diff --git a/spk/jellyfin/Makefile b/spk/jellyfin/Makefile index be39a07a226..f605e6f712d 100644 --- a/spk/jellyfin/Makefile +++ b/spk/jellyfin/Makefile @@ -18,13 +18,13 @@ MAINTAINER = SynoCommunity DESCRIPTION = "The Free Software Media System. It is an alternative to the proprietary Emby and Plex." DISPLAY_NAME = Jellyfin STARTABLE = yes -CHANGELOG = "Update jellyfin to 10.9.11" +CHANGELOG = "1. Update jellyfin to 10.9.11
2. Migrate to ffmpeg7" HOMEPAGE = https://jellyfin.org HELPURL = https://jellyfin.org/docs/general/server/settings.html SUPPORTURL = https://jellyfin.org/docs/general/getting-help.html LICENSE = GPLv2 LICENSE_FILE = $(WORK_DIR)/$(SPK_NAME)-$(SPK_VERS)/LICENSE -SPK_DEPENDS = "ffmpeg6" +SPK_DEPENDS = "ffmpeg7" # 'auto' reserved value grabs SPK_NAME SERVICE_USER = auto diff --git a/spk/jellyfin/src/service-setup.sh b/spk/jellyfin/src/service-setup.sh index eda0b272f25..067b45ffa8a 100644 --- a/spk/jellyfin/src/service-setup.sh +++ b/spk/jellyfin/src/service-setup.sh @@ -8,7 +8,7 @@ JELLYFIN_ARGS="--service \ -c ${SYNOPKG_PKGVAR}/config \ -l ${SYNOPKG_PKGVAR}/log \ -w ${SYNOPKG_PKGDEST}/web \ - --ffmpeg /var/packages/ffmpeg6/target/bin/ffmpeg" + --ffmpeg /var/packages/ffmpeg7/target/bin/ffmpeg" SERVICE_COMMAND="${SYNOPKG_PKGDEST}/share/jellyfin ${JELLYFIN_ARGS}" diff --git a/spk/synocli-videodriver/Makefile b/spk/synocli-videodriver/Makefile index 6490068a0f8..f81a61265c8 100644 --- a/spk/synocli-videodriver/Makefile +++ b/spk/synocli-videodriver/Makefile @@ -1,11 +1,10 @@ SPK_NAME = synocli-videodriver -SPK_VERS = 1.0 -SPK_REV = 1 +SPK_VERS = 1.2 +SPK_REV = 2 SPK_ICON = src/SynoPackagegreen-videodriver.png -CHANGELOG = "Initial version providing Intel Media Driver 2024Q2 Release" MAINTAINER = th0ma7 -DESCRIPTION = "Provides video driver support for Intel GPU acceleration including OpenCL \(DSM7\)." +DESCRIPTION = "Provides video driver support for Intel GPU acceleration including OpenCL \(DSM7 only\)." 
DISPLAY_NAME = SynoCli Video Drivers STARTABLE = no @@ -18,8 +17,9 @@ UNSUPPORTED_ARCHS = $(ARM_ARCHS) $(PPC_ARCHS) $(i686_ARCHS) SERVICE_USER = auto VIDEODRIVER = on -SPK_COMMANDS = bin/vainfo -SPK_COMMANDS = bin/clinfo +SPK_COMMANDS = bin/clinfo +SPK_COMMANDS += bin/vainfo +SPK_COMMANDS += bin/lsgpu DEPENDS = cross/libva cross/libva-utils DEPENDS += cross/intel-vaapi-driver @@ -27,11 +27,21 @@ DEPENDS += cross/intel-media-driver cross/intel-mediasdk include ../../mk/spksrc.common.mk +ifeq ($(call version_lt, ${TCVERSION}, 7),1) +CHANGELOG = "1. Intel Media Driver 2022Q4 Release" +else +CHANGELOG = "1. Intel Media Driver 2024Q2 Release" +endif + # Enable OpenCL only on DSM >= 7.1 ifeq ($(call version_ge, ${TCVERSION}, 7.1),1) -CHANGELOG += "Intel Level-Zero 1.17.42, Intel Graphics Compiler \(IGC\) 1.0.17537.20, Intel Compute Runtime 24.31.30508.7" +CHANGELOG += "
2. Intel Level-Zero 1.17.42" DEPENDS += cross/intel-level-zero + +CHANGELOG += "
3. Intel Graphics Compiler \(IGC\) 1.0.17537.20" DEPENDS += cross/intel-graphics-compiler + +CHANGELOG += "
4. Intel Compute Runtime 24.31.30508.7" DEPENDS += cross/intel-compute-runtime DEPENDS += cross/ocl-icd DEPENDS += cross/clinfo @@ -41,8 +51,14 @@ endif # -->> can not use libmfx and libvpl together in ffmpeg # Jellyfin requires QSV provided by libmfx ifeq ($(call version_ge, ${TCVERSION}, 7),1) -CHANGELOG += " and Intel Video Processing Library \(Intel VPL\) 2.13.0" +CHANGELOG += "
5. Intel Video Processing Library \(Intel VPL\) 2.13.0" DEPENDS += cross/intel-libvpl cross/intel-libvpl-tools endif +# Enable intel_gpu_top on DSM >= 7.1 +ifeq ($(call version_ge, ${TCVERSION}, 7.1),1) +CHANGELOG += "
6. Intel GPU Tools \(IGT\) 1.29" +DEPENDS += cross/intel-gpu-tools +endif + include ../../mk/spksrc.spk.mk diff --git a/spk/tvheadend/Makefile b/spk/tvheadend/Makefile index adf5aee1974..5c8f35a0478 100644 --- a/spk/tvheadend/Makefile +++ b/spk/tvheadend/Makefile @@ -1,10 +1,10 @@ SPK_NAME = tvheadend SPK_SHORT_VERS = 4.3 -SPK_GIT_HASH = 9ac57a0 -SPK_GIT_DATE = 20240904 +SPK_GIT_HASH = 28de5c0 +SPK_GIT_DATE = 20240928 SPK_VERS = $(SPK_SHORT_VERS).$(SPK_GIT_DATE) TVH_VERS = $(SPK_SHORT_VERS)~$(SPK_GIT_HASH) -SPK_REV = 38 +SPK_REV = 39 SPK_ICON = src/tvheadend.png DSM_UI_DIR = app @@ -13,9 +13,16 @@ DSM_UI_DIR = app # 000-fix-version.patch from cross/tvheadend export TVH_VERS -FFMPEG_PACKAGE = ffmpeg6 +FFMPEG_PACKAGE = ffmpeg7 SPK_DEPENDS = 'python311>=3.11.4-7:$(FFMPEG_PACKAGE)' +# PPC archs except qoriq are not supported +# https://tvheadend.org/issues/5060 +UNSUPPORTED_ARCHS = $(OLD_PPC_ARCHS) +# ffmpeg7 requires c11 support +UNSUPPORTED_ARCHS += $(ARMv5_ARCHS) + + WHEELS = src/requirements-pure.txt DEPENDS = cross/$(SPK_NAME) @@ -27,14 +34,10 @@ DESCRIPTION = Tvheadend is a TV streaming server and recorder for Linux, FreeBSD DESCRIPTION_FRE = Tvheadend est un serveur de streaming et enregistreur TV pour Linux, FreeBSD et Android prenant en charge DVB-S, DVB-S2, DVB-C, DVB-T, ATSC, ISDB-T, IPTV, SAT IP et HDHomeRun comme sources d\'entrée. Tvheadend offre le streaming HTTP, HTSP et SAT IP. DISPLAY_NAME = Tvheadend STARTABLE = yes -CHANGELOG = "1. Update to latest git version 9ac57a0 as of Sept. 4th 2024
2. Update to FFMPEG 6.0.1 using new synocli-videodriver package" +CHANGELOG = "1. Update to latest git version 28de5c0 as of Sept. 28th 2024
2. Update to FFMPEG 7.0.2" HOMEPAGE = https://tvheadend.org/ LICENSE = GPL v3 -# PPC archs except qoriq are not supported -# https://tvheadend.org/issues/5060 -UNSUPPORTED_ARCHS = $(OLD_PPC_ARCHS) - CONF_DIR = src/conf/ # 'auto' reserved value grabs SPK_NAME