From 855c3c9d3c54e7c75f060ac72018cc3fe382c43b Mon Sep 17 00:00:00 2001 From: Bernhard Stoeckner Date: Thu, 16 Jan 2025 17:34:27 +0100 Subject: [PATCH] 535.230.02 --- README.md | 8 +-- kernel-open/Kbuild | 16 ++++- kernel-open/Makefile | 31 +++++++++- kernel-open/conftest.sh | 32 ++++++++++ kernel-open/nvidia-drm/nvidia-drm-drv.c | 4 ++ kernel-open/nvidia-drm/nvidia-drm.Kbuild | 1 + .../nvidia-modeset/nvidia-modeset.Kbuild | 3 - kernel-open/nvidia-uvm/nvidia-uvm.Kbuild | 1 + kernel-open/nvidia-uvm/uvm.c | 3 + kernel-open/nvidia-uvm/uvm_hmm.c | 20 +++++- kernel-open/nvidia-uvm/uvm_kvmalloc.c | 2 +- kernel-open/nvidia-uvm/uvm_mmu.h | 2 +- kernel-open/nvidia-uvm/uvm_pmm_gpu.c | 2 +- kernel-open/nvidia/nvidia.Kbuild | 3 - src/common/inc/nvBldVer.h | 20 +++--- src/common/inc/nvUnixVersion.h | 2 +- src/common/inc/nvlog_defs.h | 5 +- src/common/nvswitch/kernel/smbpbi_nvswitch.c | 4 +- .../nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h | 2 +- .../sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h | 29 +++++++++ .../uproc/os/common/include/liblogdecode.h | 2 +- .../arch/nvalloc/common/inc/inforom/ifrecc.h | 2 +- src/nvidia/arch/nvalloc/common/inc/nvcst.h | 5 +- src/nvidia/arch/nvalloc/common/inc/nvpcie.h | 29 +++++---- src/nvidia/generated/g_all_dcl_pb.c | 14 ++++- src/nvidia/generated/g_all_dcl_pb.h | 8 ++- src/nvidia/generated/g_intr_nvoc.h | 15 ++++- src/nvidia/generated/g_nvdebug_pb.h | 4 +- src/nvidia/generated/g_rs_resource_nvoc.h | 17 +++--- src/nvidia/inc/libraries/nvport/string.h | 6 +- .../inc/libraries/resserv/rs_resource.h | 17 +++--- src/nvidia/src/kernel/diagnostics/journal.c | 27 ++++++++ .../gpu/fifo/kernel_channel_group_api.c | 2 + src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c | 22 +++++-- src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c | 61 +++++++++++++++++++ src/nvidia/src/kernel/gpu/intr/intr.c | 43 ++++++++++++- .../kernel/platform/chipset/chipset_info.c | 11 ++++ .../kernel/platform/chipset/chipset_pcie.c | 20 +++++- src/nvidia/src/kernel/rmapi/alloc_free.c | 41 
++++++++++++- .../libraries/nvport/string/string_generic.c | 29 +++++++-- .../src/libraries/resserv/src/rs_server.c | 1 + version.mk | 2 +- 42 files changed, 473 insertions(+), 95 deletions(-) diff --git a/README.md b/README.md index dbd9e97170..62b9723688 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 535.216.03. +version 535.230.02. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -535.216.03 driver release. This can be achieved by installing +535.230.02 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -180,7 +180,7 @@ software applications. ## Compatible GPUs The open-gpu-kernel-modules can be used on any Turing or later GPU -(see the table below). However, in the 535.216.03 release, +(see the table below). However, in the 535.230.02 release, GeForce and Workstation support is still considered alpha-quality. To enable use of the open kernel modules on GeForce and Workstation GPUs, @@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module parameter to 1. 
For more details, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/535.216.03/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/535.230.02/README/kernel_open.html In the below table, if three IDs are listed, the first is the PCI Device ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index e4d1495626..f30933a888 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -57,6 +57,20 @@ ifeq ($(NV_UNDEF_BEHAVIOR_SANITIZER),1) UBSAN_SANITIZE := y endif +# +# Command to create a symbolic link, explicitly resolving the symlink target +# to an absolute path to abstract away the difference between Linux < 6.13, +# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and +# Linux >= 6.13, where the CWD is the external module source tree. +# +# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for +# kernel modules which use precompiled binary object files. +# + +quiet_cmd_symlink = SYMLINK $@ + cmd_symlink = ln -sf $(abspath $<) $@ + + $(foreach _module, $(NV_KERNEL_MODULES), \ $(eval include $(src)/$(_module)/$(_module).Kbuild)) @@ -72,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc EXTRA_CFLAGS += -I$(src) EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM -EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.216.03\" +EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.230.02\" ifneq ($(SYSSRCHOST1X),) EXTRA_CFLAGS += -I$(SYSSRCHOST1X) diff --git a/kernel-open/Makefile b/kernel-open/Makefile index a88b2f22c0..72672c2a17 100644 --- a/kernel-open/Makefile +++ b/kernel-open/Makefile @@ -52,6 +52,22 @@ else endif endif + # If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT. + # Look for the compiler specified there, and use it by default, if found. 
+ ifeq ($(origin CC),default) + cc_version_text=$(firstword $(shell . $(KERNEL_OUTPUT)/.config; \ + echo "$$CONFIG_CC_VERSION_TEXT")) + + ifneq ($(cc_version_text),) + ifeq ($(shell command -v $(cc_version_text)),) + $(warning WARNING: Unable to locate the compiler $(cc_version_text) \ + from CONFIG_CC_VERSION_TEXT in the kernel configuration.) + else + CC=$(cc_version_text) + endif + endif + endif + CC ?= cc LD ?= ld OBJDUMP ?= objdump @@ -64,6 +80,16 @@ else ) endif + KERNEL_ARCH = $(ARCH) + + ifneq ($(filter $(ARCH),i386 x86_64),) + KERNEL_ARCH = x86 + else + ifeq ($(filter $(ARCH),arm64 powerpc),) + $(error Unsupported architecture $(ARCH)) + endif + endif + NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem) NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \ $(NV_KERNEL_MODULES)) @@ -103,8 +129,9 @@ else # module symbols on which the Linux kernel's module resolution is dependent # and hence must be used whenever present. - LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \ - $(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \ + LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds \ + $(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \ + $(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds \ $(KERNEL_OUTPUT)/scripts/module.lds NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s)) diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh index 7f0478eaf3..7f1870c32c 100755 --- a/kernel-open/conftest.sh +++ b/kernel-open/conftest.sh @@ -2475,6 +2475,22 @@ compile_test() { fi ;; + file_operations_fop_unsigned_offset_present) + # + # Determine if the FOP_UNSIGNED_OFFSET define is present. + # + # Added by commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to + # fop_flags") in v6.12. 
+ # + CODE=" + #include <linux/fs.h> + int conftest_file_operations_fop_unsigned_offset_present(void) { + return FOP_UNSIGNED_OFFSET; + }" + + compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT" "" "types" + ;; + mm_context_t) # # Determine if the 'mm_context_t' data type is present @@ -6514,6 +6530,22 @@ compile_test() { compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types" ;; + folio_test_swapcache) + # + # Determine if the folio_test_swapcache() function is present. + # + # folio_test_swapcache() was exported by commit d389a4a811551 ("mm: + # Add folio flag manipulation functions") in v5.16. + # + CODE=" + #include <linux/page-flags.h> + void conftest_folio_test_swapcache(void) { + folio_test_swapcache(); + }" + + compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions" + ;; + # When adding a new conftest entry, please use the correct format for # specifying the relevant upstream Linux kernel commit. # diff --git a/kernel-open/nvidia-drm/nvidia-drm-drv.c b/kernel-open/nvidia-drm/nvidia-drm-drv.c index 7780c2facb..e5c7d9b46e 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-drv.c +++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c @@ -1285,6 +1285,10 @@ static const struct file_operations nv_drm_fops = { .read = drm_read, .llseek = noop_llseek, + +#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT) + .fop_flags = FOP_UNSIGNED_OFFSET, +#endif }; static const struct drm_ioctl_desc nv_drm_ioctls[] = { diff --git a/kernel-open/nvidia-drm/nvidia-drm.Kbuild b/kernel-open/nvidia-drm/nvidia-drm.Kbuild index 894f1f9d22..9059223066 100644 --- a/kernel-open/nvidia-drm/nvidia-drm.Kbuild +++ b/kernel-open/nvidia-drm/nvidia-drm.Kbuild @@ -135,3 +135,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed +NV_CONFTEST_TYPE_COMPILE_TESTS 
+= file_operations_fop_unsigned_offset_present diff --git a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild index ebf3f048b2..4e328bae18 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild +++ b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild @@ -40,9 +40,6 @@ NV_KERNEL_MODULE_TARGETS += $(NVIDIA_MODESET_KO) NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o -quiet_cmd_symlink = SYMLINK $@ -cmd_symlink = ln -sf $< $@ - targets += $(NVIDIA_MODESET_BINARY_OBJECT_O) $(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE diff --git a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild index a38de2166e..a847e9eb59 100644 --- a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild +++ b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild @@ -86,6 +86,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno +NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t diff --git a/kernel-open/nvidia-uvm/uvm.c b/kernel-open/nvidia-uvm/uvm.c index 6f95f17cb4..e1974cf59c 100644 --- a/kernel-open/nvidia-uvm/uvm.c +++ b/kernel-open/nvidia-uvm/uvm.c @@ -682,6 +682,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma) // Semaphore pool vmas do not have vma wrappers, but some functions will // assume vm_private_data is a wrapper. vma->vm_private_data = NULL; +#if defined(VM_WIPEONFORK) + nv_vm_flags_set(vma, VM_WIPEONFORK); +#endif if (is_fork) { // If we forked, leave the parent vma alone. 
diff --git a/kernel-open/nvidia-uvm/uvm_hmm.c b/kernel-open/nvidia-uvm/uvm_hmm.c index 0d82314a46..5ae313823c 100644 --- a/kernel-open/nvidia-uvm/uvm_hmm.c +++ b/kernel-open/nvidia-uvm/uvm_hmm.c @@ -71,6 +71,24 @@ module_param(uvm_disable_hmm, bool, 0444); #include "uvm_va_policy.h" #include "uvm_tools.h" +// The function nv_PageSwapCache() wraps the check for page swap cache flag in +// order to support a wide variety of kernel versions. +// The function PageSwapCache() is removed after 32f51ead3d77 ("mm: remove +// PageSwapCache") in v6.12-rc1. +// The function folio_test_swapcache() was added in Linux 5.16 (d389a4a811551 +// "mm: Add folio flag manipulation functions") +// Systems with HMM patches backported to 5.14 are possible, but those systems +// do not include folio_test_swapcache() +// TODO: Bug 4050579: Remove this when migration of swap cached pages is updated +static __always_inline bool nv_PageSwapCache(struct page *page) +{ +#if defined(NV_FOLIO_TEST_SWAPCACHE_PRESENT) + return folio_test_swapcache(page_folio(page)); +#else + return PageSwapCache(page); +#endif +} + static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block, uvm_page_index_t page_index, struct page *page); @@ -2554,7 +2572,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block, continue; } - if (PageSwapCache(src_page)) { + if (nv_PageSwapCache(src_page)) { // TODO: Bug 4050579: Remove this when swap cached pages can be // migrated. 
if (service_context) { diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.c b/kernel-open/nvidia-uvm/uvm_kvmalloc.c index 69e0b30b98..2285a479b3 100644 --- a/kernel-open/nvidia-uvm/uvm_kvmalloc.c +++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.c @@ -36,7 +36,7 @@ typedef struct { size_t alloc_size; - uint8_t ptr[0]; + uint8_t ptr[]; } uvm_vmalloc_hdr_t; typedef struct diff --git a/kernel-open/nvidia-uvm/uvm_mmu.h b/kernel-open/nvidia-uvm/uvm_mmu.h index 4f53e3ffb7..978b18339d 100644 --- a/kernel-open/nvidia-uvm/uvm_mmu.h +++ b/kernel-open/nvidia-uvm/uvm_mmu.h @@ -162,7 +162,7 @@ struct uvm_page_directory_struct // pointers to child directories on the host. // this array is variable length, so it needs to be last to allow it to // take up extra space - uvm_page_directory_t *entries[0]; + uvm_page_directory_t *entries[]; }; enum diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c index 4e573185d5..b35bd527c8 100644 --- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c @@ -221,7 +221,7 @@ struct uvm_pmm_gpu_chunk_suballoc_struct // Array of all child subchunks // TODO: Bug 1765461: Can the array be inlined? It could save the parent // pointer. 
- uvm_gpu_chunk_t *subchunks[0]; + uvm_gpu_chunk_t *subchunks[]; }; typedef enum diff --git a/kernel-open/nvidia/nvidia.Kbuild b/kernel-open/nvidia/nvidia.Kbuild index 789e0e4b5a..f2eabd4463 100644 --- a/kernel-open/nvidia/nvidia.Kbuild +++ b/kernel-open/nvidia/nvidia.Kbuild @@ -40,9 +40,6 @@ NVIDIA_KO = nvidia/nvidia.ko NVIDIA_BINARY_OBJECT := $(src)/nvidia/nv-kernel.o_binary NVIDIA_BINARY_OBJECT_O := nvidia/nv-kernel.o -quiet_cmd_symlink = SYMLINK $@ - cmd_symlink = ln -sf $< $@ - targets += $(NVIDIA_BINARY_OBJECT_O) $(obj)/$(NVIDIA_BINARY_OBJECT_O): $(NVIDIA_BINARY_OBJECT) FORCE diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index 1e245f2353..6976f9be32 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -36,25 +36,25 @@ // and then checked back in. You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r538_95 + #define NV_BUILD_BRANCH r539_11 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r538_95 + #define NV_PUBLIC_BRANCH r539_11 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_95-688" -#define NV_BUILD_CHANGELIST_NUM (35042711) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r539_11-770" +#define NV_BUILD_CHANGELIST_NUM (35309837) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_95-688" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35042711) +#define NV_BUILD_NAME "rel/gpu_drv/r535/r539_11-770" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35309837) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r538_95-1" -#define NV_BUILD_CHANGELIST_NUM (34853858) +#define NV_BUILD_BRANCH_VERSION "r539_11-2" +#define NV_BUILD_CHANGELIST_NUM (35309837) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "538.96" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34853858) +#define NV_BUILD_NAME "539.14" +#define 
NV_LAST_OFFICIAL_CHANGELIST_NUM (35309837) #define NV_BUILD_BRANCH_BASE_VERSION R535 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index 8c1ee77801..33349946d4 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "535.216.03" +#define NV_VERSION_STRING "535.230.02" #else diff --git a/src/common/inc/nvlog_defs.h b/src/common/inc/nvlog_defs.h index 86da458ea3..b58d067039 100644 --- a/src/common/inc/nvlog_defs.h +++ b/src/common/inc/nvlog_defs.h @@ -102,10 +102,11 @@ struct _NVLOG_BUFFER #define NVLOG_MAX_BUFFERS_v11 16 #define NVLOG_MAX_BUFFERS_v12 256 +#define NVLOG_MAX_BUFFERS_v13 3840 #if NVOS_IS_UNIX -#define NVLOG_MAX_BUFFERS NVLOG_MAX_BUFFERS_v12 -#define NVLOG_LOGGER_VERSION 12 // v1.2 +#define NVLOG_MAX_BUFFERS NVLOG_MAX_BUFFERS_v13 +#define NVLOG_LOGGER_VERSION 13 // v1.3 #else #define NVLOG_MAX_BUFFERS NVLOG_MAX_BUFFERS_v11 #define NVLOG_LOGGER_VERSION 11 // v1.1 diff --git a/src/common/nvswitch/kernel/smbpbi_nvswitch.c b/src/common/nvswitch/kernel/smbpbi_nvswitch.c index 8cefb389df..801d17d4b7 100644 --- a/src/common/nvswitch/kernel/smbpbi_nvswitch.c +++ b/src/common/nvswitch/kernel/smbpbi_nvswitch.c @@ -90,10 +90,10 @@ nvswitch_smbpbi_post_init if (status == NVL_SUCCESS) { -#if defined(DEBUG) || defined(DEVELOP) || defined(NV_MODS) +#if defined(DEBUG) || defined(DEVELOP) nvswitch_lib_smbpbi_log_sxid(device, NVSWITCH_ERR_NO_ERROR, "NVSWITCH SMBPBI server is online."); -#endif // defined(DEBUG) || defined(DEVELOP) || defined(NV_MODS) +#endif // defined(DEBUG) || defined(DEVELOP) NVSWITCH_PRINT(device, INFO, "%s: SMBPBI POST INIT completed\n", __FUNCTION__); } diff --git 
a/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h b/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h index a52be2a374..27e8a7dae6 100644 --- a/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h +++ b/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h @@ -256,7 +256,7 @@ typedef struct NV0000_CTRL_NVD_GET_TIMESTAMP_PARAMS { #define NV0000_CTRL_NVD_SIGNATURE_SIZE (4) /* Maximum number of buffers */ -#define NV0000_CTRL_NVD_MAX_BUFFERS (256) +#define NV0000_CTRL_NVD_MAX_BUFFERS (3840) #define NV0000_CTRL_NVD_GET_NVLOG_INFO_PARAMS_MESSAGE_ID (0x4U) diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h b/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h index 524f2c454b..90846151ed 100644 --- a/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h +++ b/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h @@ -106,4 +106,33 @@ typedef struct NV208F_CTRL_GR_ECC_INJECTION_SUPPORTED_PARAMS { NV_DECLARE_ALIGNED(NV2080_CTRL_GR_ROUTE_INFO grRouteInfo, 8); } NV208F_CTRL_GR_ECC_INJECTION_SUPPORTED_PARAMS; +/* + * NV208F_CTRL_CMD_GR_ECC_SET_TRANSIENT_CLEARING_POLICY + * + * Control command to determine whether or not the actions to clear potential transient + * errors in the SM should be taken + * + * Parameters: + * + * policy + * NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_DISABLED + * Don't attempt to clear a transient error in the SM + * NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_ENABLED + * Attempt to clear a transient error in the SM + * + * Possible status values returned are: + * NV_OK + * NV_ERR_INVALID_ARGUMENT + */ +#define NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_DISABLED (0x00000000) +#define NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_ENABLED (0x00000001) + +#define NV208F_CTRL_CMD_GR_ECC_SET_TRANSIENT_CLEARING_POLICY (0x208f1205) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_DIAG_GR_INTERFACE_ID << 8) | NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS_MESSAGE_ID" */ + +#define NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS_MESSAGE_ID 
(0x5U) + +typedef struct NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS { + NvU32 policy; +} NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS; + /* _ctrl208fgr_h_ */ diff --git a/src/common/uproc/os/common/include/liblogdecode.h b/src/common/uproc/os/common/include/liblogdecode.h index 32e9f86e1a..cf29093176 100644 --- a/src/common/uproc/os/common/include/liblogdecode.h +++ b/src/common/uproc/os/common/include/liblogdecode.h @@ -42,7 +42,7 @@ extern "C" { # define LIBOS_LOG_DECODE_ENABLE 1 # define LIBOS_LOG_TO_NVLOG 0 -# define LIBOS_LOG_MAX_LOGS 160 // Max logs for all GPUs for offline decoder +# define LIBOS_LOG_MAX_LOGS 3840 // Max logs for all GPUs for offline decoder #endif // NVRM diff --git a/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h b/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h index 337537d794..646b8a62e7 100644 --- a/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h +++ b/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2017-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a diff --git a/src/nvidia/arch/nvalloc/common/inc/nvcst.h b/src/nvidia/arch/nvalloc/common/inc/nvcst.h index 684eda3d0f..060a663178 100644 --- a/src/nvidia/arch/nvalloc/common/inc/nvcst.h +++ b/src/nvidia/arch/nvalloc/common/inc/nvcst.h @@ -65,6 +65,7 @@ CHIPSET_SETUP_FUNC(Intel_0685_setupFunc) CHIPSET_SETUP_FUNC(Intel_4381_setupFunc) CHIPSET_SETUP_FUNC(Intel_7A82_setupFunc) CHIPSET_SETUP_FUNC(Intel_7A04_setupFunc) +CHIPSET_SETUP_FUNC(Intel_1B81_setupFunc) CHIPSET_SETUP_FUNC(SiS_656_setupFunc) CHIPSET_SETUP_FUNC(ATI_RS400_setupFunc) CHIPSET_SETUP_FUNC(ATI_RS480_setupFunc) @@ -186,8 +187,8 @@ CSINFO chipsetInfo[] = {PCI_VENDOR_ID_INTEL, 0x4385, CS_INTEL_4381, "Intel-RocketLake", Intel_4381_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x7A82, CS_INTEL_7A82, "Intel-AlderLake", Intel_7A82_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x7A84, CS_INTEL_7A82, "Intel-AlderLake", Intel_7A82_setupFunc}, - {PCI_VENDOR_ID_INTEL, 0x1B81, CS_INTEL_1B81, "Intel-SapphireRapids", NULL}, - {PCI_VENDOR_ID_INTEL, 0x7A8A, CS_INTEL_1B81, "Intel-SapphireRapids", NULL}, + {PCI_VENDOR_ID_INTEL, 0x1B81, CS_INTEL_1B81, "Intel-SapphireRapids", Intel_1B81_setupFunc}, + {PCI_VENDOR_ID_INTEL, 0x7A8A, CS_INTEL_1B81, "Intel-SapphireRapids", Intel_1B81_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x18DC, CS_INTEL_18DC, "Intel-IceLake", NULL}, {PCI_VENDOR_ID_INTEL, 0x7A04, CS_INTEL_7A04, "Intel-RaptorLake", Intel_7A04_setupFunc}, {PCI_VENDOR_ID_INTEL, 0x5795, CS_INTEL_5795, "Intel-GraniteRapids", NULL}, diff --git a/src/nvidia/arch/nvalloc/common/inc/nvpcie.h b/src/nvidia/arch/nvalloc/common/inc/nvpcie.h index c5c8217b94..f76f23b1d2 100644 --- a/src/nvidia/arch/nvalloc/common/inc/nvpcie.h +++ b/src/nvidia/arch/nvalloc/common/inc/nvpcie.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2000-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 2000-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -211,18 +211,21 @@ // to any specific hardware. // // -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0 0x000000C8 -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_ID 7:0 -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_NEXT 15:8 -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_LENGTH 23:16 -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_SIG_LO 31:24 -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1 0x000000CC -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_SIG_HI 15:0 -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_VERSION 18:16 -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_PEER_CLIQUE_ID 22:19 -#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RSVD 31:23 - -#define NV_PCI_VIRTUAL_P2P_APPROVAL_SIGNATURE 0x00503250 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0 0x000000C8 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_ID 7:0 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_NEXT 15:8 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_LENGTH 23:16 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_SIG_LO 31:24 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1 0x000000CC +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_SIG_HI 15:0 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_VERSION 18:16 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_PEER_CLIQUE_ID 22:19 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING 23:23 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DEFAULT 0x00000000 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DISABLE 0x00000001 +#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RSVD 31:24 + +#define NV_PCI_VIRTUAL_P2P_APPROVAL_SIGNATURE 0x00503250 // Chipset-specific definitions. 
// Intel SantaRosa definitions diff --git a/src/nvidia/generated/g_all_dcl_pb.c b/src/nvidia/generated/g_all_dcl_pb.c index 5726572df4..eb0d9f947c 100644 --- a/src/nvidia/generated/g_all_dcl_pb.c +++ b/src/nvidia/generated/g_all_dcl_pb.c @@ -122,6 +122,18 @@ const PRB_FIELD_DESC prb_fields_dcl_dclmsg[] = { PRB_MAYBE_FIELD_NAME("engine") PRB_MAYBE_FIELD_DEFAULT(0) }, + { + 331, + { + PRB_OPTIONAL, + PRB_MESSAGE, + 0, + }, + RC_RCDIAGRECORD, + 0, + PRB_MAYBE_FIELD_NAME("rc_diag_recs") + PRB_MAYBE_FIELD_DEFAULT(0) + }, }; // 'ErrorBlock' field defaults @@ -150,7 +162,7 @@ const PRB_MSG_DESC prb_messages_dcl[] = { PRB_MAYBE_MESSAGE_NAME("Dcl.Engines") }, { - 7, + 8, prb_fields_dcl_dclmsg, PRB_MAYBE_MESSAGE_NAME("Dcl.DclMsg") }, diff --git a/src/nvidia/generated/g_all_dcl_pb.h b/src/nvidia/generated/g_all_dcl_pb.h index 8fd5b053a7..93efc31113 100644 --- a/src/nvidia/generated/g_all_dcl_pb.h +++ b/src/nvidia/generated/g_all_dcl_pb.h @@ -18,8 +18,8 @@ extern const PRB_MSG_DESC prb_messages_dcl[]; // Message maximum lengths // Does not include repeated fields, strings and byte arrays. 
#define DCL_ENGINES_LEN 130 -#define DCL_DCLMSG_LEN 567 -#define DCL_ERRORBLOCK_LEN 571 +#define DCL_DCLMSG_LEN 610 +#define DCL_ERRORBLOCK_LEN 614 extern const PRB_FIELD_DESC prb_fields_dcl_engines[]; @@ -41,6 +41,7 @@ extern const PRB_FIELD_DESC prb_fields_dcl_dclmsg[]; #define DCL_DCLMSG_JOURNAL_BUGCHECK (&prb_fields_dcl_dclmsg[4]) #define DCL_DCLMSG_RCCOUNTER (&prb_fields_dcl_dclmsg[5]) #define DCL_DCLMSG_ENGINE (&prb_fields_dcl_dclmsg[6]) +#define DCL_DCLMSG_RC_DIAG_RECS (&prb_fields_dcl_dclmsg[7]) // 'DclMsg' field lengths #define DCL_DCLMSG_COMMON_LEN 42 @@ -50,6 +51,7 @@ extern const PRB_FIELD_DESC prb_fields_dcl_dclmsg[]; #define DCL_DCLMSG_JOURNAL_BUGCHECK_LEN 69 #define DCL_DCLMSG_RCCOUNTER_LEN 64 #define DCL_DCLMSG_ENGINE_LEN 133 +#define DCL_DCLMSG_RC_DIAG_RECS_LEN 42 extern const PRB_FIELD_DESC prb_fields_dcl_errorblock[]; @@ -57,7 +59,7 @@ extern const PRB_FIELD_DESC prb_fields_dcl_errorblock[]; #define DCL_ERRORBLOCK_DATA (&prb_fields_dcl_errorblock[0]) // 'ErrorBlock' field lengths -#define DCL_ERRORBLOCK_DATA_LEN 570 +#define DCL_ERRORBLOCK_DATA_LEN 613 extern const PRB_SERVICE_DESC prb_services_dcl[]; diff --git a/src/nvidia/generated/g_intr_nvoc.h b/src/nvidia/generated/g_intr_nvoc.h index e4b6095136..6c49f4342b 100644 --- a/src/nvidia/generated/g_intr_nvoc.h +++ b/src/nvidia/generated/g_intr_nvoc.h @@ -7,7 +7,7 @@ extern "C" { #endif /* - * SPDX-FileCopyrightText: Copyright (c) 2006-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2006-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -96,6 +96,11 @@ MAKE_VECTOR(InterruptTable, INTR_TABLE_ENTRY); // Default value for intrStuckThreshold #define INTR_STUCK_THRESHOLD 1000 +// Minimum length of interrupt to log as long-running +#define LONG_INTR_LOG_LENGTH_NS (1000000LLU) // 1ms +// Maximum frequency of long-running interrupt print, per engine +#define LONG_INTR_LOG_RATELIMIT_NS (10000000000LLU) // 10s + #define INTR_TABLE_INIT_KERNEL (1 << 0) #define INTR_TABLE_INIT_PHYSICAL (1 << 1) @@ -194,6 +199,13 @@ typedef struct Device Device; #else #define PRIVATE_FIELD(x) NVOC_PRIVATE_FIELD(x) #endif +struct __nvoc_inner_struc_Intr_1__ { + NvU32 intrCount; + NvU64 intrLength; + NvU64 lastPrintTime; +}; + + struct Intr { const struct NVOC_RTTI *__nvoc_rtti; struct OBJENGSTATE __nvoc_base_OBJENGSTATE; @@ -262,6 +274,7 @@ struct Intr { NvU32 intrEn0Orig; NvBool halIntrEnabled; NvU32 saveIntrEn0; + struct __nvoc_inner_struc_Intr_1__ longIntrStats[167]; }; #ifndef __NVOC_CLASS_Intr_TYPEDEF__ diff --git a/src/nvidia/generated/g_nvdebug_pb.h b/src/nvidia/generated/g_nvdebug_pb.h index 16c11f1c11..c559aaefd1 100644 --- a/src/nvidia/generated/g_nvdebug_pb.h +++ b/src/nvidia/generated/g_nvdebug_pb.h @@ -40,7 +40,7 @@ extern const PRB_MSG_DESC prb_messages_nvdebug[]; // Does not include repeated fields, strings and byte arrays. 
#define NVDEBUG_SYSTEMINFO_LEN 275 #define NVDEBUG_GPUINFO_LEN 164 -#define NVDEBUG_NVDUMP_LEN 1308 +#define NVDEBUG_NVDUMP_LEN 1351 #define NVDEBUG_SYSTEMINFO_NORTHBRIDGEINFO_LEN 12 #define NVDEBUG_SYSTEMINFO_SOCINFO_LEN 12 #define NVDEBUG_SYSTEMINFO_CPUINFO_LEN 24 @@ -104,7 +104,7 @@ extern const PRB_FIELD_DESC prb_fields_nvdebug_nvdump[]; // 'NvDump' field lengths #define NVDEBUG_NVDUMP_SYSTEM_INFO_LEN 278 -#define NVDEBUG_NVDUMP_DCL_MSG_LEN 570 +#define NVDEBUG_NVDUMP_DCL_MSG_LEN 613 #define NVDEBUG_NVDUMP_GPU_INFO_LEN 167 #define NVDEBUG_NVDUMP_EXCEPTION_ADDRESS_LEN 10 #define NVDEBUG_NVDUMP_SYSTEM_INFO_GSPRM_LEN 278 diff --git a/src/nvidia/generated/g_rs_resource_nvoc.h b/src/nvidia/generated/g_rs_resource_nvoc.h index ff1abe6e02..df878dce94 100644 --- a/src/nvidia/generated/g_rs_resource_nvoc.h +++ b/src/nvidia/generated/g_rs_resource_nvoc.h @@ -81,15 +81,16 @@ typedef struct RsSession RsSession; */ struct RS_LOCK_INFO { - struct RsClient *pClient; ///< Pointer to client that was locked (if any) - struct RsClient *pSecondClient; ///< Pointer to second client, for dual-client locking - RsResourceRef *pContextRef; ///< User-defined reference - struct RsSession *pSession; ///< Session object to be locked, if any - NvU32 flags; ///< RS_LOCK_FLAGS_* - NvU32 state; ///< RS_LOCK_STATE_* + struct RsClient *pClient; ///< Pointer to client that was locked (if any) + struct RsClient *pSecondClient; ///< Pointer to second client, for dual-client locking + RsResourceRef *pContextRef; ///< User-defined reference + RsResourceRef *pResRefToBackRef; ///< Resource from which to infer indirect GPU dependencies + struct RsSession *pSession; ///< Session object to be locked, if any + NvU32 flags; ///< RS_LOCK_FLAGS_* + NvU32 state; ///< RS_LOCK_STATE_* NvU32 gpuMask; - NvU8 traceOp; ///< RS_LOCK_TRACE_* operation for lock-metering - NvU32 traceClassId; ///< Class of initial resource that was locked for lock metering + NvU8 traceOp; ///< RS_LOCK_TRACE_* operation for 
lock-metering + NvU32 traceClassId; ///< Class of initial resource that was locked for lock metering }; struct RS_RES_ALLOC_PARAMS_INTERNAL diff --git a/src/nvidia/inc/libraries/nvport/string.h b/src/nvidia/inc/libraries/nvport/string.h index 82da75ceb1..402646c624 100644 --- a/src/nvidia/inc/libraries/nvport/string.h +++ b/src/nvidia/inc/libraries/nvport/string.h @@ -49,10 +49,8 @@ /** * @brief Compare two strings, character by character. * - * Will only compare lengthBytes bytes. Strings are assumed to be at least that - * long. - * - * Strings are allowed to overlap, but in . + * Will compare the first 'length' chars of each string, or until + * the nul-terminator is reached in either string, whichever comes first. * * @returns: * - 0 if all bytes are equal diff --git a/src/nvidia/inc/libraries/resserv/rs_resource.h b/src/nvidia/inc/libraries/resserv/rs_resource.h index 2be5031505..516f4a5db2 100644 --- a/src/nvidia/inc/libraries/resserv/rs_resource.h +++ b/src/nvidia/inc/libraries/resserv/rs_resource.h @@ -62,15 +62,16 @@ class RsSession; */ struct RS_LOCK_INFO { - RsClient *pClient; ///< Pointer to client that was locked (if any) - RsClient *pSecondClient; ///< Pointer to second client, for dual-client locking - RsResourceRef *pContextRef; ///< User-defined reference - RsSession *pSession; ///< Session object to be locked, if any - NvU32 flags; ///< RS_LOCK_FLAGS_* - NvU32 state; ///< RS_LOCK_STATE_* + RsClient *pClient; ///< Pointer to client that was locked (if any) + RsClient *pSecondClient; ///< Pointer to second client, for dual-client locking + RsResourceRef *pContextRef; ///< User-defined reference + RsResourceRef *pResRefToBackRef; ///< Resource from which to infer indirect GPU dependencies + RsSession *pSession; ///< Session object to be locked, if any + NvU32 flags; ///< RS_LOCK_FLAGS_* + NvU32 state; ///< RS_LOCK_STATE_* NvU32 gpuMask; - NvU8 traceOp; ///< RS_LOCK_TRACE_* operation for lock-metering - NvU32 traceClassId; ///< Class of initial 
resource that was locked for lock metering + NvU8 traceOp; ///< RS_LOCK_TRACE_* operation for lock-metering + NvU32 traceClassId; ///< Class of initial resource that was locked for lock metering }; struct RS_RES_ALLOC_PARAMS_INTERNAL diff --git a/src/nvidia/src/kernel/diagnostics/journal.c b/src/nvidia/src/kernel/diagnostics/journal.c index aec16fdef7..ef63247007 100644 --- a/src/nvidia/src/kernel/diagnostics/journal.c +++ b/src/nvidia/src/kernel/diagnostics/journal.c @@ -1817,6 +1817,33 @@ _rcdbDumpDclMsgRecord( } break; } + case RmRcDiagReport: + { + RmRcDiag_RECORD* pRecord = (RmRcDiag_RECORD*) &pDclRecord[1]; + OBJGPU *pGpu = gpumgrGetGpuFromId(pDclRecord->GPUTag); + + // open an RC Diagnostic record in the Proto Buffer + NV_CHECK_OK(nvStatus, LEVEL_ERROR, + prbEncNestedStart(pPrbEnc, DCL_DCLMSG_RC_DIAG_RECS)); + if (nvStatus == NV_OK) + { + prbEncAddUInt32(pPrbEnc, RC_RCDIAGRECORD_RECORD_ID, pRecord->idx); + prbEncAddUInt32(pPrbEnc, RC_RCDIAGRECORD_RECORD_TYPE, pRecord->type); + if (NULL != pGpu) + { + NvU32 i; + for (i = 0; i < pRecord->count; ++i) + { + if (NV0000_CTRL_CMD_NVD_RCERR_RPT_REG_MAX_PSEDO_REG < pRecord->data[i].tag) + { + prbEncGpuRegImm(pGpu, pRecord->data[i].offset, pRecord->data[i].value, pPrbEnc, RC_RCDIAGRECORD_REGS); + } + } + } + NV_CHECK_OK(nvStatus, LEVEL_ERROR, prbEncNestedEnd(pPrbEnc)); + } + break; + } case RmPrbErrorInfo_V2: case RmPrbFullDump_V2: { diff --git a/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c b/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c index a78a782870..18f9240277 100644 --- a/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c +++ b/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c @@ -232,7 +232,9 @@ kchangrpapiConstruct_IMPL // vGpu plugin context flag should only be set on host if context is plugin if (gpuIsSriovEnabled(pGpu)) + { pKernelChannelGroup->bIsCallingContextVgpuPlugin = pAllocParams->bIsCallingContextVgpuPlugin; + } if 
(pKernelChannelGroup->bIsCallingContextVgpuPlugin) gfid = GPU_GFID_PF; diff --git a/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c b/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c index 74974f677b..cdf4ad35bd 100644 --- a/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c +++ b/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c @@ -930,13 +930,26 @@ kfifoChidMgrReleaseChid_IMPL if (IS_GFID_VF(gfid)) { - NV_ASSERT_OR_RETURN(pChidMgr->ppVirtualChIDHeap[gfid] != NULL, NV_ERR_INVALID_STATE); - NV_ASSERT_OK(pChidMgr->ppVirtualChIDHeap[gfid]->eheapFree(pChidMgr->ppVirtualChIDHeap[gfid], ChID)); + // + // ppVirtualChIDHeap is freed during hostvgpudeviceapiDestruct in GSP-RM. + // In the case of a GSP-Plugin crash after running the VF doorbell fuzzer, only the hostvgpudeviceapi object is freed in GSP-RM. + // Other resources are cleaned up when shutting down the VM. + // + if (pChidMgr->ppVirtualChIDHeap[gfid] != NULL) + { + NV_ASSERT_OK(pChidMgr->ppVirtualChIDHeap[gfid]->eheapFree(pChidMgr->ppVirtualChIDHeap[gfid], ChID)); + } } else { - NV_ASSERT_OR_RETURN(pChidMgr->pGlobalChIDHeap != NULL, NV_ERR_INVALID_STATE); - NV_ASSERT_OK(pChidMgr->pGlobalChIDHeap->eheapFree(pChidMgr->pGlobalChIDHeap, ChID)); + if (pChidMgr->pGlobalChIDHeap != NULL) + { + NV_ASSERT_OK(pChidMgr->pGlobalChIDHeap->eheapFree(pChidMgr->pGlobalChIDHeap, ChID)); + } + else + { + NV_ASSERT(pChidMgr->pGlobalChIDHeap != NULL); + } } NV_ASSERT_OR_RETURN(pChidMgr->pFifoDataHeap != NULL, NV_ERR_INVALID_STATE); @@ -1322,7 +1335,6 @@ kfifoChidMgrAllocChannelGroupHwID_IMPL return NV_OK; } - /** * @brief Releases a hardware channel group ID. 
* diff --git a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c index aaeb77847c..3133a93d5e 100644 --- a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c +++ b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c @@ -514,6 +514,67 @@ _kgspRpcRCTriggered NV_ERR_INVALID_CHANNEL); } + // Add the RcDiag records we received from GSP-RM to our system wide journal + { + OBJSYS *pSys = SYS_GET_INSTANCE(); + Journal *pRcDB = SYS_GET_RCDB(pSys); + RmClient *pClient; + + NvU32 recordSize = rcdbGetOcaRecordSizeWithHeader(pRcDB, RmRcDiagReport); + NvU32 rcDiagRecStart = pRcDB->RcErrRptNextIdx; + NvU32 rcDiagRecEnd; + NvU32 processId = 0; + NvU32 owner = RCDB_RCDIAG_DEFAULT_OWNER; + + if (pKernelChannel != NULL) + { + pClient = dynamicCast(RES_GET_CLIENT(pKernelChannel), RmClient); + NV_ASSERT(pClient != NULL); + if (pClient != NULL) + processId = pClient->ProcID; + } + + for (NvU32 i = 0; i < rpc_params->rcJournalBufferSize / recordSize; i++) + { + RmRCCommonJournal_RECORD *pCommonRecord = + (RmRCCommonJournal_RECORD *)((NvU8*)&rpc_params->rcJournalBuffer + i * recordSize); + RmRcDiag_RECORD *pRcDiagRecord = + (RmRcDiag_RECORD *)&pCommonRecord[1]; + +#if defined(DEBUG) + NV_PRINTF(LEVEL_INFO, "%d: GPUTag=0x%x CPUTag=0x%llx timestamp=0x%llx stateMask=0x%llx\n", + i, pCommonRecord->GPUTag, pCommonRecord->CPUTag, pCommonRecord->timeStamp, + pCommonRecord->stateMask); + NV_PRINTF(LEVEL_INFO, " idx=%d timeStamp=0x%x type=0x%x flags=0x%x count=%d owner=0x%x processId=0x%x\n", + pRcDiagRecord->idx, pRcDiagRecord->timeStamp, pRcDiagRecord->type, pRcDiagRecord->flags, + pRcDiagRecord->count, pRcDiagRecord->owner, processId); + for (NvU32 j = 0; j < pRcDiagRecord->count; j++) + { + NV_PRINTF(LEVEL_INFO, " %d: offset=0x08%x tag=0x08%x value=0x08%x attribute=0x08%x\n", + j, pRcDiagRecord->data[j].offset, pRcDiagRecord->data[j].tag, + pRcDiagRecord->data[j].value, pRcDiagRecord->data[j].attribute); + } +#endif + if (rcdbAddRcDiagRecFromGsp(pGpu, pRcDB, pCommonRecord, 
pRcDiagRecord) == NULL) + { + NV_PRINTF(LEVEL_WARNING, "Lost RC diagnostic record coming from GPU%d GSP: type=0x%x stateMask=0x%llx\n", + gpuGetInstance(pGpu), pRcDiagRecord->type, pCommonRecord->stateMask); + } + } + + rcDiagRecEnd = pRcDB->RcErrRptNextIdx - 1; + + // Update records to have the correct PID associated with the channel + if (rcDiagRecStart != rcDiagRecEnd) + { + rcdbUpdateRcDiagRecContext(pRcDB, + rcDiagRecStart, + rcDiagRecEnd, + processId, + owner); + } + } + // With CC enabled, CPU-RM needs to write error notifiers if (gpuIsCCFeatureEnabled(pGpu) && pKernelChannel != NULL) { diff --git a/src/nvidia/src/kernel/gpu/intr/intr.c b/src/nvidia/src/kernel/gpu/intr/intr.c index 6f9709659c..29aaf2337a 100644 --- a/src/nvidia/src/kernel/gpu/intr/intr.c +++ b/src/nvidia/src/kernel/gpu/intr/intr.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -55,6 +55,7 @@ static struct } stuckIntr[MC_ENGINE_IDX_MAX]; static NvBool _intrServiceStallExactList(OBJGPU *pGpu, Intr *pIntr, MC_ENGINE_BITVECTOR *pEngines); +static void _intrLogLongRunningInterrupts(Intr *pIntr); static void _intrInitServiceTable(OBJGPU *pGpu, Intr *pIntr); @@ -141,6 +142,8 @@ intrServiceStall_IMPL(OBJGPU *pGpu, Intr *pIntr) intrProcessDPCQueue_HAL(pGpu, pIntr); } + _intrLogLongRunningInterrupts(pIntr); + exit: return; } @@ -1067,6 +1070,7 @@ NvU32 intrServiceInterruptRecords_IMPL IntrService *pIntrService = pIntr->intrServiceTable[engineIdx].pInterruptService; NvU32 ret = 0; NvBool bShouldService; + NvU64 intrTiming, intrTiming2; IntrServiceClearInterruptArguments clearParams = {engineIdx}; IntrServiceServiceInterruptArguments serviceParams = {engineIdx}; @@ -1088,7 +1092,18 @@ NvU32 intrServiceInterruptRecords_IMPL if (bShouldService) { + osGetPerformanceCounter(&intrTiming); + ret = intrservServiceInterrupt(pGpu, pIntrService, &serviceParams); + + osGetPerformanceCounter(&intrTiming2); + intrTiming = intrTiming2 - intrTiming; + if (intrTiming > LONG_INTR_LOG_LENGTH_NS) + { + pIntr->longIntrStats[engineIdx].intrCount++; + if (intrTiming > pIntr->longIntrStats[engineIdx].intrLength) + pIntr->longIntrStats[engineIdx].intrLength = intrTiming; + } } return ret; } @@ -1401,6 +1416,29 @@ _intrExitCriticalSection } } +static void +_intrLogLongRunningInterrupts(Intr *pIntr) +{ + NvU64 now; + osGetPerformanceCounter(&now); + + for (NvU32 i = 0; i < MC_ENGINE_IDX_MAX; ++i) + { + if (pIntr->longIntrStats[i].intrCount > 0) + { + if (now - pIntr->longIntrStats[i].lastPrintTime > LONG_INTR_LOG_RATELIMIT_NS) + { + NV_PRINTF(LEVEL_WARNING, "%u long-running interrupts (%llu ns or slower) from engine %u, longest taking %llu ns\n", + pIntr->longIntrStats[i].intrCount, LONG_INTR_LOG_LENGTH_NS, i, pIntr->longIntrStats[i].intrLength); + + 
pIntr->longIntrStats[i].intrCount = 0; + pIntr->longIntrStats[i].intrLength = 0; + pIntr->longIntrStats[i].lastPrintTime = now; + } + } + } +} + static NvBool _intrServiceStallExactList ( @@ -1608,6 +1646,9 @@ intrServiceStallList_IMPL // allow the isr to come in. _intrExitCriticalSection(pGpu, pIntr, &intrMaskCtx); + // Delay prints until after exiting critical sections to save perf impact + _intrLogLongRunningInterrupts(pIntr); + NV_ASSERT_OK(resservRestoreTlsCallContext(pOldContext)); } diff --git a/src/nvidia/src/kernel/platform/chipset/chipset_info.c b/src/nvidia/src/kernel/platform/chipset/chipset_info.c index 31e5601b5e..6984a6a590 100644 --- a/src/nvidia/src/kernel/platform/chipset/chipset_info.c +++ b/src/nvidia/src/kernel/platform/chipset/chipset_info.c @@ -903,6 +903,17 @@ Intel_7A04_setupFunc return NV_OK; } +static NV_STATUS +Intel_1B81_setupFunc +( + OBJCL *pCl +) +{ + pCl->setProperty(pCl, PDB_PROP_CL_RELAXED_ORDERING_NOT_CAPABLE, NV_TRUE); + + return NV_OK; +} + static NV_STATUS Nvidia_T210_setupFunc ( diff --git a/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c b/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c index f27711aeab..50c3174654 100644 --- a/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c +++ b/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c @@ -72,7 +72,7 @@ static void objClGpuUnmapRootPort(OBJGPU *); static void objClGpuMapEnhCfgSpace(OBJGPU *, OBJCL *); static void objClGpuUnmapEnhCfgSpace(OBJGPU *); static NV_STATUS objClGpuIs3DController(OBJGPU *); -static void objClLoadPcieVirtualP2PApproval(OBJGPU *); +static void objClLoadPcieVirtualP2PApproval(OBJGPU *, OBJCL *); static void objClCheckForExternalGpu(OBJGPU *, OBJCL *); static void _objClAdjustTcVcMap(OBJGPU *, OBJCL *, PORTDATA *); static void _objClGetDownstreamAtomicsEnabledMask(void *, NvU32, NvU32 *); @@ -951,7 +951,7 @@ clUpdatePcieConfig_IMPL(OBJGPU *pGpu, OBJCL *pCl) } // Load PCI Express virtual P2P approval config - 
objClLoadPcieVirtualP2PApproval(pGpu); + objClLoadPcieVirtualP2PApproval(pGpu, pCl); // // Disable NOSNOOP bit for Passthrough. @@ -4259,12 +4259,13 @@ clFreePcieConfigSpaceBase_IMPL(OBJCL *pCl) // other. // static void -objClLoadPcieVirtualP2PApproval(OBJGPU *pGpu) +objClLoadPcieVirtualP2PApproval(OBJGPU *pGpu, OBJCL *pCl) { void *handle; NvU32 data32; NvU8 version; NvU8 cap; + NvU8 rlxdOrderingCfg = 0; NvU8 bus = gpuGetBus(pGpu); NvU8 device = gpuGetDevice(pGpu); NvU32 domain = gpuGetDomain(pGpu); @@ -4324,6 +4325,19 @@ objClLoadPcieVirtualP2PApproval(OBJGPU *pGpu) _PEER_CLIQUE_ID, data32); pGpu->pciePeerClique.bValid = NV_TRUE; + rlxdOrderingCfg = (NvU8)DRF_VAL(_PCI, _VIRTUAL_P2P_APPROVAL_CAP_1, + _RELAXED_ORDERING, data32); + + if (rlxdOrderingCfg == NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DISABLE) + { + // Unset relaxed ordering based on hypervisor's request + pCl->setProperty(pCl, PDB_PROP_CL_RELAXED_ORDERING_NOT_CAPABLE, NV_TRUE); + + NV_PRINTF(LEVEL_INFO, + "Hypervisor has disabled relaxed ordering on GPU%u\n", + gpuGetInstance(pGpu)); + } + NV_PRINTF(LEVEL_INFO, "Hypervisor has assigned GPU%u to peer clique %u\n", gpuGetInstance(pGpu), pGpu->pciePeerClique.id); diff --git a/src/nvidia/src/kernel/rmapi/alloc_free.c b/src/nvidia/src/kernel/rmapi/alloc_free.c index b6981987f8..1c1f8a2b86 100644 --- a/src/nvidia/src/kernel/rmapi/alloc_free.c +++ b/src/nvidia/src/kernel/rmapi/alloc_free.c @@ -328,6 +328,36 @@ serverTopLock_Epilogue } } +static NvU32 +_resGetBackRefGpusMask(RsResourceRef *pResourceRef) +{ + NvU32 gpuMask = 0x0; + RS_INTER_MAPPING_BACK_REF *pBackRefItem; + + if (pResourceRef == NULL) + { + return 0x0; + } + + pBackRefItem = listHead(&pResourceRef->interBackRefs); + while (pBackRefItem != NULL) + { + RsInterMapping *pMapping = pBackRefItem->pMapping; + RsResourceRef *pDeviceRef = pMapping->pContextRef; + GpuResource *pGpuResource = dynamicCast(pDeviceRef->pResource, GpuResource); + + if (pGpuResource != NULL) + { + OBJGPU *pGpu = 
GPU_RES_GET_GPU(pGpuResource); + gpuMask |= gpumgrGetGpuMask(pGpu); + } + + pBackRefItem = listNext(&pResourceRef->interBackRefs, pBackRefItem); + } + + return gpuMask; +} + NV_STATUS serverResLock_Prologue ( @@ -445,8 +475,15 @@ serverResLock_Prologue } else { - status = rmGpuGroupLockAcquire(pParentGpu->gpuInstance, - GPU_LOCK_GRP_DEVICE, + // + // Lock the parent GPU and if specified any GPUs that resource + // may backreference via mappings. + // + pLockInfo->gpuMask = gpumgrGetGpuMask(pParentGpu) | + _resGetBackRefGpusMask(pLockInfo->pResRefToBackRef); + + status = rmGpuGroupLockAcquire(0, + GPU_LOCK_GRP_MASK, GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_CLIENT, &pLockInfo->gpuMask); diff --git a/src/nvidia/src/libraries/nvport/string/string_generic.c b/src/nvidia/src/libraries/nvport/string/string_generic.c index c576ea81cc..7eb573decd 100644 --- a/src/nvidia/src/libraries/nvport/string/string_generic.c +++ b/src/nvidia/src/libraries/nvport/string/string_generic.c @@ -29,6 +29,7 @@ #include "nvport/nvport.h" #include "nvmisc.h" + #ifndef NVPORT_STRING_DONT_DEFINE_portStringLength NvLength portStringLength @@ -75,18 +76,34 @@ portStringCompare NvLength maxLength ) { - NvLength length; + NvLength i; PORT_ASSERT_CHECKED(str1 != NULL); PORT_ASSERT_CHECKED(str2 != NULL); - length = portStringLengthSafe(str1, maxLength); + for (i = 0; i < maxLength; i++) + { + if (str1[i] != str2[i]) + { + // + // Cast to unsigned before assigning to NvS32, to avoid sign + // extension. E.g., if str1[i] is 0xff, we want s1 to contain + // 0xff, not -1. In practice, this shouldn't matter for printable + // characters, but still... + // + NvS32 s1 = (unsigned char)str1[i]; + NvS32 s2 = (unsigned char)str2[i]; + return s1 - s2; + } - // Add 1 for the null terminator. 
- if (length < maxLength) - length++; + if ((str1[i] == '\0') && + (str2[i] == '\0')) + { + break; + } + } - return portMemCmp(str1, str2, length); + return 0; } #endif diff --git a/src/nvidia/src/libraries/resserv/src/rs_server.c b/src/nvidia/src/libraries/resserv/src/rs_server.c index 99e7bc7e19..0328b2e329 100644 --- a/src/nvidia/src/libraries/resserv/src/rs_server.c +++ b/src/nvidia/src/libraries/resserv/src/rs_server.c @@ -145,6 +145,7 @@ NV_STATUS serverFreeResourceTreeUnderLock(RsServer *pServer, RS_RES_FREE_PARAMS return status; pLockInfo->flags |= RS_LOCK_FLAGS_FREE_SESSION_LOCK; + pLockInfo->pResRefToBackRef = pResourceRef; pLockInfo->traceOp = RS_LOCK_TRACE_FREE; pLockInfo->traceClassId = pResourceRef->externalClassId; status = serverResLock_Prologue(pServer, LOCK_ACCESS_WRITE, pLockInfo, &releaseFlags); diff --git a/version.mk b/version.mk index f6bc46ebef..6c8b3cc271 100644 --- a/version.mk +++ b/version.mk @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 535.216.03 +NVIDIA_VERSION = 535.230.02 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))