From 855c3c9d3c54e7c75f060ac72018cc3fe382c43b Mon Sep 17 00:00:00 2001
From: Bernhard Stoeckner <bstoeckner@nvidia.com>
Date: Thu, 16 Jan 2025 17:34:27 +0100
Subject: [PATCH] 535.230.02

---
 README.md                                     |  8 +--
 kernel-open/Kbuild                            | 16 ++++-
 kernel-open/Makefile                          | 31 +++++++++-
 kernel-open/conftest.sh                       | 32 ++++++++++
 kernel-open/nvidia-drm/nvidia-drm-drv.c       |  4 ++
 kernel-open/nvidia-drm/nvidia-drm.Kbuild      |  1 +
 .../nvidia-modeset/nvidia-modeset.Kbuild      |  3 -
 kernel-open/nvidia-uvm/nvidia-uvm.Kbuild      |  1 +
 kernel-open/nvidia-uvm/uvm.c                  |  3 +
 kernel-open/nvidia-uvm/uvm_hmm.c              | 20 +++++-
 kernel-open/nvidia-uvm/uvm_kvmalloc.c         |  2 +-
 kernel-open/nvidia-uvm/uvm_mmu.h              |  2 +-
 kernel-open/nvidia-uvm/uvm_pmm_gpu.c          |  2 +-
 kernel-open/nvidia/nvidia.Kbuild              |  3 -
 src/common/inc/nvBldVer.h                     | 20 +++---
 src/common/inc/nvUnixVersion.h                |  2 +-
 src/common/inc/nvlog_defs.h                   |  5 +-
 src/common/nvswitch/kernel/smbpbi_nvswitch.c  |  4 +-
 .../nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h    |  2 +-
 .../sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h | 29 +++++++++
 .../uproc/os/common/include/liblogdecode.h    |  2 +-
 .../arch/nvalloc/common/inc/inforom/ifrecc.h  |  2 +-
 src/nvidia/arch/nvalloc/common/inc/nvcst.h    |  5 +-
 src/nvidia/arch/nvalloc/common/inc/nvpcie.h   | 29 +++++----
 src/nvidia/generated/g_all_dcl_pb.c           | 14 ++++-
 src/nvidia/generated/g_all_dcl_pb.h           |  8 ++-
 src/nvidia/generated/g_intr_nvoc.h            | 15 ++++-
 src/nvidia/generated/g_nvdebug_pb.h           |  4 +-
 src/nvidia/generated/g_rs_resource_nvoc.h     | 17 +++---
 src/nvidia/inc/libraries/nvport/string.h      |  6 +-
 .../inc/libraries/resserv/rs_resource.h       | 17 +++---
 src/nvidia/src/kernel/diagnostics/journal.c   | 27 ++++++++
 .../gpu/fifo/kernel_channel_group_api.c       |  2 +
 src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c  | 22 +++++--
 src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c    | 61 +++++++++++++++++++
 src/nvidia/src/kernel/gpu/intr/intr.c         | 43 ++++++++++++-
 .../kernel/platform/chipset/chipset_info.c    | 11 ++++
 .../kernel/platform/chipset/chipset_pcie.c    | 20 +++++-
 src/nvidia/src/kernel/rmapi/alloc_free.c      | 41 ++++++++++++-
 .../libraries/nvport/string/string_generic.c  | 29 +++++++--
 .../src/libraries/resserv/src/rs_server.c     |  1 +
 version.mk                                    |  2 +-
 42 files changed, 473 insertions(+), 95 deletions(-)

diff --git a/README.md b/README.md
index dbd9e97170..62b9723688 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source
 
 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 535.216.03.
+version 535.230.02.
 
 
 ## How to Build
@@ -17,7 +17,7 @@ as root:
 
 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-535.216.03 driver release.  This can be achieved by installing
+535.230.02 driver release.  This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option.  E.g.,
 
@@ -180,7 +180,7 @@ software applications.
 ## Compatible GPUs
 
 The open-gpu-kernel-modules can be used on any Turing or later GPU
-(see the table below). However, in the 535.216.03 release,
+(see the table below). However, in the 535.230.02 release,
 GeForce and Workstation support is still considered alpha-quality.
 
 To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
 parameter to 1. For more details, see the NVIDIA GPU driver end user
 README here:
 
-https://us.download.nvidia.com/XFree86/Linux-x86_64/535.216.03/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/535.230.02/README/kernel_open.html
 
 In the below table, if three IDs are listed, the first is the PCI Device 
 ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild
index e4d1495626..f30933a888 100644
--- a/kernel-open/Kbuild
+++ b/kernel-open/Kbuild
@@ -57,6 +57,20 @@ ifeq ($(NV_UNDEF_BEHAVIOR_SANITIZER),1)
  UBSAN_SANITIZE := y
 endif
 
+#
+# Command to create a symbolic link, explicitly resolving the symlink target
+# to an absolute path to abstract away the difference between Linux < 6.13,
+# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and
+# Linux >= 6.13, where the CWD is the external module source tree.
+#
+# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for
+# kernel modules which use precompiled binary object files.
+#
+
+quiet_cmd_symlink = SYMLINK $@
+ cmd_symlink = ln -sf $(abspath $<) $@
+
+
 $(foreach _module, $(NV_KERNEL_MODULES), \
  $(eval include $(src)/$(_module)/$(_module).Kbuild))
 
@@ -72,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.216.03\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.230.02\"
 
 ifneq ($(SYSSRCHOST1X),)
  EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
diff --git a/kernel-open/Makefile b/kernel-open/Makefile
index a88b2f22c0..72672c2a17 100644
--- a/kernel-open/Makefile
+++ b/kernel-open/Makefile
@@ -52,6 +52,22 @@ else
     endif
   endif
 
+  # If CC hasn't been set explicitly, check the value of CONFIG_CC_VERSION_TEXT.
+  # Look for the compiler specified there, and use it by default, if found.
+  ifeq ($(origin CC),default)
+    # Simple (:=) assignment: source the kernel .config once, not per expansion.
+    cc_version_text := $(firstword $(shell . $(KERNEL_OUTPUT)/.config; \
+                         echo "$$CONFIG_CC_VERSION_TEXT"))
+    ifneq ($(cc_version_text),)
+      ifeq ($(shell command -v $(cc_version_text)),)
+          $(warning WARNING: Unable to locate the compiler $(cc_version_text) \
+            from CONFIG_CC_VERSION_TEXT in the kernel configuration.)
+      else
+          CC=$(cc_version_text)
+      endif
+    endif
+  endif
+
   CC ?= cc
   LD ?= ld
   OBJDUMP ?= objdump
@@ -64,6 +80,16 @@ else
     )
   endif
 
+  KERNEL_ARCH = $(ARCH)
+
+  ifneq ($(filter $(ARCH),i386 x86_64),)
+    KERNEL_ARCH = x86
+  else
+    ifeq ($(filter $(ARCH),arm64 powerpc),)
+        $(error Unsupported architecture $(ARCH))
+    endif
+  endif
+
   NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
   NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
                                     $(NV_KERNEL_MODULES))
@@ -103,8 +129,9 @@ else
   # module symbols on which the Linux kernel's module resolution is dependent
   # and hence must be used whenever present.
 
-  LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds      \
-               $(KERNEL_SOURCES)/arch/$(ARCH)/kernel/module.lds \
+  LD_SCRIPT ?= $(KERNEL_SOURCES)/scripts/module-common.lds             \
+               $(KERNEL_SOURCES)/arch/$(KERNEL_ARCH)/kernel/module.lds \
+               $(KERNEL_OUTPUT)/arch/$(KERNEL_ARCH)/module.lds         \
                $(KERNEL_OUTPUT)/scripts/module.lds
   NV_MODULE_COMMON_SCRIPTS := $(foreach s, $(wildcard $(LD_SCRIPT)), -T $(s))
 
diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh
index 7f0478eaf3..7f1870c32c 100755
--- a/kernel-open/conftest.sh
+++ b/kernel-open/conftest.sh
@@ -2475,6 +2475,22 @@ compile_test() {
             fi
         ;;
 
+        file_operations_fop_unsigned_offset_present)
+            #
+            # Determine if the FOP_UNSIGNED_OFFSET define is present.
+            #
+            # Added by commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to
+            # fop_flags") in v6.12.
+            #
+            CODE="
+            #include <linux/fs.h>
+            int conftest_file_operations_fop_unsigned_offset_present(void) {
+                return FOP_UNSIGNED_OFFSET;
+            }"
+
+            compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT" "" "types"
+        ;;
+
         mm_context_t)
             #
             # Determine if the 'mm_context_t' data type is present
@@ -6514,6 +6530,22 @@ compile_test() {
             compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
         ;;
 
+        folio_test_swapcache)
+            #
+            # Determine if the folio_test_swapcache() function is present.
+            #
+            # folio_test_swapcache() was added by commit d389a4a811551 ("mm:
+            # Add folio flag manipulation functions") in v5.16.
+            #
+            CODE="
+            #include <linux/page-flags.h>
+            void conftest_folio_test_swapcache(void) {
+                folio_test_swapcache();
+            }"
+
+            compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
+        ;;
+
         # When adding a new conftest entry, please use the correct format for
         # specifying the relevant upstream Linux kernel commit.
         #
diff --git a/kernel-open/nvidia-drm/nvidia-drm-drv.c b/kernel-open/nvidia-drm/nvidia-drm-drv.c
index 7780c2facb..e5c7d9b46e 100644
--- a/kernel-open/nvidia-drm/nvidia-drm-drv.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c
@@ -1285,6 +1285,10 @@ static const struct file_operations nv_drm_fops = {
     .read           = drm_read,
 
     .llseek         = noop_llseek,
+
+#if defined(NV_FILE_OPERATIONS_FOP_UNSIGNED_OFFSET_PRESENT)
+    .fop_flags   = FOP_UNSIGNED_OFFSET,
+#endif
 };
 
 static const struct drm_ioctl_desc nv_drm_ioctls[] = {
diff --git a/kernel-open/nvidia-drm/nvidia-drm.Kbuild b/kernel-open/nvidia-drm/nvidia-drm.Kbuild
index 894f1f9d22..9059223066 100644
--- a/kernel-open/nvidia-drm/nvidia-drm.Kbuild
+++ b/kernel-open/nvidia-drm/nvidia-drm.Kbuild
@@ -135,3 +135,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
 NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
+NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
diff --git a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild
index ebf3f048b2..4e328bae18 100644
--- a/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild
+++ b/kernel-open/nvidia-modeset/nvidia-modeset.Kbuild
@@ -40,9 +40,6 @@ NV_KERNEL_MODULE_TARGETS += $(NVIDIA_MODESET_KO)
 NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary
 NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o
 
-quiet_cmd_symlink = SYMLINK $@
-cmd_symlink = ln -sf $< $@
-
 targets += $(NVIDIA_MODESET_BINARY_OBJECT_O)
 
 $(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE
diff --git a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
index a38de2166e..a847e9eb59 100644
--- a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
+++ b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
@@ -86,6 +86,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
 NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
+NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
 
 NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
 NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
diff --git a/kernel-open/nvidia-uvm/uvm.c b/kernel-open/nvidia-uvm/uvm.c
index 6f95f17cb4..e1974cf59c 100644
--- a/kernel-open/nvidia-uvm/uvm.c
+++ b/kernel-open/nvidia-uvm/uvm.c
@@ -682,6 +682,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
     // Semaphore pool vmas do not have vma wrappers, but some functions will
     // assume vm_private_data is a wrapper.
     vma->vm_private_data = NULL;
+#if defined(VM_WIPEONFORK)
+    nv_vm_flags_set(vma, VM_WIPEONFORK);
+#endif
 
     if (is_fork) {
         // If we forked, leave the parent vma alone.
diff --git a/kernel-open/nvidia-uvm/uvm_hmm.c b/kernel-open/nvidia-uvm/uvm_hmm.c
index 0d82314a46..5ae313823c 100644
--- a/kernel-open/nvidia-uvm/uvm_hmm.c
+++ b/kernel-open/nvidia-uvm/uvm_hmm.c
@@ -71,6 +71,24 @@ module_param(uvm_disable_hmm, bool, 0444);
 #include "uvm_va_policy.h"
 #include "uvm_tools.h"
 
+// nv_PageSwapCache() wraps the page swap cache flag check so that it works
+// across a wide range of kernel versions.
+// PageSwapCache() was removed by commit 32f51ead3d77 ("mm: remove
+// PageSwapCache") in v6.12-rc1.
+// folio_test_swapcache() was added by commit d389a4a811551 ("mm: Add folio
+// flag manipulation functions") in v5.16.
+// Systems with HMM patches backported to 5.14 are possible, but those systems
+// do not include folio_test_swapcache(), so PageSwapCache() is the fallback.
+// TODO: Bug 4050579: Remove this when migration of swap cached pages is updated
+static __always_inline bool nv_PageSwapCache(struct page *page)
+{
+#if defined(NV_FOLIO_TEST_SWAPCACHE_PRESENT)
+    return folio_test_swapcache(page_folio(page));
+#else
+    return PageSwapCache(page);
+#endif
+}
+
 static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
                                uvm_page_index_t page_index,
                                struct page *page);
@@ -2554,7 +2572,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
                 continue;
             }
 
-            if (PageSwapCache(src_page)) {
+            if (nv_PageSwapCache(src_page)) {
                 // TODO: Bug 4050579: Remove this when swap cached pages can be
                 // migrated.
                 if (service_context) {
diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.c b/kernel-open/nvidia-uvm/uvm_kvmalloc.c
index 69e0b30b98..2285a479b3 100644
--- a/kernel-open/nvidia-uvm/uvm_kvmalloc.c
+++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.c
@@ -36,7 +36,7 @@
 typedef struct
 {
     size_t alloc_size;
-    uint8_t ptr[0];
+    uint8_t ptr[];
 } uvm_vmalloc_hdr_t;
 
 typedef struct
diff --git a/kernel-open/nvidia-uvm/uvm_mmu.h b/kernel-open/nvidia-uvm/uvm_mmu.h
index 4f53e3ffb7..978b18339d 100644
--- a/kernel-open/nvidia-uvm/uvm_mmu.h
+++ b/kernel-open/nvidia-uvm/uvm_mmu.h
@@ -162,7 +162,7 @@ struct uvm_page_directory_struct
     // pointers to child directories on the host.
     // this array is variable length, so it needs to be last to allow it to
     // take up extra space
-    uvm_page_directory_t *entries[0];
+    uvm_page_directory_t *entries[];
 };
 
 enum
diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c
index 4e573185d5..b35bd527c8 100644
--- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c
+++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c
@@ -221,7 +221,7 @@ struct uvm_pmm_gpu_chunk_suballoc_struct
     // Array of all child subchunks
     // TODO: Bug 1765461: Can the array be inlined? It could save the parent
     //       pointer.
-    uvm_gpu_chunk_t *subchunks[0];
+    uvm_gpu_chunk_t *subchunks[];
 };
 
 typedef enum
diff --git a/kernel-open/nvidia/nvidia.Kbuild b/kernel-open/nvidia/nvidia.Kbuild
index 789e0e4b5a..f2eabd4463 100644
--- a/kernel-open/nvidia/nvidia.Kbuild
+++ b/kernel-open/nvidia/nvidia.Kbuild
@@ -40,9 +40,6 @@ NVIDIA_KO = nvidia/nvidia.ko
 NVIDIA_BINARY_OBJECT := $(src)/nvidia/nv-kernel.o_binary
 NVIDIA_BINARY_OBJECT_O := nvidia/nv-kernel.o
 
-quiet_cmd_symlink = SYMLINK $@
- cmd_symlink = ln -sf $< $@
-
 targets += $(NVIDIA_BINARY_OBJECT_O)
 
 $(obj)/$(NVIDIA_BINARY_OBJECT_O): $(NVIDIA_BINARY_OBJECT) FORCE
diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h
index 1e245f2353..6976f9be32 100644
--- a/src/common/inc/nvBldVer.h
+++ b/src/common/inc/nvBldVer.h
@@ -36,25 +36,25 @@
 // and then checked back in. You cannot make changes to these sections without
 // corresponding changes to the buildmeister script
 #ifndef NV_BUILD_BRANCH
-    #define NV_BUILD_BRANCH             r538_95
+    #define NV_BUILD_BRANCH             r539_11
 #endif
 #ifndef NV_PUBLIC_BRANCH
-    #define NV_PUBLIC_BRANCH             r538_95
+    #define NV_PUBLIC_BRANCH             r539_11
 #endif
 
 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
-#define NV_BUILD_BRANCH_VERSION         "rel/gpu_drv/r535/r538_95-688"
-#define NV_BUILD_CHANGELIST_NUM         (35042711)
+#define NV_BUILD_BRANCH_VERSION         "rel/gpu_drv/r535/r539_11-770"
+#define NV_BUILD_CHANGELIST_NUM         (35309837)
 #define NV_BUILD_TYPE                   "Official"
-#define NV_BUILD_NAME                   "rel/gpu_drv/r535/r538_95-688"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35042711)
+#define NV_BUILD_NAME                   "rel/gpu_drv/r535/r539_11-770"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35309837)
 
 #else     /* Windows builds */
-#define NV_BUILD_BRANCH_VERSION         "r538_95-1"
-#define NV_BUILD_CHANGELIST_NUM         (34853858)
+#define NV_BUILD_BRANCH_VERSION         "r539_11-2"
+#define NV_BUILD_CHANGELIST_NUM         (35309837)
 #define NV_BUILD_TYPE                   "Official"
-#define NV_BUILD_NAME                   "538.96"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34853858)
+#define NV_BUILD_NAME                   "539.14"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35309837)
 #define NV_BUILD_BRANCH_BASE_VERSION    R535
 #endif
 // End buildmeister python edited section
diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h
index 8c1ee77801..33349946d4 100644
--- a/src/common/inc/nvUnixVersion.h
+++ b/src/common/inc/nvUnixVersion.h
@@ -4,7 +4,7 @@
 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
     (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
 
-#define NV_VERSION_STRING               "535.216.03"
+#define NV_VERSION_STRING               "535.230.02"
 
 #else
 
diff --git a/src/common/inc/nvlog_defs.h b/src/common/inc/nvlog_defs.h
index 86da458ea3..b58d067039 100644
--- a/src/common/inc/nvlog_defs.h
+++ b/src/common/inc/nvlog_defs.h
@@ -102,10 +102,11 @@ struct _NVLOG_BUFFER
 
 #define NVLOG_MAX_BUFFERS_v11       16
 #define NVLOG_MAX_BUFFERS_v12       256
+#define NVLOG_MAX_BUFFERS_v13       3840
 
 #if NVOS_IS_UNIX
-#define NVLOG_MAX_BUFFERS           NVLOG_MAX_BUFFERS_v12
-#define NVLOG_LOGGER_VERSION        12          // v1.2
+#define NVLOG_MAX_BUFFERS           NVLOG_MAX_BUFFERS_v13
+#define NVLOG_LOGGER_VERSION        13          // v1.3
 #else
 #define NVLOG_MAX_BUFFERS           NVLOG_MAX_BUFFERS_v11
 #define NVLOG_LOGGER_VERSION        11          // v1.1
diff --git a/src/common/nvswitch/kernel/smbpbi_nvswitch.c b/src/common/nvswitch/kernel/smbpbi_nvswitch.c
index 8cefb389df..801d17d4b7 100644
--- a/src/common/nvswitch/kernel/smbpbi_nvswitch.c
+++ b/src/common/nvswitch/kernel/smbpbi_nvswitch.c
@@ -90,10 +90,10 @@ nvswitch_smbpbi_post_init
 
     if (status == NVL_SUCCESS)
     {
-#if defined(DEBUG) || defined(DEVELOP) || defined(NV_MODS)
+#if defined(DEBUG) || defined(DEVELOP)
         nvswitch_lib_smbpbi_log_sxid(device, NVSWITCH_ERR_NO_ERROR,
                                      "NVSWITCH SMBPBI server is online.");
-#endif // defined(DEBUG) || defined(DEVELOP) || defined(NV_MODS)
+#endif // defined(DEBUG) || defined(DEVELOP)
 
         NVSWITCH_PRINT(device, INFO, "%s: SMBPBI POST INIT completed\n", __FUNCTION__);
     }
diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h b/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h
index a52be2a374..27e8a7dae6 100644
--- a/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h
+++ b/src/common/sdk/nvidia/inc/ctrl/ctrl0000/ctrl0000nvd.h
@@ -256,7 +256,7 @@ typedef struct NV0000_CTRL_NVD_GET_TIMESTAMP_PARAMS {
 #define NV0000_CTRL_NVD_SIGNATURE_SIZE     (4)
 
 /* Maximum number of buffers */
-#define NV0000_CTRL_NVD_MAX_BUFFERS        (256)
+#define NV0000_CTRL_NVD_MAX_BUFFERS        (3840)
 
 #define NV0000_CTRL_NVD_GET_NVLOG_INFO_PARAMS_MESSAGE_ID (0x4U)
 
diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h b/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h
index 524f2c454b..90846151ed 100644
--- a/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h
+++ b/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208fgr.h
@@ -106,4 +106,33 @@ typedef struct NV208F_CTRL_GR_ECC_INJECTION_SUPPORTED_PARAMS {
     NV_DECLARE_ALIGNED(NV2080_CTRL_GR_ROUTE_INFO grRouteInfo, 8);
 } NV208F_CTRL_GR_ECC_INJECTION_SUPPORTED_PARAMS;
 
+/*
+ * NV208F_CTRL_CMD_GR_ECC_SET_TRANSIENT_CLEARING_POLICY
+ *
+ * Control command to set whether or not actions to clear potential transient
+ * errors in the SM should be taken.
+ *
+ * Parameters:
+ *
+ * policy
+ *   NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_DISABLED
+ *      Don't attempt to clear a transient error in the SM
+ *   NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_ENABLED
+ *      Attempt to clear a transient error in the SM
+ *
+ * Possible status values returned are:
+ *   NV_OK
+ *   NV_ERR_INVALID_ARGUMENT
+ */
+#define NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_DISABLED       (0x00000000)
+#define NV208F_CTRL_GR_ECC_TRANSIENT_CLEARING_ENABLED        (0x00000001)
+
+#define NV208F_CTRL_CMD_GR_ECC_SET_TRANSIENT_CLEARING_POLICY (0x208f1205) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_DIAG_GR_INTERFACE_ID << 8) | NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS_MESSAGE_ID" */
+
+#define NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS_MESSAGE_ID (0x5U)
+
+typedef struct NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS {
+    NvU32 policy;
+} NV208F_CTRL_GR_ECC_SET_TRANSIENT_CLEARING_POLICY_PARAMS;
+
 /* _ctrl208fgr_h_ */
diff --git a/src/common/uproc/os/common/include/liblogdecode.h b/src/common/uproc/os/common/include/liblogdecode.h
index 32e9f86e1a..cf29093176 100644
--- a/src/common/uproc/os/common/include/liblogdecode.h
+++ b/src/common/uproc/os/common/include/liblogdecode.h
@@ -42,7 +42,7 @@ extern "C" {
 #    define LIBOS_LOG_DECODE_ENABLE 1
 #    define LIBOS_LOG_TO_NVLOG      0
 
-#    define LIBOS_LOG_MAX_LOGS    160   // Max logs for all GPUs for offline decoder
+#    define LIBOS_LOG_MAX_LOGS    3840   // Max logs for all GPUs for offline decoder
 
 #endif // NVRM
 
diff --git a/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h b/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h
index 337537d794..646b8a62e7 100644
--- a/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h
+++ b/src/nvidia/arch/nvalloc/common/inc/inforom/ifrecc.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2017-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
diff --git a/src/nvidia/arch/nvalloc/common/inc/nvcst.h b/src/nvidia/arch/nvalloc/common/inc/nvcst.h
index 684eda3d0f..060a663178 100644
--- a/src/nvidia/arch/nvalloc/common/inc/nvcst.h
+++ b/src/nvidia/arch/nvalloc/common/inc/nvcst.h
@@ -65,6 +65,7 @@ CHIPSET_SETUP_FUNC(Intel_0685_setupFunc)
 CHIPSET_SETUP_FUNC(Intel_4381_setupFunc)
 CHIPSET_SETUP_FUNC(Intel_7A82_setupFunc)
 CHIPSET_SETUP_FUNC(Intel_7A04_setupFunc)
+CHIPSET_SETUP_FUNC(Intel_1B81_setupFunc)
 CHIPSET_SETUP_FUNC(SiS_656_setupFunc)
 CHIPSET_SETUP_FUNC(ATI_RS400_setupFunc)
 CHIPSET_SETUP_FUNC(ATI_RS480_setupFunc)
@@ -186,8 +187,8 @@ CSINFO chipsetInfo[] =
     {PCI_VENDOR_ID_INTEL,       0x4385, CS_INTEL_4381,      "Intel-RocketLake",     Intel_4381_setupFunc},
     {PCI_VENDOR_ID_INTEL,       0x7A82, CS_INTEL_7A82,      "Intel-AlderLake",      Intel_7A82_setupFunc},
     {PCI_VENDOR_ID_INTEL,       0x7A84, CS_INTEL_7A82,      "Intel-AlderLake",      Intel_7A82_setupFunc},
-    {PCI_VENDOR_ID_INTEL,       0x1B81, CS_INTEL_1B81,      "Intel-SapphireRapids", NULL},
-    {PCI_VENDOR_ID_INTEL,       0x7A8A, CS_INTEL_1B81,      "Intel-SapphireRapids", NULL},
+    {PCI_VENDOR_ID_INTEL,       0x1B81, CS_INTEL_1B81,      "Intel-SapphireRapids", Intel_1B81_setupFunc},
+    {PCI_VENDOR_ID_INTEL,       0x7A8A, CS_INTEL_1B81,      "Intel-SapphireRapids", Intel_1B81_setupFunc},
     {PCI_VENDOR_ID_INTEL,       0x18DC, CS_INTEL_18DC,      "Intel-IceLake",        NULL},
     {PCI_VENDOR_ID_INTEL,       0x7A04, CS_INTEL_7A04,      "Intel-RaptorLake",     Intel_7A04_setupFunc},
     {PCI_VENDOR_ID_INTEL,       0x5795, CS_INTEL_5795,      "Intel-GraniteRapids",  NULL},
diff --git a/src/nvidia/arch/nvalloc/common/inc/nvpcie.h b/src/nvidia/arch/nvalloc/common/inc/nvpcie.h
index c5c8217b94..f76f23b1d2 100644
--- a/src/nvidia/arch/nvalloc/common/inc/nvpcie.h
+++ b/src/nvidia/arch/nvalloc/common/inc/nvpcie.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2000-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2000-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -211,18 +211,21 @@
 // to any specific hardware.
 //
 //
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0                       0x000000C8
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_ID                           7:0
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_NEXT                        15:8
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_LENGTH                     23:16
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_SIG_LO                     31:24
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1                       0x000000CC
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_SIG_HI                      15:0
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_VERSION                    18:16
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_PEER_CLIQUE_ID             22:19
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RSVD                       31:23
-
-#define NV_PCI_VIRTUAL_P2P_APPROVAL_SIGNATURE                   0x00503250
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0                            0x000000C8
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_ID                                7:0
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_NEXT                             15:8
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_LENGTH                          23:16
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_SIG_LO                          31:24
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1                            0x000000CC
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_SIG_HI                           15:0
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_VERSION                         18:16
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_PEER_CLIQUE_ID                  22:19
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING                23:23
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DEFAULT   0x00000000
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DISABLE   0x00000001
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RSVD                            31:24
+
+#define NV_PCI_VIRTUAL_P2P_APPROVAL_SIGNATURE                        0x00503250
 
 // Chipset-specific definitions.
 // Intel SantaRosa definitions
diff --git a/src/nvidia/generated/g_all_dcl_pb.c b/src/nvidia/generated/g_all_dcl_pb.c
index 5726572df4..eb0d9f947c 100644
--- a/src/nvidia/generated/g_all_dcl_pb.c
+++ b/src/nvidia/generated/g_all_dcl_pb.c
@@ -122,6 +122,18 @@ const PRB_FIELD_DESC prb_fields_dcl_dclmsg[] = {
         PRB_MAYBE_FIELD_NAME("engine")
         PRB_MAYBE_FIELD_DEFAULT(0)
     },
+    {
+        331,
+        {
+            PRB_OPTIONAL,
+            PRB_MESSAGE,
+            0,
+        },
+        RC_RCDIAGRECORD,
+        0,
+        PRB_MAYBE_FIELD_NAME("rc_diag_recs")
+        PRB_MAYBE_FIELD_DEFAULT(0)
+    },
 };
 
 // 'ErrorBlock' field defaults
@@ -150,7 +162,7 @@ const PRB_MSG_DESC prb_messages_dcl[] = {
         PRB_MAYBE_MESSAGE_NAME("Dcl.Engines")
     },
     {
-        7,
+        8,
         prb_fields_dcl_dclmsg,
         PRB_MAYBE_MESSAGE_NAME("Dcl.DclMsg")
     },
diff --git a/src/nvidia/generated/g_all_dcl_pb.h b/src/nvidia/generated/g_all_dcl_pb.h
index 8fd5b053a7..93efc31113 100644
--- a/src/nvidia/generated/g_all_dcl_pb.h
+++ b/src/nvidia/generated/g_all_dcl_pb.h
@@ -18,8 +18,8 @@ extern const PRB_MSG_DESC prb_messages_dcl[];
 // Message maximum lengths
 // Does not include repeated fields, strings and byte arrays.
 #define DCL_ENGINES_LEN 130
-#define DCL_DCLMSG_LEN 567
-#define DCL_ERRORBLOCK_LEN 571
+#define DCL_DCLMSG_LEN 610
+#define DCL_ERRORBLOCK_LEN 614
 
 extern const PRB_FIELD_DESC prb_fields_dcl_engines[];
 
@@ -41,6 +41,7 @@ extern const PRB_FIELD_DESC prb_fields_dcl_dclmsg[];
 #define DCL_DCLMSG_JOURNAL_BUGCHECK (&prb_fields_dcl_dclmsg[4])
 #define DCL_DCLMSG_RCCOUNTER (&prb_fields_dcl_dclmsg[5])
 #define DCL_DCLMSG_ENGINE (&prb_fields_dcl_dclmsg[6])
+#define DCL_DCLMSG_RC_DIAG_RECS (&prb_fields_dcl_dclmsg[7])
 
 // 'DclMsg' field lengths
 #define DCL_DCLMSG_COMMON_LEN 42
@@ -50,6 +51,7 @@ extern const PRB_FIELD_DESC prb_fields_dcl_dclmsg[];
 #define DCL_DCLMSG_JOURNAL_BUGCHECK_LEN 69
 #define DCL_DCLMSG_RCCOUNTER_LEN 64
 #define DCL_DCLMSG_ENGINE_LEN 133
+#define DCL_DCLMSG_RC_DIAG_RECS_LEN 42
 
 extern const PRB_FIELD_DESC prb_fields_dcl_errorblock[];
 
@@ -57,7 +59,7 @@ extern const PRB_FIELD_DESC prb_fields_dcl_errorblock[];
 #define DCL_ERRORBLOCK_DATA (&prb_fields_dcl_errorblock[0])
 
 // 'ErrorBlock' field lengths
-#define DCL_ERRORBLOCK_DATA_LEN 570
+#define DCL_ERRORBLOCK_DATA_LEN 613
 
 extern const PRB_SERVICE_DESC prb_services_dcl[];
 
diff --git a/src/nvidia/generated/g_intr_nvoc.h b/src/nvidia/generated/g_intr_nvoc.h
index e4b6095136..6c49f4342b 100644
--- a/src/nvidia/generated/g_intr_nvoc.h
+++ b/src/nvidia/generated/g_intr_nvoc.h
@@ -7,7 +7,7 @@ extern "C" {
 #endif
 
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2006-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2006-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -96,6 +96,11 @@ MAKE_VECTOR(InterruptTable, INTR_TABLE_ENTRY);
 // Default value for intrStuckThreshold
 #define INTR_STUCK_THRESHOLD 1000
 
+// Minimum length of interrupt to log as long-running
+#define LONG_INTR_LOG_LENGTH_NS (1000000LLU) // 1ms
+// Maximum frequency of long-running interrupt print, per engine
+#define LONG_INTR_LOG_RATELIMIT_NS (10000000000LLU) // 10s
+
 #define INTR_TABLE_INIT_KERNEL (1 << 0)
 #define INTR_TABLE_INIT_PHYSICAL (1 << 1)
 
@@ -194,6 +199,13 @@ typedef struct Device Device;
 #else
 #define PRIVATE_FIELD(x) NVOC_PRIVATE_FIELD(x)
 #endif
+struct __nvoc_inner_struc_Intr_1__ {
+    NvU32 intrCount;
+    NvU64 intrLength;
+    NvU64 lastPrintTime;
+};
+
+
 struct Intr {
     const struct NVOC_RTTI *__nvoc_rtti;
     struct OBJENGSTATE __nvoc_base_OBJENGSTATE;
@@ -262,6 +274,7 @@ struct Intr {
     NvU32 intrEn0Orig;
     NvBool halIntrEnabled;
     NvU32 saveIntrEn0;
+    struct __nvoc_inner_struc_Intr_1__ longIntrStats[167];
 };
 
 #ifndef __NVOC_CLASS_Intr_TYPEDEF__
diff --git a/src/nvidia/generated/g_nvdebug_pb.h b/src/nvidia/generated/g_nvdebug_pb.h
index 16c11f1c11..c559aaefd1 100644
--- a/src/nvidia/generated/g_nvdebug_pb.h
+++ b/src/nvidia/generated/g_nvdebug_pb.h
@@ -40,7 +40,7 @@ extern const PRB_MSG_DESC prb_messages_nvdebug[];
 // Does not include repeated fields, strings and byte arrays.
 #define NVDEBUG_SYSTEMINFO_LEN 275
 #define NVDEBUG_GPUINFO_LEN 164
-#define NVDEBUG_NVDUMP_LEN 1308
+#define NVDEBUG_NVDUMP_LEN 1351
 #define NVDEBUG_SYSTEMINFO_NORTHBRIDGEINFO_LEN 12
 #define NVDEBUG_SYSTEMINFO_SOCINFO_LEN 12
 #define NVDEBUG_SYSTEMINFO_CPUINFO_LEN 24
@@ -104,7 +104,7 @@ extern const PRB_FIELD_DESC prb_fields_nvdebug_nvdump[];
 
 // 'NvDump' field lengths
 #define NVDEBUG_NVDUMP_SYSTEM_INFO_LEN 278
-#define NVDEBUG_NVDUMP_DCL_MSG_LEN 570
+#define NVDEBUG_NVDUMP_DCL_MSG_LEN 613
 #define NVDEBUG_NVDUMP_GPU_INFO_LEN 167
 #define NVDEBUG_NVDUMP_EXCEPTION_ADDRESS_LEN 10
 #define NVDEBUG_NVDUMP_SYSTEM_INFO_GSPRM_LEN 278
diff --git a/src/nvidia/generated/g_rs_resource_nvoc.h b/src/nvidia/generated/g_rs_resource_nvoc.h
index ff1abe6e02..df878dce94 100644
--- a/src/nvidia/generated/g_rs_resource_nvoc.h
+++ b/src/nvidia/generated/g_rs_resource_nvoc.h
@@ -81,15 +81,16 @@ typedef struct RsSession RsSession;
  */
 struct RS_LOCK_INFO
 {
-    struct RsClient *pClient;              ///< Pointer to client that was locked (if any)
-    struct RsClient *pSecondClient;        ///< Pointer to second client, for dual-client locking
-    RsResourceRef *pContextRef;     ///< User-defined reference
-    struct RsSession *pSession;            ///< Session object to be locked, if any
-    NvU32 flags;                    ///< RS_LOCK_FLAGS_*
-    NvU32 state;                    ///< RS_LOCK_STATE_*
+    struct RsClient *pClient;                  ///< Pointer to client that was locked (if any)
+    struct RsClient *pSecondClient;            ///< Pointer to second client, for dual-client locking
+    RsResourceRef *pContextRef;         ///< User-defined reference
+    RsResourceRef *pResRefToBackRef;    ///< Resource from which to infer indirect GPU dependencies
+    struct RsSession *pSession;                ///< Session object to be locked, if any
+    NvU32 flags;                        ///< RS_LOCK_FLAGS_*
+    NvU32 state;                        ///< RS_LOCK_STATE_*
     NvU32 gpuMask;
-    NvU8  traceOp;                  ///< RS_LOCK_TRACE_* operation for lock-metering
-    NvU32 traceClassId;             ///< Class of initial resource that was locked for lock metering
+    NvU8  traceOp;                      ///< RS_LOCK_TRACE_* operation for lock-metering
+    NvU32 traceClassId;                 ///< Class of initial resource that was locked for lock metering
 };
 
 struct RS_RES_ALLOC_PARAMS_INTERNAL
diff --git a/src/nvidia/inc/libraries/nvport/string.h b/src/nvidia/inc/libraries/nvport/string.h
index 82da75ceb1..402646c624 100644
--- a/src/nvidia/inc/libraries/nvport/string.h
+++ b/src/nvidia/inc/libraries/nvport/string.h
@@ -49,10 +49,8 @@
 /**
  * @brief Compare two strings, character by character.
  *
- * Will only compare lengthBytes bytes. Strings are assumed to be at least that
- * long.
- *
- * Strings are allowed to overlap, but in .
+ * Will compare the first 'length' chars of each string, or until
+ * the nul-terminator is reached in either string, whichever comes first.
  *
  * @returns:
  * - 0 if all bytes are equal
diff --git a/src/nvidia/inc/libraries/resserv/rs_resource.h b/src/nvidia/inc/libraries/resserv/rs_resource.h
index 2be5031505..516f4a5db2 100644
--- a/src/nvidia/inc/libraries/resserv/rs_resource.h
+++ b/src/nvidia/inc/libraries/resserv/rs_resource.h
@@ -62,15 +62,16 @@ class RsSession;
  */
 struct RS_LOCK_INFO
 {
-    RsClient *pClient;              ///< Pointer to client that was locked (if any)
-    RsClient *pSecondClient;        ///< Pointer to second client, for dual-client locking
-    RsResourceRef *pContextRef;     ///< User-defined reference
-    RsSession *pSession;            ///< Session object to be locked, if any
-    NvU32 flags;                    ///< RS_LOCK_FLAGS_*
-    NvU32 state;                    ///< RS_LOCK_STATE_*
+    RsClient *pClient;                  ///< Pointer to client that was locked (if any)
+    RsClient *pSecondClient;            ///< Pointer to second client, for dual-client locking
+    RsResourceRef *pContextRef;         ///< User-defined reference
+    RsResourceRef *pResRefToBackRef;    ///< Resource from which to infer indirect GPU dependencies
+    RsSession *pSession;                ///< Session object to be locked, if any
+    NvU32 flags;                        ///< RS_LOCK_FLAGS_*
+    NvU32 state;                        ///< RS_LOCK_STATE_*
     NvU32 gpuMask;
-    NvU8  traceOp;                  ///< RS_LOCK_TRACE_* operation for lock-metering
-    NvU32 traceClassId;             ///< Class of initial resource that was locked for lock metering
+    NvU8  traceOp;                      ///< RS_LOCK_TRACE_* operation for lock-metering
+    NvU32 traceClassId;                 ///< Class of initial resource that was locked for lock metering
 };
 
 struct RS_RES_ALLOC_PARAMS_INTERNAL
diff --git a/src/nvidia/src/kernel/diagnostics/journal.c b/src/nvidia/src/kernel/diagnostics/journal.c
index aec16fdef7..ef63247007 100644
--- a/src/nvidia/src/kernel/diagnostics/journal.c
+++ b/src/nvidia/src/kernel/diagnostics/journal.c
@@ -1817,6 +1817,33 @@ _rcdbDumpDclMsgRecord(
             }
             break;
         }
+        case RmRcDiagReport:
+        {
+            RmRcDiag_RECORD* pRecord = (RmRcDiag_RECORD*) &pDclRecord[1];
+            OBJGPU *pGpu = gpumgrGetGpuFromId(pDclRecord->GPUTag);
+
+            // open an RC Diagnostic record in the Proto Buffer
+            NV_CHECK_OK(nvStatus, LEVEL_ERROR,
+                prbEncNestedStart(pPrbEnc, DCL_DCLMSG_RC_DIAG_RECS));
+            if (nvStatus == NV_OK)
+            {
+                prbEncAddUInt32(pPrbEnc, RC_RCDIAGRECORD_RECORD_ID, pRecord->idx);
+                prbEncAddUInt32(pPrbEnc, RC_RCDIAGRECORD_RECORD_TYPE, pRecord->type);
+                if (NULL != pGpu)
+                {
+                    NvU32 i;
+                    for (i = 0; i < pRecord->count; ++i)
+                    {
+                        if (NV0000_CTRL_CMD_NVD_RCERR_RPT_REG_MAX_PSEDO_REG < pRecord->data[i].tag)
+                        {
+                            prbEncGpuRegImm(pGpu, pRecord->data[i].offset, pRecord->data[i].value, pPrbEnc, RC_RCDIAGRECORD_REGS);
+                        }
+                    }
+                }
+                NV_CHECK_OK(nvStatus, LEVEL_ERROR, prbEncNestedEnd(pPrbEnc));
+            }
+            break;
+        }
         case RmPrbErrorInfo_V2:
         case RmPrbFullDump_V2:
         {
diff --git a/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c b/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c
index a78a782870..18f9240277 100644
--- a/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c
+++ b/src/nvidia/src/kernel/gpu/fifo/kernel_channel_group_api.c
@@ -232,7 +232,9 @@ kchangrpapiConstruct_IMPL
 
     // vGpu plugin context flag should only be set on host if context is plugin
     if (gpuIsSriovEnabled(pGpu))
+    {
         pKernelChannelGroup->bIsCallingContextVgpuPlugin = pAllocParams->bIsCallingContextVgpuPlugin;
+    }
 
     if (pKernelChannelGroup->bIsCallingContextVgpuPlugin)
         gfid = GPU_GFID_PF;
diff --git a/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c b/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c
index 74974f677b..cdf4ad35bd 100644
--- a/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c
+++ b/src/nvidia/src/kernel/gpu/fifo/kernel_fifo.c
@@ -930,13 +930,26 @@ kfifoChidMgrReleaseChid_IMPL
 
     if (IS_GFID_VF(gfid))
     {
-        NV_ASSERT_OR_RETURN(pChidMgr->ppVirtualChIDHeap[gfid] != NULL, NV_ERR_INVALID_STATE);
-        NV_ASSERT_OK(pChidMgr->ppVirtualChIDHeap[gfid]->eheapFree(pChidMgr->ppVirtualChIDHeap[gfid], ChID));
+        //
+        // ppVirtualChIDHeap is freed during hostvgpudeviceapiDestruct in GSP-RM.
+        // In the case of a GSP-Plugin crash after running the VF doorbell fuzzer, only the hostvgpudeviceapi object is freed in GSP-RM.
+        // Other resources are cleaned up when shutting down the VM.
+        //
+        if (pChidMgr->ppVirtualChIDHeap[gfid] != NULL)
+        {
+            NV_ASSERT_OK(pChidMgr->ppVirtualChIDHeap[gfid]->eheapFree(pChidMgr->ppVirtualChIDHeap[gfid], ChID));
+        }
     }
     else
     {
-        NV_ASSERT_OR_RETURN(pChidMgr->pGlobalChIDHeap != NULL, NV_ERR_INVALID_STATE);
-        NV_ASSERT_OK(pChidMgr->pGlobalChIDHeap->eheapFree(pChidMgr->pGlobalChIDHeap, ChID));
+        if (pChidMgr->pGlobalChIDHeap != NULL)
+        {
+            NV_ASSERT_OK(pChidMgr->pGlobalChIDHeap->eheapFree(pChidMgr->pGlobalChIDHeap, ChID));
+        }
+        else
+        {
+            NV_ASSERT(pChidMgr->pGlobalChIDHeap != NULL);
+        }
     }
 
     NV_ASSERT_OR_RETURN(pChidMgr->pFifoDataHeap != NULL, NV_ERR_INVALID_STATE);
@@ -1322,7 +1335,6 @@ kfifoChidMgrAllocChannelGroupHwID_IMPL
     return NV_OK;
 }
 
-
 /**
  * @brief Releases a hardware channel group ID.
  *
diff --git a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c
index aaeb77847c..3133a93d5e 100644
--- a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c
+++ b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c
@@ -514,6 +514,67 @@ _kgspRpcRCTriggered
                            NV_ERR_INVALID_CHANNEL);
     }
 
+    // Add the RcDiag records we received from GSP-RM to our system wide journal
+    {
+        OBJSYS   *pSys = SYS_GET_INSTANCE();
+        Journal  *pRcDB = SYS_GET_RCDB(pSys);
+        RmClient *pClient;
+
+        NvU32 recordSize = rcdbGetOcaRecordSizeWithHeader(pRcDB, RmRcDiagReport);
+        NvU32 rcDiagRecStart = pRcDB->RcErrRptNextIdx;
+        NvU32 rcDiagRecEnd;
+        NvU32 processId = 0;
+        NvU32 owner = RCDB_RCDIAG_DEFAULT_OWNER;
+
+        if (pKernelChannel != NULL)
+        {
+            pClient = dynamicCast(RES_GET_CLIENT(pKernelChannel), RmClient);
+            NV_ASSERT(pClient != NULL);
+            if (pClient != NULL)
+                processId = pClient->ProcID;
+        }
+
+        for (NvU32 i = 0; i < rpc_params->rcJournalBufferSize / recordSize; i++)
+        {
+            RmRCCommonJournal_RECORD *pCommonRecord =
+                (RmRCCommonJournal_RECORD *)((NvU8*)&rpc_params->rcJournalBuffer + i * recordSize);
+            RmRcDiag_RECORD *pRcDiagRecord =
+                (RmRcDiag_RECORD *)&pCommonRecord[1];
+
+#if defined(DEBUG)
+            NV_PRINTF(LEVEL_INFO, "%d: GPUTag=0x%x CPUTag=0x%llx timestamp=0x%llx stateMask=0x%llx\n",
+                      i, pCommonRecord->GPUTag, pCommonRecord->CPUTag, pCommonRecord->timeStamp,
+                      pCommonRecord->stateMask);
+            NV_PRINTF(LEVEL_INFO, "   idx=%d timeStamp=0x%x type=0x%x flags=0x%x count=%d owner=0x%x processId=0x%x\n",
+                      pRcDiagRecord->idx, pRcDiagRecord->timeStamp, pRcDiagRecord->type, pRcDiagRecord->flags,
+                      pRcDiagRecord->count, pRcDiagRecord->owner, processId);
+            // Dump every register entry of this record; values print as zero-padded 8-digit hex
+            for (NvU32 j = 0; j < pRcDiagRecord->count; j++)
+            {
+                NV_PRINTF(LEVEL_INFO, "     %d: offset=0x%08x tag=0x%08x value=0x%08x attribute=0x%08x\n",
+                          j, pRcDiagRecord->data[j].offset, pRcDiagRecord->data[j].tag, pRcDiagRecord->data[j].value, pRcDiagRecord->data[j].attribute);
+            }
+#endif
+            if (rcdbAddRcDiagRecFromGsp(pGpu, pRcDB, pCommonRecord, pRcDiagRecord) == NULL)
+            {
+                NV_PRINTF(LEVEL_WARNING, "Lost RC diagnostic record coming from GPU%d GSP: type=0x%x stateMask=0x%llx\n",
+                          gpuGetInstance(pGpu), pRcDiagRecord->type, pCommonRecord->stateMask);
+            }
+        }
+
+        rcDiagRecEnd = pRcDB->RcErrRptNextIdx - 1;
+
+        // Update records to have the correct PID associated with the channel
+        if (rcDiagRecStart != rcDiagRecEnd)
+        {
+            rcdbUpdateRcDiagRecContext(pRcDB,
+                                       rcDiagRecStart,
+                                       rcDiagRecEnd,
+                                       processId,
+                                       owner);
+        }
+    }
+
     // With CC enabled, CPU-RM needs to write error notifiers
     if (gpuIsCCFeatureEnabled(pGpu) && pKernelChannel != NULL)
     {
diff --git a/src/nvidia/src/kernel/gpu/intr/intr.c b/src/nvidia/src/kernel/gpu/intr/intr.c
index 6f9709659c..29aaf2337a 100644
--- a/src/nvidia/src/kernel/gpu/intr/intr.c
+++ b/src/nvidia/src/kernel/gpu/intr/intr.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -55,6 +55,7 @@ static struct
 } stuckIntr[MC_ENGINE_IDX_MAX];
 
 static NvBool _intrServiceStallExactList(OBJGPU *pGpu, Intr *pIntr, MC_ENGINE_BITVECTOR *pEngines);
+static void _intrLogLongRunningInterrupts(Intr *pIntr);
 static void _intrInitServiceTable(OBJGPU *pGpu, Intr *pIntr);
 
 
@@ -141,6 +142,8 @@ intrServiceStall_IMPL(OBJGPU *pGpu, Intr *pIntr)
         intrProcessDPCQueue_HAL(pGpu, pIntr);
     }
 
+    _intrLogLongRunningInterrupts(pIntr);
+
 exit:
     return;
 }
@@ -1067,6 +1070,7 @@ NvU32 intrServiceInterruptRecords_IMPL
     IntrService *pIntrService = pIntr->intrServiceTable[engineIdx].pInterruptService;
     NvU32 ret = 0;
     NvBool bShouldService;
+    NvU64 intrTiming, intrTiming2;
     IntrServiceClearInterruptArguments clearParams = {engineIdx};
     IntrServiceServiceInterruptArguments serviceParams = {engineIdx};
 
@@ -1088,7 +1092,18 @@ NvU32 intrServiceInterruptRecords_IMPL
 
     if (bShouldService)
     {
+        osGetPerformanceCounter(&intrTiming);
+
         ret = intrservServiceInterrupt(pGpu, pIntrService, &serviceParams);
+
+        osGetPerformanceCounter(&intrTiming2);
+        intrTiming = intrTiming2 - intrTiming;
+        if (intrTiming > LONG_INTR_LOG_LENGTH_NS)
+        {
+            pIntr->longIntrStats[engineIdx].intrCount++;
+            if (intrTiming > pIntr->longIntrStats[engineIdx].intrLength)
+                pIntr->longIntrStats[engineIdx].intrLength = intrTiming;
+        }
     }
     return ret;
 }
@@ -1401,6 +1416,29 @@ _intrExitCriticalSection
     }
 }
 
+// Warn, at most once per rate-limit window per engine, about long-running interrupts
+static void
+_intrLogLongRunningInterrupts(Intr *pIntr)
+{
+    NvU64 timeNow;
+    NvU32 engineIdx;
+
+    osGetPerformanceCounter(&timeNow);
+    for (engineIdx = 0; engineIdx < MC_ENGINE_IDX_MAX; ++engineIdx)
+    {
+        // Skip engines with nothing recorded or that printed within the rate-limit window
+        if ((pIntr->longIntrStats[engineIdx].intrCount == 0) ||
+            (timeNow - pIntr->longIntrStats[engineIdx].lastPrintTime <= LONG_INTR_LOG_RATELIMIT_NS))
+            continue;
+        NV_PRINTF(LEVEL_WARNING, "%u long-running interrupts (%llu ns or slower) from engine %u, longest taking %llu ns\n",
+                  pIntr->longIntrStats[engineIdx].intrCount, LONG_INTR_LOG_LENGTH_NS, engineIdx, pIntr->longIntrStats[engineIdx].intrLength);
+        // Begin a new accumulation window for this engine
+        pIntr->longIntrStats[engineIdx].intrCount = 0;
+        pIntr->longIntrStats[engineIdx].intrLength = 0;
+        pIntr->longIntrStats[engineIdx].lastPrintTime = timeNow;
+    }
+}
+
 static NvBool
 _intrServiceStallExactList
 (
@@ -1608,6 +1646,9 @@ intrServiceStallList_IMPL
     // allow the isr to come in.
     _intrExitCriticalSection(pGpu, pIntr, &intrMaskCtx);
 
+    // Delay prints until after exiting critical sections to reduce perf impact
+    _intrLogLongRunningInterrupts(pIntr);
+
     NV_ASSERT_OK(resservRestoreTlsCallContext(pOldContext));
 }
 
diff --git a/src/nvidia/src/kernel/platform/chipset/chipset_info.c b/src/nvidia/src/kernel/platform/chipset/chipset_info.c
index 31e5601b5e..6984a6a590 100644
--- a/src/nvidia/src/kernel/platform/chipset/chipset_info.c
+++ b/src/nvidia/src/kernel/platform/chipset/chipset_info.c
@@ -903,6 +903,17 @@ Intel_7A04_setupFunc
     return NV_OK;
 }
 
+static NV_STATUS
+Intel_1B81_setupFunc
+(
+    OBJCL *pCl
+)
+{
+    // Flag this chipset as not relaxed-ordering capable; setup itself cannot fail
+    pCl->setProperty(pCl, PDB_PROP_CL_RELAXED_ORDERING_NOT_CAPABLE, NV_TRUE);
+    return NV_OK;
+}
+
 static NV_STATUS
 Nvidia_T210_setupFunc
 (
diff --git a/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c b/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c
index f27711aeab..50c3174654 100644
--- a/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c
+++ b/src/nvidia/src/kernel/platform/chipset/chipset_pcie.c
@@ -72,7 +72,7 @@ static void      objClGpuUnmapRootPort(OBJGPU *);
 static void      objClGpuMapEnhCfgSpace(OBJGPU *, OBJCL *);
 static void      objClGpuUnmapEnhCfgSpace(OBJGPU *);
 static NV_STATUS objClGpuIs3DController(OBJGPU *);
-static void      objClLoadPcieVirtualP2PApproval(OBJGPU *);
+static void      objClLoadPcieVirtualP2PApproval(OBJGPU *, OBJCL *);
 static void      objClCheckForExternalGpu(OBJGPU *, OBJCL *);
 static void      _objClAdjustTcVcMap(OBJGPU *, OBJCL *, PORTDATA *);
 static void      _objClGetDownstreamAtomicsEnabledMask(void  *, NvU32, NvU32 *);
@@ -951,7 +951,7 @@ clUpdatePcieConfig_IMPL(OBJGPU *pGpu, OBJCL *pCl)
     }
 
     // Load PCI Express virtual P2P approval config
-    objClLoadPcieVirtualP2PApproval(pGpu);
+    objClLoadPcieVirtualP2PApproval(pGpu, pCl);
 
     //
     // Disable NOSNOOP bit for Passthrough.
@@ -4259,12 +4259,13 @@ clFreePcieConfigSpaceBase_IMPL(OBJCL *pCl)
 // other.
 //
 static void
-objClLoadPcieVirtualP2PApproval(OBJGPU *pGpu)
+objClLoadPcieVirtualP2PApproval(OBJGPU *pGpu, OBJCL *pCl)
 {
     void *handle;
     NvU32 data32;
     NvU8  version;
     NvU8  cap;
+    NvU8  rlxdOrderingCfg = 0;
     NvU8  bus = gpuGetBus(pGpu);
     NvU8  device = gpuGetDevice(pGpu);
     NvU32 domain = gpuGetDomain(pGpu);
@@ -4324,6 +4325,19 @@ objClLoadPcieVirtualP2PApproval(OBJGPU *pGpu)
                                             _PEER_CLIQUE_ID, data32);
     pGpu->pciePeerClique.bValid = NV_TRUE;
 
+    rlxdOrderingCfg = (NvU8)DRF_VAL(_PCI, _VIRTUAL_P2P_APPROVAL_CAP_1,
+                                    _RELAXED_ORDERING, data32);
+
+    if (rlxdOrderingCfg == NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DISABLE)
+    {
+        // Unset relaxed ordering based on hypervisor's request
+        pCl->setProperty(pCl, PDB_PROP_CL_RELAXED_ORDERING_NOT_CAPABLE, NV_TRUE);
+
+        NV_PRINTF(LEVEL_INFO,
+                  "Hypervisor has disabled relaxed ordering on GPU%u\n",
+                  gpuGetInstance(pGpu));
+    }
+
     NV_PRINTF(LEVEL_INFO,
               "Hypervisor has assigned GPU%u to peer clique %u\n",
               gpuGetInstance(pGpu), pGpu->pciePeerClique.id);
diff --git a/src/nvidia/src/kernel/rmapi/alloc_free.c b/src/nvidia/src/kernel/rmapi/alloc_free.c
index b6981987f8..1c1f8a2b86 100644
--- a/src/nvidia/src/kernel/rmapi/alloc_free.c
+++ b/src/nvidia/src/kernel/rmapi/alloc_free.c
@@ -328,6 +328,36 @@ serverTopLock_Epilogue
     }
 }
 
+//
+// Collect the GPU mask of every GpuResource that appears as the context of a
+// mapping back-referencing pResourceRef. A NULL pResourceRef yields an empty mask.
+//
+static NvU32
+_resGetBackRefGpusMask(RsResourceRef *pResourceRef)
+{
+    RS_INTER_MAPPING_BACK_REF *pItem;
+    NvU32 mask = 0x0;
+
+    if (pResourceRef == NULL)
+        return mask;
+
+    for (pItem = listHead(&pResourceRef->interBackRefs);
+         pItem != NULL;
+         pItem = listNext(&pResourceRef->interBackRefs, pItem))
+    {
+        GpuResource *pGpuRes =
+            dynamicCast(pItem->pMapping->pContextRef->pResource, GpuResource);
+
+        // Contexts that are not GpuResources contribute no GPU
+        if (pGpuRes == NULL)
+            continue;
+
+        mask |= gpumgrGetGpuMask(GPU_RES_GET_GPU(pGpuRes));
+    }
+
+    return mask;
+}
+
 NV_STATUS
 serverResLock_Prologue
 (
@@ -445,8 +475,15 @@ serverResLock_Prologue
         }
         else
         {
-            status = rmGpuGroupLockAcquire(pParentGpu->gpuInstance,
-                                           GPU_LOCK_GRP_DEVICE,
+            //
+            // Lock the parent GPU and, if specified, any GPUs that the
+            // resource may back-reference via mappings.
+            //
+            pLockInfo->gpuMask = gpumgrGetGpuMask(pParentGpu) |
+                                 _resGetBackRefGpusMask(pLockInfo->pResRefToBackRef);
+
+            status = rmGpuGroupLockAcquire(0,
+                                           GPU_LOCK_GRP_MASK,
                                            GPUS_LOCK_FLAGS_NONE,
                                            RM_LOCK_MODULES_CLIENT,
                                            &pLockInfo->gpuMask);
diff --git a/src/nvidia/src/libraries/nvport/string/string_generic.c b/src/nvidia/src/libraries/nvport/string/string_generic.c
index c576ea81cc..7eb573decd 100644
--- a/src/nvidia/src/libraries/nvport/string/string_generic.c
+++ b/src/nvidia/src/libraries/nvport/string/string_generic.c
@@ -29,6 +29,7 @@
 #include "nvport/nvport.h"
 #include "nvmisc.h"
 
+
 #ifndef NVPORT_STRING_DONT_DEFINE_portStringLength
 NvLength
 portStringLength
@@ -75,18 +76,34 @@ portStringCompare
     NvLength maxLength
 )
 {
-    NvLength length;
+    NvLength i;
 
     PORT_ASSERT_CHECKED(str1 != NULL);
     PORT_ASSERT_CHECKED(str2 != NULL);
 
-    length = portStringLengthSafe(str1, maxLength);
+    for (i = 0; i < maxLength; i++)
+    {
+        if (str1[i] != str2[i])
+        {
+            //
+            // Cast to unsigned before assigning to NvS32, to avoid sign
+            // extension.  E.g., if str1[i] is 0xff, we want s1 to contain
+            // 0xff, not -1.  In practice, this shouldn't matter for printable
+            // characters, but still...
+            //
+            NvS32 s1 = (unsigned char)str1[i];
+            NvS32 s2 = (unsigned char)str2[i];
+            return s1 - s2;
+        }
 
-    // Add 1 for the null terminator.
-    if (length < maxLength)
-        length++;
+        if ((str1[i] == '\0') &&
+            (str2[i] == '\0'))
+        {
+            break;
+        }
+    }
 
-    return  portMemCmp(str1, str2, length);
+    return 0;
 }
 #endif
 
diff --git a/src/nvidia/src/libraries/resserv/src/rs_server.c b/src/nvidia/src/libraries/resserv/src/rs_server.c
index 99e7bc7e19..0328b2e329 100644
--- a/src/nvidia/src/libraries/resserv/src/rs_server.c
+++ b/src/nvidia/src/libraries/resserv/src/rs_server.c
@@ -145,6 +145,7 @@ NV_STATUS serverFreeResourceTreeUnderLock(RsServer *pServer, RS_RES_FREE_PARAMS
         return status;
 
     pLockInfo->flags |= RS_LOCK_FLAGS_FREE_SESSION_LOCK;
+    pLockInfo->pResRefToBackRef = pResourceRef;
     pLockInfo->traceOp = RS_LOCK_TRACE_FREE;
     pLockInfo->traceClassId = pResourceRef->externalClassId;
     status = serverResLock_Prologue(pServer, LOCK_ACCESS_WRITE, pLockInfo, &releaseFlags);
diff --git a/version.mk b/version.mk
index f6bc46ebef..6c8b3cc271 100644
--- a/version.mk
+++ b/version.mk
@@ -1,4 +1,4 @@
-NVIDIA_VERSION = 535.216.03
+NVIDIA_VERSION = 535.230.02
 
 # This file.
 VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))