diff --git a/src/dxvk/dxvk_buffer.cpp b/src/dxvk/dxvk_buffer.cpp
index 790081c0b..0098a3c2e 100644
--- a/src/dxvk/dxvk_buffer.cpp
+++ b/src/dxvk/dxvk_buffer.cpp
@@ -128,12 +128,6 @@ namespace dxvk {
         "\n  usage: ", info.usage));
     }
 
-    VkMemoryAllocateFlags memoryAllocateFlags = 0;
-
-    if (info.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
-      memoryAllocateFlags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
-    }
-    
     VkMemoryDedicatedRequirements dedicatedRequirements;
     dedicatedRequirements.sType                       = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;
     dedicatedRequirements.pNext                       = VK_NULL_HANDLE;
@@ -177,11 +171,15 @@ namespace dxvk {
     bool isGpuWritable = (m_info.access & (
       VK_ACCESS_SHADER_WRITE_BIT |
       VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT)) != 0;
-    float priority = isGpuWritable ? 1.0f : 0.5f;
     
+    DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable);
+
+    if (isGpuWritable)
+      hints.set(DxvkMemoryFlag::GpuWritable);
+
     // Ask driver whether we should be using a dedicated allocation
     handle.memory = m_memAlloc->alloc(&memReq.memoryRequirements,
-      dedicatedRequirements, dedMemoryAllocInfo, m_memFlags, memoryAllocateFlags, priority, category);
+      dedicatedRequirements, dedMemoryAllocInfo, m_memFlags, hints, category);
     
     if (vkd->vkBindBufferMemory(vkd->device(), handle.buffer,
         handle.memory.memory(), handle.memory.offset()) != VK_SUCCESS)
diff --git a/src/dxvk/dxvk_image.cpp b/src/dxvk/dxvk_image.cpp
index ff13e1956..06fd3b99a 100644
--- a/src/dxvk/dxvk_image.cpp
+++ b/src/dxvk/dxvk_image.cpp
@@ -137,7 +137,7 @@ namespace dxvk {
 
     m_vkd->vkGetImageMemoryRequirements2(
       m_vkd->device(), &memReqInfo, &memReq);
- 
+
     if (info.tiling != VK_IMAGE_TILING_LINEAR && !dedicatedRequirements.prefersDedicatedAllocation) {
       memReq.memoryRequirements.size      = align(memReq.memoryRequirements.size,       memAlloc.bufferImageGranularity());
       memReq.memoryRequirements.alignment = align(memReq.memoryRequirements.alignment , memAlloc.bufferImageGranularity());
@@ -151,8 +151,11 @@ namespace dxvk {
       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) != 0;
     
-    float priority = isGpuWritable ? 1.0f : 0.5f;
+    DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable);
 
+    if (isGpuWritable)
+      hints.set(DxvkMemoryFlag::GpuWritable);
+    
     if (m_shared) {
       dedicatedRequirements.prefersDedicatedAllocation  = VK_TRUE;
       dedicatedRequirements.requiresDedicatedAllocation = VK_TRUE;
@@ -160,7 +163,7 @@ namespace dxvk {
 
     // Ask driver whether we should be using a dedicated allocation
     m_image.memory = memAlloc.alloc(&memReq.memoryRequirements,
-      dedicatedRequirements, dedMemoryAllocInfo, memFlags, 0, priority, category);
+      dedicatedRequirements, dedMemoryAllocInfo, memFlags, hints, category);
     
     // Try to bind the allocated memory slice to the image
     if (m_vkd->vkBindImageMemory(m_vkd->device(), m_image.image,
diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp
index fd4a2d010..8513fc4b3 100644
--- a/src/dxvk/dxvk_memory.cpp
+++ b/src/dxvk/dxvk_memory.cpp
@@ -19,6 +19,9 @@
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
+
+#include <algorithm>
+
 #include "dxvk_device.h"
 #include "dxvk_memory.h"
 
@@ -221,8 +224,9 @@ DxvkMemory::DxvkMemory() { }
   DxvkMemoryChunk::DxvkMemoryChunk(
           DxvkMemoryAllocator*  alloc,
           DxvkMemoryType*       type,
-          DxvkDeviceMemory      memory)
-  : m_alloc(alloc), m_type(type), m_memory(memory) {
+          DxvkDeviceMemory      memory,
+          DxvkMemoryFlags       hints)
+  : m_alloc(alloc), m_type(type), m_memory(memory), m_hints(hints) {
     // Mark the entire chunk as free
     m_freeList.push_back(FreeSlice { 0, memory.memSize });
   }
@@ -236,17 +240,14 @@ DxvkMemory::DxvkMemory() { }
   
   
   DxvkMemory DxvkMemoryChunk::alloc(
-          VkMemoryPropertyFlags    propertyFlags,
-          VkMemoryAllocateFlags    allocateFlags,
+          VkMemoryPropertyFlags    flags,
           VkDeviceSize             size,
           VkDeviceSize             align,
-          float                    priority,
+          DxvkMemoryFlags          hints,
           DxvkMemoryStats::Category category) {
-    // Property and allocate flags must be compatible. This could
+    // Property flags must be compatible. This could
     // be refined a bit in the future if necessary.
-    if (m_memory.memPropertyFlags != propertyFlags
-     || m_memory.memAllocateFlags != allocateFlags
-     || m_memory.priority != priority)
+    if (m_memory.memFlags != flags || !checkHints(hints))
       return DxvkMemory();
     
     // If the chunk is full, return
@@ -322,20 +323,30 @@ DxvkMemory::DxvkMemory() { }
     m_freeList.push_back({ offset, length });
   }
   
-  // NV-DXVK start: Free unused memory
-  bool DxvkMemoryChunk::isWholeChunkFree() const {
-    if (m_freeList.size() != 1)
-      return false;
+  
+  bool DxvkMemoryChunk::isEmpty() const {
+    // Unused chunks consist of exactly one free slice spanning the whole chunk
+    return m_freeList.size() == 1 && m_freeList.front().length == m_memory.memSize;
+  }
+
+
+  bool DxvkMemoryChunk::isCompatible(const Rc<DxvkMemoryChunk>& other) const {
+    return m_memory.memFlags == other->m_memory.memFlags && m_hints == other->m_hints;
+  }
 
-    if (m_freeList[0].offset != 0)
-      return false;
 
-    if (m_freeList[0].length != m_memory.memSize)
-      return false;
+  bool DxvkMemoryChunk::checkHints(DxvkMemoryFlags hints) const {
+    DxvkMemoryFlags mask(
+      DxvkMemoryFlag::Small,
+      DxvkMemoryFlag::GpuReadable,
+      DxvkMemoryFlag::GpuWritable);
 
-    return true;
+    if (hints.test(DxvkMemoryFlag::IgnoreConstraints))
+      mask = DxvkMemoryFlags();
+
+    return (m_hints & mask) == (hints & mask);
   }
-  // NV-DXVK end
+
 
   DxvkMemoryAllocator::DxvkMemoryAllocator(const DxvkDevice* device)
   : m_vkd             (device->vkd()),
@@ -358,7 +369,6 @@ DxvkMemory::DxvkMemory() { }
       m_memTypes[i].heapId     = m_memProps.memoryTypes[i].heapIndex;
       m_memTypes[i].memType    = m_memProps.memoryTypes[i];
       m_memTypes[i].memTypeId  = i;
-      m_memTypes[i].chunkSize  = pickChunkSize(i);
     }
 
     /* Work around an issue on Nvidia drivers where using the entire
@@ -373,10 +383,8 @@ DxvkMemory::DxvkMemory() { }
         for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) {
           VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
 
-          if ((m_memTypes[i].memType.propertyFlags & flags) == flags) {
+          if ((m_memTypes[i].memType.propertyFlags & flags) == flags)
             m_memTypes[i].heap->budget = 32 << 20;
-            m_memTypes[i].chunkSize    =  1 << 20;
-          }
         }
       }
     }
@@ -392,9 +400,8 @@ DxvkMemory::DxvkMemory() { }
     const VkMemoryRequirements*             req,
     const VkMemoryDedicatedRequirements&    dedAllocReq,
     const VkMemoryDedicatedAllocateInfo&    dedAllocInfo,
-          VkMemoryPropertyFlags             propertyFlags,
-          VkMemoryAllocateFlags             allocateFlags,
-          float                             priority,
+          VkMemoryPropertyFlags             flags,
+          DxvkMemoryFlags                   hints,
           DxvkMemoryStats::Category         category) {
     ScopedCpuProfileZone();
 
@@ -402,26 +409,45 @@ DxvkMemory::DxvkMemory() { }
     // Note: The mutex here in DXVK has been removed in favor of the per-memory type mutex in tryAllocFromType.
     // NV-DXVK end
 
+    // Keep small allocations together to avoid fragmenting
+    // chunks for larger resources with lots of small gaps,
+    // as well as resources with potentially weird lifetimes
+    if (req->size <= SmallAllocationThreshold) {
+      hints.set(DxvkMemoryFlag::Small);
+      hints.clr(DxvkMemoryFlag::GpuWritable, DxvkMemoryFlag::GpuReadable);
+    }
+
+    // Ignore all hints for host-visible allocations since they
+    // usually don't make much sense for those resources
+    if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+      hints = DxvkMemoryFlags();
+
     // Try to allocate from a memory type which supports the given flags exactly
     auto dedAllocPtr = dedAllocReq.prefersDedicatedAllocation ? &dedAllocInfo : nullptr;
-    DxvkMemory result = this->tryAlloc(req, dedAllocPtr, propertyFlags, allocateFlags, priority, category);
+    DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, hints, category);
 
     // If the first attempt failed, try ignoring the dedicated allocation
     if (!result && dedAllocPtr && !dedAllocReq.requiresDedicatedAllocation) {
-      result = this->tryAlloc(req, nullptr, propertyFlags, allocateFlags, priority, category);
+      result = this->tryAlloc(req, nullptr, flags, hints, category);
       dedAllocPtr = nullptr;
     }
 
+    // Retry without the hint constraints; dedAllocPtr is only still set if dedication is required
+    if (!result) {
+      hints.set(DxvkMemoryFlag::IgnoreConstraints);
+      result = this->tryAlloc(req, dedAllocPtr, flags, hints, category);
+    }
+
     // If that still didn't work, probe slower memory types as well
     VkMemoryPropertyFlags optFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
                                    | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
     VkMemoryPropertyFlags remFlags = 0;
     
-    while (!result && (propertyFlags & optFlags)) {
+    while (!result && (flags & optFlags)) {
       remFlags |= optFlags & (0 - optFlags); // Note: 0 - x is a more well defined version of -x for unsigned values
       optFlags &= ~remFlags;
 
-      result = this->tryAlloc(req, dedAllocPtr, propertyFlags & ~remFlags, allocateFlags, priority, category);
+      result = this->tryAlloc(req, dedAllocPtr, flags & ~remFlags, hints, category);
     }
     
     if (!result) {
@@ -431,8 +457,7 @@ DxvkMemory::DxvkMemory() { }
         "DxvkMemoryAllocator: Memory allocation failed",
         "\n  Size:      ", req->size,
         "\n  Alignment: ", req->alignment,
-        "\n  Mem property flags: ", "0x", std::hex, propertyFlags,
-        "\n  Mem allocate flags: ", "0x", std::hex, allocateFlags,
+        "\n  Mem property flags: ", "0x", std::hex, flags,
         "\n  Mem types: ", "0x", std::hex, req->memoryTypeBits));
 
       for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) {
@@ -454,36 +479,29 @@ DxvkMemory::DxvkMemory() { }
     return result;
   }
   
-  // NV-DXVK start: Free unused memory
+  // NV-DXVK start: Free unused memory
   void DxvkMemoryAllocator::freeUnusedChunks() {
-    for (auto& type : m_memTypes) {
-      std::lock_guard<dxvk::mutex> lock(type.mutex);
-
-      const auto new_end_iterator = std::remove_if(type.chunks.begin(), type.chunks.end(), [](const auto& chunk) {
-        return chunk->isWholeChunkFree();
-      });
-
-      type.chunks.erase(new_end_iterator, type.chunks.end());
+    for (auto& heap : m_memHeaps) {
+      freeEmptyChunks(&heap);
     }
   }
-  // NV-DXVK end
+  // NV-DXVK end
 
   DxvkMemory DxvkMemoryAllocator::tryAlloc(
     const VkMemoryRequirements*             req,
     const VkMemoryDedicatedAllocateInfo*    dedAllocInfo,
-          VkMemoryPropertyFlags             propertyFlags,
-          VkMemoryAllocateFlags             allocateFlags,
-          float                             priority,
+          VkMemoryPropertyFlags             flags,
+          DxvkMemoryFlags                   hints,
           DxvkMemoryStats::Category         category) {
     DxvkMemory result;
 
     for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) {
       const bool supported = (req->memoryTypeBits & (1u << i)) != 0;
-      const bool adequate  = (m_memTypes[i].memType.propertyFlags & propertyFlags) == propertyFlags;
+      const bool adequate  = (m_memTypes[i].memType.propertyFlags & flags) == flags;
       
       if (supported && adequate) {
         result = this->tryAllocFromType(&m_memTypes[i],
-                                        propertyFlags, allocateFlags, req->size, req->alignment, priority, dedAllocInfo, category);
+                                        flags, req->size, req->alignment, hints, dedAllocInfo, category);
       }
     }
     
@@ -493,11 +511,10 @@ DxvkMemory::DxvkMemory() { }
   
   DxvkMemory DxvkMemoryAllocator::tryAllocFromType(
           DxvkMemoryType*                   type,
-          VkMemoryPropertyFlags             propertyFlags,
-          VkMemoryAllocateFlags             allocateFlags,
+          VkMemoryPropertyFlags             flags,
           VkDeviceSize                      size,
           VkDeviceSize                      align,
-          float                             priority,
+          DxvkMemoryFlags                   hints,
     const VkMemoryDedicatedAllocateInfo*    dedAllocInfo,
           DxvkMemoryStats::Category         category
           ) {
@@ -506,32 +523,47 @@ DxvkMemory::DxvkMemory() { }
     // NV-DXVK end
 
     // Prevent unnecessary external host memory fragmentation
-    bool isDeviceLocal = (propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0;
-
-    if (!isDeviceLocal)
-      priority = 0.0f;
+    bool isDeviceLocal = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0;
+  
+    VkDeviceSize chunkSize = pickChunkSize(type->memTypeId, hints);
 
     DxvkMemory memory;
 
-    if (size >= type->chunkSize || dedAllocInfo) {
+    if (size >= chunkSize || dedAllocInfo) {
+      if (this->shouldFreeEmptyChunks(type->heap, size)) {
+        // NV-DXVK start: use a per-memory-type mutex
+        type->mutex.unlock();
+        this->freeEmptyChunks(type->heap);
+        type->mutex.lock();
+        // NV-DXVK end
+      }
+
       DxvkDeviceMemory devMem = this->tryAllocDeviceMemory(
-        type, propertyFlags, allocateFlags, size, priority, dedAllocInfo, category);
+        type, flags, size, hints, dedAllocInfo, category);
 
       if (devMem.memHandle != VK_NULL_HANDLE)
         memory = DxvkMemory(this, nullptr, type, devMem.memHandle, 0, size, devMem.memPointer, category);
     } else {
       for (uint32_t i = 0; i < type->chunks.size() && !memory; i++)
-        memory = type->chunks[i]->alloc(propertyFlags, allocateFlags, size, align, priority, category);
+        memory = type->chunks[i]->alloc(flags, size, align, hints, category);
       
       if (!memory) {
         DxvkDeviceMemory devMem;
-        
-        for (uint32_t i = 0; i < 6 && (type->chunkSize >> i) >= size && !devMem.memHandle; i++)
-          devMem = tryAllocDeviceMemory(type, propertyFlags, allocateFlags, type->chunkSize >> i, priority, nullptr, category);
+
+        if (this->shouldFreeEmptyChunks(type->heap, chunkSize)) {
+          // NV-DXVK start: use a per-memory-type mutex
+          type->mutex.unlock();
+          this->freeEmptyChunks(type->heap);
+          type->mutex.lock();
+          // NV-DXVK end
+        }
+
+        for (uint32_t i = 0; i < 6 && (chunkSize >> i) >= size && !devMem.memHandle; i++)
+          devMem = tryAllocDeviceMemory(type, flags, chunkSize >> i, hints, nullptr, category);
 
         if (devMem.memHandle) {
-          Rc<DxvkMemoryChunk> chunk = new DxvkMemoryChunk(this, type, devMem);
-          memory = chunk->alloc(propertyFlags, allocateFlags, size, align, priority, category);
+          Rc<DxvkMemoryChunk> chunk = new DxvkMemoryChunk(this, type, devMem, hints);
+          memory = chunk->alloc(flags, size, align, hints, category);
 
           type->chunks.push_back(std::move(chunk));
         }
@@ -547,29 +579,34 @@ DxvkMemory::DxvkMemory() { }
   
   DxvkDeviceMemory DxvkMemoryAllocator::tryAllocDeviceMemory(
           DxvkMemoryType*                   type,
-          VkMemoryPropertyFlags             propertyFlags,
-          VkMemoryAllocateFlags             allocateFlags,
+          VkMemoryPropertyFlags             flags,
           VkDeviceSize                      size,
-          float                             priority,
+          DxvkMemoryFlags                   hints,
     const VkMemoryDedicatedAllocateInfo*    dedAllocInfo,
           DxvkMemoryStats::Category         category) {
     ScopedCpuProfileZone();
-    bool useMemoryPriority = (propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
+    bool useMemoryPriority = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
                           && (m_device->features().extMemoryPriority.memoryPriority);
     
     if (type->heap->budget && type->heap->stats.totalAllocated() + size > type->heap->budget)
       return DxvkDeviceMemory();
 
+    float priority = 0.0f;
+
+    if (hints.test(DxvkMemoryFlag::GpuReadable))
+      priority = 0.5f;
+    if (hints.test(DxvkMemoryFlag::GpuWritable))
+      priority = 1.0f;
+
     DxvkDeviceMemory result;
     result.memSize  = size;
-    result.memPropertyFlags = propertyFlags;
-    result.memAllocateFlags = allocateFlags;
+    result.memFlags = flags;
     result.priority = priority;
 
     VkMemoryAllocateFlagsInfo allocateFlagsInfo;
     allocateFlagsInfo.sType      = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
     allocateFlagsInfo.pNext      = dedAllocInfo;
-    allocateFlagsInfo.flags      = allocateFlags;
+    allocateFlagsInfo.flags      = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; // NOTE(review): set unconditionally, independent of resource usage; Vulkan requires the bufferDeviceAddress feature for this flag - confirm it is always enabled
     allocateFlagsInfo.deviceMask = 0;
 
     VkMemoryPriorityAllocateInfoEXT prio;
@@ -586,7 +623,7 @@ DxvkMemory::DxvkMemory() { }
     if (m_vkd->vkAllocateMemory(m_vkd->device(), &info, nullptr, &result.memHandle) != VK_SUCCESS)
       return DxvkDeviceMemory();
     
-    if (propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+    if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
       VkResult status = m_vkd->vkMapMemory(m_vkd->device(), result.memHandle, 0, VK_WHOLE_SIZE, 0, &result.memPointer);
 
       if (status != VK_SUCCESS) {
@@ -631,6 +668,18 @@ DxvkMemory::DxvkMemory() { }
           VkDeviceSize          offset,
           VkDeviceSize          length) {
     chunk->free(offset, length);
+
+    if (chunk->isEmpty()) {
+      Rc<DxvkMemoryChunk> chunkRef = chunk;
+
+      // Free the chunk if we have to, or at least put it at the end of
+      // the list so that chunks already in use are preferred for allocations.
+      // Range erase: the single-iterator form is UB if remove() returns end().
+      type->chunks.erase(std::remove(type->chunks.begin(), type->chunks.end(), chunkRef), type->chunks.end());
+
+      if (!this->shouldFreeChunk(type, chunkRef))
+        type->chunks.push_back(std::move(chunkRef));
+    }
   }
   
 
@@ -643,7 +692,7 @@ DxvkMemory::DxvkMemory() { }
   }
 
 
-  VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId) const {
+  VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId, DxvkMemoryFlags hints) const {
     VkMemoryType type = m_memProps.memoryTypes[memTypeId];
     VkMemoryHeap heap = m_memProps.memoryHeaps[type.heapIndex];
     
@@ -653,11 +702,14 @@ DxvkMemory::DxvkMemory() { }
     VkDeviceSize chunkSize = (isDeviceLocal ? options.deviceLocalMemoryChunkSizeMB : options.otherMemoryChunkSizeMB) << 20;
     // NV-DXVK end
 
-    // Try to waste a bit less system memory in 32-bit
-    // applications due to address space constraints
+    if (hints.test(DxvkMemoryFlag::Small))
+      chunkSize = 16 << 20;
+
+    // Try to waste a bit less system memory especially in
+    // 32-bit applications due to address space constraints
     if (env::is32BitHostPlatform()) {
       if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-        chunkSize = 32 << 20;
+        chunkSize = 16 << 20;
     }
 
     // Reduce the chunk size on small heaps so
@@ -667,5 +719,55 @@ DxvkMemory::DxvkMemory() { }
 
     return chunkSize;
   }
-  
+
+
+  bool DxvkMemoryAllocator::shouldFreeChunk(
+    const DxvkMemoryType*       type,
+    const Rc<DxvkMemoryChunk>&  chunk) const {
+    // A heap that is over budget gives up every empty chunk
+    const bool overBudget = this->shouldFreeEmptyChunks(type->heap, 0);
+
+    // Otherwise the chunk is redundant only if the memory type
+    // already holds another empty chunk with matching flags and
+    // hints, so that one spare chunk per kind stays cached.
+    const auto isSpare = [&chunk] (const Rc<DxvkMemoryChunk>& other) {
+      return other != chunk && other->isEmpty() && other->isCompatible(chunk);
+    };
+
+    return overBudget
+        || std::any_of(type->chunks.begin(), type->chunks.end(), isSpare);
+  }
+
+
+  bool DxvkMemoryAllocator::shouldFreeEmptyChunks(
+    const DxvkMemoryHeap*       heap,
+          VkDeviceSize          allocationSize) const {
+    // Heaps without an explicit budget fall back to 80% of their size
+    const VkDeviceSize fallback = (heap->properties.size * 4) / 5;
+    const VkDeviceSize limit    = heap->budget ? heap->budget : fallback;
+
+    // True if the heap would exceed its limit with this allocation
+    return heap->stats.totalAllocated() + allocationSize > limit;
+  }
+
+
+  void DxvkMemoryAllocator::freeEmptyChunks(
+    const DxvkMemoryHeap*       heap) {
+    // Predicate selecting chunks without any live allocations
+    const auto isUnused = [] (const Rc<DxvkMemoryChunk>& chunk) {
+      return chunk->isEmpty();
+    };
+
+    for (uint32_t typeId = 0; typeId < m_memProps.memoryTypeCount; typeId++) {
+      DxvkMemoryType& memType = m_memTypes[typeId];
+      // Only memory types backed by the given heap are affected
+      if (memType.heap == heap) {
+        // NV-DXVK start: use a per-memory-type mutex
+        std::lock_guard<dxvk::mutex> lock(memType.mutex);
+        // NV-DXVK end
+        memType.chunks.erase(std::remove_if(memType.chunks.begin(), memType.chunks.end(), isUnused), memType.chunks.end());
+      }
+    }
+  }
+
 }
diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h
index 35f555f3d..a74b5b39d 100644
--- a/src/dxvk/dxvk_memory.h
+++ b/src/dxvk/dxvk_memory.h
@@ -125,8 +125,7 @@ namespace dxvk {
     VkDeviceMemory        memHandle        = VK_NULL_HANDLE;
     void*                 memPointer       = nullptr;
     VkDeviceSize          memSize          = 0;
-    VkMemoryPropertyFlags memPropertyFlags = 0;
-    VkMemoryAllocateFlags memAllocateFlags = 0;
+    VkMemoryPropertyFlags memFlags         = 0;
     float                 priority         = 0.0f;
   };
 
@@ -158,8 +157,6 @@ namespace dxvk {
     VkMemoryType      memType;
     uint32_t          memTypeId;
 
-    VkDeviceSize      chunkSize;
-
     std::vector<Rc<DxvkMemoryChunk>> chunks;
 
     // NV-DXVK start: use a per-memory-type mutex rather than an allocator-wide mutex
@@ -259,6 +256,22 @@ namespace dxvk {
     void free();
     
   };
+
+
+  /**
+   * \brief Memory allocation hints
+   *
+   * Used to batch similar allocations into the same
+   * set of chunks, which may help with fragmentation.
+   */
+  enum class DxvkMemoryFlag : uint32_t {
+    Small             = 0,  ///< Allocation no larger than SmallAllocationThreshold
+    GpuReadable       = 1,  ///< Medium-priority resource (memory priority 0.5)
+    GpuWritable       = 2,  ///< High-priority resource (memory priority 1.0)
+    IgnoreConstraints = 3,  ///< Allocate from chunks regardless of their hints
+  };
+
+  using DxvkMemoryFlags = Flags<DxvkMemoryFlag>;
   
   
   /**
@@ -274,7 +287,8 @@ namespace dxvk {
     DxvkMemoryChunk(
             DxvkMemoryAllocator*  alloc,
             DxvkMemoryType*       type,
-            DxvkDeviceMemory      memory);
+            DxvkDeviceMemory      memory,
+            DxvkMemoryFlags       hints);  ///< Hints matched against later alloc() requests
     
     ~DxvkMemoryChunk();
 
@@ -283,19 +297,18 @@ namespace dxvk {
      * 
      * On failure, this returns a slice with
      * \c VK_NULL_HANDLE as the memory handle.
-     * \param [in] flags Requested memory flags
+     * \param [in] flags Requested memory type flags
      * \param [in] size Number of bytes to allocate
      * \param [in] align Required alignment
-     * \param [in] priority Requested priority
+     * \param [in] hints Memory category
      * \returns The allocated memory slice
      */
     DxvkMemory alloc(
-      VkMemoryPropertyFlags propertyFlags,
-      VkMemoryAllocateFlags allocateFlags,
-      VkDeviceSize size,
-      VkDeviceSize align,
-      float priority,
-      DxvkMemoryStats::Category category);
+            VkMemoryPropertyFlags flags,
+            VkDeviceSize          size,
+            VkDeviceSize          align,
+            DxvkMemoryFlags       hints,
+            DxvkMemoryStats::Category category);
     
     /**
      * \brief Frees memory
@@ -309,16 +322,18 @@ namespace dxvk {
     void free(
             VkDeviceSize  offset,
             VkDeviceSize  length);
-    
-    // NV-DXVK start: Free unused memory
+
     /**
-     * \brief Queries if an entire chunk is considered free.
-     * 
-     * Returns true if no allocations exist
-     *   on this chunk.
+     * \brief Checks whether the chunk is being used
+     * \returns \c true if there are no allocations left
      */
-    bool isWholeChunkFree() const;
-    // NV-DXVK end
+    bool isEmpty() const;
+
+    /**
+     * \brief Checks whether hints and flags of another chunk match
+     * \param [in] other The chunk to compare to
+     */
+    bool isCompatible(const Rc<DxvkMemoryChunk>& other) const;
 
   private:
     
@@ -330,8 +345,11 @@ namespace dxvk {
     DxvkMemoryAllocator*  m_alloc;
     DxvkMemoryType*       m_type;
     DxvkDeviceMemory      m_memory;
+    DxvkMemoryFlags       m_hints;
     
     std::vector<FreeSlice> m_freeList;
+
+    bool checkHints(DxvkMemoryFlags hints) const;
     
   };
   
@@ -345,6 +363,8 @@ namespace dxvk {
   class DxvkMemoryAllocator {
     friend class DxvkMemory;
     friend class DxvkMemoryChunk;
+
+    constexpr static VkDeviceSize SmallAllocationThreshold = 256 << 10;
   public:
     
     DxvkMemoryAllocator(const DxvkDevice* device);
@@ -369,16 +389,15 @@ namespace dxvk {
      * \param [in] dedAllocReq Dedicated allocation requirements
      * \param [in] dedAllocInfo Dedicated allocation info
      * \param [in] flags Memory type flags
-     * \param [in] priority Device-local memory priority
+     * \param [in] hints Memory hints
      * \returns Allocated memory slice
      */
     DxvkMemory alloc(
       const VkMemoryRequirements*             req,
       const VkMemoryDedicatedRequirements&    dedAllocReq,
       const VkMemoryDedicatedAllocateInfo&    dedAllocInfo,
-            VkMemoryPropertyFlags             propertyFlags,
-            VkMemoryAllocateFlags             allocateFlags,
-            float                             priority,
+            VkMemoryPropertyFlags             flags,
+            DxvkMemoryFlags                   hints,
             DxvkMemoryStats::Category         category);
 
     /**
@@ -434,31 +453,28 @@ namespace dxvk {
     std::array<DxvkMemoryType, VK_MAX_MEMORY_TYPES> m_memTypes;
 
     DxvkMemory tryAlloc(
-      const VkMemoryRequirements* req,
-      const VkMemoryDedicatedAllocateInfo* dedAllocInfo,
-      VkMemoryPropertyFlags propertyFlags,
-      VkMemoryAllocateFlags allocateFlags,
-      float priority,
-      DxvkMemoryStats::Category category);
+      const VkMemoryRequirements*             req,
+      const VkMemoryDedicatedAllocateInfo*    dedAllocInfo,
+      VkMemoryPropertyFlags                   flags,
+      DxvkMemoryFlags                         hints,
+      DxvkMemoryStats::Category               category);
     
     DxvkMemory tryAllocFromType(
-      DxvkMemoryType* type,
-      VkMemoryPropertyFlags propertyFlags,
-      VkMemoryAllocateFlags allocateFlags,
-      VkDeviceSize size,
-      VkDeviceSize align,
-      float priority,
-      const VkMemoryDedicatedAllocateInfo* dedAllocInfo,
-      DxvkMemoryStats::Category category);
+      DxvkMemoryType*                         type,
+      VkMemoryPropertyFlags                   flags,
+      VkDeviceSize                            size,
+      VkDeviceSize                            align,
+      DxvkMemoryFlags                         hints,
+      const VkMemoryDedicatedAllocateInfo*    dedAllocInfo,
+      DxvkMemoryStats::Category               category);
     
     DxvkDeviceMemory tryAllocDeviceMemory(
-      DxvkMemoryType* type,
-      VkMemoryPropertyFlags propertyFlags,
-      VkMemoryAllocateFlags allocateFlags,
-      VkDeviceSize size,
-      float priority,
-      const VkMemoryDedicatedAllocateInfo* dedAllocInfo,
-      DxvkMemoryStats::Category category);
+      DxvkMemoryType*                         type,
+      VkMemoryPropertyFlags                   flags,
+      VkDeviceSize                            size,
+      DxvkMemoryFlags                         hints,
+      const VkMemoryDedicatedAllocateInfo*    dedAllocInfo,
+      DxvkMemoryStats::Category               category);
     
     void free(
       const DxvkMemory&           memory);
@@ -474,7 +490,19 @@ namespace dxvk {
             DxvkDeviceMemory      memory);
     
     VkDeviceSize pickChunkSize(
-            uint32_t              memTypeId) const;
+            uint32_t              memTypeId,
+            DxvkMemoryFlags       hints) const;
+
+    bool shouldFreeChunk(
+      const DxvkMemoryType*       type,
+      const Rc<DxvkMemoryChunk>&  chunk) const;
+
+    bool shouldFreeEmptyChunks(
+      const DxvkMemoryHeap*       heap,
+            VkDeviceSize          allocationSize) const;
+
+    void freeEmptyChunks(
+      const DxvkMemoryHeap*       heap);
 
   };
   
diff --git a/src/dxvk/hud/dxvk_hud_item.cpp b/src/dxvk/hud/dxvk_hud_item.cpp
index 1873a9728..007fbe86f 100644
--- a/src/dxvk/hud/dxvk_hud_item.cpp
+++ b/src/dxvk/hud/dxvk_hud_item.cpp
@@ -570,8 +570,8 @@ namespace dxvk::hud {
       bool isDeviceLocal = m_memory.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
 
       VkDeviceSize memSizeMib = m_memory.memoryHeaps[i].size >> 20;
-      VkDeviceSize memAllocatedMib = m_heaps[i].totalAllocated() >> 20;
       VkDeviceSize memUsedMib = m_heaps[i].totalUsed() >> 20;
+      VkDeviceSize memAllocatedMib = m_heaps[i].totalAllocated() >> 20;
       uint64_t percentage = (100 * memUsedMib) / memSizeMib;
 
       std::string label = str::format(isDeviceLocal ? "Vidmem" : "Sysmem", " heap ", i, ":");