From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: 
Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by finch.gentoo.org (Postfix) with ESMTPS id 21738138334
	for ; Sat, 21 Dec 2019 15:01:49 +0000 (UTC)
Received: from pigeon.gentoo.org (localhost [127.0.0.1])
	by pigeon.gentoo.org (Postfix) with SMTP id 34BD9E08DA;
	Sat, 21 Dec 2019 15:01:48 +0000 (UTC)
Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183])
	(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits))
	(No client certificate requested)
	by pigeon.gentoo.org (Postfix) with ESMTPS id 0AD40E08DA
	for ; Sat, 21 Dec 2019 15:01:47 +0000 (UTC)
Received: from oystercatcher.gentoo.org (unknown [IPv6:2a01:4f8:202:4333:225:90ff:fed9:fc84])
	(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits))
	(No client certificate requested)
	by smtp.gentoo.org (Postfix) with ESMTPS id 8716334D1FA
	for ; Sat, 21 Dec 2019 15:01:46 +0000 (UTC)
Received: from localhost.localdomain (localhost [IPv6:::1])
	by oystercatcher.gentoo.org (Postfix) with ESMTP id 3F8C630C
	for ; Sat, 21 Dec 2019 15:01:45 +0000 (UTC)
From: "Mike Pagano" <mpagano@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Content-Transfer-Encoding: 8bit
Content-type: text/plain; charset=UTF-8
Reply-To: gentoo-dev@lists.gentoo.org, "Mike Pagano" <mpagano@gentoo.org>
Message-ID: <1576940468.da4daf3c5bacf24958c192713efad2e8db912648.mpagano@gentoo>
Subject: [gentoo-commits] proj/linux-patches:5.4 commit in: /
X-VCS-Repository: proj/linux-patches
X-VCS-Files: 0000_README 1005_linux-5.4.6.patch
X-VCS-Directories: /
X-VCS-Committer: mpagano
X-VCS-Committer-Name: Mike Pagano
X-VCS-Revision: da4daf3c5bacf24958c192713efad2e8db912648
X-VCS-Branch: 5.4
Date: Sat, 21 Dec 2019 15:01:45 +0000 (UTC)
Precedence: bulk
List-Post: 
List-Help: 
List-Unsubscribe: 
List-Subscribe: 
List-Id: Gentoo Linux mail
X-BeenThere: gentoo-commits@lists.gentoo.org
X-Auto-Response-Suppress: DR, RN, NRN, OOF, AutoReply
X-Archives-Salt: a36be0be-244f-4e3d-9515-fee980f8bed4
X-Archives-Hash: ffdd6e8934ea1dff4002c84bc16c01ec

commit:     da4daf3c5bacf24958c192713efad2e8db912648
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Dec 21 15:01:08 2019 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Dec 21 15:01:08 2019 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=da4daf3c

Linux patch 5.4.6

Signed-off-by: Mike Pagano <mpagano@gentoo.org>

 0000_README            |    4 +
 1005_linux-5.4.6.patch | 3420 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 3424 insertions(+)

diff --git a/0000_README b/0000_README
index 7a19811..223c519 100644
--- a/0000_README
+++ b/0000_README
@@ -63,6 +63,10 @@ Patch:  1004_linux-5.4.5.patch
 From:   http://www.kernel.org
 Desc:   Linux 5.4.5
 
+Patch:  1005_linux-5.4.6.patch
+From:   http://www.kernel.org
+Desc:   Linux 5.4.6
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.
diff --git a/1005_linux-5.4.6.patch b/1005_linux-5.4.6.patch new file mode 100644 index 0000000..d35757d --- /dev/null +++ b/1005_linux-5.4.6.patch @@ -0,0 +1,3420 @@ +diff --git a/Makefile b/Makefile +index 0f6e72d5e4f1..20ec7c20279e 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 4 +-SUBLEVEL = 5 ++SUBLEVEL = 6 + EXTRAVERSION = + NAME = Kleptomaniac Octopus + +diff --git a/arch/arm/boot/dts/s3c6410-mini6410.dts b/arch/arm/boot/dts/s3c6410-mini6410.dts +index 0e159c884f97..1aeac33b0d34 100644 +--- a/arch/arm/boot/dts/s3c6410-mini6410.dts ++++ b/arch/arm/boot/dts/s3c6410-mini6410.dts +@@ -165,6 +165,10 @@ + }; + }; + ++&clocks { ++ clocks = <&fin_pll>; ++}; ++ + &sdhci0 { + pinctrl-names = "default"; + pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; +diff --git a/arch/arm/boot/dts/s3c6410-smdk6410.dts b/arch/arm/boot/dts/s3c6410-smdk6410.dts +index a9a5689dc462..3bf6c450a26e 100644 +--- a/arch/arm/boot/dts/s3c6410-smdk6410.dts ++++ b/arch/arm/boot/dts/s3c6410-smdk6410.dts +@@ -69,6 +69,10 @@ + }; + }; + ++&clocks { ++ clocks = <&fin_pll>; ++}; ++ + &sdhci0 { + pinctrl-names = "default"; + pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; +diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S +index 67b763fea005..e3f34815c9da 100644 +--- a/arch/arm/mach-tegra/reset-handler.S ++++ b/arch/arm/mach-tegra/reset-handler.S +@@ -44,16 +44,16 @@ ENTRY(tegra_resume) + cmp r6, #TEGRA20 + beq 1f @ Yes + /* Clear the flow controller flags for this CPU. */ +- cpu_to_csr_reg r1, r0 ++ cpu_to_csr_reg r3, r0 + mov32 r2, TEGRA_FLOW_CTRL_BASE +- ldr r1, [r2, r1] ++ ldr r1, [r2, r3] + /* Clear event & intr flag */ + orr r1, r1, \ + #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG + movw r0, #0x3FFD @ enable, cluster_switch, immed, bitmaps + @ & ext flags for CPU power mgnt + bic r1, r1, r0 +- str r1, [r2] ++ str r1, [r2, r3] + 1: + + mov32 r9, 0xc09 +diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h +index 359ab40e935a..c90fb944f9d8 100644 +--- a/arch/xtensa/include/asm/syscall.h ++++ b/arch/xtensa/include/asm/syscall.h +@@ -51,7 +51,7 @@ static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) + { +- regs->areg[0] = (long) error ? error : val; ++ regs->areg[2] = (long) error ? error : val; + } + + #define SYSCALL_MAX_ARGS 6 +diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c +index af7152560bc3..b771459778fe 100644 +--- a/arch/xtensa/mm/kasan_init.c ++++ b/arch/xtensa/mm/kasan_init.c +@@ -56,7 +56,9 @@ static void __init populate(void *start, void *end) + + for (k = 0; k < PTRS_PER_PTE; ++k, ++j) { + phys_addr_t phys = +- memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); ++ memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, ++ 0, ++ MEMBLOCK_ALLOC_ANYWHERE); + + if (!phys) + panic("Failed to allocate page table page\n"); +diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c +index 59153d0aa890..b43f03620843 100644 +--- a/arch/xtensa/mm/tlb.c ++++ b/arch/xtensa/mm/tlb.c +@@ -216,6 +216,8 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) + unsigned tlbidx = w | (e << PAGE_SHIFT); + unsigned r0 = dtlb ? + read_dtlb_virtual(tlbidx) : read_itlb_virtual(tlbidx); ++ unsigned r1 = dtlb ? 
++ read_dtlb_translation(tlbidx) : read_itlb_translation(tlbidx); + unsigned vpn = (r0 & PAGE_MASK) | (e << PAGE_SHIFT); + unsigned pte = get_pte_for_vaddr(vpn); + unsigned mm_asid = (get_rasid_register() >> 8) & ASID_MASK; +@@ -231,8 +233,6 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) + } + + if (tlb_asid == mm_asid) { +- unsigned r1 = dtlb ? read_dtlb_translation(tlbidx) : +- read_itlb_translation(tlbidx); + if ((pte ^ r1) & PAGE_MASK) { + pr_err("%cTLB: way: %u, entry: %u, mapping: %08x->%08x, PTE: %08x\n", + dtlb ? 'D' : 'I', w, e, r0, r1, pte); +diff --git a/block/bio.c b/block/bio.c +index b1170ec18464..43df756b68c4 100644 +--- a/block/bio.c ++++ b/block/bio.c +@@ -751,10 +751,12 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, + if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) + return false; + +- if (bio->bi_vcnt > 0 && !bio_full(bio, len)) { ++ if (bio->bi_vcnt > 0) { + struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; + + if (page_is_mergeable(bv, page, len, off, same_page)) { ++ if (bio->bi_iter.bi_size > UINT_MAX - len) ++ return false; + bv->bv_len += len; + bio->bi_iter.bi_size += len; + return true; +diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c +index 25c5c071645b..91185db9a952 100644 +--- a/drivers/dma-buf/sync_file.c ++++ b/drivers/dma-buf/sync_file.c +@@ -221,7 +221,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, + a_fences = get_fences(a, &a_num_fences); + b_fences = get_fences(b, &b_num_fences); + if (a_num_fences > INT_MAX - b_num_fences) +- return NULL; ++ goto err; + + num_fences = a_num_fences + b_num_fences; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +index b6e1d98ef01e..aef6c396bd58 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +@@ -77,6 +77,7 @@ struct amdgpu_gmc_fault { + struct amdgpu_vmhub { + uint32_t ctx0_ptb_addr_lo32; + uint32_t ctx0_ptb_addr_hi32; ++ uint32_t vm_inv_eng0_sem; + uint32_t vm_inv_eng0_req; + uint32_t vm_inv_eng0_ack; + uint32_t vm_context0_cntl; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +index 53090eae0082..596722e79a26 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +@@ -1785,27 +1785,52 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); + } + +-static void gfx_v10_0_init_csb(struct amdgpu_device *adev) ++static int gfx_v10_0_init_csb(struct amdgpu_device *adev) + { ++ int r; ++ ++ if (adev->in_gpu_reset) { ++ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); ++ if (r) ++ return r; ++ ++ r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, ++ (void **)&adev->gfx.rlc.cs_ptr); ++ if (!r) { ++ adev->gfx.rlc.funcs->get_csb_buffer(adev, ++ adev->gfx.rlc.cs_ptr); ++ amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); ++ } ++ ++ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); ++ if (r) ++ return r; ++ } ++ + /* csib */ + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); ++ ++ return 0; + } + +-static void gfx_v10_0_init_pg(struct amdgpu_device *adev) ++static int gfx_v10_0_init_pg(struct amdgpu_device *adev) + { + int i; ++ int r; + +- 
gfx_v10_0_init_csb(adev); ++ r = gfx_v10_0_init_csb(adev); ++ if (r) ++ return r; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); + + /* TODO: init power gating */ +- return; ++ return 0; + } + + void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) +@@ -1907,7 +1932,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); + if (r) + return r; +- gfx_v10_0_init_pg(adev); ++ ++ r = gfx_v10_0_init_pg(adev); ++ if (r) ++ return r; + + /* enable RLC SRM */ + gfx_v10_0_rlc_enable_srm(adev); +@@ -1933,7 +1961,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) + return r; + } + +- gfx_v10_0_init_pg(adev); ++ r = gfx_v10_0_init_pg(adev); ++ if (r) ++ return r; ++ + adev->gfx.rlc.funcs->start(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { +@@ -2400,7 +2431,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) + return 0; + } + +-static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) ++static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) + { + int i; + u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); +@@ -2413,7 +2444,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) + adev->gfx.gfx_ring[i].sched.ready = false; + } + WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); +- udelay(50); ++ ++ for (i = 0; i < adev->usec_timeout; i++) { ++ if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) ++ break; ++ udelay(1); ++ } ++ ++ if (i >= adev->usec_timeout) ++ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); ++ ++ return 0; + } + + static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) +diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +index 6ce37ce77d14..d6fbdc6c0548 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +@@ -365,6 +365,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); ++ hub->vm_inv_eng0_sem = ++ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = +diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +index db10640a3b2f..fbe06c13a09c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +@@ -350,6 +350,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); ++ hub->vm_inv_eng0_sem = ++ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +index 5c7d5f73f54f..a7ba4c6cf7a1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +@@ -235,6 +235,29 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, + const unsigned eng = 17; + unsigned int i; + ++ spin_lock(&adev->gmc.invalidate_lock); ++ /* ++ * It may lose gpuvm invalidate acknowldege state across power-gating ++ * off cycle, add semaphore acquire before invalidation and semaphore ++ * release after invalidation to avoid entering 
power gated state ++ * to WA the Issue ++ */ ++ ++ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ ++ if (vmhub == AMDGPU_MMHUB_0 || ++ vmhub == AMDGPU_MMHUB_1) { ++ for (i = 0; i < adev->usec_timeout; i++) { ++ /* a read return value of 1 means semaphore acuqire */ ++ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); ++ if (tmp & 0x1) ++ break; ++ udelay(1); ++ } ++ ++ if (i >= adev->usec_timeout) ++ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); ++ } ++ + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + + /* +@@ -254,6 +277,17 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, + udelay(1); + } + ++ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ ++ if (vmhub == AMDGPU_MMHUB_0 || ++ vmhub == AMDGPU_MMHUB_1) ++ /* ++ * add semaphore release after invalidation, ++ * write with 0 means semaphore release ++ */ ++ WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); ++ ++ spin_unlock(&adev->gmc.invalidate_lock); ++ + if (i < adev->usec_timeout) + return; + +@@ -338,6 +372,20 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); + unsigned eng = ring->vm_inv_eng; + ++ /* ++ * It may lose gpuvm invalidate acknowldege state across power-gating ++ * off cycle, add semaphore acquire before invalidation and semaphore ++ * release after invalidation to avoid entering power gated state ++ * to WA the Issue ++ */ ++ ++ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ ++ if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || ++ ring->funcs->vmhub == AMDGPU_MMHUB_1) ++ /* a read return value of 1 means semaphore acuqire */ ++ amdgpu_ring_emit_reg_wait(ring, ++ hub->vm_inv_eng0_sem + eng, 0x1, 0x1); ++ + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + lower_32_bits(pd_addr)); + +@@ -348,6 +396,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); + ++ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ ++ if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || ++ ring->funcs->vmhub == AMDGPU_MMHUB_1) ++ /* ++ * add semaphore release after invalidation, ++ * write with 0 means semaphore release ++ */ ++ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); ++ + return pd_addr; + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +index f91337030dc0..d7caca042173 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +@@ -448,6 +448,24 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, + return req; + } + ++/** ++ * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore ++ * ++ * @adev: amdgpu_device pointer ++ * @vmhub: vmhub type ++ * ++ */ ++static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, ++ uint32_t vmhub) ++{ ++ return ((vmhub == AMDGPU_MMHUB_0 || ++ vmhub == AMDGPU_MMHUB_1) && ++ (!amdgpu_sriov_vf(adev)) && ++ (!(adev->asic_type == CHIP_RAVEN && ++ adev->rev_id < 0x8 && ++ adev->pdev->device == 0x15d8))); ++} ++ + /* + * GART + * VMID 0 is the physical GPU addresses as used by the kernel. 
+@@ -467,6 +485,7 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, + static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) + { ++ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); + const unsigned eng = 17; + u32 j, tmp; + struct amdgpu_vmhub *hub; +@@ -491,6 +510,28 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + } + + spin_lock(&adev->gmc.invalidate_lock); ++ ++ /* ++ * It may lose gpuvm invalidate acknowldege state across power-gating ++ * off cycle, add semaphore acquire before invalidation and semaphore ++ * release after invalidation to avoid entering power gated state ++ * to WA the Issue ++ */ ++ ++ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ ++ if (use_semaphore) { ++ for (j = 0; j < adev->usec_timeout; j++) { ++ /* a read return value of 1 means semaphore acuqire */ ++ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); ++ if (tmp & 0x1) ++ break; ++ udelay(1); ++ } ++ ++ if (j >= adev->usec_timeout) ++ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); ++ } ++ + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + + /* +@@ -506,7 +547,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + break; + udelay(1); + } ++ ++ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ ++ if (use_semaphore) ++ /* ++ * add semaphore release after invalidation, ++ * write with 0 means semaphore release ++ */ ++ WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); ++ + spin_unlock(&adev->gmc.invalidate_lock); ++ + if (j < adev->usec_timeout) + return; + +@@ -516,11 +567,25 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) + { ++ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; + uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); + unsigned eng = ring->vm_inv_eng; + ++ /* ++ * It may lose gpuvm invalidate acknowldege state across power-gating ++ * off cycle, add semaphore acquire before invalidation and semaphore ++ * release after invalidation to avoid entering power gated state ++ * to WA the Issue ++ */ ++ ++ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ ++ if (use_semaphore) ++ /* a read return value of 1 means semaphore acuqire */ ++ amdgpu_ring_emit_reg_wait(ring, ++ hub->vm_inv_eng0_sem + eng, 0x1, 0x1); ++ + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + lower_32_bits(pd_addr)); + +@@ -531,6 +596,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); + ++ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ ++ if (use_semaphore) ++ /* ++ * add semaphore release after invalidation, ++ * write with 0 means semaphore release ++ */ ++ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); ++ + return pd_addr; + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +index 04cd4b6f95d4..641f1258f08d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +@@ -418,6 +418,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); ++ hub->vm_inv_eng0_sem = ++ SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = +diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +index b39bea6f54e9..096bb883c29d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +@@ -341,6 +341,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); ++ hub->vm_inv_eng0_sem = ++ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = +diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +index 9ed178fa241c..fb161c83e409 100644 +--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +@@ -502,6 +502,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; ++ hub[i]->vm_inv_eng0_sem = ++ SOC15_REG_OFFSET(MMHUB, 0, ++ mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + ++ i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + +diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h +index a3dde0c31f57..a1d4ea69a284 100644 +--- a/drivers/gpu/drm/amd/amdgpu/soc15.h ++++ b/drivers/gpu/drm/amd/amdgpu/soc15.h +@@ -28,8 +28,8 @@ + #include "nbio_v7_0.h" + #include "nbio_v7_4.h" + +-#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 +-#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 ++#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6 ++#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3 + + extern const struct amd_ip_funcs soc15_common_ip_funcs; + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +index f4cfa0caeba8..785322cd4c6c 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +@@ -342,7 +342,8 @@ bool dm_pp_get_clock_levels_by_type( + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) { + if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle, + dc_to_pp_clock_type(clk_type), &pp_clks)) { +- /* Error in pplib. Provide default values. */ ++ /* Error in pplib. Provide default values. 
*/ ++ get_default_clock_levels(clk_type, dc_clks); + return true; + } + } else if (adev->smu.funcs && adev->smu.funcs->get_clock_by_type) { +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +index 1212da12c414..b3ae1c41fc69 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +@@ -1103,6 +1103,25 @@ void dcn20_pipe_control_lock( + if (pipe->plane_state != NULL) + flip_immediate = pipe->plane_state->flip_immediate; + ++ if (flip_immediate && lock) { ++ const int TIMEOUT_FOR_FLIP_PENDING = 100000; ++ int i; ++ ++ for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) { ++ if (!pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp)) ++ break; ++ udelay(1); ++ } ++ ++ if (pipe->bottom_pipe != NULL) { ++ for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) { ++ if (!pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp)) ++ break; ++ udelay(1); ++ } ++ } ++ } ++ + /* In flip immediate and pipe splitting case, we need to use GSL + * for synchronization. Only do setup on locking and on flip type change. + */ +diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c +index 82add736e17d..86c17896b532 100644 +--- a/drivers/gpu/drm/drm_dp_mst_topology.c ++++ b/drivers/gpu/drm/drm_dp_mst_topology.c +@@ -2465,9 +2465,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) + drm_dp_mst_topology_put_port(port); + } + +- for (i = 0; i < mgr->max_payloads; i++) { +- if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL) ++ for (i = 0; i < mgr->max_payloads; /* do nothing */) { ++ if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL) { ++ i++; + continue; ++ } + + DRM_DEBUG_KMS("removing payload %d\n", i); + for (j = i; j < mgr->max_payloads - 1; j++) { +diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c +index 16ed44bfd734..07a038f21619 100644 +--- a/drivers/gpu/drm/i915/display/intel_fbc.c ++++ b/drivers/gpu/drm/i915/display/intel_fbc.c +@@ -1284,7 +1284,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) + return 0; + + /* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */ +- if (IS_GEMINILAKE(dev_priv)) ++ if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + return 0; + + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) +diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c +index e753b1e706e2..fc29a3705354 100644 +--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c ++++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c +@@ -1597,9 +1597,9 @@ static int cmd_handler_mi_op_2f(struct parser_exec_state *s) + if (!(cmd_val(s, 0) & (1 << 22))) + return ret; + +- /* check if QWORD */ +- if (DWORD_FIELD(0, 20, 19) == 1) +- valid_len += 8; ++ /* check inline data */ ++ if (cmd_val(s, 0) & BIT(18)) ++ valid_len = CMD_LEN(9); + ret = gvt_check_valid_cmd_length(cmd_length(s), + valid_len); + if (ret) +diff --git a/drivers/gpu/drm/meson/meson_venc_cvbs.c b/drivers/gpu/drm/meson/meson_venc_cvbs.c +index 9ab27aecfcf3..1bd6b6d15ffb 100644 +--- a/drivers/gpu/drm/meson/meson_venc_cvbs.c ++++ b/drivers/gpu/drm/meson/meson_venc_cvbs.c +@@ -64,6 +64,25 @@ struct meson_cvbs_mode meson_cvbs_modes[MESON_CVBS_MODES_COUNT] = { + }, + }; + ++static const struct meson_cvbs_mode * ++meson_cvbs_get_mode(const struct drm_display_mode *req_mode) ++{ ++ int i; ++ ++ for (i = 0; i 
< MESON_CVBS_MODES_COUNT; ++i) { ++ struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i]; ++ ++ if (drm_mode_match(req_mode, &meson_mode->mode, ++ DRM_MODE_MATCH_TIMINGS | ++ DRM_MODE_MATCH_CLOCK | ++ DRM_MODE_MATCH_FLAGS | ++ DRM_MODE_MATCH_3D_FLAGS)) ++ return meson_mode; ++ } ++ ++ return NULL; ++} ++ + /* Connector */ + + static void meson_cvbs_connector_destroy(struct drm_connector *connector) +@@ -136,14 +155,8 @@ static int meson_venc_cvbs_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) + { +- int i; +- +- for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) { +- struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i]; +- +- if (drm_mode_equal(&crtc_state->mode, &meson_mode->mode)) +- return 0; +- } ++ if (meson_cvbs_get_mode(&crtc_state->mode)) ++ return 0; + + return -EINVAL; + } +@@ -191,24 +204,17 @@ static void meson_venc_cvbs_encoder_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) + { ++ const struct meson_cvbs_mode *meson_mode = meson_cvbs_get_mode(mode); + struct meson_venc_cvbs *meson_venc_cvbs = + encoder_to_meson_venc_cvbs(encoder); + struct meson_drm *priv = meson_venc_cvbs->priv; +- int i; + +- for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) { +- struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i]; ++ if (meson_mode) { ++ meson_venci_cvbs_mode_set(priv, meson_mode->enci); + +- if (drm_mode_equal(mode, &meson_mode->mode)) { +- meson_venci_cvbs_mode_set(priv, +- meson_mode->enci); +- +- /* Setup 27MHz vclk2 for ENCI and VDAC */ +- meson_vclk_setup(priv, MESON_VCLK_TARGET_CVBS, +- MESON_VCLK_CVBS, MESON_VCLK_CVBS, +- MESON_VCLK_CVBS, true); +- break; +- } ++ /* Setup 27MHz vclk2 for ENCI and VDAC */ ++ meson_vclk_setup(priv, MESON_VCLK_TARGET_CVBS, MESON_VCLK_CVBS, ++ MESON_VCLK_CVBS, MESON_VCLK_CVBS, true); + } + } + +diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c +index afd9119b6cf1..c96c4393b124 100644 +--- a/drivers/gpu/drm/mgag200/mgag200_drv.c ++++ b/drivers/gpu/drm/mgag200/mgag200_drv.c +@@ -30,7 +30,8 @@ module_param_named(modeset, mgag200_modeset, int, 0400); + static struct drm_driver driver; + + static const struct pci_device_id pciidlist[] = { +- { PCI_VENDOR_ID_MATROX, 0x522, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_A }, ++ { PCI_VENDOR_ID_MATROX, 0x522, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ++ G200_SE_A | MGAG200_FLAG_HW_BUG_NO_STARTADD}, + { PCI_VENDOR_ID_MATROX, 0x524, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_B }, + { PCI_VENDOR_ID_MATROX, 0x530, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_EV }, + { PCI_VENDOR_ID_MATROX, 0x532, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_WB }, +@@ -63,6 +64,35 @@ static const struct file_operations mgag200_driver_fops = { + DRM_VRAM_MM_FILE_OPERATIONS + }; + ++static bool mgag200_pin_bo_at_0(const struct mga_device *mdev) ++{ ++ return mdev->flags & MGAG200_FLAG_HW_BUG_NO_STARTADD; ++} ++ ++int mgag200_driver_dumb_create(struct drm_file *file, ++ struct drm_device *dev, ++ struct drm_mode_create_dumb *args) ++{ ++ struct mga_device *mdev = dev->dev_private; ++ unsigned long pg_align; ++ ++ if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized")) ++ return -EINVAL; ++ ++ pg_align = 0ul; ++ ++ /* ++ * Aligning scanout buffers to the size of the video ram forces ++ * placement at offset 0. Works around a bug where HW does not ++ * respect 'startadd' field. 
++ */ ++ if (mgag200_pin_bo_at_0(mdev)) ++ pg_align = PFN_UP(mdev->mc.vram_size); ++ ++ return drm_gem_vram_fill_create_dumb(file, dev, &dev->vram_mm->bdev, ++ pg_align, false, args); ++} ++ + static struct drm_driver driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET, + .load = mgag200_driver_load, +@@ -74,7 +104,9 @@ static struct drm_driver driver = { + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +- DRM_GEM_VRAM_DRIVER ++ .dumb_create = mgag200_driver_dumb_create, ++ .dumb_map_offset = drm_gem_vram_driver_dumb_mmap_offset, ++ .gem_prime_mmap = drm_gem_prime_mmap, + }; + + static struct pci_driver mgag200_pci_driver = { +diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h +index 1c93f8dc08c7..7cc1a242df5f 100644 +--- a/drivers/gpu/drm/mgag200/mgag200_drv.h ++++ b/drivers/gpu/drm/mgag200/mgag200_drv.h +@@ -159,6 +159,12 @@ enum mga_type { + G200_EW3, + }; + ++/* HW does not handle 'startadd' field correct. */ ++#define MGAG200_FLAG_HW_BUG_NO_STARTADD (1ul << 8) ++ ++#define MGAG200_TYPE_MASK (0x000000ff) ++#define MGAG200_FLAG_MASK (0x00ffff00) ++ + #define IS_G200_SE(mdev) (mdev->type == G200_SE_A || mdev->type == G200_SE_B) + + struct mga_device { +@@ -188,6 +194,18 @@ struct mga_device { + u32 unique_rev_id; + }; + ++static inline enum mga_type ++mgag200_type_from_driver_data(kernel_ulong_t driver_data) ++{ ++ return (enum mga_type)(driver_data & MGAG200_TYPE_MASK); ++} ++ ++static inline unsigned long ++mgag200_flags_from_driver_data(kernel_ulong_t driver_data) ++{ ++ return driver_data & MGAG200_FLAG_MASK; ++} ++ + /* mgag200_mode.c */ + int mgag200_modeset_init(struct mga_device *mdev); + void mgag200_modeset_fini(struct mga_device *mdev); +diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c +index a9773334dedf..388212b2d63f 100644 +--- a/drivers/gpu/drm/mgag200/mgag200_main.c ++++ b/drivers/gpu/drm/mgag200/mgag200_main.c +@@ -94,7 +94,8 @@ static int mgag200_device_init(struct drm_device *dev, + struct mga_device *mdev = dev->dev_private; + int ret, option; + +- mdev->type = flags; ++ mdev->flags = mgag200_flags_from_driver_data(flags); ++ mdev->type = mgag200_type_from_driver_data(flags); + + /* Hardcode the number of CRTCs to 1 */ + mdev->num_crtc = 1; +diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h +index 43df86c38f58..24f7700768da 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/atom.h ++++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h +@@ -114,6 +114,7 @@ struct nv50_head_atom { + u8 nhsync:1; + u8 nvsync:1; + u8 depth:4; ++ u8 bpc; + } or; + + /* Currently only used for MST */ +diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c +index b46be8a091e9..b5b1a34f896f 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c ++++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c +@@ -353,10 +353,20 @@ nv50_outp_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) + { +- struct nouveau_connector *nv_connector = +- nouveau_connector(conn_state->connector); +- return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, +- nv_connector->native_mode); ++ struct drm_connector *connector = conn_state->connector; ++ struct nouveau_connector *nv_connector = nouveau_connector(connector); ++ struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); ++ int ret; ++ ++ ret = nv50_outp_atomic_check_view(encoder, crtc_state, 
conn_state, ++ nv_connector->native_mode); ++ if (ret) ++ return ret; ++ ++ if (crtc_state->mode_changed || crtc_state->connectors_changed) ++ asyh->or.bpc = connector->display_info.bpc; ++ ++ return 0; + } + + /****************************************************************************** +@@ -770,32 +780,54 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, + struct nv50_mstm *mstm = mstc->mstm; + struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); + int slots; ++ int ret; ++ ++ ret = nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, ++ mstc->native); ++ if (ret) ++ return ret; ++ ++ if (!crtc_state->mode_changed && !crtc_state->connectors_changed) ++ return 0; ++ ++ /* ++ * When restoring duplicated states, we need to make sure that the bw ++ * remains the same and avoid recalculating it, as the connector's bpc ++ * may have changed after the state was duplicated ++ */ ++ if (!state->duplicated) { ++ const int clock = crtc_state->adjusted_mode.clock; + +- if (crtc_state->mode_changed || crtc_state->connectors_changed) { + /* +- * When restoring duplicated states, we need to make sure that +- * the bw remains the same and avoid recalculating it, as the +- * connector's bpc may have changed after the state was +- * duplicated ++ * XXX: Since we don't use HDR in userspace quite yet, limit ++ * the bpc to 8 to save bandwidth on the topology. In the ++ * future, we'll want to properly fix this by dynamically ++ * selecting the highest possible bpc that would fit in the ++ * topology + */ +- if (!state->duplicated) { +- const int bpp = connector->display_info.bpc * 3; +- const int clock = crtc_state->adjusted_mode.clock; ++ asyh->or.bpc = min(connector->display_info.bpc, 8U); ++ asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3); ++ } + +- asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, bpp); +- } ++ slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, mstc->port, ++ asyh->dp.pbn); ++ if (slots < 0) ++ return slots; + +- slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, +- mstc->port, +- asyh->dp.pbn); +- if (slots < 0) +- return slots; ++ asyh->dp.tu = slots; + +- asyh->dp.tu = slots; +- } ++ return 0; ++} + +- return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, +- mstc->native); ++static u8 ++nv50_dp_bpc_to_depth(unsigned int bpc) ++{ ++ switch (bpc) { ++ case 6: return 0x2; ++ case 8: return 0x5; ++ case 10: /* fall-through */ ++ default: return 0x6; ++ } + } + + static void +@@ -808,7 +840,7 @@ nv50_msto_enable(struct drm_encoder *encoder) + struct nv50_mstm *mstm = NULL; + struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; +- u8 proto, depth; ++ u8 proto; + bool r; + + drm_connector_list_iter_begin(encoder->dev, &conn_iter); +@@ -837,14 +869,8 @@ nv50_msto_enable(struct drm_encoder *encoder) + else + proto = 0x9; + +- switch (mstc->connector.display_info.bpc) { +- case 6: depth = 0x2; break; +- case 8: depth = 0x5; break; +- case 10: +- default: depth = 0x6; break; +- } +- +- mstm->outp->update(mstm->outp, head->base.index, armh, proto, depth); ++ mstm->outp->update(mstm->outp, head->base.index, armh, proto, ++ nv50_dp_bpc_to_depth(armh->or.bpc)); + + msto->head = head; + msto->mstc = mstc; +@@ -1498,20 +1524,14 @@ nv50_sor_enable(struct drm_encoder *encoder) + lvds.lvds.script |= 0x0200; + } + +- if (nv_connector->base.display_info.bpc == 8) ++ if (asyh->or.bpc == 8) + lvds.lvds.script |= 0x0200; + } + + nvif_mthd(&disp->disp->object, 0, &lvds, sizeof(lvds)); + break; + case DCB_OUTPUT_DP: +- if 
(nv_connector->base.display_info.bpc == 6) +- depth = 0x2; +- else +- if (nv_connector->base.display_info.bpc == 8) +- depth = 0x5; +- else +- depth = 0x6; ++ depth = nv50_dp_bpc_to_depth(asyh->or.bpc); + + if (nv_encoder->link & 1) + proto = 0x8; +@@ -1662,7 +1682,7 @@ nv50_pior_enable(struct drm_encoder *encoder) + nv50_outp_acquire(nv_encoder); + + nv_connector = nouveau_encoder_connector_get(nv_encoder); +- switch (nv_connector->base.display_info.bpc) { ++ switch (asyh->or.bpc) { + case 10: asyh->or.depth = 0x6; break; + case 8: asyh->or.depth = 0x5; break; + case 6: asyh->or.depth = 0x2; break; +diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c +index 71c23bf1fe25..c9692df2b76c 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/head.c ++++ b/drivers/gpu/drm/nouveau/dispnv50/head.c +@@ -81,18 +81,17 @@ nv50_head_atomic_check_dither(struct nv50_head_atom *armh, + struct nv50_head_atom *asyh, + struct nouveau_conn_atom *asyc) + { +- struct drm_connector *connector = asyc->state.connector; + u32 mode = 0x00; + + if (asyc->dither.mode == DITHERING_MODE_AUTO) { +- if (asyh->base.depth > connector->display_info.bpc * 3) ++ if (asyh->base.depth > asyh->or.bpc * 3) + mode = DITHERING_MODE_DYNAMIC2X2; + } else { + mode = asyc->dither.mode; + } + + if (asyc->dither.depth == DITHERING_DEPTH_AUTO) { +- if (connector->display_info.bpc >= 8) ++ if (asyh->or.bpc >= 8) + mode |= DITHERING_DEPTH_8BPC; + } else { + mode |= asyc->dither.depth; +diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c +index bdf91b75328e..1c67ac434e10 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_drv.c ++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c +@@ -303,14 +303,17 @@ static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data, + } + + /* Don't allow mmapping of heap objects as pages are not pinned. 
*/ +- if (to_panfrost_bo(gem_obj)->is_heap) +- return -EINVAL; ++ if (to_panfrost_bo(gem_obj)->is_heap) { ++ ret = -EINVAL; ++ goto out; ++ } + + ret = drm_gem_create_mmap_offset(gem_obj); + if (ret == 0) + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); +- drm_gem_object_put_unlocked(gem_obj); + ++out: ++ drm_gem_object_put_unlocked(gem_obj); + return ret; + } + +@@ -347,20 +350,19 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, + return -ENOENT; + } + ++ mutex_lock(&pfdev->shrinker_lock); + args->retained = drm_gem_shmem_madvise(gem_obj, args->madv); + + if (args->retained) { + struct panfrost_gem_object *bo = to_panfrost_bo(gem_obj); + +- mutex_lock(&pfdev->shrinker_lock); +- + if (args->madv == PANFROST_MADV_DONTNEED) +- list_add_tail(&bo->base.madv_list, &pfdev->shrinker_list); ++ list_add_tail(&bo->base.madv_list, ++ &pfdev->shrinker_list); + else if (args->madv == PANFROST_MADV_WILLNEED) + list_del_init(&bo->base.madv_list); +- +- mutex_unlock(&pfdev->shrinker_lock); + } ++ mutex_unlock(&pfdev->shrinker_lock); + + drm_gem_object_put_unlocked(gem_obj); + return 0; +diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c +index bc3ff22e5e85..92a95210a899 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_gem.c ++++ b/drivers/gpu/drm/panfrost/panfrost_gem.c +@@ -19,6 +19,16 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj) + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct panfrost_device *pfdev = obj->dev->dev_private; + ++ /* ++ * Make sure the BO is no longer inserted in the shrinker list before ++ * taking care of the destruction itself. If we don't do that we have a ++ * race condition between this function and what's done in ++ * panfrost_gem_shrinker_scan(). 
++ */ ++ mutex_lock(&pfdev->shrinker_lock); ++ list_del_init(&bo->base.madv_list); ++ mutex_unlock(&pfdev->shrinker_lock); ++ + if (bo->sgts) { + int i; + int n_sgt = bo->base.base.size / SZ_2M; +@@ -33,11 +43,6 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj) + kfree(bo->sgts); + } + +- mutex_lock(&pfdev->shrinker_lock); +- if (!list_empty(&bo->base.madv_list)) +- list_del(&bo->base.madv_list); +- mutex_unlock(&pfdev->shrinker_lock); +- + drm_gem_shmem_free_object(obj); + } + +diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c +index 7089dfc8c2a9..110fb38004b1 100644 +--- a/drivers/gpu/drm/radeon/r100.c ++++ b/drivers/gpu/drm/radeon/r100.c +@@ -1826,8 +1826,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, + track->textures[i].use_pitch = 1; + } else { + track->textures[i].use_pitch = 0; +- track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); +- track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); ++ track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); ++ track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); + } + if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) + track->textures[i].tex_coord_type = 2; +diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c +index 840401413c58..f5f2ffea5ab2 100644 +--- a/drivers/gpu/drm/radeon/r200.c ++++ b/drivers/gpu/drm/radeon/r200.c +@@ -476,8 +476,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, + track->textures[i].use_pitch = 1; + } else { + track->textures[i].use_pitch = 0; +- track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); +- track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); ++ track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); ++ track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); + } + if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) + track->textures[i].lookup_disable = true; +diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c +index 6bc8c1d1c351..54e4fdd607e1 100644 +--- a/drivers/md/dm-clone-metadata.c ++++ b/drivers/md/dm-clone-metadata.c +@@ -67,23 +67,34 @@ struct superblock_disk { + * To save constantly doing look ups on disk we keep an in core copy of the + * on-disk bitmap, the region_map. + * +- * To further reduce metadata I/O overhead we use a second bitmap, the dmap +- * (dirty bitmap), which tracks the dirty words, i.e. longs, of the region_map. ++ * In order to track which regions are hydrated during a metadata transaction, ++ * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two ++ * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap ++ * tracks the regions that got hydrated during the current metadata ++ * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of ++ * the dirty_regions bitmap. ++ * ++ * This allows us to precisely track the regions that were hydrated during the ++ * current metadata transaction and update the metadata accordingly, when we ++ * commit the current transaction. 
This is important because dm-clone should ++ * only commit the metadata of regions that were properly flushed to the ++ * destination device beforehand. Otherwise, in case of a crash, we could end ++ * up with a corrupted dm-clone device. + * + * When a region finishes hydrating dm-clone calls + * dm_clone_set_region_hydrated(), or for discard requests + * dm_clone_cond_set_range(), which sets the corresponding bits in region_map + * and dmap. + * +- * During a metadata commit we scan the dmap for dirty region_map words (longs) +- * and update accordingly the on-disk metadata. Thus, we don't have to flush to +- * disk the whole region_map. We can just flush the dirty region_map words. ++ * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions ++ * and update the on-disk metadata accordingly. Thus, we don't have to flush to ++ * disk the whole region_map. We can just flush the dirty region_map bits. + * +- * We use a dirty bitmap, which is smaller than the original region_map, to +- * reduce the amount of memory accesses during a metadata commit. As dm-bitset +- * accesses the on-disk bitmap in 64-bit word granularity, there is no +- * significant benefit in tracking the dirty region_map bits with a smaller +- * granularity. ++ * We use the helper dmap->dirty_words bitmap, which is smaller than the ++ * original region_map, to reduce the amount of memory accesses during a ++ * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in ++ * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk ++ * accesses. + * + * We could update directly the on-disk bitmap, when dm-clone calls either + * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), buts this +@@ -92,12 +103,13 @@ struct superblock_disk { + * e.g., in a hooked overwrite bio's completion routine, and further reduce the + * I/O completion latency. + * +- * We maintain two dirty bitmaps. During a metadata commit we atomically swap +- * the currently used dmap with the unused one. This allows the metadata update +- * functions to run concurrently with an ongoing commit. ++ * We maintain two dirty bitmap sets. During a metadata commit we atomically ++ * swap the currently used dmap with the unused one. This allows the metadata ++ * update functions to run concurrently with an ongoing commit. + */ + struct dirty_map { + unsigned long *dirty_words; ++ unsigned long *dirty_regions; + unsigned int changed; + }; + +@@ -115,6 +127,9 @@ struct dm_clone_metadata { + struct dirty_map dmap[2]; + struct dirty_map *current_dmap; + ++ /* Protected by lock */ ++ struct dirty_map *committing_dmap; ++ + /* + * In core copy of the on-disk bitmap to save constantly doing look ups + * on disk. 
+@@ -461,34 +476,53 @@ static size_t bitmap_size(unsigned long nr_bits) + return BITS_TO_LONGS(nr_bits) * sizeof(long); + } + +-static int dirty_map_init(struct dm_clone_metadata *cmd) ++static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words, ++ unsigned long nr_regions) + { +- cmd->dmap[0].changed = 0; +- cmd->dmap[0].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL); ++ dmap->changed = 0; + +- if (!cmd->dmap[0].dirty_words) { +- DMERR("Failed to allocate dirty bitmap"); ++ dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL); ++ if (!dmap->dirty_words) ++ return -ENOMEM; ++ ++ dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL); ++ if (!dmap->dirty_regions) { ++ kvfree(dmap->dirty_words); + return -ENOMEM; + } + +- cmd->dmap[1].changed = 0; +- cmd->dmap[1].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL); ++ return 0; ++} ++ ++static void __dirty_map_exit(struct dirty_map *dmap) ++{ ++ kvfree(dmap->dirty_words); ++ kvfree(dmap->dirty_regions); ++} ++ ++static int dirty_map_init(struct dm_clone_metadata *cmd) ++{ ++ if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) { ++ DMERR("Failed to allocate dirty bitmap"); ++ return -ENOMEM; ++ } + +- if (!cmd->dmap[1].dirty_words) { ++ if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) { + DMERR("Failed to allocate dirty bitmap"); +- kvfree(cmd->dmap[0].dirty_words); ++ __dirty_map_exit(&cmd->dmap[0]); + return -ENOMEM; + } + + cmd->current_dmap = &cmd->dmap[0]; ++ cmd->committing_dmap = NULL; + + return 0; + } + + static void dirty_map_exit(struct dm_clone_metadata *cmd) + { +- kvfree(cmd->dmap[0].dirty_words); +- kvfree(cmd->dmap[1].dirty_words); ++ __dirty_map_exit(&cmd->dmap[0]); ++ __dirty_map_exit(&cmd->dmap[1]); + } + + static int __load_bitset_in_core(struct dm_clone_metadata *cmd) +@@ -633,21 +667,23 @@ unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd + return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start); + } + +-static int __update_metadata_word(struct dm_clone_metadata *cmd, unsigned long word) ++static int __update_metadata_word(struct dm_clone_metadata *cmd, ++ unsigned long *dirty_regions, ++ unsigned long word) + { + int r; + unsigned long index = word * BITS_PER_LONG; + unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG); + + while (index < max_index) { +- if (test_bit(index, cmd->region_map)) { ++ if (test_bit(index, dirty_regions)) { + r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root, + index, &cmd->bitset_root); +- + if (r) { + DMERR("dm_bitset_set_bit failed"); + return r; + } ++ __clear_bit(index, dirty_regions); + } + index++; + } +@@ -721,7 +757,7 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap) + if (word == cmd->nr_words) + break; + +- r = __update_metadata_word(cmd, word); ++ r = __update_metadata_word(cmd, dmap->dirty_regions, word); + + if (r) + return r; +@@ -743,16 +779,18 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap) + return 0; + } + +-int dm_clone_metadata_commit(struct dm_clone_metadata *cmd) ++int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd) + { +- int r = -EPERM; ++ int r = 0; + unsigned long flags; + struct dirty_map *dmap, *next_dmap; + + down_write(&cmd->lock); + +- if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) ++ if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { ++ r = -EPERM; + goto out; ++ } + + /* Get current dirty bitmap */ + dmap = 
cmd->current_dmap; +@@ -764,7 +802,7 @@ int dm_clone_metadata_commit(struct dm_clone_metadata *cmd) + * The last commit failed, so we don't have a clean dirty-bitmap to + * use. + */ +- if (WARN_ON(next_dmap->changed)) { ++ if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) { + r = -EINVAL; + goto out; + } +@@ -774,11 +812,33 @@ int dm_clone_metadata_commit(struct dm_clone_metadata *cmd) + cmd->current_dmap = next_dmap; + spin_unlock_irqrestore(&cmd->bitmap_lock, flags); + +- /* +- * No one is accessing the old dirty bitmap anymore, so we can flush +- * it. +- */ +- r = __flush_dmap(cmd, dmap); ++ /* Set old dirty bitmap as currently committing */ ++ cmd->committing_dmap = dmap; ++out: ++ up_write(&cmd->lock); ++ ++ return r; ++} ++ ++int dm_clone_metadata_commit(struct dm_clone_metadata *cmd) ++{ ++ int r = -EPERM; ++ ++ down_write(&cmd->lock); ++ ++ if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) ++ goto out; ++ ++ if (WARN_ON(!cmd->committing_dmap)) { ++ r = -EINVAL; ++ goto out; ++ } ++ ++ r = __flush_dmap(cmd, cmd->committing_dmap); ++ if (!r) { ++ /* Clear committing dmap */ ++ cmd->committing_dmap = NULL; ++ } + out: + up_write(&cmd->lock); + +@@ -803,6 +863,7 @@ int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long re + dmap = cmd->current_dmap; + + __set_bit(word, dmap->dirty_words); ++ __set_bit(region_nr, dmap->dirty_regions); + __set_bit(region_nr, cmd->region_map); + dmap->changed = 1; + +@@ -831,6 +892,7 @@ int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start, + if (!test_bit(region_nr, cmd->region_map)) { + word = region_nr / BITS_PER_LONG; + __set_bit(word, dmap->dirty_words); ++ __set_bit(region_nr, dmap->dirty_regions); + __set_bit(region_nr, cmd->region_map); + dmap->changed = 1; + } +diff --git a/drivers/md/dm-clone-metadata.h b/drivers/md/dm-clone-metadata.h +index 434bff08508b..c7848c49aef8 100644 +--- a/drivers/md/dm-clone-metadata.h ++++ b/drivers/md/dm-clone-metadata.h +@@ -73,7 +73,23 @@ void dm_clone_metadata_close(struct dm_clone_metadata *cmd); + + /* + * Commit dm-clone metadata to disk. ++ * ++ * We use a two phase commit: ++ * ++ * 1. dm_clone_metadata_pre_commit(): Prepare the current transaction for ++ * committing. After this is called, all subsequent metadata updates, done ++ * through either dm_clone_set_region_hydrated() or ++ * dm_clone_cond_set_range(), will be part of the **next** transaction. ++ * ++ * 2. dm_clone_metadata_commit(): Actually commit the current transaction to ++ * disk and start a new transaction. ++ * ++ * This allows dm-clone to flush the destination device after step (1) to ++ * ensure that all freshly hydrated regions, for which we are updating the ++ * metadata, are properly written to non-volatile storage and won't be lost in ++ * case of a crash. + */ ++int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd); + int dm_clone_metadata_commit(struct dm_clone_metadata *cmd); + + /* +@@ -110,6 +126,7 @@ int dm_clone_metadata_abort(struct dm_clone_metadata *cmd); + * Switches metadata to a read only mode. 
Once read-only mode has been entered + * the following functions will return -EPERM: + * ++ * dm_clone_metadata_pre_commit() + * dm_clone_metadata_commit() + * dm_clone_set_region_hydrated() + * dm_clone_cond_set_range() +diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c +index 4ca8f1977222..e6e5d24a79f5 100644 +--- a/drivers/md/dm-clone-target.c ++++ b/drivers/md/dm-clone-target.c +@@ -86,6 +86,12 @@ struct clone { + + struct dm_clone_metadata *cmd; + ++ /* ++ * bio used to flush the destination device, before committing the ++ * metadata. ++ */ ++ struct bio flush_bio; ++ + /* Region hydration hash table */ + struct hash_table_bucket *ht; + +@@ -1106,10 +1112,13 @@ static bool need_commit_due_to_time(struct clone *clone) + /* + * A non-zero return indicates read-only or fail mode. + */ +-static int commit_metadata(struct clone *clone) ++static int commit_metadata(struct clone *clone, bool *dest_dev_flushed) + { + int r = 0; + ++ if (dest_dev_flushed) ++ *dest_dev_flushed = false; ++ + mutex_lock(&clone->commit_lock); + + if (!dm_clone_changed_this_transaction(clone->cmd)) +@@ -1120,8 +1129,26 @@ static int commit_metadata(struct clone *clone) + goto out; + } + +- r = dm_clone_metadata_commit(clone->cmd); ++ r = dm_clone_metadata_pre_commit(clone->cmd); ++ if (unlikely(r)) { ++ __metadata_operation_failed(clone, "dm_clone_metadata_pre_commit", r); ++ goto out; ++ } + ++ bio_reset(&clone->flush_bio); ++ bio_set_dev(&clone->flush_bio, clone->dest_dev->bdev); ++ clone->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ++ ++ r = submit_bio_wait(&clone->flush_bio); ++ if (unlikely(r)) { ++ __metadata_operation_failed(clone, "flush destination device", r); ++ goto out; ++ } ++ ++ if (dest_dev_flushed) ++ *dest_dev_flushed = true; ++ ++ r = dm_clone_metadata_commit(clone->cmd); + if (unlikely(r)) { + __metadata_operation_failed(clone, "dm_clone_metadata_commit", r); + goto out; +@@ -1194,6 +1221,7 @@ static void process_deferred_flush_bios(struct clone *clone) + { + struct bio *bio; + unsigned long flags; ++ bool dest_dev_flushed; + struct bio_list bios = BIO_EMPTY_LIST; + struct bio_list bio_completions = BIO_EMPTY_LIST; + +@@ -1213,7 +1241,7 @@ static void process_deferred_flush_bios(struct clone *clone) + !(dm_clone_changed_this_transaction(clone->cmd) && need_commit_due_to_time(clone))) + return; + +- if (commit_metadata(clone)) { ++ if (commit_metadata(clone, &dest_dev_flushed)) { + bio_list_merge(&bios, &bio_completions); + + while ((bio = bio_list_pop(&bios))) +@@ -1227,8 +1255,17 @@ static void process_deferred_flush_bios(struct clone *clone) + while ((bio = bio_list_pop(&bio_completions))) + bio_endio(bio); + +- while ((bio = bio_list_pop(&bios))) +- generic_make_request(bio); ++ while ((bio = bio_list_pop(&bios))) { ++ if ((bio->bi_opf & REQ_PREFLUSH) && dest_dev_flushed) { ++ /* We just flushed the destination device as part of ++ * the metadata commit, so there is no reason to send ++ * another flush. 
++ */ ++ bio_endio(bio); ++ } else { ++ generic_make_request(bio); ++ } ++ } + } + + static void do_worker(struct work_struct *work) +@@ -1400,7 +1437,7 @@ static void clone_status(struct dm_target *ti, status_type_t type, + + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) +- (void) commit_metadata(clone); ++ (void) commit_metadata(clone, NULL); + + r = dm_clone_get_free_metadata_block_count(clone->cmd, &nr_free_metadata_blocks); + +@@ -1834,6 +1871,7 @@ static int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv) + bio_list_init(&clone->deferred_flush_completions); + clone->hydration_offset = 0; + atomic_set(&clone->hydrations_in_flight, 0); ++ bio_init(&clone->flush_bio, NULL, 0); + + clone->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0); + if (!clone->wq) { +@@ -1907,6 +1945,7 @@ static void clone_dtr(struct dm_target *ti) + struct clone *clone = ti->private; + + mutex_destroy(&clone->commit_lock); ++ bio_uninit(&clone->flush_bio); + + for (i = 0; i < clone->nr_ctr_args; i++) + kfree(clone->ctr_args[i]); +@@ -1961,7 +2000,7 @@ static void clone_postsuspend(struct dm_target *ti) + wait_event(clone->hydration_stopped, !atomic_read(&clone->hydrations_in_flight)); + flush_workqueue(clone->wq); + +- (void) commit_metadata(clone); ++ (void) commit_metadata(clone, NULL); + } + + static void clone_resume(struct dm_target *ti) +diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c +index dbcc1e41cd57..e0c32793c248 100644 +--- a/drivers/md/dm-mpath.c ++++ b/drivers/md/dm-mpath.c +@@ -599,45 +599,10 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) + return pgpath; + } + +-static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio) +-{ +- struct pgpath *pgpath; +- unsigned long flags; +- +- /* Do we need to select a new pgpath? */ +- /* +- * FIXME: currently only switching path if no path (due to failure, etc) +- * - which negates the point of using a path selector +- */ +- pgpath = READ_ONCE(m->current_pgpath); +- if (!pgpath) +- pgpath = choose_pgpath(m, bio->bi_iter.bi_size); +- +- if (!pgpath) { +- if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { +- /* Queue for the daemon to resubmit */ +- spin_lock_irqsave(&m->lock, flags); +- bio_list_add(&m->queued_bios, bio); +- spin_unlock_irqrestore(&m->lock, flags); +- queue_work(kmultipathd, &m->process_queued_bios); +- +- return ERR_PTR(-EAGAIN); +- } +- return NULL; +- } +- +- return pgpath; +-} +- + static int __multipath_map_bio(struct multipath *m, struct bio *bio, + struct dm_mpath_io *mpio) + { +- struct pgpath *pgpath; +- +- if (!m->hw_handler_name) +- pgpath = __map_bio_fast(m, bio); +- else +- pgpath = __map_bio(m, bio); ++ struct pgpath *pgpath = __map_bio(m, bio); + + if (IS_ERR(pgpath)) + return DM_MAPIO_SUBMITTED; +diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c +index 4c68a7b93d5e..b88d6d701f5b 100644 +--- a/drivers/md/dm-thin-metadata.c ++++ b/drivers/md/dm-thin-metadata.c +@@ -188,6 +188,15 @@ struct dm_pool_metadata { + unsigned long flags; + sector_t data_block_size; + ++ /* ++ * Pre-commit callback. ++ * ++ * This allows the thin provisioning target to run a callback before ++ * the metadata are committed. ++ */ ++ dm_pool_pre_commit_fn pre_commit_fn; ++ void *pre_commit_context; ++ + /* + * We reserve a section of the metadata for commit overhead. + * All reported space does *not* include this. 
+@@ -826,6 +835,14 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) + if (unlikely(!pmd->in_service)) + return 0; + ++ if (pmd->pre_commit_fn) { ++ r = pmd->pre_commit_fn(pmd->pre_commit_context); ++ if (r < 0) { ++ DMERR("pre-commit callback failed"); ++ return r; ++ } ++ } ++ + r = __write_changed_details(pmd); + if (r < 0) + return r; +@@ -892,6 +909,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, + pmd->in_service = false; + pmd->bdev = bdev; + pmd->data_block_size = data_block_size; ++ pmd->pre_commit_fn = NULL; ++ pmd->pre_commit_context = NULL; + + r = __create_persistent_data_objects(pmd, format_device); + if (r) { +@@ -2044,6 +2063,16 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, + return r; + } + ++void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, ++ dm_pool_pre_commit_fn fn, ++ void *context) ++{ ++ pmd_write_lock_in_core(pmd); ++ pmd->pre_commit_fn = fn; ++ pmd->pre_commit_context = context; ++ pmd_write_unlock(pmd); ++} ++ + int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) + { + int r = -EINVAL; +diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h +index f6be0d733c20..7ef56bd2a7e3 100644 +--- a/drivers/md/dm-thin-metadata.h ++++ b/drivers/md/dm-thin-metadata.h +@@ -230,6 +230,13 @@ bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd); + */ + void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd); + ++/* Pre-commit callback */ ++typedef int (*dm_pool_pre_commit_fn)(void *context); ++ ++void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd, ++ dm_pool_pre_commit_fn fn, ++ void *context); ++ + /*----------------------------------------------------------------*/ + + #endif +diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c +index fcd887703f95..1696bfd23ad1 100644 +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -328,6 +328,7 @@ struct pool_c { + dm_block_t low_water_blocks; + struct pool_features requested_pf; /* Features requested during table load */ + struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ ++ struct bio flush_bio; + }; + + /* +@@ -2392,8 +2393,16 @@ static void process_deferred_bios(struct pool *pool) + while ((bio = bio_list_pop(&bio_completions))) + bio_endio(bio); + +- while ((bio = bio_list_pop(&bios))) +- generic_make_request(bio); ++ while ((bio = bio_list_pop(&bios))) { ++ /* ++ * The data device was flushed as part of metadata commit, ++ * so complete redundant flushes immediately. ++ */ ++ if (bio->bi_opf & REQ_PREFLUSH) ++ bio_endio(bio); ++ else ++ generic_make_request(bio); ++ } + } + + static void do_worker(struct work_struct *ws) +@@ -3127,6 +3136,7 @@ static void pool_dtr(struct dm_target *ti) + __pool_dec(pt->pool); + dm_put_device(ti, pt->metadata_dev); + dm_put_device(ti, pt->data_dev); ++ bio_uninit(&pt->flush_bio); + kfree(pt); + + mutex_unlock(&dm_thin_pool_table.mutex); +@@ -3192,6 +3202,29 @@ static void metadata_low_callback(void *context) + dm_table_event(pool->ti->table); + } + ++/* ++ * We need to flush the data device **before** committing the metadata. ++ * ++ * This ensures that the data blocks of any newly inserted mappings are ++ * properly written to non-volatile storage and won't be lost in case of a ++ * crash. ++ * ++ * Failure to do so can result in data corruption in the case of internal or ++ * external snapshots and in the case of newly provisioned blocks, when block ++ * zeroing is enabled. 
++ */ ++static int metadata_pre_commit_callback(void *context) ++{ ++ struct pool_c *pt = context; ++ struct bio *flush_bio = &pt->flush_bio; ++ ++ bio_reset(flush_bio); ++ bio_set_dev(flush_bio, pt->data_dev->bdev); ++ flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ++ ++ return submit_bio_wait(flush_bio); ++} ++ + static sector_t get_dev_size(struct block_device *bdev) + { + return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; +@@ -3360,6 +3393,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) + pt->data_dev = data_dev; + pt->low_water_blocks = low_water_blocks; + pt->adjusted_pf = pt->requested_pf = pf; ++ bio_init(&pt->flush_bio, NULL, 0); + ti->num_flush_bios = 1; + + /* +@@ -3386,6 +3420,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) + if (r) + goto out_flags_changed; + ++ dm_pool_register_pre_commit_callback(pt->pool->pmd, ++ metadata_pre_commit_callback, ++ pt); ++ + pt->callbacks.congested_fn = pool_is_congested; + dm_table_add_target_callbacks(ti->table, &pt->callbacks); + +diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c +index 21ea537bd55e..eff04fa23dfa 100644 +--- a/drivers/md/persistent-data/dm-btree-remove.c ++++ b/drivers/md/persistent-data/dm-btree-remove.c +@@ -203,7 +203,13 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, + struct btree_node *right = r->n; + uint32_t nr_left = le32_to_cpu(left->header.nr_entries); + uint32_t nr_right = le32_to_cpu(right->header.nr_entries); +- unsigned threshold = 2 * merge_threshold(left) + 1; ++ /* ++ * Ensure the number of entries in each child will be greater ++ * than or equal to (max_entries / 3 + 1), so no matter which ++ * child is used for removal, the number will still be not ++ * less than (max_entries / 3). ++ */ ++ unsigned int threshold = 2 * (merge_threshold(left) + 1); + + if (nr_left + nr_right < threshold) { + /* +diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c +index 2c71a434c915..95b41c0891d0 100644 +--- a/drivers/mmc/core/block.c ++++ b/drivers/mmc/core/block.c +@@ -408,38 +408,6 @@ static int mmc_blk_ioctl_copy_to_user(struct mmc_ioc_cmd __user *ic_ptr, + return 0; + } + +-static int ioctl_rpmb_card_status_poll(struct mmc_card *card, u32 *status, +- u32 retries_max) +-{ +- int err; +- u32 retry_count = 0; +- +- if (!status || !retries_max) +- return -EINVAL; +- +- do { +- err = __mmc_send_status(card, status, 5); +- if (err) +- break; +- +- if (!R1_STATUS(*status) && +- (R1_CURRENT_STATE(*status) != R1_STATE_PRG)) +- break; /* RPMB programming operation complete */ +- +- /* +- * Rechedule to give the MMC device a chance to continue +- * processing the previous command without being polled too +- * frequently. +- */ +- usleep_range(1000, 5000); +- } while (++retry_count < retries_max); +- +- if (retry_count == retries_max) +- err = -EPERM; +- +- return err; +-} +- + static int ioctl_do_sanitize(struct mmc_card *card) + { + int err; +@@ -468,6 +436,58 @@ out: + return err; + } + ++static inline bool mmc_blk_in_tran_state(u32 status) ++{ ++ /* ++ * Some cards mishandle the status bits, so make sure to check both the ++ * busy indication and the card state. 
++ */ ++ return status & R1_READY_FOR_DATA && ++ (R1_CURRENT_STATE(status) == R1_STATE_TRAN); ++} ++ ++static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, ++ u32 *resp_errs) ++{ ++ unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); ++ int err = 0; ++ u32 status; ++ ++ do { ++ bool done = time_after(jiffies, timeout); ++ ++ err = __mmc_send_status(card, &status, 5); ++ if (err) { ++ dev_err(mmc_dev(card->host), ++ "error %d requesting status\n", err); ++ return err; ++ } ++ ++ /* Accumulate any response error bits seen */ ++ if (resp_errs) ++ *resp_errs |= status; ++ ++ /* ++ * Timeout if the device never becomes ready for data and never ++ * leaves the program state. ++ */ ++ if (done) { ++ dev_err(mmc_dev(card->host), ++ "Card stuck in wrong state! %s status: %#x\n", ++ __func__, status); ++ return -ETIMEDOUT; ++ } ++ ++ /* ++ * Some cards mishandle the status bits, ++ * so make sure to check both the busy ++ * indication and the card state. ++ */ ++ } while (!mmc_blk_in_tran_state(status)); ++ ++ return err; ++} ++ + static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, + struct mmc_blk_ioc_data *idata) + { +@@ -477,7 +497,6 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, + struct scatterlist sg; + int err; + unsigned int target_part; +- u32 status = 0; + + if (!card || !md || !idata) + return -EINVAL; +@@ -611,16 +630,12 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, + + memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp)); + +- if (idata->rpmb) { ++ if (idata->rpmb || (cmd.flags & MMC_RSP_R1B)) { + /* +- * Ensure RPMB command has completed by polling CMD13 ++ * Ensure RPMB/R1B command has completed by polling CMD13 + * "Send Status". + */ +- err = ioctl_rpmb_card_status_poll(card, &status, 5); +- if (err) +- dev_err(mmc_dev(card->host), +- "%s: Card Status=0x%08X, error %d\n", +- __func__, status, err); ++ err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, NULL); + } + + return err; +@@ -970,58 +985,6 @@ static unsigned int mmc_blk_data_timeout_ms(struct mmc_host *host, + return ms; + } + +-static inline bool mmc_blk_in_tran_state(u32 status) +-{ +- /* +- * Some cards mishandle the status bits, so make sure to check both the +- * busy indication and the card state. +- */ +- return status & R1_READY_FOR_DATA && +- (R1_CURRENT_STATE(status) == R1_STATE_TRAN); +-} +- +-static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, +- struct request *req, u32 *resp_errs) +-{ +- unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); +- int err = 0; +- u32 status; +- +- do { +- bool done = time_after(jiffies, timeout); +- +- err = __mmc_send_status(card, &status, 5); +- if (err) { +- pr_err("%s: error %d requesting status\n", +- req->rq_disk->disk_name, err); +- return err; +- } +- +- /* Accumulate any response error bits seen */ +- if (resp_errs) +- *resp_errs |= status; +- +- /* +- * Timeout if the device never becomes ready for data and never +- * leaves the program state. +- */ +- if (done) { +- pr_err("%s: Card stuck in wrong state! %s %s status: %#x\n", +- mmc_hostname(card->host), +- req->rq_disk->disk_name, __func__, status); +- return -ETIMEDOUT; +- } +- +- /* +- * Some cards mishandle the status bits, +- * so make sure to check both the busy +- * indication and the card state. 
+- */ +- } while (!mmc_blk_in_tran_state(status)); +- +- return err; +-} +- + static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host, + int type) + { +@@ -1671,7 +1634,7 @@ static int mmc_blk_fix_state(struct mmc_card *card, struct request *req) + + mmc_blk_send_stop(card, timeout); + +- err = card_busy_detect(card, timeout, req, NULL); ++ err = card_busy_detect(card, timeout, NULL); + + mmc_retune_release(card->host); + +@@ -1895,7 +1858,7 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) + if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) + return 0; + +- err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, req, &status); ++ err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, &status); + + /* + * Do not assume data transferred correctly if there are any error bits +diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c +index 221127324709..abf8f5eb0a1c 100644 +--- a/drivers/mmc/core/core.c ++++ b/drivers/mmc/core/core.c +@@ -1469,8 +1469,7 @@ void mmc_detach_bus(struct mmc_host *host) + mmc_bus_put(host); + } + +-static void _mmc_detect_change(struct mmc_host *host, unsigned long delay, +- bool cd_irq) ++void _mmc_detect_change(struct mmc_host *host, unsigned long delay, bool cd_irq) + { + /* + * If the device is configured as wakeup, we prevent a new sleep for +@@ -2129,7 +2128,7 @@ int mmc_hw_reset(struct mmc_host *host) + ret = host->bus_ops->hw_reset(host); + mmc_bus_put(host); + +- if (ret) ++ if (ret < 0) + pr_warn("%s: tried to HW reset card, got error %d\n", + mmc_hostname(host), ret); + +@@ -2297,11 +2296,8 @@ void mmc_rescan(struct work_struct *work) + + mmc_bus_get(host); + +- /* +- * if there is a _removable_ card registered, check whether it is +- * still present +- */ +- if (host->bus_ops && !host->bus_dead && mmc_card_is_removable(host)) ++ /* Verify a registered card to be functional, else remove it. */ ++ if (host->bus_ops && !host->bus_dead) + host->bus_ops->detect(host); + + host->detect_change = 0; +diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h +index 328c78dbee66..575ac0257af2 100644 +--- a/drivers/mmc/core/core.h ++++ b/drivers/mmc/core/core.h +@@ -70,6 +70,8 @@ void mmc_rescan(struct work_struct *work); + void mmc_start_host(struct mmc_host *host); + void mmc_stop_host(struct mmc_host *host); + ++void _mmc_detect_change(struct mmc_host *host, unsigned long delay, ++ bool cd_irq); + int _mmc_detect_card_removed(struct mmc_host *host); + int mmc_detect_card_removed(struct mmc_host *host); + +diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c +index 26cabd53ddc5..ebb387aa5158 100644 +--- a/drivers/mmc/core/sdio.c ++++ b/drivers/mmc/core/sdio.c +@@ -1048,9 +1048,35 @@ static int mmc_sdio_runtime_resume(struct mmc_host *host) + return ret; + } + ++/* ++ * SDIO HW reset ++ * ++ * Returns 0 if the HW reset was executed synchronously, returns 1 if the HW ++ * reset was asynchronously scheduled, else a negative error code. ++ */ + static int mmc_sdio_hw_reset(struct mmc_host *host) + { +- mmc_power_cycle(host, host->card->ocr); ++ struct mmc_card *card = host->card; ++ ++ /* ++ * In case the card is shared among multiple func drivers, reset the ++ * card through a rescan work. In this way it will be removed and ++ * re-detected, thus all func drivers becomes informed about it. 
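The card_busy_detect() helper relocated above implements a classic poll-until-ready loop: read CMD13 status, accumulate any response error bits, and report a timeout only after one final status read past the deadline. A compact userspace model of that shape follows, with a simulated status source standing in for __mmc_send_status(); the R1 bit positions follow the MMC spec, everything else is illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define R1_READY_FOR_DATA	(1u << 8)
#define R1_STATE_TRAN		4u
#define R1_CURRENT_STATE(s)	(((s) >> 9) & 0xfu)

static bool in_tran_state(unsigned int status)
{
	/* Check both indications; some cards mishandle one of them. */
	return (status & R1_READY_FOR_DATA) &&
	       R1_CURRENT_STATE(status) == R1_STATE_TRAN;
}

/* Simulated status source: card stays busy for the first two polls. */
static int read_status(unsigned int *status)
{
	static int calls;

	*status = (++calls < 3) ? 0
				: (R1_READY_FOR_DATA | (R1_STATE_TRAN << 9));
	return 0;
}

static int busy_detect(int timeout_s, unsigned int *resp_errs)
{
	time_t deadline = time(NULL) + timeout_s;
	unsigned int status;

	do {
		/* Test the deadline before reading, so one status read
		 * still happens after timeout -- same ordering as the
		 * kernel loop. */
		bool done = time(NULL) > deadline;

		if (read_status(&status))
			return -1;
		if (resp_errs)
			*resp_errs |= status;	/* accumulate error bits */
		if (done)
			return -2;		/* -ETIMEDOUT analogue */
	} while (!in_tran_state(status));

	return 0;
}

int main(void)
{
	unsigned int errs = 0;

	printf("busy_detect: %d (errs=%#x)\n", busy_detect(10, &errs), errs);
	return 0;
}

Passing NULL for resp_errs, as the RPMB/R1B ioctl path above now does, keeps the busy polling while ignoring the accumulated status bits.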
++ */ ++ if (atomic_read(&card->sdio_funcs_probed) > 1) { ++ if (mmc_card_removed(card)) ++ return 1; ++ host->rescan_entered = 0; ++ mmc_card_set_removed(card); ++ _mmc_detect_change(host, 0, false); ++ return 1; ++ } ++ ++ /* ++ * A single func driver has been probed, then let's skip the heavy ++ * hotplug dance above and execute the reset immediately. ++ */ ++ mmc_power_cycle(host, card->ocr); + return mmc_sdio_reinit_card(host); + } + +diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c +index 2963e6542958..3cc928282af7 100644 +--- a/drivers/mmc/core/sdio_bus.c ++++ b/drivers/mmc/core/sdio_bus.c +@@ -138,6 +138,8 @@ static int sdio_bus_probe(struct device *dev) + if (ret) + return ret; + ++ atomic_inc(&func->card->sdio_funcs_probed); ++ + /* Unbound SDIO functions are always suspended. + * During probe, the function is set active and the usage count + * is incremented. If the driver supports runtime PM, +@@ -153,7 +155,10 @@ static int sdio_bus_probe(struct device *dev) + /* Set the default block size so the driver is sure it's something + * sensible. */ + sdio_claim_host(func); +- ret = sdio_set_block_size(func, 0); ++ if (mmc_card_removed(func->card)) ++ ret = -ENOMEDIUM; ++ else ++ ret = sdio_set_block_size(func, 0); + sdio_release_host(func); + if (ret) + goto disable_runtimepm; +@@ -165,6 +170,7 @@ static int sdio_bus_probe(struct device *dev) + return 0; + + disable_runtimepm: ++ atomic_dec(&func->card->sdio_funcs_probed); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_noidle(dev); + dev_pm_domain_detach(dev, false); +@@ -181,6 +187,7 @@ static int sdio_bus_remove(struct device *dev) + pm_runtime_get_sync(dev); + + drv->remove(func); ++ atomic_dec(&func->card->sdio_funcs_probed); + + if (func->irq_handler) { + pr_warn("WARNING: driver %s did not remove its interrupt handler!\n", +diff --git a/drivers/pci/controller/pcie-rcar.c b/drivers/pci/controller/pcie-rcar.c +index f6a669a9af41..1ad0b56f11b4 100644 +--- a/drivers/pci/controller/pcie-rcar.c ++++ b/drivers/pci/controller/pcie-rcar.c +@@ -93,8 +93,11 @@ + #define LINK_SPEED_2_5GTS (1 << 16) + #define LINK_SPEED_5_0GTS (2 << 16) + #define MACCTLR 0x011058 ++#define MACCTLR_NFTS_MASK GENMASK(23, 16) /* The name is from SH7786 */ + #define SPEED_CHANGE BIT(24) + #define SCRAMBLE_DISABLE BIT(27) ++#define LTSMDIS BIT(31) ++#define MACCTLR_INIT_VAL (LTSMDIS | MACCTLR_NFTS_MASK) + #define PMSR 0x01105c + #define MACS2R 0x011078 + #define MACCGSPSETR 0x011084 +@@ -615,6 +618,8 @@ static int rcar_pcie_hw_init(struct rcar_pcie *pcie) + if (IS_ENABLED(CONFIG_PCI_MSI)) + rcar_pci_write_reg(pcie, 0x801f0000, PCIEMSITXR); + ++ rcar_pci_write_reg(pcie, MACCTLR_INIT_VAL, MACCTLR); ++ + /* Finish initialization - establish a PCI Express link */ + rcar_pci_write_reg(pcie, CFINIT, PCIETCTLR); + +@@ -1237,6 +1242,7 @@ static int rcar_pcie_resume_noirq(struct device *dev) + return 0; + + /* Re-establish the PCIe link */ ++ rcar_pci_write_reg(pcie, MACCTLR_INIT_VAL, MACCTLR); + rcar_pci_write_reg(pcie, CFINIT, PCIETCTLR); + return rcar_pcie_wait_for_dl(pcie); + } +diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h +index 654c972b8ea0..882ce82c4699 100644 +--- a/drivers/pci/hotplug/pciehp.h ++++ b/drivers/pci/hotplug/pciehp.h +@@ -72,6 +72,7 @@ extern int pciehp_poll_time; + * @reset_lock: prevents access to the Data Link Layer Link Active bit in the + * Link Status register and to the Presence Detect State bit in the Slot + * Status register during a slot reset which may cause them 
to flap ++ * @ist_running: flag to keep user request waiting while IRQ thread is running + * @request_result: result of last user request submitted to the IRQ thread + * @requester: wait queue to wake up on completion of user request, + * used for synchronous slot enable/disable request via sysfs +@@ -101,6 +102,7 @@ struct controller { + + struct hotplug_slot hotplug_slot; /* hotplug core interface */ + struct rw_semaphore reset_lock; ++ unsigned int ist_running; + int request_result; + wait_queue_head_t requester; + }; +diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c +index 21af7b16d7a4..dd8e4a5fb282 100644 +--- a/drivers/pci/hotplug/pciehp_ctrl.c ++++ b/drivers/pci/hotplug/pciehp_ctrl.c +@@ -375,7 +375,8 @@ int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot) + ctrl->request_result = -ENODEV; + pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC); + wait_event(ctrl->requester, +- !atomic_read(&ctrl->pending_events)); ++ !atomic_read(&ctrl->pending_events) && ++ !ctrl->ist_running); + return ctrl->request_result; + case POWERON_STATE: + ctrl_info(ctrl, "Slot(%s): Already in powering on state\n", +@@ -408,7 +409,8 @@ int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot) + mutex_unlock(&ctrl->state_lock); + pciehp_request(ctrl, DISABLE_SLOT); + wait_event(ctrl->requester, +- !atomic_read(&ctrl->pending_events)); ++ !atomic_read(&ctrl->pending_events) && ++ !ctrl->ist_running); + return ctrl->request_result; + case POWEROFF_STATE: + ctrl_info(ctrl, "Slot(%s): Already in powering off state\n", +diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c +index 1a522c1c4177..86d97f3112f0 100644 +--- a/drivers/pci/hotplug/pciehp_hpc.c ++++ b/drivers/pci/hotplug/pciehp_hpc.c +@@ -583,6 +583,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) + irqreturn_t ret; + u32 events; + ++ ctrl->ist_running = true; + pci_config_pm_runtime_get(pdev); + + /* rerun pciehp_isr() if the port was inaccessible on interrupt */ +@@ -629,6 +630,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) + up_read(&ctrl->reset_lock); + + pci_config_pm_runtime_put(pdev); ++ ctrl->ist_running = false; + wake_up(&ctrl->requester); + return IRQ_HANDLED; + } +diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c +index 0884bedcfc7a..771041784e64 100644 +--- a/drivers/pci/msi.c ++++ b/drivers/pci/msi.c +@@ -213,12 +213,13 @@ u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) + + if (pci_msi_ignore_mask) + return 0; ++ + desc_addr = pci_msix_desc_addr(desc); + if (!desc_addr) + return 0; + + mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; +- if (flag) ++ if (flag & PCI_MSIX_ENTRY_CTRL_MASKBIT) + mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + + writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); +diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c +index a8124e47bf6e..d4ac8ce8c1f9 100644 +--- a/drivers/pci/pci-driver.c ++++ b/drivers/pci/pci-driver.c +@@ -1076,17 +1076,22 @@ static int pci_pm_thaw_noirq(struct device *dev) + return error; + } + +- if (pci_has_legacy_pm_support(pci_dev)) +- return pci_legacy_resume_early(dev); +- + /* +- * pci_restore_state() requires the device to be in D0 (because of MSI +- * restoration among other things), so force it into D0 in case the +- * driver's "freeze" callbacks put it into a low-power state directly. ++ * Both the legacy ->resume_early() and the new pm->thaw_noirq() ++ * callbacks assume the device has been returned to D0 and its ++ * config state has been restored. 
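The msi.c hunk above looks like a one-character change but carries real semantics: the old code re-masked the vector whenever `flag` was non-zero, while the fixed code tests the MASKBIT itself, so stray bits in a saved vector-control word can no longer force the vector masked. A toy model of the two tests; the constant matches the spec value of the per-vector mask bit, the rest is illustrative.

#include <stdio.h>

#define PCI_MSIX_ENTRY_CTRL_MASKBIT	0x00000001u

/* Read-modify-write of the per-vector control word, as in
 * __pci_msix_desc_mask_irq(); `buggy` selects the pre-patch test. */
static unsigned int update_mask_bits(unsigned int mask_bits,
				     unsigned int flag, int buggy)
{
	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	if (buggy ? (flag != 0) : (flag & PCI_MSIX_ENTRY_CTRL_MASKBIT))
		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
	return mask_bits;
}

int main(void)
{
	unsigned int flag = 0x2;	/* only an unrelated bit set */

	printf("pre-patch: %#x  post-patch: %#x\n",
	       update_mask_bits(0, flag, 1),
	       update_mask_bits(0, flag, 0));
	return 0;
}

With the old test, the unrelated bit leaves the vector masked after a restore; with the new test it does not.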
++ * ++ * In addition, pci_restore_state() restores MSI-X state in MMIO ++ * space, which requires the device to be in D0, so return it to D0 ++ * in case the driver's "freeze" callbacks put it into a low-power ++ * state. + */ + pci_set_power_state(pci_dev, PCI_D0); + pci_restore_state(pci_dev); + ++ if (pci_has_legacy_pm_support(pci_dev)) ++ return pci_legacy_resume_early(dev); ++ + if (drv && drv->pm && drv->pm->thaw_noirq) + error = drv->pm->thaw_noirq(dev); + +diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c +index 3d5271a7a849..64ebe3e5e611 100644 +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -1089,14 +1089,15 @@ static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus, + * @sec: updated with secondary bus number from EA + * @sub: updated with subordinate bus number from EA + * +- * If @dev is a bridge with EA capability, update @sec and @sub with +- * fixed bus numbers from the capability and return true. Otherwise, +- * return false. ++ * If @dev is a bridge with EA capability that specifies valid secondary ++ * and subordinate bus numbers, return true with the bus numbers in @sec ++ * and @sub. Otherwise return false. + */ + static bool pci_ea_fixed_busnrs(struct pci_dev *dev, u8 *sec, u8 *sub) + { + int ea, offset; + u32 dw; ++ u8 ea_sec, ea_sub; + + if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) + return false; +@@ -1108,8 +1109,13 @@ static bool pci_ea_fixed_busnrs(struct pci_dev *dev, u8 *sec, u8 *sub) + + offset = ea + PCI_EA_FIRST_ENT; + pci_read_config_dword(dev, offset, &dw); +- *sec = dw & PCI_EA_SEC_BUS_MASK; +- *sub = (dw & PCI_EA_SUB_BUS_MASK) >> PCI_EA_SUB_BUS_SHIFT; ++ ea_sec = dw & PCI_EA_SEC_BUS_MASK; ++ ea_sub = (dw & PCI_EA_SUB_BUS_MASK) >> PCI_EA_SUB_BUS_SHIFT; ++ if (ea_sec == 0 || ea_sub < ea_sec) ++ return false; ++ ++ *sec = ea_sec; ++ *sub = ea_sub; + return true; + } + +diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c +index 320255e5e8f8..308f744393eb 100644 +--- a/drivers/pci/quirks.c ++++ b/drivers/pci/quirks.c +@@ -4313,15 +4313,21 @@ static int pci_quirk_amd_sb_acs(struct pci_dev *dev, u16 acs_flags) + + static bool pci_quirk_cavium_acs_match(struct pci_dev *dev) + { ++ if (!pci_is_pcie(dev) || pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) ++ return false; ++ ++ switch (dev->device) { + /* +- * Effectively selects all downstream ports for whole ThunderX 1 +- * family by 0xf800 mask (which represents 8 SoCs), while the lower +- * bits of device ID are used to indicate which subdevice is used +- * within the SoC. ++ * Effectively selects all downstream ports for whole ThunderX1 ++ * (which represents 8 SoCs). + */ +- return (pci_is_pcie(dev) && +- (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) && +- ((dev->device & 0xf800) == 0xa000)); ++ case 0xa000 ... 
0xa7ff: /* ThunderX1 */ ++ case 0xaf84: /* ThunderX2 */ ++ case 0xb884: /* ThunderX3 */ ++ return true; ++ default: ++ return false; ++ } + } + + static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags) +@@ -4706,7 +4712,7 @@ int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags) + #define INTEL_BSPR_REG_BPPD (1 << 9) + + /* Upstream Peer Decode Configuration Register */ +-#define INTEL_UPDCR_REG 0x1114 ++#define INTEL_UPDCR_REG 0x1014 + /* 5:0 Peer Decode Enable bits */ + #define INTEL_UPDCR_REG_MASK 0x3f + +diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c +index 8c94cd3fd1f2..465d6afd826e 100644 +--- a/drivers/pci/switch/switchtec.c ++++ b/drivers/pci/switch/switchtec.c +@@ -675,7 +675,7 @@ static int ioctl_event_summary(struct switchtec_dev *stdev, + return -ENOMEM; + + s->global = ioread32(&stdev->mmio_sw_event->global_summary); +- s->part_bitmap = ioread32(&stdev->mmio_sw_event->part_event_bitmap); ++ s->part_bitmap = ioread64(&stdev->mmio_sw_event->part_event_bitmap); + s->local_part = ioread32(&stdev->mmio_part_cfg->part_event_summary); + + for (i = 0; i < stdev->partition_count; i++) { +diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c +index 621f1afd4d6b..1995f5b3ea67 100644 +--- a/drivers/rpmsg/qcom_glink_native.c ++++ b/drivers/rpmsg/qcom_glink_native.c +@@ -241,10 +241,31 @@ static void qcom_glink_channel_release(struct kref *ref) + { + struct glink_channel *channel = container_of(ref, struct glink_channel, + refcount); ++ struct glink_core_rx_intent *intent; ++ struct glink_core_rx_intent *tmp; + unsigned long flags; ++ int iid; ++ ++ /* cancel pending rx_done work */ ++ cancel_work_sync(&channel->intent_work); + + spin_lock_irqsave(&channel->intent_lock, flags); ++ /* Free all non-reuse intents pending rx_done work */ ++ list_for_each_entry_safe(intent, tmp, &channel->done_intents, node) { ++ if (!intent->reuse) { ++ kfree(intent->data); ++ kfree(intent); ++ } ++ } ++ ++ idr_for_each_entry(&channel->liids, tmp, iid) { ++ kfree(tmp->data); ++ kfree(tmp); ++ } + idr_destroy(&channel->liids); ++ ++ idr_for_each_entry(&channel->riids, tmp, iid) ++ kfree(tmp); + idr_destroy(&channel->riids); + spin_unlock_irqrestore(&channel->intent_lock, flags); + +@@ -1094,13 +1115,12 @@ static int qcom_glink_create_remote(struct qcom_glink *glink, + close_link: + /* + * Send a close request to "undo" our open-ack. The close-ack will +- * release the last reference. ++ * release qcom_glink_send_open_req() reference and the last reference ++ * will be relesed after receiving remote_close or transport unregister ++ * by calling qcom_glink_native_remove(). 
+ */ + qcom_glink_send_close_req(glink, channel); + +- /* Release qcom_glink_send_open_req() reference */ +- kref_put(&channel->refcount, qcom_glink_channel_release); +- + return ret; + } + +@@ -1415,15 +1435,13 @@ static int qcom_glink_rx_open(struct qcom_glink *glink, unsigned int rcid, + + ret = rpmsg_register_device(rpdev); + if (ret) +- goto free_rpdev; ++ goto rcid_remove; + + channel->rpdev = rpdev; + } + + return 0; + +-free_rpdev: +- kfree(rpdev); + rcid_remove: + spin_lock_irqsave(&glink->idr_lock, flags); + idr_remove(&glink->rcids, channel->rcid); +@@ -1544,6 +1562,18 @@ static void qcom_glink_work(struct work_struct *work) + } + } + ++static void qcom_glink_cancel_rx_work(struct qcom_glink *glink) ++{ ++ struct glink_defer_cmd *dcmd; ++ struct glink_defer_cmd *tmp; ++ ++ /* cancel any pending deferred rx_work */ ++ cancel_work_sync(&glink->rx_work); ++ ++ list_for_each_entry_safe(dcmd, tmp, &glink->rx_queue, node) ++ kfree(dcmd); ++} ++ + struct qcom_glink *qcom_glink_native_probe(struct device *dev, + unsigned long features, + struct qcom_glink_pipe *rx, +@@ -1619,23 +1649,24 @@ void qcom_glink_native_remove(struct qcom_glink *glink) + struct glink_channel *channel; + int cid; + int ret; +- unsigned long flags; + + disable_irq(glink->irq); +- cancel_work_sync(&glink->rx_work); ++ qcom_glink_cancel_rx_work(glink); + + ret = device_for_each_child(glink->dev, NULL, qcom_glink_remove_device); + if (ret) + dev_warn(glink->dev, "Can't remove GLINK devices: %d\n", ret); + +- spin_lock_irqsave(&glink->idr_lock, flags); + /* Release any defunct local channels, waiting for close-ack */ + idr_for_each_entry(&glink->lcids, channel, cid) + kref_put(&channel->refcount, qcom_glink_channel_release); + ++ /* Release any defunct local channels, waiting for close-req */ ++ idr_for_each_entry(&glink->rcids, channel, cid) ++ kref_put(&channel->refcount, qcom_glink_channel_release); ++ + idr_destroy(&glink->lcids); + idr_destroy(&glink->rcids); +- spin_unlock_irqrestore(&glink->idr_lock, flags); + mbox_free_channel(glink->mbox_chan); + } + EXPORT_SYMBOL_GPL(qcom_glink_native_remove); +diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c +index 4238383d8685..579bc4443f6d 100644 +--- a/drivers/rpmsg/qcom_glink_smem.c ++++ b/drivers/rpmsg/qcom_glink_smem.c +@@ -105,7 +105,7 @@ static void glink_smem_rx_advance(struct qcom_glink_pipe *np, + tail = le32_to_cpu(*pipe->tail); + + tail += count; +- if (tail > pipe->native.length) ++ if (tail >= pipe->native.length) + tail -= pipe->native.length; + + *pipe->tail = cpu_to_le32(tail); +diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c +index ebd47c0cf9e9..70b99c0e2e67 100644 +--- a/drivers/scsi/libiscsi.c ++++ b/drivers/scsi/libiscsi.c +@@ -1945,7 +1945,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) + + ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc); + +- spin_lock(&session->frwd_lock); ++ spin_lock_bh(&session->frwd_lock); + task = (struct iscsi_task *)sc->SCp.ptr; + if (!task) { + /* +@@ -2072,7 +2072,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) + done: + if (task) + task->last_timeout = jiffies; +- spin_unlock(&session->frwd_lock); ++ spin_unlock_bh(&session->frwd_lock); + ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ? 
+ "timer reset" : "shutdown or nh"); + return rc; +diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c +index 7259bce85e0e..1fbc5c6c6c14 100644 +--- a/drivers/scsi/qla2xxx/qla_attr.c ++++ b/drivers/scsi/qla2xxx/qla_attr.c +@@ -176,6 +176,7 @@ qla2x00_sysfs_read_nvram(struct file *filp, struct kobject *kobj, + + faddr = ha->flt_region_nvram; + if (IS_QLA28XX(ha)) { ++ qla28xx_get_aux_images(vha, &active_regions); + if (active_regions.aux.vpd_nvram == QLA27XX_SECONDARY_IMAGE) + faddr = ha->flt_region_nvram_sec; + } +diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c +index 99f0a1a08143..cbaf178fc979 100644 +--- a/drivers/scsi/qla2xxx/qla_bsg.c ++++ b/drivers/scsi/qla2xxx/qla_bsg.c +@@ -2399,7 +2399,7 @@ qla2x00_get_flash_image_status(struct bsg_job *bsg_job) + struct qla_active_regions regions = { }; + struct active_regions active_regions = { }; + +- qla28xx_get_aux_images(vha, &active_regions); ++ qla27xx_get_active_image(vha, &active_regions); + regions.global_image = active_regions.global; + + if (IS_QLA28XX(ha)) { +diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h +index 732bb871c433..dc2366a29665 100644 +--- a/drivers/scsi/qla2xxx/qla_fw.h ++++ b/drivers/scsi/qla2xxx/qla_fw.h +@@ -1523,6 +1523,10 @@ struct qla_flt_header { + #define FLT_REG_NVRAM_SEC_28XX_1 0x10F + #define FLT_REG_NVRAM_SEC_28XX_2 0x111 + #define FLT_REG_NVRAM_SEC_28XX_3 0x113 ++#define FLT_REG_MPI_PRI_28XX 0xD3 ++#define FLT_REG_MPI_SEC_28XX 0xF0 ++#define FLT_REG_PEP_PRI_28XX 0xD1 ++#define FLT_REG_PEP_SEC_28XX 0xF1 + + struct qla_flt_region { + uint16_t code; +diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c +index d400b51929a6..5d31e3d52b6b 100644 +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -534,6 +534,7 @@ static int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport) + + e->u.fcport.fcport = fcport; + fcport->flags |= FCF_ASYNC_ACTIVE; ++ fcport->disc_state = DSC_LOGIN_PEND; + return qla2x00_post_work(vha, e); + } + +@@ -4847,6 +4848,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) + } + + INIT_WORK(&fcport->del_work, qla24xx_delete_sess_fn); ++ INIT_WORK(&fcport->free_work, qlt_free_session_done); + INIT_WORK(&fcport->reg_work, qla_register_fcport_fn); + INIT_LIST_HEAD(&fcport->gnl_entry); + INIT_LIST_HEAD(&fcport->list); +diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c +index f2d5115b2d8d..bbe90354f49b 100644 +--- a/drivers/scsi/qla2xxx/qla_sup.c ++++ b/drivers/scsi/qla2xxx/qla_sup.c +@@ -847,15 +847,15 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) + ha->flt_region_img_status_pri = start; + break; + case FLT_REG_IMG_SEC_27XX: +- if (IS_QLA27XX(ha) && !IS_QLA28XX(ha)) ++ if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) + ha->flt_region_img_status_sec = start; + break; + case FLT_REG_FW_SEC_27XX: +- if (IS_QLA27XX(ha) && !IS_QLA28XX(ha)) ++ if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) + ha->flt_region_fw_sec = start; + break; + case FLT_REG_BOOTLOAD_SEC_27XX: +- if (IS_QLA27XX(ha) && !IS_QLA28XX(ha)) ++ if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) + ha->flt_region_boot_sec = start; + break; + case FLT_REG_AUX_IMG_PRI_28XX: +@@ -2725,8 +2725,11 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, + ql_log(ql_log_warn + ql_dbg_verbose, vha, 0xffff, + "Region %x is secure\n", region.code); + +- if (region.code == FLT_REG_FW || +- region.code == FLT_REG_FW_SEC_27XX) { ++ switch (region.code) { 
++ case FLT_REG_FW: ++ case FLT_REG_FW_SEC_27XX: ++ case FLT_REG_MPI_PRI_28XX: ++ case FLT_REG_MPI_SEC_28XX: + fw_array = dwptr; + + /* 1st fw array */ +@@ -2757,9 +2760,23 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, + buf_size_without_sfub += risc_size; + fw_array += risc_size; + } +- } else { +- ql_log(ql_log_warn + ql_dbg_verbose, vha, 0xffff, +- "Secure region %x not supported\n", ++ break; ++ ++ case FLT_REG_PEP_PRI_28XX: ++ case FLT_REG_PEP_SEC_28XX: ++ fw_array = dwptr; ++ ++ /* 1st fw array */ ++ risc_size = be32_to_cpu(fw_array[3]); ++ risc_attr = be32_to_cpu(fw_array[9]); ++ ++ buf_size_without_sfub = risc_size; ++ fw_array += risc_size; ++ break; ++ ++ default: ++ ql_log(ql_log_warn + ql_dbg_verbose, vha, ++ 0xffff, "Secure region %x not supported\n", + region.code); + rval = QLA_COMMAND_ERROR; + goto done; +@@ -2880,7 +2897,7 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr, + "Sending Secure Flash MB Cmd\n"); + rval = qla28xx_secure_flash_update(vha, 0, region.code, + buf_size_without_sfub, sfub_dma, +- sizeof(struct secure_flash_update_block)); ++ sizeof(struct secure_flash_update_block) >> 2); + if (rval != QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0xffff, + "Secure Flash MB Cmd failed %x.", rval); +diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c +index a06e56224a55..a9bd0f513316 100644 +--- a/drivers/scsi/qla2xxx/qla_target.c ++++ b/drivers/scsi/qla2xxx/qla_target.c +@@ -1160,7 +1160,6 @@ void qlt_unreg_sess(struct fc_port *sess) + sess->last_rscn_gen = sess->rscn_gen; + sess->last_login_gen = sess->login_gen; + +- INIT_WORK(&sess->free_work, qlt_free_session_done); + queue_work(sess->vha->hw->wq, &sess->free_work); + } + EXPORT_SYMBOL(qlt_unreg_sess); +diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c +index 042a24314edc..bab2073c1f72 100644 +--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c ++++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c +@@ -246,6 +246,8 @@ static void tcm_qla2xxx_complete_mcmd(struct work_struct *work) + */ + static void tcm_qla2xxx_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd) + { ++ if (!mcmd) ++ return; + INIT_WORK(&mcmd->free_work, tcm_qla2xxx_complete_mcmd); + queue_work(tcm_qla2xxx_free_wq, &mcmd->free_work); + } +diff --git a/drivers/scsi/ufs/cdns-pltfrm.c b/drivers/scsi/ufs/cdns-pltfrm.c +index b2af04c57a39..6feeb0faf123 100644 +--- a/drivers/scsi/ufs/cdns-pltfrm.c ++++ b/drivers/scsi/ufs/cdns-pltfrm.c +@@ -99,6 +99,12 @@ static int cdns_ufs_link_startup_notify(struct ufs_hba *hba, + */ + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_LOCAL_TX_LCC_ENABLE), 0); + ++ /* ++ * Disabling Autohibern8 feature in cadence UFS ++ * to mask unexpected interrupt trigger. 
++ */ ++ hba->ahit = 0; ++ + return 0; + } + +diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c +index f225eaa98ff8..d0f45600b669 100644 +--- a/drivers/usb/core/hcd.c ++++ b/drivers/usb/core/hcd.c +@@ -1409,7 +1409,17 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, + if (usb_endpoint_xfer_control(&urb->ep->desc)) { + if (hcd->self.uses_pio_for_control) + return ret; +- if (hcd_uses_dma(hcd)) { ++ if (hcd->localmem_pool) { ++ ret = hcd_alloc_coherent( ++ urb->dev->bus, mem_flags, ++ &urb->setup_dma, ++ (void **)&urb->setup_packet, ++ sizeof(struct usb_ctrlrequest), ++ DMA_TO_DEVICE); ++ if (ret) ++ return ret; ++ urb->transfer_flags |= URB_SETUP_MAP_LOCAL; ++ } else if (hcd_uses_dma(hcd)) { + if (is_vmalloc_addr(urb->setup_packet)) { + WARN_ONCE(1, "setup packet is not dma capable\n"); + return -EAGAIN; +@@ -1427,23 +1437,22 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, + urb->setup_dma)) + return -EAGAIN; + urb->transfer_flags |= URB_SETUP_MAP_SINGLE; +- } else if (hcd->localmem_pool) { +- ret = hcd_alloc_coherent( +- urb->dev->bus, mem_flags, +- &urb->setup_dma, +- (void **)&urb->setup_packet, +- sizeof(struct usb_ctrlrequest), +- DMA_TO_DEVICE); +- if (ret) +- return ret; +- urb->transfer_flags |= URB_SETUP_MAP_LOCAL; + } + } + + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + if (urb->transfer_buffer_length != 0 + && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) { +- if (hcd_uses_dma(hcd)) { ++ if (hcd->localmem_pool) { ++ ret = hcd_alloc_coherent( ++ urb->dev->bus, mem_flags, ++ &urb->transfer_dma, ++ &urb->transfer_buffer, ++ urb->transfer_buffer_length, ++ dir); ++ if (ret == 0) ++ urb->transfer_flags |= URB_MAP_LOCAL; ++ } else if (hcd_uses_dma(hcd)) { + if (urb->num_sgs) { + int n; + +@@ -1497,15 +1506,6 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, + else + urb->transfer_flags |= URB_DMA_MAP_SINGLE; + } +- } else if (hcd->localmem_pool) { +- ret = hcd_alloc_coherent( +- urb->dev->bus, mem_flags, +- &urb->transfer_dma, +- &urb->transfer_buffer, +- urb->transfer_buffer_length, +- dir); +- if (ret == 0) +- urb->transfer_flags |= URB_MAP_LOCAL; + } + if (ret && (urb->transfer_flags & (URB_SETUP_MAP_SINGLE | + URB_SETUP_MAP_LOCAL))) +diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c +index 54a3c8195c96..2adcabe060c5 100644 +--- a/drivers/usb/storage/scsiglue.c ++++ b/drivers/usb/storage/scsiglue.c +@@ -135,7 +135,8 @@ static int slave_configure(struct scsi_device *sdev) + * For such controllers we need to make sure the block layer sets + * up bounce buffers in addressable memory. 
+ */ +- if (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus))) ++ if (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus)) || ++ (bus_to_hcd(us->pusb_dev->bus)->localmem_pool != NULL)) + blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_HIGH); + + /* +diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c +index 3fa3f728fb39..2056f3f85f59 100644 +--- a/drivers/vfio/pci/vfio_pci_intrs.c ++++ b/drivers/vfio/pci/vfio_pci_intrs.c +@@ -294,8 +294,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, + irq = pci_irq_vector(pdev, vector); + + if (vdev->ctx[vector].trigger) { +- free_irq(irq, vdev->ctx[vector].trigger); + irq_bypass_unregister_producer(&vdev->ctx[vector].producer); ++ free_irq(irq, vdev->ctx[vector].trigger); + kfree(vdev->ctx[vector].name); + eventfd_ctx_put(vdev->ctx[vector].trigger); + vdev->ctx[vector].trigger = NULL; +diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c +index 0b4eee3bed66..efb2928ff6c8 100644 +--- a/fs/cifs/cifs_debug.c ++++ b/fs/cifs/cifs_debug.c +@@ -256,6 +256,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) + if (!server->rdma) + goto skip_rdma; + ++ if (!server->smbd_conn) { ++ seq_printf(m, "\nSMBDirect transport not available"); ++ goto skip_rdma; ++ } ++ + seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " + "transport status: %x", + server->smbd_conn->protocol, +diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h +index d78bfcc19156..5d2dd04b55a6 100644 +--- a/fs/cifs/cifsglob.h ++++ b/fs/cifs/cifsglob.h +@@ -1524,6 +1524,7 @@ struct mid_q_entry { + struct TCP_Server_Info *server; /* server corresponding to this mid */ + __u64 mid; /* multiplex id */ + __u16 credits; /* number of credits consumed by this mid */ ++ __u16 credits_received; /* number of credits from the response */ + __u32 pid; /* process id */ + __u32 sequence_number; /* for CIFS signing */ + unsigned long when_alloc; /* when mid was created */ +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index ccaa8bad336f..20c70cbab1ad 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -905,6 +905,20 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) + spin_unlock(&GlobalMid_Lock); + } + ++static unsigned int ++smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) ++{ ++ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer; ++ ++ /* ++ * SMB1 does not use credits. ++ */ ++ if (server->vals->header_preamble_size) ++ return 0; ++ ++ return le16_to_cpu(shdr->CreditRequest); ++} ++ + static void + handle_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server, + char *buf, int malformed) +@@ -912,6 +926,7 @@ handle_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server, + if (server->ops->check_trans2 && + server->ops->check_trans2(mid, server, buf, malformed)) + return; ++ mid->credits_received = smb2_get_credits_from_hdr(buf, server); + mid->resp_buf = buf; + mid->large_buf = server->large_buf; + /* Was previous buf put in mpx struct for multi-rsp? 
*/ +@@ -1222,12 +1237,6 @@ next_pdu: + for (i = 0; i < num_mids; i++) { + if (mids[i] != NULL) { + mids[i]->resp_buf_size = server->pdu_size; +- if ((mids[i]->mid_flags & MID_WAIT_CANCELLED) && +- mids[i]->mid_state == MID_RESPONSE_RECEIVED && +- server->ops->handle_cancelled_mid) +- server->ops->handle_cancelled_mid( +- mids[i]->resp_buf, +- server); + + if (!mids[i]->multiRsp || mids[i]->multiEnd) + mids[i]->callback(mids[i]); +@@ -4700,6 +4709,17 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, + } + + #ifdef CONFIG_CIFS_DFS_UPCALL ++static inline void set_root_tcon(struct cifs_sb_info *cifs_sb, ++ struct cifs_tcon *tcon, ++ struct cifs_tcon **root) ++{ ++ spin_lock(&cifs_tcp_ses_lock); ++ tcon->tc_count++; ++ tcon->remap = cifs_remap(cifs_sb); ++ spin_unlock(&cifs_tcp_ses_lock); ++ *root = tcon; ++} ++ + int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) + { + int rc = 0; +@@ -4801,18 +4821,10 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) + /* Cache out resolved root server */ + (void)dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), + root_path + 1, NULL, NULL); +- /* +- * Save root tcon for additional DFS requests to update or create a new +- * DFS cache entry, or even perform DFS failover. +- */ +- spin_lock(&cifs_tcp_ses_lock); +- tcon->tc_count++; +- tcon->dfs_path = root_path; ++ kfree(root_path); + root_path = NULL; +- tcon->remap = cifs_remap(cifs_sb); +- spin_unlock(&cifs_tcp_ses_lock); + +- root_tcon = tcon; ++ set_root_tcon(cifs_sb, tcon, &root_tcon); + + for (count = 1; ;) { + if (!rc && tcon) { +@@ -4849,6 +4861,15 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) + mount_put_conns(cifs_sb, xid, server, ses, tcon); + rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, + &tcon); ++ /* ++ * Ensure that DFS referrals go through new root server. 
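The cifsglob.h and connect.c hunks above avoid dereferencing a response buffer late by snapshotting the granted credits while the buffer is certainly valid: handle_mid() copies CreditRequest into mid->credits_received at receive time, and the smb2ops.c hunk further down returns that snapshot instead of re-reading resp_buf at release time. A small userspace model of the snapshot-early pattern, with illustrative names rather than the CIFS structures:

#include <stdio.h>
#include <stdlib.h>

/* Response header as it sits in a transient receive buffer. */
struct resp_hdr {
	unsigned short credit_request;
};

struct mid_model {
	struct resp_hdr *resp_buf;	/* may be freed before release */
	unsigned short credits_received; /* snapshot taken at receive time */
};

/* Mirrors handle_mid(): copy what we need while the buffer is live. */
static void handle_response(struct mid_model *mid, struct resp_hdr *buf)
{
	mid->credits_received = buf->credit_request;
	mid->resp_buf = buf;
}

/* Mirrors the reworked smb2_get_credits(): no resp_buf dereference. */
static unsigned int get_credits(const struct mid_model *mid)
{
	return mid->credits_received;
}

int main(void)
{
	struct resp_hdr *buf = malloc(sizeof(*buf));
	struct mid_model mid = { 0 };

	buf->credit_request = 3;
	handle_response(&mid, buf);

	free(buf);		/* buffer gone; the snapshot still answers */
	mid.resp_buf = NULL;

	printf("credits granted: %u\n", get_credits(&mid));
	return 0;
}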
++ */ ++ if (!rc && tcon && ++ (tcon->share_flags & (SHI1005_FLAGS_DFS | ++ SHI1005_FLAGS_DFS_ROOT))) { ++ cifs_put_tcon(root_tcon); ++ set_root_tcon(cifs_sb, tcon, &root_tcon); ++ } + } + if (rc) { + if (rc == -EACCES || rc == -EOPNOTSUPP) +diff --git a/fs/cifs/file.c b/fs/cifs/file.c +index a3b6be80f8a9..c32650f14c9b 100644 +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -729,6 +729,13 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + if (backup_cred(cifs_sb)) + create_options |= CREATE_OPEN_BACKUP_INTENT; + ++ /* O_SYNC also has bit for O_DSYNC so following check picks up either */ ++ if (cfile->f_flags & O_SYNC) ++ create_options |= CREATE_WRITE_THROUGH; ++ ++ if (cfile->f_flags & O_DIRECT) ++ create_options |= CREATE_NO_BUFFER; ++ + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &cfile->fid); + +diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c +index 449d1584ff72..766974fe637a 100644 +--- a/fs/cifs/smb2misc.c ++++ b/fs/cifs/smb2misc.c +@@ -743,36 +743,67 @@ smb2_cancelled_close_fid(struct work_struct *work) + kfree(cancelled); + } + ++/* Caller should already has an extra reference to @tcon */ ++static int ++__smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, ++ __u64 volatile_fid) ++{ ++ struct close_cancelled_open *cancelled; ++ ++ cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL); ++ if (!cancelled) ++ return -ENOMEM; ++ ++ cancelled->fid.persistent_fid = persistent_fid; ++ cancelled->fid.volatile_fid = volatile_fid; ++ cancelled->tcon = tcon; ++ INIT_WORK(&cancelled->work, smb2_cancelled_close_fid); ++ WARN_ON(queue_work(cifsiod_wq, &cancelled->work) == false); ++ ++ return 0; ++} ++ ++int ++smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, ++ __u64 volatile_fid) ++{ ++ int rc; ++ ++ cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); ++ spin_lock(&cifs_tcp_ses_lock); ++ tcon->tc_count++; ++ spin_unlock(&cifs_tcp_ses_lock); ++ ++ rc = __smb2_handle_cancelled_close(tcon, persistent_fid, volatile_fid); ++ if (rc) ++ cifs_put_tcon(tcon); ++ ++ return rc; ++} ++ + int + smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server) + { + struct smb2_sync_hdr *sync_hdr = (struct smb2_sync_hdr *)buffer; + struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer; + struct cifs_tcon *tcon; +- struct close_cancelled_open *cancelled; ++ int rc; + + if (sync_hdr->Command != SMB2_CREATE || + sync_hdr->Status != STATUS_SUCCESS) + return 0; + +- cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL); +- if (!cancelled) +- return -ENOMEM; +- + tcon = smb2_find_smb_tcon(server, sync_hdr->SessionId, + sync_hdr->TreeId); +- if (!tcon) { +- kfree(cancelled); ++ if (!tcon) + return -ENOENT; +- } + +- cancelled->fid.persistent_fid = rsp->PersistentFileId; +- cancelled->fid.volatile_fid = rsp->VolatileFileId; +- cancelled->tcon = tcon; +- INIT_WORK(&cancelled->work, smb2_cancelled_close_fid); +- queue_work(cifsiod_wq, &cancelled->work); ++ rc = __smb2_handle_cancelled_close(tcon, rsp->PersistentFileId, ++ rsp->VolatileFileId); ++ if (rc) ++ cifs_put_tcon(tcon); + +- return 0; ++ return rc; + } + + /** +diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c +index cd55af9b7cc5..b5c1cba3e6a1 100644 +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -151,13 +151,7 @@ smb2_get_credits_field(struct TCP_Server_Info *server, const int optype) + static unsigned int + smb2_get_credits(struct mid_q_entry *mid) + { +- struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf; +- +- if 
(mid->mid_state == MID_RESPONSE_RECEIVED +- || mid->mid_state == MID_RESPONSE_MALFORMED) +- return le16_to_cpu(shdr->CreditRequest); +- +- return 0; ++ return mid->credits_received; + } + + static int +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index 05149862aea4..c985caa2d955 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -2972,7 +2972,21 @@ int + SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid) + { +- return SMB2_close_flags(xid, tcon, persistent_fid, volatile_fid, 0); ++ int rc; ++ int tmp_rc; ++ ++ rc = SMB2_close_flags(xid, tcon, persistent_fid, volatile_fid, 0); ++ ++ /* retry close in a worker thread if this one is interrupted */ ++ if (rc == -EINTR) { ++ tmp_rc = smb2_handle_cancelled_close(tcon, persistent_fid, ++ volatile_fid); ++ if (tmp_rc) ++ cifs_dbg(VFS, "handle cancelled close fid 0x%llx returned error %d\n", ++ persistent_fid, tmp_rc); ++ } ++ ++ return rc; + } + + int +diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h +index 71b2930b8e0b..2a12a2fa38a2 100644 +--- a/fs/cifs/smb2proto.h ++++ b/fs/cifs/smb2proto.h +@@ -212,6 +212,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, + extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, + const u64 persistent_fid, const u64 volatile_fid, + const __u8 oplock_level); ++extern int smb2_handle_cancelled_close(struct cifs_tcon *tcon, ++ __u64 persistent_fid, ++ __u64 volatile_fid); + extern int smb2_handle_cancelled_mid(char *buffer, + struct TCP_Server_Info *server); + void smb2_cancelled_close_fid(struct work_struct *work); +diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c +index 3c91fa97c9a8..5b1b97e9e0c9 100644 +--- a/fs/cifs/smbdirect.c ++++ b/fs/cifs/smbdirect.c +@@ -1069,7 +1069,7 @@ static int smbd_post_send_data( + + if (n_vec > SMBDIRECT_MAX_SGE) { + cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec); +- return -ENOMEM; ++ return -EINVAL; + } + + sg_init_table(sgl, n_vec); +@@ -1476,6 +1476,7 @@ void smbd_destroy(struct TCP_Server_Info *server) + info->transport_status = SMBD_DESTROYED; + + destroy_workqueue(info->workqueue); ++ log_rdma_event(INFO, "rdma session destroyed\n"); + kfree(info); + } + +@@ -1505,8 +1506,9 @@ create_conn: + log_rdma_event(INFO, "creating rdma session\n"); + server->smbd_conn = smbd_get_connection( + server, (struct sockaddr *) &server->dstaddr); +- log_rdma_event(INFO, "created rdma session info=%p\n", +- server->smbd_conn); ++ ++ if (server->smbd_conn) ++ cifs_dbg(VFS, "RDMA transport re-established\n"); + + return server->smbd_conn ? 
0 : -ENOENT; + } +@@ -1970,7 +1972,7 @@ read_rfc1002_done: + + if (info->transport_status != SMBD_CONNECTED) { + log_read(ERR, "disconnected\n"); +- return 0; ++ return -ECONNABORTED; + } + + goto again; +@@ -2269,12 +2271,7 @@ static void smbd_mr_recovery_work(struct work_struct *work) + int rc; + + list_for_each_entry(smbdirect_mr, &info->mr_list, list) { +- if (smbdirect_mr->state == MR_INVALIDATED) +- ib_dma_unmap_sg( +- info->id->device, smbdirect_mr->sgl, +- smbdirect_mr->sgl_count, +- smbdirect_mr->dir); +- else if (smbdirect_mr->state == MR_ERROR) { ++ if (smbdirect_mr->state == MR_ERROR) { + + /* recover this MR entry */ + rc = ib_dereg_mr(smbdirect_mr->mr); +@@ -2602,11 +2599,20 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) + */ + smbdirect_mr->state = MR_INVALIDATED; + +- /* +- * Schedule the work to do MR recovery for future I/Os +- * MR recovery is slow and we don't want it to block the current I/O +- */ +- queue_work(info->workqueue, &info->mr_recovery_work); ++ if (smbdirect_mr->state == MR_INVALIDATED) { ++ ib_dma_unmap_sg( ++ info->id->device, smbdirect_mr->sgl, ++ smbdirect_mr->sgl_count, ++ smbdirect_mr->dir); ++ smbdirect_mr->state = MR_READY; ++ if (atomic_inc_return(&info->mr_ready_count) == 1) ++ wake_up_interruptible(&info->wait_mr); ++ } else ++ /* ++ * Schedule the work to do MR recovery for future I/Os MR ++ * recovery is slow and don't want it to block current I/O ++ */ ++ queue_work(info->workqueue, &info->mr_recovery_work); + + done: + if (atomic_dec_and_test(&info->mr_used_count)) +diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c +index ca3de62688d6..755434d5e4e7 100644 +--- a/fs/cifs/transport.c ++++ b/fs/cifs/transport.c +@@ -93,8 +93,14 @@ static void _cifs_mid_q_entry_release(struct kref *refcount) + __u16 smb_cmd = le16_to_cpu(midEntry->command); + unsigned long now; + unsigned long roundtrip_time; +- struct TCP_Server_Info *server = midEntry->server; + #endif ++ struct TCP_Server_Info *server = midEntry->server; ++ ++ if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) && ++ midEntry->mid_state == MID_RESPONSE_RECEIVED && ++ server->ops->handle_cancelled_mid) ++ server->ops->handle_cancelled_mid(midEntry->resp_buf, server); ++ + midEntry->mid_state = MID_FREE; + atomic_dec(&midCount); + if (midEntry->large_buf) +@@ -319,8 +325,11 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + int val = 1; + __be32 rfc1002_marker; + +- if (cifs_rdma_enabled(server) && server->smbd_conn) { +- rc = smbd_send(server, num_rqst, rqst); ++ if (cifs_rdma_enabled(server)) { ++ /* return -EAGAIN when connecting or reconnecting */ ++ rc = -EAGAIN; ++ if (server->smbd_conn) ++ rc = smbd_send(server, num_rqst, rqst); + goto smbd_done; + } + +@@ -1119,8 +1128,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, + midQ[i]->mid, le16_to_cpu(midQ[i]->command)); + send_cancel(server, &rqst[i], midQ[i]); + spin_lock(&GlobalMid_Lock); ++ midQ[i]->mid_flags |= MID_WAIT_CANCELLED; + if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) { +- midQ[i]->mid_flags |= MID_WAIT_CANCELLED; + midQ[i]->callback = cifs_cancelled_callback; + cancelled_mid[i] = true; + credits[i].value = 0; +diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c +index 997b326247e2..c53e3b892210 100644 +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@ -381,27 +381,28 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) + /** + * gfs2_allocate_page_backing - Allocate blocks for a write fault + * @page: The (locked) page to allocate backing for 
++ * @length: Size of the allocation + * + * We try to allocate all the blocks required for the page in one go. This + * might fail for various reasons, so we keep trying until all the blocks to + * back this page are allocated. If some of the blocks are already allocated, + * that is ok too. + */ +-static int gfs2_allocate_page_backing(struct page *page) ++static int gfs2_allocate_page_backing(struct page *page, unsigned int length) + { + u64 pos = page_offset(page); +- u64 size = PAGE_SIZE; + + do { + struct iomap iomap = { }; + +- if (gfs2_iomap_get_alloc(page->mapping->host, pos, 1, &iomap)) ++ if (gfs2_iomap_get_alloc(page->mapping->host, pos, length, &iomap)) + return -EIO; + +- iomap.length = min(iomap.length, size); +- size -= iomap.length; ++ if (length < iomap.length) ++ iomap.length = length; ++ length -= iomap.length; + pos += iomap.length; +- } while (size > 0); ++ } while (length > 0); + + return 0; + } +@@ -501,7 +502,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) + if (gfs2_is_stuffed(ip)) + ret = gfs2_unstuff_dinode(ip, page); + if (ret == 0) +- ret = gfs2_allocate_page_backing(page); ++ ret = gfs2_allocate_page_backing(page, PAGE_SIZE); + + out_trans_end: + if (ret) +diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c +index 58e237fba565..2aed73666a65 100644 +--- a/fs/gfs2/log.c ++++ b/fs/gfs2/log.c +@@ -609,6 +609,14 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) + list_add(&bd->bd_list, &sdp->sd_log_revokes); + } + ++void gfs2_glock_remove_revoke(struct gfs2_glock *gl) ++{ ++ if (atomic_dec_return(&gl->gl_revokes) == 0) { ++ clear_bit(GLF_LFLUSH, &gl->gl_flags); ++ gfs2_glock_queue_put(gl); ++ } ++} ++ + void gfs2_write_revokes(struct gfs2_sbd *sdp) + { + struct gfs2_trans *tr; +diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h +index 2315fca47a2b..c762da494546 100644 +--- a/fs/gfs2/log.h ++++ b/fs/gfs2/log.h +@@ -77,6 +77,7 @@ extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) + extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); + extern int gfs2_logd(void *data); + extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); ++extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); + extern void gfs2_write_revokes(struct gfs2_sbd *sdp); + + #endif /* __LOG_DOT_H__ */ +diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c +index 5b17979af539..e2437b775456 100644 +--- a/fs/gfs2/lops.c ++++ b/fs/gfs2/lops.c +@@ -882,10 +882,7 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) + bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + list_del_init(&bd->bd_list); + gl = bd->bd_gl; +- if (atomic_dec_return(&gl->gl_revokes) == 0) { +- clear_bit(GLF_LFLUSH, &gl->gl_flags); +- gfs2_glock_queue_put(gl); +- } ++ gfs2_glock_remove_revoke(gl); + kmem_cache_free(gfs2_bufdata_cachep, bd); + } + } +diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c +index 35e3059255fe..9d4227330de4 100644 +--- a/fs/gfs2/trans.c ++++ b/fs/gfs2/trans.c +@@ -262,6 +262,8 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) + list_del_init(&bd->bd_list); + gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); + sdp->sd_log_num_revoke--; ++ if (bd->bd_gl) ++ gfs2_glock_remove_revoke(bd->bd_gl); + kmem_cache_free(gfs2_bufdata_cachep, bd); + tr->tr_num_revoke--; + if (--n == 0) +diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h +index 9b6336ad3266..e459b38ef33c 100644 +--- a/include/linux/mmc/card.h ++++ b/include/linux/mmc/card.h +@@ -291,6 +291,7 @@ struct mmc_card 
{ + struct sd_switch_caps sw_caps; /* switch (CMD6) caps */ + + unsigned int sdio_funcs; /* number of SDIO functions */ ++ atomic_t sdio_funcs_probed; /* number of probed SDIO funcs */ + struct sdio_cccr cccr; /* common card info */ + struct sdio_cis cis; /* common tuple info */ + struct sdio_func *sdio_func[SDIO_MAX_FUNCS]; /* SDIO functions (devices) */ +diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h +index ebf5ef17cc2a..24a6263c9931 100644 +--- a/include/linux/pm_qos.h ++++ b/include/linux/pm_qos.h +@@ -256,7 +256,7 @@ static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) + #endif + + #define FREQ_QOS_MIN_DEFAULT_VALUE 0 +-#define FREQ_QOS_MAX_DEFAULT_VALUE (-1) ++#define FREQ_QOS_MAX_DEFAULT_VALUE S32_MAX + + enum freq_qos_req_type { + FREQ_QOS_MIN = 1, +diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c +index f9707fb05efe..682ed39f79b0 100644 +--- a/sound/hda/hdac_stream.c ++++ b/sound/hda/hdac_stream.c +@@ -120,10 +120,8 @@ void snd_hdac_stream_clear(struct hdac_stream *azx_dev) + snd_hdac_stream_updateb(azx_dev, SD_CTL, + SD_CTL_DMA_START | SD_INT_MASK, 0); + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */ +- if (azx_dev->stripe) { ++ if (azx_dev->stripe) + snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, 0); +- azx_dev->stripe = 0; +- } + azx_dev->running = false; + } + EXPORT_SYMBOL_GPL(snd_hdac_stream_clear); +diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c +index 4dafc864d765..488c17c9f375 100644 +--- a/sound/pci/hda/patch_hdmi.c ++++ b/sound/pci/hda/patch_hdmi.c +@@ -1983,6 +1983,8 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, + per_cvt->assigned = 0; + hinfo->nid = 0; + ++ azx_stream(get_azx_dev(substream))->stripe = 0; ++ + mutex_lock(&spec->pcm_lock); + snd_hda_spdif_ctls_unassign(codec, pcm_idx); + clear_bit(pcm_idx, &spec->pcm_in_use);
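The sdio_funcs_probed counter added to struct mmc_card above closes the loop on the SDIO changes earlier in this patch: sdio_bus_probe() increments it (and decrements it again on the probe error path), sdio_bus_remove() decrements it, and mmc_sdio_hw_reset() reads it to decide whether a reset can power-cycle the card in place or must go through a full remove-and-rescan so every bound function driver is informed. A single-threaded model of that counting discipline, where a plain int stands in for the kernel's atomic_t:

#include <stdio.h>

struct card_model {
	int sdio_funcs_probed;	/* atomic_t in the kernel */
};

static int probe_func(struct card_model *card, int probe_ok)
{
	card->sdio_funcs_probed++;
	if (!probe_ok) {
		/* Mirror the error path: undo the count on failure. */
		card->sdio_funcs_probed--;
		return -1;
	}
	return 0;
}

static void remove_func(struct card_model *card)
{
	card->sdio_funcs_probed--;
}

/* Mirror the policy in mmc_sdio_hw_reset(): more than one bound driver
 * means the reset must go through remove-and-rescan. */
static const char *reset_strategy(const struct card_model *card)
{
	return card->sdio_funcs_probed > 1 ? "rescan (remove + re-detect)"
					   : "direct power cycle";
}

int main(void)
{
	struct card_model card = { 0 };

	probe_func(&card, 1);
	probe_func(&card, 1);
	printf("%s\n", reset_strategy(&card));	/* rescan */
	remove_func(&card);
	printf("%s\n", reset_strategy(&card));	/* direct power cycle */
	return 0;
}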