Skip to content

Commit d200964

Browse files
Alex Williamson authored and gregkh committed
vfio/type1: Add proper error unwind for vfio_iommu_replay()
[ Upstream commit aae7a75 ]

The vfio_iommu_replay() function does not currently unwind on error, yet it does pin pages, perform IOMMU mapping, and modify the vfio_dma structure to indicate IOMMU mapping. The IOMMU mappings are torn down when the domain is destroyed, but the other actions go on to cause trouble later. For example, the iommu->domain_list can be empty if we only have a non-IOMMU backed mdev attached. We don't currently check if the list is empty before getting the first entry in the list, which leads to a bogus domain pointer. If a vfio_dma entry is erroneously marked as iommu_mapped, we'll attempt to use that bogus pointer to retrieve the existing physical page addresses.

This is the scenario that uncovered this issue, attempting to hot-add a vfio-pci device to a container with an existing mdev device and DMA mappings, one of which could not be pinned, causing a failure adding the new group to the existing container and setting the conditions for a subsequent attempt to explode.

To resolve this, we can first check if the domain_list is empty so that we can reject replay of a bogus domain, should we ever encounter this inconsistent state again in the future. The real fix though is to add the necessary unwind support, which means cleaning up the current pinning if an IOMMU mapping fails, then walking back through the r-b tree of DMA entries, reading from the IOMMU which ranges are mapped, and unmapping and unpinning those ranges. To be able to do this, we also defer marking the DMA entry as IOMMU mapped until all entries are processed, in order to allow the unwind to know the disposition of each entry.

Fixes: a54eb55 ("vfio iommu type1: Add support for mediated devices")
Reported-by: Zhiyi Guo <zhguo@redhat.com>
Tested-by: Zhiyi Guo <zhguo@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 6b2dd0c commit d200964

1 file changed

Lines changed: 66 additions & 5 deletions

File tree

drivers/vfio/vfio_iommu_type1.c

Lines changed: 66 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1193,13 +1193,16 @@ static int vfio_bus_type(struct device *dev, void *data)
11931193
static int vfio_iommu_replay(struct vfio_iommu *iommu,
11941194
struct vfio_domain *domain)
11951195
{
1196-
struct vfio_domain *d;
1196+
struct vfio_domain *d = NULL;
11971197
struct rb_node *n;
11981198
unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
11991199
int ret;
12001200

12011201
/* Arbitrarily pick the first domain in the list for lookups */
1202-
d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
1202+
if (!list_empty(&iommu->domain_list))
1203+
d = list_first_entry(&iommu->domain_list,
1204+
struct vfio_domain, next);
1205+
12031206
n = rb_first(&iommu->dma_list);
12041207

12051208
for (; n; n = rb_next(n)) {
@@ -1217,6 +1220,11 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
12171220
phys_addr_t p;
12181221
dma_addr_t i;
12191222

1223+
if (WARN_ON(!d)) { /* mapped w/o a domain?! */
1224+
ret = -EINVAL;
1225+
goto unwind;
1226+
}
1227+
12201228
phys = iommu_iova_to_phys(d->domain, iova);
12211229

12221230
if (WARN_ON(!phys)) {
@@ -1246,7 +1254,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
12461254
if (npage <= 0) {
12471255
WARN_ON(!npage);
12481256
ret = (int)npage;
1249-
return ret;
1257+
goto unwind;
12501258
}
12511259

12521260
phys = pfn << PAGE_SHIFT;
@@ -1255,14 +1263,67 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
12551263

12561264
ret = iommu_map(domain->domain, iova, phys,
12571265
size, dma->prot | domain->prot);
1258-
if (ret)
1259-
return ret;
1266+
if (ret) {
1267+
if (!dma->iommu_mapped)
1268+
vfio_unpin_pages_remote(dma, iova,
1269+
phys >> PAGE_SHIFT,
1270+
size >> PAGE_SHIFT,
1271+
true);
1272+
goto unwind;
1273+
}
12601274

12611275
iova += size;
12621276
}
1277+
}
1278+
1279+
/* All dmas are now mapped, defer to second tree walk for unwind */
1280+
for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
1281+
struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
1282+
12631283
dma->iommu_mapped = true;
12641284
}
1285+
12651286
return 0;
1287+
1288+
unwind:
1289+
for (; n; n = rb_prev(n)) {
1290+
struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
1291+
dma_addr_t iova;
1292+
1293+
if (dma->iommu_mapped) {
1294+
iommu_unmap(domain->domain, dma->iova, dma->size);
1295+
continue;
1296+
}
1297+
1298+
iova = dma->iova;
1299+
while (iova < dma->iova + dma->size) {
1300+
phys_addr_t phys, p;
1301+
size_t size;
1302+
dma_addr_t i;
1303+
1304+
phys = iommu_iova_to_phys(domain->domain, iova);
1305+
if (!phys) {
1306+
iova += PAGE_SIZE;
1307+
continue;
1308+
}
1309+
1310+
size = PAGE_SIZE;
1311+
p = phys + size;
1312+
i = iova + size;
1313+
while (i < dma->iova + dma->size &&
1314+
p == iommu_iova_to_phys(domain->domain, i)) {
1315+
size += PAGE_SIZE;
1316+
p += PAGE_SIZE;
1317+
i += PAGE_SIZE;
1318+
}
1319+
1320+
iommu_unmap(domain->domain, iova, size);
1321+
vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT,
1322+
size >> PAGE_SHIFT, true);
1323+
}
1324+
}
1325+
1326+
return ret;
12661327
}
12671328

12681329
/*

0 commit comments

Comments
 (0)