Commit ea388678 authored by Paul Durrant, committed by Andrew Cooper

x86 / iommu: set up a scratch page in the quarantine domain

This patch introduces a new iommu_op to facilitate a per-implementation
quarantine set up, and then further code for x86 implementations
(amd and vtd) to set up a read-only scratch page to serve as the source
for DMA reads whilst a device is assigned to dom_io. DMA writes will
continue to fault as before.

The reason for doing this is that some hardware may continue to re-try
DMA (despite FLR) in the event of an error, or even BME being cleared, and
will fail to deal with DMA read faults gracefully. Having a scratch page
mapped will allow pending DMA reads to complete and thus such buggy
hardware will eventually be quiesced.

NOTE: These modifications are restricted to x86 implementations only as
      the buggy h/w I am aware of is only used with Xen in an x86
      environment. ARM may require similar code but, since I am not
      aware of the need, this patch does not modify any ARM implementation.
Signed-off-by: Paul Durrant <pdurrant@amazon.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Release-acked-by: Juergen Gross <jgross@suse.com>
parent be18e39d
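
The page-table shape that both quarantine_init implementations below construct can be pictured with a small stand-alone sketch (plain C with made-up sizes and helper names, not Xen code): each level is a single zeroed page, every entry of the level above points at it with read permission only, and the last page is left empty to serve as the scratch page, so any pending DMA read resolves to zeroes while DMA writes still have no writable mapping and fault as before.

/*
 * Stand-alone illustration (not Xen code) of the quarantine page-table
 * shape: every entry at every level points at the same next-level page,
 * and the final page is left zeroed to act as the read-only scratch page.
 * Sizes and helper names are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES_PER_TABLE 512           /* 4k page / 8-byte entry */
#define LEVELS 4                        /* e.g. a 48-bit address width */

struct pte {
    void *next;                         /* next-level table or scratch page */
    int readable;                       /* read permission only, no write */
};

static void *alloc_zeroed_page(void)
{
    /* Mirrors the "always zeroed" property of the Xen pgtable allocators. */
    return calloc(ENTRIES_PER_TABLE, sizeof(struct pte));
}

int main(void)
{
    struct pte *root = alloc_zeroed_page();
    struct pte *table = root;
    unsigned int level = LEVELS, i;

    while ( level )
    {
        void *pg = alloc_zeroed_page(); /* one shared page per level */

        for ( i = 0; i < ENTRIES_PER_TABLE; i++ )
        {
            table[i].next = pg;
            table[i].readable = 1;
        }
        table = pg;                     /* descend; the last pg is the scratch page */
        level--;
    }

    /* Any address walks down to the same zero-filled scratch page. */
    struct pte *p = root;
    for ( level = 0; level < LEVELS; level++ )
        p = p[123].next;                /* index is irrelevant: all entries match */
    printf("scratch byte reads as %u\n", ((unsigned char *)p)[0]);

    return 0;
}
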
@@ -560,6 +560,68 @@ int amd_iommu_reserve_domain_unity_map(struct domain *domain,
     return rt;
 }
 
+int __init amd_iommu_quarantine_init(struct domain *d)
+{
+    struct domain_iommu *hd = dom_iommu(d);
+    unsigned long max_gfn =
+        PFN_DOWN((1ul << DEFAULT_DOMAIN_ADDRESS_WIDTH) - 1);
+    unsigned int level = amd_iommu_get_paging_mode(max_gfn);
+    struct amd_iommu_pte *table;
+
+    if ( hd->arch.root_table )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    spin_lock(&hd->arch.mapping_lock);
+
+    hd->arch.root_table = alloc_amd_iommu_pgtable();
+    if ( !hd->arch.root_table )
+        goto out;
+
+    table = __map_domain_page(hd->arch.root_table);
+    while ( level )
+    {
+        struct page_info *pg;
+        unsigned int i;
+
+        /*
+         * The pgtable allocator is fine for the leaf page, as well as
+         * page table pages, and the resulting allocations are always
+         * zeroed.
+         */
+        pg = alloc_amd_iommu_pgtable();
+        if ( !pg )
+            break;
+
+        for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
+        {
+            struct amd_iommu_pte *pde = &table[i];
+
+            /*
+             * PDEs are essentially a subset of PTEs, so this function
+             * is fine to use even at the leaf.
+             */
+            set_iommu_pde_present(pde, mfn_x(page_to_mfn(pg)), level - 1,
+                                  false, true);
+        }
+
+        unmap_domain_page(table);
+        table = __map_domain_page(pg);
+        level--;
+    }
+    unmap_domain_page(table);
+
+ out:
+    spin_unlock(&hd->arch.mapping_lock);
+
+    amd_iommu_flush_all_pages(d);
+
+    /* Pages leaked in failure case */
+    return level ? -ENOMEM : 0;
+}
+
 /*
  * Local variables:
  * mode: C
@@ -95,10 +95,6 @@ static void amd_iommu_setup_domain_device(
     u8 bus = pdev->bus;
     const struct domain_iommu *hd = dom_iommu(domain);
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        return;
-
     BUG_ON( !hd->arch.root_table || !hd->arch.paging_mode ||
             !iommu->dev_table.buffer );
 
@@ -235,7 +231,7 @@ static int __must_check allocate_domain_resources(struct domain_iommu *hd)
     return rc;
 }
 
-static int get_paging_mode(unsigned long entries)
+int amd_iommu_get_paging_mode(unsigned long entries)
 {
     int level = 1;
 
@@ -257,7 +253,8 @@ static int amd_iommu_domain_init(struct domain *d)
 
     /* For pv and dom0, stick with get_paging_mode(max_page)
      * For HVM dom0, use 2 level page table at first */
-    hd->arch.paging_mode = is_hvm_domain(d) ? 2 : get_paging_mode(max_page);
+    hd->arch.paging_mode = is_hvm_domain(d) ?
+        2 : amd_iommu_get_paging_mode(max_page);
 
     return 0;
 }
@@ -290,10 +287,6 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
     int req_id;
     u8 bus = pdev->bus;
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        return;
-
     BUG_ON ( iommu->dev_table.buffer == NULL );
     req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
     table = iommu->dev_table.buffer;
@@ -632,6 +625,7 @@ static void amd_dump_p2m_table(struct domain *d)
 static const struct iommu_ops __initconstrel _iommu_ops = {
     .init = amd_iommu_domain_init,
     .hwdom_init = amd_iommu_hwdom_init,
+    .quarantine_init = amd_iommu_quarantine_init,
     .add_device = amd_iommu_add_device,
     .remove_device = amd_iommu_remove_device,
     .assign_device = amd_iommu_assign_device,
@@ -443,6 +443,23 @@ int iommu_iotlb_flush_all(struct domain *d, unsigned int flush_flags)
     return rc;
 }
 
+static int __init iommu_quarantine_init(void)
+{
+    const struct domain_iommu *hd = dom_iommu(dom_io);
+    int rc;
+
+    dom_io->options |= XEN_DOMCTL_CDF_iommu;
+
+    rc = iommu_domain_init(dom_io, 0);
+    if ( rc )
+        return rc;
+
+    if ( !hd->platform_ops->quarantine_init )
+        return 0;
+
+    return hd->platform_ops->quarantine_init(dom_io);
+}
+
 int __init iommu_setup(void)
 {
     int rc = -ENODEV;
@@ -476,8 +493,7 @@ int __init iommu_setup(void)
     }
     else
     {
-        dom_io->options |= XEN_DOMCTL_CDF_iommu;
-        if ( iommu_domain_init(dom_io, 0) )
+        if ( iommu_quarantine_init() )
             panic("Could not set up quarantine\n");
 
         printk(" - Dom0 mode: %s\n",
@@ -1291,10 +1291,6 @@ int domain_context_mapping_one(
     int agaw, rc, ret;
     bool_t flush_dev_iotlb;
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        return 0;
-
     ASSERT(pcidevs_locked());
     spin_lock(&iommu->lock);
     maddr = bus_to_context_maddr(iommu, bus);
@@ -1541,10 +1537,6 @@ int domain_context_unmap_one(
     int iommu_domid, rc, ret;
     bool_t flush_dev_iotlb;
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        return 0;
-
     ASSERT(pcidevs_locked());
     spin_lock(&iommu->lock);
 
@@ -1677,10 +1669,6 @@ static int domain_context_unmap(struct domain *domain, u8 devfn,
         goto out;
     }
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        goto out;
-
     /*
      * if no other devices under the same iommu owned by this domain,
      * clear iommu in iommu_bitmap and clear domain_id in domid_bitmp
@@ -2683,9 +2671,69 @@ static void vtd_dump_p2m_table(struct domain *d)
     vtd_dump_p2m_table_level(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw), 0, 0);
 }
 
+static int __init intel_iommu_quarantine_init(struct domain *d)
+{
+    struct domain_iommu *hd = dom_iommu(d);
+    struct dma_pte *parent;
+    unsigned int agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+    unsigned int level = agaw_to_level(agaw);
+    int rc;
+
+    if ( hd->arch.pgd_maddr )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    spin_lock(&hd->arch.mapping_lock);
+
+    hd->arch.pgd_maddr = alloc_pgtable_maddr(1, hd->node);
+    if ( !hd->arch.pgd_maddr )
+        goto out;
+
+    parent = map_vtd_domain_page(hd->arch.pgd_maddr);
+    while ( level )
+    {
+        uint64_t maddr;
+        unsigned int offset;
+
+        /*
+         * The pgtable allocator is fine for the leaf page, as well as
+         * page table pages, and the resulting allocations are always
+         * zeroed.
+         */
+        maddr = alloc_pgtable_maddr(1, hd->node);
+        if ( !maddr )
+            break;
+
+        for ( offset = 0; offset < PTE_NUM; offset++ )
+        {
+            struct dma_pte *pte = &parent[offset];
+
+            dma_set_pte_addr(*pte, maddr);
+            dma_set_pte_readable(*pte);
+        }
+        iommu_flush_cache_page(parent, 1);
+
+        unmap_vtd_domain_page(parent);
+        parent = map_vtd_domain_page(maddr);
+        level--;
+    }
+    unmap_vtd_domain_page(parent);
+
+ out:
+    spin_unlock(&hd->arch.mapping_lock);
+
+    rc = iommu_flush_iotlb_all(d);
+
+    /* Pages leaked in failure case */
+    return level ? -ENOMEM : rc;
+}
+
 const struct iommu_ops __initconstrel intel_iommu_ops = {
     .init = intel_iommu_domain_init,
     .hwdom_init = intel_iommu_hwdom_init,
+    .quarantine_init = intel_iommu_quarantine_init,
     .add_device = intel_iommu_add_device,
     .enable_device = intel_iommu_enable_device,
     .remove_device = intel_iommu_remove_device,
@@ -54,6 +54,9 @@ int amd_iommu_init_late(void);
 int amd_iommu_update_ivrs_mapping_acpi(void);
 int iov_adjust_irq_affinities(void);
 
+int amd_iommu_get_paging_mode(unsigned long entries);
+int amd_iommu_quarantine_init(struct domain *d);
+
 /* mapping functions */
 int __must_check amd_iommu_map_page(struct domain *d, dfn_t dfn,
                                     mfn_t mfn, unsigned int flags,
@@ -211,6 +211,7 @@ typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt);
 struct iommu_ops {
     int (*init)(struct domain *d);
     void (*hwdom_init)(struct domain *d);
+    int (*quarantine_init)(struct domain *d);
     int (*add_device)(u8 devfn, device_t *dev);
     int (*enable_device)(device_t *dev);
     int (*remove_device)(u8 devfn, device_t *dev);
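
NOTE: the new iommu_ops hook is deliberately optional. The common iommu_quarantine_init() above still gives dom_io an IOMMU context and simply skips the scratch-page set-up when an implementation does not supply quarantine_init (as the unmodified ARM implementations do not), preserving the previous fault-on-all-DMA behaviour. A stand-alone sketch of that dispatch pattern, with made-up types and function names rather than Xen's:

/*
 * Stand-alone sketch (not Xen code) of the optional-hook pattern: an
 * implementation that does not fill in quarantine_init keeps the old
 * behaviour, because the common code checks the pointer before calling.
 */
#include <stdio.h>

struct ops {
    int (*init)(void);
    int (*quarantine_init)(void);       /* may legitimately be NULL */
};

static int generic_quarantine_init(const struct ops *ops)
{
    int rc = ops->init();               /* always set up the quarantine context */

    if ( rc )
        return rc;

    if ( !ops->quarantine_init )        /* no hook: DMA keeps faulting */
        return 0;

    return ops->quarantine_init();      /* hook: map the scratch page */
}

static int dummy_init(void) { return 0; }
static int dummy_quarantine_init(void) { puts("scratch page mapped"); return 0; }

static const struct ops x86_like = {
    .init = dummy_init,
    .quarantine_init = dummy_quarantine_init,
};

static const struct ops arm_like = {    /* no quarantine_init provided */
    .init = dummy_init,
};

int main(void)
{
    return generic_quarantine_init(&x86_like) |
           generic_quarantine_init(&arm_like);
}
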