Commit 4067bbfa authored by Jan Beulich

VT-d: avoid PCI device lookup

The two uses of pci_get_pdev_by_domain() lack proper locking, but are
also only used to get hold of a NUMA node ID. Calculate and store the
node ID earlier on and remove the lookups (in lieu of fixing the
locking).

While doing this it became apparent that iommu_alloc()'s use of
alloc_pgtable_maddr() would occur before RHSAs would have been parsed:
iommu_alloc() gets called from the DRHD parsing routine, which - on
spec conforming platforms - happens strictly before RHSA parsing. Defer
the allocation until after all ACPI table parsing has finished,
establishing the node ID there first.
Suggested-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
parent e03eb145
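For illustration only, a minimal self-contained C sketch of the flow the patch moves to; the types and helper names below are simplified stand-ins, not the real Xen interfaces. The point is that the IOMMU's NUMA node is derived once from the matching RHSA after ACPI parsing has finished, and the page-table allocator then takes that node directly instead of deriving it from a PCI device lookup.

/*
 * Stand-in sketch (not Xen code): derive the node once, pass it to the
 * allocator.  NUMA_NO_NODE, pxm_to_node() and alloc_pgtable_maddr() are
 * simplified placeholders for the real interfaces touched by the patch.
 */
#include <stdint.h>
#include <stdio.h>

#define NUMA_NO_NODE 0xff
typedef uint8_t nodeid_t;

struct rhsa  { nodeid_t proximity_domain; };
struct iommu { nodeid_t node; uint64_t root_maddr; };

/* Placeholder: identity mapping from proximity domain to node. */
static nodeid_t pxm_to_node(nodeid_t pxm) { return pxm; }

/* New-style allocator interface: takes (npages, node) directly. */
static uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
{
    printf("allocating %lu page(s) on node %u\n", npages, (unsigned int)node);
    return 0x1000; /* pretend machine address */
}

int main(void)
{
    struct rhsa rhsa = { .proximity_domain = 1 };
    struct iommu iommu = { .node = NUMA_NO_NODE };

    /* After all ACPI tables are parsed: record the node once ... */
    iommu.node = pxm_to_node(rhsa.proximity_domain);

    /* ... and allocate the root table against it, no PCI lookup needed. */
    iommu.root_maddr = alloc_pgtable_maddr(1, iommu.node);
    return iommu.root_maddr ? 0 : 1;
}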
@@ -151,6 +151,10 @@ int iommu_domain_init(struct domain *d)
struct domain_iommu *hd = dom_iommu(d);
int ret = 0;
#ifdef CONFIG_NUMA
hd->node = NUMA_NO_NODE;
#endif
ret = arch_iommu_domain_init(d);
if ( ret )
return ret;
......
@@ -965,6 +965,7 @@ int __init acpi_dmar_init(void)
{
acpi_physical_address dmar_addr;
acpi_native_uint dmar_len;
const struct acpi_drhd_unit *drhd;
int ret;
if ( ACPI_SUCCESS(acpi_get_table_phys(ACPI_SIG_DMAR, 0,
@@ -978,6 +979,21 @@ int __init acpi_dmar_init(void)
ret = parse_dmar_table(acpi_parse_dmar);
for_each_drhd_unit ( drhd )
{
const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
struct iommu *iommu = drhd->iommu;
if ( ret )
break;
if ( rhsa )
iommu->node = pxm_to_node(rhsa->proximity_domain);
if ( !(iommu->root_maddr = alloc_pgtable_maddr(1, iommu->node)) )
ret = -ENOMEM;
}
if ( !ret )
{
iommu_init_ops = &intel_iommu_init_ops;
......
@@ -73,7 +73,7 @@ unsigned int get_cache_line_size(void);
void cacheline_flush(char *);
void flush_all_cache(void);
u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
void free_pgtable_maddr(u64 maddr);
void *map_vtd_domain_page(u64 maddr);
void unmap_vtd_domain_page(void *va);
......
@@ -795,8 +795,8 @@ int enable_intremap(struct iommu *iommu, int eim)
if ( ir_ctrl->iremap_maddr == 0 )
{
ir_ctrl->iremap_maddr = alloc_pgtable_maddr(iommu->intel->drhd,
IREMAP_ARCH_PAGE_NR);
ir_ctrl->iremap_maddr = alloc_pgtable_maddr(IREMAP_ARCH_PAGE_NR,
iommu->node);
if ( ir_ctrl->iremap_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
......
@@ -184,18 +184,12 @@ void iommu_flush_cache_page(void *addr, unsigned long npages)
}
/* Allocate page table, return its machine address */
u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
{
struct acpi_rhsa_unit *rhsa;
struct page_info *pg, *cur_pg;
u64 *vaddr;
nodeid_t node = NUMA_NO_NODE;
unsigned int i;
rhsa = drhd_to_rhsa(drhd);
if ( rhsa )
node = pxm_to_node(rhsa->proximity_domain);
pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
(node == NUMA_NO_NODE) ? 0 : MEMF_node(node));
if ( !pg )
@@ -232,7 +226,7 @@ static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
root = &root_entries[bus];
if ( !root_present(*root) )
{
maddr = alloc_pgtable_maddr(iommu->intel->drhd, 1);
maddr = alloc_pgtable_maddr(1, iommu->node);
if ( maddr == 0 )
{
unmap_vtd_domain_page(root_entries);
@@ -249,8 +243,6 @@ static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
{
struct acpi_drhd_unit *drhd;
struct pci_dev *pdev;
struct domain_iommu *hd = dom_iommu(domain);
int addr_width = agaw_to_width(hd->arch.agaw);
struct dma_pte *parent, *pte = NULL;
@@ -260,17 +252,10 @@ static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
addr &= (((u64)1) << addr_width) - 1;
ASSERT(spin_is_locked(&hd->arch.mapping_lock));
if ( hd->arch.pgd_maddr == 0 )
{
/*
* just get any passthrough device in the domainr - assume user
* assigns only devices from same node to a given guest.
*/
pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
drhd = acpi_find_matched_drhd_unit(pdev);
if ( !alloc || ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) )
goto out;
}
if ( !hd->arch.pgd_maddr &&
(!alloc ||
((hd->arch.pgd_maddr = alloc_pgtable_maddr(1, hd->node)) == 0)) )
goto out;
parent = (struct dma_pte *)map_vtd_domain_page(hd->arch.pgd_maddr);
while ( level > 1 )
@@ -284,9 +269,7 @@ static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
if ( !alloc )
break;
pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
drhd = acpi_find_matched_drhd_unit(pdev);
pte_maddr = alloc_pgtable_maddr(drhd, 1);
pte_maddr = alloc_pgtable_maddr(1, hd->node);
if ( !pte_maddr )
break;
@@ -1181,6 +1164,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
return -ENOMEM;
iommu->msi.irq = -1; /* No irq assigned yet. */
iommu->node = NUMA_NO_NODE;
INIT_LIST_HEAD(&iommu->ats_devices);
iommu->intel = alloc_intel_iommu();
@@ -1192,9 +1176,6 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
iommu->intel->drhd = drhd;
drhd->iommu = iommu;
if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1)) )
return -ENOMEM;
iommu->reg = ioremap(drhd->address, PAGE_SIZE);
if ( !iommu->reg )
return -ENOMEM;
@@ -1488,6 +1469,17 @@ static int domain_context_mapping(struct domain *domain, u8 devfn,
if ( !drhd )
return -ENODEV;
/*
* Generally we assume only devices from one node to get assigned to a
* given guest. But even if not, by replacing the prior value here we
* guarantee that at least some basic allocations for the device being
* added will get done against its node. Any further allocations for
* this or other devices may be penalized then, but some would also be
* if we left other than NUMA_NO_NODE untouched here.
*/
if ( drhd->iommu->node != NUMA_NO_NODE )
dom_iommu(domain)->node = drhd->iommu->node;
ASSERT(pcidevs_locked());
switch ( pdev->type )
......
@@ -542,6 +542,7 @@ struct iommu {
spinlock_t lock; /* protect context, domain ids */
spinlock_t register_lock; /* protect iommu register handling */
u64 root_maddr; /* root entry machine address */
nodeid_t node;
struct msi_desc msi;
struct intel_iommu *intel;
struct list_head ats_devices;
......
@@ -415,8 +415,8 @@ int enable_qinval(struct iommu *iommu)
if ( qi_ctrl->qinval_maddr == 0 )
{
qi_ctrl->qinval_maddr = alloc_pgtable_maddr(iommu->intel->drhd,
QINVAL_ARCH_PAGE_NR);
qi_ctrl->qinval_maddr = alloc_pgtable_maddr(QINVAL_ARCH_PAGE_NR,
iommu->node);
if ( qi_ctrl->qinval_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
......
@@ -266,6 +266,11 @@ struct domain_iommu {
struct list_head dt_devices;
#endif
#ifdef CONFIG_NUMA
/* NUMA node to do IOMMU related allocations against. */
nodeid_t node;
#endif
/* Features supported by the IOMMU */
DECLARE_BITMAP(features, IOMMU_FEAT_count);
......