Commit 95684381 authored by Jan Beulich, committed by Ian Jackson

introduce XENMEM_reserved_device_memory_map

This is a prerequisite for punching holes into HVM and PVH guests' P2M
to allow passing through devices that are associated with (on VT-d)
RMRRs.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
---
v12a: Move interface structure union member to the end, while moving
     the whole public header block into a __XEN__ / __XEN_TOOLS__
     conditional block.
v12: Restore changes as much as possible to my original version, fixing
     a few issues that got introduced after handing it over. Unionize
     new public memop interface structure to allow for non-PCI to be
     supported later on. Check flags to have all currently undefined
     flags clear. Refine adjustments to xen/pci.h.
parent 10f086ec
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -17,6 +17,42 @@ CHECK_TYPE(domid);
CHECK_mem_access_op;
CHECK_vmemrange;
#ifdef HAS_PASSTHROUGH
struct get_reserved_device_memory {
    struct compat_reserved_device_memory_map map;
    unsigned int used_entries;
};

static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
                                      u32 id, void *ctxt)
{
    struct get_reserved_device_memory *grdm = ctxt;
    u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus,
                         grdm->map.dev.pci.devfn);

    if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) )
        return 0;

    if ( grdm->used_entries < grdm->map.nr_entries )
    {
        struct compat_reserved_device_memory rdm = {
            .start_pfn = start, .nr_pages = nr
        };

        /* The compat fields are narrower; fail if the values got truncated. */
        if ( rdm.start_pfn != start || rdm.nr_pages != nr )
            return -ERANGE;

        if ( __copy_to_compat_offset(grdm->map.buffer, grdm->used_entries,
                                     &rdm, 1) )
            return -EFAULT;
    }

    ++grdm->used_entries;

    return 1;
}
#endif
int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
{
    int split, op = cmd & MEMOP_CMD_MASK;
@@ -303,6 +339,35 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
            break;
        }

#ifdef HAS_PASSTHROUGH
        case XENMEM_reserved_device_memory_map:
        {
            struct get_reserved_device_memory grdm;

            if ( unlikely(start_extent) )
                return -ENOSYS;

            if ( copy_from_guest(&grdm.map, compat, 1) ||
                 !compat_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
                return -EFAULT;

            /* All currently undefined flags must be clear. */
            if ( grdm.map.flags & ~XENMEM_RDM_ALL )
                return -EINVAL;

            grdm.used_entries = 0;
            rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
                                                  &grdm);

            /* Tell the caller how many entries it actually needs. */
            if ( !rc && grdm.map.nr_entries < grdm.used_entries )
                rc = -ENOBUFS;
            grdm.map.nr_entries = grdm.used_entries;
            if ( __copy_to_guest(compat, &grdm.map, 1) )
                rc = -EFAULT;

            return rc;
        }
#endif

        default:
            return compat_arch_memory_op(cmd, compat);
        }
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -748,6 +748,39 @@ static int construct_memop_from_reservation(
    return 0;
}
#ifdef HAS_PASSTHROUGH
struct get_reserved_device_memory {
    struct xen_reserved_device_memory_map map;
    unsigned int used_entries;
};

static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
                                      u32 id, void *ctxt)
{
    struct get_reserved_device_memory *grdm = ctxt;
    u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus,
                         grdm->map.dev.pci.devfn);

    if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) )
        return 0;

    if ( grdm->used_entries < grdm->map.nr_entries )
    {
        struct xen_reserved_device_memory rdm = {
            .start_pfn = start, .nr_pages = nr
        };

        if ( __copy_to_guest_offset(grdm->map.buffer, grdm->used_entries,
                                    &rdm, 1) )
            return -EFAULT;
    }

    ++grdm->used_entries;

    return 1;
}
#endif
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct domain *d;
@@ -1162,6 +1195,35 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
        break;
    }

#ifdef HAS_PASSTHROUGH
    case XENMEM_reserved_device_memory_map:
    {
        struct get_reserved_device_memory grdm;

        if ( unlikely(start_extent) )
            return -ENOSYS;

        if ( copy_from_guest(&grdm.map, arg, 1) ||
             !guest_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
            return -EFAULT;

        /* All currently undefined flags must be clear. */
        if ( grdm.map.flags & ~XENMEM_RDM_ALL )
            return -EINVAL;

        grdm.used_entries = 0;
        rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
                                              &grdm);

        /* Tell the caller how many entries it actually needs. */
        if ( !rc && grdm.map.nr_entries < grdm.used_entries )
            rc = -ENOBUFS;
        grdm.map.nr_entries = grdm.used_entries;
        if ( __copy_to_guest(arg, &grdm.map, 1) )
            rc = -EFAULT;

        break;
    }
#endif

    default:
        rc = arch_memory_op(cmd, arg);
        break;
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -375,6 +375,16 @@ void iommu_crash_shutdown(void)
    iommu_enabled = iommu_intremap = 0;
}
int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
{
    const struct iommu_ops *ops = iommu_get_ops();

    if ( !iommu_enabled || !ops->get_reserved_device_memory )
        return 0;

    return ops->get_reserved_device_memory(func, ctxt);
}
bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
{
    const struct hvm_iommu *hd = domain_hvm_iommu(d);
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -914,3 +914,30 @@ int platform_supports_x2apic(void)
    unsigned int mask = ACPI_DMAR_INTR_REMAP | ACPI_DMAR_X2APIC_OPT_OUT;
    return cpu_has_x2apic && ((dmar_flags & mask) == ACPI_DMAR_INTR_REMAP);
}
int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
{
    struct acpi_rmrr_unit *rmrr, *rmrr_cur = NULL;
    unsigned int i;
    u16 bdf;

    for_each_rmrr_device ( rmrr, bdf, i )
    {
        int rc;

        /*
         * An RMRR is visited once per device in its scope; only offer it
         * again if the callback didn't accept it for the previous device.
         */
        if ( rmrr == rmrr_cur )
            continue;

        rc = func(PFN_DOWN(rmrr->base_address),
                  PFN_UP(rmrr->end_address) - PFN_DOWN(rmrr->base_address),
                  PCI_SBDF2(rmrr->segment, bdf), ctxt);

        if ( unlikely(rc < 0) )
            return rc;

        if ( rc )
            rmrr_cur = rmrr; /* positive: consumed, skip its remaining devices */
    }

    return 0;
}
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -75,6 +75,7 @@ int domain_context_mapping_one(struct domain *domain, struct iommu *iommu,
                               u8 bus, u8 devfn, const struct pci_dev *);
int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
                             u8 bus, u8 devfn);
int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt);
unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg);
void io_apic_write_remap_rte(unsigned int apic,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2490,6 +2490,7 @@ const struct iommu_ops intel_iommu_ops = {
    .crash_shutdown = vtd_crash_shutdown,
    .iotlb_flush = intel_iommu_iotlb_flush,
    .iotlb_flush_all = intel_iommu_iotlb_flush_all,
    .get_reserved_device_memory = intel_iommu_get_reserved_device_memory,
    .dump_p2m_table = vtd_dump_p2m_table,
};
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -28,6 +28,7 @@
#define __XEN_PUBLIC_MEMORY_H__
#include "xen.h"
#include "physdev.h"
/*
 * Increase or decrease the specified domain's memory reservation. Returns the
@@ -522,6 +523,40 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
 * The zero value is appropriate.
 */
/*
 * With some legacy devices, certain guest-physical addresses cannot safely
 * be used for other purposes, e.g. to map guest RAM. This hypercall
 * enumerates those regions so the toolstack can avoid using them.
 */
#define XENMEM_reserved_device_memory_map 27
struct xen_reserved_device_memory {
    xen_pfn_t start_pfn;
    xen_ulong_t nr_pages;
};
typedef struct xen_reserved_device_memory xen_reserved_device_memory_t;
DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t);

struct xen_reserved_device_memory_map {
#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */
    /* IN */
    uint32_t flags;
    /*
     * IN/OUT
     *
     * Gets set to the required number of entries when too low,
     * signaled by error code -ENOBUFS.
     */
    unsigned int nr_entries;
    /* OUT */
    XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer;
    /* IN */
    union {
        struct physdev_pci_device pci;
    } dev;
};
typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);

#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
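
Illustrative usage (not part of this commit): a caller is expected to invoke the subop twice, first with nr_entries set to zero to learn the required buffer size (reported back alongside -ENOBUFS), then again with a large enough buffer. A minimal sketch, in which memory_op() stands in for whatever __HYPERVISOR_memory_op wrapper the caller has available, and hypercall-safe buffer management is glossed over:

#include <errno.h>
#include <stdlib.h>

/* Hypothetical helper: enumerate all reserved device memory regions. */
static int get_rdm_regions(xen_reserved_device_memory_t **regions,
                           unsigned int *count)
{
    struct xen_reserved_device_memory_map map = {
        .flags = XENMEM_RDM_ALL, /* all regions; .dev is ignored */
        .nr_entries = 0,         /* first call: query required size */
    };
    xen_reserved_device_memory_t *buf;
    int rc;

    *regions = NULL;
    *count = 0;

    rc = memory_op(XENMEM_reserved_device_memory_map, &map);
    if ( rc != -ENOBUFS )
        return rc; /* 0 here means there are no regions at all */

    buf = calloc(map.nr_entries, sizeof(*buf));
    if ( !buf )
        return -ENOMEM;

    /* Second call: map.nr_entries now holds the required count. */
    set_xen_guest_handle(map.buffer, buf);
    rc = memory_op(XENMEM_reserved_device_memory_map, &map);
    if ( rc )
    {
        free(buf);
        return rc;
    }

    *regions = buf;
    *count = map.nr_entries;
    return 0;
}
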
/*
@@ -573,7 +608,7 @@ struct xen_vnuma_topology_info {
typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
-/* Next available subop number is 27 */
+/* Next available subop number is 28 */
#endif /* __XEN_PUBLIC_MEMORY_H__ */
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -125,6 +125,14 @@ int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
struct page_info;

/*
 * Any non-zero value returned from callbacks of this type will cause the
 * function the callback was handed to terminate its iteration. Assigning
 * meaning to these non-zero values is left to the top level caller /
 * callback pair.
 */
typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt);
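
For illustration only (not in this patch): a minimal iommu_grdm_t callback matching this convention might just tally regions. Note how the VT-d walker above treats the values: a negative errno aborts the walk, while a positive value marks the current RMRR as consumed so it isn't offered again for the remaining devices in its scope.

/* Hypothetical example callback / caller pairing. */
struct rdm_tally {
    unsigned int regions;
    xen_ulong_t pages;
};

static int count_rdm(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt)
{
    struct rdm_tally *t = ctxt;

    t->regions++;
    t->pages += nr;

    return 1; /* consumed: don't report this region again */
}

/* Usage: struct rdm_tally t = { 0 };
 *        rc = iommu_get_reserved_device_memory(count_rdm, &t); */
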
struct iommu_ops {
    int (*init)(struct domain *d);
    void (*hwdom_init)(struct domain *d);
@@ -156,12 +164,14 @@ struct iommu_ops {
    void (*crash_shutdown)(void);
    void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
    void (*iotlb_flush_all)(struct domain *d);
    int (*get_reserved_device_memory)(iommu_grdm_t *, void *);
    void (*dump_p2m_table)(struct domain *d);
};
void iommu_suspend(void);
void iommu_resume(void);
void iommu_crash_shutdown(void);
int iommu_get_reserved_device_memory(iommu_grdm_t *, void *);
void iommu_share_p2m_table(struct domain *d);
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -26,6 +26,7 @@
 *  7:3 = slot
 *  2:0 = function
 */
#define PCI_SEG(sbdf) (((sbdf) >> 16) & 0xffff)
#define PCI_BUS(bdf) (((bdf) >> 8) & 0xff)
#define PCI_SLOT(bdf) (((bdf) >> 3) & 0x1f)
#define PCI_FUNC(bdf) ((bdf) & 0x07)
@@ -33,6 +34,9 @@
#define PCI_DEVFN2(bdf) ((bdf) & 0xff)
#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
#define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
#define PCI_SBDF(s,b,d,f) ((((s) & 0xffff) << 16) | PCI_BDF(b,d,f))
#define PCI_SBDF2(s,bdf) ((((s) & 0xffff) << 16) | ((bdf) & 0xffff))
#define PCI_SBDF3(s,b,df) ((((s) & 0xffff) << 16) | PCI_BDF2(b, df))
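
As a worked example (arbitrary values: segment 0x0001, bus 0x06, devfn 0xa0): the memop handlers above build their reference value with PCI_SBDF3() from the guest-supplied seg/bus/devfn, while the VT-d code derives the callback's id via PCI_SBDF2(). PCI_DEVFN() below is the existing slot/function encoder from this header.

/* PCI_BDF2(0x06, 0xa0) == (0x06 << 8) | 0xa0           == 0x06a0     */
/* PCI_SBDF3(0x0001, 0x06, 0xa0) == (1 << 16) | 0x06a0  == 0x000106a0 */
/* PCI_SBDF2(0x0001, 0x06a0)                            == 0x000106a0 */
/* PCI_SBDF(0x0001, 0x06, 0x14, 0), i.e. slot 0x14 func 0 (devfn 0xa0)
 *                                                      == 0x000106a0 */
/* Decomposition: PCI_SEG(0x000106a0) == 0x0001, PCI_BUS(0x06a0) == 0x06,
 * PCI_DEVFN2(0x06a0) == 0xa0. */
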
struct pci_dev_info {
    bool_t is_extfn;
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -65,9 +65,10 @@
! memory_exchange memory.h
! memory_map memory.h
! memory_reservation memory.h
-? mem_access_op memory.h
+? mem_access_op memory.h
! pod_target memory.h
! remove_from_physmap memory.h
+! reserved_device_memory_map memory.h
? vmemrange memory.h
! vnuma_topology_info memory.h
? physdev_eoi physdev.h