Commit a1873c62 authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge branch 'drm-next-4.10' of git://people.freedesktop.org/~agd5f/linux into drm-next

First new feature pull for 4.10.  Highlights:
- Support for multple virtual displays in the virtual dce component
- New VM mgr to support non-contiguous vram buffers
- Support for UVD powergating on additional asics
- Power management improvements
- lots of code cleanup and bug fixes

* 'drm-next-4.10' of git://people.freedesktop.org/~agd5f/linux: (107 commits)
  drm/amdgpu: turn on/off uvd clock when dpm enable/disable on CI
  drm/amdgpu: disable dpm before turn off clock when vce idle.
  drm/amdgpu: enable uvd bypass mode for CI/VI.
  drm/amdgpu: just not load smc firmware if smu is already running
  drm/amdgpu: when suspend, set boot state instand of disable dpm.
  drm/amdgpu: use failed label to handle context init failure
  drm/amdgpu: add amdgpu_ttm_bo_eviction_valuable callback
  drm/ttm: make eviction decision a driver callback v2
  drm/ttm: fix coding style in ttm_bo_driver.h
  drm/radeon/pm: autoswitch power state when in balanced mode
  drm/amd/powerplay: fix spelling mistake and add KERN_WARNING to printks
  drm/amdgpu:new ids flag for preempt
  drm/amdgpu: mark symbols static where possible
  drm/amdgpu: change function declarations and add missing header dependencies
  drm/amdgpu: s/amdgpuCrtc/amdgpu_crtc/ in pageflip code
  drm/amdgpu/atom: remove a bunch of unused functions
  drm/amdgpu: consolidate atom scratch reg handling for hangs
  drm/amdgpu: use amdgpu_bo_[create|free]_kernel for wb
  drm/amdgpu: add VCE VM session tracking
  drm/amdgpu: improve parse_cs handling a bit
  ...
parents 5481e27f 3495a103
......@@ -27,6 +27,7 @@
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
unsigned amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh);
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
unsigned max_sh);
#endif
......@@ -168,6 +168,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
return -ENOMEM;
node->start = AMDGPU_BO_INVALID_OFFSET;
node->size = mem->num_pages;
mem->mm_node = node;
if (place->fpfn || place->lpfn || place->flags & TTM_PL_FLAG_TOPDOWN) {
......
......@@ -152,8 +152,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return -EINVAL;
}
alloc_size = amdgpu_ring_get_dma_frame_size(ring) +
num_ibs * amdgpu_ring_get_emit_ib_size(ring);
alloc_size = ring->funcs->emit_frame_size + num_ibs *
ring->funcs->emit_ib_size;
r = amdgpu_ring_alloc(ring, alloc_size);
if (r) {
......@@ -161,7 +161,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return r;
}
if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec)
if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
if (vm) {
......
......@@ -306,10 +306,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
}
for (i = 0; i < adev->num_ip_blocks; i++) {
if (adev->ip_blocks[i].type == type &&
adev->ip_block_status[i].valid) {
ip.hw_ip_version_major = adev->ip_blocks[i].major;
ip.hw_ip_version_minor = adev->ip_blocks[i].minor;
if (adev->ip_blocks[i].version->type == type &&
adev->ip_blocks[i].status.valid) {
ip.hw_ip_version_major = adev->ip_blocks[i].version->major;
ip.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
ip.capabilities_flags = 0;
ip.available_rings = ring_mask;
ip.ib_start_alignment = ib_start_alignment;
......@@ -345,8 +345,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
}
for (i = 0; i < adev->num_ip_blocks; i++)
if (adev->ip_blocks[i].type == type &&
adev->ip_block_status[i].valid &&
if (adev->ip_blocks[i].version->type == type &&
adev->ip_blocks[i].status.valid &&
count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
count++;
......@@ -411,6 +411,36 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
}
case AMDGPU_INFO_MEMORY: {
struct drm_amdgpu_memory_info mem;
memset(&mem, 0, sizeof(mem));
mem.vram.total_heap_size = adev->mc.real_vram_size;
mem.vram.usable_heap_size =
adev->mc.real_vram_size - adev->vram_pin_size;
mem.vram.heap_usage = atomic64_read(&adev->vram_usage);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
adev->mc.visible_vram_size;
mem.cpu_accessible_vram.usable_heap_size =
adev->mc.visible_vram_size -
(adev->vram_pin_size - adev->invisible_pin_size);
mem.cpu_accessible_vram.heap_usage =
atomic64_read(&adev->vram_vis_usage);
mem.cpu_accessible_vram.max_allocation =
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
mem.gtt.total_heap_size = adev->mc.gtt_size;
mem.gtt.usable_heap_size =
adev->mc.gtt_size - adev->gart_pin_size;
mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
return copy_to_user(out, &mem,
min((size_t)size, sizeof(mem)))
? -EFAULT : 0;
}
case AMDGPU_INFO_READ_MMR_REG: {
unsigned n, alloc_size;
uint32_t *regs;
......@@ -475,6 +505,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.ids_flags = 0;
if (adev->flags & AMD_IS_APU)
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
if (amdgpu_sriov_vf(adev))
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
......@@ -494,6 +526,24 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return copy_to_user(out, &dev_info,
min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
}
case AMDGPU_INFO_VCE_CLOCK_TABLE: {
unsigned i;
struct drm_amdgpu_info_vce_clock_table vce_clk_table = {};
struct amd_vce_state *vce_state;
for (i = 0; i < AMDGPU_VCE_CLOCK_TABLE_ENTRIES; i++) {
vce_state = amdgpu_dpm_get_vce_clock_state(adev, i);
if (vce_state) {
vce_clk_table.entries[i].sclk = vce_state->sclk;
vce_clk_table.entries[i].mclk = vce_state->mclk;
vce_clk_table.entries[i].eclk = vce_state->evclk;
vce_clk_table.num_valid_entries++;
}
}
return copy_to_user(out, &vce_clk_table,
min((size_t)size, sizeof(vce_clk_table))) ? -EFAULT : 0;
}
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
......
......@@ -285,7 +285,7 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
unsigned long end = addr + amdgpu_bo_size(bo) - 1;
struct amdgpu_device *adev = bo->adev;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_mn *rmn;
struct amdgpu_mn_node *node = NULL;
struct list_head bos;
......@@ -340,7 +340,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = bo->adev;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_mn *rmn;
struct list_head *head;
......
......@@ -341,8 +341,6 @@ struct amdgpu_mode_info {
int num_dig; /* number of dig blocks */
int disp_priority;
const struct amdgpu_display_funcs *funcs;
struct hrtimer vblank_timer;
enum amdgpu_interrupt_state vsync_timer_enabled;
};
#define AMDGPU_MAX_BL_LEVEL 0xFF
......@@ -413,6 +411,9 @@ struct amdgpu_crtc {
u32 wm_high;
u32 lb_vblank_lead_lines;
struct drm_display_mode hw_mode;
/* for virtual dce */
struct hrtimer vblank_timer;
enum amdgpu_interrupt_state vsync_timer_enabled;
};
struct amdgpu_encoder_atom_dig {
......
......@@ -88,18 +88,19 @@ static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo;
bo = container_of(tbo, struct amdgpu_bo, tbo);
amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);
amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
if (!list_empty(&bo->shadow_list)) {
mutex_lock(&bo->adev->shadow_list_lock);
mutex_lock(&adev->shadow_list_lock);
list_del_init(&bo->shadow_list);
mutex_unlock(&bo->adev->shadow_list_lock);
mutex_unlock(&adev->shadow_list_lock);
}
kfree(bo->metadata);
kfree(bo);
......@@ -121,12 +122,17 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
unsigned lpfn = 0;
/* This forces a reallocation if the flag wasn't set before */
if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
if (flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS &&
!(flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
adev->mc.visible_vram_size < adev->mc.real_vram_size) {
places[c].fpfn = visible_pfn;
places[c].lpfn = 0;
places[c].lpfn = lpfn;
places[c].flags = TTM_PL_FLAG_WC |
TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM |
TTM_PL_FLAG_TOPDOWN;
......@@ -134,7 +140,7 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
}
places[c].fpfn = 0;
places[c].lpfn = 0;
places[c].lpfn = lpfn;
places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_VRAM;
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
......@@ -205,8 +211,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
{
amdgpu_ttm_placement_init(abo->adev, &abo->placement,
abo->placements, domain, abo->flags);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements,
domain, abo->flags);
}
static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
......@@ -245,7 +253,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
int r;
r = amdgpu_bo_create(adev, size, align, true, domain,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, bo_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r);
......@@ -351,7 +360,6 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
kfree(bo);
return r;
}
bo->adev = adev;
INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va);
bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
......@@ -616,6 +624,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
u64 min_offset, u64 max_offset,
u64 *gpu_addr)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r, i;
unsigned fpfn, lpfn;
......@@ -643,18 +652,20 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return 0;
}
bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
amdgpu_ttm_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
/* force to pin into visible video ram */
if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
(!max_offset || max_offset >
bo->adev->mc.visible_vram_size)) {
adev->mc.visible_vram_size)) {
if (WARN_ON_ONCE(min_offset >
bo->adev->mc.visible_vram_size))
adev->mc.visible_vram_size))
return -EINVAL;
fpfn = min_offset >> PAGE_SHIFT;
lpfn = bo->adev->mc.visible_vram_size >> PAGE_SHIFT;
lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
} else {
fpfn = min_offset >> PAGE_SHIFT;
lpfn = max_offset >> PAGE_SHIFT;
......@@ -669,12 +680,12 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
if (unlikely(r)) {
dev_err(bo->adev->dev, "%p pin failed\n", bo);
dev_err(adev->dev, "%p pin failed\n", bo);
goto error;
}
r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
if (unlikely(r)) {
dev_err(bo->adev->dev, "%p bind failed\n", bo);
dev_err(adev->dev, "%p bind failed\n", bo);
goto error;
}
......@@ -682,11 +693,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (gpu_addr != NULL)
*gpu_addr = amdgpu_bo_gpu_offset(bo);
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
bo->adev->vram_pin_size += amdgpu_bo_size(bo);
adev->vram_pin_size += amdgpu_bo_size(bo);
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
bo->adev->invisible_pin_size += amdgpu_bo_size(bo);
adev->invisible_pin_size += amdgpu_bo_size(bo);
} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
bo->adev->gart_pin_size += amdgpu_bo_size(bo);
adev->gart_pin_size += amdgpu_bo_size(bo);
}
error:
......@@ -700,10 +711,11 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r, i;
if (!bo->pin_count) {
dev_warn(bo->adev->dev, "%p unpin not necessary\n", bo);
dev_warn(adev->dev, "%p unpin not necessary\n", bo);
return 0;
}
bo->pin_count--;
......@@ -715,16 +727,16 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
if (unlikely(r)) {
dev_err(bo->adev->dev, "%p validate failed for unpin\n", bo);
dev_err(adev->dev, "%p validate failed for unpin\n", bo);
goto error;
}
if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
bo->adev->vram_pin_size -= amdgpu_bo_size(bo);
adev->vram_pin_size -= amdgpu_bo_size(bo);
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
bo->adev->invisible_pin_size -= amdgpu_bo_size(bo);
adev->invisible_pin_size -= amdgpu_bo_size(bo);
} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
bo->adev->gart_pin_size -= amdgpu_bo_size(bo);
adev->gart_pin_size -= amdgpu_bo_size(bo);
}
error:
......@@ -849,6 +861,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo;
struct ttm_mem_reg *old_mem = &bo->mem;
......@@ -856,21 +869,21 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
return;
abo = container_of(bo, struct amdgpu_bo, tbo);
amdgpu_vm_bo_invalidate(abo->adev, abo);
amdgpu_vm_bo_invalidate(adev, abo);
/* update statistics */
if (!new_mem)
return;
/* move_notify is called before move happens */
amdgpu_update_memory_usage(abo->adev, &bo->mem, new_mem);
amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo;
unsigned long offset, size, lpfn;
int i, r;
......@@ -879,13 +892,14 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
return 0;
abo = container_of(bo, struct amdgpu_bo, tbo);
adev = abo->adev;
if (bo->mem.mem_type != TTM_PL_VRAM)
return 0;
size = bo->mem.num_pages << PAGE_SHIFT;
offset = bo->mem.start << PAGE_SHIFT;
if ((offset + size) <= adev->mc.visible_vram_size)
/* TODO: figure out how to map scattered VRAM to the CPU */
if ((offset + size) <= adev->mc.visible_vram_size &&
(abo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
return 0;
/* Can't move a pinned BO to visible VRAM */
......@@ -893,6 +907,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
return -EINVAL;
/* hurrah the memory is not visible ! */
abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
for (i = 0; i < abo->placement.num_placement; i++) {
......@@ -954,6 +969,8 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
!bo->pin_count);
WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
!(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
return bo->tbo.offset;
}
......@@ -71,12 +71,13 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
*/
static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
r = ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
dev_err(bo->adev->dev, "%p reserve failed\n", bo);
dev_err(adev->dev, "%p reserve failed\n", bo);
return r;
}
return 0;
......
......@@ -986,10 +986,10 @@ static struct amdgpu_ps *amdgpu_dpm_pick_power_state(struct amdgpu_device *adev,
static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
{
int i;
struct amdgpu_ps *ps;
enum amd_pm_state_type dpm_state;
int ret;
bool equal;
/* if dpm init failed */
if (!adev->pm.dpm_enabled)
......@@ -1009,46 +1009,6 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
else
return;
/* no need to reprogram if nothing changed unless we are on BTC+ */
if (adev->pm.dpm.current_ps == adev->pm.dpm.requested_ps) {
/* vce just modifies an existing state so force a change */
if (ps->vce_active != adev->pm.dpm.vce_active)
goto force;
if (adev->flags & AMD_IS_APU) {
/* for APUs if the num crtcs changed but state is the same,
* all we need to do is update the display configuration.
*/
if (adev->pm.dpm.new_active_crtcs != adev->pm.dpm.current_active_crtcs) {
/* update display watermarks based on new power state */
amdgpu_display_bandwidth_update(adev);
/* update displays */
amdgpu_dpm_display_configuration_changed(adev);
adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
}
return;
} else {
/* for BTC+ if the num crtcs hasn't changed and state is the same,
* nothing to do, if the num crtcs is > 1 and state is the same,
* update display configuration.
*/
if (adev->pm.dpm.new_active_crtcs ==
adev->pm.dpm.current_active_crtcs) {
return;
} else if ((adev->pm.dpm.current_active_crtc_count > 1) &&
(adev->pm.dpm.new_active_crtc_count > 1)) {
/* update display watermarks based on new power state */
amdgpu_display_bandwidth_update(adev);
/* update displays */
amdgpu_dpm_display_configuration_changed(adev);
adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
return;
}
}
}
force:
if (amdgpu_dpm == 1) {
printk("switching from power state:\n");
amdgpu_dpm_print_power_state(adev, adev->pm.dpm.current_ps);
......@@ -1059,31 +1019,21 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
/* update whether vce is active */
ps->vce_active = adev->pm.dpm.vce_active;
amdgpu_dpm_display_configuration_changed(adev);
ret = amdgpu_dpm_pre_set_power_state(adev);
if (ret)
return;
/* update display watermarks based on new power state */
amdgpu_display_bandwidth_update(adev);
if ((0 != amgdpu_dpm_check_state_equal(adev, adev->pm.dpm.current_ps, adev->pm.dpm.requested_ps, &equal)))
equal = false;
/* wait for the rings to drain */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (ring && ring->ready)
amdgpu_fence_wait_empty(ring);
}
if (equal)
return;
/* program the new power state */
amdgpu_dpm_set_power_state(adev);
/* update current power state */
adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps;
amdgpu_dpm_post_set_power_state(adev);
/* update displays */
amdgpu_dpm_display_configuration_changed(adev);
adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
......@@ -1135,7 +1085,7 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.vce_active = true;
/* XXX select vce level based on ring/task */
adev->pm.dpm.vce_level = AMDGPU_VCE_LEVEL_AC_ALL;
adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
mutex_unlock(&adev->pm.mutex);
} else {
mutex_lock(&adev->pm.mutex);
......@@ -1276,20 +1226,20 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
struct drm_device *ddev = adev->ddev;
struct drm_crtc *crtc;
struct amdgpu_crtc *amdgpu_crtc;
int i = 0;
if (!adev->pm.dpm_enabled)
return;
if (adev->pp_enabled) {
int i = 0;
amdgpu_display_bandwidth_update(adev);
amdgpu_display_bandwidth_update(adev);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (ring && ring->ready)
amdgpu_fence_wait_empty(ring);
}
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (ring && ring->ready)
amdgpu_fence_wait_empty(ring);
}
if (adev->pp_enabled) {
amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL);
} else {
mutex_lock(&adev->pm.mutex);
......
......@@ -299,7 +299,7 @@ static int amdgpu_pp_soft_reset(void *handle)
return ret;
}
const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
static const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
.name = "amdgpu_powerplay",
.early_init = amdgpu_pp_early_init,
.late_init = amdgpu_pp_late_init,
......@@ -316,3 +316,12 @@ const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
.set_clockgating_state = amdgpu_pp_set_clockgating_state,
.set_powergating_state = amdgpu_pp_set_powergating_state,
};
const struct amdgpu_ip_block_version amdgpu_pp_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_SMC,
.major = 1,
.minor = 0,
.rev = 0,
.funcs = &amdgpu_pp_ip_funcs,
};
......@@ -23,11 +23,11 @@
*
*/
#ifndef __AMDGPU_POPWERPLAY_H__
#define __AMDGPU_POPWERPLAY_H__
#ifndef __AMDGPU_POWERPLAY_H__
#define __AMDGPU_POWERPLAY_H__
#include "amd_shared.h"
extern const struct amd_ip_funcs amdgpu_pp_ip_funcs;
extern const struct amdgpu_ip_block_version amdgpu_pp_ip_block;
#endif /* __AMDSOC_DM_H__ */
#endif /* __AMDGPU_POWERPLAY_H__ */
......@@ -65,7 +65,7 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
/* Align requested size with padding so unlock_commit can
* pad safely */
ndw = (ndw + ring->align_mask) & ~ring->align_mask;
ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
/* Make sure we aren't trying to allocate more space
* than the maximum for one submission
......@@ -94,7 +94,7 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
int i;
for (i = 0; i < count; i++)
amdgpu_ring_write(ring, ring->nop);
amdgpu_ring_write(ring, ring->funcs->nop);
}
/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets
......@@ -106,8 +106,8 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
*/
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
while (ib->length_dw & ring->align_mask)
ib->ptr[ib->length_dw++] = ring->nop;
while (ib->length_dw & ring->funcs->align_mask)
ib->ptr[ib->length_dw++] = ring->funcs->nop;
}
/**
......@@ -125,8 +125,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
uint32_t count;
/* We pad to match fetch size */
count = ring->align_mask + 1 - (ring->wptr & ring->align_mask);
count %= ring->align_mask + 1;
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
count %= ring->funcs->align_mask + 1;
ring->funcs->insert_nop(ring, count);
mb();
......@@ -163,9 +164,8 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned max_dw, u32 nop, u32 align_mask,
struct amdgpu_irq_src *irq_src, unsigned irq_type,
enum amdgpu_ring_type ring_type)
unsigned max_dw, struct amdgpu_irq_src *irq_src,
unsigned irq_type)
{
int r;
......@@ -216,9 +216,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->ring_size = roundup_pow_of_two(max_dw * 4 *
amdgpu_sched_hw_submission);
ring->align_mask = align_mask;
ring->nop = nop;
ring->type = ring_type;
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
......
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Christian König
*/
#ifndef __AMDGPU_RING_H__
#define __AMDGPU_RING_H__
#include "gpu_scheduler.h"
/* max number of rings */
#define AMDGPU_MAX_RINGS 16
#define AMDGPU_MAX_GFX_RINGS 1
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
/* some special values for the owner field */
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
#define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
enum amdgpu_ring_type {
AMDGPU_RING_TYPE_GFX,
AMDGPU_RING_TYPE_COMPUTE,
AMDGPU_RING_TYPE_SDMA,
AMDGPU_RING_TYPE_UVD,
AMDGPU_RING_TYPE_VCE
};
struct amdgpu_device;
struct amdgpu_ring;
struct amdgpu_ib;
struct amdgpu_cs_parser;
/*
* Fences.
*/
struct amdgpu_fence_driver {
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
uint32_t sync_seq;
atomic_t last_seq;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
struct timer_list fallback_timer;
unsigned num_fences_mask;
spinlock_t lock;
struct fence **fences;
};
int amdgpu_fence_driver_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
unsigned num_hw_submission);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
void amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
/*
* Rings.
*/
/* provided by hw blocks that expose a ring buffer for commands */
struct amdgpu_ring_funcs {
enum amdgpu_ring_type type;
uint32_t align_mask;
u32 nop;
/* ring read/write ptr handling */
u32 (*get_rptr)(struct amdgpu_ring *ring);
u32 (*get_wptr)(struct amdgpu_ring *ring);
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
/* constants to calculate how many DW are needed for an emit */
unsigned emit_frame_size;
unsigned emit_ib_size;
/* command emit functions */
void (*emit_ib)(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch);
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
uint64_t seq, unsigned flags);
void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
uint64_t pd_addr);
void (*emit_hdp_flush)(struct amdgpu_ring *ring);
void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
uint32_t gds_base, uint32_t gds_size,
uint32_t gws_base, uint32_t gws_size,
uint32_t oa_base, uint32_t oa_size);
/* testing functions */
int (*test_ring)(struct amdgpu_ring *ring);
int (*test_ib)(struct amdgpu_ring *ring, long timeout);
/* insert NOP packets */
void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
/* pad the indirect buffer to the necessary number of dw */
void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
/* note usage for clock and power gating */
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
};
struct amdgpu_ring {
struct amdgpu_device *adev;
const struct amdgpu_ring_funcs *funcs;
struct amdgpu_fence_driver fence_drv;
struct amd_gpu_scheduler sched;
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
unsigned rptr_offs;
unsigned wptr;
unsigned wptr_old;
unsigned ring_size;
unsigned max_dw;
int count_dw;
uint64_t gpu_addr;
uint32_t ptr_mask;
bool ready;
u32 idx;
u32 me;
u32 pipe;
u32 queue;
struct amdgpu_bo *mqd_obj;
u32 doorbell_index;
bool use_doorbell;
unsigned wptr_offs;
unsigned fence_offs;
uint64_t current_ctx;
char name[16];
unsigned cond_exe_offs;
u64 cond_exe_gpu_addr;
volatile u32 *cond_exe_cpu_addr;
#if defined(CONFIG_DEBUG_FS)
struct dentry *ent;
#endif
};
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
void amdgpu_ring_undo(struct amdgpu_ring *ring);
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned ring_size, struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
#endif
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Christian König
*/
#ifndef __AMDGPU_SYNC_H__
#define __AMDGPU_SYNC_H__
#include <linux/hashtable.h>
struct fence;
struct reservation_object;
struct amdgpu_device;
struct amdgpu_ring;
/*
* Container for fences used to sync command submissions.
*/
struct amdgpu_sync {
DECLARE_HASHTABLE(fences, 4);
struct fence *last_vm_update;
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f);
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
struct reservation_object *resv,
void *owner);
struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
void amdgpu_sync_free(struct amdgpu_sync *sync);
int amdgpu_sync_init(void);
void amdgpu_sync_fini(void);
#endif
......@@ -51,16 +51,6 @@
static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
static struct amdgpu_device *amdgpu_get_adev(struct ttm_bo_device *bdev)
{
struct amdgpu_mman *mman;
struct amdgpu_device *adev;
mman = container_of(bdev, struct amdgpu_mman, bdev);
adev = container_of(mman, struct amdgpu_device, mman);
return adev;
}
/*
* Global memory.
......@@ -150,7 +140,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
{
struct amdgpu_device *adev;
adev = amdgpu_get_adev(bdev);
adev = amdgpu_ttm_adev(bdev);
switch (type) {
case TTM_PL_SYSTEM:
......@@ -168,7 +158,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
break;
case TTM_PL_VRAM:
/* "On-card" video ram */
man->func = &ttm_bo_manager_func;
man->func = &amdgpu_vram_mgr_func;
man->gpu_offset = adev->mc.vram_start;
man->flags = TTM_MEMTYPE_FLAG_FIXED |
TTM_MEMTYPE_FLAG_MAPPABLE;
......@@ -195,6 +185,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
struct ttm_placement *placement)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo;
static struct ttm_place placements = {
.fpfn = 0,
......@@ -213,7 +204,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
abo = container_of(bo, struct amdgpu_bo, tbo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
if (abo->adev->mman.buffer_funcs_ring->ready == false) {
if (adev->mman.buffer_funcs_ring->ready == false) {
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
} else {
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
......@@ -229,7 +220,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* allocating address space for the BO.
*/
abo->placements[i].lpfn =
abo->adev->mc.gtt_size >> PAGE_SHIFT;
adev->mc.gtt_size >> PAGE_SHIFT;
}
}
break;
......@@ -260,64 +251,116 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
new_mem->mm_node = NULL;
}
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem,
struct ttm_mem_reg *old_mem)
static int amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
struct drm_mm_node *mm_node,
struct ttm_mem_reg *mem,
uint64_t *addr)
{
struct amdgpu_device *adev;
struct amdgpu_ring *ring;
uint64_t old_start, new_start;
struct fence *fence;
int r;
adev = amdgpu_get_adev(bo->bdev);
ring = adev->mman.buffer_funcs_ring;
switch (old_mem->mem_type) {
switch (mem->mem_type) {
case TTM_PL_TT:
r = amdgpu_ttm_bind(bo, old_mem);
r = amdgpu_ttm_bind(bo, mem);
if (r)
return r;
case TTM_PL_VRAM:
old_start = (u64)old_mem->start << PAGE_SHIFT;
old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
*addr = mm_node->start << PAGE_SHIFT;
*addr += bo->bdev->man[mem->mem_type].gpu_offset;
break;
default:
DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
DRM_ERROR("Unknown placement %d\n", mem->mem_type);
return -EINVAL;
}
switch (new_mem->mem_type) {
case TTM_PL_TT:
r = amdgpu_ttm_bind(bo, new_mem);
if (r)
return r;
case TTM_PL_VRAM:
new_start = (u64)new_mem->start << PAGE_SHIFT;
new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
break;
default:
DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
return -EINVAL;
}
return 0;
}
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem,
struct ttm_mem_reg *old_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct drm_mm_node *old_mm, *new_mm;
uint64_t old_start, old_size, new_start, new_size;
unsigned long num_pages;
struct fence *fence = NULL;
int r;
BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
if (!ring->ready) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
old_mm = old_mem->mm_node;
r = amdgpu_mm_node_addr(bo, old_mm, old_mem, &old_start);
if (r)
return r;
old_size = old_mm->size;
r = amdgpu_copy_buffer(ring, old_start, new_start,
new_mem->num_pages * PAGE_SIZE, /* bytes */
bo->resv, &fence, false);
new_mm = new_mem->mm_node;
r = amdgpu_mm_node_addr(bo, new_mm, new_mem, &new_start);
if (r)
return r;
new_size = new_mm->size;
num_pages = new_mem->num_pages;
while (num_pages) {
unsigned long cur_pages = min(old_size, new_size);
struct fence *next;
r = amdgpu_copy_buffer(ring, old_start, new_start,
cur_pages * PAGE_SIZE,
bo->resv, &next, false);
if (r)
goto error;
fence_put(fence);
fence = next;
num_pages -= cur_pages;
if (!num_pages)
break;
old_size -= cur_pages;
if (!old_size) {
r = amdgpu_mm_node_addr(bo, ++old_mm, old_mem,
&old_start);
if (r)
goto error;
old_size = old_mm->size;
} else {
old_start += cur_pages * PAGE_SIZE;
}
new_size -= cur_pages;
if (!new_size) {
r = amdgpu_mm_node_addr(bo, ++new_mm, new_mem,
&new_start);
if (r)
goto error;
new_size = new_mm->size;
} else {
new_start += cur_pages * PAGE_SIZE;
}
}
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
fence_put(fence);
return r;
error:
if (fence)
fence_wait(fence, false);
fence_put(fence);
return r;
}
static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
......@@ -332,7 +375,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
struct ttm_placement placement;
int r;
adev = amdgpu_get_adev(bo->bdev);
adev = amdgpu_ttm_adev(bo->bdev);
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
placement.num_placement = 1;
......@@ -379,7 +422,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
struct ttm_place placements;
int r;
adev = amdgpu_get_adev(bo->bdev);
adev = amdgpu_ttm_adev(bo->bdev);
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
placement.num_placement = 1;
......@@ -422,7 +465,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
if (WARN_ON_ONCE(abo->pin_count > 0))
return -EINVAL;
adev = amdgpu_get_adev(bo->bdev);
adev = amdgpu_ttm_adev(bo->bdev);
/* remember the eviction */
if (evict)
......@@ -475,7 +518,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
struct amdgpu_device *adev = amdgpu_get_adev(bdev);
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
mem->bus.addr = NULL;
mem->bus.offset = 0;
......@@ -607,7 +650,7 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
/* prepare the sg table with the user pages */
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned nents;
int r;
......@@ -639,7 +682,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct sg_page_iter sg_iter;
......@@ -799,7 +842,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
struct amdgpu_device *adev;
struct amdgpu_ttm_tt *gtt;
adev = amdgpu_get_adev(bdev);
adev = amdgpu_ttm_adev(bdev);
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
if (gtt == NULL) {
......@@ -843,7 +886,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
return 0;
}
adev = amdgpu_get_adev(ttm->bdev);
adev = amdgpu_ttm_adev(ttm->bdev);
#ifdef CONFIG_SWIOTLB
if (swiotlb_nr_tbl()) {
......@@ -889,7 +932,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
if (slave)
return;
adev = amdgpu_get_adev(ttm->bdev);
adev = amdgpu_ttm_adev(ttm->bdev);
#ifdef CONFIG_SWIOTLB
if (swiotlb_nr_tbl()) {
......@@ -1012,7 +1055,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
unsigned i, j;
for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
......@@ -1029,7 +1072,7 @@ static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
unsigned log2_size = min(ilog2(tbo->num_pages),
AMDGPU_TTM_LRU_SIZE - 1);
......@@ -1060,12 +1103,37 @@ static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
return res;
}
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
{
if (bo->mem.mem_type == TTM_PL_VRAM &&
bo->mem.start == AMDGPU_BO_INVALID_OFFSET) {
unsigned long num_pages = bo->mem.num_pages;
struct drm_mm_node *node = bo->mem.mm_node;
/* Check each drm MM node individually */
while (num_pages) {
if (place->fpfn < (node->start + node->size) &&
!(place->lpfn && place->lpfn <= node->start))
return true;
num_pages -= node->size;
++node;
}
return false;
}
return ttm_bo_eviction_valuable(bo, place);
}
static struct ttm_bo_driver amdgpu_bo_driver = {
.ttm_tt_create = &amdgpu_ttm_tt_create,
.ttm_tt_populate = &amdgpu_ttm_tt_populate,
.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
.invalidate_caches = &amdgpu_invalidate_caches,
.init_mem_type = &amdgpu_init_mem_type,
.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
.evict_flags = &amdgpu_evict_flags,
.move = &amdgpu_bo_move,
.verify_access = &amdgpu_verify_access,
......@@ -1119,7 +1187,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, &adev->stollen_vga_memory);
if (r) {
return r;
......@@ -1317,7 +1386,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
struct reservation_object *resv,
struct fence **fence)
{
struct amdgpu_device *adev = bo->adev;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_job *job;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
......
......@@ -66,6 +66,7 @@ struct amdgpu_mman {
};
extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
struct ttm_buffer_object *tbo,
......
......@@ -228,6 +228,9 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode,
ucode->mc_addr = mc_addr;
ucode->kaddr = kptr;
if (ucode->ucode_id == AMDGPU_UCODE_ID_STORAGE)
return 0;
header = (const struct common_firmware_header *)ucode->fw->data;
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes)),
......@@ -236,6 +239,31 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode,
return 0;
}
static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
uint64_t mc_addr, void *kptr)
{
const struct gfx_firmware_header_v1_0 *header = NULL;
const struct common_firmware_header *comm_hdr = NULL;
uint8_t* src_addr = NULL;
uint8_t* dst_addr = NULL;
if (NULL == ucode->fw)
return 0;
comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
header = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
dst_addr = ucode->kaddr +
ALIGN(le32_to_cpu(comm_hdr->ucode_size_bytes),
PAGE_SIZE);
src_addr = (uint8_t *)ucode->fw->data +
le32_to_cpu(comm_hdr->ucode_array_offset_bytes) +
(le32_to_cpu(header->jt_offset) * 4);
memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4);
return 0;
}
int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
{
struct amdgpu_bo **bo = &adev->firmware.fw_buf;
......@@ -247,7 +275,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
const struct common_firmware_header *header = NULL;
err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, bo);
amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
0, NULL, NULL, bo);
if (err) {
dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
goto failed;
......@@ -259,7 +288,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
goto failed_reserve;
}
err = amdgpu_bo_pin(*bo, AMDGPU_GEM_DOMAIN_GTT, &fw_mc_addr);
err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&fw_mc_addr);
if (err) {
dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err);
goto failed_pin;
......@@ -279,6 +309,13 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
header = (const struct common_firmware_header *)ucode->fw->data;
amdgpu_ucode_init_single_fw(ucode, fw_mc_addr + fw_offset,
fw_buf_ptr + fw_offset);
if (i == AMDGPU_UCODE_ID_CP_MEC1) {
const struct gfx_firmware_header_v1_0 *cp_hdr;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset,
fw_buf_ptr + fw_offset);
fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
}
fw_offset += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
}
}
......
......@@ -130,6 +130,7 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MEC1,
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_MAXIMUM,
};
......
......@@ -876,6 +876,9 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
int r;
parser->job->vm = NULL;
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
if (ib->length_dw % 16) {
DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
ib->length_dw);
......@@ -931,7 +934,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r)
return r;
if (!bo->adev->uvd.address_64_bit) {
if (!ring->adev->uvd.address_64_bit) {
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_uvd_force_into_uvd_segment(bo);
}
......@@ -1002,7 +1005,8 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, &bo);
if (r)
return r;
......@@ -1051,7 +1055,8 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, &bo);
if (r)
return r;
......
......@@ -157,7 +157,8 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, &adev->vce.vcpu_bo);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
......@@ -641,6 +642,9 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
uint32_t *size = &tmp;
int i, r, idx = 0;
p->job->vm = NULL;
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
r = amdgpu_cs_sysvm_access_required(p);
if (r)
return r;
......@@ -787,6 +791,96 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
return r;
}
/**
* amdgpu_vce_cs_parse_vm - parse the command stream in VM mode
*
* @p: parser context
*
*/
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
int session_idx = -1;
uint32_t destroyed = 0;
uint32_t created = 0;
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
int i, r = 0, idx = 0;
while (idx < ib->length_dw) {
uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
if ((len < 8) || (len & 3)) {
DRM_ERROR("invalid VCE command length (%d)!\n", len);
r = -EINVAL;
goto out;
}
switch (cmd) {
case 0x00000001: /* session */
handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
session_idx = amdgpu_vce_validate_handle(p, handle,
&allocated);
if (session_idx < 0) {
r = session_idx;
goto out;
}
break;
case 0x01000001: /* create */
created |= 1 << session_idx;
if (destroyed & (1 << session_idx)) {
destroyed &= ~(1 << session_idx);
allocated |= 1 << session_idx;
} else if (!(allocated & (1 << session_idx))) {
DRM_ERROR("Handle already in use!\n");
r = -EINVAL;
goto out;
}
break;
case 0x02000001: /* destroy */
destroyed |= 1 << session_idx;
break;
default:
break;
}
if (session_idx == -1) {
DRM_ERROR("no session command at start of IB\n");
r = -EINVAL;
goto out;
}
idx += len / 4;
}
if (allocated & ~created) {
DRM_ERROR("New session without create command!\n");
r = -ENOENT;
}
out:
if (!r) {
/* No error, free all destroyed handle slots */
tmp = destroyed;
amdgpu_ib_free(p->adev, ib, NULL);
} else {
/* Error during parsing, free all allocated handle slots */
tmp = allocated;
}
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
if (tmp & (1 << i))
atomic_set(&p->adev->vce.handles[i], 0);
return r;
}
/**
* amdgpu_vce_ring_emit_ib - execute indirect buffer
*
......@@ -823,18 +917,6 @@ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
amdgpu_ring_write(ring, VCE_CMD_END);
}
unsigned amdgpu_vce_ring_get_emit_ib_size(struct amdgpu_ring *ring)
{
return
4; /* amdgpu_vce_ring_emit_ib */
}
unsigned amdgpu_vce_ring_get_dma_frame_size(struct amdgpu_ring *ring)
{
return
6; /* amdgpu_vce_ring_emit_fence x1 no user fence */
}
/**
* amdgpu_vce_ring_test_ring - test if VCE ring is working
*
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment