diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 09a5eca494b35a822d71ba20bd74982a8a3ddd3e..0ed99adfd0da56cb57d587486d7683eac8cedb8a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -24,6 +24,7 @@ */ #include <linux/log2.h> +#include <linux/random.h> #include <linux/seq_file.h> #include <linux/stop_machine.h> @@ -3606,6 +3607,31 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, return err; } +static u64 random_offset(u64 start, u64 end, u64 len, u64 align) +{ + u64 range, addr; + + GEM_BUG_ON(range_overflows(start, len, end)); + GEM_BUG_ON(round_up(start, align) > round_down(end - len, align)); + + range = round_down(end - len, align) - round_up(start, align); + if (range) { + if (sizeof(unsigned long) == sizeof(u64)) { + addr = get_random_long(); + } else { + addr = get_random_int(); + if (range > U32_MAX) { + addr <<= 32; + addr |= get_random_int(); + } + } + div64_u64_rem(addr, range, &addr); + start += addr; + } + + return round_up(start, align); +} + /** * i915_gem_gtt_insert - insert a node into an address_space (GTT) * @vm - the &struct i915_address_space @@ -3627,7 +3653,8 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, * its @size must then fit entirely within the [@start, @end] bounds. The * nodes on either side of the hole must match @color, or else a guard page * will be inserted between the two nodes (or the node evicted). If no - * suitable hole is found, then the LRU list of objects within the GTT + * suitable hole is found, first a victim is randomly selected and tested + * for eviction, otherwise then the LRU list of objects within the GTT * is scanned to find the first set of replacement nodes to create the hole. * Those old overlapping nodes are evicted from the GTT (and so must be * rebound before any future use). Any node that is currently pinned cannot @@ -3645,6 +3672,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 start, u64 end, unsigned int flags) { u32 search_flag, alloc_flag; + u64 offset; int err; lockdep_assert_held(&vm->i915->drm.struct_mutex); @@ -3687,6 +3715,35 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, if (err != -ENOSPC) return err; + /* No free space, pick a slot at random. + * + * There is a pathological case here using a GTT shared between + * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt): + * + * |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->| + * (64k objects) (448k objects) + * + * Now imagine that the eviction LRU is ordered top-down (just because + * pathology meets real life), and that we need to evict an object to + * make room inside the aperture. The eviction scan then has to walk + * the 448k list before it finds one within range. And now imagine that + * it has to search for a new hole between every byte inside the memcpy, + * for several simultaneous clients. + * + * On a full-ppgtt system, if we have run out of available space, there + * will be lots and lots of objects in the eviction list! Again, + * searching that LRU list may be slow if we are also applying any + * range restrictions (e.g. restriction to low 4GiB) and so, for + * simplicity and similarilty between different GTT, try the single + * random replacement first. + */ + offset = random_offset(start, end, + size, alignment ?: I915_GTT_MIN_ALIGNMENT); + err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags); + if (err != -ENOSPC) + return err; + + /* Randomly selected placement is pinned, do a search */ err = i915_gem_evict_something(vm, size, alignment, color, start, end, flags); if (err)