[PATCH 5/6] drm/radeon: validate relocations in the order determined by userspace (original) (raw)

Christian König deathsimple at vodafone.de
Mon Feb 24 08:27:22 PST 2014


Am 24.02.2014 16:20, schrieb Marek Olšák:

From: Marek Olšák <marek.olsak at amd.com>

Userspace should set the first 4 bits of drm_radeon_cs_reloc::flags to a number from 0 to 15. The higher the number, the higher the priority, which means a buffer with a higher number will be validated sooner.

Assuming that we only have 32 different priorities, it would probably be better to add the buffers to 32 different lists while evaluating the priorities in radeon_cs_parser_relocs() and then concatenate all 32 lists at the end instead of sorting them.

The old behavior is preserved: Buffers used for write are prioritized over read-only buffers if the userspace doesn't set the number.

Signed-off-by: Marek Olšák <marek.olsak at amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  2 +-
 drivers/gpu/drm/radeon/radeon_cs.c     | 53 ++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/radeon/radeon_object.c | 10 -------
 drivers/gpu/drm/radeon/radeon_object.h |  2 --
 4 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d37a57a..f7a3174 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -481,7 +481,7 @@ struct radeon_bo_list {
 	struct ttm_validate_buffer tv;
 	struct radeon_bo	*bo;
 	uint64_t		gpu_offset;
-	bool			written;
+	unsigned		priority;
 	unsigned		domain;
 	unsigned		alt_domain;
 	u32			tiling_flags;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index d49a3f7..1ba1a48 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -31,6 +31,41 @@
 #include "radeon.h"
 #include "radeon_trace.h"
 
+/* This is a variation of the bucket sort with O(n) time complexity.
+ * The relocations are sorted from the highest to the lowest priority. */
+static void sort_relocs_for_validation(struct list_head *list)
+{
+	struct list_head bucket[17], *it, *tmp;
+	unsigned i, priority;
+
+	for (i = 0; i < 17; i++)
+		INIT_LIST_HEAD(&bucket[i]);
+
+	/* Move the elements into buckets. An i-th bucket only contains
+	 * elements with priorities i*2 and i*2+1. Odd numbers are added
+	 * at the head of a bucket and even numbers are added at the tail,
+	 * therefore all buckets are always sorted. */
+	list_for_each_safe(it, tmp, list) {
+		priority = list_entry(it, struct radeon_bo_list,
+				      tv.head)->priority;
+		i = priority / 2;
+		i = min(i, 16u) ;
+
+		if (priority % 2 == 1) {
+			list_move(it, &bucket[i]);
+		} else {
+			list_move_tail(it, &bucket[i]);
+		}
+	}
+
+	INIT_LIST_HEAD(list);
+
+	/* connect the sorted buckets */
+	for (i = 0; i < 17; i++) {
+		list_splice(&bucket[i], list);
+	}
+}
+
 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 {
 	struct drm_device *ddev = p->rdev->ddev;
@@ -80,7 +115,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		p->relocs_ptr[i] = &p->relocs[i];
 		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
 		p->relocs[i].lobj.bo = p->relocs[i].robj;
-		p->relocs[i].lobj.written = !!r->write_domain;
+
+		/* The userspace buffer priorities are from 0 to 15. A higher
+		 * number means the buffer is more important.
+		 * Also, the buffers used for write have a higher priority than
+		 * the buffers used for read only, which doubles the range
+		 * to 0 to 31. Numbers 32 and 33 are reserved for the kernel
+		 * driver.
+		 */
+		p->relocs[i].lobj.priority = (r->flags & 0xf) * 2 + !!r->write_domain;
 
 		/* the first reloc of an UVD job is the msg and that must be in
 		   VRAM, also but everything into VRAM on AGP cards to avoid
@@ -94,6 +137,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs[i].lobj.alt_domain =
 				RADEON_GEM_DOMAIN_VRAM;
 
+			/* prioritize this over any other relocation */
+			p->relocs[i].lobj.priority = 32;
 		} else {
 			uint32_t domain = r->write_domain ?
 				r->write_domain : r->read_domains;
@@ -107,9 +152,11 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].handle = r->handle;
 
-		radeon_bo_list_add_object(&p->relocs[i].lobj,
-					  &p->validated);
+		list_add(&p->relocs[i].lobj.tv.head, &p->validated);
 	}
+
+	sort_relocs_for_validation(&p->validated);
+
 	return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index d676ee2..19042ae 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -368,16 +368,6 @@ void radeon_bo_fini(struct radeon_device *rdev)
 	arch_phys_wc_del(rdev->mc.vram_mtrr);
 }
 
-void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-				struct list_head *head)
-{
-	if (lobj->written) {
-		list_add(&lobj->tv.head, head);
-	} else {
-		list_add_tail(&lobj->tv.head, head);
-	}
-}
-
 int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
 			    struct list_head *head, int ring)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index a9a8c11..6c3ca9e 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
 extern void radeon_bo_force_delete(struct radeon_device *rdev);
 extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
-extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-				struct list_head *head);
 extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
 				   struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,



More information about the dri-devel mailing list