Custom and default allocator added · tensorflow/community@21be6e9 (original) (raw)
`@@ -437,6 +437,151 @@ typedef struct SE_CreateStreamExecutorParams {
`
437
437
`#define SE_CREATE_STREAM_EXECUTOR_PARAMS_STRUCT_SIZE \
`
438
438
` TF_OFFSET_OF_END(SE_CreateStreamExecutorParams, stream_executor)
`
439
439
``
``
440
`+
typedef struct SP_Allocator {
`
``
441
`+
size_t struct_size;
`
``
442
`+
void* ext; // free-form field set by plugin.
`
``
443
+
``
444
`+
// Whether this platform supports unified memory.
`
``
445
`+
// Unified memory is a single memory address space accessible from any device.
`
``
446
`+
TF_Bool supports_unified_memory;
`
``
447
`+
} SP_Allocator;
`
``
448
+
``
449
`+
#define SP_ALLOCATOR_STRUCT_SIZE \
`
``
450
`+
TF_OFFSET_OF_END(SP_Allocator, supports_unified_memory)
`
``
451
+
``
452
`+
typedef struct SP_AllocatorFns {
`
``
453
`+
size_t struct_size;
`
``
454
`+
void* ext; // reserved for future use.
`
``
455
+
``
456
`` +
// Synchronously allocates `size` bytes on the underlying platform and returns
``
``
457
`` +
// `SP_DeviceMemoryBase` representing that allocation. In the case of failure,
``
``
458
`+
// nullptr is returned.
`
``
459
`` +
// `memory_space` is reserved for a potential future usage and should be set
``
``
460
`+
// to 0.
`
``
461
`+
void (*allocate)(const SP_Device* device, const SP_Allocator* allocator,
`
``
462
`+
uint64_t size, int64_t memory_space,
`
``
463
`+
SP_DeviceMemoryBase* mem);
`
``
464
+
``
465
`+
// Deallocate the device memory previously allocated via this interface.
`
``
466
`+
// Deallocation of a nullptr-representative value is permitted.
`
``
467
`+
void (*deallocate)(const SP_Device* device, const SP_Allocator* allocator,
`
``
468
`+
SP_DeviceMemoryBase* memory);
`
``
469
+
``
470
`+
// Allocates a region of host memory and registers it with the platform API.
`
``
471
`+
// Memory allocated in this manner is required for use in asynchronous memcpy
`
``
472
`` +
// operations, such as `memcpy_dtoh`.
``
``
473
`+
void* (*host_memory_allocate)(const SP_Device* device,
`
``
474
`+
const SP_Allocator* allocator, uint64_t size);
`
``
475
+
``
476
`` +
// Deallocates a region of host memory allocated by `host_memory_allocate`.
``
``
477
`+
void (*host_memory_deallocate)(const SP_Device* device,
`
``
478
`+
const SP_Allocator* allocator, void* mem);
`
``
479
+
``
480
`+
// Allocates unified memory space of the given size, if supported. Unified
`
``
481
+
``
482
`` +
// memory support should be added by setting `supports_unified_memory` field
``
``
483
`` +
// in `SP_Allocator`.
``
``
484
`+
void* (*unified_memory_allocate)(const SP_Device* device,
`
``
485
`+
const SP_Allocator* allocator,
`
``
486
`+
uint64_t bytes);
`
``
487
+
``
488
`+
// Deallocates unified memory space previously allocated with
`
``
489
`` +
// `unified_memory_allocate`. Unified
``
``
490
`` +
// memory support should be added by setting `supports_unified_memory` field
``
``
491
`` +
// in `SP_Allocator`.
``
``
492
`+
void (*unified_memory_deallocate)(const SP_Device* device,
`
``
493
`+
const SP_Allocator* allocator,
`
``
494
`+
void* location);
`
``
495
+
``
496
`+
// Fills SP_AllocatorStats with allocator statistics, if it is available.
`
``
497
`+
// If it is not available, return false.
`
``
498
`+
TF_Bool (*get_allocator_stats)(const SP_Device* device,
`
``
499
`+
const SP_Allocator* allocator,
`
``
500
`+
SP_AllocatorStats* stats);
`
``
501
+
``
502
`+
// Fills the underlying device memory usage information, if it is
`
``
503
`+
// available. If it is not available (false is returned), free/total need not
`
``
504
`+
// be initialized.
`
``
505
`+
TF_Bool (*device_memory_usage)(const SP_Device* device,
`
``
506
`+
const SP_Allocator* allocator, int64_t* free,
`
``
507
`+
int64_t* total);
`
``
508
`+
} SP_AllocatorFns;
`
``
509
+
``
510
`+
#define SP_ALLOCATOR_FNS_STRUCT_SIZE \
`
``
511
`+
TF_OFFSET_OF_END(SP_AllocatorFns, device_memory_usage)
`
``
512
+
``
513
`+
typedef struct SP_CustomAllocator {
`
``
514
`+
size_t struct_size;
`
``
515
`+
void* ext; // free-form data set by plugin
`
``
516
`+
} SP_CustomAllocator;
`
``
517
+
``
518
`+
#define SP_CUSTOM_ALLOCATOR_STRUCT_SIZE \
`
``
519
`+
TF_OFFSET_OF_END(SP_CustomAllocator, ext)
`
``
520
+
``
521
`+
typedef struct SP_CustomAllocatorFns {
`
``
522
`+
size_t struct_size;
`
``
523
`+
void* ext; // reserved for future use
`
``
524
+
``
525
`` +
// Synchronously allocates `size` bytes on the underlying platform and returns
``
``
526
`+
// a pointer to that allocation. In the case of failure,
`
``
527
`+
// nullptr is returned.
`
``
528
`+
void* (*allocate_raw)(const SP_Device* device,
`
``
529
`+
const SP_CustomAllocator* allocator, size_t size,
`
``
530
`+
size_t alignment);
`
``
531
+
``
532
`` +
// Deallocate the device memory previously allocated via `allocate_raw`.
``
``
533
`+
// Deallocation of a nullptr-representative value is permitted.
`
``
534
`+
void (*deallocate_raw)(const SP_Device* device,
`
``
535
`+
const SP_CustomAllocator* allocator, void* ptr);
`
``
536
+
``
537
`+
// Allocates a region of host memory.
`
``
538
`+
void* (*host_allocate_raw)(const SP_Device* device,
`
``
539
`+
const SP_CustomAllocator* allocator,
`
``
540
`+
uint64_t size);
`
``
541
+
``
542
`` +
// Deallocates a region of host memory allocated by `host_allocate_raw`.
``
``
543
`+
void (*host_deallocate_raw)(const SP_Device* device,
`
``
544
`+
const SP_CustomAllocator* allocator, void* mem);
`
``
545
+
``
546
`+
// Fills SP_AllocatorStats with allocator statistics, if it is available.
`
``
547
`+
// If it is not available, return false.
`
``
548
`+
TF_Bool (*get_allocator_stats)(const SP_Device* device,
`
``
549
`+
const SP_CustomAllocator* allocator,
`
``
550
`+
SP_AllocatorStats* stats);
`
``
551
+
``
552
`+
// Fills the underlying device memory usage information, if it is
`
``
553
`+
// available. If it is not available (false is returned), free/total need not
`
``
554
`+
// be initialized.
`
``
555
`+
TF_Bool (*device_memory_usage)(const SP_Device* device,
`
``
556
`+
const SP_CustomAllocator* allocator,
`
``
557
`+
int64_t* free, int64_t* total);
`
``
558
`+
} SP_CustomAllocatorFns;
`
``
559
+
``
560
`+
#define SP_CUSTOM_ALLOCATOR_FNS_STRUCT_SIZE \
`
``
561
`+
TF_OFFSET_OF_END(SP_CustomAllocatorFns, device_memory_usage)
`
``
562
+
``
563
`+
typedef struct SE_CreateAllocatorParams {
`
``
564
`+
size_t struct_size;
`
``
565
`+
void* ext; // reserved for future use
`
``
566
+
``
567
`+
SP_Allocator* allocator;
`
``
568
`+
SP_AllocatorFns* allocator_fns;
`
``
569
`+
} SE_CreateAllocatorParams;
`
``
570
+
``
571
`+
#define SE_CREATE_ALLOCATOR_PARAMS_STRUCT_SIZE \
`
``
572
`+
TF_OFFSET_OF_END(SE_CreateAllocatorParams, allocator_fns)
`
``
573
+
``
574
`+
typedef struct SE_CreateCustomAllocatorParams {
`
``
575
`+
size_t struct_size;
`
``
576
`+
void* ext; // reserved for future use
`
``
577
+
``
578
`+
SP_CustomAllocator* custom_allocator;
`
``
579
`+
SP_CustomAllocatorFns* custom_allocator_fns;
`
``
580
`+
} SE_CreateCustomAllocatorParams;
`
``
581
+
``
582
`+
#define SE_CREATE_CUSTOM_ALLOCATOR_PARAMS_STRUCT_SIZE \
`
``
583
`+
TF_OFFSET_OF_END(SE_CreateCustomAllocatorParams, custom_allocator_fns)
`
``
584
+
440
585
`typedef struct SP_Platform {
`
441
586
` size_t struct_size;
`
442
587
``
`@@ -450,15 +595,10 @@ typedef struct SP_Platform {
`
450
595
``
451
596
` // Number of visible devices.
`
452
597
` size_t visible_device_count;
`
453
``
-
454
``
`-
// Whether this platform supports unified memory.
`
455
``
`-
// Unified memory is a single memory address space that virtualizes device and
`
456
``
`-
// host memory addresses. It is accessible to both the device and host.
`
457
``
`-
TF_Bool supports_unified_memory;
`
458
598
`} SP_Platform;
`
459
599
``
460
600
`#define SP_PLATFORM_STRUCT_SIZE \
`
461
``
`-
TF_OFFSET_OF_END(SP_Platform, supports_unified_memory)
`
``
601
`+
TF_OFFSET_OF_END(SP_Platform, visible_device_count)
`
462
602
``
463
603
`typedef struct SP_PlatformFns {
`
464
604
` size_t struct_size;
`
`@@ -488,11 +628,38 @@ typedef struct SP_PlatformFns {
`
488
628
``
489
629
` void (*destroy_timer_fns)(const SP_Platform* platform,
`
490
630
` SP_TimerFns* timer_fns);
`
``
631
+
``
632
`` +
// Set only one of `create_allocator` or `create_custom_allocator` functions
``
``
633
`+
// below.
`
``
634
+
``
635
`+
// Callback for creating an allocator that uses default TensorFlow allocation
`
``
636
`+
// strategy (BFC: best-fit with coalescing). For more details, see TensorFlow's `bfc_allocator.h`.
`
``
637
`+
`
``
638
`` +
// If `create_allocator` is set, then `create_custom_allocator` should not
``
``
639
`+
// be set.
`
``
640
`+
void (*create_allocator)(const SP_Platform* platform,
`
``
641
`+
SE_CreateAllocatorParams* params, TF_Status* status);
`
``
642
`+
void (*destroy_allocator)(const SP_Platform* platform,
`
``
643
`+
SP_Allocator* allocator,
`
``
644
`+
SP_AllocatorFns* allocator_fns);
`
``
645
+
``
646
`+
// Callback for creating a custom allocator. Allows using a custom allocation
`
``
647
`+
// strategy.
`
``
648
`` +
// If `create_custom_allocator` is set, then `create_allocator` should not
``
``
649
`+
// be set.
`
``
650
`+
// Note: deallocator functions must be set in params.
`
``
651
`+
void (*create_custom_allocator)(const SP_Platform* platform,
`
``
652
`+
SE_CreateCustomAllocatorParams* params,
`
``
653
`+
TF_Status* status);
`
``
654
`+
void (*destroy_custom_allocator)(const SP_Platform* platform,
`
``
655
`+
SP_CustomAllocator* allocator,
`
``
656
`+
SP_CustomAllocatorFns* allocator_fns);
`
491
657
`} SP_PlatformFns;
`
492
658
``
493
659
`#define SP_PLATFORM_FNS_STRUCT_SIZE \
`
494
660
` TF_OFFSET_OF_END(SP_PlatformFns, destroy_custom_allocator)
`
495
661
``
``
662
+
496
663
`typedef struct SE_PlatformRegistrationParams {
`
497
664
` size_t struct_size;
`
498
665
` void* ext; // reserved for future use
`
`@@ -584,6 +751,10 @@ void create_timer_fns(const SP_Platform* platform, SP_TimerFns* timer_fns,
`
584
751
` timer_fns->nanoseconds = nanoseconds;
`
585
752
` ...
`
586
753
`}
`
``
754
`+
void create_allocator(const SP_Platform* platform, SE_CreateAllocatorParams* params,
`
``
755
`+
TF_Status* status) {
`
``
756
`+
...
`
``
757
`+
}
`
587
758
`void destroy_device(const SP_Platform* platform, SP_Device* device) {
`
588
759
` // Destroy device handle here.
`
589
760
`}
`
`@@ -594,6 +765,9 @@ void destroy_stream_executor(const SP_Platform* platform,
`
594
765
`void destroy_timer_fns(const SP_Platform* platform, SP_TimerFns* timer_fns) {
`
595
766
` // Destroy timer functions here.
`
596
767
`}
`
``
768
`+
void destroy_allocator(const SP_Platform* platform, SP_Allocator* allocator, SP_AllocatorFns* allocator_fns) {
`
``
769
`+
// Clean up allocator here.
`
``
770
`+
}
`
597
771
```` ```
`598`
`772`
``
`599`
`773`
`` Define `SE_InitPlugin` that TensorFlow will call when registering the device
``
`@@ -616,6 +790,8 @@ void SE_InitPlugin(SE_PlatformRegistrationParams* params, TF_Status* status) {
`
`616`
`790`
` params->platform_fns->destroy_stream_executor = destroy_stream_executor;
`
`617`
`791`
` params->platform_fns->create_timer_fns = create_timer_fns;
`
`618`
`792`
` params->platform_fns->destroy_timer_fns = destroy_timer_fns;
`
``
`793`
`+
params->platform_fns->create_allocator = create_allocator;
`
``
`794`
`+
params->platform_fns->destroy_allocator = destroy_allocator;
`
`619`
`795`
`}
`
`620`
`796`
```` ```
621
797
``