Custom and default allocator added · tensorflow/community@21be6e9 (original) (raw)

`@@ -437,6 +437,151 @@ typedef struct SE_CreateStreamExecutorParams {

`

437

437

`#define SE_CREATE_STREAM_EXECUTOR_PARAMS_STRUCT_SIZE \

`

438

438

` TF_OFFSET_OF_END(SE_CreateStreamExecutorParams, stream_executor)

`

439

439

``

``

440

`+

typedef struct SP_Allocator {

`

``

441

`+

size_t struct_size;

`

``

442

`+

void* ext; // free-form field set by plugin.

`

``

443

+

``

444

`+

// Whether this platform supports unified memory.

`

``

445

`+

// Unified memory is a single memory address space accessible from any device.

`

``

446

`+

TF_Bool supports_unified_memory;

`

``

447

`+

} SP_Allocator;

`

``

448

+

``

449

`+

#define SP_ALLOCATOR_STRUCT_SIZE \

`

``

450

`+

TF_OFFSET_OF_END(SP_Allocator, supports_unified_memory)

`

``

451

+

``

452

`+

typedef struct SP_AllocatorFns {

`

``

453

`+

size_t struct_size;

`

``

454

`+

void* ext; // reserved for future use.

`

``

455

+

``

456

`` +

// Synchronously allocates size bytes on the underlying platform and returns

``

``

457

`` +

// SP_DeviceMemoryBase representing that allocation. In the case of failure,

``

``

458

`+

// mem is set to a nullptr-representative value.

`

``

459

`` +

// memory_space is reserved for a potential future usage and should be set

``

``

460

`+

// to 0.

`

``

461

`+

void (*allocate)(const SP_Device* device, const SP_Allocator* allocator,

`

``

462

`+

uint64_t size, int64_t memory_space,

`

``

463

`+

SP_DeviceMemoryBase* mem);

`

``

464

+

``

465

`+

// Deallocate the device memory previously allocated via this interface.

`

``

466

`+

// Deallocation of a nullptr-representative value is permitted.

`

``

467

`+

void (*deallocate)(const SP_Device* device, const SP_Allocator* allocator,

`

``

468

`+

SP_DeviceMemoryBase* memory);

`

``

469

+

``

470

`+

// Allocates a region of host memory and registers it with the platform API.

`

``

471

`+

// Memory allocated in this manner is required for use in asynchronous memcpy

`

``

472

`` +

// operations, such as memcpy_dtoh.

``

``

473

`+

void* (*host_memory_allocate)(const SP_Device* device,

`

``

474

`+

const SP_Allocator* allocator, uint64_t size);

`

``

475

+

``

476

`` +

// Deallocates a region of host memory allocated by host_memory_allocate.

``

``

477

`+

void (*host_memory_deallocate)(const SP_Device* device,

`

``

478

`+

const SP_Allocator* allocator, void* mem);

`

``

479

+

``

480

`+

// Allocates unified memory space of the given size, if supported. Unified

`

``

481

+

``

482

`` +

// memory support should be added by setting supports_unified_memory field

``

``

483

`` +

// in SP_Allocator.

``

``

484

`+

void* (*unified_memory_allocate)(const SP_Device* device,

`

``

485

`+

const SP_Allocator* allocator,

`

``

486

`+

uint64_t bytes);

`

``

487

+

``

488

`+

// Deallocates unified memory space previously allocated with

`

``

489

`` +

// unified_memory_allocate. Unified

``

``

490

`` +

// memory support should be added by setting supports_unified_memory field

``

``

491

`` +

// in SP_Allocator.

``

``

492

`+

void (*unified_memory_deallocate)(const SP_Device* device,

`

``

493

`+

const SP_Allocator* allocator,

`

``

494

`+

void* location);

`

``

495

+

``

496

`+

// Fills SP_AllocatorStats with allocator statistics, if it is available.

`

``

497

`+

// If it is not available, return false.

`

``

498

`+

TF_Bool (*get_allocator_stats)(const SP_Device* device,

`

``

499

`+

const SP_Allocator* allocator,

`

``

500

`+

SP_AllocatorStats* stats);

`

``

501

+

``

502

`+

// Fills the underlying device memory usage information, if it is

`

``

503

`+

// available. If it is not available (false is returned), free/total need not

`

``

504

`+

// be initialized.

`

``

505

`+

TF_Bool (*device_memory_usage)(const SP_Device* device,

`

``

506

`+

const SP_Allocator* allocator, int64_t* free,

`

``

507

`+

int64_t* total);

`

``

508

`+

} SP_AllocatorFns;

`

``

509

+

``

510

`+

#define SP_ALLOCATOR_FNS_STRUCT_SIZE \

`

``

511

`+

TF_OFFSET_OF_END(SP_AllocatorFns, device_memory_usage)

`

``

512

+

``

513

`+

typedef struct SP_CustomAllocator {

`

``

514

`+

size_t struct_size;

`

``

515

`+

void* ext; // free-form data set by plugin

`

``

516

`+

} SP_CustomAllocator;

`

``

517

+

``

518

`+

#define SP_CUSTOM_ALLOCATOR_STRUCT_SIZE \

`

``

519

`+

TF_OFFSET_OF_END(SP_CustomAllocator, ext)

`

``

520

+

``

521

`+

typedef struct SP_CustomAllocatorFns {

`

``

522

`+

size_t struct_size;

`

``

523

`+

void* ext; // reserved for future use

`

``

524

+

``

525

`` +

// Synchronously allocates size bytes on the underlying platform and returns

``

``

526

`+

// a pointer to that allocation. In the case of failure,

`

``

527

`+

// nullptr is returned.

`

``

528

`+

void* (*allocate_raw)(const SP_Device* device,

`

``

529

`+

const SP_CustomAllocator* allocator, size_t size,

`

``

530

`+

size_t alignment);

`

``

531

+

``

532

`` +

// Deallocate the device memory previously allocated via allocate_raw.

``

``

533

`+

// Deallocation of a nullptr-representative value is permitted.

`

``

534

`+

void (*deallocate_raw)(const SP_Device* device,

`

``

535

`+

const SP_CustomAllocator* allocator, void* ptr);

`

``

536

+

``

537

`+

// Allocates a region of host memory.

`

``

538

`+

void* (*host_allocate_raw)(const SP_Device* device,

`

``

539

`+

const SP_CustomAllocator* allocator,

`

``

540

`+

uint64_t size);

`

``

541

+

``

542

`` +

// Deallocates a region of host memory allocated by host_allocate_raw.

``

``

543

`+

void (*host_deallocate_raw)(const SP_Device* device,

`

``

544

`+

const SP_CustomAllocator* allocator, void* mem);

`

``

545

+

``

546

`+

// Fills SP_AllocatorStats with allocator statistics, if it is available.

`

``

547

`+

// If it is not available, return false.

`

``

548

`+

TF_Bool (*get_allocator_stats)(const SP_Device* device,

`

``

549

`+

const SP_CustomAllocator* allocator,

`

``

550

`+

SP_AllocatorStats* stats);

`

``

551

+

``

552

`+

// Fills the underlying device memory usage information, if it is

`

``

553

`+

// available. If it is not available (false is returned), free/total need not

`

``

554

`+

// be initialized.

`

``

555

`+

TF_Bool (*device_memory_usage)(const SP_Device* device,

`

``

556

`+

const SP_CustomAllocator* allocator,

`

``

557

`+

int64_t* free, int64_t* total);

`

``

558

`+

} SP_CustomAllocatorFns;

`

``

559

+

``

560

`+

#define SP_CUSTOM_ALLOCATOR_FNS_STRUCT_SIZE \

`

``

561

`+

TF_OFFSET_OF_END(SP_CustomAllocatorFns, device_memory_usage)

`

``

562

+

``

563

`+

typedef struct SE_CreateAllocatorParams {

`

``

564

`+

size_t struct_size;

`

``

565

`+

void* ext; // reserved for future use

`

``

566

+

``

567

`+

SP_Allocator* allocator;

`

``

568

`+

SP_AllocatorFns* allocator_fns;

`

``

569

`+

} SE_CreateAllocatorParams;

`

``

570

+

``

571

`+

#define SE_CREATE_ALLOCATOR_PARAMS_STRUCT_SIZE \

`

``

572

`+

TF_OFFSET_OF_END(SE_CreateAllocatorParams, allocator_fns)

`

``

573

+

``

574

`+

typedef struct SE_CreateCustomAllocatorParams {

`

``

575

`+

size_t struct_size;

`

``

576

`+

void* ext; // reserved for future use

`

``

577

+

``

578

`+

SP_CustomAllocator* custom_allocator;

`

``

579

`+

SP_CustomAllocatorFns* custom_allocator_fns;

`

``

580

`+

} SE_CreateCustomAllocatorParams;

`

``

581

+

``

582

`+

#define SE_CREATE_CUSTOM_ALLOCATOR_PARAMS_STRUCT_SIZE \

`

``

583

`+

TF_OFFSET_OF_END(SE_CreateCustomAllocatorParams, custom_allocator_fns)

`

``

584

+

440

585

`typedef struct SP_Platform {

`

441

586

` size_t struct_size;

`

442

587

``

`@@ -450,15 +595,10 @@ typedef struct SP_Platform {

`

450

595

``

451

596

` // Number of visible devices.

`

452

597

` size_t visible_device_count;

`

453

``

-

454

``

`-

// Whether this platform supports unified memory.

`

455

``

`-

// Unified memory is a single memory address space that virtualizes device and

`

456

``

`-

// host memory addresses. It is accessible to both the device and host.

`

457

``

`-

TF_Bool supports_unified_memory;

`

458

598

`} SP_Platform;

`

459

599

``

460

600

`#define SP_PLATFORM_STRUCT_SIZE \

`

461

``

`-

TF_OFFSET_OF_END(SP_Platform, supports_unified_memory)

`

``

601

`+

TF_OFFSET_OF_END(SP_Platform, visible_device_count)

`

462

602

``

463

603

`typedef struct SP_PlatformFns {

`

464

604

` size_t struct_size;

`

`@@ -488,11 +628,38 @@ typedef struct SP_PlatformFns {

`

488

628

``

489

629

` void (*destroy_timer_fns)(const SP_Platform* platform,

`

490

630

` SP_TimerFns* timer_fns);

`

``

631

+

``

632

`` +

// Set only one of create_allocator or create_custom_allocator functions

``

``

633

`+

// below.

`

``

634

+

``

635

`+

// Callback for creating an allocator that uses default TensorFlow allocation

`

``

636

`+

// strategy (BFC: best-fit with coalescing). For more details, see

`

``

637

`+

// https://cs.opensource.google/tensorflow/tensorflow/+/master:tensorflow/core/common_runtime/bfc_allocator.h.

`

``

638

`` +

// If create_allocator is set, then create_custom_allocator should not

``

``

639

`+

// be set.

`

``

640

`+

void (*create_allocator)(const SP_Platform* platform,

`

``

641

`+

SE_CreateAllocatorParams* params, TF_Status* status);

`

``

642

`+

void (*destroy_allocator)(const SP_Platform* platform,

`

``

643

`+

SP_Allocator* allocator,

`

``

644

`+

SP_AllocatorFns* allocator_fns);

`

``

645

+

``

646

`+

// Callback for creating a custom allocator. Allows using a custom allocation

`

``

647

`+

// strategy.

`

``

648

`` +

// If create_custom_allocator is set, then create_allocator should not

``

``

649

`+

// be set.

`

``

650

`+

// Note: deallocator functions must be set in params.

`

``

651

`+

void (*create_custom_allocator)(const SP_Platform* platform,

`

``

652

`+

SE_CreateCustomAllocatorParams* params,

`

``

653

`+

TF_Status* status);

`

``

654

`+

void (*destroy_custom_allocator)(const SP_Platform* platform,

`

``

655

`+

SP_CustomAllocator* allocator,

`

``

656

`+

SP_CustomAllocatorFns* allocator_fns);

`

491

657

`} SP_PlatformFns;

`

492

658

``

493

659

`#define SP_PLATFORM_FNS_STRUCT_SIZE \

`

494

660

` TF_OFFSET_OF_END(SP_PlatformFns, destroy_custom_allocator)

`

495

661

``

``

662

+

496

663

`typedef struct SE_PlatformRegistrationParams {

`

497

664

` size_t struct_size;

`

498

665

` void* ext; // reserved for future use

`

`@@ -584,6 +751,10 @@ void create_timer_fns(const SP_Platform* platform, SP_TimerFns* timer_fns,

`

584

751

` timer_fns->nanoseconds = nanoseconds;

`

585

752

` ...

`

586

753

`}

`

``

754

`+

void create_allocator(const SP_Platform* platform, SE_CreateAllocatorParams* params,

`

``

755

`+

TF_Status* status) {

`

``

756

`+

...

`

``

757

`+

}

`

587

758

`void destroy_device(const SP_Platform* platform, SP_Device* device) {

`

588

759

` // Destroy device handle here.

`

589

760

`}

`

`@@ -594,6 +765,9 @@ void destroy_stream_executor(const SP_Platform* platform,

`

594

765

`void destroy_timer_fns(const SP_Platform* platform, SP_TimerFns* timer_fns) {

`

595

766

` // Destroy timer functions here.

`

596

767

`}

`

``

768

`+

void destroy_allocator(const SP_Platform* platform, SP_Allocator* allocator, SP_AllocatorFns* allocator_fns) {

`

``

769

`+

// Clean up allocator here.

`

``

770

`+

}

`

597

771

```` ```


`598`

`772`

``

`599`

`773`

`` Define `SE_InitPlugin` that TensorFlow will call when registering the device

``

`@@ -616,6 +790,8 @@ void SE_InitPlugin(SE_PlatformRegistrationParams* params, TF_Status* status) {

`

`616`

`790`

` params->platform_fns->destroy_stream_executor = destroy_stream_executor;

`

`617`

`791`

` params->platform_fns->create_timer_fns = create_timer_fns;

`

`618`

`792`

` params->platform_fns->destroy_timer_fns = destroy_timer_fns;

`

``

`793`

`+

params->platform_fns->create_allocator = create_allocator;

`

``

`794`

`+

params->platform_fns->destroy_allocator = destroy_allocator;

`

`619`

`795`

`}

`

`620`

`796`

```` ```

621

797

``