Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mpix: add MPIX_Query_pointer_attr #7049

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/binding/c/misc_api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,22 @@ MPIX_Query_hip_support:

return is_supported;
}

MPIX_Query_pointer_attr:
    ptr: BUFFER, [Pointer address to be queried]
    type: POINTER_TYPE, direction=out, [Memory type (enum MPIX_Pointer_type)]
    device: INDEX, direction=out, [Device id (if the pointer address is associated with a device, -1 otherwise)]
    .desc: Returns the memory type of a buffer (enum MPIX_Pointer_type) and, for a device buffer or device-registered buffer, its device id
    .impl: direct
    .skip: global_cs
{
    int mpi_errno = MPI_SUCCESS;
    int mpl_err;
    MPL_pointer_attr_t attr;

    /* The MPL layer reports its own status codes; keep them apart from the
     * MPI error code that is returned to the caller. */
    mpl_err = MPL_gpu_query_pointer_attr(ptr, &attr);

    if (mpl_err == MPL_SUCCESS) {
        /* attr is only read after a successful query. The type value is
         * passed through unchanged, so MPIX_Pointer_type must keep the same
         * numbering as the MPL pointer type enum. */
        *type = attr.type;
        *device = attr.device;
    } else {
        /* Leave the output arguments untouched on failure and report a
         * valid MPI error class instead of the raw MPL code. */
        mpi_errno = MPI_ERR_OTHER;
    }

    return mpi_errno;
}
7 changes: 7 additions & 0 deletions src/binding/custom_mapping.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,45 +7,52 @@ LIS_KIND_MAP:
STREAM: handle
IOVEC: None
ASYNC_THING: None
POINTER_TYPE: integer

SMALL_F90_KIND_MAP:
GPU_TYPE: INTEGER
GREQUEST_CLASS: INTEGER
STREAM: INTEGER
IOVEC: None
ASYNC_THING: INTEGER(KIND=MPI_ADDRESS_KIND)
POINTER_TYPE: INTEGER

BIG_F90_KIND_MAP:
GPU_TYPE: INTEGER
GREQUEST_CLASS: INTEGER
STREAM: INTEGER
IOVEC: None
ASYNC_THING: INTEGER(KIND=MPI_ADDRESS_KIND)
POINTER_TYPE: INTEGER

SMALL_F08_KIND_MAP:
GPU_TYPE: INTEGER
GREQUEST_CLASS: INTEGER
STREAM: TYPE(MPIX_Stream)
IOVEC: None
ASYNC_THING: INTEGER(KIND=MPI_ADDRESS_KIND)
POINTER_TYPE: INTEGER

BIG_F08_KIND_MAP:
GPU_TYPE: INTEGER
GREQUEST_CLASS: INTEGER
STREAM: TYPE(MPIX_Stream)
IOVEC: None
ASYNC_THING: INTEGER(KIND=MPI_ADDRESS_KIND)
POINTER_TYPE: INTEGER

SMALL_C_KIND_MAP:
GPU_TYPE: int
GREQUEST_CLASS: MPIX_Grequest_class
STREAM: MPIX_Stream
IOVEC: MPIX_Iov
ASYNC_THING: MPIX_Async_thing
POINTER_TYPE: int

BIG_C_KIND_MAP:
GPU_TYPE: int
GREQUEST_CLASS: MPIX_Grequest_class
STREAM: MPIX_Stream
IOVEC: MPIX_Iov
ASYNC_THING: MPIX_Async_thing
POINTER_TYPE: int
8 changes: 8 additions & 0 deletions src/include/mpi.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,14 @@ enum MPIR_Combiner_enum {
#define MPIX_GPU_SUPPORT_ZE (1)
#define MPIX_GPU_SUPPORT_HIP (2)

/* Memory types reported by MPIX_Query_pointer_attr.
 * The values are copied straight from the MPL pointer type, so the numbering
 * here must stay identical to the MPL enum in mpl_gpu.h. */
enum MPIX_Pointer_type {
    MPIX_POINTER_UNREGISTERED_HOST = 0,
    MPIX_POINTER_REGISTERED_HOST = 1,
    MPIX_POINTER_DEV = 2,
    MPIX_POINTER_MANAGED = 3
};

/* feature advertisement */
#define MPIIMPL_ADVERTISES_FEATURES 1
#define MPIIMPL_HAVE_MPI_INFO 1
Expand Down
7 changes: 4 additions & 3 deletions src/mpl/include/mpl_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "mpl_gpu_fallback.h"
#endif

/* NOTE: must agree with MPIX_Pointer_type in mpi.h */
typedef enum {
MPL_GPU_POINTER_UNREGISTERED_HOST = 0,
MPL_GPU_POINTER_REGISTERED_HOST,
Expand All @@ -36,7 +37,7 @@ typedef enum {

typedef struct {
MPL_pointer_type_t type;
MPL_gpu_device_handle_t device;
int device;
MPL_gpu_device_attr device_attr;
} MPL_pointer_attr_t;

Expand All @@ -61,7 +62,7 @@ typedef enum {
MPL_GPU_COPY_H2D,
MPL_GPU_COPY_D2D_INCOMING, /* copy from remote to local */
MPL_GPU_COPY_D2D_OUTGOING, /* copy from local to remote */
MPL_GPU_COPY_DIRECTION_NONE, /* copy in any direction and to/from any buffer type */
MPL_GPU_COPY_DIRECTION_NONE, /* copy in any direction and to/from any buffer type */
} MPL_gpu_copy_direction_t;

#define MPL_GPU_COPY_DIRECTION_TYPES 4
Expand Down Expand Up @@ -119,7 +120,7 @@ int MPL_gpu_free_host(void *ptr);
int MPL_gpu_register_host(const void *ptr, size_t size);
int MPL_gpu_unregister_host(const void *ptr);

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device);
int MPL_gpu_malloc(void **ptr, size_t size, int h_device);
int MPL_gpu_free(void *ptr);

int MPL_gpu_init(int debug_summary);
Expand Down
1 change: 0 additions & 1 deletion src/mpl/include/mpl_gpu_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "cuda_runtime_api.h"

typedef cudaIpcMemHandle_t MPL_gpu_ipc_mem_handle_t;
typedef int MPL_gpu_device_handle_t;
typedef struct cudaPointerAttributes MPL_gpu_device_attr;
typedef int MPL_gpu_request;
typedef cudaStream_t MPL_gpu_stream_t;
Expand Down
1 change: 0 additions & 1 deletion src/mpl/include/mpl_gpu_fallback.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#define MPL_GPU_CUDA_H_INCLUDED

typedef int MPL_gpu_ipc_mem_handle_t;
typedef int MPL_gpu_device_handle_t;
typedef int MPL_gpu_device_attr; /* dummy type */
typedef int MPL_gpu_request;
typedef int MPL_gpu_stream_t;
Expand Down
1 change: 0 additions & 1 deletion src/mpl/include/mpl_gpu_hip.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "hip/hip_runtime_api.h"

typedef hipIpcMemHandle_t MPL_gpu_ipc_mem_handle_t;
typedef int MPL_gpu_device_handle_t;
typedef struct hipPointerAttribute_t MPL_gpu_device_attr;
typedef int MPL_gpu_request;
typedef hipStream_t MPL_gpu_stream_t;
Expand Down
5 changes: 2 additions & 3 deletions src/mpl/include/mpl_gpu_ze.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ typedef struct _MPL_gpu_ipc_mem_handle_t {
fd_pid_t data;
} MPL_gpu_ipc_mem_handle_t;

typedef ze_device_handle_t MPL_gpu_device_handle_t;
typedef ze_alloc_attr_t MPL_gpu_device_attr;

typedef struct MPL_cmdlist_pool {
Expand All @@ -52,7 +51,7 @@ typedef int MPL_gpu_stream_t;
typedef volatile int MPL_gpu_event_t;

#define MPL_GPU_STREAM_DEFAULT 0
#define MPL_GPU_DEVICE_INVALID NULL
#define MPL_GPU_DEVICE_INVALID -1

#define MPL_GPU_DEV_AFFINITY_ENV "ZE_AFFINITY_MASK"

Expand All @@ -67,7 +66,7 @@ int MPL_ze_ipc_handle_map(MPL_gpu_ipc_mem_handle_t * ipc_handle, int is_shared_h
int MPL_ze_ipc_handle_mmap_host(MPL_gpu_ipc_mem_handle_t * ipc_handle, int shared_handle,
int dev_id, size_t size, void **ptr);
int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr,
MPL_gpu_device_handle_t device, void **mmaped_ptr);
int device, void **mmaped_ptr);
int MPL_ze_mmap_handle_unmap(void *ptr, int dev_id);

#endif /* ifndef MPL_GPU_ZE_H_INCLUDED */
2 changes: 1 addition & 1 deletion src/mpl/src/gpu/mpl_gpu_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ int MPL_gpu_unregister_host(const void *ptr)
goto fn_exit;
}

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
int MPL_gpu_malloc(void **ptr, size_t size, int h_device)
{
int mpl_err = MPL_SUCCESS;
int prev_devid;
Expand Down
2 changes: 1 addition & 1 deletion src/mpl/src/gpu/mpl_gpu_fallback.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ int MPL_gpu_unregister_host(const void *ptr)
return MPL_SUCCESS;
}

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
int MPL_gpu_malloc(void **ptr, size_t size, int h_device)
{
abort();
return MPL_ERR_GPU_INTERNAL;
Expand Down
2 changes: 1 addition & 1 deletion src/mpl/src/gpu/mpl_gpu_hip.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ int MPL_gpu_unregister_host(const void *ptr)
goto fn_exit;
}

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
int MPL_gpu_malloc(void **ptr, size_t size, int h_device)
{
int mpl_err = MPL_SUCCESS;
int prev_devid;
Expand Down
32 changes: 17 additions & 15 deletions src/mpl/src/gpu/mpl_gpu_ze.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static int gpu_initialized = 0;
static uint32_t device_count; /* Counts all local devices, does not include subdevices */
static uint32_t local_ze_device_count; /* Counts all local devices and subdevices */
static uint32_t global_ze_device_count; /* Counts all global devices and subdevices */
static int max_dev_id; /* Does not include subdevices */
static int max_dev_id; /* Does not include subdevices */
static int max_subdev_id;
static char **device_list = NULL;
static int *engine_conversion = NULL;
Expand Down Expand Up @@ -622,7 +622,7 @@ static int get_physical_device(int dev_id)
}

/* Get dev_id from device handle */
MPL_STATIC_INLINE_PREFIX int device_to_dev_id(MPL_gpu_device_handle_t device)
MPL_STATIC_INLINE_PREFIX int device_to_dev_id(ze_device_handle_t device)
{
int dev_id = -1;
for (int d = 0; d < local_ze_device_count; d++) {
Expand All @@ -636,7 +636,7 @@ MPL_STATIC_INLINE_PREFIX int device_to_dev_id(MPL_gpu_device_handle_t device)
}

/* Get device from dev_id */
MPL_STATIC_INLINE_PREFIX int dev_id_to_device(int dev_id, MPL_gpu_device_handle_t * device)
MPL_STATIC_INLINE_PREFIX int dev_id_to_device(int dev_id, ze_device_handle_t * device)
{
int mpl_err = MPL_SUCCESS;

Expand Down Expand Up @@ -1563,7 +1563,7 @@ int MPL_gpu_ipc_handle_destroy(const void *ptr, MPL_pointer_attr_t * gpu_attr)
}

if (likely(MPL_gpu_info.specialized_cache)) {
dev_id = device_to_dev_id(gpu_attr->device);
dev_id = gpu_attr->device;
if (dev_id == -1) {
goto fn_fail;
}
Expand Down Expand Up @@ -1817,7 +1817,7 @@ int MPL_gpu_query_pointer_attr(const void *ptr, MPL_pointer_attr_t * attr)
ret = zeMemGetAllocProperties(ze_context, ptr,
&attr->device_attr.prop, &attr->device_attr.device);
ZE_ERR_CHECK(ret);
attr->device = attr->device_attr.device;
attr->device = device_to_dev_id(attr->device_attr.device);
switch (attr->device_attr.prop.type) {
case ZE_MEMORY_TYPE_UNKNOWN:
attr->type = MPL_GPU_POINTER_UNREGISTERED_HOST;
Expand Down Expand Up @@ -1930,7 +1930,7 @@ int MPL_gpu_query_is_same_dev(int global_dev1, int global_dev2)
#endif
}

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
int MPL_gpu_malloc(void **ptr, size_t size, int h_device)
{
int mpl_err = MPL_SUCCESS;
int ret;
Expand All @@ -1941,10 +1941,16 @@ int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
.flags = 0,
.ordinal = 0, /* We currently support a single memory type */
};

ze_device_handle_t device_handle;
ret = dev_id_to_device(h_device, &device_handle);
if (ret) {
goto fn_fail;
}
/* Currently ZE ignores this argument and uses an internal alignment
* value. However, this behavior can change in the future. */
mem_alignment = 1;
ret = zeMemAllocDevice(ze_context, &device_desc, size, mem_alignment, h_device, ptr);
ret = zeMemAllocDevice(ze_context, &device_desc, size, mem_alignment, device_handle, ptr);

ZE_ERR_CHECK(ret);

Expand Down Expand Up @@ -2041,11 +2047,7 @@ int MPL_gpu_unregister_host(const void *ptr)

int MPL_gpu_get_dev_id_from_attr(MPL_pointer_attr_t * attr)
{
int dev_id = -1;

dev_id = device_to_dev_id(attr->device);

return dev_id;
return attr->device;
}

int MPL_gpu_get_buffer_bounds(const void *ptr, void **pbase, uintptr_t * len)
Expand Down Expand Up @@ -2855,7 +2857,7 @@ int MPL_ze_ipc_handle_map(MPL_gpu_ipc_mem_handle_t * mpl_ipc_handle, int is_shar
ze_result_t ret;
int fds[2], status;
uint32_t nfds;
MPL_gpu_device_handle_t dev_handle;
ze_device_handle_t dev_handle;

fd_pid_t h;
h = mpl_ipc_handle->data;
Expand Down Expand Up @@ -3006,7 +3008,7 @@ int MPL_ze_ipc_handle_mmap_host(MPL_gpu_ipc_mem_handle_t * mpl_ipc_handle, int i

/* this function takes a local device pointer and mmap to host */
int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr,
MPL_gpu_device_handle_t device, void **mmaped_ptr)
int device, void **mmaped_ptr)
{
ze_result_t ret;
int mpl_err = MPL_SUCCESS;
Expand All @@ -3024,7 +3026,7 @@ int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr,
offset = (char *) dptr - (char *) pbase;

mem_id = attr->prop.id;
local_dev_id = device_to_dev_id(device);
local_dev_id = device;
if (local_dev_id == -1) {
goto fn_fail;
}
Expand Down