26 #ifndef MCA_BTL_SMCUDA_H
27 #define MCA_BTL_SMCUDA_H
29 #include "ompi_config.h"
40 #include "opal/util/bit_ops.h"
41 #include "opal/class/opal_free_list.h"
43 #include "ompi/mca/common/sm/common_sm.h"
75 #define SM_FIFO_FREE (void *) (-2)
81 #define SM_CACHE_LINE_PAD 128
85 volatile void **queue;
86 char pad0[SM_CACHE_LINE_PAD -
sizeof(
void **)];
92 char pad2[SM_CACHE_LINE_PAD -
sizeof(int)];
95 char pad3[SM_CACHE_LINE_PAD -
sizeof(int)];
97 volatile void **queue_recv;
102 char pad4[SM_CACHE_LINE_PAD -
sizeof(
void **) -
112 #if OMPI_ENABLE_PROGRESS_THREADS == 1
168 #if OMPI_ENABLE_PROGRESS_THREADS == 1
169 char sm_fifo_path[PATH_MAX];
233 #define VIRTUAL2RELATIVE(VADDR ) ((long)(VADDR) - (long)mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank])
234 #define RELATIVE2VIRTUAL(OFFSET) ((long)(OFFSET) + (long)mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank])
242 qsize = opal_next_poweroftwo_inclusive (fifo_size);
245 fifo->queue_recv = (
volatile void **)mpool->
mpool_alloc(
246 mpool,
sizeof(
void *) * qsize, opal_cache_line_size, 0, NULL);
247 if(NULL == fifo->queue_recv) {
248 return OMPI_ERR_OUT_OF_RESOURCE;
252 for ( i = 0; i < qsize; i++ )
253 fifo->queue_recv[i] = SM_FIFO_FREE;
256 fifo->queue = (
volatile void **) VIRTUAL2RELATIVE(fifo->queue_recv);
266 fifo->mask = qsize - 1;
268 fifo->num_to_clear = 0;
269 fifo->lazy_free = lazy_free;
275 static inline int sm_fifo_write(
void *value,
sm_fifo_t *fifo)
277 volatile void **q = (
volatile void **) RELATIVE2VIRTUAL(fifo->queue);
281 if ( SM_FIFO_FREE != q[fifo->head] )
282 return OMPI_ERR_OUT_OF_RESOURCE;
285 q[fifo->head] = value;
287 fifo->head = (fifo->head + 1) & fifo->mask;
292 static inline void *sm_fifo_read(
sm_fifo_t *fifo)
297 value = (
void *) fifo->queue_recv[fifo->tail];
302 if ( SM_FIFO_FREE != value ) {
304 fifo->tail = ( fifo->tail + 1 ) & fifo->mask;
305 fifo->num_to_clear += 1;
308 if ( fifo->num_to_clear >= fifo->lazy_free ) {
309 int i = (fifo->tail - fifo->num_to_clear ) & fifo->mask;
311 while ( fifo->num_to_clear > 0 ) {
312 fifo->queue_recv[i] = SM_FIFO_FREE;
313 i = (i+1) & fifo->mask;
314 fifo->num_to_clear -= 1;
452 mca_btl_base_tag_t tag,
465 mca_btl_base_tag_t tag
468 #if OMPI_CUDA_SUPPORT
494 #if OMPI_ENABLE_PROGRESS_THREADS == 1
498 #if OMPI_ENABLE_PROGRESS_THREADS == 1
499 #define MCA_BTL_SMCUDA_SIGNAL_PEER(peer) \
501 unsigned char cmd = DATA; \
502 if(write(peer->fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { \
503 opal_output(0, "mca_btl_smcuda_send: write fifo failed: errno=%d\n", errno); \
507 #define MCA_BTL_SMCUDA_SIGNAL_PEER(peer)
size_t max_frag_size
maximum (second and beyond) fragment size
Definition: btl_smcuda.h:136
char ** shm_bases
pointer to base pointers in shared memory
Definition: btl_smcuda.h:140
int sm_free_list_max
maximum size of free lists
Definition: btl_smcuda.h:127
SM BTL Interface.
Definition: btl_smcuda.h:201
int num_outstanding_frags
number of fragments sent but not yet returned to free list
Definition: btl_smcuda.h:163
A descriptor that holds the parameters to a send/put/get operation along w/ a callback routine that i...
Definition: btl.h:275
bool btl_inited
flag indicating if btl has been inited
Definition: btl_smcuda.h:203
int sm_extra_procs
number of extra procs to allow
Definition: btl_smcuda.h:130
int mca_btl_smcuda_component_progress(void)
shared memory component progress.
Definition: btl_smcuda_component.c:412
void opal_atomic_rmb(void)
Read memory barrier.
Definition: opal_bitmap.h:53
mca_btl_base_descriptor_t * mca_btl_smcuda_alloc(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, uint8_t order, size_t size, uint32_t flags)
Allocate a segment.
Definition: btl_smcuda.c:656
int nfifos
number of FIFOs per receiver
Definition: btl_smcuda.h:150
int sm_free_list_inc
number of elements to alloc when growing free lists
Definition: btl_smcuda.h:128
mca_mpool_base_module_t ** sm_mpools
shared memory pools (one for each memory node)
Definition: btl_smcuda.h:132
Definition: common_sm.h:60
void * sm_mpool_base
base address of shared memory pool
Definition: btl_smcuda.h:134
int use_knem
MCA: should we be using knem or not? neg=try but continue if not available, 0=don't try...
Definition: btl_smcuda.h:181
uint16_t * shm_mem_nodes
pointer to mem nodes in shared memory
Definition: btl_smcuda.h:141
int mca_btl_smcuda_sendi(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct opal_convertor_t *convertor, void *header, size_t header_size, size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t **descriptor)
Initiate an inlined send to the peer or return a descriptor.
Definition: btl_smcuda.c:821
mca_btl_base_module_t super
base BTL interface
Definition: btl_smcuda.h:202
mca_btl_base_component_2_0_0_t super
base BTL component
Definition: btl_smcuda.h:125
void(* mca_btl_base_module_error_cb_fn_t)(struct mca_btl_base_module_t *btl, int32_t flags, struct ompi_proc_t *errproc, char *btlinfo)
Callback function that is called asynchronously on receipt of an error from the transport layer...
Definition: btl.h:538
Definition: mutex_unix.h:53
int sm_free_list_num
initial size of free lists
Definition: btl_smcuda.h:126
mca_mpool_base_module_t * sm_mpool
shared memory pool
Definition: btl_smcuda.h:118
int mca_btl_smcuda_free(struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *segment)
Return a segment allocated by this BTL.
Definition: btl_smcuda.c:684
struct mca_btl_base_descriptor_t * mca_btl_smcuda_prepare_src(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, mca_mpool_base_registration_t *registration, struct opal_convertor_t *convertor, uint8_t order, size_t reserve, size_t *size, uint32_t flags)
Pack data.
Definition: btl_smcuda.c:700
Remote Open MPI process structure.
Definition: proc.h:56
int knem_dma_flag
If we want DMA and DMA is supported, this will be loaded with KNEM_FLAG_DMA.
Definition: btl_smcuda.h:193
Volatile lock object (with optional padding).
Definition: atomic.h:102
static void opal_atomic_unlock(opal_atomic_lock_t *lock)
Release a lock.
sm_fifo_t ** fifo
cached copy of the pointer to the 2D fifo array.
Definition: btl_smcuda.h:142
int32_t sm_max_procs
upper limit on the number of processes using the shared memory pool
Definition: btl_smcuda.h:129
Definition: opal_free_list.h:47
ompi_free_list_t sm_first_frags_to_progress
list of first fragments that are awaiting resources
Definition: btl_smcuda.h:157
uint32_t knem_dma_min
MCA: minimal message size (bytes) to offload on DMA engine when using knem.
Definition: btl_smcuda.h:185
Definition: btl_smcuda.h:214
Definition: opal_free_list.h:31
int mca_btl_smcuda_finalize(struct mca_btl_base_module_t *btl)
Cleanup any resources held by the BTL.
Definition: btl_smcuda.c:632
size_t eager_limit
first fragment size
Definition: btl_smcuda.h:135
Byte Transfer Layer (BTL)
int32_t num_smp_procs
current number of smp procs on this host
Definition: btl_smcuda.h:151
int knem_max_simultaneous
MCA: how many simultaneous ongoing knem operations to support.
Definition: btl_smcuda.h:189
Definition: ompi_free_list.h:39
int num_pending_sends
total number on all of my pending-send queues
Definition: btl_smcuda.h:164
ompi_free_list_t sm_frags_max
free list of sm second (and beyond) fragments
Definition: btl_smcuda.h:155
volatile sm_fifo_t ** shm_fifo
pointer to fifo 2D array in shared memory
Definition: btl_smcuda.h:139
State of ELAN endpoint connection.
Definition: btl_elan_endpoint.h:33
size_t fifo_size
number of FIFO queue entries
Definition: btl_smcuda.h:148
int mca_btl_smcuda_add_procs(struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peers, struct opal_bitmap_t *reachability)
PML->BTL notification of change in the process list.
Definition: btl_smcuda.c:458
BTL component descriptor.
Definition: btl.h:411
Base object.
Definition: opal_object.h:182
Definition: opal_convertor.h:90
uint8_t order
order value, this is only valid in the local completion callback and may be used in subsequent calls ...
Definition: btl.h:292
mca_common_sm_module_t * sm_seg
description of shared memory segment
Definition: btl_smcuda.h:138
void opal_atomic_wmb(void)
Write memory barrier.
static void opal_atomic_init(opal_atomic_lock_t *lock, int32_t value)
Initialize a lock to value.
ompi_free_list_t sm_frags_eager
free list of sm first fragments
Definition: btl_smcuda.h:154
int mca_btl_smcuda_send(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *descriptor, mca_btl_base_tag_t tag)
Initiate a send to the peer.
Definition: btl_smcuda.c:906
char * sm_mpool_name
name of shared memory pool module
Definition: btl_smcuda.h:131
Shared Memory (SM) BTL module.
Definition: btl_smcuda.h:124
mca_mpool_base_module_alloc_fn_t mpool_alloc
allocate function
Definition: mpool.h:177
uint16_t * mem_nodes
cached copy of mem nodes of each local rank
Definition: btl_smcuda.h:147
int mca_btl_smcuda_register_error_cb(struct mca_btl_base_module_t *btl, mca_btl_base_module_error_cb_fn_t cbfunc)
Register a callback function that is called on error.
Definition: btl_smcuda.c:641
Definition: btl_smcuda.h:117
int mca_btl_smcuda_del_procs(struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peers)
PML->BTL notification of change in the process list.
Definition: btl_smcuda.c:609
int32_t my_smp_rank
My SMP process rank.
Definition: btl_smcuda.h:152
BTL module interface functions and attributes.
Definition: btl.h:786
size_t fifo_lazy_free
number of reads before lazy fifo free is triggered
Definition: btl_smcuda.h:149
mca_mpool_base_module_t * sm_mpool
mpool on local node
Definition: btl_smcuda.h:133
int mca_btl_smcuda_ft_event(int state)
Fault Tolerance Event Notification Function.
Definition: btl_smcuda.c:1068
shared memory send fragment derived type.
Definition: btl_smcuda_frag.h:52
mpool module descriptor.
Definition: mpool.h:174