29 #include "ompi_config.h"
39 #if OMPI_BTL_SM_HAVE_KNEM
43 #include "opal/util/bit_ops.h"
44 #include "opal/class/opal_free_list.h"
46 #include "ompi/mca/common/sm/common_sm.h"
78 #define SM_FIFO_FREE (void *) (-2)
84 #define SM_CACHE_LINE_PAD 128
88 volatile void **queue;
89 char pad0[SM_CACHE_LINE_PAD -
sizeof(
void **)];
95 char pad2[SM_CACHE_LINE_PAD -
sizeof(int)];
98 char pad3[SM_CACHE_LINE_PAD -
sizeof(int)];
100 volatile void **queue_recv;
105 char pad4[SM_CACHE_LINE_PAD -
sizeof(
void **) -
115 #if OMPI_ENABLE_PROGRESS_THREADS == 1
171 #if OMPI_ENABLE_PROGRESS_THREADS == 1
172 char sm_fifo_path[PATH_MAX];
181 #if OMPI_BTL_SM_HAVE_KNEM
183 struct knem_cmd_info knem_info;
217 #if OMPI_BTL_SM_HAVE_KNEM
223 knem_status_t *knem_status_array;
230 int knem_status_first_avail;
233 int knem_status_first_used;
236 int knem_status_num_used;
265 #define VIRTUAL2RELATIVE(VADDR ) ((long)(VADDR) - (long)mca_btl_sm_component.shm_bases[mca_btl_sm_component.my_smp_rank])
266 #define RELATIVE2VIRTUAL(OFFSET) ((long)(OFFSET) + (long)mca_btl_sm_component.shm_bases[mca_btl_sm_component.my_smp_rank])
274 qsize = opal_next_poweroftwo_inclusive (fifo_size);
277 fifo->queue_recv = (
volatile void **)mpool->
mpool_alloc(
278 mpool,
sizeof(
void *) * qsize, opal_cache_line_size, 0, NULL);
279 if(NULL == fifo->queue_recv) {
280 return OMPI_ERR_OUT_OF_RESOURCE;
284 for ( i = 0; i < qsize; i++ )
285 fifo->queue_recv[i] = SM_FIFO_FREE;
288 fifo->queue = (
volatile void **) VIRTUAL2RELATIVE(fifo->queue_recv);
298 fifo->mask = qsize - 1;
300 fifo->num_to_clear = 0;
301 fifo->lazy_free = lazy_free;
307 static inline int sm_fifo_write(
void *value,
sm_fifo_t *fifo)
309 volatile void **q = (
volatile void **) RELATIVE2VIRTUAL(fifo->queue);
313 if ( SM_FIFO_FREE != q[fifo->head] )
314 return OMPI_ERR_OUT_OF_RESOURCE;
317 q[fifo->head] = value;
319 fifo->head = (fifo->head + 1) & fifo->mask;
324 static inline void *sm_fifo_read(
sm_fifo_t *fifo)
329 value = (
void *) fifo->queue_recv[fifo->tail];
334 if ( SM_FIFO_FREE != value ) {
336 fifo->tail = ( fifo->tail + 1 ) & fifo->mask;
337 fifo->num_to_clear += 1;
340 if ( fifo->num_to_clear >= fifo->lazy_free ) {
341 int i = (fifo->tail - fifo->num_to_clear ) & fifo->mask;
343 while ( fifo->num_to_clear > 0 ) {
344 fifo->queue_recv[i] = SM_FIFO_FREE;
345 i = (i+1) & fifo->mask;
346 fifo->num_to_clear -= 1;
484 mca_btl_base_tag_t tag,
497 mca_btl_base_tag_t tag
500 #if OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
504 extern int mca_btl_sm_get_sync(
520 #if OMPI_BTL_SM_HAVE_KNEM
524 extern int mca_btl_sm_get_async(
538 #if OMPI_ENABLE_PROGRESS_THREADS == 1
542 #if OMPI_ENABLE_PROGRESS_THREADS == 1
543 #define MCA_BTL_SM_SIGNAL_PEER(peer) \
545 unsigned char cmd = DATA; \
546 if(write(peer->fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { \
547 opal_output(0, "mca_btl_sm_send: write fifo failed: errno=%d\n", errno); \
551 #define MCA_BTL_SM_SIGNAL_PEER(peer)
Shared Memory (SM) BTL module.
Definition: btl_sm.h:127
char ** shm_bases
pointer to base pointers in shared memory
Definition: btl_sm.h:143
size_t max_frag_size
maximum (second and beyond) fragment size
Definition: btl_sm.h:139
int32_t my_smp_rank
My SMP process rank.
Definition: btl_sm.h:155
int32_t sm_max_procs
upper limit on the number of processes using the shared memory pool
Definition: btl_sm.h:132
int knem_max_simultaneous
MCA: how many simultaneous ongoing knem operations to support.
Definition: btl_sm.h:196
bool btl_inited
flag indicating if btl has been inited
Definition: btl_sm.h:214
A descriptor that holds the parameters to a send/put/get operation along with a callback routine that is called on completion of the request.
Definition: btl.h:275
uint16_t * shm_mem_nodes
pointer to mem nodes in shared memory
Definition: btl_sm.h:144
int sm_free_list_max
maximum size of free lists
Definition: btl_sm.h:130
int mca_btl_sm_ft_event(int state)
Fault Tolerance Event Notification Function.
Definition: btl_sm.c:1158
void opal_atomic_rmb(void)
Read memory barrier.
Definition: opal_bitmap.h:53
Definition: common_sm.h:60
int sm_free_list_inc
number of elements to alloc when growing free lists
Definition: btl_sm.h:131
void(* mca_btl_base_module_error_cb_fn_t)(struct mca_btl_base_module_t *btl, int32_t flags, struct ompi_proc_t *errproc, char *btlinfo)
Callback function that is called asynchronously on receipt of an error from the transport layer...
Definition: btl.h:538
Definition: mutex_unix.h:53
sm_fifo_t ** fifo
cached copy of the pointer to the 2D fifo array.
Definition: btl_sm.h:145
mca_btl_base_module_t super
base BTL interface
Definition: btl_sm.h:213
struct mca_btl_base_descriptor_t * mca_btl_sm_prepare_src(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, mca_mpool_base_registration_t *registration, struct opal_convertor_t *convertor, uint8_t order, size_t reserve, size_t *size, uint32_t flags)
Pack data.
Definition: btl_sm.c:678
int sm_free_list_num
initial size of free lists
Definition: btl_sm.h:129
Remote Open MPI process structure.
Definition: proc.h:56
volatile sm_fifo_t ** shm_fifo
pointer to fifo 2D array in shared memory
Definition: btl_sm.h:142
Volatile lock object (with optional padding).
Definition: atomic.h:102
ompi_free_list_t sm_frags_max
free list of sm second (and beyond) fragments
Definition: btl_sm.h:158
static void opal_atomic_unlock(opal_atomic_lock_t *lock)
Release a lock.
int mca_btl_sm_sendi(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct opal_convertor_t *convertor, void *header, size_t header_size, size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t **descriptor)
Initiate an inlined send to the peer or return a descriptor.
Definition: btl_sm.c:815
void * sm_mpool_base
base address of shared memory pool
Definition: btl_sm.h:137
int mca_btl_sm_register_error_cb(struct mca_btl_base_module_t *btl, mca_btl_base_module_error_cb_fn_t cbfunc)
Register a callback function that is called on error.
Definition: btl_sm.c:619
char * sm_mpool_name
name of shared memory pool module
Definition: btl_sm.h:134
Definition: opal_free_list.h:47
mca_btl_base_component_2_0_0_t super
base BTL component
Definition: btl_sm.h:128
size_t fifo_lazy_free
number of reads before lazy fifo free is triggered
Definition: btl_sm.h:152
size_t fifo_size
number of FIFO queue entries
Definition: btl_sm.h:151
int use_knem
MCA: should we be using knem or not? neg=try but continue if not available, 0=don't try...
Definition: btl_sm.h:188
mca_common_sm_module_t * sm_seg
description of shared memory segment
Definition: btl_sm.h:141
size_t eager_limit
first fragment size
Definition: btl_sm.h:138
Definition: opal_free_list.h:31
int mca_btl_sm_add_procs(struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peers, struct opal_bitmap_t *reachability)
PML->BTL notification of change in the process list.
Definition: btl_sm.c:436
ompi_free_list_t sm_frags_eager
free list of sm first fragments
Definition: btl_sm.h:157
Byte Transfer Layer (BTL)
SM BTL Interface.
Definition: btl_sm.h:212
int mca_btl_sm_component_progress(void)
shared memory component progress.
Definition: btl_sm_component.c:608
int mca_btl_sm_send(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *descriptor, mca_btl_base_tag_t tag)
Initiate a send to the peer.
Definition: btl_sm.c:900
shared memory send fragment derived type.
Definition: btl_sm_frag.h:51
Definition: ompi_free_list.h:39
int mca_btl_sm_del_procs(struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peers)
PML->BTL notification of change in the process list.
Definition: btl_sm.c:587
int knem_dma_flag
If we want DMA and DMA is supported, this will be loaded with KNEM_FLAG_DMA.
Definition: btl_sm.h:200
State of ELAN endpoint connection.
Definition: btl_elan_endpoint.h:33
BTL component descriptor.
Definition: btl.h:411
Base object.
Definition: opal_object.h:182
Definition: opal_convertor.h:90
int num_pending_sends
total number on all of my pending-send queues
Definition: btl_sm.h:167
uint8_t order
order value, this is only valid in the local completion callback and may be used in subsequent calls ...
Definition: btl.h:292
uint16_t * mem_nodes
cached copy of mem nodes of each local rank
Definition: btl_sm.h:150
void opal_atomic_wmb(void)
Write memory barrier.
mca_btl_base_descriptor_t * mca_btl_sm_alloc(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, uint8_t order, size_t size, uint32_t flags)
Allocate a segment.
Definition: btl_sm.c:634
static void opal_atomic_init(opal_atomic_lock_t *lock, int32_t value)
Initialize a lock to value.
int mca_btl_sm_free(struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *segment)
Return a segment allocated by this BTL.
Definition: btl_sm.c:662
int nfifos
number of FIFOs per receiver
Definition: btl_sm.h:153
int num_outstanding_frags
number of fragments sent but not yet returned to free list
Definition: btl_sm.h:166
ompi_free_list_t sm_first_frags_to_progress
list of first fragments that are awaiting resources
Definition: btl_sm.h:160
mca_mpool_base_module_alloc_fn_t mpool_alloc
allocate function
Definition: mpool.h:177
int sm_extra_procs
number of extra procs to allow
Definition: btl_sm.h:133
BTL module interface functions and attributes.
Definition: btl.h:786
uint32_t knem_dma_min
MCA: minimal message size (bytes) to offload on DMA engine when using knem.
Definition: btl_sm.h:192
int mca_btl_sm_finalize(struct mca_btl_base_module_t *btl)
Cleanup any resources held by the BTL.
Definition: btl_sm.c:610
mca_mpool_base_module_t * sm_mpool
shared memory pool
Definition: btl_sm.h:121
int32_t num_smp_procs
current number of smp procs on this host
Definition: btl_sm.h:154
mca_mpool_base_module_t ** sm_mpools
shared memory pools (one for each memory node)
Definition: btl_sm.h:135
int use_cma
MCA: should we be using CMA or not? 0 = no, 1 = yes.
Definition: btl_sm.h:204
mca_mpool_base_module_t * sm_mpool
mpool on local node
Definition: btl_sm.h:136
mpool module descriptor.
Definition: mpool.h:174