OpenMPI  0.1.1
btl_smcuda.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
3  * University Research and Technology
4  * Corporation. All rights reserved.
5  * Copyright (c) 2004-2009 The University of Tennessee and The University
6  * of Tennessee Research Foundation. All rights
7  * reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  * University of Stuttgart. All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  * All rights reserved.
12  * Copyright (c) 2006-2007 Voltaire. All rights reserved.
13  * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
14  * Copyright (c) 2010 Los Alamos National Security, LLC.
15  * All rights reserved.
16  * Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
17  * $COPYRIGHT$
18  *
19  * Additional copyrights may follow
20  *
21  * $HEADER$
22  */
23 /**
24  * @file
25  */
26 #ifndef MCA_BTL_SMCUDA_H
27 #define MCA_BTL_SMCUDA_H
28 
29 #include "ompi_config.h"
30 #include <stddef.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #ifdef HAVE_STDINT_H
34 #include <stdint.h>
35 #endif /* HAVE_STDINT_H */
36 #ifdef HAVE_SCHED_H
37 #include <sched.h>
38 #endif /* HAVE_SCHED_H */
39 
40 #include "opal/util/bit_ops.h"
41 #include "opal/class/opal_free_list.h"
42 #include "ompi/mca/btl/btl.h"
43 #include "ompi/mca/common/sm/common_sm.h"
44 
45 BEGIN_C_DECLS
46 
47 /*
48  * Shared Memory FIFOs
49  *
50  * The FIFO is implemented as a circular queue with head and tail pointers
51  * (integer indices). For efficient wraparound indexing, the size of the
52  * queue is constrained to be a power of two and we "&" indices with a "mask".
53  *
54  * More than one process can write to the FIFO head. Therefore, there is a head
55  * lock. One cannot write until the head slot is empty, indicated by the special
56  * queue entry SM_FIFO_FREE.
57  *
58  * Only the receiver can read the FIFO tail. Therefore, the tail lock is
59  * required only in multithreaded applications. If a tail read returns the
60  * SM_FIFO_FREE value, that means the FIFO is empty. Once a non-FREE value
61  * has been read, the queue slot is *not* automatically reset to SM_FIFO_FREE.
62  * Rather, read tail slots are reset "lazily" (see "lazy_free" and "num_to_clear")
63  * to reduce the number of memory barriers and improve performance.
64  *
65  * Since the FIFO lives in shared memory that is mapped differently into
66  * each address space, the "queue" pointer is relative (each process must
67  * add its own offset) and the queue_recv pointer is meaningful only in the
68  * receiver's address space.
69  *
70  * Since multiple processes access different parts of the FIFO structure in
71  * different ways, we introduce padding to keep different parts on different
72  * cachelines.
73  */
74 
/* Sentinel stored in empty FIFO slots.  NOTE(review): assumes no real
   queue entry can ever be the address (void *)(-2) — confirm against the
   fragment allocator. */
#define SM_FIFO_FREE (void *) (-2)
/* We can't use opal_cache_line_size here because we need a
   compile-time constant for padding the struct. We can't really have
   a compile-time constant that is portable, either (e.g., compile on
   one machine and run on another). So just use a big enough cache
   line that should hopefully be good in most places. */
#define SM_CACHE_LINE_PAD 128
82 
/**
 * A single shared-memory FIFO.  Head-side (writer) and tail-side
 * (receiver) fields are kept on separate cache lines via the padN
 * members to avoid false sharing between processes.
 */
struct sm_fifo_t {
    /* This queue pointer is used only by the heads.  It is a RELATIVE
       address: each process adds its own segment base to it (see
       RELATIVE2VIRTUAL below). */
    volatile void **queue;
    char pad0[SM_CACHE_LINE_PAD - sizeof(void **)];
    /* This lock is used by the heads. */
    opal_atomic_lock_t head_lock;
    char pad1[SM_CACHE_LINE_PAD - sizeof(opal_atomic_lock_t)];
    /* This index is used by the head holding the head lock. */
    volatile int head;
    char pad2[SM_CACHE_LINE_PAD - sizeof(int)];
    /* This mask is used "read only" by all processes.  The queue size is
       a power of two, so (index & mask) implements the wraparound. */
    unsigned int mask;
    char pad3[SM_CACHE_LINE_PAD - sizeof(int)];
    /* The following are used only by the tail (the receiver).  queue_recv
       is the same queue as "queue" above, but as a real virtual address
       that is meaningful only in the receiver's address space. */
    volatile void **queue_recv;
    opal_atomic_lock_t tail_lock;
    volatile int tail;
    int num_to_clear;   /* slots read but not yet reset to SM_FIFO_FREE */
    int lazy_free;      /* reset read slots once num_to_clear reaches this */
    char pad4[SM_CACHE_LINE_PAD - sizeof(void **) -
              sizeof(opal_atomic_lock_t) -
              sizeof(int) * 3];
};
typedef struct sm_fifo_t sm_fifo_t;
107 
/*
 * Shared Memory resource management
 */

#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* One-byte commands written to the per-process signaling fifo
   (see MCA_BTL_SMCUDA_SIGNAL_PEER). */
#define DATA (char)0
#define DONE (char)1
#endif
116 
118  mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */
120 
121 /**
122  * Shared Memory (SM) BTL module.
123  */
125  mca_btl_base_component_2_0_0_t super; /**< base BTL component */
126  int sm_free_list_num; /**< initial size of free lists */
127  int sm_free_list_max; /**< maximum size of free lists */
128  int sm_free_list_inc; /**< number of elements to alloc when growing free lists */
129  int32_t sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */
130  int sm_extra_procs; /**< number of extra procs to allow */
131  char* sm_mpool_name; /**< name of shared memory pool module */
132  mca_mpool_base_module_t **sm_mpools; /**< shared memory pools (one for each memory node) */
133  mca_mpool_base_module_t *sm_mpool; /**< mpool on local node */
134  void* sm_mpool_base; /**< base address of shared memory pool */
135  size_t eager_limit; /**< first fragment size */
136  size_t max_frag_size; /**< maximum (second and beyone) fragment size */
137  opal_mutex_t sm_lock;
138  mca_common_sm_module_t *sm_seg; /**< description of shared memory segment */
139  volatile sm_fifo_t **shm_fifo; /**< pointer to fifo 2D array in shared memory */
140  char **shm_bases; /**< pointer to base pointers in shared memory */
141  uint16_t *shm_mem_nodes; /**< pointer to mem noded in shared memory */
142  sm_fifo_t **fifo; /**< cached copy of the pointer to the 2D
143  fifo array. The address in the shared
144  memory segment sm_ctl_header is a relative,
145  but this one, in process private memory, is
146  a real virtual address */
147  uint16_t *mem_nodes; /**< cached copy of mem nodes of each local rank */
148  size_t fifo_size; /**< number of FIFO queue entries */
149  size_t fifo_lazy_free; /**< number of reads before lazy fifo free is triggered */
150  int nfifos; /**< number of FIFOs per receiver */
151  int32_t num_smp_procs; /**< current number of smp procs on this host */
152  int32_t my_smp_rank; /**< My SMP process rank. Used for accessing
 153  * SMP specific data structures. */
154  ompi_free_list_t sm_frags_eager; /**< free list of sm first */
155  ompi_free_list_t sm_frags_max; /**< free list of sm second */
156  ompi_free_list_t sm_frags_user;
158  fragments that are
159  awaiting resources */
160  struct mca_btl_base_endpoint_t **sm_peers;
161 
162  opal_free_list_t pending_send_fl;
163  int num_outstanding_frags; /**< number of fragments sent but not yet returned to free list */
164  int num_pending_sends; /**< total number on all of my pending-send queues */
165  int mem_node;
166  int num_mem_nodes;
167 
168 #if OMPI_ENABLE_PROGRESS_THREADS == 1
169  char sm_fifo_path[PATH_MAX]; /**< path to fifo used to signal this process */
170  int sm_fifo_fd; /**< file descriptor corresponding to opened fifo */
171  opal_thread_t sm_fifo_thread;
172 #endif
173  struct mca_btl_smcuda_t **sm_btls;
174  struct mca_btl_smcuda_frag_t **table;
175  size_t sm_num_btls;
176  size_t sm_max_btls;
177 
178 
179  /** MCA: should we be using knem or not? neg=try but continue if
180  not available, 0=don't try, 1=try and fail if not available */
181  int use_knem;
182 
183  /** MCA: minimal message size (bytes) to offload on DMA engine
184  when using knem */
185  uint32_t knem_dma_min;
186 
187  /** MCA: how many simultaneous ongoing knem operations to
188  support */
190 
191  /** If we want DMA and DMA is supported, this will be loaded with
192  KNEM_FLAG_DMA. Otherwise, it'll be 0. */
194 };
196 OMPI_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component;
197 
198 /**
199  * SM BTL Interface
200  */
202  mca_btl_base_module_t super; /**< base BTL interface */
203  bool btl_inited; /**< flag indicating if btl has been inited */
205 
206 };
207 typedef struct mca_btl_smcuda_t mca_btl_smcuda_t;
208 OMPI_MODULE_DECLSPEC extern mca_btl_smcuda_t mca_btl_smcuda;
209 
210 
211 
212 
213 
215 {
216  opal_free_list_item_t super;
217  void *data;
218 };
220 
/***
 * FIFO support for sm BTL.
 */

/***
 * One or more FIFO components may be a pointer that must be
 * accessed by multiple processes. Since the shared region may
 * be mmapped differently into each process's address space,
 * these pointers will be relative to some base address. Here,
 * we define macros to translate between relative addresses and
 * virtual addresses.
 *
 * NOTE(review): pointers are round-tripped through "long", which
 * assumes sizeof(long) >= sizeof(void *) (true on ILP32/LP64 targets,
 * not on LLP64) — confirm against supported platforms.
 */
#define VIRTUAL2RELATIVE(VADDR ) ((long)(VADDR) - (long)mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank])
#define RELATIVE2VIRTUAL(OFFSET) ((long)(OFFSET) + (long)mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank])
235 
236 static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
237  sm_fifo_t *fifo, int lazy_free)
238 {
239  int i, qsize;
240 
241  /* figure out the queue size (a power of two that is at least 1) */
242  qsize = opal_next_poweroftwo_inclusive (fifo_size);
243 
244  /* allocate the queue in the receiver's address space */
245  fifo->queue_recv = (volatile void **)mpool->mpool_alloc(
246  mpool, sizeof(void *) * qsize, opal_cache_line_size, 0, NULL);
247  if(NULL == fifo->queue_recv) {
248  return OMPI_ERR_OUT_OF_RESOURCE;
249  }
250 
251  /* initialize the queue */
252  for ( i = 0; i < qsize; i++ )
253  fifo->queue_recv[i] = SM_FIFO_FREE;
254 
255  /* shift queue address to be relative */
256  fifo->queue = (volatile void **) VIRTUAL2RELATIVE(fifo->queue_recv);
257 
258  /* initialize the locks */
259  opal_atomic_init(&(fifo->head_lock), OPAL_ATOMIC_UNLOCKED);
260  opal_atomic_init(&(fifo->tail_lock), OPAL_ATOMIC_UNLOCKED);
261  opal_atomic_unlock(&(fifo->head_lock)); /* should be unnecessary */
262  opal_atomic_unlock(&(fifo->tail_lock)); /* should be unnecessary */
263 
264  /* other initializations */
265  fifo->head = 0;
266  fifo->mask = qsize - 1;
267  fifo->tail = 0;
268  fifo->num_to_clear = 0;
269  fifo->lazy_free = lazy_free;
270 
271  return OMPI_SUCCESS;
272 }
273 
274 
/**
 * Append one entry to a FIFO (writer side).
 *
 * Multiple processes may write to the same FIFO; per the FIFO overview
 * above, writers serialize on fifo->head_lock — this function does NOT
 * take that lock itself.
 *
 * @param value  entry to enqueue (must not equal SM_FIFO_FREE)
 * @param fifo   target FIFO
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the head slot
 *         is still occupied (queue full)
 */
static inline int sm_fifo_write(void *value, sm_fifo_t *fifo)
{
    /* translate the shared (relative) queue address into this
       process's address space */
    volatile void **q = (volatile void **) RELATIVE2VIRTUAL(fifo->queue);

    /* if there is no free slot to write, report exhausted resource */
    opal_atomic_rmb();
    if ( SM_FIFO_FREE != q[fifo->head] )
        return OMPI_ERR_OUT_OF_RESOURCE;

    /* otherwise, write to the slot and advance the head index; the
       write barrier publishes the payload before the head moves on */
    q[fifo->head] = value;
    opal_atomic_wmb();
    fifo->head = (fifo->head + 1) & fifo->mask;
    return OMPI_SUCCESS;
}
290 
291 
/**
 * Dequeue the next entry from a FIFO (receiver side only).
 *
 * Read slots are not reset immediately; every "lazy_free" reads the
 * consumed slots are batch-reset to SM_FIFO_FREE, amortizing the cost
 * of the write memory barrier (see the FIFO overview above).
 *
 * @param fifo  FIFO to read from (its queue_recv pointer is valid in
 *              this process's address space)
 * @return the dequeued value, or SM_FIFO_FREE if the FIFO is empty
 */
static inline void *sm_fifo_read(sm_fifo_t *fifo)
{
    void *value;

    /* read the next queue entry */
    value = (void *) fifo->queue_recv[fifo->tail];

    opal_atomic_rmb();

    /* if you read a non-empty slot, advance the tail pointer */
    if ( SM_FIFO_FREE != value ) {

        fifo->tail = ( fifo->tail + 1 ) & fifo->mask;
        fifo->num_to_clear += 1;

        /* check if it's time to free slots, which we do lazily */
        if ( fifo->num_to_clear >= fifo->lazy_free ) {
            /* start at the oldest slot consumed since the last sweep */
            int i = (fifo->tail - fifo->num_to_clear ) & fifo->mask;

            while ( fifo->num_to_clear > 0 ) {
                fifo->queue_recv[i] = SM_FIFO_FREE;
                i = (i+1) & fifo->mask;
                fifo->num_to_clear -= 1;
            }
            /* make the freed slots visible to the writers */
            opal_atomic_wmb();
        }
    }

    return value;
}
322 
/**
 * Shared memory component progress.
 * @return number of events progressed, or an error status
 *         (NOTE(review): exact return semantics are defined in
 *         btl_smcuda_component.c — confirm there).
 */
extern int mca_btl_smcuda_component_progress(void);
327 
328 
329 
330 /**
 331  * Register a callback function that is called on error.
332  *
333  * @param btl (IN) BTL module
334  * @return Status indicating if cleanup was successful
335  */
336 
338  struct mca_btl_base_module_t* btl,
340 );
341 
/**
 * Cleanup any resources held by the BTL.
 *
 * @param btl BTL instance.
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_btl_smcuda_finalize(
    struct mca_btl_base_module_t* btl
);
352 
353 
/**
 * PML->BTL notification of change in the process list: add the given
 * processes and set up endpoint/addressing state for those reachable
 * through this BTL.
 *
 * @param btl (IN)           BTL module
 * @param nprocs (IN)        number of entries in procs/peers
 * @param procs (IN)         array of peer processes
 * @param peers (OUT)        per-process endpoint structures
 * @param reachability (OUT) bitmap marking which procs this BTL can reach
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_btl_smcuda_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_btl_base_endpoint_t** peers,
    struct opal_bitmap_t* reachability
);
375 
376 
/**
 * PML->BTL notification of change in the process list: remove the given
 * processes and release their addressing information.
 *
 * @param btl (IN)    BTL instance
 * @param nprocs (IN) number of entries in procs/peers
 * @param procs (IN)  peer processes being removed
 * @param peers (IN)  peer addressing information
 * @return Status indicating if cleanup was successful
 */
extern int mca_btl_smcuda_del_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_btl_base_endpoint_t **peers
);
392 
393 
394 /**
395  * Allocate a segment.
396  *
397  * @param btl (IN) BTL module
398  * @param size (IN) Request segment size.
399  */
401  struct mca_btl_base_module_t* btl,
402  struct mca_btl_base_endpoint_t* endpoint,
403  uint8_t order,
404  size_t size,
405  uint32_t flags
406 );
407 
408 /**
409  * Return a segment allocated by this BTL.
410  *
411  * @param btl (IN) BTL module
412  * @param segment (IN) Allocated segment.
413  */
414 extern int mca_btl_smcuda_free(
415  struct mca_btl_base_module_t* btl,
417 );
418 
419 
420 /**
421  * Pack data
422  *
423  * @param btl (IN) BTL module
424  * @param peer (IN) BTL peer addressing
425  */
427  struct mca_btl_base_module_t* btl,
428  struct mca_btl_base_endpoint_t* endpoint,
429  mca_mpool_base_registration_t* registration,
430  struct opal_convertor_t* convertor,
431  uint8_t order,
432  size_t reserve,
433  size_t* size,
434  uint32_t flags
435 );
436 
437 
/**
 * Initiate an inlined send to the peer or return a descriptor.
 *
 * @param btl (IN)          BTL module
 * @param endpoint (IN)     BTL peer addressing
 * @param convertor (IN)    packs the payload data
 * @param header (IN)       header to send ahead of the payload
 * @param header_size (IN)  header size in bytes
 * @param payload_size (IN) payload size in bytes
 * @param order (IN)        ordering value
 * @param flags (IN)        send flags
 * @param tag (IN)          callback tag for the receiver
 * @param descriptor (OUT)  descriptor handed back when the send could
 *                          not be completed inline
 */
extern int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
                                 struct mca_btl_base_endpoint_t* endpoint,
                                 struct opal_convertor_t* convertor,
                                 void* header,
                                 size_t header_size,
                                 size_t payload_size,
                                 uint8_t order,
                                 uint32_t flags,
                                 mca_btl_base_tag_t tag,
                                 mca_btl_base_descriptor_t** descriptor );
454 
/**
 * Initiate a send to the peer.
 *
 * @param btl (IN)        BTL module
 * @param endpoint (IN)   BTL peer addressing
 * @param descriptor (IN) descriptor describing the data to send
 * @param tag (IN)        callback tag for the receiver
 */
extern int mca_btl_smcuda_send(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_btl_base_descriptor_t* descriptor,
    mca_btl_base_tag_t tag
);
467 
#if OMPI_CUDA_SUPPORT
/**
 * Remote get using device memory.
 */
extern int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
                                   struct mca_btl_base_endpoint_t* ep,
                                   struct mca_btl_base_descriptor_t* descriptor);

/**
 * Prepare a destination descriptor for receiving data (CUDA path).
 * NOTE(review): registration/convertor semantics are implemented in
 * btl_smcuda.c — confirm details there before relying on this summary.
 */
extern struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_mpool_base_registration_t* registration,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags);
#endif /* OMPI_CUDA_SUPPORT */
486 
/**
 * Fault Tolerance Event Notification Function
 * @param state Checkpoint State
 * @return OMPI_SUCCESS or failure status
 */
int mca_btl_smcuda_ft_event(int state);
493 
494 #if OMPI_ENABLE_PROGRESS_THREADS == 1
495 void mca_btl_smcuda_component_event_thread(opal_object_t*);
496 #endif
497 
#if OMPI_ENABLE_PROGRESS_THREADS == 1
/**
 * Wake the progress thread of "peer" by writing a DATA byte into its
 * signaling fifo.  Wrapped in do { ... } while (0) so the macro expands
 * to a single statement and is safe inside un-braced if/else bodies
 * (the original bare { } block broke "if (x) SIGNAL_PEER(p); else ...").
 */
#define MCA_BTL_SMCUDA_SIGNAL_PEER(peer) \
do { \
    unsigned char cmd = DATA; \
    if(write(peer->fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { \
        opal_output(0, "mca_btl_smcuda_send: write fifo failed: errno=%d\n", errno); \
    } \
} while (0)
#else
/* No progress threads: expand to a harmless single statement. */
#define MCA_BTL_SMCUDA_SIGNAL_PEER(peer) do { } while (0)
#endif
509 
510 END_C_DECLS
511 
512 #endif
513 
size_t max_frag_size
maximum (second and beyond) fragment size
Definition: btl_smcuda.h:136
char ** shm_bases
pointer to base pointers in shared memory
Definition: btl_smcuda.h:140
int sm_free_list_max
maximum size of free lists
Definition: btl_smcuda.h:127
SM BTL Interface.
Definition: btl_smcuda.h:201
int num_outstanding_frags
number of fragments sent but not yet returned to free list
Definition: btl_smcuda.h:163
A descriptor that holds the parameters to a send/put/get operation along w/ a callback routine that i...
Definition: btl.h:275
bool btl_inited
flag indicating if btl has been inited
Definition: btl_smcuda.h:203
int sm_extra_procs
number of extra procs to allow
Definition: btl_smcuda.h:130
int mca_btl_smcuda_component_progress(void)
shared memory component progress.
Definition: btl_smcuda_component.c:412
void opal_atomic_rmb(void)
Read memory barrier.
Definition: opal_bitmap.h:53
mca_btl_base_descriptor_t * mca_btl_smcuda_alloc(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, uint8_t order, size_t size, uint32_t flags)
Allocate a segment.
Definition: btl_smcuda.c:656
int nfifos
number of FIFOs per receiver
Definition: btl_smcuda.h:150
int sm_free_list_inc
number of elements to alloc when growing free lists
Definition: btl_smcuda.h:128
mca_mpool_base_module_t ** sm_mpools
shared memory pools (one for each memory node)
Definition: btl_smcuda.h:132
Definition: common_sm.h:60
void * sm_mpool_base
base address of shared memory pool
Definition: btl_smcuda.h:134
int use_knem
MCA: should we be using knem or not? neg=try but continue if not available, 0=don't try...
Definition: btl_smcuda.h:181
uint16_t * shm_mem_nodes
pointer to mem nodes in shared memory
Definition: btl_smcuda.h:141
int mca_btl_smcuda_sendi(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct opal_convertor_t *convertor, void *header, size_t header_size, size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t **descriptor)
Initiate an inlined send to the peer or return a descriptor.
Definition: btl_smcuda.c:821
mca_btl_base_module_t super
base BTL interface
Definition: btl_smcuda.h:202
mca_btl_base_component_2_0_0_t super
base BTL component
Definition: btl_smcuda.h:125
void(* mca_btl_base_module_error_cb_fn_t)(struct mca_btl_base_module_t *btl, int32_t flags, struct ompi_proc_t *errproc, char *btlinfo)
Callback function that is called asynchronously on receipt of an error from the transport layer...
Definition: btl.h:538
Definition: mutex_unix.h:53
int sm_free_list_num
initial size of free lists
Definition: btl_smcuda.h:126
mca_mpool_base_module_t * sm_mpool
shared memory pool
Definition: btl_smcuda.h:118
int mca_btl_smcuda_free(struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *segment)
Return a segment allocated by this BTL.
Definition: btl_smcuda.c:684
struct mca_btl_base_descriptor_t * mca_btl_smcuda_prepare_src(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, mca_mpool_base_registration_t *registration, struct opal_convertor_t *convertor, uint8_t order, size_t reserve, size_t *size, uint32_t flags)
Pack data.
Definition: btl_smcuda.c:700
Definition: mpool.h:44
Remote Open MPI process structure.
Definition: proc.h:56
int knem_dma_flag
If we want DMA and DMA is supported, this will be loaded with KNEM_FLAG_DMA.
Definition: btl_smcuda.h:193
Volatile lock object (with optional padding).
Definition: atomic.h:102
static void opal_atomic_unlock(opal_atomic_lock_t *lock)
Release a lock.
sm_fifo_t ** fifo
cached copy of the pointer to the 2D fifo array.
Definition: btl_smcuda.h:142
int32_t sm_max_procs
upper limit on the number of processes using the shared memory pool
Definition: btl_smcuda.h:129
Definition: opal_free_list.h:47
ompi_free_list_t sm_first_frags_to_progress
list of first fragments that are awaiting resources
Definition: btl_smcuda.h:157
uint32_t knem_dma_min
MCA: minimal message size (bytes) to offload on DMA engine when using knem.
Definition: btl_smcuda.h:185
Definition: btl_smcuda.h:214
Definition: opal_free_list.h:31
int mca_btl_smcuda_finalize(struct mca_btl_base_module_t *btl)
Cleanup any resources held by the BTL.
Definition: btl_smcuda.c:632
size_t eager_limit
first fragment size
Definition: btl_smcuda.h:135
Byte Transfer Layer (BTL)
int32_t num_smp_procs
current number of smp procs on this host
Definition: btl_smcuda.h:151
int knem_max_simultaneous
MCA: how many simultaneous ongoing knem operations to support.
Definition: btl_smcuda.h:189
Definition: ompi_free_list.h:39
int num_pending_sends
total number on all of my pending-send queues
Definition: btl_smcuda.h:164
ompi_free_list_t sm_frags_max
free list of sm second
Definition: btl_smcuda.h:155
volatile sm_fifo_t ** shm_fifo
pointer to fifo 2D array in shared memory
Definition: btl_smcuda.h:139
State of ELAN endpoint connection.
Definition: btl_elan_endpoint.h:33
size_t fifo_size
number of FIFO queue entries
Definition: btl_smcuda.h:148
int mca_btl_smcuda_add_procs(struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peers, struct opal_bitmap_t *reachability)
PML->BTL notification of change in the process list.
Definition: btl_smcuda.c:458
BTL component descriptor.
Definition: btl.h:411
Base object.
Definition: opal_object.h:182
Definition: opal_convertor.h:90
uint8_t order
order value, this is only valid in the local completion callback and may be used in subsequent calls ...
Definition: btl.h:292
mca_common_sm_module_t * sm_seg
description of shared memory segment
Definition: btl_smcuda.h:138
void opal_atomic_wmb(void)
Write memory barrier.
static void opal_atomic_init(opal_atomic_lock_t *lock, int32_t value)
Initialize a lock to value.
ompi_free_list_t sm_frags_eager
free list of sm first
Definition: btl_smcuda.h:154
Definition: threads.h:46
int mca_btl_smcuda_send(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *descriptor, mca_btl_base_tag_t tag)
Initiate a send to the peer.
Definition: btl_smcuda.c:906
char * sm_mpool_name
name of shared memory pool module
Definition: btl_smcuda.h:131
Shared Memory (SM) BTL module.
Definition: btl_smcuda.h:124
mca_mpool_base_module_alloc_fn_t mpool_alloc
allocate function
Definition: mpool.h:177
uint16_t * mem_nodes
cached copy of mem nodes of each local rank
Definition: btl_smcuda.h:147
int mca_btl_smcuda_register_error_cb(struct mca_btl_base_module_t *btl, mca_btl_base_module_error_cb_fn_t cbfunc)
Register a callback function that is called on error.
Definition: btl_smcuda.c:641
Definition: btl_smcuda.h:117
int mca_btl_smcuda_del_procs(struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peers)
PML->BTL notification of change in the process list.
Definition: btl_smcuda.c:609
Definition: btl_sm.h:86
int32_t my_smp_rank
My SMP process rank.
Definition: btl_smcuda.h:152
BTL module interface functions and attributes.
Definition: btl.h:786
size_t fifo_lazy_free
number of reads before lazy fifo free is triggered
Definition: btl_smcuda.h:149
mca_mpool_base_module_t * sm_mpool
mpool on local node
Definition: btl_smcuda.h:133
int mca_btl_smcuda_ft_event(int state)
Fault Tolerance Event Notification Function.
Definition: btl_smcuda.c:1068
shared memory send fragment derived type.
Definition: btl_smcuda_frag.h:52
mpool module descriptor.
Definition: mpool.h:174