OpenMPI  0.1.1
btl.h
Go to the documentation of this file.
1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
4  * University Research and Technology
5  * Corporation. All rights reserved.
6  * Copyright (c) 2004-2008 The University of Tennessee and The University
7  * of Tennessee Research Foundation. All rights
8  * reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  * University of Stuttgart. All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  * All rights reserved.
13  * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
14  * reserved.
15  * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
16  * Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
17  * $COPYRIGHT$
18  *
19  * Additional copyrights may follow
20  *
21  * $HEADER$
22  */
23 /**
24  * @file
25  *
26  * Byte Transfer Layer (BTL)
27  *
28  *
29  * BTL Initialization:
30  *
31  * During library initialization, all available BTL components are
32  * loaded and opened via their mca_base_open_component_fn_t
33  * function. The BTL open function should register any mca parameters
34  * used to tune/adjust the behaviour of the BTL (mca_base_param_register_int(),
35  * mca_base_param_register_string()). Note that the open function may fail
36  * if the resources (e.g. shared libraries, etc) required by the network
37  * transport are not available.
38  *
39  * The mca_btl_base_component_init_fn_t() is then called for each of the
40  * components that are successfully opened. The component init function may
41  * return either:
42  *
43  * (1) a NULL list of BTL modules if the transport is not available,
44  * (2) a list containing one or more BTL modules, where each BTL module provides
45  * a layer of abstraction over one or more physical devices (e.g. NICs).
46  *
47  * During module initialization, the module should post any addressing
48  * information required by its peers. An example would be the TCP
49  * listen port opened by the TCP module for incoming connection
50  * requests. This information is published to peers via the
51  * ompi_modex_send() interface. Note that peer information is not
52  * guaranteed to be available via ompi_modex_recv() during the
53  * module's init function. However, it will be available during
54  * BTL selection (mca_btl_base_add_proc_fn_t()).
55  *
56  * BTL Selection:
57  *
58  * The upper layer builds an ordered list of the available BTL modules sorted
59  * by their exclusivity ranking. This is a relative ranking that is used
60  * to determine the set of BTLs that may be used to reach a given destination.
61  * During startup the BTL modules are queried via their
62  * mca_btl_base_add_proc_fn_t() to determine if they are able to reach
63  * a given destination. The BTL module with the highest ranking that
64  * returns success is selected. Subsequent BTL modules are selected only
65  * if they have the same exclusivity ranking.
66  *
67  * An example of how this might be used:
68  *
69  * BTL Exclusivity Comments
70  * -------- ----------- ------------------
71  * LO 100 Selected exclusively for local process
72  * SM 50 Selected exclusively for other processes on host
73  * IB 0 Selected based on network reachability
74  * IB 0 Selected based on network reachability
75  * TCP 0 Selected based on network reachability
76  * TCP 0 Selected based on network reachability
77  *
78  * When mca_btl_base_add_proc_fn_t() is called on a BTL module, the BTL
79  * will populate an OUT variable with mca_btl_base_endpoint_t pointers.
80  * Each pointer is treated as an opaque handle by the upper layer and is
81  * returned to the BTL on subsequent data transfer calls to the
82  * corresponding destination process. The actual contents of the
83  * data structure are defined on a per BTL basis, and may be used to
84  * cache addressing or connection information, such as a TCP socket
85  * or IB queue pair.
86  *
87  * Progress:
88  *
89  * By default, the library provides for polling based progress of outstanding
90  * requests. The BTL component exports an interface function (btl_progress)
91  * that is called in a polling mode by the PML during calls into the MPI
92  * library. Note that the btl_progress() function is called on the BTL component
93  * rather than each BTL module. This implies that the BTL author is responsible
94  * for iterating over the pending operations in each of the BTL modules associated
95  * with the component.
96  *
97  * On platforms where threading support is provided, the library provides the
98  * option of building with asynchronous threaded progress. In this case, the BTL
99  * author is responsible for providing a thread to progress pending operations.
100  * A thread is associated with the BTL component/module such that transport specific
101  * functionality/APIs may be used to block the thread until a pending operation
102  * completes. This thread MUST NOT poll for completion as this would oversubscribe
103  * the CPU.
104  *
105  * Note that in the threaded case the PML may choose to use a hybrid approach,
106  * such that polling is implemented from the user thread for a fixed number of
107  * cycles before relying on the background thread(s) to complete requests. If
108  * possible the BTL should support the use of both modes concurrently.
109  *
110  */
111 
112 #ifndef MCA_BTL_H
113 #define MCA_BTL_H
114 
115 #include "ompi_config.h"
116 #include "opal/mca/mca.h"
117 #include "opal/class/opal_bitmap.h"
118 #include "opal/datatype/opal_convertor.h"
119 #include "opal/prefetch.h" /* For OPAL_LIKELY */
120 #include "ompi/mca/mpool/mpool.h"
121 #include "ompi/types.h"
122 #include "opal/types.h"
123 
124 #include "opal/mca/crs/crs.h"
125 #include "opal/mca/crs/base/base.h"
126 
127 BEGIN_C_DECLS
128 
129 /*
130  * BTL types
131  */
132 
133 struct mca_btl_base_module_t;
137 struct ompi_proc_t;
138 
139 
/* send/recv operations require tag matching */
typedef uint8_t mca_btl_base_tag_t;

/* Ordering value that may be supplied wherever an ordering tag is accepted. */
#define MCA_BTL_NO_ORDER 255
144 
145 /*
146  * Communication specific defines. There are a number of active message IDs
147  * that can be shared between all frameworks that need to communicate (i.e.
148  * use the PML or the BTL directly). These IDs are exchanged between the
149  * processes, therefore they need to be identical everywhere. The simplest
150  * approach is to have them defined as constants, and give each framework a
151  * small number. Here is the rule that defines these IDs (they are 8 bits):
152  * - the first 3 bits are used to code the framework (i.e. PML, OSC, COLL)
153  * - the remaining 5 bits are used internally by the framework, and divided
154  * based on the components requirements. Therefore, the way the PML and
155  * the OSC frameworks use these defines will be different. For more
156  * information about how these framework ID are defined, take a look in the
157  * header file associated with the framework.
158  */
/*
 * Mask selecting the framework field of an 8-bit active message tag, i.e.
 * its three most significant bits. The mask must cover all three bits,
 * otherwise MCA_BTL_TAG_PML (0x40) and MCA_BTL_TAG_OSC_RDMA (0x60) become
 * indistinguishable after masking.
 *
 * NOTE(review): MCA_BTL_TAG_FT (0x30) shares its three top bits with
 * MCA_BTL_TAG_BTL (0x20); confirm whether FT is meant to live inside the
 * BTL framework range.
 */
#define MCA_BTL_AM_FRAMEWORK_MASK 0xE0
#define MCA_BTL_TAG_BTL           0x20
#define MCA_BTL_TAG_FT            0x30
#define MCA_BTL_TAG_PML           0x40
#define MCA_BTL_TAG_OSC_RDMA      0x60
#define MCA_BTL_TAG_USR           0x80
#define MCA_BTL_TAG_MAX           255 /* 1 + highest allowed tag num */

/*
 * Reserved tags for specific BTLs. As multiple BTLs can be active
 * simultaneously, their tags should not collide.
 */
#define MCA_BTL_TAG_IB    (MCA_BTL_TAG_BTL + 0)
#define MCA_BTL_TAG_UDAPL (MCA_BTL_TAG_BTL + 1)
173 
/* preferred protocol */
#define MCA_BTL_FLAGS_SEND 0x0001
#define MCA_BTL_FLAGS_PUT  0x0002
#define MCA_BTL_FLAGS_GET  0x0004
#define MCA_BTL_FLAGS_RDMA (MCA_BTL_FLAGS_GET|MCA_BTL_FLAGS_PUT)

/* btl can send directly from user buffer w/out registration */
#define MCA_BTL_FLAGS_SEND_INPLACE 0x0008

/* btl transport reliability flags - currently used only by the DR PML */
#define MCA_BTL_FLAGS_NEED_ACK  0x0010
#define MCA_BTL_FLAGS_NEED_CSUM 0x0020

/** RDMA put/get calls must have a matching prepare_{src,dst} call
    on the target with the same base (and possibly bound). */
#define MCA_BTL_FLAGS_RDMA_MATCHED 0x0040

/* btl needs local rdma completion */
#define MCA_BTL_FLAGS_RDMA_COMPLETION 0x0080

/* btl can do heterogeneous rdma operations on byte buffers */
#define MCA_BTL_FLAGS_HETEROGENEOUS_RDMA 0x0100

/* btl can support failover if enabled */
#define MCA_BTL_FLAGS_FAILOVER_SUPPORT 0x0200

/* CUDA-specific put/get flags; CUDA_RDMA is the OR of the two.
 * NOTE(review): exact semantics are not described in this header. */
#define MCA_BTL_FLAGS_CUDA_PUT  0x0400
#define MCA_BTL_FLAGS_CUDA_GET  0x0800
#define MCA_BTL_FLAGS_CUDA_RDMA (MCA_BTL_FLAGS_CUDA_GET|MCA_BTL_FLAGS_CUDA_PUT)
203 
/* Default exclusivity levels */
#define MCA_BTL_EXCLUSIVITY_HIGH    (64*1024) /* internal loopback */
#define MCA_BTL_EXCLUSIVITY_DEFAULT 1024      /* GM/IB/etc. */
#define MCA_BTL_EXCLUSIVITY_LOW     0         /* TCP used as a last resort */

/* error callback flags */
#define MCA_BTL_ERROR_FLAGS_FATAL    0x1
#define MCA_BTL_ERROR_FLAGS_NONFATAL 0x2
212 
213 /**
214  * Asynchronous callback function on completion of an operation.
215  * Completion Semantics: The descriptor can be reused or returned to the
216  * BTL via mca_btl_base_module_free_fn_t. The operation has been queued to
217  * the network device or will otherwise make asynchronous progress without
218  * subsequent calls to btl_progress.
219  *
220  * @param[IN] module the BTL module
221  * @param[IN] endpoint the BTL endpoint
222  * @param[IN] descriptor the BTL descriptor
223  *
224  */
226  struct mca_btl_base_module_t* module,
227  struct mca_btl_base_endpoint_t* endpoint,
228  struct mca_btl_base_descriptor_t* descriptor,
229  int status);
230 
231 /**
232  * Describes a region/segment of memory that is addressable
233  * by an BTL.
234  */
235 
237  /** Address of the memory */
239  /** Length in bytes */
240  uint32_t seg_len;
241 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
242  /** Heterogeneous padding */
243  uint8_t seg_padding[4];
244 #endif
245  /** Memory segment key required by some RDMA networks */
246  union {
247  uint32_t key32[4];
248  uint64_t key64[2];
249  uint8_t key8[16];
250 #if OMPI_CUDA_SUPPORT
251  uint8_t cudakey[128]; /* 64 bytes for CUDA mem handle, 64 bytes for CUDA event handle */
252 #endif /* OMPI_CUDA_SUPPORT */
253  } seg_key;
254 #if OMPI_CUDA_SUPPORT
255  /** Address of the entire memory handle */
256  ompi_ptr_t memh_seg_addr;
257  /** Length in bytes of entire memory handle */
258  uint32_t memh_seg_len;
259 #endif /* OMPI_CUDA_SUPPORT */
260 };
262 
263 /**
264  * A descriptor that holds the parameters to a send/put/get
265  * operation along w/ a callback routine that is called on
266  * completion of the request.
267  * Note: from the initiator of a PUT operation des_src is the local memory
268  * and des_dst is the remote memory
269  * from the initiator of a GET operations des_dst is the local memory
270  * and des_src is the remote memory
271  * from the initiator of a SEND operation des_src is the local memory
272  * and des_dst is not used
273  */
274 
276  ompi_free_list_item_t super;
277  mca_btl_base_segment_t *des_src; /**< source segments */
278  size_t des_src_cnt; /**< number of source segments */
279  mca_btl_base_segment_t *des_dst; /**< destination segments */
280  size_t des_dst_cnt; /**< number of destination segments */
281  mca_btl_base_completion_fn_t des_cbfunc; /**< local callback function */
282  void* des_cbdata; /**< opaque callback data */
283  void* des_context; /**< more opaque callback data */
284  uint32_t des_flags; /**< hints to BTL */
285  /** order value, this is only
286  valid in the local completion callback
287  and may be used in subsequent calls to
288  btl_alloc, btl_prepare_src/dst to request
289  a descriptor that will be ordered w.r.t.
290  this descriptor
291  */
292  uint8_t order;
293 };
295 
297 
#define MCA_BTL_DES_FLAGS_PRIORITY 0x0001
/* Allow the BTL to dispose of the descriptor once the associated
 * callback has been triggered.
 */
#define MCA_BTL_DES_FLAGS_BTL_OWNERSHIP 0x0002
/* Allow the BTL to avoid calling the descriptor callback
 * if the send succeeded in the btl_send (i.e in the fast path).
 */
#define MCA_BTL_DES_SEND_ALWAYS_CALLBACK 0x0004

/* Type of transfer that will be done with this frag.
 */
#define MCA_BTL_DES_FLAGS_PUT 0x0010
#define MCA_BTL_DES_FLAGS_GET 0x0020

/**
 * Maximum number of allowed segments in src/dst fields of a descriptor.
 */
#define MCA_BTL_DES_MAX_SEGMENTS 16
317 
318 /*
319  * BTL base header, stores the tag at a minimum
320  */
322  mca_btl_base_tag_t tag;
323 };
325 
/* Both macros expand to nothing, so applying them to a header is a no-op.
 * NOTE(review): presumably placeholders for host/network byte-order
 * conversion of the base header — confirm. */
#define MCA_BTL_BASE_HEADER_HTON(hdr)
#define MCA_BTL_BASE_HEADER_NTOH(hdr)
328 
329 /*
330  * BTL component interface functions and datatype.
331  */
332 
333 /**
334  * MCA->BTL Initializes the BTL component and creates specific BTL
335  * module(s).
336  *
337  * @param num_btls (OUT) Returns the number of btl modules created, or 0
338  * if the transport is not available.
339  *
340  * @param enable_progress_threads (IN) Whether this component is
341  * allowed to run a hidden/progress thread or not.
342  *
343  * @param enable_mpi_threads (IN) Whether support for multiple MPI
344  * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which
345  * indicates whether multiple threads may invoke this component
346  * simultaneously or not.
347  *
348  * @return Array of pointers to BTL modules, or NULL if the transport
349  * is not available.
350  *
351  * During component initialization, the BTL component should discover
352  * the physical devices that are available for the given transport,
353  * and create a BTL module to represent each device. Any addressing
354  * information required by peers to reach the device should be published
355  * during this function via the ompi_modex_send() interface.
356  *
357  */
358 
typedef struct mca_btl_base_module_t **(*mca_btl_base_component_init_fn_t)(
    int *num_btls,                /* OUT: number of btl modules created, or 0 */
    bool enable_progress_threads, /* IN: hidden/progress thread allowed */
    bool enable_mpi_threads       /* IN: multiple MPI threads enabled */
);
364 
365 /**
366  * MCA->BTL Called to progress outstanding requests for
367  * non-threaded polling environments.
368  *
369  * @return Count of "completions", a metric of
370  * how many items were completed in the call
371  * to progress.
372  */
373 
375 
376 
377 /**
378  * Callback function that is called asynchronously on receipt
379  * of data by the transport layer.
380  * Note that the mca_btl_base_descriptor_t is only valid within the
381  * completion function, this implies that all data payload in the
382  * mca_btl_base_descriptor_t must be copied out within this callback or
383  * forfeited back to the BTL.
384  *
385  * @param[IN] btl BTL module
386  * @param[IN] tag The active message receive callback tag value
387  * @param[IN] descriptor The BTL descriptor (contains the receive payload)
388  * @param[IN] cbdata Opaque callback data
389  */
390 
392  struct mca_btl_base_module_t* btl,
393  mca_btl_base_tag_t tag,
394  mca_btl_base_descriptor_t* descriptor,
395  void* cbdata
396 );
397 
400  void* cbdata;
402 
403 OMPI_DECLSPEC extern
404 mca_btl_active_message_callback_t mca_btl_base_active_message_trigger[MCA_BTL_TAG_MAX];
405 
406 /**
407  * BTL component descriptor. Contains component version information
408  * and component open/close/init functions.
409  */
410 
412  mca_base_component_t btl_version;
413  mca_base_component_data_t btl_data;
416 };
419 
420 /* add the 1_0_0_t typedef for source compatibility
421  * we can do this safely because 1_0_0 components are the same as
422  * 1_0_1 components, the difference is in the btl module.
423  * Fortunately the only difference in the module is an additional interface
424  * function added to 1_0_1. We can therefore safely treat an older module just
425  * like the new one so long as we check the component version
426  * prior to invoking the new interface function.
427  */
430 
431 
432 
433 /*
434  * BTL module interface functions and datatype.
435  */
436 
437 /**
438  * MCA->BTL Clean up any resources held by BTL module
439  * before the module is unloaded.
440  *
441  * @param btl (IN) BTL module.
442  * @return OMPI_SUCCESS or error status on failure.
443  *
444  * Prior to unloading a BTL module, the MCA framework will call
445  * the BTL finalize method of the module. Any resources held by
446  * the BTL should be released and if required the memory corresponding
447  * to the BTL module freed.
448  *
449  */
451  struct mca_btl_base_module_t* btl
452 );
453 
454 /**
455  * BML->BTL notification of change in the process list.
456  *
457  * @param btl (IN) BTL module
458  * @param nprocs (IN) Number of processes
459  * @param procs (IN) Array of processes
460  * @param endpoints (OUT) Array of mca_btl_base_endpoint_t pointers filled in by the BTL.
461  * @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this BTL.
462  * @return OMPI_SUCCESS or error status on failure.
463  *
464  * The mca_btl_base_module_add_procs_fn_t() is called by the BML to
465  * determine the set of BTLs that should be used to reach each process.
466  * Any addressing information exported by the peer via the ompi_modex_send()
467  * function should be available during this call via the corresponding
468  * ompi_modex_recv() function. The BTL may utilize this information to
469  * determine reachability of each peer process.
470  *
471  * For each process that is reachable by the BTL, the bit corresponding to the index
472  * into the proc array (nprocs) should be set in the reachable bitmask. The BTL
473  * will return an array of pointers to a data structure defined
474  * by the BTL that is then returned to the BTL on subsequent calls to the BTL data
475  * transfer functions (e.g btl_send). This may be used by the BTL to cache any addressing
476  * or connection information (e.g. TCP socket, IB queue pair).
477  */
479  struct mca_btl_base_module_t* btl,
480  size_t nprocs,
481  struct ompi_proc_t** procs,
482  struct mca_btl_base_endpoint_t** endpoints,
483  struct opal_bitmap_t* reachable
484 );
485 
486 /**
487  * Notification of change to the process list.
488  *
489  * @param btl (IN) BTL module
490  * @param nprocs (IN) Number of processes
491  * @param procs (IN) Set of processes
492  * @param peer (IN) Set of peer addressing information.
493  * @return Status indicating if cleanup was successful
494  *
495  * When the process list changes, the BML notifies the BTL of the
496  * change, to provide the opportunity to cleanup or release any
497  * resources associated with the peer.
498  */
500  struct mca_btl_base_module_t* btl,
501  size_t nprocs,
502  struct ompi_proc_t** procs,
503  struct mca_btl_base_endpoint_t** peer
504 );
505 
506 /**
507  * Register a callback function that is called on receipt
508  * of a fragment.
509  *
510  * @param[IN] btl BTL module
511  * @param[IN] tag tag value of this callback
512  * (specified on subsequent send operations)
513  * @param[IN] cbfunc The callback function
514  * @param[IN] cbdata Opaque callback data
515  *
516  * @return OMPI_SUCCESS The callback was registered successfully
517  * @return OMPI_ERROR The callback was NOT registered successfully
518  *
519  */
521  struct mca_btl_base_module_t* btl,
522  mca_btl_base_tag_t tag,
524  void* cbdata
525 );
526 
527 
528 /**
529  * Callback function that is called asynchronously on receipt
530  * of an error from the transport layer
531  *
532  * @param[IN] btl BTL module
533  * @param[IN] flags type of error
534  * @param[IN] errproc process that had an error
535  * @param[IN] btlinfo descriptive string from the BTL
536  */
537 
539  struct mca_btl_base_module_t* btl,
540  int32_t flags,
541  struct ompi_proc_t* errproc,
542  char* btlinfo
543 );
544 
545 
546 /**
547  * Register a callback function that is called on receipt
548  * of an error.
549  *
550  * @param[IN] btl BTL module
551  * @param[IN] cbfunc The callback function
552  *
553  * @return OMPI_SUCCESS The callback was registered successfully
554  * @return OMPI_ERROR The callback was NOT registered successfully
555  *
556  */
558  struct mca_btl_base_module_t* btl,
560 );
561 
562 
563 /**
564  * Allocate a descriptor with a segment of the requested size.
565  * Note that the BTL layer may choose to return a smaller size
566  * if it cannot support the request. The order tag value ensures that
567  * operations on the descriptor that is allocated will be
568  * ordered w.r.t. a previous operation on a particular descriptor.
569  * Ordering is only guaranteed if the previous descriptor had its
570  * local completion callback function called and the order tag of
571  * that descriptor is only valid upon the local completion callback function.
572  *
573  *
574  * @param btl (IN) BTL module
575  * @param size (IN) Request segment size.
576  * @param order (IN) The ordering tag (may be MCA_BTL_NO_ORDER)
577  */
578 
579 typedef mca_btl_base_descriptor_t* (*mca_btl_base_module_alloc_fn_t)(
580  struct mca_btl_base_module_t* btl,
581  struct mca_btl_base_endpoint_t* endpoint,
582  uint8_t order,
583  size_t size,
584  uint32_t flags
585 );
586 
587 /**
588  * Return a descriptor allocated from this BTL via alloc/prepare.
589  * A descriptor can only be deallocated after its local completion
590  * callback function has been called for all send/put/get operations.
591  *
592  * @param btl (IN) BTL module
593  * @param descriptor (IN) Descriptor allocated from the BTL
594  */
596  struct mca_btl_base_module_t* btl,
597  mca_btl_base_descriptor_t* descriptor
598 );
599 
600 
601 /**
602  * Prepare a descriptor for send/put/get using the supplied
603  * convertor. If the convertor references data that is contiguous,
604  * the descriptor may simply point to the user buffer. Otherwise,
605  * this routine is responsible for allocating buffer space and
606  * packing if required.
607  *
608  * The descriptor returned can be used in multiple concurrent operations
609  * (send/put/get) unless the BTL has the MCA_BTL_FLAGS_RDMA_MATCHED flag set
610  * in which case a corresponding prepare call must accompany the put/get call
611  * in addition, the address and length that is put/get must match the address
612  * and length which is prepared.
613  *
614  * The order tag value ensures that operations on the
615  * descriptor that is prepared will be ordered w.r.t. a previous
616  * operation on a particular descriptor. Ordering is only guaranteed if
617  * the previous descriptor had its local completion callback function
618  * called and the order tag of that descriptor is only valid upon the local
619  * completion callback function.
620  *
621  * @param btl (IN) BTL module
622  * @param endpoint (IN) BTL peer addressing
623  * @param registration (IN) Memory registration
624  * @param convertor (IN) Data type convertor
625  * @param order (IN) The ordering tag (may be MCA_BTL_NO_ORDER)
626  * @param reserve (IN) Additional bytes requested by upper layer to precede user data
627  * @param size (IN/OUT) Number of bytes to prepare (IN),
628  * number of bytes actually prepared (OUT)
629  *
630  */
631 typedef struct mca_btl_base_descriptor_t* (*mca_btl_base_module_prepare_fn_t)(
632  struct mca_btl_base_module_t* btl,
633  struct mca_btl_base_endpoint_t* endpoint,
634  mca_mpool_base_registration_t* registration,
635  struct opal_convertor_t* convertor,
636  uint8_t order,
637  size_t reserve,
638  size_t* size,
639  uint32_t flags
640 );
641 
642 /**
643  * Initiate an asynchronous send.
644  * Completion Semantics: the descriptor has been queued for a send operation
645  * the BTL now controls the descriptor until local
646  * completion callback is made on the descriptor
647  *
648  * All BTLs allow multiple concurrent asynchronous send operations on a descriptor
649  *
650  * @param btl (IN) BTL module
651  * @param endpoint (IN) BTL addressing information
652  * @param descriptor (IN) Description of the data to be transferred
653  * @param tag (IN) The tag value used to notify the peer.
654  *
655  * @retval OMPI_SUCCESS The descriptor was successfully queued for a send
656  * @retval OMPI_ERROR The descriptor was NOT successfully queued for a send
657  * @retval OMPI_ERR_UNREACH The endpoint is not reachable
658  */
660  struct mca_btl_base_module_t* btl,
661  struct mca_btl_base_endpoint_t* endpoint,
662  struct mca_btl_base_descriptor_t* descriptor,
663  mca_btl_base_tag_t tag
664 );
665 
666 /**
667  * Initiate an immediate blocking send.
668  * Completion Semantics: the BTL will make a best effort
669  * to send the header and "size" bytes from the datatype using the convertor.
670  * The header is guaranteed to be delivered entirely in the first segment.
671  * Should the BTL be unable to deliver the data due to resource constraints
672  * the BTL will return a descriptor (via the OUT param)
673  * of size "payload_size + header_size".
674  *
675  * @param btl (IN) BTL module
676  * @param endpoint (IN) BTL addressing information
677  * @param convertor (IN) Data type convertor
678  * @param header (IN) Pointer to header.
679  * @param header_size (IN) Size of header.
680  * @param payload_size (IN) Size of payload (from convertor).
681  * @param order (IN) The ordering tag (may be MCA_BTL_NO_ORDER)
682  * @param flags (IN) Flags.
683  * @param tag (IN) The tag value used to notify the peer.
684  * @param descriptor (OUT) The descriptor returned when the data cannot be sent immediately
685 
686  * @retval OMPI_SUCCESS The send was successfully queued
687  * @retval OMPI_ERROR The send failed
688  * @retval OMPI_ERR_UNREACH The endpoint is not reachable
689  * @retval OMPI_ERR_RESOURCE_BUSY The BTL is busy; a descriptor will be returned
690  * (via the OUT param) if descriptors are available
691 
692  */
693 
695  struct mca_btl_base_module_t* btl,
696  struct mca_btl_base_endpoint_t* endpoint,
697  struct opal_convertor_t* convertor,
698  void* header,
699  size_t header_size,
700  size_t payload_size,
701  uint8_t order,
702  uint32_t flags,
703  mca_btl_base_tag_t tag,
704  mca_btl_base_descriptor_t** descriptor
705  );
706 
707 /**
708  * Initiate an asynchronous put.
709  * Completion Semantics: the descriptor has been queued for a put operation
710  * the BTL now controls the descriptor until local
711  * completion callback is made on the descriptor
712  *
713  * BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
714  * allow multiple concurrent put operations on the same descriptor.
715  * BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
716  * a corresponding prepare_src/dst call for each put operation and
717  * therefore prohibit multiple concurrent put operations.
718  *
719  * @param btl (IN) BTL module
720  * @param endpoint (IN) BTL addressing information
721  * @param descriptor (IN) Description of the data to be transferred
722  *
723  * @retval OMPI_SUCCESS The descriptor was successfully queued for a put
724  * @retval OMPI_ERROR The descriptor was NOT successfully queued for a put
725  */
726 
728  struct mca_btl_base_module_t* btl,
729  struct mca_btl_base_endpoint_t* endpoint,
730  struct mca_btl_base_descriptor_t* descriptor
731 );
732 
733 /**
734  * Initiate an asynchronous get.
735  *
736  * Completion Semantics: the descriptor has been queued for a get operation
737  * the BTL now controls the descriptor until local
738  * completion callback is made on the descriptor
739  *
740  * BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
741  * allow multiple concurrent get operations on the same descriptor.
742  * BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
743  * a corresponding prepare_src/dst call for each get operation and
744  * therefore prohibit multiple concurrent get operations.
745  *
746  * @param btl (IN) BTL module
747  * @param endpoint (IN) BTL addressing information
748  * @param descriptor (IN) Description of the data to be transferred
749  *
750  * @retval OMPI_SUCCESS The descriptor was successfully queued for a get
751  * @retval OMPI_ERROR The descriptor was NOT successfully queued for a get
752  *
753  */
754 
756  struct mca_btl_base_module_t* btl,
757  struct mca_btl_base_endpoint_t* endpoint,
758  struct mca_btl_base_descriptor_t* descriptor
759 );
760 
761 
762 /**
763  * Diagnostic dump of btl state.
764  *
765  * @param btl (IN) BTL module
766  * @param endpoint (IN) BTL endpoint
767  * @param verbose (IN) Verbosity level
768  */
769 
771  struct mca_btl_base_module_t* btl,
772  struct mca_btl_base_endpoint_t* endpoint,
773  int verbose
774 );
775 
/**
 * Fault Tolerance Event Notification Function
 *
 * @param state Checkpoint Status
 * @return OMPI_SUCCESS or failure status
 */
typedef int (*mca_btl_base_module_ft_event_fn_t)(int state);
782 
783 /**
784  * BTL module interface functions and attributes.
785  */
787 
788  /* BTL common attributes */
789  mca_btl_base_component_t* btl_component; /**< pointer back to the BTL component structure */
790  size_t btl_eager_limit; /**< maximum size of first fragment -- eager send */
791  size_t btl_rndv_eager_limit; /**< the size of a data sent in a first fragment of rendezvous protocol */
792  size_t btl_max_send_size; /**< maximum send fragment size supported by the BTL */
793  size_t btl_rdma_pipeline_send_length; /**< amount of bytes that should be send by pipeline protocol */
794  size_t btl_rdma_pipeline_frag_size; /**< maximum rdma fragment size supported by the BTL */
795  size_t btl_min_rdma_pipeline_size; /**< minimum packet size for pipeline protocol */
796  uint32_t btl_exclusivity; /**< indicates this BTL should be used exclusively */
797  uint32_t btl_latency; /**< relative ranking of latency used to prioritize btls */
798  uint32_t btl_bandwidth; /**< bandwidth (Mbytes/sec) supported by each endpoint */
799  uint32_t btl_flags; /**< flags (put/get...) */
800 
801  /* BTL function table */
806 
809  mca_btl_base_module_prepare_fn_t btl_prepare_src;
810  mca_btl_base_module_prepare_fn_t btl_prepare_dst;
816 
817  /** the mpool associated with this btl (optional) */
819  /** register a default error handler */
821  /** fault tolerant event notification */
823 };
825 
/*
 * Macro for use in modules that are of type btl v2.0.0.
 * Expands to the MCA base version followed by the framework name "btl"
 * and the three version numbers 2, 0, 0.
 */
#define MCA_BTL_BASE_VERSION_2_0_0 \
    MCA_BASE_VERSION_2_0_0,        \
    "btl", 2, 0, 0
832 
833 END_C_DECLS
834 
835 #endif /* MCA_BTL_H */
size_t btl_rdma_pipeline_send_length
amount of bytes that should be send by pipeline protocol
Definition: btl.h:793
uint32_t btl_latency
relative ranking of latency used to prioritize btls
Definition: btl.h:797
int(* mca_btl_base_module_free_fn_t)(struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *descriptor)
Return a descriptor allocated from this BTL via alloc/prepare.
Definition: btl.h:595
uint32_t btl_exclusivity
indicates this BTL should be used exclusively
Definition: btl.h:796
size_t des_dst_cnt
number of destination segments
Definition: btl.h:280
Common type for all MCA components.
Definition: mca.h:250
void(* mca_btl_base_completion_fn_t)(struct mca_btl_base_module_t *module, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *descriptor, int status)
Asynchronous callback function on completion of an operation.
Definition: btl.h:225
mca_btl_base_descriptor_t *(* mca_btl_base_module_alloc_fn_t)(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, uint8_t order, size_t size, uint32_t flags)
Allocate a descriptor with a segment of the requested size.
Definition: btl.h:579
A descriptor that holds the parameters to a send/put/get operation along w/ a callback routine that i...
Definition: btl.h:275
Definition: btl.h:321
uint32_t btl_flags
flags (put/get...)
Definition: btl.h:799
uint32_t des_flags
hints to BTL
Definition: btl.h:284
void * des_cbdata
opaque callback data
Definition: btl.h:282
void(* mca_btl_base_module_recv_cb_fn_t)(struct mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata)
Callback function that is called asynchronously on receipt of data by the transport layer...
Definition: btl.h:391
Definition: opal_bitmap.h:53
void(* mca_btl_base_module_dump_fn_t)(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, int verbose)
Diagnostic dump of btl state.
Definition: btl.h:770
struct mca_btl_base_module_t **(* mca_btl_base_component_init_fn_t)(int *num_btls, bool enable_progress_threads, bool enable_mpi_threads)
MCA->BTL Initializes the BTL component and creates specific BTL module(s).
Definition: btl.h:359
ompi_ptr_t seg_addr
Address of the memory.
Definition: btl.h:238
int(* mca_btl_base_module_put_fn_t)(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *descriptor)
Initiate an asynchronous put.
Definition: btl.h:727
union mca_btl_base_segment_t::@93 seg_key
Memory segment key required by some RDMA networks.
mca_btl_base_segment_t * des_src
source segments
Definition: btl.h:277
int(* mca_btl_base_module_register_fn_t)(struct mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_module_recv_cb_fn_t cbfunc, void *cbdata)
Register a callback function that is called on receipt of a fragment.
Definition: btl.h:520
Definition: types.h:52
A bitmap implementation.
void(* mca_btl_base_module_error_cb_fn_t)(struct mca_btl_base_module_t *btl, int32_t flags, struct ompi_proc_t *errproc, char *btlinfo)
Callback function that is called asynchronously on receipt of an error from the transport layer...
Definition: btl.h:538
int(* mca_btl_base_module_finalize_fn_t)(struct mca_btl_base_module_t *btl)
MCA->BTL Clean up any resources held by BTL module before the module is unloaded. ...
Definition: btl.h:450
int(* mca_btl_base_component_progress_fn_t)(void)
MCA->BTL Called to progress outstanding requests for non-threaded polling environments.
Definition: btl.h:374
int(* mca_btl_base_module_register_error_fn_t)(struct mca_btl_base_module_t *btl, mca_btl_base_module_error_cb_fn_t cbfunc)
Register a callback function that is called on receipt of an error.
Definition: btl.h:557
size_t btl_rndv_eager_limit
the size of the data sent in the first fragment of the rendezvous protocol
Definition: btl.h:791
struct mca_btl_base_descriptor_t *(* mca_btl_base_module_prepare_fn_t)(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, mca_mpool_base_registration_t *registration, struct opal_convertor_t *convertor, uint8_t order, size_t reserve, size_t *size, uint32_t flags)
Prepare a descriptor for send/put/get using the supplied convertor.
Definition: btl.h:631
Definition: mpool.h:44
Remote Open MPI process structure.
Definition: proc.h:56
int(* mca_btl_base_module_send_fn_t)(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *descriptor, mca_btl_base_tag_t tag)
Initiate an asynchronous send.
Definition: btl.h:659
Top-level interface for all MCA components.
uint32_t btl_bandwidth
bandwidth (Mbytes/sec) supported by each endpoint
Definition: btl.h:798
int(* mca_btl_base_module_add_procs_fn_t)(struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **endpoints, struct opal_bitmap_t *reachable)
BML->BTL notification of change in the process list.
Definition: btl.h:478
void * des_context
more opaque callback data
Definition: btl.h:283
mca_btl_base_component_t * btl_component
pointer back to the BTL component structure
Definition: btl.h:789
int(* mca_btl_base_module_ft_event_fn_t)(int state)
Fault Tolerance Event Notification Function.
Definition: btl.h:781
mca_btl_base_module_register_error_fn_t btl_register_error
register a default error handler
Definition: btl.h:820
int(* mca_btl_base_module_del_procs_fn_t)(struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peer)
Notification of change to the process list.
Definition: btl.h:499
uint32_t seg_len
Length in bytes.
Definition: btl.h:240
int(* mca_btl_base_module_get_fn_t)(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *descriptor)
Initiate an asynchronous get.
Definition: btl.h:755
size_t des_src_cnt
number of source segments
Definition: btl.h:278
State of ELAN endpoint connection.
Definition: btl_elan_endpoint.h:33
Definition: ompi_free_list.h:62
BTL component descriptor.
Definition: btl.h:411
Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana University Research and Techno...
Definition: opal_convertor.h:90
size_t btl_min_rdma_pipeline_size
minimum packet size for pipeline protocol
Definition: btl.h:795
Meta data for MCA v2.0.0 components.
Definition: mca.h:309
int(* mca_btl_base_module_sendi_fn_t)(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct opal_convertor_t *convertor, void *header, size_t header_size, size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t **descriptor)
Initiate an immediate blocking send.
Definition: btl.h:694
uint8_t order
order value, this is only valid in the local completion callback and may be used in subsequent calls ...
Definition: btl.h:292
size_t btl_rdma_pipeline_frag_size
maximum rdma fragment size supported by the BTL
Definition: btl.h:794
mca_btl_base_segment_t * des_dst
destination segments
Definition: btl.h:279
size_t btl_max_send_size
maximum send fragment size supported by the BTL
Definition: btl.h:792
Compiler-specific prefetch functions.
mca_btl_base_completion_fn_t des_cbfunc
local callback function
Definition: btl.h:281
mca_mpool_base_module_t * btl_mpool
the mpool associated with this btl (optional)
Definition: btl.h:818
Checkpoint and Restart Service (CRS) Interface.
BTL module interface functions and attributes.
Definition: btl.h:786
uint32_t flags
the properties of this convertor
Definition: opal_convertor.h:93
mca_btl_base_module_ft_event_fn_t btl_ft_event
fault tolerant event notification
Definition: btl.h:822
size_t btl_eager_limit
maximum size of first fragment – eager send
Definition: btl.h:790
Describes a region/segment of memory that is addressable by a BTL.
Definition: btl.h:236
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236
Definition: mpool_fake.h:38
mpool module descriptor.
Definition: mpool.h:174