OpenMPI  0.1.1
pml.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
3  * University Research and Technology
4  * Corporation. All rights reserved.
5  * Copyright (c) 2004-2005 The University of Tennessee and The University
6  * of Tennessee Research Foundation. All rights
7  * reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  * University of Stuttgart. All rights reserved.
10  * Copyright (c) 2004-2006 The Regents of the University of California.
11  * All rights reserved.
12  * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
13  * reserved.
14  * Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
15  * $COPYRIGHT$
16  *
17  * Additional copyrights may follow
18  *
19  * $HEADER$
20  */
21 /**
22  * @file
23  *
24  * P2P Management Layer (PML)
25  *
26  * An MCA component type that provides the P2P interface functionality
27  * required by the MPI layer. The PML is a relatively thin layer that
28  * primarily provides for the fragmentation and scheduling of messages
29  * over multiple transports (instances of the Byte Transfer Layer
30  * (BTL) MCA component type) as depicted below:
31  *
32  * ------------------------------------
33  * |               MPI                |
34  * ------------------------------------
35  * |               PML                |
36  * ------------------------------------
37  * | BTL (TCP) | BTL (SM) | BTL (...) |
38  * ------------------------------------
39  *
40  * A single PML component is selected by the MCA framework during
41  * library initialization. Initially, all available PMLs are loaded
42  * (potentially as shared libraries) and their component open and init
43  * functions called. The MCA framework selects the component
44  * returning the highest priority and closes/unloads any other PML
45  * components that may have been opened.
46  *
47  * After all of the MCA components are initialized, the MPI/RTE will
48  * make downcalls into the PML to provide the initial list of
49  * processes (ompi_proc_t instances), and notification of changes
50  * (add/delete).
51  *
52  * The PML module must select the set of BTL components that are to be
53  * used to reach a given destination. These should be cached on a PML
54  * specific data structure that is hung off the ompi_proc_t.
55  *
56  * The PML should then apply a scheduling algorithm (round-robin,
57  * weighted distribution, etc), to schedule the delivery of messages
58  * over the available BTLs.
59  *
60  */
61 
62 #ifndef MCA_PML_H
63 #define MCA_PML_H
64 
65 #include "ompi_config.h"
66 #include "opal/mca/mca.h"
67 #include "mpi.h" /* needed for MPI_ANY_TAG */
68 
69 BEGIN_C_DECLS
70 
71 /*
72  * PML component types
73  */
74 
75 typedef uint64_t mca_pml_sequence_t; /* unsigned 64-bit type; NOTE(review): presumably a message sequence counter -- no use is visible in this listing, confirm against the PML implementations */
76 
77 /**
78  * Base PML endpoint structure
79  *
80  * Base PML structure for caching endpoint information on a proc. A
81  * pointer to an mca_pml_endpoint_t is maintained on each ompi_proc_t,
82  * in the proc_pml field, to provide per-process cache information.
83  * The data is opaque to the active PML -- no other subsystem will
84  * attempt to access the information in the cache.
85  *
86  * The PML is responsible for allocation and deallocation of the
87  * endpoint data during pml_add_procs and pml_del_procs.
88  */
89 struct mca_pml_endpoint_t;
90 struct ompi_proc_t;
91 
92 typedef enum {
93  MCA_PML_BASE_SEND_SYNCHRONOUS, /* 0: SYNCHRONOUS send mode */
94  MCA_PML_BASE_SEND_COMPLETE, /* 1: NOTE(review): not among the modes named in the send-function docs (STANDARD,BUFFERED,SYNCHRONOUS,READY) -- confirm intended use */
95  MCA_PML_BASE_SEND_BUFFERED, /* 2: BUFFERED send mode */
96  MCA_PML_BASE_SEND_READY, /* 3: READY send mode */
97  MCA_PML_BASE_SEND_STANDARD, /* 4: STANDARD send mode */
98  MCA_PML_BASE_SEND_SIZE /* 5: last enumerator, so it equals the number of enumerators above; presumably a count sentinel rather than a real mode */
99 } mca_pml_base_send_mode_t; /* type of the mode argument in the send signatures of this header */
100 
101 
102 #define OMPI_ANY_TAG MPI_ANY_TAG /* alias for MPI_ANY_TAG (the reason mpi.h is included above) */
103 #define OMPI_ANY_SOURCE MPI_ANY_SOURCE /* alias for MPI_ANY_SOURCE */
104 #define OMPI_PROC_NULL MPI_PROC_NULL /* alias for MPI_PROC_NULL */
105 
106 /**
107  * MCA->PML Called by MCA framework to initialize the component.
108  *
109  * @param priority (OUT) Relative priority or ranking used by MCA to
110  * select a component.
111  *
112  * @param enable_progress_threads (IN) Whether this component is
113  * allowed to run a hidden/progress thread or not.
114  *
115  * @param enable_mpi_threads (IN) Whether support for multiple MPI
116  * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which
117  * indicates whether multiple threads may invoke this component
118  * simultaneously or not.
119  */
120 typedef struct mca_pml_base_module_1_0_0_t * (*mca_pml_base_component_init_fn_t)( /* returns a pointer to a struct mca_pml_base_module_1_0_0_t; NOTE(review): the meaning of a NULL return is not stated here -- confirm */
121  int *priority,
122  bool enable_progress_threads,
123  bool enable_mpi_threads);
124 
125 typedef int (*mca_pml_base_component_finalize_fn_t)(void); /* type of the component's pmlm_finalize field: takes no arguments and returns int (NOTE(review): presumably OMPI_SUCCESS or a failure status like the other hooks -- confirm) */
126 
127 /**
128  * PML component version and interface functions.
129  */
130 
132  mca_base_component_t pmlm_version;
133  mca_base_component_data_t pmlm_data;
135  mca_pml_base_component_finalize_fn_t pmlm_finalize;
136 };
139 
140 
141 /**
142  * MCA management functions.
143  */
144 
145 
146 /**
147  * Downcall from MPI/RTE layer when new processes are created.
148  *
149  * @param procs Array of new processes
150  * @param nprocs Size of process array
151  * @return OMPI_SUCCESS or failure status.
152  *
153  * Provides a notification to the PML that new processes have been
154  * created, and provides the PML the opportunity to cache data
155  * (e.g. list of BTLs to use) on the ompi_proc_t data structure.
156  */
157 typedef int (*mca_pml_base_module_add_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs); /* procs: array of ompi_proc_t pointers; nprocs: its length; same signature as the del_procs hook */
158 
159 
160 /**
161  * Downcall from MPI/RTE layer when processes are terminated.
162  *
163  * @param procs Array of processes
164  * @param nprocs Size of process array
165  * @return OMPI_SUCCESS or failure status.
166  *
167  * Provides a notification to the PML that processes have
168  * gone away, and provides the PML the opportunity to cleanup
169  * any data cached on the ompi_proc_t data structure.
170  */
171 typedef int (*mca_pml_base_module_del_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs); /* procs: array of ompi_proc_t pointers; nprocs: its length; same signature as the add_procs hook */
172 
173 /**
174  * Downcall from MCA layer to enable the PML/BTLs.
175  *
176  * @param enable Enable/Disable PML forwarding
177  * @return OMPI_SUCCESS or failure status.
178 */
180  bool enable
181 );
182 
183 
184 /**
185  * For non-threaded case, provides MCA the opportunity to
186  * progress outstanding requests on all btls.
187  *
188  * @return Count of "completions", a metric of
189  * how many items were completed in the call
190  * to progress.
191 */
193 
194 /**
195  * MPI Interface Functions
196  */
197 
198 
199 /**
200  * Downcall from MPI layer when a new communicator is created.
201  *
202  * @param comm Communicator
203  * @return OMPI_SUCCESS or failure status.
204  *
205  * Provides the PML the opportunity to initialize/cache a data structure
206  * on the communicator.
207  */
209 
210 
211 /**
212  * Downcall from MPI layer when a communicator is destroyed.
213  *
214  * @param comm Communicator
215  * @return OMPI_SUCCESS or failure status.
216  *
217  * Provides the PML the opportunity to clean up any data structures
218  * associated with the communicator.
219  */
221 
222 /**
223  * Initialize a persistent receive request.
224  *
225  * @param buf (IN) User buffer.
226  * @param count (IN) Number of elements of the specified datatype.
227  * @param datatype (IN) User defined datatype.
228  * @param src (IN) Source rank w/in communicator.
229  * @param tag (IN) User defined tag.
230  * @param comm (IN) Communicator.
231  * @param request (OUT) Request handle.
232  * @return OMPI_SUCCESS or failure status.
233  */
235  void *buf,
236  size_t count,
237  struct ompi_datatype_t *datatype,
238  int src,
239  int tag,
240  struct ompi_communicator_t* comm,
241  struct ompi_request_t **request
242 );
243 
244 /**
245  * Post a receive request.
246  *
247  * @param buf (IN) User buffer.
248  * @param count (IN) Number of elements of the specified datatype.
249  * @param datatype (IN) User defined datatype.
250  * @param src (IN) Source rank w/in communicator.
251  * @param tag (IN) User defined tag.
252  * @param comm (IN) Communicator.
253  * @param request (OUT) Request handle.
254  * @return OMPI_SUCCESS or failure status.
255  */
257  void *buf,
258  size_t count,
259  struct ompi_datatype_t *datatype,
260  int src,
261  int tag,
262  struct ompi_communicator_t* comm,
263  struct ompi_request_t **request
264 );
265 typedef int (*mca_pml_base_module_imrecv_fn_t)( /* NOTE(review): undocumented in this header; by name presumably the non-blocking matched receive (type of the pml_imrecv module field) -- confirm */
266  void *buf, /* user buffer, as in the irecv signature above */
267  size_t count, /* number of elements of the specified datatype */
268  struct ompi_datatype_t *datatype, /* user defined datatype */
269  struct ompi_message_t **message, /* message handle passed by reference; takes the place of irecv's src/tag/comm arguments */
270  struct ompi_request_t **request /* request handle; documented as (OUT) for the irecv signature above */
271 );
272 
273 /**
274  * Post a receive and wait for completion.
275  *
276  * @param buf (IN) User buffer
277  * @param count (IN) Number of elements of the specified datatype
278  * @param datatype (IN) User defined datatype
279  * @param src (IN) Source rank w/in communicator
280  * @param tag (IN) User defined tag
281  * @param comm (IN) Communicator
282  * @param status (OUT) Completion status
283  * @return OMPI_SUCCESS or failure status.
284  */
286  void *buf,
287  size_t count,
288  struct ompi_datatype_t *datatype,
289  int src,
290  int tag,
291  struct ompi_communicator_t* comm,
292  ompi_status_public_t* status
293 );
294 typedef int (*mca_pml_base_module_mrecv_fn_t)( /* NOTE(review): undocumented in this header; by name presumably the blocking matched receive (type of the pml_mrecv module field) -- confirm */
295  void *buf, /* user buffer, as in the recv signature above */
296  size_t count, /* number of elements of the specified datatype */
297  struct ompi_datatype_t *datatype, /* user defined datatype */
298  struct ompi_message_t **message, /* message handle passed by reference; takes the place of recv's src/tag/comm arguments */
299  ompi_status_public_t* status /* completion status; documented as (OUT) for the recv signature above */
300 );
301 
302 /**
303  * Initialize a persistent send request.
304  *
305  * @param buf (IN) User buffer.
306  * @param count (IN) Number of elements of the specified datatype.
307  * @param datatype (IN) User defined datatype.
308  * @param dst (IN) Peer rank w/in communicator.
309  * @param tag (IN) User defined tag.
310  * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
311  * @param comm (IN) Communicator.
312  * @param request (OUT) Request handle.
313  * @return OMPI_SUCCESS or failure status.
314  */
316  void *buf,
317  size_t count,
318  struct ompi_datatype_t *datatype,
319  int dst,
320  int tag,
321  mca_pml_base_send_mode_t mode,
322  struct ompi_communicator_t* comm,
323  struct ompi_request_t **request
324 );
325 
326 
327 /**
328  * Post a send request.
329  *
330  * @param buf (IN) User buffer.
331  * @param count (IN) Number of elements of the specified datatype.
332  * @param datatype (IN) User defined datatype.
333  * @param dst (IN) Peer rank w/in communicator.
334  * @param tag (IN) User defined tag.
335  * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
336  * @param comm (IN) Communicator.
337  * @param request (OUT) Request handle.
338  * @return OMPI_SUCCESS or failure status.
339  */
341  void *buf,
342  size_t count,
343  struct ompi_datatype_t *datatype,
344  int dst,
345  int tag,
346  mca_pml_base_send_mode_t mode,
347  struct ompi_communicator_t* comm,
348  struct ompi_request_t **request
349 );
350 
351 
352 /**
353  * Post a send request and wait for completion.
354  *
355  * @param buf (IN) User buffer.
356  * @param count (IN) Number of elements of the specified datatype.
357  * @param datatype (IN) User defined datatype.
358  * @param dst (IN) Peer rank w/in communicator.
359  * @param tag (IN) User defined tag.
360  * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
361  * @param comm (IN) Communicator.
362  * @return OMPI_SUCCESS or failure status.
363  */
365  void *buf,
366  size_t count,
367  struct ompi_datatype_t *datatype,
368  int dst,
369  int tag,
370  mca_pml_base_send_mode_t mode,
371  struct ompi_communicator_t* comm
372 );
373 
374 /**
375  * Initiate one or more persistent requests.
376  *
377  * @param count Number of requests
378  * @param requests Array of persistent requests
379  * @return OMPI_SUCCESS or failure status.
380  */
382  size_t count,
383  struct ompi_request_t** requests
384 );
385 
386 /**
387  * Probe to poll for pending recv.
388  *
389  * @param src (IN) Source rank w/in communicator.
390  * @param tag (IN) User defined tag.
391  * @param comm (IN) Communicator.
392  * @param matched (OUT) Flag indicating if matching recv exists.
393  * @param status (OUT) Completion statuses.
394  * @return OMPI_SUCCESS or failure status.
395  *
396  */
398  int src,
399  int tag,
400  struct ompi_communicator_t* comm,
401  int *matched,
402  ompi_status_public_t *status
403 );
404 
405 typedef int (*mca_pml_base_module_improbe_fn_t)( /* NOTE(review): undocumented in this header; same arguments as the iprobe signature above plus a message handle -- presumably the non-blocking matched probe, confirm */
406  int src, /* source rank w/in communicator */
407  int tag, /* user defined tag */
408  struct ompi_communicator_t* comm, /* communicator */
409  int *matched, /* documented for iprobe as (OUT) flag indicating if matching recv exists */
410  struct ompi_message_t **message, /* message handle passed by reference; the only argument iprobe does not have */
411  ompi_status_public_t *status /* documented for iprobe as (OUT) completion statuses */
412 );
413 
414 /**
415  * Blocking probe to wait for pending recv.
416  *
417  * @param src (IN) Source rank w/in communicator.
418  * @param tag (IN) User defined tag.
419  * @param comm (IN) Communicator.
420  * @param status (OUT) Completion statuses.
421  * @return OMPI_SUCCESS or failure status.
422  *
423  */
425  int src,
426  int tag,
427  struct ompi_communicator_t* comm,
428  ompi_status_public_t *status
429 );
430 
431 typedef int (*mca_pml_base_module_mprobe_fn_t)( /* NOTE(review): undocumented in this header; same arguments as the probe signature above plus a message handle -- presumably the blocking matched probe, confirm */
432  int src, /* source rank w/in communicator */
433  int tag, /* user defined tag */
434  struct ompi_communicator_t* comm, /* communicator */
435  struct ompi_message_t **message, /* message handle passed by reference; the only argument probe does not have */
436  ompi_status_public_t *status /* documented for probe as (OUT) completion statuses */
437 );
438 
439 /**
440  * Cancel pending operation.
441  *
442  * @param request (IN) Request
443  * @return OMPI_SUCCESS or failure status.
444  *
445  */
447  struct ompi_request_t* request
448 );
449 
450 
451 /**
452  * Has a request been cancelled?
453  *
454  * @param request (IN) Request
455  * @return OMPI_SUCCESS or failure status.
456  *
457  */
459  struct ompi_request_t* request,
460  int *flag
461 );
462 
463 /**
464  * Release resources held by a persistent mode request.
465  *
466  * @param request (IN) Request
467  * @return OMPI_SUCCESS or failure status.
468  *
469  */
471  struct ompi_request_t** request
472 );
473 
474 
475 /**
476  * A special NULL request handle.
477  *
478  * @param request (OUT) Request
479  * @return OMPI_SUCCESS or failure status.
480  *
481  */
483  struct ompi_request_t** request
484 );
485 
486 /**
487  * Diagnostics function.
488  *
489  * @param comm (IN) Communicator
490  * @param verbose (IN) Verbosity level (passed to BTL)
491  * @return OMPI_SUCCESS or failure status.
492  *
493  */
495  struct ompi_communicator_t* comm,
496  int verbose
497 );
498 
499 /**
500  * Fault Tolerance Awareness function
501  * @param status Checkpoint status
502  * @return OMPI_SUCCESS or failure status
503  */
504 typedef int (*mca_pml_base_module_ft_event_fn_t) (int status); /* status is a plain int; the checkpoint status values are not defined in this header listing */
505 
506 
507 
508 /**
509  * PML instance.
510  */
511 
513 
514  /* downcalls from MCA to PML */
519 
520  /* downcalls from MPI to PML */
532  mca_pml_base_module_improbe_fn_t pml_improbe;
533  mca_pml_base_module_mprobe_fn_t pml_mprobe;
534  mca_pml_base_module_imrecv_fn_t pml_imrecv;
535  mca_pml_base_module_mrecv_fn_t pml_mrecv;
536 
537  /* diagnostics */
539 
540  /* FT Event */
542 
543  /* maximum constant sizes */
544  uint32_t pml_max_contextid;
545  int pml_max_tag;
546 };
549 
550 /*
551  * Macro for use in components that are of type pml; expands to MCA_BASE_VERSION_2_0_0 followed by the type name "pml" and the numbers 2, 0, 0 (matching the _2_0_0 suffix)
552  */
553 #define MCA_PML_BASE_VERSION_2_0_0 \
554  MCA_BASE_VERSION_2_0_0, \
555  "pml", 2, 0, 0
556 
557  /*
558  * macro for doing direct call / call through struct: MCA_PML_CALL(a) names either mca_pml_<component>_<a> (direct call) or the member mca_pml.pml_<a> (call through struct)
559  */
560 #if MCA_ompi_pml_DIRECT_CALL
561 
562 #include MCA_ompi_pml_DIRECT_CALL_HEADER
563 
564 #define MCA_PML_CALL_STAMP(a, b) mca_pml_ ## a ## _ ## b /* pastes its arguments into the single identifier mca_pml_<a>_<b> */
565 #define MCA_PML_CALL_EXPANDER(a, b) MCA_PML_CALL_STAMP(a,b) /* extra macro level so the arguments are macro-expanded before being pasted */
566 #define MCA_PML_CALL(a) MCA_PML_CALL_EXPANDER(MCA_ompi_pml_DIRECT_CALL_COMPONENT, a) /* direct call: component name comes from MCA_ompi_pml_DIRECT_CALL_COMPONENT */
567 
568 #else
569 #define MCA_PML_CALL(a) mca_pml.pml_ ## a /* call through struct: selects member pml_<a> of the global mca_pml */
570 #endif
571 
572 OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml; /* the module object whose pml_<a> members MCA_PML_CALL(a) selects when direct calling is disabled */
573 
574 
575 END_C_DECLS
576 #endif /* MCA_PML_H */
int(* mca_pml_base_module_add_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs)
MCA management functions.
Definition: pml.h:157
Common type for all MCA components.
Definition: mca.h:250
size_t count
same value as status._ucount
Definition: message.h:28
int(* mca_pml_base_module_irecv_init_fn_t)(void *buf, size_t count, struct ompi_datatype_t *datatype, int src, int tag, struct ompi_communicator_t *comm, struct ompi_request_t **request)
Initialize a persistent receive request.
Definition: pml.h:234
int(* mca_pml_base_module_del_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs)
Downcall from MPI/RTE layer when processes are terminated.
Definition: pml.h:171
Definition: ompi_datatype.h:68
int(* mca_pml_base_module_irecv_fn_t)(void *buf, size_t count, struct ompi_datatype_t *datatype, int src, int tag, struct ompi_communicator_t *comm, struct ompi_request_t **request)
Post a receive request.
Definition: pml.h:256
int(* mca_pml_base_module_isend_fn_t)(void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t *comm, struct ompi_request_t **request)
Post a send request.
Definition: pml.h:340
int(* mca_pml_base_module_add_comm_fn_t)(struct ompi_communicator_t *comm)
MPI Interface Functions.
Definition: pml.h:208
int(* mca_pml_base_module_progress_fn_t)(void)
For non-threaded case, provides MCA the opportunity to progress outstanding requests on all btls...
Definition: pml.h:192
int(* mca_pml_base_module_cancel_fn_t)(struct ompi_request_t *request)
Cancel pending operation.
Definition: pml.h:446
int(* mca_pml_base_module_send_fn_t)(void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t *comm)
Post a send request and wait for completion.
Definition: pml.h:364
PML component version and interface functions.
Definition: pml.h:131
int(* mca_pml_base_module_iprobe_fn_t)(int src, int tag, struct ompi_communicator_t *comm, int *matched, ompi_status_public_t *status)
Probe to poll for pending recv.
Definition: pml.h:397
Remote Open MPI process structure.
Definition: proc.h:56
int(* mca_pml_base_module_start_fn_t)(size_t count, struct ompi_request_t **requests)
Initiate one or more persistent requests.
Definition: pml.h:381
struct mca_pml_base_module_1_0_0_t *(* mca_pml_base_component_init_fn_t)(int *priority, bool enable_progress_threads, bool enable_mpi_threads)
MCA->PML Called by MCA framework to initialize the component.
Definition: pml.h:120
Top-level interface for all MCA components.
int(* mca_pml_base_module_del_comm_fn_t)(struct ompi_communicator_t *comm)
Downcall from MPI layer when a communicator is destroyed.
Definition: pml.h:220
PML instance.
Definition: pml.h:512
int(* mca_pml_base_module_ft_event_fn_t)(int status)
Fault Tolerance Awareness function.
Definition: pml.h:504
int(* mca_pml_base_module_isend_init_fn_t)(void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t *comm, struct ompi_request_t **request)
Initialize a persistent send request.
Definition: pml.h:315
Meta data for MCA v2.0.0 components.
Definition: mca.h:309
int(* mca_pml_base_module_enable_fn_t)(bool enable)
Downcall from MCA layer to enable the PML/BTLs.
Definition: pml.h:179
int(* mca_pml_base_module_dump_fn_t)(struct ompi_communicator_t *comm, int verbose)
Diagnostics function.
Definition: pml.h:494
Definition: mpi.h:337
int(* mca_pml_base_module_probe_fn_t)(int src, int tag, struct ompi_communicator_t *comm, ompi_status_public_t *status)
Blocking probe to wait for pending recv.
Definition: pml.h:424
Definition: evdns.c:158
int(* mca_pml_base_module_recv_fn_t)(void *buf, size_t count, struct ompi_datatype_t *datatype, int src, int tag, struct ompi_communicator_t *comm, ompi_status_public_t *status)
Post a receive and wait for completion.
Definition: pml.h:285
Definition: communicator.h:118
Main top-level request struct definition.
Definition: request.h:100
int(* mca_pml_base_module_free_fn_t)(struct ompi_request_t **request)
Release resources held by a persistent mode request.
Definition: pml.h:470
int(* mca_pml_base_module_cancelled_fn_t)(struct ompi_request_t *request, int *flag)
Has a request been cancelled?
Definition: pml.h:458
int(* mca_pml_base_module_null_fn_t)(struct ompi_request_t **request)
A special NULL request handle.
Definition: pml.h:482
Definition: message.h:22