OpenMPI  0.1.1
mtl.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2006 The Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
5  * $COPYRIGHT$
6  *
7  * Additional copyrights may follow
8  *
9  * $HEADER$
10  */
11 
12 /**
13  * @file
14  *
15  * Matching Transport Layer
16  *
17  * The Matching Transport Layer (MTL) provides device-layer support
18  * for transfer of MPI point-to-point messages over devices that
19  * support hardware / library message matching. This layer is used
20  * with the MTL PML component to provide lowest latency and highest
21  * bandwidth on given architectures. Features found in other PML
22  * interfaces, such as message fragmenting, multi-device support, and
23  * NIC failover are not provided by the upper layers.
24  *
25  * In general, this interface should not be used for transport layer
26  * support. Instead, the BTL interface should be used. The BTL
27  * interface allows for multiplexing between multiple users
28  * (point-to-point, one-sided, etc.) and provides many features not
29  * found in this interface (RDMA from arbitrary buffers, active
30  * messaging, reasonable pinned memory caching, etc.)
31  */
32 
33 #ifndef OMPI_MTL_H
34 #define OMPI_MTL_H
35 
36 #include "ompi_config.h"
37 #include "mpi.h" /* needed for MPI_ANY_TAG */
38 #include "opal/mca/mca.h"
39 #include "ompi/mca/pml/pml.h" /* for send_mode enum */
40 #include "ompi/request/request.h"
41 
42 BEGIN_C_DECLS
43 
44 struct ompi_request_t;
45 struct opal_convertor_t;
46 
48 
50 
52  /** pointer to associated ompi_request_t */
54  void (*completion_callback)(struct mca_mtl_request_t* mtl_request);
55 };
57 
58 /**
59  * Initialization routine for MTL component
60  *
61  * Initialization routine for MTL component. This function should
62  * allocate resources for communication and try to do all local setup.
63  * It should not attempt to contact its peers, as that should be
64  * done at add_procs time. Contact information should be published
65  * during this initialization function. It will be made available
66  * during add_procs().
67  *
68  * @param enable_progress_threads (IN) Progress threads have been
69  * enabled by the user and the component must be
70  * capable of making asynchronous progress (either
71  * with its own thread, with the kernel, or with
72  * the event library).
73  * @param enable_mpi_threads (IN) MPI threads have been enabled by the
74  * user and the component must be capable of coping
75  * with threads. If the component can cope with
76  * MPI_THREAD_MULTIPLE, enable_mpi_thread_multiple
77  * should be set to true. Otherwise, it is assumed
78  * that only THREAD_FUNNELED and THREAD_SERIALIZED
79  * can be used.
80  * @param enable_mpi_thread_multiple (OUT) Component does / does not
81  * support MPI_THREAD_MULTIPLE. This variable only
82  * needs to be set if enable_mpi_threads is true.
83  * Otherwise, the return value will be ignored.
84  *
85  * @retval NULL component can not operate on the current machine
86  * @retval non-NULL component interface function
87  */
88 typedef struct mca_mtl_base_module_t*
89 (*mca_mtl_base_component_init_fn_t)(bool enable_progress_threads,
90  bool enable_mpi_threads);
91 
92 
94  mca_base_component_t mtl_version;
97 };
100 
101 
102 /**
103  * MCA->MTL Clean up any resources held by MTL module
104  *
105  * Opposite of module_init. Called when communication will no longer
106  * be necessary. Usually this is during MPI_FINALIZE, but it can be
107  * earlier if the component was not selected to run. Assuming
108  * module_init was called, finalize will always be called before the
109  * component_close function is called.
110  *
111  * @param mtl (IN) MTL module returned from call to initialize
112  *
113  * @retval OMPI_SUCCESS cleanup finished successfully
114  * @retval other failure during cleanup
115  *
116  */
118 
119 
120 /**
121  * PML->MTL notification of change in the process list.
122  *
123  * The mca_mtl_base_module_add_procs_fn_t() is used by the PML to
124  * notify the MTL that new processes are connected to the current
125  * process. Any addressing information exported by the peer via the
126  * ompi_modex_send() function should be available during this
127  * call via the corresponding ompi_modex_recv() function. The
128  * MTL may utilize this information to determine reachability of each
129  * peer process.
130  *
131  * It is an error for a proc to not be reachable by the given MTL, and
132  * an error should be returned if that case is detected. The PML
133  * provides the MTL the option to return a pointer to a data structure
134  * defined by the MTL that is passed in with all communication
135  * functions. The array of procinfo pointers will be allocated by the
136  * PML, but it is up to the MTL module to create the memory for the
137  * procinfo structure itself. The procinfo structure is opaque to the
138  * PML and is only used internally by the MTL.
139  *
140  * @param mtl (IN) MTL module
141  * @param nprocs (IN) Number of processes
142  * @param procs (IN) Set of processes
143  * @param endpoint (OUT) Array of (optional) mca_mtl_base_procinfo_t
144  * structures, one per proc in procs
145  *
146  * @retval OMPI_SUCCESS successfully connected to processes
147  * @retval other failure during setup
148  */
150  struct mca_mtl_base_module_t* mtl,
151  size_t nprocs,
152  struct ompi_proc_t** procs,
153  struct mca_mtl_base_endpoint_t **mtl_peer_data);
154 
155 
156 /**
157  * Notification of change to the process list.
158  *
159  * When the process list changes, the PML notifies the MTL of the
160  * change, to provide the opportunity to cleanup or release any
161  * resources associated with the peer.
162  *
163  * @param mtl (IN) MTL module
164  * @param nprocs (IN) Number of processes
165  * @param proc (IN) Set of processes
166  * @param peer (IN) Set of peer addressing information.
167  *
168  * @return Status indicating if cleanup was successful
169  */
171  struct mca_mtl_base_module_t* mtl,
172  size_t nprocs,
173  struct ompi_proc_t** procs,
174  struct mca_mtl_base_endpoint_t **mtl_peer_data);
175 
176 
177 /**
178  * Blocking send to peer
179  *
180  * Blocking send (Call should not return until the user buffer may be
181  * used again). Standard MPI semantics must be met by this call, as
182  * mandated in the mode argument. There is one special mode argument,
183  * MCA_PML_BASE_SEND_COMPLETE, which requires local completion before
184  * the function can return. This is an optimization for collective
185  * routines that can otherwise lead to degenerate performance for
186  * broadcast-based collectives.
187  *
188  * @param comm (IN) Communicator used for operation
189  * @param dest (IN) Destination rank for send (relative to comm)
190  * @param tag (IN) MPI tag used for sending. See note below.
191  * @param convertor (IN) Datatype convertor describing send datatype.
192  * Already prepared for send.
193  * @param mode (IN) Mode for send operation
194  *
195  * @return OMPI_SUCCESS or error value
196  *
197  * \note Open MPI is built around non-blocking operations. This
198  * function is provided for networks where progressing events outside
199  * of point-to-point (for example, collectives, I/O, one-sided) can
200  * occur without a progress function regularly being triggered.
201  *
202  * \note While MPI does not allow users to specify negative tags, they
203  * are used internally in Open MPI to provide a unique channel for
204  * collective operations. Therefore, the MTL can *not* cause an error
205  * if a negative tag is used.
206  */
208  struct mca_mtl_base_module_t* mtl,
209  struct ompi_communicator_t *comm,
210  int dest,
211  int tag,
212  struct opal_convertor_t *convertor,
213  mca_pml_base_send_mode_t mode);
214 
215 
216 /**
217  * Non-blocking send to peer
218  *
219  * Non-blocking send to peer. Standard MPI semantics must be met by
220  * this call, as mandated in the mode argument. There is one special
221  * mode argument, MCA_PML_BASE_SEND_COMPLETE, which requires local
222  * completion before the request is marked as complete.
223  *
224  * The PML will handle creation of the request, leaving the number of
225  * bytes requested in the module structure available for the MTL
226  * directly after the ompi_request_t structure. The PML will handle
227  * proper destruction of the request once it can safely be destructed
228  * (it has been completed and freed by a call to REQUEST_FREE or
229  * TEST/WAIT). The MTL should remove all resources associated with
230  * the request when it is marked as completed.
231  *
232  * @param comm (IN) Communicator used for operation
233  * @param dest (IN) Destination rank for send (relative to comm)
234  * @param tag (IN) MPI tag used for sending. See note below.
235  * @param convertor (IN) Datatype convertor describing send datatype.
236  * Already prepared for send.
237  * @param mode (IN) Mode for send operation (see pml.h)
238  * @param blocking (IN) True if the call originated from a blocking
239  * call, but the PML decided to use a
240  * non-blocking operation, likely for
241  * internal performance decisions. This is an
242  * optimization flag and is not needed for
243  * correctness.
244  * @param mtl_request (IN) Pointer to mtl_request. The ompi_req field
245  * will be populated with an initialized
246  * ompi_request_t before calling.
247  *
248  * @return OMPI_SUCCESS or error value
249  *
250  * \note While MPI does not allow users to specify negative tags, they
251  * are used internally in Open MPI to provide a unique channel for
252  * collective operations. Therefore, the MTL can *not* cause an error
253  * if a negative tag is used.
254  */
256  struct mca_mtl_base_module_t* mtl,
257  struct ompi_communicator_t *comm,
258  int dest,
259  int tag,
260  struct opal_convertor_t *convertor,
261  mca_pml_base_send_mode_t mode,
262  bool blocking,
263  mca_mtl_request_t *mtl_request);
264 
265 
266 /**
267  * Non-blocking receive
268  *
269  * Non-blocking receive function. Standard MPI semantics for
270  * MPI_Irecv must be implemented by this call.
271  *
272  * The PML will handle creation of the request, leaving the number of
273  * bytes requested in the module structure available for the MTL,
274  * directly after the ompi_request_t structure. The PML will handle
275  * proper destruction of the request once it can safely be destroyed
276  * (it has been completed and freed by a call to REQUEST_FREE or
277  * TEST/WAIT). The MTL should remove all resources associated with
278  * the request when it is marked as completed.
279  *
280  * @param comm (IN) Communicator used for operation
281  * @param src (IN) Source rank for receive (relative to comm)
282  * @param tag (IN) MPI tag used for receiving. See note below.
283  * @param convertor (IN) Datatype convertor describing receive datatype.
284  * Already prepared for receive.
285  * @param mtl_request (IN) Pointer to mtl_request. The ompi_req field
286  * will be populated with an initialized
287  * ompi_request_t before calling.
288  *
289  * @return OMPI_SUCCESS or error value
290  *
291  * \note While MPI does not allow users to specify negative tags, they
292  * are used internally in Open MPI to provide a unique channel for
293  * collective operations. Therefore, the MTL can *not* cause an error
294  * if a negative tag is used. Further, MPI_ANY_TAG should *not* match
295  * against negative tags.
296  */
298  struct mca_mtl_base_module_t* mtl,
299  struct ompi_communicator_t *comm,
300  int src,
301  int tag,
302  struct opal_convertor_t *convertor,
303  struct mca_mtl_request_t *mtl_request);
304 
305 
306 /**
307  * Non-blocking probe
308  *
309  * Non-blocking probe function. Standard MPI semantics for MPI_IPROBE
310  * must be implemented by this call.
311  *
312  * @param comm (IN) Communicator used for operation
313  * @param src (IN) Source rank for send (relative to comm)
314  * @param tag (IN) MPI tag used for sending. See note below.
315  * @param flag (OUT) true if message available, false otherwise
316  * @param status (OUT) Status structure for information on
317  * available message
318  *
319  * \note While MPI does not allow users to specify negative tags, they
320  * are used internally in Open MPI to provide a unique channel for
321  * collective operations. Therefore, the MTL can *not* cause an error
322  * if a negative tag is used. Further, MPI_ANY_TAG should *not* match
323  * against negative tags.
324  */
326  struct mca_mtl_base_module_t* mtl,
327  struct ompi_communicator_t *comm,
328  int src,
329  int tag,
330  int *flag,
331  struct ompi_status_public_t *status);
332 
333 
334 typedef int (*mca_mtl_base_module_imrecv_fn_t)(struct mca_mtl_base_module_t* mtl,
335  struct opal_convertor_t *convertor,
336  struct ompi_message_t **message,
337  struct mca_mtl_request_t *mtl_request);
338 
339 typedef int (*mca_mtl_base_module_improbe_fn_t)(struct mca_mtl_base_module_t *mtl,
340  struct ompi_communicator_t *comm,
341  int src,
342  int tag,
343  int *matched,
344  struct ompi_message_t **message,
345  struct ompi_status_public_t *status);
346 
347 /**
348  * Cancel an existing request
349  *
350  * Attempt to cancel an existing request. The (poorly defined)
351  * semantics for MPI_CANCEL must be implemented by this call. This,
352  * of course, allows the MTL module to do nothing at all.
353  * Implementations of the MTL should make a good faith effort to
354  * cancel receive requests that have not been started, as the "post a
355  * receive for control messages" paradigm is a common one in loosely
356  * coupled MPI applications.
357  *
358  * @param request (IN) Request that should be cancelled
359  * @param flag Unknown exactly what this does.
360  *
361  */
363  struct mca_mtl_base_module_t* mtl,
364  mca_mtl_request_t *mtl_request,
365  int flag);
366 
367 
368 /**
369  * Downcall from PML layer when a new communicator is created.
370  *
371  * @param comm Communicator
372  * @return OMPI_SUCCESS or failure status.
373  *
374  * Provides the MTL the opportunity to initialize/cache a data structure
375  * on the communicator.
376  */
378  struct mca_mtl_base_module_t* mtl,
379  struct ompi_communicator_t* comm);
380 
381 
382 /**
383  * Downcall from PML layer when a communicator is destroyed.
384  *
385  * @param comm Communicator
386  * @return OMPI_SUCCESS or failure status.
387  *
388  * Provides the MTL the opportunity to clean up any data structures
389  * associated with the communicator.
390  */
392  struct mca_mtl_base_module_t* mtl,
393  struct ompi_communicator_t* comm);
394 
395 
396 /**
397  * MTL module interface functions and attributes.
398  */
400  int mtl_max_contextid; /**< maximum allowable contextid */
401  int mtl_max_tag; /**< maximum tag value. note that negative tags must be allowed */
402  size_t mtl_request_size; /**< number of bytes to reserve with request structure */
403 
404  uint32_t mtl_flags; /**< flags (put/get...) */
405 
406  /* MTL function table */
410 
415  mca_mtl_base_module_imrecv_fn_t mtl_imrecv;
416  mca_mtl_base_module_improbe_fn_t mtl_improbe;
417 
418  /* Optional MTL functions */
422 };
424 
425 /*
426  * Macro for use in modules that are of type mtl
427  */
428 #define MCA_MTL_BASE_VERSION_2_0_0 \
429  MCA_BASE_VERSION_2_0_0, \
430  "mtl", 2, 0, 0
431 
432 /*
433  * macro for doing direct call / call through struct
434  */
435 #if MCA_ompi_mtl_DIRECT_CALL
436 
437 #include MCA_ompi_mtl_DIRECT_CALL_HEADER
438 
439 #define OMPI_MTL_CALL_STAMP(a, b) ompi_mtl_ ## a ## _ ## b
440 #define OMPI_MTL_CALL_EXPANDER(a, b) OMPI_MTL_CALL_STAMP(a,b)
441 #define OMPI_MTL_CALL(a) OMPI_MTL_CALL_EXPANDER(MCA_ompi_mtl_DIRECT_CALL_COMPONENT, a)
442 
443 #else
444 #define OMPI_MTL_CALL(a) ompi_mtl->mtl_ ## a
445 #endif
446 
447 OMPI_DECLSPEC extern mca_mtl_base_module_t *ompi_mtl;
448 
449 END_C_DECLS
450 #endif
Common type for all MCA components.
Definition: mca.h:250
int mtl_max_contextid
maximum allowable contextid
Definition: mtl.h:400
P2P Management Layer (PML)
int(* mca_mtl_base_module_del_procs_fn_t)(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t **procs, struct mca_mtl_base_endpoint_t **mtl_peer_data)
Notification of change to the process list.
Definition: mtl.h:170
size_t mtl_request_size
number of bytes to reserve with request structure
Definition: mtl.h:402
int(* mca_mtl_base_module_isend_fn_t)(struct mca_mtl_base_module_t *mtl, struct ompi_communicator_t *comm, int dest, int tag, struct opal_convertor_t *convertor, mca_pml_base_send_mode_t mode, bool blocking, mca_mtl_request_t *mtl_request)
Non-blocking send to peer.
Definition: mtl.h:255
Definition: mtl.h:93
int mtl_max_tag
maximum tag value.
Definition: mtl.h:401
Definition: mtl.h:51
Remote Open MPI process structure.
Definition: proc.h:56
int(* mca_mtl_base_module_irecv_fn_t)(struct mca_mtl_base_module_t *mtl, struct ompi_communicator_t *comm, int src, int tag, struct opal_convertor_t *convertor, struct mca_mtl_request_t *mtl_request)
Non-blocking receive.
Definition: mtl.h:297
Top-level interface for all MCA components.
uint32_t mtl_flags
flags (put/get...)
Definition: mtl.h:404
Top-level description of requests.
int(* mca_mtl_base_module_finalize_fn_t)(struct mca_mtl_base_module_t *mtl)
MCA->MTL Clean up any resources held by MTL module.
Definition: mtl.h:117
int(* mca_mtl_base_module_iprobe_fn_t)(struct mca_mtl_base_module_t *mtl, struct ompi_communicator_t *comm, int src, int tag, int *flag, struct ompi_status_public_t *status)
Non-blocking probe.
Definition: mtl.h:325
An abstraction that represents a connection to an endpoint process.
Definition: mtl_mx_endpoint.h:50
Definition: opal_convertor.h:90
Meta data for MCA v2.0.0 components.
Definition: mca.h:309
int(* mca_mtl_base_module_send_fn_t)(struct mca_mtl_base_module_t *mtl, struct ompi_communicator_t *comm, int dest, int tag, struct opal_convertor_t *convertor, mca_pml_base_send_mode_t mode)
Blocking send to peer.
Definition: mtl.h:207
struct mca_mtl_base_module_t *(* mca_mtl_base_component_init_fn_t)(bool enable_progress_threads, bool enable_mpi_threads)
Initialization routine for MTL component.
Definition: mtl.h:89
int(* mca_mtl_base_module_cancel_fn_t)(struct mca_mtl_base_module_t *mtl, mca_mtl_request_t *mtl_request, int flag)
Cancel an existing request.
Definition: mtl.h:362
Definition: mpi.h:337
struct ompi_request_t * ompi_req
pointer to associated ompi_request_t
Definition: mtl.h:53
MTL module interface functions and attributes.
Definition: mtl.h:399
int(* mca_mtl_base_module_del_comm_fn_t)(struct mca_mtl_base_module_t *mtl, struct ompi_communicator_t *comm)
Downcall from PML layer when a communicator is destroyed.
Definition: mtl.h:391
Definition: communicator.h:118
Main top-level request struct definition.
Definition: request.h:100
int(* mca_mtl_base_module_add_procs_fn_t)(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t **procs, struct mca_mtl_base_endpoint_t **mtl_peer_data)
PML->MTL notification of change in the process list.
Definition: mtl.h:149
int(* mca_mtl_base_module_add_comm_fn_t)(struct mca_mtl_base_module_t *mtl, struct ompi_communicator_t *comm)
Downcall from PML layer when a new communicator is created.
Definition: mtl.h:377
Definition: message.h:22