OpenMPI  0.1.1
coll.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
3  * University Research and Technology
4  * Corporation. All rights reserved.
5  * Copyright (c) 2004-2005 The University of Tennessee and The University
6  * of Tennessee Research Foundation. All rights
7  * reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  * University of Stuttgart. All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  * All rights reserved.
12  * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
13  * Copyright (c) 2007-2008 UT-Battelle, LLC
14  * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
15  *
16  * $COPYRIGHT$
17  *
18  * Additional copyrights may follow
19  *
20  * $HEADER$
21  */
22 
23 /**
24  * @file
25  *
26  * Collective Communication Interface
27  *
28  * Interface for implementing the collective communication interface
29  * of MPI. The MPI interface provides error checking and error
30  * handler invocation, but the collective components provide all other
31  * functionality.
32  *
33  * Component selection is done per communicator, at communicator
34  * construction time. mca_coll_base_comm_select() is used to
35  * create the list of components available to the component
36  * collm_comm_query function, instantiating a module for each
37  * component that is usable, and sets the module collective function pointers.
38  * mca_coll_base_comm_select() then loops through the list of available
39  * components (via the instantiated module), and uses the
40  * module's coll_module_enable() function to enable the modules, and
41  * if successful, sets the communicator collective functions to
42  * those supplied by the given module, keeping track of which module it
43  * is associated with.
44  *
45  * The module destructors are called for each module used by the
46  * communicator, at communicator destruction time.
47  *
48  * This can result in up to N different components being used for a
49  * single communicator, one per needed collective function.
50  *
51  * The interface is the same for inter- or intra-communicators, and
52  * components should be able to handle either style of communicator
53  * during initialization (although handling may include indicating the
54  * component is not available).
55  */
56 
57 #ifndef OMPI_MCA_COLL_COLL_H
58 #define OMPI_MCA_COLL_COLL_H
59 
60 #include "ompi_config.h"
61 
62 #include "mpi.h"
63 #include "opal/mca/mca.h"
64 #include "opal/mca/base/base.h"
65 
66 #include "opal/mca/crs/crs.h"
67 #include "opal/mca/crs/base/base.h"
68 
69 #if OPAL_ENABLE_FT_MPI
70 #include "ompi/proc/proc.h"
71 #include "ompi/request/request.h"
72 #endif
73 
74 BEGIN_C_DECLS
75 
76 
77 /* ******************************************************************** */
78 
79 
80 struct ompi_communicator_t;
81 struct ompi_datatype_t;
82 struct ompi_op_t;
83 #if OPAL_ENABLE_FT_MPI
84 struct ompi_group_t;
85 #endif /* OPAL_ENABLE_FT_MPI */
86 
87 /* ******************************************************************** */
88 
89 
90 /**
91  * Collective component initialization
92  *
93  * Initialize the given collective component. This function should
94  * initialize any component-level data. It will be called exactly
95  * once during MPI_INIT.
96  *
97  * @note The component framework is not lazily opened, so attempts
98  * should be made to minimize the amount of memory allocated during
99  * this function.
100  *
101  * @param[in] enable_progress_threads True if the component needs to
102  * support progress threads
103  * @param[in] enable_mpi_threads True if the component needs to
104  * support MPI_THREAD_MULTIPLE
105  *
106  * @retval OMPI_SUCCESS Component successfully initialized
107  * @retval OMPI_ERROR An unspecified error occurred
108  */
110  (bool enable_progress_threads, bool enable_mpi_threads);
111 
112 
113 
114 /**
115  * Query whether a component is available for the given communicator
116  *
117  * Query whether the component is available for the given
118  * communicator. If the component is available, an object should be
119  * allocated and returned (with refcount at 1). The module will not
120  * be used for collective operations until module_enable() is called
121  * on the module, but may be destroyed (via OBJ_RELEASE) either before
122  * or after module_enable() is called. If the module needs to release
123  * resources obtained during query(), it should do so in the module
124  * destructor.
125  *
126  * A component may provide NULL to this function to indicate it does
127  * not wish to run or return an error during module_enable().
128  *
129  * @note The communicator is available for point-to-point
130  * communication, but other functionality is not available during this
131  * phase of initialization.
132  *
133  * @param[in] comm The communicator being created
134  * @param[out] priority Priority setting for component on
135  * this communicator
136  *
137  * @returns An initialized module structure if the component can
138  * provide a module with the requested functionality or NULL if the
139  * component should not be used on the given communicator.
140  */
/* Function-pointer type whose return value is a pointer to the versioned
 * module structure (struct mca_coll_base_module_2_0_0_t); the _2_0_0 suffix
 * in this type's name matches that structure's version suffix. */
141 typedef struct mca_coll_base_module_2_0_0_t *
142  (*mca_coll_base_component_comm_query_2_0_0_fn_t)
143  (struct ompi_communicator_t *comm, int *priority);
144 
145 
146 /* ******************************************************************** */
147 
148 
149 /**
150  * Enable module for collective communication
151  *
152  * Enable the module for collective communication. Modules are enabled
153  * in order from lowest to highest priority. At each component,
154  * collective functions with priority higher than the existing
155  * function are copied into the communicator's function table and the
156  * module's reference count is incremented. Replaced functions have
157  * their module's reference count decremented, so a component will go
158  * out of scope when it has been examined and is no longer used in any
159  * collective functions.
160  *
161  * Because the function list is built on increasing priority, a
162  * component that needs functions from a lower priority component
163  * (say, a multi-cast barrier that might need a point-to-point barrier
164  * for resource exhaustion issues) can keep the function pointer and
165  * module pointer and increase the reference count of the module and
166  * use the module during execution.
167  *
168  * When a module is not used for any interface functions and no
169  * higher-priority module has increased its refcount, it will have
170  * its destructor triggered and the module will be destroyed.
171  *
172  * @note The collective component should not modify the communicator
173  * during this operation. The communicator will be updated with the
174  * collective algorithm's function pointers and module (and the ref
175  * count increased on the module) by the base selection functionality.
176  *
177  * @param[in,out] module Module created during comm_query()
178  * @param[in] comm Communicator being created
179  */
180 typedef int
182  struct ompi_communicator_t *comm);
183 
184 
/*
 * Function-pointer types for the collective operations a module can
 * supply: allgather, allgatherv, allreduce, alltoall, alltoallv,
 * alltoallw, barrier, bcast, exscan, gather, gatherv, reduce,
 * reduce_scatter, scan, scatter and scatterv.
 *
 * Shared shape of every type in this group:
 *  - the return type is int (presumably an OMPI_* status code -- confirm
 *    against the callers);
 *  - the last two parameters are always the communicator followed by the
 *    module (struct mca_coll_base_module_2_0_0_t *);
 *  - the rooted operations (bcast, gather, gatherv, reduce, scatter,
 *    scatterv) take an "int root" immediately before the communicator.
 *
 * The leading buffer/count/datatype/op parameters appear to mirror the
 * argument lists of the corresponding MPI_* collective calls, using the
 * internal ompi_datatype_t / ompi_op_t / ompi_communicator_t structures
 * -- NOTE(review): confirm against the MPI bindings before relying on it.
 */
185 typedef int (*mca_coll_base_module_allgather_fn_t)
186  (void *sbuf, int scount, struct ompi_datatype_t *sdtype,
187  void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
188  struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
189 typedef int (*mca_coll_base_module_allgatherv_fn_t)
190  (void *sbuf, int scount, struct ompi_datatype_t *sdtype,
191  void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype,
192  struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
193 typedef int (*mca_coll_base_module_allreduce_fn_t)
194  (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
195  struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
196 typedef int (*mca_coll_base_module_alltoall_fn_t)
197  (void *sbuf, int scount, struct ompi_datatype_t *sdtype,
198  void* rbuf, int rcount, struct ompi_datatype_t *rdtype,
199  struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
200 typedef int (*mca_coll_base_module_alltoallv_fn_t)
201  (void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype,
202  void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype,
203  struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
204 typedef int (*mca_coll_base_module_alltoallw_fn_t)
205  (void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes,
206  void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes,
207  struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
208 typedef int (*mca_coll_base_module_barrier_fn_t)
209  (struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
210 typedef int (*mca_coll_base_module_bcast_fn_t)
211  (void *buff, int count, struct ompi_datatype_t *datatype, int root,
212  struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
213 typedef int (*mca_coll_base_module_exscan_fn_t)
214  (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
215  struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
216 typedef int (*mca_coll_base_module_gather_fn_t)
217  (void *sbuf, int scount, struct ompi_datatype_t *sdtype,
218  void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
219  int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
220 typedef int (*mca_coll_base_module_gatherv_fn_t)
221  (void *sbuf, int scount, struct ompi_datatype_t *sdtype,
222  void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype,
223  int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
224 typedef int (*mca_coll_base_module_reduce_fn_t)
225  (void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype,
226  struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
227 typedef int (*mca_coll_base_module_reduce_scatter_fn_t)
228  (void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype,
229  struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
230 typedef int (*mca_coll_base_module_scan_fn_t)
231  (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
232  struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
233 typedef int (*mca_coll_base_module_scatter_fn_t)
234  (void *sbuf, int scount, struct ompi_datatype_t *sdtype,
235  void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
236  int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
237 typedef int (*mca_coll_base_module_scatterv_fn_t)
238  (void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype,
239  void* rbuf, int rcount, struct ompi_datatype_t *rdtype,
240  int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_0_0_t *module);
241 
242 #if OPAL_ENABLE_FT_MPI
243 /* Fault Tolerant Agreement - Consensus Protocol */
/* Function-pointer type for the agreement operation; only declared when
 * OPAL_ENABLE_FT_MPI is set. Takes the communicator, a pointer to a group
 * pointer, a pointer to an int flag, and the module, and returns int.
 * NOTE(review): whether group and flag are inputs, outputs or both is not
 * visible from this declaration -- confirm against an implementation. */
244 typedef int (*mca_coll_base_module_agreement_fn_t)
245  (struct ompi_communicator_t* comm,
246  struct ompi_group_t **group,
247  int *flag,
248  struct mca_coll_base_module_2_0_0_t *module);
249 typedef int (*mca_coll_base_module_iagreement_fn_t)
250  (struct ompi_communicator_t* comm,
251  struct ompi_group_t **group,
252  int *flag,
253  struct mca_coll_base_module_2_0_0_t *module,
255 #endif /* OPAL_ENABLE_FT_MPI */
256 
257 
258 /**
259  * Fault Tolerance Awareness function.
260  *
261  * Fault tolerance function -- called when a process / job state
262  * change is noticed.
263  *
264  * @param[in] state State change that triggered the function
265  *
266  * @retval OMPI_SUCCESS Component successfully selected
267  * @retval OMPI_ERROR An unspecified error occurred
268  */
/* Takes only the state value: unlike the collective function types in this
 * file, no communicator or module pointer is passed to this callback. */
269 typedef int (*mca_coll_base_module_ft_event_fn_t) (int state);
270 
271 
272 /* ******************************************************************** */
273 
274 
275 /**
276  * Collective component interface
277  *
278  * Component interface for the collective framework. A public
279  * instance of this structure, called
280  * mca_coll_[component_name]_component, must exist in any collective
281  * component.
282  */
284  /** Base component description */
286  /** Base component data block */
288 
289  /** Component initialization function */
291  /** Query whether component is useable for given communicator */
293 };
295 
296 /** Per guidance in mca.h, use the unversioned struct name if you just
297  want to always keep up with the most recent version of the
298  interface. */
300 
301 
302 /**
303  * Collective module interface
304  *
305  * Module interface to the Collective framework. Modules are
306  * reference counted based on the number of functions from the module
307  * used on the communicator. There is at most one module per component
308  * on a given communicator, and there can be many component modules on
309  * a given communicator.
310  *
311  * @note The collective framework and the
312  * communicator functionality only stores a pointer to the module
313  * function, so the component is free to create a structure that
314  * inherits from this one for use as the module structure.
315  */
317  /** Collective modules all inherit from opal_object */
319 
320  /** Enable function called when a collective module is (possibly)
321  going to be used for the given communicator */
323 
324  /* Collective function pointers */
325  mca_coll_base_module_allgather_fn_t coll_allgather;
326  mca_coll_base_module_allgatherv_fn_t coll_allgatherv;
327  mca_coll_base_module_allreduce_fn_t coll_allreduce;
328  mca_coll_base_module_alltoall_fn_t coll_alltoall;
329  mca_coll_base_module_alltoallv_fn_t coll_alltoallv;
330  mca_coll_base_module_alltoallw_fn_t coll_alltoallw;
331  mca_coll_base_module_barrier_fn_t coll_barrier;
332  mca_coll_base_module_bcast_fn_t coll_bcast;
333  mca_coll_base_module_exscan_fn_t coll_exscan;
334  mca_coll_base_module_gather_fn_t coll_gather;
335  mca_coll_base_module_gatherv_fn_t coll_gatherv;
336  mca_coll_base_module_reduce_fn_t coll_reduce;
337  mca_coll_base_module_reduce_scatter_fn_t coll_reduce_scatter;
338  mca_coll_base_module_scan_fn_t coll_scan;
339  mca_coll_base_module_scatter_fn_t coll_scatter;
340  mca_coll_base_module_scatterv_fn_t coll_scatterv;
341 
342 #if OPAL_ENABLE_FT_MPI
343  mca_coll_base_module_agreement_fn_t coll_agreement;
344  mca_coll_base_module_iagreement_fn_t coll_iagreement;
345 #endif
346 
347  /** Fault tolerance event trigger function */
349 };
351 
352 /** Per guidance in mca.h, use the unversioned struct name if you just
353  want to always keep up with the most recent version of the
354  interface. */
357 
358 /**
359  * Collectives communicator cache structure
360  *
361  * Collectives communicator cache structure, used to find functions to
362  * implement collective algorithms and their associated modules. This
363  * structure may also be used internally by a module if it needs to
364  * keep a large number of "backing" functions, such as the demo
365  * component.
366  */
368  mca_coll_base_module_allgather_fn_t coll_allgather;
369  mca_coll_base_module_2_0_0_t *coll_allgather_module;
370  mca_coll_base_module_allgatherv_fn_t coll_allgatherv;
371  mca_coll_base_module_2_0_0_t *coll_allgatherv_module;
372  mca_coll_base_module_allreduce_fn_t coll_allreduce;
373  mca_coll_base_module_2_0_0_t *coll_allreduce_module;
374  mca_coll_base_module_alltoall_fn_t coll_alltoall;
375  mca_coll_base_module_2_0_0_t *coll_alltoall_module;
376  mca_coll_base_module_alltoallv_fn_t coll_alltoallv;
377  mca_coll_base_module_2_0_0_t *coll_alltoallv_module;
378  mca_coll_base_module_alltoallw_fn_t coll_alltoallw;
379  mca_coll_base_module_2_0_0_t *coll_alltoallw_module;
380  mca_coll_base_module_barrier_fn_t coll_barrier;
381  mca_coll_base_module_2_0_0_t *coll_barrier_module;
382  mca_coll_base_module_bcast_fn_t coll_bcast;
383  mca_coll_base_module_2_0_0_t *coll_bcast_module;
384  mca_coll_base_module_exscan_fn_t coll_exscan;
385  mca_coll_base_module_2_0_0_t *coll_exscan_module;
386  mca_coll_base_module_gather_fn_t coll_gather;
387  mca_coll_base_module_2_0_0_t *coll_gather_module;
388  mca_coll_base_module_gatherv_fn_t coll_gatherv;
389  mca_coll_base_module_2_0_0_t *coll_gatherv_module;
390  mca_coll_base_module_reduce_fn_t coll_reduce;
391  mca_coll_base_module_2_0_0_t *coll_reduce_module;
392  mca_coll_base_module_reduce_scatter_fn_t coll_reduce_scatter;
393  mca_coll_base_module_2_0_0_t *coll_reduce_scatter_module;
394  mca_coll_base_module_scan_fn_t coll_scan;
395  mca_coll_base_module_2_0_0_t *coll_scan_module;
396  mca_coll_base_module_scatter_fn_t coll_scatter;
397  mca_coll_base_module_2_0_0_t *coll_scatter_module;
398  mca_coll_base_module_scatterv_fn_t coll_scatterv;
399  mca_coll_base_module_2_0_0_t *coll_scatterv_module;
400 
401 #if OPAL_ENABLE_FT_MPI
402  mca_coll_base_module_agreement_fn_t coll_agreement;
403  mca_coll_base_module_2_0_0_t *coll_agreement_module;
404  mca_coll_base_module_iagreement_fn_t coll_iagreement;
405  mca_coll_base_module_2_0_0_t *coll_iagreement_module;
406 #endif /* OPAL_ENABLE_FT_MPI */
407 };
409 
410 
411 /* ******************************************************************** */
412 
413 
414 /*
415  * Macro for use in components that are of type coll
 *
 * Expands to a comma-separated token list: MCA_BASE_VERSION_2_0_0,
 * then the string "coll", then the integers 2, 0, 0.
 * NOTE(review): the string is presumably the framework name and the
 * integers its major/minor/release version -- confirm against mca.h.
 * Because the expansion contains top-level commas it is only usable
 * where a list is expected (e.g. inside an initializer), not as a
 * single expression.
416  */
417 #define MCA_COLL_BASE_VERSION_2_0_0 \
418  MCA_BASE_VERSION_2_0_0, \
419  "coll", 2, 0, 0
420 
421 
422 /* ******************************************************************** */
423 
424 
425 END_C_DECLS
426 
427 #endif /* OMPI_MCA_COLL_COLL_H */
Common type for all MCA components.
Definition: mca.h:250
mca_coll_base_module_ft_event_fn_t ft_event
Fault tolerance event trigger function.
Definition: coll.h:348
Collective module interface.
Definition: coll.h:316
Definition: ompi_datatype.h:68
Collective component interface.
Definition: coll.h:283
Process identification structure interface.
mca_coll_base_component_comm_query_2_0_0_fn_t collm_comm_query
Query whether component is useable for given communicator.
Definition: coll.h:292
Top-level interface for all MCA components.
mca_base_component_t collm_version
Base component description.
Definition: coll.h:285
mca_base_component_data_t collm_data
Base component data block.
Definition: coll.h:287
Top-level description of requests.
int(* mca_coll_base_component_init_query_fn_t)(bool enable_progress_threads, bool enable_mpi_threads)
Collective component initialization.
Definition: coll.h:110
int(* mca_coll_base_module_enable_1_1_0_fn_t)(struct mca_coll_base_module_2_0_0_t *module, struct ompi_communicator_t *comm)
Enable module for collective communication.
Definition: coll.h:181
Group structure Currently we have four formats for storing the process pointers that are members of t...
Definition: group.h:79
Base object.
Definition: opal_object.h:182
int(* mca_coll_base_module_ft_event_fn_t)(int state)
Fault Tolerance Awareness function.
Definition: coll.h:269
Meta data for MCA v2.0.0 components.
Definition: mca.h:309
mca_coll_base_component_init_query_fn_t collm_init_query
Component initialization function.
Definition: coll.h:290
mca_coll_base_module_enable_1_1_0_fn_t coll_module_enable
Enable function called when a collective module is (possibly) going to be used for the given communic...
Definition: coll.h:322
Definition: evdns.c:158
Checkpoint and Restart Service (CRS) Interface.
struct mca_coll_base_module_2_0_0_t *(* mca_coll_base_component_comm_query_2_0_0_fn_t)(struct ompi_communicator_t *comm, int *priority)
Query whether a component is available for the given communicator.
Definition: coll.h:143
Definition: communicator.h:118
Main top-level request struct definition.
Definition: request.h:100
Back-end type of MPI_Op.
Definition: op.h:100
Collectives communicator cache structure.
Definition: coll.h:367
opal_object_t super
Collective modules all inherit from opal_object.
Definition: coll.h:318
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236