OpenMPI  0.1.1
coll_hierarch.h
1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
4  * University Research and Technology
5  * Corporation. All rights reserved.
6  * Copyright (c) 2004-2007 The University of Tennessee and The University
7  * of Tennessee Research Foundation. All rights
8  * reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  * University of Stuttgart. All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  * All rights reserved.
13  * Copyright (c) 2007-2008 University of Houston. All rights reserved.
14  * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
15  * $COPYRIGHT$
16  *
17  * Additional copyrights may follow
18  *
19  * $HEADER$
20  */
21 
22 #ifndef MCA_COLL_HIERARCH_EXPORT_H
23 #define MCA_COLL_HIERARCH_EXPORT_H
24 
25 #define ALL_LEVELS 0
26 #define TWO_LEVELS 2
27 
28 #include "ompi_config.h"
29 #include "ompi/constants.h"
30 
31 #include "mpi.h"
33 #include "opal/mca/mca.h"
34 #include "ompi/mca/coll/coll.h"
35 #include "ompi/request/request.h"
36 #include "ompi/communicator/communicator.h"
37 
38 BEGIN_C_DECLS
39 
40 /*
41  * Globally exported variable
42  */
43 
44 OMPI_MODULE_DECLSPEC extern const mca_coll_base_component_2_0_0_t mca_coll_hierarch_component;
45 
46 extern int mca_coll_hierarch_priority_param;
47 extern int mca_coll_hierarch_verbose_param;
48 extern int mca_coll_hierarch_use_rdma_param;
49 extern int mca_coll_hierarch_ignore_sm_param;
50 extern int mca_coll_hierarch_detection_alg_param;
51 extern int mca_coll_hierarch_bcast_alg_param;
52 extern int mca_coll_hierarch_segsize_param;
53 
54 
55 #define COLL_HIERARCH_SEG_BCAST_ALG 0
56 #define COLL_HIERARCH_SEG1_BCAST_ALG 1
57 #define COLL_HIERARCH_SEG2_BCAST_ALG 2
58 #define COLL_HIERARCH_SEG3_BCAST_ALG 3
59 #define COLL_HIERARCH_BASIC_BCAST_ALG 4
60 
61 
62 
63 #define HIER_DEFAULT_NUM_LLEAD 5
64 /*
65  * Data structure for attaching data to the communicator
66  */
67 
68 /* Clarifying some terminology:
69  * comm: the input communicator, consisting of several lower level communicators.
70  * lcomm: low level communicator, often referred to as subcommunicator
71  * lleader: local leader, a dedicated process of each low level communicator
72  ATTENTION: an lleader might be the 'head' of a low level
73  communicator of size one!
74  * llcomm: local leader communicator, grouping all local leaders of a comm.
75 */
76 
79 
80  struct ompi_communicator_t *hier_comm; /* link back to the attached comm */
81  struct ompi_communicator_t *hier_lcomm; /* low level communicator */
82  opal_pointer_array_t hier_llead; /* local leader communicator structure */
83  int hier_num_lleaders; /* number of local leaders */
84  int hier_level; /* level in the hierarchy. For debugging*/
85  int hier_num_reqs; /* num. of requests */
86  ompi_request_t **hier_reqs; /* list of requests */
87  int hier_num_colorarr; /* size of the colorarr array */
88  int *hier_llr; /* color array compacted (1 entry per color).
89  Array of size hier_num_lleaders */
90  int *hier_max_offset; /* Number of processes for each color.
91  Array of size hier_num_lleaders */
92  int *hier_colorarr; /* array containing the color of all procs */
93 };
96 
98  struct ompi_communicator_t *llcomm; /* local leader communicator */
99  int *lleaders; /* list of local leaders, ranks in comm */
100  int my_lleader; /* rank of my lleader in lcomm */
101  int am_lleader; /* am I an lleader? */
102  int offset; /* Offset used for this llcomm */
103 };
104 
106 
107 
108 static inline int mca_coll_hierarch_count_lleaders ( int size, int *carr)
109 {
110  /*
111  * Determine the number of local leaders. Please note, that any process
112  * with color = MPI_UNDEFINED will be counted as the head of a group of its own.
113  * Please note furthermore, that every process with color=MPI_UNDEFINED will be
114  * stored in this array on its own...
115  */
116  int cnt, i, j, found;
117  int *llr=NULL;
118 
119  llr = (int *) malloc ( size * sizeof(int));
120  if (NULL == llr ){
121  return OMPI_ERR_OUT_OF_RESOURCE;
122  }
123 
124  llr[0] = carr[0];
125  for (cnt=1, i=1; i<size; i++ ) {
126  if ( carr[i] == MPI_UNDEFINED ) {
127  llr[cnt++] = carr[i];
128  continue;
129  }
130  for ( found=0, j=0; j<cnt; j++ ) {
131  if ( carr[i] == llr[j] ) {
132  found = 1;
133  break;
134  }
135  }
136  if ( !found ) {
137  llr[cnt++] = carr[i];
138  }
139  }
140 
141  free (llr);
142  return cnt;
143 }
144 
145 static inline int mca_coll_hierarch_get_offset ( int rank, int size, int *carr)
146 {
147  int offset, i, color = carr[rank];
148 
149  if ( color == MPI_UNDEFINED ) {
150  /* always */
151  return 1;
152  }
153 
154  for ( offset=0, i=0; i<=rank; i++) {
155  if ( carr[i] == color ) {
156  offset++;
157  }
158  }
159 
160  return offset;
161 }
162 
163 
164 
165 /* This function determines the parameters required in hierarchical
166  * collective operations. It is called from the collective operations themselves.
167  *
168  * @param rroot (input): rank of the root process in comm
169  * @param hierarch_module (input): module structure. Contains
170  * all relevant, precomputed data for this set of collectives.
171  *
172  * @param llroot (output): rank of the root process in llcomm, MPI_UNDEFINED for all
173  * processes not being part of the local leader communicator.
174  * @param lleader (output): rank of the local leader in the low level communicator,
175  * or MPI_UNDEFINED if there is no low level communicator.
176  * return value: llcomm (local leader communicator) or MPI_COMM_NULL for
177  * all processes not being part of the local leader communicator.
178  */
179 
180 struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rroot,
181  mca_coll_hierarch_module_t *hierarch_module,
182  int* llroot,
183  int* lleader);
184 
185 /* This function is supposed to set up all elements of the mca_coll_base_comm_t
186  * structure, including:
187  * hierarch_module->hier_num_lleaders: determine number of local leaders in the comms
188  * hierarch_module->hier_llr: array of size hier_num_lleaders containing the colors
189  * hierarch_module->hier_max_offset: array containing the counter for each color how often
190  * it appears in the colorarr array.
191  */
192 
193 int mca_coll_hierarch_get_llr ( mca_coll_hierarch_module_t *hierarch_module );
194 
195 
196 /* This function is supposed to set all elements of the llead structure based on the
197  * offset and the rank of the process.
198  *
199  * @param rank(input): rank of the calling process in comm
200  * @param hierarch_module(input): structure of the hierarchical module. Contains
201  * all relevant, precomputed data for this set of collectives.
202  * @param llead(output): ptr to the mca_coll_hierarch_llead_t element which should
203  * be set
204  * @param offset(input): offset which shall be used.
205  */
206 
207 int mca_coll_hierarch_get_all_lleaders ( int rank, mca_coll_hierarch_module_t *hierarch_module,
208  struct mca_coll_hierarch_llead_t *llead,
209  int offset );
210 
211 
212 
213 /*
214  * coll API functions
215  */
216 int mca_coll_hierarch_init_query(bool allow_hierarch_user_threads,
217  bool have_hidden_threads);
219 mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority );
220 
221 
222 int mca_coll_hierarch_module_enable( mca_coll_base_module_t *module,
223  struct ompi_communicator_t *comm);
224 
225 int mca_coll_hierarch_module_finalize(struct ompi_communicator_t *comm);
226 
227 int mca_coll_hierarch_allgather_intra(void *sbuf, int scount,
228  struct ompi_datatype_t *sdtype,
229  void *rbuf, int rcount,
230  struct ompi_datatype_t *rdtype,
231  struct ompi_communicator_t *comm,
232  mca_coll_base_module_t *module );
233 int mca_coll_hierarch_allgatherv_intra(void *sbuf, int scount,
234  struct ompi_datatype_t *sdtype,
235  void * rbuf, int *rcounts,
236  int *disps,
237  struct ompi_datatype_t *rdtype,
238  struct ompi_communicator_t *comm,
239  mca_coll_base_module_t *module);
240 int mca_coll_hierarch_allreduce_intra(void *sbuf, void *rbuf, int count,
241  struct ompi_datatype_t *dtype,
242  struct ompi_op_t *op,
243  struct ompi_communicator_t *comm,
244  mca_coll_base_module_t *module);
245 int mca_coll_hierarch_alltoall_intra(void *sbuf, int scount,
246  struct ompi_datatype_t *sdtype,
247  void* rbuf, int rcount,
248  struct ompi_datatype_t *rdtype,
249  struct ompi_communicator_t *comm,
250  mca_coll_base_module_t *module);
251 int mca_coll_hierarch_alltoallv_intra(void *sbuf, int *scounts,
252  int *sdisps,
253  struct ompi_datatype_t *sdtype,
254  void *rbuf, int *rcounts,
255  int *rdisps,
256  struct ompi_datatype_t *rdtype,
257  struct ompi_communicator_t *comm,
258  mca_coll_base_module_t *module);
259 int mca_coll_hierarch_alltoallw_intra(void *sbuf, int *scounts,
260  int *sdisps,
261  struct ompi_datatype_t **sdtypes,
262  void *rbuf, int *rcounts,
263  int *rdisps,
264  struct ompi_datatype_t **rdtypes,
265  struct ompi_communicator_t *comm,
266  mca_coll_base_module_t *module);
267 int mca_coll_hierarch_barrier_intra(struct ompi_communicator_t *comm,
268  mca_coll_base_module_t *module);
269 int mca_coll_hierarch_bcast_intra(void *buff, int count,
270  struct ompi_datatype_t *datatype,
271  int root,
272  struct ompi_communicator_t *comm,
273  mca_coll_base_module_t *module);
274 int mca_coll_hierarch_exscan_intra(void *sbuf, void *rbuf, int count,
275  struct ompi_datatype_t *dtype,
276  struct ompi_op_t *op,
277  struct ompi_communicator_t *comm);
278 int mca_coll_hierarch_gather_intra(void *sbuf, int scount,
279  struct ompi_datatype_t *sdtype,
280  void *rbuf, int rcount,
281  struct ompi_datatype_t *rdtype,
282  int root,
283  struct ompi_communicator_t *comm,
284  mca_coll_base_module_t *module);
285 int mca_coll_hierarch_gatherv_intra(void *sbuf, int scount,
286  struct ompi_datatype_t *sdtype,
287  void *rbuf, int *rcounts, int *disps,
288  struct ompi_datatype_t *rdtype,
289  int root,
290  struct ompi_communicator_t *comm,
291  mca_coll_base_module_t *module);
292 int mca_coll_hierarch_reduce_intra(void *sbuf, void* rbuf, int count,
293  struct ompi_datatype_t *dtype,
294  struct ompi_op_t *op,
295  int root,
296  struct ompi_communicator_t *comm,
297  mca_coll_base_module_t *module);
298 int mca_coll_hierarch_reduce_scatter_intra(void *sbuf, void *rbuf,
299  int *rcounts,
300  struct ompi_datatype_t *dtype,
301  struct ompi_op_t *op,
302  struct ompi_communicator_t *comm,
303  mca_coll_base_module_t *module);
304 int mca_coll_hierarch_scan_intra(void *sbuf, void *rbuf, int count,
305  struct ompi_datatype_t *dtype,
306  struct ompi_op_t *op,
307  struct ompi_communicator_t *comm,
308  mca_coll_base_module_t *module);
309 int mca_coll_hierarch_scatter_intra(void *sbuf, int scount,
310  struct ompi_datatype_t *sdtype, void *rbuf,
311  int rcount, struct ompi_datatype_t *rdtype,
312  int root, struct ompi_communicator_t *comm,
313  mca_coll_base_module_t *module);
314 int mca_coll_hierarch_scatterv_intra(void *sbuf, int *scounts, int *disps,
315  struct ompi_datatype_t *sdtype,
316  void* rbuf, int rcount,
317  struct ompi_datatype_t *rdtype, int root,
318  struct ompi_communicator_t *comm,
319  mca_coll_base_module_t *module);
320 
321 /*
322  * These are trivial implementations of these routines used during comm_query/init,
323  * since we cannot access any other collectives
324  */
325 int mca_coll_hierarch_allgather_tmp(void *sbuf, int scount,
326  struct ompi_datatype_t *sdtype,
327  void *rbuf, int rcount,
328  struct ompi_datatype_t *rdtype,
329  struct ompi_communicator_t *comm);
330 int mca_coll_hierarch_allreduce_tmp(void *sbuf, void *rbuf, int count,
331  struct ompi_datatype_t *dtype,
332  struct ompi_op_t *op,
333  struct ompi_communicator_t *comm);
334 int mca_coll_hierarch_bcast_tmp ( void *buf, int count, struct ompi_datatype_t *dtype,
335  int root, struct ompi_communicator_t *comm);
336 
337 int mca_coll_hierarch_gather_tmp(void *sbuf, int scount,
338  struct ompi_datatype_t *sdtype,
339  void *rbuf, int rcount,
340  struct ompi_datatype_t *rdtype,
341  int root, struct ompi_communicator_t *comm);
342 int mca_coll_hierarch_reduce_tmp(void *sbuf, void *rbuf, int count,
343  struct ompi_datatype_t *dtype,
344  struct ompi_op_t *op,
345  int root, struct ompi_communicator_t *comm);
346 
347 int mca_coll_hierarch_ft_event(int status);
348 
349 END_C_DECLS
350 
351 #endif /* MCA_COLL_HIERARCH_EXPORT_H */
dynamic pointer array
Definition: opal_pointer_array.h:45
Collective module interface.
Definition: coll.h:316
Definition: ompi_datatype.h:68
Collective component interface.
Definition: coll.h:283
See opal_bitmap.h for an explanation of why there is a split between OPAL and ORTE for this generic c...
Top-level interface for all MCA components.
Collective Communication Interface.
Top-level description of requests.
Definition: coll_hierarch.h:97
Definition: coll_hierarch.h:77
Definition: communicator.h:118
Main top-level request struct definition.
Definition: request.h:100
Back-end type of MPI_Op.
Definition: op.h:100
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236