OpenMPI  0.1.1
coll_ftbasic_agreement.h
1 /*
2  * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
3  *
4  * $COPYRIGHT$
5  *
6  * Additional copyrights may follow
7  *
8  * $HEADER$
9  */
10 #ifndef MCA_COLL_FTBASIC_AGREEMENT_EXPORT_H
11 #define MCA_COLL_FTBASIC_AGREEMENT_EXPORT_H
12 
13 #include "coll_ftbasic.h"
14 
15 BEGIN_C_DECLS
16 
17 /* Match this with opal_bitmap.c */
18 #define SIZE_OF_CHAR ((int) (sizeof(char) * 8))
19 
20 /* Extra bits added to bitmap
21  * +1 : COMMIT / ABORT Bit
22  * +SIZE_OF_CHAR : flag (as a separate segment in bitmap)
23  */
24 #define FTBASIC_AGREEMENT_EXTRA_BITS (1 + SIZE_OF_CHAR)
25 
/*
 * Bit index used for the flag segment of an agreement bitmap.
 *
 * Evaluates to (array_size - 1) * SIZE_OF_CHAR + 1, i.e. bit 1 of the last
 * SIZE_OF_CHAR-wide element of the bitmap's backing array (this assumes
 * array_size counts SIZE_OF_CHAR-bit elements, as in opal_bitmap -- confirm).
 *
 * bm : pointer to a bitmap object exposing an 'array_size' member.  The
 *      argument is parenthesized so any pointer expression (e.g. &local)
 *      can be passed.
 */
#define FTBASIC_AGREEMENT_FLAG_BIT_LOC(bm) ( ( ((bm)->array_size - 1) * SIZE_OF_CHAR + 1 ) )
27 
28  /*
29  * Base supporting functions
30  */
31  extern int mca_coll_ftbasic_agreement_num_active_nonblocking;
32  extern int mca_coll_ftbasic_agreement_help_num_asking;
33  extern int mca_coll_ftbasic_agreement_help_wait_cycles;
34 
35  int mca_coll_ftbasic_agreement_init(mca_coll_ftbasic_module_t *module);
36  int mca_coll_ftbasic_agreement_finalize(mca_coll_ftbasic_module_t *module);
37 
38  int mca_coll_ftbasic_agreement_base_setup_blocking(ompi_communicator_t* comm,
39  ompi_group_t **group,
40  int *flag,
41  opal_bitmap_t *local_bitmap,
42  mca_coll_ftbasic_module_t *ftbasic_module);
43  int mca_coll_ftbasic_agreement_base_setup_nonblocking(ompi_communicator_t* comm,
44  ompi_group_t **group,
45  int *flag,
46  opal_bitmap_t *local_bitmap,
47  mca_coll_ftbasic_module_t *ftbasic_module,
49 
50  int mca_coll_ftbasic_agreement_base_finish_blocking(ompi_communicator_t* comm,
51  ompi_group_t **group,
52  int *flag,
53  opal_bitmap_t *local_bitmap,
54  mca_coll_ftbasic_module_t *ftbasic_module);
55 
56  int mca_coll_ftbasic_agreement_base_finish_nonblocking(ompi_communicator_t* comm,
57  ompi_group_t **group,
58  int *flag,
59  opal_bitmap_t *local_bitmap,
60  mca_coll_ftbasic_module_t *ftbasic_module);
61 
62  int mca_coll_ftbasic_agreement_base_term_request_help(ompi_communicator_t* comm,
63  mca_coll_ftbasic_module_t *ftbasic_module);
64 
65 
66  /*
67  * Initialize the local bitmap from locally known failure set
68  */
69  int agreement_init_local_bitmap(ompi_communicator_t* comm, opal_bitmap_t *local_bitmap);
70 
71  /*
72  * Allreduce specific
73  */
74  int mca_coll_ftbasic_agreement_allreduce_init(mca_coll_ftbasic_module_t *module);
75  int mca_coll_ftbasic_agreement_allreduce_finalize(mca_coll_ftbasic_module_t *module);
76 
77  /*
78  * Two phase specific
79  */
80  int mca_coll_ftbasic_agreement_two_phase_init(mca_coll_ftbasic_module_t *module);
81  int mca_coll_ftbasic_agreement_two_phase_finalize(mca_coll_ftbasic_module_t *module);
82 
83  extern int mca_coll_ftbasic_agreement_two_phase_progress(void);
84  extern int mca_coll_ftbasic_agreement_two_phase_term_progress(void);
85 
86  /*
87  * Log two phase specific
88  */
89  int mca_coll_ftbasic_agreement_log_two_phase_init(mca_coll_ftbasic_module_t *module);
90  int mca_coll_ftbasic_agreement_log_two_phase_finalize(mca_coll_ftbasic_module_t *module);
91 
92  extern int mca_coll_ftbasic_agreement_log_two_phase_progress(void);
93  extern int mca_coll_ftbasic_agreement_log_two_phase_term_progress(void);
94  int mca_coll_ftbasic_agreement_log_two_phase_refresh_tree(opal_bitmap_t *local_bitmap,
95  ompi_communicator_t* comm,
96  mca_coll_ftbasic_module_t *ftbasic_module);
97 
98  /*
99  * Log entry structure
100  */
101 typedef enum {
102  /*
103  * Protocol has not started yet
104  */
105  AGREEMENT_STATE_NONE = 0,
106  /*
107  * Vote has been requested of the children
108  */
109  AGREEMENT_STATE_VOTE_REQ = 1,
110  /*
111  * Vote has been sent to the coordinator
112  */
113  AGREEMENT_STATE_VOTED = 2,
114  /*
115  * Previously voted, but not yet commit/abort.
116  * A peer has asked for the result indicating that the coordinator
117  * has failed, but I may or may not know it yet. So I need to
118  * ignore any message from the coordinator that may be buffered
119  * and decide with the remaining group.
120  */
121  AGREEMENT_STATE_UNCERTAIN = 3,
122  /*
123  * Decided Commit from the coordinator
124  */
125  AGREEMENT_STATE_COMMIT = 4,
126  /*
127  * Decided Commit from the coordinator (finished bcast)
128  */
129  AGREEMENT_STATE_COMMITTED = 5,
130  /*
131  * Decided Abort
132  */
133  AGREEMENT_STATE_ABORT = 6,
134  /*
135  * Decided Abort (finished bcast)
136  */
137  AGREEMENT_STATE_ABORTED = 7
138 } agreement_state_t;
139 
/*
 * Map an agreement_state_t value to a short string literal for log/debug
 * output.  Values that match none of the AGREEMENT_STATE_* enumerators
 * yield "Unknown".
 *
 * loc_state : state value to describe.  It is compared against each
 *             enumerator in turn, so it may be evaluated several times and
 *             must be free of side effects.
 */
#define AGREEMENT_STATE_STR(loc_state)                              \
    ( AGREEMENT_STATE_COMMIT    == (loc_state) ? "Commit"    :      \
      AGREEMENT_STATE_COMMITTED == (loc_state) ? "Committed" :      \
      AGREEMENT_STATE_ABORT     == (loc_state) ? "Abort"     :      \
      AGREEMENT_STATE_ABORTED   == (loc_state) ? "Aborted"   :      \
      AGREEMENT_STATE_UNCERTAIN == (loc_state) ? "Uncertain" :      \
      AGREEMENT_STATE_VOTED     == (loc_state) ? "Voted"     :      \
      AGREEMENT_STATE_VOTE_REQ  == (loc_state) ? "Vote Req." :      \
      AGREEMENT_STATE_NONE      == (loc_state) ? "None"      :      \
                                                 "Unknown" )
150 
152  /** This is a list object */
154 
155  /** Agreement Seq Number */
156  int seq_num;
157 
158  /** State of the protocol */
159  agreement_state_t state;
160 
161  /** Committed bitmap */
163 
164  /** Attempt number - For debugging only */
166 };
169 
171  mca_coll_ftbasic_agreement_log_entry_find(ompi_communicator_t* comm,
172  int seq_num,
173  bool create,
174  mca_coll_ftbasic_module_t *ftbasic_module);
175 
176 
177  /*
178  * Remote bitmap list item
179  * Must be declared here so we can setup the free list
180  */
182  /** This is a list object */
184 
185  /* Rank of the process */
186  int rank;
187 
188  /* Bitmap associated */
189  opal_bitmap_t *bitmap;
190 };
193 
194 extern ompi_free_list_t mca_coll_ftbasic_remote_bitmap_free_list;
195 extern int mca_coll_ftbasic_remote_bitmap_num_modules;
196 
197  /*
198  * Remote bitmap free list accessor macros
199  */
/*
 * Take one item from mca_coll_ftbasic_remote_bitmap_free_list, store it in
 * 'rbm', and initialize its bitmap to 'size' bits plus
 * FTBASIC_AGREEMENT_EXTRA_BITS.
 *
 * rbm  : lvalue of type mca_coll_ftbasic_remote_bitmap_t*; receives the item.
 *        It is left NULL when the free list yields no item, in which case
 *        the bitmap initialization is skipped instead of dereferencing NULL.
 * size : number of payload bits requested (the extra bits are added here).
 *
 * The status reported by OMPI_FREE_LIST_GET and the return value of
 * opal_bitmap_init are not propagated; callers detect failure through
 * rbm == NULL only.
 */
#define REMOTE_BITMAP_ALLOC(rbm, size)                                          \
    do {                                                                        \
        /* Trailing-underscore names avoid capturing caller identifiers */      \
        ompi_free_list_item_t *rbm_alloc_item_ = NULL;                          \
        int rbm_alloc_rc_;                                                      \
                                                                                \
        OMPI_FREE_LIST_GET(&mca_coll_ftbasic_remote_bitmap_free_list,           \
                           rbm_alloc_item_, rbm_alloc_rc_);                     \
        (void) rbm_alloc_rc_;                                                   \
        (rbm) = (mca_coll_ftbasic_remote_bitmap_t *) rbm_alloc_item_;           \
        if( NULL != (rbm) ) {                                                   \
            opal_bitmap_init((rbm)->bitmap,                                     \
                             (size) + FTBASIC_AGREEMENT_EXTRA_BITS);            \
        }                                                                       \
    } while(0)
210 
/*
 * Hand a remote-bitmap item back to mca_coll_ftbasic_remote_bitmap_free_list.
 *
 * rbm : pointer to the item being returned (presumably one obtained through
 *       REMOTE_BITMAP_ALLOC -- confirm against callers).  The argument is
 *       parenthesized so the cast applies to the whole expression.
 */
#define REMOTE_BITMAP_RETURN(rbm)                                               \
    do {                                                                        \
        OMPI_FREE_LIST_RETURN(&mca_coll_ftbasic_remote_bitmap_free_list,        \
                              (ompi_free_list_item_t *)(rbm));                  \
    } while(0)
216 
/*
 * Advance the module's cursor (agreement_info->last_used) over its
 * remote_bitmaps list and store the item it now points at in 'rbm'.
 *
 *  - List non-empty and cursor NULL        : cursor moves to the first item.
 *  - List non-empty and cursor not at tail : cursor moves to the next item.
 *  - List empty, or cursor already at tail : a new item is obtained with
 *    REMOTE_BITMAP_ALLOC, appended to the list, and the cursor moves to it.
 *
 * ftmodule : pointer to the module whose agreement_info is used.
 * rbm      : lvalue of type mca_coll_ftbasic_remote_bitmap_t*; receives the
 *            selected item.
 * size     : bit count forwarded to REMOTE_BITMAP_ALLOC when a new item is
 *            needed; existing items are returned as they are.
 *
 * 'ftmodule' is evaluated several times and must be free of side effects.
 */
#define REMOTE_BITMAP_GET_NEXT(ftmodule, rbm, size)                                                   \
    do {                                                                                              \
        if( 0 < opal_list_get_size(&((ftmodule)->agreement_info->remote_bitmaps)) &&                  \
            NULL == (ftmodule)->agreement_info->last_used ) {                                         \
            /* Cursor was reset: start from the head of the list */                                   \
            (ftmodule)->agreement_info->last_used =                                                   \
                opal_list_get_first(&((ftmodule)->agreement_info->remote_bitmaps));                   \
        }                                                                                             \
        else if( 0 < opal_list_get_size(&((ftmodule)->agreement_info->remote_bitmaps)) &&             \
                 (ftmodule)->agreement_info->last_used !=                                             \
                     opal_list_get_last(&((ftmodule)->agreement_info->remote_bitmaps)) ) {            \
            /* Reuse the next already-allocated item */                                               \
            (ftmodule)->agreement_info->last_used =                                                   \
                opal_list_get_next((ftmodule)->agreement_info->last_used);                            \
        }                                                                                             \
        else {                                                                                        \
            /* List is empty or exhausted: grow it by one item */                                     \
            REMOTE_BITMAP_ALLOC(rbm, size);                                                           \
            opal_list_append(&((ftmodule)->agreement_info->remote_bitmaps),                           \
                             (opal_list_item_t*)(rbm));                                               \
            (ftmodule)->agreement_info->last_used =                                                   \
                opal_list_get_last(&((ftmodule)->agreement_info->remote_bitmaps));                    \
        }                                                                                             \
        (rbm) = (mca_coll_ftbasic_remote_bitmap_t*)(ftmodule)->agreement_info->last_used;             \
    } while(0)
243 
/*
 * Reset the module's remote-bitmap cursor: set 'rank' to -1 on every item
 * of agreement_info->remote_bitmaps and clear agreement_info->last_used.
 * Does nothing when the module has no agreement_info.
 *
 * ftmodule : pointer to the module; evaluated several times, so it must be
 *            free of side effects.
 *
 * The guard is written as "if (nothing to do) {} else { work }" so that an
 * 'else' following the macro at the call site can never bind to the macro's
 * own 'if'.  The macro still works with or without a trailing semicolon.
 */
#define REMOTE_BITMAP_RESET_NEXT(ftmodule)                                                      \
    if( NULL == (ftmodule)->agreement_info ) {                                                  \
        /* nothing to reset */                                                                  \
    } else {                                                                                    \
        opal_list_item_t *reset_item_ = NULL;                                                   \
                                                                                                \
        for( reset_item_ = opal_list_get_first(&((ftmodule)->agreement_info->remote_bitmaps));  \
             reset_item_ != opal_list_get_end(&((ftmodule)->agreement_info->remote_bitmaps));   \
             reset_item_ = opal_list_get_next(reset_item_) ) {                                  \
            ((mca_coll_ftbasic_remote_bitmap_t*)reset_item_)->rank = -1;                        \
        }                                                                                       \
        (ftmodule)->agreement_info->last_used = NULL;                                           \
    }
257 
258 
259  /*
260  * Agreement Specific Request Object
261  * Used by the nonblocking agreement operation
262  */
264  /* Base request */
265  ompi_request_t req_ompi;
266 
267  /* Source */
268  int mpi_source;
269  /* Error Code */
270  int mpi_error;
271  /* If free was called on this request */
272  bool free_called;
273 
274  /* Local bitmap to start with */
275  opal_bitmap_t *local_bitmap;
276  /* Pointer to group */
277  ompi_group_t *group;
278  /* Point to flag passed by user */
279  int *flag;
280 
281  /* Coordinator in the collective */
282  int coordinator;
283  /* Stage in the collective */
284  int stage;
285  /* Pointer to the log entry */
286  opal_list_item_t *log_entry;
287  /* Current number of outstanding requests */
288  int num_reqs;
289 };
292 
293  /*
294  * Agreement Request Support macros
295  */
/*
 * Create a new mca_coll_ftbasic_request_t with OBJ_NEW, initialize its base
 * request with OMPI_REQUEST_INIT, attach the communicator, and mark it
 * active and not yet complete.
 *
 * collreq      : lvalue of type mca_coll_ftbasic_request_t*; receives the new
 *                request.  Left NULL (and untouched otherwise) if OBJ_NEW
 *                returns NULL.
 * communicator : communicator stored in req_ompi.req_mpi_object.comm.
 *
 * The second macro parameter is deliberately NOT named 'comm': a parameter
 * with that name would also be substituted into the '.comm' member access,
 * making the macro compile only when the caller's variable is itself called
 * 'comm'.
 */
#define MCA_COLL_FTBASIC_REQUEST_ALLOC(collreq, communicator)                \
    {                                                                        \
        (collreq) = OBJ_NEW(mca_coll_ftbasic_request_t);                     \
                                                                             \
        if( NULL != (collreq) ) {                                            \
            OMPI_REQUEST_INIT(&(collreq)->req_ompi, false);                  \
            (collreq)->req_ompi.req_mpi_object.comm = (communicator);        \
            (collreq)->req_ompi.req_complete = false;                        \
            (collreq)->req_ompi.req_state = OMPI_REQUEST_ACTIVE;             \
        }                                                                    \
    }
305 
/*
 * Finalize the base request embedded in an agreement request by invoking
 * OMPI_REQUEST_FINI on it.  This macro does not release 'collreq' itself.
 *
 * collreq : pointer to the mca_coll_ftbasic_request_t being finalized.
 *
 * OMPI_REQUEST_FINI is given the ADDRESS of req_ompi, matching the
 * OMPI_REQUEST_INIT(&(collreq)->req_ompi, ...) call used at allocation
 * time; req_ompi is an embedded struct, not a pointer.
 */
#define MCA_COLL_FTBASIC_REQUEST_FREE(collreq)                               \
    {                                                                        \
        OMPI_REQUEST_FINI(&(collreq)->req_ompi);                             \
    }
310 
/*
 * Mark an agreement request complete, at most once.
 *
 * While holding ompi_request_lock: if the base request is not already
 * complete, fill in its status (MPI_SOURCE from collreq->mpi_source,
 * MPI_TAG = -1, MPI_ERROR from collreq->mpi_error, _ucount = 0) and call
 * ompi_request_complete() on it.  A request that is already complete is
 * left untouched.
 *
 * collreq : pointer to the mca_coll_ftbasic_request_t; evaluated several
 *           times, so it must be free of side effects.
 */
#define MCA_COLL_FTBASIC_REQUEST_COMPLETE(collreq)                                   \
    {                                                                                \
        OPAL_THREAD_LOCK(&ompi_request_lock);                                        \
        if( false == (collreq)->req_ompi.req_complete ) {                            \
            (collreq)->req_ompi.req_status.MPI_SOURCE = (collreq)->mpi_source;       \
            (collreq)->req_ompi.req_status.MPI_TAG    = -1;                          \
            (collreq)->req_ompi.req_status.MPI_ERROR  = (collreq)->mpi_error;        \
            (collreq)->req_ompi.req_status._ucount    = 0;                           \
            ompi_request_complete(&(collreq)->req_ompi, true);                       \
        }                                                                            \
        OPAL_THREAD_UNLOCK(&ompi_request_lock);                                      \
    }
323 
324  /*
325  * Support functions for process state checking
326  */
327  /*
328  * Check the true state of the process (not what is cached on the comm)
329  * If failed then continue in the loop.
330  */
331 #define COLL_FTBASIC_CHECK_SKIP_ALL_FAILED_BASE(comm, peer) { \
332  if( !ompi_comm_is_proc_active(comm, peer, false) ) { \
333  continue; \
334  } \
335  }
336 
337 
338  /*
339  * Check the true state of the process (not what is cached on the comm)
340  * Find the lowest known alive peer - leader election
341  */
/*
 * Store in 'peer' the lowest rank in [0, size) for which
 * ompi_comm_is_proc_active(comm, rank, false) is true.
 *
 * comm : communicator handed to ompi_comm_is_proc_active.
 * size : number of ranks to scan.
 * peer : int lvalue receiving the result; left unchanged when no rank in
 *        the range is reported active.
 *
 * The loop counter uses a trailing-underscore name so that arguments which
 * mention a caller variable called 'i' are not captured by the macro.
 */
#define COLL_FTBASIC_FIND_LOWEST_ALIVE_BASE(comm, size, peer) {                      \
        int lowest_alive_rank_;                                                      \
        for( lowest_alive_rank_ = 0;                                                 \
             lowest_alive_rank_ < (size);                                            \
             ++lowest_alive_rank_ ) {                                                \
            if( ompi_comm_is_proc_active(comm, lowest_alive_rank_, false) ) {        \
                (peer) = lowest_alive_rank_;                                         \
                break;                                                               \
            }                                                                        \
        }                                                                            \
    }
351 
352 
353 /*
354  * Agreement timing support
355  */
356 #define AGREEMENT_ENABLE_TIMING OPAL_ENABLE_DEBUG
357 
358 #if AGREEMENT_ENABLE_TIMING == 1
359 #define COLL_FTBASIC_AGREEMENT_TIMER_2P_SETUP 0
360 #define COLL_FTBASIC_AGREEMENT_TIMER_2P_BCAST_REQ 1
361 #define COLL_FTBASIC_AGREEMENT_TIMER_2P_GATHER 2
362 #define COLL_FTBASIC_AGREEMENT_TIMER_2P_DECIDE 3
363 #define COLL_FTBASIC_AGREEMENT_TIMER_2P_BCAST 4
364 #define COLL_FTBASIC_AGREEMENT_TIMER_AR_SETUP 5
365 #define COLL_FTBASIC_AGREEMENT_TIMER_AR_GATHER 6
366 #define COLL_FTBASIC_AGREEMENT_TIMER_AR_DECIDE 7
367 #define COLL_FTBASIC_AGREEMENT_TIMER_AR_BCAST 8
368 #define COLL_FTBASIC_AGREEMENT_TIMER_PROTOCOL 9
369 #define COLL_FTBASIC_AGREEMENT_TIMER_DECIDE 10
370 #define COLL_FTBASIC_AGREEMENT_TIMER_REBALANCE 11
371 #define COLL_FTBASIC_AGREEMENT_TIMER_TOTAL 12
372 #define COLL_FTBASIC_AGREEMENT_TIMER_OTHER 13
373 #define COLL_FTBASIC_AGREEMENT_TIMER_MAX 14
374 
375 double agreement_get_time(void);
376 void agreement_start_time(int idx);
377 void agreement_end_time(int idx);
378 void agreement_display_all_timers(void);
379 void agreement_clear_timers(void);
380 
381 #define AGREEMENT_START_TIMER(idx) \
382  { \
383  if(OPAL_UNLIKELY( mca_coll_ftbasic_use_agreement_timer )) { \
384  agreement_start_time(idx); \
385  } \
386  }
387 
388 #define AGREEMENT_END_TIMER(idx) \
389  { \
390  if(OPAL_UNLIKELY( mca_coll_ftbasic_use_agreement_timer )) { \
391  agreement_end_time(idx); \
392  } \
393  }
394 
395 #define AGREEMENT_CLEAR_ALL_TIMERS() \
396  { \
397  if(OPAL_UNLIKELY( mca_coll_ftbasic_use_agreement_timer )) { \
398  agreement_clear_timers(); \
399  } \
400  }
401 
402 #define AGREEMENT_DISPLAY_ALL_TIMERS() \
403  { \
404  if(OPAL_UNLIKELY( mca_coll_ftbasic_use_agreement_timer )) { \
405  agreement_display_all_timers(); \
406  } \
407  }
408 
409 #else
410 #define AGREEMENT_START_TIMER(idx) ;
411 #define AGREEMENT_END_TIMER(idx) ;
412 #define AGREEMENT_CLEAR_ALL_TIMERS() ;
413 #define AGREEMENT_DISPLAY_ALL_TIMERS() ;
414 #endif /* AGREEMENT_ENABLE_TIMING */
415 
416 END_C_DECLS
417 
418 #endif /* MCA_COLL_FTBASIC_AGREEMENT_EXPORT_H */
Definition: coll_ftbasic_agreement.h:181
Definition: opal_bitmap.h:53
Definition: coll_ftbasic_agreement.h:263
int seq_num
Agreement Seq Number.
Definition: coll_ftbasic_agreement.h:156
Definition: opal_list.h:98
Definition: coll_ftbasic.h:109
Definition: ompi_free_list.h:39
Group structure Currently we have four formats for storing the process pointers that are members of t...
Definition: group.h:79
Definition: ompi_free_list.h:62
agreement_state_t state
State of the protocol.
Definition: coll_ftbasic_agreement.h:159
opal_list_item_t super
This is a list object.
Definition: coll_ftbasic_agreement.h:153
ompi_free_list_item_t super
This is a list object.
Definition: coll_ftbasic_agreement.h:183
Definition: evdns.c:158
int attempt_num
Attempt number - For debugging only.
Definition: coll_ftbasic_agreement.h:165
Definition: communicator.h:118
Main top-level request struct definition.
Definition: request.h:100
Definition: coll_ftbasic_agreement.h:151
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236
opal_bitmap_t * commit_bitmap
Committed bitmap.
Definition: coll_ftbasic_agreement.h:162