OpenMPI  0.1.1
request.h
Go to the documentation of this file.
1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4  * University Research and Technology
5  * Corporation. All rights reserved.
6  * Copyright (c) 2004-2007 The University of Tennessee and The University
7  * of Tennessee Research Foundation. All rights
8  * reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  * University of Stuttgart. All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  * All rights reserved.
13  * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
14  * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
15  * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
16  * $COPYRIGHT$
17  *
18  * Additional copyrights may follow
19  *
20  * $HEADER$
21  */
22 /**
23  * @file
24  *
25  * Top-level description of requests
26  */
27 
28 #ifndef OMPI_REQUEST_H
29 #define OMPI_REQUEST_H
30 
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/class/ompi_free_list.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/threads/condition.h"
#include "ompi/constants.h"
#include "ompi/runtime/params.h"
38 
39 BEGIN_C_DECLS
40 
41 /**
42  * Request class
43  */
45 
46 /*
47  * The following include pulls in shared typedefs with debugger plugins.
48  * For more information on why we do this see the Notice to developers
49  * comment at the top of the ompi_msgq_dll.c file.
50  */
51 
52 #include "request_dbg.h"
53 
54 struct ompi_request_t;
55 
56 /*
57  * Required function to free the request and any associated resources.
58  */
59 typedef int (*ompi_request_free_fn_t)(struct ompi_request_t** rptr);
60 
61 /*
62  * Optional function to cancel a pending request.
63  */
64 typedef int (*ompi_request_cancel_fn_t)(struct ompi_request_t* request, int flag);
65 
66 /*
67  * Optional function called when the request is completed from the MPI
68  * library perspective. This function is not allowed to release any
69  * resources related to the request.
70  */
71 typedef int (*ompi_request_complete_fn_t)(struct ompi_request_t* request);
72 
73 /**
74  * Forward declaration
75  */
76 struct ompi_communicator_t;
77 
78 /**
79  * Forward declaration
80  */
81 struct ompi_win_t;
82 
83 /**
84  * Forward declaration
85  */
86 struct ompi_file_t;
87 
/**
 * Union for holding several different MPI pointer types on the request.
 *
 * Only one member is meaningful at a time; which one depends on the kind
 * of MPI object the request is associated with.
 */
typedef union ompi_mpi_object_t {
    struct ompi_communicator_t *comm; /**< Used when the MPI object is a communicator */
    struct ompi_file_t *file;         /**< Used when the MPI object is a file */
    struct ompi_win_t *win;           /**< Used when the MPI object is a window */
} ompi_mpi_object_t;
96 
97 /**
98  * Main top-level request struct definition
99  */
101  ompi_free_list_item_t super; /**< Base type */
102  ompi_request_type_t req_type; /**< Enum indicating the type of the request */
103  ompi_status_public_t req_status; /**< Completion status */
104  volatile bool req_complete; /**< Flag indicating wether request has completed */
105  volatile ompi_request_state_t req_state; /**< enum indicate state of the request */
106  bool req_persistent; /**< flag indicating if the this is a persistent request */
107  int req_f_to_c_index; /**< Index in Fortran <-> C translation array */
108  ompi_request_free_fn_t req_free; /**< Called by free */
109  ompi_request_cancel_fn_t req_cancel; /**< Optional function to cancel the request */
110  ompi_request_complete_fn_t req_complete_cb; /**< Called when the request is MPI completed */
111  void *req_complete_cb_data;
112  ompi_mpi_object_t req_mpi_object; /**< Pointer to MPI object that created this request */
113  /* FT Functionality uses the (req_peer) to return the peer that caused a
114  * failure, and (req_tag) to identify which operations are collective in
115  * nature.
116  */
117  int req_peer; /**< Peer rank that this request is associated with */
118  int req_tag; /**< Tag associated with this request */
119  bool req_any_source_pending;
120 };
121 
122 /**
123  * Convenience typedef
124  */
126 
127 /**
128  * Padded struct to maintain backward compatibility.
129  * See ompi/communicator/communicator.h comments with struct ompi_communicator_t
130  * for full explanation why we chose the following padding construct for predefines.
131  */
132 #define PREDEFINED_REQUEST_PAD (sizeof(void*) * 32)
133 
135  struct ompi_request_t request;
136  char padding[PREDEFINED_REQUEST_PAD - sizeof(ompi_request_t)];
137 };
138 
140 
/**
 * Initialize a request. This is a macro to avoid function call
 * overhead, since this is typically invoked in the critical
 * performance path (since requests may be re-used, it is possible
 * that we will have to initialize a request multiple times).
 *
 * Marks the request as not complete, puts it in the
 * OMPI_REQUEST_INACTIVE state and records whether it is persistent.
 *
 * @param request    (IN/OUT) Pointer to the request; evaluated more than
 *                            once, so it must be free of side effects.
 * @param persistent (IN)     Value stored in req_persistent.
 *
 * The expansion deliberately does NOT end with a semicolon: the caller
 * supplies it, which keeps the macro usable as a single statement in
 * unbraced if/else constructs.
 */
#define OMPI_REQUEST_INIT(request, persistent)        \
    do {                                              \
        (request)->req_complete = false;              \
        (request)->req_state = OMPI_REQUEST_INACTIVE; \
        (request)->req_persistent = (persistent);     \
    } while (0)
153 
/**
 * Finalize a request. This is a macro to avoid function call
 * overhead, since this is typically invoked in the critical
 * performance path (since requests may be re-used, it is possible
 * that we will have to finalize a request multiple times).
 *
 * When finalizing a request, if MPI_Request_f2c() was previously
 * invoked on that request, then this request was added to the f2c
 * table, and we need to remove it.
 *
 * This macro should be called only from the MPI layer. It should
 * never be called from the PML. It takes care of the upper level
 * clean-up. When the user calls MPI_Request_free we should release all
 * MPI level resources, so we have to call this macro too.
 *
 * @param request (IN/OUT) Pointer to the request; evaluated more than
 *                         once, so it must be free of side effects.
 *
 * The expansion deliberately does NOT end with a semicolon: the caller
 * supplies it, which keeps the macro usable as a single statement in
 * unbraced if/else constructs.
 */
#define OMPI_REQUEST_FINI(request)                                      \
    do {                                                                \
        (request)->req_state = OMPI_REQUEST_INVALID;                    \
        if (MPI_UNDEFINED != (request)->req_f_to_c_index) {             \
            /* Drop the Fortran handle so the slot can be reused. */    \
            opal_pointer_array_set_item(&ompi_request_f_to_c_table,     \
                                        (request)->req_f_to_c_index,    \
                                        NULL);                          \
            (request)->req_f_to_c_index = MPI_UNDEFINED;                \
        }                                                               \
    } while (0)
178 
179 /**
180  * Non-blocking test for request completion.
181  *
182  * @param rptr (IN) Pointer to request
183  * @param completed (OUT) Flag indicating whether the request completed.
184  * @param status (OUT) Status of completed request.
185  * @return OMPI_SUCCESS or failure status.
186  *
187  * Note that upon completion, the request completed without error is freed, and the
188  * request handle at index set to NULL.
189  */
190 typedef int (*ompi_request_test_fn_t)(ompi_request_t ** rptr,
191  int *completed,
192  ompi_status_public_t * status );
193 /**
194  * Non-blocking test for request completion.
195  *
196  * @param count (IN) Number of requests
197  * @param requests (IN) Array of requests
198  * @param index (OUT) Index of first completed request.
199  * @param completed (OUT) Flag indicating if index is valid (a request completed).
200  * @param status (OUT) Status of completed request.
201  * @return OMPI_SUCCESS or failure status.
202  *
203  * Note that upon completion, the request completed without error is freed, and the
204  * request handle at index set to NULL.
205  */
206 typedef int (*ompi_request_test_any_fn_t)(size_t count,
207  ompi_request_t ** requests,
208  int *index,
209  int *completed,
210  ompi_status_public_t * status);
211 /**
212  * Non-blocking test for request completion.
213  *
214  * @param count (IN) Number of requests
215  * @param requests (IN) Array of requests
216  * @param completed (OUT) Flag indicating whether all requests completed.
217  * @param statuses (OUT) Array of completion statuses.
218  * @return OMPI_SUCCESS or failure status.
219  *
220  * This routine returns completed==true if all requests completed without errors
221  * have completed. The statuses parameter is only updated if all requests completed.
222  * Likewise, the requests array is not modified (no requests freed), unless all
223  * requests have completed.
224  */
225 typedef int (*ompi_request_test_all_fn_t)(size_t count,
226  ompi_request_t ** requests,
227  int *completed,
228  ompi_status_public_t * statuses);
229 /**
230  * Non-blocking test for some of N requests to complete.
231  *
232  * @param count (IN) Number of requests
233  * @param requests (INOUT) Array of requests
234  * @param outcount (OUT) Number of finished requests
235  * @param indices (OUT) Indices of the finished requests
236  * @param statuses (OUT) Array of completion statuses.
237  * @return OMPI_SUCCESS, OMPI_ERR_IN_STATUS or failure status.
238  *
239  */
240 typedef int (*ompi_request_test_some_fn_t)(size_t count,
241  ompi_request_t ** requests,
242  int * outcount,
243  int * indices,
244  ompi_status_public_t * statuses);
245 /**
246  * Wait (blocking-mode) for one request to complete. This function is slightly
247  * different from the MPI counter-part as it does not release the requests
248  * completed with error. Instead, the caller is responsible to call the
249  * ompi_request_free.
250  *
251  * @param req_ptr (IN) Pointer to request.
252  * @param status (OUT) Status of completed request.
253  * @return OMPI_SUCCESS or failure status.
254  *
255  */
256 typedef int (*ompi_request_wait_fn_t)(ompi_request_t ** req_ptr,
257  ompi_status_public_t * status);
258 /**
259  * Wait (blocking-mode) for one of N requests to complete. This function is
260  * slightly different from the MPI counter-part as it does not release the
261  * requests completed with error. Instead, the caller is responsible to call
262  * the ompi_request_free.
263  *
264  * @param count (IN) Number of requests
265  * @param requests (IN) Array of requests
266  * @param index (OUT) Index into request array of completed request.
267  * @param status (OUT) Status of completed request.
268  * @return OMPI_SUCCESS or failure status.
269  *
270  */
271 typedef int (*ompi_request_wait_any_fn_t)(size_t count,
272  ompi_request_t ** requests,
273  int *index,
274  ompi_status_public_t * status);
275 /**
276  * Wait (blocking-mode) for all of N requests to complete. This function is
277  * slightly different from the MPI counter-part as it does not release the
278  * requests completed with error. Instead, the caller is responsible to call
279  * the ompi_request_free.
280  *
281  * @param count (IN) Number of requests
282  * @param requests (IN) Array of requests
283  * @param statuses (OUT) Array of completion statuses.
284  * @return OMPI_SUCCESS or failure status.
285  *
286  */
287 typedef int (*ompi_request_wait_all_fn_t)(size_t count,
288  ompi_request_t ** requests,
289  ompi_status_public_t * statuses);
290 /**
291  * Wait (blocking-mode) for some of N requests to complete. This function is
292  * slightly different from the MPI counter-part as it does not release the
293  * requests completed with error. Instead, the caller is responsible to call
294  * the ompi_request_free.
295  *
296  * @param count (IN) Number of requests
297  * @param requests (INOUT) Array of requests
298  * @param outcount (OUT) Number of finished requests
299  * @param indices (OUT) Indices of the finished requests
300  * @param statuses (OUT) Array of completion statuses.
301  * @return OMPI_SUCCESS, OMPI_ERR_IN_STATUS or failure status.
302  *
303  */
304 typedef int (*ompi_request_wait_some_fn_t)(size_t count,
305  ompi_request_t ** requests,
306  int * outcount,
307  int * indices,
308  ompi_status_public_t * statuses);
309 
310 /**
311  * Replaceable request functions
312  */
313 typedef struct ompi_request_fns_t {
314  ompi_request_test_fn_t req_test;
315  ompi_request_test_any_fn_t req_test_any;
316  ompi_request_test_all_fn_t req_test_all;
317  ompi_request_test_some_fn_t req_test_some;
318  ompi_request_wait_fn_t req_wait;
319  ompi_request_wait_any_fn_t req_wait_any;
320  ompi_request_wait_all_fn_t req_wait_all;
321  ompi_request_wait_some_fn_t req_wait_some;
323 
324 /**
325  * Globals used for tracking requests and request completion.
326  */
328 OMPI_DECLSPEC extern size_t ompi_request_waiting;
329 OMPI_DECLSPEC extern size_t ompi_request_completed;
330 OMPI_DECLSPEC extern size_t ompi_request_failed;
331 OMPI_DECLSPEC extern int32_t ompi_request_poll;
332 OMPI_DECLSPEC extern opal_mutex_t ompi_request_lock;
333 OMPI_DECLSPEC extern opal_condition_t ompi_request_cond;
334 OMPI_DECLSPEC extern ompi_predefined_request_t ompi_request_null;
335 OMPI_DECLSPEC extern ompi_request_t ompi_request_empty;
336 OMPI_DECLSPEC extern ompi_status_public_t ompi_status_empty;
337 OMPI_DECLSPEC extern ompi_request_fns_t ompi_request_functions;
338 
339 /**
340  * Initialize the MPI_Request subsystem; invoked during MPI_INIT.
341  */
342 int ompi_request_init(void);
343 
344 /**
345  * Free a persistent request to a MPI_PROC_NULL peer (there's no
346  * freelist to put it back to, so we have to actually OBJ_RELEASE it).
347  */
348 OMPI_DECLSPEC int ompi_request_persistent_proc_null_free(ompi_request_t **request);
349 
350 /**
351  * Shut down the MPI_Request subsystem; invoked during MPI_FINALIZE.
352  */
353 int ompi_request_finalize(void);
354 
355 /**
356  * Cancel a pending request.
357  */
358 static inline int ompi_request_cancel(ompi_request_t* request)
359 {
360  if (request->req_cancel != NULL) {
361  return request->req_cancel(request, true);
362  }
363  return OMPI_SUCCESS;
364 }
365 
366 /**
367  * Free a request.
368  *
369  * @param request (INOUT) Pointer to request.
370  */
371 static inline int ompi_request_free(ompi_request_t** request)
372 {
373  return (*request)->req_free(request);
374 }
375 
/*
 * Entry points for the test/wait operations. Each name expands to the
 * matching function pointer stored in the global ompi_request_functions
 * table, so a call such as ompi_request_wait(...) is dispatched through
 * whatever pointer that table currently holds.
 */
376 #define ompi_request_test (ompi_request_functions.req_test)
377 #define ompi_request_test_any (ompi_request_functions.req_test_any)
378 #define ompi_request_test_all (ompi_request_functions.req_test_all)
379 #define ompi_request_test_some (ompi_request_functions.req_test_some)
380 #define ompi_request_wait (ompi_request_functions.req_wait)
381 #define ompi_request_wait_any (ompi_request_functions.req_wait_any)
382 #define ompi_request_wait_all (ompi_request_functions.req_wait_all)
383 #define ompi_request_wait_some (ompi_request_functions.req_wait_some)
384 
385 #if OPAL_ENABLE_FT_MPI
386 OMPI_DECLSPEC bool ompi_request_state_ok(ompi_request_t *req);
387 #endif /* OPAL_ENABLE_FT_MPI */
388 
389 
390 /**
391  * Signal or mark a request as complete. If with_signal is true this will
392  * wake any thread pending on the request and ompi_request_lock should be
393  * held while calling this function. If with_signal is false, there will
394  * signal generated, and no lock required. This is a special case when
395  * the function is called from the critical path for small messages, where
396  * we know the current execution flow created the request, and is still
397  * in the _START macro.
398  */
399 static inline int ompi_request_complete(ompi_request_t* request, bool with_signal)
400 {
401  ompi_request_complete_fn_t tmp = request->req_complete_cb;
402  if( NULL != tmp ) {
403  request->req_complete_cb = NULL;
404  tmp( request );
405  }
406  ompi_request_completed++;
407  request->req_complete = true;
408  if( OPAL_UNLIKELY(MPI_SUCCESS != request->req_status.MPI_ERROR) ) {
409  ompi_request_failed++;
410  }
411  if(with_signal && ompi_request_waiting) {
412  /* Broadcast the condition, otherwise if there is already a thread
413  * waiting on another request it can use all signals.
414  */
415  opal_condition_broadcast(&ompi_request_cond);
416  }
417  return OMPI_SUCCESS;
418 }
419 
420 /**
421  * Wait a particular request for completion
422  */
424 {
425  if(false == req->req_complete) {
426 #if OMPI_ENABLE_PROGRESS_THREADS
427  if(opal_progress_spin(&req->req_complete)) {
428  return;
429  }
430 #endif
431  OPAL_THREAD_LOCK(&ompi_request_lock);
432  ompi_request_waiting++;
433  while(false == req->req_complete) {
434 #if OPAL_ENABLE_FT_MPI
435  /*
436  * Check to make sure that process failure did not break the
437  * request.
438  */
439  if( ompi_ftmpi_enabled ) {
440  if( !ompi_request_state_ok(req) ) {
441  break;
442  }
443  }
444 #endif
445  opal_condition_wait(&ompi_request_cond, &ompi_request_lock);
446  }
447  ompi_request_waiting--;
448  OPAL_THREAD_UNLOCK(&ompi_request_lock);
449  }
450 }
451 
/* In a 64-bit library with strict alignment requirements (like 64-bit
 * SPARC), the _ucount field of a C status is a long and requires 8
 * byte alignment. Unfortunately a Fortran status is an array of 6
 * integers which only requires 4 byte alignment. When storing the
 * length into a status we don't know whether it is a C or Fortran
 * status. Therefore, we just copy the entire status as an integer
 * array to avoid any issues. We supply one macro for doing the entire
 * status and another for just the _ucount field. Note that these
 * macros are enabled on 64-bit SPARC platforms only. This is because
 * an investigation into performance effects showed that keeping the
 * structure assignment code wherever possible resulted in the best
 * performance. Details of the investigation into this issue are at
 * https://svn.open-mpi.org/trac/ompi/ticket/2526
 *
 * OMPI_STATUS_SET(outstat, instat)         copies *instat into *outstat.
 * OMPI_STATUS_SET_COUNT(outcount, incount) copies *incount into *outcount.
 *
 * In the SPARC variants the destination address is tested for 8-byte
 * alignment through uintptr_t (the standard integer type for holding a
 * pointer value); misaligned destinations are filled int by int. Those
 * variants evaluate their arguments more than once, so arguments must be
 * free of side effects.
 */
#if defined(__sparc) && SIZEOF_SIZE_T == 8
#define OMPI_STATUS_SET(outstat, instat)                                        \
    do {                                                                        \
        if (((uintptr_t)(outstat)) & 0x7) {                                     \
            int _i;                                                             \
            for(_i=0; _i<(int)(sizeof(ompi_status_public_t)/sizeof(int)); _i++) { \
                ((int *)(outstat))[_i] = ((int *)(instat))[_i];                 \
            }                                                                   \
        } else {                                                                \
            *(outstat) = *(instat);                                             \
        }                                                                       \
    } while(0)
#define OMPI_STATUS_SET_COUNT(outcount, incount)                \
    do {                                                        \
        if (((uintptr_t)(outcount)) & 0x7) {                    \
            ((int *)(outcount))[0] = ((int *)(incount))[0];     \
            ((int *)(outcount))[1] = ((int *)(incount))[1];     \
        } else {                                                \
            *(outcount) = *(incount);                           \
        }                                                       \
    } while(0)
#else
#define OMPI_STATUS_SET(outstat, instat) (*(outstat) = *(instat))
#define OMPI_STATUS_SET_COUNT(outcount, incount) (*(outcount) = *(incount))
#endif
491 
492 END_C_DECLS
493 
494 #endif
int(* ompi_request_wait_fn_t)(ompi_request_t **req_ptr, ompi_status_public_t *status)
Wait (blocking-mode) for one requests to complete.
Definition: request.h:256
int(* ompi_request_test_any_fn_t)(size_t count, ompi_request_t **requests, int *index, int *completed, ompi_status_public_t *status)
Non-blocking test for request completion.
Definition: request.h:206
Definition: win.h:53
volatile ompi_request_state_t req_state
enum indicate state of the request
Definition: request.h:105
ompi_free_list_item_t super
Base type.
Definition: request.h:101
union ompi_mpi_object_t ompi_mpi_object_t
Union for holding several different MPI pointer types on the request.
ompi_status_public_t req_status
Completion status.
Definition: request.h:103
dynamic pointer array
Definition: opal_pointer_array.h:45
Definition: condition.h:49
#define PREDEFINED_REQUEST_PAD
Padded struct to maintain back compatibiltiy.
Definition: request.h:132
int(* ompi_request_test_some_fn_t)(size_t count, ompi_request_t **requests, int *outcount, int *indices, ompi_status_public_t *statuses)
Non-blocking test for some of N requests to complete.
Definition: request.h:240
int(* ompi_request_wait_all_fn_t)(size_t count, ompi_request_t **requests, ompi_status_public_t *statuses)
Wait (blocking-mode) for all of N requests to complete.
Definition: request.h:287
OMPI_DECLSPEC int ompi_request_persistent_proc_null_free(ompi_request_t **request)
Free a persistent request to a MPI_PROC_NULL peer (there's no freelist to put it back to...
Definition: request.c:88
ompi_request_type_t req_type
Enum indicating the type of the request.
Definition: request.h:102
struct ompi_request_t ompi_request_t
Convenience typedef.
Definition: request.h:125
Definition: mutex_unix.h:53
static bool opal_progress_spin(volatile bool *complete)
Progress until flag is true or poll iterations completed.
Definition: opal_progress.h:188
bool req_persistent
flag indicating if the this is a persistent request
Definition: request.h:106
ompi_mpi_object_t req_mpi_object
Pointer to MPI object that created this request.
Definition: request.h:112
See opal_bitmap.h for an explanation of why there is a split between OPAL and ORTE for this generic c...
OMPI_DECLSPEC opal_pointer_array_t ompi_request_f_to_c_table
Globals used for tracking requests and request completion.
Definition: request.c:32
int req_tag
Tag associated with this request.
Definition: request.h:118
BEGIN_C_DECLS OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_request_t)
Request class.
Union for holding several different MPI pointer types on the request.
Definition: request.h:91
#define OPAL_THREAD_LOCK(mutex)
Lock a mutex if opal_using_threads() says that multiple threads may be active in the process...
Definition: mutex.h:223
#define OPAL_THREAD_UNLOCK(mutex)
Unlock a mutex if opal_using_threads() says that multiple threads may be active in the process...
Definition: mutex.h:309
Definition: request.h:134
static int ompi_request_cancel(ompi_request_t *request)
Cancel a pending request.
Definition: request.h:358
Back-end structure for MPI_File.
Definition: file.h:42
static int ompi_request_complete(ompi_request_t *request, bool with_signal)
Signal or mark a request as complete.
Definition: request.h:399
volatile bool req_complete
Flag indicating wether request has completed.
Definition: request.h:104
int req_peer
Peer rank that this request is associated with.
Definition: request.h:117
Definition: ompi_free_list.h:62
int ompi_request_init(void)
Initialize the MPI_Request subsystem; invoked during MPI_INIT.
Definition: request.c:105
int(* ompi_request_test_all_fn_t)(size_t count, ompi_request_t **requests, int *completed, ompi_status_public_t *statuses)
Non-blocking test for request completion.
Definition: request.h:225
int ompi_request_finalize(void)
Shut down the MPI_Request subsystem; invoked during MPI_FINALIZE.
Definition: request.c:184
Definition: mpi.h:337
int req_f_to_c_index
Index in Fortran <-> C translation array.
Definition: request.h:107
Replaceable request functions.
Definition: request.h:313
static int ompi_request_free(ompi_request_t **request)
Free a request.
Definition: request.h:371
ompi_request_cancel_fn_t req_cancel
Optional function to cancel the request.
Definition: request.h:109
ompi_request_free_fn_t req_free
Called by free.
Definition: request.h:108
int(* ompi_request_wait_any_fn_t)(size_t count, ompi_request_t **requests, int *index, ompi_status_public_t *status)
Wait (blocking-mode) for one of N requests to complete.
Definition: request.h:271
Definition: evdns.c:158
struct ompi_request_fns_t ompi_request_fns_t
Replaceable request functions.
ompi_request_complete_fn_t req_complete_cb
Called when the request is MPI completed.
Definition: request.h:110
int(* ompi_request_test_fn_t)(ompi_request_t **rptr, int *completed, ompi_status_public_t *status)
Non-blocking test for request completion.
Definition: request.h:190
Definition: communicator.h:118
int(* ompi_request_wait_some_fn_t)(size_t count, ompi_request_t **requests, int *outcount, int *indices, ompi_status_public_t *statuses)
Wait (blocking-mode) for some of N requests to complete.
Definition: request.h:304
Main top-level request struct definition.
Definition: request.h:100
static void ompi_request_wait_completion(ompi_request_t *req)
Wait a particular request for completion.
Definition: request.h:423