15 #ifndef MCA_PML_BFO_FAILOVER_H
16 #define MCA_PML_BFO_FAILOVER_H
31 bool repost, mca_btl_base_tag_t tag);
33 bool repost, mca_btl_base_tag_t tag,
int status,
50 mca_btl_base_tag_t tag,
int status);
53 mca_btl_base_tag_t tag,
int status,
64 int32_t flags,
ompi_proc_t *errproc,
char *btlname);
72 mca_btl_base_tag_t tag,
121 mca_btl_base_tag_t tag,
126 mca_btl_base_tag_t tag,
131 mca_btl_base_tag_t tag,
136 mca_btl_base_tag_t tag,
146 #define MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq) \
147 if( OPAL_UNLIKELY((sendreq)->req_error)) { \
148 opal_output_verbose(20, mca_pml_bfo_output, \
149 "ACK: received: dropping because request in error, " \
150 "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \
151 (uint16_t)(sendreq)->req_send.req_base.req_sequence, \
152 (sendreq)->req_restartseq, \
153 (void *)(sendreq), (sendreq)->req_recv.pval, \
154 (sendreq)->req_send.req_base.req_peer); \
160 #define MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq) \
161 if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \
162 opal_output_verbose(20, mca_pml_bfo_output, \
163 "FRAG: received: dropping because request in error, " \
164 "PML=%d, src_req=%p, dst_req=%p, peer=%d, offset=%d", \
165 (uint16_t)(recvreq)->req_msgseq, \
166 (recvreq)->remote_req_send.pval, \
168 (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, \
169 (int)hdr->hdr_frag.hdr_frag_offset); \
175 #define MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq) \
176 if( OPAL_UNLIKELY((sendreq)->req_error)) { \
177 opal_output_verbose(20, mca_pml_bfo_output, \
178 "PUT: received: dropping because request in error, " \
179 "PML=%d, src_req=%p, dst_req=%p, peer=%d", \
180 (uint16_t)(sendreq)->req_send.req_base.req_sequence, \
181 (void *)(sendreq), (sendreq)->req_recv.pval, \
182 (sendreq)->req_send.req_base.req_peer); \
197 #define MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq) \
198 if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \
199 opal_output_verbose(20, mca_pml_bfo_output, \
200 "FIN: received on broken request, skipping, " \
201 "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \
202 (recvreq)->req_msgseq, (recvreq)->req_restartseq, \
203 (recvreq)->remote_req_send.pval, (void *)(recvreq), \
204 (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \
206 recv_request_pml_complete_check(recvreq); \
210 #define MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq) \
211 if ((recvreq)->req_errstate) { \
212 opal_output_verbose(30, mca_pml_bfo_output, \
213 "RDMA read: completion failed, error already seen, " \
214 "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \
215 (recvreq)->req_msgseq, (recvreq)->req_restartseq, \
216 (unsigned long)(recvreq)->remote_req_send.pval, \
217 (unsigned long)(recvreq), \
218 (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \
221 opal_output_verbose(30, mca_pml_bfo_output, \
222 "RDMA read: completion failed, sending RECVERRNOTIFY to " \
223 "sender, PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \
224 (recvreq)->req_msgseq, (recvreq)->req_restartseq, \
225 (unsigned long)(recvreq)->remote_req_send.pval, \
226 (unsigned long)(recvreq), \
227 (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \
228 mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, status); \
231 #define MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl) \
233 if( OPAL_UNLIKELY(recvreq->req_errstate)) { \
234 if (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED) { \
235 opal_output_verbose(30, mca_pml_bfo_output, \
236 "RDMA read: completion: recvreq has error, outstanding events=%d " \
237 "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, status=%d, peer=%d", \
238 recvreq->req_events, recvreq->req_msgseq, recvreq->req_restartseq, \
239 (unsigned long)recvreq->remote_req_send.pval, \
240 (unsigned long)recvreq, status, \
241 recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \
242 if (0 == recvreq->req_events) { \
243 mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, \
247 MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
262 #define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \
263 if (sendreq->req_state == -1) { \
264 OPAL_THREAD_ADD32(&sendreq->req_state, 1); \
273 #define MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \
274 if( OPAL_UNLIKELY ((sendreq)->req_error)) { \
275 mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \
276 btl, type, description); \
290 #define MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \
291 if( OPAL_UNLIKELY((sendreq)->req_error)) { \
292 mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \
293 btl, type, description); \
302 #define MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des) \
303 if( OPAL_UNLIKELY(sendreq->req_error)) { \
304 opal_output_verbose(30, mca_pml_bfo_output, \
305 "FIN: received on broken request, skipping, " \
306 "PML=%d, src_req=%lx, dst_req=%lx, peer=%d", \
307 (uint16_t)sendreq->req_send.req_base.req_sequence, \
308 (unsigned long)sendreq, (unsigned long)sendreq->req_recv.pval, \
309 sendreq->req_send.req_base.req_peer); \
310 btl->btl_free(btl, des); \
317 #define MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl) \
318 if ( OPAL_UNLIKELY(sendreq->req_error)) { \
319 mca_pml_bfo_completion_sendreq_has_error(sendreq, status, btl, \
320 MCA_PML_BFO_HDR_TYPE_PUT, "RDMA write"); \
321 MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
325 #define MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, type) \
326 if (0 < sendreq->req_restartseq) { \
327 mca_pml_bfo_update_rndv_fields(hdr, sendreq, type); \
334 #define MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des) \
335 if (bml_btl->btl != btl) { \
336 ompi_proc_t *proc = (ompi_proc_t*) des->des_cbdata; \
337 mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; \
338 bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); \
340 #define MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, type) \
341 if (bml_btl->btl != btl) { \
342 mca_pml_bfo_find_sendreq_eager_bml_btl(&bml_btl, btl, sendreq, type); \
344 #define MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, type) \
345 if (bml_btl->btl != btl) { \
346 mca_pml_bfo_find_sendreq_rdma_bml_btl(&bml_btl, btl, sendreq, type); \
349 #define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, type) \
350 if (bml_btl->btl != btl) { \
351 mca_pml_bfo_find_recvreq_eager_bml_btl(&bml_btl, btl, recvreq, type); \
354 #define MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, type) \
355 if (bml_btl->btl != btl) { \
356 mca_pml_bfo_find_recvreq_rdma_bml_btl(&bml_btl, btl, recvreq, type); \
359 #define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des) \
360 if (bml_btl->btl != btl) { \
361 mca_pml_bfo_update_eager_bml_btl_recv_ctl(&bml_btl, btl, des); \
364 #define MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl) \
365 if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { \
366 opal_output_verbose(30, mca_pml_bfo_output, \
367 "PUT received: no matching BTL to RDMA write to, oustanding " \
368 "events=%d, PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \
369 sendreq->req_events, \
370 (uint16_t)sendreq->req_send.req_base.req_sequence, \
371 sendreq->req_restartseq, (void *)sendreq, \
372 sendreq->req_recv.pval, sendreq->req_send.req_base.req_peer); \
373 MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
374 sendreq->req_error++; \
375 if (0 == sendreq->req_events) { \
376 mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \
377 MCA_PML_BFO_HDR_TYPE_PUT, \
388 #define MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range) \
389 if ((int)mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->btl_send) \
390 != range->range_btl_cnt) { \
391 sendreq->req_error++; \
void mca_pml_bfo_recv_request_reset(mca_pml_bfo_recv_request_t *recvreq)
Reset all the receive request fields to match what a request looks like when it is first started...
Definition: pml_bfo_failover.c:1266
void mca_pml_bfo_recv_frag_callback_rndvrestartnack(mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata)
Callback for when a RNDVRESTARTNACK message is received.
Definition: pml_bfo_failover.c:604
Definition: pml_bfo_recvreq.h:41
A descriptor that holds the parameters to a send/put/get operation along with a callback routine that i...
Definition: btl.h:275
void mca_pml_bfo_repost_fin(struct mca_btl_base_descriptor_t *des)
Repost a FIN message if we get an error on the completion event.
Definition: pml_bfo_failover.c:264
void mca_pml_bfo_recv_frag_callback_rndvrestartack(mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata)
Callback for when a RNDVRESTARTACK message is received.
Definition: pml_bfo_failover.c:459
BEGIN_C_DECLS bool mca_pml_bfo_is_duplicate_msg(mca_pml_bfo_comm_proc_t *proc, mca_pml_bfo_match_hdr_t *hdr)
When running with failover enabled, check the PML sequence numbers to see if we have received a dupli...
Definition: pml_bfo_failover.c:71
Header definition for the first fragment, contains the attributes required to match the corresponding...
Definition: pml_bfo_hdr.h:77
void mca_pml_bfo_recv_request_rndvrestartnack(mca_btl_base_descriptor_t *olddes, ompi_proc_t *ompi_proc, bool repost)
Called after the receipt of a RNDVRESTARTNOTIFY message to a request that no longer matches...
Definition: pml_bfo_failover.c:1192
void mca_pml_bfo_update_eager_bml_btl_recv_ctl(mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des)
The following functions are all called when it is determined that the cached bml_btl->btl does...
Definition: pml_bfo_failover.c:1953
Union of defined hdr types.
Definition: pml_bfo_hdr.h:441
bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t *des, mca_pml_bfo_send_request_t *sendreq)
The completion event for the RNDV message has returned with an error.
Definition: pml_bfo_failover.c:2114
void mca_pml_bfo_rndvrestartnotify_completion(mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, struct mca_btl_base_descriptor_t *des, int status)
Completion callback for rndvrestartnotify completion event.
Definition: pml_bfo_failover.c:966
Remote Open MPI process structure.
Definition: proc.h:56
void mca_pml_bfo_send_request_rndvrestartnotify(mca_pml_bfo_send_request_t *sendreq, bool repost, mca_btl_base_tag_t tag, int status, mca_btl_base_module_t *btl)
This function gets called when failover is enabled and an error occurs during the rendezvous protocol...
Definition: pml_bfo_failover.c:660
void mca_pml_bfo_update_rndv_fields(mca_pml_bfo_hdr_t *hdr, mca_pml_bfo_send_request_t *, char *type)
Update a few fields when we are restarting either a RNDV or RGET type message.
Definition: pml_bfo_failover.c:1929
Byte Transfer Layer (BTL)
void mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des, int status)
Called each time we get a completion event on an ACK or PUT message.
Definition: pml_bfo_failover.c:1766
void mca_pml_bfo_send_request_restart(mca_pml_bfo_send_request_t *sendreq, bool repost, mca_btl_base_tag_t tag)
This function restarts a RNDV send request.
Definition: pml_bfo_failover.c:744
bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t *hdr, mca_btl_base_descriptor_t *rdma, mca_btl_base_module_t *btl)
This function checks to see if we have received a duplicate FIN message.
Definition: pml_bfo_failover.c:139
void mca_pml_bfo_recv_request_recverrnotify(mca_pml_bfo_recv_request_t *recvreq, mca_btl_base_tag_t tag, int status)
This function is called when an error is detected on a completion event on the receiving side...
Definition: pml_bfo_failover.c:1038
State of ELAN endpoint connection.
Definition: btl_elan_endpoint.h:33
void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t *sendreq, int status, mca_btl_base_module_t *btl, int type, char *description)
Check to see if an error has occurred on this send request.
Definition: pml_bfo_failover.c:2136
mca_pml_bfo_recv_request_t * mca_pml_bfo_get_request(mca_pml_bfo_match_hdr_t *hdr)
This function is called when a RNDV or RGET is received with the FLAGS_RESTART flag set...
Definition: pml_bfo_failover.c:309
void mca_pml_bfo_recv_request_rndvrestartack(mca_pml_bfo_recv_request_t *recvreq, mca_btl_base_tag_t tag, int status, mca_btl_base_module_t *btl)
This function is called when it may be time to send a RNDVRESTARTACK message back to the sending side...
Definition: pml_bfo_failover.c:1109
void mca_pml_bfo_recv_frag_callback_recverrnotify(mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata)
Callback for when a RECVERRNOTIFY message is received.
Definition: pml_bfo_failover.c:520
int mca_pml_bfo_register_callbacks(void)
Register four functions to handle extra PML message types that are utilized when a failover occurs...
Definition: pml_bfo_failover.c:1894
Definition: pml_bfo_comm.h:31
Definition: pml_bfo_sendreq.h:41
BTL module interface functions and attributes.
Definition: btl.h:786
void mca_pml_bfo_repost_match_fragment(struct mca_btl_base_descriptor_t *des)
This function will repost a match fragment.
Definition: pml_bfo_failover.c:854
void mca_pml_bfo_recv_frag_callback_rndvrestartnotify(mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata)
Four new callbacks for the four new message types.
Definition: pml_bfo_failover.c:374