OpenMPI
0.1.1
|
Functions that implement failover capabilities. More...
Go to the source code of this file.
Macros | |
#define | MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq) |
A bunch of macros to help isolate failover code from regular ob1 code. More... | |
#define | MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq) |
#define | MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq) |
#define | MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq) |
Macros for pml_bfo_recvreq.c file. More... | |
#define | MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq) |
#define | MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl) |
#define | MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) |
Macros for pml_bfo_sendreq.c file. More... | |
#define | MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) |
#define | MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) |
This macro is called within the frag completion function in two places. More... | |
#define | MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des) |
#define | MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl) |
#define | MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, type) |
#define | MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des) |
#define | MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, type) |
#define | MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, type) |
#define | MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, type) |
#define | MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, type) |
#define | MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des) |
#define | MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl) |
#define | MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range) |
Functions | |
BEGIN_C_DECLS bool | mca_pml_bfo_is_duplicate_msg (mca_pml_bfo_comm_proc_t *proc, mca_pml_bfo_match_hdr_t *hdr) |
When running with failover enabled, check the PML sequence numbers to see if we have received a duplicate message. More... | |
bool | mca_pml_bfo_is_duplicate_fin (mca_pml_bfo_hdr_t *hdr, mca_btl_base_descriptor_t *rdma, mca_btl_base_module_t *btl) |
This function checks to see if we have received a duplicate FIN message. More... | |
mca_pml_bfo_recv_request_t * | mca_pml_bfo_get_request (mca_pml_bfo_match_hdr_t *hdr) |
This function is called when a RNDV or RGET is received with the FLAGS_RESTART flag set. More... | |
void | mca_pml_bfo_send_request_restart (mca_pml_bfo_send_request_t *sendreq, bool repost, mca_btl_base_tag_t tag) |
This function restarts a RNDV send request. More... | |
void | mca_pml_bfo_send_request_rndvrestartnotify (mca_pml_bfo_send_request_t *sendreq, bool repost, mca_btl_base_tag_t tag, int status, mca_btl_base_module_t *btl) |
This function gets called when failover is enabled and an error occurs during the rendezvous protocol. More... | |
void | mca_pml_bfo_rndvrestartnotify_completion (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, struct mca_btl_base_descriptor_t *des, int status) |
Completion callback for rndvrestartnotify completion event. More... | |
void | mca_pml_bfo_check_recv_ctl_completion_status (mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des, int status) |
Call each time we get a completion event on ACK or PUT message. More... | |
void | mca_pml_bfo_recv_request_reset (mca_pml_bfo_recv_request_t *recvreq) |
Reset all the receive request fields to match what a request looks like when it is first started. More... | |
void | mca_pml_bfo_recv_request_recverrnotify (mca_pml_bfo_recv_request_t *recvreq, mca_btl_base_tag_t tag, int status) |
This function is called when an error is detected on a completion event on the receiving side. More... | |
void | mca_pml_bfo_recv_request_rndvrestartack (mca_pml_bfo_recv_request_t *recvreq, mca_btl_base_tag_t tag, int status, mca_btl_base_module_t *btl) |
This function is called when it may be time to send a RNDVRESTARTACK message back to the sending side. More... | |
void | mca_pml_bfo_recv_request_rndvrestartnack (mca_btl_base_descriptor_t *olddes, ompi_proc_t *ompi_proc, bool repost) |
Called after the receipt of a RNDVRESTARTNOTIFY message to a request that no longer matches. More... | |
void | mca_pml_bfo_recv_restart_completion (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, struct mca_btl_base_descriptor_t *des, int status) |
void | mca_pml_bfo_failover_error_handler (struct mca_btl_base_module_t *btl, int32_t flags, ompi_proc_t *errproc, char *btlname) |
void | mca_pml_bfo_repost_match_fragment (struct mca_btl_base_descriptor_t *des) |
This function will repost a match fragment. More... | |
void | mca_pml_bfo_repost_fin (struct mca_btl_base_descriptor_t *des) |
Repost a FIN message if we get an error on the completion event. | |
void | mca_pml_bfo_map_out_btl (struct mca_btl_base_module_t *btl, ompi_proc_t *errproc, char *btlname) |
void | mca_pml_bfo_map_out (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata) |
int | mca_pml_bfo_register_callbacks (void) |
Register four functions to handle extra PML message types that are utilized when a failover occurs. | |
void | mca_pml_bfo_update_rndv_fields (mca_pml_bfo_hdr_t *hdr, mca_pml_bfo_send_request_t *, char *type) |
Update a few fields when we are restarting either a RNDV or RGET type message. | |
void | mca_pml_bfo_update_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des) |
void | mca_pml_bfo_find_recvreq_eager_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_recv_request_t *recvreq, char *type) |
void | mca_pml_bfo_find_sendreq_eager_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_send_request_t *sendreq, char *type) |
void | mca_pml_bfo_find_sendreq_rdma_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_send_request_t *sendreq, char *type) |
void | mca_pml_bfo_update_eager_bml_btl_recv_ctl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des) |
The following set of functions are all called when it is determined that the cached bml_btl->btl does not match the btl handed back by the callback function. More... | |
void | mca_pml_bfo_find_recvreq_rdma_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_recv_request_t *recvreq, char *type) |
bool | mca_pml_bfo_rndv_completion_status_error (struct mca_btl_base_descriptor_t *des, mca_pml_bfo_send_request_t *sendreq) |
The completion event for the RNDV message has returned with an error. More... | |
void | mca_pml_bfo_send_ctl_completion_status_error (struct mca_btl_base_descriptor_t *des) |
void | mca_pml_bfo_completion_sendreq_has_error (mca_pml_bfo_send_request_t *sendreq, int status, mca_btl_base_module_t *btl, int type, char *description) |
Check to see if an error has occurred on this send request. More... | |
void | mca_pml_bfo_recv_frag_callback_rndvrestartnotify (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata) |
Four new callbacks for the four new message types. More... | |
void | mca_pml_bfo_recv_frag_callback_rndvrestartack (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata) |
Callback for when a RNDVRESTARTACK message is received. More... | |
void | mca_pml_bfo_recv_frag_callback_rndvrestartnack (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata) |
Callback for when a RNDVRESTARTNACK message is received. More... | |
void | mca_pml_bfo_recv_frag_callback_recverrnotify (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *descriptor, void *cbdata) |
Callback for when a RECVERRNOTIFY message is received. More... | |
Functions that implement failover capabilities.
#define MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION | ( | bml_btl, | |
btl, | |||
des | |||
) |
#define MCA_PML_BFO_CHECK_FOR_REMOVED_BML | ( | sendreq, | |
frag, | |||
btl | |||
) |
#define MCA_PML_BFO_CHECK_FOR_REMOVED_BTL | ( | sendreq, | |
range | |||
) |
#define MCA_PML_BFO_CHECK_FOR_RNDV_RESTART | ( | hdr, | |
sendreq, | |||
type | |||
) |
#define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL | ( | bml_btl, | |
btl, | |||
recvreq, | |||
type | |||
) |
#define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL | ( | bml_btl, | |
btl, | |||
des | |||
) |
#define MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL | ( | bml_btl, | |
btl, | |||
recvreq, | |||
type | |||
) |
#define MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL | ( | bml_btl, | |
btl, | |||
sendreq, | |||
type | |||
) |
#define MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL | ( | bml_btl, | |
btl, | |||
sendreq, | |||
type | |||
) |
#define MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK | ( | sendreq | ) |
A bunch of macros to help isolate failover code from regular ob1 code.
Referenced by mca_pml_bfo_recv_frag_callback_ack().
#define MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT | ( | recvreq | ) |
Macros for pml_bfo_recvreq.c file.
#define MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK | ( | recvreq | ) |
#define MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK | ( | sendreq | ) |
#define MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION | ( | recvreq | ) |
#define MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK | ( | sendreq, | |
status, | |||
btl, | |||
type, | |||
description | |||
) |
This macro is called within the frag completion function in two places.
It is called to see if any errors occur prior to the completion event on the frag. It is then called a second time after the scheduling routine is called as the scheduling routine may have detected that a BTL that was cached on the request had been removed and therefore marked the request in error. In that case, the scheduling of fragments can no longer proceed properly, and if there are no outstanding events, the restart dance is initiated.
#define MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK | ( | sendreq, | |
status, | |||
btl | |||
) |
#define MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK | ( | sendreq, | |
btl, | |||
des | |||
) |
#define MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK | ( | sendreq, | |
status, | |||
btl, | |||
type, | |||
description | |||
) |
#define MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION | ( | recvreq, | |
status, | |||
btl | |||
) |
#define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE | ( | sendreq | ) |
Macros for pml_bfo_sendreq.c file.
void mca_pml_bfo_check_recv_ctl_completion_status | ( | mca_btl_base_module_t * | btl, |
struct mca_btl_base_descriptor_t * | des, | ||
int | status | ||
) |
Call each time we get a completion event on ACK or PUT message.
These types of messages are receive control type messages. This function is only called if the underlying BTL supports failover. Otherwise, there is no need for this check.
References mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_src, mca_pml_bfo_rdma_hdr_t::hdr_des, mca_pml_bfo_common_hdr_t::hdr_type, mca_pml_bfo_recv_request_recverrnotify(), mca_pml_bfo_recv_request_rndvrestartack(), opal_output_verbose(), mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_ompi, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.
void mca_pml_bfo_completion_sendreq_has_error | ( | mca_pml_bfo_send_request_t * | sendreq, |
int | status, | ||
mca_btl_base_module_t * | btl, | ||
int | type, | ||
char * | description | ||
) |
Check to see if an error has occurred on this send request.
If it has and there are no outstanding events, then we can start the restart dance.
References mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_peer, and mca_pml_base_request_t::req_sequence.
mca_pml_bfo_recv_request_t* mca_pml_bfo_get_request | ( | mca_pml_bfo_match_hdr_t * | hdr | ) |
This function is called when a RNDV or RGET is received with the FLAGS_RESTART flag set.
This means this message already has a receive request associated with it.
References mca_pml_bfo_match_hdr_t::hdr_common, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_common_hdr_t::hdr_type, mca_pml_bfo_recv_request_reset(), opal_output_verbose(), mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, and ompi_request_t::req_status.
Referenced by mca_pml_bfo_recv_frag_match().
bool mca_pml_bfo_is_duplicate_fin | ( | mca_pml_bfo_hdr_t * | hdr, |
mca_btl_base_descriptor_t * | rdma, | ||
mca_btl_base_module_t * | btl | ||
) |
This function checks to see if we have received a duplicate FIN message.
This is done by first pulling the pointer of the request that the FIN message is pointing to from the message. We then check the various fields in the request to the fields in the header and make sure they match. If they do not, then the request must have been recycled already and this is a duplicate FIN message. We have to do this check on every FIN message that we receive.
References mca_btl_base_module_t::btl_flags, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_flags, mca_pml_bfo_fin_hdr_t::hdr_common, mca_pml_bfo_common_hdr_t::hdr_flags, opal_output_verbose(), mca_pml_base_recv_request_t::req_base, mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_sequence, ompi_request_t::req_status, and mca_pml_base_request_t::req_type.
Referenced by mca_pml_bfo_recv_frag_callback_fin().
BEGIN_C_DECLS bool mca_pml_bfo_is_duplicate_msg | ( | mca_pml_bfo_comm_proc_t * | proc, |
mca_pml_bfo_match_hdr_t * | hdr | ||
) |
When running with failover enabled, check the PML sequence numbers to see if we have received a duplicate message.
This check is done for all MATCH fragments. It is also done for RNDV and RGET fragments that do not have the MCA_PML_BFO_HDR_FLAGS_RESTART flag set. We set the window size to half the total range of sequence numbers. We only enter this code when the seq_num is not the expected one. A few more notes on the algorithm used here. In normal operation, the expected value will either be equal to or less than the sequence number of the header. This is because we are using this sequence number to detect packets arriving prior to them being expected. If we determine that expected is less than header, then make sure this is not a rollover case. We do that by adding the maxnum to the expected.
proc | Pointer to proc from where message came |
hdr | Pointer to header of message |
References mca_pml_bfo_comm_proc_t::expected_sequence, mca_pml_bfo_comm_proc_t::frags_cant_match, mca_pml_bfo_match_hdr_t::hdr_common, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_common_hdr_t::hdr_type, opal_list_get_end(), opal_list_get_first(), opal_list_get_next, opal_list_get_size(), opal_output(), and opal_output_verbose().
Referenced by mca_pml_bfo_recv_frag_callback_match(), and mca_pml_bfo_recv_frag_match().
void mca_pml_bfo_recv_frag_callback_recverrnotify | ( | mca_btl_base_module_t * | btl, |
mca_btl_base_tag_t | tag, | ||
mca_btl_base_descriptor_t * | des, | ||
void * | cbdata | ||
) |
Callback for when a RECVERRNOTIFY message is received.
This message is sent from the receiver to the sender and tells the sender that the receiver has seen an error. This will trigger the sender to start the request restart sequence.
References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_register_callbacks().
void mca_pml_bfo_recv_frag_callback_rndvrestartack | ( | mca_btl_base_module_t * | btl, |
mca_btl_base_tag_t | tag, | ||
mca_btl_base_descriptor_t * | des, | ||
void * | cbdata | ||
) |
Callback for when a RNDVRESTARTACK message is received.
This message is sent from the receiver to the sender to acknowledge the receipt of the RNDVRESTARTNOTIFY message. At this point, the sender can reset the send request and restart the message.
References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_send_request_restart(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_register_callbacks().
void mca_pml_bfo_recv_frag_callback_rndvrestartnack | ( | mca_btl_base_module_t * | btl, |
mca_btl_base_tag_t | tag, | ||
mca_btl_base_descriptor_t * | des, | ||
void * | cbdata | ||
) |
Callback for when a RNDVRESTARTNACK message is received.
This message is sent from the receiver to the sender and tells the sender that the receiver has already completed the message and there is nothing else to be done. The sender should then just make the send request complete.
References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_register_callbacks().
void mca_pml_bfo_recv_frag_callback_rndvrestartnotify | ( | mca_btl_base_module_t * | btl, |
mca_btl_base_tag_t | tag, | ||
mca_btl_base_descriptor_t * | des, | ||
void * | cbdata | ||
) |
Four new callbacks for the four new message types.
Four new callbacks for the four new message types.
A RNDVRESTARTNOTIFY message is sent from the sender to the receiver telling the receiver that the message is going to be started over. The receiver first makes sure that the request being pointed to is still valid. If it is not, that means the receiver must have completed the request and therefore we need to send a NACK back to the sender. The receiver then makes sure this is not a duplicate message. If it is a duplicate, it will just drop it. Otherwise, it will then send a RNDVRESTARTACK message if there are no outstanding events on the receiver. Otherwise, it will just change the state of the request and wait for another event to send the RNDVRESTARTACK to the sender.
References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_recv_request_rndvrestartack(), mca_pml_bfo_recv_request_rndvrestartnack(), ompi_proc_find(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_register_callbacks().
void mca_pml_bfo_recv_request_recverrnotify | ( | mca_pml_bfo_recv_request_t * | recvreq, |
mca_btl_base_tag_t | tag, | ||
int | status | ||
) |
This function is called when an error is detected on a completion event on the receiving side.
This can come from an ACK, PUT, RDMA read (GET) or RECVERRNOTIFY completion event. When this happens, check the state of the request and decide if the sender needs to be notified that a problem was seen. If no RECVERRNOTIFY message has been sent and no RNDVRESTARTNOTIFY has been received from the sender, then send a message telling the sender an error was seen.
References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_proc, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_check_recv_ctl_completion_status(), and mca_pml_bfo_error_pending_packets().
void mca_pml_bfo_recv_request_reset | ( | mca_pml_bfo_recv_request_t * | match | ) |
Reset all the receive request fields to match what a request looks like when it is first started.
This gets called when the rendezvous/rget message is being restarted.
References mca_mpool_base_module_t::mpool_deregister, mca_pml_bfo_recv_request_t::req_ack_sent, mca_pml_base_recv_request_t::req_base, mca_pml_bfo_recv_request_t::req_bytes_received, ompi_request_t::req_complete, mca_pml_base_request_t::req_convertor, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_pml_complete, and ompi_request_t::req_state.
Referenced by mca_pml_bfo_get_request().
void mca_pml_bfo_recv_request_rndvrestartack | ( | mca_pml_bfo_recv_request_t * | recvreq, |
mca_btl_base_tag_t | tag, | ||
int | status, | ||
mca_btl_base_module_t * | btl | ||
) |
This function is called when it may be time to send a RNDVRESTARTACK message back to the sending side.
This can happen because we received a RNDVRESTARTNOTIFY message from the sender. This can also happen if we have noticed that the request has received the RNDVRESTARTNOTIFY message, but has not yet sent out the RNDVRESTARTACK because there were still some pending receive events on the request. That means we can enter this routine from a completion event on an ACK, PUT, or RDMA read as well as from the receipt of a RNDVRESTARTNOTIFY message. If all is good, we send the RNDVRESTARTACK message back to the sender. Then sometime later a message will arrive telling us to reset and restart the receive request.
References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_proc, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_check_recv_ctl_completion_status(), mca_pml_bfo_error_pending_packets(), and mca_pml_bfo_recv_frag_callback_rndvrestartnotify().
void mca_pml_bfo_recv_request_rndvrestartnack | ( | mca_btl_base_descriptor_t * | olddes, |
ompi_proc_t * | ompi_proc, | ||
bool | repost | ||
) |
Called after the receipt of a RNDVRESTARTNOTIFY message to a request that no longer matches.
This can happen if the sender detected an error, but the receiver actually received all the data. Therefore send a NACK back instead of the ACK so that the sender can complete its request. This happens very rarely. Note that we need to make use of the hdr_dst_rank that we received from the notify message. This is so the sending side can make sure the message matches a valid request on the sending side.
References mca_bml_base_btl_array_t::arr_size, mca_bml_base_endpoint_t::btl_eager, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_dst, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, ompi_proc_t::proc_name, mca_btl_base_segment_t::seg_addr, and orte_process_name_t::vpid.
Referenced by mca_pml_bfo_recv_frag_callback_rndvrestartnotify().
void mca_pml_bfo_repost_match_fragment | ( | struct mca_btl_base_descriptor_t * | des | ) |
This function will repost a match fragment.
This function has to handle the case where there may not be a request associated with the fragment and just use the information in the fragment to repost the send.
References mca_bml_base_btl_array_t::arr_size, mca_bml_base_endpoint_t::btl_eager, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_context, mca_btl_base_descriptor_t::des_flags, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), mca_pml_base_send_request_t::req_base, mca_pml_base_send_request_t::req_bytes_packed, mca_pml_base_request_t::req_convertor, mca_btl_base_segment_t::seg_addr, and mca_btl_base_segment_t::seg_len.
bool mca_pml_bfo_rndv_completion_status_error | ( | struct mca_btl_base_descriptor_t * | des, |
mca_pml_bfo_send_request_t * | sendreq | ||
) |
The completion event for the RNDV message has returned with an error.
We know that the send request we are looking at is valid because it cannot be completed until the sendreq->req_state value reaches 0. And for the sendreq->req_state to reach 0, the completion event on the RNDV message must occur. So, we do not bother checking whether the send request is valid, because we know it is, but we put a few asserts in for good measure. We then check a few fields in the request to decide what to do. If the sendreq->req_error is set, that means that something has happened already to the request and we do not want to restart it. Presumably, we may have received a RECVERRNOTIFY message from the receiver. We also check the sendreq->req_acked field to see if it has been acked. If it has, then again we do not restart everything because obviously the RNDV message has made it to the other side.
References mca_btl_base_descriptor_t::des_src, mca_pml_bfo_send_request_restart(), and mca_btl_base_segment_t::seg_addr.
void mca_pml_bfo_rndvrestartnotify_completion | ( | mca_btl_base_module_t * | btl, |
struct mca_btl_base_endpoint_t * | ep, | ||
struct mca_btl_base_descriptor_t * | des, | ||
int | status | ||
) |
Completion callback for rndvrestartnotify completion event.
If the RNDVRESTARTACK has already been received, then reset and restart. Otherwise, just update the state and let the RNDVRESTARTACK trigger the reset and restart.
References mca_btl_base_descriptor_t::des_src, mca_pml_bfo_send_request_restart(), mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_sequence, and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_send_request_rndvrestartnotify().
void mca_pml_bfo_send_request_restart | ( | mca_pml_bfo_send_request_t * | sendreq, |
bool | repost, | ||
mca_btl_base_tag_t | tag | ||
) |
This function restarts a RNDV send request.
When this is called, all the fields in the send request are reset and the send is started over. The sendreq->req_restartseq will be non-zero which will trigger a special flag in the RNDV header which indicates the match has already happened on the receiving side.
References mca_bml_base_endpoint_t::btl_eager, mca_bml_base_btl_array_get_next(), mca_bml_base_btl_array_get_size(), MCA_PML_BASE_SEND_START, opal_list_get_begin(), opal_list_get_last(), opal_list_remove_item(), opal_output_verbose(), OPAL_THREAD_LOCK, OPAL_THREAD_UNLOCK, ompi_proc_t::proc_bml, mca_pml_base_send_request_t::req_addr, mca_pml_base_request_t::req_addr, mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_convertor, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_proc, mca_pml_base_send_request_t::req_send_mode, and mca_pml_base_request_t::req_sequence.
Referenced by mca_pml_bfo_recv_frag_callback_rndvrestartack(), mca_pml_bfo_rndv_completion_status_error(), and mca_pml_bfo_rndvrestartnotify_completion().
void mca_pml_bfo_send_request_rndvrestartnotify | ( | mca_pml_bfo_send_request_t * | sendreq, |
bool | repost, | ||
mca_btl_base_tag_t | tag, | ||
int | status, | ||
mca_btl_base_module_t * | btl | ||
) |
This function gets called when failover is enabled and an error occurs during the rendezvous protocol.
A message is sent to the receiving side notifying the request that the communication is going to be starting over. However, none of the information in the send request is reset yet, so that any in flight fragments can still find a home. Information in the send request gets reset when the completion event for this send occurs AND an ACK has been received back from the receiver.
References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), mca_pml_bfo_rndvrestartnotify_completion(), opal_output(), opal_output_verbose(), ORTE_PROC_MY_NAME, ompi_proc_t::proc_bml, mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_proc, mca_pml_base_request_t::req_sequence, and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_completion_sendreq_has_error(), mca_pml_bfo_error_pending_packets(), mca_pml_bfo_recv_frag_callback_recverrnotify(), and mca_pml_bfo_rndvrestartnotify_completion().
void mca_pml_bfo_update_eager_bml_btl_recv_ctl | ( | mca_bml_base_btl_t ** | bml_btl, |
mca_btl_base_module_t * | btl, | ||
struct mca_btl_base_descriptor_t * | des | ||
) |
The following set of functions are all called when it is determined that the cached bml_btl->btl does not match the btl handed back by the callback function.
This means that the bml_btl array has been shuffled and the bml_btl matching the btl has to be found again. If it cannot be found, then just find a different one to use.
References mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_src, mca_pml_bfo_ack_hdr_t::hdr_dst_req, mca_pml_bfo_common_hdr_t::hdr_type, opal_output(), and mca_btl_base_segment_t::seg_addr.