OpenMPI
0.1.1
|
Functions that implement failover capabilities. More...
#include "ompi_config.h"
#include <stdlib.h>
#include <string.h>
#include "opal/class/opal_bitmap.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "pml_bfo.h"
#include "pml_bfo_component.h"
#include "pml_bfo_comm.h"
#include "pml_bfo_hdr.h"
#include "pml_bfo_recvfrag.h"
#include "pml_bfo_sendreq.h"
#include "pml_bfo_recvreq.h"
#include "pml_bfo_rdmafrag.h"
#include "pml_bfo_failover.h"
#include "ompi/mca/bml/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/util/show_help.h"
#include "orte/mca/notifier/notifier.h"
#include "ompi/runtime/ompi_cr.h"
Functions | |
static void | mca_pml_bfo_error_pending_packets (mca_btl_base_module_t *btl, mca_bml_base_endpoint_t *ep) |
This function is called when we are mapping out a BML. More... | |
bool | mca_pml_bfo_is_duplicate_msg (mca_pml_bfo_comm_proc_t *proc, mca_pml_bfo_match_hdr_t *hdr) |
When running with failover enabled, check the PML sequence numbers to see if we have received a duplicate message. More... | |
bool | mca_pml_bfo_is_duplicate_fin (mca_pml_bfo_hdr_t *hdr, mca_btl_base_descriptor_t *rdma, mca_btl_base_module_t *btl) |
This function checks to see if we have received a duplicate FIN message. More... | |
void | mca_pml_bfo_repost_fin (struct mca_btl_base_descriptor_t *des) |
Repost a FIN message if we get an error on the completion event. | |
mca_pml_bfo_recv_request_t * | mca_pml_bfo_get_request (mca_pml_bfo_match_hdr_t *hdr) |
This function is called when a RNDV or RGET is received with the FLAGS_RESTART flag set. More... | |
void | mca_pml_bfo_recv_frag_callback_rndvrestartnotify (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *des, void *cbdata) |
Callback for when a RNDVRESTARTNOTIFY message is received. More... | |
void | mca_pml_bfo_recv_frag_callback_rndvrestartack (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *des, void *cbdata) |
Callback for when a RNDVRESTARTACK message is received. More... | |
void | mca_pml_bfo_recv_frag_callback_recverrnotify (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *des, void *cbdata) |
Callback for when a RECVERRNOTIFY message is received. More... | |
void | mca_pml_bfo_recv_frag_callback_rndvrestartnack (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *des, void *cbdata) |
Callback for when a RNDVRESTARTNACK message is received. More... | |
void | mca_pml_bfo_send_request_rndvrestartnotify (mca_pml_bfo_send_request_t *sendreq, bool repost, mca_btl_base_tag_t tag, int status, mca_btl_base_module_t *btl) |
This function gets called when failover is enabled and an error occurs during the rendezvous protocol. More... | |
void | mca_pml_bfo_send_request_restart (mca_pml_bfo_send_request_t *sendreq, bool repost, mca_btl_base_tag_t tag) |
This function restarts a RNDV send request. More... | |
void | mca_pml_bfo_repost_match_fragment (struct mca_btl_base_descriptor_t *des) |
This function will repost a match fragment. More... | |
void | mca_pml_bfo_rndvrestartnotify_completion (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, struct mca_btl_base_descriptor_t *des, int status) |
Completion callback for rndvrestartnotify completion event. More... | |
void | mca_pml_bfo_recv_request_recverrnotify (mca_pml_bfo_recv_request_t *recvreq, mca_btl_base_tag_t tag, int status) |
This function is called when an error is detected on a completion event on the receiving side. More... | |
void | mca_pml_bfo_recv_request_rndvrestartack (mca_pml_bfo_recv_request_t *recvreq, mca_btl_base_tag_t tag, int status, mca_btl_base_module_t *btl) |
This function is called when it may be time to send a RNDVRESTARTACK message back to the sending side. More... | |
void | mca_pml_bfo_recv_request_rndvrestartnack (mca_btl_base_descriptor_t *olddes, ompi_proc_t *ompi_proc, bool repost) |
Called after the receipt of a RNDVRESTARTNOTIFY message to a request that no longer matches. More... | |
void | mca_pml_bfo_recv_request_reset (mca_pml_bfo_recv_request_t *match) |
Reset all the receive request fields to match what a request looks like when it is first started. More... | |
void | mca_pml_bfo_recv_restart_completion (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, struct mca_btl_base_descriptor_t *des, int status) |
void | mca_pml_bfo_map_out_btl (struct mca_btl_base_module_t *btl, ompi_proc_t *errproc, char *btlname) |
void | mca_pml_bfo_failover_error_handler (struct mca_btl_base_module_t *btl, int32_t flags, ompi_proc_t *errproc, char *btlname) |
void | mca_pml_bfo_check_recv_ctl_completion_status (mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des, int status) |
Call each time we get a completion event on ACK or PUT message. More... | |
int | mca_pml_bfo_register_callbacks (void) |
Register four functions to handle extra PML message types that are utilized when a failover occurs. | |
void | mca_pml_bfo_update_rndv_fields (mca_pml_bfo_hdr_t *hdr, mca_pml_bfo_send_request_t *sendreq, char *type) |
Update a few fields when we are restarting either a RNDV or RGET type message. | |
void | mca_pml_bfo_update_eager_bml_btl_recv_ctl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des) |
The following set of functions are all called when it is determined that the cached bml_btl->btl does not match the btl handed back by the callback function. More... | |
void | mca_pml_bfo_find_sendreq_eager_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_send_request_t *sendreq, char *type) |
void | mca_pml_bfo_find_sendreq_rdma_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_send_request_t *sendreq, char *type) |
void | mca_pml_bfo_find_recvreq_eager_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_recv_request_t *recvreq, char *type) |
void | mca_pml_bfo_find_recvreq_rdma_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_recv_request_t *recvreq, char *type) |
bool | mca_pml_bfo_rndv_completion_status_error (struct mca_btl_base_descriptor_t *des, mca_pml_bfo_send_request_t *sendreq) |
The completion event for the RNDV message has returned with an error. More... | |
void | mca_pml_bfo_completion_sendreq_has_error (mca_pml_bfo_send_request_t *sendreq, int status, mca_btl_base_module_t *btl, int type, char *description) |
Check to see if an error has occurred on this send request. More... | |
void | mca_pml_bfo_send_ctl_completion_status_error (struct mca_btl_base_descriptor_t *des) |
Functions that implement failover capabilities.
To utilize the failover feature, one needs to configure the library with --enable-openib-failover. Then the system that is being used must have two or more openib BTLs in use. When an error occurs, the BTL will call into this PML to map out the offending BTL and continue using the one that is still working. Most of the differences between the ob1 PML and the bfo PML are contained in this file.
void mca_pml_bfo_check_recv_ctl_completion_status | ( | mca_btl_base_module_t * | btl, |
struct mca_btl_base_descriptor_t * | des, | ||
int | status | ||
) |
Call each time we get a completion event on ACK or PUT message.
These types of messages are receive control type messages. This function is only called if the underlying BTL supports failover. Otherwise, there is no need for this check.
References mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_src, mca_pml_bfo_rdma_hdr_t::hdr_des, mca_pml_bfo_common_hdr_t::hdr_type, mca_pml_bfo_recv_request_recverrnotify(), mca_pml_bfo_recv_request_rndvrestartack(), opal_output_verbose(), mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_ompi, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.
void mca_pml_bfo_completion_sendreq_has_error | ( | mca_pml_bfo_send_request_t * | sendreq, |
int | status, | ||
mca_btl_base_module_t * | btl, | ||
int | type, | ||
char * | description | ||
) |
Check to see if an error has occurred on this send request.
If it has and there are no outstanding events, then we can start the restart dance.
References mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_peer, and mca_pml_base_request_t::req_sequence.
|
static |
This function is called when we are mapping out a BML.
This will walk through the four PML lists and dispatch with the fragments/requests. There are four different lists and each one is handled slightly differently. In all cases, we first see if the message is associated with the endpoint that is being mapped out. If not, then just leave it alone and put it back on the list. If it is associated with the endpoint, then each list handles it slightly differently. Also, in some cases, we actually adjust the pointers to the BMLs in the messages as they may have changed when the BML is mapped out. That is because this is called after we have mapped out the offending BML and adjusted the array of available BMLs.
References mca_bml_base_endpoint_t::btl_eager, mca_bml_base_endpoint_t::btl_rdma, mca_bml_base_btl_array_find(), mca_bml_base_btl_array_get_next(), mca_pml_bfo_recv_request_recverrnotify(), mca_pml_bfo_recv_request_rndvrestartack(), mca_pml_bfo_send_request_rndvrestartnotify(), opal_list_append, opal_list_get_size(), opal_list_remove_first(), opal_output(), opal_output_verbose(), OPAL_THREAD_LOCK, OPAL_THREAD_UNLOCK, ompi_proc_t::proc_bml, mca_pml_base_recv_request_t::req_base, mca_pml_base_send_request_t::req_base, and mca_pml_base_request_t::req_proc.
mca_pml_bfo_recv_request_t* mca_pml_bfo_get_request | ( | mca_pml_bfo_match_hdr_t * | hdr | ) |
This function is called when a RNDV or RGET is received with the FLAGS_RESTART flag set.
This means this message already has a receive request associated with it.
References mca_pml_bfo_match_hdr_t::hdr_common, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_common_hdr_t::hdr_type, mca_pml_bfo_recv_request_reset(), opal_output_verbose(), mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, and ompi_request_t::req_status.
Referenced by mca_pml_bfo_recv_frag_match().
bool mca_pml_bfo_is_duplicate_fin | ( | mca_pml_bfo_hdr_t * | hdr, |
mca_btl_base_descriptor_t * | rdma, | ||
mca_btl_base_module_t * | btl | ||
) |
This function checks to see if we have received a duplicate FIN message.
This is done by first pulling the pointer of the request that the FIN message is pointing to from the message. We then check the various fields in the request to the fields in the header and make sure they match. If they do not, then the request must have been recycled already and this is a duplicate FIN message. We have to do this check on every FIN message that we receive.
References mca_btl_base_module_t::btl_flags, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_flags, mca_pml_bfo_fin_hdr_t::hdr_common, mca_pml_bfo_common_hdr_t::hdr_flags, opal_output_verbose(), mca_pml_base_recv_request_t::req_base, mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_sequence, ompi_request_t::req_status, and mca_pml_base_request_t::req_type.
Referenced by mca_pml_bfo_recv_frag_callback_fin().
bool mca_pml_bfo_is_duplicate_msg | ( | mca_pml_bfo_comm_proc_t * | proc, |
mca_pml_bfo_match_hdr_t * | hdr | ||
) |
When running with failover enabled, check the PML sequence numbers to see if we have received a duplicate message.
This check is done for all MATCH fragments. It is also done for RNDV and RGET fragments that do not have the MCA_PML_BFO_HDR_FLAGS_RESTART flag set. We set the window size to half the total range of sequence numbers. We only enter this code when the seq_num is not the expected one. A few more notes on the algorithm used here. In normal operation, the expected value will either be equal to or less than the sequence number of the header. This is because we are using this sequence number to detect packets arriving prior to them being expected. If we determine that expected is less than header, then make sure this is not a rollover case. We do that by adding the maxnum to the expected.
proc | Pointer to proc from where message came |
hdr | Pointer to header of message |
References mca_pml_bfo_comm_proc_t::expected_sequence, mca_pml_bfo_comm_proc_t::frags_cant_match, mca_pml_bfo_match_hdr_t::hdr_common, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_common_hdr_t::hdr_type, opal_list_get_end(), opal_list_get_first(), opal_list_get_next, opal_list_get_size(), opal_output(), and opal_output_verbose().
Referenced by mca_pml_bfo_recv_frag_callback_match(), and mca_pml_bfo_recv_frag_match().
void mca_pml_bfo_recv_frag_callback_recverrnotify | ( | mca_btl_base_module_t * | btl, |
mca_btl_base_tag_t | tag, | ||
mca_btl_base_descriptor_t * | des, | ||
void * | cbdata | ||
) |
Callback for when a RECVERRNOTIFY message is received.
This message is sent from the receiver to the sender and tells the sender that the receiver has seen an error. This will trigger the sender to start the request restart sequence.
References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_register_callbacks().
void mca_pml_bfo_recv_frag_callback_rndvrestartack | ( | mca_btl_base_module_t * | btl, |
mca_btl_base_tag_t | tag, | ||
mca_btl_base_descriptor_t * | des, | ||
void * | cbdata | ||
) |
Callback for when a RNDVRESTARTACK message is received.
This message is sent from the receiver to the sender to acknowledge the receipt of the RNDVRESTARTNOTIFY message. At this point, the sender can reset the send request and restart the message.
References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_send_request_restart(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_register_callbacks().
void mca_pml_bfo_recv_frag_callback_rndvrestartnack | ( | mca_btl_base_module_t * | btl, |
mca_btl_base_tag_t | tag, | ||
mca_btl_base_descriptor_t * | des, | ||
void * | cbdata | ||
) |
Callback for when a RNDVRESTARTNACK message is received.
This message is sent from the receiver to the sender and tells the sender that the receiver has already completed the message and there is nothing else to be done. The sender should then just make the send request complete.
References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_register_callbacks().
void mca_pml_bfo_recv_frag_callback_rndvrestartnotify | ( | mca_btl_base_module_t * | btl, |
mca_btl_base_tag_t | tag, | ||
mca_btl_base_descriptor_t * | des, | ||
void * | cbdata | ||
) |
Callback for when a RNDVRESTARTNOTIFY message is received.
Four new callbacks for the four new message types.
A RNDVRESTARTNOTIFY message is sent from the sender to the receiver telling the receiver that the message is going to be started over. The receiver first makes sure that the request being pointed to is still valid. If it is not, that means the receiver must have completed the request and therefore we need to send a NACK back to the sender. The receiver then makes sure this is not a duplicate message. If it is a duplicate, it will just drop it. Otherwise, it will then send a RNDVRESTARTACK message if there are no outstanding events on the receiver. Otherwise, it will just change the state of the request and wait for another event to send the RNDVRESTARTACK to the sender.
References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_recv_request_rndvrestartack(), mca_pml_bfo_recv_request_rndvrestartnack(), ompi_proc_find(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_register_callbacks().
void mca_pml_bfo_recv_request_recverrnotify | ( | mca_pml_bfo_recv_request_t * | recvreq, |
mca_btl_base_tag_t | tag, | ||
int | status | ||
) |
This function is called when an error is detected on a completion event on the receiving side.
This can come from an ACK, PUT, RDMA read (GET) or RECVERRNOTIFY completion event. When this happens, check the state of the request and decide if the sender needs to be notified that a problem was seen. If no RECVERRNOTIFY message has been sent and no RNDVRESTARTNOTIFY has been received from the sender, then send a message telling the sender an error was seen.
References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_proc, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_check_recv_ctl_completion_status(), and mca_pml_bfo_error_pending_packets().
void mca_pml_bfo_recv_request_reset | ( | mca_pml_bfo_recv_request_t * | match | ) |
Reset all the receive request fields to match what a request looks like when it is first started.
This gets called when the rendezvous/rget message is being restarted.
References mca_mpool_base_module_t::mpool_deregister, mca_pml_bfo_recv_request_t::req_ack_sent, mca_pml_base_recv_request_t::req_base, mca_pml_bfo_recv_request_t::req_bytes_received, ompi_request_t::req_complete, mca_pml_base_request_t::req_convertor, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_pml_complete, and ompi_request_t::req_state.
Referenced by mca_pml_bfo_get_request().
void mca_pml_bfo_recv_request_rndvrestartack | ( | mca_pml_bfo_recv_request_t * | recvreq, |
mca_btl_base_tag_t | tag, | ||
int | status, | ||
mca_btl_base_module_t * | btl | ||
) |
This function is called when it may be time to send a RNDVRESTARTACK message back to the sending side.
This can happen because we received a RNDVRESTARTNOTIFY message from the sender. This can also happen if we have noticed that the request has received the RNDVRESTARTNOTIFY message, but has not yet sent out the RNDVRESTARTACK because there were still some pending receive events on the request. That means we can enter this routine from a completion event on an ACK, PUT, or RDMA read as well as from the receipt of a RNDVRESTARTNOTIFY message. If all is good, we send the RNDVRESTARTACK message back to the sender. Then sometime later a message will arrive telling us to reset and restart the receive request.
References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_proc, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_check_recv_ctl_completion_status(), mca_pml_bfo_error_pending_packets(), and mca_pml_bfo_recv_frag_callback_rndvrestartnotify().
void mca_pml_bfo_recv_request_rndvrestartnack | ( | mca_btl_base_descriptor_t * | olddes, |
ompi_proc_t * | ompi_proc, | ||
bool | repost | ||
) |
Called after the receipt of a RNDVRESTARTNOTIFY message to a request that no longer matches.
This can happen if the sender detected an error, but the receiver actually received all the data. Therefore send a NACK back instead of the ACK so that the sender can complete its request. This happens very rarely. Note that we need to make use of the hdr_dst_rank that we received from the notify message. This is so the sending side can make sure the message matches a valid request on the sending side.
References mca_bml_base_btl_array_t::arr_size, mca_bml_base_endpoint_t::btl_eager, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_dst, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, ompi_proc_t::proc_name, mca_btl_base_segment_t::seg_addr, and orte_process_name_t::vpid.
Referenced by mca_pml_bfo_recv_frag_callback_rndvrestartnotify().
void mca_pml_bfo_repost_match_fragment | ( | struct mca_btl_base_descriptor_t * | des | ) |
This function will repost a match fragment.
This function has to handle the case where there may not be a request associated with the fragment and just use the information in the fragment to repost the send.
References mca_bml_base_btl_array_t::arr_size, mca_bml_base_endpoint_t::btl_eager, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_context, mca_btl_base_descriptor_t::des_flags, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), mca_pml_base_send_request_t::req_base, mca_pml_base_send_request_t::req_bytes_packed, mca_pml_base_request_t::req_convertor, mca_btl_base_segment_t::seg_addr, and mca_btl_base_segment_t::seg_len.
bool mca_pml_bfo_rndv_completion_status_error | ( | struct mca_btl_base_descriptor_t * | des, |
mca_pml_bfo_send_request_t * | sendreq | ||
) |
The completion event for the RNDV message has returned with an error.
We know that the send request we are looking at is valid because it cannot be completed until the sendreq->req_state value reaches 0. And for the sendreq->req_state to reach 0, the completion event on the RNDV message must occur. So, we do not bother checking whether the send request is valid, because we know it is, but we put a few asserts in for good measure. We then check a few fields in the request to decide what to do. If the sendreq->req_error is set, that means that something has happened already to the request and we do not want to restart it. Presumably, we may have received a RECVERRNOTIFY message from the receiver. We also check the sendreq->req_acked field to see if it has been acked. If it has, then again we do not restart everything because obviously the RNDV message has made it to the other side.
References mca_btl_base_descriptor_t::des_src, mca_pml_bfo_send_request_restart(), and mca_btl_base_segment_t::seg_addr.
void mca_pml_bfo_rndvrestartnotify_completion | ( | mca_btl_base_module_t * | btl, |
struct mca_btl_base_endpoint_t * | ep, | ||
struct mca_btl_base_descriptor_t * | des, | ||
int | status | ||
) |
Completion callback for rndvrestartnotify completion event.
If the RNDVRESTARTACK has already been received, then reset and restart. Otherwise, just update the state and let the RNDVRESTARTACK trigger the reset and restart.
References mca_btl_base_descriptor_t::des_src, mca_pml_bfo_send_request_restart(), mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_sequence, and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_send_request_rndvrestartnotify().
void mca_pml_bfo_send_request_restart | ( | mca_pml_bfo_send_request_t * | sendreq, |
bool | repost, | ||
mca_btl_base_tag_t | tag | ||
) |
This function restarts a RNDV send request.
When this is called, all the fields in the send request are reset and the send is started over. The sendreq->req_restartseq will be non-zero which will trigger a special flag in the RNDV header which indicates the match has already happened on the receiving side.
References mca_bml_base_endpoint_t::btl_eager, mca_bml_base_btl_array_get_next(), mca_bml_base_btl_array_get_size(), MCA_PML_BASE_SEND_START, opal_list_get_begin(), opal_list_get_last(), opal_list_remove_item(), opal_output_verbose(), OPAL_THREAD_LOCK, OPAL_THREAD_UNLOCK, ompi_proc_t::proc_bml, mca_pml_base_send_request_t::req_addr, mca_pml_base_request_t::req_addr, mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_convertor, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_proc, mca_pml_base_send_request_t::req_send_mode, and mca_pml_base_request_t::req_sequence.
Referenced by mca_pml_bfo_recv_frag_callback_rndvrestartack(), mca_pml_bfo_rndv_completion_status_error(), and mca_pml_bfo_rndvrestartnotify_completion().
void mca_pml_bfo_send_request_rndvrestartnotify | ( | mca_pml_bfo_send_request_t * | sendreq, |
bool | repost, | ||
mca_btl_base_tag_t | tag, | ||
int | status, | ||
mca_btl_base_module_t * | btl | ||
) |
This function gets called when failover is enabled and an error occurs during the rendezvous protocol.
A message is sent to the receiving side notifying the request that the communication is going to be starting over. However, none of the information in the send request is reset yet, so that any in flight fragments can still find a home. Information in the send request gets reset when the completion event for this send occurs AND an ACK has been received back from the receiver.
References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), mca_pml_bfo_rndvrestartnotify_completion(), opal_output(), opal_output_verbose(), ORTE_PROC_MY_NAME, ompi_proc_t::proc_bml, mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_proc, mca_pml_base_request_t::req_sequence, and mca_btl_base_segment_t::seg_addr.
Referenced by mca_pml_bfo_completion_sendreq_has_error(), mca_pml_bfo_error_pending_packets(), mca_pml_bfo_recv_frag_callback_recverrnotify(), and mca_pml_bfo_rndvrestartnotify_completion().
void mca_pml_bfo_update_eager_bml_btl_recv_ctl | ( | mca_bml_base_btl_t ** | bml_btl, |
mca_btl_base_module_t * | btl, | ||
struct mca_btl_base_descriptor_t * | des | ||
) |
The following set of functions are all called when it is determined that the cached bml_btl->btl does not match the btl handed back by the callback function.
This means that the bml_btl array has been shuffled and the bml_btl matching the btl has to be found again. If it cannot be found, then just find a different one to use.
References mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_src, mca_pml_bfo_ack_hdr_t::hdr_dst_req, mca_pml_bfo_common_hdr_t::hdr_type, opal_output(), and mca_btl_base_segment_t::seg_addr.