OpenMPI  0.1.1
pml_bfo_failover.c File Reference

Functions that implement failover capabilities. More...

#include "ompi_config.h"
#include <stdlib.h>
#include <string.h>
#include "opal/class/opal_bitmap.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "pml_bfo.h"
#include "pml_bfo_component.h"
#include "pml_bfo_comm.h"
#include "pml_bfo_hdr.h"
#include "pml_bfo_recvfrag.h"
#include "pml_bfo_sendreq.h"
#include "pml_bfo_recvreq.h"
#include "pml_bfo_rdmafrag.h"
#include "pml_bfo_failover.h"
#include "ompi/mca/bml/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/util/show_help.h"
#include "orte/mca/notifier/notifier.h"
#include "ompi/runtime/ompi_cr.h"

Functions

static void mca_pml_bfo_error_pending_packets (mca_btl_base_module_t *btl, mca_bml_base_endpoint_t *ep)
 This function is called when we are mapping out a BML. More...
 
bool mca_pml_bfo_is_duplicate_msg (mca_pml_bfo_comm_proc_t *proc, mca_pml_bfo_match_hdr_t *hdr)
 When running with failover enabled, check the PML sequence numbers to see if we have received a duplicate message. More...
 
bool mca_pml_bfo_is_duplicate_fin (mca_pml_bfo_hdr_t *hdr, mca_btl_base_descriptor_t *rdma, mca_btl_base_module_t *btl)
 This function checks to see if we have received a duplicate FIN message. More...
 
void mca_pml_bfo_repost_fin (struct mca_btl_base_descriptor_t *des)
 Repost a FIN message if we get an error on the completion event.
 
mca_pml_bfo_recv_request_t * mca_pml_bfo_get_request (mca_pml_bfo_match_hdr_t *hdr)
 This function is called when a RNDV or RGET is received with the FLAGS_RESTART flag set. More...
 
void mca_pml_bfo_recv_frag_callback_rndvrestartnotify (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *des, void *cbdata)
 Callback for when a RNDVRESTARTNOTIFY message is received. More...
 
void mca_pml_bfo_recv_frag_callback_rndvrestartack (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *des, void *cbdata)
 Callback for when a RNDVRESTARTACK message is received. More...
 
void mca_pml_bfo_recv_frag_callback_recverrnotify (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *des, void *cbdata)
 Callback for when a RECVERRNOTIFY message is received. More...
 
void mca_pml_bfo_recv_frag_callback_rndvrestartnack (mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t *des, void *cbdata)
 Callback for when a RNDVRESTARTNACK message is received. More...
 
void mca_pml_bfo_send_request_rndvrestartnotify (mca_pml_bfo_send_request_t *sendreq, bool repost, mca_btl_base_tag_t tag, int status, mca_btl_base_module_t *btl)
 This function gets called when failover is enabled and an error occurs during the rendezvous protocol. More...
 
void mca_pml_bfo_send_request_restart (mca_pml_bfo_send_request_t *sendreq, bool repost, mca_btl_base_tag_t tag)
 This function restarts a RNDV send request. More...
 
void mca_pml_bfo_repost_match_fragment (struct mca_btl_base_descriptor_t *des)
 This function will repost a match fragment. More...
 
void mca_pml_bfo_rndvrestartnotify_completion (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, struct mca_btl_base_descriptor_t *des, int status)
 Completion callback for rndvrestartnotify completion event. More...
 
void mca_pml_bfo_recv_request_recverrnotify (mca_pml_bfo_recv_request_t *recvreq, mca_btl_base_tag_t tag, int status)
 This function is called when an error is detected on a completion event on the receiving side. More...
 
void mca_pml_bfo_recv_request_rndvrestartack (mca_pml_bfo_recv_request_t *recvreq, mca_btl_base_tag_t tag, int status, mca_btl_base_module_t *btl)
 This function is called when it may be time to send a RNDVRESTARTACK message back to the sending side. More...
 
void mca_pml_bfo_recv_request_rndvrestartnack (mca_btl_base_descriptor_t *olddes, ompi_proc_t *ompi_proc, bool repost)
 Called after the receipt of a RNDVRESTARTNOTIFY message to a request that no longer matches. More...
 
void mca_pml_bfo_recv_request_reset (mca_pml_bfo_recv_request_t *match)
 Reset all the receive request fields to match what a request looks like when it is first started. More...
 
void mca_pml_bfo_recv_restart_completion (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, struct mca_btl_base_descriptor_t *des, int status)
 
void mca_pml_bfo_map_out_btl (struct mca_btl_base_module_t *btl, ompi_proc_t *errproc, char *btlname)
 
void mca_pml_bfo_failover_error_handler (struct mca_btl_base_module_t *btl, int32_t flags, ompi_proc_t *errproc, char *btlname)
 
void mca_pml_bfo_check_recv_ctl_completion_status (mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des, int status)
 Call each time we get a completion event on ACK or PUT message. More...
 
int mca_pml_bfo_register_callbacks (void)
 Register four functions to handle extra PML message types that are utilized when a failover occurs.
 
void mca_pml_bfo_update_rndv_fields (mca_pml_bfo_hdr_t *hdr, mca_pml_bfo_send_request_t *sendreq, char *type)
 Update a few fields when we are restarting either a RNDV or RGET type message.
 
void mca_pml_bfo_update_eager_bml_btl_recv_ctl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, struct mca_btl_base_descriptor_t *des)
 The following set of functions are all called when it is determined that the cached bml_btl->btl does not match the btl handed back by the callback function. More...
 
void mca_pml_bfo_find_sendreq_eager_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_send_request_t *sendreq, char *type)
 
void mca_pml_bfo_find_sendreq_rdma_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_send_request_t *sendreq, char *type)
 
void mca_pml_bfo_find_recvreq_eager_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_recv_request_t *recvreq, char *type)
 
void mca_pml_bfo_find_recvreq_rdma_bml_btl (mca_bml_base_btl_t **bml_btl, mca_btl_base_module_t *btl, mca_pml_bfo_recv_request_t *recvreq, char *type)
 
bool mca_pml_bfo_rndv_completion_status_error (struct mca_btl_base_descriptor_t *des, mca_pml_bfo_send_request_t *sendreq)
 The completion event for the RNDV message has returned with an error. More...
 
void mca_pml_bfo_completion_sendreq_has_error (mca_pml_bfo_send_request_t *sendreq, int status, mca_btl_base_module_t *btl, int type, char *description)
 Check to see if an error has occurred on this send request. More...
 
void mca_pml_bfo_send_ctl_completion_status_error (struct mca_btl_base_descriptor_t *des)
 

Detailed Description

Functions that implement failover capabilities.

To utilize the failover feature, one needs to configure the library with --enable-openib-failover. Then the system that is being used must have two or more openib BTLs in use. When an error occurs, the BTL will call into this PML to map out the offending BTL and continue using the one that is still working. Most of the differences between the ob1 PML and the bfo PML are contained in this file.

Function Documentation

void mca_pml_bfo_check_recv_ctl_completion_status ( mca_btl_base_module_t *  btl,
struct mca_btl_base_descriptor_t *  des,
int  status 
)

Call each time we get a completion event on ACK or PUT message.

These types of messages are receive control type messages. This function is only called if the underlying BTL supports failover. Otherwise, there is no need for this check.

References mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_src, mca_pml_bfo_rdma_hdr_t::hdr_des, mca_pml_bfo_common_hdr_t::hdr_type, mca_pml_bfo_recv_request_recverrnotify(), mca_pml_bfo_recv_request_rndvrestartack(), opal_output_verbose(), mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_ompi, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.

void mca_pml_bfo_completion_sendreq_has_error ( mca_pml_bfo_send_request_t *  sendreq,
int  status,
mca_btl_base_module_t *  btl,
int  type,
char *  description 
)

Check to see if an error has occurred on this send request.

If it has and there are no outstanding events, then we can start the restart dance.

References mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_peer, and mca_pml_base_request_t::req_sequence.

static void mca_pml_bfo_error_pending_packets ( mca_btl_base_module_t *  btl,
mca_bml_base_endpoint_t *  ep 
)
static

This function is called when we are mapping out a BML.

This will walk through the four PML lists and deal with the fragments/requests. There are four different lists and each one is handled slightly differently. In all cases, we first see if the message is associated with the endpoint that is being mapped out. If not, then just leave it alone and put it back on the list. If it is associated with the endpoint, then each list handles it slightly differently. Also, in some cases, we actually adjust the pointers to the BMLs in the messages as they may have changed when the BML is mapped out. That is because this is called after we have mapped out the offending BML and adjusted the array of available BMLs.

References mca_bml_base_endpoint_t::btl_eager, mca_bml_base_endpoint_t::btl_rdma, mca_bml_base_btl_array_find(), mca_bml_base_btl_array_get_next(), mca_pml_bfo_recv_request_recverrnotify(), mca_pml_bfo_recv_request_rndvrestartack(), mca_pml_bfo_send_request_rndvrestartnotify(), opal_list_append, opal_list_get_size(), opal_list_remove_first(), opal_output(), opal_output_verbose(), OPAL_THREAD_LOCK, OPAL_THREAD_UNLOCK, ompi_proc_t::proc_bml, mca_pml_base_recv_request_t::req_base, mca_pml_base_send_request_t::req_base, and mca_pml_base_request_t::req_proc.

bool mca_pml_bfo_is_duplicate_fin ( mca_pml_bfo_hdr_t *  hdr,
mca_btl_base_descriptor_t *  rdma,
mca_btl_base_module_t *  btl 
)

This function checks to see if we have received a duplicate FIN message.

This is done by first pulling the pointer of the request that the FIN message is pointing to from the message. We then compare the various fields in the request with the fields in the header and make sure they match. If they do not, then the request must have been recycled already and this is a duplicate FIN message. We have to do this check on every FIN message that we receive.

References mca_btl_base_module_t::btl_flags, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_flags, mca_pml_bfo_fin_hdr_t::hdr_common, mca_pml_bfo_common_hdr_t::hdr_flags, opal_output_verbose(), mca_pml_base_recv_request_t::req_base, mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_sequence, ompi_request_t::req_status, and mca_pml_base_request_t::req_type.

Referenced by mca_pml_bfo_recv_frag_callback_fin().

bool mca_pml_bfo_is_duplicate_msg ( mca_pml_bfo_comm_proc_t *  proc,
mca_pml_bfo_match_hdr_t *  hdr 
)

When running with failover enabled, check the PML sequence numbers to see if we have received a duplicate message.

This check is done for all MATCH fragments. It is also done for RNDV and RGET fragments that do not have the MCA_PML_BFO_HDR_FLAGS_RESTART flag set. We set the window size to half the total range of sequence numbers. We only enter this code when the seq_num is not the expected one. A few more notes on the algorithm used here. In normal operation, the expected value will either be equal to or less than the sequence number of the header. This is because we are using this sequence number to detect packets arriving prior to them being expected. If we determine that expected is less than header, then make sure this is not a rollover case. We do that by adding the maxnum to the expected.

Parameters
proc: Pointer to proc from where message came
hdr: Pointer to header of message

References mca_pml_bfo_comm_proc_t::expected_sequence, mca_pml_bfo_comm_proc_t::frags_cant_match, mca_pml_bfo_match_hdr_t::hdr_common, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_common_hdr_t::hdr_type, opal_list_get_end(), opal_list_get_first(), opal_list_get_next, opal_list_get_size(), opal_output(), and opal_output_verbose().

Referenced by mca_pml_bfo_recv_frag_callback_match(), and mca_pml_bfo_recv_frag_match().

void mca_pml_bfo_recv_frag_callback_recverrnotify ( mca_btl_base_module_t *  btl,
mca_btl_base_tag_t  tag,
mca_btl_base_descriptor_t *  des,
void *  cbdata 
)

Callback for when a RECVERRNOTIFY message is received.

This message is sent from the receiver to the sender and tells the sender that the receiver has seen an error. This will trigger the sender to start the request restart sequence.

References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.

Referenced by mca_pml_bfo_register_callbacks().

void mca_pml_bfo_recv_frag_callback_rndvrestartack ( mca_btl_base_module_t *  btl,
mca_btl_base_tag_t  tag,
mca_btl_base_descriptor_t *  des,
void *  cbdata 
)

Callback for when a RNDVRESTARTACK message is received.

This message is sent from the receiver to the sender to acknowledge the receipt of the RNDVRESTARTNOTIFY message. At this point, the sender can reset the send request and restart the message.

References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_send_request_restart(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.

Referenced by mca_pml_bfo_register_callbacks().

void mca_pml_bfo_recv_frag_callback_rndvrestartnack ( mca_btl_base_module_t *  btl,
mca_btl_base_tag_t  tag,
mca_btl_base_descriptor_t *  des,
void *  cbdata 
)

Callback for when a RNDVRESTARTNACK message is received.

This message is sent from the receiver to the sender and tells the sender that the receiver has already completed the message and there is nothing else to be done. The sender should then just make the send request complete.

References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.

Referenced by mca_pml_bfo_register_callbacks().

void mca_pml_bfo_recv_frag_callback_rndvrestartnotify ( mca_btl_base_module_t *  btl,
mca_btl_base_tag_t  tag,
mca_btl_base_descriptor_t *  des,
void *  cbdata 
)

Callback for when a RNDVRESTARTNOTIFY message is received.

Four new callbacks for the four new message types.

A RNDVRESTARTNOTIFY message is sent from the sender to the receiver telling the receiver that the message is going to be started over. The receiver first makes sure that the request being pointed to is still valid. If it is not, that means the receiver must have completed the request and therefore we need to send a NACK back to the sender. The receiver then makes sure this is not a duplicate message. If it is a duplicate, it will just drop it. Otherwise, it will then send a RNDVRESTARTACK message if there are no outstanding events on the receiver. Otherwise, it will just change the state of the request and wait for another event to send the RNDVRESTARTACK to the sender.

References mca_btl_base_descriptor_t::des_dst, mca_pml_bfo_match_hdr_t::hdr_ctx, mca_pml_bfo_match_hdr_t::hdr_seq, mca_pml_bfo_match_hdr_t::hdr_src, mca_pml_bfo_recv_request_rndvrestartack(), mca_pml_bfo_recv_request_rndvrestartnack(), ompi_proc_find(), opal_output_verbose(), and mca_btl_base_segment_t::seg_addr.

Referenced by mca_pml_bfo_register_callbacks().

void mca_pml_bfo_recv_request_recverrnotify ( mca_pml_bfo_recv_request_t *  recvreq,
mca_btl_base_tag_t  tag,
int  status 
)

This function is called when an error is detected on a completion event on the receiving side.

This can come from an ACK, PUT, RDMA read (GET) or RECVERRNOTIFY completion event. When this happens, check the state of the request and decide if the sender needs to be notified that a problem was seen. If no RECVERRNOTIFY message has been sent and no RNDVRESTARTNOTIFY has been received from the sender, then send a message telling the sender an error was seen.

References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_proc, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.

Referenced by mca_pml_bfo_check_recv_ctl_completion_status(), and mca_pml_bfo_error_pending_packets().

void mca_pml_bfo_recv_request_reset ( mca_pml_bfo_recv_request_t *  match )

Reset all the receive request fields to match what a request looks like when it is first started.

void mca_pml_bfo_recv_request_rndvrestartack ( mca_pml_bfo_recv_request_t *  recvreq,
mca_btl_base_tag_t  tag,
int  status,
mca_btl_base_module_t *  btl 
)

This function is called when it may be time to send a RNDVRESTARTACK message back to the sending side.

This can happen because we received a RNDVRESTARTNOTIFY message from the sender. This can also happen if we have noticed that the request has received the RNDVRESTARTNOTIFY message, but has not yet sent out the RNDVRESTARTACK because there were still some pending receive events on the request. That means we can enter this routine from a completion event on an ACK, PUT, or RDMA read as well as from the receipt of a RNDVRESTARTNOTIFY message. If all is good, we send the RNDVRESTARTACK message back to the sender. Then sometime later a message will arrive telling us to reset and restart the receive request.

References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, mca_pml_base_recv_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_ompi, mca_pml_base_request_t::req_proc, ompi_request_t::req_status, and mca_btl_base_segment_t::seg_addr.

Referenced by mca_pml_bfo_check_recv_ctl_completion_status(), mca_pml_bfo_error_pending_packets(), and mca_pml_bfo_recv_frag_callback_rndvrestartnotify().

void mca_pml_bfo_recv_request_rndvrestartnack ( mca_btl_base_descriptor_t *  olddes,
ompi_proc_t *  ompi_proc,
bool  repost 
)

Called after the receipt of a RNDVRESTARTNOTIFY message to a request that no longer matches.

This can happen if the sender detected an error, but the receiver actually received all the data. Therefore send a NACK back instead of the ACK so that the sender can complete its request. This happens very rarely. Note that we need to make use of the hdr_dst_rank that we received from the notify message. This is so the sending side can make sure the message matches a valid request on the sending side.

References mca_bml_base_btl_array_t::arr_size, mca_bml_base_endpoint_t::btl_eager, mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_dst, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), opal_output(), opal_output_verbose(), ompi_proc_t::proc_bml, ompi_proc_t::proc_name, mca_btl_base_segment_t::seg_addr, and orte_process_name_t::vpid.

Referenced by mca_pml_bfo_recv_frag_callback_rndvrestartnotify().

bool mca_pml_bfo_rndv_completion_status_error ( struct mca_btl_base_descriptor_t *  des,
mca_pml_bfo_send_request_t *  sendreq 
)

The completion event for the RNDV message has returned with an error.

We know that the send request we are looking at is valid because it cannot be completed until the sendreq->req_state value reaches 0. And for the sendreq->req_state to reach 0, the completion event on the RNDV message must occur. So, we do not bother checking whether the send request is valid, because we know it is, but we put a few asserts in for good measure. We then check a few fields in the request to decide what to do. If the sendreq->req_error is set, that means that something has happened already to the request and we do not want to restart it. Presumably, we may have received a RECVERRNOTIFY message from the receiver. We also check the sendreq->req_acked field to see if it has been acked. If it has, then again we do not restart everything because obviously the RNDV message has made it to the other side.

References mca_btl_base_descriptor_t::des_src, mca_pml_bfo_send_request_restart(), and mca_btl_base_segment_t::seg_addr.

void mca_pml_bfo_rndvrestartnotify_completion ( mca_btl_base_module_t *  btl,
struct mca_btl_base_endpoint_t *  ep,
struct mca_btl_base_descriptor_t *  des,
int  status 
)

Completion callback for rndvrestartnotify completion event.

If the RNDVRESTARTACK has already been received, then reset and restart. Otherwise, just update the state and let the RNDVRESTARTACK trigger the reset and restart.

References mca_btl_base_descriptor_t::des_src, mca_pml_bfo_send_request_restart(), mca_pml_bfo_send_request_rndvrestartnotify(), opal_output_verbose(), mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_sequence, and mca_btl_base_segment_t::seg_addr.

Referenced by mca_pml_bfo_send_request_rndvrestartnotify().

void mca_pml_bfo_send_request_rndvrestartnotify ( mca_pml_bfo_send_request_t *  sendreq,
bool  repost,
mca_btl_base_tag_t  tag,
int  status,
mca_btl_base_module_t *  btl 
)

This function gets called when failover is enabled and an error occurs during the rendezvous protocol.

A message is sent to the receiving side notifying the request that the communication is going to be starting over. However, none of the information in the send request is reset yet, so that any in flight fragments can still find a home. Information in the send request gets reset when the completion event for this send occurs AND an ACK has been received back from the receiver.

References mca_bml_base_btl_t::btl, mca_btl_base_descriptor_t::des_cbfunc, mca_btl_base_descriptor_t::des_src, mca_bml_base_btl_array_get_next(), mca_pml_bfo_rndvrestartnotify_completion(), opal_output(), opal_output_verbose(), ORTE_PROC_MY_NAME, ompi_proc_t::proc_bml, mca_pml_base_send_request_t::req_base, mca_pml_base_request_t::req_comm, mca_pml_base_request_t::req_peer, mca_pml_base_request_t::req_proc, mca_pml_base_request_t::req_sequence, and mca_btl_base_segment_t::seg_addr.

Referenced by mca_pml_bfo_completion_sendreq_has_error(), mca_pml_bfo_error_pending_packets(), mca_pml_bfo_recv_frag_callback_recverrnotify(), and mca_pml_bfo_rndvrestartnotify_completion().

void mca_pml_bfo_update_eager_bml_btl_recv_ctl ( mca_bml_base_btl_t **  bml_btl,
mca_btl_base_module_t *  btl,
struct mca_btl_base_descriptor_t *  des 
)

The following set of functions are all called when it is determined that the cached bml_btl->btl does not match the btl handed back by the callback function.

This means that the bml_btl array has been shuffled and the bml_btl matching the btl has to be found again. If it cannot be found, then just find a different one to use.

References mca_btl_base_descriptor_t::des_cbdata, mca_btl_base_descriptor_t::des_src, mca_pml_bfo_ack_hdr_t::hdr_dst_req, mca_pml_bfo_common_hdr_t::hdr_type, opal_output(), and mca_btl_base_segment_t::seg_addr.