25 #ifndef MCA_BTL_IB_ENDPOINT_H
26 #define MCA_BTL_IB_ENDPOINT_H
29 #include "opal/mca/event/event.h"
32 #include "btl_openib.h"
33 #include "btl_openib_frag.h"
34 #include "btl_openib_eager_rdma.h"
37 #include "ompi/mca/btl/base/btl_base_error.h"
38 #include "connect/base.h"
52 MCA_BTL_IB_CONNECTING,
55 MCA_BTL_IB_CONNECT_ACK,
58 MCA_BTL_IB_WAITING_ACK,
71 } mca_btl_openib_endpoint_state_t;
89 uint64_t rem_subnet_id;
99 uint32_t rem_vendor_id;
101 uint32_t rem_vendor_part_id;
103 mca_btl_openib_transport_type_t rem_transport_type;
133 struct ibv_qp *lcl_qp;
205 uint32_t xrc_recv_qp_num;
206 uint32_t xrc_recv_psn;
294 void *mca_btl_openib_endpoint_invoke_error(
void *endpoint);
300 struct ibv_recv_wr *bad_wr, *wr_list = NULL, *wr = NULL;
306 for(i = 0; i < num_post; i++) {
309 OMPI_FREE_LIST_WAIT(&openib_btl->device->qps[qp].
recv_free, item, rc);
310 to_base_frag(item)->base.order = qp;
311 to_com_frag(item)->endpoint = ep;
313 wr = wr_list = &to_recv_frag(item)->rd_desc;
315 wr = wr->next = &to_recv_frag(item)->rd_desc;
316 OPAL_OUTPUT((-1,
"Posting recv (QP num %d): WR ID %p, SG addr %p, len %d, lkey %d",
317 ep->qps[qp].qp->lcl_qp->qp_num,
319 (
void*) wr->sg_list[0].addr,
320 wr->sg_list[0].length,
321 wr->sg_list[0].lkey));
326 rc = ibv_post_recv(ep->qps[qp].qp->lcl_qp, wr_list, &bad_wr);
330 BTL_ERROR((
"error %d posting receive on qp %d", rc, qp));
334 static inline int mca_btl_openib_endpoint_post_rr_nolock(
337 int rd_rsv = mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv;
338 int rd_num = mca_btl_openib_component.qp_infos[qp].rd_num;
339 int rd_low = mca_btl_openib_component.qp_infos[qp].rd_low;
340 int cqp = mca_btl_openib_component.credits_qp, rc;
341 int cm_received = 0, num_post = 0;
343 assert(BTL_OPENIB_QP_TYPE_PP(qp));
345 if(ep->qps[qp].u.pp_qp.
rd_posted <= rd_low)
346 num_post = rd_num - ep->qps[qp].u.pp_qp.
rd_posted;
348 assert(num_post >= 0);
350 if(ep->qps[qp].u.pp_qp.
cm_received >= (rd_rsv >> 2))
353 if((rc = post_recvs(ep, qp, num_post)) != OMPI_SUCCESS) {
360 if((rc = post_recvs(ep, cqp, cm_received)) != OMPI_SUCCESS) {
366 assert(ep->qps[qp].u.pp_qp.
rd_credits <= rd_num &&
372 static inline int mca_btl_openib_endpoint_post_rr(
377 ret = mca_btl_openib_endpoint_post_rr_nolock(ep, qp);
/* Try to take the credit-send lock of QP Q on endpoint E by atomically
 * compare-and-setting rd_credit_send_lock from 0 to 1.  The macro expands to
 * the OPAL_ATOMIC_CMPSET_32 expression, so its value is that call's result. */
382 #define BTL_OPENIB_CREDITS_SEND_TRYLOCK(E, Q) \
383 OPAL_ATOMIC_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 0, 1)
/* Release the credit-send lock of QP Q on endpoint E by atomically
 * compare-and-setting rd_credit_send_lock from 1 back to 0. */
384 #define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \
385 OPAL_ATOMIC_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 1, 0)
386 #define BTL_OPENIB_GET_CREDITS(FROM, TO) \
389 } while(0 == OPAL_ATOMIC_CMPSET_32(&FROM, TO, 0))
402 if(!BTL_OPENIB_QP_TYPE_PP(qp))
406 mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_win) ?
true :
false;
411 if(BTL_OPENIB_QP_TYPE_PP(qp)) {
412 if(check_send_credits(ep, qp))
415 qp = mca_btl_openib_component.credits_qp;
418 if(!check_eager_rdma_credits(ep))
422 if(BTL_OPENIB_CREDITS_SEND_TRYLOCK(ep, qp))
423 mca_btl_openib_endpoint_send_credits(ep, qp);
429 int rc = OMPI_ERR_RESOURCE_BUSY;
432 case MCA_BTL_IB_CLOSED:
434 if (OMPI_SUCCESS == rc) {
435 rc = OMPI_ERR_RESOURCE_BUSY;
448 case MCA_BTL_IB_FAILED:
449 rc = OMPI_ERR_UNREACH;
451 case MCA_BTL_IB_CONNECTED:
459 static inline __opal_attribute_always_inline__
int
462 return IBV_SEND_SIGNALED |
471 return OMPI_ERR_OUT_OF_RESOURCE;
482 struct ibv_sge *sg = &to_com_frag(frag)->sg_entry;
483 struct ibv_send_wr *sr_desc = &to_out_frag(frag)->sr_desc;
484 struct ibv_send_wr *bad_wr;
485 int qp = to_base_frag(frag)->base.order;
490 sr_desc->send_flags = ib_send_flags(sg->length, &(ep->qps[qp]));
493 BTL_OPENIB_HEADER_HTON(*frag->hdr);
500 sr_desc->opcode = IBV_WR_RDMA_WRITE;
501 MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length);
502 MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr);
503 #if OPAL_ENABLE_DEBUG
508 (int32_t) (ftr->seq+1)));
511 BTL_OPENIB_FOOTER_HTON(*ftr);
515 #if BTL_OPENIB_FAILOVER_ENABLED
522 sr_desc->wr.rdma.remote_addr =
528 sr_desc->wr.rdma.remote_addr -= sg->length + BTL_OPENIB_FTR_PADDING(sg->length);
530 if(BTL_OPENIB_QP_TYPE_PP(qp)) {
531 sr_desc->opcode = IBV_WR_SEND;
533 sr_desc->opcode = IBV_WR_SEND_WITH_IMM;
534 #if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
535 sr_desc->imm_data = htonl(ep->
rem_info.rem_index);
537 sr_desc->imm_data = ep->
rem_info.rem_index;
543 if(BTL_OPENIB_QP_TYPE_XRC(qp))
544 sr_desc->xrc_remote_srq_num = ep->
rem_info.rem_srqs[qp].rem_srq_num;
546 assert(sg->addr == (uint64_t)(uintptr_t)frag->hdr);
548 return ibv_post_send(ep->qps[qp].qp->lcl_qp, sr_desc, &bad_wr);
#define OPAL_THREAD_ADD32(x, y)
Use an atomic operation for increment/decrement if opal_using_threads() indicates that threads are in...
Definition: mutex.h:367
mca_btl_openib_eager_rdma_local_t eager_rdma_local
info about local RDMA buffer
Definition: btl_openib_endpoint.h:227
#define OPAL_OUTPUT(a)
Main macro for use in sending debugging output to output streams; will be "compiled out" when OPAL is...
Definition: output.h:534
Aggregates all per peer qp info for an endpoint.
Definition: btl_openib_endpoint.h:110
A descriptor that holds the parameters to a send/put/get operation along w/ a callback routine that i...
Definition: btl.h:275
OPAL output stream facility.
int32_t head
RDMA buffer to post to.
Definition: btl_openib_eager_rdma.h:37
ompi_free_list_t recv_free
free lists of receive buffer descriptors
Definition: btl_openib.h:341
Definition: btl_openib_frag.h:322
int32_t credits
number of RDMA credits
Definition: btl_openib_eager_rdma.h:24
int32_t index
index of the endpoint in endpoints array
Definition: btl_openib_endpoint.h:229
int32_t eager_recv_count
number of eager received
Definition: btl_openib_endpoint.h:223
int32_t sd_wqe
number of available send wqe entries
Definition: btl_openib_endpoint.h:135
OPAL_DECLSPEC void opal_progress_event_users_increment(void)
Increase the number of users of the event library.
Definition: opal_progress.c:245
Definition: btl_openib_endpoint.h:132
Definition: btl_openib_frag.h:334
bool endpoint_initiator
If endpoint_local_cpc->cbm_uses_cts is true and this endpoint is iWARP, then endpoint_initiator must ...
Definition: btl_openib_endpoint.h:182
int32_t rd_credit_send_lock
Lock credit send fragment.
Definition: btl_openib_endpoint.h:146
Definition: btl_openib_endpoint.h:85
struct mca_btl_openib_proc_t * endpoint_proc
proc structure corresponding to endpoint
Definition: btl_openib_endpoint.h:169
Definition: btl_openib_xrc.h:29
int32_t get_tokens
number of available get tokens
Definition: btl_openib_endpoint.h:214
Definition: mutex_unix.h:53
ompi_btl_openib_connect_base_module_start_connect_fn_t cbm_start_connect
Connect function.
Definition: connect.h:336
struct ibv_mr * endpoint_cts_mr
Memory registration info for the CTS frag.
Definition: btl_openib_endpoint.h:243
Definition: btl_openib_endpoint.h:140
IB BTL Interface.
Definition: btl_openib.h:432
The opal_list_t interface is used to provide a generic doubly-linked list container for Open MPI...
mca_btl_openib_endpoint_state_t endpoint_state
current state of the connection
Definition: btl_openib_endpoint.h:189
int32_t cm_received
Credit messages received.
Definition: btl_openib_endpoint.h:119
size_t endpoint_retries
number of connection retries attempted
Definition: btl_openib_endpoint.h:192
Definition: opal_list.h:98
#define OPAL_THREAD_LOCK(mutex)
Lock a mutex if opal_using_threads() says that multiple threads may be active in the process...
Definition: mutex.h:223
opal_mutex_t endpoint_lock
lock for concurrent access to endpoint state
Definition: btl_openib_endpoint.h:198
struct mca_btl_elan_module_t * endpoint_btl
BTL instance that created this connection.
Definition: btl_elan_endpoint.h:36
#define OPAL_THREAD_UNLOCK(mutex)
Unlock a mutex if opal_using_threads() says that multiple threads may be active in the process...
Definition: mutex.h:309
size_t ib_inline_max
max size of inline send
Definition: btl_openib_endpoint.h:148
mca_btl_openib_rem_info_t rem_info
information about the remote port
Definition: btl_openib_endpoint.h:237
int32_t rd_posted
number of descriptors posted to the nic
Definition: btl_openib_endpoint.h:117
ompi_ptr_t base
address of remote buffer
Definition: btl_openib_eager_rdma.h:35
bool use_eager_rdma
use eager rdma for this peer?
Definition: btl_openib_endpoint.h:234
ompi_btl_openib_connect_base_module_t * endpoint_local_cpc
local CPC to connect to this endpoint
Definition: btl_openib_endpoint.h:172
double endpoint_tstamp
timestamp of when the first connection was attempted
Definition: btl_openib_endpoint.h:195
void * endpoint_local_cpc_data
hook for local CPC to hang endpoint-specific data
Definition: btl_openib_endpoint.h:175
#define opal_list_append(l, i)
Append an item to the end of the list.
Definition: opal_list.h:410
Definition: btl_openib_endpoint.h:80
IB fragment derived type.
Definition: btl_openib_frag.h:288
Byte Transfer Layer (BTL)
struct ib_address_t * ib_addr
used only for xrc; pointer to struct that keeps remote port info
Definition: btl_openib_endpoint.h:220
uint32_t seg_len
Length in bytes.
Definition: btl.h:240
Struct for holding CPC module and associated meta data.
Definition: connect.h:328
State of ELAN endpoint connection.
Definition: btl_elan_endpoint.h:33
Definition: ompi_free_list.h:62
Meta data about a CPC module.
Definition: connect.h:303
size_t eager_rdma_frag_size
length of eager frag
Definition: btl_openib.h:459
Definition: btl_openib_eager_rdma.h:34
Represents the state of a remote process and the set of addresses that it exports.
Definition: btl_openib_proc.h:61
int32_t cm_return
how many credits to return
Definition: btl_openib_endpoint.h:120
int32_t tokens
number of RDMA tokens
Definition: btl_openib_eager_rdma.h:38
Definition: opal_list.h:147
ompi_btl_openib_connect_base_module_data_t * endpoint_remote_cpc_data
pointer to remote proc's CPC data (essentially its CPC modex message)
Definition: btl_openib_endpoint.h:186
opal_list_t pending_put_frags
list of pending rput ops
Definition: btl_openib_endpoint.h:211
bool nbo
does the endpoint require network byte ordering?
Definition: btl_openib_endpoint.h:232
Definition: btl_openib_endpoint.h:73
uint64_t subnet_id
subnet id of this endpoint
Definition: btl_openib_endpoint.h:217
Definition: btl_openib_eager_rdma.h:18
mca_btl_openib_eager_rdma_remote_t eager_rdma_remote
info about remote RDMA buffer
Definition: btl_openib_endpoint.h:225
opal_list_t no_wqe_pending_frags[2]
put fragments here if there is no wqe available
Definition: btl_openib_endpoint.h:144
size_t eager_limit
Eager send limit of first fragment, in Bytes.
Definition: btl_openib.h:200
struct mca_btl_openib_module_t * endpoint_btl
BTL module that created this connection.
Definition: btl_openib_endpoint.h:166
opal_list_t pending_lazy_frags
list of pending frags due to lazy connection establishment for this endpoint
Definition: btl_openib_endpoint.h:202
opal_list_t pending_get_frags
list of pending rget ops
Definition: btl_openib_endpoint.h:209
bool endpoint_posted_recvs
Whether we've posted receives on this EP or not (only used in CTS protocol)
Definition: btl_openib_endpoint.h:247
int32_t sd_credits
this rank's view of the credits available for sending: this is the credits granted by the remote peer...
Definition: btl_openib_endpoint.h:111
opal_list_t no_credits_pending_frags[2]
put fragment here if there is no credits available
Definition: btl_openib_endpoint.h:142
Aggregates all srq qp info for an endpoint.
Definition: btl_openib_endpoint.h:128
int32_t rd_credits
number of credits to return to peer
Definition: btl_openib_endpoint.h:118
bool endpoint_cts_received
Whether we've received the CTS from the peer or not (only used in CTS protocol)
Definition: btl_openib_endpoint.h:251
Data received from the modex.
Definition: btl_openib_proc.h:44
Describes a region/segment of memory that is addressable by a BTL.
Definition: btl.h:236
bool endpoint_cts_sent
Whether we've sent out CTS to the peer or not (only used in CTS protocol)
Definition: btl_openib_endpoint.h:255
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236
uint32_t rkey
RKey for accessing remote buffer.
Definition: btl_openib_eager_rdma.h:36
mca_btl_openib_recv_frag_t endpoint_cts_frag
Frag for initial wireup CTS protocol; will be NULL if CPC indicates that it does not want to use CTS...
Definition: btl_openib_endpoint.h:241
int32_t cm_sent
Outstanding number of credit messages.
Definition: btl_openib_endpoint.h:121