31 #include "ompi_config.h"
32 #include <sys/types.h>
34 #include "btl_wv_def.h"
37 #include "ompi/class/ompi_free_list.h"
41 #include "opal/mca/event/event.h"
42 #include "opal/threads/threads.h"
45 #include "ompi/mca/btl/base/btl_base_error.h"
50 #include "connect/connect.h"
/* Component-wide constants; only their literal values are visible here. */
/* NOTE(review): presumably a flag value for "leave pinned" memory handling -- confirm at the use sites. */
55 #define MCA_BTL_IB_LEAVE_PINNED 1
/* NOTE(review): presumably the default (link-local, fe80::/64) InfiniBand GID prefix -- confirm. */
56 #define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
/* Mask that keeps the low 15 bits of a value.
 * NOTE(review): presumably applied to partition keys (see ib_pkey_val) -- confirm. */
57 #define MCA_BTL_IB_PKEY_MASK 0x7fff
63 #define ATTACH() do { \
65 opal_output(0, "WAITING TO DEBUG ATTACH"); \
66 while (i == 0) sleep(5); \
79 MCA_BTL_WV_TRANSPORT_IB,
80 MCA_BTL_WV_TRANSPORT_IWARP,
81 MCA_BTL_WV_TRANSPORT_RDMAOE,
82 MCA_BTL_WV_TRANSPORT_UNKNOWN,
83 MCA_BTL_WV_TRANSPORT_SIZE
84 } mca_btl_wv_transport_type_t;
89 } mca_btl_wv_qp_type_t;
107 mca_btl_wv_qp_type_t type;
/**
 * Type of the queue pair at index Q, read from
 * mca_btl_wv_component.qp_infos[Q].type.
 */
117 #define BTL_WV_QP_TYPE(Q) (mca_btl_wv_component.qp_infos[(Q)].type)
/** Non-zero when the queue pair at index Q has type MCA_BTL_WV_PP_QP. */
118 #define BTL_WV_QP_TYPE_PP(Q) \
119 (BTL_WV_QP_TYPE(Q) == MCA_BTL_WV_PP_QP)
/** Non-zero when the queue pair at index Q has type MCA_BTL_WV_SRQ_QP. */
120 #define BTL_WV_QP_TYPE_SRQ(Q) \
121 (BTL_WV_QP_TYPE(Q) == MCA_BTL_WV_SRQ_QP)
124 BTL_WV_RQ_SOURCE_DEFAULT,
125 BTL_WV_RQ_SOURCE_MCA,
126 BTL_WV_RQ_SOURCE_DEVICE_INI,
128 } btl_wv_receive_queues_source_t;
134 } btl_wv_device_type_t;
201 int32_t ib_max_inline_data;
203 uint32_t ib_pkey_val;
205 uint32_t ib_qp_ous_rd_atom;
207 uint32_t ib_min_rnr_timer;
209 uint32_t ib_retry_count;
210 uint32_t ib_rnr_retry;
211 uint32_t ib_max_rdma_dst_ops;
212 uint32_t ib_service_level;
213 uint32_t ib_path_rec_service_level;
216 int32_t eager_rdma_threshold;
217 int32_t eager_rdma_num;
218 int32_t max_eager_rdma;
219 uint32_t btls_per_lid;
224 uint32_t buffer_alignment;
225 btl_wv_device_type_t device_type;
227 char **if_include_list;
229 char **if_exclude_list;
230 char *ipaddr_include;
231 char *ipaddr_exclude;
234 char *receive_queues;
236 btl_wv_receive_queues_source_t receive_queues_source;
257 bool use_message_coalescing;
258 uint32_t cq_poll_ratio;
259 uint32_t cq_poll_progress;
260 uint32_t eager_rdma_poll_ratio;
275 #if BTL_WV_FAILOVER_ENABLED
276 int verbose_failover;
306 #define MCA_BTL_WV_MODEX_MSG_NTOH(hdr) \
308 (hdr).subnet_id = ntoh64((hdr).subnet_id); \
309 (hdr).lid = ntohs((hdr).lid); \
311 #define MCA_BTL_WV_MODEX_MSG_HTON(hdr) \
313 (hdr).subnet_id = hton64((hdr).subnet_id); \
314 (hdr).lid = htons((hdr).lid); \
329 WV_DEVICE_ATTRIBUTES ib_dev_attr;
331 struct wv_cq *ib_cq[2];
337 uint8_t use_eager_rdma;
341 uint16_t hp_cq_polls;
342 uint16_t eager_rdma_polls;
344 int32_t non_eager_rdma_endpoints;
345 int32_t eager_rdma_buffers_count;
352 uint32_t max_inline_data;
409 WV_PORT_ATTRIBUTES ib_port_attr;
441 int mca_btl_wv_register_error_cb(
454 extern int mca_btl_wv_finalize(
471 extern int mca_btl_wv_add_procs(
489 extern int mca_btl_wv_del_procs(
505 extern int mca_btl_wv_send(
509 mca_btl_base_tag_t tag
534 mca_btl_base_tag_t tag,
545 extern int mca_btl_wv_put(
558 extern int mca_btl_wv_get(
585 extern int mca_btl_wv_free(
624 extern void mca_btl_wv_frag_progress_pending_put_get(
/**
 * Declaration only; the definition is not part of this listing.
 *
 * @param state  Integer state value passed to the handler.
 * @return       An int status.
 *
 * NOTE(review): "ft" presumably stands for fault tolerance, and the return
 * value is presumably an OMPI status code -- confirm against the definition.
 */
633 extern int mca_btl_wv_ft_event(
int state);
/**
 * Declaration only; the definition is not part of this listing.
 * Judging by its name, reports an initialization error.
 *
 * @param file  NOTE(review): presumably the caller's source file (__FILE__) -- confirm.
 * @param line  NOTE(review): presumably the caller's source line (__LINE__) -- confirm.
 * @param func  NOTE(review): presumably the name of the failing function -- confirm.
 * @param dev   NOTE(review): presumably the name of the affected device -- confirm.
 */
640 void mca_btl_wv_show_init_error(
const char *file,
int line,
641 const char *func,
const char *dev);
/* The two completion-queue selectors; qp_cq_prio() evaluates to one of them.
 * NOTE(review): presumably HP/LP mean high/low priority and the values index
 * two-element arrays such as ib_cq[2] -- confirm. */
643 #define BTL_WV_HP_CQ 0
644 #define BTL_WV_LP_CQ 1
/**
 * Declaration only; the definition is not part of this listing.
 *
 * @param transport_type  A mca_btl_wv_transport_type_t value.
 * @return                A const char* naming the transport.
 *
 * NOTE(review): the returned string is presumably statically allocated and
 * must not be freed by the caller -- confirm against the definition.
 */
662 const char* btl_wv_get_transport_name(mca_btl_wv_transport_type_t transport_type);
670 static inline int qp_cq_prio(
const int qp)
677 return (mca_btl_wv_component.
qp_infos[qp].size <=
679 BTL_WV_HP_CQ : BTL_WV_LP_CQ;
/** Non-zero when QP equals mca_btl_wv_component.rdma_qp. */
682 #define BTL_WV_RDMA_QP(QP) \
683 ((QP) == mca_btl_wv_component.rdma_qp)
ompi_free_list_t recv_user_free
frags for coalesced messages
Definition: btl_wv.h:268
opal_mutex_t ib_lock
module level lock
Definition: btl_wv.h:416
IB BTL Interface.
Definition: btl_wv.h:391
int apm_port
Alternative port that may be used for APM.
Definition: btl_wv.h:411
struct mca_btl_base_endpoint_t ** eager_rdma_buffers
frags for control messages
Definition: btl_wv.h:346
int32_t rd_curr_num
The number of receive buffers that can currently be posted.
Definition: btl_wv.h:368
Definition: btl_wv_def.h:126
ompi_free_list_t send_free
free lists of send buffer descriptors
Definition: btl_wv.h:318
A descriptor that holds the parameters to a send/put/get operation along w/ a callback routine that i...
Definition: btl.h:275
OPAL output stream facility.
mca_btl_wv_modex_message_t port_info
Common information about all ports.
Definition: btl_wv.h:398
opal_list_t pending_frags[2]
list of high/low prio frags
Definition: btl_wv.h:365
opal_event_t ib_recv_event
lock for accessing module state
Definition: btl_wv.h:169
dynamic pointer array
Definition: opal_pointer_array.h:45
int32_t apm_ports
Preferred communication buffer alignment in Bytes (must be power of two)
Definition: btl_wv.h:222
uint16_t apm_lid
APM LID for this port.
Definition: btl_wv.h:293
Definition: opal_bitmap.h:53
bool enable_srq_resize
Whether we want a dynamically resizing srq, enabled by default.
Definition: btl_wv.h:274
uint8_t num_cpcs
Number of elements in the cpcs array.
Definition: btl_wv.h:404
bool verbose
Whether we're in verbose mode or not.
Definition: btl_wv.h:242
mca_btl_base_module_error_cb_fn_t error_cb
error handler
Definition: btl_wv.h:421
uint16_t lid
lid that is actually used (for LMC)
Definition: btl_wv.h:410
Structure to represent a single event.
Definition: event_struct.h:87
size_t max_send_size
Maximum message size for RDMA protocols in Bytes.
Definition: btl_wv.h:189
ompi_free_list_t recv_free
free lists of receive buffer descriptors
Definition: btl_wv.h:319
size_t eager_limit
Maximum send size, in Bytes.
Definition: btl_wv.h:187
uint32_t use_srq
Max outstanding CQE on the CQ.
Definition: btl_wv.h:195
void(* mca_btl_base_module_error_cb_fn_t)(struct mca_btl_base_module_t *btl, int32_t flags, struct ompi_proc_t *errproc, char *btlinfo)
Callback function that is called asynchronously on receipt of an error from the transport layer...
Definition: btl.h:538
Definition: mutex_unix.h:53
uint64_t subnet_id
The subnet ID of this port.
Definition: btl_wv.h:289
char ** if_list
Dummy argv-style list; a copy of names from the if_[in|ex]clude list that we use for error checking (...
Definition: btl_wv.h:256
mca_btl_wv_qp_info_t * qp_infos
Eager send limit of first fragment, in Bytes.
Definition: btl_wv.h:184
volatile int32_t eager_rdma_channels
number of open RDMA channels
Definition: btl_wv.h:419
See opal_bitmap.h for an explanation of why there is a split between OPAL and ORTE for this generic c...
Remote Open MPI process structure.
Definition: proc.h:56
uint8_t src_path_bits
offset from base lid (for LMC)
Definition: btl_wv.h:412
bool cpc_explicitly_defined
free list of frags only; used for pinning user memory
Definition: btl_wv.h:264
int32_t max_hw_msg_size
Length of the registration cache most recently used list.
Definition: btl_wv.h:191
uint8_t num_pp_qps
number of srq qp's
Definition: btl_wv.h:178
opal_event_t ib_send_event
event structure for recvs
Definition: btl_wv.h:166
int ib_num_btls
array of available BTLs
Definition: btl_wv.h:144
opal_pointer_array_t * endpoints
number of btls using this device
Definition: btl_wv.h:339
ompi_btl_wv_connect_base_module_t ** cpcs
Array of CPCs on this port.
Definition: btl_wv.h:401
int ib_max_btls
number of devices available to the wv component
Definition: btl_wv.h:141
Definition: btl_wv_def.h:62
size_t eager_rdma_frag_size
length of eager frag
Definition: btl_wv.h:418
uint8_t end
Dummy field used to calculate the real length.
Definition: btl_wv.h:303
char * ib_mpool_name
number of pp qp's
Definition: btl_wv.h:175
uint32_t ib_cq_size[2]
Max size of inline data.
Definition: btl_wv.h:198
opal_list_t ib_procs
event structure for sends
Definition: btl_wv.h:163
Byte Transfer Layer (BTL)
Definition: btl_wv_def.h:40
bool srq_limit_event_flag
Unlike the "--mca enable_srq_resize" parameter, which says whether or not we want to start with s...
Definition: btl_wv.h:373
int devices_count
initial size of free lists
Definition: btl_wv.h:151
struct mca_btl_wv_module_t ** wv_btls
array of available devices
Definition: btl_wv.h:147
int ib_free_list_num
maximum size of free lists
Definition: btl_wv.h:154
Definition: ompi_free_list.h:39
A hash table that may be indexed with either fixed length (e.g.
ompi_free_list_t send_user_free
free list of frags only; used for pinning user memory
Definition: btl_wv.h:266
char * device_params_file_names
Colon-delimited list of filenames for device parameters.
Definition: btl_wv.h:239
bool warn_nonexistent_if
Whether we want a warning if the user specifies a non-existent device and/or port via btl_wv_if_[in|e...
Definition: btl_wv.h:252
State of ELAN endpoint connection.
Definition: btl_elan_endpoint.h:33
uint32_t vendor_id
vendor ID; defines device type and tuning
Definition: btl_wv.h:297
BTL component descriptor.
Definition: btl.h:411
Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana University Research and Techno...
Base object.
Definition: opal_object.h:182
Definition: opal_convertor.h:90
int ib_free_list_inc
list of ib proc structures
Definition: btl_wv.h:160
Definition: btl_wv_def.h:56
Definition: opal_list.h:147
Definition: btl_wv_def.h:33
mca_btl_base_component_2_0_0_t super
base BTL component
Definition: btl_wv.h:138
char * default_recv_qps
Default receive queues.
Definition: btl_wv.h:272
bool warn_default_gid_prefix
Whether we want a warning if non default GID prefix is not configured on multiport setup...
Definition: btl_wv.h:249
int32_t rd_low_local
We post additional WQEs only if the number of WQEs (in a specific SRQ) is less than this value...
Definition: btl_wv.h:371
uint8_t port_num
ID of the PORT.
Definition: btl_wv.h:407
uint16_t lid
LID of this port.
Definition: btl_wv.h:291
opal_mutex_t ib_lock
name of ib memory pool
Definition: btl_wv.h:172
uint32_t reg_mru_len
Use the Shared Receive Queue (SRQ mode)
Definition: btl_wv.h:193
Struct for holding CPC module and associated meta data.
Definition: connect.h:328
int ib_free_list_max
number of elements to alloc when growing free lists
Definition: btl_wv.h:157
uint8_t transport_type
Transport type of remote port.
Definition: btl_wv.h:301
bool warn_no_device_params_found
Whether we want a warning if no device-specific parameters are found in INI files.
Definition: btl_wv.h:246
BTL module interface functions and attributes.
Definition: btl.h:786
uint32_t vendor_part_id
vendor part ID; defines device type and tuning
Definition: btl_wv.h:299
int32_t use_eager_rdma
After this number of msg, use RDMA for short messages, always.
Definition: btl_wv.h:214
Definition: btl_wv_def.h:93
Common information for all ports that is sent in the modex message.
Definition: btl_wv.h:287
uint8_t num_srq_qps
total number of qp's
Definition: btl_wv.h:180
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236
mpool module descriptor.
Definition: mpool.h:174
uint8_t mtu
The MTU used by this port.
Definition: btl_wv.h:295