23 #ifndef OMPI_PML_CSUM_RECV_REQUEST_H
24 #define OMPI_PML_CSUM_RECV_REQUEST_H
40 size_t req_pipeline_depth;
43 size_t req_rdma_offset;
44 size_t req_send_offset;
45 uint32_t req_rdma_cnt;
46 uint32_t req_rdma_idx;
73 #define MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc) \
75 ompi_free_list_item_t* item; \
77 OMPI_FREE_LIST_GET(&mca_pml_base_recv_requests, item, rc); \
78 recvreq = (mca_pml_csum_recv_request_t*)item; \
94 #define MCA_PML_CSUM_RECV_REQUEST_INIT( request, \
103 MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv, \
118 #define MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE( recvreq ) \
120 PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
121 &(recvreq->req_recv.req_base), PERUSE_RECV ); \
122 ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \
128 #define MCA_PML_CSUM_RECV_REQUEST_RETURN(recvreq) \
130 MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \
131 OMPI_FREE_LIST_RETURN( &mca_pml_base_recv_requests, \
132 (ompi_free_list_item_t*)(recvreq)); \
149 PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
150 &recvreq->req_recv.
req_base, PERUSE_RECV );
153 for(i = 0; i < recvreq->req_rdma_cnt; i++) {
155 if( NULL != btl_reg && btl_reg->mpool != NULL) {
159 recvreq->req_rdma_cnt = 0;
163 MCA_PML_CSUM_RECV_REQUEST_RETURN(recvreq);
183 #if OPAL_ENABLE_MULTI_THREADS
188 lock_recv_request(recvreq)) {
197 #define MCA_PML_CSUM_RECV_REQUEST_START(r) mca_pml_csum_recv_req_start(r)
202 opal_convertor_copy_and_prepare_for_recv(
214 #define MCA_PML_CSUM_RECV_REQUEST_MATCHED(request, hdr) \
215 recv_req_matched(request, hdr)
223 #if OPAL_ENABLE_MULTI_THREADS
229 prepare_recv_req_converter(req);
231 PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN,
232 &req->req_recv.
req_base, PERUSE_RECV);
241 #define MCA_PML_CSUM_RECV_REQUEST_UNPACK( request, \
249 bytes_delivered = 0; \
250 if(request->req_recv.req_bytes_packed > 0) { \
251 struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; \
252 uint32_t iov_count = 0; \
253 size_t max_data = bytes_received; \
254 size_t n, offset = seg_offset; \
255 mca_btl_base_segment_t* segment = segments; \
257 OPAL_THREAD_LOCK(&request->lock); \
258 for( n = 0; n < num_segments; n++, segment++ ) { \
259 if(offset >= segment->seg_len) { \
260 offset -= segment->seg_len; \
262 iov[iov_count].iov_len = segment->seg_len - offset; \
263 iov[iov_count].iov_base = (IOVBASE_TYPE*) \
264 ((unsigned char*)segment->seg_addr.pval + offset); \
269 PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE, \
270 &(recvreq->req_recv.req_base), max_data, \
272 opal_convertor_set_position( &(request->req_recv.req_base.req_convertor), \
274 opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor, \
278 bytes_delivered = max_data; \
279 OPAL_THREAD_UNLOCK(&request->lock); \
288 void mca_pml_csum_recv_request_progress_match(
292 size_t num_segments);
298 void mca_pml_csum_recv_request_progress_frag(
302 size_t num_segments);
312 size_t num_segments);
318 void mca_pml_csum_recv_request_progress_rget(
322 size_t num_segments);
332 size_t num_segments);
338 int mca_pml_csum_recv_request_schedule_once(
341 static inline int mca_pml_csum_recv_request_schedule_exclusive(
348 rc = mca_pml_csum_recv_request_schedule_once(req, start_bml_btl);
349 if(OPAL_SOS_GET_ERROR_CODE(rc) == OMPI_ERR_OUT_OF_RESOURCE)
351 }
while(!unlock_recv_request(req));
353 if(OMPI_SUCCESS == rc)
354 recv_request_pml_complete_check(req);
359 static inline void mca_pml_csum_recv_request_schedule(
363 if(!lock_recv_request(req))
366 (void)mca_pml_csum_recv_request_schedule_exclusive(req, start_bml_btl);
369 #define MCA_PML_CSUM_ADD_ACK_TO_PENDING(P, S, D, O) \
371 mca_pml_csum_pckt_pending_t *_pckt; \
374 MCA_PML_CSUM_PCKT_PENDING_ALLOC(_pckt,_rc); \
375 _pckt->hdr.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_ACK; \
376 _pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \
377 _pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \
378 _pckt->hdr.hdr_ack.hdr_send_offset = (O); \
380 _pckt->bml_btl = NULL; \
381 OPAL_THREAD_LOCK(&mca_pml_csum.lock); \
382 opal_list_append(&mca_pml_csum.pckt_pending, \
383 (opal_list_item_t*)_pckt); \
384 OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); \
387 int mca_pml_csum_recv_request_ack_send_btl(
ompi_proc_t* proc,
389 uint64_t hdr_rdma_offset,
bool nordma);
391 static inline int mca_pml_csum_recv_request_ack_send(
ompi_proc_t* proc,
392 uint64_t hdr_src_req,
void *hdr_dst_req, uint64_t hdr_send_offset,
402 if(mca_pml_csum_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
403 hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS)
407 MCA_PML_CSUM_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,
410 return OMPI_ERR_OUT_OF_RESOURCE;
418 void mca_pml_csum_recv_request_process_pending(
void);
void mca_pml_csum_recv_request_progress_rndv(mca_pml_csum_recv_request_t *req, struct mca_btl_base_module_t *btl, mca_btl_base_segment_t *segments, size_t num_segments)
Definition: pml_csum_recvreq.c:593
#define OPAL_THREAD_ADD32(x, y)
Use an atomic operation for increment/decrement if opal_using_threads() indicates that threads are in...
Definition: mutex.h:367
struct mca_bml_base_endpoint_t * proc_bml
BML specific proc data.
Definition: proc.h:64
void mca_pml_csum_recv_request_matched_probe(mca_pml_csum_recv_request_t *req, struct mca_btl_base_module_t *btl, mca_btl_base_segment_t *segments, size_t num_segments)
Handle completion of a probe request.
Definition: pml_csum_recvreq.c:739
#define MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE(recvreq)
Mark the request as completed at MPI level for internal purposes.
Definition: pml_csum_recvreq.h:118
size_t req_bytes_expected
local size of the data as suggested by the user
Definition: pml_csum_recvreq.h:42
int32_t hdr_tag
user tag
Definition: pml_csum_hdr.h:78
ompi_status_public_t req_status
Completion status.
Definition: request.h:103
void opal_atomic_rmb(void)
Read memory barrier.
size_t req_count
count of user datatype elements
Definition: pml_base_request.h:70
bool req_match_received
Prevent the request from being completed prematurely.
Definition: pml_csum_recvreq.h:49
Header definition for the first fragment, contains the attributes required to match the corresponding...
Definition: pml_csum_hdr.h:73
Definition: pml_csum_recvreq.h:36
Definition: pml_csum.h:297
mca_pml_base_request_t req_base
base request
Definition: pml_base_recvreq.h:37
Definition: mutex_unix.h:53
size_t req_bytes_received
amount of data transferred into the user buffer
Definition: pml_csum_recvreq.h:41
int32_t req_peer
peer process - rank within this communicator
Definition: pml_base_request.h:71
Process identification structure interface.
Remote Open MPI process structure.
Definition: proc.h:56
void mca_pml_csum_recv_req_start(mca_pml_csum_recv_request_t *req)
Definition: pml_csum_recvreq.c:1024
#define OPAL_THREAD_LOCK(mutex)
Lock a mutex if opal_using_threads() says that multiple threads may be active in the process...
Definition: mutex.h:223
#define OPAL_THREAD_UNLOCK(mutex)
Unlock a mutex if opal_using_threads() says that multiple threads may be active in the process...
Definition: mutex.h:309
opal_convertor_t req_convertor
always need the convertor
Definition: pml_base_request.h:66
int32_t hdr_src
source rank
Definition: pml_csum_hdr.h:77
volatile bool req_pml_complete
flag indicating if the pt-2-pt layer is done with this request
Definition: pml_base_request.h:61
opal_datatype_t super
Base opal_datatype_t superclass.
Definition: ompi_datatype.h:69
static void recv_request_pml_complete(mca_pml_csum_recv_request_t *recvreq)
Complete receive request.
Definition: pml_csum_recvreq.h:142
struct ompi_proc_t * req_proc
peer process
Definition: pml_base_request.h:73
size_t size
total size in bytes of the memory used by the data if the data is put on a contiguous buffer ...
Definition: opal_datatype.h:108
mca_bml_base_btl_array_t btl_eager
array of btls to use for first fragments
Definition: bml.h:228
Definition: pml_csum_rdmafrag.h:35
mca_mpool_base_module_deregister_fn_t mpool_deregister
deregister memory
Definition: mpool.h:181
struct ompi_datatype_t * req_datatype
pointer to data type
Definition: pml_base_request.h:64
Structure associated w/ ompi_proc_t that contains the set of BTLs used to reach a destination...
Definition: bml.h:222
void * req_addr
pointer to application buffer
Definition: pml_base_request.h:69
void opal_atomic_wmb(void)
Write memory barrier.
struct opal_convertor_t * proc_convertor
Base convertor for the proc described by this process.
Definition: proc.h:70
Base type for receive requests.
Definition: pml_base_recvreq.h:36
ompi_request_t req_ompi
base request
Definition: pml_base_request.h:60
static size_t mca_bml_base_btl_array_get_size(mca_bml_base_btl_array_t *array)
If required, reallocate (grow) the array to the indicated size.
Definition: bml.h:91
BTL module interface functions and attributes.
Definition: btl.h:786
bool req_ack_sent
whether ack was sent to the sender
Definition: pml_csum_recvreq.h:48
size_t req_bytes_packed
size of message being received
Definition: pml_base_recvreq.h:38
Describes a region/segment of memory that is addressable by a BTL.
Definition: btl.h:236
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236
volatile bool req_free_called
flag indicating if the user has freed this request
Definition: pml_base_request.h:65
static mca_bml_base_btl_t * mca_bml_base_btl_array_get_next(mca_bml_base_btl_array_t *array)
Return the next LRU index in the array.
Definition: bml.h:179