OpenMPI  0.1.1
btl_vader_fbox.h
1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
4  * All rights reserved.
5  * $COPYRIGHT$
6  *
7  * Additional copyrights may follow
8  *
9  * $HEADER$
10  */
11 
12 #if !defined(MCA_BTL_VADER_FBOX_H)
13 #define MCA_BTL_VADER_FBOX_H
14 
15 #include "btl_vader.h"
16 #include "btl_vader_endpoint.h"
17 
/*
 * Fast-box geometry.  Each fast box is FBOX_SIZE bytes: byte 0 carries the
 * message size (or one of the FREE/RESERVED markers), byte 1 carries the
 * tag, and the remaining MAX_MSG bytes carry the message itself.
 */
18 /* XXX -- FIXME -- make no assumptions if possible */
19 /* Assumptions: page size: 4096, cache line: 64 or 128 bytes, tag = 1 byte */
20 #define FBOX_SIZE 128 /* 1-2 cache lines (64- or 128-byte lines); also the stride between consecutive fast boxes */
21 #define LAST_FBOX 31 /* index of the last fast box; page size assumption: 32 boxes * 128 bytes = 4096 */
22 #define MAX_MSG 126 /* FBOX_SIZE - 2: 1 byte used each for size and tag */
23 
/*
 * Marker values stored in byte 0 of a fast box in place of a message size.
 * FREE: the box may be reserved by a sender.  RESERVED: a sender has claimed
 * the box but has not yet published its size.  Both values are larger than
 * MAX_MSG and both have their upper seven bits set, which is what the
 * receive loop tests with "& 0xfe".
 */
24 enum {MCA_BTL_VADER_FBOX_FREE = 0xfe, MCA_BTL_VADER_FBOX_RESERVED = 0xff};
25 
/*
 * Address of outgoing fast box number `fbox` for the given peer: the boxes
 * are laid out FBOX_SIZE bytes apart starting at
 * mca_btl_vader_component.vader_fboxes_out[peer_smp_rank].
 */
26 #define MCA_BTL_VADER_FBOX_OUT_PTR(peer_smp_rank, fbox) \
27  (mca_btl_vader_component.vader_fboxes_out[peer_smp_rank] + FBOX_SIZE * (fbox))
28 
/*
 * Address of incoming fast box number `fbox` for the given peer: the boxes
 * are laid out FBOX_SIZE bytes apart starting at
 * mca_btl_vader_component.vader_fboxes_in[peer_smp_rank].
 */
29 #define MCA_BTL_VADER_FBOX_IN_PTR(peer_smp_rank, fbox) \
30  (mca_btl_vader_component.vader_fboxes_in[peer_smp_rank] + FBOX_SIZE * (fbox))
31 
32 static inline unsigned char *mca_btl_vader_reserve_fbox (int peer_smp_rank, size_t size)
33 {
34  int next_fbox = mca_btl_vader_component.vader_next_fbox_out[peer_smp_rank];
35  unsigned char *fbox = MCA_BTL_VADER_FBOX_OUT_PTR(peer_smp_rank, next_fbox);
36 
37  /* todo -- need thread locks/atomics here for the multi-threaded case */
38  if (OPAL_UNLIKELY(size > MAX_MSG || fbox[0] != MCA_BTL_VADER_FBOX_FREE)) {
39  /* fall back on fifo */
40  return NULL;
41  }
42 
43  mca_btl_vader_component.vader_next_fbox_out[peer_smp_rank] = (next_fbox + 1) & LAST_FBOX;
44 
45  /* mark this fast box as in use */
46  fbox[0] = MCA_BTL_VADER_FBOX_RESERVED;
47 
48  return fbox + 2;
49 }
50 
/**
 * Publish a fast box that was previously reserved and filled.
 *
 * @param fbox  message pointer as returned by mca_btl_vader_reserve_fbox();
 *              the two bytes immediately before it hold the size and tag
 * @param tag   stored in the byte just before the message
 * @param size  message length, stored (narrowed to one byte) in the first
 *              byte of the box
 */
static inline void mca_btl_vader_fbox_send (unsigned char *fbox, unsigned char tag, size_t size)
{
    unsigned char *box_start = fbox - 2;

    box_start[1] = tag;

    /* the size byte is what the receiver polls on, so the tag and message
     * writes must be complete before it is stored */
    opal_atomic_wmb ();

    box_start[0] = (unsigned char) size;
}
60 
61 static inline int mca_btl_vader_fbox_sendi (struct mca_btl_base_endpoint_t *endpoint, char tag,
62  void *header, size_t header_size,
63  void *payload, size_t payload_size)
64 {
65  unsigned char *fbox;
66 
67  fbox = mca_btl_vader_reserve_fbox(endpoint->peer_smp_rank, header_size + payload_size);
68  if (OPAL_UNLIKELY(NULL == fbox)) {
69  return 0;
70  }
71 
72  memcpy (fbox, header, header_size);
73  if (OPAL_UNLIKELY(payload)) {
74  /* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
75  memcpy (fbox + header_size, payload, payload_size);
76  }
77 
78  /* mark the fbox as sent */
79  mca_btl_vader_fbox_send (fbox, tag, header_size + payload_size);
80 
81  /* send complete */
82  return 1;
83 }
84 
85 static inline void mca_btl_vader_check_fboxes (void)
86 {
87  int my_smp_rank = mca_btl_vader_component.my_smp_rank;
90  unsigned char size, tag;
91  int i;
92 
93  for (i = 0 ; i < mca_btl_vader_component.num_smp_procs ; ++i) {
94  int next_fbox = mca_btl_vader_component.vader_next_fbox_in[i];
95  unsigned char *fbox = MCA_BTL_VADER_FBOX_IN_PTR(i, next_fbox);
96 
97  if (my_smp_rank == i) {
98  continue;
99  }
100 
101  /* process all fast-box messages */
102  while (0xfe != ((size = fbox[0]) & 0xfe)) {
103  opal_atomic_rmb ();
104 
105  tag = fbox[1];
106 
107  reg = mca_btl_base_active_message_trigger + tag;
108 
109  frag.segment.seg_addr.pval = fbox + 2;
110  frag.segment.seg_len = size;
111 
112  frag.base.des_dst = &frag.segment;
113  frag.base.des_dst_cnt = 1;
114  reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata);
115 
116  fbox[0] = MCA_BTL_VADER_FBOX_FREE;
117 
118  next_fbox = next_fbox == LAST_FBOX ? 0 : next_fbox + 1;
119  fbox = MCA_BTL_VADER_FBOX_IN_PTR(i, next_fbox);
120  }
121 
122  mca_btl_vader_component.vader_next_fbox_in[i] = next_fbox;
123  }
124 }
125 
126 #endif /* !defined(MCA_BTL_VADER_FBOX_H) */
size_t des_dst_cnt
number of destination segments
Definition: btl.h:280
void opal_atomic_rmb(void)
Read memory barrier.
int32_t num_smp_procs
current number of smp procs on this host
Definition: btl_vader.h:110
unsigned char * vader_next_fbox_out
indices of fast boxes to write
Definition: btl_vader.h:122
ompi_ptr_t seg_addr
Address of the memory.
Definition: btl.h:238
int peer_smp_rank
My peer's SMP process rank.
Definition: btl_sm_endpoint.h:34
unsigned char * vader_next_fbox_in
indices of fast boxes to poll
Definition: btl_vader.h:121
mca_btl_base_module_t super
base BTL interface
Definition: btl_vader.h:133
int32_t my_smp_rank
My SMP process rank.
Definition: btl_vader.h:111
uint32_t seg_len
Length in bytes.
Definition: btl.h:240
shared memory send fragment derived type.
Definition: btl_vader_frag.h:47
State of ELAN endpoint connection.
Definition: btl_elan_endpoint.h:33
void opal_atomic_wmb(void)
Write memory barrier.
mca_btl_base_segment_t * des_dst
destination segments
Definition: btl.h:279