OpenMPI  0.1.1
opal_convertor.h
1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
4  * University Research and Technology
5  * Corporation. All rights reserved.
6  * Copyright (c) 2004-2009 The University of Tennessee and The University
7  * of Tennessee Research Foundation. All rights
8  * reserved.
9  * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
10  * University of Stuttgart. All rights reserved.
11  * Copyright (c) 2004-2006 The Regents of the University of California.
12  * All rights reserved.
13  * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
14  * $COPYRIGHT$
15  *
16  * Additional copyrights may follow
17  *
18  * $HEADER$
19  */
20 
21 #ifndef OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED
22 #define OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED
23 
24 #include "opal_config.h"
25 
26 #include <stddef.h>
27 
28 #ifdef HAVE_SYS_TYPES_H
29 #include <sys/types.h>
30 #endif
31 #ifdef HAVE_SYS_UIO_H
32 #include <sys/uio.h>
33 #endif
34 #ifdef HAVE_NET_UIO_H
35 #include <net/uio.h>
36 #endif
37 #if HAVE_STRING_H
38 #include <string.h>
39 #endif
40 
41 #include "opal/constants.h"
42 #include "opal/datatype/opal_datatype.h"
43 #include "opal/prefetch.h"
44 
45 BEGIN_C_DECLS
46 /*
47  * CONVERTOR SECTION
48  */
/*
 * Bit layout of opal_convertor_t::flags.
 *
 * The low 16 bits (CONVERTOR_DATATYPE_MASK) are kept free for the datatype
 * flags; the convertor's own flags occupy the upper 16 bits.
 */
#define CONVERTOR_DATATYPE_MASK    0x0000FFFF

/* Bits 16-23: kind/properties of the convertor (see CONVERTOR_TYPE_MASK). */
#define CONVERTOR_SEND_CONVERSION  0x00010000
#define CONVERTOR_RECV             0x00020000
#define CONVERTOR_SEND             0x00040000
#define CONVERTOR_HOMOGENEOUS      0x00080000
#define CONVERTOR_NO_OP            0x00100000
#define CONVERTOR_WITH_CHECKSUM    0x00200000
#define CONVERTOR_CUDA             0x00400000
#define CONVERTOR_TYPE_MASK        0x00FF0000

/* Bits 24-27: state of the convertor. */
#define CONVERTOR_STATE_START      0x01000000
#define CONVERTOR_STATE_COMPLETE   0x02000000
#define CONVERTOR_STATE_ALLOC      0x04000000
#define CONVERTOR_COMPLETED        0x08000000
63 
/* Forward declarations: this header only handles these types through pointers. */
union dt_elem_desc;
typedef struct opal_convertor_t opal_convertor_t;

/**
 * Signature of the routine stored in opal_convertor_t::fAdvance (the
 * pointer to the pack/unpack functions). opal_convertor_pack() and
 * opal_convertor_unpack() are declared with the same parameter list.
 */
typedef int32_t (*convertor_advance_fct_t)( opal_convertor_t* pConvertor,
                                            struct iovec* iov,
                                            uint32_t* out_size,
                                            size_t* max_data );

/**
 * Memory allocation callback.
 * NOTE(review): not used anywhere in this header; pLength is presumably
 * in/out (requested / obtained length) -- confirm against the callers.
 */
typedef void*(*memalloc_fct_t)( size_t* pLength, void* userdata );

/** Copy routine with the same signature as memcpy(). */
typedef void*(*memcpy_fct_t)( void* dest, const void* src, size_t n );

/* The master convertor struct (defined in convertor_internal.h) */
struct opal_convertor_master_t;
76 
77 struct dt_stack_t {
78  int32_t index; /**< index in the element description */
79  int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */
80  size_t count; /**< number of times we still have to do it */
81  OPAL_PTRDIFF_TYPE disp; /**< actual displacement depending on the count field */
82 };
83 typedef struct dt_stack_t dt_stack_t;
84 
/**
 * Number of dt_stack_t entries embedded directly in every convertor
 * (opal_convertor_t::static_stack). This should be sufficient for most
 * applications.
 */
#define DT_STATIC_STACK_SIZE   5
89 
91  opal_object_t super; /**< basic superclass */
92  uint32_t remoteArch; /**< the remote architecture */
93  uint32_t flags; /**< the properties of this convertor */
94  size_t local_size; /**< overall length data on local machine, compared to bConverted */
95  size_t remote_size; /**< overall length data on remote machine, compared to bConverted */
96  const opal_datatype_t* pDesc; /**< the datatype description associated with the convertor */
97  const dt_type_desc_t* use_desc; /**< the version used by the convertor (normal or optimized) */
98  opal_datatype_count_t count; /**< the total number of full datatype elements */
99  uint32_t stack_size; /**< size of the allocated stack */
100  /* --- cacheline 1 boundary (64 bytes) --- */
101  unsigned char* pBaseBuf; /**< initial buffer as supplied by the user */
102  dt_stack_t* pStack; /**< the local stack for the actual conversion */
103  convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */
104  struct opal_convertor_master_t* master; /**< the master convertor */
105 
106  /* All others fields get modified for every call to pack/unpack functions */
107  uint32_t stack_pos; /**< the actual position on the stack */
108  uint32_t partial_length; /**< amount of data left over from the last unpack */
109  size_t bConverted; /**< # of bytes already converted */
110  uint32_t checksum; /**< checksum computed by pack/unpack operation */
111  uint32_t csum_ui1; /**< partial checksum computed by pack/unpack operation */
112  size_t csum_ui2; /**< partial checksum computed by pack/unpack operation */
113  /* --- cacheline 2 boundary (128 bytes) --- */
114  dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< local stack for small datatypes */
115  /* --- cacheline 3 boundary (192 bytes) was 56 bytes ago --- */
116 
117 #if OPAL_CUDA_SUPPORT
118  memcpy_fct_t cbmemcpy; /**< memcpy or cuMemcpy */
119 #endif
120  /* size: 248, cachelines: 4, members: 20 */
121  /* last cacheline: 56 bytes */
122 };
123 OPAL_DECLSPEC OBJ_CLASS_DECLARATION( opal_convertor_t );
124 
125 
126 /*
127  *
128  */
129 static inline uint32_t opal_convertor_get_checksum( opal_convertor_t* convertor )
130 {
131  return convertor->checksum;
132 }
133 
134 
135 /*
136  *
137  */
138 OPAL_DECLSPEC int32_t opal_convertor_pack( opal_convertor_t* pConv, struct iovec* iov,
139  uint32_t* out_size, size_t* max_data );
140 
141 /*
142  *
143  */
144 OPAL_DECLSPEC int32_t opal_convertor_unpack( opal_convertor_t* pConv, struct iovec* iov,
145  uint32_t* out_size, size_t* max_data );
146 
147 /*
148  *
149  */
150 OPAL_DECLSPEC opal_convertor_t* opal_convertor_create( int32_t remote_arch, int32_t mode );
151 
152 
153 /**
154  * The cleanup function will put the convertor in exactly the same state as after a call
155  * to opal_convertor_construct. Therefore, all PML can call OBJ_DESTRUCT on the request's
156  * convertors without having to call OBJ_CONSTRUCT everytime they grab a new one from the
157  * cache. The OBJ_CONSTRUCT on the convertor should be called only on the first creation
158  * of a request (not when extracted from the cache).
159  */
160 static inline int opal_convertor_cleanup( opal_convertor_t* convertor )
161 {
162  if( OPAL_UNLIKELY(convertor->stack_size > DT_STATIC_STACK_SIZE) ) {
163  free( convertor->pStack );
164  convertor->pStack = convertor->static_stack;
165  convertor->stack_size = DT_STATIC_STACK_SIZE;
166  }
167 #if OPAL_CUDA_SUPPORT
168  convertor->cbmemcpy = &memcpy;
169 #endif
170  convertor->pDesc = NULL;
171  convertor->stack_pos = 0;
172  convertor->flags = OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED;
173 
174  return OPAL_SUCCESS;
175 }
176 
177 
178 /**
179  * Return: 0 if no packing is required for sending (the upper layer
180  * can use directly the pointer to the contiguous user
181  * buffer).
182  * 1 if data does need to be packed, i.e. heterogeneous peers
183  * (source arch != dest arch) or non contiguous memory
184  * layout.
185  */
186 static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConvertor )
187 {
188 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
189  if (OPAL_UNLIKELY(0 == (pConvertor->flags & CONVERTOR_HOMOGENEOUS))) return 1;
190 #endif
191 #if OPAL_CUDA_SUPPORT
192  if( pConvertor->flags & CONVERTOR_CUDA ) return 1;
193 #endif
194  if( pConvertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS ) return 0;
195  if( (pConvertor->count == 1) && (pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) return 0;
196  return 1;
197 }
198 
199 
200 /*
201  *
202  */
203 static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv,
204  size_t* pSize )
205 {
206  *pSize = pConv->local_size;
207 }
208 
209 
210 /*
211  *
212  */
213 static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pConv,
214  size_t* pSize )
215 {
216  *pSize = pConv->remote_size;
217 }
218 
219 
220 /**
221  * Return the current absolute position of the next pack/unpack. This function is
222  * mostly useful for contiguous datatypes, when we need to get the pointer to the
223  * contiguous piece of memory.
224  */
225 static inline void opal_convertor_get_current_pointer( const opal_convertor_t* pConv,
226  void** position )
227 {
228  unsigned char* base = pConv->pBaseBuf + pConv->bConverted + pConv->pDesc->true_lb;
229  *position = (void*)base;
230 }
231 
232 /*
233  *
234  */
235 OPAL_DECLSPEC int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
236  const struct opal_datatype_t* datatype,
237  int32_t count,
238  const void* pUserBuf);
239 
240 static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_convertor_t* pSrcConv,
241  const struct opal_datatype_t* datatype,
242  int32_t count,
243  const void* pUserBuf,
244  int32_t flags,
245  opal_convertor_t* convertor )
246 {
247  convertor->remoteArch = pSrcConv->remoteArch;
248  convertor->flags = pSrcConv->flags | flags;
249  convertor->master = pSrcConv->master;
250 
251  return opal_convertor_prepare_for_send( convertor, datatype, count, pUserBuf );
252 }
253 
254 /*
255  *
256  */
257 OPAL_DECLSPEC int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
258  const struct opal_datatype_t* datatype,
259  int32_t count,
260  const void* pUserBuf );
261 static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_convertor_t* pSrcConv,
262  const struct opal_datatype_t* datatype,
263  int32_t count,
264  const void* pUserBuf,
265  int32_t flags,
266  opal_convertor_t* convertor )
267 {
268  convertor->remoteArch = pSrcConv->remoteArch;
269  convertor->flags = (pSrcConv->flags | flags);
270  convertor->master = pSrcConv->master;
271 
272  return opal_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf );
273 }
274 
275 /*
276  * Give access to the raw memory layout based on the datatype.
277  */
278 OPAL_DECLSPEC int32_t
279 opal_convertor_raw( opal_convertor_t* convertor, /* [IN/OUT] */
280  struct iovec* iov, /* [IN/OUT] */
281  uint32_t* iov_count, /* [IN/OUT] */
282  size_t* length ); /* [OUT] */
283 
284 /*
285  * Upper level does not need to call the _nocheck function directly.
286  */
287 OPAL_DECLSPEC int32_t
288 opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
289  size_t* position );
290 static inline int32_t
291 opal_convertor_set_position( opal_convertor_t* convertor,
292  size_t* position )
293 {
294  /*
295  * Do not allow the convertor to go outside the data boundaries. This test include
296  * the check for datatype with size zero as well as for convertors with a count of zero.
297  */
298  if( OPAL_UNLIKELY(convertor->local_size <= *position) ) {
299  convertor->flags |= CONVERTOR_COMPLETED;
300  convertor->bConverted = convertor->local_size;
301  *position = convertor->bConverted;
302  return OPAL_SUCCESS;
303  }
304 
305  /*
306  * If the convertor is already at the correct position we are happy.
307  */
308  if( OPAL_LIKELY((*position) == convertor->bConverted) ) return OPAL_SUCCESS;
309 
310  /* Remove the completed flag if it's already set */
311  convertor->flags &= ~CONVERTOR_COMPLETED;
312 
313  if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) &&
314  (convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) &&
315  (convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) {
316  /* Contiguous and no checkpoint and no homogeneous unpack */
317  convertor->bConverted = *position;
318  return OPAL_SUCCESS;
319  }
320 
321  return opal_convertor_set_position_nocheck( convertor, position );
322 }
323 
324 /*
325  *
326  */
327 static inline int32_t
328 opal_convertor_personalize( opal_convertor_t* convertor,
329  uint32_t flags,
330  size_t* position )
331 {
332  convertor->flags |= flags;
333 
334  if( OPAL_UNLIKELY(NULL == position) )
335  return OPAL_SUCCESS;
336  return opal_convertor_set_position( convertor, position );
337 }
338 
339 /*
340  *
341  */
342 OPAL_DECLSPEC int
343 opal_convertor_clone( const opal_convertor_t* source,
344  opal_convertor_t* destination,
345  int32_t copy_stack );
346 
347 static inline int
348 opal_convertor_clone_with_position( const opal_convertor_t* source,
349  opal_convertor_t* destination,
350  int32_t copy_stack,
351  size_t* position )
352 {
353  (void)opal_convertor_clone( source, destination, copy_stack );
354  return opal_convertor_set_position( destination, position );
355 }
356 
357 /*
358  *
359  */
360 OPAL_DECLSPEC void
361 opal_convertor_dump( opal_convertor_t* convertor );
362 
363 OPAL_DECLSPEC void
364 opal_datatype_dump_stack( const dt_stack_t* pStack,
365  int stack_pos,
366  const union dt_elem_desc* pDesc,
367  const char* name );
368 
369 /*
370  *
371  */
372 OPAL_DECLSPEC int
373 opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
374  size_t* position );
375 
376 END_C_DECLS
377 
378 #endif /* OPAL_CONVERTOR_H_HAS_BEEN_INCLUDED */
uint32_t remoteArch
the remote architecture
Definition: opal_convertor.h:92
int16_t type
the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1)
Definition: opal_convertor.h:79
uint32_t stack_size
size of the allocated stack
Definition: opal_convertor.h:99
Definition: opal_datatype_internal.h:184
size_t count
number of times we still have to do it
Definition: opal_convertor.h:80
opal_object_t super
basic superclass
Definition: opal_convertor.h:91
Definition: opal_datatype.h:92
size_t remote_size
overall length data on remote machine, compared to bConverted
Definition: opal_convertor.h:95
convertor_advance_fct_t fAdvance
pointer to the pack/unpack functions
Definition: opal_convertor.h:103
dt_stack_t static_stack[DT_STATIC_STACK_SIZE]
local stack for small datatypes
Definition: opal_convertor.h:114
unsigned char * pBaseBuf
initial buffer as supplied by the user
Definition: opal_convertor.h:101
uint32_t checksum
checksum computed by pack/unpack operation
Definition: opal_convertor.h:110
uint32_t partial_length
amount of data left over from the last unpack
Definition: opal_convertor.h:108
const opal_datatype_t * pDesc
the datatype description associated with the convertor
Definition: opal_convertor.h:96
uint32_t csum_ui1
partial checksum computed by pack/unpack operation
Definition: opal_convertor.h:111
Definition: opal_convertor.h:77
Definition: opal_convertor_internal.h:31
dt_stack_t * pStack
the local stack for the actual conversion
Definition: opal_convertor.h:102
Definition: opal_datatype.h:103
Definition: ompi_uio.h:29
uint32_t stack_pos
the actual position on the stack
Definition: opal_convertor.h:107
size_t csum_ui2
partial checksum computed by pack/unpack operation
Definition: opal_convertor.h:112
Base object.
Definition: opal_object.h:182
Definition: opal_convertor.h:90
size_t bConverted
number of bytes already converted
Definition: opal_convertor.h:109
const dt_type_desc_t * use_desc
the version used by the convertor (normal or optimized)
Definition: opal_convertor.h:97
size_t local_size
overall length data on local machine, compared to bConverted
Definition: opal_convertor.h:94
Compiler-specific prefetch functions.
opal_datatype_count_t count
the total number of full datatype elements
Definition: opal_convertor.h:98
uint32_t flags
the properties of this convertor
Definition: opal_convertor.h:93
struct opal_convertor_master_t * master
the master convertor
Definition: opal_convertor.h:104
OPAL_PTRDIFF_TYPE true_lb
the true lb of the data without user defined lb and ub
Definition: opal_datatype.h:110
int32_t index
index in the element description
Definition: opal_convertor.h:78
OPAL_PTRDIFF_TYPE disp
actual displacement depending on the count field
Definition: opal_convertor.h:81
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236