OpenMPI 0.1.1
atomic.h
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2010      IBM Corporation.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#ifndef OMPI_SYS_ARCH_ATOMIC_H
#define OMPI_SYS_ARCH_ATOMIC_H 1

/*
 * On powerpc ...
 */

#if OPAL_WANT_SMP_LOCKS

#define MB()  __asm__ __volatile__ ("sync" : : : "memory")
#define RMB() __asm__ __volatile__ ("lwsync" : : : "memory")
#define WMB() __asm__ __volatile__ ("eieio" : : : "memory")
#define SMP_SYNC  "sync \n\t"
#define SMP_ISYNC "\n\tisync"

#else

#define MB()
#define RMB()
#define WMB()
#define SMP_SYNC  ""
#define SMP_ISYNC

#endif
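/*
 * A minimal sketch (not in the original header) of how the SMP_SYNC
 * and SMP_ISYNC string fragments are meant to be consumed: C string
 * literal concatenation splices them into an assembly template, so on
 * non-SMP builds the fragments collapse to empty strings and the
 * barrier instructions vanish.  The function name is hypothetical.
 */
static inline void example_store_sync_32(volatile int32_t *addr, int32_t val)
{
    /* "sync" (or nothing, on non-SMP builds) precedes the store */
    __asm__ __volatile__ (SMP_SYNC
                          "stw %1, 0(%0)"
                          :
                          : "b" (addr), "r" (val)
                          : "memory");
}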


/**********************************************************************
 *
 * Define constants for PowerPC 32
 *
 *********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1

#define OPAL_HAVE_ATOMIC_CMPSET_32 1

#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
#define OPAL_HAVE_ATOMIC_SUB_32 1


#if (OPAL_ASSEMBLY_ARCH == OMPI_POWERPC64) || OPAL_ASM_SUPPORT_64BIT
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#endif


/**********************************************************************
 *
 * Memory Barriers
 *
 *********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY

static inline
void opal_atomic_mb(void)
{
    MB();
}


static inline
void opal_atomic_rmb(void)
{
    RMB();
}


static inline
void opal_atomic_wmb(void)
{
    WMB();
}
#elif OMPI_XLC_INLINE_ASSEMBLY /* end OMPI_GCC_INLINE_ASSEMBLY */

/* XLC's #pragma-based syntax for inline assembly is awkward, but we
 * use it here because these barriers are called so often and are
 * fairly simple (there is a tech pub on IBM's web site containing
 * the right hex encodings for the instructions).
 */

#undef OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER
#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0

#pragma mc_func opal_atomic_mb { "7c0004ac" }   /* sync */
#pragma reg_killed_by opal_atomic_mb            /* none */

#pragma mc_func opal_atomic_rmb { "7c2004ac" }  /* lwsync */
#pragma reg_killed_by opal_atomic_rmb           /* none */

#pragma mc_func opal_atomic_wmb { "7c0006ac" }  /* eieio */
#pragma reg_killed_by opal_atomic_wmb           /* none */

#endif
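/*
 * A hedged usage sketch (not part of the original file) of the classic
 * publish/consume pattern these barriers support: the producer's wmb()
 * keeps the payload store ahead of the flag store, and the consumer's
 * rmb() keeps the flag load ahead of the payload load.  The variable
 * and function names are hypothetical.
 */
static volatile int32_t example_payload, example_ready;

static inline void example_publish(int32_t value)
{
    example_payload = value;
    opal_atomic_wmb();          /* eieio: payload store ordered first */
    example_ready = 1;
}

static inline int example_consume(int32_t *out)
{
    if (0 == example_ready) {
        return 0;               /* nothing published yet */
    }
    opal_atomic_rmb();          /* lwsync: flag load ordered first */
    *out = example_payload;
    return 1;
}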

/**********************************************************************
 *
 * Atomic math operations
 *
 *********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY

static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
                                        int32_t oldval, int32_t newval)
{
    int32_t ret;

    __asm__ __volatile__ (
                          "1: lwarx   %0, 0, %2  \n\t"
                          "   cmpw    0, %0, %3  \n\t"
                          "   bne-    2f         \n\t"
                          "   stwcx.  %4, 0, %2  \n\t"
                          "   bne-    1b         \n\t"
                          "2:"
                          : "=&r" (ret), "=m" (*addr)
                          : "r" (addr), "r" (oldval), "r" (newval), "m" (*addr)
                          : "cc", "memory");

    return (ret == oldval);
}

/* these two functions aren't inlined in the non-gcc case because then
   there would be two function calls (since neither cmpset_32 nor
   atomic_?mb can be inlined).  Instead, we "inline" them by hand in
   the assembly, meaning there is one function call overhead instead
   of two */
static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr,
                                            int32_t oldval, int32_t newval)
{
    int rc;

    rc = opal_atomic_cmpset_32(addr, oldval, newval);
    opal_atomic_rmb();

    return rc;
}


static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
                                            int32_t oldval, int32_t newval)
{
    opal_atomic_wmb();
    return opal_atomic_cmpset_32(addr, oldval, newval);
}

#endif /* OMPI_GCC_INLINE_ASSEMBLY */
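/*
 * A minimal spinlock sketch built only from the primitives above (not
 * part of the original header; the names are hypothetical).  The
 * acquire variant keeps critical-section accesses from floating above
 * the lock acquisition; the release variant keeps them from sinking
 * below the unlock.
 */
static inline void example_spin_lock(volatile int32_t *lock)
{
    /* spin until we atomically flip 0 -> 1 with acquire semantics */
    while (0 == opal_atomic_cmpset_acq_32(lock, 0, 1)) {
        /* busy-wait */
    }
}

static inline void example_spin_unlock(volatile int32_t *lock)
{
    /* flip 1 -> 0 with release semantics */
    (void) opal_atomic_cmpset_rel_32(lock, 1, 0);
}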


#if (OPAL_ASSEMBLY_ARCH == OMPI_POWERPC64)

#if OMPI_GCC_INLINE_ASSEMBLY
static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
                                        int64_t oldval, int64_t newval)
{
    int64_t ret;

    __asm__ __volatile__ (
                          "1: ldarx   %0, 0, %2  \n\t"
                          "   cmpd    0, %0, %3  \n\t"
                          "   bne-    2f         \n\t"
                          "   stdcx.  %4, 0, %2  \n\t"
                          "   bne-    1b         \n\t"
                          "2:"
                          : "=&r" (ret), "=m" (*addr)
                          : "r" (addr), "r" (oldval), "r" (newval), "m" (*addr)
                          : "cc", "memory");

    return (ret == oldval);
}

/* these two functions aren't inlined in the non-gcc case because then
   there would be two function calls (since neither cmpset_64 nor
   atomic_?mb can be inlined).  Instead, we "inline" them by hand in
   the assembly, meaning there is one function call overhead instead
   of two */
static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr,
                                            int64_t oldval, int64_t newval)
{
    int rc;

    rc = opal_atomic_cmpset_64(addr, oldval, newval);
    opal_atomic_rmb();

    return rc;
}


static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
                                            int64_t oldval, int64_t newval)
{
    opal_atomic_wmb();
    return opal_atomic_cmpset_64(addr, oldval, newval);
}

#endif /* OMPI_GCC_INLINE_ASSEMBLY */

#elif (OPAL_ASSEMBLY_ARCH == OMPI_POWERPC32) && OPAL_ASM_SUPPORT_64BIT

#ifndef ll_low /* GLIBC provides these somewhere, so protect */
#define ll_low(x)  *(((unsigned int *) &(x)) + 0)
#define ll_high(x) *(((unsigned int *) &(x)) + 1)
#endif

#if OMPI_GCC_INLINE_ASSEMBLY

static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
                                        int64_t oldval, int64_t newval)
{
    int ret;

    /*
     * We force oldval and newval into memory because PPC doesn't
     * appear to have a way to do a move register with offset.  Since
     * this is 32-bit code, a 64 bit integer will be loaded into two
     * registers (assuming no inlining, addr will be in r3, oldval
     * will be in r4 and r5, and newval will be in r6 and r7).  We
     * need to load the whole thing into one register, so we have the
     * compiler push the values into memory and load the double word
     * into registers.  We use r4,r5 so that the main block of code
     * is very similar to the pure 64 bit version.
     */
    __asm__ __volatile__ (
                          "ld r4,%2              \n\t"
                          "ld r5,%3              \n\t"
                          "1: ldarx   r9, 0, %1  \n\t"
                          "   cmpd    0, r9, r4  \n\t"
                          "   bne-    2f         \n\t"
                          "   stdcx.  r5, 0, %1  \n\t"
                          "   bne-    1b         \n\t"
                          "2:                    \n\t"
                          "xor r5,r4,r9          \n\t"
                          "subfic r9,r5,0        \n\t"
                          "adde %0,r9,r5         \n\t"
                          : "=&r" (ret)
                          : "r" (addr),
                            "m" (oldval), "m" (newval)
                          : "r4", "r5", "r9", "cc", "memory");

    return ret;
}
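/*
 * How the branch-free epilogue above computes its boolean result:
 * after the loop, r9 holds the value loaded from *addr and r4 holds
 * oldval.  "xor r5,r4,r9" leaves r5 zero exactly when they matched;
 * "subfic r9,r5,0" computes 0 - r5, which sets the carry bit only
 * when r5 is zero; "adde %0,r9,r5" then yields (0 - r5) + r5 + carry,
 * i.e. 1 on success and 0 on failure, matching the (ret == oldval)
 * expression the other variants return from C.
 */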

/* these two functions aren't inlined in the non-gcc case because then
   there would be two function calls (since neither cmpset_64 nor
   atomic_?mb can be inlined).  Instead, we "inline" them by hand in
   the assembly, meaning there is one function call overhead instead
   of two */
static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr,
                                            int64_t oldval, int64_t newval)
{
    int rc;

    rc = opal_atomic_cmpset_64(addr, oldval, newval);
    opal_atomic_rmb();

    return rc;
}


static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
                                            int64_t oldval, int64_t newval)
{
    opal_atomic_wmb();
    return opal_atomic_cmpset_64(addr, oldval, newval);
}

#endif /* OMPI_GCC_INLINE_ASSEMBLY */

#endif /* OPAL_ASM_SUPPORT_64BIT */


#if OMPI_GCC_INLINE_ASSEMBLY

static inline int32_t opal_atomic_add_32(volatile int32_t *v, int inc)
{
    int32_t t;

    __asm__ __volatile__ (
                          "1: lwarx   %0, 0, %3  \n\t"
                          "   add     %0, %2, %0 \n\t"
                          "   stwcx.  %0, 0, %3  \n\t"
                          "   bne-    1b         \n\t"
                          : "=&r" (t), "=m" (*v)
                          : "r" (inc), "r" (v), "m" (*v)
                          : "cc");

    return t;
}


static inline int32_t opal_atomic_sub_32(volatile int32_t *v, int dec)
{
    int32_t t;

    __asm__ __volatile__ (
                          "1: lwarx   %0, 0, %3  \n\t"
                          "   subf    %0, %2, %0 \n\t"
                          "   stwcx.  %0, 0, %3  \n\t"
                          "   bne-    1b         \n\t"
                          : "=&r" (t), "=m" (*v)
                          : "r" (dec), "r" (v), "m" (*v)
                          : "cc");

    return t;
}


#endif /* OMPI_GCC_INLINE_ASSEMBLY */
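/*
 * A short reference-counting sketch using the math primitives above
 * (not part of the original header; the type and function names are
 * hypothetical).  Both opal_atomic_add_32 and opal_atomic_sub_32
 * return the updated value, so the caller can test the result
 * directly.  Note that neither carries a memory barrier ("cc" is the
 * only clobber), so a production release path would pair the final
 * decrement with an explicit barrier before freeing.
 */
typedef struct {
    volatile int32_t refcount;
} example_obj_t;

static inline void example_retain(example_obj_t *obj)
{
    (void) opal_atomic_add_32(&obj->refcount, 1);
}

static inline int example_release(example_obj_t *obj)
{
    /* nonzero when the last reference has been dropped */
    return (0 == opal_atomic_sub_32(&obj->refcount, 1));
}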

#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
Function Documentation

void opal_atomic_mb(void)
    Memory barrier.

void opal_atomic_rmb(void)
    Read memory barrier.

void opal_atomic_wmb(void)
    Write memory barrier.