OpenMPI  0.1.1
bit_ops.h
1 /*
2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3  * University Research and Technology
4  * Corporation. All rights reserved.
5  * Copyright (c) 2004-2005 The University of Tennessee and The University
6  * of Tennessee Research Foundation. All rights
7  * reserved.
8  * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
9  * University of Stuttgart. All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  * All rights reserved.
12  * $COPYRIGHT$
13  *
14  * Additional copyrights may follow
15  *
16  * $HEADER$
17  */
18 
19 #ifndef OPAL_BIT_OPS_H
20 #define OPAL_BIT_OPS_H
21 
22 #include "opal/prefetch.h"
23 
24 /**
25  * Calculates the highest bit in an integer
26  *
27  * @param value The integer value to examine
28  * @param start Position to start looking
29  *
30  * @returns pos Position of highest-set integer or -1 if none are set.
31  *
32  * Look at the integer "value" starting at position "start", and move
33  * to the right. Return the index of the highest bit that is set to
34  * 1.
35  *
36  * WARNING: *NO* error checking is performed. This is meant to be a
37  * fast inline function.
38  * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
39  * of 17 cycles (on average value, with start=32)
40  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
41  */
42 static inline int opal_hibit(int value, int start)
43 {
44  unsigned int mask;
45 
46 #if OPAL_C_HAVE_BUILTIN_CLZ
47  /* Only look at the part that the caller wanted looking at */
48  mask = value & ((1 << start) - 1);
49 
50  if (OPAL_UNLIKELY (0 == mask)) {
51  return -1;
52  }
53 
54  start = (8*sizeof(int)-1) - __builtin_clz(mask);
55 #else
56  --start;
57  mask = 1 << start;
58 
59  for (; start >= 0; --start, mask >>= 1) {
60  if (value & mask) {
61  break;
62  }
63  }
64 #endif
65 
66  return start;
67 }
68 
69 
70 /**
71  * Returns the cube dimension of a given value.
72  *
73  * @param value The integer value to examine
74  *
75  * @returns cubedim The smallest cube dimension containing that value
76  *
77  * Look at the integer "value" and calculate the smallest power of two
78  * dimension that contains that value.
79  *
80  * WARNING: *NO* error checking is performed. This is meant to be a
81  * fast inline function.
82  * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
83  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
84  */
85 static inline int opal_cube_dim(int value)
86 {
87  int dim, size;
88 
89 #if OPAL_C_HAVE_BUILTIN_CLZ
90  if (OPAL_UNLIKELY (1 >= value)) {
91  return 0;
92  }
93  size = 8 * sizeof(int);
94  dim = size - __builtin_clz(value-1);
95 #else
96  for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */;
97 #endif
98 
99  return dim;
100 }
101 
102 
103 /**
104  * @brief Returns next power-of-two of the given value.
105  *
106  * @param value The integer value to return power of 2
107  *
108  * @returns The next power of two
109  *
110  * WARNING: *NO* error checking is performed. This is meant to be a
111  * fast inline function.
112  * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
113  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
114  */
115 static inline int opal_next_poweroftwo(int value)
116 {
117  int power2;
118 
119 #if OPAL_C_HAVE_BUILTIN_CLZ
120  if (OPAL_UNLIKELY (0 == value)) {
121  return 1;
122  }
123  power2 = 1 << (8 * sizeof (int) - __builtin_clz(value));
124 #else
125  for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */;
126 #endif
127 
128  return power2;
129 }
130 
131 
132 /**
133  * @brief Returns next power-of-two of the given value (and the value itselve if already power-of-two).
134  *
135  * @param value The integer value to return power of 2
136  *
137  * @returns The next power of two (inclusive)
138  *
139  * WARNING: *NO* error checking is performed. This is meant to be a
140  * fast inline function.
141  * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
142  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
143  */
144 static inline int opal_next_poweroftwo_inclusive(int value)
145 {
146  int power2;
147 
148 #if OPAL_C_HAVE_BUILTIN_CLZ
149  if (OPAL_UNLIKELY (1 >= value)) {
150  return 1;
151  }
152  power2 = 1 << (8 * sizeof (int) - __builtin_clz(value - 1));
153 #else
154  for (power2 = 1 ; power2 < value; power2 <<= 1) /* empty */;
155 #endif
156 
157  return power2;
158 }
159 
160 
161 #endif /* OPAL_BIT_OPS_H */
162 
Compiler-specific prefetch functions.