// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/compatibility.h
 *  @brief Compatibility layer, mostly concerned with atomic operations.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.

#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1

#include <parallel/types.h>
#include <parallel/base.h>

#if defined(__SUNPRO_CC) && defined(__sparc)
#include <sys/atomic.h>
#endif

#if !defined(_WIN32) || defined (__CYGWIN__)
#include <sched.h>
#endif

#if defined(_MSC_VER)
#include <Windows.h>
#include <intrin.h>
#undef max
#undef min
#endif

#ifdef __MINGW32__
// Including <windows.h> would drag in all the Win32 names.  Since
// that can cause user code portability problems, we just declare the
// one function we need here.
extern "C"
__attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
#endif

namespace __gnu_parallel
{
#if defined(__ICC)
  template<typename _MustBeInt = int>
    int32_t __faa32(int32_t* __x, int32_t __inc)
    {
      asm volatile("lock xadd %0,%1"
                   : "=r" (__inc), "=m" (*__x)
                   : "0" (__inc)
                   : "memory");
      return __inc;
    }
#if defined(__x86_64)
  template<typename _MustBeInt = int>
    int64_t __faa64(int64_t* __x, int64_t __inc)
    {
      asm volatile("lock xadd %0,%1"
                   : "=r" (__inc), "=m" (*__x)
                   : "0" (__inc)
                   : "memory");
      return __inc;
    }
#endif
#endif
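  // Illustrative note (not part of the original header): "lock xadd"
  // exchanges its operands and then adds, so __faa32 returns the value
  // *__x held *before* the addition.  A hypothetical caller would see:
  //
  //   int32_t __v = 10;
  //   int32_t __old = __faa32(&__v, 5);   // __old == 10, __v == 15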
  // The atomic functions only work on integers.

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 32-bit signed integer.
   *  @param __addend Value to add.
   *  @return The value of @c *__ptr before the addition. */
  inline int32_t
  __fetch_and_add_32(volatile int32_t* __ptr, int32_t __addend)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
                                   __addend);
#elif defined(__GNUC__)
    return __atomic_fetch_add(__ptr, __addend, __ATOMIC_ACQ_REL);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int32_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      } while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
                             __after) != __before);
    return __before;
#else   //fallback, slow
#pragma message("slow __fetch_and_add_32")
    int32_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *(__ptr) += __addend;
    }
    return __res;
#endif
  }

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 64-bit signed integer.
   *  @param __addend Value to add.
   *  @return The value of @c *__ptr before the addition. */
  inline int64_t
  __fetch_and_add_64(volatile int64_t* __ptr, int64_t __addend)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return __faa64<int>((int64_t*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd64((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);    //not available in this case
    return 0;
#else
    return _InterlockedExchangeAdd64(__ptr, __addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __atomic_fetch_add(__ptr, __addend, __ATOMIC_ACQ_REL);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon)  \
   || defined(__k8) || defined(__core2))
    return __atomic_fetch_add(__ptr, __addend, __ATOMIC_ACQ_REL);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int64_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      } while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
                             __after) != __before);
    return __before;
#else   //fallback, slow
#if defined(__GNUC__) && defined(__i386)
    // XXX doesn't work with -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow __fetch_and_add_64")
    int64_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *(__ptr) += __addend;
    }
    return __res;
#endif
  }
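  // A minimal usage sketch (illustrative, not part of the original
  // header): because fetch-and-add returns the previous value, it can
  // hand out unique, consecutive indices to concurrently running threads.
  //
  //   volatile int32_t __next = 0;
  //   ...
  //   // Each thread claims a distinct slot; no two threads receive
  //   // the same index.
  //   int32_t __mine = __gnu_parallel::__fetch_and_add_32(&__next, 1);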
  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a signed integer.
   *  @param __addend Value to add.
   *  @return The value of @c *__ptr before the addition. */
  template<typename _Tp>
    inline _Tp
    __fetch_and_add(volatile _Tp* __ptr, _Tp __addend)
    {
      if (sizeof(_Tp) == sizeof(int32_t))
        return
          (_Tp)__fetch_and_add_32((volatile int32_t*) __ptr,
                                  (int32_t)__addend);
      else if (sizeof(_Tp) == sizeof(int64_t))
        return
          (_Tp)__fetch_and_add_64((volatile int64_t*) __ptr,
                                  (int64_t)__addend);
      else
        _GLIBCXX_PARALLEL_ASSERT(false);
    }


#if defined(__ICC)

  template<typename _MustBeInt = int>
    inline int32_t
    __cas32(volatile int32_t* __ptr, int32_t __old, int32_t __nw)
    {
      int32_t __before;
      __asm__ __volatile__("lock; cmpxchgl %1,%2"
                           : "=a"(__before)
                           : "q"(__nw), "m"(*(volatile long long*)(__ptr)),
                             "0"(__old)
                           : "memory");
      return __before;
    }

#if defined(__x86_64)
  template<typename _MustBeInt = int>
    inline int64_t
    __cas64(volatile int64_t *__ptr, int64_t __old, int64_t __nw)
    {
      int64_t __before;
      __asm__ __volatile__("lock; cmpxchgq %1,%2"
                           : "=a"(__before)
                           : "q"(__nw), "m"(*(volatile long long*)(__ptr)),
                             "0"(__old)
                           : "memory");
      return __before;
    }
#endif

#endif

  /** @brief Compare @c *__ptr and @c __comparand.  If equal, set
   *  @c *__ptr to @c __replacement and return @c true; otherwise
   *  return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 32-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value. */
  inline bool
  __compare_and_swap_32(volatile int32_t* __ptr, int32_t __comparand,
                        int32_t __replacement)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedCompareExchange(
               reinterpret_cast<volatile long*>(__ptr),
               __replacement, __comparand)
             == __comparand;
#elif defined(__GNUC__)
    return __atomic_compare_exchange_n(__ptr, &__comparand, __replacement,
                                       false, __ATOMIC_ACQ_REL,
                                       __ATOMIC_RELAXED);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
                         __replacement) == __comparand;
#else
#pragma message("slow __compare_and_swap_32")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }
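  // A minimal usage sketch (illustrative, not part of the original
  // header): the classic CAS retry loop.  The swap fails whenever another
  // thread modified *__ptr between the read and the CAS, so the caller
  // re-reads and tries again.  __update_min is a hypothetical helper.
  //
  //   volatile int32_t __min = INT32_MAX;
  //
  //   void
  //   __update_min(int32_t __val)
  //   {
  //     int32_t __old = __min;
  //     while (__val < __old
  //            && !__gnu_parallel::__compare_and_swap_32(&__min, __old,
  //                                                      __val))
  //       __old = __min;   // lost the race; reload and retry
  //   }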
  /** @brief Compare @c *__ptr and @c __comparand.  If equal, set
   *  @c *__ptr to @c __replacement and return @c true; otherwise
   *  return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 64-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value. */
  inline bool
  __compare_and_swap_64(volatile int64_t* __ptr, int64_t __comparand,
                        int64_t __replacement)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange64((void*)__ptr, __replacement,
                                         __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);    //not available in this case
    return false;
#else
    return _InterlockedCompareExchange64(__ptr, __replacement,
                                         __comparand) == __comparand;
#endif

#elif defined(__GNUC__) && defined(__x86_64)
    return __atomic_compare_exchange_n(__ptr, &__comparand, __replacement,
                                       false, __ATOMIC_ACQ_REL,
                                       __ATOMIC_RELAXED);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon)  \
   || defined(__k8) || defined(__core2))
    return __atomic_compare_exchange_n(__ptr, &__comparand, __replacement,
                                       false, __ATOMIC_ACQ_REL,
                                       __ATOMIC_RELAXED);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_64((volatile unsigned long long*)__ptr,
                         __comparand, __replacement) == __comparand;
#else
#if defined(__GNUC__) && defined(__i386)
    // XXX -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow __compare_and_swap_64")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }

  /** @brief Compare @c *__ptr and @c __comparand.  If equal, set
   *  @c *__ptr to @c __replacement and return @c true; otherwise
   *  return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value. */
  template<typename _Tp>
    inline bool
    __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement)
    {
      if (sizeof(_Tp) == sizeof(int32_t))
        return __compare_and_swap_32((volatile int32_t*) __ptr,
                                     (int32_t)__comparand,
                                     (int32_t)__replacement);
      else if (sizeof(_Tp) == sizeof(int64_t))
        return __compare_and_swap_64((volatile int64_t*) __ptr,
                                     (int64_t)__comparand,
                                     (int64_t)__replacement);
      else
        _GLIBCXX_PARALLEL_ASSERT(false);
    }

  /** @brief Yield control to another thread, without waiting for
   *  the end of the time slice. */
  inline void
  __yield()
  {
#if defined (_WIN32) && !defined (__CYGWIN__)
    Sleep(0);
#else
    sched_yield();
#endif
  }
} // end namespace __gnu_parallel

#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */
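// A minimal usage sketch (illustrative, not part of the original header):
// __compare_and_swap_32 and __yield combine into a simple test-and-set
// spin lock that backs off politely instead of burning its time slice.
// __SpinLock is a hypothetical type, not provided by this header.
//
//   struct __SpinLock
//   {
//     volatile int32_t _M_locked;   // 0 == free, 1 == held
//
//     void _M_lock()
//     {
//       // Spin until we transition the flag from 0 to 1 ourselves;
//       // yield on each failed attempt so the current holder can run.
//       while (!__gnu_parallel::__compare_and_swap_32(&_M_locked, 0, 1))
//         __gnu_parallel::__yield();
//     }
//
//     void _M_unlock()
//     { _M_locked = 0; }   // a real lock would need a release barrier here
//   };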