libstdc++
parallel/compatibility.h
// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/compatibility.h
 *  @brief Compatibility layer, mostly concerned with atomic operations.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.
#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1

#include <parallel/types.h>
#include <parallel/base.h>

#if defined(__SUNPRO_CC) && defined(__sparc)
#include <sys/atomic.h>
#endif

#if !defined(_WIN32) || defined (__CYGWIN__)
#include <sched.h>
#endif

#if defined(_MSC_VER)
#include <Windows.h>
#include <intrin.h>
#undef max
#undef min
#endif

#ifdef __MINGW32__
// Including <windows.h> will drag in all the windows32 names.  Since
// that can cause user code portability problems, we just declare the
// one needed function here.
extern "C"
__attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
#endif

namespace __gnu_parallel
{
#if defined(__ICC)
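  // The Intel-compiler helpers below wrap the x86 "lock xadd" instruction:
  // it atomically adds the register operand to the memory operand and hands
  // the memory operand's previous value back in the register, which is why
  // __inc holds the old *__x when the asm finishes.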
  template<typename _MustBeInt = int>
  int32_t __faa32(int32_t* __x, int32_t __inc)
  {
    asm volatile("lock xadd %0,%1"
                 : "=r" (__inc), "=m" (*__x)
                 : "0" (__inc)
                 : "memory");
    return __inc;
  }
#if defined(__x86_64)
  template<typename _MustBeInt = int>
  int64_t __faa64(int64_t* __x, int64_t __inc)
  {
    asm volatile("lock xadd %0,%1"
                 : "=r" (__inc), "=m" (*__x)
                 : "0" (__inc)
                 : "memory");
    return __inc;
  }
#endif
#endif

  // atomic functions only work on integers

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 32-bit signed integer.
   *  @param __addend Value to add.
   */
  inline int32_t
  __fetch_and_add_32(volatile int32_t* __ptr, int32_t __addend)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
                                   __addend);
#elif defined(__GNUC__)
    return __atomic_fetch_add(__ptr, __addend, __ATOMIC_ACQ_REL);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int32_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      } while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
                             __after) != __before);
    return __before;
#else   //fallback, slow
#pragma message("slow __fetch_and_add_32")
    int32_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *(__ptr) += __addend;
    }
    return __res;
#endif
  }

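  // Illustrative sketch (not part of the original header): a hypothetical
  // helper showing the typical use of __fetch_and_add_32 to claim the next
  // slot of a shared counter.  Because the function returns the counter's
  // contents from *before* the addition, concurrent callers each obtain a
  // distinct index.
  inline int32_t
  __example_claim_index_32(volatile int32_t* __counter)
  { return __fetch_and_add_32(__counter, 1); }
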
  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 64-bit signed integer.
   *  @param __addend Value to add.
   */
  inline int64_t
  __fetch_and_add_64(volatile int64_t* __ptr, int64_t __addend)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return __faa64<int>((int64_t*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd64((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);    //not available in this case
    return 0;
#else
    return _InterlockedExchangeAdd64(__ptr, __addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __atomic_fetch_add(__ptr, __addend, __ATOMIC_ACQ_REL);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon)  \
   || defined(__k8) || defined(__core2))
    return __atomic_fetch_add(__ptr, __addend, __ATOMIC_ACQ_REL);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int64_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      } while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
                             __after) != __before);
    return __before;
#else   //fallback, slow
#if defined(__GNUC__) && defined(__i386)
    // XXX doesn't work with -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow __fetch_and_add_64")
    int64_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *(__ptr) += __addend;
    }
    return __res;
#endif
  }

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a signed integer.
   *  @param __addend Value to add.
   */
  template<typename _Tp>
  inline _Tp
  __fetch_and_add(volatile _Tp* __ptr, _Tp __addend)
  {
    if (sizeof(_Tp) == sizeof(int32_t))
      return
        (_Tp)__fetch_and_add_32((volatile int32_t*) __ptr, (int32_t)__addend);
    else if (sizeof(_Tp) == sizeof(int64_t))
      return
        (_Tp)__fetch_and_add_64((volatile int64_t*) __ptr, (int64_t)__addend);
    else
      _GLIBCXX_PARALLEL_ASSERT(false);
  }

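  // Illustrative sketch (not part of the original header): the generic
  // __fetch_and_add dispatches on sizeof(_Tp), so any 32-bit or 64-bit
  // signed integer type can be used.  A hypothetical ticket dispenser
  // built on the 64-bit path:
  inline int64_t
  __example_next_ticket(volatile int64_t* __sequence)
  { return __fetch_and_add(__sequence, (int64_t)1); }
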

#if defined(__ICC)

  template<typename _MustBeInt = int>
  inline int32_t
  __cas32(volatile int32_t* __ptr, int32_t __old, int32_t __nw)
  {
    int32_t __before;
    __asm__ __volatile__("lock; cmpxchgl %1,%2"
                         : "=a"(__before)
                         : "q"(__nw), "m"(*(volatile long long*)(__ptr)),
                               "0"(__old)
                         : "memory");
    return __before;
  }

#if defined(__x86_64)
  template<typename _MustBeInt = int>
  inline int64_t
  __cas64(volatile int64_t *__ptr, int64_t __old, int64_t __nw)
  {
    int64_t __before;
    __asm__ __volatile__("lock; cmpxchgq %1,%2"
                         : "=a"(__before)
                         : "q"(__nw), "m"(*(volatile long long*)(__ptr)),
                               "0"(__old)
                         : "memory");
    return __before;
  }
#endif

#endif

  /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
   * *__ptr=__replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to 32-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value.
   */
  inline bool
  __compare_and_swap_32(volatile int32_t* __ptr, int32_t __comparand,
                        int32_t __replacement)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedCompareExchange(
               reinterpret_cast<volatile long*>(__ptr),
               __replacement, __comparand)
             == __comparand;
#elif defined(__GNUC__)
    return __atomic_compare_exchange_n(__ptr, &__comparand, __replacement,
                       false, __ATOMIC_ACQ_REL,
                       __ATOMIC_RELAXED);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
                         __replacement) == __comparand;
#else
#pragma message("slow __compare_and_swap_32")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }

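  // Illustrative sketch (not part of the original header): a hypothetical
  // try-lock built on __compare_and_swap_32.  The store happens, and true
  // is returned, only if the flag still holds 0 at the moment of the swap.
  inline bool
  __example_try_lock_32(volatile int32_t* __flag)
  { return __compare_and_swap_32(__flag, 0, 1); }
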
  /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
   * *__ptr=__replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to 64-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value.
   */
  inline bool
  __compare_and_swap_64(volatile int64_t* __ptr, int64_t __comparand,
                        int64_t __replacement)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange64((void*)__ptr, __replacement,
                                         __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);    //not available in this case
    return 0;
#else
    return _InterlockedCompareExchange64(__ptr, __replacement,
                                         __comparand) == __comparand;
#endif

#elif defined(__GNUC__) && defined(__x86_64)
    return __atomic_compare_exchange_n(__ptr, &__comparand, __replacement,
                       false, __ATOMIC_ACQ_REL,
                       __ATOMIC_RELAXED);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon)  \
   || defined(__k8) || defined(__core2))
    return __atomic_compare_exchange_n(__ptr, &__comparand, __replacement,
                       false, __ATOMIC_ACQ_REL,
                       __ATOMIC_RELAXED);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_64((volatile unsigned long long*)__ptr,
                         __comparand, __replacement) == __comparand;
#else
#if defined(__GNUC__) && defined(__i386)
    // XXX -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow __compare_and_swap_64")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }

  /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
   * *__ptr=__replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value. */
  template<typename _Tp>
  inline bool
  __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement)
  {
    if (sizeof(_Tp) == sizeof(int32_t))
      return __compare_and_swap_32((volatile int32_t*) __ptr,
                                   (int32_t)__comparand,
                                   (int32_t)__replacement);
    else if (sizeof(_Tp) == sizeof(int64_t))
      return __compare_and_swap_64((volatile int64_t*) __ptr,
                                   (int64_t)__comparand,
                                   (int64_t)__replacement);
    else
      _GLIBCXX_PARALLEL_ASSERT(false);
  }

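  // Illustrative sketch (not part of the original header): a hypothetical
  // atomic-maximum update written as the usual compare-and-swap retry loop.
  // The swap only succeeds if *__ptr still holds the value that was read,
  // so a concurrent modification simply forces another iteration.
  template<typename _Tp>
  inline void
  __example_fetch_max(volatile _Tp* __ptr, _Tp __val)
  {
    _Tp __old = *__ptr;
    while (__old < __val && !__compare_and_swap(__ptr, __old, __val))
      __old = *__ptr;
  }
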
  /** @brief Yield the control to another thread, without waiting for
      the end of the time slice. */
  inline void
  __yield()
  {
#if defined (_WIN32) && !defined (__CYGWIN__)
    Sleep(0);
#else
    sched_yield();
#endif
  }
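
  // Illustrative sketch (not part of the original header): a hypothetical
  // spin-wait that polls a flag and calls __yield() between polls, giving
  // up the processor instead of burning the rest of the time slice.
  inline void
  __example_wait_until_set(volatile int32_t* __flag)
  {
    while (*__flag == 0)
      __yield();
  }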
} // end namespace

#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */