libstdc++
|
00001 // -*- C++ -*- 00002 00003 // Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the terms 00007 // of the GNU General Public License as published by the Free Software 00008 // Foundation; either version 3, or (at your option) any later 00009 // version. 00010 00011 // This library is distributed in the hope that it will be useful, but 00012 // WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 // General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file parallel/omp_loop_static.h 00026 * @brief Parallelization of embarrassingly parallel execution by 00027 * means of an OpenMP for loop with static scheduling. 00028 * This file is a GNU parallel extension to the Standard C++ Library. 00029 */ 00030 00031 // Written by Felix Putze. 00032 00033 #ifndef _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H 00034 #define _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H 1 00035 00036 #include <omp.h> 00037 00038 #include <parallel/settings.h> 00039 #include <parallel/basic_iterator.h> 00040 00041 namespace __gnu_parallel 00042 { 00043 /** @brief Embarrassingly parallel algorithm for random access 00044 * iterators, using an OpenMP for loop with static scheduling. 00045 * 00046 * @param __begin Begin iterator of element sequence. 00047 * @param __end End iterator of element sequence. 00048 * @param __o User-supplied functor (comparator, predicate, adding 00049 * functor, ...). 00050 * @param __f Functor to @a process an element with __op (depends on 00051 * desired functionality, e. g. for std::for_each(), ...). 00052 * @param __r Functor to @a add a single __result to the already processed 00053 * __elements (depends on functionality). 00054 * @param __base Base value for reduction. 00055 * @param __output Pointer to position where final result is written to 00056 * @param __bound Maximum number of elements processed (e. g. for 00057 * std::count_n()). 00058 * @return User-supplied functor (that may contain a part of the result). 00059 */ 00060 template<typename _RAIter, 00061 typename _Op, 00062 typename _Fu, 00063 typename _Red, 00064 typename _Result> 00065 _Op 00066 __for_each_template_random_access_omp_loop_static(_RAIter __begin, 00067 _RAIter __end, _Op __o, 00068 _Fu& __f, _Red __r, 00069 _Result __base, 00070 _Result& __output, 00071 typename std::iterator_traits<_RAIter>::difference_type __bound) 00072 { 00073 typedef typename std::iterator_traits<_RAIter>::difference_type 00074 _DifferenceType; 00075 00076 _DifferenceType __length = __end - __begin; 00077 _ThreadIndex __num_threads = std::min<_DifferenceType> 00078 (__get_max_threads(), __length); 00079 00080 _Result *__thread_results; 00081 00082 # pragma omp parallel num_threads(__num_threads) 00083 { 00084 # pragma omp single 00085 { 00086 __num_threads = omp_get_num_threads(); 00087 __thread_results = new _Result[__num_threads]; 00088 00089 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 00090 __thread_results[__i] = _Result(); 00091 } 00092 00093 _ThreadIndex __iam = omp_get_thread_num(); 00094 00095 #pragma omp for schedule(static, _Settings::get().workstealing_chunk_size) 00096 for (_DifferenceType __pos = 0; __pos < __length; ++__pos) 00097 __thread_results[__iam] = __r(__thread_results[__iam], 00098 __f(__o, __begin+__pos)); 00099 } //parallel 00100 00101 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 00102 __output = __r(__output, __thread_results[__i]); 00103 00104 delete [] __thread_results; 00105 00106 // Points to last element processed (needed as return value for 00107 // some algorithms like transform). 00108 __f.finish_iterator = __begin + __length; 00109 00110 return __o; 00111 } 00112 00113 } // end namespace 00114 00115 #endif /* _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H */