libstdc++
|
00001 // -*- C++ -*- 00002 00003 // Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the terms 00007 // of the GNU General Public License as published by the Free Software 00008 // Foundation; either version 3, or (at your option) any later 00009 // version. 00010 00011 // This library is distributed in the hope that it will be useful, but 00012 // WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 // General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file parallel/settings.h 00026 * @brief Runtime settings and tuning parameters, heuristics to decide 00027 * whether to use parallelized algorithms. 00028 * This file is a GNU parallel extension to the Standard C++ Library. 00029 * 00030 * @section parallelization_decision 00031 * The decision whether to run an algorithm in parallel. 00032 * 00033 * There are several ways the user can switch on and __off the parallel 00034 * execution of an algorithm, both at compile- and run-time. 00035 * 00036 * Only sequential execution can be forced at compile-time. This 00037 * reduces code size and protects code parts that have 00038 * non-thread-safe side effects. 00039 * 00040 * Ultimately, forcing parallel execution at compile-time makes 00041 * sense. Often, the sequential algorithm implementation is used as 00042 * a subroutine, so no reduction in code size can be achieved. Also, 00043 * the machine the program is run on might have only one processor 00044 * core, so to avoid overhead, the algorithm is executed 00045 * sequentially. 00046 * 00047 * To force sequential execution of an algorithm ultimately at 00048 * compile-time, the user must add the tag 00049 * gnu_parallel::sequential_tag() to the end of the parameter list, 00050 * e. g. 00051 * 00052 * \code 00053 * std::sort(__v.begin(), __v.end(), __gnu_parallel::sequential_tag()); 00054 * \endcode 00055 * 00056 * This is compatible with all overloaded algorithm variants. No 00057 * additional code will be instantiated, at all. The same holds for 00058 * most algorithm calls with iterators not providing random access. 00059 * 00060 * If the algorithm call is not forced to be executed sequentially 00061 * at compile-time, the decision is made at run-time. 00062 * The global variable __gnu_parallel::_Settings::algorithm_strategy 00063 * is checked. _It is a tristate variable corresponding to: 00064 * 00065 * a. force_sequential, meaning the sequential algorithm is executed. 00066 * b. force_parallel, meaning the parallel algorithm is executed. 00067 * c. heuristic 00068 * 00069 * For heuristic, the parallel algorithm implementation is called 00070 * only if the input size is sufficiently large. For most 00071 * algorithms, the input size is the (combined) length of the input 00072 * sequence(__s). The threshold can be set by the user, individually 00073 * for each algorithm. The according variables are called 00074 * gnu_parallel::_Settings::[algorithm]_minimal_n . 00075 * 00076 * For some of the algorithms, there are even more tuning options, 00077 * e. g. the ability to choose from multiple algorithm variants. See 00078 * below for details. 00079 */ 00080 00081 // Written by Johannes Singler and Felix Putze. 00082 00083 #ifndef _GLIBCXX_PARALLEL_SETTINGS_H 00084 #define _GLIBCXX_PARALLEL_SETTINGS_H 1 00085 00086 #include <parallel/types.h> 00087 00088 /** 00089 * @brief Determine at compile(?)-time if the parallel variant of an 00090 * algorithm should be called. 00091 * @param __c A condition that is convertible to bool that is overruled by 00092 * __gnu_parallel::_Settings::algorithm_strategy. Usually a decision 00093 * based on the input size. 00094 */ 00095 #define _GLIBCXX_PARALLEL_CONDITION(__c) \ 00096 (__gnu_parallel::_Settings::get().algorithm_strategy \ 00097 != __gnu_parallel::force_sequential \ 00098 && ((__gnu_parallel::__get_max_threads() > 1 && (__c)) \ 00099 || __gnu_parallel::_Settings::get().algorithm_strategy \ 00100 == __gnu_parallel::force_parallel)) 00101 00102 /* 00103 inline bool 00104 parallel_condition(bool __c) 00105 { 00106 bool ret = false; 00107 const _Settings& __s = _Settings::get(); 00108 if (__s.algorithm_strategy != force_seqential) 00109 { 00110 if (__s.algorithm_strategy == force_parallel) 00111 ret = true; 00112 else 00113 ret = __get_max_threads() > 1 && __c; 00114 } 00115 return ret; 00116 } 00117 */ 00118 00119 namespace __gnu_parallel 00120 { 00121 /// class _Settings 00122 /// Run-time settings for the parallel mode including all tunable parameters. 00123 struct _Settings 00124 { 00125 _AlgorithmStrategy algorithm_strategy; 00126 00127 _SortAlgorithm sort_algorithm; 00128 _PartialSumAlgorithm partial_sum_algorithm; 00129 _MultiwayMergeAlgorithm multiway_merge_algorithm; 00130 _FindAlgorithm find_algorithm; 00131 00132 _SplittingAlgorithm sort_splitting; 00133 _SplittingAlgorithm merge_splitting; 00134 _SplittingAlgorithm multiway_merge_splitting; 00135 00136 // Per-algorithm settings. 00137 00138 /// Minimal input size for accumulate. 00139 _SequenceIndex accumulate_minimal_n; 00140 00141 /// Minimal input size for adjacent_difference. 00142 unsigned int adjacent_difference_minimal_n; 00143 00144 /// Minimal input size for count and count_if. 00145 _SequenceIndex count_minimal_n; 00146 00147 /// Minimal input size for fill. 00148 _SequenceIndex fill_minimal_n; 00149 00150 /// Block size increase factor for find. 00151 double find_increasing_factor; 00152 00153 /// Initial block size for find. 00154 _SequenceIndex find_initial_block_size; 00155 00156 /// Maximal block size for find. 00157 _SequenceIndex find_maximum_block_size; 00158 00159 /// Start with looking for this many elements sequentially, for find. 00160 _SequenceIndex find_sequential_search_size; 00161 00162 /// Minimal input size for for_each. 00163 _SequenceIndex for_each_minimal_n; 00164 00165 /// Minimal input size for generate. 00166 _SequenceIndex generate_minimal_n; 00167 00168 /// Minimal input size for max_element. 00169 _SequenceIndex max_element_minimal_n; 00170 00171 /// Minimal input size for merge. 00172 _SequenceIndex merge_minimal_n; 00173 00174 /// Oversampling factor for merge. 00175 unsigned int merge_oversampling; 00176 00177 /// Minimal input size for min_element. 00178 _SequenceIndex min_element_minimal_n; 00179 00180 /// Minimal input size for multiway_merge. 00181 _SequenceIndex multiway_merge_minimal_n; 00182 00183 /// Oversampling factor for multiway_merge. 00184 int multiway_merge_minimal_k; 00185 00186 /// Oversampling factor for multiway_merge. 00187 unsigned int multiway_merge_oversampling; 00188 00189 /// Minimal input size for nth_element. 00190 _SequenceIndex nth_element_minimal_n; 00191 00192 /// Chunk size for partition. 00193 _SequenceIndex partition_chunk_size; 00194 00195 /// Chunk size for partition, relative to input size. If > 0.0, 00196 /// this value overrides partition_chunk_size. 00197 double partition_chunk_share; 00198 00199 /// Minimal input size for partition. 00200 _SequenceIndex partition_minimal_n; 00201 00202 /// Minimal input size for partial_sort. 00203 _SequenceIndex partial_sort_minimal_n; 00204 00205 /// Ratio for partial_sum. Assume "sum and write result" to be 00206 /// this factor slower than just "sum". 00207 float partial_sum_dilation; 00208 00209 /// Minimal input size for partial_sum. 00210 unsigned int partial_sum_minimal_n; 00211 00212 /// Minimal input size for random_shuffle. 00213 unsigned int random_shuffle_minimal_n; 00214 00215 /// Minimal input size for replace and replace_if. 00216 _SequenceIndex replace_minimal_n; 00217 00218 /// Minimal input size for set_difference. 00219 _SequenceIndex set_difference_minimal_n; 00220 00221 /// Minimal input size for set_intersection. 00222 _SequenceIndex set_intersection_minimal_n; 00223 00224 /// Minimal input size for set_symmetric_difference. 00225 _SequenceIndex set_symmetric_difference_minimal_n; 00226 00227 /// Minimal input size for set_union. 00228 _SequenceIndex set_union_minimal_n; 00229 00230 /// Minimal input size for parallel sorting. 00231 _SequenceIndex sort_minimal_n; 00232 00233 /// Oversampling factor for parallel std::sort (MWMS). 00234 unsigned int sort_mwms_oversampling; 00235 00236 /// Such many samples to take to find a good pivot (quicksort). 00237 unsigned int sort_qs_num_samples_preset; 00238 00239 /// Maximal subsequence __length to switch to unbalanced __base case. 00240 /// Applies to std::sort with dynamically load-balanced quicksort. 00241 _SequenceIndex sort_qsb_base_case_maximal_n; 00242 00243 /// Minimal input size for parallel std::transform. 00244 _SequenceIndex transform_minimal_n; 00245 00246 /// Minimal input size for unique_copy. 00247 _SequenceIndex unique_copy_minimal_n; 00248 00249 _SequenceIndex workstealing_chunk_size; 00250 00251 // Hardware dependent tuning parameters. 00252 00253 /// size of the L1 cache in bytes (underestimation). 00254 unsigned long long L1_cache_size; 00255 00256 /// size of the L2 cache in bytes (underestimation). 00257 unsigned long long L2_cache_size; 00258 00259 /// size of the Translation Lookaside Buffer (underestimation). 00260 unsigned int TLB_size; 00261 00262 /// Overestimation of cache line size. Used to avoid false 00263 /// sharing, i.e. elements of different threads are at least this 00264 /// amount apart. 00265 unsigned int cache_line_size; 00266 00267 // Statistics. 00268 00269 /// The number of stolen ranges in load-balanced quicksort. 00270 _SequenceIndex qsb_steals; 00271 00272 /// Minimal input size for search and search_n. 00273 _SequenceIndex search_minimal_n; 00274 00275 /// Block size scale-down factor with respect to current position. 00276 float find_scale_factor; 00277 00278 /// Get the global settings. 00279 _GLIBCXX_CONST static const _Settings& 00280 get() throw(); 00281 00282 /// Set the global settings. 00283 static void 00284 set(_Settings&) throw(); 00285 00286 explicit 00287 _Settings() : 00288 algorithm_strategy(heuristic), 00289 sort_algorithm(MWMS), 00290 partial_sum_algorithm(LINEAR), 00291 multiway_merge_algorithm(LOSER_TREE), 00292 find_algorithm(CONSTANT_SIZE_BLOCKS), 00293 sort_splitting(EXACT), 00294 merge_splitting(EXACT), 00295 multiway_merge_splitting(EXACT), 00296 accumulate_minimal_n(1000), 00297 adjacent_difference_minimal_n(1000), 00298 count_minimal_n(1000), 00299 fill_minimal_n(1000), 00300 find_increasing_factor(2.0), 00301 find_initial_block_size(256), 00302 find_maximum_block_size(8192), 00303 find_sequential_search_size(256), 00304 for_each_minimal_n(1000), 00305 generate_minimal_n(1000), 00306 max_element_minimal_n(1000), 00307 merge_minimal_n(1000), 00308 merge_oversampling(10), 00309 min_element_minimal_n(1000), 00310 multiway_merge_minimal_n(1000), 00311 multiway_merge_minimal_k(2), multiway_merge_oversampling(10), 00312 nth_element_minimal_n(1000), 00313 partition_chunk_size(1000), 00314 partition_chunk_share(0.0), 00315 partition_minimal_n(1000), 00316 partial_sort_minimal_n(1000), 00317 partial_sum_dilation(1.0f), 00318 partial_sum_minimal_n(1000), 00319 random_shuffle_minimal_n(1000), 00320 replace_minimal_n(1000), 00321 set_difference_minimal_n(1000), 00322 set_intersection_minimal_n(1000), 00323 set_symmetric_difference_minimal_n(1000), 00324 set_union_minimal_n(1000), 00325 sort_minimal_n(1000), 00326 sort_mwms_oversampling(10), 00327 sort_qs_num_samples_preset(100), 00328 sort_qsb_base_case_maximal_n(100), 00329 transform_minimal_n(1000), 00330 unique_copy_minimal_n(10000), 00331 workstealing_chunk_size(100), 00332 L1_cache_size(16 << 10), 00333 L2_cache_size(256 << 10), 00334 TLB_size(128), 00335 cache_line_size(64), 00336 qsb_steals(0), 00337 search_minimal_n(1000), 00338 find_scale_factor(0.01f) 00339 { } 00340 }; 00341 } 00342 00343 #endif /* _GLIBCXX_PARALLEL_SETTINGS_H */