libstdc++
settings.h
Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 3, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file parallel/settings.h
00026  *  @brief Runtime settings and tuning parameters, heuristics to decide
00027  *  whether to use parallelized algorithms.
00028  *  This file is a GNU parallel extension to the Standard C++ Library.
00029  *
00030  *  @section parallelization_decision 
00031  *  The decision whether to run an algorithm in parallel.
00032  *
00033  *  There are several ways the user can switch on and __off the parallel
00034  *  execution of an algorithm, both at compile- and run-time.
00035  *
00036  *  Only sequential execution can be forced at compile-time.  This
00037  *  reduces code size and protects code parts that have
00038  *  non-thread-safe side effects.
00039  *
00040  *  Ultimately, forcing parallel execution at compile-time makes
00041  *  sense.  Often, the sequential algorithm implementation is used as
00042  *  a subroutine, so no reduction in code size can be achieved.  Also,
00043  *  the machine the program is run on might have only one processor
00044  *  core, so to avoid overhead, the algorithm is executed
00045  *  sequentially.
00046  *
00047  *  To force sequential execution of an algorithm ultimately at
00048  *  compile-time, the user must add the tag
00049 *  gnu_parallel::sequential_tag() to the end of the parameter list,
00050  *  e. g.
00051  *
00052  *  \code
00053  *  std::sort(__v.begin(), __v.end(), __gnu_parallel::sequential_tag());
00054  *  \endcode
00055  *
00056  *  This is compatible with all overloaded algorithm variants.  No
00057  *  additional code will be instantiated, at all.  The same holds for
00058  *  most algorithm calls with iterators not providing random access.
00059  *
00060  *  If the algorithm call is not forced to be executed sequentially
00061  *  at compile-time, the decision is made at run-time.
00062  *  The global variable __gnu_parallel::_Settings::algorithm_strategy
00063  *  is checked. _It is a tristate variable corresponding to:
00064  *
00065  *  a. force_sequential, meaning the sequential algorithm is executed.
00066 *  b. force_parallel, meaning the parallel algorithm is executed.
00067 *  c. heuristic
00068  *
00069  *  For heuristic, the parallel algorithm implementation is called
00070  *  only if the input size is sufficiently large.  For most
00071  *  algorithms, the input size is the (combined) length of the input
00072 *  sequence(__s).  The threshold can be set by the user, individually
00073  *  for each algorithm.  The according variables are called
00074 *  gnu_parallel::_Settings::[algorithm]_minimal_n .
00075  *
00076  *  For some of the algorithms, there are even more tuning options,
00077  *  e. g. the ability to choose from multiple algorithm variants.  See
00078  *  below for details.
00079  */
00080 
00081 // Written by Johannes Singler and Felix Putze.
00082 
00083 #ifndef _GLIBCXX_PARALLEL_SETTINGS_H
00084 #define _GLIBCXX_PARALLEL_SETTINGS_H 1
00085 
00086 #include <parallel/types.h>
00087 
00088 /** 
00089   * @brief Determine at compile(?)-time if the parallel variant of an
00090   * algorithm should be called.
00091   * @param __c A condition that is convertible to bool that is overruled by
00092   * __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
00093   * based on the input size.
00094   */
00095 #define _GLIBCXX_PARALLEL_CONDITION(__c) \
00096   (__gnu_parallel::_Settings::get().algorithm_strategy \
00097     != __gnu_parallel::force_sequential \
00098   && ((__gnu_parallel::__get_max_threads() > 1 && (__c)) \
00099      || __gnu_parallel::_Settings::get().algorithm_strategy \
00100         == __gnu_parallel::force_parallel))
00101 
00102 /*
00103 inline bool
00104 parallel_condition(bool __c)
00105 {
00106   bool ret = false;
00107   const _Settings& __s = _Settings::get();
00108   if (__s.algorithm_strategy != force_seqential)
00109     {
00110       if (__s.algorithm_strategy == force_parallel)
00111         ret = true;
00112       else
00113         ret = __get_max_threads() > 1 && __c;
00114     }
00115   return ret;
00116 }
00117 */
00118 
00119 namespace __gnu_parallel
00120 {
00121   /// class _Settings
00122   /// Run-time settings for the parallel mode including all tunable parameters.
00123   struct _Settings
00124   {
00125     _AlgorithmStrategy          algorithm_strategy;
00126     
00127     _SortAlgorithm              sort_algorithm;
00128     _PartialSumAlgorithm        partial_sum_algorithm;
00129     _MultiwayMergeAlgorithm     multiway_merge_algorithm;
00130     _FindAlgorithm              find_algorithm;
00131 
00132     _SplittingAlgorithm         sort_splitting;
00133     _SplittingAlgorithm         merge_splitting;
00134     _SplittingAlgorithm         multiway_merge_splitting;
00135 
00136     // Per-algorithm settings.
00137 
00138     /// Minimal input size for accumulate.
00139     _SequenceIndex              accumulate_minimal_n;
00140 
00141     /// Minimal input size for adjacent_difference.
00142     unsigned int                adjacent_difference_minimal_n;
00143 
00144     /// Minimal input size for count and count_if.
00145     _SequenceIndex              count_minimal_n;
00146 
00147     /// Minimal input size for fill.
00148     _SequenceIndex              fill_minimal_n;
00149 
00150     /// Block size increase factor for find.
00151     double                      find_increasing_factor;
00152 
00153     /// Initial block size for find.
00154     _SequenceIndex              find_initial_block_size;
00155 
00156     /// Maximal block size for find.
00157     _SequenceIndex              find_maximum_block_size;
00158 
00159     /// Start with looking for this many elements sequentially, for find.
00160     _SequenceIndex              find_sequential_search_size;
00161 
00162     /// Minimal input size for for_each.
00163     _SequenceIndex              for_each_minimal_n;
00164 
00165     /// Minimal input size for generate.
00166     _SequenceIndex              generate_minimal_n;
00167 
00168     /// Minimal input size for max_element.
00169     _SequenceIndex              max_element_minimal_n;
00170 
00171     /// Minimal input size for merge.
00172     _SequenceIndex              merge_minimal_n;
00173 
00174     /// Oversampling factor for merge.
00175     unsigned int                merge_oversampling;
00176 
00177     /// Minimal input size for min_element.
00178     _SequenceIndex              min_element_minimal_n;
00179 
00180     /// Minimal input size for multiway_merge.
00181     _SequenceIndex              multiway_merge_minimal_n;
00182 
00183     /// Oversampling factor for multiway_merge.
00184     int                         multiway_merge_minimal_k;
00185 
00186     /// Oversampling factor for multiway_merge.
00187     unsigned int                multiway_merge_oversampling;
00188 
00189     /// Minimal input size for nth_element.
00190     _SequenceIndex              nth_element_minimal_n;
00191 
00192     /// Chunk size for partition.
00193     _SequenceIndex              partition_chunk_size;
00194 
00195     /// Chunk size for partition, relative to input size.  If > 0.0,
00196     /// this value overrides partition_chunk_size.
00197     double                      partition_chunk_share;
00198 
00199     /// Minimal input size for partition.
00200     _SequenceIndex              partition_minimal_n;
00201 
00202     /// Minimal input size for partial_sort.
00203     _SequenceIndex              partial_sort_minimal_n;
00204 
00205     /// Ratio for partial_sum. Assume "sum and write result" to be
00206     /// this factor slower than just "sum".
00207     float                       partial_sum_dilation;
00208 
00209     /// Minimal input size for partial_sum.
00210     unsigned int                partial_sum_minimal_n;
00211 
00212     /// Minimal input size for random_shuffle.
00213     unsigned int                random_shuffle_minimal_n;
00214 
00215     /// Minimal input size for replace and replace_if.
00216     _SequenceIndex              replace_minimal_n;
00217 
00218     /// Minimal input size for set_difference.
00219     _SequenceIndex              set_difference_minimal_n;
00220 
00221     /// Minimal input size for set_intersection.
00222     _SequenceIndex              set_intersection_minimal_n;
00223 
00224     /// Minimal input size for set_symmetric_difference.
00225     _SequenceIndex              set_symmetric_difference_minimal_n;
00226 
00227     /// Minimal input size for set_union.
00228     _SequenceIndex              set_union_minimal_n;
00229 
00230     /// Minimal input size for parallel sorting.
00231     _SequenceIndex              sort_minimal_n;
00232 
00233     /// Oversampling factor for parallel std::sort (MWMS).
00234     unsigned int                sort_mwms_oversampling;
00235 
00236     /// Such many samples to take to find a good pivot (quicksort).
00237     unsigned int                sort_qs_num_samples_preset;
00238 
00239     /// Maximal subsequence __length to switch to unbalanced __base case.
00240     /// Applies to std::sort with dynamically load-balanced quicksort.
00241     _SequenceIndex              sort_qsb_base_case_maximal_n;
00242 
00243     /// Minimal input size for parallel std::transform.
00244     _SequenceIndex              transform_minimal_n;
00245 
00246     /// Minimal input size for unique_copy. 
00247     _SequenceIndex              unique_copy_minimal_n;
00248 
00249     _SequenceIndex              workstealing_chunk_size;
00250 
00251     // Hardware dependent tuning parameters.
00252 
00253     /// size of the L1 cache in bytes (underestimation).
00254     unsigned long long          L1_cache_size;
00255 
00256     /// size of the L2 cache in bytes (underestimation).
00257     unsigned long long          L2_cache_size;
00258 
00259     /// size of the Translation Lookaside Buffer (underestimation).
00260     unsigned int                TLB_size;
00261 
00262     /// Overestimation of cache line size.  Used to avoid false
00263     /// sharing, i.e. elements of different threads are at least this
00264     /// amount apart.
00265     unsigned int                cache_line_size;
00266 
00267     // Statistics.
00268 
00269     /// The number of stolen ranges in load-balanced quicksort.
00270     _SequenceIndex              qsb_steals;
00271 
00272     /// Minimal input size for search and search_n.
00273     _SequenceIndex              search_minimal_n;
00274 
00275     /// Block size scale-down factor with respect to current position.
00276     float                       find_scale_factor;
00277 
00278     /// Get the global settings.
00279     _GLIBCXX_CONST static const _Settings&
00280     get() throw();
00281 
00282     /// Set the global settings.
00283     static void
00284     set(_Settings&) throw();
00285 
00286     explicit 
00287     _Settings() :
00288             algorithm_strategy(heuristic),
00289             sort_algorithm(MWMS),
00290             partial_sum_algorithm(LINEAR),
00291             multiway_merge_algorithm(LOSER_TREE),
00292             find_algorithm(CONSTANT_SIZE_BLOCKS),
00293             sort_splitting(EXACT),
00294             merge_splitting(EXACT),
00295             multiway_merge_splitting(EXACT),
00296             accumulate_minimal_n(1000),
00297             adjacent_difference_minimal_n(1000),
00298             count_minimal_n(1000),
00299             fill_minimal_n(1000),
00300             find_increasing_factor(2.0),
00301             find_initial_block_size(256),
00302             find_maximum_block_size(8192),
00303             find_sequential_search_size(256),
00304             for_each_minimal_n(1000),
00305             generate_minimal_n(1000),
00306             max_element_minimal_n(1000),
00307             merge_minimal_n(1000),
00308             merge_oversampling(10),
00309             min_element_minimal_n(1000),
00310             multiway_merge_minimal_n(1000),
00311             multiway_merge_minimal_k(2), multiway_merge_oversampling(10),
00312             nth_element_minimal_n(1000),
00313             partition_chunk_size(1000),
00314             partition_chunk_share(0.0),
00315             partition_minimal_n(1000),
00316             partial_sort_minimal_n(1000),
00317             partial_sum_dilation(1.0f),
00318             partial_sum_minimal_n(1000),
00319             random_shuffle_minimal_n(1000),
00320             replace_minimal_n(1000),
00321             set_difference_minimal_n(1000),
00322             set_intersection_minimal_n(1000),
00323             set_symmetric_difference_minimal_n(1000),
00324             set_union_minimal_n(1000),
00325             sort_minimal_n(1000),
00326             sort_mwms_oversampling(10),
00327             sort_qs_num_samples_preset(100),
00328             sort_qsb_base_case_maximal_n(100),
00329             transform_minimal_n(1000),
00330             unique_copy_minimal_n(10000),
00331             workstealing_chunk_size(100),
00332             L1_cache_size(16 << 10),
00333             L2_cache_size(256 << 10),
00334             TLB_size(128),
00335             cache_line_size(64),
00336             qsb_steals(0),
00337             search_minimal_n(1000),
00338             find_scale_factor(0.01f)
00339     { }
00340   };
00341 }
00342 
00343 #endif /* _GLIBCXX_PARALLEL_SETTINGS_H */