libstdc++
|
00001 // -*- C++ -*- 00002 00003 // Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the terms 00007 // of the GNU General Public License as published by the Free Software 00008 // Foundation; either version 3, or (at your option) any later 00009 // version. 00010 00011 // This library is distributed in the hope that it will be useful, but 00012 // WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 // General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file parallel/multiway_mergesort.h 00026 * @brief Parallel multiway merge sort. 00027 * This file is a GNU parallel extension to the Standard C++ Library. 00028 */ 00029 00030 // Written by Johannes Singler. 00031 00032 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 00033 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1 00034 00035 #include <vector> 00036 00037 #include <parallel/basic_iterator.h> 00038 #include <bits/stl_algo.h> 00039 #include <parallel/parallel.h> 00040 #include <parallel/multiway_merge.h> 00041 00042 namespace __gnu_parallel 00043 { 00044 /** @brief Subsequence description. */ 00045 template<typename _DifferenceTp> 00046 struct _Piece 00047 { 00048 typedef _DifferenceTp _DifferenceType; 00049 00050 /** @brief Begin of subsequence. */ 00051 _DifferenceType _M_begin; 00052 00053 /** @brief End of subsequence. */ 00054 _DifferenceType _M_end; 00055 }; 00056 00057 /** @brief Data accessed by all threads. 00058 * 00059 * PMWMS = parallel multiway mergesort */ 00060 template<typename _RAIter> 00061 struct _PMWMSSortingData 00062 { 00063 typedef std::iterator_traits<_RAIter> _TraitsType; 00064 typedef typename _TraitsType::value_type _ValueType; 00065 typedef typename _TraitsType::difference_type _DifferenceType; 00066 00067 /** @brief Number of threads involved. */ 00068 _ThreadIndex _M_num_threads; 00069 00070 /** @brief Input __begin. */ 00071 _RAIter _M_source; 00072 00073 /** @brief Start indices, per thread. */ 00074 _DifferenceType* _M_starts; 00075 00076 /** @brief Storage in which to sort. */ 00077 _ValueType** _M_temporary; 00078 00079 /** @brief Samples. */ 00080 _ValueType* _M_samples; 00081 00082 /** @brief Offsets to add to the found positions. */ 00083 _DifferenceType* _M_offsets; 00084 00085 /** @brief Pieces of data to merge @c [thread][__sequence] */ 00086 std::vector<_Piece<_DifferenceType> >* _M_pieces; 00087 }; 00088 00089 /** 00090 * @brief Select _M_samples from a sequence. 00091 * @param __sd Pointer to algorithm data. _Result will be placed in 00092 * @c __sd->_M_samples. 00093 * @param __num_samples Number of _M_samples to select. 00094 */ 00095 template<typename _RAIter, typename _DifferenceTp> 00096 void 00097 __determine_samples(_PMWMSSortingData<_RAIter>* __sd, 00098 _DifferenceTp __num_samples) 00099 { 00100 typedef std::iterator_traits<_RAIter> _TraitsType; 00101 typedef typename _TraitsType::value_type _ValueType; 00102 typedef _DifferenceTp _DifferenceType; 00103 00104 _ThreadIndex __iam = omp_get_thread_num(); 00105 00106 _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; 00107 00108 __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], 00109 __num_samples + 1, __es); 00110 00111 for (_DifferenceType __i = 0; __i < __num_samples; ++__i) 00112 ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) 00113 _ValueType(__sd->_M_source[__sd->_M_starts[__iam] 00114 + __es[__i + 1]]); 00115 00116 delete[] __es; 00117 } 00118 00119 /** @brief Split consistently. */ 00120 template<bool __exact, typename _RAIter, 00121 typename _Compare, typename _SortingPlacesIterator> 00122 struct _SplitConsistently 00123 { }; 00124 00125 /** @brief Split by exact splitting. */ 00126 template<typename _RAIter, typename _Compare, 00127 typename _SortingPlacesIterator> 00128 struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator> 00129 { 00130 void 00131 operator()(const _ThreadIndex __iam, 00132 _PMWMSSortingData<_RAIter>* __sd, 00133 _Compare& __comp, 00134 const typename 00135 std::iterator_traits<_RAIter>::difference_type 00136 __num_samples) const 00137 { 00138 # pragma omp barrier 00139 00140 std::vector<std::pair<_SortingPlacesIterator, 00141 _SortingPlacesIterator> > 00142 __seqs(__sd->_M_num_threads); 00143 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) 00144 __seqs[__s] = std::make_pair(__sd->_M_temporary[__s], 00145 __sd->_M_temporary[__s] 00146 + (__sd->_M_starts[__s + 1] 00147 - __sd->_M_starts[__s])); 00148 00149 std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads); 00150 00151 // if not last thread 00152 if (__iam < __sd->_M_num_threads - 1) 00153 multiseq_partition(__seqs.begin(), __seqs.end(), 00154 __sd->_M_starts[__iam + 1], __offsets.begin(), 00155 __comp); 00156 00157 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) 00158 { 00159 // for each sequence 00160 if (__iam < (__sd->_M_num_threads - 1)) 00161 __sd->_M_pieces[__iam][__seq]._M_end 00162 = __offsets[__seq] - __seqs[__seq].first; 00163 else 00164 // very end of this sequence 00165 __sd->_M_pieces[__iam][__seq]._M_end = 00166 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; 00167 } 00168 00169 # pragma omp barrier 00170 00171 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) 00172 { 00173 // For each sequence. 00174 if (__iam > 0) 00175 __sd->_M_pieces[__iam][__seq]._M_begin = 00176 __sd->_M_pieces[__iam - 1][__seq]._M_end; 00177 else 00178 // Absolute beginning. 00179 __sd->_M_pieces[__iam][__seq]._M_begin = 0; 00180 } 00181 } 00182 }; 00183 00184 /** @brief Split by sampling. */ 00185 template<typename _RAIter, typename _Compare, 00186 typename _SortingPlacesIterator> 00187 struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator> 00188 { 00189 void 00190 operator()(const _ThreadIndex __iam, 00191 _PMWMSSortingData<_RAIter>* __sd, 00192 _Compare& __comp, 00193 const typename 00194 std::iterator_traits<_RAIter>::difference_type 00195 __num_samples) const 00196 { 00197 typedef std::iterator_traits<_RAIter> _TraitsType; 00198 typedef typename _TraitsType::value_type _ValueType; 00199 typedef typename _TraitsType::difference_type _DifferenceType; 00200 00201 __determine_samples(__sd, __num_samples); 00202 00203 # pragma omp barrier 00204 00205 # pragma omp single 00206 __gnu_sequential::sort(__sd->_M_samples, 00207 __sd->_M_samples 00208 + (__num_samples * __sd->_M_num_threads), 00209 __comp); 00210 00211 # pragma omp barrier 00212 00213 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) 00214 { 00215 // For each sequence. 00216 if (__num_samples * __iam > 0) 00217 __sd->_M_pieces[__iam][__s]._M_begin = 00218 std::lower_bound(__sd->_M_temporary[__s], 00219 __sd->_M_temporary[__s] 00220 + (__sd->_M_starts[__s + 1] 00221 - __sd->_M_starts[__s]), 00222 __sd->_M_samples[__num_samples * __iam], 00223 __comp) 00224 - __sd->_M_temporary[__s]; 00225 else 00226 // Absolute beginning. 00227 __sd->_M_pieces[__iam][__s]._M_begin = 0; 00228 00229 if ((__num_samples * (__iam + 1)) < 00230 (__num_samples * __sd->_M_num_threads)) 00231 __sd->_M_pieces[__iam][__s]._M_end = 00232 std::lower_bound(__sd->_M_temporary[__s], 00233 __sd->_M_temporary[__s] 00234 + (__sd->_M_starts[__s + 1] 00235 - __sd->_M_starts[__s]), 00236 __sd->_M_samples[__num_samples * (__iam + 1)], 00237 __comp) 00238 - __sd->_M_temporary[__s]; 00239 else 00240 // Absolute end. 00241 __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1] 00242 - __sd->_M_starts[__s]); 00243 } 00244 } 00245 }; 00246 00247 template<bool __stable, typename _RAIter, typename _Compare> 00248 struct __possibly_stable_sort 00249 { }; 00250 00251 template<typename _RAIter, typename _Compare> 00252 struct __possibly_stable_sort<true, _RAIter, _Compare> 00253 { 00254 void operator()(const _RAIter& __begin, 00255 const _RAIter& __end, _Compare& __comp) const 00256 { __gnu_sequential::stable_sort(__begin, __end, __comp); } 00257 }; 00258 00259 template<typename _RAIter, typename _Compare> 00260 struct __possibly_stable_sort<false, _RAIter, _Compare> 00261 { 00262 void operator()(const _RAIter __begin, 00263 const _RAIter __end, _Compare& __comp) const 00264 { __gnu_sequential::sort(__begin, __end, __comp); } 00265 }; 00266 00267 template<bool __stable, typename Seq_RAIter, 00268 typename _RAIter, typename _Compare, 00269 typename DiffType> 00270 struct __possibly_stable_multiway_merge 00271 { }; 00272 00273 template<typename Seq_RAIter, typename _RAIter, 00274 typename _Compare, typename _DiffType> 00275 struct __possibly_stable_multiway_merge<true, Seq_RAIter, 00276 _RAIter, _Compare, _DiffType> 00277 { 00278 void operator()(const Seq_RAIter& __seqs_begin, 00279 const Seq_RAIter& __seqs_end, 00280 const _RAIter& __target, 00281 _Compare& __comp, 00282 _DiffType __length_am) const 00283 { stable_multiway_merge(__seqs_begin, __seqs_end, __target, 00284 __length_am, __comp, sequential_tag()); } 00285 }; 00286 00287 template<typename Seq_RAIter, typename _RAIter, 00288 typename _Compare, typename _DiffType> 00289 struct __possibly_stable_multiway_merge<false, Seq_RAIter, 00290 _RAIter, _Compare, _DiffType> 00291 { 00292 void operator()(const Seq_RAIter& __seqs_begin, 00293 const Seq_RAIter& __seqs_end, 00294 const _RAIter& __target, 00295 _Compare& __comp, 00296 _DiffType __length_am) const 00297 { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, 00298 __comp, sequential_tag()); } 00299 }; 00300 00301 /** @brief PMWMS code executed by each thread. 00302 * @param __sd Pointer to algorithm data. 00303 * @param __comp Comparator. 00304 */ 00305 template<bool __stable, bool __exact, typename _RAIter, 00306 typename _Compare> 00307 void 00308 parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, 00309 _Compare& __comp) 00310 { 00311 typedef std::iterator_traits<_RAIter> _TraitsType; 00312 typedef typename _TraitsType::value_type _ValueType; 00313 typedef typename _TraitsType::difference_type _DifferenceType; 00314 00315 _ThreadIndex __iam = omp_get_thread_num(); 00316 00317 // Length of this thread's chunk, before merging. 00318 _DifferenceType __length_local = 00319 __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; 00320 00321 // Sort in temporary storage, leave space for sentinel. 00322 00323 typedef _ValueType* _SortingPlacesIterator; 00324 00325 __sd->_M_temporary[__iam] = 00326 static_cast<_ValueType*>(::operator new(sizeof(_ValueType) 00327 * (__length_local + 1))); 00328 00329 // Copy there. 00330 std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], 00331 __sd->_M_source + __sd->_M_starts[__iam] 00332 + __length_local, 00333 __sd->_M_temporary[__iam]); 00334 00335 __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() 00336 (__sd->_M_temporary[__iam], 00337 __sd->_M_temporary[__iam] + __length_local, 00338 __comp); 00339 00340 // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], 00341 // __sd->_M_temporary[__iam] + __length_local. 00342 00343 // No barrier here: Synchronization is done by the splitting routine. 00344 00345 _DifferenceType __num_samples = 00346 _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; 00347 _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>() 00348 (__iam, __sd, __comp, __num_samples); 00349 00350 // Offset from __target __begin, __length after merging. 00351 _DifferenceType __offset = 0, __length_am = 0; 00352 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) 00353 { 00354 __length_am += (__sd->_M_pieces[__iam][__s]._M_end 00355 - __sd->_M_pieces[__iam][__s]._M_begin); 00356 __offset += __sd->_M_pieces[__iam][__s]._M_begin; 00357 } 00358 00359 typedef std::vector< 00360 std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > 00361 _SeqVector; 00362 _SeqVector __seqs(__sd->_M_num_threads); 00363 00364 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) 00365 { 00366 __seqs[__s] = 00367 std::make_pair(__sd->_M_temporary[__s] 00368 + __sd->_M_pieces[__iam][__s]._M_begin, 00369 __sd->_M_temporary[__s] 00370 + __sd->_M_pieces[__iam][__s]._M_end); 00371 } 00372 00373 __possibly_stable_multiway_merge< 00374 __stable, typename _SeqVector::iterator, 00375 _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(), 00376 __sd->_M_source + __offset, __comp, 00377 __length_am); 00378 00379 # pragma omp barrier 00380 00381 for (_DifferenceType __i = 0; __i < __length_local; ++__i) 00382 __sd->_M_temporary[__iam][__i].~_ValueType(); 00383 ::operator delete(__sd->_M_temporary[__iam]); 00384 } 00385 00386 /** @brief PMWMS main call. 00387 * @param __begin Begin iterator of sequence. 00388 * @param __end End iterator of sequence. 00389 * @param __comp Comparator. 00390 * @param __num_threads Number of threads to use. 00391 */ 00392 template<bool __stable, bool __exact, typename _RAIter, 00393 typename _Compare> 00394 void 00395 parallel_sort_mwms(_RAIter __begin, _RAIter __end, 00396 _Compare __comp, 00397 _ThreadIndex __num_threads) 00398 { 00399 _GLIBCXX_CALL(__end - __begin) 00400 00401 typedef std::iterator_traits<_RAIter> _TraitsType; 00402 typedef typename _TraitsType::value_type _ValueType; 00403 typedef typename _TraitsType::difference_type _DifferenceType; 00404 00405 _DifferenceType __n = __end - __begin; 00406 00407 if (__n <= 1) 00408 return; 00409 00410 // at least one element per thread 00411 if (__num_threads > __n) 00412 __num_threads = static_cast<_ThreadIndex>(__n); 00413 00414 // shared variables 00415 _PMWMSSortingData<_RAIter> __sd; 00416 _DifferenceType* __starts; 00417 _DifferenceType __size; 00418 00419 # pragma omp parallel num_threads(__num_threads) 00420 { 00421 __num_threads = omp_get_num_threads(); //no more threads than requested 00422 00423 # pragma omp single 00424 { 00425 __sd._M_num_threads = __num_threads; 00426 __sd._M_source = __begin; 00427 00428 __sd._M_temporary = new _ValueType*[__num_threads]; 00429 00430 if (!__exact) 00431 { 00432 __size = 00433 (_Settings::get().sort_mwms_oversampling * __num_threads - 1) 00434 * __num_threads; 00435 __sd._M_samples = static_cast<_ValueType*> 00436 (::operator new(__size * sizeof(_ValueType))); 00437 } 00438 else 00439 __sd._M_samples = 0; 00440 00441 __sd._M_offsets = new _DifferenceType[__num_threads - 1]; 00442 __sd._M_pieces 00443 = new std::vector<_Piece<_DifferenceType> >[__num_threads]; 00444 for (_ThreadIndex __s = 0; __s < __num_threads; ++__s) 00445 __sd._M_pieces[__s].resize(__num_threads); 00446 __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; 00447 00448 _DifferenceType __chunk_length = __n / __num_threads; 00449 _DifferenceType __split = __n % __num_threads; 00450 _DifferenceType __pos = 0; 00451 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 00452 { 00453 __starts[__i] = __pos; 00454 __pos += ((__i < __split) 00455 ? (__chunk_length + 1) : __chunk_length); 00456 } 00457 __starts[__num_threads] = __pos; 00458 } //single 00459 00460 // Now sort in parallel. 00461 parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); 00462 } //parallel 00463 00464 delete[] __starts; 00465 delete[] __sd._M_temporary; 00466 00467 if (!__exact) 00468 { 00469 for (_DifferenceType __i = 0; __i < __size; ++__i) 00470 __sd._M_samples[__i].~_ValueType(); 00471 ::operator delete(__sd._M_samples); 00472 } 00473 00474 delete[] __sd._M_offsets; 00475 delete[] __sd._M_pieces; 00476 } 00477 00478 } //namespace __gnu_parallel 00479 00480 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */