Bullet Collision Detection & Physics Library
btThreads.cpp
/*
Copyright (c) 2003-2014 Erwin Coumans  http://bullet.googlecode.com

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#include "btThreads.h"
#include "btQuickprof.h"
#include <algorithm>  // for min and max


#if BT_USE_OPENMP && BT_THREADSAFE

#include <omp.h>

#endif  // #if BT_USE_OPENMP && BT_THREADSAFE


#if BT_USE_PPL && BT_THREADSAFE

// use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
#include <ppl.h>  // if you get a compile error here, check whether your version of Visual Studio includes PPL
// Visual Studio 2010 and later should come with it
#include <concrtrm.h>  // for GetProcessorCount()

#endif  // #if BT_USE_PPL && BT_THREADSAFE


#if BT_USE_TBB && BT_THREADSAFE

// use Intel Threading Building Blocks for thread management
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/tbb.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>

#endif  // #if BT_USE_TBB && BT_THREADSAFE

#if BT_THREADSAFE
//
// Lightweight spin-mutex based on atomics
// Using ordinary system-provided mutexes like Windows critical sections was noticeably slower,
// presumably because a failed lock attempt puts the thread to sleep and triggers costly
// context switching.
//

#if __cplusplus >= 201103L

// for anything claiming full C++11 compliance, use C++11 atomics
// on GCC or Clang you need to compile with -std=c++11
#define USE_CPP11_ATOMICS 1

#elif defined( _MSC_VER )

// on MSVC, use intrinsics instead
#define USE_MSVC_INTRINSICS 1

#elif defined( __GNUC__ ) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))

// available since GCC 4.7 and some versions of clang
// todo: check for clang
#define USE_GCC_BUILTIN_ATOMICS 1

#elif defined( __GNUC__ ) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

// available since GCC 4.1
#define USE_GCC_BUILTIN_ATOMICS_OLD 1

#endif

#if USE_CPP11_ATOMICS

#include <atomic>
#include <thread>

#define THREAD_LOCAL_STATIC thread_local static

bool btSpinMutex::tryLock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    int expected = 0;
    return std::atomic_compare_exchange_weak_explicit( aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire );
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread.
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    std::atomic_store_explicit( aDest, int(0), std::memory_order_release );
}

#elif USE_MSVC_INTRINSICS

#define WIN32_LEAN_AND_MEAN

#include <windows.h>
#include <intrin.h>

#define THREAD_LOCAL_STATIC __declspec( thread ) static

bool btSpinMutex::tryLock()
{
    volatile long* aDest = reinterpret_cast<long*>(&mLock);
    return ( 0 == _InterlockedCompareExchange( aDest, 1, 0) );
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    volatile long* aDest = reinterpret_cast<long*>( &mLock );
    _InterlockedExchange( aDest, 0 );
}

#elif USE_GCC_BUILTIN_ATOMICS

#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    int expected = 0;
    bool weak = false;
    const int memOrderSuccess = __ATOMIC_ACQ_REL;
    const int memOrderFail = __ATOMIC_ACQUIRE;
    return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
}

#elif USE_GCC_BUILTIN_ATOMICS_OLD

#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    // write 0
    __sync_fetch_and_and(&mLock, int(0));
}

#else  //#elif USE_MSVC_INTRINSICS

#error "no threading primitives defined -- unknown platform"

#endif  //#else //#elif USE_MSVC_INTRINSICS

#else  //#if BT_THREADSAFE

// These should never be called
void btSpinMutex::lock()
{
    btAssert( !"unimplemented btSpinMutex::lock() called" );
}

void btSpinMutex::unlock()
{
    btAssert( !"unimplemented btSpinMutex::unlock() called" );
}

bool btSpinMutex::tryLock()
{
    btAssert( !"unimplemented btSpinMutex::tryLock() called" );
    return true;
}

#define THREAD_LOCAL_STATIC static

#endif  // #else //#if BT_THREADSAFE

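// Illustrative usage sketch (not part of the original Bullet source; all "example" names below are
// hypothetical): a btSpinMutex guarding a shared counter. Because the lock spins instead of
// sleeping, it is only appropriate for very short critical sections like this one.
#if 0
static btSpinMutex gExampleMutex;   // hypothetical example mutex
static int gExampleCounter = 0;     // hypothetical shared data

void exampleIncrement()
{
    gExampleMutex.lock();    // busy-waits until the lock is acquired
    ++gExampleCounter;       // keep the critical section as small as possible
    gExampleMutex.unlock();
}
#endif
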
struct ThreadsafeCounter
{
    unsigned int mCounter;
    btSpinMutex mMutex;

    ThreadsafeCounter()
    {
        mCounter = 0;
        --mCounter;  // first count should come back 0
    }

    unsigned int getNext()
    {
        // no need to optimize this with atomics, it is only called ONCE per thread!
        mMutex.lock();
        mCounter++;
        if ( mCounter >= BT_MAX_THREAD_COUNT )
        {
            btAssert( !"thread counter exceeded" );
            // wrap back to the first worker index
            mCounter = 1;
        }
        unsigned int val = mCounter;
        mMutex.unlock();
        return val;
    }
};

static btITaskScheduler* gBtTaskScheduler;
static int gThreadsRunningCounter = 0;  // useful for detecting if we are trying to do nested parallel-for calls
static btSpinMutex gThreadsRunningCounterMutex;
static ThreadsafeCounter gThreadCounter;

//
// BT_DETECT_BAD_THREAD_INDEX tries to detect when there are multiple threads assigned the same thread index.
//
// BT_DETECT_BAD_THREAD_INDEX is a developer option to test whether
// certain assumptions about how the task scheduler manages its threads
// hold true.
// The main assumption is:
//   - when the threadpool is resized, the task scheduler either
//     1. destroys all worker threads and creates all new ones in the correct number, OR
//     2. never destroys a worker thread
//
// We make that assumption because we can't easily enumerate the worker threads of a task scheduler
// to assign nice sequential thread-indexes. We also do not get notified if a worker thread is destroyed,
// so we can't tell when a thread-index is no longer being used.
// We allocate thread-indexes as needed with a sequential global thread counter.
//
// Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but
// continues to re-use other threads and the application repeatedly resizes the thread pool of the
// task scheduler.
// In order to prevent the thread-counter from exceeding the global max (BT_MAX_THREAD_COUNT), we
// wrap the thread counter back to 1. This should only happen if the worker threads have all been
// destroyed and re-created.
//
// BT_DETECT_BAD_THREAD_INDEX only works for Win32 right now,
// but could be adapted to work with pthreads
#define BT_DETECT_BAD_THREAD_INDEX 0

#if BT_DETECT_BAD_THREAD_INDEX

typedef DWORD ThreadId_t;
const static ThreadId_t kInvalidThreadId = 0;
ThreadId_t gDebugThreadIds[ BT_MAX_THREAD_COUNT ];

static ThreadId_t getDebugThreadId()
{
    return GetCurrentThreadId();
}

#endif // #if BT_DETECT_BAD_THREAD_INDEX

// return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT)
unsigned int btGetCurrentThreadIndex()
{
    const unsigned int kNullIndex = ~0U;
    THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex;
    if ( sThreadIndex == kNullIndex )
    {
        sThreadIndex = gThreadCounter.getNext();
        btAssert( sThreadIndex < BT_MAX_THREAD_COUNT );
    }
#if BT_DETECT_BAD_THREAD_INDEX
    if ( gBtTaskScheduler && sThreadIndex > 0 )
    {
        ThreadId_t tid = getDebugThreadId();
        // if not set
        if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId )
        {
            // set it
            gDebugThreadIds[ sThreadIndex ] = tid;
        }
        else
        {
            if ( gDebugThreadIds[ sThreadIndex ] != tid )
            {
                // this could indicate the task scheduler is breaking our assumptions about
                // how threads are managed when the threadpool is resized
                btAssert( !"there are 2 or more threads with the same thread-index!" );
                __debugbreak();
            }
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX
    return sThreadIndex;
}

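// Illustrative usage sketch (not part of the original Bullet source; the "example" names are
// hypothetical): btGetCurrentThreadIndex() returns a stable per-thread index in
// [0, BT_MAX_THREAD_COUNT), which lets each thread write to its own slot of a pre-sized array
// and avoid locking entirely.
#if 0
static int gExamplePerThreadHitCount[ BT_MAX_THREAD_COUNT ];  // hypothetical per-thread scratch array

void exampleRecordHit()
{
    // each thread touches only its own slot, so no mutex is needed
    gExamplePerThreadHitCount[ btGetCurrentThreadIndex() ]++;
}
#endif
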
bool btIsMainThread()
{
    return btGetCurrentThreadIndex() == 0;
}

void btResetThreadIndexCounter()
{
    // for when all current worker threads are destroyed
    btAssert( btIsMainThread() );
    gThreadCounter.mCounter = 0;
}

btITaskScheduler::btITaskScheduler( const char* name )
{
    m_name = name;
    m_savedThreadCounter = 0;
    m_isActive = false;
}

void btITaskScheduler::activate()
{
    // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler.
    // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1).
    // The thread-indexes need to be unique amongst the threads that can be running simultaneously.
    // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different
    // task schedulers to share the same thread index because they can't be running at the same time.
    // So each task scheduler needs to keep its own thread counter value.
    if ( !m_isActive )
    {
        gThreadCounter.mCounter = m_savedThreadCounter;  // restore saved thread counter
        m_isActive = true;
    }
}

void btITaskScheduler::deactivate()
{
    if ( m_isActive )
    {
        m_savedThreadCounter = gThreadCounter.mCounter;  // save thread counter
        m_isActive = false;
    }
}

void btPushThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter++;
    gThreadsRunningCounterMutex.unlock();
}

void btPopThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter--;
    gThreadsRunningCounterMutex.unlock();
}

bool btThreadsAreRunning()
{
    return gThreadsRunningCounter != 0;
}

void btSetTaskScheduler( btITaskScheduler* ts )
{
    int threadId = btGetCurrentThreadIndex();  // make sure we call this on the main thread at least once before any workers run
    if ( threadId != 0 )
    {
        btAssert( !"btSetTaskScheduler must be called from the main thread!" );
        return;
    }
    if ( gBtTaskScheduler )
    {
        // deactivate old task scheduler
        gBtTaskScheduler->deactivate();
    }
    gBtTaskScheduler = ts;
    if ( ts )
    {
        // activate new task scheduler
        ts->activate();
    }
}


btITaskScheduler* btGetTaskScheduler()
{
    return gBtTaskScheduler;
}

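// Illustrative usage sketch (not part of the original Bullet source; exampleInstallScheduler is a
// hypothetical helper): installing a task scheduler from the main thread before any parallel work
// is issued. btSetTaskScheduler() asserts if called from a worker thread; btGetTaskScheduler()
// then returns whatever was installed last.
#if 0
void exampleInstallScheduler()
{
    btITaskScheduler* ts = btGetSequentialTaskScheduler();  // always available
    ts->setNumThreads( ts->getMaxNumThreads() );
    btSetTaskScheduler( ts );   // must be called on the main thread
    btAssert( btGetTaskScheduler() == ts );
}
#endif
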
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
{
#if BT_THREADSAFE

#if BT_DETECT_BAD_THREAD_INDEX
    if ( !btThreadsAreRunning() )
    {
        // clear out thread ids
        for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
        {
            gDebugThreadIds[ i ] = kInvalidThreadId;
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX

    btAssert( gBtTaskScheduler != NULL );  // call btSetTaskScheduler() with a valid task scheduler first!
    gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );

#else // #if BT_THREADSAFE

    // non-parallel version of btParallelFor
    btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
    body.forLoop( iBegin, iEnd );

#endif // #if BT_THREADSAFE
}

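// Illustrative usage sketch (not part of the original Bullet source; the "Example" names are
// hypothetical): a minimal btIParallelForBody that scales an array of btScalar values, dispatched
// through btParallelFor. grainSize controls roughly how many indices each task processes.
#if 0
struct ExampleScaleBody : public btIParallelForBody
{
    btScalar* mValues;
    btScalar mFactor;

    virtual void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
    {
        for ( int i = iBegin; i < iEnd; ++i )
        {
            mValues[ i ] *= mFactor;  // each chunk works on a disjoint index range
        }
    }
};

void exampleScale( btScalar* values, int count )
{
    ExampleScaleBody body;
    body.mValues = values;
    body.mFactor = btScalar( 2 );
    btParallelFor( 0, count, 64, body );  // process the range in chunks of about 64 elements
}
#endif
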
btScalar btParallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body )
{
#if BT_THREADSAFE

#if BT_DETECT_BAD_THREAD_INDEX
    if ( !btThreadsAreRunning() )
    {
        // clear out thread ids
        for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
        {
            gDebugThreadIds[ i ] = kInvalidThreadId;
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX

    btAssert( gBtTaskScheduler != NULL );  // call btSetTaskScheduler() with a valid task scheduler first!
    return gBtTaskScheduler->parallelSum( iBegin, iEnd, grainSize, body );

#else // #if BT_THREADSAFE

    // non-parallel version of btParallelSum
    btAssert( !"called btParallelSum in non-threadsafe build. enable BT_THREADSAFE" );
    return body.sumLoop( iBegin, iEnd );

#endif // #else // #if BT_THREADSAFE
}

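// Illustrative usage sketch (not part of the original Bullet source; the "Example" names are
// hypothetical): a minimal btIParallelSumBody that adds up an array of btScalar values. Each chunk
// returns a partial sum and btParallelSum combines the partial sums into the final result.
#if 0
struct ExampleSumBody : public btIParallelSumBody
{
    const btScalar* mValues;

    virtual btScalar sumLoop( int iBegin, int iEnd ) const BT_OVERRIDE
    {
        btScalar partial = btScalar( 0 );
        for ( int i = iBegin; i < iEnd; ++i )
        {
            partial += mValues[ i ];
        }
        return partial;  // partial sums are combined by the task scheduler
    }
};

btScalar exampleSum( const btScalar* values, int count )
{
    ExampleSumBody body;
    body.mValues = values;
    return btParallelSum( 0, count, 64, body );
}
#endif
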
///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
///                              (really just useful for testing performance of single threaded vs multi threaded)
///
class btTaskSchedulerSequential : public btITaskScheduler
{
public:
    btTaskSchedulerSequential() : btITaskScheduler( "Sequential" ) {}
    virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; }
    virtual int getNumThreads() const BT_OVERRIDE { return 1; }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE {}
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_sequential" );
        body.forLoop( iBegin, iEnd );
    }
    virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelSum_sequential" );
        return body.sumLoop( iBegin, iEnd );
    }
};

#if BT_USE_OPENMP && BT_THREADSAFE
///
/// btTaskSchedulerOpenMP -- wrapper around the OpenMP task scheduler
///
class btTaskSchedulerOpenMP : public btITaskScheduler
{
    int m_numThreads;
public:
    btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
    {
        m_numThreads = 0;
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return omp_get_max_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        // With OpenMP, because it is a standard with various implementations, we can't
        // know for sure if every implementation has the same behavior of destroying all
        // previous threads when resizing the threadpool
        m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
        omp_set_num_threads( 1 );  // hopefully, all previous threads get destroyed here
        omp_set_num_threads( m_numThreads );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_OpenMP" );
        btPushThreadsAreRunning();
#pragma omp parallel for schedule( static, 1 )
        for ( int i = iBegin; i < iEnd; i += grainSize )
        {
            BT_PROFILE( "OpenMP_forJob" );
            body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
        }
        btPopThreadsAreRunning();
    }
    virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelSum_OpenMP" );
        btPushThreadsAreRunning();
        btScalar sum = btScalar( 0 );
#pragma omp parallel for schedule( static, 1 ) reduction(+:sum)
        for ( int i = iBegin; i < iEnd; i += grainSize )
        {
            BT_PROFILE( "OpenMP_sumJob" );
            sum += body.sumLoop( i, ( std::min )( i + grainSize, iEnd ) );
        }
        btPopThreadsAreRunning();
        return sum;
    }
};
#endif // #if BT_USE_OPENMP && BT_THREADSAFE

#if BT_USE_TBB && BT_THREADSAFE
///
/// btTaskSchedulerTBB -- wrapper around the Intel Threading Building Blocks task scheduler
///
class btTaskSchedulerTBB : public btITaskScheduler
{
    int m_numThreads;
    tbb::task_scheduler_init* m_tbbSchedulerInit;

public:
    btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
    {
        m_numThreads = 0;
        m_tbbSchedulerInit = NULL;
    }
    ~btTaskSchedulerTBB()
    {
        if ( m_tbbSchedulerInit )
        {
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
    }

    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return tbb::task_scheduler_init::default_num_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        m_numThreads = ( std::max )( 1, ( std::min )( int(BT_MAX_THREAD_COUNT), numThreads ) );
        if ( m_tbbSchedulerInit )
        {
            // destroys all previous threads
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
        m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    struct ForBodyAdapter
    {
        const btIParallelForBody* mBody;

        ForBodyAdapter( const btIParallelForBody* body ) : mBody( body ) {}
        void operator()( const tbb::blocked_range<int>& range ) const
        {
            BT_PROFILE( "TBB_forJob" );
            mBody->forLoop( range.begin(), range.end() );
        }
    };
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_TBB" );
        ForBodyAdapter tbbBody( &body );
        btPushThreadsAreRunning();
        tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
                           tbbBody,
                           tbb::simple_partitioner()
        );
        btPopThreadsAreRunning();
    }
    struct SumBodyAdapter
    {
        const btIParallelSumBody* mBody;
        btScalar mSum;

        SumBodyAdapter( const btIParallelSumBody* body ) : mBody( body ), mSum( btScalar( 0 ) ) {}
        SumBodyAdapter( const SumBodyAdapter& src, tbb::split ) : mBody( src.mBody ), mSum( btScalar( 0 ) ) {}
        void join( const SumBodyAdapter& src ) { mSum += src.mSum; }
        void operator()( const tbb::blocked_range<int>& range )
        {
            BT_PROFILE( "TBB_sumJob" );
            mSum += mBody->sumLoop( range.begin(), range.end() );
        }
    };
    virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelSum_TBB" );
        SumBodyAdapter tbbBody( &body );
        btPushThreadsAreRunning();
        tbb::parallel_deterministic_reduce( tbb::blocked_range<int>( iBegin, iEnd, grainSize ), tbbBody );
        btPopThreadsAreRunning();
        return tbbBody.mSum;
    }
};
#endif // #if BT_USE_TBB && BT_THREADSAFE

#if BT_USE_PPL && BT_THREADSAFE
///
/// btTaskSchedulerPPL -- wrapper around the Microsoft Parallel Patterns Library task scheduler
///
class btTaskSchedulerPPL : public btITaskScheduler
{
    int m_numThreads;
    concurrency::combinable<btScalar> m_sum;  // for parallelSum
public:
    btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
    {
        m_numThreads = 0;
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return concurrency::GetProcessorCount();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        // capping the thread count for PPL due to a thread-index issue
        const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31);
        m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) );
        using namespace concurrency;
        if ( CurrentScheduler::Id() != -1 )
        {
            CurrentScheduler::Detach();
        }
        SchedulerPolicy policy;
        {
            // PPL seems to destroy threads when the threadpool is shrunk, but keeps reusing old threads
            // force it to destroy old threads
            policy.SetConcurrencyLimits( 1, 1 );
            CurrentScheduler::Create( policy );
            CurrentScheduler::Detach();
        }
        policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
        CurrentScheduler::Create( policy );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    struct ForBodyAdapter
    {
        const btIParallelForBody* mBody;
        int mGrainSize;
        int mIndexEnd;

        ForBodyAdapter( const btIParallelForBody* body, int grainSize, int end ) : mBody( body ), mGrainSize( grainSize ), mIndexEnd( end ) {}
        void operator()( int i ) const
        {
            BT_PROFILE( "PPL_forJob" );
            mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
        }
    };
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_PPL" );
        // PPL dispatch
        ForBodyAdapter pplBody( &body, grainSize, iEnd );
        btPushThreadsAreRunning();
        // note: MSVC 2010 doesn't support partitioner args, so avoid them
        concurrency::parallel_for( iBegin,
                                   iEnd,
                                   grainSize,
                                   pplBody
        );
        btPopThreadsAreRunning();
    }
    struct SumBodyAdapter
    {
        const btIParallelSumBody* mBody;
        concurrency::combinable<btScalar>* mSum;
        int mGrainSize;
        int mIndexEnd;

        SumBodyAdapter( const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end ) : mBody( body ), mSum(sum), mGrainSize( grainSize ), mIndexEnd( end ) {}
        void operator()( int i ) const
        {
            BT_PROFILE( "PPL_sumJob" );
            mSum->local() += mBody->sumLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
        }
    };
    static btScalar sumFunc( btScalar a, btScalar b ) { return a + b; }
    virtual btScalar parallelSum( int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelSum_PPL" );
        m_sum.clear();
        SumBodyAdapter pplBody( &body, &m_sum, grainSize, iEnd );
        btPushThreadsAreRunning();
        // note: MSVC 2010 doesn't support partitioner args, so avoid them
        concurrency::parallel_for( iBegin,
                                   iEnd,
                                   grainSize,
                                   pplBody
        );
        btPopThreadsAreRunning();
        return m_sum.combine( sumFunc );
    }
};
#endif // #if BT_USE_PPL && BT_THREADSAFE

// create a non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler()
{
    static btTaskSchedulerSequential sTaskScheduler;
    return &sTaskScheduler;
}


// create an OpenMP task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetOpenMPTaskScheduler()
{
#if BT_USE_OPENMP && BT_THREADSAFE
    static btTaskSchedulerOpenMP sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}


// create an Intel TBB task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetTBBTaskScheduler()
{
#if BT_USE_TBB && BT_THREADSAFE
    static btTaskSchedulerTBB sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}


// create a PPL task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetPPLTaskScheduler()
{
#if BT_USE_PPL && BT_THREADSAFE
    static btTaskSchedulerPPL sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}

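// Illustrative usage sketch (not part of the original Bullet source; exampleChooseScheduler is a
// hypothetical helper): picking the best task scheduler that this build provides. The getters
// return NULL when the corresponding library was not compiled in, so falling through to the
// sequential scheduler always succeeds.
#if 0
btITaskScheduler* exampleChooseScheduler()
{
    btITaskScheduler* ts = btGetTBBTaskScheduler();
    if ( !ts ) ts = btGetPPLTaskScheduler();
    if ( !ts ) ts = btGetOpenMPTaskScheduler();
    if ( !ts ) ts = btGetSequentialTaskScheduler();  // always available fallback
    return ts;
}
#endif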