/*
* Copyright (C) 2010 Sebastian Held (sebastian.held@gmx.de)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Uncomment the following define to compile the timing instrumentation into this file.
//#define ENABLE_DEBUG_TIME
// DEBUG_TIME(x) expands to the statement "x;" when ENABLE_DEBUG_TIME is defined
// and to an empty statement otherwise.
#ifdef ENABLE_DEBUG_TIME
#define DEBUG_TIME(x) x;
#else
#define DEBUG_TIME(x) ;
#endif
#include "engine_multithread.h"
#include "extensions/engine_extension.h"
#include "tools/array_ops.h"
#include "boost/date_time/posix_time/posix_time.hpp"
#include "boost/date_time/gregorian/gregorian.hpp"
#include
#ifndef SSE_CORRECT_DENORMALS
#include
#endif
//! \brief Factory: allocate an Engine_Multithread, set its thread count and initialize it.
//! \param op          operator handed to the Engine_Multithread constructor
//! \param numThreads  requested number of worker threads, forwarded to setNumThreads()
//! \return the newly allocated engine; the caller owns it and is responsible for deleting it
Engine_Multithread* Engine_Multithread::New(const Operator_Multithread* op, unsigned int numThreads)
{
	cout << "Create FDTD engine (compressed SSE + multi-threading)" << endl;

	Engine_Multithread* const engine = new Engine_Multithread(op);
	// the thread count has to be known before Init() starts the workers
	engine->setNumThreads( numThreads );
	engine->Init();
	return engine;
}
//! \brief Construct an engine bound to the given multithreading operator.
//!
//! Only puts the members into their idle state: no barriers, no thread group and
//! zero worker threads. Barriers and threads are created by changeNumThreads(),
//! which Init() invokes.
//! \param op operator passed on to ENGINE_MULTITHREAD_BASE and stored in m_Op_MT
Engine_Multithread::Engine_Multithread(const Operator_Multithread* op) : ENGINE_MULTITHREAD_BASE(op)
{
	m_Op_MT = op;
	m_type = SSE;

	// no synchronization objects and no threads exist yet
	m_IterateBarrier = 0;
	m_startBarrier = 0;
	m_stopBarrier = 0;
	m_thread_group = 0;

	// upper limit that Init() uses when clamping the requested thread count
	m_max_numThreads = boost::thread::hardware_concurrency();
	m_numThreads = 0;
	m_last_speed = 0;
	m_opt_speed = false;
	m_stopThreads = true;

	// m_MPI_Barrier is guarded by MPI_SUPPORT everywhere else in this file
	// (Init(), changeNumThreads() and the worker loop), so it has to be
	// initialized under the same guard rather than under ENABLE_DEBUG_TIME.
#ifdef MPI_SUPPORT
	m_MPI_Barrier = 0;
#endif
}
//! \brief Destructor: optionally dumps the recorded per-thread timings, then calls Reset().
Engine_Multithread::~Engine_Multithread()
{
#ifdef ENABLE_DEBUG_TIME
// only with ENABLE_DEBUG_TIME: print the timer samples stored in m_timer_list, one group per thread id
NS_Engine_Multithread::DBG().cout() << "Engine_Multithread::~Engine_Multithread()" << endl;
std::map >::iterator it;
for (it=m_timer_list.begin(); it!=m_timer_list.end(); it++)
{
NS_Engine_Multithread::DBG().cout() << "*** DEBUG Thread: " << it->first << std::endl;
std::vector::iterator it2;
// the iterator is advanced inside the body: every pass prints five consecutive samples,
// matching the five timer recordings the worker loop stores per time step
for (it2=it->second.begin(); it2second.end();)
{
NS_Engine_Multithread::DBG().cout() << "after voltage update, before barrier1: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after barrier1, before barrier2: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after barrier2, before current update: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after current update, before barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl;
NS_Engine_Multithread::DBG().cout() << "after barrier3: " << fixed << setprecision(6) << *(it2++) << std::endl;
}
}
#endif
// Reset() stops the worker threads and frees the barriers and the thread group
Reset();
}
void Engine_Multithread::setNumThreads( unsigned int numThreads )
{
m_numThreads = numThreads;
}
//! \brief Initialize the base engine and start the worker threads.
//!
//! The thread count stored by setNumThreads() is interpreted as follows:
//!  - 0: start with a single thread and set m_opt_speed, which enables the
//!    thread-count adjustment performed in NextInterval();
//!  - otherwise: use the requested count, limited to m_max_numThreads when
//!    that limit is known.
void Engine_Multithread::Init()
{
	m_stopThreads = true;
	m_opt_speed = false;
	ENGINE_MULTITHREAD_BASE::Init();

	// initialize threads
	m_stopThreads = false;
	if (m_numThreads == 0)
	{
		m_opt_speed = true;
		m_numThreads = 1;
	}
	else if ((m_max_numThreads > 0) && (m_numThreads > m_max_numThreads))
	{
		// m_max_numThreads comes from boost::thread::hardware_concurrency(), which
		// reports 0 when the hardware thread count is unavailable. Clamping in that
		// case would turn an explicit request into 0 threads, which cannot drive the
		// barriers created in changeNumThreads(); so only clamp against a known limit.
		m_numThreads = m_max_numThreads;
	}

#ifdef MPI_SUPPORT
	m_MPI_Barrier = 0;
#endif

	// creates the barriers and launches the worker threads
	this->changeNumThreads(m_numThreads);
}
//! \brief Stop all worker threads, free the synchronization objects and reset the base engine.
//!
//! The thread teardown only happens while a thread group exists, so calling
//! Reset() repeatedly is harmless; the base-class Reset() is invoked every time.
void Engine_Multithread::Reset()
{
	const bool threadsExist = (m_thread_group != 0);
	if (threadsExist)
	{
		// drop the extensions first so they cannot interfere with the thread shutdown
		ClearExtensions();

		// interrupt the workers and block until every one of them has terminated
		m_thread_group->interrupt_all();
		m_thread_group->join_all();

		// the barriers are no longer in use once all workers are gone
		delete m_IterateBarrier;
		m_IterateBarrier = 0;

		delete m_startBarrier;
		m_startBarrier = 0;

		delete m_stopBarrier;
		m_stopBarrier = 0;

		delete m_thread_group;
		m_thread_group = 0;
	}

	ENGINE_MULTITHREAD_BASE::Reset();
}
//! \brief Replace the current set of worker threads by \a numThreads new ones.
//!
//! Running workers are interrupted and joined, the three barriers are recreated
//! for the new thread count, and one worker thread is started per slice.
//! \param numThreads new number of worker threads; stored in m_numThreads
void Engine_Multithread::changeNumThreads(unsigned int numThreads)
{
// shut down an existing thread group before building a new one
if (m_thread_group!=0)
{
m_thread_group->interrupt_all();
m_thread_group->join_all(); // wait for termination
delete m_thread_group;
m_thread_group = 0;
//m_stopThreads = false;
}
m_numThreads = numThreads;
if (g_settings.GetVerboseLevel()>0)
cout << "Multithreaded engine using " << m_numThreads << " threads. Utilization: (";
// the operator fills the per-thread start/stop lines for m_numThreads threads
vector m_Start_Lines;
vector m_Stop_Lines;
m_Op_MT->CalcStartStopLines( m_numThreads, m_Start_Lines, m_Stop_Lines );
// the barriers are sized for the thread count, so they are rebuilt on every change
if (m_IterateBarrier!=0)
delete m_IterateBarrier;
// make sure all threads are waiting
m_IterateBarrier = new boost::barrier(m_numThreads); // numThread workers
if (m_startBarrier!=0)
delete m_startBarrier;
m_startBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller
if (m_stopBarrier!=0)
delete m_stopBarrier;
m_stopBarrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller
#ifdef MPI_SUPPORT
m_MPI_Barrier = 0;
#endif
m_thread_group = new boost::thread_group();
// one worker per slice; start/stop/stop_h describe that worker's line range
// (presumably taken from m_Start_Lines/m_Stop_Lines - confirm against the full loop header)
for (unsigned int n=0; n0)
cout << stop-start+1 << ")" << endl;
}
else
if (g_settings.GetVerboseLevel()>0)
cout << stop-start+1 << ";";
// NS_Engine_Multithread::DBG().cout() << "###DEBUG## Thread " << n << ": start=" << start << " stop=" << stop << " stop_h=" << stop_h << std::endl;
// the functor is handed to boost::thread; the new thread is added to m_thread_group
boost::thread *t = new boost::thread( NS_Engine_Multithread::thread(this,start,stop,stop_h,n) );
m_thread_group->add_thread( t );
}
// propagate the new thread count via SetNumberOfThreads() (presumably to each engine extension - confirm)
for (size_t n=0; nSetNumberOfThreads(m_numThreads);
}
bool Engine_Multithread::IterateTS(unsigned int iterTS)
{
m_iterTS = iterTS;
//cerr << "bool Engine_Multithread::IterateTS(): starting threads ...";
m_startBarrier->wait(); // start the threads
//cerr << "... threads started" << endl;
m_stopBarrier->wait(); // wait for the threads to finish time steps
return true;
}
//! \brief Forward the measured speed to the base class and, while the automatic
//!        search is active, adjust the number of worker threads.
//! \param curr_speed speed measured for the last interval, passed on to the base class
void Engine_Multithread::NextInterval(float curr_speed)
{
ENGINE_MULTITHREAD_BASE::NextInterval(curr_speed);
// the thread-count search only runs while m_opt_speed is set (Init() sets it when 0 threads were requested)
if (!m_opt_speed) return;
// first branch: go back to one thread fewer, report the result and end the search
// (NOTE(review): presumably taken when the speed dropped compared to the last interval - confirm)
if (curr_speedchangeNumThreads(m_numThreads-1);
cout << "Multithreaded Engine: Best performance found using " << m_numThreads << " threads." << std::endl;
m_opt_speed = false;
}
// second branch: try one more thread (NOTE(review): presumably bounded by a maximum thread count - confirm)
else if (m_numThreadschangeNumThreads(m_numThreads+1);
}
}
//! \brief Run the pre-voltage-update hook of every engine extension for one worker thread.
//!
//! The extensions are visited from the last entry of m_Eng_exts to the first, so the
//! highest-priority extension gets access to the voltages last. After each extension
//! the caller waits on m_IterateBarrier.
//! \param threadID id of the calling worker thread, passed through to the extensions
void Engine_Multithread::DoPreVoltageUpdates(int threadID)
{
	int idx = m_Eng_exts.size()-1;
	while (idx >= 0)
	{
		m_Eng_exts.at(idx)->DoPreVoltageUpdates(threadID);
		m_IterateBarrier->wait();
		--idx;
	}
}
//! \brief Run the post-voltage-update hook of the engine extensions for one worker thread.
//! \param threadID id of the calling worker thread, passed through to the extensions
void Engine_Multithread::DoPostVoltageUpdates(int threadID)
{
//execute extensions in normal order -> highest priority gets access to the voltages first
for (size_t n=0; nDoPostVoltageUpdates(threadID);
// wait on the iterate barrier after each extension before moving on
m_IterateBarrier->wait();
}
}
//! \brief Run the Apply2Voltages hook of the engine extensions for one worker thread.
//! \param threadID id of the calling worker thread, passed through to the extensions
void Engine_Multithread::Apply2Voltages(int threadID)
{
//execute extensions in normal order -> highest priority gets access to the voltages first
for (size_t n=0; nApply2Voltages(threadID);
// wait on the iterate barrier after each extension before moving on
m_IterateBarrier->wait();
}
}
//! \brief Run the pre-current-update hook of every engine extension for one worker thread.
//!
//! The extensions are visited from the last entry of m_Eng_exts to the first, so the
//! highest-priority extension gets access to the currents last. After each extension
//! the caller waits on m_IterateBarrier.
//! \param threadID id of the calling worker thread, passed through to the extensions
void Engine_Multithread::DoPreCurrentUpdates(int threadID)
{
	int idx = m_Eng_exts.size()-1;
	while (idx >= 0)
	{
		m_Eng_exts.at(idx)->DoPreCurrentUpdates(threadID);
		m_IterateBarrier->wait();
		--idx;
	}
}
//! \brief Run the post-current-update hook of the engine extensions for one worker thread.
//! \param threadID id of the calling worker thread, passed through to the extensions
void Engine_Multithread::DoPostCurrentUpdates(int threadID)
{
//execute extensions in normal order -> highest priority gets access to the currents first
for (size_t n=0; nDoPostCurrentUpdates(threadID);
// wait on the iterate barrier after each extension before moving on
m_IterateBarrier->wait();
}
}
//! \brief Run the Apply2Current hook of the engine extensions for one worker thread.
//! \param threadID id of the calling worker thread, passed through to the extensions
void Engine_Multithread::Apply2Current(int threadID)
{
//execute extensions in normal order -> highest priority gets access to the currents first
for (size_t n=0; nApply2Current(threadID);
// wait on the iterate barrier after each extension before moving on
m_IterateBarrier->wait();
}
}
//
// *************************************************************************************************************************
//
namespace NS_Engine_Multithread
{
//! \brief Build the worker functor for one slice of the simulation domain.
//! \param ptr      engine whose update routines and barriers the worker uses
//! \param start    first line handled by this worker
//! \param stop     last line used for the voltage updates
//! \param stop_h   last line used for the current updates
//! \param threadID id passed to the extension hooks; thread 0 also increments numTS
thread::thread( Engine_Multithread* ptr, unsigned int start, unsigned int stop, unsigned int stop_h, unsigned int threadID )
{
	// identity of the worker and the engine it belongs to
	this->m_threadID = threadID;
	this->m_enginePtr = ptr;

	// line range of the slice
	this->m_start = start;
	this->m_stop = stop;
	this->m_stop_h = stop_h;
}
//! \brief Worker thread main loop.
//!
//! Repeats until the engine's m_stopThreads flag is set: wait on the start barrier,
//! run the time-step loop (voltage update and current update with the extension
//! hooks around each, synchronized through m_IterateBarrier), then wait on the
//! stop barrier.
void thread::operator()()
{
//std::cout << "thread::operator() Parameters: " << m_start << " " << m_stop << std::endl;
//DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") started." << endl;
// speed up the calculation of denormal floating point values (flush-to-zero)
#ifndef SSE_CORRECT_DENORMALS
unsigned int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
unsigned int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
_mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
#endif
while (!m_enginePtr->m_stopThreads)
{
// wait for start
//DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") waiting..." << endl;
m_enginePtr->m_startBarrier->wait();
//cout << "Thread " << boost::this_thread::get_id() << " waiting... started." << endl;
// the stop flag is checked again after the barrier: it may have been set while this thread was waiting
if (m_enginePtr->m_stopThreads)
{
//DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") stop!." << endl;
return;
}
DEBUG_TIME( Timer timer1 );
// time-step loop (NOTE(review): presumably runs m_enginePtr->m_iterTS iterations, the value set by IterateTS() - confirm)
for (unsigned int iter=0; iterm_iterTS; ++iter)
{
// pre voltage stuff...
m_enginePtr->DoPreVoltageUpdates(m_threadID);
//voltage updates
// second argument is the number of lines in this worker's slice
m_enginePtr->UpdateVoltages(m_start,m_stop-m_start+1);
// record time
DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); )
//cout << "Thread " << boost::this_thread::get_id() << " m_barrier1 waiting..." << endl;
m_enginePtr->m_IterateBarrier->wait();
// record time
DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); )
//post voltage stuff...
m_enginePtr->DoPostVoltageUpdates(m_threadID);
m_enginePtr->Apply2Voltages(m_threadID);
#ifdef MPI_SUPPORT
// only thread 0 performs the voltage exchange; all workers then meet at the iterate barrier
if (m_threadID==0)
{
if (m_enginePtr->m_MPI_Barrier)
m_enginePtr->m_MPI_Barrier->wait();
m_enginePtr->SendReceiveVoltages();
}
m_enginePtr->m_IterateBarrier->wait();
#endif
// record time
DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); )
//pre current stuff
m_enginePtr->DoPreCurrentUpdates(m_threadID);
//current updates
// uses m_stop_h instead of m_stop as the upper line of the slice
m_enginePtr->UpdateCurrents(m_start,m_stop_h-m_start+1);
// record time
DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); )
m_enginePtr->m_IterateBarrier->wait();
// record time
DEBUG_TIME( m_enginePtr->m_timer_list[boost::this_thread::get_id()].push_back( timer1.elapsed() ); )
//post current stuff
m_enginePtr->DoPostCurrentUpdates(m_threadID);
m_enginePtr->Apply2Current(m_threadID);
#ifdef MPI_SUPPORT
// only thread 0 performs the current exchange; all workers then meet at the iterate barrier
if (m_threadID==0)
{
if (m_enginePtr->m_MPI_Barrier)
m_enginePtr->m_MPI_Barrier->wait();
m_enginePtr->SendReceiveCurrents();
}
m_enginePtr->m_IterateBarrier->wait();
#endif
if (m_threadID == 0)
++m_enginePtr->numTS; // only the first thread increments numTS
}
// signal the controller (waiting in IterateTS()) that this worker finished its time steps
m_enginePtr->m_stopBarrier->wait();
}
//DBG().cout() << "Thread " << m_threadID << " (" << boost::this_thread::get_id() << ") finished." << endl;
}
} // namespace