Improve the speed of the SSE engine by using flush-to-zero

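This change modifies the floating-point behaviour of the complete program whenever the SSE engine is used, because the MXCSR setting is changed in the Engine_sse constructor and is not restored afterwards.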
A better approach may be to enable flush-to-zero only in IterateTS()...
pull/1/head
Sebastian Held 2010-07-13 10:47:40 +02:00
parent 911f7c5528
commit 59ffbb5100
2 changed files with 13 additions and 0 deletions

View File

@ -15,6 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <xmmintrin.h>
#include "engine_sse.h" #include "engine_sse.h"
//! \brief construct an Engine_sse instance //! \brief construct an Engine_sse instance
@ -34,10 +35,18 @@ Engine_sse::Engine_sse(const Operator_sse* op) : Engine(op)
f4_volt = 0; f4_volt = 0;
f4_curr = 0; f4_curr = 0;
numVectors = ceil((double)numLines[2]/4.0); numVectors = ceil((double)numLines[2]/4.0);
// speed up calculations involving denormal floating point values by treating them as zero (flush-to-zero and denormals-are-zero)
#ifndef SSE_CORRECT_DENORMALS
int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
_mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
#endif
} }
Engine_sse::~Engine_sse() Engine_sse::~Engine_sse()
{ {
//_mm_setcsr( oldMXCSR ); // restore old setting
Reset(); Reset();
} }

View File

@ -12,6 +12,10 @@ INCLUDEPATH += ../CSXCAD \
../tinyxml ../tinyxml
LIBS += -L../CSXCAD -lCSXCAD LIBS += -L../CSXCAD -lCSXCAD
# the SSE engine defaults to flush-to-zero mode, because of speed advantages
# to restore the correct handling of denormals and to comply with IEEE 754, uncomment:
# DEFINES += SSE_CORRECT_DENORMALS
win32 { win32 {
INCLUDEPATH += ../hdf5/include ../boost/include/boost-1_42 INCLUDEPATH += ../hdf5/include ../boost/include/boost-1_42
LIBS += ../hdf5/lib/libhdf5_cpp.a ../hdf5/lib/libhdf5.a LIBS += ../hdf5/lib/libhdf5_cpp.a ../hdf5/lib/libhdf5.a