Improve the speed of the SSE engine by using flush-to-zero
This change modifies the behaviour of the entire program whenever the SSE engine is used. A better approach may be to enable flush-to-zero only in IterateTS()...
pull/1/head
parent
911f7c5528
commit
59ffbb5100
|
@ -15,6 +15,7 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <xmmintrin.h>
|
||||||
#include "engine_sse.h"
|
#include "engine_sse.h"
|
||||||
|
|
||||||
//! \brief construct an Engine_sse instance
|
//! \brief construct an Engine_sse instance
|
||||||
|
@ -34,10 +35,18 @@ Engine_sse::Engine_sse(const Operator_sse* op) : Engine(op)
|
||||||
f4_volt = 0;
|
f4_volt = 0;
|
||||||
f4_curr = 0;
|
f4_curr = 0;
|
||||||
numVectors = ceil((double)numLines[2]/4.0);
|
numVectors = ceil((double)numLines[2]/4.0);
|
||||||
|
|
||||||
|
// speed up the calculation of denormal floating point values (flush-to-zero)
|
||||||
|
#ifndef SSE_CORRECT_DENORMALS
|
||||||
|
int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
|
||||||
|
int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
|
||||||
|
_mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Engine_sse::~Engine_sse()
|
Engine_sse::~Engine_sse()
|
||||||
{
|
{
|
||||||
|
//_mm_setcsr( oldMXCSR ); // restore old setting (disabled: oldMXCSR is local to the constructor and not visible here)
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,10 @@ INCLUDEPATH += ../CSXCAD \
|
||||||
../tinyxml
|
../tinyxml
|
||||||
LIBS += -L../CSXCAD -lCSXCAD
|
LIBS += -L../CSXCAD -lCSXCAD
|
||||||
|
|
||||||
|
# the SSE engine defaults to flush-to-zero mode, because of speed advantages
|
||||||
|
# to restore the correct handling of denormals and to comply with IEEE 754, uncomment:
|
||||||
|
# DEFINES += SSE_CORRECT_DENORMALS
|
||||||
|
|
||||||
win32 {
|
win32 {
|
||||||
INCLUDEPATH += ../hdf5/include ../boost/include/boost-1_42
|
INCLUDEPATH += ../hdf5/include ../boost/include/boost-1_42
|
||||||
LIBS += ../hdf5/lib/libhdf5_cpp.a ../hdf5/lib/libhdf5.a
|
LIBS += ../hdf5/lib/libhdf5_cpp.a ../hdf5/lib/libhdf5.a
|
||||||
|
|
Loading…
Reference in New Issue