diff --git a/FDTD/engine_sse.cpp b/FDTD/engine_sse.cpp new file mode 100644 index 0000000..e434b68 --- /dev/null +++ b/FDTD/engine_sse.cpp @@ -0,0 +1,145 @@ +/* +* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de) +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "engine_sse.h" +#include "tools/array_ops.h" + +//! \brief construct an Engine_sse instance +//! it's the responsibility of the caller to free the returned pointer +Engine_sse* Engine_sse::New(const Operator* op) +{ + Engine_sse* e = new Engine_sse(op); + e->Init(); + return e; +} + +Engine_sse::Engine_sse(const Operator* op) : Engine(op) +{ + Op = op; + for (int n=0;n<3;++n) + { + numLines[n] = Op->GetNumberOfLines(n); + } +} + +Engine_sse::~Engine_sse() +{ + this->Reset(); +} + +void Engine_sse::Init() +{ + numTS = 0; + volt_ = Create_N_3DArray_v4sf(numLines); + curr = Create_N_3DArray(numLines); +} + +void Engine_sse::Reset() +{ + Delete_N_3DArray_v4sf(volt_,numLines); + volt=NULL; + Delete_N_3DArray(curr,numLines); + curr=NULL; +} + +void Engine_sse::UpdateVoltages() +{ + unsigned int pos[4]; + bool shift[3]; + + //voltage updates + for (pos[0]=0;pos[0]vv_[0][pos[0]][pos[1]][pos[2]].v; + volt_[1][pos[0]][pos[1]][pos[2]].v *= Op->vv_[1][pos[0]][pos[1]][pos[2]].v; + volt_[2][pos[0]][pos[1]][pos[2]].v *= Op->vv_[2][pos[0]][pos[1]][pos[2]].v; + + + for (pos[3]=0;pos[3]<4;++pos[3]) { + shift[2]=pos[2]+pos[3]; + volt_[0][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[0][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[2][pos[0]][pos[1]][pos[2]] - curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - curr[1][pos[0]][pos[1]][pos[2]] + curr[1][pos[0]][pos[1]][pos[2]-shift[2]]); + volt_[1][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[1][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[0][pos[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - curr[2][pos[0]][pos[1]][pos[2]] + curr[2][pos[0]-shift[0]][pos[1]][pos[2]]); + volt_[2][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[2][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[1][pos[0]][pos[1]][pos[2]] - curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]] + curr[0][pos[0]][pos[1]-shift[1]][pos[2]]); + } + } + } + } +} + +void Engine_sse::ApplyVoltageExcite() +{ + int exc_pos; + //soft voltage excitation here (E-field excite) + for (unsigned int n=0;nE_Exc_Count;++n) + { + exc_pos = (int)numTS - (int)Op->E_Exc_delay[n]; + exc_pos *= (exc_pos>0 && exc_pos<=(int)Op->ExciteLength); +// if (n==0) cerr << numTS << " => " << Op->ExciteSignal[exc_pos] << endl; + volt[Op->E_Exc_dir[n]][Op->E_Exc_index[0][n]][Op->E_Exc_index[1][n]][Op->E_Exc_index[2][n]] += Op->E_Exc_amp[n]*Op->ExciteSignal[exc_pos]; + } +} + +void Engine_sse::UpdateCurrents() +{ + unsigned int pos[3]; + for (pos[0]=0;pos[0]ii[0][pos[0]][pos[1]][pos[2]]; + curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]); + + //for y + curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]]; + curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]); + + //for z + curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]]; + curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]); + } + } + } +} + +void Engine_sse::ApplyCurrentExcite() +{ +} + +bool Engine_sse::IterateTS(unsigned int iterTS) +{ + for (unsigned int iter=0;iter. +*/ + +#ifndef ENGINE_SSE_H +#define ENGINE_SSE_H + +#include "operator.h" +#include "engine.h" + +class Engine_sse : public Engine +{ +public: + static Engine_sse* New(const Operator* op); + virtual ~Engine_sse(); + + virtual void Init(); + virtual void Reset(); + + //!Iterate a number of timesteps + virtual bool IterateTS(unsigned int iterTS); + + virtual unsigned int GetNumberOfTimesteps() {return numTS;}; + +// virtual f4vector**** GetVoltages() {return volt;}; + virtual FDTD_FLOAT**** GetCurrents() {return curr;}; + +protected: + Engine_sse(const Operator* op); + const Operator* Op; + + virtual void UpdateVoltages(); + virtual void ApplyVoltageExcite(); + virtual void UpdateCurrents(); + virtual void ApplyCurrentExcite(); + + unsigned int numLines[3]; + + f4vector**** volt_; + FDTD_FLOAT**** curr; + unsigned int numTS; +}; + +#endif // ENGINE_SSE_H diff --git a/FDTD/operator.cpp b/FDTD/operator.cpp index 8314cb6..429e863 100644 --- a/FDTD/operator.cpp +++ b/FDTD/operator.cpp @@ -47,6 +47,8 @@ void Operator::Init() E_Exc_dir=NULL; vv=NULL; vi=NULL; + vv_=NULL; + vi_=NULL; iv=NULL; ii=NULL; for (int n=0;n<3;++n) @@ -75,6 +77,8 @@ void Operator::Reset() delete[] E_Exc_amp; Delete_N_3DArray(vv,numLines); Delete_N_3DArray(vi,numLines); + Delete_N_3DArray_v4sf(vv_,numLines); + Delete_N_3DArray_v4sf(vi_,numLines); Delete_N_3DArray(iv,numLines); Delete_N_3DArray(ii,numLines); for (int n=0;n<3;++n) @@ -502,10 +506,14 @@ void Operator::InitOperator() { Delete_N_3DArray(vv,numLines); Delete_N_3DArray(vi,numLines); + Delete_N_3DArray_v4sf(vv_,numLines); + Delete_N_3DArray_v4sf(vi_,numLines); Delete_N_3DArray(iv,numLines); Delete_N_3DArray(ii,numLines); vv = Create_N_3DArray(numLines); vi = Create_N_3DArray(numLines); + vv_ = Create_N_3DArray_v4sf(numLines); + vi_ = Create_N_3DArray_v4sf(numLines); iv = Create_N_3DArray(numLines); ii = Create_N_3DArray(numLines); } @@ -516,6 +524,9 @@ inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos) vv[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_G[n][i]/2/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]); vi[n][pos[0]][pos[1]][pos[2]] = (dT/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]); + vv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vv[n][pos[0]][pos[1]][pos[2]]; + vi_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vi[n][pos[0]][pos[1]][pos[2]]; + ii[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_R[n][i]/2/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]); iv[n][pos[0]][pos[1]][pos[2]] = (dT/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]); } diff --git a/FDTD/operator.h b/FDTD/operator.h index ee1a9b4..899b91f 100644 --- a/FDTD/operator.h +++ b/FDTD/operator.h @@ -24,6 +24,21 @@ #define FDTD_FLOAT float + +#if __SIZEOF_FLOAT__ != 4 + #error wrong size of float +#endif + +typedef float v4sf __attribute__ ((vector_size (16))); // vector of four single floats + +union f4vector +{ + v4sf v; + float f[4]; +}; + + + //! Abstract base-class for the FDTD-operator class Operator { @@ -130,6 +145,8 @@ public: FDTD_FLOAT**** vi; //calc new voltage from old current FDTD_FLOAT**** ii; //calc new current from old current FDTD_FLOAT**** iv; //calc new current from old voltage + f4vector**** vv_; //calc new voltage from old voltage + f4vector**** vi_; //calc new voltage from old current //Excitation time-signal unsigned int ExciteLength; diff --git a/openEMS.pro b/openEMS.pro index 6ab1fba..8a3e634 100644 --- a/openEMS.pro +++ b/openEMS.pro @@ -38,7 +38,8 @@ SOURCES += main.cpp \ openems.cpp \ FDTD/engine_multithread.cpp \ FDTD/operator_cylinder.cpp \ - FDTD/engine_cylinder.cpp + FDTD/engine_cylinder.cpp \ + FDTD/engine_sse.cpp HEADERS += tools/ErrorMsg.h \ tools/AdrOp.h \ tools/constants.h \ @@ -54,10 +55,11 @@ HEADERS += tools/ErrorMsg.h \ openems.h \ FDTD/engine_multithread.h \ FDTD/operator_cylinder.h \ - FDTD/engine_cylinder.h -QMAKE_CXXFLAGS_RELEASE = -O2 \ + FDTD/engine_cylinder.h \ + FDTD/engine_sse.h +QMAKE_CXXFLAGS_RELEASE = -O3 \ -g \ - -march=native + -march=native QMAKE_CXXFLAGS_DEBUG = -O0 \ -g \ -march=native diff --git a/openems.cpp b/openems.cpp index a918e6d..ff9f3d9 100644 --- a/openems.cpp +++ b/openems.cpp @@ -21,6 +21,7 @@ #include "FDTD/engine.h" #include "FDTD/engine_cylinder.h" #include "FDTD/engine_multithread.h" +#include "FDTD/engine_sse.h" #include "FDTD/processvoltage.h" #include "FDTD/processcurrent.h" #include "FDTD/processfields_td.h" @@ -112,6 +113,12 @@ bool openEMS::parseCommandLineArgument( const char *argv ) cout << "openEMS - fixed number of threads: " << m_engine_numThreads << endl; return true; } + else if (strcmp(argv,"--engine=sse")==0) + { + cout << "openEMS - enabled sse engine" << endl; + m_engine = EngineType_SSE; + return true; + } return false; } diff --git a/openems.h b/openems.h index 475a5cd..2ccd7f3 100644 --- a/openems.h +++ b/openems.h @@ -61,7 +61,7 @@ protected: Engine* FDTD_Eng; ProcessingArray* PA; - enum EngineType {EngineType_Standard,EngineType_Multithreaded}; + enum EngineType {EngineType_Standard,EngineType_Multithreaded,EngineType_SSE}; EngineType m_engine; unsigned int m_engine_numThreads; }; diff --git a/tools/array_ops.cpp b/tools/array_ops.cpp index 0393466..a5e1860 100644 --- a/tools/array_ops.cpp +++ b/tools/array_ops.cpp @@ -91,3 +91,70 @@ void Dump_N_3DArray2File(ostream &file, FDTD_FLOAT**** array, unsigned int* numL } } } + + + + +void Delete3DArray_v4sf(f4vector*** array, const unsigned int* numLines) +{ + if (array==NULL) return; + unsigned int pos[3]; + for (pos[0]=0;pos[0]