completed sse engine and enabled it
parent
9e66b0e544
commit
b489d3342d
|
@ -45,23 +45,26 @@ void Engine_sse::Init()
|
||||||
{
|
{
|
||||||
numTS = 0;
|
numTS = 0;
|
||||||
volt_ = Create_N_3DArray_v4sf(numLines);
|
volt_ = Create_N_3DArray_v4sf(numLines);
|
||||||
curr = Create_N_3DArray(numLines);
|
curr_ = Create_N_3DArray_v4sf(numLines);
|
||||||
|
volt = 0; // not used
|
||||||
|
curr = 0; // not used
|
||||||
|
// Engine::Init(); //FIXME currently postprocessing operates on volt and curr arrays, which are not updated by this engine!!!!
|
||||||
}
|
}
|
||||||
|
|
||||||
void Engine_sse::Reset()
|
void Engine_sse::Reset()
|
||||||
{
|
{
|
||||||
Delete_N_3DArray_v4sf(volt_,numLines);
|
Delete_N_3DArray_v4sf(volt_,numLines);
|
||||||
volt=NULL;
|
volt_ = 0;
|
||||||
Delete_N_3DArray(curr,numLines);
|
Delete_N_3DArray_v4sf(curr_,numLines);
|
||||||
curr=NULL;
|
curr_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Engine_sse::UpdateVoltages()
|
void Engine_sse::UpdateVoltages()
|
||||||
{
|
{
|
||||||
unsigned int pos[4];
|
unsigned int pos[3];
|
||||||
bool shift[3];
|
bool shift[2];
|
||||||
|
f4vector temp;
|
||||||
|
|
||||||
//voltage updates
|
|
||||||
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
||||||
{
|
{
|
||||||
shift[0]=pos[0];
|
shift[0]=pos[0];
|
||||||
|
@ -70,19 +73,25 @@ void Engine_sse::UpdateVoltages()
|
||||||
shift[1]=pos[1];
|
shift[1]=pos[1];
|
||||||
for (pos[2]=0;pos[2]<numLines[2]/4;++pos[2])
|
for (pos[2]=0;pos[2]<numLines[2]/4;++pos[2])
|
||||||
{
|
{
|
||||||
//do the updates here
|
// x-polarization
|
||||||
//for x
|
temp.f[0] = curr_[1][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3];
|
||||||
|
temp.f[1] = curr_[1][pos[0]][pos[1]][pos[2]].f[0];
|
||||||
|
temp.f[2] = curr_[1][pos[0]][pos[1]][pos[2]].f[1];
|
||||||
|
temp.f[3] = curr_[1][pos[0]][pos[1]][pos[2]].f[2];
|
||||||
volt_[0][pos[0]][pos[1]][pos[2]].v *= Op->vv_[0][pos[0]][pos[1]][pos[2]].v;
|
volt_[0][pos[0]][pos[1]][pos[2]].v *= Op->vv_[0][pos[0]][pos[1]][pos[2]].v;
|
||||||
|
volt_[0][pos[0]][pos[1]][pos[2]].v += Op->vi_[0][pos[0]][pos[1]][pos[2]].v * ( curr_[2][pos[0]][pos[1]][pos[2]].v - curr_[2][pos[0]][pos[1]-shift[1]][pos[2]].v - curr_[1][pos[0]][pos[1]][pos[2]].v + temp.v );
|
||||||
|
|
||||||
|
// y-polarization
|
||||||
|
temp.f[0] = curr_[0][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3];
|
||||||
|
temp.f[1] = curr_[0][pos[0]][pos[1]][pos[2]].f[0];
|
||||||
|
temp.f[2] = curr_[0][pos[0]][pos[1]][pos[2]].f[1];
|
||||||
|
temp.f[3] = curr_[0][pos[0]][pos[1]][pos[2]].f[2];
|
||||||
volt_[1][pos[0]][pos[1]][pos[2]].v *= Op->vv_[1][pos[0]][pos[1]][pos[2]].v;
|
volt_[1][pos[0]][pos[1]][pos[2]].v *= Op->vv_[1][pos[0]][pos[1]][pos[2]].v;
|
||||||
|
volt_[1][pos[0]][pos[1]][pos[2]].v += Op->vi_[1][pos[0]][pos[1]][pos[2]].v * ( curr_[0][pos[0]][pos[1]][pos[2]].v - temp.v - curr_[2][pos[0]][pos[1]][pos[2]].v + curr_[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
|
||||||
|
|
||||||
|
// z-polarization
|
||||||
volt_[2][pos[0]][pos[1]][pos[2]].v *= Op->vv_[2][pos[0]][pos[1]][pos[2]].v;
|
volt_[2][pos[0]][pos[1]][pos[2]].v *= Op->vv_[2][pos[0]][pos[1]][pos[2]].v;
|
||||||
|
volt_[2][pos[0]][pos[1]][pos[2]].v += Op->vi_[2][pos[0]][pos[1]][pos[2]].v * ( curr_[1][pos[0]][pos[1]][pos[2]].v - curr_[1][pos[0]-shift[0]][pos[1]][pos[2]].v - curr_[0][pos[0]][pos[1]][pos[2]].v + curr_[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
|
||||||
|
|
||||||
for (pos[3]=0;pos[3]<4;++pos[3]) {
|
|
||||||
shift[2]=pos[2]+pos[3];
|
|
||||||
volt_[0][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[0][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[2][pos[0]][pos[1]][pos[2]] - curr[2][pos[0]][pos[1]-shift[1]][pos[2]] - curr[1][pos[0]][pos[1]][pos[2]] + curr[1][pos[0]][pos[1]][pos[2]-shift[2]]);
|
|
||||||
volt_[1][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[1][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[0][pos[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]-shift[2]] - curr[2][pos[0]][pos[1]][pos[2]] + curr[2][pos[0]-shift[0]][pos[1]][pos[2]]);
|
|
||||||
volt_[2][pos[0]][pos[1]][pos[2]].f[pos[3]] += Op->vi_[2][pos[0]][pos[1]][pos[2]].f[pos[3]] * ( curr[1][pos[0]][pos[1]][pos[2]] - curr[1][pos[0]-shift[0]][pos[1]][pos[2]] - curr[0][pos[0]][pos[1]][pos[2]] + curr[0][pos[0]][pos[1]-shift[1]][pos[2]]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -91,37 +100,47 @@ void Engine_sse::UpdateVoltages()
|
||||||
void Engine_sse::ApplyVoltageExcite()
|
void Engine_sse::ApplyVoltageExcite()
|
||||||
{
|
{
|
||||||
int exc_pos;
|
int exc_pos;
|
||||||
|
unsigned int pos;
|
||||||
//soft voltage excitation here (E-field excite)
|
//soft voltage excitation here (E-field excite)
|
||||||
for (unsigned int n=0;n<Op->E_Exc_Count;++n)
|
for (unsigned int n=0;n<Op->E_Exc_Count;++n)
|
||||||
{
|
{
|
||||||
exc_pos = (int)numTS - (int)Op->E_Exc_delay[n];
|
exc_pos = (int)numTS - (int)Op->E_Exc_delay[n];
|
||||||
exc_pos *= (exc_pos>0 && exc_pos<=(int)Op->ExciteLength);
|
exc_pos *= (exc_pos>0 && exc_pos<=(int)Op->ExciteLength);
|
||||||
// if (n==0) cerr << numTS << " => " << Op->ExciteSignal[exc_pos] << endl;
|
pos = Op->E_Exc_index[2][n];
|
||||||
volt[Op->E_Exc_dir[n]][Op->E_Exc_index[0][n]][Op->E_Exc_index[1][n]][Op->E_Exc_index[2][n]] += Op->E_Exc_amp[n]*Op->ExciteSignal[exc_pos];
|
volt_[Op->E_Exc_dir[n]][Op->E_Exc_index[0][n]][Op->E_Exc_index[1][n]][pos/4].f[pos%4] += Op->E_Exc_amp[n]*Op->ExciteSignal[exc_pos];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Engine_sse::UpdateCurrents()
|
void Engine_sse::UpdateCurrents()
|
||||||
{
|
{
|
||||||
unsigned int pos[3];
|
unsigned int pos[5];
|
||||||
|
f4vector temp;
|
||||||
|
|
||||||
for (pos[0]=0;pos[0]<numLines[0]-1;++pos[0])
|
for (pos[0]=0;pos[0]<numLines[0]-1;++pos[0])
|
||||||
{
|
{
|
||||||
for (pos[1]=0;pos[1]<numLines[1]-1;++pos[1])
|
for (pos[1]=0;pos[1]<numLines[1]-1;++pos[1])
|
||||||
{
|
{
|
||||||
for (pos[2]=0;pos[2]<numLines[2]-1;++pos[2])
|
for (pos[2]=0;pos[2]<numLines[2]/4;++pos[2]) // FIXME is this correct?
|
||||||
{
|
{
|
||||||
//do the updates here
|
// x-pol
|
||||||
//for x
|
temp.f[0] = volt_[1][pos[0]][pos[1]][pos[2]].f[1];
|
||||||
curr[0][pos[0]][pos[1]][pos[2]] *= Op->ii[0][pos[0]][pos[1]][pos[2]];
|
temp.f[1] = volt_[1][pos[0]][pos[1]][pos[2]].f[2];
|
||||||
curr[0][pos[0]][pos[1]][pos[2]] += Op->iv[0][pos[0]][pos[1]][pos[2]] * ( volt[2][pos[0]][pos[1]][pos[2]] - volt[2][pos[0]][pos[1]+1][pos[2]] - volt[1][pos[0]][pos[1]][pos[2]] + volt[1][pos[0]][pos[1]][pos[2]+1]);
|
temp.f[2] = volt_[1][pos[0]][pos[1]][pos[2]].f[3];
|
||||||
|
temp.f[3] = volt_[1][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area
|
||||||
|
curr_[0][pos[0]][pos[1]][pos[2]].v *= Op->ii_[0][pos[0]][pos[1]][pos[2]].v;
|
||||||
|
curr_[0][pos[0]][pos[1]][pos[2]].v += Op->iv_[0][pos[0]][pos[1]][pos[2]].v * ( volt_[2][pos[0]][pos[1]][pos[2]].v - volt_[2][pos[0]][pos[1]+1][pos[2]].v - volt_[1][pos[0]][pos[1]][pos[2]].v + temp.v);
|
||||||
|
|
||||||
//for y
|
// y-pol
|
||||||
curr[1][pos[0]][pos[1]][pos[2]] *= Op->ii[1][pos[0]][pos[1]][pos[2]];
|
temp.f[0] = volt_[0][pos[0]][pos[1]][pos[2]].f[1];
|
||||||
curr[1][pos[0]][pos[1]][pos[2]] += Op->iv[1][pos[0]][pos[1]][pos[2]] * ( volt[0][pos[0]][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]+1] - volt[2][pos[0]][pos[1]][pos[2]] + volt[2][pos[0]+1][pos[1]][pos[2]]);
|
temp.f[1] = volt_[0][pos[0]][pos[1]][pos[2]].f[2];
|
||||||
|
temp.f[2] = volt_[0][pos[0]][pos[1]][pos[2]].f[3];
|
||||||
|
temp.f[3] = volt_[0][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area
|
||||||
|
curr_[1][pos[0]][pos[1]][pos[2]].v *= Op->ii_[1][pos[0]][pos[1]][pos[2]].v;
|
||||||
|
curr_[1][pos[0]][pos[1]][pos[2]].v += Op->iv_[1][pos[0]][pos[1]][pos[2]].v * ( volt_[0][pos[0]][pos[1]][pos[2]].v - temp.v - volt_[2][pos[0]][pos[1]][pos[2]].v + volt_[2][pos[0]+1][pos[1]][pos[2]].v);
|
||||||
|
|
||||||
//for z
|
// z-pol
|
||||||
curr[2][pos[0]][pos[1]][pos[2]] *= Op->ii[2][pos[0]][pos[1]][pos[2]];
|
curr_[2][pos[0]][pos[1]][pos[2]].v *= Op->ii_[2][pos[0]][pos[1]][pos[2]].v;
|
||||||
curr[2][pos[0]][pos[1]][pos[2]] += Op->iv[2][pos[0]][pos[1]][pos[2]] * ( volt[1][pos[0]][pos[1]][pos[2]] - volt[1][pos[0]+1][pos[1]][pos[2]] - volt[0][pos[0]][pos[1]][pos[2]] + volt[0][pos[0]][pos[1]+1][pos[2]]);
|
curr_[2][pos[0]][pos[1]][pos[2]].v += Op->iv_[2][pos[0]][pos[1]][pos[2]].v * ( volt_[1][pos[0]][pos[1]][pos[2]].v - volt_[1][pos[0]+1][pos[1]][pos[2]].v - volt_[0][pos[0]][pos[1]][pos[2]].v + volt_[0][pos[0]][pos[1]+1][pos[2]].v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,8 +35,8 @@ public:
|
||||||
|
|
||||||
virtual unsigned int GetNumberOfTimesteps() {return numTS;};
|
virtual unsigned int GetNumberOfTimesteps() {return numTS;};
|
||||||
|
|
||||||
// virtual f4vector**** GetVoltages() {return volt;};
|
inline virtual FDTD_FLOAT GetVolt( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return volt_[n][x][y][z/4].f[z%4]; }
|
||||||
virtual FDTD_FLOAT**** GetCurrents() {return curr;};
|
inline virtual FDTD_FLOAT GetCurr( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const { return curr_[n][x][y][z/4].f[z%4]; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Engine_sse(const Operator* op);
|
Engine_sse(const Operator* op);
|
||||||
|
@ -50,7 +50,7 @@ protected:
|
||||||
unsigned int numLines[3];
|
unsigned int numLines[3];
|
||||||
|
|
||||||
f4vector**** volt_;
|
f4vector**** volt_;
|
||||||
FDTD_FLOAT**** curr;
|
f4vector**** curr_;
|
||||||
unsigned int numTS;
|
unsigned int numTS;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,8 @@ void Operator::Init()
|
||||||
vi_=NULL;
|
vi_=NULL;
|
||||||
iv=NULL;
|
iv=NULL;
|
||||||
ii=NULL;
|
ii=NULL;
|
||||||
|
iv_=NULL;
|
||||||
|
ii_=NULL;
|
||||||
for (int n=0;n<3;++n)
|
for (int n=0;n<3;++n)
|
||||||
{
|
{
|
||||||
discLines[n]=NULL;
|
discLines[n]=NULL;
|
||||||
|
@ -81,6 +83,8 @@ void Operator::Reset()
|
||||||
Delete_N_3DArray_v4sf(vi_,numLines);
|
Delete_N_3DArray_v4sf(vi_,numLines);
|
||||||
Delete_N_3DArray(iv,numLines);
|
Delete_N_3DArray(iv,numLines);
|
||||||
Delete_N_3DArray(ii,numLines);
|
Delete_N_3DArray(ii,numLines);
|
||||||
|
Delete_N_3DArray_v4sf(iv_,numLines);
|
||||||
|
Delete_N_3DArray_v4sf(ii_,numLines);
|
||||||
for (int n=0;n<3;++n)
|
for (int n=0;n<3;++n)
|
||||||
{
|
{
|
||||||
delete[] discLines[n];
|
delete[] discLines[n];
|
||||||
|
@ -510,12 +514,16 @@ void Operator::InitOperator()
|
||||||
Delete_N_3DArray_v4sf(vi_,numLines);
|
Delete_N_3DArray_v4sf(vi_,numLines);
|
||||||
Delete_N_3DArray(iv,numLines);
|
Delete_N_3DArray(iv,numLines);
|
||||||
Delete_N_3DArray(ii,numLines);
|
Delete_N_3DArray(ii,numLines);
|
||||||
|
Delete_N_3DArray_v4sf(iv_,numLines);
|
||||||
|
Delete_N_3DArray_v4sf(ii_,numLines);
|
||||||
vv = Create_N_3DArray(numLines);
|
vv = Create_N_3DArray(numLines);
|
||||||
vi = Create_N_3DArray(numLines);
|
vi = Create_N_3DArray(numLines);
|
||||||
vv_ = Create_N_3DArray_v4sf(numLines);
|
vv_ = Create_N_3DArray_v4sf(numLines);
|
||||||
vi_ = Create_N_3DArray_v4sf(numLines);
|
vi_ = Create_N_3DArray_v4sf(numLines);
|
||||||
iv = Create_N_3DArray(numLines);
|
iv = Create_N_3DArray(numLines);
|
||||||
ii = Create_N_3DArray(numLines);
|
ii = Create_N_3DArray(numLines);
|
||||||
|
iv_ = Create_N_3DArray_v4sf(numLines);
|
||||||
|
ii_ = Create_N_3DArray_v4sf(numLines);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos)
|
inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos)
|
||||||
|
@ -524,9 +532,6 @@ inline void Operator::Calc_ECOperatorPos(int n, unsigned int* pos)
|
||||||
vv[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_G[n][i]/2/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]);
|
vv[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_G[n][i]/2/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]);
|
||||||
vi[n][pos[0]][pos[1]][pos[2]] = (dT/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]);
|
vi[n][pos[0]][pos[1]][pos[2]] = (dT/EC_C[n][i])/(1+dT*EC_G[n][i]/2/EC_C[n][i]);
|
||||||
|
|
||||||
vv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vv[n][pos[0]][pos[1]][pos[2]];
|
|
||||||
vi_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vi[n][pos[0]][pos[1]][pos[2]];
|
|
||||||
|
|
||||||
ii[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_R[n][i]/2/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]);
|
ii[n][pos[0]][pos[1]][pos[2]] = (1-dT*EC_R[n][i]/2/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]);
|
||||||
iv[n][pos[0]][pos[1]][pos[2]] = (dT/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]);
|
iv[n][pos[0]][pos[1]][pos[2]] = (dT/EC_L[n][i])/(1+dT*EC_R[n][i]/2/EC_L[n][i]);
|
||||||
}
|
}
|
||||||
|
@ -572,6 +577,27 @@ int Operator::CalcECOperator()
|
||||||
if (CalcEFieldExcitation()==false) return -1;
|
if (CalcEFieldExcitation()==false) return -1;
|
||||||
CalcPEC();
|
CalcPEC();
|
||||||
|
|
||||||
|
|
||||||
|
// copy operator to aligned memory (only for sse engine)
|
||||||
|
// FIXME this is really inefficient!
|
||||||
|
for (int n=0;n<3;++n)
|
||||||
|
{
|
||||||
|
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
||||||
|
{
|
||||||
|
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
||||||
|
{
|
||||||
|
for (pos[2]=0;pos[2]<numLines[2];++pos[2])
|
||||||
|
{
|
||||||
|
vv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vv[n][pos[0]][pos[1]][pos[2]];
|
||||||
|
vi_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vi[n][pos[0]][pos[1]][pos[2]];
|
||||||
|
iv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = iv[n][pos[0]][pos[1]][pos[2]];
|
||||||
|
ii_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = ii[n][pos[0]][pos[1]][pos[2]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -147,6 +147,8 @@ public:
|
||||||
FDTD_FLOAT**** iv; //calc new current from old voltage
|
FDTD_FLOAT**** iv; //calc new current from old voltage
|
||||||
f4vector**** vv_; //calc new voltage from old voltage
|
f4vector**** vv_; //calc new voltage from old voltage
|
||||||
f4vector**** vi_; //calc new voltage from old current
|
f4vector**** vi_; //calc new voltage from old current
|
||||||
|
f4vector**** iv_; //calc new current from old voltage
|
||||||
|
f4vector**** ii_; //calc new current from old current
|
||||||
|
|
||||||
//Excitation time-signal
|
//Excitation time-signal
|
||||||
unsigned int ExciteLength;
|
unsigned int ExciteLength;
|
||||||
|
|
|
@ -294,6 +294,9 @@ int openEMS::SetupFDTD(const char* file)
|
||||||
case EngineType_Multithreaded:
|
case EngineType_Multithreaded:
|
||||||
FDTD_Eng = Engine_Multithread::New(FDTD_Op,m_engine_numThreads);
|
FDTD_Eng = Engine_Multithread::New(FDTD_Op,m_engine_numThreads);
|
||||||
break;
|
break;
|
||||||
|
case EngineType_SSE:
|
||||||
|
FDTD_Eng = Engine_sse::New(FDTD_Op);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
FDTD_Eng = Engine::New(FDTD_Op);
|
FDTD_Eng = Engine::New(FDTD_Op);
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue