sse: removed more unnecessary methods & new operator access & names that make more sense
parent
2a3e8dd20f
commit
d26986fe0d
|
@ -43,18 +43,18 @@ Engine_sse::~Engine_sse()
|
|||
//! Set numTS to 0 and assign the f4vector field arrays from Create_N_3DArray_v4sf(numLines).
//! The inherited volt/curr pointers are set to 0 because this engine does not use them.
//! NOTE(review): this listing reads like a rendered diff; the volt_/curr_ statements look like the
//! pre-rename form of the f4_volt/f4_curr statements -- confirm which pair is in the final file.
void Engine_sse::Init()
|
||||
{
|
||||
numTS = 0;
|
||||
volt_ = Create_N_3DArray_v4sf(numLines);
|
||||
curr_ = Create_N_3DArray_v4sf(numLines);
|
||||
f4_volt = Create_N_3DArray_v4sf(numLines);
|
||||
f4_curr = Create_N_3DArray_v4sf(numLines);
|
||||
volt = 0; // not used
|
||||
curr = 0; // not used
|
||||
}
|
||||
|
||||
//! Hand every field array back to Delete_N_3DArray_v4sf and set the pointer to 0 afterwards,
//! so a later Reset() or Init() never sees a stale pointer.
//! NOTE(review): volt_/curr_ appear to be the pre-rename names of f4_volt/f4_curr (rendered diff) -- confirm.
void Engine_sse::Reset()
|
||||
{
|
||||
Delete_N_3DArray_v4sf(volt_,numLines);
|
||||
volt_ = 0;
|
||||
Delete_N_3DArray_v4sf(curr_,numLines);
|
||||
curr_ = 0;
|
||||
Delete_N_3DArray_v4sf(f4_volt,numLines);
|
||||
f4_volt = 0;
|
||||
Delete_N_3DArray_v4sf(f4_curr,numLines);
|
||||
f4_curr = 0;
|
||||
}
|
||||
|
||||
void Engine_sse::UpdateVoltages()
|
||||
|
@ -72,24 +72,24 @@ void Engine_sse::UpdateVoltages()
|
|||
for (pos[2]=0;pos[2]<ceil(numLines[2]/4);++pos[2])
|
||||
{
|
||||
// x-polarization
|
||||
temp.f[0] = curr_[1][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3];
|
||||
temp.f[1] = curr_[1][pos[0]][pos[1]][pos[2]].f[0];
|
||||
temp.f[2] = curr_[1][pos[0]][pos[1]][pos[2]].f[1];
|
||||
temp.f[3] = curr_[1][pos[0]][pos[1]][pos[2]].f[2];
|
||||
volt_[0][pos[0]][pos[1]][pos[2]].v *= Op->vv_[0][pos[0]][pos[1]][pos[2]].v;
|
||||
volt_[0][pos[0]][pos[1]][pos[2]].v += Op->vi_[0][pos[0]][pos[1]][pos[2]].v * ( curr_[2][pos[0]][pos[1]][pos[2]].v - curr_[2][pos[0]][pos[1]-shift[1]][pos[2]].v - curr_[1][pos[0]][pos[1]][pos[2]].v + temp.v );
|
||||
temp.f[0] = f4_curr[1][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3];
|
||||
temp.f[1] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[0];
|
||||
temp.f[2] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[1];
|
||||
temp.f[3] = f4_curr[1][pos[0]][pos[1]][pos[2]].f[2];
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[0][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[0][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[0][pos[0]][pos[1]][pos[2]].v * ( f4_curr[2][pos[0]][pos[1]][pos[2]].v - f4_curr[2][pos[0]][pos[1]-shift[1]][pos[2]].v - f4_curr[1][pos[0]][pos[1]][pos[2]].v + temp.v );
|
||||
|
||||
// y-polarization
|
||||
temp.f[0] = curr_[0][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3];
|
||||
temp.f[1] = curr_[0][pos[0]][pos[1]][pos[2]].f[0];
|
||||
temp.f[2] = curr_[0][pos[0]][pos[1]][pos[2]].f[1];
|
||||
temp.f[3] = curr_[0][pos[0]][pos[1]][pos[2]].f[2];
|
||||
volt_[1][pos[0]][pos[1]][pos[2]].v *= Op->vv_[1][pos[0]][pos[1]][pos[2]].v;
|
||||
volt_[1][pos[0]][pos[1]][pos[2]].v += Op->vi_[1][pos[0]][pos[1]][pos[2]].v * ( curr_[0][pos[0]][pos[1]][pos[2]].v - temp.v - curr_[2][pos[0]][pos[1]][pos[2]].v + curr_[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
|
||||
temp.f[0] = f4_curr[0][pos[0]][pos[1]][pos[2]-(bool)pos[2]].f[3];
|
||||
temp.f[1] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[0];
|
||||
temp.f[2] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[1];
|
||||
temp.f[3] = f4_curr[0][pos[0]][pos[1]][pos[2]].f[2];
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[1][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[1][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[1][pos[0]][pos[1]][pos[2]].v * ( f4_curr[0][pos[0]][pos[1]][pos[2]].v - temp.v - f4_curr[2][pos[0]][pos[1]][pos[2]].v + f4_curr[2][pos[0]-shift[0]][pos[1]][pos[2]].v);
|
||||
|
||||
// z-polarization
|
||||
volt_[2][pos[0]][pos[1]][pos[2]].v *= Op->vv_[2][pos[0]][pos[1]][pos[2]].v;
|
||||
volt_[2][pos[0]][pos[1]][pos[2]].v += Op->vi_[2][pos[0]][pos[1]][pos[2]].v * ( curr_[1][pos[0]][pos[1]][pos[2]].v - curr_[1][pos[0]-shift[0]][pos[1]][pos[2]].v - curr_[0][pos[0]][pos[1]][pos[2]].v + curr_[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_vv[2][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_volt[2][pos[0]][pos[1]][pos[2]].v += Op->f4_vi[2][pos[0]][pos[1]][pos[2]].v * ( f4_curr[1][pos[0]][pos[1]][pos[2]].v - f4_curr[1][pos[0]-shift[0]][pos[1]][pos[2]].v - f4_curr[0][pos[0]][pos[1]][pos[2]].v + f4_curr[0][pos[0]][pos[1]-shift[1]][pos[2]].v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -107,24 +107,24 @@ void Engine_sse::UpdateCurrents()
|
|||
for (pos[2]=0;pos[2]<ceil(numLines[2]/4);++pos[2]) // FIXME is this correct?
|
||||
{
|
||||
// x-pol
|
||||
temp.f[0] = volt_[1][pos[0]][pos[1]][pos[2]].f[1];
|
||||
temp.f[1] = volt_[1][pos[0]][pos[1]][pos[2]].f[2];
|
||||
temp.f[2] = volt_[1][pos[0]][pos[1]][pos[2]].f[3];
|
||||
temp.f[3] = volt_[1][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area
|
||||
curr_[0][pos[0]][pos[1]][pos[2]].v *= Op->ii_[0][pos[0]][pos[1]][pos[2]].v;
|
||||
curr_[0][pos[0]][pos[1]][pos[2]].v += Op->iv_[0][pos[0]][pos[1]][pos[2]].v * ( volt_[2][pos[0]][pos[1]][pos[2]].v - volt_[2][pos[0]][pos[1]+1][pos[2]].v - volt_[1][pos[0]][pos[1]][pos[2]].v + temp.v);
|
||||
temp.f[0] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[1];
|
||||
temp.f[1] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[2];
|
||||
temp.f[2] = f4_volt[1][pos[0]][pos[1]][pos[2]].f[3];
|
||||
temp.f[3] = f4_volt[1][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[0][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[0][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[0][pos[0]][pos[1]][pos[2]].v * ( f4_volt[2][pos[0]][pos[1]][pos[2]].v - f4_volt[2][pos[0]][pos[1]+1][pos[2]].v - f4_volt[1][pos[0]][pos[1]][pos[2]].v + temp.v);
|
||||
|
||||
// y-pol
|
||||
temp.f[0] = volt_[0][pos[0]][pos[1]][pos[2]].f[1];
|
||||
temp.f[1] = volt_[0][pos[0]][pos[1]][pos[2]].f[2];
|
||||
temp.f[2] = volt_[0][pos[0]][pos[1]][pos[2]].f[3];
|
||||
temp.f[3] = volt_[0][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area
|
||||
curr_[1][pos[0]][pos[1]][pos[2]].v *= Op->ii_[1][pos[0]][pos[1]][pos[2]].v;
|
||||
curr_[1][pos[0]][pos[1]][pos[2]].v += Op->iv_[1][pos[0]][pos[1]][pos[2]].v * ( volt_[0][pos[0]][pos[1]][pos[2]].v - temp.v - volt_[2][pos[0]][pos[1]][pos[2]].v + volt_[2][pos[0]+1][pos[1]][pos[2]].v);
|
||||
temp.f[0] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[1];
|
||||
temp.f[1] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[2];
|
||||
temp.f[2] = f4_volt[0][pos[0]][pos[1]][pos[2]].f[3];
|
||||
temp.f[3] = f4_volt[0][pos[0]][pos[1]][pos[2]+1].f[0]; // FIXME outside sim area
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[1][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[1][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[1][pos[0]][pos[1]][pos[2]].v * ( f4_volt[0][pos[0]][pos[1]][pos[2]].v - temp.v - f4_volt[2][pos[0]][pos[1]][pos[2]].v + f4_volt[2][pos[0]+1][pos[1]][pos[2]].v);
|
||||
|
||||
// z-pol
|
||||
curr_[2][pos[0]][pos[1]][pos[2]].v *= Op->ii_[2][pos[0]][pos[1]][pos[2]].v;
|
||||
curr_[2][pos[0]][pos[1]][pos[2]].v += Op->iv_[2][pos[0]][pos[1]][pos[2]].v * ( volt_[1][pos[0]][pos[1]][pos[2]].v - volt_[1][pos[0]+1][pos[1]][pos[2]].v - volt_[0][pos[0]][pos[1]][pos[2]].v + volt_[0][pos[0]][pos[1]+1][pos[2]].v);
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v *= Op->f4_ii[2][pos[0]][pos[1]][pos[2]].v;
|
||||
f4_curr[2][pos[0]][pos[1]][pos[2]].v += Op->f4_iv[2][pos[0]][pos[1]][pos[2]].v * ( f4_volt[1][pos[0]][pos[1]][pos[2]].v - f4_volt[1][pos[0]+1][pos[1]][pos[2]].v - f4_volt[0][pos[0]][pos[1]][pos[2]].v + f4_volt[0][pos[0]][pos[1]+1][pos[2]].v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,8 +32,8 @@ public:
|
|||
|
||||
//! Return the current value of the timestep counter numTS.
virtual unsigned int GetNumberOfTimesteps()
{
	return numTS;
}
|
||||
|
||||
//! Return a reference to element (n,x,y,z) of volt_.
//! Four consecutive z values share one f4vector: z/4 picks the vector, z%4 the float inside it.
inline virtual FDTD_FLOAT& GetVolt( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const
{
	const unsigned int slot = z / 4; // which f4vector along the last dimension
	const unsigned int lane = z % 4; // which float inside that f4vector
	return volt_[n][x][y][slot].f[lane];
}
|
||||
//! Return a reference to element (n,x,y,z) of curr_.
//! Four consecutive z values share one f4vector: z/4 picks the vector, z%4 the float inside it.
inline virtual FDTD_FLOAT& GetCurr( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const
{
	const unsigned int slot = z / 4; // which f4vector along the last dimension
	const unsigned int lane = z % 4; // which float inside that f4vector
	return curr_[n][x][y][slot].f[lane];
}
|
||||
//! Return a reference to element (n,x,y,z) of f4_volt.
//! Four consecutive z values share one f4vector: z/4 picks the vector, z%4 the float inside it.
inline virtual FDTD_FLOAT& GetVolt( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const
{
	const unsigned int slot = z / 4; // which f4vector along the last dimension
	const unsigned int lane = z % 4; // which float inside that f4vector
	return f4_volt[n][x][y][slot].f[lane];
}
|
||||
//! Return a reference to element (n,x,y,z) of f4_curr.
//! Four consecutive z values share one f4vector: z/4 picks the vector, z%4 the float inside it.
inline virtual FDTD_FLOAT& GetCurr( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const
{
	const unsigned int slot = z / 4; // which f4vector along the last dimension
	const unsigned int lane = z % 4; // which float inside that f4vector
	return f4_curr[n][x][y][slot].f[lane];
}
|
||||
|
||||
protected:
|
||||
Engine_sse(const Operator_sse* op);
|
||||
|
@ -42,8 +42,8 @@ protected:
|
|||
virtual void UpdateVoltages();
|
||||
virtual void UpdateCurrents();
|
||||
|
||||
// Field storage read and written by UpdateVoltages()/UpdateCurrents(); indexed [n][x][y][z/4], lane z%4.
// NOTE(review): volt_/curr_ look like the pre-rename declarations of f4_volt/f4_curr (rendered diff) -- confirm.
f4vector**** volt_;
|
||||
f4vector**** curr_;
|
||||
f4vector**** f4_volt;
|
||||
f4vector**** f4_curr;
|
||||
};
|
||||
|
||||
#endif // ENGINE_SSE_H
|
||||
|
|
|
@ -37,83 +37,31 @@ Operator_sse::~Operator_sse()
|
|||
//! Run Operator::Init() and then set every f4vector coefficient array pointer to 0.
//! Nothing is freed here; the pointers are only overwritten.
//! NOTE(review): vv_/vi_/iv_/ii_ appear to be the pre-rename names of f4_vv/f4_vi/f4_iv/f4_ii (rendered diff) -- confirm.
void Operator_sse::Init()
|
||||
{
|
||||
Operator::Init();
|
||||
vv_ = 0;
|
||||
vi_ = 0;
|
||||
iv_ = 0;
|
||||
ii_ = 0;
|
||||
f4_vv = 0;
|
||||
f4_vi = 0;
|
||||
f4_iv = 0;
|
||||
f4_ii = 0;
|
||||
}
|
||||
|
||||
//! Pass every f4vector coefficient array to Delete_N_3DArray_v4sf, then run Operator::Reset().
//! NOTE(review): both an active and a commented-out Init() call are listed (rendered diff). If the
//! commented-out form is the final one, nothing in this function sets the pointers back to 0 after
//! deletion -- verify that Operator::Reset() ends up doing so, because InitOperator() passes the same
//! pointers to Delete_N_3DArray_v4sf again.
void Operator_sse::Reset()
|
||||
{
|
||||
Delete_N_3DArray_v4sf(vv_,numLines);
|
||||
Delete_N_3DArray_v4sf(vi_,numLines);
|
||||
Delete_N_3DArray_v4sf(iv_,numLines);
|
||||
Delete_N_3DArray_v4sf(ii_,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_vv,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_vi,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_iv,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_ii,numLines);
|
||||
Operator::Reset();
|
||||
Init(); // FIXME this calls Operator::Init() twice...
|
||||
// Init(); // FIXME this calls Operator::Init() twice...
|
||||
}
|
||||
|
||||
//! Run Operator::InitOperator(), then replace each f4vector coefficient array:
//! the old pointer goes to Delete_N_3DArray_v4sf and a new one comes from Create_N_3DArray_v4sf(numLines).
//! The delete-before-create order means calling this repeatedly does not keep the previous arrays around.
//! NOTE(review): vv_/vi_/iv_/ii_ appear to be the pre-rename names of f4_vv/f4_vi/f4_iv/f4_ii (rendered diff) -- confirm.
void Operator_sse::InitOperator()
|
||||
{
|
||||
Operator::InitOperator();
|
||||
Delete_N_3DArray_v4sf(vv_,numLines);
|
||||
Delete_N_3DArray_v4sf(vi_,numLines);
|
||||
Delete_N_3DArray_v4sf(iv_,numLines);
|
||||
Delete_N_3DArray_v4sf(ii_,numLines);
|
||||
vv_ = Create_N_3DArray_v4sf(numLines);
|
||||
vi_ = Create_N_3DArray_v4sf(numLines);
|
||||
iv_ = Create_N_3DArray_v4sf(numLines);
|
||||
ii_ = Create_N_3DArray_v4sf(numLines);
|
||||
Delete_N_3DArray_v4sf(f4_vv,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_vi,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_iv,numLines);
|
||||
Delete_N_3DArray_v4sf(f4_ii,numLines);
|
||||
f4_vv = Create_N_3DArray_v4sf(numLines);
|
||||
f4_vi = Create_N_3DArray_v4sf(numLines);
|
||||
f4_iv = Create_N_3DArray_v4sf(numLines);
|
||||
f4_ii = Create_N_3DArray_v4sf(numLines);
|
||||
}
|
||||
|
||||
int Operator_sse::CalcECOperator()
|
||||
{
|
||||
Operator::CalcECOperator();
|
||||
|
||||
// copy operator to aligned memory
|
||||
// FIXME this is really inefficient!
|
||||
unsigned int pos[3];
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
||||
{
|
||||
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
||||
{
|
||||
for (pos[2]=0;pos[2]<numLines[2];++pos[2])
|
||||
{
|
||||
vv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vv[n][pos[0]][pos[1]][pos[2]];
|
||||
vi_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vi[n][pos[0]][pos[1]][pos[2]];
|
||||
iv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = iv[n][pos[0]][pos[1]][pos[2]];
|
||||
ii_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = ii[n][pos[0]][pos[1]][pos[2]];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Operator_sse::ApplyMagneticBC(bool* dirs)
|
||||
{
|
||||
Operator::ApplyMagneticBC(dirs);
|
||||
|
||||
// copy operator to aligned memory
|
||||
// FIXME this is really inefficient!
|
||||
unsigned int pos[3];
|
||||
for (int n=0;n<3;++n)
|
||||
{
|
||||
for (pos[0]=0;pos[0]<numLines[0];++pos[0])
|
||||
{
|
||||
for (pos[1]=0;pos[1]<numLines[1];++pos[1])
|
||||
{
|
||||
for (pos[2]=0;pos[2]<numLines[2];++pos[2])
|
||||
{
|
||||
vv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vv[n][pos[0]][pos[1]][pos[2]];
|
||||
vi_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = vi[n][pos[0]][pos[1]][pos[2]];
|
||||
iv_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = iv[n][pos[0]][pos[1]][pos[2]];
|
||||
ii_[n][pos[0]][pos[1]][pos[2]/4].f[pos[2]%4] = ii[n][pos[0]][pos[1]][pos[2]];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,9 +28,11 @@ public:
|
|||
static Operator_sse* New();
|
||||
virtual ~Operator_sse();
|
||||
|
||||
virtual int CalcECOperator();
|
||||
//! Return a reference to element (n,x,y,z) of f4_vv; z/4 picks the f4vector, z%4 the float inside it.
inline virtual FDTD_FLOAT& GetVV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const
{
	const unsigned int slot = z / 4;
	const unsigned int lane = z % 4;
	return f4_vv[n][x][y][slot].f[lane];
}
|
||||
//! Return a reference to element (n,x,y,z) of f4_vi; z/4 picks the f4vector, z%4 the float inside it.
inline virtual FDTD_FLOAT& GetVI( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const
{
	const unsigned int slot = z / 4;
	const unsigned int lane = z % 4;
	return f4_vi[n][x][y][slot].f[lane];
}
|
||||
|
||||
virtual void ApplyMagneticBC(bool* dirs);
|
||||
//! Return a reference to element (n,x,y,z) of f4_ii; z/4 picks the f4vector, z%4 the float inside it.
inline virtual FDTD_FLOAT& GetII( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const
{
	const unsigned int slot = z / 4;
	const unsigned int lane = z % 4;
	return f4_ii[n][x][y][slot].f[lane];
}
|
||||
//! Return a reference to element (n,x,y,z) of f4_iv; z/4 picks the f4vector, z%4 the float inside it.
inline virtual FDTD_FLOAT& GetIV( unsigned int n, unsigned int x, unsigned int y, unsigned int z ) const
{
	const unsigned int slot = z / 4;
	const unsigned int lane = z % 4;
	return f4_iv[n][x][y][slot].f[lane];
}
|
||||
|
||||
protected:
|
||||
//! use New() for creating a new Operator
|
||||
|
@ -42,10 +44,10 @@ protected:
|
|||
|
||||
// engine/post-proc needs access
|
||||
public:
|
||||
// Packed update coefficients, four consecutive z entries per f4vector.
// Usage in the engine: volt = volt*vv + vi*(current terms); curr = curr*ii + iv*(voltage terms).
f4vector**** vv_; //calc new voltage from old voltage
|
||||
f4vector**** vi_; //calc new voltage from old current
|
||||
f4vector**** iv_; //calc new current from old voltage
|
||||
f4vector**** ii_; //calc new current from old current
|
||||
f4vector**** f4_vv; //calc new voltage from old voltage
|
||||
f4vector**** f4_vi; //calc new voltage from old current
|
||||
f4vector**** f4_iv; //calc new current from old voltage
|
||||
f4vector**** f4_ii; //calc new current from old current
|
||||
};
|
||||
|
||||
#endif // OPERATOR_SSE_H
|
||||
|
|
Loading…
Reference in New Issue