2010-05-19 09:41:35 +00:00
|
|
|
/*
* Copyright (C) 2010 Thorsten Liebig (Thorsten.Liebig@gmx.de)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
|
|
|
|
#include "operator_sse_compressed.h"
|
|
|
|
#include "engine_sse_compressed.h"
|
|
|
|
#include "engine_sse.h"
|
|
|
|
#include "tools/array_ops.h"
|
|
|
|
|
2010-08-26 12:50:23 +00:00
|
|
|
#include <map>
|
|
|
|
#include <cstring>
|
2010-05-19 09:41:35 +00:00
|
|
|
|
|
|
|
//! Factory: announces the operator type on cout, allocates a new operator and runs Init() on it.
//! \return the freshly allocated operator; nothing in this function releases it again
Operator_SSE_Compressed* Operator_SSE_Compressed::New()
{
	cout << "Create FDTD operator (compressed SSE)" << endl;

	Operator_SSE_Compressed* const created = new Operator_SSE_Compressed();
	created->Init();
	return created;
}
|
|
|
|
|
|
|
|
//! Constructs the operator with compression switched off and no index table attached.
Operator_SSE_Compressed::Operator_SSE_Compressed() : Operator_sse()
{
	// compression is only switched on by CalcECOperator()
	m_Use_Compression = false;
	// the index table is allocated later by InitOperator()
	m_Op_index = NULL;
}
|
|
|
|
|
|
|
|
//! Tears the operator down by delegating all cleanup to Reset().
Operator_SSE_Compressed::~Operator_SSE_Compressed()
{
	this->Reset();
}
|
|
|
|
|
|
|
|
//! Creates the engine that matches the current compression state.
//! \return a compressed SSE engine while compression is active, otherwise a default sse-engine
Engine* Operator_SSE_Compressed::CreateEngine() const
{
	if (m_Use_Compression)
		return Engine_SSE_Compressed::New(this);

	// compression is off -> create a default sse-engine
	return Engine_sse::New(this);
}
|
|
|
|
|
2010-10-27 09:17:58 +00:00
|
|
|
//! Calculates the operator through Operator_sse and compresses the result afterwards.
//! \param debugFlags forwarded unchanged to Operator_sse::CalcECOperator()
//! \return the status code reported by Operator_sse::CalcECOperator()
int Operator_SSE_Compressed::CalcECOperator( DebugFlags debugFlags )
{
	// keep the base-class status instead of dropping it, so the caller can see a failure
	const int baseStatus = Operator_sse::CalcECOperator( debugFlags );

	// the compression step is still attempted unconditionally, as before
	m_Use_Compression = CompressOperator();

	return baseStatus;
}
|
|
|
|
|
|
|
|
void Operator_SSE_Compressed::Init()
|
|
|
|
{
|
|
|
|
Operator_sse::Init();
|
|
|
|
m_Op_index = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Operator_SSE_Compressed::Reset()
|
|
|
|
{
|
|
|
|
Operator_sse::Reset();
|
|
|
|
|
|
|
|
if (m_Op_index)
|
|
|
|
{
|
2010-08-26 13:54:19 +00:00
|
|
|
Delete3DArray<unsigned int>( m_Op_index, numLines );
|
|
|
|
m_Op_index = 0;
|
2010-05-19 09:41:35 +00:00
|
|
|
}
|
|
|
|
|
2010-10-26 16:47:21 +00:00
|
|
|
m_Use_Compression = false;
|
2010-08-26 13:47:04 +00:00
|
|
|
for (int n=0; n<3; n++)
|
|
|
|
{
|
|
|
|
f4_vv_Compressed[n].clear();
|
|
|
|
f4_vi_Compressed[n].clear();
|
|
|
|
f4_iv_Compressed[n].clear();
|
|
|
|
f4_ii_Compressed[n].clear();
|
|
|
|
}
|
2010-05-19 09:41:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void Operator_SSE_Compressed::InitOperator()
|
|
|
|
{
|
2010-10-26 16:47:21 +00:00
|
|
|
//cleanup compression
|
|
|
|
m_Use_Compression = false;
|
|
|
|
for (int n=0; n<3; n++)
|
|
|
|
{
|
|
|
|
f4_vv_Compressed[n].clear();
|
|
|
|
f4_vi_Compressed[n].clear();
|
|
|
|
f4_iv_Compressed[n].clear();
|
|
|
|
f4_ii_Compressed[n].clear();
|
|
|
|
}
|
|
|
|
|
2010-05-19 09:41:35 +00:00
|
|
|
Operator_sse::InitOperator();
|
|
|
|
|
2010-08-26 13:54:19 +00:00
|
|
|
m_Op_index = Create3DArray<unsigned int>( numLines );
|
2010-05-19 09:41:35 +00:00
|
|
|
}
|
|
|
|
|
2010-05-19 09:42:56 +00:00
|
|
|
void Operator_SSE_Compressed::ShowStat() const
|
|
|
|
{
|
|
|
|
Operator_sse::ShowStat();
|
|
|
|
|
2010-08-26 13:47:04 +00:00
|
|
|
cout << "SSE compression enabled\t: " << (m_Use_Compression?"yes":"no") << endl;
|
|
|
|
cout << "Unique SSE operators\t: " << f4_vv_Compressed->size() << endl;
|
2010-05-19 09:42:56 +00:00
|
|
|
cout << "-----------------------------------" << endl;
|
|
|
|
}
|
|
|
|
|
2010-06-25 07:38:49 +00:00
|
|
|
// see http://www.informit.com/articles/article.aspx?p=710752&seqNum=6
|
|
|
|
#define INLINE inline extern __attribute__((always_inline))
|
|
|
|
INLINE int equal(f4vector v1, f4vector v2)
|
|
|
|
{
|
|
|
|
#if defined(__SSE__)
|
2010-12-06 12:04:37 +00:00
|
|
|
#if (__GNUC__ == 4) && (__GNUC_MINOR__ < 4)
|
|
|
|
v4si compare = __builtin_ia32_cmpeqps( v1.v, v2.v );
|
|
|
|
return __builtin_ia32_movmskps( (v4sf)compare ) == 0x0f;
|
|
|
|
#else
|
|
|
|
v4sf compare = __builtin_ia32_cmpeqps( v1.v, v2.v );
|
|
|
|
return __builtin_ia32_movmskps( compare ) == 0x0f;
|
|
|
|
#endif
|
2010-06-25 07:38:49 +00:00
|
|
|
#else
|
2010-08-26 13:47:04 +00:00
|
|
|
return (
|
|
|
|
v1.f[0] == v2.f[0] &&
|
|
|
|
v1.f[1] == v2.f[1] &&
|
|
|
|
v1.f[2] == v2.f[2] &&
|
|
|
|
v1.f[3] == v2.f[3]
|
|
|
|
);
|
2010-06-25 07:38:49 +00:00
|
|
|
#endif
|
|
|
|
}
|
2010-05-19 09:42:56 +00:00
|
|
|
|
2010-05-19 09:41:35 +00:00
|
|
|
bool Operator_SSE_Compressed::CompareOperators(unsigned int pos1[3], unsigned int pos2[3])
|
|
|
|
{
|
|
|
|
// cerr << pos1[0] << " " << pos1[1] << " " << pos1[2] << endl;
|
2010-08-26 13:47:04 +00:00
|
|
|
for (int n=0; n<3; ++n)
|
2010-05-19 09:41:35 +00:00
|
|
|
{
|
2010-06-25 07:38:49 +00:00
|
|
|
if (!equal( f4_vv[n][pos1[0]][pos1[1]][pos1[2]], f4_vv[n][pos2[0]][pos2[1]][pos2[2]] )) return false;
|
|
|
|
if (!equal( f4_vi[n][pos1[0]][pos1[1]][pos1[2]], f4_vi[n][pos2[0]][pos2[1]][pos2[2]] )) return false;
|
|
|
|
if (!equal( f4_iv[n][pos1[0]][pos1[1]][pos1[2]], f4_iv[n][pos2[0]][pos2[1]][pos2[2]] )) return false;
|
|
|
|
if (!equal( f4_ii[n][pos1[0]][pos1[1]][pos1[2]], f4_ii[n][pos2[0]][pos2[1]][pos2[2]] )) return false;
|
2010-05-19 09:41:35 +00:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Operator_SSE_Compressed::CompressOperator()
|
|
|
|
{
|
2010-06-07 21:08:38 +00:00
|
|
|
cout << "Compressing the FDTD operator... this may take a while..." << endl;
|
2010-05-19 09:41:35 +00:00
|
|
|
|
2010-08-26 13:47:04 +00:00
|
|
|
map<SSE_coeff,unsigned int> lookUpMap;
|
2010-05-19 09:41:35 +00:00
|
|
|
|
|
|
|
unsigned int pos[3];
|
2010-08-26 13:47:04 +00:00
|
|
|
for (pos[0]=0; pos[0]<numLines[0]; ++pos[0])
|
2010-05-19 09:41:35 +00:00
|
|
|
{
|
2010-08-26 13:47:04 +00:00
|
|
|
for (pos[1]=0; pos[1]<numLines[1]; ++pos[1])
|
2010-05-19 09:41:35 +00:00
|
|
|
{
|
2010-08-26 13:47:04 +00:00
|
|
|
for (pos[2]=0; pos[2]<numVectors; ++pos[2])
|
2010-05-19 09:41:35 +00:00
|
|
|
{
|
2010-08-26 13:47:04 +00:00
|
|
|
f4vector vv[3] = { f4_vv[0][pos[0]][pos[1]][pos[2]], f4_vv[1][pos[0]][pos[1]][pos[2]], f4_vv[2][pos[0]][pos[1]][pos[2]] };
|
|
|
|
f4vector vi[3] = { f4_vi[0][pos[0]][pos[1]][pos[2]], f4_vi[1][pos[0]][pos[1]][pos[2]], f4_vi[2][pos[0]][pos[1]][pos[2]] };
|
|
|
|
f4vector iv[3] = { f4_iv[0][pos[0]][pos[1]][pos[2]], f4_iv[1][pos[0]][pos[1]][pos[2]], f4_iv[2][pos[0]][pos[1]][pos[2]] };
|
|
|
|
f4vector ii[3] = { f4_ii[0][pos[0]][pos[1]][pos[2]], f4_ii[1][pos[0]][pos[1]][pos[2]], f4_ii[2][pos[0]][pos[1]][pos[2]] };
|
|
|
|
SSE_coeff c( vv, vi, iv, ii );
|
2010-08-26 12:50:23 +00:00
|
|
|
|
2010-08-26 13:47:04 +00:00
|
|
|
map<SSE_coeff,unsigned int>::iterator it;
|
|
|
|
it = lookUpMap.find(c);
|
|
|
|
if (it == lookUpMap.end())
|
|
|
|
{
|
|
|
|
// not found -> insert
|
|
|
|
unsigned int index = f4_vv_Compressed[0].size();
|
|
|
|
for (int n=0; n<3; n++)
|
|
|
|
{
|
|
|
|
f4_vv_Compressed[n].push_back( vv[n] );
|
|
|
|
f4_vi_Compressed[n].push_back( vi[n] );
|
|
|
|
f4_iv_Compressed[n].push_back( iv[n] );
|
|
|
|
f4_ii_Compressed[n].push_back( ii[n] );
|
|
|
|
}
|
|
|
|
lookUpMap[c] = index;
|
|
|
|
m_Op_index[pos[0]][pos[1]][pos[2]] = index;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// this operator is already in the list
|
|
|
|
unsigned int index = (*it).second;
|
|
|
|
m_Op_index[pos[0]][pos[1]][pos[2]] = index;
|
|
|
|
}
|
2010-05-19 09:41:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Delete_N_3DArray_v4sf(f4_vv,numLines);
|
|
|
|
Delete_N_3DArray_v4sf(f4_vi,numLines);
|
|
|
|
Delete_N_3DArray_v4sf(f4_iv,numLines);
|
|
|
|
Delete_N_3DArray_v4sf(f4_ii,numLines);
|
|
|
|
f4_vv = 0;
|
|
|
|
f4_vi = 0;
|
|
|
|
f4_iv = 0;
|
|
|
|
f4_ii = 0;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2010-08-26 12:50:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
//! Stores a copy of the first three entries of each of the four coefficient arrays.
//! \param vv copied into m_vv
//! \param vi copied into m_vi
//! \param iv copied into m_iv
//! \param ii copied into m_ii
SSE_coeff::SSE_coeff( f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3] )
{
	int dir = 0;
	while (dir < 3)
	{
		m_vv[dir] = vv[dir];
		m_vi[dir] = vi[dir];
		m_iv[dir] = iv[dir];
		m_ii[dir] = ii[dir];
		++dir;
	}
}
|
|
|
|
|
|
|
|
bool SSE_coeff::operator==( const SSE_coeff& other ) const
|
|
|
|
{
|
2010-08-26 13:47:04 +00:00
|
|
|
for (int n=0; n<3; n++)
|
|
|
|
{
|
|
|
|
if (memcmp( &(m_vv[n]), &(other.m_vv[n]), sizeof(f4vector) ) != 0) return false;
|
|
|
|
if (memcmp( &(m_vi[n]), &(other.m_vi[n]), sizeof(f4vector) ) != 0) return false;
|
|
|
|
if (memcmp( &(m_iv[n]), &(other.m_iv[n]), sizeof(f4vector) ) != 0) return false;
|
|
|
|
if (memcmp( &(m_ii[n]), &(other.m_ii[n]), sizeof(f4vector) ) != 0) return false;
|
|
|
|
}
|
|
|
|
return true;
|
2010-08-26 12:50:23 +00:00
|
|
|
}
|
|
|
|
bool SSE_coeff::operator!=( const SSE_coeff& other ) const
|
|
|
|
{
|
2010-08-26 13:47:04 +00:00
|
|
|
return !(*this == other);
|
2010-08-26 12:50:23 +00:00
|
|
|
}
|
|
|
|
bool SSE_coeff::operator<( const SSE_coeff& other ) const
|
|
|
|
{
|
2010-08-26 13:47:04 +00:00
|
|
|
for (int n=0; n<3; n++)
|
|
|
|
{
|
|
|
|
for (int c=0; c<4; c++)
|
|
|
|
{
|
|
|
|
if (m_vv[n].f[c] > other.m_vv[n].f[c]) return false;
|
|
|
|
if (m_vv[n].f[c] < other.m_vv[n].f[c]) return true;
|
|
|
|
if (m_vi[n].f[c] > other.m_vi[n].f[c]) return false;
|
|
|
|
if (m_vi[n].f[c] < other.m_vi[n].f[c]) return true;
|
|
|
|
if (m_iv[n].f[c] > other.m_iv[n].f[c]) return false;
|
|
|
|
if (m_iv[n].f[c] < other.m_iv[n].f[c]) return true;
|
|
|
|
if (m_ii[n].f[c] > other.m_ii[n].f[c]) return false;
|
|
|
|
if (m_ii[n].f[c] < other.m_ii[n].f[c]) return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
2010-08-26 12:50:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//! Writes all stored coefficients to the given stream in a human readable form.
//! For each n the four components of vv, vi, iv and ii are printed on one line each.
//! \param stream output stream that receives the text
void SSE_coeff::print( ostream& stream ) const
{
	const char* const labels[4] = { "vv=", "vi=", "iv=", "ii=" };
	const f4vector* const sets[4] = { m_vv, m_vi, m_iv, m_ii };

	stream << "SSE_coeff: (" << endl;
	for (int dir=0; dir<3; ++dir)
	{
		stream << "n=" << dir << ":" << endl;
		for (int set=0; set<4; ++set)
		{
			// every row after the first starts by terminating the previous one
			if (set > 0)
				stream << endl;
			stream << labels[set];
			for (int comp=0; comp<4; ++comp)
				stream << sets[set][dir].f[comp] << " ";
		}
		stream << endl;
	}
	stream << ")" << endl;
}
|