2010-05-19 09:41:35 +00:00
/*
* Copyright ( C ) 2010 Thorsten Liebig ( Thorsten . Liebig @ gmx . de )
*
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation , either version 3 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program . If not , see < http : //www.gnu.org/licenses/>.
*/
# include "operator_sse_compressed.h"
# include "engine_sse_compressed.h"
# include "engine_sse.h"
# include "tools/array_ops.h"
2010-08-26 12:50:23 +00:00
# include <map>
# include <cstring>
2010-05-19 09:41:35 +00:00
//! Factory: announce the operator type on stdout, allocate a new compressed SSE operator,
//! run Init() on it and hand back the raw pointer obtained from new.
Operator_SSE_Compressed* Operator_SSE_Compressed::New()
{
	cout << " Create FDTD operator (compressed SSE) " << endl;

	Operator_SSE_Compressed* newOperator = new Operator_SSE_Compressed();
	newOperator->Init();
	return newOperator;
}
//! Construct on top of Operator_sse with compression switched off and no index array allocated.
Operator_SSE_Compressed::Operator_SSE_Compressed() : Operator_sse()
{
	m_Use_Compression = false;
	m_Op_index = NULL;
}
//! Destructor: all cleanup is delegated to Reset().
Operator_SSE_Compressed::~Operator_SSE_Compressed()
{
	Reset();
}
//! Create the engine that matches the current operator state:
//! a compressed SSE engine when compression is in use, a plain SSE engine otherwise.
Engine* Operator_SSE_Compressed::CreateEngine() const
{
	if (m_Use_Compression)
		return Engine_SSE_Compressed::New(this);

	//! create a default sse-engine
	return Engine_sse::New(this);
}
2010-10-02 16:26:23 +00:00
void Operator_SSE_Compressed : : DumpOperator2File ( string filename )
{
if ( m_Use_Compression )
{
cerr < < " Operator_SSE_Compressed::DumpOperator2File: Warning: Operator dump not implemented for a compressed operator yet, try disabling operator compression. " < < endl ;
return ;
}
else
Operator_sse : : DumpOperator2File ( filename ) ;
}
2010-05-19 09:41:35 +00:00
int Operator_SSE_Compressed : : CalcECOperator ( )
{
Operator_sse : : CalcECOperator ( ) ;
m_Use_Compression = CompressOperator ( ) ;
return 0 ;
}
void Operator_SSE_Compressed : : Init ( )
{
Operator_sse : : Init ( ) ;
m_Op_index = NULL ;
}
void Operator_SSE_Compressed : : Reset ( )
{
Operator_sse : : Reset ( ) ;
if ( m_Op_index )
{
2010-08-26 13:54:19 +00:00
Delete3DArray < unsigned int > ( m_Op_index , numLines ) ;
m_Op_index = 0 ;
2010-05-19 09:41:35 +00:00
}
2010-08-26 13:47:04 +00:00
for ( int n = 0 ; n < 3 ; n + + )
{
f4_vv_Compressed [ n ] . clear ( ) ;
f4_vi_Compressed [ n ] . clear ( ) ;
f4_iv_Compressed [ n ] . clear ( ) ;
f4_ii_Compressed [ n ] . clear ( ) ;
}
2010-05-19 09:41:35 +00:00
}
void Operator_SSE_Compressed : : InitOperator ( )
{
Operator_sse : : InitOperator ( ) ;
2010-08-26 13:54:19 +00:00
m_Op_index = Create3DArray < unsigned int > ( numLines ) ;
2010-05-19 09:41:35 +00:00
}
2010-05-19 09:42:56 +00:00
void Operator_SSE_Compressed : : ShowStat ( ) const
{
Operator_sse : : ShowStat ( ) ;
2010-08-26 13:47:04 +00:00
cout < < " SSE compression enabled \t : " < < ( m_Use_Compression ? " yes " : " no " ) < < endl ;
cout < < " Unique SSE operators \t : " < < f4_vv_Compressed - > size ( ) < < endl ;
2010-05-19 09:42:56 +00:00
cout < < " ----------------------------------- " < < endl ;
}
2010-06-25 07:38:49 +00:00
// see http://www.informit.com/articles/article.aspx?p=710752&seqNum=6
#define INLINE inline extern __attribute__((always_inline))

//! Compare two f4vector values element by element.
//! \return non-zero if all four elements compare equal, 0 otherwise
//!
//! With SSE available the comparison is done with the cmpeqps/movmskps builtins: the four
//! per-element results are collected into a bit mask that must be 0x0f. Without SSE the
//! four entries of the \c f array are compared one by one.
INLINE int equal(f4vector v1, f4vector v2)
{
#if defined(__SSE__)
#if (__GNUC__ == 4) && (__GNUC_MINOR__ < 4)
	// GCC 4.0 - 4.3: the comparison result is held as v4si and cast back for movmskps
	v4si laneMask = __builtin_ia32_cmpeqps(v1.v, v2.v);
	const int equalBits = __builtin_ia32_movmskps((v4sf)laneMask);
#else
	v4sf laneMask = __builtin_ia32_cmpeqps(v1.v, v2.v);
	const int equalBits = __builtin_ia32_movmskps(laneMask);
#endif
	return equalBits == 0x0f;
#else
	for (int lane = 0; lane < 4; ++lane)
	{
		if (!(v1.f[lane] == v2.f[lane]))
			return 0;
	}
	return 1;
#endif
}
2010-05-19 09:42:56 +00:00
2010-05-19 09:41:35 +00:00
bool Operator_SSE_Compressed : : CompareOperators ( unsigned int pos1 [ 3 ] , unsigned int pos2 [ 3 ] )
{
// cerr << pos1[0] << " " << pos1[1] << " " << pos1[2] << endl;
2010-08-26 13:47:04 +00:00
for ( int n = 0 ; n < 3 ; + + n )
2010-05-19 09:41:35 +00:00
{
2010-06-25 07:38:49 +00:00
if ( ! equal ( f4_vv [ n ] [ pos1 [ 0 ] ] [ pos1 [ 1 ] ] [ pos1 [ 2 ] ] , f4_vv [ n ] [ pos2 [ 0 ] ] [ pos2 [ 1 ] ] [ pos2 [ 2 ] ] ) ) return false ;
if ( ! equal ( f4_vi [ n ] [ pos1 [ 0 ] ] [ pos1 [ 1 ] ] [ pos1 [ 2 ] ] , f4_vi [ n ] [ pos2 [ 0 ] ] [ pos2 [ 1 ] ] [ pos2 [ 2 ] ] ) ) return false ;
if ( ! equal ( f4_iv [ n ] [ pos1 [ 0 ] ] [ pos1 [ 1 ] ] [ pos1 [ 2 ] ] , f4_iv [ n ] [ pos2 [ 0 ] ] [ pos2 [ 1 ] ] [ pos2 [ 2 ] ] ) ) return false ;
if ( ! equal ( f4_ii [ n ] [ pos1 [ 0 ] ] [ pos1 [ 1 ] ] [ pos1 [ 2 ] ] , f4_ii [ n ] [ pos2 [ 0 ] ] [ pos2 [ 1 ] ] [ pos2 [ 2 ] ] ) ) return false ;
2010-05-19 09:41:35 +00:00
}
return true ;
}
bool Operator_SSE_Compressed : : CompressOperator ( )
{
2010-06-07 21:08:38 +00:00
cout < < " Compressing the FDTD operator... this may take a while... " < < endl ;
2010-05-19 09:41:35 +00:00
2010-08-26 13:47:04 +00:00
map < SSE_coeff , unsigned int > lookUpMap ;
2010-05-19 09:41:35 +00:00
unsigned int pos [ 3 ] ;
2010-08-26 13:47:04 +00:00
for ( pos [ 0 ] = 0 ; pos [ 0 ] < numLines [ 0 ] ; + + pos [ 0 ] )
2010-05-19 09:41:35 +00:00
{
2010-08-26 13:47:04 +00:00
for ( pos [ 1 ] = 0 ; pos [ 1 ] < numLines [ 1 ] ; + + pos [ 1 ] )
2010-05-19 09:41:35 +00:00
{
2010-08-26 13:47:04 +00:00
for ( pos [ 2 ] = 0 ; pos [ 2 ] < numVectors ; + + pos [ 2 ] )
2010-05-19 09:41:35 +00:00
{
2010-08-26 13:47:04 +00:00
f4vector vv [ 3 ] = { f4_vv [ 0 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] , f4_vv [ 1 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] , f4_vv [ 2 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] } ;
f4vector vi [ 3 ] = { f4_vi [ 0 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] , f4_vi [ 1 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] , f4_vi [ 2 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] } ;
f4vector iv [ 3 ] = { f4_iv [ 0 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] , f4_iv [ 1 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] , f4_iv [ 2 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] } ;
f4vector ii [ 3 ] = { f4_ii [ 0 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] , f4_ii [ 1 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] , f4_ii [ 2 ] [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] } ;
SSE_coeff c ( vv , vi , iv , ii ) ;
2010-08-26 12:50:23 +00:00
2010-08-26 13:47:04 +00:00
map < SSE_coeff , unsigned int > : : iterator it ;
it = lookUpMap . find ( c ) ;
if ( it = = lookUpMap . end ( ) )
{
// not found -> insert
unsigned int index = f4_vv_Compressed [ 0 ] . size ( ) ;
for ( int n = 0 ; n < 3 ; n + + )
{
f4_vv_Compressed [ n ] . push_back ( vv [ n ] ) ;
f4_vi_Compressed [ n ] . push_back ( vi [ n ] ) ;
f4_iv_Compressed [ n ] . push_back ( iv [ n ] ) ;
f4_ii_Compressed [ n ] . push_back ( ii [ n ] ) ;
}
lookUpMap [ c ] = index ;
m_Op_index [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] = index ;
}
else
{
// this operator is already in the list
unsigned int index = ( * it ) . second ;
m_Op_index [ pos [ 0 ] ] [ pos [ 1 ] ] [ pos [ 2 ] ] = index ;
}
2010-05-19 09:41:35 +00:00
}
}
}
Delete_N_3DArray_v4sf ( f4_vv , numLines ) ;
Delete_N_3DArray_v4sf ( f4_vi , numLines ) ;
Delete_N_3DArray_v4sf ( f4_iv , numLines ) ;
Delete_N_3DArray_v4sf ( f4_ii , numLines ) ;
f4_vv = 0 ;
f4_vi = 0 ;
f4_iv = 0 ;
f4_ii = 0 ;
return true ;
}
2010-08-26 12:50:23 +00:00
// ----------------------------------------------------------------------------
//! Build a coefficient set by copying three vectors from each of the given arrays.
//! \param vv source for m_vv[0..2]
//! \param vi source for m_vi[0..2]
//! \param iv source for m_iv[0..2]
//! \param ii source for m_ii[0..2]
SSE_coeff::SSE_coeff(f4vector vv[3], f4vector vi[3], f4vector iv[3], f4vector ii[3])
{
	for (int dir = 0; dir < 3; ++dir)
	{
		m_vv[dir] = vv[dir];
		m_vi[dir] = vi[dir];
		m_iv[dir] = iv[dir];
		m_ii[dir] = ii[dir];
	}
}
bool SSE_coeff : : operator = = ( const SSE_coeff & other ) const
{
2010-08-26 13:47:04 +00:00
for ( int n = 0 ; n < 3 ; n + + )
{
if ( memcmp ( & ( m_vv [ n ] ) , & ( other . m_vv [ n ] ) , sizeof ( f4vector ) ) ! = 0 ) return false ;
if ( memcmp ( & ( m_vi [ n ] ) , & ( other . m_vi [ n ] ) , sizeof ( f4vector ) ) ! = 0 ) return false ;
if ( memcmp ( & ( m_iv [ n ] ) , & ( other . m_iv [ n ] ) , sizeof ( f4vector ) ) ! = 0 ) return false ;
if ( memcmp ( & ( m_ii [ n ] ) , & ( other . m_ii [ n ] ) , sizeof ( f4vector ) ) ! = 0 ) return false ;
}
return true ;
2010-08-26 12:50:23 +00:00
}
bool SSE_coeff : : operator ! = ( const SSE_coeff & other ) const
{
2010-08-26 13:47:04 +00:00
return ! ( * this = = other ) ;
2010-08-26 12:50:23 +00:00
}
bool SSE_coeff : : operator < ( const SSE_coeff & other ) const
{
2010-08-26 13:47:04 +00:00
for ( int n = 0 ; n < 3 ; n + + )
{
for ( int c = 0 ; c < 4 ; c + + )
{
if ( m_vv [ n ] . f [ c ] > other . m_vv [ n ] . f [ c ] ) return false ;
if ( m_vv [ n ] . f [ c ] < other . m_vv [ n ] . f [ c ] ) return true ;
if ( m_vi [ n ] . f [ c ] > other . m_vi [ n ] . f [ c ] ) return false ;
if ( m_vi [ n ] . f [ c ] < other . m_vi [ n ] . f [ c ] ) return true ;
if ( m_iv [ n ] . f [ c ] > other . m_iv [ n ] . f [ c ] ) return false ;
if ( m_iv [ n ] . f [ c ] < other . m_iv [ n ] . f [ c ] ) return true ;
if ( m_ii [ n ] . f [ c ] > other . m_ii [ n ] . f [ c ] ) return false ;
if ( m_ii [ n ] . f [ c ] < other . m_ii [ n ] . f [ c ] ) return true ;
}
}
return false ;
2010-08-26 12:50:23 +00:00
}
void SSE_coeff : : print ( ostream & stream ) const
{
2010-08-26 13:47:04 +00:00
stream < < " SSE_coeff: ( " < < endl ;
for ( int n = 0 ; n < 3 ; n + + )
{
stream < < " n= " < < n < < " : " < < endl ;
stream < < " vv= " ;
for ( int c = 0 ; c < 4 ; c + + )
stream < < m_vv [ n ] . f [ c ] < < " " ;
stream < < endl < < " vi= " ;
for ( int c = 0 ; c < 4 ; c + + )
stream < < m_vi [ n ] . f [ c ] < < " " ;
stream < < endl < < " iv= " ;
for ( int c = 0 ; c < 4 ; c + + )
stream < < m_iv [ n ] . f [ c ] < < " " ;
stream < < endl < < " ii= " ;
for ( int c = 0 ; c < 4 ; c + + )
stream < < m_ii [ n ] . f [ c ] < < " " ;
stream < < endl ;
}
stream < < " ) " < < endl ;
2010-08-26 12:50:23 +00:00
}