Adding code for non-uniform partitioned convolution

parent 5cd20b7c
......@@ -49,10 +49,55 @@ set( ITAConvolutionSources
)
# Optional part of the library: non-uniform partitioned convolution (NUPConv).
# Only added to the build when ITA_CONVOLUTION_WITH_NUPCONV is enabled.
if( ITA_CONVOLUTION_WITH_NUPCONV )

	# LLC headers and the LLC source file list (provided by the included file)
	list( APPEND ITAConvolutionHeader
		"include/LLC.h"
		"include/LLCDefinitions.h"
		"include/LLCFilter.h"
	)
	include( "src/LLC/_SourceFiles.cmake" )

	# Public NUP convolution headers
	list( APPEND ITAConvolutionHeader
		"include/ITANUPConvolution.h"
		"include/ITANUPFilter.h"
	)

	# Implementation files required by the NUP convolution
	set( ITANUPSources
		"src/ITADirectConvolution.cpp"
		"src/ITADirectConvolutionImpl.cpp"
		"src/ITADirectConvolutionImpl.h"
		"src/ITANUPCEventLog.cpp"
		"src/ITANUPCEventLog.h"
		"src/ITANUPCFade.cpp"
		"src/ITANUPCFade.h"
		"src/ITANUPCFilterSegmentation.cpp"
		"src/ITANUPCFilterSegmentation.h"
		"src/ITANUPCHelpers.cpp"
		"src/ITANUPCHelpers.h"
		"src/ITANUPCInputBuffer1.cpp"
		"src/ITANUPCInputBuffer1.h"
		"src/ITANUPCOutputBuffer.cpp"
		"src/ITANUPCOutputBuffer.h"
		"src/ITANUPCPerformanceProfile.cpp"
		"src/ITANUPCPerformanceProfile.h"
		"src/ITANUPCStage.cpp"
		"src/ITANUPCStage.h"
		"src/ITANUPCStageInfo.h"
		"src/ITANUPCStageStatistic.h"
		"src/ITANUPCTask.cpp"
		"src/ITANUPCTask.h"
		"src/ITANUPCTaskQueue.cpp"
		"src/ITANUPCTaskQueue.h"
		"src/ITANUPCUFilter.cpp"
		"src/ITANUPCUFilter.h"
		"src/ITANUPCUFilterPool.cpp"
		"src/ITANUPCUFilterPool.h"
		"src/ITANUPCUtils.cpp"
		"src/ITANUPCUtils.h"
		"src/ITANUPConvolution.cpp"
		"src/ITANUPConvolutionImpl.cpp"
		"src/ITANUPConvolutionImpl.h"
		"src/ITANUPFilterComponent.h"
		"src/ITANUPFilterImpl.h"
		"src/ITANUPartitioningScheme.cpp"
		"src/ITANUPartitioningScheme.h"
		"src/ITAUPConvolution.cpp"
		"src/ITAUPFilter.cpp"
		"src/ITAUPFilterPool.cpp"
	)

	# Hand the NUP sources over to the library's overall source list
	list( APPEND ITAConvolutionSources ${ITANUPSources} )

endif()
# compiler settings
if( ITA_VISTA_BUILD_STATIC )
add_definitions( -DVISTABASE_STATIC -DVISTAMATH_STATIC -DVISTAASPECTS_STATIC -DVISTATOOLS_STATIC -DVISTAINTERPROCCOMM_STATIC )
......@@ -91,6 +136,20 @@ vista_create_default_info_file( ITAConvolution )
# Sort the ITAConvolution target into the "ITACoreLibs" folder (FOLDER target property)
set_property( TARGET ITAConvolution PROPERTY FOLDER "ITACoreLibs" )
# Benchmarks: optional sub-project, only added if ITA_CORE_LIBS_WITH_BENCHMARKS is enabled.
# ITACONVOLUTION_COMMON_BUILD is set before descending into the subdirectory;
# NOTE(review): presumably it tells the sub-project that it is built as part of this tree - confirm.
if( ITA_CORE_LIBS_WITH_BENCHMARKS )
set( ITACONVOLUTION_COMMON_BUILD TRUE )
add_subdirectory( "${CMAKE_CURRENT_SOURCE_DIR}/benchmarks" )
endif( )
# Profilers: optional sub-project, only added if ITA_CORE_LIBS_WITH_PROFILERS is enabled
if( ITA_CORE_LIBS_WITH_PROFILERS )
set( ITACONVOLUTION_COMMON_BUILD TRUE )
add_subdirectory( "${CMAKE_CURRENT_SOURCE_DIR}/profilers" )
endif( )
# apps
if( ITA_CORE_LIBS_WITH_APPS )
set( ITACONVOLUTION_COMMON_BUILD TRUE )
......
#include "ITANUPCFilterComponentBenchmark.h"
#include <cmath>
#include <ITAFastMath.h>
#include <ITANUPConvolution.h>
#include <stdio.h>
#include <cstdlib>
#include <time.h>
#include <vector>
#include <ITAHPT.h>
#include <ITAStopWatch.h>
using namespace std;
// Buffer sizes of interest (0 marks the end of the list)
unsigned int buffersizes[] = { 64, 128, 256, 512, 0 };
//unsigned int buffersizes[] = {256, 0};
// Impulse response lengths of interest, in seconds (0 marks the end of the list)
//double ir_durations[] = {1.0, 0};
//double ir_durations[] = {0.1, 0.25, 0.5, 1.0, 1.5, 2.0, 0};
double ir_durations[] = { 0.1, 0.25, 0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 7.5, 10.0, 15.0, 20.0, 30.0, 0 };
// Impulse response lengths for the segmented load test, in seconds (0 marks the end of the list)
double segload_ir_durations[] = { 0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 7.5, 10.0, 15.0, 0 };
// Length of the first segment (in samples)
unsigned int first_part_length = 7000; // samples
// Sampling rate used to convert durations in seconds into sample counts
const double samplerate = 44100.0;
void benchmarkFilterComponentCreation()
{
vector<double> vIRDurations;
unsigned int i = 0;
while( ir_durations[ i ] != 0 ) vIRDurations.push_back( ir_durations[ i++ ] );
vector<unsigned int> vBuffersizes;
i = 0;
while( buffersizes[ i ] != 0 ) vBuffersizes.push_back( buffersizes[ i++ ] );
unsigned int n = 30;
ITAHPT_init();
unsigned int j;
unsigned int k; // Schleifenzhler
unsigned int b; // Puffergre
double d; // Dauer der Impulsantwort
ITAStopWatch sw;
vector< vector<double> > results;
// Zufallsgenerator initialisieren
srand( ( unsigned int ) time( NULL ) );
for( i = 0; i < vBuffersizes.size(); i++ )
{
b = vBuffersizes[ i ];
vector<double> v;
for( j = 0; j < vIRDurations.size(); j++ )
{
d = vIRDurations[ j ];
// Anzahl der Samples der IR bestimmen
unsigned int l = ( unsigned int ) ceil( d*samplerate );
// Falter erzeugen
ITANUPC::IConvolution* pConv = ITANUPC::IConvolution::create( samplerate, 1, b, l );
float* pfLeft = fm_falloc( l, false );
float* pfRight = fm_falloc( l, false );
// Speicher mit Zufallszahlen initialisieren
for( k = 0; k < l; k++ )
pfLeft[ k ] = pfRight[ k ] = ( float ) rand() / ( float ) RAND_MAX;
// Filterkomponente erzeugen
ITANUPC::CFilterComponent* pFC = pConv->createFilterComponent( 0, l, 0, 0, 0, 0 );
// Mess-Vorlauf zur Einspielung des Systems
for( k = 0; k < 3; k++ );
{
sw.start();
ITANUPC::CFilterComponent* pFC = pConv->createFilterComponent( 0, l, pfLeft, l, pfRight, l );
sw.stop();
pFC->destroy();
}
// Eigentliche Messschleife:
sw.reset();
for( k = 0; k < n; k++ )
{
sw.start();
ITANUPC::CFilterComponent* pFC = pConv->createFilterComponent( 0, l, pfLeft, l, pfRight, l );
sw.stop();
pFC->destroy();
}
// Speicher freigeben und Falter lschen
fm_free( pfLeft );
fm_free( pfRight );
delete pConv;
// Infos ausgeben
printf( "\nbs = %d, dur = %0.2f s = %d samples\nmin = %0.12f s\navg = %0.12f s\nmax = %0.12f s\ncyc = %d\n\n",
b, d, l, sw.minimum(), sw.mean(), sw.maximum(), sw.cycles() );
v.push_back( sw.mean() );
}
results.push_back( v );
}
// Ergebnisse ausgeben
printf( "\n\n\nErzeugen von FCs:\n\n" );
printf( "IRL / BS \t" );
for( j = 0; j < vBuffersizes.size(); j++ ) printf( "%6d \t", vBuffersizes[ j ] );
printf( "\n" );
for( i = 0; i < vIRDurations.size(); i++ )
{
printf( "%0.3f s \t", vIRDurations[ i ] );
for( j = 0; j < vBuffersizes.size(); j++ ) printf( "%0.6f s \t", results[ j ][ i ] );
printf( "\n" );
}
}
// Benchmarks loading impulse response data into an existing filter component.
//
// For every combination of buffer size (buffersizes) and impulse response
// duration (ir_durations) a convolver and one filter component are created,
// random data is loaded into the component n times, and the stopwatch statistics
// are printed. Finally a table of the mean load times is printed.
void benchmarkFilterComponentLoad()
{
	// Copy the zero-terminated configuration arrays into vectors
	vector<double> vIRDurations;
	unsigned int i = 0;
	while( ir_durations[ i ] != 0 )
		vIRDurations.push_back( ir_durations[ i++ ] );

	vector<unsigned int> vBuffersizes;
	i = 0;
	while( buffersizes[ i ] != 0 )
		vBuffersizes.push_back( buffersizes[ i++ ] );

	unsigned int n = 30; // number of timed repetitions per configuration
	ITAHPT_init();

	unsigned int j;
	unsigned int k; // loop counter
	unsigned int b; // buffer size
	double d;       // duration of the impulse response (seconds)
	ITAStopWatch sw;
	vector< vector<double> > results; // results[ buffer size index ][ duration index ]

	// Initialize the random number generator (explicit cast avoids a narrowing warning)
	srand( ( unsigned int ) time( NULL ) );

	for( i = 0; i < vBuffersizes.size(); i++ )
	{
		b = vBuffersizes[ i ];
		vector<double> v;

		for( j = 0; j < vIRDurations.size(); j++ )
		{
			d = vIRDurations[ j ];

			// Determine the number of samples of the impulse response
			unsigned int l = ( unsigned int ) ceil( d*samplerate );

			// Create the convolver
			ITANUPC::IConvolution* pConv = ITANUPC::IConvolution::create( NULL, samplerate, b, l );

			float* pfLeft = fm_falloc( l, false );
			float* pfRight = fm_falloc( l, false );

			// Fill the buffers with random numbers
			for( k = 0; k < l; k++ )
				pfLeft[ k ] = pfRight[ k ] = ( float ) rand() / ( float ) RAND_MAX;

			// Create the (empty) filter component that the data is loaded into
			ITANUPC::CFilterComponent* pFC = pConv->createFilterComponent( 0, l, 0, 0, 0, 0 );

			// Warm-up runs to let the system settle
			for( k = 0; k < 3; k++ )
			{
				sw.start();
				pFC->load( pfLeft, l, pfRight, l );
				sw.stop();
			}

			// Actual measurement loop (warm-up timings are discarded by the reset)
			sw.reset();
			for( k = 0; k < n; k++ )
			{
				sw.start();
				pFC->load( pfLeft, l, pfRight, l );
				sw.stop();
			}

			// Release the filter component (previously never destroyed; the creation
			// benchmark destroys its components the same way), free the memory and
			// delete the convolver
			pFC->destroy();
			fm_free( pfLeft );
			fm_free( pfRight );
			delete pConv;

			// Print the statistics of this configuration
			printf( "\nbs = %d, dur = %0.2f s = %d samples\nmin = %0.12f s\navg = %0.12f s\nmax = %0.12f s\ncyc = %d\n\n",
			        b, d, l, sw.minimum(), sw.mean(), sw.maximum(), sw.cycles() );

			v.push_back( sw.mean() );
		}

		results.push_back( v );
	}

	// Print the result table: one row per IR duration, one column per buffer size
	printf( "\n\n\nLaden von FCs:\n\n" );
	printf( "IRL / BS \t" );
	for( j = 0; j < vBuffersizes.size(); j++ ) printf( "%6d \t", vBuffersizes[ j ] );
	printf( "\n" );

	for( i = 0; i < vIRDurations.size(); i++ )
	{
		printf( "%0.3f s \t", vIRDurations[ i ] );
		for( j = 0; j < vBuffersizes.size(); j++ ) printf( "%0.6f s \t", results[ j ][ i ] );
		printf( "\n" );
	}
}
// Benchmarks loading an impulse response that is split into two segments.
//
// For every combination of buffer size (buffersizes) and impulse response
// duration (segload_ir_durations) the IR length is split at the offset returned
// by getUpperSegmentOffset( first_part_length ). One filter component is created
// per segment, and loading both components is timed n times. Finally a table of
// the mean load times is printed.
void benchmarkFilterComponentSegLoad()
{
	// Copy the zero-terminated configuration arrays into vectors
	vector<double> vIRDurations;
	unsigned int i = 0;
	while( segload_ir_durations[ i ] != 0 ) vIRDurations.push_back( segload_ir_durations[ i++ ] );

	vector<unsigned int> vBuffersizes;
	i = 0;
	while( buffersizes[ i ] != 0 ) vBuffersizes.push_back( buffersizes[ i++ ] );

	unsigned int n = 30; // number of timed repetitions per configuration
	ITAHPT_init();

	unsigned int j;
	unsigned int k; // loop counter
	unsigned int b; // buffer size
	double d;       // duration of the impulse response (seconds)
	ITAStopWatch sw;
	vector< vector<double> > results; // results[ buffer size index ][ duration index ]

	// Initialize the random number generator (explicit cast avoids a narrowing warning)
	srand( ( unsigned int ) time( NULL ) );

	for( i = 0; i < vBuffersizes.size(); i++ )
	{
		b = vBuffersizes[ i ];
		vector<double> v;

		for( j = 0; j < vIRDurations.size(); j++ )
		{
			d = vIRDurations[ j ];

			// Determine the number of samples of the impulse response
			unsigned int l = ( unsigned int ) ceil( d*samplerate );

			// Create the convolver
			ITANUPC::IConvolution* pConv = ITANUPC::IConvolution::create( NULL, samplerate, b, l );

			float* pfLeft = fm_falloc( l, false );
			float* pfRight = fm_falloc( l, false );

			// Fill the buffers with random numbers
			for( k = 0; k < l; k++ )
				pfLeft[ k ] = pfRight[ k ] = ( float ) rand() / ( float ) RAND_MAX;

			// Split the IR into two segments.
			// NOTE(review): L2 is unsigned and wraps around if L1 > l. The configured
			// durations are presumably long enough to avoid this - confirm.
			unsigned int L1 = pConv->getUpperSegmentOffset( first_part_length );
			unsigned int L2 = l - L1;

			printf( "\nErstes Segment: %d Samples\n", L1 );
			printf( "Zweites Segment: %d Samples\n", L2 );
			printf( "Anpasste Laeng.: %d Samples\n\n", L1 + L2 );

			// Create one (empty) filter component per segment
			ITANUPC::CFilterComponent* pFC1 = pConv->createFilterComponent( 0, L1, 0, 0, 0, 0 );
			ITANUPC::CFilterComponent* pFC2 = pConv->createFilterComponent( 0, L2, 0, 0, 0, 0 );

			// NOTE(review): the second segment is loaded from the start of the buffers
			// rather than from offset L1. The data is random, so only the load time
			// matters here; left unchanged to keep pointer alignment and timings as-is.

			// Warm-up runs to let the system settle
			for( k = 0; k < 3; k++ )
			{
				sw.start();
				pFC1->load( pfLeft, L1, pfRight, L1 );
				pFC2->load( pfLeft, L2, pfRight, L2 );
				sw.stop();
			}

			// Actual measurement loop (warm-up timings are discarded by the reset)
			sw.reset();
			for( k = 0; k < n; k++ )
			{
				sw.start();
				pFC1->load( pfLeft, L1, pfRight, L1 );
				pFC2->load( pfLeft, L2, pfRight, L2 );
				sw.stop();
			}

			// Release both filter components (previously never destroyed; the creation
			// benchmark destroys its components the same way), free the memory and
			// delete the convolver
			pFC1->destroy();
			pFC2->destroy();
			fm_free( pfLeft );
			fm_free( pfRight );
			delete pConv;

			// Print the statistics of this configuration
			printf( "\nbs = %d, dur = %0.2f s = %d samples\nmin = %0.12f s\navg = %0.12f s\nmax = %0.12f s\ncyc = %d\n\n",
			        b, d, l, sw.minimum(), sw.mean(), sw.maximum(), sw.cycles() );

			v.push_back( sw.mean() );
		}

		results.push_back( v );
	}

	// Print the result table: one row per IR duration, one column per buffer size.
	// NOTE(review): the title is identical to the one of the non-segmented load benchmark.
	printf( "\n\n\nLaden von FCs:\n\n" );
	printf( "IRL / BS \t" );
	for( j = 0; j < vBuffersizes.size(); j++ ) printf( "%6d \t", vBuffersizes[ j ] );
	printf( "\n" );

	for( i = 0; i < vIRDurations.size(); i++ )
	{
		printf( "%0.3f s \t", vIRDurations[ i ] );
		for( j = 0; j < vBuffersizes.size(); j++ ) printf( "%0.6f s \t", results[ j ][ i ] );
		printf( "\n" );
	}
}
\ No newline at end of file
// Declarations of the filter component benchmarks.
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++; consider renaming the include guard.
#ifndef __FILTER_COMPONENT_BENCHMARK__
#define __FILTER_COMPONENT_BENCHMARK__
// Times the creation of filter components from impulse response data
void benchmarkFilterComponentCreation();
// Times loading impulse response data into an existing filter component
void benchmarkFilterComponentLoad();
// Times loading an impulse response that is split into two filter components
void benchmarkFilterComponentSegLoad();
#endif
This diff is collapsed.
/*
* ----------------------------------------------------------------
*
* ITA core libs
* (c) Copyright Institute of Technical Acoustics (ITA)
* RWTH Aachen University, Germany, 2015-2017
*
* ----------------------------------------------------------------
* ____ __________ _______
* // / //__ ___/ // _ |
* // / // / // /_| |
* // / // / // ___ |
* //__/ //__/ //__/ |__|
*
* ----------------------------------------------------------------
*
*/
#ifndef IW_ITA_NUP_FILTER
#define IW_ITA_NUP_FILTER
#include <ITAUncopyable.h>
namespace ITANUPC
{
// Within-namespace forward declarations
class IConvolution;
//! Filter for non-uniform partitioned convolution
/**
* Time-domain representation of impulse responses. Can have the state 'in use' or 'unused'.
* You can use one filter in many convolution engines, but modification is only possible
* in the 'unused' state.
* Use the 'Load' method to import time-domain data; the values are internally transformed
* into the representation required for partitioned convolution.
*
* You can only create an instance of a filter by requesting one from the convolution engine.
*/
class IFilter : public ITAUncopyable
{
public:
//! Parent convolution engine or NULL pointer
virtual IConvolution* GetParent() const = 0;
//! Returns true if the filter is in use.
virtual bool Used() const = 0;
//! Release filter
/**
* The new state will be 'unused', if a release is possible and the filter is not otherwise used.
*/
virtual void Release() = 0;
//! Empty filter with all coefficients set to zero ( h( 1:end ) = 0 )
virtual void Zeros() = 0;
//! Identity or unit impulse (Dirac delta function) ( h( 1 ) = 1, h( 2:end ) = 0 )
virtual void Identity() = 0;
//! Load filter coefficients
/**
* @note Throws ITAException if the filter is in use or the length exceeds the requested IR coefficients to be loaded.
*/
virtual void Load( const float* pfLeftFilterCoeffs, const int iNumLeftFilterCoeffs, const float* pfRightFilterCoeffs, const int iNumRightFilterCoeffs ) = 0;
//virtual ITANUPC::Filter* Clone() const = 0;
protected:
//! Protected constructor and destructor: not callable from outside the class hierarchy
inline IFilter() {};
inline virtual ~IFilter() {};
};
}
#endif // IW_ITA_NUP_FILTER
#include <conio.h>
#include <stdio.h>
#define USE_FFTW3
//#define USE_MKL7_FFT
// ITAToolkit includes
#include <ITAFastMath.h>
#include <ITAHPT.h>
#include <ITAStopWatch.h>
// STL includes
#include <iostream>
#include "../src/ITANUPCPerformanceProfile.cpp"
#include "../profiler/ITANUPCFilterComponentBenchmark.cpp"
#ifdef USE_FFTW3
#include <fftw3.h>
#endif
#ifdef USE_MKL7_FFT
#include <MKL_FFT.h>
#endif
using namespace std;
// Prints the title of a measurement series followed by the column headings
// of the result table and a separator line.
//
// pszFunc: title of the series (e.g. the name of the measured function)
void print_header( const char* pszFunc )
{
	printf( "%s\n\n", pszFunc );
	// The '+' flag only applies to signed numeric conversions. With %s it has no
	// effect and triggers format warnings, so it was dropped. A plain width
	// right-aligns the text, which is the layout produced before.
	printf( "%-8s %12s %10s %10s %8s\n", "Size", "Time", "CPU-Cycl.", "Time/Value", "Cycl./Value" );
	printf( "-----------------------------------------------------------\n" );
}
// Prints one row of the result table.
//
// s: problem size (number of values processed)
// t: measured time in seconds
//
// The columns are: size, time in microseconds, time divided by the timer
// resolution (ITAHPT_resolution), time per value in nanoseconds, and time per
// value divided by the timer resolution.
void print_result( unsigned int s, double t )
{
	char size[ 255 ], time[ 255 ], cyc[ 255 ], vtime[ 255 ], vcyc[ 255 ];

	// Each cell is formatted into its own buffer first, so that the value and its
	// unit can be padded as one string below.
	sprintf( size, "%u", s ); // %u: s is unsigned (was %d)
	sprintf( time, "%0.3f us", t * 1000000 );
	sprintf( cyc, "%0.1f", t / ITAHPT_resolution() );
	sprintf( vtime, "%0.3f ns", t / s * 1000000000 );
	sprintf( vcyc, "%0.1f", t / s / ITAHPT_resolution() );

	// The '+' flag has no meaning for %s and was dropped; the widths are unchanged.
	printf( "%-8s %12s %10s %10s %8s\n", size, time, cyc, vtime, vcyc );
}
// Closes a result table with a separator line followed by an empty line.
void print_footer()
{
	fputs( "-----------------------------------------------------------\n\n", stdout );
}
// Range of exponents x that the benchmarks iterate over; the problem size is 1 << x
unsigned int minExp, maxExp;
// Problem size limits; maxSize determines the size of the allocated work buffers
unsigned int minSize, maxSize;
// Optional performance profile that receives the measured runtimes (may be null)
PerformanceProfile* profile;
// Measures the runtime of the FastMath operations fm_add, fm_mul and fm_cmul_x
// for all sizes 1 << x with x in [minExp, maxExp]. Each mean runtime is printed
// as a table row and, if a profile is set, stored in its operation measurement
// table under VADD, SMUL and CMUL respectively.
void fastmath() {
ITAStopWatch sw;
// Number of timed repetitions per size
const int cycles = 100;
fm_init();
printf( "FastMath flags: %s\n", fm_flags_str().c_str() );
// Work buffers, sized for the largest problem.
// NOTE(review): the buffers are not explicitly initialized here - confirm what fm_falloc guarantees.
float* a = fm_falloc( maxSize );
float* b = fm_falloc( maxSize );
float* c = fm_falloc( maxSize );
print_header( "fm_add" );
for( unsigned int x = minExp; x <= maxExp; x++ ) {
unsigned int size = 1 << x;
// Dry run (not measured: the stopwatch is reset right afterwards)
sw.start();
fm_add( a, b, size );
sw.stop();
sw.reset();
for( int i = 0; i < cycles; i++ ) {
sw.start();
fm_add( a, b, size );
sw.stop();
}
double t = sw.mean();
if( profile ) profile->getOMT()->setRuntime( OperationMeasurementTable::VADD, x, ( float ) t );
print_result( size, t );
}
print_footer();
print_header( "fm_mul" );
for( unsigned int x = minExp; x <= maxExp; x++ ) {
unsigned int size = 1 << x;
// Dry run (not measured: the stopwatch is reset right afterwards)
sw.start();
fm_mul( a, 1.234F, size );
sw.stop();
sw.reset();
for( int i = 0; i < cycles; i++ ) {
sw.start();
fm_mul( a, 1.234F, size );
sw.stop();
}
double t = sw.mean();
if( profile ) profile->getOMT()->setRuntime( OperationMeasurementTable::SMUL, x, ( float ) t );
print_result( size, t );
}
print_footer();
// The printed size counts floats, i.e. twice the number of complex values;
// hence size / 2 is passed to fm_cmul_x.
print_header( "fm_cmulx (Size = Anzahl Fliekommazahlen = Anzahl komplexe Zahlen * 2!)" );
for( unsigned int x = minExp; x <= maxExp; x++ ) {
unsigned int size = 1 << x;
// Dry run (not measured: the stopwatch is reset right afterwards)
sw.start();
fm_cmul_x( a, b, c, size / 2 );
sw.stop();
sw.reset();
for( int i = 0; i < cycles; i++ ) {
sw.start();
fm_cmul_x( a, b, c, size / 2 );
sw.stop();
}
double t = sw.mean();
if( profile ) profile->getOMT()->setRuntime( OperationMeasurementTable::CMUL, x, ( float ) t );
print_result( size, t );
}
print_footer();
// Release the work buffers
fm_free( a );
fm_free( b );
fm_free( c );
}
#ifdef USE_FFTW3
// Measures the runtime of single-precision FFTW real-to-complex and
// complex-to-real transforms for all sizes 1 << x with x in [minExp, maxExp].
// Each mean runtime is printed as a table row and, if a profile is set, stored
// in its operation measurement table under FFT and IFFT respectively.
void fftw3() {
ITAStopWatch sw;
// Number of timed repetitions per size
const int cycles = 100;
fm_init();
// a: real-valued data, b: complex-valued data (interleaved floats).
// An r2c transform of n reals yields n/2+1 complex values, so b is allocated generously.
float* a = fm_falloc( maxSize );
float* b = fm_falloc( 2 * maxSize + 2 );
print_header( "fftw3: dft_r2c_1d, FFTW_MEASURE" );
for( unsigned int x = minExp; x <= maxExp; x++ ) {
unsigned int size = 1 << x;
// Create the FFTW plan (planning happens outside the timed section)
fftwf_plan p = fftwf_plan_dft_r2c_1d( size, a, ( fftwf_complex* ) b, FFTW_MEASURE );
// Dry run (not measured: the stopwatch is reset right afterwards)
sw.start();
fftwf_execute( p );
sw.stop();
sw.reset();
for( int i = 0; i < cycles; i++ ) {
sw.start();
fftwf_execute( p );
sw.stop();
}
fftwf_destroy_plan( p );
double t = sw.mean();
if( profile ) profile->getOMT()->setRuntime( OperationMeasurementTable::FFT, x, ( float ) t );
print_result( size, t );
}
print_footer();
print_header( "fftw3: dft_c2r_1d, FFTW_MEASURE" );
for( unsigned int x = minExp; x <= maxExp; x++ ) {
unsigned int size = 1 << x;
// Create the FFTW plan (planning happens outside the timed section)
fftwf_plan p = fftwf_plan_dft_c2r_1d( size, ( fftwf_complex* ) b, a, FFTW_MEASURE );
// Dry run (not measured: the stopwatch is reset right afterwards)
sw.start();
fftwf_execute( p );
sw.stop();
sw.reset();
for( int i = 0; i < cycles; i++ ) {
sw.start();
fftwf_execute( p );
sw.stop();
}
fftwf_destroy_plan( p );
double t = sw.mean();
if( profile ) profile->getOMT()->setRuntime( OperationMeasurementTable::IFFT, x, ( float ) t );
print_result( size, t );
}
print_footer();
// Release the work buffers
fm_free( a );
fm_free( b );
}
//void fftw3_c2c() {
// ITAStopWatch sw;
//
// const int cycles = 100;
//
// fm_init();
// float* a = fm_falloc(2*maxSize);
// float* b = fm_falloc(2*maxSize);
//
// print_header("fftw3: dft_c2c_1d, FFTW_MEASURE");
// for (unsigned int x=minExp; x<=maxExp; x++) {
// unsigned int size = 1<<x;