#include #include #include #include #include #include #include #include #include #include #include #include ITANCTC::ITANCTC( const Config& oNCTCConfig ) : m_oConfig( oNCTCConfig ) , m_pHRIR( NULL ) { m_fBeta = float( 1e-4 ); if( oNCTCConfig.N <= 0 ) ITA_EXCEPT1( INVALID_PARAMETER, "Wrong configuration, N-CTC requires a valid number of loudspeakers" ); m_iOptimization = m_oConfig.iOptimization; m_fCrossTalkCancellationFactor = m_oConfig.fCrossTalkCancellationFactor; m_fWaveIncidenceAngleCompensationFactor = m_oConfig.fWaveIncidenceAngleCompensationFactor; m_oHeadPose.vPos.SetToZeroVector(); m_oHeadPose.vView.SetValues( 0, 0, -1.0f ); m_oHeadPose.vUp.SetValues( 0, 1.0f, 0 ); //m_oHeadPose.qOrient.SetToNeutralQuaternion(); m_sfCTC_temp.init( 2, m_oConfig.iCTCFilterLength, true ); int iDFTSize = m_oConfig.iCTCFilterLength + 1; for( int n = 0; n < GetN(); n++ ) { m_vdWeights.push_back( 1.0f ); m_vpHRTFs.push_back( new ITAHDFTSpectra( m_oConfig.dSampleRate, 2, iDFTSize, true ) ); m_vfDelayTime.push_back( float( m_oConfig.iCTCFilterLength / m_oConfig.dSampleRate / 2.0f ) ); } for( int i = 0; i < 2; i++ ) m_vpHelper2x2.push_back( new ITAHDFTSpectra( m_oConfig.dSampleRate, 2, iDFTSize, true ) ); t = new ITAHDFTSpectrum( m_oConfig.dSampleRate, iDFTSize, true ); det = new ITAHDFTSpectrum( m_oConfig.dSampleRate, iDFTSize, true ); int l = m_oConfig.iCTCFilterLength; m_fft.plan( ITAFFT::FFT_R2C, l, m_sfCTC_temp[ 0 ].data(), ( *m_vpHRTFs[ 0 ] )[ 0 ]->data() ); m_ifft.plan( ITAFFT::IFFT_C2R, l, ( *m_vpHRTFs[ 0 ] )[ 0 ]->data(), m_sfCTC_temp[ 0 ].data() ); } ITANCTC::~ITANCTC() { for( int n = 0; n < GetN(); n++ ) delete m_vpHRTFs[ n ]; for( int i = 0; i < 2; i++ ) delete m_vpHelper2x2[ i ]; delete t, det; return; } const ITANCTC::Config& ITANCTC::GetConfig() const { return m_oConfig; } int ITANCTC::GetN() const { return m_oConfig.N; } void ITANCTC::UpdateHeadPose( const Pose& oHead ) { m_oHeadPose = oHead; return; } int ITANCTC::GetLoudspeakerSide( int iNumLoudspeaker ) { Pose oDest = GetLoudspeakerPose( iNumLoudspeaker ); VistaVector3D vConn = oDest.vPos - m_oHeadPose.vPos; double dDistanceMeters = double( vConn.GetLength() ); float fLS2HeadDelaySamples = ( float ) dDistanceMeters / m_oConfig.fSpeedOfSound * ( float ) m_oConfig.dSampleRate; // 1/r attenuation due to distance law // Note: Gain of HRIR dataset is normalized to 1 m according to convention float fDistanceGain = 1 / ( float ) dDistanceMeters; // @todo jst: mit VistaQuaternion lösen /* VistaVector3D oFrom = vConn; oFrom.Normalize(); VistaQuaternion qOri( Vista::ViewVector, oFrom ); VistaQuaternion qHead2LS = qOri * m_oHeadPose.qOrient; float fX, fY, fZ; qHead2LS.GetAngles( fX, fY, fZ ); VistaVector3D v3View = qHead2LS.GetViewDir(); float fPhi = 180.0f / PI_F * fY; float fTheta = 180.0f / PI_F * fX; */ double dPhiDeg, dThetaDeg; VistaVector3D vDir = oDest.vPos - m_oHeadPose.vPos; vDir.Normalize(); VistaVector3D vViewMinusZ = m_oHeadPose.vView * ( -1.0f ); // local z axis const VistaVector3D vRight = vViewMinusZ.Cross( m_oHeadPose.vUp ); // local x axis const double dAzimuthAngleDeg = atan2( vDir.Dot( vRight ), vDir.Dot( m_oHeadPose.vView ) ) * 180.0f / ITAConstants::PI_D; dPhiDeg = ( ( dAzimuthAngleDeg < 0.0f ) ? ( dAzimuthAngleDeg + 360.0f ) : dAzimuthAngleDeg ); dThetaDeg = asin( vDir.Dot( m_oHeadPose.vUp ) ) * 180.0f / ITAConstants::PI_D; if( dPhiDeg < 180 ) { return Config::Loudspeaker::LEFT_SIDE; } else { return Config::Loudspeaker::RIGHT_SIDE; } } bool ITANCTC::AddHRIR( const Pose& oDest, ITASampleFrame& sfDestHRIR, bool& bOutOfRange, double dReflectionFactor/* = 1.0f */ ) const { if( sfDestHRIR.channels() != 2 ) ITA_EXCEPT1( INVALID_PARAMETER, "Two channel HRIR expected" ); // Calculate sample delay of HRIR insertion position VistaVector3D vConn = oDest.vPos - m_oHeadPose.vPos; double dDistanceMeters = double( vConn.GetLength() ); float fLS2HeadDelaySamples = ( float ) dDistanceMeters / m_oConfig.fSpeedOfSound * ( float ) m_oConfig.dSampleRate; // 1/r attenuation due to distance law // Note: Gain of HRIR dataset is normalized to 1 m according to convention float fDistanceGain = 1 / ( float ) dDistanceMeters; // @todo jst: mit VistaQuaternion lösen /* VistaVector3D oFrom = vConn; oFrom.Normalize(); VistaQuaternion qOri( Vista::ViewVector, oFrom ); VistaQuaternion qHead2LS = qOri * m_oHeadPose.qOrient; float fX, fY, fZ; qHead2LS.GetAngles( fX, fY, fZ ); VistaVector3D v3View = qHead2LS.GetViewDir(); float fPhi = 180.0f / PI_F * fY; float fTheta = 180.0f / PI_F * fX; */ double dPhiDeg, dThetaDeg; VistaVector3D vDir = oDest.vPos - m_oHeadPose.vPos; vDir.Normalize(); VistaVector3D vViewMinusZ = m_oHeadPose.vView * ( -1.0f ); // local z axis const VistaVector3D vRight = vViewMinusZ.Cross( m_oHeadPose.vUp ); // local x axis const double dAzimuthAngleDeg = atan2( vDir.Dot( vRight ), vDir.Dot( m_oHeadPose.vView ) ) * 180.0f / ITAConstants::PI_D; dPhiDeg = ( ( dAzimuthAngleDeg < 0.0f ) ? ( dAzimuthAngleDeg + 360.0f ) : dAzimuthAngleDeg ); dThetaDeg = asin( vDir.Dot( m_oHeadPose.vUp ) ) * 180.0f / ITAConstants::PI_D; int iHRIRPreOffset = m_pHRIR->getMinEffectiveFilterOffset(); int iHRIRFilerTaps = m_pHRIR->getMaxEffectiveFilterLength(); int iOffset = ( std::max )( 0, int( fLS2HeadDelaySamples - iHRIRPreOffset ) ); // remove starting zeros from HRIR iOffset = int( fLS2HeadDelaySamples ) + iHRIRPreOffset; // Check against buffer overrun, prevent if necessary (PROBLEM: LS too far away for target filter) if( iOffset + iHRIRFilerTaps > sfDestHRIR.length() ) return false; int iRecordIndex; m_pHRIR->getNearestNeighbour( DAFF_OBJECT_VIEW, float( dPhiDeg ), float( dThetaDeg ), iRecordIndex, bOutOfRange ); m_pHRIR->addFilterCoeffs( iRecordIndex, 0, sfDestHRIR[ 0 ].data() + iOffset, fDistanceGain * float( dReflectionFactor ) ); m_pHRIR->addFilterCoeffs( iRecordIndex, 1, sfDestHRIR[ 1 ].data() + iOffset, fDistanceGain * float( dReflectionFactor ) ); return true; } const ITANCTC::Pose& ITANCTC::GetLoudspeakerPose( int iLoudspeakerID ) const { if( iLoudspeakerID > GetN() ) ITA_EXCEPT1( INVALID_PARAMETER, "Loudspeaker ID (starting from 1) out of range." ); return m_oConfig.voLoudspeaker[ iLoudspeakerID - 1 ].oPose; } bool ITANCTC::CalculateFilter( std::vector< ITAHDFTSpectra* >& vpCTCFilter ) { if( !m_pHRIR ) return false; bool bOutOfRange = false; for( int n = 0; n < GetN(); n++ ) { m_sfCTC_temp.zero(); switch( m_iOptimization ) { // Here comes more if ready ... see feature/room_compensation branch case Config::OPTIMIZATION_NONE: default: if( AddHRIR( GetLoudspeakerPose( n + 1 ), m_sfCTC_temp, bOutOfRange ) == false ) return false; break; } if( bOutOfRange ) return false; ITAHDFTSpectra* pHRTF( m_vpHRTFs[ n ] ); // Convert HRIRs to HRTFs m_fft.execute( m_sfCTC_temp[ 0 ].data(), ( *pHRTF )[ 0 ]->data() ); m_fft.execute( m_sfCTC_temp[ 1 ].data(), ( *pHRTF )[ 1 ]->data() ); #ifdef NCTC_EXPORT_FILTER_TO_HARDDRIVE ITAFFTUtils::Export(pHRTF, "HRIR_LS" + IntToString(n + 1) + "_RAW"); #endif // NCTC_EXPORT_FILTER_TO_HARDDRIVE } /* Matrix to be inverted (as two-by-two row vector) * a c * b d */ ITAHDFTSpectrum* a = ( *m_vpHelper2x2[ 0 ] )[ 0 ]; ITAHDFTSpectrum* b = ( *m_vpHelper2x2[ 0 ] )[ 1 ]; ITAHDFTSpectrum* c = ( *m_vpHelper2x2[ 1 ] )[ 0 ]; ITAHDFTSpectrum* d = ( *m_vpHelper2x2[ 1 ] )[ 1 ]; // Least-squares minimization: C = WH*(HWH*-\beta)^-1 // using H* as the hermitian (complex conjugated) transpose of H /* Sweet spot optimization: modify input HRTFs * 1. Crosstalk side will be lowered by CTC factor * 2. HRTF will be flattened by WICK factor */ // Prepare HRTF spectra int iDFTSize = m_oConfig.iCTCFilterLength + 1; for( int n = 0; n < GetN(); n++ ) { ITAHDFTSpectra* pHRTF( m_vpHRTFs[ n ] ); // two-channel // --- WICK factor --- // First, store original energy of HRTF (left channel) float fEnergy = ( *pHRTF )[ 0 ]->getEnergy(); // Apply WICK factor only on magnitudes (left channel) for( int i = 0; i < ( *pHRTF )[ 0 ]->getSize(); i++ ) { float fMag = ( *pHRTF )[ 0 ]->calcMagnitude( i ); ( *pHRTF )[ 0 ]->setMagnitudePreservePhase( i, std::pow( fMag, m_fWaveIncidenceAngleCompensationFactor ) ); } // Compensate initial HRTF energy when WICK is used (left channel) assert( fEnergy > 0 ); float fEnergyCompensation = std::pow( fEnergy, ( 1 - m_fWaveIncidenceAngleCompensationFactor ) ); ( *pHRTF )[ 0 ]->mul( fEnergyCompensation ); // First, store original energy of HRTF only on magnitudes (right channel) fEnergy = ( *pHRTF )[ 1 ]->getEnergy(); // Apply WICK factor only on magnitude (right channel) for( int i = 0; i < ( *pHRTF )[ 1 ]->getSize(); i++ ) { float fMag = ( *pHRTF )[ 1 ]->calcMagnitude( i ); ( *pHRTF )[ 1 ]->setMagnitudePreservePhase( i, std::pow( fMag, m_fWaveIncidenceAngleCompensationFactor ) ); } // Compensate initial HRTF energy when WICK is used (right channel) assert( fEnergy > 0 ); fEnergyCompensation = std::pow( fEnergy, ( 1 - m_fWaveIncidenceAngleCompensationFactor ) ); ( *pHRTF )[ 1 ]->mul( fEnergyCompensation ); #ifdef NCTC_EXPORT_FILTER_TO_HARDDRIVE ITAFFTUtils::Export(pHRTF, "HRIR_LS" + IntToString(n + 1) + "_WICKed"); #endif // NCTC_EXPORT_FILTER_TO_HARDDRIVE // --- CTC compensation factor --- // CTC compensation factors int iLSSide = GetLoudspeakerSide(n + 1); float fRightChannelCTC = 1.0f; if (iLSSide == Config::Loudspeaker::LEFT_SIDE) fRightChannelCTC *= m_fCrossTalkCancellationFactor; // only apply to left channel if LS is at left side float fLeftChannelCTC = 1.0f; if (iLSSide == Config::Loudspeaker::RIGHT_SIDE) fLeftChannelCTC *= m_fCrossTalkCancellationFactor; // only apply to right channel if LS is at right side (*pHRTF)[0]->mul(fLeftChannelCTC); (*pHRTF)[1]->mul(fRightChannelCTC); #ifdef NCTC_EXPORT_FILTER_TO_HARDDRIVE ITAFFTUtils::Export(pHRTF, "HRIR_LS" + IntToString(n + 1) + "_WICKedPlusCTCFactor"); #endif // NCTC_EXPORT_FILTER_TO_HARDDRIVE // --- Weighting --- float fWeight = float( m_vdWeights[ n ] ); // diag element // Element wise (a and d): HWH* -> 2x2 t->copy( ( *pHRTF )[ 0 ] ); t->mul( fWeight ); t->mul_conj( ( *pHRTF )[ 0 ] ); n == 0 ? a->copy( t ) : a->add( t ); t->copy( ( *pHRTF )[ 1 ] ); t->mul( fWeight ); t->mul_conj( ( *pHRTF )[ 1 ] ); n == 0 ? d->copy( t ) : d->add( t ); // Cross elements (b and c): HWH* t->copy( ( *pHRTF )[ 1 ] ); t->mul( fWeight ); t->mul_conj( ( *pHRTF )[ 0 ] ); n == 0 ? b->copy( t ) : b->add( t ); t->copy( ( *pHRTF )[ 0 ] ); t->mul( fWeight ); t->mul_conj( ( *pHRTF )[ 1 ] ); n == 0 ? c->copy( t ) : c->add( t ); } // Regularize a->add( m_fBeta ); d->add( m_fBeta ); ITAHDFTSpectra abcd( m_oConfig.dSampleRate, 4, m_oConfig.iCTCFilterLength ); abcd[ 0 ]->copyFrom( *a ); abcd[ 1 ]->copyFrom( *b ); abcd[ 2 ]->copyFrom( *c ); abcd[ 3 ]->copyFrom( *d ); // Invert via determinant (simple 2x2 matrix inversion) /* * d/det -b/det * -c/det a/det */ det->copy( a ); det->mul( d ); t->copy( b ); t->mul( c ); det->sub( t ); t->copy( d ); d->copy( a ); a->copy( t ); b->mul( -1.0f ); c->mul( -1.0f ); a->div( det ); b->div( det ); c->div( det ); d->div( det ); // Calculate CTC filter WH*Inv (Nx2) ITASampleFrame sfTargetData_shift( m_sfCTC_temp.channels(), m_sfCTC_temp.length(), true ); for( int n = 0; n < GetN(); n++ ) { ITAHDFTSpectra* pHRTF( m_vpHRTFs[ n ] ); // two-channel, already WICKed ITAHDFTSpectra* pCTCFilter( vpCTCFilter[ n ] ); // two-channel float fWeight = float( m_vdWeights[ n ] ); // diag element t->copy( a ); t->mul_conj( ( *pHRTF )[ 0 ] ); t->mul( fWeight ); ( *pCTCFilter )[ 0 ]->copy( t ); t->copy( b ); t->mul_conj( ( *pHRTF )[ 1 ] ); t->mul( fWeight ); ( *pCTCFilter )[ 0 ]->add( t ); t->copy( c ); t->mul_conj( ( *pHRTF )[ 0 ] ); t->mul( fWeight ); ( *pCTCFilter )[ 1 ]->copy( t ); t->copy( d ); t->mul_conj( ( *pHRTF )[ 1 ] ); t->mul( fWeight ); ( *pCTCFilter )[ 1 ]->add( t ); #ifdef NCTC_EXPORT_FILTER_TO_HARDDRIVE ITAFFTUtils::Export(pCTCFilter, "CTCFilter_noshift_" + IntToString(n + 1)); #endif // NCTC_EXPORT_FILTER_TO_HARDDRIVE // Time-shift m_ifft.execute( ( *pCTCFilter )[ 0 ]->data(), m_sfCTC_temp[ 0 ].data() ); m_ifft.execute( ( *pCTCFilter )[ 1 ]->data(), m_sfCTC_temp[ 1 ].data() ); // Normalize after IFFT m_sfCTC_temp.div_scalar( float( m_sfCTC_temp.length() ) ); // Shift the CTC filter according to desired delay int iShiftSamples = int( m_oConfig.dSampleRate * m_vfDelayTime[ n ] ); if( m_vfDelayTime[ n ] < 0.0f ) iShiftSamples = m_sfCTC_temp.length() / 2; // if invalid, shift by half length sfTargetData_shift.cyclic_write( m_sfCTC_temp, m_sfCTC_temp.length(), 0, iShiftSamples ); m_fft.execute( sfTargetData_shift[ 0 ].data(), ( *pCTCFilter )[ 0 ]->data() ); m_fft.execute( sfTargetData_shift[ 1 ].data(), ( *pCTCFilter )[ 1 ]->data() ); #ifdef NCTC_EXPORT_FILTER_TO_HARDDRIVE ITAFFTUtils::Export(pCTCFilter, "CTCFilter_shift_" + IntToString(n + 1)); #endif // NCTC_EXPORT_FILTER_TO_HARDDRIVE } return true; } void ITANCTC::SetHRIR( const DAFFContentIR* pHRIR ) { // TODO: Set m_oConfig.iCTCFilterLength according to HRIR length and working area // TODO: Minimize loudspeaker delay if( m_pHRIR != pHRIR ) { m_pHRIR = pHRIR; // Return if HRIR is reset (NULL) if( !m_pHRIR ) return; if( m_pHRIR->getProperties()->getNumberOfChannels() != 2 ) ITA_EXCEPT1( INVALID_PARAMETER, "HRIR dataset must have exactly two channels" ); // Get HRIR filter length if( m_pHRIR->getFilterLength() > m_oConfig.iCTCFilterLength ) ITA_EXCEPT1( INVALID_PARAMETER, std::string( "The filter length of the HRIR database \"" ) + m_pHRIR->getParent()->getFilename() + std::string( "\" is larger than the CTC filters. This leads to buffer overruns." ) ); // Get HRIR delay samples const DAFFMetadata* pHRIRMetadata = m_pHRIR->getParent()->getMetadata(); if( !pHRIRMetadata->hasKey( "DELAY_SAMPLES" ) ) ITA_EXCEPT1( INVALID_PARAMETER, std::string( "HRIR database \"" ) + m_pHRIR->getParent()->getFilename() + std::string( "\" is missing a \"DELAY_SAMPLES\" metadata tag" ) ); if( ( pHRIRMetadata->getKeyType( "DELAY_SAMPLES" ) != DAFFMetadata::DAFF_INT ) && ( pHRIRMetadata->getKeyType( "DELAY_SAMPLES" ) != DAFFMetadata::DAFF_FLOAT ) ) ITA_EXCEPT1( INVALID_PARAMETER, std::string( "The metadata tag \"DELAY_SAMPLES\" in HRIR database \"" ) + m_pHRIR->getParent()->getFilename() + std::string( "\" is not numeric" ) ); float fHRIRDelay = ( float ) pHRIRMetadata->getKeyFloat( "DELAY_SAMPLES" ); if( fHRIRDelay < 0 ) ITA_EXCEPT1( INVALID_PARAMETER, std::string( "The metadata tag \"DELAY_SAMPLES\" in HRIR database \"" ) + m_pHRIR->getParent()->getFilename() + std::string( "\" must not be negative" ) ); } } void ITANCTC::SetBeta( float fBeta ) { m_fBeta = fBeta; } float ITANCTC::GetBeta() { return m_fBeta; } void ITANCTC::SetCrossTalkCancellationFactor( float fFactor ) { m_fCrossTalkCancellationFactor = fFactor; } float ITANCTC::GetCrossTalkCancellationFactor() { return m_fCrossTalkCancellationFactor; } void ITANCTC::SetWaveIncidenceAngleCompensationFactor( float fFactor ) { m_fWaveIncidenceAngleCompensationFactor = fFactor; } float ITANCTC::GetWaveIncidenceAngleCompensation() { return m_fWaveIncidenceAngleCompensationFactor; } void ITANCTC::SetDelayTime( float fDelayTime ) { for( int n = 0; n < GetN(); n++ ) m_vfDelayTime[ n ] = fDelayTime; } void ITANCTC::SetDelayTime( std::vector< float > vfDelayTime ) { if( int( vfDelayTime.size() ) != GetN() ) ITA_EXCEPT1( INVALID_PARAMETER, "Provide as many delay values as channels for NCTC" ); for( int n = 0; n < GetN(); n++ ) m_vfDelayTime[ n ] = vfDelayTime[ n ]; } std::vector< float > ITANCTC::GetDelayTime() { std::vector< float > vfDelayTime( GetN() ); for( int n = 0; n < GetN(); n++ ) vfDelayTime[ n ] = m_vfDelayTime[ n ]; return vfDelayTime; } void ITANCTC::SetOptimization( int iOptimization ) { m_iOptimization = iOptimization; } int ITANCTC::GetOptimization() const { return m_iOptimization; } bool ITANCTC::GetHRTF( std::vector< ITAHDFTSpectra* >& vpHRTF ) const { if( m_vpHRTFs.empty() ) return false; vpHRTF = m_vpHRTFs; return true; } ITANCTC::Pose ITANCTC::GetHeadPose() const { return m_oHeadPose; } // --- Loudspeaker --- ITANCTC::Config::Loudspeaker::Loudspeaker() : pDirectivity( NULL ) , iSide( SIDE_NOT_SPECIFIED ) { } ITANCTC::Config::Loudspeaker::Loudspeaker( const Pose& oStaticPose ) : pDirectivity( NULL ) , iSide( SIDE_NOT_SPECIFIED ) { oPose = oStaticPose; } // --- Pose --- ITANCTC::Pose& ITANCTC::Pose::operator=( const ITANCTC::Pose& oPoseRHS ) { vPos = oPoseRHS.vPos; //qOrient = oPoseRHS.qOrient; vView = oPoseRHS.vView; vUp = oPoseRHS.vUp; return *this; } void ITANCTC::Pose::SetOrientationYPRdeg( double fYaw, double fPitch, double fRoll ) { double yaw = grad2rad( fYaw ); double pitch = grad2rad( fPitch ); double roll = grad2rad( fRoll ); double sy = sin( yaw ), cy = cos( yaw ); double sp = sin( pitch ), cp = cos( pitch ); double sr = sin( roll ), cr = cos( roll ); vView.SetValues( -sy*cp, sp, -cy*cp ); vUp.SetValues( cy*sr + sy*sp*cr, cp*cr, -sy*sr + cy*sp*cr ); return; } ITANCTC::Config::Config() { // Set some default values N = 0; iCTCFilterLength = 4096; fSpeedOfSound = 344.0f; iOptimization = OPTIMIZATION_NONE; fCrossTalkCancellationFactor = 1.0f; fWaveIncidenceAngleCompensationFactor = 1.0f; }