Commit 5f0d5de5 authored by Dipl.-Ing. Jonas Stienen
Browse files

Merge branch 'develop' of https://git.rwth-aachen.de/ita/VACore into develop

parents 0cc3b108 69a4bfb4
......@@ -84,7 +84,6 @@ if( NOT DEFINED ITA_VACORE_MAX_NUM_LISTENERS )
set( ITA_VACORE_MAX_NUM_LISTENERS 0 CACHE INT "VACore maximum number of listeners ( 0 = unlimited )" )
endif( )
# Define the text-to-speech option only when it has not been defined before,
# so that an existing user/cache choice is left untouched.
if( NOT DEFINED ITA_VACORE_WITH_TTS_SIGNAL_SOURCE )
# Look up the TTSRelay package (the QUIET flag is passed to the lookup).
vista_use_package( TTSRelay QUIET )
# NOTE(review): the default value is read from `TTSReleay`, which is spelled differently
# from the package name `TTSRelay` used above. If nothing sets a variable with exactly this
# name, the default expands to an empty value. Confirm which result variable
# vista_use_package provides and whether this spelling is intended.
set( ITA_VACORE_WITH_TTS_SIGNAL_SOURCE ${TTSReleay} CACHE BOOL "VACore with text-to-speech signal source support" )
endif( )
......@@ -316,7 +315,10 @@ string( TIMESTAMP VACORE_CMAKE_DATE "%Y-%m-%d" )
# Hand the value of VACORE_CMAKE_DATE to the compiler as a string macro of the same name.
add_definitions( "-DVACORE_CMAKE_DATE=\"${VACORE_CMAKE_DATE}\"" )

# Text-to-speech signal source support.
# The VACORE_WITH_TTS_SIGNAL_SOURCE compile definition is added only inside the
# VCEREVOICE_FOUND check, so the macro is never defined without the CereVoice package.
if( ITA_VACORE_WITH_TTS_SIGNAL_SOURCE )
	vista_use_package( CereVoice REQUIRED FIND_DEPENDENCIES)
	if( VCEREVOICE_FOUND )
		add_definitions( "-DVACORE_WITH_TTS_SIGNAL_SOURCE" )
	endif( )
endif( )
# Dev switches
......
#ifndef IW_VA_TEXT_TO_SPEECH_SIGNAL_SOURCE
#define IW_VA_TEXT_TO_SPEECH_SIGNAL_SOURCE
#include "VAAudiofileSignalSource.h"
#include <VAAudioSignalSource.h>
#include <VAObject.h>
......@@ -8,7 +10,10 @@
#include <ITASampleBuffer.h>
#include <ITAAtomicPrimitives.h>
class ITABufferDatasource;
class CVACoreImpl;
class CPRCEN_engine;
class CPRC_abuf;
/** Text-to-speech signal source
*
......@@ -29,13 +34,88 @@ public:
// Registration hook taking a core pointer.
// NOTE(review): presumably invoked when this signal source is registered with a core; the definition is not visible here - confirm.
void HandleRegistration( IVACore* );
// Unregistration counterpart of HandleRegistration, also taking a core pointer.
// NOTE(review): presumably invoked when the source is removed from a core - confirm against the implementation.
void HandleUnregistration( IVACore* );
// Returns a string describing the state of this signal source; the exact content is determined by the implementation (not visible here).
std::string GetStateString() const;
/*
* This should be used to prepare a TTS using a CVAStruct with:
* ["prepare_text"] = text to be spoken
* ["id"] = identifier that will be used for playing this speech and for referencing it (must be unique)
* ["voice"] = the voice to be used; if none is given or the given one cannot be found, the standard voice is used (i.e. "Heather")
* ["direct_replay"] = true/false, whether the audio should be played directly (in this case no id has to be given; should not be used for lip-syncing)
*
* This should be used to start a prepared TTS using a CVAStruct with:
* ["play_speech"] = identifier (int) of the created speech
* ["free_after"] = true/false, whether the resources can be freed or this sentence should be used again
*/
void SetParameters( const CVAStruct& );
/*
* This can be used to receive the viseme data for a created speech
* ["get_visemes_for"] = identifier as given above
* and returns a CVAStruct with:
* ["visemes"] = viseme data for facial animation as an XML string (empty string if something went wrong during creation)
*
* This can also be used to find the available voices using a CVAStruct with:
* ["list_voices"] = true
* and returns a CVAStruct with:
* ["number"] = the number of available voices
* ["voice_i"] = the name of the voice, where i is 0...["number"]-1
* ["sex_i"] = the sex of the voice, i.e. "male" or "female"; where i is 0...["number"]-1
* ["language_i"] = the language of the voice, e.g. "en" or "de"; where i is 0...["number"]-1
* ["country_i"] = the country of the voice, e.g. "GB" or "US"; where i is 0...["number"]-1
*/
CVAStruct GetParameters( const CVAStruct& ) const;
void Reset();
private:
class TTSEngine
{
	// Wrapper for the CereVoice TTS engine. The engine has to be initialized only once
	// rather than once per TTS signal source, so one shared wrapper is handed out by getInstance().
public:
	TTSEngine();
	~TTSEngine();

	// Copy construction and copy assignment are deleted so that no copies of the wrapper can be made.
	TTSEngine( const TTSEngine& ) = delete;
	void operator=( const TTSEngine& ) = delete;

	// Access to the shared wrapper object.
	static TTSEngine& getInstance()
	{
		// Function-local static: instantiated on first use and guaranteed to be destroyed.
		static TTSEngine oSharedEngine;
		return oSharedEngine;
	}

	void SetupPhonemeMapping();

	// Returns the wrapped engine pointer; callers must not delete it.
	CPRCEN_engine* getEngine() const;

	std::string PhonemeToViseme( std::string phoneme );

private:
	// The engine maintains the list of loaded voices and makes them available to synthesis channels.
	// It must not be deleted from outside.
	CPRCEN_engine* m_pTTSEngine;

	std::map<std::string, int> m_phonemeToId;
	std::map<int, std::string> m_idToViseme;
};
// Plain data bundle with default-initialized fields.
// NOTE(review): presumably the object passed as `userdata` to the viseme callback - confirm at the call site.
struct UserCallbackData
{
	float lastEnd{ 0.0f };          // starts at zero
	std::string visemes;            // starts as the empty string
	std::vector<float> floatBuffer; // starts empty
};
// Used as callback for the CereVoice engine; receives the engine's audio buffer and an opaque user data pointer.
static void VisemeProcessing(CPRC_abuf* abuf, void * userdata);
// Converts a float to a string; n defaults to 3.
// NOTE(review): n is presumably the number of decimal places - confirm in the implementation.
static std::string to_string_with_precision(float a_value, const int n = 3);

// Sample frames and viseme strings, each stored under a string key.
// NOTE(review): the key is presumably the speech "id" passed via SetParameters - confirm.
std::map<std::string, ITASampleFrame*> m_AudioSampleFrames;
std::map<std::string, std::string> m_Visemes;

IVACore* m_pAssociatedCore;
ITASampleBuffer m_sbOut;

// This mutable keyword here is necessary since the inherited method GetParameters() is const, however we want to be able to change some parts (not very clean code, sorry).
// NOTE(review): none of the members declared below carries `mutable` in this declaration - confirm which member the remark refers to.
ITAAtomicInt m_iCurrentPlayState;
ITABufferDatasource* m_pBufferDataSource;
ITASampleFrame* m_pFrameToDelete; // this is set if the sample should be freed after playback
};
#endif // IW_VA_TEXT_TO_SPEECH_SIGNAL_SOURCE
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment