Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Institute of Technical Acoustics (ITA)
VACore
Commits
8f7c2a77
Commit
8f7c2a77
authored
Jun 06, 2017
by
jwendt
Browse files
Included CereVoice Implementation for TTS
parent
24de9ee3
Changes
3
Hide whitespace changes
Inline
Side-by-side
CMakeLists.txt
View file @
8f7c2a77
...
...
@@ -84,7 +84,6 @@ if( NOT DEFINED ITA_VACORE_MAX_NUM_LISTENERS )
set
(
ITA_VACORE_MAX_NUM_LISTENERS 0 CACHE INT
"VACore maximum number of listeners ( 0 = unlimited )"
)
endif
(
)
if
(
NOT DEFINED ITA_VACORE_WITH_TTS_SIGNAL_SOURCE
)
vista_use_package
(
TTSRelay QUIET
)
set
(
ITA_VACORE_WITH_TTS_SIGNAL_SOURCE
${
TTSReleay
}
CACHE BOOL
"VACore with text-to-speech signal source support"
)
endif
(
)
...
...
@@ -316,7 +315,10 @@ string( TIMESTAMP VACORE_CMAKE_DATE "%Y-%m-%d" )
add_definitions
(
"-DVACORE_CMAKE_DATE=
\"
${
VACORE_CMAKE_DATE
}
\"
"
)
if
(
ITA_VACORE_WITH_TTS_SIGNAL_SOURCE
)
add_definitions
(
"-DVACORE_WITH_TTS_SIGNAL_SOURCE"
)
vista_use_package
(
CereVoice REQUIRED FIND_DEPENDENCIES
)
if
(
VCEREVOICE_FOUND
)
add_definitions
(
"-DVACORE_WITH_TTS_SIGNAL_SOURCE"
)
endif
(
)
endif
(
)
# Dev switches
...
...
src/Audiosignals/VATextToSpeechSignalSource.cpp
View file @
8f7c2a77
...
...
@@ -11,16 +11,60 @@
#include
<sstream>
#include
<assert.h>
#include
<math.h>
#include
<iomanip>
#include
<VistaTools/VistaFileSystemDirectory.h>
#ifdef VACORE_WITH_TTS_SIGNAL_SOURCE
#include
<cerevoice_eng.h>
#endif
// Definition of the static lookup table mapping a phoneme label to a numeric viseme id.
// It is populated by SetupPhonemeMapping() and queried by PhonemeToViseme().
std
::
map
<
std
::
string
,
int
>
CVATextToSpeechSignalSource
::
m_phonemeToId
;
// Definition of the static lookup table mapping a numeric viseme id to the viseme name
// that is written into the generated viseme XML.
std
::
map
<
int
,
std
::
string
>
CVATextToSpeechSignalSource
::
m_idToViseme
;
/**
 * Creates a text-to-speech signal source with one output channel.
 *
 * When built with VACORE_WITH_TTS_SIGNAL_SOURCE, the directory CEREVOICE_VOICES_PATH is
 * scanned for "*.voice" files. Every voice is loaded into ONE shared CereVoice engine,
 * using the licence file with the same base name ("*.lic").
 *
 * @param dSampleRate   Sampling rate forwarded to ITADatasourceRealization
 * @param iBlockLength  Block length forwarded to ITADatasourceRealization
 */
CVATextToSpeechSignalSource::CVATextToSpeechSignalSource( const double dSampleRate, const int iBlockLength )
	: ITADatasourceRealization( 1, dSampleRate, ( unsigned int ) ( iBlockLength ) )
	, m_pAssociatedCore( NULL )
	, m_pTTSEngine( NULL ) // must be defined on every path, the destructor hands it to the engine API
{
	m_sbOut.Init( GetBlocklength(), true );

#ifdef VACORE_WITH_TTS_SIGNAL_SOURCE
	const std::string sVoicesPath = CEREVOICE_VOICES_PATH;
	VA_INFO( "CVATextToSpeechSignalSource", "CereVoices voices are searched in \"" + sVoicesPath + "\"" );

	VistaFileSystemDirectory oVoicesDir( sVoicesPath );
	if( !oVoicesDir.Exists() )
	{
		VA_WARN( "CVATextToSpeechSignalSource", "The voices directory does not exist!" );
		return;
	}

	static const std::string sVoiceSuffix = ".voice";
	for( auto it = oVoicesDir.begin(); it != oVoicesDir.end(); ++it )
	{
		// NOTE(review): GetName() is used as the file path exactly as before - confirm it is
		// resolvable from the working directory.
		const std::string sVoiceFile = ( *it )->GetName();

		// Only accept names that END with ".voice" (not e.g. "x.voice.bak")
		if( sVoiceFile.size() < sVoiceSuffix.size() )
			continue;
		const std::size_t nSuffixPos = sVoiceFile.size() - sVoiceSuffix.size();
		if( sVoiceFile.compare( nSuffixPos, sVoiceSuffix.size(), sVoiceSuffix ) != 0 )
			continue;

		const std::string sLicenceFile = sVoiceFile.substr( 0, nSuffixPos ) + ".lic";

		bool bLoaded = false;
		if( m_pTTSEngine == NULL )
		{
			// First voice: creates the engine
			m_pTTSEngine = CPRCEN_engine_load( sLicenceFile.c_str(), sVoiceFile.c_str() );
			bLoaded = ( m_pTTSEngine != NULL );
		}
		else
		{
			// Further voices are added to the existing engine instead of replacing (and leaking) it
			bLoaded = ( CPRCEN_engine_load_voice( m_pTTSEngine, sLicenceFile.c_str(), "", sVoiceFile.c_str(), CPRC_VOICE_LOAD ) != 0 );
		}

		if( bLoaded )
			VA_INFO( "CVATextToSpeechSignalSource", "Loaded voice \"" + sVoiceFile + "\"" );
		else
			VA_WARN( "CVATextToSpeechSignalSource", "Could not load voice \"" + sVoiceFile + "\" with licence \"" + sLicenceFile + "\"" );
	}

	if( m_pTTSEngine == NULL )
		VA_WARN( "CVATextToSpeechSignalSource", "No CereVoice voice could be loaded, text-to-speech will not be available." );

	SetupPhonemeMapping();
#endif
}
/**
 * Releases the CereVoice engine (only in builds with VACORE_WITH_TTS_SIGNAL_SOURCE).
 */
CVATextToSpeechSignalSource::~CVATextToSpeechSignalSource()
{
#ifdef VACORE_WITH_TTS_SIGNAL_SOURCE
	// Do not hand a null engine to the C API, and never leave a dangling handle behind
	if( m_pTTSEngine != NULL )
	{
		CPRCEN_engine_delete( m_pTTSEngine );
		m_pTTSEngine = NULL;
	}
#endif
}
int
CVATextToSpeechSignalSource
::
GetType
()
const
...
...
@@ -35,7 +79,7 @@ std::string CVATextToSpeechSignalSource::GetTypeString() const
/**
 * Returns a short human-readable description of this signal source type.
 *
 * The stale description copied from another signal source ("a machine that can be
 * started and stopped") was returned first and made the correct text unreachable.
 */
std::string CVATextToSpeechSignalSource::GetDesc() const
{
	return std::string( "Creates spoken text and facial movements from a text" );
}
IVACore
*
CVATextToSpeechSignalSource
::
GetAssociatedCore
()
const
...
...
@@ -46,7 +90,7 @@ IVACore* CVATextToSpeechSignalSource::GetAssociatedCore() const
const
float
*
CVATextToSpeechSignalSource
::
GetStreamBlock
(
const
CVAAudiostreamState
*
pStreamInfo
)
{
// This is the live update function that is called by the audio streaming.
// We will deliver either zeros for a quiet talker, or take speech samples from
// the WAV file or internal sample buffer.
m_sbOut
.
Zero
();
...
...
@@ -73,11 +117,32 @@ std::string CVATextToSpeechSignalSource::GetStateString() const
/**
 * Reset hook of the signal source.
 * Currently a placeholder: it only emits a warning, no state is modified.
 */
void CVATextToSpeechSignalSource::Reset()
{
	VA_WARN( "CVATextToSpeechSignalSource", "Reset is not yet implemented." );
}
CVAStruct
CVATextToSpeechSignalSource
::
GetParameters
(
const
CVAStruct
&
oArgs
)
const
{
CVAStruct
oRet
;
if
(
oArgs
.
HasKey
(
"list_voices"
)
&&
oArgs
[
"list_voices"
]){
//list the available voices
int
num_voices
=
CPRCEN_engine_get_voice_count
(
m_pTTSEngine
);
oRet
[
"number"
]
=
num_voices
;
for
(
int
i
=
0
;
i
<
num_voices
;
i
++
)
{
std
::
string
voicename
=
CPRCEN_engine_get_voice_info
(
m_pTTSEngine
,
i
,
"VOICE_NAME"
);
oRet
[
"voice_"
+
std
::
to_string
(
i
)]
=
voicename
;
std
::
string
language
=
CPRCEN_engine_get_voice_info
(
m_pTTSEngine
,
i
,
"LANGUAGE_CODE_ISO"
);
oRet
[
"language_"
+
std
::
to_string
(
i
)]
=
language
;
std
::
string
country
=
CPRCEN_engine_get_voice_info
(
m_pTTSEngine
,
i
,
"COUNTRY_CODE_ISO"
);
oRet
[
"country_"
+
std
::
to_string
(
i
)]
=
country
;
std
::
string
sex
=
CPRCEN_engine_get_voice_info
(
m_pTTSEngine
,
i
,
"SEX"
);
oRet
[
"sex_"
+
std
::
to_string
(
i
)]
=
sex
;
}
return
oRet
;
}
oRet
[
"ready_for_playback"
]
=
false
;
#ifndef VACORE_WITH_TTS_SIGNAL_SOURCE
oRet
[
"error"
]
=
"TTS signal sources not activated in your VACore"
;
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"TTS signal sources was requested but is not activated in your VACore"
);
...
...
@@ -85,11 +150,48 @@ CVAStruct CVATextToSpeechSignalSource::GetParameters( const CVAStruct& oArgs ) c
if
(
oArgs
.
IsEmpty
()
)
{
oRet
[
"info"
]
=
"Parameters of TTS signal source will be delivered through this struct"
;
oRet
[
"preparation_process"
]
=
0.9
;
oRet
[
"ready_for_playback"
]
=
false
;
oRet
[
"prepare_speech"
]
=
"Halleluja!"
;
oRet
[
"prepare_text"
]
=
"Let me say whatever you want"
;
oRet
[
"start_talking"
]
=
false
;
VA_INFO
(
"CVATextToSpeechSignalSource"
,
"GetParameters called with empty argument, so a the argument CVAStruct was filled with example data."
);
return
oRet
;
}
if
(
!
oArgs
.
HasKey
(
"prepare_text"
)){
if
(
oArgs
.
HasKey
(
"start_talking"
)){
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"For start_talking SetParameters() must be used."
);
}
else
{
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"Could not interpret parameters for text-to-speech signal source getter method, use empty getter for help."
);
}
return
oRet
;
}
if
(
!
oArgs
[
"prepare_text"
].
IsString
())
VA_EXCEPT2
(
INVALID_PARAMETER
,
"Text of speech (prepare_text) has to be a string"
);
std
::
string
sText
=
oArgs
[
"prepare_text"
];
std
::
string
sVoice
=
oArgs
[
"voice"
];
// Prepare a WAV file or sample buffer with text-to-speech engine output
VA_INFO
(
"CVATextToSpeechSignalSource"
,
"VA received TTS command, to say
\"
"
+
sText
+
"
\"
"
);
std
::
string
visemes
=
"<speech type=
\"
text/plain
\"
>
\n
"
;
//std::string visemes = "<?xml version=\"1.0\" encoding=\"UTF - 8\"?>\n<speak>\n";
std
::
pair
<
std
::
string
,
float
>
visemes_lastTime
=
std
::
make_pair
(
visemes
,
0.0
f
);
CPRCEN_channel_handle
chan
=
CPRCEN_engine_open_channel
(
m_pTTSEngine
,
""
,
""
,
sVoice
.
c_str
(),
""
);
CPRCEN_engine_channel_reset
(
m_pTTSEngine
,
chan
);
CPRCEN_engine_clear_callback
(
m_pTTSEngine
,
chan
);
CPRCEN_engine_set_callback
(
m_pTTSEngine
,
chan
,
(
void
*
)
&
visemes_lastTime
,
VisemeProcessing
);
CPRCEN_engine_channel_to_file
(
m_pTTSEngine
,
chan
,
"D:/work/tts.wav"
,
CPRCEN_RIFF
);
/* File output on channel */
CPRCEN_engine_channel_speak
(
m_pTTSEngine
,
chan
,
sText
.
c_str
(),
sText
.
length
(),
true
);
visemes
=
visemes_lastTime
.
first
;
visemes
+=
"</speech>
\n
"
;
oRet
[
"visemes"
]
=
visemes
;
oRet
[
"ready_for_playback"
]
=
true
;
#endif
return
oRet
;
...
...
@@ -100,6 +202,8 @@ void CVATextToSpeechSignalSource::SetParameters( const CVAStruct& oParams )
#ifndef VACORE_WITH_TTS_SIGNAL_SOURCE
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"TTS signal sources was requested but is not activated in your VACore"
);
#else
if
(
oParams
.
HasKey
(
"start_talking"
)
)
{
// Do something, if possible
...
...
@@ -108,23 +212,209 @@ void CVATextToSpeechSignalSource::SetParameters( const CVAStruct& oParams )
if
(
oParams
.
HasKey
(
"prepare_speech"
)
)
{
if
(
!
oParams
[
"prepare"
].
IsString
()
)
VA_EXCEPT2
(
INVALID_PARAMETER
,
"Text of speech has be be a string"
);
std
::
string
sText
=
oParams
[
"prepare"
];
// Prepare a WAV file or sample buffer with text-to-speech engine output
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"For preparing speech GetParameters() must be used."
);
return
;
}
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"Could not interpret parameters for text-to-speech signal source setter method, use empty getter for help"
);
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"Could not interpret parameters for text-to-speech signal source setter method, use empty getter for help
.
"
);
#endif
return
;
}
#ifndef VACORE_WITH_TTS_SIGNAL_SOURCE
// put dummy methods here ... not so nice, I know.
#else
// put rest here
/**
 * Formats a floating point value with a fixed number of decimal places.
 *
 * std::fixed is required here: without it std::setprecision() selects the number of
 * SIGNIFICANT digits, so e.g. 123.456 with n = 3 would be printed as "123" and all
 * time stamps beyond 100 seconds would lose their fractional part.
 *
 * @param a_value  Value to format
 * @param n        Number of digits after the decimal point
 * @return         Textual representation, e.g. 1.5 -> "1.500" for n = 3
 */
std::string CVATextToSpeechSignalSource::to_string_with_precision( float a_value, const int n /* = 3*/ )
{
	std::ostringstream out;
	out << std::fixed << std::setprecision( n ) << a_value;
	return out.str();
}
/**
 * Fills the two static lookup tables used to translate phoneme labels into viseme names:
 * m_phonemeToId (phoneme label -> viseme id) and m_idToViseme (viseme id -> viseme name).
 *
 * The tables are shared by all instances and are only filled on the first call.
 * The phoneme list is meant to cover American, Scottish and German speech
 * (see https://www.cereproc.com/files/CereVoicePhoneSets.pdf).
 */
void CVATextToSpeechSignalSource::SetupPhonemeMapping()
{
	// only initialize once
	if( !m_phonemeToId.empty() )
		return;

	struct PhonemeEntry
	{
		const char* pcPhoneme;
		int iVisemeId;
	};

	static const PhonemeEntry aPhonemeTable[] =
	{
		{ "sil", 0 },
		{ "@", 6 },
		{ "@@", 6 },
		{ "a", 3 },
		{ "aa", 3 },
		{ "ae", 6 },
		{ "aeh", 6 },
		{ "ah", 6 },
		{ "ai", 11 },
		{ "an", 9 },
		{ "ao", 3 },
		{ "au", 3 },
		{ "aw", 6 },
		{ "ax", 6 },
		{ "ay", 6 },
		{ "b", 21 },
		{ "ch", 16 },
		{ "d", 19 },
		{ "dh", 17 },
		{ "dx", 19 },
		{ "dzh", 16 }, // not entirely clear
		{ "e", 1 },
		{ "e@", 1 },
		{ "eh", 6 },
		{ "ei", 4 },
		{ "en", 4 },
		{ "er", 5 },
		{ "ey", 6 },
		{ "f", 18 },
		{ "g", 20 },
		{ "h", 12 },
		{ "hh", 6 },
		{ "i", 6 },
		{ "i@", 6 },
		{ "ih", 6 },
		{ "ii", 6 },
		{ "iy", 6 },
		{ "j", 6 },
		{ "jh", 16 },
		{ "k", 20 },
		{ "l", 19 },
		{ "m", 21 },
		{ "n", 20 },
		{ "ng", 20 },
		{ "o", 8 },
		{ "oe", 8 },   // actually no real oe viseme, so take o
		{ "oeh", 8 },  // actually no real oe viseme, so take o
		{ "oen", 21 }, // as in the German Parfum (p_a_rv_f_oen), just took m
		{ "oh", 8 },
		{ "oi", 10 },
		{ "on", 9 },
		{ "oo", 8 },
		{ "ou", 8 },
		{ "ow", 8 },
		{ "oy", 8 },
		{ "p", 21 },
		{ "pf", 21 },
		{ "q", 1 },    // this is somehow only used in German, before starting as, e.g. in Abend (q_ah_b_@_n_t)
		{ "r", 13 },
		{ "rv", 13 },
		{ "rl", 5 },
		{ "s", 15 },
		{ "sh", 16 },
		{ "T", 19 },
		{ "t", 19 },
		{ "th", 17 },
		{ "ts", 15 },
		{ "tsh", 16 },
		{ "u", 8 },
		{ "u@", 8 },
		{ "uh", 8 },
		{ "ue", 6 },   // no real ue
		{ "ueh", 6 },  // no real ue
		{ "uu", 8 },
		{ "uw", 8 },
		{ "v", 18 },
		{ "w", 7 },
		{ "x", 20 },   // actually not really supported, as in Scottish loch (l_o_x)
		{ "y", 7 },
		{ "z", 15 },
		{ "zh", 16 },
		{ "R", 13 }
	};

	const std::size_t nNumPhonemes = sizeof( aPhonemeTable ) / sizeof( aPhonemeTable[ 0 ] );
	for( std::size_t k = 0; k < nNumPhonemes; ++k )
		m_phonemeToId[ aPhonemeTable[ k ].pcPhoneme ] = aPhonemeTable[ k ].iVisemeId;

	// The array index is the viseme id
	static const char* const apcVisemeNames[] =
	{
		"_",   //  0: silence
		"Ah",  //  1: Viseme for aa, ae, ah
		"Aa",  //  2: Viseme for aa
		"Ao",  //  3: ao
		"Eh",  //  4: ey, eh, uh
		"Er",  //  5: er
		"Ih",  //  6: y, iy, ih, ix
		"W",   //  7: w, uw
		"Ow",  //  8: ow
		"Aw",  //  9: aw
		"Oy",  // 10: oy
		"Ay",  // 11: ay
		"H",   // 12: h
		"R",   // 13: r
		"L",   // 14: l
		"Z",   // 15: s, z
		"Sh",  // 16: sh, ch, jh, zh
		"Th",  // 17: th, dh
		"F",   // 18: f, v
		"D",   // 19: d, t, n - also try NG: 2 to 1 against
		"KG",  // 20: k, g, ,ng - also try NG: 2 to 1 against
		"BMP"  // 21: p, b, m
	};

	const int iNumVisemes = int( sizeof( apcVisemeNames ) / sizeof( apcVisemeNames[ 0 ] ) );
	for( int iId = 0; iId < iNumVisemes; ++iId )
		m_idToViseme[ iId ] = apcVisemeNames[ iId ];
}
std
::
string
CVATextToSpeechSignalSource
::
PhonemeToViseme
(
std
::
string
phoneme
)
{
auto
it
=
m_phonemeToId
.
find
(
phoneme
);
if
(
it
==
m_phonemeToId
.
end
()){
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"There exists no mapping for the phoneme:
\"
"
+
phoneme
+
"
\"
"
);
return
phoneme
;
}
auto
it2
=
m_idToViseme
.
find
(
it
->
second
);
if
(
it2
==
m_idToViseme
.
end
()){
VA_WARN
(
"CVATextToSpeechSignalSource"
,
"There exists no mapping for viseme id: "
+
it
->
second
);
return
phoneme
;
}
return
it2
->
second
;
}
/**
 * Callback registered with the CereVoice engine: converts the phoneme transcription of
 * one audio buffer into <lips .../> viseme elements.
 *
 * The callback is invoked once per sentence of the spoken text, so the generated elements
 * are appended to the ones already collected and shifted by a running time offset.
 *
 * @param abuf      Audio buffer whose transcription items are read
 * @param userdata  Must point to a std::pair< std::string, float >:
 *                  first  = XML text collected so far (elements are appended),
 *                  second = time offset in seconds that is added to all phoneme times of
 *                           this buffer; advanced by the end time of the last phoneme.
 *                  NOTE(review): assumes transcription times are relative to the start of
 *                  each buffer, as the offset handling implies - confirm with the SDK docs.
 */
void CVATextToSpeechSignalSource::VisemeProcessing( CPRC_abuf* abuf, void* userdata )
{
#ifdef VACORE_WITH_TTS_SIGNAL_SOURCE
	if( abuf == NULL )
	{
		VA_WARN( "CVATextToSpeechSignalSource", "The buffer is NULL, cannot extract visemes!" );
		return;
	}
	if( userdata == NULL )
	{
		VA_WARN( "CVATextToSpeechSignalSource", "The userdata viseme string is NULL, cannot extract visemes!" );
		return;
	}

	std::pair< std::string, float >* pVisemesAndOffset = static_cast< std::pair< std::string, float >* >( userdata );
	const float fTimeOffset = pVisemesAndOffset->second;
	float fLastPhonemeEnd = 0.0f; // stays 0 if this buffer contains no phoneme, so the offset is kept

	/* Transcriptions contain markers, phonetic information, a
	   list of these items is available for each audio buffer. */
	const int iNumTranscriptions = CPRC_abuf_trans_sz( abuf );
	for( int i = 0; i < iNumTranscriptions; i++ )
	{
		const CPRC_abuf_trans* pTrans = CPRC_abuf_get_trans( abuf, i );
		if( pTrans == NULL )
			continue;

		if( CPRC_abuf_trans_type( pTrans ) == CPRC_ABUF_TRANS_PHONE )
		{
			const float fStart = CPRC_abuf_trans_start( pTrans ); /* Start time in seconds */
			const float fEnd = CPRC_abuf_trans_end( pTrans );     /* End time in seconds */

			// Constructing a std::string from a null pointer is undefined behaviour
			const char* pcLabel = CPRC_abuf_trans_name( pTrans ); /* Label, type dependent */
			const std::string sLabel = ( pcLabel != NULL ) ? pcLabel : "";

			const std::string sStart = to_string_with_precision( fStart + fTimeOffset );
			const std::string sEnd = to_string_with_precision( fEnd + fTimeOffset );

			std::string sViseme = "\t<lips ";
			sViseme += "viseme=\"" + PhonemeToViseme( sLabel ) + "\" ";
			sViseme += "articulation=\"1.0\" ";
			sViseme += "start=\"" + sStart + "\" ";
			sViseme += "ready=\"" + sStart + "\" ";
			sViseme += "relax=\"" + sEnd + "\" ";
			sViseme += "end=\"" + sEnd + "\" ";
			sViseme += "/>\n";

			pVisemesAndOffset->first.append( sViseme );
			fLastPhonemeEnd = fEnd;
		}
		else if( CPRC_abuf_trans_type( pTrans ) == CPRC_ABUF_TRANS_ERROR )
		{
			VA_INFO( "CVATextToSpeechSignalSource", "ERROR: could not retrieve transcription at " + std::to_string( i ) );
		}
	}

	// Accumulate instead of overwrite: overwriting loses the offset of all earlier
	// sentences from the third sentence on, and resets it to 0 for phoneme-free buffers.
	pVisemesAndOffset->second = fTimeOffset + fLastPhonemeEnd;
#endif
}
src/Audiosignals/VATextToSpeechSignalSource.h
View file @
8f7c2a77
...
...
@@ -9,6 +9,8 @@
#include
<ITAAtomicPrimitives.h>
class
CVACoreImpl
;
class
CPRCEN_engine
;
class
CPRC_abuf
;
/** Text-to-speech signal source
*
...
...
@@ -29,13 +31,53 @@ public:
void
HandleRegistration
(
IVACore
*
);
void
HandleUnregistration
(
IVACore
*
);
std
::
string
GetStateString
()
const
;
/*
* This should be used to start a prepared TTS using a CVAStruct with:
* ["play_speech"] = identifier of the created speech
*/
void
SetParameters
(
const
CVAStruct
&
);
/*
* This can be used to prepare a TTS using a CVAStruct with:
* ["prepare_text"] = text to be spoken
* ["start_talking"] = true/false //whether it should be directly replayed
* ["voice"] = the voice to be used //if none is given, or the one given cannot be found, the standard voice is used (i.e. "Heather")
* and returns a CVAStruct with:
* ["ready_for_playback"] = true if no error occurred, false otherwise
* ["id"] = identifier that will be used for replaying //see above
* ["visemes"] = viseme data for facial animation as XML string
*
* This can also be used to find the available voices using a CVAStruct with:
* ["list_voices"] = true
* and returns a CVAStruct with:
* ["number"] = the number of available voices
* ["voice_i"] = the name of the voice, where i is 0...["number"]-1
* ["sex_i"] = the sex of the voice, i.e. "male" or "female"; where i is 0...["number"]-1
* ["language_i"] = the language of the voice, e.g. "en" or "de"; where i is 0...["number"]-1
* ["country_i"] = the country of the voice, e.g. "GB" or "US"; where i is 0...["number"]-1
*/
CVAStruct
GetParameters
(
const
CVAStruct
&
)
const
;
void
Reset
();
private:
static
void
VisemeProcessing
(
CPRC_abuf
*
abuf
,
void
*
userdata
);
//used as callback for the CereVoice engine
static
std
::
string
to_string_with_precision
(
float
a_value
,
const
int
n
=
3
);
static
std
::
string
PhonemeToViseme
(
std
::
string
phoneme
);
void
SetupPhonemeMapping
();
IVACore
*
m_pAssociatedCore
;
ITASampleBuffer
m_sbOut
;
/*The engine maintains the list of
loaded voices and makes them available to synthesis channels. */
CPRCEN_engine
*
m_pTTSEngine
;
static
std
::
map
<
std
::
string
,
int
>
m_phonemeToId
;
static
std
::
map
<
int
,
std
::
string
>
m_idToViseme
;
};
#endif // IW_VA_TEXT_TO_SPEECH_SIGNAL_SOURCE
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment