Speech output generation in the SMARTKOM system is realized by a corpus-based unit selection strategy that preserves many properties of the human voice. When the system's avatar Smartakus is present on the screen, the synthetic speech signal is temporally synchronized with Smartakus' visible speech gestures and prosodically adjusted to his pointing gestures to enhance multimodal communication. The unit selection voice was formally evaluated and found to be very well accepted and reasonably intelligible in SMARTKOM-specific scenarios.
%0 Book Section
%1 SchweitzerBraunschweilerEtAl06p411
%A Schweitzer, Antje
%A Braunschweiler, Norbert
%A Dogil, Grzegorz
%A Klankert, Tanja
%A Möbius, Bernd
%A Möhler, Gregor
%A Morais, Edmilson
%A Säuberlich, Bettina
%A Thomae, Matthias
%B SmartKom: Foundations of Multimodal Dialogue Systems
%C Berlin
%D 2006
%E Wahlster, Wolfgang
%I Springer
%K v1500 paper ai language processing multimodal speech synthesis smartkom zzz.sds
%P 411-435
%R 10.1007/3-540-36678-4_27
%T Multimodal Speech Synthesis
%X Speech output generation in the SMARTKOM system is realized by a corpus-based unit selection strategy that preserves many properties of the human voice. When the system's avatar Smartakus is present on the screen, the synthetic speech signal is temporally synchronized with Smartakus visible speech gestures and prosodically adjusted to his pointing gestures to enhance multimodal communication. The unit selection voice was formally evaluated and found to be very well accepted and reasonably intelligible in SMARTKOM-specific scenarios.
@incollection{SchweitzerBraunschweilerEtAl06p411,
  author     = {Schweitzer, Antje and Braunschweiler, Norbert and Dogil, Grzegorz and Klankert, Tanja and M{\"o}bius, Bernd and M{\"o}hler, Gregor and Morais, Edmilson and S{\"a}uberlich, Bettina and Thomae, Matthias},
  title      = {Multimodal Speech Synthesis},
  booktitle  = {{SmartKom}: Foundations of Multimodal Dialogue Systems},
  editor     = {Wahlster, Wolfgang},
  publisher  = {Springer},
  address    = {Berlin},
  year       = {2006},
  pages      = {411--435},
  doi        = {10.1007/3-540-36678-4_27},
  crossref   = {Wahlster2006},
  abstract   = {Speech output generation in the SMARTKOM system is realized by a corpus-based unit selection strategy that preserves many properties of the human voice. When the system's avatar Smartakus is present on the screen, the synthetic speech signal is temporally synchronized with Smartakus visible speech gestures and prosodically adjusted to his pointing gestures to enhance multimodal communication. The unit selection voice was formally evaluated and found to be very well accepted and reasonably intelligible in SMARTKOM-specific scenarios.},
  keywords   = {v1500 paper ai language processing multimodal speech synthesis smartkom zzz.sds},
  file       = {Preprint:2006/SchweitzerBraunschweilerEtAl06p411.pdf:PDF},
  added-at   = {2012-05-30T10:53:49.000+0200},
  timestamp  = {2015-03-11T10:51:56.000+0100},
  biburl     = {https://www.bibsonomy.org/bibtex/2adff62952bca7f7e2a1f24f9ac0b019a/flint63},
  groups     = {public},
  interhash  = {b897cae846bf8b9ffa4744d38ccf61b3},
  intrahash  = {d3a27077daa91eb35214ba1d66221f29},
  username   = {flint63},
}