{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T17:46:58Z","timestamp":1773683218562,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":20,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540858522","type":"print"},{"value":"9783540858539","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-85853-9_12","type":"book-chapter","created":{"date-parts":[[2008,8,30]],"date-time":"2008-08-30T09:27:54Z","timestamp":1220088474000},"page":"125-136","source":"Crossref","is-referenced-by-count":6,"title":["Multimodal Unit Selection for 2D Audiovisual Text-to-Speech Synthesis"],"prefix":"10.1007","author":[{"given":"Wesley","family":"Mattheyses","sequence":"first","affiliation":[]},{"given":"Lukas","family":"Latacz","sequence":"additional","affiliation":[]},{"given":"Werner","family":"Verhelst","sequence":"additional","affiliation":[]},{"given":"Hichem","family":"Sahli","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"12_CR1","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1023\/A:1025700715107","volume":"6","author":"G. Bailly","year":"2003","unstructured":"Bailly, G., Brar, M., Elisei, F., Odisio, M.: Audiovisual speech synthesis. International Journal of Speech Technology\u00a06, 331\u2013346 (2003)","journal-title":"International Journal of Speech Technology"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Breen, A.P., Bowers, E., Welsh, W.: An Investigation into the Generation of Mouth Shapes for a Talking Head. In: International Conference on Spoken Language Processing, vol.\u00a04, pp. 2159\u20132162 (1996)","DOI":"10.1109\/ICSLP.1996.607231"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Bregler, C., Covell, M., Slaney, M.: Video Rewrite: Driving Visual Speech with Audio. In: Association for Computing Machinery\u2019s Special Interest Group on Graphics and Interactive Techniques, pp. 353\u2013360 (1997)","DOI":"10.1145\/258734.258880"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Cosatto, E., Graf, H.P.: Sample-Based Synthesis of Photo-Realistic Talking Heads. Computer Animation, 103\u2013110 (1998)","DOI":"10.1109\/CA.1998.681914"},{"key":"12_CR5","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1109\/6046.865480","volume":"2","author":"E. Cosatto","year":"2000","unstructured":"Cosatto, E., Graf, H.P.: Photo-realistic talking-heads from image samples. IEEE Transactions on multimedia\u00a02, 152\u2013163 (2000)","journal-title":"IEEE Transactions on multimedia"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Cosatto, E., Potamianos, G., Graf, H.P.: Audio-Visual Unit Selection for the Synthesis of Photo-Realistic Talking-Heads. International Conference on Multimedia and Expo, pp. 619\u2013622 (2000)","DOI":"10.1109\/ICME.2000.871439"},{"key":"12_CR7","unstructured":"Ezzat, T., Poggio, T.: Visual Speech Synthesis by Morphing Visemes (MikeTalk). MIT AI Lab, A.I Memo 1658 (1999)"},{"key":"12_CR8","first-page":"388","volume":"21","author":"T. Ezzat","year":"2002","unstructured":"Ezzat, T., Geiger, G., Poggio, T.: Trainable videorealistic speech animation. Association for Computing Machinery\u2019s Special Interest Group on Graphics and Interactive Techniques\u00a021, 388\u2013398 (2002)","journal-title":"Association for Computing Machinery\u2019s Special Interest Group on Graphics and Interactive Techniques"},{"key":"12_CR9","unstructured":"Fagel, S.: Joint Audio-Visual Units Selection - The Javus Speech Synthesizer. In: International Conference on Speech and Computer (2006)"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Goyal, U.K., Kapoor, A., Kalra, P.: Text-to-Audio Visual Speech Synthesizer. Virtual Worlds, 256\u2013269 (2000)","DOI":"10.1007\/3-540-45016-5_24"},{"key":"12_CR11","unstructured":"Grant, K.W., Greenberg, S.: Speech Intelligibility Derived From Asynchrounous Processing of Auditory-Visual Information. In: Workshop on Audio-Visual Speech Processing, pp. 132\u2013137 (2001)"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Hunt, A., Black, A.: Unit selection in a concatenative speech synthesis system using a large speech database. In: International Conference on Acoustics, Speech and Signal Processing, pp. 373\u2013376 (1996)","DOI":"10.1109\/ICASSP.1996.541110"},{"key":"12_CR13","unstructured":"Kerkhoff, J., Marsi, E.: NeXTeNS: a New Open Source Text-to-speech System for Dutch. In: 13th meeting of Computational Linguistics in the Netherlands (2002)"},{"key":"12_CR14","unstructured":"Latacz, L., Kong, Y., Verhelst, W.: Unit Selection Synthesis Using Long Non-Uniform Units and Phoneme Identity Matching. In: 6th ISCA Workshop on Speech Synthesis, pp. 270\u2013275 (2007)"},{"key":"12_CR15","unstructured":"Mattheyses, W., Latacz, L., Kong, Y.O., Verhelst, W.: Flemish Voice for the Nextens Text-To-Speech System. In: Fifth Slovenian and First International Language Technologies Conference (2006)"},{"key":"12_CR16","doi-asserted-by":"publisher","first-page":"746","DOI":"10.1038\/264746a0","volume":"264","author":"H. McGurk","year":"1976","unstructured":"McGurk, H., MacDonald, J.: Hearing lips and seeing voices. Nature\u00a0264, 746\u2013748 (1976)","journal-title":"Nature"},{"key":"12_CR17","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1016\/0167-6393(90)90021-Z","volume":"9","author":"E. Moulines","year":"1990","unstructured":"Moulines, E., Charpentier, F.: Pitch-synchronous waveform processing techniques for text-to-speech synthesis using diphones. Speech Communication\u00a09, 453\u2013467 (1990)","journal-title":"Speech Communication"},{"key":"12_CR18","doi-asserted-by":"publisher","first-page":"2330","DOI":"10.1007\/s003710050182","volume":"15","author":"I. Pandzic","year":"1999","unstructured":"Pandzic, I., Ostermann, J., Millen, D.: Users Evaluation: Synthetic talking faces for interactive services. The Visual Computer\u00a015, 2330\u20132340 (1999)","journal-title":"The Visual Computer"},{"key":"12_CR19","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1016\/j.specom.2004.07.002","volume":"44","author":"B.J. Theobald","year":"2004","unstructured":"Theobald, B.J., Bangham, J.A., Matthews, I.A., Cawley, G.C.: Near-videorealistic synthetic talking faces: implementation and evaluation. Speech Communication\u00a044, 127\u2013140 (2004)","journal-title":"Speech Communication"},{"key":"12_CR20","volume-title":"Digital image warping","author":"G. Wolberg","year":"1990","unstructured":"Wolberg, G.: Digital image warping. IEEE Computer Society Press, Los Alamitos (1990)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning for Multimodal Interaction"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-85853-9_12.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T19:05:48Z","timestamp":1738350348000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-85853-9_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540858522","9783540858539"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-85853-9_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[]}}