{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,6,10]],"date-time":"2024-06-10T00:02:25Z","timestamp":1717977745832},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2015,6,29]],"date-time":"2015-06-29T00:00:00Z","timestamp":1435536000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Multimodal User Interfaces"],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1007\/s12193-015-0186-3","type":"journal-article","created":{"date-parts":[[2015,6,28]],"date-time":"2015-06-28T11:53:57Z","timestamp":1435492437000},"page":"387-394","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A framework towards expressive speech analysis and synthesis with preliminary results"],"prefix":"10.1007","volume":"9","author":[{"given":"Spyros","family":"Raptis","sequence":"first","affiliation":[]},{"given":"Sotiris","family":"Karabetsos","sequence":"additional","affiliation":[]},{"given":"Aimilios","family":"Chalamandaris","sequence":"additional","affiliation":[]},{"given":"Pirros","family":"Tsiakoulis","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,6,29]]},"reference":[{"key":"186_CR1","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1109\/79.911197","volume":"18","author":"R Cowie","year":"2001","unstructured":"Cowie R, Douglas-Cowie E, Tsapatsoulis N, Kollias S, Fellenz W, Taylor J (2001) Emotion recognition in human-computer interaction. IEEE Signal Process Mag 18:32\u201380","journal-title":"IEEE Signal Process Mag"},{"issue":"1","key":"186_CR2","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/T-AFFC.2010.1","volume":"1","author":"RA Calvo","year":"2010","unstructured":"Calvo RA, D\u2019Mello S (2010) Affect detection: an interdisciplinary review of models, methods, and their applications. IEEE Trans Affect Comput 1(1):18\u201337","journal-title":"IEEE Trans Affect Comput"},{"key":"186_CR3","unstructured":"Schuller B, Batliner A (2014) Computational paralinguistics: emotion, affect and personality in speech and language processing. Wiley, New York. ISBN: 978-1-119-97136-8"},{"key":"186_CR4","doi-asserted-by":"crossref","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"WE Ayadi","year":"2011","unstructured":"Ayadi WE, Kamel MS, Karray F (2011) Survey on speech emotion recognition: features, classification schemes, and databases. Pattern Recognit 44:572\u2013587","journal-title":"Pattern Recognit"},{"key":"186_CR5","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1007\/s10772-011-9125-1","volume":"15","author":"SG Koolagudi","year":"2012","unstructured":"Koolagudi SG, Rao KS (2012) Emotion recognition from speech: a review. Int J Speech Technol 15:99\u2013117","journal-title":"Int J Speech Technol"},{"issue":"1\u20132","key":"186_CR6","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/S0167-6393(02)00071-7","volume":"40","author":"R Cowie","year":"2003","unstructured":"Cowie R, Cornelius RR (2003) Describing the emotional states that are expressed in speech. Speech Commun 40(1\u20132):5\u201332","journal-title":"Speech Commun"},{"key":"186_CR7","volume-title":"Affective information processing","author":"M Schroeder","year":"2009","unstructured":"Schroeder M (2009) Expressive speech synthesis: past, present, and possible futures. In: Tao JH, Tan TN (eds) Affective information processing. Springer Science+Business Media LLC, London"},{"issue":"4","key":"186_CR8","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TASL.2006.876129","volume":"14","author":"M Theune","year":"2006","unstructured":"Theune M, Meijs K, Heylen D, Ordelman R (2006) Generating Expressive Speech for Storytelling Applications. IEEE Trans Audio Speech Lang Process 14(4):1137\u20131144","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"186_CR9","unstructured":"Chalamandaris A, Tsiakoulis P, Karabetsos S, Raptis S (2014) Using audio books for training a text-to-speech system. In: Proceedings of the 9th International Conference on Language Resources and Evaluation (LREC\u201914), May 26\u201331, Reykjavik, pp 3076\u20133080"},{"key":"186_CR10","doi-asserted-by":"crossref","unstructured":"Chalamandaris A, Tsiakoulis P, Karabetsos S, Raptis S (2013) The ILSP\/INNOETICS text-to-speech system for the Blizzard challenge 2013. In: The Blizzard Challenge 2013 Workshop, Barcelona","DOI":"10.21437\/Blizzard.2013-5"},{"key":"186_CR11","doi-asserted-by":"crossref","unstructured":"Braunschweiler N, Gales MJF, Buchholz S (2010) Lightly supervised recognition for automatic alignment of large coherent speech recordings. In: Proceedings of Interspeech 2010, Makuhari, pp 2222\u20132225","DOI":"10.21437\/Interspeech.2010-611"},{"key":"186_CR12","doi-asserted-by":"crossref","unstructured":"Szekely E, Csapo TG, Toth B, Mihajlik P, Carson-Berndsen J (2012) Synthesizing expressive speech from amateur audiobook recordings\u2019. In: Proceedings of SLT 2012, Florida, pp. 297\u2013302","DOI":"10.1109\/SLT.2012.6424239"},{"key":"186_CR13","doi-asserted-by":"crossref","unstructured":"Eyben F, Buchholz S, Braunschweiler N, Latorre J, Wan V, Gales MJF, Knill K (2012) Unsupervised clustering of emotion and voice styles for expressive TTS. In: Proceedings of IEEE ICASSP 2012, Kyoto, pp. 4009\u20134012","DOI":"10.1109\/ICASSP.2012.6288797"},{"key":"186_CR14","doi-asserted-by":"crossref","unstructured":"Tsiakoulis P, Karabetsos S, Chalamandaris A, Raptis S (2014) An overview of the ILSP unit selection text-to-speech synthesis system. In: Likas A, Blekas K, Kalles D (ed) SETN 2014, LNCS 8445, Springer International Publishing Switzerland, pp. 370\u2013383","DOI":"10.1007\/978-3-319-07064-3_30"},{"key":"186_CR15","unstructured":"Alm CO, Sproat R (2005) Perceptions of emotions in expressive storytelling. In: Proceedings INTERSPEECH 2005"},{"key":"186_CR16","doi-asserted-by":"crossref","unstructured":"Raptis S (2013) Exploring latent structure in expressive speech. In: Proceedings of IEEE CogInfo-Com 2013, 4th IEEE International Conference on Cognitive Infocommunications, December 2\u20135, Budapest, pp. 741\u2013745","DOI":"10.1109\/CogInfoCom.2013.6719197"},{"key":"186_CR17","doi-asserted-by":"crossref","unstructured":"Raptis S, Karabetsos S, Chalamandaris A, Tsiakoulis P (2014) Towards expressive speech synthesis: analysis and modelling of expressive speech. In: 5th IEEE International Conference on Cognitive Infocommunications, IEEE CogInfoCom 2014, Vietri Sul Mare, (Best Paper Award)","DOI":"10.1109\/CogInfoCom.2014.7020500"},{"issue":"1","key":"186_CR18","first-page":"67","volume":"9","author":"P Baranyi","year":"2012","unstructured":"Baranyi P, Csap\u00f3 A (2012) Definition and synergies of cognitive infocommunications. Acta Polytech Hung 9(1):67\u201383","journal-title":"Acta Polytech Hung"},{"key":"186_CR19","doi-asserted-by":"crossref","first-page":"1062","DOI":"10.1016\/j.specom.2011.01.011","volume":"53","author":"B Schuller","year":"2011","unstructured":"Schuller B, Batliner A, Steidl S, Seppi D (2011) Recognising realistic emotions and affect in speech: state of the art and lessons learnt from the first challenge. Speech Commun 53:1062\u20131087","journal-title":"Speech Commun"},{"issue":"4","key":"186_CR20","first-page":"585","volume":"20","author":"S Yun","year":"2012","unstructured":"Yun S, Yoo CD (2012) Loss-scaled large-margin Gaussian mixture models for speech emotion classification. IEEE Trans Audio Speech Lang Process 20(4):585\u2013597","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"186_CR21","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1109\/TMM.2010.2058095","volume":"12","author":"A Tawari","year":"2010","unstructured":"Tawari A, Trivedi MM (2010) Speech emotion analysis: exploring the role of context. IEEE Trans Multimed 12:6","journal-title":"IEEE Trans Multimed"},{"issue":"1","key":"186_CR22","doi-asserted-by":"crossref","first-page":"e006","DOI":"10.3989\/loquens.2014.006","volume":"1","author":"S King","year":"2014","unstructured":"King S (2014) Measuring a decade of progress in text-to-speech. Loquens 1(1):e006. doi: 10.3989\/loquens.2014.006","journal-title":"Loquens"},{"key":"186_CR23","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1016\/j.csl.2009.12.003","volume":"25","author":"A Batliner","year":"2011","unstructured":"Batliner A, Steidl S, Schuller B et al (2011) Whodunnit\u2014searching for the most important feature types signalling emotion-related user states in speech. Comput Speech Lang 25:4\u201328","journal-title":"Comput Speech Lang"}],"container-title":["Journal on Multimodal User Interfaces"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12193-015-0186-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12193-015-0186-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12193-015-0186-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,9]],"date-time":"2024-06-09T18:54:54Z","timestamp":1717959294000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12193-015-0186-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,6,29]]},"references-count":23,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2015,12]]}},"alternative-id":["186"],"URL":"https:\/\/doi.org\/10.1007\/s12193-015-0186-3","relation":{},"ISSN":["1783-7677","1783-8738"],"issn-type":[{"value":"1783-7677","type":"print"},{"value":"1783-8738","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,6,29]]}}}