{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:10:18Z","timestamp":1776888618030,"version":"3.51.2"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1109\/icassp48485.2024.10446852","type":"proceedings-article","created":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T18:56:31Z","timestamp":1710788191000},"page":"12682-12686","source":"Crossref","is-referenced-by-count":4,"title":["Mels-Tts : Multi-Emotion Multi-Lingual Multi-Speaker Text-To-Speech System Via Disentangled Style Tokens"],"prefix":"10.1109","author":[{"given":"Heejin","family":"Choi","sequence":"first","affiliation":[{"name":"Samsung Research"}]},{"given":"Jae-Sung","family":"Bae","sequence":"additional","affiliation":[{"name":"Samsung Research"}]},{"given":"Joun Yeop","family":"Lee","sequence":"additional","affiliation":[{"name":"Samsung Research"}]},{"given":"Seongkyu","family":"Mun","sequence":"additional","affiliation":[{"name":"Samsung Research"}]},{"given":"Jihwan","family":"Lee","sequence":"additional","affiliation":[{"name":"University of Southern California"}]},{"given":"Hoon-Young","family":"Cho","sequence":"additional","affiliation":[{"name":"Samsung Research"}]},{"given":"Chanwoo","family":"Kim","sequence":"additional","affiliation":[{"name":"Korea University"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref2","first-page":"3165","article-title":"Fastspeech: Fast, robust and controllable text to speech","volume-title":"Proc. Advances in Neural Information Processing Systems","author":"Ren"},{"key":"ref3","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","volume-title":"Proc. Int. Conf. on Learning Representations (ICLR)","author":"Ren"},{"key":"ref4","first-page":"5530","article-title":"Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech","volume-title":"Proc. Int. Conf. on Machine Learning","author":"Kim"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00618"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPAASC47483.2019.9023186"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1769"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-979"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-610"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3203888"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-2668"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683519"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746914"},{"key":"ref14","article-title":"Emotional end-to-end neural speech synthesizer","volume-title":"Proc. NIPS","author":"Lee"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2018.03.002"},{"key":"ref16","first-page":"5180","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","volume-title":"Proc. Int. Conf. on Machine Learning (ICML)","author":"Wang"},{"key":"ref17","first-page":"4693","article-title":"Towards end-to-end prosody transfer for expressive speech synthesis with tacotron","volume-title":"Proc. Int. Conf. on Machine Learning (ICML)","volume":"80","author":"Skerry-Ryan"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-737"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683682"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1361"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1251"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/odyssey.2020-49"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2464"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"ref25","article-title":"Multi-speaker multi-emotion database","author":"website","year":"2022"},{"key":"ref26","article-title":"Cstr vctk corpus: English multi-speaker corpus for cstr voice cloning toolkit","author":"Veaux","year":"2017"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-310"},{"key":"ref28","article-title":"Instance normalization: The missing ingredient for fast stylization","author":"Ulyanov","year":"2016"},{"key":"ref29","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462628"},{"issue":"11","key":"ref31","article-title":"Visualizing data using t-sne","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"Journal of machine learning research"}],"event":{"name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Seoul, Korea, Republic of","start":{"date-parts":[[2024,4,14]]},"end":{"date-parts":[[2024,4,19]]}},"container-title":["ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10445798\/10445803\/10446852.pdf?arnumber=10446852","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T17:50:14Z","timestamp":1746813014000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10446852\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/icassp48485.2024.10446852","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]}}}