{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T10:14:28Z","timestamp":1740132868843,"version":"3.37.3"},"reference-count":36,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Institute of Information and Communications Technology Planning and Evaluation"},{"name":"Korea Government","award":["2020-0-00059"],"award-info":[{"award-number":["2020-0-00059"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Lett."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/lsp.2021.3125259","type":"journal-article","created":{"date-parts":[[2021,11,4]],"date-time":"2021-11-04T19:25:01Z","timestamp":1636053901000},"page":"55-59","source":"Crossref","is-referenced-by-count":1,"title":["A Controllable Multi-Lingual Multi-Speaker Multi-Style Text-to-Speech Synthesis With Multivariate Information Minimization"],"prefix":"10.1109","volume":"29","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7293-6997","authenticated-orcid":false,"given":"Sung Jun","family":"Cheon","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1319-8215","authenticated-orcid":false,"given":"Byoung Jin","family":"Choi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8150-765X","authenticated-orcid":false,"given":"Minchan","family":"Kim","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6997-205X","authenticated-orcid":false,"given":"Hyeonseung","family":"Lee","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0568-4902","authenticated-orcid":false,"given":"Nam Soo","family":"Kim","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"WaveNet: A generative model for raw","year":"2016","author":"van den Oord","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref4","first-page":"3171","article-title":"FastSpeech: Fast, robust and controllable text to speech","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ren","year":"2019"},{"key":"ref5","article-title":"FastSpeech 2: Fast and high-quality end-to-end text to speech","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ren","year":"2020"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053795"},{"key":"ref7","first-page":"17022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kong","year":"2020"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-414"},{"key":"ref9","article-title":"Hierarchical generative modeling for controllable speech synthesis","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hsu","year":"2019"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1769"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-2668"},{"key":"ref12","first-page":"4485","article-title":"Transfer learning from speaker verification to multispeaker text-to-speech synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Jia","year":"2018"},{"key":"ref13","first-page":"4693","article-title":"Towards end-to-end prosody transfer for expressive speech synthesis with Tacotron","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"80","author":"Skerry-Ryan","year":"2018"},{"key":"ref14","first-page":"5180","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"80","author":"Wang","year":"2018"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.3390\/app10155325"},{"key":"ref16","first-page":"11 134","article-title":"Learning de-identified representations of prosody from raw audio","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Weston"},{"key":"ref17","first-page":"14 866","article-title":"Generating diverse high-fidelity images with VQ-VAE-2","volume-title":"Proc. Adv. Neural Inf. Process. Syst","author":"Razavi","year":"2019"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-39593-2_1"},{"key":"ref19","first-page":"1876","article-title":"Unsupervised learning of disentangled and interpretable representations from sequential data","volume-title":"Proc. Adv. Neural Inf. Process. Syst","author":"Hsu","year":"2017"},{"article-title":"Towards a definition of disentangled representations","year":"2018","author":"Higgins","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1948.tb01338.x"},{"key":"ref22","first-page":"531","article-title":"Mutual information neural estimation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Belghazi","year":"2018"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/SAFEPROCESS52771.2021.9693550"},{"key":"ref24","first-page":"1779","article-title":"CLUB: A contrastive log-ratio upper bound of mutual information","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Cheng","year":"2020"},{"key":"ref25","first-page":"5171","article-title":"On variational bounds of mutual information","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Poole","year":"2019"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054591"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.5555\/2946645.2946704"},{"article-title":"Deep unsupervised clustering with Gaussian mixture variational autoencoders","year":"2016","author":"Dilokthanakul","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1147\/rd.41.0066"},{"key":"ref30","article-title":"Estimating total correlation with mutual information bounds","volume-title":"Proc. Conf. Neural Inf. Process. Syst. Workshop: Deep Learn. Inf. Geom.","author":"Cheng","year":"2020"},{"article-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit","year":"2016","author":"Christophe","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-465"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/d14-1179"},{"key":"ref34","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma","year":"2014"},{"year":"1996","key":"ref35","article-title":"Methods for subjective determination of transmission quality"},{"year":"2015","key":"ref36","article-title":"Method for the subjective assessment of intermediate quality levels of coding systems"}],"container-title":["IEEE Signal Processing Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/97\/9686799\/09601283.pdf?arnumber=9601283","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:59:56Z","timestamp":1705021196000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9601283\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/lsp.2021.3125259","relation":{},"ISSN":["1070-9908","1558-2361"],"issn-type":[{"type":"print","value":"1070-9908"},{"type":"electronic","value":"1558-2361"}],"subject":[],"published":{"date-parts":[[2022]]}}}