{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T10:23:25Z","timestamp":1771064605247,"version":"3.50.1"},"reference-count":82,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62171250"],"award-info":[{"award-number":["62171250"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1109\/tpami.2023.3257839","type":"journal-article","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T17:45:29Z","timestamp":1678988729000},"page":"10331-10345","source":"Crossref","is-referenced-by-count":6,"title":["Random Cycle Loss and Its Application to Voice Conversion"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9019-066X","authenticated-orcid":false,"given":"Haoran","family":"Sun","sequence":"first","affiliation":[{"name":"BNRist, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1286-0644","authenticated-orcid":false,"given":"Dong","family":"Wang","sequence":"additional","affiliation":[{"name":"BNRist, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5546-8060","authenticated-orcid":false,"given":"Lantian","family":"Li","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"given":"Chen","family":"Chen","sequence":"additional","affiliation":[{"name":"BNRist, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0249-4767","authenticated-orcid":false,"given":"Thomas F.","family":"Zheng","sequence":"additional","affiliation":[{"name":"BNRist, Tsinghua University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1126\/science.173.3994.351"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.4324\/9780429040436"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1080\/15475441.2005.9684216"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1002\/9781119184096.ch6"},{"key":"ref5","first-page":"1876","article-title":"Unsupervised learning of disentangled and interpretable representations from sequential data","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hsu"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2019-49"},{"key":"ref7","first-page":"5180","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3012893"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462169"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/89.661472"},{"key":"ref11","first-page":"4114","article-title":"Challenging common assumptions in the unsupervised learning of disentangled representations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Locatello"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref13","article-title":"Hierarchical generative modeling for controllable speech synthesis","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hsu"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-283"},{"key":"ref15","article-title":"The information bottleneck method","author":"Tishby","year":"2000"},{"key":"ref16","article-title":"Improving zero-shot voice style transfer via disentangled representation learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yuan"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2016.7820786"},{"key":"ref18","first-page":"5210","article-title":"AutoVC: Zero-shot voice style transfer with only autoencoder loss","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Qian"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_49"},{"key":"ref20","volume-title":"Reductionism: Analysis and the Fullness of Reality","author":"Jones","year":"2000"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1017\/9781107295490.005"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1126\/science.177.4047.393"},{"key":"ref23","first-page":"7836","article-title":"Unsupervised speech decomposition via triple information bottleneck","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Qian"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2022-38"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1002\/wics.101"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(00)00026-5"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref28","article-title":"Towards a definition of disentangled representations","author":"Higgins","year":"2018"},{"key":"ref29","first-page":"2180","article-title":"InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref30","article-title":"Beta-VAE: Learning basic visual concepts with a constrained variational framework","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Higgins"},{"key":"ref31","first-page":"2649","article-title":"Disentangling by factorising","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim"},{"key":"ref32","first-page":"2615","article-title":"Isolating sources of disentanglement in variational autoencoders","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00825"},{"key":"ref34","first-page":"859","article-title":"Nonlinear ICA using auxiliary variables and generalized contrastive learning","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Hyvarinen"},{"key":"ref35","first-page":"939","article-title":"Hidden Markov nonlinear ICA: Unsupervised learning from nonstationary time series","volume-title":"Proc. Conf. Uncertainty Artif. Intell.","author":"H\u00e4lv\u00e4"},{"key":"ref36","article-title":"Towards nonlinear disentanglement in natural data with temporal sparse coding","volume":"1050","author":"Klindt","year":"2020","journal-title":"Stat"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3055560"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.907344"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2041699"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2353991"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.E97.D.1403"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2047683"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178896"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-447"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1587\/transfun.E96.A.1946"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2333242"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.23919\/eusipco.2018.8553236"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2018.8553236"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682897"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639535"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-2086"},{"key":"ref52","first-page":"6309","article-title":"Neural discrete representation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Van Den Oord"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1443"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2663"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-116"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2016.7552917"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1356"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9415079"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.769"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2020-28"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2917232"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-63"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1830"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683204"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1990"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746369"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/icassp43922.2022.9747763"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053726"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2307"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414136"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-92659-5_24"},{"key":"ref73","first-page":"8650","article-title":"Global rhythm style transfer without text transcriptions","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Qian","year":"2021"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1016\/0010-0277(88)90031-5"},{"key":"ref75","volume-title":"Humus Chemistry: Genesis, Composition, Reactions","author":"Stevenson","year":"1994"},{"key":"ref76","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. 3rd Int. Conf. Learn. Representations","author":"Kingma"},{"key":"ref77","first-page":"1779","article-title":"CLUB: A contrastive log-ratio upper bound of mutual information","author":"Cheng","year":"2020","journal-title":"Int. Conf. on Mach. Learn."},{"key":"ref78","article-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit","author":"Veaux","year":"2017","journal-title":"Univ. Edinburgh. CSTR"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"ref80","first-page":"17 022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kong"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2003"},{"key":"ref82","first-page":"125","article-title":"WaveNet: A generative model for raw audio","volume-title":"Proc. 9th ISCA Speech Synth. Workshop","author":"Oord"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10169863\/10073591.pdf?arnumber=10073591","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T09:29:29Z","timestamp":1726046969000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10073591\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8]]},"references-count":82,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3257839","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8]]}}}