{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,27]],"date-time":"2025-07-27T07:38:04Z","timestamp":1753601884219,"version":"3.28.0"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100017413","name":"Innovation Fund","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100017413","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,16]]},"DOI":"10.1109\/asru57964.2023.10389745","type":"proceedings-article","created":{"date-parts":[[2024,1,19]],"date-time":"2024-01-19T13:38:40Z","timestamp":1705671520000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Permod: Perceptually Grounded Voice Modification With Latent Diffusion Models"],"prefix":"10.1109","author":[{"given":"Robin","family":"Netzorg","sequence":"first","affiliation":[{"name":"University of California,Berkeley"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ajil","family":"Jalal","sequence":"additional","affiliation":[{"name":"University of California,Berkeley"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Luna","family":"McNulty","sequence":"additional","affiliation":[{"name":"Brown University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gopala Krishna","family":"Anumanchipalli","sequence":"additional","affiliation":[{"name":"University of California,Berkeley"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"volume-title":"Prosodybert: Self-supervised prosody representation for style-controllable tts","year":"2022","author":"Hu","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022.3175578"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2020.10.001"},{"key":"ref4","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref5","article-title":"Generative modeling by estimating gradients of the data distribution","volume":"32","author":"Song","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref6","article-title":"Score-based generative modeling through stochastic differential equations","author":"Song","year":"2020","journal-title":"arXiv preprint arXiv:2011.13456"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"volume-title":"Audioldm: Text-to-audio generation with latent diffusion models","year":"2023","author":"Liu","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06415-8"},{"key":"ref10","first-page":"17981","article-title":"Structured denoising diffusion models in discrete state-spaces","volume":"34","author":"Austin","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref11","first-page":"14938","article-title":"Robust compressed sensing mri with deep generative priors","volume":"34","author":"Jalal","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref12","article-title":"Solving inverse problems in medical imaging with score-based generative models","author":"Song","year":"2021","journal-title":"arXiv preprint arXiv:2111.08005"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3626235"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1044\/jshr.3601.21"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/11520153_4"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472924"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1044\/1058-0360(2008\/08-0017)"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1080\/26895269.2020.1798838"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2006.05.005"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747272"},{"volume-title":"Towards an interpretable representation of speaker identity via perceptual voice qualities","year":"2023","author":"Netzorg","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2013.00292"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.yhbeh.2016.03.001"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/S0892-1997(05)80339-X"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1121\/1.4944754"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1044\/2023_JSLHR-22-00694"},{"key":"ref27","article-title":"Audit: Audio editing by following instructions with latent diffusion models","author":"Wang","year":"2023","journal-title":"arXiv preprint arXiv:2304.00830"},{"volume-title":"Hifigan: Generative adversarial networks for efficient and high fidelity speech synthesis","year":"2020","author":"Kong","key":"ref28"},{"volume-title":"Lora: Low-rank adaptation of large language models","year":"2021","author":"Hu","key":"ref29"}],"event":{"name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2023,12,16]]},"location":"Taipei, Taiwan","end":{"date-parts":[[2023,12,20]]}},"container-title":["2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10388490\/10389614\/10389745.pdf?arnumber=10389745","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T11:21:09Z","timestamp":1706008869000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10389745\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,16]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/asru57964.2023.10389745","relation":{},"subject":[],"published":{"date-parts":[[2023,12,16]]}}}