{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T22:37:57Z","timestamp":1771627077813,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,22]],"date-time":"2023-10-22T00:00:00Z","timestamp":1697932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,22]],"date-time":"2023-10-22T00:00:00Z","timestamp":1697932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,22]]},"DOI":"10.1109\/waspaa58266.2023.10248127","type":"proceedings-article","created":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T17:31:37Z","timestamp":1694799097000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Diff-Pitcher: Diffusion-Based Singing Voice Pitch Correction"],"prefix":"10.1109","author":[{"given":"Jiarui","family":"Hai","sequence":"first","affiliation":[{"name":"Johns Hopkins University,Laboratory for Computational Auditory Perception,Baltimore,USA"}]},{"given":"Mounya","family":"Elhilali","sequence":"additional","affiliation":[{"name":"Johns Hopkins University,Laboratory for Computational Auditory Perception,Baltimore,USA"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Diffusion-based voice conversion with fast maximum likelihood sampling scheme","author":"popov","year":"2021","journal-title":"International Conference on Learning Representations"},{"key":"ref12","article-title":"Wavegrad: Estimating gradients for waveform generation","author":"chen","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref15","article-title":"Audioldm: Text-to-audio generation with latent diffusion models","author":"liu","year":"2023"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21350"},{"key":"ref11","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"ho","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095298"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461280"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054308"},{"key":"ref17","article-title":"Differentiable world synthesizer-based neural vocoder with application to end-to-end audio style transfer","author":"nercessian","year":"2023","journal-title":"Audio Engineering Society Convention 154"},{"key":"ref16","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref19","article-title":"Conditional positional encodings for vision transformers","author":"chu","year":"2022","journal-title":"In The Eleventh International Conference on Learning Representations"},{"key":"ref18","article-title":"Bigvgan: A universal neural vocoder with large-scale training","author":"lee","year":"2022","journal-title":"In The Eleventh International Conference on Learning Representations"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-48"},{"key":"ref23","article-title":"Children&#x2019;s song dataset for singing voice research","author":"choi","year":"2020","journal-title":"International Society for Music Information Retrieval Conference (ISMIR)"},{"key":"ref26","first-page":"28 492","article-title":"Robust speech recognition via large-scale weak supervision","author":"radford","year":"2023","journal-title":"International Conference on Machine Learning"},{"key":"ref25","article-title":"Fast and reliable f0 estimation method based on the period extraction of vocal fold vibration of singing voice and speech","author":"morise","year":"2009","journal-title":"Audio Engineering Society Conference 35th International Conference Audio for Games"},{"key":"ref20","article-title":"Cstr vctk corpus: English multi-speaker corpus for cstr voice cloning toolkit","author":"veaux","year":"2017","journal-title":"Centre for Speech Technology Research University of Edinburgh"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.549"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475437"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682804"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2015EDP7457"},{"key":"ref9","article-title":"Neural pitch-shifting and time-stretching with controllable lpcnet","author":"morrison","year":"2021"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-939"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2897513"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1121\/1.2020474"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1986.1168657"}],"event":{"name":"2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","location":"New Paltz, NY, USA","start":{"date-parts":[[2023,10,22]]},"end":{"date-parts":[[2023,10,25]]}},"container-title":["2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10248019\/10248047\/10248127.pdf?arnumber=10248127","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T17:41:08Z","timestamp":1696268468000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10248127\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,22]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/waspaa58266.2023.10248127","relation":{},"subject":[],"published":{"date-parts":[[2023,10,22]]}}}