{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:56:11Z","timestamp":1776887771730,"version":"3.51.2"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,4]]},"DOI":"10.1109\/icassp49357.2023.10095960","type":"proceedings-article","created":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T17:28:30Z","timestamp":1683307710000},"page":"1-5","source":"Crossref","is-referenced-by-count":7,"title":["Preserving Background Sound in Noise-Robust Voice Conversion Via Multi-Task Learning"],"prefix":"10.1109","author":[{"given":"Jixun","family":"Yao","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU) School of Computer Science,Xi&#x2019;an,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Lei","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU) School of Computer Science,Xi&#x2019;an,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qing","family":"Wang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU) School of Computer Science,Xi&#x2019;an,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pengcheng","family":"Guo","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU) School of Computer Science,Xi&#x2019;an,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziqian","family":"Ning","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU) School of Computer Science,Xi&#x2019;an,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Xie","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU) School of Computer Science,Xi&#x2019;an,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hai","family":"Li","sequence":"additional","affiliation":[{"name":"iQIYI Inc,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junhui","family":"Liu","sequence":"additional","affiliation":[{"name":"iQIYI Inc,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Danming","family":"Xie","sequence":"additional","affiliation":[{"name":"iQIYI Inc,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683561"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-570"},{"key":"ref15","first-page":"814","article-title":"Noisy-to-noisy voice conversion framework with denoising model","author":"xie","year":"2021","journal-title":"Proc APSIPA"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2020.3025410"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054254"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2022.01.003"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2017.01.008"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3038524"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/SPCOM50965.2020.9179583"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747894"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746962"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2537"},{"key":"ref24","author":"rafii","year":"2017","journal-title":"The MUSDB18 corpus for music separation"},{"key":"ref23","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","author":"kong","year":"2020","journal-title":"Proc NeurIPS"},{"key":"ref26","first-page":"626","article-title":"SDR&#x2013;half-baked or well done?","author":"roux","year":"2019","journal-title":"Proc ICASSP"},{"key":"ref25","article-title":"Superseded-cstr vctk corpus: English multi-speaker corpus for cstr voice cloning toolkit","author":"veaux","year":"2016"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1983"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1193"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9415105"},{"key":"ref28","year":"2001","journal-title":"Method for objective measurements of perceived audio quality"},{"key":"ref27","first-page":"749","article-title":"Perceptual evaluation of speech quality (PESQ)-a new method for speech quality assessment of telephone networks and codecs","author":"rix","year":"2001","journal-title":"Proc ICASSP"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747743"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-283"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU51503.2021.9688277"},{"key":"ref4","first-page":"7554","article-title":"In-domain and out-of-domain data augmentation to improve children&#x2019;s speaker verification system in limited data scenario","author":"shahnawazuddin","year":"2020","journal-title":"Proc ICASSP"},{"key":"ref3","first-page":"4382","article-title":"Voice conversion based data augmentation to improve children&#x2019;s speech recognition in limited data scenario","author":"shahnawazuddin","year":"2020","journal-title":"Proc INTERSPEECH"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414257"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747020"}],"event":{"name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Rhodes Island, Greece","start":{"date-parts":[[2023,6,4]]},"end":{"date-parts":[[2023,6,10]]}},"container-title":["ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10094559\/10094560\/10095960.pdf?arnumber=10095960","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,20]],"date-time":"2023-11-20T19:07:59Z","timestamp":1700507279000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10095960\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,4]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/icassp49357.2023.10095960","relation":{},"subject":[],"published":{"date-parts":[[2023,6,4]]}}}