{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T23:58:36Z","timestamp":1774742316377,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,11]],"date-time":"2022-12-11T00:00:00Z","timestamp":1670716800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,11]],"date-time":"2022-12-11T00:00:00Z","timestamp":1670716800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,11]]},"DOI":"10.1109\/iscslp57327.2022.10038075","type":"proceedings-article","created":{"date-parts":[[2023,2,8]],"date-time":"2023-02-08T13:53:24Z","timestamp":1675864404000},"page":"96-100","source":"Crossref","is-referenced-by-count":4,"title":["Mix-Guided VC: Any-to-many Voice Conversion by Combining ASR and TTS Bottleneck Features"],"prefix":"10.1109","author":[{"given":"Zeqing","family":"Zhao","sequence":"first","affiliation":[{"name":"AI Lab, Lenovo Research,Beijing,China"}]},{"given":"Sifan","family":"Ma","sequence":"additional","affiliation":[{"name":"AI Lab, Lenovo Research,Beijing,China"}]},{"given":"Yan","family":"Jia","sequence":"additional","affiliation":[{"name":"AI Lab, Lenovo Research,Beijing,China"}]},{"given":"Jingyu","family":"Hou","sequence":"additional","affiliation":[{"name":"AI Lab, Lenovo Research,Beijing,China"}]},{"given":"Lin","family":"Yang","sequence":"additional","affiliation":[{"name":"AI Lab, Lenovo Research,Beijing,China"}]},{"given":"Junjie","family":"Wang","sequence":"additional","affiliation":[{"name":"AI Lab, Lenovo Research,Beijing,China"}]}],"member":"263","reference":[{"key":"ref13","first-page":"17 022","article-title":"Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis","volume":"33","author":"kong","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746139"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747625"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414788"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2663"},{"key":"ref1","first-page":"5210","article-title":"Autovc: Zero-shot voice style transfer with only autoencoder loss","author":"qian","year":"2019","journal-title":"Int Conference on Machine Learning"},{"key":"ref17","article-title":"Fastspeech: Fast, robust and controllable text to speech","volume":"32","author":"ren","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref16","first-page":"864","article-title":"Sptts: Parallel speech synthesis without extra aligner model","author":"zhao","year":"2021","journal-title":"2021 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"},{"key":"ref19","article-title":"Aishell-3: A multi-speaker mandarin tts corpus and the baselines","author":"shi","year":"2020","journal-title":"arXiv preprint arXiv 2010 11567"},{"key":"ref18","article-title":"Ecapatdnn: Emphasized channel attention, propagation and aggregation in tdnn based speaker verification","author":"desplanques","year":"2020","journal-title":"arXiv preprint arXiv 2005 07091"},{"key":"ref24","article-title":"Decoupled weight decay regularization","author":"loshchilov","year":"2017","journal-title":"arXiv preprint arXiv 1711 05847"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383498"},{"key":"ref25","article-title":"Visualizing data using t-sne","volume":"9","author":"van der maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref20","article-title":"Aishell-2: Transforming mandarin asr research into industrial scale","author":"du","year":"2018","journal-title":"arXiv preprint arXiv 1808 10393"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"ref21","article-title":"Voxceleb: a large-scale speaker identification dataset","author":"nagrani","year":"2017","journal-title":"arXiv preprint arXiv 1706 08612"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2892235"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2016.7552917"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSLP49672.2021.9362095"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-319"},{"key":"ref3","article-title":"Cycleganvc3: Examining and improving cyclegan-vcs for mel-spectrogram conversion","author":"kaneko","year":"2020","journal-title":"arXiv preprint arXiv 2010 11419"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3066047"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747140"}],"event":{"name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","location":"Singapore, Singapore","start":{"date-parts":[[2022,12,11]]},"end":{"date-parts":[[2022,12,14]]}},"container-title":["2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10037756\/10037573\/10038075.pdf?arnumber=10038075","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,6]],"date-time":"2023-03-06T13:37:41Z","timestamp":1678109861000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10038075\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,11]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/iscslp57327.2022.10038075","relation":{},"subject":[],"published":{"date-parts":[[2022,12,11]]}}}