{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T18:01:59Z","timestamp":1743012119432,"version":"3.28.0"},"reference-count":42,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/slt.2018.8639507","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T23:36:34Z","timestamp":1550187394000},"page":"282-289","source":"Crossref","is-referenced-by-count":16,"title":["Adaptive Wavenet Vocoder for Residual Compensation in GAN-Based Voice Conversion"],"prefix":"10.1109","author":[{"given":"Berrak","family":"Sisman","sequence":"first","affiliation":[]},{"given":"Mingyang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Sakriani","family":"Sakti","sequence":"additional","affiliation":[]},{"given":"Haizhou","family":"Li","sequence":"additional","affiliation":[]},{"given":"Satoshi","family":"Nakamura","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.21437\/Interspeech.2018-1131"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-986","article-title":"Statistical voice conversion with wavenet-based waveform generation","author":"kobayashi","year":"2017","journal-title":"InterSpeech"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1109\/SLT.2018.8639535"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-553","article-title":"Conditional generative adversarial nets classifier for spoken language identification","author":"shen","year":"2017","journal-title":"InterSpeech"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1109\/ICCV.2017.629"},{"key":"ref37","article-title":"Digital speech processing, synthesis, and recognition(revised and expanded)","author":"furui","year":"2000","journal-title":"Digital Speech Processing Synthesis and Recognition"},{"key":"ref36","article-title":"Learning Latent Representations for Speech Generation and Transformation","author":"hsu","year":"2017","journal-title":"ArXiv"},{"key":"ref35","article-title":"Unsupervised Learning of Disentangled and Interpretable Representations from Sequential Data","author":"hsu","year":"2017","journal-title":"ArXiv"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"1068","DOI":"10.21437\/Interspeech.2008-330","article-title":"Probabilistic feature mapping based on trajectory HMMs","author":"zen","year":"2008","journal-title":"InterSpeech"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/ICASSP.2014.6855137"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1109\/PACRIM.1993.407206"},{"key":"ref11","article-title":"Sparse representation of phonetic features for voice conversion with and without parallel data","author":"sisman","year":"2017","journal-title":"IEEE ASRU"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/TASLP.2014.2353991"},{"key":"ref13","first-page":"2278","article-title":"High-order sequence modeling using speaker-dependent recurrent temporal restricted Boltzmann machines for voice conversion","author":"nakashika","year":"2014","journal-title":"Proceedings of the Conference of the International Speech Communication Association-Interspeech"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/ICASSP.2015.7178896"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/ICME.2016.7552917"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/APSIPA.2016.7820786"},{"key":"ref17","article-title":"Voice Conversion from Unaligned Corpora using Variational Autoencoding Wasserstein Generative Adversarial Networks","author":"hsu","year":"2017","journal-title":"ArXiv"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/TASLP.2016.2522655"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/ICASSP.2015.7178894"},{"key":"ref28","article-title":"Wavenet: A generative model for raw audio","author":"van den oord","year":"2016","journal-title":"arXiv preprint arXiv 1609 09861"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/TASL.2010.2041699"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"712","DOI":"10.1109\/ASRU.2017.8269007","article-title":"An investigation of multi-speaker training for wavenet vocoder","author":"hayashi","year":"2017","journal-title":"IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/TASL.2007.907344"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/SLT.2012.6424242"},{"key":"ref29","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-314","article-title":"Speaker-dependent wavenet vocoder","author":"tamamori","year":"2017","journal-title":"InterSpeech"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/TASL.2011.2165944"},{"key":"ref8","first-page":"27","article-title":"Parallel dictionary learning for multimodal voice conversion using matrix factorization","author":"aihara","year":"2016","journal-title":"InterSpeech"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"1506","DOI":"10.1109\/TASLP.2014.2333242","article-title":"Exemplar-based sparse representation with residual compensation for voice conversion","volume":"22","author":"wu","year":"2014","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/ISCAS.1991.176405"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/ICASSP.2016.7472761"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/ICASSP.1988.196671"},{"key":"ref20","article-title":"Analysis of spectral enhancement using global variance in HMM-based speech synthesis","author":"nose","year":"2014","journal-title":"InterSpeech"},{"key":"ref22","article-title":"Parallel-data-free voice conversion using cycle-consistent adversarial networks","author":"kaneko","year":"2017","journal-title":"ArXiv"},{"key":"ref21","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-970","article-title":"Sequence-to-sequence voice conversion with similarity metric learned using generative adversarial networks","author":"kaneko","year":"2017","journal-title":"InterSpeech"},{"doi-asserted-by":"publisher","key":"ref42","DOI":"10.1109\/APSIPA.2017.8282288"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1620","article-title":"Conditional generative adversarial networks for speech enhancement and noise-robust speaker verification","author":"michelsanti","year":"2017","journal-title":"InterSpeech"},{"doi-asserted-by":"publisher","key":"ref41","DOI":"10.4337\/9781781003152.00014"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1428","article-title":"Segan: Speech enhancement generative adversarial network","author":"pascual","year":"2017","journal-title":"InterSpeech"},{"key":"ref26","article-title":"Statistical parametric speech synthesis incorporating generative adversarial networks","author":"saito","year":"2017","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/ICASSP.2017.7953090"}],"event":{"name":"2018 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2018,12,18]]},"location":"Athens, Greece","end":{"date-parts":[[2018,12,21]]}},"container-title":["2018 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8632666\/8639030\/08639507.pdf?arnumber=8639507","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T08:06:03Z","timestamp":1643270763000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8639507\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/slt.2018.8639507","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}