{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T04:45:31Z","timestamp":1747802731937,"version":"3.28.0"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9207653","type":"proceedings-article","created":{"date-parts":[[2020,9,29]],"date-time":"2020-09-29T20:40:33Z","timestamp":1601412033000},"page":"1-8","source":"Crossref","is-referenced-by-count":5,"title":["Vocoder-free End-to-End Voice Conversion with Transformer Network"],"prefix":"10.1109","author":[{"given":"June-Woo","family":"Kim","sequence":"first","affiliation":[]},{"given":"Ho-Young","family":"Jung","sequence":"additional","affiliation":[]},{"given":"Minho","family":"Lee","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"Parallel wavenet: Fast high-fidelity speech synthesis","year":"2017","author":"oord","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1789"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1007379606734","article-title":"Multitask learning","volume":"28","author":"caruana","year":"1997","journal-title":"Machine Learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1951"},{"article-title":"Efficient neural audio synthesis","year":"2018","author":"kalchbrenner","key":"ref15"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"devlin","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00756"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016706"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"article-title":"Deep complex networks","year":"2017","author":"trabelsi","key":"ref28"},{"key":"ref4","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref27","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"The Journal of Machine Learning Research"},{"article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","year":"2014","author":"chung","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1181"},{"key":"ref5","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref7","first-page":"1","article-title":"Mel frequency cepstral coefficients for music modeling","volume":"270","author":"logan","year":"2000","journal-title":"ISMIR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"article-title":"Wavenet: A generative model for raw audio","year":"2016","author":"oord","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"article-title":"Voice transformer network: Sequence-to-sequence voice conversion using transformer with text-to-speech pretraining","year":"2019","author":"huang","key":"ref20"},{"key":"ref22","first-page":"2177","article-title":"Neural word embedding as implicit matrix factorization","author":"levy","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref23","first-page":"807","article-title":"Rectified linear units improve restricted boltzmann machines","author":"nair","year":"2010","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"},{"article-title":"Adam: A method for stochastic optimization","year":"2014","author":"kingma","key":"ref26"},{"key":"ref25","article-title":"Tidigits ldc93s10","author":"leonard","year":"1993","journal-title":"Linguistic Data Consortium"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2020,7,19]]},"location":"Glasgow, United Kingdom","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09207653.pdf?arnumber=9207653","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T17:57:34Z","timestamp":1656439054000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9207653\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9207653","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}