{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T19:35:50Z","timestamp":1730230550908,"version":"3.28.0"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6,6]]},"DOI":"10.1109\/icassp39728.2021.9413400","type":"proceedings-article","created":{"date-parts":[[2021,5,13]],"date-time":"2021-05-13T15:53:45Z","timestamp":1620921225000},"page":"7083-7087","source":"Crossref","is-referenced-by-count":2,"title":["Semi-Supervised Learning for Singing Synthesis Timbre"],"prefix":"10.1109","author":[{"given":"Jordi","family":"Bonada","sequence":"first","affiliation":[]},{"given":"Merlijn","family":"Blaauw","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054582"},{"key":"ref11","first-page":"6306","article-title":"Neural discrete representation learning","author":"van den oord","year":"2017","journal-title":"Adv Neural Inf Process Syst (NIPS) 19"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054734"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1862"},{"key":"ref14","article-title":"Unsupervised speech decomposition via triple information bottleneck","volume":"abs 2004 11284","author":"qian","year":"2020","journal-title":"CoRR"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1761"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2018.8553236"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682897"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639535"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2236"},{"key":"ref28","first-page":"1118","article-title":"Speaker-dependent WaveNet vocoder","author":"tamamori","year":"2017","journal-title":"Proc of the 18th Annual Conf of the Intl Speech Commun Assoc (Interspeech)"},{"key":"ref4","article-title":"Singing synthesis: With a little help from my attention","volume":"abs 1912 5881","author":"angelini","year":"2019","journal-title":"CoRR"},{"key":"ref27","first-page":"6000","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Adv Neural Inf Process Syst (NIPS) 19"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053944"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1109"},{"key":"ref5","article-title":"ByteSing: A chinese singing voice synthesis system using duration allocated encoder-decoder acoustic models and WaveRNN vocoders","volume":"abs 2004 11012","author":"gu","year":"2020","journal-title":"CoRR"},{"key":"ref8","article-title":"HiFiSinger: Towards high-fidelity neural singing voice synthesis","volume":"abs 2009 1776","author":"chen","year":"2020","journal-title":"CoRR"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1410"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1722"},{"key":"ref9","first-page":"5210","article-title":"AutoVC: Zero-shot voice style transfer with only autoencoder loss","author":"qian","year":"2019","journal-title":"Proc Int Conf Machine Learn (ICML)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3390\/app7121313"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2016.7552917"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403249"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682656"},{"key":"ref24","first-page":"1143","article-title":"Google\u2019s next-generation real-time unit-selection synthesizer using sequence-to-sequence LSTM-based autoencoders","author":"wan","year":"2017","journal-title":"Proc of the 18th Annual Conf of the Intl Speech Commun Assoc (Interspeech)"},{"key":"ref23","article-title":"WaveNet: A generative model for raw audio","volume":"abs 1609 3499","author":"van den oord","year":"2016","journal-title":"CoRR"},{"key":"ref26","article-title":"Discrete autoencoders for sequence models","volume":"abs 1801 9797","author":"kaiser","year":"2018","journal-title":"CoRR"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijar.2008.11.006"}],"event":{"name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2021,6,6]]},"location":"Toronto, ON, Canada","end":{"date-parts":[[2021,6,11]]}},"container-title":["ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9413349\/9413350\/09413400.pdf?arnumber=9413400","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T11:41:07Z","timestamp":1652182867000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9413400\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,6]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/icassp39728.2021.9413400","relation":{},"subject":[],"published":{"date-parts":[[2021,6,6]]}}}