{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T19:54:27Z","timestamp":1725738867001},"reference-count":35,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.23919\/apsipa.2018.8659502","type":"proceedings-article","created":{"date-parts":[[2019,3,18]],"date-time":"2019-03-18T23:11:49Z","timestamp":1552950709000},"page":"983-989","source":"Crossref","is-referenced-by-count":2,"title":["Sequential Generation of Singing F0 Contours from Musical Note Sequences Based on WaveNet"],"prefix":"10.23919","author":[{"given":"Yusuke","family":"Wada","sequence":"first","affiliation":[]},{"given":"Ryo","family":"Nishikimi","sequence":"additional","affiliation":[]},{"given":"Eita","family":"Nakamura","sequence":"additional","affiliation":[]},{"given":"Katsutoshi","family":"Itoyama","sequence":"additional","affiliation":[]},{"given":"Kazuyoshi","family":"Yoshii","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"359","article-title":"AIST Annotation for RWC Music Database","author":"goto","year":"2006","journal-title":"Proc ISMIR"},{"key":"ref32","first-page":"229","article-title":"RWC Music Database: Popular, Classical, and Jazz Music Databases","author":"goto","year":"2003","journal-title":"Proc ISMIR"},{"key":"ref31","first-page":"1","article-title":"Multi-scale Context Aggregation by Dilated Convolutions","author":"yu","year":"2016","journal-title":"Proc ICLR"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1109\/ICASSP.2017.7952165"},{"key":"ref35","first-page":"1","article-title":"Adam: A Method for Stochastic Optimization","author":"kingma","year":"2015","journal-title":"Proc ICLR"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1109\/ICASSP.2015.7178034"},{"key":"ref10","first-page":"1","author":"van den oord","year":"2016","journal-title":"WaveNet A Generative Model for Raw Audio"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref12","first-page":"1068","article-title":"Neural Audio Synthesis of Musical Notes with WaveNet Autoencoders","author":"engel","year":"2017","journal-title":"Proc ICML"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.3390\/app7121313"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/ICASSP.2015.7178816"},{"key":"ref15","first-page":"1964","article-title":"TTS Synthesis with Bidirectional LSTM Based Recurrent Neural Networks","author":"fan","year":"2014","journal-title":"Proceedings of the Conference of the International Speech Communication Association-Interspeech"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.21437\/Interspeech.2016-872"},{"key":"ref17","first-page":"3375","article-title":"A Multi-layer F0 Model for Singing Voice Synthesis Using A B-spline Representation with Intuitive Controls","author":"ardaillon","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref18","first-page":"2274","article-title":"An HMM-based Singing Voice Synthesis System","author":"saino","year":"2006","journal-title":"Proc INTERSPEECH"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.21437\/Interspeech.2016-1027"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/ICASSP.2017.7952166"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"1138","DOI":"10.21437\/Interspeech.2017-986","article-title":"Statistical Voice Conversion with WaveNet-based Waveform Generation","author":"kobayashi","year":"2017","journal-title":"Proc INTERSPEECH"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1162\/comj.2008.32.3.72"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/ICASSP.2017.7953215"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICASSP.2012.6287908"},{"key":"ref29","first-page":"658","article-title":"A Graphical Model for Recognizing Sung Melodies","author":"raphael","year":"2005","journal-title":"Proc ISMIR"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1016\/j.specom.2005.01.010"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/ICASSP.2014.6854295"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-161","article-title":"A Stochastic Model of Singing Voice F0 Contours for Characterizing Expressive Dynamic Components","author":"ohishi","year":"2012","journal-title":"Proc INTERSPEECH"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-63","article-title":"Voice Conversion from Unaligned Corpora Using Variational Autoencoding Wasserstein Generative Adversarial Networks","author":"hsu","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref9","first-page":"1","article-title":"SampleRNN: An Unconditional End-to-End Neural Audio Generation Model","author":"mehri","year":"2017","journal-title":"Proc ICLR"},{"key":"ref1","first-page":"4009","article-title":"VOCALOID-Commercial Singing Synthesizer Based on Sample Concatenation","author":"kenmochi","year":"2007","journal-title":"Proc INTERSPEECH"},{"key":"ref20","first-page":"15","article-title":"The AT&T NextGen TTS System","author":"beutnagel","year":"1999","journal-title":"Proc Joint ASA\/EAA\/DAEA Meeting"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1016\/j.csl.2006.01.002"},{"key":"ref21","first-page":"395","article-title":"Segment Selection in the L&H Realspeak Laboratory TTS System","author":"coorman","year":"2000","journal-title":"Proc Spoken Language Processing"},{"key":"ref24","article-title":"A Note on the Physiological and Physical Basis for the Phrase and Accent Components in the Voice Fundamental Frequency Contour","author":"fujisaki","year":"1988","journal-title":"Vocal Physiology Voice Production Mechanisms and Functions"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/TASLP.2015.2418576"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1109\/TASLP.2014.2331102"},{"key":"ref25","first-page":"175","article-title":"On the Detection of Melody Notes in Polyphonic Audio","author":"paiva","year":"2005","journal-title":"Proc ISMIR"}],"event":{"name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2018,11,12]]},"location":"Honolulu, HI, USA","end":{"date-parts":[[2018,11,15]]}},"container-title":["2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8648538\/8659446\/08659502.pdf?arnumber=8659502","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T16:47:43Z","timestamp":1694710063000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8659502\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":35,"URL":"https:\/\/doi.org\/10.23919\/apsipa.2018.8659502","relation":{},"subject":[],"published":{"date-parts":[[2018,11]]}}}