{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T09:35:25Z","timestamp":1782380125695,"version":"3.54.5"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001352","name":"National University of Singapore","doi-asserted-by":"publisher","award":["R-263-000-C35-133\/731"],"award-info":[{"award-number":["R-263-000-C35-133\/731"]}],"id":[{"id":"10.13039\/501100001352","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Mag."],"published-print":{"date-parts":[[2019,1]]},"DOI":"10.1109\/msp.2018.2875195","type":"journal-article","created":{"date-parts":[[2018,12,25]],"date-time":"2018-12-25T19:47:00Z","timestamp":1545767220000},"page":"95-102","source":"Crossref","is-referenced-by-count":15,"title":["Speech-to-Singing Voice Conversion: The Challenges and Strategies for Improving Vocal Conversion Processes"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7281-1329","authenticated-orcid":false,"given":"Karthika","family":"Vijayan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9158-9401","authenticated-orcid":false,"given":"Haizhou","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tomoki","family":"Toda","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref33","year":"2014","journal-title":"Singing voice audio dataset"},{"key":"ref32","first-page":"756","article-title":"A singing voice database in Basque for statistical singing synthesis of bertsolaritza","author":"sarasola","year":"0","journal-title":"Proc Language Resources and Evaluation Conf (LREC)"},{"key":"ref31","year":"2008","journal-title":"Singing voice research database"},{"key":"ref30","first-page":"2148","article-title":"I2R speech2singing perfects everyone&#x2019;s singing","author":"dong","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref34","year":"2018","journal-title":"RAVDESS"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ASPAA.2007.4393001"},{"key":"ref11","first-page":"4005","article-title":"Vocal conversion from speaking voice to singing voice using STRAIGHT","author":"saitou","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref12","first-page":"1421","article-title":"Voice conversion: From spoken vowels to singing vowels","author":"nwe","year":"0","journal-title":"Proc 2010 IEEE Int Conf Multimedia and Expo"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2017.8282289"},{"key":"ref14","article-title":"Analysis of speech and singing signals for temporal alignment","author":"vijayan","year":"0","journal-title":"Proc APSIPA Annual Summit and Conf"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2017.01.008"},{"key":"ref16","first-page":"2499","article-title":"A comparative study of spectral transformation techniques for singing voice synthesis","author":"lee","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2013.6694316"},{"key":"ref18","article-title":"NUS-HLT spoken lyrics and singing (SLS) corpus","author":"gao","year":"0","journal-title":"Proc Int Conf Orange Technologies (ICOT)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6287908"},{"key":"ref28","first-page":"712","article-title":"An investigation of multi-speaker training for wavenet vocoder","author":"hayashi","year":"0","journal-title":"Automatic Speech Recognition and Understanding (ASRU) 2017 IEEE Workshop on"},{"key":"ref4","first-page":"21","article-title":"The level of the &#x2018;singing formant&#x2019; and the source spectra of professional bass singers","volume":"11","author":"sundberg","year":"1970","journal-title":"STL-Quarterly Progress and Status Report"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2818408"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"703","DOI":"10.1007\/978-1-4939-0755-7_16","article-title":"The human voice in speech and singing","author":"lindblom","year":"2014","journal-title":"Springer Handbook of Acoustics"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1121\/1.402929"},{"key":"ref29","first-page":"1978","article-title":"A voice conversion framework with tandem feature sparse representation and speaker-adapted wavenet vocoder","author":"sisman","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1007\/978-1-4613-8202-7_3","article-title":"Dynamic characteristics of voice fundamental frequency in speech and singing","author":"fujisaki","year":"1983","journal-title":"The Production of Speech"},{"key":"ref8","article-title":"Segmentation of speech signals in template-based speech to singing conversion","author":"cen","year":"0","journal-title":"Proc APSIPA Annual Summit and Conf"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2015.2424572"},{"key":"ref2","year":"2008","journal-title":"Realivox"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288920"},{"key":"ref1","first-page":"4009","article-title":"VOCALOID - commercial singing synthesizer based on sample concatenation","author":"kenmochi","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s12046-011-0043-3"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2015EDP7457"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2017.8282110"},{"key":"ref23","first-page":"165","article-title":"Deep neural network-based speaker embeddings for end-to-end speaker verification","author":"snyder","year":"0","journal-title":"Proceedings of the 2016 IEEE Workshop on Spoken Language Technology (SLT)"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1037\/a0018423"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.4337\/9781781003152.00014"}],"container-title":["IEEE Signal Processing Magazine"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/79\/8588402\/08588407.pdf?arnumber=8588407","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,12]],"date-time":"2023-09-12T12:40:49Z","timestamp":1694522449000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8588407\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,1]]},"references-count":34,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/msp.2018.2875195","relation":{},"ISSN":["1053-5888","1558-0792"],"issn-type":[{"value":"1053-5888","type":"print"},{"value":"1558-0792","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,1]]}}}