{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T02:51:06Z","timestamp":1764557466103,"version":"3.28.0"},"reference-count":29,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.23919\/apsipa.2018.8659465","type":"proceedings-article","created":{"date-parts":[[2019,3,18]],"date-time":"2019-03-18T23:11:49Z","timestamp":1552950709000},"page":"659-664","source":"Crossref","is-referenced-by-count":1,"title":["Prosody-aware subword embedding considering Japanese intonation systems and its application to DNN-based multi-dialect speech synthesis"],"prefix":"10.23919","author":[{"given":"Takanori","family":"Akiyama","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shinnosuke","family":"Takamichi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hiroshi","family":"Saruwatari","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"crossref","first-page":"1045","DOI":"10.21437\/Interspeech.2010-343","article-title":"Reccurent neural network based language model","author":"mikolov","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref11","first-page":"4724","article-title":"Continuous f0 in the source-excitation generation for HMM-based TTS: Do we need voiced\/unvoiced classification?","author":"latorre","year":"2011","journal-title":"Proc ICASSP"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.18653\/v1\/P16-1162"},{"year":"0","journal-title":"Sentence","key":"ref13"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/TASLP.2016.2522655"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.21437\/Interspeech.2016-589"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/TASL.2010.2064307"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"ref18","first-page":"879","article-title":"A study of speaker adaptation for DNN-based speech synthesis","author":"wu","year":"2015","journal-title":"Proc Inter-speech"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.21437\/Interspeech.2017-1038"},{"key":"ref28","first-page":"2266","article-title":"Maximum likelihood voice conversion based on GMM with STRAIGHT mixed excitation","author":"ohtani","year":"2006","journal-title":"Proc INTERSPEECH"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/ICASSP.2017.7953088"},{"key":"ref27","first-page":"1","article-title":"Aperiodicity extraction and control using mixed mode excitation and group delay manipulation for a high quality speech analysis, modification and synthesis system STRAIGHT","author":"kawahara","year":"2001","journal-title":"MAVEBA 2001"},{"key":"ref3","volume":"abs 1609 3499","author":"oord","year":"2016","journal-title":"WaveNet A Generative Model for Raw Audio"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICASSP.2016.7472737"},{"year":"2018","author":"takamichi","journal-title":"CPJD corpus Crowdsourced parallel speech corpus of japanese dialects","key":"ref29"},{"key":"ref5","first-page":"3961","author":"takamichi","year":"2017","journal-title":"Sampling-based speech parameter generation using moment-matching networks"},{"year":"0","journal-title":"Open JTalk","key":"ref8"},{"year":"0","journal-title":"CMU flite","key":"ref7"},{"key":"ref2","first-page":"7962","article-title":"Statistical parametric speech synthesis using deep neural networks","author":"zen","year":"0","journal-title":"Proc ICASSP"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.21437\/Interspeech.2017-521"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1016\/j.specom.2009.04.004"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1250\/ast.20.199"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1587\/transinf.2015EDP7457"},{"key":"ref21","volume":"abs 1711 354","author":"sonobe","year":"2017","journal-title":"JSUT corpus free large-scale japanese speech corpus for end-to-end speech synthesis"},{"key":"ref24","first-page":"644","article-title":"A simple, fast, and effective reparameterization of IBM model 2","author":"dyer","year":"2013","journal-title":"Proc NAACL"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1016\/j.specom.2016.09.001"},{"key":"ref26","first-page":"315","article-title":"Deep sparse rectifier neural networks","author":"glorot","year":"2011","journal-title":"Proc AISTATS"},{"key":"ref25","first-page":"131","article-title":"Recent development of open-source speech recognition engine julius","author":"lee","year":"2009","journal-title":"Proc APSIPA ASC"}],"event":{"name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2018,11,12]]},"location":"Honolulu, HI, USA","end":{"date-parts":[[2018,11,15]]}},"container-title":["2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8648538\/8659446\/08659465.pdf?arnumber=8659465","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T16:47:32Z","timestamp":1694710052000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8659465\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":29,"URL":"https:\/\/doi.org\/10.23919\/apsipa.2018.8659465","relation":{},"subject":[],"published":{"date-parts":[[2018,11]]}}}