{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T15:07:44Z","timestamp":1725808064206},"reference-count":20,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1109\/icassp40776.2020.9053390","type":"proceedings-article","created":{"date-parts":[[2020,4,9]],"date-time":"2020-04-09T16:21:13Z","timestamp":1586449273000},"page":"6689-6693","source":"Crossref","is-referenced-by-count":17,"title":["A Unified Sequence-to-Sequence Front-End Model for Mandarin Text-to-Speech \nSynthesis"],"prefix":"10.1109","author":[{"given":"Junjie","family":"Pan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiang","family":"Yin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiling","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shichao","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zejun","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuxuan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367010"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLC.2008.4620965"},{"key":"ref12","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref13","article-title":"Towards end-to-end prosody transfer for expressive speech synthesis with tacotron","author":"skerry-ryan","year":"2018","journal-title":"arXiv preprint arXiv 1803 09047"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref15","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/DICTA.2016.7797053"},{"key":"ref17","article-title":"Transformer: A novel neural network architecture for language \nunderstanding","author":"uszkoreit","year":"2017","journal-title":"Google Research Blog"},{"key":"ref18","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","author":"wang","year":"2018","journal-title":"arXiv preprint arXiv 1803 09017"},{"key":"ref19","article-title":"Effective use of variational embedding capacity in expressive end-to-end speech synthesis","author":"battenberg","year":"2019","journal-title":"arXiv preprint arXiv 1906 03008"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1162\/089120100561746"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054695"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682368"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.941038"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1400"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSLP.2010.5684835"},{"key":"ref2","article-title":"Efficient neural audio synthesis","author":"kalchbrenner","year":"2018","journal-title":"arXiv preprint arXiv 1802 08908"},{"key":"ref9","article-title":"Graphemeto-phoneme conversion for chinese text-to-speech","author":"xu","year":"2004","journal-title":"Eighth International Conference on Spoken Language Processing"},{"key":"ref1","article-title":"Tacotron: Towards end-to-end speech synthesis","author":"wang","year":"2017","journal-title":"arXiv preprint arXiv 1703 10593"},{"article-title":"Conditional random fields: Probabilistic models for segmenting and labeling sequence data","year":"2001","author":"lafferty","key":"ref20"}],"event":{"name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2020,5,4]]},"location":"Barcelona, Spain","end":{"date-parts":[[2020,5,8]]}},"container-title":["ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal \nProcessing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9040208\/9052899\/09053390.pdf?arnumber=9053390","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T20:11:03Z","timestamp":1656360663000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9053390\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/icassp40776.2020.9053390","relation":{},"subject":[],"published":{"date-parts":[[2020,5]]}}}