{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:41:29Z","timestamp":1767339689692,"version":"3.37.3"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,8,23]],"date-time":"2021-08-23T00:00:00Z","timestamp":1629676800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,8,23]],"date-time":"2021-08-23T00:00:00Z","timestamp":1629676800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100012681","name":"CNRS","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100012681","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,8,23]]},"DOI":"10.23919\/eusipco54536.2021.9616249","type":"proceedings-article","created":{"date-parts":[[2021,12,8]],"date-time":"2021-12-08T21:55:53Z","timestamp":1639000553000},"page":"31-35","source":"Crossref","is-referenced-by-count":7,"title":["Improving transfer of expressivity for end-to-end multispeaker text-to-speech synthesis"],"prefix":"10.23919","author":[{"given":"Ajinkya","family":"Kulkarni","sequence":"first","affiliation":[{"name":"Université de Lorraine, CNRS, Inria, LORIA,Nancy,France,F-54000"}]},{"given":"Vincent","family":"Colotte","sequence":"additional","affiliation":[{"name":"Université de Lorraine, CNRS, Inria, LORIA,Nancy,France,F-54000"}]},{"given":"Denis","family":"Jouvet","sequence":"additional","affiliation":[{"name":"Université de Lorraine, CNRS, Inria, LORIA,Nancy,France,F-54000"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59430-5_13"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1297"},{"key":"ref12","article-title":"Improved deep metric learning with multi-class n-pair loss objective","author":"sohn","year":"2016","journal-title":"NIPS"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3390\/sym11091066"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K16-1002"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_42"},{"journal-title":"The siwis French speech synthesis database","year":"2017","author":"yamagishi","key":"ref17"},{"key":"ref18","article-title":"Synpaflex-corpus: An expressive French audiobooks corpus dedicated to expressive speech synthesis","author":"sini","year":"2018","journal-title":"LREC"},{"key":"ref19","article-title":"Conditional variational autoencoder for text driven expressive audiovisual speech synthesis","author":"dahmani","year":"2019","journal-title":"InterSpeech"},{"key":"ref4","article-title":"Towards end-to-end prosody transfer for expressive speech synthesis with Tacotron","volume":"abs 1803 9047","author":"skerry-ryan","year":"2018","journal-title":"ArXiv"},{"key":"ref3","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","volume":"abs 1803 9017","author":"wang","year":"2018","journal-title":"ArXiv"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1113"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683623"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053678"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683501"},{"key":"ref2","article-title":"Hierarchical generative modeling for controllable speech synthesis","volume":"abs 1810 7217","author":"hsu","year":"0","journal-title":"ArXiv"},{"key":"ref9","article-title":"Visualization and interpretation of latent spaces for controlling expressive speech synthesis through audio analysis","author":"tits","year":"2018","journal-title":"InterSpeech"},{"key":"ref1","article-title":"Tacotron: A fully end-to-end text-to-speech synthesis model","volume":"abs 1703 10135","author":"wang","year":"2017","journal-title":"ArXiv"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref22","article-title":"Expressive speech synthesis in MARY TTS using audiobook data and emotion","author":"charfuelan","year":"2013","journal-title":"InterSpeech"},{"key":"ref21","first-page":"213","article-title":"Mean opinion score (MOS) revisited: methods and applications, limitations and alternatives","author":"streijl","year":"2014","journal-title":"Multimedia Systems"}],"event":{"name":"2021 29th European Signal Processing Conference (EUSIPCO)","start":{"date-parts":[[2021,8,23]]},"location":"Dublin, Ireland","end":{"date-parts":[[2021,8,27]]}},"container-title":["2021 29th European Signal Processing Conference (EUSIPCO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9615915\/9615917\/09616249.pdf?arnumber=9616249","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T00:14:53Z","timestamp":1659485693000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9616249\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,23]]},"references-count":22,"URL":"https:\/\/doi.org\/10.23919\/eusipco54536.2021.9616249","relation":{},"subject":[],"published":{"date-parts":[[2021,8,23]]}}}