{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T10:55:13Z","timestamp":1730199313226,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,13]]},"DOI":"10.1109\/asru51503.2021.9687876","type":"proceedings-article","created":{"date-parts":[[2022,2,3]],"date-time":"2022-02-03T20:31:00Z","timestamp":1643920260000},"page":"1162-1169","source":"Crossref","is-referenced-by-count":0,"title":["Towards Using Heterogeneous Relation Graphs for End-to-End TTS"],"prefix":"10.1109","author":[{"given":"Amrith","family":"Setlur","sequence":"first","affiliation":[{"name":"Carnegie Mellon University,Machine Learning Department"}]},{"given":"Aman","family":"Madaan","sequence":"additional","affiliation":[{"name":"Language Technologies Institute"}]},{"given":"Tanmay","family":"Parekh","sequence":"additional","affiliation":[{"name":"Language Technologies Institute"}]},{"given":"Yiming","family":"Yang","sequence":"additional","affiliation":[{"name":"Language Technologies Institute"}]},{"given":"Alan W","family":"Black","sequence":"additional","affiliation":[{"name":"Language Technologies Institute"}]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref30","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"0","journal-title":"3rd International Conference on Learning Representations ICLR 2015"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3050"},{"key":"ref11","article-title":"Neural discrete representation learning","author":"oord","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2938863"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(00)00074-1"},{"journal-title":"Building Voices in the Festival Speech Synthesis System","year":"2000","author":"black","key":"ref14"},{"key":"ref15","article-title":"Semi-supervised classification with graph convolutional networks","author":"kipf","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2005-72"},{"key":"ref17","first-page":"125","article-title":"Wavenet: A generative model for raw audio","author":"van den oord","year":"0","journal-title":"9th ISCA Speech Synthesis Workshop"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2005-284"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref27","first-page":"427","article-title":"On some biases encountered in modern audio quality listening tests-a review","volume":"56","author":"zielinski","year":"2008","journal-title":"Journal of the Audio Engineering Society"},{"key":"ref3","article-title":"Tacotron: Towards end-to-end speech synthesis","author":"wang","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref6","first-page":"70","article-title":"Festvox: Tools for creation and analyses of large speech corpora","author":"anumanchipalli","year":"2011","journal-title":"Workshop on Very Large Scale Phonetics Research"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016706"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053355"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2228"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"1039","DOI":"10.1016\/j.specom.2009.04.004","article-title":"Statistical parametric speech synthesis","volume":"51","author":"zen","year":"2009","journal-title":"Speech Communication"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413513"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.541110"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-628"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683857"},{"key":"ref21","article-title":"Graphpb: Graphical representations of prosody boundary in speech synthesis","author":"sun","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref24","article-title":"Clustergen: A statistical parametric synthe-sizer using trajectory modeling","author":"black","year":"0","journal-title":"Ninth International Conference on Spoken Language Processing"},{"journal-title":"The LJ speech dataset","year":"2017","author":"ito","key":"ref23"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2005-79"},{"key":"ref25","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"ArXiv Preprint"}],"event":{"name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2021,12,13]]},"location":"Cartagena, Colombia","end":{"date-parts":[[2021,12,17]]}},"container-title":["2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9687821\/9687855\/09687876.pdf?arnumber=9687876","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T20:41:57Z","timestamp":1652733717000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9687876\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,13]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/asru51503.2021.9687876","relation":{},"subject":[],"published":{"date-parts":[[2021,12,13]]}}}