{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:57:38Z","timestamp":1764403058852,"version":"3.28.0"},"reference-count":33,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,24]]},"DOI":"10.1109\/iscslp49672.2021.9362098","type":"proceedings-article","created":{"date-parts":[[2021,3,1]],"date-time":"2021-03-01T23:37:05Z","timestamp":1614641825000},"page":"1-5","source":"Crossref","is-referenced-by-count":7,"title":["Estimating Mutual Information in Prosody Representation for Emotional Prosody Transfer in Speech Synthesis"],"prefix":"10.1109","author":[{"given":"Guangyan","family":"Zhang","sequence":"first","affiliation":[{"name":"The Chinese University of Hong Kong,Department of Electronic Engineering"}]},{"given":"Shirong","family":"Qiu","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong,Department of Electronic Engineering"}]},{"given":"Ying","family":"Qin","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong,Department of Electronic Engineering"}]},{"given":"Tan","family":"Lee","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong,Department of Electronic Engineering"}]}],"member":"263","reference":[{"key":"ref33","first-page":"341","article-title":"Praat, a system for doing phonetics by computer","volume":"5","author":"boersma","year":"2001","journal-title":"Glot Int"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1121\/1.1458024"},{"key":"ref31","volume":"22","author":"zwicker","year":"2013","journal-title":"Psychoacoustics Facts and Models"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683172"},{"key":"ref10","first-page":"274","article-title":"Emotional transplant in statistical speech synthesis based on emotion additive model","author":"ohtani","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref11","article-title":"Towards speaking style transplantation in speech synthesis","author":"lorenzo-trueba","year":"2013","journal-title":"Eighth ISCA Workshop on Speech Synthesis"},{"key":"ref12","first-page":"4693","article-title":"Towards end-to-end prosody transfer for expressive speech synthesis with tacotron","author":"skerry-ryan","year":"2018","journal-title":"Proc ICML"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2385478"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e88-d.11.2484"},{"key":"ref15","first-page":"5180","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","author":"wang","year":"2018","journal-title":"Proc ICML"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.876129"},{"key":"ref17","first-page":"171","article-title":"Prosodic analysis of story-telling discourse modes and narrative situations oriented to text-to-speech synthesis","author":"monta\u00f1o","year":"2013","journal-title":"Proc Eighth ISCA Workshop on Speech Synthesis"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461888"},{"key":"ref19","first-page":"970","article-title":"Speech factorization for hmm-tts based on cluster adaptive training","author":"latorre","year":"2012","journal-title":"Proc INTERSPEECH"},{"year":"0","key":"ref28","article-title":"Webrtc voice activity detector"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1002\/9781118706664"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683561"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1136\/jnnp.64.3.375"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1037\/\/0033-2909.99.2.143"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1515\/9783110301465"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1080\/02699939208411068"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(02)00084-5"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1080\/01690961003589492"},{"key":"ref9","first-page":"219","article-title":"Towards emotional speech synthesis: A rule based approach","author":"zovato","year":"2013","journal-title":"Proc Fifth ISCA Workshop on Speech Synthesis"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511816338"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/89.848223"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1769"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1309933111"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1002\/cpa.3160280102"},{"key":"ref25","first-page":"531","article-title":"Mutual information neural estimation","author":"belghazi","year":"2018","journal-title":"Proc ICML"}],"event":{"name":"2021 12th International Symposium on Chinese Spoken Language Processing (ISCSLP)","start":{"date-parts":[[2021,1,24]]},"location":"Hong Kong","end":{"date-parts":[[2021,1,27]]}},"container-title":["2021 12th International Symposium on Chinese Spoken Language Processing (ISCSLP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9362048\/9362049\/09362098.pdf?arnumber=9362098","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,5]],"date-time":"2021-04-05T23:20:44Z","timestamp":1617664844000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9362098\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,24]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/iscslp49672.2021.9362098","relation":{},"subject":[],"published":{"date-parts":[[2021,1,24]]}}}