{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:21:55Z","timestamp":1764400915119,"version":"3.46.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11248982","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"567-572","source":"Crossref","is-referenced-by-count":0,"title":["Emotional Text-To-Speech Based on Mutual-Information-Guided Emotion-Timbre Disentanglement"],"prefix":"10.1109","author":[{"given":"Jianing","family":"Yang","sequence":"first","affiliation":[{"name":"The University of Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sheng","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Science Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Takahiro","family":"Shinozaki","sequence":"additional","affiliation":[{"name":"Institute of Science Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuki","family":"Saito","sequence":"additional","affiliation":[{"name":"The University of Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hiroshi","family":"Saruwatari","sequence":"additional","affiliation":[{"name":"The University of Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/icassp.2018.8461368"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.48550\/arXiv.1609.03499"},{"key":"ref4","article-title":"Close to Human Quality TTS with Transformer","volume":"abs\/1809.08895","author":"Li","year":"2018","journal-title":"ArXiv"},{"key":"ref5","first-page":"7586","article-title":"Non-autoregressive neural text-to-speech","volume-title":"International conference on machine learning, PMLR","author":"Peng","year":"2020"},{"key":"ref6","article-title":"Fastspeech: Fast, robust and controllable text to speech","volume":"32","author":"Ren","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref7","article-title":"Fastspeech 2: Fast and highquality end-to-end text to speech","author":"Ren","year":"2021","journal-title":"ICLR"},{"key":"ref8","article-title":"Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End Speech Synthesis","volume-title":"International Conference on Machine Learning","author":"Wang","year":"2018"},{"key":"ref9","article-title":"MetaStyleSpeech: Multi-Speaker Adaptive Text-to-Speech Generation","volume":"abs\/2106.03153","author":"Min","year":"2021","journal-title":"ArXiv"},{"key":"ref10","article-title":"Towards End-to-End Prosody Transfer for Expressive Speech Synthesis with Tacotron","volume-title":"International Conference on Machine Learning","author":"Skerry-Ryan","year":"2018"},{"key":"ref11","first-page":"531","article-title":"Mutual Information Neural Estimation","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"80","author":"Belghazi","year":"2018"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/ICASSP40776.2020.9054591"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.21437\/Interspeech.2023-2403"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/TASLP.2021.3129994"},{"key":"ref15","article-title":"High Fidelity Neural Audio Compression","volume":"abs\/2210.13438","author":"D\u2019efossez","year":"2022","journal-title":"ArXiv"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.21437\/Interspeech.2018-1034"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/iscslp49672.2021.9362098"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/icassp43922.2022.9746744"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/wacv56688.2023.00396"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1002\/cpa.3160280102"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.21437\/interspeech.2021-660"},{"key":"ref22","article-title":"Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)","author":"Clevert","year":"2015","journal-title":"arXiv: Learning"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1016\/j.specom.2021.11.006"},{"key":"ref24","article-title":"HiFi-GAN: Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis","volume-title":"ArXiv","volume":"abs\/2010.05646","author":"Kong","year":"2020"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11248982.pdf?arnumber=11248982","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:19:49Z","timestamp":1764400789000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11248982\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11248982","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}