{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T10:07:05Z","timestamp":1758190025974,"version":"3.44.0"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T00:00:00Z","timestamp":1754179200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T00:00:00Z","timestamp":1754179200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R&D Program of China","doi-asserted-by":"publisher","award":["2020AAA0107901"],"award-info":[{"award-number":["2020AAA0107901"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"NSFC","doi-asserted-by":"publisher","award":["61961043"],"award-info":[{"award-number":["61961043"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1109\/ialp68296.2024.11156902","type":"proceedings-article","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T17:32:18Z","timestamp":1758043938000},"page":"255-259","source":"Crossref","is-referenced-by-count":0,"title":["Emotional Speech Synthesis Based on Valence-Arousal-Dominance Model and Multi-Feature Codebook"],"prefix":"10.1109","author":[{"given":"Ying","family":"Liu","sequence":"first","affiliation":[{"name":"School of Information Science and Engineering, Yunnan University,Kunming,China"}]},{"given":"Jian","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Yunnan University,Kunming,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095515"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-754"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-398"},{"key":"ref4","article-title":"Emotional end-to-end neural speech synthesizer","author":"Lee","year":"2017","journal-title":"arXiv preprint arXiv"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-29516-5_5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-465"},{"key":"ref7","article-title":"EmoSphere++: Emotion-Controllable Zero-Shot Text-to-Speech via Emotion-Adaptive Spherical Vector","volume":"abs\/2411.02625","author":"Cho","year":"2024","journal-title":"ArXiv"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/b978-0-12-558701-3.50007-7"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.2991\/itids-19.2019.52"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1037\/h0077714"},{"volume-title":"Perception and Emotions: The Plutchik Model of Emotions","year":"2023","author":"Sallehuddin","key":"ref11"},{"key":"ref12","article-title":"A Survey on Neural Speech Synthesis","volume":"abs\/2106.15561","author":"Tan","year":"2021","journal-title":"ArXiv"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/icassp.1985.1168147"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Eurospeech.1999-513"},{"key":"ref15","article-title":"Neural Discrete Representation Learning","author":"Oord","year":"2017","journal-title":"Neural Information Processing Systems"},{"key":"ref16","article-title":"Codebook Features: Sparse and Discrete Interpretability for Neural Networks","volume":"abs\/2310.17230","author":"Tamkin","year":"2023","journal-title":"ArXiv"},{"key":"ref17","article-title":"Meta-StyleSpeech: Multi-Speaker Adaptive Text-to-Speech Generation","volume":"abs\/2106.03153","author":"Min","year":"2021","journal-title":"ArXiv"},{"key":"ref18","article-title":"Deep Speaker: an End-to-End Neural Speaker Embedding System","volume":"abs\/1705.02304","author":"Li","year":"2017","journal-title":"ArXiv"},{"key":"ref19","article-title":"wav2vec 2.0:A framework for self-supervised learning of speech representations","author":"Baevski","year":"2020","journal-title":"Proc. Adv. Neural Inf. Process. Syst."},{"key":"ref20","article-title":"The Faiss library","volume":"abs\/2401.08281","author":"Douze","year":"2024","journal-title":"ArXiv"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/icassp48485.2024.10448291"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2021.11.006"},{"key":"ref24","article-title":"FastSpeech 2: Fast and High-Quality End-to-End Text to Speech","volume":"abs\/2006.04558","author":"Yi","year":"2020","journal-title":"ArXiv"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.931"}],"event":{"name":"2025 International Conference on Asian Language Processing (IALP)","start":{"date-parts":[[2025,8,3]]},"location":"Sarawak, Malaysia","end":{"date-parts":[[2025,8,6]]}},"container-title":["2025 International Conference on Asian Language Processing (IALP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11156192\/11156242\/11156902.pdf?arnumber=11156902","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T05:36:21Z","timestamp":1758087381000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11156902\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/ialp68296.2024.11156902","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]}}}