{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T17:30:54Z","timestamp":1755797454911,"version":"3.44.0"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23B2053","62176182"],"award-info":[{"award-number":["U23B2053","62176182"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Lett."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/lsp.2025.3592588","type":"journal-article","created":{"date-parts":[[2025,7,24]],"date-time":"2025-07-24T17:58:52Z","timestamp":1753379932000},"page":"3137-3141","source":"Crossref","is-referenced-by-count":0,"title":["Emotional Style Transfer With Intensity Control in Zero-Shot TTS"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2393-8679","authenticated-orcid":false,"given":"Haoyu","family":"Wang","sequence":"first","affiliation":[{"name":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2290-3074","authenticated-orcid":false,"given":"Chunyu","family":"Qiang","sequence":"additional","affiliation":[{"name":"School of New Media and Communication, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianrui","family":"Wang","sequence":"additional","affiliation":[{"name":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0272-3541","authenticated-orcid":false,"given":"Cheng","family":"Gong","sequence":"additional","affiliation":[{"name":"Institute of Artificial Intelligence (TeleAI), China Telecom, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8094-6861","authenticated-orcid":false,"given":"Longbiao","family":"Wang","sequence":"additional","affiliation":[{"name":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Cosyvoice: A scalable multilingual zero-shot text-to-speech synthesizer based on supervised semantic tokens","year":"2024","author":"Du","key":"ref1"},{"article-title":"Seed-TTS: A family of high-quality versatile speech generation models","year":"2024","author":"Anastassiou","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.313"},{"article-title":"Cross-speaker emotion transfer based on speaker condition layer normalization and semi-supervised training in text-to-speech","year":"2021","author":"Wu","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3268571"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3145297"},{"key":"ref7","first-page":"10970","article-title":"Generspeech: Towards style transfer for generalizable out-of-domain text-to-speech","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Huang","year":"2022"},{"key":"ref8","first-page":"5180","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang","year":"2018"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383629"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3453606"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1351"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1265"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-10761"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3203888"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3363444"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2021.03.005"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095840"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00618"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSLP63861.2024.10800531"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3434425"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TASLPRO.2025.3564168"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3451951"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1775"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-363"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054734"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/slt61566.2024.10832365"},{"key":"ref27","article-title":"Neural discrete representation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Den","year":"2017"},{"key":"ref28","first-page":"17022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kong","year":"2020"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3164181"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-979"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1428"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.931"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413391"}],"container-title":["IEEE Signal Processing Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/97\/10802935\/11095622.pdf?arnumber=11095622","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,18]],"date-time":"2025-08-18T19:50:44Z","timestamp":1755546644000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11095622\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/lsp.2025.3592588","relation":{},"ISSN":["1070-9908","1558-2361"],"issn-type":[{"type":"print","value":"1070-9908"},{"type":"electronic","value":"1558-2361"}],"subject":[],"published":{"date-parts":[[2025]]}}}