{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:40:14Z","timestamp":1767339614462,"version":"3.37.3"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62101553,62306316,U21B20210,62201571"],"award-info":[{"award-number":["62101553,62306316,U21B20210,62201571"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,11,7]]},"DOI":"10.1109\/iscslp63861.2024.10800377","type":"proceedings-article","created":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:11:17Z","timestamp":1734981077000},"page":"294-298","source":"Crossref","is-referenced-by-count":1,"title":["EELE: Exploring Efficient and Extensible LoRA Integration in Emotional Text-to-Speech"],"prefix":"10.1109","author":[{"given":"Xin","family":"Qi","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruibo","family":"Fu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengqi","family":"Wen","sequence":"additional","affiliation":[{"name":"Tsinghua University,Beijing National Research Center for Information Science and Technolgy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianhua","family":"Tao","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuchen","family":"Shi","sequence":"additional","affiliation":[{"name":"Shanghai Polytechnic University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Lu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyong","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaopeng","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuankun","family":"Xie","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Communication University of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yukun","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guanjun","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuefei","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongwei","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Science"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"5530","article-title":"Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech","volume-title":"International Conference on Machine Learning","author":"Kim","year":"2021"},{"key":"ref2","first-page":"8599","article-title":"Grad-tts: A diffusion probabilistic model for text-to-speech","volume-title":"International Conference on Machine Learning","author":"Popov","year":"2021"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.21437\/Interspeech.2021-1148"},{"key":"ref4","article-title":"Contextual expressive text-to-speech","author":"Tu","year":"2022","journal-title":"arXiv preprint"},{"issue":"5","key":"ref5","first-page":"614","article-title":"Deter-mination of representative emotional style of speech based on k-means algorithm","volume":"38","author":"Oh","year":"2019","journal-title":"The Journal of the Acoustical Society of Korea"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.21437\/interspeech.2021-1236"},{"key":"ref7","first-page":"7748","article-title":"Meta-stylespeech: Multi-speaker adaptive text-to-speech generation","volume-title":"International Conference on Machine Learning","author":"Min","year":"2021"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/ICASSP49357.2023.10095515"},{"key":"ref9","article-title":"Lora: Low-rank adaptation of large language models","author":"Hu","year":"2021","journal-title":"arXiv preprint"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1007\/978-3-031-73232-4_24"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.21437\/interspeech.2023-2313"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/ICASSP39728.2021.9413889"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.21437\/Interspeech.2024-892"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/ICASSP48485.2024.10446321"},{"key":"ref15","article-title":"Lora+: Efficient low rank adaptation of large models","author":"Hayou","year":"2024","journal-title":"arXiv preprint"},{"key":"ref16","article-title":"Lora-fa: Memory-efficient low-rank adaptation for large language models fine-tuning","author":"Zhang","year":"2023","journal-title":"arXiv preprint"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.21437\/Interspeech.2023-534"},{"key":"ref18","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"International Conference on Learning Representations","author":"Hu","year":"2022"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/MSP.2017.2765202"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1016\/j.specom.2021.11.006"},{"volume-title":"Cstr vctk corpus: English multi-speaker corpus for cstr voice cloning toolkit","year":"2017","author":"Veaux","key":"ref21"},{"volume-title":"Enrique Hern\u00e1ndez Calabr\u00e9s","year":"2024","article-title":"wav2vec2-lg-xlsr-en-speech-emotion-recognition (revision 17cf17c)","key":"ref22"}],"event":{"name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","start":{"date-parts":[[2024,11,7]]},"location":"Beijing, China","end":{"date-parts":[[2024,11,10]]}},"container-title":["2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10799944\/10799969\/10800377.pdf?arnumber=10800377","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,15]],"date-time":"2025-01-15T19:28:31Z","timestamp":1736969311000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10800377\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,7]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/iscslp63861.2024.10800377","relation":{},"subject":[],"published":{"date-parts":[[2024,11,7]]}}}