{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T08:00:12Z","timestamp":1764403212370,"version":"3.37.3"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,11]],"date-time":"2022-12-11T00:00:00Z","timestamp":1670716800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,11]],"date-time":"2022-12-11T00:00:00Z","timestamp":1670716800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100011222","name":"National Laboratory of Pattern Recognition","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100011222","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002766","name":"Beijing University of Posts and Telecommunications","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002766","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,11]]},"DOI":"10.1109\/iscslp57327.2022.10037956","type":"proceedings-article","created":{"date-parts":[[2023,2,8]],"date-time":"2023-02-08T18:53:24Z","timestamp":1675882404000},"page":"230-234","source":"Crossref","is-referenced-by-count":8,"title":["ECAPA-TDNN for Multi-speaker Text-to-speech Synthesis"],"prefix":"10.1109","author":[{"given":"Jinlong","family":"Xue","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications,School of Artificial Intelligence,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yayue","family":"Deng","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications,School of Artificial Intelligence,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yichen","family":"Han","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications,School of Artificial Intelligence,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ya","family":"Li","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications,School of Artificial Intelligence,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianqing","family":"Sun","sequence":"additional","affiliation":[{"name":"Unisound AI Technology Co., Ltd,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiaen","family":"Liang","sequence":"additional","affiliation":[{"name":"Unisound AI Technology Co., Ltd,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref2","article-title":"Deep voice 3: Scaling textto-speech with convolutional sequence learning","author":"Ping","year":"2017","journal-title":"arXiv preprint arXiv:1710.07654"},{"key":"ref3","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","author":"Ren","year":"2020","journal-title":"arXiv preprint arXiv:2006.04558"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/ICASSP.2014.6854363"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"ref6","article-title":"Deep speaker: an end-to-end neural speaker embedding system","author":"Li","year":"2017","journal-title":"arXiv preprint arXiv:1705.02304"},{"key":"ref7","article-title":"Transfer learning from speaker verification to multispeaker text-to-speech synthesis","volume":"31","author":"Jia","year":"2018","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.21437\/interspeech.2017-950"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.21437\/interspeech.2018-1929"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.21437\/interspeech.2020-2650"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.21437\/Interspeech.2021-941"},{"key":"ref12","article-title":"Speakernet: 1d depth-wise separable convolutional network for textindependent speaker recognition and verification","author":"Koluguri","year":"2020","journal-title":"arXiv preprint arXiv:2010.12653"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.21437\/Interspeech.2016-1066"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.21437\/Odyssey.2018-28"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.21437\/Interspeech.2019-2003"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/ICASSP39728.2021.9413877"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.21437\/Interspeech.2021-2013"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1016\/j.procs.2018.08.126"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/ICASSP40776.2020.9054535"},{"key":"ref20","article-title":"Deep voice 2: Multi-speaker neural textto-speech","volume":"30","author":"Gibiansky","year":"2017","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.21437\/interspeech.2018-993"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/ICASSP40776.2020.9053889"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/CVPR.2018.00745"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.21437\/Interspeech.2017-1452"},{"year":"2019","author":"Yamagishi","article-title":"Cstr vctk corpus: English multi-speaker corpus for cstr voice cloning toolkit","key":"ref25"},{"key":"ref26","article-title":"Libritts: A corpus derived from librispeech for textto-speech","author":"Zen","year":"2019","journal-title":"arXiv preprint arXiv:1904.02882"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.21437\/Interspeech.2017-1386"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/ICASSP39728.2021.9413880"}],"event":{"name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","start":{"date-parts":[[2022,12,11]]},"location":"Singapore, Singapore","end":{"date-parts":[[2022,12,14]]}},"container-title":["2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10037756\/10037573\/10037956.pdf?arnumber=10037956","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T13:59:11Z","timestamp":1707832751000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10037956\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,11]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/iscslp57327.2022.10037956","relation":{},"subject":[],"published":{"date-parts":[[2022,12,11]]}}}