{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T18:54:24Z","timestamp":1777488864724,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,22]]},"DOI":"10.1145\/3725949.3725971","type":"proceedings-article","created":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T05:53:12Z","timestamp":1751953992000},"page":"162-166","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Leveraging Neural Vocoder Artifacts for Improved Synthetic Speech Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-4430-2542","authenticated-orcid":false,"given":"Jingxi","family":"Xue","sequence":"first","affiliation":[{"name":"City University of Macau, Macau, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4709-5786","authenticated-orcid":false,"given":"Sanshuai","family":"Cui","sequence":"additional","affiliation":[{"name":"City University of Macau, Macau, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9132-4272","authenticated-orcid":false,"given":"Weinan","family":"Zhang","sequence":"additional","affiliation":[{"name":"City University of Macau, Macau, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,7]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Marcin Andrychowicz Misha Denil Sergio Gomez Matthew\u00a0W Hoffman David Pfau Tom Schaul Brendan Shillingford and Nando De\u00a0Freitas. 2016. Learning to learn by gradient descent by gradient descent. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Arun Babu Changhan Wang Andros Tjandra Kushal Lakhotia Qiantong Xu Naman Goyal Kritika Singh Patrick Von\u00a0Platen Yatharth Saraf Juan Pino et\u00a0al. 2021. XLS-R: Self-supervised cross-lingual speech representation learning at scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2111.09296 (2021).","DOI":"10.21437\/Interspeech.2022-143"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Sanyuan Chen Chengyi Wang Zhengyang Chen Yu Wu Shujie Liu Zhuo Chen Jinyu Li Naoyuki Kanda Takuya Yoshioka Xiong Xiao et\u00a0al. 2022. Wavlm: Large-scale self-supervised pre-training for full stack speech processing. IEEE Journal of Selected Topics in Signal Processing 16 6 (2022) 1505\u20131518.","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Sanshuai Cui Bingyuan Huang Jiwu Huang and Xiangui Kang. 2022. Synthetic speech detection based on local autoregression and variance statistics. IEEE Signal Processing Letters 29 (2022) 1462\u20131466.","DOI":"10.1109\/LSP.2022.3183951"},{"key":"e_1_3_3_1_6_2","unstructured":"Joel Frank and Lea Sch\u00f6nherr. 2021. Wavefake: A data set to facilitate audio deepfake detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2111.02813 (2021)."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Guang Hua Andrew Beng\u00a0Jin Teoh and Haijian Zhang. 2021. Towards end-to-end synthetic speech detection. IEEE Signal Processing Letters 28 (2021) 1265\u20131269.","DOI":"10.1109\/LSP.2021.3089437"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Bingyuan Huang Sanshuai Cui Jiwu Huang and Xiangui Kang. 2023. Discriminative frequency information learning for end-to-end speech anti-spoofing. IEEE Signal Processing Letters 30 (2023) 185\u2013189.","DOI":"10.1109\/LSP.2023.3251895"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Miquel India Pooyan Safari and Javier Hernando. 2019. Self multi-head attention for speaker recognition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1906.09890 (2019).","DOI":"10.21437\/Interspeech.2019-2616"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Jee-weon Jung Hee-Soo Heo Ju-ho Kim Hye-jin Shim and Ha-Jin Yu. 2019. Rawnet: Advanced end-to-end deep neural network using raw waveforms for text-independent speaker verification. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.08104 (2019).","DOI":"10.21437\/Interspeech.2019-1982"},{"key":"e_1_3_3_1_13_2","unstructured":"Diederik\u00a0P Kingma. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1412.6980 (2014)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Frank Klinker. 2011. Exponential moving average versus moving exponential average. Mathematische Semesterberichte 58 (2011) 97\u2013107.","DOI":"10.1007\/s00591-010-0080-8"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Galina Lavrentyeva Sergey Novoselov Andzhukaev Tseren Marina Volkova Artem Gorlanov and Alexandr Kozlov. 2019. STC antispoofing systems for the ASVspoof2019 challenge. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.05576 (2019).","DOI":"10.21437\/Interspeech.2019-1768"},{"key":"e_1_3_3_1_16_2","unstructured":"Menglu Li Yasaman Ahmadiadli and Xiao-Ping Zhang. 2024. Audio Anti-Spoofing Detection: A Survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.13914 (2024)."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096278"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-472"},{"key":"e_1_3_3_1_19_2","unstructured":"Chengzhe Sun Shan Jia Shuwei Hou Ehab AlBadawy and Siwei Lyu. 2023. Exposing ai-synthesized human voices using neural vocoder artifacts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.09198 (2023)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00097"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414234"},{"key":"e_1_3_3_1_22_2","unstructured":"Xu Tan Tao Qin Frank Soong and Tie-Yan Liu. 2021. A survey on neural speech synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.15561 (2021)."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Massimiliano Todisco Xin Wang Ville Vestman Md Sahidullah H\u00e9ctor Delgado Andreas Nautsch Junichi Yamagishi Nicholas Evans Tomi Kinnunen and Kong\u00a0Aik Lee. 2019. ASVspoof 2019: Future horizons in spoofed and fake audio detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.05441 (2019).","DOI":"10.21437\/Interspeech.2019-2249"},{"key":"e_1_3_3_1_24_2","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_3_1_25_2","first-page":"1352","volume-title":"2020 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","author":"Wang Zheng","year":"2020","unstructured":"Zheng Wang, Sanshuai Cui, Xiangui Kang, Wei Sun, and Zhonghua Li. 2020. Densely connected convolutional network for audio spoofing detection. In 2020 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). IEEE, 1352\u20131360."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446612"}],"event":{"name":"SSIP 2024: 2024 7th International Conference on Sensors, Signal and Image Processing","location":"Shenzhen China","acronym":"SSIP 2024"},"container-title":["Proceedings of the 2024 7th International Conference on Sensors, Signal and Image Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3725949.3725971","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T06:22:01Z","timestamp":1751955721000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3725949.3725971"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"references-count":25,"alternative-id":["10.1145\/3725949.3725971","10.1145\/3725949"],"URL":"https:\/\/doi.org\/10.1145\/3725949.3725971","relation":{},"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"2025-07-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}