{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,13]],"date-time":"2026-07-13T15:07:41Z","timestamp":1783955261103,"version":"3.55.0"},"reference-count":68,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434679","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-8","source":"Crossref","is-referenced-by-count":4,"title":["Fake-Mamba: Real-Time Speech Deepfake Detection Using Bidirectional Mamba as Self-Attention\u2019s Alternative"],"prefix":"10.1109","author":[{"given":"Xi","family":"Xuan","sequence":"first","affiliation":[{"name":"University of Eastern Finland,School of Computing,Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zimo","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of California Santa Barbara,Department of Statistics and Applied Probability,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenxin","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences,School of Computer Science and Technology,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yi-Cheng","family":"Lin","sequence":"additional","affiliation":[{"name":"Graduate Institute of Communication Engineering National Taiwan University,Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tomi","family":"Kinnunen","sequence":"additional","affiliation":[{"name":"University of Eastern Finland,School of Computing,Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29793"},{"key":"ref2","article-title":"Breezyvoice: Adapting tts for taiwanese mandarin with enhanced polyphone disambiguation - challenges and insights","author":"Hsu","year":"2025"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-372"},{"key":"ref4","article-title":"Audio anti-spoofing detection: A survey","author":"Li","year":"2024","journal-title":"arXiv preprint arXiv:2404.13914"},{"key":"ref5","article-title":"Translaw: Benchmarking large language models in multi-agent simulation of the collaborative translation","author":"Xuan","year":"2025"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2025.3562486"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ITNEC52019.2021.9587004"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832250"},{"key":"ref9","article-title":"Codecfake-omni: A large-scale codec-based deepfake speech dataset","volume":"abs\/2501.08238","author":"Du","year":"2025","journal-title":"CoRR"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"issue":"7","key":"ref11","first-page":"147","article-title":"Conformer-based speaker recognition model for real-time multi-scenarios","volume-title":"Computer Engineering and Applications","volume":"60","author":"Xuan"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448453"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-659"},{"key":"ref14","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2019.2956589"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-844"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.65286\/icic.v21i1.23203","article-title":"Addressing noise and stochasticity in fraud detection for service networks","author":"Zhang","year":"2025"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-9921-6_2"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"107399","DOI":"10.1016\/j.neunet.2025.107399","article-title":"Decomposition-based multi-scale transformer framework for time series anomaly detection","volume":"187","author":"Zhang","year":"2025","journal-title":"Neural Networks"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890034"},{"key":"ref21","first-page":"10330","article-title":"Codec-SUPERB: An in-depth analysis of sound codec models","volume-title":"Findings of the Association for Computational Linguistics: ACL 2024","author":"Wu"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPAASC63619.2025.10849259"},{"key":"ref23","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","volume-title":"First Conference on Language Modeling","author":"Gu"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33131"},{"key":"ref25","article-title":"Jamba: Hybrid TransformerMamba Language Models","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Lenz"},{"key":"ref26","article-title":"An empirical study of mambabased language models","author":"Waleffe","year":"2024","journal-title":"arXiv preprint arXiv:2406.07887"},{"key":"ref27","article-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","volume-title":"Fortyfirst International Conference on Machine Learning","author":"Zhu"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.129178"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-024-01001-9"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/ASVSPOOF.2021-8"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-108"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2024.3483009"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-1274"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832304"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890324"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TASLPRO.2025.3533365"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832137"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832332"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890412"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888514"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888846"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10889446"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888763"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ITAIC54216.2022.9836695"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ITAIC54216.2022.9836693"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/IAEAC54830.2022.9929964"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CECIT53797.2021.00186"},{"key":"ref48","article-title":"Efficient real-time multi-scenario speaker recognition with mel-spectrogram-based hybrid tdnn for edge system","volume-title":"INTERSPEECH 2024-Young Female* Researchers in Speech Workshop (YFRSW 2024)","author":"Xuan"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-698"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2025.3547861"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/S1573-4412(05)80019-4"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-143"},{"key":"ref53","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.21437\/SPSC.2025-5"},{"key":"ref55","article-title":"How to learn a new language? an efficient solution for self-supervised learning models unseen languages adaption in low-resource scenario","author":"Wang","year":"2025"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10887615"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.21437\/odyssey.2022-14"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TASLPRO.2025.3566210"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1820"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681345"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2249"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746213"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-demo.17"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-aacl.18"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/IAEAC50856.2021.9390852"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2017.07.005"},{"key":"ref67","first-page":"1455","article-title":"An analysis of the t -sne algorithm for data visualization","volume-title":"Conference on Learning Theory.","author":"Arora"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-9921-6_4"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434679.pdf?arnumber=11434679","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:58:14Z","timestamp":1775192294000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434679\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":68,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434679","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}