{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:13:02Z","timestamp":1750219982058,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176182"],"award-info":[{"award-number":["62176182"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"MEXT KAKENHI","award":["21H04906"],"award-info":[{"award-number":["21H04906"]}]},{"name":"the Agency for Science, Technology and Research(A*STAR)","award":["CR-2021-005"],"award-info":[{"award-number":["CR-2021-005"]}]},{"name":"JST CREST","award":["JPMJCR18A6, JPMJCR20D3, JPMJFS2136"],"award-info":[{"award-number":["JPMJCR18A6, JPMJCR20D3, JPMJFS2136"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,14]]},"DOI":"10.1145\/3552466.3556527","type":"proceedings-article","created":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T12:27:26Z","timestamp":1664627246000},"page":"69-75","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Deep Spectro-temporal Artifacts for Detecting Synthesized Speech"],"prefix":"10.1145","author":[{"given":"Xiaohui","family":"Liu","sequence":"first","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meng","family":"Liu","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lin","family":"Zhang","sequence":"additional","affiliation":[{"name":"National Institute of Informatics, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Linjuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Taiyuan University of Technology, Taiyuan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chang","family":"Zeng","sequence":"additional","affiliation":[{"name":"National Institute of Informatics, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kai","family":"Li","sequence":"additional","affiliation":[{"name":"Japan Advanced Institute of Science and Technology, Nomi, Ishikawa, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nan","family":"Li","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kong Aik","family":"Lee","sequence":"additional","affiliation":[{"name":"Institute for Infocomm Research, A*STAR, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Longbiao","family":"Wang","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianwu","family":"Dang","sequence":"additional","affiliation":[{"name":"Japan Advanced Institute of Science and Technology, Nomi, Ishikawa, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,10,10]]},"reference":[{"key":"e_1_3_2_2_1_1","first-page":"130","volume-title":"Spoofing and countermeasures for speaker verification: A survey,\" speech communication","author":"Wu Z.","year":"2015","unstructured":"Z. Wu , N. Evans , T. Kinnunen , J. Yamagishi , F. Alegre , and H. Li , \" Spoofing and countermeasures for speaker verification: A survey,\" speech communication , vol. 66 , pp. 130 -- 153 , 2015 . Z. Wu, N. Evans, T. Kinnunen, J. Yamagishi, F. Alegre, and H. Li, \"Spoofing and countermeasures for speaker verification: A survey,\" speech communication, vol. 66, pp. 130--153, 2015."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","first-page":"2037","DOI":"10.21437\/Interspeech.2015-462","article-title":"ASVspoof 2015: the First Automatic Speaker Verification Spoofing and Countermeasures Challenge","author":"Wu Z.","year":"2015","unstructured":"Z. Wu , T. Kinnunen , N. Evans , J. Yamagishi , C. Hanil\u00e7i , M. Sahidullah , and A. Sizov , \" ASVspoof 2015: the First Automatic Speaker Verification Spoofing and Countermeasures Challenge ,\" in Proc. Interspeech , 2015 , pp. 2037 -- 2041 . Z. Wu, T. Kinnunen, N. Evans, J. Yamagishi, C. Hanil\u00e7i, M. Sahidullah, and A. Sizov, \"ASVspoof 2015: the First Automatic Speaker Verification Spoofing and Countermeasures Challenge,\" in Proc. Interspeech, 2015, pp. 2037--2041.","journal-title":"Proc. Interspeech"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1111"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBIOM.2021.3059479"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.21437\/ASVSPOOF.2021-8"},{"key":"e_1_3_2_2_6_1","unstructured":"\"Add 2022: The first audio deep synthesis detection challenge \" http:\/\/ addchallenge.cn. \"Add 2022: The first audio deep synthesis detection challenge \" http:\/\/ addchallenge.cn."},{"issue":"5","key":"e_1_3_2_2_7_1","first-page":"6","article-title":"Framewise phoneme classification with bidirectional lstm and other neural network architectures","volume":"18","author":"Graves A.","year":"2005","unstructured":"A. Graves and J. Schmidhuber , \" Framewise phoneme classification with bidirectional lstm and other neural network architectures ,\" Neural Networks , vol. 18 , no. 5 -- 6 , pp. 602--610, 2005 . A. Graves and J. Schmidhuber, \"Framewise phoneme classification with bidirectional lstm and other neural network architectures,\" Neural Networks, vol. 18, no. 5--6, pp. 602--610, 2005.","journal-title":"Neural Networks"},{"key":"e_1_3_2_2_8_1","volume-title":"Aishell-3: A multi-speaker mandarin tts corpus and the baselines","author":"Shi Y.","year":"2020","unstructured":"Y. Shi , H. Bu , X. Xu , S. Zhang , and M. Li , \" Aishell-3: A multi-speaker mandarin tts corpus and the baselines ,\" 2020 . Y. Shi, H. Bu, X. Xu, S. Zhang, and M. Li, \"Aishell-3: A multi-speaker mandarin tts corpus and the baselines,\" 2020."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"crossref","first-page":"4264","DOI":"10.21437\/Interspeech.2021-738","article-title":"An Initial Investigation for Detecting Partially Spoofed Audio","author":"Zhang L.","year":"2021","unstructured":"L. Zhang , X. Wang , E. Cooper , J. Yamagishi , J. Patino , and N. Evans , \" An Initial Investigation for Detecting Partially Spoofed Audio ,\" in Proc. Interspeech , 2021 , pp. 4264 -- 4268 . L. Zhang, X. Wang, E. Cooper, J. Yamagishi, J. Patino, and N. Evans, \"An Initial Investigation for Detecting Partially Spoofed Audio,\" in Proc. Interspeech, 2021, pp. 4264--4268.","journal-title":"Proc. Interspeech"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"crossref","first-page":"1654","DOI":"10.21437\/Interspeech.2021-930","article-title":"Half-Truth: A Partially Fake Audio Detection Dataset","volume":"2021","author":"Yi J.","year":"2021","unstructured":"J. Yi , Y. Bai , J. Tao , H. Ma , Z. Tian , C. Wang , T. Wang , and R. Fu , \" Half-Truth: A Partially Fake Audio Detection Dataset ,\" in Proc. Interspeech 2021 , 2021 , pp. 1654 -- 1658 . J. Yi, Y. Bai, J. Tao, H. Ma, Z. Tian, C. Wang, T. Wang, and R. Fu, \"Half-Truth: A Partially Fake Audio Detection Dataset,\" in Proc. Interspeech 2021, 2021, pp. 1654--1658.","journal-title":"Proc. Interspeech"},{"key":"e_1_3_2_2_11_1","first-page":"6369","volume-title":"ICASSP 2021","author":"Tak H.","year":"2021","unstructured":"H. Tak , J. Patino , M. Todisco , A. Nautsch , N. Evans , and A. Larcher , \" End-to-end anti-spoofing with rawnet2,\" in Proc . ICASSP 2021 , 2021 , pp. 6369 -- 6373 . H. Tak, J. Patino, M. Todisco, A. Nautsch, N. Evans, and A. Larcher, \"End-to-end anti-spoofing with rawnet2,\" in Proc. ICASSP 2021, 2021, pp. 6369--6373."},{"key":"e_1_3_2_2_12_1","volume-title":"Speaker recognition from raw waveform with sincnet,\" in 2018 IEEE Spoken Language Technology Workshop (SLT)","author":"Ravanelli M.","year":"2019","unstructured":"M. Ravanelli and Y. Bengio , \" Speaker recognition from raw waveform with sincnet,\" in 2018 IEEE Spoken Language Technology Workshop (SLT) , 2019 . M. Ravanelli and Y. Bengio, \"Speaker recognition from raw waveform with sincnet,\" in 2018 IEEE Spoken Language Technology Workshop (SLT), 2019."},{"key":"e_1_3_2_2_13_1","first-page":"6354","volume-title":"ICASSP 2021","author":"Li X.","year":"2021","unstructured":"X. Li , N. Li , C. Weng , X. Liu , D. Su , D. Yu , and H. Meng , \" Replay and synthetic speech detection with res2net architecture,\" in Proc . ICASSP 2021 . IEEE, 2021 , pp. 6354 -- 6358 . X. Li, N. Li, C. Weng, X. Liu, D. Su, D. Yu, and H. Meng, \"Replay and synthetic speech detection with res2net architecture,\" in Proc. ICASSP 2021. IEEE, 2021, pp. 6354--6358."},{"volume-title":"Combining evidence from residual phase and mfcc features for speaker recognition,\" IEEE signal processing letters","author":"Murty K. S. R.","key":"e_1_3_2_2_14_1","unstructured":"K. S. R. Murty and B. Yegnanarayana , \" Combining evidence from residual phase and mfcc features for speaker recognition,\" IEEE signal processing letters , vol. 13 , no. 1, pp. 52--55, 2005. K. S. R. Murty and B. Yegnanarayana, \"Combining evidence from residual phase and mfcc features for speaker recognition,\" IEEE signal processing letters, vol. 13, no. 1, pp. 52--55, 2005."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2020.101161"},{"key":"e_1_3_2_2_16_1","volume-title":"Investigating self-supervised front ends for speech spoofing countermeasures,\" arXiv preprint arXiv:2111.07725","author":"Wang X.","year":"2021","unstructured":"X. Wang and J. Yamagishi , \" Investigating self-supervised front ends for speech spoofing countermeasures,\" arXiv preprint arXiv:2111.07725 , 2021 . X. Wang and J. Yamagishi, \"Investigating self-supervised front ends for speech spoofing countermeasures,\" arXiv preprint arXiv:2111.07725, 2021."},{"key":"e_1_3_2_2_17_1","volume-title":"Wavlm: Large-scale self-supervised pre-training for full stack speech processing,\" arXiv preprint arXiv:2110.13900","author":"Chen S.","year":"2021","unstructured":"S. Chen , C. Wang , Z. Chen , Y. Wu , S. Liu , Z. Chen , J. Li , N. Kanda , T. Yoshioka , X. Xiao , , \" Wavlm: Large-scale self-supervised pre-training for full stack speech processing,\" arXiv preprint arXiv:2110.13900 , 2021 . S. Chen, C. Wang, Z. Chen, Y. Wu, S. Liu, Z. Chen, J. Li, N. Kanda, T. Yoshioka, X. Xiao, et al., \"Wavlm: Large-scale self-supervised pre-training for full stack speech processing,\" arXiv preprint arXiv:2110.13900, 2021."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2022-16"},{"key":"e_1_3_2_2_19_1","first-page":"9231","volume-title":"Speech and Signal Processing (ICASSP)","author":"Lv Z.","year":"2022","unstructured":"Z. Lv , S. Zhang , K. Tang , and P. Hu , \" Fake audio detection based on unsupervised pretraining models,\" in ICASSP 2022 - 2022 IEEE International Conference on Acoustics , Speech and Signal Processing (ICASSP) , 2022 , pp. 9231 -- 9235 . Z. Lv, S. Zhang, K. Tang, and P. Hu, \"Fake audio detection based on unsupervised pretraining models,\" in ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2022, pp. 9231--9235."},{"key":"e_1_3_2_2_20_1","volume-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations,\" in Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems","author":"Baevski A.","year":"2020","unstructured":"A. Baevski , Y. Zhou , A. Mohamed , and M. Auli , \" wav2vec 2.0: A framework for self-supervised learning of speech representations,\" in Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020 , NeurIPS 2020, December 6--12, 2020, virtual, H. Larochelle, M. Ranzato, R. Hadsell, M. Balcan, and H. Lin, Eds ., 2020. A. Baevski, Y. Zhou, A. Mohamed, and M. Auli, \"wav2vec 2.0: A framework for self-supervised learning of speech representations,\" in Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6--12, 2020, virtual, H. Larochelle, M. Ranzato, R. Hadsell, M. Balcan, and H. Lin, Eds., 2020."},{"key":"e_1_3_2_2_21_1","volume-title":"Unsupervised cross-lingual representation learning for speech recognition,\" arXiv preprint arXiv:2006.13979","author":"Conneau A.","year":"2020","unstructured":"A. Conneau , A. Baevski , R. Collobert , A. Mohamed , and M. Auli , \" Unsupervised cross-lingual representation learning for speech recognition,\" arXiv preprint arXiv:2006.13979 , 2020 . A. Conneau, A. Baevski, R. Collobert, A. Mohamed, and M. Auli, \"Unsupervised cross-lingual representation learning for speech recognition,\" arXiv preprint arXiv:2006.13979, 2020."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","first-page":"1194","DOI":"10.21437\/Interspeech.2021-1775","article-title":"SUPERB: Speech Processing Universal PERformance Benchmark","volume":"2021","author":"Yang S.","year":"2021","unstructured":"S. wen Yang , P.-H. Chi , Y.-S. Chuang , C.-I. J. Lai , K. Lakhotia , Y. Y. Lin , A. T. Liu , J. Shi , X. Chang , G.-T. Lin , T.-H. Huang , W.-C. Tseng , K. tik Lee , D.-R. Liu , Z. Huang , S. Dong , S.-W. Li , S. Watanabe , A. Mohamed , and H. yi Lee , \" SUPERB: Speech Processing Universal PERformance Benchmark ,\" in Proc. Interspeech 2021 , 2021 , pp. 1194 -- 1198 . S. wen Yang, P.-H. Chi, Y.-S. Chuang, C.-I. J. Lai, K. Lakhotia, Y. Y. Lin, A. T. Liu, J. Shi, X. Chang, G.-T. Lin, T.-H. Huang, W.-C. Tseng, K. tik Lee, D.-R. Liu, Z. Huang, S. Dong, S.-W. Li, S. Watanabe, A. Mohamed, and H. yi Lee, \"SUPERB: Speech Processing Universal PERformance Benchmark,\" in Proc. Interspeech 2021, 2021, pp. 1194--1198.","journal-title":"Proc. Interspeech"},{"key":"e_1_3_2_2_23_1","volume-title":"An empirical study on channel effects for synthetic voice spoofing countermeasure systems,\" arXiv preprint arXiv:2104.01320","author":"Zhang Y.","year":"2021","unstructured":"Y. Zhang , G. Zhu , F. Jiang , and Z. Duan , \" An empirical study on channel effects for synthetic voice spoofing countermeasure systems,\" arXiv preprint arXiv:2104.01320 , 2021 . Y. Zhang, G. Zhu, F. Jiang, and Z. Duan, \"An empirical study on channel effects for synthetic voice spoofing countermeasure systems,\" arXiv preprint arXiv:2104.01320, 2021."},{"key":"e_1_3_2_2_24_1","volume-title":"Ur channel-robust synthetic speech detection system for asvspoof","author":"Chen X.","year":"2021","unstructured":"X. Chen , Y. Zhang , G. Zhu , and Z. Duan , \" Ur channel-robust synthetic speech detection system for asvspoof 2021 ,\" arXiv preprint arXiv:2107.12018, 2021. X. Chen, Y. Zhang, G. Zhu, and Z. Duan, \"Ur channel-robust synthetic speech detection system for asvspoof 2021,\" arXiv preprint arXiv:2107.12018, 2021."},{"key":"e_1_3_2_2_25_1","volume-title":"ASVspoof 2021 Workshop","author":"Tomilov A.","year":"2021","unstructured":"A. Tomilov , A. Svishchev , M. Volkova , A. Chirkovskiy , A. Kondratev , and G. Lavrentyeva , \" Stc antispoofing systems for the asvspoof2021 challenge,\" in Proc . ASVspoof 2021 Workshop , 2021 . A. Tomilov, A. Svishchev, M. Volkova, A. Chirkovskiy, A. Kondratev, and G. Lavrentyeva, \"Stc antispoofing systems for the asvspoof2021 challenge,\" in Proc. ASVspoof 2021 Workshop, 2021."},{"key":"e_1_3_2_2_26_1","volume-title":"ASVspoof2021 Workshop","author":"C\u00e1ceres J.","year":"2021","unstructured":"J. C\u00e1ceres , R. Font , T. Grau , J. Molina , and B. V. SL , \" The biometric vox system for the asvspoof 2021 challenge,\" in Proc . ASVspoof2021 Workshop , 2021 . J. C\u00e1ceres, R. Font, T. Grau, J. Molina, and B. V. SL, \"The biometric vox system for the asvspoof 2021 challenge,\" in Proc. ASVspoof2021 Workshop, 2021."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.21437\/ASVSPOOF.2021-14"},{"key":"e_1_3_2_2_28_1","first-page":"29","volume-title":"Physical Access and Speech Deepfake Attacks: ASVspoof","author":"Das R. K.","year":"2021","unstructured":"R. K. Das , \"Known-unknown Data Augmentation Strategies for Detection of Logical Access , Physical Access and Speech Deepfake Attacks: ASVspoof 2021 ,\" in Proc. 2021 Edition of the Automatic Speaker Verification and Spoofing Countermeasures Challenge , 2021, pp. 29 -- 36 . R. K. Das, \"Known-unknown Data Augmentation Strategies for Detection of Logical Access, Physical Access and Speech Deepfake Attacks: ASVspoof 2021,\" in Proc. 2021 Edition of the Automatic Speaker Verification and Spoofing Countermeasures Challenge, 2021, pp. 29--36."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.21437\/ASVSPOOF.2021-16"},{"key":"e_1_3_2_2_30_1","first-page":"3693","volume-title":"Speech and Signal Processing (ICASSP). IEEE","author":"Fu Q.","year":"2022","unstructured":"Q. Fu , Z. Teng , J. White , M. E. Powell , and D. C. Schmidt , \" Fastaudio: A learnable audio front-end for spoof speech detection,\" in ICASSP 2022--2022 IEEE International Conference on Acoustics , Speech and Signal Processing (ICASSP). IEEE , 2022 , pp. 3693 -- 3697 . Q. Fu, Z. Teng, J. White, M. E. Powell, and D. C. Schmidt, \"Fastaudio: A learnable audio front-end for spoof speech detection,\" in ICASSP 2022--2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2022, pp. 3693--3697."},{"key":"e_1_3_2_2_31_1","first-page":"951","article-title":"Adversarial separation network for speaker recognition","author":"Zhang H.","year":"2020","unstructured":"H. Zhang , L. Wang , Y. Zhang , M. Liu , K. A. Lee , and J. Wei , \" Adversarial separation network for speaker recognition .,\" in INTERSPEECH , 2020 , pp. 951 -- 955 . H. Zhang, L. Wang, Y. Zhang, M. Liu, K. A. Lee, and J. Wei, \"Adversarial separation network for speaker recognition.,\" in INTERSPEECH, 2020, pp. 951--955.","journal-title":"INTERSPEECH"},{"key":"e_1_3_2_2_32_1","first-page":"1038","article-title":"The sjtu robust anti-spoofing system for the asvspoof 2019 challenge","author":"Yang Y.","year":"2019","unstructured":"Y. Yang , H. Wang , H. Dinkel , Z. Chen , S. Wang , Y. Qian , and K. Yu , \" The sjtu robust anti-spoofing system for the asvspoof 2019 challenge .,\" in Interspeech , 2019 , pp. 1038 -- 1042 . Y. Yang, H. Wang, H. Dinkel, Z. Chen, S. Wang, Y. Qian, and K. Yu, \"The sjtu robust anti-spoofing system for the asvspoof 2019 challenge.,\" in Interspeech, 2019, pp. 1038--1042.","journal-title":"Interspeech"},{"key":"e_1_3_2_2_33_1","volume-title":"The dku replay detection system for the asvspoof 2019 challenge: On data augmentation, feature representation, classification, and fusion,\" arXiv preprint arXiv:1907.02663","author":"Cai W.","year":"2019","unstructured":"W. Cai , H. Wu , D. Cai , and M. Li , \" The dku replay detection system for the asvspoof 2019 challenge: On data augmentation, feature representation, classification, and fusion,\" arXiv preprint arXiv:1907.02663 , 2019 . W. Cai, H. Wu, D. Cai, and M. Li, \"The dku replay detection system for the asvspoof 2019 challenge: On data augmentation, feature representation, classification, and fusion,\" arXiv preprint arXiv:1907.02663, 2019."},{"key":"e_1_3_2_2_34_1","volume-title":"Improving state-of-the-art in detecting student engagement with resnet and tcn hybrid network,\" arXiv preprint arXiv:2104.10122","author":"Abedi A.","year":"2021","unstructured":"A. Abedi and S. S. Khan , \" Improving state-of-the-art in detecting student engagement with resnet and tcn hybrid network,\" arXiv preprint arXiv:2104.10122 , 2021 . A. Abedi and S. S. Khan, \"Improving state-of-the-art in detecting student engagement with resnet and tcn hybrid network,\" arXiv preprint arXiv:2104.10122, 2021."},{"key":"e_1_3_2_2_35_1","first-page":"7132","volume-title":"Squeeze-and-excitation networks","author":"Hu J.","year":"2018","unstructured":"J. Hu , L. Shen , and G. Sun , \" Squeeze-and-excitation networks ,\" 2018 , pp. 7132 -- 7141 . J. Hu, L. Shen, and G. Sun, \"Squeeze-and-excitation networks,\" 2018, pp. 7132-- 7141."},{"key":"e_1_3_2_2_36_1","volume-title":"Res2net: A new multi-scale backbone architecture,\" IEEE transactions on pattern analysis and machine intelligence","author":"Gao S.","year":"2019","unstructured":"S. Gao , M.-M. Cheng , K. Zhao , X.-Y. Zhang , M.-H. Yang , and P. H. Torr , \" Res2net: A new multi-scale backbone architecture,\" IEEE transactions on pattern analysis and machine intelligence , 2019 . S. Gao, M.-M. Cheng, K. Zhao, X.-Y. Zhang, M.-H. Yang, and P. H. Torr, \"Res2net: A new multi-scale backbone architecture,\" IEEE transactions on pattern analysis and machine intelligence, 2019."},{"key":"e_1_3_2_2_37_1","volume-title":"Adam: A method for stochastic optimization,\" Computer Science","author":"Kingma D.","year":"2014","unstructured":"D. Kingma and J. Ba , \" Adam: A method for stochastic optimization,\" Computer Science , 2014 . D. Kingma and J. Ba, \"Adam: A method for stochastic optimization,\" Computer Science, 2014."},{"key":"e_1_3_2_2_38_1","first-page":"5998","volume-title":"Attention is all you need,\" in Advances in neural information processing systems","author":"Vaswani A.","year":"2017","unstructured":"A. Vaswani , N. Shazeer , N. Parmar , J. Uszkoreit , L. Jones , A. N. Gomez , \". Kaiser, and I. Polosukhin , \" Attention is all you need,\" in Advances in neural information processing systems , 2017 , pp. 5998 -- 6008 . A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, A. N. Gomez, \". Kaiser, and I. Polosukhin, \"Attention is all you need,\" in Advances in neural information processing systems, 2017, pp. 5998--6008."},{"key":"e_1_3_2_2_39_1","volume-title":"Learning phrase representations using rnn encoder-decoder for statistical machine translation,\" Computer Science","author":"Cho K.","year":"2014","unstructured":"K. Cho , B. V. Merrienboer , C. Gulcehre , D. Bahdanau , F. Bougares , H. Schwenk , and Y. Bengio , \" Learning phrase representations using rnn encoder-decoder for statistical machine translation,\" Computer Science , 2014 . K. Cho, B. V. Merrienboer, C. Gulcehre, D. Bahdanau, F. Bougares, H. Schwenk, and Y. Bengio, \"Learning phrase representations using rnn encoder-decoder for statistical machine translation,\" Computer Science, 2014."},{"key":"e_1_3_2_2_40_1","volume-title":"An empirical evaluation of generic convolutional and recurrent networks for sequence modeling,\" arXiv preprint arXiv:1803.01271","author":"Bai S.","year":"2018","unstructured":"S. Bai , J. Z. Kolter , and V. Koltun , \" An empirical evaluation of generic convolutional and recurrent networks for sequence modeling,\" arXiv preprint arXiv:1803.01271 , 2018 . S. Bai, J. Z. Kolter, and V. Koltun, \"An empirical evaluation of generic convolutional and recurrent networks for sequence modeling,\" arXiv preprint arXiv:1803.01271, 2018."},{"issue":"2605","key":"e_1_3_2_2_41_1","first-page":"2579","article-title":"Visualizing data using t-sne","volume":"9","author":"Laurens V. D. M.","year":"2008","unstructured":"V. D. M. Laurens and G. Hinton , \" Visualizing data using t-sne ,\" Journal of Machine Learning Research , vol. 9 , no. 2605 , pp. 2579 -- 2605 , 2008 V. D. M. Laurens and G. Hinton, \"Visualizing data using t-sne,\" Journal of Machine Learning Research, vol. 9, no. 2605, pp. 2579--2605, 2008","journal-title":"Journal of Machine Learning Research"}],"event":{"name":"MM '22: The 30th ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Lisboa Portugal","acronym":"MM '22"},"container-title":["Proceedings of the 1st International Workshop on Deepfake Detection for Audio Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3552466.3556527","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3552466.3556527","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:25Z","timestamp":1750182565000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3552466.3556527"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":41,"alternative-id":["10.1145\/3552466.3556527","10.1145\/3552466"],"URL":"https:\/\/doi.org\/10.1145\/3552466.3556527","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2022-10-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}