{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T07:48:55Z","timestamp":1768981735438,"version":"3.49.0"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031613814","type":"print"},{"value":"9783031613821","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-61382-1_8","type":"book-chapter","created":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T01:06:11Z","timestamp":1717203971000},"page":"121-133","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Whisper+AASIST for\u00a0DeepFake Audio Detection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3413-9727","authenticated-orcid":false,"given":"Qian","family":"Luo","sequence":"first","affiliation":[]},{"given":"Kalyani","family":"Vinayagam Sivasundari","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,1]]},"reference":[{"key":"8_CR1","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. In: Advances in Neural Information Processing Systems, vol. 33, pp. 12449\u201312460 (2020)"},{"key":"8_CR2","unstructured":"Brewster, T.: Fraudsters cloned company director\u2019s voice in \\$35 million heist, police find (2021). https:\/\/www.forbes.com\/sites\/thomasbrewster\/2021\/10\/14\/huge-bank-fraud-uses-deep-fake-voice-tech-to-steal-millions. Accessed 30 Sept 2023"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"De\u00a0Leon, P.L., Stewart, B., Yamagishi, J.: Synthetic speech discrimination using pitch pattern statistics derived from image analysis. In: Interspeech, pp. 370\u2013373 (2012)","DOI":"10.21437\/Interspeech.2012-135"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Ge, W., Panariello, M., Patino, J., Todisco, M., Evans, N.: Partially-connected differentiable architecture search for deepfake and spoofing detection. arXiv preprint arXiv:2104.03123 (2021)","DOI":"10.21437\/Interspeech.2021-1187"},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"Jung, J., et al.: AASIST: audio anti-spoofing using integrated spectro-temporal graph attention networks. In: 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), ICASSP 2022, pp. 6367\u20136371. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9747766"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Jung, J., Kim, S., Shim, H., Kim, J., Yu, H.J.: Improved RawNet with feature map scaling for text-independent speaker verification using raw waveforms. arXiv preprint arXiv:2004.00526 (2020)","DOI":"10.21437\/Interspeech.2020-1011"},{"key":"8_CR7","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Klontz, J.C., Klare, B.F., Klum, S., Jain, A.K., Burge, M.J.: Open source biometric recognition. In: 2013 IEEE Sixth International Conference on Biometrics: Theory, Applications and Systems (BTAS), pp.\u00a01\u20138. IEEE (2013)","DOI":"10.1109\/BTAS.2013.6712754"},{"key":"8_CR9","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Replay and synthetic speech detection with Res2Net architecture. In: 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), ICASSP 2021, pp. 6354\u20136358. IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9413828"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Liu, X., Liu, M., Wang, L., Lee, K.A., Zhang, H., Dang, J.: Leveraging positional-related local-global dependency for synthetic speech detection. In: 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), ICASSP 2023, pp.\u00a01\u20135. IEEE (2023)","DOI":"10.1109\/ICASSP49357.2023.10096278"},{"key":"8_CR11","doi-asserted-by":"publisher","first-page":"2507","DOI":"10.1109\/TASLP.2023.3285283","volume":"31","author":"X Liu","year":"2023","unstructured":"Liu, X., et al.: ASVspoof 2021: towards spoofed and deepfake speech detection in the wild. IEEE\/ACM Trans. Audio Speech Lang. Process. 31, 2507\u20132522 (2023)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Mart\u00edn-Do\u00f1as, J.M., \u00c1lvarez, A.: The vicomtech audio deepfake detection system based on Wav2vec2 for the 2022 add challenge. In: 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), ICASSP 2022, pp. 9241\u20139245. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9747768"},{"key":"8_CR13","doi-asserted-by":"crossref","unstructured":"M\u00fcller, N.M., Czempin, P., Dieckmann, F., Froghyar, A., B\u00f6ttinger, K.: Does audio deepfake detection generalize? arXiv preprint arXiv:2203.16263 (2022)","DOI":"10.21437\/Interspeech.2022-108"},{"key":"8_CR14","unstructured":"Radford, A., Kim, J.W., Xu, T., Brockman, G., McLeavey, C., Sutskever, I.: Robust speech recognition via large-scale weak supervision. In: International Conference on Machine Learning, pp. 28492\u201328518. PMLR (2023)"},{"key":"8_CR15","unstructured":"Stupp, C.: Fraudsters use AI to mimic CEO\u2019s voice in unusual cybercrime case. Wall Street J. (2019). https:\/\/www.wsj.com\/articles\/fraudsters-use-ai-to-mimic-ceos-voice-in-unusual-cybercrime-case-11567157402"},{"key":"8_CR16","doi-asserted-by":"crossref","unstructured":"Tak, H., Jung, J., Patino, J., Todisco, M., Evans, N.: Graph attention networks for anti-spoofing. arXiv preprint arXiv:2104.03654 (2021)","DOI":"10.21437\/Interspeech.2021-993"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Tak, H., Kamble, M., Patino, J., Todisco, M., Evans, N.: RawBoost: a raw data boosting and augmentation method applied to automatic speaker verification anti-spoofing. In: 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), ICASSP 2022, pp. 6382\u20136386. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9746213"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Tak, H., Todisco, M., Wang, X., Jung, J., Yamagishi, J., Evans, N.: Automatic speaker verification spoofing and deepfake detection using Wav2vec 2.0 and data augmentation. arXiv preprint arXiv:2202.12233 (2022)","DOI":"10.21437\/Odyssey.2022-16"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Todisco, M., et al.: ASVspoof 2019: future horizons in spoofed and fake audio detection. arXiv preprint arXiv:1904.05441 (2019)","DOI":"10.21437\/Interspeech.2019-2249"},{"key":"8_CR20","unstructured":"U.S. Department of Defense, Federal Bureau of Investigation, Cybersecurity and Infrastructure Security Agency: Contextualizing Deepfake Threats to Organizations (2023). https:\/\/media.defense.gov\/2023\/Sep\/12\/2003298925\/-1\/-1\/0\/CSI-DEEPFAKE-THREATS.PDF. Accessed 30 Sept 2023"},{"key":"8_CR21","doi-asserted-by":"publisher","unstructured":"Veaux, C., Yamagishi, J., MacDonald, K.: CSTR VCTK Corpus: English multi-speaker corpus for CSTR voice cloning toolkit [sound] (2017). https:\/\/doi.org\/10.7488\/ds\/1994","DOI":"10.7488\/ds\/1994"},{"key":"8_CR22","doi-asserted-by":"publisher","first-page":"101114","DOI":"10.1016\/j.csl.2020.101114","volume":"64","author":"X Wang","year":"2020","unstructured":"Wang, X., et al.: ASVspoof 2019: a large-scale public database of synthesized, converted and replayed speech. Comput. Speech Lang. 64, 101114 (2020)","journal-title":"Comput. Speech Lang."},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Yamagishi, J., et\u00a0al.: ASVspoof 2021: accelerating progress in spoofed and deepfake speech detection. arXiv preprint arXiv:2109.00537 (2021)","DOI":"10.21437\/ASVSPOOF.2021-8"},{"key":"8_CR24","unstructured":"Yi, J., Wang, C., Tao, J., Zhang, X., Zhang, C.Y., Zhao, Y.: Audio deepfake detection: a survey. arXiv preprint arXiv:2308.14970 (2023)"}],"container-title":["Lecture Notes in Computer Science","HCI for Cybersecurity, Privacy and Trust"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-61382-1_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T01:53:33Z","timestamp":1717206813000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-61382-1_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031613814","9783031613821"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-61382-1_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"1 June 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HCII","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Human-Computer Interaction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Washington DC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 June 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"hcii2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.hci.international\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}