{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T02:24:48Z","timestamp":1774578288860,"version":"3.50.1"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031377419","type":"print"},{"value":"9783031377426","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-37742-6_21","type":"book-chapter","created":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T06:02:51Z","timestamp":1690869771000},"page":"247-263","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["Combining Automatic Speaker Verification and Prosody Analysis for Synthetic Speech Detection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0180-1323","authenticated-orcid":false,"given":"Luigi","family":"Attorresi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5163-3364","authenticated-orcid":false,"given":"Davide","family":"Salvi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8127-9976","authenticated-orcid":false,"given":"Clara","family":"Borrelli","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0406-0222","authenticated-orcid":false,"given":"Paolo","family":"Bestagini","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1990-9869","authenticated-orcid":false,"given":"Stefano","family":"Tubaro","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,2]]},"reference":[{"key":"21_CR1","doi-asserted-by":"crossref","unstructured":"Agarwal, S., Farid, H.: Detecting deep-fake videos from aural and oral dynamics. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPRW53098.2021.00109"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Agarwal, S., Farid, H., El-Gaaly, T., Lim, S.N.: Detecting deep-fake videos from appearance and behavior. In: IEEE International Workshop on Information Forensics and Security (WIFS) (2020)","DOI":"10.1109\/WIFS49906.2020.9360904"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Alzantot, M., Wang, Z., Srivastava, M.B.: Deep residual neural networks for audio spoofing detection. In: Conference of the International Speech Communication Association (INTERSPEECH) (2019)","DOI":"10.21437\/Interspeech.2019-3174"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Bonettini, N., Cannas, E.D., Mandelli, S., Bondi, L., Bestagini, P., Tubaro, S.: Video face manipulation detection through ensemble of CNNs. In: International Conference on Pattern Recognition (ICPR) (2021)","DOI":"10.1109\/ICPR48806.2021.9412711"},{"issue":"1","key":"21_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13635-021-00116-3","volume":"2021","author":"C Borrelli","year":"2021","unstructured":"Borrelli, C., Bestagini, P., Antonacci, F., Sarti, A., Tubaro, S.: Synthetic speech detection through short-term and long-term prediction traces. EURASIP J. Inf. Secur. 2021(1), 1\u201314 (2021). https:\/\/doi.org\/10.1186\/s13635-021-00116-3","journal-title":"EURASIP J. Inf. Secur."},{"issue":"4","key":"21_CR6","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., et al.: IEMOCAP: interactive emotional dyadic motion capture database. Lang. Resour. Eval. 42(4), 335\u2013359 (2008)","journal-title":"Lang. Resour. Eval."},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Chen, T., Kumar, A., Nagarsheth, P., Sivaraman, G., Khoury, E.: Generalization of audio deepfake detection. In: Odyssey Speaker and Language Recognition Workshop (2020)","DOI":"10.21437\/Odyssey.2020-19"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Chugh, K., Gupta, P., Dhall, A., Subramanian, R.: Not made for each other-audio-visual dissonance-based deepfake detection and localization. In: International Conference on Multimedia (ACM) (2020)","DOI":"10.1145\/3394171.3413700"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Nagrani, A., Zisserman, A.: VoxCeleb2: deep speaker recognition. In: Conference of the International Speech Communication Association (INTERSPEECH) (2018)","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Conti, E., et al.: Deepfake speech detection through emotion recognition: a semantic approach. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2022)","DOI":"10.1109\/ICASSP43922.2022.9747186"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Cozzolino, D., R\u00f6ssler, A., Thies, J., Nie\u00dfner, M., Verdoliva, L.: ID-Reveal: identity-aware deepfake video detection. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/ICCV48922.2021.01483"},{"key":"21_CR12","doi-asserted-by":"crossref","unstructured":"Desplanques, B., Thienpondt, J., Demuynck, K.: ECAPA-TDNN: emphasized channel attention, propagation and aggregation in TDNN based speaker verification. In: Conference of the International Speech Communication Association (INTERSPEECH) (2020)","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"21_CR13","unstructured":"Forbes: Deepfakes, revenge porn, and the impact on women. https:\/\/www.forbes.com\/sites\/chenxiwang\/2019\/11\/01\/deepfakes-revenge-porn-and-the-impact-on-women\/?sh=45b66a961f53"},{"key":"21_CR14","unstructured":"Forbes: Fraudsters Cloned Company Director\u2019s Voice In 35\\$ Million Bank Heist, Police Find. https:\/\/www.forbes.com\/sites\/thomasbrewster\/2021\/10\/14\/huge-bank-fraud-uses-deep-fake-voice-tech-to-steal-millions"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Gao, Y., Vuong, T., Elyasi, M., Bharaj, G., Singh, R.: Generalized spoofing detection inspired from audio generation artifacts. In: Conference of the International Speech Communication Association (INTERSPEECH) (2021)","DOI":"10.21437\/Interspeech.2021-1705"},{"key":"21_CR16","unstructured":"The Guardian: The rise of the deepfake and the threat to democracy. https:\/\/www.theguardian.com\/technology\/ng-interactive\/2019\/jun\/22\/the-rise-of-the-deepfake-and-the-threat-to-democracy"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Hosler, B., et al.: Do deepfakes feel emotions? A semantic approach to detecting deepfakes via emotional inconsistencies. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPRW53098.2021.00112"},{"key":"21_CR19","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"21_CR20","unstructured":"Ito, K., Johnson, L.: The LJ Speech Dataset (2017). https:\/\/keithito.com\/LJ-Speech-Dataset\/"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Kamble, M.R., Sailor, H.B., Patil, H.A., Li, H.: Advances in anti-spoofing: from the perspective of ASVspoof challenges. APSIPA Trans. Signal Inf. Process. (2020)","DOI":"10.1017\/ATSIP.2019.21"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"King, S., Karaiskos, V.: The Blizzard challenge 2013. In: Blizzard Challenge Workshop (2013)","DOI":"10.21437\/Blizzard.2013-1"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Li, Y., Chang, M.C., Lyu, S.: In Ictu Oculi: exposing AI created fake videos by detecting eye blinking. In: IEEE International Workshop on Information Forensics and Security (WIFS) (2018)","DOI":"10.1109\/WIFS.2018.8630787"},{"key":"21_CR24","unstructured":"Li, Y., Lyu, S.: Exposing deepfake videos by detecting face warping artifacts. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)"},{"key":"21_CR25","doi-asserted-by":"crossref","unstructured":"Lieto, A., et al.: \u201cHello? Who Am I Talking to?\u201d A shallow CNN approach for Human vs. Bot speech classification. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2019)","DOI":"10.1109\/ICASSP.2019.8682743"},{"key":"21_CR26","doi-asserted-by":"crossref","unstructured":"Malik, H.: Securing voice-driven interfaces against fake (cloned) audio attacks. In: IEEE Conference on Multimedia Information Processing and Retrieval (MIPR) (2019)","DOI":"10.1109\/MIPR.2019.00104"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Masood, M., Nawaz, M., Malik, K.M., Javed, A., Irtaza, A.: Deepfakes generation and detection: state-of-the-art, open challenges, countermeasures, and way forward. arXiv preprint arXiv:2103.00484 (2021)","DOI":"10.1007\/s10489-022-03766-z"},{"key":"21_CR28","unstructured":"Mimecast: Why Deepfakes are Revolutionizing the World of Phishing. https:\/\/www.mimecast.com\/blog\/deepfakes-revolutionizing-phishing"},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Nagrani, A., Chung, J.S., Zisserman, A.: VoxCeleb: a large-scale speaker identification dataset. In: Conference of the International Speech Communication Association (INTERSPEECH) (2017)","DOI":"10.21437\/Interspeech.2017-950"},{"key":"21_CR30","unstructured":"NewScientist: Fake faces created by AI look more trustworthy than real people. https:\/\/www.newscientist.com\/article\/2308312-fake-faces-created-by-ai-look-more-trustworthy-than-real-people\/"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Okabe, K., Koshinaka, T., Shinoda, K.: Attentive statistics pooling for deep speaker embedding. In: Conference of the International Speech Communication Association (INTERSPEECH) (2018)","DOI":"10.21437\/Interspeech.2018-993"},{"key":"21_CR32","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: LibriSpeech: an ASR corpus based on public domain audio books. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"issue":"4","key":"21_CR33","doi-asserted-by":"publisher","first-page":"1099","DOI":"10.1109\/TASL.2006.876123","volume":"14","author":"JF Pitrelli","year":"2006","unstructured":"Pitrelli, J.F., Bakis, R., Eide, E.M., Fernandez, R., Hamza, W., Picheny, M.A.: The IBM expressive text-to-speech synthesis system for American English. IEEE Trans. Audio Speech Lang. Process. 14(4), 1099\u20131108 (2006)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"21_CR34","unstructured":"Ravanelli, M., et al.: SpeechBrain: a general-purpose speech toolkit. arXiv:2106.04624 (2021)"},{"issue":"4","key":"21_CR35","doi-asserted-by":"publisher","first-page":"1311","DOI":"10.1007\/s13347-021-00459-2","volume":"34","author":"A de Ruiter","year":"2021","unstructured":"de Ruiter, A.: The distinct wrong of deepfakes. Philos. Technol. 34(4), 1311\u20131332 (2021)","journal-title":"Philos. Technol."},{"key":"21_CR36","unstructured":"Skerry-Ryan, R., et al.: Towards end-to-end prosody transfer for expressive speech synthesis with tacotron. In: International Conference on Machine Learning (ICML) (2018)"},{"key":"21_CR37","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., McCree, A., Povey, D., Khudanpur, S.: Speaker recognition for multi-speaker conversations using X-vectors. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2019)","DOI":"10.1109\/ICASSP.2019.8683760"},{"key":"21_CR38","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., Povey, D., Khudanpur, S.: X-vectors: robust DNN embeddings for speaker recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2018)","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"21_CR39","unstructured":"SoX Sound eXchange. http:\/\/sox.sourceforge.net"},{"key":"21_CR40","doi-asserted-by":"crossref","unstructured":"Tak, H., Patino, J., Todisco, M., Nautsch, A., Evans, N., Larcher, A.: End-to-end anti-spoofing with RawNet2. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2021)","DOI":"10.1109\/ICASSP39728.2021.9414234"},{"key":"21_CR41","unstructured":"The New York Times: Pennsylvania Woman Accused of Using Deepfake Technology to Harass Cheerleaders. https:\/\/www.nytimes.com\/2021\/03\/14\/us\/raffaela-spone-victory-vipers-deepfake.html"},{"key":"21_CR42","doi-asserted-by":"crossref","unstructured":"Todisco, M., et al.: ASVspoof 2019: future horizons in spoofed and fake audio detection. In: Conference of the International Speech Communication Association (INTERSPEECH) (2019)","DOI":"10.21437\/Interspeech.2019-2249"},{"issue":"5","key":"21_CR43","doi-asserted-by":"publisher","first-page":"910","DOI":"10.1109\/JSTSP.2020.3002101","volume":"14","author":"L Verdoliva","year":"2020","unstructured":"Verdoliva, L.: Media forensics and deepfakes: an overview. IEEE J. Sel. Topics Signal Process. 14(5), 910\u2013932 (2020)","journal-title":"IEEE J. Sel. Topics Signal Process."},{"key":"21_CR44","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: Tacotron: towards end-to-end speech synthesis. In: Conference of the International Speech Communication Association (INTERSPEECH) (2017)","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"21_CR45","unstructured":"Wang, Y., et al.: Style tokens: unsupervised style modeling, control and transfer in end-to-end speech synthesis. In: International Conference on Machine Learning (ICML) (2018)"},{"key":"21_CR46","doi-asserted-by":"crossref","unstructured":"Wang, Z.F., Wei, G., He, Q.H.: Channel pattern noise based playback attack detection algorithm for speaker recognition. In: IEEE International Conference on Machine Learning and Cybernetics (ICMLC) (2011)","DOI":"10.1109\/ICMLC.2011.6016982"},{"key":"21_CR47","doi-asserted-by":"crossref","unstructured":"Westerlund, M.: The emergence of deepfake technology: a review. Technol. Innov. Manage. Rev. 9(11) (2019)","DOI":"10.22215\/timreview\/1282"},{"key":"21_CR48","doi-asserted-by":"crossref","unstructured":"Yamagishi, J., et al.: ASVspoof 2021: accelerating progress in spoofed and deepfake speech detection. In: Automatic Speaker Verification and Spoofing Countermeasures Challenge (2021)","DOI":"10.21437\/ASVSPOOF.2021-8"},{"key":"21_CR49","doi-asserted-by":"crossref","unstructured":"Yang, X., Li, Y., Lyu, S.: Exposing deep fakes using inconsistent head poses. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2019)","DOI":"10.1109\/ICASSP.2019.8683164"},{"key":"21_CR50","unstructured":"Zeinali, H., Wang, S., Silnova, A., Mat\u011bjka, P., Plchot, O.: BUT system description to VoxCeleb speaker recognition challenge 2019. In: The VoxCeleb Challenge Workshop (2019)"},{"key":"21_CR51","doi-asserted-by":"crossref","unstructured":"Zhang, X., Karaman, S., Chang, S.F.: Detecting and simulating artifacts in GAN fake images. In: IEEE International Workshop on Information Forensics and Security (WIFS) (2019)","DOI":"10.1109\/WIFS47025.2019.9035107"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition, Computer Vision, and Image Processing. ICPR 2022 International Workshops and Challenges"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-37742-6_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T12:34:44Z","timestamp":1729859684000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-37742-6_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031377419","9783031377426"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-37742-6_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"2 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Montr\u00e9al, QC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 August 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icpr2022","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}