{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T23:15:50Z","timestamp":1763507750191,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,12,7]],"date-time":"2020-12-07T00:00:00Z","timestamp":1607299200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100004801","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-1714807, CNS-1526524, CNS-1547350"],"award-info":[{"award-number":["CNS-1714807, CNS-1526524, CNS-1547350"]}],"id":[{"id":"10.13039\/501100004801","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,12,7]]},"DOI":"10.1145\/3427228.3427289","type":"proceedings-article","created":{"date-parts":[[2020,12,9]],"date-time":"2020-12-09T22:20:18Z","timestamp":1607552418000},"page":"870-883","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Voicefox: Leveraging Inbuilt Transcription to Enhance the Security of Machine-Human Speaker Verification against Voice Synthesis Attacks"],"prefix":"10.1145","author":[{"given":"Maliheh","family":"Shirvanian","sequence":"first","affiliation":[{"name":"Visa Research"}]},{"given":"Manar","family":"Mohammed","sequence":"additional","affiliation":[{"name":"Miami University, Egypt"}]},{"given":"Nitesh","family":"Saxena","sequence":"additional","affiliation":[{"name":"The University of Alabama at Birmingham, United States of America"}]},{"given":"S Abhishek","family":"Anand","sequence":"additional","affiliation":[{"name":"Bloomberg LP, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2020,12,8]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. A TensorFlow implementation of Baidu\u2019s DeepSpeech architecture. https:\/\/github.com\/mozilla\/DeepSpeech.  [n.d.]. A TensorFlow implementation of Baidu\u2019s DeepSpeech architecture. https:\/\/github.com\/mozilla\/DeepSpeech."},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. Automatic Speaker Verification Spoofing and Countermeasures Challenge. http:\/\/www.asvspoof.org\/.  [n.d.]. Automatic Speaker Verification Spoofing and Countermeasures Challenge. http:\/\/www.asvspoof.org\/."},{"key":"e_1_3_2_1_3_1","unstructured":"[n.d.]. BBC fools HSBC voice recognition security system. https:\/\/www.bbc.com\/news\/technology-39965545.  [n.d.]. BBC fools HSBC voice recognition security system. https:\/\/www.bbc.com\/news\/technology-39965545."},{"key":"e_1_3_2_1_4_1","unstructured":"[n.d.]. Festival. http:\/\/www.cstr.ed.ac.uk\/projects\/festival\/.  [n.d.]. Festival. http:\/\/www.cstr.ed.ac.uk\/projects\/festival\/."},{"key":"e_1_3_2_1_5_1","unstructured":"[n.d.]. Hackers mimicking little kids can fool voice recognition systems. https:\/\/goo.gl\/BNnbkv.  [n.d.]. Hackers mimicking little kids can fool voice recognition systems. https:\/\/goo.gl\/BNnbkv."},{"key":"e_1_3_2_1_6_1","unstructured":"[n.d.]. Nancy Dataset. http:\/\/www.cstr.ed.ac.uk\/projects\/blizzard\/2011\/lessac_blizzard2011\/.  [n.d.]. Nancy Dataset. http:\/\/www.cstr.ed.ac.uk\/projects\/blizzard\/2011\/lessac_blizzard2011\/."},{"key":"e_1_3_2_1_7_1","unstructured":"[n.d.]. SampleRNN: An unconditional end-to-end neural audio generation model. https:\/\/arxiv.org\/pdf\/1612.07837.pdf.  [n.d.]. SampleRNN: An unconditional end-to-end neural audio generation model. https:\/\/arxiv.org\/pdf\/1612.07837.pdf."},{"key":"e_1_3_2_1_8_1","unstructured":"[n.d.]. Silent Circle \u2013 Private Communications. https:\/\/silentcircle.com\/.  [n.d.]. Silent Circle \u2013 Private Communications. https:\/\/silentcircle.com\/."},{"key":"e_1_3_2_1_9_1","unstructured":"[n.d.]. Speech API - Speech Recognition | Google Cloud Platform. https:\/\/cloud.google.com\/speech\/.  [n.d.]. Speech API - Speech Recognition | Google Cloud Platform. https:\/\/cloud.google.com\/speech\/."},{"key":"e_1_3_2_1_10_1","unstructured":"[n.d.]. Speech to Text | IBM Watson Developer Cloud. www.ibm.com\/watson\/developercloud\/speech-to-text.html.  [n.d.]. Speech to Text | IBM Watson Developer Cloud. www.ibm.com\/watson\/developercloud\/speech-to-text.html."},{"key":"e_1_3_2_1_11_1","unstructured":"[n.d.]. The future of fake news: don\u2019t believe everything you read see or hear. https:\/\/www.theguardian.com\/technology\/2017\/jul\/26\/fake-news-obama-video-trump-face2face-doctored-content.  [n.d.]. The future of fake news: don\u2019t believe everything you read see or hear. https:\/\/www.theguardian.com\/technology\/2017\/jul\/26\/fake-news-obama-video-trump-face2face-doctored-content."},{"key":"e_1_3_2_1_12_1","unstructured":"[n.d.]. Viber Encryption Overview. https:\/\/www.viber.com\/en\/security-overview.  [n.d.]. Viber Encryption Overview. https:\/\/www.viber.com\/en\/security-overview."},{"key":"e_1_3_2_1_13_1","unstructured":"[n.d.]. Voice Banking Faces Threats Before It Even Arrives. https:\/\/bankinnovation.net\/2018\/05\/voice-banking-faces-threats-before-it-even-arrives\/.  [n.d.]. Voice Banking Faces Threats Before It Even Arrives. https:\/\/bankinnovation.net\/2018\/05\/voice-banking-faces-threats-before-it-even-arrives\/."},{"key":"e_1_3_2_1_14_1","unstructured":"[n.d.]. Wickr Secure Messenger. https:\/\/www.wickr.com\/.  [n.d.]. Wickr Secure Messenger. https:\/\/www.wickr.com\/."},{"key":"e_1_3_2_1_15_1","unstructured":"Accessed 05\/06\/2018. TRANSFORM: Flexible Voice Synthesis Through Articulatory Voice Transformation. http:\/\/goo.gl\/ZrRtXG.  Accessed 05\/06\/2018. TRANSFORM: Flexible Voice Synthesis Through Articulatory Voice Transformation. http:\/\/goo.gl\/ZrRtXG."},{"key":"e_1_3_2_1_16_1","unstructured":"Accessed 05\/06\/2018. Amazon Developing Advanced Voice-Recognition for Alexa. https:\/\/goo.gl\/5DzVMd.  Accessed 05\/06\/2018. Amazon Developing Advanced Voice-Recognition for Alexa. https:\/\/goo.gl\/5DzVMd."},{"key":"e_1_3_2_1_17_1","unstructured":"Accessed 05\/06\/2018. AppLock from Sensory Keeps Apps Safe with Face and Voice Biometrics. goo.gl\/BKkWYl.  Accessed 05\/06\/2018. AppLock from Sensory Keeps Apps Safe with Face and Voice Biometrics. goo.gl\/BKkWYl."},{"key":"e_1_3_2_1_18_1","unstructured":"Accessed 05\/06\/2018. Banks turning to voice recognition. https:\/\/goo.gl\/bVTm4J.  Accessed 05\/06\/2018. Banks turning to voice recognition. https:\/\/goo.gl\/bVTm4J."},{"key":"e_1_3_2_1_19_1","unstructured":"Accessed 05\/06\/2018. Barclays rolls out voice biometrics for phone banking. https:\/\/goo.gl\/rxjVSs.  Accessed 05\/06\/2018. Barclays rolls out voice biometrics for phone banking. https:\/\/goo.gl\/rxjVSs."},{"key":"e_1_3_2_1_20_1","unstructured":"Accessed 05\/06\/2018. Copy the voice of anyone. https:\/\/lyrebird.ai\/.  Accessed 05\/06\/2018. Copy the voice of anyone. https:\/\/lyrebird.ai\/."},{"key":"e_1_3_2_1_21_1","unstructured":"Accessed 05\/06\/2018. Google Home now recognizes your individual voice. https:\/\/goo.gl\/ohBQBW.  Accessed 05\/06\/2018. Google Home now recognizes your individual voice. https:\/\/goo.gl\/ohBQBW."},{"key":"e_1_3_2_1_22_1","unstructured":"Accessed 05\/06\/2018. HSBC rolls out voice and touch ID security for bank customers. https:\/\/goo.gl\/TR5FyJ.  Accessed 05\/06\/2018. HSBC rolls out voice and touch ID security for bank customers. https:\/\/goo.gl\/TR5FyJ."},{"key":"e_1_3_2_1_23_1","unstructured":"Accessed 05\/06\/2018. More banks turn to biometrics to keep an eye on security. goo.gl\/JcM5wo.  Accessed 05\/06\/2018. More banks turn to biometrics to keep an eye on security. goo.gl\/JcM5wo."},{"key":"e_1_3_2_1_24_1","unstructured":"Accessed 05\/06\/2018. Nuance VocalPassword: Liveness Detection using Voice Biometrics. http:\/\/www.nuance.es\/ucmprod\/groups\/enterprise\/@web-enus\/documents\/collateral\/nc_015226.pdf.  Accessed 05\/06\/2018. Nuance VocalPassword: Liveness Detection using Voice Biometrics. http:\/\/www.nuance.es\/ucmprod\/groups\/enterprise\/@web-enus\/documents\/collateral\/nc_015226.pdf."},{"key":"e_1_3_2_1_25_1","unstructured":"Accessed 05\/06\/2018. Set up your device for automatic unlock. https:\/\/goo.gl\/iXNpJK.  Accessed 05\/06\/2018. Set up your device for automatic unlock. https:\/\/goo.gl\/iXNpJK."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Hadi Abdullah Washington Garcia Christian Peeters Patrick Traynor Kevin\u00a0RB Butler and Joseph Wilson. 2019. Practical hidden voice attacks against speech and speaker recognition systems. arXiv preprint arXiv:1904.05734(2019).  Hadi Abdullah Washington Garcia Christian Peeters Patrick Traynor Kevin\u00a0RB Butler and Joseph Wilson. 2019. Practical hidden voice attacks against speech and speaker recognition systems. arXiv preprint arXiv:1904.05734(2019).","DOI":"10.14722\/ndss.2019.23362"},{"volume-title":"Signal Processing Conference (EUSIPCO), 2012 Proceedings of the 20th European.","author":"Alegre F.","key":"e_1_3_2_1_27_1"},{"volume-title":"O\u2019Reilly Media","author":"Bird Steven","key":"e_1_3_2_1_28_1"},{"volume-title":"FONETIK","year":"2004","author":"Blomberg Mats","key":"e_1_3_2_1_29_1"},{"volume-title":"USENIX Security Symposium. 513\u2013530","year":"2016","author":"Carlini Nicholas","key":"e_1_3_2_1_30_1"},{"key":"e_1_3_2_1_31_1","unstructured":"Guangke Chen Sen Chen Lingling Fan Xiaoning Du Zhe Zhao Fu Song and Yang Liu. 2019. Who is Real Bob? Adversarial Attacks on Speaker Recognition Systems. arXiv preprint arXiv:1911.01840(2019).  Guangke Chen Sen Chen Lingling Fan Xiaoning Du Zhe Zhao Fu Song and Yang Liu. 2019. Who is Real Bob? Adversarial Attacks on Speaker Recognition Systems. arXiv preprint arXiv:1911.01840(2019)."},{"volume-title":"Proc. Image and Vision Computing(2004)","year":"2004","author":"Chetty Girija","key":"e_1_3_2_1_32_1"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495413"},{"volume-title":"Handbook of Biometric Anti-Spoofing","author":"Evans Nicholas","key":"e_1_3_2_1_34_1"},{"volume-title":"A speaker independent","author":"Eveno Nicolas","key":"e_1_3_2_1_35_1"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2004.828699"},{"volume-title":"Biometric Consortium Conference, 2006 Biometrics Symposium: Special Session on Research at the.","author":"Chetty G.","key":"e_1_3_2_1_37_1"},{"key":"e_1_3_2_1_38_1","unstructured":"Keith Ito. [n.d.]. Implementation of Tacotron. https:\/\/github.com\/keithito\/tacotron.  Keith Ito. [n.d.]. Implementation of Tacotron. https:\/\/github.com\/keithito\/tacotron."},{"key":"e_1_3_2_1_39_1","unstructured":"Keith Ito. 2017. The LJ Speech Dataset. https:\/\/keithito.com\/LJ-Speech-Dataset\/.  Keith Ito. 2017. The LJ Speech Dataset. https:\/\/keithito.com\/LJ-Speech-Dataset\/."},{"key":"e_1_3_2_1_40_1","unstructured":"Alan W\u00a0Black John\u00a0Kominek. 2003. CMU ARCTIC Databases for Speech Synthesis. http:\/\/festvox.org\/cmu_arctic\/cmu_arctic_report.pdf  Alan W\u00a0Black John\u00a0Kominek. 2003. CMU ARCTIC Databases for Speech Synthesis. http:\/\/festvox.org\/cmu_arctic\/cmu_arctic_report.pdf"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288895"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462693"},{"volume-title":"Video and Speech Processing, 2004. Proceedings of 2004 International Symposium on.","author":"Lau Yee\u00a0Wah","key":"e_1_3_2_1_43_1"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Dibya Mukhopadhyay Maliheh Shirvanian and Nitesh Saxena. 2015. All Your Voices Are Belong to Us: Stealing Voices to Fool Humans and Machines. In ESORICS.  Dibya Mukhopadhyay Maliheh Shirvanian and Nitesh Saxena. 2015. All Your Voices Are Belong to Us: Stealing Voices to Fool Humans and Machines. In ESORICS.","DOI":"10.1007\/978-3-319-24177-7_30"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.18626"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133956.3134013"},{"volume-title":"Tacotron: Towards end-to-end speech synthesis. arXiv preprint arXiv:1703.10135(2017).","year":"2017","author":"Wang Yuxuan","key":"e_1_3_2_1_47_1"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Zhizheng Wu Nicholas Evans Tomi Kinnunen Junichi Yamagishi Federico Alegre and Haizhou Li. 2015. Spoofing and countermeasures for speaker verification: a survey. Speech Communication 66(2015).  Zhizheng Wu Nicholas Evans Tomi Kinnunen Junichi Yamagishi Federico Alegre and Haizhou Li. 2015. Spoofing and countermeasures for speaker verification: a survey. Speech Communication 66(2015).","DOI":"10.1016\/j.specom.2014.10.005"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2014.7041636"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178810"},{"volume-title":"ASVspoof 2015: the first automatic speaker verification spoofing and countermeasures challenge. Training 10, 15","year":"2015","author":"Wu Zhizheng","key":"e_1_3_2_1_51_1"},{"key":"e_1_3_2_1_52_1","unstructured":"Zhizheng Wu Anthony Larcher Kong-Aik Lee Engsiong Chng Tomi Kinnunen and Haizhou Li. 2013. Vulnerability evaluation of speaker verification under voice conversion spoofing: the effect of text constraints.. In INTERSPEECH. 950\u2013954.  Zhizheng Wu Anthony Larcher Kong-Aik Lee Engsiong Chng Tomi Kinnunen and Haizhou Li. 2013. Vulnerability evaluation of speaker verification under voice conversion spoofing: the effect of text constraints.. In INTERSPEECH. 950\u2013954."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2013.6694344"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133956.3134052"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2976749.2978296"}],"event":{"name":"ACSAC '20: Annual Computer Security Applications Conference","acronym":"ACSAC '20","location":"Austin USA"},"container-title":["Annual Computer Security Applications Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3427228.3427289","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3427228.3427289","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:02:25Z","timestamp":1750197745000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3427228.3427289"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12,7]]},"references-count":55,"alternative-id":["10.1145\/3427228.3427289","10.1145\/3427228"],"URL":"https:\/\/doi.org\/10.1145\/3427228.3427289","relation":{},"subject":[],"published":{"date-parts":[[2020,12,7]]},"assertion":[{"value":"2020-12-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}