{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:12:48Z","timestamp":1750219968700,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,11,6]],"date-time":"2022-11-06T00:00:00Z","timestamp":1667692800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Singtel Cognitive and Artificial Intelligence Lab for Enterprises","award":["2019-1081"],"award-info":[{"award-number":["2019-1081"]}]},{"name":"The Chinese University of Hong Kong - Research Committee - Direct Grants","award":["178921043"],"award-info":[{"award-number":["178921043"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,11,6]]},"DOI":"10.1145\/3560905.3568500","type":"proceedings-article","created":{"date-parts":[[2023,1,24]],"date-time":"2023-01-24T23:37:10Z","timestamp":1674603430000},"page":"61-74","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Telesonar"],"prefix":"10.1145","author":[{"given":"Zhenyu","family":"Yan","sequence":"first","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong SAR, China"}]},{"given":"Rui","family":"Tan","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore"}]},{"given":"Qun","family":"Song","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore"}]},{"given":"Chris Xiaoxuan","family":"Lu","sequence":"additional","affiliation":[{"name":"University of Edinburgh, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2023,1,24]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2018. The Google Assistant can help you get things done over the phone - YouTube. https:\/\/www.youtube.com\/watch?v=-qCanuYrR0g."},{"key":"e_1_3_2_1_2_1","unstructured":"2018. Voice Phishing Scams Are Getting More Clever. https:\/\/krebsonsecurity.com\/2018\/10\/voice-phishing-scams-are-getting-more-clever\/comment-page-4\/."},{"key":"e_1_3_2_1_3_1","unstructured":"2019. Deep Residual Neural Networks for Audio Spoofing Detection. https:\/\/github.com\/nesl\/asvspoof2019."},{"key":"e_1_3_2_1_4_1","unstructured":"2020. InsightLake. http:\/\/www.insightlake.com\/call-fraud.html."},{"key":"e_1_3_2_1_5_1","unstructured":"2020. Pindrop. https:\/\/www.pindrop.com\/."},{"key":"e_1_3_2_1_6_1","unstructured":"2020. Voice Biometrics Group. https:\/\/www.voicebiogroup.com\/starting\/detect-fraud-in-your-contact-center.html."},{"key":"e_1_3_2_1_7_1","unstructured":"2021. Automatic Speaker Verification - Spoofing and Countermeasures Challenge (ASVspoof). https:\/\/www.asvspoof.org\/."},{"key":"e_1_3_2_1_8_1","unstructured":"2021. Google AI Blog: Google Duplex. https:\/\/ai.googleblog.com\/2018\/05\/duplex-ai-system-for-natural-conversation.html."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Vijay A. Balasubramaniyan Aamir Poonawalla Mustaque Ahamad Michael T. Hunter and Patrick Traynor. 2010. PinDr0p: Using Single-Ended Audio Features To Determine Call Provenance. In CCS. ACM 109.","DOI":"10.1145\/1866307.1866320"},{"key":"e_1_3_2_1_10_1","unstructured":"Tom B Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. arXiv preprint arXiv:2005.14165 (2020)."},{"volume-title":"CALLHOME American English Speech LDC97S42","author":"Canavan Alexandra","key":"e_1_3_2_1_11_1","unstructured":"Alexandra Canavan, David Graff, and George Zipperlen. 1997. CALLHOME American English Speech LDC97S42."},{"key":"e_1_3_2_1_12_1","unstructured":"N. Carlini P. Mishra T. Vaidya Y. Zhang M. Sherr C. Shields D. Wagner and W. Zhou. 2016. Hidden Voice Commands. In Security. Usenix."},{"key":"e_1_3_2_1_13_1","unstructured":"Federal Trade Commission. 2020. Robocalls. https:\/\/www.consumer.ftc.gov\/features\/feature-0025-robocalls."},{"key":"e_1_3_2_1_14_1","volume-title":"National Do Not Call Registry Data Book for Fiscal Year","author":"Federal Trade Commission","year":"2021","unstructured":"Federal Trade Commission. 2021. National Do Not Call Registry Data Book for Fiscal Year 2021. https:\/\/www.ftc.gov\/reports\/national-do-not-call-registry-data-book-fiscal-year-2021."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2201472"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-66429-3_9"},{"key":"e_1_3_2_1_17_1","unstructured":"J. Engel K. Agrawal S. Chen I. Gulrajani C. Donahue and A. Roberts. 2019. GANSynth: Adversarial Neural Audio Synthesis. In ICLR."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"S. Fleming M. Thompson R. Stevens C. Heneghan A. Pl\u00fcddemann et al. 2011. Normal ranges of heart rate and respiratory rate in children from birth to 18 years of age: a systematic review of observational studies. The Lancet 377 9770 (2011) 1011--1018.","DOI":"10.1016\/S0140-6736(10)62226-X"},{"key":"e_1_3_2_1_19_1","volume-title":"Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572","author":"Goodfellow Ian J","year":"2014","unstructured":"Ian J Goodfellow, Jonathon Shlens, and Christian Szegedy. 2014. Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572 (2014)."},{"volume-title":"Acoustic echo and noise control: a practical approach","author":"H\u00e4nsler Eberhard","key":"e_1_3_2_1_20_1","unstructured":"Eberhard H\u00e4nsler and Gerhard Schmidt. 2005. Acoustic echo and noise control: a practical approach. Vol. 40. John Wiley & Sons."},{"key":"e_1_3_2_1_21_1","volume-title":"One-Way transmission time","author":"ITUT ITU-T.","year":"2003","unstructured":"ITUT ITU-T. 2003. Recommendation G. 114. One-Way transmission time (2003)."},{"volume-title":"Bot Speech Classification","author":"Lieto Alessandro","key":"e_1_3_2_1_22_1","unstructured":"Alessandro Lieto, Daniele Moro, Francesco Devoti, Claudia Parera, Vincenzo Lipari, Paolo Bestagini, and Stefano Tubaro. 2019. \"Hello? Who Am I Talking to?\" A Shallow CNN Approach for Human vs. Bot Speech Classification. In ICASSP. IEEE, 2577--2581."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Nicolas Papernot Patrick McDaniel Ian Goodfellow Somesh Jha Z Berkay Celik and Ananthram Swami. 2017. Practical black-box attacks against machine learning. In Asia CCS. ACM 506--519.","DOI":"10.1145\/3052973.3053009"},{"key":"e_1_3_2_1_24_1","volume-title":"Algorithms to measure audio programme loudness and true-peak audio level","author":"Recommendation ITU","year":"2015","unstructured":"ITU Recommendation. 2015. ITU-R BS. 1770-4. Algorithms to measure audio programme loudness and true-peak audio level (2015)."},{"volume-title":"FoR: A Dataset for Synthetic Speech Detection","author":"Reimao Ricardo","key":"e_1_3_2_1_25_1","unstructured":"Ricardo Reimao and Vassilios Tzerpos. 2019. FoR: A Dataset for Synthetic Speech Detection. In IEEE SpeD. 1--10."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"N. Roy H. Hassanieh and R. Roy Choudhury. 2017. BackDoor: Making Microphones Hear Inaudible Sounds. In MobiSys. ACM.","DOI":"10.1145\/3081333.3081366"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Md Sahidullah Tomi Kinnunen and Cemal Hanil\u00e7i. 2015. A Comparison of Features for Synthetic Speech Detection. In Interspeech.","DOI":"10.21437\/Interspeech.2015-472"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.21437\/Eurospeech.2001-239"},{"key":"e_1_3_2_1_29_1","volume-title":"Pyroomacoustics: A python package for audio room simulation and array processing algorithms","author":"Scheibler Robin","year":"2018","unstructured":"Robin Scheibler, Eric Bezzam, and Ivan Dokmani\u0107. 2018. Pyroomacoustics: A python package for audio room simulation and array processing algorithms. In ICASSP. IEEE."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/89.661472"},{"key":"e_1_3_2_1_31_1","volume-title":"711: Pulse Code Modulation (PCM) of voice frequencies. ITU-T Recommendation G 711","author":"Switzerland ITUT","year":"1988","unstructured":"ITUT Switzerland. 1988. G. 711: Pulse Code Modulation (PCM) of voice frequencies. ITU-T Recommendation G 711 (1988)."},{"key":"e_1_3_2_1_32_1","volume-title":"174: Transmission performance objectives for terrestrial digital wireless systems using portable terminals to access the PSTN. ITU-T Recommendation G","author":"Switzerland ITUT","year":"1994","unstructured":"ITUT Switzerland. 1994. G. 174: Transmission performance objectives for terrestrial digital wireless systems using portable terminals to access the PSTN. ITU-T Recommendation G (1994)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"\u00c9. Sz\u00e9kely G.E. Henter J. Beskow and J. Gustafson. 2020. Breathing and Speech Planning in Spontaneous Speech Synthesis. In ICASSP. IEEE.","DOI":"10.1109\/ICASSP40776.2020.9054107"},{"key":"e_1_3_2_1_34_1","unstructured":"The Wall Street Journal. 2019. Fraudsters Used AI to Mimic CEO's Voice in Unusual Cybercrime Case. https:\/\/on.wsj.com\/38Oj4JA."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2017.01.001"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"M. Todisco X. Wang V. Vestman M. Sahidullah H. Delgado A. Nautsch J. Yamagishi N. Evans T. Kinnunen and K.A. Lee. 2019. ASVspoof 2019: Future Horizons in Spoofed and Fake Audio Detection. In Interspeech.","DOI":"10.21437\/Interspeech.2019-2249"},{"key":"e_1_3_2_1_37_1","unstructured":"Truecaller. 2021. 2021 U.S. Spam & Scam Report. https:\/\/truecaller.blog\/2021\/06\/28\/us-spam-scam-report-21\/."},{"key":"e_1_3_2_1_38_1","unstructured":"A\u00e4ron van den Oord Sander Dieleman Heiga Zen Karen Simonyan Oriol Vinyals Alexander Graves Nal Kalchbrenner Andrew Senior and Koray Kavukcuoglu. 2016. WaveNet: A Generative Model for Raw Audio. In Arxiv. https:\/\/arxiv.org\/abs\/1609.03499"},{"key":"e_1_3_2_1_39_1","unstructured":"The Verge. 2020. How to get the most out of Google Pixel's call screening feature. https:\/\/www.theverge.com\/2020\/2\/6\/21122390\/google-assistant-screen-call-robocalls-spam-phone-pixel. (Accessed on 01\/09\/2021)."},{"key":"e_1_3_2_1_40_1","volume-title":"Proc. of ISCA SSW6","author":"Heiga","year":"2007","unstructured":"Heiga ZEN. 2007. The HMM-based speech synthesis system version 2.0. Proc. of ISCA SSW6, Bonn, Germany, Aug. 2007 (2007)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"H. Zen K. Tokuda and A.W. Black. 2009. Statistical parametric speech synthesis. speech communication 51 11 (2009) 1039--1064.","DOI":"10.1016\/j.specom.2009.04.004"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133956.3134052"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Linghan Zhang Sheng Tan and Jie Yang. 2017. Hearing your voice is not enough: An articulatory gesture based liveness detection for voice authentication. In CCS. ACM.","DOI":"10.1145\/3133956.3133962"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Linghan Zhang Sheng Tan Jie Yang and Yingying Chen. 2016. VoiceLive. In CCS. ACM.","DOI":"10.1145\/2976749.2978296"}],"event":{"name":"SenSys '22: The 20th ACM Conference on Embedded Networked Sensor Systems","sponsor":["SIGMETRICS ACM Special Interest Group on Measurement and Evaluation","SIGCOMM ACM Special Interest Group on Data Communication","SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems","SIGBED ACM Special Interest Group on Embedded Systems","SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Boston Massachusetts","acronym":"SenSys '22"},"container-title":["Proceedings of the 20th ACM Conference on Embedded Networked Sensor Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3560905.3568500","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3560905.3568500","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:15Z","timestamp":1750182555000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3560905.3568500"}},"subtitle":["Robocall Alarm System by Detecting Echo Channel and Breath Timing"],"short-title":[],"issued":{"date-parts":[[2022,11,6]]},"references-count":44,"alternative-id":["10.1145\/3560905.3568500","10.1145\/3560905"],"URL":"https:\/\/doi.org\/10.1145\/3560905.3568500","relation":{},"subject":[],"published":{"date-parts":[[2022,11,6]]},"assertion":[{"value":"2023-01-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}