{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T16:05:04Z","timestamp":1775837104255,"version":"3.50.1"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,2]]},"DOI":"10.1109\/slt61566.2024.10832273","type":"proceedings-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:31:27Z","timestamp":1737052287000},"page":"208-215","source":"Crossref","is-referenced-by-count":1,"title":["Controlling Whisper: Universal Acoustic Adversarial Attacks to Control Multi-Task Automatic Speech Recognition Models"],"prefix":"10.1109","author":[{"given":"Vyas","family":"Raina","sequence":"first","affiliation":[{"name":"University of Cambridge"}]},{"given":"Mark","family":"Gales","sequence":"additional","affiliation":[{"name":"University of Cambridge"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Robust speech recognition via large-scale weak supervision","author":"Radford","year":"2022"},{"key":"ref2","article-title":"SALMONN: Towards generic hearing abilities for large language models","volume-title":"The Twelfth International Conference on Learning Representations","author":"Tang"},{"key":"ref3","article-title":"Crafting adversarial examples for speech paralinguistics 
applications","volume":"abs\/1711.03280","author":"Gong","year":"2017","journal-title":"CoRR"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.51644\/9781554583201-017"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/icassp48485.2024.10448257"},{"key":"ref6","article-title":"X-llm: Bootstrapping advanced large language models by treating multi-modalities as foreign languages","author":"Chen","year":"2023"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389705"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/icassp48485.2024.10445874"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/icassp48485.2024.10447605"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref11","article-title":"New standard for speech recognition and translation from the nvidia nemo canary model","author":"Rastorgueva","year":"2024"},{"key":"ref12","article-title":"Commandersong: A systematic approach for practical adversarial voice recognition","author":"Yuan","year":"2018"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/SPW.2018.00009"},{"key":"ref14","article-title":"ADAGIO: interactive experimentation with adversarial attack and defense for audio","volume":"abs\/1805.11852","author":"Das","year":"2018","journal-title":"CoRR"},{"key":"ref15","article-title":"Imperceptible, robust, and targeted adversarial examples for automatic speech recognition","author":"Qin","year":"2019"},{"key":"ref16","article-title":"Adversarial attacks against automatic speech recognition systems via psychoacoustic hiding","author":"Sch\u00f6nherr","year":"2018"},{"key":"ref17","article-title":"Adversarial attacks against automatic speech recognition systems via psychoacoustic 
hiding","volume":"abs\/1808.05665","author":"Sch\u00f6nherr","year":"2018"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1353"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3372297.3423348"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1668"},{"key":"ref21","first-page":"2667","article-title":"Devil\u2019s whisper: A general approach for physical adversarial attacks against commercial black-box speech recognition devices","volume-title":"29th USENIX Security Symposium (USENIX Security 20).","author":"Chen"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/GLOBECOM42002.2020.9348184"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01166"},{"key":"ref24","article-title":"Did you hear that? adversarial examples against automatic speech recognition","volume":"abs\/1801.00554","author":"Alzantot","year":"2018","journal-title":"CoRR"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2420"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/SPW.2019.00016"},{"key":"ref27","article-title":"Sirenattack: Generating adversarial audio for end-to-end acoustic systems","author":"Du","year":"2019"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3460120.3485383"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1890"},{"key":"ref30","first-page":"513","article-title":"Hidden voice commands","volume-title":"25th USENIX Security Symposium (USENIX Security 16)","author":"Carlini"},{"key":"ref31","article-title":"Dolphinatack: Inaudible voice 
commands","volume":"abs\/1708.09537","author":"Zhang","year":"2017","journal-title":"CoRR"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2019.23362"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1105"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.430"},{"key":"ref35","article-title":"Towards deep learning models resistant to adversarial attacks","author":"Madry","year":"2019"},{"key":"ref36","article-title":"Fleurs: Few-shot learning evaluation of universal representations of speech","author":"Conneau","year":"2022","journal-title":"arXiv preprint arXiv:2205.12446"},{"key":"ref37","doi-asserted-by":"crossref","DOI":"10.3115\/1073083.1073135","article-title":"Bleu: a method for automatic evaluation of machine translation","volume-title":"Annual Meeting of the Association for Computational Linguistics","author":"Papineni"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.213"},{"key":"ref39","article-title":"Language detection library for java","author":"Shuyo","year":"2010"}],"event":{"name":"2024 IEEE Spoken Language Technology Workshop (SLT)","location":"Macao","start":{"date-parts":[[2024,12,2]]},"end":{"date-parts":[[2024,12,5]]}},"container-title":["2024 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10830790\/10830793\/10832273.pdf?arnumber=10832273","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T08:18:53Z","timestamp":1737101933000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10832273\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/slt61566.2024.10832273","relation":{},"subject":[],"published":{"date-parts":[[2024,12,2]]}}}