{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T07:03:38Z","timestamp":1775199818256,"version":"3.50.1"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434665","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Evaluation of LLMs in Speech is Often Flawed: Test Set Contamination in Large Language Models for Speech Recognition"],"prefix":"10.1109","author":[{"given":"Yuan","family":"Tseng","sequence":"first","affiliation":[{"name":"AI Center Cambridge, Samsung,United Kingdom"}]},{"given":"Titouan","family":"Parcollet","sequence":"additional","affiliation":[{"name":"AI Center Cambridge, Samsung,United Kingdom"}]},{"given":"Rogier","family":"Van Dalen","sequence":"additional","affiliation":[{"name":"AI Center Cambridge, Samsung,United Kingdom"}]},{"given":"Shucong","family":"Zhang","sequence":"additional","affiliation":[{"name":"AI Center Cambridge, Samsung,United Kingdom"}]},{"given":"Sourav","family":"Bhattacharya","sequence":"additional","affiliation":[{"name":"AI Center Cambridge, Samsung,United 
Kingdom"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447605"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2499"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890391"},{"key":"ref4","article-title":"Let\u2019s fuse step by step: A generative fusion decoding algorithm with LLMs for multi-modal text recognition","author":"Hsu","year":"2024","journal-title":"arXiv preprint arXiv:2405.14259"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2024-1346"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389705"},{"key":"ref7","article-title":"Can generative large language models perform asr error correction?","author":"Ma","year":"2023","journal-title":"arXiv preprint arXiv:2307.04172"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389637"},{"key":"ref9","article-title":"Denoising LM: Pushing the limits of error correction models for speech recognition","author":"Gu","year":"2024","journal-title":"arXiv preprint arXiv:2405.15216"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.722"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.18"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-long.5"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref14","article-title":"Common voice: A massively-multilingual speech corpus","volume-title":"Proceedings of Language Resources and Evaluation Conference","author":"Ardila"},{"key":"ref15","article-title":"The Pile: An 800 GB dataset of diverse text for language modeling","author":"Gao","year":"2020","journal-title":"arXiv preprint 
arXiv:2101.00027"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2024-1533"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10445874"},{"key":"ref18","article-title":"An embarrassingly simple approach for LLM with strong ASR capacity","author":"Ma","year":"2024","journal-title":"arXiv preprint arXiv:2402.08846"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.263"},{"key":"ref20","article-title":"Efficient streaming LLM for speech recognition","author":"Jia","year":"2024","journal-title":"arXiv preprint arXiv:2410.03752"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/slt61566.2024.10832154"},{"key":"ref22","article-title":"The bigscience ROOTS corpus: A 1.6 TB composite multilingual dataset","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks","author":"Lauren\u00e7on"},{"key":"ref23","article-title":"RedPajama: an open dataset for training large language models","author":"Computer","year":"2023"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.840"},{"key":"ref25","article-title":"LLM360 K2: Building a 65B 360-open-source large language model from scratch","author":"Liu","year":"2025","journal-title":"arXiv preprint arXiv:2501.07124"},{"key":"ref26","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv:2307.09288"},{"key":"ref27","article-title":"The Llama 3 herd of models","author":"Grattafiori","year":"2024","journal-title":"arXiv preprint arXiv:2407.21783"},{"key":"ref28","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv:2302.13971"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139924801"},{"key":"ref30","article-title":"Benchmarking nonparametric statistical 
tests","author":"Keller","year":"2005"},{"key":"ref31","article-title":"Confidence intervals for evaluation in machine learning","author":"Ferrer"},{"key":"ref32","first-page":"2397","article-title":"Pythia: A suite for analyzing large language models across training and scaling","volume-title":"Proceedings of International Conference on Machine Learning","volume":"202","author":"Biderman"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.bigscience-1.9"},{"key":"ref34","article-title":"OPT: Open pre-trained transformer language models","author":"Zhang","year":"2022","journal-title":"arXiv preprint arXiv:2205.01068"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.841"},{"key":"ref36","first-page":"267","article-title":"The secret sharer: Evaluating and testing unintended memorization in neural networks","volume-title":"Proceedings of the 28th USENIX Conference on Security Symposium","author":"Carlini"},{"issue":"333","key":"ref37","first-page":"1","article-title":"Open-source conversational AI with SpeechBrain 1.0","volume":"25","author":"Ravanelli","year":"2024","journal-title":"Journal of Machine Learning Research"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop 
(ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434665.pdf?arnumber=11434665","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:57:55Z","timestamp":1775192275000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434665\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434665","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}