{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T06:12:07Z","timestamp":1758089527359,"version":"3.44.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006190","name":"Research and Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006190","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1109\/dac63849.2025.11132579","type":"proceedings-article","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T17:35:41Z","timestamp":1757957741000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["SpecASR: Accelerating LLM-based Automatic Speech Recognition via Speculative Decoding"],"prefix":"10.1109","author":[{"given":"Linye","family":"Wei","sequence":"first","affiliation":[{"name":"Institute for Artificial Intelligence, Peking University,Beijing,China"}]},{"given":"Shuzhang","family":"Zhong","sequence":"additional","affiliation":[{"name":"Institute for Artificial Intelligence, Peking University,Beijing,China"}]},{"given":"Songqiang","family":"Xu","sequence":"additional","affiliation":[{"name":"Institute for Artificial Intelligence, Peking University,Beijing,China"}]},{"given":"Runsheng","family":"Wang","sequence":"additional","affiliation":[{"name":"Peking University,School of Integrated Circuits,Beijing,China"}]},{"given":"Ru","family":"Huang","sequence":"additional","affiliation":[{"name":"Peking University,School of Integrated Circuits,Beijing,China"}]},{"given":"Meng","family":"Li","sequence":"additional","affiliation":[{"name":"Institute for Artificial Intelligence, Peking University,Beijing,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Gpt-4 technical report","volume-title":"arXiv preprint arXiv:2303.08774","author":"Achiam","year":"2023"},{"key":"ref2","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv:2302.13971"},{"key":"ref3","first-page":"50117","article-title":"Toolqa: A dataset for 1 lm question answering with external tools","volume":"36","author":"Zhuang","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref4","article-title":"Rt-2: Vision-languageaction models transfer web knowledge to robotic control","author":"Brohan","year":"2023","journal-title":"arXiv preprint arXiv:2307.15818"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389705"},{"key":"ref6","article-title":"Speech reallm-real-time streaming speech recognition with multimodal 1 lms by teaching the flow of time","author":"Seide","year":"2024","journal-title":"arXiv preprint arXiv:2406.09569"},{"key":"ref7","article-title":"Seed-asr: Understanding diverse speech and contexts with 1lm-based speech recognition","author":"Bai","year":"2024","journal-title":"arXiv preprint arXiv:2407.04675"},{"key":"ref8","article-title":"Llama-omni: Seamless speech interaction with large language models","author":"Fang","year":"2024","journal-title":"arXiv preprint arXiv:2409.06666"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447605"},{"key":"ref10","article-title":"Bestow: Efficient and streamable speech language model with the best of two worlds in gpt and t51","author":"Chen","year":"2024","journal-title":"arXiv preprint arXiv:2406.19954"},{"key":"ref11","article-title":"Accelerating large language model decoding with speculative sampling","author":"Chen","year":"2023","journal-title":"arXiv preprint arXiv:2302.01318"},{"key":"ref12","first-page":"19274","article-title":"Fast inference from transformers via speculative decoding","volume-title":"International Conference on Machine Learning.","author":"Leviathan"},{"key":"ref13","article-title":"Specinfer: Accelerating generative large language model serving with tree-based speculative inference and verification","author":"Miao","year":"2023","journal-title":"arXiv preprint arXiv:2305.09781"},{"key":"ref14","article-title":"Medusa: Simple 1lm inference acceleration framework with multiple decoding heads","author":"Cai","year":"2024","journal-title":"arXiv preprint arXiv:2401.10774"},{"key":"ref15","article-title":"Eagle: Speculative sampling requires rethinking feature uncertainty","author":"Li","year":"2024","journal-title":"arXiv preprint arXiv:2401.15077"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-025-01344-0"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49660.2025.10888140"},{"key":"ref18","article-title":"Distil-whisper: Robust knowledge distillation via large-scale pseudo labelling","author":"Gandhi","year":"2023","journal-title":"arXiv preprint arXiv:2311.00430"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-3015"},{"key":"ref20","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International conference on machine learning.","author":"Radford"},{"key":"ref21","article-title":"Qwen technical report","volume-title":"arXiv preprint arXiv:2309.16609","author":"Bai","year":"2023"},{"key":"ref22","article-title":"Lora: Low-rank adaptation of large language models","author":"Hu","year":"2021","journal-title":"arXiv preprint arXiv:2106.09685"},{"key":"ref23","article-title":"Propd: Dynamic token tree pruning and generation for 1lm parallel decoding","author":"Zhong","year":"2024","journal-title":"arXiv preprint arXiv:2402.13485"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.422"},{"key":"ref25","article-title":"Sequoia: Scalable, robust, and hardware-aware speculative decoding","author":"Chen","year":"2024","journal-title":"arXiv preprint arXiv:2402.12374"},{"key":"ref26","article-title":"Multi-candidate speculative decoding","author":"Yang","year":"2024","journal-title":"arXiv preprint arXiv:2401.06706"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref28","article-title":"Lrs3-ted: a large-scale dataset for visual speech recognition","author":"Afouras","year":"2018","journal-title":"arXiv preprint arXiv:1809.00496"},{"key":"ref29","article-title":"Parallelspec: Parallel drafter for efficient speculative decoding","author":"Xiao","year":"2024","journal-title":"arXiv preprint arXiv:2410.05589"},{"key":"ref30","article-title":"Online speculative decoding","author":"Liu","year":"2023","journal-title":"arXiv preprint arXiv:2310.07177"},{"key":"ref31","article-title":"Spectr: Fast speculative decoding via optimal transport","volume":"36","author":"Sun","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref32","article-title":"Recursive speculative decoding: Accelerating 11m inference via sampling without replacement","author":"Jeon","year":"2024","journal-title":"arXiv preprint arXiv:2402.14160"},{"key":"ref33","article-title":"Recurrent drafter for fast speculative decoding in large language models","author":"Cheng","year":"2024","journal-title":"arXiv preprint arXiv:2403.09919"},{"issue":"5","key":"ref34","article-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90* chatgpt quality, march 2023","volume":"3","author":"Chiang","year":"2023"},{"key":"ref35","article-title":"An embarrassingly simple approach for 1 lm with strong asr capacity","author":"Ma","year":"2024","journal-title":"arXiv preprint arXiv:2402.08846"}],"event":{"name":"2025 62nd ACM\/IEEE Design Automation Conference (DAC)","start":{"date-parts":[[2025,6,22]]},"location":"San Francisco, CA, USA","end":{"date-parts":[[2025,6,25]]}},"container-title":["2025 62nd ACM\/IEEE Design Automation Conference (DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11132383\/11132091\/11132579.pdf?arnumber=11132579","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:21:00Z","timestamp":1758000060000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11132579\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/dac63849.2025.11132579","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]}}}