{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T19:24:47Z","timestamp":1764617087930,"version":"3.46.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23B2018"],"award-info":[{"award-number":["U23B2018"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249374","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"777-782","source":"Crossref","is-referenced-by-count":0,"title":["Chain-of-Thought Distillation for ASR Error Correction with Multimodal Large Language Models"],"prefix":"10.1109","author":[{"given":"Shaomeng","family":"Yang","sequence":"first","affiliation":[{"name":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,China"}]},{"given":"Jiaming","family":"Luo","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,China"}]},{"given":"Jinran","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,China"}]},{"given":"Rongfeng","family":"Su","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Advanced Technology, Chinese 
Academy of Sciences,China"}]},{"given":"Yongjie","family":"Zhou","sequence":"additional","affiliation":[{"name":"Shenzhen Kangning Hospital,Shenzhen, Guangdong,China"}]},{"given":"Lan","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,China"}]},{"given":"Nan","family":"Yan","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"End-to-end Continuous Speech Recognition using Attention-based Recurrent NN: First Results","volume":"abs\/1412.1602","author":"Chorowski","year":"2014","journal-title":"CoRR"},{"key":"ref2","article-title":"Towards End-To-End Speech Recognition with Recurrent Neural Networks","volume-title":"International Conference on Machine Learning","author":"Graves","year":"2014"},{"key":"ref3","first-page":"173","article-title":"Deep speech 2: end-to-end speech recognition in English and mandarin","volume-title":"Proceedings of the 33rd International Conference on International Conference on Machine Learning-Volume 48, in ICML\u201916","author":"Amodei","year":"2016"},{"key":"ref4","article-title":"Attention-Based Models for Speech Recognition","volume-title":"Advances in Neural Information Processing Systems","author":"Chorowski","year":"2015"},{"key":"ref5","article-title":"Unsupervised speech recognition","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems, in NIPS\u201921","author":"Baevski","year":"2021"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-39593-2_7"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2004-228"},{"key":"ref8","volume":"abs\/2401.10446","author":"Hu","year":"2024","journal-title":"Large Language Models are Efficient Learners of Noise-Robust Speech Recognition"},{"key":"ref9","article-title":"It\u2019s Never 
Too Late: Fusing Acoustic Information into Large Language Models for Automatic Speech Recognition","volume-title":"The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7\u201311, 2024","author":"Chen","year":"2024"},{"key":"ref10","article-title":"HyPoradise: an open baseline for generative speech recognition with large language models","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems, in NIPS \u201923","author":"Chen","year":"2023"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.240"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-739"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053051"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-368"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-1605"},{"key":"ref16","article-title":"ASR-EC Benchmark: Evaluating Large Language Models on Chinese ASR Error Correction","author":"Wei","year":"2024","journal-title":"ArXiv Prepr"},{"key":"ref17","article-title":"Distilling the Knowledge in a Neural Network","volume-title":"NIPS Deep Learning and Representation Learning Workshop","author":"Hinton","year":"2015"},{"key":"ref18","article-title":"FitNets: Hints for Thin Deep Nets","volume":"abs\/1412.6550","author":"Romero","year":"2014","journal-title":"CoRR"},{"key":"ref19","article-title":"DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter","volume":"abs\/1910.01108","author":"Sanh","year":"2019","journal-title":"CoRR"},{"key":"ref20","article-title":"MINILM: deep self-attention distillation for task-agnostic compression of pre-trained transformers","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems, in NIPS 
\u201920","author":"Wang","year":"2020"},{"key":"ref21","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems, in NIPS \u201922","author":"Wei","year":"2022"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1428"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-9996"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461404"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053159"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1513"},{"key":"ref28","volume":"abs\/2412.15115","author":"Yang","year":"2024","journal-title":"Qwen2.5 Technical Report"},{"key":"ref29","volume":"abs\/2407.10759","author":"Chu","year":"2024","journal-title":"Qwen2-Audio Technical Report"},{"journal-title":"Robust Speech Recognition via Large-Scale Weak Supervision","year":"2022","author":"Radford","key":"ref30"},{"key":"ref31","volume":"abs\/2106.09685","author":"Hu","year":"2021","journal-title":"LoRA: Low-Rank Adaptation of Large Language Models"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA 
ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249374.pdf?arnumber=11249374","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T18:22:51Z","timestamp":1764613371000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249374\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249374","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}