{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T06:12:07Z","timestamp":1758089527871,"version":"3.44.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002701","name":"Ministry of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002701","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1109\/dac63849.2025.11132960","type":"proceedings-article","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T17:35:41Z","timestamp":1757957741000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["AASD: Accelerate Inference by Aligning Speculative Decoding in Multimodal Large Language Models"],"prefix":"10.1109","author":[{"given":"Chaoqun","family":"Yang","sequence":"first","affiliation":[{"name":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"}]},{"given":"Ran","family":"Chen","sequence":"additional","affiliation":[{"name":"Peking University,School of Mathematical Sciences,Department of Information and Computational 
Sciences"}]},{"given":"Muyang","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences,School of Artificial Intelligence"}]},{"given":"Weiguang","family":"Pang","sequence":"additional","affiliation":[{"name":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"}]},{"given":"Yuzhi","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences,School of Artificial Intelligence"}]},{"given":"Rongtao","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences,School of Artificial Intelligence"}]},{"given":"Kexue","family":"Fu","sequence":"additional","affiliation":[{"name":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"}]},{"given":"Changwei","family":"Wang","sequence":"additional","affiliation":[{"name":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"}]},{"given":"Longxiang","family":"Gao","sequence":"additional","affiliation":[{"name":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"}]}],"member":"263","reference":[{"article-title":"Redpajama: An open source recipe to reproduce llama training dataset","year":"2023","author":"Computer","key":"ref1"},{"key":"ref2","article-title":"Glide with a cape: A low-hassle method to accelerate speculative decoding","author":"Du","year":"2024","journal-title":"arXiv preprint arXiv:2402.02082"},{"key":"ref3","article-title":"On speculative decoding for 
multimodal large language models","author":"Gagrani","year":"2024","journal-title":"arXiv preprint arXiv:2404.08856"},{"key":"ref4","article-title":"Accelerating pre-training of multimodal llms via chain-of-sight","author":"Huang","year":"2024","journal-title":"arXiv preprint arXiv:2407.15819"},{"key":"ref5","article-title":"Efficient multimodal large language models: A survey","author":"Jin","year":"2024","journal-title":"arXiv preprint arXiv:2405.10739"},{"key":"ref6","article-title":"Scaling laws for neural language models","author":"Kaplan","year":"2020","journal-title":"arXiv preprint arXiv:2001.08361"},{"key":"ref7","article-title":"A comprehensive survey of accelerated generation techniques in large language models","author":"Khoshnoodi","year":"2024","journal-title":"arXiv preprint arXiv:2405.13019"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1139"},{"key":"ref9","article-title":"Openassistant conversations-democratizing large language model alignment","volume":"36","author":"K\u00f6pf","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"year":"2023","key":"ref10","article-title":"Oig-small-chip"},{"key":"ref11","first-page":"4664","article-title":"Binaryvit: pushing binary vision transformers towards convolutional models","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Charles Le"},{"key":"ref12","article-title":"Characterizing and efficiently accelerating multimodal generation model inference","author":"Lee","year":"2024","journal-title":"arXiv preprint arXiv:2410.00215"},{"key":"ref13","first-page":"19274","article-title":"Fast inference from transformers via speculative decoding","volume-title":"International Conference on Machine Learning","author":"Leviathan"},{"key":"ref14","article-title":"Moe-llava: Mixture of experts for large vision-language models","author":"Lin","year":"2024","journal-title":"arXiv preprint 
arXiv:2401.15947"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i5.32567"},{"key":"ref17","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2024","journal-title":"Advances in neural information processing systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01946"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72643-9_4"},{"key":"ref20","article-title":"Deepseek-vl: towards real-world vision-language understanding","author":"Lu","year":"2024","journal-title":"arXiv preprint arXiv:2403.05525"},{"key":"ref21","first-page":"2507","article-title":"Learn to explain: Multimodal reasoning via thought chains for science question answering","volume":"35","author":"Lu","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref22","article-title":"Vl-mamba: Exploring state space models for multimodal learning","author":"Qiao","year":"2024","journal-title":"arXiv preprint arXiv:2403.13600"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01627"},{"year":"2024","key":"ref24","article-title":"Karmavlm"},{"key":"ref25","article-title":"Cait: Triple-win compression towards high accuracy, fast inference, and favorable transferability for vits","author":"Wang","year":"2023","journal-title":"arXiv preprint arXiv:2309.15755"},{"key":"ref26","article-title":"Emergent abilities of large language models","author":"Wei","year":"2022","journal-title":"arXiv preprint arXiv:2206.07682"},{"key":"ref27","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref28","article-title":"Efficient vision-language models by summarizing visual tokens into compact 
registers","author":"Wen","year":"2024","journal-title":"arXiv preprint arXiv:2410.14072"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.456"},{"key":"ref30","article-title":"Llava-uhd: an lmm perceiving any aspect ratio and high-resolution images","author":"Xu","year":"2024","journal-title":"arXiv preprint arXiv:2403.11703"},{"key":"ref31","article-title":"Decoding speculative decoding","author":"Yan","year":"2024","journal-title":"arXiv preprint arXiv:2402.01528"},{"key":"ref32","article-title":"Multi-candidate speculative decoding","author":"Yang","year":"2024","journal-title":"arXiv preprint arXiv:2401.06706"},{"key":"ref33","article-title":"Minicpm-v: A gpt-4v level mllm on your phone","author":"Yao","year":"2024","journal-title":"arXiv preprint arXiv:2408.01800"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwae403"},{"key":"ref35","article-title":"Balancing performance and efficiency: A multimodal large language model pruning method based image text interaction","author":"Yu","year":"2024","journal-title":"arXiv preprint arXiv:2409.01162"},{"key":"ref36","article-title":"Beyond the speculative game: A survey of speculative execution in large language models","author":"Zhang","year":"2024","journal-title":"arXiv preprint arXiv:2404.14897"},{"key":"ref37","article-title":"Vision transformer pruning","author":"Zhu","year":"2021","journal-title":"arXiv preprint arXiv:2104.08500"}],"event":{"name":"2025 62nd ACM\/IEEE Design Automation Conference (DAC)","start":{"date-parts":[[2025,6,22]]},"location":"San Francisco, CA, USA","end":{"date-parts":[[2025,6,25]]}},"container-title":["2025 62nd ACM\/IEEE Design Automation Conference 
(DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11132383\/11132091\/11132960.pdf?arnumber=11132960","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:31:51Z","timestamp":1758000711000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11132960\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/dac63849.2025.11132960","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]}}}