{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T06:12:26Z","timestamp":1758089546720,"version":"3.44.0"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1109\/dac63849.2025.11132644","type":"proceedings-article","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T17:35:41Z","timestamp":1757957741000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["EdgeMM: Multi-Core CPU with Heterogeneous AI-Extension and Activation-aware Weight Pruning for Multimodal LLMs at Edge"],"prefix":"10.1109","author":[{"given":"Kangbo","family":"Bai","sequence":"first","affiliation":[{"name":"School of Integrated Circuits Peking University,Beijing,China"}]},{"given":"Le","family":"Ye","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits Peking University,Beijing,China"}]},{"given":"Ru","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits Peking University,Beijing,China"}]},{"given":"Tianyu","family":"Jia","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits Peking University,Beijing,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW60836.2024.00106"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/tmech.2025.3574943"},{"key":"ref3","article-title":"Mobilevlm: A fast, reproducible and strong vision language assistant for mobile devices","author":"Chu","year":"2023","journal-title":"arXiv preprint arXiv:2312.16886"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73004-7_19"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01432"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/VLSITechnologyandCir46769.2022.9830277"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42615.2023.10067817"},{"key":"ref8","first-page":"162","article-title":"Hetegen: Efficient heterogeneous parallel inference for large language models on resource-constrained devices","volume":"6","author":"XUANLEI","year":"2024","journal-title":"Machine Learning and Systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3729215"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247830"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731107"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00066"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01365"},{"key":"ref17","article-title":"Eva-clip: Improved training techniques for clip at scale","author":"Sun","year":"2023","journal-title":"arXiv preprint arXiv:2303.15389"},{"key":"ref18","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2024","journal-title":"Advances in neural information processing systems"},{"key":"ref19","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International Conference on Machine Learning (ICML)","author":"Radford"},{"issue":"5","key":"ref20","article-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90 * chatgpt quality, march 2023","volume":"3","author":"Chiang","year":"2023"},{"key":"ref21","article-title":"Mobilevlm v2: Faster and stronger baseline for vision language model","author":"Chu","year":"2024","journal-title":"arXiv preprint arXiv:2402.03766"},{"key":"ref22","article-title":"Tinygpt-v: Efficient multimodal large language model via small backbones","author":"Yuan","year":"2023","journal-title":"arXiv preprint arXiv:2312.16862"},{"key":"ref23","first-page":"19730","article-title":"Blip-2: Bootstrapping languageimage pre-training with frozen image encoders and large language models","volume-title":"International Conference on Machine Learning (ICML)","author":"Li"},{"article-title":"Phi-2: The surprising power of small language models","year":"2023","author":"Javaheripi","key":"ref24"},{"key":"ref25","article-title":"Sphinx-x: Scaling data and parameters for a family of multi-modal large language models","author":"Gao","year":"2024","journal-title":"arXiv preprint arXiv:2402.05935"},{"key":"ref26","article-title":"Dinov2: Learning robust visual features without supervision","author":"Oquab","year":"2023","journal-title":"arXiv preprint arXiv:2304.07193"},{"key":"ref27","article-title":"Tinyllama: An open-source small language model","author":"Zhang","year":"2024","journal-title":"arXiv preprint arXiv:2401.02385"},{"key":"ref28","article-title":"Deepseek-vl: towards real-world vision-language understanding","author":"Lu","year":"2024","journal-title":"arXiv preprint arXiv:2403.05525"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01100"},{"volume-title":"A family of high efficiency and powerful visual language model.","year":"2024","key":"ref30"},{"key":"ref31","article-title":"Qwen technical report","volume-title":"arXiv preprint arXiv:2309.16609","author":"Bai","year":"2023"},{"key":"ref32","article-title":"Gemini: a family of highly capable multimodal models","author":"Team","year":"2023","journal-title":"arXiv preprint arXiv:2312.11805"},{"key":"ref33","article-title":"Gpt-4 technical report","volume-title":"arXiv preprint arXiv:2303.08774","author":"Achiam","year":"2023"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-1116-0"},{"key":"ref36","article-title":"Learn to explain: Multimodal reasoning via thought chains for science question answering","volume":"abs\/2209.09513","author":"Lu","year":"2022","journal-title":"ArXiv"},{"key":"ref37","article-title":"Seed-bench: Benchmarking multimodal 1 lms with generative comprehension","volume":"abs\/2307.16125","author":"Li","year":"2023","journal-title":"ArXiv"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72658-3_13"},{"key":"ref39","article-title":"Full stack optimization of transformer inference: a survey","author":"Kim","year":"2023","journal-title":"arXiv preprint arXiv:2302.14017"},{"key":"ref40","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv:2307.09288"},{"key":"ref41","article-title":"Mistral 7b","author":"Jiang","year":"2023","journal-title":"arXiv preprint arXiv:2310.06825"},{"key":"ref42","article-title":"H2o: Heavyhitter oracle for efficient generative inference of large language models","volume":"abs\/2306.14048","author":"Zhang","year":"2023","journal-title":"ArXiv"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/tc.2020.3027900"},{"key":"ref44","article-title":"A simple and effective pruning approach for large language models","author":"Sun","year":"2023","journal-title":"arXiv preprint arXiv:2306.11695"},{"key":"ref45","article-title":"Cats: Contextually-aware thresholding for sparsity in large language models","author":"Lee","year":"2024","journal-title":"arXiv preprint arXiv:2404.08763"}],"event":{"name":"2025 62nd ACM\/IEEE Design Automation Conference (DAC)","start":{"date-parts":[[2025,6,22]]},"location":"San Francisco, CA, USA","end":{"date-parts":[[2025,6,25]]}},"container-title":["2025 62nd ACM\/IEEE Design Automation Conference (DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11132383\/11132091\/11132644.pdf?arnumber=11132644","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:35:32Z","timestamp":1758000932000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11132644\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/dac63849.2025.11132644","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]}}}