{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T06:12:09Z","timestamp":1758089529501,"version":"3.44.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006190","name":"Research and Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006190","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1109\/dac63849.2025.11132914","type":"proceedings-article","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T17:35:41Z","timestamp":1757957741000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["McPAL: Scaling Unstructured Sparse Inference with Multi-Chiplet HBM-PIM Architecture for LLMs"],"prefix":"10.1109","author":[{"given":"Shiwei","family":"Liu","sequence":"first","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"}]},{"given":"Zhirui","family":"Huang","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"}]},{"given":"Jiangnan","family":"Yu","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology,Hong Kong,SAR,China"}]},{"given":"Qi","family":"Liu","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"}]},{"given":"Chixiao","family":"Chen","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Language models are unsupervised multitask learners","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref2","first-page":"08774","article-title":"Gpt-4 technical report","volume-title":"arXiv preprint arXiv:2303","author":"Achiam","year":"2023"},{"key":"ref3","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv:2307.09288"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/MM.2024.3373763"},{"key":"ref5","article-title":"A simple and effective pruning approach for large language models","author":"Sun","year":"2023","journal-title":"arXiv preprint arXiv:2306.11695"},{"volume-title":"International Conference on Machine Learning","author":"Frantar","article-title":"Sparsegpt: Massive language models can be accurately pruned in one-shot","key":"ref6"},{"volume-title":"International Conference on Machine Learning (ICML)","author":"Xiao","article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","key":"ref7"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1145\/3649329.3656497"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/isscc42615.2023.10067360"},{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.1145\/3400302.3415640","article-title":"ReTransformer: ReRAM-based processing-in-memory architecture for transformer acceleration","volume-title":"International Conference on Computer-Aided Design (ICCAD)","author":"Yang"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/DAC56929.2023.10247913"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1145\/3649329.3658244"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1109\/JSSC.2022.3213542"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/JSSC.2023.3305663"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.23919\/DATE51398.2021.9474146"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/MICRO56248.2022.00097"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/HPCA47549.2020.00015"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1145\/3575693.3575706"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/HPCA53966.2022.00082"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/MICRO56248.2022.00051"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/JSSC.2023.3324954"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/DAC18072.2020.9218523"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/JSSC.2020.3039206"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1109\/HPCA57654.2024.00022"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1145\/3352460.3358302"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1109\/TCPMT.2022.3207195"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.1145\/3123939.3124545","article-title":"Fine-grained DRAM: Energy-efficient DRAM for extreme bandwidth systems","volume-title":"IEEE\/ACM International Symposium on Microarchitecture (MICRO)","author":"O\u2019Connor"},{"key":"ref28","article-title":"S3: Increasing GPU utilization during generative inference for higher throughput","author":"Jin","year":"2023","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1109\/HPCA57654.2024.00078"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1145\/3620665.3640422"}],"event":{"name":"2025 62nd ACM\/IEEE Design Automation Conference (DAC)","start":{"date-parts":[[2025,6,22]]},"location":"San Francisco, CA, USA","end":{"date-parts":[[2025,6,25]]}},"container-title":["2025 62nd ACM\/IEEE Design Automation Conference (DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11132383\/11132091\/11132914.pdf?arnumber=11132914","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:50:08Z","timestamp":1758001808000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11132914\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/dac63849.2025.11132914","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]}}}