{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T15:27:14Z","timestamp":1781018834882,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":17,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T00:00:00Z","timestamp":1774224000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Science and Technology Council","award":["114-2640-E-011-004"],"award-info":[{"award-number":["114-2640-E-011-004"]}]},{"name":"National Science and Technology Council","award":["113-2628-E-011-007-MY3"],"award-info":[{"award-number":["113-2628-E-011-007-MY3"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,23]]},"DOI":"10.1145\/3748522.3779794","type":"proceedings-article","created":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T14:17:49Z","timestamp":1781014669000},"page":"1268-1274","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Adaptive Multi-User Scheduling for Large Language Model Inference on HBM-Based Processing-in-Memory Accelerators"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-7208-9396","authenticated-orcid":false,"given":"Chen-Lin","family":"Shi","sequence":"first","affiliation":[{"name":"National Taiwan University of Science and Technology, Taipei, Taiwan, Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2504-723X","authenticated-orcid":false,"given":"Xi-Zhu","family":"Wang","sequence":"additional","affiliation":[{"name":"National Taiwan University of Science and Technology, Taipei, Taiwan, Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8698-1318","authenticated-orcid":false,"given":"Ya-Shu","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, National Taiwan University of Science and Technology, Taipei, Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. Ramulator V2.0a. https:\/\/github.com\/CMU-SAFARI\/ramulator2."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673053"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3676641.3716009"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651380"},{"key":"e_1_3_2_1_5_1","unstructured":"Hyucksung Kwon Kyungmo Koo Janghyeon Kim et al. 2025. LoL-PIM: Long-Context LLM Decoding with Scalable DRAM-PIM System. arXiv:2412.20166 [cs.AR] https:\/\/arxiv.org\/abs\/2412.20166"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42613.2021.9365862"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00013"},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 52nd Annual International Symposium on Computer Architecture (ISCA). Association for Computing Machinery.","author":"Li Cong","year":"2025","unstructured":"Cong Li, Yihan Yin, Xintong Wu, et al. 2025. H2-LLM: Hardware-Dataflow Co-Exploration for Heterogeneous Hybrid-Bonding-based Low-Batch LLM Inference. In Proceedings of the 52nd Annual International Symposium on Computer Architecture (ISCA). Association for Computing Machinery."},{"key":"e_1_3_2_1_9_1","unstructured":"Jinhao Li Jiaming Xu Shan Huang et al. 2024. Large language model inference acceleration: A comprehensive hardware perspective. arXiv preprint arXiv:2410.04466 (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2023.3333759"},{"key":"e_1_3_2_1_11_1","volume-title":"ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars. In ACM\/IEEE 43rd Annual International Symposium on Computer Architecture.","author":"Shafiee Ali","year":"2016","unstructured":"Ali Shafiee, Anirban Nag, Naveen Muralimanohar, et al. 2016. ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars. In ACM\/IEEE 43rd Annual International Symposium on Computer Architecture."},{"key":"e_1_3_2_1_12_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_13_1","unstructured":"Christopher Wolters Xiaoxuan Yang Ulf Schlichtmann et al. 2024. Memory is all you need: An overview of compute-in-memory architectures for accelerating large language model inference. arXiv preprint arXiv:2406.08413 (2024)."},{"key":"e_1_3_2_1_14_1","unstructured":"Minrui Xu Dusit Niyato Hongliang Zhang et al. 2024. Toward Edge General Intelligence with Multiple-Large Language Model (Multi-LLM): Architecture Trust and Orchestration. IEEE Network (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"Orca: A Distributed Serving System for Transformer-Based Generative Models. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu Gyeong-In","year":"2022","unstructured":"Gyeong-In Yu, Joo Seong Jeong, Geon-Woo Kim, et al. 2022. Orca: A Distributed Serving System for Transformer-Based Generative Models. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, 521\u2013538."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00105"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00082"}],"event":{"name":"SAC '26: 41st ACM\/SIGAPP Symposium on Applied Computing","location":"Grand Hotel Palace Thessaloniki Greece","acronym":"SAC '26","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the 41st ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3748522.3779794","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T14:35:33Z","timestamp":1781015733000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3748522.3779794"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,23]]},"references-count":17,"alternative-id":["10.1145\/3748522.3779794","10.1145\/3748522"],"URL":"https:\/\/doi.org\/10.1145\/3748522.3779794","relation":{},"subject":[],"published":{"date-parts":[[2026,3,23]]},"assertion":[{"value":"2026-06-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}