{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T08:40:07Z","timestamp":1770280807271,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":16,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3769002.3769979","type":"proceedings-article","created":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T19:16:19Z","timestamp":1770232579000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Leveraging PyTorch for Hardware-Aware Optimization in Efficient Mixture-of-Experts Large Language Model Inference"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-6013-4122","authenticated-orcid":false,"given":"Mu-Chi","family":"Chen","sequence":"first","affiliation":[{"name":"Institute of Information Science, Academia Sinica, Taipei, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7458-9634","authenticated-orcid":false,"given":"Po-Hsuan","family":"Huang","sequence":"additional","affiliation":[{"name":"National Taiwan University, Taipei, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0090-4920","authenticated-orcid":false,"given":"Yanwen","family":"Gai","sequence":"additional","affiliation":[{"name":"National Taiwan University, Taipei, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7363-2947","authenticated-orcid":false,"given":"Shao-Chun","family":"Ho","sequence":"additional","affiliation":[{"name":"National Taiwan University, Taipei, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1532-3332","authenticated-orcid":false,"given":"Cheng","family":"Liang","sequence":"additional","affiliation":[{"name":"National Taiwan University, Taipei, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6991-8795","authenticated-orcid":false,"given":"Yu-Hung","family":"Kao","sequence":"additional","affiliation":[{"name":"National Taiwan University, Taipei, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0310-4883","authenticated-orcid":false,"given":"Yu-Kai","family":"Hung","sequence":"additional","affiliation":[{"name":"National Taiwan University, Taipei, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8967-1385","authenticated-orcid":false,"given":"Chia-Heng","family":"Tu","sequence":"additional","affiliation":[{"name":"National Cheng Kung University, Tainan, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2043-2663","authenticated-orcid":false,"given":"Shih-Hao","family":"Hung","sequence":"additional","affiliation":[{"name":"National Taiwan University, Taipei, Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,2,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Technical Report: A Highly Capable Language Model Locally on Your Phone. arXiv:2404.14219 [cs.CL]","author":"Abdin Marah","year":"2024","unstructured":"Marah Abdin, Jyoti Aneja, Hany Awadalla, et al. 2024. Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone. arXiv:2404.14219 [cs.CL]"},{"key":"e_1_3_2_1_2_1","first-page":"351","article-title":"Vidur: A large-scale simulation framework for llm inference","volume":"6","author":"Agrawal Amey","year":"2024","unstructured":"Amey Agrawal, Nitin Kedia, Jayashree Mohan, et al. 2024. Vidur: A large-scale simulation framework for llm inference. In Proceedings of Machine Learning and Systems (MLSys), Vol. 6. 351\u2013366.","journal-title":"Proceedings of Machine Learning and Systems (MLSys)"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.298"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Weilin Cai Juyong Jiang Fan Wang et al. 2025. A Survey on Mixture of Experts in Large Language Models. IEEE Transactions on Knowledge and Data Engineering (2025) 1\u201320.","DOI":"10.1109\/TKDE.2025.3554028"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 2024 International Conference on Research in Adaptive and Convergent Systems (RACS).","author":"Chen Mu-Chi","year":"2024","unstructured":"Mu-Chi Chen, Po-Hsuan Huang, Xiangrui Ke, et al. 2024. Towards Building Private LLMs: Exploring Multi-Node Expert Parallelism on Apple Silicon for Mixture-of-Experts Large Language Model. In Proceedings of the 2024 International Conference on Research in Adaptive and Convergent Systems (RACS)."},{"key":"e_1_3_2_1_6_1","unstructured":"DeepSeek-AI Daya Guo Dejian Yang et al. 2025. DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning. arXiv:2501.12948 [cs.CL]"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607102"},{"key":"e_1_3_2_1_8_1","unstructured":"Albert Q. Jiang Alexandre Sablayrolles Antoine Roux et al. 2024. Mixtral of Experts. arXiv:2401.04088 [cs.LG]"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_10_1","unstructured":"Yi-Chien Lin Woosuk Kwon Ronald Pineda and Fanny Nina Paravecino. 2024. Toward High-Performance LLM Serving: A Simulation-Based Approach for Identifying Optimal Parallelism. arXiv:2411.17651 [cs.DC]"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems (NeurIPS). 8024\u20138035","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, et al. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Proceedings of the 33rd International Conference on Neural Information Processing Systems (NeurIPS). 8024\u20138035."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_14_1","unstructured":"An Yang Anfeng Li Baosong Yang et al. 2025. Qwen3 Technical Report. arXiv:2505.09388 [cs.CL]"},{"key":"e_1_3_2_1_15_1","unstructured":"Zihao Ye Lequn Chen Ruihang Lai et al. 2025. FlashInfer: Efficient and Customizable Attention Engine for LLM Inference Serving. arXiv:2501.01005 [cs.DC]"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 38th International Conference on Neural Information Processing Systems (NeurIPS).","author":"Zheng Lianmin","year":"2024","unstructured":"Lianmin Zheng, Liangsheng Yin, Zhiqiang Xie, et al. 2024. SGLang: Efficient Execution of Structured Language Model Programs. In Proceedings of the 38th International Conference on Neural Information Processing Systems (NeurIPS)."}],"event":{"name":"RACS '25: International Conference on Research in Adaptive and Convergent Systems","location":"Ho Chi Minh Vietnam","acronym":"RACS '25","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the International Conference on Research in Adaptive and Convergent Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3769002.3769979","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T19:17:05Z","timestamp":1770232625000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3769002.3769979"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,16]]},"references-count":16,"alternative-id":["10.1145\/3769002.3769979","10.1145\/3769002"],"URL":"https:\/\/doi.org\/10.1145\/3769002.3769979","relation":{},"subject":[],"published":{"date-parts":[[2025,11,16]]},"assertion":[{"value":"2026-02-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}