{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T17:35:28Z","timestamp":1782408928216,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1145\/3773274.3774267","type":"proceedings-article","created":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T11:40:28Z","timestamp":1767181228000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Splitwise: Collaborative Edge\u2013Cloud Inference for LLMs via Lyapunov-Assisted DRL"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-0052-6475","authenticated-orcid":false,"given":"Abolfazl","family":"Younesi","sequence":"first","affiliation":[{"name":"Departement Computer Science, University of Innsbruck, Innsbruck, Tirol, Austria"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0735-2455","authenticated-orcid":false,"given":"Abbas","family":"Shabrang Maryan","sequence":"additional","affiliation":[{"name":"Sharif University of Technology, Tehran, Iran"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1456-356X","authenticated-orcid":false,"given":"Elyas","family":"Oustad","sequence":"additional","affiliation":[{"name":"Sharif University of Technology, Tehran, Iran"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5182-9087","authenticated-orcid":false,"given":"Zahra","family":"Najafabadi Samani","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Innsbruck, Austria"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4670-8608","authenticated-orcid":false,"given":"Mohsen","family":"Ansari","sequence":"additional","affiliation":[{"name":"Sharif University of Technology, Tehran, Iran"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4293-1228","authenticated-orcid":false,"given":"Thomas","family":"Fahringer","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Austria, Innsbruck, Austria"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,12,31]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Sohee Bae Seungyul Han and Youngchul Sung. 2020. A reinforcement learning formulation of the Lyapunov optimization: Application to edge computing systems with queue stability. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2012.07279 (2020)."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Gordon\u00a0Owusu Boateng et\u00a0al. 2025. A survey on large language models for communication network and service management: Application insights challenges and future directions. IEEE Commun. Surv. Tutor. (2025).","DOI":"10.1109\/COMST.2025.3564333"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00046"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-57659-2_11"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Yuxuan Chen Rongpeng Li Xiaoxue Yu Zhifeng Zhao and Honggang Zhang. 2025. Adaptive layer splitting for wireless large language model inference in edge computing: a model-based reinforcement learning approach. Front. Inf. Technol. Electron. Eng. 26 2 (2025) 278\u2013292.","DOI":"10.1631\/FITEE.2400468"},{"key":"e_1_3_3_2_7_2","unstructured":"David Ha and J\u00fcrgen Schmidhuber. 2018. World models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1803.10122 2 3 (2018)."},{"key":"e_1_3_3_2_8_2","unstructured":"Ying He Jingcheng Fang F\u00a0Richard Yu and Victor\u00a0C Leung. 2024. Large language models (LLMs) inference offloading and resource allocation in cloud-edge computing: An active inference approach. IEEE Transactions on Mobile Computing (2024)."},{"key":"e_1_3_3_2_9_2","unstructured":"Andrew\u00a0G. Howard et\u00a0al. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv:https:\/\/arXiv.org\/abs\/1704.04861"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737614"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSD57027.2022.00048"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538932"},{"key":"e_1_3_3_2_13_2","unstructured":"Hongpeng Jin and Yanzhao Wu. 2024. Ce-collm: Efficient and adaptive large language models through cloud-edge collaboration. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.02829 (2024)."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Dimitrios Kafetzis Ramin Khalili and Iordanis Koutsopoulos. 2025. Large Language Model partitioning for low-latency inference at the edge. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.02533 (2025).","DOI":"10.23919\/WiOpt66569.2025.11123401"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037698"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/381591.381602"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419194"},{"key":"e_1_3_3_2_18_2","unstructured":"En Li Liekang Zeng Zhi Zhou and Xu Chen. 2019. Edge AI: On-Demand Accelerating Deep Neural Network Inference via Edge Computing. arxiv:https:\/\/arXiv.org\/abs\/1910.05316\u00a0[cs.NI] https:\/\/arxiv.org\/abs\/1910.05316"},{"key":"e_1_3_3_2_19_2","unstructured":"Senyao Li et\u00a0al. 2025. Collaborative Inference and Learning between Edge SLMs and Cloud LLMs: A Survey of Algorithms Execution and Open Challenges. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.16731 (2025)."},{"key":"e_1_3_3_2_20_2","unstructured":"May Malka Erez Farhan Hai Morgenstern and Nir Shlezinger. 2022. Decentralized Low-Latency Collaborative Inference via Ensembles on the Edge. arxiv:https:\/\/arXiv.org\/abs\/2206.03165\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2206.03165"},{"key":"e_1_3_3_2_21_2","unstructured":"Avanika Narayan Dan Biderman Sabri Eyuboglu Avner May Scott Linderman James Zou and Christopher Re. 2025. Minions: Cost-efficient collaboration between on-device and cloud language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.15964 (2025)."},{"key":"e_1_3_3_2_22_2","unstructured":"Hyeonho Noh Byonghyo Shim and Hyun\u00a0Jong Yang. 2025. Adaptive resource allocation optimization using large language models in dynamic wireless environments. IEEE Trans. Veh. Technol. (2025)."},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"Elyas Oustad and Others. 2025. DIST: Distributed Learning-Based Energy-Efficient and Reliable Task Scheduling and Resource Allocation in Fog Computing. IEEE Trans. Serv. Comput. 18 3 (2025) 1336\u20131351.","DOI":"10.1109\/TSC.2025.3568255"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00113"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLOUD.2018.00017"},{"key":"e_1_3_3_2_26_2","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et\u00a0al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_3_2_27_2","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1707.06347 (2017)."},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-29400-7_21"},{"key":"e_1_3_3_2_29_2","unstructured":"Chunlin Tian Xinpeng Qin Kahou Tam Li Li Zijian Wang Yuanzhe Zhao Minglei Zhang and Chengzhong Xu. 2025. CLONE: Customizing LLMs for Efficient Latency-Aware Inference at the Edge. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2506.02847 (2025)."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM55648.2025.11044734"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/IWCMC61514.2024.10592339"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM55648.2025.11044447"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"crossref","unstructured":"Mingjin Zhang Xiaoming Shen Jiannong Cao Zeyang Cui and Shan Jiang. 2024. Edgeshard: Efficient llm inference via collaborative edge computing. IEEE Internet of Things Journal (2024).","DOI":"10.1109\/JIOT.2024.3524255"},{"key":"e_1_3_3_2_34_2","unstructured":"Lianmin Zheng et\u00a0al. 2023. Lmsys-chat-1m: A large-scale real-world llm conversation dataset. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.11998 (2023)."},{"key":"e_1_3_3_2_35_2","unstructured":"Pengyan Zhu and Tingting Yang. 2025. CE-LSLM: Efficient Large-Small Language Model Inference and Communication via Cloud-Edge Collaboration. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.14085 (2025)."}],"event":{"name":"UCC '25: 2025 IEEE\/ACM 18th International Conference on Utility and Cloud Computing","location":"France France","acronym":"UCC '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 18th IEEE\/ACM International Conference on Utility and Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3773274.3774267","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T11:41:22Z","timestamp":1767181282000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3773274.3774267"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":34,"alternative-id":["10.1145\/3773274.3774267","10.1145\/3773274"],"URL":"https:\/\/doi.org\/10.1145\/3773274.3774267","relation":{},"subject":[],"published":{"date-parts":[[2025,12]]},"assertion":[{"value":"2025-12-31","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}