{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T23:11:10Z","timestamp":1773011470014,"version":"3.50.1"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,3,24]],"date-time":"2025-03-24T00:00:00Z","timestamp":1742774400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,24]],"date-time":"2025-03-24T00:00:00Z","timestamp":1742774400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"U.S. National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2114267"],"award-info":[{"award-number":["CNS-2114267"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,3,24]]},"DOI":"10.1109\/wcnc61545.2025.10978177","type":"proceedings-article","created":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T17:53:33Z","timestamp":1746813213000},"page":"1-6","source":"Crossref","is-referenced-by-count":4,"title":["Edge vs Cloud: How Do We Balance Cost, Latency, and Quality for Large Language Models Over 5G Networks?"],"prefix":"10.1109","author":[{"given":"Minsu","family":"Kim","sequence":"first","affiliation":[{"name":"Virginia Tech,Bradley Department of Electrical and Computer Engineering,Arlington,VA,USA"}]},{"given":"Pinyarash","family":"Pinyoanuntapong","sequence":"additional","affiliation":[{"name":"MediaTek,Warren,NJ,USA"}]},{"given":"Bongho","family":"Kim","sequence":"additional","affiliation":[{"name":"MediaTek,Warren,NJ,USA"}]},{"given":"Walid","family":"Saad","sequence":"additional","affiliation":[{"name":"Virginia Tech,Bradley Department of Electrical and Computer Engineering,Arlington,VA,USA"}]},{"given":"Doru","family":"Calin","sequence":"additional","affiliation":[{"name":"MediaTek,Warren,NJ,USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/atc63255.2024.10908345"},{"key":"ref2","volume-title":"Openai inference api","year":"2024"},{"key":"ref3","volume-title":"Apple intelligence","year":"2024"},{"key":"ref4","volume-title":"Qwen2 technical report","author":"Yang","year":"2024"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.33140\/jeee.02.04.19"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.792"},{"key":"ref7","volume-title":"Frugalgpt: How to use large language models while reducing cost and improving performance","author":"Chen","year":"2023"},{"key":"ref8","volume-title":"Automix: Automatically mixing language models","author":"Madaan","year":"2023"},{"key":"ref9","article-title":"Hybrid llm: Cost-efficient and quality-aware query routing","volume-title":"Proc. of International Conference on Learning Representations (ICLR)","author":"Ding","year":"2024"},{"key":"ref10","author":"Ong","year":"2024","journal-title":"Routellm: Learning to route llms with preference data"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-58923-2_14"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2841349"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2023.3290936"},{"key":"ref14","first-page":"46595","article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","volume":"36","author":"Zheng","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref15","first-page":"27263","article-title":"Bartscore: Evaluating generated text as text generation","volume":"34","author":"Yuan","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3108423"},{"key":"ref17","volume-title":"Who cares about latency in 5G","author":"Ludwig","year":"2022"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3387514.3405882"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3094760"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/MVT.2024.3359357"},{"key":"ref21","author":"Sanh","year":"2019","journal-title":"Distilbert, a distilled version of bert: Smaller, faster, cheaper and lighter"}],"event":{"name":"2025 IEEE Wireless Communications and Networking Conference (WCNC)","location":"Milan, Italy","start":{"date-parts":[[2025,3,24]]},"end":{"date-parts":[[2025,3,27]]}},"container-title":["2025 IEEE Wireless Communications and Networking Conference (WCNC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10978109\/10978116\/10978177.pdf?arnumber=10978177","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,10]],"date-time":"2025-05-10T06:23:18Z","timestamp":1746858198000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10978177\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,24]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/wcnc61545.2025.10978177","relation":{},"subject":[],"published":{"date-parts":[[2025,3,24]]}}}