{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T14:02:24Z","timestamp":1769090544229,"version":"3.49.0"},"reference-count":15,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/infocomwkshps65812.2025.11152752","type":"proceedings-article","created":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T17:28:22Z","timestamp":1757698102000},"page":"1-6","source":"Crossref","is-referenced-by-count":1,"title":["EdgePrompt: A Distributed Key-Value Inference Framework for LLMs in 6G Networks"],"prefix":"10.1109","author":[{"given":"Jiahong","family":"Ning","sequence":"first","affiliation":[{"name":"Dalian Marine University,Dalian,China"}]},{"given":"Pengyan","family":"Zhu","sequence":"additional","affiliation":[{"name":"Dalian Marine University,Dalian,China"}]},{"given":"Ce","family":"Zheng","sequence":"additional","affiliation":[{"name":"Pengcheng Lab,Shenzhen,China"}]},{"given":"Gary","family":"Lee","sequence":"additional","affiliation":[{"name":"A*STAR,Institute for Infocomm Research (I2R),Singapore"}]},{"given":"Sumei","family":"Sun","sequence":"additional","affiliation":[{"name":"A*STAR,Institute for Infocomm Research (I2R),Singapore"}]},{"given":"Tingting","family":"Yang","sequence":"additional","affiliation":[{"name":"Dalian Marine University,Dalian,China"}]}],"member":"263","reference":[{"key":"ref1","author":"Li","year":"2024","journal-title":"Personal LLM agents: Insights and survey about the capability, efficiency and security"},{"issue":"240","key":"ref2","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"Journal of Machine Learning Research"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2024.3435752"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/OJCOMS.2021.3116437"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2023.3244674"},{"key":"ref6","author":"Yuan","year":"2024","journal-title":"LLM inference unveiled: Survey and roofline model insights"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.26"},{"key":"ref8","author":"Park","year":"2022","journal-title":"LUT-GEMM: Quantized matrix multiplication based on LUTs for efficient inference in large-scale generative language models"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"ref10","author":"Chen","year":"2023","journal-title":"Accelerating large language model decoding with speculative sampling"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCWorkshops62562.2024.10693742"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3662006.3662067"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3513457"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref15","first-page":"325","article-title":"Prompt cache: Modular attention reuse for low-latency inference","volume-title":"Proceedings of Machine Learning and Systems","volume":"6","author":"Gim"}],"event":{"name":"IEEE INFOCOM 2025 - IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)","location":"London, United Kingdom","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,19]]}},"container-title":["IEEE INFOCOM 2025 - IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11152714\/11152715\/11152752.pdf?arnumber=11152752","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T21:04:43Z","timestamp":1769029483000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11152752\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":15,"URL":"https:\/\/doi.org\/10.1109\/infocomwkshps65812.2025.11152752","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}