{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:05:29Z","timestamp":1777651529680,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,4]],"date-time":"2024-08-04T00:00:00Z","timestamp":1722729600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2313190, 2146496, 1901466"],"award-info":[{"award-number":["2313190, 2146496, 1901466"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,4]]},"DOI":"10.1145\/3672198.3673797","type":"proceedings-article","created":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T12:24:10Z","timestamp":1721132650000},"page":"34-40","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Eloquent: A More Robust Transmission Scheme for LLM Token Streaming"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-9980-028X","authenticated-orcid":false,"given":"Hanchen","family":"Li","sequence":"first","affiliation":[{"name":"University of Chicago"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5957-5071","authenticated-orcid":false,"given":"Yuhan","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Chicago"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3924-6886","authenticated-orcid":false,"given":"Yihua","family":"Cheng","sequence":"additional","affiliation":[{"name":"University of Chicago"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0265-2144","authenticated-orcid":false,"given":"Siddhant","family":"Ray","sequence":"additional","affiliation":[{"name":"University of Chicago"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3964-4079","authenticated-orcid":false,"given":"Kuntai","family":"Du","sequence":"additional","affiliation":[{"name":"University of Chicago"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6877-1683","authenticated-orcid":false,"given":"Junchen","family":"Jiang","sequence":"additional","affiliation":[{"name":"University of Chicago"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"https:\/\/arstechnica.com\/gadgets\/2013\/09\/multipath-tcp-lets-siri-seamlessly-switch-between-wi-fi-and-3glte\/","author":"Wi-Fi Multipath TCP","year":"2019","unstructured":"Multipath TCP lets Siri seamlessly switch between Wi-Fi and 3G\/LTE. https:\/\/arstechnica.com\/gadgets\/2013\/09\/multipath-tcp-lets-siri-seamlessly-switch-between-wi-fi-and-3glte\/, 2019."},{"key":"e_1_3_2_1_2_1","volume-title":"Movement, Reach, Densification and Multipath Fading. https:\/\/www.belden.com\/blogs\/smart-building\/5-big-wireles s-challenges-signal-loss-movement-reach-densification-and-multipath-fading\/","author":"Challenges Big Wireless","year":"2020","unstructured":"5 Big Wireless Challenges: Signal Loss, Movement, Reach, Densification and Multipath Fading. https:\/\/www.belden.com\/blogs\/smart-building\/5-big-wireles s-challenges-signal-loss-movement-reach-densification-and-multipath-fading\/, 2020."},{"key":"e_1_3_2_1_3_1","volume-title":"https:\/\/platform.openai.com\/docs\/api-reference\/chat\/create","author":"API","year":"2023","unstructured":"API Reference - OpenAI API. https:\/\/platform.openai.com\/docs\/api-reference\/chat\/create, 2023."},{"key":"e_1_3_2_1_4_1","volume-title":"https:\/\/www.snowflake.com\/blog\/generative-ai-llms-summit-2023\/","author":"Your Data Bring","year":"2023","unstructured":"Bring Gen AI & LLMs to Your Data. https:\/\/www.snowflake.com\/blog\/generative-ai-llms-summit-2023\/, 2023."},{"key":"e_1_3_2_1_5_1","volume-title":"https:\/\/reports.valuates.com\/market-reports\/QYRE-Auto-30B13652\/global-large-language- model- llm","author":"Language Global Large","year":"2023","unstructured":"Global Large Language Model(LLM) Market Research Report 2023. https:\/\/reports.valuates.com\/market-reports\/QYRE-Auto-30B13652\/global-large-language- model- llm, 2023."},{"key":"e_1_3_2_1_6_1","volume-title":"https:\/\/uptrain.medium.com\/llms-are-becoming-increasingly-popular-in-the-enterprise-world-as-businesses-recognize-their-2ac2a61771bd","author":"Ms","year":"2023","unstructured":"LLMs are becoming increasingly popular in the enterprise world. https:\/\/uptrain.medium.com\/llms-are-becoming-increasingly-popular-in-the-enterprise-world-as-businesses-recognize-their-2ac2a61771bd, 2023."},{"key":"e_1_3_2_1_7_1","volume-title":"https:\/\/www.anthropic.com\/","author":"Claude Anthropic","year":"2024","unstructured":"Anthropic Claude. https:\/\/www.anthropic.com\/, 2024."},{"key":"e_1_3_2_1_8_1","volume-title":"https:\/\/chat.openai.com\/","author":"GPT.","year":"2024","unstructured":"ChatGPT. https:\/\/chat.openai.com\/, 2024."},{"key":"e_1_3_2_1_9_1","volume-title":"https:\/\/bard.google.com\/chat","author":"Bard Google","year":"2024","unstructured":"Google Bard. https:\/\/bard.google.com\/chat, 2024."},{"key":"e_1_3_2_1_10_1","volume-title":"https:\/\/www.tcpdump.org\/","author":"TCPDUMP","year":"2024","unstructured":"TCPDUMP documentation. https:\/\/www.tcpdump.org\/, 2024."},{"key":"e_1_3_2_1_11_1","volume-title":"https:\/\/www.livechat.com\/typing-speed-test\/#\/","author":"Test Typing Speed","year":"2024","unstructured":"Typing Speed Test. https:\/\/www.livechat.com\/typing-speed-test\/#\/, 2024."},{"key":"e_1_3_2_1_12_1","volume-title":"https:\/\/openai-status.llm-utils.org\/","author":"Status Unofficial","year":"2024","unstructured":"Unofficial OpenAI Status. https:\/\/openai-status.llm-utils.org\/, 2024."},{"key":"e_1_3_2_1_13_1","volume-title":"Taming throughput-latency tradeoff in llm inference with sarathi-serve","author":"Agrawal Amey","year":"2024","unstructured":"Amey Agrawal, Nitin Kedia, Ashish Panwar, Jayashree Mohan, Nipun Kwatra, Bhargav S. Gulavani, Alexey Tumanov, and Ramachandran Ramjee. Taming throughput-latency tradeoff in llm inference with sarathi-serve, 2024."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2901778"},{"key":"e_1_3_2_1_15_1","volume-title":"Grace: Loss-resilient real-time video through neural codecs","author":"Cheng Yihua","year":"2023","unstructured":"Yihua Cheng, Ziyi Zhang, Hanchen Li, Anton Arapin, Yue Zhang, Qizheng Zhang, Yuhan Liu, Xu Zhang, Francis Y. Yan, Amrita Mazumdar, Nick Feamster, and Junchen Jiang. Grace: Loss-resilient real-time video through neural codecs, 2023."},{"key":"e_1_3_2_1_16_1","volume-title":"Flashattention: Fast and memory-efficient exact attention with io-awareness","author":"Dao Tri","year":"2022","unstructured":"Tri Dao, Daniel Y. Fu, Stefano Ermon, Atri Rudra, and Christopher R\u00e9. Flashattention: Fast and memory-efficient exact attention with io-awareness, 2022."},{"key":"e_1_3_2_1_17_1","first-page":"267","volume-title":"15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18)","author":"Fouladi Sadjad","year":"2018","unstructured":"Sadjad Fouladi, John Emmons, Emre Orbay, Catherine Wu, Riad S Wahby, and Keith Winstein. Salsify:{Low-Latency} network video through tighter integration between a video codec and a transport protocol. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18), pages 267--282, 2018."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2307636.2307658"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.17487\/RFC9000"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3199524.3199534"},{"key":"e_1_3_2_1_21_1","first-page":"100","volume-title":"Proceedings of the 6th ACM International Workshop on Modeling Analysis and Simulation of Wireless and Mobile Systems, MSWIM '03","author":"Syed","year":"2003","unstructured":"Syed A. Khayam and Hayder Radha. Markov-based modeling of wireless local area networks. In Proceedings of the 6th ACM International Workshop on Modeling Analysis and Simulation of Wireless and Mobile Systems, MSWIM '03, page 100--107, New York, NY, USA, 2003. Association for Computing Machinery."},{"key":"e_1_3_2_1_22_1","volume-title":"Joseph E. Gonzalez, Hao Zhang, and Ion Stoica. Efficient memory management for large language model serving with pagedattention","author":"Kwon Woosuk","year":"2023","unstructured":"Woosuk Kwon, Zhuohan Li, Siyuan Zhuang, Ying Sheng, Lianmin Zheng, Cody Hao Yu, Joseph E. Gonzalez, Hao Zhang, and Ion Stoica. Efficient memory management for large language model serving with pagedattention, 2023."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2016.2618798"},{"key":"e_1_3_2_1_24_1","volume-title":"Fast inference from transformers via speculative decoding","author":"Leviathan Yaniv","year":"2023","unstructured":"Yaniv Leviathan, Matan Kalman, and Yossi Matias. Fast inference from transformers via speculative decoding, 2023."},{"key":"e_1_3_2_1_25_1","volume-title":"Andes: Defining and enhancing quality-of-experience in llm-based text streaming services","author":"Liu Jiachen","year":"2024","unstructured":"Jiachen Liu, Zhiyu Wu, Jae-Won Chung, Fan Lai, Myungjin Lee, and Mosharaf Chowdhury. Andes: Defining and enhancing quality-of-experience in llm-based text streaming services, 2024."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1049\/ip-com:20050237"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1049\/el:19970362"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3487552.3487842"},{"key":"e_1_3_2_1_29_1","first-page":"1325","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Ni Yunzhe","year":"2023","unstructured":"Yunzhe Ni, Feng Qian, Taide Liu, Yihua Cheng, Zhiyao Ma, Jing Wang, Zhongfeng Wang, Gang Huang, Xuanzhe Liu, and Chenren Xu. POLYCORN: Data-driven cross-layer multipath networking for high-speed railway through composable schedulerlets. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23), pages 1325--1340, Boston, MA, April 2023. USENIX Association."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.17487\/RFC0793"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2008.124"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1177\/1529100615623267"},{"key":"e_1_3_2_1_33_1","first-page":"953","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Rudow Michael","year":"2023","unstructured":"Michael Rudow, Francis Y Yan, Abhishek Kumar, Ganesh Ananthanarayanan, Martin Ellis, and KV Rashmi. Tambur: Efficient loss recovery for videoconferencing via streaming codes. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23), pages 953--971, 2023."},{"key":"e_1_3_2_1_34_1","first-page":"1","volume-title":"2009 Digest of Technical Papers International Conference on Consumer Electronics","author":"Samuel","year":"2009","unstructured":"Samuel H. Russ and Sasan Haghani. Behavior of 802.11g traffic at high sustained bit rates in the home. In 2009 Digest of Technical Papers International Conference on Consumer Electronics, pages 1--2, 2009."},{"key":"e_1_3_2_1_35_1","first-page":"3550","article-title":"RTP: A transport protocol for real-time applications","volume":"3550","author":"Schulzrinne H.","year":"2003","unstructured":"H. Schulzrinne, S. Casner, R. Frederick, and V. Jacobson. RTP: A transport protocol for real-time applications. Internet RFC 3550, July 2003. RFC 3550.","journal-title":"Internet RFC"},{"key":"e_1_3_2_1_36_1","volume-title":"Flexgen: High-throughput generative inference of large language models with a single gpu","author":"Sheng Ying","year":"2023","unstructured":"Ying Sheng, Lianmin Zheng, Binhang Yuan, Zhuohan Li, Max Ryabinin, Daniel Y. Fu, Zhiqiang Xie, Beidi Chen, Clark Barrett, Joseph E. Gonzalez, Percy Liang, Christopher R\u00e9, Ion Stoica, and Ce Zhang. Flexgen: High-throughput generative inference of large language models with a single gpu, 2023."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.5555\/554634"},{"key":"e_1_3_2_1_38_1","first-page":"177","volume-title":"2012 USENIX Annual Technical Conference (USENIX ATC 12)","author":"Winstein Keith","year":"2012","unstructured":"Keith Winstein and Hari Balakrishnan. Mosh: An interactive remote shell for mobile clients. In 2012 USENIX Annual Technical Conference (USENIX ATC 12), pages 177--182, Boston, MA, June 2012. USENIX Association."},{"key":"e_1_3_2_1_39_1","volume-title":"Fast distributed inference serving for large language models","author":"Wu Bingyang","year":"2023","unstructured":"Bingyang Wu, Yinmin Zhong, Zili Zhang, Gang Huang, Xuanzhe Liu, and Xin Jin. Fast distributed inference serving for large language models, 2023."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2020.3005486"},{"key":"e_1_3_2_1_41_1","first-page":"521","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu Gyeong-In","year":"2022","unstructured":"Gyeong-In Yu, Joo Seong Jeong, Geon-Woo Kim, Soojeong Kim, and Byung-Gon Chun. Orca: A distributed serving system for Transformer-Based generative models. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22), pages 521--538, Carlsbad, CA, July 2022. USENIX Association."},{"key":"e_1_3_2_1_42_1","first-page":"1813","volume-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Yuan Gina","year":"2024","unstructured":"Gina Yuan, Matthew Sotoudeh, David K. Zhang, Michael Welzl, David Mazi\u00e8res, and Keith Winstein. Sidekick: In-Network assistance for secure End-to-End transport protocols. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24), pages 1813--1830, Santa Clara, CA, April 2024. USENIX Association."}],"event":{"name":"ACM SIGCOMM '24: ACM SIGCOMM 2024 Conference","location":"Sydney NSW Australia","acronym":"ACM SIGCOMM '24","sponsor":["SIGCOMM ACM Special Interest Group on Data Communication"]},"container-title":["Proceedings of the 2024 SIGCOMM Workshop on Networks for AI Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672198.3673797","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3672198.3673797","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:15:42Z","timestamp":1755972942000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672198.3673797"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,4]]},"references-count":42,"alternative-id":["10.1145\/3672198.3673797","10.1145\/3672198"],"URL":"https:\/\/doi.org\/10.1145\/3672198.3673797","relation":{},"subject":[],"published":{"date-parts":[[2024,8,4]]},"assertion":[{"value":"2024-08-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}