{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T04:07:48Z","timestamp":1779422868196,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T00:00:00Z","timestamp":1779753600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CCF-2450085"],"award-info":[{"award-number":["CCF-2450085"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2106184"],"award-info":[{"award-number":["CNS-2106184"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,26]]},"DOI":"10.1145\/3786335.3813209","type":"proceedings-article","created":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T03:16:22Z","timestamp":1779419782000},"page":"1316-1321","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Cornserve: A Distributed Serving System for Any-to-Any Multimodal Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5924-3427","authenticated-orcid":false,"given":"Jae-Won","family":"Chung","sequence":"first","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, MI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9520-5218","authenticated-orcid":false,"given":"Jeff 
J.","family":"Ma","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, MI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9679-7107","authenticated-orcid":false,"given":"Jisang","family":"Ahn","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, MI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0197-2433","authenticated-orcid":false,"given":"Yizhuo","family":"Liang","sequence":"additional","affiliation":[{"name":"Computer Science, USC, Los Angeles, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2788-2886","authenticated-orcid":false,"given":"Akshay","family":"Jajoo","sequence":"additional","affiliation":[{"name":"Cisco Research, San Jose, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2360-7019","authenticated-orcid":false,"given":"Myungjin","family":"Lee","sequence":"additional","affiliation":[{"name":"Cisco Research, Bellevue, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0884-6740","authenticated-orcid":false,"given":"Mosharaf","family":"Chowdhury","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, MI, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,5,26]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n. d.]. The Unified Communication X Library. 
http:\/\/www.openucx.org."},{"key":"e_1_3_3_2_3_2","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang Humen Zhong Yuanzhi Zhu Mingkun Yang Zhaohai Li Jianqiang Wan Pengfei Wang Wei Ding Zheren Fu Yiheng Xu Jiabo Ye Xi Zhang Tianbao Xie Zesen Cheng Hang Zhang Zhibo Yang Haiyang Xu and Junyang Lin. 2025. Qwen2.5-VL Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.13923 (2025)."},{"key":"e_1_3_3_2_4_2","unstructured":"Xiaokang Chen Zhiyu Wu Xingchao Liu Zizheng Pan Wen Liu Zhenda Xie Xingkai Yu and Chong Ruan. 2025. Janus-Pro: Unified Multimodal Understanding and Generation with Data and Model Scaling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.17811 (2025)."},{"key":"e_1_3_3_2_5_2","unstructured":"Jiarui Fang Jinzhe Pan Xibo Sun Aoyu Li and Jiannan Wang. 2024. xDiT: an Inference Engine for Diffusion Transformers (DiTs) with Massive Parallelism. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.01738 (2024)."},{"key":"e_1_3_3_2_6_2","unstructured":"Yoav HaCohen Benny Brazowski Nisan Chiprut Yaki Bitterman Andrew Kvochko Avishai Berkowitz Daniel Shalem Daphna Lifschitz Dudu Moshe Eitan Porat Eitan Richardson Guy Shiran Itay Chachy Jonathan Chetboun Michael Finkelson Michael Kupchick Nir Zabari Nitzan Guetta Noa Kotler Ofir Bibi Ori Gordon Poriya Panet Roi Benita Shahar Armon Victor Kulikov Yaron Inger Yonatan Shiftan Zeev Melumian and Zeev Farbman. 2026. LTX-2: Efficient Joint Audio-Visual Foundation Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2601.03233 (2026)."},{"key":"e_1_3_3_2_7_2","unstructured":"Hugging Face. 2026. Any-to-Any Models on Hugging Face. https:\/\/huggingface.co\/models?pipeline_tag=any-to-any. Accessed: 2026-03-06."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_3_2_9_2","unstructured":"Jeff\u00a0J. 
Ma Jae-Won Chung Jisang Ahn Yizhuo Liang Runyu Lu Akshay Jajoo Myungjin Lee and Mosharaf Chowdhury. 2025. Cornfigurator: Automated Planning for Any-to-Any Multimodal Model Serving. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2512.14098 (2025)."},{"key":"e_1_3_3_2_10_2","volume-title":"ISCA","author":"Patel Pratyush","year":"2024","unstructured":"Pratyush Patel, Esha Choukse, Chaojie Zhang, Aashaka Shah, \u00cd\u00f1igo Goiri, Saeed Maleki, and Ricardo Bianchini. 2024. Splitwise: Efficient Generative LLM Inference Using Phase Splitting. In ISCA."},{"key":"e_1_3_3_2_11_2","unstructured":"Qwen Team. 2025. Qwen-Image 2.0. https:\/\/qwen.ai\/blog?id=qwen-image-2.0."},{"key":"e_1_3_3_2_12_2","volume-title":"ICML","author":"Singh Gursimran","year":"2025","unstructured":"Gursimran Singh, Xinglu Wang, Yifan Hu, Timothy Tin\u00a0Long Yu, Linzi Xing, Wei Jiang, Zhefeng Wang, Bai Xiaolong, Yi Li, Ying Xiong, Yong Zhang, and Zhenan Fan. 2025. Efficiently Serving Large Multimodal Models Using EPD Disaggregation. In ICML."},{"key":"e_1_3_3_2_13_2","volume-title":"OSDI","author":"Sun Biao","year":"2024","unstructured":"Biao Sun, Ziming Huang, Hanyu Zhao, Wencong Xiao, Xinyi Zhang, Yong Li, and Wei Lin. 2024. Llumnix: Dynamic Scheduling for Large Language Model Serving. In OSDI."},{"key":"e_1_3_3_2_14_2","unstructured":"Gemma Team. 2025. Gemma 3 Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.19786 (2025)."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_3_2_16_2","unstructured":"Chengyue Wu Xiaokang Chen Zhiyu Wu Yiyang Ma Xingchao Liu Zizheng Pan Wen Liu Zhenda Xie Xingkai Yu Chong Ruan et\u00a0al. 2024. Janus: Decoupling visual encoding for unified multimodal understanding and generation. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.13848 (2024)."},{"key":"e_1_3_3_2_17_2","unstructured":"Chenfei Wu Jiahao Li Jingren Zhou Junyang Lin Kaiyuan Gao Kun Yan Sheng ming Yin Shuai Bai Xiao Xu Yilei Chen Yuxiang Chen Zecheng Tang Zekai Zhang Zhengyi Wang An Yang Bowen Yu Chen Cheng Dayiheng Liu Deqing Li Hang Zhang Hao Meng Hu Wei Jingyuan Ni Kai Chen Kuan Cao Liang Peng Lin Qu Minggang Wu Peng Wang Shuting Yu Tingkun Wen Wensen Feng Xiaoxiao Xu Yi Wang Yichang Zhang Yongqiang Zhu Yujia Wu Yuxuan Cai and Zenan Liu. 2025. Qwen-Image Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2508.02324 (2025)."},{"key":"e_1_3_3_2_18_2","volume-title":"NSDI","author":"Xiang Yuxing","year":"2026","unstructured":"Yuxing Xiang, Xue Li, Kun Qian, Yan Zhang, Wenyuan Yu, Ennan Zhai, Xin Jin, and Jingren Zhou. 2026. ServeGen: Workload Characterization and Generation of Large Language Model Serving in Production. In NSDI."},{"key":"e_1_3_3_2_19_2","unstructured":"Jin Xu Zhifang Guo Jinzheng He Hangrui Hu Ting He Shuai Bai Keqin Chen Jialin Wang Yang Fan Kai Dang Bin Zhang Xiong Wang Yunfei Chu and Junyang Lin. 2025. Qwen2.5-Omni Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.20215 (2025)."},{"key":"e_1_3_3_2_20_2","unstructured":"Jin Xu Zhifang Guo Hangrui Hu Yunfei Chu Xiong Wang Jinzheng He Yuxuan Wang Xian Shi Ting He Xinfa Zhu Yuanjun Lv Yongqi Wang Dake Guo He Wang Linhan Ma Pei Zhang Xinyu Zhang Hongkun Hao Zishan Guo Baosong Yang Bin Zhang Ziyang Ma Xipin Wei Shuai Bai Keqin Chen Xuejing Liu Peng Wang Mingkun Yang Dayiheng Liu Xingzhang Ren Bo Zheng Rui Men Fan Zhou Bowen Yu Jianxin Yang Le Yu Jingren Zhou and Junyang Lin. 2025. Qwen3-Omni Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2509.17765 (2025)."},{"key":"e_1_3_3_2_21_2","volume-title":"OSDI","author":"Yu Gyeong-In","year":"2022","unstructured":"Gyeong-In Yu, Joo\u00a0Seong Jeong, Geon-Woo Kim, Soojeong Kim, and Byung-Gon Chun. 2022. 
Orca: A Distributed Serving System for Transformer-Based Generative Models. In OSDI."},{"key":"e_1_3_3_2_22_2","unstructured":"Z.AI Team. 2025. GLM-Image. https:\/\/z.ai\/blog\/glm-image."},{"key":"e_1_3_3_2_23_2","volume-title":"OSDI","author":"Zhong Yinmin","year":"2024","unstructured":"Yinmin Zhong, Shengyu Liu, Junda Chen, Jianbo Hu, Yibo Zhu, Xuanzhe Liu, Xin Jin, and Hao Zhang. 2024. DistServe: Disaggregating Prefill and Decoding for Goodput-optimized Large Language Model Serving. In OSDI."},{"key":"e_1_3_3_2_24_2","unstructured":"Jinguo Zhu Weiyun Wang Zhe Chen Zhaoyang Liu Shenglong Ye Lixin Gu Hao Tian Yuchen Duan Weijie Su Jie Shao Zhangwei Gao Erfei Cui Xuehui Wang Yue Cao Yangzhou Liu Xingguang Wei Hongjie Zhang Haomin Wang Weiye Xu Hao Li Jiahao Wang Nianchen Deng Songze Li Yinan He Tan Jiang Jiapeng Luo Yi Wang Conghui He Botian Shi Xingcheng Zhang Wenqi Shao Junjun He Yingtong Xiong Wenwen Qu Peng Sun Penglong Jiao Han Lv Lijun Wu Kaipeng Zhang Huipeng Deng Jiaye Ge Kai Chen Limin Wang Min Dou Lewei Lu Xizhou Zhu Tong Lu Dahua Lin Yu Qiao Jifeng Dai and Wenhai Wang. 2025. InternVL3: Exploring Advanced Training and Test-Time Recipes for Open-Source Multimodal Models. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.10479 (2025)."}],"event":{"name":"CAIS '26: ACM Conference on AI and Agentic Systems","location":"San Jose CA USA","acronym":"CAIS '26"},"container-title":["Proceedings of the ACM Conference on AI and Agentic Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3786335.3813209","content-type":"text\/html","content-version":"vor","intended-application":"syndication"}],"deposited":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T03:27:33Z","timestamp":1779420453000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3786335.3813209"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,26]]},"references-count":23,"alternative-id":["10.1145\/3786335.3813209","10.1145\/3786335"],"URL":"https:\/\/doi.org\/10.1145\/3786335.3813209","relation":{},"subject":[],"published":{"date-parts":[[2026,5,26]]},"assertion":[{"value":"2026-05-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}