{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T21:34:10Z","timestamp":1777066450501,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","funder":[{"name":"RGC CRF Grant","award":["C6015-23G"],"award-info":[{"award-number":["C6015-23G"]}]},{"name":"RGC GRF Grant","award":["16217124"],"award-info":[{"award-number":["16217124"]}]},{"name":"RGC GRF Grant","award":["16210822"],"award-info":[{"award-number":["16210822"]}]},{"name":"NSFC\/RGC CRS Grant","award":["CRS_HKUST601\/24"],"award-info":[{"award-number":["CRS_HKUST601\/24"]}]},{"name":"NSFC\/RGC CRS Grant","award":["CRS_PolyU501\/23"],"award-info":[{"award-number":["CRS_PolyU501\/23"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3767295.3769379","type":"proceedings-article","created":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T20:20:04Z","timestamp":1777062004000},"page":"2109-2125","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["FlashPS: Efficient Generative Image Editing with Mask-aware Caching and Scheduling"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-9161-606X","authenticated-orcid":false,"given":"Xiaoxiao","family":"Jiang","sequence":"first","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3921-9037","authenticated-orcid":false,"given":"Suyi","family":"Li","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3186-3189","authenticated-orcid":false,"given":"Lingyun","family":"Yang","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4830-5482","authenticated-orcid":false,"given":"Tianyu","family":"Feng","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5858-2471","authenticated-orcid":false,"given":"Zhipeng","family":"Di","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1834-4077","authenticated-orcid":false,"given":"Weiyi","family":"Lu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9829-2627","authenticated-orcid":false,"given":"Guoxuan","family":"Zhu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3649-6498","authenticated-orcid":false,"given":"Xiu","family":"Lin","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0477-4814","authenticated-orcid":false,"given":"Kan","family":"Liu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2744-845X","authenticated-orcid":false,"given":"Yinghao","family":"Yu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4363-8888","authenticated-orcid":false,"given":"Tao","family":"Lan","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1908-071X","authenticated-orcid":false,"given":"Guodong","family":"Yang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2028-0780","authenticated-orcid":false,"given":"Lin","family":"Qu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2334-3471","authenticated-orcid":false,"given":"Liping","family":"Zhang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4585-4152","authenticated-orcid":false,"given":"Wei","family":"Wang","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,4,26]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2025. Amazon EC2 P4 Instances. https:\/\/aws.amazon.com\/ec2\/instance-types\/p4\/."},{"key":"e_1_3_2_1_2_1","unstructured":"2025. Amazon EC2 P5 Instances. https:\/\/aws.amazon.com\/ec2\/instance-types\/p5\/."},{"key":"e_1_3_2_1_3_1","unstructured":"Adobe. 2025. Adobe Free Online Photo Editor. https:\/\/www.adobe.com\/products\/photoshop\/ai-photo-editor.html."},{"key":"e_1_3_2_1_4_1","unstructured":"Adobe. 2025. Adobe Free Online Photo Editor. https:\/\/www.adobe.com\/express\/feature\/image\/editor."},{"key":"e_1_3_2_1_5_1","unstructured":"Adobe. 2025. Next-level Generative Fill. Now in Photoshop. https:\/\/www.adobe.com\/products\/photoshop\/generative-fill.html."},{"key":"e_1_3_2_1_6_1","volume-title":"Proc. USENIX NSDI.","author":"Agarwal Shubham","year":"2024","unstructured":"Shubham Agarwal, Subrata Mitra, Sarthak Chakraborty, Srikrishna Karanam, Koyel Mukherjee, and Shiv Kumar Saini. 2024. Approximate Caching for Efficiently Serving Text-to-Image Diffusion Models. In Proc. USENIX NSDI."},{"key":"e_1_3_2_1_7_1","volume-title":"Proc. OSDI.","author":"Agrawal Amey","unstructured":"Amey Agrawal, Nitin Kedia, Ashish Panwar, Jayashree Mohan, Nipun Kwatra, Bhargav Gulavani, Alexey Tumanov, and Ramachandran Ramjee. [n. d.]. Taming Throughput-Latency Tradeoff in LLM Inference with Sarathi-Serve. In Proc. OSDI."},{"key":"e_1_3_2_1_8_1","volume-title":"Proc. ACM ASPLOS.","author":"Ahmad Sohaib","year":"2024","unstructured":"Sohaib Ahmad, Hui Guan, Brian D. Friedman, Thomas Williams, Ramesh K. Sitaraman, and Thomas Woo. 2024. Proteus: A high-throughput inference-serving system with accuracy scaling. In Proc. ACM ASPLOS."},{"key":"e_1_3_2_1_9_1","first-page":"50","volume-title":"How continuous batching enables 23x throughput in LLM inference while reducing","unstructured":"Anyscale. 2025. How continuous batching enables 23x throughput in LLM inference while reducing p50 latency. https:\/\/www.anyscale.com\/blog\/continuous-batching-llm-inference."},{"key":"e_1_3_2_1_10_1","volume-title":"Adetailer: Automatically fix faces and hands. https:\/\/stable-diffusion-art.com\/adetailer\/.","author":"Art Stable Diffusion","year":"2023","unstructured":"Stable Diffusion Art. 2023. Adetailer: Automatically fix faces and hands. https:\/\/stable-diffusion-art.com\/adetailer\/."},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. NIPS Deep Learning Symposium.","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer normalization. In Proc. NIPS Deep Learning Symposium."},{"key":"e_1_3_2_1_12_1","unstructured":"Bing-su. 2025. adetailer. https:\/\/github.com\/Bing-su\/adetailer."},{"key":"e_1_3_2_1_13_1","unstructured":"Tim Brooks Aleksander Holynski and Alexei A Efros. [n. d.]. Instruct-pix2pix: Learning to follow image editing instructions. In CVPR."},{"key":"e_1_3_2_1_14_1","volume-title":"Proc. MLSys.","author":"Chen Lequn","year":"2024","unstructured":"Lequn Chen, Zihao Ye, Yongji Wu, Danyang Zhuo, Luis Ceze, and Arvind Krishnamurthy. 2024. Punica: Multi-tenant LoRA serving. In Proc. MLSys."},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. CVPR.","author":"Choi Seunghwan","year":"2021","unstructured":"Seunghwan Choi, Sunghyun Park, Minsoo Lee, and Jaegul Choo. 2021. VITON-HD: High-Resolution Virtual Try-On via Misalignment-Aware Normalization. In Proc. CVPR."},{"key":"e_1_3_2_1_16_1","volume-title":"Proc. ICLR.","author":"Couairon Guillaume","year":"2023","unstructured":"Guillaume Couairon, Jakob Verbeek, Holger Schwenk, and Matthieu Cord. 2023. DiffEdit: Diffusion-based semantic image editing with mask guidance. In Proc. ICLR."},{"key":"e_1_3_2_1_17_1","volume-title":"Proc. USENIX NSDI.","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J. Franklin, Joseph E. Gonzalez, and Ion Stoica. 2017. Clipper: A low-latency online prediction serving system. In Proc. USENIX NSDI."},{"key":"e_1_3_2_1_18_1","volume-title":"Proc. ICLR.","author":"Dao Tri","year":"2024","unstructured":"Tri Dao. 2024. FlashAttention-2: Faster Attention with Better Parallelism and Work Partitioning. In Proc. ICLR."},{"key":"e_1_3_2_1_19_1","unstructured":"HuggingFace Diffusers. 2025. Create a server. https:\/\/github.com\/huggingface\/diffusers\/blob\/main\/docs\/source\/en\/using-diffusers\/create_a_server.md."},{"key":"e_1_3_2_1_20_1","volume-title":"Proc. ICML.","author":"Esser Patrick","year":"2024","unstructured":"Patrick Esser, Sumith Kulal, Andreas Blattmann, Rahim Entezari, Jonas M\u00fcller, Harry Saini, Yam Levi, Dominik Lorenz, Axel Sauer, Frederic Boesel, Dustin Podell, Tim Dockhorn, Zion English, and Robin Rombach. 2024. Scaling Rectified Flow Transformers for High-Resolution Image Synthesis. In Proc. ICML."},{"key":"e_1_3_2_1_21_1","unstructured":"FastAPI. 2025. FastAPI. https:\/\/github.com\/fastapi\/fastapi."},{"key":"e_1_3_2_1_22_1","volume-title":"Proc. ATC.","author":"Gao Bin","year":"2024","unstructured":"Bin Gao, Zhuomin He, Puru Sharma, Qingxuan Kang, Djordje Jevdjic, Junbo Deng, Xingkun Yang, Zhou Yu, and Pengfei Zuo. 2024. Cost-Efficient Large Language Model Serving for Multi-turn Conversations with CachedAttention. In Proc. ATC."},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. USENIX OSDI.","author":"Gujarati Arpan","year":"2020","unstructured":"Arpan Gujarati, Reza Karimi, Safya Alzayat, Wei Hao, Antoine Kaufmann, Ymir Vigfusson, and Jonathan Mace. 2020. Serving DNNs like Clockwork: Performance predictability from the bottom up. In Proc. USENIX OSDI."},{"key":"e_1_3_2_1_24_1","volume-title":"Prashanth Thinakaran, Bikash Sharma, Mahmut Taylan Kandemir, and Chita R. Das.","author":"Gunasekaran Jashwant Raj","year":"2022","unstructured":"Jashwant Raj Gunasekaran, Cyan Subhra Mishra, Prashanth Thinakaran, Bikash Sharma, Mahmut Taylan Kandemir, and Chita R. Das. 2022. Cocktail: A multidimensional optimization for model serving in cloud. In Proc. USENIX NSDI."},{"key":"e_1_3_2_1_25_1","volume-title":"Proc. EMNLP, Marie-Francine Moens, Xuanjing Huang, Lucia Specia, and Scott Wen-tau Yih (Eds.).","author":"Hessel Jack","year":"2021","unstructured":"Jack Hessel, Ari Holtzman, Maxwell Forbes, Ronan Le Bras, and Yejin Choi. 2021. CLIPScore: A Reference-free Evaluation Metric for Image Captioning. In Proc. EMNLP, Marie-Francine Moens, Xuanjing Huang, Lucia Specia, and Scott Wen-tau Yih (Eds.)."},{"key":"e_1_3_2_1_26_1","volume-title":"Proc. NIPS.","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In Proc. NIPS."},{"key":"e_1_3_2_1_27_1","volume-title":"Proc. ICLR.","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In Proc. ICLR."},{"key":"e_1_3_2_1_28_1","unstructured":"HuggingFace. 2025. Accelerate inference of text-to-image diffusion models. https:\/\/huggingface.co\/docs\/diffusers\/en\/tutorials\/fast_diffusion."},{"key":"e_1_3_2_1_29_1","unstructured":"HuggingFace. 2025. Inpainting. https:\/\/huggingface.co\/docs\/diffusers\/en\/using-diffusers\/inpaint."},{"key":"e_1_3_2_1_30_1","unstructured":"Asynchronous I\/O. 2025. Asynchronous I\/O. https:\/\/docs.python.org\/3\/library\/asyncio.html."},{"key":"e_1_3_2_1_31_1","unstructured":"Xuan Ju Ailing Zeng Yuxuan Bian Shaoteng Liu and Qiang Xu. 2024. PnP Inversion: Boosting Diffusion-based Editing with 3 Lines of Code. In ICLR."},{"key":"e_1_3_2_1_32_1","volume-title":"Proc. CVPR.","author":"Kim Jeongho","year":"2024","unstructured":"Jeongho Kim, Guojung Gu, Minho Park, Sunghyun Park, and Jaegul Choo. 2024. Stableviton: Learning semantic correspondence with latent diffusion model for virtual try-on. In Proc. CVPR."},{"key":"e_1_3_2_1_33_1","volume-title":"Proc. SOSP.","author":"Kwon Woosuk","year":"2023","unstructured":"Woosuk Kwon, Zhuohan Li, Siyuan Zhuang, Ying Sheng, Lianmin Zheng, Cody Hao Yu, Joseph Gonzalez, Hao Zhang, and Ion Stoica. 2023. Efficient Memory Management for Large Language Model Serving with Paged Attention. In Proc. SOSP."},{"key":"e_1_3_2_1_34_1","unstructured":"Black Forest Labs. 2024. FLUX. https:\/\/github.com\/black-forest-labs\/flux."},{"key":"e_1_3_2_1_35_1","volume-title":"Proc. IEEE\/CVF CVPR.","author":"Li Muyang","year":"2024","unstructured":"Muyang Li, Tianle Cai, Jiaxin Cao, Qinsheng Zhang, Han Cai, Junjie Bai, Yangqing Jia, Ming-Yu Liu, Kai Li, and Song Han. 2024. DistriFusion: Distributed parallel inference for high-resolution diffusion models. In Proc. IEEE\/CVF CVPR."},{"key":"e_1_3_2_1_36_1","volume-title":"Proc. (NeurIPS).","author":"Li Muyang","year":"2022","unstructured":"Muyang Li, Ji Lin, Chenlin Meng, Stefano Ermon, Song Han, and Jun-Yan Zhu. 2022. Efficient Spatially Sparse Inference for Conditional GANs and Diffusion Models. In Proc. (NeurIPS)."},{"key":"e_1_3_2_1_37_1","volume-title":"Proc. USENIX ATC.","author":"Li Suyi","year":"2025","unstructured":"Suyi Li, Hanfeng Lu, Tianyuan Wu, Minchen Yu, Qizhen Weng, Xusheng Chen, Yizhou Shan, Binhang Yuan, and Wei Wang. 2025. Toppings: CPU-Assisted, Rank-Aware Adapter Serving for LLM Inference. In Proc. USENIX ATC."},{"key":"e_1_3_2_1_38_1","volume-title":"Proc. USENIX ATC.","author":"Li Suyi","year":"2025","unstructured":"Suyi Li, Lingyun Yang, Xiaoxiao Jiang, Hanfeng Lu, Zhipeng Di, Weiyi Lu, Jiawei Chen, Kan Liu, Yinghao Yu, Tao Lan, Guodong Yang, Lin Qu, Liping Zhang, and Wei Wang. 2025. Katz: Efficient Workflow Serving for Diffusion Models with Many Adapters. In Proc. USENIX ATC."},{"key":"e_1_3_2_1_39_1","volume-title":"Proc. OSDI.","author":"Lin Chaofan","year":"2024","unstructured":"Chaofan Lin, Zhenhua Han, Chengruidong Zhang, Yuqing Yang, Fan Yang, Chen Chen, and Lili Qiu. 2024. Parrot: Efficient Serving of LLM-based Applications with Semantic Variable. In Proc. OSDI."},{"key":"e_1_3_2_1_40_1","volume-title":"Proc. IEEE\/CVF CVPR.","author":"Liu Feng","year":"2025","unstructured":"Feng Liu, Shiwei Zhang, Xiaofeng Wang, Yujie Wei, Haonan Qiu, Yuzhong Zhao, Yingya Zhang, Qixiang Ye, and Fang Wan. 2025. Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model. In Proc. IEEE\/CVF CVPR."},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. ICLR.","author":"Meng Chenlin","year":"2022","unstructured":"Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, and Stefano Ermon. 2022. SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations. In Proc. ICLR."},{"key":"e_1_3_2_1_42_1","unstructured":"Midjourney. 2025. Editor - Midjourney. https:\/\/docs.midjourney.com\/hc\/en-us\/articles\/32764383466893-Editor."},{"key":"e_1_3_2_1_43_1","volume-title":"Proc. ECCV.","author":"Nitzan Yotam","year":"2024","unstructured":"Yotam Nitzan, Zongze Wu, Richard Zhang, Eli Shechtman, Daniel Cohen-Or, Taesung Park, and Micha\u00ebl Gharbi. 2024. Lazy Diffusion Transformer for Interactive Image Editing. In Proc. ECCV."},{"key":"e_1_3_2_1_44_1","unstructured":"OpenAI. 2025. OpenAI CLIP. https:\/\/huggingface.co\/openai\/clip-vit-base-patch16."},{"key":"e_1_3_2_1_45_1","volume-title":"Proc. ICLR.","author":"Podell Dustin","year":"2024","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2024. SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. In Proc. ICLR."},{"key":"e_1_3_2_1_46_1","volume-title":"Proc. FAST.","author":"Qin Ruoyu","year":"2025","unstructured":"Ruoyu Qin, Zheming Li, Weiran He, Jialei Cui, Feng Ren, Mingxing Zhang, Yongwei Wu, Weimin Zheng, and Xinran Xu. 2025. Mooncake: Trading More Storage for Less Computation \u2014 A KVCache-centric Architecture for Serving LLM Chatbot. In Proc. FAST."},{"key":"e_1_3_2_1_47_1","volume-title":"Proc. ICML.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proc. ICML."},{"key":"e_1_3_2_1_48_1","volume-title":"Proc. IEEE\/CVF CVPR.","author":"Rombach Robin","year":"2022","unstructured":"Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, and Bj\u00f6rn Ommer. 2022. High-resolution image synthesis with latent diffusion models. In Proc. IEEE\/CVF CVPR."},{"key":"e_1_3_2_1_49_1","volume-title":"Proc. MLSys.","author":"Sheng Ying","year":"2023","unstructured":"Ying Sheng, Shiyi Cao, Dacheng Li, Coleman Hooper, Nicholas Lee, Shuo Yang, Christopher Chou, Banghua Zhu, Lianmin Zheng, Kurt Keutzer, Joseph E. Gonzalez, and Ion Stoica. 2023. S-LoRA: Serving thousands of concurrent LoRA adapters. In Proc. MLSys."},{"key":"e_1_3_2_1_50_1","volume-title":"Proc. OSDI.","author":"Sheng Ying","year":"2024","unstructured":"Ying Sheng, Shiyi Cao, Dacheng Li, Banghua Zhu, Zhuohan Li, Danyang Zhuo, Joseph E. Gonzalez, and Ion Stoica. 2024. Fairness in Serving Large Language Models. In Proc. OSDI."},{"key":"e_1_3_2_1_51_1","volume-title":"Proc. NIPS.","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Proc. NIPS."},{"key":"e_1_3_2_1_52_1","volume-title":"Diffusers: State-of-the-art diffusion models. https:\/\/github.com\/huggingface\/diffusers.","author":"von Platen Patrick","year":"2022","unstructured":"Patrick von Platen, Suraj Patil, Anton Lozhkov, Pedro Cuenca, Nathan Lambert, Kashif Rasul, Mishig Davaadorj, Dhruv Nair, Sayak Paul, William Berman, Yiyi Xu, Steven Liu, and Thomas Wolf. 2022. Diffusers: State-of-the-art diffusion models. https:\/\/github.com\/huggingface\/diffusers."},{"key":"e_1_3_2_1_53_1","volume-title":"Proc. ACM SoCC.","author":"Wang Luping","year":"2021","unstructured":"Luping Wang, Lingyun Yang, Yinghao Yu, Wei Wang, Bo Li, Xianchao Sun, Jian He, and Liping Zhang. 2021. Morphling: Fast, near-optimal auto-configuration for cloud-native model serving. In Proc. ACM SoCC."},{"key":"e_1_3_2_1_54_1","unstructured":"Qixun Wang Xu Bai Haofan Wang Zekui Qin Anthony Chen Huaxia Li Xu Tang and Yao Hu. 2024. InstantID: Zero-shot identity-preserving generation in seconds. arXiv preprint arXiv:2401.07519 (2024)."},{"key":"e_1_3_2_1_55_1","volume-title":"Proc. ACM EuroSys.","author":"Wang Yiding","year":"2023","unstructured":"Yiding Wang, Kai Chen, Haisheng Tan, and Kun Guo. 2023. Tabi: An efficient multi-level inference system for large language models. In Proc. ACM EuroSys."},{"key":"e_1_3_2_1_56_1","volume-title":"Image quality assessment: From error visibility to structural similarity","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, Alan C Bovik, Hamid R Sheikh, and Eero P Simoncelli. 2004. Image quality assessment: From error visibility to structural similarity. IEEE Trans. Image Process. (2004)."},{"key":"e_1_3_2_1_57_1","volume-title":"Proc. USENIX OSDI.","author":"Wu Bingyang","year":"2024","unstructured":"Bingyang Wu, Ruidong Zhu, Zili Zhang, Peng Sun, Xuanzhe Liu, and Xin Jin. 2024. dLoRA: Dynamically Orchestrating Requests and Adapters for LoRA LLM Serving. In Proc. USENIX OSDI."},{"key":"e_1_3_2_1_58_1","volume-title":"Proc. AAAI","author":"Xu Yuhao","year":"2025","unstructured":"Yuhao Xu, Tao Gu, Weifeng Chen, and Arlene Chen. 2025. OOTDiffusion: Outfitting Fusion Based Latent Diffusion for Controllable Virtual Try-On. Proc. AAAI (2025)."},{"key":"e_1_3_2_1_59_1","volume-title":"Proc. USENIX OSDI.","author":"Yu Gyeong-In","year":"2022","unstructured":"Gyeong-In Yu, Joo Seong Jeong, Geon-Woo Kim, Soojeong Kim, and Byung-Gon Chun. 2022. Orca: A distributed serving system for transformer-based generative models. In Proc. USENIX OSDI."},{"key":"e_1_3_2_1_60_1","volume-title":"Proc. of AAAI.","author":"Yu Zihao","year":"2024","unstructured":"Zihao Yu, Haoyang Li, Fangcheng Fu, Xupeng Miao, and Bin Cui. 2024. Accelerating text-to-image editing via cache-enabled sparse diffusion inference. In Proc. of AAAI."},{"key":"e_1_3_2_1_61_1","unstructured":"ZeroMQ. 2025. ZeroMQ. https:\/\/github.com\/zeromq\/pyzmq."},{"key":"e_1_3_2_1_62_1","volume-title":"Proc. USENIX ATC.","author":"Zhang Chengliang","year":"2019","unstructured":"Chengliang Zhang, Minchen Yu, Wei Wang, and Feng Yan. 2019. MArk: Exploiting cloud services for cost-effective, SLO-aware machine learning inference serving. In Proc. USENIX ATC."},{"key":"e_1_3_2_1_63_1","volume-title":"Proc. USENIX NSDI.","author":"Zhang Hong","year":"2023","unstructured":"Hong Zhang, Yupeng Tang, Anurag Khandelwal, and Ion Stoica. 2023. Shepherd: Serving DNNs in the wild. In Proc. USENIX NSDI."},{"key":"e_1_3_2_1_64_1","volume-title":"Proc. IEEE\/CVF ICCV.","author":"Zhang Lvmin","year":"2023","unstructured":"Lvmin Zhang, Anyi Rao, and Maneesh Agrawala. 2023. Adding Conditional Control to Text-to-Image Diffusion Models. In Proc. IEEE\/CVF ICCV."},{"key":"e_1_3_2_1_65_1","volume-title":"Proc. NeurIPS Datasets and Benchmarks Track.","author":"Zheng Lianmin","year":"2023","unstructured":"Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, et al. 2023. Judging LLM-as-a-judge with MT-Bench and Chatbot Arena. In Proc. NeurIPS Datasets and Benchmarks Track."},{"key":"e_1_3_2_1_66_1","volume-title":"Proc. NIPS.","author":"Zheng Lianmin","year":"2024","unstructured":"Lianmin Zheng, Liangsheng Yin, Zhiqiang Xie, Chuyue Sun, Jeff Huang, Cody Hao Yu, Shiyi Cao, Christos Kozyrakis, Ion Stoica, Joseph E. Gonzalez, Clark Barrett, and Ying Sheng. 2024. SGLang: Efficient Execution of Structured Language Model Programs. In Proc. NIPS."}],"event":{"name":"EUROSYS '26: 21st European Conference on Computer Systems","location":"McEwan Hall\/The University of Edinburgh Edinburgh Scotland UK","acronym":"EUROSYS '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 21st European Conference on Computer Systems"],"original-title":[],"deposited":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T20:35:43Z","timestamp":1777062943000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3767295.3769379"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,26]]},"references-count":66,"alternative-id":["10.1145\/3767295.3769379","10.1145\/3767295"],"URL":"https:\/\/doi.org\/10.1145\/3767295.3769379","relation":{},"subject":[],"published":{"date-parts":[[2026,4,26]]},"assertion":[{"value":"2026-04-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}