{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:02:38Z","timestamp":1766268158671,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CCF1730628"],"award-info":[{"award-number":["CCF1730628"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,9,10]]},"DOI":"10.1145\/3603269.3604848","type":"proceedings-article","created":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T16:16:29Z","timestamp":1693584989000},"page":"564-577","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Exoshuffle: An Extensible Shuffle Architecture"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8709-6823","authenticated-orcid":false,"given":"Frank Sifei","family":"Luan","sequence":"first","affiliation":[{"name":"UC Berkeley, Berkeley, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8684-2357","authenticated-orcid":false,"given":"Stephanie","family":"Wang","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, USA"},{"name":"Anyscale, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2461-7080","authenticated-orcid":false,"given":"Samyukta","family":"Yagati","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0717-3666","authenticated-orcid":false,"given":"Sean","family":"Kim","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3531-3660","authenticated-orcid":false,"given":"Kenneth","family":"Lien","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3551-7394","authenticated-orcid":false,"given":"Isaac","family":"Ong","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5336-1454","authenticated-orcid":false,"given":"Tony","family":"Hong","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, United States"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8695-5941","authenticated-orcid":false,"given":"Sangbin","family":"Cho","sequence":"additional","affiliation":[{"name":"Anyscale, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3760-6845","authenticated-orcid":false,"given":"Eric","family":"Liang","sequence":"additional","affiliation":[{"name":"Anyscale, San Francisco, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5373-0088","authenticated-orcid":false,"given":"Ion","family":"Stoica","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Alibaba. 2021. EMR Remote Shuffle Service: A Powerful Elastic Tool of Serverless Spark - Alibaba Cloud Community. https:\/\/www.alibabacloud.com\/blog\/emr-remote-shuffle-service-a-powerful-elastic-tool-of-serverless-spark_597728. (Accessed on 02\/01\/2022)."},{"key":"e_1_3_2_1_2_1","unstructured":"Apache Software Foundation. 2021. Hadoop. https:\/\/hadoop.apache.org."},{"key":"e_1_3_2_1_3_1","unstructured":"Anubhav Awasthi Rajendra Gujja and Mohit Saxena. 2021. Introducing Amazon S3 shuffle in AWS Glue. https:\/\/aws.amazon.com\/blogs\/big-data\/introducing-amazon-s3-shuffle-in-aws-glue\/. (Accessed on 10\/16\/2022)."},{"key":"e_1_3_2_1_4_1","unstructured":"Mayank Bansal and Bo Yang. 2020. Zeus: Uber's Highly Scalable and Distributed Shuffle as a Service - Databricks. https:\/\/databricks.com\/session_na20\/zeus-ubers-highly-scalable-and-distributed-shuffle-as-a-service. (Accessed on 02\/01\/2022)."},{"key":"e_1_3_2_1_5_1","volume-title":"Cosco: An Efficient Facebook-Scale Shuffle Service - Databricks. https:\/\/databricks.com\/session\/cosco-an-efficient-facebook-scale-shuffle-service. (Accessed on 01\/19\/2022).","author":"Borovsky Dmitry","year":"2019","unstructured":"Dmitry Borovsky and Brian Cho. 2019. Cosco: An Efficient Facebook-Scale Shuffle Service - Databricks. https:\/\/databricks.com\/session\/cosco-an-efficient-facebook-scale-shuffle-service. (Accessed on 01\/19\/2022)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1051\/0004-6361\/201732493"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2021.108361"},{"key":"e_1_3_2_1_8_1","volume-title":"MapReduce Online. In Proceedings of the 7th USENIX Conference on Networked Systems Design and Implementation","author":"Condie Tyson","year":"2010","unstructured":"Tyson Condie, Neil Conway, Peter Alvaro, Joseph M. Hellerstein, Khaled Elmeleegy, and Russell Sears. 2010. MapReduce Online. In Proceedings of the 7th USENIX Conference on Networked Systems Design and Implementation (San Jose, California) (NSDI '10). USENIX Association, USA, 21."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357223.3362707"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1327452.1327492"},{"key":"e_1_3_2_1_11_1","unstructured":"Polars Developers. 2022. Polars - User Guide. https:\/\/pola-rs.github.io\/polars-book\/user-guide\/index.html. (Accessed on 10\/16\/2022)."},{"key":"e_1_3_2_1_12_1","unstructured":"Spark developers. 2021. Spark Release 3.2.0. https:\/\/spark.apache.org\/releases\/spark-release-3-2-0.html. (Accessed on 01\/26\/2022)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/224056.224076"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2602204.2602219"},{"key":"e_1_3_2_1_15_1","unstructured":"Apache Software Foundation. 2022. Apache Arrow DataFusion Documentation. https:\/\/arrow.apache.org\/datafusion\/. (Accessed on 10\/16\/2022)."},{"key":"e_1_3_2_1_16_1","volume-title":"Introducing Petastorm: Uber ATG's Data Access Library for Deep Learning. https:\/\/eng.uber.com\/petastorm\/. (Accessed on 01\/19\/2022).","author":"Gruener Robbie","year":"2018","unstructured":"Robbie Gruener, Owen Cheng, and Yevgeni Litvin. 2018. Introducing Petastorm: Uber ATG's Data Access Library for Deep Learning. https:\/\/eng.uber.com\/petastorm\/. (Accessed on 01\/19\/2022)."},{"key":"e_1_3_2_1_17_1","volume-title":"10th International Conference on Autonomic Computing (ICAC 13)","author":"Guo Yanfei","year":"2013","unstructured":"Yanfei Guo, Jia Rao, and Xiaobo Zhou. 2013. iShuffle: Improving Hadoop Performance with Shuffle-on-Write. In 10th International Conference on Autonomic Computing (ICAC 13). USENIX Association, San Jose, CA, 107--117. https:\/\/www.usenix.org\/conference\/icac13\/technical-sessions\/presentation\/guo"},{"key":"e_1_3_2_1_18_1","unstructured":"Ajay Gupta. 2020. Revealing Apache Spark Shuffling Magic. https:\/\/medium.com\/swlh\/revealing-apache-spark-shuffling-magic-b2c304306142. (Accessed on 02\/01\/2022)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/253262.253291"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CloudCom.2013.42"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","unstructured":"Frank Sifei Luan Stephanie Wang Samyukta Yagati Sean Kim Kenneth Lien Isaac Ong Tony Hong SangBin Cho Eric Liang and Ion Stoica. 2023. Exoshuffle-CloudSort. 10.48550\/ARXIV.2301.03734","DOI":"10.48550\/ARXIV.2301.03734"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1355734.1355746"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.01.037"},{"key":"e_1_3_2_1_24_1","volume-title":"Random Reshuffling: Simple Analysis with Vast Improvements. In Advances in Neural Information Processing Systems","author":"Mishchenko Konstantin","year":"2020","unstructured":"Konstantin Mishchenko, Ahmed Khaled, and Peter Richtarik. 2020. Random Reshuffling: Simple Analysis with Vast Improvements. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M. F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., Virtual, 17309--17320. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/c8cc6e90ccbf44c9cee23611711cdc4-Paper.pdf"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","unstructured":"Piero Molino Yaroslav Dudin and Sai Sumanth Miryala. 2019. Ludwig: a type-based declarative deep learning toolbox. 10.48550\/ARXIV.1909.07930","DOI":"10.48550\/ARXIV.1909.07930"},{"key":"e_1_3_2_1_26_1","volume-title":"Ray: A Distributed Framework for Emerging AI Applications. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Moritz Philipp","year":"2018","unstructured":"Philipp Moritz, Robert Nishihara, Stephanie Wang, Alexey Tumanov, Richard Liaw, Eric Liang, Melih Elibol, Zongheng Yang, William Paul, Michael I. Jordan, and Ion Stoica. 2018. Ray: A Distributed Framework for Emerging AI Applications. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, 561--577. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/moritz"},{"volume-title":"A distributed execution engine supporting data-dependent control flow. Ph. D. Dissertation","author":"Murray Derek Gordon","key":"e_1_3_2_1_27_1","unstructured":"Derek Gordon Murray. 2012. A distributed execution engine supporting data-dependent control flow. Ph. D. Dissertation. University of Cambridge."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 8th USENIX Conference on Networked Systems Design and Implementation","author":"Murray Derek G.","year":"2011","unstructured":"Derek G. Murray, Malte Schwarzkopf, Christopher Smowton, Steven Smith, Anil Madhavapeddy, and Steven Hand. 2011. CIEL: A Universal Execution Engine for Distributed Data-Flow Computing. In Proceedings of the 8th USENIX Conference on Networked Systems Design and Implementation (Boston, MA) (NSDI'11). USENIX Association, USA, 113--126."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476311.3476374"},{"key":"e_1_3_2_1_30_1","volume-title":"Garnett (Eds.)","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc., Red Hook, NY, USA. https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/bdbca288fee7f92f2bfa9f7012727740-Paper.pdf"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.14778\/3554821.3554829"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","unstructured":"Devin Petersohn Stephen Macke Doris Xin William Ma Doris Lee Xiangxi Mo Joseph E. Gonzalez Joseph M. Hellerstein Anthony D. Joseph and Aditya Parameswaran. 2020. Towards Scalable Dataframe Systems. 10.48550\/ARXIV.2001.00888","DOI":"10.48550\/ARXIV.2001.00888"},{"key":"e_1_3_2_1_33_1","volume-title":"Fast and Slow: Scalable Analytics on Serverless Infrastructure. In 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Pu Qifan","year":"2019","unstructured":"Qifan Pu, Shivaram Venkataraman, and Ion Stoica. 2019. Shuffling, Fast and Slow: Scalable Analytics on Serverless Infrastructure. In 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19). USENIX Association, Boston, MA, 193--206. https:\/\/www.usenix.org\/conference\/nsdi19\/presentation\/pu"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3320212"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2391229.2391233"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2391229.2391242"},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the 8th USENIX Conference on Networked Systems Design and Implementation","author":"Rasmussen Alexander","year":"2011","unstructured":"Alexander Rasmussen, George Porter, Michael Conley, Harsha V. Madhyastha, Radhika Niranjan Mysore, Alexander Pucher, and Amin Vahdat. 2011. TritonSort: A Balanced Large-Scale Sorting System. In Proceedings of the 8th USENIX Conference on Networked Systems Design and Implementation (Boston, MA) (NSDI'11). USENIX Association, USA, 29--42."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-7b98e3ed-013"},{"key":"e_1_3_2_1_39_1","unstructured":"Mehul A. Shah Amiato and Chris Nyberg. 2014. CloudSort: A TCO Sort Benchmark. http:\/\/sortbenchmark.org\/2014_06_CloudSort_v_0_4.pdf. (Accessed on 01\/24\/2022)."},{"key":"e_1_3_2_1_40_1","unstructured":"Min Shen. 2020. RPC implementation to support pushing and merging shuffle blocks. https:\/\/issues.apache.org\/jira\/browse\/SPARK-32915. (Accessed on 10\/16\/2022)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415558"},{"key":"e_1_3_2_1_42_1","unstructured":"Sergei Sokolenko. 2018. How Distributed Shuffle improves scalability and performance in Cloud Dataflow pipelines. https:\/\/cloud.google.com\/blog\/products\/data-analytics\/how-distributed-shuffle-improves-scalability-and-performance-cloud-dataflow-pipelines."},{"key":"e_1_3_2_1_43_1","unstructured":"PyTorch Team. 2022. torch.utils.data - PyTorch documentation. https:\/\/pytorch.org\/docs\/stable\/data.html. (Accessed on 10\/16\/2022)."},{"key":"e_1_3_2_1_44_1","volume-title":"Ray Datasets: Distributed Data Preprocessing. https:\/\/docs.ray.io\/en\/latest\/data\/dataset.html. (Accessed on 10\/16\/2022).","author":"Team Ray","year":"2022","unstructured":"Ray Team. 2022. Ray Datasets: Distributed Data Preprocessing. https:\/\/docs.ray.io\/en\/latest\/data\/dataset.html. (Accessed on 10\/16\/2022)."},{"key":"e_1_3_2_1_45_1","unstructured":"Qian Wang Rong Gu Yihua Huang Reynold Xin Wei Wu Jun Song and Junluan Xia. 2016. NADSort. http:\/\/sortbenchmark.org\/NADSort2016.pdf. (Accessed on 01\/26\/2022)."},{"key":"e_1_3_2_1_46_1","unstructured":"Stephanie Wang. 2021. Analyzing memory management and performance in Dask-on-Ray. https:\/\/medium.com\/distributed-computing-with-ray\/analyzing-memory-management-and-performance-in-dask-on-ray-930a2236b70d. (Accessed on 01\/26\/2022)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458336.3465302"},{"key":"e_1_3_2_1_48_1","volume-title":"Ownership: A Distributed Futures System for Fine-Grained Tasks. In 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Wang Stephanie","year":"2021","unstructured":"Stephanie Wang, Eric Liang, Edward Oakes, Ben Hindman, Frank Sifei Luan, Audrey Cheng, and Ion Stoica. 2021. Ownership: A Distributed Futures System for Fine-Grained Tasks. In 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21). USENIX Association, Virtual, 671--686. https:\/\/www.usenix.org\/conference\/nsdi21\/presentation\/cheng"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2013.13"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-92bf1922-00a"},{"key":"e_1_3_2_1_51_1","unstructured":"Reynold Xin. 2014. Apache Spark the Fastest Open Source Engine for Sorting a Petabyte. https:\/\/databricks.com\/blog\/2014\/10\/10\/spark-petabyte-sort.html. (Accessed on 01\/19\/2022)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/41457.37507"},{"key":"e_1_3_2_1_53_1","volume-title":"Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing. In 9th USENIX Symposium on Networked Systems Design and Implementation (NSDI 12)","author":"Zaharia Matei","year":"2012","unstructured":"Matei Zaharia, Mosharaf Chowdhury, Tathagata Das, Ankur Dave, Justin Ma, Murphy McCauly, Michael J. Franklin, Scott Shenker, and Ion Stoica. 2012. Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing. In 9th USENIX Symposium on Networked Systems Design and Implementation (NSDI 12). USENIX Association, San Jose, CA, 15--28. https:\/\/www.usenix.org\/conference\/nsdi12\/technical-sessions\/presentation\/zaharia"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2934664"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2735381"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190534"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3452296.3472897"}],"event":{"name":"ACM SIGCOMM '23: ACM SIGCOMM 2023 Conference","sponsor":["SIGCOMM ACM Special Interest Group on Data Communication"],"location":"New York NY USA","acronym":"ACM SIGCOMM '23"},"container-title":["Proceedings of the ACM SIGCOMM 2023 Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603269.3604848","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3603269.3604848","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3603269.3604848","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:42Z","timestamp":1750178802000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3603269.3604848"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9]]},"references-count":57,"alternative-id":["10.1145\/3603269.3604848","10.1145\/3603269"],"URL":"https:\/\/doi.org\/10.1145\/3603269.3604848","relation":{},"subject":[],"published":{"date-parts":[[2023,9]]},"assertion":[{"value":"2023-09-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}