{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T16:10:07Z","timestamp":1755879007340,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":367,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-1908566"],"award-info":[{"award-number":["CNS-1908566"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,30]]},"DOI":"10.1145\/3620678.3624643","type":"proceedings-article","created":{"date-parts":[[2023,10,31]],"date-time":"2023-10-31T13:58:07Z","timestamp":1698760687000},"page":"1-16","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Plexus"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7251-3740","authenticated-orcid":false,"given":"Joel","family":"Wolfrath","sequence":"first","affiliation":[{"name":"University of Minnesota, Minneapolis, MN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9914-2604","authenticated-orcid":false,"given":"Abhishek","family":"Chandra","sequence":"additional","affiliation":[{"name":"University of Minnesota, Minneapolis, MN, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,10,31]]},"reference":[{"unstructured":"[n.d.]. IMDb Non-Commercial Datasets. https:\/\/developer.imdb.com\/non-commercial-datasets\/. Accessed: 2023-02-12.","key":"e_1_3_2_1_1_1"},{"unstructured":"[n.d.]. Intel HiBench -- Big Data Benchmark. https:\/\/github.com\/Intel-bigdata\/HiBench. Accessed: 2023-02-25.","key":"e_1_3_2_1_2_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.14778\/3055540.3055545"},{"volume-title":"Mapping the Expansion of Google's Serving Infrastructure (IMC '13)","author":"Calder Matt","unstructured":"Matt Calder, Xun Fan, Zi Hu, Ethan Katz-Bassett, John Heidemann, and Ramesh Govindan. 2013. Mapping the Expansion of Google's Serving Infrastructure (IMC '13). Association for Computing Machinery, New York, NY, USA, 313--326.","key":"e_1_3_2_1_4_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_5_1","DOI":"10.1145\/304181.304206"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_6_1","DOI":"10.1016\/j.jalgor.2003.12.001"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_7_1","DOI":"10.1145\/304181.304208"},{"volume-title":"Proceedings of the Seventh ACM Symposium on Cloud Computing","author":"Heintz Benjamin","unstructured":"Benjamin Heintz, Abhishek Chandra, and Ramesh K. Sitaraman. 2016. Trading Timeliness and Accuracy in Geo-Distributed Streaming Analytics. In Proceedings of the Seventh ACM Symposium on Cloud Computing (Santa Clara, CA, USA) (SoCC '16). Association for Computing Machinery, New York, NY, USA, 361--373.","key":"e_1_3_2_1_8_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_9_1","DOI":"10.1109\/TCC.2014.2355225"},{"key":"e_1_3_2_1_10_1","first-page":"4","article-title":"Joins on Samples: A Theoretical Guide for Practitioners","volume":"13","author":"Huang Dawei","year":"2020","unstructured":"Dawei Huang, Dong Young Yoon, Seth Pettie, and Barzan Mozafari. 2020. Joins on Samples: A Theoretical Guide for Practitioners. Proc. VLDB Endow. 13, 4 (jan 2020), 547--560.","journal-title":"Proc. VLDB Endow."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_11_1","DOI":"10.14778\/3352063.3352132"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1145\/2806777.2806780"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1145\/3267809.3267842"},{"key":"e_1_3_2_1_14_1","volume-title":"2023 23rd IEEE International Symposium on Cluster, Cloud and Internet Computing (CCGrid).","author":"Kumar Dhruv","year":"2022","unstructured":"Dhruv Kumar, Sohaib Ahman, Abhishek Chandra, and Ramesh Sitaraman. 2022. AggFirstJoin: Optimizing Geo-Distributed Joins using Aggregation-Based Transformations. In 2023 23rd IEEE International Symposium on Cluster, Cloud and Internet Computing (CCGrid)."},{"volume-title":"Proceedings of the 5th International Workshop on Edge Systems, Analytics and Networking","author":"Kumar Dhruv","unstructured":"Dhruv Kumar, Joel Wolfrath, Abhishek Chandra, and Ramesh K. Sitaraman. 2022. Towards WAN-Aware Join Sampling over Geo-Distributed Data. In Proceedings of the 5th International Workshop on Edge Systems, Analytics and Networking (Rennes, France) (EdgeSys '22). Association for Computing Machinery, New York, NY, USA, 13--18.","key":"e_1_3_2_1_15_1"},{"key":"e_1_3_2_1_16_1","volume-title":"Cardinality Estimation Done Right: Index-Based Join Sampling. In Conference on Innovative Data Systems Research.","author":"Leis Viktor","year":"2017","unstructured":"Viktor Leis, Bernhard Radke, Andrey Gubichev, Alfons Kemper, and Thomas Neumann. 2017. Cardinality Estimation Done Right: Index-Based Join Sampling. In Conference on Innovative Data Systems Research."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1145\/2882903.2915235"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_18_1","DOI":"10.1287\/isre.2013.0480"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1145\/3127479.3132561"},{"volume-title":"Explaining Wide Area Data Transfer Performance (HPDC '17)","author":"Liu Zhengchun","unstructured":"Zhengchun Liu, Prasanna Balaprakash, Rajkumar Kettimuthu, and Ian Foster. 2017. Explaining Wide Area Data Transfer Performance (HPDC '17). Association for Computing Machinery, New York, NY, USA, 167--178.","key":"e_1_3_2_1_20_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_21_1","DOI":"10.14778\/3055540.3055551"},{"unstructured":"Frank Olken. 1993. Random Sampling from Databases. Ph.D. dissertation. University of California at Berkeley.","key":"e_1_3_2_1_22_1"},{"volume-title":"Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data","author":"Polychroniou Orestis","unstructured":"Orestis Polychroniou, Rajkumar Sen, and Kenneth A. Ross. 2014. Track Join: Distributed Joins with Minimal Network Traffic. In Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data (Snowbird, Utah, USA) (SIGMOD '14). Association for Computing Machinery, New York, NY, USA, 1483--1494.","key":"e_1_3_2_1_23_1"},{"volume-title":"Proceedings of the 2015 ACM Conference on Special Interest Group on Data Communication","author":"Qifan","unstructured":"Qifan Pu et al. 2015. Low Latency Geo-Distributed Data Analytics. In Proceedings of the 2015 ACM Conference on Special Interest Group on Data Communication (London, United Kingdom) (SIGCOMM '15). Association for Computing Machinery, New York, NY, USA, 421--434.","key":"e_1_3_2_1_24_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1145\/2829988.2787505"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_26_1","DOI":"10.1145\/3267809.3267834"},{"volume-title":"Optimizing Distributed Joins with Bloom Filters","author":"Ramesh Sukriti","unstructured":"Sukriti Ramesh, Odysseas Papapetrou, and Wolf Siberski. 2009. Optimizing Distributed Joins with Bloom Filters. In Distributed Computing and Internet Technology, Manish Parashar and Sanjeev K. Aggarwal (Eds.). Springer Berlin Heidelberg, Berlin, Heidelberg, 145--156.","key":"e_1_3_2_1_27_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_28_1","DOI":"10.1109\/ICDE.2016.7498324"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_29_1","DOI":"10.1145\/3448016.3457302"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_30_1","DOI":"10.1145\/3318464.3386134"},{"unstructured":"TPC-H Benchmark. Accessed: 2022-11-11. http:\/\/www.tpc.org\/tpch\/.","key":"e_1_3_2_1_31_1"},{"key":"e_1_3_2_1_32_1","volume-title":"CLARINET: WAN-Aware Optimization for Analytics Queries. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"Viswanathan Raajay","year":"2016","unstructured":"Raajay Viswanathan, Ganesh Ananthanarayanan, and Aditya Akella. 2016. CLARINET: WAN-Aware Optimization for Analytics Queries. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16). USENIX Association, Savannah, GA, 435--450."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_33_1","DOI":"10.1145\/2723372.2735365"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_34_1","DOI":"10.1109\/IC2E55432.2022.00013"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_35_1","DOI":"10.1145\/2934664"},{"volume-title":"Proceedings of the 2018 Conference of the ACM Special Interest Group on Data Communication","author":"Zhang Ben","unstructured":"Ben Zhang, Xin Jin, Sylvia Ratnasamy, John Wawrzynek, and Edward A. Lee. 2018. AWStream: Adaptive Wide-Area Streaming Analytics. In Proceedings of the 2018 Conference of the ACM Special Interest Group on Data Communication (Budapest, Hungary) (SIGCOMM '18). Association for Computing Machinery, New York, NY, USA, 236--252.","key":"e_1_3_2_1_36_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_37_1","DOI":"10.1145\/3514221.3526160"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_38_1","DOI":"10.1145\/3183713.3183739"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_39_1","DOI":"10.1145\/3318464.3389717"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_40_1","DOI":"10.1145\/3542929.3563474"}],"event":{"sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGOPS ACM Special Interest Group on Operating Systems"],"acronym":"SoCC '23","name":"SoCC '23: ACM Symposium on Cloud Computing","location":"Santa Cruz CA USA"},"container-title":["Proceedings of the 2023 ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620678.3624643","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620678.3624643","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620678.3624643","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:55:14Z","timestamp":1755878114000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620678.3624643"}},"subtitle":["Optimizing Join Approximation for Geo-Distributed Data Analytics"],"short-title":[],"issued":{"date-parts":[[2023,10,30]]},"references-count":40,"alternative-id":["10.1145\/3620678.3624643","10.1145\/3620678"],"URL":"https:\/\/doi.org\/10.1145\/3620678.3624643","relation":{},"subject":[],"published":{"date-parts":[[2023,10,30]]},"assertion":[{"value":"2023-10-31","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}