{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:30:19Z","timestamp":1750221019028,"version":"3.41.0"},"publisher-location":"New York, New York, USA","reference-count":19,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1145\/3335484.3335495","type":"proceedings-article","created":{"date-parts":[[2019,7,8]],"date-time":"2019-07-08T16:54:49Z","timestamp":1562604889000},"page":"17-23","source":"Crossref","is-referenced-by-count":0,"title":["Intermediate Data Placement Strategy for Different Data Skew Levels Based on Random Sampling in Spark"],"prefix":"10.1145","author":[{"given":"Xueqian","family":"Gong","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunlin","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Youlong","family":"Luo","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","reference":[{"key":"key-10.1145\/3335484.3335495-1","unstructured":"Apache Spark. http:\/\/spark.apache.org\/."},{"key":"key-10.1145\/3335484.3335495-2","doi-asserted-by":"crossref","unstructured":"Gufler B, Augsten N, Reiser A, et al. Load Balancing in MapReduce Based on Scalable Cardinality Estimates. IEEE International Conference on Data Engineering, 2012: 522--533.","DOI":"10.1109\/ICDE.2012.58"},{"key":"key-10.1145\/3335484.3335495-3","doi-asserted-by":"crossref","unstructured":"Xu Y, Qu W, Li Z, et al. Balancing reducer workload for skewed data using sampling-based partitioning. Computers & Electrical Engineering, 2014, 40(2): 675--687.","DOI":"10.1016\/j.compeleceng.2013.07.001"},{"key":"key-10.1145\/3335484.3335495-4","doi-asserted-by":"crossref","unstructured":"Tang Z, Zhang X. An intermediate data placement algorithm for load balancing in Spark computing environment. Future Generation Computer Systems, 2018, 78: 287--301.","DOI":"10.1016\/j.future.2016.06.027"},{"key":"key-10.1145\/3335484.3335495-5","doi-asserted-by":"crossref","unstructured":"S.R. Ramakrishnan, G. Swart. Balancing reducer skew in mapreduce workloads using progressive sampling. ACM Symposium on Cloud Computing, 2012, pp. 1--14.","DOI":"10.1145\/2391229.2391245"},{"key":"key-10.1145\/3335484.3335495-6","doi-asserted-by":"crossref","unstructured":"Kwon Y C, Balazinska M, Howe B, et al. Skew-resistant parallel processing of feature-extracting scientific user-defined functions. ACM Symposium on Cloud Computing, 2010: 75--86.","DOI":"10.1145\/1807128.1807140"},{"key":"key-10.1145\/3335484.3335495-7","doi-asserted-by":"crossref","unstructured":"Kwon Y C, Balazinska M, Howe B, et al. SkewTune in action: mitigating skew in MapReduce applications. Proceedings of the Vldb Endowment, 2012, 5(12): 1934--1937.","DOI":"10.14778\/2367502.2367541"},{"key":"key-10.1145\/3335484.3335495-8","doi-asserted-by":"crossref","unstructured":"Karapiperis D, Verykios V S. Load-Balancing the Distance Computations in Record Linkage. ACM Sigkdd Explorations Newsletter, 2015, 17(1): 1--7.","DOI":"10.1145\/2830544.2830546"},{"key":"key-10.1145\/3335484.3335495-9","unstructured":"Lan V, Alaghband G. A load balancing parallel method for frequent pattern mining on multi-core cluster. Symposium on High PERFORMANCE Computing. Society for Computer Simulation International, 2015: 49--58."},{"key":"key-10.1145\/3335484.3335495-10","doi-asserted-by":"crossref","unstructured":"Liu G, Zhu X, Wang J, et al. SP-Partitioner: A novel partition method to handle intermediate data skew in spark streaming. Future Generation Computer Systems, 2018,86: 1054--1063.","DOI":"10.1016\/j.future.2017.07.014"},{"key":"key-10.1145\/3335484.3335495-11","doi-asserted-by":"crossref","unstructured":"Isard M, Budiu M, Yu Y, et al. Dryad:distributed data-parallel programs from sequential building blocks. ACM Sigops\/eurosys European Conference on Computer Systems, 2007: 59--72.","DOI":"10.1145\/1272998.1273005"},{"key":"key-10.1145\/3335484.3335495-12","doi-asserted-by":"crossref","unstructured":"Ibrahim S, Jin H, Lu L, et al. LEEN: Locality\/Fairness-Aware Key Partitioning for MapReduce in the Cloud. IEEE Second International Conference on Cloud Computing Technology and Science, 2011: 17--24.","DOI":"10.1109\/CloudCom.2010.25"},{"key":"key-10.1145\/3335484.3335495-13","doi-asserted-by":"crossref","unstructured":"M. Hammoud, M.S. Rehman, M.F. Sakr, Center-of-gravity reduce task scheduling to lower mapreduce network traffic. IEEE International Conference on Cloud Computing, 2012, pp. 49--58.","DOI":"10.1109\/CLOUD.2012.92"},{"key":"key-10.1145\/3335484.3335495-14","doi-asserted-by":"crossref","unstructured":"Gavagsaz E, Rezaee A, Javadi H H S. Load balancing in reducers for skewed data in MapReduce systems by using scalable simple random sampling. Journal of Supercomputing, 2018(3): 1--26.","DOI":"10.1007\/s11227-018-2578-0"},{"key":"key-10.1145\/3335484.3335495-15","unstructured":"PUMA Datasets [Online]. Available: https:\/\/engineering.purdue.edu\/~puma\/datasets.htm.2016"},{"key":"key-10.1145\/3335484.3335495-16","doi-asserted-by":"crossref","unstructured":"Jure Leskovec, Kevin J. Lang, Anirban Dasgupta, et al. Community Structure in Large Networks: Natural Cluster Sizes and the Absence of Large Well-Defined Clusters. Internet Mathematics, 2009, 6(1): 29--123.","DOI":"10.1080\/15427951.2009.10129177"},{"key":"key-10.1145\/3335484.3335495-17","unstructured":"RangePartitioner. http:\/\/spark.apache.org\/docs\/1.6.3\/api\/java\/index.html"},{"key":"key-10.1145\/3335484.3335495-18","doi-asserted-by":"crossref","unstructured":"Ramakrishnan S R, Swart G, Urmanov A. Balancing reducer skew in MapReduce workloads using progressive sampling. Proceedings of the Third ACM Symposium on Cloud Computing--SoCC, 2012: 1--14.","DOI":"10.1145\/2391229.2391245"},{"key":"key-10.1145\/3335484.3335495-19","doi-asserted-by":"crossref","unstructured":"Chen Q, Yao J, Xiao Z. LIBRA: Lightweight Data Skew Mitigation in MapReduce. IEEE Transactions on Parallel & Distributed Systems, 2015, 26(9): 2520--2533","DOI":"10.1109\/TPDS.2014.2350972"}],"event":{"number":"4","sponsor":["Shenzhen University","Sun Yat-Sen University"],"acronym":"ICBDC 2019","name":"the 2019 4th International Conference","start":{"date-parts":[[2019,5,10]]},"location":"Guangzhou, China","end":{"date-parts":[[2019,5,12]]}},"container-title":["Proceedings of the 2019 4th International Conference on Big Data and Computing  - ICBDC 2019"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3335484.3335495","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3335495&ftid=2070100&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:26:19Z","timestamp":1750206379000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3335484.3335495"}},"subtitle":[],"proceedings-subject":"Big Data and Computing","short-title":[],"issued":{"date-parts":[[2019]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1145\/3335484.3335495","relation":{},"subject":[],"published":{"date-parts":[[2019]]}}}