{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T08:01:49Z","timestamp":1764403309349,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T00:00:00Z","timestamp":1692921600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,25]]},"DOI":"10.1145\/3627341.3630380","type":"proceedings-article","created":{"date-parts":[[2023,12,15]],"date-time":"2023-12-15T12:07:01Z","timestamp":1702642021000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Optimization of Spark Data Skew in Big Data Environment"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-1530-9232","authenticated-orcid":false,"given":"Huanshu","family":"Wang","sequence":"first","affiliation":[{"name":"Changsha University of Science &amp; Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2198-0462","authenticated-orcid":false,"given":"Huali","family":"Wang","sequence":"additional","affiliation":[{"name":"Changsha University of Science &amp; Technology, China"}]}],"member":"320","published-online":{"date-parts":[[2023,12,15]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"2","article-title":"Research on University Campus Big Data Platform Based on Hadoop and Spark [J] .","volume":"2018","author":"Ping L","unstructured":"Ping L. Research on University Campus Big Data Platform Based on Hadoop and Spark [J] .Software Engineering, 2018:2-34.","journal-title":"Software Engineering"},{"issue":"2","key":"e_1_3_2_1_2_1","first-page":"217","article-title":"Improving Load Balancing for MapReduce-based Matching [J]","volume":"17","author":"Midoun K","year":"2019","unstructured":"Midoun K, Loudini M, Hidouci K W, LoEM: Improving Load Balancing for MapReduce-based Matching [J]. International Journal of Artificial Intelligence, 2019, 17 (2): 217-235.","journal-title":"International Journal of Artificial Intelligence"},{"key":"e_1_3_2_1_3_1","volume-title":"A Framework for Workload Balancing in MapReduce [C]\/\/IEEE Computer Society","author":"Martha V S","year":"2013","unstructured":"Martha V S, Zhao W, Xu X .h-MapReduce: A Framework for Workload Balancing in MapReduce [C]\/\/IEEE Computer Society.IEEE Computer Society, 2013:637-644."},{"key":"e_1_3_2_1_4_1","volume-title":"Noordwijkerhout","author":"Gufler B","year":"2011","unstructured":"Gufler B, Augsten N, Reiser A, HANDLING DATA SKEW IN MAPREDUCE [C]\/\/CLOSER 2011 - Proceedings of the 1st International Conference on Cloud Computing and Services Science, Noordwijkerhout, Netherlands, 7-9 May, 2011:1-7."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Son J Choi H Chung Y D .Skew-Tolerant Key Distribution for Load Balancing in MapReduce [J] .Ieice Trans.inf . & Syst 2012 95 (2): 677-680.","DOI":"10.1587\/transinf.E95.D.677"},{"key":"e_1_3_2_1_6_1","first-page":"1","article-title":"Balancing reducer skew in MapReduce workloads using Computsampling [C]\/\/Third Proceedings of the Progressive ACM Symposium on Cloud Computing.","author":"Ramakrishnan S R","year":"2012","unstructured":"Ramakrishnan S R, Swart G, Urmanov A. Balancing reducer skew in MapReduce workloads using Computsampling [C]\/\/Third Proceedings of the Progressive ACM Symposium on Cloud Computing.ACM, 2012. 1-4.","journal-title":"ACM"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.14778\/2367502.2367541"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2014.2350972"},{"key":"e_1_3_2_1_9_1","first-page":"102","article-title":"Improving Spark Performance with Adaptive Skew Mitigation [C]\/\/2015 IEEE International Conference on Progress","volume":"2015","author":"Yu J","unstructured":"Yu J, Chen H, Hu F .SASM: Improving Spark Performance with Adaptive Skew Mitigation [C]\/\/2015 IEEE International Conference on Progress in Informatics and Computing , 2015:102-107.","journal-title":"Informatics and Computing"},{"issue":"3","key":"e_1_3_2_1_10_1","first-page":"1054","article-title":"A novel partition method to handle intermediate data skew in spark streaming [J] .","volume":"86","author":"Liu G","year":"2017","unstructured":"Liu G, Zhu X, Wang J, SP-Partitioner: A novel partition method to handle intermediate data skew in spark streaming [J] .Future Generation Computer Systems, 2017, 86 (3): 1054-1063.","journal-title":"Future Generation Computer Systems"},{"issue":"1","key":"e_1_3_2_1_11_1","first-page":"287","article-title":"An intermediate data placement algorithm for load balancing in the Spark environment [J] .","volume":"78","author":"Tang Z","year":"2016","unstructured":"Tang Z, Zhang X, Li K, An intermediate data placement algorithm for load balancing in the Spark environment [J] .Future Generation Computer Systems, 2016, 78 (1): 287-301.","journal-title":"Future Generation Computer Systems"},{"issue":"1","key":"e_1_3_2_1_12_1","first-page":"220","article-title":"Load balancing in join algorithms for skewed data in Map Reduce systems","volume":"75","author":"Elaheh Gavagsaz","year":"2019","unstructured":"Elaheh Gavagsaz, Ali Rezaee, Hamid H. S. Javadi. Load balancing in join algorithms for skewed data in Map Reduce systems. The Journal of Supercomputing, 2019, 75(1): 220-254.","journal-title":"The Journal of Supercomputing"},{"key":"e_1_3_2_1_13_1","first-page":"175","volume-title":"Scientific Programming","author":"Donghua Chen","year":"2021","unstructured":"Donghua Chen, Runtong Zhang. Map Reduce-based dynamic partition join with shannon entropy for data skewness. Scientific Programming, 2021, 175-209."},{"key":"e_1_3_2_1_14_1","first-page":"973","article-title":"An improved parallel programming model for load balancing of Map Reduce","volume":"105","author":"Jianjiang Li","year":"2020","unstructured":"Jianjiang Li, Yajun Liu, Jian Pan, Peng Zhang, Wei Chen, Lizhe Wang. Map-Balance-Reduce: An improved parallel programming model for load balancing of Map Reduce. Future Generation Computer Systems, 2020, 105: 973-998.","journal-title":"Future Generation Computer Systems"},{"issue":"1","key":"e_1_3_2_1_15_1","first-page":"61","article-title":"An interval migration based approach for skew mitigation","volume":"14","author":"Balraj Singh K.","year":"2021","unstructured":"Balraj Singh, Harsh K. Verma. IMSM: An interval migration based approach for skew mitigation in Map Reduce. Recent Advances in Computer Science and Communications, 2021, 14(1): 61-81.","journal-title":"Map Reduce. Recent Advances in Computer Science and Communications"},{"key":"e_1_3_2_1_16_1","first-page":"1","volume-title":"19th International dating on World Wide Web, WWW 2010","author":"Kotoulas S","year":"2010","unstructured":"Kotoulas S, Oren E, Harmelen F V. Mind the data skew: Distributed inferencing by speeddating in elastic regions [C]\/Proceedings Conference of the 19th International dating on World Wide Web, WWW 2010, Raleigh, North Carolina, USA, April 26-30, 2010.ACM, 2010. 1-33."},{"key":"e_1_3_2_1_17_1","first-page":"10","article-title":"Shuffle Performance in Apache Spark [C]\/\/International Journal of Engineering Research & Technology.","volume":"2015","author":"Rana N","unstructured":"Rana N, Deshmukh S. Shuffle Performance in Apache Spark [C]\/\/International Journal of Engineering Research & Technology.ESRSA Publications, 2015:10-44.","journal-title":"ESRSA Publications"},{"key":"e_1_3_2_1_18_1","unstructured":"Aggarwal C C. On Biased Reservoir Sampling in the Presence of Stream Evolution. [C]\/\/Very Large Data Bases Conference.2006:33-46."},{"key":"e_1_3_2_1_19_1","unstructured":"Adamic Lada A.. Zipf 's law and the Internet.' glottometrics (2002): 3 (1): 143-150."},{"key":"e_1_3_2_1_20_1","unstructured":"Li-Jie X U .Construction and Research of Big Data Processing Platform Based on Spark [J] .Computer Knowledge and Technology 2016:10-23."},{"key":"e_1_3_2_1_21_1","volume-title":"Linguistic inquiry and word count (LIWC) [J]","author":"Pennebaker J W","year":"2001","unstructured":"Pennebaker J W, Francis M E, Booth R J. Linguistic inquiry and word count (LIWC) [J] .Lawrence Erlbaum Associates Mahwah Nj, 2001; 24-56."}],"event":{"name":"ICCVIT 2023: International Conference on Computer, Vision and Intelligent Technology","acronym":"ICCVIT 2023","location":"Chenzhou China"},"container-title":["Proceedings of the 2023 International Conference on Computer, Vision and Intelligent Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627341.3630380","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627341.3630380","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T22:01:16Z","timestamp":1755900076000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627341.3630380"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,25]]},"references-count":21,"alternative-id":["10.1145\/3627341.3630380","10.1145\/3627341"],"URL":"https:\/\/doi.org\/10.1145\/3627341.3630380","relation":{},"subject":[],"published":{"date-parts":[[2023,8,25]]},"assertion":[{"value":"2023-12-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}