{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T03:55:22Z","timestamp":1778644522058,"version":"3.51.4"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,10,31]],"date-time":"2023-10-31T00:00:00Z","timestamp":1698710400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,31]],"date-time":"2023-10-31T00:00:00Z","timestamp":1698710400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"the Hunan Provincial Natural Science Foundation of China","award":["Grant 2021JJ40612"],"award-info":[{"award-number":["Grant 2021JJ40612"]}]},{"name":"Natural Science Foundation of Changsha of China","award":["kq2208042"],"award-info":[{"award-number":["kq2208042"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302157"],"award-info":[{"award-number":["62302157"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62225205"],"award-info":[{"award-number":["62225205"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"National Key Research and Development Program of China","award":["2021ZD40303"],"award-info":[{"award-number":["2021ZD40303"]}]},{"DOI":"10.13039\/501100004761","name":"Natural Science Foundation of Hunan Province of China","doi-asserted-by":"crossref","award":["2021JJ10023"],"award-info":[{"award-number":["2021JJ10023"]}],"id":[{"id":"10.13039\/501100004761","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100017607","name":"Shenzhen Basic Research Project","doi-asserted-by":"crossref","award":["JCYJ20210324140002006"],"award-info":[{"award-number":["JCYJ20210324140002006"]}],"id":[{"id":"10.13039\/501100017607","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Grid Computing"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s10723-023-09700-y","type":"journal-article","created":{"date-parts":[[2023,10,31]],"date-time":"2023-10-31T11:02:02Z","timestamp":1698750122000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["A Real-Time Partition Generation Mechanism for Data Skew Mitigation in Spark Computing Environment"],"prefix":"10.1007","volume":"21","author":[{"given":"Li","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiong","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuedong","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhechang","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhuo","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,10,31]]},"reference":[{"issue":"3","key":"9700_CR1","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1016\/j.ins.2023.02.034","volume":"19","author":"Y Song","year":"2023","unstructured":"Song, Y., Yang, L., Wang, Y., Xiao, X., You, S., Tang, Z.: Parallel incremental association rule mining framework for public opinion analysis. Inf. Sci. 19(3), 523\u2013545 (2023)","journal-title":"Inf. Sci."},{"key":"9700_CR2","doi-asserted-by":"crossref","unstructured":"Xiao, X., Li, C., Jiang, B., Cai, Q., Li, k., Tang, Z.: Adaptive search strategy based chemical reaction optimization scheme for task scheduling in discrete multiphysical coupling applications. Appl. Soft Comput. 121 (2022)","DOI":"10.1016\/j.asoc.2022.108748"},{"issue":"1","key":"9700_CR3","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: Mapreduce: simplified data processing on large clusters. Communications of the ACM 51(1), 107\u2013113 (2008)","journal-title":"Communications of the ACM"},{"key":"9700_CR4","unstructured":"hdfs (2021) https:\/\/hadoop.apache.org\/docs\/stable\/hadoop-project-dist\/hadoop-hdfs"},{"key":"9700_CR5","unstructured":"Hadoop (2014) http:\/\/hadoop.apache.org"},{"key":"9700_CR6","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.J., Shenker, S., Stoica, I.: Spark: cluster computing with working sets. In: Usenix conference on hot topics in cloud computing (2010)"},{"key":"9700_CR7","unstructured":"Flink (2017) https:\/\/flink.apache.org"},{"key":"9700_CR8","doi-asserted-by":"crossref","unstructured":"Anusha, K., Usha Rani, K.: Performance evaluation of spark sql for batch processing. In: Emerging research in data engineering systems and computer communications, pp. 145\u2013153 (2020)","DOI":"10.1007\/978-981-15-0135-7_13"},{"key":"9700_CR9","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.jpdc.2020.10.010","volume":"149","author":"G Cheng","year":"2021","unstructured":"Cheng, G., Ying, S., Wang, B., Li, Y.: Efficient performance prediction for apache spark. J. Parallel Distrib. Comput. 149, 40\u201351 (2021)","journal-title":"J. Parallel Distrib. Comput."},{"key":"9700_CR10","unstructured":"Apache spark. https:\/\/spark.apache.org\/ docs\/3.5.0\/cluster-overview.html (2016)"},{"key":"9700_CR11","doi-asserted-by":"crossref","unstructured":"Beame, P., Koutris, P., Dan, S.: Skew in parallel query processing. In: 33rd ACM SIGMODSIGACT-SIGART symposium on principles of database systems, pp. 212\u2013223 (2014)","DOI":"10.1145\/2594538.2594558"},{"issue":"2","key":"9700_CR12","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1109\/TCC.2018.2878838","volume":"9","author":"Z Tang","year":"2018","unstructured":"Tang, Z., Lv, W., Li, K., Li, K.: An intermediate data partition algorithm for skew mitigation in spark computing environment. IEEE Trans. Cloud Comput. 9(2), 461\u2013474 (2018)","journal-title":"IEEE Trans. Cloud Comput."},{"issue":"6","key":"9700_CR13","doi-asserted-by":"publisher","first-page":"1649","DOI":"10.1109\/TPDS.2016.2587645","volume":"28","author":"Y Guo","year":"2017","unstructured":"Guo, Y., Rao, J., Cheng, D., Zhou, X.: ishuffle: Improving hadoop performance with shuffleon-write. IEEE Trans. Parallel Distrib. Syst. 28(6), 1649\u20131662 (2017)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"9700_CR14","doi-asserted-by":"crossref","unstructured":"Yu, X., Kostamaa, P., Xin, Z., Liang, C.: Handling data skew in parallel joins in sharednothing systems. In: ACM SIGMOD international conference on Management of data, pp. 1043\u20131052 (2008)","DOI":"10.1145\/1376616.1376720"},{"key":"9700_CR15","doi-asserted-by":"crossref","unstructured":"Cheng, L., Kotoulas, S., Ward, T.E., Theodoropoulos, G.: Efficiently handling skew in outer joins on distributed systems. In: 14th IEEE\/ACM international symposium on cluster, cloud and grid computing, pp. 295\u2013304 (2014)","DOI":"10.1109\/CCGrid.2014.35"},{"key":"9700_CR16","doi-asserted-by":"crossref","unstructured":"Zheng, L., Shen, Y.: Improve parallelism of task execution to optimize utilization of mapreduce cluster resources. In: IEEE 17th International conference on computational science and engineering, pp. 674\u2013681 (2015)","DOI":"10.1109\/CSE.2014.144"},{"key":"9700_CR17","doi-asserted-by":"crossref","unstructured":"Zeng, Z., Li, k., Duan, M., Liu, C., Liao, X.: K-means parallel acceleration for sparse data dimensions on flink. In: 2019 IEEE 21st International conference on high performance computing and communications; IEEE 17th international conference on smart city; IEEE 5th international conference on data science and systems (HPCC\/SmartCity\/ DSS), pp. 2053\u20132058 (2019)","DOI":"10.1109\/HPCC\/SmartCity\/DSS.2019.00284"},{"key":"9700_CR18","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.1016\/j.future.2017.07.014","volume":"86","author":"G Liu","year":"2018","unstructured":"Liu, G., Zhu, X., Wang, J., Guo, D., Bao, W., Guo, H.: Sp-partitioner: A novel partition method to handle intermediate data skew in spark streaming. Futur. Gener. Comput. Syst. 86, 1054\u20131063 (2018)","journal-title":"Futur. Gener. Comput. Syst."},{"key":"9700_CR19","doi-asserted-by":"crossref","unstructured":"He, Z., Li, Z., Peng, X., Weng, C.: Ds2 : Handling data skew using data stealings over high-speed networks. In: 2021 IEEE 37th International conference on data engineering (ICDE), pp. 1865\u20131870 (2021)","DOI":"10.1109\/ICDE51399.2021.00168"},{"key":"9700_CR20","unstructured":"Lin, J.: The curse of zipf and limits to parallelization: A look at the stragglers problem in mapreduce (2012)"},{"issue":"4","key":"9700_CR21","doi-asserted-by":"publisher","first-page":"1149","DOI":"10.1109\/TCC.2016.2607738","volume":"8","author":"Z Tang","year":"2016","unstructured":"Tang, Z., Ma, W., Li, K., Li, K.: A data skew oriented reduce placement algorithm based on sampling. IEEE Trans. Cloud Comput. 8(4), 1149\u20131161 (2016)","journal-title":"IEEE Trans. Cloud Comput."},{"issue":"7","key":"9700_CR22","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1145\/358105.893","volume":"27","author":"JS Vitter","year":"1984","unstructured":"Vitter, J.S.: Faster methods for random sampling. Communications of the ACM 27(7), 703\u2013718 (1984)","journal-title":"Communications of the ACM"},{"key":"9700_CR23","unstructured":"Karau, H., Konwinski, A., Wendell, P., Zaharia, M.: Learning spark: lightning-fast big data analysis, O\u2019Reilly Media, Inc. (2015)"},{"issue":"3","key":"9700_CR24","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1109\/TC.2008.176","volume":"58","author":"X Yuan","year":"2009","unstructured":"Yuan, X., Duan, Z.: Fair round-robin: A low complexity packet schduler with proportional and worst-case fairness. IEEE Trans. Comput. 58(3), 365\u2013379 (2009)","journal-title":"IEEE Trans. Comput."},{"key":"9700_CR25","unstructured":"Murmurhash. https:\/\/en.wikipedia.org\/wiki\/MurmurHash (2016)"},{"key":"9700_CR26","unstructured":"Hibench. https:\/\/github.com\/Intel-bigdata\/ HiBench (2021)"},{"key":"9700_CR27","unstructured":"Hashpartitioner. http:\/\/spark.apache.org\/ docs\/latest\/api\/scala\/index.html (2017)"},{"key":"9700_CR28","doi-asserted-by":"crossref","unstructured":"Yao, X., Wang, C., Zhang, M.: Ec-shuffle: Dynamic erasure coding optimization for efficient and reliable shuffle in spark. In: 2019 19th IEEE\/ACM International symposium on cluster, cloud and grid computing (CCGRID), pp. 41\u201351 (2019)","DOI":"10.1109\/CCGRID.2019.00014"},{"key":"9700_CR29","unstructured":"Ousterhout, K., Panda, A., Rosen, J., Venkataraman, S., Xin, R., Ratnasamy, S., Shenker, S., Stoica, I.:The case for tiny tasks in compute clusters. In: 14th Workshop on hot topics in operating systems (HotOSXIV). (2013)"},{"key":"9700_CR30","doi-asserted-by":"crossref","unstructured":"Leskovec, J., Lang, K.J., Dasgupta, A., Mahoney, M.W.: Community structure in large networks: Natural cluster sizes and the absence of large well-defined clusters. Internet Math. 6(1), 29\u2013123 (2008)","DOI":"10.1080\/15427951.2009.10129177"},{"key":"9700_CR31","unstructured":"Stanford large network dataset collection (2013)"}],"container-title":["Journal of Grid Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10723-023-09700-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10723-023-09700-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10723-023-09700-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T03:32:18Z","timestamp":1730431938000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10723-023-09700-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,31]]},"references-count":31,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["9700"],"URL":"https:\/\/doi.org\/10.1007\/s10723-023-09700-y","relation":{},"ISSN":["1570-7873","1572-9184"],"issn-type":[{"value":"1570-7873","type":"print"},{"value":"1572-9184","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,31]]},"assertion":[{"value":"10 October 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 October 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}],"article-number":"62"}}