{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T18:20:06Z","timestamp":1761157206664},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,8,2]],"date-time":"2021-08-02T00:00:00Z","timestamp":1627862400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,8,2]],"date-time":"2021-08-02T00:00:00Z","timestamp":1627862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11227-021-04000-2","type":"journal-article","created":{"date-parts":[[2021,8,2]],"date-time":"2021-08-02T10:03:04Z","timestamp":1627898584000},"page":"3561-3604","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Data balancing-based intermediate data partitioning and check point-based cache recovery in Spark environment"],"prefix":"10.1007","volume":"78","author":[{"given":"Chunlin","family":"Li","sequence":"first","affiliation":[]},{"given":"Qianqian","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Youlong","family":"Luo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,8,2]]},"reference":[{"issue":"1","key":"4000_CR1","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1111\/dpr.12142","volume":"34","author":"M Hilbert","year":"2016","unstructured":"Hilbert M (2016) Big data for development: a review of promises and challenges. Dev Policy Rev 34(1):135\u2013174","journal-title":"Dev Policy Rev"},{"key":"4000_CR2","doi-asserted-by":"crossref","unstructured":"Wu C H, Lin F, Chang WY et al. (2016) Big data development platform for engineering applications. In: 2016 IEEE International Conference on Big Data (Big Data), IEEE","DOI":"10.1109\/BigData.2016.7840914"},{"key":"4000_CR3","doi-asserted-by":"crossref","unstructured":"Li C, Song M, Yu C, Luo Y (2021) Mobility and marginal gain based content caching and placement for cooperative edge-cloud computing. Inf Sci 548:153\u2013176","DOI":"10.1016\/j.ins.2020.09.016"},{"key":"4000_CR4","doi-asserted-by":"crossref","unstructured":"Hga B (2020) Big data development of tourism resources based on 5G network and internet of things system. Microprocess Microsyst 80","DOI":"10.1016\/j.micpro.2020.103567"},{"issue":"3","key":"4000_CR5","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1002\/(SICI)1096-9128(199703)9:3<163::AID-CPE244>3.0.CO;2-5","volume":"9","author":"C Le\u00f3n","year":"2015","unstructured":"Le\u00f3n C, Rodr\u00edguez C, Garc\u00eda F et al (2015) A PRAM oriented programming system. Concurr Comput Prac Exp 9(3):163\u2013179","journal-title":"Concurr Comput Prac Exp"},{"key":"4000_CR6","doi-asserted-by":"crossref","unstructured":"Lecomber DS, Siniolakis CJ, Sujithan KR (2015) PRAM programming: in theory and in practice. Concurr Comput Prac Exp 12(4):211\u2013226","DOI":"10.1002\/(SICI)1096-9128(20000410)12:4<211::AID-CPE477>3.0.CO;2-R"},{"key":"4000_CR7","doi-asserted-by":"crossref","unstructured":"Li C, Tang J, Ma T, Yang X, Luo Y (2020) Load balance based workflow job scheduling algorithm in distributed cloud. J Netw Comput Appl 152","DOI":"10.1016\/j.jnca.2019.102518"},{"issue":"12","key":"4000_CR8","doi-asserted-by":"publisher","first-page":"1802","DOI":"10.14778\/2367502.2367519","volume":"5","author":"Y Chen","year":"2012","unstructured":"Chen Y, Alspaugh S, Katz R (2012) Interactive analytical processing in big data systems: a cross-industry study of mapreduce workloads. Proc Vldb Endow 5(12):1802\u20131813","journal-title":"Proc Vldb Endow"},{"key":"4000_CR9","doi-asserted-by":"crossref","unstructured":"Kijsanayothin P, Chalumporn G, Hewett R (2019) On using MapReduce to scale algorithms for big data analytics: a case study. J Big Data 6(1)","DOI":"10.1186\/s40537-019-0269-1"},{"key":"4000_CR10","doi-asserted-by":"crossref","unstructured":"Li C, Zhang Y, Hao Z, Luo Y (2020) An effective scheduling strategy based on hypergraph partition in geographically distributed datacenters. Comput Netw 170","DOI":"10.1016\/j.comnet.2020.107096"},{"key":"4000_CR11","unstructured":"Huang CQ, Yang SQ, Tang JC et al. (2017) RDDShare: reusing results of spark RDD. In: IEEE International Conference on Data Science in Cyberspace, IEEE"},{"key":"4000_CR12","doi-asserted-by":"crossref","unstructured":"Li C, Bai J, Chen Y, Luo Y (2020) Resource and replica management strategy for optimizing financial cost and user experience in edge cloud computing system. Inf Sci 516","DOI":"10.1016\/j.ins.2019.12.049"},{"key":"4000_CR13","doi-asserted-by":"crossref","unstructured":"He M, Li G, Huang C et al. (2017) A comparative study of data skew in Hadoop. In: The 2017 VI International Conference","DOI":"10.1145\/3171592.3171610"},{"key":"4000_CR14","doi-asserted-by":"crossref","unstructured":"Zhuo T, Zhang X, Li K et al. (2016) An intermediate data placement algorithm for load balancing in Spark computing environment. Future Gener Comput Syst 78(1):287\u2013301","DOI":"10.1016\/j.future.2016.06.027"},{"key":"4000_CR15","doi-asserted-by":"crossref","unstructured":"Cardoso P, Barcelos P (2018) Dynamic checkpoint architecture for reliability improvement on distributed frameworks. In: IEEE Symposium on Reliable Distributed Systems","DOI":"10.1109\/SRDS.2018.00038"},{"key":"4000_CR16","unstructured":"Zhang ZL, University NN (2016) Development of cloud computing. J Hunan City Univ Nat Sci"},{"key":"4000_CR17","doi-asserted-by":"crossref","unstructured":"Hayashi S, Kawanishi K, Ujike I et al (2020) Development of cloud computing system for concrete structure inspection by deep learning based infrared thermography method In: 37th International Symposium on Automation and Robotics in Construction","DOI":"10.22260\/ISARC2020\/0128"},{"key":"4000_CR18","doi-asserted-by":"crossref","unstructured":"Liu S, Liu J, Wang H et al. (2020) Research on the development of cloud computing. In: 2020 International Conference on Computer Information and Big Data Applications (CIBDA), IEEE","DOI":"10.1109\/CIBDA50819.2020.00055"},{"issue":"2","key":"4000_CR19","first-page":"59","volume":"61","author":"A Berni","year":"2020","unstructured":"Berni A (2020) Data-intensive systems: principles and fundamentals using Hadoop and Spark. Comput Rev 61(2):59\u201359","journal-title":"Comput Rev"},{"key":"4000_CR20","doi-asserted-by":"crossref","unstructured":"Ca\u00edno-Lores S, Carretero J, Nicolae B et al. (2019) Spark-DIY: a framework for interoperable spark operations with high performance block-based data models. In: 2018 IEEE\/ACM 5th International Conference on Big Data Computing Applications and Technologies (BDCAT), IEEE","DOI":"10.1109\/BDCAT.2018.00010"},{"key":"4000_CR21","doi-asserted-by":"crossref","unstructured":"Sudsee B, Kaewkasi C (2019) An improvement of a checkpoint-based distributed testing technique on a big data environment. In: 2019 21st International Conference on Advanced Communication Technology (ICACT)","DOI":"10.23919\/ICACT.2019.8702037"},{"key":"4000_CR22","doi-asserted-by":"crossref","unstructured":"Raj S, Ramesh D, Sethi KK (2020) A Spark-based Apriori algorithm with reduced shuffle overhead. J Supercomput 2020(1)","DOI":"10.1007\/s11227-020-03253-7"},{"issue":"1","key":"4000_CR23","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1016\/j.procs.2015.05.200","volume":"51","author":"M Hassan","year":"2015","unstructured":"Hassan M, Bamha M (2015) Towards scalability and data skew handling in GroupBy-joins using MapReduce model. Procedia Comput Sci 51(1):70\u201379","journal-title":"Procedia Comput Sci"},{"key":"4000_CR24","doi-asserted-by":"crossref","unstructured":"Liu G, Zhu X, Ji W et al. (2017) SP-Partitioner: a novel partition method to handle intermediate data skew in spark streaming. Future Gener Comput Syst 86(SEP.):1054\u20131063","DOI":"10.1016\/j.future.2017.07.014"},{"key":"4000_CR25","doi-asserted-by":"crossref","unstructured":"Fu Z, Tang Z, Yang L et al. (2020) ImRP: a predictive partition method for data skew alleviation in spark streaming environment. Parall Comput 100:102699","DOI":"10.1016\/j.parco.2020.102699"},{"key":"4000_CR26","unstructured":"Tang Z, Lv W, Li K et al. (2018) An intermediate data partition algorithm for skew mitigation in spark computing environment. IEEE Trans Cloud Comput 1\u20131"},{"issue":"1","key":"4000_CR27","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1007\/s11227-018-2578-0","volume":"75","author":"E Gavagsaz","year":"2019","unstructured":"Gavagsaz E, Rezaee A, Javadi H (2019) Load balancing in join algorithms for skewed data in MapReduce systems. J Supercomput 75(1):228\u2013254","journal-title":"J Supercomput"},{"key":"4000_CR28","doi-asserted-by":"crossref","unstructured":"Guo W, Huang C, Tian W (2020) Handling data skew at reduce stage in Spark by ReducePartition. Concurr Comput Prac Exp 32(9)","DOI":"10.1002\/cpe.5637"},{"key":"4000_CR29","doi-asserted-by":"crossref","unstructured":"Alfaia EC, Dusi M, Fiori L et al. (2015) Fault-tolerant streaming computation with BlockMon. In: IEEE GLOBCOM 2015, IEEE","DOI":"10.1109\/GLOCOM.2015.7417644"},{"key":"4000_CR30","unstructured":"Shen Y (2015) Complex query processing and recovery in distributed systems"},{"key":"4000_CR31","unstructured":"Wei Z, Chen H, Fei H (2016) ASC: improving spark driver performance with SPARK automatic checkpoint. In: International Conference on Advanced Communication Technology. IEEE"},{"key":"4000_CR32","doi-asserted-by":"crossref","unstructured":"Zhang YM, Luo Y, Yanchen LI (2017) Optimizing checkpointing performance in Spark","DOI":"10.12783\/dtcse\/csma2017\/17315"},{"key":"4000_CR33","doi-asserted-by":"crossref","unstructured":"Ying C, Yu J, He JS (2018) Towards fault tolerance optimization based on checkpoints of in-memory framework spark. J Ambient Intell Human Comp","DOI":"10.1007\/s12652-018-1018-6"},{"key":"4000_CR34","doi-asserted-by":"crossref","unstructured":"Cardoso PV, Barcelos PP (2018) Definition of an architecture for dynamic and automatic checkpoints on apache spark. In: 2018 IEEE 37th Symposium on Reliable Distributed Systems (SRDS). IEEE","DOI":"10.1109\/SRDS.2018.00041"},{"key":"4000_CR35","doi-asserted-by":"crossref","unstructured":"Tian Y, Shen Q, Zhu Z et al. (2018) Non-authentication based checkpoint fault-tolerant vulnerability in spark streaming. In: 2018 IEEE Symposium on Computers and Communications (ISCC). IEEE Computer Society","DOI":"10.1109\/ISCC.2018.8538745"},{"key":"4000_CR36","unstructured":"Li J (2018) Comparing Spark vs MPI\/OpenMP on word count MapReduce"},{"key":"4000_CR37","doi-asserted-by":"crossref","unstructured":"Jiang H (2019) Research and practice of big data analysis process based on hadoop framework. In: 2019 IEEE 3rd Information Technology, Networking, Electronic and Automation Control Conference (ITNEC). IEEE","DOI":"10.1109\/ITNEC.2019.8729522"},{"key":"4000_CR38","unstructured":"Yu S, Xu C, Liu H (2018) Zipf's law in 50 languages: its structural pattern, linguistic interpretation, and cognitive motivation"},{"issue":"4","key":"4000_CR39","doi-asserted-by":"publisher","first-page":"1276","DOI":"10.1017\/jpr.2020.64","volume":"57","author":"RT Fernholz","year":"2020","unstructured":"Fernholz RT, Fernholz R (2020) Zipf\u2019s law for atlas models. J Appl Probab 57(4):1276\u20131297","journal-title":"J Appl Probab"},{"key":"4000_CR40","doi-asserted-by":"crossref","unstructured":"Sreeyuktha HS, Reddy JG (2019) Partitioning in Apache Spark","DOI":"10.1007\/978-981-13-7082-3_56"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-04000-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-021-04000-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-04000-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,7]],"date-time":"2022-02-07T13:17:36Z","timestamp":1644239856000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-021-04000-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,2]]},"references-count":40,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["4000"],"URL":"https:\/\/doi.org\/10.1007\/s11227-021-04000-2","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,8,2]]},"assertion":[{"value":"15 July 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 August 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}