{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:44:35Z","timestamp":1740123875074,"version":"3.37.3"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2020,3,25]],"date-time":"2020-03-25T00:00:00Z","timestamp":1585094400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,3,25]],"date-time":"2020-03-25T00:00:00Z","timestamp":1585094400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2020,12]]},"DOI":"10.1007\/s10766-020-00657-z","type":"journal-article","created":{"date-parts":[[2020,3,25]],"date-time":"2020-03-25T09:03:45Z","timestamp":1585127025000},"page":"941-956","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Handling Data Skew for Aggregation in Spark SQL Using Task Stealing"],"prefix":"10.1007","volume":"48","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8017-2344","authenticated-orcid":false,"given":"Zeyu","family":"He","sequence":"first","affiliation":[]},{"given":"Qiuli","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Zhifang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Chuliang","family":"Weng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,3,25]]},"reference":[{"key":"657_CR1","doi-asserted-by":"crossref","unstructured":"Acar, U.A., Chargueraud, A., Rainey, M.: Scheduling parallel programs by work stealing with private deques. In: Proceedings of the 18th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 219\u2013228. PPoPP \u201913, ACM, New York, NY, USA (2013)","DOI":"10.1145\/2442516.2442538"},{"key":"657_CR2","doi-asserted-by":"crossref","unstructured":"Armbrust, M., Xin, R.S., Lian, C., Huai, Y., Liu, D., Bradley, J.K., Meng, X., Kaftan, T., Franklin, M.J., Ghodsi, A., Zaharia, M.: Spark sql: relational data processing in spark. In: Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, pp. 1383\u20131394. SIGMOD \u201915, ACM, New York, NY, USA (2015)","DOI":"10.1145\/2723372.2742797"},{"issue":"9","key":"657_CR3","doi-asserted-by":"publisher","first-page":"2520","DOI":"10.1109\/TPDS.2014.2350972","volume":"26","author":"Q Chen","year":"2015","unstructured":"Chen, Q., Yao, J., Xiao, Z.: LIBRA: lightweight data skew mitigation in mapreduce. IEEE Trans. Parallel Distrib. Syst. 26(9), 2520\u20132533 (2015)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"657_CR4","unstructured":"Cieslewicz, J., Ross, K.A.: Adaptive aggregation on chip multiprocessors. In: Proceedings of the 33rd International Conference on Very Large Data Bases, pp. 339\u2013350. VLDB \u201907, VLDB Endowment (2007)"},{"key":"657_CR5","unstructured":"Culhane, W., Kogan, K., Jayalath, C., Eugster, P.: LOOM: optimal aggregation overlays for in-memory big data processing. In: 6th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 14), pp. 13\u201313. USENIX Association (2014)"},{"key":"657_CR6","doi-asserted-by":"crossref","unstructured":"Culhane, W., Kogan, K., Jayalath, C., Eugster, P.: Optimal communication structures for big data aggregation. In: 2015 IEEE Conference on Computer Communications, pp. 1643\u20131651. IEEE (2015)","DOI":"10.1109\/INFOCOM.2015.7218544"},{"key":"657_CR7","unstructured":"Hua, K.A., Lee, C.: Handling data skew in multiprocessor database computers using partition tuning. In: Proceedings of the 17th International Conference on Very Large Data Bases, pp. 525\u2013535. VLDB \u201991, Morgan Kaufmann Publishers Inc., San Francisco, CA, USA (1991)"},{"key":"657_CR8","doi-asserted-by":"crossref","unstructured":"Jiang, P., Agrawal, G.: Efficient SIMD and MIMD parallelization of hash-based aggregation by conflict mitigation. In: Proceedings of the International Conference on Supercomputing, pp. 24:1\u201324:11. ICS \u201917, ACM, New York, NY, USA (2017)","DOI":"10.1145\/3079079.3079080"},{"key":"657_CR9","doi-asserted-by":"crossref","unstructured":"Kwon, Y., Balazinska, M., Howe, B., Rolia, J.: Skew-resistant parallel processing of feature-extracting scientific user-defined functions. In: Proceedings of the 1st ACM Symposium on Cloud Computing, pp. 75\u201386. SoCC \u201910, ACM, New York, NY, USA (2010)","DOI":"10.1145\/1807128.1807140"},{"key":"657_CR10","first-page":"30","volume":"11","author":"Y Kwon","year":"2011","unstructured":"Kwon, Y., Balazinska, M., Howe, B., Rolia, J.: A study of skew in mapreduce applications. Open Cirrus Summit 11, 30 (2011)","journal-title":"Open Cirrus Summit"},{"issue":"12","key":"657_CR11","doi-asserted-by":"publisher","first-page":"1934","DOI":"10.14778\/2367502.2367541","volume":"5","author":"Y Kwon","year":"2012","unstructured":"Kwon, Y., Balazinska, M., Howe, B., Rolia, J.: Skewtune in action: mitigating skew in mapreduce applications. Proc. VLDB Endow. 5(12), 1934\u20131937 (2012)","journal-title":"Proc. VLDB Endow."},{"key":"657_CR12","doi-asserted-by":"crossref","unstructured":"Kwon, Y., Balazinska, M., Howe, B., Rolia, J.: Skewtune: mitigating skew in mapreduce applications. In: Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data, pp. 25\u201336. SIGMOD \u201912, ACM, New York, NY, USA (2012)","DOI":"10.1145\/2213836.2213840"},{"key":"657_CR13","doi-asserted-by":"crossref","unstructured":"Li, J., Agrawal, K., Elnikety, S., He, Y., Lee, I.T.A., Lu, C., McKinley, K.S.: Work stealing for interactive services to meet target latency. In: Proceedings of the 21st ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 14:1\u201314:13. PPoPP \u201916, ACM, New York, NY, USA (2016)","DOI":"10.1145\/2851141.2851151"},{"issue":"3","key":"657_CR14","doi-asserted-by":"publisher","first-page":"292","DOI":"10.14778\/3291264.3291273","volume":"12","author":"F Liu","year":"2018","unstructured":"Liu, F., Salmasi, A., Blanas, S., Sidiropoulos, A.: Chasing similarity: distribution-aware aggregation scheduling. Proc. VLDB Endow. 12(3), 292\u2013306 (2018)","journal-title":"Proc. VLDB Endow."},{"key":"657_CR15","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.1016\/j.future.2017.07.014","volume":"86","author":"G Liu","year":"2018","unstructured":"Liu, G., Zhu, X., Wang, J., Guo, D., Bao, W., Guo, H.: SP-partitioner: a novel partition method to handle intermediate data skew in spark streaming. Future Gener. Comput. Syst. 86, 1054\u20131063 (2018)","journal-title":"Future Gener. Comput. Syst."},{"key":"657_CR16","doi-asserted-by":"crossref","unstructured":"Liu, Z., Zhang, Q., Zhani, M.F., Boutaba, R., Liu, Y., Gong, Z.: DREAMS: dynamic resource allocation for mapreduce with data skew. In: 2015 IFIP\/IEEE International Symposium on Integrated Network Management, pp. 18\u201326. IEEE (2015)","DOI":"10.1109\/INM.2015.7140272"},{"key":"657_CR17","doi-asserted-by":"crossref","unstructured":"Merkel, A., Stoess, J., Bellosa, F.: Resource-conscious scheduling for energy efficiency on multicore processors. In: Proceedings of the 5th European Conference on Computer Systems, pp. 153\u2013166. EuroSys \u201910 (2010)","DOI":"10.1145\/1755913.1755930"},{"key":"657_CR18","doi-asserted-by":"crossref","unstructured":"M\u00fcller, I., Sanders, P., Lacurie, A., Lehner, W., F\u00e4rber, F.: Cache-efficient aggregation: hashing is sorting. In: Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, pp. 1123\u20131136. SIGMOD \u201915, ACM, New York, NY, USA (2015)","DOI":"10.1145\/2723372.2747644"},{"key":"657_CR19","doi-asserted-by":"crossref","unstructured":"Okcan, A., Riedewald, M.: Processing theta-joins using mapreduce. In: Proceedings of the 2011 ACM SIGMOD International Conference on Management of Data, pp. 949\u2013960. SIGMOD \u201911, ACM, New York, NY, USA (2011)","DOI":"10.1145\/1989323.1989423"},{"key":"657_CR20","doi-asserted-by":"crossref","unstructured":"Polychroniou, O., Raghavan, A., Ross, K.A.: Rethinking SIMD vectorization for in-memory databases. In: Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, pp. 1493\u20131508. SIGMOD \u201915, ACM, New York, NY, USA (2015)","DOI":"10.1145\/2723372.2747645"},{"key":"657_CR21","unstructured":"Ricci, L., Carlini, E., Dazzi, P., Lulli, A.: Static and dynamic big data partitioning on apache spark. In: Conference on Parallel Computing, vol.\u00a027, pp. 489\u2013498. IOS PRESS (2016)"},{"key":"657_CR22","unstructured":"Spark homepage. https:\/\/spark.apache.org, last accessed 9 May 2019"},{"key":"657_CR23","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1016\/j.future.2016.06.027","volume":"78","author":"Z Tang","year":"2018","unstructured":"Tang, Z., Zhang, X., Li, K., Li, K.: An intermediate data placement algorithm for load balancing in spark computing environment. Future Gener. Comput. Syst. 78, 287\u2013301 (2018)","journal-title":"Future Gener. Comput. Syst."},{"key":"657_CR24","unstructured":"The TPC-H benchmark. http:\/\/www.tpc.org\/tpch, last accessed 10 May 2019"},{"issue":"4","key":"657_CR25","doi-asserted-by":"publisher","first-page":"1071","DOI":"10.1109\/TKDE.2014.2359675","volume":"27","author":"L Wang","year":"2015","unstructured":"Wang, L., Zhou, M., Zhang, Z., Shan, M.C., Zhou, A.: NUMA-aware scalable and efficient in-memory aggregation on large domains. IEEE Trans. Knowl. Data Eng. 27(4), 1071\u20131084 (2015)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"657_CR26","doi-asserted-by":"crossref","unstructured":"Wang, L., Zhou, M., Zhang, Z., Yang, Y., Zhou, A., Bitton, D.: Elastic pipelining in an in-memory database cluster. In: Proceedings of the 2016 International Conference on Management of Data, pp. 1279\u20131294. SIGMOD \u201916, ACM, New York, NY, USA (2016)","DOI":"10.1145\/2882903.2882904"},{"key":"657_CR27","doi-asserted-by":"crossref","unstructured":"Wimmer, M., Cederman, D., Tr\u00e4ff, J.L., Tsigas, P.: Work-stealing with configurable scheduling strategies. In: Proceedings of the 18th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 315\u2013316. PPoPP \u201913, ACM, New York, NY, USA (2013)","DOI":"10.1145\/2442516.2442562"},{"key":"657_CR28","unstructured":"Zaharia, M., Chowdhury, M., Das, T., Dave, A., Ma, J., McCauley, M., Franklin, M.J., Shenker, S., Stoica, I.: Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: Proceedings of the 9th USENIX Conference on Networked Systems Design and Implementation, pp. 2\u20132. NSDI\u201912, USENIX Association, Berkeley, CA, USA (2012)"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-020-00657-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-020-00657-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-020-00657-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T00:53:04Z","timestamp":1616633584000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-020-00657-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,25]]},"references-count":28,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2020,12]]}},"alternative-id":["657"],"URL":"https:\/\/doi.org\/10.1007\/s10766-020-00657-z","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"type":"print","value":"0885-7458"},{"type":"electronic","value":"1573-7640"}],"subject":[],"published":{"date-parts":[[2020,3,25]]},"assertion":[{"value":"6 August 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 November 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 March 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}