{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T05:11:05Z","timestamp":1770959465628,"version":"3.50.1"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,6,23]],"date-time":"2021-06-23T00:00:00Z","timestamp":1624406400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,6,23]],"date-time":"2021-06-23T00:00:00Z","timestamp":1624406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1007\/s42979-021-00738-x","type":"journal-article","created":{"date-parts":[[2021,6,23]],"date-time":"2021-06-23T16:02:31Z","timestamp":1624464151000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Theoretical and Experimental Comparison of Large-Scale Join Algorithms in Spark"],"prefix":"10.1007","volume":"2","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1470-5496","authenticated-orcid":false,"given":"Anh-Cang","family":"Phan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thuong-Cang","family":"Phan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thanh-Ngoan","family":"Trieu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thi-To-Quyen","family":"Tran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,6,23]]},"reference":[{"key":"738_CR1","doi-asserted-by":"publisher","unstructured":"Afrati FN, Ullman JD. Transitive closure and recursive datalog implemented on clusters. In: Proceedings of the 15th International Conference on Extending Database Technology, EDBT \u201912, pp 132\u2013143. ACM, New York, NY, USA 2012. https:\/\/doi.org\/10.1145\/2247596.2247613.","DOI":"10.1145\/2247596.2247613"},{"key":"738_CR2","unstructured":"Ahmad F. Puma benchmarks and dataset downloads 2011. URL https:\/\/engineering.purdue.edu\/~puma\/datasets.htm. Last Accessed: 05 Apr 2019."},{"key":"738_CR3","doi-asserted-by":"crossref","unstructured":"Al-Badarneh A. Join algorithms under apache spark: Revisited. In: Proceedings of the 2019 5th International Conference on Computer and Technology Applications, ICCTA 2019. Association for Computing Machinery, New York, NY, USA 2019, pp 56\u201362.","DOI":"10.1145\/3323933.3324094"},{"key":"738_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.jksuci.2020.05.004","author":"AF Al-Badarneh","year":"2020","unstructured":"Al-Badarneh AF, Rababa SA. An analysis of two-way Equi-join algorithms under Mapreduce. J King Saud Univ Comp Inform Sci. 2020. https:\/\/doi.org\/10.1016\/j.jksuci.2020.05.004.","journal-title":"J King Saud Univ Comp Inform Sci"},{"key":"738_CR5","doi-asserted-by":"crossref","unstructured":"Armbrust M, Xin RS, Lian C, Huai Y, Liu D, Bradley JK, Meng X, Kaftan T, Franklin MJ, Ghodsi A, et\u00a0al. Spark sql: Relational data processing in spark. In: Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, SIGMOD 15. Association for Computing Machinery, New York, NY, USA 2015, pp. 1383\u20131394.","DOI":"10.1145\/2723372.2742797"},{"key":"738_CR6","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1007\/978-1-4612-4980-1_17","volume-title":"On knowledge base management systems","author":"F Bancilhon","year":"1986","unstructured":"Bancilhon F. Naive evaluation of recursively defined relations. In: On knowledge base management systems. Berlin: Springer; 1986. p. 165\u201378."},{"issue":"2","key":"738_CR7","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1145\/16856.16859","volume":"15","author":"F Bancilhon","year":"1986","unstructured":"Bancilhon F, Ramakrishnan R. An amateur\u2019s introduction to recursive query processing strategies. SIGMOD Rec. 1986;15(2):16\u201352. https:\/\/doi.org\/10.1145\/16856.16859.","journal-title":"SIGMOD Rec."},{"key":"738_CR8","doi-asserted-by":"crossref","unstructured":"Blanas S, Patel JM, Ercegovac V, Rao J, Shekita EJ, Tian Y. A comparison of join algorithms for log processing in mapreduce. In: Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data, SIGMOD 10. Association for Computing Machinery, New York, NY, USA 2010, pp 975\u2013986.","DOI":"10.1145\/1807167.1807273"},{"issue":"7","key":"738_CR9","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1145\/362686.362692","volume":"13","author":"BH Bloom","year":"1970","unstructured":"Bloom BH. Space\/time trade-offs in hash coding with allowable errors. Commun ACM. 1970;13(7):422\u20136.","journal-title":"Commun ACM"},{"key":"738_CR10","unstructured":"Bratbergsengen K. Hashing methods and relational algebra operations. In: Proceedings of the 10th International Conference on Very Large Data Bases, VLDB 84. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA 1984, pp 323\u2013333."},{"issue":"3","key":"738_CR11","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1145\/1272743.1272747","volume":"32","author":"S Chen","year":"2007","unstructured":"Chen S, Ailamaki A, Gibbons PB, Mowry TC. Improving hash join performance through prefetching. ACM Trans Database Syst. 2007;32(3):17.","journal-title":"ACM Trans Database Syst."},{"issue":"1","key":"738_CR12","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean J, Ghemawat S. Mapreduce: simplified data processing on large clusters. Commun ACM. 2008;51(1):107\u201313.","journal-title":"Commun ACM"},{"key":"738_CR13","unstructured":"Gribkoff E. Distributed algorithms for the transitive closure 2013."},{"issue":"2","key":"738_CR14","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1002\/rsa.20208","volume":"33","author":"A Kirsch","year":"2008","unstructured":"Kirsch A, Mitzenmacher M. Less hashing, same performance: building a better bloom filter. Random Struct Algorithms. 2008;33(2):187\u2013218.","journal-title":"Random Struct Algorithms"},{"issue":"4","key":"738_CR15","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/2094114.2094118","volume":"40","author":"KH Lee","year":"2012","unstructured":"Lee KH, Lee YJ, Choi H, Chung YD, Moon B. Parallel data processing with Mapreduce: a survey. SIGMOD Rec. 2012;40(4):11\u201320.","journal-title":"SIGMOD Rec"},{"key":"738_CR16","doi-asserted-by":"crossref","unstructured":"Lee T, Kim K, Kim HJ. Join processing using bloom filter in Mapreduce. In: Proceedings of the 2012 ACM Research in Applied Computation Symposium, RACS 12. Association for Computing Machinery, New York, NY, USA 2012, pp 100\u2013105.","DOI":"10.1145\/2401603.2401626"},{"issue":"1","key":"738_CR17","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/BF01263657","volume":"3","author":"X Lin","year":"1995","unstructured":"Lin X, Orlowska ME. An efficient processing of a chain join with the minimum communication cost in distributed database systems. Distrib Parallel Databases. 1995;3(1):69\u201383.","journal-title":"Distrib Parallel Databases"},{"key":"738_CR18","doi-asserted-by":"crossref","unstructured":"Mackert LF, Lohman GM. R* optimizer validation and performance evaluation for distributed queries. In: Proceedings of the 12th International Conference on Very Large Data Bases, VLDB 86. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA 1986, pp 149\u2013159.","DOI":"10.1145\/16894.16863"},{"key":"738_CR19","unstructured":"Mehta T, Mangla N, Guragon G. A survey paper on big data analytics using map reduce and hive on hadoop framework a survey paper on big data analytics using map reduce and hive on hadoop framework 2016."},{"key":"738_CR20","doi-asserted-by":"crossref","unstructured":"Michael L, Nejdl W, Papapetrou O, Siberski W. Improving distributed join efficiency with extended bloom filter operations. In: Proceedings of the 21st International Conference on Advanced Networking and Applications, AINA 07. IEEE Computer Society, USA 2007, pp 187\u2013194.","DOI":"10.1109\/AINA.2007.80"},{"issue":"1","key":"738_CR21","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1145\/128762.128764","volume":"24","author":"P Mishra","year":"1992","unstructured":"Mishra P, Eich MH. Join processing in relational databases. ACM Comput Surv. 1992;24(1):63\u2013113.","journal-title":"ACM Comput Surv"},{"key":"738_CR22","doi-asserted-by":"crossref","unstructured":"Phan AC, Phan TC, Trieu TN. A comparative study of join algorithms in spark. In: International Conference on Future Data and Security Engineering. Springer, 2020, pp 185\u2013198.","DOI":"10.1007\/978-3-030-63924-2_11"},{"key":"738_CR23","doi-asserted-by":"crossref","unstructured":"Phan TC, d\u2019Orazio L, Rigaux P. Toward intersection filter-based optimization for joins in Mapreduce. In: Proceedings of the 2nd International Workshop on Cloud Intelligence, Cloud-I 13. Association for Computing Machinery, New York, NY, USA 2013.","DOI":"10.1145\/2501928.2501932"},{"key":"738_CR24","doi-asserted-by":"crossref","unstructured":"Phan TC, d\u2019Orazio L, Rigaux P. A theoretical and experimental comparison of filter-based equijoins in mapreduce. In: Transactions on Large-Scale Data-and Knowledge-Centered Systems XXV. Springer 2016, pp 33\u201370.","DOI":"10.1007\/978-3-662-49534-6_2"},{"key":"738_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3233\/JIFS-182519","volume":"40","author":"S Rababa","year":"2021","unstructured":"Rababa S, Al-Badarneh A. Optimizations for filter-based join algorithms in Mapreduce. J Intell Fuzzy Syst. 2021;40:1\u201318 (Preprint).","journal-title":"J Intell Fuzzy Syst"},{"key":"738_CR26","doi-asserted-by":"crossref","unstructured":"Shaw M, Koutris P, Howe B, Suciu D. Optimizing large-scale semi-na\u00efve datalog evaluation in hadoop. In: International Datalog 2.0 Workshop. Springer 2012, pp 165\u2013176.","DOI":"10.1007\/978-3-642-32925-8_17"},{"issue":"4","key":"738_CR27","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1145\/141356.141392","volume":"20","author":"KL Tan","year":"1991","unstructured":"Tan KL, Lu H. A note on the strategy space of multiway join query optimization problem in parallel systems. ACM SIGMOD Rec. 1991;20(4):81\u20132.","journal-title":"ACM SIGMOD Rec"},{"key":"738_CR28","doi-asserted-by":"crossref","unstructured":"Van Hieu D, Smanchat S, Meesad P. Mapreduce join strategies for key-value storage. In: 2014 11th International Joint Conference on Computer Science and Software Engineering (JCSSE), 2014, pp 164\u2013169.","DOI":"10.1109\/JCSSE.2014.6841861"},{"issue":"4","key":"738_CR29","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1145\/360715.360746","volume":"18","author":"HS Warren Jr","year":"1975","unstructured":"Warren HS Jr. A modification of Warshall\u2019s algorithm for the transitive closure of binary relations. Commun ACM. 1975;18(4):218\u201320. https:\/\/doi.org\/10.1145\/360715.360746.","journal-title":"Commun ACM"},{"issue":"1","key":"738_CR30","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/321105.321107","volume":"9","author":"S Warshall","year":"1962","unstructured":"Warshall S. A theorem on Boolean matrices. J ACM. 1962;9(1):11\u20132. https:\/\/doi.org\/10.1145\/321105.321107.","journal-title":"J ACM"},{"key":"738_CR31","volume-title":"Hadoop: the definitive guide","author":"T White","year":"2015","unstructured":"White T. Hadoop: the definitive guide. 4th ed. Newton: O\u2019Reilly Media Inc; 2015.","edition":"4"},{"key":"738_CR32","unstructured":"Zaharia M, Chowdhury M, Franklin MJ, Shenker S, Stoica I. Spark: cluster computing with working sets. In: Proceedings of the 2nd USENIX Conference on Hot Topics in Cloud Computing, HotCloud\u201910. USENIX Association, USA 2010, p\u00a010."}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-021-00738-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-021-00738-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-021-00738-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,30]],"date-time":"2021-08-30T17:50:22Z","timestamp":1630345822000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-021-00738-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,23]]},"references-count":32,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,9]]}},"alternative-id":["738"],"URL":"https:\/\/doi.org\/10.1007\/s42979-021-00738-x","relation":{},"ISSN":["2662-995X","2661-8907"],"issn-type":[{"value":"2662-995X","type":"print"},{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,23]]},"assertion":[{"value":"11 March 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 June 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 June 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"352"}}