{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T12:05:41Z","timestamp":1767182741230,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030639235"},{"type":"electronic","value":"9783030639242"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-63924-2_11","type":"book-chapter","created":{"date-parts":[[2020,11,19]],"date-time":"2020-11-19T23:23:20Z","timestamp":1605828200000},"page":"185-198","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A Comparative Study of Join Algorithms in Spark"],"prefix":"10.1007","author":[{"given":"Anh-Cang","family":"Phan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thuong-Cang","family":"Phan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thanh-Ngoan","family":"Trieu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,19]]},"reference":[{"key":"11_CR1","unstructured":"Ahmad, F.: Puma benchmarks and dataset downloads (2011). https:\/\/engineering.purdue.edu\/~puma\/datasets.htm. Accessed: 05 Apr 2019"},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Al-Badarneh, A.: Join algorithms under apache spark: revisited. In: Proceedings of the 2019 5th International Conference on Computer and Technology Applications, ICCTA 2019, pp. 56\u201362. Association for Computing Machinery, New York (2019)","DOI":"10.1145\/3323933.3324094"},{"key":"11_CR3","unstructured":"Apache: Apache Hadoop (2002). https:\/\/hadoop.apache.org. Accessed 03 Apr 2019"},{"key":"11_CR4","unstructured":"Apache: Apache spark (2009). https:\/\/spark.apache.org. Accessed 03 Apr 2019"},{"key":"11_CR5","doi-asserted-by":"crossref","unstructured":"Armbrust, M., et al.: Spark SQL: relational data processing in spark. In: Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, SIGMOD 2015, pp. 1383\u20131394. Association for Computing Machinery, New York (2015)","DOI":"10.1145\/2723372.2742797"},{"key":"11_CR6","doi-asserted-by":"crossref","unstructured":"Blanas, S., Patel, J.M., Ercegovac, V., Rao, J., Shekita, E.J., Tian, Y.: A comparison of join algorithms for log processing in MapReduce. In: Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data, SIGMOD 2010, pp. 975\u2013986. Association for Computing Machinery, New York (2010)","DOI":"10.1145\/1807167.1807273"},{"issue":"7","key":"11_CR7","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1145\/362686.362692","volume":"13","author":"BH Bloom","year":"1970","unstructured":"Bloom, B.H.: Space\/time trade-offs in hash coding with allowable errors. Commun. ACM 13(7), 422\u2013426 (1970)","journal-title":"Commun. ACM"},{"key":"11_CR8","unstructured":"Bratbergsengen, K.: Hashing methods and relational algebra operations. In: Proceedings of the 10th International Conference on Very Large Data Bases, VLDB 1984, pp. 323\u2013333. Morgan Kaufmann Publishers Inc., San Francisco (1984)"},{"issue":"3","key":"11_CR9","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1145\/1272743.1272747","volume":"32","author":"S Chen","year":"2007","unstructured":"Chen, S., Ailamaki, A., Gibbons, P.B., Mowry, T.C.: Improving hash join performance through prefetching. ACM Trans. Database Syst. 32(3), 17 (2007)","journal-title":"ACM Trans. Database Syst."},{"issue":"1","key":"11_CR10","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: MapReduce: simplified data processing on large clusters. Commun. ACM 51(1), 107\u2013113 (2008)","journal-title":"Commun. ACM"},{"issue":"2","key":"11_CR11","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1002\/rsa.20208","volume":"33","author":"A Kirsch","year":"2008","unstructured":"Kirsch, A., Mitzenmacher, M.: Less hashing, same performance: building a better bloom filter. Random Struct. Algorithms 33(2), 187\u2013218 (2008)","journal-title":"Random Struct. Algorithms"},{"issue":"4","key":"11_CR12","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/2094114.2094118","volume":"40","author":"KH Lee","year":"2012","unstructured":"Lee, K.H., Lee, Y.J., Choi, H., Chung, Y.D., Moon, B.: Parallel data processing with MapReduce: a survey. SIGMOD Rec. 40(4), 11\u201320 (2012)","journal-title":"SIGMOD Rec."},{"key":"11_CR13","doi-asserted-by":"crossref","unstructured":"Lee, T., Kim, K., Kim, H.J.: Join processing using bloom filter in MapReduce. In: Proceedings of the 2012 ACM Research in Applied Computation Symposium, RACS 2012, pp. 100\u2013105. Association for Computing Machinery, New York (2012)","DOI":"10.1145\/2401603.2401626"},{"key":"11_CR14","doi-asserted-by":"crossref","unstructured":"Mackert, L.F., Lohman, G.M.: R* optimizer validation and performance evaluation for distributed queries. In: Proceedings of the 12th International Conference on Very Large Data Bases, VLDB 1986, pp. 149\u2013159. Morgan Kaufmann Publishers Inc., San Francisco (1986)","DOI":"10.1145\/16894.16863"},{"key":"11_CR15","unstructured":"Mehta, T., Mangla, N., Guragon, G.: A survey paper on big data analytics using map reduce and hive on Hadoop framework a survey paper on big data analytics using map reduce and hive on Hadoop framework, February 2016"},{"key":"11_CR16","doi-asserted-by":"crossref","unstructured":"Michael, L., Nejdl, W., Papapetrou, O., Siberski, W.: Improving distributed join efficiency with extended bloom filter operations. In: Proceedings of the 21st International Conference on Advanced Networking and Applications, AINA 2007, pp. 187\u2013194. IEEE Computer Society (2007)","DOI":"10.1109\/AINA.2007.80"},{"issue":"1","key":"11_CR17","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1145\/128762.128764","volume":"24","author":"P Mishra","year":"1992","unstructured":"Mishra, P., Eich, M.H.: Join processing in relational databases. ACM Comput. Surv. 24(1), 63\u2013113 (1992)","journal-title":"ACM Comput. Surv."},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Phan, T.C., d\u2019Orazio, L., Rigaux, P.: Toward intersection filter-based optimization for joins in mapreduce. In: Proceedings of the 2nd International Workshop on Cloud Intelligence, Cloud-I 2013. Association for Computing Machinery, New York (2013)","DOI":"10.1145\/2501928.2501932"},{"key":"11_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/978-3-662-49534-6_2","volume-title":"Transactions on Large-Scale Data- and Knowledge-Centered Systems XXV","author":"T-C Phan","year":"2016","unstructured":"Phan, T.-C., d\u2019Orazio, L., Rigaux, P.: A theoretical and experimental comparison of filter-based equijoins in MapReduce. In: Hameurlain, A., K\u00fcng, J., Wagner, R. (eds.) Transactions on Large-Scale Data- and Knowledge-Centered Systems XXV. LNCS, vol. 9620, pp. 33\u201370. Springer, Heidelberg (2016). https:\/\/doi.org\/10.1007\/978-3-662-49534-6_2"},{"key":"11_CR20","doi-asserted-by":"crossref","unstructured":"Van Hieu, D., Smanchat, S., Meesad, P.: Mapreduce join strategies for key-value storage. In: 11th International Joint Conference on Computer Science and Software Engineering (JCSSE), pp. 164\u2013169, May 2014","DOI":"10.1109\/JCSSE.2014.6841861"},{"key":"11_CR21","volume-title":"Hadoop: The Definitive Guide","author":"T White","year":"2015","unstructured":"White, T.: Hadoop: The Definitive Guide, 4th edn. O\u2019Reilly Media Inc., Sebastopol (2015)","edition":"4"},{"key":"11_CR22","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.J., Shenker, S., Stoica, I.: Spark: cluster computing with working sets. In: Proceedings of the 2nd USENIX Conference on Hot Topics in Cloud Computing, HotCloud 2010, p. 10. USENIX Association, USA (2010)"}],"container-title":["Lecture Notes in Computer Science","Future Data and Security Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-63924-2_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,24]],"date-time":"2021-04-24T19:48:36Z","timestamp":1619293716000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-63924-2_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030639235","9783030639242"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-63924-2_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"19 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"FDSE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Future Data and Security Engineering","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Quy Nhon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 November 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"fdse2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.thefdse.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"161","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"53","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"8","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.5 avg. reviewers\/paper (for two submission rounds in total). LNCS 12466 includes 24 of the accepted papers and 2 invited keynotes. CCIS 1306 includes 29 full and 8 short of the accepted papers. The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}