{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T16:25:11Z","timestamp":1773678311062,"version":"3.50.1"},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2014,8,22]],"date-time":"2014-08-22T00:00:00Z","timestamp":1408665600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2015,3]]},"DOI":"10.1007\/s10586-014-0400-1","type":"journal-article","created":{"date-parts":[[2014,8,21]],"date-time":"2014-08-21T14:51:29Z","timestamp":1408632689000},"page":"369-383","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":69,"title":["Scaling up MapReduce-based Big Data Processing on Multi-GPU systems"],"prefix":"10.1007","volume":"18","author":[{"given":"Hai","family":"Jiang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhi","family":"Qiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tien-Hsiung","family":"Weng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kuan-Ching","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,8,22]]},"reference":[{"key":"400_CR1","first-page":"1","volume-title":"Cluster Computing","author":"H Jiang","year":"2013","unstructured":"Jiang, H., Chen, Y., Qiao, Z., Li, K.-C., Ro, W., Gaudiot, J.-C.: Accelerating MapReduce framework on multi-GPU systems. Cluster Computing, pp. 1\u20139. Springer, Berlin (2013)"},{"key":"400_CR2","unstructured":"Cubieboards: an Open ARM Mini PC, http:\/\/www.cubieboard.org 2014"},{"key":"400_CR3","unstructured":"CUDA Programming Guide 6.0, NVIDIA, 2014"},{"issue":"1","key":"400_CR4","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"Jeffrey Dean","year":"2008","unstructured":"Dean, Jeffrey, Ghemawa, Sanjay: MapReduce: simplied data processing on large clusters. Commun. ACM 51(1), 107\u2013113 (2008)","journal-title":"Commun. ACM"},{"key":"400_CR5","series-title":"Lecture Notes in Computer Science","first-page":"433","volume-title":"Grid and Pervasive Computing","author":"Y Chen","year":"2013","unstructured":"Chen, Y., Qiao, Z., Jiang, H., Li, K.-C., Ro, W.W.: MGMR: multi-GPU based MapReduce. Grid and Pervasive Computing. Lecture Notes in Computer Science, vol. 7861, pp. 433\u2013442. Springer, Berlin (2013)"},{"key":"400_CR6","series-title":"Communications and Society Program","volume-title":"The Promise and Peril of Big Data","author":"D Bollier","year":"2010","unstructured":"Bollier, D., Firestone, C.M.: The Promise and Peril of Big Data. Communications and Society Program. Aspen Institute, Washington, DC (2010)"},{"key":"400_CR7","unstructured":"Jinno, R., Seki, K., Uehara, K.: Parallel distributed trajectory pattern mining using MapReduce. In: Proceedings of IEEE 4th International Conference on Cloud Computing Technology and Science, pp. 269\u2013273, 2012"},{"key":"400_CR8","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1016\/j.cmpb.2010.10.013","volume":"106","author":"D Lee","year":"2012","unstructured":"Lee, D., Dinov, I., Dong, B., Gutman, B., Yanovsky, I., Toga, A.W.: CUDA optimization strategies for compute-and memory-bound neuroimaging algorithms. Comput. Methods Programs Biomed. 106, 175 (2012)","journal-title":"Comput. Methods Programs Biomed."},{"key":"400_CR9","unstructured":"Raina, R., Madhavan, A., Ng, A.D.: Large-scale deep unsupervised learning using graphics processors. In: Proceedings of the 26th International Conference on Machine Learning, Canada, 2009"},{"key":"400_CR10","unstructured":"Fadika, z., Dede, E., Hartog, J., Govindaraju, M.: Marla: Mapreduce for heterogeneous clusters. In: Proceedings of the 2012 12th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing, pp. 49\u201356, 2012"},{"key":"400_CR11","unstructured":"Stuart, J.A., Owens, J.D.: Multi-GPU MapReduce on GPU clusters. In: Proceedings of the 2011 IEEE International Parallel & Distributed Processing Symposium, pp. 1068\u20131079, 2011"},{"key":"400_CR12","unstructured":"Foster, I., Kesselman, C.: The Grid 2: blueprint for a new computing infrastructure, Morgan Kaufmann, 2003"},{"key":"400_CR13","unstructured":"Czajkowski, K., Fitzgerald, S., Foster, I., Kesselman, C.: Grid information services for distributed resource sharing. In: Proceedings of 10th IEEE International Symposium on High Performance Distributed Computing, pp. 181\u2013194, 2001"},{"key":"400_CR14","volume-title":"Hadoop: The Definitive Guide","author":"T White","year":"2012","unstructured":"White, T.: Hadoop: The Definitive Guide. O\u2019Reilly Media, Sebastopol (2012)"},{"key":"400_CR15","doi-asserted-by":"crossref","unstructured":"Chen, L., Huo, X., Agrawal, G.: Accelerating MapReduce on a coupled CPU-GPU architecture. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis 2012","DOI":"10.1109\/SC.2012.16"},{"key":"400_CR16","unstructured":"Nakada, H., Ogawa, H., Kudoh, T.: Stream processing with big data: SSS-MapReduce. In: Proceedings of 2012 IEEE 4th International Conference on Cloud Computing Technology and Science, pp. 618\u2013621, 2012"},{"key":"400_CR17","unstructured":"Ji, F., Ma, X.: Using shared memory to accelerate MapReduce on graphics processing units. In: Proceedings of the IEEE International Parallel & Distributed Processing Symposium, pp. 805\u2013816, 2011"},{"key":"400_CR18","unstructured":"Chen, L., Agrawal, G.: Optimizing MapReduce for GPUs with effective shared memory usage. In: Proceedings of the 21st International Symposium on High-Performance Parallel and Distributed Computing, pp. 199\u2013210, 2012"},{"key":"400_CR19","first-page":"267","volume-title":"Computer Science-Research and Development","author":"G Shainer","year":"2011","unstructured":"Shainer, G., Ayoub, A., Lui, P., Liu, T., Kagan, M., Troot, C.R., Scantlen, G., Crozier, P.S.: The development of Mellanox\/NVIDIA GPU Direct over InfiniBand new model for GPU to GPU communications. Computer Science-Research and Development, pp. 267\u2013273. Springer, Berlin (2011)"},{"issue":"4","key":"400_CR20","doi-asserted-by":"crossref","first-page":"608","DOI":"10.1109\/TPDS.2010.158","volume":"22","author":"Wenbin Fang","year":"2011","unstructured":"Fang, Wenbin, He, Bingsheng, Luo, Qiong, Govindaraju, Naga K.: Mars: Accelerating MapReduce with Graphics Processors. IEEE Trans. Parallel Distrib. Syst. 22(4), 608\u2013620 (2011)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"400_CR21","unstructured":"Elteir, M., Lin, H., Feng, W.C., Scogland, T.R.W: StreamMR: an optimized MapReduce framework for AMD GPUs. In: IEEE 17th International Conference on Parallel and Distributed Systems, pp. 364\u2013371, 2011"},{"key":"400_CR22","unstructured":"Tuning CUDA Applications for Kepler, http:\/\/docs.nvidia.com\/cuda\/kepler-tuning-guide\/"},{"key":"400_CR23","unstructured":"Nathan, B., Jared, H.: Thrust: a productivity-oriented library for CUDA. In: GPU Computing Gems: Jade Edition, Morgan Kaufmann, pp. 359\u2013371, 2011"},{"issue":"10","key":"400_CR24","doi-asserted-by":"crossref","first-page":"1079","DOI":"10.1016\/0167-8191(93)90019-H","volume":"19","author":"L Xiaobo","year":"1993","unstructured":"Xiaobo, L., Paul, L., Jonathan, S., John, S., Sze, W.P., Hanmao, S.: On the versatility of parallel sorting by regular sampling. Parallel Comput. 19(10), 1079\u20131103 (1993)","journal-title":"Parallel Comput."},{"issue":"4","key":"400_CR25","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1111\/1475-3995.00366","volume":"9","author":"P Bartosz","year":"2002","unstructured":"Bartosz, P.: A fast approximation algorithm for the subset-sum problem. Int. Trans. Oper. Res. 9(4), 437\u2013459 (2002)","journal-title":"Int. Trans. Oper. Res."},{"key":"400_CR26","unstructured":"FERMI Compute Architecture White Paper, Nvidia"},{"issue":"5","key":"400_CR27","doi-asserted-by":"crossref","first-page":"1031","DOI":"10.1109\/TPAMI.2011.255","volume":"34","author":"Y Shi","year":"2012","unstructured":"Shi, Y., L\u00e9on-Charles, T., De, M.B., Yves, M.: Optimized data fusion for kernal k-means clustering. IEEE Trans. Pattern Anal. Mach. Intell. 34(5), 1031\u20131039 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-014-0400-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10586-014-0400-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-014-0400-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,30]],"date-time":"2019-05-30T18:40:21Z","timestamp":1559241621000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10586-014-0400-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,8,22]]},"references-count":27,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,3]]}},"alternative-id":["400"],"URL":"https:\/\/doi.org\/10.1007\/s10586-014-0400-1","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"value":"1386-7857","type":"print"},{"value":"1573-7543","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,8,22]]}}}