{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T02:04:07Z","timestamp":1710295447656},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2014,10,22]],"date-time":"2014-10-22T00:00:00Z","timestamp":1413936000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2015,3]]},"DOI":"10.1007\/s10586-014-0405-9","type":"journal-article","created":{"date-parts":[[2014,10,22]],"date-time":"2014-10-22T03:52:22Z","timestamp":1413949942000},"page":"403-418","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Parallel data intensive applications using MapReduce: a data mining case study in biomedical sciences"],"prefix":"10.1007","volume":"18","author":[{"given":"Liangxiu","family":"Han","sequence":"first","affiliation":[]},{"given":"Hwee Yong","family":"Ong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,10,22]]},"reference":[{"key":"405_CR1","unstructured":"Amazon: Amazon elastic compute cloud. http:\/\/aws.amazon.com\/ec2 (2013). Accessed on 23 Dec 2013"},{"key":"405_CR2","unstructured":"Apache: Apache hadoop. http:\/\/hadoop.apache.org\/core\/ (2013). Accessed on 23 Dec 2013"},{"key":"405_CR3","doi-asserted-by":"crossref","unstructured":"Arpaci-Dusseau, R.H., Anderson, E., Treuhaft, N., Culler, D.E., Hellerstein, J.M., Patterson, D., Yelick, K.: Cluster i\/o with river: making the fast case common. In: Proceedings of the Sixth Workshop on I\/O in Parallel and Distributed Systems, pp. 10\u201322. ACM, New York (1999)","DOI":"10.1145\/301816.301823"},{"key":"405_CR4","doi-asserted-by":"crossref","unstructured":"Atkinson, M., van Hemert, J., Han, L., Hume, A., Liew, C.S.: A Distributed Architecture for Data Mining and Integration, pp. 11\u201320. ACM, New York (2009)","DOI":"10.1145\/1552280.1552282"},{"key":"405_CR5","doi-asserted-by":"crossref","first-page":"1457","DOI":"10.1016\/S0167-8191(01)00099-0","volume":"27","author":"MD Beynon","year":"2001","unstructured":"Beynon, M.D., Kurc, T., Catalyurek, U., Chang, C., Sussman, A., Saltz, J.: Distributed processing of very large datasets with DataCutter. Parallel Comput. 27, 1457\u20131478 (2001)","journal-title":"Parallel Comput."},{"key":"405_CR6","unstructured":"Cellknn: Cell-knn: an implementation of the knn algorithm on sti\u2019s cell processor. http:\/\/code.google.com\/p\/cell-knn\/ (2011) Accessed on 19 April 2014"},{"key":"405_CR7","unstructured":"Condor DAGMan (directed acyclic graph manager): http:\/\/www.cs.wisc.edu\/condor\/dagman (2007) Accessed on 19 April 2014"},{"issue":"1","key":"405_CR8","doi-asserted-by":"crossref","first-page":"21","DOI":"10.1109\/TIT.1967.1053964","volume":"30","author":"T Cover","year":"1967","unstructured":"Cover, T., Hart, P.: Nearest neighbor pattern classification. IEEE Trans. Inf. Theory 30(1), 21\u201327 (1967)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"405_CR9","unstructured":"DB2: IBM DB2: http:\/\/www-01.ibm.com\/software\/data\/db2\/ (2013). Accessed on 23 Dec 2013"},{"key":"405_CR10","unstructured":"Dean, J., Ghemawat, S.: MapReduce: Simplified data processing on large clusters. In: Proceedings of the 6th Symposium on Operating Systems Design and Implementation (OSDI), pp. 137\u2013150 (2004)."},{"issue":"3","key":"405_CR11","first-page":"219","volume":"13","author":"E Deelman","year":"2005","unstructured":"Deelman, E., Singh, G., Su, M.H., Blythe, J., Gil, Y., Kesselman, C., Mehta, G., Vahi, K., Berriman, G.B., Good, J., Laity, A.C., Jacob, J.C., Katz, D.S.: Pegasus: a framework for mapping complex scientific workflows onto distributed systems. Sci. Program. 13(3), 219\u2013237 (2005)","journal-title":"Sci. Program."},{"key":"405_CR12","unstructured":"Disco: Disco mapreduce framework. http:\/\/discoproject.org\/ (2013). Accessed on 23 Dec 2013"},{"key":"405_CR13","unstructured":"Duda, R.O., Hart, P.E.: Pattern Classification and Scene Analysis. Wiley, New York (1973)"},{"issue":"4","key":"405_CR14","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1109\/MC.2008.125","volume":"41","author":"M Gokhale","year":"2008","unstructured":"Gokhale, M., Cohen, J., Yoo, A., Miller, W.: Hardware technologies for high-performance data-intensive computing. IEEE Comput. 41(4), 60\u201368 (2008)","journal-title":"IEEE Comput."},{"issue":"4","key":"405_CR15","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/MC.2008.122","volume":"41","author":"I Gorton","year":"2008","unstructured":"Gorton, I., Greenfield, P., Szalay, A., Williams, R.: Data-intensive computing in the 21st century. Computer 41(4), 30\u201332 (2008)","journal-title":"Computer"},{"key":"405_CR16","unstructured":"Han, L., Saengngam, T., van Hemert, J.: Accelerating data-intensive applications: a cloud computing approach to parallel image pattern recognition tasks. In: W. Gentzsch, P. Lorenz, O. Dini (eds.) ADVCOMP 2010: The Fourth International Conference on Advanced Engineering Computing and Applications in Sciences, 978-1-61208-101-4, pp. 148\u2013153. IARIA (2010)"},{"issue":"8","key":"405_CR17","doi-asserted-by":"crossref","first-page":"1101","DOI":"10.1093\/bioinformatics\/btr105","volume":"27","author":"L Han","year":"2011","unstructured":"Han, L., van Hemert, J., Baldock, R.: Automatically identifying and annotating mouse embryo gene expression patterns. Bioinformatics 27(8), 1101\u20131107 (2011)","journal-title":"Bioinformatics"},{"issue":"1","key":"405_CR18","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1016\/j.parco.2011.02.006","volume":"37","author":"L Han","year":"2011","unstructured":"Han, L., Liew, C.S., van Hemert, J.I., Atkinson, M.P.: A generic parallel processing model for facilitating data mining and data integration. J. Parallel Comput. 37(1), 157\u2013171 (2011)","journal-title":"J. Parallel Comput."},{"key":"405_CR19","volume-title":"The Fourth Paradigm: Data-Intensive Scientific Discovery","author":"T Hey","year":"2009","unstructured":"Hey, T., Tansley, S., Tolle, K.: The Fourth Paradigm: Data-Intensive Scientific Discovery, 1st edn. Microsoft Research, Redmond (2009)","edition":"1"},{"key":"405_CR20","unstructured":"IDC digital universe study: Big data is here, now what? Accessed on 23 Dec 2013"},{"key":"405_CR21","unstructured":"Jin, R., Agrawal., G.: A middleware for developing parallel data mining implementations. In: Proceedings of the First SIAM Conference on Data Mining (Apr, 2001)"},{"issue":"1","key":"405_CR22","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1109\/TKDE.2005.18","volume":"17","author":"R Jin","year":"2005","unstructured":"Jin, R., Yang, G., Agrawal, G.: Shared memory parallelization of data mining algorithms: techniques, programming interface, and performance. IEEE Trans. Knowl. Data Eng. 17(1), 71\u201389 (2005)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"3\u20134","key":"405_CR23","doi-asserted-by":"crossref","first-page":"239","DOI":"10.1007\/s10723-005-9013-5","volume":"3","author":"G Laszewski","year":"2005","unstructured":"Laszewski, G., Hategan, M.: Workflow concepts of the Java Cog Kit. Grid Comput. 3(3\u20134), 239\u2013258 (2005)","journal-title":"Grid Comput."},{"key":"405_CR24","series-title":"Technical Report","volume-title":"Java CoG Kit Karajan-Gridant Workflow Guide","author":"G Laszewski","year":"2005","unstructured":"Laszewski, G., Hategan, M.: Java CoG Kit Karajan-Gridant Workflow Guide. Technical Report. Argonne National Laboratory, Argonne (2005)"},{"key":"405_CR25","unstructured":"LINQ: The LINQ project. http:\/\/msdn.microsoft.com\/netframework\/future\/linq\/ Accessed on 19 April 2014"},{"key":"405_CR26","unstructured":"Microsoft: http:\/\/research.microsoft.com\/en-us\/projects\/Dryad\/ (2013). Accessed on 23 Dec 2013"},{"issue":"10","key":"405_CR27","doi-asserted-by":"crossref","first-page":"1067","DOI":"10.1002\/cpe.993","volume":"18","author":"T Oinn","year":"2006","unstructured":"Oinn, T., Greenwood, M., Addis, M., Alpdemir, N., Ferris, J., Glover, K., Goble, C., Goderis, A., Hull, D., Marvin, D., Li, P., Lord, P., Pocock, M., Senger, M., Stevens, R., Wipat, A., Wroe, C.: Taverna: lessons in creating a workflow environment for the life sciences. Concurr. Comput. 18(10), 1067\u20131100 (2006). doi: 10.1002\/cpe.v18:10","journal-title":"Concurr. Comput."},{"key":"405_CR28","volume-title":"Parallel Programming with MPI","author":"PS Pacheco","year":"1997","unstructured":"Pacheco, P.S.: Parallel Programming with MPI. Morgan Kaufmann Publishers, Inc., San Francisco (1997)"},{"issue":"4","key":"405_CR29","first-page":"277","volume":"13","author":"R Pike","year":"2005","unstructured":"Pike, R., Dorward, S., Griesemer, R., Quinlan, S.: Interpreting the data: Parallel analysis with Sawzal. Sci. Program. 13(4), 277\u2013298 (2005)","journal-title":"Sci. Program."},{"key":"405_CR30","unstructured":"PVM: http:\/\/www.csm.ornl.gov\/pvm\/ (2013). Accessed on 23 Dec 2013"},{"key":"405_CR31","doi-asserted-by":"crossref","unstructured":"Raicu, I., Zhao, Y., Dumitrescu, C., Ian Foster, M.W.: Falkon: a fast and light-weight task execution framework. In: IEEE\/ACM SC 2007 (2007)","DOI":"10.1145\/1362622.1362680"},{"key":"405_CR32","doi-asserted-by":"crossref","unstructured":"Raicu, I., Zhao, Y., Foster, I., Szalay, A.: Accelerating large-scale data exploration through data diffusion. In: International Workshop on Data-Aware Distributed Computing 2008. IEEE Computer Scociety (2008)","DOI":"10.1145\/1383519.1383521"},{"key":"405_CR33","doi-asserted-by":"crossref","unstructured":"t Grossman, R., Gu, Y.: Data mining using high performance clouds: Experimental studies using sector and sphere. In: Proceedings of The 14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, New York (2008)","DOI":"10.1145\/1401890.1402000"},{"key":"405_CR34","doi-asserted-by":"crossref","unstructured":"Taylor, I., Shields, M., Wang, I., Harrison, A.: The Triana Workflow Environment: architecture and applications. In: I. Taylor, E. Deelman, D. Gannon, M. Shields (eds.) Workflows for e-Science, pp. 320\u2013339. Springer, London (2007)","DOI":"10.1007\/978-1-84628-757-2_20"},{"key":"405_CR35","unstructured":"Teradata: http:\/\/www.teradata.com\/ (2013). Accessed on 23 Dec 2013"},{"key":"405_CR36","unstructured":"Vertica: http:\/\/www.vertica.com\/ (2013). Accessed on 23 Dec 2013"},{"issue":"1","key":"405_CR37","doi-asserted-by":"crossref","first-page":"85","DOI":"10.12785\/amis\/071L13","volume":"7","author":"L Wang","year":"2013","unstructured":"Wang, L., Tao, J., Ma, Y., Khan, S.U., Kolodziej, J., Chen, D.: Software design and implementation for MapReduce across distributed data centers. Appl. Math. Inf. Sci. 7(1), 85\u201390 (2013)","journal-title":"Appl. Math. Inf. Sci."},{"key":"405_CR38","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Hategan, M., Clifford, B., Foster, I., von Laszewski, G., Nefedova, V., Raicu, I., Stef-Praun, T., Wilde, M.: Swift: Fast, reliable, loosely coupled parallel computation. In: IEEE Congress on Services (Services 2007), pp. 199\u2013206 (2007)","DOI":"10.1109\/SERVICES.2007.63"}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-014-0405-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10586-014-0405-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-014-0405-9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,16]],"date-time":"2019-08-16T12:59:13Z","timestamp":1565960353000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10586-014-0405-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,10,22]]},"references-count":38,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,3]]}},"alternative-id":["405"],"URL":"https:\/\/doi.org\/10.1007\/s10586-014-0405-9","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"value":"1386-7857","type":"print"},{"value":"1573-7543","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,10,22]]}}}