{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T04:29:41Z","timestamp":1743049781442,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319244617"},{"type":"electronic","value":"9783319244624"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-24462-4_22","type":"book-chapter","created":{"date-parts":[[2015,9,25]],"date-time":"2015-09-25T00:13:10Z","timestamp":1443139990000},"page":"259-272","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Data-Intensive Computing Infrastructure Systems for Unmodified Biological Data Analysis Pipelines"],"prefix":"10.1007","author":[{"given":"Lars Ailo","family":"Bongo","sequence":"first","affiliation":[]},{"given":"Edvard","family":"Pedersen","sequence":"additional","affiliation":[]},{"given":"Martin","family":"Ernstsen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,11,18]]},"reference":[{"key":"22_CR1","doi-asserted-by":"crossref","unstructured":"Kahn, S.D.: On the Future of Genomic Data. Science (80-) 331, 728\u2013729 (2011)","DOI":"10.1126\/science.1197891"},{"key":"22_CR2","unstructured":"Diao, Y., Roy, A., Bloom, T.: Building Highly-Optimized, Low-Latency Pipelines for Genomic Data Analysis. In: 7th Biennial Conference on Innovative Data Systems Research (CIDR 2015), Asilomar, CA, USA (2015)"},{"key":"22_CR3","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1038\/nmeth.1974","volume":"9","author":"L. Clarke","year":"2012","unstructured":"Clarke, L., Zheng-Bradley, X., Smith, R., Kulesha, E., Xiao, C., Toneva, I., Vaughan, B., Preuss, D., Leinonen, R., Shumway, M., Sherry, S., Flicek, P.: The 1000 Genomes Project: data management and community access. Nat. Methods\u00a09, 459\u2013462 (2012)","journal-title":"Nat. Methods"},{"key":"22_CR4","doi-asserted-by":"crossref","unstructured":"Fern\u00e1ndez-Su\u00e1rez, X.M., Rigden, D.J., Galperin, M.Y.: The 2014 Nucleic Acids Research Database Issue and an updated NAR online Molecular Biology Database Collection. Nucleic Acids Res. 42 (2014)","DOI":"10.1093\/nar\/gkt1282"},{"key":"22_CR5","doi-asserted-by":"crossref","unstructured":"Benson, G.: Editorial: Nucleic Acids Research annual Web Server Issue in 2014. Nucleic Acids Res. 42, W1\u2013W2 (2014)","DOI":"10.1093\/nar\/gku629"},{"key":"22_CR6","doi-asserted-by":"crossref","unstructured":"Goecks, J., Nekrutenko, A., Taylor, J.: Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences. Genome Biol. 11, R86 (2010)","DOI":"10.1186\/gb-2010-11-8-r86"},{"key":"22_CR7","doi-asserted-by":"publisher","first-page":"3045","DOI":"10.1093\/bioinformatics\/bth361","volume":"20","author":"T. Oinn","year":"2004","unstructured":"Oinn, T., Addis, M., Ferris, J., Marvin, D., Senger, M., Greenwood, M., Carver, T., Glover, K., Pocock, M.R., Wipat, A., Li, P.: Taverna: a tool for the composition and enactment of bioinformatics workflows. Bioinformatics\u00a020, 3045\u20133054 (2004)","journal-title":"Bioinformatics"},{"key":"22_CR8","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1145\/1629175.1629198","volume":"53","author":"J. Dean","year":"2010","unstructured":"Dean, J., Ghemawat, S.: MapReduce: a flexible data processing tool. Commun. ACM\u00a053, 72 (2010)","journal-title":"Commun. ACM"},{"key":"22_CR9","unstructured":"Zaharia, M., Chowdhury, M., Das, T., Dave, A., Ma, J., McCauley, M., Franklin, M.J., Shenker, S., Stoica, I.: Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: Proc. of the 9th USENIX conference on Networked Systems Design and Implementation. USENIX Association (2012)"},{"key":"22_CR10","doi-asserted-by":"crossref","unstructured":"Gentleman, R.C., Carey, V.J., Bates, D.M., Bolstad, B., Dettling, M., Dudoit, S., Ellis, B., Gautier, L., Ge, Y., Gentry, J., Hornik, K., Hothorn, T., Huber, W., Iacus, S., Irizarry, R., Leisch, F., Li, C., Maechler, M., Rossini, A.J., Sawitzki, G., Smith, C., Smyth, G., Tierney, L., Yang, J.Y.H., Zhang, J.: Bioconductor: open software development for computational biology and bioinformatics. Genome Biol. 5 (2004)","DOI":"10.1186\/gb-2004-5-10-r80"},{"key":"22_CR11","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1186\/gb4161","volume":"15","author":"D. Blankenberg","year":"2014","unstructured":"Blankenberg, D., Von Kuster, G., Bouvier, E., Baker, D., Afgan, E., Stoler, N., Taylor, J., Nekrutenko, A.: Dissemination of scientific software with Galaxy ToolShed. Genome Biol.\u00a015, 403 (2014)","journal-title":"Genome Biol."},{"key":"22_CR12","unstructured":"Open Grid Scheduler, \n                      \n                        http:\/\/gridscheduler.sourceforge.net\/"},{"key":"22_CR13","unstructured":"Hadoop homepage, \n                      \n                        http:\/\/hadoop.apache.org\/"},{"key":"22_CR14","doi-asserted-by":"crossref","unstructured":"Shvachko, K., Kuang, H., Radia, S., Chansler, R.: The Hadoop Distributed File System. In: 26th Symposium on Mass Storage Systems and Technologies. IEEE (2010)","DOI":"10.1109\/MSST.2010.5496972"},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Ghemawat, S., Gobioff, H., Leung, S.-T.: The Google file system. ACM SIGOPS Operating Systems Review, 29 (2003)","DOI":"10.1145\/1165389.945450"},{"key":"22_CR16","unstructured":"MountableHDFS, \n                      \n                        http:\/\/wiki.apache.org\/hadoop\/MountableHDFS"},{"key":"22_CR17","doi-asserted-by":"crossref","unstructured":"Taylor, R.C.: An overview of the Hadoop\/MapReduce\/HBase framework and its current applications in bioinformatics. BMC Bioinformatics 11 (2010)","DOI":"10.1186\/1471-2105-11-S12-S1"},{"key":"22_CR18","unstructured":"Apache HBase, \n                      \n                        http:\/\/hbase.apache.org\/"},{"key":"22_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1365815.1365816","volume":"26","author":"F. Chang","year":"2008","unstructured":"Chang, F., Dean, J., Ghemawat, S., Hsieh, W.C., Wallach, D.A., Burrows, M., Chandra, T., Fikes, A., Gruber, R.E.: BigTable: A Distributed Storage System for Structured Data. ACM Trans. Comput. Syst.\u00a026, 1\u201326 (2008)","journal-title":"ACM Trans. Comput. Syst."},{"key":"22_CR20","unstructured":"Apache Spark, \n                      \n                        https:\/\/spark.apache.org\/"},{"key":"22_CR21","doi-asserted-by":"crossref","unstructured":"Gates, A.F., Natkovich, O., Chopra, S., Kamath, P., Narayanamurthy, S.M., Olston, C., Reed, B., Srinivasan, S., Srivastava, U.: Building a high-level dataflow system on top of Map-Reduce: the Pig experience. In: Proc. of the VLDB Endowment, pp. 1414\u20131425 (2009)","DOI":"10.14778\/1687553.1687568"},{"key":"22_CR22","doi-asserted-by":"crossref","unstructured":"Thusoo, A., Sarma, J.S., Jain, N., Shao, Z., Chakka, P., Anthony, S., Liu, H., Wyckoff, P., Murthy, R.: Hive: a warehousing solution over a map-reduce framework. In: Proc. of VLDB Endowment, pp. 1626\u20131629 (2009)","DOI":"10.14778\/1687553.1687609"},{"key":"22_CR23","unstructured":"Cascading, \n                      \n                        http:\/\/www.cascading.org\/"},{"key":"22_CR24","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1145\/1773912.1773922","volume":"44","author":"A. Lakshman","year":"2010","unstructured":"Lakshman, A., Malik, P.: Cassandra: a decentralized structured storage system. ACM SIGOPS Oper. Syst. Rev.\u00a044, 35 (2010)","journal-title":"ACM SIGOPS Oper. Syst. Rev."},{"key":"22_CR25","unstructured":"Impala, \n                      \n                        http:\/\/www.cloudera.com\/content\/cloudera\/en\/products-and-services\/cdh\/impala.html"},{"key":"22_CR26","unstructured":"Apache Drill, \n                      \n                        http:\/\/incubator.apache.org\/drill\/"},{"key":"22_CR27","doi-asserted-by":"crossref","unstructured":"Melnik, S., Gubarev, A., Long, J.J., Romer, G., Shivakumar, S., Tolton, M., Vassilakis, T.: Dremel: interactive analysis of web-scale datasets. In: Proc. VLDB Endow., pp. 330\u2013339 (2010)","DOI":"10.14778\/1920841.1920886"},{"key":"22_CR28","unstructured":"Storm, \n                      \n                        https:\/\/storm.incubator.apache.org\/"},{"key":"22_CR29","unstructured":"Mahout homepage, \n                      \n                        https:\/\/mahout.apache.org\/"},{"key":"22_CR30","doi-asserted-by":"crossref","unstructured":"Pireddu, L., Leo, S., Soranzo, N., Zanetti, G.: A Hadoop-Galaxy adapter for user-friendly and scalable data-intensive bioinformatics in Galaxy. In: Proc. of 5th ACM Conference on Bioinformatics, Computational Biology, and Health Informatics, pp. 184\u2013191 (2014)","DOI":"10.1145\/2649387.2649429"},{"key":"22_CR31","doi-asserted-by":"crossref","unstructured":"Wong, A.K., Park, C.Y., Greene, C.S., Bongo, L.A., Guan, Y., Troyanskaya, O.G.: IMP: a multi-species functional genomics portal for integration, visualization and prediction of protein functions and networks. Nucleic Acids Res.\u00a040, W484\u2013W490 (2012)","DOI":"10.1093\/nar\/gks458"},{"key":"22_CR32","doi-asserted-by":"crossref","unstructured":"Barrett, T., Troup, D.B., Wilhite, S.E., Ledoux, P., Evangelista, C., Kim, I.F., Tomashevsky, M., Marshall, K.A., Phillippy, K.H., Sherman, P.M., Muertter, R.N., Holko, M., Ayanbule, O., Yefanov, A., Soboleva, A.: NCBI GEO: archive for functional genomics data sets\u201310 years on. Nucleic Acids Res. 39, D1005\u2013D1010 (2010)","DOI":"10.1093\/nar\/gkq1184"},{"key":"22_CR33","unstructured":"Hindman, B., Konwinski, A., Zaharia, M., Ghodsi, A., Joseph, A.D., Katz, R., Shenker, S., Stoica, I.: Mesos: a platform for fine-grained resource sharing in the data center. In: Proc.of the 8th USENIX Conference on Networked Systems Design and Implementation. USENIX Association (2011)"},{"key":"22_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1007\/978-3-642-54420-0_31","volume-title":"Euro-Par 2013: Parallel Processing Workshops","author":"E. Pedersen","year":"2014","unstructured":"Pedersen, E., Willassen, N.P., Bongo, L.A.: Transparent incremental updates for Genomics Data Analysis Pipelines. In: an Mey, D., Alexander, M., Bientinesi, P., Cannataro, M., Clauss, C., Costan, A., Kecskemeti, G., Morin, C., Ricci, L., Sahuquillo, J., Schulz, M., Scarano, V., Scott, S.L., Weidendorfer, J. (eds.) Euro-Par 2013. LNCS, vol.\u00a08374, pp. 311\u2013320. Springer, Heidelberg (2014)"},{"key":"22_CR35","doi-asserted-by":"crossref","unstructured":"Pedersen, E., Raknes, I.A., Ernstsen, M., Bongo, L.A.: Integrating Data-Intensive Computing Systems with Biological Data Processing Frameworks. In: Euromicro Conference on Parallel, Distributed and Network-Based Processing (2015)","DOI":"10.1109\/PDP.2015.106"},{"key":"22_CR36","doi-asserted-by":"crossref","unstructured":"Magrane, M., Consortium, U.: UniProt Knowledgebase: a hub of integrated protein data. Database (Oxford). 2011, bar009 (2011)","DOI":"10.1093\/database\/bar009"},{"key":"22_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"263","DOI":"10.1007\/978-3-319-14325-5_23","volume-title":"Euro-Par 2014: Parallel Processing Workshops","author":"M. Ernstsen","year":"2014","unstructured":"Ernstsen, M., Kj\u00e6rner-Semb, E., Willassen, N.P., Bongo, L.A.: Mario: Interactive tuning of biological analysis pipelines using iterative processing. In: Lopes, L., et al. (eds.) Euro-Par 2014, Part I. LNCS, vol.\u00a08805, pp. 263\u2013274. Springer, Heidelberg (2014)"},{"key":"22_CR38","doi-asserted-by":"crossref","unstructured":"Zaharia, M., Das, T., Li, H., Hunter, T., Shenker, S., Stoica, I.: Discretized streams. In: Proc. of Twenty-Fourth ACM Symposium on Operating Systems Principles, pp. 423\u2013438. ACM Press (2013)","DOI":"10.1145\/2517349.2522737"},{"key":"22_CR39","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/S0022-2836(05)80360-2","volume":"215","author":"S.F. Altschul","year":"1990","unstructured":"Altschul, S.F., Gish, W., Miller, W., Myers, E.W., Lipman, D.J.: Basic local alignment search tool. J. Mol. Biol.\u00a0215, 403\u2013410 (1990)","journal-title":"J. Mol. Biol."},{"key":"22_CR40","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1186\/1471-2105-15-149","volume":"15","author":"S. Killcoyne","year":"2014","unstructured":"Killcoyne, S., del Sol, A.: FIGG: simulating populations of whole genome sequences for heterogeneous data analyses. BMC Bioinformatics\u00a015, 149 (2014)","journal-title":"BMC Bioinformatics"},{"key":"22_CR41","unstructured":"Azure: Microsoft\u2019s Cloud Platform, \n                      \n                        http:\/\/azure.microsoft.com\/en-us\/"},{"key":"22_CR42","doi-asserted-by":"crossref","unstructured":"O\u2019Connor, B.D., Merriman, B., Nelson, S.F.: SeqWare Query Engine: storing and searching sequence data in the cloud. BMC Bioinformatics 11(Suppl. 1), S2 (2010)","DOI":"10.1186\/1471-2105-11-S12-S2"},{"key":"22_CR43","doi-asserted-by":"publisher","first-page":"358","DOI":"10.1186\/1471-2105-14-358","volume":"14","author":"A. Roberts","year":"2013","unstructured":"Roberts, A., Feng, H., Pachter, L.: Fragment assignment in the cloud with eXpress-D. BMC Bioinformatics\u00a014, 358 (2013)","journal-title":"BMC Bioinformatics"},{"key":"22_CR44","unstructured":"Dean, J., Ghemawat, S.: MapReduce: simplified data processing on large clusters. In: Proc. of Operating Systems Design & Implementation. USENIX (2004)"}],"container-title":["Lecture Notes in Computer Science","Computational Intelligence Methods for Bioinformatics and Biostatistics"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-24462-4_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,30]],"date-time":"2019-05-30T21:43:51Z","timestamp":1559252631000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-24462-4_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319244617","9783319244624"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-24462-4_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]}}}