{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T01:22:42Z","timestamp":1743038562090,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":57,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662623855"},{"type":"electronic","value":"9783662623862"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-662-62386-2_1","type":"book-chapter","created":{"date-parts":[[2020,11,20]],"date-time":"2020-11-20T00:30:00Z","timestamp":1605832200000},"page":"1-31","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Extracting Insights: A Data Centre Architecture Approach in Million Genome Era"],"prefix":"10.1007","author":[{"given":"Tariq","family":"Abdullah","sequence":"first","affiliation":[]},{"given":"Ahmed","family":"Ahmet","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,20]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Abdullah, T., Ahmet, A.: Genomics analyser: a big data framework for analysing genomics data. In: Proceedings of the Fourth IEEE\/ACM International Conference on Big Data Computing, Applications and Technologies, pp. 189\u2013197 (2017)","DOI":"10.1145\/3148055.3148072"},{"issue":"12","key":"1_CR2","doi-asserted-by":"publisher","first-page":"1475","DOI":"10.1093\/bioinformatics\/btp274","volume":"25","author":"A Bateman","year":"2009","unstructured":"Bateman, A., Wood, M.: Cloud computing. Bioinformatics 25(12), 1475 (2009)","journal-title":"Bioinformatics"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Benson, D.A., Karsch-Mizrachi, I., Lipman, D.J., Ostell, J., Sayers, E.W.: GenBank. Nucl. Acids Res. 37(Database), D26\u2013D31 (2009)","DOI":"10.1093\/nar\/gkn723"},{"key":"1_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1471-2164-16-1","volume":"16","author":"ARO Brien","year":"2015","unstructured":"Brien, A.R.O., Saunders, N.F.W., Guo, Y., Buske, F.A., Scott, R.J., Bauer, D.C.: VariantSpark: population scale clustering of genotype information. BMC Genomics 16, 1\u20139 (2015)","journal-title":"BMC Genomics"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Shaffer, C.: Next-generation sequencing outpaces expectations. Nat. Biotechnol. 25 (2007)","DOI":"10.1038\/nbt0207-149"},{"issue":"19","key":"1_CR6","doi-asserted-by":"publisher","first-page":"3928","DOI":"10.1093\/nar\/29.19.3928","volume":"29","author":"RJ Carter","year":"2001","unstructured":"Carter, R.J., Dubchak, I., Holbrook, S.R.: A computational approach to identify genes for functional RNAs in genomic sequences. Nucl. Acids Res. 29(19), 3928\u20133938 (2001)","journal-title":"Nucl. Acids Res."},{"key":"1_CR7","unstructured":"Hayden, E.C.: Genome researchers raise alarm over big data. Nature (2015)"},{"issue":"15","key":"1_CR8","doi-asserted-by":"publisher","first-page":"754","DOI":"10.1016\/j.drudis.2009.05.005","volume":"14","author":"X Chen","year":"2009","unstructured":"Chen, X., Jorgenson, E., Cheung, S.: New tools for functional genomic analysis. Drug Discov. Today 14(15), 754\u2013760 (2009)","journal-title":"Drug Discov. Today"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"The 1000 Genome Project Consortium: A global reference for human genetic variations. Nature 256, 68\u201378 (2015)","DOI":"10.1038\/nature15393"},{"issue":"19","key":"1_CR10","first-page":"3928","volume":"29","author":"CE Cook","year":"2017","unstructured":"Cook, C.E., Bergman, M.T., Cochrane, G., Apweiler, R., Birney, E.: The European bioinformatics institute in 2017: data coordination and integration. Nucl. Acids Res. 29(19), 3928\u20133938 (2017)","journal-title":"Nucl. Acids Res."},{"issue":"6","key":"1_CR11","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1586\/14737159.2013.811907","volume":"13","author":"E Coonrod","year":"2013","unstructured":"Coonrod, E., Margraf, R., Russell, A., Voelkerding, K., Reese, M.: Clinical analysis of genome next-generation sequencing data using the Omicia platform. Expert. Rev. Mol. Diagn. 13(6), 529\u2013540 (2013)","journal-title":"Expert. Rev. Mol. Diagn."},{"key":"1_CR12","unstructured":"Davies, K.: The 1,000 Dollar Genome - The Revolution in DNA Sequencing and the New Era of Personalized Medicine. Free Press (2010)"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"de Paula, R., Holanda, M., Gomes, L.S.A., Lifschitz, S., Walter, M.E.M.T.: Provenance in bioinformatics workflows. In: BMC Bioinformatics Workshops (2013)","DOI":"10.1186\/1471-2105-14-S11-S6"},{"issue":"15","key":"1_CR14","doi-asserted-by":"publisher","first-page":"2482","DOI":"10.1093\/bioinformatics\/btv179","volume":"31","author":"D Decap","year":"2015","unstructured":"Decap, D., Reumers, J., Herzeel, C., Costanza, P., Fostier, J.: Halvade: scalable sequence analysis with MapReduce. Bioinformatics 31(15), 2482\u20132488 (2015)","journal-title":"Bioinformatics"},{"issue":"2","key":"1_CR15","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1093\/hmg\/ddq391","volume":"19","author":"L Ding","year":"2010","unstructured":"Ding, L., Wendl, M., Koboldt, D., Mardis, E.: Analysis of next-generation genomic data in cancer: accomplishments and challenges. Hum. Mol. Genet. 19(2), 188\u2013196 (2010)","journal-title":"Hum. Mol. Genet."},{"key":"1_CR16","unstructured":"EMBL-EBI. EMBL-EBI annual scientific report 2013. Technical report, EMBL-European Bioinformatics Institute (2014)"},{"key":"1_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1471-2105-13-206","volume":"13","author":"I Borozan","year":"2012","unstructured":"Borozan, I., et al.: CaPSID: a bioinformatics platform for computational pathogen sequence identification in human genome and transcriptomes. BMC Bioinform. 13, 1\u201311 (2012)","journal-title":"BMC Bioinform."},{"key":"1_CR18","unstructured":"National Center for Biotechnology Information. File format guide, U.S. National Library of Medicine. https:\/\/www.ncbi.nlm.nih.gov\/sra\/docs\/submitformats\/"},{"issue":"1","key":"1_CR19","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1186\/1471-2105-15-102","volume":"15","author":"X Guo","year":"2014","unstructured":"Guo, X., Meng, Y., Yu, N., Pan, Y.: Cloud computing for detecting high-order genome-wide epistatic interaction via dynamic clustering. BMC Bioinform. 15(1), 102 (2014)","journal-title":"BMC Bioinform."},{"key":"1_CR20","unstructured":"Gurovich,,Y., et al.: DeepGestalt-identifying rare genetic syndromes using deep learning. arXiv preprint arXiv:1801.07637 (2018)"},{"issue":"1","key":"1_CR21","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1093\/bioinformatics\/bts647","volume":"29","author":"H Huang","year":"2013","unstructured":"Huang, H., Tata, S., Prill, R.J.: BlueSNP. R package for highly scalable genome-wide association studies using Hadoop clusters. Bioinformatics 29(1), 135\u2013136 (2013)","journal-title":"Bioinformatics"},{"issue":"9","key":"1_CR22","doi-asserted-by":"publisher","first-page":"1457","DOI":"10.1093\/bioinformatics\/btx808","volume":"34","author":"L Huang","year":"2017","unstructured":"Huang, L., Kruger, J., Sczyrba, A.: Analyzing large scale genomic data on the cloud with Sparkhit. Bioinformatics 34(9), 1457\u20131465 (2017)","journal-title":"Bioinformatics"},{"key":"1_CR23","unstructured":"Data \u2014 1000 Genomes. IGSR: The International Genome Sample Resource. https:\/\/www.internationalgenome.org\/data"},{"key":"1_CR24","doi-asserted-by":"publisher","first-page":"450","DOI":"10.1186\/1471-2105-8-450","volume":"8","author":"J Tian","year":"2007","unstructured":"Tian, J., Wu, N., Guo, X., Guo, J., Zhang, J., Fan, Y.: Predicting the phenotypic effects of non-synonymous single nucleotide polymorphisms based on support vector machines. BMC Bioinform. 8, 450\u2013546 (2007)","journal-title":"BMC Bioinform."},{"issue":"11","key":"1_CR25","doi-asserted-by":"publisher","first-page":"1542","DOI":"10.1093\/bioinformatics\/bts165","volume":"28","author":"L Jourdren","year":"2012","unstructured":"Jourdren, L., Bernard, M., Dillies, M.A.L., Crom, S.: Eoulsan. A cloud computing-based framework facilitating high throughput sequencing analyses. Bioinformatics 28(11), 1542\u20131543 (2012)","journal-title":"Bioinformatics"},{"key":"1_CR26","doi-asserted-by":"crossref","unstructured":"Kelly, B.J., et al.: Churchill: an ultra-fast, deterministic, highly scalable and balanced parallelization strategy for the discovery of human genetic variation in clinical and population-scale genomics. Genome Biol. 16(1), 6 (2015)","DOI":"10.1186\/s13059-014-0577-x"},{"key":"1_CR27","doi-asserted-by":"crossref","unstructured":"Klinger, J., Mateos-Garcia, J.C., Stathoulopoulos, K.: Deep learning, deep change? Mapping the development of the artificial intelligence general purpose technology. Mapp. Dev. Artif. Intell. Gen. Purp. Technol. (2018)","DOI":"10.2139\/ssrn.3233463"},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Kozanitis, C., Patterson, D.A.: GenAP: a distributed SQL interface for genomic data. BMC Bioinformat. 17(63) (2016)","DOI":"10.1186\/s12859-016-0904-1"},{"key":"1_CR29","doi-asserted-by":"publisher","first-page":"R25","DOI":"10.1186\/gb-2009-10-3-r25","volume":"10","author":"B Langmead","year":"2009","unstructured":"Langmead, B., Trapnell, C., Pop, M., Salzberg, S.L.: Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biol. 10, R25 (2009). https:\/\/doi.org\/10.1186\/gb-2009-10-3-r25","journal-title":"Genome Biol."},{"issue":"11","key":"1_CR30","doi-asserted-by":"publisher","first-page":"134:1","DOI":"10.1186\/gb-2009-10-11-r134","volume":"10","author":"B Langmead","year":"2009","unstructured":"Langmead, B., Schatz, M.C., Lin, J., Pop, M., Salzberg, S.L.: Searching for SNPs with cloud computing. Genome Biol. 10(11), 134:1\u2013134:10 (2009)","journal-title":"Genome Biol."},{"issue":"11","key":"1_CR31","doi-asserted-by":"publisher","first-page":"R134","DOI":"10.1186\/gb-2009-10-11-r134","volume":"10","author":"B Langmead","year":"2009","unstructured":"Langmead, B., Schatz, M.C., Lin, J., Pop, M., Salzberg, S.L.: Searching for SNPs with cloud computing. Genome Biol. 10(11), R134 (2009)","journal-title":"Genome Biol."},{"key":"1_CR32","doi-asserted-by":"crossref","unstructured":"Lu, W., Jackson, J., Barga, R.: AzureBlast: a case study of developing science applications on the cloud. In: 19th ACM International Symposium on High Performance Distributed Computing, pp. 413\u2013420 (2010)","DOI":"10.1145\/1851476.1851537"},{"issue":"3","key":"1_CR33","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1016\/j.tig.2007.12.007","volume":"24","author":"ER Mardis","year":"2008","unstructured":"Mardis, E.R.: The impact of next-generation sequencing technology on genetics. Trends Genet. 24(3), 133\u2013141 (2008)","journal-title":"Trends Genet."},{"key":"1_CR34","unstructured":"Massie, M., et al.: Adam: genomics formats and processing patterns for cloud scale computing. Technical report UCB\/EECS-2013-207, EECS Department, University of California, Berkeley, December 2013"},{"issue":"1","key":"1_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1756-0381-7-22","volume":"7","author":"EA Mohammed","year":"2014","unstructured":"Mohammed, E.A., Far, B.H., Naugler, C.: Applications of the MapReduce programming framework to clinical big data analysis: current landscape and future trends. BioData Min. 7(1), 1\u201323 (2014)","journal-title":"BioData Min."},{"issue":"30","key":"1_CR36","doi-asserted-by":"publisher","first-page":"2652","DOI":"10.1093\/bioinformatics\/btu343","volume":"15","author":"MS Wiewiorka","year":"2014","unstructured":"Wiewiorka, M.S., Messina, A., Pacholewska, A., Maffioletti, S., Gawrysiak, P., Okoniewski, M.J.: SparkSeq: fast, scalable and cloud-ready tool for the interactive genomic data analysis with nucleotide precision. Bioinformatics 15(30), 2652\u20132653 (2014)","journal-title":"Bioinformatics"},{"issue":"23","key":"1_CR37","doi-asserted-by":"publisher","first-page":"3014","DOI":"10.1093\/bioinformatics\/btt528","volume":"29","author":"H Nordberg","year":"2013","unstructured":"Nordberg, H., Bhatia, K., Wang, K., Wang, Z.: BioPig: a Hadoop-based analytic toolkit for large-scale sequence data. Bioinformatics 29(23), 3014\u20133019 (2013)","journal-title":"Bioinformatics"},{"key":"1_CR38","unstructured":"Norrgard, K.: Genetic variation and disease: GWAS. Nat. Educ. 1(1), 87(2008)"},{"key":"1_CR39","doi-asserted-by":"crossref","unstructured":"O\u2019Connor, B.D., Merriman, B., Nelson, S.F.: SeqWare query engine: storing and searching sequence data in the cloud. BMC Bioinform. 11(Suppl. 12), S2 (2010)","DOI":"10.1186\/1471-2105-11-S12-S2"},{"key":"1_CR40","unstructured":"Oliveira, J.H., Holanda, M., Guimaraes, V., Hondo, F., Filho, W.: Data modeling for NoSQL based on document. In: Second Annual International Symposium on Information Management and Big Data, pp. 129\u2013135 (2015)"},{"key":"1_CR41","doi-asserted-by":"crossref","unstructured":"Pinheiro, R., Holanda, M., Arujo, A., Walter, M.E.M.T., Lifschitz, S.: Automatic capture of provenance data in genome project workflows. In: IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp. 15\u201321 (2013)","DOI":"10.1109\/BIBM.2013.6732621"},{"key":"1_CR42","doi-asserted-by":"crossref","unstructured":"Pinherio, R., Holanda, M., Araujo, A., Walter, M.E.M.t., Lifschitz., S.: Storing provenance data of genome project workflows using graph databases. In: IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp. 16\u201322 (2014)","DOI":"10.1109\/BIBM.2014.6999292"},{"issue":"15","key":"1_CR43","doi-asserted-by":"publisher","first-page":"2159","DOI":"10.1093\/bioinformatics\/btr325","volume":"27","author":"L Pireddu","year":"2011","unstructured":"Pireddu, L., Leo, S., Zanetti, G.: Seal: a distributed short read mapping and duplicate removal tool. Bioinformatics 27(15), 2159\u20132160 (2011)","journal-title":"Bioinformatics"},{"issue":"10","key":"1_CR44","doi-asserted-by":"publisher","first-page":"983","DOI":"10.1038\/nbt.4235","volume":"36","author":"R Poplin","year":"2018","unstructured":"Poplin, R., et al.: A universal SNP and small-indel variant caller using deep neural networks. Nat. Biotechnol. 36(10), 983\u2013987 (2018)","journal-title":"Nat. Biotechnol."},{"key":"1_CR45","unstructured":"1000 Genomes Project. Data types and file formats"},{"key":"1_CR46","doi-asserted-by":"publisher","first-page":"637","DOI":"10.1093\/bib\/bbs088","volume":"15","author":"Q Zou","year":"2014","unstructured":"Zou, Q., Li, X.B., Jiang, W.R., Lin, Z.Y., Li, G.L., Chen, K.: Survey of MapReduce frame operation in bioinformatics. Brief. Bioinform. 15, 637\u2013647 (2014)","journal-title":"Brief. Bioinform."},{"key":"1_CR47","doi-asserted-by":"crossref","unstructured":"Qiu, J., et al.: Hybrid cloud and cluster computing paradigms for life science applications. BMC Bioinform. 11(12), 1\u20136 (2010). BioMed Central","DOI":"10.1186\/1471-2105-11-S12-S3"},{"key":"1_CR48","doi-asserted-by":"crossref","unstructured":"Quail, M.A., et al.: A tale of three next generation sequencing platforms: comparison of Ion Torrent, Pacific Biosciences and Illumina MiSeq sequencers. BMC Genomics 13(1), 1\u201313 (2012). BioMed Central","DOI":"10.1186\/1471-2164-13-341"},{"key":"1_CR49","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1186\/1471-2164-12-419","volume":"12","author":"T Robinson","year":"2011","unstructured":"Robinson, T., Killcoyne, S., Bressler, R., Boyle, J.: SAMQA: error classification and validation of high-throughput sequenced read data. BMC Genomics 12, 419 (2011)","journal-title":"BMC Genomics"},{"issue":"11","key":"1_CR50","doi-asserted-by":"publisher","first-page":"1363","DOI":"10.1093\/bioinformatics\/btp236","volume":"25","author":"MC Schatz","year":"2009","unstructured":"Schatz, M.C.: Cloudburst: highly sensitive read mapping with MapReduce. Bioinformatics 25(11), 1363\u20131369 (2009)","journal-title":"Bioinformatics"},{"issue":"1","key":"1_CR51","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1186\/1471-2105-13-200","volume":"13","author":"S Schoenherr","year":"2012","unstructured":"Schoenherr, S., Forer, L., Weissensteiner, H., Specht, G., Kronenberg, F., Kloss-Brandstaetter, A.: Cloudgene: a graphical execution platform for MapReduce programs on private and public clouds. BMC Bioinform. 13(1), 200 (2012)","journal-title":"BMC Bioinform."},{"issue":"1","key":"1_CR52","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1093\/bioinformatics\/btt601","volume":"30","author":"A Schumacher","year":"2014","unstructured":"Schumacher, A., et al.: SeqPig: simple and scalable scripting for large sequencing data sets in Hadoop. Bioinformatics 30(1), 119\u2013120 (2014)","journal-title":"Bioinformatics"},{"issue":"5","key":"1_CR53","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1186\/gb-2010-11-5-207","volume":"11","author":"LD Stein","year":"2010","unstructured":"Stein, L.D.: The case for cloud computing in genome informatics. Genome Biol. 11(5), 207 (2010)","journal-title":"Genome Biol."},{"key":"1_CR54","doi-asserted-by":"crossref","unstructured":"Stephens, Z.D., et al.: Big data: astronomical or genomical? PLoS Biol. 13(7), e1002195 (2015)","DOI":"10.1371\/journal.pbio.1002195"},{"key":"1_CR55","doi-asserted-by":"crossref","unstructured":"Taylor, R.C.: An overview of the Hadoop\/MapReduce\/HBase framework and its current applications in bioinformatics. BMC Bioinform. 11(S12), S1 (2010). Springer","DOI":"10.1186\/1471-2105-11-S12-S1"},{"issue":"8","key":"1_CR56","doi-asserted-by":"publisher","first-page":"1112","DOI":"10.1093\/bioinformatics\/btt769","volume":"30","author":"K-C Wong","year":"2014","unstructured":"Wong, K.-C., Zhang, Z.: SNPdryad: predicting deleterious nonsynonymous human SNPs using only orthologous protein sequences. Bioinformatics 30(8), 1112\u20131119 (2014)","journal-title":"Bioinformatics"},{"key":"1_CR57","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/j.csbj.2017.07.004","volume":"15","author":"Z Yin","year":"2017","unstructured":"Yin, Z., Lan, H., Tan, G., Lu, M., Vasilakos, A., Liu, W.: Computing platforms for big biological data analytics: perspectives and challenges. Comput. Struct. Biotechnol. J. 15, 403\u2013411 (2017)","journal-title":"Comput. Struct. Biotechnol. J."}],"container-title":["Lecture Notes in Computer Science","Transactions on Large-Scale Data- and Knowledge-Centered Systems XLVI"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-62386-2_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,14]],"date-time":"2021-04-14T21:52:09Z","timestamp":1618437129000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-62386-2_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783662623855","9783662623862"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-62386-2_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"20 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}