{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T14:47:29Z","timestamp":1778338049883,"version":"3.51.4"},"reference-count":18,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T00:00:00Z","timestamp":1550102400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100003453","name":"Natural Science Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2015A030308017"],"award-info":[{"award-number":["2015A030308017"]}],"id":[{"id":"10.13039\/501100003453","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1186\/s12859-019-2665-0","type":"journal-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T08:03:04Z","timestamp":1550131384000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["ADS-HCSpark: A scalable HaplotypeCaller leveraging adaptive data segmentation to accelerate variant calling on Spark"],"prefix":"10.1186","volume":"20","author":[{"given":"Anghong","family":"Xiao","sequence":"first","affiliation":[]},{"given":"Zongze","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Shoubin","family":"Dong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,2,14]]},"reference":[{"key":"2665_CR1","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1016\/j.virusres.2016.08.004","volume":"239","author":"M CORNELISSEN","year":"2017","unstructured":"CORNELISSEN M, GALL A, VINK M. From clinical sample to complete genome: comparing methods for the extraction of HIV-1 RNA for high-throughput deep sequencing. Virus Res. 2017;239:10\u20136.","journal-title":"Virus Res"},{"issue":"9","key":"2665_CR2","doi-asserted-by":"publisher","first-page":"1297","DOI":"10.1101\/gr.107524.110","volume":"20","author":"A McKenna","year":"2010","unstructured":"McKenna A, Hanna M, Banks E, et al. The genome analysis toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010;20(9):1297\u2013303.","journal-title":"Genome Res"},{"key":"2665_CR3","doi-asserted-by":"publisher","first-page":"6275","DOI":"10.1038\/ncomms7275","volume":"6","author":"G Highnam","year":"2015","unstructured":"Highnam G, Wang JJ, Kusler D, et al. An analytical framework for optimizing variant discovery from personal genomes. Nat Commun. 2015;6:6275.","journal-title":"Nat Commun"},{"key":"2665_CR4","doi-asserted-by":"publisher","first-page":"17875","DOI":"10.1038\/srep17875","volume":"5","author":"S Hwang","year":"2015","unstructured":"Hwang S, Kim E, Lee I, et al. Systematic comparison of variant calling pipelines using gold standard personal exome variants. Sci Rep. 2015;5:17875.","journal-title":"Sci Rep"},{"issue":"16","key":"2665_CR5","doi-asserted-by":"publisher","first-page":"2078","DOI":"10.1093\/bioinformatics\/btp352","volume":"25","author":"H Li","year":"2009","unstructured":"Li H, Handsaker B, Wysoker A. The sequence alignment\/map format and SAMtools. Bioinformatics. 2009;25(16):2078\u20139.","journal-title":"Bioinformatics"},{"key":"2665_CR6","doi-asserted-by":"crossref","unstructured":"Huang S, Manikandan GJ, Ramachandran A, et al. Hardware acceleration of the pair-HMM algorithm for DNA variant calling. Proceedings of the 2017 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays. 2017:275\u201384.","DOI":"10.1145\/3020078.3021749"},{"key":"2665_CR7","unstructured":"Deng L, Huang G, Zhuang Y, et al. HiGene: A high-performance platform for genomic data analysis. IEEE International Conference on Bioinformatics and Biomedicine. 2016:576\u201383."},{"issue":"11","key":"2665_CR8","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/2934664","volume":"59","author":"M Zaharia","year":"2016","unstructured":"Zaharia M, Franklin MJ, Ghodsi A. Apache spark: a unified engine for big data processing. Commun ACM. 2016;59(11):56\u201365.","journal-title":"Commun ACM"},{"issue":"Suppl 12","key":"2665_CR9","doi-asserted-by":"publisher","first-page":"S1","DOI":"10.1186\/1471-2105-11-S12-S1","volume":"11","author":"RC Taylor","year":"2010","unstructured":"Taylor RC. An overview of the Hadoop\/MapReduce\/HBase framework and its current applications in bioinformatics. BMC Bioinformatics. 2010;11(Suppl 12):S1.","journal-title":"BMC Bioinformatics"},{"key":"2665_CR10","doi-asserted-by":"crossref","unstructured":"O'Connor B D, Merriman B, Nelson S F. SeqWare Query Engine: storing and searching sequence data in the cloud. BMC Bioinformatics. 2010; 11 Suppl 12: S2-S2.","DOI":"10.1186\/1471-2105-11-S12-S2"},{"key":"2665_CR11","unstructured":"GATK Queue. \n                    https:\/\/software.broadinstitute.org\/gatk\/documentation\/index#intro1306\n                    \n                  . Accessed 23 Mar 2018."},{"issue":"15","key":"2665_CR12","doi-asserted-by":"publisher","first-page":"2482","DOI":"10.1093\/bioinformatics\/btv179","volume":"31","author":"D Decap","year":"2015","unstructured":"Decap D, Reumers J, Herzeel C, et al. Halvade: scalable sequence analysis with MapReduce. Bioinformatics. 2015;31(15):2482.","journal-title":"Bioinformatics"},{"issue":"1","key":"2665_CR13","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1186\/s13059-014-0577-x","volume":"16","author":"BJ Kelly","year":"2015","unstructured":"Kelly BJ, Fitch JR, Hu Y, et al. Churchill: an ultra-fast, deterministic, highly scalable and balanced parallelization strategy for the discovery of human genetic variation in clinical and population-scale genomics. Genome Biol. 2015;16(1):6.","journal-title":"Genome Biol"},{"key":"2665_CR14","unstructured":"Garrison E, Marth G. Haplotype-based variant detection from short-read sequencing. arXiv preprint arXiv. 2012;(1207):3907."},{"key":"2665_CR15","doi-asserted-by":"crossref","unstructured":"Mushtaq H, Liu F, Costa C, et al. SparkGA: A Spark Framework for Cost Effective, Fast and Accurate DNA Analysis at Scale. Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics. 2017:148\u201357.","DOI":"10.1145\/3107411.3107438"},{"key":"2665_CR16","unstructured":"Broad Institute Gatk 4.0. \n                    https:\/\/software.broadinstitute.org\/gatk\/gatk4\n                    \n                  . Accessed 11 May 2018."},{"issue":"6","key":"2665_CR17","doi-asserted-by":"publisher","first-page":"876","DOI":"10.1093\/bioinformatics\/bts054","volume":"28","author":"M Niemenmaa","year":"2012","unstructured":"Niemenmaa M, Kallio A, Schumacher A. Hadoop-BAM: directly manipulating next generation sequencing data in the cloud. Bioinformatics. 2012;28(6):876\u20137.","journal-title":"Bioinformatics"},{"issue":"19","key":"2665_CR18","doi-asserted-by":"publisher","first-page":"2787","DOI":"10.1093\/bioinformatics\/btu345","volume":"30","author":"A Talwalkar","year":"2014","unstructured":"Talwalkar A, Liptrap J, Newcomb J. SMaSH: A benchmarking toolkit for human genome variant calling. Bioinformatics. 2014;30(19):2787\u201395.","journal-title":"Bioinformatics"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-019-2665-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s12859-019-2665-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-019-2665-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,2,13]],"date-time":"2020-02-13T19:07:58Z","timestamp":1581620878000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-019-2665-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2,14]]},"references-count":18,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["2665"],"URL":"https:\/\/doi.org\/10.1186\/s12859-019-2665-0","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,2,14]]},"assertion":[{"value":"9 November 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 January 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The datasets used in the experiment are publicly available on the website , so the ethics approval is not required.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Publisher\u2019s Note"}}],"article-number":"76"}}