{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T00:45:59Z","timestamp":1767919559432,"version":"3.49.0"},"reference-count":40,"publisher":"Public Library of Science (PLoS)","issue":"10","license":[{"start":{"date-parts":[[2020,10,6]],"date-time":"2020-10-06T00:00:00Z","timestamp":1601942400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100010801","name":"Xunta de Galicia","doi-asserted-by":"publisher","award":["ED481B 2018\/013"],"award-info":[{"award-number":["ED481B 2018\/013"]}],"id":[{"id":"10.13039\/501100010801","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.plosone.org"],"crossmark-restriction":false},"short-container-title":["PLoS ONE"],"DOI":"10.1371\/journal.pone.0239741","type":"journal-article","created":{"date-parts":[[2020,10,6]],"date-time":"2020-10-06T17:27:58Z","timestamp":1602005278000},"page":"e0239741","update-policy":"https:\/\/doi.org\/10.1371\/journal.pone.corrections_policy","source":"Crossref","is-referenced-by-count":13,"title":["Big Data in metagenomics: Apache Spark vs MPI"],"prefix":"10.1371","volume":"15","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9771-818X","authenticated-orcid":true,"given":"Jos\u00e9 M.","family":"Abu\u00edn","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8897-5061","authenticated-orcid":true,"given":"Nuno","family":"Lopes","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9635-5372","authenticated-orcid":true,"given":"Lu\u00eds","family":"Ferreira","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7622-4698","authenticated-orcid":true,"given":"Tom\u00e1s F.","family":"Pena","sequence":"additional","affiliation":[]},{"given":"Bertil","family":"Schmidt","sequence":"additional","affiliation":[]}],"member":"340","published-online":{"date-parts":[[2020,10,6]]},"reference":[{"key":"pone.0239741.ref001","article-title":"The impact of high-performance computing best practice applied to next-generation sequencing workflows","author":"P Carrier","year":"2015","journal-title":"BioRxiv"},{"issue":"10","key":"pone.0239741.ref002","doi-asserted-by":"crossref","first-page":"2760","DOI":"10.1109\/TPDS.2017.2692782","article-title":"A hybrid MPI-OpenMP strategy to speedup the compression of big next-generation sequencing datasets","volume":"28","author":"S Vargas-Perez","year":"2017","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"issue":"14","key":"pone.0239741.ref003","doi-asserted-by":"crossref","first-page":"1830","DOI":"10.1093\/bioinformatics\/bts276","article-title":"CUSHAW: a CUDA compatible short read aligner to large genomes based on the Burrows-Wheeler transform","volume":"28","author":"Y Liu","year":"2012","journal-title":"Bioinformatics"},{"key":"pone.0239741.ref004","unstructured":"Apache Software Foundation. Apache Hadoop;. Available from: http:\/\/hadoop.apache.org."},{"key":"pone.0239741.ref005","unstructured":"Zaharia M, Chowdhury M, Franklin MJ, Shenker S, Stoica I. Spark: Cluster Computing with Working Sets. In: Proc. of the 2nd USENIX Conference on Hot Topics in Cloud Computing (HotCloud); 2010. p. 10\u201310."},{"key":"pone.0239741.ref006","unstructured":"Zaharia M, Chowdhury M, Das T, Dave A, Ma J, McCauley M, et al. Resilient Distributed Datasets: A Fault-tolerant Abstraction for In-memory Cluster Computing. In: Proc. of the 9th USENIX Conference on Networked Systems Design and Implementation; 2012. p. 2\u20132."},{"issue":"24","key":"pone.0239741.ref007","doi-asserted-by":"crossref","first-page":"4003","DOI":"10.1093\/bioinformatics\/btv506","article-title":"BigBWA: Approaching the Burrows\u2013Wheeler Aligner to Big Data Technologies","volume":"31","author":"JM Abu\u00edn","year":"2015","journal-title":"Bioinformatics"},{"issue":"5","key":"pone.0239741.ref008","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0155461","article-title":"SparkBWA: speeding up the alignment of high-throughput DNA sequencing data","volume":"11","author":"JM Abu\u00edn","year":"2016","journal-title":"PloS ONE"},{"issue":"18","key":"pone.0239741.ref009","doi-asserted-by":"crossref","first-page":"2948","DOI":"10.1093\/bioinformatics\/btx354","article-title":"PASTASpark: multiple sequence alignment meets Big Data","volume":"33","author":"JM Abu\u00edn","year":"2017","journal-title":"Bioinformatics"},{"issue":"8","key":"pone.0239741.ref010","first-page":"giy098","article-title":"Bioinformatics applications on Apache Spark","volume":"7","author":"R Guo","year":"2018","journal-title":"GigaScience"},{"issue":"11","key":"pone.0239741.ref011","doi-asserted-by":"crossref","first-page":"886","DOI":"10.3390\/genes10110886","article-title":"PipeMEM: A framework to speed up BWA-MEM in Spark with low overhead","volume":"10","author":"L Zhang","year":"2019","journal-title":"Genes"},{"key":"pone.0239741.ref012","first-page":"56","article-title":"MPI: a standard message passing interface","volume":"12","author":"DW Walker","year":"1996","journal-title":"Supercomputer"},{"key":"pone.0239741.ref013","unstructured":"Abu\u00edn JM. Big Data meets High Performance Computing: Genomics and Natural Language Processing as case studies. University of Santiago de Compostela; 2017."},{"issue":"7","key":"pone.0239741.ref014","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1145\/2699414","article-title":"Exascale Computing and Big Data","volume":"58","author":"DA Reed","year":"2015","journal-title":"Commun ACM"},{"issue":"4","key":"pone.0239741.ref015","doi-asserted-by":"crossref","first-page":"712","DOI":"10.1016\/j.drudis.2017.01.014","article-title":"Next-generation sequencing: big data meets high performance computing","volume":"22","author":"B Schmidt","year":"2017","journal-title":"Drug discovery today"},{"key":"pone.0239741.ref016","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1016\/j.procs.2015.07.286","article-title":"Big Data Analytics in the Cloud: Spark on Hadoop vs MPI\/OpenMP on Beowulf","volume":"53","author":"Jorge L Reyes-Ortiz","year":"2015","journal-title":"Procedia Computer Science"},{"key":"pone.0239741.ref017","doi-asserted-by":"crossref","first-page":"639","DOI":"10.1186\/1471-2164-15-639","article-title":"All-Food-Seq (AFS): a quantifiable screen for species in biological samples by deep DNA sequencing","volume":"15","author":"F Ripp","year":"2014","journal-title":"BMC Genomics"},{"issue":"9","key":"pone.0239741.ref018","doi-asserted-by":"crossref","first-page":"1396","DOI":"10.1093\/bioinformatics\/btw822","article-title":"AFS: identification and quantification of species composition by metagenomic sequencing","volume":"33","author":"Y Liu","year":"2017","journal-title":"Bioinformatics"},{"issue":"1","key":"pone.0239741.ref019","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1007\/s00217-010-1371-y","article-title":"Multiplex real-time PCR for the detection and quantification of DNA from beef, pork, horse and sheep","volume":"232","author":"R K\u00f6ppel","year":"2011","journal-title":"European Food Research and Technology"},{"issue":"14","key":"pone.0239741.ref020","doi-asserted-by":"crossref","first-page":"1754","DOI":"10.1093\/bioinformatics\/btp324","article-title":"Fast and Accurate Short Read Alignment with Burrows-Wheeler Transform","volume":"25","author":"H Li","year":"2009","journal-title":"Bioinformatics"},{"issue":"5","key":"pone.0239741.ref021","doi-asserted-by":"crossref","first-page":"589","DOI":"10.1093\/bioinformatics\/btp698","article-title":"Fast and Accurate Long-Read Alignment with Burrows-Wheeler Transform","volume":"26","author":"H Li","year":"2010","journal-title":"Bioinformatics"},{"key":"pone.0239741.ref022","unstructured":"Li H. Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv:13033997v2. 2013;."},{"issue":"4","key":"pone.0239741.ref023","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1038\/nmeth.1923","article-title":"Fast gapped-read alignment with Bowtie 2","volume":"9","author":"B Langmead","year":"2012","journal-title":"Nature methods"},{"issue":"23","key":"pone.0239741.ref024","doi-asserted-by":"crossref","first-page":"3740","DOI":"10.1093\/bioinformatics\/btx520","article-title":"MetaCache: context-aware classification of metagenomic reads using minhashing","volume":"33","author":"A M\u00fcller","year":"2017","journal-title":"Bioinformatics"},{"issue":"1","key":"pone.0239741.ref025","doi-asserted-by":"crossref","first-page":"102","DOI":"10.1186\/s12859-020-3429-6","article-title":"A big data approach to metagenomics for all-food-sequencing","volume":"21","author":"R Kobus","year":"2020","journal-title":"BMC Bioinformatics"},{"key":"pone.0239741.ref026","doi-asserted-by":"crossref","first-page":"R46","DOI":"10.1186\/gb-2014-15-3-r46","article-title":"Kraken: ultrafast metagenomic sequence classification using exact alignments","volume":"15","author":"DE Wood","year":"2014","journal-title":"Genome Biology"},{"key":"pone.0239741.ref027","doi-asserted-by":"crossref","first-page":"e104","DOI":"10.7717\/peerj-cs.104","article-title":"Bracken: estimating species abundance in metagenomics data","volume":"3","author":"J Lu","year":"2017","journal-title":"PeerJ Computer Science"},{"key":"pone.0239741.ref028","doi-asserted-by":"crossref","unstructured":"Pereira R, Couto M, Ribeiro F, Rua R, Cunha J, Fernandes JP, et al. Energy efficiency across programming languages: How do energy, time, and memory relate? In: Proceedings of the 10th ACM SIGPLAN International Conference on Software Language Engineering. SLE 2017. New York, NY, USA: ACM; 2017. p. 256\u2013267. Available from: http:\/\/doi.acm.org\/10.1145\/3136014.3136031.","DOI":"10.1145\/3136014.3136031"},{"key":"pone.0239741.ref029","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.aca.2018.05.038","article-title":"Modern data science for analytical chemical data\u2013A comprehensive review","volume":"1028","author":"E Szyma\u0144ska","year":"2018","journal-title":"Analytica chimica acta"},{"issue":"11-12","key":"pone.0239741.ref030","doi-asserted-by":"crossref","first-page":"615","DOI":"10.1002\/minf.201600073","article-title":"BIGCHEM: challenges and opportunities for big data analysis in chemistry","volume":"35","author":"IV Tetko","year":"2016","journal-title":"Molecular informatics"},{"issue":"4","key":"pone.0239741.ref031","doi-asserted-by":"crossref","first-page":"433","DOI":"10.1016\/j.drudis.2013.10.012","article-title":"Big Data in biomedicine","volume":"19","author":"FF Costa","year":"2014","journal-title":"Drug discovery today"},{"key":"pone.0239741.ref032","doi-asserted-by":"crossref","first-page":"663","DOI":"10.1057\/978-1-137-52879-7_28","volume-title":"The Palgrave Handbook of Biology and Society","author":"N Levin","year":"2018"},{"key":"pone.0239741.ref033","doi-asserted-by":"crossref","unstructured":"Shvachko K, Kuang H, Radia S, Chansler R. The Hadoop Distributed File System. In: Proceedings of the 2010 IEEE 26th Symposium on Mass Storage Systems and Technologies (MSST). MSST\u201910. Washington, DC, USA: IEEE Computer Society; 2010. p. 1\u201310.","DOI":"10.1109\/MSST.2010.5496972"},{"key":"pone.0239741.ref034","doi-asserted-by":"crossref","unstructured":"Asaadi H, Khaldi D, Chapman B. A comparative survey of the HPC and Big Data paradigms: Analysis and experiments. In: 2016 IEEE International Conference on Cluster Computing (CLUSTER). IEEE; 2016. p. 423\u2013432.","DOI":"10.1109\/CLUSTER.2016.21"},{"key":"pone.0239741.ref035","volume-title":"Spark: The Definitive Guide: Big Data Processing Made Simple","author":"B Chambers","year":"2018"},{"key":"pone.0239741.ref036","doi-asserted-by":"crossref","unstructured":"Vavilapalli VK, Murthy AC, Douglas C, Agarwal S, Konar M, Evans R, et al. Apache Hadoop YARN: Yet Another Resource Negotiator. In: Proc. of the 4th Annual Symposium on Cloud Computing (SOCC); 2013. p. 5:1\u20135:16.","DOI":"10.1145\/2523616.2523633"},{"key":"pone.0239741.ref037","first-page":"22","volume-title":"NSDI","author":"B Hindman","year":"2011"},{"issue":"1","key":"pone.0239741.ref038","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1007\/s00217-019-03404-y","article-title":"Identification and quantification of meat product ingredients by whole-genome metagenomics (All-Food-Seq)","volume":"246","author":"SL Hellmann","year":"2020","journal-title":"European Food Research and Technology"},{"issue":"1","key":"pone.0239741.ref039","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/s00217-009-1138-5","article-title":"Quantification of beef, pork, chicken and turkey proportions in sausages: use of matrix-adapted standards and comparison of single versus multiplex PCR in an interlaboratory trial","volume":"230","author":"A Eugster","year":"2009","journal-title":"European Food Research and Technology"},{"key":"pone.0239741.ref040","doi-asserted-by":"crossref","unstructured":"Amdahl GM. Validity of the single processor approach to achieving large scale computing capabilities. In: Proceedings of the April 18-20, 1967, spring joint computer conference; 1967. p. 483\u2013485.","DOI":"10.1145\/1465482.1465560"}],"container-title":["PLOS ONE"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pone.0239741","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T04:27:15Z","timestamp":1696825635000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pone.0239741"}},"subtitle":[],"editor":[{"given":"Francisco","family":"Mart\u00ednez-\u00c1lvarez","sequence":"first","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,10,6]]},"references-count":40,"journal-issue":{"issue":"10","published-online":{"date-parts":[[2020,10,6]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pone.0239741","relation":{},"ISSN":["1932-6203"],"issn-type":[{"value":"1932-6203","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,10,6]]}}}