{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T03:29:23Z","timestamp":1768966163492,"version":"3.49.0"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"1","content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1186\/s12859-017-1747-0","type":"journal-article","created":{"date-parts":[[2017,7,12]],"date-time":"2017-07-12T11:36:23Z","timestamp":1499859383000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":64,"title":["Investigating reproducibility and tracking provenance \u2013 A genomic workflow case study"],"prefix":"10.1186","volume":"18","author":[{"given":"Sehrish","family":"Kanwal","sequence":"first","affiliation":[]},{"given":"Farah Zaib","family":"Khan","sequence":"additional","affiliation":[]},{"given":"Andrew","family":"Lonie","sequence":"additional","affiliation":[]},{"given":"Richard O.","family":"Sinnott","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,7,12]]},"reference":[{"issue":"3","key":"1747_CR1","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1038\/nbt0308-256b","volume":"26","author":"N Siva","year":"2008","unstructured":"Siva N. 1000 Genomes project. Nat Biotechnol. 2008;26(3):256.","journal-title":"Nat Biotechnol"},{"issue":"65","key":"1747_CR2","first-page":"65ra4","volume":"3","author":"CJ Bell","year":"2011","unstructured":"Bell CJ, et al. Carrier testing for severe childhood recessive diseases by next-generation sequencing. Sci Transl Med. 2011;3(65):65ra4.","journal-title":"Sci Transl Med"},{"key":"1747_CR3","volume-title":"Proceedings of the ninth ACM international conference on Embedded software","author":"J Vitek","year":"2011","unstructured":"Vitek J, Kalibera T. Repeatability, reproducibility, and rigor in systems research. In: Proceedings of the ninth ACM international conference on Embedded software. \u115f: ACM; 2011."},{"key":"1747_CR4","unstructured":"Merriam-webster.com. (n.d.).\u00a0Definition of PROVENANCE. [online] Available at: https:\/\/www.merriam-webster.com\/dictionary\/provenance . Accessed 24 Jul 2015."},{"key":"1747_CR5","doi-asserted-by":"crossref","first-page":"1345","DOI":"10.1145\/1376616.1376772","volume-title":"Proceedings of the 2008 ACM SIGMOD international conference on Management of data","author":"SB Davidson","year":"2008","unstructured":"Davidson SB, Freire J. Provenance and scientific workflows: challenges and opportunities. In: Proceedings of the 2008 ACM SIGMOD international conference on Management of data. Vancouver: ACM; 2008. p. 1345\u201350."},{"issue":"6","key":"1747_CR6","doi-asserted-by":"crossref","first-page":"276","DOI":"10.1016\/S0168-9525(00)02024-2","volume":"16","author":"P Rice","year":"2000","unstructured":"Rice P, L I, Bleasby A. EMBOSS: the European Molecular Biology Open Software Suite. Tends in Genetics. 2000;16(6):276\u20137.","journal-title":"Tends in Genetics"},{"issue":"10","key":"1747_CR7","doi-asserted-by":"crossref","first-page":"1611","DOI":"10.1101\/gr.361602","volume":"12","author":"JE Stajich","year":"2002","unstructured":"Stajich JE, et al. The Bioperl toolkit: Perl modules for the life sciences. Genome Res. 2002;12(10):1611\u20138.","journal-title":"Genome Res"},{"issue":"11","key":"1747_CR8","doi-asserted-by":"crossref","first-page":"1422","DOI":"10.1093\/bioinformatics\/btp163","volume":"25","author":"PJ Cock","year":"2009","unstructured":"Cock PJ, et al. Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics. 2009;25(11):1422\u20133.","journal-title":"Bioinformatics"},{"key":"1747_CR9","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/978-3-540-69297-3_6","volume-title":"Cancer Prevention II","author":"DF Ransohoff","year":"2009","unstructured":"Ransohoff DF. Promises and limitations of biomarkers. In: Cancer Prevention II. \u115f: Springer; 2009. p. 55\u20139."},{"key":"1747_CR10","unstructured":"Gilbert Omenn, C M. Evolution of Translational Omics: Lessons Learned and the Path Forward. 2012. Available from: http:\/\/www.nationalacademies.org\/hmd\/Reports\/2012\/Evolution-of-Translational-Omics.aspx . Accessed 21 Aug 2014."},{"issue":"1","key":"1747_CR11","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1186\/s13073-015-0202-y","volume":"7","author":"CL Zheng","year":"2015","unstructured":"Zheng CL, et al. Use of semantic workflows to enhance transparency and reproducibility in clinical omics. Genome medicine. 2015;7(1):73.","journal-title":"Genome medicine"},{"key":"1747_CR12","doi-asserted-by":"crossref","unstructured":"Nekrutenko A, Taylor J. Next-generation sequencing data interpretation: enhancing reproducibility and accessibility. Nature Reviews Genetics. 2012;13(9):667\u2013672.","DOI":"10.1038\/nrg3305"},{"issue":"14","key":"1747_CR13","doi-asserted-by":"crossref","first-page":"1754","DOI":"10.1093\/bioinformatics\/btp324","volume":"25","author":"H Li","year":"2009","unstructured":"Li H, Durbin R. Fast and accurate short read alignment with Burrows\u2013Wheeler transform. Bioinformatics. 2009;25(14):1754\u201360.","journal-title":"Bioinformatics"},{"issue":"6046","key":"1747_CR14","doi-asserted-by":"crossref","first-page":"1157","DOI":"10.1126\/science.1208130","volume":"333","author":"N Stransky","year":"2011","unstructured":"Stransky N, et al. The mutational landscape of head and neck squamous cell carcinoma. Science. 2011;333(6046):1157\u201360.","journal-title":"Science"},{"issue":"2","key":"1747_CR15","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1038\/ng.295","volume":"41","author":"JP Ioannidis","year":"2009","unstructured":"Ioannidis JP, et al. Repeatability of published microarray gene expression analyses. Nat Genet. 2009;41(2):149\u201355.","journal-title":"Nat Genet"},{"issue":"4","key":"1747_CR16","doi-asserted-by":"crossref","first-page":"553","DOI":"10.1002\/bimj.200900154","volume":"51","author":"T Hothorn","year":"2009","unstructured":"Hothorn T, Held L, Friede T. Biometrical journal and reproducible research. Biom J. 2009;51(4):553\u20135.","journal-title":"Biom J"},{"issue":"3","key":"1747_CR17","doi-asserted-by":"crossref","first-page":"288","DOI":"10.1093\/bib\/bbq084","volume":"12","author":"T Hothorn","year":"2011","unstructured":"Hothorn T, Leisch F. Case studies in reproducibility. Brief Bioinform. 2011;12(3):288\u2013300.","journal-title":"Brief Bioinform"},{"key":"1747_CR18","unstructured":"Leipzig J. A review of bioinformatic pipeline frameworks. Briefings in bioinformatics. 2017;18(3):530\u2013536."},{"key":"1747_CR19","doi-asserted-by":"crossref","unstructured":"Kanwal S, et al. Challenges of Large-scale Biomedical Workflows on the Cloud \u2013 A Case Study on the Need for Reproducibility of Results, in 28th IEEE International Conference on Computer Based Medical Systems. 2015: Sao Paulo, Brazil.","DOI":"10.1109\/CBMS.2015.28"},{"issue":"10","key":"1747_CR20","doi-asserted-by":"crossref","first-page":"1039","DOI":"10.1002\/cpe.994","volume":"18","author":"B Lud\u00e4scher","year":"2006","unstructured":"Lud\u00e4scher B, et al. Scientific workflow management and the Kepler system. Concurrency and Computation: Practice and Experience. 2006;18(10):1039\u201365.","journal-title":"Concurrency and Computation: Practice and Experience"},{"issue":"3","key":"1747_CR21","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1016\/S0169-023X(97)00033-5","volume":"24","author":"F Casati","year":"1998","unstructured":"Casati F, et al. Workflow evolution. Data Knowl Eng. 1998;24(3):211\u201338.","journal-title":"Data Knowl Eng"},{"key":"1747_CR22","volume-title":"International Provenance and Annotation Workshop","author":"Y Zhao","year":"2006","unstructured":"Zhao Y, Wilde M, Foster I. Applying the virtual data provenance model. In: International Provenance and Annotation Workshop. \u115f: Springer; 2006."},{"key":"1747_CR23","doi-asserted-by":"crossref","first-page":"148","DOI":"10.1016\/j.compchemeng.2014.04.006","volume":"67","author":"GS Joglekar","year":"2014","unstructured":"Joglekar GS, Giridhar A, Reklaitis G. A workflow modeling system for capturing data provenance. Comput Chem Eng. 2014;67:148\u201358.","journal-title":"Comput Chem Eng"},{"key":"1747_CR24","volume-title":"TaPP","author":"P Missier","year":"2013","unstructured":"Missier P, et al. D-PROV: extending the PROV provenance model with workflow structure. In: TaPP; 2013."},{"issue":"6","key":"1747_CR25","doi-asserted-by":"crossref","first-page":"812","DOI":"10.1016\/j.future.2010.10.012","volume":"27","author":"P Missier","year":"2011","unstructured":"Missier P, Goble C. Workflows to open provenance graphs, round-trip. Future Generation Computer Systems-the International Journal of Grid Computing and Escience. 2011;27(6):812\u20139.","journal-title":"Future Generation Computer Systems-the International Journal of Grid Computing and Escience"},{"key":"1747_CR26","doi-asserted-by":"crossref","first-page":"S2","DOI":"10.1186\/1471-2105-8-S1-S2","volume":"8 Suppl 1","author":"E Bartocci","year":"2007","unstructured":"Bartocci E, et al. BioWMS: a web-based Workflow Management System for bioinformatics. BMC Bioinformatics. 2007;8 Suppl 1:S2.","journal-title":"BMC Bioinformatics"},{"issue":"8","key":"1747_CR27","doi-asserted-by":"crossref","first-page":"R86","DOI":"10.1186\/gb-2010-11-8-r86","volume":"11","author":"J Goecks","year":"2010","unstructured":"Goecks J, Nekrutenko A, Taylor J. Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences. Genome Biol. 2010;11(8):R86.","journal-title":"Genome Biol"},{"issue":"8","key":"1747_CR28","doi-asserted-by":"crossref","first-page":"1904","DOI":"10.1101\/gr.1363103","volume":"13","author":"S Hoon","year":"2003","unstructured":"Hoon S, et al. Biopipe: A Flexible Framework for Protocol-Based Bioinformatics Analysis. Genome Res. 2003;13(8):1904\u201315.","journal-title":"Genome Res"},{"issue":"22","key":"1747_CR29","doi-asserted-by":"crossref","first-page":"3005","DOI":"10.1093\/bioinformatics\/btp493","volume":"25","author":"B Neron","year":"2009","unstructured":"Neron B, et al. Mobyle: a new full web bioinformatics framework. Bioinformatics. 2009;25(22):3005\u201311.","journal-title":"Bioinformatics"},{"key":"1747_CR30","doi-asserted-by":"crossref","unstructured":"Wolstencroft K, et al. The Taverna workflow suite: designing and executing workflows of Web Services on the desktop, web or in the cloud. Nucleic acids research. 2013;41(W1):W557\u2013W561.","DOI":"10.1093\/nar\/gkt328"},{"key":"1747_CR31","doi-asserted-by":"crossref","unstructured":"Baggerly KA, Coombes KR. Deriving chemosensitivity from cell lines: Forensic bioinformatics and reproducible research in high-throughput biology. The Annals of Applied Statistics. 2009:1309\u201334.","DOI":"10.1214\/09-AOAS291"},{"issue":"9","key":"1747_CR32","doi-asserted-by":"crossref","first-page":"1297","DOI":"10.1101\/gr.107524.110","volume":"20","author":"A McKenna","year":"2010","unstructured":"McKenna A, et al. The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010;20(9):1297\u2013303.","journal-title":"Genome Res"},{"issue":"1","key":"1747_CR33","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1186\/s13073-015-0191-x","volume":"7","author":"SP Sadedin","year":"2015","unstructured":"Sadedin SP, et al. Cpipe: a shared variant detection pipeline designed for diagnostic settings. Genome medicine. 2015;7(1):68.","journal-title":"Genome medicine"},{"key":"1747_CR34","volume-title":"Common Workflow Language, draft 3","author":"A Peter","year":"2016","unstructured":"Peter, A., Robin Andeer, Brad Chapman, John Chilton, Michael R. Crusoe, Roman Valls Guimer\u00e0, Guillermo Carrasco Hernandez, Sinisa Ivkovic, Andrey Kartashov, John Kern, Dan Leehr, Herv\u00e9 M\u00e9nager, Maxim Mikheev, Tim Pierce, Josh Randall, Stian Soiland-Reyes, Luka Stojanovic, Neboj\u0161a Tijani\u0107. Common Workflow Language, draft 3. 2016 figshare, March 2016."},{"issue":"B","key":"1747_CR35","doi-asserted-by":"crossref","first-page":"30","DOI":"10.14806\/ej.17.B.286","volume":"17","author":"RV Guimera","year":"2012","unstructured":"Guimera RV. bcbio-nextgen: Automated, distributed next-gen sequencing pipeline. EMBnet journal. 2012;17(B):30.","journal-title":"EMBnet journal"},{"key":"1747_CR36","doi-asserted-by":"crossref","unstructured":"Fisch KM, et al. Omics Pipe: a community-based framework for reproducible multi-omics data analysis. Bioinformatics. 2015;31(11):1724\u20131728.","DOI":"10.1093\/bioinformatics\/btv061"},{"key":"1747_CR37","doi-asserted-by":"crossref","first-page":"e644","DOI":"10.7717\/peerj.644","volume":"2","author":"O Golosova","year":"2014","unstructured":"Golosova O, et al. Unipro UGENE NGS pipelines and components for variant calling, RNA-seq and ChIP-seq data analyses. PeerJ. 2014;2:e644.","journal-title":"PeerJ"},{"issue":"19","key":"1747_CR38","doi-asserted-by":"crossref","first-page":"2520","DOI":"10.1093\/bioinformatics\/bts480","volume":"28","author":"J K\u00f6ster","year":"2012","unstructured":"K\u00f6ster J, Rahmann S. Snakemake\u2014a scalable bioinformatics workflow engine. Bioinformatics. 2012;28(19):2520\u20132.","journal-title":"Bioinformatics"},{"issue":"11","key":"1747_CR39","doi-asserted-by":"crossref","first-page":"1525","DOI":"10.1093\/bioinformatics\/bts167","volume":"28","author":"SP Sadedin","year":"2012","unstructured":"Sadedin SP, Pope B, Oshlack A. Bpipe: a tool for running and managing bioinformatics pipelines. Bioinformatics. 2012;28(11):1525\u20136.","journal-title":"Bioinformatics"},{"issue":"21","key":"1747_CR40","doi-asserted-by":"crossref","first-page":"2778","DOI":"10.1093\/bioinformatics\/btq524","volume":"26","author":"L Goodstadt","year":"2010","unstructured":"Goodstadt L. Ruffus: a lightweight Python library for computational pipelines. Bioinformatics. 2010;26(21):2778\u20139.","journal-title":"Bioinformatics"},{"key":"1747_CR41","volume-title":"Proceedings of the 2006 ACM SIGMOD international conference on Management of data","author":"SP Callahan","year":"2006","unstructured":"Callahan SP, et al. VisTrails: visualization meets data management. In: Proceedings of the 2006 ACM SIGMOD international conference on Management of data; 2006. ACM."},{"issue":"1","key":"1747_CR42","doi-asserted-by":"crossref","first-page":"62","DOI":"10.1109\/MIS.2010.9","volume":"26","author":"Y Gil","year":"2011","unstructured":"Gil Y, et al. Wings: Intelligent workflow-based design of computational experiments. IEEE Intell Syst. 2011;26(1):62\u201372.","journal-title":"IEEE Intell Syst"},{"key":"1747_CR43","unstructured":"KNIME. [cited 2017; Available from: http:\/\/www.knime.com \/."},{"key":"1747_CR44","volume-title":"e-Science (e-Science), 2016 IEEE 12th International Conference on","author":"RJ Sethi","year":"2016","unstructured":"Sethi RJ, Gil Y. Reproducibility in computer vision: Towards open publication of image analysis experiments as semantic workflows. In: e-Science (e-Science), 2016 IEEE 12th International Conference on; 2016. IEEE."},{"key":"1747_CR45","volume-title":"Proceedings of the 6th workshop on Workflows in support of large-scale science","author":"M Hauder","year":"2011","unstructured":"Hauder M, et al. Making data analysis expertise broadly accessible through workflows. In: Proceedings of the 6th workshop on Workflows in support of large-scale science; 2011. ACM."},{"key":"1747_CR46","first-page":"1","volume-title":"A survey on semantic scientific workflow","author":"Z Zhao","year":"2012","unstructured":"Zhao Z, Paschke A. A survey on semantic scientific workflow. \u115f: Semantic Web J. IOS Press; 2012. p. 1\u20135."},{"key":"1747_CR47","unstructured":"Azure, M. Workflow Definition Language. [cited 2017; Available from: https:\/\/docs.microsoft.com\/en-us\/rest\/api\/logic\/definition-language ."},{"key":"1747_CR48","volume-title":"Want to better understand the accuracy of your human genome sequencing?","author":"J Zook","year":"2013","unstructured":"Zook, J. Want to better understand the accuracy of your human genome sequencing? 2013 [cited 2015 December]; Available from: http:\/\/www.nist.gov\/mml\/bbd\/ppgenomeinabottle2.cfm ."},{"key":"1747_CR49","unstructured":"Sadedin, S. Melbourne Genomics Cpipe. 2016. Available from: https:\/\/github.com\/MelbourneGenomics\/cpipe . Accessed 28 Mar 2016."},{"issue":"10","key":"1747_CR50","doi-asserted-by":"crossref","first-page":"e0140829","DOI":"10.1371\/journal.pone.0140829","volume":"10","author":"E Afgan","year":"2015","unstructured":"Afgan E, et al. Genomics Virtual Laboratory: A Practical Bioinformatics Workbench for the Cloud. PLoS One. 2015;10(10):e0140829.","journal-title":"PLoS One"},{"key":"1747_CR51","unstructured":"Picard. Picard. [cited 2014 28 Aug]; Available from: http:\/\/broadinstitute.github.io\/picard\/ ."},{"key":"1747_CR52","unstructured":"Common Workflow Language. 2015. Available from: https:\/\/github.com\/common-workflow-language . Accessed 15 Aug 2015."},{"key":"1747_CR53","unstructured":"Rehman J. Cancer research in crisis: Are the drugs we count on based on bad science? 2013. Available from: http:\/\/www.salon.com\/2013\/09\/01\/is_cancer_research_facing_a_crisis\/ . Accessed 14 Aug 2014."},{"issue":"7604","key":"1747_CR54","doi-asserted-by":"crossref","first-page":"452","DOI":"10.1038\/533452a","volume":"533","author":"M Baker","year":"2016","unstructured":"Baker M. 1,500 scientists lift the lid on reproducibility. Nature. 2016;533(7604):452\u20134.","journal-title":"Nature"},{"key":"1747_CR55","volume-title":"Lies, Damned Lies, and Medical Science","author":"DH Freedman","year":"2010","unstructured":"Freedman, D.H. Lies, Damned Lies, and Medical Science. 2010; Available from: https:\/\/www.theatlantic.com\/magazine\/archive\/2010\/11\/lies-damned-lies-and-medical-science\/308269\/ ."},{"key":"1747_CR56","volume-title":"Unreliable research - Trouble at the lab","author":"T Economist","year":"2013","unstructured":"Economist, T. Unreliable research - Trouble at the lab. 2013; Available from: http:\/\/www.economist.com\/news\/briefing\/21588057-scientists-think-science-self-correcting-alarming-degree-it-not-trouble ."},{"issue":"1","key":"1747_CR57","doi-asserted-by":"crossref","first-page":"116","DOI":"10.1161\/CIRCRESAHA.114.303819","volume":"116","author":"CG Begley","year":"2015","unstructured":"Begley CG, Ioannidis JP. Reproducibility in science. Circ Res. 2015;116(1):116\u201326.","journal-title":"Circ Res"},{"key":"1747_CR58","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.future.2013.12.001","volume":"34","author":"V Curcin","year":"2014","unstructured":"Curcin V, et al. Implementing interoperable provenance in biomedical research. Futur Gener Comput Syst. 2014;34:1\u201316.","journal-title":"Futur Gener Comput Syst"},{"key":"1747_CR59","unstructured":"De Roure D, et al. Towards the preservation of scientific workflows. In: Procs. of the 8th International Conference on Preservation of Digital Objects (iPRES 2011). \u115f: ACM. p. 2011."},{"key":"1747_CR60","unstructured":"Why workflows break:understanding and combating decay in Taverna workflows. 2012."},{"issue":"6317","key":"1747_CR61","doi-asserted-by":"crossref","first-page":"1240","DOI":"10.1126\/science.aah6168","volume":"354","author":"V Stodden","year":"2016","unstructured":"Stodden V, et al. Enhancing reproducibility for computational methods. Science. 2016;354(6317):1240\u20131.","journal-title":"Science"},{"key":"1747_CR62","volume-title":"Workflow-centric research objects: First class citizens in scholarly discourse","author":"O Corcho","year":"2012","unstructured":"Corcho, O., et al., Workflow-centric research objects: First class citizens in scholarly discourse. 2012."},{"issue":"4","key":"1747_CR63","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/MCSE.2012.76","volume":"14","author":"J Freire","year":"2012","unstructured":"Freire J, Silva CT. Making Computations and Publications Reproducible with VisTrails. Computing in Science & Engineering. 2012;14(4):18\u201325.","journal-title":"Computing in Science & Engineering"},{"issue":"1","key":"1747_CR64","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1038\/nrg2626","volume":"11","author":"ML Metzker","year":"2009","unstructured":"Metzker ML. Sequencing technologies\u2014the next generation. Nat Rev Genet. 2009;11(1):31\u201346.","journal-title":"Nat Rev Genet"},{"key":"1747_CR65","volume-title":"Proceedings of the 16th International Conference on Extending Database Technology","author":"P Missier","year":"2013","unstructured":"Missier P, Belhajjame K, Cheney J. The W3C PROV family of specifications for modelling provenance metadata. In: Proceedings of the 16th International Conference on Extending Database Technology; 2013. ACM."}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-017-1747-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,30]],"date-time":"2022-07-30T20:52:55Z","timestamp":1659214375000},"score":1,"resource":{"primary":{"URL":"http:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-017-1747-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7,12]]},"references-count":65,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["1747"],"URL":"https:\/\/doi.org\/10.1186\/s12859-017-1747-0","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,7,12]]},"article-number":"337"}}