{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T02:04:05Z","timestamp":1772244245789,"version":"3.50.1"},"reference-count":29,"publisher":"Oxford University Press (OUP)","issue":"9","license":[{"start":{"date-parts":[[2020,7,28]],"date-time":"2020-07-28T00:00:00Z","timestamp":1595894400000},"content-version":"vor","delay-in-days":1,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["1OT3OD025466-0"],"award-info":[{"award-number":["1OT3OD025466-0"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Heart, Lung, and Blood Institute DataSTAGE","award":["1OT3HL142480-01"],"award-info":[{"award-number":["1OT3HL142480-01"]}]},{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100016443","name":"Amazon","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100016443","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Objective<\/jats:title>\n                  <jats:p>Advancements in human genomics have generated a surge of available data, fueling the growth and accessibility of databases for more comprehensive, in-depth genetic studies.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Methods<\/jats:title>\n                  <jats:p>We provide a straightforward and innovative methodology to optimize cloud configuration in order to conduct genome-wide association studies. We utilized Spark clusters on both Google Cloud Platform and Amazon Web Services, as well as\u00a0Hail (http:\/\/doi.org\/10.5281\/zenodo.2646680) for analysis and exploration of genomic variants dataset.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>Comparative evaluation of numerous cloud-based cluster configurations demonstrate a successful and unprecedented compromise between speed and cost for performing genome-wide association studies on 4 distinct whole-genome sequencing datasets. Results are consistent across the 2 cloud providers and could be highly useful for accelerating research in genetics.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Conclusions<\/jats:title>\n                  <jats:p>We present a timely piece for one of the most frequently asked questions when moving to the cloud: what is the trade-off between speed and cost?<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/jamia\/ocaa068","type":"journal-article","created":{"date-parts":[[2020,4,17]],"date-time":"2020-04-17T11:13:18Z","timestamp":1587121998000},"page":"1425-1430","source":"Crossref","is-referenced-by-count":23,"title":["Scalability and cost-effectiveness analysis of whole genome-wide association studies on Google Cloud Platform and Amazon Web Services"],"prefix":"10.1093","volume":"27","author":[{"given":"In\u00e8s","family":"Krissaane","sequence":"first","affiliation":[]},{"given":"Carlos","family":"De Niz","sequence":"additional","affiliation":[]},{"given":"Alba","family":"Guti\u00e9rrez-Sacrist\u00e1n","sequence":"additional","affiliation":[]},{"given":"Gabor","family":"Korodi","sequence":"additional","affiliation":[]},{"given":"Nneka","family":"Ede","sequence":"additional","affiliation":[]},{"given":"Ranjay","family":"Kumar","sequence":"additional","affiliation":[]},{"given":"Jessica","family":"Lyons","sequence":"additional","affiliation":[]},{"given":"Arjun","family":"Manrai","sequence":"additional","affiliation":[]},{"given":"Chirag","family":"Patel","sequence":"additional","affiliation":[]},{"given":"Isaac","family":"Kohane","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0235-7543","authenticated-orcid":false,"given":"Paul","family":"Avillach","sequence":"additional","affiliation":[]}],"member":"286","published-online":{"date-parts":[[2020,7,27]]},"reference":[{"issue":"23","key":"2021031312260834900_ocaa068-B1","doi-asserted-by":"crossref","first-page":"3709","DOI":"10.1093\/bioinformatics\/btx468","article-title":"Cloud-based interactive analytics for terabytes of genomic variants data","volume":"33","author":"Pan","year":"2017","journal-title":"Bioinformatics"},{"key":"2021031312260834900_ocaa068-B2","doi-asserted-by":"crossref","first-page":"308","DOI":"10.1038\/s41431-018-0279-z","article-title":"The Medical Genome Reference Bank: a whole-genome data resource of 4,000 healthy elderly individuals. Rationale and cohort design","volume":"27","author":"Lacaze","year":"2019","journal-title":"Eur J Hum Genet"},{"key":"2021031312260834900_ocaa068-B3","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1038\/s41586-018-0579-z","article-title":"The UK Biobank resource with deep phenotyping and genomic data","volume":"562","author":"Bycroft","year":"2018","journal-title":"Nature"},{"issue":"4","key":"2021031312260834900_ocaa068-B4","doi-asserted-by":"crossref","first-page":"208","DOI":"10.1038\/nrg.2017.113","article-title":"Cloud computing for genomic data analysis and collaboration","volume":"19","author":"Langmead","year":"2018","journal-title":"Nat Rev Genet"},{"issue":"8","key":"2021031312260834900_ocaa068-B5","doi-asserted-by":"crossref","first-page":"1450","DOI":"10.1101\/gr.211656.116","article-title":"GenomeVIP: a cloud platform for genomic variant discovery and interpretation","volume":"27","author":"Mashl","year":"2017","journal-title":"Genome Res"},{"key":"2021031312260834900_ocaa068-B6","first-page":"4146","volume-title":"Advances in Neural Information Processing Systems 31","author":"Wang","year":"2018"},{"issue":"21","key":"2021031312260834900_ocaa068-B7","doi-asserted-by":"crossref","first-page":"3801","DOI":"10.1093\/hmg\/ddy269","article-title":"Whole exome sequencing analysis in severe chronic obstructive pulmonary disease","volume":"27","author":"Qiao","year":"2018","journal-title":"Hum Mol Genet"},{"issue":"6","key":"2021031312260834900_ocaa068-B8","doi-asserted-by":"crossref","first-page":"969","DOI":"10.1136\/amiajnl-2013-002155","article-title":"Bionimbus: a cloud for managing, analyzing and sharing large genomics datasets","volume":"21","author":"Heath","year":"2014","journal-title":"J Am Med Inform Assoc"},{"issue":"7","key":"2021031312260834900_ocaa068-B9","doi-asserted-by":"crossref","first-page":"e1002195","DOI":"10.1371\/journal.pbio.1002195","article-title":"Big data: astronomical or genomical?","volume":"13","author":"Stephens","year":"2015","journal-title":"PLoS Biol"},{"key":"2021031312260834900_ocaa068-B10","doi-asserted-by":"crossref","first-page":"542","DOI":"10.1038\/s41467-020-14288-y","article-title":"Genome-wide rare variant analysis for thousands of phenotypes in 54,000 exomes","volume":"11","author":"Cirulli","year":"2020","journal-title":"Nat Commun"},{"issue":"18","key":"2021031312260834900_ocaa068-B11","doi-asserted-by":"crossref","first-page":"2652","DOI":"10.1093\/bioinformatics\/btu343","article-title":"SparkSeq: fast, scalable and cloud-ready tool for the interactive genomic data analysis with nucleotide precision","volume":"30","author":"Wiewi\u00f3rka","year":"2014","journal-title":"Bioinformatics"},{"issue":"6","key":"2021031312260834900_ocaa068-B12","doi-asserted-by":"crossref","first-page":"928","DOI":"10.1093\/bioinformatics\/btx702","article-title":"ViraPipe: scalable parallel pipeline for viral metagenome analysis from next generation sequencing reads","volume":"34","author":"Maarala","year":"2018","journal-title":"Bioinformatics"},{"issue":"6","key":"2021031312260834900_ocaa068-B13","doi-asserted-by":"crossref","first-page":"e98146","DOI":"10.1371\/journal.pone.0098146","article-title":"CloudDOE: a user-friendly tool for deploying Hadoop clouds and analyzing high-throughput sequencing data with MapReduce","volume":"9","author":"Chung","year":"2014","journal-title":"PLoS One"},{"issue":"3","key":"2021031312260834900_ocaa068-B14","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1007\/s41019-017-0047-z","article-title":"A review of scalable bioinformatics pipelines","volume":"2","author":"Fjukstad","year":"2017","journal-title":"Data Sci Eng"},{"issue":"1","key":"2021031312260834900_ocaa068-B15","doi-asserted-by":"crossref","first-page":"425","DOI":"10.1186\/1471-2164-14-425","article-title":"Rainbow: a tool for large-scale whole-genome sequencing data analysis using cloud computing","volume":"14","author":"Zhao","year":"2013","journal-title":"BMC Genomics"},{"issue":"1","key":"2021031312260834900_ocaa068-B16","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1186\/1471-2105-11-259","article-title":"Cloud computing for comparative genomics","volume":"11","author":"Wall","year":"2010","journal-title":"BMC Bioinformatics"},{"issue":"3","key":"2021031312260834900_ocaa068-B17","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1038\/nrg.2017.89","article-title":"Prioritizing diversity in human genomics research","volume":"19","author":"Hindorff","year":"2018","journal-title":"Nat Rev Genet"},{"issue":"11","key":"2021031312260834900_ocaa068-B18","doi-asserted-by":"crossref","first-page":"R134","DOI":"10.1186\/gb-2009-10-11-r134","article-title":"Searching for SNPs with cloud computing","volume":"10","author":"Langmead","year":"2009","journal-title":"Genome Biol"},{"issue":"1","key":"2021031312260834900_ocaa068-B19","doi-asserted-by":"crossref","first-page":"1470","DOI":"10.1038\/s41467-018-03819-3","article-title":"Genome-wide association study of depression phenotypes in UK Biobank identifies variants in excitatory synaptic pathways","volume":"9","author":"Howard","year":"2018","journal-title":"Nat Commun"},{"key":"2021031312260834900_ocaa068-B20","doi-asserted-by":"crossref","DOI":"10.1002\/0471142905.hg0119s68","article-title":"Quality control procedures for genome-wide association studies","author":"Turner","year":"2011","journal-title":"Curr Protoc Hum Genet"},{"issue":"7571","key":"2021031312260834900_ocaa068-B21","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1038\/nature15394","article-title":"An integrated map of structural variation in 2,504 human genomes","volume":"526","author":"Sudmant","year":"2015","journal-title":"Nature"},{"issue":"7422","key":"2021031312260834900_ocaa068-B22","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1038\/nature11632","article-title":"An integrated map of genetic variation from 1,092 human genomes","volume":"491","author":"1000 Genomes Project Consortium","year":"2012","journal-title":"Nature"},{"issue":"11","key":"2021031312260834900_ocaa068-B23","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1186\/gm205","article-title":"The 1,000 genome, the 100,000 analysis?","volume":"2","author":"Mardis","year":"2010","journal-title":"Genome Med"},{"issue":"16","key":"2021031312260834900_ocaa068-B24","doi-asserted-by":"crossref","first-page":"2078","DOI":"10.1093\/bioinformatics\/btp352","article-title":"The sequence alignment\/map format and SAMtools","volume":"25","author":"Li","year":"2009","journal-title":"Bioinformatics"},{"key":"2021031312260834900_ocaa068-B25","doi-asserted-by":"crossref","first-page":"1889","DOI":"10.12688\/f1000research.9271.1","article-title":"Automated quality control for genome wide association studies","volume":"5","author":"Ellingson","year":"2016","journal-title":"F1000Res"},{"issue":"12","key":"2021031312260834900_ocaa068-B26","doi-asserted-by":"crossref","first-page":"e190","DOI":"10.1371\/journal.pgen.0020190","article-title":"Population structure and eigen analysis","volume":"2","author":"Patterson","year":"2006","journal-title":"PLoS Genet"},{"key":"2021031312260834900_ocaa068-B27","doi-asserted-by":"crossref","first-page":"3617572","DOI":"10.1155\/2016\/3617572","article-title":"Challenges of identifying clinically actionable genetic variants for precision medicine","volume":"2016","author":"Carter","year":"2016","journal-title":"J Healthc Eng"},{"issue":"21","key":"2021031312260834900_ocaa068-B28","doi-asserted-by":"crossref","first-page":"e3","DOI":"10.1158\/0008-5472.CAN-17-0387","article-title":"The cancer genomics cloud: collaborative, reproducible, and democratized-a new paradigm in large-scale computational research","volume":"77","author":"Lau","year":"2017","journal-title":"Cancer Res"},{"key":"2021031312260834900_ocaa068-B29","doi-asserted-by":"crossref","first-page":"k1687","DOI":"10.1136\/bmj.k1687","article-title":"The 100000 Genomes Project: bringing whole genome sequencing to the NHS","volume":"361","author":"Turnbull","year":"2018","journal-title":"BMJ"}],"container-title":["Journal of the American Medical Informatics Association"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/jamia\/article-pdf\/27\/9\/1425\/36588295\/ocaa068.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"http:\/\/academic.oup.com\/jamia\/article-pdf\/27\/9\/1425\/36588295\/ocaa068.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,3,13]],"date-time":"2021-03-13T12:26:26Z","timestamp":1615638386000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/jamia\/article\/27\/9\/1425\/5876972"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7,27]]},"references-count":29,"journal-issue":{"issue":"9","published-online":{"date-parts":[[2020,7,27]]},"published-print":{"date-parts":[[2020,9,1]]}},"URL":"https:\/\/doi.org\/10.1093\/jamia\/ocaa068","relation":{},"ISSN":["1067-5027","1527-974X"],"issn-type":[{"value":"1067-5027","type":"print"},{"value":"1527-974X","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2020,9]]},"published":{"date-parts":[[2020,7,27]]}}}