{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T18:36:17Z","timestamp":1764873377941,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2015,5,27]],"date-time":"2015-05-27T00:00:00Z","timestamp":1432684800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100006235","name":"Lawrence Berkely National Laboratory","doi-asserted-by":"publisher","award":["7076018"],"award-info":[{"award-number":["7076018"]}],"id":[{"id":"10.13039\/100006235","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1139158"],"award-info":[{"award-number":["CCF-1139158"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["HHSN261201400006C,1-U54HG007990-01"],"award-info":[{"award-number":["HHSN261201400006C,1-U54HG007990-01"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["FA8750-12-2-0331"],"award-info":[{"award-number":["FA8750-12-2-0331"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2015,5,27]]},"DOI":"10.1145\/2723372.2742787","type":"proceedings-article","created":{"date-parts":[[2015,6,2]],"date-time":"2015-06-02T01:35:02Z","timestamp":1433208902000},"page":"631-646","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":73,"title":["Rethinking Data-Intensive Science Using Scalable Analytics Systems"],"prefix":"10.1145","author":[{"given":"Frank Austin","family":"Nothaft","sequence":"first","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Matt","family":"Massie","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Timothy","family":"Danford","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Zhao","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Uri","family":"Laserson","sequence":"additional","affiliation":[{"name":"Cloudera, San Francisco, CA, USA"}]},{"given":"Carl","family":"Yeksigian","sequence":"additional","affiliation":[{"name":"Genomebridge, Cambridge, MA, USA"}]},{"given":"Jey","family":"Kottalam","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Arun","family":"Ahuja","sequence":"additional","affiliation":[{"name":"Carl Icahn School of Medicine, Mount Sinai, New York, NY, USA"}]},{"given":"Jeff","family":"Hammerbacher","sequence":"additional","affiliation":[{"name":"Carl Icahn School of Medicine, Mount Sinai, New York, NY, USA"}]},{"given":"Michael","family":"Linderman","sequence":"additional","affiliation":[{"name":"Carl Icahn School of Medicine, Mount Sinai, New York, NY, USA"}]},{"given":"Michael J.","family":"Franklin","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"Anthony D.","family":"Joseph","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]},{"given":"David A.","family":"Patterson","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, Berkeley, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2015,5,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1142473.1142548"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536222.2536227"},{"key":"e_1_3_2_1_3_1","unstructured":"Apache. Avro. http:\/\/avro.apache.org."},{"key":"e_1_3_2_1_4_1","unstructured":"Apache. Parquet. http:\/\/parquet.incubator.apache.org."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2742797"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1002\/0471250953.bi1110s43"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2398356.2398376"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807271"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/1268379.1268407"},{"key":"e_1_3_2_1_10_1","volume-title":"SpeedSeq: Ultra-fast personal genome analysis and interpretation. bioRxiv, page 012179","author":"Chiang C.","year":"2014","unstructured":"C. Chiang, R. M. Layer, G. G. Faust, M. R. Lindberg, D. B. Rose, E. P. Garrison, G. T. Marth, A. R. Quinlan, and I. M. Hall. SpeedSeq: Ultra-fast personal genome analysis and interpretation. bioRxiv, page 012179, 2014."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.3071"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/1251254.1251264"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1327452.1327492"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1038\/ng.806"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the Conference on Innovative Data Systems Research (CIDR '15)","author":"Diao Y.","year":"2015","unstructured":"Y. Diao, A. Roy, and T. Bloom. Building highly-optimized, low-latency pipelines for genomic data analysis. In Proceedings of the Conference on Innovative Data Systems Research (CIDR '15), 2015."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2015.7113382"},{"key":"e_1_3_2_1_17_1","volume-title":"SAMBLASTER: fast duplicate marking and structural variant read extraction. Bioinformatics, page btu314","author":"Faust G. G.","year":"2014","unstructured":"G. G. Faust and I. M. Hall. SAMBLASTER: fast duplicate marking and structural variant read extraction. Bioinformatics, page btu314 2014."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.3041"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1101\/gr.114819.110"},{"key":"e_1_3_2_1_20_1","unstructured":"Genomics England. 100 000 genomes project. https:\/\/www.genomicsengland.co.uk\/."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.41"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/2685048.2685096"},{"key":"e_1_3_2_1_23_1","volume-title":"LSST: from science drivers to reference design and anticipated data products. arXiv preprint","author":"Ivezic Z.","year":"2008","unstructured":"Z. Ivezic, J. Tyson, E. Acosta, R. Allsman, S. Anderson, J. Andrew, R. Angel, T. Axelrod, J. Barr, A. Becker, et al. LSST: from science drivers to reference design and anticipated data products. arXiv preprint, 2008."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1504\/IJCSE.2009.026999"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the Conference on Innovative Data Systems Research (CIDR '15)","author":"Kornacker M.","year":"2015","unstructured":"M. Kornacker, A. Behm, V. Bittorf, T. Bobrovytsky, C. Ching, A. Choi, J. Erickson, M. Grund, D. Hecht, M. Jacobs, I. Joshi, L. Kuff, D. Kumar, A. Leblang, N. Li, I. Pandis, H. Robinson, D. Rorke, S. Rus, J. Russel, D. Tsirogiannis, S. Wanderman-Milne, and M. Yoder. Impala: A modern, open-source SQL engine for Hadoop. In Proceedings of the Conference on Innovative Data Systems Research (CIDR '15), 2015."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btt250"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.14778\/2367502.2367518"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1038\/35057062"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1186\/gb-2009-10-11-r134"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btp698"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btp352"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1989323.1989370"},{"key":"e_1_3_2_1_33_1","volume-title":"UCB\/EECS-2013-207, EECS Department","author":"Massie M.","year":"2013","unstructured":"M. Massie, F. Nothaft, C. Hartl, C. Kozanitis, A. Schumacher, A. D. Joseph, and D. A. Patterson. ADAM: Genomics formats and processing patterns for cloud scale computing. Technical report, UCB\/EECS-2013-207, EECS Department, University of California, Berkeley, 2013."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1101\/gr.107524.110"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920886"},{"key":"e_1_3_2_1_36_1","volume-title":"Sequencing technologies|the next generation. Nature Reviews Genetics, 11(1):31--46","author":"Metzker M. L.","year":"2009","unstructured":"M. L. Metzker. Sequencing technologies|the next generation. Nature Reviews Genetics, 11(1):31--46, 2009."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536274.2536306"},{"key":"e_1_3_2_1_38_1","unstructured":"NHGRI. DNA sequencing costs. http:\/\/www.genome.gov\/sequencingcosts\/."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bts054"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1038\/ng.3036"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/357401.357402"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1038\/nrg2857"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btp236"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1101\/gr.089532.108"},{"key":"e_1_3_2_1_46_1","volume-title":"Nature Biotechnology, 26(3):256--256","author":"Siva N.","year":"2008","unstructured":"N. Siva. 1000 genomes project. Nature Biotechnology, 26(3):256--256, 2008."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2013.158"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1186\/gb-2010-11-5-207"},{"key":"e_1_3_2_1_49_1","volume-title":"SMASH: A benchmarking toolkit for human genome variant calling. Bioinformatics, page btu345","author":"Talwalkar A.","year":"2014","unstructured":"A. Talwalkar, J. Liptrap, J. Newcomb, C. Hartl, J. Terhorst, K. Curtis, M. Bresler, Y. S. Song, M. I. Jordan, and D. Patterson. SMASH: A benchmarking toolkit for human genome variant calling. Bioinformatics, page btu345, 2014."},{"key":"e_1_3_2_1_50_1","volume-title":"Bioinformatics","author":"Tarasov A.","year":"2015","unstructured":"A. Tarasov, A. J. Vilella, E. Cuppen, I. J. Nijman, and P. Prins. Sambamba: fast processing of NGS alignment formats. Bioinformatics, 2015."},{"key":"e_1_3_2_1_51_1","volume-title":"http:\/\/broadinstitute.github.io\/picard\/","author":"The Broad Institute of Harvard and MIT. Picard.","year":"2014","unstructured":"The Broad Institute of Harvard and MIT. Picard. http:\/\/broadinstitute.github.io\/picard\/, 2014."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1088\/0067-0049\/192\/1\/9"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1038\/ng.2764"},{"key":"e_1_3_2_1_54_1","first-page":"363","article-title":"FITS|a flexible image transport system","volume":"44","author":"Wells D.","year":"1981","unstructured":"D. Wells, E. Greisen, and R. Harten. FITS|a flexible image transport system. Astronomy and Astrophysics Supplement Series, 44:363, 1981.","journal-title":"Astronomy and Astrophysics Supplement Series"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2484425.2484427"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1086\/301513"},{"key":"e_1_3_2_1_57_1","volume-title":"Faster and more accurate sequence alignment with SNAP. arXiv preprint","author":"Zaharia M.","year":"2011","unstructured":"M. Zaharia, W. J. Bolosky, K. Curtis, A. Fox, D. Patterson, S. Shenker, I. Stoica, R. M. Karp, and T. Sittler. Faster and more accurate sequence alignment with SNAP. arXiv preprint, 2011."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.5555\/2228298.2228301"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.5555\/1863103.1863113"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/2462902.2462913"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCOM.1980.1094702"}],"event":{"name":"SIGMOD\/PODS'15: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Melbourne Victoria Australia","acronym":"SIGMOD\/PODS'15"},"container-title":["Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2723372.2742787","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2723372.2742787","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2723372.2742787","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:43:08Z","timestamp":1763458988000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2723372.2742787"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,5,27]]},"references-count":61,"alternative-id":["10.1145\/2723372.2742787","10.1145\/2723372"],"URL":"https:\/\/doi.org\/10.1145\/2723372.2742787","relation":{},"subject":[],"published":{"date-parts":[[2015,5,27]]},"assertion":[{"value":"2015-05-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}