{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:33:59Z","timestamp":1750221239691,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,6,11]],"date-time":"2018-06-11T00:00:00Z","timestamp":1528675200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["OAR-1640775"],"award-info":[{"award-number":["OAR-1640775"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,6,11]]},"DOI":"10.1145\/3217880.3217882","type":"proceedings-article","created":{"date-parts":[[2018,6,12]],"date-time":"2018-06-12T12:16:01Z","timestamp":1528805761000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Libra"],"prefix":"10.1145","author":[{"given":"Illyoung","family":"Choi","sequence":"first","affiliation":[{"name":"Dept. of Computer Science, University of Arizona"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alise J.","family":"Ponsero","sequence":"additional","affiliation":[{"name":"Dept. of Agricultural and Biosystems Engineering, University of Arizona"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ken","family":"Youens-Clark","sequence":"additional","affiliation":[{"name":"Dept. of Agricultural and Biosystems Engineering, University of Arizona"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthew","family":"Bomhoff","sequence":"additional","affiliation":[{"name":"Dept. of Agricultural and Biosystems Engineering, University of Arizona"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bonnie L.","family":"Hurwitz","sequence":"additional","affiliation":[{"name":"Dept. of Agricultural and Biosystems Engineering, University of Arizona"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John H.","family":"Hartman","sequence":"additional","affiliation":[{"name":"Dept. of Computer Science, University of Arizona"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2018,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"The Apache Software Foundation \"Apache Hadoop \" Retrieved from http:\/\/hadoop.apache.org\/.  The Apache Software Foundation \"Apache Hadoop \" Retrieved from http:\/\/hadoop.apache.org\/."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.94"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.1261498"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1541-0420.2005.00489.x"},{"key":"e_1_3_2_1_5_1","volume-title":"Near Duplicate Image Detection: min-Hash and tf-idf Weighting,\" BMVC","author":"Chum O.","year":"2008","unstructured":"O. Chum , J. Philbin , and A. Zisserman , \" Near Duplicate Image Detection: min-Hash and tf-idf Weighting,\" BMVC , 2008 . O. Chum, J. Philbin, and A. Zisserman, \"Near Duplicate Image Detection: min-Hash and tf-idf Weighting,\" BMVC, 2008."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1327452.1327492"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btv022"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-015-0875-7"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bth266"},{"key":"e_1_3_2_1_10_1","volume-title":"Bloomfish: A Highly Scalable Distributed K-mer Counting Framework,\" In Proceedings of the IEEE International Conference on Parallel and Distributed Systems (ICPADS)","author":"Gao Tao","year":"2017","unstructured":"Tao Gao , Yanfei Guo , Yanjie Wei , , \" Bloomfish: A Highly Scalable Distributed K-mer Counting Framework,\" In Proceedings of the IEEE International Conference on Parallel and Distributed Systems (ICPADS) , 2017 . Tao Gao, Yanfei Guo, Yanjie Wei, et al., \"Bloomfish: A Highly Scalable Distributed K-mer Counting Framework,\" In Proceedings of the IEEE International Conference on Parallel and Distributed Systems (ICPADS), 2017."},{"key":"e_1_3_2_1_11_1","first-page":"1098","article-title":"Mimir: Memory-Efficient and Scalable MapReduce for Large Supercomputing Systems","author":"Gao Tao","year":"2017","unstructured":"Tao Gao , Yanfei Guo , Boyu Zhang , , \" Mimir: Memory-Efficient and Scalable MapReduce for Large Supercomputing Systems ,\" In Proceedings of the 31st IEEE International Parallel and Distributed Processing Symposium (IPDPS) , pp. 1098 -- 1108 , 2017 . Tao Gao, Yanfei Guo, Boyu Zhang, et al., \"Mimir: Memory-Efficient and Scalable MapReduce for Large Supercomputing Systems,\" In Proceedings of the 31st IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 1098--1108, 2017.","journal-title":"Proceedings of the 31st IEEE International Parallel and Distributed Processing Symposium (IPDPS)"},{"key":"e_1_3_2_1_12_1","volume-title":"Sullivan, \"The Pacific Ocean virome (POV): a marine viral metagenomic dataset and associated protein clusters for quantitative viral ecology,\" PLoS ONE, 8(2)","author":"Hurwitz Bonnie L.","year":"2013","unstructured":"Bonnie L. Hurwitz and Matthew B . Sullivan, \"The Pacific Ocean virome (POV): a marine viral metagenomic dataset and associated protein clusters for quantitative viral ecology,\" PLoS ONE, 8(2) , 2013 . Bonnie L. Hurwitz and Matthew B. Sullivan, \"The Pacific Ocean virome (POV): a marine viral metagenomic dataset and associated protein clusters for quantitative viral ecology,\" PLoS ONE, 8(2), 2013."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1155\/2015\/575687"},{"key":"e_1_3_2_1_14_1","first-page":"647","article-title":"Analysis of similarity\/dissimilarity of DNA sequences based on dual nucleotides","volume":"59","author":"Liao Bo","year":"2008","unstructured":"Bo Liao , Cheng Zeng , F. Q. Li , , \" Analysis of similarity\/dissimilarity of DNA sequences based on dual nucleotides ,\" MATCH Commun. Math. Comput. Chem. , 59 (2008), pp. 647 -- 652 , 2008 . Bo Liao, Cheng Zeng, F. Q. Li, et al., \"Analysis of similarity\/dissimilarity of DNA sequences based on dual nucleotides,\" MATCH Commun. Math. Comput. Chem., 59(2008), pp. 647--652, 2008.","journal-title":"MATCH Commun. Math. Comput. Chem."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/277390.277397"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btr011"},{"key":"e_1_3_2_1_17_1","first-page":"17","article-title":"Mash: fast genome and metagenome distance estimation using MinHash","author":"Ondov Brian D.","year":"2016","unstructured":"Brian D. Ondov , Todd J. Treangen , Pall Melsted , , \" Mash: fast genome and metagenome distance estimation using MinHash ,\" Genome Biol. , 17 , 2016 . Brian D. Ondov, Todd J. Treangen, Pall Melsted, et al., \"Mash: fast genome and metagenome distance estimation using MinHash,\" Genome Biol., 17, 2016.","journal-title":"Genome Biol."},{"key":"e_1_3_2_1_18_1","article-title":"Kmerind: A Flexible Parallel Library for K-mer Indexing of Biological Sequences on Distributed Memory Systems","author":"Pan Tony","year":"2017","unstructured":"Tony Pan , Patrick Flick , Chirag Jain , , \" Kmerind: A Flexible Parallel Library for K-mer Indexing of Biological Sequences on Distributed Memory Systems ,\" IEEE\/ACM Trans. Comput. Biol. Bioinform. , 2017 . Tony Pan, Patrick Flick, Chirag Jain, et al., \"Kmerind: A Flexible Parallel Library for K-mer Indexing of Biological Sequences on Distributed Memory Systems,\" IEEE\/ACM Trans. Comput. Biol. Bioinform., 2017.","journal-title":"IEEE\/ACM Trans. Comput. Biol. Bioinform."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btt020"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/361219.361220"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.1261359"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-5-163"},{"key":"e_1_3_2_1_23_1","first-page":"56","article-title":"Dongarra, \"MPI: A standard message passing interface","volume":"12","author":"Walker David W.","year":"1996","unstructured":"David W. Walker and Jack J . Dongarra, \"MPI: A standard message passing interface ,\" Supercomputer , 12 , pp. 56 -- 68 , 1996 . David W. Walker and Jack J. Dongarra, \"MPI: A standard message passing interface,\" Supercomputer, 12, pp. 56--68, 1996.","journal-title":"Supercomputer"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/560889.792378"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1089\/cmb.2010.0245"},{"key":"e_1_3_2_1_26_1","volume-title":"The Sorcerer II Global Ocean Sampling expedition: expanding the universe of protein families,\" PLoS Biol., 5(3)","author":"Yooseph S.","year":"2007","unstructured":"S. Yooseph , G. Sutton , D. B. Rusch , , \" The Sorcerer II Global Ocean Sampling expedition: expanding the universe of protein families,\" PLoS Biol., 5(3) , 2007 . S. Yooseph, G. Sutton, D. B. Rusch, et al., \"The Sorcerer II Global Ocean Sampling expedition: expanding the universe of protein families,\" PLoS Biol., 5(3), 2007."},{"key":"e_1_3_2_1_27_1","first-page":"95","article-title":"Spark: Cluster computing with working sets","author":"Zaharia Matei","year":"2010","unstructured":"Matei Zaharia , Mosharaf Chowdhury , Michael J. Franklin , , \" Spark: Cluster computing with working sets ,\" HotCloud, 10(10--10) , p. 95 , 2010 . Matei Zaharia, Mosharaf Chowdhury, Michael J. Franklin, et al., \"Spark: Cluster computing with working sets,\" HotCloud, 10(10--10), p. 95, 2010.","journal-title":"HotCloud, 10(10--10)"},{"key":"e_1_3_2_1_28_1","unstructured":"Jon Zuanich \"Hadoop I\/O: Sequence Map Set Array BloomMap Files \" Retrieved from http:\/\/blog.cloudera.com\/blog\/2011\/01\/hadoop-io-sequence-map-set-array-bloommap-files\/.  Jon Zuanich \"Hadoop I\/O: Sequence Map Set Array BloomMap Files \" Retrieved from http:\/\/blog.cloudera.com\/blog\/2011\/01\/hadoop-io-sequence-map-set-array-bloommap-files\/."},{"key":"e_1_3_2_1_29_1","unstructured":"Amazon Web Services Inc. \"Amazon EMR \" Retrieved from https:\/\/aws.amazon.com\/emr\/?nc1=h_ls.  Amazon Web Services Inc. \"Amazon EMR \" Retrieved from https:\/\/aws.amazon.com\/emr\/?nc1=h_ls."}],"event":{"name":"HPDC '18: The 27th International Symposium on High-Performance Parallel and Distributed Computing","sponsor":["University of Arizona University of Arizona","SIGARCH ACM Special Interest Group on Computer Architecture","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"],"location":"Tempe AZ USA","acronym":"HPDC '18"},"container-title":["Proceedings of the 9th Workshop on Scientific Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3217880.3217882","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3217880.3217882","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3217880.3217882","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:07:25Z","timestamp":1750212445000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3217880.3217882"}},"subtitle":["Improved Partitioning Strategies for Massive Comparative Metagenomics Analysis"],"short-title":[],"issued":{"date-parts":[[2018,6,11]]},"references-count":29,"alternative-id":["10.1145\/3217880.3217882","10.1145\/3217880"],"URL":"https:\/\/doi.org\/10.1145\/3217880.3217882","relation":{},"subject":[],"published":{"date-parts":[[2018,6,11]]},"assertion":[{"value":"2018-06-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}