{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T10:31:37Z","timestamp":1773397897478,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,1]],"date-time":"2021-08-01T00:00:00Z","timestamp":1627776000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8]]},"DOI":"10.1145\/3459930.3469521","type":"proceedings-article","created":{"date-parts":[[2021,7,30]],"date-time":"2021-07-30T18:30:10Z","timestamp":1627669810000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["LSHvec"],"prefix":"10.1145","author":[{"given":"Lizhen","family":"Shi","sequence":"first","affiliation":[{"name":"Florida Polytechnic University"}]},{"given":"Bo","family":"Chen","sequence":"additional","affiliation":[{"name":"Tallahassee"}]}],"member":"320","published-online":{"date-parts":[[2021,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0141287"},{"key":"e_1_3_2_1_2_1","volume-title":"D1","author":"Benson Dennis A","year":"2012","unstructured":"Dennis A Benson, Mark Cavanaugh, Karen Clark, Ilene Karsch-Mizrachi, David J Lipman, James Ostell, and Eric W Sayers. 2012. GenBank. Nucleic acids research 41, D1 (2012),D36--D42."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_1_4_1","first-page":"286096","article-title":"Gene2Vec: distributed representation of genes based on co-expression. bioRxiv","volume":"10","author":"Du J","year":"2018","unstructured":"J Du, P Jia, Y Dai, C Tao, Z Zhao, and D Zhi. 2018. Gene2Vec: distributed representation of genes based on co-expression. bioRxiv. Preprint 10 (2018), 286096.","journal-title":"Preprint"},{"key":"e_1_3_2_1_5_1","volume-title":"D1","author":"Federhen Scott","year":"2012","unstructured":"Scott Federhen. 2012. The NCBI taxonomy database. Nucleic acids research 40, D1 (2012), D136--D143."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1186\/s40168-019-0633-6"},{"key":"e_1_3_2_1_7_1","unstructured":"Internet. [n.d.]. FNV Hash. http:\/\/www.isthe.com\/chongo\/tech\/comp\/fnv\/."},{"key":"e_1_3_2_1_8_1","volume-title":"Bag of Tricks for Efficient Text Classification. arXiv preprint arXiv:1607.01759","author":"Joulin Armand","year":"2016","unstructured":"Armand Joulin, Edouard Grave, Piotr Bojanowski, and Tomas Mikolov. 2016. Bag of Tricks for Efficient Text Classification. arXiv preprint arXiv:1607.01759 (2016). https:\/\/doi.org\/1511.09249v1 arXiv:1607.01759"},{"key":"e_1_3_2_1_9_1","volume-title":"Distributed representations for biological sequence analysis. arXiv preprint arXiv:1608.05949","author":"Kimothi Dhananjay","year":"2016","unstructured":"Dhananjay Kimothi, Akshay Soni, Pravesh Biyani, and James M Hogan. 2016. Distributed representations for biological sequence analysis. arXiv preprint arXiv:1608.05949 (2016)."},{"key":"e_1_3_2_1_10_1","volume-title":"Assessing the performance of the oxford nanopore technologies minion. Biomolecular detection and quantification 3","author":"Laver Thomas","year":"2015","unstructured":"Thomas Laver, J Harrison, PA O'neill, Karen Moore, Audrey Farbos, Konrad Paszkiewicz, and David J Studholme. 2015. Assessing the performance of the oxford nanopore technologies minion. Biomolecular detection and quantification 3 (2015), 1--8."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Jake Lever Martin Krzywinski and Naomi Altman. 2016. Points of significance: classification evaluation.","DOI":"10.1038\/nmeth.3945"},{"key":"e_1_3_2_1_12_1","volume-title":"Vokan Sevim, Michael Mascagni, Li Deng, and Zhong Wang.","author":"Lu Yakang","year":"2021","unstructured":"Yakang Lu, Lizhen Shi, Marc W Van Goethem, Vokan Sevim, Michael Mascagni, Li Deng, and Zhong Wang. 2021. Hybrid Clustering of Long and Short-read for Improved Metagenome Assembly. bioRxiv (2021)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btr011"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1101\/335943"},{"key":"e_1_3_2_1_15_1","volume-title":"Continuous embeddings of DNA sequencing reads, and application to metagenomics. BioRxiv","author":"Menegaux Romain","year":"2018","unstructured":"Romain Menegaux and Jean-Philippe Vert. 2018. Continuous embeddings of DNA sequencing reads, and application to metagenomics. BioRxiv (2018), 335943."},{"key":"e_1_3_2_1_16_1","volume-title":"Tutorial: Assessing metagenomics software with the CAMI benchmarking toolkit. Nature Protocols","author":"Meyer Fernando","year":"2021","unstructured":"Fernando Meyer, Till-Robin Lesker, David Koslicki, Adrian Fritz, Alexey Gurevich, Aaron E Darling, Alexander Sczyrba, Andreas Bremges, and Alice C McHardy. 2021. Tutorial: Assessing metagenomics software with the CAMI benchmarking toolkit. Nature Protocols (2021), 1--17."},{"key":"e_1_3_2_1_17_1","unstructured":"Tomas Mikolov Ilya Sutskever Kai Chen Greg S Corrado and Jeff Dean. 2013. Distributed representations of words and phrases and their compositionality. In Advances in neural information processing systems. 3111--3119."},{"key":"e_1_3_2_1_18_1","volume-title":"Evaluation of genomic high-throughput sequencing data generated on Illumina HiSeq and genome analyzer systems. Genome biology 12, 11","author":"Minoche Andr\u00e9 E","year":"2011","unstructured":"Andr\u00e9 E Minoche, Juliane C Dohm, and Heinz Himmelbauer. 2011. Evaluation of genomic high-throughput sequencing data generated on Illumina HiSeq and genome analyzer systems. Genome biology 12, 11 (2011), R112."},{"key":"e_1_3_2_1_19_1","volume-title":"dna2vec: Consistent vector representations of variable-length k-mers. arXiv preprint arXiv:1701.06279","author":"Patrick Ng.","year":"2017","unstructured":"Patrick Ng. 2017. dna2vec: Consistent vector representations of variable-length k-mers. arXiv preprint arXiv:1701.06279 (2017)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty1007"},{"key":"e_1_3_2_1_22_1","volume-title":"The advantages of SMRT sequencing. Genome biology 14, 6","author":"Roberts Richard J","year":"2013","unstructured":"Richard J Roberts, Mauricio O Carneiro, and Michael C Schatz. 2013. The advantages of SMRT sequencing. Genome biology 14, 6 (2013), 405."},{"key":"e_1_3_2_1_23_1","unstructured":"Alexander Sczyrba Peter Hofmann Peter Belmann David Koslicki Stefan Janssen Johannes Dr\u00f6ge Ivan Gregor Stephan Majda Jessika Fiedler Eik Dahms et al. 2017. Critical assessment of metagenome interpretation---a benchmark of metagenomics software. Nature methods 14 11 (2017) 1063--1071."},{"key":"e_1_3_2_1_24_1","volume-title":"Comparison and Benchmark of Graph Clustering Algorithms. arXiv preprint arXiv:2005.04806","author":"Shi Lizhen","year":"2020","unstructured":"Lizhen Shi and Bo Chen. 2020. Comparison and Benchmark of Graph Clustering Algorithms. arXiv preprint arXiv:2005.04806 (2020)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty733"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2016.10.002"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-011-0841-3"},{"key":"e_1_3_2_1_28_1","volume-title":"The Free Encyclopedia","unstructured":"Wikipedia. 2019. De Bruijn graph --- Wikipedia, The Free Encyclopedia. http:\/\/en.wikipedia.org\/w\/index.php?title=De%20Bruijn%20graph&oldid=907531111. [Online; accessed 05-August-2019]."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty178"}],"event":{"name":"BCB '21: 12th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics","location":"Gainesville Florida","acronym":"BCB '21","sponsor":["SIGBIOM ACM Special Interest Group on Biomedical Computing"]},"container-title":["Proceedings of the 12th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3459930.3469521","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3459930.3469521","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:17:43Z","timestamp":1750191463000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3459930.3469521"}},"subtitle":["a vector representation of DNA sequences using locality sensitive hashing and FastText word embeddings"],"short-title":[],"issued":{"date-parts":[[2021,8]]},"references-count":29,"alternative-id":["10.1145\/3459930.3469521","10.1145\/3459930"],"URL":"https:\/\/doi.org\/10.1145\/3459930.3469521","relation":{},"subject":[],"published":{"date-parts":[[2021,8]]},"assertion":[{"value":"2021-08-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}