{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T17:10:14Z","timestamp":1758474614280,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030170820"},{"type":"electronic","value":"9783030170837"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-17083-7_13","type":"book-chapter","created":{"date-parts":[[2019,4,14]],"date-time":"2019-04-14T23:02:19Z","timestamp":1555282939000},"page":"208-226","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Fast Approximation of Frequent k-mers and Applications to Metagenomics"],"prefix":"10.1007","author":[{"given":"Leonardo","family":"Pellegrina","sequence":"first","affiliation":[]},{"given":"Cinzia","family":"Pizzi","sequence":"additional","affiliation":[]},{"given":"Fabio","family":"Vandin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,4,2]]},"reference":[{"key":"13_CR1","doi-asserted-by":"publisher","first-page":"e94","DOI":"10.7717\/peerj-cs.94","volume":"2","author":"G Benoit","year":"2016","unstructured":"Benoit, G., Peterlongo, P., et al.: Multiple comparative metagenomics using multiset k-mer counting. PeerJ Comput. Sci. 2, e94 (2016)","journal-title":"PeerJ Comput. Sci."},{"issue":"22","key":"13_CR2","doi-asserted-by":"publisher","first-page":"3584","DOI":"10.1093\/bioinformatics\/btv419","volume":"31","author":"K B\u0159inda","year":"2015","unstructured":"B\u0159inda, K., Sykulski, M., Kucherov, G.: Spaced seeds improve k-mer-based metagenomic classification. Bioinformatics 31(22), 3584\u20133592 (2015)","journal-title":"Bioinformatics"},{"key":"13_CR3","unstructured":"Brown, C.T., Howe, A., et al.: A reference-free algorithm for computational normalization of shotgun sequencing data. arXiv preprint arXiv:1203.4802 (2012)"},{"issue":"1","key":"13_CR4","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1093\/bioinformatics\/btt310","volume":"30","author":"R Chikhi","year":"2013","unstructured":"Chikhi, R., Medvedev, P.: Informed and automated k-mer size selection for genome assembly. Bioinformatics 30(1), 31\u201337 (2013)","journal-title":"Bioinformatics"},{"issue":"6","key":"13_CR5","doi-asserted-by":"publisher","first-page":"0144","DOI":"10.1038\/s41559-017-0144","volume":"1","author":"R Danovaro","year":"2017","unstructured":"Danovaro, R., Canals, M., et al.: A submarine volcanic eruption leads to a novel microbial habitat. Nat. Ecol. Evol. 1(6), 0144 (2017)","journal-title":"Nat. Ecol. Evol."},{"issue":"8","key":"13_CR6","doi-asserted-by":"publisher","first-page":"e1700585","DOI":"10.1126\/sciadv.1700585","volume":"3","author":"LB Dickson","year":"2017","unstructured":"Dickson, L.B., Jiolle, D., et al.: Carryover effects of larval exposure to different environmental bacteria drive adult trait variation in a mosquito vector. Sci. Adv. 3(8), e1700585 (2017)","journal-title":"Sci. Adv."},{"issue":"17","key":"13_CR7","doi-asserted-by":"publisher","first-page":"i567","DOI":"10.1093\/bioinformatics\/btw466","volume":"32","author":"S Girotto","year":"2016","unstructured":"Girotto, S., Pizzi, C., Comin, M.: MetaProb: accurate metagenomic reads binning based on probabilistic sequence signatures. Bioinformatics 32(17), i567\u2013i575 (2016)","journal-title":"Bioinformatics"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Hrytsenko, Y., Daniels, N.M., Schwartz, R.S.: Efficient distance calculations between genomes using mathematical approximation. In: Proceedings of the ACM-BCB, p. 546 (2018)","DOI":"10.1145\/3233547.3233654"},{"issue":"11","key":"13_CR9","doi-asserted-by":"publisher","first-page":"R116","DOI":"10.1186\/gb-2010-11-11-r116","volume":"11","author":"DR Kelley","year":"2010","unstructured":"Kelley, D.R., Schatz, M.C., Salzberg, S.L.: Quake: quality-aware detection and correction of sequencing errors. Genome Biol. 11(11), R116 (2010)","journal-title":"Genome Biol."},{"issue":"17","key":"13_CR10","doi-asserted-by":"publisher","first-page":"2759","DOI":"10.1093\/bioinformatics\/btx304","volume":"33","author":"M Kokot","year":"2017","unstructured":"Kokot, M., D\u0142ugosz, M., Deorowicz, S.: KMC 3: counting and manipulating k-mer statistics. Bioinformatics 33(17), 2759\u20132761 (2017)","journal-title":"Bioinformatics"},{"issue":"8","key":"13_CR11","doi-asserted-by":"crossref","first-page":"1916","DOI":"10.1101\/gr.1251803","volume":"13","author":"X Li","year":"2003","unstructured":"Li, X., Waterman, M.S.: Estimating the repeat structure and length of DNA sequences using $$\\ell $$ \u2113 -tuples. Genome Res. 13(8), 1916\u20131922 (2003)","journal-title":"Genome Res."},{"key":"13_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1007\/978-3-642-04128-0_29","volume-title":"Algorithms - ESA 2009","author":"M L\u00f6ffler","year":"2009","unstructured":"L\u00f6ffler, M., Phillips, J.M.: Shape fitting on point sets with probability distributions. In: Fiat, A., Sanders, P. (eds.) ESA 2009. LNCS, vol. 5757, pp. 313\u2013324. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-04128-0_29"},{"issue":"6","key":"13_CR13","doi-asserted-by":"publisher","first-page":"764","DOI":"10.1093\/bioinformatics\/btr011","volume":"27","author":"G Mar\u00e7ais","year":"2011","unstructured":"Mar\u00e7ais, G., Kingsford, C.: A fast, lock-free approach for efficient parallel counting of occurrences of k-mers. Bioinformatics 27(6), 764\u2013770 (2011)","journal-title":"Bioinformatics"},{"issue":"24","key":"13_CR14","doi-asserted-by":"publisher","first-page":"3541","DOI":"10.1093\/bioinformatics\/btu713","volume":"30","author":"P Melsted","year":"2014","unstructured":"Melsted, P., Halld\u00f3rsson, B.V.: KmerStream: streaming algorithms for k-mer abundance estimation. Bioinformatics 30(24), 3541\u20133547 (2014)","journal-title":"Bioinformatics"},{"issue":"1","key":"13_CR15","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1186\/1471-2105-12-333","volume":"12","author":"P Melsted","year":"2011","unstructured":"Melsted, P., Pritchard, J.K.: Efficient counting of k-mers in DNA sequences using a Bloom filter. BMC Bioinform. 12(1), 333 (2011)","journal-title":"BMC Bioinform."},{"key":"13_CR16","volume-title":"Probability and Computing: Randomization and Probabilistic Techniques in Algorithms and Data Analysis","author":"M Mitzenmacher","year":"2017","unstructured":"Mitzenmacher, M., Upfal, E.: Probability and Computing: Randomization and Probabilistic Techniques in Algorithms and Data Analysis. Cambridge University Press, Cambridge (2017)"},{"issue":"9","key":"13_CR17","doi-asserted-by":"crossref","first-page":"1324","DOI":"10.1093\/bioinformatics\/btw832","volume":"33","author":"H Mohamadi","year":"2017","unstructured":"Mohamadi, H., Khan, H., Birol, I.: ntCard: a streaming algorithm for cardinality estimation in genomics data. Bioinformatics 33(9), 1324\u20131330 (2017)","journal-title":"Bioinformatics"},{"issue":"1","key":"13_CR18","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1186\/s13059-016-0997-x","volume":"17","author":"BD Ondov","year":"2016","unstructured":"Ondov, B.D., Treangen, T.J., et al.: Mash: fast genome and metagenome distance estimation using MinHash. Genome Biol. 17(1), 132 (2016)","journal-title":"Genome Biol."},{"issue":"14","key":"13_CR19","first-page":"568","volume":"34","author":"P Pandey","year":"2017","unstructured":"Pandey, P., Bender, M.A., Johnson, R., Patro, R.: Squeakr: an exact and approximate k-mer counting system. Bioinformatics 34(14), 568\u2013575 (2017)","journal-title":"Bioinformatics"},{"issue":"5","key":"13_CR20","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1038\/nbt.2862","volume":"32","author":"R Patro","year":"2014","unstructured":"Patro, R., Mount, S.M., Kingsford, C.: Sailfish enables alignment-free isoform quantification from RNA-seq reads using lightweight algorithms. Nat. Biotechnol. 32(5), 462 (2014)","journal-title":"Nat. Biotechnol."},{"issue":"17","key":"13_CR21","doi-asserted-by":"publisher","first-page":"9748","DOI":"10.1073\/pnas.171285098","volume":"98","author":"PA Pevzner","year":"2001","unstructured":"Pevzner, P.A., Tang, H., Waterman, M.S.: An Eulerian path approach to DNA fragment assembly. Proc. National Acad. Sci. 98(17), 9748\u20139753 (2001)","journal-title":"Proc. National Acad. Sci."},{"issue":"5","key":"13_CR22","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1093\/bioinformatics\/btt020","volume":"29","author":"G Rizk","year":"2013","unstructured":"Rizk, G., Lavenier, D., Chikhi, R.: DSK: k-mer counting with very low memory usage. Bioinformatics 29(5), 652\u2013653 (2013)","journal-title":"Bioinformatics"},{"issue":"14","key":"13_CR23","doi-asserted-by":"publisher","first-page":"1950","DOI":"10.1093\/bioinformatics\/btu132","volume":"30","author":"RS Roy","year":"2014","unstructured":"Roy, R.S., Bhattacharya, D., Schliep, A.: Turtle: Identifying frequent k-mers with cache-efficient algorithms. Bioinformatics 30(14), 1950\u20131957 (2014)","journal-title":"Bioinformatics"},{"issue":"6","key":"13_CR24","doi-asserted-by":"crossref","first-page":"799","DOI":"10.1093\/bioinformatics\/btw321","volume":"33","author":"L Salmela","year":"2016","unstructured":"Salmela, L., Walve, R., Rivals, E., Ukkonen, E.: Accurate self-correction of errors in long reads using de Bruijn graphs. Bioinformatics 33(6), 799\u2013806 (2016)","journal-title":"Bioinformatics"},{"issue":"8","key":"13_CR25","doi-asserted-by":"publisher","first-page":"2677","DOI":"10.1073\/pnas.0813249106","volume":"106","author":"GE Sims","year":"2009","unstructured":"Sims, G.E., Jun, S.-R., Wu, G.A., Kim, S.-H.: Alignment-free genome comparison with feature frequency profiles (FFP) and optimal resolutions. Proc. National Acad. Sci. 106(8), 2677\u20132682 (2009)","journal-title":"Proc. National Acad. Sci."},{"key":"13_CR26","unstructured":"Sivadasan, N., Srinivasan, R., Goyal, K.: Kmerlight: fast and accurate k-mer abundance estimation. arXiv preprint arXiv:1609.05626 (2016)"},{"issue":"3","key":"13_CR27","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1038\/nbt.3442","volume":"34","author":"B Solomon","year":"2016","unstructured":"Solomon, B., Kingsford, C.: Fast search of thousands of short-read sequencing experiments. Nat. Biotechnol. 34(3), 300 (2016)","journal-title":"Nat. Biotechnol."},{"key":"13_CR28","volume-title":"Statistical Learning Theory","author":"V Vapnik","year":"1998","unstructured":"Vapnik, V.: Statistical Learning Theory. Wiley, New York (1998)"},{"issue":"2","key":"13_CR29","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1137\/1116025","volume":"16","author":"V Vapnik","year":"1971","unstructured":"Vapnik, V., Chervonenkis, A.: On the uniform convergence of relative frequencies of events to their probabilities. Theory Prob. Appl. 16(2), 264 (1971)","journal-title":"Theory Prob. Appl."},{"issue":"3","key":"13_CR30","doi-asserted-by":"publisher","first-page":"R46","DOI":"10.1186\/gb-2014-15-3-r46","volume":"15","author":"DE Wood","year":"2014","unstructured":"Wood, D.E., Salzberg, S.L.: Kraken: ultrafast metagenomic sequence classification using exact alignments. Genome Biol. 15(3), R46 (2014)","journal-title":"Genome Biol."},{"issue":"5","key":"13_CR31","doi-asserted-by":"publisher","first-page":"821","DOI":"10.1101\/gr.074492.107","volume":"18","author":"DR Zerbino","year":"2008","unstructured":"Zerbino, D.R., Birney, E.: Velvet: algorithms for de novo short read assembly using de Bruijn graphs. Genome Res. 18(5), 821\u2013829 (2008)","journal-title":"Genome Res."},{"issue":"7","key":"13_CR32","doi-asserted-by":"publisher","first-page":"e101271","DOI":"10.1371\/journal.pone.0101271","volume":"9","author":"Q Zhang","year":"2014","unstructured":"Zhang, Q., Pell, J., et al.: These are not the k-mers you are looking for: efficient online k-mer counting using a probabilistic data structure. PloS One 9(7), e101271 (2014)","journal-title":"PloS One"},{"issue":"12","key":"13_CR33","doi-asserted-by":"publisher","first-page":"i283","DOI":"10.1093\/bioinformatics\/btu288","volume":"30","author":"Z Zhang","year":"2014","unstructured":"Zhang, Z., Wang, W.: RNA-Skim: a rapid method for RNA-Seq quantification at transcript level. Bioinformatics 30(12), i283\u2013i292 (2014)","journal-title":"Bioinformatics"}],"container-title":["Lecture Notes in Computer Science","Research in Computational Molecular Biology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-17083-7_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T18:40:35Z","timestamp":1694803235000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-17083-7_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030170820","9783030170837"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-17083-7_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"2 April 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"RECOMB","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Research in Computational Molecular Biology","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Washington, DC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 May 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 May 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"recomb2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/recomb2019.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"175","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"17","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"20","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"10% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}