{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T04:12:48Z","timestamp":1759637568594,"version":"3.37.3"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,2,7]],"date-time":"2017-02-07T00:00:00Z","timestamp":1486425600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100002341","name":"Suomen Akatemia","doi-asserted-by":"publisher","award":["284598"],"award-info":[{"award-number":["284598"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Algorithmica"],"published-print":{"date-parts":[[2017,11]]},"DOI":"10.1007\/s00453-017-0286-4","type":"journal-article","created":{"date-parts":[[2017,2,7]],"date-time":"2017-02-07T17:13:42Z","timestamp":1486487622000},"page":"857-883","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["A Framework for Space-Efficient String Kernels"],"prefix":"10.1007","volume":"79","author":[{"given":"Djamal","family":"Belazzougui","sequence":"first","affiliation":[]},{"given":"Fabio","family":"Cunial","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,7]]},"reference":[{"key":"286_CR1","doi-asserted-by":"crossref","unstructured":"Apostolico, A.: Maximal words in sequence comparisons based on subword composition. In: Algorithms and Applications, pp. 34\u201344. Springer, Berlin (2010)","DOI":"10.1007\/978-3-642-12476-1_2"},{"issue":"3\u20134","key":"286_CR2","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1089\/106652700750050844","volume":"7","author":"A Apostolico","year":"2000","unstructured":"Apostolico, A., Bejerano, G.: Optimal amnesic probabilistic automata or how to learn and classify proteins in linear time and space. J. Comput. Biol. 7(3\u20134), 381\u2013393 (2000)","journal-title":"J. Comput. Biol."},{"issue":"1","key":"286_CR3","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1186\/1748-7188-3-13","volume":"3","author":"A Apostolico","year":"2008","unstructured":"Apostolico, A., Denas, O.: Fast algorithms for computing sequence distances by exhaustive substring composition. Algorithms Mol. Biol. 3(1), 13 (2008)","journal-title":"Algorithms Mol. Biol."},{"key":"286_CR4","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1613\/jair.1491","volume":"22","author":"R Begleiter","year":"2004","unstructured":"Begleiter, R., El-Yaniv, R., Yona, G.: On prediction using variable order Markov models. J. Artif. Intell. Res. 22, 385\u2013421 (2004)","journal-title":"J. Artif. Intell. Res."},{"issue":"10","key":"286_CR5","doi-asserted-by":"crossref","first-page":"927","DOI":"10.1093\/bioinformatics\/17.10.927","volume":"17","author":"G Bejerano","year":"2001","unstructured":"Bejerano, G., Seldin, Y., Margalit, H., Tishby, N.: Markovian domain fingerprinting: statistical segmentation of protein sequences. Bioinformatics 17(10), 927\u2013934 (2001)","journal-title":"Bioinformatics"},{"key":"286_CR6","doi-asserted-by":"crossref","unstructured":"Bejerano, G., Yona, G.: Modeling protein families using probabilistic suffix trees. In: Proceedings of the Third Annual International Conference on Computational Molecular Biology, pp. 15\u201324. ACM, New York (1999)","DOI":"10.1145\/299432.299445"},{"issue":"1","key":"286_CR7","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1093\/bioinformatics\/17.1.23","volume":"17","author":"G Bejerano","year":"2001","unstructured":"Bejerano, G., Yona, G.: Variations on probabilistic suffix trees: statistical modeling and prediction of protein families. Bioinformatics 17(1), 23\u201343 (2001)","journal-title":"Bioinformatics"},{"key":"286_CR8","doi-asserted-by":"crossref","unstructured":"Belazzougui, D.: Linear time construction of compressed text indices in compact space. arXiv preprint arXiv:1401.0936 (2014)","DOI":"10.1145\/2591796.2591885"},{"key":"286_CR9","doi-asserted-by":"crossref","unstructured":"Belazzougui, D.: Linear time construction of compressed text indices in compact space. In: Symposium on Theory of Computing, STOC 2014, New York, NY, USA, May 31\u2013June 03, 2014, pp. 148\u2013193. ACM, New York (2014)","DOI":"10.1145\/2591796.2591885"},{"key":"286_CR10","doi-asserted-by":"crossref","unstructured":"Belazzougui, D., Cunial, F.: Indexed matching statistics and shortest unique substrings. In: String Processing and Information Retrieval, pp. 179\u2013190. Springer, Berlin (2014)","DOI":"10.1007\/978-3-319-11918-2_18"},{"key":"286_CR11","doi-asserted-by":"crossref","unstructured":"Belazzougui, D., Cunial, F.: A framework for space-efficient string kernels. In: Annual Symposium on Combinatorial Pattern Matching, pp. 13\u201325 (2015)","DOI":"10.1007\/978-3-319-19929-0_2"},{"key":"286_CR12","doi-asserted-by":"crossref","unstructured":"Belazzougui, D., Cunial, F.: Space-efficient detection of unusual words. In: String Processing and Information Retrieval, pp. 222\u2013233. Springer, Berlin (2015)","DOI":"10.1007\/978-3-319-23826-5_22"},{"key":"286_CR13","doi-asserted-by":"crossref","unstructured":"Belazzougui, D., Cunial, F., K\u00e4rkk\u00e4inen, J., M\u00e4kinen, V.: Versatile succinct representations of the bidirectional Burrows\u2013Wheeler transform. In: Algorithms\u2013ESA 2013, pp. 133\u2013144. Springer, Berlin (2013)","DOI":"10.1007\/978-3-642-40450-4_12"},{"issue":"4","key":"286_CR14","first-page":"23","volume":"10","author":"D Belazzougui","year":"2014","unstructured":"Belazzougui, D., Navarro, G.: Alphabet-independent compressed text indexing. ACM Trans. Algorithms (TALG) 10(4), 23 (2014)","journal-title":"ACM Trans. Algorithms (TALG)"},{"key":"286_CR15","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1016\/j.jda.2012.07.005","volume":"18","author":"D Belazzougui","year":"2013","unstructured":"Belazzougui, D., Navarro, G., Valenzuela, D.: Improved compressed indexes for full-text document retrieval. J. Discrete Algorithms 18, 3\u201313 (2013)","journal-title":"J. Discrete Algorithms"},{"issue":"2","key":"286_CR16","doi-asserted-by":"crossref","first-page":"480","DOI":"10.1214\/aos\/1018031204","volume":"27","author":"P B\u00fchlmann","year":"1999","unstructured":"B\u00fchlmann, P., Wyner, A.J., et al.: Variable length Markov chains. Ann. Stat. 27(2), 480\u2013513 (1999)","journal-title":"Ann. Stat."},{"issue":"2\/3","key":"286_CR17","doi-asserted-by":"crossref","first-page":"76","DOI":"10.1093\/comjnl\/40.2_and_3.76","volume":"40","author":"S Bunton","year":"1997","unstructured":"Bunton, S.: Semantically motivated improvements for PPM variants. Comput. J. 40(2\/3), 76\u201393 (1997)","journal-title":"Comput. J."},{"key":"286_CR18","unstructured":"Burrows, M., Wheeler, D.: A block sorting lossless data compression algorithm. Tech. Rep. 124, Digital Equipment Corporation (1994)"},{"key":"286_CR19","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1016\/j.tcs.2012.04.031","volume":"450","author":"S Chairungsee","year":"2012","unstructured":"Chairungsee, S., Crochemore, M.: Using minimal absent words to build phylogeny. Theor. Comput. Sci. 450, 109\u2013116 (2012)","journal-title":"Theor. Comput. Sci."},{"issue":"1","key":"286_CR20","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1093\/bioinformatics\/btt310","volume":"30","author":"R Chikhi","year":"2014","unstructured":"Chikhi, R., Medvedev, P.: Informed and automated $$k$$ k -mer size selection for genome assembly. Bioinformatics 30(1), 31\u201337 (2014)","journal-title":"Bioinformatics"},{"issue":"10","key":"286_CR21","doi-asserted-by":"crossref","first-page":"R108","DOI":"10.1186\/gb-2009-10-10-r108","volume":"10","author":"B Chor","year":"2009","unstructured":"Chor, B., Horn, D., Goldman, N., Levy, Y., Massingham, T., et al.: Genomic DNA $$k$$ k -mer spectra: models and modalities. Genome Biol. 10(10), R108 (2009)","journal-title":"Genome Biol."},{"key":"286_CR22","unstructured":"Clark, D.: Compact Pat trees. Ph.D. thesis, University of Waterloo, Canada (1996)"},{"issue":"4","key":"286_CR23","doi-asserted-by":"crossref","first-page":"396","DOI":"10.1109\/TCOM.1984.1096090","volume":"32","author":"JG Cleary","year":"1984","unstructured":"Cleary, J.G., Witten, I.H.: Data compression using adaptive coding and partial string matching. IEEE Trans. Commun. 32(4), 396\u2013402 (1984)","journal-title":"IEEE Trans. Commun."},{"issue":"3","key":"286_CR24","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1016\/S0020-0190(98)00104-5","volume":"67","author":"M Crochemore","year":"1998","unstructured":"Crochemore, M., Mignosi, F., Restivo, A.: Automata and forbidden words. Inform. Process. Lett. 67(3), 111\u2013117 (1998)","journal-title":"Inform. Process. Lett."},{"issue":"11","key":"286_CR25","doi-asserted-by":"crossref","first-page":"5251","DOI":"10.1109\/TIT.2009.2030460","volume":"55","author":"O Dekel","year":"2009","unstructured":"Dekel, O., Shalev-Shwartz, S., Singer, Y.: Individual sequence prediction using memory-efficient context trees. IEEE Trans. Inform. Theory 55(11), 5251\u20135262 (2009)","journal-title":"IEEE Trans. Inform. Theory"},{"key":"286_CR26","first-page":"48","volume":"95","author":"M Farach","year":"1995","unstructured":"Farach, M., Noordewier, M., Savari, S., Shepp, L., Wyner, A., Ziv, J.: On the entropy of DNA: algorithms and measurements based on memory and rapid convergence. SODA 95, 48\u201357 (1995)","journal-title":"SODA"},{"key":"286_CR27","doi-asserted-by":"crossref","unstructured":"Ferragina, P., Manzini, G.: Opportunistic data structures with applications. In: Proceedings on 41st IEEE Symposium on Foundations of Computer Science (FOCS), pp. 390\u2013398 (2000)","DOI":"10.1109\/SFCS.2000.892127"},{"issue":"4","key":"286_CR28","doi-asserted-by":"crossref","first-page":"552","DOI":"10.1145\/1082036.1082039","volume":"52","author":"P Ferragina","year":"2005","unstructured":"Ferragina, P., Manzini, G.: Indexing compressed texts. J. ACM 52(4), 552\u2013581 (2005)","journal-title":"J. ACM"},{"key":"286_CR29","doi-asserted-by":"crossref","unstructured":"Gagie, T.: Rank and select operations on sequences. In: Encyclopedia of Algorithms, pp. 1776\u20131780. Springer, Berlin (2016)","DOI":"10.1007\/978-1-4939-2864-4_638"},{"issue":"3","key":"286_CR30","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1007\/PL00009177","volume":"19","author":"R Giegerich","year":"1997","unstructured":"Giegerich, R., Kurtz, S.: From Ukkonen to McCreight and Weiner: A unifying view of linear-time suffix tree construction. Algorithmica 19(3), 331\u2013353 (1997)","journal-title":"Algorithmica"},{"key":"286_CR31","unstructured":"Gog, S.: Compressed suffix trees: design, construction, and applications. Ph.D. thesis, University of Ulm, Germany (2011)"},{"issue":"1","key":"286_CR32","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1186\/1471-2105-9-167","volume":"9","author":"J Herold","year":"2008","unstructured":"Herold, J., Kurtz, S., Giegerich, R.: Efficient computation of absent words in genomic sequences. BMC Bioinform. 9(1), 167 (2008)","journal-title":"BMC Bioinform."},{"key":"286_CR33","doi-asserted-by":"crossref","unstructured":"Hozza, M., Vina\u0159, T., Brejov\u00e1, B.: How big is that genome? Estimating genome size and coverage from $$k$$ k -mer abundance spectra. In: String Processing and Information Retrieval, pp. 199\u2013209. Springer, Berlin (2015)","DOI":"10.1007\/978-3-319-23826-5_20"},{"key":"286_CR34","doi-asserted-by":"crossref","unstructured":"Ileri, A.M., Xu, B.: Shortest unique substring query revisited. In: Combinatorial Pattern Matching, pp. 172\u2013181 (2014)","DOI":"10.1007\/978-3-319-07566-2_18"},{"issue":"10","key":"286_CR35","doi-asserted-by":"crossref","first-page":"1314","DOI":"10.1093\/bioinformatics\/bts121","volume":"28","author":"J Lin","year":"2012","unstructured":"Lin, J., Adjeroh, D., Jiang, B.H.: Probabilistic suffix array: efficient modeling and prediction of protein families. Bioinformatics 28(10), 1314\u20131323 (2012)","journal-title":"Bioinformatics"},{"issue":"5","key":"286_CR36","doi-asserted-by":"crossref","first-page":"935","DOI":"10.1137\/0222058","volume":"22","author":"U Manber","year":"1993","unstructured":"Manber, U., Myers, G.: Suffix arrays: a new method for on-line string searches. SIAM J. Comput. 22(5), 935\u2013948 (1993)","journal-title":"SIAM J. Comput."},{"key":"286_CR37","doi-asserted-by":"crossref","unstructured":"Munro, I.: Tables. In: Proceedings of 16th FSTTCS, LNCS 1180, pp. 37\u201342 (1996)","DOI":"10.1007\/3-540-62034-6_35"},{"issue":"1","key":"286_CR38","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s00239-003-2493-7","volume":"58","author":"J Qi","year":"2004","unstructured":"Qi, J., Wang, B., Hao, B.I.: Whole proteome prokaryote phylogeny without sequence alignment: a $$k$$ k -string composition approach. J. Mol. Evol. 58(1), 1\u201311 (2004)","journal-title":"J. Mol. Evol."},{"issue":"12","key":"286_CR39","doi-asserted-by":"crossref","first-page":"1615","DOI":"10.1089\/cmb.2009.0198","volume":"16","author":"G Reinert","year":"2009","unstructured":"Reinert, G., Chew, D., Sun, F., Waterman, M.S.: Alignment-free sequence comparison (I): statistics and power. J. Comput. Biol. 16(12), 1615\u20131634 (2009)","journal-title":"J. Comput. Biol."},{"key":"286_CR40","first-page":"23","volume":"9","author":"K Rieck","year":"2008","unstructured":"Rieck, K., Laskov, P.: Linear-time computation of similarity measures for sequential data. J. Mach. Learn. Res. 9, 23\u201348 (2008)","journal-title":"J. Mach. Learn. Res."},{"key":"286_CR41","doi-asserted-by":"crossref","unstructured":"Rieck, K., Laskov, P., Sonnenburg, S.: Computation of similarity measures for sequential data using generalized suffix trees. In: Advances in Neural Information Processing Systems, pp. 1177\u20131184 (2006)","DOI":"10.7551\/mitpress\/7503.003.0152"},{"issue":"5","key":"286_CR42","doi-asserted-by":"crossref","first-page":"656","DOI":"10.1109\/TIT.1983.1056741","volume":"29","author":"J Rissanen","year":"1983","unstructured":"Rissanen, J., et al.: A universal data compression system. IEEE Trans. Inform. Theory 29(5), 656\u2013664 (1983)","journal-title":"IEEE Trans. Inform. Theory"},{"issue":"2\u20133","key":"286_CR43","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1023\/A:1026490906255","volume":"25","author":"D Ron","year":"1996","unstructured":"Ron, D., Singer, Y., Tishby, N.: The power of amnesia: learning probabilistic automata with variable memory length. Mach. Learn. 25(2\u20133), 117\u2013149 (1996)","journal-title":"Mach. Learn."},{"key":"286_CR44","doi-asserted-by":"crossref","unstructured":"Schulz, M.H., Weese, D., Rausch, T., D\u00f6ring, A., Reinert, K., Vingron, M.: Fast and adaptive variable order Markov chain construction. In: Algorithms in Bioinformatics, pp. 306\u2013317. Springer, Berlin (2008)","DOI":"10.1007\/978-3-540-87361-7_26"},{"key":"286_CR45","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511809682","volume-title":"Kernel Methods for Pattern Analysis","author":"J Shawe-Taylor","year":"2004","unstructured":"Shawe-Taylor, J., Cristianini, N.: Kernel Methods for Pattern Analysis. Cambridge University Press, Cambridge (2004)"},{"issue":"8","key":"286_CR46","doi-asserted-by":"crossref","first-page":"2677","DOI":"10.1073\/pnas.0813249106","volume":"106","author":"GE Sims","year":"2009","unstructured":"Sims, G.E., Jun, S.R., Wu, G.A., Kim, S.H.: Alignment-free genome comparison with feature frequency profiles (FFP) and optimal resolutions. Proc. Natl. Acad. Sci. 106(8), 2677\u20132682 (2009)","journal-title":"Proc. Natl. Acad. Sci."},{"key":"286_CR47","first-page":"585","volume-title":"Advances in Neural Information Processing Systems","author":"AJ Smola","year":"2003","unstructured":"Smola, A.J., Vishwanathan, S.V.N.: Fast kernels for string and tree matching. In: Becker, S., Thrun, S., Obermayer, K. (eds.) Advances in Neural Information Processing Systems, vol. 15, pp. 585\u2013592. MIT Press, London (2003)"},{"key":"286_CR48","unstructured":"Sokol, S.M.D.: Engineering small space dictionary matching. arXiv preprint arXiv:1301.6428 (2013)"},{"key":"286_CR49","doi-asserted-by":"crossref","unstructured":"Teo, C.H., Vishwanathan, S.: Fast and space efficient string kernels using suffix arrays. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 929\u2013936. ACM, New York (2006)","DOI":"10.1145\/1143844.1143961"},{"issue":"2","key":"286_CR50","doi-asserted-by":"crossref","first-page":"336","DOI":"10.1089\/cmb.2006.13.336","volume":"13","author":"I Ulitsky","year":"2006","unstructured":"Ulitsky, I., Burstein, D., Tuller, T., Chor, B.: The average common substring approach to phylogenomic reconstruction. J. Comput. Biol. 13(2), 336\u2013350 (2006)","journal-title":"J. Comput. Biol."},{"issue":"3","key":"286_CR51","doi-asserted-by":"crossref","first-page":"643","DOI":"10.1109\/18.382011","volume":"41","author":"MJ Weinberger","year":"1995","unstructured":"Weinberger, M.J., Rissanen, J.J., Feder, M.: A universal finite memory source. IEEE Trans. Inform. Theory 41(3), 643\u2013652 (1995)","journal-title":"IEEE Trans. Inform. Theory"},{"key":"286_CR52","doi-asserted-by":"crossref","unstructured":"Weiner, P.: Linear pattern matching algorithm. In: Proceedings of 14th Annual IEEE Symposium on Switching and Automata Theory, pp. 1\u201311 (1973)","DOI":"10.1109\/SWAT.1973.13"},{"issue":"4","key":"286_CR53","doi-asserted-by":"crossref","first-page":"1085","DOI":"10.1109\/18.87000","volume":"37","author":"IH Witten","year":"1991","unstructured":"Witten, I.H., Bell, T.C.: The zero-frequency problem: estimating the probabilities of novel events in adaptive text compression. IEEE Trans. Inform. Theory 37(4), 1085\u20131094 (1991)","journal-title":"IEEE Trans. Inform. Theory"}],"container-title":["Algorithmica"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00453-017-0286-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00453-017-0286-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00453-017-0286-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T03:56:01Z","timestamp":1692676561000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00453-017-0286-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,7]]},"references-count":53,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,11]]}},"alternative-id":["286"],"URL":"https:\/\/doi.org\/10.1007\/s00453-017-0286-4","relation":{},"ISSN":["0178-4617","1432-0541"],"issn-type":[{"type":"print","value":"0178-4617"},{"type":"electronic","value":"1432-0541"}],"subject":[],"published":{"date-parts":[[2017,2,7]]}}}