{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T04:27:42Z","timestamp":1759638462798},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,4,1]],"date-time":"2017-04-01T00:00:00Z","timestamp":1491004800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Inf Retrieval J"],"published-print":{"date-parts":[[2017,6]]},"DOI":"10.1007\/s10791-017-9297-7","type":"journal-article","created":{"date-parts":[[2017,4,1]],"date-time":"2017-04-01T10:41:37Z","timestamp":1491043297000},"page":"253-291","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["Document retrieval on repetitive string collections"],"prefix":"10.1007","volume":"20","author":[{"given":"Travis","family":"Gagie","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aleksi","family":"Hartikainen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kalle","family":"Karhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Juha","family":"K\u00e4rkk\u00e4inen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gonzalo","family":"Navarro","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Simon J.","family":"Puglisi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jouni","family":"Sir\u00e9n","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,4,1]]},"reference":[{"key":"9297_CR1","doi-asserted-by":"crossref","unstructured":"Anick, P. G., & Flynn, R. A. (1992). Versioning a full-text information retrieval system. In Proceedings of the 15th annual international ACM conference on research and development in information retrieval (SIGIR) (pp. 98\u2013111).","DOI":"10.1145\/133160.133183"},{"key":"9297_CR2","volume-title":"Modern information retrieval","author":"R Baeza-Yates","year":"2011","unstructured":"Baeza-Yates, R., & Ribeiro-Neto, B. (2011). Modern information retrieval (2nd ed.). Reading: Addison-Wesley.","edition":"2"},{"key":"9297_CR3","doi-asserted-by":"crossref","unstructured":"Belazzougui, D., Cunial, F., Gagie, T., Prezza, N., & Raffinot, M. (2015). Composite repetition-aware data structures. In Proceedings of the 26th annual symposium on combinatorial pattern matching (CPM) (pp. 26\u201339).","DOI":"10.1007\/978-3-319-19929-0_3"},{"key":"9297_CR4","doi-asserted-by":"crossref","unstructured":"Broder, A., Eiron, N., Fontoura, M., Herscovici, M., Lempel, R., McPherson, J., et al. (2006). Indexing shared content in information retrieval systems. In Proceedings of the 10th international conference on extending database technology (EDBT), LNCS 3896 (pp. 313\u2013330).","DOI":"10.1007\/11687238_21"},{"key":"9297_CR5","volume-title":"Information retrieval: Implementing and evaluating search engines","author":"S B\u00fcttcher","year":"2010","unstructured":"B\u00fcttcher, S., Clarke, C., & Cormack, G. (2010). Information retrieval: Implementing and evaluating search engines. Cambridge: MIT Press."},{"key":"9297_CR6","unstructured":"Clark, D. (1996). Compact PAT trees. PhD thesis, University of Waterloo, Canada."},{"key":"9297_CR7","doi-asserted-by":"crossref","unstructured":"Claude, F., Fari\u00f1a, A., Mart\u00ednez-Prieto, M., & Navarro, G. (2010). Compressed q-gram indexing for highly repetitive biological sequences. In Proceedings of the 10th international conference on bioinformatics and bioengineering (BIBE) (pp. 86\u201391).","DOI":"10.1109\/BIBE.2010.22"},{"key":"9297_CR8","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.is.2016.04.002","volume":"61","author":"F Claude","year":"2016","unstructured":"Claude, F., Fari\u00f1a, A., Mart\u00ednez-Prieto, M., & Navarro, G. (2016). Universal indexes for highly repetitive document collections. Information Systems, 61, 1\u201323.","journal-title":"Information Systems"},{"key":"9297_CR9","doi-asserted-by":"crossref","unstructured":"Claude, F., & Munro, I. (2013). Document listing on versioned documents. In Proceedings of the 20th international symposium on string processing and information retrieval (SPIRE), LNCS 8214 (pp. 72\u201383).","DOI":"10.1007\/978-3-319-02432-5_12"},{"issue":"3","key":"9297_CR10","doi-asserted-by":"crossref","first-page":"313","DOI":"10.3233\/FI-2011-565","volume":"111","author":"F Claude","year":"2010","unstructured":"Claude, F., & Navarro, G. (2010). Self-indexed grammar-based compression. Fundamenta Informaticae, 111(3), 313\u2013337.","journal-title":"Fundamenta Informaticae"},{"key":"9297_CR11","doi-asserted-by":"crossref","unstructured":"Claude, F., & Navarro, G. (2012). Improved grammar-based compressed indexes. In Proceedings of the 19th international symposium on string processing and information retrieval (SPIRE), LNCS 7608 (pp. 180\u2013192).","DOI":"10.1007\/978-3-642-34109-0_19"},{"issue":"4","key":"9297_CR12","doi-asserted-by":"crossref","first-page":"735","DOI":"10.1109\/TKDE.2010.242","volume":"24","author":"J Dhaliwal","year":"2012","unstructured":"Dhaliwal, J., Puglisi, S. J., & Turpin, A. (2012). Practical efficient string mining. IEEE Transactions on Knowledge and Data Engineering, 24(4), 735\u2013744.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"9297_CR13","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/j.tcs.2013.07.024","volume":"532","author":"HH Do","year":"2014","unstructured":"Do, H. H., Jansson, J., Sadakane, K., & Sung, W. K. (2014). Fast relative Lempel\u2013Ziv self-index for similar sequences. Theoretical Computer Science, 532, 14\u201330.","journal-title":"Theoretical Computer Science"},{"key":"9297_CR14","doi-asserted-by":"crossref","unstructured":"Ferrada, H., & Navarro, G. (2013). A Lempel\u2013Ziv compressed structure for document listing. In Proceedings of the 20th international symposium on string processing and information retrieval (SPIRE), LNCS 8214 (pp. 116\u2013128).","DOI":"10.1007\/978-3-319-02432-5_16"},{"issue":"2","key":"9297_CR15","doi-asserted-by":"crossref","first-page":"465","DOI":"10.1137\/090779759","volume":"40","author":"J Fischer","year":"2011","unstructured":"Fischer, J., & Heun, V. (2011). Space-efficient preprocessing schemes for range minimum queries on static arrays. SIAM Journal on Computing, 40(2), 465\u2013492.","journal-title":"SIAM Journal on Computing"},{"key":"9297_CR16","doi-asserted-by":"crossref","unstructured":"Gagie, T., Gawrychowski, P., K\u00e4rkk\u00e4inen, J., Nekrich, Y., & Puglisi, S. J., (2012a) A faster grammar-based self-index. In Proceedings of the 6th international conference on language and automata theory and applications (LATA), LNCS 7183 (pp. 240\u2013251).","DOI":"10.1007\/978-3-642-28332-1_21"},{"key":"9297_CR17","doi-asserted-by":"crossref","unstructured":"Gagie, T., Gawrychowski, P., K\u00e4rkk\u00e4inen, J., Nekrich, Y., & Puglisi, S. J. (2014). LZ77-based self-indexing with faster pattern matching. In Proceedings of the 11th Latin American theoretical informatics symposium (LATIN), LNCS 8392 (pp. 731\u2013742).","DOI":"10.1007\/978-3-642-54423-1_63"},{"key":"9297_CR18","doi-asserted-by":"crossref","unstructured":"Gagie, T., Hartikainen, A., K\u00e4rkk\u00e4inen, J., Navarro, G., Puglisi, S. J., & Sir\u00e9n, J. (2015). Document counting in compressed space. In Proceedings of the 25th data compression conference (DCC) (pp. 103\u2013112).","DOI":"10.1109\/DCC.2015.55"},{"key":"9297_CR19","doi-asserted-by":"crossref","unstructured":"Gagie, T., Karhu, K., Navarro, G., Puglisi, S. J., & Sir\u00e9n, J. (2013). Document listing on repetitive collections. In Proceedings of the 24th annual symposium on combinatorial pattern matching (CPM), LNCS 7922 (pp. 107\u2013119).","DOI":"10.1007\/978-3-642-38905-4_12"},{"key":"9297_CR20","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1016\/j.tcs.2011.12.002","volume":"426\u2013427","author":"T Gagie","year":"2012","unstructured":"Gagie, T., Navarro, G., & Puglisi, S. J. (2012b). New algorithms on wavelet trees and applications to information retrieval. Theoretical Computer Science, 426\u2013427, 25\u201341.","journal-title":"Theoretical Computer Science"},{"key":"9297_CR21","doi-asserted-by":"crossref","unstructured":"Gog, S., Beller, T., Moffat, A., & Petri, M. (2014). From theory to practice: Plug and play with succinct data structures. In Proceedings of the 13th international symposium on experimental algorithms (SEA), LNCS 8504 (pp. 326\u2013337).","DOI":"10.1007\/978-3-319-07959-2_28"},{"key":"9297_CR22","doi-asserted-by":"crossref","unstructured":"Gog, S., & Navarro, G. (2015a). Improved single-term top-k document retrieval. In Proceedings of the 17th workshop on algorithm engineering and experiments (ALENEX) (pp. 24\u201332).","DOI":"10.1137\/1.9781611973754.3"},{"key":"9297_CR23","doi-asserted-by":"crossref","unstructured":"Gog, S., & Navarro, G. (2015b). Improved single-term top-k document retrieval. In Proceedings of the 17th Workshop on Algorithm Engineering and Experiments (ALENEX) (pp. 24\u201332).","DOI":"10.1137\/1.9781611973754.3"},{"key":"9297_CR24","unstructured":"Grossi, R., Gupta, A., & Vitter, J. (2003). High-order entropy-compressed text indexes. In Proceedings of the 14th Annual ACM-SIAM Symposium on Discrete Algorithms (SODA) (pp. 841\u2013850)."},{"key":"9297_CR25","doi-asserted-by":"crossref","unstructured":"He, J., & Suel, T. (2012). Optimizing positional index structures for versioned document collections. In Proceedings of the 35th international ACM conference on research and development in information retrieval (SIGIR) (pp. 245\u2013254).","DOI":"10.1145\/2348283.2348319"},{"key":"9297_CR26","doi-asserted-by":"crossref","unstructured":"He, J., Yan, H., & Suel, T. (2009) Compact full-text indexing of versioned document collections. In Proceedings of the 18th ACM international conference on information and knowledge management (CIKM) (pp. 415\u2013424).","DOI":"10.1145\/1645953.1646008"},{"key":"9297_CR27","doi-asserted-by":"crossref","unstructured":"He, J., Zeng, J., & Suel, T. (2010). Improved index compression techniques for versioned document collections. In Proceedings of the 19th ACM international conference on information and knowledge management (CIKM) (pp. 1239\u20131248).","DOI":"10.1145\/1871437.1871594"},{"issue":"2","key":"9297_CR28","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/s10115-013-0648-4","volume":"40","author":"C Hern\u00e1ndez","year":"2014","unstructured":"Hern\u00e1ndez, C., & Navarro, G. (2014). Compressed representations for web and social graphs. Knowledge and Information Systems, 40(2), 279\u2013313.","journal-title":"Knowledge and Information Systems"},{"key":"9297_CR29","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1007\/978-3-642-40273-9_22","volume":"8066","author":"WK Hon","year":"2013","unstructured":"Hon, W. K., Patil, M., Shah, R., Thankachan, S. V., & Vitter, J. S. (2013). Indexes for document retrieval with relevance. Space-Efficient Data Structures, Streams, and Algorithms, LNCS, 8066, 351\u2013362.","journal-title":"Space-Efficient Data Structures, Streams, and Algorithms, LNCS"},{"key":"9297_CR30","doi-asserted-by":"crossref","unstructured":"K\u00e4rkk\u00e4inen, J., Kempa, D., & Puglisi, S. J. (2015). Parallel external memory suffix sorting. In Proceedings of the 26th annual symposium on combinatorial pattern matching (CPM), LNCS 9133 (pp. 329\u2013342).","DOI":"10.1007\/978-3-319-19929-0_28"},{"key":"9297_CR31","doi-asserted-by":"crossref","unstructured":"Konow, R., & Navarro, G. (2013). Faster compact top-k document retrieval. In Proceedings of the 23rd data compression conference (DCC) (pp. 351\u2013360).","DOI":"10.1109\/DCC.2013.43"},{"key":"9297_CR32","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1016\/j.tcs.2012.02.006","volume":"483","author":"S Kreft","year":"2013","unstructured":"Kreft, S., & Navarro, G. (2013). On compressing and indexing repetitive sequences. Theoretical Computer Science, 483, 115\u2013133.","journal-title":"Theoretical Computer Science"},{"issue":"11","key":"9297_CR33","doi-asserted-by":"crossref","first-page":"1722","DOI":"10.1109\/5.892708","volume":"88","author":"NJ Larsson","year":"2000","unstructured":"Larsson, N. J., & Moffat, A. (2000). Off-line dictionary-based compression. Proceedings of the IEEE, 88(11), 1722\u20131732.","journal-title":"Proceedings of the IEEE"},{"key":"9297_CR34","unstructured":"Macdonald, C., McCreadie, R., Santos, R., & Ounis, I. (2012). From puppy to maturity: Experiences in developing Terrier. In Proceedings of the SIGIR 2012 workshop in open source information retrieval (pp. 60\u201363)."},{"issue":"3","key":"9297_CR35","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1089\/cmb.2009.0169","volume":"17","author":"V M\u00e4kinen","year":"2010","unstructured":"M\u00e4kinen, V., Navarro, G., Sir\u00e9n, J., & V\u00e4lim\u00e4ki, N. (2010). Storage and retrieval of highly repetitive sequence collections. Journal of Computational Biology, 17(3), 281\u2013308.","journal-title":"Journal of Computational Biology"},{"issue":"5","key":"9297_CR36","doi-asserted-by":"crossref","first-page":"935","DOI":"10.1137\/0222058","volume":"22","author":"U Manber","year":"1993","unstructured":"Manber, U., & Myers, G. (1993). Suffix arrays: A new method for on-line string searches. SIAM Journal on Computing, 22(5), 935\u2013948.","journal-title":"SIAM Journal on Computing"},{"key":"9297_CR37","unstructured":"Marschall, T., et\u00a0al (2016). Computational pan-genomics: Status, promises and challenges. Tech. rep., Cold Spring Harbor bioRxiv. http:\/\/biorxiv.org\/content\/early\/2016\/03\/29\/043430 ."},{"key":"9297_CR38","unstructured":"Muthukrishnan, S. (2002). Efficient algorithms for document retrieval problems. In Proceedings of the 13th annual ACM\u2013SIAM symposium on discrete algorithms (SODA) (pp. 657\u2013666)."},{"issue":"1","key":"9297_CR39","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1016\/S1570-8667(03)00066-2","volume":"2","author":"G Navarro","year":"2004","unstructured":"Navarro, G. (2004). Indexing text using the Ziv\u2013Lempel trie. Journal of Discrete Algorithms, 2(1), 87\u2013114.","journal-title":"Journal of Discrete Algorithms"},{"key":"9297_CR40","doi-asserted-by":"crossref","unstructured":"Navarro, G. (2014). Spaces, trees and colors: The algorithmic landscape of document retrieval on sequences. ACM Computing Surveys, 46(4), article 52.","DOI":"10.1145\/2535933"},{"key":"9297_CR41","doi-asserted-by":"crossref","unstructured":"Navarro, G., & M\u00e4kinen, V. (2007). Compressed full-text indexes. ACM Computing Surveys, 39(1), article\u00a02.","DOI":"10.1145\/1216370.1216372"},{"key":"9297_CR42","doi-asserted-by":"crossref","unstructured":"Navarro, G., & Nekrich, Y. (2012). Top-k document retrieval in optimal time and linear space. In Proceedings of the 23rd annual ACM\u2013SIAM symposium on discrete algorithms (SODA) (pp. 1066\u20131078).","DOI":"10.1137\/1.9781611973099.84"},{"key":"9297_CR43","doi-asserted-by":"crossref","unstructured":"Navarro, G., & Ord\u00f3\u00f1ez, A. (2014). Grammar compressed sequences with rank\/select support. In Proceedings of the 21st international symposium on string processing and information retrieval (SPIRE), LNCS 8799 (pp. 31\u201344).","DOI":"10.1007\/978-3-319-11918-2_4"},{"key":"9297_CR44","doi-asserted-by":"crossref","unstructured":"Navarro, G., Puglisi, S. J., & Sir\u00e9n, J. (2014a). Document retrieval on repetitive collections. In Proceedings of the 22nd annual european symposium on algorithms (ESA B), LNCS 8737 (pp. 725\u2013736).","DOI":"10.1007\/978-3-662-44777-2_60"},{"key":"9297_CR45","doi-asserted-by":"crossref","unstructured":"Navarro, G., Puglisi, S. J., & Valenzuela, D. (2014b). General document retrieval in compact space. ACM Journal of Experimental Algorithmics, 19(2), article 3.","DOI":"10.1145\/2670128"},{"key":"9297_CR46","doi-asserted-by":"crossref","unstructured":"Okanohara, D., & Sadakane, K. (2007). Practical entropy-compressed rank\/select dictionary. In Proceedings of the 9th workshop on algorithm engineering and experiments (ALENEX) (pp. 60\u201370).","DOI":"10.1137\/1.9781611972870.6"},{"key":"9297_CR47","doi-asserted-by":"crossref","unstructured":"Raman, R., Raman, V., & Rao, S.S. (2007). Succinct indexable dictionaries with applications to encoding k-ary trees, prefix sums and multisets. ACM Transactions on Algorithms, 3(4), article\u00a043.","DOI":"10.1145\/1290672.1290680"},{"issue":"4","key":"9297_CR48","doi-asserted-by":"crossref","first-page":"364","DOI":"10.1109\/TSE.1975.6312866","volume":"1","author":"M Rochkind","year":"1975","unstructured":"Rochkind, M. (1975). The source code control system. IEEE Transactions on Software Engineering, 1(4), 364\u2013370.","journal-title":"IEEE Transactions on Software Engineering"},{"key":"9297_CR49","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1016\/j.jda.2006.03.011","volume":"5","author":"K Sadakane","year":"2007","unstructured":"Sadakane, K. (2007). Succinct data structures for flexible text retrieval systems. Journal of Discrete Algorithms, 5, 12\u201322.","journal-title":"Journal of Discrete Algorithms"},{"key":"9297_CR50","doi-asserted-by":"crossref","unstructured":"Sir\u00e9n, J. (2009). Compressed suffix arrays for massive data. In Proceedings of the 16th symposium on string processing and information retrieval (SPIRE), LNCS 5721 (pp. 63\u201374).","DOI":"10.1007\/978-3-642-03784-9_7"},{"key":"9297_CR51","unstructured":"Sir\u00e9n, J. (2012). Compressed full-text indexes for highly repetitive collections. PhD thesis, University of Helsinki."},{"issue":"7","key":"9297_CR52","doi-asserted-by":"crossref","first-page":"e1002,195","DOI":"10.1371\/journal.pbio.1002195","volume":"13","author":"ZD Stephens","year":"2015","unstructured":"Stephens, Z. D., Lee, S. Y., Faghri, F., Campbell, R. H., Zhai, C., Efron, M. J., et al. (2015). Big data: Astronomical or genomical? PLoS Biology, 13(7), e1002,195.","journal-title":"PLoS Biology"},{"issue":"6","key":"9297_CR53","doi-asserted-by":"crossref","first-page":"1176","DOI":"10.1137\/0222070","volume":"22","author":"W Szpankowski","year":"1993","unstructured":"Szpankowski, W. (1993). A generalized suffix tree and its (un)expected asymptotic behaviors. SIAM Journal on Computing, 22(6), 1176\u20131198.","journal-title":"SIAM Journal on Computing"},{"key":"9297_CR54","doi-asserted-by":"crossref","unstructured":"V\u00e4lim\u00e4ki, N., & M\u00e4kinen, V. (2007) Space-efficient algorithms for document retrieval. In Proceedings of the 18th annual symposium on combinatorial pattern matching (CPM), LNCS 4580 (pp. 205\u2013215).","DOI":"10.1007\/978-3-540-73437-6_22"},{"key":"9297_CR55","doi-asserted-by":"crossref","unstructured":"Weiner, P. (1973). Linear pattern matching algorithm. In Proceedings of the 14th annual IEEE symposium on switching and automata theory (pp. 1\u201311).","DOI":"10.1109\/SWAT.1973.13"}],"container-title":["Information Retrieval Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10791-017-9297-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-017-9297-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-017-9297-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T20:40:37Z","timestamp":1601844037000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10791-017-9297-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,4,1]]},"references-count":55,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,6]]}},"alternative-id":["9297"],"URL":"https:\/\/doi.org\/10.1007\/s10791-017-9297-7","relation":{},"ISSN":["1386-4564","1573-7659"],"issn-type":[{"value":"1386-4564","type":"print"},{"value":"1573-7659","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,4,1]]}}}