{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T09:03:16Z","timestamp":1775638996577,"version":"3.50.1"},"reference-count":101,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2020,9,19]],"date-time":"2020-09-19T00:00:00Z","timestamp":1600473600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,9,19]],"date-time":"2020-09-19T00:00:00Z","timestamp":1600473600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61802412"],"award-info":[{"award-number":["61802412"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s00778-020-00636-3","type":"journal-article","created":{"date-parts":[[2020,9,19]],"date-time":"2020-09-19T07:03:07Z","timestamp":1600498987000},"page":"163-188","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":62,"title":["TADOC: Text analytics directly on compression"],"prefix":"10.1007","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1983-7321","authenticated-orcid":false,"given":"Feng","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Jidong","family":"Zhai","sequence":"additional","affiliation":[]},{"given":"Xipeng","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Dalin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Onur","family":"Mutlu","sequence":"additional","affiliation":[]},{"given":"Wenguang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Xiaoyong","family":"Du","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,19]]},"reference":[{"key":"636_CR1","unstructured":"Amazon elastic compute cloud (Amazon EC2). https:\/\/aws.amazon.com\/ec2\/"},{"key":"636_CR2","unstructured":"Re-Pair compression and decompression. https:\/\/users.dcc.uchile.cl\/~gnavarro\/software\/index.html (2010)"},{"key":"636_CR3","unstructured":"word2vec. https:\/\/code.google.com\/archive\/p\/word2vec\/ (2013)"},{"key":"636_CR4","unstructured":"C++ B-tree. https:\/\/code.google.com\/archive\/p\/cpp-btree\/ (2017)"},{"key":"636_CR5","unstructured":"Wikipedia HTML data dumps. https:\/\/dumps.wikimedia.org\/enwiki\/ (2017)"},{"key":"636_CR6","unstructured":"FM-index. https:\/\/en.wikipedia.org\/wiki\/FM-index (2018)"},{"key":"636_CR7","unstructured":"zstd. https:\/\/facebook.github.io\/zstd\/ (2020)"},{"key":"636_CR8","unstructured":"Agarwal, R., Khandelwal, A., Stoica, I.: Succinct: enabling queries on compressed data. In: NSDI (2015)"},{"key":"636_CR9","unstructured":"Ahmad, F., Lee, S., Thottethodi, M., Vijaykumar, T.: PUMA: Purdue MapReduce Benchmarks Suite (2012)"},{"key":"636_CR10","unstructured":"Bille, P., Christiansen, A.R., Cording, P.H., G\u00f8rtz, I.L.: Finger search in grammar-compressed strings (2015). arXiv preprint arXiv:1507.02853"},{"key":"636_CR11","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1137\/130936889","volume":"43","author":"P Bille","year":"2015","unstructured":"Bille, P., Landau, G.M., Raman, R., Sadakane, K., Satti, S.R., Weimann, O.: Random access to grammar-compressed strings and trees. SIAM J. Comput. 43, 513\u2013539 (2015)","journal-title":"SIAM J. Comput."},{"key":"636_CR12","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D.M., Ng, A.Y., Jordan, M.I.: Latent dirichlet allocation. J Mach Learn Res 3, 993\u20131022 (2003)","journal-title":"J Mach Learn Res"},{"key":"636_CR13","doi-asserted-by":"crossref","unstructured":"Blumenstock, J.E.: Size matters: word count as a measure of quality on Wikipedia. In: WWW (2008)","DOI":"10.1145\/1367497.1367673"},{"key":"636_CR14","doi-asserted-by":"crossref","unstructured":"Boroumand, A., Ghose, S., Kim, Y., Ausavarungnirun, R., Shiu, E., Thakur, R., Kim, D., Kuusela, A., Knies, A., Ranganathan, P., Mutlu, O.: Google workloads for consumer devices: mitigating data movement bottlenecks. In: ASPLOS (2018)","DOI":"10.1145\/3173162.3173177"},{"key":"636_CR15","unstructured":"Borthakur, D.: HDFS architecture guide. HADOOP APACHE PROJECT http:\/\/hadoop.apache.org\/common\/docs\/current\/hdfs design. pdf (2008)"},{"key":"636_CR16","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1016\/j.ins.2019.01.035","volume":"483","author":"NR Brisaboa","year":"2019","unstructured":"Brisaboa, N.R., G\u00f3mez-Brand\u00f3n, A., Navarro, G., Param\u00e1, J.R.: Gract: a grammar-based compressed index for trajectory data. Inf. Sci. 483, 106\u2013135 (2019)","journal-title":"Inf. Sci."},{"key":"636_CR17","unstructured":"Burrows, M., Wheeler, D.J.: A block-sorting lossless data compression algorithm (1994)"},{"key":"636_CR18","unstructured":"Carbone, P., Katsifodimos, A., Ewen, S., Markl, V., Haridi, S., Tzoumas, K.: Apache flink: stream and batch processing in a single engine. Bull. IEEE Comput. Soc. Tech. Comm. Data Eng. (2015)"},{"key":"636_CR19","doi-asserted-by":"crossref","unstructured":"Charikar, M., Lehman, E., Liu, D., Panigrahy, R., Prabhakaran, M., Sahai, A., Shelat, A.: The smallest grammar problem. IEEE Trans. Inf. Theory (2005)","DOI":"10.1109\/TIT.2005.850116"},{"key":"636_CR20","doi-asserted-by":"crossref","unstructured":"Chilimbi, T.M.: Efficient representations and abstractions for quantifying and exploiting data reference locality. In: PLDI (2001)","DOI":"10.1145\/378795.378840"},{"key":"636_CR21","doi-asserted-by":"crossref","unstructured":"Chilimbi, T.M., Hirzel, M.: Dynamic hot data stream prefetching for general-purpose programs. In: PLDI (2002)","DOI":"10.1145\/512529.512554"},{"key":"636_CR22","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1162\/tacl_a_00104","volume":"4","author":"JP Chiu","year":"2016","unstructured":"Chiu, J.P., Nichols, E.: Named entity recognition with bidirectional LSTM-CNNs. Trans. Assoc. Comput. Linguist. 4, 357\u2013370 (2016)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"636_CR23","volume-title":"Introduction to Algorithms","author":"TH Cormen","year":"2009","unstructured":"Cormen, T.H.: Introduction to Algorithms. MIT Press, Cambridge (2009)"},{"key":"636_CR24","doi-asserted-by":"crossref","unstructured":"Farruggia, A., Ferragina, P., Venturini, R.: Bicriteria data compression: efficient and usable. In: European Symposium on Algorithms (2014)","DOI":"10.1137\/1.9781611973402.115"},{"key":"636_CR25","first-page":"1","volume":"13","author":"P Ferragina","year":"2009","unstructured":"Ferragina, P., Gonz\u00e1lez, R., Navarro, G., Venturini, R.: Compressed text indexes: from theory to practice. J. Exp. Algorithm (JEA) 13, 1\u201312 (2009)","journal-title":"J. Exp. Algorithm (JEA)"},{"key":"636_CR26","unstructured":"Ferragina, P., Manzini, G.: Opportunistic data structures with applications. In: Proceedings 41st Annual Symposium on Foundations of Computer Science (2000)"},{"key":"636_CR27","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/S0020-0255(01)00098-6","volume":"135","author":"P Ferragina","year":"2001","unstructured":"Ferragina, P., Manzini, G.: An experimental study of a compressed index. Inf. Sci. 135, 13\u201328 (2001)","journal-title":"Inf. Sci."},{"key":"636_CR28","unstructured":"Ferragina, P., Manzini, G.: An experimental study of an opportunistic index. In: Proceedings of the Twelfth Annual ACM-SIAM Symposium on Discrete Algorithms (2001)"},{"key":"636_CR29","doi-asserted-by":"publisher","first-page":"552","DOI":"10.1145\/1082036.1082039","volume":"52","author":"P Ferragina","year":"2005","unstructured":"Ferragina, P., Manzini, G.: Indexing compressed text. J. ACM (JACM) 52, 552\u2013581 (2005)","journal-title":"J. ACM (JACM)"},{"key":"636_CR30","doi-asserted-by":"crossref","unstructured":"Ferragina, P., Nitto, I., Venturini, R.: On the bit-complexity of Lempel\u2013Ziv compression. In: Proceedings of the Twentieth Annual ACM-SIAM Symposium on Discrete Algorithms (2009)","DOI":"10.1137\/1.9781611973068.84"},{"key":"636_CR31","doi-asserted-by":"crossref","unstructured":"Gagie, T., Gawrychowski, P., K\u00e4rkk\u00e4inen, J., Nekrich, Y., Puglisi, S.J.: A faster grammar-based self-index. In: International Conference on Language and Automata Theory and Applications (2012)","DOI":"10.1007\/978-3-642-28332-1_21"},{"key":"636_CR32","doi-asserted-by":"crossref","unstructured":"Ganardi, M., Je\u017c, A., Lohrey, M.: Balancing straight-line programs. In: IEEE 60th Annual Symposium on Foundations of Computer Science (FOCS) (2019)","DOI":"10.1109\/FOCS.2019.00073"},{"key":"636_CR33","doi-asserted-by":"crossref","unstructured":"Ga\u0144czorz, M., Je\u017c, A.: Improvements on re-pair grammar compressor. In: Data Compression Conference (DCC) (2017)","DOI":"10.1109\/DCC.2017.52"},{"key":"636_CR34","doi-asserted-by":"crossref","unstructured":"Gog, S., Beller, T., Moffat, A., Petri, M.: From theory to practice: plug and play with succinct data structures. In: International Symposium on Experimental Algorithms (2014)","DOI":"10.1007\/978-3-319-07959-2_28"},{"key":"636_CR35","unstructured":"Gonzalez, J.E., Low, Y., Gu, H., Bickson, D., Guestrin, C.: PowerGraph: distributed graph-parallel computation on natural graphs. In: OSDI (2012)"},{"key":"636_CR36","unstructured":"Grossi, R., Gupta, A., Vitter, J.S.: High-order entropy-compressed text indexes. In: Proceedings of the Fourteenth Annual ACM-SIAM Symposium on Discrete Algorithms (2003)"},{"key":"636_CR37","unstructured":"Grossi, R., Gupta, A., Vitter, J.S.: When indexing equals compression: experiments with compressing suffix arrays and applications. In: Proceedings of the Fifteenth Annual ACM-SIAM Symposium on Discrete Algorithms (2004)"},{"key":"636_CR38","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1137\/S0097539702402354","volume":"35","author":"R Grossi","year":"2005","unstructured":"Grossi, R., Vitter, J.S.: Compressed suffix arrays and suffix trees with applications to text indexing and string matching. SIAM J. Comput. 35, 378\u2013407 (2005)","journal-title":"SIAM J. Comput."},{"key":"636_CR39","unstructured":"Hon, W.-K., Lam, T.W., Sung, W.-K., Tse, W.-L., Wong, C.-K., Yiu, S.-M.: Practical aspects of compressed suffix arrays and FM-index in searching DNA sequences. In: ALENEX\/ANALC (2004)"},{"key":"636_CR40","doi-asserted-by":"crossref","unstructured":"Huang, S., Huang, J., Dai, J., Xie, T., Huang, B.: The HiBench benchmark suite: characterization of the MapReduce-based data analysis. In: New Frontiers in Information and Software as Services (2011)","DOI":"10.1007\/978-3-642-19294-4_9"},{"key":"636_CR41","unstructured":"Joachims, T.: A Probabilistic Analysis of the Rocchio Algorithm with TFIDF for Text Categorization. Technical report, Carnegie-Mellon Univ Pittsburgh Pa Dept of Computer Science (1996)"},{"key":"636_CR42","unstructured":"Khandelwal, A., Agarwal, R., Stoica, I.: Blowfish: dynamic storage-performance tradeoff in data stores. In: NSDI (2016)"},{"key":"636_CR43","doi-asserted-by":"crossref","unstructured":"Koiwa, T., Ohwada, H.: Extraction of disease-related genes from PubMed paper using word2vec. In: Proceedings of the 8th International Conference on Computational Systems-Biology and Bioinformatics (2017)","DOI":"10.1145\/3156346.3156355"},{"key":"636_CR44","doi-asserted-by":"publisher","first-page":"1149","DOI":"10.1002\/(SICI)1097-024X(199911)29:13<1149::AID-SPE274>3.0.CO;2-O","volume":"29","author":"S Kurtz","year":"1999","unstructured":"Kurtz, S.: Reducing the space requirement of suffix trees. Softw. Pract. Exp. 29, 1149\u20131171 (1999)","journal-title":"Softw. Pract. Exp."},{"key":"636_CR45","doi-asserted-by":"crossref","unstructured":"Larsson, N.J., Moffat, A.: Off-line dictionary-based compression. In: Proceedings of the IEEE (2000)","DOI":"10.1109\/5.892708"},{"key":"636_CR46","doi-asserted-by":"crossref","unstructured":"Larus, J.R.: Whole program paths. In: PLDI (1999)","DOI":"10.1145\/301618.301678"},{"key":"636_CR47","doi-asserted-by":"crossref","unstructured":"Lau, J., Perelman, E., Hamerly, G., Sherwood, T., Calder, B.: Motivation for variable length intervals and hierarchical phase behavior. In: International Symposium on Performance Analysis of Systems and Software (2005)","DOI":"10.1109\/ISPASS.2005.1430568"},{"key":"636_CR48","doi-asserted-by":"crossref","unstructured":"Law, J., Rothermel, G.: Whole program path-based dynamic impact analysis. In: ICSE (2003)","DOI":"10.1109\/ICSE.2003.1201210"},{"key":"636_CR49","doi-asserted-by":"crossref","unstructured":"Lebart, L.: Classification problems in text analysis and information retrieval. In: Advances in Data Science and Classification (1998)","DOI":"10.1007\/978-3-642-72253-0_63"},{"key":"636_CR50","unstructured":"Levenshtein, V.I.: Binary codes capable of correcting deletions, insertions, and reversals. In: Soviet Physics Doklady (1966)"},{"key":"636_CR51","unstructured":"Lichman, M.: UCI machine learning repository. http:\/\/archive.ics.uci.edu\/ml (2013)"},{"key":"636_CR52","doi-asserted-by":"crossref","unstructured":"Lin, Y., Zhang, Y., Li, Q., Yang, J.: Supporting efficient query processing on compressed XML files. In: Proceedings of ACM Symposium on Applied Computing (2005)","DOI":"10.1145\/1066677.1066827"},{"key":"636_CR53","first-page":"1","volume":"2","author":"Z Liu","year":"2011","unstructured":"Liu, Z., Zhang, Y., Chang, E.Y., Sun, M.: PLDA+: Parallel latent Dirichlet allocation with data placement and pipeline processing. ACM Trans. Intell. Syst. Technol. 2, 1\u201318 (2011)","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"636_CR54","doi-asserted-by":"crossref","unstructured":"Mackenzie, J., Mallia, A., Petri, M., Culpepper, J.S., Suel, T.: Compressing inverted indexes with recursive graph bisection: a reproducibility study. In: European Conference on Information Retrieval (2019)","DOI":"10.1007\/978-3-030-15712-8_22"},{"key":"636_CR55","doi-asserted-by":"crossref","unstructured":"Malewicz, G., Austern, M.H., Bik, A.J., Dehnert, J.C., Horn, I., Leiser, N., Czajkowski, G.: Pregel: a system for large-scale graph processing. In: SIGMOD (2010)","DOI":"10.1145\/1807167.1807184"},{"key":"636_CR56","doi-asserted-by":"publisher","first-page":"935","DOI":"10.1137\/0222058","volume":"22","author":"U Manber","year":"1993","unstructured":"Manber, U., Myers, G.: Suffix arrays: a new method for on-line string searches. SIAM J. Comput. 22, 935\u2013948 (1993)","journal-title":"SIAM J. Comput."},{"key":"636_CR57","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4842-1251-6","volume-title":"Practical Graph Analytics with Apache Giraph","author":"C Martella","year":"2015","unstructured":"Martella, C., Shaposhnik, R., Logothetis, D., Harenberg, S.: Practical Graph Analytics with Apache Giraph. Springer, Berlin (2015)"},{"key":"636_CR58","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1142\/S0218213004001466","volume":"13","author":"Y Matsuo","year":"2004","unstructured":"Matsuo, Y., Ishizuka, M.: Keyword extraction from a single document using word co-occurrence statistical information. Int. J. Artif. Intell. Tools 13, 157\u2013169 (2004)","journal-title":"Int. J. Artif. Intell. Tools"},{"key":"636_CR59","unstructured":"Mitsui, K.: Information retrieval based on rank-ordered cumulative query scores calculated from weights of all keywords in an inverted index file for minimizing access to a main database, 1993. US Patent 5,263,159"},{"key":"636_CR60","doi-asserted-by":"crossref","unstructured":"Moffat, A., Petri, M.: Index compression using byte-aligned ANS coding and two-dimensional contexts. In: WSDM (2018)","DOI":"10.1145\/3159652.3159663"},{"key":"636_CR61","unstructured":"Monge, A.E., Elkan, C., et al.: The field matching problem: algorithms and applications. In: Proceedings of the International Conference on Knowledge Discovery and Data Mining (1996)"},{"key":"636_CR62","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781316588284","volume-title":"Compact Data Structures: A Practical Approach","author":"G Navarro","year":"2016","unstructured":"Navarro, G.: Compact Data Structures: A Practical Approach. Cambridge University Press, Cambridge (2016)"},{"key":"636_CR63","unstructured":"Nevill-Manning, C.G.: Inferring sequential structure. PhD thesis, University of Waikato (1996)"},{"key":"636_CR64","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1093\/comjnl\/40.2_and_3.103","volume":"40","author":"CG Nevill-Manning","year":"1997","unstructured":"Nevill-Manning, C.G., Witten, I.H.: Compression and explanation using hierarchical grammars. Comput. J. 40, 103\u2013116 (1997)","journal-title":"Comput. J."},{"key":"636_CR65","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.374","volume":"7","author":"CG Nevill-Manning","year":"1997","unstructured":"Nevill-Manning, C.G., Witten, I.H.: Identifying hierarchical structure in sequences: a linear-time algorithm. J. Artif. Intell. Res. (JAIR) 7, 67\u201382 (1997)","journal-title":"J. Artif. Intell. Res. (JAIR)"},{"key":"636_CR66","unstructured":"Nevill-Manning, C.G., Witten, I.H.: Linear-time, incremental hierarchy inference for compression. In: Data Compression Conference (1997)"},{"key":"636_CR67","volume-title":"Pthreads Programming: A POSIX Standard for Better Multiprocessing","author":"B Nichols","year":"1996","unstructured":"Nichols, B., Buttlar, D., Farrell, J.: Pthreads Programming: A POSIX Standard for Better Multiprocessing. O\u2019Reilly Media Inc, Sebastopol (1996)"},{"key":"636_CR68","doi-asserted-by":"crossref","unstructured":"Oosterhuis, H., Culpepper, J.S., de Rijke, M.: The potential of learned index structures for index compression. In: Proceedings of the 23rd Australasian Document Computing Symposium (2018)","DOI":"10.1145\/3291992.3291993"},{"key":"636_CR69","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R., Dubourg, V., et al.: Scikit-learn: machine learning in Python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"636_CR70","doi-asserted-by":"crossref","unstructured":"Pekhimenko, G., Seshadri, V., Kim, Y., Xin, H., Mutlu, O., Gibbons, P.B., Kozuch, M.A., Mowry, T.C.: Linearly compressed pages: a low-complexity, low-latency main memory compression framework. In: MICRO (2013)","DOI":"10.1145\/2540708.2540724"},{"key":"636_CR71","volume-title":"Linguistic Inquiry and Word Count: LIWC 2001","author":"JW Pennebaker","year":"2001","unstructured":"Pennebaker, J.W., Francis, M.E., Booth, R.J.: Linguistic Inquiry and Word Count: LIWC 2001. Lawrence Erlbaum Associates, Mahway (2001)"},{"key":"636_CR72","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: Global vectors for word representation. In: EMNLP (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"636_CR73","doi-asserted-by":"publisher","first-page":"974","DOI":"10.1002\/spe.2556","volume":"48","author":"M Petri","year":"2018","unstructured":"Petri, M., Moffat, A.: Compact inverted index storage using general-purpose compression libraries. Softw. Pract. Exp. 48, 974\u2013982 (2018)","journal-title":"Softw. Pract. Exp."},{"key":"636_CR74","doi-asserted-by":"crossref","unstructured":"Petroni, F., Querzoni, L., Daudjee, K., Kamali, S., Iacoboni, G.: HDRF: stream-based partitioning for power-law graphs. In: CIKM (2015)","DOI":"10.1145\/2806416.2806424"},{"key":"636_CR75","unstructured":"Pibiri, G.E., Perego, R., Venturini, R.: Compressed Indexes for Fast Search of Semantic Data. TKDE (2020)"},{"key":"636_CR76","doi-asserted-by":"crossref","unstructured":"Pibiri, G.E., Petri, M., Moffat, A.: Fast dictionary-based compression for inverted indexes. In: WSDM (2019)","DOI":"10.1145\/3289600.3290962"},{"key":"636_CR77","doi-asserted-by":"crossref","unstructured":"Pibiri, G.E., Venturini, R.: Techniques for inverted index compression. arXiv preprint arXiv:1908.10598 (2019)","DOI":"10.1007\/978-3-319-77525-8_52"},{"key":"636_CR78","doi-asserted-by":"crossref","unstructured":"Popov, I.: Malware detection using machine learning based on word2vec embeddings of machine code instructions. In: 2017 Siberian Symposium on Data Science and Engineering (SSDSE) (2017)","DOI":"10.1109\/SSDSE.2017.8071952"},{"key":"636_CR79","unstructured":"Rong, X.: word2vec parameter learning explained. arXiv preprint arXiv:1411.2738 (2014)"},{"key":"636_CR80","doi-asserted-by":"crossref","unstructured":"Rytter, W.: Grammar compression, lz-encodings, and string algorithms with implicit input. In: International Colloquium on Automata, Languages, and Programming (2004)","DOI":"10.1007\/978-3-540-27836-8_5"},{"key":"636_CR81","doi-asserted-by":"crossref","unstructured":"Sadakane, K.: Compressed text databases with efficient query algorithms based on the compressed suffix array. In: International Symposium on Algorithms and Computation (2000)","DOI":"10.1007\/3-540-40996-3_35"},{"key":"636_CR82","unstructured":"Sadakane, K.: Succinct representations of LCP information and improvements in the compressed suffix arrays. In: Proceedings of the Thirteenth Annual ACM-SIAM Symposium on Discrete Algorithms (2002)"},{"key":"636_CR83","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1016\/S0196-6774(03)00087-7","volume":"48","author":"K Sadakane","year":"2003","unstructured":"Sadakane, K.: New text indexing functionalities of the compressed suffix arrays. J. Algorithms 48, 294\u2013313 (2003)","journal-title":"J. Algorithms"},{"key":"636_CR84","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1007\/s00224-006-1198-x","volume":"41","author":"K Sadakane","year":"2007","unstructured":"Sadakane, K.: Compressed suffix trees with full functionality. Theory Comput. Syst. 41, 589\u2013607 (2007)","journal-title":"Theory Comput. Syst."},{"key":"636_CR85","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1016\/j.jda.2006.03.011","volume":"5","author":"K Sadakane","year":"2007","unstructured":"Sadakane, K.: Succinct data structures for flexible text retrieval systems. J. Discrete Algorithms 5, 12\u201322 (2007)","journal-title":"J. Discrete Algorithms"},{"key":"636_CR86","first-page":"133","volume":"10","author":"M Sharma","year":"2010","unstructured":"Sharma, M.: Compression using Huffman coding. IJCSNS Int. J. Comput. Sci. Netw. Secur. 10, 133\u2013141 (2010)","journal-title":"IJCSNS Int. J. Comput. Sci. Netw. Secur."},{"key":"636_CR87","unstructured":"Takabatake, Y., Sakamoto, H., et al.: A space-optimal grammar compression. In: 25th Annual European Symposium on Algorithms (2017)"},{"key":"636_CR88","doi-asserted-by":"crossref","unstructured":"Vasile, F., Smirnova, E., Conneau, A.: Meta-prod2vec: Product embeddings using side-information for recommendation. In: Proceedings of the 10th ACM Conference on Recommender Systems (2016)","DOI":"10.1145\/2959100.2959160"},{"key":"636_CR89","doi-asserted-by":"crossref","unstructured":"Walkinshaw, N., Afshan, S., McMinn, P.: Using compression algorithms to support the comprehension of program traces. In: Proceedings of the Eighth International Workshop on Dynamic Analysis (2010)","DOI":"10.1145\/1868321.1868323"},{"key":"636_CR90","unstructured":"Whang, K.-Y., Park, B.-K., Han, W.-S., Lee, Y.-K.: Inverted index storage structure using subindexes and large objects for tight coupling of information retrieval with database management systems, 2002. US Patent 6,349,308"},{"key":"636_CR91","doi-asserted-by":"crossref","unstructured":"Xin, R.S., Gonzalez, J.E., Franklin, M.J., Stoica, I.: GraphX: A resilient distributed graph system on spark. In: First International Workshop on Graph Data Management Experiences and Systems (2013)","DOI":"10.1145\/2484425.2484427"},{"key":"636_CR92","doi-asserted-by":"crossref","unstructured":"Xu, A., Liu, Z., Guo, Y., Sinha, V., Akkiraju, R.: A new chatbot for customer service on social media. In: Proceedings of the 2017 CHI Conference on Human Factors in Computing Systems (2017)","DOI":"10.1145\/3025453.3025496"},{"key":"636_CR93","first-page":"95","volume":"10","author":"M Zaharia","year":"2010","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.J., Shenker, S., Stoica, I.: Spark: cluster computing with working sets. HotCloud 10, 95 (2010)","journal-title":"HotCloud"},{"key":"636_CR94","volume-title":"Lexical Acquisition: Exploiting On-Line Resources to Build a Lexicon","author":"U Zernik","year":"1991","unstructured":"Zernik, U.: Lexical Acquisition: Exploiting On-Line Resources to Build a Lexicon. Psychology Press, Milton Park (1991)"},{"key":"636_CR95","doi-asserted-by":"crossref","unstructured":"Zhang, C., Naughton, J., DeWitt, D., Luo, Q., Lohman, G.: On supporting containment queries in relational database management systems. In: SIGMOD (2001)","DOI":"10.1145\/375663.375722"},{"key":"636_CR96","doi-asserted-by":"crossref","unstructured":"Zhang, F., Wu, B., Zhai, J., He, B., Chen, W., Du, X.: Automatic Irregularity-Aware Fine-Grained Workload Partitioning on Integrated Architectures. TKDE (2019)","DOI":"10.1109\/TKDE.2019.2940184"},{"key":"636_CR97","doi-asserted-by":"crossref","unstructured":"Zhang, F., Zhai, J., Shen, X., Mutlu, O., Chen, W.: Efficient document analytics on compressed data: method, challenges, algorithms, insights. PVLDB (2018)","DOI":"10.14778\/3236187.3236203"},{"key":"636_CR98","doi-asserted-by":"crossref","unstructured":"Zhang, F., Zhai, J., Shen, X., Mutlu, O., Chen, W.: Zwift: A programming framework for high performance text analytics on compressed data. In: ICS (2018)","DOI":"10.1145\/3205289.3205325"},{"key":"636_CR99","doi-asserted-by":"crossref","unstructured":"Zhang, F., Zhai, J., Shen, X., Mutlu, O., Du, X.: Enabling efficient random access to hierarchically-compressed data. In: ICDE (2020)","DOI":"10.1109\/ICDE48307.2020.00097"},{"key":"636_CR100","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1109\/TIT.1977.1055714","volume":"23","author":"J Ziv","year":"1977","unstructured":"Ziv, J., Lempel, A.: A universal algorithm for sequential data compression. IEEE Trans. Inf. Theory 23, 337\u2013343 (1977)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"636_CR101","doi-asserted-by":"publisher","first-page":"6-es","DOI":"10.1145\/1132956.1132959","volume":"38","author":"J Zobel","year":"2006","unstructured":"Zobel, J., Moffat, A.: Inverted files for text search engines. ACM Comput. Surv. CSUR 38, 6-es (2006)","journal-title":"ACM Comput. Surv. CSUR"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-020-00636-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00778-020-00636-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-020-00636-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,19]],"date-time":"2021-09-19T00:58:25Z","timestamp":1632013105000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00778-020-00636-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,19]]},"references-count":101,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["636"],"URL":"https:\/\/doi.org\/10.1007\/s00778-020-00636-3","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,9,19]]},"assertion":[{"value":"8 October 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 July 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 September 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 September 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}