{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T21:38:09Z","timestamp":1743111489543,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030731960"},{"type":"electronic","value":"9783030731977"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-73197-7_50","type":"book-chapter","created":{"date-parts":[[2021,4,6]],"date-time":"2021-04-06T19:03:01Z","timestamp":1617735781000},"page":"721-737","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Accurate Cardinality Estimation of Co-occurring Words Using Suffix Trees"],"prefix":"10.1007","author":[{"given":"Jens","family":"Willkomm","sequence":"first","affiliation":[]},{"given":"Martin","family":"Sch\u00e4ler","sequence":"additional","affiliation":[]},{"given":"Klemens","family":"B\u00f6hm","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,4,6]]},"reference":[{"key":"50_CR1","doi-asserted-by":"publisher","unstructured":"Adams, E., Meltzer, A.: Trigrams as index element in full text retrieval: observations and experimental results. In: CSC, pp. 433\u2013439. ACM (1993). https:\/\/doi.org\/10.1145\/170791.170891","DOI":"10.1145\/170791.170891"},{"key":"50_CR2","doi-asserted-by":"publisher","unstructured":"Andersson, A., Nilsson, S.: Improved behaviour of tries by adaptive branching. Inf. Proc. Lett. 46, 295\u2013300 (1993). https:\/\/doi.org\/10.1016\/0020-0190(93)90068-k","DOI":"10.1016\/0020-0190(93)90068-k"},{"key":"50_CR3","doi-asserted-by":"publisher","unstructured":"Arz, J., Fischer, J.: LZ-compressed string dictionaries. In: DCC, IEEE (2014). https:\/\/doi.org\/10.1109\/dcc.2014.36","DOI":"10.1109\/dcc.2014.36"},{"key":"50_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/978-3-319-67428-5_9","volume-title":"String Processing and Information Retrieval","author":"P Bille","year":"2017","unstructured":"Bille, P., Fernstr\u00f8m, F., G\u00f8rtz, I.L.: Tight bounds for top tree compression. In: Fici, G., Sciortino, M., Venturini, R. (eds.) SPIRE 2017. LNCS, vol. 10508, pp. 97\u2013102. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-67428-5_9"},{"key":"50_CR5","doi-asserted-by":"publisher","unstructured":"Bloom, B.: Space\/time trade-offs in hash coding with allowable errors. Commun. ACM 422\u2013426 (1970). https:\/\/doi.org\/10.1145\/362686.362692","DOI":"10.1145\/362686.362692"},{"key":"50_CR6","doi-asserted-by":"publisher","unstructured":"Blumer, A., Blumer, J., Haussler, D., Ehrenfeucht, A., Chen, M., Seiferas, J.: The smallest automation recognizing the subwords of a text. Theor. Comput. Sci. 31\u201355 (1985). https:\/\/doi.org\/10.1016\/0304-3975(85)90157-4","DOI":"10.1016\/0304-3975(85)90157-4"},{"key":"50_CR7","doi-asserted-by":"publisher","unstructured":"Blumer, A., Ehrenfeucht, A., Haussler, D.: Average sizes of suffix trees and DAWGs. Discrete Appl. Math. 37\u201345 (1989). https:\/\/doi.org\/10.1016\/0166-218x(92)90270-k","DOI":"10.1016\/0166-218x(92)90270-k"},{"issue":"1","key":"50_CR8","first-page":"31","volume":"18","author":"P Brown","year":"1992","unstructured":"Brown, P., Della, V., Mercer, R., Pietra, S., Lai, J.: An estimate of an upper bound for the entropy of English. Comput. Linguist. 18(1), 31\u201340 (1992)","journal-title":"Comput. Linguist."},{"key":"50_CR9","doi-asserted-by":"publisher","unstructured":"Chaudhuri, S., Ganti, V., Gravano, L.: Selectivity estimation for string predicates: Overcoming the underestimation problem. ICDE. IEEE (2004). https:\/\/doi.org\/10.1109\/icde.2004.1319999","DOI":"10.1109\/icde.2004.1319999"},{"key":"50_CR10","doi-asserted-by":"publisher","unstructured":"Claude, F., Navarro, G., Peltola, H., Salmela, L., Tarhio, J.: String matching with alphabet sampling. J. Discrete Algorithms 37\u201350 (2012). https:\/\/doi.org\/10.1016\/j.jda.2010.09.004","DOI":"10.1016\/j.jda.2010.09.004"},{"key":"50_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/1900000004","volume":"4","author":"G Cormode","year":"2011","unstructured":"Cormode, G., Garofalakis, M., Haas, P., Jermaine, C.: Synopses for massive data: Samples, histograms, wavelets, sketches. Found. Trends Databases 4, 1\u2013294 (2011). https:\/\/doi.org\/10.1561\/1900000004","journal-title":"Found. Trends Databases"},{"key":"50_CR12","unstructured":"Dorohonceanu, B., Nevill-Manning, C.: Accelerating protein classification using suffix trees. In: ISMB, pp. 128\u2013133 (2000)"},{"key":"50_CR13","doi-asserted-by":"publisher","unstructured":"Ferragina, P., Manzini, G., M\u00e4kinen, V., Navarro, G.: Compressed representations of sequences and full-text indexes. ACM Trans. Algorithms 20 (2007). https:\/\/doi.org\/10.1145\/1240233.1240243","DOI":"10.1145\/1240233.1240243"},{"key":"50_CR14","doi-asserted-by":"publisher","unstructured":"Ferragina, P., Venturini, R.: The compressed permuterm index. ACM Trans. Algorithms 1\u201321 (2010). https:\/\/doi.org\/10.1145\/1868237.1868248","DOI":"10.1145\/1868237.1868248"},{"key":"50_CR15","doi-asserted-by":"publisher","unstructured":"Gog, S., Moffat, A., Culpepper, S., Turpin, A., Wirth, A.: Large-scale pattern search using reduced-space on-disk suffix arrays. TKDE 1918\u20131931 (2014). https:\/\/doi.org\/10.1109\/tkde.2013.129","DOI":"10.1109\/tkde.2013.129"},{"key":"50_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1007\/978-3-319-23826-5_28","volume-title":"String Processing and Information Retrieval","author":"S Grabowski","year":"2015","unstructured":"Grabowski, S., Raniszewski, M.: Sampling the suffix array with minimizers. In: Iliopoulos, C., Puglisi, S., Yilmaz, E. (eds.) SPIRE 2015. LNCS, vol. 9309, pp. 287\u2013298. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-23826-5_28"},{"key":"50_CR17","doi-asserted-by":"publisher","unstructured":"Grossi, R., Ottaviano, G.: Fast compressed tries through path decompositions. J. Exp. Algorithmics 11\u2013120 (2015). https:\/\/doi.org\/10.1145\/2656332","DOI":"10.1145\/2656332"},{"key":"50_CR18","doi-asserted-by":"publisher","unstructured":"Grossi, R., Vitter, J.: Compressed suffix arrays and suffix trees with applications to text indexing and string matching. SIAM J. Comput. 378\u2013407 (2005). https:\/\/doi.org\/10.1137\/s0097539702402354","DOI":"10.1137\/s0097539702402354"},{"key":"50_CR19","doi-asserted-by":"publisher","unstructured":"Hu, T., Tucker, A.: Optimal computer search trees and variable-length alphabetical codes. SIAM J. Appl. Math. 514\u2013532 (1971). https:\/\/doi.org\/10.1137\/0121057","DOI":"10.1137\/0121057"},{"key":"50_CR20","doi-asserted-by":"publisher","unstructured":"Huffman, D.: A method for the construction of minimum-redundancy codes. IRE 1098\u20131101 (1952). https:\/\/doi.org\/10.1109\/jrproc.1952.273898","DOI":"10.1109\/jrproc.1952.273898"},{"key":"50_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1007\/978-3-319-67428-5_19","volume-title":"String Processing and Information Retrieval","author":"S Kanda","year":"2017","unstructured":"Kanda, S., Morita, K., Fuketa, M.: Practical implementation of space-efficient dynamic keyword dictionaries. In: Fici, G., Sciortino, M., Venturini, R. (eds.) SPIRE 2017. LNCS, vol. 10508, pp. 221\u2013233. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-67428-5_19"},{"key":"50_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/3-540-16761-7_67","volume-title":"Automata, Languages and Programming","author":"P Kirschenhofer","year":"1986","unstructured":"Kirschenhofer, P., Prodinger, H.: Some further results on digital search trees. In: Kott, L. (ed.) ICALP 1986. LNCS, vol. 226, pp. 177\u2013185. Springer, Heidelberg (1986). https:\/\/doi.org\/10.1007\/3-540-16761-7_67"},{"key":"50_CR23","doi-asserted-by":"publisher","unstructured":"Krishnan, P., Vitter, J., Iyer, B.: Estimating alphanumeric selectivity in the presence of wildcards. In: ACM SIGMOD Record, pp. 282\u2013293 (1996). https:\/\/doi.org\/10.1145\/235968.233341","DOI":"10.1145\/235968.233341"},{"key":"50_CR24","unstructured":"Kroeger, P.: Analyzing Grammar: An Introduction. Cambridge University Press, Cambridge (2015)"},{"key":"50_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/3-540-61332-3_155","volume-title":"Computing and Combinatorics","author":"J K\u00e4rkk\u00e4inen","year":"1996","unstructured":"K\u00e4rkk\u00e4inen, J., Ukkonen, E.: Sparse suffix trees. In: Cai, J.-Y., Wong, C.K. (eds.) COCOON 1996. LNCS, vol. 1090, pp. 219\u2013230. Springer, Heidelberg (1996). https:\/\/doi.org\/10.1007\/3-540-61332-3_155"},{"key":"50_CR26","doi-asserted-by":"publisher","unstructured":"Larsson, N., Moffat, A.: Off-line dictionary-based compression. IEEE 1722\u20131732 (2000). https:\/\/doi.org\/10.1109\/5.892708","DOI":"10.1109\/5.892708"},{"key":"50_CR27","doi-asserted-by":"publisher","unstructured":"Leis, V., Gubichev, A., Mirchev, A., Boncz, P., Kemper, A., Neumann, T.: How good are query optimizers, really? VLDB Endowment 204\u2013215 (2015). https:\/\/doi.org\/10.14778\/2850583.2850594","DOI":"10.14778\/2850583.2850594"},{"key":"50_CR28","doi-asserted-by":"publisher","unstructured":"Li, D., Zhang, Q., Liang, X., Guan, J., Xu, Y.: Selectivity estimation for string predicates based on modified pruned count-suffix tree. CJE 76\u201382 (2015). https:\/\/doi.org\/10.1049\/cje.2015.01.013","DOI":"10.1049\/cje.2015.01.013"},{"key":"50_CR29","unstructured":"Manning, C., Sch\u00fctze, H.: Foundations of Statistical Natural Language Processing. MIT Press, Cambridge (1999)"},{"key":"50_CR30","unstructured":"Miner, G., Elder, J., Fast, A., Hill, T., Nisbet, R., Delen, D.: Practical Text Mining and Statistical Analysis for Non-structured Text Data Applications. Academic Press, Waltham (2012)"},{"key":"50_CR31","doi-asserted-by":"publisher","unstructured":"Moerkotte, G., DeHaan, D., May, N., Nica, A., Boehm, A.: Exploiting ordered dictionaries to efficiently construct histograms with q-error guarantees in SAP HANA. In: ACM SIGMOD (2014). https:\/\/doi.org\/10.1145\/2588555.2595629","DOI":"10.1145\/2588555.2595629"},{"key":"50_CR32","doi-asserted-by":"publisher","unstructured":"Moerkotte, G., Neumann, T., Steidl, G.: Preventing bad plans by bounding the impact of cardinality estimation errors. VLDB Endowment 982\u2013993 (2009). https:\/\/doi.org\/10.14778\/1687627.1687738","DOI":"10.14778\/1687627.1687738"},{"key":"50_CR33","doi-asserted-by":"publisher","unstructured":"Moradi, H., Grzymala-Busse, J., Roberts, J.: Entropy of english text: Experiments with humans and a machine learning system based on rough sets. Inf. Sci. 31\u201347 (1998). https:\/\/doi.org\/10.1016\/s0020-0255(97)00074-1","DOI":"10.1016\/s0020-0255(97)00074-1"},{"key":"50_CR34","doi-asserted-by":"publisher","unstructured":"M\u00fcller, M., Moerkotte, G., Kolb, O.: Improved selectivity estimation by combining knowledge from sampling and synopses. VLDB Endowment 1016\u20131028 (2018). https:\/\/doi.org\/10.14778\/3213880.3213882","DOI":"10.14778\/3213880.3213882"},{"key":"50_CR35","doi-asserted-by":"publisher","unstructured":"Nilsson, S., Tikkanen, M.: An experimental study of compression methods for dynamic tries. Algorithmica 33, 19\u201333 (2002). https:\/\/doi.org\/10.1007\/s00453-001-0102-y","DOI":"10.1007\/s00453-001-0102-y"},{"key":"50_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"324","DOI":"10.1007\/978-3-319-23826-5_31","volume-title":"String Processing and Information Retrieval","author":"A Poyias","year":"2015","unstructured":"Poyias, A., Raman, R.: Improved practical compact dynamic tries. In: Iliopoulos, C., Puglisi, S., Yilmaz, E. (eds.) SPIRE 2015. LNCS, vol. 9309, pp. 324\u2013336. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-23826-5_31"},{"key":"50_CR37","doi-asserted-by":"publisher","unstructured":"Sadakane, K.: Compressed suffix trees with full functionality. Theor. Comput. Syst. 41, 589\u2013607 (2007). https:\/\/doi.org\/10.1007\/s00224-006-1198-x","DOI":"10.1007\/s00224-006-1198-x"},{"key":"50_CR38","doi-asserted-by":"publisher","DOI":"10.1145\/1451940.1451972","author":"G Sautter","year":"2008","unstructured":"Sautter, G., Abba, C., B\u00f6hm, K.: Improved count suffix trees for natural language data. IDEAS. ACM (2008). https:\/\/doi.org\/10.1145\/1451940.1451972","journal-title":"IDEAS. ACM"},{"key":"50_CR39","doi-asserted-by":"publisher","unstructured":"Sigurd, B., Eeg-Olofsson, M., van Weijer, J.: Word length, sentence length and frequency - zipf revisited. Studia Linguistica 58, 37\u201352 (2004). https:\/\/doi.org\/10.1111\/j.0039-3193.2004.00109.x","DOI":"10.1111\/j.0039-3193.2004.00109.x"},{"key":"50_CR40","doi-asserted-by":"crossref","unstructured":"Sun, J., Li, G.: An end-to-end learning-based cost estimator (2019)","DOI":"10.14778\/3368289.3368296"},{"key":"50_CR41","doi-asserted-by":"publisher","unstructured":"Vitale, L., Mart\u00edn, \u00c1., Seroussi, G.: Space-efficient representation of truncated suffix trees, with applications to markov order estimation. Theor. Comput. Sci. 595, 34\u201345 (2015). https:\/\/doi.org\/10.1016\/j.tcs.2015.06.013","DOI":"10.1016\/j.tcs.2015.06.013"},{"key":"50_CR42","doi-asserted-by":"publisher","unstructured":"Welch, T.: A technique for high-performance data compression. Computer 8\u201319 (1984). https:\/\/doi.org\/10.1109\/mc.1984.1659158","DOI":"10.1109\/mc.1984.1659158"},{"key":"50_CR43","doi-asserted-by":"publisher","unstructured":"Wu, W., Chi, Y., Zhu, S., Tatemura, J., Hacig\u00fcm\u00fcs, H., Naughton, J.: Predicting query execution time: Are optimizer cost models really unusable? In: ICDE. IEEE (2013). https:\/\/doi.org\/10.1109\/icde.2013.6544899","DOI":"10.1109\/icde.2013.6544899"},{"key":"50_CR44","doi-asserted-by":"publisher","unstructured":"Ziv, J., Lempel, A.: A universal algorithm for sequential data compression. IEEE Trans. Inf. Theor. 23, 337\u2013343 (1977). https:\/\/doi.org\/10.1109\/tit.1977.1055714","DOI":"10.1109\/tit.1977.1055714"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-73197-7_50","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,7]],"date-time":"2021-08-07T15:21:38Z","timestamp":1628349698000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-73197-7_50"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030731960","9783030731977"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-73197-7_50","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"6 April 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taipei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiwan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 April 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 April 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/dm.iis.sinica.edu.tw\/DASFAA2021\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"490","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"98","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic this event was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}