{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T07:29:35Z","timestamp":1769066975741,"version":"3.49.0"},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T00:00:00Z","timestamp":1692576000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T00:00:00Z","timestamp":1692576000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1844234"],"award-info":[{"award-number":["CCF-1844234"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1007\/s00778-023-00806-z","type":"journal-article","created":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T15:02:05Z","timestamp":1692630125000},"page":"281-299","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["MinJoin++: a fast algorithm for string similarity joins under edit distance"],"prefix":"10.1007","volume":"33","author":[{"given":"Nikolai","family":"Karpov","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoyu","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6851-3115","authenticated-orcid":false,"given":"Qin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,8,21]]},"reference":[{"key":"806_CR1","unstructured":"Arasu, A., Ganti, V., Kaushik, R.: Efficient exact set-similarity joins. In: VLDB, pp.\u00a0918\u2013929 (2006)"},{"key":"806_CR2","doi-asserted-by":"crossref","unstructured":"Bayardo, R.J., Ma, Y., Srikant, R.: Scaling up all pairs similarity search. In: WWW, pp.\u00a0131\u2013140 (2007)","DOI":"10.1145\/1242572.1242591"},{"key":"806_CR3","unstructured":"Bocek, T., Hunt, E., Stiller, B., Hecht, F.: Fast similarity search in large dictionaries. University (2007)"},{"key":"806_CR4","doi-asserted-by":"crossref","unstructured":"Ciaccia, P., Patella, M., Zezula, P.: M-tree: an efficient access method for similarity search in metric spaces. In: VLDB, pp.\u00a0426\u2013435 (1997)","DOI":"10.1145\/275487.275495"},{"key":"806_CR5","unstructured":"Dai, X., Yan, X., Zhou, K., Wang, Y., Yang, H., Cheng, J., Sigir. J., Huang, X., Chang, Y., Cheng, X., Kamps, J., Murdock, V., Wen, J., Liu, Y. (eds.) ACM, pp.\u00a0599\u2013608"},{"key":"806_CR6","unstructured":"Gravano, L., Ipeirotis, P.G., Jagadish, H.V., Koudas, N., Muthukrishnan, S., Srivastava, D.: Approximate string joins in a database (almost) for free. In: VLDB, pp.\u00a0491\u2013500 (2001)"},{"issue":"8","key":"806_CR7","first-page":"625","volume":"7","author":"Y Jiang","year":"2014","unstructured":"Jiang, Y., Li, G., Feng, J., Li, W.: String similarity joins: an experimental evaluation. PVLDB 7(8), 625\u2013636 (2014)","journal-title":"PVLDB"},{"key":"806_CR8","doi-asserted-by":"crossref","unstructured":"Li, C., Lu, J., Lu, Y.: Efficient merging and filtering algorithms for approximate string searches. In: ICDE, pp.\u00a0257\u2013266 (2008)","DOI":"10.1109\/ICDE.2008.4497434"},{"issue":"3","key":"806_CR9","first-page":"253","volume":"5","author":"G Li","year":"2011","unstructured":"Li, G., Deng, D., Wang, J., Feng, J.: PASS-JOIN: a partition-based method for similarity joins. PVLDB 5(3), 253\u2013264 (2011)","journal-title":"PVLDB"},{"key":"806_CR10","doi-asserted-by":"crossref","unstructured":"Myers, G.: Efficient local alignment discovery amongst noisy long reads. In: Brown, D.G., Morgenstern, B. (eds.), Algorithms in Bioinformatics\u201414th International Workshop, WABI 2014, Wroclaw, Poland, September 8\u201310, 2014. Proceedings, vol.\u00a08701 of Lecture Notes in Computer Science, pp.\u00a052\u201367. Springer (2014)","DOI":"10.1007\/978-3-662-44753-6_5"},{"key":"806_CR11","doi-asserted-by":"crossref","unstructured":"Qin, J., Wang, W., Lu, Y., Xiao, C., Lin, X.: Efficient exact edit similarity query processing with the asymmetric signature scheme. In: SIGMOD, pp.\u00a01033\u20131044 (2011)","DOI":"10.1145\/1989323.1989431"},{"issue":"6","key":"806_CR12","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1186\/gb-2013-14-6-405","volume":"14","author":"RJ Roberts","year":"2013","unstructured":"Roberts, R.J., Carneiro, M.O., Schatz, M.C.: The advantages of SMRT sequencing. Genome Biol. 14(6), 405 (2013)","journal-title":"Genome Biol."},{"issue":"19","key":"806_CR13","doi-asserted-by":"publisher","first-page":"4838","DOI":"10.1093\/bioinformatics\/btaa252","volume":"36","author":"Y Song","year":"2020","unstructured":"Song, Y., Tang, H., Zhang, H., Zhang, Q.: Overlap detection on long, error-prone sequencing reads via smooth q-gram. Bioinformatics 36(19), 4838\u20134845 (2020)","journal-title":"Bioinformatics"},{"key":"806_CR14","doi-asserted-by":"crossref","unstructured":"Su, Z., Ahn, B.-R., Eom, K.-Y., Kang, M.-K., Kim, J.-P., Kim, M.-K.: Plagiarism detection using the levenshtein distance and smith-waterman algorithm. In: 2008 3rd International Conference on Innovative Computing Information and Control, pp.\u00a0569\u2013569 (2008)","DOI":"10.1109\/ICICIC.2008.422"},{"issue":"1\u20133","key":"806_CR15","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1016\/S0019-9958(85)80046-2","volume":"64","author":"E Ukkonen","year":"1985","unstructured":"Ukkonen, E.: Algorithms for approximate string matching. Inf. Control 64(1\u20133), 100\u2013118 (1985)","journal-title":"Inf. Control"},{"issue":"1","key":"806_CR16","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1145\/2627692.2627706","volume":"43","author":"S Wandelt","year":"2014","unstructured":"Wandelt, S., Deng, D., Gerdjikov, S., Mishra, S., Mitankin, P., Patil, M., Siragusa, E., Tiskin, A., Wang, W., Wang, J., Leser, U.: State-of-the-art in string similarity search and join. SIGMOD Record 43(1), 64\u201376 (2014)","journal-title":"SIGMOD Record"},{"issue":"1","key":"806_CR17","first-page":"1219","volume":"3","author":"J Wang","year":"2010","unstructured":"Wang, J., Li, G., Feng, J.: Trie-join: efficient trie-based string similarity joins with edit-distance constraints. PVLDB 3(1), 1219\u20131230 (2010)","journal-title":"PVLDB"},{"key":"806_CR18","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, G., Feng, J.: Can we beat the prefix filtering?: an adaptive framework for similarity join and search. In: SIGMOD, pp.\u00a085\u201396 (2012)","DOI":"10.1145\/2213836.2213847"},{"issue":"8","key":"806_CR19","doi-asserted-by":"publisher","first-page":"1916","DOI":"10.1109\/TKDE.2012.79","volume":"25","author":"W Wang","year":"2013","unstructured":"Wang, W., Qin, J., Xiao, C., Lin, X., Shen, H.T.: Vchunkjoin: an efficient algorithm for edit similarity joins. IEEE Trans. Knowl. Data Eng. 25(8), 1916\u20131929 (2013)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"1","key":"806_CR20","first-page":"933","volume":"1","author":"C Xiao","year":"2008","unstructured":"Xiao, C., Wang, W., Lin, X.: Ed-join: an efficient algorithm for similarity joins with edit distance constraints. PVLDB 1(1), 933\u2013944 (2008)","journal-title":"PVLDB"},{"key":"806_CR21","doi-asserted-by":"crossref","unstructured":"Zhang, H., Zhang, Q.: Embedjoin: efficient edit similarity joins via embeddings. In: KDD, pp. 585\u2013594 (2017)","DOI":"10.1145\/3097983.3098003"},{"key":"806_CR22","doi-asserted-by":"crossref","unstructured":"Zhang, H., Zhang, Q.: Minjoin: efficient edit similarity joins via local hash minima. In: KDD, pp.\u00a01093\u20131103. ACM (2019)","DOI":"10.1145\/3292500.3330853"},{"key":"806_CR23","doi-asserted-by":"crossref","unstructured":"Zini, M., Fabbri, M., Moneglia, M., Panunzi, A.: Plagiarism detection through multilevel text comparison. In: Proceedings of the Second International Conference on Automated Production of Cross Media Content for Multi-Channel Distribution, AXMEDIS 2006, Leeds, UK, December 13\u201315, 2006, pp.\u00a0181\u2013185. IEEE Computer Society (2006)","DOI":"10.1109\/AXMEDIS.2006.40"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-023-00806-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00778-023-00806-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-023-00806-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T20:02:36Z","timestamp":1708545756000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00778-023-00806-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,21]]},"references-count":23,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,3]]}},"alternative-id":["806"],"URL":"https:\/\/doi.org\/10.1007\/s00778-023-00806-z","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,21]]},"assertion":[{"value":"20 July 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 January 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 July 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}