{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:45:43Z","timestamp":1742913943643,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030602895"},{"type":"electronic","value":"9783030602901"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60290-1_33","type":"book-chapter","created":{"date-parts":[[2020,10,13]],"date-time":"2020-10-13T21:02:30Z","timestamp":1602622950000},"page":"425-440","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Unified Framework for Processing Exact and Approximate Top-k Set Similarity Join"],"prefix":"10.1007","author":[{"given":"Cihai","family":"Sun","sequence":"first","affiliation":[]},{"given":"Hongya","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yingyuan","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Zhenyu","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,14]]},"reference":[{"key":"33_CR1","unstructured":"Inverted index. Wikipedia. https:\/\/en.wikipedia.org\/wiki\/Inverted_index"},{"key":"33_CR2","unstructured":"Zipf\u2019s law. Wikipedia. https:\/\/en.wikipedia.org\/wiki\/Zipf%27s_law"},{"issue":"3","key":"33_CR3","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1016\/j.datak.2004.08.003","volume":"53","author":"F Angiulli","year":"2005","unstructured":"Angiulli, F., Pizzuti, C.: An approximate algorithm for top-k closest pairs join query in large high dimensional data. Data Knowl. Eng. 53(3), 263\u2013281 (2005)","journal-title":"Data Knowl. Eng."},{"key":"33_CR4","doi-asserted-by":"crossref","unstructured":"Bayardo, R.J., Ma, Y., Srikant, R.: Scaling up all pairs similarity search. In: Proceedings of the 16th International Conference on World Wide Web, pp. 131\u2013140 (2007)","DOI":"10.1145\/1242572.1242591"},{"key":"33_CR5","doi-asserted-by":"crossref","unstructured":"Cohen, W.W.: Integration of heterogeneous databases without common domains using queries based on textual similarity. In: Proceedings of the 1998 ACM SIGMOD International Conference on Management of Data, pp. 201\u2013212 (1998)","DOI":"10.1145\/276305.276323"},{"key":"33_CR6","doi-asserted-by":"crossref","unstructured":"Das, A.S., Datar, M., Garg, A., Rajaram, S.: Google news personalization: scalable online collaborative filtering. In: Proceedings of the 16th International Conference on World Wide Web, pp. 271\u2013280 (2007)","DOI":"10.1145\/1242572.1242610"},{"key":"33_CR7","doi-asserted-by":"crossref","unstructured":"Henzinger, M.: Finding near-duplicate web pages: a large-scale evaluation of algorithms. In: Proceedings of the 29th Annual International ACM SIGIR conference on Research and Development in Information Retrieval, pp. 284\u2013291 (2006)","DOI":"10.1145\/1148170.1148222"},{"issue":"1","key":"33_CR8","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1023\/A:1009761603038","volume":"2","author":"MA Hern\u00e1ndez","year":"1998","unstructured":"Hern\u00e1ndez, M.A., Stolfo, S.J.: Real-world data is dirty: data cleansing and the merge\/purge problem. Data Mining Knowl. Disc. 2(1), 9\u201337 (1998)","journal-title":"Data Mining Knowl. Disc."},{"key":"33_CR9","doi-asserted-by":"crossref","unstructured":"Kim, Y., Shim, K.: Parallel top-k similarity join algorithms using mapreduce. In: 2012 IEEE 28th International Conference on Data Engineering, pp. 510\u2013521 (2012)","DOI":"10.1109\/ICDE.2012.87"},{"key":"33_CR10","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1016\/j.is.2013.10.006","volume":"45","author":"Y Malkov","year":"2014","unstructured":"Malkov, Y., Ponomarenko, A., Logvinov, A., Krylov, V.: Approximate nearest neighbor algorithm based on navigable small world graphs. Inf. Syst. 45, 61\u201368 (2014)","journal-title":"Inf. Syst."},{"issue":"9","key":"33_CR11","doi-asserted-by":"publisher","first-page":"636","DOI":"10.14778\/2947618.2947620","volume":"9","author":"W Mann","year":"2016","unstructured":"Mann, W., Augsten, N., Bouros, P.: An empirical evaluation of set similarity join techniques. Proc. VLDB Endowment 9(9), 636\u2013647 (2016)","journal-title":"Proc. VLDB Endowment"},{"key":"33_CR12","unstructured":"Mann, W., Augsten, N., Jensen, C.S.: Swoop: Top-k similarity joins over set streams. arXiv preprint arXiv:1711.02476 (2017)"},{"issue":"4","key":"33_CR13","doi-asserted-by":"publisher","first-page":"e5372","DOI":"10.1371\/journal.pone.0005372","volume":"4","author":"M\u00c1 Serrano","year":"2009","unstructured":"Serrano, M.\u00c1., Flammini, A., Menczer, F.: Modeling statistical properties of written text. PLoS ONE 4(4), e5372 (2009)","journal-title":"PLoS ONE"},{"key":"33_CR14","doi-asserted-by":"crossref","unstructured":"Spertus, E., Sahami, M., Buyukkokten, O.: Evaluating similarity measures: a large-scale study in the orkut social network. In: Proceedings of the Eleventh ACM SIGKDD International Conference on Knowledge Discovery in Data Mining, pp. 678\u2013684 (2005)","DOI":"10.1145\/1081870.1081956"},{"key":"33_CR15","first-page":"379","volume":"2018","author":"I SriUsha","year":"2018","unstructured":"SriUsha, I., Choudary, K.R., Sasikala, T., et al.: Data mining techniques used in the recommendation of e-commerce services. ICECA 2018, 379\u2013382 (2018)","journal-title":"ICECA"},{"key":"33_CR16","doi-asserted-by":"crossref","unstructured":"Theobald, M., Weikum, G., Schenkel, R.: Top-k query evaluation with probabilistic guarantees. In: Proceedings of the Thirtieth International Conference on Very Large Data Bases, vol. 30, pp. 648\u2013659 (2004)","DOI":"10.1016\/B978-012088469-8.50058-9"},{"key":"33_CR17","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, G., Feng, J.: Can we beat the prefix filtering? an adaptive framework for similarity join and search. In: Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data, pp. 85\u201396 (2012)","DOI":"10.1145\/2213836.2213847"},{"key":"33_CR18","doi-asserted-by":"crossref","unstructured":"Wang, X., Qin, L., Lin, X., Zhang, Y., Chang, L.: Leveraging set relations in exact set similarity join. In: Proceedings of the VLDB Endowment (2017)","DOI":"10.14778\/3099622.3099624"},{"key":"33_CR19","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X., Shang, H.: Top-k set similarity joins. In: 2009 IEEE 25th International Conference on Data Engineering, pp. 916\u2013927 (2009)","DOI":"10.1109\/ICDE.2009.111"},{"issue":"3","key":"33_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2000824.2000825","volume":"36","author":"C Xiao","year":"2011","unstructured":"Xiao, C., Wang, W., Lin, X., Yu, J.X., Wang, G.: Efficient similarity joins for near-duplicate detection. ACM Trans. Database Syst. (TODS) 36(3), 1\u201341 (2011)","journal-title":"ACM Trans. Database Syst. (TODS)"}],"container-title":["Lecture Notes in Computer Science","Web and Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60290-1_33","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,24]],"date-time":"2021-04-24T12:27:03Z","timestamp":1619267223000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-60290-1_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602895","9783030602901"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60290-1_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"14 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"APWeb-WAIM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"apwebwaim2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.tjudb.cn\/apwebwaim2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"259","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"68","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the COVID-19 pandemic the conference was organized as a fully online conference.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}