{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T16:47:12Z","timestamp":1762015632124,"version":"3.37.3"},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,12,11]],"date-time":"2018-12-11T00:00:00Z","timestamp":1544486400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,12,11]],"date-time":"2018-12-11T00:00:00Z","timestamp":1544486400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DE140100999","DP160101513"],"award-info":[{"award-number":["DE140100999","DP160101513"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["NSFC61232006"],"award-info":[{"award-number":["NSFC61232006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DP150102728","DP140103578"],"award-info":[{"award-number":["DP150102728","DP140103578"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DP170101628","DE140100679"],"award-info":[{"award-number":["DP170101628","DE140100679"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DP170103710","DE150100563"],"award-info":[{"award-number":["DP170103710","DE150100563"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DP160101513"],"award-info":[{"award-number":["DP160101513"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2019,4]]},"DOI":"10.1007\/s00778-018-0529-2","type":"journal-article","created":{"date-parts":[[2018,12,11]],"date-time":"2018-12-11T03:48:36Z","timestamp":1544500116000},"page":"267-292","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["Leveraging set relations in exact and dynamic set similarity join"],"prefix":"10.1007","volume":"28","author":[{"given":"Xubo","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6068-5062","authenticated-orcid":false,"given":"Lu","family":"Qin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuemin","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ying","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lijun","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,12,11]]},"reference":[{"key":"529_CR1","unstructured":"http:\/\/liu.cs.uic.edu\/download\/data\/"},{"key":"529_CR2","unstructured":"http:\/\/www.cim.mcgill.ca\/~dudek\/206\/Logs\/AOL-user-ct-collection"},{"key":"529_CR3","unstructured":"http:\/\/www.informatik.uni-freiburg.de\/~cziegler\/BX\/"},{"key":"529_CR4","unstructured":"http:\/\/www.citeulike.org\/faq\/data.adp"},{"key":"529_CR5","unstructured":"http:\/\/dai-labor.de\/IRML\/datasets"},{"key":"529_CR6","unstructured":"http:\/\/www.discogs.com\/"},{"key":"529_CR7","unstructured":"http:\/\/www.cs.cmu.edu\/~enron"},{"key":"529_CR8","unstructured":"http:\/\/fimi.ua.ac.be\/data\/"},{"key":"529_CR9","unstructured":"http:\/\/www.dtic.upf.edu\/~ocelma\/MusicRecommendationDataset\/"},{"key":"529_CR10","unstructured":"http:\/\/konect.uni-koblenz.de\/networks\/lkml_person-thread"},{"key":"529_CR11","unstructured":"http:\/\/socialnetworks.mpi-sws.org\/data-imc2007.html"},{"key":"529_CR12","unstructured":"http:\/\/trec.nist.gov\/data\/reuters\/reuters.html"},{"key":"529_CR13","unstructured":"http:\/\/dbis-twitterdata.uibk.ac.at\/spotifyDataset\/"},{"key":"529_CR14","unstructured":"http:\/\/www.clearbits.net\/torrents\/\n                    \n                  1881-dec-2011"},{"key":"529_CR15","unstructured":"http:\/\/vi.sualize.us\/"},{"key":"529_CR16","unstructured":"http:\/\/wiki.dbpedia.org\/Downloads"},{"key":"529_CR17","unstructured":"http:\/\/dumps.wikimedia.org\/"},{"key":"529_CR18","unstructured":"http:\/\/konect.uni-koblenz.de\/networks\/"},{"key":"529_CR19","unstructured":"http:\/\/ssjoin.dbresearch.uni-salzburg.at\/datasets.html#ZIP"},{"key":"529_CR20","doi-asserted-by":"crossref","unstructured":"Anastasiu, D.C., Karypis,G.: L2AP: fast cosine similarity search with prefix L-2 norm bounds. In: Proceedings of ICDE\u201914 (2014)","DOI":"10.1109\/ICDE.2014.6816700"},{"key":"529_CR21","unstructured":"Arvind, A., Venkatesh, G., Raghav, K.: Efficient exact set-similarity joins. In: Proceedings of VLDB\u201906 (2006)"},{"key":"529_CR22","doi-asserted-by":"crossref","unstructured":"Bayardo, R.J., Ma, Y., M., Srikant, R.: Scaling up all pairs similarity search. In: Proceedings of WWW\u201907 (2007)","DOI":"10.1145\/1242572.1242591"},{"issue":"1","key":"529_CR23","first-page":"1","volume":"6","author":"P Bouros","year":"2012","unstructured":"Bouros, P., Ge, S., Mamoulis, N.: Spatio-textual similarity joins. PVLDB 6(1), 1\u201312 (2012)","journal-title":"Spatio-textual similarity joins. PVLDB"},{"key":"529_CR24","doi-asserted-by":"crossref","unstructured":"Broder, A.Z., Charikar, M., Frieze, A.M., Mitzenmacher, M.: Min-wise independent permutations (extended abstract). In: Proceeding of STOC\u201998 (1998)","DOI":"10.1145\/276698.276781"},{"key":"529_CR25","doi-asserted-by":"crossref","unstructured":"Chakrabarti, A., Parthasarathy, S.: Sequential hypothesis tests for adaptive locality sensitive hashing. In: Proceedings of WWW\u201915 (2015)","DOI":"10.1145\/2736277.2741665"},{"key":"529_CR26","doi-asserted-by":"crossref","unstructured":"Chakrabarti, K., Chaudhuri, S., Ganti, V., Xin, D.: An efficient filter for approximate membership checking. In: Proceedings of SIGMOD\u201908 (2008)","DOI":"10.1145\/1376616.1376697"},{"key":"529_CR27","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ganti, V., Kaushik, R.: A primitive operator for similarity joins in data cleaning. In: Proceedings of ICDE\u201906 (2006)","DOI":"10.1109\/ICDE.2006.9"},{"key":"529_CR28","doi-asserted-by":"crossref","unstructured":"Cohen, W.W.: Integration of heterogeneous databases without common domains using queries based on textual similarity. In: Proceedings of SIGMOD\u201998 (1998)","DOI":"10.1145\/276304.276323"},{"key":"529_CR29","doi-asserted-by":"crossref","unstructured":"Das, A., Datar, M., Garg, A., Rajaram, S.: Google news personalization: scalable online collaborative filtering. In: Proceedings of WWW\u201907 (2007)","DOI":"10.1145\/1242572.1242610"},{"issue":"4","key":"529_CR30","first-page":"360","volume":"9","author":"D Deng","year":"2015","unstructured":"Deng, D., Li, G., Wen, H., Feng, J.: An efficient partition based method for exact set similarity joins. PVLDB 9(4), 360\u2013371 (2015)","journal-title":"PVLDB"},{"key":"529_CR31","unstructured":"Gionis, A., Indyk, P., Motwani, R.: Similarity search in high dimensions via hashing. In: Proceedings of VLDB\u201999 (1999)"},{"key":"529_CR32","doi-asserted-by":"crossref","unstructured":"Indyk, P.,Motwani, R.: Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of STOC\u201998 (1998)","DOI":"10.1145\/276698.276876"},{"key":"529_CR33","unstructured":"Mann, W., Augsten, N.: PEL: position-enhanced length filter for set similarity joins. In: Proceedings of GVD\u201914 (2014)"},{"issue":"9","key":"529_CR34","first-page":"636","volume":"9","author":"W Mann","year":"2016","unstructured":"Mann, W., Augsten, N., Bouros, P.: An empirical evaluation of set similarity join techniques. PVLDB 9(9), 636\u2013647 (2016)","journal-title":"PVLDB"},{"issue":"8","key":"529_CR35","first-page":"704","volume":"5","author":"A Metwally","year":"2012","unstructured":"Metwally, A., Faloutsos, C.: V-smart-join: a scalable mapreduce framework for all-pair similarity joins of multisets and vectors. PVLDB 5(8), 704\u2013715 (2012)","journal-title":"PVLDB"},{"issue":"1","key":"529_CR36","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.is.2010.07.003","volume":"36","author":"LA Ribeiro","year":"2011","unstructured":"Ribeiro, L.A., H\u00e4rder, T.: Generalizing prefix filtering to improve set similarity joins. Inf. Syst. 36(1), 62\u201378 (2011)","journal-title":"Inf. Syst."},{"issue":"12","key":"529_CR37","first-page":"1059","volume":"7","author":"AD Sarma","year":"2014","unstructured":"Sarma, A.D., He, Y., Chaudhuri, S.: Clusterjoin: a similarity joins framework using map-reduce. PVLDB 7(12), 1059\u20131070 (2014)","journal-title":"PVLDB"},{"issue":"5","key":"529_CR38","first-page":"430","volume":"5","author":"V Satuluri","year":"2012","unstructured":"Satuluri, V., Parthasarathy, S.: Bayesian locality sensitive hashing for fast similarity search. PVLDB 5(5), 430\u2013441 (2012)","journal-title":"PVLDB"},{"key":"529_CR39","unstructured":"Schelter, S., Kunegis, J.: Tracking the trackers: a large-scale analysis of embedded web trackers. In: Tenth International AAAI Conference on Web and Social Media (2016)"},{"key":"529_CR40","doi-asserted-by":"crossref","unstructured":"Spertus, E., Sahami, M., Buyukkokten, O.: Evaluating similarity measures: a large-scale study in the orkut social network. In: Proceedings of SIGKDD\u201905 (2005)","DOI":"10.1145\/1081870.1081956"},{"key":"529_CR41","doi-asserted-by":"crossref","unstructured":"Theobald, M., Siddharth, J., Paepcke, A.: Spotsigs: robust and efficient near duplicate detection in large web collections. In: Proceedings of SIGIR\u201908 (2008)","DOI":"10.1145\/1390334.1390431"},{"key":"529_CR42","doi-asserted-by":"crossref","unstructured":"Vernica, R., Carey, M.J., Li, C.: Efficient parallel set-similarity joins using mapreduce. In: Proceedings of SIGMOD\u201910 (2010)","DOI":"10.1145\/1807167.1807222"},{"issue":"11","key":"529_CR43","first-page":"1483","volume":"5","author":"J Wang","year":"2012","unstructured":"Wang, J., Kraska, T., Franklin, M.J., Feng, J.: Crowder: Crowdsourcing entity resolution. PVLDB 5(11), 1483\u20131494 (2012)","journal-title":"PVLDB"},{"key":"529_CR44","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, G., Feng, J.: Can we beat the prefix filtering?: an adaptive framework for similarity join and search. In: Proceedings of SIGMOD\u201912 (2012)","DOI":"10.1145\/2213836.2213847"},{"issue":"9","key":"529_CR45","first-page":"925","volume":"10","author":"X Wang","year":"2017","unstructured":"Wang, X., Qin, L., Lin, X., Zhang, Y., Chang, L.: Leveraging set relations in exact set similarity join. PVLDB 10(9), 925\u2013936 (2017)","journal-title":"PVLDB"},{"key":"529_CR46","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X., Shang, H.: Top-k set similarity joins. In: Proceedings of ICDE\u201909 (2009)","DOI":"10.1109\/ICDE.2009.111"},{"key":"529_CR47","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X., Yu, J.X..: Efficient similarity joins for near duplicate detection. In: Proceedings of WWW\u201908 (2008)","DOI":"10.1145\/1367497.1367516"},{"issue":"3","key":"529_CR48","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1145\/2000824.2000825","volume":"36","author":"C Xiao","year":"2011","unstructured":"Xiao, C., Wang, W., Lin, X., Yu, J.X., Wang, G.: Efficient similarity joins for near-duplicate detection. ACM Trans. Database Syst. 36(3), 15 (2011)","journal-title":"ACM Trans. Database Syst."},{"key":"529_CR49","doi-asserted-by":"crossref","unstructured":"Zhai, J., Lou, Y., Gehrke, J.: ATLAS: a probabilistic algorithm for high dimensional similarity search. In: Proceedings of SIGMOD\u201911 (2011)","DOI":"10.1145\/1989323.1989428"},{"key":"529_CR50","volume-title":"Introduction to Semi-supervised Learning. Synthesis Lectures on Artificial Intelligence and Machine Learning","author":"X Zhu","year":"2009","unstructured":"Zhu, X., Goldberg, A.B.: Introduction to Semi-supervised Learning. Synthesis Lectures on Artificial Intelligence and Machine Learning. Morgan & Claypool Publishers, San Rafael (2009)"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-018-0529-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00778-018-0529-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-018-0529-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,16]],"date-time":"2020-05-16T15:36:56Z","timestamp":1589643416000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00778-018-0529-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12,11]]},"references-count":50,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2019,4]]}},"alternative-id":["529"],"URL":"https:\/\/doi.org\/10.1007\/s00778-018-0529-2","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"type":"print","value":"1066-8888"},{"type":"electronic","value":"0949-877X"}],"subject":[],"published":{"date-parts":[[2018,12,11]]},"assertion":[{"value":"9 November 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 August 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 November 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 December 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}