{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T20:26:07Z","timestamp":1725999967358},"publisher-location":"Berlin, Heidelberg","reference-count":37,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662583838"},{"type":"electronic","value":"9783662583845"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-662-58384-5_4","type":"book-chapter","created":{"date-parts":[[2018,11,21]],"date-time":"2018-11-21T10:27:33Z","timestamp":1542796053000},"page":"89-118","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["SjClust: A Framework for Incorporating Clustering into Set Similarity Join Algorithms"],"prefix":"10.1007","author":[{"given":"Leonardo Andrade","family":"Ribeiro","sequence":"first","affiliation":[]},{"given":"Alfredo","family":"Cuzzocrea","sequence":"additional","affiliation":[]},{"given":"Karen Aline Alves","family":"Bezerra","sequence":"additional","affiliation":[]},{"given":"Ben Hur Bahia","family":"do Nascimento","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,11,22]]},"reference":[{"issue":"14","key":"4_CR1","first-page":"1846","volume":"6","author":"H Altwaijry","year":"2013","unstructured":"Altwaijry, H., Kalashnikov, D.V., Mehrotra, S.: Query-driven approach to entity resolution. PVLDB 6(14), 1846\u20131857 (2013)","journal-title":"PVLDB"},{"issue":"3","key":"4_CR2","first-page":"120","volume":"9","author":"H Altwaijry","year":"2015","unstructured":"Altwaijry, H., Mehrotra, S., Kalashnikov, D.V.: Query: a framework for integrating entity resolution with query processing. PVLDB 9(3), 120\u2013131 (2015)","journal-title":"PVLDB"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Andritsos, P., Fuxman, A., Miller, R.J.: Clean answers over dirty databases: a probabilistic approach. In: Proceedings of the ICDE Conference, p. 30 (2006)","DOI":"10.1109\/ICDE.2006.35"},{"key":"4_CR4","unstructured":"Baeza-Yates, R.A., Ribeiro-Neto, B.A.: Modern Information Retrieval - The Concepts and Technology Behind Search, 2 edn. Pearson Education Limited, Harlow, England (2011)"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Bayardo, R.J., Ma, Y., Srikant, R.: Scaling up all pairs similarity search. In: Proceedings of the WWW Conference, pp. 131\u2013140 (2007)","DOI":"10.1145\/1242572.1242591"},{"issue":"1","key":"4_CR6","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/s00778-008-0098-x","volume":"18","author":"O Benjelloun","year":"2009","unstructured":"Benjelloun, O., Garcia-Molina, H., Menestrina, D., Su, Q., Whang, S.E., Widom, J.: Swoosh: a generic approach to entity resolution. The VLDB J. 18(1), 255\u2013276 (2009)","journal-title":"The VLDB J."},{"issue":"1","key":"4_CR7","first-page":"598","volume":"2","author":"G Beskales","year":"2009","unstructured":"Beskales, G., Soliman, M.A., Ilyas, I.F., Ben-David, S.: Modeling and querying possible repairs in duplicate detection. PVLDB 2(1), 598\u2013609 (2009)","journal-title":"PVLDB"},{"key":"4_CR8","unstructured":"Cannataro, M., Cuzzocrea, A., Mastroianni, C., Ortale, R., Pugliese, A.: Modeling adaptive hypermedia with an object-oriented approach and XML. In: WebDyn 2002 (2002)"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ganjam, K., Ganti, V., Motwani, R.: Robust and efficient fuzzy match for online data cleaning. In: Proceedings of the SIGMOD Conference, pp. 313\u2013324 (2003)","DOI":"10.1145\/872757.872796"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ganti, V., Kaushik, R.: A primitive operator for similarity joins in data cleaning. In: Proceedings of the 22nd International Conference on Data Engineering, p. 5 (2006)","DOI":"10.1109\/ICDE.2006.9"},{"key":"4_CR11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31164-2","volume-title":"Data Matching - Concepts and Techniques for Record Linkage, Entity Resolution, and Duplicate Detection","author":"P Christen","year":"2012","unstructured":"Christen, P.: Data Matching - Concepts and Techniques for Record Linkage, Entity Resolution, and Duplicate Detection. Springer, Heidelberg (2012). \nhttps:\/\/doi.org\/10.1007\/978-3-642-31164-2"},{"key":"4_CR12","unstructured":"Cohen, W.W., Ravikumar, P.D., Fienberg, S.E.: A comparison of string distance metrics for name-matching tasks. In: Proceedings of IJCAI 2003 Workshop on Information Integration on the Web, pp. 73\u201378 (2003)"},{"key":"4_CR13","volume-title":"Principles of Data Integration","author":"AH Doan","year":"2012","unstructured":"Doan, A.H., Halevy, A.Y., Ives, Z.G.: Principles of Data Integration. Morgan Kaufmann, Waltham (2012)"},{"issue":"1","key":"4_CR14","first-page":"1","volume":"19","author":"AK Elmagarmid","year":"2007","unstructured":"Elmagarmid, A.K., Ipeirotis, P.G., Verykios, V.S.: Duplicate record detection: a survey. TKDE 19(1), 1\u201316 (2007)","journal-title":"TKDE"},{"issue":"1","key":"4_CR15","first-page":"1282","volume":"2","author":"O Hassanzadeh","year":"2009","unstructured":"Hassanzadeh, O., Chiang, F., Miller, R.J., Lee, H.C.: Framework for evaluating clustering algorithms in duplicate detection. PVLDB 2(1), 1282\u20131293 (2009)","journal-title":"PVLDB"},{"issue":"5","key":"4_CR16","doi-asserted-by":"publisher","first-page":"1141","DOI":"10.1007\/s00778-009-0161-2","volume":"18","author":"O Hassanzadeh","year":"2009","unstructured":"Hassanzadeh, O., Miller, R.J.: Creating probabilistic databases from duplicated data. VLDB J. 18(5), 1141\u20131166 (2009)","journal-title":"VLDB J."},{"issue":"2","key":"4_CR17","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1145\/568271.223807","volume":"24","author":"Mauricio A. Hern\u00e1ndez","year":"1995","unstructured":"Hern\u00e1ndez, M.A., Stolfo, S.J.: The merge\/purge problem for large databases. In: Proceedings of the SIGMOD Conference, pp. 127\u2013138 (1995)","journal-title":"ACM SIGMOD Record"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Idreos, S., Papaemmanouil, O., Chaudhuri, S.: Overview of data exploration techniques. In: Proceedings of the SIGMOD Conference, pp. 277\u2013281 (2015)","DOI":"10.1145\/2723372.2731084"},{"key":"4_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1007\/978-3-642-20152-3_3","volume-title":"Database Systems for Advanced Applications","author":"M Kazimianec","year":"2011","unstructured":"Kazimianec, M., Augsten, N.: PG-Skip: proximity graph based clustering of long strings. In: Yu, J.X., Kim, M.H., Unland, R. (eds.) DASFAA 2011. LNCS, vol. 6588, pp. 31\u201346. Springer, Heidelberg (2011). \nhttps:\/\/doi.org\/10.1007\/978-3-642-20152-3_3"},{"issue":"1","key":"4_CR20","first-page":"484","volume":"3","author":"H K\u00f6pcke","year":"2010","unstructured":"K\u00f6pcke, H., Thor, A., Rahm, E.: Evaluation of entity resolution approaches on real-world match problems. PVLDB 3(1), 484\u2013493 (2010)","journal-title":"PVLDB"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Koudas, N., Sarawagi, S., Srivastava, D.: Record linkage: similarity measures and algorithms. In: Proceedings of the SIGMOD Conference, pp. 802\u2013803 (2006)","DOI":"10.1145\/1142473.1142599"},{"key":"4_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1007\/978-3-642-37574-3_8","volume-title":"Transactions on Large-Scale Data- and Knowledge-Centered Systems VIII","author":"CK-S Leung","year":"2013","unstructured":"Leung, C.K.-S., Cuzzocrea, A., Jiang, F.: Discovering frequent patterns from uncertain data streams with time-fading and landmark models. In: Hameurlain, A., K\u00fcng, J., Wagner, R., Cuzzocrea, A., Dayal, U. (eds.) Transactions on Large-Scale Data- and Knowledge-Centered Systems VIII. LNCS, vol. 7790, pp. 174\u2013196. Springer, Heidelberg (2013). \nhttps:\/\/doi.org\/10.1007\/978-3-642-37574-3_8"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Liu, H., Ashwin Kumar, T.K, Thomas, J.P.: Cleaning framework for big data - object identification and linkage. In: Proceedings of the Big Data Congress, pp. 215\u2013221 (2015)","DOI":"10.1109\/BigDataCongress.2015.38"},{"issue":"9","key":"4_CR24","first-page":"636","volume":"9","author":"W Mann","year":"2016","unstructured":"Mann, W., Augsten, N., Bouros, P.: An empirical evaluation of set similarity join techniques. PVLDB 9(9), 636\u2013647 (2016)","journal-title":"PVLDB"},{"key":"4_CR25","unstructured":"Mazeika, A., B\u00f6hlen, M.H.: Cleansing databases of misspelled proper nouns. In: Proceedings of the VLDB Workshop on Clean Databases (2006)"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"McCallum, A., Nigam, K., Ungar, L.H.: Efficient clustering of high-dimensional data sets with application to reference matching. In: Proceedings of the SIGKDD Conference, pp. 169\u2013178 (2000)","DOI":"10.1145\/347090.347123"},{"issue":"1","key":"4_CR27","first-page":"208","volume":"3","author":"D Menestrina","year":"2010","unstructured":"Menestrina, D., Whang, S., Garcia-Molina, H.: Evaluating entity resolution results. PVLDB 3(1), 208\u2013219 (2010)","journal-title":"PVLDB"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Ribeiro, L.A., Cuzzocrea, A., Bezerra, K.A.A., do Nascimento, B.H.B.: SjClust: towards a framework for integrating similarity join algorithms and clustering. In: Proceedings of the ICEIS Conference (2016)","DOI":"10.5220\/0005868700750080"},{"key":"4_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/978-3-319-44403-1_12","volume-title":"Database and Expert Systems Applications","author":"LA Ribeiro","year":"2016","unstructured":"Ribeiro, L.A., Cuzzocrea, A., Bezerra, K.A.A., do Nascimento, B.H.B.: Incorporating clustering into set similarity join algorithms: the SjClust framework. In: Hartmann, S., Ma, H. (eds.) DEXA 2016. LNCS, vol. 9827, pp. 185\u2013204. Springer, Cham (2016). \nhttps:\/\/doi.org\/10.1007\/978-3-319-44403-1_12"},{"issue":"1","key":"4_CR30","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.is.2010.07.003","volume":"36","author":"LA Ribeiro","year":"2011","unstructured":"Ribeiro, L.A., H\u00e4rder, T.: Generalizing prefix filtering to improve set similarity joins. Inf. Syst. 36(1), 62\u201378 (2011)","journal-title":"Inf. Syst."},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Sarawagi, S., Kirpal, A.: Efficient set joins on similarity predicates. In: Proceedings of the SIGMOD Conference, pp. 743\u2013754 (2004)","DOI":"10.1145\/1007568.1007652"},{"key":"4_CR32","unstructured":"Schneider, N.C., Ribeiro, L.A., de Souza In\u00e1cio, A., Wagner, H.M., von Wangenheim, A.: SimDataMapper: an architectural pattern to integrate declarative similarity matching into database applications. In: Proceedings of the SBBD Conference, pp. 967\u2013972 (2015)"},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Sidney, C.F., Mendes, D.S., Ribeiro, L.A., H\u00e4rder, T.: Performance prediction for set similarity joins. In: Proceedings of the SAC Conference, pp. 967\u2013972 (2015)","DOI":"10.1145\/2695664.2695694"},{"key":"4_CR34","doi-asserted-by":"crossref","unstructured":"Tang, N.: Big RDF data cleaning. In: Proceedings of the ICDE Conference Workshops, pp. 77\u201379 (2015)","DOI":"10.1109\/ICDEW.2015.7129549"},{"issue":"11","key":"4_CR35","first-page":"1483","volume":"5","author":"J Wang","year":"2012","unstructured":"Wang, J., Kraska, T., Franklin, M.J., Feng, J.: CrowdER: crowdsourcing entity resolution. PVLDB 5(11), 1483\u20131494 (2012)","journal-title":"PVLDB"},{"issue":"3","key":"4_CR36","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1145\/2000824.2000825","volume":"36","author":"C Xiao","year":"2011","unstructured":"Xiao, C., Wang, W., Lin, X., Yu, J.X., Wang, G.: Efficient similarity joins for near-duplicate detection. TODS 36(3), 15 (2011)","journal-title":"TODS"},{"issue":"3","key":"4_CR37","first-page":"77","volume":"9","author":"F Zhang","year":"2013","unstructured":"Zhang, F., Xue, H.-F., Xu, D.-S., Zhang, Y.-H., You, F.: Big data cleaning algorithms in cloud computing. iJOE 9(3), 77\u201381 (2013)","journal-title":"iJOE"}],"container-title":["Lecture Notes in Computer Science","Transactions on Large-Scale Data- and Knowledge-Centered Systems XXXVIII"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-58384-5_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2018,11,21]],"date-time":"2018-11-21T10:28:35Z","timestamp":1542796115000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-58384-5_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783662583838","9783662583845"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-58384-5_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}