{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T21:57:54Z","timestamp":1725832674609},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319261478"},{"type":"electronic","value":"9783319261485"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-26148-5_14","type":"book-chapter","created":{"date-parts":[[2015,10,14]],"date-time":"2015-10-14T05:50:57Z","timestamp":1444801857000},"page":"237-256","source":"Crossref","is-referenced-by-count":2,"title":["$$\\partial u\\partial u$$ Multi-Tenanted Framework: Distributed Near Duplicate Detection for Big Data"],"prefix":"10.1007","author":[{"given":"Pradeeban","family":"Kathiravelu","sequence":"first","affiliation":[]},{"given":"Helena","family":"Galhardas","sequence":"additional","affiliation":[]},{"given":"Lu\u00eds","family":"Veiga","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,10,28]]},"reference":[{"issue":"4","key":"14_CR1","first-page":"3","volume":"23","author":"E Rahm","year":"2000","unstructured":"Rahm, E., Do, H.H.: Data cleaning: Problems and current approaches. IEEE Data Eng. Bull. 23(4), 3\u201313 (2000)","journal-title":"IEEE Data Eng. Bull."},{"key":"14_CR2","unstructured":"Oliveira, P., Rodrigues, F., Henriques, P., Galhardas, H.: A taxonomy of data quality problems. In: 2nd Int. Workshop on Data and Information Quality, pp. 219\u2013233 (2005)"},{"issue":"15\u201321","key":"14_CR3","first-page":"48","volume":"14","author":"J Barateiro","year":"2005","unstructured":"Barateiro, J., Galhardas, H.: A survey of data quality tools. Datenbank-Spektrum 14(15\u201321), 48 (2005)","journal-title":"Datenbank-Spektrum"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Vernica, R., Carey, M.J., Li, C.: Efficient parallel set-similarity joins using mapreduce. In: Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data, pp. 495\u2013506. ACM (2010)","DOI":"10.1145\/1807167.1807222"},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Di Sanzo, P., Rughetti, D., Ciciani, B., Quaglia, F.: Auto-tuning of cloud-based in-memory transactional data grids via machine learning. In: 2012 Second Symposium on Network Cloud Computing and Applications (NCCA), pp. 9\u201316. IEEE (2012)","DOI":"10.1109\/NCCA.2012.20"},{"key":"14_CR6","unstructured":"Johns, M.: Getting Started with Hazelcast. Packt Publishing Ltd. (2013)"},{"key":"14_CR7","unstructured":"Marchioni, F.: Infinispan data grid platform. Packt Publishing Ltd. (2012)"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Samovsky, M., Kacur, T.: Cloud-based classification of text documents using the gridgain platform. In: 2012 7th IEEE International Symposium on Applied Computational Intelligence and Informatics (SACI), pp. 241\u2013245. IEEE (2012)","DOI":"10.1109\/SACI.2012.6250009"},{"key":"14_CR9","unstructured":"Seovic, A., Falco, M., Peralta, P.: Oracle Coherence 3.5. Packt Publishing Ltd. (2010)"},{"key":"14_CR10","unstructured":"Arora, P., Khandelwal, D., Marshall, J., Usha, A., Sadtler, C., et al.: Scalable, Integrated Solutions for Elastic Caching Using IBM WebSphere eXtreme Scale. IBM Redbooks (2011)"},{"key":"14_CR11","unstructured":"Galhardas, H., Florescu, D., Shasha, D., Simon, E., Saita, C.: Declarative data cleaning: Language, model, and algorithms (2001)"},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Zhang, D.Q., Chang, S.F.: Detecting image near-duplicate by stochastic attributed relational graph matching with learning. In: Proceedings of the 12th Annual ACM International Conference on Multimedia, pp. 877\u2013884. ACM (2004)","DOI":"10.1145\/1027527.1027730"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Bilenko, M., Mooney, R.J.: Adaptive duplicate detection using learnable string similarity measures. In: Proceedings of the Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 39\u201348. ACM (2003)","DOI":"10.1145\/956755.956759"},{"key":"14_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"136","DOI":"10.1007\/978-3-642-23544-3_11","volume-title":"Data Warehousing and Knowledge Discovery","author":"H Galhardas","year":"2011","unstructured":"Galhardas, H., Lopes, A., Santos, E.: Support for user involvement in data cleaning. In: Cuzzocrea, A., Dayal, U. (eds.) DaWaK 2011. LNCS, vol. 6862, pp. 136\u2013151. Springer, Heidelberg (2011)"},{"key":"14_CR15","doi-asserted-by":"crossref","unstructured":"Hern\u00e1ndez, M.A., Stolfo, S.J.: The merge\/purge problem for large databases. In: ACM SIGMOD Record, vol. 24, pp. 127\u2013138. ACM (1995)","DOI":"10.1145\/568271.223807"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X., Yu, J.X.: Efficient similarity joins for near duplicate detection. In: Proceedings of the 17th International Conference on World Wide Web, pp. 131\u2013140. ACM (2008)","DOI":"10.1145\/1367497.1367516"},{"issue":"9","key":"14_CR17","doi-asserted-by":"publisher","first-page":"1537","DOI":"10.1109\/TKDE.2011.127","volume":"24","author":"P Christen","year":"2012","unstructured":"Christen, P.: A survey of indexing techniques for scalable record linkage and deduplication. IEEE Transactions on Knowledge and Data Engineering 24(9), 1537\u20131555 (2012)","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Wang, C., Wang, J., Lin, X., Wang, W., Wang, H., Li, H., Tian, W., Xu, J., Li, R.: Mapdupreducer: detecting near duplicates over massive datasets. In: Proceedings of the 2010 ACM SIGMOD International Conference on Management of data, pp. 1119\u20131122. ACM (2010)","DOI":"10.1145\/1807167.1807296"},{"issue":"12","key":"14_CR19","doi-asserted-by":"publisher","first-page":"1878","DOI":"10.14778\/2367502.2367527","volume":"5","author":"L Kolb","year":"2012","unstructured":"Kolb, L., Thor, A., Rahm, E.: Dedoop: efficient deduplication with hadoop. Proceedings of the VLDB Endowment 5(12), 1878\u20131881 (2012)","journal-title":"Proceedings of the VLDB Endowment"},{"key":"14_CR20","unstructured":"Lwenstein, B.: Benchmarking of Middleware Systems: Evaluating and Comparing the Performance and Scalability of XVSM (MozartSpaces), JavaSpaces (GigaSpaces XAP) and J2EE (JBoss AS). VDM Verlag (2010)"},{"key":"14_CR21","unstructured":"Ferrante, M.: Java frameworks for high-level distributed scientific programming (2010)"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"El-Refaey, M., Rimal, B.P.: Grid, soa and cloud computing: On-demand computing models. Computational and Data Grids: Principles, Applications, and Design, 45 (2012)","DOI":"10.4018\/978-1-61350-113-9.ch003"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Mohanty, S., Jagadeesh, M., Srivatsa, H.: Extracting value from big data: in-memory solutions, real time analytics, and recommendation systems. In: Big Data Imperatives, pp. 221\u2013250. Springer (2013)","DOI":"10.1007\/978-1-4302-4873-6_8"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Kathiravelu, P., Veiga, L.: An adaptive distributed simulator for cloud and mapreduce algorithms and architectures. In: 2014 IEEE\/ACM 7th International Conference on Utility and Cloud Computing (UCC), pp. 79\u201388. IEEE (2014)","DOI":"10.1109\/UCC.2014.16"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Sarnovsky, M., Ulbrik, Z.: Cloud-based clustering of text documents using the ghsom algorithm on the gridgain platform. In: 2013 IEEE 8th International Symposium on Applied Computational Intelligence and Informatics (SACI), pp. 309\u2013313. IEEE (2013)","DOI":"10.1109\/SACI.2013.6608988"}],"container-title":["Lecture Notes in Computer Science","On the Move to Meaningful Internet Systems: OTM 2015 Conferences"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-26148-5_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,31]],"date-time":"2019-05-31T05:52:28Z","timestamp":1559281948000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-26148-5_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319261478","9783319261485"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-26148-5_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]}}}