{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T21:49:15Z","timestamp":1753739355679,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031469930"},{"type":"electronic","value":"9783031469947"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-46994-7_15","type":"book-chapter","created":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T07:02:16Z","timestamp":1698303736000},"page":"171-184","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Minwise-Independent Permutations with\u00a0Insertion and\u00a0Deletion of\u00a0Features"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8824-6202","authenticated-orcid":false,"given":"Rameshwar","family":"Pratap","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Raghav","family":"Kulkarni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Bayardo, R.J., Ma, Y., Srikant, R.: Scaling up all pairs similarity search. In: Proceedings of the 16th International Conference on World Wide Web, WWW 2007, pp. 131\u2013140. Association for Computing Machinery , New York, NY, USA (2007)","DOI":"10.1145\/1242572.1242591"},{"key":"15_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1007\/978-3-319-42634-1_12","volume-title":"Computing and Combinatorics","author":"D Bera","year":"2016","unstructured":"Bera, D., Pratap, R.: Frequent-itemset mining using locality-sensitive hashing. In: Dinh, T.N., Thai, M.T. (eds.) COCOON 2016. LNCS, vol. 9797, pp. 143\u2013155. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-42634-1_12"},{"key":"15_CR3","unstructured":"Broder, A.Z.: On the resemblance and containment of documents. In: . Proceedings of Compression and Complexity of Sequences 1997, pp. 21\u201329. IEEE (1997)"},{"key":"15_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/3-540-45123-4_1","volume-title":"Combinatorial Pattern Matching","author":"AZ Broder","year":"2000","unstructured":"Broder, A.Z.: Identifying and filtering near-duplicate documents. In: Giancarlo, R., Sankoff, D. (eds.) CPM 2000. LNCS, vol. 1848, pp. 1\u201310. Springer, Heidelberg (2000). https:\/\/doi.org\/10.1007\/3-540-45123-4_1"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Broder, A.Z., Charikar, M., Frieze, A.M., Mitzenmacher, M.: Min-wise independent permutations (extended abstract). In: Proceedings of the Thirtieth Annual ACM Symposium on Theory of Computing, STOC 1998, pp. 327\u2013336. Association for Computing Machinery, New York, NY, USA (1998)","DOI":"10.1145\/276698.276781"},{"key":"15_CR6","unstructured":"Broder, A.Z., Glassman, S.C., Nelson, C.G., Manasse, M.S., Zweig, G.G.: Method for clustering closely resembling data objects, September 12 2000. US Patent 6,119,124"},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Christiani, T., Pagh, R.: Set similarity search beyond minhash. In: Proceedings of the 49th Annual ACM SIGACT Symposium on Theory of Computing, STOC 2017, pp. 1094\u20131107. Association for Computing Machinery, New York, NY, USA, (2017)","DOI":"10.1145\/3055399.3055443"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Christiani, T., Pagh, R., Sivertsen, J.: Scalable and robust set similarity join. In: 34th IEEE International Conference on Data Engineering, ICDE 2018, Paris, France, April 16\u201319, 2018, pp. 1240\u20131243. IEEE Computer Society (2018)","DOI":"10.1109\/ICDE.2018.00120"},{"key":"15_CR9","doi-asserted-by":"crossref","unstructured":"Chum, O., Philbin, J., Zisserman, A.: Near duplicate image detection: min-hash and TF-IDF weighting. In: Everingham, M., Needham, C.J., Fraile, R. (Eds.), Proceedings of the British Machine Vision Conference 2008, Leeds, UK, September 2008, pp. 1\u201310. British Machine Vision Association (2008)","DOI":"10.5244\/C.22.50"},{"key":"15_CR10","volume-title":"Introduction to Algorithms","author":"TH Cormen","year":"2009","unstructured":"Cormen, T.H., Leiserson, C.E., Rivest, R.L., Stein, C.: Introduction to Algorithms, 3rd edn. MIT Press, Cambridge (2009)","edition":"3"},{"key":"15_CR11","doi-asserted-by":"crossref","unstructured":"Das, A.S., Datar, M., Garg, A., Rajaram, S.: Google news personalization: scalable online collaborative filtering. In WWW 2007: Proceedings of the 16th international conference on World Wide Web, pp. 271\u2013280. ACM, New York, NY, USA (2007)","DOI":"10.1145\/1242572.1242610"},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"Henzinger, M.: Finding near-duplicate web pages: a large-scale evaluation of algorithms. In: Proceedings of the 29th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2006, pp. 284\u2013291. Association for Computing Machinery, New York, NY, USA (2006)","DOI":"10.1145\/1148170.1148222"},{"key":"15_CR13","doi-asserted-by":"crossref","unstructured":"Indyk, P., Motwani, R.: Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of the Thirtieth Annual ACM Symposium on the Theory of Computing, Dallas, Texas, USA, May 23\u201326, 1998, pp. 604\u2013613 (1998)","DOI":"10.1145\/276698.276876"},{"issue":"8","key":"15_CR14","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1145\/1978542.1978566","volume":"54","author":"P Li","year":"2011","unstructured":"Li, P., K\u00f6nig, A.C.: Theory and applications of b-bit minwise hashing. Commun. ACM 54(8), 101\u2013109 (2011)","journal-title":"Commun. ACM"},{"key":"15_CR15","unstructured":"Li, P., Owen, A.B., Zhang, C.-H.: One permutation hashing. In: Bartlett, P.L., Pereira, F.C.N., Burges, L\u00e9on Bottou, C.J.C., Weinberger, K.Q., (Eds.), Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3\u20136, 2012, Lake Tahoe, Nevada, United States, pp. 3122\u20133130 (2012)"},{"key":"15_CR16","doi-asserted-by":"crossref","unstructured":"Li, P., Shrivastava, A., K\u00f6nig, A.C.: B-bit minwise hashing in practice. In: Proceedings of the 5th Asia-Pacific Symposium on Internetware, Internetware 2013, New York, NY, USA. Association for Computing Machinery (2013)","DOI":"10.1145\/2532443.2532446"},{"key":"15_CR17","unstructured":"Lichman, M.: UCI machine learning repository (2013)"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Singh Manku, G., Jain, A., Sarma, A.D.: Detecting near-duplicates for web crawling. In: Proceedings of the 16th International Conference on World Wide Web, WWW 2007, pp. 141\u2013150. Association for Computing Machinery, New York, NY, USA (2007)","DOI":"10.1145\/1242572.1242592"},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"McCauley, S., Mikkelsen, J.W., Pagh, R.: Set similarity search for skewed data. In Proceedings of the 37th ACM SIGMOD-SIGACT-SIGAI Symposium on Principles of Database Systems, SIGMOD\/PODS 2018, pap.63\u201374, New York, NY, USA, 2018. Association for Computing Machinery (2018)","DOI":"10.1145\/3196959.3196985"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Mitzenmacher, M., Pagh, R. Pham, ,N.: Efficient estimation for high similarities using odd sketches. In: Proceedings of the 23rd International Conference on World Wide Web, WWW 2014, p\u2013118. Association for Computing Machinery, New York, NY, USA, 2014","DOI":"10.1145\/2566486.2568017"},{"key":"15_CR21","doi-asserted-by":"crossref","unstructured":"Pratap,R ., Kulkarni, R.: Minwise-independent permutations with insertion and deletion of features. arxiv.org\/abs\/2308.11240 (2023)","DOI":"10.1007\/978-3-031-46994-7_15"},{"key":"15_CR22","unstructured":"Shrivastava, A., Li, P.: Improved densification of one permutation hashing. In: Proceedings of the Thirtieth Conference On Uncertainty In Artificial Intelligence, UAI 2014, pp. 732\u2013741. AUAI Press, Arlington, Virginia, USA, (2014)"},{"issue":"14","key":"15_CR23","doi-asserted-by":"publisher","first-page":"1930","DOI":"10.14778\/2556549.2556574","volume":"6","author":"N Sundaram","year":"2013","unstructured":"Sundaram, N., et al.: Streaming similarity search over one billion tweets using parallel locality-sensitive hashing. Proc. VLDB Endow. 6(14), 1930\u20131941 (2013)","journal-title":"Proc. VLDB Endow."}],"container-title":["Lecture Notes in Computer Science","Similarity Search and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-46994-7_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T19:31:06Z","timestamp":1703446266000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-46994-7_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031469930","9783031469947"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-46994-7_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"27 October 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SISAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Similarity Search and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Coruna","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"sisap2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.sisap.org\/2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"48% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Familiarity, LaTeX and LNCS friendly, steering committee has subscription","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}