{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:19:35Z","timestamp":1750220375860,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,7,11]],"date-time":"2021-07-11T00:00:00Z","timestamp":1625961600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,7,11]]},"DOI":"10.1145\/3404835.3463246","type":"proceedings-article","created":{"date-parts":[[2021,7,12]],"date-time":"2021-07-12T03:08:25Z","timestamp":1626059305000},"page":"2398-2404","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":13,"title":["CopyCat: Near-Duplicates Within and Between the ClueWeb and the Common Crawl"],"prefix":"10.1145","author":[{"given":"Maik","family":"Fr\u00f6be","sequence":"first","affiliation":[{"name":"Martin-Luther-Universit\u00e4t Halle-Wittenberg, Halle, Germany"}]},{"given":"Janek","family":"Bevendorff","sequence":"additional","affiliation":[{"name":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"}]},{"given":"Lukas","family":"Gienapp","sequence":"additional","affiliation":[{"name":"Leipzig University, Leipzig, Germany"}]},{"given":"Michael","family":"V\u00f6lske","sequence":"additional","affiliation":[{"name":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"}]},{"given":"Benno","family":"Stein","sequence":"additional","affiliation":[{"name":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"}]},{"given":"Martin","family":"Potthast","sequence":"additional","affiliation":[{"name":"Leipzig University, Leipzig, Germany"}]},{"given":"Matthias","family":"Hagen","sequence":"additional","affiliation":[{"name":"Martin-Luther-Universit\u00e4t Halle-Wittenberg, Halle, Germany"}]}],"member":"320","published-online":{"date-parts":[[2021,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498759.1498837"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646283"},{"key":"e_1_3_2_1_3_1","first-page":"147","article-title":"Near duplicate document detection survey","volume":"2","author":"Alsulami Bassma","year":"2012","unstructured":"Bassma Alsulami, Maysoon Abulkhair, and Fathy Eassa. 2012. Near duplicate document detection survey. International Journal of Computer Science and Communications Networks, Vol. 2, 2 (2012), 147--151.","journal-title":"International Journal of Computer Science and Communications Networks"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--540--30213--1_6"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1099554.1099733"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--319--76941--7_83"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/SEQUEN.1997.666900"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/509907.509965"},{"key":"e_1_3_2_1_9_1","volume-title":"VLDB 2000, Proceedings of 26th International Conference on Very Large Data Bases, September 10--14","author":"Cho Junghoo","year":"2000","unstructured":"Junghoo Cho and Hector Garcia-Molina. 2000. The Evolution of the Web and Implications for an Incremental Crawler. In VLDB 2000, Proceedings of 26th International Conference on Very Large Data Bases, September 10--14, 2000, Cairo, Egypt, Amr El Abbadi, Michael L. Brodie, Sharma Chakravarthy, Umeshwar Dayal, Nabil Kamel, Gunter Schlageter, and Kyu-Young Whang (Eds.). Morgan Kaufmann, 200--209. http:\/\/www.vldb.org\/conf\/2000\/P200.pdf"},{"key":"e_1_3_2_1_10_1","volume-title":"Overview of the TREC 2019 deep learning track. CoRR","volume":"2003","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M. Voorhees. 2020. Overview of the TREC 2019 deep learning track. CoRR, Vol. abs\/2003.07820 (2020). arxiv: 2003.07820 https:\/\/arxiv.org\/abs\/2003.07820"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/LAWEB.2003.1250280"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/775152.775246"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401212"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--45442--5_2"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1148170.1148222"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--540--31849--1_62"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1718487.1718542"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1718487.1718535"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331339"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1242572.1242592"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/988672.988674"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1367497.1367557"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331399"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1277741.1277756"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2020.101987"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/3--540--45691-0_34"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080721"}],"event":{"name":"SIGIR '21: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Virtual Event Canada","acronym":"SIGIR '21"},"container-title":["Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3404835.3463246","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3404835.3463246","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:17:44Z","timestamp":1750191464000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3404835.3463246"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,11]]},"references-count":27,"alternative-id":["10.1145\/3404835.3463246","10.1145\/3404835"],"URL":"https:\/\/doi.org\/10.1145\/3404835.3463246","relation":{},"subject":[],"published":{"date-parts":[[2021,7,11]]},"assertion":[{"value":"2021-07-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}