{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T21:58:32Z","timestamp":1759960712381,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2010,2,4]],"date-time":"2010-02-04T00:00:00Z","timestamp":1265241600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2010,2,4]]},"DOI":"10.1145\/1718487.1718535","type":"proceedings-article","created":{"date-parts":[[2010,8,24]],"date-time":"2010-08-24T13:16:50Z","timestamp":1282655810000},"page":"381-390","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":34,"title":["Learning URL patterns for webpage de-duplication"],"prefix":"10.1145","author":[{"given":"Hema Swetha","family":"Koppula","sequence":"first","affiliation":[{"name":"Yahoo! Labs, Bangalore, India"}]},{"given":"Krishna P.","family":"Leela","sequence":"additional","affiliation":[{"name":"Yahoo! Labs, Bangalore, India"}]},{"given":"Amit","family":"Agarwal","sequence":"additional","affiliation":[{"name":"Picsquare.com, Bangalore, India"}]},{"given":"Krishna Prasad","family":"Chitrapura","sequence":"additional","affiliation":[{"name":"Yahoo! Labs, Bangalore, India"}]},{"given":"Sachin","family":"Garg","sequence":"additional","affiliation":[{"name":"Yahoo! Labs, Bangalore, India"}]},{"given":"Amit","family":"Sasturkar","sequence":"additional","affiliation":[{"name":"Yahoo! Inc., Sunnyvale, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2010,2,4]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Hadoop: Open source implementation of mapreduce. http:\/\/lucene.apache.org\/hadoop\/.  Hadoop: Open source implementation of mapreduce. http:\/\/lucene.apache.org\/hadoop\/."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/775152.775192"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646283"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1242572.1242588"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1526709.1526880"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"T. Berners-Lee L. Masinter and M. McCahill. Uniform resource locators (url) 1994.  T. Berners-Lee L. Masinter and M. McCahill. Uniform resource locators (url) 1994.","DOI":"10.17487\/rfc1738"},{"key":"e_1_3_2_1_7_1","first-page":"21","volume-title":"SEQUENCES '97: Proceedings of the Compression and Complexity of Sequences 1997","author":"Broder A.","year":"1997"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/509907.509965"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1401890.1401917"},{"key":"e_1_3_2_1_10_1","first-page":"10","volume-title":"OSDI'04: Proceedings of the 6th conference on Symposium on Opearting Systems Design & Implementation","author":"Dean J.","year":"2004"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/951953.952397"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/262228"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1148170.1148222"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.0307528100"},{"volume-title":"Stanford University","year":"2003","author":"Kamvar S.","key":"e_1_3_2_1_15_1"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1099554.1099649"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1242572.1242592"},{"volume-title":"Stanford InfoLab","year":"1999","author":"Page L.","key":"e_1_3_2_1_18_1"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022643204877"}],"event":{"name":"WSDM'10: Third ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"New York New York USA","acronym":"WSDM'10"},"container-title":["Proceedings of the third ACM international conference on Web search and data mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1718487.1718535","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1718487.1718535","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T12:41:24Z","timestamp":1750250484000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1718487.1718535"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,2,4]]},"references-count":19,"alternative-id":["10.1145\/1718487.1718535","10.1145\/1718487"],"URL":"https:\/\/doi.org\/10.1145\/1718487.1718535","relation":{},"subject":[],"published":{"date-parts":[[2010,2,4]]},"assertion":[{"value":"2010-02-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}