{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,11]],"date-time":"2025-11-11T15:45:02Z","timestamp":1762875902614,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,8,7]],"date-time":"2017-08-07T00:00:00Z","timestamp":1502064000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61622208, 61532011, 61672311"],"award-info":[{"award-number":["61622208, 61532011, 61672311"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key Basic Research Program","award":["2015CB358700"],"award-info":[{"award-number":["2015CB358700"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,8,7]]},"DOI":"10.1145\/3077136.3080694","type":"proceedings-article","created":{"date-parts":[[2017,7,28]],"date-time":"2017-07-28T19:35:01Z","timestamp":1501270501000},"page":"1233-1236","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["SogouT-16"],"prefix":"10.1145","author":[{"given":"Cheng","family":"Luo","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yukun","family":"Zheng","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiqun","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaochuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Sogou Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingfang","family":"Xu","sequence":"additional","affiliation":[{"name":"Sogou Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaoping","family":"Ma","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2017,8,7]]},"reference":[{"volume-title":"The ClueWeb12 Dataset - The Lemur Project","key":"e_1_3_2_1_1_1"},{"volume-title":"http:\/\/www.ccf.org.cn\/sites\/ccf\/xhdtnry.jsp?contentId=2937064120111. (2016). Online","year":"2017","author":"Dataset T","key":"e_1_3_2_1_2_1"},{"volume-title":"Clarke","year":"2011","author":"Akinyemi John A.","key":"e_1_3_2_1_3_1"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0306-4573(02)00084-5"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-007-9032-x"},{"volume-title":"Cleverdon and Michael Keen","year":"1966","author":"Cyril","key":"e_1_3_2_1_6_1"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"David Hawking Ellen Voorhees Nick Craswell and Peter Bailey. 1999. Overview of the TREC-8 web track. In TREC. David Hawking Ellen Voorhees Nick Craswell and Peter Bailey. 1999. Overview of the TREC-8 web track. In TREC.","DOI":"10.6028\/NIST.SP.500-246.web-overview"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2109205.2109207"},{"volume-title":"Spam double-funnel: Connecting web spammers with advertisers WWW '07","author":"Wang Yi-Min","key":"e_1_3_2_1_10_1"},{"volume-title":"Continuous word embeddings for detecting local text reuses at the semantic level SIGIR '14","author":"Zhang Qi","key":"e_1_3_2_1_11_1"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-41491-6_22"}],"event":{"name":"SIGIR '17: The 40th International ACM SIGIR conference on research and development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Shinjuku Tokyo Japan","acronym":"SIGIR '17"},"container-title":["Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3077136.3080694","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3077136.3080694","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T18:42:34Z","timestamp":1750790554000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3077136.3080694"}},"subtitle":["A New Web Corpus to Embrace IR Research"],"short-title":[],"issued":{"date-parts":[[2017,8,7]]},"references-count":11,"alternative-id":["10.1145\/3077136.3080694","10.1145\/3077136"],"URL":"https:\/\/doi.org\/10.1145\/3077136.3080694","relation":{},"subject":[],"published":{"date-parts":[[2017,8,7]]},"assertion":[{"value":"2017-08-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}