{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T18:43:31Z","timestamp":1771267411572,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","funder":[{"name":"the Strategic Priority Research Program of the Chinese Academy of Sciences","award":["XDB0680101"],"award-info":[{"award-number":["XDB0680101"]}]},{"name":"the National Key Research and Development Program of China","award":["2023YFA1011602"],"award-info":[{"award-number":["2023YFA1011602"]}]},{"name":"the CAS Project for Young Scientists in Basic Research","award":["YSBR-034"],"award-info":[{"award-number":["YSBR-034"]}]},{"name":"the Xiaomi Young Talents Program&#x5c;&#x2f;Xiaomi Foundation","award":["&#x5c;&#x2f;"],"award-info":[{"award-number":["&#x5c;&#x2f;"]}]},{"name":"the National Natural Science Foundation of China &#x28;NSFC&#x29;","award":["62302486"],"award-info":[{"award-number":["62302486"]}]},{"name":"the Innovation Funding of ICT CAS","award":["E361140"],"award-info":[{"award-number":["E361140"]}]},{"name":"the CAS Special Research Assistant Funding Project","award":["&#x5c;&#x2f;"],"award-info":[{"award-number":["&#x5c;&#x2f;"]}]},{"name":"the project","award":["JCKY2022130C039"],"award-info":[{"award-number":["JCKY2022130C039"]}]},{"name":"the Strategic Priority Research Program of the CAS","award":["XDB0680102"],"award-info":[{"award-number":["XDB0680102"]}]},{"name":"the NSFC","award":["62441229"],"award-info":[{"award-number":["62441229"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,2,22]]},"DOI":"10.1145\/3773966.3777988","type":"proceedings-article","created":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T17:50:01Z","timestamp":1771264201000},"page":"238-248","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["How Do LLM-Generated Texts Impact Term-Based Retrieval Models?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-1556-8198","authenticated-orcid":false,"given":"Wei","family":"Huang","sequence":"first","affiliation":[{"name":"State Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5123-4999","authenticated-orcid":false,"given":"Keping","family":"Bi","sequence":"additional","affiliation":[{"name":"State Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7869-8213","authenticated-orcid":false,"given":"Yinqiong","family":"Cai","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7438-5180","authenticated-orcid":false,"given":"Wei","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9509-8674","authenticated-orcid":false,"given":"Jiafeng","family":"Guo","sequence":"additional","affiliation":[{"name":"State Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5201-8195","authenticated-orcid":false,"given":"Xueqi","family":"Cheng","sequence":"additional","affiliation":[{"name":"State Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,2,21]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et al. 2024. GPT-4 Technical Report. arXiv:2303.08774 [cs] doi:10.48550\/arXiv.2303.08774"},{"key":"e_1_3_2_1_2_1","first-page":"143","article-title":"Zipf's law and the Internet","volume":"3","author":"Adamic Lada A","year":"2002","unstructured":"Lada A Adamic and Bernardo A Huberman. 2002. Zipf's law and the Internet. Glottometrics, Vol. 3, 1 (2002), 143-150.","journal-title":"Glottometrics"},{"key":"e_1_3_2_1_3_1","volume-title":"Lexical coverage of spoken discourse. Applied linguistics","author":"Adolphs Svenja","year":"2003","unstructured":"Svenja Adolphs and Norbert Schmitt. 2003. Lexical coverage of spoken discourse. Applied linguistics, Vol. 24, 4 (2003), 425-438."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/582415.582416"},{"key":"e_1_3_2_1_5_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Asai Akari","year":"2023","unstructured":"Akari Asai, Zeqiu Wu, Yizhong Wang, Avirup Sil, and Hannaneh Hajishirzi. 2023. Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_6_1","first-page":"186","article-title":"Inherent Bias in Large Language Models: A Random Sampling Analysis","volume":"2","author":"Ayoub Noel F","year":"2024","unstructured":"Noel F Ayoub, Karthik Balakrishnan, Marc S Ayoub, Thomas F Barrett, Abel P David, and Stacey T Gray. 2024. Inherent Bias in Large Language Models: A Random Sampling Analysis. Mayo Clinic Proceedings: Digital Health, Vol. 2, 2 (2024), 186-191.","journal-title":"Mayo Clinic Proceedings: Digital Health"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1611.09268"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/3294996.3295184"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835490"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.207"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.421"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671882"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3358168"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","unstructured":"Thomas Diggelmann Jordan Boyd-Graber Jannis Bulian Massimiliano Ciaramita and Markus Leippold. 2021. CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims. arXiv:2012.00614 [cs] doi:10.48550\/arXiv.2012.00614","DOI":"10.48550\/arXiv.2012.00614"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The Llama 3 Herd of Models. arXiv:2407.21783 [cs] doi:10.48550\/arXiv.2407.21783","DOI":"10.48550\/arXiv.2407.21783"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1008992.1009004"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1076\/jqul.8.3.165.4101"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.0335980100"},{"key":"e_1_3_2_1_19_1","volume-title":"ICML 2024 Next Generation of AI Safety Workshop.","author":"Geng Mingmeng","year":"2024","unstructured":"Mingmeng Geng and Roberto Trotta. 2024. Is ChatGPT Transforming Academics' Writing Style?. In ICML 2024 Next Generation of AI Safety Workshop."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Biyang Guo Xin Zhang Ziyuan Wang Minqi Jiang Jinran Nie Yuxuan Ding Jianwei Yue and Yupeng Wu. 2023. How Close Is ChatGPT to Human Experts? Comparison Corpus Evaluation and Detection. arXiv:2301.07597 [cs] doi:10.48550\/arXiv.2301.07597","DOI":"10.48550\/arXiv.2301.07597"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-45442-5_21"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3703155"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1037\/h0093508"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.2196\/48904"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3184558.3192301"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/219717.219748"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-024-10903-2"},{"key":"e_1_3_2_1_29_1","volume-title":"How large a vocabulary is needed for reading and listening? Canadian modern language review","author":"Nation I","year":"2006","unstructured":"I Nation. 2006. How large a vocabulary is needed for reading and listening? Canadian modern language review, Vol. 63, 1 (2006), 59-82."},{"key":"e_1_3_2_1_30_1","volume-title":"Pareto distributions and Zipf's law. Contemporary physics","author":"Newman Mark EJ","year":"2005","unstructured":"Mark EJ Newman. 2005. Power laws, Pareto distributions and Zipf's law. Contemporary physics, Vol. 46, 5 (2005), 323-351."},{"key":"e_1_3_2_1_31_1","first-page":"27730","volume-title":"Oh (Eds.)","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, John Schulman, Jacob Hilton, Fraser Kelton, Luke Miller, Maddie Simens, Amanda Askell, Peter Welinder, Paul F Christiano, Jan Leike, and Ryan Lowe. 2022. Training Language Models to Follow Instructions with Human Feedback. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 27730-27744."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/290941.291008"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_34_1","first-page":"109","article-title":"Okapi at TREC-3","volume":"109","author":"Robertson Stephen E","year":"1995","unstructured":"Stephen E Robertson, Steve Walker, Susan Jones, Micheline M Hancock-Beaulieu, Mike Gatford, et al., 1995. Okapi at TREC-3. Nist Special Publication Sp, Vol. 109 (1995), 109.","journal-title":"Nist Special Publication Sp"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/361219.361220"},{"key":"e_1_3_2_1_36_1","unstructured":"Nina Schick. 2020. Deep fakes and the infocalypse: What you urgently need to know. Hachette UK."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2306.04537"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1108\/eb026526"},{"key":"e_1_3_2_1_39_1","volume-title":"Certain Language Skills in Children: Their Development and Interrelationships (ned - new edition ed.)","author":"Templin Mildred C.","unstructured":"Mildred C. Templin. 1957. Certain Language Skills in Children: Their Development and Interrelationships (ned - new edition ed.). Vol. 26. University of Minnesota Press. showeprint[jstor]10.5749\/j.ctttv2st"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1074"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs] doi:10.48550\/arXiv.2307.09288","DOI":"10.48550\/arXiv.2307.09288"},{"key":"e_1_3_2_1_42_1","volume-title":"Perplexity Trap: PLM-Based Retrievers Overrate Low Perplexity Documents. In The Thirteenth International Conference on Learning Representations.","author":"Wang Haoyu","year":"2024","unstructured":"Haoyu Wang, Sunhao Dai, Haiyuan Zhao, Liang Pang, Xiao Zhang, Gang Wang, Zhenhua Dong, Jun Xu, and Ji-Rong Wen. 2024a. Perplexity Trap: PLM-Based Retrievers Overrate Low Perplexity Documents. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","unstructured":"Zhenhua Wang Guang Xu and Ming Ren. 2024b. LLM-Generated Natural Language Meets Scaling Laws: New Explorations and Data Augmentation Methods. arXiv:2407.00322 [cs] doi:10.48550\/arXiv.2407.00322","DOI":"10.48550\/arXiv.2407.00322"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.263"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657750"},{"key":"e_1_3_2_1_46_1","first-page":"125","article-title":"Detection of AI-generated essays in writing assessments","volume":"65","author":"Yan Duanli","year":"2023","unstructured":"Duanli Yan, Michael Fauss, Jiangang Hao, and Wenju Cui. 2023. Detection of AI-generated essays in writing assessments. Psychological Test and Assessment Modeling, Vol. 65, 1 (2023), 125-144.","journal-title":"Psychological Test and Assessment Modeling"},{"key":"e_1_3_2_1_47_1","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou Chengpeng Li Chengyuan Li Dayiheng Liu Fei Huang Guanting Dong Haoran Wei Huan Lin Jialong Tang Jialin Wang Jian Yang Jianhong Tu Jianwei Zhang Jianxin Ma Jianxin Yang Jin Xu Jingren Zhou Jinze Bai Jinzheng He Junyang Lin Kai Dang Keming Lu Keqin Chen Kexin Yang Mei Li Mingfeng Xue Na Ni Pei Zhang Peng Wang Ru Peng Rui Men Ruize Gao Runji Lin Shijie Wang Shuai Bai Sinan Tan Tianhang Zhu Tianhao Li Tianyu Liu Wenbin Ge Xiaodong Deng Xiaohuan Zhou Xingzhang Ren Xinyu Zhang Xipin Wei Xuancheng Ren Xuejing Liu Yang Fan Yang Yao Yichang Zhang Yu Wan Yunfei Chu Yuqiong Liu Zeyu Cui Zhenru Zhang Zhifang Guo and Zhihao Fan. 2024. Qwen2 Technical Report. arXiv:2407.10671 [cs] doi:10.48550\/arXiv.2407.10671"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1259"},{"key":"e_1_3_2_1_49_1","volume-title":"Human Behavior and the Principle of Least Effort","author":"Zipf George Kingsley","unstructured":"George Kingsley Zipf. 1949. Human Behavior and the Principle of Least Effort. Addison-Wesley Press, Oxford, England. xi, 573 pages."}],"event":{"name":"WSDM '26:The Nineteenth ACM International Conference on Web Search and Data Mining","location":"Boise ID USA","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the Nineteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"deposited":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T17:56:00Z","timestamp":1771264560000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3773966.3777988"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,21]]},"references-count":49,"alternative-id":["10.1145\/3773966.3777988","10.1145\/3773966"],"URL":"https:\/\/doi.org\/10.1145\/3773966.3777988","relation":{},"subject":[],"published":{"date-parts":[[2026,2,21]]},"assertion":[{"value":"2026-02-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}