{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T15:54:06Z","timestamp":1780588446192,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61902209, 62377044"],"award-info":[{"award-number":["61902209, 62377044"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761158","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T00:36:36Z","timestamp":1762562196000},"page":"3133-3143","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["CLUE: Using Large Language Models for Judging Document Usefulness in Web Search Evaluation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9883-0012","authenticated-orcid":false,"given":"Xingzhu","family":"Wang","sequence":"first","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2143-2626","authenticated-orcid":false,"given":"Erhan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6135-2604","authenticated-orcid":false,"given":"Yiqun","family":"Chen","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7753-2592","authenticated-orcid":false,"given":"Jinghan","family":"Xuan","sequence":"additional","affiliation":[{"name":"School of Statistics, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1532-743X","authenticated-orcid":false,"given":"Yucheng","family":"Hou","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0872-8024","authenticated-orcid":false,"given":"Yitong","family":"Xu","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9585-8335","authenticated-orcid":false,"given":"Ying","family":"Nie","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9212-1947","authenticated-orcid":false,"given":"Shuaiqiang","family":"Wang","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0684-6205","authenticated-orcid":false,"given":"Dawei","family":"Yin","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9257-5498","authenticated-orcid":false,"given":"Jiaxin","family":"Mao","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Can We Use Large Language Models to Fill Relevance Judgment Holes? arXiv preprint arXiv:2405.05600","author":"Abbasiantaeb Zahra","year":"2024","unstructured":"Zahra Abbasiantaeb, Chuan Meng, Leif Azzopardi, and Mohammad Aliannejadi. 2024. Can We Use Large Language Models to Fill Relevance Judgment Holes? arXiv preprint arXiv:2405.05600 (2024)."},{"key":"e_1_3_2_2_2_1","volume-title":"Clareci Silva Cardoso, and Waleska Teixeira Caiaffa.","author":"Silva Abreu Mery Natali","year":"2008","unstructured":"Mery Natali Silva Abreu, Arminda Lucia Siqueira, Clareci Silva Cardoso, and Waleska Teixeira Caiaffa. 2008. Ordinal logistic regression models: application in quality of life studies. Cadernos de Sa\u00fade P\u00fablica 24 (2008), s581--s591."},{"key":"e_1_3_2_2_3_1","volume-title":"Categorical Data Analysis","author":"Agresti Alan","unstructured":"Alan Agresti. 2003. Categorical Data Analysis, Second Edition. Categorical Data Analysis, Second Edition."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1277741.1277902"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173618"},{"key":"e_1_3_2_2_6_1","volume-title":"Seventeenth International Conference on Machine Learning.","author":"Allwein Erin L.","year":"2000","unstructured":"Erin L. Allwein, Robert E. Schapire, and Yoram Singer. 2000. Reducing Multiclass to Binary: A Unifying Approach for Margin Classifiers. In Seventeenth International Conference on Machine Learning."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767854"},{"key":"e_1_3_2_2_8_1","volume-title":"Proceedings of the SIGIR 2009 Workshop on the Future of IR Evaluation. 7--8.","author":"Belkin Nicholas J","year":"2009","unstructured":"Nicholas J Belkin, Michael Cole, and Jingjing Liu. 2009. A model for evaluation of interactive information retrieval. In Proceedings of the SIGIR 2009 Workshop on the Future of IR Evaluation. 7--8."},{"key":"e_1_3_2_2_9_1","unstructured":"Christopher Bishop. 2006. Pattern Recognition and Machine Learning. Stat Sci."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080804"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2008.4633963"},{"key":"e_1_3_2_2_12_1","volume-title":"Qlora: Efficient finetuning of quantized llms. , 10088--10115 pages.","author":"Dettmers Tim","year":"2023","unstructured":"Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, and Luke Zettlemoyer. 2023. Qlora: Efficient finetuning of quantized llms. , 10088--10115 pages."},{"key":"e_1_3_2_2_13_1","volume-title":"LLM-Driven Usefulness Judgment for Web Search Evaluation. arXiv preprint arXiv:2504.14401","author":"Dewan Mouly","year":"2025","unstructured":"Mouly Dewan, Jiqun Liu, Aditya Gautam, and Chirag Shah. 2025. LLM-Driven Usefulness Judgment for Web Search Evaluation. arXiv preprint arXiv:2504.14401 (2025)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730223"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605136"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-44795-4_13"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343413.3377961"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2015.2457911"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Pedro Antonio Guti\u00e9rrez Mar\u00eda P\u00e9rez-Ortiz F. Fern\u00e1ndez-Navarro Javier S\u00e1nchez-Monedero and Cesar Mart\u00ednez. 2012. An Experimental Study of Different Ordinal Regression Methods and Measures. 296--307. https:\/\/doi.org\/10. 1007\/978--3--642--28931--6_29","DOI":"10.1007\/978-3-642-28931-6_29"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2348283.2348323"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1718487.1718515"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-56060-6_24"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1277741.1277839"},{"key":"e_1_3_2_2_24_1","volume-title":"Few-shot learning with retrieval augmented language models. arXiv preprint arXiv:2208.03299 1, 2","author":"Izacard Gautier","year":"2022","unstructured":"Gautier Izacard, Patrick Lewis, Maria Lomeli, Lucas Hosseini, Fabio Petroni, Timo Schick, Jane Dwivedi-Yu, Armand Joulin, Sebastian Riedel, and Edouard Grave. 2022. Few-shot learning with retrieval augmented language models. arXiv preprint arXiv:2208.03299 1, 2 (2022), 4."},{"key":"e_1_3_2_2_25_1","volume-title":"Llm-blender: Ensembling large language models with pairwise ranking and generative fusion. arXiv preprint arXiv:2306.02561","author":"Jiang Dongfu","year":"2023","unstructured":"Dongfu Jiang, Xiang Ren, and Bill Yuchen Lin. 2023. Llm-blender: Ensembling large language models with pairwise ranking and generative fusion. arXiv preprint arXiv:2306.02561 (2023)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684822.2685319"},{"key":"e_1_3_2_2_27_1","unstructured":"He Jing and Li Xiaoming. 2012. Search Engine Effectiveness Evaluation: Methods and Techniques Based on User Click Log Analysis."},{"key":"e_1_3_2_2_28_1","volume-title":"Effects of rank and precision of search results on users' evaluations of system performance","author":"Kelly Diane","unstructured":"Diane Kelly, Xin Fu, and Chirag Shah. 2007. Effects of rank and precision of search results on users' evaluations of system performance. University of North Carolina (2007)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657798"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cor.2011.06.023"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401085"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330981"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3592032"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080750"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911507"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.2517-6161.1980.tb01109.x"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2505515.2507665"},{"key":"e_1_3_2_2_38_1","volume-title":"Rankvicuna: Zero-shot listwise document reranking with open-source large language models. arXiv preprint arXiv:2309.15088","author":"Pradeep Ronak","year":"2023","unstructured":"Ronak Pradeep, Sahel Sharifymoghaddam, and Jimmy Lin. 2023. Rankvicuna: Zero-shot listwise document reranking with open-source large language models. arXiv preprint arXiv:2309.15088 (2023)."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"crossref","unstructured":"Zhen Qin Rolf Jagerman Kai Hui Honglei Zhuang Junru Wu Le Yan Jiaming Shen Tianqi Liu Jialu Liu Donald Metzler et al. 2023. Large language models are effective text rankers with pairwise ranking prompting. arXiv preprint arXiv:2306.17563 (2023).","DOI":"10.18653\/v1\/2024.findings-naacl.97"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835542"},{"key":"e_1_3_2_2_41_1","unstructured":"Tefko Saracevic. 2022. The Notion of Relevance in Information Science: Everybody knows what relevance is. But what is it really? Springer Nature."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(198805)39:3<161::AID-ASI2>3.0.CO;2-0"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:STCO.0000035301.49549.88"},{"key":"e_1_3_2_2_44_1","volume-title":"Is ChatGPT good at search? investigating large language models as re-ranking agents. arXiv preprint arXiv:2304.09542","author":"Sun Weiwei","year":"2023","unstructured":"Weiwei Sun, Lingyong Yan, Xinyu Ma, Shuaiqiang Wang, Pengjie Ren, Zhumin Chen, Dawei Yin, and Zhaochun Ren. 2023. Is ChatGPT good at search? investigating large language models as re-ranking agents. arXiv preprint arXiv:2304.09542 (2023)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657707"},{"key":"e_1_3_2_2_46_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_47_1","volume-title":"Jacobijn AC Sandberg, et al","author":"Van Someren Maarten W","year":"1994","unstructured":"Maarten W Van Someren, Yvonne F Barnard, Jacobijn AC Sandberg, et al. 1994. The think aloud method: a practical approach to modelling cognitive processes. London: AcademicPress 11, 6 (1994)."},{"key":"e_1_3_2_2_48_1","first-page":"47","article-title":"An ensemble of weighted support vector machines for ordinal regression","volume":"3","author":"Waegeman Willem","year":"2009","unstructured":"Willem Waegeman, Luc Boullart, et al. 2009. An ensemble of weighted support vector machines for ordinal regression. International Journal of Computer Systems Science and Engineering 3, 1 (2009), 47--51.","journal-title":"International Journal of Computer Systems Science and Engineering"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609607"},{"key":"e_1_3_2_2_50_1","volume-title":"A user-centric benchmark for evaluating large language models. CoRR","author":"Wang Jiayin","year":"2024","unstructured":"Jiayin Wang, Fengran Mo, Weizhi Ma, Peijie Sun, Min Zhang, and Jian-Yun Nie. 2024. A user-centric benchmark for evaluating large language models. CoRR (2024)."},{"key":"e_1_3_2_2_51_1","unstructured":"Jason Wei Yi Tay Rishi Bommasani Colin Raffel Barret Zoph Sebastian Borgeaud Dani Yogatama Maarten Bosma Denny Zhou Donald Metzler et al. 2022. Emergent abilities of large language models. arXiv preprint arXiv:2206.07682 (2022)."},{"key":"e_1_3_2_2_52_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35, 24824--24837."},{"key":"e_1_3_2_2_53_1","volume-title":"Witten and Eibe Frank","author":"Ian","year":"2005","unstructured":"Ian H. Witten and Eibe Frank. 2005. Data Mining: Practical Machine Learning Tools and Techniques (Third Edition). Data Mining: Practical Machine Learning Tools and Techniques (Third Edition)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"crossref","unstructured":"Hong Wu Hanqing Lu and Songde Ma. 2003. A practical SVM-based algorithm for ordinal regression in image retrieval. (2003) 612--621.","DOI":"10.1145\/957013.957144"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/1571941.1572073"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/2661829.2661953"},{"key":"e_1_3_2_2_57_1","volume-title":"Usimagent: Large language models for simulating search users, 2687--2692 pages.","author":"Zhang Erhan","year":"2024","unstructured":"Erhan Zhang, Xingzhu Wang, Peiyuan Gong, Yankai Lin, and Jiaxin Mao. 2024. Usimagent: Large language models for simulating search users, 2687--2692 pages."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401162"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657784"},{"key":"e_1_3_2_2_60_1","volume-title":"ERNIE: Enhanced language representation with informative entities. arXiv preprint arXiv:1905.07129.","author":"Zhang Zhengyan","year":"2019","unstructured":"Zhengyan Zhang, Xu Han, Zhiyuan Liu, Xin Jiang, Maosong Sun, and Qun Liu. 2019. ERNIE: Enhanced language representation with informative entities. arXiv preprint arXiv:1905.07129."},{"key":"e_1_3_2_2_61_1","volume-title":"Beyond yes and no: Improving zero-shot llm rankers via scoring fine-grained relevance labels. arXiv preprint arXiv:2310.14122","author":"Zhuang Honglei","year":"2023","unstructured":"Honglei Zhuang, Zhen Qin, Kai Hui, Junru Wu, Le Yan, Xuanhui Wang, and Michael Bendersky. 2023. Beyond yes and no: Improving zero-shot llm rankers via scoring fine-grained relevance labels. arXiv preprint arXiv:2310.14122 (2023)."}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","location":"Seoul Republic of Korea","acronym":"CIKM '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761158","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T02:09:11Z","timestamp":1765505351000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761158"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":61,"alternative-id":["10.1145\/3746252.3761158","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761158","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}