{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T11:47:59Z","timestamp":1774352879484,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"EPSRC Fellowship: Task Based Information Retrieval","award":["EP\/P024289\/1"],"award-info":[{"award-number":["EP\/P024289\/1"]}]},{"name":"EPSRC The Engineering and Physical Sciences Research Council","award":["EP\/S021566\/1"],"award-info":[{"award-number":["EP\/S021566\/1"]}]},{"name":"CAMEO, PRIN 2022","award":["2022ZLL7MW"],"award-info":[{"award-number":["2022ZLL7MW"]}]},{"name":"Huawei Finland, University of Amsterdam, and Vrije Universiteit Amsterdam.","award":["Dreams Lab"],"award-info":[{"award-number":["Dreams Lab"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657992","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"3040-3043","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["LLM4Eval: Large Language Model for Evaluation in IR"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2779-4942","authenticated-orcid":false,"given":"Hossein A.","family":"Rahmani","sequence":"first","affiliation":[{"name":"University College London, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5301-4244","authenticated-orcid":false,"given":"Clemencia","family":"Siro","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, 
Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9447-4172","authenticated-orcid":false,"given":"Mohammad","family":"Aliannejadi","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9351-8137","authenticated-orcid":false,"given":"Nick","family":"Craswell","sequence":"additional","affiliation":[{"name":"Microsoft, Seattle, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8178-9194","authenticated-orcid":false,"given":"Charles L. A.","family":"Clarke","sequence":"additional","affiliation":[{"name":"University of Waterloo, Waterloo, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5070-2049","authenticated-orcid":false,"given":"Guglielmo","family":"Faggioli","sequence":"additional","affiliation":[{"name":"University of Padua, Padua, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5270-5550","authenticated-orcid":false,"given":"Bhaskar","family":"Mitra","sequence":"additional","affiliation":[{"name":"Microsoft, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2425-3136","authenticated-orcid":false,"given":"Paul","family":"Thomas","sequence":"additional","affiliation":[{"name":"Microsoft, Adelaide, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4734-4532","authenticated-orcid":false,"given":"Emine","family":"Yilmaz","sequence":"additional","affiliation":[{"name":"University College London & Amazon, London, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval. 3460--3463","author":"B\u00e9n\u00e9dict Gabriel","year":"2023","unstructured":"Gabriel B\u00e9n\u00e9dict, Ruqing Zhang, and Donald Metzler. 2023. Gen-ir@ sigir 2023: The first workshop on generative information retrieval. 
In Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval. 3460--3463."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.870"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.599"},{"key":"e_1_3_2_1_4_1","volume-title":"RAGAS: Automated Evaluation of Retrieval Augmented Generation. arXiv preprint arXiv:2309.15217","author":"Es Shahul","year":"2023","unstructured":"Shahul Es, Jithin James, Luis Espinosa-Anke, and Steven Schockaert. 2023. RAGAS: Automated Evaluation of Retrieval Augmented Generation. arXiv preprint arXiv:2309.15217 (2023)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Guglielmo Faggioli Laura Dietz Charles Clarke Gianluca Demartini Matthias Hagen Claudia Hauff Noriko Kando Evangelos Kanoulas Martin Potthast Benno Stein and Henning Wachsmuth. 2023. Perspectives on large language models for relevance judgment. arxiv: 2304.09161 [cs.IR]","DOI":"10.1145\/3578337.3605136"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605136"},{"key":"e_1_3_2_1_7_1","volume-title":"Gptscore: Evaluate as you desire. arXiv preprint arXiv:2302.04166","author":"Fu Jinlan","year":"2023","unstructured":"Jinlan Fu, See-Kiong Ng, Zhengbao Jiang, and Pengfei Liu. 2023. Gptscore: Evaluate as you desire. arXiv preprint arXiv:2302.04166 (2023)."},{"key":"e_1_3_2_1_8_1","volume-title":"Latent retrieval for weakly supervised open domain question answering. arXiv preprint arXiv:1906.00300","author":"Lee Kenton","year":"2019","unstructured":"Kenton Lee, Ming-Wei Chang, and Kristina Toutanova. 2019. Latent retrieval for weakly supervised open domain question answering. arXiv preprint arXiv:1906.00300 (2019)."},{"key":"e_1_3_2_1_9_1","first-page":"9459","article-title":"2020. 
Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems, Vol. 33 (2020), 9459--9474.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_10_1","unstructured":"Percy Liang Rishi Bommasani Tony Lee Dimitris Tsipras Dilara Soylu Michihiro Yasunaga Yian Zhang Deepak Narayanan Yuhuai Wu Ananya Kumar et al. 2022. Holistic evaluation of language models. arXiv preprint arXiv:2211.09110 (2022)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"e_1_3_2_1_12_1","volume-title":"MS MARCO: A human generated machine reading comprehension dataset. choice","author":"Nguyen Tri","year":"2016","unstructured":"Tri Nguyen, Mir Rosenberg, Xia Song, Jianfeng Gao, Saurabh Tiwary, Rangan Majumder, and Li Deng. 2016. MS MARCO: A human generated machine reading comprehension dataset. choice, Vol. 2640 (2016), 660."},{"key":"e_1_3_2_1_13_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arxiv: 2303.08774 [cs.CL]"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Zhen Qin Rolf Jagerman Kai Hui Honglei Zhuang Junru Wu Jiaming Shen Tianqi Liu Jialu Liu Donald Metzler Xuanhui Wang et al. 2023. Large language models are effective text rankers with pairwise ranking prompting. arXiv preprint arXiv:2306.17563 (2023).","DOI":"10.18653\/v1\/2024.findings-naacl.97"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Zhen Qin Rolf Jagerman Kai Hui Honglei Zhuang Junru Wu Jiaming Shen Tianqi Liu Jialu Liu Donald Metzler Xuanhui Wang and Michael Bendersky. 2023. 
Large language models are effective text rankers with pairwise ranking prompting. arxiv: 2306.17563 [cs.IR]","DOI":"10.18653\/v1\/2024.findings-naacl.97"},{"key":"e_1_3_2_1_16_1","volume-title":"ARES: An Automated Evaluation Framework for Retrieval-Augmented Generation Systems. arXiv preprint arXiv:2311.09476","author":"Saad-Falcon Jon","year":"2023","unstructured":"Jon Saad-Falcon, Omar Khattab, Christopher Potts, and Matei Zaharia. 2023. ARES: An Automated Evaluation Framework for Retrieval-Augmented Generation Systems. arXiv preprint arXiv:2311.09476 (2023)."},{"key":"e_1_3_2_1_17_1","volume-title":"Large language models can accurately predict searcher preferences. arXiv preprint arXiv:2309.10621","author":"Thomas Paul","year":"2023","unstructured":"Paul Thomas, Seth Spielman, Nick Craswell, and Bhaskar Mitra. 2023. Large language models can accurately predict searcher preferences. arXiv preprint arXiv:2309.10621 (2023)."},{"key":"e_1_3_2_1_18_1","volume-title":"Is chatgpt a good nlg evaluator? a preliminary study. arXiv preprint arXiv:2303.04048","author":"Wang Jiaan","year":"2023","unstructured":"Jiaan Wang, Yunlong Liang, Fandong Meng, Haoxiang Shi, Zhixu Li, Jinan Xu, Jianfeng Qu, and Jie Zhou. 2023. Is chatgpt a good nlg evaluator? a preliminary study. arXiv preprint arXiv:2303.04048 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675 (2019)."},{"key":"e_1_3_2_1_20_1","volume-title":"MoverScore: Text generation evaluating with contextualized embeddings and earth mover distance. arXiv preprint arXiv:1909.02622","author":"Zhao Wei","year":"2019","unstructured":"Wei Zhao, Maxime Peyrard, Fei Liu, Yang Gao, Christian M Meyer, and Steffen Eger. 2019. 
MoverScore: Text generation evaluating with contextualized embeddings and earth mover distance. arXiv preprint arXiv:1909.02622 (2019)."}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Washington DC USA","acronym":"SIGIR 2024","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657992","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657992","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:31:55Z","timestamp":1755840715000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657992"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":20,"alternative-id":["10.1145\/3626772.3657992","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657992","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}