{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:50:25Z","timestamp":1755802225341,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T00:00:00Z","timestamp":1741564800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key R&D Program of China","award":["No.2022YFC3301900"],"award-info":[{"award-number":["No.2022YFC3301900"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,10]]},"DOI":"10.1145\/3701551.3703581","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T12:33:36Z","timestamp":1740573216000},"page":"934-943","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["RetriEVAL: Evaluating Text Generation with Contextualized Lexical Match"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1908-4315","authenticated-orcid":false,"given":"Zhen","family":"Li","sequence":"first","affiliation":[{"name":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2456-7928","authenticated-orcid":false,"given":"Xinchi","family":"Li","sequence":"additional","affiliation":[{"name":"China Telecom Beijing Research Institute, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4162-2119","authenticated-orcid":false,"given":"Chongyang","family":"Tao","sequence":"additional","affiliation":[{"name":"SKLSDE Lab, Beihang University, Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5832-6199","authenticated-orcid":false,"given":"Jiazhan","family":"Feng","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3315-2468","authenticated-orcid":false,"given":"Tao","family":"Shen","sequence":"additional","affiliation":[{"name":"University of Technology Sydney, Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1949-5715","authenticated-orcid":false,"given":"Can","family":"Xu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9866-972X","authenticated-orcid":false,"given":"Hao","family":"Wang","sequence":"additional","affiliation":[{"name":"China Telecom Beijing Research Institute, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0396-6703","authenticated-orcid":false,"given":"Dongyan","family":"Zhao","sequence":"additional","affiliation":[{"name":"WICT, Peking University, Beijing, China, SKLMCPTS, Beijing, China, &amp; KLIPMT, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4050-0443","authenticated-orcid":false,"given":"Shuai","family":"Ma","sequence":"additional","affiliation":[{"name":"SKLSDE Lab, Beihang University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,3,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"SMART: Sentences as Basic Units for Text Evaluation. arXiv preprint arXiv:2208.01030","author":"Amplayo Reinald Kim","year":"2022","unstructured":"Reinald Kim Amplayo, Peter J Liu, Yao Zhao, and Shashi Narayan. 2022. SMART: Sentences as Basic Units for Text Evaluation. arXiv preprint arXiv:2208.01030 (2022)."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization. 
Association for Computational Linguistics","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization. Association for Computational Linguistics, Ann Arbor, Michigan, 65--72. https:\/\/aclanthology.org\/W05-0909"},{"key":"e_1_3_2_1_3_1","unstructured":"Hyung Won Chung Le Hou Shayne Longpre Barret Zoph Yi Tay William Fedus Yunxuan Li Xuezhi Wang Mostafa Dehghani Siddhartha Brahma et al. 2022. Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416 (2022)."},{"key":"e_1_3_2_1_4_1","first-page":"1","article-title":"Scaling instruction-finetuned language models","volume":"25","author":"Chung Hyung Won","year":"2024","unstructured":"Hyung Won Chung, Le Hou, Shayne Longpre, Barret Zoph, Yi Tay, William Fedus, Yunxuan Li, Xuezhi Wang, Mostafa Dehghani, Siddhartha Brahma, et al. 2024. Scaling instruction-finetuned language models. Journal of Machine Learning Research, Vol. 25, 70 (2024), 1--53.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1264"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.599"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00373"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00373"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463098"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.241"},{"key":"e_1_3_2_1_12_1","volume-title":"Human-like summarization evaluation with chatgpt. 
arXiv preprint arXiv:2304.02554","author":"Gao Mingqi","year":"2023","unstructured":"Mingqi Gao, Jie Ruan, Renliang Sun, Xunjian Yin, Shiping Yang, and Xiaojun Wan. 2023. Human-like summarization evaluation with chatgpt. arXiv preprint arXiv:2304.02554 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Trueteacher: Learning factual consistency evaluation with large language models. arXiv preprint arXiv:2305.11171","author":"Gekhman Zorik","year":"2023","unstructured":"Zorik Gekhman, Jonathan Herzig, Roee Aharoni, Chen Elkind, and Idan Szpektor. 2023. Trueteacher: Learning factual consistency evaluation with large language models. arXiv preprint arXiv:2305.11171 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015","author":"Hermann Karl Moritz","year":"2015","unstructured":"Karl Moritz Hermann, Tom\u00e1s Kocisk\u00fd, Edward Grefenstette, Lasse Espeholt, Will Kay, Mustafa Suleyman, and Phil Blunsom. 2015. Teaching Machines to Read and Comprehend. In Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7--12, 2015, Montreal, Quebec, Canada, Corinna Cortes, Neil D. Lawrence, Daniel D. Lee, Masashi Sugiyama, and Roman Garnett (Eds.). 1693--1701. https:\/\/proceedings.neurips.cc\/paper\/2015\/hash\/afdec7005cc9f14302cd0474fd0f3c96-Abstract.html"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2007.01.012"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.dialdoc-1.19"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.619"},{"key":"e_1_3_2_1_18_1","volume-title":"Unsupervised dense information retrieval with contrastive learning. 
arXiv preprint arXiv:2112.09118","author":"Izacard Gautier","year":"2021","unstructured":"Gautier Izacard, Mathilde Caron, Lucas Hosseini, Sebastian Riedel, Piotr Bojanowski, Armand Joulin, and Edouard Grave. 2021. Unsupervised dense information retrieval with contrastive learning. arXiv preprint arXiv:2112.09118 (2021)."},{"key":"e_1_3_2_1_19_1","volume-title":"Dense passage retrieval for open-domain question answering. arXiv preprint arXiv:2004.04906","author":"Karpukhin Vladimir","year":"2020","unstructured":"Vladimir Karpukhin, Barlas O\u011fuz, Sewon Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. 2020. Dense passage retrieval for open-domain question answering. arXiv preprint arXiv:2004.04906 (2020)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/30.1-2.81"},{"key":"e_1_3_2_1_21_1","volume-title":"Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov, and Luke Zettlemoyer. 2019. Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"Minghan Li Sheng-Chieh Lin Barlas Oguz Asish Ghoshal Jimmy Lin Yashar Mehdad Wen-tau Yih and Xilun Chen. 2023. CITADEL: Conditional Token Interaction via Dynamic Lexical Routing for Efficient and Effective Multi-Vector Retrieval. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) Anna Rogers Jordan Boyd-Graber and Naoaki Okazaki (Eds.). Association for Computational Linguistics Toronto Canada 11891--11907. 
https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.663","DOI":"10.18653\/v1\/2023.acl-long.663"},{"key":"e_1_3_2_1_23_1","unstructured":"Zhen Li Xiaohan Xu Tao Shen Can Xu Jia-Chen Gu Yuxuan Lai Chongyang Tao and Shuai Ma. 2024. Leveraging Large Language Models for NLG Evaluation: Advances and Challenges. (2024)."},{"key":"e_1_3_2_1_24_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74--81. https:\/\/aclanthology.org\/W04-1013"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"e_1_3_2_1_26_1","unstructured":"Qingyu Lu Baopu Qiu Liang Ding Liping Xie and Dacheng Tao. 2023. Error analysis prompting enables human-like translation evaluation in large language models: A case study on chatgpt. (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"Chatgpt as a factual inconsistency evaluator for abstractive text summarization. arXiv preprint arXiv:2303.15621","author":"Luo Zheheng","year":"2023","unstructured":"Zheheng Luo, Qianqian Xie, and Sophia Ananiadou. 2023. Chatgpt as a factual inconsistency evaluator for abstractive text summarization. arXiv preprint arXiv:2303.15621 (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"GPTEval: A survey on assessments of ChatGPT and GPT-4. arXiv preprint arXiv:2308.12488","author":"Mao Rui","year":"2023","unstructured":"Rui Mao, Guanyi Chen, Xulang Zhang, Frank Guerin, and Erik Cambria. 2023. GPTEval: A survey on assessments of ChatGPT and GPT-4. 
arXiv preprint arXiv:2308.12488 (2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1269"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.64"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4510"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-3049"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.213"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.272"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.704"},{"key":"e_1_3_2_1_37_1","volume-title":"Lexmae: Lexicon-bottlenecked pretraining for large-scale retrieval. arXiv preprint arXiv:2208.14754","author":"Shen Tao","year":"2022","unstructured":"Tao Shen, Xiubo Geng, Chongyang Tao, Can Xu, Xiaolong Huang, Binxing Jiao, Linjun Yang, and Daxin Jiang. 2022. Lexmae: Lexicon-bottlenecked pretraining for large-scale retrieval. arXiv preprint arXiv:2208.14754 (2022)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6456"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.aacl-main.66"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11321"},{"key":"e_1_3_2_1_41_1","volume-title":"LLMs are Also Effective Embedding Models: An In-depth Overview. arXiv preprint arXiv:2412","author":"Tao Chongyang","year":"2024","unstructured":"Chongyang Tao, Tao Shen, Shen Gao, Junshuo Zhang, Zhen Li, Zhengwei Tao, and Shuai Ma. 2024. LLMs are Also Effective Embedding Models: An In-depth Overview. 
arXiv preprint arXiv:2412 (2024)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.8"},{"key":"e_1_3_2_1_43_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_44_1","volume-title":"RISE: Leveraging Retrieval Techniques for Summarization Evaluation. arXiv preprint arXiv:2212.08775","author":"Uthus David","year":"2022","unstructured":"David Uthus and Jianmo Ni. 2022. RISE: Leveraging Retrieval Techniques for Summarization Evaluation. arXiv preprint arXiv:2212.08775 (2022)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.558"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.558"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-44693-1_54"},{"key":"e_1_3_2_1_48_1","volume-title":"A survey on knowledge distillation of large language models. arXiv preprint arXiv:2402.13116","author":"Xu Xiaohan","year":"2024","unstructured":"Xiaohan Xu, Ming Li, Chongyang Tao, Tao Shen, Reynold Cheng, Jinyang Li, Can Xu, Dacheng Tao, and Tianyi Zhou. 2024. A survey on knowledge distillation of large language models. arXiv preprint arXiv:2402.13116 (2024)."},{"key":"e_1_3_2_1_49_1","volume-title":"Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021","author":"Yuan Weizhe","year":"2021","unstructured":"Weizhe Yuan, Graham Neubig, and Pengfei Liu. 2021. BARTScore: Evaluating Generated Text as Text Generation. 
In Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6--14, 2021, virtual, Marc'Aurelio Ranzato, Alina Beygelzimer, Yann N. Dauphin, Percy Liang, and Jennifer Wortman Vaughan (Eds.). 27263--27277. https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/e4d2b6e6fdeca3e60e0f1a62fee3d9dd-Abstract.html"},{"key":"e_1_3_2_1_50_1","volume-title":"Spearman rank correlation. Encyclopedia of biostatistics","author":"Zar Jerrold H","year":"2005","unstructured":"Jerrold H Zar. 2005. Spearman rank correlation. Encyclopedia of biostatistics, Vol. 7 (2005)."},{"key":"e_1_3_2_1_51_1","volume-title":"BERTScore: Evaluating Text Generation with BERT. In 8th International Conference on Learning Representations, ICLR 2020","author":"Zhang Tianyi","year":"2020","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q. Weinberger, and Yoav Artzi. 2020. BERTScore: Evaluating Text Generation with BERT. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26--30, 2020. OpenReview.net. https:\/\/openreview.net\/forum?id=SkeHuCVFDr"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1053"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.131"},{"key":"e_1_3_2_1_54_1","volume-title":"Judgelm: Fine-tuned large language models are scalable judges. arXiv preprint arXiv:2310.17631","author":"Zhu Lianghui","year":"2023","unstructured":"Lianghui Zhu, Xinggang Wang, and Xinlong Wang. 2023. Judgelm: Fine-tuned large language models are scalable judges. 
arXiv preprint arXiv:2310.17631 (2023)."}],"event":{"name":"WSDM '25: The Eighteenth ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Hannover Germany","acronym":"WSDM '25"},"container-title":["Proceedings of the Eighteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701551.3703581","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701551.3703581","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T09:18:00Z","timestamp":1755767880000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701551.3703581"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,10]]},"references-count":54,"alternative-id":["10.1145\/3701551.3703581","10.1145\/3701551"],"URL":"https:\/\/doi.org\/10.1145\/3701551.3703581","relation":{},"subject":[],"published":{"date-parts":[[2025,3,10]]},"assertion":[{"value":"2025-03-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}