{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:38:15Z","timestamp":1775068695691,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761484","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T23:55:33Z","timestamp":1762559733000},"page":"6593-6597","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["RerankArena: A Unified Platform for Evaluating Retrieval, Reranking and RAG with Human and LLM Feedback"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8747-4927","authenticated-orcid":false,"given":"Abdelrahman","family":"Abdallah","sequence":"first","affiliation":[{"name":"University of Innsbruck, Innsbruck, Tyrol, Austria"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4270-2268","authenticated-orcid":false,"given":"Mahmoud","family":"Abdalla","sequence":"additional","affiliation":[{"name":"Chungbuk National University, Cheongju-si, Cheongju, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3578-2393","authenticated-orcid":false,"given":"Bhawna","family":"Piryani","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Innsbruck, Tyrol, Austria"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4850-9239","authenticated-orcid":false,"given":"Jamshid","family":"Mozafari","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Innsbruck, Tyrol, Austria"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3600-0785","authenticated-orcid":false,"given":"Mohammed","family":"Ali","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Innsbruck, Tyrol, Austria"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7235-0665","authenticated-orcid":false,"given":"Adam","family":"Jatowt","sequence":"additional","affiliation":[{"name":"University of Innsbruck, Innsbruck, Tyrol, Austria"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Abdelrahman Abdallah Jamshid Mozafari Bhawna Piryani Mohammed M Abdelgwad and Adam Jatowt. 2024. Dynrank: improving passage retrieval with dynamic zero-shot prompting based on question classification. arXiv preprint arXiv:2412.00600."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Abdelrahman Abdallah Jamshid Mozafari Bhawna Piryani and Adam Jatowt. 2025. Asrank: zero-shot re-ranking with answer scent for document retrieval. arXiv preprint arXiv:2501.15245.","DOI":"10.18653\/v1\/2025.findings-naacl.161"},{"key":"e_1_3_2_1_3_1","unstructured":"Abdelrahman Abdallah Bhawna Piryani Jamshid Mozafari Mohammed Ali and Adam Jatowt. 2025. Rankify: a comprehensive python toolkit for retrieval re-ranking and retrieval-augmented generation. arXiv preprint arXiv:2502.02464."},{"key":"e_1_3_2_1_4_1","unstructured":"Abubakar Abid Ali Abdalla Ali Abid Dawood Khan Abdulrahman Alfozan and James Zou. 2019. Gradio: hassle-free sharing and testing of ml models in the wild. arXiv preprint arXiv:1906.02569."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Ge Bai et al. 2024. Mt-bench-101: a fine-grained benchmark for evaluating large language models in multi-turn dialogues. arXiv preprint arXiv:2402.14762.","DOI":"10.18653\/v1\/2024.acl-long.401"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-56027-9_29"},{"key":"e_1_3_2_1_7_1","unstructured":"Parishad BehnamGhader Vaibhav Adlakha Marius Mosbach Dzmitry Bahdanau Nicolas Chapados and Siva Reddy. 2024. Llm2vec: large language models are secretly powerful text encoders. (2024). https:\/\/arxiv.org\/abs\/2404.05961 arXiv: 2404.05961 [cs.CL]."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Jianlv Chen Shitao Xiao Peitian Zhang Kun Luo Defu Lian and Zheng Liu. 2024. Bge m3-embedding: multi-lingual multi-functionality multi-granularity text embeddings through self-knowledge distillation. arXiv preprint arXiv:2402.03216.","DOI":"10.18653\/v1\/2024.findings-acl.137"},{"key":"e_1_3_2_1_9_1","volume-title":"Bernal Jim\u00e9nez Guti\u00e9rrez, and Yu Su","author":"Chen Shijie","year":"2024","unstructured":"Shijie Chen, Bernal Jim\u00e9nez Guti\u00e9rrez, and Yu Su. 2024. Attention in large language models yields efficient zero-shot re-rankers. arXiv preprint arXiv:2410.02642."},{"key":"e_1_3_2_1_10_1","unstructured":"Zijian Chen Ronak Pradeep and Jimmy Lin. 2024. An early first reproduction and improvements to single-token decoding for fast listwise reranking. (2024). https:\/\/arxiv.org\/abs\/2411.05508 arXiv: 2411.05508 [cs.IR]."},{"key":"e_1_3_2_1_11_1","volume-title":"Forty-first International Conference on Machine Learning.","author":"Wei-Lin","unstructured":"Wei-Lin Chiang et al. 2024. Chatbot arena: an open platform for evaluating llms by human preference. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.10426927"},{"key":"e_1_3_2_1_13_1","unstructured":"Darren Edge et al. 2024. From local to global: a graph rag approach to queryfocused summarization. arXiv preprint arXiv:2404.16130."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531857"},{"key":"e_1_3_2_1_15_1","unstructured":"Yunfan Gao et al. 2023. Retrieval-augmented generation for large language models: a survey. arXiv preprint arXiv:2312.10997 2 1."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Raphael Gruber Abdelrahman Abdallah Michael F\u00e4rber and Adam Jatowt. 2024. Complextempqa: a large-scale dataset for complex temporal question answering. arXiv preprint arXiv:2406.04866.","DOI":"10.18653\/v1\/2025.emnlp-main.463"},{"key":"e_1_3_2_1_17_1","unstructured":"Jiawei Gu et al. 2024.Asurvey on llm-as-a-judge. arXiv preprint arXiv:2411.15594."},{"key":"e_1_3_2_1_18_1","volume-title":"Bonan Min, and Vittorio Castelli.","author":"Han Rujun","year":"2024","unstructured":"Rujun Han, Yuhao Zhang, Peng Qi, Yumo Xu, Jenyuan Wang, Lan Liu, William Yang Wang, Bonan Min, and Vittorio Castelli. 2024. Rag-qa arena: evaluating domain robustness for long-form retrieval augmented question answering. arXiv preprint arXiv:2407.13998."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","unstructured":"Gautier Izacard Mathilde Caron Lucas Hosseini Sebastian Riedel Piotr Bojanowski Armand Joulin and Edouard Grave. 2021. Unsupervised dense information retrieval with contrastive learning. (2021). doi:10.48550\/ARXIV.2112.09 118.","DOI":"10.48550\/ARXIV.2112.09"},{"key":"e_1_3_2_1_20_1","unstructured":"Dongfu Jiang Xiang Ren and Bill Yuchen Lin. 2023. Llm-blender: ensembling large language models with pairwise ranking and generative fusion. (2023). https:\/\/arxiv.org\/abs\/2306.02561 arXiv: 2306.02561 [cs.CL]."},{"key":"e_1_3_2_1_21_1","volume-title":"Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.","author":"Karpukhin Vladimir","year":"2020","unstructured":"Vladimir Karpukhin, Barlas Oguz, Sewon Min, Patrick SH Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. 2020. Dense passage retrieval for open-domain question answering. In EMNLP (1), 6769--6781."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401075"},{"key":"e_1_3_2_1_23_1","unstructured":"Thiago Laitz Konstantinos Papakostas Roberto Lotufo and Rodrigo Nogueira. 2024. Inranker: distilled rankers for zero-shot information retrieval. (2024). https:\/\/arxiv.org\/abs\/2401.06910 arXiv: 2401.06910 [cs.IR]."},{"key":"e_1_3_2_1_24_1","unstructured":"Patrick Lewis et al. 2020. Retrieval-augmented generation for knowledgeintensive nlp tasks. Advances in neural information processing systems 33 9459--9474."},{"key":"e_1_3_2_1_25_1","first-page":"111544","article-title":"Wizardarena: posttraining large language models via simulated offline chatbot arena","volume":"37","author":"Luo Haipeng","year":"2024","unstructured":"Haipeng Luo, Qingfeng Sun, Can Xu, Pu Zhao, Qingwei Lin, Jian-Guang Lou, Shifeng Chen, Yansong Tang, and Weizhu Chen. 2024. Wizardarena: posttraining large language models via simulated offline chatbot arena. Advances in Neural Information Processing Systems, 37, 111544--111570.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Jamshid Mozafari Abdelrahman Abdallah Bhawna Piryani and Adam Jatowt. 2024. Exploring hint generation approaches in open-domain question answering. arXiv preprint arXiv:2409.16096.","DOI":"10.18653\/v1\/2024.findings-emnlp.546"},{"key":"e_1_3_2_1_27_1","volume-title":"Multistage document ranking with bert. (2019). https:\/\/arxiv.org\/abs\/1910.14424 arXiv","author":"Nogueira Rodrigo","year":"1910","unstructured":"Rodrigo Nogueira, Wei Yang, Kyunghyun Cho, and Jimmy Lin. 2019. Multistage document ranking with bert. (2019). https:\/\/arxiv.org\/abs\/1910.14424 arXiv: 1910.14424 [cs.IR]."},{"key":"e_1_3_2_1_28_1","unstructured":"A Paszke. 2019. Pytorch: an imperative style high-performance deep learning library. arXiv preprint arXiv:1912.01703."},{"key":"e_1_3_2_1_29_1","unstructured":"Ronak Pradeep Sahel Sharifymoghaddam and Jimmy Lin. 2023. Rankvicuna: zero-shot listwise document reranking with open-source large language models. (2023). https:\/\/arxiv.org\/abs\/2309.15088 arXiv: 2309.15088 [cs.IR]."},{"key":"e_1_3_2_1_30_1","volume-title":"Rankzephyr: effective and robust zero-shot listwise reranking is a breeze!","author":"Pradeep Ronak","year":"2023","unstructured":"Ronak Pradeep, Sahel Sharifymoghaddam, and Jimmy Lin. 2023. Rankzephyr: effective and robust zero-shot listwise reranking is a breeze! (2023). https:\/\/arxiv.org\/abs\/2312.02724 arXiv: 2312.02724 [cs.IR]."},{"key":"e_1_3_2_1_31_1","volume-title":"2024 International Conference on Inventive Computation Technologies (ICICT). IEEE, 1128--1133","author":"Raja Mahimai","year":"2024","unstructured":"Mahimai Raja, E Yuvaraajan, et al. 2024. A rag-based medical assistant especially for infectious diseases. In 2024 International Conference on Inventive Computation Technologies (ICICT). IEEE, 1128--1133."},{"key":"e_1_3_2_1_32_1","volume-title":"Yue Dong, and Vagelis Hristidis.","author":"Rashid Muhammad Shihab","year":"2024","unstructured":"Muhammad Shihab Rashid, Jannat Ara Meem, Yue Dong, and Vagelis Hristidis. 2024. Ecorank: budget-constrained text re-ranking using large language models. (2024). https:\/\/arxiv.org\/abs\/2402.10866 arXiv: 2402.10866 [cs.CL]."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Stephen Robertson Hugo Zaragoza et al. 2009. The probabilistic relevance framework: bm25 and beyond. Foundations and Trends\u00ae in Information Retrieval 3 4 333--389.","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_34_1","unstructured":"Devendra Singh Sachan Mike Lewis Mandar Joshi Armen Aghajanyan Wentau Yih Joelle Pineau and Luke Zettlemoyer. 2022. Improving passage retrieval with zero-shot question generation. https:\/\/arxiv.org\/abs\/2204.07496."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Weiwei Sun Lingyong Yan Xinyu Ma ShuaiqiangWang Pengjie Ren Zhumin Chen Dawei Yin and Zhaochun Ren. 2023. Is chatgpt good at search? investigating large language models as re-ranking agents. arXiv preprint arXiv:2304.09542.","DOI":"10.18653\/v1\/2023.emnlp-main.923"},{"key":"e_1_3_2_1_36_1","unstructured":"Manveer Singh Tamber Ronak Pradeep and Jimmy Lin. 2023. Scaling down litting up: efficient zero-shot listwise reranking with seq2seq encoder-decoder models. (2023). https:\/\/arxiv.org\/abs\/2312.16098 arXiv: 2312.16098 [cs.IR]."},{"key":"e_1_3_2_1_37_1","unstructured":"Nandan Thakur Nils Reimers Andreas R\u00fcckl\u00e9 Abhishek Srivastava and Iryna Gurevych. 2021. Beir: a heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:2104.08663."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324901002789"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Jonas Wallat Abdelrahman Abdallah Adam Jatowt and Avishek Anand. 2025. Astudy into investigating temporal robustness of llms. arXiv preprint arXiv:2503.17073.","DOI":"10.18653\/v1\/2025.findings-acl.810"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1237"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Soyoung Yoon Eunbi Choi Jiyeon Kim Hyeongu Yun Yireun Kim and Seungwon Hwang. 2024. Listt5: listwise reranking with fusion-in-decoder improves zero-shot retrieval. arXiv preprint arXiv:2402.15838.","DOI":"10.18653\/v1\/2024.acl-long.125"},{"key":"e_1_3_2_1_42_1","first-page":"46595","article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","volume":"36","author":"Lianmin Zheng","year":"2023","unstructured":"Lianmin Zheng et al. 2023. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in Neural Information Processing Systems, 36, 46595--46623.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Honglei Zhuang Zhen Qin Rolf Jagerman Kai Hui Ji Ma Jing Lu Jianmo Ni Xuanhui Wang and Michael Bendersky. 2022. Rankt5: fine-tuning t5 for text ranking with ranking losses. (2022). https:\/\/arxiv.org\/abs\/2210.10634 arXiv: 2210.10634 [cs.IR].","DOI":"10.1145\/3539618.3592047"}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","location":"Seoul Republic of Korea","acronym":"CIKM '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761484","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:00:58Z","timestamp":1765497658000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761484"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":43,"alternative-id":["10.1145\/3746252.3761484","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761484","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}