{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T16:45:36Z","timestamp":1780418736806,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761506","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T01:03:42Z","timestamp":1762563822000},"page":"5642-5649","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["RottenReviews: Benchmarking Review Quality with Human and LLM-Based Judgments"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1630-3938","authenticated-orcid":false,"given":"Sajad","family":"Ebrahimi","sequence":"first","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2172-7617","authenticated-orcid":false,"given":"Soroush","family":"Sadeghian","sequence":"additional","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7383-8105","authenticated-orcid":false,"given":"Ali","family":"Ghorbanpour","sequence":"additional","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4411-7089","authenticated-orcid":false,"given":"Negar","family":"Arabzadeh","sequence":"additional","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3676-6023","authenticated-orcid":false,"given":"Sara","family":"Salamat","sequence":"additional","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5190-1808","authenticated-orcid":false,"given":"Muhan","family":"Li","sequence":"additional","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2240-0451","authenticated-orcid":false,"given":"Hai Son","family":"Le","sequence":"additional","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9211-5475","authenticated-orcid":false,"given":"Mahdi","family":"Bashari","sequence":"additional","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5148-6237","authenticated-orcid":false,"given":"Ebrahim","family":"Bagheri","sequence":"additional","affiliation":[{"name":"Reviewerly, Toronto, ON, Canada and University of Toronto, Toronto, ON, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Farkhund Iqbal, Zunera Jalil, Thippa Reddy Gadekallu, and Natalia Kryvinska.","author":"Abbasi Ahmed","year":"2022","unstructured":"Ahmed Abbasi, Abdul Rehman Javed, Farkhund Iqbal, Zunera Jalil, Thippa Reddy Gadekallu, and Natalia Kryvinska. 2022. Authorship identification using ensemble learning. Scientific reports, Vol. 12, 1 (2022), 9537."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2401232121"},{"key":"e_1_3_2_1_3_1","volume-title":"Charles LA Clarke, and Mark Sanderson","author":"Alaofi Marwah","year":"2024","unstructured":"Marwah Alaofi, Negar Arabzadeh, Charles LA Clarke, and Mark Sanderson. 2024. Generative information retrieval evaluation. In Information access in the era of generative ai. Springer, 135-159."},{"key":"e_1_3_2_1_4_1","volume-title":"A comparison of methods for evaluating generative ir. arXiv preprint arXiv:2404.04044","author":"Arabzadeh Negar","year":"2024","unstructured":"Negar Arabzadeh and Charles LA Clarke. 2024. A comparison of methods for evaluating generative ir. arXiv preprint arXiv:2404.04044 (2024)."},{"key":"e_1_3_2_1_5_1","volume-title":"Benchmarking LLM-based Relevance Judgment Methods. arXiv preprint arXiv:2504.12558","author":"Arabzadeh Negar","year":"2025","unstructured":"Negar Arabzadeh and Charles LA Clarke. 2025a. Benchmarking LLM-based Relevance Judgment Methods. arXiv preprint arXiv:2504.12558 (2025)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730159"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679081"},{"key":"e_1_3_2_1_8_1","volume-title":"An examination of sources of peer-review bias. Psychological science","author":"Blackburn Jessica L","year":"2006","unstructured":"Jessica L Blackburn and Milton D Hakel. 2006. An examination of sources of peer-review bias. Psychological science, Vol. 17, 5 (2006), 378-382."},{"key":"e_1_3_2_1_9_1","volume-title":"Specter: Document-level representation learning using citation-informed transformers. arXiv preprint arXiv:2004.07180","author":"Cohan Arman","year":"2020","unstructured":"Arman Cohan, Sergey Feldman, Iz Beltagy, Doug Downey, and Daniel S Weld. 2020. Specter: Document-level representation learning using citation-informed transformers. arXiv preprint arXiv:2004.07180 (2020)."},{"key":"e_1_3_2_1_10_1","volume-title":"Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:12383721","author":"Danescu-Niculescu-Mizil Cristian","year":"2013","unstructured":"Cristian Danescu-Niculescu-Mizil, Moritz Sudhof, Daniel Jurafsky, Jure Leskovec, and Christopher Potts. 2013. A computational approach to politeness with application to social factors. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:12383721"},{"key":"e_1_3_2_1_11_1","volume-title":"Pranav Narayanan Venkit, Nan Zhang, Mukund Srinath, et al.","author":"Du Jiangshu","year":"2024","unstructured":"Jiangshu Du, Yibo Wang, Wenting Zhao, Zhongfen Deng, Shuaiqi Liu, Renze Lou, Henry Peng Zou, Pranav Narayanan Venkit, Nan Zhang, Mukund Srinath, et al., 2024. Llms assist nlp researchers: Critique paper (meta-) reviewing. arXiv preprint arXiv:2406.16253 (2024)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-88714-7_1"},{"key":"e_1_3_2_1_13_1","unstructured":"Aaron Grattafiori et al. 2024a. The Llama 3 Herd of Models. arXiv:2407.21783 [cs.AI] https:\/\/arxiv.org\/abs\/2407.21783"},{"key":"e_1_3_2_1_14_1","unstructured":"An Yang et al. 2025. Qwen3 Technical Report. arXiv preprint arXiv:2505.09388 (2025)."},{"key":"e_1_3_2_1_15_1","unstructured":"Marah Abdin et al. 2024b. Phi-4 Technical Report. arXiv:2412.08905 [cs.CL] https:\/\/arxiv.org\/abs\/2412.08905"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1037\/h0057532"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1093\/embo-reports\/kve188"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1129"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0320444"},{"key":"e_1_3_2_1_20_1","volume-title":"H. M. Mutasim Billah, Arifa Akter, Md Al Emran Hossain, Sumayra Islam, Salekul Islam, and Swakkhar Shatabda.","author":"Tarek Hasan Md.","year":"2024","unstructured":"Md. Tarek Hasan, Mohammad Nazmush Shamael, H. M. Mutasim Billah, Arifa Akter, Md Al Emran Hossain, Sumayra Islam, Salekul Islam, and Swakkhar Shatabda. 2024. Deep Transfer Learning Based Peer Review Aggregation and Meta-review Generation for Scientific Articles. arXiv:2410.04202 [cs.LG] https:\/\/arxiv.org\/abs\/2410.04202"},{"key":"e_1_3_2_1_21_1","volume-title":"Gender bias in scholarly peer review. elife","author":"Helmer Markus","year":"2017","unstructured":"Markus Helmer, Manuel Schottdorf, Andreas Neef, and Demian Battaglia. 2017. Gender bias in scholarly peer review. elife, Vol. 6 (2017), e21718."},{"key":"e_1_3_2_1_22_1","volume-title":"A Dataset of Peer Reviews (PeerRead): Collection, Insights and NLP Applications. CoRR","author":"Kang Dongyeop","year":"2018","unstructured":"Dongyeop Kang, Waleed Ammar, Bhavana Dalvi, Madeleine van Zuylen, Sebastian Kohlmeier, Eduard H. Hovy, and Roy Schwartz. 2018. A Dataset of Peer Reviews (PeerRead): Collection, Insights and NLP Applications. CoRR, Vol. abs\/1804.09635 (2018). arXiv:1804.09635 http:\/\/arxiv.org\/abs\/1804.09635"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1057\/9780230511804"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3736402"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1108\/EL-06-2022-0139"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1108\/EL-06-2022-0139"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1281192.1281247"},{"key":"e_1_3_2_1_28_1","volume-title":"European Conference on Information Retrieval. Springer, 132-148","author":"Pradeep Ronak","year":"2025","unstructured":"Ronak Pradeep, Nandan Thakur, Sahel Sharifymoghaddam, Eric Zhang, Ryan Nguyen, Daniel Campos, Nick Craswell, and Jimmy Lin. 2025. Ragnar\u00f6k: A reusable RAG framework and baselines for TREC 2024 retrieval-augmented generation track. In European Conference on Information Retrieval. Springer, 132-148."},{"key":"e_1_3_2_1_29_1","volume-title":"LazyReview A Dataset for Uncovering Lazy Thinking in NLP Peer Reviews. arXiv preprint arXiv:2504.11042","author":"Purkayastha Sukannya","year":"2025","unstructured":"Sukannya Purkayastha, Zhuang Li, Anne Lauscher, Lizhen Qu, and Iryna Gurevych. 2025. LazyReview A Dataset for Uncovering Lazy Thinking in NLP Peer Reviews. arXiv preprint arXiv:2504.11042 (2025)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-88714-7_29"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101241"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1186\/s13104-022-06080-6"},{"key":"e_1_3_2_1_33_1","volume-title":"Defining quality in peer review reports: a scoping review. Knowledge and Information Systems","author":"Sizo Amanda","year":"2025","unstructured":"Amanda Sizo, Adriano Lino, \u00c1lvaro Rocha, and Lu\u00eds Paulo Reis. 2025. Defining quality in peer review reports: a scoping review. Knowledge and Information Systems (2025), 1-48."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-emnlp.420"},{"key":"e_1_3_2_1_35_1","volume-title":"The limitations to our understanding of peer review. Research integrity and peer review","author":"Tennant Jonathan P","year":"2020","unstructured":"Jonathan P Tennant and Tony Ross-Hellauer. 2020. The limitations to our understanding of peer review. Research integrity and peer review, Vol. 5, 1 (2020), 6."},{"key":"e_1_3_2_1_36_1","volume-title":"Support Evaluation for the TREC 2024 RAG Track: Comparing Human versus LLM Judges. arXiv preprint arXiv:2504","author":"Thakur Nandan","year":"2025","unstructured":"Nandan Thakur, Ronak Pradeep, Shivani Upadhyay, Daniel Campos, Nick Craswell, and Jimmy Lin. 2025. Support Evaluation for the TREC 2024 RAG Track: Comparing Human versus LLM Judges. arXiv preprint arXiv:2504.15205 (2025)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1707323114"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1097\/AOG.0000000000003260"},{"key":"e_1_3_2_1_39_1","volume-title":"Ian Charest, Christopher Summerfield, and Ila R Fiete.","author":"Zahorodnii Andrii","year":"2025","unstructured":"Andrii Zahorodnii, Jasper JF van den Bosch, Ian Charest, Christopher Summerfield, and Ila R Fiete. 2025. Paper Quality Assessment based on Individual Wisdom Metrics from Open Peer Review. arXiv preprint arXiv:2501.13014 (2025)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i1.32106"},{"key":"e_1_3_2_1_41_1","first-page":"9340","volume-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING","author":"Zhou Ruiyang","year":"2024","unstructured":"Ruiyang Zhou, Lu Chen, and Kai Yu. 2024. Is LLM a reliable reviewer? A comprehensive evaluation of LLM on automatic paper reviewing tasks. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024). 9340-9351."}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","location":"Seoul Republic of Korea","acronym":"CIKM '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761506","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:14:45Z","timestamp":1765502085000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761506"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":41,"alternative-id":["10.1145\/3746252.3761506","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761506","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}