{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T11:48:35Z","timestamp":1774352915414,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":79,"publisher":"ACM","funder":[{"name":"German Research Foundation (DFG)","award":["AN 996\/1-1"],"award-info":[{"award-number":["AN 996\/1-1"]}]},{"name":"European Union's Horizon Europe","award":["101070212"],"award-info":[{"award-number":["101070212"]}]},{"name":"Lower Saxony Ministry of Science and Culture (MWK)","award":["HybrInt"],"award-info":[{"award-number":["HybrInt"]}]},{"name":"Dutch Research Council","award":["024.004.022,NWA.1389.20.\\-183,KICH3.LTP.20.006"],"award-info":[{"award-number":["024.004.022,NWA.1389.20.\\-183,KICH3.LTP.20.006"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,18]]},"DOI":"10.1145\/3731120.3744592","type":"proceedings-article","created":{"date-parts":[[2025,7,18]],"date-time":"2025-07-18T13:34:06Z","timestamp":1752845646000},"page":"22-32","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Correctness is not Faithfulness in Retrieval Augmented Generation Attributions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1239-2067","authenticated-orcid":false,"given":"Jonas","family":"Wallat","sequence":"first","affiliation":[{"name":"L3S Research Center, Hannover, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5360-9627","authenticated-orcid":false,"given":"Maria","family":"Heuss","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1086-0202","authenticated-orcid":false,"given":"Maarten de","family":"Rijke","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0163-0739","authenticated-orcid":false,"given":"Avishek","family":"Anand","sequence":"additional","affiliation":[{"name":"Delft University of Technology, Delft, Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2025,7,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00667"},{"key":"e_1_3_2_1_2_1","volume-title":"Sree Harsha Tanneru, and Himabindu Lakkaraju","author":"Agarwal Chirag","year":"2024","unstructured":"Chirag Agarwal, Sree Harsha Tanneru, and Himabindu Lakkaraju. 2024. Faithfulness vs. Plausibility: On the (Un) reliability of Explanations from Large Language Models. arXiv preprint arXiv:2402.04614, (2024)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Muhammad Aurangzeb Ahmad Ilker Yaramis and Taposh Dutta Roy. 2023. Creating Trustworthy LLMs: Dealing with Hallucinations in Healthcare AI. arXiv preprint arXiv:2311.01463 (2023).","DOI":"10.20944\/preprints202310.1662.v1"},{"key":"e_1_3_2_1_4_1","volume-title":"The Twelfth International Conference on Learning Representations, ICLR 2024","author":"Asai Akari","year":"2024","unstructured":"Akari Asai, Zeqiu Wu, Yizhong Wang, Avirup Sil, and Hannaneh Hajishirzi. 2024. Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024."},{"key":"e_1_3_2_1_5_1","volume-title":"Marlize Van Sittert, and Sirisha Rambhatla","author":"Barnard Francois","year":"2023","unstructured":"Francois Barnard, Marlize Van Sittert, and Sirisha Rambhatla. 2023. Self-diagnosis and Large Language Models: A New Front for Medical Misinformation. arXiv preprint arXiv:2307.04910, (2023)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.64"},{"key":"e_1_3_2_1_7_1","volume-title":"Massimiliano Ciaramita, Jacob Eisenstein, Kuzman Ganchev, Jonathan Herzig, et al.","author":"Bohnet Bernd","year":"2022","unstructured":"Bernd Bohnet, Vinh Q Tran, Pat Verga, Roee Aharoni, Daniel Andor, Livio Baldini Soares, Massimiliano Ciaramita, Jacob Eisenstein, Kuzman Ganchev, Jonathan Herzig, et al., 2022. Attributed Question Answering: Evaluation and Modeling for Attributed Large Language Models. arXiv preprint arXiv:2212.08037, (2022)."},{"key":"e_1_3_2_1_8_1","volume-title":"Discovering Latent Knowledge in Language Models Without Supervision. In The Eleventh International Conference on Learning Representations.","author":"Burns Collin","year":"2023","unstructured":"Collin Burns, Haotian Ye, Dan Klein, and Jacob Steinhardt. 2023. Discovering Latent Knowledge in Language Models Without Supervision. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_9_1","unstructured":"Oana-Maria Camburu Eleonora Giunchiglia Jakob Foerster Thomas Lukasiewicz and Phil Blunsom. 2019. Can I Trust the Explainer? Verifying Post-hoc Explanatory Methods. arXiv preprint arXiv:1910.02065 (2019)."},{"key":"e_1_3_2_1_10_1","volume-title":"Advances in Neural Information Processing Systems","volume":"31","author":"Camburu Oana-Maria","year":"2018","unstructured":"Oana-Maria Camburu, Tim Rockt\u00e4schel, Thomas Lukasiewicz, and Phil Blunsom. 2018. e-SNLI: Natural Language Inference with Natural Language Explanations. Advances in Neural Information Processing Systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_11_1","volume-title":"PURR: Efficiently Editing Language Model Hallucinations by Denoising Language Model Corruptions. arXiv preprint arXiv:2305.14908","author":"Chen Anthony","year":"2023","unstructured":"Anthony Chen, Panupong Pasupat, Sameer Singh, Hongrae Lee, and Kelvin Guu. 2023. PURR: Efficiently Editing Language Model Hallucinations by Denoising Language Model Corruptions. arXiv preprint arXiv:2305.14908, (2023)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657841"},{"key":"e_1_3_2_1_13_1","volume-title":"John Schulman, Arushi Somani, Peter Hase, Misha Wagner Fabien Roger Vlad Mikulik, Sam Bowman, Jan Leike Jared Kaplan, et al.","author":"Chen Yanda","year":"2025","unstructured":"Yanda Chen, Joe Benton, Ansh Radhakrishnan, Jonathan Uesato Carson Denison, John Schulman, Arushi Somani, Peter Hase, Misha Wagner Fabien Roger Vlad Mikulik, Sam Bowman, Jan Leike Jared Kaplan, et al., 2025. Reasoning Models Don't Always Say What They Think. arXiv preprint arXiv:2505.05410, (2025)."},{"key":"e_1_3_2_1_14_1","volume-title":"Counterfactual Simulatability of Natural Language Explanations. In Forty-first International Conference on Machine Learning.","author":"Chen Yanda","year":"2024","unstructured":"Yanda Chen, Ruiqi Zhong, Narutatsu Ri, Chen Zhao, He He, Jacob Steinhardt, Zhou Yu, and Kathleen McKeown. 2024b. Do Models Explain Themselves? Counterfactual Simulatability of Natural Language Explanations. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1001\/jama.2018.16865"},{"key":"e_1_3_2_1_16_1","volume-title":"ICLR 2025 Workshop on Foundation Models in the Wild.","author":"Chua James","year":"2025","unstructured":"James Chua and Owain Evans. 2025. Are DeepSeek R1 And Other Reasoning Models More Faithful?. In ICLR 2025 Workshop on Foundation Models in the Wild."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657834"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679172"},{"key":"e_1_3_2_1_19_1","first-page":"12","article-title":". A Mathematical Framework for Transformer Circuits","volume":"1","author":"Elhage Nelson","year":"2021","unstructured":"Nelson Elhage, Neel Nanda, Catherine Olsson, Tom Henighan, Nicholas Joseph, Ben Mann, Amanda Askell, Yuntao Bai, Anna Chen, Tom Conerly, et al., 2021. A Mathematical Framework for Transformer Circuits. Transformer Circuits Thread, Vol. 1, 1 (2021), 12.","journal-title":"Transformer Circuits Thread"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-demo.16"},{"key":"e_1_3_2_1_21_1","volume-title":"ECIR 2022, Stavanger, Norway, April 10-14, 2022, Proceedings, Part II, (Lecture Notes in Computer Science","volume":"127","author":"Formal Thibault","year":"2022","unstructured":"Thibault Formal, Benjamin Piwowarski, and St\u00e9phane Clinchant. 2022. Match Your Words! A Study of Lexical Matching in Neural Information Retrieval. In Advances in Information Retrieval - 44th European Conference on IR Research, ECIR 2022, Stavanger, Norway, April 10-14, 2022, Proceedings, Part II, (Lecture Notes in Computer Science, Vol. 13186). Springer, 120-127."},{"key":"e_1_3_2_1_22_1","unstructured":"Jianfeng Gao Baolin Peng Chunyuan Li Jinchao Li Shahin Shayandeh Lars Liden and Heung-Yeung Shum. 2020. Robust Conversational AI with Grounded Text Generation. arXiv preprint arXiv:2009.03457 (2020)."},{"key":"e_1_3_2_1_23_1","volume-title":"Using Language Models. In The 61st Annual Meeting Of The Association For Computational Linguistics.","author":"Gao Luyu","year":"2023","unstructured":"Luyu Gao, Zhuyun Dai, Panupong Pasupat, Anthony Chen, Arun Tejasvi Chaganty, Yicheng Fan, Vincent Y Zhao, Ni Lao, Hongrae Lee, Da-Cheng Juan, et al., 2023a. RARR: Researching and Revising What Language Models Say, Using Language Models. In The 61st Annual Meeting Of The Association For Computational Linguistics."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.398"},{"key":"e_1_3_2_1_25_1","unstructured":"Yunfan Gao Yun Xiong Xinyu Gao Kangxiang Jia Jinliu Pan Yuxi Bi Yi Dai Jiawei Sun and Haofen Wang. 2023b. Retrieval-augmented Generation for Large Language Models: A Survey. arXiv preprint arXiv:2312.10997 (2023)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.446"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594067"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1001\/jama.2023.5321"},{"key":"e_1_3_2_1_29_1","unstructured":"High-Level Expert Group on AI. 2019. Ethics Guidelines for Trustworthy AI. Technical Report. EU. https:\/\/ec.europa.eu\/digital-single-market\/en\/news\/ethics-guidelines-trustworthy-ai."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.trustnlp-1.8"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.386"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445923"},{"key":"e_1_3_2_1_33_1","volume-title":"HAGRID: A Human-LLM Collaborative Dataset for Generative Information-Seeking with Attribution. arXiv preprint arXiv:2307.16883","author":"Kamalloo Ehsan","year":"2023","unstructured":"Ehsan Kamalloo, Aref Jafari, Xinyu Zhang, Nandan Thakur, and Jimmy Lin. 2023. HAGRID: A Human-LLM Collaborative Dataset for Generative Information-Seeking with Attribution. arXiv preprint arXiv:2307.16883, (2023)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Enkelejda Kasneci Kathrin Se\u00dfler Stefan K\u00fcchemann Maria Bannert Daryna Dementieva Frank Fischer Urs Gasser Georg Groh Stephan G\u00fcnnemann Eyke H\u00fcllermeier et al. 2023. ChatGPT for Good? On Opportunities and Challenges of Large Language Models for Education. Learning and individual differences Vol. 103 (2023) 102274.","DOI":"10.1016\/j.lindif.2023.102274"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_36_1","unstructured":"Tamera Lanham Anna Chen Ansh Radhakrishnan Benoit Steiner Carson Denison Danny Hernandez Dustin Li Esin Durmus Evan Hubinger Jackson Kernion et al. 2023. Measuring Faithfulness in Chain-of-Thought Reasoning. arXiv preprint arXiv:2307.13702 (2023)."},{"key":"e_1_3_2_1_37_1","volume-title":"Carl Sable, Vijay Shanker, John Ely, and Hong Yu.","author":"Lee Minsuk","year":"2006","unstructured":"Minsuk Lee, James Cimino, Hai Ran Zhu, Carl Sable, Vijay Shanker, John Ely, and Hong Yu. 2006. Beyond Information Retrieval-Medical Question Answering. In AMIA annual symposium proceedings, Vol. 2006. American Medical Informatics Association, 469."},{"key":"e_1_3_2_1_38_1","first-page":"9459","article-title":"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems, Vol. 33. 9459-9474.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_39_1","unstructured":"Dongfang Li Zetian Sun Xinshuo Hu Zhenyu Liu Ziyang Chen Baotian Hu Aiguo Wu and Min Zhang. 2023. A Survey of Large Language Models Attribution. arXiv preprint arXiv:2311.03731 (2023)."},{"key":"e_1_3_2_1_40_1","volume-title":"Jian-Yun Nie, and Ji-Rong Wen.","author":"Li Junyi","year":"2024","unstructured":"Junyi Li, Jie Chen, Ruiyang Ren, Xiaoxue Cheng, Wayne Xin Zhao, Jian-Yun Nie, and Ji-Rong Wen. 2024. The Dawn after the Dark: An Empirical Study on Factuality Hallucination in Large Language Models. arXiv preprint arXiv:2401.03205, (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1082"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.467"},{"key":"e_1_3_2_1_43_1","volume-title":"Yegor Klochkov, Muhammad Faaiz Taufiq, and Hang Li.","author":"Liu Yang","year":"2023","unstructured":"Yang Liu, Yuanshun Yao, Jean-Francois Ton, Xiaoying Zhang, Ruocheng Guo Hao Cheng, Yegor Klochkov, Muhammad Faaiz Taufiq, and Hang Li. 2023a. Trustworthy LLMs: A Survey and Guideline for Evaluating Large Language Models' Alignment. arXiv preprint arXiv:2308.05374, (2023)."},{"key":"e_1_3_2_1_44_1","first-page":"1","volume-title":"Computational Linguistics","author":"Lyu Qing","year":"2024","unstructured":"Qing Lyu, Marianna Apidianaki, and Chris Callison-Burch. 2024. Towards Faithful Model Explanation in NLP: A Survey. Computational Linguistics, (2024), 1-67."},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific","author":"Lyu Qing","unstructured":"Qing Lyu, Shreya Havaldar, Adam Stein, Li Zhang, Delip Rao, Eric Wong, Marianna Apidianaki, and Chris Callison-Burch. 2023. Faithful Chain-of-Thought Reasoning. In Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers). 305-329."},{"key":"e_1_3_2_1_46_1","volume-title":"Legal Knowledge and Information Systems","author":"Tamsin Maxwell K","unstructured":"K Tamsin Maxwell and Burkhard Schafer. 2008. Concept and Context in Legal Information Retrieval. In Legal Knowledge and Information Systems. IOS Press, 63-72."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657846"},{"key":"e_1_3_2_1_48_1","unstructured":"Jacob Menick Maja Trebacz Vladimir Mikulik John Aslanides Francis Song Martin Chadwick Mia Glaese Susannah Young Lucy Campbell-Gillingham Geoffrey Irving et al. 2022. Teaching Language Models to Support Answers with Verified Quotes. arXiv preprint arXiv:2203.11147 (2022)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.socscimed.2022.115398"},{"key":"e_1_3_2_1_50_1","first-page":"1","article-title":"From Anecdotal Evidence to Quantitative Evaluation Methods","volume":"55","author":"Nauta Meike","year":"2023","unstructured":"Meike Nauta, Jan Trienes, Shreyasi Pathak, Elisa Nguyen, Michelle Peters, Yasmin Schmitt, J\u00f6rg Schl\u00f6tterer, Maurice van Keulen, and Christin Seifert. 2023. From Anecdotal Evidence to Quantitative Evaluation Methods: A Systematic Review on Evaluating Explainable AI. Comput. Surveys, Vol. 55, 13s (2023), 1-42.","journal-title":"A Systematic Review on Evaluating Explainable AI. Comput. Surveys"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.559"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.200"},{"key":"e_1_3_2_1_53_1","unstructured":"Jirui Qi Gabriele Sarti Raquel Fern\u00e1ndez and Arianna Bisazza. 2024. Model Internals-based Answer Attribution for Trustworthy Retrieval-Augmented Generation. arXiv preprint arXiv:2406.13663 (2024)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1162\/coli_a_00486"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.est.3c01106"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1177\/0165551513477820"},{"key":"e_1_3_2_1_57_1","volume-title":"Evaluation of RAG Metrics for Question Answering in the Telecom Domain. In ICML 2024 Workshop on Foundation Models in the Wild.","author":"Roychowdhury Sujoy","year":"2024","unstructured":"Sujoy Roychowdhury, Sumit Soman, HG Ranjani, Neeraj Gunda, Vansh Chhabra, and Sai Krishna Bala. 2024. Evaluation of RAG Metrics for Question Answering in the Telecom Domain. In ICML 2024 Workshop on Foundation Models in the Wild."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.20"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627043.3659573"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.272"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3686038.3686043"},{"key":"e_1_3_2_1_62_1","first-page":"2069","volume-title":"Proceedings of the 43rd International ACM SIGIR conference on research and development in Information Retrieval, SIGIR 2020","author":"Sen Procheta","year":"2020","unstructured":"Procheta Sen, Debasis Ganguly, Manisha Verma, and Gareth J. F. Jones. 2020. The Curious Case of IR Explainability: Explaining Document Scores within and across Ranking Models. In Proceedings of the 43rd International ACM SIGIR conference on research and development in Information Retrieval, SIGIR 2020, Virtual Event, China, July 25-30, 2020. ACM, 2069-2072."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3375234"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"crossref","unstructured":"Aviv Slobodkin Eran Hirsch Arie Cattan Tal Schuster and Ido Dagan. 2024. Attribute First then Generate: Locally-attributable Grounded Text Generation. arXiv preprint arXiv:2403.17104 (2024).","DOI":"10.18653\/v1\/2024.acl-long.182"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.amepre.2019.03.009"},{"key":"e_1_3_2_1_66_1","volume-title":"Jamie Hall, Noam Shazeer, Apoorv Kulshreshtha, Heng-Tze Cheng, Alicia Jin, Taylor Bos, Leslie Baker, Yu Du, et al.","author":"Thoppilan Romal","year":"2022","unstructured":"Romal Thoppilan, Daniel De Freitas, Jamie Hall, Noam Shazeer, Apoorv Kulshreshtha, Heng-Tze Cheng, Alicia Jin, Taylor Bos, Leslie Baker, Yu Du, et al., 2022. LaMDA: Language Models for Dialog Applications. arXiv preprint arXiv:2201.08239, (2022)."},{"key":"e_1_3_2_1_67_1","unstructured":"S.M. Towhidul Islam Tonmoy S.M. Mehedi Zaman Vinija Jain Anku Rani Vipula Rawte Aman Chadha and Amitava Das. 2024. A Comprehensive Survey of Hallucination Mitigation Techniques in Large Language Models. arXiv preprint arXiv:2401.01313 (2024)."},{"key":"e_1_3_2_1_68_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Turpin Miles","year":"2024","unstructured":"Miles Turpin, Julian Michael, Ethan Perez, and Samuel Bowman. 2024. Language Models Don't Always Say What They Think: Unfaithful Explanations in Chain-of-Thought Prompting. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_69_1","volume-title":"Probing BERT for Ranking Abilities. In European Conference on Information Retrieval. Springer Nature Switzerland Cham, 255-273","author":"Wallat Jonas","year":"2023","unstructured":"Jonas Wallat, Fabian Beringer, Abhijit Anand, and Avishek Anand. 2023. Probing BERT for Ranking Abilities. In European Conference on Information Retrieval. Springer Nature Switzerland Cham, 255-273."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679556"},{"key":"e_1_3_2_1_71_1","unstructured":"Yike Wang Shangbin Feng Heng Wang Weijia Shi Vidhisha Balachandran Tianxing He and Yulia Tsvetkov. 2023. Resolving Knowledge Conflicts in Large Language Models. arXiv preprint arXiv:2310.00935 (2023)."},{"key":"e_1_3_2_1_72_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al., 2022. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. In Advances in Neural Information Processing systems, Vol. 35. 24824-24837."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-long.140"},{"key":"e_1_3_2_1_74_1","unstructured":"Fangzhou Wu Ning Zhang Somesh Jha Patrick McDaniel and Chaowei Xiao. 2024. A New Era in LLM Security: Exploring Security Concerns in Real-world LLM-based Systems. arXiv preprint arXiv:2402.18649 (2024)."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.383"},{"key":"e_1_3_2_1_76_1","unstructured":"Rongwu Xu Zehan Qi Zhijiang Guo Cunxiang Wang Hongru Wang Yue Zhang and Wei Xu. 2024. Knowledge Conflicts for LLMs: A Survey. arXiv preprint arXiv:2403.08319 (2024)."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1002\/hcs2.61"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.346"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.inlg-main.35"}],"event":{"name":"ICTIR '25: International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval","location":"Padua Italy","acronym":"ICTIR '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731120.3744592","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:17:39Z","timestamp":1755868659000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731120.3744592"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,18]]},"references-count":79,"alternative-id":["10.1145\/3731120.3744592","10.1145\/3731120"],"URL":"https:\/\/doi.org\/10.1145\/3731120.3744592","relation":{},"subject":[],"published":{"date-parts":[[2025,7,18]]},"assertion":[{"value":"2025-07-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}