{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T16:37:07Z","timestamp":1780418227426,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679832","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:11Z","timestamp":1729452851000},"page":"34-44","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["Can LLMs Reason Like Humans? Assessing Theory of Mind Reasoning in LLMs for Open-Ended Questions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6142-0637","authenticated-orcid":false,"given":"Maryam","family":"Amirizaniani","sequence":"first","affiliation":[{"name":"University of Washington, Seattle, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3178-1408","authenticated-orcid":false,"given":"Elias","family":"Martin","sequence":"additional","affiliation":[{"name":"University of Washington - Bothell, Bothell, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2889-4247","authenticated-orcid":false,"given":"Maryna","family":"Sivachenko","sequence":"additional","affiliation":[{"name":"University of Washington - Bothell, Bothell, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4631-4438","authenticated-orcid":false,"given":"Afra","family":"Mashhadi","sequence":"additional","affiliation":[{"name":"University of Washington - Bothell, Bothell, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3797-4293","authenticated-orcid":false,"given":"Chirag","family":"Shah","sequence":"additional","affiliation":[{"name":"University of Washington, Seattle, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635856"},{"key":"e_1_3_2_1_2_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3551349.3559555"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1162\/coli.07-034-R2"},{"key":"e_1_3_2_1_5_1","volume-title":"Yuanzhi Li, Scott Lundberg, et al.","author":"Bubeck S\u00e9bastien","year":"2023","unstructured":"S\u00e9bastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, et al. 2023. Sparks of artificial general intelligence: Early experiments with gpt-4. arXiv preprint arXiv:2303.12712 (2023)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1080\/17470910600989771"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.840"},{"key":"e_1_3_2_1_9_1","volume-title":"Understanding social reasoning in language models with language models. Advances in Neural Information Processing Systems 36","author":"Gandhi Kanishk","year":"2024","unstructured":"Kanishk Gandhi, Jan-Philipp Fr\u00e4nken, Tobias Gerstenberg, and Noah Goodman. 2024. Understanding social reasoning in language models with language models. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1038\/s43588-023-00527-x"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.507"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41368-023-00239-y"},{"key":"e_1_3_2_1_13_1","volume-title":"Subjective Logic: A formalism for reasoning under uncertainty","author":"Jsang Audun","year":"2018","unstructured":"Audun Jsang. 2018. Subjective Logic: A formalism for reasoning under uncertainty. Springer Publishing Company, Incorporated."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.890"},{"key":"e_1_3_2_1_15_1","volume-title":"Findings of the 2023 conference on machine translation (WMT23): LLMs are here but not quite there yet. In Proceedings of the Eighth Conference on Machine Translation. 1--42","author":"Kocmi Tom","year":"2023","unstructured":"Tom Kocmi, Eleftherios Avramidis, Rachel Bawden, Ond\u0159ej Bojar, Anton Dvorkovich, Christian Federmann, Mark Fishel, Markus Freitag, Thamme Gowda, Roman Grundkiewicz, et al. 2023. Findings of the 2023 conference on machine translation (WMT23): LLMs are here but not quite there yet. In Proceedings of the Eighth Conference on Machine Translation. 1--42."},{"key":"e_1_3_2_1_16_1","volume-title":"Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa.","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang Shane Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large language models are zero-shot reasoners. Advances in neural information processing systems 35 (2022), 22199--22213."},{"key":"e_1_3_2_1_17_1","volume-title":"Workshop on Uncertainty-Aware NLP (UncertaiNLP","author":"Kolagar Zahra","year":"2024","unstructured":"Zahra Kolagar and Alessandra Zarcone. 2024. Aligning Uncertainty: Leveraging LLMs to Analyze Uncertainty Transfer in Text Summarization. In Workshop on Uncertainty-Aware NLP (UncertaiNLP 2024). 41."},{"key":"e_1_3_2_1_18_1","volume-title":"Theory of mind may have spontaneously emerged in large language models. arXiv preprint arXiv:2302.02083 4","author":"Kosinski Michal","year":"2023","unstructured":"Michal Kosinski. 2023. Theory of mind may have spontaneously emerged in large language models. arXiv preprint arXiv:2302.02083 4 (2023), 169."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_21_1","volume-title":"Core mechanisms in \"theory of mind\". TRENDS in Cognitive Sciences 8, 12","author":"Leslie Alan M","year":"2004","unstructured":"Alan M Leslie, Ori Friedman, and Tim P German. 2004. Core mechanisms in \"theory of mind\". TRENDS in Cognitive Sciences 8, 12 (2004)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.13"},{"key":"e_1_3_2_1_23_1","volume-title":"Improving multi-agent cooperation using theory of mind. arXiv preprint arXiv:2007.15703","author":"Lim Terence X","year":"2020","unstructured":"Terence X Lim, Sidney Tio, and Desmond C Ong. 2020. Improving multi-agent cooperation using theory of mind. arXiv preprint arXiv:2007.15703 (2020)."},{"key":"e_1_3_2_1_24_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_1_25_1","volume-title":"How to write effective prompts for large language models. Nature Human Behaviour","author":"Lin Zhicheng","year":"2024","unstructured":"Zhicheng Lin. 2024. How to write effective prompts for large language models. Nature Human Behaviour (2024), 1--5."},{"key":"e_1_3_2_1_26_1","volume-title":"Deductive Verification of Chain-of-Thought Reasoning. In Thirty-seventh Conference on Neural Information Processing Systems. https:\/\/ openreview.net\/forum?id=I5rsM4CY2z","author":"Ling Zhan","year":"2023","unstructured":"Zhan Ling, Yunhao Fang, Xuanlin Li, Zhiao Huang, Mingu Lee, Roland Memisevic, and Hao Su. 2023. Deductive Verification of Chain-of-Thought Reasoning. In Thirty-seventh Conference on Neural Information Processing Systems. https:\/\/ openreview.net\/forum?id=I5rsM4CY2z"},{"key":"e_1_3_2_1_27_1","volume-title":"Federated Prompting and Chain-of-Thought Reasoning for Improving LLMs Answering. In International Conference on Knowledge Science, Engineering and Management. Springer, 3--11","author":"Liu Xiangyang","year":"2023","unstructured":"Xiangyang Liu, Tianqi Pang, and Chenyou Fan. 2023. Federated Prompting and Chain-of-Thought Reasoning for Improving LLMs Answering. In International Conference on Knowledge Science, Engineering and Management. Springer, 3--11."},{"key":"e_1_3_2_1_28_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems, Daniel Deutsch, Rotem Dror, Steffen Eger, Yang Gao, Christoph Leiter, Juri Opitz, and Andreas R\u00fcckl\u00e9 (Eds.)","author":"Lu Yuan","year":"1865","unstructured":"Yuan Lu and Yu-Ting Lin. 2023. Characterised LLMs Affect its Evaluation of Summary and Translation. In Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems, Daniel Deutsch, Rotem Dror, Steffen Eger, Yang Gao, Christoph Leiter, Juri Opitz, and Andreas R\u00fcckl\u00e9 (Eds.). Association for Computational Linguistics, Bali, Indonesia, 184--192. https:\/\/doi.org\/10.18653\/ v1\/2023.eval4nlp-1.15"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_32_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=pTHfApDakA","author":"Miao Ning","year":"2024","unstructured":"Ning Miao, Yee Whye Teh, and Tom Rainforth. 2024. SelfCheck: Using LLMs to Zero-Shot Check Their Own Step-by-Step Reasoning. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=pTHfApDakA"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1261"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCNT56998.2023.10307310"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.622"},{"key":"e_1_3_2_1_36_1","first-page":"13249","article-title":"Program synthesis with pragmatic communication","volume":"33","author":"Pu Yewen","year":"2020","unstructured":"Yewen Pu, Kevin Ellis, Marta Kryven, Josh Tenenbaum, and Armando Solar-Lezama. 2020. Program synthesis with pragmatic communication. Advances in Neural Information Processing Systems 33 (2020), 13249--13259.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the 18th Conference of the European","author":"Ranaldi Leonardo","year":"1812","unstructured":"Leonardo Ranaldi and Andre Freitas. 2024. Aligning Large and Small Language Models via Chain-of-Thought Reasoning. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), Yvette Graham and Matthew Purver (Eds.). Association for Computational Linguistics, St. Julian's, Malta, 1812--1827. https:\/\/aclanthology. org\/2024.eacl-long.109"},{"key":"e_1_3_2_1_38_1","volume-title":"One Law","author":"Rasiah Vishvaksenan","year":"2024","unstructured":"Vishvaksenan Rasiah, Ronja Stern, Veton Matoshi, Matthias St\u00fcrmer, Ilias Chalkidis, Daniel E Ho, and Joel Niklaus. 2024. One Law, Many Languages: Benchmarking Multilingual Legal Reasoning for Judicial Support. In ICLR 2024 Workshop on Data-centric Machine Learning Research (DMLR): Harnessing Momentum for Science. https:\/\/openreview.net\/forum?id=7vkz7cKd1X"},{"key":"e_1_3_2_1_39_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_42_1","volume-title":"BLEURT: Learning robust metrics for text generation. arXiv preprint arXiv:2004.04696","author":"Sellam Thibault","year":"2020","unstructured":"Thibault Sellam, Dipanjan Das, and Ankur P Parikh. 2020. BLEURT: Learning robust metrics for text generation. arXiv preprint arXiv:2004.04696 (2020)."},{"key":"e_1_3_2_1_43_1","volume-title":"Xuhui Zhou, Yejin Choi, Yoav Goldberg, Maarten Sap, and Vered Shwartz.","author":"Shapira Natalie","year":"2024","unstructured":"Natalie Shapira, Mosh Levy, Seyed Hossein Alavi, Xuhui Zhou, Yejin Choi, Yoav Goldberg, Maarten Sap, and Vered Shwartz. 2024. Clever Hans or Neural Theory of Mind? Stress Testing Social Reasoning in Large Language Models. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), Yvette Graham and Matthew Purver (Eds.). Association for Computational Linguistics, St. Julian's, Malta, 2257-- 2273. https:\/\/aclanthology.org\/2024.eacl-long.138"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3584931.3606955"},{"key":"e_1_3_2_1_45_1","volume-title":"LLM Theory of Mind and Alignment: Opportunities and Risks. arXiv preprint arXiv:2405.08154","author":"Street Winnie","year":"2024","unstructured":"Winnie Street. 2024. LLM Theory of Mind and Alignment: Opportunities and Risks. arXiv preprint arXiv:2405.08154 (2024)."},{"key":"e_1_3_2_1_46_1","unstructured":"Eliza Strickland. 2023. AI Outperforms Humans in Theory of Mind Tests: Large Language Models Convincingly Mimic the Understanding of Mental States. https: \/\/spectrum.ieee.org\/theory-of-mind-ai. Accessed: 2024-05--20."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-47240-4_19"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Liyan Tang Zhaoyi Sun Betina Idnay Jordan G Nestor Ali Soroush Pierre A Elias Ziyang Xu Ying Ding Greg Durrett Justin F Rousseau et al. 2023. Evaluating large language models on medical evidence summarization. npj Digital Medicine 6 1 (2023) 158.","DOI":"10.1038\/s41746-023-00896-7"},{"key":"e_1_3_2_1_49_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian CantonFerrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian XiangKuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open foundation and fine-tuned chat models."},{"key":"e_1_3_2_1_50_1","volume-title":"Zephyr: Direct distillation of lm alignment. arXiv preprint arXiv:2310.16944","author":"Tunstall Lewis","year":"2023","unstructured":"Lewis Tunstall, Edward Beeching, Nathan Lambert, Nazneen Rajani, Kashif Rasul, Younes Belkada, Shengyi Huang, Leandro von Werra, Cl\u00e9mentine Fourrier, Nathan Habib, et al. 2023. Zephyr: Direct distillation of lm alignment. arXiv preprint arXiv:2310.16944 (2023)."},{"key":"e_1_3_2_1_51_1","volume-title":"Large language models fail on trivial alterations to theoryof- mind tasks. arXiv preprint arXiv:2302.08399","author":"Ullman Tomer","year":"2023","unstructured":"Tomer Ullman. 2023. Large language models fail on trivial alterations to theoryof- mind tasks. arXiv preprint arXiv:2302.08399 (2023)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.795"},{"key":"e_1_3_2_1_54_1","volume-title":"The 2023 Conference on Empirical Methods in Natural Language Processing. https:\/\/openreview.net\/forum?id=FRRlmKxuf2","author":"Rui Wang Hongru WANG","year":"2023","unstructured":"Hongru WANG, Rui Wang, Fei Mi, Yang Deng, Zezhong WANG, Bin Liang, Ruifeng Xu, and Kam-FaiWong. 2023. Cue-CoT: Chain-of-thought Prompting for Responding to In-depth Dialogue Questions with LLMs. In The 2023 Conference on Empirical Methods in Natural Language Processing. https:\/\/openreview.net\/forum?id=FRRlmKxuf2"},{"key":"e_1_3_2_1_55_1","volume-title":"Can LLMs Reason with Rules? Logic Scaffolding for Stress-Testing and Improving LLMs. arXiv preprint arXiv:2402.11442","author":"Wang Siyuan","year":"2024","unstructured":"Siyuan Wang, Zhongyu Wei, Yejin Choi, and Xiang Ren. 2024. Can LLMs Reason with Rules? Logic Scaffolding for Stress-Testing and Improving LLMs. arXiv preprint arXiv:2402.11442 (2024)."},{"key":"e_1_3_2_1_56_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022), 24824--24837."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29909"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_60_1","first-page":"2776","article-title":"Leveraging Generative AI and Large Language Models","volume":"11","author":"Yu Ping","year":"2023","unstructured":"Ping Yu, Hua Xu, Xia Hu, and Chao Deng. 2023. Leveraging Generative AI and Large Language Models: A Comprehensive Roadmap for Healthcare Integration. Healthcare 11, 20 (2023), 2776.","journal-title":"A Comprehensive Roadmap for Healthcare Integration. Healthcare"},{"key":"e_1_3_2_1_61_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=tr0KidwPLc","author":"Zeng Zhiyuan","year":"2024","unstructured":"Zhiyuan Zeng, Jiatong Yu, Tianyu Gao, Yu Meng, Tanya Goyal, and Danqi Chen. 2024. Evaluating Large Language Models at Evaluating Instruction Following. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=tr0KidwPLc"},{"key":"e_1_3_2_1_62_1","volume-title":"Bertscore: Evaluating text generation with bert.","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert."},{"key":"e_1_3_2_1_63_1","volume-title":"MoverScore: Text generation evaluating with contextualized embeddings and earth mover distance. arXiv preprint arXiv:1909.02622","author":"Zhao Wei","year":"2019","unstructured":"Wei Zhao, Maxime Peyrard, Fei Liu, Yang Gao, Christian M Meyer, and Steffen Eger. 2019. MoverScore: Text generation evaluating with contextualized embeddings and earth mover distance. arXiv preprint arXiv:1909.02622 (2019)."},{"key":"e_1_3_2_1_64_1","volume-title":"Aditya Gupta, Kevin R McKee, Ari Holtzman, Jay Pujara, Xiang Ren, Swaroop Mishra, Aida Nematzadeh, et al.","author":"Zhou Pei","year":"2023","unstructured":"Pei Zhou, Aman Madaan, Srividya Pranavi Potharaju, Aditya Gupta, Kevin R McKee, Ari Holtzman, Jay Pujara, Xiang Ren, Swaroop Mishra, Aida Nematzadeh, et al. 2023. How FaR Are Large Language Models From Agents with Theory-of-Mind? arXiv preprint arXiv:2310.03051 (2023)."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679832","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679832","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:07Z","timestamp":1750294687000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679832"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":65,"alternative-id":["10.1145\/3627673.3679832","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679832","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}