{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T02:32:13Z","timestamp":1765506733507,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761604","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T00:52:37Z","timestamp":1762563157000},"page":"6575-6579","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["C-FAITH: A Chinese Fine-Grained Benchmark for Automated Hallucination Evaluation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4899-9060","authenticated-orcid":false,"given":"Xu","family":"Zhang","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2255-9681","authenticated-orcid":false,"given":"Zhifei","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6657-6403","authenticated-orcid":false,"given":"Jiahao","family":"Wang","sequence":"additional","affiliation":[{"name":"Trustworthy Technology and Engineering Laboratory, Huawei, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2354-723X","authenticated-orcid":false,"given":"Huixuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8632-6288","authenticated-orcid":false,"given":"Fan","family":"Xu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8902-0055","authenticated-orcid":false,"given":"Junzhe","family":"Zhang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6887-1994","authenticated-orcid":false,"given":"Xiaojun","family":"Wan","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Yi: Open Foundation Models by 01.AI. arXiv:2403.04652 [cs.CL] https:\/\/arxiv.org\/abs\/2403.04652","author":"Young Alex","year":"2024","unstructured":"01. AI:, Alex Young, Bei Chen, Chao Li, Chengen Huang, Ge Zhang, Guanwei Zhang, Heng Li, Jiangcheng Zhu, Jianqun Chen, et al., 2024. Yi: Open Foundation Models by 01.AI. arXiv:2403.04652 [cs.CL] https:\/\/arxiv.org\/abs\/2403.04652"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.ijcnlp-main.45"},{"key":"e_1_3_2_1_3_1","unstructured":"Tom B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan et al. 2020. Language Models are Few-Shot Learners. arXiv:2005.14165 [cs.CL] https:\/\/arxiv.org\/abs\/2005.14165"},{"key":"e_1_3_2_1_4_1","unstructured":"Zouying Cao Yifei Yang and Hai Zhao. 2024. AutoHall: Automated Hallucination Dataset Generation for Large Language Models. arXiv:2310.00259 [cs.CL] https:\/\/arxiv.org\/abs\/2310.00259"},{"key":"e_1_3_2_1_5_1","volume-title":"DiaHalu: A Dialogue-level Hallucination Evaluation Benchmark for Large Language Models. arXiv preprint arXiv:2403.00896","author":"Chen Kedi","year":"2024","unstructured":"Kedi Chen, Qin Chen, Jie Zhou, Yishen He, and Liang He. 2024a. DiaHalu: A Dialogue-level Hallucination Evaluation Benchmark for Large Language Models. 
arXiv preprint arXiv:2403.00896 (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 33rd International Joint Conference on Artificial Intelligence.","author":"Chen Xiang","year":"2024","unstructured":"Xiang Chen, Duanzheng Song, Honghao Gui, Chengxi Wang, Ningyu Zhang, Jiang Yong, Fei Huang, Chengfei Lv, Dan Zhang, and Huajun Chen. 2024b. Factchd: Benchmarking fact-conflicting hallucination detection. In Proceedings of the 33rd International Joint Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_7_1","unstructured":"Qinyuan Cheng Tianxiang Sun Wenwei Zhang Siyin Wang Xiangyang Liu Mozhi Zhang Junliang He Mianqiu Huang Zhangyue Yin Kai Chen et al. 2023. Evaluating hallucinations in chinese large language models. arXiv preprint arXiv:2310.03368 (2023)."},{"key":"e_1_3_2_1_8_1","volume-title":"DeepSeek LLM: Scaling Open-Source Language Models with Longtermism. arXiv preprint arXiv:2401.02954","author":"AI.","year":"2024","unstructured":"DeepSeek-AI. 2024. DeepSeek LLM: Scaling Open-Source Language Models with Longtermism. arXiv preprint arXiv:2401.02954 (2024). https:\/\/github.com\/deepseek-ai\/DeepSeek-LLM"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"e_1_3_2_1_10_1","volume-title":"Halo: Estimation and reduction of hallucinations in open-source weak large language models. arXiv preprint arXiv:2308.11764","author":"Elaraby Mohamed","year":"2023","unstructured":"Mohamed Elaraby, Mengyin Lu, Jacob Dunn, Xueying Zhang, Yu Wang, Shizhu Liu, Pingchuan Tian, Yuping Wang, and Yuxuan Wang. 2023. Halo: Estimation and reduction of hallucinations in open-source weak large language models. arXiv preprint arXiv:2308.11764 (2023)."},{"key":"e_1_3_2_1_11_1","unstructured":"Team GLM: Aohan Zeng Bin Xu Bowen Wang Chenhui Zhang Da Yin Dan Zhang Diego Rojas Guanyu Feng et al. 2024. ChatGLM: A Family of Large Language Models from GLM-130B to GLM-4 All Tools. 
arXiv:2406.12793 [cs.CL] https:\/\/arxiv.org\/abs\/2406.12793"},{"key":"e_1_3_2_1_12_1","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman et al. 2024. The Llama 3 Herd of Models. arXiv:2407.21783 [cs.AI] https:\/\/arxiv.org\/abs\/2407.21783"},{"key":"e_1_3_2_1_13_1","unstructured":"Yuzhe Gu Ziwei Ji Wenwei Zhang Chengqi Lyu Dahua Lin and Kai Chen. 2024. ANAH-v2: Scaling Analytical Hallucination Annotation of Large Language Models. arXiv:2407.04693 [cs.CL] https:\/\/arxiv.org\/abs\/2407.04693"},{"key":"e_1_3_2_1_14_1","unstructured":"Yancheng He Shilong Li Jiaheng Liu Yingshui Tan Weixun Wang Hui Huang Xingyuan Bu Hangyu Guo Chengwei Hu Boren Zheng Zhuoran Lin Xuepeng Liu Dekai Sun Shirong Lin Zhicheng Zheng Xiaoyong Zhu Wenbo Su and Bo Zheng. 2024. Chinese SimpleQA: A Chinese Factuality Evaluation for Large Language Models. arXiv:2411.07140 [cs.CL] https:\/\/arxiv.org\/abs\/2411.07140"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3703155"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.442"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1468-2958.2004.tb00738.x"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.397"},{"key":"e_1_3_2_1_20_1","volume-title":"Exploring and evaluating hallucinations in llm-powered code generation. arXiv preprint arXiv:2404.00971","author":"Liu Fang","year":"2024","unstructured":"Fang Liu, Yang Liu, Lin Shi, Houkun Huang, Ruifeng Wang, Zhen Yang, Li Zhang, Zhongqi Li, and Yuchi Ma. 2024b. Exploring and evaluating hallucinations in llm-powered code generation. arXiv preprint arXiv:2404.00971 (2024)."},{"key":"e_1_3_2_1_21_1","unstructured":"Jiazhen Liu Yuhan Fu Ruobing Xie Runquan Xie Xingwu Sun Fengzong Lian Zhanhui Kang and Xirong Li. 2024a. 
PhD: A ChatGPT-Prompted Visual hallucination Evaluation Dataset. arXiv:2403.11116 [cs.CV] https:\/\/arxiv.org\/abs\/2403.11116"},{"key":"e_1_3_2_1_22_1","volume-title":"HalluDial: A Large-Scale Benchmark for Automatic Dialogue-Level Hallucination Evaluation. arXiv preprint arXiv:2406.07070","author":"Luo Wen","year":"2024","unstructured":"Wen Luo, Tianshu Shen, Wei Li, Guangyue Peng, Richeng Xuan, Houfeng Wang, and Xi Yang. 2024. HalluDial: A Large-Scale Benchmark for Automatic Dialogue-Level Hallucination Evaluation. arXiv preprint arXiv:2406.07070 (2024)."},{"key":"e_1_3_2_1_23_1","unstructured":"Abhika Mishra Akari Asai Vidhisha Balachandran Yizhong Wang Graham Neubig Yulia Tsvetkov and Hannaneh Hajishirzi. 2024a. Fine-grained Hallucination Detection and Editing for Language Models. arXiv:2401.06855 [cs.CL] https:\/\/arxiv.org\/abs\/2401.06855"},{"key":"e_1_3_2_1_24_1","volume-title":"Fine-grained hallucination detection and editing for language models. arXiv preprint arXiv:2401.06855","author":"Mishra Abhika","year":"2024","unstructured":"Abhika Mishra, Akari Asai, Vidhisha Balachandran, Yizhong Wang, Graham Neubig, Yulia Tsvetkov, and Hannaneh Hajishirzi. 2024b. Fine-grained hallucination detection and editing for language models. arXiv preprint arXiv:2401.06855 (2024)."},{"key":"e_1_3_2_1_25_1","unstructured":"OpenAI Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et al. 2024. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL] https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"e_1_3_2_1_26_1","volume-title":"Logesh Kumar Umapathi, and Malaikannan Sankarasubbu","author":"Pal Ankit","year":"2023","unstructured":"Ankit Pal, Logesh Kumar Umapathi, and Malaikannan Sankarasubbu. 2023. Med-halt: Medical domain hallucination test for large language models. 
arXiv preprint arXiv:2307.15343 (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"Percy Liang, and Michael S.","author":"Park Joon Sung","year":"2023","unstructured":"Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, Meredith Ringel Morris, Percy Liang, and Michael S. Bernstein. 2023. Generative Agents: Interactive Simulacra of Human Behavior. arXiv:2304.03442 [cs.HC] https:\/\/arxiv.org\/abs\/2304.03442"},{"key":"e_1_3_2_1_28_1","unstructured":"Qwen-Team. 2024. Qwen2.5: A Party of Foundation Models. https:\/\/qwenlm.github.io\/blog\/qwen2.5\/"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.155"},{"key":"e_1_3_2_1_30_1","unstructured":"Binjie Wang Ethan Chern and Pengfei Liu. 2023. ChineseFactEval: A Factuality Benchmark for Chinese LLMs."},{"key":"e_1_3_2_1_31_1","unstructured":"Ziwei Xu Sanjay Jain and Mohan Kankanhalli. 2024. Hallucination is Inevitable: An Innate Limitation of Large Language Models. arXiv:2401.11817 [cs.CL] https:\/\/arxiv.org\/abs\/2401.11817"},{"key":"e_1_3_2_1_32_1","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou Chengpeng Li Chengyuan Li Dayiheng Liu Fei Huang et al. 2024b. Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)."},{"key":"e_1_3_2_1_33_1","unstructured":"Chengrun Yang Xuezhi Wang Yifeng Lu Hanxiao Liu Quoc V. Le Denny Zhou and Xinyun Chen. 2024a. Large Language Models as Optimizers. 
arXiv:2309.03409 [cs.LG] https:\/\/arxiv.org\/abs\/2309.03409"}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761604","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T02:29:04Z","timestamp":1765506544000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761604"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":33,"alternative-id":["10.1145\/3746252.3761604","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761604","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}