{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T17:05:07Z","timestamp":1767114307091,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","funder":[{"name":"NSFC","award":["62441233"],"award-info":[{"award-number":["62441233"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,12]]},"DOI":"10.1145\/3714394.3756275","type":"proceedings-article","created":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T21:13:49Z","timestamp":1767042829000},"page":"1358-1363","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Evaluating Large Language Models in Traditional Chinese Medicine with Enhanced Misinformation Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4225-6965","authenticated-orcid":false,"given":"Yawen","family":"Zheng","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2141-1496","authenticated-orcid":false,"given":"Jinliang","family":"Yuan","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0418-736X","authenticated-orcid":false,"given":"Li","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6308-4945","authenticated-orcid":false,"given":"Wenkai","family":"Zhao","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2457-3555","authenticated-orcid":false,"given":"Zhenyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Beijing, 
China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,29]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"https:\/\/www.anthropic.com\/news\/claude-4 Technical blog post","author":"Introducing Claude","year":"2025","unstructured":"Anthropic. 2025. Introducing Claude 4. (2025). https:\/\/www.anthropic.com\/news\/claude-4 Technical blog post."},{"key":"e_1_3_2_1_2_1","volume-title":"Magda Dubois, Saleh Khalil, Jasmine Balloch, Joshua Au Yeung, and Dominic Pimenta.","author":"Asgari Elham","year":"2025","unstructured":"Elham Asgari, Nina Montaña-Brown, Magda Dubois, Saleh Khalil, Jasmine Balloch, Joshua Au Yeung, and Dominic Pimenta. 2025. A framework to assess clinical safety and hallucination rates of LLMs for medical text summarisation. npj Digital Medicine, Vol. 8, 1 (2025), 1-15."},{"key":"e_1_3_2_1_3_1","volume-title":"Lisa Soleymani Lehmann, et al","author":"Bedi Suhana","year":"2024","unstructured":"Suhana Bedi, Yutong Liu, Lucy Orr-Ewing, Dev Dash, Sanmi Koyejo, Alison Callahan, Jason A Fries, Michael Wornow, Akshay Swaminathan, Lisa Soleymani Lehmann, et al., 2024. Testing and evaluation of health care applications of large language models: a systematic review. JAMA (2024)."},{"key":"e_1_3_2_1_4_1","volume-title":"Towards Medical Complex Reasoning with LLMs. arXiv preprint arXiv:2412.18925","author":"Chen Junying","year":"2024","unstructured":"Junying Chen, Zhenyang Cai, Ke Ji, Xidong Wang, Wanlong Liu, Rongsheng Wang, Jianye Hou, and Benyou Wang. 2024. HuatuoGPT-o1, Towards Medical Complex Reasoning with LLMs. arXiv preprint arXiv:2412.18925 (2024). https:\/\/arxiv.org\/abs\/2412.18925"},{"key":"e_1_3_2_1_5_1","unstructured":"DeepSeek-AI Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu et al. 2024. DeepSeek-V3 Technical Report. arXiv preprint arXiv:2412.19437 (2024). https:\/\/arxiv.org\/abs\/2412.19437"},{"key":"e_1_3_2_1_6_1","volume-title":"SycEval: Evaluating LLM Sycophancy. 
arXiv preprint arXiv:2502.08177","author":"Fanous Aaron","year":"2025","unstructured":"Aaron Fanous, Jacob Goldberg, Ank A Agarwal, Joanna Lin, Anson Zhou, Roxana Daneshjou, and Sanmi Koyejo. 2025. SycEval: Evaluating LLM Sycophancy. arXiv preprint arXiv:2502.08177 (2025)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-024-07421-0"},{"key":"e_1_3_2_1_8_1","volume-title":"Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300","author":"Hendrycks Dan","year":"2020","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2020. Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300 (2020)."},{"key":"e_1_3_2_1_9_1","unstructured":"Intelligent Internet. 2025. II-Medical-8B: Medical Reasoning Model. https:\/\/huggingface.co\/Intelligent-Internet\/II-Medical-8B Model repository."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.3390\/app11146421"},{"key":"e_1_3_2_1_11_1","volume-title":"Semantic Entropy Probes: Robust and Cheap Hallucination Detection in LLMs. arXiv preprint arXiv:2406.15927","author":"Kossen Jannik","year":"2024","unstructured":"Jannik Kossen, Jiatong Han, Muhammed Razzak, Lisa Schut, Shreshth Malik, and Yarin Gal. 2024. Semantic Entropy Probes: Robust and Cheap Hallucination Detection in LLMs. arXiv preprint arXiv:2406.15927 (2024). https:\/\/arxiv.org\/abs\/2406.15927"},{"key":"e_1_3_2_1_12_1","unstructured":"Lorenz Kuhn Yarin Gal and Sebastian Farquhar. 2023. Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation. In The Eleventh International Conference on Learning Representations. 
https:\/\/openreview.net\/forum?id=VD-AYtP0dve"},{"key":"e_1_3_2_1_13_1","volume-title":"Camille Elepaño, Maria Madriaga, Rimel Aggabao, Giezel Diaz-Candido, James Maningo, et al.","author":"Kung Tiffany H","year":"2023","unstructured":"Tiffany H Kung, Morgan Cheatham, Arielle Medenilla, Czarina Sillos, Lorie De Leon, Camille Elepaño, Maria Madriaga, Rimel Aggabao, Giezel Diaz-Candido, James Maningo, et al., 2023. Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models. PLoS digital health, Vol. 2, 2 (2023), e0000198."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-024-02709-7"},{"key":"e_1_3_2_1_15_1","volume-title":"Exploring the Comprehension of ChatGPT in Traditional Chinese Medicine Knowledge. arXiv preprint arXiv:2403.09164","author":"Li Yizhen","year":"2024","unstructured":"Yizhen Li, Shaohan Huang, Jiaxing Qi, Lei Quan, Dongran Han, and Zhongzhi Luan. 2024. Exploring the Comprehension of ChatGPT in Traditional Chinese Medicine Knowledge. arXiv preprint arXiv:2403.09164 (2024). https:\/\/arxiv.org\/abs\/2403.09164"},{"key":"e_1_3_2_1_16_1","volume-title":"https:\/\/openai.com\/index\/gpt-4-1\/ Technical blog post","author":"AI.","year":"2025","unstructured":"OpenAI. 2025. Introducing GPT-4.1 in the API. (2025). https:\/\/openai.com\/index\/gpt-4-1\/ Technical blog post."},{"key":"e_1_3_2_1_17_1","volume-title":"Conference on health, inference, and learning. PMLR, 248-260","author":"Pal Ankit","year":"2022","unstructured":"Ankit Pal, Logesh Kumar Umapathi, and Malaikannan Sankarasubbu. 2022. Medmcqa: A large-scale multi-subject multi-choice dataset for medical domain question answering. In Conference on health, inference, and learning. 
PMLR, 248-260."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-025-02954-4"},{"key":"e_1_3_2_1_19_1","unstructured":"Karan Singhal Tao Tu Juraj Gottweis Rory Sayres Ellery Wulczyn Mohamed Amin Le Hou Kevin Clark Stephen R Pfohl Heather Cole-Lewis et al. 2025. Toward expert-level medical question answering with large language models. Nature Medicine (2025) 1-8."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.conll-1.21"},{"key":"e_1_3_2_1_21_1","volume-title":"An Empirical Evaluation of Confidence Elicitation in LLMs. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=gjeQKFxFpZ","author":"Xiong Miao","year":"2024","unstructured":"Miao Xiong, Zhiyuan Hu, Xinyang Lu, Yifei Li, Jie Fu, Junxian He, and Bryan Hooi. 2024. Can LLMs Express Their Uncertainty? An Empirical Evaluation of Confidence Elicitation in LLMs. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=gjeQKFxFpZ"},{"key":"e_1_3_2_1_22_1","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou et al. 2024. Qwen2.5 Technical Report. arXiv preprint arXiv:2412.15115 (2024). 
https:\/\/arxiv.org\/abs\/2412.15115"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12967-024-05128-4"}],"event":{"name":"UbiComp '25:The 2025 ACM International Joint Conference on Pervasive and Ubiquitous Computing \/ ISWC ACM International Symposium on Wearable Computers","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGSPATIAL ACM Special Interest Group on Spatial Information"],"location":"Espoo Finland"},"container-title":["Companion of the 2025 ACM International Joint Conference on Pervasive and Ubiquitous Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3714394.3756275","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T17:00:55Z","timestamp":1767114055000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3714394.3756275"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":23,"alternative-id":["10.1145\/3714394.3756275","10.1145\/3714394"],"URL":"https:\/\/doi.org\/10.1145\/3714394.3756275","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-12-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}