{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T22:50:42Z","timestamp":1776120642444,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":96,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Microsoft","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"Centers for Disease Control and Prevention","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2154118"],"award-info":[{"award-number":["CNS-2154118"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["HR00112290102"],"award-info":[{"award-number":["HR00112290102"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589334.3645643","type":"proceedings-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T07:08:13Z","timestamp":1715152093000},"page":"2627-2638","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":49,"title":["Better to Ask in English: Cross-Lingual Evaluation of Large Language Models for Healthcare Queries"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6974-5970","authenticated-orcid":false,"given":"Yiqiao","family":"Jin","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5030-6647","authenticated-orcid":false,"given":"Mohit","family":"Chandra","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6182-9857","authenticated-orcid":false,"given":"Gaurav","family":"Verma","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1578-7892","authenticated-orcid":false,"given":"Yibo","family":"Hu","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8939-264X","authenticated-orcid":false,"given":"Munmun","family":"De Choudhury","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5796-3532","authenticated-orcid":false,"given":"Srijan","family":"Kumar","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems, 33:1877--1901","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. Language models are few-shot learners. Advances in neural information processing systems, 33:1877--1901, 2020."},{"key":"e_1_3_2_2_2_1","volume-title":"Exploring the potential of large language models (llms) in learning on graphs. arXiv:2307.03393","author":"Chen Zhikai","year":"2023","unstructured":"Zhikai Chen, Haitao Mao, Hang Li, Wei Jin, Hongzhi Wen, Xiaochi Wei, Shuaiqiang Wang, Dawei Yin, Wenqi Fan, Hui Liu, et al. Exploring the potential of large language models (llms) in learning on graphs. arXiv:2307.03393, 2023."},{"key":"e_1_3_2_2_3_1","volume-title":"Large language models can be good privacy protection learners. arXiv:2310.02469","author":"Xiao Yijia","year":"2023","unstructured":"Yijia Xiao, Yiqiao Jin, Yushi Bai, Yue Wu, Xianjun Yang, Xiao Luo, Wenchao Yu, Xujiang Zhao, Yanchi Liu, Haifeng Chen, et al. Large language models can be good privacy protection learners. arXiv:2310.02469, 2023."},{"key":"e_1_3_2_2_4_1","volume-title":"Mode approximation makes good vision-language prompts. arXiv:2305.08381","author":"Wang Haixin","year":"2023","unstructured":"Haixin Wang, Xinlong Yang, Jianlong Chang, Dian Jin, Jinan Sun, Shikun Zhang, Xiao Luo, and Qi Tian. Mode approximation makes good vision-language prompts. arXiv:2305.08381, 2023."},{"key":"e_1_3_2_2_5_1","volume-title":"Pretrain@KDD","author":"Xiao Yijia","year":"2021","unstructured":"Yijia Xiao, Jiezhong Qiu, Ziang Li, Chang-Yu Hsieh, and Jie Tang. Modeling protein using large-scale pretrain language model. In Pretrain@KDD 2021, 2021."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583868"},{"key":"e_1_3_2_2_7_1","unstructured":"OpenAI. Chatgpt 2023."},{"key":"e_1_3_2_2_8_1","volume-title":"Gpt-4 technical report. Arxiv Preprint, arXiv:2303.08774","author":"AI.","year":"2023","unstructured":"OpenAI. Gpt-4 technical report. Arxiv Preprint, arXiv:2303.08774, 2023."},{"key":"e_1_3_2_2_9_1","unstructured":"Google. Bard 2023."},{"key":"e_1_3_2_2_10_1","volume-title":"Chatgpt sets record for fastest-growing user base - analyst note","author":"Hu Krystal","year":"2023","unstructured":"Krystal Hu. Chatgpt sets record for fastest-growing user base - analyst note, 2023."},{"key":"e_1_3_2_2_11_1","volume-title":"How is ai tech like chatgpt improving digital accessibility?","author":"Accessibility Labs Inclusion","year":"2023","unstructured":"Inclusion & Accessibility Labs. How is ai tech like chatgpt improving digital accessibility?, 2023."},{"key":"e_1_3_2_2_12_1","volume-title":"Chatgpt's contributions to improving accessibility for education and learning","author":"Frackiewicz Marcin","year":"2023","unstructured":"Marcin Frackiewicz. Chatgpt's contributions to improving accessibility for education and learning, 2023."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.2196\/47564"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.semeval-1.1"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v16i1.19356"},{"key":"e_1_3_2_2_16_1","volume-title":"et al. Llama: Open and efficient foundation language models. arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. Llama: Open and efficient foundation language models. arXiv:2302.13971, 2023."},{"key":"e_1_3_2_2_17_1","volume-title":"The most spoken languages worldwide","year":"2023","unstructured":"Statista. The most spoken languages worldwide in 2023, 2023."},{"key":"e_1_3_2_2_18_1","volume-title":"Ethnologue: Languages of the World. SIL International","author":"Eberhard David M.","year":"2023","unstructured":"David M. Eberhard, Gary F. Simons, and Charles D. Fennig, editors. Ethnologue: Languages of the World. SIL International, Dallas, Texas, twenty-sixth edition, 2023."},{"key":"e_1_3_2_2_19_1","volume-title":"Medalpaca--an open-source collection of medical conversational ai models and training data. arXiv:2304.08247","author":"Han Tianyu","year":"2023","unstructured":"Tianyu Han, Lisa C Adams, Jens-Michalis Papaioannou, Paul Grundmann, Tom Oberhauser, Alexander L\u00f6ser, Daniel Truhn, and Keno K Bressem. Medalpaca--an open-source collection of medical conversational ai models and training data. arXiv:2304.08247, 2023."},{"key":"e_1_3_2_2_20_1","first-page":"2472","volume-title":"WWW","author":"Zhu Ming","year":"2019","unstructured":"Ming Zhu, Aman Ahuja, Wei Wei, and Chandan K Reddy. A hierarchical attention retrieval model for healthcare question answering. In WWW, pages 2472--2482, 2019."},{"key":"e_1_3_2_2_21_1","unstructured":"Patient. Patient 2023."},{"key":"e_1_3_2_2_22_1","volume-title":"Biomedical question answering: a survey of approaches and challenges. ACM Computing Surveys (CSUR), 55(2):1--36","author":"Jin Qiao","year":"2022","unstructured":"Qiao Jin, Zheng Yuan, Guangzhi Xiong, Qianlan Yu, Huaiyuan Ying, Chuanqi Tan, Mosha Chen, Songfang Huang, Xiaozhong Liu, and Sheng Yu. Biomedical question answering: a survey of approaches and challenges. ACM Computing Surveys (CSUR), 55(2):1--36, 2022."},{"key":"e_1_3_2_2_23_1","first-page":"25","volume-title":"MedInfo","author":"Abacha Asma Ben","year":"2019","unstructured":"Asma Ben Abacha, Yassine Mrabet, Mark Sharp, Travis R Goodwin, Sonya E Shooshan, and Dina Demner-Fushman. Bridging the gap between consumers' medication questions and trusted answers. In MedInfo, pages 25--29, 2019."},{"key":"e_1_3_2_2_24_1","volume-title":"Medline plus","author":"National Library of Medicine.","year":"2023","unstructured":"National Library of Medicine. Medline plus, 2023."},{"key":"e_1_3_2_2_25_1","unstructured":"National Library of Medicine. Dailymed 2023."},{"key":"e_1_3_2_2_26_1","volume-title":"Dean Carignan, and Eric Horvitz. Capabilities of gpt-4 on medical challenge problems. arXiv:2303.13375","author":"Nori Harsha","year":"2023","unstructured":"Harsha Nori, Nicholas King, Scott Mayer McKinney, Dean Carignan, and Eric Horvitz. Capabilities of gpt-4 on medical challenge problems. arXiv:2303.13375, 2023."},{"key":"e_1_3_2_2_27_1","volume-title":"Towards expert-level medical question answering with large language models. arXiv:2305.09617","author":"Singhal Karan","year":"2023","unstructured":"Karan Singhal, Tao Tu, Juraj Gottweis, Rory Sayres, Ellery Wulczyn, Le Hou, Kevin Clark, Stephen Pfohl, Heather Cole-Lewis, Darlene Neal, et al. Towards expert-level medical question answering with large language models. arXiv:2305.09617, 2023."},{"key":"e_1_3_2_2_28_1","first-page":"1","volume-title":"Nature","author":"Singhal Karan","year":"2023","unstructured":"Karan Singhal, Shekoofeh Azizi, Tao Tu, S Sara Mahdavi, Jason Wei, Hyung Won Chung, Nathan Scales, Ajay Tanwani, Heather Cole-Lewis, Stephen Pfohl, et al. Large language models encode clinical knowledge. Nature, pages 1--9, 2023."},{"key":"e_1_3_2_2_29_1","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Information Processing Systems, 35:24824--24837, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_30_1","first-page":"296","volume-title":"ICML","author":"Lin Dekang","year":"1998","unstructured":"Dekang Lin. An information-theoretic definition of similarity. In ICML, pages 296--304, 1998."},{"key":"e_1_3_2_2_31_1","first-page":"115","volume-title":"SPIRE","author":"Kondrak Grzegorz","year":"2005","unstructured":"Grzegorz Kondrak. N-gram similarity and distance. In SPIRE, pages 115--126. Springer, 2005."},{"issue":"1","key":"e_1_3_2_2_32_1","first-page":"13","article-title":"Evaluating wordnet-based measures of lexical semantic relatedness","volume":"32","author":"Budanitsky Alexander","year":"2006","unstructured":"Alexander Budanitsky and Graeme Hirst. Evaluating wordnet-based measures of lexical semantic relatedness. COLING, 32(1):13--47, 2006.","journal-title":"COLING"},{"issue":"4","key":"e_1_3_2_2_33_1","first-page":"665","article-title":"Evaluating semantic models with (genuine) similarity estimation","volume":"41","author":"Hill Felix","year":"2015","unstructured":"Felix Hill, Roi Reichart, and Anna Korhonen. Simlex-999: Evaluating semantic models with (genuine) similarity estimation. COLING, 41(4):665--695, 2015.","journal-title":"COLING"},{"key":"e_1_3_2_2_34_1","volume-title":"A survey of knowledge graph reasoning on graph types: Static, dynamic, and multimodal. arXiv:2212.05767","author":"Liang Ke","year":"2022","unstructured":"Ke Liang, Lingyuan Meng, Meng Liu, Yue Liu, Wenxuan Tu, Siwei Wang, Sihang Zhou, Xinwang Liu, and Fuchun Sun. A survey of knowledge graph reasoning on graph types: Static, dynamic, and multimodal. arXiv:2212.05767, 2022."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557574"},{"key":"e_1_3_2_2_36_1","volume-title":"Complex query answering on eventuality knowledge graph with implicit logical constraints. arXiv:2305.19068","author":"Bai Jiaxin","year":"2023","unstructured":"Jiaxin Bai, Xin Liu, Weiqi Wang, Chen Luo, and Yangqiu Song. Complex query answering on eventuality knowledge graph with implicit logical constraints. arXiv:2305.19068, 2023."},{"key":"e_1_3_2_2_37_1","first-page":"507","volume-title":"WMT","author":"Hanna Michael","year":"2021","unstructured":"Michael Hanna and Ondrej Bojar. A fine-grained analysis of bertscore. In WMT, pages 507--517, 2021."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.3934\/mbe.2023657"},{"key":"e_1_3_2_2_39_1","volume-title":"ICLR","author":"Zhang Tianyi","year":"2020","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. Bertscore: Evaluating text generation with bert. In ICLR, 2020."},{"key":"e_1_3_2_2_40_1","first-page":"3982","volume-title":"EMNLP","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych. Sentence-bert: Sentence embeddings using siamese bert-networks. In EMNLP, pages 3982--3992, 2019."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3640"},{"key":"e_1_3_2_2_42_1","volume-title":"NAACL Workshop on Autmatic Summarization","author":"Simfinder V HATZIVASSILOGLOU.","year":"2001","unstructured":"V HATZIVASSILOGLOU. Simfinder: A flexible clustering tool for summarization. In NAACL Workshop on Autmatic Summarization, 2001."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i5.20517"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539277"},{"key":"e_1_3_2_2_45_1","volume-title":"Latent dirichlet allocation. Journal of machine Learning research, 3(Jan):993--1022","author":"Blei David M","year":"2003","unstructured":"David M Blei, Andrew Y Ng, and Michael I Jordan. Latent dirichlet allocation. Journal of machine Learning research, 3(Jan):993--1022, 2003."},{"key":"e_1_3_2_2_46_1","first-page":"17","article-title":"Sharing clusters among related groups: Hierarchical dirichlet processes","author":"Teh Yee","year":"2004","unstructured":"Yee Teh, Michael Jordan, Matthew Beal, and David Blei. Sharing clusters among related groups: Hierarchical dirichlet processes. NeurIPS, 17, 2004.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_47_1","volume-title":"ICLR 2023 Workshop on Trustworthy and Reliable Large-Scale Machine Learning Models","author":"Wang Jindong","year":"2023","unstructured":"Jindong Wang, HU Xixu, Wenxin Hou, Hao Chen, Runkai Zheng, Yidong Wang, Linyi Yang, Wei Ye, Haojun Huang, Xiubo Geng, et al. On the robustness of chat-gpt: An adversarial and out-of-distribution perspective. In ICLR 2023 Workshop on Trustworthy and Reliable Large-Scale Machine Learning Models, 2023."},{"key":"e_1_3_2_2_48_1","volume-title":"Consistency analysis of chatgpt. arXiv:2303.06273","author":"Jang Myeongjun","year":"2023","unstructured":"Myeongjun Jang and Thomas Lukasiewicz. Consistency analysis of chatgpt. arXiv:2303.06273, 2023."},{"key":"e_1_3_2_2_49_1","unstructured":"Fuxiao Liu Tianrui Guan Zongxia Li Lichang Chen Yaser Yacoob Dinesh Manocha and Tianyi Zhou. Hallusionbench: You see what you think? or you think what you see? an image-context reasoning benchmark challenging for gpt-4v (ision) llava-1.5 and other multi-modality models. arXiv preprint arXiv:2310.14566 2023."},{"key":"e_1_3_2_2_50_1","volume-title":"Teaching large language models to self-debug. arXiv:2304.05128","author":"Chen Xinyun","year":"2023","unstructured":"Xinyun Chen, Maxwell Lin, Nathanael Sch\u00e4rli, and Denny Zhou. Teaching large language models to self-debug. arXiv:2304.05128, 2023."},{"key":"e_1_3_2_2_51_1","volume-title":"Llm self defense: By self examination, llms know they are being tricked. arXiv:2308.07308","author":"Helbling Alec","year":"2023","unstructured":"Alec Helbling, Mansi Phute, Matthew Hull, and Duen Horng Chau. Llm self defense: By self examination, llms know they are being tricked. arXiv:2308.07308, 2023."},{"key":"e_1_3_2_2_52_1","first-page":"4171","volume-title":"NAACL","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding. In NAACL, pages 4171--4186, 2019."},{"key":"e_1_3_2_2_53_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. Roberta: A robustly optimized bert pretraining approach. arXiv:1907.11692, 2019."},{"key":"e_1_3_2_2_54_1","first-page":"5753","volume-title":"NeurIPS","author":"Yang Zhilin","year":"2019","unstructured":"Zhilin Yang, Zihang Dai, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, and Quoc V Le. Xlnet: generalized autoregressive pretraining for language understanding. In NeurIPS, pages 5753--5763, 2019."},{"key":"e_1_3_2_2_55_1","volume-title":"ICLR","author":"Zeng Aohan","year":"2023","unstructured":"Aohan Zeng, Xiao Liu, Zhengxiao Du, Zihan Wang, Hanyu Lai, Ming Ding, Zhuoyi Yang, Yifan Xu, Wendi Zheng, Xiao Xia, Weng Lam Tam, Zixuan Ma, Yufei Xue, Jidong Zhai, Wenguang Chen, Zhiyuan Liu, Peng Zhang, Yuxiao Dong, and Jie Tang. GLM-130b: An open bilingual pre-trained model. In ICLR, 2023."},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"e_1_3_2_2_57_1","volume-title":"Foundation model-oriented robustness: Robust image model evaluation with pretrained models. arXiv:2308.10632","author":"Zhang Peiyan","year":"2023","unstructured":"Peiyan Zhang, Haoyang Liu, Chaozhuo Li, Xing Xie, Sunghun Kim, and Haohan Wang. Foundation model-oriented robustness: Robust image model evaluation with pretrained models. arXiv:2308.10632, 2023."},{"key":"e_1_3_2_2_58_1","volume-title":"Towards understanding in-context learning with contrastive demonstrations and saliency maps. arXiv:2307.05052","author":"Li Zongxia","year":"2023","unstructured":"Zongxia Li, Paiheng Xu, Fuxiao Liu, and Hyemi Song. Towards understanding in-context learning with contrastive demonstrations and saliency maps. arXiv:2307.05052, 2023."},{"key":"e_1_3_2_2_59_1","volume-title":"Aligning large multi-modal model with robust instruction tuning. arXiv:2306.14565","author":"Liu Fuxiao","year":"2023","unstructured":"Fuxiao Liu, Kevin Lin, Linjie Li, Jianfeng Wang, Yaser Yacoob, and Lijuan Wang. Aligning large multi-modal model with robust instruction tuning. arXiv:2306.14565, 2023."},{"key":"e_1_3_2_2_60_1","volume-title":"TMLR","author":"Srivastava Aarohi","year":"2023","unstructured":"Aarohi Srivastava, Abhinav Rastogi, Abhishek Rao, Abu Awal Md Shoeb, Abubakar Abid, Adam Fisch, Adam R Brown, Adam Santoro, Aditya Gupta, Adri\u00e0 Garriga-Alonso, et al. Beyond the imitation game: Quantifying and extrapolating the capabilities of language models. TMLR, 2023."},{"key":"e_1_3_2_2_61_1","volume-title":"Chatgpt evaluation on sentence level relations: A focus on temporal, causal, and discourse relations. arXiv:2304.14827","author":"Chan Chunkit","year":"2023","unstructured":"Chunkit Chan, Jiayang Cheng, Weiqi Wang, Yuxin Jiang, Tianqing Fang, Xin Liu, and Yangqiu Song. Chatgpt evaluation on sentence level relations: A focus on temporal, causal, and discourse relations. arXiv:2304.14827, 2023."},{"key":"e_1_3_2_2_62_1","volume-title":"Benchmarking foundation models with language-model-as-an-examiner. arXiv:2306.04181","author":"Bai Yushi","year":"2023","unstructured":"Yushi Bai, Jiahao Ying, Yixin Cao, Xin Lv, Yuze He, Xiaozhi Wang, Jifan Yu, Kaisheng Zeng, Yijia Xiao, Haozhe Lyu, et al. Benchmarking foundation models with language-model-as-an-examiner. arXiv:2306.04181, 2023."},{"key":"e_1_3_2_2_63_1","volume-title":"ICML","author":"Chen Changyu","year":"2023","unstructured":"Changyu Chen, Xiting Wang, Yiqiao Jin, Victor Ye Dong, Li Dong, Jie Cao, Yi Liu, and Rui Yan. Semi-offline reinforcement learning for optimized text generation. In ICML, 2023."},{"key":"e_1_3_2_2_64_1","volume-title":"Mmc: Advancing multimodal chart understanding with large-scale instruction tuning. arXiv preprint arXiv:2311.10774","author":"Liu Fuxiao","year":"2023","unstructured":"Fuxiao Liu, Xiaoyang Wang, Wenlin Yao, Jianshu Chen, Kaiqiang Song, Sangwoo Cho, Yaser Yacoob, and Dong Yu. Mmc: Advancing multimodal chart understanding with large-scale instruction tuning. arXiv preprint arXiv:2311.10774, 2023."},{"key":"e_1_3_2_2_65_1","volume-title":"Exploring recommendation capabilities of gpt-4v (ision): A preliminary case study. arXiv preprint arXiv:2311.04199","author":"Zhou Peilin","year":"2023","unstructured":"Peilin Zhou, Meng Cao, You-Liang Huang, Qichen Ye, Peiyan Zhang, Junling Liu, Yueqi Xie, Yining Hua, and Jaeboum Kim. Exploring recommendation capabilities of gpt-4v (ision): A preliminary case study. arXiv preprint arXiv:2311.04199, 2023."},{"key":"e_1_3_2_2_66_1","volume-title":"et al. Candle: Iterative conceptualization and instantiation distillation from large language models for commonsense reasoning. arXiv preprint arXiv:2401.07286","author":"Wang Weiqi","year":"2024","unstructured":"Weiqi Wang, Tianqing Fang, Chunyang Li, Haochen Shi, Wenxuan Ding, Baixuan Xu, Zhaowei Wang, Jiaxin Bai, Xin Liu, Jiayang Cheng, et al. Candle: Iterative conceptualization and instantiation distillation from large language models for commonsense reasoning. arXiv preprint arXiv:2401.07286, 2024."},{"key":"e_1_3_2_2_67_1","volume-title":"Competeai: Understanding the competition behaviors in large language model-based agents. arXiv preprint arXiv:2310.17512","author":"Zhao Qinlin","year":"2023","unstructured":"Qinlin Zhao, Jindong Wang, Yixuan Zhang, Yiqiao Jin, Kaijie Zhu, Hao Chen, and Xing Xie. Competeai: Understanding the competition behaviors in large language model-based agents. arXiv preprint arXiv:2310.17512, 2023."},{"key":"e_1_3_2_2_68_1","volume-title":"Christoffer Egeberg Hother, and Ole Winther. Can large language models reason about medical questions? arXiv:2207.08143","author":"Li\u00e9vin Valentin","year":"2022","unstructured":"Valentin Li\u00e9vin, Christoffer Egeberg Hother, and Ole Winther. Can large language models reason about medical questions? arXiv:2207.08143, 2022."},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.560"},{"key":"e_1_3_2_2_70_1","volume-title":"Amazon-m2: A multilingual multi-locale shopping session dataset for recommendation and text generation. arXiv preprint arXiv:2307.09688","author":"Jin Wei","year":"2023","unstructured":"Wei Jin, Haitao Mao, Zheng Li, Haoming Jiang, Chen Luo, Hongzhi Wen, Haoyu Han, Hanqing Lu, Zhengyang Wang, Ruirui Li, et al. Amazon-m2: A multilingual multi-locale shopping session dataset for recommendation and text generation. arXiv preprint arXiv:2307.09688, 2023."},{"key":"e_1_3_2_2_71_1","volume-title":"Word shape matters: Robust machine translation with visual embedding. arXiv:2010.09997","author":"Wang Haohan","year":"2020","unstructured":"Haohan Wang, Peiyan Zhang, and Eric P Xing. Word shape matters: Robust machine translation with visual embedding. arXiv:2010.09997, 2020."},{"key":"e_1_3_2_2_72_1","volume-title":"Yanning Shen, and Jundong Li. Fairness in graph machine learning: Recent advances and future prospectives. In KDD, page 5794--5795","author":"Dong Yushun","year":"2023","unstructured":"Yushun Dong, Oyku Deniz Kose, Yanning Shen, and Jundong Li. Fairness in graph machine learning: Recent advances and future prospectives. In KDD, page 5794--5795, New York, NY, USA, 2023. Association for Computing Machinery."},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i6.25905"},{"key":"e_1_3_2_2_74_1","volume-title":"Fairness in graph mining: A survey. TKDE, (01):1--22","author":"Dong Yushun","year":"2023","unstructured":"Yushun Dong, Jing Ma, Song Wang, Chen Chen, and Jundong Li. Fairness in graph mining: A survey. TKDE, (01):1--22, 2023."},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611977653.ch18"},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"crossref","unstructured":"Srijan Kumar. Advances in ai for safety equity and well-being on web and social media: detection robustness attribution and mitigation. In Proceedings of the Thirty-Seventh AAAI Conference on Artificial Intelligence and Thirty-Fifth Conference on Innovative Applications of Artificial Intelligence and Thirteenth Symposium on Educational Advances in Artificial Intelligence pages 15444--15444 2023.","DOI":"10.1609\/aaai.v37i13.26811"},{"key":"e_1_3_2_2_77_1","volume-title":"Icmrec: Item cluster-wise multi-objective optimization for unbiased recommendation. arXiv:2109.12887","author":"Wang Yule","year":"2021","unstructured":"Yule Wang, Xin Xin, Yue Ding, Yunzhe Li, and Dong Wang. Icmrec: Item cluster-wise multi-objective optimization for unbiased recommendation. arXiv:2109.12887, 2021."},{"key":"e_1_3_2_2_78_1","volume-title":"Adversarial robustness of prompt-based few-shot learning for natural language understanding. arXiv:2306.11066","author":"Sarath Nookala Venkata Prabhakara","year":"2023","unstructured":"Venkata Prabhakara Sarath Nookala, Gaurav Verma, Subhabrata Mukherjee, and Srijan Kumar. Adversarial robustness of prompt-based few-shot learning for natural language understanding. arXiv:2306.11066, 2023."},{"key":"e_1_3_2_2_79_1","volume-title":"Large language models: what is driving the hype behind llm's in healthcare?","author":"Price Lloyd","year":"2023","unstructured":"Lloyd Price. Large language models: what is driving the hype behind llm's in healthcare?, 2023."},{"key":"e_1_3_2_2_80_1","first-page":"887","volume-title":"Healthcare","author":"Sallam Malik","unstructured":"Malik Sallam. Chatgpt utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns. In Healthcare, volume 11, page 887. MDPI, 2023."},{"key":"e_1_3_2_2_81_1","volume-title":"Everyone speaks english, don't they?","author":"Lyne Chris","year":"2023","unstructured":"Chris Lyne. Everyone speaks english, don't they?, 2023."},{"key":"e_1_3_2_2_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533182"},{"key":"e_1_3_2_2_83_1","first-page":"6","article-title":"On achieving and evaluating language-independence in nlp","author":"Bender Emily M","year":"2011","unstructured":"Emily M Bender. On achieving and evaluating language-independence in nlp. Linguistic Issues in Language Technology, 6, 2011.","journal-title":"Linguistic Issues in Language Technology"},{"key":"e_1_3_2_2_84_1","volume-title":"Dr google will see you now: Search giant wants to cash in on your medical queries","author":"Murphy Margy","year":"2019","unstructured":"Margy Murphy. Dr google will see you now: Search giant wants to cash in on your medical queries, 2019."},{"key":"e_1_3_2_2_85_1","volume-title":"Chatlaw: Open-source legal large language model with integrated external knowledge bases. arXiv preprint arXiv:2306.16092","author":"Cui Jiaxi","year":"2023","unstructured":"Jiaxi Cui, Zongjian Li, Yang Yan, Bohua Chen, and Li Yuan. Chatlaw: Open-source legal large language model with integrated external knowledge bases. arXiv preprint arXiv:2306.16092, 2023."},{"key":"e_1_3_2_2_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512122"},{"key":"e_1_3_2_2_87_1","volume-title":"Fin-fact: A benchmark dataset for multimodal financial fact checking and explanation generation. arXiv:2309.08793","author":"Rangapur Aman","year":"2023","unstructured":"Aman Rangapur, Haoran Wang, and Kai Shu. Fin-fact: A benchmark dataset for multimodal financial fact checking and explanation generation. arXiv:2309.08793, 2023."},{"key":"e_1_3_2_2_88_1","volume-title":"Folkscope: Intention knowledge graph construction for discovering e-commerce commonsense. arXiv:2211.08316","author":"Yu Changlong","year":"2022","unstructured":"Changlong Yu, Weiqi Wang, Xin Liu, Jiaxin Bai, Yangqiu Song, Zheng Li, Yi-fan Gao, Tianyu Cao, and Bing Yin. Folkscope: Intention knowledge graph construction for discovering e-commerce commonsense. arXiv:2211.08316, 2022."},{"key":"e_1_3_2_2_89_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2023.104431"},{"key":"e_1_3_2_2_90_1","volume-title":"Towards massively multi-domain multilingual readability assessment. arXiv:2305.14463","author":"Naous Tarek","year":"2023","unstructured":"Tarek Naous, Michael J Ryan, Mohit Chandra, and Wei Xu. Towards massively multi-domain multilingual readability assessment. arXiv:2305.14463, 2023."},{"key":"e_1_3_2_2_91_1","first-page":"1","volume-title":"Kabilan Elangovan, Laura Gutierrez, Ting Fang Tan, and Daniel Shu Wei Ting. Large language models in medicine. Nature medicine","author":"Thirunavukarasu Arun James","year":"2023","unstructured":"Arun James Thirunavukarasu, Darren Shu Jeng Ting, Kabilan Elangovan, Laura Gutierrez, Ting Fang Tan, and Daniel Shu Wei Ting. Large language models in medicine. Nature medicine, pages 1--11, 2023."},{"key":"e_1_3_2_2_92_1","doi-asserted-by":"crossref","unstructured":"Tiffany H Kung Morgan Cheatham Arielle Medenilla Czarina Sillos Lorie De Leon Camille Elepa\u00f1o Maria Madriaga Rimel Aggabao Giezel Diaz-Candido James Maningo et al. Performance of chatgpt on usmle: Potential for ai-assisted medical education using large language models. PLoS digital health 2(2):e0000198 2023.","DOI":"10.1371\/journal.pdig.0000198"},{"key":"e_1_3_2_2_93_1","doi-asserted-by":"publisher","DOI":"10.2196\/46599"},{"key":"e_1_3_2_2_94_1","volume-title":"Chatgpt can finally access the internet in real time, but there's a catch","author":"Ortiz Sabrina","year":"2023","unstructured":"Sabrina Ortiz. Chatgpt can finally access the internet in real time, but there's a catch, 2023."},{"key":"e_1_3_2_2_95_1","volume-title":"Google translate","author":"Translate Google","year":"2023","unstructured":"Google Translate. Google translate, 2023."},{"key":"e_1_3_2_2_96_1","first-page":"116","volume-title":"ACL","author":"Junczys-Dowmunt Marcin","year":"2018","unstructured":"Marcin Junczys-Dowmunt, Roman Grundkiewicz, Tomasz Dwojak, Hieu Hoang, Kenneth Heafield, Tom Neckermann, Frank Seide, Ulrich Germann, Alham Fikri Aji, Nikolay Bogoychev, et al. Marian: Fast neural machine translation in c. In ACL, pages 116--121, 2018."}],"event":{"name":"WWW '24: The ACM Web Conference 2024","location":"Singapore Singapore","acronym":"WWW '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645643","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589334.3645643","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:22:52Z","timestamp":1755822172000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645643"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":96,"alternative-id":["10.1145\/3589334.3645643","10.1145\/3589334"],"URL":"https:\/\/doi.org\/10.1145\/3589334.3645643","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}