{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,18]],"date-time":"2026-07-18T09:39:36Z","timestamp":1784367576364,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,24]],"date-time":"2025-03-24T00:00:00Z","timestamp":1742774400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CMMI-2326378"],"award-info":[{"award-number":["CMMI-2326378"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Notre Dame University"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,24]]},"DOI":"10.1145\/3708359.3712111","type":"proceedings-article","created":{"date-parts":[[2025,3,19]],"date-time":"2025-03-19T12:50:34Z","timestamp":1742388634000},"page":"1499-1516","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Unequal Opportunities: Examining the Bias in Geographical Recommendations by Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7569-5922","authenticated-orcid":false,"given":"Shiran","family":"Dudy","sequence":"first","affiliation":[{"name":"EAI, Northeastern University, Boston, MA, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7213-9050","authenticated-orcid":false,"given":"Thulasi","family":"Tholeti","sequence":"additional","affiliation":[{"name":"EAI, Northeastern University, Boston, MA, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4302-9327","authenticated-orcid":false,"given":"Resmi","family":"Ramachandranpillai","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, MA, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8059-9698","authenticated-orcid":false,"given":"Muhammad","family":"Ali","sequence":"additional","affiliation":[{"name":"EAI, Northeastern University, Boston, MA, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7902-7625","authenticated-orcid":false,"given":"Toby Jia-Jun","family":"Li","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, IN, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3208-9778","authenticated-orcid":false,"given":"Ricardo","family":"Baeza-Yates","sequence":"additional","affiliation":[{"name":"EAI, Northeastern University, Boston, MA, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,3,24]]},"reference":[{"key":"e_1_3_3_3_2_2","doi-asserted-by":"crossref","unstructured":"Muhammad\u00a0Farid Adilazuarda Sagnik Mukherjee Pradhyumna Lavania Siddhant Singh Ashutosh Dwivedi Alham\u00a0Fikri Aji Jacki O\u2019Neill Ashutosh Modi and Monojit Choudhury. 2024. Towards measuring and modeling\" culture\" in llms: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.15412 (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.882"},{"key":"e_1_3_3_3_3_2","unstructured":"Ahmed Agiza Mohamed Mostagir and Sherief Reda. 2024. Analyzing the Impact of Data Selection and Fine-Tuning on Economic and Political Biases in LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.08699 (2024)."},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.c3nlp-1.8"},{"key":"e_1_3_3_3_5_2","unstructured":"Ibrahim\u00a0Said Ahmad Shiran Dudy Resmi Ramachandranpillai and Kenneth Church. 2024. Are Generative Language Models Multicultural? A Study on Hausa Culture and Emotions using ChatGPT. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.19504 (2024)."},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"crossref","unstructured":"Shahriar Akter Grace McCarthy Shahriar Sajib Katina Michael Yogesh\u00a0K Dwivedi John D\u2019Ambra and Kathy\u00a0Ning Shen. 2021. Algorithmic bias in data-driven innovation in the age of AI. 102387\u00a0pages.","DOI":"10.1016\/j.ijinfomgt.2021.102387"},{"key":"e_1_3_3_3_7_2","unstructured":"Muhammad Ali Swetasudha Panda Qinlan Shen Michael Wick and Ari Kobren. 2024. Understanding the Interplay of Scale Data and Bias in Language Models: A Case Study with BERT. arxiv:https:\/\/arXiv.org\/abs\/2407.21058\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2407.21058"},{"key":"e_1_3_3_3_8_2","unstructured":"Jiafu An Difang Huang Chen Lin and Mingzhu Tai. 2024. Measuring Gender and Racial Biases in Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.15281 (2024)."},{"key":"e_1_3_3_3_9_2","volume-title":"Claude-3.5: \u2019anthropic\/claude-3.5-sonnet:beta\u2019","year":"2024","unstructured":"Anthropic. 2024. Claude-3.5: \u2019anthropic\/claude-3.5-sonnet:beta\u2019. https:\/\/www.anthropic.com [Language Model]."},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"crossref","unstructured":"Raghav Awasthi Shreya Mishra Dwarikanath Mahapatra Ashish Khanna Kamal Maheshwari Jacek Cywinski Frank Papay and Piyush Mathur. 2023. HumanELY: Human evaluation of LLM yield using a novel web-based evaluation tool. medRxiv (2023) 2023\u201312.","DOI":"10.1101\/2023.12.22.23300458"},{"key":"e_1_3_3_3_11_2","unstructured":"Ansar Aynetdinov and Alan Akbik. 2024. SemScore: Automated Evaluation of Instruction-Tuned LLMs based on Semantic Textual Similarity. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.17072 (2024)."},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"crossref","unstructured":"Yejin Bang Samuel Cahyawijaya Nayeon Lee Wenliang Dai Dan Su Bryan Wilie Holy Lovenia Ziwei Ji Tiezheng Yu Willy Chung et\u00a0al. 2023. A multitask multilingual multimodal evaluation of chatgpt on reasoning hallucination and interactivity. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.04023 (2023).","DOI":"10.18653\/v1\/2023.ijcnlp-main.45"},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_3_3_14_2","unstructured":"Abeba Birhane. 2022. Automating ambiguity: Challenges and pitfalls of artificial intelligence. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2206.04179 (2022)."},{"key":"e_1_3_3_3_15_2","unstructured":"Rishi Bommasani Kathleen\u00a0A Creel Ananya Kumar Dan Jurafsky and Percy\u00a0S Liang. 2022. Picking on the same person: Does algorithmic monoculture lead to outcome homogenization? Advances in Neural Information Processing Systems 35 (2022) 3663\u20133678."},{"key":"e_1_3_3_3_16_2","unstructured":"Guiming\u00a0Hardy Chen Shunian Chen Ziche Liu Feng Jiang and Benyou Wang. 2024. Humans or llms as the judge? a study on judgement biases. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.10669 (2024)."},{"key":"e_1_3_3_3_17_2","unstructured":"Shiqi Chen Siyang Gao and Junxian He. 2023. Evaluating factual consistency of summaries with large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.14069 (2023)."},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"crossref","unstructured":"Pedro Concei\u00e7\u00e3o and Pedro Ferreira. 2000. The young person\u2019s guide to the Theil index: Suggesting intuitive interpretations and exploring analytical applications. UTIP working paper.","DOI":"10.2139\/ssrn.228703"},{"key":"e_1_3_3_3_19_2","volume-title":"Economics of strategy","author":"Dranove David","year":"2017","unstructured":"David Dranove, David Besanko, Mark Shanley, and Scott Schaefer. 2017. Economics of strategy. John Wiley & Sons."},{"key":"e_1_3_3_3_20_2","unstructured":"Yucong Duan Fuliang Tang Kunguang Wu Zhendong Guo Shuaishuai Huang Yingtian Mei Yuxing Wang Zeyu Yang and Shiming Gong. 2023. Ranking of large language model (llm) regional bias. (2023)."},{"key":"e_1_3_3_3_21_2","unstructured":"Shiran Dudy Ibrahim\u00a0Said Ahmad Ryoko Kitajima and Agata Lapedriza. 2024. Analyzing Cultural Representations of Emotions in LLMs through Mixed Emotion Survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.02143 (2024)."},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.eval4nlp-1.13"},{"key":"e_1_3_3_3_23_2","unstructured":"Marcos Fern\u00e1ndez-Pichela Juan\u00a0C Pichela and David\u00a0E Losadaa. 2024a. Search Engines Large Language Models or Both? Evaluating Information Seeking Strategies for Answering Health Questions. arXiv:https:\/\/arXiv.org\/abs\/2407.12468v2."},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"crossref","unstructured":"Luciano Floridi and Josh Cowls. 2022. A unified framework of five principles for AI in society. Machine learning and the city: Applications in architecture and urban design (2022) 535\u2013545.","DOI":"10.1002\/9781119815075.ch45"},{"key":"e_1_3_3_3_25_2","unstructured":"Jinlan Fu See-Kiong Ng Zhengbao Jiang and Pengfei Liu. 2023. Gptscore: Evaluate as you desire. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.04166 (2023)."},{"key":"e_1_3_3_3_26_2","unstructured":"Isabel\u00a0O Gallegos Ryan\u00a0A Rossi Joe Barrow Md\u00a0Mehrab Tanjim Sungchul Kim Franck Dernoncourt Tong Yu Ruiyi Zhang and Nesreen\u00a0K Ahmed. 2024. Bias and fairness in large language models: A survey. Computational Linguistics (2024) 1\u201379."},{"key":"e_1_3_3_3_27_2","unstructured":"Mingqi Gao Jie Ruan Renliang Sun Xunjian Yin Shiping Yang and Xiaojun Wan. 2023. Human-like summarization evaluation with chatgpt. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.02554 (2023)."},{"key":"e_1_3_3_3_28_2","unstructured":"Simret\u00a0Araya Gebreegziabher Yukun Yang Elena\u00a0L Glassman and Toby Jia-Jun Li. 2024. Supporting Co-Adaptive Machine Teaching through Human Concept Learning and Cognitive Theories. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.16561 (2024)."},{"key":"e_1_3_3_3_29_2","volume-title":"Gemma: \u2019google\/gemma-7b-it\u2019","year":"2024","unstructured":"Google. 2024. Gemma: \u2019google\/gemma-7b-it\u2019. https:\/\/ai.google.dev\/gemma\/docs [Language Model]."},{"key":"e_1_3_3_3_30_2","first-page":"867","volume-title":"37th International Conference on Neural Information Processing Systems","author":"Hu Ziniu","year":"2023","unstructured":"Ziniu Hu, Ahmet Iscen, Chen Sun, Kai-Wei Chang, Yizhou Sun, David\u00a0A Ross, Cordelia Schmid, and Alireza Fathi. 2023. AVIS: autonomous visual information seeking with large language model agent. In 37th International Conference on Neural Information Processing Systems. 867\u2013878."},{"key":"e_1_3_3_3_31_2","unstructured":"Minyoung Huh Brian Cheung Tongzhou Wang and Phillip Isola. 2024. The platonic representation hypothesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.07987 (2024)."},{"key":"e_1_3_3_3_32_2","first-page":"13","volume-title":"CHI conference on Human Factors in Computing Systems","author":"Johnson Isaac\u00a0L","year":"2016","unstructured":"Isaac\u00a0L Johnson, Yilun Lin, Toby Jia-Jun Li, Andrew Hall, Aaron Halfaker, Johannes Sch\u00f6ning, and Brent Hecht. 2016. Not at home on the range: Peer production and the urban\/rural divide. In CHI conference on Human Factors in Computing Systems. ACM, 13\u201325."},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"crossref","unstructured":"Mahammed Kamruzzaman Hieu\u00a0Minh Nguyen and Gene\u00a0Louis Kim. 2024. \" Global is Good Local is Bad?\": Understanding Brand Bias in LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.13997 (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.707"},{"key":"e_1_3_3_3_34_2","unstructured":"Shivani Kapania Ruiyi Wang Toby Jia-Jun Li Tianshi Li and Hong Shen. 2024. \" I\u2019m categorizing LLM as a productivity tool\": Examining ethics of LLM use in HCI research practices. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.19876 (2024)."},{"key":"e_1_3_3_3_35_2","unstructured":"Julia Kharchenko Tanya Roosta Aman Chadha and Chirag Shah. 2024. How well do llms represent values across cultures? empirical analysis of llm responses based on hofstede cultural dimensions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.14805 (2024)."},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/CONECCT62155.2024.10677324"},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658941"},{"key":"e_1_3_3_3_38_2","unstructured":"Harsh Kumar Mohi Reza Jeb Mitchell Ilya Musabirov Lisa Zhang and Michael Liut. 2024. Understanding Help-Seeking Behavior of Students Using LLMs vs. Web Search for Writing SQL Queries. arXiv e-prints (2024) arXiv\u20132408."},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"crossref","unstructured":"Philippe Laban Tobias Schnabel Paul\u00a0N Bennett and Marti\u00a0A Hearst. 2022. SummaC: Re-visiting NLI-based models for inconsistency detection in summarization. Transactions of the Association for Computational Linguistics 10 (2022) 163\u2013177.","DOI":"10.1162\/tacl_a_00453"},{"key":"e_1_3_3_3_40_2","unstructured":"Shuyue\u00a0Stella Li Vidhisha Balachandran Shangbin Feng Jonathan Ilgen Emma Pierson Pang\u00a0Wei Koh and Yulia Tsvetkov. 2024. MEDIQ: Question-Asking LLMs for Adaptive and Reliable Medical Reasoning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.00922 (2024)."},{"key":"e_1_3_3_3_41_2","first-page":"74","volume-title":"Text summarization branches out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. ACL, 74\u201381."},{"key":"e_1_3_3_3_42_2","unstructured":"Siyang Liu Trish Maturi Bowen Yi Siqi Shen and Rada Mihalcea. 2024. The Generation Gap: Exploring Age Bias in the Value Systems of Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2404.08760\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2404.08760"},{"key":"e_1_3_3_3_43_2","doi-asserted-by":"crossref","unstructured":"Yang Liu Dan Iter Yichong Xu Shuohang Wang Ruochen Xu and Chenguang Zhu. 2023. G-eval: Nlg evaluation using gpt-4 with better human alignment. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.16634 (2023).","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"e_1_3_3_3_44_2","unstructured":"Valerio Lorini Javier Rando Diego Saez-Trumper and Carlos Castillo. 2020. Uneven coverage of natural disasters in Wikipedia: The case of flood. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2001.08810 (2020)."},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v18i1.31367"},{"key":"e_1_3_3_3_46_2","volume-title":"LLaMA-3.1: \u2019meta-llama\/llama-3.1-405b-instruct\u2019","author":"AI Meta","year":"2024","unstructured":"Meta AI. 2024. LLaMA-3.1: \u2019meta-llama\/llama-3.1-405b-instruct\u2019. https:\/\/ai.meta.com\/llama [Language Model]."},{"key":"e_1_3_3_3_47_2","volume-title":"Introduction to data mining","author":"Mining What Is\u00a0Data","year":"2006","unstructured":"What Is\u00a0Data Mining. 2006. Introduction to data mining. Springer."},{"key":"e_1_3_3_3_48_2","volume-title":"Mistral: \u2019mistralai\/mistral-nemo\u2019","author":"AI Mistral","year":"2024","unstructured":"Mistral AI. 2024. Mistral: \u2019mistralai\/mistral-nemo\u2019. https:\/\/www.mistral.ai [Language Model]."},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658967"},{"key":"e_1_3_3_3_50_2","unstructured":"Jann\u00a0Railey Montalan Jian\u00a0Gang Ngui Wei\u00a0Qi Leong Yosephine Susanto Hamsawardhini Rengarajan William\u00a0Chandra Tjhi and Alham\u00a0Fikri Aji. 2024. Kalahi: A handcrafted grassroots cultural LLM evaluation suite for Filipino. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.15380 (2024)."},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"crossref","unstructured":"Allan\u00a0H Murphy. 1996. The Finley affair: A signal event in the history of forecast verification. Weather and forecasting 11 1 (1996) 3\u201320.","DOI":"10.1175\/1520-0434(1996)011<0003:TFAASE>2.0.CO;2"},{"key":"e_1_3_3_3_52_2","volume-title":"GPT-4o: \u2019openai\/gpt-3.5-turbo\u2019","year":"2023","unstructured":"OpenAI. 2023. GPT-4o: \u2019openai\/gpt-3.5-turbo\u2019. https:\/\/www.openai.com [Language Model]."},{"key":"e_1_3_3_3_53_2","volume-title":"GPT-4o: \u2019openai\/gpt-4o\u2019","year":"2023","unstructured":"OpenAI. 2023. GPT-4o: \u2019openai\/gpt-4o\u2019. https:\/\/www.openai.com [Language Model]."},{"key":"e_1_3_3_3_54_2","first-page":"311","volume-title":"40th annual meeting of the Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In 40th annual meeting of the Association for Computational Linguistics. ACL, 311\u2013318."},{"key":"e_1_3_3_3_55_2","unstructured":"Elinor Poole-Dayan Deb Roy and Jad Kabbara. 2024. LLM Targeted Underperformance Disproportionately Impacts Vulnerable Users. arXiv e-prints (2024) arXiv\u20132406."},{"key":"e_1_3_3_3_56_2","unstructured":"Raphael Poulain Hamed Fayyaz and Rahmatollah Beheshti. 2024. Bias patterns in the application of LLMs for clinical decision support: A comprehensive study. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.15149 (2024)."},{"key":"e_1_3_3_3_57_2","volume-title":"The 7th international student conference on advanced science and technology","author":"Rahutomo Faisal","year":"2012","unstructured":"Faisal Rahutomo, Teruaki Kitasuka, Masayoshi Aritsugi, et\u00a0al. 2012. Semantic cosine similarity. In The 7th international student conference on advanced science and technology. University of Seoul, South Korea."},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"crossref","unstructured":"Anand Rajaraman. 2011. Mining of massive datasets.","DOI":"10.1017\/CBO9781139058452"},{"key":"e_1_3_3_3_59_2","unstructured":"Rajesh Ranjan Shailja Gupta and Surya\u00a0Narayan Singh. 2024. A Comprehensive Survey of Bias in LLMs: Current Landscape and Future Directions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.16430 (2024)."},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"crossref","unstructured":"Vikas Raunak Arul Menezes and Marcin Junczys-Dowmunt. 2021. The curious case of hallucinations in neural machine translation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2104.06683 (2021).","DOI":"10.18653\/v1\/2021.naacl-main.92"},{"key":"e_1_3_3_3_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/3617694.3623257"},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3617694.3623257"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"crossref","unstructured":"Shreya Shankar JD Zamfirescu-Pereira Bj\u00f6rn Hartmann Aditya\u00a0G Parameswaran and Ian Arawjo. 2024. Who Validates the Validators? Aligning LLM-Assisted Evaluation of LLM Outputs with Human Preferences. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.12272 (2024).","DOI":"10.1145\/3654777.3676450"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642459"},{"key":"e_1_3_3_3_65_2","unstructured":"Annalisa Szymanski Simret\u00a0Araya Gebreegziabher Oghenemaro Anuyah Ronald\u00a0A Metoyer and Toby Jia-Jun Li. 2024. Comparing Criteria Development Across Domain Experts Lay Users and Models in Large Language Model Evaluation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.02054 (2024)."},{"key":"e_1_3_3_3_66_2","unstructured":"Annalisa Szymanski Noah Ziems Heather\u00a0A Eicher-Miller Toby Jia-Jun Li Meng Jiang and Ronald\u00a0A Metoyer. 2024. Limitations of the LLM-as-a-Judge Approach for Evaluating LLM Outputs in Expert Knowledge Tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.20266 (2024)."},{"key":"e_1_3_3_3_67_2","doi-asserted-by":"publisher","DOI":"10.1093\/oso\/9780197759066.001.0001"},{"key":"e_1_3_3_3_68_2","first-page":"1","volume-title":"CHI Conference on Human Factors in Computing Systems","author":"Wang Xinru","year":"2024","unstructured":"Xinru Wang, Hannah Kim, Sajjadur Rahman, Kushan Mitra, and Zhengjie Miao. 2024. Human-LLM collaborative annotation through effective verification of LLM labels. In CHI Conference on Human Factors in Computing Systems. ACM, 1\u201321."},{"key":"e_1_3_3_3_69_2","unstructured":"Yuanchun Wang Jifan Yu Zijun Yao Jing Zhang Yuyang Xie Shangqing Tu Yiyang Fu Youhe Feng Jinkai Zhang Jingyao Zhang et\u00a0al. 2024. A Solution-based LLM API-using Methodology for Academic Information Seeking. arXiv e-prints (2024) arXiv\u20132405."},{"key":"e_1_3_3_3_70_2","unstructured":"Youfu Yan Yu Hou Yongkang Xiao Rui Zhang and Qianwen Wang. 2024. KNOWNET: Guided Health Information Seeking from LLMs via Knowledge Graph Integration. IEEE Transactions on Visualization and Computer Graphics (2024)."},{"key":"e_1_3_3_3_71_2","unstructured":"Yuheng Zha Yichi Yang Ruichen Li and Zhiting Hu. 2023. AlignScore: Evaluating factual consistency with a unified alignment function. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.16739 (2023)."},{"key":"e_1_3_3_3_72_2","unstructured":"Tianyi Zhang Varsha Kishore Felix Wu Kilian\u00a0Q Weinberger and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.09675 (2019)."},{"key":"e_1_3_3_3_73_2","unstructured":"Yuhang Zhou Yuchen Ni Xiang Liu Jian Zhang Sen Liu Guangnan Ye and Hongfeng Chai. 2024. Are Large Language Models Rational Investors? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.12713 (2024)."}],"event":{"name":"IUI '25: 30th International Conference on Intelligent User Interfaces","location":"Cagliari Italy","acronym":"IUI '25","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 30th International Conference on Intelligent User Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708359.3712111","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3708359.3712111","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3708359.3712111","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:06Z","timestamp":1750298226000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708359.3712111"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,24]]},"references-count":72,"alternative-id":["10.1145\/3708359.3712111","10.1145\/3708359"],"URL":"https:\/\/doi.org\/10.1145\/3708359.3712111","relation":{},"subject":[],"published":{"date-parts":[[2025,3,24]]},"assertion":[{"value":"2025-03-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}