{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:31:18Z","timestamp":1776112278322,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706599.3720285","type":"proceedings-article","created":{"date-parts":[[2025,4,23]],"date-time":"2025-04-23T20:15:12Z","timestamp":1745439312000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Data Paradigms in the Era of LLMs: On the Opportunities and Challenges of Qualitative Data in the WILD"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-5660-3241","authenticated-orcid":false,"given":"Shengqi","family":"Zhu","sequence":"first","affiliation":[{"name":"Information Science, Cornell University, Ithaca, New York, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4317-9501","authenticated-orcid":false,"given":"Jeffrey M.","family":"Rzeszotarski","sequence":"additional","affiliation":[{"name":"Information Science, Cornell University, Ithaca, New York, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7510-9404","authenticated-orcid":false,"given":"David","family":"Mimno","sequence":"additional","affiliation":[{"name":"Information Science, Cornell University, Ithaca, New York, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Ryoko AI. 2023. ShareGPT52K. https:\/\/huggingface.co\/datasets\/RyokoAI\/ShareGPT52K. https:\/\/huggingface.co\/datasets\/RyokoAI\/ShareGPT52K"},{"key":"e_1_3_3_2_3_2","unstructured":"Nicholas\u00a0O Andrews and Edward\u00a0A Fox. 2007. Recent developments in document clustering. (2007)."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Jordan Boyd-Graber Yuening Hu David Mimno et\u00a0al. 2017. Applications of topic models. Foundations and Trends\u00ae in Information Retrieval 11 2-3 (2017) 143\u2013296.","DOI":"10.1561\/1500000030"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1547"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"James\u00a0W Chesebro and Deborah\u00a0J Borisoff. 2007. What makes qualitative research qualitative? Qualitative research reports in communication 8 1 (2007) 3\u201314.","DOI":"10.1080\/17459430701617846"},{"key":"e_1_3_3_2_7_2","unstructured":"Wei-Lin Chiang Zhuohan Li Zi Lin Ying Sheng Zhanghao Wu Hao Zhang Lianmin Zheng Siyuan Zhuang Yonghao Zhuang Joseph\u00a0E. Gonzalez Ion Stoica and Eric\u00a0P. Xing. 2023. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/"},{"key":"e_1_3_3_2_8_2","unstructured":"Peter Craig Cyrus Cooper David Gunnell Sally Haw Kenny Lawson Sally Macintrye David Ogilvie Mark Petticrew Barney Reeves Matt Sutton et\u00a0al. 2011. Using natural experiments to evaluate population health interventions: guidance for producers and users of evidence. Medical Research Council (2011)."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.669"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Angus Dawson and Julius Sim. 2015. The nature and ethics of natural experiments. Journal of Medical Ethics 41 10 (2015) 848\u2013853.","DOI":"10.1136\/medethics-2014-102254"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642782"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139084444"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Thomas\u00a0C Erren Philip Lewis and David\u00a0M Shaw. 2020. The COVID-19 pandemic: ethical and scientific imperatives for \u201cnatural\u201d experiments. Circulation 142 4 (2020) 309\u2013311.","DOI":"10.1161\/CIRCULATIONAHA.120.048671"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Leslie\u00a0P Francis. 2021. Understanding the ethics of natural experiments in a pandemic. American Journal of Public Health 111 2 (2021) 212\u2013214.","DOI":"10.2105\/AJPH.2020.306069"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642002"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642660"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580688"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"AKM\u00a0Bahalul Haque AKM\u00a0Najmul Islam and Patrick Mikalef. 2023. Explainable Artificial Intelligence (XAI) from a user perspective: A synthesis of prior literature and problematizing avenues for future research. Technological Forecasting and Social Change 186 (2023) 122120.","DOI":"10.1016\/j.techfore.2022.122120"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642834"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Clare Herrick and Kirsten Bell. 2024. The social life of natural experiments in epidemiology and public health. Sociology of Health & Illness 46 2 (2024) 276\u2013294.","DOI":"10.1111\/1467-9566.13703"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.516"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Felicity\u00a0Agwu Kalu and Jack\u00a0C Bwalya. 2017. What makes qualitative research good research? An exploratory analysis of critical elements. International Journal of Social Science Research 5 2 (2017) 43\u201356.","DOI":"10.5296\/ijssr.v5i2.10711"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"Enkelejda Kasneci Kathrin Se\u00dfler Stefan K\u00fcchemann Maria Bannert Daryna Dementieva Frank Fischer Urs Gasser Georg Groh Stephan G\u00fcnnemann Eyke H\u00fcllermeier et\u00a0al. 2023. ChatGPT for good? On opportunities and challenges of large language models for education. Learning and individual differences 103 (2023) 102274.","DOI":"10.1016\/j.lindif.2023.102274"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634966"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Scott\u00a0T Leatherdale. 2019. Natural experiment methodology for research: a review of how different methods can support real-world research. International Journal of Social Research Methodology 22 1 (2019) 19\u201335.","DOI":"10.1080\/13645579.2018.1488449"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580817"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Pengfei Liu Weizhe Yuan Jinlan Fu Zhengbao Jiang Hiroaki Hayashi and Graham Neubig. 2023. Pre-train prompt and predict: A systematic survey of prompting methods in natural language processing. Comput. Surveys 55 9 (2023) 1\u201335.","DOI":"10.1145\/3560815"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"crossref","unstructured":"Nicholas Mays and Catherine Pope. 2000. Assessing quality in qualitative research. Bmj 320 7226 (2000) 50\u201352.","DOI":"10.1136\/bmj.320.7226.50"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"crossref","unstructured":"Jim McCambridge John Witton and Diana\u00a0R Elbourne. 2014. Systematic review of the Hawthorne effect: new concepts are needed to study research participation effects. Journal of clinical epidemiology 67 3 (2014) 267\u2013277.","DOI":"10.1016\/j.jclinepi.2013.08.015"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Signe Mezinska P\u00e9ter Kakuk Goran Mijaljica Marcin Walig\u00f3ra and D\u00f3nal\u00a0P O\u2019Math\u00fana. 2016. Research in disaster settings: a systematic qualitative review of ethical guidelines. BMC medical ethics 17 (2016) 1\u201311.","DOI":"10.1186\/s12910-016-0148-7"},{"key":"e_1_3_3_2_31_2","volume-title":"First Conference on Language Modeling","author":"Mireshghallah Niloofar","year":"2024","unstructured":"Niloofar Mireshghallah, Maria Antoniak, Yash More, Yejin Choi, and Golnoosh Farnadi. 2024. Trust No Bot: Discovering Personal Disclosures in Human-LLM Conversations in the Wild. In First Conference on Language Modeling."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.244"},{"key":"e_1_3_3_2_33_2","unstructured":"OpenAI. 2022. Introducing ChatGPT. https:\/\/openai.com\/index\/chatgpt\/"},{"key":"e_1_3_3_2_34_2","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et\u00a0al. 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems 35 (2022) 27730\u201327744."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640543.3645194"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676401"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642785"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Mark Petticrew Steven Cummins Catherine Ferrell Anne Findlay Cassie Higgins Caroline Hoy Adrian Kearns and Leigh Sparks. 2005. Natural experiments: an underused tool for public health? Public health 119 9 (2005) 751\u2013757.","DOI":"10.1016\/j.puhe.2004.11.008"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Mark\u00a0O Riedl. 2019. Human-centered artificial intelligence and machine learning. Human behavior and emerging technologies 1 1 (2019) 33\u201336.","DOI":"10.1002\/hbe2.117"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Mark\u00a0R Rosenzweig and Kenneth\u00a0I Wolpin. 2000. Natural \u201cnatural experiments\u201d in economics. Journal of Economic Literature 38 4 (2000) 827\u2013874.","DOI":"10.1257\/jel.38.4.827"},{"key":"e_1_3_3_2_41_2","volume-title":"International Conference on Learning Representations","author":"Sanh Victor","year":"2022","unstructured":"Victor Sanh, Albert Webson, Colin Raffel, Stephen Bach, Lintang Sutawika, Zaid Alyafeai, Antoine Chaffin, Arnaud Stiegler, Arun Raja, Manan Dey, et\u00a0al. 2022. Multitask Prompted Training Enables Zero-Shot Task Generalization. In International Conference on Learning Representations."},{"key":"e_1_3_3_2_42_2","unstructured":"Hope Schroeder Marianne Aubin\u00a0Le Qu\u00e9r\u00e9 Casey Randazzo David Mimno and Sarita Schoenebeck. 2024. Large language models in qualitative research: Can we do the data justice? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.07362 (2024)."},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642414"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676450"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642266"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"crossref","unstructured":"Chandan\u00a0Kumar Tiwari Mohd\u00a0Abass Bhat Shagufta\u00a0Tariq Khan Rajaswaminathan Subramaniam and Mohammad Atif\u00a0Irshad Khan. 2024. What drives students toward ChatGPT? An investigation of the factors influencing adoption and usage of ChatGPT. Interactive Technology and Smart Education 21 3 (2024) 333\u2013355.","DOI":"10.1108\/ITSE-04-2023-0061"},{"key":"e_1_3_3_2_47_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.340"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517582"},{"key":"e_1_3_3_2_50_2","unstructured":"Tongshuang Wu Haiyi Zhu Maya Albayrak Alexis Axon Amanda Bertsch Wenxing Deng Ziqi Ding Bill Guo Sireesh Gururaja Tzu-Sheng Kuo et\u00a0al. 2023. Llms as workers in human-computational algorithms? replicating crowdsourcing pipelines with llms. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.10168 (2023)."},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581754.3584136"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3650732"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaaiss.v3i1.31183"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/1557019.1557121"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580900"},{"key":"e_1_3_3_2_56_2","unstructured":"He Zhang Chuhao Wu Jingyi Xie ChanMin Kim and John\u00a0M Carroll. 2023. QualiGPT: GPT as an easy-to-use tool for qualitative coding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.07061 (2023)."},{"key":"e_1_3_3_2_57_2","unstructured":"Shengyu Zhang Linfeng Dong Xiaoya Li Sen Zhang Xiaofei Sun Shuhe Wang Jiwei Li Runyi Hu Tianwei Zhang Fei Wu et\u00a0al. 2023. Instruction tuning for large language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.10792 (2023)."},{"key":"e_1_3_3_2_58_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Zhao Wenting","year":"2024","unstructured":"Wenting Zhao, Xiang Ren, Jack Hessel, Claire Cardie, Yejin Choi, and Yuntian Deng. 2024. WildChat: 1M ChatGPT Interaction Logs in the Wild. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_2_59_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Zheng Lianmin","year":"2024","unstructured":"Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Tianle Li, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zhuohan Li, Zi Lin, Eric Xing, et\u00a0al. 2024. LMSYS-Chat-1M: A Large-Scale Real-World LLM Conversation Dataset. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_2_60_2","unstructured":"Daniel\u00a0M Ziegler Nisan Stiennon Jeffrey Wu Tom\u00a0B Brown Alec Radford Dario Amodei Paul Christiano and Geoffrey Irving. 2019. Fine-tuning language models from human preferences. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.08593 (2019)."},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"crossref","unstructured":"Caleb Ziems William Held Omar Shaikh Jiaao Chen Zhehao Zhang and Diyi Yang. 2024. Can large language models transform computational social science? Computational Linguistics 50 1 (2024) 237\u2013291.","DOI":"10.1162\/coli_a_00502"}],"event":{"name":"CHI EA '25: Extended Abstracts of the CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI EA '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706599.3720285","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706599.3720285","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:51Z","timestamp":1750295931000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706599.3720285"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":60,"alternative-id":["10.1145\/3706599.3720285","10.1145\/3706599"],"URL":"https:\/\/doi.org\/10.1145\/3706599.3720285","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}