{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T19:40:10Z","timestamp":1752003610036,"version":"3.41.2"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1145\/3736733.3736748","type":"proceedings-article","created":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T19:03:51Z","timestamp":1752001431000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Challenges in Using Conversational AI for Data Science"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8638-3863","authenticated-orcid":false,"given":"Bhavya","family":"Chopra","sequence":"first","affiliation":[{"name":"University of California, Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7682-611X","authenticated-orcid":false,"given":"Ananya","family":"Singha","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5275-7844","authenticated-orcid":false,"given":"Anna","family":"Fariha","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9226-9634","authenticated-orcid":false,"given":"Sumit","family":"Gulwani","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6182-815X","authenticated-orcid":false,"given":"Chris","family":"Parnin","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5153-2686","authenticated-orcid":false,"given":"Ashish","family":"Tiwari","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1069-2795","authenticated-orcid":false,"given":"Austin Z.","family":"Henley","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,7,8]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. Anaconda Assistant Launches to Bring Instant Data Analysis Code Generation and Insights to Users. https:\/\/www.anaconda.com\/blog\/anaconda-assistant-launches-to-bring-instant-data-analysis-code-generation-and-insights-to-users."},{"key":"e_1_3_2_1_2_1","unstructured":"2023. Introducing Databricks Assistant a context-aware AI assistant. https:\/\/www.databricks.com\/blog\/introducing-databricks-assistant."},{"key":"e_1_3_2_1_3_1","unstructured":"2024. DataChat. https:\/\/datachat.ai\/."},{"key":"e_1_3_2_1_4_1","unstructured":"2024. Overview of Predictive Transformation. https:\/\/help.alteryx.com\/aac\/de\/trifacta-classic\/concepts\/feature-overviews\/overview-of-predictive-transformation.html."},{"key":"e_1_3_2_1_5_1","unstructured":"Jacob Austin Augustus Odena Maxwell Nye Maarten Bosma Henryk Michalewski David Dohan Ellen Jiang Carrie Cai Michael Terry Quoc Le et al. 2021. Program synthesis with large language models. arXiv preprint arXiv:2108.07732 (2021)."},{"key":"e_1_3_2_1_6_1","volume-title":"Grounded Copilot: How Programmers Interact with Code-Generating Models. 7, OOPSLA1, Article 78 (April","author":"Barke Shraddha","year":"2023","unstructured":"Shraddha Barke, Michael B. James, and Nadia Polikarpova. 2023. Grounded Copilot: How Programmers Interact with Code-Generating Models. 7, OOPSLA1, Article 78 (April 2023), 27 pages."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360594"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.2307\/1269768"},{"key":"e_1_3_2_1_9_1","volume-title":"2017 IEEE Symposium on Visual Languages and Human-Centric Computing (VL\/HCC). 25\u201329","author":"Kery Mary Beth","unstructured":"Mary Beth Kery and Brad A. Myers. 2017. Exploring exploratory programming. In 2017 IEEE Symposium on Visual Languages and Human-Centric Computing (VL\/HCC). 25\u201329."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376729"},{"key":"e_1_3_2_1_11_1","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique Ponde de Oliveira Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman Alex Ray Raul Puri Gretchen Krueger Michael Petrov Heidy Khlaaf Girish Sastry Pamela Mishkin Brooke Chan Scott Gray Nick Ryder Mikhail Pavlov Alethea Power Lukasz Kaiser Mohammad Bavarian Clemens Winter Philippe Tillet Felipe Petroski Such Dave Cummings Matthias Plappert Fotios Chantzis Elizabeth Barnes Ariel Herbert-Voss William Hebgen Guss Alex Nichol Alex Paino Nikolas Tezak Jie Tang Igor Babuschkin Suchir Balaji Shantanu Jain William Saunders Christopher Hesse Andrew N. Carr Jan Leike Josh Achiam Vedant Misra Evan Morikawa Alec Radford Matthew Knight Miles Brundage Mira Murati Katie Mayer Peter Welinder Bob McGrew Dario Amodei Sam McCandlish Ilya Sutskever and Wojciech Zaremba. 2021. Evaluating Large Language Models Trained on Code. arXiv:2107.03374 [cs.LG]"},{"key":"e_1_3_2_1_12_1","volume-title":"CoWrangler: Recommender System for Data-Wrangling Scripts. In Companion of the 2023 International Conference on Management of Data (SIGMOD '23)","author":"Chopra Bhavya","year":"2023","unstructured":"Bhavya Chopra, Anna Fariha, Sumit Gulwani, Austin Z. Henley, Daniel Perelman, Mohammad Raza, Sherry Shi, Danny Simmons, and Ashish Tiwari. 2023. CoWrangler: Recommender System for Data-Wrangling Scripts. In Companion of the 2023 International Conference on Management of Data (SIGMOD '23). Association for Computing Machinery, New York, NY, USA, 147\u2013150."},{"key":"e_1_3_2_1_13_1","unstructured":"Noah Daniels. 2021. NY EMS Incident Dispatch Data. https:\/\/www.kaggle.com\/datasets\/new-york-city\/ny-ems-incident-dispatch-data."},{"key":"e_1_3_2_1_14_1","volume-title":"Investigating Code Generation Performance of ChatGPT with Crowdsourcing Social Data. In 2023 IEEE 47th Annual Computers, Software, and Applications Conference (COMPSAC). 876\u2013885","author":"Feng Yunhe","unstructured":"Yunhe Feng, Sreecharan Vanam, Manasa Cherukupally, Weijian Zheng, Meikang Qiu, and Haihua Chen. 2023. Investigating Code Generation Performance of ChatGPT with Crowdsourcing Social Data. In 2023 IEEE 47th Annual Computers, Software, and Applications Conference (COMPSAC). 876\u2013885."},{"key":"e_1_3_2_1_15_1","volume-title":"Bliki: Fluentinterface. https:\/\/www.martinfowler.com\/bliki\/FluentInterface.html","author":"Fowler Martin","year":"2005","unstructured":"Martin Fowler. 2005. Bliki: Fluentinterface. https:\/\/www.martinfowler.com\/bliki\/FluentInterface.html"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2568225.2568280"},{"key":"e_1_3_2_1_17_1","volume-title":"Studies in the Way of Words","author":"Grice Paul","unstructured":"Paul Grice. 1991. Studies in the Way of Words. Harvard University Press."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Ken Gu Madeleine Grunde-McLaughlin Andrew M. McNutt Jeffrey Heer and Tim Althoff. 2024. How Do Data Analysts Respond to AI Assistance? A Wizard-of-Oz Study. arXiv:2309.10108 [cs.HC]","DOI":"10.1145\/3613904.3641891"},{"key":"e_1_3_2_1_19_1","volume-title":"Drucker","author":"Gu Ken","year":"2024","unstructured":"Ken Gu, Ruoxi Shang, Tim Althoff, Chenglong Wang, and Steven M. Drucker. 2024. How Do Analysts Understand and Verify AI-Assisted Data Analyses? arXiv:2309.10947 [cs.HC]"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2047196.2047205"},{"key":"e_1_3_2_1_21_1","volume-title":"Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300","author":"Hendrycks Dan","year":"2020","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2020. Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300 (2020)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEET52601.2021.00026"},{"key":"e_1_3_2_1_23_1","unstructured":"Noah Hollmann Samuel M\u00fcller and Frank Hutter. 2023. LLMs for Semi-Automated Data Science: Introducing CAAFE for Context-Aware Automated Feature Engineering. arXiv:2305.03403 [cs.AI]"},{"key":"e_1_3_2_1_24_1","volume-title":"DataChat: An Intuitive and Collaborative Data Analytics Platform. In Companion of the 2023 International Conference on Management of Data (SIGMOD '23)","author":"Leo John Rogers Jeffrey","unstructured":"Rogers Jeffrey Leo John, Dylan Bacon, Junda Chen, Ushmal Ramesh, Jiatong Li, Deepan Das, Robert Claus, Amos Kendall, and Jignesh M. Patel. 2023. DataChat: An Intuitive and Collaborative Data Analytics Platform. In Companion of the 2023 International Conference on Management of Data (SIGMOD '23). Association for Computing Machinery, New York, NY, USA, 203\u2013215."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1978942.1979444"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2012.219"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676345"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2017.2754374"},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 2019 CHI Conference on Human Factors in Computing Systems (CHI '19)","author":"Kross Sean","unstructured":"Sean Kross and Philip J. Guo. 2019. Practitioners Teaching Data Science in Industry and Academia: Expectations, Workflows, and Challenges. In Proceedings of the 2019 CHI Conference on Human Factors in Computing Systems (CHI '19). Association for Computing Machinery, New York, NY, USA, 1\u201314."},{"key":"e_1_3_2_1_30_1","unstructured":"Patrick Lewis Ethan Perez Aleksandra Piktus Fabio Petroni Vladimir Karpukhin Naman Goyal Heinrich K\u00fcttler Mike Lewis Wen-tau Yih Tim Rockt\u00e4schel et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in neural information processing systems 33 (2020) 9459\u20139474."},{"key":"e_1_3_2_1_31_1","volume-title":"Yuyao Wang, and Lingming Zhang.","author":"Liu Jiawei","year":"2023","unstructured":"Jiawei Liu, Chunqiu Steven Xia, Yuyao Wang, and Lingming Zhang. 2023. Is Your Code Generated by ChatGPT Really Correct? Rigorous Evaluation of Large Language Models for Code Generation. arXiv:2305.01210 [cs.SE]"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300356"},{"key":"e_1_3_2_1_33_1","unstructured":"David Noever and Forrest McKee. 2023. Numeracy from Literacy: Data Science as an Emergent Skill from Large Language Models. arXiv:2301.13382 [cs.CL]"},{"key":"e_1_3_2_1_34_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL]"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR52588.2021.00072"},{"key":"e_1_3_2_1_36_1","unstructured":"Itamar Shatz. Accessed 2025. Grice's Maxims of Conversation: The Principles of Effective Communication. https:\/\/effectiviology.com\/principles-of-effective-communication\/."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3449126"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2531602.2531659"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389738"},{"key":"e_1_3_2_1_41_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Jeffrey Zhao, Dian Yu, Nan Du, Izhak Shafran, Karthik Narasimhan, and Yuan Cao. 2023. React: Synergizing reasoning and acting in language models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_42_1","unstructured":"Pengcheng Yin Wen-Ding Li Kefan Xiao Abhishek Rao Yeming Wen Kensen Shi Joshua Howland Paige Bailey Michele Catasta Henryk Michalewski Alex Polozov and Charles Sutton. 2022. Natural Language to Code Generation in Interactive Data Science Notebooks. arXiv:2212.09248 [cs.CL]"}],"event":{"name":"HILDA '25: Workshop on Human-In-the-Loop Data Analytics","location":"Intercontinental Berlin Berlin Germany","acronym":"HILDA '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the Workshop on Human-In-the-Loop Data Analytics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3736733.3736748","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T19:04:19Z","timestamp":1752001459000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3736733.3736748"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":42,"alternative-id":["10.1145\/3736733.3736748","10.1145\/3736733"],"URL":"https:\/\/doi.org\/10.1145\/3736733.3736748","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]},"assertion":[{"value":"2025-07-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}