{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:46:06Z","timestamp":1776109566863,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,20]],"date-time":"2024-04-20T00:00:00Z","timestamp":1713571200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,20]]},"DOI":"10.1145\/3643795.3648381","type":"proceedings-article","created":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T13:46:19Z","timestamp":1725975979000},"page":"127-134","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Semantically Aligned Question and Code Generation for Automated Insight Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-7682-611X","authenticated-orcid":false,"given":"Ananya","family":"Singha","sequence":"first","affiliation":[{"name":"Microsoft, Bangalore, Karnataka, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8638-3863","authenticated-orcid":false,"given":"Bhavya","family":"Chopra","sequence":"additional","affiliation":[{"name":"Microsoft, Bangalore, Karnataka, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7773-4405","authenticated-orcid":false,"given":"Anirudh","family":"Khatry","sequence":"additional","affiliation":[{"name":"Microsoft, Bangalore, Karnataka, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9226-9634","authenticated-orcid":false,"given":"Sumit","family":"Gulwani","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1069-2795","authenticated-orcid":false,"given":"Austin","family":"Henley","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3727-3291","authenticated-orcid":false,"given":"Vu","family":"Le","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6182-815X","authenticated-orcid":false,"given":"Chris","family":"Parnin","sequence":"additional","affiliation":[{"name":"Microsoft, Readmond, Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9510-4512","authenticated-orcid":false,"given":"Mukul","family":"Singh","sequence":"additional","affiliation":[{"name":"Microsoft, Delhi, Delhi, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9182-597X","authenticated-orcid":false,"given":"Gust","family":"Verbruggen","sequence":"additional","affiliation":[{"name":"Microsoft, Keerbergen, Belgium"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,9,10]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan. Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338906.3340458"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/564691.564719"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.3758\/BF03195563"},{"key":"e_1_3_2_1_5_1","volume-title":"Data science workflow: Overview and challenges. Commun. ACM","author":"Guo Philip","year":"2013","unstructured":"Philip Guo. 2013. Data science workflow: Overview and challenges. Commun. ACM (2013)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196889"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510203"},{"key":"e_1_3_2_1_8_1","volume-title":"Open-WikiTable: Dataset for Open Domain Question Answering with Complex Reasoning over Table. arXiv preprint arXiv:2305.07288","author":"Kweon Sunjun","year":"2023","unstructured":"Sunjun Kweon, Yeonsu Kwon, Seonhee Cho, Yohan Jo, and Edward Choi. 2023. Open-WikiTable: Dataset for Open Domain Question Answering with Complex Reasoning over Table. arXiv preprint arXiv:2305.07288 (2023)."},{"key":"e_1_3_2_1_9_1","first-page":"3","article-title":"Lux: Always-on Visualization Recommendations for Exploratory Dataframe Workflows","volume":"15","author":"Jung-Lin Lee Doris","year":"2021","unstructured":"Doris Jung-Lin Lee, Dixin Tang, Kunal Agarwal, Thyne Boonmark, Caitlyn Chen. Jake Kang, Ujjaini Mukhopadhyay, Jerry Song, Micah Yong, Marti A. Hearst, and Aditya G. Parameswaran. 2021. Lux: Always-on Visualization Recommendations for Exploratory Dataframe Workflows. Proceedings of the VLDB Endowmen 15, 3 (nov 2021), 727--738.","journal-title":"Proceedings of the VLDB Endowmen"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457267"},{"key":"e_1_3_2_1_11_1","volume-title":"Demonstration of InsightPilot: An LLM-Empowered Automated Data Exploration System. arXiv preprint arXiv:2304.00477","author":"Ma Pingchuan","year":"2023","unstructured":"Pingchuan Ma, Rui Ding, Shuai Wang, Shi Han, and Dongmei Zhang. 2023. Demonstration of InsightPilot: An LLM-Empowered Automated Data Exploration System. arXiv preprint arXiv:2304.00477 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"Jerry Tworek, Qiming Yuan, Nikolas Tezak, Jong Wook Kim, Chris Hallacy, et al.","author":"Neelakantan Arvind","year":"2022","unstructured":"Arvind Neelakantan, Tao Xu, Raul Puri, Alec Radford, Jesse Michael Han, Jerry Tworek, Qiming Yuan, Nikolas Tezak, Jong Wook Kim, Chris Hallacy, et al. 2022. Text and code embeddings by contrastive pre-training. arXiv preprint arXiv:2201.10005 (2022)."},{"key":"e_1_3_2_1_13_1","volume-title":"GPT-4 technical report. arXiv","author":"AI.","year":"2023","unstructured":"OpenAI. 2023. GPT-4 technical report. arXiv (2023), 2303--08774."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.277"},{"key":"e_1_3_2_1_15_1","volume-title":"CORNET: A neurosymbolic approach to learning conditional table formatting rules by example. arXiv preprint arXiv:2208.06032","author":"Singh Mukul","year":"2022","unstructured":"Mukul Singh, Jos\u00e9 Cambronero, Sumit Gulwani, Vu Le, Carina Negreanu, Mohammad Raza, and Gust Verbruggen. 2022. CORNET: A neurosymbolic approach to learning conditional table formatting rules by example. arXiv preprint arXiv:2208.06032 (2022)."},{"key":"e_1_3_2_1_16_1","volume-title":"Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859","author":"Wang Yue","year":"2021","unstructured":"Yue Wang, Weishi Wang, Shafiq Joty, and Steven CH Hoi. 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859 (2021)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389738"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.91"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.411"}],"event":{"name":"LLM4Code '24: 1st International Workshop on Large Language Models for Code","location":"Lisbon Portugal","acronym":"LLM4Code '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"]},"container-title":["Proceedings of the 1st International Workshop on Large Language Models for Code"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643795.3648381","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643795.3648381","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:45Z","timestamp":1750294665000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643795.3648381"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,20]]},"references-count":19,"alternative-id":["10.1145\/3643795.3648381","10.1145\/3643795"],"URL":"https:\/\/doi.org\/10.1145\/3643795.3648381","relation":{},"subject":[],"published":{"date-parts":[[2024,4,20]]},"assertion":[{"value":"2024-09-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}