{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T07:00:24Z","timestamp":1780383624029,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,9]],"date-time":"2024-06-09T00:00:00Z","timestamp":1717891200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,9]]},"DOI":"10.1145\/3663741.3664785","type":"proceedings-article","created":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T10:19:03Z","timestamp":1717237143000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Are Large Language Models the New Interface for Data Pipelines?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4988-0702","authenticated-orcid":false,"given":"Sylvio","family":"Barbon Junior","sequence":"first","affiliation":[{"name":"Universita degli Studi di Trieste, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4519-0173","authenticated-orcid":false,"given":"Paolo","family":"Ceravolo","sequence":"additional","affiliation":[{"name":"University of Milan, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5196-1117","authenticated-orcid":false,"given":"Sven","family":"Groppe","sequence":"additional","affiliation":[{"name":"University of L\u00fcbeck, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4351-4207","authenticated-orcid":false,"given":"Mustafa","family":"Jarrar","sequence":"additional","affiliation":[{"name":"Birzeit University, Palestine"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8310-2050","authenticated-orcid":false,"given":"Samira","family":"Maghool","sequence":"additional","affiliation":[{"name":"University of Milan, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9273-302X","authenticated-orcid":false,"given":"Florence","family":"S\u00e8des","sequence":"additional","affiliation":[{"name":"University Toulouse 3 Paul Sabatier, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1554-7565","authenticated-orcid":false,"given":"Soror","family":"Sahri","sequence":"additional","affiliation":[{"name":"Universit\u00e9 Paris Cit\u00e9, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2436-1372","authenticated-orcid":false,"given":"Maurice","family":"Van Keulen","sequence":"additional","affiliation":[{"name":"University of Twente, The Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,6,9]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"World Conf. on Explainable Artificial Intelligence. Springer, 420\u2013432","author":"Arrighi Leonardo","year":"2023","unstructured":"Leonardo Arrighi, Sylvio Barbon\u00a0Junior, Felice\u00a0Andrea Pellegrino, Michele Simonato, and Marco Zullich. 2023. Explainable Automated Anomaly Recognition in Failure Analysis: is Deep Learning Doing it Correctly?. In World Conf. on Explainable Artificial Intelligence. Springer, 420\u2013432."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1089\/big.2021.0326"},{"key":"e_1_3_2_1_3_1","volume-title":"Int. Joint Conf. on Artificial Intelligence. Springer, 2\u201310","author":"Bellomarini Luigi","year":"2017","unstructured":"Luigi Bellomarini, Georg Gottlob, Andreas Pieris, and Emanuel Sallinger. 2017. Swift logic for big data and knowledge graphs. In Int. Joint Conf. on Artificial Intelligence. Springer, 2\u201310."},{"key":"e_1_3_2_1_4_1","volume-title":"A neural probabilistic language model. Advances in neural information processing systems 13","author":"Bengio Yoshua","year":"2000","unstructured":"Yoshua Bengio, R\u00e9jean Ducharme, and Pascal Vincent. 2000. A neural probabilistic language model. Advances in neural information processing systems 13 (2000), 932\u2013938."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13740-018-0086-2"},{"key":"e_1_3_2_1_6_1","volume-title":"A survey on evaluation of large language models. ACM Transactions on Intelligent Systems and Technology 1","author":"Chang Yupeng","year":"2023","unstructured":"Yupeng Chang, Xu Wang, Jindong Wang, Yuan Wu, Linyi Yang, Kaijie Zhu, Hao Chen, Xiaoyuan Yi, Cunxiang Wang, Yidong Wang, 2023. A survey on evaluation of large language models. ACM Transactions on Intelligent Systems and Technology 1 (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"A review: Knowledge reasoning over knowledge graph. Expert systems with applications 141","author":"Chen Xiaojun","year":"2020","unstructured":"Xiaojun Chen, Shengbin Jia, and Yang Xiang. 2020. A review: Knowledge reasoning over knowledge graph. Expert systems with applications 141 (2020), 112948."},{"key":"e_1_3_2_1_8_1","unstructured":"Zichen Chen Ambuj\u00a0K Singh and Misha Sra. 2023. LMExplainer: a Knowledge-Enhanced Explainer for Language Models. arXiv:2303.16537."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.joule.2023.09.004"},{"key":"e_1_3_2_1_10_1","volume-title":"Improving Knowledge Graph Embedding Using Simple Constraints. In Annual Meeting of the Association for Computational Linguistics. Assoc. for Computational Linguistics, 110\u2013121","author":"Ding Boyang","year":"2018","unstructured":"Boyang Ding, Quan Wang, Bin Wang, and Li Guo. 2018. Improving Knowledge Graph Embedding Using Simple Constraints. In Annual Meeting of the Association for Computational Linguistics. Assoc. for Computational Linguistics, 110\u2013121."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.14778\/3611479.3611527"},{"key":"e_1_3_2_1_12_1","volume-title":"Companion Proc. of the Web Conf.ACM, 303\u2013304","author":"Gade Krishna","year":"2020","unstructured":"Krishna Gade, Sahin Geyik, Krishnaram Kenthapadi, Varun Mithal, and Ankur Taly. 2020. Explainable AI in industry: Practical challenges and lessons learned. In Companion Proc. of the Web Conf.ACM, 303\u2013304."},{"key":"e_1_3_2_1_13_1","unstructured":"Varun Godbole George\u00a0E Dahl Justin Gilmer Christopher\u00a0J Shallue and Zachary Nado. 2023. Deep Learning Tuning Playbook. https:\/\/github.com\/google-research\/tuning_playbook"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106622"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447772"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Frank Hutter Lars Kotthoff and Joaquin Vanschoren (Eds.). 2019. Automated Machine Learning - Methods Systems Challenges. Springer.","DOI":"10.1007\/978-3-030-05318-5"},{"key":"e_1_3_2_1_17_1","unstructured":"LangChain Inc.visited on 5.3.2023. LangChain. https:\/\/github.com\/langchain-ai\/langchain."},{"key":"e_1_3_2_1_18_1","unstructured":"LangChain Inc.visited on 5.3.2023. LangGraph. https:\/\/github.com\/langchain-ai\/langgraph."},{"key":"e_1_3_2_1_19_1","unstructured":"LangChain Inc.visited on 5.3.2023. Ontotext GraphDB QA Chain. https:\/\/python.langchain.com\/docs\/use_cases\/graph\/graph_ontotext_graphdb_qa."},{"key":"e_1_3_2_1_20_1","volume-title":"Can llm already serve as a database interface? a big bench for large-scale database grounded text-to-sqls. Advances in Neural Information Processing Systems 36","author":"Li Jinyang","year":"2024","unstructured":"Jinyang Li, Binyuan Hui, Ge Qu, Jiaxi Yang, Binhua Li, Bowen Li, Bailin Wang, Bowen Qin, Ruiying Geng, Nan Huo, 2024. Can llm already serve as a database interface? a big bench for large-scale database grounded text-to-sqls. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_21_1","unstructured":"Ming Li Yong Zhang Zhitao Li Jiuhai Chen Lichang Chen Ning Cheng Jianzong Wang Tianyi Zhou and Jing Xiao. 2024. From Quantity to Quality: Boosting LLM Performance with Self-Guided Data Selection for Instruction Tuning. arxiv:2308.12032."},{"key":"e_1_3_2_1_22_1","volume-title":"Cooperative Information Systems","author":"Maghool Samira","unstructured":"Samira Maghool, Elena Casiraghi, and Paolo Ceravolo. 2024. Enhancing Fairness and Accuracy in Machine Learning Through Similarity Networks. In Cooperative Information Systems. Springer, 3\u201320."},{"key":"e_1_3_2_1_23_1","unstructured":"Philip Mavrepis Georgios Makridis Georgios Fatouros Vasileios Koukos Maria\u00a0Margarita Separdani and Dimosthenis Kyriazis. 2024. XAI for All: Can Large Language Models Simplify Explainable AI?arXiv:2401.13110."},{"key":"e_1_3_2_1_24_1","volume-title":"NeurIPS ICBINB Workshop.","author":"Mohta Jay","year":"2023","unstructured":"Jay Mohta, Kenan\u00a0Emir Ak, Yan Xu, and Mingwei Shen. 2023. Are large language models good annotators?. In NeurIPS ICBINB Workshop."},{"key":"e_1_3_2_1_25_1","volume-title":"GPT in Data Science: A Practical Exploration of Model Selection. In IEEE Int. Conf. on Big Data. 4325\u20134334","author":"Nascimento Nathalia","year":"2023","unstructured":"Nathalia Nascimento, Cristina Tavares, Paulo Alencar, and Donald Cowan. 2023. GPT in Data Science: A Practical Exploration of Model Selection. In IEEE Int. Conf. on Big Data. 4325\u20134334."},{"key":"e_1_3_2_1_26_1","volume-title":"Int. Conf. on Computer Science and Software Engineering. ACM, 24\u201333","author":"Nathalia Nascimento","year":"2023","unstructured":"Nascimento Nathalia, Alencar Paulo, and Cowan Donald. 2023. Artificial intelligence vs. software engineers: An empirical study on performance and efficiency using chatGPT. In Int. Conf. on Computer Science and Software Engineering. ACM, 24\u201333."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583558"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/tkde.2024.3352100"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3434642"},{"key":"e_1_3_2_1_30_1","unstructured":"Alexander Tornede Difan Deng Theresa Eimer Joseph Giovanelli Aditya Mohan Tim Ruhkopf Sarah Segel Daphne Theodorakopoulos Tanja Tornede Henning Wachsmuth and Marius Lindauer. 2024. AutoML in the Age of Large Language Models: Current Challenges Future Opportunities and Risks. arXiv:2306.08107."},{"key":"e_1_3_2_1_31_1","volume-title":"Advances in Neural Information Processing Systems, Vol.\u00a036. Curran Associates","author":"Wang Wenhai","unstructured":"Wenhai Wang, Zhe Chen, Xiaokang Chen, Jiannan Wu, Xizhou Zhu, Gang Zeng, Ping Luo, Tong Lu, Jie Zhou, Yu Qiao, and Jifeng Dai. 2023. VisionLLM: Large Language Model is also an Open-Ended Decoder for Vision-Centric Tasks. In Advances in Neural Information Processing Systems, Vol.\u00a036. Curran Associates, Inc., 61501\u201361513."},{"key":"e_1_3_2_1_32_1","volume-title":"Denny Zhou","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc\u00a0V Le, Denny Zhou, 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022), 24824\u201324837."},{"key":"e_1_3_2_1_33_1","unstructured":"Qingyun Wu Gagan Bansal Jieyu Zhang Yiran Wu Beibin Li Erkang Zhu Li Jiang Xiaoyun Zhang Shaokun Zhang Jiale Liu Ahmed\u00a0Hassan Awadallah Ryen\u00a0W White Doug Burger and Chi Wang. 2023. AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation."},{"key":"e_1_3_2_1_34_1","unstructured":"Liang Yao Chengsheng Mao and Yuan Luo. 2019. KG-BERT: BERT for knowledge graph completion. arXiv:1909.03193."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Rui Zhang Yixin Su Bayu\u00a0Distiawan Trisedya Xiaoyan Zhao Min Yang Hong Cheng and Jianzhong Qi. 2023. AutoAlign: Fully Automatic and Effective Knowledge Graph Alignment enabled by Large Language Models. arXiv:2307.11772.","DOI":"10.1109\/TKDE.2023.3325484"},{"key":"e_1_3_2_1_36_1","first-page":"1","article-title":"Automl for deep recommender systems: A survey","volume":"41","author":"Zheng Ruiqi","year":"2023","unstructured":"Ruiqi Zheng, Liang Qu, Bin Cui, Yuhui Shi, and Hongzhi Yin. 2023. Automl for deep recommender systems: A survey. ACM Transactions on Information Systems 41, 4 (2023), 1\u201338.","journal-title":"ACM Transactions on Information Systems"},{"key":"e_1_3_2_1_37_1","unstructured":"Xuanhe Zhou Xinyang Zhao and Guoliang Li. 2024. LLM-Enhanced Data Management. arXiv:2402.02643."}],"event":{"name":"SIGMOD\/PODS '24: International Conference on Management of Data","location":"Santiago AA Chile","acronym":"SIGMOD\/PODS '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["International Workshop on Big Data in Emergent Distributed Environments"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3663741.3664785","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:59Z","timestamp":1750294679000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3663741.3664785"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,9]]},"references-count":37,"alternative-id":["10.1145\/3663741.3664785","10.1145\/3663741"],"URL":"https:\/\/doi.org\/10.1145\/3663741.3664785","relation":{},"subject":[],"published":{"date-parts":[[2024,6,9]]},"assertion":[{"value":"2024-06-09","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}