{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T20:02:51Z","timestamp":1779825771617,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":108,"publisher":"ACM","funder":[{"name":"NSF of China","award":["62402409, 62525202, 62232009, 62436010, 62441230"],"award-info":[{"award-number":["62402409, 62525202, 62232009, 62436010, 62441230"]}]},{"name":"National Key R&#x5c;&#x5c;&amp;D Program of China","award":["2023YFB4503600"],"award-info":[{"award-number":["2023YFB4503600"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,31]]},"DOI":"10.1145\/3788853.3801878","type":"proceedings-article","created":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T19:14:47Z","timestamp":1779822887000},"page":"571-579","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Data Agents: Levels, State of the Art, and Open Problems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9530-3327","authenticated-orcid":false,"given":"Yuyu","family":"Luo","sequence":"first","affiliation":[{"name":"HKUST (GZ), Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1398-0621","authenticated-orcid":false,"given":"Guoliang","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4729-9903","authenticated-orcid":false,"given":"Ju","family":"Fan","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2832-0295","authenticated-orcid":false,"given":"Nan","family":"Tang","sequence":"additional","affiliation":[{"name":"HKUST (GZ), Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,5,30]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Xata Agent."},{"key":"e_1_3_2_1_2_1","unstructured":"Agent Team at JDCHO. [n.d.]. JoyAgent-JDGenie."},{"key":"e_1_3_2_1_3_1","volume-title":"LEDD: large language model-empowered data discovery in data lakes. arXiv preprint arXiv:2502.15182","author":"An Qi","year":"2025","unstructured":"Qi An, Chihua Ying, Yuqing Zhu, Yihao Xu, Manwei Zhang, and Jianmin Wang. 2025. LEDD: large language model-empowered data discovery in data lakes. arXiv preprint arXiv:2502.15182 (2025)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Fabian Biester Mohamed Abdelaal and Daniel Del Gaudio. 2024. LLM-Clean: Context-Aware Tabular Data Cleaning via LLM-Generated OFDs. arXiv:2404.18681 [cs.DB]","DOI":"10.1007\/978-3-031-70421-5_7"},{"key":"e_1_3_2_1_5_1","unstructured":"ByteDance Volcengine. [n.d.]. Data Agent."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Lucas Cecchi and Petr Babkin. [n.d.]. ReportGPT: Human-in-the-loop Verifiable Table-to-Text Generation. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track Franck Dernoncourt Daniel Preotiuc-Pietro and Anastasia Shimorina (Eds.).","DOI":"10.18653\/v1\/2024.emnlp-industry.39"},{"key":"e_1_3_2_1_7_1","volume-title":"Demystifying Artificial Intelligence for Data Preparation. In Companion of the 2023 International Conference on Management of Data.","author":"Chai Chengliang","unstructured":"Chengliang Chai, Nan Tang, Ju Fan, and Yuyu Luo. [n.d.]. Demystifying Artificial Intelligence for Data Preparation. In Companion of the 2023 International Conference on Management of Data."},{"key":"e_1_3_2_1_8_1","volume-title":"Data Management for Machine Learning: A Survey","author":"Chai Chengliang","year":"2023","unstructured":"Chengliang Chai, Jiayi Wang, Yuyu Luo, Zeping Niu, and Guoliang Li. 2023. Data Management for Machine Learning: A Survey. IEEE Transactions on Knowledge and Data Engineering (2023)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3709663"},{"key":"e_1_3_2_1_10_1","volume-title":"SEED: Domain-specific data curation with large language models. arXiv preprint arXiv:2310.00749","author":"Chen Zui","year":"2023","unstructured":"Zui Chen, Lei Cao, Sam Madden, Tim Kraska, Zeyuan Shang, Ju Fan, Nan Tang, Zihui Gu, Chunwei Liu, and Michael Cafarella. 2023. SEED: Domain-specific data curation with large language models. arXiv preprint arXiv:2310.00749 (2023)."},{"key":"e_1_3_2_1_11_1","unstructured":"Zhoujun Cheng Tianbao Xie Peng Shi Chengzu Li Rahul Nadkarni Yushi Hu Caiming Xiong Dragomir Radev Mari Ostendorf Luke Zettlemoyer Noah A. Smith and Tao Yu. 2023. Binding Language Models in Symbolic Languages. arXiv:2210.02875 [cs]"},{"key":"e_1_3_2_1_12_1","unstructured":"Databricks. [n.d.]. Assistant Data Science Agent."},{"key":"e_1_3_2_1_13_1","unstructured":"Minghang Deng Ashwin Ramachandran Canwen Xu Lanxiang Hu Zhewei Yao Anupam Datta and Hao Zhang. [n.d.]. ReFoRCE: A Text-to-SQL Agent with Self-Refinement Format Restriction and Column Exploration. arXiv preprint arXiv:2502.00675 ([n.d.])."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00381"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Alvaro AA Fernandes Martin Koehler and et al. 2023. Data preparation: A technological perspective and review. SN Computer Science (2023).","DOI":"10.1007\/s42979-023-01828-8"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.14778\/3665844.3665857"},{"key":"e_1_3_2_1_17_1","volume-title":"Autonomous Data Agents: A New Opportunity for Smart Data. arXiv preprint arXiv:2509.18710","author":"Fu Yanjie","year":"2025","unstructured":"Yanjie Fu, Dongjie Wang, Wangyang Ying, Xiangliang Zhang, Huan Liu, and Jian Pei. 2025. Autonomous Data Agents: A New Opportunity for Smart Data. arXiv preprint arXiv:2509.18710 (2025)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.14778\/3685800.3685869"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the ACM on Management of Data","author":"Giannakouris Victor","year":"2025","unstructured":"Victor Giannakouris and Immanuel Trummer. 2025. \u03bb-tune: Harnessing large language models for automated database system tuning. Proceedings of the ACM on Management of Data (2025)."},{"key":"e_1_3_2_1_20_1","unstructured":"Google Cloud. [n.d.]. BigQuery."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626765"},{"key":"e_1_3_2_1_22_1","volume-title":"Bang Liu","author":"Hong Sirui","year":"2025","unstructured":"Sirui Hong, Yizhang Lin, and et al. Bang Liu. 2025. Data Interpreter: An LLM Agent for Data Science. In Findings of the Association for Computational Linguistics."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.14778\/3773731.3773732"},{"key":"e_1_3_2_1_24_1","volume-title":"Proc. VLDB Endow.","author":"Jiang Jie","year":"2025","unstructured":"Jie Jiang, Haining Xie, Siqi Shen, Yu Shen, and et al. 2025. SiriusBI: A Comprehensive LLM-powered Solution for Data Analytics in Business Intelligence. Proc. VLDB Endow. (2025)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.495"},{"key":"e_1_3_2_1_26_1","volume-title":"CHORUS: foundation models for unified data discovery and exploration. arXiv preprint arXiv:2306.09610","author":"Kayali Moe","year":"2023","unstructured":"Moe Kayali, Anton Lykov, Ilias Fountalis, Nikolaos Vasiloglou, Dan Olteanu, and Dan Suciu. 2023. CHORUS: foundation models for unified data discovery and exploration. arXiv preprint arXiv:2306.09610 (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"Evaluating knowledge generation and self-refinement strategies for llm-based column type annotation. arXiv preprint arXiv:2503.02718","author":"Korini Keti","year":"2025","unstructured":"Keti Korini and Christian Bizer. 2025. Evaluating knowledge generation and self-refinement strategies for llm-based column type annotation. arXiv preprint arXiv:2503.02718 (2025)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.14778\/3659437.3659449"},{"key":"e_1_3_2_1_29_1","volume-title":"DeepEye-SQL: A Software-Engineering-Inspired Text-to-SQL Framework. arXiv preprint arXiv:2510.17586","author":"Li Boyan","year":"2025","unstructured":"Boyan Li, Chong Chen, Zhujun Xue, Yinan Mei, and Yuyu Luo. 2025. DeepEye-SQL: A Software-Engineering-Inspired Text-to-SQL Framework. arXiv preprint arXiv:2510.17586 (2025)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.14778\/3681954.3682003"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3788853.3801612"},{"key":"e_1_3_2_1_32_1","volume-title":"Forty-second International Conference on Machine Learning.","author":"Li Boyan","year":"2025","unstructured":"Boyan Li, Jiayi Zhang, Ju Fan, Yanwei Xu, Chong Chen, Nan Tang, and Yuyu Luo. 2025. Alpha-SQL: Zero-Shot Text-to-SQL using Monte Carlo Tree Search. In Forty-second International Conference on Machine Learning."},{"key":"e_1_3_2_1_33_1","volume-title":"The Thirty-ninth Annual Conference on Neural Information Processing Systems Datasets and Benchmarks Track. https:\/\/openreview.net\/forum?id=SXADEhZ0sl","author":"Li Changlun","year":"2025","unstructured":"Changlun Li, Yao SHI, Chen Wang, Qiqi Duan, Runke RUAN, Weijie Huang, Haonan Long, Lijun Huang, Nan Tang, and Yuyu Luo. 2025. Time Travel is Cheating: Going Live with DeepFund for Real-Time Fund Investment Benchmarking. In The Thirty-ninth Annual Conference on Neural Information Processing Systems Datasets and Benchmarks Track. https:\/\/openreview.net\/forum?id=SXADEhZ0sl"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.14778\/3742728.3742734"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3722212.3725641"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.14778\/3685800.3685838"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3654979"},{"key":"e_1_3_2_1_38_1","volume-title":"Fei Hao, and Lei Chen.","author":"Li Shuaimin","year":"2025","unstructured":"Shuaimin Li, Xuanang Chen, Yuanfeng Song, Yunze Song, Chen Jason Zhang, Fei Hao, and Lei Chen. 2025. prompt4vis: prompting large language models with example mining for tabular data visualization. The VLDB Journal (2025)."},{"key":"e_1_3_2_1_39_1","unstructured":"Yuchen Li Kai Wang Zhiqiang Sun et al. 2025. Automatic Database Description Generation for Text-to-SQL. arXiv preprint arXiv:2502.20657 (2025)."},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the VLDB Endowment","author":"Li Zhaodonghui","year":"2024","unstructured":"Zhaodonghui Li, Haitao Yuan, Huiming Wang, Gao Cong, and Lidong Bing. 2024. LLM-R2: A Large Language Model Enhanced Rule-Based Rewrite System for Boosting Query Efficiency. Proceedings of the VLDB Endowment (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.14778\/3778092.3778103"},{"key":"e_1_3_2_1_42_1","unstructured":"Bang Liu Xinfeng Li Jiayi Zhang Jinlin Wang Tanjin He Sirui Hong Hongzhang Liu Shaokun Zhang Kaitao Song Kunlun Zhu et al. 2025. Advances and challenges in foundation agents: From brain-inspired intelligence to evolutionary collaborative and safe systems. arXiv preprint arXiv:2504.01990 (2025)."},{"key":"e_1_3_2_1_43_1","volume-title":"A Survey of Text-to-SQL in the Era of LLMs: Where are we, and where are we going? IEEE Transactions on Knowledge and Data Engineering","author":"Liu Xinyu","year":"2025","unstructured":"Xinyu Liu, Shuyu Shen, Boyan Li, Peixian Ma, Runzhi Jiang, Yuxin Zhang, Ju Fan, Guoliang Li, Nan Tang, and Yuyu Luo. 2025. A Survey of Text-to-SQL in the Era of LLMs: Where are we, and where are we going? IEEE Transactions on Knowledge and Data Engineering (2025)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3711896.3737427"},{"key":"e_1_3_2_1_45_1","unstructured":"Tianqi Luo Chuhan Huang Leixian Shen Boyan Li Shuyu Shen Wei Zeng Nan Tang and Yuyu Luo. 2025. nvBench 2.0: Resolving Ambiguity in Text-to-Visualization through Stepwise Reasoning. arXiv:2503.12880 [cs.CL]"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.14778\/3750601.3750696"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3193545"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457261"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2021.3114848"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588942"},{"key":"e_1_3_2_1_51_1","volume-title":"Chat2VIS: Generating Data Visualizations via Natural Language Using ChatGPT, Codex and GPT-3 Large Language Models","author":"Maddigan Paula","year":"2023","unstructured":"Paula Maddigan and Teo Susnjak. 2023. Chat2VIS: Generating Data Visualizations via Natural Language Using ChatGPT, Codex and GPT-3 Large Language Models. IEEE Access (oct 2023)."},{"key":"e_1_3_2_1_52_1","volume-title":"Large language models: A survey. arXiv preprint arXiv:2402.06196","author":"Minaee Shervin","year":"2024","unstructured":"Shervin Minaee, Tomas Mikolov, Narjes Nikzad, Meysam Chenaghlu, Richard Socher, Xavier Amatriain, and Jianfeng Gao. 2024. Large language models: A survey. arXiv preprint arXiv:2402.06196 (2024)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.14778\/3685800.3685890"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.14778\/3574245.3574258"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.960"},{"key":"e_1_3_2_1_56_1","volume-title":"Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing.","author":"Zadeh Fatemeh Pesaran","unstructured":"Fatemeh Pesaran Zadeh, Juyeon Kim, Jin-Hwa Kim, and Gunhee Kim. [n.d.]. Text2Chart31: Instruction Tuning for Chart Generation with Automatic Feedback. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing."},{"key":"e_1_3_2_1_57_1","volume-title":"CHASE-SQL: Multi-Path Reasoning and Preference Optimized Candidate Selection in Text-to-SQL. In The Thirteenth International Conference on Learning Representations.","author":"Pourreza Mohammadreza","year":"2025","unstructured":"Mohammadreza Pourreza, Hailong Li, Ruoxi Sun, Yeounoh Chung, Shayan Talaei, Gaurav Tarlok Kakkar, Yu Gan, Amin Saberi, Fatma Ozcan, and Sercan O Arik. 2025. CHASE-SQL: Multi-Path Reasoning and Preference Optimized Candidate Selection in Text-to-SQL. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","unstructured":"Mohammadreza Pourreza and Davood Rafiei. 2023. DIN-SQL: Decomposed In-Context Learning of Text-to-SQL with Self-Correction. In Advances in Neural Information Processing Systems 36 Alice Oh Tristan Naumann Amir Globerson Kate Saenko Moritz Hardt and Sergey Levine (Eds.).","DOI":"10.52202\/075280-1577"},{"key":"e_1_3_2_1_59_1","unstructured":"Danrui Qi Zhengjie Miao and Jiannan Wang. 2025. CleanAgent: Automating Data Standardization with LLM-based Agents. arXiv:2403.08291"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.14778\/3712221.3712222"},{"key":"e_1_3_2_1_61_1","volume-title":"Structured Documents. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track.","author":"Saad-Falcon Jon","unstructured":"Jon Saad-Falcon, Joe Barrow, Alexa Siu, Ani Nenkova, Seunghyun Yoon, Ryan A. Rossi, and Franck Dernoncourt. [n.d.]. PDFTriage: Question Answering over Long, Structured Documents. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track."},{"key":"e_1_3_2_1_62_1","volume-title":"Andre Seeck, and Rico Auerswald.","author":"Shi Elisabeth","year":"2020","unstructured":"Elisabeth Shi, Tom Michael Gasser, Andre Seeck, and Rico Auerswald. 2020. The Principles of Operation Framework: A Comprehensive Classification Concept for Automated Driving Functions. SAE International Journal of Connected and Automated Vehicles (2020)."},{"key":"e_1_3_2_1_63_1","volume-title":"Deepvis: Bridging natural language and data visualization through step-wise reasoning. arXiv preprint arXiv:2508.01700","author":"Shuai Zhihao","year":"2025","unstructured":"Zhihao Shuai, Boyan Li, Siyu Yan, Yuyu Luo, and Weikai Yang. 2025. Deepvis: Bridging natural language and data visualization through step-wise reasoning. arXiv preprint arXiv:2508.01700 (2025)."},{"key":"e_1_3_2_1_64_1","unstructured":"Snowflake. [n.d.]. Cortex Agents."},{"key":"e_1_3_2_1_65_1","volume-title":"QUITE: A Query Rewrite System Beyond Rules with LLM Agents. arXiv preprint arXiv:2506.07675","author":"Song Yuyang","year":"2025","unstructured":"Yuyang Song, Hanxu Yan, Jiale Lao, Yibo Wang, Yufei Li, Yuanchun Zhou, Jianguo Wang, and Mingjie Tang. 2025. QUITE: A Query Rewrite System Beyond Rules with LLM Agents. arXiv preprint arXiv:2506.07675 (2025)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"crossref","unstructured":"Yuan Sui Mengyu Zhou Mingjie Zhou Shi Han and Dongmei Zhang. 2024. Table Meets LLM: Can Large Language Models Understand Structured Table Data? A Benchmark and Empirical Study. arXiv:2305.13062 [cs]","DOI":"10.1145\/3616855.3635752"},{"key":"e_1_3_2_1_67_1","volume-title":"DATATALES: Investigating the use of Large Language Models for Authoring Data-Driven Articles. In 2023 IEEE Visualization and Visual Analytics (VIS).","author":"Sultanum Nicole","year":"2023","unstructured":"Nicole Sultanum and Arjun Srinivasan. 2023. DATATALES: Investigating the use of Large Language Models for Authoring Data-Driven Articles. In 2023 IEEE Visualization and Visual Analytics (VIS)."},{"key":"e_1_3_2_1_68_1","volume-title":"AgenticData: An Agentic Data Analytics System for Heterogeneous Data. CoRR","author":"Sun Ji","year":"2025","unstructured":"Ji Sun, Guoliang Li, Peiyao Zhou, Yihui Ma, Jingzhe Xu, and Yuan Li. 2025. AgenticData: An Agentic Data Analytics System for Heterogeneous Data. CoRR (2025)."},{"key":"e_1_3_2_1_69_1","volume-title":"Rabbit: Retrieval-Augmented Generation Enables Better Automatic Database Knob Tuning. In 2025 IEEE 41st International Conference on Data Engineering (ICDE). IEEE.","author":"Sun Wenwen","unstructured":"Wenwen Sun, Zhicheng Pan, Zirui Hu, Yu Liu, Chengcheng Yang, Rong Zhang, and Xuan Zhou. [n.d.]. Rabbit: Retrieval-Augmented Generation Enables Better Automatic Database Knob Tuning. In 2025 IEEE 41st International Conference on Data Engineering (ICDE). IEEE."},{"key":"e_1_3_2_1_70_1","volume-title":"Data Agent: A Holistic Architecture for Orchestrating Data AI Ecosystems. arXiv preprint arXiv:2507.01599","author":"Sun Zhaoyan","year":"2025","unstructured":"Zhaoyan Sun, Jiayi Wang, Xinyang Zhao, Jiachi Wang, and Guoliang Li. 2025. Data Agent: A Holistic Architecture for Orchestrating Data AI Ecosystems. arXiv preprint arXiv:2507.01599 (2025)."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.14778\/3750601.3750625"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"crossref","unstructured":"Manan Suri Puneet Mathur Franck Dernoncourt Kanika Goswami Ryan A. Rossi and Dinesh Manocha. [n.d.]. VisDoM: Multi-Document QA with Visually Rich Elements Using Multimodal Retrieval-Augmented Generation. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies.","DOI":"10.18653\/v1\/2025.naacl-long.310"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.1094"},{"key":"e_1_3_2_1_74_1","unstructured":"TabTab AI. [n.d.]. TabTab."},{"key":"e_1_3_2_1_75_1","unstructured":"Zirui Tang Weizheng Wang Zihang Zhou Yang Jiao Bangrui Xu Boyu Niu Xuanhe Zhou Guoliang Li Yeye He Wei Zhou et al. 2025. LLM\/Agent-as-Data-Analyst: A Survey. arXiv preprint arXiv:2509.23988 (2025)."},{"key":"e_1_3_2_1_76_1","volume-title":"Atom of Thoughts for Markov LLM Test-Time Scaling. CoRR","author":"Teng Fengwei","year":"2025","unstructured":"Fengwei Teng, Zhaoyang Yu, Quan Shi, Jiayi Zhang, Chenglin Wu, and Yuyu Luo. 2025. Atom of Thoughts for Markov LLM Test-Time Scaling. CoRR (2025)."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"crossref","unstructured":"Fen Wang Bomiao Wang Xueli Shu Zhen Liu Zekai Shao Chao Liu and Siming Chen. 2025. ChartInsighter: An Approach for Mitigating Hallucination in Time-series Chart Summary Generation with A Benchmark Dataset. arXiv:2501.09349 [cs.CL]","DOI":"10.1109\/TVCG.2025.3567122"},{"key":"e_1_3_2_1_78_1","volume-title":"AOP: Automated and interactive llm pipeline orchestration for answering complex queries. CIDR.","author":"Wang Jiayi","year":"2025","unstructured":"Jiayi Wang and Guoliang Li. 2025. AOP: Automated and interactive llm pipeline orchestration for answering complex queries. CIDR."},{"key":"e_1_3_2_1_79_1","volume-title":"iDataLake: An llm-powered analytics system on data lakes. Data Engineering","author":"Wang Jiayi","year":"2025","unstructured":"Jiayi Wang, Guoliang Li, and Jianhua Feng. 2025. iDataLake: An llm-powered analytics system on data lakes. Data Engineering (2025)."},{"key":"e_1_3_2_1_80_1","volume-title":"REAR: A Relevance-Aware Retrieval-Augmented Framework for Open-Domain Question Answering. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing.","author":"Wang Yuhao","unstructured":"Yuhao Wang, Ruiyang Ren, Junyi Li, Xin Zhao, Jing Liu, and Ji-Rong Wen. [n.d.]. REAR: A Relevance-Aware Retrieval-Augmented Framework for Open-Domain Question Answering. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing."},{"key":"e_1_3_2_1_81_1","unstructured":"Yifan Wu Yiran Peng Yiyu Chen Jianhao Ruan Zijie Zhuang Cheng Yang Jiayi Zhang Man Chen Yenchi Tseng Zhaoyang Yu Liang Chen Yuyao Zhai Bang Liu Chenglin Wu and Yuyu Luo. 2026. AutoWebWorld: Synthesizing Infinite Verifiable Web Environments via Finite State Machines. arXiv:2602.14296 [cs.AI] https:\/\/arxiv.org\/abs\/2602.14296"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.710"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.14778\/3681954.3681992"},{"key":"e_1_3_2_1_84_1","volume-title":"VisJudge-Bench: Aesthetics and Quality Assessment of Visualizations. CoRR abs\/2510.22373","author":"Xie Yupeng","year":"2025","unstructured":"Yupeng Xie, Zhiyang Zhang, Yifan Wu, Sirong Lu, Jiayi Zhang, Zhaoyang Yu, Jinlin Wang, Sirui Hong, Bang Liu, Chenglin Wu, and Yuyu Luo. 2025. VisJudge-Bench: Aesthetics and Quality Assessment of Visualizations. CoRR abs\/2510.22373 (2025)."},{"key":"e_1_3_2_1_85_1","unstructured":"Wenyi Xu Yuren Mao Xiaolu Zhang Chao Zhang Xuemei Dong Mengfei Zhang and Yunjun Gao. 2025. DAgent: A Relational Database-Driven Data Analysis Report Generation Agent. arXiv:2503.13269 [cs.DB]"},{"key":"e_1_3_2_1_86_1","volume-title":"MCTuner: Spatial Decomposition-Enhanced Database Tuning via LLM-Guided Exploration. arXiv preprint arXiv:2509.06298","author":"Yan Zihan","year":"2025","unstructured":"Zihan Yan, Rui Xi, and Mengshu Hou. 2025. MCTuner: Spatial Decomposition-Enhanced Database Tuning via LLM-Guided Exploration. arXiv preprint arXiv:2509.06298 (2025)."},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.14778\/3748191.3748200"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"crossref","unstructured":"Zhaorui Yang Bo Pan Han Wang Yiyao Wang Xingyu Liu Minfeng Zhu Bo Zhang and Wei Chen. 2025. Multimodal DeepResearcher: Generating Text-Chart Interleaved Reports From Scratch with Agentic Framework. arXiv:2506.02454 [cs.CL]","DOI":"10.1609\/aaai.v40i40.40734"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.701"},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591708"},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.853"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-020-00636-3"},{"key":"e_1_3_2_1_93_1","unstructured":"Huan Zhang Yizhan Li Wenhao Huang Ziyu Hou Yu Song Xuye Liu Farshid Effaty Jinya Jiang Sifan Wu Qianggang Ding Izumi Takahara Leonard R. MacGillivray Teruyasu Mizoguchi Tianshu Yu Lizi Liao Yuyu Luo Yu Rong Jia Li Ying Diao Heng Ji and Bang Liu. 2026. Towards Agentic Intelligence for Materials Science. arXiv:2602.00169 [cond-mat.mtrl-sci] https:\/\/arxiv.org\/abs\/2602.00169"},{"key":"e_1_3_2_1_94_1","volume-title":"Autoddg: Automated dataset description generation using large language models. arXiv preprint arXiv:2502.01050","author":"Zhang Haoxiang","year":"2025","unstructured":"Haoxiang Zhang, Yurong Liu, A\u00e9cio Santos, Juliana Freire, et al. 2025. Autoddg: Automated dataset description generation using large language models. arXiv preprint arXiv:2502.01050 (2025)."},{"key":"e_1_3_2_1_95_1","volume-title":"Aflow: Automating agentic workflow generation. arXiv preprint arXiv:2410.10762","author":"Zhang Jiayi","year":"2024","unstructured":"Jiayi Zhang, Jinyu Xiang, Zhaoyang Yu, Fengwei Teng, Xionghui Chen, Jiaqi Chen, Mingchen Zhuge, Xin Cheng, Sirui Hong, Jinlin Wang, et al. 2024. Aflow: Automating agentic workflow generation. arXiv preprint arXiv:2410.10762 (2024)."},{"key":"e_1_3_2_1_96_1","unstructured":"Shaolei Zhang Ju Fan Meihao Fan Guoliang Li and Xiaoyong Du. 2025. DeepAnalyze: Agentic Large Language Models for Autonomous Data Science. arXiv:2510.16872 [cs.AI]"},{"key":"e_1_3_2_1_97_1","volume-title":"Dial: A Knowledge-Grounded Dialect-Specific NL2SQL System. arXiv:2603.07449 [cs.DB] https:\/\/arxiv.org\/abs\/2603.07449","author":"Zhang Xiang","year":"2026","unstructured":"Xiang Zhang, Hongming Xu, Le Zhou, Wei Zhou, Xuanhe Zhou, Guoliang Li, Yuyu Luo, Changdong Liu, Guorun Chen, Jiang Liao, and Fan Wu. 2026. Dial: A Knowledge-Grounded Dialect-Specific NL2SQL System. arXiv:2603.07449 [cs.DB] https:\/\/arxiv.org\/abs\/2603.07449"},{"key":"e_1_3_2_1_98_1","unstructured":"Yuxin Zhang Meihao Fan Ju Fan Mingyang Yi Yuyu Luo Jian Tan and Guoliang Li. 2025. Reward-SQL: Boosting Text-to-SQL via Stepwise Reasoning and Process-Supervised Rewards. arXiv:2505.04671 [cs.CL] https:\/\/arxiv.org\/abs\/2505.04671"},{"key":"e_1_3_2_1_99_1","volume-title":"DataPuzzle: Breaking Free from the Hallucinated Promise of LLMs in Data Analysis. arXiv preprint arXiv:2504.10036","author":"Zhang Zhengxuan","year":"2025","unstructured":"Zhengxuan Zhang, Zhuowen Liang, Yin Wu, Teng Lin, Yuyu Luo, and Nan Tang. 2025. DataPuzzle: Breaking Free from the Hallucinated Promise of LLMs in Data Analysis. arXiv preprint arXiv:2504.10036 (2025)."},{"key":"e_1_3_2_1_100_1","volume-title":"Automatic database knob tuning: A survey","author":"Zhao Xinyang","year":"2023","unstructured":"Xinyang Zhao, Xuanhe Zhou, and Guoliang Li. 2023. Automatic database knob tuning: A survey. IEEE Transactions on Knowledge and Data Engineering (2023)."},{"key":"e_1_3_2_1_101_1","volume-title":"GaussMaster: An LLM-based Database Copilot System. arXiv preprint arXiv:2506.23322","author":"Zhou Wei","year":"2025","unstructured":"Wei Zhou, Ji Sun, Xuanhe Zhou, Guoliang Li, Luyang Liu, Hao Wu, and Tianyuan Wang. 2025. GaussMaster: An LLM-based Database Copilot System. arXiv preprint arXiv:2506.23322 (2025)."},{"key":"e_1_3_2_1_102_1","unstructured":"Wei Zhou Jun Zhou Haoyu Wang Zhenghao Li Qikang He Shaokun Han Guoliang Li Xuanhe Zhou Yeye He Chunwei Liu Zirui Tang Bin Wang Shen Tang Kai Zuo Yuyu Luo Zhenzhe Zheng Conghui He Jingren Zhou and Fan Wu. 2026. Can LLMs Clean Up Your Mess? A Survey of Application-Ready Data Preparation with LLMs. arXiv:2601.17058 [cs.DB] https:\/\/arxiv.org\/abs\/2601.17058"},{"key":"e_1_3_2_1_103_1","unstructured":"Xuanhe Zhou Junxuan He Wei Zhou Haodong Chen Zirui Tang Haoyu Zhao Xin Tong Guoliang Li Youmin Chen Jun Zhou et al. 2025. A Survey of LLM\u00d7 DATA. arXiv preprint arXiv:2505.18458 (2025)."},{"key":"e_1_3_2_1_104_1","volume-title":"LLM as DBA. arXiv preprint arXiv:2308.05481","author":"Zhou Xuanhe","year":"2023","unstructured":"Xuanhe Zhou, Guoliang Li, and Zhiyuan Liu. 2023. LLM as DBA. arXiv preprint arXiv:2308.05481 (2023)."},{"key":"e_1_3_2_1_105_1","doi-asserted-by":"publisher","DOI":"10.14778\/3675034.3675043"},{"key":"e_1_3_2_1_106_1","volume-title":"Are large language models good statisticians? Advances in Neural Information Processing Systems","author":"Zhu Yizhang","year":"2024","unstructured":"Yizhang Zhu, Shiyin Du, Boyan Li, Yuyu Luo, and Nan Tang. 2024. Are large language models good statisticians? Advances in Neural Information Processing Systems (2024)."},{"key":"e_1_3_2_1_107_1","volume-title":"EllieSQL: Cost-Efficient Text-to-SQL with Complexity-Aware Routing. In Second Conference on Language Modeling.","author":"Zhu Yizhang","year":"2025","unstructured":"Yizhang Zhu, Runzhi JIANG, Boyan Li, Nan Tang, and Yuyu Luo. 2025. EllieSQL: Cost-Efficient Text-to-SQL with Complexity-Aware Routing. In Second Conference on Language Modeling."},{"key":"e_1_3_2_1_108_1","unstructured":"Yizhang Zhu Liangwei Wang Chenyu Yang Xiaotian Lin Boyan Li Wei Zhou Xinyu Liu Zhangyang Peng Tianqi Luo Yu Li Chengliang Chai Chong Chen Shimin Di Ju Fan Ji Sun Nan Tang Fugee Tsung Jiannan Wang Chenglin Wu Yanwei Xu Shaolei Zhang Yong Zhang Xuanhe Zhou Guoliang Li and Yuyu Luo. 2025. A Survey of Data Agents: Emerging Paradigm or Overstated Hype? arXiv:2510.23587 [cs.DB] https:\/\/arxiv.org\/abs\/2510.23587"}],"event":{"name":"SIGMOD\/PODS '26: International Conference on Management of Data","location":"Bengaluru India","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Companion of the International Conference on Management of Data"],"original-title":[],"deposited":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T19:16:44Z","timestamp":1779823004000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3788853.3801878"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,30]]},"references-count":108,"alternative-id":["10.1145\/3788853.3801878","10.1145\/3788853"],"URL":"https:\/\/doi.org\/10.1145\/3788853.3801878","relation":{},"subject":[],"published":{"date-parts":[[2026,5,30]]},"assertion":[{"value":"2026-05-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}