{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T20:21:32Z","timestamp":1774729292283,"version":"3.50.1"},"reference-count":253,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s00778-025-00953-5","type":"journal-article","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T10:26:03Z","timestamp":1768213563000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Revisiting data analysis with Pre-trained foundation 
models"],"prefix":"10.1007","volume":"35","author":[{"given":"Chen","family":"Liang","sequence":"first","affiliation":[]},{"given":"Donghua","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Zhiyu","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Tianle","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Boyu","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Yuqing","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Wenqi","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7521-2871","authenticated-orcid":false,"given":"Hongzhi","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,12]]},"reference":[{"key":"953_CR1","unstructured":"Ahuja, K., et al.: \u201cInterventional causal representation learning\u201d. In: International conference on machine learning. PMLR. 372\u2013407, (2023)"},{"key":"953_CR2","unstructured":"Aky\u00fcrek, E., et al.: \u201cWhat learning algorithm is in-context learning? Investigations with linear models\u201d. In: The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net, (2023). https:\/\/openreview.net\/forum?id=0g0X4H8yN4I"},{"key":"953_CR3","unstructured":"Allen-Zhu, Z., Li, Y.: \u201cPhysics of Language Models: Part 1, Context-Free Grammar\u201d. (2023). arXiv: 2305.13673 https:\/\/doi.org\/10.48550\/arXiv.2305.13673"},{"key":"953_CR4","doi-asserted-by":"crossref","unstructured":"Alshahwan, N., et al.: \u201cAutomated unit test improvement using large language models at meta\u201d. In: Companion Proceedings of the 32nd ACM International Conference on the Foundations of Software Engineering. 
185\u2013196, (2024)","DOI":"10.1145\/3663529.3663839"},{"key":"953_CR5","doi-asserted-by":"crossref","unstructured":"Arlot, S., Celisse, A.: \u201cA survey of cross-validation procedures for model selection\u201d. In: (2010)","DOI":"10.1214\/09-SS054"},{"key":"953_CR6","unstructured":"Atzeni, M., Sachan, M., Loukas, A.: \u201cInfusing Lattice Symmetry Priors in Attention Mechanisms for Sample-Efficient Abstract Geometric Reasoning\u201d. In: International Conference on Machine Learning. (2023). https:\/\/api.semanticscholar.org\/CorpusID:259088564"},{"key":"953_CR7","unstructured":"Bai, Y., et al.: \u201cTransformers as statisticians: Provable in-context learning with in-context algorithm selection\u201d. In: Advances in neural information processing systems 36 (2024)"},{"key":"953_CR8","unstructured":"Basu, S., Rawat, A.S., Zaheer, M.: \u201cA Statistical Framework for Data-dependent Retrieval-Augmented Models\u201d. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=A9MiJdetnZ"},{"key":"953_CR9","unstructured":"Basu, S., Rawat, A.S., Zaheer, M: \u201cA Statistical Perspective on Retrieval-Based Models\u201d. In: International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA. Ed. by Andreas Krause et al. Vol.\u00a0202. Proceedings of Machine Learning Research. PMLR, 1852\u20131886, (2023). https:\/\/proceedings.mlr.press\/v202\/basu23a.html"},{"key":"953_CR10","doi-asserted-by":"crossref","unstructured":"Batini, C., et al.: \u201cMethodologies for data quality assessment and improvement\u201d. In: ACM computing surveys (CSUR) 41.3 ,1\u201352 (2009)","DOI":"10.1145\/1541880.1541883"},{"key":"953_CR11","doi-asserted-by":"crossref","unstructured":"Bavishi, R., et al.: \u201cNeurosymbolic repair for low-code formula languages\u201d. In: Proceedings of the ACM on Programming Languages 6 , 1093 \u20131122, (2022). 
https:\/\/api.semanticscholar.org\/CorpusID:251040048","DOI":"10.1145\/3563327"},{"key":"953_CR12","doi-asserted-by":"crossref","unstructured":"Bender, E.M., et al.: \u201cOn the dangers of stochastic parrots: Can language models be too big?\u201d In: Proceedings of the 2021 ACM conference on fairness, accountability, and transparency. 610\u2013623, (2021)","DOI":"10.1145\/3442188.3445922"},{"key":"953_CR13","unstructured":"Blaauwbroek, L., et al.: \u201cGraph2Tac: Online Representation Learning of Formal Math Concepts\u201d. In: Forty-first International Conference on Machine Learning"},{"issue":"6","key":"953_CR14","doi-asserted-by":"publisher","first-page":"6834","DOI":"10.1609\/aaai.v37i6.25837","volume":"37","author":"C Brand","year":"2023","unstructured":"Brand, C., Ganian, R., Simonov, K.: A parameterized theory of PAC learning. Proceedings of the AAAI Conference on Artificial Intelligence. 37(6), 6834\u20136841 (2023)","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence."},{"key":"953_CR15","unstructured":"Brandt, S., et al.: Statistical and computational methods in data analysis. 04. North-Holland Publishing Company Amsterdam, The Netherlands: (1976)"},{"key":"953_CR16","unstructured":"Brooker, D.: Practical Reliability Data Analysis for Non-Reliability Engineers. Artech House, (2020)"},{"key":"953_CR17","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"953_CR18","unstructured":"Bruce, P., Bruce, A., Gedeck, P.: Practical statistics for data scientists: 50+ essential concepts using R and Python. O\u2019Reilly Media, (2020)"},{"key":"953_CR19","unstructured":"Buchholz, S., Sch\u00f6lkopf, B.: \u201cRobustness of Nonlinear Representation Learning\u201d. 
In: Forty-first International Conference on Machine Learning"},{"key":"953_CR20","doi-asserted-by":"crossref","unstructured":"Carrott, P., et al.: \u201cCoqPyt: Proof Navigation in Python in the Era of LLMs\u201d. In: SIGSOFT FSE Companion. 2024. https:\/\/api.semanticscholar.org\/CorpusID:269614455","DOI":"10.1145\/3663529.3663814"},{"key":"953_CR21","doi-asserted-by":"crossref","unstructured":"Caruana, R., Nori, H.: \u201cWhy data scientists prefer glassbox machine learning: Algorithms, differential privacy, editing and bias mitigation\u201d. In: Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining. 4776\u20134777, (2022)","DOI":"10.1145\/3534678.3542627"},{"key":"953_CR22","unstructured":"Cavalleri, E., et al.: \u201cSPIREX: Improving LLM-based relation extraction from RNA-focused scientific literature using graph machine learning\u201d. In: Proceedings of Workshops at the 50th International Conference on Very Large Data Bases, VLDB 2024, Guangzhou, China, August 26-30, 2024. VLDB.org, (2024). https:\/\/vldb.org\/workshops\/2024\/proceedings\/LLM+KG\/LLM+KG-12.pdf"},{"key":"953_CR23","doi-asserted-by":"publisher","unstructured":"Chakraborty, N., Ornik, M., Driggs-Campbell, K.R.: \u201cHallucination Detection in Foundation Models for Decision-Making: A Flexible Definition and Review of the State of the Art\u201d. In: ACM Comput. Surv. 57.7 , 188:1\u2013188:35, (2025). https:\/\/doi.org\/10.1145\/3716846","DOI":"10.1145\/3716846"},{"key":"953_CR24","doi-asserted-by":"publisher","unstructured":"Chang, T.Y., Jia, R.: \u201cData Curation Alone Can Stabilize In-context Learning\u201d. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), ACL 2023, Toronto, Canada, July 9-14, 2023. Ed. by Anna Rogers, Jordan L. Boyd-Graber, and Naoaki Okazaki. Association for Computational Linguistics, 8123\u20138144, (2023). 
https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.452","DOI":"10.18653\/v1\/2023.acl-long.452"},{"issue":"3","key":"953_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3641289","volume":"15","author":"Y Chang","year":"2024","unstructured":"Chang, Y., et al.: A survey on evaluation of large language models. ACM transactions on intelligent systems and technology 15(3), 1\u201345 (2024)","journal-title":"ACM transactions on intelligent systems and technology"},{"key":"953_CR26","unstructured":"Chavan, A., et al.: \u201cFaster and Lighter LLMs: A Survey on Current Challenges and Way Forward\u201d. In: Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, IJCAI 2024, Jeju, South Korea, August 3-9, 2024. ijcai.org, 7980\u20137988, (2024) https:\/\/www.ijcai.org\/proceedings\/2024\/883"},{"key":"953_CR27","unstructured":"Cheng, Z., et al.: \u201cBinding Language Models in Symbolic Languages\u201d. In: The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net, (2023). https:\/\/openreview.net\/forum?id=lH1PV42cbF"},{"key":"953_CR28","doi-asserted-by":"crossref","unstructured":"Chow, K., et al.: \u201cPerformance Optimization in the LLM World 2024\u201d. In: Companion of the 15th ACM\/SPEC International Conference on Performance Engineering. 156\u2013157, (2024)","DOI":"10.1145\/3629527.3651436"},{"key":"953_CR29","unstructured":"Cummins, C., et al.: \u201cLarge language models for compiler optimization\u201d. In: arXiv preprint arXiv:2309.07062 (2023)"},{"key":"953_CR30","doi-asserted-by":"crossref","unstructured":"Cummins, C., et al.: \u201cMeta Large Language Model Compiler: Foundation Models of Compiler Optimization\u201d. In: arXiv preprint arXiv:2407.02524 (2024)","DOI":"10.1145\/3708493.3712691"},{"key":"953_CR31","unstructured":"Dai, Z., et al.: \u201cGood semi-supervised learning that requires a bad gan\u201d. 
In: Advances in neural information processing systems 30 (2017)"},{"key":"953_CR32","unstructured":"Davenport, T., Harris, J.: Competing on analytics: Updated, with a new introduction: The new science of winning. Harvard Business Press, (2017)"},{"key":"953_CR33","unstructured":"Del\u00e9tang, G., et al.: \u201cLanguage Modeling Is Compression\u201d. In: The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=jznbgiynus"},{"issue":"3","key":"953_CR34","doi-asserted-by":"publisher","first-page":"307","DOI":"10.5555\/3430915.3442430.","volume":"14","author":"X Deng","year":"2020","unstructured":"Deng, X., et al.: TURL: Table Understanding through Representation Learning. Proc. VLDB Endow. 14(3), 307\u2013319 (2020). https:\/\/doi.org\/10.5555\/3430915.3442430. (http:\/\/www.vldb.org\/pvldb\/vol14\/p307-deng.pdf)","journal-title":"Proc. VLDB Endow."},{"key":"953_CR35","doi-asserted-by":"publisher","unstructured":"Dhuliawala, S., et al.: \u201cChain-of-Verification Reduces Hallucination in Large Language Models\u201d. In: Findings of the Association for Computational Linguistics, ACL 2024, Bangkok, Thailand and virtual meeting, August 11-16, 2024. Ed. by Lun-Wei Ku, Andre Martins, and Vivek Srikumar. Association for Computational Linguistics, 3563\u20133578, (2024). https:\/\/doi.org\/10.18653\/v1\/2024.findings-acl.212","DOI":"10.18653\/v1\/2024.findings-acl.212"},{"key":"953_CR36","doi-asserted-by":"publisher","unstructured":"Dibia, V.: \u201cLIDA: A Tool for Automatic Generation of Grammar-Agnostic Visualizations and Infographics using Large Language Models\u201d. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics: System Demonstrations, ACL 2023, Toronto, Canada, July 10-12, 2023. Ed. by Danushka Bollegala, Ruihong Huang, and Alan Ritter. 
Association for Computational Linguistics,113\u2013126, ( 2023) https:\/\/doi.org\/10.18653\/v1\/2023.acl-demo.11","DOI":"10.18653\/v1\/2023.acl-demo.11"},{"issue":"3","key":"953_CR37","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1145\/212094.212114","volume":"27","author":"T Dietterich","year":"1995","unstructured":"Dietterich, T.: Overfitting and undercomputing in machine learning. ACM computing surveys (CSUR) 27(3), 326\u2013327 (1995)","journal-title":"ACM computing surveys (CSUR)"},{"key":"953_CR38","first-page":"41386","volume":"36","author":"T Dinh","year":"2024","unstructured":"Dinh, T., et al.: Large language models of code fail at completing code with potential bugs. Advances in Neural Information Processing Systems 36, 41386\u201341412 (2024)","journal-title":"Advances in Neural Information Processing Systems"},{"key":"953_CR39","doi-asserted-by":"publisher","unstructured":"Dong, Y., et al.: \u201cDeepJoin: Joinable Table Discovery with Pre-trained Language Models\u201d. Proc. VLDB Endow. 16.10 , 2458\u20132470. (2023) https:\/\/doi.org\/10.14778\/3603581.3603587. https:\/\/www.vldb.org\/pvldb\/vol16\/p2458-dong.pdf","DOI":"10.14778\/3603581.3603587"},{"key":"953_CR40","unstructured":"Du, Y., Kaelbling, L.P.: \u201cPosition: Compositional Generative Modeling: A Single Model is Not All You Need\u201d. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=SoNexFx8qz"},{"issue":"8","key":"953_CR41","doi-asserted-by":"publisher","first-page":"8363","DOI":"10.1609\/aaai.v38i8.28678","volume":"38","author":"Y Du","year":"2024","unstructured":"Du, Y., et al.: Enhancing job recommendation through llm-based generative adversarial networks. Proceedings of the AAAI Conference on Artificial Intelligence. 
38(8), 8363\u20138371 (2024)","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence."},{"issue":"1","key":"953_CR42","doi-asserted-by":"publisher","first-page":"12731","DOI":"10.1038\/s41598-024-63380-6","volume":"14","author":"M Dubiel","year":"2024","unstructured":"Dubiel, M., et al.: On-device query intent prediction with lightweight LLMs to support ubiquitous conversations. Scientific Reports 14(1), 12731 (2024)","journal-title":"Scientific Reports"},{"key":"953_CR43","unstructured":"Ebraheem, M., et al.: \u201cDeepER\u2013Deep Entity Resolution\u201d. In: arXiv preprint arXiv:1710.00597 (2017)"},{"key":"953_CR44","doi-asserted-by":"crossref","unstructured":"Ellis, K., et al.: \u201cDreamCoder: growing generalizable, interpretable knowledge with wake\u2013sleep Bayesian program learning\u201d. In: Philosophical Transactions of the Royal Society A 381 (2020). https:\/\/api.semanticscholar.org\/CorpusID:219687434","DOI":"10.1098\/rsta.2022.0050"},{"key":"953_CR45","doi-asserted-by":"publisher","unstructured":"Fan, G., et al.: \u201cSemantics-aware Dataset Discovery from Data Lakes with Contextualized Column-based Representation Learning\u201d. Proc. VLDB Endow. 16.7 , 1726\u20131739. (2023) https:\/\/doi.org\/10.14778\/3587136.3587146. https:\/\/www.vldb.org\/pvldb\/vol16\/p1726-fan.pdf","DOI":"10.14778\/3587136.3587146"},{"key":"953_CR46","doi-asserted-by":"publisher","unstructured":"Fan, J., et al.: \u201cCombining Small Language Models and Large Language Models for Zero-Shot NL2SQL\u201d. Proc. VLDB Endow. 17.11 ,2750\u20132763, (2024). https:\/\/doi.org\/10.14778\/3681954.3681960. https:\/\/www.vldb.org\/pvldb\/vol17\/p2750-fan.pdf","DOI":"10.14778\/3681954.3681960"},{"key":"953_CR47","doi-asserted-by":"publisher","unstructured":"Fan, W., et al.: \u201cA Survey on RAG Meeting LLMs: Towards Retrieval-Augmented Large Language Models\u201d. 
In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, KDD 2024, Barcelona, Spain, August 25-29, 2024. Ed. by Ricardo Baeza-Yates and Francesco Bonchi. ACM, 6491\u20136501, (2024). https:\/\/doi.org\/10.1145\/3637528.3671470","DOI":"10.1145\/3637528.3671470"},{"key":"953_CR48","unstructured":"Fang, X., et al.: \u201cLarge Language Models (LLMs) on Tabular Data: Prediction, Generation, and Understanding - A Survey\u201d. In: Trans. Mach. Learn. Res. 2024 (2024). https:\/\/openreview.net\/forum?id=IZnrCGF9WI"},{"issue":"11","key":"953_CR49","doi-asserted-by":"publisher","first-page":"3302","DOI":"10.14778\/3611479.3611527","volume":"16","author":"RC Fernandez","year":"2023","unstructured":"Fernandez, R.C., et al.: How large language models will disrupt data management. Proceedings of the VLDB Endowment 16(11), 3302\u20133309 (2023)","journal-title":"Proceedings of the VLDB Endowment"},{"key":"953_CR50","doi-asserted-by":"publisher","unstructured":"F\u00e9vry, T., et al.: \u201cEntities as Experts: Sparse Memory Access with Entity Supervision\u201d. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, EMNLP 2020, Online, November 16-20, 2020. Ed. by Bonnie Webber et al. Association for Computational Linguistics, 4937\u20134951, (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.400","DOI":"10.18653\/v1\/2020.emnlp-main.400"},{"key":"953_CR51","unstructured":"Fey, M., et al.: \u201cPosition: Relational Deep Learning-Graph Representation Learning on Relational Databases\u201d. In: Forty-first International Conference on Machine Learning"},{"key":"953_CR52","doi-asserted-by":"publisher","first-page":"5799","DOI":"10.1109\/OJCOMS.2024.3456549","volume":"5","author":"O Friha","year":"2024","unstructured":"Friha, O., et al.: LLM-Based Edge Intelligence: A Comprehensive Survey on Architectures, Applications, Security and Trustworthiness. IEEE Open J. Commun. Soc. 5, 5799\u20135856 (2024). 
https:\/\/doi.org\/10.1109\/OJCOMS.2024.3456549","journal-title":"IEEE Open J. Commun. Soc."},{"key":"953_CR53","doi-asserted-by":"crossref","unstructured":"Fu, C., et al.: \u201cEnd-to-end multi-perspective matching for entity resolution\u201d. In: Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence. International Joint Conferences on Artificial Intelligence Organization. 4961, (2019)","DOI":"10.24963\/ijcai.2019\/689"},{"key":"953_CR54","doi-asserted-by":"publisher","unstructured":"Fu, H., et al.: \u201cCatSQL: Towards Real World Natural Language to SQL Applications\u201d. Proc. VLDB Endow. 16.6 , 1534\u20131547, (2023). https:\/\/doi.org\/10.14778\/3583140.3583165. https:\/\/www.vldb.org\/pvldb\/vol16\/p1534-fu.pdf","DOI":"10.14778\/3583140.3583165"},{"key":"953_CR55","doi-asserted-by":"publisher","unstructured":"Gao, D., et al.: \u201cText-to-SQL Empowered by Large Language Models: A Benchmark Evaluation\u201d. In: Proc. VLDB Endow. 17.5 , 1132\u20131145, (2024). https:\/\/doi.org\/10.14778\/3641204.3641221. https:\/\/www.vldb.org\/pvldb\/vol17\/p1132-gao.pdf","DOI":"10.14778\/3641204.3641221"},{"key":"953_CR56","unstructured":"Gao, Y., et al.: \u201cRetrieval-augmented generation for large language models: A survey\u201d. In: arXiv preprint arXiv:2312.10997 (2023)"},{"key":"953_CR57","doi-asserted-by":"crossref","unstructured":"Reyes, H.G., Giachetti, R.: \u201cUsing experts to develop a supply chain maturity model in Mexico\u201d. In: Supply Chain Management: An International Journal 15.6 , 415\u2013424, (2010)","DOI":"10.1108\/13598541011080400"},{"issue":"10","key":"953_CR58","doi-asserted-by":"publisher","first-page":"1587","DOI":"10.3390\/jpm12101587","volume":"12","author":"A Gerussi","year":"2022","unstructured":"Gerussi, A., et al.: LLM-PBC: Logic Learning Machine-based explainable rules accurately stratify the genetic risk of Primary Biliary Cholangitis. 
Journal of Personalized Medicine 12(10), 1587 (2022)","journal-title":"Journal of Personalized Medicine"},{"key":"953_CR59","unstructured":"Grand, G., et al.: \u201cLILO: Learning Interpretable Libraries by Compressing and Documenting Code\u201d. In: The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=TqYbAWKMIe"},{"key":"953_CR60","first-page":"507","volume":"35","author":"L Grinsztajn","year":"2022","unstructured":"Grinsztajn, L., Oyallon, E., Varoquaux, G.: Why do tree-based models still outperform deep learning on typical tabular data? Adv. Neural. Inf. Process. Syst. 35, 507\u2013520 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"2","key":"953_CR61","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3589292","volume":"1","author":"Z Gu","year":"2023","unstructured":"Gu, Z., et al.: Few-shot text-to-sql translation using structure and content prompt learning. Proceedings of the ACM on Management of Data 1(2), 1\u201328 (2023)","journal-title":"Proceedings of the ACM on Management of Data"},{"key":"953_CR62","unstructured":"Guyon, I., Elisseeff, A.: \u201cAn introduction to variable and feature selection\u201d. In: Journal of machine learning research 3.Mar , 1157\u20131182 (2003)"},{"key":"953_CR63","unstructured":"Hansen, A.A., Calissano, A., Feragen, A.: \u201cInterpreting Equivariant Representations\u201d. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=vFk9fqXLst"},{"key":"953_CR64","doi-asserted-by":"crossref","unstructured":"Hastie, T., et al.: The elements of statistical learning: data mining, inference, and prediction. 2. 
Springer, (2009)","DOI":"10.1007\/978-0-387-84858-7"},{"key":"953_CR65","doi-asserted-by":"publisher","unstructured":"He, W., et al.: \u201cOptimizing Video Selection LIMIT Queries With Commonsense Knowledge\u201d. In: Proc. VLDB Endow. 17.7 , 1751\u20131764, (2024). https:\/\/doi.org\/10.14778\/3654621.3654639. https:\/\/www.vldb.org\/pvldb\/vol17\/p1751-he.pdf","DOI":"10.14778\/3654621.3654639"},{"key":"953_CR66","unstructured":"Hendrycks, D., et al.: \u201cMeasuring Coding Challenge Competence With APPS\u201d. In: Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021, December 2021, virtual. Ed. by Joaquin Vanschoren and Sai-Kit Yeung. (2021). https:\/\/datasets-benchmarks-proceedings.neurips.cc\/paper\/2021\/hash\/c24cd76e1ce41366a4bbe8a49b02a028-Abstract-round2.html"},{"key":"953_CR67","unstructured":"Hollmann, N., M\u00fcller, S., Hutter, F.: \u201cLarge language models for automated data science: Introducing caafe for context-aware automated feature engineering\u201d. In: Advances in Neural Information Processing Systems 36 (2024)"},{"key":"953_CR68","unstructured":"Hollmann, N., M\u00fcller, S.G., Hutter, F.: \u201cLarge Language Models for Automated Data Science: Introducing CAAFE for Context-Aware Automated Feature Engineering\u201d. In: Neural Information Processing Systems. (2023). https:\/\/api.semanticscholar.org\/CorpusID:258547322"},{"key":"953_CR69","doi-asserted-by":"crossref","unstructured":"Hollmann, N., et al.: \u201cAccurate predictions on small data with a tabular foundation model\u201d. In: Nature 637.8045 , 319\u2013326 (2025)","DOI":"10.1038\/s41586-024-08328-6"},{"key":"953_CR70","unstructured":"Hongjin, S., et al.: \u201cBRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval\u201d. In: The Thirteenth International Conference on Learning Representations. (2024)"},{"key":"953_CR71","unstructured":"Hsu, J., et al.: \u201cWhat\u2019s Left? 
Concept Grounding with Logic-Enhanced Foundation Models\u201d. In: Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023. Ed. by Alice Oh et al. (2023). http:\/\/papers.nips.cc\/paper\/_files\/paper\/2023\/hash\/79fea214543ba263952ac3f4e5452b14-Abstract-Conference.html"},{"key":"953_CR72","unstructured":"Hu, E.J., et al.: \u201cAmortizing intractable inference in large language models\u201d. In: The Twelfth International Conference on Learning Representations"},{"key":"953_CR73","unstructured":"Hu, X., et al.: \u201cInfiAgent-DABench: Evaluating Agents on Data Analysis Tasks\u201d. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=d5LURMSfTx"},{"key":"953_CR74","doi-asserted-by":"crossref","unstructured":"Huang, F., et al.: \u201cKOSA: KO enhanced salary analytics based on knowledge graph and LLM capabilities\u201d. In: 2023 IEEE International Conference on Data Mining Workshops (ICDMW). IEEE. (2023), 499\u2013505","DOI":"10.1109\/ICDMW60847.2023.00071"},{"key":"953_CR75","doi-asserted-by":"publisher","unstructured":"Huang, J., Chang, K.C.C.: \u201cTowards Reasoning in Large Language Models: A Survey\u201d. In: Findings of the Association for Computational Linguistics: ACL 2023, Toronto, Canada, July 9-14, 2023. Ed. by Anna Rogers, Jordan L. Boyd-Graber, and Naoaki Okazaki. Association for Computational Linguistics, (2023), 1049\u20131065. https:\/\/doi.org\/10.18653\/v1\/2023.findings-acl.67","DOI":"10.18653\/v1\/2023.findings-acl.67"},{"key":"953_CR76","doi-asserted-by":"publisher","unstructured":"Huang, L., et al.: \u201cA Survey on Hallucination in Large Language Models: Principles, Taxonomy, Challenges, and Open Questions\u201d. In: ACM Trans. Inf. Syst. 43.2 , 42:1\u201342:55, (2025). 
https:\/\/doi.org\/10.1145\/3703155","DOI":"10.1145\/3703155"},{"key":"953_CR77","unstructured":"Huh, J.S., Shin, C., Choi, E.: \u201cPool-search-demonstrate: Improving data-wrangling LLMs via better in-context examples\u201d. In: NeurIPS 2023 Second Table Representation Learning Workshop. (2023)"},{"key":"953_CR78","doi-asserted-by":"publisher","unstructured":"Ibrahim, N., et al.: \u201cA survey on augmenting knowledge graphs (KGs) with large language models (LLMs): models, evaluation metrics, benchmarks, and challenges\u201d. In: Discov. Artif. Intell. 4.1 , 76, (2024). https:\/\/doi.org\/10.1007\/s44163-024-00175-8","DOI":"10.1007\/s44163-024-00175-8"},{"key":"953_CR79","volume-title":"DAMA-DMBOK: Data management body of knowledge","author":"Dama International","year":"2017","unstructured":"Dama International: DAMA-DMBOK: Data management body of knowledge. Technics Publications, LLC (2017)"},{"key":"953_CR80","doi-asserted-by":"publisher","unstructured":"Jain, A., Sarawagi, S., Sen, P.: \u201cDeep Indexed Active Learning for Matching Heterogeneous Entity Representations\u201d. In: Proc. VLDB Endow. 15.1 , 31\u201345, (2021). https:\/\/doi.org\/10.14778\/3485450.3485455. http:\/\/www.vldb.org\/pvldb\/vol15\/p31-jain.pdf","DOI":"10.14778\/3485450.3485455"},{"key":"953_CR81","doi-asserted-by":"crossref","unstructured":"Jain, N., et al.: \u201cJigsaw: Large language models meet program synthesis\u201d. In: Proceedings of the 44th International Conference on Software Engineering. 1219\u20131231 (2022)","DOI":"10.1145\/3510003.3510203"},{"key":"953_CR82","unstructured":"Jain, N., et al.: \u201cR2E: Turning any Github Repository into a Programming Agent Environment\u201d. In: Forty-first International Conference on Machine Learning. (2024)"},{"key":"953_CR83","unstructured":"Jeon, H.J., et al.: \u201cAn Information-Theoretic Analysis of In-Context Learning\u201d. 
In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=NQn2tYLv5I"},{"key":"953_CR84","doi-asserted-by":"crossref","unstructured":"Jha, S.K., et al.: \u201cCounterexample guided inductive synthesis using large language models and satisfiability solving\u201d. In: MILCOM 2023-2023 IEEE Military Communications Conference (MILCOM). IEEE. 944\u2013949 (2023)","DOI":"10.1109\/MILCOM58377.2023.10356332"},{"key":"953_CR85","doi-asserted-by":"crossref","unstructured":"Ji, Z., et al.: \u201cSurvey of Hallucination in Natural Language Generation\u201d. ACM Computing Surveys 55 , 1 \u201338. (2022) https:\/\/api.semanticscholar.org\/CorpusID:246652372","DOI":"10.1145\/3571730"},{"key":"953_CR86","unstructured":"Jiang, C., et al.: \u201cLLMOPT: Learning to Define and Solve General Optimization Problems from Scratch\u201d. In: The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025. (2025). https:\/\/openreview.net\/forum?id=9OMvtboTJg"},{"key":"953_CR87","unstructured":"Johnson, T., Lakshmanan, L.V., Ng, R.T.: \u201cThe 3W Model and Algebra for Unified Data Mining\u201d. In: VLDB 2000, Proceedings of 26th International Conference on Very Large Data Bases, September 10-14, 2000, Cairo, Egypt. Ed. by Amr El Abbadi et al. Morgan Kaufmann, , 21\u201332, (2000) http:\/\/www.vldb.org\/conf\/2000\/P021.pdf"},{"key":"953_CR88","unstructured":"Jong, M.d., et al.: \u201cMention Memory: incorporating textual knowledge into Transformers through entity mention attention\u201d. In: The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022. OpenReview.net, (2022). https:\/\/openreview.net\/forum?id=OY1A8ejQgEX"},{"key":"953_CR89","doi-asserted-by":"crossref","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: \u201cReinforcement learning: A survey\u201d. 
In: Journal of artificial intelligence research 4 ,237\u2013285 (1996)","DOI":"10.1613\/jair.301"},{"key":"953_CR90","doi-asserted-by":"publisher","first-page":"1417","DOI":"10.1162\/tacl_a_00713","volume":"12","author":"R Kamoi","year":"2024","unstructured":"Kamoi, R., et al.: When can llms actually correct their own mistakes? a critical survey of self-correction of llms. Transactions of the Association for Computational Linguistics 12, 1417\u20131440 (2024)","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"953_CR91","unstructured":"Kang, M., et al.: \u201cC-RAG: Certified Generation Risks for Retrieval-Augmented Language Models\u201d. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=FMa4c5NhOe"},{"key":"953_CR92","doi-asserted-by":"publisher","unstructured":"Kasai, J., et al.: \u201cLow-resource Deep Entity Resolution with Transfer and Active Learning\u201d. In: Proceedings of the 57th Conference of the Association for Computational Linguistics, ACL 2019, Florence, Italy, July 28- August 2, 2019, Volume 1: Long Papers. Ed. by Anna Korhonen, David R. Traum, and Llu\u00eds M\u00e0rquez. Association for Computational Linguistics, 5851\u20135861, (2019) https:\/\/doi.org\/10.18653\/v1\/p19-1586","DOI":"10.18653\/v1\/p19-1586"},{"key":"953_CR93","unstructured":"Kaufmann, T., et al.: \u201cA Survey of Reinforcement Learning from Human Feedback\u201d. In: ArXiv abs\/2312.14925 (2023). https:\/\/api.semanticscholar.org\/CorpusID:266521540"},{"key":"953_CR94","doi-asserted-by":"publisher","unstructured":"Kayali, M., et al.: \u201cCHORUS: Foundation Models for Unified Data Discovery and Exploration\u201d. In: Proc. VLDB Endow. 17.8 , 2104\u20132114, (2024). https:\/\/doi.org\/10.14778\/3659437.3659461. 
https:\/\/www.vldb.org\/pvldb\/vol17\/p2104-kayali.pdf","DOI":"10.14778\/3659437.3659461"},{"key":"953_CR95","doi-asserted-by":"publisher","unstructured":"Kazmierczak, R., et al.: \u201cExplainability and vision foundation models: A survey\u201d. In: Inf. Fusion 122 ,103184, (2025). https:\/\/doi.org\/10.1016\/j.inffus.2025.103184","DOI":"10.1016\/j.inffus.2025.103184"},{"key":"953_CR96","doi-asserted-by":"crossref","unstructured":"Kenett, R.S., Shmueli, G.: Information quality: The potential of data and analytics to generate knowledge. John Wiley & Sons, (2016)","DOI":"10.1002\/9781118890622"},{"key":"953_CR97","doi-asserted-by":"publisher","unstructured":"Kenthapadi, K., Sameki, M., Taly, A.: \u201cGrounding and Evaluation for Large Language Models: Practical Challenges and Lessons Learned (Survey)\u201d. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, KDD 2024, Barcelona, Spain, August 25-29, 2024. Ed. by Ricardo Baeza-Yates and Francesco Bonchi. ACM, 6523\u20136533, (2024). https:\/\/doi.org\/10.1145\/3637528.3671467","DOI":"10.1145\/3637528.3671467"},{"key":"953_CR98","unstructured":"Khakhar, A., Mell, S., Bastani, O.: \u201cPAC Prediction Sets for Large Language Models of Code\u201d. In: International Conference on Machine Learning. (2023). https:\/\/api.semanticscholar.org\/CorpusID:257020011"},{"key":"953_CR99","doi-asserted-by":"crossref","unstructured":"Khoshafian, S.N., Copeland, G.P.: \u201cObject identity\u201d. In: ACM SIGPLAN Notices 21.11 , 406\u2013416 (1986)","DOI":"10.1145\/960112.28739"},{"key":"953_CR100","first-page":"39648","volume":"36","author":"G Kim","year":"2023","unstructured":"Kim, G., Baldi, P., McAleer, S.: Language models can solve computer tasks. Adv. Neural. Inf. Process. Syst. 36, 39648\u201339677 (2023)","journal-title":"Adv. Neural. Inf. Process. 
Syst."},{"key":"953_CR101","unstructured":"Ko, H., et al.: \u201cFilling in the Gaps: LLM-Based Structured Data Generation from Semi-Structured Scientific Data\u201d. In: ICML 2024 AI for Science Workshop. (2024)"},{"key":"953_CR102","doi-asserted-by":"crossref","unstructured":"Konda, P., et al.: \u201cMagellan: toward building entity matching management systems over data science stacks\u201d. In: Proceedings of the VLDB Endowment 9.13 , 1581\u20131584 (2016)","DOI":"10.14778\/3007263.3007314"},{"key":"953_CR103","unstructured":"Konda, P.V.: Magellan: Toward building entity matching management systems. The University of Wisconsin-Madison, (2018)"},{"key":"953_CR104","doi-asserted-by":"publisher","unstructured":"Koziolek, H., et al.: \u201cLLM-based and Retrieval-Augmented Control Code Generation\u201d. In: LLM4CODE@ICSE. , 22\u201329, (2024). https:\/\/doi.org\/10.1145\/3643795.3648384","DOI":"10.1145\/3643795.3648384"},{"key":"953_CR105","doi-asserted-by":"publisher","unstructured":"Kumar, P.: \u201cLarge language models (LLMs): survey, technical frameworks, and future challenges\u201d. In: Artif. Intell. Rev. 57.9 ,260, (2024). https:\/\/doi.org\/10.1007\/s10462-024-10888-y","DOI":"10.1007\/s10462-024-10888-y"},{"key":"953_CR106","unstructured":"Lai, Y., et al.: \u201cDS-1000: A natural and reliable benchmark for data science code generation\u201d. In: International Conference on Machine Learning. PMLR. , 18319\u201318345 (2023)"},{"key":"953_CR107","doi-asserted-by":"publisher","unstructured":"Lao, J., et al.: \u201cGPTuner: A Manual-Reading Database Tuning System via GPT-Guided Bayes-ian Optimization\u201d. In: Proc. VLDB Endow. 17.8 , 1939\u20131952, (2024). https:\/\/doi.org\/10.14778\/3659437.3659449. https:\/\/www.vldb.org\/pvldb\/vol17\/p1939-tang.pdf","DOI":"10.14778\/3659437.3659449"},{"key":"953_CR108","unstructured":"Launer, R.L., Siegel, A.F.: Modern data analysis. 
Academic Press, (2014)"},{"key":"953_CR109","doi-asserted-by":"crossref","unstructured":"Lehmann, J., Ferr\u00e9, S., Vahdati, S.: \u201cLanguage Models as Controlled Natural Language Semantic Parsers for Knowledge Graph Question Answering\u201d. In: European Conference on Artificial Intelligence. (2023). https:\/\/api.semanticscholar.org\/CorpusID:260844053","DOI":"10.3233\/FAIA230411"},{"key":"953_CR110","doi-asserted-by":"publisher","unstructured":"Li, B., et al.: \u201cThe Dawn of Natural Language to SQL: Are We Fully Ready? [Experiment, Analysis & Benchmark ]\u201d. In: Proc. VLDB Endow. 17.11 , 3318\u20133331, (2024). https:\/\/doi.org\/10.14778\/3681954.3682003. https:\/\/www.vldb.org\/pvldb\/vol17\/p3318-luo.pdf","DOI":"10.14778\/3681954.3682003"},{"issue":"11","key":"953_CR111","doi-asserted-by":"publisher","first-page":"13067","DOI":"10.1609\/aaai.v37i11.26535","volume":"37","author":"H Li","year":"2023","unstructured":"Li, H., et al.: Resdsql: Decoupling schema linking and skeleton parsing for text-to-sql. Proceedings of the AAAI Conference on Artificial Intelligence. 37(11), 13067\u201313075 (2023)","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence."},{"key":"953_CR112","unstructured":"Li, J., et al.: \u201cCan llm already serve as a database interface? a big bench for large-scale database grounded text-to-sqls\u201d. In: Advances in Neural Information Processing Systems 36 (2024)"},{"key":"953_CR113","doi-asserted-by":"crossref","unstructured":"Li, P., et al.: \u201cTable-gpt: Table fine-tuned gpt for diverse table tasks\u201d. In: Proceedings of the ACM on Management of Data 2.3 , 1\u201328 (2024)","DOI":"10.1145\/3654979"},{"key":"953_CR114","unstructured":"Li, W.D., Ellis, K.: \u201cIs Programming by Example Solved by LLMs?\u201d In: Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024. Ed. 
by Amir Globersons et al. (2024). http:\/\/papers.nips.cc\/paper\/_files\/paper\/2024\/hash\/4eff61b79274124bc71efe2ee9772f95-Abstract-Conference.html"},{"key":"953_CR115","doi-asserted-by":"publisher","unstructured":"Li, X., D\u00f6hmen, T.: \u201cTowards Efficient Data Wrangling with LLMs using Code Generation\u201d. In: Proceedings of the Eighth Workshop on Data Management for End-to-End Machine Learning, DEEM 2024, Santiago, AA, Chile, 9 June 2024. ACM, 62\u201366 (2024) https:\/\/doi.org\/10.1145\/3650203.3663334","DOI":"10.1145\/3650203.3663334"},{"key":"953_CR116","doi-asserted-by":"crossref","unstructured":"Li, X., D\u00f6hmen, T.: \u201cTowards Efficient Data Wrangling with LLMs using Code Generation\u201d. In: Proceedings of the Eighth Workshop on Data Management for End-to-End Machine Learning. 62\u201366 (2024)","DOI":"10.1145\/3650203.3663334"},{"key":"953_CR117","doi-asserted-by":"publisher","unstructured":"Li, Y., et al.: \u201cDeep Entity Matching with Pre-Trained Language Models\u201d. In: Proc. VLDB Endow. 14.1 ,50\u201360 (2020) https:\/\/doi.org\/10.14778\/3421424.3421431. http:\/\/www.vldb.org\/pvldb\/vol14\/p50-li.pdf","DOI":"10.14778\/3421424.3421431"},{"key":"953_CR118","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: \u201cFlexkbqa: A flexible llm-powered framework for few-shot knowledge base question answering\u201d. In: Proceedings of the AAAI Conference on Artificial Intelligence. 38. 17., 18608\u201318616 ( 2024)","DOI":"10.1609\/aaai.v38i17.29823"},{"key":"953_CR119","doi-asserted-by":"publisher","unstructured":"Liang, Y., et al.: \u201cFoundation Models for Time Series Analysis: A Tutorial and Survey\u201d. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, KDD 2024, Barcelona, Spain, August 25-29, 2024. Ed. by Ricardo Baeza-Yates and Francesco Bonchi. ACM, 6555\u20136565, (2024). 
https:\/\/doi.org\/10.1145\/3637528.3671451","DOI":"10.1145\/3637528.3671451"},{"key":"953_CR120","doi-asserted-by":"publisher","unstructured":"Liang, Z., et al.: \u201c Tailoring the Shapley Value for In-Context Example Selection Towards Data Wrangling \u201d. In: 2025 IEEE 41st International Conference on Data Engineering (ICDE). Los Alamitos, CA, USA: IEEE Computer Society, May ,2281\u20132294, (2025) . https:\/\/doi.org\/10.1109\/ICDE65448.2025.00173","DOI":"10.1109\/ICDE65448.2025.00173"},{"key":"953_CR121","unstructured":"Lin, Y., et al.: \u201cSMARTFEAT: Efficient Feature Construction through Feature-Level Foundation Model Interactions\u201d. In: 14th Conference on Innovative Data Systems Research, CIDR 2024, Chaminade, HI, USA, January 14-17, 2024. (2024)"},{"key":"953_CR122","doi-asserted-by":"crossref","unstructured":"Little, R.J., Rubin, D.B.: Statistical analysis with missing data. 793. John Wiley & Sons, (2019)","DOI":"10.1002\/9781119482260"},{"key":"953_CR123","doi-asserted-by":"publisher","unstructured":"Liu, F., et al.: \u201cFew-shot adaptation of multi-modal foundation models: a survey\u201d. In: Artif. Intell. Rev. 57.10 , 268, (2024). https:\/\/doi.org\/10.1007\/s10462-024-10915-y","DOI":"10.1007\/s10462-024-10915-y"},{"key":"953_CR124","doi-asserted-by":"publisher","unstructured":"Liu, P., et al.: \u201cPre-train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing\u201d. In: ACM Comput. Surv. 55.9 , 195:1\u2013195:35, (2023). https:\/\/doi.org\/10.1145\/3560815","DOI":"10.1145\/3560815"},{"key":"953_CR125","doi-asserted-by":"crossref","unstructured":"Liu, S.C., et al.: \u201cJarviX: A LLM no code platform for tabular data analysis and optimization\u201d. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track. 
, 622\u2013630 (2023)","DOI":"10.18653\/v1\/2023.emnlp-industry.59"},{"key":"953_CR126","unstructured":"Liu, X., et al.: \u201cEnhancing Large Language Models with Multimodality and Knowledge Graphs for Hallucination-free Open-set Object Recognition\u201d. In: Proceedings of Workshops at the 50th International Conference on Very Large Data Bases, VLDB 2024, Guangzhou, China, August 26-30, 2024. VLDB.org, (2024). https:\/\/vldb.org\/workshops\/2024\/proceedings\/LLM+KG\/LLM+KG-11.pdf"},{"key":"953_CR127","doi-asserted-by":"crossref","unstructured":"Loem, M., Kaneko, M., Okazaki, N.: \u201cSAIE Framework: Support Alone Isn\u2019t Enough - Advancing LLM Training with Adversarial Remarks\u201d. In: European Conference on Artificial Intelligence. (2023). https:\/\/api.semanticscholar.org\/CorpusID:265157830","DOI":"10.3233\/FAIA240931"},{"key":"953_CR128","doi-asserted-by":"publisher","unstructured":"Long, L., et al.: \u201cOn LLMs-Driven Synthetic Data Generation, Curation, and Evaluation: A Survey\u201d. In: Findings of the Association for Computational Linguistics, ACL 2024, Bangkok, Thailand and virtual meeting, August 11-16, 2024. Ed. by Lun-Wei Ku, Andre Martins, and Vivek Srikumar. Association for Computational Linguistics , 11065\u201311082, (2024) https:\/\/doi.org\/10.18653\/v1\/2024.findings-acl.658","DOI":"10.18653\/v1\/2024.findings-acl.658"},{"key":"953_CR129","doi-asserted-by":"crossref","unstructured":"Lu, M., Delaware, B., Zhang, T.: \u201cProof automation with large language models\u201d. In: Proceedings of the 39th IEEE\/ACM International Conference on Automated Software Engineering. 1509\u20131520 (2024)","DOI":"10.1145\/3691620.3695521"},{"key":"953_CR130","doi-asserted-by":"publisher","unstructured":"Lu, W., et al.: \u201cLarge language model for table processing: a survey\u201d. In: Frontiers Comput. Sci. 19.2 , 192350. 
(2025) https:\/\/doi.org\/10.1007\/s11704-024-40763-6","DOI":"10.1007\/s11704-024-40763-6"},{"key":"953_CR131","doi-asserted-by":"publisher","unstructured":"Lu, Y., et al.: \u201cFantastically Ordered Prompts and Where to Find Them: Overcoming Few-Shot Prompt Order Sensitivity\u201d. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), ACL 2022, Dublin, Ireland, May 22-27, 2022. Ed. by Smaranda Muresan, Preslav Nakov, and Aline Villavicencio. Association for Computational Linguistics, , 8086\u20138098 (2022) https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.556","DOI":"10.18653\/v1\/2022.acl-long.556"},{"key":"953_CR132","doi-asserted-by":"publisher","unstructured":"Lu, Y., et al.: \u201cPre-training Summarization Models of Structured Datasets for Cardinality Estimation\u201d. In: Proc. VLDB Endow. 15.3 , 414\u2013426, (2021) https:\/\/doi.org\/10.14778\/3494124.3494127. http:\/\/www.vldb.org\/pvldb\/vol15\/p414-lu.pdf","DOI":"10.14778\/3494124.3494127"},{"key":"953_CR133","unstructured":"Lyu, M.R., et al.: \u201cAutomatic programming: Large language models and beyond\u201d. In: ACM Transactions on Software Engineering and Methodology (2024)"},{"key":"953_CR134","doi-asserted-by":"crossref","unstructured":"Ma, P., et al.: \u201cInsightPilot: An LLM-empowered automated data exploration system\u201d. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. , 346\u2013352 (2023)","DOI":"10.18653\/v1\/2023.emnlp-demo.31"},{"key":"953_CR135","first-page":"46534","volume":"36","author":"A Madaan","year":"2023","unstructured":"Madaan, A., et al.: Self-refine: Iterative refinement with self-feedback. Adv. Neural. Inf. Process. Syst. 36, 46534\u201346594 (2023)","journal-title":"Adv. Neural. Inf. Process. 
Syst."},{"key":"953_CR136","doi-asserted-by":"publisher","unstructured":"Madan, N., et al.: \u201cFoundation Models for Video Understanding: A Survey\u201d. In: CoRR abs\/2405.03770 (2024). https:\/\/doi.org\/10.48550\/arXiv.2405.03770","DOI":"10.48550\/arXiv.2405.03770"},{"key":"953_CR137","doi-asserted-by":"crossref","unstructured":"Mahdavi, M., et al.: \u201cRaha: A configurat-ion-free error detection system\u201d. In: Proceedings of the 2019 International Conference on Management of Data. , 865\u2013882 (2019)","DOI":"10.1145\/3299869.3324956"},{"key":"953_CR138","doi-asserted-by":"crossref","unstructured":"Malberg, S., Mosca, E., Groh, G.: \u201cFELIX: Automatic and interpretable feature engineering using llms\u201d. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer. , 230\u2013246 (2024)","DOI":"10.1007\/978-3-031-70359-1_14"},{"key":"953_CR139","doi-asserted-by":"publisher","unstructured":"Mao, Y., et al.: \u201cA survey on LoRA of large language models\u201d. In: Frontiers Comput. Sci. 19.7 ,197605, (2025). https:\/\/doi.org\/10.1007\/s11704-024-40663-9","DOI":"10.1007\/s11704-024-40663-9"},{"key":"953_CR140","first-page":"76336","volume":"36","author":"D McElfresh","year":"2023","unstructured":"McElfresh, D., et al.: When do neural nets outperform boosted trees on tabular data? Adv. Neural. Inf. Process. Syst. 36, 76336\u201376369 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"953_CR141","unstructured":"Miao, N., et al.: \u201cLearning Instance-Specific Augmentations by Capturing Local Invariances\u201d. In: International Conference on Machine Learning. (2022). https:\/\/api.semanticscholar.org\/CorpusID:258967766"},{"key":"953_CR142","doi-asserted-by":"publisher","unstructured":"Morris, C., Jurado, M., Zutty, J.: \u201cLLM Guided Evolution - The Automation of Models Advancing Models\u201d. 
In: Proceedings of the Genetic and Evolutionary Computation Conference, GECCO 2024, Melbourne, VIC, Australia, July 14-18, 2024. Ed. by Xiaodong Li and Julia Handl. ACM, (2024). https:\/\/doi.org\/10.1145\/3638529.3654178","DOI":"10.1145\/3638529.3654178"},{"key":"953_CR143","doi-asserted-by":"crossref","unstructured":"Mudgal, S., et al.: \u201cDeep learning for entity matching: A design space exploration\u201d. In: Proceedings of the 2018 international conference on management of data. , 19\u201334 (2018)","DOI":"10.1145\/3183713.3196926"},{"key":"953_CR144","doi-asserted-by":"publisher","unstructured":"Mueller, A., et al.: \u201cGAMformer: In-Context Learning for Generalized Additive Models\u201d. In: CoRR abs\/2410.04560 (2024). https:\/\/doi.org\/10.48550\/arXiv.2410.04560","DOI":"10.48550\/arXiv.2410.04560"},{"key":"953_CR145","unstructured":"Mueller, A.C., Curino, C., Ramakrishnan, R.: \u201cMotherNet: Fast Training and Inference via Hyper-Network Transformers\u201d. In: The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025. OpenReview.net, (2025). https:\/\/openreview.net\/forum?id=6H4jRWKFc3"},{"key":"953_CR146","doi-asserted-by":"crossref","unstructured":"Myatt, G.J: Making sense of data: a practical guide to exploratory data analysis and data mining. John Wiley & Sons, (2007)","DOI":"10.1002\/0470101024"},{"key":"953_CR147","unstructured":"Nagler, T.: \u201cStatistical Foundations of Prior-Data Fitted Networks\u201d. In: International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA. Ed. by Andreas Krause et al. Vol.\u00a0202. Proceedings of Machine Learning Research. PMLR, 25660\u201325676. (2023) https:\/\/proceedings.mlr.press\/v202\/nagler23a.html"},{"key":"953_CR148","doi-asserted-by":"crossref","unstructured":"Nam, D., et al.: \u201cUsing an llm to help with code understanding\u201d. 
In: Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering. 1\u201313 (2024)","DOI":"10.1145\/3597503.3639187"},{"key":"953_CR149","first-page":"92352","volume":"37","author":"J Nam","year":"2024","unstructured":"Nam, J., et al.: Optimized feature generation for tabular data via llms with decision tree reasoning. Adv. Neural. Inf. Process. Syst. 37, 92352\u201392380 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"953_CR150","doi-asserted-by":"publisher","unstructured":"Narayan, A., et al.: \u201cCan Foundation Models Wrangle Your Data?\u201d In: Proc. VLDB Endow. 16.4 , 738\u2013746. (2022) https:\/\/doi.org\/10.14778\/3574245.3574258. https:\/\/www.vldb.org\/pvldb\/vol16\/p738-narayan.pdf","DOI":"10.14778\/3574245.3574258"},{"key":"953_CR151","doi-asserted-by":"publisher","unstructured":"Narayan, A., et al.: \u201cCan Foundation Models Wrangle Your Data?\u201d In: Proc. VLDB Endow. 16.4 ,738\u2013746, (2022). https:\/\/doi.org\/10.14778\/3574245.3574258. https:\/\/www.vldb.org\/pvldb\/vol16\/p738-narayan.pdf","DOI":"10.14778\/3574245.3574258"},{"key":"953_CR152","unstructured":"Neu, G., Lugosi, G.: \u201cGeneralization Bounds via Convex Analysis\u201d. In: Annual Conference Computational Learning Theory. (2022). https:\/\/api.semanticscholar.org\/CorpusID:246706121"},{"key":"953_CR153","doi-asserted-by":"crossref","unstructured":"Ngom, A.L., Kraska, T.: \u201cMallet: SQL Dialect Translation with LLM Rule Generation\u201d. In: Proceedings of the Seventh International Workshop on Exploiting Artificial Intelligence Techniques for Data Management. 1\u20135 (2024)","DOI":"10.1145\/3663742.3663973"},{"key":"953_CR154","doi-asserted-by":"publisher","unstructured":"Ni, W., et al.: \u201c ZeroED: Hybrid Zero-shot Error Detection through Large Language Model Reasoning \u201d. In: 2025 IEEE 41st International Conference on Data Engineering (ICDE). Los Alamitos, CA, USA: IEEE Computer Society, May , 3126\u20133139, (2025). 
https:\/\/doi.org\/10.1109\/ICDE65448.2025.00234","DOI":"10.1109\/ICDE65448.2025.00234"},{"key":"953_CR155","doi-asserted-by":"crossref","unstructured":"Ni, W., et al.: \u201cIterClean: An Iterative Data Cleaning Framework with Large Language Models\u201d. In: Proceedings of the ACM Turing Award Celebration Conference-China 2024. , 100\u2013105 (2024)","DOI":"10.1145\/3674399.3674436"},{"key":"953_CR156","unstructured":"Nie, J., et al.: \u201cKnowledge Graph Efficient Construction: Embedding Chain-of-Thought into LLMs\u201d. In: Proceedings of Workshops at the 50th International Conference on Very Large Data Bases, VLDB 2024, Guangzhou, China, August 26-30, 2024. VLDB.org, (2024). https:\/\/vldb.org\/workshops\/2024\/proceedings\/LLM+KG\/LLM+KG-4.pdf"},{"key":"953_CR157","unstructured":"Nisbet, R., Elder, J., Miner, G.D.: Handbook of statistical analysis and data mining applications. Academic press, (2009)"},{"key":"953_CR158","doi-asserted-by":"crossref","unstructured":"Nobari, A.D., Rafiei, D.: \u201cDTT: An Example-Driven Tabular Transformer for Joinability by Leveraging Large Language Models\u201d. In: Proceedings of the ACM on Management of Data 2 , 1 \u201324. (2023) https:\/\/api.semanticscholar.org\/CorpusID:257495879","DOI":"10.1145\/3639279"},{"key":"953_CR159","unstructured":"Olson, J.E.: Data quality: the accuracy dimension. Elsevier, (2003)"},{"key":"953_CR160","unstructured":"Pal, K., et al.: \u201cALT-GEN: Benchmarking Table Union Search using Large Language Models\u201d. In: Proceedings of Workshops at the 50th International Conference on Very Large Data Bases, VLDB 2024, Guangzhou, China, August 26-30, 2024. VLDB.org, (2024). 
https:\/\/vldb.org\/workshops\/2024\/proceedings\/TaDA\/TaDA.3.pdf"},{"key":"953_CR161","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1162\/tacl\/_a\/_00660","volume":"12","author":"L Pan","year":"2024","unstructured":"Pan, L., et al.: Automatically Correcting Large Language Models: Surveying the Landscape of Diverse Automated Correction Strategies. Trans. Assoc. Comput. Linguistics 12, 484\u2013506 (2024). https:\/\/doi.org\/10.1162\/tacl\/_a\/_00660","journal-title":"Trans. Assoc. Comput. Linguistics"},{"key":"953_CR162","unstructured":"Parciak, M., et al.: \u201cSchema Matching with Large Language Models: an Experimental Study\u201d. In: Proceedings of Workshops at the 50th International Conference on Very Large Data Bases, VLDB 2024, Guangzhou, China, August 26-30, 2024. VLDB.org, (2024). https:\/\/vldb.org\/workshops\/2024\/proceedings\/TaDA\/TaDA.8.pdf"},{"key":"953_CR163","doi-asserted-by":"publisher","unstructured":"Parry, A., Ganguly, D., Chandra, M.: \u201c\u00a0\u201dIn-Context Learning\u201d or: How I learned to stop worrying and love \u201dApplied Information Retrieval\u201d\u00a0\u201d. In: Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2024, Washington DC, USA, July 14-18, 2024. Ed. by Grace Hui Yang et al. ACM, 14\u201325, (2024).https:\/\/doi.org\/10.1145\/3626772.3657842","DOI":"10.1145\/3626772.3657842"},{"key":"953_CR164","unstructured":"Pearl, J., Mackenzie, D.: The book of why: the new science of cause and effect. Basic books, (2018)"},{"key":"953_CR165","unstructured":"Pei, K., et al.: \u201cCan Large Language Models Reason about Program Invariants?\u201d In: International Conference on Machine Learning. (2023). https:\/\/api.semanticscholar.org\/CorpusID:260871141"},{"key":"953_CR166","doi-asserted-by":"crossref","unstructured":"Peng, J., et al.: \u201cSelf-supervised and interpretable data cleaning with sequence generative adversarial networks\u201d. 
In: Proceedings of the VLDB Endowment 16.3 , 433\u2013446 (2022)","DOI":"10.14778\/3570690.3570694"},{"key":"953_CR167","unstructured":"Popov, S., Morozov, S., Babenko, A.: \u201cNeural Oblivious Decision Ensembles for Deep Learning on Tabular Data\u201d. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020. OpenReview.net, (2020). https:\/\/openreview.net\/forum?id=r1eiu2VtwH"},{"key":"953_CR168","unstructured":"Provost, F.: Data Science for Business: What you need to know about data mining and data-analytic thinking. 355. O\u2019Reilly Media, Inc, (2013)"},{"key":"953_CR169","unstructured":"Qi, D., Wang, J.: \u201cCleanAgent: Automating Data Standardization with LLM-based Agents\u201d. In: arXiv preprint arXiv:2403.08291 (2024)"},{"key":"953_CR170","doi-asserted-by":"publisher","unstructured":"Qin, Y., et al.: \u201cTool Learning with Foundation Models\u201d. In: ACM Comput. Surv. 57.4 , 101:1\u2013101:40. (2025) https:\/\/doi.org\/10.1145\/3704435","DOI":"10.1145\/3704435"},{"key":"953_CR171","doi-asserted-by":"publisher","unstructured":"Qu, C., et al.: \u201cTool learning with large language models: a survey\u201d. In: Frontiers Comput. Sci. 19.8 , 198343 (2025) https:\/\/doi.org\/10.1007\/s11704-024-40678-2","DOI":"10.1007\/s11704-024-40678-2"},{"key":"953_CR172","doi-asserted-by":"publisher","unstructured":"Qu, Y., et al.: \u201cThe Frontier of Data Erasure: A Survey on Machine Unlearning for Large Language Models\u201d. In: Computer 58.1 , 45\u201357. (2025) https:\/\/doi.org\/10.1109\/MC.2024.3405397","DOI":"10.1109\/MC.2024.3405397"},{"key":"953_CR173","doi-asserted-by":"publisher","unstructured":"Raihan, N., Newman, C.D., Zampieri, M.: \u201cCode LLMs: A Taxonomy-based Survey\u201d. In: IEEE International Conference on Big Data, BigData 2024, Washington, DC, USA, December 15-18, 2024. Ed. by Wei Ding et al. 
IEEE, 5402\u20135411, (2024) https:\/\/doi.org\/10.1109\/BigData62323.2024.10826108","DOI":"10.1109\/BigData62323.2024.10826108"},{"key":"953_CR174","unstructured":"Reizinger, P., et al.: \u201cPosition: Understanding LLMs Requires More Than Statistical Generalization\u201d. In: Forty-first International Conference on Machine Learning"},{"key":"953_CR175","unstructured":"Reizinger, P., et al.: \u201cPosition: Understanding LLMs Requires More Than Statistical Generalization\u201d. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=pVyOchWUBa"},{"key":"953_CR176","doi-asserted-by":"publisher","unstructured":"Ren, X., et al.: \u201cA Survey of Large Language Models for Graphs\u201d. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, KDD 2024, Barcelona, Spain, August 25-29, 2024. Ed. by Ricardo Baeza-Yates and Francesco Bonchi. ACM, 6616\u20136626, (2024). https:\/\/doi.org\/10.1145\/3637528.3671460","DOI":"10.1145\/3637528.3671460"},{"key":"953_CR177","doi-asserted-by":"publisher","unstructured":"Rodr\u00edguez-Carbonell, E., Kapur, D.: \u201cGenerating all polynomial invariants in simple loops\u201d. In: J. Symb. Comput. 42.4 , 443\u2013476, (2007) https:\/\/doi.org\/10.1016\/j.jsc.2007.01.002","DOI":"10.1016\/j.jsc.2007.01.002"},{"key":"953_CR178","doi-asserted-by":"crossref","unstructured":"Salehin, I., et al.: \u201cAutoML: A systematic review on automated machine learning with neural architecture search\u201d. In: Journal of Information and Intelligence 2.1 , 52\u201381 (2024)","DOI":"10.1016\/j.jiixd.2023.10.002"},{"key":"953_CR179","unstructured":"Sayed, E., et al.: \u201cGizaML: A Collaborative Meta-learning Based Framework Using LLM For Automated Time-Series Forecasting.\u201d In: EDBT. 
830\u2013833 (2024)"},{"key":"953_CR180","unstructured":"Sch\u00e4fer, M., et al.: \u201cAn empirical evaluation of using large language models for automated unit test generation\u201d. In: IEEE Transactions on Software Engineering (2023)"},{"key":"953_CR181","unstructured":"Schumann, J.M.: Automated theorem proving in software engineering. Springer Science & Business Media, (2013)"},{"key":"953_CR182","unstructured":"Shao, Z., et al.: \u201cSynthetic prompting: Generating chain-of-thought demonstrations for large language models\u201d. In: International Conference on Machine Learning. PMLR. , 30706\u201330775 (2023)"},{"key":"953_CR183","unstructured":"Shi, F., et al.: \u201cLarge Language Models Can Be Easily Distracted by Irrelevant Context\u201d. In: International Conference on Machine Learning. (2023). https:\/\/api.semanticscholar.org\/CorpusID:256459776"},{"key":"953_CR184","unstructured":"Shinn, N., et al.: \u201cReflexion: language agents with verbal reinforcement learning\u201d. In: Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023. Ed. by Alice Oh et al. (2023). http:\/\/papers.nips.cc\/paper\/_files\/paper\/2023\/hash\/1b44b878bb782e6954cd888628510e90-Abstract-Conference.html"},{"key":"953_CR185","doi-asserted-by":"crossref","unstructured":"Shmueli, G., Koppius, O.R.: \u201cPredictive analytics in information systems research\u201d. In: MIS quarterly , 553\u2013572 (2011)","DOI":"10.2307\/23042796"},{"key":"953_CR186","doi-asserted-by":"publisher","unstructured":"Siam, M.K., Gu, H., Cheng, J.Q.: \u201cProgramming with AI: Evaluating ChatGPT, Gemini, AlphaCode, and GitHub Copilot for Programmers\u201d. In: Proceedings of the 3rd International Conference on Computing Advancements, ICCA 2024, Dhaka, Bangladesh, October 17-18, 2024. ACM, 346\u2013354, (2024). 
https:\/\/doi.org\/10.1145\/3723178.3723224","DOI":"10.1145\/3723178.3723224"},{"key":"953_CR187","doi-asserted-by":"crossref","unstructured":"Singh, C., et al.: \u201cAugmenting interpretable models with large language models during training\u201d. In: Nature Communications 14.1 , 7913 (2023)","DOI":"10.1038\/s41467-023-43713-1"},{"key":"953_CR188","doi-asserted-by":"publisher","unstructured":"Singh, S., Namin, AS.: \u201cA survey on chatbots and large language models: Testing and evaluation techniques\u201d. In: Nat. Lang. Process. J. 10 , 100128, (2025). https:\/\/doi.org\/10.1016\/j.nlp.2025.100128","DOI":"10.1016\/j.nlp.2025.100128"},{"key":"953_CR189","unstructured":"Song, Z., et al.: \u201cLatent Logic Tree Extraction for Event Sequence Explanation from LLMs\u201d. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=pwfcwEqdUz"},{"key":"953_CR190","unstructured":"Sordoni, A., et al.: \u201cJoint prompt optimization of stacked llms using variational inference\u201d. In: Advances in Neural Information Processing Systems 36 (2024)"},{"key":"953_CR191","doi-asserted-by":"publisher","unstructured":"Suri, S., et al.: \u201cEmber: No-Code Context Enrichment via Similarity-Based Keyless Joins\u201d. In: Proc. VLDB Endow. 15.3 , 699\u2013712, (2021) https:\/\/doi.org\/10.14778\/3494124.3494149. http:\/\/www.vldb.org\/pvldb\/vol15\/p699-suri.pdf","DOI":"10.14778\/3494124.3494149"},{"key":"953_CR192","unstructured":"Tan, M., et al.: \u201cAre Language Models Actually Useful for Time Series Forecasting?\u201d In: Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024. Ed. by Amir Globersons et al. (2024). 
http:\/\/papers.nips.cc\/paper\/_files\/paper\/2024\/hash\/6ed5bf446f59e2c6646d23058c86424b-Abstract-Conference.html"},{"key":"953_CR193","unstructured":"Tang, H., Key, D., Ellis, K.: \u201cWorldCoder, a Model-Based LLM Agent: Building World Models by Writing Code and Interacting with the Environment\u201d. In: Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024. Ed. by Amir Globersons et al. (2024). http:\/\/papers.nips.cc\/paper\/_files\/paper\/2024\/hash\/820c61a0cd419163ccbd2c33b268816e-Abstract-Conference.html"},{"key":"953_CR194","unstructured":"Tang, H., et al.: \u201cCode Repair with LLMs gives an Exploration-Exploitation Tradeoff\u201d. In: Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024. Ed. by Amir Globersons et al. 2024. http:\/\/papers.nips.cc\/paper\/_files\/paper\/2024\/hash\/d5c56ec4f69c9a473089b16000d3f8cd-Abstract-Conference.html"},{"key":"953_CR195","doi-asserted-by":"publisher","unstructured":"Thorne, J., et al.: \u201cFrom Natural Language Processing to Neural Databases\u201d. In: Proc. VLDB Endow. 14.6 , 1033\u20131039, (2021). https:\/\/doi.org\/10.14778\/3447689.3447706. http:\/\/www.vldb.org\/pvldb\/vol14\/p1033-thorne.pdf","DOI":"10.14778\/3447689.3447706"},{"key":"953_CR196","doi-asserted-by":"crossref","unstructured":"Truhn, D., Reis-Filho, J.S., Kather, J.N.: \u201cLarge language models should be used as scientific reasoning engines, not knowledge databases\u201d. In: Nature medicine 29.12 ,2983\u20132984 (2023)","DOI":"10.1038\/s41591-023-02594-z"},{"key":"953_CR197","doi-asserted-by":"crossref","unstructured":"Trummer, I.: \u201cCodexDB: Synthesizing code for query processing from natural language instructions using GPT-3 Codex\u201d. 
In: Proceedings of the VLDB Endowment 15.11 , 2921\u20132928 (2022)","DOI":"10.14778\/3551793.3551841"},{"key":"953_CR198","doi-asserted-by":"publisher","unstructured":"Trummer, I.: \u201cThe Case for NLP-Enhanced Database Tuning: Towards Tuning Tools that \u201dRead the Manual\u201d\u00a0\u201d. In: Proc. VLDB Endow. 14.7 , 1159\u20131165, (2021). https:\/\/doi.org\/10.14778\/3450980.3450984. http:\/\/www.vldb.org\/pvldb\/vol14\/p1159-trummer.pdf","DOI":"10.14778\/3450980.3450984"},{"key":"953_CR199","doi-asserted-by":"publisher","unstructured":"Tu, J., et al.: \u201cUnicorn: A Unified Multi-tasking Model for Supporting Matching Tasks in Data Integration\u201d. In: Proc. ACM Manag. Data 1.1 , 84:1\u201384:26. (2023) https:\/\/doi.org\/10.1145\/3588938","DOI":"10.1145\/3588938"},{"key":"953_CR200","unstructured":"Vacareanu, R., et al.: \u201cFrom Words to Numbers: Your Large Language Model Is Secretly A Capable Regressor When Given In-Context Examples\u201d. In: First Conference on Language Modeling. (2024). https:\/\/openreview.net\/forum?id=LzpaUxcNFK"},{"key":"953_CR201","unstructured":"Gils, B.V.: Data Management: a gentle introduction: Balancing theory and practice. Van Haren, (2020)"},{"key":"953_CR202","unstructured":"Vertsel, A., Rumiantsau, M.: \u201cHybrid LLM\/Rule-based Approaches to Business Insights Generation from Structured Data\u201d. In: arXiv preprint arXiv:2404.15604 (2024)"},{"key":"953_CR203","doi-asserted-by":"crossref","unstructured":"Voj\u00ed, S., Kliegr, T.: \u201cEditable machine learning models? A rule-based framework for user studies of explainability\u201d. In: Advances in Data Analysis and Classification 14.4 ,785\u2013799 (2020)","DOI":"10.1007\/s11634-020-00419-2"},{"key":"953_CR204","unstructured":"Vos, D., D\u00f6hmen, T., Schelter, S.: \u201cTowards parameter-efficient automation of data wrangling tasks with prefix-tuning\u201d. In: NeurIPS 2022 First Table Representation Workshop. 
(2022)"},{"key":"953_CR205","doi-asserted-by":"crossref","unstructured":"Wang, J., et al.: \u201cLarge Language Model for Geometric Algebra: A Preliminary Attempt\u201d. In: Computer Graphics International Conference. Springer. 237\u2013249 (2023)","DOI":"10.1007\/978-3-031-50078-7_19"},{"key":"953_CR206","doi-asserted-by":"publisher","unstructured":"Wang, L., et al.: \u201cA survey on large language model based autonomous agents\u201d. In: Frontiers Comput. Sci. 18.6 , 186345, (2024) https:\/\/doi.org\/10.1007\/s11704-024-40231-1","DOI":"10.1007\/s11704-024-40231-1"},{"key":"953_CR207","doi-asserted-by":"publisher","unstructured":"Wang, L., et al.: \u201cParameter-efficient fine-tuning in large language models: a survey of methodologies\u201d. In: Artif. Intell. Rev. 58.8 , 227, (2025) https:\/\/doi.org\/10.1007\/s10462-025-11236-4","DOI":"10.1007\/s10462-025-11236-4"},{"key":"953_CR208","doi-asserted-by":"crossref","unstructured":"Wang, Q., Fernandez, R.C.: \u201cSolo: Data Discovery Using Natural Language Questions Via A Self-Supervised Approach\u201d. In: Proceedings of the ACM on Management of Data 1 , 1 \u201327. (2023) https:\/\/api.semanticscholar.org\/CorpusID:255546367","DOI":"10.1145\/3626756"},{"key":"953_CR209","doi-asserted-by":"publisher","unstructured":"Wang, R., et al.: \u201cEmpowering large language models to edge intelligence: A survey of edge efficient LLMs and techniques\u201d. In: Comput. Sci. Rev. 57 ,100755. (2025) https:\/\/doi.org\/10.1016\/j.cosrev.2025.100755","DOI":"10.1016\/j.cosrev.2025.100755"},{"key":"953_CR210","doi-asserted-by":"publisher","unstructured":"Wang, R., et al.: \u201cTheoremLlama: Transforming General-Purpose LLMs into Lean4 Experts\u201d. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, EMNLP 2024, Miami, FL, USA, November 12-16, 2024. Ed. by Yaser Al-Onaizan, Mohit Bansal, and Yun-Nung Chen. Association for Computational Linguistics, 11953\u201311974. 
(2024) https:\/\/doi.org\/10.18653\/v1\/2024.emnlp-main.667","DOI":"10.18653\/v1\/2024.emnlp-main.667"},{"key":"953_CR211","doi-asserted-by":"publisher","unstructured":"Wang, S., et al.: \u201cKnowledge Editing for Large Language Models: A Survey\u201d. In: ACM Comput. Surv. 57.3 , 59:1\u201359:37. (2025) https:\/\/doi.org\/10.1145\/3698590","DOI":"10.1145\/3698590"},{"key":"953_CR212","doi-asserted-by":"publisher","unstructured":"Wang, S., et al.: \u201cKnowledge Editing for Large Language Models: A Survey\u201d. In: ACM Comput. Surv. 57.3 , 59:1\u201359:37. (2025) https:\/\/doi.org\/10.1145\/3698590","DOI":"10.1145\/3698590"},{"key":"953_CR213","unstructured":"Wang, X., et al.: \u201cExecutable Code Actions Elicit Better LLM Agents\u201d. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=jJ9BoXAfFa"},{"key":"953_CR214","unstructured":"Wang, Z., Jiang, B., Li, S.: \u201cIn-context learning on function classes unveiled for transformers\u201d. In: Forty-first International Conference on Machine Learning. (2024)"},{"key":"953_CR215","unstructured":"Wei, Z., Chen, W.L., Meng, Y.: \u201cInstructRAG: Instructing Retrieval-Augmented Generation via Self-Synthesized Rationales\u201d. In: International Conference on Learning Representations. (2024). https:\/\/api.semanticscholar.org\/CorpusID:271909559"},{"key":"953_CR216","doi-asserted-by":"crossref","unstructured":"Weng, R., Cheng, W.S., Zhang, M.: \u201cG-tuning: Improving generalization of pre-trained language models with generative adversarial network\u201d. In: Findings of the Association for Computational Linguistics: ACL 2023. , 4747\u20134755 (2023)","DOI":"10.18653\/v1\/2023.findings-acl.291"},{"key":"953_CR217","unstructured":"Wu, D., et al.: \u201cRepoformer: Selective Retrieval for Repository-Level Code Completion\u201d. 
In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net, (2024). https:\/\/openreview.net\/forum?id=moyG54Okrj"},{"key":"953_CR218","doi-asserted-by":"crossref","unstructured":"Wu, J., et al.: \u201cMultimodal large language models: A survey\u201d. In: 2023 IEEE International Conference on Big Data (BigData). IEEE. , 2247\u20132256 (2023)","DOI":"10.1109\/BigData59044.2023.10386743"},{"key":"953_CR219","unstructured":"Wu, T., et al.: \u201cContinual learning for large language models: A survey\u201d. In: arXiv preprint arXiv:2402.01364 (2024)"},{"key":"953_CR220","doi-asserted-by":"publisher","unstructured":"Xi, Z., et al.: \u201cThe rise and potential of large language model based agents: a survey\u201d. In: Sci. China Inf. Sci. 68.2 (2025). https:\/\/doi.org\/10.1007\/s11432-024-4222-0","DOI":"10.1007\/s11432-024-4222-0"},{"key":"953_CR221","unstructured":"Xia, Y., et al.: \u201cBeyond Chain-of-Thought: A Survey of Chain-of-X Paradigms for LLMs\u201d. In: Proceedings of the 31st International Conference on Computational Linguistics, COLING 2025, Abu Dhabi, UAE, January 19-24, 2025. Ed. by Owen Rambow et al. Association for Computational Linguistics, 10795\u201310809, (2025). https:\/\/aclanthology.org\/2025.coling-main.719\/"},{"key":"953_CR222","doi-asserted-by":"publisher","unstructured":"Xu, D., et al.: \u201cLarge language models for generative information extraction: a survey\u201d. In: Frontiers Comput. Sci. 18.6 , 186357, (2024). https:\/\/doi.org\/10.1007\/s11704-024-40555-y","DOI":"10.1007\/s11704-024-40555-y"},{"key":"953_CR223","doi-asserted-by":"crossref","unstructured":"Xu, M., et al.: \u201cResource-efficient algorithms and systems of foundation models: A survey\u201d. In: ACM Computing Surveys 57.5 , 1\u201339 (2025)","DOI":"10.1145\/3706418"},{"key":"953_CR224","unstructured":"Xu, R., et al.: \u201cKnowledge Conflicts for LLMs: A Survey\u201d. 
In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, EMNLP 2024, Miami, FL, USA, November 12-16, 2024. Ed. by Yaser Al-Onaizan, Mohit Bansal, and Yun-Nung Chen. Association for Computational Linguistics, 8541\u20138565, (2024). https:\/\/aclanthology.org\/2024.emnlp-main.486"},{"key":"953_CR225","doi-asserted-by":"publisher","unstructured":"Yan, M., et al.: \u201cGIDCL: A Graph-Enhanced Interpretable Data Cleaning Framework with Large Language Models\u201d. In: Proc. ACM Manag. Data 2.6 , 236:1\u2013236:29. (2024) https:\/\/doi.org\/10.1145\/3698811","DOI":"10.1145\/3698811"},{"key":"953_CR226","doi-asserted-by":"publisher","unstructured":"Yang, J., et al.: \u201cHarnessing the Power of LLMs in Practice: A Survey on ChatGPT and Beyond\u201d. In: ACM Trans. Knowl. Discov. Data 18.6 , 160:1\u2013160:32. (2024) https:\/\/doi.org\/10.1145\/3649506","DOI":"10.1145\/3649506"},{"key":"953_CR227","doi-asserted-by":"crossref","unstructured":"Yang, L., et al.: \u201cGive us the facts: Enhancing large language models with knowledge graphs for fact-aware language modeling\u201d. In: IEEE Transactions on Knowledge and Data Engineering (2024)","DOI":"10.1109\/TKDE.2024.3360454"},{"key":"953_CR228","doi-asserted-by":"publisher","unstructured":"Yang, W., et al.: \u201cA comprehensive survey on integrating large language models with knowledge-based methods\u201d. In: Knowl. Based Syst. 318 ,113503, (2025). https:\/\/doi.org\/10.1016\/j.knosys.2025.113503","DOI":"10.1016\/j.knosys.2025.113503"},{"key":"953_CR229","doi-asserted-by":"crossref","unstructured":"Yang, X., et al.: \u201cDeep learning for practical image recognition: Case study on kaggle competitions\u201d. In: Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining. 
, 923\u2013931 (2018)","DOI":"10.1145\/3219819.3219907"},{"key":"953_CR230","doi-asserted-by":"publisher","unstructured":"Yang, Y., et al.: \u201cRecent Advances of Foundation Language Models-based Continual Learning: A Survey\u201d. In: ACM Comput. Surv. 57.5 , 112:1\u2013112:38, (2025). https:\/\/doi.org\/10.1145\/3705725","DOI":"10.1145\/3705725"},{"key":"953_CR231","unstructured":"Yao, S., et al.: \u201cTree of thoughts: Deliberate problem solving with large language models\u201d. In: Advances in Neural Information Processing Systems 36 (2024)"},{"key":"953_CR232","unstructured":"Ye, X., et al.: \u201cSatlm: Satisfiability-aided language models using declarative prompting\u201d. In: Advances in Neural Information Processing Systems 36 (2024)"},{"key":"953_CR233","unstructured":"Yin, S., et al.: \u201cA survey on multimodal large language models\u201d. In: arXiv preprint arXiv:2306.13549 (2023)"},{"key":"953_CR234","unstructured":"Yoon, A.S., et al.: \u201cSemi-supervised learning with deep generative models for asset failure prediction\u201d. In: arXiv preprint arXiv:1709.00845 (2017)"},{"key":"953_CR235","unstructured":"Yuan, Y.: \u201cOn the power of foundation models\u201d. In: International Conference on Machine Learning. PMLR. , 40519\u201340530 (2023)"},{"key":"953_CR236","unstructured":"Yuan, Y.: \u201cOn the Power of Foundation Models\u201d. In: International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA. Ed. by Andreas Krause et al. Vol.\u00a0202. Proceedings of Machine Learning Research. PMLR, 40519\u201340530, (2023). https:\/\/proceedings.mlr.press\/v202\/yuan23b.html"},{"key":"953_CR237","doi-asserted-by":"publisher","unstructured":"Zeakis, A., et al.: \u201cPre-trained Embeddings for Entity Resolution: An Experimental Analysis\u201d. In: Proc. VLDB Endow. 16.9 ,2225\u20132238, (2023). https:\/\/doi.org\/10.14778\/3598581.3598594. 
https:\/\/www.vldb.org\/pvldb\/vol16\/p2225-skoutas.pdf","DOI":"10.14778\/3598581.3598594"},{"key":"953_CR238","doi-asserted-by":"crossref","unstructured":"Zhai, X., et al.: \u201cScaling vision transformers\u201d. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 12104\u201312113 (2022)","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"953_CR239","doi-asserted-by":"crossref","unstructured":"Zhang, B., Soh, H.: \u201cExtract, Define, Canonicalize: An LLM-based Framework for Knowledge Graph Construction\u201d. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, EMNLP 2024, Miami, FL, USA, November 12-16, 2024. Ed. by Yaser Al-Onaizan, Mohit Bansal, and Yun-Nung Chen. Association for Computational Linguistics, 9820\u20139836, (2024). https:\/\/aclanthology.org\/2024.emnlp-main.548","DOI":"10.18653\/v1\/2024.emnlp-main.548"},{"key":"953_CR240","doi-asserted-by":"publisher","unstructured":"Zhang, H., et al.: \u201cExperimental Analysis of Large-scale Learnable Vector Storage Compression\u201d. In: Proc. VLDB Endow. 17.4 , 808\u2013822, (2023). https:\/\/doi.org\/10.14778\/3636218.3636234. https:\/\/www.vldb.org\/pvldb\/vol17\/p808-zhang.pdf","DOI":"10.14778\/3636218.3636234"},{"key":"953_CR241","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: \u201cJellyfish: Instruction-tuning local large language models for data preprocessing\u201d. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing. 8754\u20138782 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.497"},{"key":"953_CR242","doi-asserted-by":"crossref","unstructured":"Zhang, L., Chen, Z.: \u201cLarge Language Model-Based Interpretable Machine Learning Control in Building Energy Systems\u201d. In: ArXiv abs\/2402.09584 (2024). 
https:\/\/api.semanticscholar.org\/CorpusID:267682233","DOI":"10.1016\/j.enbuild.2024.114278"},{"key":"953_CR243","doi-asserted-by":"publisher","unstructured":"Zhang, X., Wang, D., Dou, L., Zhu, Q., Che, W.: \u201cA survey of table reasoning with large language models\u201d. In: Frontiers Comput. Sci. 19.9 (2025), p.\u00a0199348. https:\/\/doi.org\/10.1007\/s11704-024-40330-z","DOI":"10.1007\/s11704-024-40330-z"},{"key":"953_CR244","doi-asserted-by":"publisher","unstructured":"Zhang, Y., Henkel, J., Floratou, A., Cahoon, J., Deep, S., Patel, J.M.:\u201cReAcTable: Enhancing ReAct for Table Question Answering\u201d. In: Proc. VLDB Endow. 17.8 (2024), pp.\u00a01981\u20131994. https:\/\/doi.org\/10.14778\/3659437.3659452. https:\/\/www.vldb.org\/pvldb\/vol17\/p1981-zhang.pdf","DOI":"10.14778\/3659437.3659452"},{"key":"953_CR245","doi-asserted-by":"publisher","unstructured":"Zhang, Z., Groth, P., Calixto, I., Schelter, S:\u201cA Deep Dive Into Cross-Dataset Entity Matching with Large and Small Language Models\u201d. In: Proceedings 28th International Conference on Extending Database Technology, EDBT 2025, Barcelona, Spain, March 25-28, 2025. Ed. by Alkis Simitsis et al. OpenProceedings.org, 2025, pp.\u00a0922\u2013934. https:\/\/doi.org\/10.48786\/edbt.2025.75","DOI":"10.48786\/edbt.2025.75"},{"key":"953_CR246","doi-asserted-by":"publisher","unstructured":"Zhao, H., Chen, H., Yang, F., Liu, N., Deng, H., Cai, H., Du, M.:\u201cExplainability for Large Language Models: A Survey\u201d. In: ACM Trans. Intell. Syst. Technol. 15.2 (2024), 20:1\u201320:38. https:\/\/doi.org\/10.1145\/3639372","DOI":"10.1145\/3639372"},{"key":"953_CR247","unstructured":"Zheng, R., Cheng, C. A., Daum\u00e9 III, H., Huang, F., Kolobov, A.:\u201cPRISE: LLM-Style Sequence Compression for Learning Temporal Action Abstractions in Control\u201d. 
In: Forty-first International Conference on Machine Learning"},{"key":"953_CR248","unstructured":"Zheng, Y., Li, B., Lin, Z., Luo, Y., Zhou, X., Lin, C., Su, J.:\u201cRevolutionizing Database Q&A with Large Language Models: Comprehensive Benchmark and Evaluation\u201d. In: arXiv preprint arXiv:2409.04475 (2024)"},{"key":"953_CR249","unstructured":"Zhou, C., Li, Q., Li, C., Yu, J., Liu, Y., Wang, G., Sun, L.:\u201cA comprehensive survey on pretrained foundation models: A history from bert to chatgpt\u201d. In: arXiv preprint arXiv:2302.09419 (2023)"},{"key":"953_CR250","doi-asserted-by":"publisher","unstructured":"Zhou, J., et al.: Training and Serving System of Foundation Models: A Comprehensive Survey. IEEE Open J. Comput. Soc. 5, 107\u2013119 (2024). https:\/\/doi.org\/10.1109\/OJCS.2024.3380828","DOI":"10.1109\/OJCS.2024.3380828"},{"key":"953_CR251","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Su, Y., Sun, Y., Wang, S., Wang, T., He, R., Fang, Y.:\u201cIn-depth Analysis of Graph-based RAG in a Unified Framework\u201d. In: arXiv preprint arXiv:2503.04338 (2025)","DOI":"10.14778\/3773731.3773738"},{"key":"953_CR252","doi-asserted-by":"crossref","unstructured":"Zhu, P., Li, N., Zhao, Z.:\u201cRetrieval-augmented Query Reformulation for Heterogeneous Research Asset Retrieval in Virtual Research Environment\u201d. In: Companion Proceedings of the ACM on Web Conference 2024. , 907\u2013910 (2024)","DOI":"10.1145\/3589335.3651553"},{"key":"953_CR253","doi-asserted-by":"crossref","unstructured":"Zuo, C., Assadi, S., Deng, D.:\u201cSp-ine: Scaling up programming-by-negative-example for string filtering and transformation\u201d. In: Proceedings of the 2022 International Conference on Management of Data. 
, 521\u2013530 (2022)","DOI":"10.1145\/3514221.3517908"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-025-00953-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00778-025-00953-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-025-00953-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T07:28:21Z","timestamp":1770449301000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00778-025-00953-5"}},"subtitle":["Pre-trained foundation model-enhanced scalability"],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":253,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["953"],"URL":"https:\/\/doi.org\/10.1007\/s00778-025-00953-5","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]},"assertion":[{"value":"30 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"10"}}