{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:51:31Z","timestamp":1775065891381,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Academy of Engineering Grainger Foundation Frontiers of Engineering Grants"},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2421864, 2421803, 2421865"],"award-info":[{"award-number":["2421864, 2421803, 2421865"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679102","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"5522-5525","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Tabular Data-centric AI: Challenges, Techniques and Future Perspectives"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1767-8024","authenticated-orcid":false,"given":"Yanjie","family":"Fu","sequence":"first","affiliation":[{"name":"Arizona State University, Tempe, Arizona, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3948-0059","authenticated-orcid":false,"given":"Dongjie","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Kansas, Lawrence, Kansas, 
USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6016-6465","authenticated-orcid":false,"given":"Hui","family":"Xiong","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6053-5977","authenticated-orcid":false,"given":"Kunpeng","family":"Liu","sequence":"additional","affiliation":[{"name":"Portland State University, Portland, Oregon, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13748-015-0080-y"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3229161"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599326"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3533028.3533310"},{"key":"e_1_3_2_1_5_1","volume-title":"Feature and instance joint selection: A reinforcement learning perspective. arXiv preprint arXiv:2205.07867","author":"Fan Wei","year":"2022","unstructured":"Wei Fan, Kunpeng Liu, Hao Liu, Hengshu Zhu, Hui Xiong, and Yanjie Fu. 2022. Feature and instance joint selection: A reinforcement learning perspective. arXiv preprint arXiv:2205.07867 (2022)."},{"key":"e_1_3_2_1_6_1","first-page":"18932","article-title":"Revisiting deep learning models for tabular data","volume":"34","author":"Gorishniy Yury","year":"2021","unstructured":"Yury Gorishniy, Ivan Rubachev, Valentin Khrulkov, and Artem Babenko. 2021. Revisiting deep learning models for tabular data. Advances in Neural Information Processing Systems, Vol. 
34 (2021), 18932--18943.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-12423-5_14"},{"key":"e_1_3_2_1_8_1","first-page":"33781","article-title":"Reimagining Synthetic Tabular Data Generation through Data-Centric AI: A Comprehensive Benchmark","volume":"36","author":"Hansen Lasse","year":"2023","unstructured":"Lasse Hansen, Nabeel Seedat, Mihaela van der Schaar, and Andrija Petrovic. 2023. Reimagining Synthetic Tabular Data Generation through Data-Centric AI: A Comprehensive Benchmark. Advances in Neural Information Processing Systems, Vol. 36 (2023), 33781--33823.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Hao Jie","year":"2024","unstructured":"Jie Hao, Kaiyi Ji, and Mingrui Liu. 2024. Bilevel Coreset Selection in Continual Learning: A New Formulation and Algorithm. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_10_1","volume-title":"Data-centric artificial intelligence. Business & Information Systems Engineering","author":"Jakubik Johannes","year":"2024","unstructured":"Johannes Jakubik, Michael V\u00f6ssing, Niklas K\u00fchl, Jannis Walk, and Gerhard Satzger. 2024. Data-centric artificial intelligence. Business & Information Systems Engineering (2024), 1--9."},{"key":"e_1_3_2_1_11_1","volume-title":"Retrieve: Coreset selection for efficient and robust semi-supervised learning. Advances in neural information processing systems","author":"Killamsetty Krishnateja","year":"2021","unstructured":"Krishnateja Killamsetty, Xujiang Zhao, Feng Chen, and Rishabh Iyer. 2021. Retrieve: Coreset selection for efficient and robust semi-supervised learning. Advances in neural information processing systems, Vol. 
34 (2021), 14488--14501."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330868"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-022-01812-3"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2021.3077212"},{"key":"e_1_3_2_1_15_1","volume-title":"Dataperf: Benchmarks for data-centric ai development. Advances in Neural Information Processing Systems","author":"Mazumder Mark","year":"2024","unstructured":"Mark Mazumder, Colby Banbury, Xiaozhe Yao, Bojan Karla\u0161, William Gaviria Rojas, Sudnya Diamos, Greg Diamos, Lynn He, Alicia Parrish, Hannah Rose Kirk, et al. 2024. Dataperf: Benchmarks for data-centric ai development. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_16_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"McElfresh Duncan","year":"2024","unstructured":"Duncan McElfresh, Sujay Khandagale, Jonathan Valverde, Vishak Prasad C, Ganesh Ramakrishnan, Micah Goldblum, and Colin White. 2024. When do neural nets outperform boosted trees on tabular data? Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_17_1","first-page":"1","article-title":"Ethical reflections on data-centric AI: balancing benefits and risks","volume":"2","author":"Patel Kaushikkumar","year":"2024","unstructured":"Kaushikkumar Patel. 2024. Ethical reflections on data-centric AI: balancing benefits and risks. International Journal of Artificial Intelligence Research and Development, Vol. 2, 1 (2024), 1--17.","journal-title":"International Journal of Artificial Intelligence Research and Development"},{"key":"e_1_3_2_1_18_1","volume-title":"What can data-centric AI learn from data and ML engineering? arXiv preprint arXiv:2112.06439","author":"Polyzotis Neoklis","year":"2021","unstructured":"Neoklis Polyzotis and Matei Zaharia. 2021. What can data-centric AI learn from data and ML engineering? 
arXiv preprint arXiv:2112.06439 (2021)."},{"key":"e_1_3_2_1_19_1","volume-title":"Dissecting Sample Hardness: A Fine-Grained Analysis of Hardness Characterization Methods for Data-Centric AI. arXiv preprint arXiv:2403.04551","author":"Seedat Nabeel","year":"2024","unstructured":"Nabeel Seedat, Fergus Imrie, and Mihaela van der Schaar. 2024. Dissecting Sample Hardness: A Fine-Grained Analysis of Hardness Characterization Methods for Data-Centric AI. arXiv preprint arXiv:2403.04551 (2024)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.3390\/app14010417"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18280\/isi.260107"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635752"},{"key":"e_1_3_2_1_23_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Tan Haoru","year":"2024","unstructured":"Haoru Tan, Sitong Wu, Fei Du, Yukang Chen, Zhibin Wang, Fan Wang, and Xiaojuan Qi. 2024. Data pruning via moving-one-sample-out. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_24_1","volume-title":"Contributing Dimension Structure of Deep Feature for Coreset Selection. arXiv preprint arXiv:2401.16193","author":"Wan Zhijing","year":"2024","unstructured":"Zhijing Wan, Zhixiang Wang, Yuran Wang, Zheng Wang, Hongyuan Zhu, and Shin'ichi Satoh. 2024. Contributing Dimension Structure of Deep Feature for Coreset Selection. arXiv preprint arXiv:2401.16193 (2024)."},{"key":"e_1_3_2_1_25_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Wang Dongjie","year":"2024","unstructured":"Dongjie Wang, Meng Xiao, Min Wu, Yuanchun Zhou, Yanjie Fu, et al. 2024. Reinforcement-enhanced autoregressive feature transformation: Gradient-steered search in continuous space for postfix expressions. Advances in Neural Information Processing Systems, Vol. 
36 (2024)."},{"key":"e_1_3_2_1_26_1","volume-title":"Traceable group-wise self-optimizing feature transformation learning: A dual optimization perspective. ACM Transactions on Knowledge Discovery from Data","author":"Xiao Meng","year":"2024","unstructured":"Meng Xiao, Dongjie Wang, Min Wu, Kunpeng Liu, Hui Xiong, Yuanchun Zhou, and Yanjie Fu. 2024. Traceable group-wise self-optimizing feature transformation learning: A dual optimization perspective. ACM Transactions on Knowledge Discovery from Data, Vol. 18, 4 (2024), 1--22."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM58522.2023.00078"},{"key":"e_1_3_2_1_28_1","volume-title":"Dataclue: A benchmark suite for data-centric nlp. arXiv preprint arXiv:2111.08647","author":"Xu Liang","year":"2021","unstructured":"Liang Xu, Jiacheng Liu, Xiang Pan, Xiaojing Lu, and Xiaofeng Hou. 2021. Dataclue: A benchmark suite for data-centric nlp. arXiv preprint arXiv:2111.08647 (2021)."},{"key":"e_1_3_2_1_29_1","volume-title":"Modeling tabular data using conditional gan. Advances in neural information processing systems","author":"Xu Lei","year":"2019","unstructured":"Lei Xu, Maria Skoularidou, Alfredo Cuesta-Infante, and Kalyan Veeramachaneni. 2019. Modeling tabular data using conditional gan. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_30_1","volume-title":"Data-centric artificial intelligence: A survey. arXiv preprint arXiv:2303.10158","author":"Zha Daochen","year":"2023","unstructured":"Daochen Zha, Zaid Pervaiz Bhat, Kwei-Herng Lai, Fan Yang, Zhimeng Jiang, Shaochen Zhong, and Xia Hu. 2023. Data-centric artificial intelligence: A survey. arXiv preprint arXiv:2303.10158 (2023)."},{"key":"e_1_3_2_1_31_1","volume-title":"Data-centric foundation models in computational healthcare: A survey. 
arXiv preprint arXiv:2401.02458","author":"Zhang Yunkun","year":"2024","unstructured":"Yunkun Zhang, Jin Gao, Zheling Tan, Lingfeng Zhou, Kexin Ding, Mu Zhou, Shaoting Zhang, and Dequan Wang. 2024. Data-centric foundation models in computational healthcare: A survey. arXiv preprint arXiv:2401.02458 (2024)."}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679102","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679102","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:28Z","timestamp":1750291408000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679102"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":31,"alternative-id":["10.1145\/3627673.3679102","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679102","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}