{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T15:23:32Z","timestamp":1767626612924,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":54,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819681822","type":"print"},{"value":"9789819681839","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-8183-9_28","type":"book-chapter","created":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T17:38:35Z","timestamp":1750354715000},"page":"381-398","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Generative AI for\u00a0Tabular Data Synthesis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3691-8652","authenticated-orcid":false,"given":"Alex X.","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6203-6664","authenticated-orcid":false,"given":"Binh P.","family":"Nguyen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5194-8083","authenticated-orcid":false,"given":"Colin R.","family":"Simpson","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"28_CR1","unstructured":"Alaa, A., Van\u00a0Breugel, B., Saveliev, E.S., van\u00a0der Schaar, M.: How faithful is your synthetic data? sample-level metrics for evaluating and auditing generative models. In: International Conference on Machine Learning, pp. 290\u2013306. PMLR (2022)"},{"key":"28_CR2","unstructured":"Baak, M., Brugman, S., Rojas, I.F., Dalmeida, L., Urlus, R.E., Oger, J.B.: Synthsonic: fast, probabilistic modeling and synthesis of tabular data. In: International Conference on Artificial Intelligence and Statistics, pp. 4747\u20134763. PMLR (2022)"},{"issue":"1","key":"28_CR3","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1007730.1007735","volume":"6","author":"GE Batista","year":"2004","unstructured":"Batista, G.E., Prati, R.C., Monard, M.C.: A study of the behavior of several methods for balancing machine learning training data. ACM SIGKDD Explorations Newsl. 6(1), 20\u201329 (2004)","journal-title":"ACM SIGKDD Explorations Newsl."},{"key":"28_CR4","unstructured":"Borisov, V., Leemann, T., Se\u00dfler, K., Haug, J., Pawelczyk, M., Kasneci, G.: Deep neural networks and tabular data: a survey. IEEE Trans. Neural Netw. Learn. Syst. (2022)"},{"key":"28_CR5","unstructured":"Borisov, V., Sessler, K., Leemann, T., Pawelczyk, M., Kasneci, G.: Language models are realistic tabular data generators. In: The Eleventh International Conference on Learning Representations (2023)"},{"issue":"11","key":"28_CR6","doi-asserted-by":"publisher","first-page":"2190","DOI":"10.14778\/3476249.3476272","volume":"14","author":"K Cai","year":"2021","unstructured":"Cai, K., Lei, X., Wei, J., Xiao, X.: Data synthesis via differentially private markov random fields. Proc. VLDB Endowment 14(11), 2190\u20132202 (2021)","journal-title":"Proc. VLDB Endowment"},{"key":"28_CR7","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: SMOTE: synthetic minority over-sampling technique. J. Artif. Intell. Res. 16, 321\u2013357 (2002)","journal-title":"J. Artif. Intell. Res."},{"key":"28_CR8","unstructured":"Chen, Q., et al.: Differentially private data generative models. arXiv preprint arXiv:1812.02274 (2018)"},{"key":"28_CR9","unstructured":"Fang, L., Liu, A., Zhang, H., Zou, H.P., Zhang, W., Philip, S.Y.: TABGEN-RAG: iterative retrieval for tabular data generation with large language models. In: NeurIPS Table Representation Learning Workshop (2024)"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Feuerriegel, S., Hartmann, J., Janiesch, C., Zschech, P.: Generative AI. Bus. Inf. Syst. Eng. 66(1), 111\u2013126 (2024)","DOI":"10.1007\/s12599-023-00834-7"},{"issue":"1","key":"28_CR11","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1186\/s40537-023-00792-7","volume":"10","author":"J Fonseca","year":"2023","unstructured":"Fonseca, J., Bacao, F.: Tabular and latent space synthetic data generation: a literature review. J. Big Data 10(1), 115 (2023)","journal-title":"J. Big Data"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Galloni, A., Lend\u00e1k, I., Horv\u00e1th, T.: A novel evaluation metric for synthetic data generation. In: International Conference on Intelligent Data Engineering and Automated Learning, pp. 25\u201334. Springer (2020)","DOI":"10.1007\/978-3-030-62365-4_3"},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Giomi, M., Boenisch, F., Wehmeyer, C., Tasn\u00e1di, B.: A unified framework for quantifying privacy risk in synthetic data. arXiv preprint arXiv:2211.10459 (2022)","DOI":"10.56553\/popets-2023-0055"},{"key":"28_CR14","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, pp. 2672\u20132680 (2014)"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"He, H., Bai, Y., Garcia, E.A., Li, S.: ADASYN: adaptive synthetic sampling approach for imbalanced learning. In: IEEE International Joint Conference on Neural Networks, pp. 1322\u20131328. IEEE (2008)","DOI":"10.1109\/IJCNN.2008.4633969"},{"key":"28_CR16","unstructured":"Hudovernik, V.: Relational data generation with graph neural networks and latent diffusion models. In: NeurIPS Table Representation Learning Workshop (2024)"},{"key":"28_CR17","doi-asserted-by":"crossref","unstructured":"Jakubik, J., V\u00f6ssing, M., K\u00fchl, N., Walk, J., Satzger, G.: Data-centric artificial intelligence. Bus. Inf. Syst. Eng. 1\u20139 (2024)","DOI":"10.1007\/s12599-024-00857-8"},{"key":"28_CR18","unstructured":"Jiang, D., Zhang, G., Karami, M., Chen, X., Shao, Y., Yu, Y.: $$DP^2$$-VAE: differentially private pre-trained variational autoencoders. arXiv preprint arXiv:2208.03409 (2022)"},{"key":"28_CR19","unstructured":"Jordon, J., Yoon, J., Van Der\u00a0Schaar, M.: PATE-GAN: generating synthetic data with differential privacy guarantees. In: ICLR (2018)"},{"key":"28_CR20","unstructured":"Kim, J., Lee, C., Park, N.: STaSy: score-based tabular data synthesis. In: International Conference on Learning Representations (2023)"},{"key":"28_CR21","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. In: International Conference on Learning Representations (2014)"},{"key":"28_CR22","unstructured":"Kotelnikov, A., Baranchuk, D., Rubachev, I., Babenko, A.: TabDDPM: modelling tabular data with diffusion models. In: International Conference on Machine Learning, pp. 17564\u201317579. PMLR (2023)"},{"key":"28_CR23","unstructured":"Lee, C., Kim, J., Park, N.: CoDi: co-evolving contrastive diffusion models for mixed-type tabular synthesis. In: International Conference on Machine Learning, pp. 18940\u201318956. PMLR (2023)"},{"key":"28_CR24","unstructured":"Liu, T., Qian, Z., Berrevoets, J., van\u00a0der Schaar, M.: GOGGLE: generative modelling for tabular data by learning relational structure. In: The Eleventh International Conference on Learning Representations (2023)"},{"key":"28_CR25","doi-asserted-by":"crossref","unstructured":"Mohammed, A., Kora, R.: A comprehensive review on ensemble deep learning: opportunities and challenges. J. King Saud Univ.-Comput. Inf. Sci. (2023)","DOI":"10.1016\/j.jksuci.2023.01.014"},{"issue":"1","key":"28_CR26","doi-asserted-by":"publisher","first-page":"18","DOI":"10.3390\/asi4010018","volume":"4","author":"M Mukherjee","year":"2021","unstructured":"Mukherjee, M., Khushi, M.: SMOTE-ENC: a novel smote-based method to generate synthetic data for nominal and continuous features. Appl. Syst. Innov. 4(1), 18 (2021)","journal-title":"Appl. Syst. Innov."},{"issue":"11","key":"28_CR27","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v074.i11","volume":"74","author":"B Nowok","year":"2016","unstructured":"Nowok, B., Raab, G.M., Dibben, C., et al.: synthpop: bespoke creation of synthetic data in r. J. Stat. Softw. 74(11), 1\u201326 (2016)","journal-title":"J. Stat. Softw."},{"key":"28_CR28","doi-asserted-by":"crossref","unstructured":"Patki, N., Wedge, R., Veeramachaneni, K.: The synthetic data vault. In: 2016 IEEE International Conference on Data Science and Advanced Analytics (DSAA), pp. 399\u2013410. IEEE (2016)","DOI":"10.1109\/DSAA.2016.49"},{"key":"28_CR29","doi-asserted-by":"crossref","unstructured":"Ping, H., Stoyanovich, J., Howe, B.: Datasynthesizer: Privacy-preserving synthetic datasets. In: Proceedings of the 29th International Conference on Scientific and Statistical Database Management, pp.\u00a01\u20135 (2017)","DOI":"10.1145\/3085504.3091117"},{"key":"28_CR30","unstructured":"Qian, Z., Davis, R., van\u00a0der Schaar, M.: Synthcity: a benchmark framework for diverse use cases of tabular synthetic data. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Raghunathan, T.E.: Synthetic data. Ann. Rev. Stat. Appl. 8 (2021)","DOI":"10.1146\/annurev-statistics-040720-031848"},{"key":"28_CR32","unstructured":"Shi, J., Xu, M., Hua, H., Zhang, H., Ermon, S., Leskovec, J.: TabDiff: a unified diffusion model for multi-modal tabular data generation. In: NeurIPS 2024 Third Table Representation Learning Workshop (2024)"},{"key":"28_CR33","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1016\/j.inffus.2021.11.011","volume":"81","author":"R Shwartz-Ziv","year":"2022","unstructured":"Shwartz-Ziv, R., Armon, A.: Tabular data: deep learning is not all you need. Inf. Fusion 81, 84\u201390 (2022)","journal-title":"Inf. Fusion"},{"key":"28_CR34","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"28_CR35","unstructured":"Solatorio, A.V., Dupriez, O.: Realtabformer: generating realistic relational and tabular data using transformers. arXiv preprint arXiv:2302.02041 (2023)"},{"key":"28_CR36","unstructured":"Suh, N., Lin, X., Hsieh, D.Y., Honarkhah, M., Cheng, G.: AutoDiff: combining auto-encoder and diffusion model for tabular data synthesizing. In: NeurIPS Workshop on Synthetic Data Generation with Generative AI (2023)"},{"key":"28_CR37","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1016\/j.ins.2021.12.018","volume":"586","author":"A Torfi","year":"2022","unstructured":"Torfi, A., Fox, E.A., Reddy, C.K.: Differentially private synthetic medical data generation using convolutional GANs. Inf. Sci. 586, 485\u2013500 (2022)","journal-title":"Inf. Sci."},{"key":"28_CR38","unstructured":"Truda, G.: Generating tabular datasets under differential privacy. arXiv preprint arXiv:2308.14784 (2023)"},{"key":"28_CR39","unstructured":"Van\u00a0Breugel, B., Qian, Z., Van Der\u00a0Schaar, M.: Synthetic data, real errors: how (not) to publish and use synthetic data. In: International Conference on Machine Learning, pp. 34793\u201334808. PMLR (2023)"},{"key":"28_CR40","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"28_CR41","doi-asserted-by":"crossref","unstructured":"Wang, A.X., Chukova, S.S., Simpson, C.R., Nguyen, B.P.: Challenges and opportunities of generative models on tabular data. Appl. Soft Comput. 166 (2024)","DOI":"10.1016\/j.asoc.2024.112223"},{"issue":"1","key":"28_CR42","doi-asserted-by":"publisher","first-page":"103558","DOI":"10.1016\/j.ipm.2023.103558","volume":"61","author":"AX Wang","year":"2024","unstructured":"Wang, A.X., Chukova, S.S., Sporle, A., Milne, B.J., Simpson, C.R., Nguyen, B.P.: Enhancing public research on citizen data: an empirical investigation of data synthesis using statistics New Zealand\u2019s integrated data infrastructure. Inf. Process. Manage. 61(1), 103558 (2024)","journal-title":"Inf. Process. Manage."},{"key":"28_CR43","doi-asserted-by":"publisher","first-page":"109830","DOI":"10.1016\/j.compbiomed.2025.109830","volume":"188","author":"AX Wang","year":"2025","unstructured":"Wang, A.X., Le, V.T., Trung, H.N., Nguyen, B.P.: Addressing imbalance in health data: synthetic minority oversampling using deep learning. Comput. Biol. Med. 188, 109830 (2025)","journal-title":"Comput. Biol. Med."},{"key":"28_CR44","doi-asserted-by":"crossref","unstructured":"Wang, A.X., Nguyen, B.P.: Deterministic autoencoder using Wasserstein loss for tabular data generation. Neural Netw. 107208 (2025)","DOI":"10.1016\/j.neunet.2025.107208"},{"key":"28_CR45","doi-asserted-by":"crossref","unstructured":"Wang, A.X., Nguyen, B.P.: TTVAE: transformer-based generative modeling for tabular data generation. Artif. Intell. 104292 (2025)","DOI":"10.1016\/j.artint.2025.104292"},{"key":"28_CR46","doi-asserted-by":"publisher","first-page":"121610","DOI":"10.1016\/j.ins.2024.121610","volume":"691","author":"AX Wang","year":"2025","unstructured":"Wang, A.X., Simpson, C.R., Nguyen, B.P.: Blending is all you need: data-centric ensemble synthetic data. Inf. Sci. 691, 121610 (2025)","journal-title":"Inf. Sci."},{"key":"28_CR47","unstructured":"Wang, Y., et al.: HARMONIC: Harnessing llms for tabular data synthesis and privacy protection. In: The Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2024)"},{"key":"28_CR48","unstructured":"Xu, L., Skoularidou, M., Cuesta-Infante, A., Veeramachaneni, K.: Modeling tabular data using conditional GAN. In: Advances in Neural Information Processing Systems, pp. 7335\u20137345 (2019)"},{"key":"28_CR49","doi-asserted-by":"crossref","unstructured":"Zhai, J., Zhang, S., Chen, J., He, Q.: Autoencoder and its various variants. In: 2018 IEEE International Conference on Systems, Man, and Cybernetics (SMC), pp. 415\u2013419. IEEE (2018)","DOI":"10.1109\/SMC.2018.00080"},{"key":"28_CR50","unstructured":"Zhang, H., et al.: Mixed-type tabular data synthesis with score-based diffusion in latent space. In: International Conference on Learning Representations (2024)"},{"issue":"4","key":"28_CR51","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3134428","volume":"42","author":"J Zhang","year":"2017","unstructured":"Zhang, J., Cormode, G., Procopiuc, C.M., Srivastava, D., Xiao, X.: Privbayes: private data release via bayesian networks. ACM Trans. Database Syst. (TODS) 42(4), 1\u201341 (2017)","journal-title":"ACM Trans. Database Syst. (TODS)"},{"key":"28_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zaidi, N.A., Zhou, J., Li, G.: GANBLR: a tabular data generation model. In: IEEE International Conference on Data Mining, p.\u00a0181. IEEE (2021)","DOI":"10.1109\/ICDM51629.2021.00103"},{"key":"28_CR53","unstructured":"Zhao, Z., Birke, R., Chen, L.: Tabula: harnessing language models for tabular data synthesis. arXiv preprint arXiv:2310.12746 (2023)"},{"key":"28_CR54","doi-asserted-by":"publisher","first-page":"1296508","DOI":"10.3389\/fdata.2023.1296508","volume":"6","author":"Z Zhao","year":"2024","unstructured":"Zhao, Z., Kunar, A., Birke, R., Chen, L.Y.: CTAB-GAN+: enhancing tabular data synthesis. Front. Big Data 6, 1296508 (2024)","journal-title":"Front. Big Data"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-8183-9_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T17:38:54Z","timestamp":1750354734000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-8183-9_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819681822","9789819681839"],"references-count":54,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-8183-9_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"20 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sydney, NSW","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}