{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T12:01:34Z","timestamp":1778328094672,"version":"3.51.4"},"reference-count":56,"publisher":"Ubiquity Press, Ltd.","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.5334\/dsj-2025-037","type":"journal-article","created":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T12:06:26Z","timestamp":1765281986000},"source":"Crossref","is-referenced-by-count":1,"title":["Benchmarking Tabular Data Synthesis: Evaluating Tools, Metrics, and Datasets on Prosumer Hardware for End-Users"],"prefix":"10.5334","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-8002-7134","authenticated-orcid":false,"given":"Maria Fernanda","family":"Davila Restrepo","sequence":"first","affiliation":[]},{"given":"Benjamin","family":"Wollmer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0675-4116","authenticated-orcid":false,"given":"Fabian","family":"Panse","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3512-5789","authenticated-orcid":false,"given":"Wolfram","family":"Wingerath","sequence":"additional","affiliation":[]}],"member":"3285","reference":[{"issue":"3","key":"key20251209130817_B1","doi-asserted-by":"crossref","first-page":"305","DOI":"10.14778\/3632093.3632097","article-title":"\u2018TSGBench: Time Series Generation Benchmark\u2019","volume":"17","year":"2023","journal-title":"Proc VLDB Endow"},{"key":"key20251209130817_B2","unstructured":"Arjovsky, M., Chintala, S. and Bottou, L. (2017) \u2018Wasserstein GAN\u2019. Available at: https:\/\/arxiv.org\/abs\/1701.07875."},{"key":"key20251209130817_B3","unstructured":"Banerjee, S. (2016) \u2018Airline Dataset\u2019. Available at: https:\/\/www.kaggle.com\/datasets\/iamsouravbanerjee\/airline-dataset."},{"key":"key20251209130817_B4","year":"1996"},{"key":"key20251209130817_B5","unstructured":"BlastChar (2017) \u2018Customer Churn Dataset\u2019. Available at: https:\/\/www.kaggle.com\/datasets\/blastchar\/telco-customer-churn."},{"key":"key20251209130817_B6","first-page":"1","article-title":"\u2018Language models are realistic tabular data generators\u2019","year":"2023"},{"key":"key20251209130817_B7","unstructured":"Brandt, J. and Lanz\u00e9n, E. (n.d.) \u2018A Comparative Review of SMOTE and ADASYN in Imbalanced Data Classification\u2019, Probability Theory and Statistics. Available at: https:\/\/urn.kb.se\/resolve?urn=urn:nbn:se:uu:diva-432162."},{"key":"key20251209130817_B8","first-page":"1097","article-title":"\u2018Flexible Database Generators\u2019","year":"2005"},{"key":"key20251209130817_B9","unstructured":"Cantalupo, S. (2021) \u2018SMOGN: Synthetic Minority Oversampling TEchnique for Regression with Gaussian Noise\u2019. Available at: https:\/\/github.com\/nickkunz\/smogn, gitHub repository."},{"key":"key20251209130817_B10","article-title":"\u2018Pre-trained transformers: an empirical comparison\u2019","volume":"9","year":"2022","journal-title":"Machine Learning with Applications"},{"issue":"1","key":"key20251209130817_B11","doi-asserted-by":"crossref","first-page":"300","DOI":"10.1109\/TAI.2022.3229289","article-title":"\u2018A universal metric for robust evaluation of synthetic tabular data\u2019","volume":"5","year":"2024","journal-title":"IEEE Transactions on Artificial Intelligence"},{"key":"key20251209130817_B12","unstructured":"City of Los Angeles (2013) \u2018City Payroll Dataset\u2019. Available at: https:\/\/www.kaggle.com\/datasets\/cityofLA\/city-payroll-data."},{"issue":"4","key":"key20251209130817_B13","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1145\/3712311.3712315","article-title":"\u2018Navigating tabular data synthesis research understanding user needs and tool capabilities\u2019","volume":"53","year":"2025","journal-title":"SIGMOD Record"},{"key":"key20251209130817_B14","unstructured":"Davila, R.M.F., Turaev, A. and Wingerath, W. (2025) \u2018Measuring LLM Sensitivity in Transformer-based Tabular Data Synthesis\u2019. Available at: https:\/\/arxiv.org\/abs\/2509.20768."},{"key":"key20251209130817_B15","unstructured":"Dua, D. and Graff, C. (2017) \u2018UCI machine learning repository\u2019. Available at: https:\/\/archive.ics.uci.edu\/ml."},{"key":"key20251209130817_B16","unstructured":"European Parliament (2023) \u2018Boosting data sharing in the EU: what are the benefits?\u2019. Available at: https:\/\/www.europarl.europa.eu\/news\/en\/headlines\/society\/20220331STO26411\/boosting-data-sharing-in-the-eu-what-are-the-benefits (Accessed: 2024-10-30)."},{"issue":"10","key":"key20251209130817_B17","doi-asserted-by":"crossref","first-page":"1886","DOI":"10.14778\/3467861.3467876","article-title":"\u2018Kamino: constraint-aware differentially private data synthesis\u2019","volume":"14","year":"2021","journal-title":"Proceedings of the VLDB Endowment (PVLDB)"},{"key":"key20251209130817_B18","article-title":"\u2018Generation and evaluation of synthetic patient data\u2019","volume":"20","year":"2020","journal-title":"BMC Medical Research Methodology"},{"key":"key20251209130817_B19","first-page":"243","article-title":"\u2018Quickly generating billion-record synthetic databases\u2019","year":"1994"},{"key":"key20251209130817_B20","unstructured":"harlfoxem, K.C. (2016) \u2018House sales in King County dataset\u2019. Available at: https:\/\/www.kaggle.com\/datasets\/harlfoxem\/housesalesprediction."},{"key":"key20251209130817_B21","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1016\/j.neucom.2022.04.053","article-title":"\u2018Synthetic data generation for tabular health records: A systematic review\u2019","volume":"493","year":"2022","journal-title":"Neurocomputing"},{"key":"key20251209130817_B22","article-title":"\u2018Heart Disease\u2019, UCI machine learning repository","year":"1989"},{"key":"key20251209130817_B23","article-title":"\u2018Wilt\u2019, UCI machine learning repository","year":"2013"},{"key":"key20251209130817_B24","article-title":"\u2018Diabetes\u2019, UCI machine learning repository"},{"key":"key20251209130817_B25","first-page":"1","article-title":"\u2018STaSy: Score-based Tabular data Synthesis\u2019","year":"2023"},{"key":"key20251209130817_B26","first-page":"17564","article-title":"\u2018TabDDPM: modelling tabular data with diffusion models\u2019","year":"2023"},{"key":"key20251209130817_B27","unstructured":"Kumar, H. (2020) \u2018Medical insurance price prediction dataset\u2019. Available at: https:\/\/www.kaggle.com\/datasets\/harishkumardatalab\/medical-insurance-price-prediction\/data."},{"key":"key20251209130817_B28","first-page":"1","article-title":"\u2018Imbalanced-learn: A Python toolbox to tackle the curse of imbalanced datasets in machine learning\u2019","year":"2017"},{"key":"key20251209130817_B29","first-page":"1","article-title":"\u2018GOGGLE: Generative modelling for tabular data by learning relational structure\u2019","year":"2023"},{"key":"key20251209130817_B30","unstructured":"LLC, G. (2010) \u2018Kaggle: data science platform and datasets\u2019. Available at: https:\/\/www.kaggle.com."},{"key":"key20251209130817_B31","unstructured":"Mescheder, L., Geiger, A. and Nowozin, S. (2018) \u2018Which training methods for GANs do actually converge?\u2019. Available at: https:\/\/arxiv.org\/abs\/1801.04406."},{"key":"key20251209130817_B32","article-title":"\u2018Abalone\u2019, UCI machine learning repository","year":"1994"},{"key":"key20251209130817_B33","doi-asserted-by":"crossref","unstructured":"Neufeld, A., Moerkotte, G. and Lockemann, P.C. (1993) \u2018Generating consistent test data for a variable set of general consistency constraints\u2019, VLDB Journal, 2(2), pp. 173\u2013213. Available at: http:\/\/www.vldb.org\/journal\/VLDBJ2\/P172.pdf.","DOI":"10.1007\/BF01232186"},{"key":"key20251209130817_B34","unstructured":"Nugent, C. (n.d.) \u2018California housing prices\u2019. Available at: https:\/\/www.kaggle.com\/datasets\/camnugent\/california-housing-prices."},{"issue":"10","key":"key20251209130817_B35","doi-asserted-by":"crossref","first-page":"1071","DOI":"10.14778\/3231751.3231757","article-title":"\u2018Data Synthesis based on generative adversarial networks\u2019","volume":"11","year":"2018","journal-title":"Proceedings of the VLDB Endowment (PVLDB)"},{"key":"key20251209130817_B36","first-page":"399","article-title":"\u2018The synthetic data vault\u2019","year":"2016"},{"key":"key20251209130817_B37","first-page":"2825","article-title":"\u2018Scikit-learn: Machine learning in Python\u2019","volume":"12","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"key20251209130817_B38","article-title":"\u2018Synthcity: A benchmark framework for diverse use cases of tabular synthetic data\u2019","year":"2023"},{"key":"key20251209130817_B39","article-title":"\u2018Credit Approval\u2019, UCI machine learning repository","year":"1987"},{"key":"key20251209130817_B40","article-title":"\u2018MiniBooNE particle identification\u2019, UCI machine learning repository","year":"2005"},{"key":"key20251209130817_B41","article-title":"\u2018SynthRO: A dashboard-based benchmarking framework for health-related synthetic tabular data\u2019","year":"2025"},{"key":"key20251209130817_B42","article-title":"Jensen-Shannon Divergence \u2014 SciPy v1.10.1 Manual","author":"SciPy","year":"2024"},{"key":"key20251209130817_B43","article-title":"Kolmogorov-Smirnov Test \u2014 SciPy v1.10.1 Manual","author":"SciPy","year":"2024"},{"key":"key20251209130817_B44","article-title":"Kullback-Leibler Divergence \u2014 SciPy v1.10.1 Manual","author":"SciPy","year":"2024"},{"key":"key20251209130817_B45","article-title":"Wasserstein Distance \u2014 SciPy v1.10.1 Manual","author":"SciPy","year":"2024"},{"key":"key20251209130817_B46","first-page":"1","article-title":"\u2018REaLTabFormer: Generating realistic relational and tabular data using transformers\u2019","year":"2023","journal-title":"CoRR"},{"key":"key20251209130817_B47","first-page":"1","article-title":"\u2018AutoDiff: combining Auto-encoder and Diffusion model for tabular data synthesizing\u2019","year":"2023","journal-title":"CoRR"},{"key":"key20251209130817_B48","unstructured":"Torgo, L. (2014) \u2018House Dataset\u2019. Available at: https:\/\/www.openml.org\/search?type=data&sort=runs&id=574&status=active."},{"key":"key20251209130817_B49","article-title":"\u2018HIGGS\u2019, UCI machine learning repository","year":"2014"},{"key":"key20251209130817_B50","first-page":"7333","article-title":"\u2018Modeling tabular data using conditional GAN\u2019","year":"2019"},{"key":"key20251209130817_B51","article-title":"\u2018Mixed-type tabular data synthesis with score-based diffusion in latent space\u2019","year":"2024"},{"issue":"4","key":"key20251209130817_B52","first-page":"25:1","article-title":"\u2018PrivBayes: Private data release via Bayesian networks\u2019","volume":"42","year":"2017","journal-title":"ACM Transactions on Database Systems"},{"key":"key20251209130817_B53","first-page":"298","article-title":"\u2018GANBLR++: Incorporating capacity to generate numeric attributes and leveraging unrestricted Bayesian networks\u2019","year":"2022"},{"key":"key20251209130817_B55","first-page":"247","volume-title":"Advances in Knowledge Discovery and Data Mining","year":"2025"},{"key":"key20251209130817_B54","first-page":"97","article-title":"\u2018CTAB-GAN: Effective table data synthesizing\u2019","year":"2021"},{"key":"key20251209130817_B56","article-title":"\u2018CTAB-GAN+: enhancing tabular data synthesis\u2019","volume":"6","year":"2024","journal-title":"Frontiers in Big Data"}],"container-title":["Data Science Journal"],"original-title":[],"language":"en","deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T13:08:32Z","timestamp":1765285712000},"score":1,"resource":{"primary":{"URL":"https:\/\/datascience.codata.org\/articles\/10.5334\/dsj-2025-037\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":56,"alternative-id":["10.5334\/dsj-2025-037"],"URL":"https:\/\/doi.org\/10.5334\/dsj-2025-037","relation":{},"ISSN":["1683-1470"],"issn-type":[{"value":"1683-1470","type":"print"}],"subject":[],"published":{"date-parts":[[2025]]},"article-number":"37"}}