{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,27]],"date-time":"2025-09-27T13:52:19Z","timestamp":1758981139682,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R35GM134927"],"award-info":[{"award-number":["R35GM134927"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583297","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:51Z","timestamp":1682551851000},"page":"2110-2121","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Preserving Missing Data Distribution in Synthetic Data"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5837-5361","authenticated-orcid":false,"given":"Xinyue","family":"Wang","sequence":"first","affiliation":[{"name":"Rutgers University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9674-7747","authenticated-orcid":false,"given":"Hafiz","family":"Asif","sequence":"additional","affiliation":[{"name":"Rutgers University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7420-6947","authenticated-orcid":false,"given":"Jaideep","family":"Vaidya","sequence":"additional","affiliation":[{"name":"Rutgers University, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Health insurance portability and accountability act of","author":"Act Accountability","year":"1996","unstructured":"Accountability Act. 1996. Health insurance portability and accountability act of 1996. Public law 104 (1996), 191."},{"key":"e_1_3_2_1_2_1","volume-title":"Missing data. Bmj 334, 7590","author":"Altman G","year":"2007","unstructured":"Douglas\u00a0G Altman and J\u00a0Martin Bland. 2007. Missing data. Bmj 334, 7590 (2007), 424\u2013424."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1145\/3559613.3563202"},{"key":"e_1_3_2_1_4_1","volume-title":"Approximability of discriminators implies diversity in GANs. arXiv preprint arXiv:1806.10586","author":"Bai Yu","year":"2018","unstructured":"Yu Bai, Tengyu Ma, and Andrej Risteski. 2018. Approximability of discriminators implies diversity in GANs. arXiv preprint arXiv:1806.10586 (2018)."},{"key":"e_1_3_2_1_5_1","volume-title":"Deep neural networks and tabular data: A survey. arXiv preprint arXiv:2110.01889","author":"Borisov Vadim","year":"2021","unstructured":"Vadim Borisov, Tobias Leemann, Kathrin Se\u00dfler, Johannes Haug, Martin Pawelczyk, and Gjergji Kasneci. 2021. Deep neural networks and tabular data: A survey. arXiv preprint arXiv:2110.01889 (2021)."},{"key":"e_1_3_2_1_6_1","volume-title":"The use of the area under the ROC curve in the evaluation of machine learning algorithms. Pattern recognition 30, 7","author":"Bradley P","year":"1997","unstructured":"Andrew\u00a0P Bradley. 1997. The use of the area under the ROC curve in the evaluation of machine learning algorithms. Pattern recognition 30, 7 (1997), 1145\u20131159."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_7_1","DOI":"10.14778\/3476249.3476272"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1145\/3459992"},{"key":"e_1_3_2_1_9_1","volume-title":"SB-1121 California Consumer Privacy Act of","author":"Legislature Website California State","year":"2018","unstructured":"California State Legislature Website. 2018. SB-1121 California Consumer Privacy Act of 2018. https:\/\/leginfo.legislature.ca.gov\/faces\/billTextClient.xhtml? bill_id=201720180SB1121, Last accessed on 2022-10."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_10_1","DOI":"10.1145\/3372297.3417238"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_11_1","DOI":"10.1016\/j.trc.2019.03.003"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1080\/00031305.2015.1086685"},{"key":"e_1_3_2_1_13_1","volume-title":"VAEs in the presence of missing data. arXiv preprint arXiv:2006.05301","author":"Collier Mark","year":"2020","unstructured":"Mark Collier, Alfredo Nazabal, and Christopher\u00a0KI Williams. 2020. VAEs in the presence of missing data. arXiv preprint arXiv:2006.05301 (2020)."},{"key":"e_1_3_2_1_14_1","volume-title":"Support-vector networks. Machine learning 20, 3","author":"Cortes Corinna","year":"1995","unstructured":"Corinna Cortes and Vladimir Vapnik. 1995. Support-vector networks. Machine learning 20, 3 (1995), 273\u2013297."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_15_1","DOI":"10.3390\/s19051181"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1145\/1864708.1864770"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/CVPR.2018.00367"},{"key":"e_1_3_2_1_18_1","article-title":"95\/46\/EC of the European Parliament and of the Council of 24 October 1995 on the protection of individuals with regard to the processing of personal data and on the free movement of such data","volume":"23","author":"Directive EU","year":"1995","unstructured":"EU Directive. 1995. 95\/46\/EC of the European Parliament and of the Council of 24 October 1995 on the protection of individuals with regard to the processing of personal data and on the free movement of such data. Official Journal of the EC 23, 6 (1995).","journal-title":"Official Journal of the EC"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1007\/978-3-540-79228-4_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_20_1","DOI":"10.14778\/3407790.3407802"},{"key":"e_1_3_2_1_21_1","volume-title":"Relational data synthesis using generative adversarial networks: A design space exploration. arXiv preprint arXiv:2008.12763","author":"Fan Ju","year":"2020","unstructured":"Ju Fan, Tongyu Liu, Guoliang Li, Junyou Chen, Yuwei Shen, and Xiaoyong Du. 2020. Relational data synthesis using generative adversarial networks: A design space exploration. arXiv preprint arXiv:2008.12763 (2020)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1109\/ICDM.2017.106"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_23_1","DOI":"10.1016\/j.neucom.2018.09.013"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_24_1","DOI":"10.1145\/3474838"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1007\/s00521-009-0295-6"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_26_1","DOI":"10.14778\/3467861.3467876"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_27_1","DOI":"10.14778\/3467861.3467876"},{"key":"e_1_3_2_1_28_1","volume-title":"Generation and evaluation of synthetic patient data. BMC medical research methodology 20, 1","author":"Goncalves Andre","year":"2020","unstructured":"Andre Goncalves, Priyadip Ray, Braden Soper, Jennifer Stevens, Linda Coyle, and Ana\u00a0Paula Sales. 2020. Generation and evaluation of synthetic patient data. BMC medical research methodology 20, 1 (2020), 1\u201340."},{"key":"e_1_3_2_1_29_1","volume-title":"Generative adversarial nets. Advances in neural information processing systems 27","author":"Goodfellow Ian","year":"2014","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems 27 (2014)."},{"key":"e_1_3_2_1_30_1","volume-title":"Logan: Membership inference attacks against generative models. arXiv preprint arXiv:1705.07663","author":"Hayes Jamie","year":"2017","unstructured":"Jamie Hayes, Luca Melis, George Danezis, and Emiliano De\u00a0Cristofaro. 2017. Logan: Membership inference attacks against generative models. arXiv preprint arXiv:1705.07663 (2017)."},{"volume-title":"Probabilistic graphical models: principles and techniques","author":"Koller Daphne","unstructured":"Daphne Koller and Nir Friedman. 2009. Probabilistic graphical models: principles and techniques. MIT press.","key":"e_1_3_2_1_31_1"},{"key":"e_1_3_2_1_32_1","volume-title":"Misgan: Learning from incomplete data with generative adversarial networks. arXiv preprint arXiv:1902.09599","author":"Cheng-Xian Li Steven","year":"2019","unstructured":"Steven Cheng-Xian Li, Bo Jiang, and Benjamin Marlin. 2019. Misgan: Learning from incomplete data with generative adversarial networks. arXiv preprint arXiv:1902.09599 (2019)."},{"key":"e_1_3_2_1_33_1","volume-title":"International Conference on Machine Learning. PMLR, 5937\u20135946","author":"Cheng-Xian Li Steven","year":"2020","unstructured":"Steven Cheng-Xian Li and Benjamin Marlin. 2020. Learning from irregularly-sampled time series: A missing data perspective. In International Conference on Machine Learning. PMLR, 5937\u20135946."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_34_1","DOI":"10.1145\/3318464.3384414"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_35_1","DOI":"10.1016\/j.jbi.2007.06.001"},{"key":"e_1_3_2_1_36_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 1522\u20131530","author":"Lin Zinan","year":"2021","unstructured":"Zinan Lin, Vyas Sekar, and Giulia Fanti. 2021. On the privacy properties of gan-generated samples. In International Conference on Artificial Intelligence and Statistics. PMLR, 1522\u20131530."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_37_1","DOI":"10.1080\/01621459.1988.10478722"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_38_1","DOI":"10.1016\/j.clinthera.2019.11.003"},{"key":"e_1_3_2_1_39_1","volume-title":"Identifiable Generative Models for Missing Not at Random Data Imputation. Advances in Neural Information Processing Systems 34","author":"Ma Chao","year":"2021","unstructured":"Chao Ma and Cheng Zhang. 2021. Identifiable Generative Models for Missing Not at Random Data Imputation. Advances in Neural Information Processing Systems 34 (2021)."},{"key":"e_1_3_2_1_40_1","first-page":"2579","article-title":"Visualizing data using t-SNE","author":"van\u00a0der Maaten Laurens","year":"2008","unstructured":"Laurens van\u00a0der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research 9, Nov (2008), 2579\u20132605.","journal-title":"Journal of machine learning research 9"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_41_1","DOI":"10.1080\/01621459.1984.10478038"},{"volume-title":"Missing data: A gentle introduction","author":"McKnight E","unstructured":"Patrick\u00a0E McKnight, Katherine\u00a0M McKnight, Souraya Sidani, and Aurelio\u00a0Jose Figueredo. 2007. Missing data: A gentle introduction. Guilford Press.","key":"e_1_3_2_1_42_1"},{"key":"e_1_3_2_1_43_1","volume-title":"Missing-values imputation algorithms for microarray gene expression data. Microarray Bioinformatics","author":"Moorthy Kohbalan","year":"2019","unstructured":"Kohbalan Moorthy, Aws\u00a0Naser Jaber, Mohd\u00a0Arfian Ismail, Ferda Ernawan, Mohd\u00a0Saberi Mohamad, and Safaai Deris. 2019. Missing-values imputation algorithms for microarray gene expression data. Microarray Bioinformatics (2019), 255\u2013266."},{"key":"e_1_3_2_1_44_1","volume-title":"From missing data imputation to data generation. Journal of Computational Science","author":"Neves Diogo\u00a0Telmo","year":"2022","unstructured":"Diogo\u00a0Telmo Neves, Jo\u00e3o Alves, Marcel\u00a0Ganesh Naik, Alberto\u00a0Jos\u00e9 Proen\u00e7a, and Fabian Prasser. 2022. From missing data imputation to data generation. Journal of Computational Science (2022), 101640."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_45_1","DOI":"10.1109\/ACCESS.2019.2905015"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_46_1","DOI":"10.14778\/3231751.3231757"},{"key":"e_1_3_2_1_47_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_48_1","DOI":"10.5555\/1953048.2078195"},{"unstructured":"Soorya Prakash. 2021. Brain Tumor Dataset. https:\/\/www.kaggle.com\/datasets\/sooryaprakash12\/texephyr.","key":"e_1_3_2_1_49_1"},{"volume-title":"Multiple imputation for nonresponse in surveys. Vol.\u00a081","author":"Rubin B","unstructured":"Donald\u00a0B Rubin. 2004. Multiple imputation for nonresponse in surveys. Vol.\u00a081. John Wiley & Sons.","key":"e_1_3_2_1_50_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_51_1","DOI":"10.18637\/jss.v077.i02"},{"unstructured":"Kshitij Singh. 2021. Retail Prices Of Commodities In India. https:\/\/www.kaggle.com\/datasets\/kk9969\/retail-prices-of-commodities-in-india?select=Monthly_Food_Retail_Prices.csv.","key":"e_1_3_2_1_52_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_53_1","DOI":"10.1142\/S0218488502001648"},{"unstructured":"Nicholas Tierney Di Cook Miles McBain and Colin Fay. 2021. naniar: Data Structures Summaries and Visualisations for Missing Data. https:\/\/CRAN.R-project.org\/package=naniar R package version 0.6.1.","key":"e_1_3_2_1_54_1"},{"key":"e_1_3_2_1_55_1","volume-title":"Classification and regression trees (CART) theory and applications","author":"Timofeev Roman","year":"2004","unstructured":"Roman Timofeev. 2004. Classification and regression trees (CART) theory and applications. Humboldt University, Berlin 54 (2004)."},{"key":"e_1_3_2_1_56_1","volume-title":"Generating high-fidelity synthetic patient data for assessing machine learning healthcare software. NPJ digital medicine 3, 1","author":"Tucker Allan","year":"2020","unstructured":"Allan Tucker, Zhenchen Wang, Ylenia Rotalinti, and Puja Myles. 2020. Generating high-fidelity synthetic patient data for assessing machine learning healthcare software. NPJ digital medicine 3, 1 (2020), 1\u201313."},{"key":"e_1_3_2_1_57_1","volume-title":"AMIA Annual Symposium Proceedings, Vol.\u00a02017","author":"Vaidya Jaideep","year":"2017","unstructured":"Jaideep Vaidya, Basit Shafiq, Muazzam Asani, Nabil Adam, Xiaoqian Jiang, and Lucila Ohno-Machado. 2017. A scalable privacy-preserving data generation methodology for exploratory analysis. In AMIA Annual Symposium Proceedings, Vol.\u00a02017. American Medical Informatics Association, 1695."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_58_1","DOI":"10.1145\/3448016.3457286"},{"volume-title":"Classification, clustering, and data mining applications","author":"Wagstaff Kiri","unstructured":"Kiri Wagstaff. 2004. Clustering with missing values: No imputation required. In Classification, clustering, and data mining applications. Springer, 649\u2013658.","key":"e_1_3_2_1_59_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_60_1","DOI":"10.1145\/3219819.3219869"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_61_1","DOI":"10.1145\/3366423.3380098"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_62_1","DOI":"10.1109\/CBMS.2019.00036"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_63_1","DOI":"10.1109\/TPAMI.2007.52"},{"key":"e_1_3_2_1_64_1","volume-title":"Modeling tabular data using conditional gan. Advances in Neural Information Processing Systems 32","author":"Xu Lei","year":"2019","unstructured":"Lei Xu, Maria Skoularidou, Alfredo Cuesta-Infante, and Kalyan Veeramachaneni. 2019. Modeling tabular data using conditional gan. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_65_1","volume-title":"Synthesizing tabular data using generative adversarial networks. arXiv preprint arXiv:1811.11264","author":"Xu Lei","year":"2018","unstructured":"Lei Xu and Kalyan Veeramachaneni. 2018. Synthesizing tabular data using generative adversarial networks. arXiv preprint arXiv:1811.11264 (2018)."},{"key":"e_1_3_2_1_66_1","volume-title":"ESANN 2019-European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning.","author":"Yale Andrew","year":"2019","unstructured":"Andrew Yale, Saloni Dash, Ritik Dutta, Isabelle Guyon, Adrien Pavao, and Kristin Bennett. 2019. Privacy preserving synthetic health data. In ESANN 2019-European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning."},{"doi-asserted-by":"crossref","unstructured":"Yang Yang Zhuangdi Xu and Dandan Song. 2016. Missing value imputation for microRNA expression data by using a GO-based similarity measure. In BMC bioinformatics Vol.\u00a017. BioMed Central 109\u2013116.","key":"e_1_3_2_1_67_1","DOI":"10.1186\/s12859-015-0853-0"},{"key":"e_1_3_2_1_68_1","volume-title":"International conference on machine learning. PMLR, 5689\u20135698","author":"Yoon Jinsung","year":"2018","unstructured":"Jinsung Yoon, James Jordon, and Mihaela Schaar. 2018. Gain: Missing data imputation using generative adversarial nets. In International conference on machine learning. PMLR, 5689\u20135698."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_69_1","DOI":"10.1145\/3366423.3380037"},{"key":"e_1_3_2_1_70_1","volume-title":"NIPS workshop on Adversarial Training, Vol.\u00a021","author":"Zhang Yizhe","year":"2016","unstructured":"Yizhe Zhang, Zhe Gan, and Lawrence Carin. 2016. Generating text via adversarial training. In NIPS workshop on Adversarial Training, Vol.\u00a021. academia. edu, 21\u201332."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_71_1","DOI":"10.1109\/ACCESS.2020.2980624"}],"event":{"sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"acronym":"WWW '23","name":"WWW '23: The ACM Web Conference 2023","location":"Austin TX USA"},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583297","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583297","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:22Z","timestamp":1750178242000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583297"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":71,"alternative-id":["10.1145\/3543507.3583297","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583297","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}