{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T03:43:42Z","timestamp":1777520622470,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679829","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:11Z","timestamp":1729452851000},"page":"1513-1522","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Self-Supervision Improves Diffusion Models for Tabular Data Imputation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4309-5076","authenticated-orcid":false,"given":"Yixin","family":"Liu","sequence":"first","affiliation":[{"name":"Monash University, Melbourne, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6431-0775","authenticated-orcid":false,"given":"Thalaiyasingam","family":"Ajanthan","sequence":"additional","affiliation":[{"name":"Amazon, Canberra, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3736-3410","authenticated-orcid":false,"given":"Hisham","family":"Husain","sequence":"additional","affiliation":[{"name":"Amazon, Melbourne, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0294-4561","authenticated-orcid":false,"given":"Vu","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Amazon, Adelaide, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBME.2017.2698602"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383455.3422554"},{"key":"e_1_3_2_1_3_1","unstructured":"Arthur Asuncion and David Newman. 2007. UCI machine learning repository."},{"key":"e_1_3_2_1_4_1","unstructured":"Jinyu Cai and Jicong Fan. 2022. Perturbation learning based anomaly detection. In Advances in Neural Information Processing Systems. 14317--14330."},{"key":"e_1_3_2_1_5_1","volume-title":"International conference on machine learning. PMLR, 1597--1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PMLR, 1597--1607."},{"key":"e_1_3_2_1_6_1","volume-title":"Radu Tudor Ionescu, and Mubarak Shah","author":"Croitoru Florinel-Alin","year":"2023","unstructured":"Florinel-Alin Croitoru, Vlad Hondru, Radu Tudor Ionescu, and Mubarak Shah. 2023. Diffusion models in vision: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"Diffusion models beat gans on image synthesis. Advances in neural information processing systems","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in neural information processing systems, Vol. 34 (2021), 8780--8794."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCA.2007.902631"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.04.053"},{"key":"e_1_3_2_1_10_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840--6851."},{"key":"e_1_3_2_1_11_1","first-page":"12454","article-title":"Argmax flows and multinomial diffusion: Learning categorical distributions","volume":"34","author":"Hoogeboom Emiel","year":"2021","unstructured":"Emiel Hoogeboom, Didrik Nielsen, Priyank Jaini, Patrick Forr\u00e9, and Max Welling. 2021. Argmax flows and multinomial diffusion: Learning categorical distributions. Advances in Neural Information Processing Systems, Vol. 34 (2021), 12454--12465.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","article-title":"Estimation of non-normalized statistical models by score matching","volume":"6","author":"Hyv\u00e4rinen Aapo","year":"2005","unstructured":"Aapo Hyv\u00e4rinen and Peter Dayan. 2005. Estimation of non-normalized statistical models by score matching. Journal of Machine Learning Research, Vol. 6, 4 (2005).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Machine Learning. 9916--9937","author":"Jarrett Daniel","unstructured":"Daniel Jarrett, Bogdan C Cebere, Tennison Liu, Alicia Curth, and Mihaela van der Schaar. 2022. Hyperimpute: Generalized iterative imputation with automatic model selection. In International Conference on Machine Learning. 9916--9937."},{"key":"e_1_3_2_1_14_1","volume-title":"Generating and Imputing Tabular Data via Diffusion and Flow-based Gradient-Boosted Trees. arXiv preprint arXiv:2309.09968","author":"Jolicoeur-Martineau Alexia","year":"2023","unstructured":"Alexia Jolicoeur-Martineau, Kilian Fatras, and Tal Kachman. 2023. Generating and Imputing Tabular Data via Diffusion and Flow-based Gradient-Boosted Trees. arXiv preprint arXiv:2309.09968 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Richard Judson Ann Richard David J Dix Keith Houck Matthew Martin Robert Kavlock Vicki Dellarco Tala Henry Todd Holderman Philip Sayre et al. 2009. The toxicity data landscape for environmental chemicals. Environmental health perspectives Vol. 117 5 (2009) 685--695.","DOI":"10.1289\/ehp.0800168"},{"key":"e_1_3_2_1_16_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Kim Jayoung","year":"2023","unstructured":"Jayoung Kim, Chaejeong Lee, and Noseong Park. 2023. STaSy: Score-based Tabular data Synthesis. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_17_1","volume-title":"DiffWave: A Versatile Diffusion Model for Audio Synthesis. In International Conference on Learning Representations.","author":"Kong Zhifeng","year":"2021","unstructured":"Zhifeng Kong, Wei Ping, Jiaji Huang, Kexin Zhao, and Bryan Catanzaro. 2021. DiffWave: A Versatile Diffusion Model for Audio Synthesis. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_18_1","volume-title":"International Conference on Machine Learning. PMLR, 17564--17579","author":"Kotelnikov Akim","year":"2023","unstructured":"Akim Kotelnikov, Dmitry Baranchuk, Ivan Rubachev, and Artem Babenko. 2023. Tabddpm: Modelling tabular data with diffusion models. In International Conference on Machine Learning. PMLR, 17564--17579."},{"key":"e_1_3_2_1_19_1","volume-title":"International Conference on Machine Learning. PMLR, 17920--17930","author":"Kulikov Vladimir","year":"2023","unstructured":"Vladimir Kulikov, Shahar Yadin, Matan Kleiner, and Tomer Michaeli. 2023. Sinddm: A single image denoising diffusion model. In International Conference on Machine Learning. PMLR, 17920--17930."},{"key":"e_1_3_2_1_20_1","first-page":"23806","article-title":"Miracle: Causally-aware imputation via learning missing data mechanisms","volume":"34","author":"Kyono Trent","year":"2021","unstructured":"Trent Kyono, Yao Zhang, Alexis Bellot, and Mihaela van der Schaar. 2021. Miracle: Causally-aware imputation via learning missing data mechanisms. Advances in Neural Information Processing Systems, Vol. 34 (2021), 23806--23817.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"CoDi: Co-Evolving Contrastive Diffusion Models for Mixed-Type Tabular Synthesis. In International Conference on Machine Learning. PMLR.","author":"Lee Chaejeong","year":"2023","unstructured":"Chaejeong Lee, Jayoung Kim, and Noseong Park. 2023. CoDi: Co-Evolving Contrastive Diffusion Models for Mixed-Type Tabular Synthesis. In International Conference on Machine Learning. PMLR."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i9.28935"},{"key":"e_1_3_2_1_23_1","volume-title":"ARC: A Generalist Graph Anomaly Detector with In-Context Learning. arXiv preprint arXiv:2405.16771","author":"Liu Yixin","year":"2024","unstructured":"Yixin Liu, Shiyuan Li, Yu Zheng, Qingfeng Chen, Chengqi Zhang, and Shirui Pan. 2024. ARC: A Generalist Graph Anomaly Detector with In-Context Learning. arXiv preprint arXiv:2405.16771 (2024)."},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning. PMLR, 21794--21812","author":"Liu Yue","year":"2023","unstructured":"Yue Liu, Ke Liang, Jun Xia, Sihang Zhou, Xihong Yang, Xinwang Liu, and Stan Z Li. 2023. Dink-net: Neural clustering on large graphs. In International Conference on Machine Learning. PMLR, 21794--21812."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_1_26_1","volume-title":"International conference on machine learning. PMLR, 4413--4423","author":"Mattei Pierre-Alexandre","year":"2019","unstructured":"Pierre-Alexandre Mattei and Jes Frellsen. 2019. MIWAE: Deep generative modelling and imputation of incomplete data sets. In International conference on machine learning. PMLR, 4413--4423."},{"key":"e_1_3_2_1_27_1","volume-title":"International Conference on Machine Learning. PMLR, 7130--7140","author":"Muzellec Boris","year":"2020","unstructured":"Boris Muzellec, Julie Josse, Claire Boyer, and Marco Cuturi. 2020. Missing data imputation using optimal transport. In International Conference on Machine Learning. PMLR, 7130--7140."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2877269"},{"key":"e_1_3_2_1_29_1","volume-title":"MissDiff: Training Diffusion Models on Tabular Data with Missing Values. arXiv preprint arXiv:2307.00467","author":"Ouyang Yidong","year":"2023","unstructured":"Yidong Ouyang, Liyan Xie, Chongxuan Li, and Guang Cheng. 2023. MissDiff: Training Diffusion Models on Tabular Data with Missing Values. arXiv preprint arXiv:2307.00467 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIS.2023.3332242"},{"key":"e_1_3_2_1_31_1","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in Neural Information Processing Systems, Vol. 32 (2019), 8026--8037.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_32_1","volume-title":"International Conference on Machine Learning. PMLR, 8857--8868","author":"Rasul Kashif","year":"2021","unstructured":"Kashif Rasul, Calvin Seward, Ingmar Schuster, and Roland Vollgraf. 2021. Autoregressive denoising diffusion models for multivariate probabilistic time series forecasting. In International Conference on Machine Learning. PMLR, 8857--8868."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1049\/cit2.12123"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-019-0197-0"},{"key":"e_1_3_2_1_35_1","volume-title":"International conference on machine learning. PMLR, 2256--2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International conference on machine learning. PMLR, 2256--2265."},{"key":"e_1_3_2_1_36_1","volume-title":"Improved techniques for training score-based generative models. Advances in neural information processing systems","author":"Song Yang","year":"2020","unstructured":"Yang Song and Stefano Ermon. 2020. Improved techniques for training score-based generative models. Advances in neural information processing systems, Vol. 33 (2020), 12438--12448."},{"key":"e_1_3_2_1_37_1","volume-title":"Bioinformatics","volume":"28","author":"Stekhoven Daniel J","year":"2012","unstructured":"Daniel J Stekhoven and Peter B\u00fchlmann. 2012. MissForest-non-parametric missing value imputation for mixed-type data. Bioinformatics, Vol. 28, 1 (2012)."},{"key":"e_1_3_2_1_38_1","first-page":"24804","article-title":"Csdi: Conditional score-based diffusion models for probabilistic time series imputation","volume":"34","author":"Tashiro Yusuke","year":"2021","unstructured":"Yusuke Tashiro, Jiaming Song, Yang Song, and Stefano Ermon. 2021. Csdi: Conditional score-based diffusion models for probabilistic time series imputation. Advances in Neural Information Processing Systems, Vol. 34 (2021), 24804--24816.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_39_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 2661--2676","author":"Telyatnikov Lev","year":"2023","unstructured":"Lev Telyatnikov and Simone Scardapane. 2023. EGG-GAE: scalable graph neural networks for tabular data imputation. In International Conference on Artificial Intelligence and Statistics. PMLR, 2661--2676."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/17.6.520"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v045.i03"},{"key":"e_1_3_2_1_42_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_43_1","volume-title":"Unifying Unsupervised Graph-Level Anomaly Detection and Out-of-Distribution Detection: A Benchmark. arXiv preprint arXiv:2406.15523","author":"Wang Yili","year":"2024","unstructured":"Yili Wang, Yixin Liu, Xu Shen, Chenyu Li, Kaize Ding, Rui Miao, Ying Wang, Shirui Pan, and Xin Wang. 2024. Unifying Unsupervised Graph-Level Anomaly Detection and Out-of-Distribution Detection: A Benchmark. arXiv preprint arXiv:2406.15523 (2024)."},{"key":"e_1_3_2_1_44_1","volume-title":"Geodiff: A geometric diffusion model for molecular conformation generation. arXiv preprint arXiv:2203.02923","author":"Xu Minkai","year":"2022","unstructured":"Minkai Xu, Lantao Yu, Yang Song, Chence Shi, Stefano Ermon, and Jian Tang. 2022. Geodiff: A geometric diffusion model for molecular conformation generation. arXiv preprint arXiv:2203.02923 (2022)."},{"key":"e_1_3_2_1_45_1","volume-title":"Diffusion models: A comprehensive survey of methods and applications. Comput. Surveys","author":"Yang Ling","year":"2022","unstructured":"Ling Yang, Zhilong Zhang, Yang Song, Shenda Hong, Runsheng Xu, Yue Zhao, Wentao Zhang, Bin Cui, and Ming-Hsuan Yang. 2022. Diffusion models: A comprehensive survey of methods and applications. Comput. Surveys (2022)."},{"key":"e_1_3_2_1_46_1","volume-title":"International conference on machine learning. PMLR, 5689--5698","author":"Yoon Jinsung","year":"2018","unstructured":"Jinsung Yoon, James Jordon, and Mihaela Schaar. 2018. Gain: Missing data imputation using generative adversarial nets. In International conference on machine learning. PMLR, 5689--5698."},{"key":"e_1_3_2_1_47_1","first-page":"19075","article-title":"Handling missing data with graph representation learning","volume":"33","author":"You Jiaxuan","year":"2020","unstructured":"Jiaxuan You, Xiaobai Ma, Yi Ding, Mykel J Kochenderfer, and Jure Leskovec. 2020. Handling missing data with graph representation learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 19075--19087.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_48_1","volume-title":"Diffusion models for missing value imputation in tabular data. arXiv preprint arXiv:2210.17128","author":"Zheng Shuhan","year":"2022","unstructured":"Shuhan Zheng and Nontawat Charoenphakdee. 2022. Diffusion models for missing value imputation in tabular data. arXiv preprint arXiv:2210.17128 (2022)."},{"key":"e_1_3_2_1_49_1","volume-title":"Alan Wee-Chung Liew, and Shirui Pan","author":"Zheng Xin","year":"2023","unstructured":"Xin Zheng, Yixin Liu, Zhifeng Bao, Meng Fang, Xia Hu, Alan Wee-Chung Liew, and Shirui Pan. 2023. Towards data-centric graph machine learning: Review and outlook. arXiv preprint arXiv:2309.10979 (2023)."},{"key":"e_1_3_2_1_50_1","volume-title":"International Conference on Machine Learning. PMLR, 42492--42505","author":"Zheng Yizhen","year":"2023","unstructured":"Yizhen Zheng, He Zhang, Vincent Lee, Yu Zheng, Xiao Wang, and Shirui Pan. 2023. Finding the missing-half: Graph complementary learning for homophily-prone and heterophily-prone graphs. In International Conference on Machine Learning. PMLR, 42492--42505."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26348"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679829","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679829","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:07Z","timestamp":1750294687000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679829"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":51,"alternative-id":["10.1145\/3627673.3679829","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679829","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}