{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:08:13Z","timestamp":1755907693426,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,14]]},"DOI":"10.1145\/3677052.3698625","type":"proceedings-article","created":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T06:38:06Z","timestamp":1731566286000},"page":"547-554","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Entity-based Financial Tabular Data Synthesis with Diffusion Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3141-5341","authenticated-orcid":false,"given":"Changshuo","family":"Liu","sequence":"first","affiliation":[{"name":"Strike Technologies LLC, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3291-247X","authenticated-orcid":false,"given":"Canyao","family":"Liu","sequence":"additional","affiliation":[{"name":"Yale University, US"}]}],"member":"320","published-online":{"date-parts":[[2024,11,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383455.3422554"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3229161"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/1747335.1747337"},{"key":"e_1_3_2_1_4_1","volume-title":"NeurIPS 2022 Workshop on Distribution Shifts: Connecting Methods and Applications.","author":"Cao Defu","year":"2022","unstructured":"Defu Cao, Yousef El-Laham, Loc Trinh, Svitlana Vyetrenko, and Yan Liu. 2022. A Synthetic Limit Order Book Dataset for Benchmarking Forecasting Algorithms under Distributional Shift. In NeurIPS 2022 Workshop on Distribution Shifts: Connecting Methods and Applications."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446690"},{"key":"e_1_3_2_1_6_1","volume-title":"Diffusion models beat gans on image synthesis. Advances in neural information processing systems 34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in neural information processing systems 34 (2021), 8780\u20138794."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.3390\/math10152733"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-023-00792-7"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the thirteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings, 249\u2013256","author":"Glorot Xavier","year":"2010","unstructured":"Xavier Glorot and Yoshua Bengio. 2010. Understanding the difficulty of training deep feedforward neural networks. In Proceedings of the thirteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings, 249\u2013256."},{"key":"e_1_3_2_1_10_1","unstructured":"Manbir Gulati and Paul Roysdon. 2024. TabMT: Generating tabular data with masked transformers. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems 33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems 33 (2020), 6840\u20136851."},{"key":"e_1_3_2_1_12_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma P","year":"2013","unstructured":"Diederik\u00a0P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614844"},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Machine Learning. PMLR, 17564\u201317579","author":"Kotelnikov Akim","year":"2023","unstructured":"Akim Kotelnikov, Dmitry Baranchuk, Ivan Rubachev, and Artem Babenko. 2023. Tabddpm: Modelling tabular data with diffusion models. In International Conference on Machine Learning. PMLR, 17564\u201317579."},{"key":"e_1_3_2_1_15_1","volume-title":"SynthEval: A Framework for Detailed Utility and Privacy Evaluation of Tabular Synthetic Data. arXiv preprint arXiv:2404.15821","author":"Lautrup Anton\u00a0Danholt","year":"2024","unstructured":"Anton\u00a0Danholt Lautrup, Tobias Hyrup, Arthur Zimek, and Peter Schneider-Kamp. 2024. SynthEval: A Framework for Detailed Utility and Privacy Evaluation of Tabular Synthetic Data. arXiv preprint arXiv:2404.15821 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDMW51313.2020.00082"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAIT.2024.3403811"},{"key":"e_1_3_2_1_18_1","volume-title":"Time weaver: A conditional time series generation model. arXiv preprint arXiv:2403.02682","author":"Narasimhan Sai\u00a0Shankar","year":"2024","unstructured":"Sai\u00a0Shankar Narasimhan, Shubhankar Agarwal, Oguzhan Akcin, Sujay Sanghavi, and Sandeep Chinchali. 2024. Time weaver: A conditional time series generation model. arXiv preprint arXiv:2403.02682 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jocs.2022.101640"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Olist and Andr\u00e9 Sionek. 2018. Brazilian E-Commerce Public Dataset by Olist. https:\/\/doi.org\/10.34740\/KAGGLE\/DSV\/195341","DOI":"10.34740\/KAGGLE\/DSV\/195341"},{"key":"e_1_3_2_1_21_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_22_1","volume-title":"Synthetic data applications in finance. arXiv preprint arXiv:2401.00081","author":"Potluru K","year":"2023","unstructured":"Vamsi\u00a0K Potluru, Daniel Borrajo, Andrea Coletta, Niccol\u00f2 Dalmasso, Yousef El-Laham, Elizabeth Fons, Mohsen Ghassemi, Sriram Gopalakrishnan, Vikesh Gosai, Eleonora Krea\u010di\u0107, 2023. Synthetic data applications in finance. arXiv preprint arXiv:2401.00081 (2023)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604237.3626876"},{"key":"e_1_3_2_1_25_1","volume-title":"BARRA\u2019s risk models. Barra Research Insights","author":"Sheikh Aamir","year":"1996","unstructured":"Aamir Sheikh. 1996. BARRA\u2019s risk models. Barra Research Insights (1996), 1\u201324."},{"key":"e_1_3_2_1_26_1","volume-title":"International conference on machine learning. PMLR, 2256\u20132265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International conference on machine learning. PMLR, 2256\u20132265."},{"key":"e_1_3_2_1_27_1","volume-title":"Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.3233\/IDT-210195"},{"key":"e_1_3_2_1_29_1","first-page":"24804","article-title":"Csdi: Conditional score-based diffusion models for probabilistic time series imputation","volume":"34","author":"Tashiro Yusuke","year":"2021","unstructured":"Yusuke Tashiro, Jiaming Song, Yang Song, and Stefano Ermon. 2021. Csdi: Conditional score-based diffusion models for probabilistic time series imputation. Advances in Neural Information Processing Systems 34 (2021), 24804\u201324816.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_30_1","volume-title":"Synthetic Data Outliers: Navigating Identity Disclosure. arXiv preprint arXiv:2406.02736","author":"Trindade Carolina","year":"2024","unstructured":"Carolina Trindade, Lu\u00eds Antunes, T\u00e2nia Carvalho, and Nuno Moniz. 2024. Synthetic Data Outliers: Navigating Identity Disclosure. arXiv preprint arXiv:2406.02736 (2024)."},{"key":"e_1_3_2_1_31_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1080\/14697688.2020.1730426"},{"key":"e_1_3_2_1_33_1","first-page":"954","article-title":"Why do artificially generated data help adversarial robustness","volume":"35","author":"Xing Yue","year":"2022","unstructured":"Yue Xing, Qifan Song, and Guang Cheng. 2022. Why do artificially generated data help adversarial robustness. Advances in Neural Information Processing Systems 35 (2022), 954\u2013966.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_34_1","volume-title":"Modeling tabular data using conditional gan. Advances in neural information processing systems 32","author":"Xu Lei","year":"2019","unstructured":"Lei Xu, Maria Skoularidou, Alfredo Cuesta-Infante, and Kalyan Veeramachaneni. 2019. Modeling tabular data using conditional gan. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-61146-0_26"},{"key":"e_1_3_2_1_36_1","volume-title":"A survey on diffusion models for time series and spatio-temporal data. arXiv preprint arXiv:2404.18886","author":"Yang Yiyuan","year":"2024","unstructured":"Yiyuan Yang, Ming Jin, Haomin Wen, Chaoli Zhang, Yuxuan Liang, Lintao Ma, Yi Wang, Chenghao Liu, Bin Yang, Zenglin Xu, 2024. A survey on diffusion models for time series and spatio-temporal data. arXiv preprint arXiv:2404.18886 (2024)."},{"key":"e_1_3_2_1_37_1","unstructured":"Yelp. [n. d.]. Yelp Open Dataset. https:\/\/www.yelp.com\/dataset"},{"key":"e_1_3_2_1_38_1","volume-title":"Mixed-type tabular data synthesis with score-based diffusion in latent space. arXiv preprint arXiv:2310.09656","author":"Zhang Hengrui","year":"2023","unstructured":"Hengrui Zhang, Jiani Zhang, Balasubramaniam Srinivasan, Zhengyuan Shen, Xiao Qin, Christos Faloutsos, Huzefa Rangwala, and George Karypis. 2023. Mixed-type tabular data synthesis with score-based diffusion in latent space. arXiv preprint arXiv:2310.09656 (2023)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData55660.2022.10020639"}],"event":{"name":"ICAIF '24: 5th ACM International Conference on AI in Finance","acronym":"ICAIF '24","location":"Brooklyn NY USA"},"container-title":["Proceedings of the 5th ACM International Conference on AI in Finance"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698625","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677052.3698625","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:11:05Z","timestamp":1755882665000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698625"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,14]]},"references-count":39,"alternative-id":["10.1145\/3677052.3698625","10.1145\/3677052"],"URL":"https:\/\/doi.org\/10.1145\/3677052.3698625","relation":{},"subject":[],"published":{"date-parts":[[2024,11,14]]},"assertion":[{"value":"2024-11-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}