{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:40:53Z","timestamp":1755826853973,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T00:00:00Z","timestamp":1741564800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Ant Group","award":["Research Intern Program"],"award-info":[{"award-number":["Research Intern Program"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,10]]},"DOI":"10.1145\/3701551.3703501","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T12:33:36Z","timestamp":1740573216000},"page":"764-772","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["MedTransTab: Advancing Medical Cross-Table Tabular Data Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4381-486X","authenticated-orcid":false,"given":"Yuyan","family":"Chen","sequence":"first","affiliation":[{"name":"Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0521-9664","authenticated-orcid":false,"given":"Qingpei","family":"Guo","sequence":"additional","affiliation":[{"name":"Ant Group, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7825-3861","authenticated-orcid":false,"given":"Shuangjie","family":"You","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2355-288X","authenticated-orcid":false,"given":"Zhixu","family":"Li","sequence":"additional","affiliation":[{"name":"Suzhou Key Laboratory of Artificial Intelligence and Social Governance Technologies, International College (Suzhou Research Institute) &amp; School of Information, Renmin University of China, Suzhou &amp; Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,3,10]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12939-018-0748-6"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i8.16826"},{"key":"e_1_3_2_1_3_1","volume-title":"Scarf: Self-supervised contrastive learning using random feature corruption. arXiv preprint arXiv:2106.15147","author":"Bahri Dara","year":"2021","unstructured":"Dara Bahri, Heinrich Jiang, Yi Tay, and Donald Metzler. 2021. Scarf: Self-supervised contrastive learning using random feature corruption. arXiv preprint arXiv:2106.15147 (2021)."},{"key":"e_1_3_2_1_4_1","volume-title":"Language models are realistic tabular data generators. arXiv preprint arXiv:2210.06280","author":"Borisov Vadim","year":"2022","unstructured":"Vadim Borisov, Kathrin Se\u00dfler, Tobias Leemann, Martin Pawelczyk, and Gjergji Kasneci. 2022. Language models are realistic tabular data generators. arXiv preprint arXiv:2210.06280 (2022)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614904"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614905"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570431"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i22.34536"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.130"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635860"},{"key":"e_1_3_2_1_11_1","volume-title":"Synthesising multi-modal minority samples for tabular data. arXiv preprint arXiv:2105.08204","author":"Darabi Sajad","year":"2021","unstructured":"Sajad Darabi and Yotam Elor. 2021. Synthesising multi-modal minority samples for tabular data. arXiv preprint arXiv:2105.08204 (2021)."},{"key":"e_1_3_2_1_12_1","first-page":"18932","article-title":"Revisiting deep learning models for tabular data","volume":"34","author":"Gorishniy Yury","year":"2021","unstructured":"Yury Gorishniy, Ivan Rubachev, Valentin Khrulkov, and Artem Babenko. 2021. Revisiting deep learning models for tabular data. Advances in Neural Information Processing Systems, Vol. 34 (2021), 18932--18943.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_13_1","volume-title":"Tabpfn: A transformer that solves small tabular classification problems in a second. arXiv preprint arXiv:2207.01848","author":"Hollmann Noah","year":"2022","unstructured":"Noah Hollmann, Samuel M\u00fcller, Katharina Eggensperger, and Frank Hutter. 2022. Tabpfn: A transformer that solves small tabular classification problems in a second. arXiv preprint arXiv:2207.01848 (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"Tabtransformer: Tabular data modeling using contextual embeddings. arXiv preprint arXiv:2012.06678","author":"Huang Xin","year":"2020","unstructured":"Xin Huang, Ashish Khetan, Milan Cvitkovic, and Zohar Karnin. 2020. Tabtransformer: Tabular data modeling using contextual embeddings. arXiv preprint arXiv:2012.06678 (2020)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocaa127"},{"key":"e_1_3_2_1_16_1","first-page":"28742","article-title":"Self-attention between datapoints: Going beyond individual input-output pairs in deep learning","volume":"34","author":"Kossen Jannik","year":"2021","unstructured":"Jannik Kossen, Neil Band, Clare Lyle, Aidan N Gomez, Thomas Rainforth, and Yarin Gal. 2021. Self-attention between datapoints: Going beyond individual input-output pairs in deep learning. Advances in Neural Information Processing Systems, Vol. 34 (2021), 28742--28756.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","volume-title":"Unified structure generation for universal information extraction. arXiv preprint arXiv:2203.12277","author":"Lu Yaojie","year":"2022","unstructured":"Yaojie Lu, Qing Liu, Dai Dai, Xinyan Xiao, Hongyu Lin, Xianpei Han, Le Sun, and Hua Wu. 2022. Unified structure generation for universal information extraction. arXiv preprint arXiv:2203.12277 (2022)."},{"volume-title":"Clinical trials: a practical approach","author":"Pocock Stuart J","key":"e_1_3_2_1_18_1","unstructured":"Stuart J Pocock. 2013. Clinical trials: a practical approach. John Wiley & Sons."},{"key":"e_1_3_2_1_19_1","volume-title":"Saint: Improved neural networks for tabular data via row attention and contrastive pre-training. arXiv preprint arXiv:2106.01342","author":"Somepalli Gowthami","year":"2021","unstructured":"Gowthami Somepalli, Micah Goldblum, Avi Schwarzschild, C Bayan Bruss, and Tom Goldstein. 2021. Saint: Improved neural networks for tabular data via row attention and contrastive pre-training. arXiv preprint arXiv:2106.01342 (2021)."},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the Workshop on Economics of Privacy and Data Labor at the 37 th International Conference on Machine Learning (ICML).","author":"Vardhan L Vivek Harsha","year":"2020","unstructured":"L Vivek Harsha Vardhan and Stanley Kok. 2020. Generating privacy-preserving synthetic tabular data using oblivious variational autoencoders. In Proceedings of the Workshop on Economics of Privacy and Data Labor at the 37 th International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_21_1","first-page":"2902","article-title":"Transtab: Learning transferable tabular transformers across tables","volume":"35","author":"Wang Zifeng","year":"2022","unstructured":"Zifeng Wang and Jimeng Sun. 2022. Transtab: Learning transferable tabular transformers across tables. Advances in Neural Information Processing Systems, Vol. 35 (2022), 2902--2915.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","volume-title":"PyTrial: A Comprehensive Platform for Artificial Intelligence for Drug Development. arXiv preprint arXiv:2306.04018","author":"Wang Zifeng","year":"2023","unstructured":"Zifeng Wang, Brandon Theodorou, Tianfan Fu, Cao Xiao, and Jimeng Sun. 2023. PyTrial: A Comprehensive Platform for Artificial Intelligence for Drug Development. arXiv preprint arXiv:2306.04018 (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"Fastformer: Additive attention can be all you need. arXiv preprint arXiv:2108.09084","author":"Wu Chuhan","year":"2021","unstructured":"Chuhan Wu, Fangzhao Wu, Tao Qi, Yongfeng Huang, and Xing Xie. 2021. Fastformer: Additive attention can be all you need. arXiv preprint arXiv:2108.09084 (2021)."},{"key":"e_1_3_2_1_24_1","volume-title":"CT-BERT: Learning Better Tabular Representations Through Cross-Table Pre-training. arXiv preprint arXiv:2307.04308","author":"Ye Chao","year":"2023","unstructured":"Chao Ye, Guoshan Lu, Haobo Wang, Liyao Li, Sai Wu, Gang Chen, and Junbo Zhao. 2023. CT-BERT: Learning Better Tabular Representations Through Cross-Table Pre-training. arXiv preprint arXiv:2307.04308 (2023)."},{"key":"e_1_3_2_1_25_1","first-page":"11033","article-title":"Vime: Extending the success of self-and semi-supervised learning to tabular domain","volume":"33","author":"Yoon Jinsung","year":"2020","unstructured":"Jinsung Yoon, Yao Zhang, James Jordon, and Mihaela van der Schaar. 2020. Vime: Extending the success of self-and semi-supervised learning to tabular domain. Advances in Neural Information Processing Systems, Vol. 33 (2020), 11033--11043.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_26_1","volume-title":"Chatdoctor: A medical chat model fine-tuned on llama model using medical domain knowledge. arXiv preprint arXiv:2303.14070","author":"Yunxiang Li","year":"2023","unstructured":"Li Yunxiang, Li Zihan, Zhang Kai, Dan Ruilong, and Zhang You. 2023. Chatdoctor: A medical chat model fine-tuned on llama model using medical domain knowledge. arXiv preprint arXiv:2303.14070 (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"PMC-Patients: A Large-scale Dataset of Patient Summaries and Relations for Benchmarking Retrieval-based Clinical Decision Support Systems. arXiv preprint arXiv:2202.13876","author":"Zhao Zhengyun","year":"2022","unstructured":"Zhengyun Zhao, Qiao Jin, Fangyuan Chen, Tuorui Peng, and Sheng Yu. 2022. PMC-Patients: A Large-scale Dataset of Patient Summaries and Relations for Benchmarking Retrieval-based Clinical Decision Support Systems. arXiv preprint arXiv:2202.13876 (2022)."},{"key":"e_1_3_2_1_28_1","volume-title":"XTab: Cross-table Pretraining for Tabular Transformers. arXiv preprint arXiv:2305.06090","author":"Zhu Bingzhao","year":"2023","unstructured":"Bingzhao Zhu, Xingjian Shi, Nick Erickson, Mu Li, George Karypis, and Mahsa Shoaran. 2023. XTab: Cross-table Pretraining for Tabular Transformers. arXiv preprint arXiv:2305.06090 (2023)."}],"event":{"name":"WSDM '25: The Eighteenth ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Hannover Germany","acronym":"WSDM '25"},"container-title":["Proceedings of the Eighteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701551.3703501","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701551.3703501","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T09:17:31Z","timestamp":1755767851000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701551.3703501"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,10]]},"references-count":28,"alternative-id":["10.1145\/3701551.3703501","10.1145\/3701551"],"URL":"https:\/\/doi.org\/10.1145\/3701551.3703501","relation":{},"subject":[],"published":{"date-parts":[[2025,3,10]]},"assertion":[{"value":"2025-03-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}