{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T07:17:41Z","timestamp":1769757461130,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589334.3645707","type":"proceedings-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T07:08:13Z","timestamp":1715152093000},"page":"4449-4459","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Towards Cross-Table Masked Pretraining for Web Data Mining"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1356-8246","authenticated-orcid":false,"given":"Chao","family":"Ye","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1732-9617","authenticated-orcid":false,"given":"Guoshan","family":"Lu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8586-3048","authenticated-orcid":false,"given":"Haobo","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5235-1982","authenticated-orcid":false,"given":"Liyao","family":"Li","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1866-9197","authenticated-orcid":false,"given":"Sai","family":"Wu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7483-0045","authenticated-orcid":false,"given":"Gang","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3637-2936","authenticated-orcid":false,"given":"Junbo","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i8.16826"},{"key":"e_1_3_2_2_2_1","unstructured":"Vadim Borisov Kathrin Se\u00dfler Tobias Leemann Martin Pawelczyk and Gjergji Kasneci. 2023. Language Models are Realistic Tabular Data Generators. arxiv: 2210.06280 [cs.LG]"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.14778\/1453856.1453916"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1541880.1541882"},{"key":"e_1_3_2_2_5_1","volume-title":"TabCaps: A Capsule Neural Network for Tabular Data Classification with BoW Routing. In International Conference on Learning Representations.","author":"Chen Jintai","year":"2022","unstructured":"Jintai Chen, KuanLun Liao, Yanwen Fang, Danny Chen, and Jian Wu. 2022a. TabCaps: A Capsule Neural Network for Tabular Data Classification with BoW Routing. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i4.20309"},{"key":"e_1_3_2_2_7_1","volume-title":"Danny Ziyi Chen, and Jian Wu","author":"Chen Jintai","year":"2023","unstructured":"Jintai Chen, Jiahuan Yan, Danny Ziyi Chen, and Jian Wu. 2023. ExcelFormer: A Neural Network Surpassing GBDTs on Tabular Data. arXiv preprint arXiv:2301.02819 (2023)."},{"key":"e_1_3_2_2_8_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Chen Shuo","year":"2019","unstructured":"Shuo Chen, Lei Luo, Jian Yang, Chen Gong, Jun Li, and Heng Huang. 2019. Curvilinear distance metric learning. Advances in Neural Information Processing Systems , Vol. 32 (2019)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"e_1_3_2_2_10_1","volume-title":"International conference on machine learning. PMLR, 1597--1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PMLR, 1597--1607."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542700.3542709"},{"key":"e_1_3_2_2_12_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_13_1","volume-title":"Artificial neural networks (the multilayer perceptron)-a review of applications in the atmospheric sciences. Atmospheric environment","author":"Gardner Matt W","year":"1998","unstructured":"Matt W Gardner and SR Dorling. 1998. Artificial neural networks (the multilayer perceptron)-a review of applications in the atmospheric sciences. Atmospheric environment, Vol. 32, 14--15 (1998), 2627--2636."},{"key":"e_1_3_2_2_14_1","first-page":"18932","article-title":"Revisiting deep learning models for tabular data","volume":"34","author":"Gorishniy Yury","year":"2021","unstructured":"Yury Gorishniy, Ivan Rubachev, Valentin Khrulkov, and Artem Babenko. 2021. Revisiting deep learning models for tabular data. Advances in Neural Information Processing Systems , Vol. 34 (2021), 18932--18943.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_2_16_1","volume-title":"Thomas M\u00fcller, Francesco Piccinno, and Julian Martin Eisenschlos.","author":"Herzig Jonathan","year":"2020","unstructured":"Jonathan Herzig, Pawe\u0142 Krzysztof Nowak, Thomas M\u00fcller, Francesco Piccinno, and Julian Martin Eisenschlos. 2020. TaPas: Weakly supervised table parsing via pre-training. arXiv preprint arXiv:2004.02349 (2020)."},{"key":"e_1_3_2_2_17_1","unstructured":"Noah Hollmann Samuel M\u00fcller Katharina Eggensperger and Frank Hutter. 2023. TabPFN: A Transformer That Solves Small Tabular Classification Problems in a Second. arxiv: 2207.01848 [cs.LG]"},{"key":"e_1_3_2_2_18_1","volume-title":"Tabtransformer: Tabular data modeling using contextual embeddings. arXiv preprint arXiv:2012.06678","author":"Huang Xin","year":"2020","unstructured":"Xin Huang, Ashish Khetan, Milan Cvitkovic, and Zohar Karnin. 2020. Tabtransformer: Tabular data modeling using contextual embeddings. arXiv preprint arXiv:2012.06678 (2020)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588710"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330993"},{"key":"e_1_3_2_2_21_1","volume-title":"International Conference on Machine Learning. PMLR, 9916--9937","author":"Jarrett Daniel","unstructured":"Daniel Jarrett, Bogdan C Cebere, Tennison Liu, Alicia Curth, and Mihaela van der Schaar. 2022. Hyperimpute: Generalized iterative imputation with automatic model selection. In International Conference on Machine Learning. PMLR, 9916--9937."},{"key":"e_1_3_2_2_22_1","volume-title":"Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems","author":"Ke Guolin","year":"2017","unstructured":"Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems , Vol. 30 (2017)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10362"},{"key":"e_1_3_2_2_24_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"crossref","unstructured":"Alexander Kirillov Eric Mintun Nikhila Ravi Hanzi Mao Chloe Rolland Laura Gustafson Tete Xiao Spencer Whitehead Alexander C Berg Wan-Yen Lo et al. 2023. Segment anything. arXiv preprint arXiv:2304.02643 (2023).","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_2_2_26_1","volume-title":"Machine learning for medical diagnosis: history, state of the art and perspective. Artificial Intelligence in medicine","author":"Kononenko Igor","year":"2001","unstructured":"Igor Kononenko. 2001. Machine learning for medical diagnosis: history, state of the art and perspective. Artificial Intelligence in medicine, Vol. 23, 1 (2001), 89--109."},{"key":"e_1_3_2_2_27_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Li Liyao","year":"2023","unstructured":"Liyao Li, Haobo Wang, Liangyu Zha, Qingyi Huang, Sai Wu, Gang Chen, and Junbo Zhao. 2023. Learning a Data-Driven Policy Network for Pre-Training Automated Feature Engineering. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583382"},{"key":"e_1_3_2_2_29_1","volume-title":"PTab: Using the Pre-trained Language Model for Modeling Tabular Data. arXiv preprint arXiv:2209.08060","author":"Liu Guang","year":"2022","unstructured":"Guang Liu, Jie Yang, and Ledell Wu. 2022. PTab: Using the Pre-trained Language Model for Modeling Tabular Data. arXiv preprint arXiv:2209.08060 (2022)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11925"},{"key":"e_1_3_2_2_31_1","volume-title":"AUC: a misleading measure of the performance of predictive distribution models. Global ecology and Biogeography","author":"Lobo Jorge M","year":"2008","unstructured":"Jorge M Lobo, Alberto Jim\u00e9nez-Valverde, and Raimundo Real. 2008. AUC: a misleading measure of the performance of predictive distribution models. Global ecology and Biogeography, Vol. 17, 2 (2008), 145--151."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583527"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330679"},{"key":"e_1_3_2_2_34_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_2_35_1","unstructured":"OpenAI. 2022. ChatGPT. https:\/\/openai.com\/blog\/chatgpt."},{"key":"e_1_3_2_2_36_1","volume-title":"Energy-based Automated Model Evaluation. In The Twelfth International Conference on Learning Representations.","author":"Peng Ru","year":"2024","unstructured":"Ru Peng, Heming Zou, Haobo Wang, Yawen Zeng, Zenan Huang, and Junbo Zhao. 2024. Energy-based Automated Model Evaluation. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_2_2_38_1","volume-title":"International Conference on Learning Representations.","author":"Shenkar Tom","year":"2021","unstructured":"Tom Shenkar and Lior Wolf. 2021. Anomaly detection for tabular data with internal contrastive learning. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_39_1","volume-title":"Saint: Improved neural networks for tabular data via row attention and contrastive pre-training. arXiv preprint arXiv:2106.01342","author":"Somepalli Gowthami","year":"2021","unstructured":"Gowthami Somepalli, Micah Goldblum, Avi Schwarzschild, C Bayan Bruss, and Tom Goldstein. 2021. Saint: Improved neural networks for tabular data via row attention and contrastive pre-training. arXiv preprint arXiv:2106.01342 (2021)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357925"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btr597"},{"key":"e_1_3_2_2_42_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aurelien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. arxiv: 2302.13971 [cs.CL]"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3511972"},{"key":"e_1_3_2_2_44_1","first-page":"18853","article-title":"Subtab: Subsetting features of tabular data for self-supervised representation learning","volume":"34","author":"Ucar Talip","year":"2021","unstructured":"Talip Ucar, Ehsan Hajiramezanali, and Lindsay Edwards. 2021. Subtab: Subsetting features of tabular data for self-supervised representation learning. Advances in Neural Information Processing Systems , Vol. 34 (2021), 18853--18865.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_45_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems , Vol. 30 (2017)."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450090"},{"key":"e_1_3_2_2_47_1","volume-title":"International Conference on Learning Representations.","author":"Wang Haobo","year":"2021","unstructured":"Haobo Wang, Ruixuan Xiao, Yixuan Li, Lei Feng, Gang Niu, Gang Chen, and Junbo Zhao. 2021c. Pico: Contrastive label disambiguation for partial label learning. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_48_1","first-page":"1785","volume-title":"Chi","author":"Wang Ruoxi","year":"2021","unstructured":"Ruoxi Wang, Rakesh Shivanna, Derek Cheng, Sagar Jain, Dong Lin, Lichan Hong, and Ed Chi. 2021b. Dcn v2: Improved deep & cross network and practical lessons for web-scale learning to rank systems. , bibinfonumpages1785--1797 pages."},{"key":"e_1_3_2_2_49_1","volume-title":"Transtab: Learning transferable tabular transformers across tables. arXiv preprint arXiv:2205.09328","author":"Wang Zifeng","year":"2022","unstructured":"Zifeng Wang and Jimeng Sun. 2022. Transtab: Learning transferable tabular transformers across tables. arXiv preprint arXiv:2205.09328 (2022)."},{"key":"e_1_3_2_2_50_1","unstructured":"Raymond E Wright. 1995. Logistic regression. (1995)."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"e_1_3_2_2_52_1","volume-title":"Modeling Tabular data using Conditional GAN. arxiv","author":"Xu Lei","year":"1907","unstructured":"Lei Xu, Maria Skoularidou, Alfredo Cuesta-Infante, and Kalyan Veeramachaneni. 2019. Modeling Tabular data using Conditional GAN. arxiv: 1907.00503 [cs.LG]"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26272"},{"key":"e_1_3_2_2_54_1","volume-title":"Making Pre-trained Language Models Great on Tabular Prediction. In International Conference on Learning Representations.","author":"Yan Jiahuan","year":"2024","unstructured":"Jiahuan Yan, Hongxia Xu, Yiheng Zhu, Danny Chen, Jimeng Sun, Jian Wu, and Jintai Chen. 2024. Making Pre-trained Language Models Great on Tabular Prediction. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_55_1","volume-title":"Wen tau Yih, and Sebastian Riedel","author":"Yin Pengcheng","year":"2020","unstructured":"Pengcheng Yin, Graham Neubig, Wen tau Yih, and Sebastian Riedel. 2020. TaBERT: Pretraining for Joint Understanding of Textual and Tabular Data. arxiv: 2005.08314 [cs.CL]"},{"key":"e_1_3_2_2_56_1","first-page":"11033","article-title":"Vime: Extending the success of self-and semi-supervised learning to tabular domain","volume":"33","author":"Yoon Jinsung","year":"2020","unstructured":"Jinsung Yoon, Yao Zhang, James Jordon, and Mihaela van der Schaar. 2020. Vime: Extending the success of self-and semi-supervised learning to tabular domain. Advances in Neural Information Processing Systems , Vol. 33 (2020), 11033--11043.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"crossref","unstructured":"Tianping Zhang Shaowen Wang Shuicheng Yan Jian Li and Qian Liu. 2023. Generative Table Pre-training Empowers Models for Tabular Prediction. arxiv: 2305.09696 [cs.LG]","DOI":"10.18653\/v1\/2023.emnlp-main.917"},{"key":"e_1_3_2_2_58_1","volume-title":"Disentangled Causal Embedding With Contrastive Learning For Recommender System. arXiv preprint arXiv:2302.03248","author":"Zhao Weiqi","year":"2023","unstructured":"Weiqi Zhao, Dian Tang, Xin Chen, Dawei Lv, Daoli Ou, Biao Li, Peng Jiang, and Kun Gai. 2023. Disentangled Causal Embedding With Contrastive Learning For Recommender System. arXiv preprint arXiv:2302.03248 (2023)."},{"key":"e_1_3_2_2_59_1","unstructured":"Bingzhao Zhu Xingjian Shi Nick Erickson Mu Li George Karypis and Mahsa Shoaran. 2023. XTab: Cross-table Pretraining for Tabular Transformers. arxiv: 2305.06090 [cs.LG]io"}],"event":{"name":"WWW '24: The ACM Web Conference 2024","location":"Singapore Singapore","acronym":"WWW '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645707","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589334.3645707","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:26:24Z","timestamp":1755822384000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645707"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":59,"alternative-id":["10.1145\/3589334.3645707","10.1145\/3589334"],"URL":"https:\/\/doi.org\/10.1145\/3589334.3645707","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}