{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T19:46:09Z","timestamp":1778355969619,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSFC","award":["62206247"],"award-info":[{"award-number":["62206247"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583527","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:25Z","timestamp":1682551825000},"page":"1886-1896","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Catch: Collaborative Feature Set Search for Automated Feature Engineering"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1732-9617","authenticated-orcid":false,"given":"Guoshan","family":"Lu","sequence":"first","affiliation":[{"name":"Zhejiang University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8586-3048","authenticated-orcid":false,"given":"Haobo","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4461-5990","authenticated-orcid":false,"given":"Saisai","family":"Yang","sequence":"additional","affiliation":[{"name":"Institute of Computing Innovation, Zhejiang University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9800-6563","authenticated-orcid":false,"given":"Jing","family":"Yuan","sequence":"additional","affiliation":[{"name":"Institute of Computing Innovation, Zhejiang University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6284-0959","authenticated-orcid":false,"given":"Guozheng","family":"Yang","sequence":"additional","affiliation":[{"name":"Zheshang Bank Co., Ltd., China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2175-4560","authenticated-orcid":false,"given":"Cheng","family":"Zang","sequence":"additional","affiliation":[{"name":"Zheshang Bank Co., Ltd., China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7483-0045","authenticated-orcid":false,"given":"Gang","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3637-2936","authenticated-orcid":false,"given":"Junbo","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhejiang University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35326-0_43"},{"key":"e_1_3_2_1_2_1","volume-title":"20th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN). i6doc. com publ, 441\u2013446","author":"Anguita Davide","year":"2012","unstructured":"Davide Anguita, Luca Ghelardoni, Alessandro Ghio, Luca Oneto, and Sandro Ridella. 2012. The \u2018K\u2019in K-fold cross validation. In 20th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN). i6doc. com publ, 441\u2013446."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCNI.2017.8123782"},{"key":"e_1_3_2_1_4_1","unstructured":"Joao Bastos. 2007. Credit scoring with boosted decision trees. (2007)."},{"key":"e_1_3_2_1_5_1","volume-title":"Representation learning: A review and new perspectives","author":"Bengio Yoshua","year":"2013","unstructured":"Yoshua Bengio, Aaron Courville, and Pascal Vincent. 2013. Representation learning: A review and new perspectives. IEEE transactions on pattern analysis and machine intelligence 35, 8 (2013), 1798\u20131828."},{"key":"e_1_3_2_1_6_1","unstructured":"Svitlana Bondarenko Olena Laburtseva Olena Sadchenko Vira Lebedieva Oleksandra Haidukova and Tetyana Kharchenko. 2019. Modern lead generation in internet marketing for the development of enterprise potential. (2019)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380039"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186029"},{"key":"e_1_3_2_1_9_1","volume-title":"Xgboost: extreme gradient boosting. R package version 0.4-2 1, 4","author":"Chen Tianqi","year":"2015","unstructured":"Tianqi Chen, Tong He, Michael Benesty, Vadim Khotilovich, Yuan Tang, Hyunsu Cho, Kailong Chen, 2015. Xgboost: extreme gradient boosting. R package version 0.4-2 1, 4 (2015), 1\u20134."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2019.00017"},{"key":"e_1_3_2_1_11_1","volume-title":"The advantages of the Matthews correlation coefficient (MCC) over F1 score and accuracy in binary classification evaluation. BMC genomics 21, 1","author":"Chicco Davide","year":"2020","unstructured":"Davide Chicco and Giuseppe Jurman. 2020. The advantages of the Matthews correlation coefficient (MCC) over F1 score and accuracy in binary classification evaluation. BMC genomics 21, 1 (2020), 1\u201313."},{"key":"e_1_3_2_1_12_1","volume-title":"International Conference on Machine Learning. PMLR, 2556\u20132566","author":"Ding Hu","year":"2020","unstructured":"Hu Ding and Zixiu Wang. 2020. Layered sampling for robust optimization problems. In International Conference on Machine Learning. PMLR, 2556\u20132566."},{"key":"e_1_3_2_1_13_1","volume-title":"Feature engineering for machine learning and data analytics","author":"Dong Guozhu","unstructured":"Guozhu Dong and Huan Liu. 2018. Feature engineering for machine learning and data analytics. CRC Press."},{"key":"e_1_3_2_1_14_1","volume-title":"Autogluon-tabular: Robust and accurate automl for structured data. arXiv preprint arXiv:2003.06505","author":"Erickson Nick","year":"2020","unstructured":"Nick Erickson, Jonas Mueller, Alexander Shirkov, Hang Zhang, Pedro Larroy, Mu Li, and Alexander Smola. 2020. Autogluon-tabular: Robust and accurate automl for structured data. arXiv preprint arXiv:2003.06505 (2020)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313629"},{"key":"e_1_3_2_1_16_1","volume-title":"Efficient and robust automated machine learning. Advances in neural information processing systems 28","author":"Feurer Matthias","year":"2015","unstructured":"Matthias Feurer, Aaron Klein, Katharina Eggensperger, Jost Springenberg, Manuel Blum, and Frank Hutter. 2015. Efficient and robust automated machine learning. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_1_17_1","unstructured":"Dibya Ghosh Jad Rahme Aviral Kumar Amy Zhang Ryan\u00a0P. Adams and Sergey Levine. 2021. Why Generalization in RL is Difficult: Epistemic POMDPs and Implicit Partial Observability. arxiv:2107.06277\u00a0[cs.LG]"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313414"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106622"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.709601"},{"key":"e_1_3_2_1_21_1","volume-title":"Long short-term memory. Neural computation 9, 8","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation 9, 8 (1997), 1735\u20131780."},{"key":"e_1_3_2_1_22_1","volume-title":"The autofeat python library for automated feature engineering and selection. arXiv preprint arXiv:1901.07329","author":"Horn Franziska","year":"2019","unstructured":"Franziska Horn, Robert Pack, and Michael Rieger. 2019. The autofeat python library for automated feature engineering and selection. arXiv preprint arXiv:1901.07329 (2019)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1002\/0471722146"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSAA.2015.7344858"},{"key":"e_1_3_2_1_25_1","volume-title":"Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems 30","author":"Ke Guolin","year":"2017","unstructured":"Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11678"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDMW.2016.0190"},{"key":"e_1_3_2_1_28_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE51399.2021.00305"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","unstructured":"Hoang\u00a0Thanh Lam Tran\u00a0Ngoc Minh Mathieu Sinn Beat Buesser and Martin Wistuba. 2018. Neural Feature Learning From Relational Database. https:\/\/doi.org\/10.48550\/ARXIV.1801.05372","DOI":"10.48550\/ARXIV.1801.05372"},{"key":"e_1_3_2_1_31_1","volume-title":"One button machine for automating feature engineering in relational databases. arXiv preprint arXiv:1706.00327","author":"Lam Hoang\u00a0Thanh","year":"2017","unstructured":"Hoang\u00a0Thanh Lam, Johann-Michael Thiebaut, Mathieu Sinn, Bei Chen, Tiep Mai, and Oznur Alkan. 2017. One button machine for automating feature engineering in relational databases. arXiv preprint arXiv:1706.00327 (2017)."},{"key":"e_1_3_2_1_32_1","unstructured":"Tor Lattimore Marcus Hutter and Peter Sunehag. 2013. The sample-complexity of general reinforcement learning. (2013) 28\u201336."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220023"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijresmar.2020.04.005"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Fatemeh Nargesian Horst Samulowitz Udayan Khurana Elias\u00a0B Khalil and Deepak\u00a0S Turaga. 2017. Learning Feature Engineering for Classification.. In Ijcai. 2529\u20132535.","DOI":"10.24963\/ijcai.2017\/352"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the VLDB Endowment","year":"2021","unstructured":"SalazarRicardo, NeutatzFelix, and AbedjanZiawasch. 2021. Automated feature engineering for algorithmic fairness. Proceedings of the VLDB Endowment (2021)."},{"key":"e_1_3_2_1_37_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_38_1","first-page":"171","article-title":"A survey of forecast error measures","volume":"24","author":"Shcherbakov Maxim\u00a0Vladimirovich","year":"2013","unstructured":"Maxim\u00a0Vladimirovich Shcherbakov, Adriaan Brebels, Nataliya\u00a0Lvovna Shcherbakova, Anton\u00a0Pavlovich Tyukov, Timur\u00a0Alexandrovich Janovsky, Valeriy\u00a0Anatol\u2019evich Kamaev, 2013. A survey of forecast error measures. World Applied Sciences Journal 24, 24 (2013), 171\u2013176.","journal-title":"World Applied Sciences Journal"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE48307.2020.00146"},{"key":"e_1_3_2_1_40_1","volume-title":"Julian Schrittwieser, Ioannis Antonoglou","author":"Silver David","year":"2016","unstructured":"David Silver, Aja Huang, Chris\u00a0J Maddison, Arthur Guez, Laurent Sifre, George Van Den\u00a0Driessche, Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, 2016. Mastering the game of Go with deep neural networks and tree search. nature 529, 7587 (2016), 484\u2013489."},{"key":"e_1_3_2_1_41_1","volume-title":"A tutorial on support vector regression. Statistics and computing 14, 3","author":"Smola J","year":"2004","unstructured":"Alex\u00a0J Smola and Bernhard Sch\u00f6lkopf. 2004. A tutorial on support vector regression. Statistics and computing 14, 3 (2004), 199\u2013222."},{"key":"e_1_3_2_1_42_1","volume-title":"https:\/\/db-engines.com\/en\/ranking","author":"Ranking Engines","year":"2020","unstructured":"solid IT\u00a0gmbh. 2020. DB-Engines Ranking. (2020). https:\/\/db-engines.com\/en\/ranking"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357925"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.2517-6161.1996.tb02080.x"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2016.7471613"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3124749.3124754"},{"key":"e_1_3_2_1_47_1","volume-title":"Chi","author":"Wang Ruoxi","year":"2021","unstructured":"Ruoxi Wang, Rakesh Shivanna, Derek Cheng, Sagar Jain, Dong Lin, Lichan Hong, and Ed Chi. 2021. Dcn v2: Improved deep & cross network and practical lessons for web-scale learning to rank systems., 1785\u20131797\u00a0pages."},{"key":"e_1_3_2_1_48_1","volume-title":"Mean squared error: Love it or leave it? A new look at signal fidelity measures","author":"Wang Zhou","year":"2009","unstructured":"Zhou Wang and Alan\u00a0C Bovik. 2009. Mean squared error: Love it or leave it? A new look at signal fidelity measures. IEEE signal processing magazine 26, 1 (2009), 98\u2013117."},{"key":"e_1_3_2_1_49_1","volume-title":"Machine learning 8, 3-4","author":"Watkins JCH","year":"1992","unstructured":"Christopher\u00a0JCH Watkins and Peter Dayan. 1992. Q-learning. Machine learning 8, 3-4 (1992), 279\u2013292."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1016\/0377-2217(89)90348-2"},{"key":"e_1_3_2_1_51_1","volume-title":"International Conference on Automated Machine Learning. PMLR, 17\u20131.","author":"Zhu Guanghui","year":"2022","unstructured":"Guanghui Zhu, Zhuoer Xu, Chunfeng Yuan, and Yihua Huang. 2022. DIFER: differentiable automated feature engineering. In International Conference on Automated Machine Learning. PMLR, 17\u20131."}],"event":{"name":"WWW '23: The ACM Web Conference 2023","location":"Austin TX USA","acronym":"WWW '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583527","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583527","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:02Z","timestamp":1750178822000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583527"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":51,"alternative-id":["10.1145\/3543507.3583527","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583527","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}