{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:49:35Z","timestamp":1755794975850,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709291","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:44:43Z","timestamp":1743792283000},"page":"1385-1396","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["GROOT: Effective Design of Biological Sequences with Limited Experimental Data"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8663-1652","authenticated-orcid":false,"given":"Thanh V. T.","family":"Tran","sequence":"first","affiliation":[{"name":"FPT Software AI Center, Hanoi, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8762-9457","authenticated-orcid":false,"given":"Nhat Khang","family":"Ngo","sequence":"additional","affiliation":[{"name":"FPT Software AI Center, Ho Chi Minh, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4475-1993","authenticated-orcid":false,"given":"Viet Anh","family":"Nguyen","sequence":"additional","affiliation":[{"name":"FPT Software AI Center, Hanoi, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5092-3757","authenticated-orcid":false,"given":"Truong Son","family":"Hy","sequence":"additional","affiliation":[{"name":"University of Alabama at Birmingham, Birmingham, AL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330701"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1002\/anie.201708408"},{"key":"e_1_3_2_2_3_1","volume-title":"Learning in high dimension always amounts to extrapolation. arXiv preprint arXiv:2110.09485","author":"Balestriero Randall","year":"2021","unstructured":"Randall Balestriero, Jerome Pesenti, and Yann LeCun. 2021. Learning in high dimension always amounts to extrapolation. arXiv preprint arXiv:2110.09485 (2021)."},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"782","author":"Brookes David","year":"2019","unstructured":"David Brookes, Hahnbeom Park, and Jennifer Listgarten. 2019. Conditioning by adaptive sampling for robust design. In Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 97), Kamalika Chaudhuri and Ruslan Salakhutdinov (Eds.). PMLR, 773--782. https:\/\/proceedings.mlr.press\/v97\/brookes19a.html"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2109649118"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2109649118"},{"key":"e_1_3_2_2_7_1","volume-title":"Lin (Eds.)","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 1877--1901. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/ 1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41587-020-00793-4"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-022-00532--1"},{"key":"e_1_3_2_2_10_1","volume-title":"The convex geometry of linear inverse problems. Foundations of Computational mathematics 12, 6","author":"Chandrasekaran Venkat","year":"2012","unstructured":"Venkat Chandrasekaran, Benjamin Recht, Pablo A Parrilo, and Alan S Willsky. 2012. The convex geometry of linear inverse problems. Foundations of Computational mathematics 12, 6 (2012), 805--849."},{"key":"e_1_3_2_2_11_1","volume-title":"Oh (Eds.)","volume":"35","author":"Chen Can","year":"2022","unstructured":"Can Chen, Yingxueff Zhang, Jie Fu, Xue (Steve) Liu, and Mark Coates. 2022. Bidirectional Learning for Offline Infinite-width Model-based Optimization. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 29454--29467. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/ bd391cf5bdc4b63674d6da3edc1bde0d-Paper-Conference.pdf"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"crossref","unstructured":"Tianlai Chen Pranay Vure Rishab Pulugurta and Pranam Chatterjee. 2023. AMP-Diffusion: Integrating Latent Diffusion with Protein Language Models for Antimicrobial Peptide Generation. In NeurIPS 2023 Generative AI and Biology (GenBio) Workshop. https:\/\/openreview.net\/forum?id=145TM9VQhx","DOI":"10.1101\/2024.03.03.583201"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1088\/2632--2153\/accacd"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.3027"},{"key":"e_1_3_2_2_15_1","volume-title":"Protein Discovery with Discrete Walk-Jump Sampling. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id= zMPHKOmQNb","author":"Frey Nathan C.","year":"2024","unstructured":"Nathan C. Frey, Dan Berenberg, Karina Zadorozhny, Joseph Kleinhenz, Julien Lafrance-Vanasse, Isidro Hotzel, Yan Wu, Stephen Ra, Richard Bonneau, Kyunghyun Cho, Andreas Loukas, Vladimir Gligorijevic, and Saeed Saremi. 2024. Protein Discovery with Discrete Walk-Jump Sampling. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id= zMPHKOmQNb"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1021\/acscentsci.7b00572"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature19946"},{"key":"e_1_3_2_2_18_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=8E1-f3VhX1o","author":"Huang Qian","year":"2021","unstructured":"Qian Huang, Horace He, Abhay Singh, Ser-Nam Lim, and Austin Benson. 2021. Combining Label Propagation and Simple Models out-performs Graph Neural Networks. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=8E1-f3VhX1o"},{"key":"e_1_3_2_2_19_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"9801","author":"Jain Moksh","year":"2022","unstructured":"Moksh Jain, Emmanuel Bengio, Alex Hernandez-Garcia, Jarrid Rector-Brooks, Bonaventure F. P. Dossou, Chanakya Ajit Ekbote, Jie Fu, Tianyu Zhang, Michael Kilgour, Dinghuai Zhang, Lena Simine, Payel Das, and Yoshua Bengio. 2022. Biological Sequence Design with GFlowNets. In Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesvari, Gang Niu, and Sivan Sabato (Eds.). PMLR, 9786--9801. https:\/\/proceedings.mlr.press\/v162\/jain22a.html"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/s0022-5193(89)80019-0"},{"key":"e_1_3_2_2_21_1","volume-title":"arXiv:2304.02643","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander C. Berg, Wan-Yen Lo, Piotr Doll\u00e1r, and Ross Girshick. 2023. Segment Anything. arXiv:2304.02643 (2023)."},{"key":"e_1_3_2_2_22_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=rxlF2Zv8x0","author":"Kirjner Andrew","year":"2024","unstructured":"Andrew Kirjner, Jason Yim, Raman Samusevich, Shahar Bracha, Tommi S. Jaakkola, Regina Barzilay, and Ila R Fiete. 2024. Improving protein optimization with smoothed fitness landscapes. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=rxlF2Zv8x0"},{"key":"e_1_3_2_2_23_1","volume-title":"Lin (Eds.)","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar and Sergey Levine. 2020. Model Inversion Networks for Model-Based Optimization. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 5126--5137. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/373e4c5d8edfa8b74fd4b6791d0cf6dc-Paper.pdf"},{"key":"e_1_3_2_2_24_1","volume-title":"Hyunkyu Jung, Hyunjoo Ro, Meeyoung Cha, and Ho Min Kim.","author":"Lee Minji","year":"2023","unstructured":"Minji Lee, Luiz Felipe Vecchietti, Hyunkyu Jung, Hyunjoo Ro, Meeyoung Cha, and Ho Min Kim. 2023. Protein Sequence Design in a Latent Space via Model-based Reinforcement Learning. https:\/\/openreview.net\/forum?id=OhjGzRE5N6o"},{"key":"e_1_3_2_2_25_1","volume-title":"Robust Optimization in Protein Fitness Landscapes Using Reinforcement Learning in Latent Space. In Forty-first International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=0zbxwvJqwf","author":"Lee Minji","year":"2024","unstructured":"Minji Lee, Luiz Felipe Vecchietti, Hyunkyu Jung, Hyun Joo Ro, Meeyoung Cha, and Ho Min Kim. 2024. Robust Optimization in Protein Fitness Landscapes Using Reinforcement Learning in Latent Space. In Forty-first International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=0zbxwvJqwf"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.ade2574"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01589116"},{"key":"e_1_3_2_2_28_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"24197","author":"Mashkaria Satvik Mehul","year":"2023","unstructured":"Satvik Mehul Mashkaria, Siddarth Krishnamoorthy, and Aditya Grover. 2023. Generative Pretraining for Black-Box Optimization. In Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 24173--24197. https:\/\/proceedings.mlr.press\/v202\/mashkaria23a.html"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1038\/225563a0"},{"key":"e_1_3_2_2_30_1","volume-title":"Levine (Eds.)","volume":"36","author":"Nguyen Tung","year":"2023","unstructured":"Tung Nguyen, Sudhanshu Agrawal, and Aditya Grover. 2023. ExPT: Synthetic Pretraining for Few-Shot Experimental Design. In Advances in Neural Information Processing Systems, A. Oh, T. Naumann, A. Globerson, K. Saenko, M. Hardt, and S. Levine (Eds.), Vol. 36. Curran Associates, Inc., 45856--45869. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/8fab4407e1fe9006b39180525c0d323c-Paper-Conference.pdf"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.2c01046"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.76.036106"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1101\/2022.04.12.487986"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature17995"},{"key":"e_1_3_2_2_35_1","volume-title":"Kelsic","author":"Sinai Sam","year":"2020","unstructured":"Sam Sinai, Richard Wang, Alexander Whatley, Stewart Slocum, Elina Locane, and Eric D. Kelsic. 2020. AdaLead: A simple and robust adaptive greedy search algorithm for sequence design. CoRR abs\/2010.02141 (2020). arXiv:2010.02141 https:\/\/arxiv.org\/abs\/2010.02141"},{"key":"e_1_3_2_2_36_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"20478","author":"Stanton Samuel","year":"2022","unstructured":"Samuel Stanton,Wesley Maddox, Nate Gruver, Phillip Maffettone, Emily Delaney, Peyton Greenside, and Andrew Gordon Wilson. 2022. Accelerating Bayesian Optimization for Biological Sequence Design with Denoising Autoencoders. In Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesvari, Gang Niu, and Sivan Sabato (Eds.). PMLR, 20459--20478. https:\/\/proceedings.mlr.press\/v162\/stanton22a.html"},{"key":"e_1_3_2_2_37_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"21676","author":"Trabucco Brandon","year":"2022","unstructured":"Brandon Trabucco, Xinyang Geng,Aviral Kumar, and Sergey Levine. 2022. Design- Bench: Benchmarks for Data-Driven Offline Model-Based Optimization. In Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesvari, Gang Niu, and Sivan Sabato (Eds.). PMLR, 21658--21676. https:\/\/proceedings.mlr.press\/v162\/trabucco22a.html"},{"key":"e_1_3_2_2_38_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"10368","author":"Trabucco Brandon","year":"2021","unstructured":"Brandon Trabucco, Aviral Kumar, Xinyang Geng, and Sergey Levine. 2021. Conservative Objective Models for Effective Offline Model-Based Optimization. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 10358--10368. https:\/\/proceedings.mlr.press\/v139\/trabucco21a.html"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1101\/2023.11.28.568945"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.chemrev.1c00260"},{"key":"e_1_3_2_2_41_1","unstructured":"James T. Wilson Riccardo Moriconi Frank Hutter and Marc Peter Deisenroth. 2017. The reparameterization trick for acquisition functions. arXiv:1712.00424 [stat.ML] https:\/\/arxiv.org\/abs\/1712.00424"},{"key":"e_1_3_2_2_42_1","volume-title":"Advances in Neural Information Processing Systems","author":"Zhou Dengyong","year":"2003","unstructured":"Dengyong Zhou, Olivier Bousquet, Thomas Lal, Jason Weston, and Bernhard Sch\u00f6lkopf. 2003. Learning with Local and Global Consistency. In Advances in Neural Information Processing Systems, S. Thrun, L. Saul, and B. Sch\u00f6lkopf (Eds.), Vol. 16. MIT Press. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2003\/file\/87682805257e619d49b8e0dfdc14affa-Paper.pdf"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709291","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709291","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:46:44Z","timestamp":1755359204000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709291"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":42,"alternative-id":["10.1145\/3690624.3709291","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709291","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}