{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:55:10Z","timestamp":1776930910302,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3731599.3767345","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T16:13:44Z","timestamp":1762532024000},"page":"45-51","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Compute4Biology: Taking Stock of High Performance Computing Needs for Foundation Models in Biological Sciences"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1579-8946","authenticated-orcid":false,"given":"Pratik","family":"Dutta","sequence":"first","affiliation":[{"name":"Stony Brook University, Stony Brook, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2358-522X","authenticated-orcid":false,"given":"Tirthankar","family":"Ghosal","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Elliot Bolton Abhinav Venigalla Michihiro Yasunaga David Hall Betty Xiong Tony Lee Roxana Daneshjou Jonathan Frankle Percy Liang Michael Carbin et\u00a0al. 2024. Biomedlm: A 2.7 b parameter language model trained on biomedical text. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.18421 (2024)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Nadav Brandes Dan Ofer Yam Peleg Nadav Rappoport and Michal Linial. 2022. ProteinBERT: a universal deep-learning model of protein sequence and function. Bioinformatics 38 8 (2022) 2102\u20132110.","DOI":"10.1093\/bioinformatics\/btac020"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Jiayang Chen Zhihang Hu Siqi Sun Qingxiong Tan Yixuan Wang Qinze Yu Licheng Zong Liang Hong Jin Xiao Tao Shen et\u00a0al. 2022. Interpretable RNA foundation model from unannotated data for highly accurate RNA structure and function predictions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2204.00300 (2022).","DOI":"10.1101\/2022.08.06.503062"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Ken Chen Yue Zhou Maolin Ding Yu Wang Zhixiang Ren and Yuedong Yang. 2023. Self-supervised learning on millions of pre-mRNA sequences improves sequence-based RNA splicing prediction. BioRxiv (2023) 2023\u201301.","DOI":"10.1101\/2023.01.31.526427"},{"key":"e_1_3_3_1_6_2","unstructured":"Seyone Chithrananda Gabriel Grand and Bharath Ramsundar. 2020. ChemBERTa: large-scale self-supervised pretraining for molecular property prediction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.09885 (2020)."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Haotian Cui Chloe Wang Hassaan Maan Kuan Pang Fengning Luo Nan Duan and Bo Wang. 2024. scGPT: toward building a foundation model for single-cell multi-omics using generative AI. Nature methods 21 8 (2024) 1470\u20131480.","DOI":"10.1038\/s41592-024-02201-0"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Hugo Dalla-Torre Liam Gonzalez Javier Mendoza-Revilla Nicolas Lopez\u00a0Carranza Adam\u00a0Henryk Grzywaczewski Francesco Oteri Christian Dallago Evan Trop Bernardo\u00a0P de Almeida Hassan Sirelkhatim et\u00a0al. 2025. Nucleotide Transformer: building and evaluating robust foundation models for human genomics. Nature Methods 22 2 (2025) 287\u2013297.","DOI":"10.1038\/s41592-024-02523-z"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Ahmed Elnaggar Michael Heinzinger Christian Dallago Ghalia Rehawi Yu Wang Llion Jones Tom Gibbs Tamas Feher Christoph Angerer Martin Steinegger et\u00a0al. 2021. Prottrans: Toward understanding the language of life through self-supervised learning. IEEE transactions on pattern analysis and machine intelligence 44 10 (2021) 7112\u20137127.","DOI":"10.1109\/TPAMI.2021.3095381"},{"key":"e_1_3_3_1_10_2","first-page":"137","volume-title":"International Conference of Cloud Computing Technologies and Applications","author":"Ettifouri Imane","year":"2023","unstructured":"Imane Ettifouri, Mostapha Zbakh, and Claude Tadonki. 2023. The Need for HPC in AI Solutions. In International Conference of Cloud Computing Technologies and Applications. Springer, 137\u2013159."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Fei Guo Renchu Guan Yaohang Li Qi Liu Xiaowo Wang Can Yang and Jianxin Wang. 2025. Foundation models in bioinformatics. National science review 12 4 (2025) nwaf028.","DOI":"10.1093\/nsr\/nwaf028"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Yanrong Ji Zhihan Zhou Han Liu and Ramana\u00a0V Davuluri. 2021. DNABERT: pre-trained Bidirectional Encoder Representations from Transformers model for DNA-language in genome. Bioinformatics 37 15 (2021) 2112\u20132120.","DOI":"10.1093\/bioinformatics\/btab083"},{"key":"e_1_3_3_1_13_2","unstructured":"John Jumper Richard Evans Alexander Pritzel Tim Green Michael Figurnov Kathryn Tunyasuvunakool Olaf Ronneberger Russ Bates Augustin \u017d\u00eddek Alex Bridgland et\u00a0al. 2020. AlphaFold 2. Fourteenth Critical Assessment of Techniques for Protein Structure Prediction (2020) 13."},{"key":"e_1_3_3_1_14_2","unstructured":"Colin\u00a0Hall Kalicki and Esin\u00a0Darici Haritaoglu. 2020. RNAbert: RNA family classification and secondary structure prediction with BERT pretrained on RNA sequences."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Jinhyuk Lee Wonjin Yoon Sungdong Kim Donghyeon Kim Sunkyu Kim Chan\u00a0Ho So and Jaewoo Kang. 2020. BioBERT: a pre-trained biomedical language representation model for biomedical text mining. Bioinformatics 36 4 (2020) 1234\u20131240.","DOI":"10.1093\/bioinformatics\/btz682"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Zeming Lin Halil Akin Roshan Rao Brian Hie Zhongkai Zhu Wenting Lu Nikita Smetanin Robert Verkuil Ori Kabeli Yaniv Shmueli et\u00a0al. 2023. Evolutionary-scale prediction of atomic-level protein structure with a language model. Science 379 6637 (2023) 1123\u20131130.","DOI":"10.1126\/science.ade2574"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-13829-4_13"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Renqian Luo Liai Sun Yingce Xia Tao Qin Sheng Zhang Hoifung Poon and Tie-Yan Liu. 2022. BioGPT: generative pre-trained transformer for biomedical text generation and mining. Briefings in bioinformatics 23 6 (2022) bbac409.","DOI":"10.1093\/bib\/bbac409"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Joshua Meier Roshan Rao Robert Verkuil Jason Liu Tom Sercu and Alex Rives. 2021. Language models enable zero-shot prediction of the effects of mutations on protein function. Advances in neural information processing systems 34 (2021) 29287\u201329303.","DOI":"10.1101\/2021.07.09.450648"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Eric Nguyen Michael Poli Matthew\u00a0G Durrant Brian Kang Dhruva Katrekar David\u00a0B Li Liam\u00a0J Bartie Armin\u00a0W Thomas Samuel\u00a0H King Garyk Brixi et\u00a0al. 2024. Sequence modeling and design from molecular to genome scale with Evo. Science 386 6723 (2024) eado9336.","DOI":"10.1126\/science.ado9336"},{"key":"e_1_3_3_1_21_2","unstructured":"Eric Nguyen Michael Poli Marjan Faizi Armin Thomas Michael Wornow Callum Birch-Sykes Stefano Massaroli Aman Patel Clayton Rabideau Yoshua Bengio et\u00a0al. 2023. Hyenadna: Long-range genomic sequence modeling at single nucleotide resolution. Advances in neural information processing systems 36 (2023) 43177\u201343201."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Alexander Rives Joshua Meier Tom Sercu Siddharth Goyal Zeming Lin Jason Liu Demi Guo Myle Ott C\u00a0Lawrence Zitnick Jerry Ma et\u00a0al. 2021. Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences. Proceedings of the National Academy of Sciences 118 15 (2021) e2016239118.","DOI":"10.1073\/pnas.2016239118"},{"key":"e_1_3_3_1_23_2","unstructured":"Yu Rong Yatao Bian Tingyang Xu Weiyang Xie Ying Wei Wenbing Huang and Junzhou Huang. 2020. Grover: Self-supervised message passing transformer on large-scale molecular data. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2007.02835 2 3 (2020) 17."},{"key":"e_1_3_3_1_24_2","unstructured":"Hoo-Chang Shin Yang Zhang Evelina Bakhturina Raul Puri Mostofa Patwary Mohammad Shoeybi and Raghav Mani. 2020. BioMegatron: larger biomedical domain language model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.06060 (2020)."},{"key":"e_1_3_3_1_25_2","unstructured":"Ross Taylor Marcin Kardas Guillem Cucurull Thomas Scialom Anthony Hartshorn Elvis Saravia Andrew Poulton Viktor Kerkez and Robert Stojnic. 2022. Galactica: A large language model for science. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.09085 (2022)."},{"key":"e_1_3_3_1_26_2","unstructured":"Ricardo Vinuesa Jean Rabault Hossein Azizpour Stefan Bauer Bingni\u00a0W Brunton Arne Elofsson Elias Jarlebring Hedvig Kjellstrom Stefano Markidis David Marlevi et\u00a0al. 2024. Opportunities for machine learning in scientific discovery. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.04161 (2024)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Yuyang Wang Jianren Wang Zhonglin Cao and Amir Barati\u00a0Farimani. 2022. Molecular contrastive learning of representations via graph neural networks. Nature Machine Intelligence 4 3 (2022) 279\u2013287.","DOI":"10.1038\/s42256-022-00447-x"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i4.25662"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Fan Yang Wenchuan Wang Fang Wang Yuan Fang Duyu Tang Junzhou Huang Hui Lu and Jianhua Yao. 2022. scBERT as a large-scale pretrained deep language model for cell type annotation of single-cell RNA-seq data. Nature Machine Intelligence 4 10 (2022) 852\u2013866.","DOI":"10.1038\/s42256-022-00534-z"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Daoan Zhang Weitong Zhang Yu Zhao Jianguo Zhang Bing He Chenchen Qin and Jianhua Yao. 2023. DNAGPT: A generalized pre-trained tool for versatile DNA sequence analysis tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.05628 (2023).","DOI":"10.1101\/2023.07.11.548628"},{"key":"e_1_3_3_1_31_2","unstructured":"Zhihan Zhou Yanrong Ji Weijian Li Pratik Dutta Ramana Davuluri and Han Liu. 2023. Dnabert-2: Efficient foundation model and benchmark for multi-species genome. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.15006 (2023)."},{"key":"e_1_3_3_1_32_2","unstructured":"Zhihan Zhou Weimin Wu Harrison Ho Jiayi Wang Lizhen Shi Ramana\u00a0V Davuluri Zhong Wang and Han Liu. 2024. DNABERT-S: Pioneering species differentiation with species-aware DNA embeddings. ArXiv (2024) arXiv\u20132402."}],"event":{"name":"SC Workshops '25: Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St Louis MO USA","acronym":"SC Workshops '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731599.3767345","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T19:28:48Z","timestamp":1767986928000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731599.3767345"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":31,"alternative-id":["10.1145\/3731599.3767345","10.1145\/3731599"],"URL":"https:\/\/doi.org\/10.1145\/3731599.3767345","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}