{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T16:11:43Z","timestamp":1772899903957,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,20]],"date-time":"2024-02-20T00:00:00Z","timestamp":1708387200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,2]]},"DOI":"10.1145\/3627535.3638465","type":"proceedings-article","created":{"date-parts":[[2024,2,20]],"date-time":"2024-02-20T14:22:41Z","timestamp":1708438961000},"page":"417-430","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["FastFold: Optimizing AlphaFold Training and Inference on GPU Clusters"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7966-2941","authenticated-orcid":false,"given":"Shenggan","family":"Cheng","sequence":"first","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4877-3115","authenticated-orcid":false,"given":"Xuanlei","family":"Zhao","sequence":"additional","affiliation":[{"name":"HPC-AI Tech, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8474-4516","authenticated-orcid":false,"given":"Guangyang","family":"Lu","sequence":"additional","affiliation":[{"name":"HPC-AI Tech, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6724-2763","authenticated-orcid":false,"given":"Jiarui","family":"Fang","sequence":"additional","affiliation":[{"name":"HPC-AI Tech, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6164-7778","authenticated-orcid":false,"given":"Tian","family":"Zheng","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6402-4717","authenticated-orcid":false,"given":"Ruidong","family":"Wu","sequence":"additional","affiliation":[{"name":"HeliXon, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7134-366X","authenticated-orcid":false,"given":"Xiwen","family":"Zhang","sequence":"additional","affiliation":[{"name":"HeliXon, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1736-2978","authenticated-orcid":false,"given":"Jian","family":"Peng","sequence":"additional","affiliation":[{"name":"HeliXon, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2816-4384","authenticated-orcid":false,"given":"Yang","family":"You","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2024,2,20]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"Gustaf Ahdritz Nazim Bouatta Sachin Kadyan Qinghui Xia William Gerecke and Mohammed AlQuraishi. 2021. OpenFold. 10.5281\/zenodo.5709539","DOI":"10.5281\/zenodo.5709539"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.181.4096.223"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"e_1_3_2_1_4_1","volume-title":"Jue Wang, Qian Cong, Lisa N Kinch, R Dustin Schaeffer, et al.","author":"Baek Minkyung","year":"2021","unstructured":"Minkyung Baek, Frank DiMaio, Ivan Anishchenko, Justas Dauparas, Sergey Ovchinnikov, Gyu Rie Lee, Jue Wang, Qian Cong, Lisa N Kinch, R Dustin Schaeffer, et al. 2021. Accurate prediction of protein structures and interactions using a three-track neural network. Science 373, 6557 (2021), 871--876."},{"key":"e_1_3_2_1_5_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_6_1","volume-title":"Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174","author":"Chen Tianqi","year":"2016","unstructured":"Tianqi Chen, Bing Xu, Chiyuan Zhang, and Carlos Guestrin. 2016. Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174 (2016)."},{"key":"e_1_3_2_1_7_1","volume-title":"International Conference on Learning Representations","author":"Choromanski Krzysztof","year":"2021","unstructured":"Krzysztof Choromanski, Valerii Likhosherstov, David Dohan, Xingyou Song, Andreea Gane, Tamas Sarlos, Peter Hawkins, Jared Davis, Afroz Mohiuddin, Lukasz Kaiser, et al. 2021. Rethinking attention with performers. International Conference on Learning Representations (2021)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/N19-1423"},{"key":"e_1_3_2_1_9_1","volume-title":"International Conference on Learning Representations","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. 2021. An image is worth 16x16 words: Transformers for image recognition at scale. International Conference on Learning Representations (2021)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441593"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441578"},{"key":"e_1_3_2_1_12_1","volume-title":"Matthew James Johnson, and Chris Leary","author":"Frostig Roy","year":"2018","unstructured":"Roy Frostig, Matthew James Johnson, and Chris Leary. 2018. Compiling machine learning programs via high-level tracing. Systems for Machine Learning (2018), 23--24."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1002\/pro.4653"},{"key":"e_1_3_2_1_14_1","volume-title":"Axial attention in multidimensional transformers. arXiv preprint arXiv:1912.12180","author":"Ho Jonathan","year":"2019","unstructured":"Jonathan Ho, Nal Kalchbrenner, Dirk Weissenborn, and Tim Salimans. 2019. Axial attention in multidimensional transformers. arXiv preprint arXiv:1912.12180 (2019)."},{"key":"e_1_3_2_1_15_1","volume-title":"Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in neural information processing systems 32","author":"Huang Yanping","year":"2019","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Dehao Chen, Mia Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V Le, Yonghui Wu, et al. 2019. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"John Jumper Richard Evans Alexander Pritzel Tim Green Michael Figurnov Olaf Ronneberger Kathryn Tunyasuvunakool Russ Bates Augustin \u017d\u00eddek Anna Potapenko et al. 2021. Highly accurate protein structure prediction with AlphaFold. Nature 596 7873 (2021) 583--589.","DOI":"10.1038\/s41586-021-03819-2"},{"key":"e_1_3_2_1_18_1","volume-title":"Nataraj Jammalamadaka, Jianyu Huang, Hector Yuen, et al.","author":"Kalamkar Dhiraj","year":"2019","unstructured":"Dhiraj Kalamkar, Dheevatsa Mudigere, Naveen Mellempudi, Dipankar Das, Kunal Banerjee, Sasikanth Avancha, Dharma Teja Vooturi, Nataraj Jammalamadaka, Jianyu Huang, Hector Yuen, et al. 2019. A study of BFLOAT16 for deep learning training. arXiv preprint arXiv:1905.12322 (2019)."},{"key":"e_1_3_2_1_19_1","volume-title":"International Conference on Learning Representations","author":"Kitaev Nikita","year":"2020","unstructured":"Nikita Kitaev, \u0141ukasz Kaiser, and Anselm Levskaya. 2020. Reformer: The efficient transformer. International Conference on Learning Representations (2020)."},{"key":"e_1_3_2_1_20_1","first-page":"500902","article-title":"Language models of protein sequences at the scale of evolution enable accurate structure prediction","volume":"2022","author":"Lin Zeming","year":"2022","unstructured":"Zeming Lin, Halil Akin, Roshan Rao, Brian Hie, Zhongkai Zhu, Wenting Lu, Allan dos Santos Costa, Maryam Fazel-Zarandi, Tom Sercu, Sal Candido, et al. 2022. Language models of protein sequences at the scale of evolution enable accurate structure prediction. BioRxiv 2022 (2022), 500902.","journal-title":"BioRxiv"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607073"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476209"},{"key":"e_1_3_2_1_23_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00024"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476205"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1101\/2021.02.12.430858"},{"key":"e_1_3_2_1_27_1","volume-title":"2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Reza Yazdani Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. 2021. {ZeRO-Offload}: Democratizing {Billion-Scale} model training. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). 551--564."},{"key":"e_1_3_2_1_28_1","volume-title":"Alex Bridgland, et al.","author":"Senior Andrew W","year":"2020","unstructured":"Andrew W Senior, Richard Evans, John Jumper, James Kirkpatrick, Laurent Sifre, Tim Green, Chongli Qin, Augustin \u017d\u00eddek, Alexander WR Nelson, Alex Bridgland, et al. 2020. Improved protein structure prediction using potentials from deep learning. Nature 577, 7792 (2020), 706--710."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-021-01533-0"},{"key":"e_1_3_2_1_30_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_31_1","volume-title":"Linformer: Self-attention with linear complexity. arXiv preprint arXiv:2006.04768","author":"Wang Sinong","year":"2020","unstructured":"Sinong Wang, Belinda Z Li, Madian Khabsa, Han Fang, and Hao Ma. 2020. Linformer: Self-attention with linear complexity. arXiv preprint arXiv:2006.04768 (2020)."},{"key":"e_1_3_2_1_32_1","volume-title":"SC22: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 1--14","author":"Wang Xiaohui","year":"2022","unstructured":"Xiaohui Wang, Yang Wei, Ying Xiong, Guyue Huang, Xian Qian, Yufei Ding, Mingxuan Wang, and Lei Li. 2022. Lightseq2: Accelerated training for transformer-based models on gpus. In SC22: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 1--14."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-industry.15"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1821309116"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btq066"},{"key":"e_1_3_2_1_36_1","volume-title":"Large batch training of convolutional networks. arXiv preprint arXiv:1708.03888","author":"You Yang","year":"2017","unstructured":"Yang You, Igor Gitman, and Boris Ginsburg. 2017. Large batch training of convolutional networks. arXiv preprint arXiv:1708.03888 (2017)."},{"key":"e_1_3_2_1_37_1","volume-title":"International Conference on Learning Representations","author":"You Yang","year":"2020","unstructured":"Yang You, Jing Li, Sashank Reddi, Jonathan Hseu, Sanjiv Kumar, Srinadh Bhojanapalli, Xiaodan Song, James Demmel, Kurt Keutzer, and Cho-Jui Hsieh. 2020. Large batch optimization for deep learning: Training bert in 76 minutes. International Conference on Learning Representations (2020)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1002\/prot.20264"},{"key":"e_1_3_2_1_39_1","volume-title":"ParaFold: Paralleling AlphaFold for Large-Scale Predictions. In International Conference on High Performance Computing in Asia-Pacific Region Workshops. 1--9.","author":"Zhong Bozitao","year":"2022","unstructured":"Bozitao Zhong, Xiaoming Su, Minhua Wen, Sicheng Zuo, Liang Hong, and James Lin. 2022. ParaFold: Paralleling AlphaFold for Large-Scale Predictions. In International Conference on High Performance Computing in Asia-Pacific Region Workshops. 1--9."}],"event":{"name":"PPoPP '24: 29th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","location":"Edinburgh United Kingdom","acronym":"PPoPP '24","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGPLAN ACM Special Interest Group on Programming Languages"]},"container-title":["Proceedings of the 29th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627535.3638465","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627535.3638465","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:26Z","timestamp":1750182566000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627535.3638465"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,20]]},"references-count":39,"alternative-id":["10.1145\/3627535.3638465","10.1145\/3627535"],"URL":"https:\/\/doi.org\/10.1145\/3627535.3638465","relation":{},"subject":[],"published":{"date-parts":[[2024,2,20]]},"assertion":[{"value":"2024-02-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}