{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T06:30:41Z","timestamp":1775802641006,"version":"3.50.1"},"reference-count":95,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T00:00:00Z","timestamp":1750636800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T00:00:00Z","timestamp":1750636800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s11432-024-4457-2","type":"journal-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T04:34:14Z","timestamp":1750826054000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Molecular pretraining models towards molecular property prediction"],"prefix":"10.1007","volume":"68","author":[{"given":"Jianbo","family":"Qiao","sequence":"first","affiliation":[]},{"given":"Wenjia","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Junru","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Ding","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xu","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Balachandran","family":"Manavalan","sequence":"additional","affiliation":[]},{"given":"Leyi","family":"Wei","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,23]]},"reference":[{"key":"4457_CR1","unstructured":"Battaglia P W, Hamrick J B, Bapst V, et al. Relational inductive biases, deep learning, and graph networks. 2018. ArXiv:1806.01261"},{"key":"4457_CR2","doi-asserted-by":"publisher","first-page":"btad059","DOI":"10.1093\/bioinformatics\/btad059","volume":"39","author":"L Chen","year":"2023","unstructured":"Chen L, Yu L, Gao L, et al. Potent antibiotic design via guided search from antibacterial activity evaluations. Bioinformatics, 2023, 39: btad059","journal-title":"Bioinformatics"},{"key":"4457_CR3","doi-asserted-by":"publisher","first-page":"212106","DOI":"10.1007\/s11432-024-4171-9","volume":"67","author":"Y Z Wang","year":"2024","unstructured":"Wang Y Z, Zhai Y X, Ding Y J, et al. SBSM-Pro: support bio-sequence machine for proteins. Sci China Inf Sci, 2024, 67: 212106","journal-title":"Sci China Inf Sci"},{"key":"4457_CR4","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1261\/rna.069112.118","volume":"25","author":"Q Zou","year":"2019","unstructured":"Zou Q, Xing P, Wei L, et al. Gene2vec: gene subsequence embedding for prediction of mammalian N6-methyladenosine sites from mRNA. RNA, 2019, 25: 205\u2013218","journal-title":"RNA"},{"key":"4457_CR5","doi-asserted-by":"publisher","first-page":"1291352","DOI":"10.3389\/fmed.2023.1291352","volume":"10","author":"H Zulfiqar","year":"2024","unstructured":"Zulfiqar H, Guo Z, Ahmad R M, et al. Deep-STP: a deep learning-based approach to predict snake toxin proteins by using word embeddings. Front Med, 2024, 10: 1291352","journal-title":"Front Med"},{"key":"4457_CR6","doi-asserted-by":"publisher","first-page":"e1012229","DOI":"10.1371\/journal.pcbi.1012229","volume":"20","author":"C Ai","year":"2024","unstructured":"Ai C, Yang H, Liu X, et al. MTMol-GPT: de novo multi-target molecular generation with transformer-based generative adversarial imitation learning. PLoS Comput Biol, 2024, 20: e1012229","journal-title":"PLoS Comput Biol"},{"key":"4457_CR7","doi-asserted-by":"publisher","first-page":"1784","DOI":"10.1016\/j.drudis.2018.06.016","volume":"23","author":"F Ghasemi","year":"2018","unstructured":"Ghasemi F, Mehridehnavi A, P\u00e9rez-Garrido A, et al. Neural network and deep-learning algorithms used in QSAR studies: merits and drawbacks. Drug Discov Today, 2018, 23: 1784\u20131790","journal-title":"Drug Discov Today"},{"key":"4457_CR8","doi-asserted-by":"publisher","first-page":"107261","DOI":"10.1016\/j.compbiomed.2023.107261","volume":"164","author":"Z Huang","year":"2023","unstructured":"Huang Z, Chen S, Yu L. Predicting new drug indications based on double variational autoencoders. Comput Biol Med, 2023, 164: 107261","journal-title":"Comput Biol Med"},{"key":"4457_CR9","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1186\/s12915-024-01968-0","volume":"22","author":"H Zhu","year":"2024","unstructured":"Zhu H, Hao H, Yu L. Identification of microbe-disease signed associations via multi-scale variational graph autoencoder based on signed message propagation. BMC Biol, 2024, 22: 172","journal-title":"BMC Biol"},{"key":"4457_CR10","doi-asserted-by":"crossref","unstructured":"Joshi M, Singh B K. Deep learning techniques for brain lesion classification using various MRI (from 2010 to 2022): review and challenges. Medinformatics, 2024. https:\/\/ojs.bonviewpress.com\/index.php\/MEDIN\/article\/view\/1686","DOI":"10.47852\/bonviewMEDIN42021686"},{"key":"4457_CR11","doi-asserted-by":"publisher","first-page":"3033","DOI":"10.1109\/TCBB.2023.3274587","volume":"20","author":"C Ai","year":"2023","unstructured":"Ai C, Yang H, Ding Y, et al. Low rank matrix factorization algorithm based on multi-graph regularization for detecting drug-disease association. IEEE ACM Trans Comput Biol Bioinf, 2023, 20: 3033\u20133043","journal-title":"IEEE ACM Trans Comput Biol Bioinf"},{"key":"4457_CR12","doi-asserted-by":"publisher","first-page":"7538","DOI":"10.1038\/s41467-024-51933-2","volume":"15","author":"T Li","year":"2024","unstructured":"Li T, Ren X, Luo X, et al. A foundation model identifies broad-spectrum antimicrobial peptides against drug-resistant bacterial infection. Nat Commun, 2024, 15: 7538","journal-title":"Nat Commun"},{"key":"4457_CR13","doi-asserted-by":"publisher","first-page":"bbad371","DOI":"10.1093\/bib\/bbad371","volume":"24","author":"W Tao","year":"2023","unstructured":"Tao W, Liu Y, Lin X, et al. Prediction of multi-relational drug-gene interaction via dynamic hypergraph contrastive learning. Brief BioInf, 2023, 24: bbad371","journal-title":"Brief BioInf"},{"key":"4457_CR14","doi-asserted-by":"publisher","first-page":"122498","DOI":"10.1016\/j.eswa.2023.122498","volume":"240","author":"M Liu","year":"2024","unstructured":"Liu M, Li C, Chen R, et al. Geometric deep learning for drug discovery. Expert Syst Appl, 2024, 240: 122498","journal-title":"Expert Syst Appl"},{"key":"4457_CR15","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1186\/s12915-025-02166-2","volume":"23","author":"Z Wang","year":"2025","unstructured":"Wang Z, Chen Y, Shang Y, et al. MultiCycPermea: accurate and interpretable prediction of cyclic peptide permeability using a multimodal image-sequence model. BMC Biol, 2025, 23: 63","journal-title":"BMC Biol"},{"key":"4457_CR16","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1021\/ci00057a005","volume":"28","author":"D Weininger","year":"1988","unstructured":"Weininger D. SMILES, a chemical language and information system. 1. Introduction to methodology and encoding rules. J Chem Inf Comput Sci, 1988, 28: 31\u201336","journal-title":"J Chem Inf Comput Sci"},{"key":"4457_CR17","doi-asserted-by":"publisher","first-page":"1749","DOI":"10.1007\/s40262-022-01180-9","volume":"61","author":"Y Yang","year":"2022","unstructured":"Yang Y, Gao D, Xie X, et al. DeepIDC: a prediction framework of injectable drug combination based on heterogeneous information and deep learning. Clin Pharmacokinet, 2022, 61: 1749\u20131759","journal-title":"Clin Pharmacokinet"},{"key":"4457_CR18","doi-asserted-by":"publisher","first-page":"1004","DOI":"10.1038\/s42256-022-00557-6","volume":"4","author":"X Zeng","year":"2022","unstructured":"Zeng X, Xiang H, Yu L, et al. Accurate prediction of molecular properties and drug targets using a self-supervised image representation learning framework. Nat Mach Intell, 2022, 4: 1004\u20131016","journal-title":"Nat Mach Intell"},{"key":"4457_CR19","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1038\/s42256-025-00982-3","volume":"7","author":"Z Wang","year":"2025","unstructured":"Wang Z, Chen Y, Ma P, et al. Image-based generation for molecule design with SketchMol. Nat Mach Intell, 2025, 7: 244\u2013255","journal-title":"Nat Mach Intell"},{"key":"4457_CR20","doi-asserted-by":"publisher","first-page":"9696","DOI":"10.1038\/s41467-024-53742-z","volume":"15","author":"H Xiang","year":"2024","unstructured":"Xiang H, Zeng L, Hou L, et al. A molecular video-derived foundation model for scientific drug discovery. Nat Commun, 2024, 15: 9696","journal-title":"Nat Commun"},{"key":"4457_CR21","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/TNNLS.2020.2978386","volume":"32","author":"Z Wu","year":"2020","unstructured":"Wu Z, Pan S, Chen F, et al. A comprehensive survey on graph neural networks. IEEE Trans Neural Netw Learn Syst, 2020, 32: 4\u201324","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"4457_CR22","unstructured":"Yang X, Zhao X, Shen Z. A generalizable anomaly detection method in dynamic graphs. 2024. ArXiv:2412.16447"},{"key":"4457_CR23","first-page":"15870","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Z Zhang","year":"2021","unstructured":"Zhang Z, Liu Q, Wang H, et al. Motif-based graph self-supervised learning for molecular property prediction. In: Proceedings of Advances in Neural Information Processing Systems, 2021. 15870\u201315882"},{"key":"4457_CR24","doi-asserted-by":"publisher","first-page":"bbab109","DOI":"10.1093\/bib\/bbab109","volume":"22","author":"P Li","year":"2021","unstructured":"Li P, Wang J, Qiao Y, et al. An effective self-supervised framework for learning expressive molecular global representations to drug discovery. Brief BioInf, 2021, 22: bbab109","journal-title":"Brief BioInf"},{"key":"4457_CR25","doi-asserted-by":"publisher","first-page":"045024","DOI":"10.1088\/2632-2153\/aba947","volume":"1","author":"M Krenn","year":"2020","unstructured":"Krenn M, H\u00e4se F, Nigam A K, et al. Self-referencing embedded strings (selfies): a 100% robust molecular string representation. Mach Learn Sci Technol, 2020, 1: 045024","journal-title":"Mach Learn Sci Technol"},{"key":"4457_CR26","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1093\/bioinformatics\/btn181","volume":"24","author":"R Klinger","year":"2008","unstructured":"Klinger R, Kol\u00e1\u0159ik C, Fluck J, et al. Detection of IUPAC and IUPAC-like chemical names. Bioinformatics, 2008, 24: 268\u2013276","journal-title":"Bioinformatics"},{"key":"4457_CR27","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"D K Duvenaud","year":"2015","unstructured":"Duvenaud D K, Maclaurin D, Iparraguirre J, et al. Convolutional networks on graphs for learning molecular fingerprints. In: Proceedings of Advances in Neural Information Processing Systems, 2015"},{"key":"4457_CR28","doi-asserted-by":"publisher","first-page":"e127","DOI":"10.1093\/nar\/gkz740","volume":"47","author":"B Liu","year":"2019","unstructured":"Liu B, Gao X, Zhang H. BioSeq-Analysis2.0: an updated platform for analyzing DNA, RNA and protein sequences at sequence level and residue level based on machine learning approaches. Nucleic Acids Res, 2019, 47: e127","journal-title":"Nucleic Acids Res"},{"key":"4457_CR29","doi-asserted-by":"publisher","first-page":"e129","DOI":"10.1093\/nar\/gkab829","volume":"49","author":"H L Li","year":"2021","unstructured":"Li H L, Pang Y H, Liu B. BioSeq-BLM: a platform for analyzing DNA, RNA and protein sequences based on biological language models. Nucleic Acids Res, 2021, 49: e129","journal-title":"Nucleic Acids Res"},{"key":"4457_CR30","doi-asserted-by":"publisher","first-page":"4960","DOI":"10.1021\/acs.jcim.3c00564","volume":"63","author":"X W Liu","year":"2023","unstructured":"Liu X W, Shi T Y, Gao D, et al. iPADD: a computational tool for predicting potential antidiabetic drugs using machine learning algorithms. J Chem Inf Model, 2023, 63: 4960\u20134969","journal-title":"J Chem Inf Model"},{"key":"4457_CR31","doi-asserted-by":"publisher","first-page":"1273","DOI":"10.1021\/ci010132r","volume":"42","author":"J L Durant","year":"2002","unstructured":"Durant J L, Leland B A, Henry D R, et al. Reoptimization of MDL keys for use in drug discovery. J Chem Inf Comput Sci, 2002, 42: 1273\u20131280","journal-title":"J Chem Inf Comput Sci"},{"key":"4457_CR32","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1021\/ci100050t","volume":"50","author":"D Rogers","year":"2010","unstructured":"Rogers D, Hahn M. Extended-connectivity fingerprints. J Chem Inf Model, 2010, 50: 742\u2013754","journal-title":"J Chem Inf Model"},{"key":"4457_CR33","unstructured":"Xia J, Zhu Y, Du Y, et al. A systematic survey of molecular pre-trained models. 2022. ArXiv:2210.16484"},{"key":"4457_CR34","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1038\/s42256-021-00438-4","volume":"4","author":"X Fang","year":"2022","unstructured":"Fang X, Liu L, Lei J, et al. Geometry-enhanced molecular representation learning for property prediction. Nat Mach Intell, 2022, 4: 127\u2013134","journal-title":"Nat Mach Intell"},{"key":"4457_CR35","doi-asserted-by":"publisher","first-page":"D1102","DOI":"10.1093\/nar\/gky1033","volume":"47","author":"S Kim","year":"2019","unstructured":"Kim S, Chen J, Cheng T, et al. PubChem 2019 update: improved access to chemical data. Nucleic Acids Res, 2019, 47: D1102\u2013D1109","journal-title":"Nucleic Acids Res"},{"key":"4457_CR36","doi-asserted-by":"publisher","first-page":"2324","DOI":"10.1021\/acs.jcim.5b00559","volume":"55","author":"T Sterling","year":"2015","unstructured":"Sterling T, Irwin J J. ZINC 15\u2014ligand discovery for everyone. J Chem Inf Model, 2015, 55: 2324\u20132337","journal-title":"J Chem Inf Model"},{"key":"4457_CR37","doi-asserted-by":"publisher","first-page":"D945","DOI":"10.1093\/nar\/gkw1074","volume":"45","author":"A Gaulton","year":"2017","unstructured":"Gaulton A, Hersey A, Nowotka M, et al. The ChEMBL database in 2017. Nucleic Acids Res, 2017, 45: D945\u2013D954","journal-title":"Nucleic Acids Res"},{"key":"4457_CR38","unstructured":"Hu W, Fey M, Ren H, et al. OGB-LSC: a large-scale challenge for machine learning on graphs. 2021. ArXiv:2103.09430"},{"key":"4457_CR39","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1038\/s41597-022-01288-4","volume":"9","author":"S Axelrod","year":"2022","unstructured":"Axelrod S, G\u00f3mez-Bombarelli R. GEOM, energy-annotated molecular conformations for property prediction and molecular generation. Sci Data, 2022, 9: 185","journal-title":"Sci Data"},{"key":"4457_CR40","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1038\/s41597-022-01390-7","volume":"9","author":"C Isert","year":"2022","unstructured":"Isert C, Atz K, Jim\u00e9nez-Luna J, et al. QMugs, quantum mechanical properties of drug-like molecules. Sci Data, 2022, 9: 273","journal-title":"Sci Data"},{"key":"4457_CR41","unstructured":"Xu Z, Luo Y, Zhang X, et al. Molecule3D: a benchmark for predicting 3D geometries from molecular graphs. 2021. ArXiv:2110.01717"},{"key":"4457_CR42","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1039\/C7SC02664A","volume":"9","author":"Z Wu","year":"2018","unstructured":"Wu Z, Ramsundar B, Feinberg E N, et al. MoleculeNet: a benchmark for molecular machine learning. Chem Sci, 2018, 9: 513\u2013530","journal-title":"Chem Sci"},{"key":"4457_CR43","doi-asserted-by":"publisher","first-page":"1145","DOI":"10.1016\/S0031-3203(96)00142-2","volume":"30","author":"A P Bradley","year":"1997","unstructured":"Bradley A P. The use of the area under the ROC curve in the evaluation of machine learning algorithms. Pattern Recogn, 1997, 30: 1145\u20131159","journal-title":"Pattern Recogn"},{"key":"4457_CR44","doi-asserted-by":"publisher","first-page":"1247","DOI":"10.5194\/gmd-7-1247-2014","volume":"7","author":"T Chai","year":"2014","unstructured":"Chai T, Draxler R R. Root mean square error (RMSE) or mean absolute error (MAE)? \u2014 Arguments against avoiding RMSE in the literature. Geosci Model Dev, 2014, 7: 1247\u20131250","journal-title":"Geosci Model Dev"},{"key":"4457_CR45","doi-asserted-by":"publisher","first-page":"1936","DOI":"10.1021\/acs.jcim.6b00290","volume":"56","author":"G Subramanian","year":"2016","unstructured":"Subramanian G, Ramsundar B, Pande V, et al. Computational modeling of \u03b2-secretase 1 (BACE-1) inhibitors using ligand based approaches. J Chem Inf Model, 2016, 56: 1936\u20131949","journal-title":"J Chem Inf Model"},{"key":"4457_CR46","doi-asserted-by":"publisher","first-page":"1686","DOI":"10.1021\/ci300124c","volume":"52","author":"I F Martins","year":"2012","unstructured":"Martins I F, Teixeira A L, Pinheiro L, et al. A Bayesian approach to in silico blood-brain barrier penetration modeling. J Chem Inf Model, 2012, 52: 1686\u20131697","journal-title":"J Chem Inf Model"},{"key":"4457_CR47","doi-asserted-by":"publisher","first-page":"1294","DOI":"10.1016\/j.chembiol.2016.07.023","volume":"23","author":"K M Gayvert","year":"2016","unstructured":"Gayvert K M, Madhukar N S, Elemento O. A data-driven approach to predicting successes and failures of clinical trials. Cell Chem Biol, 2016, 23: 1294\u20131301","journal-title":"Cell Chem Biol"},{"key":"4457_CR48","doi-asserted-by":"publisher","first-page":"85","DOI":"10.3389\/fenvs.2015.00085","volume":"3","author":"R Huang","year":"2016","unstructured":"Huang R, Xia M, Nguyen D T, et al. Tox21Challenge to build predictive models of nuclear receptor and stress response pathways as mediated by exposure to environmental chemicals and drugs. Front Environ Sci, 2016, 3: 85","journal-title":"Front Environ Sci"},{"key":"4457_CR49","doi-asserted-by":"publisher","first-page":"1225","DOI":"10.1021\/acs.chemrestox.6b00135","volume":"29","author":"A M Richard","year":"2016","unstructured":"Richard A M, Judson R S, Houck K A, et al. ToxCast chemical landscape: paving the road to 21st century toxicology. Chem Res Toxicol, 2016, 29: 1225\u20131251","journal-title":"Chem Res Toxicol"},{"key":"4457_CR50","doi-asserted-by":"publisher","first-page":"1075","DOI":"10.1093\/nar\/gkv1075","volume":"44","author":"M Kuhn","year":"2016","unstructured":"Kuhn M, Letunic I, Jensen L J, et al. The SIDER database of drugs and side effects. Nucleic Acids Res, 2016, 44: 1075\u20131079","journal-title":"Nucleic Acids Res"},{"key":"4457_CR51","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1021\/ci8002649","volume":"49","author":"S G Rohrer","year":"2009","unstructured":"Rohrer S G, Baumann K. Maximum unbiased validation (MUV) data sets for virtual screening based on PubChem bioactivity data. J Chem Inf Model, 2009, 49: 169\u2013184","journal-title":"J Chem Inf Model"},{"key":"4457_CR52","first-page":"287","volume-title":"Proceedings of Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR)","author":"K Riesen","year":"2008","unstructured":"Riesen K, Bunke H. IAM graph database repository for graph based pattern recognition and machine learning. In: Proceedings of Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR), 2008. 287\u2013297"},{"key":"4457_CR53","doi-asserted-by":"publisher","first-page":"711","DOI":"10.1007\/s10822-014-9747-x","volume":"28","author":"D L Mobley","year":"2014","unstructured":"Mobley D L, Guthrie J P. FreeSolv: a database of experimental and calculated hydration free energies, with input files. J Comput Aided Mol Des, 2014, 28: 711\u2013720","journal-title":"J Comput Aided Mol Des"},{"key":"4457_CR54","doi-asserted-by":"publisher","first-page":"1000","DOI":"10.1021\/ci034243x","volume":"44","author":"J S Delaney","year":"2004","unstructured":"Delaney J S. ESOL: estimating aqueous solubility directly from molecular structure. J Chem Inf Comput Sci, 2004, 44: 1000\u20131005","journal-title":"J Chem Inf Comput Sci"},{"key":"4457_CR55","doi-asserted-by":"publisher","first-page":"D1100","DOI":"10.1093\/nar\/gkr777","volume":"40","author":"A Gaulton","year":"2012","unstructured":"Gaulton A, Bellis L J, Bento A P, et al. ChEMBL: a large-scale bioactivity database for drug discovery. Nucleic Acids Res, 2012, 40: D1100\u2013D1107","journal-title":"Nucleic Acids Res"},{"key":"4457_CR56","doi-asserted-by":"publisher","first-page":"8732","DOI":"10.1021\/ja902302h","volume":"131","author":"L C Blum","year":"2009","unstructured":"Blum L C, Reymond J L. 970 million druglike small molecules for virtual screening in the chemical universe database GDB-13. J Am Chem Soc, 2009, 131: 8732\u20138733","journal-title":"J Am Chem Soc"},{"key":"4457_CR57","doi-asserted-by":"publisher","first-page":"084111","DOI":"10.1063\/1.4928757","volume":"143","author":"R Ramakrishnan","year":"2015","unstructured":"Ramakrishnan R, Hartmann M, Tapavicza E, et al. Electronic spectra from TDDFT and machine learning in chemical space. J Chem Phys, 2015, 143: 084111","journal-title":"J Chem Phys"},{"key":"4457_CR58","doi-asserted-by":"publisher","first-page":"2864","DOI":"10.1021\/ci300415d","volume":"52","author":"L Ruddigkeit","year":"2012","unstructured":"Ruddigkeit L, van Deursen R, Blum L C, et al. Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17. J Chem Inf Model, 2012, 52: 2864\u20132875","journal-title":"J Chem Inf Model"},{"key":"4457_CR59","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1145\/3307339.3342186","volume-title":"Proceedings of the 10th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics","author":"S Wang","year":"2019","unstructured":"Wang S, Guo Y, Wang Y, et al. SMILES-BERT: large scale unsupervised pre-training for molecular property prediction. In: Proceedings of the 10th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics, 2019. 429\u2013436"},{"key":"4457_CR60","unstructured":"Chithrananda S, Grand G, Ramsundar B. ChemBERTa: large-scale self-supervised pretraining for molecular property prediction. 2020. ArXiv:2010.09885"},{"key":"4457_CR61","doi-asserted-by":"publisher","first-page":"015022","DOI":"10.1088\/2632-2153\/ac3ffb","volume":"3","author":"R Irwin","year":"2022","unstructured":"Irwin R, Dimitriadis S, He J, et al. Chemformer: a pre-trained transformer for computational chemistry. Mach Learn-Sci Technol, 2022, 3: 015022","journal-title":"Mach Learn-Sci Technol"},{"key":"4457_CR62","unstructured":"Ahmad W, Simon E, Chithrananda S, et al. Chemberta-2: towards chemical foundation models. 2022. ArXiv:2209.01712"},{"key":"4457_CR63","doi-asserted-by":"crossref","unstructured":"Pei Q, Zhang W, Zhu J, et al. Biot5: enriching cross-modal integration in biology with chemical knowledge and natural language associations. 2023. ArXiv:2310.07276","DOI":"10.18653\/v1\/2023.emnlp-main.70"},{"key":"4457_CR64","doi-asserted-by":"publisher","first-page":"bbad398","DOI":"10.1093\/bib\/bbad398","volume":"24","author":"B Li","year":"2023","unstructured":"Li B, Lin M, Chen T, et al. FG-BERT: a generalized and self-supervised functional group-based molecular representation learning framework for properties prediction. Brief BioInf, 2023, 24: bbad398","journal-title":"Brief BioInf"},{"key":"4457_CR65","doi-asserted-by":"crossref","unstructured":"Pei Q, Wu L, Gao K, et al. Biot5+: towards generalized biological understanding with IUPAC integration and multi-task tuning. 2024. ArXiv:2402.17810","DOI":"10.18653\/v1\/2024.findings-acl.71"},{"key":"4457_CR66","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1038\/s42256-022-00447-x","volume":"4","author":"Y Wang","year":"2022","unstructured":"Wang Y, Wang J, Cao Z, et al. Molecular contrastive learning of representations via graph neural networks. Nat Mach Intell, 2022, 4: 279\u2013287","journal-title":"Nat Mach Intell"},{"key":"4457_CR67","unstructured":"Zaidi S, Schaarschmidt M, Martens J, et al. Pre-training via denoising for molecular property prediction. 2022. ArXiv:2206.00133"},{"key":"4457_CR68","unstructured":"Liu S, Guo H, Tang J. Molecular geometry pretraining with SE(3)-invariant denoising distance matching. 2022. ArXiv:2206.13602"},{"key":"4457_CR69","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1038\/s42004-023-00825-5","volume":"6","author":"X Zang","year":"2023","unstructured":"Zang X, Zhao X, Tang B. Hierarchical molecular graph self-supervised learning for property prediction. Commun Chem, 2023, 6: 34","journal-title":"Commun Chem"},{"key":"4457_CR70","doi-asserted-by":"publisher","first-page":"542","DOI":"10.1038\/s42256-023-00654-0","volume":"5","author":"Y Fang","year":"2023","unstructured":"Fang Y, Zhang Q, Zhang N, et al. Knowledge graph-enhanced molecular contrastive learning with functional prompt. Nat Mach Intell, 2023, 5: 542\u2013553","journal-title":"Nat Mach Intell"},{"key":"4457_CR71","doi-asserted-by":"publisher","first-page":"1169","DOI":"10.1038\/s42256-024-00900-z","volume":"6","author":"Y Ni","year":"2024","unstructured":"Ni Y, Feng S, Hong X, et al. Pre-training with fractional denoising to enhance molecular property prediction. Nat Mach Intell, 2024, 6: 1169\u20131178","journal-title":"Nat Mach Intell"},{"key":"4457_CR72","first-page":"12559","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"Y Rong","year":"2020","unstructured":"Rong Y, Bian Y, Xu T, et al. Self-supervised graph transformer on large-scale molecular data. In: Proceedings of Advances in Neural Information Processing Systems, 2020. 12559\u201312571"},{"key":"4457_CR73","doi-asserted-by":"publisher","first-page":"bbad296","DOI":"10.1093\/bib\/bbad296","volume":"24","author":"A Xie","year":"2023","unstructured":"Xie A, Zhang Z, Guan J, et al. Self-supervised learning with chemistry-aware fragmentation for effective molecular property prediction. Brief BioInf, 2023, 24: bbad296","journal-title":"Brief BioInf"},{"key":"4457_CR74","doi-asserted-by":"publisher","first-page":"102092","DOI":"10.1016\/j.inffus.2023.102092","volume":"103","author":"H Zhang","year":"2024","unstructured":"Zhang H, Wu J, Liu S, et al. A pre-trained multi-representation fusion network for molecular property prediction. Inf Fusion, 2024, 103: 102092","journal-title":"Inf Fusion"},{"key":"4457_CR75","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"G Zhou","year":"2023","unstructured":"Zhou G, Gao Z, Ding Q, et al. Uni-Mol: a universal 3D molecular representation learning framework. In: Proceedings of the 11th International Conference on Learning Representations, 2023"},{"key":"4457_CR76","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"S Luo","year":"2023","unstructured":"Luo S, Chen T, Xu Y, et al. One transformer can understand both 2D & 3D molecular data. In: Proceedings of the 11th International Conference on Learning Representations, 2023"},{"key":"4457_CR77","doi-asserted-by":"publisher","first-page":"7568","DOI":"10.1038\/s41467-023-43214-1","volume":"14","author":"H Li","year":"2023","unstructured":"Li H, Zhang R, Min Y, et al. A knowledge-guided pre-training framework for improving molecular representation learning. Nat Commun, 2023, 14: 7568","journal-title":"Nat Commun"},{"key":"4457_CR78","first-page":"56793","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"J Yang","year":"2024","unstructured":"Yang J, Zheng K, Long S, et al. Mol-AE: auto-encoder based molecular representation learning with 3D cloze test objective. In: Proceedings of the 41st International Conference on Machine Learning, 2024. 56793\u201356811"},{"key":"4457_CR79","doi-asserted-by":"publisher","first-page":"7104","DOI":"10.1038\/s41467-024-51321-w","volume":"15","author":"S Lu","year":"2024","unstructured":"Lu S, Gao Z, He D, et al. Data-driven quantum chemical property prediction leveraging 3D conformations with Uni-Mol+. Nat Commun, 2024, 15: 7104","journal-title":"Nat Commun"},{"key":"4457_CR80","doi-asserted-by":"publisher","first-page":"102784","DOI":"10.1016\/j.inffus.2024.102784","volume":"115","author":"R Chen","year":"2025","unstructured":"Chen R, Li C, Wang L, et al. Pretraining graph transformer for molecular representation with fusion of multimodal information. Inf Fusion, 2025, 115: 102784","journal-title":"Inf Fusion"},{"key":"4457_CR81","doi-asserted-by":"crossref","unstructured":"Lewis M. Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. 2019. ArXiv:1910.13461","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"4457_CR82","unstructured":"Kipf T N, Welling M. Semi-supervised classification with graph convolutional networks. 2016. ArXiv:1609.02907"},{"key":"4457_CR83","unstructured":"Veli\u010dkovi\u0107 P, Cucurull G, Casanova A, et al. Graph attention networks. 2017. ArXiv:1710.10903"},{"key":"4457_CR84","unstructured":"Xu K, Hu W, Leskovec J, et al. How powerful are graph neural networks? 2018. ArXiv:1810.00826"},{"key":"4457_CR85","first-page":"1263","volume-title":"Proceedings of International Conference on Machine Learning","author":"J Gilmer","year":"2017","unstructured":"Gilmer J, Schoenholz S S, Riley P F, et al. Neural message passing for quantum chemistry. In: Proceedings of International Conference on Machine Learning, 2017. 1263\u20131272"},{"key":"4457_CR86","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"W Hamilton","year":"2017","unstructured":"Hamilton W, Ying Z, Leskovec J. Inductive representation learning on large graphs. In: Proceedings of Advances in Neural Information Processing Systems, 2017"},{"key":"4457_CR87","first-page":"2831","volume-title":"Proceedings of the 29th International Joint Conference on Artificial Intelligence","author":"Y Song","year":"2020","unstructured":"Song Y, Zheng S, Niu Z, et al. Communicative representation learning on attributed molecular graphs. In: Proceedings of the 29th International Joint Conference on Artificial Intelligence, 2020. 2831\u20132838"},{"key":"4457_CR88","doi-asserted-by":"publisher","first-page":"184","DOI":"10.2174\/1574893616666210820095144","volume":"17","author":"H V Tran","year":"2022","unstructured":"Tran H V, Nguyen Q H. iAnt: combination of convolutional neural network and random forest models using PSSM and BERT features to identify antioxidant proteins. Curr Bioinform, 2022, 17: 184\u2013195","journal-title":"Curr Bioinform"},{"key":"4457_CR89","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1038\/ni1416","volume":"8","author":"J W Shui","year":"2007","unstructured":"Shui J W, Boomer J S, Han J, et al. Hematopoietic progenitor kinase 1 negatively regulates T cell receptor signaling and T cell-mediated immune responses. Nat Immunol, 2007, 8: 84\u201391","journal-title":"Nat Immunol"},{"key":"4457_CR90","doi-asserted-by":"publisher","first-page":"551","DOI":"10.1016\/j.ccell.2020.08.001","volume":"38","author":"J Si","year":"2020","unstructured":"Si J, Shi X, Sun S, et al. Hematopoietic progenitor kinase1 (HPK1) mediates T cell dysfunction and is a druggable target for T cell-based immunotherapies. Cancer Cell, 2020, 38: 551\u2013566.e11","journal-title":"Cancer Cell"},{"key":"4457_CR91","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1016\/j.ccr.2007.11.004","volume":"12","author":"V D Acevedo","year":"2007","unstructured":"Acevedo V D, Gangula R D, Freeman K W, et al. Inducible FGFR-1 activation leads to irreversible prostate adenocarcinoma and an epithelial-to-mesenchymal transition. Cancer Cell, 2007, 12: 559\u2013571","journal-title":"Cancer Cell"},{"key":"4457_CR92","doi-asserted-by":"publisher","first-page":"2248","DOI":"10.1038\/bjc.2013.550","volume":"109","author":"P T Nguyen","year":"2013","unstructured":"Nguyen P T, Tsunematsu T, Yanagisawa S, et al. The FGFR1 inhibitor PD173074 induces mesenchymal-epithelial transition through the transcription factor AP-1. Br J Cancer, 2013, 109: 2248\u20132258","journal-title":"Br J Cancer"},{"key":"4457_CR93","doi-asserted-by":"publisher","first-page":"5938","DOI":"10.1021\/acs.jcim.2c01073","volume":"62","author":"D van Tilborg","year":"2022","unstructured":"van Tilborg D, Alenicheva A, Grisoni F. Exposing the limitations of molecular machine learning with activity cliffs. J Chem Inf Model, 2022, 62: 5938\u20135951","journal-title":"J Chem Inf Model"},{"key":"4457_CR94","unstructured":"Li Y, Liang F, Zhao L, et al. Supervision exists everywhere: a data efficient contrastive language-image pre-training paradigm. 2021. ArXiv:2110.05208"},{"key":"4457_CR95","first-page":"9694","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"J Li","year":"2021","unstructured":"Li J, Selvaraju R, Gotmare A, et al. Align before fuse: vision and language representation learning with momentum distillation. In: Proceedings of Advances in Neural Information Processing Systems, 2021. 9694\u20139705"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4457-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-024-4457-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4457-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T06:02:19Z","timestamp":1750831339000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-024-4457-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,23]]},"references-count":95,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["4457"],"URL":"https:\/\/doi.org\/10.1007\/s11432-024-4457-2","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,23]]},"assertion":[{"value":"31 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 February 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 March 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 June 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"170104"}}