{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T13:54:31Z","timestamp":1774014871326,"version":"3.50.1"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T00:00:00Z","timestamp":1730851200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T00:00:00Z","timestamp":1730851200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Beijing Frontier Research Center for Biological Structure","award":["041500002"],"award-info":[{"award-number":["041500002"]}]},{"name":"Tsinghua University Initiative Scientific Research Program","award":["20231080030"],"award-info":[{"award-number":["20231080030"]}]},{"name":"the Tsinghua-Peking University Center for Life Sciences","award":["No.20111770319"],"award-info":[{"award-number":["No.20111770319"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"DOI":"10.1186\/s13321-024-00920-2","type":"journal-article","created":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T17:02:57Z","timestamp":1730912577000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["Protein-small molecule binding site prediction based on a pre-trained protein language model with contrastive 
learning"],"prefix":"10.1186","volume":"16","author":[{"given":"Jue","family":"Wang","sequence":"first","affiliation":[]},{"given":"Yufan","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Boxue","family":"Tian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,6]]},"reference":[{"issue":"17","key":"920_CR1","doi-asserted-by":"publisher","first-page":"11269","DOI":"10.1021\/acs.chemrev.7b00077","volume":"117","author":"GM Burslem","year":"2017","unstructured":"Burslem GM, Crews CM (2017) Small-molecule modulation of protein homeostasis. Chem Rev 117(17):11269\u201311301","journal-title":"Chem Rev"},{"issue":"4","key":"920_CR2","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1038\/nchembio.1199","volume":"9","author":"M Schenone","year":"2013","unstructured":"Schenone M, Dan\u010d\u00edk V, Wagner BK, Clemons PA (2013) Target identification and mechanism of action in chemical biology and drug discovery. Nat Chem Biol 9(4):232\u2013240","journal-title":"Nat Chem Biol"},{"key":"920_CR3","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/bs.apcsb.2015.09.004","volume":"102","author":"P Sneha","year":"2016","unstructured":"Sneha P, Doss CGP (2016) Molecular dynamics: new frontier in personalized medicine. Adv Protein Chem Struct Biol 102:181\u2013224","journal-title":"Adv Protein Chem Struct Biol"},{"issue":"1","key":"920_CR4","doi-asserted-by":"publisher","first-page":"3017","DOI":"10.1038\/ncomms4017","volume":"4","author":"B Xiao","year":"2013","unstructured":"Xiao B, Sanders MJ, Carmena D, Bright NJ, Haire LF, Underwood E et al (2013) Structural basis of AMPK regulation by small molecule activators. 
Nat Commun 4(1):3017","journal-title":"Nat Commun"},{"issue":"1","key":"920_CR5","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1038\/nrc2559","volume":"9","author":"J Zhang","year":"2009","unstructured":"Zhang J, Yang PL, Gray NS (2009) Targeting cancer with small molecule kinase inhibitors. Nat Rev Cancer 9(1):28\u201339","journal-title":"Nat Rev Cancer"},{"issue":"1","key":"920_CR6","doi-asserted-by":"publisher","first-page":"4","DOI":"10.2174\/1386207319666151203001529","volume":"19","author":"M Gal","year":"2016","unstructured":"Gal M, Bloch I, Shechter N, Romanenko O, Shir OM (2016) Efficient isothermal titration calorimetry technique identifies direct interaction of small molecule inhibitors with the target protein. Comb Chem High Throughput Screen 19(1):4\u201313","journal-title":"Comb Chem High Throughput Screen"},{"issue":"1","key":"920_CR7","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1177\/1087057115607814","volume":"21","author":"AE Kennedy","year":"2016","unstructured":"Kennedy AE, Sheffield KS, Eibl JK, Murphy MB, Vohra R, Scott JA et al (2016) A surface plasmon resonance spectroscopy method for characterizing small-molecule binding to nerve growth factor. J Biomol Screen 21(1):96\u2013100","journal-title":"J Biomol Screen"},{"issue":"10","key":"920_CR8","doi-asserted-by":"publisher","first-page":"981","DOI":"10.1080\/17460441.2017.1363734","volume":"12","author":"GR Masson","year":"2017","unstructured":"Masson GR, Jenkins ML, Burke JE (2017) An overview of hydrogen deuterium exchange mass spectrometry (HDX-MS) in drug discovery. 
Expert Opin Drug Discov 12(10):981\u2013994","journal-title":"Expert Opin Drug Discov"},{"issue":"7","key":"920_CR9","doi-asserted-by":"publisher","first-page":"1698","DOI":"10.1016\/j.cell.2016.05.040","volume":"165","author":"A Merk","year":"2016","unstructured":"Merk A, Bartesaghi A, Banerjee S, Falconieri V, Rao P, Davis MI et al (2016) Breaking cryo-EM resolution barriers to facilitate drug discovery. Cell 165(7):1698\u20131707","journal-title":"Cell"},{"issue":"1","key":"920_CR10","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.sbi.2008.11.009","volume":"19","author":"O Guvench","year":"2009","unstructured":"Guvench O, MacKerell AD Jr (2009) Computational evaluation of protein\u2013small molecule binding. Curr Opin Struct Biol 19(1):56\u201361","journal-title":"Curr Opin Struct Biol"},{"issue":"6","key":"920_CR11","doi-asserted-by":"publisher","first-page":"730","DOI":"10.1038\/s41592-022-01490-7","volume":"19","author":"J Tubiana","year":"2022","unstructured":"Tubiana J, Schneidman-Duhovny D, Wolfson HJ (2022) ScanNet: an interpretable geometric deep learning model for structure-based protein binding site prediction. Nat Methods 19(6):730\u2013739","journal-title":"Nat Methods"},{"key":"920_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13321-018-0285-8","volume":"10","author":"R Kriv\u00e1k","year":"2018","unstructured":"Kriv\u00e1k R, Hoksza D (2018) P2Rank: machine learning based tool for rapid and accurate prediction of ligand binding sites from protein structure. J Cheminform 10:1\u201312","journal-title":"J Cheminform"},{"issue":"19","key":"920_CR13","doi-asserted-by":"publisher","first-page":"3036","DOI":"10.1093\/bioinformatics\/btx350","volume":"33","author":"J Jim\u00e9nez","year":"2017","unstructured":"Jim\u00e9nez J, Doerr S, Mart\u00ednez-Rosell G, Rose AS, De Fabritiis G (2017) DeepSite: protein-binding site predictor using 3D-convolutional neural networks. 
Bioinformatics 33(19):3036\u20133042","journal-title":"Bioinformatics"},{"key":"920_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1472-6807-9-52","volume":"9","author":"J Hu","year":"2009","unstructured":"Hu J, Yan C (2009) A tool for calculating binding-site residues on proteins from PDB structures. BMC Struct Biol 9:1\u20136","journal-title":"BMC Struct Biol"},{"issue":"1","key":"920_CR15","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1005324","volume":"13","author":"S Wang","year":"2017","unstructured":"Wang S, Sun S, Li Z, Zhang R, Xu J (2017) Accurate de novo prediction of protein contact map by ultra-deep learning model. PLoS Comput Biol 13(1):e1005324","journal-title":"PLoS Comput Biol"},{"issue":"2","key":"920_CR16","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1093\/bioinformatics\/btaa701","volume":"37","author":"A Villegas-Morcillo","year":"2021","unstructured":"Villegas-Morcillo A, Makrodimitris S, van Ham RC, Gomez AM, Sanchez V, Reinders MJ (2021) Unsupervised protein embeddings outperform hand-crafted sequence and structure features at predicting molecular function. Bioinformatics 37(2):162\u2013170","journal-title":"Bioinformatics"},{"issue":"9","key":"920_CR17","doi-asserted-by":"publisher","first-page":"e51","DOI":"10.1093\/nar\/gkab044","volume":"49","author":"Y Xia","year":"2021","unstructured":"Xia Y, Xia C-Q, Pan X, Shen H-B (2021) GraphBind: protein structural context embedded rules learned by hierarchical graph neural networks for recognizing nucleic-acid-binding residues. Nucleic Acids Res 49(9):e51\u2013e51","journal-title":"Nucleic Acids Res"},{"issue":"11","key":"920_CR18","doi-asserted-by":"publisher","first-page":"1700021","DOI":"10.1002\/minf.201700021","volume":"36","author":"PP Pai","year":"2017","unstructured":"Pai PP, Dattatreya RK, Mondal S (2017) Ensemble architecture for prediction of enzyme-ligand binding residues using evolutionary information. 
Mol Inform 36(11):1700021","journal-title":"Mol Inform"},{"issue":"10","key":"920_CR19","doi-asserted-by":"publisher","first-page":"887","DOI":"10.1007\/s10822-019-00235-7","volume":"33","author":"G Macari","year":"2019","unstructured":"Macari G, Toti D, Polticelli F (2019) Computational methods and tools for binding site recognition between proteins and small molecules: from classical geometrical approaches to modern machine learning strategies. J Comput Aided Mol Des 33(10):887\u2013903","journal-title":"J Comput Aided Mol Des"},{"issue":"10","key":"920_CR20","doi-asserted-by":"publisher","first-page":"2326","DOI":"10.1038\/s41596-022-00728-0","volume":"17","author":"X Zhou","year":"2022","unstructured":"Zhou X, Zheng W, Li Y, Pearce R, Zhang C, Bell EW et al (2022) I-TASSER-MTD: a deep-learning-based platform for multi-domain protein structure and function prediction. Nat Protoc 17(10):2326\u20132353","journal-title":"Nat Protoc"},{"issue":"1","key":"920_CR21","doi-asserted-by":"publisher","first-page":"bbad488","DOI":"10.1093\/bib\/bbad488","volume":"25","author":"Y Liu","year":"2024","unstructured":"Liu Y, Tian B (2024) Protein\u2013DNA binding sites prediction based on pre-trained protein language model and contrastive learning. Brief Bioinform 25(1):bbad488","journal-title":"Brief Bioinform"},{"issue":"12","key":"920_CR22","doi-asserted-by":"publisher","first-page":"btad718","DOI":"10.1093\/bioinformatics\/btad718","volume":"39","author":"Y Fang","year":"2023","unstructured":"Fang Y, Jiang Y, Wei L, Ma Q, Ren Z, Yuan Q et al (2023) DeepProSite: structure-aware protein binding site prediction using ESMFold and pretrained language model. 
Bioinformatics 39(12):btad718","journal-title":"Bioinformatics"},{"issue":"6637","key":"920_CR23","doi-asserted-by":"publisher","first-page":"1123","DOI":"10.1126\/science.ade2574","volume":"379","author":"Z Lin","year":"2023","unstructured":"Lin Z, Akin H, Rao R, Hie B, Zhu Z, Lu W et al (2023) Evolutionary-scale prediction of atomic-level protein structure with a language model. Science 379(6637):1123\u20131130","journal-title":"Science"},{"issue":"10","key":"920_CR24","doi-asserted-by":"publisher","first-page":"7112","DOI":"10.1109\/TPAMI.2021.3095381","volume":"44","author":"A Elnaggar","year":"2021","unstructured":"Elnaggar A, Heinzinger M, Dallago C, Rehawi G, Wang Y, Jones L et al (2021) Prottrans: toward understanding the language of life through self-supervised learning. IEEE Trans Pattern Anal Mach Intell 44(10):7112\u20137127","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"11","key":"920_CR25","doi-asserted-by":"publisher","first-page":"1072","DOI":"10.1038\/nbt.2419","volume":"30","author":"DS Marks","year":"2012","unstructured":"Marks DS, Hopf TA, Sander C (2012) Protein structure prediction from sequence variation. Nat Biotechnol 30(11):1072\u20131080","journal-title":"Nat Biotechnol"},{"issue":"7","key":"920_CR26","doi-asserted-by":"publisher","first-page":"498","DOI":"10.2174\/138920310794109094","volume":"11","author":"SR Shenoy","year":"2010","unstructured":"Shenoy SR, Jayaram B (2010) Proteins: sequence to structure and function-current status. Curr Protein Pept Sci 11(7):498\u2013514","journal-title":"Curr Protein Pept Sci"},{"issue":"3","key":"920_CR27","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1017\/S0033583503003901","volume":"36","author":"JC Whisstock","year":"2003","unstructured":"Whisstock JC, Lesk AM (2003) Prediction of protein function from protein sequence and structure. 
Q Rev Biophys 36(3):307\u2013340","journal-title":"Q Rev Biophys"},{"key":"920_CR28","doi-asserted-by":"crossref","unstructured":"He X, Zhou Y, Zhou Z, Bai S, Bai X. Triplet-center loss for multi-view 3d object retrieval. In 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2018 (pp. 1945\u20131954)","DOI":"10.1109\/CVPR.2018.00208"},{"issue":"5","key":"920_CR29","doi-asserted-by":"publisher","first-page":"613","DOI":"10.1016\/j.str.2011.02.015","volume":"19","author":"K Chen","year":"2011","unstructured":"Chen K, Mizianty MJ, Gao J, Kurgan L (2011) A critical comparative assessment of predictions of protein-binding sites for biologically relevant organic compounds. Structure 19(5):613\u2013621","journal-title":"Structure"},{"issue":"20","key":"920_CR30","doi-asserted-by":"publisher","first-page":"2588","DOI":"10.1093\/bioinformatics\/btt447","volume":"29","author":"J Yang","year":"2013","unstructured":"Yang J, Roy A, Zhang Y (2013) Protein\u2013ligand binding site recognition using complementary binding-specific substructure comparison and sequence profile alignment. Bioinformatics 29(20):2588\u20132595","journal-title":"Bioinformatics"},{"issue":"D1","key":"920_CR31","doi-asserted-by":"publisher","first-page":"D399","DOI":"10.1093\/nar\/gku928","volume":"43","author":"J Desaphy","year":"2015","unstructured":"Desaphy J, Bret G, Rognan D, Kellenberger E (2015) sc-PDB: a 3D-database of ligandable binding sites\u201410 years on. Nucleic Acids Res 43(D1):D399\u2013D404","journal-title":"Nucleic Acids Res"},{"key":"920_CR32","doi-asserted-by":"crossref","unstructured":"Vig J, Madani A, Varshney LR, Xiong C, Socher R, Rajani NF (2020) Bertology meets biology: interpreting attention in protein language models. arXiv preprint arXiv:2006.15222.","DOI":"10.1101\/2020.06.26.174417"},{"key":"920_CR33","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Doll\u00e1r P. Focal loss for dense object detection. 
In 2017 IEEE International Conference on Computer Vision (ICCV), 2017 (pp. 2980\u20132988)","DOI":"10.1109\/ICCV.2017.324"},{"key":"920_CR34","doi-asserted-by":"crossref","unstructured":"Cui Y, Jia M, Lin T-Y, Song Y, Belongie S. Class-balanced loss based on effective number of samples. In 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019 (pp. 9268\u20139277)","DOI":"10.1109\/CVPR.2019.00949"},{"key":"920_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12864-019-6413-7","volume":"21","author":"D Chicco","year":"2020","unstructured":"Chicco D, Jurman G (2020) The advantages of the Matthews correlation coefficient (MCC) over F1 score and accuracy in binary classification evaluation. BMC Genomics 21:1\u201313","journal-title":"BMC Genomics"},{"issue":"6","key":"920_CR36","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0177678","volume":"12","author":"S Boughorbel","year":"2017","unstructured":"Boughorbel S, Jarray F, El-Anbari M (2017) Optimal classifier for imbalanced data using Matthews correlation coefficient metric. PLoS ONE 12(6):e0177678","journal-title":"PLoS ONE"},{"key":"920_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13040-021-00244-z","volume":"14","author":"D Chicco","year":"2021","unstructured":"Chicco D, T\u00f6tsch N, Jurman G (2021) The Matthews correlation coefficient (MCC) is more reliable than balanced accuracy, bookmaker informedness, and markedness in two-class confusion matrix evaluation. BioData Min 14:1\u201322","journal-title":"BioData Min"},{"issue":"3","key":"920_CR38","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1038\/s42256-022-00457-9","volume":"4","author":"S Unsal","year":"2022","unstructured":"Unsal S, Atas H, Albayrak M, Turhan K, Acar AC, Do\u011fan T (2022) Learning functional properties of proteins with language models. 
Nat Mach Intell 4(3):227\u2013245","journal-title":"Nat Mach Intell"},{"issue":"4","key":"920_CR39","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/S1364-6613(99)01294-2","volume":"3","author":"RM French","year":"1999","unstructured":"French RM (1999) Catastrophic forgetting in connectionist networks. Trends Cogn Sci 3(4):128\u2013135","journal-title":"Trends Cogn Sci"},{"issue":"13","key":"920_CR40","doi-asserted-by":"publisher","first-page":"3521","DOI":"10.1073\/pnas.1611835114","volume":"114","author":"J Kirkpatrick","year":"2017","unstructured":"Kirkpatrick J, Pascanu R, Rabinowitz N, Veness J, Desjardins G, Rusu AA et al (2017) Overcoming catastrophic forgetting in neural networks. Proc Natl Acad Sci U S A 114(13):3521\u20133526","journal-title":"Proc Natl Acad Sci U S A"},{"key":"920_CR41","unstructured":"Oord AVD, Li Y, Vinyals O (2018) Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748"},{"key":"920_CR42","doi-asserted-by":"crossref","unstructured":"Li X, Sun A, Zhao M, Yu J, Zhu K, Jin D, et al. Multi-intention oriented contrastive learning for sequential recommendation. In ACM International Conference on Web Search and Data Mining, 2023 (pp. 411\u2013419)","DOI":"10.1145\/3539597.3570411"},{"key":"920_CR43","unstructured":"Cai X, Huang C, Xia L, Ren X. (2023). LightGCL: simple yet effective graph contrastive learning for recommendation. arXiv preprint arXiv:2302.08191"},{"issue":"19","key":"920_CR44","doi-asserted-by":"publisher","first-page":"2460","DOI":"10.1093\/bioinformatics\/btq461","volume":"26","author":"RC Edgar","year":"2010","unstructured":"Edgar RC (2010) Search and clustering orders of magnitude faster than BLAST. 
Bioinformatics 26(19):2460\u20132461","journal-title":"Bioinformatics"},{"issue":"13","key":"920_CR45","doi-asserted-by":"publisher","first-page":"1658","DOI":"10.1093\/bioinformatics\/btl158","volume":"22","author":"W Li","year":"2006","unstructured":"Li W, Godzik A (2006) Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences. Bioinformatics 22(13):1658\u20131659","journal-title":"Bioinformatics"},{"key":"920_CR46","unstructured":"UniProt: the universal protein knowledgebase in 2021 (2021). Nucleic Acids Res, 49(D1), D480\u2013D489."},{"issue":"10","key":"920_CR47","doi-asserted-by":"publisher","first-page":"1282","DOI":"10.1093\/bioinformatics\/btm098","volume":"23","author":"BE Suzek","year":"2007","unstructured":"Suzek BE, Huang H, McGarvey P, Mazumder R, Wu CH (2007) UniRef: comprehensive and non-redundant UniProt reference clusters. Bioinformatics 23(10):1282\u20131288","journal-title":"Bioinformatics"},{"issue":"4","key":"920_CR48","doi-asserted-by":"publisher","first-page":"772","DOI":"10.1093\/molbev\/mst010","volume":"30","author":"K Katoh","year":"2013","unstructured":"Katoh K, Standley DM (2013) MAFFT multiple sequence alignment software version 7: improvements in performance and usability. Mol Biol Evol 30(4):772\u2013780","journal-title":"Mol Biol Evol"},{"issue":"4","key":"920_CR49","doi-asserted-by":"publisher","first-page":"823","DOI":"10.1002\/j.1460-2075.1986.tb04288.x","volume":"5","author":"C Chothia","year":"1986","unstructured":"Chothia C, Lesk AM (1986) The relation between the divergence of sequence and structure in proteins. EMBO J 5(4):823\u2013826","journal-title":"EMBO J"},{"issue":"3","key":"920_CR50","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1006\/jmbi.2000.3974","volume":"301","author":"A-S Yang","year":"2000","unstructured":"Yang A-S, Honig B (2000) An integrated approach to the analysis and modeling of protein sequences and structures. II. 
On the relationship between sequence and structural similarity for proteins that are not obviously related in sequence. J Mol Biol 301(3):679\u2013689","journal-title":"J Mol Biol"},{"issue":"1","key":"920_CR51","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1016\/S0021-9258(19)52289-X","volume":"216","author":"EH Fischer","year":"1955","unstructured":"Fischer EH, Krebs EG (1955) Conversion of phosphorylase b to phosphorylase a in muscle extracts. J Biol Chem 216(1):121\u2013132","journal-title":"J Biol Chem"},{"issue":"9","key":"920_CR52","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1038\/nrm908","volume":"3","author":"KL Pierce","year":"2002","unstructured":"Pierce KL, Premont RT, Lefkowitz RJ (2002) Seven-transmembrane receptors. Nat Rev Mol Cell Biol 3(9):639\u2013650","journal-title":"Nat Rev Mol Cell Biol"},{"issue":"3","key":"920_CR53","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1017\/S003358350000425X","volume":"25","author":"JE Walker","year":"1992","unstructured":"Walker JE (1992) The NADH: ubiquinone oxidoreductase (complex I) of respiratory chains. Q Rev Biophys 25(3):253\u2013324","journal-title":"Q Rev Biophys"},{"key":"920_CR54","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12911-019-1014-6","volume":"20","author":"AM Carrington","year":"2020","unstructured":"Carrington AM, Fieguth PW, Qazi H, Holzinger A, Chen HH, Mayr F et al (2020) A new concordant partial AUC and partial c statistic for imbalanced data in the evaluation of machine learning algorithms. BMC Med Inform Decis Mak 20:1\u201312","journal-title":"BMC Med Inform Decis Mak"},{"issue":"10","key":"920_CR55","first-page":"2224","volume":"3","author":"M Bekkar","year":"2013","unstructured":"Bekkar M, Djemaa HK, Alitouche TA (2013) Evaluation measures for models assessment over imbalanced data sets. J Inf Eng Appl 3(10):2224","journal-title":"J Inf Eng Appl"},{"key":"920_CR56","unstructured":"Weng CG, Poon J. 
A new evaluation measure for imbalanced datasets. In Australasian data mining conference, 2008 (pp. 27\u201332)"},{"issue":"11","key":"920_CR57","first-page":"12","volume":"9","author":"L Van der Maaten","year":"2008","unstructured":"Van der Maaten L, Hinton G (2008) Visualizing data using t-SNE. J Mach Learn Res 9(11):12","journal-title":"J Mach Learn Res"},{"key":"920_CR58","first-page":"415","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN et al (2017) Attention is all you need. Adv Neural Inf Process Syst 30:415","journal-title":"Adv Neural Inf Process Syst"},{"key":"920_CR59","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1016\/j.neucom.2020.07.061","volume":"415","author":"L Yang","year":"2020","unstructured":"Yang L, Shami A (2020) On hyperparameter optimization of machine learning algorithms: theory and practice. Neurocomputing 415:295\u2013316","journal-title":"Neurocomputing"},{"issue":"1","key":"920_CR60","first-page":"26","volume":"17","author":"J Wu","year":"2019","unstructured":"Wu J, Chen X-Y, Zhang H, Xiong L-D, Lei H, Deng S-H (2019) Hyperparameter optimization for machine learning models based on Bayesian optimization. J Electron Sci Technol 17(1):26\u201340","journal-title":"J Electron Sci Technol"},{"key":"920_CR61","first-page":"499","volume-title":"European conference on computer vision","author":"Y Wen","year":"2016","unstructured":"Wen Y, Zhang K, Li Z, Qiao Y (2016) A discriminative feature learning approach for deep face recognition. In: Wen Y (ed) European conference on computer vision. 
Springer, Cham, pp 499\u2013515"},{"issue":"7738","key":"920_CR62","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1038\/s41586-018-0820-9","volume":"565","author":"AP Katsoulidis","year":"2019","unstructured":"Katsoulidis AP, Antypov D, Whitehead GF, Carrington EJ, Adams DJ, Berry NG et al (2019) Chemical control of structure and guest uptake by a conformationally mobile porous material. Nature 565(7738):213\u2013217","journal-title":"Nature"},{"key":"920_CR63","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.phrs.2015.07.010","volume":"100","author":"R Roskoski Jr","year":"2015","unstructured":"Roskoski R Jr (2015) A historical overview of protein kinases and their targeted small molecule inhibitors. Pharmacol Res 100:1\u201323","journal-title":"Pharmacol Res"},{"issue":"3","key":"920_CR64","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1016\/S0022-2836(05)80364-X","volume":"215","author":"JA Ippolito","year":"1990","unstructured":"Ippolito JA, Alexander RS, Christianson DW (1990) Hydrogen bond stereochemistry in protein structure and function. J Mol Biol 215(3):457\u2013471","journal-title":"J Mol Biol"},{"issue":"2","key":"920_CR65","doi-asserted-by":"publisher","first-page":"144","DOI":"10.2174\/1574892814666181218170257","volume":"14","author":"D Chen","year":"2019","unstructured":"Chen D, Zhou XZ, Lee TH (2019) Death-associated protein kinase 1 as a promising drug target in cancer and Alzheimer\u2019s disease. Recent Pat Anticancer Drug Discov 14(2):144\u2013157","journal-title":"Recent Pat Anticancer Drug Discov"},{"key":"920_CR66","doi-asserted-by":"publisher","first-page":"46","DOI":"10.3389\/fnmol.2016.00046","volume":"9","author":"P Singh","year":"2016","unstructured":"Singh P, Ravanan P, Talwar P (2016) Death associated protein kinase 1 (DAPK1): a regulator of apoptosis and autophagy. 
Front Mol Neurosci 9:46","journal-title":"Front Mol Neurosci"},{"issue":"5","key":"920_CR67","doi-asserted-by":"publisher","first-page":"1284","DOI":"10.1021\/bi0621213","volume":"46","author":"E Yeh","year":"2007","unstructured":"Yeh E, Blasiak LC, Koglin A, Drennan CL, Walsh CT (2007) Chlorination by a long-lived intermediate in the mechanism of flavin-dependent halogenases. Biochemistry 46(5):1284\u20131292","journal-title":"Biochemistry"},{"issue":"11","key":"920_CR68","doi-asserted-by":"publisher","first-page":"3960","DOI":"10.1073\/pnas.0500755102","volume":"102","author":"E Yeh","year":"2005","unstructured":"Yeh E, Garneau S, Walsh CT (2005) Robust in vitro activity of RebF and RebH, a two-component reductase\/halogenase, generating 7-chlorotryptophan during rebeccamycin biosynthesis. Proc Natl Acad Sci U S A 102(11):3960\u20133965","journal-title":"Proc Natl Acad Sci U S A"},{"issue":"4","key":"920_CR69","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1016\/S1074-5521(02)00126-6","volume":"9","author":"C S\u00e1nchez","year":"2002","unstructured":"S\u00e1nchez C, Butovich IA, Bra\u00f1a AF, Rohr J, M\u00e9ndez C, Salas JA (2002) The biosynthetic gene cluster for the antitumor rebeccamycin: characterization and generation of indolocarbazole derivatives. Chem Biol 9(4):519\u2013531","journal-title":"Chem Biol"},{"issue":"3","key":"920_CR70","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1038\/nchembio.2569","volume":"14","author":"PV Sergiev","year":"2018","unstructured":"Sergiev PV, Aleksashin NA, Chugunova AA, Polikanov YS, Dontsova OA (2018) Structural and evolutionary insights into ribosomal RNA methylation. 
Nat Chem Biol 14(3):226\u2013235","journal-title":"Nat Chem Biol"},{"issue":"7","key":"920_CR71","doi-asserted-by":"publisher","first-page":"2387","DOI":"10.1093\/nar\/gkp1189","volume":"38","author":"JP Wurm","year":"2010","unstructured":"Wurm JP, Meyer B, Bahr U, Held M, Frolow O, K\u00f6tter P et al (2010) The ribosome assembly factor Nep1 responsible for Bowen-Conradi syndrome is a pseudouridine-N1-specific methyltransferase. Nucleic Acids Res 38(7):2387\u20132398","journal-title":"Nucleic Acids Res"},{"key":"920_CR72","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1146\/annurev.biophys.37.032807.125924","volume":"37","author":"VN Uversky","year":"2008","unstructured":"Uversky VN, Oldfield CJ, Dunker AK (2008) Intrinsically disordered proteins in human diseases: introducing the D2 concept. Annu Rev Biophys 37:215\u2013246","journal-title":"Annu Rev Biophys"},{"issue":"2","key":"920_CR73","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1006\/jmbi.1999.3110","volume":"293","author":"PE Wright","year":"1999","unstructured":"Wright PE, Dyson HJ (1999) Intrinsically unstructured proteins: re-assessing the protein structure-function paradigm. J Mol Biol 293(2):321\u2013331","journal-title":"J Mol Biol"},{"issue":"7","key":"920_CR74","doi-asserted-by":"publisher","first-page":"1913","DOI":"10.1021\/bi002503n","volume":"40","author":"LR Olsen","year":"2001","unstructured":"Olsen LR, Roderick SL (2001) Structure of the Escherichia coli GlmU pyrophosphorylase and acetyltransferase active sites. Biochemistry 40(7):1913\u20131921","journal-title":"Biochemistry"},{"issue":"10","key":"920_CR75","doi-asserted-by":"publisher","first-page":"3977","DOI":"10.1074\/jbc.M117.775957","volume":"292","author":"Z Li","year":"2017","unstructured":"Li Z, Kitanishi K, Twahir UT, Cracan V, Chapman D, Warncke K et al (2017) Cofactor editing by the G-protein metallochaperone domain regulates the radical B12 enzyme IcmF. 
J Biol Chem 292(10):3977\u20133987","journal-title":"J Biol Chem"},{"issue":"1","key":"920_CR76","doi-asserted-by":"publisher","first-page":"2775","DOI":"10.1038\/s41467-024-46808-5","volume":"15","author":"W Liu","year":"2024","unstructured":"Liu W, Wang Z, You R, Xie C, Wei H, Xiong Y et al (2024) PLMSearch: Protein language model powers accurate and fast sequence search for remote homology. Nat Commun 15(1):2775","journal-title":"Nat Commun"},{"key":"920_CR77","volume":"2022","author":"Z Lin","year":"2022","unstructured":"Lin Z, Akin H, Rao R, Hie B, Zhu Z, Lu W et al (2022) Language models of protein sequences at the scale of evolution enable accurate structure prediction. BioRxiv 2022:500902","journal-title":"BioRxiv"},{"key":"920_CR78","doi-asserted-by":"publisher","DOI":"10.1016\/j.foodchem.2023.137162","volume":"431","author":"Z Du","year":"2024","unstructured":"Du Z, Ding X, Hsu W, Munir A, Xu Y, Li Y (2024) pLM4ACE: a protein language model based predictor for antihypertensive peptide screening. Food Chem 431:137162","journal-title":"Food Chem"},{"key":"920_CR79","doi-asserted-by":"crossref","unstructured":"Hayes T, Rao R, Akin H, Sofroniew NJ, Oktay D, Lin Z, et al (2024) Simulating 500 million years of evolution with a language model. bioRxiv, 
2024.07.01.600583.","DOI":"10.1101\/2024.07.01.600583"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-024-00920-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s13321-024-00920-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-024-00920-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T18:04:10Z","timestamp":1730916250000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-024-00920-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,6]]},"references-count":79,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["920"],"URL":"https:\/\/doi.org\/10.1186\/s13321-024-00920-2","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,6]]},"assertion":[{"value":"4 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"125"}}