{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T04:12:35Z","timestamp":1748751155875,"version":"3.41.0"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T00:00:00Z","timestamp":1748649600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T00:00:00Z","timestamp":1748649600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100004837","name":"Ministerio de Ciencia e Innovaci\u00f3n","doi-asserted-by":"publisher","award":["PID2020-119894GB-I00\/AEI\/10.13039\/501100011033","PID2020-119894GB-I00\/AEI\/10.13039\/501100011033","PID2020-119894GB-I00\/AEI\/10.13039\/501100011033","PID2020-119894GB-I00\/AEI\/10.13039\/501100011033","PID2020-119894GB-I00\/AEI\/10.13039\/501100011033"],"award-info":[{"award-number":["PID2020-119894GB-I00\/AEI\/10.13039\/501100011033","PID2020-119894GB-I00\/AEI\/10.13039\/501100011033","PID2020-119894GB-I00\/AEI\/10.13039\/501100011033","PID2020-119894GB-I00\/AEI\/10.13039\/501100011033","PID2020-119894GB-I00\/AEI\/10.13039\/501100011033"]}],"id":[{"id":"10.13039\/501100004837","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008431","name":"Consejer\u00eda de Educaci\u00f3n, Junta de Castilla y Le\u00f3n","doi-asserted-by":"publisher","award":["EDU\/875\/2021"],"award-info":[{"award-number":["EDU\/875\/2021"]}],"id":[{"id":"10.13039\/501100008431","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100019629","name":"Universidad de Burgos","doi-asserted-by":"publisher","award":["2021\/00001\/007\/001\/018","2021\/00001\/007\/001\/018"],"award-info":[{"award-number":["2021\/00001\/007\/001\/018","2021\/00001\/007\/001\/018"]}],"id":[{"id":"10.13039\/100019629","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100014180","name":"Junta de Castilla y Le\u00f3n","doi-asserted-by":"publisher","award":["BU055P20 (JCyL\/FEDER, UE)","BU055P20 (JCyL\/FEDER, UE)","BU055P20 (JCyL\/FEDER, UE)"],"award-info":[{"award-number":["BU055P20 (JCyL\/FEDER, UE)","BU055P20 (JCyL\/FEDER, UE)","BU055P20 (JCyL\/FEDER, UE)"]}],"id":[{"id":"10.13039\/501100014180","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["SPP2363"],"award-info":[{"award-number":["SPP2363"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"DOI":"10.1186\/s13321-025-01029-w","type":"journal-article","created":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T18:50:56Z","timestamp":1748717456000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Semi-supervised prediction of protein fitness for data-driven protein engineering"],"prefix":"10.1186","volume":"17","author":[{"given":"Alicia","family":"Olivares-Gil","sequence":"first","affiliation":[]},{"given":"Jos\u00e9 A.","family":"Barbero-Aparicio","sequence":"additional","affiliation":[]},{"given":"Juan J.","family":"Rodr\u00edguez","sequence":"additional","affiliation":[]},{"given":"Jos\u00e9 F.","family":"D\u00edez-Pastor","sequence":"additional","affiliation":[]},{"given":"C\u00e9sar","family":"Garc\u00eda-Osorio","sequence":"additional","affiliation":[]},{"given":"Mehdi D.","family":"Davari","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,31]]},"reference":[{"issue":"2","key":"1029_CR1","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1016\/S0968-0004(00)01755-2","volume":"26","author":"FH Arnold","year":"2001","unstructured":"Arnold FH, Wintrode PL, Miyazaki K, Gershenson A (2001) How enzymes adapt: lessons from directed evolution. Trends Biochem Sci 26(2):100\u2013106. https:\/\/doi.org\/10.1016\/S0968-0004(00)01755-2","journal-title":"Trends Biochem Sci"},{"key":"1029_CR2","doi-asserted-by":"publisher","unstructured":"Pramanik S, Contreras F, Davari MD, Schwaneberg U (2021) Protein engineering by efficient sequence space exploration through combination of directed evolution and computational design methodologies. Protein Eng Tools Appl. p. 153\u2013176. https:\/\/doi.org\/10.1002\/9783527815128.ch7","DOI":"10.1002\/9783527815128.ch7"},{"issue":"22","key":"1029_CR3","doi-asserted-by":"publisher","first-page":"14243","DOI":"10.1021\/acscatal.2c01426","volume":"12","author":"M Wittmund","year":"2022","unstructured":"Wittmund M, Cadet F, Davari MD (2022) Learning epistasis and residue coevolution patterns: Current trends and future perspectives for advancing enzyme engineering. ACS Catal 12(22):14243\u201314263. https:\/\/doi.org\/10.1021\/acscatal.2c01426","journal-title":"ACS Catal"},{"issue":"8","key":"1029_CR4","doi-asserted-by":"publisher","first-page":"687","DOI":"10.1038\/s41592-019-0496-6","volume":"16","author":"KK Yang","year":"2019","unstructured":"Yang KK, Wu Z, Arnold FH (2019) Machine-learning-guided directed evolution for protein engineering. Nat Methods 16(8):687\u2013694. https:\/\/doi.org\/10.1038\/s41592-019-0496-6","journal-title":"Nat Methods"},{"key":"1029_CR5","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1016\/bs.mie.2020.05.005","volume":"643","author":"NE Siedhoff","year":"2020","unstructured":"Siedhoff NE, Schwaneberg U, Davari MD (2020) Machine learning-assisted enzyme engineering. Methods Enzymol 643:281\u2013315","journal-title":"Methods Enzymol"},{"key":"1029_CR6","doi-asserted-by":"publisher","first-page":"13863","DOI":"10.1021\/acscatal.3c02743","volume":"13","author":"P Kouba","year":"2023","unstructured":"Kouba P, Kohout P, Haddadi F, Bushuiev A, Samusevich R, Sedlar J et al (2023) Machine Learning-Guided Protein Engineering. ACS Catal 13:13863\u201313895. https:\/\/doi.org\/10.1021\/acscatal.3c02743","journal-title":"ACS Catal"},{"issue":"4","key":"1029_CR7","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1038\/s41592-021-01100-y","volume":"18","author":"S Biswas","year":"2021","unstructured":"Biswas S, Khimulya G, Alley EC, Esvelt KM, Church GM (2021) Low-N protein engineering with data-efficient deep learning. Nat Methods 18(4):389\u2013396. https:\/\/doi.org\/10.1038\/s41592-021-01100-y","journal-title":"Nat Methods"},{"issue":"7","key":"1029_CR8","doi-asserted-by":"publisher","first-page":"1114","DOI":"10.1038\/s41587-021-01146-5","volume":"40","author":"C Hsu","year":"2022","unstructured":"Hsu C, Nisonoff H, Fannjiang C, Listgarten J (2022) Learning protein fitness models from evolutionary and assay-labeled data. Nat Biotechnol 40(7):1114\u20131122. https:\/\/doi.org\/10.1038\/s41587-021-01146-5","journal-title":"Nat Biotechnol"},{"issue":"7","key":"1029_CR9","doi-asserted-by":"publisher","first-page":"3463","DOI":"10.1021\/acs.jcim.1c00099","volume":"61","author":"NE Siedhoff","year":"2021","unstructured":"Siedhoff NE, Illig AM, Schwaneberg U, Davari MD (2021) PyPEF\u2014an integrated framework for data-driven protein engineering. J Chem Inf Model 61(7):3463\u20133476. https:\/\/doi.org\/10.1021\/acs.jcim.1c00099","journal-title":"J Chem Inf Model"},{"issue":"12","key":"1029_CR10","doi-asserted-by":"publisher","first-page":"1315","DOI":"10.1038\/s41592-019-0598-1","volume":"16","author":"EC Alley","year":"2019","unstructured":"Alley EC, Khimulya G, Biswas S, AlQuraishi M, Church GM (2019) Unified rational protein engineering with sequence-based deep representation learning. Nat Methods 16(12):1315\u20131322. https:\/\/doi.org\/10.1038\/s41592-019-0598-1","journal-title":"Nat Methods"},{"issue":"2","key":"1029_CR11","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1007\/s10994-019-05855-6","volume":"109","author":"JE Van Engelen","year":"2020","unstructured":"Van Engelen JE, Hoos HH (2020) A survey on semi-supervised learning. Mach Learn 109(2):373\u2013440. https:\/\/doi.org\/10.1007\/s10994-019-05855-6","journal-title":"Mach Learn"},{"key":"1029_CR12","doi-asserted-by":"crossref","unstructured":"Illig AM, Siedhoff NE, Schwaneberg U, Davari MD (2022) A hybrid model combining evolutionary probability and machine learning leverages data-driven protein engineering. Preprint at https:\/\/www.biorxiv.org\/content\/early\/2022\/06\/07\/2022.06.07.495081","DOI":"10.1101\/2022.06.07.495081"},{"key":"1029_CR13","unstructured":"Zhou ZH, Li M, et\u00a0al (2005) Semi-supervised regression with co-training. In: IJCAI. vol.\u00a05. p. 908\u2013913"},{"issue":"2","key":"1029_CR14","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1038\/nbt.3769","volume":"35","author":"TA Hopf","year":"2017","unstructured":"Hopf TA, Ingraham JB, Poelwijk FJ, Scharfe CP, Springer M, Sander C et al (2017) Mutation effects predicted from sequence co-variation. Nat Biotechnol 35(2):128\u2013135. https:\/\/doi.org\/10.1038\/nbt.3769","journal-title":"Nat Biotechnol"},{"key":"1029_CR15","doi-asserted-by":"crossref","unstructured":"Blum A, Mitchell T (1998) Combining labeled and unlabeled data with co-training. In: Proceedings of the eleventh annual conference on Computational learning theory. p. 92\u2013100","DOI":"10.1145\/279943.279962"},{"issue":"11","key":"1029_CR16","doi-asserted-by":"publisher","first-page":"1529","DOI":"10.1109\/TKDE.2005.186","volume":"17","author":"ZH Zhou","year":"2005","unstructured":"Zhou ZH, Li M (2005) Tri-training: exploiting unlabeled data using three classifiers. IEEE Trans Knowl Data Eng 17(11):1529\u20131541. https:\/\/doi.org\/10.1109\/TKDE.2005.186","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"11","key":"1029_CR17","doi-asserted-by":"publisher","first-page":"1134","DOI":"10.1145\/1968.1972","volume":"27","author":"LG Valiant","year":"1984","unstructured":"Valiant LG (1984) A theory of the learnable. Commun ACM 27(11):1134\u20131142. https:\/\/doi.org\/10.1145\/1968.1972","journal-title":"Commun ACM"},{"key":"1029_CR18","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1007\/s10115-013-0706-y","volume":"42","author":"I Triguero","year":"2015","unstructured":"Triguero I, Garc\u00eda S, Herrera F (2015) Self-labeled techniques for semi-supervised learning: taxonomy, software and empirical study. Knowl Inf Syst 42:245\u2013284. https:\/\/doi.org\/10.1007\/s10115-013-0706-y","journal-title":"Knowl Inf Syst"},{"key":"1029_CR19","unstructured":"Eddy SR (1992) HMMER user\u2019s guide. Department of Genetics, Washington University School of Medicine. 2(1):13"},{"issue":"10","key":"1029_CR20","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1002195","volume":"7","author":"SR Eddy","year":"2011","unstructured":"Eddy SR (2011) Accelerated profile HMM searches. PLoS Comput Biol 7(10):e1002195. https:\/\/doi.org\/10.1371\/journal.pcbi.1002195","journal-title":"PLoS Comput Biol"},{"key":"1029_CR21","doi-asserted-by":"publisher","unstructured":"Suzek BE, Wang Y, Huang H, McGarvey PB, Wu CH (2014) The UniProt\u00a0Consortium. UniRef clusters: a comprehensive and scalable alternative for improving sequence similarity searches. Bioinformatics. 31(6):926\u2013932. https:\/\/doi.org\/10.1093\/bioinformatics\/btu739","DOI":"10.1093\/bioinformatics\/btu739"},{"key":"1029_CR22","unstructured":"Dayhoff M, Schwartz R, Orcutt B (1978) A model of evolutionary change in proteins. In: Atlas of protein sequence and structure. vol.\u00a05. National biomedical research foundation Silver Spring, MD, USA. p. 345\u2013352"},{"key":"1029_CR23","doi-asserted-by":"publisher","unstructured":"Loh WY (2011) Classification and regression trees. Wiley interdisciplinary reviews: data mining and knowledge discovery. 1(1):14\u201323. https:\/\/doi.org\/10.1002\/widm.8","DOI":"10.1002\/widm.8"},{"issue":"3","key":"1029_CR24","first-page":"61","volume":"10","author":"JC Platt","year":"1999","unstructured":"Platt JC (1999) Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods. Adv Large Margin Classif 10(3):61\u201374","journal-title":"Adv Large Margin Classif"},{"key":"1029_CR25","unstructured":"Rifkin RM, Lippert RA (2007) Notes on regularized least squares. Massachusetts Institute of Technology. MIT-CSAIL-TR-2007-025"},{"key":"1029_CR26","unstructured":"Hodges JL (1950) Discriminatory analysis. vol.\u00a011. USAF School of Aviation Medicine"},{"key":"1029_CR27","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1007\/978-1-4419-9326-7_5","volume-title":"Ensemble machine learning: methods and applications","author":"A Cutler","year":"2012","unstructured":"Cutler A, Cutler DR, Stevens JR (2012) Random Forests. In: Zhang C, Ma Y (eds) Ensemble machine learning: methods and applications. Springer, US, Boston, pp 157\u2013175"},{"key":"1029_CR28","unstructured":"Drucker H (1997) Improving regressors using boosting techniques. In: ICML. vol.\u00a097. Citeseer. p. 107\u2013115"},{"key":"1029_CR29","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa F, Varoquaux G, Gramfort A, Michel V, Thirion B, Grisel O et al (2011) Scikit-learn: Machine Learning in Python. J Mach Learn Res 12:2825\u20132830","journal-title":"J Mach Learn Res"},{"key":"1029_CR30","doi-asserted-by":"publisher","unstructured":"Jia LH, Guo LZ, Zhou Z, Li YF (2022) LAMDA-SSL: Semi-Supervised Learning in Python. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2208.04610","DOI":"10.48550\/arXiv.2208.04610"},{"issue":"3","key":"1029_CR31","doi-asserted-by":"publisher","first-page":"503","DOI":"10.2307\/2336116","volume":"76","author":"P Burman","year":"1989","unstructured":"Burman P (1989) A comparative study of ordinary cross-validation, v-fold cross-validation and the repeated learning-testing methods. Biometrika 76(3):503\u2013514. https:\/\/doi.org\/10.2307\/2336116","journal-title":"Biometrika"},{"issue":"1","key":"1029_CR32","doi-asserted-by":"publisher","first-page":"72","DOI":"10.2307\/1422689","volume":"15","author":"CE Spearman","year":"1904","unstructured":"Spearman CE (1904) The proof and measurement of association between two things. Am J Psychol 15(1):72\u2013101. https:\/\/doi.org\/10.2307\/1422689","journal-title":"Am J Psychol"},{"key":"1029_CR33","unstructured":"Van\u00a0der Zee M. Weighted correlation in Python. Pandas based implementation of weighted Pearson and Spearman correlations. Github. Available from: https:\/\/github.com\/matthijsz\/weightedcorr"},{"key":"1029_CR34","unstructured":"Benavoli A, Corani G, Mangili F, Zaffalon M, Ruggeri F (2014) A Bayesian Wilcoxon signed-rank test based on the Dirichlet process. In: International conference on machine learning. PMLR. p. 1026\u20131034"},{"issue":"1","key":"1029_CR35","first-page":"2653","volume":"18","author":"A Benavoli","year":"2017","unstructured":"Benavoli A, Corani G, Dem\u0161ar J, Zaffalon M (2017) Time for a change: a tutorial for comparing multiple classifiers through Bayesian analysis. J Mach Learn Res 18(1):2653\u20132688","journal-title":"J Mach Learn Res"},{"key":"1029_CR36","doi-asserted-by":"publisher","unstructured":"Favor A, Jayapurna I (2020) Evaluating eUniRep and other protein feature representations for in silico directed evolution. Preprint at https:\/\/doi.org\/10.22541\/au.159683529.96283070","DOI":"10.22541\/au.159683529.96283070"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-025-01029-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s13321-025-01029-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-025-01029-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T18:51:00Z","timestamp":1748717460000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-025-01029-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,31]]},"references-count":36,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1029"],"URL":"https:\/\/doi.org\/10.1186\/s13321-025-01029-w","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,31]]},"assertion":[{"value":"2 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"88"}}