{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T06:09:49Z","timestamp":1774505389000,"version":"3.50.1"},"reference-count":77,"publisher":"American Chemical Society (ACS)","issue":"6","license":[{"start":{"date-parts":[[2020,5,5]],"date-time":"2020-05-05T00:00:00Z","timestamp":1588636800000},"content-version":"vor","delay-in-days":29,"URL":"http:\/\/pubs.acs.org\/page\/policy\/authorchoice_termsofuse.html"}],"funder":[{"name":"Protein Engineering Department, Merck Research Laboratories"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2020,6,22]]},"DOI":"10.1021\/acs.jcim.0c00073","type":"journal-article","created":{"date-parts":[[2020,4,6]],"date-time":"2020-04-06T17:15:09Z","timestamp":1586193309000},"page":"2773-2790","source":"Crossref","is-referenced-by-count":198,"title":["Deep Dive into Machine Learning Models for Protein Engineering"],"prefix":"10.1021","volume":"60","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2091-3854","authenticated-orcid":true,"given":"Yuting","family":"Xu","sequence":"first","affiliation":[{"name":"Biometrics Research, Merck & Co., Inc., Rahway, New Jersey 07065, United States"}]},{"given":"Deeptak","family":"Verma","sequence":"additional","affiliation":[{"name":"Computational and Structural Chemistry, Merck & Co., Inc., Kenilworth, New Jersey 07033, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6549-1635","authenticated-orcid":true,"given":"Robert P.","family":"Sheridan","sequence":"additional","affiliation":[{"name":"Computational and Structural Chemistry, Merck & Co., Inc., Kenilworth, New Jersey 07033, United States"}]},{"given":"Andy","family":"Liaw","sequence":"additional","affiliation":[{"name":"Biometrics Research, Merck & Co., Inc., Rahway, New Jersey 07065, United States"}]},{"given":"Junshui","family":"Ma","sequence":"additional","affiliation":[{"name":"Early Oncology Statistics, Merck & Co., Inc., Rahway, New Jersey 07065, United States"}]},{"given":"Nicholas M.","family":"Marshall","sequence":"additional","affiliation":[{"name":"Invenra, Inc., 505 South Rosa Road, Madison, Wisconsin 53719, United States"}]},{"given":"John","family":"McIntosh","sequence":"additional","affiliation":[{"name":"Process Research & Development, Merck & Co., Inc., Rahway, New Jersey 07065, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8178-9186","authenticated-orcid":true,"given":"Edward C.","family":"Sherer","sequence":"additional","affiliation":[{"name":"Computational and Structural Chemistry, Merck & Co., Inc., Kenilworth, New Jersey 07033, United States"}]},{"given":"Vladimir","family":"Svetnik","sequence":"additional","affiliation":[{"name":"Biometrics Research, Merck & Co., Inc., Rahway, New Jersey 07065, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3582-9567","authenticated-orcid":true,"given":"Jennifer M.","family":"Johnston","sequence":"additional","affiliation":[{"name":"Computational and Structural Chemistry, Merck & Co., Inc., Kenilworth, New Jersey 07033, United States"}]}],"member":"316","published-online":{"date-parts":[[2020,4,6]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"publisher","DOI":"10.1002\/aic.13995"},{"key":"ref2\/cit2","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.biophys.37.032807.125832"},{"key":"ref3\/cit3","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-018-24760-x"},{"key":"ref4\/cit4","unstructured":"Yang, K. K.; Wu, Z.; Arnold, F. H. Machine Learning in Protein Engineering. 2018, arXiv preprint arXiv:1811.10775."},{"key":"ref5\/cit5","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1901979116"},{"key":"ref6\/cit6","doi-asserted-by":"publisher","DOI":"10.1109\/TCBB.2017.2773063"},{"key":"ref7\/cit7","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btx780"},{"key":"ref8\/cit8","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.124"},{"key":"ref9\/cit9","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-017-1972-6"},{"key":"ref10\/cit10","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.6b00740"},{"key":"ref11\/cit11","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty178"},{"key":"ref12\/cit12","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1215251110"},{"key":"ref13\/cit13","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-019-0496-6"},{"key":"ref14\/cit14","doi-asserted-by":"publisher","DOI":"10.1101\/617860"},{"key":"ref15\/cit15","doi-asserted-by":"publisher","DOI":"10.1101\/817890"},{"key":"ref16\/cit16","unstructured":"Kimothi, D.; Soni, A.; Biyani, P.; Hogan, J. M. Distributed Representations for Biological Sequence Analysis. 2016,  arXiv preprint arXiv:1608.05949, https:\/\/arxiv.org\/abs\/1608.05949 (accessed 2020-04-10)."},{"key":"ref17\/cit17","first-page":"1188","author":"Le Q.","year":"2014","journal-title":"International conference on machine learning"},{"key":"ref18\/cit18","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btv042"},{"key":"ref19\/cit19","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/28.1.374"},{"key":"ref20\/cit20","doi-asserted-by":"publisher","DOI":"10.32614\/RJ-2015-001"},{"key":"ref21\/cit21","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkm998"},{"key":"ref22\/cit22","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0141287"},{"key":"ref23\/cit23","doi-asserted-by":"publisher","DOI":"10.1021\/jm9700575"},{"key":"ref24\/cit24","doi-asserted-by":"publisher","DOI":"10.1002\/bip.20296"},{"key":"ref25\/cit25","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkm998"},{"key":"ref26\/cit26","doi-asserted-by":"publisher","DOI":"10.1093\/protein\/2.3.185"},{"key":"ref27\/cit27","doi-asserted-by":"publisher","DOI":"10.1126\/science.8503008"},{"key":"ref28\/cit28","doi-asserted-by":"publisher","DOI":"10.1016\/0022-2836(73)90030-2"},{"key":"ref29\/cit29","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/18.7.985"},{"key":"ref30\/cit30","doi-asserted-by":"publisher","DOI":"10.1016\/0014-5793(92)80506-C"},{"key":"ref31\/cit31","doi-asserted-by":"publisher","DOI":"10.1006\/jmbi.1996.0804"},{"key":"ref32\/cit32","doi-asserted-by":"publisher","DOI":"10.1038\/282109a0"},{"key":"ref33\/cit33","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-0134(19990101)34:1<49::AID-PROT5>3.0.CO;2-L"},{"key":"ref34\/cit34","doi-asserted-by":"publisher","DOI":"10.1111\/j.1432-1033.1982.tb07002.x"},{"key":"ref35\/cit35","doi-asserted-by":"crossref","first-page":"3376","DOI":"10.1016\/S0021-9258(19)45151-X","volume":"247","author":"Smith R.","year":"1972","journal-title":"J. Biol. Chem."},{"key":"ref36\/cit36","doi-asserted-by":"publisher","DOI":"10.1111\/j.1399-3011.1988.tb01261.x"},{"key":"ref37\/cit37","doi-asserted-by":"publisher","DOI":"10.1021\/ma60054a013"},{"key":"ref38\/cit38","doi-asserted-by":"publisher","DOI":"10.1038\/nsb1203-980"},{"key":"ref39\/cit39","doi-asserted-by":"publisher","DOI":"10.1021\/ct100578z"},{"key":"ref40\/cit40","unstructured":"Release, S. Schr\u00f6dinger Release 2019-4, Maestro; Schr\u00f6dinger, LLC: New York, NY, 2019."},{"key":"ref41\/cit41","unstructured":"Lee, T. K.; Nguyen, T. Protein Family Classification with Neural Networks; 2016."},{"key":"ref42\/cit42","unstructured":"Mikolov, T.; Chen, K.; Corrado, G.; Dean, J. Efficient Estimation of Word Representations in Vector Space. 2013,  arXiv preprint arXiv:1301.3781.https:\/\/arxiv.org\/abs\/1301.3781 (accessed 2020-04-10)."},{"key":"ref43\/cit43","first-page":"3111","author":"Mikolov T.","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref44\/cit44","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2005.10.010"},{"key":"ref45\/cit45","doi-asserted-by":"crossref","first-page":"1","DOI":"10.18637\/jss.v033.i01","volume":"33","author":"Friedman J.","year":"2010","journal-title":"Journal of Statistical Software"},{"key":"ref46\/cit46","first-page":"155","author":"Drucker H.","year":"1997","journal-title":"Advances in neural information processing systems"},{"key":"ref47\/cit47","doi-asserted-by":"publisher","DOI":"10.2174\/157340907782799372"},{"key":"ref48\/cit48","doi-asserted-by":"crossref","DOI":"10.1201\/b10911","volume-title":"Support Vector Machines and their Application in Chemistry and Biotechnology","author":"Liang Y.","year":"2016"},{"key":"ref49\/cit49","doi-asserted-by":"publisher","DOI":"10.1021\/ci049965i"},{"key":"ref50\/cit50","doi-asserted-by":"publisher","DOI":"10.1023\/A:1010933404324"},{"key":"ref51\/cit51","doi-asserted-by":"publisher","DOI":"10.1021\/ci034160g"},{"key":"ref52\/cit52","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-9473(01)00065-2"},{"key":"ref53\/cit53","doi-asserted-by":"publisher","DOI":"10.1021\/ci0500379"},{"key":"ref54\/cit54","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"ref55\/cit55","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_00990"},{"key":"ref56\/cit56","unstructured":"Alom, M. Z.; Taha, T. M.; Yakopcic, C.; Westberg, S.; Sidike, P.; Nasrin, M. S.; Van Esesn, B. C.; Awwal, A. A. S.; Asari, V. K. The history began from Alexnet: A Comprehensive Survey on Deep Learning Approaches. 2018,  arXiv preprint arXiv:1803.01164. https:\/\/arxiv.org\/ftp\/arxiv\/papers\/1803\/1803.01164.pdf (accessed 2020-04-10)."},{"key":"ref57\/cit57","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref58\/cit58","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref59\/cit59","unstructured":"Cire\u015fan, D.; Meier, U.; Schmidhuber, J. Multi-column Deep Neural Networks for Image Classification. 2012,  arXiv preprint arXiv:1202.2745. https:\/\/arxiv.org\/abs\/1202.2745 (accessed 2020-04-10)."},{"key":"ref60\/cit60","unstructured":"Bai, S.; Kolter, J. Z.; Koltun, V. An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling. 2018,  arXiv preprint arXiv:1803.01271. https:\/\/arxiv.org\/abs\/1803.01271 (accessed 2020-04-10)."},{"key":"ref61\/cit61","unstructured":"Kalchbrenner, N.; Grefenstette, E.; Blunsom, P. A Convolutional Neural Network for Modelling Sentences. 2014,  arXiv preprint arXiv:1404.2188. https:\/\/arxiv.org\/abs\/1404.2188 (accessed 2020-04-10)."},{"key":"ref62\/cit62","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btw255"},{"key":"ref63\/cit63","first-page":"1995","volume":"3361","author":"LeCun Y.","year":"1995","journal-title":"handbook of brain theory and neural networks"},{"key":"ref64\/cit64","unstructured":"Dieleman, S.; Schlter, J.; Raffel, C.; Olson, E.; Snderby, S. K.; Nouri, D.; Maturana, D.; Thoma, M.; Battenberg, E.; Kelly, J. Lasagne: First Release; 2015; http:\/\/dx.doi.org\/10.5281\/zenodo.27878 (accessed 2020-04-10)."},{"key":"ref65\/cit65","unstructured":"Theano Development Team,  Theano: A Python Framework for Fast Computation\nof Mathematical Expressions. 2016,  arXiv\ne-prints abs\/1605.02688. https:\/\/arxiv.org\/abs\/1605.02688 (accessed 2020-04-10)."},{"key":"ref66\/cit66","unstructured":"Chen, T.; Li, M.; Li, Y.; Lin, M.; Wang, N.; Wang, M.; Xiao, T.; Xu, B.; Zhang, C.; Zhang, Z. Mxnet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. 2015,  arXiv preprint arXiv:1512.01274. https:\/\/arxiv.org\/abs\/1512.01274 (accessed 2020-04-10)."},{"key":"ref67\/cit67","first-page":"2825","volume":"12","author":"Pedregosa F.","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref68\/cit68","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v011.i09"},{"key":"ref69\/cit69","unstructured":"Chen, T.; He, T.; Benesty, M.; Khotilovich, V.; Tang, Y.; Cho, H.; Chen, K.; Mitchell, R.; Cano, I.; Zhou, T.; Li, M.; Xie, J.; Lin, M.; Geng, Y.; Li, Y. xgboost: Extreme Gradient Boosting, R package version 0.82.1; 2019."},{"key":"ref70\/cit70","unstructured":"Kuhn, M. caret: Classification and Regression Training, R package version 6.0-84; 2019."},{"key":"ref71\/cit71","doi-asserted-by":"publisher","DOI":"10.1016\/j.jmb.2014.06.015"},{"key":"ref72\/cit72","doi-asserted-by":"publisher","DOI":"10.1002\/cbic.201100784"},{"key":"ref73\/cit73","doi-asserted-by":"publisher","DOI":"10.1038\/nature17995"},{"key":"ref74\/cit74","doi-asserted-by":"publisher","DOI":"10.1021\/ci400084k"},{"key":"ref75\/cit75","unstructured":"Yosinski, J.; Clune, J.; Bengio, Y.; Lipson, H. How Transferable are Features in Deep Neural Networks? Advances in neural information processing systems; 2014; pp 3320\u20133328."},{"key":"ref76\/cit76","unstructured":"Smith, L. N. A Disciplined Approach to Neural Network Hyper-parameters: Part 1\u2013Learning Rate, Batch Size, Momentum, and Weight Decay. 2018,  arXiv preprint arXiv:1803.09820. https:\/\/arxiv.org\/abs\/1803.09820 (accessed 2020-04-10)."},{"key":"ref77\/cit77","unstructured":"Guo, C.; Berkhahn, F. Entity Embeddings of Categorical Variables. 2016,  arXiv preprint arXiv:1604.06737. https:\/\/arxiv.org\/abs\/1604.06737 (accessed 2020-04-10)."}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.0c00073","content-type":"application\/pdf","content-version":"vor","intended-application":"unspecified"},{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.0c00073","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,27]],"date-time":"2023-04-27T06:48:21Z","timestamp":1682578101000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.0c00073"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,6]]},"references-count":77,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2020,6,22]]}},"alternative-id":["10.1021\/acs.jcim.0c00073"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.0c00073","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4,6]]}}}