{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T04:50:17Z","timestamp":1781931017092,"version":"3.54.5"},"reference-count":44,"publisher":"American Chemical Society (ACS)","issue":"4","license":[{"start":{"date-parts":[[2021,3,15]],"date-time":"2021-03-15T00:00:00Z","timestamp":1615766400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,3,15]],"date-time":"2021-03-15T00:00:00Z","timestamp":1615766400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2021,3,15]],"date-time":"2021-03-15T00:00:00Z","timestamp":1615766400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-045"}],"funder":[{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["W911NF1810315"],"award-info":[{"award-number":["W911NF1810315"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"Army Research Office","doi-asserted-by":"publisher","award":["W911NF1810315"],"award-info":[{"award-number":["W911NF1810315"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2021,4,26]]},"DOI":"10.1021\/acs.jcim.0c01127","type":"journal-article","created":{"date-parts":[[2021,3,15]],"date-time":"2021-03-15T00:31:26Z","timestamp":1615768286000},"page":"1560-1569","source":"Crossref","is-referenced-by-count":114,"title":["SMILES Pair Encoding: A Data-Driven Substructure Tokenization Algorithm for Deep Learning"],"prefix":"10.1021","volume":"61","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1821-2680","authenticated-orcid":true,"given":"Xinhao","family":"Li","sequence":"first","affiliation":[{"name":"Department of Chemistry, Bioinformatics Research Center, North Carolina State University, Raleigh, North Carolina 27695, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5642-8303","authenticated-orcid":true,"given":"Denis","family":"Fourches","sequence":"additional","affiliation":[{"name":"Department of Chemistry, Bioinformatics Research Center, North Carolina State University, Raleigh, North Carolina 27695, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"316","published-online":{"date-parts":[[2021,3,15]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"publisher","DOI":"10.1016\/J.DRUDIS.2018.01.039"},{"key":"ref2\/cit2","doi-asserted-by":"publisher","DOI":"10.1016\/j.drudis.2019.07.006"},{"key":"ref3\/cit3","doi-asserted-by":"publisher","DOI":"10.1016\/j.cell.2020.01.021"},{"key":"ref4\/cit4","unstructured":"Maziarka, \u0141.; Danel, T.; Mucha, S.; Rataj, K.; Tabor, J.; Jastrz ebski, S. Molecule Attention Transformer. 2020, arXiv:2002.08264."},{"key":"ref5\/cit5","doi-asserted-by":"publisher","DOI":"10.1039\/D0CS00098A"},{"key":"ref6\/cit6","doi-asserted-by":"publisher","DOI":"10.1039\/c9me00039a"},{"key":"ref7\/cit7","unstructured":"Polykovskiy, D.; Zhebrak, A.; Sanchez-Lengeling, B.; Golovanov, S.; Tatanov, O.; Belyaev, S.; Kurbanov, R.; Artamonov, A.; Aladinskiy, V.; Veselov, M.; Kadurin, A.; Johansson, S.; Chen, H.; Nikolenko, S.; Aspuru-Guzik, A.; Zhavoronkov, A. Molecular Sets (MOSES): A Benchmarking Platform for Molecular Generation Models. 2018, arXiv:1811.12823."},{"key":"ref8\/cit8","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.8b00839"},{"key":"ref9\/cit9","doi-asserted-by":"crossref","unstructured":"Fu, T.; Xiao, C.; Li, X.; Glass, L. M.; Sun, J. MIMOSA: Multi-Constraint Molecule Sampling for Molecule Optimization. 2020, arXiv:2010.02318.","DOI":"10.1609\/aaai.v35i1.16085"},{"key":"ref10\/cit10","doi-asserted-by":"crossref","unstructured":"Fu, T.; Xiao, C.; Sun, J. CORE: Automatic Molecule Optimization Using Copy & Refine Strategy. Proceedings of the AAAI Conference on Artificial Intelligence, 2019; Vol. 34.","DOI":"10.1609\/aaai.v34i01.5404"},{"key":"ref11\/cit11","doi-asserted-by":"publisher","DOI":"10.1016\/J.DRUDIS.2020.01.020"},{"key":"ref12\/cit12","unstructured":"Lipton, Z. C.; Berkowitz, J.; Elkan, C. A Critical Review of Recurrent Neural Networks for Sequence Learning. 2015, arXiv preprint arXiv:1506.00019."},{"key":"ref13\/cit13","doi-asserted-by":"crossref","unstructured":"Kim, Y. Convolutional Neural Networks for Sentence Classification.\n                      EMNLP 2014\u2014Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing\n                      , 2014; pp 1746\u20131751.","DOI":"10.3115\/v1\/D14-1181"},{"key":"ref14\/cit14","unstructured":"Vaswani, A.; Shazeer, N.; Parmar, N.; Uszkoreit, J.; Jones, L.; Gomez, A. N.; Kaiser, \u0141.; Polosukhin, I. Attention Is All You Need.\n                      Advances in Neural Information Processing Systems\n                      ; 2017; Vol. 2017, pp 5999\u20136009."},{"key":"ref15\/cit15","doi-asserted-by":"publisher","DOI":"10.1021\/ci00057a005"},{"key":"ref16\/cit16","doi-asserted-by":"publisher","DOI":"10.1021\/ci00062a008"},{"key":"ref17\/cit17","doi-asserted-by":"publisher","DOI":"10.1021\/ci0496797"},{"key":"ref18\/cit18","doi-asserted-by":"crossref","unstructured":"Sennrich, R.; Haddow, B.; Birch, A. Neural Machine Translation of Rare Words with Subword Units. In\n                      Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)\n                      ; Association for Computational Linguistics: Stroudsburg, PA, USA, 2016; pp 1715\u20131725.","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref19\/cit19","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkr777"},{"key":"ref20\/cit20","doi-asserted-by":"crossref","unstructured":"O\u2019Boyle, N.; Dalke, A. DeepSMILES: An Adaptation of SMILES for Use in Machine-Learning of Chemical Structures. 2018, ChemRxiv:7097960.v1.","DOI":"10.26434\/chemrxiv.7097960"},{"key":"ref21\/cit21","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/aba947"},{"key":"ref22\/cit22","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.8b00542"},{"key":"ref23\/cit23","unstructured":"Swain, M.\n                      MolVS: Molecule Validation and Standardization\n                      ."},{"key":"ref24\/cit24","unstructured":"Landrum, G.\n                      RDKit: Open-Source Cheminformatics\n                      ."},{"key":"ref25\/cit25","doi-asserted-by":"publisher","DOI":"10.1021\/acscentsci.7b00512"},{"key":"ref26\/cit26","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-020-00430-x"},{"key":"ref27\/cit27","doi-asserted-by":"crossref","unstructured":"Howard, J.; Ruder, S. Universal Language Model Fine-Tuning for Text Classification. In\n                      ACL 2018\u201456th Annual Meeting of the Association for Computational Linguistics, Proceedings of the Conference (Long Papers)\n                      , 2018; Vol. 1, pp 328\u2013339.","DOI":"10.18653\/v1\/P18-1031"},{"key":"ref28\/cit28","doi-asserted-by":"publisher","DOI":"10.1002\/cmdc.200800178"},{"key":"ref29\/cit29","doi-asserted-by":"publisher","DOI":"10.1021\/jm9602928"},{"key":"ref30\/cit30","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-017-0225-z"},{"key":"ref31\/cit31","doi-asserted-by":"crossref","unstructured":"Zhang, J.; Mercado, R.; Engkvist, O.; Chen, H. Comparative Study of Deep Generative Models on Chemical Space Coverage. 2020, No. 2. ChemRxiv:13234289.v1.","DOI":"10.26434\/chemrxiv.13234289.v1"},{"key":"ref32\/cit32","volume-title":"Statistical Power Analysis for the Behavioral Sciences","author":"Cohen J.","year":"1988"},{"key":"ref33\/cit33","doi-asserted-by":"publisher","DOI":"10.1007\/s10822-016-9904-5"},{"key":"ref34\/cit34","unstructured":"Bjerrum, E. J. Smiles Enumeration as Data Augmentation for Neural Network Modeling of Molecules. 2017, arXiv preprint arXiv:1703.07076."},{"key":"ref35\/cit35","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-019-0393-0"},{"key":"ref36\/cit36","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-019-0341-z"},{"key":"ref37\/cit37","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-020-00423-w"},{"key":"ref38\/cit38","unstructured":"Merity, S.; Keskar, N. S.; Socher, R. Regularizing and Optimizing LSTM Language Models. In\n                      6th International Conference on Learning Representations\n                      , ICLR 2018\u2014Conference Track Proceedings, 2018."},{"key":"ref39\/cit39","unstructured":"Smith, L. N. A Disciplined Approach To Neural Network Hyper-Parameters: Part 1 \u2013 Learning Rate, Batch Size, Momentum, and Weight Decay. 2018, arXiv preprint arXiv:1803.09820 March 26."},{"key":"ref40\/cit40","doi-asserted-by":"publisher","DOI":"10.1007\/s13398-014-0173-7.2"},{"key":"ref41\/cit41","unstructured":"Paszke, A.; Gross, S.\n                      Automatic Differentiation in PyTorch, 31st Conference on Neural Information Processing Systems\n                      ; NIPS, 2017."},{"key":"ref42\/cit42","doi-asserted-by":"publisher","DOI":"10.3390\/info11020108"},{"key":"ref43\/cit43","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.8b00803"},{"key":"ref44\/cit44","unstructured":"Goh, G. B.; Hodas, N. O.; Siegel, C.; Vishnu, A. SMILES2Vec: An Interpretable General-Purpose Deep Neural Network for Predicting Chemical Properties. 2017, arXiv preprint arXiv:1712.02034."}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.0c01127","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,27]],"date-time":"2023-04-27T04:31:01Z","timestamp":1682569861000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.0c01127"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,15]]},"references-count":44,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2021,4,26]]}},"alternative-id":["10.1021\/acs.jcim.0c01127"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.0c01127","relation":{"has-preprint":[{"id-type":"doi","id":"10.26434\/chemrxiv.12339368.v1","asserted-by":"object"},{"id-type":"doi","id":"10.26434\/chemrxiv.12339368","asserted-by":"object"}]},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,3,15]]}}}