{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T16:02:37Z","timestamp":1774454557900,"version":"3.50.1"},"reference-count":82,"publisher":"American Chemical Society (ACS)","issue":"9","license":[{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-045"}],"funder":[{"DOI":"10.13039\/100004330","name":"GlaxoSmithKline","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004330","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2025,5,12]]},"DOI":"10.1021\/acs.jcim.5c00359","type":"journal-article","created":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T20:01:58Z","timestamp":1746129718000},"page":"4381-4402","source":"Crossref","is-referenced-by-count":6,"title":["Exploring BERT for Reaction Yield Prediction: Evaluating the Impact of Tokenization, Molecular Representation, and Pretraining Data Augmentation"],"prefix":"10.1021","volume":"65","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3604-9274","authenticated-orcid":true,"given":"Adrian","family":"Krzyzanowski","sequence":"first","affiliation":[{"name":"GSK Medicines Research Centre, Gunnels Wood Road, Stevenage SG1 2NY, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0958-9830","authenticated-orcid":true,"given":"Stephen D.","family":"Pickett","sequence":"additional","affiliation":[{"name":"GSK Medicines Research Centre, Gunnels Wood Road, Stevenage SG1 2NY, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3536-0746","authenticated-orcid":true,"given":"Peter","family":"Pog\u00e1ny","sequence":"additional","affiliation":[{"name":"GSK Medicines Research Centre, Gunnels Wood Road, Stevenage SG1 2NY, U.K."}]}],"member":"316","published-online":{"date-parts":[[2025,5,1]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"publisher","DOI":"10.1002\/anie.201503890"},{"key":"ref2\/cit2","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-017-02303-0"},{"key":"ref3\/cit3","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/abc81d"},{"key":"ref4\/cit4","doi-asserted-by":"publisher","DOI":"10.1002\/anie.202204647"},{"key":"ref5\/cit5","doi-asserted-by":"publisher","DOI":"10.1021\/jacs.1c12005"},{"key":"ref6\/cit6","doi-asserted-by":"publisher","DOI":"10.1039\/D1DD00006C"},{"key":"ref7\/cit7","doi-asserted-by":"publisher","DOI":"10.1039\/D2SC06041H"},{"key":"ref8\/cit8","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.3c01524"},{"key":"ref9\/cit9","doi-asserted-by":"publisher","DOI":"10.1021\/acsomega.2c05546"},{"key":"ref10\/cit10","doi-asserted-by":"publisher","DOI":"10.1021\/acscentsci.3c01163"},{"key":"ref11\/cit11","doi-asserted-by":"publisher","DOI":"10.1039\/D3SC03902A"},{"key":"ref12\/cit12","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar5169"},{"key":"ref13\/cit13","doi-asserted-by":"publisher","DOI":"10.1016\/j.chempr.2020.02.017"},{"key":"ref14\/cit14","doi-asserted-by":"publisher","DOI":"10.1021\/acs.accounts.0c00770"},{"key":"ref15\/cit15","doi-asserted-by":"publisher","DOI":"10.1021\/jacs.4c00098"},{"key":"ref16\/cit16","doi-asserted-by":"publisher","DOI":"10.1126\/sciadv.adn3478"},{"key":"ref17\/cit17","doi-asserted-by":"publisher","DOI":"10.1039\/D0QO00544D"},{"key":"ref18\/cit18","doi-asserted-by":"publisher","DOI":"10.1126\/science.adg2114"},{"key":"ref19\/cit19","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-39283-x"},{"key":"ref20\/cit20","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-021-00579-z"},{"key":"ref21\/cit21","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.1c01467"},{"key":"ref22\/cit22","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00639-z"},{"key":"ref23\/cit23","doi-asserted-by":"publisher","DOI":"10.1039\/D4FD00104D"},{"key":"ref24\/cit24","doi-asserted-by":"crossref","unstructured":"Schwaller, P.; Vaucher, A. C.; Laino, T.; Reymond, J. L. Data augmentation strategies to improve reaction yield predictions and estimate uncertainty. In  Proceedings of NeurIPS 2020 Machine Learning for Molecules Workshop, 2020.","DOI":"10.26434\/chemrxiv.13286741.v1"},{"key":"ref25\/cit25","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-023-00685-0"},{"key":"ref26\/cit26","first-page":"4171","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin J.","year":"2019"},{"key":"ref27\/cit27","volume-title":"Extraction of chemical structures and reactions from the literature","author":"Lowe D. M.","year":"2012"},{"key":"ref28\/cit28","volume-title":"Chemical reactions from US patents (1976\u2013Sep2016)","author":"Lowe D. M.","year":"2017"},{"key":"ref29\/cit29","doi-asserted-by":"publisher","DOI":"10.1021\/ci00057a005"},{"key":"ref30\/cit30","doi-asserted-by":"publisher","DOI":"10.1126\/science.aap9112"},{"key":"ref31\/cit31","doi-asserted-by":"publisher","DOI":"10.1038\/s41557-023-01393-w"},{"key":"ref32\/cit32","doi-asserted-by":"publisher","DOI":"10.26434\/chemrxiv.7097960.v1"},{"key":"ref33\/cit33","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/aba947"},{"key":"ref34\/cit34","doi-asserted-by":"publisher","DOI":"10.1021\/ci100050t"},{"key":"ref35\/cit35","doi-asserted-by":"publisher","DOI":"10.5555\/177910.177914"},{"key":"ref36\/cit36","volume-title":"Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Vol. 1: Long Papers)","author":"Sennrich R.","year":"2016"},{"key":"ref37\/cit37","doi-asserted-by":"crossref","unstructured":"Schuster, M.; Nakajima, K. Japanese and Korean voice search. In  2012 IEEE International Conference on Acoustics, Speech and Signal Processing, 2012.","DOI":"10.1109\/ICASSP.2012.6289079"},{"key":"ref38\/cit38","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1609.08144"},{"key":"ref39\/cit39","volume-title":"Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Vol. 1: Long Papers)","author":"Kudo T.","year":"2018"},{"key":"ref40\/cit40","volume-title":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","author":"Kudo T.","year":"2018"},{"key":"ref41\/cit41","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1312.6199"},{"key":"ref42\/cit42","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1412.6572"},{"key":"ref43\/cit43","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1605.07725"},{"key":"ref44\/cit44","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2101.03700"},{"key":"ref45\/cit45","first-page":"8797","volume-title":"International Conference on Pattern Recognition (ICPR)","author":"Karimi A.","year":"2021"},{"key":"ref46\/cit46","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2004.08994"},{"key":"ref47\/cit47","unstructured":"CAS Reactions\nData Set, available from CAS. https:\/\/www.cas.org\/cas-data\/cas-reactions."},{"key":"ref48\/cit48","unstructured":"Reaxys\nData Set, available from Elsevier. https:\/\/www.reaxys.com."},{"key":"ref49\/cit49","unstructured":"Pistachio\nData Set, available from NextMove Software. https:\/\/www.nextmovesoftware.com\/pistachio.html."},{"key":"ref50\/cit50","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-024-00805-4"},{"key":"ref51\/cit51","doi-asserted-by":"publisher","DOI":"10.3762\/bjoc.20.212"},{"key":"ref52\/cit52","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.1c00699"},{"key":"ref53\/cit53","doi-asserted-by":"publisher","DOI":"10.2307\/2279372"},{"key":"ref54\/cit54","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177731944"},{"key":"ref55\/cit55","doi-asserted-by":"crossref","unstructured":"Conover, W. J.; Iman, R. L. On multiple-comparisons procedures. Technical Report LA-7677-MS, 1979.","DOI":"10.2172\/6057803"},{"key":"ref56\/cit56","volume-title":"Practical Nonparametric Statistics","author":"Conover W. J.","year":"1999"},{"key":"ref57\/cit57","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/93.3.491"},{"key":"ref58\/cit58","doi-asserted-by":"publisher","DOI":"10.1039\/D1DD00013F"},{"key":"ref59\/cit59","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-020-00469-w"},{"key":"ref60\/cit60","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-024-00821-x"},{"key":"ref61\/cit61","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-019-0393-0"},{"key":"ref62\/cit62","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"ref63\/cit63","first-page":"1","volume":"32","author":"Yang Z.","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref64\/cit64","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1907.11692"},{"key":"ref65\/cit65","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1909.11942"},{"key":"ref66\/cit66","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1904.05342"},{"key":"ref67\/cit67","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","author":"Beltagy I.","year":"2019"},{"key":"ref68\/cit68","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btz682"},{"key":"ref69\/cit69","doi-asserted-by":"publisher","DOI":"10.1145\/3458754"},{"key":"ref70\/cit70","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2212.07914"},{"key":"ref71\/cit71","doi-asserted-by":"publisher","DOI":"10.1126\/science.aat8603"},{"key":"ref72\/cit72","doi-asserted-by":"publisher","DOI":"10.1021\/acscombsci.0c00118"},{"key":"ref73\/cit73","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.5b00559"},{"key":"ref74\/cit74","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jmedchem.3c00689"},{"key":"ref75\/cit75","unstructured":"RDKit:\nOpen-Source Cheminformatics. https:\/\/www.rdkit.org."},{"key":"ref76\/cit76","doi-asserted-by":"publisher","DOI":"10.1039\/D3DD00044C"},{"key":"ref77\/cit77","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-022-00595-7"},{"key":"ref78\/cit78","doi-asserted-by":"crossref","first-page":"38","DOI":"10.18653\/v1\/2020.emnlp-demos.6","volume-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","author":"Wolf T.","year":"2020"},{"key":"ref79\/cit79","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.09885"},{"key":"ref80\/cit80","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2412.05269"},{"key":"ref81\/cit81","doi-asserted-by":"publisher","DOI":"10.1039\/C8SC02339E"},{"key":"ref82\/cit82","doi-asserted-by":"publisher","DOI":"10.1021\/acscentsci.9b00576"}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.5c00359","content-type":"application\/pdf","content-version":"vor","intended-application":"unspecified"},{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.5c00359","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T08:10:50Z","timestamp":1747037450000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.5c00359"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,1]]},"references-count":82,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,5,12]]}},"alternative-id":["10.1021\/acs.jcim.5c00359"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.5c00359","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,1]]}}}