{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T07:39:50Z","timestamp":1774078790415,"version":"3.50.1"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2023,12,5]],"date-time":"2023-12-05T00:00:00Z","timestamp":1701734400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,5]],"date-time":"2023-12-05T00:00:00Z","timestamp":1701734400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-023-00764-9","type":"journal-article","created":{"date-parts":[[2023,12,5]],"date-time":"2023-12-05T06:02:08Z","timestamp":1701756128000},"page":"1476-1485","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["Bridging the gap between chemical reaction pretraining and conditional molecule generation with a unified 
model"],"prefix":"10.1038","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7428-4104","authenticated-orcid":false,"given":"Bo","family":"Qiang","sequence":"first","affiliation":[]},{"given":"Yiran","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Yuheng","family":"Ding","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2378-7809","authenticated-orcid":false,"given":"Ningfeng","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Song","family":"Song","sequence":"additional","affiliation":[]},{"given":"Liangren","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3822-9110","authenticated-orcid":false,"given":"Bo","family":"Huang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8993-4015","authenticated-orcid":false,"given":"Zhenming","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,5]]},"reference":[{"key":"764_CR1","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.-W., Lee, K. & Toutanova, K. BERT: pre-training of deep bidirectional transformers for language understanding. Preprint at https:\/\/doi.org\/10.48550\/arXiv.1810.04805 (2018).","DOI":"10.48550\/arXiv.1810.04805"},{"key":"764_CR2","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","volume":"596","author":"J Jumper","year":"2021","unstructured":"Jumper, J. et al. Highly accurate protein structure prediction with alphafold. Nature 596, 583\u2013589 (2021).","journal-title":"Nature"},{"key":"764_CR3","doi-asserted-by":"publisher","first-page":"1099","DOI":"10.1038\/s41587-022-01618-2","volume":"41","author":"A Madani","year":"2023","unstructured":"Madani, A. et al. Large language models generate functional protein sequences across diverse families. Nat. Biotechnol. 41, 1099\u20131106 (2023).","journal-title":"Nat. 
Biotechnol."},{"key":"764_CR4","doi-asserted-by":"crossref","unstructured":"Hendrycks, D. et al. Pretrained transformers improve out-of-distribution robustness. In Proc. 58th Annual Meeting of the Association for Computational Linguistics (eds Jurafsky, D. et al.) 2744\u20132751 (Association for Computational Linguistics, 2020).","DOI":"10.18653\/v1\/2020.acl-main.244"},{"key":"764_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3458754","volume":"3","author":"Y Gu","year":"2021","unstructured":"Gu, Y. et al. Domain-specific language model pretraining for biomedical natural language processing. ACM Trans. Comput. Healthc. 3, 1\u201323 (2021).","journal-title":"ACM Trans. Comput. Healthc."},{"key":"764_CR6","unstructured":"Lowe, D. M. Extraction of Chemical Structures and Reactions from the Literature. PhD thesis, Univ. Cambridge (2012)."},{"key":"764_CR7","doi-asserted-by":"publisher","unstructured":"Lowe, D. Chemical reactions from US patents (1976-Sep2016). figshare https:\/\/doi.org\/10.6084\/m9.figshare.5104873.v1 (2017).","DOI":"10.6084\/m9.figshare.5104873.v1"},{"key":"764_CR8","unstructured":"Goodfellow, I., Bengio, Y. & Courville, A. Deep Learning (MIT Press, 2016)."},{"key":"764_CR9","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1021\/ci5006614","volume":"55","author":"N Schneider","year":"2015","unstructured":"Schneider, N., Lowe, D. M., Sayle, R. A. & Landrum, G. A. Development of a novel fingerprint for chemical reactions and its application to large-scale reaction classification and similarity. J. Chem. Inf. Model. 55, 39\u201353 (2015).","journal-title":"J. Chem. Inf. Model."},{"key":"764_CR10","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1039\/D1DD00006C","volume":"1","author":"D Probst","year":"2022","unstructured":"Probst, D., Schwaller, P. & Reymond, J.-L. Reaction classification and yield prediction using the differential reaction fingerprint DRFP. Digit. Discov. 
1, 91\u201397 (2022).","journal-title":"Digit. Discov."},{"key":"764_CR11","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1038\/s42256-020-00284-w","volume":"3","author":"P Schwaller","year":"2021","unstructured":"Schwaller, P. et al. Mapping the space of chemical reactions using attention-based neural networks. Nat. Mach. Intell. 3, 144\u2013152 (2021).","journal-title":"Nat. Mach. Intell."},{"key":"764_CR12","first-page":"015022","volume":"3","author":"R Irwin","year":"2022","unstructured":"Irwin, R., Dimitriadis, S., He, J. & Bjerrum, E. J. Chemformer: a pretrained transformer for computational chemistry. Mach. Learn. 3, 015022 (2022).","journal-title":"Mach. Learn."},{"key":"764_CR13","doi-asserted-by":"publisher","first-page":"1446","DOI":"10.1039\/D1SC06515G","volume":"13","author":"M Wen","year":"2022","unstructured":"Wen, M., Blau, S. M., Xie, X., Dwaraknath, S. & Persson, K. A. Improving machine learning performance on small chemical reaction data with unsupervised contrastive pretraining. Chem. Sci. 13, 1446\u20131458 (2022).","journal-title":"Chem. Sci."},{"key":"764_CR14","unstructured":"Wang, H. et al. International Conference on Learning Representations (ICLR, 2022)."},{"key":"764_CR15","unstructured":"NameRXN (Nextmove Software, 2021); http:\/\/www.nextmovesoftware.com\/namerxn.html"},{"key":"764_CR16","first-page":"015016","volume":"2","author":"P Schwaller","year":"2021","unstructured":"Schwaller, P., Vaucher, A. C., Laino, T. & Reymond, J.-L. Prediction of chemical reaction yields using deep learning. Mach. Learn. 2, 015016 (2021).","journal-title":"Mach. Learn."},{"key":"764_CR17","unstructured":"Korovina, K. et al. ChemBO: Bayesian optimization of small organic molecules with synthesizable recommendations. In Proc. 23rd International Conference on Artificial Intelligence and Statistics (eds Chiappa, S. & Calandra, R.) 
3393\u20133403 (PMLR, 2020)."},{"key":"764_CR18","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1038\/s42256-019-0067-7","volume":"1","author":"A Button","year":"2019","unstructured":"Button, A., Merk, D., Hiss, J. A. & Schneider, G. Automated de novo molecular design by hybrid machine intelligence and rule-driven chemical synthesis. Nat. Mach. Intell. 1, 307\u2013315 (2019).","journal-title":"Nat. Mach. Intell."},{"key":"764_CR19","unstructured":"Gao, W., Mercado, R. & Coley, C. W. International Conference on Learning Representations (ICLR, 2022)."},{"key":"764_CR20","unstructured":"Noh, J. et al. Path-aware and structure-preserving generation of synthetically accessible molecules. In Proc. 39th International Conference on Machine Learning (eds Chaudhuri, K. et al.) 16952\u201316968 (PMLR, 2022)."},{"key":"764_CR21","doi-asserted-by":"publisher","first-page":"434","DOI":"10.1021\/acscentsci.7b00064","volume":"3","author":"CW Coley","year":"2017","unstructured":"Coley, C. W., Barzilay, R., Jaakkola, T. S., Green, W. H. & Jensen, K. F. Prediction of organic reaction outcomes using machine learning. ACS Cent. Sci. 3, 434\u2013443 (2017).","journal-title":"ACS Cent. Sci."},{"key":"764_CR22","unstructured":"Jin, W., Coley, C., Barzilay, R. & Jaakkola, T. Predicting organic reaction outcomes with Weisfeiler\u2013Lehman network. In Proc. 31st International Conference on Neural Information Processing Systems (eds Guyon, I. et al.) 2604\u20132613 (Curran Associates Inc., 2017)."},{"key":"764_CR23","doi-asserted-by":"publisher","first-page":"1572","DOI":"10.1021\/acscentsci.9b00576","volume":"5","author":"P Schwaller","year":"2019","unstructured":"Schwaller, P. et al. Molecular transformer: a model for uncertainty-calibrated chemical reaction prediction. ACS Cent. Sci. 5, 1572\u20131583 (2019).","journal-title":"ACS Cent. Sci."},{"key":"764_CR24","unstructured":"Bradshaw, J., Paige, B., Kusner, M. J., Segler, M. & Hern\u00e1ndez-Lobato, J. M. 
A model to search for synthesizable molecules. In Proc. 33rd International Conference on Neural Information Processing Systems (eds Wallach, H. et al.) 7937\u20137949 (Curran Associates Inc., 2019)."},{"key":"764_CR25","first-page":"6852","volume":"33","author":"J Bradshaw","year":"2020","unstructured":"Bradshaw, J., Paige, B., Kusner, M. J., Segler, M. & Hern\u00e1ndez-Lobato, J. M. Barking up the right tree: an approach to search over molecule synthesis DAGs. Adv. Neural Inf. Process. Syst. 33, 6852\u20136866 (2020).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"764_CR26","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A. et al. Language models are unsupervised multitask learners. OpenAI Blog 1, 9 (2019).","journal-title":"OpenAI Blog"},{"key":"764_CR27","doi-asserted-by":"publisher","unstructured":"Genheden, S., Engkvist, O. & Bjerrum, E. J. A quick policy to filter reactions based on feasibility in AI-guided retrosynthetic planning. Preprint at chemRxiv https:\/\/doi.org\/10.26434\/chemrxiv.13280495.v1 (2020).","DOI":"10.26434\/chemrxiv.13280495.v1"},{"key":"764_CR28","doi-asserted-by":"publisher","first-page":"1074","DOI":"10.1093\/nar\/gkx1037","volume":"46","author":"DS Wishart","year":"2018","unstructured":"Wishart, D. S. et al. Drugbank 5.0: a major update to the drugbank database for 2018. Nucleic Acids Res. 46, 1074\u20131082 (2018).","journal-title":"Nucleic Acids Res."},{"key":"764_CR29","doi-asserted-by":"publisher","first-page":"2046","DOI":"10.1021\/acs.jcim.1c00469","volume":"62","author":"V Fialkov\u00e1","year":"2021","unstructured":"Fialkov\u00e1, V. et al. LibINVENT: reaction-based generative scaffold decoration for in silico library design. J. Chem. Inf. Model. 62, 2046\u20132063 (2021).","journal-title":"J. Chem. Inf. 
Model."},{"key":"764_CR30","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1186\/1758-2946-1-8","volume":"1","author":"P Ertl","year":"2009","unstructured":"Ertl, P. & Schuffenhauer, A. Estimation of synthetic accessibility score of drug-like molecules based on molecular complexity and fragment contributions. J. Cheminform. 1, 8 (2009).","journal-title":"J. Cheminform."},{"key":"764_CR31","doi-asserted-by":"publisher","first-page":"3339","DOI":"10.1039\/D0SC05401A","volume":"12","author":"A Thakkar","year":"2021","unstructured":"Thakkar, A., Chadimov\u00e1, V., Bjerrum, E. J., Engkvist, O. & Reymond, J.-L. Retrosynthetic accessibility score (RAscore)\u2013rapid machine learned synthesizability classification from AI driven retrosynthetic planning. Chem. Sci. 12, 3339\u20133349 (2021).","journal-title":"Chem. Sci."},{"key":"764_CR32","doi-asserted-by":"publisher","first-page":"5909","DOI":"10.1039\/D1CC00050K","volume":"57","author":"A Morris","year":"2021","unstructured":"Morris, A. et al. Discovery of sars-cov-2 main protease inhibitors using a synthesis-directed de novo design model. Chem. Commun. 57, 5909\u20135912 (2021).","journal-title":"Chem. Commun."},{"key":"764_CR33","first-page":"045024","volume":"1","author":"M Krenn","year":"2020","unstructured":"Krenn, M., H\u00e4se, F., Nigam, A., Friederich, P. & Aspuru-Guzik, A. Self-referencing embedded strings (selfies): a 100% robust molecular string representation. Mach. Learn. 1, 045024 (2020).","journal-title":"Mach. Learn."},{"key":"764_CR34","unstructured":"Vaswani, A. et al. Attention is all you need. In Proc. 31st International Conference on Neural Information Processing Systems (eds Guyon, I. et al.) 6000\u20136010 (Curran Associates Inc., 2017)."},{"key":"764_CR35","first-page":"28877","volume":"34","author":"C Ying","year":"2021","unstructured":"Ying, C. et al. Do transformers really perform badly for graph representation? Adv. Neural Inf. Process. Syst. 
34, 28877\u201328888 (2021).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"764_CR36","doi-asserted-by":"crossref","unstructured":"Zhang, L., Xu, D., Arnab, A. & Torr, P. H. Dynamic graph message passing networks. In Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition 3726\u20133735 (2020).","DOI":"10.1109\/CVPR42600.2020.00378"},{"key":"764_CR37","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1039\/C7RE00129K","volume":"3","author":"P-M Jacob","year":"2018","unstructured":"Jacob, P.-M. & Lapkin, A. Statistics of the network of organic chemistry. React. Chem. Eng. 3, 102\u2013118 (2018).","journal-title":"React. Chem. Eng."},{"key":"764_CR38","unstructured":"Vignac, C. & Frossard, P. International Conference on Learning Representations (ICLR, 2022)."},{"key":"764_CR39","doi-asserted-by":"publisher","first-page":"772","DOI":"10.1038\/s42256-022-00526-z","volume":"4","author":"S Chen","year":"2022","unstructured":"Chen, S. & Jung, Y. A generalized-template-based graph neural network for accurate organic reactivity prediction. Nat. Mach. Intell. 4, 772\u2013780 (2022).","journal-title":"Nat. Mach. Intell."},{"key":"764_CR40","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1038\/nchem.1243","volume":"4","author":"GR Bickerton","year":"2012","unstructured":"Bickerton, G. R., Paolini, G. V., Besnard, J., Muresan, S. & Hopkins, A. L. Quantifying the chemical beauty of drugs. Nat. Chem. 4, 90\u201398 (2012).","journal-title":"Nat. Chem."},{"key":"764_CR41","doi-asserted-by":"publisher","first-page":"6177","DOI":"10.1021\/jm051256o","volume":"49","author":"RA Friesner","year":"2006","unstructured":"Friesner, R. A. et al. Extra precision glide: docking and scoring incorporating a model of hydrophobic enclosure for protein-ligand complexes. J. Med. Chem. 49, 6177\u20136196 (2006).","journal-title":"J. Med. Chem."},{"key":"764_CR42","doi-asserted-by":"publisher","unstructured":"Qiang, B. 
Processed training data for \u2018Bridging the gap between chemical reaction pretraining and conditional molecule generation with a unified model\u2019. Zenodo https:\/\/doi.org\/10.5281\/zenodo.8075067 (2023).","DOI":"10.5281\/zenodo.8075067"},{"key":"764_CR43","doi-asserted-by":"publisher","unstructured":"Qiang, B. qiangbo1222\/Uni-RXN-official V1.0. Zenodo https:\/\/doi.org\/10.5281\/zenodo.8113249 (2020).","DOI":"10.5281\/zenodo.8113249"},{"key":"764_CR44","unstructured":"Reymond Group: DRFP. GitHub https:\/\/github.com\/reymond-group\/drfp (2023)."}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00764-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00764-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00764-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,18]],"date-time":"2023-12-18T15:09:03Z","timestamp":1702912143000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00764-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,5]]},"references-count":44,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2023,12]]}},"alternative-id":["764"],"URL":"https:\/\/doi.org\/10.1038\/s42256-023-00764-9","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-2691838\/v1","asserted-by":"object"}]},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12,5]]},"assertion":[{"value":"14 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 
October 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 December 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}