{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T08:38:01Z","timestamp":1778056681377,"version":"3.51.4"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2021,3,29]],"date-time":"2021-03-29T00:00:00Z","timestamp":1616976000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,3,29]],"date-time":"2021-03-29T00:00:00Z","timestamp":1616976000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-021-00319-w","type":"journal-article","created":{"date-parts":[[2021,3,29]],"date-time":"2021-03-29T12:03:18Z","timestamp":1617019398000},"page":"485-494","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":46,"title":["Unassisted noise reduction of chemical reaction datasets"],"prefix":"10.1038","volume":"3","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5218-8653","authenticated-orcid":false,"given":"Alessandra","family":"Toniato","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3046-6576","authenticated-orcid":false,"given":"Philippe","family":"Schwaller","sequence":"additional","affiliation":[]},{"given":"Antonio","family":"Cardinale","sequence":"additional","affiliation":[]},{"given":"Joppe","family":"Geluykens","sequence":"additional","affiliation":[]},{"given":"Teodoro","family":"Laino","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,3,29]]},"reference":[{"key":"319_CR1","unstructured":"Lowe, D. M. Extraction of Chemical Structures and Reactions from the Literature. PhD thesis, Univ. Cambridge (2012)."},{"key":"319_CR2","unstructured":"Lowe, D. Chemical reactions from US patents (1976\u2013Sep2016). figshare https:\/\/figshare.com\/articles\/Chemical_reactions_from_US_patents_1976-Sep2016_\/5104873 (2017)."},{"key":"319_CR3","unstructured":"Nextmove Software Pistachio (NextMove Software, accessed 2 April 2020); https:\/\/www.nextmovesoftware.com\/pistachio.html"},{"key":"319_CR4","unstructured":"Reaxys (Reaxys, accessed 2 April 2020); https:\/\/www.reaxys.com"},{"key":"319_CR5","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/nature25978","volume":"555","author":"M Segler","year":"2018","unstructured":"Segler, M., Preuss, M. & Waller, M. Planning chemical syntheses with deep neural networks and symbolic AI. Nature 555, 604\u2013610 (2018).","journal-title":"Nature"},{"key":"319_CR6","doi-asserted-by":"crossref","unstructured":"Coley, C. W.et al. A robotic platform for flow synthesis of organic compounds informed by AI planning. Science 365, eaax1566 (2019).","DOI":"10.1126\/science.aax1566"},{"key":"319_CR7","doi-asserted-by":"crossref","unstructured":"Schwaller, P. & Laino, T. Data-Driven Learning Systems for Chemical Reaction Prediction: An Analysis of Recent Approaches. In Machine Learning in Chemistry: Data-Driven Algorithms, Learning Systems and Predictions (eds. Pyzer-Knapp, E. O. & Laino, T.) 61\u201379 (ACS Publications, 2019).","DOI":"10.1021\/bk-2019-1326.ch004"},{"key":"319_CR8","doi-asserted-by":"publisher","first-page":"1572","DOI":"10.1021\/acscentsci.9b00576","volume":"5","author":"P Schwaller","year":"2019","unstructured":"Schwaller, P. et al. Molecular Transformer: a model for uncertainty-calibrated chemical reaction prediction. ACS Cent. Sci. 5, 1572\u20131583 (2019).","journal-title":"ACS Cent. Sci."},{"key":"319_CR9","doi-asserted-by":"publisher","first-page":"3316","DOI":"10.1039\/C9SC05704H","volume":"11","author":"P Schwaller","year":"2020","unstructured":"Schwaller, P. et al. Predicting retrosynthetic pathways using transformer-based models and a hyper-graph exploration strategy. Chem. Sci. 11, 3316\u20133325 (2020).","journal-title":"Chem. Sci."},{"key":"319_CR10","doi-asserted-by":"crossref","unstructured":"\u00d6zt\u00fcrk H., \u00d6zg\u00fcr A., Schwaller P., Laino T. & Ozkirimli E. Exploring chemical space using natural language processing methodologies for drug discovery. Drug Discov. Today 25, 689\u2013705 (2020).","DOI":"10.1016\/j.drudis.2020.01.020"},{"key":"319_CR11","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1021\/ci00023a005","volume":"35","author":"H Satoh","year":"1995","unstructured":"Satoh, H. & Funatsu, K. Sophia, a knowledge base-guided reaction prediction system-utilization of a knowledge base derived from a reaction database. J. Chem. Inf. Comput. Sci. 35, 34\u201344 (1995).","journal-title":"J. Chem. Inf. Comput. Sci."},{"key":"319_CR12","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1039\/C9SC04944D","volume":"11","author":"A Thakkar","year":"2020","unstructured":"Thakkar, A., Kogej, T., Reymond, J. L., Engkvist, O. & Esben, J. Datasets and their influence on the development of computer assisted synthesis planning tools in the pharmaceutical domain. Chem. Sci 11, 154\u2013168 (2020).","journal-title":"Chem. Sci"},{"key":"319_CR13","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/s10462-004-0751-8","volume":"22","author":"X Zhu","year":"2004","unstructured":"Zhu, X. & Wu, X. Class noise vs. attribute noise: a quantitative study of their impacts. Artif. Intell. Rev. 22, 177\u2013210 (2004).","journal-title":"Artif. Intell. Rev."},{"key":"319_CR14","unstructured":"Toneva, M. et al. An empirical study of example forgetting during deep neural network learning. In Proc. International Conference on Learning Representations (ICLR, 2019)."},{"key":"319_CR15","doi-asserted-by":"publisher","first-page":"1237","DOI":"10.1021\/acscentsci.7b00355","volume":"3","author":"CW Coley","year":"2017","unstructured":"Coley, C. W., Rogers, L., Green, W. H. & Jensen, K. F. Computer-assisted retrosynthesis based on molecular similarity. ACS Cent. Sci. 3, 1237\u20131245 (2017).","journal-title":"ACS Cent. Sci."},{"key":"319_CR16","doi-asserted-by":"publisher","first-page":"5966","DOI":"10.1002\/chem.201605499","volume":"23","author":"MHS Segler","year":"2017","unstructured":"Segler, M. H. S. & Waller, M. P. Neural-symbolic machine learning for retrosynthesis and reaction prediction. Chem. Eur. J. 23, 5966\u20135971 (2017).","journal-title":"Chem. Eur. J."},{"key":"319_CR17","unstructured":"Somnath, V. R., Bunne, C., Coley, C. W., Krause, A. & Barzilay, R. Learning graph models for template-free retrosynthesis. Preprint at https:\/\/arxiv.org\/pdf\/2006.07038.pdf (2020)."},{"key":"319_CR18","unstructured":"Dai, H., Li, C., Coley, C., Dai, B. & Song, L. Retrosynthesis prediction with conditional graph logic network. In Proc. Advances in Neural Information Processing Systems 32 (eds Wallach, H. et al.) 8872\u20138882 (Curran Associates, 2019)."},{"key":"319_CR19","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1021\/acs.jcim.9b00949","volume":"60","author":"S Zheng","year":"2020","unstructured":"Zheng, S., Rao, J., Zhang, Z., Xu, J. & Yang, Y. Predicting retrosynthetic reactions using self-corrected transformer neural networks. J. Chem. Inf. Model. 60, 47\u201355 (2020).","journal-title":"J. Chem. Inf. Model."},{"key":"319_CR20","unstructured":"Sacha, M., B\u0142a\u017c, M., Byrski, P., W\u0142odarczyk-Pruszy\u0144ski, P. & Jastrzebski, S. Molecule edit graph attention network: modeling chemical reactions as sequences of graph edits. Preprint at https:\/\/arxiv.org\/pdf\/2006.15426.pdf (2020)."},{"key":"319_CR21","doi-asserted-by":"publisher","first-page":"5575","DOI":"10.1038\/s41467-020-19266-y","volume":"11","author":"IV Tetko","year":"2020","unstructured":"Tetko, I. V., Karpov, P., Van Deursen, R. & Godin, G. State-of-the-art augmented NLP transformer models for direct and single-step retrosynthesis. Nat. Commun. 11, 5575 (2020).","journal-title":"Nat. Commun."},{"key":"319_CR22","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/S0079-7421(08)60536-8","volume":"24","author":"M McCloskey","year":"1989","unstructured":"McCloskey, M. & Cohen, N. J. Catastrophic interference in connectionist networks: the sequential learning problem. Psychol. Learn. Motiv 24, 109\u2013165 (1989).","journal-title":"Psychol. Learn. Motiv"},{"key":"319_CR23","unstructured":"Vaswani, A. et al. Attention is all you need. In Proc. Advances in Neural Information Processing Systems Vol. 30, 5998\u20136008 (Curran Associates, 2017)."},{"key":"319_CR24","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1080\/09296174.2013.799918","volume":"20","author":"S Wallis","year":"2013","unstructured":"Wallis, S. Binomial confidence intervals and contingency tests: mathematical fundamentals and the evaluation of alternative methods. J. Quant. Linguist. 20, 178\u2013208 (2013).","journal-title":"J. Quant. Linguist."},{"key":"319_CR25","unstructured":"IBM RXN for chemistry (IBM, 2020); https:\/\/rxn.res.ibm.com"},{"key":"319_CR26","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1021\/ci00057a005","volume":"28","author":"D Weininger","year":"1988","unstructured":"Weininger, D. SMILES, a chemical language and information system. 1. Introduction to methodology and encoding rules. J. Chem. Inf. Comput. Sci. 28, 31\u201336 (1988).","journal-title":"J. Chem. Inf. Comput. Sci."},{"key":"319_CR27","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1021\/ci00062a008","volume":"29","author":"D Weininger","year":"1989","unstructured":"Weininger, D., Weininger, A. & Weininger, J. L. SMILES. 2. Algorithm for generation of unique SMILES notation. J. Chem. Inf. Comput. Sci. 29, 97\u2013101 (1989).","journal-title":"J. Chem. Inf. Comput. Sci."},{"key":"319_CR28","doi-asserted-by":"crossref","unstructured":"Klein, G., Kim, Y., Deng, Y., Senellart, J. & Rush, A. M. OpenNMT: open-source toolkit for neural machine translation. In Proc. ACL 2017, System Demonstrations 67\u201372 (ACL, 2017).","DOI":"10.18653\/v1\/P17-4012"},{"key":"319_CR29","unstructured":"Paszke, A. et al. PyTorch: an imperative style, high-performance deep learning library. In Proc. Advances in Neural Information Processing Systems Vol. 32, 8024\u20138035 (Curran Associates, 2019)."},{"key":"319_CR30","doi-asserted-by":"publisher","unstructured":"Landrum, G. et al. rdkit\/rdkit: 2019_03_4 (Q1 2019) Version Release_2019_03_4 Zenodo https:\/\/doi.org\/10.5281\/zenodo.3366468 (2019).","DOI":"10.5281\/zenodo.3366468"},{"key":"319_CR31","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1002\/j.1538-7305.1948.tb00917.x","volume":"29","author":"CE Shannon","year":"1948","unstructured":"Shannon, C. E. A mathematical theory of communication. Bell Syst. Tech. J. 29, 623\u2013656 (1948).","journal-title":"Bell Syst. Tech. J."},{"key":"319_CR32","doi-asserted-by":"publisher","first-page":"1220","DOI":"10.1109\/TIT.2004.828057","volume":"50","author":"R Murali","year":"2004","unstructured":"Murali, R., Chen, Y., Vemuri, B. C. & Fei, W. Cumulative residual entropy: a new measure of information. IEEE Trans. Inf. Theory 50, 1220\u20131228 (2004).","journal-title":"IEEE Trans. Inf. Theory"},{"key":"319_CR33","doi-asserted-by":"crossref","unstructured":"Nguyen, H. V. & Vreeken, J. Non-parametric Jensen\u2013Shannon divergence. In Machine Learning and Knowledge Discovery in Databases. ECML PKDD 2015. Lecture Notes in Computer Science Vol. 9285 (eds. Appice, A. et al.) 173\u2013189 (Springer, 2015).","DOI":"10.1007\/978-3-319-23525-7_11"},{"key":"319_CR34","doi-asserted-by":"publisher","first-page":"2336","DOI":"10.1021\/acs.jcim.6b00564","volume":"56","author":"N Schneider","year":"2016","unstructured":"Schneider, N., Stiefl, N. & Landrum, G. A. What\u2019s what: the (nearly) definitive guide to reaction role assignment. J. Chem. Inf. Model. 56, 2336\u20132346 (2016).","journal-title":"J. Chem. Inf. Model."},{"key":"319_CR35","unstructured":"Noise reduction repository (v0.1). Zenodo https:\/\/zenodo.org\/badge\/latestdoi\/281679964 (2020)."}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-021-00319-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-021-00319-w","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-021-00319-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,3]],"date-time":"2022-12-03T15:50:20Z","timestamp":1670082620000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-021-00319-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,29]]},"references-count":35,"journal-issue":{"issue":"6","published-online":{"date-parts":[[2021,6]]}},"alternative-id":["319"],"URL":"https:\/\/doi.org\/10.1038\/s42256-021-00319-w","relation":{"has-preprint":[{"id-type":"doi","id":"10.26434\/chemrxiv.12395120.v2","asserted-by":"object"},{"id-type":"doi","id":"10.26434\/chemrxiv.12395120","asserted-by":"object"}]},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,3,29]]},"assertion":[{"value":"7 July 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 February 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 March 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}