{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:03:30Z","timestamp":1750309410485,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CHE?2202693"],"award-info":[{"award-number":["CHE?2202693"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679874","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"3797-3801","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Application of Large Language Models in Chemistry Reaction Data Extraction and Cleaning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-1679-3888","authenticated-orcid":false,"given":"Xiaobao","family":"Huang","sequence":"first","affiliation":[{"name":"University of Notre Dame, Notre Dame, IN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8456-1109","authenticated-orcid":false,"given":"Mihir","family":"Surve","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4793-5543","authenticated-orcid":false,"given":"Yuhan","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, IN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0126-1443","authenticated-orcid":false,"given":"Tengfei","family":"Luo","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, IN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9316-7720","authenticated-orcid":false,"given":"Olaf","family":"Wiest","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, IN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3574-5665","authenticated-orcid":false,"given":"Xiangliang","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, IN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3932-5956","authenticated-orcid":false,"given":"Nitesh V.","family":"Chawla","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, IN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Simon JL Billinge, et al","author":"Choudhary Kamal","year":"2022","unstructured":"Kamal Choudhary, Brian DeCost, Chi Chen, Anubhav Jain, Francesca Tavazza, Ryan Cohn, CheolWoo Park, Alok Choudhary, Ankit Agrawal, Simon JL Billinge, et al. 2022. Recent advances and applications of deep learning methods in materials science. npj Computational Materials (2022)."},{"key":"e_1_3_2_1_2_1","volume-title":"Machine learning in computer-aided synthesis planning. Accounts of chemical research 51, 5","author":"Coley Connor W","year":"2018","unstructured":"Connor W Coley, William H Green, and Klavs F Jensen. 2018. Machine learning in computer-aided synthesis planning. Accounts of chemical research 51, 5 (2018), 1281--1289."},{"key":"e_1_3_2_1_3_1","volume-title":"A graphconvolutional neural network model for the prediction of chemical reactivity. Chemical science 10, 2","author":"Coley Connor W","year":"2019","unstructured":"Connor W Coley, Wengong Jin, Luke Rogers, Timothy F Jamison, Tommi S Jaakkola, William H Green, Regina Barzilay, and Klavs F Jensen. 2019. A graphconvolutional neural network model for the prediction of chemical reactivity. Chemical science 10, 2 (2019), 370--377."},{"key":"e_1_3_2_1_4_1","volume-title":"Justin AM Lummiss, Jonathan N Jaworski, Christopher P Breen, Victor Schultz, Travis Hart, Joshua S Fishman, Luke Rogers, Hanyu Gao, et al.","author":"Coley Connor W","year":"2019","unstructured":"Connor W Coley, Dale A Thomas III, Justin AM Lummiss, Jonathan N Jaworski, Christopher P Breen, Victor Schultz, Travis Hart, Joshua S Fishman, Luke Rogers, Hanyu Gao, et al. 2019. A robotic platform for flow synthesis of organic compounds informed by AI planning. Science 365, 6453 (2019), eaax1566."},{"key":"e_1_3_2_1_5_1","volume-title":"Structured information extraction from scientific text with large language models. Nature Communications","author":"Dagdelen John","year":"2024","unstructured":"John Dagdelen, Alexander Dunn, Sanghoon Lee, Nicholas Walker, Andrew S Rosen, Gerbrand Ceder, Kristin A Persson, and Anubhav Jain. 2024. Structured information extraction from scientific text with large language models. Nature Communications (2024)."},{"key":"e_1_3_2_1_7_1","first-page":"59662","article-title":"What can large language models do in chemistry? a comprehensive benchmark on eight tasks","volume":"36","author":"Guo Taicheng","year":"2023","unstructured":"Taicheng Guo, Bozhao Nan, Zhenwen Liang, Zhichun Guo, Nitesh Chawla, Olaf Wiest, Xiangliang Zhang, et al. 2023. What can large language models do in chemistry? a comprehensive benchmark on eight tasks. Advances in Neural Information Processing Systems 36 (2023), 59662--59688.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1145\/3442381.3450112"},{"key":"e_1_3_2_1_9_1","volume-title":"Cole","author":"Huang Shu","year":"2022","unstructured":"Shu Huang and Jacqueline M. Cole. 2022. BatteryBERT: A Pretrained Language Model for Battery Database Enhancement. Journal of Chemical Information and Modeling (2022)."},{"unstructured":"Kevin Maik Jablonka Qianxiang Ai Alexander Al-Feghali Shruti Badhwar Joshua D Bocarsly Andres M Bran Stefan Bringuier L Catherine Brinson Kamal Choudhary Defne Circi et al. 2023. 14 examples of how LLMs can transform materials science and chemistry: a reflection on a large language model hackathon. Digital Discovery (2023).","key":"e_1_3_2_1_10_1"},{"key":"e_1_3_2_1_11_1","volume-title":"ChatGPT for good? On opportunities and challenges of large language models for education. Learning and Individual Differences","author":"Kasneci Enkelejda","year":"2023","unstructured":"Enkelejda Kasneci, Kathrin Sessler, Stefan K\u00fcchemann, Maria Bannert, Daryna Dementieva, Frank Fischer, Urs Gasser, Georg Groh, Stephan G\u00fcnnemann, Eyke H\u00fcllermeier, Stephan Krusche, Gitta Kutyniok, Tilman Michaeli, Claudia Nerdel, J\u00fcrgen Pfeffer, Oleksandra Poquet, Michael Sailer, Albrecht Schmidt, Tina Seidel, Matthias Stadler, JochenWeller, Jochen Kuhn, and Gjergji Kasneci. 2023. ChatGPT for good? On opportunities and challenges of large language models for education. Learning and Individual Differences (2023)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1021\/jacs.1c09820"},{"key":"e_1_3_2_1_13_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In ACL.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In ACL."},{"unstructured":"Daniel Mark Lowe. 2012. Extraction of chemical structures and reactions from the literature. (2012).","key":"e_1_3_2_1_14_1"},{"key":"e_1_3_2_1_15_1","volume-title":"Chawla","author":"Ma Yihong","year":"2024","unstructured":"Yihong Ma, Xiaobao Huang, Bozhao Nan, Nuno Moniz, Xiangliang Zhang, Olaf Wiest, and Nitesh V. Chawla. 2024. Are we Making Much Progress? Revisiting Chemical Reaction Yield Prediction from an Imbalanced Regression Perspective. In WWW."},{"key":"e_1_3_2_1_16_1","volume-title":"Materials discovery with machine learning and knowledge discovery. Frontiers in chemistry","author":"Oliveira Osvaldo N","year":"2022","unstructured":"Osvaldo N Oliveira Jr and Maria Cristina F Oliveira. 2022. Materials discovery with machine learning and knowledge discovery. Frontiers in chemistry (2022)."},{"doi-asserted-by":"crossref","unstructured":"Kishore Papineni Salim Roukos Todd Ward and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In ACL.","key":"e_1_3_2_1_17_1","DOI":"10.3115\/1073083.1073135"},{"doi-asserted-by":"crossref","unstructured":"Matt Post. 2018. A Call for Clarity in Reporting BLEU Scores. In WMT.","key":"e_1_3_2_1_18_1","DOI":"10.18653\/v1\/W18-6319"},{"doi-asserted-by":"crossref","unstructured":"Mandana Saebi Bozhao Nan John E Herr Jessica Wahlers Zhichun Guo Andrzej M Zura'ski Thierry Kogej Per-Ola Norrby Abigail G Doyle Nitesh V Chawla et al. 2023. On the use of real-world datasets for reaction yield prediction. Chemical science 14 19 (2023) 4997--5005.","key":"e_1_3_2_1_19_1","DOI":"10.1039\/D2SC06041H"},{"key":"e_1_3_2_1_20_1","volume-title":"Mapping the space of chemical reactions using attention-based neural networks. Nature Machine Intelligence","author":"Schwaller Philippe","year":"2021","unstructured":"Philippe Schwaller, Daniel Probst, Alain C Vaucher, VishnuHNair, David Kreutter, Teodoro Laino, and Jean-Louis Reymond. 2021. Mapping the space of chemical reactions using attention-based neural networks. Nature Machine Intelligence (2021)."},{"doi-asserted-by":"crossref","unstructured":"Jessica Vamathevan Dominic Clark Paul Czodrowski Ian Dunham Edgardo Ferran George Lee Bin Li Anant Madabhushi Parantu Shah Michaela Spitzer et al. 2019. Applications of machine learning in drug discovery and development. Nature reviews Drug discovery (2019).","key":"e_1_3_2_1_21_1","DOI":"10.1038\/s41573-019-0024-5"},{"doi-asserted-by":"crossref","unstructured":"Nicholas Walker John Dagdelen Kevin Cruse Sanghoon Lee Samuel Gleason Alexander Dunn Gerbrand Ceder A. Paul Alivisatos Kristin A. Persson and Anubhav Jain. 2023. Extracting Structured Seed-Mediated Gold Nanorod Growth Procedures from Literature with GPT-3. (2023).","key":"e_1_3_2_1_22_1","DOI":"10.1039\/D3DD00019B"},{"key":"e_1_3_2_1_23_1","volume-title":"Yaghi","author":"Zheng Zhiling","year":"2023","unstructured":"Zhiling Zheng, Oufan Zhang, Christian Borgs, Jennifer T. Chayes, and Omar M. Yaghi. 2023. ChatGPT Chemistry Assistant for Text Mining and the Prediction of MOF Synthesis. Journal of the American Chemical Society (2023)."}],"event":{"sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"acronym":"CIKM '24","name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA"},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679874","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679874","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:08Z","timestamp":1750294688000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679874"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":22,"alternative-id":["10.1145\/3627673.3679874","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679874","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}