{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,12]],"date-time":"2026-07-12T01:24:25Z","timestamp":1783819465552,"version":"3.55.0"},"reference-count":31,"publisher":"American Chemical Society (ACS)","issue":"22","license":[{"start":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T00:00:00Z","timestamp":1762128000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T00:00:00Z","timestamp":1762128000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T00:00:00Z","timestamp":1762128000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-045"}],"funder":[{"DOI":"10.13039\/100014717","name":"National Outstanding Youth Science Fund Project of National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100014717","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008048","name":"Nanjing University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100008048","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["20252D0121802"],"award-info":[{"award-number":["20252D0121802"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2025,11,24]]},"DOI":"10.1021\/acs.jcim.5c02033","type":"journal-article","created":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T14:59:19Z","timestamp":1762181959000},"page":"12268-12278","source":"Crossref","is-referenced-by-count":3,"title":["QCBench: Evaluating Large Language Models on Domain-Specific Quantitative Chemistry"],"prefix":"10.1021","volume":"65","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7634-4457","authenticated-orcid":true,"given":"Jiaqing","family":"Xie","sequence":"first","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, 701 Yunjin Road, Xuhui, Shanghai 200232, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weida","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, 701 Yunjin Road, Xuhui, Shanghai 200232, China"},{"name":"Fudan University, 220 Handan Rd, Yangpu, Shanghai 200433, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ben","family":"Gao","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, 701 Yunjin Road, Xuhui, Shanghai 200232, China"},{"name":"Wuhan University, G9P7+CP8, Wuhan, Hubei 430072, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhuo","family":"Yang","sequence":"additional","affiliation":[{"name":"Xidian University, 266 Xinglong Section of Xifeng Road, Xi\u2019an, Shaanxi 710126, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Haiyuan","family":"Wan","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, 701 Yunjin Road, Xuhui, Shanghai 200232, China"},{"name":"Tsinghua University, Haidian District, Beijing 100084, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shufei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, 701 Yunjin Road, Xuhui, Shanghai 200232, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tianfan","family":"Fu","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, 701 Yunjin Road, Xuhui, Shanghai 200232, China"},{"name":"Nanjing University, 163 Xianlin Road, Qixia District, Nanjing, Jiangsu 210023, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6756-6154","authenticated-orcid":true,"given":"Yuqiang","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, 701 Yunjin Road, Xuhui, Shanghai 200232, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"316","published-online":{"date-parts":[[2025,11,3]]},"reference":[{"key":"ref1\/cit1","unstructured":"Gao, B.; Song, F.; Yang, Z.; Cai, Z.; Miao, Y.; Dong, Q.; Li, L.; Ma, C.; Chen, L.; Xu, R. Omni-math: A universal olympiad level mathematic benchmark for large language models\n                      arXiv\n                      , preprint arXiv:2410.07985, 2024."},{"key":"ref2\/cit2","unstructured":"Fan, J.; Martinson, S.; Wang, E. Y.; Hausknecht, K.; Brenner, J.; Liu, D.; Peng, N.; Wang, C.; Brenner, M. P. Hardmath: A benchmark dataset for challenging problems in applied mathematics\n                      arXiv\n                      , preprint arXiv:2410.09988, 2024."},{"key":"ref3\/cit3","unstructured":"Glazer, E.; Erdil, E.; Besiroglu, T.; Chicharro, D.; Chen, E.; Gunning, A.; Olsson, C. F.; Denain, J.S.; Ho, A.; Santos, E. d. O.  Frontiermath: A benchmark for evaluating advanced mathematical reasoning in ai.\n                      arXiv\n                      , preprint arXiv:2411.04872, 2024."},{"key":"ref4\/cit4","doi-asserted-by":"crossref","unstructured":"Chung, D. J.; Gao, Z.; Kvasiuk, Y.; Li, T.; M\u00fcnchmeyer, M.; Rudolph, M.; Sala, F.; Tadepalli, S. C. Theoretical Physics Benchmark (TPBench)\u2013a Dataset and Study of AI Reasoning Capabilities in Theoretical Physics.\n                      arXiv\n                      , preprint arXiv:2502.15815, 2025.","DOI":"10.1088\/2632-2153\/adfcb0"},{"key":"ref5\/cit5","unstructured":"Qiu, S.; Guo, S.; Song, Z.Y.; Sun, Y.; Cai, Z.; Wei, J.; Luo, T.; Yin, Y.; Zhang, H.; Hu, Y.  Phybench: Holistic evaluation of physical perception and reasoning in large language models.\n                      arXiv\n                      , preprint arXiv:2504.16074, 2025."},{"key":"ref6\/cit6","doi-asserted-by":"crossref","unstructured":"He, C.; Luo, R.; Bai, Y.; Hu, S.; Thai, Z. L.; Shen, J.; Hu, J.; Han, X.; Huang, Y.; Zhang, Y.  Olympiadbench: A challenging benchmark for promoting agi with olympiad-level bilingual multimodal scientific problems.\n                      arXiv\n                      , preprint arXiv:2402.14008, 2024.","DOI":"10.18653\/v1\/2024.acl-long.211"},{"key":"ref7\/cit7","unstructured":"Wang, X.; Hu, Z.; Lu, P.; Zhu, Y.; Zhang, J.; Subramaniam, S.; Loomba, A. R.; Zhang, S.; Sun, Y.; Wang, W. Scibench: Evaluating college-level scientific problem-solving abilities of large language models.\n                      arXiv\n                      , preprint arXiv:2307.10635, 2023."},{"key":"ref8\/cit8","first-page":"59662","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Guo T.","year":"2023"},{"key":"ref9\/cit9","unstructured":"Mirza, A.; Alampara, N.; Kunchapu, S.; R\u00edos-Garc\u00eda, M.; Emoekabu, B.; Krishnan, A.; Gupta, T.; Schilling-Wilhelmi, M.; Okereke, M.; Aneesh, A.  Are large language models superhuman chemists?\n                      arXiv\n                      , preprint arXiv:2404.01475, 2024."},{"key":"ref10\/cit10","unstructured":"Yue, A. S.; Madaan, L.; Moskovitz, T.; Strouse, D.; Singh, A. K. HARP: A challenging human-annotated math reasoning benchmark.\n                      arXiv\n                      , preprint arXiv:2412.08819, 2024."},{"key":"ref11\/cit11","doi-asserted-by":"crossref","unstructured":"Yan, Y.; Su, J.; He, J.; Fu, F.; Zheng, X.; Lyu, Y.; Wang, K.; Wang, S.; Wen, Q.; Hu, X. A survey of mathematical reasoning in the era of multimodal large language model: Benchmark, method & challenges.\n                      arXiv\n                      , preprint arXiv:2412.11936, 2024.","DOI":"10.18653\/v1\/2025.findings-acl.614"},{"key":"ref12\/cit12","unstructured":"Zhang, Y.; Ma, Y.; Gu, Y.; Yang, Z.; Zhuang, Y.; Wang, F.; Huang, Z.; Wang, Y.; Huang, C.; Song, B.  ABench-Physics: Benchmarking Physical Reasoning in LLMs via High-Difficulty and Dynamic Physics Problems.\n                      arXiv\n                      , preprint arXiv:2507.04766, 2025."},{"key":"ref13\/cit13","doi-asserted-by":"publisher","DOI":"10.1039\/D4SC08802F"},{"key":"ref14\/cit14","first-page":"134721","volume-title":"Advances in Neural Information Processing Systems","volume":"37","author":"Guo K.","year":"2024"},{"key":"ref15\/cit15","doi-asserted-by":"crossref","unstructured":"Zhao, Z.; Ma, D.; Chen, L.; Sun, L.; Li, Z.; Xia, Y.; Chen, B.; Xu, H.; Zhu, Z.; Zhu, S.  ChemDFM: a large language foundation model for chemistry.\n                      arXiv\n                      , preprint arXiv:2401.14818, 2024.","DOI":"10.1016\/j.xcrp.2025.102523"},{"key":"ref16\/cit16","unstructured":"Hendrycks, D.; Burns, C.; Basart, S.; Zou, A.; Mazeika, M.; Song, D.; Steinhardt, J. Measuring massive multitask language understanding.\n                      arXiv\n                      , preprint arXiv:2009.03300, 2020."},{"key":"ref17\/cit17","first-page":"19209","volume-title":"Advances in Neural Information Processing Systems","volume":"37","author":"Huang Z.","year":"2024"},{"key":"ref18\/cit18","unstructured":"Chen, D.; Yu, Q.; Wang, P.; Zhang, W.; Tang, B.; Xiong, F.; Li, X.; Yang, M.; Li, Z. xverify: Efficient answer verifier for reasoning model evaluations.\n                      arXiv\n                      , preprint arXiv:2504.10481, 2025."},{"key":"ref19\/cit19","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511800870","volume-title":"An Introduction to Computational Physics","author":"Pang T.","year":"2006"},{"key":"ref20\/cit20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29872"},{"key":"ref21\/cit21","doi-asserted-by":"crossref","unstructured":"Arora, D.; Singh, H. G.  Have llms advanced enough? a challenging problem solving benchmark for large language models.\n                      arXiv\n                      , preprint arXiv:2305.15074, 2023.","DOI":"10.18653\/v1\/2023.emnlp-main.468"},{"key":"ref22\/cit22","doi-asserted-by":"publisher","DOI":"10.1007\/s00799-022-00329-y"},{"key":"ref23\/cit23","first-page":"30624","volume-title":"Advances in Neural Information Processing Systems","volume":"37","author":"Tian M.","year":"2024"},{"key":"ref24\/cit24","unstructured":"Alampara, N.; Mandal, I.; Khetarpal, P.; Grover, H. S.; Schilling-Wilhelmi, M.; Krishnan, N. A.; Jablonka, K. M.\n                      MaCBench: A Multimodal Chemistry and Materials Science Benchmark\n                      , Proceedings of the 38th Conference on Neural Information Processing Systems (NeurIPS 2024), 2024."},{"key":"ref25\/cit25","unstructured":"Zhou, Y.; Cheng, M.; Mao, Q.; Luo, Y.; Liu, Q.; Li, Y.; Zhang, X.; Liu, D.; Li, X.; Chen, E. Benchmarking Multimodal LLMs on Recognition and Understanding over Chemical Tables.\n                      arXiv\n                      , preprint arXiv:2506.11375, 2025."},{"key":"ref26\/cit26","unstructured":"Hao, Y.; Gu, J.; Wang, H. W.; Li, L.; Yang, Z.; Wang, L.; Cheng, Y. Can mllms reason in multimodality? emma: An enhanced multimodal reasoning benchmark.\n                      arXiv\n                      , preprint arXiv:2501.05444, 2025."},{"key":"ref27\/cit27","doi-asserted-by":"crossref","unstructured":"Liang, Z.; Guo, K.; Liu, G.; Guo, T.; Zhou, Y.; Yang, T.; Jiao, J.; Pi, R.; Zhang, J.; Zhang, X. Scemqa: A scientific college entrance level multimodal question answering benchmark.\n                      arXiv\n                      , preprint arXiv:2402.05138, 2024.","DOI":"10.18653\/v1\/2024.acl-short.11"},{"key":"ref28\/cit28","unstructured":"Hurst, A.; Lerer, A.; Goucher, A. P.; Perelman, A.; Ramesh, A.; Clark, A.; Ostrow, A.; Welihinda, A.; Hayes, A.; Radford, A.  Gpt-4o system card.\n                      arXiv\n                      , preprint arXiv:2410.21276, 2024."},{"key":"ref29\/cit29","unstructured":"Comanici, G.; Bieber, E.; Schaekermann, M.; Pasupat, I.; Sachdeva, N.; Dhillon, I.; Blistein, M.; Ram, O.; Zhang, D.; Rosen, E.  Gemini 2.5: Pushing the frontier with advanced reasoning, multimodality, long context, and next generation agentic capabilities.\n                      arXiv\n                      , preprint arXiv:2507.06261, 2025."},{"key":"ref30\/cit30","unstructured":"Guo, D.; Yang, D.; Zhang, H.; Song, J.; Zhang, R.; Xu, R.; Zhu, Q.; Ma, S.; Wang, P.; Bi, X.  Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning.\n                      arXiv\n                      , preprint arXiv:2501.12948, 2025."},{"key":"ref31\/cit31","unstructured":"Yang, A.; Li, A.; Yang, B.; Zhang, B.; Hui, B.; Zheng, B.; Yu, B.; Gao, C.; Huang, C.; Lv, C.  Qwen3 technical report.\n                      arXiv\n                      , preprint arXiv:2505.09388, 2025."}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.5c02033","content-type":"application\/pdf","content-version":"vor","intended-application":"unspecified"},{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.5c02033","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T09:11:29Z","timestamp":1763975489000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.5c02033"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,3]]},"references-count":31,"journal-issue":{"issue":"22","published-print":{"date-parts":[[2025,11,24]]}},"alternative-id":["10.1021\/acs.jcim.5c02033"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.5c02033","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,3]]}}}