{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:47:41Z","timestamp":1775069261043,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Deutscher Akademischer Austauschdienst","doi-asserted-by":"publisher","award":["57515245"],"award-info":[{"award-number":["57515245"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["437179652"],"award-info":[{"award-number":["437179652"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657945","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"2316-2320","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["Can LLMs Master Math? Investigating Large Language Models on Math Stack Exchange"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3219-026X","authenticated-orcid":false,"given":"Ankit","family":"Satpute","sequence":"first","affiliation":[{"name":"FIZ Karlsruhe, Berlin, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5268-2519","authenticated-orcid":false,"given":"Noah","family":"Gie\u00dfing","sequence":"additional","affiliation":[{"name":"Technical University Berlin, Berlin, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5828-5497","authenticated-orcid":false,"given":"Andr\u00e9","family":"Greiner-Petter","sequence":"additional","affiliation":[{"name":"University of G\u00f6ttingen, G\u00f6ttingen, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7141-4997","authenticated-orcid":false,"given":"Moritz","family":"Schubotz","sequence":"additional","affiliation":[{"name":"FIZ Karlsruhe, Berlin, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4089-9647","authenticated-orcid":false,"given":"Olaf","family":"Teschke","sequence":"additional","affiliation":[{"name":"FIZ Karlsruhe, Berlin, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6544-5076","authenticated-orcid":false,"given":"Akiko","family":"Aizawa","sequence":"additional","affiliation":[{"name":"National Insutitute of Informatics, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6522-3019","authenticated-orcid":false,"given":"Bela","family":"Gipp","sequence":"additional","affiliation":[{"name":"University of G\u00f6ttingen, G\u00f6ttingen, Germany"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591960"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591960"},{"key":"e_1_3_2_1_3_1","unstructured":"Aida Amini Saadia Gabriel Peter Lin Rik Koncel-Kedziorski Yejin Choi and Hannaneh Hajishirzi. 2019. MathQA: Towards Interpretable Math Word Problem Solving with Operation-Based Formalisms. arXiv:1905.13319 [cs.CL]"},{"key":"e_1_3_2_1_4_1","volume-title":"Stephen McAleer, Albert Q. Jiang, Jia Deng, Stella Biderman, and Sean Welleck.","author":"Azerbayev Zhangir","year":"2023","unstructured":"Zhangir Azerbayev, Hailey Schoelkopf, Keiran Paster, Marco Dos Santos, Stephen McAleer, Albert Q. Jiang, Jia Deng, Stella Biderman, and Sean Welleck. 2023. Llemma: An Open Language Model For Mathematics. arXiv:2310.10631 [cs.CL]"},{"key":"e_1_3_2_1_5_1","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano Christopher Hesse and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. arXiv:2110.14168 [cs.LG]"},{"key":"e_1_3_2_1_6_1","unstructured":"Aniruddha Deb Neeva Oza Sarthak Singla Dinesh Khandelwal Dinesh Garg and Parag Singla. 2023. Fill in the Blank: Exploring and Enhancing LLM Capabilities for Backward Reasoning in Math Word Problems. arXiv:2310.01991 [cs.CL]"},{"key":"e_1_3_2_1_7_1","volume-title":"Yujiu Yang, Minlie Huang, Nan Duan, and Weizhu Chen.","author":"Gou Zhibin","year":"2023","unstructured":"Zhibin Gou, Zhihong Shao, Yeyun Gong, yelong shen, Yujiu Yang, Minlie Huang, Nan Duan, and Weizhu Chen. 2023. ToRA: A Tool-Integrated Reasoning Agent for Mathematical Problem Solving. arXiv:2309.17452 [cs.CL]"},{"key":"e_1_3_2_1_8_1","unstructured":"Dan Hendrycks Collin Burns Saurav Kadavath Akul Arora Steven Basart Eric Tang Dawn Song and Jacob Steinhardt. 2021. Measuring Mathematical Problem Solving With the MATH Dataset. arXiv:2103.03874 [cs.LG]"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Pengfei Hong Deepanway Ghosal Navonil Majumder Somak Aditya Rada Mihalcea and Soujanya Poria. 2024. Stuck in the Quicksand of Numeracy Far from AGI Summit: Evaluating LLMs' Mathematical Competency through Ontologyguided Perturbations. arXiv:2401.09395 [cs.CL]","DOI":"10.18653\/v1\/2025.findings-acl.1172"},{"key":"e_1_3_2_1_10_1","unstructured":"Albert Q. Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra Singh Chaplot Diego de las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier L\u00e9lio Renard Lavaud Marie-Anne Lachaux Pierre Stock Teven Le Scao Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William El Sayed. 2023. Mistral 7B. arXiv:2310.06825 [cs.CL]"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.307"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1705.04146"},{"key":"e_1_3_2_1_13_1","unstructured":"Yiheng Liu Hao He Tianle Han Xu Zhang Mengyuan Liu Jiaming Tian Yutong Zhang Jiaqi Wang Xiaohui Gao Tianyang Zhong Yi Pan Shaochen Xu Zihao Wu Zhengliang Liu Xin Zhang Shu Zhang Xintao Hu Tuo Zhang Ning Qiang Tianming Liu and Bao Ge. 2024. Understanding LLMs: A Comprehensive Overview from Training to Inference. arXiv:2401.02038 [cs.CL]"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.3390\/forecast5030030"},{"key":"e_1_3_2_1_15_1","volume-title":"Advancing Math-Aware Search: The ARQMath-3 Lab at CLEF","author":"Mansouri Behrooz","year":"2022","unstructured":"Behrooz Mansouri, Anurag Agarwal, DouglasW. Oard, and Richard Zanibbi. 2022. Advancing Math-Aware Search: The ARQMath-3 Lab at CLEF 2022. In Advances in Information Retrieval, Matthias Hagen, Suzan Verberne, Craig Macdonald, Christin Seifert, Krisztian Balog, Kjetil N\u00f8rv\u00e5g, and Vinay Setty (Eds.). Springer International Publishing, Cham, 408--415."},{"key":"e_1_3_2_1_16_1","volume-title":"CHAMP: A Competition-level Dataset for Fine-Grained Analyses of LLMs' Mathematical Reasoning Capabilities. arXiv:2401.06961 [cs.CL]","author":"Mao Yujun","year":"2024","unstructured":"Yujun Mao, Yoon Kim, and Yilun Zhou. 2024. CHAMP: A Competition-level Dataset for Fine-Grained Analyses of LLMs' Mathematical Reasoning Capabilities. arXiv:2401.06961 [cs.CL]"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605943"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413810"},{"key":"e_1_3_2_1_19_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL]"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06924-"},{"key":"e_1_3_2_1_21_1","unstructured":"Baptiste Rozi\u00e8re Jonas Gehring Fabian Gloeckle Sten Sootla Itai Gat Xiaoqing Ellen Tan Yossi Adi Jingyu Liu Romain Sauvestre Tal Remez J\u00e9r\u00e9my Rapin Artyom Kozhevnikov Ivan Evtimov Joanna Bitton Manish Bhatt Cristian Canton Ferrer Aaron Grattafiori Wenhan Xiong Alexandre D\u00e9fossez Jade Copet Faisal Azhar Hugo Touvron Louis Martin Nicolas Usunier Thomas Scialom and Gabriel Synnaeve. 2024. Code Llama: Open Foundation Models for Code. arXiv:2308.12950 [cs.CL]"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-031--56066--8_2"},{"key":"e_1_3_2_1_23_1","unstructured":"LLaMa-2 Team. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs.CL]"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06747--5"},{"key":"e_1_3_2_1_25_1","unstructured":"Felipe Urrutia and Roberto Araya. 2023. Who's the Best Detective? LLMs vs. MLs in Detecting Incoherent Fourth Grade Math Answers. arXiv:2304.11257 [cs.CL]"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1177\/07356331231191174"},{"key":"e_1_3_2_1_27_1","unstructured":"Xiang Yue Xingwei Qu Ge Zhang Yao Fu Wenhao Huang Huan Sun Yu Su and Wenhu Chen. 2023. MAmmoTH: Building Math Generalist Models through Hybrid Instruction Tuning. arXiv:2309.05653 [cs.CL]"},{"key":"e_1_3_2_1_28_1","unstructured":"Wei Zhong. 2023. Effective Math-Aware Ad-Hoc Retrieval based on Structure Search and Semantic Similarities. Ph.D. Dissertation. http:\/\/hdl.handle.net\/10012\/19865"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591746"},{"key":"e_1_3_2_1_30_1","unstructured":"Aojun Zhou KeWang Zimu Lu Weikang Shi Sichun Luo Zipeng Qin Shaoqing Lu Anya Jia Linqi Song Mingjie Zhan and Hongsheng Li. 2023. Solving Challenging Math Word Problems Using GPT-4 Code Interpreter with Code-based Self-Verification. arXiv:2308.07921 [cs.CL]"}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Washington DC USA","acronym":"SIGIR 2024","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657945","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657945","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:38:47Z","timestamp":1755841127000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657945"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":30,"alternative-id":["10.1145\/3626772.3657945","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657945","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}