{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T23:13:45Z","timestamp":1776122025464,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["No. 2021ZD0114002"],"award-info":[{"award-number":["No. 2021ZD0114002"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["No.22511105901,No.21511100402"],"award-info":[{"award-number":["No.22511105901,No.21511100402"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679881","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"3730-3735","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Boosting Large Language Models with Socratic Method for Conversational Mathematics Teaching"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9986-4606","authenticated-orcid":false,"given":"Yuyang","family":"Ding","sequence":"first","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2220-4009","authenticated-orcid":false,"given":"Hanglei","family":"Hu","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2589-0164","authenticated-orcid":false,"given":"Jie","family":"Zhou","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5602-1877","authenticated-orcid":false,"given":"Qin","family":"Chen","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7914-1978","authenticated-orcid":false,"given":"Bo","family":"Jiang","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4723-5486","authenticated-orcid":false,"given":"Liang","family":"He","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. 
arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.bea-1.57"},{"key":"e_1_3_2_1_3_1","volume-title":"Lasang Jimba Tamang, and Vasile Rus","author":"Alshaikh Zeyad","year":"2020","unstructured":"Zeyad Alshaikh, Lasang Jimba Tamang, and Vasile Rus. 2020. Experiments with a socratic intelligent tutoring system for source code understanding. In FLAIRS."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of ACL. 2357--2367","author":"Amini Aida","year":"2019","unstructured":"Aida Amini, Saadia Gabriel, Shanchuan Lin, Rik Koncel-Kedziorski, Yejin Choi, and Hannaneh Hajishirzi. 2019. MathQA: Towards Interpretable Math Word Problem Solving with Operation-Based Formalisms. In Proceedings of ACL. 2357--2367."},{"key":"e_1_3_2_1_5_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang et al. 2023. Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65--72","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65--72."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Maciej Besta Nils Blach Ales Kubicek Robert Gerstenberger Lukas Gianinazzi Joanna Gajda Tomasz Lehmann Michal Podstawski Hubert Niewiadomski Piotr Nyczyk et al. 2023. Graph of thoughts: Solving elaborate problems with large language models. arXiv preprint arXiv:2308.09687 (2023).","DOI":"10.1609\/aaai.v38i16.29720"},{"key":"e_1_3_2_1_8_1","unstructured":"Daniel Bobrow et al. 1964. Natural language input for a computer problem solving system. (1964)."},{"key":"e_1_3_2_1_9_1","volume-title":"An integrated model of skill in solving elementary word problems. Cognition and instruction","author":"Briars Diane J","year":"1984","unstructured":"Diane J Briars and Jill H Larkin. 1984. An integrated model of skill in solving elementary word problems. Cognition and instruction, Vol. 1, 3 (1984), 245--296."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Thomas C Brickhouse and Nicholas D Smith. 2009. Socratic teaching and Socratic method. (2009).","DOI":"10.1093\/oxfordhb\/9780195312881.003.0011"},{"key":"e_1_3_2_1_11_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. 2020. Language models are few-shot learners. NeurIPS, Vol. 33 (2020), 1877--1901.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_12_1","volume-title":"Mei-Hung Chiu, and Christian LaVancher.","author":"Chi Michelene TH","year":"1994","unstructured":"Michelene TH Chi, Nicholas De Leeuw, Mei-Hung Chiu, and Christian LaVancher. 1994. Eliciting self-explanations improves understanding. Cognitive science, Vol. 
18, 3 (1994), 439--477."},{"key":"e_1_3_2_1_13_1","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano Christopher Hesse and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. arxiv: 2110.14168 [cs.LG]"},{"key":"e_1_3_2_1_14_1","unstructured":"Jelle Couperus. 2023. Large Language Models and Mathematical Understanding. Master's thesis."},{"key":"e_1_3_2_1_15_1","volume-title":"The NCTE Transcripts: A dataset of elementary math classroom transcripts. arXiv preprint arXiv:2211.11772","author":"Demszky Dorottya","year":"2022","unstructured":"Dorottya Demszky and Heather Hill. 2022. The NCTE Transcripts: A dataset of elementary math classroom transcripts. arXiv preprint arXiv:2211.11772 (2022)."},{"key":"e_1_3_2_1_16_1","unstructured":"Edward A Feigenbaum Julian Feldman et al. 1963. Computers and thought. Vol. 7. New York McGraw-Hill."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.3758\/BF03207654"},{"key":"e_1_3_2_1_18_1","unstructured":"Rick Garlikov. 2001. The Socratic method: Teaching by asking instead of by telling. Website http:\/\/www. garlikov. com\/Soc_Meth. html (2001)."},{"key":"e_1_3_2_1_19_1","volume-title":"Measuring mathematical problem solving with the math dataset. arXiv preprint arXiv:2103.03874","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Saurav Kadavath, Akul Arora, Steven Basart, Eric Tang, Dawn Song, and Jacob Steinhardt. 2021. Measuring mathematical problem solving with the math dataset. arXiv preprint arXiv:2103.03874 (2021)."},{"key":"e_1_3_2_1_20_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_2_1_21_1","volume-title":"Scaling laws for neural language models. arXiv preprint arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. 2020. Scaling laws for neural language models. arXiv preprint arXiv:2001.08361 (2020)."},{"key":"e_1_3_2_1_22_1","volume-title":"Large language models are state-of-the-art evaluators of translation quality. arXiv preprint arXiv:2302.14520","author":"Kocmi Tom","year":"2023","unstructured":"Tom Kocmi and Christian Federmann. 2023. Large language models are state-of-the-art evaluators of translation quality. arXiv preprint arXiv:2302.14520 (2023)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P14-1026"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1080\/08993400500224286"},{"key":"e_1_3_2_1_25_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 
74--81."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1015"},{"key":"e_1_3_2_1_27_1","volume-title":"Goat: Fine-tuned LLaMA Outperforms GPT-4 on Arithmetic Tasks","author":"Liu Tiedong","year":"2023","unstructured":"Tiedong Liu and Bryan Kian Hsiang Low. 2023. Goat: Fine-tuned LLaMA Outperforms GPT-4 on Arithmetic Tasks. http:\/\/arxiv.org\/abs\/2305.14201 arXiv:2305.14201 [cs]."},{"key":"e_1_3_2_1_28_1","unstructured":"Wentao Liu Hanglei Hu Jie Zhou Yuyang Ding Junsong Li Jiayi Zeng Mengliang He Qin Chen Bo Jiang Aimin Zhou et al. 2023. Mathematical Language Models: A Survey. arXiv preprint arXiv:2312.07622 (2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.817"},{"key":"e_1_3_2_1_30_1","volume-title":"Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct. arXiv preprint arXiv:2308.09583","author":"Luo Haipeng","year":"2023","unstructured":"Haipeng Luo, Qingfeng Sun, Can Xu, Pu Zhao, Jianguang Lou, Chongyang Tao, Xiubo Geng, Qingwei Lin, Shifeng Chen, and Dongmei Zhang. 2023. Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct. arXiv preprint arXiv:2308.09583 (2023)."},{"key":"e_1_3_2_1_31_1","volume-title":"Tanmay Sinha, Manu Kapur, Iryna Gurevych, and Mrinmaya Sachan.","author":"Macina Jakub","year":"2023","unstructured":"Jakub Macina, Nico Daheim, Sankalan Pal Chowdhury, Tanmay Sinha, Manu Kapur, Iryna Gurevych, and Mrinmaya Sachan. 2023. MathDial: A Dialogue Tutoring Dataset with Rich Pedagogical Properties Grounded in Math Reasoning Problems. arXiv preprint arXiv:2305.14536 (2023)."},{"key":"e_1_3_2_1_32_1","volume-title":"Tanmay Sinha, Manu Kapur, Iryna Gurevych, and Mrinmaya Sachan.","author":"Macina Jakub","year":"2023","unstructured":"Jakub Macina, Nico Daheim, Sankalan Pal Chowdhury, Tanmay Sinha, Manu Kapur, Iryna Gurevych, and Mrinmaya Sachan. 2023. MathDial: A Dialogue Tutoring Dataset with Rich Pedagogical Properties Grounded in Math Reasoning Problems. In EMNLP, Houda Bouamor, Juan Pino, and Kalika Bali (Eds.). Association for Computational Linguistics, 5602--5621. https:\/\/aclanthology.org\/2023.findings-emnlp.372"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.3991\/ijet.v18i20.42979"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1202"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of SIGCSE. 148--152","author":"Murphy Laurie","year":"2005","unstructured":"Laurie Murphy and Josh Tenenberg. 2005. Do computer science students know what they know? A calibration study of data structure knowledge. In Proceedings of SIGCSE. 148--152."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1238"},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311--318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311--318."},{"key":"e_1_3_2_1_38_1","volume-title":"TALM: Tool Augmented Language Models. arxiv: 2205.12255 [cs.CL]","author":"Parisi Aaron","year":"2022","unstructured":"Aaron Parisi, Yao Zhao, and Noah Fiedel. 2022. TALM: Tool Augmented Language Models. 
arxiv: 2205.12255 [cs.CL]"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.168"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.255"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1202"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00118"},{"key":"e_1_3_2_1_43_1","volume-title":"Toolformer: Language Models Can Teach Themselves to Use Tools. arxiv: 2302.04761 [cs.CL]","author":"Schick Timo","year":"2023","unstructured":"Timo Schick, Jane Dwivedi-Yu, Roberto Dess\u00ec, Roberta Raileanu, Maria Lomeli, Luke Zettlemoyer, Nicola Cancedda, and Thomas Scialom. 2023. Toolformer: Language Models Can Teach Themselves to Use Tools. arxiv: 2302.04761 [cs.CL]"},{"key":"e_1_3_2_1_44_1","unstructured":"John Schulman B Zoph C Kim J Hilton J Menick J Weng JFC Uribe L Fedus L Metz M Pokorny et al. 2022. ChatGPT: Optimizing language models for dialogue. In OpenAI blog."},{"key":"e_1_3_2_1_45_1","volume-title":"Distilling multi-step reasoning capabilities of large language models into smaller models via semantic decompositions. arXiv preprint arXiv:2212.00193","author":"Shridhar Kumar","year":"2022","unstructured":"Kumar Shridhar, Alessandro Stolfo, and Mrinmaya Sachan. 2022. Distilling multi-step reasoning capabilities of large language models into smaller models via semantic decompositions. arXiv preprint arXiv:2212.00193 (2022)."},{"key":"e_1_3_2_1_46_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1088"},{"key":"e_1_3_2_1_48_1","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. NeurIPS, Vol. 35 (2022), 24824--24837.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_49_1","volume-title":"Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244","author":"Xu Can","year":"2023","unstructured":"Can Xu, Qingfeng Sun, Kai Zheng, Xiubo Geng, Pu Zhao, Jiazhan Feng, Chongyang Tao, and Daxin Jiang. 2023. Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244 (2023)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"e_1_3_2_1_51_1","volume-title":"Tree of thoughts: Deliberate problem solving with large language models. arXiv preprint arXiv:2305.10601","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran, Thomas L Griffiths, Yuan Cao, and Karthik Narasimhan. 2023. Tree of thoughts: Deliberate problem solving with large language models. arXiv preprint arXiv:2305.10601 (2023)."},{"key":"e_1_3_2_1_52_1","volume-title":"Metamath: Bootstrap your own mathematical questions for large language models. 
arXiv preprint arXiv:2309.12284","author":"Yu Longhui","year":"2023","unstructured":"Longhui Yu, Weisen Jiang, Han Shi, Jincheng Yu, Zhengying Liu, Yu Zhang, James T Kwok, Zhenguo Li, Adrian Weller, and Weiyang Liu. 2023. Metamath: Bootstrap your own mathematical questions for large language models. arXiv preprint arXiv:2309.12284 (2023)."},{"key":"e_1_3_2_1_53_1","first-page":"27263","article-title":"Bartscore: Evaluating generated text as text generation","volume":"34","author":"Yuan Weizhe","year":"2021","unstructured":"Weizhe Yuan, Graham Neubig, and Pengfei Liu. 2021. Bartscore: Evaluating generated text as text generation. Advances in Neural Information Processing Systems, Vol. 34 (2021), 27263--27277.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_54_1","volume-title":"Mammoth: Building math generalist models through hybrid instruction tuning. arXiv preprint arXiv:2309.05653","author":"Yue Xiang","year":"2023","unstructured":"Xiang Yue, Xingwei Qu, Ge Zhang, Yao Fu, Wenhao Huang, Huan Sun, Yu Su, and Wenhu Chen. 2023. Mammoth: Building math generalist models through hybrid instruction tuning. arXiv preprint arXiv:2309.05653 (2023)."},{"key":"e_1_3_2_1_55_1","volume-title":"Ape210k: A large-scale and template-rich dataset of math word problems. arXiv preprint arXiv:2009.11506","author":"Zhao Wei","year":"2020","unstructured":"Wei Zhao, Mingyue Shang, Yang Liu, Liang Wang, and Jingming Liu. 2020. Ape210k: A large-scale and template-rich dataset of math word problems. arXiv preprint arXiv:2009.11506 (2020)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1096"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679881","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679881","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:08Z","timestamp":1750294688000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679881"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":56,"alternative-id":["10.1145\/3627673.3679881","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679881","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}