{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T01:20:48Z","timestamp":1772846448323,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,8]],"date-time":"2024-04-08T00:00:00Z","timestamp":1712534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,8]]},"DOI":"10.1145\/3605098.3636030","type":"proceedings-article","created":{"date-parts":[[2024,5,21]],"date-time":"2024-05-21T17:59:16Z","timestamp":1716314356000},"page":"65-73","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["ChatGPT as a Math Questioner? Evaluating ChatGPT on Generating Pre-university Math Questions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7840-293X","authenticated-orcid":false,"given":"Phuoc Van Long","family":"Pham","sequence":"first","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1708-113X","authenticated-orcid":false,"given":"Anh Vu","family":"Duc","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2765-558X","authenticated-orcid":false,"given":"Nhat Minh","family":"Hoang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6424-8388","authenticated-orcid":false,"given":"Xuan Long","family":"Do","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6062-207X","authenticated-orcid":false,"given":"Anh Tuan","family":"Luu","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2024,5,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In IEE valuation@ACL. https:\/\/api.semanticscholar.org\/CorpusID:7164502","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In IEE valuation@ACL. https:\/\/api.semanticscholar.org\/CorpusID:7164502"},{"key":"e_1_3_2_1_2_1","unstructured":"Tom B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. arXiv:2005.14165 [cs.CL]"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.218"},{"key":"e_1_3_2_1_4_1","unstructured":"Hyung Won Chung Le Hou Shayne Longpre Barret Zoph Yi Tay William Fedus Eric Li Xuezhi Wang Mostafa Dehghani Siddhartha Brahma et al. 2022. Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416 (2022)."},{"key":"e_1_3_2_1_5_1","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano et al. 2021. Training verifiers to solve math word problems. arXiv preprint arXiv:2110.14168 (2021)."},{"key":"e_1_3_2_1_6_1","volume-title":"Liangming Pan, Nancy F Chen, and Ai Ti Aw.","author":"Do Xuan Long","year":"2023","unstructured":"Xuan Long Do, Bowei Zou, Shafiq Joty, Anh Tai Tran, Liangming Pan, Nancy F Chen, and Ai Ti Aw. 2023. Modeling What-to-ask and How-to-ask for Answer-unaware Conversational Question Generation. arXiv preprint arXiv:2305.03088 (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 29th International Conference on Computational Linguistics. International Committee on Computational Linguistics, Gyeongju, Republic of Korea, 580--591","author":"Do Xuan Long","year":"2022","unstructured":"Xuan Long Do, Bowei Zou, Liangming Pan, Nancy F. Chen, Shafiq Joty, and Ai Ti Aw. 2022. CoHS-CQG: Context and History Selection for Conversational Question Generation. In Proceedings of the 29th International Conference on Computational Linguistics. International Committee on Computational Linguistics, Gyeongju, Republic of Korea, 580--591. https:\/\/aclanthology.org\/2022.coling-1.48"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1123"},{"key":"e_1_3_2_1_9_1","volume-title":"PAL: Program-aided Language Models. arXiv preprint arXiv:2211.10435","author":"Gao Luyu","year":"2022","unstructured":"Luyu Gao, Aman Madaan, Shuyan Zhou, Uri Alon, Pengfei Liu, Yiming Yang, Jamie Callan, and Graham Neubig. 2022. PAL: Program-aided Language Models. arXiv preprint arXiv:2211.10435 (2022)."},{"key":"e_1_3_2_1_10_1","unstructured":"Dan Hendrycks Collin Burns Saurav Kadavath Akul Arora Steven Basart Eric Tang Dawn Song and Jacob Steinhardt. 2021. Measuring Mathematical Problem Solving With the MATH Dataset. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2). https:\/\/openreview.net\/forum?id=7Bywt2mQsCe"},{"key":"e_1_3_2_1_11_1","volume-title":"Mathprompter: Mathematical reasoning using large language models. arXiv preprint arXiv:2303.05398","author":"Imani Shima","year":"2023","unstructured":"Shima Imani, Liang Du, and Harsh Shrivastava. 2023. Mathprompter: Mathematical reasoning using large language models. arXiv preprint arXiv:2303.05398 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"The critical importance of retrieval for learning. science 319, 5865","author":"Karpicke Jeffrey D","year":"2008","unstructured":"Jeffrey D Karpicke and Henry L Roediger III. 2008. The critical importance of retrieval for learning. science 319, 5865 (2008), 966--968."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Enkelejda Kasneci Kathrin Sessler Stefan K\u00fcchemann Maria Bannert Daryna Dementieva Frank Fischer Urs Gasser George Louis Groh Stephan G\u00fcnnemann Eyke H\u00fcllermeier Stephan Krusche Gitta Kutyniok Tilman Michaeli Claudia Nerdel J. Pfeffer Oleksandra Poquet Michael Sailer Albrecht Schmidt Tina Seidel Matthias Stadler Jochen Weller Jochen Kuhn and Gjergji Kasneci. 2023. ChatGPT for good? On opportunities and challenges of large language models for education. Learning and Individual Differences (2023).","DOI":"10.35542\/osf.io\/5er8f"},{"key":"e_1_3_2_1_14_1","volume-title":"Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa.","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang Shane Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large Language Models are Zero-Shot Reasoners. ArXiv abs\/2205.11916 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"Reasoning Circuits: Few-shot Multihop Question Generation with Structured Rationales. arXiv preprint arXiv:2211.08466","author":"Kulshreshtha Saurabh","year":"2022","unstructured":"Saurabh Kulshreshtha and Anna Rumshisky. 2022. Reasoning Circuits: Few-shot Multihop Question Generation with Structured Rationales. arXiv preprint arXiv:2211.08466 (2022)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s40593-019-00186-y"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1014"},{"key":"e_1_3_2_1_19_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:964287","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:964287"},{"key":"e_1_3_2_1_20_1","unstructured":"Tianqiao Liu Qiang Fang Wenbiao Ding Hang Li Zhongqin Wu and Zitao Liu. 2021. Mathematical Word Problem Generation from Commonsense Knowledge Graph and Equations. arXiv:2010.06196 [cs.CL]"},{"key":"e_1_3_2_1_21_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Bkg6RiCqY7","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"e_1_3_2_1_22_1","first-page":"159","article-title":"Expert-authored and machine-generated short-answer questions for assessing students learning performance","volume":"24","author":"Lu Owen HT","year":"2021","unstructured":"Owen HT Lu, Anna YQ Huang, Danny CL Tsai, and Stephen JH Yang. 2021. Expert-authored and machine-generated short-answer questions for assessing students learning performance. Educational Technology & Society 24, 3 (2021), 159--173.","journal-title":"Educational Technology & Society"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-naacl.111"},{"key":"e_1_3_2_1_24_1","unstructured":"OpenAI. 2022. Introducing ChatGPT. https:\/\/openai.com\/blog\/chatgpt"},{"key":"e_1_3_2_1_25_1","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Gray John Schulman Jacob Hilton Fraser Kelton Luke Miller Maddie Simens Amanda Askell Peter Welinder Paul Christiano Jan Leike and Ryan Lowe. 2022. Training language models to follow instructions with human feedback. In Advances in Neural Information Processing Systems Alice H. Oh Alekh Agarwal Danielle Belgrave and Kyunghyun Cho (Eds.). https:\/\/openreview.net\/forum?id=TG8KACxEON"},{"key":"e_1_3_2_1_26_1","volume-title":"Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/ CorpusID:11080756","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a Method for Automatic Evaluation of Machine Translation. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/ CorpusID:11080756"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Arkil Patel Satwik Bhattamishra and Navin Goyal. 2021. Are NLP Models really able to Solve Simple Math Word Problems? arXiv:2103.07191 [cs.CL]","DOI":"10.18653\/v1\/2021.naacl-main.168"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1002\/j.2168-9830.2004.tb00809.x"},{"key":"e_1_3_2_1_29_1","unstructured":"Weizhen Qi Yu Yan Yeyun Gong Dayiheng Liu Nan Duan Jiusheng Chen Ruofei Zhang and Ming Zhou. 2020. ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training. arXiv:2001.04063 [cs.CL]"},{"key":"e_1_3_2_1_30_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_31_1","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21, 140 (2020), 1--67. http:\/\/jmlr.org\/papers\/v21\/20-074.html","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1162\/coli_a_00360"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","unstructured":"Lei Shen Fandong Meng Jinchao Zhang Yang Feng and Jie Zhou. 2021. GTM: A Generative Triple-wise Model for Conversational Question Generation. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers). Association for Computational Linguistics Online 3495--3506. 10.18653\/v1\/2021.acl-long.271","DOI":"10.18653\/v1\/2021.acl-long.271"},{"key":"e_1_3_2_1_34_1","volume-title":"Automatic Generation of Socratic Subquestions for Teaching Math Word Problems. arXiv preprint arXiv:2211.12835","author":"Shridhar Kumar","year":"2022","unstructured":"Kumar Shridhar, Jakub Macina, Mennatallah El-Assady, Tanmay Sinha, Manu Ka-pur, and Mrinmaya Sachan. 2022. Automatic Generation of Socratic Subquestions for Teaching Math Word Problems. arXiv preprint arXiv:2211.12835 (2022)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5220\/0004795300140025"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11858-020-01130-4"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.484"},{"key":"e_1_3_2_1_38_1","volume-title":"Towards Human-Like Educational Question Generation with Large Language Models. In International Conference on Artificial Intelligence in Education.","author":"Wang Zichao","year":"2022","unstructured":"Zichao Wang, Jakob Valdez, Debshila Basu Mallick, and Richard Baraniuk. 2022. Towards Human-Like Educational Question Generation with Large Language Models. In International Conference on Artificial Intelligence in Education."},{"key":"e_1_3_2_1_39_1","volume-title":"Brian Lester, Nan Du, Andrew M. Dai, and Quoc V. Le.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Maarten Bosma, Vincent Y. Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M. Dai, and Quoc V. Le. 2022. Finetuned Language Models Are Zero-Shot Learners. arXiv:2109.01652 [cs.CL]"},{"key":"e_1_3_2_1_40_1","volume-title":"Quoc V Le, Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed H Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_41_1","volume-title":"Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush.","author":"Wolf Thomas","year":"2020","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, Joe Davison, Sam Shleifer, Patrick von Platen, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. 2020. HuggingFace's Transformers: State-of-the-art Natural Language Processing. arXiv:1910.03771 [cs.CL]"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3155284"},{"key":"e_1_3_2_1_43_1","volume-title":"BERTScore: Evaluating Text Generation with BERT. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SkeHuCVFDr","author":"Zhang Tianyi","year":"2020","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu*, Kilian Q. Weinberger, and Yoav Artzi. 2020. BERTScore: Evaluating Text Generation with BERT. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SkeHuCVFDr"}],"event":{"name":"SAC '24: 39th ACM\/SIGAPP Symposium on Applied Computing","location":"Avila Spain","acronym":"SAC '24","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the 39th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605098.3636030","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605098.3636030","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:59Z","timestamp":1750291439000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605098.3636030"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,8]]},"references-count":43,"alternative-id":["10.1145\/3605098.3636030","10.1145\/3605098"],"URL":"https:\/\/doi.org\/10.1145\/3605098.3636030","relation":{},"subject":[],"published":{"date-parts":[[2024,4,8]]},"assertion":[{"value":"2024-05-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}