{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,17]],"date-time":"2026-06-17T20:08:19Z","timestamp":1781726899470,"version":"3.54.5"},"reference-count":67,"publisher":"Elsevier BV","issue":"1","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T00:00:00Z","timestamp":1776816000000},"content-version":"vor","delay-in-days":417,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Artificial Intelligence in Education"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s40593-024-00403-3","type":"journal-article","created":{"date-parts":[[2024,5,5]],"date-time":"2024-05-05T18:03:14Z","timestamp":1714932194000},"page":"367-397","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":24,"title":["GPT-4 in Education: Evaluating Aptness, Reliability, and Loss of Coherence in Solving Calculus Problems and Grading Submissions"],"prefix":"10.1016","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6956-7513","authenticated-orcid":false,"given":"Alberto","family":"Gandolfi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1007\/s40593-024-00403-3_bib1","unstructured":"AoPSOnline. (2022). 2021 SMT Team Round - Stanford Math Tournament. Retrieved March 30, 2023, from https:\/\/artofproblemsolving.com\/community\/c4h2775229p24340532."},{"key":"10.1007\/s40593-024-00403-3_bib2","series-title":"International Conference on Artificial Intelligence in Education","first-page":"685","article-title":"Enhancing auto-scoring of student open responses in the presence of mathematical terms and expressions","author":"Baral","year":"2022"},{"key":"10.1007\/s40593-024-00403-3_bib3","article-title":"Improving Automated Scoring of Student Open Responses in Mathematics","author":"Baral","year":"2021","journal-title":"International Educational Data Mining Society"},{"key":"10.1007\/s40593-024-00403-3_bib4","article-title":"Auto-Scoring Student Responses with Images in Mathematics","author":"Baral","year":"2023","journal-title":"International Educational Data Mining Society"},{"key":"10.1007\/s40593-024-00403-3_bib5","series-title":"Personalized education and artificial intelligence in the United States, China, and India: A systematic review using a Human-In-The-Loop model. Computers and Education: Artificial Intelligence.","first-page":"100068","author":"Bhutoria","year":"2022"},{"issue":"6","key":"10.1007\/s40593-024-00403-3_bib6","doi-asserted-by":"crossref","DOI":"10.1073\/pnas.2218523120","article-title":"Using cognitive psychology to understand GPT-3","volume":"120","author":"Binz","year":"2023","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"3","key":"10.1007\/s40593-024-00403-3_bib7","doi-asserted-by":"crossref","first-page":"823","DOI":"10.1111\/jcal.12793","article-title":"Leveraging natural language processing to support automated assessment and feedback for student open responses in mathematics","volume":"39","author":"Botelho","year":"2023","journal-title":"Journal of Computer Assisted Learning"},{"key":"10.1007\/s40593-024-00403-3_bib8","unstructured":"Bubeck, S\u00e9bastien, et al. (2023) Sparks of artificial general intelligence: Early experiments with gpt-4. arXiv preprint arXiv:2303.12712."},{"key":"10.1007\/s40593-024-00403-3_bib9","series-title":"Human resource management in the age of generative artificial intelligence: Perspectives and research directions on ChatGPT. Human Resource Management Journal.","author":"Budhwar","year":"2023"},{"issue":"2","key":"10.1007\/s40593-024-00403-3_bib10","article-title":"Enough of the chit-chat: A comparative analysis of four AI chatbots for calculus and statistics","volume":"6","author":"Calonge","year":"2023","journal-title":"Journal of Applied Learning and Teaching"},{"key":"10.1007\/s40593-024-00403-3_bib11","doi-asserted-by":"crossref","DOI":"10.3389\/fpsyg.2024.1221177","article-title":"Grading by AI Makes Me Feel Fairer? How Different Evaluators Affect College Students\u2019 Perception of Fairness","volume":"15","author":"Chai","year":"2024","journal-title":"Frontiers in Psychology"},{"key":"10.1007\/s40593-024-00403-3_bib12","unstructured":"Chat-GPT-LangChain (2023). Retrieved February 12, 2023, from https:\/\/huggingface.co\/spaces\/JavaFXpert\/Chat-GPT-LangChain"},{"key":"10.1007\/s40593-024-00403-3_bib13","doi-asserted-by":"crossref","first-page":"75264","DOI":"10.1109\/ACCESS.2020.2988510","article-title":"Artificial intelligence in education: A review","volume":"8","author":"Chen","year":"2020","journal-title":"IEEE Access"},{"key":"10.1007\/s40593-024-00403-3_bib14","doi-asserted-by":"crossref","unstructured":"Chen, L., Zaharia, M., & Zou, J. (2023). How is ChatGPT\u2019s behavior changing over time? arXiv preprint arXiv:2307.09009.","DOI":"10.1162\/99608f92.5317da47"},{"key":"10.1007\/s40593-024-00403-3_bib15","unstructured":"Chen, M., et al. (2021). Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374."},{"key":"10.1007\/s40593-024-00403-3_bib16","series-title":"Certified programming with dependent types: A pragmatic introduction to the Coq proof assistant.","author":"Chlipala","year":"2022"},{"key":"10.1007\/s40593-024-00403-3_bib17","unstructured":"Chung, H. W., et al. (2022). Scaling Instruction-Finetuned Language Models. arXiv preprint arXiv:2210.11416."},{"key":"10.1007\/s40593-024-00403-3_bib18","unstructured":"Cobbe, K., Kosaraju, V., Bavarian, M., Chen, M., Jun, H., Kaiser, L., \u2026 & Tworek, J. (2021). Training verifiers to solve math word problems. arXiv preprint arXiv:2110.14168."},{"issue":"1","key":"10.1007\/s40593-024-00403-3_bib19","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s41239-023-00392-8","article-title":"Artificial intelligence in higher education: The state of the field","volume":"20","author":"Crompton","year":"2023","journal-title":"International Journal of Educational Technology in Higher Education"},{"key":"10.1007\/s40593-024-00403-3_bib20","doi-asserted-by":"crossref","unstructured":"Crothers, E., Japkowicz, N., & Viktor, H. (2022). Machine Generated Text: A Comprehensive Survey of Threat Models and Detection Methods. arXiv preprint arXiv:2210.07321.","DOI":"10.1109\/ACCESS.2023.3294090"},{"key":"10.1007\/s40593-024-00403-3_bib21","unstructured":"Dao, X.-Q., & Le, N.-B. (2023). Investigating the Effectiveness of ChatGPT in Mathematical Reasoning and Problem Solving: Evidence from the Vietnamese National High School Graduation Examination. arXiv preprint arXiv:2306.06331."},{"key":"10.1007\/s40593-024-00403-3_bib22","doi-asserted-by":"crossref","unstructured":"Dao, X.-Q. (2023). Which Large Language Model should You Use in Vietnamese Education: ChatGPT, Bing Chat, or Bard? Bing Chat, or Bard.","DOI":"10.2139\/ssrn.4527476"},{"key":"10.1007\/s40593-024-00403-3_bib23","unstructured":"Davis, E., & Aaronson, S. (2023). Testing GPT-4 with Wolfram Alpha and Code Interpreter plug-ins on math and science problems. arXiv preprint arXiv:2308.05713."},{"issue":"4","key":"10.1007\/s40593-024-00403-3_bib24","article-title":"ChatGPT in Education: Empowering educators through methods for recognition and assessment","volume":"10","year":"2023","journal-title":"Informatics"},{"issue":"8","key":"10.1007\/s40593-024-00403-3_bib25","doi-asserted-by":"crossref","first-page":"1061","DOI":"10.1080\/0020739X.2010.493241","article-title":"Teaching calculus with Wolfram| Alpha","volume":"41","author":"Dimiceli","year":"2010","journal-title":"International Journal of Mathematical Education in Science and Technology"},{"key":"10.1007\/s40593-024-00403-3_bib26","doi-asserted-by":"crossref","unstructured":"Erickson, J. A., Botelho, A. F., McAteer, S., Varatharaj, A., & Heffernan, N. T. (2020). The automated grading of student open responses in mathematics Proceedings of the Tenth International Conference on Learning Analytics & Knowledge, Frankfurt, Germany.","DOI":"10.1145\/3375462.3375523"},{"key":"10.1007\/s40593-024-00403-3_bib27","unstructured":"European Commission (2023). Ethical Guidelines on the Use of Artificial Intelligence (AI) and Data in Teaching and Learning for Educators. Available online: https:\/\/op.europa.eu\/en\/publication-detail\/-\/publication\/d81a0d54-5348-11ed-92ed-01aa75ed71a1\/language-en (accessed on February 12, 2024)."},{"key":"10.1007\/s40593-024-00403-3_bib28","doi-asserted-by":"crossref","first-page":"232","DOI":"10.18653\/v1\/2023.bea-1.20","article-title":"Towards Extracting and Understanding the Implicit Rubrics of Transformer Based Automatic Essay Scoring Models","author":"Fiacco","year":"2023","journal-title":"Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023)"},{"key":"10.1007\/s40593-024-00403-3_bib29","article-title":"Mathematical capabilities of chatgpt","volume":"36","author":"Frieder","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1007\/s40593-024-00403-3_bib30","series-title":"Comparing scientific abstracts generated by ChatGPT to original abstracts using an artificial intelligence output detector, plagiarism detector, and blinded human reviewers. bioRxiv","author":"Gao","year":"2022"},{"key":"10.1007\/s40593-024-00403-3_bib31","doi-asserted-by":"crossref","unstructured":"Gao, R.; Merzdorf, H.E.; Anwar, S.; Hipwell, M.C.; Srinivasa, A. (2023) Automatic Assessment of Text-Based Responses in Post-Secondary Education: A Systematic Review. arXiv 2023, arXiv:2308.16151.","DOI":"10.1016\/j.caeai.2024.100206"},{"key":"10.1007\/s40593-024-00403-3_bib32","doi-asserted-by":"crossref","first-page":"30583","DOI":"10.52202\/068431-2217","article-title":"What can transformers learn in-context? A case study of simple function classes","volume":"35","author":"Garg","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1007\/s40593-024-00403-3_bib33","doi-asserted-by":"crossref","unstructured":"Ghaith, S. (2024). The Triple Attention Transformer: Advancing Contextual Coherence in Transformer Models. De Winter, Joost CF, Dimitra Dodou, and Arno HA Stienen (2023). \"ChatGPT in Education: Empowering educators through methods for recognition and assessment.\" Informatics. Vol. 10. No. 4. MDPI.","DOI":"10.3390\/informatics10040087"},{"key":"10.1007\/s40593-024-00403-3_bib34","unstructured":"Huang, J., Chen, X., Mishra, S., Zheng, H. S., Yu, A. W., Song, X., & Zhou, D. (2023). Large language models cannot self-correct reasoning yet. arXiv preprint arXiv:2310.01798."},{"issue":"1","key":"10.1007\/s40593-024-00403-3_bib35","doi-asserted-by":"crossref","first-page":"12187","DOI":"10.1038\/s41598-023-38964-3","article-title":"Perception, performance, and detectability of conversational artificial intelligence across 32 university courses","volume":"13","author":"Ibrahim","year":"2023","journal-title":"Scientific Reports"},{"key":"10.1007\/s40593-024-00403-3_bib36","series-title":"The impact of artificial intelligence on learning, teaching, and education.","author":"Ilkka","year":"2018"},{"key":"10.1007\/s40593-024-00403-3_bib37","unstructured":"Jacob, A. (2023). The Impact of Context Window Limitation on AI and Insights from GPT. Published online: www.linkedin.com\/pulse\/impact-context-window-limitation-ai-insights-from-gpt-jacob-adm\/."},{"key":"10.1007\/s40593-024-00403-3_bib38","series-title":"International Journal of Artificial Intelligence in Education.","first-page":"1","article-title":"How to Harness Generative AI to Accelerate Human Learning","author":"Johnson","year":"2023"},{"key":"10.1007\/s40593-024-00403-3_bib39","doi-asserted-by":"crossref","first-page":"102274","DOI":"10.1016\/j.lindif.2023.102274","article-title":"ChatGPT for good? On opportunities and challenges of large language models for education","volume":"103","author":"Kasneci","year":"2023","journal-title":"Learning and individual differences"},{"key":"10.1007\/s40593-024-00403-3_bib40","unstructured":"Keely, S. J. (2009). Writing Mathematical Expressions in Plain Text \u2013 Examples and Cautions. Retrieved February 11, 2023, from https:\/\/www.integreat.ca\/OL\/docs\/WritingMathPlainText.pdf."},{"key":"10.1007\/s40593-024-00403-3_bib41","unstructured":"Kumar, K. (2023). Geotechnical Parrot Tales (GPT): Overcoming GPT hallucinations with prompt engineering for geotechnical applications. arXiv preprint arXiv:2304.02138."},{"key":"10.1007\/s40593-024-00403-3_bib42","first-page":"3843","article-title":"Solving quantitative reasoning problems with language models","volume":"35","author":"Lewkowycz","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"9","key":"10.1007\/s40593-024-00403-3_bib43","first-page":"1","article-title":"Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing","volume":"55","author":"Liu","year":"2023","journal-title":"ACM Computing Surveys"},{"issue":"2","key":"10.1007\/s40593-024-00403-3_bib44","doi-asserted-by":"crossref","first-page":"100050","DOI":"10.1016\/j.rmal.2023.100050","article-title":"Exploring the potential of using an AI language model for automated essay scoring","volume":"2","author":"Mizumoto","year":"2023","journal-title":"Research Methods in Applied Linguistics"},{"key":"10.1007\/s40593-024-00403-3_bib45","series-title":"Artificial Intelligence in Education: 20th International Conference, AIED 2019, Chicago, IL, USA, June 25\u201329, 2019, Proceedings, Part II 20","first-page":"365","article-title":"A machine learning grading system using chatbots","author":"Ndukwe","year":"2019"},{"key":"10.1007\/s40593-024-00403-3_bib46","unstructured":"Nilsson, F., & Tuvstedt, J. (2023). GPT-4 as an Automatic Grader: The accuracy of grades set by GPT-4 on introductory programming assignments."},{"key":"10.1007\/s40593-024-00403-3_bib47","first-page":"100033","article-title":"Chatbots applications in education: A systematic review","volume":"2","author":"Okonkwo","year":"2021","journal-title":"Computers and Education: Artificial Intelligence"},{"key":"10.1007\/s40593-024-00403-3_bib48","unstructured":"OpenAI. (2023). GPT-4 Technical Report. arXiv preprint arXiv:2303.08774."},{"key":"10.1007\/s40593-024-00403-3_bib49","unstructured":"Ouyang, L., et al. (2022). Training language models to follow instructions with human feedback. arXiv preprint arXiv:2203.02155."},{"key":"10.1007\/s40593-024-00403-3_bib50","doi-asserted-by":"crossref","first-page":"1529","DOI":"10.3758\/s13423-019-01649-y","article-title":"Age-related differences in recall and recognition: A meta-analysis","volume":"26","author":"Rhodes","year":"2019","journal-title":"Psychonomic Bulletin & Review"},{"issue":"1","key":"10.1007\/s40593-024-00403-3_bib51","doi-asserted-by":"crossref","first-page":"88","DOI":"10.1007\/s40593-022-00289-z","article-title":"Towards trustworthy autograding of short, multi-lingual, multi-type answers","volume":"33","author":"Schneider","year":"2023","journal-title":"International Journal of Artificial Intelligence in Education"},{"key":"10.1007\/s40593-024-00403-3_bib52","first-page":"1","article-title":"Investigating the impact of backward strategy learning in a logic tutor: Aiding subgoal learning towards improved problem solving","author":"Shabrina","year":"2023","journal-title":"International Journal of Artificial Intelligence in Education"},{"key":"10.1007\/s40593-024-00403-3_bib53","unstructured":"Srivastava, A., et al. (2022). Beyond the imitation game: Quantifying and extrapolating the capabilities of language models. arXiv preprint arXiv:2206.04615."},{"key":"10.1007\/s40593-024-00403-3_bib54","series-title":"Calculus: Early transcendentals. International metric edition. 9th Edition.","first-page":"759","author":"Stewart","year":"2020"},{"key":"10.1007\/s40593-024-00403-3_bib55","series-title":"A student\u2019s take on challenges of AI-driven grading in higher education. B.S. thesis","author":"Stoica","year":"2022"},{"key":"10.1007\/s40593-024-00403-3_bib56","doi-asserted-by":"crossref","unstructured":"Suzgun, et al. (2022). Challenging BIG-Bench tasks and whether chain-of-thought can solve them. arXiv preprint arXiv:2210.09261.","DOI":"10.18653\/v1\/2023.findings-acl.824"},{"key":"10.1007\/s40593-024-00403-3_bib57","unstructured":"Tamkin, A., Brundage, M., Clark, J., & Ganguli, D. (2021). Understanding the capabilities, limitations, and societal impact of large language models. arXiv preprint arXiv:2102.02503."},{"key":"10.1007\/s40593-024-00403-3_bib58","doi-asserted-by":"crossref","DOI":"10.1007\/s41469-023-00153-x","article-title":"Designing Human Resource Management Systems in the Age of AI","author":"Tinguely","year":"2023","journal-title":"Journal of Organization Design, Forthcoming."},{"key":"10.1007\/s40593-024-00403-3_bib59","series-title":"International Conference on Document Analysis and Recognition.","article-title":"\u201cCan Pre-trained Language Models Help in Understanding Handwritten Symbols?.\u201d","author":"Tiwari","year":"2023"},{"key":"10.1007\/s40593-024-00403-3_bib60","first-page":"30","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1007\/s40593-024-00403-3_bib61","doi-asserted-by":"crossref","unstructured":"Vig, J. (2019). A multiscale visualization of attention in the transformer model. arXiv preprint arXiv:1906.05714.","DOI":"10.18653\/v1\/P19-3007"},{"key":"10.1007\/s40593-024-00403-3_bib62","series-title":"Theorem Proving in Higher Order Logics: 21st International Conference, TPHOLs 2008","first-page":"33","article-title":"The Isabelle framework","author":"Wenzel","year":"2008"},{"key":"10.1007\/s40593-024-00403-3_bib63","series-title":"National CCF Conference on Natural Language Processing and Chinese Computing","first-page":"3","article-title":"Deep learning for natural language processing: Advantages and challenges","author":"Liu","year":"2020"},{"key":"10.1007\/s40593-024-00403-3_bib64","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1613\/jair.1.12862","article-title":"Can we automate scientific reviewing?","volume":"75","author":"Yuan","year":"2022","journal-title":"Journal of Artificial Intelligence Research"},{"key":"10.1007\/s40593-024-00403-3_bib65","unstructured":"Zhang, M., Baral, S., Heffernan, N., & Lan, A. (2022). Automatic short math answer grading via in-context meta-learning. arXiv preprint arXiv:2205.15219."},{"key":"10.1007\/s40593-024-00403-3_bib66","unstructured":"Zhang, T., Zhang, Y., Vineet, V., Joshi, N., & Wang, X. (2023). Controllable Text-to-Image Generation with GPT-4. arXiv preprint arXiv:2305.18583."},{"key":"10.1007\/s40593-024-00403-3_bib67","doi-asserted-by":"crossref","unstructured":"Zhou, J., M\u00fcller, H., Holzinger, A., & Chen, F. (2023). Ethical ChatGPT: Concerns, challenges, and commandments. arXiv preprint arXiv:2305.10646.","DOI":"10.3390\/electronics13173417"}],"container-title":["International Journal of Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40593-024-00403-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40593-024-00403-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1560429226001022?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1560429226001022?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40593-024-00403-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,18]],"date-time":"2026-05-18T06:23:18Z","timestamp":1779085398000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1560429226001022"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":67,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["S1560429226001022"],"URL":"https:\/\/doi.org\/10.1007\/s40593-024-00403-3","relation":{},"ISSN":["1560-4292"],"issn-type":[{"value":"1560-4292","type":"print"}],"subject":[],"published":{"date-parts":[[2025,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"GPT-4 in Education: Evaluating Aptness, Reliability, and Loss of Coherence in Solving Calculus Problems and Grading Submissions","name":"articletitle","label":"Article Title"},{"value":"International Journal of Artificial Intelligence in Education","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1007\/s40593-024-00403-3","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"Copyright \u00a9 2024 The Author(s). Published by Elsevier Ltd","name":"copyright","label":"Copyright"}]}}