{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T01:15:53Z","timestamp":1774314953815,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3769002.3769962","type":"proceedings-article","created":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T19:16:19Z","timestamp":1770232579000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Grade Like a Human: Rethinking Automated Assessment with Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0857-4426","authenticated-orcid":false,"given":"Wenjing","family":"Xie","sequence":"first","affiliation":[{"name":"City University of Hong Kong, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8223-4245","authenticated-orcid":false,"given":"Juxin","family":"Niu","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6431-9868","authenticated-orcid":false,"given":"Chun Jason","family":"Xue","sequence":"additional","affiliation":[{"name":"Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, United Arab Emirates"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3775-911X","authenticated-orcid":false,"given":"Nan","family":"Guan","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Kowloon, Hong Kong"}]}],"member":"320","published-online":{"date-parts":[[2026,2,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Ateret Anaby-Tavor, Ioana Baldini, Sara E Berger, Bishwaranjan Bhattacharjee, Djallel Bouneffouf, Subhajit Chaudhury, Pin-Yu Chen, Lamogha Chiazor, et al.","author":"Achintalwar Swapnaja","year":"2024","unstructured":"Swapnaja Achintalwar, Adriana Alvarado Garcia, Ateret Anaby-Tavor, Ioana Baldini, Sara E Berger, Bishwaranjan Bhattacharjee, Djallel Bouneffouf, Subhajit Chaudhury, Pin-Yu Chen, Lamogha Chiazor, et al. 2024. Detectors for Safe and Reliable LLMs: Implementations, Uses, and Limitations. arXiv preprint arXiv:2403.06009 (2024)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i21.30363"},{"key":"e_1_3_2_1_3_1","volume-title":"Humans or LLMs as the Judge? A Study on Judgement Biases. arXiv preprint arXiv:2402.10669","author":"Chen Guiming Hardy","year":"2024","unstructured":"Guiming Hardy Chen, Shunian Chen, Ziche Liu, Feng Jiang, and Benyou Wang. 2024. Humans or LLMs as the Judge? A Study on Judgement Biases. arXiv preprint arXiv:2402.10669 (2024)."},{"key":"e_1_3_2_1_4_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al. 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research 24, 240 (2023), 1\u2013113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i21.30364"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","first-page":"4295","DOI":"10.1007\/s10115-023-01892-9","article-title":"GradeAid: a framework for automatic short answers grading in educational contexts\u2014design, implementation and evaluation","volume":"65","author":"Gobbo Emiliano Del","year":"2023","unstructured":"Emiliano Del Gobbo, Alfonso Guarino, Barbara Cafarelli, and Luca Grilli. 2023. GradeAid: a framework for automatic short answers grading in educational contexts\u2014design, implementation and evaluation. Knowledge and Information Systems 65, 10 (2023), 4295\u20134334.","journal-title":"Knowledge and Information Systems"},{"key":"e_1_3_2_1_7_1","volume-title":"2023 Fifth International Conference on Electrical, Computer and Communication Technologies (ICECCT). IEEE, 1\u20137.","author":"Divya Arunima","year":"2023","unstructured":"Arunima Divya, Vivek Haridas, and Jayasree Narayanan. 2023. Automation of Short Answer Grading Techniques: Comparative Study using Deep Learning Techniques. In 2023 Fifth International Conference on Electrical, Computer and Communication Technologies (ICECCT). IEEE, 1\u20137."},{"key":"e_1_3_2_1_8_1","volume-title":"How Reliable Are Automatic Evaluation Methods for Instruction-Tuned LLMs? arXiv preprint arXiv:2402.10770","author":"Doostmohammadi Ehsan","year":"2024","unstructured":"Ehsan Doostmohammadi, Oskar Holmstr\u00f6m, and Marco Kuhlmann. 2024. How Reliable Are Automatic Evaluation Methods for Instruction-Tuned LLMs? arXiv preprint arXiv:2402.10770 (2024)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.51219\/JAIMLD\/oluwole-fagbohun\/19"},{"key":"e_1_3_2_1_10_1","volume-title":"Felipe Vieira Frujeri, and Ida Momennejad","author":"Hasanbeig Hosein","year":"2023","unstructured":"Hosein Hasanbeig, Hiteshi Sharma, Leo Betthauser, Felipe Vieira Frujeri, and Ida Momennejad. 2023. Allure: A systematic protocol for auditing and improving llm-based evaluation of text using iterative in-context-learning. arXiv preprint arXiv:2309.13701 (2023)."},{"key":"e_1_3_2_1_11_1","volume-title":"RankPrompt: Step-by-Step Comparisons Make Language Models Better Reasoners. arXiv preprint arXiv:2403.12373","author":"Hu Chi","year":"2024","unstructured":"Chi Hu, Yuan Ge, Xiangnan Ma, Hang Cao, Qiang Li, Yonghua Yang, Tong Xiao, and Jingbo Zhu. 2024. RankPrompt: Step-by-Step Comparisons Make Language Models Better Reasoners. arXiv preprint arXiv:2403.12373 (2024)."},{"key":"e_1_3_2_1_12_1","unstructured":"Lei Huang Weijiang Yu Weitao Ma Weihong Zhong Zhangyin Feng Haotian Wang Qianglong Chen Weihua Peng Xiaocheng Feng Bing Qin and Ting Liu. 2023. A Survey on Hallucination in Large Language Models: Principles Taxonomy Challenges and Open Questions. arXiv:2311.05232 [cs.CL]"},{"key":"e_1_3_2_1_13_1","volume-title":"Jauhiainen and Agust\u00edn Garagorry Guerra","author":"Jussi","year":"2024","unstructured":"Jussi S. Jauhiainen and Agust\u00edn Garagorry Guerra. 2024. Evaluating Students' Open-ended Written Responses with LLMs: Using the RAG Framework for GPT-3.5, GPT-4, Claude-3, and Mistral-Large. arXiv:2405.05444 [cs.CL]"},{"key":"e_1_3_2_1_14_1","volume-title":"Fine-tuning chatgpt for automatic scoring. Computers and Education: Artificial Intelligence","author":"Latif Ehsan","year":"2024","unstructured":"Ehsan Latif and Xiaoming Zhai. 2024. Fine-tuning chatgpt for automatic scoring. Computers and Education: Artificial Intelligence (2024), 100210."},{"key":"e_1_3_2_1_15_1","volume-title":"Aligning with Human Judgement: The Role of Pairwise Preference in Large Language Model Evaluators. arXiv preprint arXiv:2403.16950","author":"Liu Yinhong","year":"2024","unstructured":"Yinhong Liu, Han Zhou, Zhijiang Guo, Ehsan Shareghi, Ivan Vulic, Anna Korhonen, and Nigel Collier. 2024. Aligning with Human Judgement: The Role of Pairwise Preference in Large Language Model Evaluators. arXiv preprint arXiv:2403.16950 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-long.8"},{"key":"e_1_3_2_1_17_1","volume-title":"Self-refine: Iterative refinement with self-feedback. Advances in Neural Information Processing Systems 36","author":"Madaan Aman","year":"2024","unstructured":"Aman Madaan, Niket Tandon, Prakhar Gupta, Skyler Hallinan, Luyu Gao, Sarah Wiegreffe, Uri Alon, Nouha Dziri, Shrimai Prabhumoye, Yiming Yang, et al. 2024. Self-refine: Iterative refinement with self-feedback. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/2002472.2002568"},{"key":"e_1_3_2_1_19_1","unstructured":"Filippa Nilsson and Jonatan Tuvstedt. 2023. GPT-4 as an Automatic Grader: The accuracy of grades set by GPT-4 on introductory programming assignments."},{"key":"e_1_3_2_1_20_1","volume-title":"Gonzalez","author":"Patil Shishir G.","year":"2023","unstructured":"Shishir G. Patil, Tianjun Zhang, Xin Wang, and Joseph E. Gonzalez. 2023. Gorilla: Large Language Model Connected with Massive APIs. arXiv:2305.15334 [cs.CL]"},{"key":"e_1_3_2_1_21_1","volume-title":"Is LLM-as-a-Judge Robust? Investigating Universal Adversarial Attacks on Zero-shot LLM Assessment. arXiv preprint arXiv:2402.14016","author":"Raina Vyas","year":"2024","unstructured":"Vyas Raina, Adian Liusie, and Mark Gales. 2024. Is LLM-as-a-Judge Robust? Investigating Universal Adversarial Attacks on Zero-shot LLM Assessment. arXiv preprint arXiv:2402.14016 (2024)."},{"key":"e_1_3_2_1_22_1","unstructured":"Yongliang Shen Kaitao Song Xu Tan Dongsheng Li Weiming Lu and Yueting Zhuang. 2023. HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face. arXiv:2303.17580 [cs.CL]"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TLT.2024.3396873"},{"key":"e_1_3_2_1_24_1","unstructured":"LangChain Team. 2024. LangChain. https:\/\/www.langchain.com\/ Accessed: 2025-08-25."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Rui Wang Hongru Wang Fei Mi Yi Chen Boyang Xue Kam-Fai Wong and Ruifeng Xu. 2024. Enhancing Large Language Models Against Inductive Instructions with Dual-critique Prompting. arXiv:2305.13733 [cs.CL]","DOI":"10.18653\/v1\/2024.naacl-long.299"},{"key":"e_1_3_2_1_26_1","volume-title":"Short Answer Grading Using One-shot Prompting and Text Similarity Scoring Model. arXiv preprint arXiv:2305.18638","author":"Yoon Su-Youn","year":"2023","unstructured":"Su-Youn Yoon. 2023. Short Answer Grading Using One-shot Prompting and Text Similarity Scoring Model. arXiv preprint arXiv:2305.18638 (2023)."}],"event":{"name":"RACS '25: International Conference on Research in Adaptive and Convergent Systems","location":"Ho Chi Minh Vietnam","acronym":"RACS '25","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the International Conference on Research in Adaptive and Convergent Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3769002.3769962","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T19:17:24Z","timestamp":1770232644000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3769002.3769962"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,16]]},"references-count":26,"alternative-id":["10.1145\/3769002.3769962","10.1145\/3769002"],"URL":"https:\/\/doi.org\/10.1145\/3769002.3769962","relation":{},"subject":[],"published":{"date-parts":[[2025,11,16]]},"assertion":[{"value":"2026-02-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}