{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,24]],"date-time":"2025-10-24T16:49:57Z","timestamp":1761324597572,"version":"3.28.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,30]],"date-time":"2024-06-30T00:00:00Z","timestamp":1719705600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,30]],"date-time":"2024-06-30T00:00:00Z","timestamp":1719705600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,30]]},"DOI":"10.1109\/ijcnn60899.2024.10650668","type":"proceedings-article","created":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T17:35:05Z","timestamp":1725903305000},"page":"1-9","source":"Crossref","is-referenced-by-count":1,"title":["Exploring and Improving Consistency in Large Language Models for Multiple-Choice Question Assessment"],"prefix":"10.1109","author":[{"given":"Wenjie","family":"Zhou","sequence":"first","affiliation":[{"name":"Soochow University,School of Computer Science and Technology,Soochow,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangyu","family":"Duan","sequence":"additional","affiliation":[{"name":"Soochow University,School of Computer Science and Technology,Soochow,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1877","article-title":"Language models are few-shot learners[J]","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"journal-title":"Llama 2: Open foundation and fine-tuned chat models[J]","year":"2023","author":"Touvron","key":"ref2"},{"key":"ref3","first-page":"27730","article-title":"Training language models to follow instructions with human feedback[J]","volume":"35","author":"Ouyang","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref4","first-page":"3","volume-title":"ICML 2011 Workshop on New Developments in Imitation Learning (July 2011)","volume":"855","author":"Knox"},{"journal-title":"Proximal policy optimization algorithms[J]","year":"2017","author":"Schulman","key":"ref5"},{"key":"ref6","first-page":"301","article-title":"Why can GPT learn in-context? language models secretly perform gradient descent as metaoptimizers[C]","volume-title":"Findings of the Association for Computational Linguistics: ACL 2023. 2023: 4005-4019. August 1987 [Digests 9th Annual Conf. Magnetics Japan","author":"Dai"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"journal-title":"Glm-130b: An open bilingual pretrained model[J]","year":"2022","author":"Zeng","key":"ref8"},{"journal-title":"Measuring massive multitask language understanding[J]","year":"2020","author":"Hendrycks","key":"ref9"},{"journal-title":"C-eval: A multi-level multidiscipline chinese evaluation suite for foundation models[J]","year":"2023","author":"Huang","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.234"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-naacl.149"},{"volume-title":"Training verifiers to solve math word problems, 2021[J]","year":"2021","author":"Cobbe","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.824"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.229"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.991"},{"journal-title":"Evaluating task understanding through multilingual consistency: A ChatGPT case study[J]","year":"2023","author":"Ohmer","key":"ref17"},{"journal-title":"Judging LLM-as-ajudge with MT-Bench and Chatbot Arena[J]","year":"2023","author":"Zheng","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3641289"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_x_00455"},{"journal-title":"Assessing Hidden Risks of LLMs: An Empirical Study on Robustness, Consistency, and Credibility[J]","year":"2023","author":"Ye","key":"ref21"},{"journal-title":"Evaluating the Decency and Consistency of Data Validation Tests Generated by LLMs[J]","year":"2023","author":"Alexander","key":"ref22"},{"journal-title":"A prompt pattern catalog to enhance prompt engineering with chatgpt[J]","year":"2023","author":"White","key":"ref23"},{"article-title":"Large language models are not robust multiple choice selectors[C]","volume-title":"The Twelfth International Conference on Learning Representations","author":"Zheng","key":"ref24"},{"key":"ref25","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models[J]","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref26","first-page":"22199","article-title":"Large language models are zero-shot reasoners[J]","volume":"35","author":"Kojima","year":"2022","journal-title":"Advances in neural information processing systems"},{"journal-title":"\u201cMy Answer is C\u201d: First-Token Probabilities Do Not Match Text Answers in Instruction-Tuned Language Models[J]","year":"2024","author":"Wang","key":"ref27"},{"journal-title":"Self-instruct: Aligning language models with self-generated instructions[J]","year":"2022","author":"Wang","key":"ref28"},{"journal-title":"Dola: Decoding by contrasting layers improves factuality in large language models[J]","year":"2023","author":"Chuang","key":"ref29"},{"journal-title":"Opt-iml: Scaling language model instruction meta learning through the lens of generalization[J]","year":"2022","author":"Iyer","key":"ref30"},{"key":"ref31","first-page":"36","article-title":"Principle-driven self-alignment of language models from scratch with minimal human supervision[J]","author":"Sun","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_x_00455"},{"journal-title":"Platypus: Quick, cheap, and powerful refinement of llms[J]","year":"2023","author":"Lee","key":"ref33"},{"journal-title":"Lora: Low-rank adaptation of large language models[J]","year":"2021","author":"Hu","key":"ref34"},{"journal-title":"Qwen-vl: A frontier large vision-language model with versatile abilities[J]","year":"2023","author":"Bai","key":"ref35"},{"journal-title":"Baichuan 2: Open large-scale language models[J]","year":"2023","author":"Yang","key":"ref36"},{"journal-title":"Holistic evaluation of language models[J]","year":"2022","author":"Liang","key":"ref37"}],"event":{"name":"2024 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2024,6,30]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,7,5]]}},"container-title":["2024 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10649807\/10649898\/10650668.pdf?arnumber=10650668","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T05:42:41Z","timestamp":1725946961000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10650668\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,30]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/ijcnn60899.2024.10650668","relation":{},"subject":[],"published":{"date-parts":[[2024,6,30]]}}}