{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T10:06:59Z","timestamp":1776679619339,"version":"3.51.2"},"publisher-location":"Singapore","reference-count":41,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819570775","type":"print"},{"value":"9789819570782","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-7078-2_37","type":"book-chapter","created":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T09:24:46Z","timestamp":1776677086000},"page":"578-591","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Bridging Confidence and\u00a0Competence: Evaluating Self-assessment Alignment in\u00a0LLM Mathematical Reasoning"],"prefix":"10.1007","author":[{"given":"Mingze","family":"Zhong","sequence":"first","affiliation":[]},{"given":"Zijing","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Ziyan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Runze","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Meng","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Ling","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,4,21]]},"reference":[{"key":"37_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, J., Verma, R., Lou, R., Liu, D., Zhang, R., Yin, W.: Large language models for mathematical reasoning: progresses and challenges (2024)","DOI":"10.18653\/v1\/2024.eacl-srw.17"},{"key":"37_CR2","unstructured":"Azerbayev, Z., et al.: Llemma: an open language model for mathematics (2023)"},{"key":"37_CR3","doi-asserted-by":"crossref","unstructured":"Benedetto, L.: A quantitative study of NLP approaches to question difficulty estimation. In: International Conference on Artificial Intelligence in Education, pp. 428\u2013434. Springer (2023)","DOI":"10.1007\/978-3-031-36336-8_67"},{"key":"37_CR4","unstructured":"Boratyn, D., S\u0142omczy\u0144ski, W., Stolicki, D., Szufa, S.: Spoiler susceptibility in multi-district party elections (2022)"},{"key":"37_CR5","unstructured":"Chen, X., et al.: Universal self-consistency for large language model generation (2023)"},{"key":"37_CR6","doi-asserted-by":"crossref","unstructured":"Deuschel, J., Foltyn, A., Roscher, K., Scheele, S.: The role of uncertainty quantification for trustworthy AI. In: Unlocking Artificial Intelligence: From Theory to Applications, pp. 95\u2013115. Springer (2024)","DOI":"10.1007\/978-3-031-64832-8_5"},{"key":"37_CR7","unstructured":"Dutulescu, A., Ruseti, S., Dascalu, M., Mcnamara, D.: How hard can this question be? An exploratory analysis of features assessing question difficulty using LLMs. In: Proceedings of the 17th International Conference on Educational Data Mining, pp. 802\u2013808 (2024)"},{"issue":"1","key":"37_CR8","doi-asserted-by":"publisher","first-page":"1392","DOI":"10.1038\/s41597-025-05283-3","volume":"12","author":"M Fang","year":"2025","unstructured":"Fang, M., Wan, X., Lu, F., Xing, F., Zou, K.: Mathodyssey: benchmarking mathematical problem-solving skills in large language models using odyssey math data. Sci. Data 12(1), 1392 (2025)","journal-title":"Sci. Data"},{"key":"37_CR9","unstructured":"Gu, J., et al.: A survey on LLM-as-a-judge (2024)"},{"key":"37_CR10","unstructured":"Hendrycks, D., et al.: Measuring mathematical problem solving with the math dataset (2021)"},{"key":"37_CR11","unstructured":"Huang, J., et al.: Large language models cannot self-correct reasoning yet (2023)"},{"issue":"1","key":"37_CR12","doi-asserted-by":"publisher","first-page":"18","DOI":"10.3847\/1538-4357\/abebd8","volume":"912","author":"P Judge","year":"2021","unstructured":"Judge, P., Casini, R., Paraschiv, A.: On single-point inversions of magnetic dipole lines in the corona. Astrophys. J. 912(1), 18 (2021)","journal-title":"Astrophys. J."},{"key":"37_CR13","unstructured":"Kadavath, S., et al.: Language models (mostly) know what they know (2022)"},{"key":"37_CR14","doi-asserted-by":"publisher","first-page":"1417","DOI":"10.1162\/tacl_a_00713","volume":"12","author":"R Kamoi","year":"2024","unstructured":"Kamoi, R., Zhang, Y., Zhang, N., Han, J., Zhang, R.: When can LLMs actually correct their own mistakes? A critical survey of self-correction of LLMs. Trans. Assoc. Comput. Linguist. 12, 1417\u20131440 (2024)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"37_CR15","unstructured":"Kapoor, S., et al.: Large Language Models Must Be Taught to Know What They Don\u2019t Know (2024)"},{"key":"37_CR16","doi-asserted-by":"crossref","unstructured":"Kumar, A., Morabito, R., Umbet, S., Kabbara, J., Emami, A.: Confidence Under the Hood: An Investigation into the Confidence-Probability Alignment in Large Language Models (2024)","DOI":"10.18653\/v1\/2024.acl-long.20"},{"key":"37_CR17","doi-asserted-by":"crossref","unstructured":"Lewkowycz, A., et al.: Solving quantitative reasoning problems with language models. In: Advances in Neural Information Processing Systems, vol. 35, pp. 3843\u20133857 (2022)","DOI":"10.52202\/068431-0278"},{"key":"37_CR18","unstructured":"Li, L., et al.: Confidence matters: revisiting intrinsic self-correction capabilities of large language models (2024)"},{"key":"37_CR19","unstructured":"Lin, Z., Trivedi, S., Sun, J.: Generating with confidence: uncertainty quantification for black-box large language models (2023)"},{"key":"37_CR20","unstructured":"Liu, M., Bo, S., Fang, J.: Enhancing Mathematical Reasoning in Large Language Models with Self-Consistency-Based Hallucination Detection (2025)"},{"key":"37_CR21","unstructured":"Liu, W., et al.: Mathematical language models: a survey (2023)"},{"key":"37_CR22","unstructured":"Myers, V., Biyik, E., Anari, N., Sadigh, D.: Learning multimodal rewards from rankings. In: Conference on Robot Learning, pp. 342\u2013352. PMLR (2022)"},{"key":"37_CR23","unstructured":"OpenAI. GPT-4 technical report (2023)"},{"key":"37_CR24","doi-asserted-by":"crossref","unstructured":"Park, J.-W., Park, S.-J., Won, H.-S., Kim, K.-M.: Large language models are students at various levels: zero-shot question difficulty estimation. In: Findings of the ACL: EMNLP 2024, pp. 8157\u20138177 (2024)","DOI":"10.18653\/v1\/2024.findings-emnlp.477"},{"key":"37_CR25","unstructured":"Raz, T., Luchini, S., Beaty, R., Kenett, Y.: Bridging the measurement gap: a large language model method of assessing open-ended question complexity. In: Proceedings of the Annual Meeting of the Cognitive Science Society, vol. 46 (2024)"},{"key":"37_CR26","unstructured":"Ren, J., Zhao, Y., Vu, T., Liu, P., Lakshminarayanan, B.: Self-evaluation improves selective generation in large language models. In: Proceedings of Machine Learning Research, pp. 49\u201364 (2023)"},{"key":"37_CR27","doi-asserted-by":"crossref","unstructured":"Renze, M., Guven, E.: Self-reflection in LLM agents: effects on problem-solving performance (2024)","DOI":"10.1109\/FLLM63129.2024.10852426"},{"key":"37_CR28","doi-asserted-by":"crossref","unstructured":"Shorinwa, O., Mei, Z., Lidard, J., Ren, A.Z., Majumdar, A.: A survey on uncertainty quantification of large language models: taxonomy, open research challenges, and future directions (2024)","DOI":"10.1145\/3744238"},{"key":"37_CR29","doi-asserted-by":"crossref","unstructured":"Taubenfeld, A., et al.: Confidence Improves Self-Consistency in LLMs (2025)","DOI":"10.18653\/v1\/2025.findings-acl.1030"},{"key":"37_CR30","doi-asserted-by":"crossref","unstructured":"Tian, K., et al.: Just ask for calibration: strategies for eliciting calibrated confidence scores from language models fine-tuned with human feedback (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.330"},{"key":"37_CR31","unstructured":"Touvron, H., et al.: LLaMA: open and efficient foundation language models (2023)"},{"key":"37_CR32","doi-asserted-by":"crossref","unstructured":"Ulmer, D., Gubri, M., Lee, H., Yun, S., Oh, S.: Calibrating large language models using their generations only (2024)","DOI":"10.18653\/v1\/2024.acl-long.824"},{"key":"37_CR33","unstructured":"Wang, K., et al.: Mathcoder: seamless code integration in LLMs for enhanced mathematical reasoning (2023)"},{"key":"37_CR34","unstructured":"Wang, X., et al.: Self-consistency improves chain of thought reasoning in language models (2022)"},{"key":"37_CR35","unstructured":"Wu, Y., Sun, Z., Li, S., Welleck, S., Yang, Y.: Inference scaling laws: an empirical analysis of compute-optimal inference for LLM problem-solving. In: Proceedings of ICLR 2025 (2025)"},{"key":"37_CR36","unstructured":"Xiong, M., et al.: Can LLMs express their uncertainty? An empirical evaluation of confidence elicitation in LLMs (2023)"},{"key":"37_CR37","unstructured":"Yang, A., et al.: Qwen2.5-math technical report: toward mathematical expert model via self-improvement (2024)"},{"key":"37_CR38","unstructured":"Yang, D., Tsai, Y.-H., Yamada, M.: On Verbalized Confidence Scores for LLMs (2024)"},{"key":"37_CR39","doi-asserted-by":"crossref","unstructured":"Ye, F., et al.: Benchmarking LLMs via uncertainty quantification. In: Advances in Neural Information Processing Systems, vol. 37, pp. 15356\u201315385 (2024)","DOI":"10.52202\/079017-0491"},{"key":"37_CR40","unstructured":"Yuan, Z., Yuan, H., Tan, C., Wang, W., Huang, S.: How well do large language models perform in arithmetic tasks? (2023)"},{"key":"37_CR41","unstructured":"Zeng, Q., et al.: Uncertainty is fragile: manipulating uncertainty in large language models (2024)"}],"container-title":["Lecture Notes in Computer Science","PRICAI 2025: Trends in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-7078-2_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T09:25:19Z","timestamp":1776677119000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-7078-2_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819570775","9789819570782"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-7078-2_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"21 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific Rim International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wellington","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 November 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pricai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.pricai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}