{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T18:27:11Z","timestamp":1763922431693,"version":"3.45.0"},"publisher-location":"Cham","reference-count":55,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032093677","type":"print"},{"value":"9783032093684","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T00:00:00Z","timestamp":1763942400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T00:00:00Z","timestamp":1763942400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-09368-4_21","type":"book-chapter","created":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T18:14:20Z","timestamp":1763921660000},"page":"344-361","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["GeoGRPO: Investigating the\u00a0Stepwise-GRPO Enhancement in\u00a0RLHF Framework"],"prefix":"10.1007","author":[{"given":"Kecheng","family":"Liang","sequence":"first","affiliation":[]},{"given":"Xinyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Weixing","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,24]]},"reference":[{"key":"21_CR1","unstructured":"Achiam, J., et\u00a0al.: Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"21_CR2","first-page":"23716","volume":"35","author":"JB Alayrac","year":"2022","unstructured":"Alayrac, J.B., et al.: Flamingo: a visual language model for few-shot learning. Adv. Neural. Inf. Process. Syst. 35, 23716\u201323736 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Bellman, R.: Dynamic programming. Science 153(3731), 34\u201337 (1966)","DOI":"10.1126\/science.153.3731.34"},{"key":"21_CR4","unstructured":"Cao, J., Xiao, J.: An augmented benchmark dataset for geometric question answering through dual parallel text encoding. In: Proceedings of the 29th International Conference on Computational Linguistics, pp. 1511\u20131520 (2022)"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Chen, J., et al.: Geoqa: a geometric question answering benchmark towards multimodal numerical reasoning. arXiv preprint arXiv:2105.14517 (2021)","DOI":"10.18653\/v1\/2021.findings-acl.46"},{"key":"21_CR6","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Semantic image segmentation with deep convolutional nets and fully connected CRFs. In: ICLR (2015). http:\/\/arxiv.org\/abs\/1412.7062"},{"key":"21_CR7","unstructured":"DeepSeek-AI: Deepseek-r1: Incentivizing reasoning capability in LLMs via reinforcement learning (2025). https:\/\/arxiv.org\/abs\/2501.12948"},{"key":"21_CR8","unstructured":"Gao, J., et\u00a0al.: G-llava: Solving geometric problem with multi-modal large language model. arXiv preprint arXiv:2312.11370 (2023)"},{"key":"21_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1007\/978-3-319-10584-0_20","volume-title":"Computer Vision \u2013 ECCV 2014","author":"B Hariharan","year":"2014","unstructured":"Hariharan, B., Arbel\u00e1ez, P., Girshick, R., Malik, J.: Simultaneous detection and segmentation. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 297\u2013312. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_20"},{"key":"21_CR10","unstructured":"Hoffmann, J., et\u00a0al.: Training compute-optimal large language models. arXiv preprint arXiv:2203.15556 (2022)"},{"issue":"2","key":"21_CR11","first-page":"3","volume":"1","author":"EJ Hu","year":"2022","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. ICLR 1(2), 3 (2022)","journal-title":"ICLR"},{"key":"21_CR12","unstructured":"Jiang, J., et\u00a0al.: Technical report: Enhancing LLM reasoning with reward-guided tree search. arXiv preprint arXiv:2411.11694 (2024)"},{"key":"21_CR13","unstructured":"Kaplan, J., et al.: Scaling laws for neural language models. arXiv preprint arXiv:2001.08361 (2020)"},{"key":"21_CR14","unstructured":"Lee, Y.: Qwen2-vl-finetune (2024). https:\/\/github.com\/2U1\/Qwen2-VL-Finetune"},{"key":"21_CR15","unstructured":"Lightman, H., et al.: Let\u2019s verify step by step. In: The Twelfth International Conference on Learning Representations (2023)"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Liu, C., Yuen, J., Torralba, A.: Sift flow: dense correspondence across scenes and its applications. IEEE Trans. Pattern Anal. Mach. Intell. 33(5), 978\u2013994 (2011)","DOI":"10.1109\/TPAMI.2010.147"},{"key":"21_CR17","first-page":"34892","volume":"36","author":"H Liu","year":"2023","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. Adv. Neural. Inf. Process. Syst. 36, 34892\u201334916 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"21_CR19","unstructured":"Lu, P., et al.: Mathvista: Evaluating mathematical reasoning of foundation models in visual contexts. arXiv preprint arXiv:2310.02255 (2023)"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Lu, P., et al.: Inter-gps: Interpretable geometry problem solving with formal language and symbolic reasoning. In: The Joint Conference of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (ACL-IJCNLP 2021) (2021)","DOI":"10.18653\/v1\/2021.acl-long.528"},{"key":"21_CR21","unstructured":"Lu, P., et al.: Dynamic prompt learning via policy gradient for semi-structured mathematical reasoning. arXiv preprint arXiv:2209.14610 (2022)"},{"key":"21_CR22","unstructured":"Min, Y., et\u00a0al.: Imitate, explore, and self-improve: A reproduction report on slow-thinking reasoning systems. arXiv preprint arXiv:2412.09413 (2024)"},{"key":"21_CR23","unstructured":"Mirzadeh, I., Alizadeh, K., Shahrokhi, H., Tuzel, O., Bengio, S., Farajtabar, M.: GSM-symbolic: understanding the limitations of mathematical reasoning in large language models. arXiv preprint arXiv:2410.05229 (2024)"},{"key":"21_CR24","unstructured":"Murphy, L., Yang, K., Sun, J., Li, Z., Anandkumar, A., Si, X.: Autoformalizing euclidean geometry. arXiv preprint arXiv:2405.17216 (2024)"},{"key":"21_CR25","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR26","unstructured":"Wang, Q., Yang, Y., Jiang, J.: Easy RL: Reinforcement Learning Tutorial. Posts & Telecom Press, Beijing (2022). https:\/\/github.com\/datawhalechina\/easy-rl"},{"key":"21_CR27","unstructured":"Qiao, R., et\u00a0al.: We-math: Does your large multimodal model achieve human-like mathematical reasoning? arXiv preprint arXiv:2407.01284 (2024)"},{"key":"21_CR28","unstructured":"Sutton, R.S., Barto, A.G., et\u00a0al.: Reinforcement learning: An introduction, vol.\u00a01. MIT press Cambridge (1998)"},{"key":"21_CR29","unstructured":"Team, G., et\u00a0al.: Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)"},{"key":"21_CR30","unstructured":"Team, Q.: Qwen2.5-vl (January 2025). https:\/\/qwenlm.github.io\/blog\/qwen2.5-vl\/"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Tighe, J., Lazebnik, S.: Finding things: image parsing with regions and per-exemplar detectors. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3001\u20133008 (2013)","DOI":"10.1109\/CVPR.2013.386"},{"key":"21_CR32","unstructured":"Turing, A.M.: Computing machinery and intelligence. Springer (2009)"},{"key":"21_CR33","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"21_CR34","unstructured":"Wang, J., et\u00a0al.: Openr: An open source framework for advanced reasoning with large language models. arXiv preprint arXiv:2410.09671 (2024)"},{"key":"21_CR35","unstructured":"Wang, W., et\u00a0al.: Visualprm: an effective process reward model for multimodal reasoning. arXiv preprint arXiv:2503.10291 (2025)"},{"key":"21_CR36","unstructured":"Wang, X., et al.: Self-consistency improves chain of thought reasoning in language models. arXiv preprint arXiv:2203.11171 (2022)"},{"key":"21_CR37","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR38","doi-asserted-by":"crossref","unstructured":"Yan, Y., et al.: A survey of mathematical reasoning in the era of multimodal large language model: Benchmark, method & challenges. arXiv preprint arXiv:2412.11936 (2024)","DOI":"10.18653\/v1\/2025.findings-acl.614"},{"key":"21_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, R., et\u00a0al.: Mathverse: Does your multi-modal LLM truly see the diagrams in visual math problems? In: European Conference on Computer Vision, pp. 169\u2013186. Springer (2024)","DOI":"10.1007\/978-3-031-73242-3_10"},{"key":"21_CR40","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: \u201cSemantic image segmentation with deep convolutional nets and fully connected CRFs,\u201d arXiv preprint arXiv:1412.7062 (2014)"},{"key":"21_CR41","doi-asserted-by":"crossref","unstructured":"Chen, J., et al. UniGeo: unifying geometry logical reasoning via reformulating mathematical expression. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 3313\u20133323 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.218"},{"key":"21_CR42","doi-asserted-by":"crossref","unstructured":"Ning, M., Wang, Q.F., Huang, K., Huang, X.: A symbolic characters aware model for solving geometry problems. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 7767\u20137775 (2023)","DOI":"10.1145\/3581783.3612570"},{"key":"21_CR43","unstructured":"Zhang, M.L., Li, Z.Z., Yin, F., Lin, L., Liu, C.L.: Fuse, reason and verify: Geometry problem solving with parsed clauses from diagram. CoRR 2024"},{"key":"21_CR44","unstructured":"Liu, H., et al.: LlaVA-next: Improved reasoning, OCR, and world knowledge, January 2024. https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next (2024)"},{"key":"21_CR45","unstructured":"Yang, A., et al. Qwen2 Technical Report. CoRR (2024)"},{"key":"21_CR46","unstructured":"Yang, A., et al. Qwen2.5 technical report. arXiv preprint arXiv:2412.15115 (2024)"},{"key":"21_CR47","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al. InternVL: scaling up vision foundation models and aligning for generic visual-linguistic tasks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 24185\u201324198 (2024)","DOI":"10.1109\/CVPR52733.2024.02283"},{"key":"21_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, M.L., Yin, F., Hao, Y.H., Liu, C.L.: Plane geometry diagram parsing. In: Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, pp. 1636\u20131643 (2022)","DOI":"10.24963\/ijcai.2022\/228"},{"key":"21_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, M.L., Yin, F., Liu, C.L.: A multi-modal neural geometric solver with textual clauses parsed from diagram. In: Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, pp. 3374\u20133382, 2023","DOI":"10.24963\/ijcai.2023\/376"},{"key":"21_CR50","doi-asserted-by":"crossref","unstructured":"Hao, Y., Zhang, M., Yin, F., Huang, L.L.: PGDP5K: a diagram parsing dataset for plane geometry problems. In: Proceedings of the 2022 26th International Conference on Pattern Recognition (ICPR), pp. 1763\u20131769. IEEE (2022)","DOI":"10.1109\/ICPR56361.2022.9956397"},{"key":"21_CR51","unstructured":"Zhang, R., et al. Mavis: Mathematical visual instruction tuning. arXiv e-prints, page arXiv:2407.xxxxx, (2024)"},{"key":"21_CR52","unstructured":"Li, Z., et al.: Eagle: elevating geometric reasoning through LLM-empowered visual instruction tuning. arXiv preprint arXiv:2408.11397 (2024)"},{"key":"21_CR53","unstructured":"Deng, L., et al.: R-CoT: Reverse Chain-of-Thought Problem Generation for Geometric Reasoning in Large Multimodal Models. CoRR (2024)"},{"key":"21_CR54","unstructured":"Zhou, Z., et al.: Is your model really a good math reasoner? evaluating mathematical reasoning with checklist. In: International Conference on Learning Representations (ICLR) (2025)"},{"key":"21_CR55","doi-asserted-by":"crossref","unstructured":"Ning, M., Zhou, Z., Wang, Q.*, Huang, X., Huang, K.: GNS: Solving Plane Geometry Problems by Neural-Symbolic Reasoning with Multi-Modal LLMs. In: AAAI Conference on Artificial Intelligence (AAAI), (Oral), (2025)","DOI":"10.1609\/aaai.v39i23.34679"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2025 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-09368-4_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T18:14:39Z","timestamp":1763921679000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-09368-4_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,24]]},"ISBN":["9783032093677","9783032093684"],"references-count":55,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-09368-4_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,24]]},"assertion":[{"value":"24 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuhan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icdar2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}