{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T04:09:18Z","timestamp":1777954158123,"version":"3.51.4"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T00:00:00Z","timestamp":1770768000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T00:00:00Z","timestamp":1775606400000},"content-version":"vor","delay-in-days":56,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. King Saud Univ. Comput. Inf. Sci."],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s44443-026-00550-8","type":"journal-article","created":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T11:07:04Z","timestamp":1770808024000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Look, compare and refine: Iterative image-text alignment-driven self-refinement for handwritten mathematical expression recognition"],"prefix":"10.1007","volume":"38","author":[{"given":"Jinzheng","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0515-7211","authenticated-orcid":false,"given":"Ting","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"He","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuai","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiayu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xueer","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinguo","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,11]]},"reference":[{"key":"550_CR1","doi-asserted-by":"crossref","unstructured":"Amabile TM (1983) A Theoretical Framework, pp 65\u201396. Springer, New York, NY","DOI":"10.1007\/978-1-4612-5533-8_4"},{"key":"550_CR2","unstructured":"Bai J, Bai S, Yang S, Wang S, Tan S, Wang P, Lin J, Zhou C, Zhou J (2023) Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond"},{"key":"550_CR3","doi-asserted-by":"crossref","unstructured":"Bian X, Qin B, Xin X, Li J, Su X, Wang Y (2021) Handwritten mathematical expression recognition via attention aggregation based bi-directional mutual learning. In: AAAI Conference on Artificial Intelligence","DOI":"10.1609\/aaai.v36i1.19885"},{"key":"550_CR4","doi-asserted-by":"crossref","unstructured":"Chen Z, Wu J, Wang W, Su W, Chen G, Xing S, Muyan Z, Zhang Q, Zhu X, Lu L, Li B, Luo P, Lu T, Qiao Y, Dai J (2023) Intern vl: Scaling up vision foundation models and aligning for generic visual-linguistic tasks. In: 2024 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 24185\u201324198","DOI":"10.1109\/CVPR52733.2024.02283"},{"key":"550_CR5","doi-asserted-by":"crossref","unstructured":"Ding Y, Min MJ, Kaiser GE, Ray B (2024) Cycle: Learning to self-refine the code generation. In: Proceedings of the ACM on programming languages 8:392\u2013418","DOI":"10.1145\/3649825"},{"key":"550_CR6","doi-asserted-by":"crossref","unstructured":"Flower LS, Hayes JR (1981) A cognitive process theory of writing. College Composition & Communication","DOI":"10.58680\/ccc198115885"},{"key":"550_CR7","unstructured":"Gou Z, Shao Z, Gong Y, shen Yang Y, Duan N, Chen W (2024) Critic: Large language models can self-correct with tool-interactive critiquing. In: The twelfth international conference on learning representations"},{"key":"550_CR8","doi-asserted-by":"crossref","unstructured":"Guan T, Lin C, Shen W, Yang X (2024) Posformer: Recognizing complex handwritten mathematical expression with position forest transformer. In: European conference on computer vision","DOI":"10.1007\/978-3-031-72670-5_8"},{"key":"550_CR9","doi-asserted-by":"crossref","unstructured":"He J, Lin H, Wang Q, Fung YR, Ji H (2025) Self-correction is more than refinement: A learning framework for visual and language reasoning tasks. In: Che W, Nabende J, Shutova E, Pilehvar MT (eds) Findings of the Association for Computational Linguistics: ACL 2025. Association for Computational Linguistics, Vienna, Austria, pp 6405\u20136421","DOI":"10.18653\/v1\/2025.findings-acl.331"},{"key":"550_CR10","doi-asserted-by":"crossref","unstructured":"Hu L, Zanibbi, R (2011) Hmm-based recognition of online handwritten mathematical symbols using segmental k-means initialization and a modified pen-up\/down feature. In: 2011 international conference on document analysis and recognition, 457\u2013462","DOI":"10.1109\/ICDAR.2011.98"},{"key":"550_CR11","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Weinberger KQ (2016) Densely connected convolutional networks. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), pp 2261\u20132269","DOI":"10.1109\/CVPR.2017.243"},{"key":"550_CR12","doi-asserted-by":"crossref","unstructured":"Jiang N, Liang S, Wang C, Wang J, Tan L (2025) Latte: Improving latex recognition for tables and formulae with iterative refinement. In: AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v39i4.32422"},{"key":"550_CR13","doi-asserted-by":"crossref","unstructured":"Lavirotte S, Pottier L (1998) Mathematical formula recognition using graph grammar. In: Electronic imaging","DOI":"10.1117\/12.304644"},{"key":"550_CR14","doi-asserted-by":"crossref","unstructured":"Le AD, Nakagawa M (2017) Training an end-to-end system for handwritten mathematical expression recognition by generated patterns. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). 01, 1056\u20131061","DOI":"10.1109\/ICDAR.2017.175"},{"key":"550_CR15","doi-asserted-by":"crossref","unstructured":"Lee S, Park SH, Jo Y, Seo M (2024) Volcano: Mitigating multimodal hallucination through self-feedback guided revision. In: Duh K, Gomez H, Bethard S (eds) Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp 391\u2013404. Association for Computational Linguistics, Mexico City, Mexico","DOI":"10.18653\/v1\/2024.naacl-long.23"},{"key":"550_CR16","unstructured":"Li H, Li J, Cao J, Yang Z, Xiong Y (2025) Towards Scalable Training for Handwritten Mathematical Expression Recognition"},{"key":"550_CR17","doi-asserted-by":"crossref","unstructured":"Li B, Yuan Y, Liang D, Liu X, Ji Z, Bai J, Liu W, Bai X (2022) When counting meets hmer: Counting-aware network for handwritten mathematical expression recognition. In: European conference on computer vision","DOI":"10.1007\/978-3-031-19815-1_12"},{"key":"550_CR18","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. In: 2017 IEEE international conference on computer vision (ICCV), pp 2999\u20133007","DOI":"10.1109\/ICCV.2017.324"},{"key":"550_CR19","doi-asserted-by":"crossref","unstructured":"Liu C, Pan J, Hu J, Yin B, Yin B, Chen M, Liu C, Du J, Liu Q (2024) Namer: Non-autoregressive modeling for handwritten mathematical expression recognition. In: European conference on computer vision","DOI":"10.1007\/978-3-031-72998-0_16"},{"key":"550_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110220","volume":"149","author":"Z Li","year":"2023","unstructured":"Li Z, Yang W, Qi H, Jin L, Huang Y, Ding K (2023) A tree-based model with branch parallel decoding for handwritten mathematical expression recognition. Pattern Recognit 149:110220","journal-title":"Pattern Recognit"},{"key":"550_CR21","unstructured":"Madaan A, Tandon N, Gupta P, Hallinan S, Gao L, Wiegreffe S, Alon U, Dziri N, Prabhumoye S, Yang Y, Gupta S, Majumder BP, Hermann K, Welleck S, Yazdanbakhsh A, Clark P (2023) Self-refine: iterative refinement with self-feedback. In: Proceedings of the 37th international conference on neural information processing systems. NIPS \u201923. Curran Associates Inc., Red Hook, NY, USA"},{"key":"550_CR22","doi-asserted-by":"crossref","unstructured":"Mahdavi M, Zanibbi R, Mouch\u00e8re H, Viard-Gaudin C, Garain U (2019) Icdar 2019 crohme + tfd: Competition on recognition of handwritten mathematical expressions and typeset formula detection. In: 2019 international conference on document analysis and recognition (ICDAR), pp 1533\u20131538","DOI":"10.1109\/ICDAR.2019.00247"},{"key":"550_CR23","doi-asserted-by":"crossref","unstructured":"Mouch\u00e8re H, Viard-Gaudin C, Zanibbi R, Garain U (2014) Icfhr 2014 competition on recognition of on-line handwritten mathematical expressions (crohme 2014). In: 2014 14th international conference on frontiers in handwriting recognition, pp 791\u2013796","DOI":"10.1109\/ICFHR.2014.138"},{"key":"550_CR24","doi-asserted-by":"crossref","unstructured":"Mouch\u00e8re H, Viard-Gaudin C, Zanibbi R, Garain U (2016) Icfhr 2016 crohme: Competition on recognition of online handwritten mathematical expressions. In: 2016 15th international conference on frontiers in handwriting recognition (ICFHR), pp 607\u2013612","DOI":"10.1109\/ICFHR.2016.0116"},{"key":"550_CR25","doi-asserted-by":"crossref","unstructured":"Simon HA (1991) The Architecture of Complexity, pp 457\u2013476. Springer, Boston, MA","DOI":"10.1007\/978-1-4899-0718-9_31"},{"key":"550_CR26","unstructured":"Team BS (2025) Seed1.6 Tech Introduction. https:\/\/seed.bytedance.com\/en\/seed1_6. Accessed: 2025\u201310-14"},{"key":"550_CR27","doi-asserted-by":"crossref","unstructured":"Toyota S, Uchida S, Suzuki M (2006) Structural analysis of mathematical formulae with verification based on formula description grammar. In: International workshop on document analysis systems","DOI":"10.1007\/11669487_14"},{"key":"550_CR28","unstructured":"Vaswani A, Shazeer NM, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Neural information processing systems"},{"key":"550_CR29","unstructured":"Wei H, Liu C, Chen J, Wang J, Kong L, Xu Y, Ge Z, Zhao L, Sun J, Peng Y, Han C, Zhang X (2024) General ocr theory: Towards ocr-2.0 via a unified end-to-end model. ArXiv:2409.01704"},{"key":"550_CR30","doi-asserted-by":"publisher","first-page":"2386","DOI":"10.1007\/s11263-020-01291-5","volume":"128","author":"J-W Wu","year":"2020","unstructured":"Wu J-W, Yin F, Zhang Y, Zhang X-Y, Liu C-L (2020) Handwritten mathematical expression recognition via paired adversarial learning. Int J Comput Vision 128:2386\u20132401","journal-title":"Int J Comput Vision"},{"key":"550_CR31","doi-asserted-by":"crossref","unstructured":"Wu J-W, Yin F, Zhang Y, Zhang X-Y, Liu C-L (2018) Image-to-markup generation via paired adversarial learning. In: ECML\/PKDD","DOI":"10.1007\/978-3-030-10925-7_2"},{"key":"550_CR32","doi-asserted-by":"crossref","unstructured":"Yang Z, Wang J, Li L, Lin KQ, Lin C-C, Liu Z, Wang L (2024) Idea2img: Iterative self-refinement with gpt-4v for automatic image design and generation. In: European conference on computer vision","DOI":"10.1007\/978-3-031-72920-1_10"},{"key":"550_CR33","doi-asserted-by":"crossref","unstructured":"Yuan Y, Liu X, Dikubab W, Liu H, Ji Z, Wu Z, Bai X (2022) Syntax-aware network for handwritten mathematical expression recognition. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 4543\u20134552","DOI":"10.1109\/CVPR52688.2022.00451"},{"key":"550_CR34","doi-asserted-by":"crossref","unstructured":"Zhang J, Du J, Dai L (2018) Multi-scale attention with dense encoder for handwritten mathematical expression recognition. In: 2018 24th international conference on pattern recognition (ICPR), pp 2245\u20132250","DOI":"10.1109\/ICPR.2018.8546031"},{"key":"550_CR35","unstructured":"Zhang J, Du J, Yang Y, Song Y-Z, Wei S, Dai L (2020) A tree-structured decoder for image-to-markup generation. In: International conference on machine learning"},{"key":"550_CR36","doi-asserted-by":"crossref","unstructured":"Zhang L, Zeng X, Li K, Yu G, Chen T (2025) SC-Captioner: Improving Image Captioning with Self-Correction by Reinforcement Learning","DOI":"10.1109\/ICCV51701.2025.02149"},{"key":"550_CR37","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1016\/j.patcog.2017.06.017","volume":"71","author":"J Zhang","year":"2017","unstructured":"Zhang J, Du J, Zhang S, Liu D, Hu Y, Hu J, Wei S, Dai L (2017) Watch, attend and parse: An end-to-end neural network based approach to handwritten mathematical expression recognition. Pattern Recognit 71:196\u2013206","journal-title":"Pattern Recognit"},{"key":"550_CR38","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112731","volume":"307","author":"T Zhang","year":"2025","unstructured":"Zhang T, Jin X, Ma X, Peng X, Zhao Y, Liu J, Yu X (2025) Can question-texts improve the recognition of handwritten mathematical expressions in respondents\u2019 solutions? Knowl Based Syst 307:112731","journal-title":"Knowl Based Syst"},{"key":"550_CR39","doi-asserted-by":"crossref","unstructured":"Zhao W, Gao L, Yan Z, Peng S, Du L, Zhang Z (2021) Handwritten mathematical expression recognition with bidirectionally trained transformer. In: IEEE international conference on document analysis and recognition","DOI":"10.1007\/978-3-030-86331-9_37"},{"key":"550_CR40","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1007\/978-3-031-19815-1_23","volume-title":"Computer Vision - ECCV 2022","author":"W Zhao","year":"2022","unstructured":"Zhao W, Gao L (2022) Comer: Modeling coverage for transformer-based handwritten mathematical expression recognition. In: Avidan S, Brostow G, Ciss\u00e9 M, Farinella GM, Hassner T (eds) Computer Vision - ECCV 2022. Springer, Cham, pp 392\u2013408"},{"key":"550_CR41","doi-asserted-by":"crossref","unstructured":"Zhou Z, Song J, Yao K, Shu Z, Ma L (2023) Isr-llm: Iterative self-refined large language model for long-horizon sequential task planning. In: 2024 IEEE international conference on robotics and automation (ICRA), pp 2081\u20132088","DOI":"10.1109\/ICRA57147.2024.10610065"},{"key":"550_CR42","doi-asserted-by":"crossref","unstructured":"Zhu J, Gao L, Zhao W (2024) Ical: Implicit character-aided learning for enhanced handwritten mathematical expression recognition. In: IEEE international conference on document analysis and recognition)","DOI":"10.1007\/978-3-031-70549-6_2"},{"key":"550_CR43","doi-asserted-by":"crossref","unstructured":"Zhu J, Zhao W, Li Y, Hu X, Gao L (2025) Tamer: Tree-aware transformer for handwritten mathematical expression recognition. Proceedings of the AAAI conference on artificial intelligence 39(10):10950\u201310958","DOI":"10.1609\/aaai.v39i10.33190"}],"container-title":["Journal of King Saud University Computer and Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s44443-026-00550-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44443-026-00550-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44443-026-00550-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T17:27:48Z","timestamp":1777915668000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s44443-026-00550-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,11]]},"references-count":43,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["550"],"URL":"https:\/\/doi.org\/10.1007\/s44443-026-00550-8","relation":{},"ISSN":["1319-1578","2213-1248"],"issn-type":[{"value":"1319-1578","type":"print"},{"value":"2213-1248","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,11]]},"assertion":[{"value":"18 October 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}],"article-number":"125"}}