{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T19:15:35Z","timestamp":1783106135031,"version":"3.54.6"},"publisher-location":"Singapore","reference-count":22,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819533459","type":"print"},{"value":"9789819533466","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T00:00:00Z","timestamp":1763856000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T00:00:00Z","timestamp":1763856000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3346-6_7","type":"book-chapter","created":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T05:49:52Z","timestamp":1763790592000},"page":"88-100","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Southeast Asian Language OCR Dataset and\u00a0Evaluation for\u00a0Large Multimodal Models"],"prefix":"10.1007","author":[{"given":"Xu","family":"Yang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rui","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Cunli","family":"Mao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ying","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shengxiang","family":"Gao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhengtao","family":"Yu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,11,23]]},"reference":[{"key":"7_CR1","unstructured":"Anthropic: Claude 3.5 sonnet (2025). https:\/\/www.anthropic.com\/news\/claude-3-5-sonnet. Accessed 1 Apr 2025"},{"key":"7_CR2","unstructured":"Awadalla, A., et\u00a0al.: Openflamingo: an open-source framework for training large autoregressive vision-language models. arXiv preprint arXiv:2308.01390 (2023)"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Z., et\u00a0al.: Internvl: scaling up vision foundation models and aligning for generic visual-linguistic tasks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 24185\u201324198 (2024)","DOI":"10.1109\/CVPR52733.2024.02283"},{"key":"7_CR4","unstructured":"Dong, X., et\u00a0al.: Internlm-xcomposer2: mastering free-form text-image composition and comprehension in vision-language large model. arXiv preprint arXiv:2401.16420 (2024)"},{"key":"7_CR5","unstructured":"Fu, L., et\u00a0al.: Ocrbench v2: an improved benchmark for evaluating large multimodal models on visual text localization and reasoning. arXiv preprint arXiv:2501.00321 (2024)"},{"key":"7_CR6","unstructured":"GLM, T., et\u00a0al.: Chatglm: a family of large language models from glm-130b to glm-4 all tools. arXiv preprint arXiv:2406.12793 (2024)"},{"key":"7_CR7","unstructured":"Hurst, A., et\u00a0al.: Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)"},{"key":"7_CR8","unstructured":"JaidedAI: Easyocr (2025). https:\/\/github.com\/JaidedAI\/EasyOCR. Accessed 1 Apr 2025"},{"key":"7_CR9","unstructured":"Kim, Y., Yim, M., Song, K.Y.: Tablevqa-bench: a visual question answering benchmark on multiple table domains. arXiv preprint arXiv:2404.19205 (2024)"},{"key":"7_CR10","unstructured":"Li, B., et\u00a0al.: Llava-onevision: easy visual task transfer. arXiv preprint arXiv:2408.03326 (2024)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Li, B., Ge, Y., Chen, Y., Ge, Y., Zhang, R., Shan, Y.: Seed-bench-2-plus: benchmarking multimodal large language models with text-rich visual comprehension. arXiv preprint arXiv:2404.16790 (2024)","DOI":"10.1109\/CVPR52733.2024.01263"},{"issue":"12","key":"7_CR12","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4235-6","volume":"67","author":"Y Liu","year":"2024","unstructured":"Liu, Y., et al.: Ocrbench: on the hidden mystery of OCR in large multimodal models. Sci. China Inf. Sci. 67(12), 220102 (2024)","journal-title":"Sci. China Inf. Sci."},{"key":"7_CR13","unstructured":"Liu, Y., et al.: On the hidden mystery of ocr in large multimodal models. arXiv e-prints pp. arXiv\u20132305 (2023)"},{"key":"7_CR14","unstructured":"PaddlePaddle: Paddleocr (2025). https:\/\/github.com\/PaddlePaddle\/PaddleOCR. Accessed 1 Apr 2025"},{"key":"7_CR15","unstructured":"Paruchuri, V.: Surya (2025). https:\/\/github.com\/VikParuchuri\/surya. Accessed 1 Apr 2025"},{"key":"7_CR16","unstructured":"Shi, Y., et al.: Exploring ocr capabilities of gpt-4v (ision): a quantitative and in-depth evaluation. arXiv preprint arXiv:2310.16809 (2023)"},{"key":"7_CR17","unstructured":"Team, G., et\u00a0al.: Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)"},{"key":"7_CR18","unstructured":"Wang, P., et\u00a0al.: Qwen2-vl: enhancing vision-language model\u2019s perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)"},{"key":"7_CR19","unstructured":"Wu, Z., et\u00a0al.: Deepseek-vl2: mixture-of-experts vision-language models for advanced multimodal understanding. arXiv preprint arXiv:2412.10302 (2024)"},{"key":"7_CR20","unstructured":"Yang, Z., et al.: The dawn of lmms: preliminary explorations with gpt-4 (vision), 9(1), 1. arXiv preprint arXiv:2309.17421 (2023)"},{"key":"7_CR21","unstructured":"Yao, Yet\u00a0al.: Minicpm-v: a gpt-4v level mllm on your phone. arXiv preprint arXiv:2408.01800 (2024)"},{"key":"7_CR22","unstructured":"Ye, Q., et\u00a0al.: mplug-owl: modularization empowers large language models with multimodality. arXiv preprint arXiv:2304.14178 (2023)"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3346-6_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T18:15:24Z","timestamp":1783102524000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3346-6_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,23]]},"ISBN":["9789819533459","9789819533466"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3346-6_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,23]]},"assertion":[{"value":"23 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}