{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T12:20:56Z","timestamp":1769170856895,"version":"3.49.0"},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100002367","name":"Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["CAS-WX2022SF-0104"],"award-info":[{"award-number":["CAS-WX2022SF-0104"]}],"id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100011181","name":"Chinese Academy of Sciences Institute of High Energy Physics","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100011181","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1016\/j.engappai.2025.113504","type":"journal-article","created":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:05:28Z","timestamp":1766563528000},"page":"113504","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["HaiNougat: An academic document parser that preserves formulas and tables for high-energy physics"],"prefix":"10.1016","volume":"166","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-2871-881X","authenticated-orcid":false,"given":"Jianwen","family":"Luo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6542-052X","authenticated-orcid":false,"given":"Zhengde","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Fazhi","family":"Qi","sequence":"additional","affiliation":[]},{"given":"Yiyu","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2025.113504_b1","unstructured":"Adhikari, A., Ram, A., Tang, R., Lin, J., Docbert: Bert for document classification, [Online], Available: https:\/\/arxiv.org\/abs\/1904.08398."},{"key":"10.1016\/j.engappai.2025.113504_b2","unstructured":"Artifex Software Inc,, MuPDF. Available: https:\/\/mupdf.com\/. (Accessed 25 2023)."},{"key":"10.1016\/j.engappai.2025.113504_b3","first-page":"319","article-title":"Vision transformer for fast and efficient scene text recognition","author":"Atienza","year":"2021","journal-title":"Int. Conf. Doc. Anal. Recognit."},{"key":"10.1016\/j.engappai.2025.113504_b4","series-title":"International Conference on Knowledge and Systems Engineering","first-page":"1","article-title":"CRNN based OCR for American and british sign language fingerspelling","author":"Aung","year":"2021"},{"key":"10.1016\/j.engappai.2025.113504_b5","article-title":"Qwen: Technical report, [Online]","author":"Bai","year":"2023"},{"key":"10.1016\/j.engappai.2025.113504_b6","series-title":"Qwen2.5-VL: Technical report, [Online]","author":"Bai","year":"2025"},{"key":"10.1016\/j.engappai.2025.113504_b7","unstructured":"Banerjee, S., Lavie, A., 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/Or Summarization. pp. 65\u201372."},{"key":"10.1016\/j.engappai.2025.113504_b8","unstructured":"Blecher, L., pix2tex - LaTeX OCR. Available: https:\/\/github.com\/lukas-blecher\/LaTeX-OCR. (Accessed 2 2024)."},{"key":"10.1016\/j.engappai.2025.113504_b9","series-title":"Nougat: Neural optical understanding for academic documents, [Online]","author":"Blecher","year":"2023"},{"key":"10.1016\/j.engappai.2025.113504_b10","unstructured":"Clark, C., pdffigures2: A Tool for Extracting Figures, Tables, and Captions from PDF Documents. Available: https:\/\/github.com\/your-username\/pdffigures2. (Accessed: 14 2023)."},{"key":"10.1016\/j.engappai.2025.113504_b11","series-title":"Annual Industrial Automation and Electromechanical Engineering Conference","first-page":"110","article-title":"Optical character recognition using KNN on custom image dataset","author":"Hazra","year":"2017"},{"key":"10.1016\/j.engappai.2025.113504_b12","unstructured":"hepai,, HepAI Platform, Available: https:\/\/ai.ihep.ac.cn\/. (Accessed 2 2024)."},{"key":"10.1016\/j.engappai.2025.113504_b13","series-title":"Proceedings of the 30th ACM International Conference on Multimedia","first-page":"4083","article-title":"Layoutlmv3: Pre-training for document AI with unified text and image masking","author":"Huang","year":"2022"},{"key":"10.1016\/j.engappai.2025.113504_b14","series-title":"International Conference on Pattern Recognition","first-page":"3168","article-title":"Convolutional neural networks for document image classification","author":"Kang","year":"2014"},{"issue":"8","key":"10.1016\/j.engappai.2025.113504_b15","first-page":"707","article-title":"Binary codes capable of correcting deletions, insertions, and reversals","volume":"10","author":"Levenshtein","year":"1966","journal-title":"Sov. Phys. Dokl."},{"key":"10.1016\/j.engappai.2025.113504_b16","unstructured":"Lewis, M., Liu, Y., Goyal, N., Ghazvininejad, M., Mohamed, A., Levy, O., Stoyanov, V., Zettlemoyer, L., Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension, [Online], Available: https:\/\/arxiv.org\/abs\/1910.13461."},{"issue":"11","key":"10.1016\/j.engappai.2025.113504_b17","first-page":"13094","article-title":"Trocr: Transformer-based optical character recognition with pre-trained models","volume":"37","author":"Li","year":"2023","journal-title":"Proc. the AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.engappai.2025.113504_b18","first-page":"10012","article-title":"Swin transformer: Hierarchical vision transformer using shifted windows","author":"Liu","year":"2021","journal-title":"Proc. the IEEE\/CVF Int. Conf. Comput. Vis."},{"key":"10.1016\/j.engappai.2025.113504_b19","unstructured":"Lopez, P., GROBID. Available: https:\/\/github.com\/kermitt2\/grobid. (Accessed 14 2023)."},{"key":"10.1016\/j.engappai.2025.113504_b20","unstructured":"Mathpix, Inc, mathpix-markdown-it. Available: https:\/\/github.com\/Mathpix\/mathpix-markdown-it. (Accessed 4 2024)."},{"key":"10.1016\/j.engappai.2025.113504_b21","unstructured":"Meta,, React: A JavaScript library for building user interfaces. Available: https:\/\/reactjs.org\/. (Accessed 3 2024)."},{"key":"10.1016\/j.engappai.2025.113504_b22","unstructured":"Miller, B.R., Ginev, D., LaTeXML: A LaTeX to XML\/HTML\/MathML Converter. Available: https:\/\/github.com\/brucemiller\/LaTeXML. (Accessed 14 2023)."},{"key":"10.1016\/j.engappai.2025.113504_b23","first-page":"4614","article-title":"Tableformer: Table structure understanding with transformers","author":"Nassar","year":"2022","journal-title":"Proc. the IEEE\/CVF Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.engappai.2025.113504_b24","unstructured":"OpenAI,, 2023. GPT-4 Technical report, [Online], Available: https:\/\/arxiv.org\/abs\/2303.08774."},{"key":"10.1016\/j.engappai.2025.113504_b25","unstructured":"OpenAI,, 2023. GPT-4 Technical report, [Online], Available: https:\/\/arxiv.org\/abs\/2303.08774."},{"key":"10.1016\/j.engappai.2025.113504_b26","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J., 2002. Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. pp. 311\u2013318.","DOI":"10.3115\/1073083.1073135"},{"key":"10.1016\/j.engappai.2025.113504_b27","unstructured":"Paruchuri, Vik, Lampa, Samuel, Marker. Available: https:\/\/github.com\/VikParuchuri\/marker. (Accessed 14 2023)."},{"issue":"3","key":"10.1016\/j.engappai.2025.113504_b28","doi-asserted-by":"crossref","first-page":"796","DOI":"10.1049\/cit2.12117","article-title":"Novel multi-domain attention for abstractive summarisation","volume":"8","author":"Qu","year":"2023","journal-title":"CAAI Trans. Intell. Technol."},{"key":"10.1016\/j.engappai.2025.113504_b29","unstructured":"Ram\u00edrez, S., FastAPI: A modern, fast (high-performance), web framework for building APIs with Python 3.7+ based on standard Python type hints. Available: https:\/\/fastapi.tiangolo.com\/. (Accessed 10 2024)."},{"key":"10.1016\/j.engappai.2025.113504_b30","series-title":"Icdar, Vol. 3","article-title":"Best practices for convolutional neural networks applied to visual document analysis","author":"Simard","year":"2003"},{"key":"10.1016\/j.engappai.2025.113504_b31","series-title":"Ninth International Conference on Document Analysis and Recognition, Vol. 2","first-page":"629","article-title":"An overview of the tesseract OCR engine","author":"Smith","year":"2007"},{"key":"10.1016\/j.engappai.2025.113504_b32","unstructured":"Taylor, R., Kardas, M., Cucurull, G., Scialom, T., Hartshorn, A., Saravia, E., Poulton, A., Kerkez, V., Stojnic, R., Galactica: A large language model for science, [Online], Available: https:\/\/arxiv.org\/abs\/2401.00434."},{"key":"10.1016\/j.engappai.2025.113504_b33","unstructured":"TeX Live Team,, TeX Live 2023. Available: http:\/\/www.tug.org\/texlive\/. (Accessed 25 2023)."},{"key":"10.1016\/j.engappai.2025.113504_b34","series-title":"Qwen2-VL: Enhancing vision-language model\u2019s perception of the world at any resolution, [Online]","author":"Wang","year":"2024"},{"issue":"6","key":"10.1016\/j.engappai.2025.113504_b35","doi-asserted-by":"crossref","first-page":"2199","DOI":"10.1007\/s13042-023-02023-0","article-title":"Local or global? A novel transformer for Chinese named entity recognition based on multi-view and sliding attention","volume":"15","author":"Wang","year":"2024","journal-title":"Int. J. Mach. Learn. Cybern."},{"key":"10.1016\/j.engappai.2025.113504_b36","series-title":"Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining","first-page":"1192","article-title":"Layoutlm: Pre-training of text and layout for document image understanding","author":"Xu","year":"2020"},{"key":"10.1016\/j.engappai.2025.113504_b37","series-title":"Layoutlmv2: Multi-modal pre-training for visually-rich document understanding, [Online]","author":"Xu","year":"2012"},{"key":"10.1016\/j.engappai.2025.113504_b38","first-page":"4043","article-title":"Transformer-based approach for document layout understanding","author":"Yang","year":"2022","journal-title":"IEEE Int. Conf. Image Process."},{"key":"10.1016\/j.engappai.2025.113504_b39","unstructured":"Yang, An, Li, Anfeng, Yang, Baosong, Zhang, Beichen, Hui, Binyuan, Zheng, Bo, Yu, Bowen, et al., 2025. Qwen3: Technical report, [Online], Available: https:\/\/arxiv.org\/abs\/2505.09388."},{"key":"10.1016\/j.engappai.2025.113504_b40","series-title":"IAPR International Conference on Document Analysis and Recognition, Volume 1","first-page":"230","article-title":"CNN based page object detection in document images","author":"Yi","year":"2017"}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197625035353?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197625035353?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T12:42:19Z","timestamp":1768999339000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197625035353"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":40,"alternative-id":["S0952197625035353"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2025.113504","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,2]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"HaiNougat: An academic document parser that preserves formulas and tables for high-energy physics","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2025.113504","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113504"}}