{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T06:14:02Z","timestamp":1775628842063,"version":"3.50.1"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,3]]},"DOI":"10.1109\/icmla66185.2025.00025","type":"proceedings-article","created":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T19:54:58Z","timestamp":1775591698000},"page":"144-151","source":"Crossref","is-referenced-by-count":0,"title":["Optical Character Recognition for Pre-Digital Historical Documents using Large Language Models"],"prefix":"10.1109","author":[{"given":"Chreston","family":"Miller","sequence":"first","affiliation":[{"name":"University Libraries Virginia Tech,Blacksburg,USA"}]},{"given":"Bipasha","family":"Banerjee","sequence":"additional","affiliation":[{"name":"University Libraries Virginia Tech,Blacksburg,USA"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Good and Best Practices for Making Digital Images","author":"Kennedy","year":"2012"},{"key":"ref2","article-title":"Ultimate Guide About Optical Character Recognition (OCR)","author":"Martinez","year":"2025"},{"key":"ref3","article-title":"The Complete Guide to OCR Technology | Inscribe AI","author":"Valleskey","year":"2024"},{"key":"ref4","article-title":"Milestone Documents","year":"2021"},{"key":"ref5","article-title":"pytesseract: Python-tesseract is a python wrapper for Google\u2019s 
Tesseract-OCR","author":"Hoffstaetter","year":"2025"},{"key":"ref6","article-title":"doctr: Document text recognition","year":"2021"},{"key":"ref7","article-title":"VikParuchuri\/surya","author":"Paruchuri","year":"2025"},{"key":"ref8","article-title":"The llama 3.2 multimodal models","year":"2024"},{"key":"ref9","article-title":"olmOCR: Unlocking Trillions of Tokens in PDFs with Vision Language Models","author":"Poznanski","year":"2025"},{"key":"ref10","article-title":"mistralai\/Mistral-Small-3.1-24B-Instruct-2503 \u00b7 Hugging Face"},{"key":"ref11","article-title":"mistralai\/Pixtral-12B-2409 \u00b7 Hugging Face"},{"key":"ref12","first-page":"26 296","article-title":"Improved baselines with visual instruction tuning","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Liu"},{"key":"ref13","article-title":"microsoft\/Phi-4-multimodal-instruct \u00b7 Hugging Face","year":"2025"},{"key":"ref14","article-title":"Best OCR Models for Text Recognition in Images","author":"Ueno","year":"2024"},{"key":"ref15","article-title":"Top Large Language Models with Vision Capabilities"},{"key":"ref16","article-title":"8 Top Open-Source OCR Models Compared: A Complete Guide","author":"Lu"},{"key":"ref17","article-title":"The best open source OCR models - OmniAI. 
Automate document workflows"},{"key":"ref18","article-title":"Easyocr","year":"2025"},{"key":"ref19","first-page":"629","article-title":"An Overview of the Tesseract OCR Engine","volume-title":"Ninth International Conference on Document Analysis and Recognition (ICDAR 2007)","volume":"2","author":"Smith"},{"key":"ref20","article-title":"docTR-Open Source OCR-Mindee"},{"key":"ref21","article-title":"Hugging Face: The AI Community Building the Future","year":"2025"},{"key":"ref22","article-title":"Llama 3.2: A meta-llama\u2019s collection of transformers and original repos of the Llama 3.2 and Llama guard 3","year":"2024"},{"key":"ref23","article-title":"Llama 3.2: Revolutionizing edge AI and vision with open, customizable models"},{"key":"ref24","article-title":"olmOCR: Unlocking Trillions of Tokens in PDFs with Vision Language Models","author":"Poznanski","year":"2025"},{"key":"ref25","article-title":"Qwen2-vl: Enhancing vision-language model\u2019s perception of the world at any resolution","author":"Wang","year":"2024"},{"key":"ref26","article-title":"Phi-4-Mini Technical Report: Compact yet Powerful Multimodal Language Models via Mixture-of-LoRAs","year":"2025"},{"key":"ref27","article-title":"Announcing Pixtral 12B | Mistral AI"},{"key":"ref28","article-title":"Mistral Small 3.1 | Mistral AI"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3476887.3476888"},{"key":"ref30","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","volume-title":"Text Summarization Branches Out","author":"Lin","year":"2004"},{"key":"ref31","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","volume-title":"Proceedings of the 40th Annual Meeting on Association for Computational Linguistics","author":"Papineni"},{"key":"ref32","first-page":"1557","article-title":"ICDAR 2019 competition on large-scale street view text with partial labeling - rrc-lsvt","volume-title":"2019 International 
Conference on Document Analysis and Recognition (ICDAR)","author":"Sun"},{"key":"ref33","first-page":"707","article-title":"Binary codes capable of correcting deletions, insertions, and reversals","volume-title":"Soviet physics. Doklady","volume":"10","author":"Levenshtein","year":"1965"},{"key":"ref34","article-title":"Chicago Covenants"},{"key":"ref35","article-title":"Analysis and Benchmarking of OCR Accuracy for Data Extraction Models","author":"Rao"},{"key":"ref36","article-title":"5. Character Error Rate and Learning Curve"},{"key":"ref37","first-page":"342","article-title":"Combining OCR Models for Reading Early Modern Books","volume-title":"Document Analysis and Recognition - ICDAR 2023: 17th International Conference, San Jos\u00e9, CA, USA, August 21\u201326, 2023, Proceedings, Part V","author":"Seuret"},{"key":"ref38","article-title":"meta-llama\/Llama-4-Scout-17B-16E-Instruct Hugging Face","year":"2025"},{"key":"ref39","article-title":"LLM Performance Leaderboard - a Hugging Face Space by ArtificialAnalysis","author":"Community"},{"key":"ref40","article-title":"Qwen3 technical report","year":"2025"},{"key":"ref41","article-title":"Qwen\/Qwen3-32B \u00b7 Hugging Face","year":"2025"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"}],"event":{"name":"2025 International Conference on Machine Learning and Applications (ICMLA)","location":"Boca Raton, FL, USA","start":{"date-parts":[[2025,12,3]]},"end":{"date-parts":[[2025,12,5]]}},"container-title":["2025 International Conference on Machine Learning and Applications 
(ICMLA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11471302\/11471304\/11471349.pdf?arnumber=11471349","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T05:38:26Z","timestamp":1775626706000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11471349\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,3]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/icmla66185.2025.00025","relation":{},"subject":[],"published":{"date-parts":[[2025,12,3]]}}}