{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T01:51:50Z","timestamp":1772934710228,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,8]]},"DOI":"10.1109\/bigdata66926.2025.11401834","type":"proceedings-article","created":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T20:57:57Z","timestamp":1772830677000},"page":"5975-5984","source":"Crossref","is-referenced-by-count":0,"title":["Using an Ensemble Approach for Layout Detection and Extraction from Historical Newspapers"],"prefix":"10.1109","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0507-3721","authenticated-orcid":false,"given":"Aditya","family":"Jadhav","sequence":"first","affiliation":[{"name":"Virginia Tech,Department of Computer Science,Blacksburg,USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4472-1902","authenticated-orcid":false,"given":"Bipasha","family":"Banerjee","sequence":"additional","affiliation":[{"name":"University Libraries, Virginia Tech,Blacksburg,USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0037-5322","authenticated-orcid":false,"given":"Jennifer","family":"Goyne","sequence":"additional","affiliation":[{"name":"University Libraries, Virginia Tech,Blacksburg,USA"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"OpenCV: Introduction"},{"key":"ref2","volume-title":"LibraryOfCongress\/newspaper-navigator","year":"2020"},{"key":"ref3","volume-title":"Detectron2","author":"Wu","year":"2019"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/icdar.2009.271"},{"key":"ref5","volume-title":"Document parsing unveiled: Techniques, challenges, and prospects for structured information extraction","author":"Zhang","year":"2024"},{"key":"ref6","volume-title":"Page layout analysis of text-heavy historical documents: a comparison of textual and visual approaches","author":"Najem-Meyer","year":"2022"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i11.21539"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s00799-025-00413-z"},{"key":"ref9","volume-title":"OCR Software, Data Extraction Tool - Amazon Textract - AWS","year":"2019"},{"key":"ref10","volume-title":"Tesseract documentation"},{"key":"ref11","volume-title":"Document AI"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-84062-3_17"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/compe53109.2021.9752204"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-019-00332-1"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-19-1018-0_23"},{"key":"ref16","volume-title":"Yolov3: An incremental improvement","author":"Redmon","year":"2018"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3603287.3651184"},{"key":"ref18","volume-title":"Ultralytics yolov8","author":"Jocher","year":"2023"},{"key":"ref19","volume":"abs\/2103.15348","author":"Shen","year":"2021","journal-title":"Layoutparser: A unified toolkit for deep learning based document image analysis"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/icdar.2017.311"},{"key":"ref21","article-title":"Optical character recognition for pre-digital historical documents using large language models","volume-title":"Proceedings of the 24th IEEE International Conference on Machine Learning and Applications","author":"Miller","year":"2025"},{"key":"ref22","volume-title":"mistralai\/Mistral-Small-3.1-24B-Instruct-2503 \u2022 Hugging Face"},{"key":"ref23","volume-title":"olmOCR: Unlocking Trillions of Tokens in PDFs with Vision Language Models","author":"Poznanski","year":"2025"},{"key":"ref24","volume-title":"Hugging Face Vision: Detection Metrics Demo"},{"key":"ref25","volume-title":"Virginia Tech Digital Libraries | Montgomery Museum"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/VBC.1990.109340"}],"event":{"name":"2025 IEEE International Conference on Big Data (BigData)","location":"Macau, China","start":{"date-parts":[[2025,12,8]]},"end":{"date-parts":[[2025,12,11]]}},"container-title":["2025 IEEE International Conference on Big Data (BigData)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11400704\/11400712\/11401834.pdf?arnumber=11401834","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T07:15:18Z","timestamp":1772867718000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11401834\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,8]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/bigdata66926.2025.11401834","relation":{},"subject":[],"published":{"date-parts":[[2025,12,8]]}}}