{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,15]],"date-time":"2025-08-15T00:16:26Z","timestamp":1755216986402,"version":"3.43.0"},"reference-count":55,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476093"],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFC3301703"],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1016\/j.patcog.2025.112191","type":"journal-article","created":{"date-parts":[[2025,7,26]],"date-time":"2025-07-26T06:28:39Z","timestamp":1753511319000},"page":"112191","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["DocAligner: Automating the annotation of photographed documents through real-virtual alignment"],"prefix":"10.1016","volume":"171","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9787-9514","authenticated-orcid":false,"given":"Jiaxin","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1857-5473","authenticated-orcid":false,"given":"Peirong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Huiyi","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"Xinhong","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1203-047X","authenticated-orcid":false,"given":"Haowei","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Kai","family":"Ding","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5456-0957","authenticated-orcid":false,"given":"Lianwen","family":"Jin","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2025.112191_bib0001","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1016\/j.patcog.2017.02.023","article-title":"A survey of document image word spotting techniques","volume":"68","author":"Giotis","year":"2017","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2025.112191_bib0002","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.109006","article-title":"Robust table detection and structure recognition from heterogeneous document images","volume":"133","author":"Ma","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2025.112191_bib0003","series-title":"CVPR","first-page":"19254","article-title":"Unifying vision, text, and layout for universal document processing","author":"Tang","year":"2023"},{"key":"10.1016\/j.patcog.2025.112191_bib0004","series-title":"AAAI","first-page":"9923","article-title":"DocKylin: a large multimodal model for visual document understanding with efficient visual slimming","volume":"39","author":"Zhang","year":"2025"},{"key":"10.1016\/j.patcog.2025.112191_bib0005","series-title":"COLING","first-page":"949","article-title":"DocBank: a benchmark dataset for document layout analysis","author":"Li","year":"2020"},{"key":"10.1016\/j.patcog.2025.112191_bib0006","series-title":"ICDAR","first-page":"1015","article-title":"PubLayNet: largest dataset ever for document layout analysis","author":"Zhong","year":"2019"},{"key":"10.1016\/j.patcog.2025.112191_bib0007","series-title":"LREC","first-page":"1918","article-title":"Tablebank: table benchmark for image-based table detection and recognition","author":"Li","year":"2020"},{"key":"10.1016\/j.patcog.2025.112191_bib0008","series-title":"CVPR","first-page":"4634","article-title":"PubTables-1M: towards comprehensive table extraction from unstructured documents","author":"Smock","year":"2022"},{"key":"10.1016\/j.patcog.2025.112191_bib0009","series-title":"ECCV","first-page":"402","article-title":"RAFT: recurrent all-pairs field transforms for optical flow","author":"Teed","year":"2020"},{"key":"10.1016\/j.patcog.2025.112191_bib0010","series-title":"CVPR","first-page":"6258","article-title":"GLU-Net: global-local universal network for dense flow and correspondences","author":"Truong","year":"2020"},{"key":"10.1016\/j.patcog.2025.112191_bib0011","series-title":"WACV","article-title":"DGC-Net: dense geometric correspondence network","author":"Melekhov","year":"2019"},{"key":"10.1016\/j.patcog.2025.112191_bib0012","series-title":"NeurIPS","article-title":"Convolutional LSTM network: a machine learning approach for precipitation nowcasting","volume":"28","author":"Shi","year":"2015"},{"key":"10.1016\/j.patcog.2025.112191_bib0013","series-title":"CVPR","first-page":"19790","article-title":"RoMa: robust dense feature matching","author":"Edstedt","year":"2024"},{"key":"10.1016\/j.patcog.2025.112191_bib0014","series-title":"CVPR","first-page":"17765","article-title":"DKM: dense kernelized feature matching for geometry estimation","author":"Edstedt","year":"2023"},{"key":"10.1016\/j.patcog.2025.112191_bib0015","series-title":"NeurIPS","first-page":"14278","article-title":"GOCor: bringing globally optimized correspondence volumes into your neural network","author":"Truong","year":"2020"},{"key":"10.1016\/j.patcog.2025.112191_bib0016","series-title":"ICME","first-page":"181","article-title":"Unsupervised learning for optical flow estimation using pyramid convolution lstm","author":"Guan","year":"2019"},{"key":"10.1016\/j.patcog.2025.112191_bib0017","series-title":"AAAI","first-page":"525","article-title":"Spatio-temporal recurrent networks for event-based optical flow estimation","volume":"36","author":"Ding","year":"2022"},{"key":"10.1016\/j.patcog.2025.112191_bib0018","series-title":"ECCV","first-page":"618","article-title":"RANSAC-Flow: generic two-stage image alignment","author":"Shen","year":"2020"},{"key":"10.1016\/j.patcog.2025.112191_bib0019","series-title":"ICCV","first-page":"10346","article-title":"Warp consistency for unsupervised learning of dense correspondences","author":"Truong","year":"2021"},{"key":"10.1016\/j.patcog.2025.112191_bib0020","series-title":"ICDAR","first-page":"115","article-title":"VSR: a unified framework for document layout analysis combining vision, semantics and relations","author":"Zhang","year":"2021"},{"key":"10.1016\/j.patcog.2025.112191_bib0021","doi-asserted-by":"crossref","first-page":"3788","DOI":"10.1109\/TMM.2022.3165717","article-title":"SRRV: a novel document object detector based on spatial-related relation and vision","volume":"25","author":"Bi","year":"2022","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.patcog.2025.112191_bib0022","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.108660","article-title":"Synthetic document generator for annotation-free layout recognition","volume":"128","author":"Raman","year":"2022","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2025.112191_bib0023","series-title":"CVPR","first-page":"15138","article-title":"M6Doc: a large-scale multi-format, multi-type, multi-layout, multi-language, multi-annotation category dataset for modern document layout analysis","author":"Cheng","year":"2023"},{"key":"10.1016\/j.patcog.2025.112191_bib0024","series-title":"ICCV","first-page":"944","article-title":"Parsing table structures in the wild","author":"Long","year":"2021"},{"key":"10.1016\/j.patcog.2025.112191_bib0025","series-title":"ACM MM","first-page":"273","article-title":"DocTr: document image transformer for geometric unwarping and illumination correction","author":"Feng","year":"2021"},{"issue":"6","key":"10.1016\/j.patcog.2025.112191_bib0026","first-page":"1","article-title":"Document rectification and illumination correction using a patch-based CNN","volume":"38","author":"Li","year":"2019","journal-title":"ACM Trans. Graph."},{"key":"10.1016\/j.patcog.2025.112191_bib0027","series-title":"BMVC","article-title":"Intrinsic decomposition of document images in-the-wild","author":"Das","year":"2020"},{"key":"10.1016\/j.patcog.2025.112191_bib0028","series-title":"CVPR","first-page":"15654","article-title":"DocRes: a generalist model toward unifying document image restoration tasks","author":"Zhang","year":"2024"},{"issue":"5","key":"10.1016\/j.patcog.2025.112191_bib0029","doi-asserted-by":"crossref","first-page":"2319","DOI":"10.1109\/TAI.2023.3321257","article-title":"Appearance enhancement for camera-captured document images in the wild","volume":"5","author":"Zhang","year":"2023","journal-title":"IEEE Trans. Artif. Intell."},{"key":"10.1016\/j.patcog.2025.112191_bib0030","series-title":"ICCV","first-page":"131","article-title":"DewarpNet: single-image document unwarping with stacked 3D and 2D regression networks","author":"Das","year":"2019"},{"key":"10.1016\/j.patcog.2025.112191_bib0031","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109989","article-title":"GDB: gated convolutions-based document binarization","volume":"146","author":"Yang","year":"2024","journal-title":"Pattern Recognit."},{"issue":"2","key":"10.1016\/j.patcog.2025.112191_bib0032","doi-asserted-by":"crossref","first-page":"41","DOI":"10.3390\/jimaging9020041","article-title":"A quality, size and time assessment of the binarization of documents photographed by smartphones","volume":"9","author":"Bernardino","year":"2023","journal-title":"J. Imaging"},{"key":"10.1016\/j.patcog.2025.112191_bib0033","series-title":"ACM DocEng","first-page":"1","article-title":"Binarization of photographed documents image quality, processing time and size assessment","author":"Lins","year":"2022"},{"key":"10.1016\/j.patcog.2025.112191_bib0034","series-title":"ICDAR","first-page":"466","article-title":"Document dewarping with control points","author":"Xie","year":"2021"},{"key":"10.1016\/j.patcog.2025.112191_bib0035","series-title":"CVPR","first-page":"4700","article-title":"DocUNet: document image unwarping via a stacked U-Net","author":"Ma","year":"2018"},{"key":"10.1016\/j.patcog.2025.112191_bib0036","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2020.107576","article-title":"Geometric rectification of document images using adversarial gated unwarping network","volume":"108","author":"Liu","year":"2020","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2025.112191_bib0037","series-title":"CVPR","first-page":"4573","article-title":"Fourier document restoration for robust document dewarping and recognition","author":"Xue","year":"2022"},{"key":"10.1016\/j.patcog.2025.112191_bib0038","series-title":"ACM SIGGRAPH","first-page":"1","article-title":"Learning from documents in the wild to improve document unwarping","author":"Ma","year":"2022"},{"issue":"1","key":"10.1016\/j.patcog.2025.112191_bib0039","doi-asserted-by":"crossref","first-page":"110","DOI":"10.1038\/s41597-023-01985-8","article-title":"A large-scale dataset for end-to-end table recognition in the wild","volume":"10","author":"Yang","year":"2023","journal-title":"Sci. Data"},{"key":"10.1016\/j.patcog.2025.112191_bib0040","series-title":"ICDAR","first-page":"554","article-title":"TABLEX: a benchmark dataset for structure and content information extraction from scientific tables","author":"Desai","year":"2021"},{"key":"10.1016\/j.patcog.2025.112191_bib0041","series-title":"ECCV","first-page":"351","article-title":"PARN: pyramidal affine regression networks for dense semantic correspondence","author":"Jeon","year":"2018"},{"key":"10.1016\/j.patcog.2025.112191_bib0042","series-title":"ACM MM","first-page":"2805","article-title":"Marior: margin removal and iterative content rectification for document dewarping in the wild","author":"Zhang","year":"2022"},{"key":"10.1016\/j.patcog.2025.112191_bib0043","series-title":"ECCV","first-page":"801","article-title":"Encoder-decoder with atrous separable convolution for semantic image segmentation","author":"Chen","year":"2018"},{"issue":"12","key":"10.1016\/j.patcog.2025.112191_bib0044","doi-asserted-by":"crossref","first-page":"9052","DOI":"10.1109\/TPAMI.2024.3415112","article-title":"A survey on self-supervised learning: algorithms, applications, and future trends","volume":"46","author":"Gui","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2025.112191_bib0045","series-title":"SIGGRAPH Asia","first-page":"1","article-title":"UVDoc: neural grid-based document unwarping","author":"Verhoeven","year":"2023"},{"key":"10.1016\/j.patcog.2025.112191_bib0046","series-title":"ICDAR","first-page":"343","article-title":"Coarse-to-fine document image registration for dewarping","author":"Zhang","year":"2024"},{"key":"10.1016\/j.patcog.2025.112191_bib0047","series-title":"ECCV","first-page":"475","article-title":"Geometric representation learning for document image rectification","author":"Feng","year":"2022"},{"key":"10.1016\/j.patcog.2025.112191_bib0048","series-title":"WACV","first-page":"665","article-title":"DocReal: robust document dewarping of real-life images via attention-enhanced control point prediction","author":"Yu","year":"2024"},{"key":"10.1016\/j.patcog.2025.112191_bib0049","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1016\/j.patrec.2025.04.038","article-title":"Enhancing document dewarping evaluation: a new metric with improved accuracy and efficiency","volume":"195","author":"Zhang","year":"2025","journal-title":"Pattern Recognit. Lett."},{"key":"10.1016\/j.patcog.2025.112191_bib0050","series-title":"CVPR","first-page":"2961","article-title":"Mask R-CNN","author":"He","year":"2017"},{"key":"10.1016\/j.patcog.2025.112191_bib0051","series-title":"ACM MM","first-page":"5074","article-title":"UDoc-GAN: unpaired document illumination correction with background light prior","author":"Wang","year":"2022"},{"key":"10.1016\/j.patcog.2025.112191_bib0052","series-title":"ICDAR","first-page":"45","article-title":"Binarizing document images acquired with portable cameras","volume":"6","author":"Lins","year":"2017"},{"key":"10.1016\/j.patcog.2025.112191_bib0053","series-title":"MICCAI","first-page":"23","article-title":"UNeXt: MLP-based rapid medical image segmentation network","author":"Valanarasu","year":"2022"},{"key":"10.1016\/j.patcog.2025.112191_bib0054","series-title":"ICDAR","first-page":"384","article-title":"Augraphy: a data augmentation library for document images","author":"Groleau","year":"2023"},{"key":"10.1016\/j.patcog.2025.112191_bib0055","unstructured":"J. Ye, X. Qi, Y. He, Y. Chen, D. Gu, P. Gao, R. Xiao, PingAn-VCGroup\u2019s solution for ICDAR 2021 competition on scientific literature parsing task B: table recognition to HTML, (2021). arXiv preprint arXiv:2105.01848."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320325008520?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320325008520?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,8,5]],"date-time":"2025-08-05T01:42:56Z","timestamp":1754358176000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320325008520"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":55,"alternative-id":["S0031320325008520"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2025.112191","relation":{},"ISSN":["0031-3203"],"issn-type":[{"type":"print","value":"0031-3203"}],"subject":[],"published":{"date-parts":[[2026,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"DocAligner: Automating the annotation of photographed documents through real-virtual alignment","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2025.112191","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"112191"}}